diff options
Diffstat (limited to 'core')
| -rw-r--r-- | core/lexer.c | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/core/lexer.c b/core/lexer.c
index a11a630..51e4d40 100644
--- a/core/lexer.c
+++ b/core/lexer.c
@@ -5,8 +5,50 @@
 
 #include <errno.h>
 #include <stdbool.h>
+#include <stdlib.h>
+#include <ctype.h>
 #include "cescal/lexer.h"
 #include "cescal/log.h"
+#include "cescal/ptrbox.h"
+
+/*
+ * Stashes one character in the lexer state so that the next
+ * lexer_consume_single() call can hand it back.
+ *
+ * Only the most recent character is kept, and '\0' doubles as
+ * the "slot empty" marker, so putting back '\0' stores nothing.
+ * Does nothing if 'state' is NULL.
+ */
+static inline void
+lexer_putback(struct cescal_state *state, char c)
+{
+    if (state == NULL) {
+        return;
+    }
+
+    state->lex_putback = c;
+}
+
+/*
+ * Takes the put-back character out of the lexer state and
+ * clears the slot.
+ *
+ * Returns the stored character, or '\0' if the slot was empty
+ * or 'state' is NULL.
+ */
+static inline char
+lexer_putback_pop(struct cescal_state *state)
+{
+    char retc;
+
+    if (state == NULL) {
+        return '\0';
+    }
+
+    retc = state->lex_putback;
+    state->lex_putback = '\0';
+    return retc;
+}
 
 /*
  * Returns true if the given character is a whitespace
@@ -44,6 +86,16 @@ lexer_consume_single(struct cescal_state *state, bool skip_ws)
         return '\0';
     }
 
+    /*
+     * Hand back a put-back character before reading new input;
+     * whitespace is dropped only when the caller skips it.
+     */
+    if ((c = lexer_putback_pop(state)) != '\0') {
+        if (!lexer_is_ws(c) || !skip_ws) {
+            return c;
+        }
+    }
+
     while ((c = readbuf_read(&state->rb, state->in_fd)) != '\0') {
         if (lexer_is_ws(c)) {
             continue;
@@ -55,6 +107,75 @@ lexer_consume_single(struct cescal_state *state, bool skip_ws)
     return '\0';
 }
 
+/*
+ * Scans an identifier: a letter or '_' followed by any run of
+ * letters, digits or '_'. The first character 'lc' has already
+ * been consumed by the caller.
+ *
+ * The character that ends the identifier is put back so the
+ * next read sees it again.
+ *
+ * On success 'res' becomes a TT_IDENT token whose string is a
+ * copy made through the state's ptrbox.
+ *
+ * Returns 0 on success. Returns -1 if an argument is NULL
+ * (errno is set to EINVAL), if 'lc' cannot start an identifier,
+ * or if memory allocation fails.
+ */
+static int
+lexer_scan_ident(struct cescal_state *state, char lc, struct token *res)
+{
+    char *buf, *tmp, c;
+    size_t bufsz, bufcap;
+
+    if (state == NULL || res == NULL) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    /* Reject a bad lead before anything is allocated */
+    if (lc != '_' && !isalpha((unsigned char)lc)) {
+        cc_error("bad identifier\n");
+        return -1;
+    }
+
+    bufsz = 0;
+    bufcap = 8;
+    if ((buf = malloc(bufcap)) == NULL) {
+        return -1;
+    }
+
+    buf[bufsz++] = lc;
+    for (;;) {
+        c = lexer_consume_single(state, false);
+        if (c != '_' && !isalnum((unsigned char)c)) {
+            lexer_putback(state, c);
+            buf[bufsz] = '\0';
+            break;
+        }
+
+        buf[bufsz++] = c;
+        if (bufsz < bufcap) {
+            continue;
+        }
+
+        /* Keep 'buf' valid until realloc() is known to have worked */
+        bufcap += 8;
+        if ((tmp = realloc(buf, bufcap)) == NULL) {
+            free(buf);
+            return -1;
+        }
+
+        buf = tmp;
+    }
+
+    /* NOTE(review): ptrbox_strdup() result is unchecked; confirm how it fails */
+    res->s = ptrbox_strdup(&state->ptrbox, buf);
+    res->type = TT_IDENT;
+    free(buf);
+    return 0;
+}
+
 int
 lexer_nom(struct cescal_state *state, struct token *res)
 {
@@ -82,6 +203,11 @@ lexer_nom(struct cescal_state *state, struct token *res)
         res->type = TT_COMMA;
         res->c = c;
         return 0;
+    default:
+        /* No case above matched; try to scan an identifier */
+        if (lexer_scan_ident(state, c, res) == 0) {
+            return 0;
+        }
     }
 
     cc_error("got unknown token '%c'\n", c);
