diff options
| author | Chloe M. <chloe@mirocom.org> | 2026-05-23 10:53:41 +0000 |
|---|---|---|
| committer | Chloe M. <chloe@mirocom.org> | 2026-05-23 10:53:41 +0000 |
| commit | 07b0a3b2583e7b17ef477c1e57a6c35a60711847 (patch) | |
| tree | aeb6f1edbbfa2ff6bc8d5c7968c18de5e5b1cb79 | |
| parent | d5f1f2a22fc1c35e93e1dfde58d08e2b3827192e (diff) | |
lexer: Add scanning of identifiers
Signed-off-by: Chloe M. <chloe@mirocom.org>
| -rw-r--r-- | core/lexer.c | 139 | ||||
| -rw-r--r-- | include/cescal/state.h | 2 | ||||
| -rw-r--r-- | include/cescal/token.h | 1 |
3 files changed, 142 insertions, 0 deletions
diff --git a/core/lexer.c b/core/lexer.c
index a11a630..51e4d40 100644
--- a/core/lexer.c
+++ b/core/lexer.c
@@ -5,8 +5,53 @@
 
 #include <errno.h>
 #include <stdbool.h>
+#include <stdlib.h>
+#include <ctype.h>
 #include "cescal/lexer.h"
 #include "cescal/log.h"
+#include "cescal/ptrbox.h"
+
+/*
+ * Stash one character so that the next call to
+ * lexer_consume_single() sees it before reading new input.
+ *
+ * @state: Compiler state
+ * @c: Character to put back
+ *
+ * Only one character is held; putting back a second one
+ * before the first is popped overwrites it.
+ */
+static inline void
+lexer_putback(struct cescal_state *state, char c)
+{
+	if (state == NULL) {
+		return;
+	}
+
+	state->lex_putback = c;
+}
+
+/*
+ * Take the pending putback character, leaving the
+ * putback buffer empty.
+ *
+ * @state: Compiler state
+ *
+ * Returns the held character, or '\0' if there is none.
+ */
+static inline char
+lexer_putback_pop(struct cescal_state *state)
+{
+	char retc;
+
+	if (state == NULL) {
+		return '\0';
+	}
+
+	retc = state->lex_putback;
+	state->lex_putback = '\0';
+	return retc;
+}
 
 /*
  * Returns true if the given character is a whitespace
@@ -44,6 +89,17 @@ lexer_consume_single(struct cescal_state *state, bool skip_ws)
 		return '\0';
 	}
 
+	/*
+	 * A put back character comes before new input. It is
+	 * only discarded when it is whitespace that the caller
+	 * asked to skip.
+	 */
+	if ((c = lexer_putback_pop(state)) != '\0') {
+		if (!lexer_is_ws(c) || !skip_ws) {
+			return c;
+		}
+	}
+
 	while ((c = readbuf_read(&state->rb, state->in_fd)) != '\0') {
 		if (lexer_is_ws(c)) {
 			continue;
@@ -55,6 +111,85 @@ lexer_consume_single(struct cescal_state *state, bool skip_ws)
 	return '\0';
 }
 
+/*
+ * Scan an identifier: a letter or '_' followed by any
+ * number of letters, digits or '_'.
+ *
+ * @state: Compiler state
+ * @lc: First character of the identifier, already consumed
+ * @res: Token to fill on success
+ *
+ * The character that ends the identifier is put back so
+ * that the next token starts with it.
+ *
+ * Returns zero on success, otherwise a less than zero
+ * value if @lc cannot start an identifier or if memory
+ * could not be allocated.
+ */
+static int
+lexer_scan_ident(struct cescal_state *state, char lc, struct token *res)
+{
+	char *buf, *tmp, c;
+	size_t bufsz, bufcap;
+
+	if (state == NULL || res == NULL) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/*
+	 * Reject a bad first character before allocating and let
+	 * the caller report it. The cast keeps <ctype.h> defined
+	 * for negative char values.
+	 */
+	if (lc != '_' && !isalpha((unsigned char)lc)) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	bufsz = 0;
+	bufcap = 8;
+	if ((buf = malloc(bufcap)) == NULL) {
+		return -1;
+	}
+
+	buf[bufsz++] = lc;
+	for (;;) {
+		c = lexer_consume_single(state, false);
+		if (c != '_' && !isalnum((unsigned char)c)) {
+			lexer_putback(state, c);
+			break;
+		}
+
+		buf[bufsz++] = c;
+		if (bufsz < bufcap) {
+			continue;
+		}
+
+		/* Grow now so the terminator always has room */
+		bufcap += 8;
+		tmp = realloc(buf, bufcap);
+		if (tmp == NULL) {
+			free(buf);
+			return -1;
+		}
+
+		buf = tmp;
+	}
+
+	buf[bufsz] = '\0';
+
+	/* NOTE(review): assumes ptrbox_strdup() returns NULL on failure */
+	res->s = ptrbox_strdup(&state->ptrbox, buf);
+	free(buf);
+	if (res->s == NULL) {
+		return -1;
+	}
+
+	res->type = TT_IDENT;
+	return 0;
+}
+
 int
 lexer_nom(struct cescal_state *state, struct token *res)
 {
@@ -82,6 +217,10 @@ lexer_nom(struct cescal_state *state, struct token *res)
 		res->type = TT_COMMA;
 		res->c = c;
 		return 0;
+	default:
+		if (lexer_scan_ident(state, c, res) == 0) {
+			return 0;
+		}
 	}
 
 	cc_error("got unknown token '%c'\n", c);
diff --git a/include/cescal/state.h b/include/cescal/state.h
index 566b5a2..3c1ad86 100644
--- a/include/cescal/state.h
+++ b/include/cescal/state.h
@@ -18,12 +18,14 @@
  * @rb: Read buffer
  * @tokbuf: Token buffer
  * @ptrbox: Global pointer box
+ * @lex_putback: Lexer putback character ('\0' when empty)
  */
 struct cescal_state {
 	int in_fd;
 	struct readbuf rb;
 	struct tokbuf tokbuf;
 	struct ptrbox ptrbox;
+	char lex_putback;
 };
 
 /*
diff --git a/include/cescal/token.h b/include/cescal/token.h
index 0be8ff0..990df77 100644
--- a/include/cescal/token.h
+++ b/include/cescal/token.h
@@ -32,6 +32,7 @@ struct token {
 	tt_t type;
 	union {
 		char c;
+		char *s;
 	};
 };
 
