/* * Copyright (c) 2026, Chloe M. * Provided under the BSD-3 clause */ #include #include #include #include #include "cescal/lexer.h" #include "cescal/log.h" #include "cescal/ptrbox.h" static inline void lexer_putback(struct cescal_state *state, char c) { if (state == NULL) { return; } state->lex_putback = c; } static inline char lexer_putback_pop(struct cescal_state *state) { char retc; if (state == NULL) { return '\0'; } retc = state->lex_putback; state->lex_putback = '\0'; return retc; } /* * Returns true if the given character is a whitespace * * @c: Character to check */ static inline bool lexer_is_ws(char c) { switch (c) { case '\t': case '\n': case ' ': case '\f': case '\r': return true; } return false; } /* * Consume a single character from the input source file and * optionally skip whitespace * * @state: Compiler state * @skip_ws: If true skip whitespace */ static char lexer_consume_single(struct cescal_state *state, bool skip_ws) { char c; if (state == NULL) { return '\0'; } if ((c = lexer_putback_pop(state)) != '\0') { if (lexer_is_ws(c) && !skip_ws) { return c; } } while ((c = readbuf_read(&state->rb, state->in_fd)) != '\0') { if (lexer_is_ws(c)) { continue; } return c; } return '\0'; } static int lexer_scan_ident(struct cescal_state *state, char lc, struct token *res) { char *buf, c; size_t bufsz, bufcap; if (state == NULL || res == NULL) { errno = EINVAL; return -1; } bufsz = 0; bufcap = 8; if ((buf = malloc(bufcap)) == NULL) { return -1; } if (lc != '_' && !isalpha(lc)) { cc_error("bad identifier\n"); } buf[bufsz++] = lc; for (;;) { c = lexer_consume_single(state, false); if (c != '_' && !isalnum(c)) { lexer_putback(state, c); buf[bufsz] = '\0'; break; } buf[bufsz++] = c; if (bufsz >= bufcap) { bufcap += 8; buf = realloc(buf, bufcap); } if (buf == NULL) { return -1; } } res->s = ptrbox_strdup(&state->ptrbox, buf); res->type = TT_IDENT; free(buf); return 0; } int lexer_nom(struct cescal_state *state, struct token *res) { char c; if (state == NULL || res == NULL) { errno = EINVAL; return -1; } if ((c = lexer_consume_single(state, true)) == '\0') { return -1; } switch (c) { case '(': res->type = TT_LPAREN; res->c = c; return 0; case ')': res->type = TT_RPAREN; res->c = c; return 0; case ',': res->type = TT_COMMA; res->c = c; return 0; default: if (lexer_scan_ident(state, c, res) == 0) { return 0; } } cc_error("got unknown token '%c'\n", c); return -1; }