/* * Copyright (c) 2026, Chloe M. * Provided under the BSD-3 clause */ #include #include #include #include #include #include "cescal/lexer.h" #include "cescal/log.h" #include "cescal/ptrbox.h" static inline void lexer_putback(struct cescal_state *state, char c) { if (state == NULL) { return; } state->lex_putback = c; } static inline char lexer_putback_pop(struct cescal_state *state) { char retc; if (state == NULL) { return '\0'; } retc = state->lex_putback; state->lex_putback = '\0'; return retc; } /* * Returns true if the given character is a whitespace * * @c: Character to check */ static inline bool lexer_is_ws(char c) { switch (c) { case '\t': case '\n': case ' ': case '\f': case '\r': return true; } return false; } /* * Consume a single character from the input source file and * optionally skip whitespace * * @state: Compiler state * @skip_ws: If true skip whitespace */ static char lexer_consume_single(struct cescal_state *state, bool skip_ws) { char c; if (state == NULL) { return '\0'; } if ((c = lexer_putback_pop(state)) != '\0') { if (!skip_ws) { return c; } if (skip_ws && !lexer_is_ws(c)) { return c; } } while ((c = readbuf_read(&state->rb, state->in_fd)) != '\0') { if (lexer_is_ws(c) && skip_ws) { continue; } return c; } return '\0'; } static int lexer_scan_ident(struct cescal_state *state, char lc, struct token *res) { char *buf, c; size_t bufsz, bufcap; if (state == NULL || res == NULL) { errno = EINVAL; return -1; } bufsz = 0; bufcap = 8; if ((buf = malloc(bufcap)) == NULL) { return -1; } if (lc != '_' && !isalpha(lc)) { cc_error("bad identifier\n"); return -1; } buf[bufsz++] = lc; for (;;) { c = lexer_consume_single(state, false); if (c != '_' && !isalnum(c)) { lexer_putback(state, c); buf[bufsz] = '\0'; break; } buf[bufsz++] = c; if (bufsz >= bufcap) { bufcap += 8; buf = realloc(buf, bufcap); } if (buf == NULL) { return -1; } } res->s = ptrbox_strdup(&state->ptrbox, buf); res->type = TT_IDENT; free(buf); return 0; } /* * Checks if an identifier token is actually a keyword * * @state: Compiler state * @res: Token result */ static void lexer_check_kw(struct cescal_state *state, struct token *res) { if (state == NULL || res == NULL) { return; } switch (*res->s) { case 'p': if (strcmp(res->s, "pub") == 0) { res->type = TT_PUB; return; } if (strcmp(res->s, "proc") == 0) { res->type = TT_PROC; return; } break; case 'b': if (strcmp(res->s, "begin") == 0) { res->type = TT_BEGIN; return; } break; case 'e': if (strcmp(res->s, "end") == 0) { res->type = TT_END; return; } break; case 'r': if (strcmp(res->s, "return") == 0) { res->type = TT_RETURN; return; } } } /* * Skip anything after a comment * * @state: Compiler state */ static void lexer_skip_comment(struct cescal_state *state) { char c; if (state == NULL) { return; } while ((c = lexer_consume_single(state, false)) != '\n') { if (c == '\0') { break; } } } int lexer_nom(struct cescal_state *state, struct token *res) { char c; if (state == NULL || res == NULL) { errno = EINVAL; return -1; } if ((c = lexer_consume_single(state, true)) == '\0') { return -1; } switch (c) { case '(': res->type = TT_LPAREN; res->c = c; return 0; case ')': res->type = TT_RPAREN; res->c = c; return 0; case ',': res->type = TT_COMMA; res->c = c; return 0; case '/': if (lexer_consume_single(state, true) == '/') { res->type = TT_COMMENT; res->c = c; lexer_skip_comment(state); return 0; } return -1; case '-': res->c = c; if (lexer_consume_single(state, true) == '>') { res->type = TT_ARROW; return 0; } return -1; default: if (lexer_scan_ident(state, c, res) == 0) { lexer_check_kw(state, res); return 0; } } cc_error("got unknown token '%c'\n", c); return -1; }