summary refs log tree commit diff
path: root/core
diff options
context:
space:
mode:
author Chloe M. <chloe@mirocom.org> 2026-05-23 10:53:41 +0000
committer Chloe M. <chloe@mirocom.org> 2026-05-23 10:53:41 +0000
commit 07b0a3b2583e7b17ef477c1e57a6c35a60711847 (patch)
tree aeb6f1edbbfa2ff6bc8d5c7968c18de5e5b1cb79 /core
parent d5f1f2a22fc1c35e93e1dfde58d08e2b3827192e (diff)
lexer: Add scanning of identifiers
Signed-off-by: Chloe M. <chloe@mirocom.org>
Diffstat (limited to 'core')
-rw-r--r-- core/lexer.c 84
1 files changed, 84 insertions, 0 deletions
diff --git a/core/lexer.c b/core/lexer.c
index a11a630..51e4d40 100644
--- a/core/lexer.c
+++ b/core/lexer.c
@@ -5,8 +5,35 @@
#include <errno.h>
#include <stdbool.h>
+#include <stdlib.h>
+#include <ctype.h>
#include "cescal/lexer.h"
#include "cescal/log.h"
+#include "cescal/ptrbox.h"
+
+/*
+ * Store a single character in the putback slot of the
+ * lexer state, replacing whatever was there before.
+ *
+ * @state: Compiler state (nothing happens when NULL)
+ * @c: Character to store
+ */
+static inline void
+lexer_putback(struct cescal_state *state, char c)
+{
+    if (state != NULL) {
+        state->lex_putback = c;
+    }
+}
+
+/*
+ * Take the character out of the putback slot and reset
+ * the slot to '\0'.
+ *
+ * @state: Compiler state
+ *
+ * Returns the character that was stored, or '\0' when
+ * the slot held '\0' or `state' is NULL.
+ */
+static inline char
+lexer_putback_pop(struct cescal_state *state)
+{
+    char saved = '\0';
+
+    if (state != NULL) {
+        saved = state->lex_putback;
+        state->lex_putback = '\0';
+    }
+
+    return saved;
+}
/*
* Returns true if the given character is a whitespace
@@ -44,6 +71,12 @@ lexer_consume_single(struct cescal_state *state, bool skip_ws)
return '\0';
}
+ if ((c = lexer_putback_pop(state)) != '\0') {
+ if (lexer_is_ws(c) && !skip_ws) {
+ return c;
+ }
+ }
+
while ((c = readbuf_read(&state->rb, state->in_fd)) != '\0') {
if (lexer_is_ws(c)) {
continue;
@@ -55,6 +88,53 @@ lexer_consume_single(struct cescal_state *state, bool skip_ws)
return '\0';
}
+/*
+ * Scan an identifier ([A-Za-z_][A-Za-z0-9_]*) and store
+ * it in `res' as a TT_IDENT token.
+ *
+ * @state: Compiler state
+ * @lc: First character of the identifier, already consumed
+ * @res: Token result is written here
+ *
+ * The first character that is not part of the identifier
+ * is handed to lexer_putback().
+ *
+ * Returns zero on success, otherwise a less than zero
+ * value. `res' is only guaranteed valid on success.
+ */
+static int
+lexer_scan_ident(struct cescal_state *state, char lc, struct token *res)
+{
+    char *buf, *tmp, c;
+    size_t bufsz, bufcap;
+
+    if (state == NULL || res == NULL) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    /*
+     * Reject a bad leading character before allocating so
+     * that the caller can report the unknown token instead
+     * of receiving a bogus identifier. The <ctype.h> helpers
+     * require a value representable as unsigned char.
+     */
+    if (lc != '_' && !isalpha((unsigned char)lc)) {
+        cc_error("bad identifier\n");
+        errno = EINVAL;
+        return -1;
+    }
+
+    bufcap = 8;
+    if ((buf = malloc(bufcap)) == NULL) {
+        return -1;
+    }
+
+    bufsz = 0;
+    buf[bufsz++] = lc;
+    for (;;) {
+        c = lexer_consume_single(state, false);
+        if (c != '_' && !isalnum((unsigned char)c)) {
+            lexer_putback(state, c);
+            break;
+        }
+
+        buf[bufsz++] = c;
+
+        /* Grow early so there is always room for the terminator */
+        if (bufsz >= bufcap) {
+            bufcap += 8;
+            tmp = realloc(buf, bufcap);
+            if (tmp == NULL) {
+                /* realloc() leaves the old block intact on failure */
+                free(buf);
+                return -1;
+            }
+            buf = tmp;
+        }
+    }
+    buf[bufsz] = '\0';
+
+    res->s = ptrbox_strdup(&state->ptrbox, buf);
+    free(buf);
+
+    /* NOTE(review): assumes ptrbox_strdup() returns NULL on failure - confirm */
+    if (res->s == NULL) {
+        return -1;
+    }
+
+    res->type = TT_IDENT;
+    return 0;
+}
+
int
lexer_nom(struct cescal_state *state, struct token *res)
{
@@ -82,6 +162,10 @@ lexer_nom(struct cescal_state *state, struct token *res)
res->type = TT_COMMA;
res->c = c;
return 0;
+ default:
+ if (lexer_scan_ident(state, c, res) == 0) {
+ return 0;
+ }
}
cc_error("got unknown token '%c'\n", c);