summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- core/lexer.c 84
-rw-r--r-- include/cescal/state.h 2
-rw-r--r-- include/cescal/token.h 1
3 files changed, 87 insertions, 0 deletions
diff --git a/core/lexer.c b/core/lexer.c
index a11a630..51e4d40 100644
--- a/core/lexer.c
+++ b/core/lexer.c
@@ -5,8 +5,35 @@
#include <errno.h>
#include <stdbool.h>
+#include <stdlib.h>
+#include <ctype.h>
#include "cescal/lexer.h"
#include "cescal/log.h"
+#include "cescal/ptrbox.h"
+
+/*
+ * Store a single character in the lexer putback slot.
+ *
+ * @state: Compiler state
+ * @c: Character to store
+ *
+ * Does nothing when @state is NULL. Whatever the slot held before
+ * is overwritten.
+ */
+static inline void
+lexer_putback(struct cescal_state *state, char c)
+{
+    if (state != NULL) {
+        state->lex_putback = c;
+    }
+}
+
+/*
+ * Take the character out of the lexer putback slot.
+ *
+ * @state: Compiler state
+ *
+ * Returns the stored character and resets the slot to '\0'.
+ * Returns '\0' when @state is NULL.
+ */
+static inline char
+lexer_putback_pop(struct cescal_state *state)
+{
+    char pending = '\0';
+
+    if (state != NULL) {
+        pending = state->lex_putback;
+        state->lex_putback = '\0';
+    }
+
+    return pending;
+}
/*
* Returns true if the given character is a whitespace
@@ -44,6 +71,12 @@ lexer_consume_single(struct cescal_state *state, bool skip_ws)
return '\0';
}
+ if ((c = lexer_putback_pop(state)) != '\0') {
+ if (lexer_is_ws(c) && !skip_ws) {
+ return c;
+ }
+ }
+
while ((c = readbuf_read(&state->rb, state->in_fd)) != '\0') {
if (lexer_is_ws(c)) {
continue;
@@ -55,6 +88,53 @@ lexer_consume_single(struct cescal_state *state, bool skip_ws)
return '\0';
}
+/*
+ * Scan an identifier whose first character has already been
+ * consumed by the caller.
+ *
+ * @state: Compiler state
+ * @lc: First character of the identifier ('_' or alphabetic)
+ * @res: Token to fill in on success
+ *
+ * Characters are read with lexer_consume_single() until one that is
+ * neither '_' nor alphanumeric shows up; that character is handed to
+ * lexer_putback(). On success @res->s holds the string returned by
+ * ptrbox_strdup() and @res->type is TT_IDENT. On failure @res is
+ * left untouched.
+ *
+ * Returns zero on success, otherwise -1.
+ */
+static int
+lexer_scan_ident(struct cescal_state *state, char lc, struct token *res)
+{
+    char *buf, *tmp, *ident, c;
+    size_t bufsz, bufcap;
+
+    if (state == NULL || res == NULL) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    /*
+     * Reject a bad leading character before allocating anything.
+     * The <ctype.h> functions require a value representable as
+     * unsigned char, hence the casts here and below.
+     */
+    if (lc != '_' && !isalpha((unsigned char)lc)) {
+        cc_error("bad identifier\n");
+        errno = EINVAL;
+        return -1;
+    }
+
+    bufsz = 0;
+    bufcap = 8;
+    if ((buf = malloc(bufcap)) == NULL) {
+        return -1;
+    }
+
+    buf[bufsz++] = lc;
+    for (;;) {
+        c = lexer_consume_single(state, false);
+        if (c != '_' && !isalnum((unsigned char)c)) {
+            /* Not ours: leave it for the next read */
+            lexer_putback(state, c);
+            buf[bufsz] = '\0';
+            break;
+        }
+
+        buf[bufsz++] = c;
+        if (bufsz >= bufcap) {
+            /*
+             * Grow through a temporary so the old buffer can
+             * still be freed if realloc() fails.
+             */
+            bufcap += 8;
+            tmp = realloc(buf, bufcap);
+            if (tmp == NULL) {
+                free(buf);
+                return -1;
+            }
+
+            buf = tmp;
+        }
+    }
+
+    ident = ptrbox_strdup(&state->ptrbox, buf);
+    free(buf);
+
+    /* NOTE(review): assumes ptrbox_strdup() returns NULL on failure - confirm */
+    if (ident == NULL) {
+        return -1;
+    }
+
+    res->s = ident;
+    res->type = TT_IDENT;
+    return 0;
+}
+
int
lexer_nom(struct cescal_state *state, struct token *res)
{
@@ -82,6 +162,10 @@ lexer_nom(struct cescal_state *state, struct token *res)
res->type = TT_COMMA;
res->c = c;
return 0;
+ default:
+ if (lexer_scan_ident(state, c, res) == 0) {
+ return 0;
+ }
}
cc_error("got unknown token '%c'\n", c);
diff --git a/include/cescal/state.h b/include/cescal/state.h
index 566b5a2..3c1ad86 100644
--- a/include/cescal/state.h
+++ b/include/cescal/state.h
@@ -18,12 +18,14 @@
* @rb: Read buffer
* @tokbuf: Token buffer
* @ptrbox: Global pointer box
+ * @lex_putback: Lexer putback buffer
*/
struct cescal_state {
int in_fd;
struct readbuf rb;
struct tokbuf tokbuf;
struct ptrbox ptrbox;
+ char lex_putback; /* '\0' when no character is pending */
};
/*
diff --git a/include/cescal/token.h b/include/cescal/token.h
index 0be8ff0..990df77 100644
--- a/include/cescal/token.h
+++ b/include/cescal/token.h
@@ -32,6 +32,7 @@ struct token {
tt_t type;
union {
char c;
+ char *s;
};
};