summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ian Moffett <ian@mirocom.org> 2026-05-23 02:21:09 -0400
committer Ian Moffett <ian@mirocom.org> 2026-05-23 02:21:09 -0400
commit 74e2e8c772d0f88da6684918f782b37156f10fb3 (patch)
tree fa5d9431e7e3f8df98f1d7f8e5dc9f8c02d09002
parent 659dd389326f73e6bcbce5311088c182d7cb580e (diff)
core: Add lexer + parser groundwork
Signed-off-by: Ian Moffett <ian@mirocom.org>
-rw-r--r-- core/cescal.c 5
-rw-r--r-- core/lexer.c 84
-rw-r--r-- core/parser.c 61
-rw-r--r-- include/cescal/lexer.h 24
-rw-r--r-- include/cescal/parser.h 20
-rw-r--r-- include/cescal/token.h 38
6 files changed, 232 insertions, 0 deletions
diff --git a/core/cescal.c b/core/cescal.c
index 6c828e1..0121389 100644
--- a/core/cescal.c
+++ b/core/cescal.c
@@ -6,6 +6,7 @@
#include <stdio.h>
#include <unistd.h>
#include "cescal/state.h"
+#include "cescal/parser.h"
#include "cescal/log.h"
static void
@@ -28,6 +29,10 @@ compile(const char *pathname)
return -1;
}
+ if (parser_parse(&st) < 0) {
+ return -1;
+ }
+
state_close(&st);
return 0;
}
diff --git a/core/lexer.c b/core/lexer.c
new file mode 100644
index 0000000..d6e5e66
--- /dev/null
+++ b/core/lexer.c
@@ -0,0 +1,84 @@
+#include <errno.h>
+#include <stdbool.h>
+#include "cescal/lexer.h"
+#include "cescal/log.h"
+
+/*
+ * Tests whether a character counts as whitespace for the lexer
+ *
+ * @c: Character to classify
+ *
+ * Returns true for tab, newline, space, form feed and carriage
+ * return; false for everything else
+ */
+static inline bool
+lexer_is_ws(char c)
+{
+    return c == '\t' || c == '\n' || c == ' ' || c == '\f' || c == '\r';
+}
+
+/*
+ * Consume a single character from the input source file and
+ * optionally skip whitespace
+ *
+ * @state: Compiler state
+ * @skip_ws: If true skip whitespace
+ *
+ * Returns the character that was read, or '\0' if @state is NULL
+ * or the read buffer yields '\0'
+ */
+static char
+lexer_consume_single(struct cescal_state *state, bool skip_ws)
+{
+    char c;
+
+    if (state == NULL) {
+        return '\0';
+    }
+
+    while ((c = readbuf_read(&state->rb, state->in_fd)) != '\0') {
+        /* Whitespace is only discarded when the caller asked for it */
+        if (skip_ws && lexer_is_ws(c)) {
+            continue;
+        }
+
+        return c;
+    }
+
+    return '\0';
+}
+
+/*
+ * Read the next non-whitespace character and turn it into a token
+ *
+ * @state: Compiler state
+ * @res: Receives the token type and its character on success
+ *
+ * Returns zero on success. Returns -1 if an argument is NULL (errno
+ * is set to EINVAL), if no character could be consumed, or if the
+ * character is not a known token; @res is left untouched then.
+ */
+int
+lexer_nom(struct cescal_state *state, struct token *res)
+{
+    tt_t type;
+    char c;
+
+    if (state == NULL || res == NULL) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    c = lexer_consume_single(state, true);
+    if (c == '\0') {
+        return -1;
+    }
+
+    /* Classify first so the result is written in a single place */
+    switch (c) {
+    case '(':
+        type = TT_LPAREN;
+        break;
+    case ')':
+        type = TT_RPAREN;
+        break;
+    case ',':
+        type = TT_COMMA;
+        break;
+    default:
+        cc_error("got unknown token '%c'\n", c);
+        return -1;
+    }
+
+    res->type = type;
+    res->c = c;
+    return 0;
+}
diff --git a/core/parser.c b/core/parser.c
new file mode 100644
index 0000000..a584a56
--- /dev/null
+++ b/core/parser.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2026, Chloe M.
+ * Provided under the BSD-3 clause
+ */
+
+#include <stdint.h>
+#include <errno.h>
+#include "cescal/log.h"
+#include "cescal/parser.h"
+#include "cescal/state.h"
+#include "cescal/lexer.h"
+
+/* Symbolic token: wraps a string literal in square brackets */
+#define symtok(tok) \
+    "[" tok "]"
+
+/* Quoted token: wraps a string literal in single quotes */
+#define qtok(tok) \
+    "'" tok "'"
+
+/*
+ * Convert a token type (tt_t) to string
+ *
+ * NOTE: indexes toktab directly, no bounds check is performed
+ */
+#define tokstr1(tt) \
+    toktab[(tt)]
+
+/*
+ * Convert a token (pointer to struct token) to string
+ *
+ * NOTE: indexes toktab directly, no bounds check is performed
+ */
+#define tokstr(tok) \
+    toktab[(tok)->type]
+
+/*
+ * Converts numeric tokens into human readable strings
+ *
+ * Both the strings and the table slots are read-only, the table
+ * is a pure lookup and must never be modified at runtime.
+ */
+static const char *const toktab[] = {
+    [TT_NONE]       = symtok("none"),
+    [TT_IDENT]      = symtok("ident"),
+    [TT_INTLIT]     = symtok("number"),
+    [TT_LPAREN]     = qtok("("),
+    [TT_RPAREN]     = qtok(")"),
+    [TT_COMMA]      = qtok(","),
+    [TT_RETURN]     = qtok("return"),
+    [TT_PUB]        = qtok("pub"),
+    [TT_PROC]       = qtok("proc"),
+    [TT_BEGIN]      = qtok("begin"),
+    [TT_END]        = qtok("end")
+};
+
+/*
+ * Pull tokens from the lexer one at a time and trace each of them
+ *
+ * @state: Compiler state
+ *
+ * Returns -1 with errno set to EINVAL if @state is NULL, otherwise
+ * zero once the lexer stops returning tokens.
+ *
+ * NOTE(review): any non-zero result from lexer_nom() ends the loop,
+ * so a lexer error is reported as success here as well.
+ */
+int
+parser_parse(struct cescal_state *state)
+{
+    struct token cur;
+
+    if (state == NULL) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    for (;;) {
+        if (lexer_nom(state, &cur) != 0) {
+            break;
+        }
+
+        cc_trace("got token %s\n", tokstr(&cur));
+    }
+
+    return 0;
+}
diff --git a/include/cescal/lexer.h b/include/cescal/lexer.h
new file mode 100644
index 0000000..7d4a9b4
--- /dev/null
+++ b/include/cescal/lexer.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2026, Chloe M.
+ * Provided under the BSD-3 clause
+ */
+
+#ifndef CESCAL_LEXER_H
+#define CESCAL_LEXER_H 1
+
+#include <stdint.h>
+#include <stddef.h>
+#include "cescal/token.h"
+#include "cescal/state.h"
+
+/*
+ * Consume a single token from the input source file
+ *
+ * @state: Compiler state
+ * @res: Token result is written here
+ *
+ * Returns zero on success. Returns -1 when either argument is
+ * NULL (errno is set to EINVAL), when no character could be
+ * consumed from the input, or when an unknown token is met.
+ */
+int lexer_nom(struct cescal_state *state, struct token *res);
+
+#endif /* !CESCAL_LEXER_H */
diff --git a/include/cescal/parser.h b/include/cescal/parser.h
new file mode 100644
index 0000000..c97006b
--- /dev/null
+++ b/include/cescal/parser.h
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2026, Chloe M.
+ * Provided under the BSD-3 clause
+ */
+
+#ifndef CESCAL_PARSER_H
+#define CESCAL_PARSER_H 1
+
+#include "cescal/state.h"
+
+/*
+ * Begin parsing the input source file
+ *
+ * @state: Compiler state
+ *
+ * Returns zero on success, or -1 with errno set to EINVAL
+ * if @state is NULL
+ */
+int parser_parse(struct cescal_state *state);
+
+#endif /* !CESCAL_PARSER_H */
diff --git a/include/cescal/token.h b/include/cescal/token.h
new file mode 100644
index 0000000..0be8ff0
--- /dev/null
+++ b/include/cescal/token.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2026, Chloe M.
+ * Provided under the BSD-3 clause
+ */
+
+#ifndef CESCAL_TOKEN_H
+#define CESCAL_TOKEN_H 1
+
+/*
+ * Represents valid source file token types
+ *
+ * The trailing comment on each constant shows the source text
+ * (or symbolic class) the token type stands for.
+ */
+typedef enum {
+ TT_NONE, /* [none] */
+ TT_IDENT, /* [identifier] */
+ TT_INTLIT, /* [0-9]+ */
+ TT_LPAREN, /* '(' */
+ TT_RPAREN, /* ')' */
+ TT_COMMA, /* ',' */
+ TT_RETURN, /* 'return' */
+ TT_PUB, /* 'pub' */
+ TT_PROC, /* 'proc' */
+ TT_BEGIN, /* 'begin' */
+ TT_END, /* 'end' */
+} tt_t;
+
+/*
+ * Represents a source file token
+ *
+ * @type: Token type
+ * @c: Character the token was lexed from (written by the lexer
+ *     for the single character tokens '(', ')' and ',')
+ */
+struct token {
+ tt_t type;
+ union {
+ char c;
+ };
+};
+
+#endif /* !CESCAL_TOKEN_H */