core/lexer.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84

#include <errno.h>
#include <stdbool.h>
#include "cescal/lexer.h"
#include "cescal/log.h"

/*
 * Tell whether a character counts as whitespace for the lexer
 *
 * The recognized set is: horizontal tab, newline, space, form feed
 * and carriage return.
 *
 * @c: Character to check
 *
 * Returns true if @c is one of the characters above, false otherwise
 */
static inline bool
lexer_is_ws(char c)
{
    return c == ' '  ||
           c == '\t' ||
           c == '\n' ||
           c == '\r' ||
           c == '\f';
}

/*
 * Consume a single character from the input source file and
 * optionally skip whitespace
 *
 * @state: Compiler state
 * @skip_ws: If true, whitespace characters are discarded and the
 *           first non-whitespace character is returned; if false,
 *           the next character is returned as-is
 *
 * Returns the character read, or '\0' if @state is NULL or
 * readbuf_read() yields '\0' (presumably end of input or a read
 * failure -- confirm against readbuf_read()).
 */
static char
lexer_consume_single(struct cescal_state *state, bool skip_ws)
{
    char c;

    if (state == NULL) {
        return '\0';
    }

    while ((c = readbuf_read(&state->rb, state->in_fd)) != '\0') {
        /* Only drop whitespace when the caller asked for it */
        if (skip_ws && lexer_is_ws(c)) {
            continue;
        }

        return c;
    }

    return '\0';
}

/*
 * Scan the next token from the input source file
 *
 * @state: Compiler state
 * @res: Receives the token type and its character on success
 *
 * Returns 0 when a token was recognized. Returns -1 with errno set
 * to EINVAL if either argument is NULL. Also returns -1 when no
 * character could be consumed, or when the character is not a known
 * token (the latter is reported through cc_error()); @res is left
 * untouched in those cases.
 */
int
lexer_nom(struct cescal_state *state, struct token *res)
{
    char ch;

    if (state == NULL || res == NULL) {
        errno = EINVAL;
        return -1;
    }

    /* Leading whitespace is skipped before the token character */
    ch = lexer_consume_single(state, true);
    if (ch == '\0') {
        return -1;
    }

    if (ch == '(') {
        res->type = TT_LPAREN;
    } else if (ch == ')') {
        res->type = TT_RPAREN;
    } else if (ch == ',') {
        res->type = TT_COMMA;
    } else {
        cc_error("got unknown token '%c'\n", ch);
        return -1;
    }

    /* Every recognized token is a single character; record it */
    res->c = ch;
    return 0;
}