From b6703209345d0bda86b4e71b46be06bfff5d8687 Mon Sep 17 00:00:00 2001
From: Ian Moffett
Date: Sun, 15 Feb 2026 16:48:03 -0500
Subject: [PATCH] lexer: Add token for numbers

Signed-off-by: Ian Moffett
---
 core/lexer.c      | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 core/parser.c     |   1 +
 inc/rifle/token.h |   2 +
 3 files changed, 136 insertions(+)

diff --git a/core/lexer.c b/core/lexer.c
index 06e0e73..8d7d09e 100644
--- a/core/lexer.c
+++ b/core/lexer.c
@@ -283,6 +283,134 @@ lexer_assert_preproc(struct rifle_state *state, struct token *tok)
     return -1;
 }
 
+/*
+ * Convert a character to its digit value in a given radix
+ *
+ * @c: Character to convert
+ * @radix: Numeric base (8, 10 or 16)
+ *
+ * Returns the digit value, or a less than zero value if @c
+ * is not a digit of @radix
+ */
+static int
+lexer_digit_val(int c, uint8_t radix)
+{
+    int val;
+
+    if (c >= '0' && c <= '9') {
+        val = c - '0';
+    } else if (c >= 'a' && c <= 'f') {
+        val = (c - 'a') + 10;
+    } else if (c >= 'A' && c <= 'F') {
+        val = (c - 'A') + 10;
+    } else {
+        return -1;
+    }
+
+    return (val < radix) ? val : -1;
+}
+
+/*
+ * Scan a numeric literal
+ *
+ * @state: Compiler state
+ * @lc: Last character (first digit of the literal)
+ * @tok: Token result
+ *
+ * A leading "0x" selects base-16 and a leading "0o" selects base-8,
+ * anything else is base-10. Underscores act as digit separators and
+ * do not contribute to the value.
+ *
+ * Returns zero on success, otherwise a less than zero value
+ */
+static int
+lexer_scan_num(struct rifle_state *state, int lc, struct token *tok)
+{
+    uint64_t value;
+    uint8_t radix = 10;
+    bool pending = false;
+    int c = '\0';
+    int digit;
+
+    if (state == NULL || tok == NULL) {
+        return -1;
+    }
+
+    if (lc < '0' || lc > '9') {
+        return -1;
+    }
+
+    /*
+     * A leading zero may introduce a radix prefix:
+     *
+     * 'x': Base-16
+     * 'o': Base-8
+     *
+     * Any other character either continues the literal or ends
+     * it, so it is handed to the loop below instead of being
+     * mistaken for a prefix.
+     */
+    if (lc == '0') {
+        if ((c = lexer_nom(state, false)) == '\0') {
+            return -1;
+        }
+
+        if (c == 'x' || c == 'o') {
+            radix = (c == 'x') ? 16 : 8;
+            if ((lc = lexer_nom(state, false)) == '\0') {
+                return -1;
+            }
+        } else {
+            pending = true;
+        }
+    }
+
+    /* The first digit must be valid in the selected radix */
+    if ((digit = lexer_digit_val(lc, radix)) < 0) {
+        return -1;
+    }
+
+    value = (uint64_t)digit;
+
+    /*
+     * NOTE(review): the character that ends the literal is consumed
+     * here and not handed back to the lexer - confirm against
+     * lexer_nom() that the following token is not affected.
+     */
+    for (;;) {
+        if (pending) {
+            pending = false;
+        } else if ((c = lexer_nom(state, true)) == '\0') {
+            return -1;
+        }
+
+        /* Digit separators carry no value */
+        if (c == '_') {
+            continue;
+        }
+
+        if ((digit = lexer_digit_val(c, radix)) < 0) {
+            break;
+        }
+
+        /* Refuse anything that would wrap a 64-bit value */
+        if (value > (UINT64_MAX - (uint64_t)digit) / radix) {
+            trace_error(state, "value exceeds width of u64\n");
+            return -1;
+        }
+
+        value = (value * radix) + (uint64_t)digit;
+    }
+
+    /*
+     * NOTE(review): tok->v is a ssize_t, so values above its
+     * maximum do not fit - consider a u64 field.
+     */
+    tok->type = TT_NUMBER;
+    tok->v = (ssize_t)value;
+    return 0;
+}
+
 int
 lexer_scan(struct rifle_state *state, struct token *res)
 {
@@ -351,6 +479,11 @@ lexer_scan(struct rifle_state *state, struct token *res)
         return 0;
 
     default:
+        if (isdigit(c)) {
+            if (lexer_scan_num(state, c, res) == 0)
+                return 0;
+        }
+
         if (lexer_scan_ident(state, c, res) == 0) {
             lexer_check_kw(state, res);
             return 0;
diff --git a/core/parser.c b/core/parser.c
index 92f65bc..f6ac7d7 100644
--- a/core/parser.c
+++ b/core/parser.c
@@ -46,6 +46,7 @@
 static const char *toktab[] = {
     [TT_NONE] = symtok("none"),
     [TT_IDENT] = symtok("ident"),
+    [TT_NUMBER] = symtok("number"),
     [TT_PLUS] = qtok("+"),
     [TT_MINUS] = qtok("-"),
     [TT_STAR] = qtok("*"),
diff --git a/inc/rifle/token.h b/inc/rifle/token.h
index 4b14f23..a1150cf 100644
--- a/inc/rifle/token.h
+++ b/inc/rifle/token.h
@@ -7,6 +7,7 @@
 typedef enum {
     TT_NONE, /* */
     TT_IDENT, /* */
+    TT_NUMBER, /* */
     TT_PLUS, /* '+' */
     TT_MINUS, /* '-' */
     TT_STAR, /* '*' */
@@ -36,6 +37,7 @@ struct token {
     union {
         char c;
         char *s;
+        ssize_t v;
     };
 };
 