Files
rifle/core/lexer.c
2026-02-15 16:31:46 -05:00

365 lines
6.8 KiB
C

#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <unistd.h>
#include "rifle/lexer.h"
#include "rifle/trace.h"
/*
 * Whitespace predicate for the lexer.
 *
 * @c: Character to classify
 *
 * Returns true when @c is a newline, horizontal tab, carriage
 * return, form feed or space; false for every other character
 * (a vertical tab is NOT treated as whitespace here).
 */
static inline bool
lexer_is_ws(char c)
{
    return c == ' '  ||
           c == '\n' ||
           c == '\t' ||
           c == '\r' ||
           c == '\f';
}
/*
 * Store one character in the state's single-slot putback buffer,
 * overwriting whatever was stored there before.
 *
 * @state: Compiler state (a NULL state is silently ignored)
 * @c: Character to store
 */
static inline void
lexer_putback(struct rifle_state *state, char c)
{
    if (state != NULL) {
        state->putback = c;
    }
}
/*
 * Take the character held in the putback buffer and clear
 * the buffer.
 *
 * @state: Compiler state
 *
 * Returns the stored character. '\0' is returned when @state
 * is NULL; an empty buffer also reads back as '\0'.
 */
static inline char
lexer_putback_pop(struct rifle_state *state)
{
    char pending = '\0';

    if (state != NULL) {
        pending = state->putback;
        state->putback = '\0';
    }

    return pending;
}
/*
 * Fetch the next character of input, preferring the putback
 * buffer over the input file descriptor.
 *
 * @state: Compiler state
 * @keep_ws: If true, whitespace characters are returned as-is;
 *           if false, they are skipped
 *
 * The line counter in @state is bumped for every newline read
 * from the file descriptor, whether or not it is skipped.
 *
 * Returns the character on success, otherwise '\0' when @state
 * is NULL or read() yields no more data.
 */
static char
lexer_nom(struct rifle_state *state, bool keep_ws)
{
    char ch;

    if (state == NULL) {
        return '\0';
    }

    /*
     * A pending putback character wins, unless it is whitespace
     * that the caller asked us to skip; in that case it is
     * dropped and we carry on with the file.
     */
    ch = lexer_putback_pop(state);
    if (ch != '\0' && (keep_ws || !lexer_is_ws(ch))) {
        return ch;
    }

    for (;;) {
        if (read(state->in_fd, &ch, 1) <= 0) {
            return '\0';
        }

        if (ch == '\n') {
            ++state->line_num;
        }

        if (keep_ws || !lexer_is_ws(ch)) {
            return ch;
        }
    }
}
/*
 * Scan an identifier from the input source file.
 *
 * An identifier is a run of alphanumeric characters, '.' and '_'.
 * The first character that does not belong to the identifier is
 * handed back through the putback buffer.
 *
 * @state: Compiler state
 * @lc: Last character consumed (first character of the identifier)
 * @tok: Token to fill in; on success its type is TT_IDENT and
 *       its string is a copy made with ptrbox_strdup()
 *
 * Returns zero on success, otherwise -1 if an argument is NULL,
 * @lc cannot start an identifier, input ends before the identifier
 * is terminated, or memory could not be obtained.
 */
static int
lexer_scan_ident(struct rifle_state *state, int lc, struct token *tok)
{
    char *buf, *grown;
    size_t bufcap, bufsz;
    char c;

    if (state == NULL || tok == NULL) {
        return -1;
    }

    /*
     * Validate the leading character before allocating so that a
     * rejection cannot leak the scratch buffer.  The <ctype.h>
     * functions require a value representable as unsigned char.
     */
    if (!isalnum((unsigned char)lc) && lc != '.' && lc != '_') {
        return -1;
    }

    bufcap = 8;
    bufsz = 0;
    if ((buf = malloc(bufcap)) == NULL) {
        return -1;
    }

    buf[bufsz++] = (char)lc;
    for (;;) {
        c = lexer_nom(state, true);
        if (c == '\0') {
            /*
             * NOTE(review): an identifier that runs into end of
             * input is rejected rather than accepted; confirm
             * that this is intended.
             */
            free(buf);
            return -1;
        }

        if (!isalnum((unsigned char)c) && c != '.' && c != '_') {
            lexer_putback(state, c);
            buf[bufsz] = '\0';
            break;
        }

        buf[bufsz++] = c;

        /* Grow early so there is always room for the terminator */
        if (bufsz >= bufcap - 1) {
            grown = realloc(buf, bufcap + 8);
            if (grown == NULL) {
                free(buf);
                return -1;
            }

            buf = grown;
            bufcap += 8;
        }
    }

    /*
     * ptrbox_strdup() is assumed to return NULL on failure; do not
     * hand out an identifier token without a string.
     */
    tok->s = ptrbox_strdup(&state->ptrbox, buf);
    free(buf);
    if (tok->s == NULL) {
        return -1;
    }

    tok->type = TT_IDENT;
    return 0;
}
/*
 * Promote an identifier token to a directive token when its
 * text is one of the known directives (".f" or ".extern").
 * Any other token is left untouched.
 *
 * @state: Compiler state
 * @tok: Token to inspect and possibly retype
 */
static void
lexer_check_direc(struct rifle_state *state, struct token *tok)
{
    if (state == NULL || tok == NULL || tok->type != TT_IDENT) {
        return;
    }

    if (strcmp(tok->s, ".f") == 0) {
        tok->type = TT_F;
    } else if (strcmp(tok->s, ".extern") == 0) {
        tok->type = TT_EXTERN;
    }
}
/*
 * Inspect an identifier token and retype it if it turns out to
 * be a reserved word.  Only identifiers beginning with '.' are
 * considered; they are forwarded to the directive check.
 *
 * @state: Compiler state
 * @tok: Token to inspect and possibly retype
 */
static void
lexer_check_kw(struct rifle_state *state, struct token *tok)
{
    if (state == NULL || tok == NULL) {
        return;
    }

    if (tok->type == TT_IDENT && tok->s[0] == '.') {
        lexer_check_direc(state, tok);
    }
}
/*
 * Require that an identifier token names a preprocessor directive
 * and retype it accordingly.  Used for the identifier that follows
 * a '#'.
 *
 * Recognized names: "define", "ifdef", "ifndef" and "endif".
 *
 * @state: Compiler state
 * @tok: Identifier token to convert
 *
 * Returns zero on success.  Returns -1 if an argument is NULL or
 * @tok is not an identifier; an unrecognized name additionally
 * reports an error through trace_error().
 */
static int
lexer_assert_preproc(struct rifle_state *state, struct token *tok)
{
    if (state == NULL || tok == NULL || tok->type != TT_IDENT) {
        return -1;
    }

    if (strcmp(tok->s, "define") == 0) {
        tok->type = TT_DEFINE;
    } else if (strcmp(tok->s, "ifdef") == 0) {
        tok->type = TT_IFDEF;
    } else if (strcmp(tok->s, "ifndef") == 0) {
        tok->type = TT_IFNDEF;
    } else if (strcmp(tok->s, "endif") == 0) {
        tok->type = TT_ENDIF;
    } else {
        trace_error(state, "bad preprocessor directive\n");
        return -1;
    }

    return 0;
}
/*
 * Scan the next token from the input source file.
 *
 * Leading whitespace is skipped.  Single-character operators and
 * punctuation produce a token carrying that character; a '#' must
 * be followed by a preprocessor directive name; anything else is
 * scanned as an identifier and then checked against the reserved
 * words.
 *
 * @state: Compiler state
 * @res: Token to fill in
 *
 * Returns zero on success, otherwise -1 if an argument is NULL,
 * no more input is available, or the input does not form a token.
 */
int
lexer_scan(struct rifle_state *state, struct token *res)
{
    char c;

    if (state == NULL || res == NULL) {
        return -1;
    }

    if ((c = lexer_nom(state, false)) == '\0') {
        return -1;
    }

    switch (c) {
    case '+':
        res->type = TT_PLUS;
        break;
    case '-':
        res->type = TT_MINUS;
        break;
    case '*':
        res->type = TT_STAR;
        break;
    case '/':
        res->type = TT_SLASH;
        break;
    case ':':
        res->type = TT_COLON;
        break;
    case '(':
        res->type = TT_LPAREN;
        break;
    case ')':
        res->type = TT_RPAREN;
        break;
    case '{':
        res->type = TT_LBRACE;
        break;
    case '}':
        res->type = TT_RBRACE;
        break;
    case ';':
        res->type = TT_SEMI;
        break;
    case '#':
        if ((c = lexer_nom(state, false)) == '\0') {
            trace_error(state, "unexpected end of file\n");
            return -1;
        }

        /*
         * A '#' must introduce a directive.  If no identifier can
         * be scanned, fail instead of reporting success with a
         * token that was never filled in.
         */
        if (lexer_scan_ident(state, c, res) != 0) {
            trace_error(state, "unexpected token '%c'\n", c);
            return -1;
        }

        if (lexer_assert_preproc(state, res) < 0) {
            return -1;
        }
        return 0;
    default:
        if (lexer_scan_ident(state, c, res) == 0) {
            lexer_check_kw(state, res);
            return 0;
        }

        trace_error(state, "unexpected token '%c'\n", c);
        return -1;
    }

    /* Shared tail for every single-character token */
    res->c = c;
    return 0;
}