Files
quip/frontend/lexer.c
2026-03-21 08:32:40 -04:00

177 lines
3.2 KiB
C

/*
* Copyright (c) 2026, Mirocom Laboratories
* Provided under the BSD-3 clause
*
* Abstract:
* This file implements the lexer.
* Author:
* Ian M. Moffett <ian@mirocom.org>
*/
#include <sys/types.h>
#include <stddef.h>
#include <stdbool.h>
#include <unistd.h>
#include <errno.h>
#include <ctype.h>
#include "frontend/token.h"
#include "frontend/lexer.h"
/*
 * Report whether a character is one the lexer treats as
 * skippable whitespace: space, horizontal tab, form feed or
 * alert (bell). Newline is deliberately not part of this set.
 *
 * @c: Character to classify
 *
 * Returns true if @c is in the set above, otherwise false.
 */
static inline bool
lexer_is_ws(char c)
{
    return c == ' ' || c == '\t' || c == '\f' || c == '\a';
}
/*
 * Store a character in the lexer putback slot, overwriting
 * whatever was held there before.
 *
 * @state: Quip state (nothing happens if NULL)
 * @c: Character to store
 */
static inline void
lexer_putback(struct quip_state *state, char c)
{
    if (state != NULL) {
        state->lex_putback = c;
    }
}
/*
 * Take the character currently held in the lexer putback
 * slot and clear the slot.
 *
 * @state: Quip state
 *
 * Returns the stored character; '\0' means the slot was
 * empty or @state was NULL.
 */
static inline char
lexer_putback_pop(struct quip_state *state)
{
    char pending = '\0';

    if (state != NULL) {
        pending = state->lex_putback;
        state->lex_putback = '\0';
    }

    return pending;
}
/*
 * Consume a single character in a buffered manner from the
 * build source file.
 *
 * @state: Quip state machine
 *
 * Characters are served from state->lex_buf. Once every buffered
 * character has been handed out (or nothing has been buffered
 * yet), the buffer is refilled with up to LEX_FILEBUF_LEN bytes
 * read from state->in_fd.
 *
 * Returns the fetched character on success, otherwise a value
 * of '\0' when @state is NULL, on end of input, or on a read
 * error. Note that a literal NUL byte in the input cannot be
 * told apart from those conditions by the caller.
 */
static char
lexer_buffer_consume(struct quip_state *state)
{
    ssize_t n;

    if (state == NULL) {
        return '\0';
    }

    /* Nothing left to hand out: refill the buffer from the file */
    if (state->lex_buf_i >= state->lex_buf_cap) {
        /*
         * A read interrupted by a signal has transferred no data
         * and is not an end-of-input condition, so retry it
         * instead of reporting failure.
         */
        do {
            n = read(state->in_fd, state->lex_buf, LEX_FILEBUF_LEN);
        } while (n < 0 && errno == EINTR);

        if (n <= 0) {
            /* EOF or hard error: leave the buffer marked empty */
            state->lex_buf_cap = 0;
            state->lex_buf_i = 0;
            return '\0';
        }

        state->lex_buf_cap = n;
        state->lex_buf_i = 0;
    }

    return state->lex_buf[state->lex_buf_i++];
}
/*
 * Fetch the next character for the lexer, preferring a pending
 * putback character over fresh input.
 *
 * @state: Quip state machine
 * @skip_ws: If true, characters accepted by lexer_is_ws() are
 *           discarded (including a pending putback character)
 *
 * Returns the character, or '\0' once no more input can be
 * obtained.
 */
static char
lexer_consume(struct quip_state *state, bool skip_ws)
{
    char ch;

    /* A pending putback character wins unless it must be skipped */
    ch = lexer_putback_pop(state);
    if (ch != '\0' && !(skip_ws && lexer_is_ws(ch))) {
        return ch;
    }

    /* Pull from the file buffer until something usable shows up */
    do {
        ch = lexer_buffer_consume(state);
    } while (ch != '\0' && skip_ws && lexer_is_ws(ch));

    return ch;
}
/*
 * Scan the next token from the input, skipping leading
 * whitespace.
 *
 * @state: Quip state machine
 * @tokres: Receives the scanned token
 *
 * Recognized tokens are a newline (TT_NEWLINE), a single colon
 * (TT_COLON) and a double colon (TT_COLONDUB).
 *
 * Returns zero when a token was produced. Returns -1 if either
 * argument is NULL, if no character could be read, or if the
 * character does not start a recognized token.
 */
int
lexer_scan(struct quip_state *state, struct token *tokres)
{
    char first;
    char next;

    if (state == NULL || tokres == NULL) {
        return -1;
    }

    first = lexer_consume(state, true);
    if (first == '\0') {
        return -1;
    }

    if (first == '\n') {
        tokres->type = TT_NEWLINE;
        tokres->c = first;
        return 0;
    }

    if (first == ':') {
        tokres->c = first;

        /*
         * Peek at the following character without skipping
         * whitespace: only an adjacent ':' forms a double colon.
         */
        next = lexer_consume(state, false);
        if (next == ':') {
            tokres->type = TT_COLONDUB;
        } else {
            /* Not ours, hand it back for the next scan */
            lexer_putback(state, next);
            tokres->type = TT_COLON;
        }
        return 0;
    }

    return -1;
}