/*
 * Copyright (c) 2026, Mirocom Laboratories
 * Provided under the BSD-3 clause
 *
 * Abstract:
 *      This file implements the lexer.
 * Author:
 *      Ian M. Moffett <ian@mirocom.org>
 */
|
|
|
|
#include <sys/types.h>
|
|
#include <stddef.h>
|
|
#include <stdbool.h>
|
|
#include <unistd.h>
|
|
#include <errno.h>
|
|
#include <ctype.h>
|
|
#include "frontend/token.h"
|
|
#include "frontend/lexer.h"
|
|
|
|
/*
 * Test if a given character counts as a whitespace character
 * that the lexer may skip over.
 *
 * @c: Character to test
 *
 * Returns true for horizontal whitespace and carriage returns,
 * otherwise false.  A newline ('\n') is deliberately NOT whitespace
 * here so that it is never skipped and can be reported as a token.
 */
static inline bool
lexer_is_ws(char c)
{
    switch (c) {
    case ' ':
    case '\t':
    case '\v':
    case '\f':
    case '\r':  /* Lets CRLF terminated input lex the same as LF input */
    case '\a':  /* Kept so input that was accepted before still is */
        return true;
    default:
        return false;
    }
}
|
|
|
|
/*
|
|
* Place a character in the lexer putback buffer
|
|
*
|
|
* @state: Quip state
|
|
* @c: Character to insert
|
|
*/
|
|
static inline void
|
|
lexer_putback(struct quip_state *state, char c)
|
|
{
|
|
if (state == NULL) {
|
|
return;
|
|
}
|
|
|
|
state->lex_putback = c;
|
|
}
|
|
|
|
/*
|
|
* Pop the last character from the lexer putback
|
|
* buffer
|
|
*/
|
|
static inline char
|
|
lexer_putback_pop(struct quip_state *state)
|
|
{
|
|
char c;
|
|
|
|
if (state == NULL) {
|
|
return '\0';
|
|
}
|
|
|
|
c = state->lex_putback;
|
|
state->lex_putback = '\0';
|
|
return c;
|
|
}
|
|
|
|
/*
|
|
* Consume a single character in a buffered manner from the
|
|
* build source file.
|
|
*
|
|
* @state: Quip state machine
|
|
*
|
|
* Returns the fetched character on success, otherwise a value
|
|
* of '\0' on failure.
|
|
*/
|
|
static char
|
|
lexer_buffer_consume(struct quip_state *state)
|
|
{
|
|
ssize_t n;
|
|
|
|
if (state == NULL) {
|
|
return '\0';
|
|
}
|
|
|
|
/*
|
|
* If there is nothing in the lexer-side buffer, fill it
|
|
* with what we can.
|
|
*/
|
|
if (state->lex_buf_cap == 0) {
|
|
n = read(state->in_fd, state->lex_buf, LEX_FILEBUF_LEN);
|
|
if (n <= 0)
|
|
return '\0';
|
|
|
|
state->lex_buf_cap = n;
|
|
}
|
|
|
|
/* Grab a single character if not empty */
|
|
if (state->lex_buf_i < state->lex_buf_cap) {
|
|
return state->lex_buf[state->lex_buf_i++];
|
|
}
|
|
|
|
/* Empty, reset everything and try again */
|
|
state->lex_buf_cap = 0;
|
|
state->lex_buf_i = 0;
|
|
return lexer_buffer_consume(state);
|
|
}
|
|
|
|
/*
 * Fetch the next character for the lexer, preferring the putback
 * slot over the buffered input.
 *
 * @state: Quip state machine
 * @skip_ws: If true, characters accepted by lexer_is_ws() are
 *           discarded (including one taken from the putback slot)
 *
 * Returns the character fetched, or '\0' once the buffered input
 * yields no more characters.
 */
static char
lexer_consume(struct quip_state *state, bool skip_ws)
{
    char ch;

    /* A pending putback character goes first unless it is to be skipped */
    ch = lexer_putback_pop(state);
    if (ch != '\0' && !(skip_ws && lexer_is_ws(ch))) {
        return ch;
    }

    /* Pull from the buffer until something worth returning shows up */
    do {
        ch = lexer_buffer_consume(state);
    } while (ch != '\0' && skip_ws && lexer_is_ws(ch));

    return ch;
}
|
|
|
|
int
|
|
lexer_scan(struct quip_state *state, struct token *tokres)
|
|
{
|
|
char c;
|
|
|
|
if (state == NULL || tokres == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
if ((c = lexer_consume(state, true)) == '\0') {
|
|
return -1;
|
|
}
|
|
|
|
switch (c) {
|
|
case '\n':
|
|
tokres->type = TT_NEWLINE;
|
|
tokres->c = c;
|
|
return 0;
|
|
case ':':
|
|
tokres->c = c;
|
|
if ((c = lexer_consume(state, false)) == ':') {
|
|
tokres->type = TT_COLONDUB;
|
|
return 0;
|
|
}
|
|
|
|
lexer_putback(state, c);
|
|
tokres->type = TT_COLON;
|
|
return 0;
|
|
}
|
|
|
|
return -1;
|
|
}
|