diff --git a/example/ref.quip b/example/ref.quip new file mode 100644 index 0000000..a37e400 --- /dev/null +++ b/example/ref.quip @@ -0,0 +1,19 @@ +.cc clang +.ld lld-link + +CFLAGS :: + -Iinclude/ + -mgeneral-regs-only + -ffreestanding + $CONF # expands to -D chain +~ + +.obiter echo $OBJ +.obiter objcopy -O binary $OBJ $OBJ.bin +.oblink + +# configuration section +# these get + +conf FOO : true +conf BAR : 123 diff --git a/frontend/lexer.c b/frontend/lexer.c new file mode 100644 index 0000000..fab31e7 --- /dev/null +++ b/frontend/lexer.c @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2026, Mirocom Laboratories + * Provided under the BSD-3 clause + * + * Abstract: + * This file implements the lexer. + * Author: + * Ian M. Moffett + */ + +#include +#include +#include +#include +#include +#include +#include "frontend/token.h" +#include "frontend/lexer.h" + +/* + * Test if a character is skippable whitespace (tab, form feed, bell, space) + * NOTE(review): '\a' counts but '\r' and '\v' do not - confirm this is intended + * @c: Character to test + */ +static inline bool +lexer_is_ws(char c) +{ + switch (c) { + case '\t': + case '\f': + case '\a': + case ' ': + return true; + } + + return false; +} + +/* + * Consume a single character in a buffered manner from the + * build source file. + * + * @state: Quip state machine + * + * Returns the fetched character on success, otherwise a value + * of '\0' when @state is NULL or read() yields nothing (EOF or error). + */ +static char +lexer_buffer_consume(struct quip_state *state) +{ + ssize_t n; + + if (state == NULL) { + return '\0'; + } + + /* + * If there is nothing in the lexer-side buffer, fill it + * with what we can. 
+ */ + if (state->lex_buf_cap == 0) { + n = read(state->in_fd, state->lex_buf, LEX_FILEBUF_LEN); + if (n <= 0) + return '\0'; + + state->lex_buf_cap = n; + } + + /* Hand out the next unread byte while any remain */ + if (state->lex_buf_i < state->lex_buf_cap) { + return state->lex_buf[state->lex_buf_i++]; + } + + /* Buffer drained: reset both counters and recurse to refill */ + state->lex_buf_cap = 0; + state->lex_buf_i = 0; + return lexer_buffer_consume(state); +} + +/* + * Consume a single character, optionally skipping whitespace ('\0' if none) + * + * @state: Quip state machine + * @skip_ws: If true, skip whitespace + */ +static char +lexer_consume(struct quip_state *state, bool skip_ws) +{ + char c; + + while ((c = lexer_buffer_consume(state)) != '\0') { + if (skip_ws && lexer_is_ws(c)) { + continue; + } + + return c; + } + + return '\0'; +} + +int +lexer_scan(struct quip_state *state, struct token *tokres) +{ + char c; + + if (state == NULL || tokres == NULL) { + return -1; + } + + if ((c = lexer_consume(state, true)) == '\0') { + return -1; + } + + switch (c) { + case '\n': + tokres->type = TT_NEWLINE; + tokres->c = c; + return 0; + case ':': + tokres->type = TT_COLON; + tokres->c = c; + return 0; + } + + return -1; +} diff --git a/include/common/knobs.h b/include/common/knobs.h index cc1bf59..c9a9639 100644 --- a/include/common/knobs.h +++ b/include/common/knobs.h @@ -11,7 +11,11 @@ #ifndef COMMON_KNOBS_H #define COMMON_KNOBS_H 1 +/* Core settings */ #define QUIP_VERSION "0.0.1" #define QUIP_FILEPATH "build.quip" +/* Buffering settings (lexer read buffer length, in bytes) */ +#define LEX_FILEBUF_LEN 16 + #endif /* !_COMMON_KNOBS_H_ */ diff --git a/include/common/state.h b/include/common/state.h index d7639bb..34d3036 100644 --- a/include/common/state.h +++ b/include/common/state.h @@ -13,16 +13,23 @@ #include #include +#include "common/knobs.h" /* * Represents the build state machine * - * @in_fd: Input file descriptor of build file - * @line_num: Current line number + * @in_fd: Input file descriptor of build file + * @line_num: Current 
line number + * @lex_buf: Used to reduce system call frequency + * @lex_buf_cap: Count of valid bytes held in @lex_buf (0 means refill) + * @lex_buf_i: Index of the next unread byte in @lex_buf */ struct quip_state { int in_fd; size_t line_num; + char lex_buf[LEX_FILEBUF_LEN]; + size_t lex_buf_cap; + size_t lex_buf_i; }; /* diff --git a/include/frontend/lexer.h b/include/frontend/lexer.h new file mode 100644 index 0000000..c440ef0 --- /dev/null +++ b/include/frontend/lexer.h @@ -0,0 +1,19 @@ +#ifndef FRONTEND_LEXER_H +#define FRONTEND_LEXER_H 1 + +#include +#include +#include "frontend/token.h" +#include "common/state.h" + +/* + * Scan for a single token within the source file + * + * @state: Quip state + * @tokres: Token result is written here + * + * Returns zero on success, -1 on NULL arguments, exhausted input or an unrecognized character + */ +int lexer_scan(struct quip_state *state, struct token *tokres); + +#endif /* !FRONTEND_LEXER_H */ diff --git a/include/frontend/token.h b/include/frontend/token.h new file mode 100644 index 0000000..a8ebc39 --- /dev/null +++ b/include/frontend/token.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2026, Mirocom Laboratories + * Provided under the BSD-3 clause + * + * Abstract: + * This file defines the list of valid tokens. + * Author: + * Ian M. Moffett + */ + +#ifndef FRONTEND_TOKEN_H +#define FRONTEND_TOKEN_H 1 + +/* + * Represents valid token types + */ +typedef enum { + TT_NONE, /* [none] */ + TT_NAME, /* [name] */ + TT_NEWLINE, /* [newline] */ + TT_COLON, /* ':' */ +} tt_t; + +/* + * Represents a lexical token + * + * @type: Token type (@c: the character the token was scanned from) + */ +struct token { + tt_t type; + union { + char c; + }; +}; + +#endif /* !FRONTEND_TOKEN_H */