361 lines
6.7 KiB
C
361 lines
6.7 KiB
C
#include <stdint.h>
|
|
#include <stddef.h>
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
#include <ctype.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
#include "rifle/lexer.h"
|
|
#include "rifle/trace.h"
|
|
|
|
/*
 * Tell whether a character counts as whitespace for the lexer.
 *
 * @c: Character to classify
 *
 * Returns true for newline, horizontal tab, carriage return,
 * form feed and space; false for everything else.
 */
static inline bool
lexer_is_ws(char c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
}
|
|
|
|
/*
|
|
* Place a given character into the putback buffer
|
|
*
|
|
* @state: Compiler state
|
|
* @c: Character to insert
|
|
*/
|
|
static inline void
|
|
lexer_putback(struct rifle_state *state, char c)
|
|
{
|
|
if (state == NULL) {
|
|
return;
|
|
}
|
|
|
|
state->putback = c;
|
|
}
|
|
|
|
/*
|
|
* Pop a character from the putback buffer
|
|
*
|
|
* @state: Compiler state
|
|
*
|
|
* Returns '\0' on failure
|
|
*/
|
|
static inline char
|
|
lexer_putback_pop(struct rifle_state *state)
|
|
{
|
|
char c;
|
|
|
|
if (state == NULL) {
|
|
return '\0';
|
|
}
|
|
|
|
c = state->putback;
|
|
state->putback = '\0';
|
|
return c;
|
|
}
|
|
|
|
/*
|
|
* Consume a single byte from the input source file
|
|
* while skipping any whitespace characters.
|
|
*
|
|
* @state: Compiler state
|
|
* @keep_ws: If true, preserve whitespace
|
|
*
|
|
* Returns the consumed character on success, otherwise
|
|
* '\0' upon failure.
|
|
*/
|
|
static char
|
|
lexer_nom(struct rifle_state *state, bool keep_ws)
|
|
{
|
|
char c;
|
|
|
|
if (state == NULL) {
|
|
return '\0';
|
|
}
|
|
|
|
/*
|
|
* If there is any data in the putback buffer then
|
|
* we shall take it.
|
|
*/
|
|
if ((c = lexer_putback_pop(state)) != '\0') {
|
|
if (keep_ws && lexer_is_ws(c))
|
|
return c;
|
|
if ((!lexer_is_ws(c)))
|
|
return c;
|
|
}
|
|
|
|
while (read(state->in_fd, &c, 1) > 0) {
|
|
if (c == '\n') {
|
|
++state->line_num;
|
|
}
|
|
|
|
if (lexer_is_ws(c) && !keep_ws) {
|
|
continue;
|
|
}
|
|
|
|
return c;
|
|
}
|
|
|
|
return '\0';
|
|
}
|
|
|
|
/*
|
|
* Scan for an identifier in the input source file
|
|
*
|
|
* @state: Compiler state
|
|
* @lc: Last character
|
|
* @tok: Last token
|
|
*
|
|
* Returns zero on success
|
|
*/
|
|
static int
|
|
lexer_scan_ident(struct rifle_state *state, int lc, struct token *tok)
|
|
{
|
|
char *buf, c;
|
|
size_t bufcap, bufsz;
|
|
|
|
if (state == NULL || tok == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
bufcap = 8;
|
|
bufsz = 0;
|
|
if ((buf = malloc(bufcap)) == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
if (!isalnum(lc) && lc != '.' && lc != '_') {
|
|
return -1;
|
|
}
|
|
|
|
buf[bufsz++] = lc;
|
|
for (;;) {
|
|
c = lexer_nom(state, true);
|
|
if (c == '\0') {
|
|
return -1;
|
|
}
|
|
|
|
if (!isalnum(c) && c != '.' && c != '_') {
|
|
lexer_putback(state, c);
|
|
buf[bufsz] = '\0';
|
|
break;
|
|
}
|
|
|
|
buf[bufsz++] = c;
|
|
if (bufsz >= bufcap - 1) {
|
|
bufcap += 8;
|
|
buf = realloc(buf, bufcap);
|
|
}
|
|
|
|
if (buf == NULL) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
tok->type = TT_IDENT;
|
|
tok->s = ptrbox_strdup(&state->ptrbox, buf);
|
|
free(buf);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Check a identifier and potentially override it if it
|
|
* counts as a directive.
|
|
*
|
|
* @state: Compiler state
|
|
* @tok: Token
|
|
*/
|
|
static void
|
|
lexer_check_direc(struct rifle_state *state, struct token *tok)
|
|
{
|
|
if (state == NULL || tok == NULL) {
|
|
return;
|
|
}
|
|
|
|
if (tok->type != TT_IDENT) {
|
|
return;
|
|
}
|
|
|
|
/* Check the character after the '.' prefix */
|
|
switch (tok->s[1]) {
|
|
case 'f':
|
|
if (strcmp(tok->s, ".f") == 0) {
|
|
tok->type = TT_F;
|
|
return;
|
|
}
|
|
|
|
break;
|
|
case 'e':
|
|
if (strcmp(tok->s, ".extern") == 0) {
|
|
tok->type = TT_EXTERN;
|
|
return;
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Check a identifier and potentially override it if it
|
|
* counts as a reserved keyword.
|
|
*
|
|
* @state: Compiler state
|
|
* @tok: Token
|
|
*/
|
|
static void
|
|
lexer_check_kw(struct rifle_state *state, struct token *tok)
|
|
{
|
|
if (state == NULL || tok == NULL) {
|
|
return;
|
|
}
|
|
|
|
if (tok->type != TT_IDENT) {
|
|
return;
|
|
}
|
|
|
|
switch (*tok->s) {
|
|
case '.':
|
|
lexer_check_direc(state, tok);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Assert that the given identifier token is actually a preprocessor directive.
|
|
* This is only used when a '#' has been encountered before the identifier.
|
|
*
|
|
* @state: Compiler state
|
|
* @tok: Last token
|
|
*
|
|
* Returns zero on success
|
|
*/
|
|
static int
|
|
lexer_assert_preproc(struct rifle_state *state, struct token *tok)
|
|
{
|
|
if (state == NULL || tok == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
if (tok->type != TT_IDENT) {
|
|
return -1;
|
|
}
|
|
|
|
switch (*tok->s) {
|
|
case 'd':
|
|
if (strcmp(tok->s, "define") == 0) {
|
|
tok->type = TT_DEFINE;
|
|
return 0;
|
|
}
|
|
|
|
break;
|
|
case 'i':
|
|
if (strcmp(tok->s, "ifdef") == 0) {
|
|
tok->type = TT_IFDEF;
|
|
return 0;
|
|
}
|
|
|
|
if (strcmp(tok->s, "ifndef") == 0) {
|
|
tok->type = TT_IFNDEF;
|
|
return 0;
|
|
}
|
|
|
|
break;
|
|
case 'e':
|
|
if (strcmp(tok->s, "endif") == 0) {
|
|
tok->type = TT_ENDIF;
|
|
return 0;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
trace_error(state, "bad preprocessor directive\n");
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
lexer_scan(struct rifle_state *state, struct token *res)
|
|
{
|
|
char c;
|
|
|
|
if (state == NULL || res == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
if ((c = lexer_nom(state, false)) == '\0') {
|
|
return -1;
|
|
}
|
|
|
|
switch (c) {
|
|
case '+':
|
|
res->type = TT_PLUS;
|
|
res->c = c;
|
|
return 0;
|
|
case '-':
|
|
res->type = TT_MINUS;
|
|
res->c = c;
|
|
return 0;
|
|
case '*':
|
|
res->type = TT_STAR;
|
|
res->c = c;
|
|
return 0;
|
|
case '/':
|
|
res->type = TT_SLASH;
|
|
res->c = c;
|
|
return 0;
|
|
case ':':
|
|
res->type = TT_COLON;
|
|
res->c = c;
|
|
return 0;
|
|
case '(':
|
|
res->type = TT_LPAREN;
|
|
res->c = c;
|
|
return 0;
|
|
case ')':
|
|
res->type = TT_RPAREN;
|
|
res->c = c;
|
|
return 0;
|
|
case '{':
|
|
res->type = TT_LBRACE;
|
|
res->c = c;
|
|
return 0;
|
|
case '}':
|
|
res->type = TT_RBRACE;
|
|
res->c = c;
|
|
return 0;
|
|
case '#':
|
|
if ((c = lexer_nom(state, false)) == '\0') {
|
|
trace_error(state, "unexpected end of file\n");
|
|
return -1;
|
|
}
|
|
|
|
/* Assert that we have a preprocessor directive */
|
|
if (lexer_scan_ident(state, c, res) == 0) {
|
|
if (lexer_assert_preproc(state, res) < 0)
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
default:
|
|
if (lexer_scan_ident(state, c, res) == 0) {
|
|
lexer_check_kw(state, res);
|
|
return 0;
|
|
}
|
|
|
|
trace_error(state, "unexpected token '%c'\n", c);
|
|
break;
|
|
}
|
|
|
|
return -1;
|
|
}
|