#include #include #include #include #include #include #include #include "rifle/lexer.h" #include "rifle/trace.h" /* * Returns true if the given input character is a * whitespace character */ static inline bool lexer_is_ws(char c) { switch (c) { case '\n': case '\t': case '\r': case '\f': case ' ': return true; } return false; } /* * Place a given character into the putback buffer * * @state: Compiler state * @c: Character to insert */ static inline void lexer_putback(struct rifle_state *state, char c) { if (state == NULL) { return; } state->putback = c; } /* * Pop a character from the putback buffer * * @state: Compiler state * * Returns '\0' on failure */ static inline char lexer_putback_pop(struct rifle_state *state) { char c; if (state == NULL) { return '\0'; } c = state->putback; state->putback = '\0'; return c; } /* * Consume a single byte from the input source file * while skipping any whitespace characters. * * @state: Compiler state * @keep_ws: If true, preserve whitespace * * Returns the consumed character on success, otherwise * '\0' upon failure. */ static char lexer_nom(struct rifle_state *state, bool keep_ws) { char c; if (state == NULL) { return '\0'; } /* * If there is any data in the putback buffer then * we shall take it. */ if ((c = lexer_putback_pop(state)) != '\0') { if (keep_ws && lexer_is_ws(c)) return c; if ((!lexer_is_ws(c))) return c; } while (read(state->in_fd, &c, 1) > 0) { if (c == '\n') { ++state->line_num; } if (lexer_is_ws(c) && !keep_ws) { continue; } return c; } return '\0'; } /* * Scan for an identifier in the input source file * * @state: Compiler state * @lc: Last character * @tok: Last token * * Returns zero on success */ static int lexer_scan_ident(struct rifle_state *state, int lc, struct token *tok) { char *buf, c; size_t bufcap, bufsz; if (state == NULL || tok == NULL) { return -1; } bufcap = 8; bufsz = 0; if ((buf = malloc(bufcap)) == NULL) { return -1; } if (!isalnum(lc) && lc != '.' && lc != '_') { return -1; } buf[bufsz++] = lc; for (;;) { c = lexer_nom(state, true); if (c == '\0') { return -1; } if (!isalnum(c) && c != '.' && c != '_') { lexer_putback(state, c); buf[bufsz] = '\0'; break; } buf[bufsz++] = c; if (bufsz >= bufcap - 1) { bufcap += 8; buf = realloc(buf, bufcap); } if (buf == NULL) { return -1; } } tok->type = TT_IDENT; tok->s = ptrbox_strdup(&state->ptrbox, buf); free(buf); return 0; } /* * Check a identifier and potentially override it if it * counts as a directive. * * @state: Compiler state * @tok: Token */ static void lexer_check_direc(struct rifle_state *state, struct token *tok) { if (state == NULL || tok == NULL) { return; } if (tok->type != TT_IDENT) { return; } /* Check the character after the '.' prefix */ switch (tok->s[1]) { case 'f': if (strcmp(tok->s, ".f") == 0) { tok->type = TT_F; return; } break; case 'e': if (strcmp(tok->s, ".extern") == 0) { tok->type = TT_EXTERN; return; } break; } } /* * Check a identifier and potentially override it if it * counts as a reserved keyword. * * @state: Compiler state * @tok: Token */ static void lexer_check_kw(struct rifle_state *state, struct token *tok) { if (state == NULL || tok == NULL) { return; } if (tok->type != TT_IDENT) { return; } switch (*tok->s) { case '.': lexer_check_direc(state, tok); break; } } /* * Assert that the given identifier token is actually a preprocessor directive. * This is only used when a '#' has been encountered before the identifier. * * @state: Compiler state * @tok: Last token * * Returns zero on success */ static int lexer_assert_preproc(struct rifle_state *state, struct token *tok) { if (state == NULL || tok == NULL) { return -1; } if (tok->type != TT_IDENT) { return -1; } switch (*tok->s) { case 'd': if (strcmp(tok->s, "define") == 0) { tok->type = TT_DEFINE; return 0; } break; case 'i': if (strcmp(tok->s, "ifdef") == 0) { tok->type = TT_IFDEF; return 0; } if (strcmp(tok->s, "ifndef") == 0) { tok->type = TT_IFNDEF; return 0; } break; case 'e': if (strcmp(tok->s, "endif") == 0) { tok->type = TT_ENDIF; return 0; } break; } trace_error(state, "bad preprocessor directive\n"); return -1; } int lexer_scan(struct rifle_state *state, struct token *res) { char c; if (state == NULL || res == NULL) { return -1; } if ((c = lexer_nom(state, false)) == '\0') { return -1; } switch (c) { case '+': res->type = TT_PLUS; res->c = c; return 0; case '-': res->type = TT_MINUS; res->c = c; return 0; case '*': res->type = TT_STAR; res->c = c; return 0; case '/': res->type = TT_SLASH; res->c = c; return 0; case ':': res->type = TT_COLON; res->c = c; return 0; case '(': res->type = TT_LPAREN; res->c = c; return 0; case ')': res->type = TT_RPAREN; res->c = c; return 0; case '{': res->type = TT_LBRACE; res->c = c; return 0; case '}': res->type = TT_RBRACE; res->c = c; return 0; case '#': if ((c = lexer_nom(state, false)) == '\0') { trace_error(state, "unexpected end of file\n"); return -1; } /* Assert that we have a preprocessor directive */ if (lexer_scan_ident(state, c, res) == 0) { if (lexer_assert_preproc(state, res) < 0) return -1; } return 0; default: if (lexer_scan_ident(state, c, res) == 0) { lexer_check_kw(state, res); return 0; } trace_error(state, "unexpected token '%c'\n", c); break; } return -1; }