Files
rifle/core/parser.c
2026-02-24 22:32:54 -05:00

807 lines
15 KiB
C

#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include "rifle/parser.h"
#include "rifle/token.h"
#include "rifle/trace.h"
#include "rifle/lexer.h"
#include "rifle/state.h"
#include "rifle/symbol.h"
#include "rifle/types.h"
#include "rifle/ast.h"
#include "rifle/scope.h"
#include "rifle/codegen.h"
/* Build the display string of a symbolic token class, e.g. "[ident]" */
#define symtok(tok) "[" tok "]"

/* Build the display string of a literal token, e.g. "'+'" */
#define qtok(tok) "'" tok "'"

/* Map a token type constant to its display string via 'toktab' */
#define tokstr1(tt) toktab[(tt)]

/* Map a token (given by pointer) to its display string */
#define tokstr(tok) tokstr1((tok)->type)
/*
 * Report an unexpected end of file
 *
 * @state: Compiler state
 *
 * Expands to a single expression without a trailing semicolon, so that
 * 'ueof(state);' is exactly one statement and remains valid as the
 * unbraced arm of an if/else.
 */
#define ueof(state)                 \
    trace_error(                    \
        (state),                    \
        "unexpected end of file\n"  \
    )
/*
 * Report a token that is not valid at the current position
 *
 * @state: Compiler state
 * @tok:   Pointer to the offending token
 */
#define utok1(state, tok) \
    trace_error((state), "unexpected token %s\n", tokstr(tok))

/*
 * Report a mismatch between an expected and an actual token
 *
 * @state: Compiler state
 * @expt:  Display string of what was expected
 * @got:   Display string of what was found
 */
#define utok(state, expt, got) \
    trace_error((state), "expected %s, got %s instead\n", (expt), (got))
/*
* Table used to convert token constants into human
* readable strings
*/
static const char *toktab[] = {
[TT_NONE] = symtok("none"),
[TT_IDENT] = symtok("ident"),
[TT_NUMBER] = symtok("number"),
[TT_PLUS] = qtok("+"),
[TT_MINUS] = qtok("-"),
[TT_STAR] = qtok("*"),
[TT_SLASH] = qtok("/"),
[TT_COLON] = qtok(":"),
[TT_LPAREN] = qtok("("),
[TT_RPAREN] = qtok(")"),
[TT_LBRACE] = qtok("{"),
[TT_RBRACE] = qtok("}"),
[TT_SEMI] = qtok(";"),
[TT_COMMA] = qtok(","),
[TT_F] = qtok(".f"),
[TT_STRUCT] = qtok(".struct"),
[TT_EXTERN] = qtok(".extern"),
[TT_PUB] = qtok(".pub"),
[TT_RETURN] = qtok("return"),
[TT_BREAK] = qtok("break"),
[TT_LOOP] = qtok("loop"),
[TT_VOID] = qtok("void"),
[TT_U8] = qtok("u8"),
[TT_U16] = qtok("u16"),
[TT_U32] = qtok("u32"),
[TT_U64] = qtok("u64"),
[TT_DEFINE] = qtok("#define"),
[TT_IFDEF] = qtok("#ifdef"),
[TT_IFNDEF] = qtok("#ifndef"),
[TT_ENDIF] = qtok("#endif")
};
/*
 * Fetch the next token on behalf of the parser
 *
 * @state: Compiler state
 * @tok:   Storage that receives the next token
 *
 * In pass 0 the token is read from the lexer.  In pass 1 it is popped
 * from the token buffer, with newline tokens consumed along the way.
 * For any other pass number nothing is scanned and zero is returned.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_scan(struct rifle_state *state, struct token *tok)
{
    struct token *next;

    if (state == NULL || tok == NULL) {
        return -1;
    }

    if (state->pass_num == 0) {
        return (lexer_scan(state, tok) < 0) ? -1 : 0;
    }

    if (state->pass_num != 1) {
        return 0;
    }

    /*
     * Line counting is the parser's job: the preprocessor leaves the
     * newline tokens in the buffer, so each one popped here bumps the
     * line counter before we move on to the next real token.
     */
    for (;;) {
        next = tokbuf_pop(&state->tokbuf);
        if (next == NULL) {
            return -1;
        }

        if (next->type != TT_NEWLINE) {
            break;
        }

        ++state->line_num;
    }

    *tok = *next;
    return 0;
}
/*
 * Scan the next token and require it to be of a given type
 *
 * @state: Compiler state
 * @what:  Token type that must come next
 * @tok:   Storage that receives the scanned token
 *
 * A diagnostic is emitted when the scan fails or the type differs.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_expect(struct rifle_state *state, tt_t what, struct token *tok)
{
    if (state == NULL || tok == NULL) {
        return -1;
    }

    /* A failed scan is reported as running out of input */
    if (parse_scan(state, tok) < 0) {
        ueof(state);
        return -1;
    }

    if (tok->type == what) {
        return 0;
    }

    utok(state, tokstr1(what), tokstr(tok));
    return -1;
}
/*
 * Handle a '#define' preprocessing directive
 *
 * @state: Compiler state
 * @tok:   Last token, must be of type TT_DEFINE
 *
 * Scans the identifier that follows and registers it in the symbol
 * table as a SYMBOL_MACRO.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_define(struct rifle_state *state, struct token *tok)
{
    if (state == NULL || tok == NULL || tok->type != TT_DEFINE) {
        return -1;
    }

    /* EXPECT <IDENT> */
    if (parse_expect(state, TT_IDENT, tok) < 0) {
        return -1;
    }

    if (symbol_new(&state->symtab, tok->s, SYMBOL_MACRO, NULL) < 0) {
        trace_error(state, "failed to create symbol '%s'\n", tok->s);
        return -1;
    }

    return 0;
}
/*
* Skip to an '#endif' token
*
* @state: Compiler state
* @tok: Last token
*
* Returns zero on success
*/
static int
parse_skip_to_endif(struct rifle_state *state, struct token *tok)
{
if (state == NULL || tok == NULL) {
return -1;
}
while (parse_scan(state, tok) == 0) {
if (tok->type == TT_ENDIF) {
break;
}
}
if (tok->type == TT_ENDIF) {
--state->ifx_depth;
}
return 0;
}
/*
 * Handle an '#ifdef' preprocessing directive
 *
 * @state: Compiler state
 * @tok:   Last token, must be of type TT_IFDEF
 *
 * Scans the identifier that follows; when no symbol of that name
 * exists, the input is skipped up to an '#endif'.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_ifdef(struct rifle_state *state, struct token *tok)
{
    struct symbol *macro;

    if (state == NULL || tok == NULL || tok->type != TT_IFDEF) {
        return -1;
    }

    /* EXPECT <IDENT> */
    if (parse_expect(state, TT_IDENT, tok) < 0) {
        return -1;
    }

    macro = symbol_from_name(&state->symtab, tok->s);
    if (macro != NULL) {
        /* Defined: the guarded region is processed normally */
        return 0;
    }

    parse_skip_to_endif(state, tok);
    return 0;
}
/*
 * Handle an '#ifndef' preprocessing directive
 *
 * @state: Compiler state
 * @tok:   Last token, must be of type TT_IFNDEF
 *
 * Scans the identifier that follows; when a symbol of that name
 * exists, the input is skipped up to an '#endif'.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_ifndef(struct rifle_state *state, struct token *tok)
{
    struct symbol *macro;

    if (state == NULL || tok == NULL || tok->type != TT_IFNDEF) {
        return -1;
    }

    /* EXPECT <IDENT> */
    if (parse_expect(state, TT_IDENT, tok) < 0) {
        return -1;
    }

    macro = symbol_from_name(&state->symtab, tok->s);
    if (macro == NULL) {
        /* Not defined: the guarded region is processed normally */
        return 0;
    }

    parse_skip_to_endif(state, tok);
    return 0;
}
/*
* Handle the preprocessing stage
*
* @state: Compiler state
*/
static int
parse_preprocess(struct rifle_state *state)
{
struct token tok;
while (parse_scan(state, &tok) == 0) {
switch (tok.type) {
case TT_DEFINE:
if (parse_define(state, &tok) < 0) {
return -1;
}
break;
case TT_IFDEF:
++state->ifx_depth;
if (parse_ifdef(state, &tok) < 0) {
return -1;
}
break;
case TT_IFNDEF:
++state->ifx_depth;
if (parse_ifndef(state, &tok) < 0) {
return -1;
}
break;
case TT_ENDIF:
if (state->ifx_depth == 0) {
trace_error(state, "extraneous '#endif' directive\n");
return -1;
}
--state->ifx_depth;
break;
default:
tokbuf_push(&state->tokbuf, &tok);
break;
}
}
return 0;
}
/*
 * Parse a type definition
 *
 * @state: Compiler state
 * @tok: Last token, holding the type keyword
 * @res: Data type result
 *
 * The pointer depth of the result is always set to zero.  A token that
 * does not map to a data type is reported as an error.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_type(struct rifle_state *state, struct token *tok, struct data_type *res)
{
    if (state == NULL || tok == NULL) {
        return -1;
    }

    /* The result is written unconditionally below */
    if (res == NULL) {
        return -1;
    }

    res->ptr_depth = 0;
    res->type = token_to_data_type(tok->type);
    if (res->type == DATA_TYPE_BAD) {
        utok(state, "TYPE", tokstr(tok));
        return -1;
    }

    return 0;
}
/*
 * Expect a '{' and open a new scope
 *
 * @state: Compiler state
 * @scope: Token type identifying the kind of scope being opened
 * @tok:   Storage for the scanned token
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_lbrace(struct rifle_state *state, tt_t scope, struct token *tok)
{
    if (state == NULL || tok == NULL) {
        return -1;
    }

    /* EXPECT '{' */
    if (parse_expect(state, TT_LBRACE, tok) < 0) {
        return -1;
    }

    return (scope_push(state, scope) < 0) ? -1 : 0;
}
/*
 * Parse a function header up to and including its opening brace
 *
 * @state: Compiler state
 * @tok:   Last token, must be of type TT_F
 * @res:   Receives the AST_FUNC node on success
 *
 * Accepts the form: <IDENT> '(' ')' ':' <TYPE> '{'
 * On success the function's symbol becomes the current function in
 * @state.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_func(struct rifle_state *state, struct token *tok, struct ast_node **res)
{
    struct token *before;
    struct ast_node *node;
    struct symbol *func_sym;
    bool is_pub;
    int status;

    if (state == NULL || tok == NULL || res == NULL) {
        return -1;
    }

    if (tok->type != TT_F) {
        return -1;
    }

    if (state->cur_func != NULL) {
        trace_error(state, "nested functions not supported\n");
        return -1;
    }

    /*
     * Is this marked as public?
     *
     * NOTE(review): presumably the lookbehind yields the token that
     * preceded '.f' -- confirm against tokbuf_lookbehind().
     */
    before = tokbuf_lookbehind(&state->tokbuf, 1);
    is_pub = (before != NULL && before->type == TT_PUB);

    /* EXPECT <IDENT> */
    if (parse_expect(state, TT_IDENT, tok) < 0) {
        return -1;
    }

    status = symbol_new(&state->symtab, tok->s, SYMBOL_FUNC, &func_sym);
    if (status < 0) {
        trace_error(state, "failed to create symbol '%s'\n", tok->s);
        return status;
    }

    if (ast_alloc_node(state, AST_FUNC, &node) < 0) {
        trace_error(state, "failed to allocate AST_FUNC\n");
        return -1;
    }

    func_sym->pub = is_pub;
    node->symbol = func_sym;

    /* EXPECT '(' ')' ':'  --  TODO: ARGUMENTS */
    if (parse_expect(state, TT_LPAREN, tok) < 0 ||
        parse_expect(state, TT_RPAREN, tok) < 0 ||
        parse_expect(state, TT_COLON, tok) < 0) {
        return -1;
    }

    /* The return type follows the colon */
    if (parse_scan(state, tok) < 0) {
        ueof(state);
        return -1;
    }

    if (parse_type(state, tok, &func_sym->dtype) < 0) {
        return -1;
    }

    /* EXPECT '{' */
    if (parse_lbrace(state, TT_F, tok) < 0) {
        return -1;
    }

    state->cur_func = func_sym;
    *res = node;
    return 0;
}
/*
 * Handle a '}' token
 *
 * @state: Compiler state
 * @tok:   Last token, must be of type TT_RBRACE
 * @res:   AST result; set to NULL when no node is produced
 *
 * Pops the innermost scope.  Closing a loop scope yields an AST_LOOP
 * epilogue node.  Closing a function scope clears the current function
 * and yields an AST_FUNC epilogue node, unless a 'return' was already
 * recorded, in which case the flag is cleared and no node is produced.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_rbrace(struct rifle_state *state, struct token *tok, struct ast_node **res)
{
    struct ast_node *node = NULL;
    tt_t closed;

    if (state == NULL || tok == NULL || res == NULL) {
        return -1;
    }

    if (tok->type != TT_RBRACE) {
        return -1;
    }

    closed = scope_pop(state);
    if (closed == TT_F) {
        state->cur_func = NULL;

        if (state->have_ret) {
            /* Don't double return */
            state->have_ret = 0;
        } else {
            if (ast_alloc_node(state, AST_FUNC, &node) < 0) {
                trace_error(state, "failed to allocate AST_FUNC\n");
                return -1;
            }

            node->epilogue = 1;
        }
    } else if (closed == TT_LOOP) {
        if (ast_alloc_node(state, AST_LOOP, &node) < 0) {
            trace_error(state, "failed to allocate AST_LOOP\n");
            return -1;
        }

        node->epilogue = 1;
    }

    *res = node;
    return 0;
}
/*
 * Parse a value
 *
 * @state: Compiler state
 * @tok:   Last token, holding the value
 * @res:   Receives the resulting AST node
 *
 * Only number tokens are accepted; anything else is reported as an
 * error.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_value(struct rifle_state *state, struct token *tok, struct ast_node **res)
{
    struct ast_node *node;

    if (state == NULL || tok == NULL || res == NULL) {
        return -1;
    }

    if (tok->type != TT_NUMBER) {
        utok(state, symtok("number"), tokstr(tok));
        return -1;
    }

    if (ast_alloc_node(state, AST_NUMBER, &node) < 0) {
        trace_error(state, "failed to allocate AST_NUMBER\n");
        return -1;
    }

    node->v = tok->v;
    *res = node;
    return 0;
}
/*
 * Parse a binary expression
 *
 * @state: Compiler state
 * @tok:   Last token, the first token of the expression
 * @res:   Receives the resulting AST node
 *
 * For now an expression is a single value.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_binexpr(struct rifle_state *state, struct token *tok, struct ast_node **res)
{
    struct ast_node *value;

    if (state == NULL || tok == NULL || res == NULL) {
        return -1;
    }

    /* TODO: Allow actual binary expressions */
    if (parse_value(state, tok, &value) < 0) {
        return -1;
    }

    *res = value;
    return 0;
}
/*
 * Parse a 'return' statement
 *
 * @state: Compiler state
 * @tok:   Last token
 * @res:   Receives the AST_RETURN node
 *
 * Accepts the form: 'return' <expr> ';' and is only valid while a
 * function is being parsed.  On success the expression is attached as
 * the right-hand child of the node and the state records that a return
 * has been seen.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_return(struct rifle_state *state, struct token *tok, struct ast_node **res)
{
    struct ast_node *node;
    struct ast_node *expr;

    if (state == NULL || tok == NULL || res == NULL) {
        return -1;
    }

    if (state->cur_func == NULL) {
        trace_error(state, "'return' not in function\n");
        return -1;
    }

    /* Step onto the first token of the returned expression */
    if (parse_scan(state, tok) < 0) {
        ueof(state);
        return -1;
    }

    if (parse_binexpr(state, tok, &expr) < 0) {
        return -1;
    }

    if (ast_alloc_node(state, AST_RETURN, &node) < 0) {
        trace_error(state, "failed to allocate AST_RETURN\n");
        return -1;
    }

    /* EXPECT ';' */
    if (parse_expect(state, TT_SEMI, tok) < 0) {
        return -1;
    }

    node->right = expr;
    state->have_ret = 1;
    *res = node;
    return 0;
}
/*
 * Parse the start of a loop statement
 *
 * @state: Compiler state
 * @tok:   Last token, must be of type TT_LOOP
 * @res:   Receives the AST_LOOP node
 *
 * Expects the opening brace, which opens a loop scope, and then
 * allocates the loop node.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_loop(struct rifle_state *state, struct token *tok, struct ast_node **res)
{
    struct ast_node *node;

    if (state == NULL || tok == NULL || res == NULL) {
        return -1;
    }

    if (tok->type != TT_LOOP) {
        return -1;
    }

    /* EXPECT '{' */
    if (parse_lbrace(state, TT_LOOP, tok) < 0) {
        return -1;
    }

    if (ast_alloc_node(state, AST_LOOP, &node) < 0) {
        trace_error(state, "failed to allocate AST_LOOP\n");
        return -1;
    }

    *res = node;
    return 0;
}
/*
 * Main parse loop
 *
 * @state: Compiler state
 *
 * Scans tokens until scanning fails, dispatches each one to its
 * handler and passes any AST node the handler produced to the code
 * generator.
 *
 * Returns zero on success, otherwise a negative value
 */
static int
parse_begin(struct rifle_state *state)
{
    struct token tok;
    struct ast_node *root;

    while (parse_scan(state, &tok) == 0) {
        /*
         * Start every iteration without a node.  Some tokens, such as
         * '.pub', produce none, and must not cause the node left over
         * from the previous token to be resolved a second time.
         */
        root = NULL;

        switch (tok.type) {
        case TT_F:
            if (parse_func(state, &tok, &root) < 0) {
                return -1;
            }

            break;
        case TT_RBRACE:
            if (parse_rbrace(state, &tok, &root) < 0) {
                return -1;
            }

            break;
        case TT_RETURN:
            if (parse_return(state, &tok, &root) < 0) {
                return -1;
            }

            break;
        case TT_LOOP:
            if (parse_loop(state, &tok, &root) < 0) {
                return -1;
            }

            break;
        case TT_PUB:
            /* Only a marker; parse_func() looks back for it */
            break;
        default:
            utok1(state, &tok);
            return -1;
        }

        if (root != NULL) {
            if (cg_resolve_node(state, root) < 0) {
                return -1;
            }
        }
    }

    return 0;
}
/*
 * Run the parser pass selected by the state's pass number
 *
 * @state: Compiler state
 *
 * Pass 0 runs the preprocessing stage and fails if a conditional
 * directive is still open afterwards.  Pass 1 runs the main parse
 * loop.  The pass number is advanced after a successful pass.
 *
 * Returns zero on success, otherwise a negative value
 */
int
parser_parse(struct rifle_state *state)
{
    /* A missing state is an error, as everywhere else in this file */
    if (state == NULL) {
        return -1;
    }

    switch (state->pass_num) {
    case 0:
        /* Pre-processor */
        if (parse_preprocess(state) < 0) {
            return -1;
        }

        if (state->ifx_depth > 0) {
            trace_error(state, "unterminated '#if' directive\n");
            return -1;
        }

        break;
    case 1:
        /* Parse loop */
        if (parse_begin(state) < 0) {
            return -1;
        }

        break;
    }

    ++state->pass_num;
    return 0;
}