From 84f45606e5e83078db532e4e3f58dff7e30f1bb2 Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Wed, 7 Jul 2021 16:55:03 -0400 Subject: [PATCH] Started integrating AST generation. A null pointer is making its way into printing or generation; haven't tracked it down yet. --- Review note (not part of the commit message): the null pointer likely originates in ast_print(): every node that is not BOOL/INT/REAL takes the default branch, so a childless leaf such as the ident node built by qualident() (e.g. for `E = not A`; presumably created with NULL links, confirm in ast_ident()) recurses into links[0] and trips assert(node). factor() can also return NULL on an unknown token. Separately, EXIT_RULE() is placed after `return expr;` in simple_expr() and expression(), so it never runs. cerise/inc/cerise.h | 41 +------ cerise/src/ast.c | 43 ++++++- cerise/src/grammar.c | 272 +++++++----------------------------------- cerise/tests/Module.m | 7 +- 4 files changed, 94 insertions(+), 269 deletions(-) diff --git a/cerise/inc/cerise.h b/cerise/inc/cerise.h index 246b261..def3378 100644 --- a/cerise/inc/cerise.h +++ b/cerise/inc/cerise.h @@ -232,44 +232,6 @@ void codegen_return(Parser* p, Item* item); void codegen_index(Parser* p, Item* array, Item* index); void codegen_field(Parser* p, Item* record, char* name); -enum { - IR_MODE_CONST, - IR_MODE_VAR, - IR_MODE_TYPE, - IR_MODE_INFO, -}; - -typedef struct Operation { -// struct Operation* next; - int code : 28; - int mode : 4; - Type* type; - union { - Operand args[3]; - struct { - size_t nargs; - size_t margs; - long* args; - } call; - } u; -} Operation; - -typedef struct { - size_t nops; - size_t mops; - Operation* ops; -} Block; - -long ir_startblock(Parser* p); -void ir_endblock(Parser* p); -void ir_rawop(Parser* p, Operation* op); -void ir_binopi(Parser* p, int op, long dest, long arg, long imm); -void ir_binopf(Parser* p, int op, long dest, long arg, double imm); -void ir_unnopi(Parser* p, int op, long dest, long imm); -void ir_unopf(Parser* p, int op, long dest, double imm); - - - /* Abstract Syntax Tree Definition *****************************************************************************/ struct AstNode; @@ -296,6 +258,8 @@ typedef struct { } AstValue; AstNode* ast_new(int type, AstNode* l0, AstNode* l1, AstNode* l2); +AstNode* ast_ident(long long index); +AstNode* ast_bool(bool val); AstNode* ast_int(long long val); AstNode* ast_real(double val); AstNode* ast_binop(int op, AstNode* left, AstNode* right); @@ -304,3 +268,4 
@@ AstNode* ast_block(void); AstNode* ast_call(AstNode* func); AstNode* ast_if(AstNode* cond, AstNode* br1, AstNode* br2); AstNode* ast_return(AstNode* expr); +void ast_print(AstNode* expr); diff --git a/cerise/src/ast.c b/cerise/src/ast.c index 6732513..40e7735 100644 --- a/cerise/src/ast.c +++ b/cerise/src/ast.c @@ -27,6 +27,13 @@ AstNode* ast_ident(long long index) return (AstNode*)node; } +AstNode* ast_bool(bool val) +{ + AstValue* node = (AstValue*)ast_new(BOOL, NULL, NULL, NULL); + node->val.i = val; + return (AstNode*)node; +} + AstNode* ast_int(long long val) { AstValue* node = (AstValue*)ast_new(INT, NULL, NULL, NULL); @@ -37,17 +44,20 @@ AstNode* ast_int(long long val) AstNode* ast_real(double val) { AstValue* node = (AstValue*)ast_new(REAL, NULL, NULL, NULL); - node->val.i = val; + node->val.f = val; return (AstNode*)node; } AstNode* ast_binop(int op, AstNode* left, AstNode* right) { + assert(left); + assert(right); return ast_new(op, left, right, NULL); } AstNode* ast_unop(int op, AstNode* operand) { + assert(operand); return ast_new(op, operand, NULL, NULL); } @@ -59,6 +69,7 @@ AstNode* ast_block(void) void ast_block_add(AstNode* func) { /* TODO: append to linked list */ + (void)func; } AstNode* ast_call(AstNode* func) @@ -69,6 +80,7 @@ AstNode* ast_call(AstNode* func) void ast_call_add(AstNode* func) { /* TODO: append to linked list */ + (void)func; } AstNode* ast_if(AstNode* cond, AstNode* br1, AstNode* br2) @@ -83,5 +95,32 @@ AstNode* ast_return(AstNode* expr) void ast_print(AstNode* node) { - /* TODO: append to linked list */ + assert(node); + switch(node->hdr.code) + { + case BOOL: + printf("B:%lld", ((AstValue*)node)->val.i); + break; + + case INT: + printf("I:%lld", ((AstValue*)node)->val.i); + break; + + case REAL: + printf("R:%f", ((AstValue*)node)->val.f); + break; + + default: + if (node->links[1]) + { + printf("(binop)"); + } + else + { + printf("(%d ", node->hdr.code); + ast_print(node->links[0]); + printf(")"); + } + break; + } } diff 
--git a/cerise/src/grammar.c b/cerise/src/grammar.c index 3554da5..21ea4dd 100644 --- a/cerise/src/grammar.c +++ b/cerise/src/grammar.c @@ -17,205 +17,6 @@ #define EXIT_RULE() ((void)0) #endif -/* Basic Blocks and Operations - *****************************************************************************/ -#define MAX_OPS (256) -static size_t NumOps = 0; -static Operation* Ops[MAX_OPS] = {0}; - -static void print_op(Operation* op) -{ - switch (op->code) - { - case INT: - case BOOL: - printf(" $%lld = %lld\n", op->u.args[0].i, op->u.args[1].i); - break; - - case REAL: - printf(" $%lld = %f\n", op->u.args[0].i, op->u.args[1].f); - break; - - default: - printf(" \n", op->code); - break; - } -} - -/* Intermediate Representation - *****************************************************************************/ -//void codegen_startproc(Parser* p, Symbol* func); -//void codegen_endproc(Parser* p); -//void codegen_unop(Parser* p, int op, Item* a); -//void codegen_binop(Parser* p, int op, Item* a, Item* b); -//void codegen_store(Parser* p, Item* a, Item* b); -//void codegen_if(Parser* p, Item* item); -//void codegen_else(Parser* p, Item* item); -//void codegen_endif(Parser* p, long elsifs, Item* item); -//void codegen_prepcall(Parser* p, Item* item); -//void codegen_call(Parser* p, Item* item, Item* args); -//void codegen_setarg(Parser* p, Item* item, bool firstarg); -//void codegen_return(Parser* p, Item* item); -//void codegen_index(Parser* p, Item* array, Item* index); -//void codegen_field(Parser* p, Item* record, char* name); - -static Operation* ir_pop(void) -{ - assert(NumOps > 0); - return Ops[--NumOps]; -} - -static void ir_push(Operation* op) -{ - assert(NumOps < MAX_OPS); - Ops[NumOps++] = op; -} - -static void ir_bool(Parser* p, bool val) -{ - Operation* op = calloc(1, sizeof(Operation)); - op->code = BOOL; - op->mode = IR_MODE_CONST; - op->type = &BoolType; - op->u.args[0].i = p->curr_reg++; - op->u.args[1].i = (long)val; - ir_push(op); -} - -static void 
ir_int(Parser* p, long val) -{ - Operation* op = calloc(1, sizeof(Operation)); - op->code = INT; - op->mode = IR_MODE_CONST; - op->type = &IntType; - op->u.args[0].i = p->curr_reg++; - op->u.args[1].i = val; - ir_push(op); -} - -static void ir_real(Parser* p, double val) -{ - Operation* op = calloc(1, sizeof(Operation)); - op->code = REAL; - op->mode = IR_MODE_CONST; - op->type = &RealType; - op->u.args[0].i = p->curr_reg++; - op->u.args[1].f = val; - ir_push(op); -} - -static void ir_getconst(Parser* p, Symbol* sym) -{ - Operation* op = ir_pop(); - print_op(op); - if (op->mode != IR_MODE_CONST) - { - error(p, "Constant definition is non-constant"); - } - sym->type = op->type; - sym->imm = op->u.args[1]; -} - -static void ir_unop(Parser* p, int op) -{ - Operation* a = ir_pop(); - if (a->mode == IR_MODE_CONST) - { - if (a->code == INT) - { - switch (op) - { - case '+': a->u.args[1].i = +a->u.args[1].i; break; - case '-': a->u.args[1].i = -a->u.args[1].i; break; - default: - error(p, "invalid unary operation on integer value"); - break; - } - } - else if (a->code == REAL) - { - switch (op) - { - case '+': a->u.args[1].f = +a->u.args[1].f; break; - case '-': a->u.args[1].f = -a->u.args[1].f; break; - default: - error(p, "invalid unary operation on real value"); - break; - } - } - else if (a->code == BOOL) - { - switch (op) - { - case NOT: a->u.args[1].i = !a->u.args[1].i; break; - default: - error(p, "invalid unary operation on boolean value"); - break; - } - } - } - else - { - error(p, "unimplemented unary operation"); - } - ir_push(a); -} - -void ir_load(Parser* p, char* name) -{ - size_t id = symbol_getid(p, name, -1); - Symbol* sym = symbol_getbyid(p, id); - - Operation* op = calloc(1, sizeof(Operation)); - op->type = sym->type; - switch (sym->class) - { - case SYM_CONST: - op->mode = IR_MODE_CONST; - op->u.args[0].i = p->curr_reg++; - op->u.args[1] = sym->imm; - if (sym->type == &IntType) - { - op->code = INT; - } - else if (sym->type == &RealType) - { - op->code 
= REAL; - } - else if (sym->type == &BoolType) - { - op->code = BOOL; - } - else - { - error(p, "constant definition has unexpected type"); - } - break; - - case SYM_VAR: - op->code = VAR; - op->mode = IR_MODE_VAR; - op->u.args[0].i = p->curr_reg++; - op->u.args[1].i = id; - break; - - case SYM_PROC: - op->code = VAR; - op->mode = IR_MODE_VAR; - op->u.args[0].i = p->curr_reg++; - op->u.args[1].i = id; - break; - - case SYM_TYPE: - op->code = TYPE; - op->mode = IR_MODE_INFO; - break; - - default: - break; - } - ir_push(op); -} /* Item Handling *****************************************************************************/ @@ -251,14 +52,16 @@ Field* add_field(Parser* p, Type* type, char* name, bool export) /* Grammar Definition *****************************************************************************/ -static void expression(Parser* p); +static AstNode* expression(Parser* p); -static void qualident(Parser* p) +static AstNode* qualident(Parser* p) { ENTER_RULE(); + AstNode* expr = ast_ident(0); char* name = expect_text(p, IDENT); - ir_load(p, name); + (void)name; /* TODO: get real index of variable */ +// ir_load(p, name); // if (accept(p, '.')) // { @@ -266,12 +69,15 @@ static void qualident(Parser* p) // } EXIT_RULE(); + return expr; } -static void designator(Parser* p) +static AstNode* designator(Parser* p) { ENTER_RULE(); - qualident(p); + + AstNode* expr = qualident(p); + // /* selector */ // for (int done = 0; !done;) // { @@ -323,48 +129,47 @@ static void designator(Parser* p) // break; // } // } + EXIT_RULE(); + return expr; } -static void factor(Parser* p) +static AstNode* factor(Parser* p) { ENTER_RULE(); + + AstNode* expr = NULL; + switch ((int)peek(p)->type) { case INT: - ir_int(p, peek(p)->value.integer); + expr = ast_int(peek(p)->value.integer); consume(p); break; case REAL: - ir_real(p, peek(p)->value.floating); + expr = ast_real(peek(p)->value.floating); consume(p); break; -// case STRING: -// codegen_setstr(item, peek(p)->text); -// consume(p); 
-// break; - case BOOL: - ir_bool(p, peek(p)->value.integer); + expr = ast_bool(peek(p)->value.integer); consume(p); break; case '(': expect(p, '('); - expression(p); + expr = expression(p); expect(p, ')'); break; case NOT: consume(p); - factor(p); - ir_unop(p, NOT); + expr = ast_unop(NOT, factor(p)); break; case IDENT: - designator(p); + expr = designator(p); // if (accept(p, '(')) // { // Symbol* proc = symbol_get(p, item->imm.s, SYM_PROC); @@ -412,15 +217,17 @@ static void factor(Parser* p) printf("unknown factor: %d\n", peek(p)->type); break; } + EXIT_RULE(); + return expr; } -static void term(Parser* p) +static AstNode* term(Parser* p) { ENTER_RULE(); - factor(p); + AstNode* expr = factor(p); // while (matches_oneof(p, (int[]){'*', '/', '%', AND, 0})) // { @@ -448,22 +255,25 @@ static void term(Parser* p) // } EXIT_RULE(); + return expr; } -static void simple_expr(Parser* p) +static AstNode* simple_expr(Parser* p) { ENTER_RULE(); + AstNode* expr; + /* first term and +/- */ if (matches_oneof(p, (int[]){'+', '-', 0})) { int op = consume(p); // OP - term(p); - ir_unop(p, op); + AstNode* operand = term(p); + expr = ast_unop(op, operand); } else { - term(p); + expr = term(p); } // /* optional second term and op */ @@ -483,15 +293,18 @@ static void simple_expr(Parser* p) // codegen_binop(p, op, item, &right); // } + return expr; + EXIT_RULE(); } -static void expression(Parser* p) +static AstNode* expression(Parser* p) { ENTER_RULE(); + // int ops[] = { EQ, NEQ, '<', LTEQ, '>', GTEQ, IS, 0 }; - simple_expr(p); + AstNode* expr = simple_expr(p); // if (matches_oneof(p, ops)) // { @@ -502,6 +315,8 @@ static void expression(Parser* p) // codegen_binop(p, op, item, &right); // } + return expr; + EXIT_RULE(); } @@ -680,8 +495,11 @@ static void const_decl(Parser* p) export = accept(p, '*'); sym = symbol_new(p, 0, name, SYM_CONST, export); expect(p, '='); - expression(p); - ir_getconst(p, sym); + AstNode* expr = expression(p); + ast_print(expr); + puts(""); + (void)sym; /* 
TODO: put const value into symbol table */ +// ir_getconst(p, sym); // sym->imm = item->imm; // sym->type = item->type; } diff --git a/cerise/tests/Module.m b/cerise/tests/Module.m index 8d02658..192d9da 100644 --- a/cerise/tests/Module.m +++ b/cerise/tests/Module.m @@ -6,8 +6,11 @@ module Module const A = true - B = -42 - C = not A + B = 42 + C = 42.0 + D = -42 + E = not A +# F = B + 1 - 1 #type # TypeA = Int -- 2.49.0