From 8f35371230da0fbe9f77dcd5ec0ab04e4089973b Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Tue, 20 Apr 2021 15:28:35 -0400 Subject: [PATCH] started implementing item based transformation based on oberon0 compiler --- cerise/build.sh | 4 +- cerise/cerise.h | 53 +++--- cerise/codegen.c | 295 +++++++++++++++------------------- cerise/parser.c | 409 ++++++++++++++++++++++++++++------------------- 4 files changed, 400 insertions(+), 361 deletions(-) diff --git a/cerise/build.sh b/cerise/build.sh index b183a1d..ec4bcc0 100755 --- a/cerise/build.sh +++ b/cerise/build.sh @@ -1,6 +1,6 @@ #!/bin/sh ctags -R & cc -g -D CERISE_TESTS -Wall -Wextra --std=c99 -o cerisec-test *.c \ - && ./cerisec-test \ - && cc -g -Wall -Wextra -Werror --std=c99 -o cerisec *.c + && ./cerisec-test +# && cc -g -Wall -Wextra -Werror --std=c99 -o cerisec *.c [ $? -gt 0 ] && printf "\a" diff --git a/cerise/cerise.h b/cerise/cerise.h index 87e5f5c..36f9ce1 100644 --- a/cerise/cerise.h +++ b/cerise/cerise.h @@ -85,50 +85,55 @@ typedef struct LexFile { typedef struct { enum { - VAL_I8, VAL_I16, VAL_I32, VAL_I64, VAL_BOOL, VAL_REAL, VAL_STRING, VAL_NIL - } type; - union { - long long integer; - double floating; - char* text; - } value; -} Value; + FORM_BOOL, FORM_INT, FORM_REAL, FORM_STRING + } form; + int size; +} Type; typedef struct Symbol { struct Symbol* next; - char* name; enum{ SYM_CONST, SYM_VAR, SYM_TYPE, SYM_PROC - } type; + } class; + char* name; + Type* type; } Symbol; +typedef struct { + enum { + ITEM_CONST, ITEM_VAR + } mode; + Type* type; + union { + long long i; + double f; + char* s; + } imm; +} Item; + typedef struct { LexFile* done; LexFile* file; Tok tok; - Symbol globals; - Symbol locals; - int validx; - Value vals[1024]; + Symbol* scope; } Parser; void lexfile(Parser* ctx, char* path); void lex(Parser* ctx); void lexprintpos(Parser* p, FILE* file, Tok* tok); void gettoken(Parser* ctx); -void module(Parser* p); +void module(Parser* p, Item* item); /* Code Generation 
*****************************************************************************/ -void codegen_print(Parser* p); -void codegen_int(Parser* p, long long val); -void codegen_real(Parser* p, double val); -void codegen_string(Parser* p, char* val); -void codegen_bool(Parser* p, int val); -void codegen_nil(Parser* p); -void codegen_unop(Parser* p, int val); -void codegen_binop(Parser* p, int val); -void codegen_const(Parser* p, char* name, int export); + +extern Type BoolType, IntType, RealType, StringType; + +void codegen_setint(Item* item, Type* type, long long val); +void codegen_setreal(Item* item, double val); +void codegen_setstr(Item* item, char* val); +void codegen_not(Item* item); +void codegen_mul(Item* a, Item* b); /* Option Parsing *****************************************************************************/ diff --git a/cerise/codegen.c b/cerise/codegen.c index b83c125..48e527c 100644 --- a/cerise/codegen.c +++ b/cerise/codegen.c @@ -2,196 +2,155 @@ #include #include -static Value* peek_val(Parser* p) -{ - return &(p->vals[p->validx-1]); -} - -static void push_int(Parser* p, int type, long long val) -{ - Value* v = &(p->vals[p->validx++]); - v->type = type; - v->value.integer = val; -} - -static void push_float(Parser* p, double val) -{ - Value* v = &(p->vals[p->validx++]); - v->type = VAL_REAL; - v->value.floating = val; -} - -static void push_string(Parser* p, char* val) -{ - Value* v = &(p->vals[p->validx++]); - v->type = VAL_STRING; - v->value.text = val; -} - -static void push_nil(Parser* p) -{ - Value* v = &(p->vals[p->validx++]); - v->type = VAL_NIL; - v->value.integer = 0; -} +Type BoolType = { + .form = FORM_BOOL, + .size = sizeof(int) +}; + +Type IntType = { + .form = FORM_INT, + .size = sizeof(int) +}; + +Type RealType = { + .form = FORM_REAL, + .size = sizeof(double) +}; + +Type StringType = { + .form = FORM_STRING, + .size = -1 +}; + +void codegen_setint(Item* item, Type* type, long long val) +{ + if (type == &BoolType) + printf("%s ", val ? 
"true" : "false"); + else + printf("%lld ", val); -static void pop_val(Parser* p, Value* val) -{ - p->validx--; - *val = p->vals[p->validx]; + item->mode = ITEM_CONST; + item->type = type; + item->imm.i = val; } - -static int is_int(Value* val) +void codegen_setreal(Item* item, double val) { - return (val->type <= VAL_I64); -} - - - + printf("%f ", val); -void codegen_print(Parser* p) -{ - printf("DUMP: [ "); - for (int i = 0; i < p->validx; i++) + item->mode = ITEM_CONST; + item->type = &RealType; + item->imm.f = val; +} + +void codegen_setstr(Item* item, char* val) +{ + printf("'%s' ", val); + + item->mode = ITEM_CONST; + item->type = &StringType; + item->imm.s = val; +} + +/* I do not "get" this code + PROCEDURE MakeConstItem*(VAR x: Item; typ: Type; val: LONGINT); + BEGIN + x.mode := Const; + x.type := typ; + x.a := val + END MakeConstItem; + + PROCEDURE negated(cond: LONGINT): LONGINT; + BEGIN + IF cond < 8 THEN + cond := cond+8 + ELSE + cond := cond-8 + END ; + RETURN cond + END negated; + + PROCEDURE Not*(VAR x: Item); (* x := ~x *) + VAR t: LONGINT; + BEGIN + IF x.mode # Cond THEN loadCond(x) END ; + x.r := negated(x.r); + t := x.a; + x.a := x.b; + x.b := t + END Not; + + + PROCEDURE MulOp*(VAR x, y: Item); (* x := x * y *) + BEGIN + IF (x.mode = Const) & (y.mode = Const) THEN + x.a := x.a * y.a + ELSIF (y.mode = Const) & (y.a = 2) THEN + load(x); + Put1(Lsl, x.r, x.r, 1) + ELSIF y.mode = Const THEN + load(x); + Put1(Mul, x.r, x.r, y.a) + ELSIF x.mode = Const THEN + load(y); + Put1(Mul, y.r, y.r, x.a); + x.mode := Reg; + x.r := y.r + ELSE + load(x); + load(y); + Put0(Mul, RH-2, x.r, y.r); + DEC(RH); + x.r := RH-1 + END + END MulOp; + +*/ + +void codegen_not(Item* item) +{ + if (item->mode == ITEM_CONST) { - switch (p->vals[i].type) - { - case VAL_I8: - case VAL_I16: - case VAL_I32: - case VAL_I64: - case VAL_BOOL: - printf("%lld ", p->vals[i].value.integer); - break; - case VAL_REAL: - printf("%f ", p->vals[i].value.floating); - break; - case VAL_STRING: - 
printf("%s ", p->vals[i].value.text); - break; - case VAL_NIL: - printf("nil "); - break; - } + item->imm.i = !item->imm.i; + } + else + { + assert(!"not implemented"); } - printf("]\n"); } -void codegen_int(Parser* p, long long val) +void codegen_imul(Item* a, Item* b) { - printf("%lld ", val); - - if (CHAR_MIN <= val && CHAR_MAX >= val) + if ((a->mode == ITEM_CONST) && (b->mode == ITEM_CONST)) { - push_int(p, VAL_I8, val); + a->imm.i = a->imm.i * b->imm.i; } - else if (SHRT_MIN <= val && SHRT_MAX >= val) - { - push_int(p, VAL_I16, val); - } - else if (INT_MIN <= val && INT_MAX >= val) + else { - push_int(p, VAL_I32, val); + assert(!"not implemented"); } - else if (LONG_MIN <= val && LONG_MAX >= val) +} + +void codegen_fmul(Item* a, Item* b) +{ + if ((a->mode == ITEM_CONST) && (b->mode == ITEM_CONST)) { - push_int(p, VAL_I64, val); + a->imm.f = a->imm.f * b->imm.f; } else { -// printf("unknown unary operator %d\n", val); + assert(!"not implemented"); } } -void codegen_real(Parser* p, double val) -{ - printf("%f ", val); - push_float(p, val); -} - -void codegen_string(Parser* p, char* val) +void codegen_mul(Item* a, Item* b) { - printf("\"%s\" ", val); - push_string(p, val); -} - -void codegen_bool(Parser* p, int val) -{ - (void)p; - printf("%s ", val ? 
"true" : "false"); - push_int(p, VAL_BOOL, val); -} - -void codegen_nil(Parser* p) -{ - (void)p; - printf("nil "); - push_nil(p); -} - -void codegen_unop(Parser* p, int val) -{ - (void)p; - if (val < 256) { printf("(%c) ", val); } else { printf("(%d) ", val); } - Value* v = peek_val(p); - switch (val) + if (a->type->form == FORM_REAL) { - case '+': - if (is_int(v)) - { - v->value.integer = +v->value.integer; - } - else if (v->type == VAL_REAL) - { - v->value.floating = +v->value.floating; - } - else - { - assert(false); - } - break; - - case '-': - if (is_int(v)) - { - v->value.integer = -v->value.integer; - } - else if (v->type == VAL_REAL) - { - v->value.floating = -v->value.floating; - } - else - { - assert(false); - } - break; - - case NOT: - if (v->type == VAL_BOOL) - { - v->value.integer = !v->value.integer; - } - else - { - assert(false); - } - break; - - default: - printf("unknown unary operator %d\n", val); - break; + codegen_fmul(a,b); + } + else + { + codegen_imul(a,b); } -} - -void codegen_binop(Parser* p, int val) -{ - (void)p; - if (val < 256) { printf("(%c) ", val); } else { printf("(%d) ", val); } -} - -void codegen_const(Parser* p, char* name, int export) -{ - (void)p; - printf(" -> const %s%c\n", name, (export ? 
'*' : ' ')); } diff --git a/cerise/parser.c b/cerise/parser.c index 77dcd64..360c774 100644 --- a/cerise/parser.c +++ b/cerise/parser.c @@ -5,56 +5,17 @@ #ifdef TRACE static int Indent = 0; #define RULE(name) \ - void name##_actual(Parser* p); \ - void name(Parser* p) { \ + void name##_actual(Parser* p, Item* item); \ + void name(Parser* p, Item* item) { \ printf("%*c-> %s\n", ++Indent * 2, ' ', __func__); \ - name##_actual(p); \ + name##_actual(p, item); \ Indent--; } \ - void name##_actual(Parser* p) + void name##_actual(Parser* p, Item* item) #else #define RULE(name) \ - void name(Parser* p) + void name(Parser* p, Item* item) #endif -/* Precedence Table - *****************************************************************************/ -//enum { /* Define precedence levels(based on C) */ -// LVL_NONE, -// LVL_LITERAL, -// LVL_COMMA, -// LVL_ASSIGN, -// LVL_TERNARY, -// LVL_BOOL_OR, -// LVL_BOOL_AND, -// LVL_BITWISE_OR, -// LVL_BITWISE_XOR, -// LVL_BITWISE_AND, -// LVL_EQUALITY, -// LVL_RELATIONAL, -// LVL_BITSHIFT, -// LVL_ADD_SUB, -// LVL_MUL_DIV, -// LVL_PREFIX, -// LVL_POSTFIX, -//}; -// -//typedef struct { -// int level; -// AST* (*prefixfn)(Parser* p); -// AST* (*infixfn)(Parser* p, AST* left); -//} OpRule; -// -//OpRule PrecedenceTable[T_COUNT] = { -// [T_BOOL] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, -// [T_CHAR] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, -// [T_STRING] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, -// [T_INT] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, -// [T_FLOAT] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, -// [T_ID] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, -// ['('] = { .level = LVL_POSTFIX, .prefixfn = grouping, .infixfn = func_call }, -// ['.'] = { .level = LVL_POSTFIX, .prefixfn = NULL, .infixfn = dot_call }, -//}; - /* Parsing Routines *****************************************************************************/ static Tok* 
peek(Parser* p) @@ -137,20 +98,78 @@ static int consume(Parser* p) return type; } +/* Symbol Table + *****************************************************************************/ +static Symbol* symbol_new(Parser* p, char* name, int class, bool export) +{ + (void)p, (void)name, (void)class, (void)export; + return NULL; +} + +/* Type Checking + *****************************************************************************/ +static void check_int(Parser* p, Item* item) +{ + if (item->type->form != FORM_INT) + { + error(p, "not an int"); + } +} + +static void check_int2(Parser* p, Item* a, Item* b) +{ + check_int(p, a); + check_int(p, b); +} + +static void check_real(Parser* p, Item* item) +{ + if (item->type->form != FORM_REAL) + { + error(p, "not a real"); + } +} + +static void check_real2(Parser* p, Item* a, Item* b) +{ + check_real(p, a); + check_real(p, b); +} + +static void check_num2(Parser* p, Item* a, Item* b) +{ + if (a->type->form == FORM_REAL) + { + check_real2(p, a, b); + } + else + { + check_int2(p, a, b); + } +} + +static void check_bool(Parser* p, Item* item) +{ + if (item->type->form != FORM_BOOL) + { + error(p, "not a bool"); + } +} + /* Grammar Definition *****************************************************************************/ -void module(Parser* p); -void import_list(Parser* p); -void declaration_seq(Parser* p); -void statement_seq(Parser* p); -void const_decl(Parser* p); -void expression(Parser* p); -void simple_expr(Parser* p); -void term(Parser* p); -void factor(Parser* p); -void designator(Parser* p); -void qualident(Parser* p); -void expr_list(Parser* p); +void module(Parser* p, Item* item); +void import_list(Parser* p, Item* item); +void declaration_seq(Parser* p, Item* item); +void statement_seq(Parser* p, Item* item); +void const_decl(Parser* p, Item* item); +void expression(Parser* p, Item* item); +void simple_expr(Parser* p, Item* item); +void term(Parser* p, Item* item); +void factor(Parser* p, Item* item); +void 
designator(Parser* p, Item* item); +void qualident(Parser* p, Item* item); +void expr_list(Parser* p, Item* item); RULE(module) { @@ -160,12 +179,12 @@ RULE(module) expect(p, ';'); if (matches(p, IMPORT)) { - import_list(p); + import_list(p, item); } - declaration_seq(p); + declaration_seq(p, item); if (accept(p, BEGIN)) { - statement_seq(p); + statement_seq(p, item); } expect(p, END); char* ename = expect_text(p, IDENT); @@ -178,6 +197,7 @@ RULE(module) RULE(import_list) { + (void)item; expect(p, IMPORT); while (1) { @@ -199,36 +219,70 @@ RULE(declaration_seq) { if (accept(p, CONST)) { - const_decl(p); + const_decl(p, item); expect(p, ';'); } if (accept(p, TYPE)) { -// type_decl(p); +// type_decl(p, item); expect(p, ';'); } if (accept(p, VAR)) { -// var_decl(p); +// var_decl(p, item); expect(p, ';'); } } RULE(statement_seq) { - (void)p; + (void)p, (void)item; } RULE(const_decl) { +/* + VAR + obj, first: OSG.Object; + x: OSG.Item; + tp: OSG.Type; + L: LONGINT; + + (* ... *) + + WHILE sym = OSS.ident DO + NewObj(obj, OSG.Const); + OSS.Get(sym); + IF sym = OSS.eql THEN + OSS.Get(sym) + ELSE + OSS.Mark("=?") + END; + expression(x); + IF x.mode = OSG.Const THEN + obj.val := x.a; + obj.type := x.type + ELSE + OSS.Mark("expression not constant") + END ; + Check(OSS.semicolon, "; expected") + END +*/ + + while (1) { - char* name = expect_text(p, IDENT); - bool export = accept(p, '*'); + char* name = NULL; + bool export = false; + Symbol* sym = NULL; + + name = expect_text(p, IDENT); + export = accept(p, '*'); + sym = symbol_new(p, name, SYM_CONST, export); expect(p, '='); - expression(p); - codegen_const(p, name, export); - codegen_print(p); + expression(p, item); + printf("-> %s\n", name); + if (!accept(p, ',')) { break; @@ -242,11 +296,11 @@ RULE(expression) // relation = "=" | "!=" | "<" | "<=" | ">" | ">=" | "is". 
int ops[] = { EQ, NEQ, '<', LTEQ, '>', GTEQ, IS, 0 }; - simple_expr(p); + simple_expr(p, item); if (matches_oneof(p, ops)) { consume(p); - simple_expr(p); + simple_expr(p, item); } } @@ -259,20 +313,18 @@ RULE(simple_expr) if (matches_oneof(p, (int[]){'+', '-', 0})) { int op = consume(p); // OP - term(p); - codegen_unop(p, op); + term(p, item); } else { - term(p); + term(p, item); } /* optional second term and op */ while (matches_oneof(p, (int[]){'+', '-', OR, 0})) { int op = consume(p); - term(p); - codegen_binop(p, op); + term(p, item); } } @@ -280,12 +332,34 @@ RULE(term) { // term = factor {MulOperator factor}. // MulOperator = "*" | "/" | "and". - factor(p); + factor(p, item); while (matches_oneof(p, (int[]){'*', '/', AND, 0})) { + Item right = { 0 }; int op = consume(p); - factor(p); - codegen_binop(p, op); + factor(p, &right); + switch(op) + { + case '/': + check_num2(p, item, &right); +// codegen_div(item, &right); + assert(!"not implemented"); + break; + + case '*': + check_num2(p, item, &right); + codegen_mul(item, &right); + break; + + case AND: +// check_bool2(p, item, &right); +// codegen_and(item, &right); +// break; + + default: + assert(!"not supported"); + break; + } } } @@ -305,103 +379,104 @@ RULE(factor) switch ((int)peek(p)->type) { case INT: - codegen_int(p, peek(p)->value.integer); + codegen_setint(item, &IntType, peek(p)->value.integer); consume(p); break; case REAL: - codegen_real(p, peek(p)->value.floating); + codegen_setreal(item, peek(p)->value.floating); consume(p); break; case STRING: - codegen_string(p, peek(p)->text); + codegen_setstr(item, peek(p)->text); consume(p); break; - case NIL: - codegen_nil(p); - consume(p); - break; +// case NIL: +// codegen_nil(item); +// consume(p); +// break; case BOOL: - codegen_bool(p, peek(p)->value.integer); + codegen_setint(item, &BoolType, peek(p)->value.integer); consume(p); break; case '(': expect(p, '('); - expression(p); + expression(p, item); expect(p, ')'); break; case NOT: consume(p); - 
factor(p); - codegen_unop(p, NOT); - break; - - case IDENT: - designator(p); -// actual_params(p); - break; - } -} - -/* - designator = qualident {selector}. - selector = "." ident | "[" ExpList "]" | "^" | "(" qualident ")". - ExpList = expression {"," expression}. - qualident = [ident "."] ident. - - ActualParameters = "(" [ExpList] ")" . -*/ -RULE(designator) -{ - qualident(p); - /* selector */ - switch ((int)peek(p)->type) - { - case '.': - expect(p, IDENT); - break; - - case '[': - expr_list(p); - expect(p, ']'); - break; - - case '^': - expect(p, '^'); - break; - - case '(': - qualident(p); - expect(p, ')'); + factor(p, item); + check_bool(p, item); + codegen_not(item); break; +// +// case IDENT: +// designator(p, item); +//// actual_params(p); +// break; } } -RULE(qualident) -{ - expect(p, IDENT); - if (accept(p, '.')) - { - expect(p, IDENT); - } -} - -RULE(expr_list) -{ - while (1) - { - expression(p); - if (!accept(p, ',')) - { - break; - } - } -} +///* +// designator = qualident {selector}. +// selector = "." ident | "[" ExpList "]" | "^" | "(" qualident ")". +// ExpList = expression {"," expression}. +// qualident = [ident "."] ident. +// +// ActualParameters = "(" [ExpList] ")" . 
+//*/ +//RULE(designator) +//{ +// qualident(p); +// /* selector */ +// switch ((int)peek(p)->type) +// { +// case '.': +// expect(p, IDENT); +// break; +// +// case '[': +// expr_list(p); +// expect(p, ']'); +// break; +// +// case '^': +// expect(p, '^'); +// break; +// +// case '(': +// qualident(p); +// expect(p, ')'); +// break; +// } +//} +// +//RULE(qualident) +//{ +// expect(p, IDENT); +// if (accept(p, '.')) +// { +// expect(p, IDENT); +// } +//} +// +//RULE(expr_list) +//{ +// while (1) +// { +// expression(p); +// if (!accept(p, ',')) +// { +// break; +// } +// } +//} /* Grammar Unit Tests *****************************************************************************/ @@ -420,16 +495,16 @@ void parse_init(char* fname, char* string) Ctx.file = file; } -static void parse_rule(void (*rule)(Parser*), char* string) +static void parse_rule(void (*rule)(Parser*, Item*), Item* item, char* string) { parse_init("test_input", string); - rule(&Ctx); + rule(&Ctx, (item ? item : &(Item){0})); } static void parse_module(char* fname, char* string) { parse_init(fname, string); - module(&Ctx); + module(&Ctx, &(Item){0}); } TEST_SUITE(Grammar) @@ -444,41 +519,41 @@ TEST_SUITE(Grammar) TEST(Should parse imports) { - parse_rule(import_list, + parse_rule(import_list, 0, "import A;"); - parse_rule(import_list, + parse_rule(import_list, 0, "import A = ModA;"); - parse_rule(import_list, + parse_rule(import_list, 0, "import A, B;"); - parse_rule(import_list, + parse_rule(import_list, 0, "import A, B = ModB, C;"); } TEST(Should parse constant declarations) { - parse_rule(const_decl, + parse_rule(const_decl, 0, "FOO = 123"); - parse_rule(const_decl, + parse_rule(const_decl, 0, "FOO = 123.123"); - parse_rule(const_decl, + parse_rule(const_decl, 0, "FOO = \"\""); - parse_rule(const_decl, + parse_rule(const_decl, 0, "FOO = true"); - parse_rule(const_decl, + parse_rule(const_decl, 0, "FOO = false"); - parse_rule(const_decl, + parse_rule(const_decl, 0, "FOO = nil"); - 
parse_rule(const_decl, + parse_rule(const_decl, 0, "FOO = not true"); - parse_rule(const_decl, - "FOO = (not true)"); - parse_rule(const_decl, + parse_rule(const_decl, 0, + "FOO = (not false)"); + parse_rule(const_decl, 0, "FOO = +1"); - parse_rule(const_decl, + parse_rule(const_decl, 0, "FOO = -1"); - parse_rule(const_decl, + parse_rule(const_decl, 0, "FOO = 1 + 2 + 3"); - parse_rule(const_decl, + parse_rule(const_decl, 0, "FOO = 1 + 2 * 4"); } } -- 2.49.0