From 3d87b4276d9f31d93ff0d0b743394e594cf390f0 Mon Sep 17 00:00:00 2001
From: "Michael D. Lowis"
Date: Thu, 15 Apr 2021 13:22:18 -0400
Subject: [PATCH] plopped in code for a simple compiler based on sclpl. Will start reworking it into an Oberon/Cerise compiler

---
 cerise/ast.c     | 215 +++++++++++++++++++++++++++++
 cerise/atf.h     | 160 ++++++++++++++++++++++++
 cerise/build.sh  |   3 +
 cerise/codegen.c |  12 ++
 cerise/emalloc.c |  11 ++
 cerise/lex.c     | 250 +++++++++++++++++++++++++++++++++++++
 cerise/main.c    |  49 ++++++++
 cerise/parser.c  | 117 ++++++++++++++++++
 cerise/pkg.c     |  23 ++++
 cerise/pprint.c  | 217 ++++++++++++++++++++++++++++++++
 cerise/sclpl     | Bin 0 -> 34928 bytes
 cerise/sclpl.h   | 313 +++++++++++++++++++++++++++++++++++++++++++++++
 cerise/syms.c    |  22 ++++
 cerise/types.c   |  59 +++++++++
 cerise/value     |   0
 15 files changed, 1451 insertions(+)
 create mode 100644 cerise/ast.c
 create mode 100644 cerise/atf.h
 create mode 100755 cerise/build.sh
 create mode 100644 cerise/codegen.c
 create mode 100644 cerise/emalloc.c
 create mode 100644 cerise/lex.c
 create mode 100644 cerise/main.c
 create mode 100644 cerise/parser.c
 create mode 100644 cerise/pkg.c
 create mode 100644 cerise/pprint.c
 create mode 100755 cerise/sclpl
 create mode 100644 cerise/sclpl.h
 create mode 100644 cerise/syms.c
 create mode 100644 cerise/types.c
 create mode 100644 cerise/value

diff --git a/cerise/ast.c b/cerise/ast.c
new file mode 100644
index 0000000..a3fd9d4
--- /dev/null
+++ b/cerise/ast.c
@@ -0,0 +1,215 @@
+#include <sclpl.h>
+
+/* AST node constructors and accessors. Every constructor returns a freshly
+ * allocated node (allocation failure is fatal via emalloc()); every accessor
+ * asserts that the node has the expected type before reading it. */
+
+/* Allocates a zero-filled node of the given type. */
+static AST* ast(ASTType type) {
+    AST* tree = emalloc(sizeof(AST));
+    memset(tree, 0, sizeof(AST));
+    tree->nodetype = type;
+    return tree;
+}
+
+/* Stores val itself; the text is not copied (Ident() copies its argument). */
+AST* String(char* val) {
+    AST* node = ast(AST_STRING);
+    node->value.text = val;
+    return node;
+}
+
+char* string_value(AST* val) {
+    assert(val->nodetype == AST_STRING);
+    return val->value.text;
+}
+
+AST* Char(int val) {
+    AST* node = ast(AST_CHAR);
+    node->value.integer = val;
+    return node;
+}
+
+uint32_t char_value(AST* val) {
+    assert(val->nodetype == AST_CHAR);
+    return val->value.integer;
+}
+
+AST* Integer(int val) {
+    AST* node = ast(AST_INT);
+    node->value.integer = val;
+    return node;
+}
+
+intptr_t integer_value(AST* val) {
+    assert(val->nodetype == AST_INT);
+    return val->value.integer;
+}
+
+AST* Float(double val) {
+    AST* node = ast(AST_FLOAT);
+    node->value.floating = val;
+    return node;
+}
+
+double float_value(AST* val) {
+    assert(val->nodetype == AST_FLOAT);
+    return val->value.floating;
+}
+
+AST* Bool(bool val) {
+    AST* node = ast(AST_BOOL);
+    node->value.integer = val;
+    return node;
+}
+
+bool bool_value(AST* val) {
+    assert(val->nodetype == AST_BOOL);
+    return val->value.integer;
+}
+
+AST* Ident(char* val) {
+    AST* node = ast(AST_IDENT);
+    node->value.text = strdup(val);
+    return node;
+}
+
+char* ident_value(AST* val) {
+    assert(val->nodetype == AST_IDENT);
+    return val->value.text;
+}
+
+AST* Var(char* name, AST* value, AST* type, int flags) {
+    (void)type;
+    AST* node = ast(AST_VAR);
+    node->value.var.name = name;
+    node->value.var.value = value;
+    node->value.var.flags = flags;
+    return node;
+}
+
+char* var_name(AST* var) {
+    assert(var->nodetype == AST_VAR);
+    return var->value.var.name;
+}
+
+AST* var_value(AST* var) {
+    assert(var->nodetype == AST_VAR);
+    return var->value.var.value;
+}
+
+bool var_flagset(AST* var, int mask) {
+    assert(var->nodetype == AST_VAR);
+    return ((var->value.var.flags & mask) == mask);
+}
+
+AST* Func(AST* args, AST* body, AST* type)
+{
+    (void)type;
+    AST* node = ast(AST_FUNC);
+    node->value.nodes[0] = args;
+    node->value.nodes[1] = body;
+    return node;
+}
+
+AST* func_args(AST* func) {
+    assert(func->nodetype == AST_FUNC);
+    return func->value.nodes[0];
+}
+
+AST* func_body(AST* func) {
+    assert(func->nodetype == AST_FUNC);
+    return func->value.nodes[1];
+}
+
+AST* ExpList(void) {
+    AST* node = ast(AST_EXPLIST);
+    node->value.explist.nexprs = 0;
+    node->value.explist.exprs = 0;
+    return node;
+}
+
+AST** explist_get(AST* explist, size_t* nexprs) {
+    assert(explist->nodetype == AST_EXPLIST);
+    *nexprs = explist->value.explist.nexprs;
+    return explist->value.explist.exprs;
+}
+
+/* Grows the list's expression array by one (uninitialised) slot and returns
+ * the array. realloc() failure is fatal, matching emalloc(), so callers never
+ * see a NULL array and the old array pointer is never overwritten with NULL. */
+static AST** explist_grow(AST* explist) {
+    size_t count = explist->value.explist.nexprs + 1;
+    AST** exprs = realloc(explist->value.explist.exprs, count * sizeof(AST*));
+    if (!exprs) {
+        perror("realloc()");
+        exit(1);
+    }
+    explist->value.explist.exprs = exprs;
+    explist->value.explist.nexprs = count;
+    return exprs;
+}
+
+/* Adds expr as the last element of explist. */
+void explist_append(AST* explist, AST* expr) {
+    assert(explist->nodetype == AST_EXPLIST);
+    AST** exprs = explist_grow(explist);
+    exprs[explist->value.explist.nexprs - 1] = expr;
+}
+
+/* Adds expr as the first element of explist, shifting the rest up by one. */
+void explist_prepend(AST* explist, AST* expr) {
+    assert(explist->nodetype == AST_EXPLIST);
+    AST** exprs = explist_grow(explist);
+    /* memmove() counts bytes, so scale the element count by the slot size */
+    memmove(exprs + 1, exprs, (explist->value.explist.nexprs - 1) * sizeof(AST*));
+    exprs[0] = expr;
+}
+
+AST* If(AST* cond, AST* b1, AST* b2) {
+    AST* node = ast(AST_IF);
+    node->value.nodes[0] = cond;
+    node->value.nodes[1] = b1;
+    node->value.nodes[2] = b2;
+    return node;
+}
+
+AST* if_cond(AST* ifexp) {
+    assert(ifexp->nodetype == AST_IF);
+    return ifexp->value.nodes[0];
+}
+
+AST* if_then(AST* ifexp) {
+    assert(ifexp->nodetype == AST_IF);
+    return ifexp->value.nodes[1];
+}
+
+AST* if_else(AST* ifexp) {
+    assert(ifexp->nodetype == AST_IF);
+    return ifexp->value.nodes[2];
+}
+
+AST* Apply(AST* func, AST* args) {
+    AST* node = ast(AST_APPLY);
+    node->value.nodes[0] = func;
+    node->value.nodes[1] = args;
+    return node;
+}
+
+AST* apply_func(AST* apply) {
+    assert(apply->nodetype == AST_APPLY);
+    return apply->value.nodes[0];
+}
+
+AST* apply_args(AST* apply) {
+    assert(apply->nodetype == AST_APPLY);
+    return apply->value.nodes[1];
+}
+
+AST* OpCall(int oper, AST* left, AST* right) {
+    AST* node = ast(AST_OPER);
+    node->value.op.oper = oper;
+    node->value.op.left = left;
+    node->value.op.right = right;
+    return node;
+}
diff --git a/cerise/atf.h b/cerise/atf.h
new file mode 100644
index 0000000..40088ed
--- 
/dev/null
+++ b/cerise/atf.h
@@ -0,0 +1,161 @@
+/**
+    Aardvark Test Framework - A minimalistic unit testing framework for C.
+
+    Copyright 2014 Michael D. Lowis
+
+    Permission to use, copy, modify, and/or distribute this software
+    for any purpose with or without fee is hereby granted, provided
+    that the above copyright notice and this permission notice appear
+    in all copies.
+
+    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+    WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+    AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+    DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA
+    OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+    TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+    PERFORMANCE OF THIS SOFTWARE.
+*/
+#ifndef ATF_H
+#define ATF_H
+
+/* NOTE(review): header names in this file were stripped from this copy of the patch; restored by inference -- confirm. */
+#include <stddef.h>
+#include <stdbool.h>
+#include <setjmp.h>
+#include <assert.h>
+
+extern char* Curr_Test;
+void atf_init(int argc, char** argv);
+void atf_test_start(char* file, unsigned int line, char* name);
+bool atf_test_assert(bool success, char* expr_str, char* file, int line);
+void atf_test_fail(char* expr, char* file, int line);
+int atf_print_results(void);
+
+#define IGNORE(msg) \
+    printf("%s:%d:%s:IGNORE:\n\t%s\n", __FILE__, __LINE__, Curr_Test, msg); break
+
+#define CHECK(expr) \
+    if(atf_test_assert((expr), #expr, __FILE__, __LINE__)) break
+
+#define CHECK_EXITCODE(code) \
+    CHECK(ExitCode == code)
+
+#define TEST_SUITE(name) \
+    void name(void)
+
+#define TEST(desc) \
+    for(atf_test_start(__FILE__,__LINE__,#desc); Curr_Test != NULL; Curr_Test = NULL)
+
+#define RUN_TEST_SUITE(name) \
+    name();
+
+#define RUN_EXTERN_TEST_SUITE(name) \
+    do { extern TEST_SUITE(name); RUN_TEST_SUITE(name); } while(0)
+
+#define EXPECT_EXIT \
+    if ((ExitExpected = true, 0 == setjmp(ExitPad)))
+
+/* Function Definitions
+ *****************************************************************************/
+#ifdef INCLUDE_DEFS
+#include <stdio.h>
+#include <stdlib.h>
+#ifndef NO_SIGNALS
+#include <signal.h>
+#endif
+
+char* Curr_Test = NULL;
+char* Curr_File = NULL;
+unsigned int Curr_Line = 0;
+static unsigned int Total = 0;
+static unsigned int Failed = 0;
+bool ExitExpected;
+int ExitCode;
+jmp_buf ExitPad;
+
+#ifndef NO_SIGNALS
+static void handle_signal(int sig) {
+    /* Determine the signal name */
+    char* sig_name = NULL;
+    switch(sig) {
+        case SIGABRT: sig_name = "SIGABRT"; break;
+        case SIGBUS: sig_name = "SIGBUS"; break;
+        case SIGFPE: sig_name = "SIGFPE"; break;
+        case SIGILL: sig_name = "SIGILL"; break;
+        case SIGSEGV: sig_name = "SIGSEGV"; break;
+        case SIGSYS: sig_name = "SIGSYS"; break;
+        /* If we don't recognize it then just return and let the default handler
+           catch it. */
+        default: return;
+    }
+    /* Error and exit. No summary will be printed but the user will know which
+       test has crashed. */
+    fprintf(stderr,"%s:%d:0:%s:CRASH (signal: %d - %s)\n", Curr_File, Curr_Line, Curr_Test, sig, sig_name);
+    Failed++;
+    (void)atf_print_results();
+    _Exit(1);
+}
+#endif
+
+void atf_init(int argc, char** argv) {
+    /* I reserve the right to use these later */
+    (void)argc;
+    (void)argv;
+
+#ifndef NO_SIGNALS
+    /* Init signal handler */
+    signal(SIGABRT, handle_signal);
+    signal(SIGBUS, handle_signal);
+    signal(SIGFPE, handle_signal);
+    signal(SIGILL, handle_signal);
+    signal(SIGSEGV, handle_signal);
+    signal(SIGSYS, handle_signal);
+#endif
+}
+
+void atf_test_start(char* file, unsigned int line, char* name) {
+    Curr_File = file;
+    Curr_Line = line;
+    Curr_Test = name;
+    Total++;
+}
+
+bool atf_test_assert(bool success, char* expr, char* file, int line) {
+    bool failed = !success;
+    if (failed) atf_test_fail(expr,file,line);
+    return failed;
+}
+
+void atf_test_fail(char* expr, char* file, int line) {
+    Failed++;
+    printf("%s:%d:0:%s:FAIL:( %s )\n", file, line, Curr_Test, expr);
+}
+
+int atf_print_results(void) {
+    static const char* results_string =
+    "\nUnit Test Summary"
+    "\n-----------------"
+    "\nTotal: %u"
+    "\nPassed: %u"
+    "\nFailed: %u"
+    "\n\n";
+    printf(results_string, Total, Total - Failed, Failed);
+    return Failed;
+}
+
+void exit(int code) {
+    if (ExitExpected) {
+        ExitCode = code;
+        ExitExpected = false;
+        longjmp(ExitPad, 1);
+    } else {
+        assert(!"Unexpected exit. Something went wrong");
+    }
+}
+
+#undef INCLUDE_DEFS
+#endif
+
+#endif /* ATF_H */
\ No newline at end of file
diff --git a/cerise/build.sh b/cerise/build.sh
new file mode 100755
index 0000000..c5d2f06
--- /dev/null
+++ b/cerise/build.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+ctags -R &
+cc -Wall -Wextra -Werror --std=c99 -o sclpl -I. *.c
diff --git a/cerise/codegen.c b/cerise/codegen.c
new file mode 100644
index 0000000..a625f09
--- /dev/null
+++ b/cerise/codegen.c
@@ -0,0 +1,12 @@
+#include <sclpl.h>
+
+void codegen_init(Parser* p) {
+    sym_add(&(p->syms), SF_TYPEDEF, "void", VoidType());
+    sym_add(&(p->syms), SF_TYPEDEF, "bool", UIntType(1u));
+    sym_add(&(p->syms), SF_TYPEDEF, "byte", UIntType(8u));
+    sym_add(&(p->syms), SF_TYPEDEF, "int", IntType(64u));
+    sym_add(&(p->syms), SF_TYPEDEF, "uint", UIntType(64u));
+    sym_add(&(p->syms), SF_TYPEDEF, "float", FloatType(32u));
+    sym_add(&(p->syms), SF_TYPEDEF, "string",
+        ArrayOf(sym_get(&(p->syms), "byte")->type, -1));
+}
diff --git a/cerise/emalloc.c b/cerise/emalloc.c
new file mode 100644
index 0000000..751a40f
--- /dev/null
+++ b/cerise/emalloc.c
@@ -0,0 +1,11 @@
+#include <sclpl.h>
+void fatal(char* estr) {
+    perror(estr);
+    exit(1);
+}
+
+void* emalloc(size_t size) {
+    void* ptr = malloc(size);
+    if (!ptr) fatal("malloc()");
+    return ptr;
+}
diff --git a/cerise/lex.c b/cerise/lex.c
new file mode 100644
index 0000000..648295e
--- /dev/null
+++ b/cerise/lex.c
@@ -0,0 +1,286 @@
+#include <sclpl.h>
+/* NOTE(review): the three system header names below were stripped from this
+ * copy of the patch; they are inferred from open/fstat/read/close -- confirm. */
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+typedef struct {
+    char* keyword;
+    int type;
+} KeywordDef;
+
+/* Token class of every possible first byte; 0 means "cannot start a token". */
+static const char FirstChar[256] = {
+    /* Whitespace */
+    [' '] = 1, ['\t'] = 1, ['\r'] = 1, ['\n'] = 1,
+    /* comment start */
+    ['#'] = 2,
+    /* number or op */
+    ['+'] = 3, ['-'] = 3,
+    /* number digits */
+    ['0'] = 4, ['1'] = 4, ['2'] = 4, ['3'] = 4, ['4'] = 4,
+    ['5'] = 4, ['6'] = 4, ['7'] = 4, ['8'] = 4, ['9'] = 4,
+    /* alpha characters */
+    ['A'] = 5, ['B'] = 5, ['C'] = 5, ['D'] = 5, ['E'] = 5,
+    ['F'] = 5, ['G'] = 5, ['H'] = 5, ['I'] = 5, ['J'] = 5,
+    ['K'] = 5, ['L'] = 5, ['M'] = 5, ['N'] = 5, ['O'] = 5,
+    ['P'] = 5, ['Q'] = 5, ['R'] = 5, ['S'] = 5, ['T'] = 5,
+    ['U'] = 5, ['V'] = 5, ['W'] = 5, ['X'] = 5, ['Y'] = 5,
+    ['Z'] = 5, ['a'] = 5, ['b'] = 5, ['c'] = 5, ['d'] = 5,
+    ['e'] = 5, ['f'] = 5, ['g'] = 5, ['h'] = 5, ['i'] = 5,
+    ['j'] = 5, ['k'] = 5, ['l'] = 5, ['m'] = 5, ['n'] = 5,
+    ['o'] = 5, ['p'] = 5, ['q'] = 5, ['r'] = 5, ['s'] = 5,
+    ['t'] = 5, ['u'] = 5, ['v'] = 5, ['w'] = 5, ['x'] = 5,
+    ['y'] = 5, ['z'] = 5,
+    /* punctuation */
+    ['('] = 6, [')'] = 6, ['['] = 6, [']'] = 6, ['{'] = 6, ['}'] = 6,
+    ['.'] = 6, [','] = 6, [':'] = 6, ['&'] = 6, ['='] = 6, [';'] = 6,
+    ['*'] = 6, ['\''] = 6,
+    /* strings */
+    ['"'] = 7
+};
+
+char SPACE[256] = {
+    [' '] = 1, ['\t'] = 1, ['\r'] = 1, ['\n'] = 1,
+};
+
+char DIGIT[256] = {
+    ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1,
+    ['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1,
+};
+
+char ALNUM_[256] = {
+    ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1,
+    ['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1,
+    ['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1,
+    ['F'] = 1, ['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1,
+    ['K'] = 1, ['L'] = 1, ['M'] = 1, ['N'] = 1, ['O'] = 1,
+    ['P'] = 1, ['Q'] = 1, ['R'] = 1, ['S'] = 1, ['T'] = 1,
+    ['U'] = 1, ['V'] = 1, ['W'] = 1, ['X'] = 1, ['Y'] = 1,
+    ['Z'] = 1, ['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1,
+    ['e'] = 1, ['f'] = 1, ['g'] = 1, ['h'] = 1, ['i'] = 1,
+    ['j'] = 1, ['k'] = 1, ['l'] = 1, ['m'] = 1, ['n'] = 1,
+    ['o'] = 1, ['p'] = 1, ['q'] = 1, ['r'] = 1, ['s'] = 1,
+    ['t'] = 1, ['u'] = 1, ['v'] = 1, ['w'] = 1, ['x'] = 1,
+    ['y'] = 1, ['z'] = 1, ['_'] = 1,
+};
+
+/* Must stay sorted by keyword: convert_value() finds entries with bsearch(). */
+#define NUM_KEYWORDS (sizeof(Keywords) / sizeof(Keywords[0]))
+KeywordDef Keywords[] = {
+    { "else", T_ELSE },
+    { "false", T_BOOL },
+    { "fun", T_FUN },
+    { "if", T_IF },
+    { "let", T_LET },
+    { "provide", T_PROVIDES },
+    { "require", T_REQUIRES },
+    { "return", T_RETURN },
+    { "struct", T_STRUCT },
+    { "true", T_BOOL },
+    { "type", T_TYPE },
+    { "union", T_UNION },
+    { "var", T_VAR },
+};
+
+static int keywcmp(const void* a, const void* b) {
+    return strcmp(((KeywordDef*)a)->keyword, ((KeywordDef*)b)->keyword);
+}
+
+/* Reads the whole file at path into a NUL-terminated heap buffer.
+ * Returns NULL if the file cannot be opened or stat'ed, is empty, or the
+ * buffer cannot be allocated; a short read yields the bytes read so far. */
+static inline char* file_load(char* path) {
+    struct stat sb = {0};
+    char* contents = NULL;
+    int fd = open(path, O_RDONLY, 0);
+    if ((fd >= 0) && (fstat(fd, &sb) >= 0) && (sb.st_size > 0))
+        contents = calloc(sb.st_size + 1u, 1u);
+    if (contents) {
+        size_t length = 0, remaining = sb.st_size;
+        ssize_t nread = 0;
+        while (remaining && (nread = read(fd, contents+length, remaining)) > 0)
+            length += nread, remaining -= nread;
+    }
+    /* 0 is a valid descriptor, so only a negative fd means "nothing to close" */
+    if (fd >= 0) close(fd);
+    return contents;
+}
+
+/* Finishes a freshly lexed token: strips the quotes from strings, parses
+ * integer text, maps identifiers that are keywords to their keyword type, and
+ * sets the boolean value of true/false. */
+static inline void convert_value(Tok* tok) {
+    switch (tok->type) {
+        case T_STRING: {
+            /* text is "..." including both quotes; readtok() checks the closing one */
+            size_t len = strlen(tok->text) - 2;
+            char* strtext = emalloc(len + 1);
+            memcpy(strtext, tok->text+1, len);
+            strtext[len] = '\0';
+            free(tok->text), tok->text = strtext;
+            break;
+        }
+
+        case T_INT: {
+            tok->value.integer = strtol(tok->text, NULL, 0);
+            break;
+        }
+
+        case T_ID: {
+            KeywordDef key = { .keyword = tok->text };
+            KeywordDef* match = bsearch(
+                &key, Keywords, NUM_KEYWORDS, sizeof(KeywordDef), keywcmp);
+            if (match) {
+                tok->type = match->type;
+                if (tok->type != T_ID)
+                    convert_value(tok); /* recurse to ensure correct conversion */
+            }
+            break;
+        }
+
+        case T_BOOL: {
+            tok->value.integer = (tok->text[0] == 't');
+            break;
+        }
+
+        default:
+            break;
+    }
+}
+
+/* Scans one lexeme starting at the current file position and advances past it.
+ * Whitespace and comments leave tok->type untouched so that lex() keeps going;
+ * anything else fills in the token's type, text and value. An unrecognised byte
+ * or an unterminated string literal is reported on stderr and exits. */
+static inline void readtok(Parser* ctx) {
+    Tok* tok = &(ctx->tok);
+    char *beg = ctx->file->fpos, *curr = ctx->file->fpos;
+    tok->offset = (beg - ctx->file->fbeg);
+    /* index the tables as unsigned char: plain char may be negative for bytes >= 0x80 */
+    switch (FirstChar[(unsigned char)*curr++]) {
+        case 1: /* skip whitespace */
+            for (; SPACE[(unsigned char)*curr]; curr++);
+            break;
+
+        case 2: /* skip comments */
+            /* stop at the terminator too: the last line may lack a newline */
+            for (; *curr && *curr != '\n'; curr++);
+            break;
+
+        case 3: /* +/- as ops or number signs */
+            tok->type = *(curr-1);
+            if (!DIGIT[(unsigned char)*curr]) break;
+            /* parse it as an int */
+            tok->type = T_INT;
+            for (; DIGIT[(unsigned char)*curr]; curr++);
+            break;
+
+        case 4:
+            tok->type = T_INT;
+            for (; DIGIT[(unsigned char)*curr]; curr++);
+            break;
+
+        case 5:
+            tok->type = T_ID;
+            for (; ALNUM_[(unsigned char)*curr]; curr++);
+            break;
+
+        case 6: /* single char tokens */
+            tok->type = *(curr-1);
+            break;
+
+        case 7: /* string parsing */
+            tok->type = T_STRING;
+            for (; *curr && *curr != '"'; curr++);
+            if (*curr != '"') {
+                fprintf(stderr, "Unterminated string literal\n");
+                exit(1);
+            }
+            curr++;
+            break;
+
+        case 0: /* error handling */
+        default:
+            fprintf(stderr, "Failed to parse token '%c'\n", *(curr-1));
+            exit(1);
+    }
+
+    if (tok->type) {
+        size_t sz = (curr - beg);
+        tok->text = emalloc(sz+1);
+        memcpy(tok->text, beg, sz);
+        tok->text[sz] = '\0';
+        convert_value(tok);
+    }
+
+    ctx->file->fpos = curr;
+}
+
+/* Pushes the file at path onto the front of the parser's pending-file list.
+ * If the file cannot be loaded its buffer is NULL and lex() skips it. */
+void lexfile(Parser* ctx, char* path) {
+    LexFile* file = emalloc(sizeof(LexFile));
+    memset(file, 0, sizeof(LexFile));
+    file->path = strdup(path);
+    file->fbeg = file->fpos = file_load(path);
+    file->next = ctx->file;
+    ctx->file = file;
+}
+
+/* Reads the next token into ctx->tok, moving exhausted files from the pending
+ * list to ctx->done. Yields T_END_FILE once no pending files remain. */
+void lex(Parser* ctx) {
+    ctx->tok.type = T_NONE;
+    while (ctx->tok.type == T_NONE) {
+        if (!ctx->file) {
+            /* no more files left to process */
+            ctx->tok.type = T_END_FILE;
+            return;
+        } else if (!(ctx->file->fpos) || !*(ctx->file->fpos)) {
+            /* grab the next file to process */
+            LexFile* f = ctx->file;
+            ctx->file = f->next;
+            f->next = ctx->done;
+            ctx->done = f;
+        } else {
+            /* parse out a token; record the file only now, after the NULL
+             * check, so the token is attributed to the file it came from */
+            ctx->tok.file = ctx->file->path;
+            readtok(ctx);
+        }
+    }
+}
+
+/* Finds the LexFile for path among the pending and finished files, or NULL. */
+static LexFile* get_file(Parser* p, char const* path) {
+    LexFile* lf = p->file;
+    while (lf && strcmp(lf->path, path))
+        lf = lf->next;
+    if (!lf) {
+        lf = p->done;
+        while (lf && strcmp(lf->path, path))
+            lf = lf->next;
+    }
+    return lf;
+}
+
+/* Prints "file:line:col:" for tok, counting lines and columns from the start
+ * of the file's buffer up to the token's offset. Falls back to line 1, column 1
+ * when the token's file is unknown or has no buffer. */
+void lexprintpos(Parser* p, FILE* file, Tok* tok) {
+    size_t line = 1, col = 1;
+    LexFile* lf = tok->file ? get_file(p, tok->file) : NULL;
+    char* data = lf ? lf->fbeg : NULL;
+    char* end = data ? data + tok->offset : NULL;
+    for (; data && data < end && *data; data++) {
+        if (*data == '\n') {
+            line++;
+            col = 1;
+        } else {
+            col++;
+        }
+    }
+    fprintf(file, "%s:%zu:%zu:", tok->file ? tok->file : "<unknown>", line, col);
+}
diff --git a/cerise/main.c b/cerise/main.c
new file mode 100644
index 0000000..d1beeeb
--- /dev/null
+++ b/cerise/main.c
@@ -0,0 +1,49 @@
+#include <sclpl.h>
+
+char* ARGV0;
+char* Artifact = "bin";
+
+/* Driver Modes
+ *****************************************************************************/
+static int emit_binary(Parser* ctx, int argc, char **argv) {
+    (void)ctx, (void)argc, (void)argv;
+    return 0;
+}
+
+static int emit_library(Parser* ctx, int argc, char **argv) {
+    (void)ctx, (void)argc, (void)argv;
+    return 0;
+}
+
+/* Main Routine and Usage
+ *****************************************************************************/
+void usage(void) {
+    fprintf(stderr, "%s\n",
+        "Usage: sclpl [options...] [-A artifact] [file...]\n"
+        "\n-A Emit the given type of artifact"
+        "\n-h Print help information"
+    );
+    exit(1);
+}
+
+int main(int argc, char **argv) {
+    /* Option parsing */
+    OPTBEGIN {
+        case 'A': Artifact = EOPTARG(usage()); break;
+        default: usage();
+    } OPTEND;
+    /* initialize the parser */
+    Parser ctx = {0};
+    for (; argc; argc--,argv++)
+        lexfile(&ctx, *argv);
+    /* Execute the main compiler process */
+    if (0 == strcmp("bin", Artifact)) {
+        return emit_binary(&ctx, argc, argv);
+    } else if (0 == strcmp("lib", Artifact)) {
+        return emit_library(&ctx, argc, argv);
+    } else {
+        fprintf(stderr, "Unknown artifact type: '%s'\n\n", Artifact);
+        usage();
+    }
+    return 1;
+}
diff --git a/cerise/parser.c b/cerise/parser.c
new file mode 100644
index 0000000..3ed1869
--- /dev/null
+++ b/cerise/parser.c
@@ -0,0 +1,117 @@
+#include <sclpl.h>
+#include <stdarg.h>
+
+//#define TRACE
+#ifdef TRACE
+static int Indent = 0;
+#define parse_enter() \
+    (printf("%*c-> %s\n", ++Indent * 2, ' ', __func__))
+#define parse_exit() \
+    (printf("%*c<- %s\n", --Indent * 2, ' ', __func__))
+#else
+#define parse_enter()
+#define parse_exit()
+#endif
+
+/* Precedence Table
+
*****************************************************************************/ +//enum { /* Define precedence levels(based on C) */ +// LVL_NONE, +// LVL_LITERAL, +// LVL_COMMA, +// LVL_ASSIGN, +// LVL_TERNARY, +// LVL_BOOL_OR, +// LVL_BOOL_AND, +// LVL_BITWISE_OR, +// LVL_BITWISE_XOR, +// LVL_BITWISE_AND, +// LVL_EQUALITY, +// LVL_RELATIONAL, +// LVL_BITSHIFT, +// LVL_ADD_SUB, +// LVL_MUL_DIV, +// LVL_PREFIX, +// LVL_POSTFIX, +//}; +// +//typedef struct { +// int level; +// AST* (*prefixfn)(Parser* p); +// AST* (*infixfn)(Parser* p, AST* left); +//} OpRule; +// +//OpRule PrecedenceTable[T_COUNT] = { +// [T_BOOL] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, +// [T_CHAR] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, +// [T_STRING] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, +// [T_INT] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, +// [T_FLOAT] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, +// [T_ID] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, +// ['('] = { .level = LVL_POSTFIX, .prefixfn = grouping, .infixfn = func_call }, +// ['.'] = { .level = LVL_POSTFIX, .prefixfn = NULL, .infixfn = dot_call }, +//}; + +/* Parsing Routines + *****************************************************************************/ +//static Tok* peek(Parser* p) { +// if (T_NONE == p->tok.type) +// lex(p); +// return &(p->tok); +//} +// +//static void error(Parser* parser, const char* fmt, ...) 
{ +// Tok* tok = peek(parser); +// va_list args; +// va_start(args, fmt); +// lexprintpos(parser, stderr, tok); +// fprintf(stderr, " error: "); +// vfprintf(stderr, fmt, args); +// fprintf(stderr, "\n"); +// va_end(args); +// exit(1); +//} +// +//static bool matches(Parser* p, TokType type) { +// return (peek(p)->type == type); +//} +// +//static bool accept(Parser* p, TokType type) { +// if (matches(p, type)) { +// p->tok.type = T_NONE; +// return true; +// } +// return false; +//} +// +//static void expect(Parser* p, TokType type) { +// if (!accept(p, type)) +// error(p, "Unexpected token"); +//} +// +//static Tok* expect_val(Parser* p, TokType type) { +// static Tok token = {0}; +// /* perform the match */ +// if (matches(p, type)) { +// token = *(peek(p)); +// p->tok.type = T_NONE; +// } else { +// error(p, "Unexpected token"); +// } +// return &token; +//} +// +//static int consume(Parser* p) { +// int type = peek(p)->type; +// if (!accept(p, type)) +// error(p, "Unexpected token"); +// return type; +//} + +/* Grammar Definition + *****************************************************************************/ +void toplevel(Parser* p) { + parse_enter(); + (void)p; + parse_exit(); +} diff --git a/cerise/pkg.c b/cerise/pkg.c new file mode 100644 index 0000000..0bed7ff --- /dev/null +++ b/cerise/pkg.c @@ -0,0 +1,23 @@ +#include + +void pkg_add_require(Package* p, char* req) +{ + Require* rq = malloc(sizeof(Require)); + rq->path = strdup(req); + rq->next = p->requires; + p->requires = rq; +} + +void pkg_add_provide(Package* p, char* exp) +{ + Provide* pr = malloc(sizeof(Provide)); + pr->name = strdup(exp); + pr->next = p->provides; + p->provides = pr; +} + +void pkg_add_definition(Package* p, AST* ast) +{ + (void)p, (void)ast; +} + diff --git a/cerise/pprint.c b/cerise/pprint.c new file mode 100644 index 0000000..8a9705c --- /dev/null +++ b/cerise/pprint.c @@ -0,0 +1,217 @@ +#include + +static void indent(FILE* file, int depth) { + fprintf(file, "\n"); + if 
(depth) fprintf(file, "%*c", depth * 2, ' '); +} + +static const char* token_type_to_string(int type) { + #define TOK(name) case (name): return #name + switch(type) { + TOK(T_NONE); TOK(T_ERROR); TOK(T_END_FILE); + TOK(T_REQUIRES); TOK(T_PROVIDES); TOK(T_LET); TOK(T_VAR); + TOK(T_FUN); TOK(T_TYPE); TOK(T_STRUCT); TOK(T_UNION); + TOK(T_RETURN); TOK(T_IF); TOK(T_ELSE); TOK(T_ID); + TOK(T_CHAR); TOK(T_INT); TOK(T_FLOAT); TOK(T_BOOL); + TOK(T_STRING); + case '{': return "T_LBRACE"; + case '}': return "T_RBRACE"; + case '[': return "T_LBRACK"; + case ']': return "T_RBRACK"; + case '(': return "T_LPAR"; + case ')': return "T_RPAR"; + case ',': return "T_COMMA"; + case ':': return "T_COLON"; + case '&': return "T_AMP"; + case '\'': return "T_SQUOTE"; + case '"': return "T_DQUOTE"; + default: return "???"; + } + #undef TOK +} + +static void print_char(FILE* file, char ch) { + int i; + static const char* lookup_table[5] = { + " \0space", + "\n\0newline", + "\r\0return", + "\t\0tab", + "\v\0vtab" + }; + for(i = 0; i < 5; i++) { + if (ch == lookup_table[i][0]) { + fprintf(file, "\\%s", &(lookup_table[i][2])); + break; + } + } + if (i == 5) fprintf(file, "\\%c", ch); +} + +static void pprint_token_type(FILE* file, Tok* token) { + if (token->type > 256) + fprintf(file, "%s", token_type_to_string(token->type)); + else + fprintf(file, "%c", token->type); +} + +static void pprint_token_value(FILE* file, Tok* token) { + #define TOK(name) case (name): fprintf(file, "%s", #name); break + switch(token->type) { + /* value tokens */ + case T_STRING: fprintf(file, "\"%s\"", token->text); break; + case T_ID: fprintf(file, "%s", token->text); break; + case T_CHAR: print_char(file, token->value.integer); break; + case T_INT: fprintf(file, "%lld", token->value.integer); break; + case T_FLOAT: fprintf(file, "%f", token->value.floating); break; + case T_BOOL: fprintf(file, "%s", (token->value.integer)?"true":"false"); break; + + /* keyword tokens */ + TOK(T_NONE); TOK(T_ERROR); 
TOK(T_END_FILE); + TOK(T_REQUIRES); TOK(T_PROVIDES); TOK(T_LET); TOK(T_VAR); + TOK(T_FUN); TOK(T_TYPE); TOK(T_STRUCT); TOK(T_UNION); TOK(T_RETURN); + TOK(T_IF); TOK(T_ELSE); + + /* evertything else */ + default: + fprintf(file, "???"); + break; + } + #undef TOK +} + +void pprint_token(FILE* file, Tok* token, bool print_loc) +{ + if (print_loc) { + fprintf(file, "%zu:", token->offset); + } + pprint_token_type(file, token); + if (token->type > 256) { + fprintf(file, ":"); + pprint_token_value(file, token); + } + fprintf(file, "\n"); +} + +/*****************************************************************************/ + +static const char* tree_type_to_string(ASTType type) { + switch(type) { + case AST_STRING: return "T_STRING"; + case AST_SYMBOL: return "T_SYMBOL"; + case AST_IDENT: return "T_ID"; + case AST_CHAR: return "T_CHAR"; + case AST_INT: return "T_INT"; + case AST_FLOAT: return "T_FLOAT"; + case AST_BOOL: return "T_BOOL"; + default: return "???"; + } +} + +static void pprint_literal(FILE* file, AST* tree, int depth) +{ + (void)depth; + fprintf(file, "%s:", tree_type_to_string(tree->nodetype)); + switch(tree->nodetype) { + case AST_STRING: fprintf(file, "\"%s\"", string_value(tree)); break; + case AST_IDENT: fprintf(file, "%s", ident_value(tree)); break; + case AST_CHAR: fprintf(file, "%c", char_value(tree)); break; + case AST_INT: fprintf(file, "%ld", integer_value(tree)); break; + case AST_FLOAT: fprintf(file, "%lf", float_value(tree)); break; + case AST_BOOL: + fprintf(file, "%s", bool_value(tree) ? 
"true" : "false"); + break; + default: fprintf(file, "???"); + } +} + +static char* getvartype(AST* tree) { + if (var_flagset(tree, SF_CONSTANT)) + return "let"; + else if (var_flagset(tree, SF_TYPEDEF)) + return "typedef"; + else + return "var"; +} + +void pprint_fargs(FILE* file, AST* tree) { + size_t nargs = 0; + AST** args = explist_get(tree, &nargs); + fprintf(file, "("); + for (size_t i = 0; i < nargs; i++) { + fprintf(file, "("); + fprintf(file, "%s : type", var_name(args[i])); + fprintf(file, ") "); + } + fprintf(file, ")"); +} + +void pprint_block(FILE* file, AST* tree, int depth) { + if (!tree) return; + size_t nexprs = 0; + AST** exprs = explist_get(tree, &nexprs); + for (size_t i = 0; i < nexprs; i++) { + indent(file, depth); + pprint_tree(file, exprs[i], depth); + } +} + +void pprint_branch(FILE* file, AST* tree, int depth) { + indent(file, depth); + pprint_tree(file, tree, depth); +} + +void pprint_ifexpr(FILE* file, AST* tree, int depth) { + fprintf(file, "(if "); + pprint_tree(file, if_cond(tree), depth); + pprint_branch(file, if_then(tree), depth+1); + pprint_branch(file, if_else(tree), depth+1); + fprintf(file, ")"); +} + +void pprint_apply(FILE* file, AST* tree, int depth) { + fprintf(file, "(apply "); + pprint_tree(file, apply_func(tree), depth); + size_t nexprs = 0; + AST** exprs = explist_get(apply_args(tree), &nexprs); + for (size_t i = 0; i < nexprs; i++) { + indent(file, depth+1); + pprint_tree(file, exprs[i], depth+1); + } + fprintf(file, ")"); +} + +void pprint_tree(FILE* file, AST* tree, int depth) { + if (tree == NULL) return; + switch (tree->nodetype) { + case AST_VAR: + fprintf(file, "(%s %s ", getvartype(tree), var_name(tree)); + pprint_tree(file, var_value(tree), depth); + fprintf(file, ")"); + break; + + case AST_FUNC: + pprint_fargs(file, func_args(tree)); + pprint_block(file, func_body(tree), depth+1); + break; + + case AST_EXPLIST: + fprintf(file, "(block"); + pprint_block(file, tree, depth+1); + fprintf(file, ")"); + break; + + 
case AST_IF: + pprint_ifexpr(file, tree, depth); + break; + + case AST_APPLY: + pprint_apply(file, tree, depth); + break; + + default: + pprint_literal(file, tree, depth); + break; + } +} + diff --git a/cerise/sclpl b/cerise/sclpl new file mode 100755 index 0000000000000000000000000000000000000000..6fd7fbd2bae7ffd297f1fd01748f11a1dce6a5a6 GIT binary patch literal 34928 zcmeHw4SZC^)%PSJ5Iz$XD^-*giGoTDp8^I=fDIc75JG}j!8Ifs5)IjOKlsp~*ac-> zx6*IhqNT0%(b`t4)gnd+NQqk1pgf97OCCkLYfw|QHLa0-{%7XQ-Ft7cXy4!azR&af zl?!*z%=wx#XJ*dKoqKQYEzZ*7^t3dgj|_3Spj`VY3RB30{98EzP$;H}!T3E}j1>Km z?ZnV%!YsyT0FvATOt-_4%%`EYkRZvWI-VV(C>n|!l_$AON^;%~U%yE4 zX{hN&d4-B@RE+bRM<~38W;yarY`LPxR#~BAfXbIK)Nf}orYQ{IzHwM>^UpsB;_^Ffg{NB8YvfiYhc;_ytlqOOCVpTFv>2q)rkF-1O zR>G$fZWPV~o)=e5|0trn8__JstH_;d!Kh4L3jEL%_ti zxdu<8Smjye_XJ41M$|S11J&}^FIH);8Wb#Q*ZArK9#P%k^?QW3339bfzWT;MEsCgK zTjj2;Z>(ylztJOB_&rs=>N--p%DdV_hE%U2Ll7*g$z~X{s;VCHK92|J+O@z11W_q+ zOcirWOJ>b>Pt2Qaq$cJ~5$=+T`EFR^Sy}H7czhM}XE%5oJrz|e8c4~?Ro+I{?N+RP zGQzFt@*lwrPAiQ5)f_sXi7%>+2C(GQM{M{%GrrY^Kg#%a8$N%I@>_=uznACRm<_+5 z@m)6jtBmiq;g2w0*v9cYj6dX6?>ce5e>%LF4u_k&C8oRuQX=;# zezQjvB1QQ+qQ^TDIC9^hJcaUjyUf2#c?#k2R+)dE@)Ww`kI4KU%F|R7-zM`rC{H0g zzD4FAr96f1_y(E(CFLn($D3sSXOyQ<9luWIe?)l-(eW~w|2E|*G{*~N{tn7hNRH>r z{5L31p*Zf4`5P%uQ(!zt=6#f>kQ^5>e?8?X6vw-c1ChIe@;Q|6kom7so~FupyUdqU zo2pt0s{7&lLdEy91}AJM09Vy2HRkNA{=Wv-0MES1ON)P5#qX7~-hz>&a`m%*($-)*v-p95WYBrh*ThwP^A7!87jP0WF7xQp&RI87& zbDS~I3Qyq3fwH)dpnJzeRou<0xcvaJw?0#KgC>h7yEi+J?V|A=`y06$)ysaO^4J%9 za*!s;L^yG==s^-YfUzu>?nhNN=($GoTVvXdI@SZO&e6TG1 z0xI043jfZ9(z9Qo!ax0|s;Z?K^Ql-@IN0F|R~`z_Ke!8WB9@l4039;fRgnf`nS)G; zR2~eMji8iK2U7>eu%`a|5DMP0MAM?lkF$mr>}BMrFrw#13@#$6-!v1>0e16q6&g{Le_`^#?KnPd>_a;dn7 zYM9KB@vN81Pa(f!lT!IHA^=>mADUxV87eV2qGf5)RF08yz(j}t!KebtafM%{(a|Br zDiPsk$yRVhRIijGyE`∓(qHjYbcQOy81pQs35novgc4Sy!p7%OmTa{8U%?U$Smi zFYBbZoGr*%uE_juSEOWwE3`Yy72f9xzZ4sl^aIG1-hn6PEj9E02@8>3j+AzK{oA@PWF3oUBCd7I9JTGI@_zWx!pMi$;VIwB2D zlg6TlZ&7*I-L7$aT;X<9Zwo66%#?}nzKG(b#jMofiVjKFhJ54-zX)4aqJ*$i9C}*% zS74 
zRr0RuD48y?TTyqFyz5HB()ME@XC~eI&fa)Gi1A5Ck|r7b;UVf-1t=P|M&)oKe@x{M z8GKO}ro&32BaFXDEdS_|eX1QZCH1XCuC(@jCdJ_Rz2ZPYGHi^9KdstDmTYPDSOb+8 zsoah^)Dizav9&on00_n;)CPy09j>Ue16O<#utt26))YJFvKGVcj0ymh`>- zme!)4Sc8NsJ77-s&Q^5J=;;T~MKeCpF9-Qh_kir%uSG4wt*+?sP0~rx%rnjtBGNBi zG7m-?t&-?)g`6F8A zX-(N1$*aVL#U6FX&P=mbT?wxtfTCr1`67v#W{G>LwUq=F*LN9%7ipt{9i84jtI@fX zwa^f;YAG0vWhnWhbFy7D^FTR@k52jaxd1v|-SdGC%%^>rNKh|{VVvR$;5sRO5v}~x zWjnft9B>gxsCfW!Tdo(;TX@~52NE#oHxaBXBlDp^j=IqMa@5IS|FXK6<1QsHj7B79 z4u@Wx$Ox!rvi~G$Wl0Bh`|lZ6bi)etQ?xeF>GJBPj}@(Ie6o~yA%kv9-JDpnPS3%7*jIMRX39>S&sTR+?c0XDJL-lKd}c z&ke^C2~_75$q}5PG)S2*so)B&z&v9Y%HR?Xwj&}uOVJUXRz^n&5#m&B53v!GQ5$q0 z3PWA6094o*Ru>^@(BbUv90Pn8g}On(Y&A>~Ye}0N3$Zldx+rQ8&&JlkAWFI}b9VdM zltMT+2p`YyK(@UE`(x25Po85M6q`FS6xgG*uVkz?(CUabtrS$yif2l_5(6*Neq3;X zTm^KNX^M$GKuwAoX&s#x>3mc+I>rfc;EF;`OWiWd0?XB=7>uE-^l)WNmUBXEB=f%( zpia@#Kca{D%WGT9@_>FhM#UA|A!x^6;Q89d=p{iYAf@*P#)Pt z%m}h!nZV*e(nGCiFqw?I&?*{j_@ZEZ@G=WBUHS8TjZ7-Al@Ol*eQleXQ7UXrYR++m z_8>UHf69Web3;60VwmpSkiF?gCPV~tk=VTHCMe3@DM6^aDCl&lm7goB>Z3*l?_zDn zsMsumgJSn$#*O_VMr%hhgnRL3S2)u#pSoadJ#DkGB^bz7(`3o$lsTcNWi_%lUIitX zp>0*w3X;Ci82AxUQ4%h-sf8sWOLDUSm`Frp)Ut zNdyiG_dI`ER2nlPhoGda=`$T?!4+HC#KyBLb=KM{fRBF;~u!{H`p_md9l^U*0k z#+_TkQNIKuN4N0$p3wT9<+ph8dwl)0_&&HU;_L~$gxjdp2p;&PU+KCI!5V%D*YCvKbO_dJWT1os`RUren+JbQ2KS1ev#5I zsPyxc-mTJYlzv>LTPgjZNU}VKTK&_)1od9BHh`H!~>M@BXK_^uGfhw zomi?93w5GcCuSmXA4yF{;%Ah&2#KFkVl)!lDDhP!?x6&RvPul#gtecCyEZ$IwmExX zo#^};T8$RlyfYZh^toU(nhKF)n)7IIs-0fWbS&=EWv|6`3pJ=-N|qnRARQEP_N1Xj z``x?^znQ_2**hzbGWYwmsi3*XfLqV8p<}W~>v@(g6lrC9^JoZmA%}su00Y|?h}i3U zE#`k4#UbEH4mAqtvieS-PoP(IyP??qlSQn*fEG}BC=Bqfzhmz=)O|dA;}t-Jx)a$O zmm(2A0`zz?DAb*keft6AEJI=X>}aWFAX&r_4}Sl+0W zvI@ZWyan2wNtz>jXFJ%s74;HC^LV2{Rd*9GzPFmyKTj%3Y)rMT?8ZIdT*I8Gjfia{ z&QS(up@Sxtf#^CF;OeA4R7UJF!pQ|<;=ds90c6P}S9tf1{!pX#DWWrH;GSsgPo#3c ziDQ6?V-#^bMI660aSSqXWD>^@iDPpLjt_7rHnxU1YEy8$MjVyIF~`J_X)2|aI3^Lt z=oB3H5yvUSLG7+#?P*Asb}CewbdZjr%6o=0Tb3+mNif5oll*u_LXN0Vt+TuUz}>7gKR zNscF)+vIvU+>X0^8`d8a0S9E!i}|`LI%OpSkul0I@4&F7{Q+6L>_dap#S4vVHOgr< 
z51spV%|PL?{YQF9qfu=* zB$~MgNEuc?#NB@xRy!#{VKq((3ajr@g2L)Ml%OF|3~@_XRsN87IJ+VOA*comIvuN` zQ$kd;BrSe`SnTm*GtwG@d?`LkKuCStwU#Jb8-^5bUStHCQTd!>nw5hJx| zbjo*_3suB;Bd&$oxX-|aRWJpczhKNu&%TX>DcF4bPpNw5GuV`k`JvC$Gt~@&a(5!0 zkym04)SrvE2!?d#l2xj6gPNYCo;5d7Foj?rDHQwgGw-8-nN zdbn0m=)4v_!__Y?foTCd-5K-Up-aWhIT34w@D^Y!TY&?CG^$;ws~xV~Z?Ac1eY*(w zjSWt#13pBw#L~ITsPE;G%KcQhn!1YAt(@NV&0;p*COV&{BTEgo~eR@3#Ad~;pkH3d9a;|1rBtP%J3Up^gKicE7!$+aOG0`ke2M*C0lL# z8`Nr{y<@L20?&z>N%p|qJIwP)DTVqXI1PH+sS^3BT&wSYNh3#hZ)Th;8L@Pr3nCds z`pdp)YUnu;=YDn5ncKP&H$e;>T71YKpjgGFv}aury5>Rmm43UF2$tr4R8hI=Pru$r zhg}M!>ae{UFtok{cMEPpYd}p0-JR{1eR~D6mKb{#g1Md=<{_<@Y_ff?M__S;jwgdJaTYh1ktYU{q&`?9vv-*TC30 zp*MatmgAjY?Tvq@8+Z&?OJ6nLuyuv!x9@Vm=-8(!VA7?weE6$u2&EAJ;S}QrF1iV? zq3ITmygcqULY0HYJ@1D>FGXX>qjynY4#sd9jOJS_{ZH5ErUw{E621tM2(G>bUGmfZ z0?A|NF}Wk!_?q0#=uf21GZcg5R$HVmY)4I!Q-*xr4O6y=ooTAv4#MmtM(R%cUzM(x zRYpnr5g_&O(C=WZF~p^j>Za*ds)X8>AP>QGwcN6}BBmZ)Y5H=!w9<>s`BJ>(DD|Ac z@D%*!R%4scG}1X-9b49V#n+8pGE9FFF?~;fcTg+IHFEf3SI)e^l)J%llspDwFu#u^ zPOLwr3fX+q_h26uLmqo^(u5H1hGa^_bI)ukx~eof^R9_P6fG$UzgkpT68>9JMNzn? 
z(iI)Q0qDi0<36WnZeyW?@cHh~GXiH#cr$yHC=Gv98vbBT_%B6?;YVDdXVP5L{}geam<|QC|X?MUR+ULvS4oCwArqr z^1dl03o8016qhb6NJ38!E9Ye&)TL2yw%y8 z$+@(wv}AFGsILV}Z*obosA_6zSl63UR904cHNART-?-8Z#|1r9R&`yKFPRepudjMm zdXlKx25(is#IVBaZ7^j;eT}ElOkWLkja93pZqiSAqcG0bHdL*o*K6q@Pq?dmEB!LZ z`4!%pb)08ExK}d2&ZCl^#u{b<(D){wM-u95-PK+&00!#tW-g$o0k7#w=eVgRpeqg7w#4fXt5@rcQtwlUZnndu-->75cD;YFN4(qfnSfH zkt>3Y_1;G6E20>$ZF_1Q0k5N}%IEhWaf7GPaY1hN1%t#?Tv*Un!WdlLPsaHpocnQp zhVxX=a{V)MZw$&)tiapUbX5AQR(fVQ{M8Lj4UTKPO#u|-&&$hO?zm=rkpnMY*Vk56 z2au~pxr84yXnawD#$WDmuBs0>(1RQ+>sP}N8E3q;hTx#_b&ljwMsI&R>O2iij{3%0 zuWwZq6(@52gXmpuQQ3GyqjycCAuP4cK;`@w(2F4=_rhvX;a;$CffK-4UcRuL;DR~s z;*wHAl{*(zmXtdeBUx6ya0y}zB}<(Z_@$^);YI|(Z^hMR#Ev*vIh$ye3rZFezT8<+ zSxyNA2`DaIOk5>%@FT+_(ui45A_HE9TShk{oT8vve{(JkN6Mf-v zoG;*f3+HZ}uj8D6b0*FboRv7MaIV6+4(BGE-@Eg z_|;@_1U&t)w{|(sC{72?YMf)xcEvdBaNdFQS2$n9*@g2|#D)nt%W!&eQqZTka`pUK zSdR&D9v&^Y80Y0U=i)5Gc{R={I59^En3GF}G}H<~PdYq+ryD*891FMz=Ng=EAWt(G z1^gOMt(=>3{f-$jW{YtSF}A)IKXUm12onB_u`BS`MsA>aX9peuyATiS`Ej=3Jc!dF z{7qHW9x+HXde-3219`+yJtGbjfvOewV^ymOq@9&<*&|pjR6@_>KO_=8NFUq>pJ21% zDBynNpKD7b3XmW0EPM<&39uQ^54aU@8(<4y2jF4Aq0d1N;C#T5*w<|aEC75Oa2eox zfX#sU&qE$?C14BSe*hi^`~)n+knl0=e-1Zz-qu2z@Gpf20Q}T130Q3^4QX?11tdC zcL4H$F~GHeXTJ=2Ko?*O;I3C759q_DLl2<94&O-JJ8T9l0Gx@v!DWCi0X75Pg+1o2 zfFrR<+yXce@GzhYum^B0;D~`jRAIeQ0N4Py3~(1 z{1|W~Ci{7aM+JaCMt@!gcnSJ(GvGsjTLGuUAP;yO;9lX=e=?khLXkKn~&Y zFo4L}n@C)wSmbZt{HPD}=kG}*w$VCzSWfY<5m#mpUX!&!Tyf@Q7fu>8niP=y9Gnk4 zlSrUhjDyNrhI13jyPU8^!*Xs*pFOnyIv9j3(VKDp9Q2C;4f@I?eJjupgI;K+-;|`c z0RIx`gb(hfg5i1A5O1LNncD-&WAqo`Bv0 z`rRj>9|nCp=#$O*y-E9fK;IAgVlzE`fnncB#OVa+_gd&bH0TAOuSfiU!b1P8L0<;? 
zX^8u!7W!O+-VFK`pck6y=K6!~J^{T2^jl6qKMeX$Krb-sUn7f``km${zzYhAhE%c~C-wOIX%%`mu z`cnqI1@unPqZayRgMJwFrI>$lCq8N4B1u;55Bh=>bZVcGnCIt!o^O^%+JSQ#Xrw!C z@y$``8)BFB2hZ7 z8^F^7od;{WYM|V}_~LIU7N5I|2PZ&|f(L zeJ|)oKrb+tZ;JCrLH`W&RO6%{*0b-OfIbHF*FnG8tUp~X7K7V`mL>%o;WV{IiB- z$#vwN&{GCI)YSSQpWX+&3wUY2CY*#PJ#f+kCp~b|1OHDvp#Ki6{=2U_CLiU}hUik| zq=$Vi&bMEt5_sxb9{TUg;=z4+U@Df!nGE&cnWeP=9h&~P;|VY2W3;}+fu*WEaHL|6fouexQGGqIlL&i?9F!*P*kt~Hg#(Sh>&^Aif6tr# zk_sLA?|94WIpp=<@6Kkr{=3~-oY#M^djRKm&X#IKf6njY{4mbzzso&@^ZM^`>+<#A z;U3KNKePN$&g;LsO|NCrq5s~t%3(kKN|n-oS6lxbZT)w%-(mXP*|TRj#?Hk;X|_zYtnwBq|2^Pd%;DfB#P#rGF_ zKC$8l2t5y3@mWI8Z&v(3q31y>9+M--X)AuPaP)~EB3f;6m*g_An$X{qvFaHn^!mbz z$CY8B8Kcq}Fh+l$#7xb=mkhSaO>Z-n$~{%+^@^F2Ax;x|onplg7k$l#8CY%TzoTyD z$L!t5ypkc#5FLHuM;PB*uej*lC(Zb6WPL5 zr0{zEHiq#_8L!t-w8umIb&MaT#KaQDuVuVm7t-Q|_&3}1Y-ar3HvXS7{sG48_0tZ< zZ)ZF&zr-tyf7Yhw7~>Dw_%qX#Juw^p9L9GuUfX{ulvTV zc)cF{2IHsM@IPjJp$-2##=97={r>{vm)h{}0^e7=ccsApGX)-+i+&_*);p>q_Y+*?QTLdIX~P?lB|z2IJ3~pzw0< z7|9P9{{zO~!R`Aw_!lW4vT%Xam;bYYr~c3~O)+YFPEWyqHuL{ty5iUIhjM-C z{}uuk>2KxhNImZ8nG5w%H=Ob(re6~l&=Z)l_^xo@}ddY7slXB}* z;D5;aqr7qO1dBe%_@y>KJjwV6dBcLkf!LWs|LZC6$5P;FP!H%TEr zW&8-n59SS@62^~`c+Qx>#1wcZ>)Fr!MAvJ63VwGA{LR3hf_ArmXXo}5{67Ys`iXr# z_CN}Lx(7@Cp}!?T#}x{QeT*L|Z(!4(_fW?{;K?50V+03|H&W>T5O@c(n!$0_zmPZ5 zs-^uxv&hX6CX7y1PQ0zD6$LX?0Vm*wGE-FvUCH=D#xLgaQposH z#+PwCT+aAq63-bEXkz}mzN(OgOk2zNI`_j%nEzXhKQ>w+^nCY2iDxDg zc$oRea{Mf0T0X{a4t5Kt+2X^K%>RHbKL3>cudh?|x@af!KQ%@vZe{(?Gd^dkLJZ<| ze~t0<7jEg`?jnvbzJvX&`&B09YwB-q+qj4^|48P)RH+jmNIYjuAOrKP?1x`xB-=9( zc+wx?epSfi;lPuB?D3PnOG*4S=PO3}JOGlpjGxT-eAaUjF|qB2A1Isk&3J7Et1(4cxA-x&YL)G+T;|3=@a`V zOwZ3xMVOX?Fkxy6!o(DXPoIRZANFPL^hwiG5GJQmH6mAO+dO;N_Bsa$etDv4>S z2-7E|C}VoxHJ_B9-*-Jgm?o<6isVX9Bi@+ws83iT)!(iU2i?_E{- z)~Ph!dGb+4zRBeSYts!Y=~sRv6N&mpdU;UDSG?TxpuIcb#TP5-1x36$hF7WZAx{|A z;Pu`RY;xntc)X!Car%TySp&H7ZkQCbVwAlBd`q*JR;9K9pY8NjHBeRX&Y64z&A)Dy zA11B30SWogzsgCP+g;y?s#W=XRqNdJ)}edO)eDN|m&``pO%(YZPoVH=U$@icE-o*c z?{vH7lneLV(uK2%O5F>Kix)fbdREb_QYSvislNVM?GMt2LhE%E)Efl!A<)7?{n=0Y 
z*k|%ZN##E`)z5hCkzS8ntUM>bb!wGGo#YFYXmWb@j=q&@WjN87L``pqR+cmdD(HPZ zG^D$#rpB$`?<))VD!gI|UZ9j@x2L8mP$ib2@z66GJgYqoR4%@(YAe?Ez1AdkvQabr zLZ>cLze+fZUevUF$~7r1{e{oA`mHGSRaYy{^gb*5wN}0iy0B?BzV)iU?P`^f--ETH z;1+smR(>zmO6m29S1Y-Q-#_Hf&svFYH+fNg!j;^I!D(ekzFW(tW4JX|*NHhLb4x1p zw`i>*w$H*A(W}Cw8=t_nGuHUM?z*bR8v2-Q$wHV>Q{U(g`aL!BonHM8ulfC7Q;()$ z!SBji_3HO>b#vm~8seqbB#Y@)V+u<$y6CUqTE*okkgo?9(<{7SmLs~!>(}E)zLBHD zoHSLwj$9%`4Sx^Vs&jD}z1FB+1Sb9HRV(~{_PhKnZ;)OhHC`cxyOOU^^Cx|+8uiP_ z&`IHff{5RZS6~rx=rh4qW<8uKn4#!e`RU)H(gl_C-TZxHE59;Sc2fEVvX!VlNo>Wr z-4*j^%lAm##_Q1hMPeHxeNVX0j|5Wy!FX4%Li2o3MB#8@E%hFJRoSYk79F=i6qV0i zlCPRoh6SF5WXu#L_OB#cHLzFYTkbwF4|9{(SL3IbvQ4k2mwRdvcF;5NHD;^0b8S;8 zUZ2N%05F~2smh~HE@7QckuG18$ZPZlJb6X4O2!AOR*JlpjlsOSDu12Gt6A4blYxW* zpQ2zs^iyAzY@Bg}=JPaE5hJIX8Ui9uj)6QR^HzG14S3e#SBCmLpH~LRJWrjw)`z#{ z-E}okqH{`A%{NLyr%^)Hs`_da?+uV<7)OCN5B*i-ReM*hqWSdy%0bIqy2QX`pFH$^ zLk)R3%~Zu}WRRe3JX{JG^7{Uwh7Qoohra&TINC187kj+AeEmF*hGUsg%jtElmZ$9i z+9T8Q`gt7t)bhOjC1wLpEQUO;XX()QH}&&A3fsH>^FW|yYqY$+|EXai z3u^l{pN6!*P0#A+yuMGWp}wxQ+s|snN+d|Pme==FHQdGoUB12kba_s+Svcv?_gyvA z&npoRZBf|eZvu|`54(*n;?=>BFP|By$}%!Ytj1}Ucd(p>+lXj7Z0g0Wpqb@cPf^So zYJ0T`o!9UVo4np{(J;aLL|R^#uk%~%^3)-5==;b6Ro3{`?Vu6gM}q9v?I#ME!tfYw z4VVvyIfFFqOPJ;L{b&t;XDC*J_VRyalh^mHHH_KH*M3vlgm?l8x>w7?%(%ZD;|)4H zzs_s82eNcctL63mm#!}<{~~0m?`e5`pQfATJIFaWX#CKj%hm0;2{>ZW^7_8|Avba3 z{YnLlUoEfUYoM6rjr;G{DUl8Zj9)FMA<@n9dSAHXIwdmJ)?f~WHh$kligaoF^>@y* zJc>ofIW6GO35K13RDQk0`P4eBLakd8PuVuD7;}#)r0D3i8j= dH7apL3ou@nt9fa-_a%Sn2F3A-W@KoW|8I{d0&D;P literal 0 HcmV?d00001 diff --git a/cerise/sclpl.h b/cerise/sclpl.h new file mode 100644 index 0000000..2a0060b --- /dev/null +++ b/cerise/sclpl.h @@ -0,0 +1,313 @@ +#define _XOPEN_SOURCE 700 +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void fatal(char* estr); +void* emalloc(size_t size); + +/* Token Types + *****************************************************************************/ +typedef enum { + T_NONE = 0, + T_STRING = 256, T_ID, T_INT, T_BOOL, 
T_CHAR, T_FLOAT,
+    T_REQUIRES, T_PROVIDES, T_LET, T_VAR, T_FUN, T_TYPE, T_STRUCT,
+    T_UNION, T_RETURN, T_IF, T_ELSE,
+    T_COUNT,        /* follows the last named token, so it is T_ELSE + 1 */
+    T_ERROR = -2,   /* negative sentinels, outside the range of the named tokens */
+    T_END_FILE = -1
+} TokType;
+
+typedef struct {    /* one token; Parser embeds one of these by value as `tok` */
+    const char* file;
+    long offset;    /* presumably the position of the token inside `file` - TODO confirm in lex.c */
+    TokType type;
+    char* text;
+    union {         /* numeric payload; presumably selected by `type` (T_INT / T_FLOAT) - TODO confirm in lex.c */
+        long long integer;
+        double floating;
+    } value;
+} Tok;
+
+/* Datatype Types
+ *****************************************************************************/
+typedef enum {
+    VOID, INT, UINT, FLOAT, ARRAY, REF, PTR, FUNC
+} Kind;
+
+typedef struct Type {
+    Kind kind;
+    union {
+        struct Type* type;      /* not written by any constructor in types.c; presumably the target of REF/PTR - TODO confirm */
+        size_t bits;            /* width stored by IntType(), UIntType() and FloatType() */
+        struct {
+            struct Type* type;  /* element type stored by ArrayOf() */
+            size_t count;       /* element count stored by ArrayOf() */
+        } array;
+    } value;
+} Type;
+
+Type* VoidType(void);
+Type* IntType(size_t nbits);
+Type* UIntType(size_t nbits);
+Type* FloatType(size_t nbits);
+Type* ArrayOf(Type* type, size_t count);
+Type* RefTo(Type* type);
+Type* PtrTo(Type* type);
+bool types_equal(Type* type1, Type* type2);
+
+/* Symbol Table
+ *****************************************************************************/
+typedef enum {      /* distinct bits; presumably the values passed as `flags` to sym_add() - TODO confirm against callers */
+    SF_TYPEDEF = (1 << 0),
+    SF_CONSTANT = (1 << 1),
+    SF_ARGUMENT = (1 << 2),
+} SymFlags;
+
+typedef struct Sym {
+    struct Sym* next;   /* next (older) entry; sym_add() pushes new symbols at the head */
+    bool is_typedef;    /* NOTE(review): looks redundant with the SF_TYPEDEF bit in `flags` - confirm which one is authoritative */
+    int flags;
+    char* name;         /* stored by pointer; sym_get() compares it with strcmp() */
+    Type* type;
+} Sym;
+
+typedef struct {
+    Sym* syms;          /* head of the singly linked symbol list */
+} SymTable;
+
+void sym_add(SymTable* syms, int flags, char* name, Type* type);
+Sym* sym_get(SymTable* syms, char* name);
+
+/* AST Types
+ *****************************************************************************/
+typedef enum {
+    AST_VAR, AST_FUNC, AST_EXPLIST, AST_IF, AST_APPLY,
+    AST_STRING, AST_SYMBOL, AST_CHAR, AST_INT,
+    AST_FLOAT, AST_BOOL, AST_IDENT, AST_OPER
+} ASTType;
+
+typedef struct AST {
+    ASTType nodetype;   /* the accessors in ast.c assert on this before reading a member of `value` */
+    Type* datatype;
+    union {
+        struct AST* nodes[3];   /* generic children; Func() stores args in [0] and body in [1] */
+        struct {                /* presumably filled in by OpCall() - TODO confirm in ast.c */
+            int oper;
+            struct AST* left;
+            struct AST* right;
+        } op;
+        /* Definition Node */
+        struct {
+            char* name;
+            int flags;
+            struct AST* value;
+        } var;
+        /* Expression Block Node */
+        struct {
+            size_t nexprs;
+            struct AST** exprs;
+        } explist;
+        /* String, Symbol, Identifier */
+        char* text;             /* String() keeps the caller's pointer, Ident() stores a strdup() copy */
+        /* Integer */
+        intptr_t integer;       /* also written by Char() and Bool() */
+        /* Float */
+        double floating;
+    } value;
+} AST;
+
+/* String */
+AST* String(char* val);
+char* string_value(AST* val);
+
+/* Character */
+AST* Char(int val);
+uint32_t char_value(AST* val);
+
+/* Integer */
+AST* Integer(int val);
+intptr_t integer_value(AST* val);
+
+/* Float */
+AST* Float(double val);
+double float_value(AST* val);
+
+/* Bool */
+AST* Bool(bool val);
+bool bool_value(AST* val);
+
+/* Ident */
+AST* Ident(char* val);
+char* ident_value(AST* val);
+
+/* Definition */
+AST* Var(char* name, AST* value, AST* type, int flags);
+char* var_name(AST* var);
+AST* var_value(AST* var);
+bool var_flagset(AST* var, int mask);
+
+AST* Func(AST* args, AST* body, AST* type);
+AST* func_args(AST* func);
+AST* func_body(AST* func);
+
+AST* ExpList(void);
+AST** explist_get(AST* explist, size_t* nexprs);
+void explist_append(AST* explist, AST* expr);
+void explist_prepend(AST* explist, AST* expr);
+
+AST* If(AST* cond, AST* b1, AST* b2);
+AST* if_cond(AST* ifexp);
+AST* if_then(AST* ifexp);
+AST* if_else(AST* ifexp);
+
+AST* Apply(AST* func, AST* args);
+AST* apply_func(AST* apply);
+AST* apply_args(AST* apply);
+
+AST* OpCall(int oper, AST* left, AST* right);
+
+
+/* Package Definition
+ *****************************************************************************/
+typedef struct Require {    /* singly linked list node, chained through `next` */
+    struct Require* next;
+    char* path;
+    char* alias;
+} Require;
+
+typedef struct Provide {    /* singly linked list node, chained through `next` */
+    struct Provide* next;
+    char* name;
+} Provide;
+
+typedef struct Definition { /* singly linked list node, chained through `next` */
+    struct Definition* next;    /* was declared `struct Provide*`, which mistyped the link of this list */
+    AST* ast;
+} Definition;
+
+typedef struct {            /* heads of the three lists above plus the package name and symbol table */
+    char* name;
+    SymTable* syms;
+    Require* requires;
+    Provide* provides;
+    Definition* definitions;
+} Package;
+
+void pkg_add_require(Package* p, char* req);
+void pkg_add_provide(Package* p, char* exp);
+void pkg_add_definition(Package* p, AST* ast);
+
+/* Pretty Printing +
*****************************************************************************/
+void pprint_token(FILE* file, Tok* token, bool print_loc);
+void pprint_tree(FILE* file, AST* tree, int depth);
+
+/* Lexer and Parser Types
+ *****************************************************************************/
+typedef struct LexFile {    /* one source file record; records are chained through `next` */
+    struct LexFile* next;
+    char* path;
+    char* fbeg;             /* presumably the start of the loaded file contents - TODO confirm in lex.c */
+    char* fpos;             /* presumably the current read position - TODO confirm in lex.c */
+} LexFile;
+
+typedef struct {            /* state taken by every lexer/parser/codegen entry point declared below */
+    LexFile* done;          /* presumably files that are finished - TODO confirm in lex.c */
+    LexFile* file;          /* presumably the file being read - TODO confirm in lex.c */
+    Tok tok;                /* a single token, held by value */
+    SymTable syms;
+    Package pkg;
+} Parser;
+
+void lexfile(Parser* ctx, char* path);
+void lex(Parser* ctx);
+void lexprintpos(Parser* p, FILE* file, Tok* tok);
+void gettoken(Parser* ctx);
+void toplevel(Parser* p);
+void codegen_init(Parser* p);
+
+/* Option Parsing
+ *****************************************************************************/
+
+/* This variable contains the value of argv[0] so that it can be referenced
+ * again once the option parsing is done. This variable must be defined by the
+ * program.
+ *
+ * NOTE: Ensure that you define this variable with external linkage (i.e. not
+ * static)
+ */
+extern char* ARGV0;
+
+/* This is a helper function used by the macros in this file to parse the next
+ * option from the command line. On entry (*p_argv)[0] points at the current option character (OPTBEGIN advances it one character at a time).
+ */
+static inline char* __getopt(int* p_argc, char*** p_argv) { /* NOTE(review): identifiers containing "__" are reserved for the implementation; consider renaming together with OPTARG/EOPTARG */
+    if (!(*p_argv)[0][1] && !(*p_argv)[1]) { /* nothing follows the option character and there is no further argv entry */
+        return (char*)0;
+    } else if ((*p_argv)[0][1]) { /* argument attached to the option, as in "-ofile": return the remainder of this word */
+        return &(*p_argv)[0][1];
+    } else { /* argument is the next word, as in "-o file": consume that argv entry and return it */
+        *p_argv = *p_argv + 1;
+        *p_argc = *p_argc - 1;
+        return (*p_argv)[0];
+    }
+}
+
+/* This macro is almost identical to the ARGBEGIN macro from suckless.org. If
+ * it ain't broke, don't fix it. +
*/ +#define OPTBEGIN \ + for ( \ + ARGV0 = *argv, argc--, argv++; \ + argv[0] && argv[0][1] && argv[0][0] == '-'; \ + argc--, argv++ \ + ) { \ + int brk_; char argc_ , **argv_, *optarg_; \ + if (argv[0][1] == '-' && !argv[0][2]) { \ + argv++, argc--; break; \ + } \ + for (brk_=0, argv[0]++, argv_=argv; argv[0][0] && !brk_; argv[0]++) { \ + if (argv_ != argv) break; \ + argc_ = argv[0][0]; \ + switch (argc_) + +/* Terminate the option parsing. */ +#define OPTEND }} + +/* Get the current option chracter */ +#define OPTC() (argc_) + +/* Get an argument from the command line and return it as a string. If no + * argument is available, this macro returns NULL */ +#define OPTARG() \ + (optarg_ = __getopt(&argc,&argv), brk_ = (optarg_!=0), optarg_) + +/* Get an argument from the command line and return it as a string. If no + * argument is available, this macro executes the provided code. If that code + * returns, then abort is called. */ +#define EOPTARG(code) \ + (optarg_ = __getopt(&argc,&argv), \ + (!optarg_ ? ((code), abort(), (char*)0) : (brk_ = 1, optarg_))) + +/* Helper macro to recognize number options */ +#define OPTNUM \ + case '0': \ + case '1': \ + case '2': \ + case '3': \ + case '4': \ + case '5': \ + case '6': \ + case '7': \ + case '8': \ + case '9' + +/* Helper macro to recognize "long" options ala GNU style. 
*/ +#define OPTLONG \ + case '-' diff --git a/cerise/syms.c b/cerise/syms.c new file mode 100644 index 0000000..6a8f0e2 --- /dev/null +++ b/cerise/syms.c @@ -0,0 +1,22 @@ +#include + +static Sym* mksym(int flags, char* name, Type* type, Sym* next) { + Sym* sym = emalloc(sizeof(Sym)); + sym->flags = flags; + sym->name = name; + sym->type = type; + sym->next = next; + return sym; +} + +void sym_add(SymTable* syms, int flags, char* name, Type* type) { + syms->syms = mksym(flags, name, type, syms->syms); +} + +Sym* sym_get(SymTable* syms, char* name) { + Sym* sym = syms->syms; + for (; sym; sym = sym->next) + if (!strcmp(sym->name, name)) + return sym; + return NULL; +} diff --git a/cerise/types.c b/cerise/types.c new file mode 100644 index 0000000..cfaae4c --- /dev/null +++ b/cerise/types.c @@ -0,0 +1,59 @@ +#include + +static Type* mktype(Kind kind) { + Type* type = emalloc(sizeof(Type)); + memset(type, 0, sizeof(Type)); + type->kind = kind; + return type; +} + +Type* VoidType(void) { + return mktype(VOID); +} + +Type* IntType(size_t nbits) { + Type* type = mktype(INT); + type->value.bits = nbits; + return type; +} + +Type* UIntType(size_t nbits) { + Type* type = mktype(UINT); + type->value.bits = nbits; + return type; +} + +Type* FloatType(size_t nbits) { + Type* type = mktype(FLOAT); + type->value.bits = nbits; + return type; +} + +Type* ArrayOf(Type* elemtype, size_t count) { + Type* type = mktype(ARRAY); + type->value.array.type = elemtype; + type->value.array.count = count; + return type; +} + +Type* RefTo(Type* type) { + (void)type; + return NULL; +} + +Type* PtrTo(Type* type) { + (void)type; + return NULL; +} + +bool types_equal(Type* type1, Type* type2) { + if (type1->kind != type2->kind) return false; + switch (type1->kind) { + case ARRAY: + return (types_equal(type1->value.array.type, type2->value.array.type) && + (type1->value.array.count == type2->value.array.count)); + default: + return true; + } +} + diff --git a/cerise/value b/cerise/value new file 
mode 100644 index 0000000..e69de29 -- 2.49.0