From: Michael D. Lowis Date: Thu, 15 Apr 2021 17:22:18 +0000 (-0400) Subject: plopped in code for a simple compiler based on sclpl. Will start reworking it into... X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=3d87b4276d9f31d93ff0d0b743394e594cf390f0;p=proto%2Fobnc.git plopped in code for a simple compiler based on sclpl. Will start reworking it into an Oberon/Cerise compiler --- diff --git a/cerise/ast.c b/cerise/ast.c new file mode 100644 index 0000000..a3fd9d4 --- /dev/null +++ b/cerise/ast.c @@ -0,0 +1,193 @@ +#include + +static AST* ast(ASTType type) { + AST* tree = emalloc(sizeof(AST)); + memset(tree, 0, sizeof(AST)); + tree->nodetype = type; + return tree; +} + +AST* String(char* val) { + AST* node = ast(AST_STRING); + node->value.text = val; + return node; +} + +char* string_value(AST* val) { + assert(val->nodetype == AST_STRING); + return val->value.text; +} + +AST* Char(int val) { + AST* node = ast(AST_CHAR); + node->value.integer = val; + return node; +} + +uint32_t char_value(AST* val) { + assert(val->nodetype == AST_CHAR); + return val->value.integer; +} + +AST* Integer(int val) { + AST* node = ast(AST_INT); + node->value.integer = val; + return node; +} + +intptr_t integer_value(AST* val) { + assert(val->nodetype == AST_INT); + return val->value.integer; +} + +AST* Float(double val) { + AST* node = ast(AST_FLOAT); + node->value.floating = val; + return node; +} + +double float_value(AST* val) { + assert(val->nodetype == AST_FLOAT); + return val->value.floating; +} + +AST* Bool(bool val) { + AST* node = ast(AST_BOOL); + node->value.integer = val; + return node; +} + +bool bool_value(AST* val) { + assert(val->nodetype == AST_BOOL); + return val->value.integer; +} + +AST* Ident(char* val) { + AST* node = ast(AST_IDENT); + node->value.text = strdup(val); + return node; +} + +char* ident_value(AST* val) { + assert(val->nodetype == AST_IDENT); + return val->value.text; +} + +AST* Var(char* name, AST* value, AST* type, int flags) { + (void)type; + 
AST* node = ast(AST_VAR); + node->value.var.name = name; + node->value.var.value = value; + node->value.var.flags = flags; + return node; +} + +char* var_name(AST* var) { + assert(var->nodetype == AST_VAR); + return var->value.var.name; +} + +AST* var_value(AST* var) { + assert(var->nodetype == AST_VAR); + return var->value.var.value; +} + +bool var_flagset(AST* var, int mask) { + assert(var->nodetype == AST_VAR); + return ((var->value.var.flags & mask) == mask); +} + +AST* Func(AST* args, AST* body, AST* type) +{ + (void)type; + AST* node = ast(AST_FUNC); + node->value.nodes[0] = args; + node->value.nodes[1] = body; + return node; +} + +AST* func_args(AST* func) { + assert(func->nodetype == AST_FUNC); + return func->value.nodes[0]; +} + +AST* func_body(AST* func) { + assert(func->nodetype == AST_FUNC); + return func->value.nodes[1]; +} + +AST* ExpList(void) { + AST* node = ast(AST_EXPLIST); + node->value.explist.nexprs = 0; + node->value.explist.exprs = 0; + return node; +} + +AST** explist_get(AST* explist, size_t* nexprs) { + assert(explist->nodetype == AST_EXPLIST); + *nexprs = explist->value.explist.nexprs; + return explist->value.explist.exprs; +} + +void explist_append(AST* explist, AST* expr) { + assert(explist->nodetype == AST_EXPLIST); + explist->value.explist.nexprs += 1; + explist->value.explist.exprs = realloc(explist->value.explist.exprs, explist->value.explist.nexprs * sizeof(AST*)); + explist->value.explist.exprs[explist->value.explist.nexprs-1] = expr; +} + +void explist_prepend(AST* explist, AST* expr) { + assert(explist->nodetype == AST_EXPLIST); + explist->value.explist.nexprs++; + explist->value.explist.exprs = realloc(explist->value.explist.exprs, explist->value.explist.nexprs * sizeof(AST*)); + memmove(explist->value.explist.exprs+1, explist->value.explist.exprs, explist->value.explist.nexprs-1); + explist->value.explist.exprs[0] = expr; +} + +AST* If(AST* cond, AST* b1, AST* b2) { + AST* node = ast(AST_IF); + node->value.nodes[0] = cond; + 
node->value.nodes[1] = b1; + node->value.nodes[2] = b2; + return node; +} + +AST* if_cond(AST* ifexp) { + assert(ifexp->nodetype == AST_IF); + return ifexp->value.nodes[0]; +} + +AST* if_then(AST* ifexp) { + assert(ifexp->nodetype == AST_IF); + return ifexp->value.nodes[1]; +} + +AST* if_else(AST* ifexp) { + assert(ifexp->nodetype == AST_IF); + return ifexp->value.nodes[2]; +} + +AST* Apply(AST* func, AST* args) { + AST* node = ast(AST_APPLY); + node->value.nodes[0] = func; + node->value.nodes[1] = args; + return node; +} + +AST* apply_func(AST* apply) { + assert(apply->nodetype == AST_APPLY); + return apply->value.nodes[0]; +} + +AST* apply_args(AST* apply) { + assert(apply->nodetype == AST_APPLY); + return apply->value.nodes[1]; +} + +AST* OpCall(int oper, AST* left, AST* right) { + AST* node = ast(AST_OPER); + node->value.op.oper = oper; + node->value.op.left = left; + node->value.op.right = right; + return node; +} diff --git a/cerise/atf.h b/cerise/atf.h new file mode 100644 index 0000000..40088ed --- /dev/null +++ b/cerise/atf.h @@ -0,0 +1,160 @@ +/** + Aardvark Test Framework - A minimalistic unit testing framework for C. + + Copyright 2014 Michael D. Lowis + + Permission to use, copy, modify, and/or distribute this software + for any purpose with or without fee is hereby granted, provided + that the above copyright notice and this permission notice appear + in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL + WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE + AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL + DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA + OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THIS SOFTWARE. 
+*/ +#ifndef ATF_H +#define ATF_H + +#include +#include +#include +#include + +extern char* Curr_Test; +void atf_init(int argc, char** argv); +void atf_test_start(char* file, unsigned int line, char* name); +bool atf_test_assert(bool success, char* expr_str, char* file, int line); +void atf_test_fail(char* expr, char* file, int line); +int atf_print_results(void); + +#define IGNORE(msg) \ + printf("%s:%d:%s:IGNORE:\n\t%s\n", __FILE__, __LINE__, Curr_Test, msg); break + +#define CHECK(expr) \ + if(atf_test_assert((expr), #expr, __FILE__, __LINE__)) break + +#define CHECK_EXITCODE(code) \ + CHECK(ExitCode == code) + +#define TEST_SUITE(name) \ + void name(void) + +#define TEST(desc) \ + for(atf_test_start(__FILE__,__LINE__,#desc); Curr_Test != NULL; Curr_Test = NULL) + +#define RUN_TEST_SUITE(name) \ + name(); + +#define RUN_EXTERN_TEST_SUITE(name) \ + do { extern TEST_SUITE(name); RUN_TEST_SUITE(name); } while(0) + +#define EXPECT_EXIT \ + if ((ExitExpected = true, 0 == setjmp(ExitPad))) + +/* Function Definitions + *****************************************************************************/ +#ifdef INCLUDE_DEFS +#include +#include +#ifndef NO_SIGNALS +#include +#endif + +char* Curr_Test = NULL; +char* Curr_File = NULL; +unsigned int Curr_Line = 0; +static unsigned int Total = 0; +static unsigned int Failed = 0; +bool ExitExpected; +int ExitCode; +jmp_buf ExitPad; + +#ifndef NO_SIGNALS +static void handle_signal(int sig) { + /* Determine the signal name */ + char* sig_name = NULL; + switch(sig) { + case SIGABRT: sig_name = "SIGABRT"; break; + case SIGBUS: sig_name = "SIGBUS"; break; + case SIGFPE: sig_name = "SIGFPE"; break; + case SIGILL: sig_name = "SIGILL"; break; + case SIGSEGV: sig_name = "SIGSEGV"; break; + case SIGSYS: sig_name = "SIGSYS"; break; + /* If we don't recognize it then just return and let the default handler + catch it. */ + default: return; + } + /* Error and exit. No summary will be printed but the user will know which + test has crashed. 
*/ + fprintf(stderr,"%s:%d:0:%s:CRASH (signal: %d - %s)\n", Curr_File, Curr_Line, Curr_Test, sig, sig_name); + Failed++; + (void)atf_print_results(); + _Exit(1); +} +#endif + +void atf_init(int argc, char** argv) { + /* I reserve the right to use these later */ + (void)argc; + (void)argv; + +#ifndef NO_SIGNALS + /* Init signal handler */ + signal(SIGABRT, handle_signal); + signal(SIGBUS, handle_signal); + signal(SIGFPE, handle_signal); + signal(SIGILL, handle_signal); + signal(SIGSEGV, handle_signal); + signal(SIGSYS, handle_signal); +#endif +} + +void atf_test_start(char* file, unsigned int line, char* name) { + Curr_File = file; + Curr_Line = line; + Curr_Test = name; + Total++; +} + +bool atf_test_assert(bool success, char* expr, char* file, int line) { + bool failed = !success; + if (failed) atf_test_fail(expr,file,line); + return failed; +} + +void atf_test_fail(char* expr, char* file, int line) { + Failed++; + printf("%s:%d:0:%s:FAIL:( %s )\n", file, line, Curr_Test, expr); \ +} + +int atf_print_results(void) { + static const char* results_string = + "\nUnit Test Summary" + "\n-----------------" + "\nTotal: %d" + "\nPassed: %d" + "\nFailed: %d" + "\n\n"; + printf(results_string, Total, Total - Failed, Failed); + return Failed; +} + +void exit(int code) { + if (ExitExpected) { + ExitCode = code; + ExitExpected = false; + longjmp(ExitPad, 1); + } else { + assert(!"Unexpected exit. Something went wrong"); + } +} + +#undef INCLUDE_DEFS +#endif + +#endif /* ATF_H */ \ No newline at end of file diff --git a/cerise/build.sh b/cerise/build.sh new file mode 100755 index 0000000..c5d2f06 --- /dev/null +++ b/cerise/build.sh @@ -0,0 +1,3 @@ +#!/bin/sh +ctags -R & +cc -Wall -Wextra -Werror --std=c99 -o sclpl -I. 
*.c diff --git a/cerise/codegen.c b/cerise/codegen.c new file mode 100644 index 0000000..a625f09 --- /dev/null +++ b/cerise/codegen.c @@ -0,0 +1,12 @@ +#include + +void codegen_init(Parser* p) { + sym_add(&(p->syms), SF_TYPEDEF, "void", VoidType()); + sym_add(&(p->syms), SF_TYPEDEF, "bool", UIntType(1u)); + sym_add(&(p->syms), SF_TYPEDEF, "byte", UIntType(8u)); + sym_add(&(p->syms), SF_TYPEDEF, "int", IntType(64u)); + sym_add(&(p->syms), SF_TYPEDEF, "uint", UIntType(64u)); + sym_add(&(p->syms), SF_TYPEDEF, "float", FloatType(32u)); + sym_add(&(p->syms), SF_TYPEDEF, "string", + ArrayOf(sym_get(&(p->syms), "byte")->type, -1)); +} diff --git a/cerise/emalloc.c b/cerise/emalloc.c new file mode 100644 index 0000000..751a40f --- /dev/null +++ b/cerise/emalloc.c @@ -0,0 +1,11 @@ +#include +void fatal(char* estr) { + perror(estr); + exit(1); +} + +void* emalloc(size_t size) { + void* ptr = malloc(size); + if (!ptr) fatal("malloc()"); + return ptr; +} diff --git a/cerise/lex.c b/cerise/lex.c new file mode 100644 index 0000000..648295e --- /dev/null +++ b/cerise/lex.c @@ -0,0 +1,250 @@ +#include +#include +#include +#include + +typedef struct { + char* keyword; + int type; +} KeywordDef; + +static const char FirstChar[256] = { + /* Whitespace */ + [' '] = 1, ['\t'] = 1, ['\r'] = 1, ['\n'] = 1, + /* comment start */ + ['#'] = 2, + /* number or op */ + ['+'] = 3, ['-'] = 3, + /* number digits */ + ['0'] = 4, ['1'] = 4, ['2'] = 4, ['3'] = 4, ['4'] = 4, + ['5'] = 4, ['6'] = 4, ['7'] = 4, ['8'] = 4, ['9'] = 4, + /* alpha characters */ + ['A'] = 5, ['B'] = 5, ['C'] = 5, ['D'] = 5, ['E'] = 5, + ['F'] = 5, ['G'] = 5, ['H'] = 5, ['I'] = 5, ['J'] = 5, + ['K'] = 5, ['L'] = 5, ['M'] = 5, ['N'] = 5, ['O'] = 5, + ['P'] = 5, ['Q'] = 5, ['R'] = 5, ['S'] = 5, ['T'] = 5, + ['U'] = 5, ['V'] = 5, ['W'] = 5, ['X'] = 5, ['Y'] = 5, + ['Z'] = 5, ['a'] = 5, ['b'] = 5, ['c'] = 5, ['d'] = 5, + ['e'] = 5, ['f'] = 5, ['g'] = 5, ['h'] = 5, ['i'] = 5, + ['j'] = 5, ['k'] = 5, ['l'] = 5, ['m'] = 5, ['n'] = 
5, + ['o'] = 5, ['p'] = 5, ['q'] = 5, ['r'] = 5, ['s'] = 5, + ['t'] = 5, ['u'] = 5, ['v'] = 5, ['w'] = 5, ['x'] = 5, + ['y'] = 5, ['z'] = 5, + /* punctuation */ + ['('] = 6, [')'] = 6, ['['] = 6, [']'] = 6, ['{'] = 6, ['}'] = 6, + ['.'] = 6, [','] = 6, [':'] = 6, ['&'] = 6, ['='] = 6, [';'] = 6, + ['*'] = 6, ['\''] = 6, + /* strings */ + ['"'] = 7 +}; + +char SPACE[256] = { + [' '] = 1, ['\t'] = 1, ['\r'] = 1, ['\n'] = 1, +}; + +char DIGIT[256] = { + ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1, + ['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1, +}; + +char ALNUM_[256] = { + ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1, ['4'] = 1, + ['5'] = 1, ['6'] = 1, ['7'] = 1, ['8'] = 1, ['9'] = 1, + ['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1, + ['F'] = 1, ['G'] = 1, ['H'] = 1, ['I'] = 1, ['J'] = 1, + ['K'] = 1, ['L'] = 1, ['M'] = 1, ['N'] = 1, ['O'] = 1, + ['P'] = 1, ['Q'] = 1, ['R'] = 1, ['S'] = 1, ['T'] = 1, + ['U'] = 1, ['V'] = 1, ['W'] = 1, ['X'] = 1, ['Y'] = 1, + ['Z'] = 1, ['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, + ['e'] = 1, ['f'] = 1, ['g'] = 1, ['h'] = 1, ['i'] = 1, + ['j'] = 1, ['k'] = 1, ['l'] = 1, ['m'] = 1, ['n'] = 1, + ['o'] = 1, ['p'] = 1, ['q'] = 1, ['r'] = 1, ['s'] = 1, + ['t'] = 1, ['u'] = 1, ['v'] = 1, ['w'] = 1, ['x'] = 1, + ['y'] = 1, ['z'] = 1, ['_'] = 1, +}; + +#define NUM_KEYWORDS (sizeof(Keywords) / sizeof(Keywords[0])) +KeywordDef Keywords[] = { + { "else", T_ELSE }, + { "false", T_BOOL }, + { "fun", T_FUN }, + { "if", T_IF }, + { "let", T_LET }, + { "provide", T_PROVIDES }, + { "require", T_REQUIRES }, + { "return", T_RETURN }, + { "struct", T_STRUCT }, + { "true", T_BOOL }, + { "type", T_TYPE }, + { "union", T_UNION }, + { "var", T_VAR }, +}; + +static int keywcmp(const void* a, const void* b) { + return strcmp(((KeywordDef*)a)->keyword, ((KeywordDef*)b)->keyword); +} + +static inline char* file_load(char* path) { + int fd = -1, nread = 0, length = 0; + struct stat sb = {0}; + char* contents = NULL; + if (((fd = 
open(path, O_RDONLY, 0)) >= 0) && (fstat(fd, &sb) >= 0) && (sb.st_size > 0)) { + contents = calloc(sb.st_size + 1u, 1u); + while (sb.st_size && (nread = read(fd, contents+length, sb.st_size)) > 0) + length += nread, sb.st_size -= nread; + } + if (fd > 0) close(fd); + return contents; +} + +static inline void convert_value(Tok* tok) { + switch (tok->type) { + case T_STRING: { + size_t len = strlen(tok->text+1); + char* strtext = malloc(len); + strncpy(strtext, tok->text+1, len); + strtext[len-1] = '\0'; + free(tok->text), tok->text = strtext; + break; + } + + case T_INT: { + tok->value.integer = strtol(tok->text, NULL, 0); + break; + } + + case T_ID: { + KeywordDef key = { .keyword = tok->text }; + KeywordDef* match = bsearch( + &key, Keywords, NUM_KEYWORDS, sizeof(KeywordDef), keywcmp); + if (match) { + tok->type = match->type; + if (tok->type != T_ID) + convert_value(tok); /* recurse to ensure correct conversion */ + } + break; + } + + case T_BOOL: { + tok->value.integer = (tok->text[0] == 't'); + break; + } + + default: + break; + } +} + +static inline void readtok(Parser* ctx) { + Tok* tok = &(ctx->tok); + char *beg = ctx->file->fpos, *curr = ctx->file->fpos; + tok->offset = (beg - ctx->file->fbeg); + switch (FirstChar[(int)*curr++]) { + case 1: /* skip whitespace */ + for (; SPACE[(int)*curr]; curr++); + break; + + case 2: /* skip comments */ + for (; *curr != '\n'; curr++); + break; + + case 3: /* +/- as ops or number signs */ + tok->type = *(curr-1); + if (!DIGIT[(int)*curr]) break; + /* parse it as an int */ + tok->type = T_INT; + for (; DIGIT[(int)*curr]; curr++); + break; + + case 4: + tok->type = T_INT; + for (; DIGIT[(int)*curr]; curr++); + break; + + case 5: + tok->type = T_ID; + for (; ALNUM_[(int)*curr]; curr++); + break; + + case 6: /* single char tokens */ + tok->type = *(curr-1); + break; + + case 7: /* string parsing */ + tok->type = T_STRING; + for (; *curr != '"'; curr++); + curr++; + break; + + case 0: /* error handling */ + default: + 
fprintf(stderr, "Failed to parse token '%c'\n", *(curr-1)); + exit(1); + } + + if (tok->type) { + size_t sz = (curr - beg); + tok->text = malloc(sz+1); + tok->text[sz] = '\0'; + strncpy(tok->text, beg, sz); + convert_value(tok); + } + + ctx->file->fpos = curr; +} + +void lexfile(Parser* ctx, char* path) { + LexFile* file = calloc(sizeof(LexFile), 1u); + file->path = strdup(path); + file->fbeg = file->fpos = file_load(path); + file->next = ctx->file; + ctx->file = file; +} + +void lex(Parser* ctx) { + ctx->tok.file = ctx->file->path; + ctx->tok.type = T_NONE; + while (ctx->tok.type == T_NONE) { + if (!ctx->file) { + /* no more files left to process */ + ctx->tok.type = T_END_FILE; + return; + } else if (!(ctx->file->fpos) || !*(ctx->file->fpos)) { + /* grab the next file to process */ + LexFile* f = ctx->file; + ctx->file = f->next; + f->next = ctx->done; + ctx->done = f; + } else { + /* parse out a token */ + readtok(ctx); + } + } +} + +static LexFile* get_file(Parser* p, char const* path) { + LexFile* lf = p->file; + while (lf && strcmp(lf->path, path)) + lf = lf->next; + if (!lf) { + lf = p->done; + while (lf && strcmp(lf->path, path)) + lf = lf->next; + } + return lf; +} + +void lexprintpos(Parser* p, FILE* file, Tok* tok) { + size_t line = 1, col = 1; + char* data = get_file(p, tok->file)->fbeg; + char* end = data + tok->offset; + for (; *data && data < end; data++) { + if (*data == '\n') { + line++; + col = 1; + } else { + col++; + } + } + fprintf(file, "%s:%zu:%zu:", tok->file, line, col); +} diff --git a/cerise/main.c b/cerise/main.c new file mode 100644 index 0000000..d1beeeb --- /dev/null +++ b/cerise/main.c @@ -0,0 +1,49 @@ +#include + +char* ARGV0; +char* Artifact = "bin"; + +/* Driver Modes + *****************************************************************************/ +static int emit_binary(Parser* ctx, int argc, char **argv) { + (void)ctx, (void)argc, (void)argv; + return 0; +} + +static int emit_library(Parser* ctx, int argc, char **argv) { + 
(void)ctx, (void)argc, (void)argv; + return 0; +} + +/* Main Routine and Usage + *****************************************************************************/ +void usage(void) { + fprintf(stderr, "%s\n", + "Usage: sclpl [options...] [-A artifact] [file...]\n" + "\n-A Emit the given type of artifact" + "\n-h Print help information" + ); + exit(1); +} + +int main(int argc, char **argv) { + /* Option parsing */ + OPTBEGIN { + case 'A': Artifact = EOPTARG(usage()); break; + default: usage(); + } OPTEND; + /* initialize the parser */ + Parser ctx = {0}; + for (; argc; argc--,argv++) + lexfile(&ctx, *argv); + /* Execute the main compiler process */ + if (0 == strcmp("bin", Artifact)) { + return emit_binary(&ctx, argc, argv); + } else if (0 == strcmp("lib", Artifact)) { + return emit_library(&ctx, argc, argv); + } else { + fprintf(stderr, "Unknown artifact type: '%s'\n\n", Artifact); + usage(); + } + return 1; +} diff --git a/cerise/parser.c b/cerise/parser.c new file mode 100644 index 0000000..3ed1869 --- /dev/null +++ b/cerise/parser.c @@ -0,0 +1,117 @@ +#include +#include + +//#define TRACE +#ifdef TRACE +static int Indent = 0; +#define parse_enter() \ + (printf("%*c-> %s\n", ++Indent * 2, ' ', __func__)) +#define parse_exit() \ + (printf("%*c<- %s\n", --Indent * 2, ' ', __func__)) +#else +#define parse_enter() +#define parse_exit() +#endif + +/* Precedence Table + *****************************************************************************/ +//enum { /* Define precedence levels(based on C) */ +// LVL_NONE, +// LVL_LITERAL, +// LVL_COMMA, +// LVL_ASSIGN, +// LVL_TERNARY, +// LVL_BOOL_OR, +// LVL_BOOL_AND, +// LVL_BITWISE_OR, +// LVL_BITWISE_XOR, +// LVL_BITWISE_AND, +// LVL_EQUALITY, +// LVL_RELATIONAL, +// LVL_BITSHIFT, +// LVL_ADD_SUB, +// LVL_MUL_DIV, +// LVL_PREFIX, +// LVL_POSTFIX, +//}; +// +//typedef struct { +// int level; +// AST* (*prefixfn)(Parser* p); +// AST* (*infixfn)(Parser* p, AST* left); +//} OpRule; +// +//OpRule PrecedenceTable[T_COUNT] = { +// 
[T_BOOL] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, +// [T_CHAR] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, +// [T_STRING] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, +// [T_INT] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, +// [T_FLOAT] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, +// [T_ID] = { .level = LVL_LITERAL, .prefixfn = literal, .infixfn = NULL }, +// ['('] = { .level = LVL_POSTFIX, .prefixfn = grouping, .infixfn = func_call }, +// ['.'] = { .level = LVL_POSTFIX, .prefixfn = NULL, .infixfn = dot_call }, +//}; + +/* Parsing Routines + *****************************************************************************/ +//static Tok* peek(Parser* p) { +// if (T_NONE == p->tok.type) +// lex(p); +// return &(p->tok); +//} +// +//static void error(Parser* parser, const char* fmt, ...) { +// Tok* tok = peek(parser); +// va_list args; +// va_start(args, fmt); +// lexprintpos(parser, stderr, tok); +// fprintf(stderr, " error: "); +// vfprintf(stderr, fmt, args); +// fprintf(stderr, "\n"); +// va_end(args); +// exit(1); +//} +// +//static bool matches(Parser* p, TokType type) { +// return (peek(p)->type == type); +//} +// +//static bool accept(Parser* p, TokType type) { +// if (matches(p, type)) { +// p->tok.type = T_NONE; +// return true; +// } +// return false; +//} +// +//static void expect(Parser* p, TokType type) { +// if (!accept(p, type)) +// error(p, "Unexpected token"); +//} +// +//static Tok* expect_val(Parser* p, TokType type) { +// static Tok token = {0}; +// /* perform the match */ +// if (matches(p, type)) { +// token = *(peek(p)); +// p->tok.type = T_NONE; +// } else { +// error(p, "Unexpected token"); +// } +// return &token; +//} +// +//static int consume(Parser* p) { +// int type = peek(p)->type; +// if (!accept(p, type)) +// error(p, "Unexpected token"); +// return type; +//} + +/* Grammar Definition + 
*****************************************************************************/ +void toplevel(Parser* p) { + parse_enter(); + (void)p; + parse_exit(); +} diff --git a/cerise/pkg.c b/cerise/pkg.c new file mode 100644 index 0000000..0bed7ff --- /dev/null +++ b/cerise/pkg.c @@ -0,0 +1,23 @@ +#include + +void pkg_add_require(Package* p, char* req) +{ + Require* rq = malloc(sizeof(Require)); + rq->path = strdup(req); + rq->next = p->requires; + p->requires = rq; +} + +void pkg_add_provide(Package* p, char* exp) +{ + Provide* pr = malloc(sizeof(Provide)); + pr->name = strdup(exp); + pr->next = p->provides; + p->provides = pr; +} + +void pkg_add_definition(Package* p, AST* ast) +{ + (void)p, (void)ast; +} + diff --git a/cerise/pprint.c b/cerise/pprint.c new file mode 100644 index 0000000..8a9705c --- /dev/null +++ b/cerise/pprint.c @@ -0,0 +1,217 @@ +#include + +static void indent(FILE* file, int depth) { + fprintf(file, "\n"); + if (depth) fprintf(file, "%*c", depth * 2, ' '); +} + +static const char* token_type_to_string(int type) { + #define TOK(name) case (name): return #name + switch(type) { + TOK(T_NONE); TOK(T_ERROR); TOK(T_END_FILE); + TOK(T_REQUIRES); TOK(T_PROVIDES); TOK(T_LET); TOK(T_VAR); + TOK(T_FUN); TOK(T_TYPE); TOK(T_STRUCT); TOK(T_UNION); + TOK(T_RETURN); TOK(T_IF); TOK(T_ELSE); TOK(T_ID); + TOK(T_CHAR); TOK(T_INT); TOK(T_FLOAT); TOK(T_BOOL); + TOK(T_STRING); + case '{': return "T_LBRACE"; + case '}': return "T_RBRACE"; + case '[': return "T_LBRACK"; + case ']': return "T_RBRACK"; + case '(': return "T_LPAR"; + case ')': return "T_RPAR"; + case ',': return "T_COMMA"; + case ':': return "T_COLON"; + case '&': return "T_AMP"; + case '\'': return "T_SQUOTE"; + case '"': return "T_DQUOTE"; + default: return "???"; + } + #undef TOK +} + +static void print_char(FILE* file, char ch) { + int i; + static const char* lookup_table[5] = { + " \0space", + "\n\0newline", + "\r\0return", + "\t\0tab", + "\v\0vtab" + }; + for(i = 0; i < 5; i++) { + if (ch == 
lookup_table[i][0]) { + fprintf(file, "\\%s", &(lookup_table[i][2])); + break; + } + } + if (i == 5) fprintf(file, "\\%c", ch); +} + +static void pprint_token_type(FILE* file, Tok* token) { + if (token->type > 256) + fprintf(file, "%s", token_type_to_string(token->type)); + else + fprintf(file, "%c", token->type); +} + +static void pprint_token_value(FILE* file, Tok* token) { + #define TOK(name) case (name): fprintf(file, "%s", #name); break + switch(token->type) { + /* value tokens */ + case T_STRING: fprintf(file, "\"%s\"", token->text); break; + case T_ID: fprintf(file, "%s", token->text); break; + case T_CHAR: print_char(file, token->value.integer); break; + case T_INT: fprintf(file, "%lld", token->value.integer); break; + case T_FLOAT: fprintf(file, "%f", token->value.floating); break; + case T_BOOL: fprintf(file, "%s", (token->value.integer)?"true":"false"); break; + + /* keyword tokens */ + TOK(T_NONE); TOK(T_ERROR); TOK(T_END_FILE); + TOK(T_REQUIRES); TOK(T_PROVIDES); TOK(T_LET); TOK(T_VAR); + TOK(T_FUN); TOK(T_TYPE); TOK(T_STRUCT); TOK(T_UNION); TOK(T_RETURN); + TOK(T_IF); TOK(T_ELSE); + + /* evertything else */ + default: + fprintf(file, "???"); + break; + } + #undef TOK +} + +void pprint_token(FILE* file, Tok* token, bool print_loc) +{ + if (print_loc) { + fprintf(file, "%zu:", token->offset); + } + pprint_token_type(file, token); + if (token->type > 256) { + fprintf(file, ":"); + pprint_token_value(file, token); + } + fprintf(file, "\n"); +} + +/*****************************************************************************/ + +static const char* tree_type_to_string(ASTType type) { + switch(type) { + case AST_STRING: return "T_STRING"; + case AST_SYMBOL: return "T_SYMBOL"; + case AST_IDENT: return "T_ID"; + case AST_CHAR: return "T_CHAR"; + case AST_INT: return "T_INT"; + case AST_FLOAT: return "T_FLOAT"; + case AST_BOOL: return "T_BOOL"; + default: return "???"; + } +} + +static void pprint_literal(FILE* file, AST* tree, int depth) +{ + (void)depth; + 
fprintf(file, "%s:", tree_type_to_string(tree->nodetype)); + switch(tree->nodetype) { + case AST_STRING: fprintf(file, "\"%s\"", string_value(tree)); break; + case AST_IDENT: fprintf(file, "%s", ident_value(tree)); break; + case AST_CHAR: fprintf(file, "%c", char_value(tree)); break; + case AST_INT: fprintf(file, "%ld", integer_value(tree)); break; + case AST_FLOAT: fprintf(file, "%lf", float_value(tree)); break; + case AST_BOOL: + fprintf(file, "%s", bool_value(tree) ? "true" : "false"); + break; + default: fprintf(file, "???"); + } +} + +static char* getvartype(AST* tree) { + if (var_flagset(tree, SF_CONSTANT)) + return "let"; + else if (var_flagset(tree, SF_TYPEDEF)) + return "typedef"; + else + return "var"; +} + +void pprint_fargs(FILE* file, AST* tree) { + size_t nargs = 0; + AST** args = explist_get(tree, &nargs); + fprintf(file, "("); + for (size_t i = 0; i < nargs; i++) { + fprintf(file, "("); + fprintf(file, "%s : type", var_name(args[i])); + fprintf(file, ") "); + } + fprintf(file, ")"); +} + +void pprint_block(FILE* file, AST* tree, int depth) { + if (!tree) return; + size_t nexprs = 0; + AST** exprs = explist_get(tree, &nexprs); + for (size_t i = 0; i < nexprs; i++) { + indent(file, depth); + pprint_tree(file, exprs[i], depth); + } +} + +void pprint_branch(FILE* file, AST* tree, int depth) { + indent(file, depth); + pprint_tree(file, tree, depth); +} + +void pprint_ifexpr(FILE* file, AST* tree, int depth) { + fprintf(file, "(if "); + pprint_tree(file, if_cond(tree), depth); + pprint_branch(file, if_then(tree), depth+1); + pprint_branch(file, if_else(tree), depth+1); + fprintf(file, ")"); +} + +void pprint_apply(FILE* file, AST* tree, int depth) { + fprintf(file, "(apply "); + pprint_tree(file, apply_func(tree), depth); + size_t nexprs = 0; + AST** exprs = explist_get(apply_args(tree), &nexprs); + for (size_t i = 0; i < nexprs; i++) { + indent(file, depth+1); + pprint_tree(file, exprs[i], depth+1); + } + fprintf(file, ")"); +} + +void pprint_tree(FILE* 
file, AST* tree, int depth) { + if (tree == NULL) return; + switch (tree->nodetype) { + case AST_VAR: + fprintf(file, "(%s %s ", getvartype(tree), var_name(tree)); + pprint_tree(file, var_value(tree), depth); + fprintf(file, ")"); + break; + + case AST_FUNC: + pprint_fargs(file, func_args(tree)); + pprint_block(file, func_body(tree), depth+1); + break; + + case AST_EXPLIST: + fprintf(file, "(block"); + pprint_block(file, tree, depth+1); + fprintf(file, ")"); + break; + + case AST_IF: + pprint_ifexpr(file, tree, depth); + break; + + case AST_APPLY: + pprint_apply(file, tree, depth); + break; + + default: + pprint_literal(file, tree, depth); + break; + } +} + diff --git a/cerise/sclpl b/cerise/sclpl new file mode 100755 index 0000000..6fd7fbd Binary files /dev/null and b/cerise/sclpl differ diff --git a/cerise/sclpl.h b/cerise/sclpl.h new file mode 100644 index 0000000..2a0060b --- /dev/null +++ b/cerise/sclpl.h @@ -0,0 +1,313 @@ +#define _XOPEN_SOURCE 700 +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void fatal(char* estr); +void* emalloc(size_t size); + +/* Token Types + *****************************************************************************/ +typedef enum { + T_NONE = 0, + T_STRING = 256, T_ID, T_INT, T_BOOL, T_CHAR, T_FLOAT, + T_REQUIRES, T_PROVIDES, T_LET, T_VAR, T_FUN, T_TYPE, T_STRUCT, + T_UNION, T_RETURN, T_IF, T_ELSE, + T_COUNT, + T_ERROR = -2, + T_END_FILE = -1 +} TokType; + +typedef struct { + const char* file; + long offset; + TokType type; + char* text; + union { + long long integer; + double floating; + } value; +} Tok; + +/* Datatype Types + *****************************************************************************/ +typedef enum { + VOID, INT, UINT, FLOAT, ARRAY, REF, PTR, FUNC +} Kind; + +typedef struct Type { + Kind kind; + union { + struct Type* type; + size_t bits; + struct { + struct Type* type; + size_t count; + } array; + } value; +} Type; + +Type* VoidType(void); +Type* IntType(size_t 
nbits); +Type* UIntType(size_t nbits); +Type* FloatType(size_t nbits); +Type* ArrayOf(Type* type, size_t count); +Type* RefTo(Type* type); +Type* PtrTo(Type* type); +bool types_equal(Type* type1, Type* type2); + +/* Symbol Table + *****************************************************************************/ +typedef enum { + SF_TYPEDEF = (1 << 0), + SF_CONSTANT = (1 << 1), + SF_ARGUMENT = (1 << 2), +} SymFlags; + +typedef struct Sym { + struct Sym* next; + bool is_typedef; + int flags; + char* name; + Type* type; +} Sym; + +typedef struct { + Sym* syms; +} SymTable; + +void sym_add(SymTable* syms, int flags, char* name, Type* type); +Sym* sym_get(SymTable* syms, char* name); + +/* AST Types + *****************************************************************************/ +typedef enum { + AST_VAR, AST_FUNC, AST_EXPLIST, AST_IF, AST_APPLY, + AST_STRING, AST_SYMBOL, AST_CHAR, AST_INT, + AST_FLOAT, AST_BOOL, AST_IDENT, AST_OPER +} ASTType; + +typedef struct AST { + ASTType nodetype; + Type* datatype; + union { + struct AST* nodes[3]; + struct { + int oper; + struct AST* left; + struct AST* right; + } op; + /* Definition Node */ + struct { + char* name; + int flags; + struct AST* value; + } var; + /* Expression Block Node */ + struct { + size_t nexprs; + struct AST** exprs; + } explist; + /* String, Symbol, Identifier */ + char* text; + /* Integer */ + intptr_t integer; + /* Float */ + double floating; + } value; +} AST; + +/* String */ +AST* String(char* val); +char* string_value(AST* val); + +/* Character */ +AST* Char(int val); +uint32_t char_value(AST* val); + +/* Integer */ +AST* Integer(int val); +intptr_t integer_value(AST* val); + +/* Float */ +AST* Float(double val); +double float_value(AST* val); + +/* Bool */ +AST* Bool(bool val); +bool bool_value(AST* val); + +/* Ident */ +AST* Ident(char* val); +char* ident_value(AST* val); + +/* Definition */ +AST* Var(char* name, AST* value, AST* type, int flags); +char* var_name(AST* var); +AST* var_value(AST* var); 
+bool var_flagset(AST* var, int mask); + +AST* Func(AST* args, AST* body, AST* type); +AST* func_args(AST* func); +AST* func_body(AST* func); + +AST* ExpList(void); +AST** explist_get(AST* explist, size_t* nexprs); +void explist_append(AST* explist, AST* expr); +void explist_prepend(AST* explist, AST* expr); + +AST* If(AST* cond, AST* b1, AST* b2); +AST* if_cond(AST* ifexp); +AST* if_then(AST* ifexp); +AST* if_else(AST* ifexp); + +AST* Apply(AST* func, AST* args); +AST* apply_func(AST* apply); +AST* apply_args(AST* apply); + +AST* OpCall(int oper, AST* left, AST* right); + + +/* Package Definition + *****************************************************************************/ +typedef struct Require { + struct Require* next; + char* path; + char* alias; +} Require; + +typedef struct Provide { + struct Provide* next; + char* name; +} Provide; + +typedef struct Definition { + struct Provide* next; + AST* ast; +} Definition; + +typedef struct { + char* name; + SymTable* syms; + Require* requires; + Provide* provides; + Definition* definitions; +} Package; + +void pkg_add_require(Package* p, char* req); +void pkg_add_provide(Package* p, char* exp); +void pkg_add_definition(Package* p, AST* ast); + +/* Pretty Printing + *****************************************************************************/ +void pprint_token(FILE* file, Tok* token, bool print_loc); +void pprint_tree(FILE* file, AST* tree, int depth); + +/* Lexer and Parser Types + *****************************************************************************/ +typedef struct LexFile { + struct LexFile* next; + char* path; + char* fbeg; + char* fpos; +} LexFile; + +typedef struct { + LexFile* done; + LexFile* file; + Tok tok; + SymTable syms; + Package pkg; +} Parser; + +void lexfile(Parser* ctx, char* path); +void lex(Parser* ctx); +void lexprintpos(Parser* p, FILE* file, Tok* tok); +void gettoken(Parser* ctx); +void toplevel(Parser* p); +void codegen_init(Parser* p); + +/* Option Parsing + 
*****************************************************************************/
+
+/* This variable contains the value of argv[0] so that it can be referenced
+ * again once the option parsing is done. This variable must be defined by the
+ * program.
+ *
+ * NOTE: Ensure that you define this variable with external linkage (i.e. not
+ * static)
+ */
+extern char* ARGV0;
+
+/* This is a helper function used by the macros in this file to fetch the
+ * argument that belongs to the option currently being parsed.
+ *
+ * On entry (*p_argv)[0] points at the current option character (OPTBEGIN has
+ * already stepped past the leading '-').
+ *
+ * Returns:
+ *   - the remainder of the current argv entry when text follows the option
+ *     character (e.g. "-ofile" yields "file");
+ *   - otherwise the next argv entry, after advancing *p_argv and decrementing
+ *     *p_argc;
+ *   - a null pointer when neither exists.
+ *
+ * NOTE(review): identifiers that begin with two underscores are reserved for
+ * the implementation in C. Renaming this helper would also require updating
+ * OPTARG and EOPTARG, and any caller outside this file.
+ */
+static inline char* __getopt(int* p_argc, char*** p_argv) {
+    if (!(*p_argv)[0][1] && !(*p_argv)[1]) {
+        /* nothing attached to the option and no further argv entry */
+        return (char*)0;
+    } else if ((*p_argv)[0][1]) {
+        /* argument is attached directly after the option character */
+        return &(*p_argv)[0][1];
+    } else {
+        /* argument is the next argv entry; consume it */
+        *p_argv = *p_argv + 1;
+        *p_argc = *p_argc - 1;
+        return (*p_argv)[0];
+    }
+}
+
+/* This macro is almost identical to the ARGBEGIN macro from suckless.org. If
+ * it ain't broke, don't fix it.
+ *
+ * Expects modifiable `argc` and `argv` variables in scope. Stores argv[0] in
+ * ARGV0, then visits every following argument that starts with '-' and has
+ * at least one more character. A bare "--" is consumed and ends the scan.
+ * Each character of an option cluster is copied into argc_ and dispatched
+ * through the trailing switch, so the macro must be followed by a braced body
+ * of case labels and closed with OPTEND. The inner loop stops once
+ * OPTARG/EOPTARG obtained an argument (brk_) or argv was advanced
+ * (argv_ != argv).
+ *
+ * The loop condition tests argv[0][0] before argv[0][1] so that an empty
+ * argument ("") is never read past its terminator. */
+#define OPTBEGIN \
+    for ( \
+        ARGV0 = *argv, argc--, argv++; \
+        argv[0] && argv[0][0] == '-' && argv[0][1]; \
+        argc--, argv++ \
+    ) { \
+        int brk_; char argc_ , **argv_, *optarg_; \
+        if (argv[0][1] == '-' && !argv[0][2]) { \
+            argv++, argc--; break; \
+        } \
+        for (brk_=0, argv[0]++, argv_=argv; argv[0][0] && !brk_; argv[0]++) { \
+            if (argv_ != argv) break; \
+            argc_ = argv[0][0]; \
+            switch (argc_)
+
+/* Terminate the option parsing. Closes the two blocks opened by OPTBEGIN. */
+#define OPTEND }}
+
+/* Get the current option character */
+#define OPTC() (argc_)
+
+/* Get an argument from the command line and return it as a string. If no
+ * argument is available, this macro returns NULL. When an argument is
+ * returned, brk_ is set so the rest of the current cluster is not parsed as
+ * further options. */
+#define OPTARG() \
+    (optarg_ = __getopt(&argc,&argv), brk_ = (optarg_!=0), optarg_)
+
+/* Get an argument from the command line and return it as a string. If no
+ * argument is available, this macro executes the provided code. If that code
+ * returns, then abort is called. */
+#define EOPTARG(code) \
+    (optarg_ = __getopt(&argc,&argv), \
+    (!optarg_ ? 
((code), abort(), (char*)0) : (brk_ = 1, optarg_))) + +/* Helper macro to recognize number options */ +#define OPTNUM \ + case '0': \ + case '1': \ + case '2': \ + case '3': \ + case '4': \ + case '5': \ + case '6': \ + case '7': \ + case '8': \ + case '9' + +/* Helper macro to recognize "long" options ala GNU style. */ +#define OPTLONG \ + case '-' diff --git a/cerise/syms.c b/cerise/syms.c new file mode 100644 index 0000000..6a8f0e2 --- /dev/null +++ b/cerise/syms.c @@ -0,0 +1,22 @@ +#include + +static Sym* mksym(int flags, char* name, Type* type, Sym* next) { + Sym* sym = emalloc(sizeof(Sym)); + sym->flags = flags; + sym->name = name; + sym->type = type; + sym->next = next; + return sym; +} + +void sym_add(SymTable* syms, int flags, char* name, Type* type) { + syms->syms = mksym(flags, name, type, syms->syms); +} + +Sym* sym_get(SymTable* syms, char* name) { + Sym* sym = syms->syms; + for (; sym; sym = sym->next) + if (!strcmp(sym->name, name)) + return sym; + return NULL; +} diff --git a/cerise/types.c b/cerise/types.c new file mode 100644 index 0000000..cfaae4c --- /dev/null +++ b/cerise/types.c @@ -0,0 +1,59 @@ +#include + +static Type* mktype(Kind kind) { + Type* type = emalloc(sizeof(Type)); + memset(type, 0, sizeof(Type)); + type->kind = kind; + return type; +} + +Type* VoidType(void) { + return mktype(VOID); +} + +Type* IntType(size_t nbits) { + Type* type = mktype(INT); + type->value.bits = nbits; + return type; +} + +Type* UIntType(size_t nbits) { + Type* type = mktype(UINT); + type->value.bits = nbits; + return type; +} + +Type* FloatType(size_t nbits) { + Type* type = mktype(FLOAT); + type->value.bits = nbits; + return type; +} + +Type* ArrayOf(Type* elemtype, size_t count) { + Type* type = mktype(ARRAY); + type->value.array.type = elemtype; + type->value.array.count = count; + return type; +} + +Type* RefTo(Type* type) { + (void)type; + return NULL; +} + +Type* PtrTo(Type* type) { + (void)type; + return NULL; +} + +bool types_equal(Type* type1, Type* 
type2) { + if (type1->kind != type2->kind) return false; + switch (type1->kind) { + case ARRAY: + return (types_equal(type1->value.array.type, type2->value.array.type) && + (type1->value.array.count == type2->value.array.count)); + default: + return true; + } +} + diff --git a/cerise/value b/cerise/value new file mode 100644 index 0000000..e69de29