From b2ac45d0f57005294b67d1302d448ea6ced08642 Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Fri, 16 Apr 2021 14:22:48 -0400 Subject: [PATCH] implemented basic module skeleton parsing --- cerise/build.sh | 2 +- cerise/cerise.h | 2 +- cerise/lex.c | 90 +++++++++++++------ cerise/main.c | 21 ++--- cerise/parser.c | 185 +++++++++++++++++++++++++++++----------- cerise/tests/tokens.txt | 1 + 6 files changed, 214 insertions(+), 87 deletions(-) diff --git a/cerise/build.sh b/cerise/build.sh index 10a89f4..de92c50 100755 --- a/cerise/build.sh +++ b/cerise/build.sh @@ -1,5 +1,5 @@ #!/bin/sh ctags -R & -cc -g -D CERISE_TESTS -Wall -Wextra -Werror --std=c99 -o cerisec-test *.c \ +cc -g -D CERISE_TESTS -Wall -Wextra --std=c99 -o cerisec-test *.c \ && ./cerisec-test \ && cc -g -Wall -Wextra -Werror --std=c99 -o cerisec *.c diff --git a/cerise/cerise.h b/cerise/cerise.h index 62fc8fd..714213d 100644 --- a/cerise/cerise.h +++ b/cerise/cerise.h @@ -94,7 +94,7 @@ void lexfile(Parser* ctx, char* path); void lex(Parser* ctx); void lexprintpos(Parser* p, FILE* file, Tok* tok); void gettoken(Parser* ctx); -void toplevel(Parser* p); +void module(Parser* p); /* Option Parsing *****************************************************************************/ diff --git a/cerise/lex.c b/cerise/lex.c index 019ac72..5de2cc8 100644 --- a/cerise/lex.c +++ b/cerise/lex.c @@ -29,7 +29,7 @@ static const char Chars[256] = { /* strings */ ['"'] = 3, - /* double character ops */ + /* potential double character ops */ ['='] = 4, ['.'] = 4, /* potential multi-character ops */ @@ -97,26 +97,36 @@ KeywordDef Keywords[] = { { "while", WHILE }, }; -static int keywcmp(const void* a, const void* b) { +static int keywcmp(const void* a, const void* b) +{ return strcmp(((KeywordDef*)a)->keyword, ((KeywordDef*)b)->keyword); } -static inline char* file_load(char* path) { +static inline char* file_load(char* path) +{ int fd = -1, nread = 0, length = 0; struct stat sb = {0}; char* contents = NULL; - if (((fd = open(path, O_RDONLY, 0)) >= 0) && (fstat(fd, &sb) >= 0) && (sb.st_size > 0)) { + if (((fd = open(path, O_RDONLY, 0)) >= 0) && (fstat(fd, &sb) >= 0) && (sb.st_size > 0)) + { contents = calloc(sb.st_size + 1u, 1u); while (sb.st_size && (nread = read(fd, contents+length, sb.st_size)) > 0) + { length += nread, sb.st_size -= nread; + } + } + if (fd > 0) + { + close(fd); } - if (fd > 0) close(fd); return contents; } -static inline void convert_value(Tok* tok) { +static inline void convert_value(Tok* tok) +{ switch (tok->type) { - case STRING: { + case STRING: + { size_t len = strlen(tok->text+1); char* strtext = malloc(len); strncpy(strtext, tok->text+1, len); @@ -125,34 +135,42 @@ static inline void convert_value(Tok* tok) { break; } - case INT: { + case INT: + { tok->value.integer = strtol(tok->text, NULL, 0); break; } - case IDENT: { + case IDENT: + { KeywordDef key = { .keyword = tok->text }; KeywordDef* match = bsearch( &key, Keywords, NUM_KEYWORDS, sizeof(KeywordDef), keywcmp); - if (match) { + if (match) + { tok->type = match->type; if (tok->type != IDENT) + { convert_value(tok); /* recurse to ensure correct conversion */ + } } break; } - case BOOL: { + case BOOL: + { tok->value.integer = (tok->text[0] == 't'); break; } default: + /* nothing to do here */ break; } } -static inline void readtok(Parser* ctx) { +static inline void readtok(Parser* ctx) +{ Tok* tok = &(ctx->tok); char *beg = ctx->file->fpos; char *curr = ctx->file->fpos; @@ -228,7 +246,8 @@ static inline void readtok(Parser* ctx) { exit(1); } - if (tok->type) { + if (tok->type) + { size_t sz = (curr - beg); tok->text = malloc(sz+1); tok->text[sz] = '\0'; @@ -239,7 +258,8 @@ static inline void readtok(Parser* ctx) { ctx->file->fpos = curr; } -void lexfile(Parser* ctx, char* path) { +void lexfile(Parser* ctx, char* path) +{ LexFile* file = calloc(sizeof(LexFile), 1u); file->path = strdup(path); file->fbeg = file->fpos = file_load(path); @@ -247,49 +267,67 @@ void lexfile(Parser* ctx, char* path) { ctx->file = file; } -void lex(Parser* ctx) { +void lex(Parser* ctx) +{ ctx->tok.file = ctx->file->path; ctx->tok.type = NONE; - while (ctx->tok.type == NONE) { - if (!ctx->file) { + while (ctx->tok.type == NONE) + { + if (!ctx->file) + { /* no more files left to process */ ctx->tok.type = END_FILE; ctx->tok.text = ""; return; - } else if (!(ctx->file->fpos) || !*(ctx->file->fpos)) { + } + else if (!(ctx->file->fpos) || !*(ctx->file->fpos)) + { /* grab the next file to process */ LexFile* f = ctx->file; ctx->file = f->next; f->next = ctx->done; ctx->done = f; - } else { + } + else + { /* parse out a token */ readtok(ctx); } } } -static LexFile* get_file(Parser* p, char const* path) { +static LexFile* get_file(Parser* p, char const* path) +{ LexFile* lf = p->file; while (lf && strcmp(lf->path, path)) + { lf = lf->next; - if (!lf) { + } + if (!lf) + { lf = p->done; while (lf && strcmp(lf->path, path)) + { lf = lf->next; + } } return lf; } -void lexprintpos(Parser* p, FILE* file, Tok* tok) { +void lexprintpos(Parser* p, FILE* file, Tok* tok) +{ size_t line = 1, col = 1; char* data = get_file(p, tok->file)->fbeg; char* end = data + tok->offset; - for (; *data && data < end; data++) { - if (*data == '\n') { + for (; *data && data < end; data++) + { + if (*data == '\n') + { line++; col = 1; - } else { + } + else + { col++; } } @@ -329,6 +367,7 @@ TEST_SUITE(Lexer) { "]", ']' }, { "{", '{' }, { "}", '}' }, + { "!", '!' }, { "and", AND }, { "array", ARRAY }, { "begin", BEGIN }, @@ -369,7 +408,6 @@ TEST_SUITE(Lexer) { "", END_FILE }, }; - TEST(Lexer recognizes all required tokens) { Parser ctx = {0}; diff --git a/cerise/main.c b/cerise/main.c index 34766a5..cea9b8f 100644 --- a/cerise/main.c +++ b/cerise/main.c @@ -7,13 +7,13 @@ char* Artifact = "bin"; /* Driver Modes *****************************************************************************/ -static int emit_binary(Parser* ctx, int argc, char **argv) +static int emit_binary(Parser* ctx, int argc, char **argv) { (void)ctx, (void)argc, (void)argv; return 0; } -static int emit_library(Parser* ctx, int argc, char **argv) +static int emit_library(Parser* ctx, int argc, char **argv) { (void)ctx, (void)argc, (void)argv; return 0; @@ -21,7 +21,7 @@ static int emit_library(Parser* ctx, int argc, char **argv) /* Main Routine and Usage *****************************************************************************/ -void usage(void) +void usage(void) { fprintf(stderr, "%s\n", "Usage: sclpl [options...] [-A artifact] [file...]\n" @@ -31,7 +31,7 @@ void usage(void) exit(1); } -int main(int argc, char **argv) +int main(int argc, char **argv) { /* Option parsing */ OPTBEGIN { @@ -43,15 +43,15 @@ int main(int argc, char **argv) for (; argc; argc--,argv++) lexfile(&ctx, *argv); /* Execute the main compiler process */ - if (0 == strcmp("bin", Artifact)) + if (0 == strcmp("bin", Artifact)) { return emit_binary(&ctx, argc, argv); - } - else if (0 == strcmp("lib", Artifact)) + } + else if (0 == strcmp("lib", Artifact)) { return emit_library(&ctx, argc, argv); - } - else + } + else { fprintf(stderr, "Unknown artifact type: '%s'\n\n", Artifact); usage(); @@ -63,10 +63,11 @@ int main(int argc, char **argv) #define INCLUDE_DEFS #include "atf.h" -int main(int argc, char **argv) +int main(int argc, char **argv) { atf_init(argc, argv); RUN_EXTERN_TEST_SUITE(Lexer); + RUN_EXTERN_TEST_SUITE(Grammar); return atf_print_results(); } diff --git a/cerise/parser.c b/cerise/parser.c index 826c710..39343d1 100644 --- a/cerise/parser.c +++ b/cerise/parser.c @@ -54,53 +54,64 @@ static int Indent = 0; /* Parsing Routines *****************************************************************************/ -//static Tok* peek(Parser* p) { -// if (T_NONE == p->tok.type) -// lex(p); -// return &(p->tok); -//} -// -//static void error(Parser* parser, const char* fmt, ...) { -// Tok* tok = peek(parser); -// va_list args; -// va_start(args, fmt); -// lexprintpos(parser, stderr, tok); -// fprintf(stderr, " error: "); -// vfprintf(stderr, fmt, args); -// fprintf(stderr, "\n"); -// va_end(args); -// exit(1); -//} -// -//static bool matches(Parser* p, TokType type) { -// return (peek(p)->type == type); -//} -// -//static bool accept(Parser* p, TokType type) { -// if (matches(p, type)) { -// p->tok.type = T_NONE; -// return true; -// } -// return false; -//} -// -//static void expect(Parser* p, TokType type) { -// if (!accept(p, type)) -// error(p, "Unexpected token"); -//} -// -//static Tok* expect_val(Parser* p, TokType type) { -// static Tok token = {0}; -// /* perform the match */ -// if (matches(p, type)) { -// token = *(peek(p)); -// p->tok.type = T_NONE; -// } else { -// error(p, "Unexpected token"); -// } -// return &token; -//} -// +static Tok* peek(Parser* p) +{ + if (NONE == p->tok.type) + lex(p); + return &(p->tok); +} + +static void error(Parser* parser, const char* fmt, ...) +{ + Tok* tok = peek(parser); + va_list args; + va_start(args, fmt); + lexprintpos(parser, stderr, tok); + fprintf(stderr, " error: "); + vfprintf(stderr, fmt, args); + fprintf(stderr, "\n"); + va_end(args); + exit(1); +} + +static bool matches(Parser* p, TokType type) +{ + return (peek(p)->type == type); +} + +static bool accept(Parser* p, TokType type) +{ + if (matches(p, type)) { + p->tok.type = NONE; + return true; + } + return false; +} + +static void expect(Parser* p, TokType type) +{ + if (!accept(p, type)) + error(p, "Unexpected token"); +} + +static Tok* expect_val(Parser* p, TokType type) +{ + static Tok token = {0}; + /* perform the match */ + if (matches(p, type)) { + token = *(peek(p)); + p->tok.type = NONE; + } else { + error(p, "Unexpected token"); + } + return &token; +} + +static char* expect_text(Parser* p, TokType type) +{ + return strdup(expect_val(p, type)->text); +} + //static int consume(Parser* p) { // int type = peek(p)->type; // if (!accept(p, type)) @@ -110,8 +121,84 @@ static int Indent = 0; /* Grammar Definition *****************************************************************************/ -void toplevel(Parser* p) { + +void import_list(Parser* p) +{ parse_enter(); - (void)p; + expect(p, IMPORT); + while (1) + { + expect(p, IDENT); + if (accept(p, '=')) + { + expect(p, IDENT); + } + if (matches(p, ';')) + { + break; + } + expect(p, ','); + } + expect(p, ';'); parse_exit(); } + +void module(Parser* p) +{ + parse_enter(); + expect(p, MODULE); + char* sname = expect_text(p, IDENT); + /* TODO: Check that it matches filename here */ + expect(p, ';'); + if (matches(p, IMPORT)) + { + import_list(p); + } +// declaration_seq(p); + if (accept(p, BEGIN)) + { +// statement_seq(p); + } + expect(p, END); + char* ename = expect_text(p, IDENT); + if (strcmp(sname, ename)) + { + error(p, "Expected module name '%s', recieved '%s' instead", sname, ename); + } + expect(p, ';'); + parse_exit(); +} + +/* Grammar Unit Tests + *****************************************************************************/ +#ifdef CERISE_TESTS +#include "atf.h" + +Parser Ctx = {0}; + +void parse_module(char* fname, char* string) +{ + memset(&Ctx, 0, sizeof(Ctx)); + LexFile* file = calloc(sizeof(LexFile), 1u); + file->path = strdup(fname); + file->fbeg = file->fpos = strdup(string); + file->next = Ctx.file; + Ctx.file = file; + module(&Ctx); +} + +TEST_SUITE(Grammar) +{ + TEST(Should parse basic module syntax) + { + parse_module("Empty", + "module Empty; end Empty;"); + parse_module("ModA", + "module ModA; import ModB; end ModA;"); + parse_module("ModA", + "module ModA; import ModB, ModC; end ModA;"); + parse_module("ModA", + "module ModA; import B = ModB, C = ModC; end ModA;"); + } +} +#endif diff --git a/cerise/tests/tokens.txt b/cerise/tests/tokens.txt index 4a967ba..d8a921a 100644 --- a/cerise/tests/tokens.txt +++ b/cerise/tests/tokens.txt @@ -22,6 +22,7 @@ ] { } +! and array begin -- 2.49.0