From b2ac45d0f57005294b67d1302d448ea6ced08642 Mon Sep 17 00:00:00 2001
From: "Michael D. Lowis" <mike.lowis@gentex.com>
Date: Fri, 16 Apr 2021 14:22:48 -0400
Subject: [PATCH] implemented basic module skeleton parsing

---
 cerise/build.sh         |   2 +-
 cerise/cerise.h         |   2 +-
 cerise/lex.c            |  90 +++++++++++++------
 cerise/main.c           |  21 ++---
 cerise/parser.c         | 185 +++++++++++++++++++++++++++++-----------
 cerise/tests/tokens.txt |   1 +
 6 files changed, 214 insertions(+), 87 deletions(-)

diff --git a/cerise/build.sh b/cerise/build.sh
index 10a89f4..de92c50 100755
--- a/cerise/build.sh
+++ b/cerise/build.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
 ctags -R &
-cc -g -D CERISE_TESTS -Wall -Wextra -Werror --std=c99 -o cerisec-test *.c \
+cc -g -D CERISE_TESTS -Wall -Wextra --std=c99 -o cerisec-test *.c \
   && ./cerisec-test \
   && cc -g -Wall -Wextra -Werror --std=c99 -o cerisec *.c
diff --git a/cerise/cerise.h b/cerise/cerise.h
index 62fc8fd..714213d 100644
--- a/cerise/cerise.h
+++ b/cerise/cerise.h
@@ -94,7 +94,7 @@ void lexfile(Parser* ctx, char* path);
 void lex(Parser* ctx);
 void lexprintpos(Parser* p, FILE* file, Tok* tok);
 void gettoken(Parser* ctx);
-void toplevel(Parser* p);
+void module(Parser* p);
 
 /* Option Parsing
  *****************************************************************************/
diff --git a/cerise/lex.c b/cerise/lex.c
index 019ac72..5de2cc8 100644
--- a/cerise/lex.c
+++ b/cerise/lex.c
@@ -29,7 +29,7 @@ static const char Chars[256] = {
     /* strings */
     ['"'] = 3,
 
-    /* double character ops */
+    /* potential double character ops */
     ['='] = 4, ['.'] = 4,
 
     /* potential multi-character ops */
@@ -97,26 +97,36 @@ KeywordDef Keywords[] = {
     { "while",     WHILE     },
 };
 
-static int keywcmp(const void* a, const void* b) {
+static int keywcmp(const void* a, const void* b)
+{
     return strcmp(((KeywordDef*)a)->keyword, ((KeywordDef*)b)->keyword);
 }
 
-static inline char* file_load(char* path) {
+static inline char* file_load(char* path)
+{
     int fd = -1, nread = 0, length = 0;
     struct stat sb = {0};
     char* contents = NULL;
-    if (((fd = open(path, O_RDONLY, 0)) >= 0) && (fstat(fd, &sb) >= 0) && (sb.st_size > 0)) {
+    if (((fd = open(path, O_RDONLY, 0)) >= 0) && (fstat(fd, &sb) >= 0) && (sb.st_size > 0))
+    {
         contents = calloc(sb.st_size + 1u, 1u);
         while (sb.st_size && (nread = read(fd, contents+length, sb.st_size)) > 0)
+        {
             length += nread, sb.st_size -= nread;
+        }
+    }
+    if (fd >= 0) /* descriptor 0 is valid too; only -1 means open() failed */
+    {
+        close(fd);
     }
-    if (fd > 0) close(fd);
     return contents;
 }
 
-static inline void convert_value(Tok* tok) {
+static inline void convert_value(Tok* tok)
+{
     switch (tok->type) {
-        case STRING: {
+        case STRING:
+        {
             size_t len = strlen(tok->text+1);
             char* strtext = malloc(len);
             strncpy(strtext, tok->text+1, len);
@@ -125,34 +135,42 @@ static inline void convert_value(Tok* tok) {
             break;
         }
 
-        case INT: {
+        case INT:
+        {
             tok->value.integer = strtol(tok->text, NULL, 0);
             break;
         }
 
-        case IDENT: {
+        case IDENT:
+        {
             KeywordDef key = { .keyword = tok->text };
             KeywordDef* match = bsearch(
                 &key, Keywords, NUM_KEYWORDS, sizeof(KeywordDef), keywcmp);
-            if (match) {
+            if (match)
+            {
                 tok->type = match->type;
                 if (tok->type != IDENT)
+                {
                     convert_value(tok); /* recurse to ensure correct conversion */
+                }
             }
             break;
         }
 
-        case BOOL: {
+        case BOOL:
+        {
             tok->value.integer = (tok->text[0] == 't');
             break;
         }
 
         default:
+            /* nothing to do here */
             break;
     }
 }
 
-static inline void readtok(Parser* ctx) {
+static inline void readtok(Parser* ctx)
+{
     Tok* tok = &(ctx->tok);
     char *beg = ctx->file->fpos;
     char *curr = ctx->file->fpos;
@@ -228,7 +246,8 @@ static inline void readtok(Parser* ctx) {
             exit(1);
     }
 
-    if (tok->type) {
+    if (tok->type)
+    {
         size_t sz = (curr - beg);
         tok->text = malloc(sz+1);
         tok->text[sz] = '\0';
@@ -239,7 +258,8 @@ static inline void readtok(Parser* ctx) {
     ctx->file->fpos = curr;
 }
 
-void lexfile(Parser* ctx, char* path) {
+void lexfile(Parser* ctx, char* path)
+{
     LexFile* file = calloc(sizeof(LexFile), 1u);
     file->path = strdup(path);
     file->fbeg = file->fpos = file_load(path);
@@ -247,49 +267,67 @@ void lexfile(Parser* ctx, char* path) {
     ctx->file = file;
 }
 
-void lex(Parser* ctx) {
+void lex(Parser* ctx)
+{
     ctx->tok.file = ctx->file->path;
     ctx->tok.type = NONE;
-    while (ctx->tok.type == NONE) {
-        if (!ctx->file) {
+    while (ctx->tok.type == NONE)
+    {
+        if (!ctx->file)
+        {
             /* no more files left to process */
             ctx->tok.type = END_FILE;
             ctx->tok.text = "";
             return;
-        } else if (!(ctx->file->fpos) || !*(ctx->file->fpos)) {
+        }
+        else if (!(ctx->file->fpos) || !*(ctx->file->fpos))
+        {
             /* grab the next file to process */
             LexFile* f = ctx->file;
             ctx->file = f->next;
             f->next = ctx->done;
             ctx->done = f;
-        } else {
+        }
+        else
+        {
             /* parse out a token */
             readtok(ctx);
         }
     }
 }
 
-static LexFile* get_file(Parser* p, char const* path) {
+static LexFile* get_file(Parser* p, char const* path)
+{
     LexFile* lf = p->file;
     while (lf && strcmp(lf->path, path))
+    {
         lf = lf->next;
-    if (!lf) {
+    }
+    if (!lf)
+    {
         lf = p->done;
         while (lf && strcmp(lf->path, path))
+        {
             lf = lf->next;
+        }
     }
     return lf;
 }
 
-void lexprintpos(Parser* p, FILE* file, Tok* tok) {
+void lexprintpos(Parser* p, FILE* file, Tok* tok)
+{
     size_t line = 1, col = 1;
     char* data = get_file(p, tok->file)->fbeg;
     char* end = data + tok->offset;
-    for (; *data && data < end; data++) {
-        if (*data == '\n') {
+    for (; *data && data < end; data++)
+    {
+        if (*data == '\n')
+        {
             line++;
             col = 1;
-        } else {
+        }
+        else
+        {
             col++;
         }
     }
@@ -329,6 +367,7 @@ TEST_SUITE(Lexer)
         { "]",         ']'       },
         { "{",         '{'       },
         { "}",         '}'       },
+        { "!",         '!'       },
         { "and",       AND       },
         { "array",     ARRAY     },
         { "begin",     BEGIN     },
@@ -369,7 +408,6 @@ TEST_SUITE(Lexer)
         { "",          END_FILE  },
     };
 
-
     TEST(Lexer recognizes all required tokens)
     {
         Parser ctx = {0};
diff --git a/cerise/main.c b/cerise/main.c
index 34766a5..cea9b8f 100644
--- a/cerise/main.c
+++ b/cerise/main.c
@@ -7,13 +7,13 @@ char* Artifact = "bin";
 
 /* Driver Modes
  *****************************************************************************/
-static int emit_binary(Parser* ctx, int argc, char **argv) 
+static int emit_binary(Parser* ctx, int argc, char **argv)
 {
     (void)ctx, (void)argc, (void)argv;
     return 0;
 }
 
-static int emit_library(Parser* ctx, int argc, char **argv) 
+static int emit_library(Parser* ctx, int argc, char **argv)
 {
     (void)ctx, (void)argc, (void)argv;
     return 0;
@@ -21,7 +21,7 @@ static int emit_library(Parser* ctx, int argc, char **argv)
 
 /* Main Routine and Usage
  *****************************************************************************/
-void usage(void) 
+void usage(void)
 {
     fprintf(stderr, "%s\n",
         "Usage: sclpl [options...] [-A artifact] [file...]\n"
@@ -31,7 +31,7 @@ void usage(void)
     exit(1);
 }
 
-int main(int argc, char **argv) 
+int main(int argc, char **argv)
 {
     /* Option parsing */
     OPTBEGIN {
@@ -43,15 +43,15 @@ int main(int argc, char **argv)
     for (; argc; argc--,argv++)
         lexfile(&ctx, *argv);
     /* Execute the main compiler process */
-    if (0 == strcmp("bin", Artifact)) 
+    if (0 == strcmp("bin", Artifact))
     {
         return emit_binary(&ctx, argc, argv);
-    } 
-    else if (0 == strcmp("lib", Artifact)) 
+    }
+    else if (0 == strcmp("lib", Artifact))
     {
         return emit_library(&ctx, argc, argv);
-    } 
-    else 
+    }
+    else
     {
         fprintf(stderr, "Unknown artifact type: '%s'\n\n", Artifact);
         usage();
@@ -63,10 +63,11 @@ int main(int argc, char **argv)
 
 #define INCLUDE_DEFS
 #include "atf.h"
-int main(int argc, char **argv) 
+int main(int argc, char **argv)
 {
     atf_init(argc, argv);
     RUN_EXTERN_TEST_SUITE(Lexer);
+    RUN_EXTERN_TEST_SUITE(Grammar);
     return atf_print_results();
 }
 
diff --git a/cerise/parser.c b/cerise/parser.c
index 826c710..39343d1 100644
--- a/cerise/parser.c
+++ b/cerise/parser.c
@@ -54,53 +54,64 @@ static int Indent = 0;
 
 /* Parsing Routines
  *****************************************************************************/
-//static Tok* peek(Parser* p) {
-//    if (T_NONE == p->tok.type)
-//        lex(p);
-//    return &(p->tok);
-//}
-//
-//static void error(Parser* parser, const char* fmt, ...) {
-//    Tok* tok = peek(parser);
-//    va_list args;
-//    va_start(args, fmt);
-//    lexprintpos(parser, stderr, tok);
-//    fprintf(stderr, " error: ");
-//    vfprintf(stderr, fmt, args);
-//    fprintf(stderr, "\n");
-//    va_end(args);
-//    exit(1);
-//}
-//
-//static bool matches(Parser* p, TokType type) {
-//    return (peek(p)->type == type);
-//}
-//
-//static bool accept(Parser* p, TokType type) {
-//    if (matches(p, type)) {
-//        p->tok.type = T_NONE;
-//        return true;
-//    }
-//    return false;
-//}
-//
-//static void expect(Parser* p, TokType type) {
-//    if (!accept(p, type))
-//        error(p, "Unexpected token");
-//}
-//
-//static Tok* expect_val(Parser* p, TokType type) {
-//    static Tok token = {0};
-//    /* perform the match */
-//    if (matches(p, type)) {
-//        token = *(peek(p));
-//        p->tok.type = T_NONE;
-//    } else {
-//        error(p, "Unexpected token");
-//    }
-//    return &token;
-//}
-//
+static Tok* peek(Parser* p)
+{
+    if (NONE == p->tok.type)
+        lex(p);
+    return &(p->tok);
+}
+
+static void error(Parser* parser, const char* fmt, ...)
+{
+    Tok* tok = peek(parser);
+    va_list args;
+    va_start(args, fmt);
+    lexprintpos(parser, stderr, tok);
+    fprintf(stderr, " error: ");
+    vfprintf(stderr, fmt, args);
+    fprintf(stderr, "\n");
+    va_end(args);
+    exit(1);
+}
+
+static bool matches(Parser* p, TokType type)
+{
+    return (peek(p)->type == type);
+}
+
+static bool accept(Parser* p, TokType type)
+{
+    if (matches(p, type)) {
+        p->tok.type = NONE;
+        return true;
+    }
+    return false;
+}
+
+static void expect(Parser* p, TokType type)
+{
+    if (!accept(p, type))
+        error(p, "Unexpected token");
+}
+
+static Tok* expect_val(Parser* p, TokType type)
+{
+    static Tok token = {0};
+    /* perform the match */
+    if (matches(p, type)) {
+        token = *(peek(p));
+        p->tok.type = NONE;
+    } else {
+        error(p, "Unexpected token");
+    }
+    return &token;
+}
+
+static char* expect_text(Parser* p, TokType type)
+{
+    return strdup(expect_val(p, type)->text);
+}
+
 //static int consume(Parser* p) {
 //    int type = peek(p)->type;
 //    if (!accept(p, type))
@@ -110,8 +121,84 @@ static int Indent = 0;
 
 /* Grammar Definition
  *****************************************************************************/
-void toplevel(Parser* p) {
+
+void import_list(Parser* p)
+{
     parse_enter();
-    (void)p;
+    expect(p, IMPORT);
+    while (1)
+    {
+        expect(p, IDENT);
+        if (accept(p, '='))
+        {
+            expect(p, IDENT);
+        }
+        if (matches(p, ';'))
+        {
+            break;
+        }
+        expect(p, ',');
+    }
+    expect(p, ';');
     parse_exit();
 }
+
+void module(Parser* p) /* MODULE ident ';' [import_list] [BEGIN] END ident ';' */
+{
+    parse_enter();
+    expect(p, MODULE);
+    char* sname = expect_text(p, IDENT);
+    /* TODO: check that the module name matches the file name here */
+    expect(p, ';');
+    if (matches(p, IMPORT))
+    {
+        import_list(p);
+    }
+//    declaration_seq(p);
+    if (accept(p, BEGIN))
+    {
+//        statement_seq(p);
+    }
+    expect(p, END);
+    char* ename = expect_text(p, IDENT);
+    if (strcmp(sname, ename)) /* the name after END must repeat the one after MODULE */
+    {
+        error(p, "Expected module name '%s', received '%s' instead", sname, ename);
+    }
+    expect(p, ';');
+    parse_exit();
+}
+
+/* Grammar Unit Tests
+ *****************************************************************************/
+#ifdef CERISE_TESTS
+#include "atf.h"
+
+Parser Ctx = {0};
+
+void parse_module(char* fname, char* string) /* test helper: parse `string` as file `fname` */
+{
+    memset(&Ctx, 0, sizeof(Ctx));
+    LexFile* file = calloc(1u, sizeof(LexFile));
+    file->path = strdup(fname);
+    file->fbeg = file->fpos = strdup(string);
+    file->next = Ctx.file;
+    Ctx.file = file;
+    module(&Ctx);
+}
+
+TEST_SUITE(Grammar)
+{
+    TEST(Should parse basic module syntax)
+    {
+        parse_module("Empty",
+            "module Empty; end Empty;");
+        parse_module("ModA",
+            "module ModA; import ModB; end ModA;");
+        parse_module("ModA",
+            "module ModA; import ModB, ModC; end ModA;");
+        parse_module("ModA",
+            "module ModA; import B = ModB, C = ModC; end ModA;");
+    }
+}
+#endif
diff --git a/cerise/tests/tokens.txt b/cerise/tests/tokens.txt
index 4a967ba..d8a921a 100644
--- a/cerise/tests/tokens.txt
+++ b/cerise/tests/tokens.txt
@@ -22,6 +22,7 @@
 ]
 {
 }
+!
 and
 array
 begin
-- 
2.49.0