implemented basic module skeleton parsing

author Michael D. Lowis <mike.lowis@gentex.com>

Fri, 16 Apr 2021 18:22:48 +0000 (14:22 -0400)

committer Michael D. Lowis <mike.lowis@gentex.com>

Fri, 16 Apr 2021 18:22:48 +0000 (14:22 -0400)
author Michael D. Lowis <mike.lowis@gentex.com>
Fri, 16 Apr 2021 18:22:48 +0000 (14:22 -0400)
committer Michael D. Lowis <mike.lowis@gentex.com>
Fri, 16 Apr 2021 18:22:48 +0000 (14:22 -0400)
diff --git a/cerise/build.sh b/cerise/build.sh

index 10a89f4e976ca680fb6cc6916e8b21b8562d0e78..de92c5040eb5025a696464fee27ba672d92fe31e 100755 (executable)
--- a/cerise/build.sh
+++ b/cerise/build.sh
@@ -1,5 +1,5 @@
  #!/bin/sh
  ctags -R &
-cc -g -D CERISE_TESTS -Wall -Wextra -Werror --std=c99 -o cerisec-test *.c \
+cc -g -D CERISE_TESTS -Wall -Wextra --std=c99 -o cerisec-test *.c \
    && ./cerisec-test \
    && cc -g -Wall -Wextra -Werror --std=c99 -o cerisec *.c
diff --git a/cerise/cerise.h b/cerise/cerise.h

index 62fc8fd23508b819374e634d8076174b4147756b..714213d4ffcd1c4f06cf04ade38b0c131fad56b9 100644 (file)
--- a/cerise/cerise.h
+++ b/cerise/cerise.h
@@ -94,7 +94,7 @@ void lexfile(Parser* ctx, char* path);
  void lex(Parser* ctx);
  void lexprintpos(Parser* p, FILE* file, Tok* tok);
  void gettoken(Parser* ctx);
-void toplevel(Parser* p);
+void module(Parser* p);
  
  /* Option Parsing
   *****************************************************************************/
diff --git a/cerise/lex.c b/cerise/lex.c

index 019ac72c88b69007fa46221dc08521d8565e4a24..5de2cc891e4a54b66adc3c4c132411c6ef3feb83 100644 (file)
--- a/cerise/lex.c
+++ b/cerise/lex.c
@@ -29,7 +29,7 @@ static const char Chars[256] = {
      /* strings */
      ['"'] = 3,
  
-    /* double character ops */
+    /* potential double character ops */
      ['='] = 4, ['.'] = 4,
  
      /* potential multi-character ops */
@@ -97,26 +97,36 @@ KeywordDef Keywords[] = {
      { "while",     WHILE     },
  };
  
-static int keywcmp(const void* a, const void* b) {
+static int keywcmp(const void* a, const void* b)
+{
      return strcmp(((KeywordDef*)a)->keyword, ((KeywordDef*)b)->keyword);
  }
  
-static inline char* file_load(char* path) {
+static inline char* file_load(char* path)
+{
      int fd = -1, nread = 0, length = 0;
      struct stat sb = {0};
      char* contents = NULL;
-    if (((fd = open(path, O_RDONLY, 0)) >= 0) && (fstat(fd, &sb) >= 0) && (sb.st_size > 0)) {
+    if (((fd = open(path, O_RDONLY, 0)) >= 0) && (fstat(fd, &sb) >= 0) && (sb.st_size > 0))
+    {
          contents = calloc(sb.st_size + 1u, 1u);
          while (sb.st_size && (nread = read(fd, contents+length, sb.st_size)) > 0)
+        {
              length += nread, sb.st_size -= nread;
+        }
+    }
+    if (fd > 0)
+    {
+        close(fd);
      }
-    if (fd > 0) close(fd);
      return contents;
  }
  
-static inline void convert_value(Tok* tok) {
+static inline void convert_value(Tok* tok)
+{
      switch (tok->type) {
-        case STRING: {
+        case STRING:
+        {
              size_t len = strlen(tok->text+1);
              char* strtext = malloc(len);
              strncpy(strtext, tok->text+1, len);
@@ -125,34 +135,42 @@ static inline void convert_value(Tok* tok) {
              break;
          }
  
-        case INT: {
+        case INT:
+        {
              tok->value.integer = strtol(tok->text, NULL, 0);
              break;
          }
  
-        case IDENT: {
+        case IDENT:
+        {
              KeywordDef key = { .keyword = tok->text };
              KeywordDef* match = bsearch(
                  &key, Keywords, NUM_KEYWORDS, sizeof(KeywordDef), keywcmp);
-            if (match) {
+            if (match)
+            {
                  tok->type = match->type;
                  if (tok->type != IDENT)
+                {
                      convert_value(tok); /* recurse to ensure correct conversion */
+                }
              }
              break;
          }
  
-        case BOOL: {
+        case BOOL:
+        {
              tok->value.integer = (tok->text[0] == 't');
              break;
          }
  
          default:
+            /* nothing to do here */
              break;
      }
  }
  
-static inline void readtok(Parser* ctx) {
+static inline void readtok(Parser* ctx)
+{
      Tok* tok = &(ctx->tok);
      char *beg = ctx->file->fpos;
      char *curr = ctx->file->fpos;
@@ -228,7 +246,8 @@ static inline void readtok(Parser* ctx) {
              exit(1);
      }
  
-    if (tok->type) {
+    if (tok->type)
+    {
          size_t sz = (curr - beg);
          tok->text = malloc(sz+1);
          tok->text[sz] = '\0';
@@ -239,7 +258,8 @@ static inline void readtok(Parser* ctx) {
      ctx->file->fpos = curr;
  }
  
-void lexfile(Parser* ctx, char* path) {
+void lexfile(Parser* ctx, char* path)
+{
      LexFile* file = calloc(sizeof(LexFile), 1u);
      file->path = strdup(path);
      file->fbeg = file->fpos = file_load(path);
@@ -247,49 +267,67 @@ void lexfile(Parser* ctx, char* path) {
      ctx->file = file;
  }
  
-void lex(Parser* ctx) {
+void lex(Parser* ctx)
+{
      ctx->tok.file = ctx->file->path;
      ctx->tok.type = NONE;
-    while (ctx->tok.type == NONE) {
-        if (!ctx->file) {
+    while (ctx->tok.type == NONE)
+    {
+        if (!ctx->file)
+        {
              /* no more files left to process */
              ctx->tok.type = END_FILE;
              ctx->tok.text = "";
              return;
-        } else if (!(ctx->file->fpos) || !*(ctx->file->fpos)) {
+        }
+        else if (!(ctx->file->fpos) || !*(ctx->file->fpos))
+        {
              /* grab the next file to process */
              LexFile* f = ctx->file;
              ctx->file = f->next;
              f->next = ctx->done;
              ctx->done = f;
-        } else {
+        }
+        else
+        {
              /* parse out a token */
              readtok(ctx);
          }
      }
  }
  
-static LexFile* get_file(Parser* p, char const* path) {
+static LexFile* get_file(Parser* p, char const* path)
+{
      LexFile* lf = p->file;
      while (lf && strcmp(lf->path, path))
+    {
          lf = lf->next;
-    if (!lf) {
+    }
+    if (!lf)
+    {
          lf = p->done;
          while (lf && strcmp(lf->path, path))
+        {
              lf = lf->next;
+        }
      }
      return lf;
  }
  
-void lexprintpos(Parser* p, FILE* file, Tok* tok) {
+void lexprintpos(Parser* p, FILE* file, Tok* tok)
+{
      size_t line = 1, col = 1;
      char* data = get_file(p, tok->file)->fbeg;
      char* end = data + tok->offset;
-    for (; *data && data < end; data++) {
-        if (*data == '\n') {
+    for (; *data && data < end; data++)
+    {
+        if (*data == '\n')
+        {
              line++;
              col = 1;
-        } else {
+        }
+        else
+        {
              col++;
          }
      }
@@ -329,6 +367,7 @@ TEST_SUITE(Lexer)
          { "]",         ']'       },
          { "{",         '{'       },
          { "}",         '}'       },
+        { "!",         '!'       },
          { "and",       AND       },
          { "array",     ARRAY     },
          { "begin",     BEGIN     },
@@ -369,7 +408,6 @@ TEST_SUITE(Lexer)
          { "",          END_FILE  },
      };
  
-
      TEST(Lexer recognizes all required tokens)
      {
          Parser ctx = {0};
diff --git a/cerise/main.c b/cerise/main.c

index 34766a5ef787eb37f7b6e0a0948a0da0ef6114d0..cea9b8ff29fe7bea370400dc6974d93c9d0cc22d 100644 (file)
--- a/cerise/main.c
+++ b/cerise/main.c
@@ -7,13 +7,13 @@ char* Artifact = "bin";
  
  /* Driver Modes
   *****************************************************************************/
-static int emit_binary(Parser* ctx, int argc, char **argv) 
+static int emit_binary(Parser* ctx, int argc, char **argv)
  {
      (void)ctx, (void)argc, (void)argv;
      return 0;
  }
  
-static int emit_library(Parser* ctx, int argc, char **argv) 
+static int emit_library(Parser* ctx, int argc, char **argv)
  {
      (void)ctx, (void)argc, (void)argv;
      return 0;
@@ -21,7 +21,7 @@ static int emit_library(Parser* ctx, int argc, char **argv)
  
  /* Main Routine and Usage
   *****************************************************************************/
-void usage(void) 
+void usage(void)
  {
      fprintf(stderr, "%s\n",
          "Usage: sclpl [options...] [-A artifact] [file...]\n"
@@ -31,7 +31,7 @@ void usage(void)
      exit(1);
  }
  
-int main(int argc, char **argv) 
+int main(int argc, char **argv)
  {
      /* Option parsing */
      OPTBEGIN {
@@ -43,15 +43,15 @@ int main(int argc, char **argv)
      for (; argc; argc--,argv++)
          lexfile(&ctx, *argv);
      /* Execute the main compiler process */
-    if (0 == strcmp("bin", Artifact)) 
+    if (0 == strcmp("bin", Artifact))
      {
          return emit_binary(&ctx, argc, argv);
-    } 
-    else if (0 == strcmp("lib", Artifact)) 
+    }
+    else if (0 == strcmp("lib", Artifact))
      {
          return emit_library(&ctx, argc, argv);
-    } 
-    else 
+    }
+    else
      {
          fprintf(stderr, "Unknown artifact type: '%s'\n\n", Artifact);
          usage();
@@ -63,10 +63,11 @@ int main(int argc, char **argv)
  
  #define INCLUDE_DEFS
  #include "atf.h"
-int main(int argc, char **argv) 
+int main(int argc, char **argv)
  {
      atf_init(argc, argv);
      RUN_EXTERN_TEST_SUITE(Lexer);
+    RUN_EXTERN_TEST_SUITE(Grammar);
      return atf_print_results();
  }
  
diff --git a/cerise/parser.c b/cerise/parser.c

index 826c7107bfa309824001bb3ee54b9b4d7f086187..39343d1079aa2c59edfbef854a7cc1807e919046 100644 (file)
--- a/cerise/parser.c
+++ b/cerise/parser.c
@@ -54,53 +54,64 @@ static int Indent = 0;
  
  /* Parsing Routines
   *****************************************************************************/
-//static Tok* peek(Parser* p) {
-//    if (T_NONE == p->tok.type)
-//        lex(p);
-//    return &(p->tok);
-//}
-//
-//static void error(Parser* parser, const char* fmt, ...) {
-//    Tok* tok = peek(parser);
-//    va_list args;
-//    va_start(args, fmt);
-//    lexprintpos(parser, stderr, tok);
-//    fprintf(stderr, " error: ");
-//    vfprintf(stderr, fmt, args);
-//    fprintf(stderr, "\n");
-//    va_end(args);
-//    exit(1);
-//}
-//
-//static bool matches(Parser* p, TokType type) {
-//    return (peek(p)->type == type);
-//}
-//
-//static bool accept(Parser* p, TokType type) {
-//    if (matches(p, type)) {
-//        p->tok.type = T_NONE;
-//        return true;
-//    }
-//    return false;
-//}
-//
-//static void expect(Parser* p, TokType type) {
-//    if (!accept(p, type))
-//        error(p, "Unexpected token");
-//}
-//
-//static Tok* expect_val(Parser* p, TokType type) {
-//    static Tok token = {0};
-//    /* perform the match */
-//    if (matches(p, type)) {
-//        token = *(peek(p));
-//        p->tok.type = T_NONE;
-//    } else {
-//        error(p, "Unexpected token");
-//    }
-//    return &token;
-//}
-//
+static Tok* peek(Parser* p)
+{
+    if (NONE == p->tok.type)
+        lex(p);
+    return &(p->tok);
+}
+
+static void error(Parser* parser, const char* fmt, ...)
+{
+    Tok* tok = peek(parser);
+    va_list args;
+    va_start(args, fmt);
+    lexprintpos(parser, stderr, tok);
+    fprintf(stderr, " error: ");
+    vfprintf(stderr, fmt, args);
+    fprintf(stderr, "\n");
+    va_end(args);
+    exit(1);
+}
+
+static bool matches(Parser* p, TokType type)
+{
+    return (peek(p)->type == type);
+}
+
+static bool accept(Parser* p, TokType type)
+{
+    if (matches(p, type)) {
+        p->tok.type = NONE;
+        return true;
+    }
+    return false;
+}
+
+static void expect(Parser* p, TokType type)
+{
+    if (!accept(p, type))
+        error(p, "Unexpected token");
+}
+
+static Tok* expect_val(Parser* p, TokType type)
+{
+    static Tok token = {0};
+    /* perform the match */
+    if (matches(p, type)) {
+        token = *(peek(p));
+        p->tok.type = NONE;
+    } else {
+        error(p, "Unexpected token");
+    }
+    return &token;
+}
+
+static char* expect_text(Parser* p, TokType type)
+{
+    return strdup(expect_val(p, type)->text);
+}
+
  //static int consume(Parser* p) {
  //    int type = peek(p)->type;
  //    if (!accept(p, type))
@@ -110,8 +121,84 @@ static int Indent = 0;
  
  /* Grammar Definition
   *****************************************************************************/
-void toplevel(Parser* p) {
+
+void import_list(Parser* p)
+{
      parse_enter();
-    (void)p;
+    expect(p, IMPORT);
+    while (1)
+    {
+        expect(p, IDENT);
+        if (accept(p, '='))
+        {
+            expect(p, IDENT);
+        }
+        if (matches(p, ';'))
+        {
+            break;
+        }
+        expect(p, ',');
+    }
+    expect(p, ';');
      parse_exit();
  }
+
+void module(Parser* p)
+{
+    parse_enter();
+    expect(p, MODULE);
+    char* sname = expect_text(p, IDENT);
+        /* TODO: Check that it matches filename here */
+    expect(p, ';');
+    if (matches(p, IMPORT))
+    {
+        import_list(p);
+    }
+//    declaration_seq(p);
+    if (accept(p, BEGIN))
+    {
+//        statement_seq(p);
+    }
+    expect(p, END);
+    char* ename = expect_text(p, IDENT);
+    if (strcmp(sname, ename))
+    {
+        error(p, "Expected module name '%s', recieved '%s' instead", sname, ename);
+    }
+    expect(p, ';');
+    parse_exit();
+}
+
+/* Grammar Unit Tests
+ *****************************************************************************/
+#ifdef CERISE_TESTS
+#include "atf.h"
+
+Parser Ctx = {0};
+
+void parse_module(char* fname, char* string)
+{
+    memset(&Ctx, 0, sizeof(Ctx));
+    LexFile* file = calloc(sizeof(LexFile), 1u);
+    file->path = strdup(fname);
+    file->fbeg = file->fpos = strdup(string);
+    file->next = Ctx.file;
+    Ctx.file = file;
+    module(&Ctx);
+}
+
+TEST_SUITE(Grammar)
+{
+    TEST(Should parse basic module syntax)
+    {
+        parse_module("Empty",
+            "module Empty; end Empty;");
+        parse_module("ModA",
+            "module ModA; import ModB; end ModA;");
+        parse_module("ModA",
+            "module ModA; import ModB, ModC; end ModA;");
+        parse_module("ModA",
+            "module ModA; import B = ModB, C = ModC; end ModA;");
+    }
+}
+#endif
diff --git a/cerise/tests/tokens.txt b/cerise/tests/tokens.txt

index 4a967ba556ff65683d951fade29e8a2d905f7210..d8a921a8c45afcaf1eaa44c22c006cbf7d29f628 100644 (file)
--- a/cerise/tests/tokens.txt
+++ b/cerise/tests/tokens.txt
@@ -22,6 +22,7 @@
  ]
  {
  }
+!
  and
  array
  begin
author	Michael D. Lowis <mike.lowis@gentex.com>
	Fri, 16 Apr 2021 18:22:48 +0000 (14:22 -0400)
committer	Michael D. Lowis <mike.lowis@gentex.com>
	Fri, 16 Apr 2021 18:22:48 +0000 (14:22 -0400)
cerise/build.sh		patch | blob | history
cerise/cerise.h		patch | blob | history
cerise/lex.c		patch | blob | history
cerise/main.c		patch | blob | history
cerise/parser.c		patch | blob | history
cerise/tests/tokens.txt		patch | blob | history