--- /dev/null
+#include <sclpl.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+/*
+ * One entry of the reserved-word table: a keyword's spelling paired with the
+ * token type that an identifier with that spelling is converted to.
+ */
+typedef struct {
+    /* NUL-terminated spelling. const: the table entries point at string
+     * literals and the lookup code only ever reads through this pointer. */
+    const char* keyword;
+    /* token type (a T_* constant) assigned when the keyword matches */
+    int type;
+} KeywordDef;
+
+/*
+ * Token class selected by the first byte of a lexical element. Bytes that are
+ * not listed keep the value 0, which readtok() treats as an error.
+ *   1 whitespace          2 comment start       3 sign or operator
+ *   4 digit               5 letter              6 single-char punctuation
+ *   7 string quote
+ */
+static const char FirstChar[256] = {
+    /* 1: blanks and line breaks */
+    ['\n'] = 1, ['\r'] = 1, ['\t'] = 1, [' '] = 1,
+
+    /* 2: '#' opens a comment */
+    ['#'] = 2,
+
+    /* 3: either an operator or the sign of a number */
+    ['-'] = 3, ['+'] = 3,
+
+    /* 4: decimal digits */
+    ['0'] = 4, ['1'] = 4, ['2'] = 4, ['3'] = 4,
+    ['4'] = 4, ['5'] = 4, ['6'] = 4, ['7'] = 4,
+    ['8'] = 4, ['9'] = 4,
+
+    /* 5: lower-case letters */
+    ['a'] = 5, ['b'] = 5, ['c'] = 5, ['d'] = 5, ['e'] = 5, ['f'] = 5, ['g'] = 5,
+    ['h'] = 5, ['i'] = 5, ['j'] = 5, ['k'] = 5, ['l'] = 5, ['m'] = 5, ['n'] = 5,
+    ['o'] = 5, ['p'] = 5, ['q'] = 5, ['r'] = 5, ['s'] = 5, ['t'] = 5, ['u'] = 5,
+    ['v'] = 5, ['w'] = 5, ['x'] = 5, ['y'] = 5, ['z'] = 5,
+
+    /* 5: upper-case letters */
+    ['A'] = 5, ['B'] = 5, ['C'] = 5, ['D'] = 5, ['E'] = 5, ['F'] = 5, ['G'] = 5,
+    ['H'] = 5, ['I'] = 5, ['J'] = 5, ['K'] = 5, ['L'] = 5, ['M'] = 5, ['N'] = 5,
+    ['O'] = 5, ['P'] = 5, ['Q'] = 5, ['R'] = 5, ['S'] = 5, ['T'] = 5, ['U'] = 5,
+    ['V'] = 5, ['W'] = 5, ['X'] = 5, ['Y'] = 5, ['Z'] = 5,
+
+    /* 6: brackets */
+    ['('] = 6, [')'] = 6,
+    ['['] = 6, [']'] = 6,
+    ['{'] = 6, ['}'] = 6,
+
+    /* 6: other one-character tokens */
+    ['.'] = 6, [','] = 6, [':'] = 6, [';'] = 6,
+    ['&'] = 6, ['='] = 6, ['*'] = 6, ['\''] = 6,
+
+    /* 7: double quote opens a string literal */
+    ['"'] = 7
+};
+
+/* Membership table: non-zero for the four whitespace bytes, zero otherwise. */
+char SPACE[256] = {
+    ['\t'] = 1, /* horizontal tab   */
+    ['\n'] = 1, /* line feed        */
+    ['\r'] = 1, /* carriage return  */
+    [' ']  = 1, /* space            */
+};
+
+/* Membership table: non-zero for the decimal digits '0'..'9', zero otherwise. */
+char DIGIT[256] = {
+    ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1,
+    ['4'] = 1, ['5'] = 1, ['6'] = 1, ['7'] = 1,
+    ['8'] = 1, ['9'] = 1,
+};
+
+/*
+ * Membership table: non-zero for the bytes that may continue an identifier
+ * (decimal digits, ASCII letters and the underscore), zero otherwise.
+ */
+char ALNUM_[256] = {
+    /* underscore */
+    ['_'] = 1,
+
+    /* decimal digits */
+    ['0'] = 1, ['1'] = 1, ['2'] = 1, ['3'] = 1,
+    ['4'] = 1, ['5'] = 1, ['6'] = 1, ['7'] = 1,
+    ['8'] = 1, ['9'] = 1,
+
+    /* lower-case letters */
+    ['a'] = 1, ['b'] = 1, ['c'] = 1, ['d'] = 1, ['e'] = 1, ['f'] = 1, ['g'] = 1,
+    ['h'] = 1, ['i'] = 1, ['j'] = 1, ['k'] = 1, ['l'] = 1, ['m'] = 1, ['n'] = 1,
+    ['o'] = 1, ['p'] = 1, ['q'] = 1, ['r'] = 1, ['s'] = 1, ['t'] = 1, ['u'] = 1,
+    ['v'] = 1, ['w'] = 1, ['x'] = 1, ['y'] = 1, ['z'] = 1,
+
+    /* upper-case letters */
+    ['A'] = 1, ['B'] = 1, ['C'] = 1, ['D'] = 1, ['E'] = 1, ['F'] = 1, ['G'] = 1,
+    ['H'] = 1, ['I'] = 1, ['J'] = 1, ['K'] = 1, ['L'] = 1, ['M'] = 1, ['N'] = 1,
+    ['O'] = 1, ['P'] = 1, ['Q'] = 1, ['R'] = 1, ['S'] = 1, ['T'] = 1, ['U'] = 1,
+    ['V'] = 1, ['W'] = 1, ['X'] = 1, ['Y'] = 1, ['Z'] = 1,
+};
+
+/*
+ * Reserved words and the token type each one maps to. Entries must stay in
+ * ascending strcmp() order because convert_value() searches this array with
+ * bsearch().
+ */
+KeywordDef Keywords[] = {
+    { .keyword = "else",    .type = T_ELSE     },
+    { .keyword = "false",   .type = T_BOOL     },
+    { .keyword = "fun",     .type = T_FUN      },
+    { .keyword = "if",      .type = T_IF       },
+    { .keyword = "let",     .type = T_LET      },
+    { .keyword = "provide", .type = T_PROVIDES },
+    { .keyword = "require", .type = T_REQUIRES },
+    { .keyword = "return",  .type = T_RETURN   },
+    { .keyword = "struct",  .type = T_STRUCT   },
+    { .keyword = "true",    .type = T_BOOL     },
+    { .keyword = "type",    .type = T_TYPE     },
+    { .keyword = "union",   .type = T_UNION    },
+    { .keyword = "var",     .type = T_VAR      },
+};
+
+/* Number of entries in Keywords. */
+#define NUM_KEYWORDS (sizeof(Keywords) / sizeof(Keywords[0]))
+
+/*
+ * Comparator in the shape bsearch() expects: both arguments point at
+ * KeywordDef objects, which are ordered by their keyword spelling.
+ * Returns the strcmp() result of the two spellings.
+ */
+static int keywcmp(const void* a, const void* b) {
+    const KeywordDef* lhs = a;
+    const KeywordDef* rhs = b;
+    return strcmp(lhs->keyword, rhs->keyword);
+}
+
+/*
+ * Reads the whole file at `path` into a freshly allocated, NUL-terminated
+ * buffer.
+ *
+ * Returns NULL when the file cannot be opened or stat'ed, when its size is
+ * zero, or when the buffer cannot be allocated. If read() fails or reaches
+ * end-of-file early, the bytes read so far are returned; the remainder of the
+ * buffer is zero-filled. The caller owns the result and releases it with
+ * free().
+ */
+static inline char* file_load(char* path) {
+    char* contents = NULL;
+    struct stat sb = {0};
+    int fd = open(path, O_RDONLY, 0);
+    if (fd < 0)
+        return NULL;
+
+    if ((fstat(fd, &sb) >= 0) && (sb.st_size > 0)) {
+        size_t remaining = (size_t)sb.st_size;
+        /* calloc zero-fills, so the one extra byte acts as the terminator */
+        contents = calloc(remaining + 1u, 1u);
+        if (contents) {
+            size_t length = 0;
+            ssize_t nread = 0;
+            /* read() may deliver fewer bytes than asked for; keep going
+             * until everything arrived, end-of-file, or an error */
+            while (remaining && (nread = read(fd, contents + length, remaining)) > 0) {
+                length += (size_t)nread;
+                remaining -= (size_t)nread;
+            }
+        }
+    }
+
+    /* 0 is a valid descriptor, so close whenever open() succeeded */
+    close(fd);
+    return contents;
+}
+
+/*
+ * Post-processes a freshly lexed token according to its type:
+ *   T_STRING - replaces tok->text with a copy that has the opening and
+ *              closing double quote removed;
+ *   T_INT    - stores the strtol() value of the text (base auto-detected by
+ *              strtol) in tok->value.integer;
+ *   T_ID     - looks the text up in Keywords; on a match the token type
+ *              becomes the keyword's type, and for T_BOOL keywords the
+ *              boolean value is stored as well;
+ *   T_BOOL   - stores 1 in tok->value.integer when the text starts with 't',
+ *              0 otherwise.
+ * Tokens of any other type are left untouched.
+ */
+static inline void convert_value(Tok* tok) {
+    switch (tok->type) {
+        case T_STRING: {
+            /* readtok() hands over <quote> body <quote>: skip the opening
+             * quote and drop the closing one */
+            size_t len = strlen(tok->text+1);
+            /* guard len == 0 so the terminator index cannot underflow */
+            size_t body = (len > 0) ? (len - 1) : 0;
+            char* strtext = malloc(body + 1);
+            if (!strtext) {
+                fprintf(stderr, "Out of memory\n");
+                exit(1);
+            }
+            memcpy(strtext, tok->text+1, body);
+            strtext[body] = '\0';
+            free(tok->text);
+            tok->text = strtext;
+            break;
+        }
+
+        case T_INT: {
+            tok->value.integer = strtol(tok->text, NULL, 0);
+            break;
+        }
+
+        case T_ID: {
+            KeywordDef key = { .keyword = tok->text };
+            KeywordDef* match = bsearch(
+                &key, Keywords, NUM_KEYWORDS, sizeof(KeywordDef), keywcmp);
+            if (match) tok->type = match->type;
+            /* "true"/"false" only become T_BOOL here, after the switch has
+             * already dispatched on T_ID, so the T_BOOL case below is never
+             * reached for them: fill in the value now */
+            if (tok->type == T_BOOL)
+                tok->value.integer = (tok->text[0] == 't');
+            break;
+        }
+
+        case T_BOOL: {
+            tok->value.integer = (tok->text[0] == 't');
+            break;
+        }
+
+        default:
+            break;
+    }
+}
+
+/*
+ * Scans one lexical element starting at ctx->file->fpos and moves fpos past
+ * it. The element's class is chosen by its first byte via FirstChar:
+ *   whitespace and '#' comments are skipped without touching tok->type;
+ *   '+'/'-' become a one-character token, or T_INT when a digit follows;
+ *   digits form a T_INT, letters start a T_ID, punctuation is a
+ *   one-character token whose type is the character itself, and '"' starts
+ *   a T_STRING that runs to the next '"'.
+ * When tok->type is non-zero afterwards, tok->text receives a heap copy of
+ * the scanned bytes and convert_value() is applied. tok->offset is set to
+ * the element's distance from the start of the file buffer.
+ *
+ * A first byte of class 0, or a string with no closing quote, is reported
+ * on stderr and terminates the process with exit(1).
+ */
+static inline void readtok(Parser* ctx) {
+    Tok* tok = &(ctx->tok);
+    char *beg = ctx->file->fpos, *curr = ctx->file->fpos;
+    tok->offset = (beg - ctx->file->fbeg);
+    /* The tables are indexed through unsigned char: plain char may be
+     * signed, and a byte >= 0x80 would then give a negative index. */
+    switch (FirstChar[(unsigned char)*curr++]) {
+        case 1: /* skip whitespace */
+            for (; SPACE[(unsigned char)*curr]; curr++);
+            break;
+
+        case 2: /* skip comments; also stop at the end of the buffer */
+            for (; *curr && *curr != '\n'; curr++);
+            break;
+
+        case 3: /* +/- as ops or number signs */
+            tok->type = *(curr-1);
+            if (!DIGIT[(unsigned char)*curr]) break;
+            /* fallthrough to number parsing */
+
+        case 4:
+            tok->type = T_INT;
+            for (; DIGIT[(unsigned char)*curr]; curr++);
+            break;
+
+        case 5:
+            tok->type = T_ID;
+            for (; ALNUM_[(unsigned char)*curr]; curr++);
+            break;
+
+        case 6: /* single char tokens */
+            tok->type = *(curr-1);
+            break;
+
+        case 7: /* string parsing */
+            tok->type = T_STRING;
+            for (; *curr && *curr != '"'; curr++);
+            if (!*curr) {
+                /* ran into the terminator before a closing quote */
+                fprintf(stderr, "Unterminated string literal\n");
+                exit(1);
+            }
+            curr++; /* consume the closing quote */
+            break;
+
+        case 0: /* error handling */
+        default:
+            fprintf(stderr, "Failed to parse token '%c'\n", *(curr-1));
+            exit(1);
+    }
+
+    /* NOTE(review): relies on the caller having reset tok->type and on the
+     * "no token" value being 0 - confirm against T_NONE in sclpl.h */
+    if (tok->type) {
+        size_t sz = (size_t)(curr - beg);
+        tok->text = malloc(sz+1);
+        if (!tok->text) {
+            fprintf(stderr, "Out of memory\n");
+            exit(1);
+        }
+        memcpy(tok->text, beg, sz);
+        tok->text[sz] = '\0';
+        convert_value(tok);
+    }
+
+    ctx->file->fpos = curr;
+}
+
+/*
+ * Loads the file at `path` and pushes it on top of ctx->file, the list of
+ * files still to be lexed, so that lex() reads from it next.
+ *
+ * A file that file_load() cannot deliver (it returns NULL for unreadable and
+ * for empty files alike) is queued with an empty buffer. Running out of
+ * memory is reported on stderr and terminates the process with exit(1).
+ */
+void lexfile(Parser* ctx, char* path) {
+    /* sizeof *file, not sizeof(file): the latter is only the size of a
+     * pointer and under-allocates the record */
+    LexFile* file = calloc(1u, sizeof *file);
+    if (!file) {
+        fprintf(stderr, "Out of memory\n");
+        exit(1);
+    }
+    file->path = strdup(path);
+    file->fbeg = file->fpos = file_load(path);
+    if (!file->fbeg) {
+        /* NOTE(review): an unreadable file is indistinguishable from an
+         * empty one here; both are lexed as empty input so that lex() never
+         * dereferences a NULL position */
+        file->fbeg = file->fpos = calloc(1u, 1u);
+    }
+    if (!file->path || !file->fbeg) {
+        fprintf(stderr, "Out of memory\n");
+        exit(1);
+    }
+    file->next = ctx->file;
+    ctx->file = file;
+}
+
+/*
+ * Advances ctx->tok to the next token.
+ *
+ * The token type is reset to T_NONE and readtok() is called until it yields
+ * a token (whitespace and comments leave the type at T_NONE). A file whose
+ * read position is at the terminating NUL - or has no buffer at all - is
+ * moved from ctx->file to the head of ctx->done and lexing continues with
+ * the next queued file. When no file is left the token type becomes
+ * T_END_FILE.
+ */
+void lex(Parser* ctx) {
+    ctx->tok.type = T_NONE;
+    while (ctx->tok.type == T_NONE) {
+        LexFile* f = ctx->file;
+        if (!f) {
+            /* no more files left to process */
+            ctx->tok.type = T_END_FILE;
+            return;
+        }
+        if (!f->fpos || !*(f->fpos)) {
+            /* current file is exhausted (or was never loaded, in which case
+             * fpos is NULL): retire it and move on to the next one */
+            ctx->file = f->next;
+            f->next = ctx->done;
+            ctx->done = f;
+        } else {
+            /* parse out a token */
+            readtok(ctx);
+        }
+    }
+}