#!/bin/sh
# Regenerate the tags index recursively; '&' runs it in the background.
ctags -R &
# Build the test binary with CERISE_TESTS defined, run it, and build the
# regular compiler binary only if both of those steps succeed ('&&' chain).
-cc -g -D CERISE_TESTS -Wall -Wextra -Werror --std=c99 -o cerisec-test *.c tests/*.c \
+cc -g -D CERISE_TESTS -Wall -Wextra -Werror --std=c99 -o cerisec-test *.c \
&& ./cerisec-test \
&& cc -g -Wall -Wextra -Werror --std=c99 -o cerisec *.c
/* Token Types
*****************************************************************************/
typedef enum { /* token kinds stored in a token's type field */
- T_NONE = 0,
- T_STRING = 256, T_ID, T_INT, T_BOOL, T_CHAR, T_FLOAT,
- T_REQUIRES, T_PROVIDES, T_LET, T_VAR, T_FUN, T_TYPE, T_STRUCT,
- T_UNION, T_RETURN, T_IF, T_ELSE,
- T_COUNT,
- T_ERROR = -2,
- T_END_FILE = -1
+ NONE = 0, /* no token yet; lex() keeps looping while the type is NONE */
+ IDENT = 256, /* named kinds start at 256; single-character tokens use the character itself as their type */
+ INT, /* run of digits */
+ STRING, /* text delimited by double quotes */
+ BOOL,
+ EQ, /* "==" */
+ NEQ, /* "!=" */
+ LTEQ, /* "<=" */
+ GTEQ, /* ">=" */
+ DOTDOT, /* ".." */
+ AND, /* keyword kinds: AND through WHILE each match one Keywords entry */
+ ARRAY,
+ BEGIN,
+ BY,
+ CASE,
+ CONST,
+ DIV,
+ DO,
+ ELSE,
+ ELSIF,
+ END,
+ FALSE,
+ FOR,
+ IF,
+ IMPORT,
+ IS,
+ MOD,
+ MODULE,
+ NIL,
+ NOT,
+ OF,
+ OR,
+ POINTER,
+ PROCEDURE,
+ RECORD,
+ REPEAT,
+ RETURN,
+ THEN,
+ TO,
+ TRUE,
+ TYPE,
+ UNTIL,
+ VAR,
+ WHILE,
+ COUNT, /* one past WHILE; NOTE(review): no use is visible in this view */
+ ERROR = -2,
+ END_FILE = -1 /* set by lex() once there are no more files to process */
} TokType;
typedef struct {
/* Datatype Types
*****************************************************************************/
-typedef enum {
- VOID, INT, UINT, FLOAT, ARRAY, REF, PTR, FUNC
-} Kind;
-
-typedef struct Type {
- Kind kind;
- union {
- struct Type* type;
- size_t bits;
- struct {
- struct Type* type;
- size_t count;
- } array;
- } value;
-} Type;
-
-Type* VoidType(void);
-Type* IntType(size_t nbits);
-Type* UIntType(size_t nbits);
-Type* FloatType(size_t nbits);
-Type* ArrayOf(Type* type, size_t count);
-Type* RefTo(Type* type);
-Type* PtrTo(Type* type);
-bool types_equal(Type* type1, Type* type2);
-
-/* Symbol Table
- *****************************************************************************/
-typedef enum {
- SF_TYPEDEF = (1 << 0),
- SF_CONSTANT = (1 << 1),
- SF_ARGUMENT = (1 << 2),
-} SymFlags;
-
-typedef struct Sym {
- struct Sym* next;
- bool is_typedef;
- int flags;
- char* name;
- Type* type;
-} Sym;
-
-typedef struct {
- Sym* syms;
-} SymTable;
-
-void sym_add(SymTable* syms, int flags, char* name, Type* type);
-Sym* sym_get(SymTable* syms, char* name);
-
-/* AST Types
- *****************************************************************************/
-typedef enum {
- AST_VAR, AST_FUNC, AST_EXPLIST, AST_IF, AST_APPLY,
- AST_STRING, AST_SYMBOL, AST_CHAR, AST_INT,
- AST_FLOAT, AST_BOOL, AST_IDENT, AST_OPER
-} ASTType;
-
-typedef struct AST {
- ASTType nodetype;
- Type* datatype;
- union {
- struct AST* nodes[3];
- struct {
- int oper;
- struct AST* left;
- struct AST* right;
- } op;
- /* Definition Node */
- struct {
- char* name;
- int flags;
- struct AST* value;
- } var;
- /* Expression Block Node */
- struct {
- size_t nexprs;
- struct AST** exprs;
- } explist;
- /* String, Symbol, Identifier */
- char* text;
- /* Integer */
- intptr_t integer;
- /* Float */
- double floating;
- } value;
-} AST;
-
-/* String */
-AST* String(char* val);
-char* string_value(AST* val);
-
-/* Character */
-AST* Char(int val);
-uint32_t char_value(AST* val);
-
-/* Integer */
-AST* Integer(int val);
-intptr_t integer_value(AST* val);
-
-/* Float */
-AST* Float(double val);
-double float_value(AST* val);
-
-/* Bool */
-AST* Bool(bool val);
-bool bool_value(AST* val);
-
-/* Ident */
-AST* Ident(char* val);
-char* ident_value(AST* val);
-
-/* Definition */
-AST* Var(char* name, AST* value, AST* type, int flags);
-char* var_name(AST* var);
-AST* var_value(AST* var);
-bool var_flagset(AST* var, int mask);
-
-AST* Func(AST* args, AST* body, AST* type);
-AST* func_args(AST* func);
-AST* func_body(AST* func);
-
-AST* ExpList(void);
-AST** explist_get(AST* explist, size_t* nexprs);
-void explist_append(AST* explist, AST* expr);
-void explist_prepend(AST* explist, AST* expr);
-
-AST* If(AST* cond, AST* b1, AST* b2);
-AST* if_cond(AST* ifexp);
-AST* if_then(AST* ifexp);
-AST* if_else(AST* ifexp);
-
-AST* Apply(AST* func, AST* args);
-AST* apply_func(AST* apply);
-AST* apply_args(AST* apply);
-
-AST* OpCall(int oper, AST* left, AST* right);
-
-
-/* Package Definition
- *****************************************************************************/
-typedef struct Require {
- struct Require* next;
- char* path;
- char* alias;
-} Require;
-
-typedef struct Provide {
- struct Provide* next;
- char* name;
-} Provide;
-
-typedef struct Definition {
- struct Provide* next;
- AST* ast;
-} Definition;
-
-typedef struct {
- char* name;
- SymTable* syms;
- Require* requires;
- Provide* provides;
- Definition* definitions;
-} Package;
-
-void pkg_add_require(Package* p, char* req);
-void pkg_add_provide(Package* p, char* exp);
-void pkg_add_definition(Package* p, AST* ast);
-
-/* Pretty Printing
- *****************************************************************************/
-void pprint_token(FILE* file, Tok* token, bool print_loc);
-void pprint_tree(FILE* file, AST* tree, int depth);
+//typedef enum {
+// VOID, INT, UINT, FLOAT, ARRAY, REF, PTR, FUNC
+//} Kind;
+//
+//typedef struct Type {
+// Kind kind;
+// union {
+// struct Type* type;
+// size_t bits;
+// struct {
+// struct Type* type;
+// size_t count;
+// } array;
+// } value;
+//} Type;
+//
+//Type* VoidType(void);
+//Type* IntType(size_t nbits);
+//Type* UIntType(size_t nbits);
+//Type* FloatType(size_t nbits);
+//Type* ArrayOf(Type* type, size_t count);
+//Type* RefTo(Type* type);
+//Type* PtrTo(Type* type);
+//bool types_equal(Type* type1, Type* type2);
/* Lexer and Parser Types
*****************************************************************************/
LexFile* done;
LexFile* file;
Tok tok;
- SymTable syms;
- Package pkg;
} Parser;
void lexfile(Parser* ctx, char* path);
void lexprintpos(Parser* p, FILE* file, Tok* tok);
void gettoken(Parser* ctx);
void toplevel(Parser* p);
-void codegen_init(Parser* p);
/* Option Parsing
*****************************************************************************/
/* comment start */
['#'] = 2,
/* number or op */
- ['+'] = 3, ['-'] = 3,
+// ['+'] = 3, ['-'] = 3,
/* number digits */
['0'] = 4, ['1'] = 4, ['2'] = 4, ['3'] = 4, ['4'] = 4,
['5'] = 4, ['6'] = 4, ['7'] = 4, ['8'] = 4, ['9'] = 4,
['y'] = 5, ['z'] = 5,
/* punctuation */
['('] = 6, [')'] = 6, ['['] = 6, [']'] = 6, ['{'] = 6, ['}'] = 6,
- ['.'] = 6, [','] = 6, [':'] = 6, ['&'] = 6, ['='] = 6, [';'] = 6,
- ['*'] = 6, ['\''] = 6,
+ ['.'] = 6, [','] = 6, [':'] = 6, ['='] = 6, [';'] = 6, ['^'] = 6,
+ ['+'] = 6, ['-'] = 6, ['*'] = 6, ['/'] = 6, ['<'] = 6, ['>'] = 6,
+ ['|'] = 6,
+ ['!'] = 6,
/* strings */
['"'] = 7
};
+static const char HasSecondChar[256] = { /* lookup table indexed by character code */
+ ['<'] = 1, ['>'] = 1, ['!'] = 1 /* readtok turns these into LTEQ / GTEQ / NEQ when the next character is '=' */
+};
+
char SPACE[256] = { /* lookup table indexed by character code; nonzero for space, tab, CR and LF */
[' '] = 1, ['\t'] = 1, ['\r'] = 1, ['\n'] = 1,
};
#define NUM_KEYWORDS (sizeof(Keywords) / sizeof(Keywords[0])) /* number of entries in Keywords */
KeywordDef Keywords[] = { /* reserved words and their token kinds; searched with bsearch() in convert_value */
- { "else", T_ELSE },
- { "false", T_BOOL },
- { "fun", T_FUN },
- { "if", T_IF },
- { "let", T_LET },
- { "provide", T_PROVIDES },
- { "require", T_REQUIRES },
- { "return", T_RETURN },
- { "struct", T_STRUCT },
- { "true", T_BOOL },
- { "type", T_TYPE },
- { "union", T_UNION },
- { "var", T_VAR },
+ { "and", AND }, /* bsearch() requires sorted entries; presumably keywcmp orders by keyword text -- TODO confirm, its body is not visible here */
+ { "array", ARRAY },
+ { "begin", BEGIN },
+ { "by", BY },
+ { "case", CASE },
+ { "const", CONST },
+ { "div", DIV },
+ { "do", DO },
+ { "else", ELSE },
+ { "elsif", ELSIF },
+ { "end", END },
+ { "false", FALSE }, /* NOTE(review): was T_BOOL; with FALSE/TRUE the BOOL case in convert_value no longer appears reachable from this lookup -- confirm intended */
+ { "for", FOR },
+ { "if", IF },
+ { "import", IMPORT },
+ { "is", IS },
+ { "mod", MOD },
+ { "module", MODULE },
+ { "nil", NIL },
+ { "not", NOT },
+ { "of", OF },
+ { "or", OR },
+ { "pointer", POINTER },
+ { "procedure", PROCEDURE },
+ { "record", RECORD },
+ { "repeat", REPEAT },
+ { "return", RETURN },
+ { "then", THEN },
+ { "to", TO },
+ { "true", TRUE },
+ { "type", TYPE },
+ { "until", UNTIL },
+ { "var", VAR },
+ { "while", WHILE },
};
static int keywcmp(const void* a, const void* b) {
static inline void convert_value(Tok* tok) {
switch (tok->type) {
- case T_STRING: {
+ case STRING: {
size_t len = strlen(tok->text+1);
char* strtext = malloc(len);
strncpy(strtext, tok->text+1, len);
break;
}
- case T_INT: {
+ case INT: {
tok->value.integer = strtol(tok->text, NULL, 0);
break;
}
- case T_ID: {
+ case IDENT: {
KeywordDef key = { .keyword = tok->text };
KeywordDef* match = bsearch(
&key, Keywords, NUM_KEYWORDS, sizeof(KeywordDef), keywcmp);
if (match) {
tok->type = match->type;
- if (tok->type != T_ID)
+ if (tok->type != IDENT)
convert_value(tok); /* recurse to ensure correct conversion */
}
break;
}
- case T_BOOL: {
+ case BOOL: {
tok->value.integer = (tok->text[0] == 't');
break;
}
static inline void readtok(Parser* ctx) {
Tok* tok = &(ctx->tok);
- char *beg = ctx->file->fpos, *curr = ctx->file->fpos;
+ char *beg = ctx->file->fpos;
+ char *curr = ctx->file->fpos;
tok->offset = (beg - ctx->file->fbeg);
switch (FirstChar[(int)*curr++]) {
case 1: /* skip whitespace */
tok->type = *(curr-1);
if (!DIGIT[(int)*curr]) break;
/* parse it as an int */
- tok->type = T_INT;
+ tok->type = INT;
for (; DIGIT[(int)*curr]; curr++);
break;
case 4:
- tok->type = T_INT;
+ tok->type = INT;
for (; DIGIT[(int)*curr]; curr++);
break;
case 5:
- tok->type = T_ID;
+ tok->type = IDENT;
for (; ALNUM_[(int)*curr]; curr++);
break;
- case 6: /* single char tokens */
- tok->type = *(curr-1);
+ case 6: /* single/double char tokens */
+ if (HasSecondChar[(int)*(curr-1)] && *(curr) == '=')
+ {
+ curr++;
+ switch (*(curr-2))
+ {
+ case '!': tok->type = NEQ; break;
+ case '<': tok->type = LTEQ; break;
+ case '>': tok->type = GTEQ; break;
+ default: goto error; break;
+ }
+ }
+ else if (*(curr-1) == '.' || *(curr-1) == '=')
+ {
+ tok->type = *(curr-1);
+ if (*(curr) == tok->type)
+ {
+ curr++;
+ tok->type = (tok->type == '.' ? DOTDOT : EQ);
+ }
+ }
+ else
+ {
+ tok->type = *(curr-1);
+ }
break;
case 7: /* string parsing */
- tok->type = T_STRING;
+ tok->type = STRING;
for (; *curr != '"'; curr++);
curr++;
break;
case 0: /* error handling */
default:
+ error:
fprintf(stderr, "Failed to parse token '%c'\n", *(curr-1));
exit(1);
}
void lex(Parser* ctx) {
ctx->tok.file = ctx->file->path;
- ctx->tok.type = T_NONE;
- while (ctx->tok.type == T_NONE) {
+ ctx->tok.type = NONE;
+ while (ctx->tok.type == NONE) {
if (!ctx->file) {
/* no more files left to process */
- ctx->tok.type = T_END_FILE;
+ ctx->tok.type = END_FILE;
+ ctx->tok.text = "";
return;
} else if (!(ctx->file->fpos) || !*(ctx->file->fpos)) {
/* grab the next file to process */
}
fprintf(file, "%s:%zu:%zu:", tok->file, line, col);
}
+
+#ifdef CERISE_TESTS
+#include "atf.h"
+
+TEST_SUITE(Lexer) /* lexer test suite; the test-build main runs it via RUN_EXTERN_TEST_SUITE(Lexer) */
+{
+ struct {
+ char* text;
+ int type;
+ } Tokens[] = { /* expected (text, type) pairs, in the order the test lexes them from tests/tokens.txt */
+ { "+", '+' }, /* single-character tokens use the character as their type */
+ { "-", '-' },
+ { "/", '/' },
+ { "*", '*' },
+ { "^", '^' },
+ { "=", '=' },
+ { "==", EQ },
+ { "!=", NEQ },
+ { "<", '<' },
+ { ">", '>' },
+ { "<=", LTEQ },
+ { ">=", GTEQ },
+ { ".", '.' },
+ { ",", ',' },
+ { ";", ';' },
+ { "..", DOTDOT },
+ { "|", '|' },
+ { ":", ':' },
+ { "(", '(' },
+ { ")", ')' },
+ { "[", '[' },
+ { "]", ']' },
+ { "{", '{' },
+ { "}", '}' },
+ { "and", AND }, /* one entry per row of the Keywords table */
+ { "array", ARRAY },
+ { "begin", BEGIN },
+ { "by", BY },
+ { "case", CASE },
+ { "const", CONST },
+ { "div", DIV },
+ { "do", DO },
+ { "else", ELSE },
+ { "elsif", ELSIF },
+ { "end", END },
+ { "false", FALSE },
+ { "for", FOR },
+ { "if", IF },
+ { "import", IMPORT },
+ { "is", IS },
+ { "mod", MOD },
+ { "module", MODULE },
+ { "nil", NIL },
+ { "not", NOT },
+ { "of", OF },
+ { "or", OR },
+ { "pointer", POINTER },
+ { "procedure", PROCEDURE },
+ { "record", RECORD },
+ { "repeat", REPEAT },
+ { "return", RETURN },
+ { "then", THEN },
+ { "to", TO },
+ { "true", TRUE },
+ { "type", TYPE },
+ { "until", UNTIL },
+ { "var", VAR },
+ { "while", WHILE },
+ { "", END_FILE }, /* lex() sets type END_FILE and text "" when no files remain */
+ };
+
+
+ TEST(Lexer recognizes all required tokens)
+ {
+ Parser ctx = {0};
+ lexfile(&ctx, "tests/tokens.txt"); /* path is relative, so the working directory matters */
+ for (size_t i = 0; i < sizeof(Tokens)/sizeof(Tokens[0]); i++)
+ {
+ lex(&ctx); /* read the next token into ctx.tok */
+ //printf("(%d, '%s') != (%d, '%s')\n",
+ // ctx.tok.type, ctx.tok.text, Tokens[i].type, Tokens[i].text);
+ CHECK(ctx.tok.type == Tokens[i].type);
+ CHECK(ctx.tok.text != NULL); /* checked ahead of the strcmp on the next line */
+ CHECK(!strcmp(ctx.tok.text, Tokens[i].text));
+ }
+ }
+}
+
+#endif
+
#include "cerise.h"
+#ifndef CERISE_TESTS
+
char* ARGV0;
char* Artifact = "bin";
/* Driver Modes
*****************************************************************************/
-static int emit_binary(Parser* ctx, int argc, char **argv) {
+static int emit_binary(Parser* ctx, int argc, char **argv) /* driver mode main() selects when Artifact is "bin" */
+{
(void)ctx, (void)argc, (void)argv; /* stub: every argument is ignored; the casts silence unused-parameter warnings */
return 0; /* always returns 0, which main() passes on as its result */
}
-static int emit_library(Parser* ctx, int argc, char **argv) {
+static int emit_library(Parser* ctx, int argc, char **argv) /* driver mode main() selects when Artifact is "lib" */
+{
(void)ctx, (void)argc, (void)argv; /* stub: every argument is ignored; the casts silence unused-parameter warnings */
return 0; /* always returns 0, which main() passes on as its result */
}
/* Main Routine and Usage
*****************************************************************************/
-void usage(void) {
+void usage(void)
+{
fprintf(stderr, "%s\n",
"Usage: sclpl [options...] [-A artifact] [file...]\n"
"\n-A<artifact> Emit the given type of artifact"
exit(1);
}
-int main(int argc, char **argv) {
+int main(int argc, char **argv)
+{
/* Option parsing */
OPTBEGIN {
case 'A': Artifact = EOPTARG(usage()); break;
for (; argc; argc--,argv++)
lexfile(&ctx, *argv);
/* Execute the main compiler process */
- if (0 == strcmp("bin", Artifact)) {
+ if (0 == strcmp("bin", Artifact))
+ {
return emit_binary(&ctx, argc, argv);
- } else if (0 == strcmp("lib", Artifact)) {
+ }
+ else if (0 == strcmp("lib", Artifact))
+ {
return emit_library(&ctx, argc, argv);
- } else {
+ }
+ else
+ {
fprintf(stderr, "Unknown artifact type: '%s'\n\n", Artifact);
usage();
}
return 1;
}
+
+#else
+
+#define INCLUDE_DEFS
+#include "atf.h"
+int main(int argc, char **argv) /* entry point used when CERISE_TESTS is defined (the #else branch of the driver) */
+{
+ atf_init(argc, argv); /* hands the command line to the test framework */
+ RUN_EXTERN_TEST_SUITE(Lexer); /* runs the suite declared with TEST_SUITE(Lexer) */
+ return atf_print_results(); /* the framework's return value becomes the exit status */
+}
+
+#endif