From d3697b2223f17f134c80435d4495a06c53d0a989 Mon Sep 17 00:00:00 2001 From: "Mike D. Lowis" Date: Fri, 29 May 2015 15:30:10 -0400 Subject: [PATCH] Removed lexer.l in favor of handwritten lexer --- build.rb | 4 +- source/lexer.l | 48 ----- source/main.c | 486 +++++++++++++++++++++++++++++++++++------------- source/parser.h | 46 ----- 4 files changed, 361 insertions(+), 223 deletions(-) delete mode 100644 source/lexer.l delete mode 100644 source/parser.h diff --git a/build.rb b/build.rb index 372ed05..d1a5c55 100755 --- a/build.rb +++ b/build.rb @@ -18,9 +18,7 @@ main_env.Library('build/lib/libcds.a', FileList['modules/libcds/source/**/*.c']) runtime_libs = ['build/lib/libcds.a'] # Build the parser -main_env.CFile('source/lex.yy.c', 'source/lexer.l') -parser_srcs = (FileList['source/*.c'] + ['source/lex.yy.c']).uniq -main_env.Program('parser', parser_srcs + runtime_libs) +main_env.Program('parser', FileList['source/*.c'] + runtime_libs) #------------------------------------------------------------------------------ # Test Build Targets diff --git a/source/lexer.l b/source/lexer.l deleted file mode 100644 index 3d13e1b..0000000 --- a/source/lexer.l +++ /dev/null @@ -1,48 +0,0 @@ -%{ - -#include "parser.h" - -size_t Row = 1; -size_t Column = 1; - -int yywrap(void){ return 1; } - -#define TOKEN(type) \ - puts(#type); return type - -%} - -%% - -[\r]?[\n] Row++; Column = 1; printf(":> "); -[ \t] Column++; -#.*[\r\n]+ Row++; Column = 1; - -"=" TOKEN(ASSIGN); -"<-" TOKEN(ASSIGN); - -"." TOKEN(PERIOD); -":" TOKEN(COLON); -"|" TOKEN(PIPE); -"(" TOKEN(LPAR); -")" TOKEN(RPAR); -"@[" TOKEN(ALBRACK); -"[" TOKEN(LBRACK); -"]" TOKEN(RBRACK); -"@{" TOKEN(ALBRACE); -"{" TOKEN(LBRACE); -"}" TOKEN(RBRACE); - -[0-9]+ TOKEN(NUM); -'[^\\]' TOKEN(CHAR); -\"[^"]*\" TOKEN(STRING); -$[_a-zA-Z][_a-zA-Z0-9]* TOKEN(SYMBOL); - -[+-]+ TOKEN(BINOP); -[_a-zA-Z][_a-zA-Z0-9]* TOKEN(ID); -[_a-zA-Z][_a-zA-Z0-9]*: TOKEN(KEYW); - -. fprintf(stderr, "Unrecognized Token\n"); exit(1); - -%% - diff --git a/source/main.c b/source/main.c index 51ceab8..0075a14 100644 --- a/source/main.c +++ b/source/main.c @@ -3,109 +3,56 @@ #include #include #include -#include "parser.h" - -extern int yylex (void); -extern size_t Row; -extern size_t Column; - -/*****************************************************************************/ +#include + +#define UNKNOWN 0 +#define SYMBOL 1 +#define KEYWORD 2 +#define IDENTIFIER 3 +#define NUMBER 4 +#define STRING 5 +#define CHARACTER 6 +#define AT_LBRACK 7 +#define AT_LBRACE 8 +#define RW_SLOT 9 +#define RO_SLOT 10 +#define SEMICOLON 11 +#define COLON 12 +#define PIPE 13 +#define LPAR 14 +#define RPAR 15 +#define LBRACK 16 +#define RBRACK 17 +#define LBRACE 18 +#define RBRACE 19 +#define OPERATOR 20 + +#define ARRAY 100 +#define OBJECT 101 +#define HASHMAP 102 +#define HASHSET 103 +#define PAIR 104 +#define UNARY_MSG 105 +#define BINARY_MSG 106 +#define KEYWORD_MSG 107 +#define KEYWORD_PAIR 108 + +/* Table of Contents + *****************************************************************************/ +// Types typedef struct AST { int type; int num_children; struct AST* children[]; } AST; -AST* Tree(int type, int num_children) { - AST* tree = (AST*)calloc(1, sizeof(AST) * (num_children * sizeof(AST*))); - tree->type = type; - tree->num_children = num_children; - return tree; -} - -void PrintTree(AST* tree, int depth) { - int indent = depth * 2; - printf("%*s(", indent, ""); - printf("%d", tree->type); - if (tree->num_children == 0) { - printf(")\n"); - } else { - printf("\n"); - for (int child = 0; child < tree->num_children; child++) { - PrintTree(tree->children[child], depth+1); - } - - printf("%*s)\n", indent, ""); - } -} - -/*****************************************************************************/ - -int Current = UNKNOWN; - -static void error(const char* msg) { - fprintf(stderr, "Error: %s\n", msg); - exit(1); -} - -static bool accept(int expected) { - if (Current == UNKNOWN) { - Current = yylex(); - } - return (Current == expected); -} - -static bool expect(int expected) { - bool accepted = accept(expected); - if (!accepted) - error("unexpected symbol"); - else - Current = UNKNOWN; - return accepted; -} - -static bool optional(int expected) { - if (accept(expected)) - return expect(expected); - else - return false; -} - -/*****************************************************************************/ - -intptr_t Token_Buffer[1024]; -intptr_t* Token_Stack = Token_Buffer-1; - -void shift(int type) { - int curr = Current; - if (expect(type)) { - *(++Token_Stack) = curr; - } -} - -void reduce(int count) { - int type = *(Token_Stack--); - AST* tree = Tree(type, count); - intptr_t* stack = Token_Stack - (count-1); - for (int i = 0; i < count; i++) { - tree->children[i] = (void*)stack[i]; - } - Token_Stack -= count ; - *(++Token_Stack) = (intptr_t)tree; -} - -void shift_reduce(int type, int nchildren) { - shift(type); - reduce(nchildren); -} - -void push_reduce(int type, int nchildren) { - *(++Token_Stack) = type; - reduce(nchildren); -} - -/*****************************************************************************/ +// Globals +static int CurrChar = ' '; +static int CurrTok = UNKNOWN; +static intptr_t Token_Buffer[1024]; +static intptr_t* Token_Stack = Token_Buffer-1; +// Parsing Rules static void expression(void); static void keyword_send(void); static void binary_send(void); @@ -113,20 +60,56 @@ static void unary_send(void); static void operand(void); static void literal(void); static void array(void); -static void object(void); static void hashmap(void); static void hashset(void); +static void object(void); -static void expression(void) { +// Parsing Helpers +static void error(const char* msg); +static bool accept(int expected); +static bool expect(int expected); +static bool optional(int expected); + +// Lexical Rules +static int token(void); +static void whitespace(void); +static int symbol(void); +static int identifier(void); +static int number(void); +static int string(void); +static int character(void); +static int punctuation(void); +static int operator(void); + +// Lexical Analysis Helpers +static int current(void); +static void append(void); +static void discard(void); +static void expect_ch(int ch); +static void lex_error(void); + +// Tree Routines +static AST* Tree(int type, int num_children); +static void PrintTree(AST* tree, int depth); +void shift(int type); +void reduce(int count); +void shift_reduce(int type, int nchildren); +void push_reduce(int type, int nchildren); + +/* Parsing Rules + *****************************************************************************/ +static void expression(void) +{ keyword_send(); - optional(PERIOD); + optional(SEMICOLON); } -static void keyword_send(void) { +static void keyword_send(void) +{ int count = 0; binary_send(); - while (accept(KEYW)) { - shift_reduce(KEYW,0); + while (accept(KEYWORD)) { + shift_reduce(KEYWORD,0); binary_send(); push_reduce(KEYWORD_PAIR, 2); count++; @@ -136,24 +119,27 @@ static void keyword_send(void) { } } -static void binary_send(void) { +static void binary_send(void) +{ unary_send(); - if (accept(BINOP)) { - shift_reduce(BINOP,0); + if (accept(OPERATOR)) { + shift_reduce(OPERATOR,0); unary_send(); push_reduce(BINARY_MSG, 3); } } -static void unary_send(void) { +static void unary_send(void) +{ operand(); - while (accept(ID)) { - shift_reduce(ID, 0); + while (accept(IDENTIFIER)) { + shift_reduce(IDENTIFIER, 0); push_reduce(UNARY_MSG, 2); } } -static void operand(void) { +static void operand(void) +{ if (accept(LPAR)) { expect(LPAR); expression(); @@ -163,22 +149,24 @@ static void operand(void) { } } -static void literal(void) { - switch (Current) { - case ID: shift_reduce(ID, 0u); push_reduce(UNARY_MSG, 1); break; - case NUM: shift_reduce(NUM, 0u); break; - case STRING: shift_reduce(STRING, 0u); break; - case SYMBOL: shift_reduce(SYMBOL, 0u); break; - case CHAR: shift_reduce(CHAR, 0u); break; - case LBRACK: array(); break; - case LBRACE: object(); break; - case ALBRACE: hashmap(); break; - case ALBRACK: hashset(); break; - default: error("Invalid literal"); +static void literal(void) +{ + switch (CurrTok) { + case IDENTIFIER: shift_reduce(IDENTIFIER, 0u); push_reduce(UNARY_MSG, 1); break; + case NUMBER: shift_reduce(NUMBER, 0u); break; + case STRING: shift_reduce(STRING, 0u); break; + case SYMBOL: shift_reduce(SYMBOL, 0u); break; + case CHARACTER: shift_reduce(CHARACTER, 0u); break; + case LBRACK: array(); break; + case LBRACE: object(); break; + case AT_LBRACE: hashmap(); break; + case AT_LBRACK: hashset(); break; + default: error("Invalid literal"); } } -static void array(void) { +static void array(void) +{ int count = 0; expect(LBRACK); while (!accept(RBRACK)) { @@ -189,9 +177,10 @@ static void array(void) { push_reduce(ARRAY, count); } -static void hashmap(void) { +static void hashmap(void) +{ int count = 0; - expect(ALBRACE); + expect(AT_LBRACE); while (!accept(RBRACE)) { shift_reduce(STRING, 0); expect(COLON); @@ -206,7 +195,7 @@ static void hashmap(void) { static void hashset(void) { int count = 0; - expect(ALBRACK); + expect(AT_LBRACK); while (!accept(RBRACK)) { expression(); count++; @@ -229,8 +218,253 @@ static void object(void) push_reduce(OBJECT, 0); } -/*****************************************************************************/ +/* Parsing Helpers + *****************************************************************************/ +static void error(const char* msg) +{ + fprintf(stderr, "Error: %s\n", msg); + exit(1); +} + +static bool accept(int expected) +{ + if (CurrTok == UNKNOWN) { + CurrTok = token(); + } + return (CurrTok == expected); +} + +static bool expect(int expected) +{ + bool accepted = accept(expected); + if (!accepted) + error("unexpected symbol"); + else + CurrTok = UNKNOWN; + return accepted; +} + +static bool optional(int expected) +{ + if (accept(expected)) + return expect(expected); + else + return false; +} + +/* Lexical Rules + *****************************************************************************/ +static int token(void) +{ + // Skip any whitespace. + whitespace(); + + if (EOF == current()) { + return EOF; + } else if ('$' == current()) { + return symbol(); + } else if (isalpha(current())) { + return identifier(); + } else if (isdigit(current())) { + return number(); + } else if ('"' == current()) { + return string(); + } else if ('\'' == current()) { + return character(); + } else { + return punctuation(); + } +} + +static void whitespace(void) +{ + while (isspace(current())) + discard(); +} + +static int symbol(void) +{ + expect_ch('$'); + while (isalnum(current())) + append(); + return SYMBOL; +} + +static int identifier(void) +{ + while (isalnum(current())) + append(); + + if (':' == current()) { + expect_ch(':'); + return KEYWORD; + } else { + return IDENTIFIER; + } +} + +static int number(void) +{ + append(); + while (isdigit(current())) + append(); + return NUMBER; +} + +static int string(void) +{ + expect_ch('"'); + while('"' != current()) + append(); + expect_ch('"'); + return STRING; +} + +static int character(void) +{ + expect_ch('\''); + append(); + expect_ch('\''); + return CHARACTER; +} + +static int punctuation(void) +{ + if ('@' == current()) { + expect_ch('@'); + if ('[' == current()) { + expect_ch('['); + return AT_LBRACK; + } else if ('{' == current()) { + expect_ch('{'); + return AT_LBRACE; + } else { + return operator(); + } + } else if ('<' == current()) { + expect_ch('<'); + if ('-' == current()) { + expect_ch('-'); + return RW_SLOT; + } else { + return operator(); + } + } else { + switch(current()) { + case '=': append(); return RO_SLOT; break; + case ';': append(); return SEMICOLON; break; + case ':': append(); return COLON; break; + case '|': append(); return PIPE; break; + case '(': append(); return LPAR; break; + case ')': append(); return RPAR; break; + case '[': append(); return LBRACK; break; + case ']': append(); return RBRACK; break; + case '{': append(); return LBRACE; break; + case '}': append(); return RBRACE; break; + default: return operator(); break; + } + } +} + +static int operator(void) +{ + lex_error(); + return UNKNOWN; +} + +/* Lexical Analysis Helpers + *****************************************************************************/ +static void lex_error(void) +{ + fprintf(stderr, "Lexer error\n"); + exit(1); +} + +static int current(void) +{ + return CurrChar; +} + +static void append(void) +{ + CurrChar = getchar(); +} + +static void discard(void) +{ + CurrChar = getchar(); +} + +static void expect_ch(int ch) +{ + if (ch == current()) + append(); + else + lex_error(); +} + + +/* Tree Routines + *****************************************************************************/ +AST* Tree(int type, int num_children) +{ + AST* tree = (AST*)calloc(1, sizeof(AST) * (num_children * sizeof(AST*))); + tree->type = type; + tree->num_children = num_children; + return tree; +} + +void PrintTree(AST* tree, int depth) +{ + int indent = depth * 2; + printf("%*s(", indent, ""); + printf("%d", tree->type); + if (tree->num_children == 0) { + printf(")\n"); + } else { + printf("\n"); + for (int child = 0; child < tree->num_children; child++) { + PrintTree(tree->children[child], depth+1); + } + + printf("%*s)\n", indent, ""); + } +} + +void shift(int type) +{ + int curr = CurrTok; + if (expect(type)) { + *(++Token_Stack) = curr; + } +} + +void reduce(int count) +{ + int type = *(Token_Stack--); + AST* tree = Tree(type, count); + intptr_t* stack = Token_Stack - (count-1); + for (int i = 0; i < count; i++) { + tree->children[i] = (void*)stack[i]; + } + Token_Stack -= count ; + *(++Token_Stack) = (intptr_t)tree; +} + +void shift_reduce(int type, int nchildren) +{ + shift(type); + reduce(nchildren); +} + +void push_reduce(int type, int nchildren) +{ + *(++Token_Stack) = type; + reduce(nchildren); +} +/* Main + *****************************************************************************/ int main(int argc, char** argv) { extern void world_init(void); world_init(); diff --git a/source/parser.h b/source/parser.h deleted file mode 100644 index 2acadd6..0000000 --- a/source/parser.h +++ /dev/null @@ -1,46 +0,0 @@ -/** - @file parser.h - @brief TODO: Describe this file - */ -#ifndef PARSER_H -#define PARSER_H - -#define UNKNOWN 0 - -#define NUM 1 -#define STRING 2 -#define CHAR 3 -#define SYMBOL 4 - -#define LPAR 10 -#define RPAR 11 -#define ALBRACK 12 -#define LBRACK 13 -#define RBRACK 14 -#define ALBRACE 15 -#define LBRACE 16 -#define RBRACE 17 - -#define COLON 20 -#define PERIOD 21 -#define BINOP 22 -#define PIPE 23 - -#define RETURN 30 -#define ASSIGN 31 -#define ID 32 -#define KEYW 33 - -#define ARRAY 100 -#define OBJECT 101 -#define HASHMAP 102 -#define HASHSET 103 - -#define PAIR 104 -#define UNARY_MSG 105 -#define BINARY_MSG 106 -#define KEYWORD_MSG 107 -#define KEYWORD_PAIR 108 - -#endif /* PARSER_H */ - -- 2.52.0