From: Mike D. Lowis Date: Mon, 5 Oct 2015 20:45:36 +0000 (-0400) Subject: Started rework of lexer and parser X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=f65378c019feb61d00bb8485ff80813b425412df;p=proto%2Fsclpl.git Started rework of lexer and parser --- diff --git a/source/libparse/grammar.c b/source/libparse/grammar.c index b18f6b8..d718048 100644 --- a/source/libparse/grammar.c +++ b/source/libparse/grammar.c @@ -33,7 +33,7 @@ AST* toplevel(Parser* p) definition(p); else expression(p); - p_tree = get_tree(p); + //p_tree = get_tree(p); } catch(ParseException) { /* Do nothing, the tree is bad */ } @@ -42,29 +42,29 @@ AST* toplevel(Parser* p) static void require(Parser* p) { - size_t mrk = mark(p); + //size_t mrk = mark(p); expect(p, T_STRING); expect(p, T_END); - reduce(p, mrk); + //reduce(p, mrk); } static void type_annotation(Parser* p) { - size_t mrk = mark(p); + //size_t mrk = mark(p); expect(p, T_ID); type(p); expect(p, T_END); - reduce(p, mrk); + //reduce(p, mrk); } static void type_definition(Parser* p) { - size_t mrk = mark(p); + //size_t mrk = mark(p); expect(p, T_ID); expect_str(p, T_ID, "is"); type(p); expect(p, T_END); - reduce(p, mrk); + //reduce(p, mrk); } static void type(Parser* p) { @@ -79,48 +79,48 @@ static void type(Parser* p) { } static void tuple(Parser* p) { - size_t mrk = mark(p); - insert(p, T_ID, lexer_dup("tuple")); + //size_t mrk = mark(p); + //insert(p, T_ID, lexer_dup("tuple")); do { type(p); } while (accept(p, T_COMMA)); expect(p, T_RBRACE); - reduce(p, mrk); + //reduce(p, mrk); } static void function(Parser* p) { - size_t mark1 = mark(p) - 1; - size_t mark2 = mark(p); + //size_t mark1 = mark(p) - 1; + //size_t mark2 = mark(p); while (!accept(p, T_RPAR)) { type(p); if (T_RPAR != peek(p)->type) expect(p, T_COMMA); } - reduce(p, mark2); - reduce(p, mark1); + //reduce(p, mark2); + //reduce(p, mark1); } static void definition(Parser* p) { - size_t mrk = mark(p); + //size_t mrk = mark(p); expect(p,T_ID); if (peek(p)->type == T_LPAR) { - insert(p, T_ID, lexer_dup("fn")); + //insert(p, T_ID, lexer_dup("fn")); fn_stmnt(p); } else { expression(p); expect(p,T_END); } - reduce(p, mrk); + //reduce(p, mrk); } static void expression(Parser* p) { if (accept(p, T_LPAR)) { - size_t mrk = mark(p); + //size_t mrk = mark(p); expression(p); expect(p, T_RPAR); - reduce(p, mrk); + //reduce(p, mrk); } else if (accept_str(p, T_ID, "if")) { if_stmnt(p); } else if (accept_str(p, T_ID, "fn")) { @@ -153,7 +153,7 @@ static void literal(Parser* p) static void arglist(Parser* p) { - size_t mrk = mark(p); + //size_t mrk = mark(p); expect(p, T_LPAR); while(peek(p)->type != T_RPAR) { expression(p); @@ -161,37 +161,37 @@ static void arglist(Parser* p) expect(p, T_COMMA); } expect(p, T_RPAR); - reduce(p, mrk); + //reduce(p, mrk); } static void if_stmnt(Parser* p) { - size_t mrk = mark(p); + //size_t mrk = mark(p); expression(p); expression(p); if (accept_str(p, T_ID, "else")) { expression(p); } expect(p,T_END); - reduce(p, mrk); + //reduce(p, mrk); } static void fn_stmnt(Parser* p) { - size_t mark1 = mark(p); + //size_t mark1 = mark(p); expect(p, T_LPAR); - size_t mark2 = mark(p); + //size_t mark2 = mark(p); while(peek(p)->type != T_RPAR) { expect(p, T_ID); if(peek(p)->type != T_RPAR) expect(p, T_COMMA); } expect(p, T_RPAR); - reduce(p, mark2); + //reduce(p, mark2); while(peek(p)->type != T_END) { expression(p); } expect(p, T_END); - reduce(p, mark1); + //reduce(p, mark1); } diff --git a/source/libparse/lexer.c b/source/libparse/lexer.c index 91499c5..5ccced8 100644 --- a/source/libparse/lexer.c +++ b/source/libparse/lexer.c @@ -6,195 +6,35 @@ */ #include -static char* read(Lexer* ctx, size_t* line, size_t* col); -static bool eof(Lexer* ctx); -static bool eol(Lexer* ctx); -static void getline(Lexer* ctx); -static void skip_ws(Lexer* ctx); -static char current(Lexer* ctx); -static bool oneof(Lexer* ctx, const char* p_set); -static char* dup(Lexer* ctx, size_t start_idx, size_t len); -static char* read_string(Lexer* ctx); - -static Token* lexer_make_token(size_t line, size_t col, char* text); -static Token* lexer_punc(char* text); -static Token* lexer_char(char* text); -static Token* lexer_radix_int(char* text); -static Token* lexer_number(char* text); -static Token* lexer_integer(char* text, int base); -static Token* lexer_float(char* text); -static Token* lexer_bool(char* text); -static Token* lexer_var(char* text); -static bool lexer_oneof(const char* class, char c); -static bool is_float(char* text); -static int read_radix(char ch); - -static void lex_tok_free(void* p_obj) { - Token* p_tok = (Token*)p_obj; - if ((p_tok->type != T_BOOL) && (p_tok->type != T_CHAR) && (NULL != p_tok->value)) - mem_release(p_tok->value); +static char current(Parser* ctx) { + return ctx->line[ctx->index]; } -Token* token(TokenType type, void* val) { - Token* p_tok = (Token*)mem_allocate(sizeof(Token), &lex_tok_free); - p_tok->type = type; - p_tok->value = val; - return p_tok; -} - -static void lexer_free(void* p_obj) { -} - -Lexer* lexer_new(char* p_prompt, FILE* p_input) { - Lexer* p_lexer = (Lexer*)mem_allocate(sizeof(Lexer), &lexer_free); - - p_lexer->p_line = NULL; - p_lexer->index = 0; - p_lexer->lineno = 0; - p_lexer->p_input = p_input; - p_lexer->p_prompt = p_prompt; - - return p_lexer; -} - -Token* lexer_read(Lexer* p_lexer) { - Token* p_tok = NULL; - size_t line, col; - char* text = read(p_lexer, &line, &col); - if (NULL != text) { - p_tok = lexer_make_token(line, col, text); - free(text); - } - return p_tok; -} - -void lexer_skipline(Lexer* p_lexer) { - getline(p_lexer); -} - -static Token* lexer_make_token(size_t line, size_t col, char* text) { - Token* p_tok = NULL; - if (0 == strcmp(text,"end")) { - p_tok = token(T_END, NULL); - } else if (lexer_oneof("()[]{};,'", text[0])) { - p_tok = lexer_punc(text); - } else if ('"' == text[0]) { - text[strlen(text)-1] = '\0'; - p_tok = token(T_STRING, lexer_dup(&text[1])); - } else if (text[0] == '\\') { - p_tok = lexer_char(text); - } else if ((text[0] == '0') && lexer_oneof("bodh",text[1])) { - p_tok = lexer_radix_int(text); - } else if (lexer_oneof("+-0123456789",text[0])) { - p_tok = lexer_number(text); - } else if ((0 == strcmp(text,"true")) || (0 == strcmp(text,"false"))) { - p_tok = lexer_bool(text); - } else { - p_tok = lexer_var(text); - } - /* If we found a valid token then fill in the location details */ - if (NULL != p_tok) { - p_tok->line = line; - p_tok->col = col; - } - return p_tok; -} - -static Token* lexer_punc(char* text) -{ - Token* p_tok = NULL; - switch (text[0]) { - case '(': p_tok = token(T_LPAR, NULL); break; - case ')': p_tok = token(T_RPAR, NULL); break; - case '{': p_tok = token(T_LBRACE, NULL); break; - case '}': p_tok = token(T_RBRACE, NULL); break; - case '[': p_tok = token(T_LBRACK, NULL); break; - case ']': p_tok = token(T_RBRACK, NULL); break; - case ';': p_tok = token(T_END, NULL); break; - case ',': p_tok = token(T_COMMA, NULL); break; - case '\'': p_tok = token(T_SQUOTE, NULL); break; - } - return p_tok; -} - -static Token* lexer_char(char* text) +static bool eol(Parser* ctx) { - Token* p_tok = NULL; - static const char* lookup_table[5] = { - " \0space", - "\n\0newline", - "\r\0return", - "\t\0tab", - "\v\0vtab" - }; - if (strlen(text) == 2) { - p_tok = token(T_CHAR, (void*)((intptr_t)text[1])); - } else { - for(int i = 0; i < 5; i++) { - if (0 == strcmp(&text[1], &(lookup_table[i][2]))) { - p_tok = token(T_CHAR, (void*)((intptr_t)lookup_table[i][0])); - break; - } + bool ret = true; + size_t index = ctx->index; + char ch; + while((NULL != ctx->line) && ('\0' != (ch = ctx->line[index]))) { + if((' '!=ch) && ('\t'!=ch) && ('\r'!=ch) && ('\n'!=ch)) { + ret = false; + break; } - if (NULL == p_tok) - p_tok = lexer_var(text); + index++; } - return p_tok; -} - -static Token* lexer_radix_int(char* text) -{ - Token* ret = lexer_integer(&text[2], read_radix(text[1])); - if (NULL == ret) - ret = lexer_var(text); return ret; } -static Token* lexer_number(char* text) -{ - Token* p_tok = NULL; - if (is_float(text)) - p_tok = lexer_float(text); - else - p_tok = lexer_integer(text, 10); - return (NULL == p_tok) ? lexer_var(text) : p_tok; -} - -static Token* lexer_integer(char* text, int base) -{ - char* end; - long* p_int = (long*)mem_allocate(sizeof(long), NULL); - errno = 0; - *p_int = strtol(text, &end, base); - assert(errno == 0); - return (end[0] == '\0') ? token(T_INT, p_int) : NULL; -} - -static Token* lexer_float(char* text) -{ - char* end; - double* p_dbl = (double*)mem_allocate(sizeof(double), NULL); - errno = 0; - *p_dbl = strtod(text, &end); - assert(errno == 0); - return (end[0] == '\0') ? token(T_FLOAT, p_dbl) : NULL; -} - -static Token* lexer_bool(char* text) -{ - return token(T_BOOL, (void*)((intptr_t)((0 == strcmp(text,"true")) ? true : false))); -} - -static Token* lexer_var(char* text) +static bool eof(Parser* ctx) { - return token(T_ID, lexer_dup(text)); + return (eol(ctx) && feof(ctx->input)); } -static bool lexer_oneof(const char* class, char c) { +static bool oneof(Parser* ctx, const char* set) { bool ret = false; - size_t sz = strlen(class); + size_t sz = strlen(set); for (size_t idx = 0; idx < sz; idx++) { - if (c == class[idx]) { + if (current(ctx) == set[idx]) { ret = true; break; } @@ -202,59 +42,42 @@ static bool lexer_oneof(const char* class, char c) { return ret; } -static bool is_float(char* text) { - while (text[0] != '\0') - if (text[0] == '.') - return true; - else - text++; - return false; +static char* dup(Parser* ctx, size_t start_idx, size_t len) { + char* str = (char*)malloc(len+1); + memcpy(str, &(ctx->line[start_idx]), len); + str[len] = '\0'; + return str; } -char* lexer_dup(const char* p_old) { - size_t length = strlen(p_old); - char* p_str = (char*)mem_allocate(length+1, NULL); - memcpy(p_str, p_old, length); - p_str[length] = '\0'; - return p_str; -} - -static int read_radix(char ch) { - int ret = -1; - switch(ch) { - case 'b': ret = 2; break; - case 'o': ret = 8; break; - case 'd': ret = 10; break; - case 'h': ret = 16; break; - } - return ret; -} - -/*****************************************************************************/ +static void getline(Parser* ctx) { + int c; + size_t capacity = 8; + size_t index = 0; + /* Reallocate and clear the line buffer */ + ctx->line = realloc(ctx->line, capacity); + ctx->line[0] = '\0'; + ctx->index = 0; -static char* read(Lexer* ctx, size_t* line, size_t* column) { - char* p_tok = NULL; - skip_ws(ctx); - *line = ctx->lineno; - *column = ctx->index+1; + /* If we have not yet reached the end of the file, read the next line */ if (!eof(ctx)) { - if (oneof(ctx, "()[]{};,'")) { - p_tok = dup(ctx, ctx->index, 1); - ctx->index++; - } else if (current(ctx) == '"') { - p_tok = read_string(ctx); - } else { - size_t start = ctx->index; - while(!oneof(ctx," \t\r\n()[]{};,'\"") && (current(ctx) != '\0')) { - ctx->index++; + if (NULL != ctx->prompt) + printf("%s", ctx->prompt); + while(('\n' != (c = fgetc(ctx->input))) && (EOF != c)) { + if (index+2 == capacity) { + capacity <<= 1u; + ctx->line = realloc(ctx->line, capacity); } - p_tok = dup(ctx, start, ctx->index - start); + ctx->line[index++] = c; } + ctx->line[index++] = (c == EOF) ? '\0' : c; + ctx->line[index++] = '\0'; + ctx->index = 0; + /* Increment line count */ + ctx->lineno++; } - return p_tok; } -static char* read_string(Lexer* ctx) { +static char* read_string(Parser* ctx) { size_t capacity = 8; size_t index = 0; char* tok = (char*)malloc(capacity); @@ -294,57 +117,9 @@ static char* read_string(Lexer* ctx) { return tok; } -static bool eof(Lexer* ctx) -{ - return (eol(ctx) && feof(ctx->p_input)); -} - -static bool eol(Lexer* ctx) -{ - bool ret = true; - size_t index = ctx->index; - char ch; - while((NULL != ctx->p_line) && ('\0' != (ch = ctx->p_line[index]))) { - if((' '!=ch) && ('\t'!=ch) && ('\r'!=ch) && ('\n'!=ch)) { - ret = false; - break; - } - index++; - } - return ret; -} - -static void getline(Lexer* ctx) { - int c; - size_t capacity = 8; - size_t index = 0; - /* Reallocate and clear the line buffer */ - ctx->p_line = realloc(ctx->p_line, capacity); - ctx->p_line[0] = '\0'; - ctx->index = 0; - - /* If we have not yet reached the end of the file, read the next line */ - if (!eof(ctx)) { - if (NULL != ctx->p_prompt) - printf("%s", ctx->p_prompt); - while(('\n' != (c = fgetc(ctx->p_input))) && (EOF != c)) { - if (index+2 == capacity) { - capacity <<= 1u; - ctx->p_line = realloc(ctx->p_line, capacity); - } - ctx->p_line[index++] = c; - } - ctx->p_line[index++] = (c == EOF) ? '\0' : c; - ctx->p_line[index++] = '\0'; - ctx->index = 0; - /* Increment line count */ - ctx->lineno++; - } -} - -static void skip_ws(Lexer* ctx) { +static void skip_ws(Parser* ctx) { /* If we haven't read a line yet, read one now */ - if (NULL == ctx->p_line) + if (NULL == ctx->line) getline(ctx); /* Fast forward past whitespace and read a newline if necessary */ while(!eof(ctx)) { @@ -358,26 +133,293 @@ static void skip_ws(Lexer* ctx) { } } -static char current(Lexer* ctx) { - return ctx->p_line[ctx->index]; -} - -static bool oneof(Lexer* ctx, const char* p_set) { - bool ret = false; - size_t sz = strlen(p_set); - for (size_t idx = 0; idx < sz; idx++) { - if (current(ctx) == p_set[idx]) { - ret = true; - break; +static char* scan(Parser* ctx, size_t* line, size_t* column) { + char* tok = NULL; + skip_ws(ctx); + *line = ctx->lineno; + *column = ctx->index+1; + if (!eof(ctx)) { + if (oneof(ctx, "()[]{};,'")) { + tok = dup(ctx, ctx->index, 1); + ctx->index++; + } else if (current(ctx) == '"') { + tok = read_string(ctx); + } else { + size_t start = ctx->index; + while(!oneof(ctx," \t\r\n()[]{};,'\"") && (current(ctx) != '\0')) { + ctx->index++; + } + tok = dup(ctx, start, ctx->index - start); } } - return ret; + return tok; } -static char* dup(Lexer* ctx, size_t start_idx, size_t len) { - char* p_str = (char*)malloc(len+1); - memcpy(p_str, &(ctx->p_line[start_idx]), len); - p_str[len] = '\0'; - return p_str; + + + + +#if 0 + +// Scanning phase +static char* scan(Parser* ctx, size_t* line, size_t* col); + +// Classification phase +Tok* classify(const char* file, size_t line, size_t col, char* text); + +Tok* gettoken(Parser* lexer) { + Token* tok = NULL; + size_t line, col; + char* text = scan(lexer, &line, &col); + tok = classify(NULL, line, col, text); + return tok; +} + +#endif + + +void skipline(Parser* ctx) +{ +} + +Tok* gettoken(Parser* ctx) +{ + Tok* tok = NULL; + size_t line, col; + char* text = scan(lexer, &line, &col); + tok = classify(NULL, line, col, text); + return tok; } + + + + + +//static char* read(Parser* ctx, size_t* line, size_t* col); +//static bool eof(Parser* ctx); +//static bool eol(Parser* ctx); +//static void getline(Parser* ctx); +//static void skip_ws(Parser* ctx); +//static char current(Parser* ctx); +//static bool oneof(Parser* ctx, const char* set); +//static char* dup(Parser* ctx, size_t start_idx, size_t len); +//static char* read_string(Parser* ctx); +// +//static Token* lexer_make_token(size_t line, size_t col, char* text); +//static Token* lexer_punc(char* text); +//static Token* lexer_char(char* text); +//static Token* lexer_radix_int(char* text); +//static Token* lexer_number(char* text); +//static Token* lexer_integer(char* text, int base); +//static Token* lexer_float(char* text); +//static Token* lexer_bool(char* text); +//static Token* lexer_var(char* text); +//static bool lexer_oneof(const char* class, char c); +//static bool is_float(char* text); +//static int read_radix(char ch); +// +//static void lex_tok_free(void* obj) { +// Token* tok = (Token*)obj; +// if ((tok->type != T_BOOL) && (tok->type != T_CHAR) && (NULL != tok->value)) +// mem_release(tok->value); +//} +// +//Token* token(TokenType type, void* val) { +// Token* tok = (Token*)mem_allocate(sizeof(Token), &lex_tok_free); +// tok->type = type; +// tok->value = val; +// return tok; +//} +// +//static void lexer_free(void* obj) { +//} +// +//Parser* lexer_new(char* prompt, FILE* input) { +// Parser* lexer = (Parser*)mem_allocate(sizeof(Parser), &lexer_free); +// +// lexer->line = NULL; +// lexer->index = 0; +// lexer->lineno = 0; +// lexer->input = input; +// lexer->prompt = prompt; +// +// return lexer; +//} +// +//Token* lexer_read(Parser* lexer) { +// Token* tok = NULL; +// size_t line, col; +// char* text = read(lexer, &line, &col); +// if (NULL != text) { +// tok = lexer_make_token(line, col, text); +// free(text); +// } +// return tok; +//} +// +//void lexer_skipline(Parser* lexer) { +// getline(lexer); +//} +// +//static Token* lexer_make_token(size_t line, size_t col, char* text) { +// Token* tok = NULL; +// if (0 == strcmp(text,"end")) { +// tok = token(T_END, NULL); +// } else if (lexer_oneof("()[]{};,'", text[0])) { +// tok = lexer_punc(text); +// } else if ('"' == text[0]) { +// text[strlen(text)-1] = '\0'; +// tok = token(T_STRING, lexer_dup(&text[1])); +// } else if (text[0] == '\\') { +// tok = lexer_char(text); +// } else if ((text[0] == '0') && lexer_oneof("bodh",text[1])) { +// tok = lexer_radix_int(text); +// } else if (lexer_oneof("+-0123456789",text[0])) { +// tok = lexer_number(text); +// } else if ((0 == strcmp(text,"true")) || (0 == strcmp(text,"false"))) { +// tok = lexer_bool(text); +// } else { +// tok = lexer_var(text); +// } +// /* If we found a valid token then fill in the location details */ +// if (NULL != tok) { +// tok->line = line; +// tok->col = col; +// } +// return tok; +//} +// +//static Token* lexer_punc(char* text) +//{ +// Token* tok = NULL; +// switch (text[0]) { +// case '(': tok = token(T_LPAR, NULL); break; +// case ')': tok = token(T_RPAR, NULL); break; +// case '{': tok = token(T_LBRACE, NULL); break; +// case '}': tok = token(T_RBRACE, NULL); break; +// case '[': tok = token(T_LBRACK, NULL); break; +// case ']': tok = token(T_RBRACK, NULL); break; +// case ';': tok = token(T_END, NULL); break; +// case ',': tok = token(T_COMMA, NULL); break; +// case '\'': tok = token(T_SQUOTE, NULL); break; +// } +// return tok; +//} +// +//static Token* lexer_char(char* text) +//{ +// Token* tok = NULL; +// static const char* lookutable[5] = { +// " \0space", +// "\n\0newline", +// "\r\0return", +// "\t\0tab", +// "\v\0vtab" +// }; +// if (strlen(text) == 2) { +// tok = token(T_CHAR, (void*)((intptr_t)text[1])); +// } else { +// for(int i = 0; i < 5; i++) { +// if (0 == strcmp(&text[1], &(lookutable[i][2]))) { +// tok = token(T_CHAR, (void*)((intptr_t)lookutable[i][0])); +// break; +// } +// } +// if (NULL == tok) +// tok = lexer_var(text); +// } +// return tok; +//} +// +//static Token* lexer_radix_int(char* text) +//{ +// Token* ret = lexer_integer(&text[2], read_radix(text[1])); +// if (NULL == ret) +// ret = lexer_var(text); +// return ret; +//} +// +//static Token* lexer_number(char* text) +//{ +// Token* tok = NULL; +// if (is_float(text)) +// tok = lexer_float(text); +// else +// tok = lexer_integer(text, 10); +// return (NULL == tok) ? lexer_var(text) : tok; +//} +// +//static Token* lexer_integer(char* text, int base) +//{ +// char* end; +// long* int = (long*)mem_allocate(sizeof(long), NULL); +// errno = 0; +// *int = strtol(text, &end, base); +// assert(errno == 0); +// return (end[0] == '\0') ? token(T_INT, int) : NULL; +//} +// +//static Token* lexer_float(char* text) +//{ +// char* end; +// double* dbl = (double*)mem_allocate(sizeof(double), NULL); +// errno = 0; +// *dbl = strtod(text, &end); +// assert(errno == 0); +// return (end[0] == '\0') ? token(T_FLOAT, dbl) : NULL; +//} +// +//static Token* lexer_bool(char* text) +//{ +// return token(T_BOOL, (void*)((intptr_t)((0 == strcmp(text,"true")) ? true : false))); +//} +// +//static Token* lexer_var(char* text) +//{ +// return token(T_ID, lexer_dup(text)); +//} +// +//static bool lexer_oneof(const char* class, char c) { +// bool ret = false; +// size_t sz = strlen(class); +// for (size_t idx = 0; idx < sz; idx++) { +// if (c == class[idx]) { +// ret = true; +// break; +// } +// } +// return ret; +//} +// +//static bool is_float(char* text) { +// while (text[0] != '\0') +// if (text[0] == '.') +// return true; +// else +// text++; +// return false; +//} +// +//char* lexer_dup(const char* old) { +// size_t length = strlen(old); +// char* str = (char*)mem_allocate(length+1, NULL); +// memcpy(str, old, length); +// str[length] = '\0'; +// return str; +//} +// +//static int read_radix(char ch) { +// int ret = -1; +// switch(ch) { +// case 'b': ret = 2; break; +// case 'o': ret = 8; break; +// case 'd': ret = 10; break; +// case 'h': ret = 16; break; +// } +// return ret; +//} +// +///*****************************************************************************/ +// + diff --git a/source/libparse/libparse.h b/source/libparse/libparse.h index 0f79ae8..7753ab6 100644 --- a/source/libparse/libparse.h +++ b/source/libparse/libparse.h @@ -16,33 +16,61 @@ #include "vec.h" #include "exn.h" -typedef struct { - char* p_line; - size_t index; - size_t lineno; - FILE* p_input; - char* p_prompt; -} Lexer; +/* Token Types + *****************************************************************************/ +#if 1 typedef enum { T_ID, T_CHAR, T_INT, T_FLOAT, T_BOOL, T_STRING, T_LBRACE, T_RBRACE, T_LBRACK, T_RBRACK, T_LPAR, T_RPAR, T_COMMA, T_SQUOTE, T_DQUOTE, T_END, T_END_FILE -} TokenType; +} TokType; typedef struct { - TokenType type; const char* file; size_t line; size_t col; - void* value; -} Token; + TokType type; + union { + char* text; + uint32_t character; + intptr_t integer; + double floating; + bool boolean; + } value; +} Tok; + +Tok* Token(const char* file, size_t line, size_t col, char* text); + +#endif + +/* AST Types + *****************************************************************************/ + +/* Lexer and Parser Types + *****************************************************************************/ +//typedef enum { +// T_ID, T_CHAR, T_INT, T_FLOAT, T_BOOL, T_STRING, T_LBRACE, T_RBRACE, T_LBRACK, +// T_RBRACK, T_LPAR, T_RPAR, T_COMMA, T_SQUOTE, T_DQUOTE, T_END, T_END_FILE +//} TokenType; +// +//typedef struct { +// TokenType type; +// const char* file; +// size_t line; +// size_t col; +// void* value; +//} Token; DECLARE_EXCEPTION(ParseException); typedef struct { - Lexer* p_lexer; - Token* p_tok; - vec_t* p_tok_buf; + char* line; + size_t index; + size_t lineno; + FILE* input; + char* prompt; + Tok* tok; + vec_t* tokbuf; } Parser; typedef enum { @@ -53,33 +81,35 @@ typedef enum { typedef struct { ASTTag tag; union { - Token* tok; + Tok* tok; vec_t* vec; } ptr; } AST; // Lexer routines -Lexer* lexer_new(char* p_prompt, FILE* p_input); -Token* token(TokenType type, void* val); -Token* lexer_read(Lexer* p_lexer); -void lexer_skipline(Lexer* p_lexer); -char* lexer_dup(const char* p_old); +Tok* gettoken(Parser* ctx); + +//Lexer* lexer_new(char* p_prompt, FILE* p_input); +//Token* token(TokenType type, void* val); +//Token* lexer_read(Lexer* p_lexer); +//void lexer_skipline(Lexer* p_lexer); +//char* lexer_dup(const char* p_old); // Parser routines Parser* parser_new(char* p_prompt, FILE* input); void fetch(Parser* p_parser); -Token* peek(Parser* p_parser); +Tok* peek(Parser* p_parser); bool parser_eof(Parser* p_parser); void parser_resume(Parser* p_parser); void error(Parser* p_parser, const char* p_text); -bool accept(Parser* p_parser, TokenType type); -bool accept_str(Parser* p_parser, TokenType type, const char* p_text); -bool expect(Parser* p_parser, TokenType type); -bool expect_str(Parser* p_parser, TokenType type, const char* p_text); -size_t mark(Parser* p_parser); -void reduce(Parser* p_parser, size_t mark); -AST* get_tree(Parser* p_parser); -void insert(Parser* p_parser, TokenType type, void* value); +bool accept(Parser* p_parser, TokType type); +bool accept_str(Parser* p_parser, TokType type, const char* p_text); +bool expect(Parser* p_parser, TokType type); +bool expect_str(Parser* p_parser, TokType type, const char* p_text); +//size_t mark(Parser* p_parser); +//void reduce(Parser* p_parser, size_t mark); +//AST* get_tree(Parser* p_parser); +//void insert(Parser* p_parser, TokType type, char* value); // AST Routines AST* tree_convert(AST* p_tree); diff --git a/source/libparse/parser.c b/source/libparse/parser.c index f51b4e5..fedff91 100644 --- a/source/libparse/parser.c +++ b/source/libparse/parser.c @@ -8,137 +8,140 @@ DEFINE_EXCEPTION(ParseException, &RuntimeException); -Token tok_eof = { T_END_FILE, NULL, 0, 0, NULL }; +Tok tok_eof = { NULL, 0, 0, T_END_FILE, {0} }; -static void parser_free(void* p_obj) { - Parser* p_parser = (Parser*)p_obj; - if ((NULL != p_parser->p_tok) && (&tok_eof != p_parser->p_tok)) { - mem_release(p_parser->p_tok); +static void parser_free(void* obj) { + Parser* parser = (Parser*)obj; + if ((NULL != parser->tok) && (&tok_eof != parser->tok)) { + mem_release(parser->tok); } - mem_release(p_parser->p_lexer); - mem_release(p_parser->p_tok_buf); + mem_release(parser->tokbuf); } -Parser* parser_new(char* p_prompt, FILE* input) +Parser* parser_new(char* prompt, FILE* input) { - Parser* p_parser = (Parser*)mem_allocate(sizeof(Parser), &parser_free); - p_parser->p_lexer = lexer_new(p_prompt, input); - p_parser->p_tok = NULL; - p_parser->p_tok_buf = vec_new(0); - return p_parser; + Parser* parser = (Parser*)mem_allocate(sizeof(Parser), &parser_free); + parser->line = NULL; + parser->index = 0; + parser->lineno = 0; + parser->input = input; + parser->prompt = prompt; + parser->tok = NULL; + parser->tokbuf = vec_new(0); + return parser; } -void fetch(Parser* p_parser) +void fetch(Parser* parser) { - p_parser->p_tok = lexer_read(p_parser->p_lexer); - if (NULL == p_parser->p_tok) - p_parser->p_tok = &tok_eof; + parser->tok = gettoken(parser); + if (NULL == parser->tok) + parser->tok = &tok_eof; } -Token* peek(Parser* p_parser) +Tok* peek(Parser* parser) { - if (NULL == p_parser->p_tok) - fetch(p_parser); - return p_parser->p_tok; + if (NULL == parser->tok) + fetch(parser); + return parser->tok; } -bool parser_eof(Parser* p_parser) { - return (peek(p_parser)->type == T_END_FILE); +bool parser_eof(Parser* parser) { + return (peek(parser)->type == T_END_FILE); } -void parser_resume(Parser* p_parser) { - if ((NULL != p_parser->p_tok) && (&tok_eof != p_parser->p_tok)) { - mem_release(p_parser->p_tok); - p_parser->p_tok = NULL; +void parser_resume(Parser* parser) { + if ((NULL != parser->tok) && (&tok_eof != parser->tok)) { + mem_release(parser->tok); + parser->tok = NULL; } - vec_clear(p_parser->p_tok_buf); - lexer_skipline(p_parser->p_lexer); + vec_clear(parser->tokbuf); + skipline(parser); } -void error(Parser* p_parser, const char* p_text) +void error(Parser* parser, const char* text) { - (void)p_parser; - Token* tok = peek(p_parser); - fprintf(stderr, ":%zu:%zu:Error: %s\n", tok->line, tok->col, p_text); - throw_msg(ParseException, p_text); + (void)parser; + Tok* tok = peek(parser); + fprintf(stderr, ":%zu:%zu:Error: %s\n", tok->line, tok->col, text); + throw_msg(ParseException, text); } -bool accept(Parser* p_parser, TokenType type) +bool accept(Parser* parser, TokType type) { bool ret = false; - if (peek(p_parser)->type == type) { - vec_push_back(p_parser->p_tok_buf, tree_new(ATOM, p_parser->p_tok)); - p_parser->p_tok = NULL; + if (peek(parser)->type == type) { + vec_push_back(parser->tokbuf, tree_new(ATOM, parser->tok)); + parser->tok = NULL; ret = true; } return ret; } -bool accept_str(Parser* p_parser, TokenType type, const char* p_text) +bool accept_str(Parser* parser, TokType type, const char* text) { bool ret = false; - if ((peek(p_parser)->type == type) && (0 == strcmp((char*)(p_parser->p_tok->value), p_text))) { - vec_push_back(p_parser->p_tok_buf, tree_new(ATOM, p_parser->p_tok)); - p_parser->p_tok = NULL; + if ((peek(parser)->type == type) && (0 == strcmp((char*)(parser->tok->value.text), text))) { + vec_push_back(parser->tokbuf, tree_new(ATOM, parser->tok)); + parser->tok = NULL; ret = true; } return ret; } -bool expect(Parser* p_parser, TokenType type) +bool expect(Parser* parser, TokType type) { bool ret = false; - if (accept(p_parser, type)) { + if (accept(parser, type)) { ret = true; } else { - error(p_parser, "Unexpected token"); + error(parser, "Unexpected token"); } return ret; } -bool expect_str(Parser* p_parser, TokenType type, const char* p_text) +bool expect_str(Parser* parser, TokType type, const char* text) { bool ret = false; - if (accept_str(p_parser, type, p_text)) { + if (accept_str(parser, type, text)) { ret = true; } else { - error(p_parser, "Unexpected token"); + error(parser, "Unexpected token"); } return ret; } -size_t mark(Parser* p_parser) +size_t mark(Parser* parser) { - return (vec_size(p_parser->p_tok_buf) - 1); + return (vec_size(parser->tokbuf) - 1); } -void reduce(Parser* p_parser, size_t mark) +void reduce(Parser* parser, size_t mark) { - vec_t* p_buf = p_parser->p_tok_buf; - vec_t* p_form = vec_new(0); - for(size_t idx = mark; idx < vec_size(p_buf); idx++) { - AST* p_tree = mem_retain(vec_at(p_buf, idx)); - vec_push_back(p_form, p_tree); + vec_t* buf = parser->tokbuf; + vec_t* form = vec_new(0); + for(size_t idx = mark; idx < vec_size(buf); idx++) { + AST* tree = mem_retain(vec_at(buf, idx)); + vec_push_back(form, tree); } - vec_erase(p_buf, mark, vec_size(p_buf)-1); - vec_push_back(p_buf, tree_new(TREE, p_form)); + vec_erase(buf, mark, vec_size(buf)-1); + vec_push_back(buf, tree_new(TREE, form)); } -AST* get_tree(Parser* p_parser) { - AST* p_tree = NULL; - if (1 == vec_size(p_parser->p_tok_buf)) { - p_tree = mem_retain(vec_at(p_parser->p_tok_buf, 0)); - vec_clear(p_parser->p_tok_buf); +AST* get_tree(Parser* parser) { + AST* tree = NULL; + if (1 == vec_size(parser->tokbuf)) { + tree = mem_retain(vec_at(parser->tokbuf, 0)); + vec_clear(parser->tokbuf); } else { - p_tree = tree_new(TREE, p_parser->p_tok_buf); - p_parser->p_tok_buf = vec_new(0); + tree = tree_new(TREE, parser->tokbuf); + parser->tokbuf = vec_new(0); } - return p_tree; + return tree; } -void insert(Parser* p_parser, TokenType type, void* value) { - Token* p_tok = token(type, value); - AST* p_tree = tree_new(ATOM, p_tok); - vec_push_back(p_parser->p_tok_buf, p_tree); -} +//void insert(Parser* parser, TokType type, char* value) { +// Tok* tok = token(type, strdup(value)); +// AST* tree = tree_new(ATOM, tok); +// vec_push_back(parser->tokbuf, tree); +//} diff --git a/source/libparse/tree.c b/source/libparse/tree.c index db3d6b7..0bfd375 100644 --- a/source/libparse/tree.c +++ b/source/libparse/tree.c @@ -13,7 +13,7 @@ static void tree_free(void* p_obj) { } } -static bool is_punctuation(Token* p_tok) { +static bool is_punctuation(Tok* p_tok) { bool ret = false; switch(p_tok->type) { case T_END: @@ -52,7 +52,7 @@ AST* tree_convert(AST* p_tree) { AST* tree_new(ASTTag tag, void* p_obj) { AST* p_tree = (AST*)mem_allocate(sizeof(AST), &tree_free); p_tree->tag = tag; - p_tree->ptr.tok = (Token*)p_obj; + p_tree->ptr.tok = (Tok*)p_obj; return p_tree; } @@ -69,7 +69,7 @@ AST* tree_get_child(AST* p_tree, size_t idx) { void* tree_get_val(AST* p_tree) { void* ret = NULL; if (p_tree->tag == ATOM) { - ret = p_tree->ptr.tok->value; + ret = p_tree->ptr.tok->value.text; } return ret; } @@ -87,9 +87,9 @@ bool tree_is_formtype(AST* p_tree, const char* val) { bool ret = false; AST* child = tree_get_child(p_tree, 0); if ((NULL != child) && (child->tag == ATOM)) { - Token* token = child->ptr.tok; + Tok* token = child->ptr.tok; if ((token->type == T_ID) && - (0 == strcmp(val, (char*)token->value))) { + (0 == strcmp(val, token->value.text))) { ret = true; } } diff --git a/source/sclpl/codegen.c b/source/sclpl/codegen.c index 3de8875..d4d8aae 100644 --- a/source/sclpl/codegen.c +++ b/source/sclpl/codegen.c @@ -97,15 +97,15 @@ static void emit_def_placeholders(FILE* file, vec_t* prgrm) { static void emit_expression(FILE* file, vec_t* fnlst, AST* p_tree, int depth) { if (p_tree->tag == ATOM) { - Token* tok = p_tree->ptr.tok; + Tok* tok = p_tree->ptr.tok; switch (tok->type) { - case T_STRING: print_string(file, ((char*)tok->value)); break; - case T_CHAR: print_char(file, ((char)(intptr_t)tok->value)); break; - case T_INT: fprintf(file, "__int(%ld)", *((long int*)tok->value)); break; - case T_FLOAT: fprintf(file, "__float(%f)", *((double*)tok->value)); break; - case T_BOOL: fprintf(file, "__bool(%s)", ((intptr_t)tok->value)?"true":"false"); break; - case T_ID: fprintf(file, "%s", ((char*)tok->value)); break; - default: break; + case T_STRING: print_string(file, ((char*)tok->value.text)); break; + case T_CHAR: print_char(file, ((char)(intptr_t)tok->value.character)); break; + case T_INT: fprintf(file, "__int(%ld)", *((long int*)tok->value.integer)); break; + case T_FLOAT: fprintf(file, "__float(%f)", ((double)tok->value.floating)); break; + case T_BOOL: fprintf(file, "__bool(%s)", ((intptr_t)tok->value.boolean)?"true":"false"); break; + case T_ID: fprintf(file, "%s", ((char*)tok->value.text)); break; + default: break; } } else if (tree_is_formtype(p_tree, "if")) { fprintf(file, "IF ("); diff --git a/source/sclpl/ops.c b/source/sclpl/ops.c index eb5bdc0..d9503ae 100644 --- a/source/sclpl/ops.c +++ b/source/sclpl/ops.c @@ -40,22 +40,22 @@ vec_t* ops_deps_file(vec_t* program) { str_t* ops_token_file(str_t* in) { str_t* ofname = NULL; - FILE* input = (NULL == in) ? stdin : fopen(str_cstr(in), "r"); - FILE* output; - if (NULL == in) { - output = stdout; - } else { - ofname = sys_filename(TOKFILE, in); - output = fopen(str_cstr(ofname), "w"); - } + //FILE* input = (NULL == in) ? stdin : fopen(str_cstr(in), "r"); + //FILE* output; + //if (NULL == in) { + // output = stdout; + //} else { + // ofname = sys_filename(TOKFILE, in); + // output = fopen(str_cstr(ofname), "w"); + //} - Lexer* p_lexer = lexer_new(NULL, input); - Token* token; - while(NULL != (token = lexer_read(p_lexer))) { - pprint_token(output, token, true); - mem_release(token); - } - mem_release(p_lexer); + //Lexer* p_lexer = lexer_new(NULL, input); + //Token* token; + //while(NULL != (token = lexer_read(p_lexer))) { + // pprint_token(output, token, true); + // mem_release(token); + //} + //mem_release(p_lexer); return ofname; } diff --git a/source/sclpl/pprint.c b/source/sclpl/pprint.c index 6f5d0c5..64a1e3f 100644 --- a/source/sclpl/pprint.c +++ b/source/sclpl/pprint.c @@ -11,7 +11,7 @@ static void print_indent(FILE* file, int depth) { fprintf(file, "%c", ' '); } -static const char* token_type_to_string(TokenType type) { +static const char* token_type_to_string(TokType type) { switch(type) { case T_STRING: return "T_STRING"; case T_CHAR: return "T_CHAR"; @@ -52,12 +52,12 @@ static void print_char(FILE* file, char ch) { if (i == 5) fprintf(file, "\\%c", ch); } -void pprint_token_type(FILE* file, Token* token) { +void pprint_token_type(FILE* file, Tok* token) { fprintf(file, "%s", token_type_to_string(token->type)); } -void pprint_token_value(FILE* file, Token* token) { - void* value = token->value; +void pprint_token_value(FILE* file, Tok* token) { + void* value = NULL;//token->value; switch(token->type) { case T_STRING: fprintf(file, "\"%s\"", ((char*)value)); break; case T_CHAR: print_char(file, ((char)(intptr_t)value)); break; @@ -69,7 +69,7 @@ void pprint_token_value(FILE* file, Token* token) { } } -void pprint_token(FILE* file, Token* token, bool print_loc) +void pprint_token(FILE* file, Tok* token, bool print_loc) { if (print_loc) { fprintf(file, "%zu:", token->line); diff --git a/source/sclpl/pprint.h b/source/sclpl/pprint.h index e4717e9..a60030d 100644 --- a/source/sclpl/pprint.h +++ b/source/sclpl/pprint.h @@ -9,11 +9,11 @@ #include -void pprint_token_type(FILE* file, Token* token); +void pprint_token_type(FILE* file, Tok* token); -void pprint_token_value(FILE* file, Token* token); +void pprint_token_value(FILE* file, Tok* token); -void pprint_token(FILE* file, Token* token, bool print_loc); +void pprint_token(FILE* file, Tok* token, bool print_loc); void pprint_tree(FILE* file, AST* tree, int depth);