From a94dd2d9e01cdcbc1ca0d20950a68be187eca675 Mon Sep 17 00:00:00 2001
From: "Mike D. Lowis"
Date: Tue, 6 Oct 2015 10:35:11 -0400
Subject: [PATCH] Lexer tests are now passing

Replace the single token(type, text) constructor with typed
constructors (TextTok, CharTok, IntTok, FloatTok, BoolTok) that store
their payload in the matching member of Tok.value. Re-enable the
integer, float and boolean paths in the lexer, the token dump in
ops_token_file() and the value printer in pprint_token_value().

Ownership: TextTok() stores a private copy of its argument, so callers
keep ownership of the string they pass in.

Review fixes folded into this revision:
 - Token() now clears value.text; token_free() reads that member for
   every token that is not BOOL/CHAR/INT/FLOAT, including punctuation.
 - classify() no longer wraps the string literal text in dupstring()
   before handing it to TextTok(); that extra copy was never released.
 - gettoken() releases the scan() buffer once the token is built. It
   used to be released through token_free() when an identifier token
   adopted it; no token adopts it any more.
 - pprint_token_value() casts the integer payload to long so that the
   argument matches the "%ld" conversion.
---
 NOTE(review): indentation in this copy was reconstructed, and the
 operand of the #include context line in lexer.c as well as the author
 address were not recoverable; restore them before applying. The blob
 ids on the index lines still describe the previous revision.

 source/libparse/lexer.c    | 145 ++++++++++++++++++++++++-------------
 source/libparse/libparse.h |   2 -
 source/sclpl/ops.c         |  30 ++++----
 source/sclpl/pprint.c      |  15 ++--
 4 files changed, 116 insertions(+), 76 deletions(-)

diff --git a/source/libparse/lexer.c b/source/libparse/lexer.c
index 88b9049..07aea58 100644
--- a/source/libparse/lexer.c
+++ b/source/libparse/lexer.c
@@ -6,6 +6,71 @@
 */
 #include
 
+static char* dupstring(const char* old) {
+    size_t length = strlen(old);
+    char* str = (char*)mem_allocate(length+1, NULL);
+    memcpy(str, old, length);
+    str[length] = '\0';
+    return str;
+}
+
+/* Token Constructors
+ *****************************************************************************/
+static void token_free(void* obj)
+{
+    Tok* tok = (Tok*)obj;
+    if ((tok->type != T_BOOL) &&
+        (tok->type != T_CHAR) &&
+        (tok->type != T_INT) &&
+        (tok->type != T_FLOAT) &&
+        (NULL != tok->value.text))
+        mem_release(tok->value.text);
+}
+
+static Tok* Token(TokType type)
+{
+    Tok* tok = (Tok*)mem_allocate(sizeof(Tok), &token_free);
+    tok->type = type;
+    /* token_free() inspects value.text, so never leave it uninitialized */
+    tok->value.text = NULL;
+    return tok;
+}
+
+Tok* TextTok(TokType type, char* text)
+{
+    Tok* tok = Token(type);
+    tok->value.text = dupstring(text);
+    return tok;
+}
+
+Tok* CharTok(uint32_t val)
+{
+    Tok* tok = Token(T_CHAR);
+    tok->value.character = val;
+    return tok;
+}
+
+Tok* IntTok(intptr_t val)
+{
+    Tok* tok = Token(T_INT);
+    tok->value.integer = val;
+    return tok;
+}
+
+Tok* FloatTok(double val)
+{
+    Tok* tok = Token(T_FLOAT);
+    tok->value.floating = val;
+    return tok;
+}
+
+Tok* BoolTok(bool val)
+{
+    Tok* tok = Token(T_BOOL);
+    tok->value.boolean = val;
+    return tok;
+}
+
 /* Token Scanning
  *****************************************************************************/
 static char current(Parser* ctx) {
@@ -164,14 +229,6 @@ static char* scan(Parser* ctx, size_t* line, size_t* column) {
 
 /* Lexical Analysis
  *****************************************************************************/
-static char* dupstring(const char* old) {
-    size_t length = strlen(old);
-    char* str = (char*)mem_allocate(length+1, NULL);
-    memcpy(str, old, length);
-    str[length] = '\0';
-    return str;
-}
-
 static bool char_oneof(const char* class, char c) {
     bool ret = false;
     size_t sz = strlen(class);
@@ -184,34 +241,19 @@ static bool char_oneof(const char* class, char c) {
     return ret;
 }
 
-static void token_free(void* obj)
-{
-    Tok* tok = (Tok*)obj;
-    if ((tok->type != T_BOOL) && (tok->type != T_CHAR) && (NULL != tok->value.text))
-        mem_release(tok->value.text);
-}
-
-static Tok* token(TokType type, char* text)
-{
-    Tok* tok = (Tok*)mem_allocate(sizeof(Tok), &token_free);
-    tok->type = type;
-    tok->value.text = text;
-    return tok;
-}
-
 static Tok* punctuation(char* text)
 {
     Tok* tok = NULL;
     switch (text[0]) {
-        case '(': tok = token(T_LPAR, NULL); break;
-        case ')': tok = token(T_RPAR, NULL); break;
-        case '{': tok = token(T_LBRACE, NULL); break;
-        case '}': tok = token(T_RBRACE, NULL); break;
-        case '[': tok = token(T_LBRACK, NULL); break;
-        case ']': tok = token(T_RBRACK, NULL); break;
-        case ';': tok = token(T_END, NULL); break;
-        case ',': tok = token(T_COMMA, NULL); break;
-        case '\'': tok = token(T_SQUOTE, NULL); break;
+        case '(': tok = Token(T_LPAR); break;
+        case ')': tok = Token(T_RPAR); break;
+        case '{': tok = Token(T_LBRACE); break;
+        case '}': tok = Token(T_RBRACE); break;
+        case '[': tok = Token(T_LBRACK); break;
+        case ']': tok = Token(T_RBRACK); break;
+        case ';': tok = Token(T_END); break;
+        case ',': tok = Token(T_COMMA); break;
+        case '\'': tok = Token(T_SQUOTE); break;
     }
     return tok;
 }
@@ -227,16 +269,16 @@ static Tok* character(char* text)
         "\v\0vtab"
     };
     if (strlen(text) == 2) {
-        tok = token(T_CHAR, (void*)((intptr_t)text[1]));
+        tok = CharTok((uint32_t)text[1]);
     } else {
         for(int i = 0; i < 5; i++) {
             if (0 == strcmp(&text[1], &(lookuptable[i][2]))) {
-                tok = token(T_CHAR, (void*)((intptr_t)lookuptable[i][0]));
+                tok = CharTok((uint32_t)lookuptable[i][0]);
                 break;
             }
         }
         if (NULL == tok)
-            tok = token(T_ID, text);
+            tok = TextTok(T_ID, text);
     }
     return tok;
 }
@@ -244,12 +286,11 @@ static Tok* character(char* text)
 static Tok* integer(char* text, int base)
 {
     char* end;
-    long* integer = (long*)mem_allocate(sizeof(long), NULL);
+    long integer;
     errno = 0;
-    *integer = strtol(text, &end, base);
+    integer = strtol(text, &end, base);
     assert(errno == 0);
-    return NULL;
-    //return (end[0] == '\0') ? token(T_INT, integer) : NULL;
+    return (end[0] == '\0') ? IntTok(integer) : NULL;
 }
 
 static int getradix(char ch) {
@@ -267,7 +308,7 @@ static Tok* radixint(char* text)
 {
     Tok* ret = integer(&text[2], getradix(text[1]));
     if (NULL == ret)
-        ret = token(T_ID, text);
+        ret = TextTok(T_ID, text);
     return ret;
 }
 
@@ -283,12 +324,11 @@ static bool is_float(char* text) {
 static Tok* floating(char* text)
 {
     char* end;
-    double* dbl = (double*)mem_allocate(sizeof(double), NULL);
+    double dbl;
     errno = 0;
-    *dbl = strtod(text, &end);
+    dbl = strtod(text, &end);
     assert(errno == 0);
-    //return (end[0] == '\0') ? token(T_FLOAT, dbl) : NULL;
-    return NULL;
+    return (end[0] == '\0') ? FloatTok(dbl) : NULL;
 }
 
 static Tok* number(char* text)
@@ -298,25 +338,24 @@ static Tok* number(char* text)
         tok = floating(text);
     else
         tok = integer(text, 10);
-    return (NULL == tok) ? token(T_ID, text) : tok;
+    return (NULL == tok) ? TextTok(T_ID, text) : tok;
 }
 
 static Tok* boolean(char* text)
 {
-    //return token(T_BOOL, (void*)((intptr_t)((0 == strcmp(text,"true")) ? true : false)));
-    return NULL;
+    return BoolTok(0 == strcmp(text,"true"));
 }
 
 static Tok* classify(const char* file, size_t line, size_t col, char* text)
 {
     Tok* tok = NULL;
     if (0 == strcmp(text,"end")) {
-        tok = token(T_END, NULL);
+        tok = Token(T_END);
     } else if (char_oneof("()[]{};,'", text[0])) {
         tok = punctuation(text);
     } else if ('"' == text[0]) {
         text[strlen(text)-1] = '\0';
-        tok = token(T_STRING, dupstring(&text[1]));
+        tok = TextTok(T_STRING, &text[1]);
     } else if (text[0] == '\\') {
         tok = character(text);
     } else if ((text[0] == '0') && char_oneof("bodh",text[1])) {
@@ -326,7 +365,7 @@ static Tok* classify(const char* file, size_t line, size_t col, char* text)
     } else if ((0 == strcmp(text,"true")) || (0 == strcmp(text,"false"))) {
         tok = boolean(text);
     } else {
-        tok = token(T_ID, text);
+        tok = TextTok(T_ID, text);
     }
     /* If we found a valid token then fill in the location details */
     if (NULL != tok) {
@@ -341,7 +380,11 @@ Tok* gettoken(Parser* ctx)
     Tok* tok = NULL;
     size_t line, col;
     char* text = scan(ctx, &line, &col);
-    tok = classify(NULL, line, col, text);
+    if (text != NULL) {
+        tok = classify(NULL, line, col, text);
+        /* Tokens keep their own copies, so the scan buffer is released here */
+        mem_release(text);
+    }
     return tok;
 }
 
diff --git a/source/libparse/libparse.h b/source/libparse/libparse.h
index a01a9ee..05f41b7 100644
--- a/source/libparse/libparse.h
+++ b/source/libparse/libparse.h
@@ -39,8 +39,6 @@ typedef struct {
     } value;
 } Tok;
 
-Tok* Token(const char* file, size_t line, size_t col, char* text);
-
 #endif
 
 /* AST Types
diff --git a/source/sclpl/ops.c b/source/sclpl/ops.c
index d9503ae..30d64ff 100644
--- a/source/sclpl/ops.c
+++ b/source/sclpl/ops.c
@@ -40,22 +40,22 @@ vec_t* ops_deps_file(vec_t* program) {
 
 str_t* ops_token_file(str_t* in) {
     str_t* ofname = NULL;
-    //FILE* input = (NULL == in) ? stdin : fopen(str_cstr(in), "r");
-    //FILE* output;
-    //if (NULL == in) {
-    //    output = stdout;
-    //} else {
-    //    ofname = sys_filename(TOKFILE, in);
-    //    output = fopen(str_cstr(ofname), "w");
-    //}
+    FILE* input = (NULL == in) ? stdin : fopen(str_cstr(in), "r");
+    FILE* output;
+    if (NULL == in) {
+        output = stdout;
+    } else {
+        ofname = sys_filename(TOKFILE, in);
+        output = fopen(str_cstr(ofname), "w");
+    }
 
-    //Lexer* p_lexer = lexer_new(NULL, input);
-    //Token* token;
-    //while(NULL != (token = lexer_read(p_lexer))) {
-    //    pprint_token(output, token, true);
-    //    mem_release(token);
-    //}
-    //mem_release(p_lexer);
+    Parser* ctx = parser_new(NULL, input);
+    Tok* token;
+    while(NULL != (token = gettoken(ctx))) {
+        pprint_token(output, token, true);
+        mem_release(token);
+    }
+    mem_release(ctx);
     return ofname;
 }
 
diff --git a/source/sclpl/pprint.c b/source/sclpl/pprint.c
index 64a1e3f..b54cce9 100644
--- a/source/sclpl/pprint.c
+++ b/source/sclpl/pprint.c
@@ -57,15 +57,14 @@ void pprint_token_type(FILE* file, Tok* token) {
 }
 
 void pprint_token_value(FILE* file, Tok* token) {
-    void* value = NULL;//token->value;
     switch(token->type) {
-        case T_STRING: fprintf(file, "\"%s\"", ((char*)value)); break;
-        case T_CHAR: print_char(file, ((char)(intptr_t)value)); break;
-        case T_INT: fprintf(file, "%ld", *((long int*)value)); break;
-        case T_FLOAT: fprintf(file, "%f", *((double*)value)); break;
-        case T_BOOL: fprintf(file, "%s", ((intptr_t)value)?"true":"false"); break;
-        case T_ID: fprintf(file, "%s", ((char*)value)); break;
-        default: fprintf(file, "???"); break;
+        case T_STRING: fprintf(file, "\"%s\"", token->value.text); break;
+        case T_ID: fprintf(file, "%s", token->value.text); break;
+        case T_CHAR: print_char(file, token->value.character); break;
+        case T_INT: fprintf(file, "%ld", (long)token->value.integer); break;
+        case T_FLOAT: fprintf(file, "%f", token->value.floating); break;
+        case T_BOOL: fprintf(file, "%s", (token->value.boolean)?"true":"false"); break;
+        default: fprintf(file, "???"); break;
     }
 }
 
-- 
2.52.0