From: Michael D. Lowis Date: Tue, 6 Oct 2015 02:38:03 +0000 (-0400) Subject: Almost finished reworking lexer X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=a9b20c361e9e98816b6f3fbbfc887dfda7e25900;p=proto%2Fsclpl.git Almost finished reworking lexer --- diff --git a/source/libparse/lexer.c b/source/libparse/lexer.c index 5ccced8..88b9049 100644 --- a/source/libparse/lexer.c +++ b/source/libparse/lexer.c @@ -6,6 +6,8 @@ */ #include +/* Token Scanning + *****************************************************************************/ static char current(Parser* ctx) { return ctx->line[ctx->index]; } @@ -49,7 +51,7 @@ static char* dup(Parser* ctx, size_t start_idx, size_t len) { return str; } -static void getline(Parser* ctx) { +static void fetchline(Parser* ctx) { int c; size_t capacity = 8; size_t index = 0; @@ -77,6 +79,11 @@ static void getline(Parser* ctx) { } } +void skipline(Parser* ctx) +{ + fetchline(ctx); +} + static char* read_string(Parser* ctx) { size_t capacity = 8; size_t index = 0; @@ -106,7 +113,7 @@ static char* read_string(Parser* ctx) { /* Get the next line if necessary */ if ('\n' == tok[index-1]) - getline(ctx); + fetchline(ctx); } /* Skip the last " */ @@ -120,11 +127,11 @@ static char* read_string(Parser* ctx) { static void skip_ws(Parser* ctx) { /* If we haven't read a line yet, read one now */ if (NULL == ctx->line) - getline(ctx); + fetchline(ctx); /* Fast forward past whitespace and read a newline if necessary */ while(!eof(ctx)) { if ('\0' == current(ctx)) { - getline(ctx); + fetchline(ctx); } else if (oneof(ctx, " \t\r\n")) { ctx->index++; } else { @@ -155,271 +162,186 @@ static char* scan(Parser* ctx, size_t* line, size_t* column) { return tok; } +/* Lexical Analysis + *****************************************************************************/ +static char* dupstring(const char* old) { + size_t length = strlen(old); + char* str = (char*)mem_allocate(length+1, NULL); + memcpy(str, old, length); + str[length] = '\0'; + return str; +} +static bool char_oneof(const char* class, char c) { + bool ret = false; + size_t sz = strlen(class); + for (size_t idx = 0; idx < sz; idx++) { + if (c == class[idx]) { + ret = true; + break; + } + } + return ret; +} +static void token_free(void* obj) +{ + Tok* tok = (Tok*)obj; + if ((tok->type != T_BOOL) && (tok->type != T_CHAR) && (NULL != tok->value.text)) + mem_release(tok->value.text); +} +static Tok* token(TokType type, char* text) +{ + Tok* tok = (Tok*)mem_allocate(sizeof(Tok), &token_free); + tok->type = type; + tok->value.text = text; + return tok; +} -#if 0 +static Tok* punctuation(char* text) +{ + Tok* tok = NULL; + switch (text[0]) { + case '(': tok = token(T_LPAR, NULL); break; + case ')': tok = token(T_RPAR, NULL); break; + case '{': tok = token(T_LBRACE, NULL); break; + case '}': tok = token(T_RBRACE, NULL); break; + case '[': tok = token(T_LBRACK, NULL); break; + case ']': tok = token(T_RBRACK, NULL); break; + case ';': tok = token(T_END, NULL); break; + case ',': tok = token(T_COMMA, NULL); break; + case '\'': tok = token(T_SQUOTE, NULL); break; + } + return tok; +} -// Scanning phase -static char* scan(Parser* ctx, size_t* line, size_t* col); +static Tok* character(char* text) +{ + Tok* tok = NULL; + static const char* lookuptable[5] = { + " \0space", + "\n\0newline", + "\r\0return", + "\t\0tab", + "\v\0vtab" + }; + if (strlen(text) == 2) { + tok = token(T_CHAR, (void*)((intptr_t)text[1])); + } else { + for(int i = 0; i < 5; i++) { + if (0 == strcmp(&text[1], &(lookuptable[i][2]))) { + tok = token(T_CHAR, (void*)((intptr_t)lookuptable[i][0])); + break; + } + } + if (NULL == tok) + tok = token(T_ID, text); + } + return tok; +} -// Classification phase -Tok* classify(const char* file, size_t line, size_t col, char* text); +static Tok* integer(char* text, int base) +{ + char* end; + long* integer = (long*)mem_allocate(sizeof(long), NULL); + errno = 0; + *integer = strtol(text, &end, base); + assert(errno == 0); + return NULL; + //return (end[0] == '\0') ? token(T_INT, integer) : NULL; +} -Tok* gettoken(Parser* lexer) { - Token* tok = NULL; - size_t line, col; - char* text = scan(lexer, &line, &col); - tok = classify(NULL, line, col, text); - return tok; +static int getradix(char ch) { + int ret = -1; + switch(ch) { + case 'b': ret = 2; break; + case 'o': ret = 8; break; + case 'd': ret = 10; break; + case 'h': ret = 16; break; + } + return ret; +} + +static Tok* radixint(char* text) +{ + Tok* ret = integer(&text[2], getradix(text[1])); + if (NULL == ret) + ret = token(T_ID, text); + return ret; } -#endif +static bool is_float(char* text) { + while (text[0] != '\0') + if (text[0] == '.') + return true; + else + text++; + return false; +} +static Tok* floating(char* text) +{ + char* end; + double* dbl = (double*)mem_allocate(sizeof(double), NULL); + errno = 0; + *dbl = strtod(text, &end); + assert(errno == 0); + //return (end[0] == '\0') ? token(T_FLOAT, dbl) : NULL; + return NULL; +} -void skipline(Parser* ctx) +static Tok* number(char* text) { + Tok* tok = NULL; + if (is_float(text)) + tok = floating(text); + else + tok = integer(text, 10); + return (NULL == tok) ? token(T_ID, text) : tok; +} + +static Tok* boolean(char* text) +{ + //return token(T_BOOL, (void*)((intptr_t)((0 == strcmp(text,"true")) ? true : false))); + return NULL; +} + +static Tok* classify(const char* file, size_t line, size_t col, char* text) +{ + Tok* tok = NULL; + if (0 == strcmp(text,"end")) { + tok = token(T_END, NULL); + } else if (char_oneof("()[]{};,'", text[0])) { + tok = punctuation(text); + } else if ('"' == text[0]) { + text[strlen(text)-1] = '\0'; + tok = token(T_STRING, dupstring(&text[1])); + } else if (text[0] == '\\') { + tok = character(text); + } else if ((text[0] == '0') && char_oneof("bodh",text[1])) { + tok = radixint(text); + } else if (char_oneof("+-0123456789",text[0])) { + tok = number(text); + } else if ((0 == strcmp(text,"true")) || (0 == strcmp(text,"false"))) { + tok = boolean(text); + } else { + tok = token(T_ID, text); + } + /* If we found a valid token then fill in the location details */ + if (NULL != tok) { + tok->line = line; + tok->col = col; + } + return tok; } Tok* gettoken(Parser* ctx) { Tok* tok = NULL; size_t line, col; - char* text = scan(lexer, &line, &col); + char* text = scan(ctx, &line, &col); tok = classify(NULL, line, col, text); return tok; } - - - - - -//static char* read(Parser* ctx, size_t* line, size_t* col); -//static bool eof(Parser* ctx); -//static bool eol(Parser* ctx); -//static void getline(Parser* ctx); -//static void skip_ws(Parser* ctx); -//static char current(Parser* ctx); -//static bool oneof(Parser* ctx, const char* set); -//static char* dup(Parser* ctx, size_t start_idx, size_t len); -//static char* read_string(Parser* ctx); -// -//static Token* lexer_make_token(size_t line, size_t col, char* text); -//static Token* lexer_punc(char* text); -//static Token* lexer_char(char* text); -//static Token* lexer_radix_int(char* text); -//static Token* lexer_number(char* text); -//static Token* lexer_integer(char* text, int base); -//static Token* lexer_float(char* text); -//static Token* lexer_bool(char* text); -//static Token* lexer_var(char* text); -//static bool lexer_oneof(const char* class, char c); -//static bool is_float(char* text); -//static int read_radix(char ch); -// -//static void lex_tok_free(void* obj) { -// Token* tok = (Token*)obj; -// if ((tok->type != T_BOOL) && (tok->type != T_CHAR) && (NULL != tok->value)) -// mem_release(tok->value); -//} -// -//Token* token(TokenType type, void* val) { -// Token* tok = (Token*)mem_allocate(sizeof(Token), &lex_tok_free); -// tok->type = type; -// tok->value = val; -// return tok; -//} -// -//static void lexer_free(void* obj) { -//} -// -//Parser* lexer_new(char* prompt, FILE* input) { -// Parser* lexer = (Parser*)mem_allocate(sizeof(Parser), &lexer_free); -// -// lexer->line = NULL; -// lexer->index = 0; -// lexer->lineno = 0; -// lexer->input = input; -// lexer->prompt = prompt; -// -// return lexer; -//} -// -//Token* lexer_read(Parser* lexer) { -// Token* tok = NULL; -// size_t line, col; -// char* text = read(lexer, &line, &col); -// if (NULL != text) { -// tok = lexer_make_token(line, col, text); -// free(text); -// } -// return tok; -//} -// -//void lexer_skipline(Parser* lexer) { -// getline(lexer); -//} -// -//static Token* lexer_make_token(size_t line, size_t col, char* text) { -// Token* tok = NULL; -// if (0 == strcmp(text,"end")) { -// tok = token(T_END, NULL); -// } else if (lexer_oneof("()[]{};,'", text[0])) { -// tok = lexer_punc(text); -// } else if ('"' == text[0]) { -// text[strlen(text)-1] = '\0'; -// tok = token(T_STRING, lexer_dup(&text[1])); -// } else if (text[0] == '\\') { -// tok = lexer_char(text); -// } else if ((text[0] == '0') && lexer_oneof("bodh",text[1])) { -// tok = lexer_radix_int(text); -// } else if (lexer_oneof("+-0123456789",text[0])) { -// tok = lexer_number(text); -// } else if ((0 == strcmp(text,"true")) || (0 == strcmp(text,"false"))) { -// tok = lexer_bool(text); -// } else { -// tok = lexer_var(text); -// } -// /* If we found a valid token then fill in the location details */ -// if (NULL != tok) { -// tok->line = line; -// tok->col = col; -// } -// return tok; -//} -// -//static Token* lexer_punc(char* text) -//{ -// Token* tok = NULL; -// switch (text[0]) { -// case '(': tok = token(T_LPAR, NULL); break; -// case ')': tok = token(T_RPAR, NULL); break; -// case '{': tok = token(T_LBRACE, NULL); break; -// case '}': tok = token(T_RBRACE, NULL); break; -// case '[': tok = token(T_LBRACK, NULL); break; -// case ']': tok = token(T_RBRACK, NULL); break; -// case ';': tok = token(T_END, NULL); break; -// case ',': tok = token(T_COMMA, NULL); break; -// case '\'': tok = token(T_SQUOTE, NULL); break; -// } -// return tok; -//} -// -//static Token* lexer_char(char* text) -//{ -// Token* tok = NULL; -// static const char* lookutable[5] = { -// " \0space", -// "\n\0newline", -// "\r\0return", -// "\t\0tab", -// "\v\0vtab" -// }; -// if (strlen(text) == 2) { -// tok = token(T_CHAR, (void*)((intptr_t)text[1])); -// } else { -// for(int i = 0; i < 5; i++) { -// if (0 == strcmp(&text[1], &(lookutable[i][2]))) { -// tok = token(T_CHAR, (void*)((intptr_t)lookutable[i][0])); -// break; -// } -// } -// if (NULL == tok) -// tok = lexer_var(text); -// } -// return tok; -//} -// -//static Token* lexer_radix_int(char* text) -//{ -// Token* ret = lexer_integer(&text[2], read_radix(text[1])); -// if (NULL == ret) -// ret = lexer_var(text); -// return ret; -//} -// -//static Token* lexer_number(char* text) -//{ -// Token* tok = NULL; -// if (is_float(text)) -// tok = lexer_float(text); -// else -// tok = lexer_integer(text, 10); -// return (NULL == tok) ? lexer_var(text) : tok; -//} -// -//static Token* lexer_integer(char* text, int base) -//{ -// char* end; -// long* int = (long*)mem_allocate(sizeof(long), NULL); -// errno = 0; -// *int = strtol(text, &end, base); -// assert(errno == 0); -// return (end[0] == '\0') ? token(T_INT, int) : NULL; -//} -// -//static Token* lexer_float(char* text) -//{ -// char* end; -// double* dbl = (double*)mem_allocate(sizeof(double), NULL); -// errno = 0; -// *dbl = strtod(text, &end); -// assert(errno == 0); -// return (end[0] == '\0') ? token(T_FLOAT, dbl) : NULL; -//} -// -//static Token* lexer_bool(char* text) -//{ -// return token(T_BOOL, (void*)((intptr_t)((0 == strcmp(text,"true")) ? true : false))); -//} -// -//static Token* lexer_var(char* text) -//{ -// return token(T_ID, lexer_dup(text)); -//} -// -//static bool lexer_oneof(const char* class, char c) { -// bool ret = false; -// size_t sz = strlen(class); -// for (size_t idx = 0; idx < sz; idx++) { -// if (c == class[idx]) { -// ret = true; -// break; -// } -// } -// return ret; -//} -// -//static bool is_float(char* text) { -// while (text[0] != '\0') -// if (text[0] == '.') -// return true; -// else -// text++; -// return false; -//} -// -//char* lexer_dup(const char* old) { -// size_t length = strlen(old); -// char* str = (char*)mem_allocate(length+1, NULL); -// memcpy(str, old, length); -// str[length] = '\0'; -// return str; -//} -// -//static int read_radix(char ch) { -// int ret = -1; -// switch(ch) { -// case 'b': ret = 2; break; -// case 'o': ret = 8; break; -// case 'd': ret = 10; break; -// case 'h': ret = 16; break; -// } -// return ret; -//} -// -///*****************************************************************************/ -// - diff --git a/source/libparse/libparse.h b/source/libparse/libparse.h index 7753ab6..a01a9ee 100644 --- a/source/libparse/libparse.h +++ b/source/libparse/libparse.h @@ -88,12 +88,7 @@ typedef struct { // Lexer routines Tok* gettoken(Parser* ctx); - -//Lexer* lexer_new(char* p_prompt, FILE* p_input); -//Token* token(TokenType type, void* val); -//Token* lexer_read(Lexer* p_lexer); -//void lexer_skipline(Lexer* p_lexer); -//char* lexer_dup(const char* p_old); +void skipline(Parser* ctx); // Parser routines Parser* parser_new(char* p_prompt, FILE* input);