From fcecc445c51a423fdc9d5373e06e5b88e7b05a30 Mon Sep 17 00:00:00 2001 From: "Mike D. Lowis" Date: Tue, 13 Aug 2013 15:24:58 -0400 Subject: [PATCH] Whack the old C based lexer and parser in favor of an all scheme parser --- source/common/tokens/tokens.c | 22 ---- source/common/tokens/tokens.h | 27 ----- source/lexer/buf.c | 39 ------- source/lexer/buf.h | 17 --- source/lexer/classes.c | 56 --------- source/lexer/classes.h | 21 ---- source/lexer/file.c | 83 ------------- source/lexer/file.h | 23 ---- source/lexer/lex.c | 213 ---------------------------------- source/lexer/lex.h | 21 ---- source/lexer/main.c | 62 ---------- source/lexer/tok.c | 49 -------- source/lexer/tok.h | 26 ----- source/parser/main.c | 54 --------- source/parser/tok.c | 169 --------------------------- source/parser/tok.h | 33 ------ 16 files changed, 915 deletions(-) delete mode 100644 source/common/tokens/tokens.c delete mode 100644 source/common/tokens/tokens.h delete mode 100644 source/lexer/buf.c delete mode 100644 source/lexer/buf.h delete mode 100644 source/lexer/classes.c delete mode 100644 source/lexer/classes.h delete mode 100644 source/lexer/file.c delete mode 100644 source/lexer/file.h delete mode 100644 source/lexer/lex.c delete mode 100644 source/lexer/lex.h delete mode 100644 source/lexer/main.c delete mode 100644 source/lexer/tok.c delete mode 100644 source/lexer/tok.h delete mode 100644 source/parser/main.c delete mode 100644 source/parser/tok.c delete mode 100644 source/parser/tok.h diff --git a/source/common/tokens/tokens.c b/source/common/tokens/tokens.c deleted file mode 100644 index 8985269..0000000 --- a/source/common/tokens/tokens.c +++ /dev/null @@ -1,22 +0,0 @@ -/** - @file tokens.c - @brief See header for details - $Revision$ - $HeadURL$ -*/ -#include "tokens.h" - -const char* Token_Types[TOK_MAX] = { - "EOF", /* TOK_EOF */ - "ID", /* TOK_ID */ - "NUM", /* TOK_NUM */ - "LPAREN", /* TOK_LPAR */ - "RPAREN", /* TOK_RPAR */ - "LBRACK", /* TOK_LBRACK */ - "RBRACK", /* TOK_RBRACK */ - "LBRACE", /* TOK_LBRACE */ - "RBRACE", /* TOK_RBRACE */ - "TERM", /* TOK_TERM */ - "BOOL", /* TOK_BOOL */ -}; - diff --git a/source/common/tokens/tokens.h b/source/common/tokens/tokens.h deleted file mode 100644 index ad6009e..0000000 --- a/source/common/tokens/tokens.h +++ /dev/null @@ -1,27 +0,0 @@ -/** - @file tokens.h - @brief TODO: Describe this file - $Revision$ - $HeadURL$ -*/ -#ifndef TOKENS_H -#define TOKENS_H - -typedef enum { - TOK_EOF = 0, - TOK_ID = 1, - TOK_NUM = 2, - TOK_LPAR = 3, - TOK_RPAR = 4, - TOK_LBRACK = 5, - TOK_RBRACK = 6, - TOK_LBRACE = 7, - TOK_RBRACE = 8, - TOK_TERM = 9, - TOK_BOOL = 10, - TOK_MAX = 11, -} tok_type_t; - -extern const char* Token_Types[TOK_MAX]; - -#endif /* TOKENS_H */ diff --git a/source/lexer/buf.c b/source/lexer/buf.c deleted file mode 100644 index 0a72ef6..0000000 --- a/source/lexer/buf.c +++ /dev/null @@ -1,39 +0,0 @@ -#include -#include "buf.h" - -size_t Size = 0; -char* Buffer = NULL; -size_t Index = 0; - -void buf_init(void) -{ - Size = 16; - Buffer = (char*)malloc(Size); - Index = 0; - Buffer[Index] = '\0'; -} - -void buf_put(char ch) -{ - if ((Index + 2) >= Size) - { - buf_grow(); - } - Buffer[Index++] = ch; - Buffer[Index] = '\0'; -} - -char* buf_accept(void) -{ - char* str = strdup(Buffer); - Index = 0; - Buffer[Index] = '\0'; - return str; -} - -void buf_grow(void) -{ - Size = Size * 2; - Buffer = realloc( Buffer, Size ); -} - diff --git a/source/lexer/buf.h b/source/lexer/buf.h deleted file mode 100644 index 01fbdea..0000000 --- a/source/lexer/buf.h +++ /dev/null @@ -1,17 +0,0 @@ -/** - @file buf.h - @brief A simple string building buffer akin to string streams in c++. - $Revision$ - $HeadURL$ -*/ -#ifndef BUF_H -#define BUF_H - -#include - -void buf_init(void); -void buf_put(char ch); -char* buf_accept(void); -void buf_grow(void); - -#endif /* BUF_H */ diff --git a/source/lexer/classes.c b/source/lexer/classes.c deleted file mode 100644 index f2fc3c4..0000000 --- a/source/lexer/classes.c +++ /dev/null @@ -1,56 +0,0 @@ -/** - @file classes.c - @brief See header for details - $Revision$ - $HeadURL$ -*/ -#include -#include "classes.h" -#include "file.h" - -bool whitespace(void) -{ - char ch = file_peek(); - return ((' ' == ch) || ('\t' == ch) || ('\n' == ch) || ('\r' == ch)); -} - -bool digit(void) -{ - char ch = file_peek(); - return (('0' <= ch) && (ch <= '9')); -} - -bool hex_digit(void) -{ - char ch = file_peek(); - return (('0' <= ch) && (ch <= '9')) || - (('A' <= ch) && (ch <= 'F')); -} - -bool token_end(void) -{ - return (whitespace() || matches_any("()[]{};") || file_eof()); -} - -bool matches(char ch) -{ - return (ch == file_peek()); -} - -bool matches_any(char* str) -{ - bool ret = false; - char ch = file_peek(); - int len = strlen(str); - int i; - for (i=0; i < len; i++) - { - if (ch == str[i]) - { - ret = true; - break; - } - } - return ret; -} - diff --git a/source/lexer/classes.h b/source/lexer/classes.h deleted file mode 100644 index c2b10b0..0000000 --- a/source/lexer/classes.h +++ /dev/null @@ -1,21 +0,0 @@ -/** - @file classes.h - @brief TODO: Describe this file - $Revision$ - $HeadURL$ -*/ -#ifndef CLASSES_H -#define CLASSES_H - -#include - -typedef bool (*predicate_t)(void); - -bool whitespace(void); -bool digit(void); -bool hex_digit(void); -bool token_end(void); -bool matches(char ch); -bool matches_any(char* str); - -#endif /* CLASSES_H */ diff --git a/source/lexer/file.c b/source/lexer/file.c deleted file mode 100644 index 9f10e2b..0000000 --- a/source/lexer/file.c +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include "file.h" - -int Line = 1; -int Column = 1; -char* Name = NULL; -FILE* Handle = NULL; - -bool file_open(char* fname) -{ - Line = 1; - Column = 1; - Name = fname; - if (NULL == Name) - { - Handle = stdin; - } - else - { - Handle = fopen(fname,"r"); - } - return (NULL != Handle); -} - -void file_close(void) -{ - fclose(Handle); -} - -bool file_eof(void) -{ - bool ret = true; - if (NULL != Handle) - { - ret = feof( Handle ); - } - return ret; -} - -char file_get(void) -{ - char ret = EOF; - if (NULL != Handle) - { - ret = fgetc(Handle); - if ('\n' == ret) - { - Line++; - Column = 1; - } - else - { - Column++; - } - } - return ret; -} - -char file_peek(void) -{ - char ret = fgetc(Handle); - if (EOF != ret) - { - ungetc(ret,Handle); - } - return ret; -} - -int file_line(void) -{ - return Line; -} - -int file_column(void) -{ - return Column; -} - -char* file_name(void) -{ - return (NULL != Name) ? Name : ""; -} - diff --git a/source/lexer/file.h b/source/lexer/file.h deleted file mode 100644 index 610faf7..0000000 --- a/source/lexer/file.h +++ /dev/null @@ -1,23 +0,0 @@ -/** - @file file.h - @brief Describes a simple module for tracking input from a file including - line and column info. - $Revision$ - $HeadURL$ -*/ -#ifndef FILE_H -#define FILE_H - -#include -#include - -bool file_open(char* fname); -void file_close(void); -bool file_eof(void); -char file_get(void); -char file_peek(void); -int file_line(void); -int file_column(void); -char* file_name(void); - -#endif /* FILE_H */ diff --git a/source/lexer/lex.c b/source/lexer/lex.c deleted file mode 100644 index c581979..0000000 --- a/source/lexer/lex.c +++ /dev/null @@ -1,213 +0,0 @@ -/** - @file lex.c - @brief See header for details - $Revision$ - $HeadURL$ -*/ -#include -#include -#include "lex.h" -#include "tok.h" -#include "classes.h" -#include "file.h" -#include "buf.h" -#include "tokens.h" - -/* Prototypes - *****************************************************************************/ -static void accept(void); -static void accept_char(tok_type_t tok); -static void abort(void); -static void reset(void); -static void match_and_consume(char ch); -static bool one_or_more(predicate_t pfn); -static void keyword(void); -static void comment(void); -static void punctuation(void); -static void number(void); -static void hexadecimal(void); -static void floating_point(void); -static void exponent(void); -static void identifier(void); - -/* Global Variables - *****************************************************************************/ -jmp_buf Jump_Point; - -const lex_keyword_t Keywords[] = { - { "end", TOK_TERM }, - { "true", TOK_BOOL }, - { "false", TOK_BOOL }, - { NULL, TOK_MAX} -}; - -/* Control Functions - *****************************************************************************/ -static void accept(void) -{ - if (!token_end()) - abort(); - else - tok_accept(); -} - -static void accept_char(tok_type_t tok) -{ - tok_set_type( Token_Types[tok] ); - tok_consume(); - tok_accept(); -} - -static void abort(void) -{ - longjmp(Jump_Point,1); -} - -static void reset(void) -{ - while( whitespace() ) - tok_discard(); - tok_reset(); -} - -static void match_and_consume(char ch) -{ - if (matches(ch)) - tok_consume(); - else - abort(); -} - -static bool one_or_more(predicate_t pfn) -{ - if (!pfn()) abort(); - while (pfn()) tok_consume(); -} - -/* Token Matching Functions - *****************************************************************************/ -void next_token(tok_t* p_token) -{ - reset(); - if (!file_eof()) - { - /* Mark our starting point so we can resume if we abort */ - if (0 == setjmp(Jump_Point)) - { - if (matches('#')) comment(); - - if (matches_any("()[]{};")) - punctuation(); - else if (matches('-') || digit()) - number(); - //else if (matches('\'')) - // character(); - //else if (matches('\"')) - // string(); - else - identifier(); - } - else - { - identifier(); - } - keyword(); - } - tok_copy( p_token ); -} - -static void keyword(void) -{ - const char* p_text = tok_string(); - int i = 0; - while ( Keywords[i].p_text != NULL) - { - if (0 == strcmp( p_text, Keywords[i].p_text )) - { - tok_set_type( Token_Types[ Keywords[i].type ] ); - break; - } - i++; - } -} - -static void comment(void) -{ - while (!matches('\n')) - tok_discard(); - while( whitespace() ) - tok_discard(); -} - -static void punctuation(void) -{ - switch (file_peek()) - { - case '(': accept_char( TOK_LPAR ); break; - case ')': accept_char( TOK_RPAR ); break; - case '[': accept_char( TOK_LBRACK ); break; - case ']': accept_char( TOK_RBRACK ); break; - case '{': accept_char( TOK_LBRACE ); break; - case '}': accept_char( TOK_RBRACE ); break; - case ';': accept_char( TOK_TERM ); break; - default: identifier(); break; - } -} - -static void number(void) -{ - tok_set_type(Token_Types[TOK_NUM]); - if (matches('0')) - { - tok_consume(); - if (matches('x')) - { - tok_consume(); - hexadecimal(); - } - else if (matches('-')) - abort(); - else if (!token_end()) - { - floating_point(); - if (!token_end()) exponent(); - } - } - else - { - floating_point(); - if (!token_end()) exponent(); - } - accept(); -} - -static void hexadecimal(void) -{ - one_or_more( hex_digit ); -} - -static void floating_point(void) -{ - if (matches('-')) tok_consume(); - one_or_more( digit ); - if (matches('.')) - { - tok_consume(); - one_or_more( digit ); - } -} - -static void exponent(void) -{ - match_and_consume('e'); - floating_point(); -} - -static void identifier(void) -{ - tok_set_type(Token_Types[TOK_ID]); - while (!token_end()) - tok_consume(); - accept(); -} - diff --git a/source/lexer/lex.h b/source/lexer/lex.h deleted file mode 100644 index 8598165..0000000 --- a/source/lexer/lex.h +++ /dev/null @@ -1,21 +0,0 @@ -/** - @file lex.h - @brief TODO: Describe this file - $Revision$ - $HeadURL$ -*/ -#ifndef LEX_H -#define LEX_H - -#include "tok.h" -#include "classes.h" -#include "tokens.h" - -typedef struct { - const char* p_text; - tok_type_t type; -} lex_keyword_t; - -void next_token(tok_t* p_token); - -#endif /* LEX_H */ diff --git a/source/lexer/main.c b/source/lexer/main.c deleted file mode 100644 index 9fc64d2..0000000 --- a/source/lexer/main.c +++ /dev/null @@ -1,62 +0,0 @@ -#include -#include "gc.h" -#include "file.h" -#include "buf.h" -#include "lex.h" - -int lex_files(int argc, char** argv); -int lex_input(FILE* outfile); - -int main(int argc, char** argv) -{ - int ret; - buf_init(); - if (argc > 1) - { - ret = lex_files(argc,argv); - } - else - { - file_open(NULL); - ret = lex_input(stdout); - file_close(); - } - return ret; -} - -int lex_files(int argc, char** argv) -{ - int ret = 0; - int i; - for (i = 1; i < argc; i++) - { - if (file_open(argv[i])) - { - fprintf(stdout, "@file %s\n", file_name()); - ret = lex_input(stdout); - file_close(); - } - else - { - fprintf(stderr, "@error File not found: %s\n", argv[i]); - ret = 1; - break; - } - } - return ret; -} - -int lex_input(FILE* outfile) -{ - tok_t token; - int ret = 0; - while (!file_eof()) - { - next_token( &token ); - if (token.type != NULL) - fprintf(outfile, "%s\t%d\t%d\t%s\n", token.type, token.line, token.column, token.str); - free(token.str); - } - return ret; -} - diff --git a/source/lexer/tok.c b/source/lexer/tok.c deleted file mode 100644 index 84b2302..0000000 --- a/source/lexer/tok.c +++ /dev/null @@ -1,49 +0,0 @@ -/** - @file tok.c - @brief See header for details - $Revision$ - $HeadURL$ -*/ -#include -#include "tok.h" -#include "buf.h" - -tok_t Token; - -void tok_reset(void) -{ - (void)memset(&Token,0,sizeof(Token)); - Token.line = file_line(); - Token.column = file_column(); -} - -void tok_copy(tok_t* p_token) -{ - *(p_token) = Token; -} - -void tok_set_type(const char* p_str) -{ - Token.type = p_str; -} - -char* tok_string(void) -{ - return Token.str; -} - -void tok_consume(void) -{ - buf_put( file_get() ); -} - -void tok_discard(void) -{ - (void)file_get(); -} - -void tok_accept(void) -{ - Token.str = buf_accept(); -} - diff --git a/source/lexer/tok.h b/source/lexer/tok.h deleted file mode 100644 index c6521f0..0000000 --- a/source/lexer/tok.h +++ /dev/null @@ -1,26 +0,0 @@ -/** - @file tok.h - @brief TODO: Describe this file - $Revision$ - $HeadURL$ -*/ -#ifndef TOK_H -#define TOK_H - -typedef struct -{ - int line; - int column; - const char* type; - char* str; -} tok_t; - -void tok_reset(void); -void tok_copy(tok_t* p_token); -void tok_set_type(const char* p_str); -char* tok_string(void); -void tok_consume(void); -void tok_discard(void); -void tok_accept(void); - -#endif /* TOK_H */ diff --git a/source/parser/main.c b/source/parser/main.c deleted file mode 100644 index 2e60382..0000000 --- a/source/parser/main.c +++ /dev/null @@ -1,54 +0,0 @@ -#include -#include "tok.h" - -int parse_files(int num_files, char** fnames); -int parse_input(char* outfile); - -int main(int argc, char** argv) -{ - int ret; - if (argc > 1) - { - ret = parse_files(argc,argv); - } - else - { - ret = parse_input(NULL); - } - return ret; -} - -int parse_files(int num_files, char** fnames) -{ - int ret = 0; - int i; - for (i = 1; i < num_files; i++) - { - parse_input(fnames[i]); - } -} - -int parse_input(char* outfile) -{ - int ret = 0; - if (tok_source(outfile)) - { - while (!tok_eof()) - { - tok_t* p_tok = tok_read(); - if (NULL != p_tok) - { - printf( "%s:%d:%d:\t%d\t%s", - p_tok->p_file_name, - p_tok->line, - p_tok->column, - p_tok->type, - p_tok->p_text ); - } - } - tok_finish(); - } - return ret; -} - - diff --git a/source/parser/tok.c b/source/parser/tok.c deleted file mode 100644 index b4f63b9..0000000 --- a/source/parser/tok.c +++ /dev/null @@ -1,169 +0,0 @@ -/** - @file tok.c - @brief See header for details - $Revision$ - $HeadURL$ -*/ -#include -#include -#include -#include "tok.h" -#include "tokens.h" - -#define FIELD_TYPE 0 -#define FIELD_LINE_NUM 1 -#define FIELD_COLUMN_NUM 2 -#define FIELD_TEXT 3 -#define FIELD_MAX 4 - -#define MAX_INPUT_STR 1024 -FILE* Handle = NULL; -tok_t Token = { 0 }; -char Buffer[MAX_INPUT_STR]; - -bool tok_source(char* fname) -{ - if (NULL == fname) - { - Handle = stdin; - } - else - { - Handle = fopen(fname,"r"); - } - return (NULL != Handle); -} - -bool tok_eof(void) -{ - bool ret = true; - if (NULL != Handle) - { - ret = feof( Handle ); - } - return ret; -} - -void tok_finish(void) -{ - fclose(Handle); -} - -tok_t* tok_read(void) -{ - tok_t* p_tok = NULL; - if (NULL != Handle) - { - fgets(Buffer, MAX_INPUT_STR, Handle); - if ('\0' != Buffer[0]) - { - if ('@' == Buffer[0]) - { - tok_read_fname(); - p_tok = tok_read(); - } - else - { - p_tok = tok_build_token(); - } - Buffer[0] = '\0'; - } - } - return p_tok; -} - -void tok_read_fname(void) -{ - uint32_t index = 0; - Buffer[strlen(Buffer)-1] = '\0'; - while(' ' != Buffer[index]) index++; - char* new_str = (char*)malloc( strlen(&Buffer[index+1]) ); - strcpy( new_str, &Buffer[index+1] ); - Token.p_file_name = new_str; -} - -tok_t* tok_build_token(void) -{ - tok_t* ret = NULL; - uint32_t index; - uint32_t start = 0; - uint32_t end = 0; - - /* Look for and read all of the fields */ - for (index = 0; index < FIELD_MAX; index++) - { - /* Advance to the next field */ - bool last_field = tok_next_field( &end ); - /* copy the filed data */ - tok_read_field( index, &Buffer[start] ); - /* advance to next field or exit if last field */ - if (last_field) break; - else start = ++end; - } - - /* Copy the token to the heap */ - if (index == FIELD_TEXT) - { - ret = (tok_t*)calloc(1,sizeof(tok_t)); - *ret = Token; - } - - return ret; -} - -bool tok_next_field(uint32_t* end) -{ - bool last_field = false; - while (('\t' != Buffer[*end]) && - !(('\r' == Buffer[*end]) || - ('\0' == Buffer[*end]))) - { - (*end)++; - } - last_field = (('\r' == Buffer[*end]) || - ('\0' == Buffer[*end])) ? true : false; - Buffer[*end] = '\0'; - return last_field; -} - -void tok_read_field(uint32_t index, char* str) -{ - uint32_t type; - switch (index) - { - case FIELD_TYPE: - for (type = 0; type < TOK_MAX; type++) - { - if (0 == strcmp(Token_Types[type],str)) - { - Token.type = (tok_type_t)type; - break; - } - } - break; - - case FIELD_LINE_NUM: - Token.line = atoi( str ); - break; - - case FIELD_COLUMN_NUM: - Token.column = atoi( str ); - break; - - case FIELD_TEXT: - Token.p_text = (char*)malloc(strlen(str)); - strcpy( Token.p_text, str ); - break; - - default: - tok_fatal_error(1); - break; - } -} - -void tok_fatal_error(uint32_t err_code) -{ - fprintf(stderr,"Fatal Error\n"); - exit(err_code); -} - diff --git a/source/parser/tok.h b/source/parser/tok.h deleted file mode 100644 index 2070070..0000000 --- a/source/parser/tok.h +++ /dev/null @@ -1,33 +0,0 @@ -/** - @file tok.h - @brief TODO: Describe this file - $Revision$ - $HeadURL$ -*/ -#ifndef TOK_H -#define TOK_H - -#include -#include -#include "tokens.h" - -typedef struct -{ - tok_type_t type; - char* p_text; - char* p_file_name; - long line; - long column; -} tok_t; - -bool tok_source(char* fname); -bool tok_eof(void); -void tok_finish(void); -tok_t* tok_read(void); -void tok_read_fname(void); -tok_t* tok_build_token(void); -bool tok_next_field(uint32_t* end); -void tok_read_field(uint32_t index, char* str); -void tok_fatal_error(uint32_t err_code); - -#endif /* TOK_H */ -- 2.52.0