From 2794fae64d3af79ca0c0fcab85534fae718ac81e Mon Sep 17 00:00:00 2001
From: "Mike D. Lowis"
Date: Mon, 25 Feb 2013 16:15:17 -0500
Subject: [PATCH] Implemented machinery for working lexer

---
Review notes (text in this section is ignored when the patch is applied):
 * One diff record per line has been restored. The indentation of context
   lines and the header names after the bare #include directives were
   reconstructed from usage and should be checked against the tree.
 * file_peek() keeps the fgetc() result in an int so EOF cannot be confused
   with a data byte.
 * buf_init() and buf_grow() check their allocations.
 * next_token() no longer emits an empty identifier for trailing
   whitespace; lex_input() skips tokens that have no type.
 * The index lines still carry the blob ids of the unreviewed patch.

 source/lexer/buf.c     |  61 ++++++++++++++++++++
 source/lexer/buf.h     |  17 ++++++
 source/lexer/classes.c |  38 +++++++++++++
 source/lexer/classes.h |  17 ++++++
 source/lexer/file.c    |  16 +++++-
 source/lexer/file.h    |   1 +
 source/lexer/lex.c     | 115 ++++++++++++++++++++++++++++++++++++++
 source/lexer/lex.h     |  33 +++++++++++
 source/lexer/main.c    |  12 +++-
 9 files changed, 306 insertions(+), 4 deletions(-)
 create mode 100644 source/lexer/buf.c
 create mode 100644 source/lexer/buf.h
 create mode 100644 source/lexer/classes.c
 create mode 100644 source/lexer/classes.h
 create mode 100644 source/lexer/lex.c
 create mode 100644 source/lexer/lex.h

diff --git a/source/lexer/buf.c b/source/lexer/buf.c
new file mode 100644
index 0000000..0a72ef6
--- /dev/null
+++ b/source/lexer/buf.c
@@ -0,0 +1,61 @@
+#include <stdlib.h>
+#include <string.h>
+#include "buf.h"
+
+/* Capacity of Buffer in bytes, terminating NUL included. */
+size_t Size = 0;
+/* Heap storage for the NUL-terminated string under construction. */
+char* Buffer = NULL;
+/* Number of characters stored so far; Buffer[Index] holds the NUL. */
+size_t Index = 0;
+
+/* Allocate the initial storage and reset the buffer to the empty string.
+   Aborts when the allocation fails rather than writing through NULL. */
+void buf_init(void)
+{
+    Size = 16;
+    Buffer = (char*)malloc(Size);
+    if (NULL == Buffer)
+    {
+        abort();
+    }
+    Index = 0;
+    Buffer[Index] = '\0';
+}
+
+/* Append one character and keep the contents NUL-terminated, growing the
+   storage first when it is nearly full. */
+void buf_put(char ch)
+{
+    if ((Index + 2) >= Size)
+    {
+        buf_grow();
+    }
+    Buffer[Index++] = ch;
+    Buffer[Index] = '\0';
+}
+
+/* Return a strdup() copy of the accumulated text and empty the buffer.
+   The caller owns the copy and releases it with free(). */
+char* buf_accept(void)
+{
+    char* str = strdup(Buffer);
+    Index = 0;
+    Buffer[Index] = '\0';
+    return str;
+}
+
+/* Double the capacity. The realloc() result is checked before it replaces
+   Buffer, so a failed call cannot leave a NULL buffer behind. */
+void buf_grow(void)
+{
+    size_t new_size = (0 == Size) ? 16 : (Size * 2);
+    char* new_buffer = (char*)realloc(Buffer, new_size);
+    if (NULL == new_buffer)
+    {
+        abort();
+    }
+    Buffer = new_buffer;
+    Size = new_size;
+}
+
diff --git a/source/lexer/buf.h b/source/lexer/buf.h
new file mode 100644
index 0000000..01fbdea
--- /dev/null
+++ b/source/lexer/buf.h
@@ -0,0 +1,17 @@
+/**
+    @file buf.h
+    @brief A simple string building buffer akin to string streams in c++.
+    $Revision$
+    $HeadURL$
+*/
+#ifndef BUF_H
+#define BUF_H
+
+#include <stdlib.h>
+
+void buf_init(void);
+void buf_put(char ch);
+char* buf_accept(void);
+void buf_grow(void);
+
+#endif /* BUF_H */
diff --git a/source/lexer/classes.c b/source/lexer/classes.c
new file mode 100644
index 0000000..d5ea2e4
--- /dev/null
+++ b/source/lexer/classes.c
@@ -0,0 +1,38 @@
+/**
+    @file classes.c
+    @brief See header for details
+    $Revision$
+    $HeadURL$
+*/
+#include "classes.h"
+#include "file.h"
+
+/* True when the next unread character is a space, tab, LF or CR. */
+bool whitespace(void)
+{
+    char ch = file_peek();
+    return ((' ' == ch) || ('\t' == ch) || ('\n' == ch) || ('\r' == ch));
+}
+
+/* True when the next unread character is a decimal digit. */
+bool digit(void)
+{
+    char ch = file_peek();
+    return (('0' <= ch) && (ch <= '9'));
+}
+
+/* True when the next unread character is a hexadecimal digit. */
+bool hex_digit(void)
+{
+    char ch = file_peek();
+    return (('0' <= ch) && (ch <= '9')) ||
+           (('a' <= ch) && (ch <= 'f')) ||
+           (('A' <= ch) && (ch <= 'F'));
+}
+
+/* True when the next character is whitespace or the input is exhausted. */
+bool token_end(void)
+{
+    return (whitespace() || file_eof());
+}
+
diff --git a/source/lexer/classes.h b/source/lexer/classes.h
new file mode 100644
index 0000000..759d066
--- /dev/null
+++ b/source/lexer/classes.h
@@ -0,0 +1,17 @@
+/**
+    @file classes.h
+    @brief Character class predicates evaluated on the next unread character.
+    $Revision$
+    $HeadURL$
+*/
+#ifndef CLASSES_H
+#define CLASSES_H
+
+#include <stdbool.h>
+
+bool whitespace(void);
+bool digit(void);
+bool hex_digit(void);
+bool token_end(void);
+
+#endif /* CLASSES_H */
diff --git a/source/lexer/file.c b/source/lexer/file.c
index 82f462c..be8c90f 100644
--- a/source/lexer/file.c
+++ b/source/lexer/file.c
@@ -8,8 +8,8 @@ FILE* Handle = NULL;
 
 bool file_open(char* fname)
 {
-    Line = 0;
-    Column = 0;
+    Line = 1;
+    Column = 1;
     Name = fname;
     if (NULL == Name)
     {
@@ -57,6 +57,18 @@ char file_get(void)
     return ret;
 }
 
+/* Return the next character without consuming it. The fgetc() result is
+   kept in an int so that EOF stays distinct from every valid byte. */
+char file_peek(void)
+{
+    int ret = fgetc(Handle);
+    if (EOF != ret)
+    {
+        ungetc(ret,Handle);
+    }
+    return (char)ret;
+}
+
 int file_line(void)
 {
     return Line;
diff --git a/source/lexer/file.h b/source/lexer/file.h
index 1e381e3..610faf7 100644
--- a/source/lexer/file.h
+++ b/source/lexer/file.h
@@ -15,6 +15,7 @@ bool file_open(char* fname);
 void file_close(void);
 bool file_eof(void);
 char file_get(void);
+char file_peek(void);
 int file_line(void);
 int file_column(void);
 char* file_name(void);
diff --git a/source/lexer/lex.c b/source/lexer/lex.c
new file mode 100644
index 0000000..e8ac1a2
--- /dev/null
+++ b/source/lexer/lex.c
@@ -0,0 +1,115 @@
+/**
+    @file lex.c
+    @brief See header for details
+    $Revision$
+    $HeadURL$
+*/
+#include <string.h>
+#include "lex.h"
+#include "classes.h"
+#include "file.h"
+#include "buf.h"
+
+/* Token under construction; next_token() returns it by value. */
+tok_t Token = { 0 };
+
+/* Printable name of each token type, indexed by tok_type_t. */
+const char* Token_Strings[TOK_MAX] = {
+    "id",  /* TOK_ID */
+    "num", /* TOK_NUM */
+};
+
+/* Skip leading whitespace and lex one token. When nothing but whitespace
+   is left the returned token is all zeroes, so callers must check that
+   token.type is not NULL before using it. */
+tok_t next_token(void)
+{
+    (void)memset(&Token,0,sizeof(Token));
+    if (!file_eof())
+    {
+        consume_whitespace();
+        /* Trailing whitespace must not yield an empty identifier. */
+        if (!file_eof())
+        {
+            record_position();
+            /* TODO: character and string literals (leading ' or "). */
+            if (digit())
+            {
+                number();
+            }
+            else
+            {
+                identifier();
+            }
+        }
+    }
+    return Token;
+}
+
+/* Lex a run of decimal digits. When the run is not followed by whitespace
+   or end of input the text is continued as an identifier instead. */
+void number(void)
+{
+    set_type(TOK_NUM);
+
+    while (digit())
+    {
+        consume();
+    }
+
+    if (!token_end())
+    {
+        identifier();
+    }
+    else
+    {
+        accept();
+    }
+}
+
+/* Consume characters up to the next whitespace or end of input and accept
+   them as an identifier. */
+void identifier(void)
+{
+    set_type(TOK_ID);
+    while (!token_end())
+    {
+        consume();
+    }
+    accept();
+}
+
+/* Record the current file line and column in the token. */
+void record_position(void)
+{
+    Token.line = file_line();
+    Token.column = file_column();
+}
+
+/* Point the token's type at the matching Token_Strings entry. */
+void set_type(tok_type_t type)
+{
+    Token.type = Token_Strings[type];
+}
+
+/* Move the next input character into the string buffer. */
+void consume(void)
+{
+    buf_put( file_get() );
+}
+
+/* Discard input characters for as long as the next one is whitespace. */
+void consume_whitespace(void)
+{
+    while( whitespace() )
+    {
+        (void)file_get();
+    }
+}
+
+/* Finish the token by taking ownership of the buffered text. */
+void accept(void)
+{
+    Token.str = buf_accept();
+}
+
diff --git a/source/lexer/lex.h b/source/lexer/lex.h
new file mode 100644
index 0000000..e25f478
--- /dev/null
+++ b/source/lexer/lex.h
@@ -0,0 +1,33 @@
+/**
+    @file lex.h
+    @brief Token type and the routines that lex tokens from the open file.
+    $Revision$
+    $HeadURL$
+*/
+#ifndef LEX_H
+#define LEX_H
+
+typedef struct
+{
+    int line;         /* file_line() value recorded before the token text */
+    int column;       /* file_column() value recorded before the token text */
+    const char* type; /* entry of Token_Strings; NULL when no token was read */
+    char* str;        /* heap copy of the token text, freed by the receiver */
+} tok_t;
+
+typedef enum {
+    TOK_ID = 0,
+    TOK_NUM = 1,
+    TOK_MAX = 2,
+} tok_type_t;
+
+tok_t next_token(void);
+void record_position(void);
+void identifier(void);
+void number(void);
+void set_type(tok_type_t type);
+void consume(void);
+void consume_whitespace(void);
+void accept(void);
+
+#endif /* LEX_H */
diff --git a/source/lexer/main.c b/source/lexer/main.c
index 1b4b269..9268df9 100644
--- a/source/lexer/main.c
+++ b/source/lexer/main.c
@@ -1,6 +1,8 @@
 #include <stdio.h>
 #include "gc.h"
 #include "file.h"
+#include "buf.h"
+#include "lex.h"
 
 int lex_files(int argc, char** argv);
 int lex_input(FILE* outfile);
@@ -8,6 +10,7 @@ int lex_input(FILE* outfile);
 int main(int argc, char** argv)
 {
     int ret;
+    buf_init();
     if (argc > 1)
     {
         ret = lex_files(argc,argv);
@@ -48,8 +51,13 @@ int lex_input(FILE* outfile)
     int ret = 0;
     while (!file_eof())
     {
-        char ch = file_get();
-        fprintf(stdout,"%s %d %d %c\n","char",file_line(),file_column(),ch);
+        tok_t token = next_token();
+        /* next_token() yields an all-zero token when no input was left. */
+        if ((NULL != token.type) && (NULL != token.str))
+        {
+            fprintf(stdout, "%s %d %d %s\n", token.type, token.line, token.column, token.str);
+        }
+        free(token.str);
     }
     return ret;
 }
-- 
2.52.0