From: Mike D. Lowis Date: Tue, 26 Feb 2013 01:20:41 +0000 (-0500) Subject: added punctuation and terminator rules X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=7d33e9d5a88e29b424547658fd93b4dac5b414a2;p=proto%2Fsclpl.git added punctuation and terminator rules --- diff --git a/source/lexer/classes.c b/source/lexer/classes.c index d5ea2e4..a57ad66 100644 --- a/source/lexer/classes.c +++ b/source/lexer/classes.c @@ -4,6 +4,7 @@ $Revision$ $HeadURL$ */ +#include <string.h> #include "classes.h" #include "file.h" @@ -32,3 +33,25 @@ bool token_end(void) return (whitespace() || file_eof()); } +bool matches(char ch) +{ + return (ch == file_peek()); +} + +bool matches_any(char* str) +{ + bool ret = false; + char ch = file_peek(); + int len = strlen(str); + int i; + for (i=0; i < len; i++) + { + if (ch == str[i]) + { + ret = true; + break; + } + } + return ret; +} + diff --git a/source/lexer/classes.h b/source/lexer/classes.h index 759d066..25ee730 100644 --- a/source/lexer/classes.h +++ b/source/lexer/classes.h @@ -13,5 +13,7 @@ bool whitespace(void); bool digit(void); bool hex_digit(void); bool token_end(void); +bool matches(char ch); +bool matches_any(char* str); #endif /* CLASSES_H */ diff --git a/source/lexer/lex.c b/source/lexer/lex.c index e8ac1a2..999cb49 100644 --- a/source/lexer/lex.c +++ b/source/lexer/lex.c @@ -13,18 +13,26 @@ tok_t Token = { 0u }; const char* Token_Strings[TOK_MAX] = { - "id", /* TOK_ID */ - "num", /* TOK_NUM */ + "EOF", /* TOK_EOF */ + "ID", /* TOK_ID */ + "NUM", /* TOK_NUM */ + "LPAREN", /* TOK_LPAR */ + "RPAREN", /* TOK_RPAR */ + "LBRACK", /* TOK_LBRACK */ + "RBRACK", /* TOK_RBRACK */ + "LBRACE", /* TOK_LBRACE */ + "RBRACE", /* TOK_RBRACE */ + "TERM", /* TOK_TERM */ }; tok_t next_token(void) { - (void)memset(&Token,0,sizeof(Token)); + prepare_for_token(); if (!file_eof()) { - consume_whitespace(); - record_position(); - if (digit()) + if (matches_any("()[]{};")) + punctuation(); + else if (digit()) number(); //else if (matches('\'')) // 
character(); @@ -32,10 +40,29 @@ tok_t next_token(void) // string(); else identifier(); + + /* the keyword "end" is actually a TOK_TERM */ + if (0 == strcmp(Token.str,"end")) + set_type(TOK_TERM); } return Token; } +void punctuation(void) +{ + switch (file_peek()) + { + case '(': accept_char( TOK_LPAR ); break; + case ')': accept_char( TOK_RPAR ); break; + case '[': accept_char( TOK_LBRACK ); break; + case ']': accept_char( TOK_RBRACK ); break; + case '{': accept_char( TOK_LBRACE ); break; + case '}': accept_char( TOK_RBRACE ); break; + case ';': accept_char( TOK_TERM ); break; + default: identifier(); break; + } +} + void number() { set_type(TOK_NUM); @@ -51,7 +78,7 @@ void number() void identifier() { set_type(TOK_ID); - while (!token_end()) consume(); + while (!token_end() && !matches_any("()[]{};")) consume(); accept(); } @@ -71,10 +98,19 @@ void consume(void) buf_put( file_get() ); } -void consume_whitespace(void) +void prepare_for_token(void) { + (void)memset(&Token,0,sizeof(Token)); while( whitespace() ) (void)file_get(); + record_position(); +} + +void accept_char(tok_type_t type) +{ + set_type(type); + consume(); + accept(); } void accept() diff --git a/source/lexer/lex.h b/source/lexer/lex.h index e25f478..a85e61a 100644 --- a/source/lexer/lex.h +++ b/source/lexer/lex.h @@ -16,18 +16,28 @@ typedef struct } tok_t; typedef enum { - TOK_ID = 0, - TOK_NUM = 1, - TOK_MAX = 2, + TOK_EOF = 0, + TOK_ID = 1, + TOK_NUM = 2, + TOK_LPAR = 3, + TOK_RPAR = 4, + TOK_LBRACK = 5, + TOK_RBRACK = 6, + TOK_LBRACE = 7, + TOK_RBRACE = 8, + TOK_TERM = 9, + TOK_MAX = 10, } tok_type_t; tok_t next_token(void); +void punctuation(void); void record_position(void); void identifier(void); void number(void); void set_type(tok_type_t type); void consume(void); -void consume_whitespace(void); +void prepare_for_token(void); +void accept_char(tok_type_t type); void accept(void); #endif /* LEX_H */ diff --git a/source/lexer/main.c b/source/lexer/main.c index 9268df9..41e2f8e 100644 --- 
a/source/lexer/main.c +++ b/source/lexer/main.c @@ -52,7 +52,8 @@ int lex_input(FILE* outfile) while (!file_eof()) { tok_t token = next_token(); - fprintf(stdout, "%s %d %d %s\n", token.type, token.line, token.column, token.str); + if (token.type != NULL) + fprintf(outfile, "%s\t%d\t%d\t%s\n", token.type, token.line, token.column, token.str); free(token.str); } return ret;