From: Mike D. Lowis <mike@mdlowis.com>
Date: Tue, 26 Feb 2013 01:20:41 +0000 (-0500)
Subject: added punctuation and terminator rules
X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=7d33e9d5a88e29b424547658fd93b4dac5b414a2;p=proto%2Fsclpl.git

added punctuation and terminator rules
---

diff --git a/source/lexer/classes.c b/source/lexer/classes.c
index d5ea2e4..a57ad66 100644
--- a/source/lexer/classes.c
+++ b/source/lexer/classes.c
@@ -4,6 +4,7 @@
     $Revision$
     $HeadURL$
 */
+#include <string.h>
 #include "classes.h"
 #include "file.h"
 
@@ -32,3 +33,25 @@ bool token_end(void)
     return (whitespace() || file_eof());
 }
 
+bool matches(char ch) /* True when file_peek() returns exactly ch. */
+{
+    return (ch == file_peek());
+}
+
+bool matches_any(char* str) /* True when file_peek() returns any character contained in the NUL-terminated set str. */
+{
+    bool ret = false;
+    char ch = file_peek(); /* peeked once; the same value is compared against every candidate */
+    int len = strlen(str); /* terminator is excluded, so a '\0' from file_peek() never matches */
+    int i;
+    for (i=0; i < len; i++)
+    {
+        if (ch == str[i])
+        {
+            ret = true;
+            break; /* first hit decides the result */
+        }
+    }
+    return ret;
+}
+
diff --git a/source/lexer/classes.h b/source/lexer/classes.h
index 759d066..25ee730 100644
--- a/source/lexer/classes.h
+++ b/source/lexer/classes.h
@@ -13,5 +13,7 @@ bool whitespace(void);
 bool digit(void);
 bool hex_digit(void);
 bool token_end(void);
+bool matches(char ch);
+bool matches_any(char* str);
 
 #endif /* CLASSES_H */
diff --git a/source/lexer/lex.c b/source/lexer/lex.c
index e8ac1a2..999cb49 100644
--- a/source/lexer/lex.c
+++ b/source/lexer/lex.c
@@ -13,18 +13,26 @@
 tok_t Token = { 0u };
 
 const char* Token_Strings[TOK_MAX] = {
-    "id",  /* TOK_ID  */
-    "num", /* TOK_NUM */
+    "EOF",    /* TOK_EOF -- entries are listed in tok_type_t order (see lex.h) */
+    "ID",     /* TOK_ID  */
+    "NUM",    /* TOK_NUM */
+    "LPAREN", /* TOK_LPAR */
+    "RPAREN", /* TOK_RPAR */
+    "LBRACK", /* TOK_LBRACK */
+    "RBRACK", /* TOK_RBRACK */
+    "LBRACE", /* TOK_LBRACE */
+    "RBRACE", /* TOK_RBRACE */
+    "TERM",   /* TOK_TERM */
 };
 
 tok_t next_token(void)
 {
-    (void)memset(&Token,0,sizeof(Token));
+    prepare_for_token();
     if (!file_eof())
     {
-        consume_whitespace();
-        record_position();
-        if (digit())
+        if (matches_any("()[]{};"))
+            punctuation();
+        else if (digit())
             number();
         //else if (matches('\''))
         //    character();
@@ -32,10 +40,29 @@ tok_t next_token(void)
         //    string();
         else
             identifier();
+
+        /* the keyword "end" is actually a TOK_TERM */
+        if (0 == strcmp(Token.str,"end"))
+            set_type(TOK_TERM);
     }
     return Token;
 }
 
+void punctuation(void) /* Dispatch on the character file_peek() returns and accept it as the matching single-character token. */
+{
+    switch (file_peek())
+    {
+        case '(': accept_char( TOK_LPAR ); break;
+        case ')': accept_char( TOK_RPAR ); break;
+        case '[': accept_char( TOK_LBRACK ); break;
+        case ']': accept_char( TOK_RBRACK ); break;
+        case '{': accept_char( TOK_LBRACE ); break;
+        case '}': accept_char( TOK_RBRACE ); break;
+        case ';': accept_char( TOK_TERM ); break; /* same token type next_token() assigns to the keyword "end" */
+        default:  identifier(); break; /* any other character is handed to identifier() */
+    }
+}
+
 void number()
 {
     set_type(TOK_NUM);
@@ -51,7 +78,7 @@ void number()
 void identifier()
 {
     set_type(TOK_ID);
-    while (!token_end()) consume();
+    while (!token_end() && !matches_any("()[]{};")) consume(); /* also stop at punctuation so it is not consumed into the identifier */
     accept();
 }
 
@@ -71,10 +98,19 @@ void consume(void)
     buf_put( file_get() );
 }
 
-void consume_whitespace(void)
+void prepare_for_token(void) /* Zero the global Token, discard leading whitespace, then call record_position(). */
 {
+    (void)memset(&Token,0,sizeof(Token)); /* wipe every field left over from the previous token */
     while( whitespace() )
         (void)file_get();
+    record_position(); /* runs only after the whitespace has been discarded */
+}
+
+void accept_char(tok_type_t type) /* Accept a token made of exactly one input character, tagged with the given type. */
+{
+    set_type(type);
+    consume(); /* a single call: one character is taken from the input */
+    accept();
 }
 
 void accept()
diff --git a/source/lexer/lex.h b/source/lexer/lex.h
index e25f478..a85e61a 100644
--- a/source/lexer/lex.h
+++ b/source/lexer/lex.h
@@ -16,18 +16,28 @@ typedef struct
 } tok_t;
 
 typedef enum {
-    TOK_ID  = 0,
-    TOK_NUM = 1,
-    TOK_MAX = 2,
+    TOK_EOF    = 0,
+    TOK_ID     = 1,  /* set by identifier() */
+    TOK_NUM    = 2,  /* set by number() */
+    TOK_LPAR   = 3,  /* '(' */
+    TOK_RPAR   = 4,  /* ')' */
+    TOK_LBRACK = 5,  /* '[' */
+    TOK_RBRACK = 6,  /* ']' */
+    TOK_LBRACE = 7,  /* '{' */
+    TOK_RBRACE = 8,  /* '}' */
+    TOK_TERM   = 9,  /* ';' or the keyword "end" */
+    TOK_MAX    = 10, /* number of token types; sizes Token_Strings[] in lex.c */
 } tok_type_t;
 
 tok_t next_token(void);
+void punctuation(void);
 void record_position(void);
 void identifier(void);
 void number(void);
 void set_type(tok_type_t type);
 void consume(void);
-void consume_whitespace(void);
+void prepare_for_token(void);
+void accept_char(tok_type_t type);
 void accept(void);
 
 #endif /* LEX_H */
diff --git a/source/lexer/main.c b/source/lexer/main.c
index 9268df9..41e2f8e 100644
--- a/source/lexer/main.c
+++ b/source/lexer/main.c
@@ -52,7 +52,8 @@ int lex_input(FILE* outfile)
     while (!file_eof())
     {
         tok_t token = next_token();
-        fprintf(stdout, "%s %d %d %s\n", token.type, token.line, token.column, token.str);
+        if (token.type != NULL)
+            fprintf(outfile, "%s\t%d\t%d\t%s\n", token.type, token.line, token.column, token.str);
         free(token.str);
     }
     return ret;