]> git.mdlowis.com Git - proto/sclpl.git/commitdiff
Implemented machinery for working lexer
author Mike D. Lowis <mike@mdlowis.com>
Mon, 25 Feb 2013 21:15:17 +0000 (16:15 -0500)
committer Mike D. Lowis <mike@mdlowis.com>
Mon, 25 Feb 2013 21:15:17 +0000 (16:15 -0500)
source/lexer/buf.c [new file with mode: 0644]
source/lexer/buf.h [new file with mode: 0644]
source/lexer/classes.c [new file with mode: 0644]
source/lexer/classes.h [new file with mode: 0644]
source/lexer/file.c
source/lexer/file.h
source/lexer/lex.c [new file with mode: 0644]
source/lexer/lex.h [new file with mode: 0644]
source/lexer/main.c

diff --git a/source/lexer/buf.c b/source/lexer/buf.c
new file mode 100644 (file)
index 0000000..0a72ef6
--- /dev/null
@@ -0,0 +1,39 @@
+#include <stdlib.h>
+#include "buf.h"
+
+size_t Size  = 0;       /* Capacity of Buffer in bytes; set to 16 by buf_init() and doubled by buf_grow(). */
+char* Buffer = NULL;    /* Heap storage holding the NUL-terminated text built so far. */
+size_t Index = 0;       /* Offset in Buffer where buf_put() stores the next character. */
+
+/**
+    @brief Allocates the string buffer and resets it to the empty string.
+
+    Any buffer left over from an earlier call is released first, so calling
+    this more than once does not leak. The process is aborted if the
+    allocation fails, because buf_put() and buf_accept() dereference Buffer
+    unconditionally.
+*/
+void buf_init(void)
+{
+    free(Buffer); /* free(NULL) is a no-op on the first call */
+    Size   = 16;
+    Index  = 0;
+    Buffer = malloc(Size);
+    if (NULL == Buffer)
+    {
+        abort();
+    }
+    Buffer[Index] = '\0';
+}
+
+/**
+    @brief Appends one character to the buffer, keeping it NUL-terminated.
+    @param ch Character to append.
+*/
+void buf_put(char ch)
+{
+    /* Make room for the new character and its terminator before writing. */
+    const size_t needed = Index + 2;
+    if (needed >= Size)
+    {
+        buf_grow();
+    }
+    Buffer[Index] = ch;
+    Index += 1;
+    Buffer[Index] = '\0';
+}
+
+/**
+    @brief Hands the accumulated text to the caller and empties the buffer.
+
+    The copy is made with malloc()/memcpy() rather than strdup(), which is not
+    part of ISO C before C23 and is left undeclared by strict -std=c99 or
+    -std=c11 builds.
+
+    @return A newly allocated, NUL-terminated copy of the buffer contents that
+            the caller must free(), or NULL if the allocation failed. The
+            buffer is reset to the empty string in either case.
+*/
+char* buf_accept(void)
+{
+    size_t len = strlen(Buffer);
+    char*  str = malloc(len + 1);
+    if (NULL != str)
+    {
+        memcpy(str, Buffer, len + 1); /* len + 1 copies the terminator too */
+    }
+    Index = 0;
+    Buffer[Index] = '\0';
+    return str;
+}
+
+/**
+    @brief Doubles the capacity of the buffer, preserving its contents.
+
+    The result of realloc() is checked before it replaces Buffer, so the old
+    block is never lost, and Size is only updated once the new block exists.
+    A zero Size (buffer never initialised) grows to 16 bytes. The process is
+    aborted when the capacity cannot be increased, because buf_put() writes to
+    the buffer immediately after growing it.
+*/
+void buf_grow(void)
+{
+    size_t new_size = (0 == Size) ? 16 : (Size * 2);
+    char*  grown    = NULL;
+    if (new_size > Size) /* false only when the doubling wrapped around */
+    {
+        grown = realloc(Buffer, new_size);
+    }
+    if (NULL == grown)
+    {
+        abort();
+    }
+    Buffer = grown;
+    Size   = new_size;
+}
+
diff --git a/source/lexer/buf.h b/source/lexer/buf.h
new file mode 100644 (file)
index 0000000..01fbdea
--- /dev/null
@@ -0,0 +1,17 @@
+/**
+    @file buf.h
+    @brief A simple string-building buffer akin to string streams in C++.
+    $Revision$
+    $HeadURL$
+*/
+#ifndef BUF_H
+#define BUF_H
+
+#include <string.h>
+
+void buf_init(void);     /* Allocate the buffer and reset it to the empty string. */
+void buf_put(char ch);   /* Append ch, growing the buffer first when it is nearly full. */
+char* buf_accept(void);  /* Return a heap-allocated copy of the text and empty the buffer;
+                            the copy belongs to the caller. */
+void buf_grow(void);     /* Double the buffer's capacity. */
+
+#endif /* BUF_H */
diff --git a/source/lexer/classes.c b/source/lexer/classes.c
new file mode 100644 (file)
index 0000000..d5ea2e4
--- /dev/null
@@ -0,0 +1,34 @@
+/**
+    @file classes.c
+    @brief See header for details
+    $Revision$
+    $HeadURL$
+*/
+#include "classes.h"
+#include "file.h"
+
+/**
+    @brief Tests whether the next input character is whitespace.
+    @return true if the peeked character is a space, tab, newline or carriage
+            return; false otherwise.
+*/
+bool whitespace(void)
+{
+    switch (file_peek())
+    {
+        case ' ':
+        case '\t':
+        case '\n':
+        case '\r':
+            return true;
+        default:
+            return false;
+    }
+}
+
+/**
+    @brief Tests whether the next input character is a decimal digit.
+    @return true if the peeked character lies in '0'..'9'; false otherwise.
+*/
+bool digit(void)
+{
+    const char next = file_peek();
+    if (next < '0')
+    {
+        return false;
+    }
+    return (next <= '9');
+}
+
+/**
+    @brief Tests whether the next input character is a hexadecimal digit.
+    @return true if the peeked character lies in '0'..'9', 'a'..'f' or
+            'A'..'F'; false otherwise.
+*/
+bool hex_digit(void)
+{
+    const char next       = file_peek();
+    const bool is_decimal = ('0' <= next) && (next <= '9');
+    const bool is_lower   = ('a' <= next) && (next <= 'f');
+    const bool is_upper   = ('A' <= next) && (next <= 'F');
+    return (is_decimal || is_lower || is_upper);
+}
+
+/**
+    @brief Tests whether the current token ends at the next input character.
+    @return true if the next character is whitespace or file_eof() reports
+            the end of the file; false otherwise.
+*/
+bool token_end(void)
+{
+    /* Keep this order: whitespace() peeks at the input before file_eof() is
+       queried. NOTE(review): file_eof() presumably relies on that read
+       attempt to notice the end of input -- confirm against file.c. */
+    if (whitespace())
+    {
+        return true;
+    }
+    return file_eof();
+}
+
diff --git a/source/lexer/classes.h b/source/lexer/classes.h
new file mode 100644 (file)
index 0000000..759d066
--- /dev/null
@@ -0,0 +1,17 @@
+/**
+    @file classes.h
+    @brief Character-class predicates that test the next character of the lexer's input file.
+    $Revision$
+    $HeadURL$
+*/
+#ifndef CLASSES_H
+#define CLASSES_H
+
+#include <stdbool.h>
+
+bool whitespace(void);
+bool digit(void);
+bool hex_digit(void);
+bool token_end(void);
+
+#endif /* CLASSES_H */
index 82f462c992f283b63fa9b170746c86bc2a051513..be8c90f9909c7ea0bf508fe2df2b0dc65d91b3c7 100644 (file)
@@ -8,8 +8,8 @@ FILE* Handle = NULL;
 
 bool file_open(char* fname)
 {
-    Line   = 0;
-    Column = 0;
+    Line   = 1;
+    Column = 1;
     Name   = fname;
     if (NULL == Name)
     {
@@ -57,6 +57,16 @@ char file_get(void)
     return ret;
 }
 
+/**
+    @brief Returns the next character of the input without consuming it.
+
+    The result of fgetc() is kept in an int so that EOF can be told apart from
+    every valid byte: narrowing it to char first makes the byte 0xFF look like
+    EOF where char is signed, and makes EOF undetectable where char is
+    unsigned.
+
+    @return The next character, or EOF converted to char at the end of input
+            or on a read error.
+*/
+char file_peek(void)
+{
+    int ch = fgetc(Handle);
+    if (EOF != ch)
+    {
+        (void)ungetc(ch, Handle);
+    }
+    return (char)ch;
+}
+
 int file_line(void)
 {
     return Line;
index 1e381e325e78cf77f9f9acabbe8a887e3fe4dba9..610faf7d83299b0929df76ab047e8294c00332f5 100644 (file)
@@ -15,6 +15,7 @@ bool file_open(char* fname);
 void file_close(void);
 bool file_eof(void);
 char file_get(void);
+char file_peek(void);
 int file_line(void);
 int file_column(void);
 char* file_name(void);
diff --git a/source/lexer/lex.c b/source/lexer/lex.c
new file mode 100644 (file)
index 0000000..e8ac1a2
--- /dev/null
@@ -0,0 +1,84 @@
+/**
+    @file lex.c
+    @brief See header for details
+    $Revision$
+    $HeadURL$
+*/
+#include <string.h>
+#include "lex.h"
+#include "classes.h"
+#include "file.h"
+#include "buf.h"
+
+/* The token under construction; next_token() clears it, fills it in and returns a copy. */
+tok_t Token = { 0 };
+
+/* Printable name of each token type, indexed by its tok_type_t constant. */
+const char* Token_Strings[TOK_MAX] = {
+    [TOK_ID]  = "id",
+    [TOK_NUM] = "num",
+};
+
+/**
+    @brief Scans the next token from the input file.
+
+    Token is cleared first. Unless the file is already at its end, leading
+    whitespace is skipped, the position is recorded, and the token is scanned
+    as a number when it starts with a digit or as an identifier otherwise.
+
+    NOTE(review): when only whitespace remains before the end of input,
+    identifier() looks like it accepts an empty token -- confirm against
+    file_eof() in file.c.
+
+    @return A copy of Token; every field is zero if the file was at its end.
+*/
+tok_t next_token(void)
+{
+    (void)memset(&Token, 0, sizeof(Token));
+    if (file_eof())
+    {
+        return Token;
+    }
+
+    consume_whitespace();
+    record_position();
+    /* TODO: dispatch to character() on '\'' and to string() on '"' once
+       those scanners exist. */
+    if (digit())
+    {
+        number();
+    }
+    else
+    {
+        identifier();
+    }
+    return Token;
+}
+
+/**
+    @brief Scans a token that begins with a digit.
+
+    The run of digits is consumed first. If the token ends there it is
+    accepted as a number; otherwise scanning continues in identifier(), which
+    retypes the token as an identifier.
+*/
+void number()
+{
+    set_type(TOK_NUM);
+    while (digit())
+    {
+        consume();
+    }
+
+    if (token_end())
+    {
+        accept();
+    }
+    else
+    {
+        identifier();
+    }
+}
+
+/**
+    @brief Scans the rest of the current token as an identifier.
+
+    The token type is set to TOK_ID, every character up to the end of the
+    token is consumed, and the buffered text is accepted.
+*/
+void identifier()
+{
+    set_type(TOK_ID);
+    while (!token_end())
+    {
+        consume();
+    }
+    accept();
+}
+
+/**
+    @brief Stores the input file's current line and column in Token.
+*/
+void record_position(void)
+{
+    const int line   = file_line();
+    const int column = file_column();
+    Token.line   = line;
+    Token.column = column;
+}
+
+/**
+    @brief Sets the type name of Token.
+    @param type Token type; it indexes Token_Strings without a range check,
+                so it must be less than TOK_MAX.
+*/
+void set_type(tok_type_t type)
+{
+    const char* name = Token_Strings[type];
+    Token.type = name;
+}
+
+/**
+    @brief Reads one character from the input file and appends it to the
+           string buffer.
+*/
+void consume(void)
+{
+    const char ch = file_get();
+    buf_put(ch);
+}
+
+/**
+    @brief Reads and discards input characters for as long as the next one is
+           whitespace.
+*/
+void consume_whitespace(void)
+{
+    while (whitespace())
+    {
+        (void)file_get();
+    }
+}
+
+/**
+    @brief Finishes the current token by storing the string returned by
+           buf_accept() in Token.str.
+*/
+void accept()
+{
+    char* text = buf_accept();
+    Token.str  = text;
+}
+
diff --git a/source/lexer/lex.h b/source/lexer/lex.h
new file mode 100644 (file)
index 0000000..e25f478
--- /dev/null
@@ -0,0 +1,33 @@
+/**
+    @file lex.h
+    @brief Lexer interface: the token record, the token types, and the routines that scan tokens from the input file.
+    $Revision$
+    $HeadURL$
+*/
+#ifndef LEX_H
+#define LEX_H
+
+typedef struct         /* A single token as returned by next_token(). */
+{
+    int line;          /* Value of file_line() stored by record_position(). */
+    int column;        /* Value of file_column() stored by record_position(). */
+    const char* type;  /* Entry of Token_Strings naming the token type ("id" or "num"). */
+    char* str;         /* Token text as returned by buf_accept(). */
+} tok_t;
+
+typedef enum {   /* Token types; set_type() uses the value as an index into Token_Strings. */
+    TOK_ID  = 0, /* Identifier. */
+    TOK_NUM = 1, /* Number. */
+    TOK_MAX = 2, /* Number of token types; the length of Token_Strings. */
+} tok_type_t;
+
+tok_t next_token(void);          /* Scan and return the next token from the input file. */
+void record_position(void);      /* Store the file's current line and column in the token. */
+void identifier(void);           /* Scan up to the end of the token and accept it as an identifier. */
+void number(void);               /* Scan a run of digits; accept a number, or continue as an identifier. */
+void set_type(tok_type_t type);  /* Set the token's type name from Token_Strings. */
+void consume(void);              /* Move the next input character into the string buffer. */
+void consume_whitespace(void);   /* Discard input characters while they are whitespace. */
+void accept(void);               /* Take the buffered text as the token's string. */
+
+#endif /* LEX_H */
index 1b4b269f79299df1185355dab5aeb6ae5b5296be..9268df94d95741f5a4a1c2fe6aa0467d648f256c 100644 (file)
@@ -1,6 +1,8 @@
 #include <stdio.h>
 #include "gc.h"
 #include "file.h"
+#include "buf.h"
+#include "lex.h"
 
 int lex_files(int argc, char** argv);
 int lex_input(FILE* outfile);
@@ -8,6 +10,7 @@ int lex_input(FILE* outfile);
 int main(int argc, char** argv)
 {
     int ret;
+    buf_init();
     if (argc > 1)
     {
         ret = lex_files(argc,argv);
@@ -48,8 +51,9 @@ int lex_input(FILE* outfile)
     int ret = 0;
     while (!file_eof())
     {
-        char ch = file_get();
-        fprintf(stdout,"%s %d %d %c\n","char",file_line(),file_column(),ch);
+        tok_t token = next_token();
+        fprintf(stdout, "%s %d %d %s\n", token.type, token.line, token.column, token.str);
+        free(token.str);
     }
     return ret;
 }