From: Michael D. Lowis <mike@mdlowis.com>
Date: Fri, 17 Oct 2014 02:08:12 +0000 (-0400)
Subject: Added support for string literals
X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=f98f253a26cf66074f30ab2c5c3a183914975b6b;p=proto%2Fsclpl.git

Added support for string literals
---

diff --git a/Gemfile.lock b/Gemfile.lock
index 203d395..03860b2 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -4,20 +4,20 @@ GEM
     diff-lcs (1.2.5)
     json (1.8.1)
     rake (10.3.2)
-    rscons (1.6.0)
+    rscons (1.7.0)
       json (~> 1.0)
-    rspec (3.0.0)
-      rspec-core (~> 3.0.0)
-      rspec-expectations (~> 3.0.0)
-      rspec-mocks (~> 3.0.0)
-    rspec-core (3.0.2)
-      rspec-support (~> 3.0.0)
-    rspec-expectations (3.0.2)
+    rspec (3.1.0)
+      rspec-core (~> 3.1.0)
+      rspec-expectations (~> 3.1.0)
+      rspec-mocks (~> 3.1.0)
+    rspec-core (3.1.7)
+      rspec-support (~> 3.1.0)
+    rspec-expectations (3.1.2)
       diff-lcs (>= 1.2.0, < 2.0)
-      rspec-support (~> 3.0.0)
-    rspec-mocks (3.0.2)
-      rspec-support (~> 3.0.0)
-    rspec-support (3.0.2)
+      rspec-support (~> 3.1.0)
+    rspec-mocks (3.1.3)
+      rspec-support (~> 3.1.0)
+    rspec-support (3.1.2)
 
 PLATFORMS
   ruby
diff --git a/source/sclpl/lexer.c b/source/sclpl/lexer.c
index 686a928..d88fb25 100644
--- a/source/sclpl/lexer.c
+++ b/source/sclpl/lexer.c
@@ -64,11 +64,13 @@ static lex_tok_t* lexer_make_token(char* text) {
     lex_tok_t* p_tok = NULL;
     if ((0 == strcmp(text,"end") || (text[0] == ';'))) {
         p_tok = lex_tok_new(T_END, NULL);
-    } else if (lexer_oneof("()[]{};,'\"", text[0])) {
+    } else if (lexer_oneof("()[]{};,'", text[0])) {
         p_tok = lexer_punc(text);
+    } else if ('"' == text[0]) {
+        p_tok = lex_tok_new(T_STRING, lexer_dup(text));
     } else if (text[0] == '\\') {
         p_tok = lexer_char(text);
-    } else if ((text[0] == '0') && lexer_oneof("bodx",text[1])) {
+    } else if ((text[0] == '0') && lexer_oneof("bodh",text[1])) {
         p_tok = lexer_radix_int(text);
     } else if (lexer_oneof("+-0123456789",text[0])) {
         p_tok = lexer_number(text);
@@ -93,7 +95,6 @@ static lex_tok_t* lexer_punc(char* text)
         case ';':  p_tok = lex_tok_new(T_END,    NULL); break;
         case ',':  p_tok = lex_tok_new(T_COMMA,  NULL); break;
         case '\'': p_tok = lex_tok_new(T_SQUOTE, NULL); break;
-        case '"':  p_tok = lex_tok_new(T_DQUOTE, NULL); break;
     }
     return p_tok;
 }
@@ -112,7 +113,7 @@ static lex_tok_t* lexer_char(char* text)
         p_tok = lex_tok_new(T_CHAR, (void*)((intptr_t)text[1]));
     } else {
         for(int i = 0; i < 5; i++) {
-            if (strcmp(text, &(lookup_table[i][2]))) {
+            if (0 == strcmp(&text[1], &(lookup_table[i][2]))) {
                 p_tok = lex_tok_new(T_CHAR, (void*)((intptr_t)lookup_table[i][0]));
                 break;
             }
@@ -123,7 +124,7 @@ static lex_tok_t* lexer_char(char* text)
 
 static lex_tok_t* lexer_radix_int(char* text)
 {
-    return lexer_integer(text, read_radix(text[1]));
+    return lexer_integer(&text[2], read_radix(text[1]));
 }
 
 static lex_tok_t* lexer_number(char* text)
diff --git a/source/sclpl/pprint.c b/source/sclpl/pprint.c
index 34085ac..166276d 100644
--- a/source/sclpl/pprint.c
+++ b/source/sclpl/pprint.c
@@ -34,6 +34,24 @@ static const char* token_type_to_string(lex_tok_type_t type) {
     }
 }
 
+/* Write ch as a character literal: "\name" for the five named characters, otherwise "\c". Table entries are: the char, a NUL, then its name. */
+static void print_char(FILE* file, char ch) {
+    static const char* named_chars[5] = {
+        " \0space",
+        "\n\0newline",
+        "\r\0return",
+        "\t\0tab",
+        "\v\0vtab"
+    };
+    const char* name = NULL;
+    for (int idx = 0; (idx < 5) && (NULL == name); idx++) {
+        if (named_chars[idx][0] == ch)
+            name = &(named_chars[idx][2]);
+    }
+    if (NULL != name) fprintf(file, "\\%s", name);
+    else              fprintf(file, "\\%c", ch);
+}
+
 void pprint_token_type(FILE* file, lex_tok_t* token) {
     fprintf(file, "%s", token_type_to_string(token->type));
 }
@@ -41,8 +59,8 @@ void pprint_token_type(FILE* file, lex_tok_t* token) {
 void pprint_token_value(FILE* file, lex_tok_t* token) {
     void* value = token->value;
     switch(token->type) {
-        case T_STRING: fprintf(file, "'%s'", ((char*)value));              break;
-        case T_CHAR:   fprintf(file, "\\%c", ((char)(int)value));          break;
+        case T_STRING: fprintf(file, "%s", ((char*)value));              break;
+        case T_CHAR:   print_char(file, ((char)(int)value));               break;
         case T_INT:    fprintf(file, "%ld",  *((long int*)value));         break;
         case T_FLOAT:  fprintf(file, "%f",   *((double*)value));           break;
         case T_BOOL:   fprintf(file, "%s",   ((int)value)?"true":"false"); break;
diff --git a/source/sclpl/scanner.c b/source/sclpl/scanner.c
index 32f75b0..17e6b12 100644
--- a/source/sclpl/scanner.c
+++ b/source/sclpl/scanner.c
@@ -5,6 +5,7 @@ static void scanner_skip_ws(scanner_t* p_scanner);
 static char scanner_current(scanner_t* p_scanner);
 static bool scanner_oneof(scanner_t* p_scanner, const char* p_set);
 static char* scanner_dup(scanner_t* p_scanner, size_t start_idx, size_t len);
+static char* scanner_read_string(scanner_t* p_scanner);
 
 void scanner_free(void* p_obj) {
     scanner_t* p_scanner = (scanner_t*)p_obj;
@@ -25,11 +26,13 @@ char* scanner_read(scanner_t* p_scanner) {
     char* p_tok = NULL;
     scanner_skip_ws(p_scanner);
     if (!scanner_eof(p_scanner)) {
-        if (scanner_oneof(p_scanner, "()[]{};,'\"")) {
+        if (scanner_oneof(p_scanner, "()[]{};,'")) {
             p_tok = scanner_dup(p_scanner, p_scanner->index, 1);
             p_scanner->index++;
+        } else if (scanner_current(p_scanner) == '"') {
+            p_tok = scanner_read_string(p_scanner);
         } else {
-            size_t start =  p_scanner->index;
+            size_t start = p_scanner->index;
             while(!scanner_oneof(p_scanner," \t\r\n()[];,'\"") &&
                   (scanner_current(p_scanner) != '\0')) {
                 p_scanner->index++;
@@ -40,6 +43,44 @@ char* scanner_read(scanner_t* p_scanner) {
     return p_tok;
 }
 
+/* Read a double-quoted string token (quotes included) into a malloc'd, NUL-terminated buffer owned by the caller. */
+static char* scanner_read_string(scanner_t* p_scanner) {
+    size_t capacity = 8;
+    size_t index = 0;
+    char*  tok = (char*)malloc(capacity);
+    assert(NULL != tok);
+
+    /* Copy the opening " into the token */
+    tok[index++] = scanner_current(p_scanner);
+    tok[index] = '\0';
+    p_scanner->index++;
+
+    /* Read the contents of the string */
+    while ('"' != scanner_current(p_scanner)) {
+        /* Grow the buffer so this char, the closing " and the NUL all fit */
+        if ((index+2) >= capacity) {
+            capacity = capacity << 1;
+            tok = (char*)realloc(tok, capacity);
+            assert(NULL != tok);
+        }
+        /* EOF results in an assertion (don't do) */
+        if (scanner_eof(p_scanner))
+            assert(false);
+        /* Read the char */
+        tok[index++] = scanner_current(p_scanner);
+        tok[index] = '\0';
+        p_scanner->index++;
+        /* Get the next line if necessary */
+        if ('\n' == tok[index-1])
+            scanner_getline(p_scanner);
+    }
+    /* Copy the closing " into the token */
+    tok[index++] = scanner_current(p_scanner);
+    tok[index] = '\0';
+    p_scanner->index++;
+    return tok;
+}
+
 bool scanner_eof(scanner_t* p_scanner)
 {
     return (scanner_eol(p_scanner) && feof(p_scanner->p_input));
diff --git a/spec/lexer_spec.rb b/spec/lexer_spec.rb
index 95d192a..10c822f 100644
--- a/spec/lexer_spec.rb
+++ b/spec/lexer_spec.rb
@@ -8,9 +8,157 @@ def lexer(input)
 end
 
 describe "lexer" do
-  it "should recognize punctuation" do
-    expect(lexer('[](){}\'",;')).to eq(
-      ["T_LBRACK", "T_RBRACK", "T_LPAR", "T_RPAR", "T_LBRACE", "T_RBRACE", "T_SQUOTE", "T_DQUOTE", "T_COMMA", "T_END"])
+  context "punctuation" do # one example per delimiter, then all of them in a single input
+    it "should recognize [" do
+      expect(lexer("[")).to eq(["T_LBRACK"])
+    end
+
+    it "should recognize ]" do
+      expect(lexer("]")).to eq(["T_RBRACK"])
+    end
+
+    it "should recognize (" do
+      expect(lexer("(")).to eq(["T_LPAR"])
+    end
+
+    it "should recognize )" do
+      expect(lexer(")")).to eq(["T_RPAR"])
+    end
+
+    it "should recognize {" do
+      expect(lexer("{")).to eq(["T_LBRACE"])
+    end
+
+    it "should recognize }" do
+      expect(lexer("}")).to eq(["T_RBRACE"])
+    end
+
+    it "should recognize '" do
+      expect(lexer("'")).to eq(["T_SQUOTE"])
+    end
+
+    it "should recognize ," do
+      expect(lexer(",")).to eq(["T_COMMA"])
+    end
+
+    it "should recognize ;" do
+      expect(lexer(";")).to eq(["T_END"])
+    end
+
+    it "should recognize all punctuation" do
+      expect(lexer("[](){}',;")).to eq(
+        %w[T_LBRACK T_RBRACK T_LPAR T_RPAR T_LBRACE T_RBRACE
+           T_SQUOTE T_COMMA T_END])
+    end
+  end
+
+  context "characters" do # named characters plus a plain single-character literal
+    it "should recognize space" do
+      expect(lexer("\\space")).to eq(["T_CHAR:\\space"])
+    end
+
+    it "should recognize newline" do
+      expect(lexer("\\newline")).to eq(["T_CHAR:\\newline"])
+    end
+
+    it "should recognize return" do
+      expect(lexer("\\return")).to eq(["T_CHAR:\\return"])
+    end
+
+    it "should recognize tab" do
+      expect(lexer("\\tab")).to eq(["T_CHAR:\\tab"])
+    end
+
+    it "should recognize vertical tab" do
+      expect(lexer("\\vtab")).to eq(["T_CHAR:\\vtab"])
+    end
+
+    it "should recognize 'c'" do
+      expect(lexer("\\c")).to eq(["T_CHAR:\\c"])
+    end
+  end
+
+  context "numbers" do # integer, radix-prefixed integer and floating point literals
+    context "integers" do
+      it "should recognize positive integer without sign" do
+        expect(lexer('123')).to eq ['T_INT:123']
+      end
+
+      it "should recognize positive integer with sign" do
+        expect(lexer('+123')).to eq ['T_INT:123']
+      end
+
+      it "should recognize negative integer with sign" do
+        expect(lexer('-123')).to eq ['T_INT:-123']
+      end
+    end
+
+    context "radix integers" do # prefixes: 0b binary, 0o octal, 0d decimal, 0h hexadecimal
+      it "should recognize binary integer" do
+        expect(lexer('0b101')).to eq ['T_INT:5']
+      end
+
+      it "should recognize octal integer" do
+        expect(lexer('0o707')).to eq ['T_INT:455']
+      end
+
+      it "should recognize decimal integer" do
+        expect(lexer('0d909')).to eq ['T_INT:909']
+      end
+
+      it "should recognize hexadecimal integer" do
+        expect(lexer('0hf0f')).to eq ['T_INT:3855']
+      end
+    end
+
+    context "floating point" do
+      it "should recognize positive float without sign" do
+        expect(lexer('123.0')).to eq ['T_FLOAT:123.000000']
+      end
+
+      it "should recognize positive float with sign" do
+        expect(lexer('+123.0')).to eq ['T_FLOAT:123.000000']
+      end
+
+      it "should recognize negative float with sign" do
+        expect(lexer('-123.0')).to eq ['T_FLOAT:-123.000000']
+      end
+    end
+  end
+
+  context "boolean" do # the two boolean literals
+    it "should recognize true" do
+      expect(lexer("true")).to eq(["T_BOOL:true"])
+    end
+
+    it "should recognize false" do
+      expect(lexer("false")).to eq(["T_BOOL:false"])
+    end
+  end
+
+  context "identifiers" do # a bare word lexes to a T_VAR token
+    it "should recognize an identifier" do
+      expect(lexer("foo")).to eq(["T_VAR:foo"])
+    end
+  end
+
+  context "strings" do # string tokens keep their surrounding double quotes
+    it "should recognize an empty string" do
+      expect(lexer('""')).to eq ['T_STRING:""']
+    end
+
+    it "should recognize a string with one element" do
+      expect(lexer('"a"')).to eq ['T_STRING:"a"']
+    end
+
+    it "should recognize a string with two elements" do
+      expect(lexer('"ab"')).to eq ['T_STRING:"ab"']
+    end
+
+    it "should recognize a string that spans lines" do
+      pending "S-Expression parser is stupid. fix it."
+      expect(lexer("\"a\nb\"")).to eq ["T_STRING:\"a\nb\""]
+    end
   end
 end
 
diff --git a/spec/parser_spec.rb b/spec/parser_spec.rb
index 0e715a0..858c1db 100644
--- a/spec/parser_spec.rb
+++ b/spec/parser_spec.rb
@@ -137,7 +137,6 @@ describe "sclpl grammar" do
 
   context "literals" do
     it "should parse a string" do
-      pending "Waiting for implementation of string literals"
       expect(ast('"foo"')).to eq(['T_STRING:"foo"'])
     end