From: Michael D. Lowis Date: Fri, 17 Oct 2014 02:08:12 +0000 (-0400) Subject: Added support for string literals X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=f98f253a26cf66074f30ab2c5c3a183914975b6b;p=proto%2Fsclpl.git Added support for string literals --- diff --git a/Gemfile.lock b/Gemfile.lock index 203d395..03860b2 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -4,20 +4,20 @@ GEM diff-lcs (1.2.5) json (1.8.1) rake (10.3.2) - rscons (1.6.0) + rscons (1.7.0) json (~> 1.0) - rspec (3.0.0) - rspec-core (~> 3.0.0) - rspec-expectations (~> 3.0.0) - rspec-mocks (~> 3.0.0) - rspec-core (3.0.2) - rspec-support (~> 3.0.0) - rspec-expectations (3.0.2) + rspec (3.1.0) + rspec-core (~> 3.1.0) + rspec-expectations (~> 3.1.0) + rspec-mocks (~> 3.1.0) + rspec-core (3.1.7) + rspec-support (~> 3.1.0) + rspec-expectations (3.1.2) diff-lcs (>= 1.2.0, < 2.0) - rspec-support (~> 3.0.0) - rspec-mocks (3.0.2) - rspec-support (~> 3.0.0) - rspec-support (3.0.2) + rspec-support (~> 3.1.0) + rspec-mocks (3.1.3) + rspec-support (~> 3.1.0) + rspec-support (3.1.2) PLATFORMS ruby diff --git a/source/sclpl/lexer.c b/source/sclpl/lexer.c index 686a928..d88fb25 100644 --- a/source/sclpl/lexer.c +++ b/source/sclpl/lexer.c @@ -64,11 +64,13 @@ static lex_tok_t* lexer_make_token(char* text) { lex_tok_t* p_tok = NULL; if ((0 == strcmp(text,"end") || (text[0] == ';'))) { p_tok = lex_tok_new(T_END, NULL); - } else if (lexer_oneof("()[]{};,'\"", text[0])) { + } else if (lexer_oneof("()[]{};,'", text[0])) { p_tok = lexer_punc(text); + } else if ('"' == text[0]) { + p_tok = lex_tok_new(T_STRING, lexer_dup(text)); } else if (text[0] == '\\') { p_tok = lexer_char(text); - } else if ((text[0] == '0') && lexer_oneof("bodx",text[1])) { + } else if ((text[0] == '0') && lexer_oneof("bodh",text[1])) { p_tok = lexer_radix_int(text); } else if (lexer_oneof("+-0123456789",text[0])) { p_tok = lexer_number(text); @@ -93,7 +95,6 @@ static lex_tok_t* lexer_punc(char* text) case ';': p_tok = lex_tok_new(T_END, NULL); break; case ',': p_tok = lex_tok_new(T_COMMA, NULL); break; case '\'': p_tok = lex_tok_new(T_SQUOTE, NULL); break; - case '"': p_tok = lex_tok_new(T_DQUOTE, NULL); break; } return p_tok; } @@ -112,7 +113,7 @@ static lex_tok_t* lexer_char(char* text) p_tok = lex_tok_new(T_CHAR, (void*)((intptr_t)text[1])); } else { for(int i = 0; i < 5; i++) { - if (strcmp(text, &(lookup_table[i][2]))) { + if (0 == strcmp(&text[1], &(lookup_table[i][2]))) { p_tok = lex_tok_new(T_CHAR, (void*)((intptr_t)lookup_table[i][0])); break; } @@ -123,7 +124,7 @@ static lex_tok_t* lexer_char(char* text) static lex_tok_t* lexer_radix_int(char* text) { - return lexer_integer(text, read_radix(text[1])); + return lexer_integer(&text[2], read_radix(text[1])); } static lex_tok_t* lexer_number(char* text) diff --git a/source/sclpl/pprint.c b/source/sclpl/pprint.c index 34085ac..166276d 100644 --- a/source/sclpl/pprint.c +++ b/source/sclpl/pprint.c @@ -34,6 +34,24 @@ static const char* token_type_to_string(lex_tok_type_t type) { } } +static void print_char(FILE* file, char ch) { + int i; + static const char* lookup_table[5] = { + " \0space", + "\n\0newline", + "\r\0return", + "\t\0tab", + "\v\0vtab" + }; + for(i = 0; i < 5; i++) { + if (ch == lookup_table[i][0]) { + fprintf(file, "\\%s", &(lookup_table[i][2])); + break; + } + } + if (i == 5) fprintf(file, "\\%c", ch); +} + void pprint_token_type(FILE* file, lex_tok_t* token) { fprintf(file, "%s", token_type_to_string(token->type)); } @@ -41,8 +59,8 @@ void pprint_token_type(FILE* file, lex_tok_t* token) { void pprint_token_value(FILE* file, lex_tok_t* token) { void* value = token->value; switch(token->type) { - case T_STRING: fprintf(file, "'%s'", ((char*)value)); break; - case T_CHAR: fprintf(file, "\\%c", ((char)(int)value)); break; + case T_STRING: fprintf(file, "%s", ((char*)value)); break; + case T_CHAR: print_char(file, ((char)(int)value)); break; case T_INT: fprintf(file, "%ld", *((long int*)value)); break; case T_FLOAT: fprintf(file, "%f", *((double*)value)); break; case T_BOOL: fprintf(file, "%s", ((int)value)?"true":"false"); break; diff --git a/source/sclpl/scanner.c b/source/sclpl/scanner.c index 32f75b0..17e6b12 100644 --- a/source/sclpl/scanner.c +++ b/source/sclpl/scanner.c @@ -5,6 +5,7 @@ static void scanner_skip_ws(scanner_t* p_scanner); static char scanner_current(scanner_t* p_scanner); static bool scanner_oneof(scanner_t* p_scanner, const char* p_set); static char* scanner_dup(scanner_t* p_scanner, size_t start_idx, size_t len); +static char* scanner_read_string(scanner_t* p_scanner); void scanner_free(void* p_obj) { scanner_t* p_scanner = (scanner_t*)p_obj; @@ -25,11 +26,13 @@ char* scanner_read(scanner_t* p_scanner) { char* p_tok = NULL; scanner_skip_ws(p_scanner); if (!scanner_eof(p_scanner)) { - if (scanner_oneof(p_scanner, "()[]{};,'\"")) { + if (scanner_oneof(p_scanner, "()[]{};,'")) { p_tok = scanner_dup(p_scanner, p_scanner->index, 1); p_scanner->index++; + } else if (scanner_current(p_scanner) == '"') { + p_tok = scanner_read_string(p_scanner); } else { - size_t start = p_scanner->index; + size_t start = p_scanner->index; while(!scanner_oneof(p_scanner," \t\r\n()[];,'\"") && (scanner_current(p_scanner) != '\0')) { p_scanner->index++; @@ -40,6 +43,44 @@ char* scanner_read(scanner_t* p_scanner) { return p_tok; } +static char* scanner_read_string(scanner_t* p_scanner) { + size_t capacity = 8; + size_t index = 0; + char* tok = (char*)malloc(sizeof(capacity)); + + /* Skip the first " */ + tok[index++] = scanner_current(p_scanner); + tok[index] = '\0'; + p_scanner->index++; + + /* Read the contents of the string */ + while ('"' != scanner_current(p_scanner)) { + /* Resize the buffer if necessary */ + if ((index+2) >= capacity) + capacity = capacity << 1; + + /* EOF results in an assertion (don't do) */ + if (scanner_eof(p_scanner)) + assert(false); + + /* Read the char */ + tok[index++] = scanner_current(p_scanner); + tok[index] = '\0'; + p_scanner->index++; + + /* Get the next line if necessary */ + if ('\n' == tok[index-1]) + scanner_getline(p_scanner); + } + + /* Skip the last " */ + tok[index++] = scanner_current(p_scanner); + tok[index] = '\0'; + p_scanner->index++; + + return tok; +} + bool scanner_eof(scanner_t* p_scanner) { return (scanner_eol(p_scanner) && feof(p_scanner->p_input)); diff --git a/spec/lexer_spec.rb b/spec/lexer_spec.rb index 95d192a..10c822f 100644 --- a/spec/lexer_spec.rb +++ b/spec/lexer_spec.rb @@ -8,9 +8,157 @@ def lexer(input) end describe "lexer" do - it "should recognize punctuation" do - expect(lexer('[](){}\'",;')).to eq( - ["T_LBRACK", "T_RBRACK", "T_LPAR", "T_RPAR", "T_LBRACE", "T_RBRACE", "T_SQUOTE", "T_DQUOTE", "T_COMMA", "T_END"]) + context "punctuation" do + it "should recognize [" do + expect(lexer('[')).to eq ['T_LBRACK'] + end + + it "should recognize ]" do + expect(lexer(']')).to eq ['T_RBRACK'] + end + + it "should recognize (" do + expect(lexer('(')).to eq ['T_LPAR'] + end + + it "should recognize )" do + expect(lexer(')')).to eq ['T_RPAR'] + end + + it "should recognize {" do + expect(lexer('{')).to eq ['T_LBRACE'] + end + + it "should recognize }" do + expect(lexer('}')).to eq ['T_RBRACE'] + end + + it "should recognize '" do + expect(lexer('\'')).to eq ['T_SQUOTE'] + end + + it "should recognize ," do + expect(lexer(',')).to eq ['T_COMMA'] + end + + it "should recognize ;" do + expect(lexer(';')).to eq ['T_END'] + end + + it "should recognize all punctuation" do + expect(lexer('[](){}\',;')).to eq( + ["T_LBRACK", "T_RBRACK", "T_LPAR", "T_RPAR", "T_LBRACE", "T_RBRACE", + "T_SQUOTE", "T_COMMA", "T_END"]) + end + end + + context "characters" do + it "should recognize space" do + expect(lexer('\space')).to eq ['T_CHAR:\space'] + end + + it "should recognize newline" do + expect(lexer('\newline')).to eq ['T_CHAR:\newline'] + end + + it "should recognize return" do + expect(lexer('\return')).to eq ['T_CHAR:\return'] + end + + it "should recognize tab" do + expect(lexer('\tab')).to eq ['T_CHAR:\tab'] + end + + it "should recognize vertical tab" do + expect(lexer('\vtab')).to eq ['T_CHAR:\vtab'] + end + + it "should recognize 'c'" do + expect(lexer('\c')).to eq ['T_CHAR:\c'] + end + end + + context "numbers" do + context "integers" do + it "should recognize positive integer without sign" do + expect(lexer('123')).to eq ['T_INT:123'] + end + + it "should recognize positive integer with sign" do + expect(lexer('+123')).to eq ['T_INT:123'] + end + + it "should recognize negitve integer with sign" do + expect(lexer('-123')).to eq ['T_INT:-123'] + end + end + + context "radix integers" do + it "should recognize binary integer" do + expect(lexer('0b101')).to eq ['T_INT:5'] + end + + it "should recognize octal integer" do + expect(lexer('0o707')).to eq ['T_INT:455'] + end + + it "should recognize decimal integer" do + expect(lexer('0d909')).to eq ['T_INT:909'] + end + + it "should recognize decimal integer" do + expect(lexer('0hf0f')).to eq ['T_INT:3855'] + end + end + + context "floating point" do + it "should recognize positive float without sign" do + expect(lexer('123.0')).to eq ['T_FLOAT:123.000000'] + end + + it "should recognize positive float with sign" do + expect(lexer('+123.0')).to eq ['T_FLOAT:123.000000'] + end + + it "should recognize negitve float with sign" do + expect(lexer('-123.0')).to eq ['T_FLOAT:-123.000000'] + end + end + end + + context "boolean" do + it "should recognize true" do + expect(lexer('true')).to eq ['T_BOOL:true'] + end + + it "should recognize false" do + expect(lexer('false')).to eq ['T_BOOL:false'] + end + end + + context "identifiers" do + it "should recognize an identifier" do + expect(lexer('foo')).to eq ['T_VAR:foo'] + end + end + + context "strings" do + it "should recognize an empty string" do + expect(lexer('""')).to eq ['T_STRING:""'] + end + + it "should recognize a string with one element" do + expect(lexer('"a"')).to eq ['T_STRING:"a"'] + end + + it "should recognize a string with one element" do + expect(lexer('"ab"')).to eq ['T_STRING:"ab"'] + end + + it "should recognize a string that spans lines" do + pending "S-Expression parser is stupid. fix it." + expect(lexer("\"a\nb\"")).to eq ["T_STRING:\"a\nb\""] + end end end diff --git a/spec/parser_spec.rb b/spec/parser_spec.rb index 0e715a0..858c1db 100644 --- a/spec/parser_spec.rb +++ b/spec/parser_spec.rb @@ -137,7 +137,6 @@ describe "sclpl grammar" do context "literals" do it "should parse a string" do - pending "Waiting for implementation of string literals" expect(ast('"foo"')).to eq(['T_STRING:"foo"']) end