From e486882b305d51554cd32a75ab6c14adf2bd447c Mon Sep 17 00:00:00 2001 From: "Mike D. Lowis" Date: Tue, 29 May 2012 12:32:08 -0400 Subject: [PATCH] Updated lexer and parser to new language syntax --- docs/language_spec/language.lyx | 228 +++++++++++++++++++- example.dl | 24 ++- source/dllexer/dllexer.cpp | 27 ++- source/dllexer/dllexer.h | 13 +- source/dlparser/dlparser.cpp | 64 ++++-- source/dlparser/dlparser.h | 4 +- source/dlparser/macro/pattern.cpp | 92 -------- source/dlparser/macro/pattern.h | 37 ---- tests/test_dllexer.cpp | 342 ++++++++++++++++++------------ 9 files changed, 522 insertions(+), 309 deletions(-) delete mode 100644 source/dlparser/macro/pattern.cpp delete mode 100644 source/dlparser/macro/pattern.h diff --git a/docs/language_spec/language.lyx b/docs/language_spec/language.lyx index d79a6d3..2b4169c 100644 --- a/docs/language_spec/language.lyx +++ b/docs/language_spec/language.lyx @@ -149,7 +149,6 @@ Numbers in DLang consist of several subtypes: Int, Float, BigInt, BigFloat, The Complex subtype represents numbers consisting of a real and imaginary part. Both the real and imaginary parts are represented internally as BigFloats. - \end_layout \begin_layout Standard @@ -239,7 +238,7 @@ Boolean Values \begin_layout Standard Booleans are a special subtype of Integers that can only have a value of zero or one. - Booleans can + Booleans can \end_layout \begin_layout Subsection @@ -412,5 +411,230 @@ Optimizations Formal Syntax and Semantics \end_layout +\begin_layout Standard +\begin_inset Box Frameless +position "t" +hor_pos "c" +has_inner_box 1 +inner_pos "t" +use_parbox 0 +use_makebox 0 +width "100col%" +special "none" +height "1in" +height_special "totalheight" +status open + +\begin_layout Plain Layout +\begin_inset listings +inline false +status open + +\begin_layout Plain Layout + +ID := [^ +\backslash +r +\backslash +n '"# +\backslash +( +\backslash +)] +\end_layout + +\begin_layout Plain Layout + +CHAR := '( +\backslash +.|[^'])' +\end_layout + +\begin_layout Plain Layout + +SYMBOL := +\backslash +$[^ +\backslash +r +\backslash +n '"# +\backslash +( +\backslash +)] +\end_layout + +\begin_layout Plain Layout + +STRING := "[^ +\backslash +r]*" +\end_layout + +\begin_layout Plain Layout + +NUMBER := -?[0-9]+( +\backslash +.[0-9]+)?(e[0-9]+)? +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + +Expression := CoreForm +\end_layout + +\begin_layout Plain Layout + + | FuncApp +\end_layout + +\begin_layout Plain Layout + + | BasicExp +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + +CoreForm := 'define' ID Expression +\end_layout + +\begin_layout Plain Layout + + | 'set!' ID Expression +\end_layout + +\begin_layout Plain Layout + + | 'begin' ExpList* TERM +\end_layout + +\begin_layout Plain Layout + + | 'if' Expression Expression 'else' Expression? TERM +\end_layout + +\begin_layout Plain Layout + + | 'quote' '(' Expression ')' +\end_layout + +\begin_layout Plain Layout + + | 'lambda' IdList ExpList? TERM +\end_layout + +\begin_layout Plain Layout + + | 'macro' IdList ExpList? TERM +\end_layout + +\begin_layout Plain Layout + + | 'syntax' ID IdList ID ExpList TERM +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + +FuncApp := BasicExp '(' ParamList ')' +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + +BasicExp := MacroName ExpList? TERM +\end_layout + +\begin_layout Plain Layout + + | '(' Expression (ID Expression)* ')' +\end_layout + +\begin_layout Plain Layout + + | Literal +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + +Literal := ID +\end_layout + +\begin_layout Plain Layout + + | CHAR +\end_layout + +\begin_layout Plain Layout + + | SYMBOL +\end_layout + +\begin_layout Plain Layout + + | STRING +\end_layout + +\begin_layout Plain Layout + + | NUMBER +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + +ParamList := '(' (Expression (',' Expression)*)? ')' +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + +ExpList := Expression* +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\begin_layout Plain Layout + +IdList := '(' ID* ')' +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + \end_body \end_document diff --git a/example.dl b/example.dl index 5e6ad1b..bf1ee18 100644 --- a/example.dl +++ b/example.dl @@ -4,15 +4,23 @@ 1.0 $foo +# Infix operator expression +(1 add 1) +((1 add 1) add 1) +(1 add (1 add 1)) +(1 - (1 + 1)) + # Function Application foo() foo(1) foo(1 2) foo(1 2 3) +foo(1,2,3) +(foo . $bar)(1, 2, 3) # Definition and assignment -define foo 5 end -set! foo 6 end +define foo 5 +set! foo 6 # Lambda expressions lambda () end @@ -59,12 +67,6 @@ if conditional if_branch end -# Infix operator expression -(1 add 1) -((1 add 1) add 1) -(1 add (1 add 1)) -(1 - (1 + 1)) - ## Define an infix operator (No Precedence) #infix = # @@ -74,11 +76,11 @@ end #foo = (1 + 1) ; # Macros -macro let (:= =) ; - (a := b) +macro let ( := = ) ; + ( a := b ) define a b - (a = b) + ( a = b ) set! a b end diff --git a/source/dllexer/dllexer.cpp b/source/dllexer/dllexer.cpp index 15f2781..98aebb3 100644 --- a/source/dllexer/dllexer.cpp +++ b/source/dllexer/dllexer.cpp @@ -97,16 +97,23 @@ Token DLLexer::next(void) Symbol(ret); } + // Consume a comma + else if(lookahead(1) == ',') + { + ret = Token( COMMA, ",", line, column ); + consume(); + } + // Consume parentheses else if (lookahead(1) == '(') { - consume(); ret = Token( LPAR, "(", line, column ); + consume(); } else if (lookahead(1) == ')') { - consume(); ret = Token( RPAR, ")", line, column ); + consume(); } // Everything else (except the unescaped terminator) is considered an ID @@ -159,7 +166,9 @@ void DLLexer::Id(Token& tok) } while( !isWhiteSpace() && ('(' != lookahead(1)) && - (')' != lookahead(1)) ); + (')' != lookahead(1)) && + ('#' != lookahead(1)) && + (EOF != lookahead(1)) ); //while(isLetter() || isDigit() || lookahead(1) == '_'); tok = Token(ID, oss.str(), line, column); } @@ -241,13 +250,15 @@ void DLLexer::Decimal(std::ostringstream& oss) ex << "Missing fractional portion of floating point number."; throw ex; } - - do + else { - oss << lookahead(1); - consume(); + do + { + oss << lookahead(1); + consume(); + } + while ( isDigit(lookahead(1)) ); } - while ( isDigit(lookahead(1)) ); } void DLLexer::Char(Token& tok) diff --git a/source/dllexer/dllexer.h b/source/dllexer/dllexer.h index 27e7f8e..5b7a953 100644 --- a/source/dllexer/dllexer.h +++ b/source/dllexer/dllexer.h @@ -21,14 +21,15 @@ typedef enum TokenTypes MACRO_APP = 11, LPAR = 12, RPAR = 13, - TERM = 14, + COMMA = 14, + TERM = 15, // Datatypes - ID = 15, - NUM = 16, - CHAR = 17, - STRING = 18, - SYMBOL = 19, + ID = 16, + NUM = 17, + CHAR = 18, + STRING = 19, + SYMBOL = 20, } eTokenTypes; typedef struct { diff --git a/source/dlparser/dlparser.cpp b/source/dlparser/dlparser.cpp index c7a9432..25a1b24 100644 --- a/source/dlparser/dlparser.cpp +++ b/source/dlparser/dlparser.cpp @@ -54,18 +54,20 @@ AST* DLParser::Expression(void) AST* ret = NULL; // Expression := CoreForm + // | FuncApp // | BasicExp // // CoreForm := 'define' ID Expression TERM - // | 'set' ID Expression TERM + // | 'set!' ID Expression TERM // | 'lambda' IdList ExpList? TERM // | 'begin' ExpList* TERM - // | 'if' Expression Expression Expression? TERM + // | 'if' Expression Expression 'else' Expression? TERM // | 'macro' ID IdList ID ExpList TERM // + // FuncApp := BasicExp '(' ParamList ')' + // // BasicExp := MacroName ExpList? TERM - // | '(' Expression ID Expression ')' - // | ID '(' ExpList ')' + // | '(' Expression (ID Expression)* ')' // | Literal // // Literal := ID @@ -74,7 +76,9 @@ AST* DLParser::Expression(void) // | STRING // | NUMBER // - // ExpList := Expression+ + // ParamList := '(' (Expression (',' Expression)*)? ')' + // + // ExpList := Expression* // // IdList := '(' ID* ')' // @@ -85,7 +89,11 @@ AST* DLParser::Expression(void) } else { - ret = Application(); + ret = BasicExp(); + //if ( speculate_ParamList() ) + //{ + // ret + ParamList() + //} } // Register any new macros and expand any existing macros @@ -111,10 +119,12 @@ AST* DLParser::CoreForm(void) case LAMBDA: ret = new AST(LAMBDA, 2, IdList(), ExpList(TERM)); + match(TERM); break; case BEGIN: ret = new AST(BEGIN, 1, ExpList(TERM)); + match(TERM); break; case IF: @@ -123,6 +133,7 @@ AST* DLParser::CoreForm(void) { ret->addChild( Expression() ); } + match(TERM); break; case MACRO: @@ -147,17 +158,22 @@ AST* DLParser::CoreForm(void) transform->addChild( Expression() ); ret->addChild( transform ); } + match(TERM); break; default: throw Exception( lookaheadToken(1) ); break; } - match(TERM); return ret; } -AST* DLParser::Application(void) +AST* DLParser::FuncApp(void) +{ + return NULL; +} + +AST* DLParser::BasicExp(void) { AST* ret = NULL; @@ -182,15 +198,15 @@ AST* DLParser::Application(void) // Reset the terminator to its old value } - // Traditional Function Application - else if( (lookaheadType(1) == ID) && (lookaheadType(2) == LPAR) ) - { - ret = new AST( lookaheadToken(1) ); - consume(); - match(LPAR); - ret = new AST(APPLY, 2, ret, ExpList(RPAR)); - match(RPAR); - } + //// Traditional Function Application + //else if( (lookaheadType(1) == ID) && (lookaheadType(2) == LPAR) ) + //{ + // ret = new AST( lookaheadToken(1) ); + // consume(); + // match(LPAR); + // ret = new AST(APPLY, 2, ret, ExpList(RPAR)); + // match(RPAR); + //} // Infix Function Application else if( lookaheadType(1) == LPAR ) @@ -239,6 +255,20 @@ AST* DLParser::Literal(void) return ret; } +AST* DLParser::ParamList(void) +{ + AST* ret = new AST(EXP_LIST); + match(LPAR); + ret->addChild( Expression() ); + if( COMMA == lookaheadType(1) ) + { + match(COMMA); + ret->addChild( Expression() ); + } + match(RPAR); + return ret; +} + AST* DLParser::ExpList(eTokenTypes term) { AST* ret = new AST(EXP_LIST); diff --git a/source/dlparser/dlparser.h b/source/dlparser/dlparser.h index 1b01ea2..d2aa201 100644 --- a/source/dlparser/dlparser.h +++ b/source/dlparser/dlparser.h @@ -24,8 +24,10 @@ class DLParser : public BTParser AST* Program(void); AST* Expression(void); AST* CoreForm(void); - AST* Application(void); + AST* FuncApp(void); + AST* BasicExp(void); AST* Literal(void); + AST* ParamList(void); AST* ExpList(eTokenTypes term); AST* IdList(void); }; diff --git a/source/dlparser/macro/pattern.cpp b/source/dlparser/macro/pattern.cpp deleted file mode 100644 index 0eb4575..0000000 --- a/source/dlparser/macro/pattern.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include "pattern.h" -#include "dllexer.h" -#include "exception.h" - -using namespace std; - -Pattern::Pattern(const std::list& patt, const AST* ast) : pattern(patt), expr_ast(ast) -{ -} - -Pattern::Pattern(const Pattern& patt) -{ - pattern = patt.pattern; - expr_ast = patt.expr_ast->clone(); -} - -Pattern::~Pattern() -{ - delete expr_ast; -} - -std::list::iterator Pattern::begin() -{ - return pattern.begin(); -} - -std::list::iterator Pattern::end() -{ - return pattern.end(); -} - -void Pattern::apply(AST* cur,std::vector& params) -{ - if (cur != NULL) - { - list* children = cur->children(); - list::iterator it = children->begin(); - - // Visit the tree - for(; it != children->end(); it++) - { - if ((*it)->type() == SYMBOL) - { - AST* temp = *it; - *it = expand( *it, params ); - delete temp; - } - else - { - apply( *it, params ); - } - } - } -} - -AST* Pattern::expand(const AST* cur,std::vector& params) -{ - AST* ret = NULL; - unsigned int arg; - istringstream(cur->text()) >> arg; - - if (arg <= params.size()) - { - ret = params[ arg - 1 ]; - } - else - { - Exception ex; - ex << "Invalid parameter number"; - throw ex; - } - - return ret; -} - -AST* Pattern::accept(std::vector& params) -{ - AST* ret = NULL; - - if( expr_ast->type() == SYMBOL ) - { - ret = expand( expr_ast, params ); - } - else - { - ret = expr_ast->clone(); - apply( ret, params ); - } - - return ret; -} - diff --git a/source/dlparser/macro/pattern.h b/source/dlparser/macro/pattern.h deleted file mode 100644 index 512270e..0000000 --- a/source/dlparser/macro/pattern.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef PATTERN_H -#define PATTERN_H - -#include -#include -#include "ast.h" - -typedef enum { - MAP_TYP = 0, - VECT_TYP = 1, - LIST_TYP = 2, - BLK_TYP = 3, - ID_TYP = 4, - NUM_TYP = 5, - CHAR_TYP = 6, - STR_TYP = 7, - SYM_TYP = 8, - EXPR_TYP = 9 -} PatternType_T; - -class Pattern { - protected: - std::list pattern; - const AST* expr_ast; - private: - void apply(AST* cur,std::vector& params); - public: - Pattern(const std::list& patt, const AST* ast); - Pattern(const Pattern& patt); - ~Pattern(); - std::list::iterator begin(); - std::list::iterator end(); - AST* accept(std::vector& params); - AST* expand(const AST* cur, std::vector& params); -}; - -#endif diff --git a/tests/test_dllexer.cpp b/tests/test_dllexer.cpp index 9c0b6bf..bd30d0c 100644 --- a/tests/test_dllexer.cpp +++ b/tests/test_dllexer.cpp @@ -10,45 +10,21 @@ using namespace UnitTest; +std::istringstream* input_stream = 0; + //----------------------------------------------------------------------------- // Helper Functions //----------------------------------------------------------------------------- -void TestLexerWithInput(std::string& input, eTokenTypes expected_types[]) -{ - // Setup - std::istringstream input_stream(input); - DLLexer* lexer = new DLLexer(input_stream); - int i = 0; - Token tok; - - // Compare tokens - do - { - tok = lexer->next(); - CHECK_EQUAL( expected_types[i], tok.type() ); - if( tok.type() != expected_types[i] ) - { - std::cout << "Test failed at index " << i << "." << std::endl; - break; - } - i++; - } - while(tok.type() != EOF); - // Cleanup - delete lexer; -} +#define CHECK_TOKEN(typ,txt,ln,col) \ + CHECK(lexer->next() == Token(typ,txt,ln,col)) -void TestLexerThrowsException(std::string& input) +DLLexer* SetupLexer(const std::string& input) { - // Setup - std::istringstream input_stream(input); - DLLexer* lexer = new DLLexer(input_stream); - - CHECK_THROW( lexer->next(), Exception ); - - // Cleanup - delete lexer; + if( input_stream != 0 ) delete input_stream; + input_stream = new std::istringstream( input ); + DLLexer* lexer = new DLLexer( *input_stream ); + return lexer; } //----------------------------------------------------------------------------- @@ -61,9 +37,11 @@ namespace { //------------------------------------------------------------------------- TEST(Recognize_And_Ignore_Whitespace) { - std::string input("foo \t\r\n foo"); - eTokenTypes expected[] = { ID, ID, (eTokenTypes)EOF }; - TestLexerWithInput( input, expected ); + DLLexer* lexer = SetupLexer( "foo \t\r\n bar" ); + CHECK_TOKEN( ID, "foo", 1, 3 ); + CHECK_TOKEN( ID, "bar", 2, 4 ); + CHECK_TOKEN( EOF, "", -1, -1 ); + delete lexer; } //------------------------------------------------------------------------- @@ -71,151 +49,245 @@ namespace { //------------------------------------------------------------------------- TEST(Recognize_And_Ignore_Comments) { - std::string input( + DLLexer* lexer = SetupLexer( "foo # Comment after valid token\r\n" "# Comment on a line by itself\r\n" "# Comment terminated by only a newline\n" "bar\n" "#\n" // An Empty comment - "foo" + "foo#\n" // No whitepace between comment and id "#" // A comment at the end of the file ); - eTokenTypes expected[] = { ID, ID, ID, (eTokenTypes)EOF }; - TestLexerWithInput( input, expected ); + CHECK_TOKEN( ID, "foo", 1, 3 ); + CHECK_TOKEN( ID, "bar", 4, 3 ); + CHECK_TOKEN( ID, "foo", 6, 3 ); + CHECK_TOKEN( EOF, "", -1, -1 ); + delete lexer; } //------------------------------------------------------------------------- - // Recognize Valid IDs + // Test Number Recognition //------------------------------------------------------------------------- - TEST(Recognize_Valid_IDs) + TEST(Recognize_Positive_Integers) { - std::string input( - // Make Sure we recognize all valid characters for an ID - "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_\n" - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_\n" - "a_123\n" - "a123\n" - "a_\n" - "a_a\n" - ); - eTokenTypes expected[] = { ID, ID, ID, ID, ID, ID, (eTokenTypes)EOF }; - TestLexerWithInput( input, expected ); + DLLexer* lexer = SetupLexer( "0 1 2 3 4 5 6 7 8 9" ); + CHECK_TOKEN( NUM, "+0", 1, 1 ); + CHECK_TOKEN( NUM, "+1", 1, 3 ); + CHECK_TOKEN( NUM, "+2", 1, 5 ); + CHECK_TOKEN( NUM, "+3", 1, 7 ); + CHECK_TOKEN( NUM, "+4", 1, 9 ); + CHECK_TOKEN( NUM, "+5", 1, 11 ); + CHECK_TOKEN( NUM, "+6", 1, 13 ); + CHECK_TOKEN( NUM, "+7", 1, 15 ); + CHECK_TOKEN( NUM, "+8", 1, 17 ); + CHECK_TOKEN( NUM, "+9", 1, 19 ); + CHECK_TOKEN( EOF, "", -1, -1 ); + delete lexer; } - //------------------------------------------------------------------------- - // Test Number Recognition - //------------------------------------------------------------------------- - TEST(Recognize_Valid_Numbers) + TEST(Recognize_Negative_Integers) { - std::string input( - // Recognize all of the digits - "0 1 2 3 4 5 6 7 8 9\n" - // Recognize combinations of digits - "10 11 12 13 14 15 16 17 18 19\n" - // Recognize floating point numbers (with and without exponents) - "1.0 -1.0 0.1e1 10.0e-1 1e0 10e-1" - ); - eTokenTypes expected[] = { - NUM, NUM, NUM, NUM, NUM, NUM, NUM, NUM, NUM, NUM, - NUM, NUM, NUM, NUM, NUM, NUM, NUM, NUM, NUM, NUM, - NUM, NUM, NUM, NUM, NUM, NUM, - (eTokenTypes)EOF - }; - TestLexerWithInput( input, expected ); + DLLexer* lexer = SetupLexer( "-0 -1 -2 -3 -4 -5 -6 -7 -8 -9" ); + CHECK_TOKEN( NUM, "-0", 1, 2 ); + CHECK_TOKEN( NUM, "-1", 1, 5 ); + CHECK_TOKEN( NUM, "-2", 1, 8 ); + CHECK_TOKEN( NUM, "-3", 1, 11 ); + CHECK_TOKEN( NUM, "-4", 1, 14 ); + CHECK_TOKEN( NUM, "-5", 1, 17 ); + CHECK_TOKEN( NUM, "-6", 1, 20 ); + CHECK_TOKEN( NUM, "-7", 1, 23 ); + CHECK_TOKEN( NUM, "-8", 1, 26 ); + CHECK_TOKEN( NUM, "-9", 1, 29 ); + CHECK_TOKEN( EOF, "", -1, -1 ); + delete lexer; } - TEST(Recognize_Invalid_Numbers) + TEST(Recognize_Positive_Floats) { - std::string missing_exp("1.0e-"); - TestLexerThrowsException( missing_exp ); + DLLexer* lexer = SetupLexer( "1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9" ); + CHECK_TOKEN( NUM, "+1.0", 1, 3 ); + CHECK_TOKEN( NUM, "+1.1", 1, 7 ); + CHECK_TOKEN( NUM, "+1.2", 1, 11 ); + CHECK_TOKEN( NUM, "+1.3", 1, 15 ); + CHECK_TOKEN( NUM, "+1.4", 1, 19 ); + CHECK_TOKEN( NUM, "+1.5", 1, 23 ); + CHECK_TOKEN( NUM, "+1.6", 1, 27 ); + CHECK_TOKEN( NUM, "+1.7", 1, 31 ); + CHECK_TOKEN( NUM, "+1.8", 1, 35 ); + CHECK_TOKEN( NUM, "+1.9", 1, 39 ); + CHECK_TOKEN( EOF, "", -1, -1 ); + delete lexer; } - //------------------------------------------------------------------------- - // Test Character Recognition - //------------------------------------------------------------------------- - TEST(Recognize_Valid_Characters) + TEST(Recognize_Negative_Floats) { - std::string input( - // Make Sure we recognize characters and escaped characters - "'a' '\\a'" - ); - eTokenTypes expected[] = { - CHAR, CHAR, (eTokenTypes)EOF - }; - TestLexerWithInput( input, expected ); + DLLexer* lexer = SetupLexer( "-1.0 -1.1 -1.2 -1.3 -1.4 -1.5 -1.6 -1.7 -1.8 -1.9" ); + CHECK_TOKEN( NUM, "-1.0", 1, 4 ); + CHECK_TOKEN( NUM, "-1.1", 1, 9 ); + CHECK_TOKEN( NUM, "-1.2", 1, 14 ); + CHECK_TOKEN( NUM, "-1.3", 1, 19 ); + CHECK_TOKEN( NUM, "-1.4", 1, 24 ); + CHECK_TOKEN( NUM, "-1.5", 1, 29 ); + CHECK_TOKEN( NUM, "-1.6", 1, 34 ); + CHECK_TOKEN( NUM, "-1.7", 1, 39 ); + CHECK_TOKEN( NUM, "-1.8", 1, 44 ); + CHECK_TOKEN( NUM, "-1.9", 1, 49 ); + CHECK_TOKEN( EOF, "", -1, -1 ); + delete lexer; } - //------------------------------------------------------------------------- - // Test String Recognition - //------------------------------------------------------------------------- - TEST(Recognize_Valid_Strings) + TEST(Recognize_Floats_With_Positive_Exponents) { - std::string input( - // Make Sure we recognize all valid characters for a symbol - "\"\" \n" - "\"a\" \n" - "\"\\a\" \n" + DLLexer* lexer = SetupLexer( + "1.0e1 1.0e2 1.0e3 1.0e4 1.0e5 " + "1e6 1e7 1e8 1e9 1e10" ); - eTokenTypes expected[] = { - STRING, STRING, STRING, (eTokenTypes)EOF - }; - TestLexerWithInput( input, expected ); + CHECK_TOKEN( NUM, "+1.0e1", 1, 5 ); + CHECK_TOKEN( NUM, "+1.0e2", 1, 11 ); + CHECK_TOKEN( NUM, "+1.0e3", 1, 17 ); + CHECK_TOKEN( NUM, "+1.0e4", 1, 23 ); + CHECK_TOKEN( NUM, "+1.0e5", 1, 29 ); + CHECK_TOKEN( NUM, "+1e6", 1, 33 ); + CHECK_TOKEN( NUM, "+1e7", 1, 37 ); + CHECK_TOKEN( NUM, "+1e8", 1, 41 ); + CHECK_TOKEN( NUM, "+1e9", 1, 45 ); + CHECK_TOKEN( NUM, "+1e10", 1, 50 ); + CHECK_TOKEN( EOF, "", -1, -1 ); + delete lexer; } - //------------------------------------------------------------------------- - // Test Symbol Recognition - //------------------------------------------------------------------------- - TEST(Recognize_Valid_Symbols) + TEST(Recognize_Floats_With_Negative_Exponents) { - std::string input( - // Make Sure we recognize all valid characters for a symbol - "$abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_\n" - "$ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_\n" - "$a_123\n" - "$a123\n" - "$a_\n" - "$a_a\n" + DLLexer* lexer = SetupLexer( + "1.0e-1 1.0e-2 1.0e-3 1.0e-4 1.0e-5 " + "1e-6 1e-7 1e-8 1e-9 1e-10" ); - eTokenTypes expected[] = { - SYMBOL, SYMBOL, SYMBOL, SYMBOL, SYMBOL, SYMBOL, (eTokenTypes)EOF - }; - TestLexerWithInput( input, expected ); + CHECK_TOKEN( NUM, "+1.0e-1", 1, 6 ); + CHECK_TOKEN( NUM, "+1.0e-2", 1, 13 ); + CHECK_TOKEN( NUM, "+1.0e-3", 1, 20 ); + CHECK_TOKEN( NUM, "+1.0e-4", 1, 27 ); + CHECK_TOKEN( NUM, "+1.0e-5", 1, 34 ); + CHECK_TOKEN( NUM, "+1e-6", 1, 39 ); + CHECK_TOKEN( NUM, "+1e-7", 1, 44 ); + CHECK_TOKEN( NUM, "+1e-8", 1, 49 ); + CHECK_TOKEN( NUM, "+1e-9", 1, 54 ); + CHECK_TOKEN( NUM, "+1e-10", 1, 60 ); + CHECK_TOKEN( EOF, "", -1, -1 ); + delete lexer; + } + + TEST(Throw_Exception_For_Missing_Exponent) + { + DLLexer* lexer = SetupLexer("1.0e-"); + CHECK_THROW( lexer->next(), Exception ); + delete lexer; + } + + TEST(Throw_Exception_For_Invalid_Exponent) + { + DLLexer* lexer = SetupLexer("1.0e-a"); + CHECK_THROW( lexer->next(), Exception ); + delete lexer; + } + + TEST(Throw_Exception_For_Missing_Decimal) + { + DLLexer* lexer = SetupLexer("1."); + CHECK_THROW( lexer->next(), Exception ); + delete lexer; + } + + TEST(Throw_Exception_For_Invalid_Decimal) + { + DLLexer* lexer = SetupLexer("1.a"); + CHECK_THROW( lexer->next(), Exception ); + delete lexer; } //------------------------------------------------------------------------- - // Test Exceptional Cases + // Test Character Recognition //------------------------------------------------------------------------- - TEST(Throw_Exceptions_For_Exceptional_Cases) + TEST(Recognize_Valid_Characters) { - // Make sure invalid number literals throw exceptions where appropriate - std::string num_exception1("1.0e-"); - TestLexerThrowsException( num_exception1 ); + DLLexer* lexer = SetupLexer("'a' '\\a'"); + CHECK_TOKEN( CHAR, "a", 1, 3 ); + CHECK_TOKEN( CHAR, "\\a", 1, 8 ); + CHECK_TOKEN( EOF, "", -1, -1 ); + delete lexer; + } - std::string num_exception2("1.0e-a"); - TestLexerThrowsException( num_exception2 ); + ////------------------------------------------------------------------------- + //// Test String Recognition + ////------------------------------------------------------------------------- + //TEST(Recognize_Valid_Strings) + //{ + // std::string input( + // // Make Sure we recognize all valid characters for a symbol + // "\"\" \n" + // "\"a\" \n" + // "\"\\a\" \n" + // ); + // eTokenTypes expected[] = { + // STRING, STRING, STRING, (eTokenTypes)EOF + // }; + // TestLexerWithInput( input, expected ); + //} - std::string num_exception3("1.0e-"); - TestLexerThrowsException( num_exception3 ); + ////------------------------------------------------------------------------- + //// Test Symbol Recognition + ////------------------------------------------------------------------------- + //TEST(Recognize_Valid_Symbols) + //{ + // std::string input( + // // Make Sure we recognize all valid characters for a symbol + // "$abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_\n" + // "$ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_\n" + // "$a_123\n" + // "$a123\n" + // "$a_\n" + // "$a_a\n" + // ); + // eTokenTypes expected[] = { + // SYMBOL, SYMBOL, SYMBOL, SYMBOL, SYMBOL, SYMBOL, (eTokenTypes)EOF + // }; + // TestLexerWithInput( input, expected ); + //} + + ////------------------------------------------------------------------------- + //// Recognize Valid IDs + ////------------------------------------------------------------------------- + //TEST(Recognize_Valid_IDs) + //{ + // std::string input( + // // Make Sure we recognize all valid characters for an ID + // "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_\n" + // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_\n" + // "a_123\n" + // "a123\n" + // "a_\n" + // "a_a\n" + // ); + // eTokenTypes expected[] = { ID, ID, ID, ID, ID, ID, (eTokenTypes)EOF }; + // TestLexerWithInput( input, expected ); + //} - std::string num_exception4("1.a"); - TestLexerThrowsException( num_exception4 ); - } //------------------------------------------------------------------------- - // Test General Corner Cases + // Test General Lexer Corner Cases //------------------------------------------------------------------------- TEST(Handle_An_Empty_Input_Stream) { - std::string input(""); - eTokenTypes expected[] = { (eTokenTypes)EOF }; - TestLexerWithInput( input, expected ); + DLLexer* lexer = SetupLexer( "" ); + CHECK_TOKEN( EOF, "", -1, -1 ); + delete lexer; } TEST(Handle_Recognition_At_The_End_Of_Input) { - std::string input("a"); - eTokenTypes expected[] = { ID, (eTokenTypes)EOF }; - TestLexerWithInput( input, expected ); + DLLexer* lexer = SetupLexer( "a" ); + CHECK_TOKEN( ID, "a", 1, 1 ); + CHECK_TOKEN( EOF, "", -1, -1 ); + delete lexer; } } -- 2.52.0