From: Mike D. Lowis Date: Wed, 9 May 2012 18:50:52 +0000 (-0400) Subject: Overhauled parser and lexer to support flexible expression terminators and more schem... X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=378190e493f96f3e52a16aefc897566bb884087c;p=archive%2Fdlang.git Overhauled parser and lexer to support flexible expression terminators and more scheme-like macros --- diff --git a/example.dl b/example.dl index 6890240..6bc0e7d 100644 --- a/example.dl +++ b/example.dl @@ -11,48 +11,67 @@ foo(1 2) foo(1 2 3) # Definition and assignment -define(foo 5) -set(foo 6) +#define foo 5 end +#set! foo 6 end # Lambda expressions -lambda(()) -lambda((a)) -lambda((a b)) -lambda((a b c)) - -lambda(() - foo()) -lambda((a) - foo(a)) -lambda((a b) - foo(a b)) -lambda((a b c) - foo(a b c)) +#lambda () end +#lambda (a) end +#lambda (a b) end +#lambda (a b c) end +# +#lambda () +# foo() +#end +# +#lambda (a) +# foo(a) +#end +# +#lambda (a b) +# foo(a b) +#end +# +#lambda (a b c) +# foo(a b c) +#end # Begin block -begin() -begin( foo() ) -begin( - foo() - bar()) +#begin end +# +#begin +# foo() +#end +# +#begin +# foo() +# bar() +#end # If statement -if( conditional - if_branch - else_branch ) - -if( conditional - if_branch ) +#if conditional +# if_branch +#else +# else_branch +#end +# +#if conditional +# if_branch +#end # Infix operator expression (1 add 1) ((1 add 1) add 1) (1 add (1 add 1)) +(1 - (1 + 1)) # Macros -#macro( -# [(_ a ':=' b) -# define(a b)] -# [(_ a '=' b) -# set(a b)]) +#macro let (:= =) ; +# (a := b) +# define a b end +# (a = b) +# set! 
a b end +#end # +#let foo := "bar" ; +#let foo = 5 ; diff --git a/source/dllexer/dllexer.cpp b/source/dllexer/dllexer.cpp index a3fd9e7..8385b6b 100644 --- a/source/dllexer/dllexer.cpp +++ b/source/dllexer/dllexer.cpp @@ -4,7 +4,7 @@ using namespace std; -DLLexer::DLLexer(std::istream& in) : LLNLexer(in) +DLLexer::DLLexer(std::istream& in) : LLNLexer(in), terminator_string("end") { } @@ -22,9 +22,9 @@ bool DLLexer::isLetter(void) ((lookahead(1) >= 'A') && (lookahead(1) <= 'Z')); } -bool DLLexer::isDigit(void) +bool DLLexer::isDigit(char lach) { - return ((lookahead(1) >= '0') && (lookahead(1) <= '9')); + return ((lach >= '0') && (lach <= '9')); } bool DLLexer::isStringChar(void) @@ -34,6 +34,16 @@ bool DLLexer::isStringChar(void) && (lookahead(1) != '\n')); } +void DLLexer::terminator(std::string term) +{ + terminator_string = term; +} + +std::string DLLexer::terminator(void) +{ + return terminator_string; +} + Token DLLexer::next(void) { Token ret; @@ -45,43 +55,48 @@ Token DLLexer::next(void) // If we have non-EOF chars then process them while ( !eof() && (ret.type() == EOF) ) { + // Consume whitespace if (isWhiteSpace()) { WS(); } + + // Consume and ignore comments else if(lookahead(1) == '#') { COMMENT(); } - else if (isDigit()) + + // Consume positive or negative numbers + else if ( isDigit( lookahead(1) ) ) { Number(ret,false); } - else if(lookahead(1) == '-') + else if( (lookahead(1) == '-') && isDigit( lookahead(2) ) ) { consume(); - if(isDigit()) - { - Number(ret,true); - } - else - { - throw Exception(line,column); - } + Number(ret,true); } + + // Consume character literals else if(lookahead(1) == '\'') { Char(ret); } + + // Consume string literals else if(lookahead(1) == '"') { String(ret); } + + // Consume symbol literals else if(lookahead(1) == '$') { Symbol(ret); } - //* + + // Consume parentheses else if (lookahead(1) == '(') { consume(); @@ -92,10 +107,23 @@ Token DLLexer::next(void) consume(); ret = Token( RPAR, ")", line, column ); } - // */ + + 
// Everything else (except the unescaped terminator) is considered an ID else { + bool escaped = false; + if ( lookahead(1) == '\\' ) + { + consume(); + escaped = true; + } + Id(ret); + + if( escaped && (ret.text().compare( terminator_string ) == 0) ) + { + ret.type( TERM ); + } } } @@ -129,7 +157,10 @@ void DLLexer::Id(Token& tok) oss << lookahead(1); consume(); } - while(isLetter() || isDigit() || lookahead(1) == '_'); + while( !isWhiteSpace() && + ('(' != lookahead(1)) && + (')' != lookahead(1)) ); + //while(isLetter() || isDigit() || lookahead(1) == '_'); tok = Token(ID, oss.str(), line, column); } @@ -154,7 +185,7 @@ void DLLexer::Number(Token& tok, bool isNegative) consume(); } - if( isDigit() ) + if( isDigit( lookahead(1) ) ) { // Capture the integer part do @@ -162,7 +193,7 @@ void DLLexer::Number(Token& tok, bool isNegative) oss << lookahead(1); consume(); } - while(isDigit()); + while(isDigit( lookahead(1) )); } else { @@ -188,7 +219,7 @@ std::string DLLexer::FloatingPoint(bool isNegative) oss << lookahead(1); consume(); } - while(isDigit()); + while(isDigit(lookahead(1))); // Capture the decimal point if we have one if(lookahead(1) == '.') @@ -204,7 +235,7 @@ void DLLexer::Decimal(std::ostringstream& oss) oss << lookahead(1); consume(); - if(!isDigit()) + if(!isDigit(lookahead(1))) { Exception ex(line,column); ex << "Missing fractional portion of floating point number."; @@ -216,7 +247,7 @@ void DLLexer::Decimal(std::ostringstream& oss) oss << lookahead(1); consume(); } - while ( isDigit() ); + while ( isDigit(lookahead(1)) ); } void DLLexer::Char(Token& tok) @@ -267,7 +298,7 @@ void DLLexer::Symbol(Token& tok) oss << lookahead(1); consume(); } - while(isLetter() || isDigit() || lookahead(1) == '_'); + while(isLetter() || isDigit(lookahead(1)) || lookahead(1) == '_'); tok = Token( SYMBOL, oss.str(), line, column ); } diff --git a/source/dllexer/dllexer.h b/source/dllexer/dllexer.h index dfd3701..d68789c 100644 --- a/source/dllexer/dllexer.h +++ 
b/source/dllexer/dllexer.h @@ -7,24 +7,27 @@ typedef enum TokenTypes { // Symbols - PROGRAM = 0, - DEFINE = 1, - ASSIGN = 2, - LAMBDA = 3, - BEGIN = 4, - IF = 5, - APPLY = 6, - ID_LIST = 7, - EXP_LIST = 8, - LPAR = 9, - RPAR = 10, + PROGRAM = 0, + DEFINE = 1, + ASSIGN = 2, + LAMBDA = 3, + BEGIN = 4, + IF = 5, + APPLY = 6, + ID_LIST = 7, + EXP_LIST = 8, + MACRO = 9, + TRANSFORM = 10, + LPAR = 11, + RPAR = 12, + TERM = 13, // Datatypes - ID = 11, - NUM = 12, - CHAR = 13, - STRING = 14, - SYMBOL = 15, + ID = 14, + NUM = 15, + CHAR = 16, + STRING = 17, + SYMBOL = 18, } eTokenTypes; typedef struct { @@ -33,14 +36,18 @@ typedef struct { } SingleCharMatch_T; class DLLexer : public LLNLexer { + protected: + std::string terminator_string; public: DLLexer(std::istream& in); bool isWhiteSpace(void); bool isLetter(void); - bool isDigit(void); + bool isDigit(char lach); bool isStringChar(void); void WS(void); void COMMENT(void); + void terminator(std::string term); + std::string terminator(void); Token next(void); void Id(Token& tok); diff --git a/source/dlparser/dlparser.cpp b/source/dlparser/dlparser.cpp index 2c1eb78..e6d3403 100644 --- a/source/dlparser/dlparser.cpp +++ b/source/dlparser/dlparser.cpp @@ -6,22 +6,33 @@ using namespace std; DLParser::DLParser() : BTParser() { - pattern_types.insert( pair( "M", MAP_TYP )); - pattern_types.insert( pair( "V", VECT_TYP )); - pattern_types.insert( pair( "L", LIST_TYP )); - pattern_types.insert( pair( "B", BLK_TYP )); - pattern_types.insert( pair( "I", ID_TYP )); - pattern_types.insert( pair( "N", NUM_TYP )); - pattern_types.insert( pair( "C", CHAR_TYP )); - pattern_types.insert( pair( "St", STR_TYP )); - pattern_types.insert( pair( "Sy", SYM_TYP )); - pattern_types.insert( pair( "E", EXPR_TYP )); + core_forms.insert( pair("define", DEFINE) ); + core_forms.insert( pair("set!", ASSIGN) ); + core_forms.insert( pair("lambda", LAMBDA) ); + core_forms.insert( pair("begin", BEGIN) ); + core_forms.insert( pair("if", IF) ); + 
core_forms.insert( pair("macro", MACRO) ); } DLParser::~DLParser() { } +bool DLParser::isMacroName(void) +{ + return false; +} + +bool DLParser::isCoreFormName(void) +{ + return false; +} + +eTokenTypes DLParser::getCoreFormId(void) +{ + return (eTokenTypes)0; +} + void DLParser::parse(void) { result = Program(); @@ -29,7 +40,7 @@ void DLParser::parse(void) AST* DLParser::Program(void) { - AST* node = _new AST( PROGRAM ); + AST* node = new AST( PROGRAM ); while( lookaheadType(1) != EOF ) { node->addChild( Expression() ); @@ -41,103 +52,158 @@ AST* DLParser::Expression(void) { AST* ret = NULL; - // Expression := Application - // | Literal + // Expression := CoreForm + // | BasicExp // - // Application := define '(' ID Expression ')' - // | set '(' ID Expression ')' - // | lambda '(' IdList ExpList? ')' - // | begin '(' ExpList* ')' - // | if '(' Expression Expression Expression? ')' - // | ID '(' ExpList ')' + // CoreForm := 'define' ID Expression TERM + // | 'set' ID Expression TERM + // | 'lambda' IdList ExpList? TERM + // | 'begin' ExpList* TERM + // | 'if' Expression Expression Expression? TERM + // | 'macro' ID IdList ID ExpList TERM // - // ExpList := Expression+ - // - // IdList := '(' ID* ')' + // BasicExp := MacroName ExpList? 
TERM + // | '(' Expression ID Expression ')' + // | ID '(' ExpList ')' + // | Literal // // Literal := ID // | CHAR // | SYMBOL // | STRING // | NUMBER + // + // ExpList := Expression+ + // + // IdList := '(' ID* ')' + // - if ((ID == lookaheadType(1)) && (LPAR == lookaheadType(2))) + if( isCoreFormName() ) { - // Get the ID text - Token id = lookaheadToken(1); - std::string id_text = id.text(); - consume(); + ret = CoreForm(); + } + else + { + ret = Application(); + } - match(LPAR); - if (0 == id_text.compare("define")) - { - ret = new AST( lookaheadToken(1) ); - match(ID); - ret = new AST(DEFINE, 2, ret, Expression()); - } - else if (0 == id_text.compare("set")) - { + return ret; +} + +AST* DLParser::CoreForm(void) +{ + AST* ret = NULL; + eTokenTypes form_id = getCoreFormId(); + consume(); // Throw away the form name (we don't need it anymore) + switch( form_id ) + { + case DEFINE: + case ASSIGN: ret = new AST( lookaheadToken(1) ); match(ID); - ret = new AST(ASSIGN, 2, ret, Expression()); - } - else if (0 == id_text.compare("lambda")) - { + ret = new AST(form_id, 2, ret, Expression()); + break; + + case LAMBDA: ret = new AST(LAMBDA, 2, IdList(), ExpList()); - } - else if (0 == id_text.compare("begin")) - { + break; + + case BEGIN: ret = new AST(BEGIN, 1, ExpList()); - } - else if (0 == id_text.compare("if")) - { + break; + + case IF: ret = new AST(IF, 2, Expression(), Expression()); if(lookaheadType(1) != RPAR) { ret->addChild( Expression() ); } - } - //else if (0 == id_text.compare("macro")) - //{ - //} - else - { - ret = new AST( id ); - ret = new AST(APPLY, 2, ret, ExpList()); - } + break; + + case MACRO: + ret = new AST(MACRO); + + // Get the macro name + ret->addChild( new AST( lookaheadToken(1) ) ); + match(ID); + + // Get the macro keywords + ret->addChild( IdList() ); + + // Get the macro terminator + ret->addChild( new AST( lookaheadToken(1) ) ); + match(ID); + + // Get the macro transform rules + while (TERM != lookaheadType(1)) + { + AST* transform = 
new AST( TRANSFORM ); + transform->addChild( IdList() ); + transform->addChild( Expression() ); + ret->addChild( transform ); + } + break; + + default: + throw Exception( lookaheadToken(1) ); + break; + } + match(TERM); + return ret; +} + +AST* DLParser::Application(void) +{ + AST* ret = NULL; + + // Macro Expression + if ( isMacroName() ) + { + } + + // Traditional Function Application + else if( (lookaheadType(1) == ID) && (lookaheadType(2) == LPAR) ) + { + ret = new AST( lookaheadToken(1) ); + consume(); + match(LPAR); + ret = new AST(APPLY, 2, ret, ExpList()); match(RPAR); } - else if( LPAR == lookaheadType(1) ) + + // Infix Function Application + else if( lookaheadType(1) == LPAR ) { - AST* op = NULL; - AST* operand1 = NULL; - AST* operand2 = NULL; - Token op_tok; + AST* operation = NULL; + AST* operand1 = NULL; + AST* operand2 = NULL; - // Left Operand match(LPAR); operand1 = Expression(); - - // Operator - op_tok = lookaheadToken(1); + operation = new AST( lookaheadToken(1) ); match(ID); - op = new AST(op_tok); - - // Right Operand operand2 = Expression(); match(RPAR); - ret = new AST(APPLY, 3, op, operand1, operand2); + ret = new AST( APPLY, 2, operation, new AST(EXP_LIST, 2, operand1, operand2) ); } + + // Literal else { - ret = new AST( lookaheadToken(1) ); - consume(); + ret = Literal(); } return ret; } +AST* DLParser::Literal(void) +{ + AST* ret = new AST( lookaheadToken(1) ); + consume(); + return ret; +} + AST* DLParser::ExpList(void) { AST* ret = new AST(EXP_LIST); diff --git a/source/dlparser/dlparser.h b/source/dlparser/dlparser.h index 20d968e..2683d62 100644 --- a/source/dlparser/dlparser.h +++ b/source/dlparser/dlparser.h @@ -2,6 +2,7 @@ #define DLPARSER_H #include <map> +#include <set> #include "btparser.h" #include "dllexer.h" #include "macro.h" @@ -9,11 +10,14 @@ class DLParser : public BTParser { private: - std::map macros; - std::map pattern_types; + std::map core_forms; + std::set macros; public: DLParser(); ~DLParser(); + bool isMacroName(void); + 
bool isCoreFormName(void); + eTokenTypes getCoreFormId(void); void parse(void); bool isMacro(Token& token); bool speculate_GroupExpr(void); @@ -23,6 +27,7 @@ class DLParser : public BTParser // Entry Rules AST* Program(void); AST* Expression(void); + AST* CoreForm(void); AST* Application(void); AST* Literal(void); AST* ExpList(void);