From: Mike D. Lowis Date: Wed, 6 Jun 2012 19:21:32 +0000 (-0400) Subject: Simplified lexer and parser. The aim is to get minimal functionality working with... X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=52cfdea852ac0e7411d51cb04fc33eca53d4879e;p=archive%2Fdlang.git Simplified lexer and parser. The aim is to get minimal functionality working with code generation and unit tests, then expand to support quoting and syntax extensions --- diff --git a/.gitmodules b/.gitmodules index 9c0ecd4..e17f8bc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "deps/parse-utils"] path = deps/parse-utils url = git://github.com/mikedlowis/parse-utils.git +[submodule "deps/opts"] + path = deps/opts + url = https://github.com/mikedlowis/opts.git diff --git a/Makefile b/Makefile index 1bb68dc..5729fae 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,13 @@ dlist = $(shell env find $(1) -type d -print) # Function for generating an include list incdirs = $(addprefix -I, $(call dlist, $(1))) +# Function for generating file dependencies +define make-depend + $(CXX) $(CXXFLAGS) -M $1 | \ + sed -e 's,\($(notdir $2)\) *:,$(dir $2)\1 $(subst .o,.d,$2): ,' \ + > $(subst .o,.d,$2) +endef + # Project and Artifact Names #--------------------------- PROJ_NAME = dlang @@ -34,7 +41,8 @@ SRC_EXT = cpp TEST_EXT = cpp # Libraries to Link Against -LIBS = deps/parse-utils/libparseutils.a +LIBS = deps/parse-utils/libparseutils.a \ + deps/opts/libopts.a TEST_LIBS = $(LIBS) \ tools/UnitTest++/libUnitTest++.a @@ -46,14 +54,19 @@ TEST_FILES = $(call flist, $(TESTS_ROOT), $(TEST_EXT)) SRC_OBJS = $(SRC_FILES:%.$(SRC_EXT)=%.o) TEST_OBJS = $(TEST_FILES:%.$(TEST_EXT)=%.o) +# Dependecy File Lists +SRC_DEPS = $(SRC_OBJS:%.o=%.d) +TEST_DEPS = $(TEST_OBJS:%.o=%.d) + # Include Directories INC_DIRS = $(call incdirs, $(SRC_ROOT)) \ $(call incdirs, deps/parse-utils/source) \ + $(call incdirs, deps/opts/source) \ $(call incdirs, tools/UnitTest++/src) # Compiler and Linker Options #---------------------------- -CXXFLAGS = -c $(INC_DIRS) -Wall -Werror +CXXFLAGS = -c $(INC_DIRS) -Wall -Wextra -Werror TEST_CXXFLAGS = -c $(INC_DIRS) -Wall # Build Rules @@ -63,35 +76,51 @@ all: release test release: $(PROJ_NAME) test: $(TEST_RUNNER) - ./$(TEST_RUNNER) + @echo Running unit tests... + @./$(TEST_RUNNER) # Binaries -$(PROJ_NAME): parseutils $(SRC_OBJS) - $(CXX) -o $@ $(SRC_OBJS) $(LIBS) +$(PROJ_NAME): parseutils opts $(SRC_OBJS) + @echo Linking $@... + @$(CXX) -o $@ $(SRC_OBJS) $(LIBS) -$(TEST_RUNNER): parseutils unit_test_pp $(SRC_OBJS) $(TEST_OBJS) - $(CXX) -o $@ $(filter-out source/main.o,$(SRC_OBJS)) $(TEST_OBJS) $(TEST_LIBS) +$(TEST_RUNNER): parseutils opts unit_test_pp $(SRC_OBJS) $(TEST_OBJS) + @echo Linking $@... + @$(CXX) -o $@ $(filter-out source/main.o,$(SRC_OBJS)) $(TEST_OBJS) $(TEST_LIBS) # Libraries parseutils: - $(MAKE) -C deps/parse-utils static + @$(MAKE) -C deps/parse-utils static + +opts: + @$(MAKE) -C deps/opts release unit_test_pp: - $(MAKE) -C tools/UnitTest++ + @$(MAKE) -C tools/UnitTest++ # Object Files $(SRC_OBJS): %.o : %.$(SRC_EXT) - $(CXX) $(CXXFLAGS) -o $@ $< + @echo $< + @$(call make-depend,$<,$@) + @$(CXX) $(CXXFLAGS) -o $@ $< $(TEST_OBJS): %.o : %.$(TEST_EXT) - $(CXX) $(TEST_CXXFLAGS) -o $@ $< + @echo $< + @$(call make-depend,$<,$@) + @$(CXX) $(TEST_CXXFLAGS) -o $@ $< # Cleanup clean: - $(MAKE) -C deps/parse-utils clean - $(MAKE) -C tools/UnitTest++ clean - $(RM) $(SRC_OBJS) - $(RM) $(TEST_OBJS) - $(RM) $(TEST_RUNNER)* - $(RM) $(PROJ_NAME)* + @$(MAKE) -C deps/parse-utils clean + @$(MAKE) -C deps/opts clean + @$(MAKE) -C tools/UnitTest++ clean + @$(RM) $(SRC_OBJS) + @$(RM) $(TEST_OBJS) + @$(RM) $(SRC_DEPS) + @$(RM) $(TEST_DEPS) + @$(RM) $(TEST_RUNNER)* + @$(RM) $(PROJ_NAME)* + +-include $(SRC_DEPS) +-include $(TEST_DEPS) diff --git a/deps/opts b/deps/opts new file mode 160000 index 0000000..0fd4135 --- /dev/null +++ b/deps/opts @@ -0,0 +1 @@ +Subproject commit 0fd4135d6ed05bb0f8f010644de0c36c58e1acb3 diff --git a/example.dl b/example.dl index 6411a0e..fb12525 100644 --- a/example.dl +++ b/example.dl @@ -18,8 +18,16 @@ foo(1 2 3) (foo . $bar)(1 2 3) # Definition and assignment -define foo 5; -set! foo 6; +def foo 5 +set foo 6 + +def max lambda(a b) + if (False == (a > b)) + a + #else + b + end +end # If statement if conditional diff --git a/source/dllexer/dllexer.cpp b/source/dllexer/dllexer.cpp index eca4a16..3d58fd1 100644 --- a/source/dllexer/dllexer.cpp +++ b/source/dllexer/dllexer.cpp @@ -4,7 +4,7 @@ using namespace std; -DLLexer::DLLexer(std::istream& in) : LLNLexer(in), terminator_string("end") +DLLexer::DLLexer(std::istream& in) : LLNLexer(in) { } @@ -37,16 +37,6 @@ bool DLLexer::isIDChar(void) (EOF != lookahead(1)) ); } -void DLLexer::terminator(std::string term) -{ - terminator_string = term; -} - -std::string DLLexer::terminator(void) -{ - return terminator_string; -} - Token DLLexer::next(void) { Token ret; @@ -125,7 +115,7 @@ Token DLLexer::next(void) } } - if( !escaped && (ret.text().compare( terminator_string ) == 0) ) + if( !escaped && (ret.text().compare( "end" ) == 0) ) { ret.type( TERM ); } diff --git a/source/dllexer/dllexer.h b/source/dllexer/dllexer.h index a068bed..693ffc8 100644 --- a/source/dllexer/dllexer.h +++ b/source/dllexer/dllexer.h @@ -42,8 +42,6 @@ typedef struct { } SingleCharMatch_T; class DLLexer : public LLNLexer { - protected: - std::string terminator_string; public: DLLexer(std::istream& in); bool isWhiteSpace(void); @@ -52,8 +50,6 @@ class DLLexer : public LLNLexer { bool isStringChar(void); void WS(void); void COMMENT(void); - void terminator(std::string term); - std::string terminator(void); Token next(void); void Id(Token& tok); diff --git a/source/dlparser/dlparser.cpp b/source/dlparser/dlparser.cpp index de53187..8c1be4b 100644 --- a/source/dlparser/dlparser.cpp +++ b/source/dlparser/dlparser.cpp @@ -7,8 +7,8 @@ using namespace std; DLParser::DLParser() : BTParser() { - core_forms["define"] = DEFINE; - core_forms["set!"] = ASSIGN; + core_forms["def"] = DEFINE; + core_forms["set"] = ASSIGN; core_forms["if"] = IF; core_forms["begin"] = BEGIN; core_forms["quote"] = QUOTE; @@ -82,77 +82,72 @@ AST* DLParser::Expression(void) AST* DLParser::CoreForm(void) { AST* ret = NULL; - std::string term = ((DLLexer*)lexer)->terminator(); eTokenTypes form_id = getCoreFormId(); consume(); // Throw away the form name (we don't need it anymore) switch( form_id ) { case DEFINE: case ASSIGN: - ((DLLexer*)lexer)->terminator(";"); ret = new AST( lookaheadToken(1) ); match(ID); ret = new AST(form_id, 2, ret, Expression()); break; case BEGIN: - ((DLLexer*)lexer)->terminator("end"); ret = new AST(BEGIN, 1, ExpList(TERM)); + match(TERM); break; case IF: - ((DLLexer*)lexer)->terminator("end"); ret = new AST(IF, 2, Expression(), Expression()); if(lookaheadType(1) != TERM) { ret->addChild( Expression() ); } + match(TERM); break; - //case QUOTE: - // match(LPAR); - // ret = new AST(QUOTE, 1, Expression()); - // ((DLLexer*)lexer)->terminator(")"); - // break; - case LAMBDA: case MACRO: - ((DLLexer*)lexer)->terminator("end"); ret = new AST(form_id, 2, IdList(), ExpList(TERM)); + match(TERM); break; - case SYNTAX: - ((DLLexer*)lexer)->terminator("end"); - ret = new AST(SYNTAX); + //case QUOTE: + // match(LPAR); + // ret = new AST(QUOTE, 1, Expression()); + // break; - // Get the macro name - ret->addChild( new AST( lookaheadToken(1) ) ); - match(ID); + //case SYNTAX: + // ret = new AST(SYNTAX); - // Get the macro keywords - ret->addChild( IdList() ); + // // Get the macro name + // ret->addChild( new AST( lookaheadToken(1) ) ); + // match(ID); - // Get the macro terminator - ret->addChild( new AST( lookaheadToken(1) ) ); - match(ID); + // // Get the macro keywords + // ret->addChild( IdList() ); - // Get the macro transform rules - while (TERM != lookaheadType(1)) - { - AST* transform = new AST( TRANSFORM ); - transform->addChild( IdList() ); - transform->addChild( Expression() ); - ret->addChild( transform ); - } - break; + // // Get the macro terminator + // ret->addChild( new AST( lookaheadToken(1) ) ); + // match(ID); + + // // Get the macro transform rules + // while (TERM != lookaheadType(1)) + // { + // AST* transform = new AST( TRANSFORM ); + // transform->addChild( IdList() ); + // transform->addChild( Expression() ); + // ret->addChild( transform ); + // } + // break; + case SYNTAX: case QUOTE: default: throw Exception( lookaheadToken(1) ); break; } - match(TERM); - ((DLLexer*)lexer)->terminator( term ); return ret; } @@ -166,12 +161,8 @@ AST* DLParser::BasicExp(void) AST* ret = NULL; // Macro Expression - if ( isSyntaxName() ) + /*if ( isSyntaxName() ) { - // Save current terminator - - // Register the new terminator - // Consume the name ret = new AST( EXPAND, 1, new AST( lookaheadToken(1) )); consume(); @@ -182,12 +173,10 @@ AST* DLParser::BasicExp(void) ret->addChild( Expression() ); } match(TERM); - - // Reset the terminator to its old value } // Infix Function Application - else if( lookaheadType(1) == LPAR ) + else*/ if( lookaheadType(1) == LPAR ) { AST* operation = NULL; AST* operand1 = NULL; diff --git a/source/main.cpp b/source/main.cpp index 781db54..69b70b6 100644 --- a/source/main.cpp +++ b/source/main.cpp @@ -3,7 +3,6 @@ #include #include #include "dlparser.h" -#include "scheme.h" #include "common.h" #include "options.h" #include "astprinter.h" @@ -31,8 +30,6 @@ int main(int argc, char** argv) // Setup Parser and Visitors DLParser parser; - //Scheme printer(output); - //Scheme debug_printer(std::cout); ASTPrinter debug_printer; parser.input(new DLLexer(input)); @@ -41,17 +38,15 @@ int main(int argc, char** argv) // Post process the AST (converts to scheme and prints to output file) parser.process( debug_printer ); - //parser.process( printer ); // Close the output file output.close(); // Compile the temporary file with chicken scheme //system( string("csc -O5 -v " + temp_fname).c_str() ); - (void)temp_fname; - //cout << "Removing temporary files..." << endl; - //(void)remove( temp_fname.c_str() ); + cout << "Removing temporary files..." << endl; + (void)remove( temp_fname.c_str() ); } else { diff --git a/source/visitors/expprocessor.cpp b/source/visitors/expprocessor.cpp index 91decf3..e5e1f01 100644 --- a/source/visitors/expprocessor.cpp +++ b/source/visitors/expprocessor.cpp @@ -6,14 +6,19 @@ ExpProcessor::ExpProcessor(std::map &syntaxes) : syntax_reg void ExpProcessor::beforeVisit(AST* cur, int depth) { + (void)cur; + (void)depth; } void ExpProcessor::afterVisit(AST* cur, int depth) { + (void)cur; + (void)depth; } void ExpProcessor::beforeChildren(AST* cur, int depth) { + (void)depth; // If we reached a syntax use then expand it if (cur->type() == EXPAND) { @@ -23,6 +28,7 @@ void ExpProcessor::beforeChildren(AST* cur, int depth) void ExpProcessor::afterChildren(AST* cur, int depth) { + (void)depth; // If we have a new syntax definition then register it if (cur->type() == SYNTAX) { @@ -35,9 +41,13 @@ void ExpProcessor::afterChildren(AST* cur, int depth) void ExpProcessor::beforeChild(AST* cur, int depth) { + (void)cur; + (void)depth; } void ExpProcessor::afterChild(AST* cur, int depth) { + (void)cur; + (void)depth; } diff --git a/source/visitors/scheme/scheme.cpp b/source/visitors/scheme/scheme.cpp deleted file mode 100644 index 0899262..0000000 --- a/source/visitors/scheme/scheme.cpp +++ /dev/null @@ -1,317 +0,0 @@ -#include -#include "scheme.h" -#include "exception.h" - -using namespace std; - -//extern char binary_res_environment_scm_start; - -Scheme::Scheme(std::ostream& out) : IVisitor(), stream(out) { - ifstream env_file("res/environment.scm"); - while( !env_file.eof() ) - { - stream << (char)env_file.get(); - } - env_file.close(); - // Print scheme environment to output stream - //stream << &binary_res_environment_scm_start << endl; -} - -string Scheme::typeToString(ASTNodeType type) -{ - ostringstream ret; - - switch (type) - { - //case ID: - // ret << "ID "; break; - //case NUM: - // ret << "NUM "; break; - //case MAP: - // ret << "MAP "; break; - //case CHAR: - // ret << "CHAR "; break; - //case ADD: - // ret << "+ "; break; - //case SUB: - // ret << "- "; break; - //case MUL: - // ret << "* "; break; - //case DIV: - // ret << "/ "; break; - //case AND: - // ret << "and "; break; - //case OR: - // ret << "or "; break; - //case NOT: - // ret << "not "; break; - //case EQ: - // ret << "equal? "; break; - //case NE: - // ret << "NE "; break; - //case LT: - // ret << "< "; break; - //case GT: - // ret << "> "; break; - //case LTE: - // ret << "<= "; break; - //case GTE: - // ret << ">= "; break; - //case DEFN: - // ret << "define "; break; - //case ASSIGN: - // ret << "set! "; break; - //case MUTATE: - // ret << "obj-set! "; break; - //case PROGRAM: - // ret << "begin "; break; - //case VECTOR: - // ret << "vector "; break; - //case LIST: - // ret << "list "; break; - //case BLOCK: - // ret << "begin "; break; - //case FUNC: - // ret << "lambda "; break; - //case FN_CALL: - // ret << "apply "; break; - //case ARRY_IDX: - // ret << "ARRY_IDX "; break; - //case SEP: - // ret << "cons "; break; - //case MEMB: - // ret << "hash-table-ref "; break; - - //// Print nothing for the following nodes - //case MACRO: - //case PARAMS: - // break; - - // Print out the type id (this will probably cause an error but also - // alert us to the fact that it is not properly handled) - default: - ret << type; break; - } - - return ret.str(); -} - -void Scheme::beforeVisit(AST* cur, int depth) -{ -} - -void Scheme::afterVisit(AST* cur, int depth) -{ - stream << endl; -} - -void Scheme::beforeChildren(AST* cur, int depth) -{ - nodeTypeBeginAction(cur); - //if( cur->type() != MACRO ) - { - if( isDatatype( cur->type() ) ) - { - printDatatype( cur ); - } - else - { - stream << "(" << typeToString( cur->type() ) << cur->text(); - } - } -} - -void Scheme::afterChildren(AST* cur, int depth) -{ - nodeTypeEndAction(cur); - if( !isDatatype( cur->type() ) ) - { - stream << ")"; - } -} - -void Scheme::beforeChild(AST* cur, int depth) -{ - stream << endl; - for(int i = 0; i< depth; i++) - { - stream << " "; - } -} - -void Scheme::afterChild(AST* cur, int depth) -{ -} - -bool Scheme::isDatatype(ASTNodeType type) -{ - bool ret = false; - switch(type) - { - case ID: - case NUM: - case CHAR: - case STRING: - case SYMBOL: - ret = true; - break; - default: - break; - } - return ret; -} - -void Scheme::printDatatype(AST* cur) -{ - switch(cur->type()) - { - case ID: - stream << "dl/" << cur->text(); - break; - case NUM: - stream << cur->text(); - break; - case CHAR: - charToString( cur->text() ); - break; - case STRING: - stream << '"' << cur->text() << '"'; - break; - case SYMBOL: - stream << '\'' << cur->text(); - break; - default: - break; - } -} - -void Scheme::charToString(string ch) -{ - switch(ch.at(0)) - { - case ' ': - stream << "#\\space"; - break; - - // Convert escape sequence - case '\\': - switch ( ch.at(1) ) - { - case 'a': - stream << "#\\alarm"; - break; - - case 'b': - stream << "#\\backspace"; - break; - - case 'n': - stream << "#\\newline"; - break; - - case 'r': - stream << "#\\return"; - break; - - case 't': - stream << "#\\tab"; - break; - - case 'v': - stream << "#\\vtab"; - break; - - default: - stream << "#\\" << ch; - break; - }; - break; - - default: - stream << "#\\" << ch; - } -} - -void Scheme::nodeTypeBeginAction(AST* cur) -{ - std::string text = cur->text(); - switch( cur->type() ) - { - // case MEMB: - // cur->children()->back()->type( SYMBOL ); - // break; - - // case BLOCK: - // scope_stack.startScope(); - // break; - - // case DEFN: - // defineSymbol(cur); - // break; - - // case ASSIGN: - // assignSymbol(cur); - // break; - - //TODO: Define builtin symbols and enable adding function args to scope - //case ID: - // referenceSymbol(cur); - // break; - - default: - break; - } -} - -void Scheme::nodeTypeEndAction(AST* cur) -{ - switch( cur->type() ) - { - // case BLOCK: - // scope_stack.stopScope(); - // break; - - default: - break; - } -} - -void Scheme::defineSymbol(AST* cur) -{ - string text = cur->children()->front()->text(); - if( scope_stack.lookup( text ) == NULL ) - { - scope_stack.define( text ); - } - else if ( scope_stack.isLocal(text) ) - { - Exception ex; - ex << "Redefining local symbol: '" << text << "'."; - throw ex; - } -} - -void Scheme::assignSymbol(AST* cur) -{ - if( cur->type() == ID ) - { - string text = cur->children()->front()->text(); - if( scope_stack.lookup( text ) == NULL ) - { - Exception ex; - ex << "Symbol '" << text << "' has not been defined in this scope."; - throw ex; - } - } -} - -void Scheme::referenceSymbol(AST* cur) -{ - string text = cur->text(); - if( scope_stack.lookup( text ) == NULL ) - { - Exception ex; - ex << "Symbol '" << text << "' has not been defined in this scope."; - throw ex; - } -} diff --git a/source/visitors/scheme/scheme.h b/source/visitors/scheme/scheme.h deleted file mode 100644 index 72e868a..0000000 --- a/source/visitors/scheme/scheme.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef Scheme_H -#define Scheme_H - -#include -#include -#include "ivisitor.h" -#include "dllexer.h" -#include "scopestack.h" - -class Scheme : public IVisitor { - protected: - std::ostream& stream; - ScopeStack scope_stack; - public: - Scheme(std::ostream& in); - std::string typeToString(ASTNodeType type); - bool isDatatype(ASTNodeType type); - void printDatatype(AST* cur); - void charToString(std::string ch); - private: - void beforeVisit(AST* cur, int depth); - void afterVisit(AST* cur, int depth); - void beforeChildren(AST* cur, int depth); - void afterChildren(AST* cur, int depth); - void beforeChild(AST* cur, int depth); - void afterChild(AST* cur, int depth); - void nodeTypeBeginAction(AST* cur); - void nodeTypeEndAction(AST* cur); - void defineSymbol(AST* cur); - void assignSymbol(AST* cur); - void referenceSymbol(AST* cur); -}; - -#endif diff --git a/tests/test_dllexer.cpp b/tests/test_dllexer.cpp index 1bef778..50dcd94 100644 --- a/tests/test_dllexer.cpp +++ b/tests/test_dllexer.cpp @@ -269,24 +269,6 @@ namespace { delete lexer; } - TEST(Recognize_Overridden_Expression_Terminator) - { - DLLexer* lexer = SetupLexer(";"); - lexer->terminator( ";" ); - CHECK_TOKEN( TERM, ";", 1, 1 ); - CHECK_TOKEN( EOF, "", -1, -1 ); - delete lexer; - } - - TEST(Recognize_Overridden_Punctuation_Expression_Terminator) - { - DLLexer* lexer = SetupLexer(")"); - lexer->terminator( ")" ); - CHECK_TOKEN( TERM, ")", 1, 1 ); - CHECK_TOKEN( EOF, "", -1, -1 ); - delete lexer; - } - //------------------------------------------------------------------------- // Test General Lexer Corner Cases //-------------------------------------------------------------------------