From f1b8ff9c87cc7e8b763d649eb81364b7a159a777 Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Fri, 19 Sep 2014 14:41:46 -0400 Subject: [PATCH] Checkin new parser files --- source/sclpl/ast.c | 55 ++- source/sclpl/ast.h | 9 +- source/sclpl/grammar.c | 62 ++- source/sclpl/grammar.y | 30 +- source/sclpl/lexer.c | 169 ++++++++ source/sclpl/lexer.h | 38 ++ source/sclpl/main.c | 294 +++++++++----- source/sclpl/mpc.c | 870 ++++++++++++++++++++--------------------- source/sclpl/scanner.c | 123 ++++++ source/sclpl/scanner.h | 28 ++ 10 files changed, 1052 insertions(+), 626 deletions(-) create mode 100644 source/sclpl/lexer.c create mode 100644 source/sclpl/lexer.h create mode 100644 source/sclpl/scanner.c create mode 100644 source/sclpl/scanner.h diff --git a/source/sclpl/ast.c b/source/sclpl/ast.c index a94aec4..e45739f 100644 --- a/source/sclpl/ast.c +++ b/source/sclpl/ast.c @@ -5,7 +5,6 @@ $HeadURL$ */ #include "ast.h" -#include void ast_free(void* p_obj) { @@ -14,60 +13,59 @@ void ast_free(void* p_obj) ast_t* ast_new(ast_type_t type, void* value) { - ast_t* p_ast = (ast_t*)malloc(sizeof(ast_t)); - p_ast->pos = NULL; - p_ast->type = type; - p_ast->value = value; - p_ast->children = NULL; - return p_ast; + (void)type; + (void)value; + return NULL; } void ast_set_pos(ast_t* p_ast, const char* file, size_t line, size_t col) { - pos_t* p_pos = (pos_t*)malloc(sizeof(pos_t)); - p_pos->file = file; - p_pos->line = line; - p_pos->column = col; - p_ast->pos = p_pos; + (void)p_ast; + (void)file; + (void)line; + (void)col; } const pos_t* ast_get_pos(ast_t* p_ast) { - return p_ast->pos; + (void)p_ast; + return NULL; } void ast_set_type(ast_t* p_ast, ast_type_t type) { - p_ast->type = type; + (void)p_ast; + (void)type; } ast_type_t ast_get_type(ast_t* p_ast) { - return p_ast->type; + (void)p_ast; + return UNKNOWN; } void ast_set_value(ast_t* p_ast, void* value) { - p_ast->value = value; + (void)p_ast; + (void)value; } const void* ast_get_value(ast_t* p_ast) { - return p_ast->value; + (void)p_ast; + return NULL; +} + +void ast_set_children(ast_t* p_ast, child_t* p_children) +{ + (void)p_ast; + (void)p_children; } -ast_t* ast_add_child(ast_t* p_ast, ast_t* p_child) +const child_t* ast_get_children(ast_t* p_ast) { - child_t* child = p_ast->children; - child_t* newchild = (child_t*)malloc(sizeof(child)); - newchild->ast = p_ast; - newchild->next = NULL; - if (child != NULL) { - while (child->next != NULL) child = child->next; - child->next = newchild; - } else { - p_ast->children = newchild; - } + (void)p_ast; + return NULL; } ast_t* ast_map(const ast_t* p_ast, ast_map_fn_t p_fn) @@ -76,4 +74,3 @@ ast_t* ast_map(const ast_t* p_ast, ast_map_fn_t p_fn) (void)p_fn; return NULL; } - diff --git a/source/sclpl/ast.h b/source/sclpl/ast.h index 12f11c6..366b7ab 100644 --- a/source/sclpl/ast.h +++ b/source/sclpl/ast.h @@ -13,15 +13,13 @@ typedef enum { BOOLEAN, INTEGER, - FLOAT, CHARACTER, STRING, - SEXPR, UNKNOWN } ast_type_t; typedef struct { - const char* file; + char* file; size_t line; size_t column; } pos_t; @@ -58,9 +56,10 @@ void ast_set_value(ast_t* p_ast, void* value); const void* ast_get_value(ast_t* p_ast); -ast_t* ast_add_child(ast_t* p_ast, ast_t* p_child); +void ast_set_children(ast_t* p_ast, child_t* p_children); -ast_t* ast_map(const ast_t* p_ast, ast_map_fn_t p_fn); +const child_t* ast_get_children(ast_t* p_ast); +ast_t* ast_map(const ast_t* p_ast, ast_map_fn_t p_fn); #endif /* AST_H */ diff --git a/source/sclpl/grammar.c b/source/sclpl/grammar.c index 0e5dc93..96105c1 100644 --- a/source/sclpl/grammar.c +++ b/source/sclpl/grammar.c @@ -1,36 +1,26 @@ -const char Grammar[] = -"" -"program : /^/ /$/ ;" -"" -"replexpr : /[^\\n]*\\n/ ;" -"" -"expr : | | | | | | | | ;" -"" -"sexpr : '(' ( )* ')' ;" -"" -"qexpr : ('\\'' | '`' | ',') ;" -"" -"atom : | | | | | | ;" -"" -"int : /[-+]?[0-9]+/ ;" -"" -"float : /[-+]?[0-9]+\\.[0-9]+/ ;" -"" -"radixnum : \"0b\" /[0-1]+/" -" | \"0o\" /[0-7]+/" -" | \"0d\" /[0-9]+/" -" | \"0x\" /[0-9a-fA-F]+/" -" ;" -"" -"ch : '\\\\' (\"space\"|\"newline\"|\"return\"|\"tab\"|\"vtab\")" -" | '\\\\' /./" -" ;" -"" -"str : '\"' /[^\"]*/ '\"' ;" -"" -"bool : \"True\" | \"False\" ;" -"" -"var : /[^() \\t\\r\\n#`'\"]+/ ;" -"" -"ws : '#' /[^\\n]*\\n/ | /[ \\t\\r\\n]*/ ;" -; +const char Grammar[] = +"" +"token : /^/ /$/ ;" +"" +"atom : | | | | | | ;" +"" +"punc : /['\"(){}\\[\\];,]/ ;" +"" +"floating : /[-+]?[0-9]+\\.[0-9]+/ ;" +"" +"integer : /[-+]?[0-9]+/ ;" +"" +"radixnum : \"0b\" /[0-1]+/" +" | \"0o\" /[0-7]+/" +" | \"0d\" /[0-9]+/" +" | \"0x\" /[0-9a-fA-F]+/" +" ;" +"" +"character : '\\\\' (\"space\"|\"newline\"|\"return\"|\"tab\"|\"vtab\")" +" | '\\\\' /./" +" ;" +"" +"boolean : \"true\" | \"false\" ;" +"" +"var : /.*/ ;" +; diff --git a/source/sclpl/grammar.y b/source/sclpl/grammar.y index 3ed52d9..cb195f5 100644 --- a/source/sclpl/grammar.y +++ b/source/sclpl/grammar.y @@ -1,19 +1,13 @@ -program : /^/ /$/ ; +token : /^/ /$/ ; -replexpr : /[^\n]*\n/ ; +atom : | | | | | | ; -expr : | | | | | | | | ; +punc : /['"(){}\[\];,]/ ; -sexpr : '(' ( )* ')' ; +floating : /[-+]?[0-9]+\.[0-9]+/ ; -qexpr : ('\'' | '`' | ',') ; - -atom : | | | | | | ; - -int : /[-+]?[0-9]+/ ; - -float : /[-+]?[0-9]+\.[0-9]+/ ; +integer : /[-+]?[0-9]+/ ; radixnum : "0b" /[0-1]+/ | "0o" /[0-7]+/ @@ -21,14 +15,10 @@ radixnum : "0b" /[0-1]+/ | "0x" /[0-9a-fA-F]+/ ; -ch : '\\' ("space"|"newline"|"return"|"tab"|"vtab") - | '\\' /./ - ; - -str : '"' /[^"]*/ '"' ; - -bool : "True" | "False" ; +character : '\\' ("space"|"newline"|"return"|"tab"|"vtab") + | '\\' /./ + ; -var : /[^() \t\r\n#`'"]+/ ; +boolean : "true" | "false" ; -ws : '#' /[^\n]*\n/ | /[ \t\r\n]*/ ; +var : /.*/ ; diff --git a/source/sclpl/lexer.c b/source/sclpl/lexer.c new file mode 100644 index 0000000..04c92e4 --- /dev/null +++ b/source/sclpl/lexer.c @@ -0,0 +1,169 @@ +/** + @file lexer.c + @brief See header for details + $Revision$ + $HeadURL$ + */ +#include "lexer.h" + +static lex_tok_t* lexer_translate(mpc_ast_t* p_tok_ast); +static lex_tok_t* lexer_punc(mpc_ast_t* p_tok_ast); +static lex_tok_t* lexer_radix(mpc_ast_t* p_tok_ast); +static lex_tok_t* lexer_integer(mpc_ast_t* p_tok_ast, int base); +static lex_tok_t* lexer_float(mpc_ast_t* p_tok_ast); +static lex_tok_t* lexer_char(mpc_ast_t* p_tok_ast); +static lex_tok_t* lexer_bool(mpc_ast_t* p_tok_ast); +static lex_tok_t* lexer_var(mpc_ast_t* p_tok_ast); +static lex_tok_t* lex_tok_new(lex_tok_type_t type, void* val); +static int read_radix(const mpc_ast_t* t); + +/* Grammar is auto generated into 'source/grammar.c' */ +extern const char Grammar[]; + +lexer_t* lexer_new(char* p_prompt, FILE* p_input) { + lexer_t* p_lexer = (lexer_t*)malloc(sizeof(lexer_t)); + /* Build the token parser */ + mpc_parser_t* token = mpc_new("token"); + mpc_parser_t* atom = mpc_new("atom"); + mpc_parser_t* punc = mpc_new("punc"); + mpc_parser_t* floating = mpc_new("floating"); + mpc_parser_t* integer = mpc_new("integer"); + mpc_parser_t* radix_num = mpc_new("radixnum"); + mpc_parser_t* character = mpc_new("character"); + mpc_parser_t* boolean = mpc_new("boolean"); + mpc_parser_t* variable = mpc_new("var"); + mpca_lang(MPCA_LANG_DEFAULT, Grammar, + token, atom, punc, floating, integer, radix_num, character, boolean, variable, NULL); + /* Build the Lexer */ + p_lexer->lexrule = token; + p_lexer->scanner = scanner_new(p_prompt, p_input); + return p_lexer; +} + +lex_tok_t* lexer_read(lexer_t* p_lexer) { + mpc_result_t r; + lex_tok_t* p_tok = NULL; + char* text = scanner_read(p_lexer->scanner); + if (mpc_parse("", text, p_lexer->lexrule, &r)) { + mpc_ast_print(((mpc_ast_t*)r.output)->children[1]); + p_tok = lexer_translate( ((mpc_ast_t*)r.output)->children[1] ); + mpc_ast_delete(r.output); + } else { + mpc_err_print(r.error); + mpc_err_delete(r.error); + } + free(text); + return p_tok; +} + +lex_tok_t* lexer_translate(mpc_ast_t* p_tok_ast) { + lex_tok_t* p_tok = (lex_tok_t*)malloc(sizeof(lex_tok_t)); + if (0 == strncmp("atom|punc", p_tok_ast->tag, 9)) { + p_tok = lexer_punc(p_tok_ast); + } else if (0 == strncmp("radixnum", p_tok_ast->tag, 8)) { + p_tok = lexer_radix(p_tok_ast); + } else if (0 == strncmp("atom|integer", p_tok_ast->tag, 12)) { + p_tok = lexer_integer(p_tok_ast, 10); + } else if (0 == strncmp("atom|floating", p_tok_ast->tag, 13)) { + p_tok = lexer_float(p_tok_ast); + } else if (0 == strncmp("character", p_tok_ast->tag, 9)) { + p_tok = lexer_char(p_tok_ast); + } else if (0 == strncmp("atom|boolean", p_tok_ast->tag, 12)) { + p_tok = lexer_bool(p_tok_ast); + } else if (0 == strncmp("atom|var", p_tok_ast->tag, 8)) { + p_tok = lexer_var(p_tok_ast); + } else { + puts("unknown"); + } + return p_tok; +} + +lex_tok_t* lexer_punc(mpc_ast_t* p_tok_ast) +{ + lex_tok_t* p_tok = NULL; + switch (p_tok_ast->contents[0]) { + case '(': p_tok = lex_tok_new(LPAR, NULL); break; + case ')': p_tok = lex_tok_new(RPAR, NULL); break; + case '{': p_tok = lex_tok_new(LBRACE, NULL); break; + case '}': p_tok = lex_tok_new(RBRACE, NULL); break; + case '[': p_tok = lex_tok_new(LBRACK, NULL); break; + case ']': p_tok = lex_tok_new(RBRACK, NULL); break; + case ';': p_tok = lex_tok_new(END, NULL); break; + case ',': p_tok = lex_tok_new(COMMA, NULL); break; + } + return p_tok; +} + +lex_tok_t* lexer_radix(mpc_ast_t* p_tok_ast) +{ + return lexer_integer(p_tok_ast->children[1], read_radix(p_tok_ast)); +} + +lex_tok_t* lexer_integer(mpc_ast_t* p_tok_ast, int base) +{ + long* p_int = (long*)malloc(sizeof(long)); + errno = 0; + *p_int = strtol(p_tok_ast->contents, NULL, base); + assert(errno == 0); + return lex_tok_new(INT, p_int); +} + +lex_tok_t* lexer_float(mpc_ast_t* p_tok_ast) +{ + double* p_dbl = (double*)malloc(sizeof(double)); + errno = 0; + *p_dbl = strtod(p_tok_ast->contents, NULL); + assert(errno == 0); + return lex_tok_new(FLOAT, p_dbl); +} + +lex_tok_t* lexer_char(mpc_ast_t* p_tok_ast) +{ + lex_tok_t* p_tok = NULL; + static const char* lookup_table[5] = { + " \0space", + "\n\0newline", + "\r\0return", + "\t\0tab", + "\v\0vtab" + }; + if (strlen(p_tok_ast->contents) == 1) { + p_tok = lex_tok_new(CHAR, (void*)(p_tok_ast->contents[0])); + } else { + for(int i = 0; i < 5; i++) { + if (strcmp(p_tok_ast->contents, &(lookup_table[i][2]))) { + p_tok = lex_tok_new(CHAR, (void*)(lookup_table[i][0])); + break; + } + } + } + return p_tok; +} + +lex_tok_t* lexer_bool(mpc_ast_t* p_tok_ast) +{ + return lex_tok_new(BOOL, (void*)((0==strcmp(p_tok_ast->contents,"True")) ? true : false)); +} + +lex_tok_t* lexer_var(mpc_ast_t* p_tok_ast) +{ + char* p_str = strdup(p_tok_ast->contents); + return lex_tok_new(VAR, p_str); +} + +lex_tok_t* lex_tok_new(lex_tok_type_t type, void* val) { + lex_tok_t* p_tok = (lex_tok_t*)malloc(sizeof(lex_tok_t)); + p_tok->type = type; + p_tok->value = val; + return p_tok; +} + +static int read_radix(const mpc_ast_t* t) { + switch( t->children[0]->contents[1] ) { + case 'b': return 2; + case 'o': return 8; + case 'd': return 10; + case 'h': return 16; + default: return 10; + } +} diff --git a/source/sclpl/lexer.h b/source/sclpl/lexer.h new file mode 100644 index 0000000..9233b91 --- /dev/null +++ b/source/sclpl/lexer.h @@ -0,0 +1,38 @@ +/** + @file lexer.h + @brief TODO: Describe this file + $Revision$ + $HeadURL$ + */ +#ifndef LEXER_H +#define LEXER_H + +#include "scanner.h" +#include "mpc.h" +#include +#include +#include + +typedef struct { + mpc_parser_t* lexrule; + scanner_t* scanner; +} lexer_t; + +typedef enum { + END, STRING, CHAR, INT, FLOAT, BOOL, LBRACE, RBRACE, LBRACK, RBRACK, LPAR, + RPAR, COMMA, VAR +} lex_tok_type_t; + +typedef struct { + lex_tok_type_t type; + const char* file; + size_t line; + size_t column; + void* value; +} lex_tok_t; + +lexer_t* lexer_new(char* p_prompt, FILE* p_input); + +lex_tok_t* lexer_read(lexer_t* p_lexer); + +#endif /* LEXER_H */ diff --git a/source/sclpl/main.c b/source/sclpl/main.c index a739c37..430c260 100644 --- a/source/sclpl/main.c +++ b/source/sclpl/main.c @@ -1,133 +1,225 @@ #include "mpc.h" -#include "ast.h" +#include "scanner.h" +#include "lexer.h" #include -#include -/* SCLPL Parser - *****************************************************************************/ -/* Grammar is auto generated into 'source/grammar.c' */ -extern const char Grammar[]; +/*****************************************************************************/ +typedef struct { + lexer_t* p_lexer; + lex_tok_t* p_tok; +} parser_t; -static ast_t* read_sexpr(const mpc_ast_t* t) { - (void)t; - return NULL; +parser_t* parser_new(char* p_prompt, FILE* input) +{ + parser_t* p_parser = (parser_t*)malloc(sizeof(parser_t)); + p_parser->p_lexer = lexer_new(p_prompt, input); + p_parser->p_tok = NULL; + return p_parser; } -static ast_t* read_qexpr(const mpc_ast_t* t) { - (void)t; - return NULL; +void parser_fetch(parser_t* p_parser) +{ + p_parser->p_tok = lexer_read(p_parser->p_lexer); } -static ast_t* read_char(const mpc_ast_t* t) { - (void)t; - return NULL; +lex_tok_t* parser_peek(parser_t* p_parser) +{ + if (NULL == p_parser->p_tok) + parser_fetch(p_parser); + return p_parser->p_tok; } -static ast_t* read_string(const mpc_ast_t* t) { - (void)t; - return NULL; +void parser_error(parser_t* p_parser, const char* p_text) +{ + (void)p_parser; + fprintf(stderr,"Error: %s\n",p_text); + exit(1); +} + +bool parser_accept(parser_t* p_parser, lex_tok_type_t type) +{ + bool ret = false; + if (parser_peek(p_parser)->type == type) { + p_parser->p_tok = NULL; + ret = true; + } + return ret; } -static ast_t* read_var(const mpc_ast_t* t) { - (void)t; - return NULL; +bool parser_accept_str(parser_t* p_parser, lex_tok_type_t type, const char* p_text) +{ + bool ret = false; + if ((parser_peek(p_parser)->type == type) && (0 == strcmp((char*)(p_parser->p_tok->value), p_text))) { + p_parser->p_tok = NULL; + ret = true; + } + return ret; +} + +bool parser_expect(parser_t* p_parser, lex_tok_type_t type) +{ + bool ret = false; + if (parser_accept(p_parser, type)) { + ret = true; + } else { + parser_error(p_parser, "Unexpected token"); + } + return ret; } -static ast_t* read_bool(const mpc_ast_t* t) { - (void)t; - return NULL; +bool parser_expect_str(parser_t* p_parser, lex_tok_type_t type, const char* p_text) +{ + bool ret = false; + if (parser_accept_str(p_parser, type, p_text)) { + ret = true; + } else { + parser_error(p_parser, "Unexpected token"); + } + return ret; } -static ast_t* read_float(const mpc_ast_t* t) { - double* p_dbl = (double*)malloc(sizeof(double)); - ast_t* p_ast = ast_new(FLOAT, p_dbl); - ast_set_pos(p_ast, "", t->state.row, t->state.col); - errno = 0; - *p_dbl = strtod(t->contents, NULL); - assert(errno == 0); - return p_ast; +/*****************************************************************************/ +void parser_toplevel(parser_t* p_parser); +void parser_import(parser_t* p_parser); +void parser_definition(parser_t* p_parser); +void parser_expression(parser_t* p_parser); +void parser_literal(parser_t* p_parser); +void parser_arglist(parser_t* p_parser); +void parser_if_stmnt(parser_t* p_parser); +void parser_fn_stmnt(parser_t* p_parser); + +void parser_toplevel(parser_t* p_parser) +{ + if (parser_accept_str(p_parser, VAR, "import")) + parser_import(p_parser); + else if (parser_accept_str(p_parser, VAR, "def")) + parser_definition(p_parser); + else if (p_parser->p_lexer->scanner->p_input == stdin) + parser_expression(p_parser); + else + parser_error(p_parser, "Unrecognized top-level form"); } -static ast_t* read_int(const mpc_ast_t* t, int base) { - long* p_int = (long*)malloc(sizeof(long)); - printf("reading int with base: %d\n", base); - ast_t* p_ast = ast_new(INTEGER, p_int); - ast_set_pos(p_ast, "", t->state.row, t->state.col); - errno = 0; - *p_int = strtol(t->contents, NULL, base); - assert(errno == 0); - return p_ast; +void parser_import(parser_t* p_parser) +{ + parser_expect(p_parser, VAR); + parser_expect(p_parser, END); } -static int read_radix(const mpc_ast_t* t) { - switch( t->children[0]->contents[1] ) { - case 'b': return 2; - case 'o': return 8; - case 'd': return 10; - case 'x': return 16; - default: return 10; +void parser_definition(parser_t* p_parser) +{ + parser_expect(p_parser,VAR); + if (parser_peek(p_parser)->type == LPAR) { + parser_fn_stmnt(p_parser); + } else { + parser_expression(p_parser); + parser_expect(p_parser,END); } } -ast_t* format_expr_ast(mpc_ast_t* expr) { - ast_t* p_ast = NULL; - - /* Handle the current node */ - if (0 == strcmp("sexpr|>", expr->tag)) { - } else if (0 == strcmp("qexpr|>", expr->tag)) { - } else if (0 == strcmp("radixnum|>", expr->tag)) { - p_ast = read_int(expr->children[1], read_radix(expr)); - printf("int: %d\n", *((long*)p_ast->value)); - } else if (0 == strcmp("expr|float|regex", expr->tag)) { - p_ast = read_float(expr); - printf("double: %f\n", *((double*)p_ast->value)); - } else if (0 == strcmp("expr|int|regex", expr->tag)) { - p_ast = read_int(expr,10); - printf("int: %d\n", *((long*)p_ast->value)); - } else if (0 == strcmp("ch|>", expr->tag)) { - } else if (0 == strcmp("str|>", expr->tag)) { - } else if (0 == strcmp("expr|bool|str", expr->tag)) { - } else if (0 == strcmp("expr|var|regex", expr->tag)) { +void parser_expression(parser_t* p_parser) +{ + if (parser_accept(p_parser, LPAR)) { + parser_expression(p_parser); + parser_accept(p_parser, RPAR); + } else if (parser_accept_str(p_parser, VAR, "if")) { + parser_if_stmnt(p_parser); + } else if (parser_accept_str(p_parser, VAR, "fn")) { + parser_fn_stmnt(p_parser); + } else if (parser_peek(p_parser)->type == VAR) { + parser_expect(p_parser, VAR); + if (parser_peek(p_parser)->type == LPAR) { + parser_arglist(p_parser); + } } else { - printf("unknown tag: '%s'\n", expr->tag); - free(p_ast->pos); - free(p_ast); - p_ast = NULL; + parser_literal(p_parser); + } +} + +void parser_literal(parser_t* p_parser) +{ + switch (parser_peek(p_parser)->type) + { + case BOOL: + case CHAR: + case STRING: + case INT: + case FLOAT: + parser_accept(p_parser, parser_peek(p_parser)->type); + break; + + default: + parser_error(p_parser, "Not a valid expression"); + break; } +} + +void parser_arglist(parser_t* p_parser) +{ + parser_expect(p_parser, LPAR); + while(parser_peek(p_parser)->type != RPAR) { + parser_expression(p_parser); + if(parser_peek(p_parser)->type != RPAR) + parser_expect(p_parser, COMMA); + } + parser_expect(p_parser, RPAR); +} - return p_ast; +void parser_if_stmnt(parser_t* p_parser) +{ + parser_expression(p_parser); + parser_expression(p_parser); + parser_expect_str(p_parser,VAR,"else"); + parser_expression(p_parser); + parser_expect(p_parser,END); +} + +void parser_fn_stmnt(parser_t* p_parser) +{ + parser_expect(p_parser, LPAR); + while(parser_peek(p_parser)->type != RPAR) { + parser_expect(p_parser, VAR); + if(parser_peek(p_parser)->type != RPAR) + parser_expect(p_parser, COMMA); + } + parser_expect(p_parser, RPAR); + while(parser_peek(p_parser)->type != END) { + parser_expression(p_parser); + } + parser_expect(p_parser, END); } +/* SCLPL Parser + *****************************************************************************/ +/* TODO: + + * Gracefully handle EOF + * Formalize grammar for parser + * Paren for function application must be on same line as variable in REPL + * "end" and ';' must be equivalent + * skip line on error and terminate after full program parse + * skip line and print on error but do not terminate the REPL + * Phase out use of MPC + * Integrate libcds + * Integrate command line parsing + +*/ + int main(int argc, char **argv) { - mpc_parser_t* ReplExpr = mpc_new("replexpr"); - mpc_parser_t* Expr = mpc_new("expr"); - mpc_parser_t* SExpr = mpc_new("sexpr"); - mpc_parser_t* QExpr = mpc_new("qexpr"); - mpc_parser_t* Atom = mpc_new("atom"); - mpc_parser_t* Int = mpc_new("int"); - mpc_parser_t* Float = mpc_new("float"); - mpc_parser_t* Radix = mpc_new("radixnum"); - mpc_parser_t* Char = mpc_new("ch"); - mpc_parser_t* String = mpc_new("str"); - mpc_parser_t* Bool = mpc_new("bool"); - mpc_parser_t* Var = mpc_new("var"); - mpc_parser_t* WS = mpc_new("ws"); - mpca_lang(MPCA_LANG_WHITESPACE_SENSITIVE, Grammar, - ReplExpr, Expr, SExpr, QExpr, Atom, Int, Float, Radix, Char, String, Bool, Var, WS, NULL); - while(!feof(stdin)) { - mpc_result_t r; - printf(":> "); - if (mpc_parse_pipe("", stdin, ReplExpr, &r)) { - mpc_ast_t* expr = (mpc_ast_t*)(((mpc_ast_t*)r.output)->children[1]); - mpc_ast_print(expr); - format_expr_ast(expr); - mpc_ast_delete(r.output); - } else { - mpc_err_print(r.error); - mpc_err_delete(r.error); - while('\n' != fgetc(stdin)){} - } + (void)argc; + (void)argv; + + //scanner_t* p_scanner = scanner_new(":> ", stdin); + //while(!scanner_eof(p_scanner)) { + // printf("TOK: '%s'\n", scanner_read(p_scanner) ); + // puts("OK."); + //} + + parser_t* p_parser = parser_new(":> ", stdin); + while(true) { + parser_toplevel(p_parser); + puts("OK."); } - mpc_cleanup(13, ReplExpr, Expr, SExpr, QExpr, Atom, Int, Float, Radix, Char, String, Bool, Var, WS); + return 0; } diff --git a/source/sclpl/mpc.c b/source/sclpl/mpc.c index 4e0d8cf..5b0c5c4 100644 --- a/source/sclpl/mpc.c +++ b/source/sclpl/mpc.c @@ -63,7 +63,7 @@ void mpc_err_delete(mpc_err_t *x) { for (i = 0; i < x->expected_num; i++) { free(x->expected[i]); } - + free(x->expected); free(x->filename); free(x->failure); @@ -71,26 +71,26 @@ void mpc_err_delete(mpc_err_t *x) { } static int mpc_err_contains_expected(mpc_err_t *x, char *expected) { - + int i; for (i = 0; i < x->expected_num; i++) { if (strcmp(x->expected[i], expected) == 0) { return 1; } } - + return 0; } static void mpc_err_add_expected(mpc_err_t *x, char *expected) { - + x->expected_num++; x->expected = realloc(x->expected, sizeof(char*) * x->expected_num); x->expected[x->expected_num-1] = malloc(strlen(expected) + 1); strcpy(x->expected[x->expected_num-1], expected); - + } static void mpc_err_clear_expected(mpc_err_t *x, char *expected) { - + int i; for (i = 0; i < x->expected_num; i++) { free(x->expected[i]); @@ -99,7 +99,7 @@ static void mpc_err_clear_expected(mpc_err_t *x, char *expected) { x->expected = realloc(x->expected, sizeof(char*) * x->expected_num); x->expected[0] = malloc(strlen(expected) + 1); strcpy(x->expected[0], expected); - + } void mpc_err_print(mpc_err_t *x) { @@ -125,13 +125,13 @@ void mpc_err_string_cat(char *buffer, int *pos, int *max, char *fmt, ...) { static char char_unescape_buffer[3]; static char *mpc_err_char_unescape(char c) { - + char_unescape_buffer[0] = '\''; char_unescape_buffer[1] = ' '; char_unescape_buffer[2] = '\''; - + switch (c) { - + case '\a': return "bell"; case '\b': return "backspace"; case '\f': return "formfeed"; @@ -145,48 +145,48 @@ static char *mpc_err_char_unescape(char c) { char_unescape_buffer[1] = c; return char_unescape_buffer; } - + } char *mpc_err_string(mpc_err_t *x) { - + char *buffer = calloc(1, 1024); int max = 1023; - int pos = 0; + int pos = 0; int i; - + if (x->failure) { mpc_err_string_cat(buffer, &pos, &max, - "%s: error: %s\n", + "%s: error: %s\n", x->filename, x->failure); return buffer; } - - mpc_err_string_cat(buffer, &pos, &max, + + mpc_err_string_cat(buffer, &pos, &max, "%s:%i:%i: error: expected ", x->filename, x->state.row+1, x->state.col+1); - + if (x->expected_num == 0) { mpc_err_string_cat(buffer, &pos, &max, "ERROR: NOTHING EXPECTED"); } if (x->expected_num == 1) { mpc_err_string_cat(buffer, &pos, &max, "%s", x->expected[0]); } if (x->expected_num >= 2) { - + for (i = 0; i < x->expected_num-2; i++) { mpc_err_string_cat(buffer, &pos, &max, "%s, ", x->expected[i]); - } - - mpc_err_string_cat(buffer, &pos, &max, "%s or %s", - x->expected[x->expected_num-2], + } + + mpc_err_string_cat(buffer, &pos, &max, "%s or %s", + x->expected[x->expected_num-2], x->expected[x->expected_num-1]); } - + mpc_err_string_cat(buffer, &pos, &max, " at "); mpc_err_string_cat(buffer, &pos, &max, mpc_err_char_unescape(x->recieved)); mpc_err_string_cat(buffer, &pos, &max, "\n"); - + return realloc(buffer, strlen(buffer) + 1); } static mpc_err_t *mpc_err_or(mpc_err_t** x, int n) { - + int i, j; mpc_err_t *e = malloc(sizeof(mpc_err_t)); e->state = mpc_state_invalid(); @@ -195,32 +195,32 @@ static mpc_err_t *mpc_err_or(mpc_err_t** x, int n) { e->failure = NULL; e->filename = malloc(strlen(x[0]->filename)+1); strcpy(e->filename, x[0]->filename); - + for (i = 0; i < n; i++) { if (x[i]->state.pos > e->state.pos) { e->state = x[i]->state; } } - + for (i = 0; i < n; i++) { - + if (x[i]->state.pos < e->state.pos) { continue; } - + if (x[i]->failure) { e->failure = malloc(strlen(x[i]->failure)+1); strcpy(e->failure, x[i]->failure); break; } - + e->recieved = x[i]->recieved; - + for (j = 0; j < x[i]->expected_num; j++) { if (!mpc_err_contains_expected(e, x[i]->expected[j])) { mpc_err_add_expected(e, x[i]->expected[j]); } } } - + for (i = 0; i < n; i++) { mpc_err_delete(x[i]); } - + return e; } @@ -229,20 +229,20 @@ static mpc_err_t *mpc_err_repeat(mpc_err_t *x, const char *prefix) { int i; char *expect = malloc(strlen(prefix) + 1); strcpy(expect, prefix); - + if (x->expected_num == 1) { expect = realloc(expect, strlen(expect) + strlen(x->expected[0]) + 1); strcat(expect, x->expected[0]); } - + if (x->expected_num > 1) { - + for (i = 0; i < x->expected_num-2; i++) { expect = realloc(expect, strlen(expect) + strlen(x->expected[i]) + strlen(", ") + 1); strcat(expect, x->expected[i]); strcat(expect, ", "); } - + expect = realloc(expect, strlen(expect) + strlen(x->expected[x->expected_num-2]) + strlen(" or ") + 1); strcat(expect, x->expected[x->expected_num-2]); strcat(expect, " or "); @@ -250,10 +250,10 @@ static mpc_err_t *mpc_err_repeat(mpc_err_t *x, const char *prefix) { strcat(expect, x->expected[x->expected_num-1]); } - + mpc_err_clear_expected(x, expect); free(expect); - + return x; } @@ -277,24 +277,24 @@ static mpc_err_t *mpc_err_count(mpc_err_t *x, int n) { */ /* -** In mpc the input type has three modes of +** In mpc the input type has three modes of ** operation: String, File and Pipe. ** -** String is easy. The whole contents are +** String is easy. The whole contents are ** loaded into a buffer and scanned through. -** The cursor can jump around at will making +** The cursor can jump around at will making ** backtracking easy. ** ** The second is a File which is also somewhat -** easy. The contents are never loaded into +** easy. The contents are never loaded into ** memory but backtracking can still be achieved ** by seeking in the file at different positions. ** ** The final mode is Pipe. This is the difficult -** one. As we assume pipes cannot be seeked - and -** only support a single character lookahead at -** any point, when the input is marked for a -** potential backtracking we start buffering any +** one. As we assume pipes cannot be seeked - and +** only support a single character lookahead at +** any point, when the input is marked for a +** potential backtracking we start buffering any ** input. ** ** This means that if we are requested to seek @@ -316,102 +316,102 @@ enum { typedef struct { int type; - char *filename; + char *filename; mpc_state_t state; - + char *string; char *buffer; FILE *file; - + int backtrack; int marks_num; mpc_state_t* marks; char* lasts; - + char last; - + } mpc_input_t; static mpc_input_t *mpc_input_new_string(const char *filename, const char *string) { mpc_input_t *i = malloc(sizeof(mpc_input_t)); - + i->filename = malloc(strlen(filename) + 1); strcpy(i->filename, filename); i->type = MPC_INPUT_STRING; - + i->state = mpc_state_new(); - + i->string = malloc(strlen(string) + 1); strcpy(i->string, string); i->buffer = NULL; i->file = NULL; - + i->backtrack = 1; i->marks_num = 0; i->marks = NULL; i->lasts = NULL; i->last = '\0'; - + return i; } static mpc_input_t *mpc_input_new_pipe(const char *filename, FILE *pipe) { mpc_input_t *i = malloc(sizeof(mpc_input_t)); - + i->filename = malloc(strlen(filename) + 1); strcpy(i->filename, filename); - + i->type = MPC_INPUT_PIPE; i->state = mpc_state_new(); - + i->string = NULL; i->buffer = NULL; i->file = pipe; - + i->backtrack = 1; i->marks_num = 0; i->marks = NULL; i->lasts = NULL; - + i->last = '\0'; - + return i; - + } static mpc_input_t *mpc_input_new_file(const char *filename, FILE *file) { - + mpc_input_t *i = malloc(sizeof(mpc_input_t)); - + i->filename = malloc(strlen(filename) + 1); strcpy(i->filename, filename); i->type = MPC_INPUT_FILE; i->state = mpc_state_new(); - + i->string = NULL; i->buffer = NULL; i->file = file; - + i->backtrack = 1; i->marks_num = 0; i->marks = NULL; i->lasts = NULL; - + i->last = '\0'; - + return i; } static void mpc_input_delete(mpc_input_t *i) { - + free(i->filename); - + if (i->type == MPC_INPUT_STRING) { free(i->string); } if (i->type == MPC_INPUT_PIPE) { free(i->buffer); } - + free(i->marks); free(i->lasts); free(i); @@ -421,47 +421,47 @@ static void mpc_input_backtrack_disable(mpc_input_t *i) { i->backtrack--; } static void mpc_input_backtrack_enable(mpc_input_t *i) { i->backtrack++; } static void mpc_input_mark(mpc_input_t *i) { - + if (i->backtrack < 1) { return; } - + i->marks_num++; i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_num); i->lasts = realloc(i->lasts, sizeof(char) * i->marks_num); i->marks[i->marks_num-1] = i->state; i->lasts[i->marks_num-1] = i->last; - + if (i->type == MPC_INPUT_PIPE && i->marks_num == 1) { i->buffer = calloc(1, 1); } - + } static void mpc_input_unmark(mpc_input_t *i) { - + if (i->backtrack < 1) { return; } - + i->marks_num--; i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_num); i->lasts = realloc(i->lasts, sizeof(char) * i->marks_num); - + if (i->type == MPC_INPUT_PIPE && i->marks_num == 0) { free(i->buffer); i->buffer = NULL; } - + } static void mpc_input_rewind(mpc_input_t *i) { - + if (i->backtrack < 1) { return; } - + i->state = i->marks[i->marks_num-1]; i->last = i->lasts[i->marks_num-1]; - + if (i->type == MPC_INPUT_FILE) { fseek(i->file, i->state.pos, SEEK_SET); } - + mpc_input_unmark(i); } @@ -481,17 +481,17 @@ static int mpc_input_terminated(mpc_input_t *i) { } static char mpc_input_getc(mpc_input_t *i) { - + char c = '\0'; - + switch (i->type) { - + case MPC_INPUT_STRING: return i->string[i->state.pos]; case MPC_INPUT_FILE: c = fgetc(i->file); return c; case MPC_INPUT_PIPE: - + if (!i->buffer) { c = getc(i->file); return c; } - + if (i->buffer && mpc_input_buffer_in_range(i)) { c = mpc_input_buffer_get(i); return c; @@ -499,34 +499,34 @@ static char mpc_input_getc(mpc_input_t *i) { c = getc(i->file); return c; } - + default: return c; } } static char mpc_input_peekc(mpc_input_t *i) { - + char c = '\0'; - + switch (i->type) { case MPC_INPUT_STRING: return i->string[i->state.pos]; - case MPC_INPUT_FILE: - + case MPC_INPUT_FILE: + c = fgetc(i->file); if (feof(i->file)) { return '\0'; } - + fseek(i->file, -1, SEEK_CUR); return c; - + case MPC_INPUT_PIPE: - + if (!i->buffer) { c = getc(i->file); if (feof(i->file)) { return '\0'; } ungetc(c, i->file); return c; } - + if (i->buffer && mpc_input_buffer_in_range(i)) { return mpc_input_buffer_get(i); } else { @@ -535,10 +535,10 @@ static char mpc_input_peekc(mpc_input_t *i) { ungetc(c, i->file); return c; } - + default: return c; } - + } static int mpc_input_failure(mpc_input_t *i, char c) { @@ -547,46 +547,46 @@ static int mpc_input_failure(mpc_input_t *i, char c) { case MPC_INPUT_STRING: break; case MPC_INPUT_FILE: fseek(i->file, -1, SEEK_CUR); break; case MPC_INPUT_PIPE: - + if (!i->buffer) { ungetc(c, i->file); break; } - + if (i->buffer && mpc_input_buffer_in_range(i)) { break; } else { - ungetc(c, i->file); + ungetc(c, i->file); } - + } - + return 0; } static int mpc_input_success(mpc_input_t *i, char c, char **o) { - + if (i->type == MPC_INPUT_PIPE && i->buffer && !mpc_input_buffer_in_range(i)) { - + i->buffer = realloc(i->buffer, strlen(i->buffer) + 2); i->buffer[strlen(i->buffer) + 1] = '\0'; i->buffer[strlen(i->buffer) + 0] = c; } - + i->last = c; i->state.pos++; i->state.col++; - + if (c == '\n') { i->state.col = 0; i->state.row++; } - + if (o) { (*o) = malloc(2); (*o)[0] = c; (*o)[1] = '\0'; } - + return 1; } @@ -605,29 +605,29 @@ static int mpc_input_char(mpc_input_t *i, char c, char **o) { static int mpc_input_range(mpc_input_t *i, char c, char d, char **o) { char x = mpc_input_getc(i); if (mpc_input_terminated(i)) { return 0; } - return x >= c && x <= d ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); + return x >= c && x <= d ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); } static int mpc_input_oneof(mpc_input_t *i, const char *c, char **o) { char x = mpc_input_getc(i); if (mpc_input_terminated(i)) { return 0; } - return strchr(c, x) != 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); + return strchr(c, x) != 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); } static int mpc_input_noneof(mpc_input_t *i, const char *c, char **o) { char x = mpc_input_getc(i); if (mpc_input_terminated(i)) { return 0; } - return strchr(c, x) == 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); + return strchr(c, x) == 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); } static int mpc_input_satisfy(mpc_input_t *i, int(*cond)(char), char **o) { char x = mpc_input_getc(i); if (mpc_input_terminated(i)) { return 0; } - return cond(x) ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); + return cond(x) ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); } static int mpc_input_string(mpc_input_t *i, const char *c, char **o) { - + char *co = NULL; const char *x = c; @@ -642,7 +642,7 @@ static int mpc_input_string(mpc_input_t *i, const char *c, char **o) { x++; } mpc_input_unmark(i); - + *o = malloc(strlen(c) + 1); strcpy(*o, c); return 1; @@ -665,7 +665,7 @@ enum { MPC_TYPE_EXPECT = 5, MPC_TYPE_ANCHOR = 6, MPC_TYPE_STATE = 7, - + MPC_TYPE_ANY = 8, MPC_TYPE_SINGLE = 9, MPC_TYPE_ONEOF = 10, @@ -673,7 +673,7 @@ enum { MPC_TYPE_RANGE = 12, MPC_TYPE_SATISFY = 13, MPC_TYPE_STRING = 14, - + MPC_TYPE_APPLY = 15, MPC_TYPE_APPLY_TO = 16, MPC_TYPE_PREDICT = 17, @@ -682,7 +682,7 @@ enum { MPC_TYPE_MANY = 20, MPC_TYPE_MANY1 = 21, MPC_TYPE_COUNT = 22, - + MPC_TYPE_OR = 23, MPC_TYPE_AND = 24 }; @@ -743,26 +743,26 @@ typedef struct { int results_slots; mpc_result_t *results; int *returns; - + mpc_err_t *err; - + } mpc_stack_t; static mpc_stack_t *mpc_stack_new(const char *filename) { mpc_stack_t *s = malloc(sizeof(mpc_stack_t)); - + s->parsers_num = 0; s->parsers_slots = 0; s->parsers = NULL; s->states = NULL; - + s->results_num = 0; s->results_slots = 0; s->results = NULL; s->returns = NULL; - + s->err = mpc_err_fail(filename, mpc_state_invalid(), "Unknown Error"); - + return s; } @@ -775,7 +775,7 @@ static void mpc_stack_err(mpc_stack_t *s, mpc_err_t* e) { static int mpc_stack_terminate(mpc_stack_t *s, mpc_result_t *r) { int success = s->returns[0]; - + if (success) { r->output = s->results[0].output; mpc_err_delete(s->err); @@ -783,13 +783,13 @@ static int mpc_stack_terminate(mpc_stack_t *s, mpc_result_t *r) { mpc_stack_err(s, s->results[0].error); r->error = s->err; } - + free(s->parsers); free(s->states); free(s->results); free(s->returns); free(s); - + return success; } @@ -951,7 +951,7 @@ static mpc_err_t *mpc_stack_merger_err(mpc_stack_t *s, int n) { ** it looked pretty nice. But I've since switched ** it around to an awkward while loop. It was an ** unfortunate change for code simplicity but it -** is noble in the name of performance (and +** is noble in the name of performance (and ** not smashing the stack). ** ** But it is now a pretty ugly beast... @@ -963,25 +963,25 @@ static mpc_err_t *mpc_stack_merger_err(mpc_stack_t *s, int n) { #define MPC_PRIMATIVE(x, f) if (f) { MPC_SUCCESS(x); } else { MPC_FAILURE(mpc_err_fail(i->filename, i->state, "Incorrect Input")); } int mpc_parse_input(mpc_input_t *i, mpc_parser_t *init, mpc_result_t *final) { - + /* Stack */ int st = 0; mpc_parser_t *p = NULL; mpc_stack_t *stk = mpc_stack_new(i->filename); - + /* Variables */ char *s; mpc_result_t r; /* Go! */ mpc_stack_pushp(stk, init); - + while (!mpc_stack_empty(stk)) { - + mpc_stack_peepp(stk, &p, &st); - + switch (p->type) { - + /* Basic Parsers */ case MPC_TYPE_ANY: MPC_PRIMATIVE(s, mpc_input_any(i, &s)); @@ -991,36 +991,36 @@ int mpc_parse_input(mpc_input_t *i, mpc_parser_t *init, mpc_result_t *final) { case MPC_TYPE_NONEOF: MPC_PRIMATIVE(s, mpc_input_noneof(i, p->data.string.x, &s)); case MPC_TYPE_SATISFY: MPC_PRIMATIVE(s, mpc_input_satisfy(i, p->data.satisfy.f, &s)); case MPC_TYPE_STRING: MPC_PRIMATIVE(s, mpc_input_string(i, p->data.string.x, &s)); - + /* Other parsers */ - - case MPC_TYPE_UNDEFINED: MPC_FAILURE(mpc_err_fail(i->filename, i->state, "Parser Undefined!")); + + case MPC_TYPE_UNDEFINED: MPC_FAILURE(mpc_err_fail(i->filename, i->state, "Parser Undefined!")); case MPC_TYPE_PASS: MPC_SUCCESS(NULL); case MPC_TYPE_FAIL: MPC_FAILURE(mpc_err_fail(i->filename, i->state, p->data.fail.m)); case MPC_TYPE_LIFT: MPC_SUCCESS(p->data.lift.lf()); case MPC_TYPE_LIFT_VAL: MPC_SUCCESS(p->data.lift.x); case MPC_TYPE_STATE: MPC_SUCCESS(mpc_state_copy(i->state)); - + case MPC_TYPE_ANCHOR: if (mpc_input_anchor(i, p->data.anchor.f)) { MPC_SUCCESS(NULL); } else { MPC_FAILURE(mpc_err_new(i->filename, i->state, "anchor", mpc_input_peekc(i))); } - + /* Application Parsers */ - + case MPC_TYPE_EXPECT: if (st == 0) { MPC_CONTINUE(1, p->data.expect.x); } if (st == 1) { if (mpc_stack_popr(stk, &r)) { MPC_SUCCESS(r.output); } else { - mpc_err_delete(r.error); + mpc_err_delete(r.error); MPC_FAILURE(mpc_err_new(i->filename, i->state, p->data.expect.m, mpc_input_peekc(i))); } } - + case MPC_TYPE_APPLY: if (st == 0) { MPC_CONTINUE(1, p->data.apply.x); } if (st == 1) { @@ -1030,7 +1030,7 @@ int mpc_parse_input(mpc_input_t *i, mpc_parser_t *init, mpc_result_t *final) { MPC_FAILURE(r.error); } } - + case MPC_TYPE_APPLY_TO: if (st == 0) { MPC_CONTINUE(1, p->data.apply_to.x); } if (st == 1) { @@ -1040,7 +1040,7 @@ int mpc_parse_input(mpc_input_t *i, mpc_parser_t *init, mpc_result_t *final) { MPC_FAILURE(r.error); } } - + case MPC_TYPE_PREDICT: if (st == 0) { mpc_input_backtrack_disable(i); MPC_CONTINUE(1, p->data.predict.x); } if (st == 1) { @@ -1048,11 +1048,11 @@ int mpc_parse_input(mpc_input_t *i, mpc_parser_t *init, mpc_result_t *final) { mpc_stack_popp(stk, &p, &st); continue; } - + /* Optional Parsers */ - + /* TODO: Update Not Error Message */ - + case MPC_TYPE_NOT: if (st == 0) { mpc_input_mark(i); MPC_CONTINUE(1, p->data.not.x); } if (st == 1) { @@ -1066,7 +1066,7 @@ int mpc_parse_input(mpc_input_t *i, mpc_parser_t *init, mpc_result_t *final) { MPC_SUCCESS(p->data.not.lf()); } } - + case MPC_TYPE_MAYBE: if (st == 0) { MPC_CONTINUE(1, p->data.not.x); } if (st == 1) { @@ -1077,9 +1077,9 @@ int mpc_parse_input(mpc_input_t *i, mpc_parser_t *init, mpc_result_t *final) { MPC_SUCCESS(p->data.not.lf()); } } - + /* Repeat Parsers */ - + case MPC_TYPE_MANY: if (st == 0) { MPC_CONTINUE(st+1, p->data.repeat.x); } if (st > 0) { @@ -1091,7 +1091,7 @@ int mpc_parse_input(mpc_input_t *i, mpc_parser_t *init, mpc_result_t *final) { MPC_SUCCESS(mpc_stack_merger_out(stk, st-1, p->data.repeat.f)); } } - + case MPC_TYPE_MANY1: if (st == 0) { MPC_CONTINUE(st+1, p->data.repeat.x); } if (st > 0) { @@ -1108,7 +1108,7 @@ int mpc_parse_input(mpc_input_t *i, mpc_parser_t *init, mpc_result_t *final) { } } } - + case MPC_TYPE_COUNT: if (st == 0) { mpc_input_mark(i); MPC_CONTINUE(st+1, p->data.repeat.x); } if (st > 0) { @@ -1128,13 +1128,13 @@ int mpc_parse_input(mpc_input_t *i, mpc_parser_t *init, mpc_result_t *final) { } } } - + /* Combinatory Parsers */ - + case MPC_TYPE_OR: - + if (p->data.or.n == 0) { MPC_SUCCESS(NULL); } - + if (st == 0) { MPC_CONTINUE(st+1, p->data.or.xs[st]); } if (st <= p->data.or.n) { if (mpc_stack_peekr(stk, &r)) { @@ -1145,11 +1145,11 @@ int mpc_parse_input(mpc_input_t *i, mpc_parser_t *init, mpc_result_t *final) { if (st < p->data.or.n) { MPC_CONTINUE(st+1, p->data.or.xs[st]); } if (st == p->data.or.n) { MPC_FAILURE(mpc_stack_merger_err(stk, p->data.or.n)); } } - + case MPC_TYPE_AND: - + if (p->data.or.n == 0) { MPC_SUCCESS(p->data.and.f(0, NULL)); } - + if (st == 0) { mpc_input_mark(i); MPC_CONTINUE(st+1, p->data.and.xs[st]); } if (st <= p->data.and.n) { if (!mpc_stack_peekr(stk, &r)) { @@ -1161,17 +1161,17 @@ int mpc_parse_input(mpc_input_t *i, mpc_parser_t *init, mpc_result_t *final) { if (st < p->data.and.n) { MPC_CONTINUE(st+1, p->data.and.xs[st]); } if (st == p->data.and.n) { mpc_input_unmark(i); MPC_SUCCESS(mpc_stack_merger_out(stk, p->data.and.n, p->data.and.f)); } } - + /* End */ - + default: - + MPC_FAILURE(mpc_err_fail(i->filename, i->state, "Unknown Parser Type Id!")); } } - + return mpc_stack_terminate(stk, final); - + } #undef MPC_CONTINUE @@ -1204,16 +1204,16 @@ int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result } int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r) { - + FILE *f = fopen(filename, "rb"); int res; - + if (f == NULL) { r->output = NULL; r->error = mpc_err_fail(filename, mpc_state_new(), "Unable to open file!"); return 0; } - + res = mpc_parse_file(filename, f, p, r); fclose(f); return res; @@ -1226,71 +1226,71 @@ int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r) { static void mpc_undefine_unretained(mpc_parser_t *p, int force); static void mpc_undefine_or(mpc_parser_t *p) { - + int i; for (i = 0; i < p->data.or.n; i++) { mpc_undefine_unretained(p->data.or.xs[i], 0); } free(p->data.or.xs); - + } static void mpc_undefine_and(mpc_parser_t *p) { - + int i; for (i = 0; i < p->data.and.n; i++) { mpc_undefine_unretained(p->data.and.xs[i], 0); } free(p->data.and.xs); free(p->data.and.dxs); - + } static void mpc_undefine_unretained(mpc_parser_t *p, int force) { - + if (p->retained && !force) { return; } - + switch (p->type) { - + case MPC_TYPE_FAIL: free(p->data.fail.m); break; - - case MPC_TYPE_ONEOF: + + case MPC_TYPE_ONEOF: case MPC_TYPE_NONEOF: case MPC_TYPE_STRING: - free(p->data.string.x); + free(p->data.string.x); break; - + case MPC_TYPE_APPLY: mpc_undefine_unretained(p->data.apply.x, 0); break; case MPC_TYPE_APPLY_TO: mpc_undefine_unretained(p->data.apply_to.x, 0); break; case MPC_TYPE_PREDICT: mpc_undefine_unretained(p->data.predict.x, 0); break; - + case MPC_TYPE_MAYBE: case MPC_TYPE_NOT: mpc_undefine_unretained(p->data.not.x, 0); break; - + case MPC_TYPE_EXPECT: mpc_undefine_unretained(p->data.expect.x, 0); free(p->data.expect.m); break; - + case MPC_TYPE_MANY: case MPC_TYPE_MANY1: case MPC_TYPE_COUNT: mpc_undefine_unretained(p->data.repeat.x, 0); break; - + case MPC_TYPE_OR: mpc_undefine_or(p); break; case MPC_TYPE_AND: mpc_undefine_and(p); break; - + default: break; } - + if (!force) { free(p->name); free(p); } - + } void mpc_delete(mpc_parser_t *p) { @@ -1298,13 +1298,13 @@ void mpc_delete(mpc_parser_t *p) { if (p->type != MPC_TYPE_UNDEFINED) { mpc_undefine_unretained(p, 0); - } - + } + free(p->name); free(p); - + } else { - mpc_undefine_unretained(p, 0); + mpc_undefine_unretained(p, 0); } } @@ -1335,7 +1335,7 @@ mpc_parser_t *mpc_undefine(mpc_parser_t *p) { } mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a) { - + if (p->retained) { p->type = a->type; p->data = a->data; @@ -1345,21 +1345,21 @@ mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a) { p->data = a2->data; free(a2); } - + free(a); - return p; + return p; } void mpc_cleanup(int n, ...) { int i; mpc_parser_t **list = malloc(sizeof(mpc_parser_t*) * n); - + va_list va; va_start(va, n); for (i = 0; i < n; i++) { list[i] = va_arg(va, mpc_parser_t*); } for (i = 0; i < n; i++) { mpc_undefine(list[i]); } - for (i = 0; i < n; i++) { mpc_delete(list[i]); } - va_end(va); + for (i = 0; i < n; i++) { mpc_delete(list[i]); } + va_end(va); free(list); } @@ -1379,7 +1379,7 @@ mpc_parser_t *mpc_fail(const char *m) { } /* -** As `snprintf` is not ANSI standard this +** As `snprintf` is not ANSI standard this ** function `mpc_failf` should be considered ** unsafe. ** @@ -1390,7 +1390,7 @@ mpc_parser_t *mpc_fail(const char *m) { ** the buffer length using precision specifiers ** such as `%.512s`. ** -** - Patch this function in your code base to +** - Patch this function in your code base to ** use `snprintf` or whatever variant your ** system supports. ** @@ -1399,18 +1399,18 @@ mpc_parser_t *mpc_fail(const char *m) { */ mpc_parser_t *mpc_failf(const char *fmt, ...) { - + va_list va; char *buffer; mpc_parser_t *p = mpc_undefined(); p->type = MPC_TYPE_FAIL; - + va_start(va, fmt); buffer = malloc(2048); vsprintf(buffer, fmt, va); va_end(va); - + buffer = realloc(buffer, strlen(buffer) + 1); p->data.fail.m = buffer; return p; @@ -1454,7 +1454,7 @@ mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *expected) { } /* -** As `snprintf` is not ANSI standard this +** As `snprintf` is not ANSI standard this ** function `mpc_expectf` should be considered ** unsafe. ** @@ -1465,7 +1465,7 @@ mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *expected) { ** the buffer length using precision specifiers ** such as `%.512s`. ** -** - Patch this function in your code base to +** - Patch this function in your code base to ** use `snprintf` or whatever variant your ** system supports. ** @@ -1479,12 +1479,12 @@ mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...) { mpc_parser_t *p = mpc_undefined(); p->type = MPC_TYPE_EXPECT; - + va_start(va, fmt); buffer = malloc(2048); vsprintf(buffer, fmt, va); va_end(va); - + buffer = realloc(buffer, strlen(buffer) + 1); p->data.expect.x = a; p->data.expect.m = buffer; @@ -1633,17 +1633,17 @@ mpc_parser_t *mpc_or(int n, ...) { va_list va; mpc_parser_t *p = mpc_undefined(); - + p->type = MPC_TYPE_OR; p->data.or.n = n; p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); - - va_start(va, n); + + va_start(va, n); for (i = 0; i < n; i++) { p->data.or.xs[i] = va_arg(va, mpc_parser_t*); } va_end(va); - + return p; } @@ -1653,22 +1653,22 @@ mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...) { va_list va; mpc_parser_t *p = mpc_undefined(); - + p->type = MPC_TYPE_AND; p->data.and.n = n; p->data.and.f = f; p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); - va_start(va, f); + va_start(va, f); for (i = 0; i < n; i++) { p->data.and.xs[i] = va_arg(va, mpc_parser_t*); } for (i = 0; i < (n-1); i++) { p->data.and.dxs[i] = va_arg(va, mpc_dtor_t); - } + } va_end(va); - + return p; } @@ -1724,9 +1724,9 @@ mpc_parser_t *mpc_number(void) { return mpc_expect(mpc_or(3, mpc_int(), mpc_hex( mpc_parser_t *mpc_real(void) { /* [+-]?\d+(\.\d+)?([eE][+-]?[0-9]+)? */ - + mpc_parser_t *p0, *p1, *p2, *p30, *p31, *p32, *p3; - + p0 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); p1 = mpc_digits(); p2 = mpc_maybe_lift(mpc_and(2, mpcf_strfold, mpc_char('.'), mpc_digits(), free), mpcf_ctor_str); @@ -1734,7 +1734,7 @@ mpc_parser_t *mpc_real(void) { p31 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); p32 = mpc_digits(); p3 = mpc_maybe_lift(mpc_and(3, mpcf_strfold, p30, p31, p32, free, free), mpcf_ctor_str); - + return mpc_expect(mpc_and(4, mpcf_strfold, p0, p1, p2, p3, free, free, free), "real"); } @@ -1752,15 +1752,15 @@ mpc_parser_t *mpc_string_lit(void) { return mpc_expect(mpc_between(mpc_many(mpcf_strfold, strchar), free, "\"", "\""), "string"); } -mpc_parser_t *mpc_regex_lit(void) { +mpc_parser_t *mpc_regex_lit(void) { mpc_parser_t *regexchar = mpc_or(2, mpc_escape(), mpc_noneof("/")); return mpc_expect(mpc_between(mpc_many(mpcf_strfold, regexchar), free, "/", "/"), "regex"); } mpc_parser_t *mpc_ident(void) { - mpc_parser_t *p0, *p1; + mpc_parser_t *p0, *p1; p0 = mpc_or(2, mpc_alpha(), mpc_underscore()); - p1 = mpc_many(mpcf_strfold, mpc_alphanum()); + p1 = mpc_many(mpcf_strfold, mpc_alphanum()); return mpc_and(2, mpcf_strfold, p0, p1, free); } @@ -1821,7 +1821,7 @@ mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_ ** a parser directly - as we are parsing. ** ** This is certainly something that -** would be less elegant/interesting +** would be less elegant/interesting ** in a two-phase parser which first ** builds an AST and then traverses it ** to generate the object. @@ -1837,7 +1837,7 @@ mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_ ** ### Regular Expression Grammar ** ** : | ( "|" ) -** +** ** : * ** ** : @@ -1845,7 +1845,7 @@ mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_ ** | "+" ** | "?" ** | "{" "}" -** +** ** : ** | "\" ** | "(" ")" @@ -1867,7 +1867,7 @@ static mpc_val_t *mpcf_re_and(int n, mpc_val_t **xs) { } static mpc_val_t *mpcf_re_repeat(int n, mpc_val_t **xs) { - + int num; if (xs[1] == NULL) { return xs[0]; } if (strcmp(xs[1], "*") == 0) { free(xs[1]); return mpc_many(mpcf_strfold, xs[0]); } @@ -1875,7 +1875,7 @@ static mpc_val_t *mpcf_re_repeat(int n, mpc_val_t **xs) { if (strcmp(xs[1], "?") == 0) { free(xs[1]); return mpc_maybe_lift(xs[0], mpcf_ctor_str); } num = *(int*)xs[1]; free(xs[1]); - + return mpc_count(num, mpcf_strfold, xs[0], free); } @@ -1902,15 +1902,15 @@ static mpc_parser_t *mpc_re_escape_char(char c) { } static mpc_val_t *mpcf_re_escape(mpc_val_t *x) { - + char *s = x; mpc_parser_t *p; - + /* Regex Special Characters */ if (s[0] == '.') { free(s); return mpc_any(); } if (s[0] == '^') { free(s); return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); } if (s[0] == '$') { free(s); return mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free); } - + /* Regex Escape */ if (s[0] == '\\') { p = mpc_re_escape_char(s[1]); @@ -1918,7 +1918,7 @@ static mpc_val_t *mpcf_re_escape(mpc_val_t *x) { free(s); return p; } - + /* Regex Standard */ p = mpc_char(s[0]); free(s); @@ -1943,7 +1943,7 @@ static char *mpc_re_range_escape_char(char c) { } static mpc_val_t *mpcf_re_range(mpc_val_t *x) { - + mpc_parser_t *out; char *range = calloc(1,1); char *tmp = NULL; @@ -1951,15 +1951,15 @@ static mpc_val_t *mpcf_re_range(mpc_val_t *x) { char start, end; int i, j; int comp = 0; - - if (s[0] == '\0') { free(x); return mpc_fail("Invalid Regex Range Expression"); } - if (s[0] == '^' && + + if (s[0] == '\0') { free(x); return mpc_fail("Invalid Regex Range Expression"); } + if (s[0] == '^' && s[1] == '\0') { free(x); return mpc_fail("Invalid Regex Range Expression"); } - + if (s[0] == '^') { comp = 1;} - + for (i = comp; i < strlen(s); i++){ - + /* Regex Range Escape */ if (s[i] == '\\') { tmp = mpc_re_range_escape_char(s[i+1]); @@ -1969,11 +1969,11 @@ static mpc_val_t *mpcf_re_range(mpc_val_t *x) { } else { range = realloc(range, strlen(range) + 1 + 1); range[strlen(range) + 1] = '\0'; - range[strlen(range) + 0] = s[i+1]; + range[strlen(range) + 0] = s[i+1]; } i++; } - + /* Regex Range...Range */ else if (s[i] == '-') { if (s[i+1] == '\0' || i == 0) { @@ -1986,48 +1986,48 @@ static mpc_val_t *mpcf_re_range(mpc_val_t *x) { range = realloc(range, strlen(range) + 1 + 1); range[strlen(range) + 1] = '\0'; range[strlen(range) + 0] = j; - } + } } } - + /* Regex Range Normal */ else { range = realloc(range, strlen(range) + 1 + 1); range[strlen(range) + 1] = '\0'; range[strlen(range) + 0] = s[i]; } - + } - + out = comp ? mpc_noneof(range) : mpc_oneof(range); - + free(x); free(range); - + return out; } mpc_parser_t *mpc_re(const char *re) { - + char *err_msg; mpc_parser_t *err_out; mpc_result_t r; - mpc_parser_t *Regex, *Term, *Factor, *Base, *Range, *RegexEnclose; - + mpc_parser_t *Regex, *Term, *Factor, *Base, *Range, *RegexEnclose; + Regex = mpc_new("regex"); Term = mpc_new("term"); Factor = mpc_new("factor"); Base = mpc_new("base"); Range = mpc_new("range"); - + mpc_define(Regex, mpc_and(2, mpcf_re_or, - Term, + Term, mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_char('|'), Regex, free)), (mpc_dtor_t)mpc_delete )); - + mpc_define(Term, mpc_many(mpcf_re_and, Factor)); - + mpc_define(Factor, mpc_and(2, mpcf_re_repeat, Base, mpc_or(5, @@ -2036,34 +2036,34 @@ mpc_parser_t *mpc_re(const char *re) { mpc_pass()), (mpc_dtor_t)mpc_delete )); - + mpc_define(Base, mpc_or(4, mpc_parens(Regex, (mpc_dtor_t)mpc_delete), mpc_squares(Range, (mpc_dtor_t)mpc_delete), mpc_apply(mpc_escape(), mpcf_re_escape), mpc_apply(mpc_noneof(")|"), mpcf_re_escape) )); - + mpc_define(Range, mpc_apply( mpc_many(mpcf_strfold, mpc_or(2, mpc_escape(), mpc_noneof("]"))), mpcf_re_range )); - + RegexEnclose = mpc_whole(mpc_predictive(Regex), (mpc_dtor_t)mpc_delete); - + if(!mpc_parse("", re, RegexEnclose, &r)) { err_msg = mpc_err_string(r.error); err_out = mpc_failf("Invalid Regex: %s", err_msg); - mpc_err_delete(r.error); + mpc_err_delete(r.error); free(err_msg); r.output = err_out; } - + mpc_delete(RegexEnclose); mpc_cleanup(5, Regex, Term, Factor, Base, Range); - + return r.output; - + } /* @@ -2107,9 +2107,9 @@ mpc_val_t *mpcf_float(mpc_val_t *x) { static char mpc_escape_input_c[] = { '\a', '\b', '\f', '\n', '\r', '\t', '\v', '\\', '\'', '\"', '\0'}; - + static char *mpc_escape_output_c[] = { - "\\a", "\\b", "\\f", "\\n", "\\r", "\\t", + "\\a", "\\b", "\\f", "\\n", "\\r", "\\t", "\\v", "\\\\", "\\'", "\\\"", "\\0", NULL}; static char mpc_escape_input_raw_re[] = { '/' }; @@ -2122,15 +2122,15 @@ static char mpc_escape_input_raw_cchar[] = { '\'' }; static char *mpc_escape_output_raw_cchar[] = { "\\'", NULL }; static mpc_val_t *mpcf_escape_new(mpc_val_t *x, char *input, char **output) { - + int i; int found; char *s = x; char *y = calloc(1, 1); char buff[2]; - + while (*s) { - + i = 0; found = 0; @@ -2143,22 +2143,22 @@ static mpc_val_t *mpcf_escape_new(mpc_val_t *x, char *input, char **output) { } i++; } - + if (!found) { y = realloc(y, strlen(y) + 2); buff[0] = *s; buff[1] = '\0'; strcat(y, buff); } - + s++; } - - + + return y; } static mpc_val_t *mpcf_unescape_new(mpc_val_t *x, char *input, char **output) { - + int i; int found = 0; char *s = x; @@ -2166,10 +2166,10 @@ static mpc_val_t *mpcf_unescape_new(mpc_val_t *x, char *input, char **output) { char buff[2]; while (*s) { - + i = 0; found = 0; - + while (output[i]) { if ((*(s+0)) == output[i][0] && (*(s+1)) == output[i][1]) { @@ -2182,19 +2182,19 @@ static mpc_val_t *mpcf_unescape_new(mpc_val_t *x, char *input, char **output) { } i++; } - + if (!found) { y = realloc(y, strlen(y) + 2); buff[0] = *s; buff[1] = '\0'; strcat(y, buff); } - + if (*s == '\0') { break; } else { s++; } } - + return y; - + } mpc_val_t *mpcf_escape(mpc_val_t *x) { @@ -2212,7 +2212,7 @@ mpc_val_t *mpcf_unescape(mpc_val_t *x) { mpc_val_t *mpcf_unescape_regex(mpc_val_t *x) { mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); free(x); - return y; + return y; } mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x) { @@ -2251,7 +2251,7 @@ static mpc_val_t *mpcf_nth_free(int n, mpc_val_t **xs, int x) { } return xs[x]; } - + mpc_val_t *mpcf_fst_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 0); } mpc_val_t *mpcf_snd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 1); } mpc_val_t *mpcf_trd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 2); } @@ -2268,17 +2268,17 @@ mpc_val_t *mpcf_strfold(int n, mpc_val_t **xs) { } mpc_val_t *mpcf_maths(int n, mpc_val_t **xs) { - + int **vs = (int**)xs; - + if (strcmp(xs[1], "*") == 0) { *vs[0] *= *vs[2]; } if (strcmp(xs[1], "/") == 0) { *vs[0] /= *vs[2]; } if (strcmp(xs[1], "%") == 0) { *vs[0] %= *vs[2]; } if (strcmp(xs[1], "+") == 0) { *vs[0] += *vs[2]; } if (strcmp(xs[1], "-") == 0) { *vs[0] -= *vs[2]; } - + free(xs[1]); free(xs[2]); - + return xs[0]; } @@ -2287,19 +2287,19 @@ mpc_val_t *mpcf_maths(int n, mpc_val_t **xs) { */ static void mpc_print_unretained(mpc_parser_t *p, int force) { - + /* TODO: Print Everything Escaped */ - + int i; char *s, *e; char buff[2]; - + if (p->retained && !force) {; if (p->name) { printf("<%s>", p->name); } else { printf(""); } return; } - + if (p->type == MPC_TYPE_UNDEFINED) { printf(""); } if (p->type == MPC_TYPE_PASS) { printf("<:>"); } if (p->type == MPC_TYPE_FAIL) { printf(""); } @@ -2310,7 +2310,7 @@ static void mpc_print_unretained(mpc_parser_t *p, int force) { printf("%s", p->data.expect.m); /*mpc_print_unretained(p->data.expect.x, 0);*/ } - + if (p->type == MPC_TYPE_ANY) { printf("<.>"); } if (p->type == MPC_TYPE_SATISFY) { printf(""); } @@ -2323,7 +2323,7 @@ static void mpc_print_unretained(mpc_parser_t *p, int force) { printf("'%s'", s); free(s); } - + if (p->type == MPC_TYPE_RANGE) { buff[0] = p->data.range.x; buff[1] = '\0'; s = mpcf_escape_new( @@ -2339,7 +2339,7 @@ static void mpc_print_unretained(mpc_parser_t *p, int force) { free(s); free(e); } - + if (p->type == MPC_TYPE_ONEOF) { s = mpcf_escape_new( p->data.string.x, @@ -2348,7 +2348,7 @@ static void mpc_print_unretained(mpc_parser_t *p, int force) { printf("[%s]", s); free(s); } - + if (p->type == MPC_TYPE_NONEOF) { s = mpcf_escape_new( p->data.string.x, @@ -2357,7 +2357,7 @@ static void mpc_print_unretained(mpc_parser_t *p, int force) { printf("[^%s]", s); free(s); } - + if (p->type == MPC_TYPE_STRING) { s = mpcf_escape_new( p->data.string.x, @@ -2366,7 +2366,7 @@ static void mpc_print_unretained(mpc_parser_t *p, int force) { printf("\"%s\"", s); free(s); } - + if (p->type == MPC_TYPE_APPLY) { mpc_print_unretained(p->data.apply.x, 0); } if (p->type == MPC_TYPE_APPLY_TO) { mpc_print_unretained(p->data.apply_to.x, 0); } if (p->type == MPC_TYPE_PREDICT) { mpc_print_unretained(p->data.predict.x, 0); } @@ -2377,7 +2377,7 @@ static void mpc_print_unretained(mpc_parser_t *p, int force) { if (p->type == MPC_TYPE_MANY) { mpc_print_unretained(p->data.repeat.x, 0); printf("*"); } if (p->type == MPC_TYPE_MANY1) { mpc_print_unretained(p->data.repeat.x, 0); printf("+"); } if (p->type == MPC_TYPE_COUNT) { mpc_print_unretained(p->data.repeat.x, 0); printf("{%i}", p->data.repeat.n); } - + if (p->type == MPC_TYPE_OR) { printf("("); for(i = 0; i < p->data.or.n-1; i++) { @@ -2387,7 +2387,7 @@ static void mpc_print_unretained(mpc_parser_t *p, int force) { mpc_print_unretained(p->data.or.xs[p->data.or.n-1], 0); printf(")"); } - + if (p->type == MPC_TYPE_AND) { printf("("); for(i = 0; i < p->data.and.n-1; i++) { @@ -2397,7 +2397,7 @@ static void mpc_print_unretained(mpc_parser_t *p, int force) { mpc_print_unretained(p->data.and.xs[p->data.and.n-1], 0); printf(")"); } - + } void mpc_print(mpc_parser_t *p) { @@ -2425,7 +2425,7 @@ int mpc_test_fail(mpc_parser_t *p, const char *s, void *d, mpc_dtor_t destructor, void(*printer)(void*)) { - mpc_result_t r; + mpc_result_t r; if (mpc_parse("", s, p, &r)) { if (tester(r.output, d)) { @@ -2435,22 +2435,22 @@ int mpc_test_fail(mpc_parser_t *p, const char *s, void *d, destructor(r.output); return 1; } - + } else { mpc_err_delete(r.error); return 1; } - + } int mpc_test_pass(mpc_parser_t *p, const char *s, void *d, - int(*tester)(void*, void*), - mpc_dtor_t destructor, + int(*tester)(void*, void*), + mpc_dtor_t destructor, void(*printer)(void*)) { - mpc_result_t r; + mpc_result_t r; if (mpc_parse("", s, p, &r)) { - + if (tester(r.output, d)) { destructor(r.output); return 1; @@ -2460,14 +2460,14 @@ int mpc_test_pass(mpc_parser_t *p, const char *s, void *d, destructor(r.output); return 0; } - - } else { + + } else { mpc_err_print(r.error); mpc_err_delete(r.error); return 0; - + } - + } @@ -2476,19 +2476,19 @@ int mpc_test_pass(mpc_parser_t *p, const char *s, void *d, */ void mpc_ast_delete(mpc_ast_t *a) { - + int i; - + if (a == NULL) { return; } for (i = 0; i < a->children_num; i++) { mpc_ast_delete(a->children[i]); } - + free(a->children); free(a->tag); free(a->contents); free(a); - + } static void mpc_ast_delete_no_children(mpc_ast_t *a) { @@ -2499,39 +2499,39 @@ static void mpc_ast_delete_no_children(mpc_ast_t *a) { } mpc_ast_t *mpc_ast_new(const char *tag, const char *contents) { - + mpc_ast_t *a = malloc(sizeof(mpc_ast_t)); - + a->tag = malloc(strlen(tag) + 1); strcpy(a->tag, tag); - + a->contents = malloc(strlen(contents) + 1); strcpy(a->contents, contents); - + a->state = mpc_state_new(); - + a->children_num = 0; a->children = NULL; return a; - + } mpc_ast_t *mpc_ast_build(int n, const char *tag, ...) { - + mpc_ast_t *a = mpc_ast_new(tag, ""); - + int i; va_list va; va_start(va, tag); - + for (i = 0; i < n; i++) { mpc_ast_add_child(a, va_arg(va, mpc_ast_t*)); } - + va_end(va); - + return a; - + } mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a) { @@ -2548,17 +2548,17 @@ mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a) { } int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b) { - + int i; if (strcmp(a->tag, b->tag) != 0) { return 0; } if (strcmp(a->contents, b->contents) != 0) { return 0; } if (a->children_num != b->children_num) { return 0; } - + for (i = 0; i < a->children_num; i++) { if (!mpc_ast_eq(a->children[i], b->children[i])) { return 0; } } - + return 1; } @@ -2591,20 +2591,20 @@ mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s) { } static void mpc_ast_print_depth(mpc_ast_t *a, int d, FILE *fp) { - + int i; for (i = 0; i < d; i++) { fprintf(fp, " "); } - + if (strlen(a->contents)) { fprintf(fp, "%s:%i:%i '%s'\n", a->tag, a->state.row+1, a->state.col+1, a->contents); } else { fprintf(fp, "%s \n", a->tag); } - + for (i = 0; i < a->children_num; i++) { mpc_ast_print_depth(a->children[i], d+1, fp); } - + } void mpc_ast_print(mpc_ast_t *a) { @@ -2616,40 +2616,40 @@ void mpc_ast_print_to(mpc_ast_t *a, FILE *fp) { } mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **xs) { - + int i, j; mpc_ast_t** as = (mpc_ast_t**)xs; mpc_ast_t *r; - + if (n == 0) { return NULL; } if (n == 1) { return xs[0]; } if (n == 2 && xs[1] == NULL) { return xs[0]; } if (n == 2 && xs[0] == NULL) { return xs[1]; } - + r = mpc_ast_new(">", ""); - + for (i = 0; i < n; i++) { - + if (as[i] == NULL) { continue; } - + if (as[i] && as[i]->children_num > 0) { - + for (j = 0; j < as[i]->children_num; j++) { mpc_ast_add_child(r, as[i]->children[j]); } - + mpc_ast_delete_no_children(as[i]); - + } else if (as[i] && as[i]->children_num == 0) { mpc_ast_add_child(r, as[i]); } - + } - + if (r->children_num) { r->state = r->children[0]->state; } - + return r; } @@ -2695,44 +2695,44 @@ mpc_parser_t *mpca_or(int n, ...) { va_list va; mpc_parser_t *p = mpc_undefined(); - + p->type = MPC_TYPE_OR; p->data.or.n = n; p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); - - va_start(va, n); + + va_start(va, n); for (i = 0; i < n; i++) { p->data.or.xs[i] = va_arg(va, mpc_parser_t*); } va_end(va); - + return p; - + } mpc_parser_t *mpca_and(int n, ...) { - + int i; va_list va; - + mpc_parser_t *p = mpc_undefined(); - + p->type = MPC_TYPE_AND; p->data.and.n = n; p->data.and.f = mpcf_fold_ast; p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); - + va_start(va, n); for (i = 0; i < n; i++) { p->data.and.xs[i] = va_arg(va, mpc_parser_t*); } for (i = 0; i < (n-1); i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; - } + } va_end(va); - - return p; + + return p; } mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_ast_delete); } @@ -2765,7 +2765,7 @@ mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_ ** ### Grammar Grammar ** ** : ( "|" ) | -** +** ** : * ** ** : @@ -2773,7 +2773,7 @@ mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_ ** | "+" ** | "?" ** | "{" "}" -** +** ** : "<" ( | ) ">" ** | ** | @@ -2795,7 +2795,7 @@ static mpc_val_t *mpcaf_grammar_or(int n, mpc_val_t **xs) { static mpc_val_t *mpcaf_grammar_and(int n, mpc_val_t **xs) { int i; - mpc_parser_t *p = mpc_pass(); + mpc_parser_t *p = mpc_pass(); for (i = 0; i < n; i++) { if (xs[i] != NULL) { p = mpca_and(2, p, xs[i]); } } @@ -2803,9 +2803,9 @@ static mpc_val_t *mpcaf_grammar_and(int n, mpc_val_t **xs) { } static mpc_val_t *mpcaf_grammar_repeat(int n, mpc_val_t **xs) { - + int num; - if (xs[1] == NULL) { return xs[0]; } + if (xs[1] == NULL) { return xs[0]; } if (strcmp(xs[1], "*") == 0) { free(xs[1]); return mpca_many(xs[0]); } if (strcmp(xs[1], "+") == 0) { free(xs[1]); return mpca_many1(xs[0]); } if (strcmp(xs[1], "?") == 0) { free(xs[1]); return mpca_maybe(xs[0]); } @@ -2847,15 +2847,15 @@ static int is_number(const char* s) { } static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) { - + int i; mpc_parser_t *p; - + /* Case of Number */ if (is_number(x)) { i = strtol(x, NULL, 10); - + while (st->parsers_num <= i) { st->parsers_num++; st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); @@ -2864,39 +2864,39 @@ static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) { return mpc_failf("No Parser in position %i! Only supplied %i Parsers!", i, st->parsers_num); } } - + return st->parsers[st->parsers_num-1]; - + /* Case of Identifier */ } else { - + /* Search Existing Parsers */ for (i = 0; i < st->parsers_num; i++) { mpc_parser_t *p = st->parsers[i]; if (p == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } if (p->name && strcmp(p->name, x) == 0) { return p; } } - + /* Search New Parsers */ while (1) { - + p = va_arg(*st->va, mpc_parser_t*); - + st->parsers_num++; st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); st->parsers[st->parsers_num-1] = p; - + if (p == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } if (p->name && strcmp(p->name, x) == 0) { return p; } - + } - - } - + + } + } static mpc_val_t *mpcaf_grammar_id(mpc_val_t *x, void *s) { - + mpca_grammar_st_t *st = s; mpc_parser_t *p = mpca_grammar_find_parser(x, st); free(x); @@ -2909,30 +2909,30 @@ static mpc_val_t *mpcaf_grammar_id(mpc_val_t *x, void *s) { } mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) { - + char *err_msg; mpc_parser_t *err_out; mpc_result_t r; mpc_parser_t *GrammarTotal, *Grammar, *Term, *Factor, *Base; - + GrammarTotal = mpc_new("grammar_total"); Grammar = mpc_new("grammar"); Term = mpc_new("term"); Factor = mpc_new("factor"); Base = mpc_new("base"); - + mpc_define(GrammarTotal, mpc_predictive(mpc_total(Grammar, mpc_soft_delete)) ); - + mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, Term, mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), mpc_soft_delete )); - + mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); - + mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, Base, mpc_or(6, @@ -2944,7 +2944,7 @@ mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) { mpc_pass()), mpc_soft_delete )); - + mpc_define(Base, mpc_or(5, mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), @@ -2952,7 +2952,7 @@ mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) { mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), mpc_tok_parens(Grammar, mpc_soft_delete) )); - + if(!mpc_parse("", grammar, GrammarTotal, &r)) { err_msg = mpc_err_string(r.error); err_out = mpc_failf("Invalid Grammar: %s", err_msg); @@ -2960,11 +2960,11 @@ mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) { free(err_msg); r.output = err_out; } - + mpc_cleanup(5, GrammarTotal, Grammar, Term, Factor, Base); - + return (st->flags & MPCA_LANG_PREDICTIVE) ? mpc_predictive(r.output) : r.output; - + } mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...) { @@ -2972,13 +2972,13 @@ mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...) { mpc_parser_t *res; va_list va; va_start(va, grammar); - + st.va = &va; st.parsers_num = 0; st.parsers = NULL; st.flags = flags; - - res = mpca_grammar_st(grammar, &st); + + res = mpca_grammar_st(grammar, &st); free(st.parsers); va_end(va); return res; @@ -2991,28 +2991,28 @@ typedef struct { } mpca_stmt_t; static mpc_val_t *mpca_stmt_afold(int n, mpc_val_t **xs) { - + mpca_stmt_t *stmt = malloc(sizeof(mpca_stmt_t)); stmt->ident = ((char**)xs)[0]; stmt->name = ((char**)xs)[1]; stmt->grammar = ((mpc_parser_t**)xs)[3]; - + free(((char**)xs)[2]); free(((char**)xs)[4]); - + return stmt; } static mpc_val_t *mpca_stmt_fold(int n, mpc_val_t **xs) { - + int i; mpca_stmt_t **stmts = malloc(sizeof(mpca_stmt_t*) * (n+1)); - + for (i = 0; i < n; i++) { stmts[i] = xs[i]; } - stmts[n] = NULL; - + stmts[n] = NULL; + return stmts; } @@ -3021,11 +3021,11 @@ static void mpca_stmt_list_delete(mpc_val_t *x) { mpca_stmt_t **stmts = x; while(*stmts) { - mpca_stmt_t *stmt = *stmts; + mpca_stmt_t *stmt = *stmts; free(stmt->ident); free(stmt->name); mpc_soft_delete(stmt->grammar); - free(stmt); + free(stmt); stmts++; } free(x); @@ -3051,41 +3051,41 @@ static mpc_val_t *mpca_stmt_list_apply_to(mpc_val_t *x, void *s) { stmts++; } free(x); - + return NULL; } static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) { - + mpc_result_t r; mpc_err_t *e; - mpc_parser_t *Lang, *Stmt, *Grammar, *Term, *Factor, *Base; - + mpc_parser_t *Lang, *Stmt, *Grammar, *Term, *Factor, *Base; + Lang = mpc_new("lang"); Stmt = mpc_new("stmt"); Grammar = mpc_new("grammar"); Term = mpc_new("term"); Factor = mpc_new("factor"); Base = mpc_new("base"); - + mpc_define(Lang, mpc_apply_to( mpc_total(mpc_predictive(mpc_many(mpca_stmt_fold, Stmt)), mpca_stmt_list_delete), mpca_stmt_list_apply_to, st )); - + mpc_define(Stmt, mpc_and(5, mpca_stmt_afold, mpc_tok(mpc_ident()), mpc_maybe(mpc_tok(mpc_string_lit())), mpc_sym(":"), Grammar, mpc_sym(";"), free, free, free, mpc_soft_delete )); - + mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, Term, mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), mpc_soft_delete )); - + mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); - + mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, Base, mpc_or(6, @@ -3097,7 +3097,7 @@ static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) { mpc_pass()), mpc_soft_delete )); - + mpc_define(Base, mpc_or(5, mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), @@ -3105,16 +3105,16 @@ static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) { mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), mpc_tok_parens(Grammar, mpc_soft_delete) )); - - + + if (!mpc_parse_input(i, Lang, &r)) { e = r.error; } else { e = NULL; } - + mpc_cleanup(6, Lang, Stmt, Grammar, Term, Factor, Base); - + return e; } @@ -3123,18 +3123,18 @@ mpc_err_t *mpca_lang_file(int flags, FILE *f, ...) { mpc_input_t *i; mpc_err_t *err; - va_list va; + va_list va; va_start(va, f); - + st.va = &va; st.parsers_num = 0; st.parsers = NULL; st.flags = flags; - + i = mpc_input_new_file("", f); err = mpca_lang_st(i, &st); mpc_input_delete(i); - + free(st.parsers); va_end(va); return err; @@ -3145,75 +3145,75 @@ mpc_err_t *mpca_lang_pipe(int flags, FILE *p, ...) { mpc_input_t *i; mpc_err_t *err; - va_list va; + va_list va; va_start(va, p); - + st.va = &va; st.parsers_num = 0; st.parsers = NULL; st.flags = flags; - + i = mpc_input_new_pipe("", p); err = mpca_lang_st(i, &st); mpc_input_delete(i); - + free(st.parsers); va_end(va); return err; } mpc_err_t *mpca_lang(int flags, const char *language, ...) { - + mpca_grammar_st_t st; mpc_input_t *i; mpc_err_t *err; - - va_list va; + + va_list va; va_start(va, language); - + st.va = &va; st.parsers_num = 0; st.parsers = NULL; st.flags = flags; - + i = mpc_input_new_string("", language); err = mpca_lang_st(i, &st); mpc_input_delete(i); - + free(st.parsers); va_end(va); return err; } mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...) { - + mpca_grammar_st_t st; mpc_input_t *i; mpc_err_t *err; - + va_list va; FILE *f = fopen(filename, "rb"); - + if (f == NULL) { return mpc_err_fail(filename, mpc_state_new(), "Unable to open file!"); } - + va_start(va, filename); - + st.va = &va; st.parsers_num = 0; st.parsers = NULL; st.flags = flags; - + i = mpc_input_new_file(filename, f); err = mpca_lang_st(i, &st); mpc_input_delete(i); - + free(st.parsers); - va_end(va); - + va_end(va); + fclose(f); - + return err; } diff --git a/source/sclpl/scanner.c b/source/sclpl/scanner.c new file mode 100644 index 0000000..fcfe78f --- /dev/null +++ b/source/sclpl/scanner.c @@ -0,0 +1,123 @@ +#include "scanner.h" +#include +#include +#include +#include +#include +#include + +static void scanner_getline(scanner_t* p_scanner); +static void scanner_skip_ws(scanner_t* p_scanner); +static char scanner_current(scanner_t* p_scanner); +static bool scanner_oneof(scanner_t* p_scanner, const char* p_set); +static char* scanner_dup(scanner_t* p_scanner, size_t start_idx, size_t len); + +scanner_t* scanner_new(char* p_prompt, FILE* p_file) { + scanner_t* p_scanner = (scanner_t*)malloc(sizeof(scanner_t)); + p_scanner->p_line = NULL; + p_scanner->index = 0; + p_scanner->p_input = p_file; + p_scanner->p_prompt = p_prompt; + return p_scanner; +} + +char* scanner_read(scanner_t* p_scanner) { + char* p_tok = NULL; + scanner_skip_ws(p_scanner); + if (!scanner_eof(p_scanner)) { + if (scanner_oneof(p_scanner, "()[]{};,'\"")) { + p_tok = scanner_dup(p_scanner, p_scanner->index, 1); + p_scanner->index++; + } else { + size_t start = p_scanner->index; + while(!scanner_oneof(p_scanner," \t\r\n()[];,'\"")) { + p_scanner->index++; + } + p_tok = scanner_dup(p_scanner, start, p_scanner->index - start); + } + } + return p_tok; +} + +bool scanner_eof(scanner_t* p_scanner) +{ + return (scanner_eol(p_scanner) && feof(p_scanner->p_input)); +} + +bool scanner_eol(scanner_t* p_scanner) +{ + bool ret = true; + size_t index = p_scanner->index; + char ch; + while((NULL != p_scanner->p_line) && ('\0' != (ch = p_scanner->p_line[index]))) { + if((' '!=ch) && ('\t'!=ch) && ('\r'!=ch) && ('\n'!=ch)) { + ret = false; + break; + } + index++; + } + return ret; +} + +static void scanner_getline(scanner_t* p_scanner) { + int c; + size_t capacity = 8; + size_t index = 0; + /* Reallocate and clear the line buffer */ + p_scanner->p_line = realloc(p_scanner->p_line, capacity); + p_scanner->p_line[0] = '\0'; + p_scanner->index = 0; + + /* If we have not yet reached the end of the file, read the next line */ + if (!scanner_eof(p_scanner)) { + if (NULL != p_scanner->p_prompt) + printf("%s", p_scanner->p_prompt); + while(('\n' != (c = fgetc(p_scanner->p_input))) && (EOF != c)) { + if (index+2 == capacity) { + capacity <<= 1u; + p_scanner->p_line = realloc(p_scanner->p_line, capacity); + } + p_scanner->p_line[index++] = c; + } + p_scanner->p_line[index++] = (c == EOF) ? '\0' : c; + p_scanner->p_line[index++] = '\0'; + p_scanner->index = 0; + } +} + +static void scanner_skip_ws(scanner_t* p_scanner) { + /* If we haven't read a line yet, read one now */ + if (NULL == p_scanner->p_line) + scanner_getline(p_scanner); + while('\0' == scanner_current(p_scanner) || scanner_oneof(p_scanner, " \t\r\n")) { + if ('\0' == scanner_current(p_scanner) && !scanner_eof(p_scanner)) { + scanner_getline(p_scanner); + } else { + p_scanner->index++; + } + } +} + +static char scanner_current(scanner_t* p_scanner) { + return p_scanner->p_line[p_scanner->index]; +} + +static bool scanner_oneof(scanner_t* p_scanner, const char* p_set) { + bool ret = false; + size_t sz = strlen(p_set); + for (size_t idx = 0; idx < sz; idx++) { + if (scanner_current(p_scanner) == p_set[idx]) { + ret = true; + break; + } + } + return ret; +} + +static char* scanner_dup(scanner_t* p_scanner, size_t start_idx, size_t len) { + char* p_str = (char*)malloc(len+1); + memcpy(p_str, &(p_scanner->p_line[start_idx]), len); + p_str[len] = '\0'; + return p_str; +} + diff --git a/source/sclpl/scanner.h b/source/sclpl/scanner.h new file mode 100644 index 0000000..0cc14f1 --- /dev/null +++ b/source/sclpl/scanner.h @@ -0,0 +1,28 @@ +/** + @file scanner.h + @brief TODO: Describe this file + $Revision$ + $HeadURL$ + */ +#ifndef SCANNER_H +#define SCANNER_H + +#include +#include + +typedef struct { + char* p_line; + size_t index; + FILE* p_input; + char* p_prompt; +} scanner_t; + +scanner_t* scanner_new(char* p_prompt, FILE* p_file); + +char* scanner_read(scanner_t* p_scanner); + +bool scanner_eof(scanner_t* p_scanner); + +bool scanner_eol(scanner_t* p_scanner); + +#endif /* SCANNER_H */ -- 2.52.0