From cc9ca06c34b120fc14c614f8deb850a611b6fc1a Mon Sep 17 00:00:00 2001 From: "Mike D. Lowis" Date: Thu, 2 Aug 2012 14:46:35 -0400 Subject: [PATCH] Added tests for location metadata in tokens --- source/lexer.scm | 24 ++-- tests/test_lexer.scm | 237 +++++++++++++++---------------------- tests/test_parse_utils.scm | 2 +- 3 files changed, 112 insertions(+), 151 deletions(-) diff --git a/source/lexer.scm b/source/lexer.scm index 8fc71ec..b020306 100644 --- a/source/lexer.scm +++ b/source/lexer.scm @@ -7,6 +7,7 @@ (buf (charport input) charport-read)) (define (dlang/tokenize in) + (define location (buf-posdata in)) (let ((ch (buf-lookahead! in 1))) (define tok (cond @@ -38,13 +39,13 @@ ; Punctuation and Parens ((char=? ch #\() - (token 'lpar (string (buf-consume! in)) (buf-posdata in))) + (token 'lpar (string (buf-consume! in)) location)) ((char=? ch #\)) - (token 'rpar (string (buf-consume! in)) (buf-posdata in))) + (token 'rpar (string (buf-consume! in)) location)) ((char=? ch #\,) - (token 'comma (string (buf-consume! in)) (buf-posdata in))) + (token 'comma (string (buf-consume! in)) location)) ((char=? ch #\;) - (token 'term (string (buf-consume! in)) (buf-posdata in))) + (token 'term (string (buf-consume! in)) location)) ; Id (else @@ -71,6 +72,7 @@ (not (eof-object? (buf-lookahead! in 1))))) (define (dlang/number in) + (define location (buf-posdata in)) (token 'number (string-append (if (char=? #\- (buf-lookahead! in 1)) @@ -81,7 +83,7 @@ (if (or (char=? (buf-lookahead! in 1) #\e) (char=? (buf-lookahead! in 1) #\E)) (dlang/exponent in) "")) - (buf-posdata in))) + location)) (define (dlang/integer in) (if (and @@ -108,6 +110,7 @@ (dlang/integer in))) (define (dlang/character in) + (define location (buf-posdata in)) (token 'character (string-append (string (char-match in #\')) @@ -115,15 +118,16 @@ (abort "Unexpected EOF while parsing character literal") (string (buf-consume! 
in))) (string (char-match in #\'))) - (buf-posdata in))) + location)) (define (dlang/string in) + (define location (buf-posdata in)) (define text (string-append (string (char-match in #\")) (collect-char in dlang/string-char?) (string (char-match in #\")))) - (token 'string text (buf-posdata in))) + (token 'string text location)) (define (dlang/string-char? in) (define ch (buf-lookahead! in 1)) @@ -132,16 +136,18 @@ (not (char=? #\" ch)))) (define (dlang/symbol in) + (define location (buf-posdata in)) (token 'symbol (string-append (string (char-match in #\$)) (token-text (dlang/id in))) - (buf-posdata in))) + location)) (define (dlang/id in) + (define location (buf-posdata in)) (define str(collect-char in dlang/id-char?)) (if (> (string-length str) 0) - (token 'id str (buf-posdata in)) + (token 'id str location) (abort "An Id was expected but none found."))) (define (dlang/id-char? in) diff --git a/tests/test_lexer.scm b/tests/test_lexer.scm index bb60137..3c9702e 100644 --- a/tests/test_lexer.scm +++ b/tests/test_lexer.scm @@ -51,45 +51,40 @@ (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "12" (token-text result)))))) + (token=? result + (token 'number "12" (posdata "(string)" 1 2)))))) (def-test "dlang/tokenize should recognize a character" (call-with-input-string "'a'" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) - (and (token? result) - (equal? 'character (token-type result)) - (equal? "'a'" (token-text result)))))) + (token=? result + (token 'character "'a'" (posdata "(string)" 1 2)))))) (def-test "dlang/tokenize should recognize a string" (call-with-input-string "\"\"" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) - (and (token? result) - (equal? 'string (token-type result)) - (equal? "\"\"" (token-text result)))))) + (token=? 
result + (token 'string "\"\"" (posdata "(string)" 1 2)))))) (def-test "dlang/tokenize should recognize a symbol" (call-with-input-string "$foobar" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) - (and (token? result) - (equal? 'symbol (token-type result)) - (equal? "$foobar" (token-text result)))))) + (token=? result + (token 'symbol "$foobar" (posdata "(string)" 1 2)))))) (def-test "dlang/tokenize should recognize an id" (call-with-input-string "foobar" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "foobar" (token-text result)))))) + (token=? result + (token 'id "foobar" (posdata "(string)" 1 2)))))) (def-test "dlang/tokenize should recognize the EOF" (call-with-input-string "" @@ -102,45 +97,40 @@ (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) - (and (token? result) - (equal? 'lpar (token-type result)) - (equal? "(" (token-text result)))))) + (token=? result + (token 'lpar "(" (posdata "(string)" 1 1)))))) (def-test "dlang/tokenize should recognize a right parenthese" (call-with-input-string ")" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) - (and (token? result) - (equal? 'rpar (token-type result)) - (equal? ")" (token-text result)))))) + (token=? result + (token 'rpar ")" (posdata "(string)" 1 1)))))) (def-test "dlang/tokenize should recognize a comma" (call-with-input-string "," (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) - (and (token? result) - (equal? 'comma (token-type result)) - (equal? "," (token-text result)))))) + (token=? 
result + (token 'comma "," (posdata "(string)" 1 1)))))) (def-test "dlang/tokenize should recognize a semicolon" (call-with-input-string ";" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) - (and (token? result) - (equal? 'term (token-type result)) - (equal? ";" (token-text result)))))) + (token=? result + (token 'term ";" (posdata "(string)" 1 1)))))) (def-test "dlang/tokenize should recognize the end keyword" (call-with-input-string "end" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) - (and (token? result) - (equal? 'term (token-type result)) - (equal? "end" (token-text result)))))) + (token=? result + (token 'term "end" (posdata "(string)" 1 2)))))) ; dlang/whitespace ;------------------------------------------------------------------------------ @@ -155,9 +145,9 @@ (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/whitespace buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "foo" (token-text result)))))) + (token=? result + (token 'id "foo" (posdata "(string)" 2 2))) + ))) ; dlang/comment ;------------------------------------------------------------------------------ @@ -190,9 +180,9 @@ (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/comment buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "bar" (token-text result)))))) + (token=? result + (token 'id "bar" (posdata "(string)" 2 2))) + ))) ; dlang/number ;------------------------------------------------------------------------------ @@ -201,108 +191,96 @@ (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/number buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "1" (token-text result)))))) + (token=? 
result + (token 'number "1" (posdata "(string)" 1 1)))))) (def-test "dlang/number should recognize a negative integer" (call-with-input-string "-1" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/number buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "-1" (token-text result)))))) + (token=? result + (token 'number "-1" (posdata "(string)" 1 1)))))) (def-test "dlang/number should recognize a positive float" (call-with-input-string "1.1" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/number buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "1.1" (token-text result)))))) + (token=? result + (token 'number "1.1" (posdata "(string)" 1 1)))))) (def-test "dlang/number should recognize a negative float" (call-with-input-string "-1.1" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/number buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "-1.1" (token-text result)))))) + (token=? result + (token 'number "-1.1" (posdata "(string)" 1 1)))))) (def-test "dlang/number should recognize a positive integer with positive exponent" (call-with-input-string "1e1" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/number buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "1e1" (token-text result)))))) + (token=? result + (token 'number "1e1" (posdata "(string)" 1 1)))))) (def-test "dlang/number should recognize a positive integer with negative exponent" (call-with-input-string "1e-1" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/number buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "1e-1" (token-text result)))))) + (token=? 
result + (token 'number "1e-1" (posdata "(string)" 1 1)))))) (def-test "dlang/number should recognize a positive float with positive exponent" (call-with-input-string "1.1e1" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/number buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "1.1e1" (token-text result)))))) + (token=? result + (token 'number "1.1e1" (posdata "(string)" 1 1)))))) (def-test "dlang/number should recognize a positive float with negative exponent" (call-with-input-string "1.1e-1" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/number buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "1.1e-1" (token-text result)))))) + (token=? result + (token 'number "1.1e-1" (posdata "(string)" 1 1)))))) (def-test "dlang/number should recognize a negative integer with positive exponent" (call-with-input-string "-1e1" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/number buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "-1e1" (token-text result)))))) + (token=? result + (token 'number "-1e1" (posdata "(string)" 1 1)))))) (def-test "dlang/number should recognize a negative integer with negative exponent" (call-with-input-string "-1e-1" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/number buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "-1e-1" (token-text result)))))) + (token=? result + (token 'number "-1e-1" (posdata "(string)" 1 1)))))) (def-test "dlang/number should recognize a negative float with positive exponent" (call-with-input-string "-1.1e1" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/number buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "-1.1e1" (token-text result)))))) + (token=? 
result + (token 'number "-1.1e1" (posdata "(string)" 1 1)))))) (def-test "dlang/number should recognize a negative float with negative exponent" (call-with-input-string "-1.1e-1" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/number buffer)) - (and (token? result) - (equal? 'number (token-type result)) - (equal? "-1.1e-1" (token-text result)))))) + (token=? result + (token 'number "-1.1e-1" (posdata "(string)" 1 1)))))) ; dlang/integer ;------------------------------------------------------------------------------ @@ -447,9 +425,8 @@ (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/character buffer)) - (and (token? result) - (equal? 'character (token-type result)) - (equal? "'a'" (token-text result)))))) + (token=? result + (token 'character "'a'" (posdata "(string)" 1 1)))))) (def-test "dlang/character should error when missing first single quote" (call-with-input-string "a'" @@ -479,36 +456,32 @@ (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/string buffer)) - (and (token? result) - (equal? 'string (token-type result)) - (equal? "\"\"" (token-text result)))))) + (token=? result + (token 'string "\"\"" (posdata "(string)" 1 1)))))) (def-test "dlang/string should recognize a string of length 1" (call-with-input-string "\"a\"" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/string buffer)) - (and (token? result) - (equal? 'string (token-type result)) - (equal? "\"a\"" (token-text result)))))) + (token=? result + (token 'string "\"a\"" (posdata "(string)" 1 1)))))) (def-test "dlang/string should recognize a string of length 2" (call-with-input-string "\"ab\"" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/string buffer)) - (and (token? result) - (equal? 'string (token-type result)) - (equal? "\"ab\"" (token-text result)))))) + (token=? 
result + (token 'string "\"ab\"" (posdata "(string)" 1 1)))))) (def-test "dlang/string should recognize a string of length 3" (call-with-input-string "\"abc\"" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/string buffer)) - (and (token? result) - (equal? 'string (token-type result)) - (equal? "\"abc\"" (token-text result)))))) + (token=? result + (token 'string "\"abc\"" (posdata "(string)" 1 1)))))) (def-test "dlang/string should error when missing first double quote" (call-with-input-string "a\"" @@ -538,45 +511,40 @@ (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/symbol buffer)) - (and (token? result) - (equal? 'symbol (token-type result)) - (equal? "$a" (token-text result)))))) + (token=? result + (token 'symbol "$a" (posdata "(string)" 1 1)))))) (def-test "dlang/symbol should recognize a symbol of length two" (call-with-input-string "$ab" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/symbol buffer)) - (and (token? result) - (equal? 'symbol (token-type result)) - (equal? "$ab" (token-text result)))))) + (token=? result + (token 'symbol "$ab" (posdata "(string)" 1 1)))))) (def-test "dlang/symbol should recognize a symbol of length three" (call-with-input-string "$abc" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/symbol buffer)) - (and (token? result) - (equal? 'symbol (token-type result)) - (equal? "$abc" (token-text result)))))) + (token=? result + (token 'symbol "$abc" (posdata "(string)" 1 1)))))) (def-test "dlang/symbol should stop recognition on EOF" (call-with-input-string "$abc" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/symbol buffer)) - (and (token? result) - (equal? 'symbol (token-type result)) - (equal? "$abc" (token-text result)))))) + (token=? 
result + (token 'symbol "$abc" (posdata "(string)" 1 1)))))) (def-test "dlang/symbol should stop recognition on whitespace" (call-with-input-string "$abc " (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/symbol buffer)) - (and (token? result) - (equal? 'symbol (token-type result)) - (equal? "$abc" (token-text result)))))) + (token=? result + (token 'symbol "$abc" (posdata "(string)" 1 1)))))) (def-test "dlang/symbol should error when no name given for a symbol" (call-with-input-string "$" @@ -599,45 +567,40 @@ (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "a" (token-text result)))))) + (token=? result + (token 'id "a" (posdata "(string)" 1 1)))))) (def-test "dlang/id should recognize an id of length two" (call-with-input-string "ab" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "ab" (token-text result)))))) + (token=? result + (token 'id "ab" (posdata "(string)" 1 1)))))) (def-test "dlang/id should recognize an id of length three" (call-with-input-string "abc" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "abc" (token-text result)))))) + (token=? result + (token 'id "abc" (posdata "(string)" 1 1)))))) (def-test "dlang/id should stop recognition on whitespace" (call-with-input-string "abc abc" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "abc" (token-text result)))))) + (token=? 
result + (token 'id "abc" (posdata "(string)" 1 1)))))) (def-test "dlang/id should stop recognition on EOF" (call-with-input-string "abc" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "abc" (token-text result)))))) + (token=? result + (token 'id "abc" (posdata "(string)" 1 1)))))) (def-test "dlang/id should error when no id recognized" (call-with-input-string "" @@ -651,72 +614,64 @@ (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "foo" (token-text result)))))) + (token=? result + (token 'id "foo" (posdata "(string)" 1 1)))))) (def-test "dlang/id should stop recognition when left paren encountered" (call-with-input-string "foo(" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "foo" (token-text result)))))) + (token=? result + (token 'id "foo" (posdata "(string)" 1 1)))))) (def-test "dlang/id should stop recognition when right paren encountered" (call-with-input-string "foo)" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "foo" (token-text result)))))) + (token=? result + (token 'id "foo" (posdata "(string)" 1 1)))))) (def-test "dlang/id should stop recognition when semicolon encountered" (call-with-input-string "foo;" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "foo" (token-text result)))))) + (token=? 
result + (token 'id "foo" (posdata "(string)" 1 1)))))) (def-test "dlang/id should stop recognition when comma encountered" (call-with-input-string "foo," (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "foo" (token-text result)))))) + (token=? result + (token 'id "foo" (posdata "(string)" 1 1)))))) (def-test "dlang/id should stop recognition when single quote encountered" (call-with-input-string "foo'" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "foo" (token-text result)))))) + (token=? result + (token 'id "foo" (posdata "(string)" 1 1)))))) (def-test "dlang/id should stop recognition when double quote encountered" (call-with-input-string "foo\"" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "foo" (token-text result)))))) + (token=? result + (token 'id "foo" (posdata "(string)" 1 1)))))) (def-test "dlang/id should stop recognition when dollar sign encountered" (call-with-input-string "foo$" (lambda (input) (define buffer (dlang/char-buf input)) (define result (dlang/id buffer)) - (and (token? result) - (equal? 'id (token-type result)) - (equal? "foo" (token-text result)))))) + (token=? result + (token 'id "foo" (posdata "(string)" 1 1)))))) ; dlang/id-char? ;------------------------------------------------------------------------------ diff --git a/tests/test_parse_utils.scm b/tests/test_parse_utils.scm index 746ba1d..760ca69 100644 --- a/tests/test_parse_utils.scm +++ b/tests/test_parse_utils.scm @@ -282,7 +282,7 @@ (define buffer (dlang/lexer input)) (token=? 
(keyword-match buffer "abc") - (token 'id "abc" (posdata "(string)" 1 4)))))) + (token 'id "abc" (posdata "(string)" 1 2)))))) (def-test "keyword-match should error if next token not an id" (call-with-input-string "1.0" -- 2.52.0