From fe8fca408d38ff8d64aefcc71b5a87a06f36b01a Mon Sep 17 00:00:00 2001 From: "Mike D. Lowis" Date: Wed, 11 Jul 2012 12:16:46 -0400 Subject: [PATCH] Fixed bug in while loop and added tests for tokenize, whitespace, and comment --- inc/loop.scm | 7 ++- source/lexer.scm | 39 +++++++------ tests/test_lexer.scm | 129 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 147 insertions(+), 28 deletions(-) diff --git a/inc/loop.scm b/inc/loop.scm index 5f8ec26..a49428f 100644 --- a/inc/loop.scm +++ b/inc/loop.scm @@ -11,9 +11,10 @@ (define-syntax while (syntax-rules () ((_ cnd body ...) - (let loop () - body ... - (if cnd (loop)))))) + (if cnd + (let loop () + body ... + (if cnd (loop))))))) ; Until loop macro (define-syntax until diff --git a/source/lexer.scm b/source/lexer.scm index 3803851..862151f 100644 --- a/source/lexer.scm +++ b/source/lexer.scm @@ -6,15 +6,16 @@ (define (dlang/tokenize in) (let ((ch (buf-lookahead! in 1))) (cond + ; End of Input reached + ((eof-object? ch) ch) + ; Whitespace ((char-whitespace? ch) - (dlang/whitespace in) - (dlang/tokenize in)) + (dlang/whitespace in)) ; Comment ((char=? ch #\#) - (dlang/comment in) - (dlang/tokenize in)) + (dlang/comment in)) ; Number ((or @@ -23,13 +24,13 @@ (dlang/number in)) ; Character - ((char=? ch #\') (dlang/character in "")) + ((char=? ch #\') (dlang/character in)) ; String - ((char=? ch #\") (dlang/string in "")) + ((char=? ch #\") (dlang/string in)) ; Symbol - ((char=? ch #\$) (dlang/symbol in "")) + ((char=? ch #\$) (dlang/symbol in)) ; Parentheses ((char=? ch #\() @@ -43,12 +44,15 @@ (define (dlang/whitespace in) (while (char-whitespace? (buf-lookahead! in 1)) - (buf-consume! in))) + (buf-consume! in)) + (dlang/tokenize in)) (define (dlang/comment in) - (match in #\#) - (while (not (char=? (buf-lookahead! in) #\newline)) - (buf-consume! in))) + (char-match in #\#) + (while (and (not (char=? (buf-lookahead! in 1) #\newline)) + (not (eof-object? (buf-lookahead! in 1)))) + (buf-consume! in)) + (dlang/tokenize in)) (define (dlang/number in) (token 'number @@ -90,16 +94,16 @@ (define (dlang/character in) (token 'character (string-append - (string (match in #\')) + (string (char-match in #\')) (string (buf-consume! in)) - (string (match in #\')) ))) + (string (char-match in #\')) ))) (define (dlang/string in) (token 'string (string-append - (string (match in #\")) - (accumulate-till in string-append "" #\") - (string (match in #\")) ))) + (string (char-match in #\")) + ;(accumulate-till in string-append "" #\") + (string (char-match in #\")) ))) (define (dlang/symbol in) (token 'symbol @@ -112,7 +116,8 @@ (ch (buf-lookahead! in 1))) (if (and (not (char-whitespace? ch)) - (not (eof-object? ch))) + (not (eof-object? ch)) + (not (char=? ch #\#))) (loop (string-append acc (string (buf-consume! in))) (buf-lookahead! in 1)) (if (> (string-length acc) 0) (token 'id acc) diff --git a/tests/test_lexer.scm b/tests/test_lexer.scm index 50470dc..80a7c96 100644 --- a/tests/test_lexer.scm +++ b/tests/test_lexer.scm @@ -4,30 +4,138 @@ ; dlang/tokenize ;------------------------------------------------------------------------------ +(def-test "dlang/tokenize should recognize whitespace" + (call-with-input-string " \t\r\n" + (lambda (input) + (define buffer (buf input read-char)) + (eof-object? (dlang/tokenize buffer))))) + +(def-test "dlang/tokenize should recognize a comment" + (call-with-input-string "# foo" + (lambda (input) + (define buffer (buf input read-char)) + (eof-object? (dlang/tokenize buffer))))) + +(def-test "dlang/tokenize should recognize a number" + (call-with-input-string "12" + (lambda (input) + (define buffer (buf input read-char)) + (define result (dlang/tokenize buffer)) + (and (token? result) + (equal? 'number (token-type result)) + (equal? "12" (token-text result)))))) + +(def-test "dlang/tokenize should recognize a character" + (call-with-input-string "'a'" + (lambda (input) + (define buffer (buf input read-char)) + (define result (dlang/tokenize buffer)) + (and (token? result) + (equal? 'character (token-type result)) + (equal? "'a'" (token-text result)))))) + +(def-test "dlang/tokenize should recognize a string" + (call-with-input-string "\"\"" + (lambda (input) + (define buffer (buf input read-char)) + (define result (dlang/tokenize buffer)) + (and (token? result) + (equal? 'string (token-type result)) + (equal? "\"\"" (token-text result)))))) + +(def-test "dlang/tokenize should recognize a symbol" + (call-with-input-string "$foobar" + (lambda (input) + (define buffer (buf input read-char)) + (define result (dlang/tokenize buffer)) + (and (token? result) + (equal? 'symbol (token-type result)) + (equal? "$foobar" (token-text result)))))) + +(def-test "dlang/tokenize should recognize an id" + (call-with-input-string "foobar" + (lambda (input) + (define buffer (buf input read-char)) + (define result (dlang/tokenize buffer)) + (and (token? result) + (equal? 'id (token-type result)) + (equal? "foobar" (token-text result)))))) + +(def-test "dlang/tokenize should recognize the EOF" + (call-with-input-string "" + (lambda (input) + (define buffer (buf input read-char)) + (eof-object? (dlang/tokenize buffer))))) + +(def-test "dlang/tokenize should recognize a left parenthese" + (call-with-input-string "(" + (lambda (input) + (define buffer (buf input read-char)) + (define result (dlang/tokenize buffer)) + (and (token? result) + (equal? 'lpar (token-type result)) + (equal? "(" (token-text result)))))) + +(def-test "dlang/tokenize should recognize a right parenthese" + (call-with-input-string ")" + (lambda (input) + (define buffer (buf input read-char)) + (define result (dlang/tokenize buffer)) + (and (token? result) + (equal? 'rpar (token-type result)) + (equal? ")" (token-text result)))))) ; dlang/whitespace ;------------------------------------------------------------------------------ (def-test "dlang/whitespace should recognize and consume whitespace" - (call-with-input-string " \t\r\na" - (lambda (input) '()))) + (call-with-input-string " \t\r\n" + (lambda (input) + (define buffer (buf input read-char)) + (eof-object? (dlang/whitespace buffer))))) + +(def-test "dlang/whitespace continue parsing after whitespace" + (call-with-input-string " \t\r\nfoo" + (lambda (input) + (define buffer (buf input read-char)) + (define result (dlang/whitespace buffer)) + (and (token? result) + (equal? 'id (token-type result)) + (equal? "foo" (token-text result)))))) ; dlang/comment ;------------------------------------------------------------------------------ (def-test "dlang/comment should recognize comments with windows style line endings" (call-with-input-string "# foo\r\n" - (lambda (input) '()))) + (lambda (input) + (define buffer (buf input read-char)) + (eof-object? (dlang/comment buffer))))) (def-test "dlang/comment should recognize comments with unix style line endings" (call-with-input-string "# foo\n" - (lambda (input) '()))) + (lambda (input) + (define buffer (buf input read-char)) + (eof-object? (dlang/comment buffer))))) (def-test "dlang/comment should recognize an empty comment" (call-with-input-string "#\n" - (lambda (input) '()))) + (lambda (input) + (define buffer (buf input read-char)) + (eof-object? (dlang/comment buffer))))) (def-test "dlang/comment should recognize comment at EOF" (call-with-input-string "#" - (lambda (input) '()))) + (lambda (input) + (define buffer (buf input read-char)) + (eof-object? (dlang/comment buffer))))) + +(def-test "dlang/comment should continue parsing after a comment" + (call-with-input-string "# foo\r\nbar" + (lambda (input) + (define buffer (buf input read-char)) + (define result (dlang/comment buffer)) + (and (token? result) + (equal? 'id (token-type result)) + (equal? "bar" (token-text result)))))) ; dlang/number ;------------------------------------------------------------------------------ @@ -396,7 +504,12 @@ (check-error "An Id was expected but none found." (dlang/id buffer))))) -(def-test "dlang/stop recognition when comment encountered" +(def-test "dlang/id should stop recognition when comment encountered" (call-with-input-string "foo#" - (lambda (input) '()))) + (lambda (input) + (define buffer (buf input read-char)) + (define result (dlang/id buffer)) + (and (token? result) + (equal? 'id (token-type result)) + (equal? "foo" (token-text result)))))) -- 2.52.0