From fe8fca408d38ff8d64aefcc71b5a87a06f36b01a Mon Sep 17 00:00:00 2001
From: "Mike D. Lowis" <mike@mdlowis.com>
Date: Wed, 11 Jul 2012 12:16:46 -0400
Subject: [PATCH] Fixed bug in while loop and added tests for tokenize,
 whitespace, and comment

---
 inc/loop.scm         |   7 ++-
 source/lexer.scm     |  39 +++++++------
 tests/test_lexer.scm | 129 ++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 147 insertions(+), 28 deletions(-)

diff --git a/inc/loop.scm b/inc/loop.scm
index 5f8ec26..a49428f 100644
--- a/inc/loop.scm
+++ b/inc/loop.scm
@@ -11,9 +11,10 @@
 (define-syntax while
   (syntax-rules ()
     ((_ cnd body ...)
-      (let loop ()
-        body ...
-        (if cnd (loop))))))
+      (if cnd
+        (let loop ()
+          body ...
+          (if cnd (loop)))))))
 
 ; Until loop macro
 (define-syntax until
diff --git a/source/lexer.scm b/source/lexer.scm
index 3803851..862151f 100644
--- a/source/lexer.scm
+++ b/source/lexer.scm
@@ -6,15 +6,16 @@
 (define (dlang/tokenize in)
   (let ((ch (buf-lookahead! in 1)))
     (cond
+      ; End of Input reached
+      ((eof-object? ch) ch)
+
       ; Whitespace
       ((char-whitespace? ch)
-       (dlang/whitespace in)
-       (dlang/tokenize in))
+       (dlang/whitespace in))
 
       ; Comment
       ((char=? ch #\#)
-       (dlang/comment in)
-       (dlang/tokenize in))
+       (dlang/comment in))
 
       ; Number
       ((or
@@ -23,13 +24,13 @@
        (dlang/number in))
 
       ; Character
-      ((char=? ch #\') (dlang/character in ""))
+      ((char=? ch #\') (dlang/character in))
 
       ; String
-      ((char=? ch #\") (dlang/string in ""))
+      ((char=? ch #\") (dlang/string in))
 
       ; Symbol
-      ((char=? ch #\$) (dlang/symbol in ""))
+      ((char=? ch #\$) (dlang/symbol in))
 
       ; Parentheses
       ((char=? ch #\()
@@ -43,12 +44,15 @@
 
 (define (dlang/whitespace in)
   (while (char-whitespace? (buf-lookahead! in 1))
-    (buf-consume! in)))
+    (buf-consume! in))
+  (dlang/tokenize in))
 
 (define (dlang/comment in)
-  (match in #\#)
-  (while (not (char=? (buf-lookahead! in) #\newline))
-    (buf-consume! in)))
+  (char-match in #\#)
+  (while (and (not (char=? (buf-lookahead! in 1) #\newline))
+              (not (eof-object? (buf-lookahead! in 1))))
+    (buf-consume! in))
+  (dlang/tokenize in))
 
 (define (dlang/number in)
   (token 'number
@@ -90,16 +94,16 @@
 (define (dlang/character in)
   (token 'character
     (string-append
-      (string (match in #\'))
+      (string (char-match in #\'))
       (string (buf-consume! in))
-      (string (match in #\')) )))
+      (string (char-match in #\')) )))
 
 (define (dlang/string in)
   (token 'string
     (string-append
-      (string (match in #\"))
-      (accumulate-till in string-append "" #\")
-      (string (match in #\")) )))
+      (string (char-match in #\"))
+      ;(accumulate-till in string-append "" #\")
+      (string (char-match in #\")) )))
 
 (define (dlang/symbol in)
   (token 'symbol
@@ -112,7 +116,8 @@
              (ch  (buf-lookahead! in 1)))
     (if
       (and (not (char-whitespace? ch))
-           (not (eof-object? ch)))
+           (not (eof-object? ch))
+           (not (char=? ch #\#)))
       (loop (string-append acc (string (buf-consume! in))) (buf-lookahead! in 1))
       (if (> (string-length acc) 0)
         (token 'id acc)
diff --git a/tests/test_lexer.scm b/tests/test_lexer.scm
index 50470dc..80a7c96 100644
--- a/tests/test_lexer.scm
+++ b/tests/test_lexer.scm
@@ -4,30 +4,138 @@
 
 ; dlang/tokenize
 ;------------------------------------------------------------------------------
+(def-test "dlang/tokenize should recognize whitespace"
+  (call-with-input-string " \t\r\n"
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (eof-object? (dlang/tokenize buffer)))))
+
+(def-test "dlang/tokenize should recognize a comment"
+  (call-with-input-string "# foo"
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (eof-object? (dlang/tokenize buffer)))))
+
+(def-test "dlang/tokenize should recognize a number"
+  (call-with-input-string "12"
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (define result (dlang/tokenize buffer))
+      (and (token? result)
+           (equal? 'number (token-type result))
+           (equal? "12" (token-text result))))))
+
+(def-test "dlang/tokenize should recognize a character"
+  (call-with-input-string "'a'"
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (define result (dlang/tokenize buffer))
+      (and (token? result)
+           (equal? 'character (token-type result))
+           (equal? "'a'" (token-text result))))))
+
+(def-test "dlang/tokenize should recognize a string"
+  (call-with-input-string "\"\""
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (define result (dlang/tokenize buffer))
+      (and (token? result)
+           (equal? 'string (token-type result))
+           (equal? "\"\"" (token-text result))))))
+
+(def-test "dlang/tokenize should recognize a symbol"
+  (call-with-input-string "$foobar"
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (define result (dlang/tokenize buffer))
+      (and (token? result)
+           (equal? 'symbol (token-type result))
+           (equal? "$foobar" (token-text result))))))
+
+(def-test "dlang/tokenize should recognize an id"
+  (call-with-input-string "foobar"
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (define result (dlang/tokenize buffer))
+      (and (token? result)
+           (equal? 'id (token-type result))
+           (equal? "foobar" (token-text result))))))
+
+(def-test "dlang/tokenize should recognize the EOF"
+  (call-with-input-string ""
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (eof-object? (dlang/tokenize buffer)))))
+
+(def-test "dlang/tokenize should recognize a left parenthese"
+  (call-with-input-string "("
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (define result (dlang/tokenize buffer))
+      (and (token? result)
+           (equal? 'lpar (token-type result))
+           (equal? "(" (token-text result))))))
+
+(def-test "dlang/tokenize should recognize a right parenthese"
+  (call-with-input-string ")"
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (define result (dlang/tokenize buffer))
+      (and (token? result)
+           (equal? 'rpar (token-type result))
+           (equal? ")" (token-text result))))))
 
 ; dlang/whitespace
 ;------------------------------------------------------------------------------
 (def-test "dlang/whitespace should recognize and consume whitespace"
-  (call-with-input-string " \t\r\na"
-    (lambda (input) '())))
+  (call-with-input-string " \t\r\n"
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (eof-object? (dlang/whitespace buffer)))))
+
+(def-test "dlang/whitespace continue parsing after whitespace"
+  (call-with-input-string " \t\r\nfoo"
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (define result (dlang/whitespace buffer))
+      (and (token? result)
+           (equal? 'id (token-type result))
+           (equal? "foo" (token-text result))))))
 
 ; dlang/comment
 ;------------------------------------------------------------------------------
 (def-test "dlang/comment should recognize comments with windows style line endings"
   (call-with-input-string "# foo\r\n"
-    (lambda (input) '())))
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (eof-object? (dlang/comment buffer)))))
 
 (def-test "dlang/comment should recognize comments with unix style line endings"
   (call-with-input-string "# foo\n"
-    (lambda (input) '())))
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (eof-object? (dlang/comment buffer)))))
 
 (def-test "dlang/comment should recognize an empty comment"
   (call-with-input-string "#\n"
-    (lambda (input) '())))
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (eof-object? (dlang/comment buffer)))))
 
 (def-test "dlang/comment should recognize comment at EOF"
   (call-with-input-string "#"
-    (lambda (input) '())))
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (eof-object? (dlang/comment buffer)))))
+
+(def-test "dlang/comment should continue parsing after a comment"
+  (call-with-input-string "# foo\r\nbar"
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (define result (dlang/comment buffer))
+      (and (token? result)
+           (equal? 'id (token-type result))
+           (equal? "bar" (token-text result))))))
 
 ; dlang/number
 ;------------------------------------------------------------------------------
@@ -396,7 +504,12 @@
       (check-error "An Id was expected but none found."
         (dlang/id buffer)))))
 
-(def-test "dlang/stop recognition when comment encountered"
+(def-test "dlang/id should stop recognition when comment encountered"
   (call-with-input-string "foo#"
-    (lambda (input) '())))
+    (lambda (input)
+      (define buffer (buf input read-char))
+      (define result (dlang/id buffer))
+      (and (token? result)
+           (equal? 'id (token-type result))
+           (equal? "foo" (token-text result))))))
 
-- 
2.52.0