(define (dlang/tokenize in)
(let ((ch (buf-lookahead! in 1)))
(cond
+ ; End of Input reached
+ ((eof-object? ch) ch)
+
; Whitespace
((char-whitespace? ch)
- (dlang/whitespace in)
- (dlang/tokenize in))
+ (dlang/whitespace in))
; Comment
((char=? ch #\#)
- (dlang/comment in)
- (dlang/tokenize in))
+ (dlang/comment in))
; Number
((or
(dlang/number in))
; Character
- ((char=? ch #\') (dlang/character in ""))
+ ((char=? ch #\') (dlang/character in))
; String
- ((char=? ch #\") (dlang/string in ""))
+ ((char=? ch #\") (dlang/string in))
; Symbol
- ((char=? ch #\$) (dlang/symbol in ""))
+ ((char=? ch #\$) (dlang/symbol in))
; Parentheses
((char=? ch #\()
(define (dlang/whitespace in)
(while (char-whitespace? (buf-lookahead! in 1))
- (buf-consume! in)))
+ (buf-consume! in))
+ (dlang/tokenize in))
(define (dlang/comment in)
- (match in #\#)
- (while (not (char=? (buf-lookahead! in) #\newline))
- (buf-consume! in)))
+ (char-match in #\#)
+ (while (and (not (char=? (buf-lookahead! in 1) #\newline))
+ (not (eof-object? (buf-lookahead! in 1))))
+ (buf-consume! in))
+ (dlang/tokenize in))
(define (dlang/number in)
(token 'number
(define (dlang/character in)
(token 'character
(string-append
- (string (match in #\'))
+ (string (char-match in #\'))
(string (buf-consume! in))
- (string (match in #\')) )))
+ (string (char-match in #\')) )))
(define (dlang/string in)
(token 'string
(string-append
- (string (match in #\"))
- (accumulate-till in string-append "" #\")
- (string (match in #\")) )))
+ (string (char-match in #\"))
+ ;(accumulate-till in string-append "" #\")
+ (string (char-match in #\")) )))
(define (dlang/symbol in)
(token 'symbol
(ch (buf-lookahead! in 1)))
(if
(and (not (char-whitespace? ch))
- (not (eof-object? ch)))
+ (not (eof-object? ch))
+ (not (char=? ch #\#)))
(loop (string-append acc (string (buf-consume! in))) (buf-lookahead! in 1))
(if (> (string-length acc) 0)
(token 'id acc)
; dlang/tokenize
;------------------------------------------------------------------------------
+(def-test "dlang/tokenize should recognize whitespace"
+ (call-with-input-string " \t\r\n"
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (eof-object? (dlang/tokenize buffer)))))
+
+(def-test "dlang/tokenize should recognize a comment"
+ (call-with-input-string "# foo"
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (eof-object? (dlang/tokenize buffer)))))
+
+(def-test "dlang/tokenize should recognize a number"
+ (call-with-input-string "12"
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (define result (dlang/tokenize buffer))
+ (and (token? result)
+ (equal? 'number (token-type result))
+ (equal? "12" (token-text result))))))
+
+(def-test "dlang/tokenize should recognize a character"
+ (call-with-input-string "'a'"
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (define result (dlang/tokenize buffer))
+ (and (token? result)
+ (equal? 'character (token-type result))
+ (equal? "'a'" (token-text result))))))
+
+(def-test "dlang/tokenize should recognize a string"
+ (call-with-input-string "\"\""
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (define result (dlang/tokenize buffer))
+ (and (token? result)
+ (equal? 'string (token-type result))
+ (equal? "\"\"" (token-text result))))))
+
+(def-test "dlang/tokenize should recognize a symbol"
+ (call-with-input-string "$foobar"
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (define result (dlang/tokenize buffer))
+ (and (token? result)
+ (equal? 'symbol (token-type result))
+ (equal? "$foobar" (token-text result))))))
+
+(def-test "dlang/tokenize should recognize an id"
+ (call-with-input-string "foobar"
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (define result (dlang/tokenize buffer))
+ (and (token? result)
+ (equal? 'id (token-type result))
+ (equal? "foobar" (token-text result))))))
+
+(def-test "dlang/tokenize should recognize the EOF"
+ (call-with-input-string ""
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (eof-object? (dlang/tokenize buffer)))))
+
+(def-test "dlang/tokenize should recognize a left parenthese"
+ (call-with-input-string "("
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (define result (dlang/tokenize buffer))
+ (and (token? result)
+ (equal? 'lpar (token-type result))
+ (equal? "(" (token-text result))))))
+
+(def-test "dlang/tokenize should recognize a right parenthese"
+ (call-with-input-string ")"
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (define result (dlang/tokenize buffer))
+ (and (token? result)
+ (equal? 'rpar (token-type result))
+ (equal? ")" (token-text result))))))
; dlang/whitespace
;------------------------------------------------------------------------------
(def-test "dlang/whitespace should recognize and consume whitespace"
- (call-with-input-string " \t\r\na"
- (lambda (input) '())))
+ (call-with-input-string " \t\r\n"
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (eof-object? (dlang/whitespace buffer)))))
+
+(def-test "dlang/whitespace continue parsing after whitespace"
+ (call-with-input-string " \t\r\nfoo"
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (define result (dlang/whitespace buffer))
+ (and (token? result)
+ (equal? 'id (token-type result))
+ (equal? "foo" (token-text result))))))
; dlang/comment
;------------------------------------------------------------------------------
(def-test "dlang/comment should recognize comments with windows style line endings"
(call-with-input-string "# foo\r\n"
- (lambda (input) '())))
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (eof-object? (dlang/comment buffer)))))
(def-test "dlang/comment should recognize comments with unix style line endings"
(call-with-input-string "# foo\n"
- (lambda (input) '())))
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (eof-object? (dlang/comment buffer)))))
(def-test "dlang/comment should recognize an empty comment"
(call-with-input-string "#\n"
- (lambda (input) '())))
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (eof-object? (dlang/comment buffer)))))
(def-test "dlang/comment should recognize comment at EOF"
(call-with-input-string "#"
- (lambda (input) '())))
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (eof-object? (dlang/comment buffer)))))
+
+(def-test "dlang/comment should continue parsing after a comment"
+ (call-with-input-string "# foo\r\nbar"
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (define result (dlang/comment buffer))
+ (and (token? result)
+ (equal? 'id (token-type result))
+ (equal? "bar" (token-text result))))))
; dlang/number
;------------------------------------------------------------------------------
(check-error "An Id was expected but none found."
(dlang/id buffer)))))
-(def-test "dlang/stop recognition when comment encountered"
+(def-test "dlang/id should stop recognition when comment encountered"
(call-with-input-string "foo#"
- (lambda (input) '())))
+ (lambda (input)
+ (define buffer (buf input read-char))
+ (define result (dlang/id buffer))
+ (and (token? result)
+ (equal? 'id (token-type result))
+ (equal? "foo" (token-text result))))))