From: Mike D. Lowis Date: Tue, 2 Oct 2012 16:38:45 +0000 (-0400) Subject: Updated buffer position data to be more accurate X-Git-Url: https://git.mdlowis.com/?a=commitdiff_plain;h=3d8a5b983843c0d90afb4aadb6d25532457b3eb9;p=archive%2Fdlang-scm.git Updated buffer position data to be more accurate --- diff --git a/source/buf.scm b/source/buf.scm index 3a0e3f2..a882937 100644 --- a/source/buf.scm +++ b/source/buf.scm @@ -11,9 +11,9 @@ (define (buf? obj) (and (buf-struct? obj) (procedure? (buf-ldfn obj)) - (integer? (buf-pos obj)) - (list? (buf-marks obj)) - (vector? (buf-data obj)))) + (integer? (buf-pos obj)) + (list? (buf-marks obj)) + (vector? (buf-data obj)))) (define (vector-append v1 v2 . vN) (define new-vec (list->vector (append (vector->list v1) (vector->list v2)))) diff --git a/source/lexer.scm b/source/lexer.scm index 4297207..f25ad0e 100644 --- a/source/lexer.scm +++ b/source/lexer.scm @@ -7,7 +7,7 @@ (buf (charport input) charport-read)) (define (dlang/tokenize in) - (define location (buf-posdata in)) + (define location (current-buf-posdata in)) (let ((ch (buf-lookahead! in 1))) (define tok (cond @@ -71,7 +71,7 @@ (not (chobj-char=? (buf-lookahead! in 1) #\newline)))) (define (dlang/number in) - (define location (buf-posdata in)) + (define location (current-buf-posdata in)) (token 'number (string-append (if (chobj-char=? (buf-lookahead! in 1) #\-) @@ -109,18 +109,18 @@ (dlang/integer in))) (define (dlang/character in) - (define location (buf-posdata in)) + (define location (current-buf-posdata in)) (token 'character (string-append (string (char-match in #\')) (if (eof-object? (buf-lookahead! in 1)) - (abort "Unexpected EOF while parsing character literal") - (string (chobj-char (buf-consume! in)))) + (abort "Unexpected EOF while parsing character literal") + (string (chobj-char (buf-consume! in)))) (string (char-match in #\'))) location)) (define (dlang/string in) - (define location (buf-posdata in)) + (define location (current-buf-posdata in)) (define text (string-append (string (char-match in #\")) @@ -135,7 +135,7 @@ (not (chobj-char=? ch #\")))) (define (dlang/symbol in) - (define location (buf-posdata in)) + (define location (current-buf-posdata in)) (token 'symbol (string-append (string (char-match in #\$)) @@ -143,17 +143,17 @@ location)) (define (dlang/id in) - (define location (buf-posdata in)) + (define location (current-buf-posdata in)) (define str (collect-char in dlang/id-char?)) (if (> (string-length str) 0) - (token 'id str location) - (abort "An Id was expected but none found."))) + (token 'id str location) + (abort "An Id was expected but none found."))) (define (dlang/id-char? in) (define ch (buf-lookahead! in 1)) (and (not (eof-object? ch)) (not (chobj-whitespace? ch)) (case (chobj-char ch) - ((#\( #\) #\; #\, #\' #\" #\$ #\#) #f) - (else #t)))) + ((#\( #\) #\; #\, #\' #\" #\$ #\#) #f) + (else #t)))) diff --git a/source/parse-utils.scm b/source/parse-utils.scm index 0beea80..eefd59d 100644 --- a/source/parse-utils.scm +++ b/source/parse-utils.scm @@ -57,6 +57,7 @@ (define (charport-read chprt) (define ch (read-char (charport-port chprt))) + (define pos (charport-posdata chprt)) (cond ((eof-object? ch)) ; Do nothing for EOFs ((char=? ch #\newline) @@ -64,7 +65,7 @@ (charport-column-set! chprt 1)) (else (charport-column-set! chprt (+ 1 (charport-column chprt))))) - (if (eof-object? ch) ch (chobj ch (charport-posdata chprt)))) + (if (eof-object? ch) ch (chobj ch pos))) (define (charport-posdata chprt) (posdata @@ -72,6 +73,14 @@ (charport-line chprt) (charport-column chprt))) +(define (current-buf-posdata in) + (define itm (buf-lookahead! in 1)) + (cond + ((eof-object? itm) (buf-posdata in)) + ((token? itm) (token-pos itm)) + ((chobj? itm) (chobj-pos itm)) + (else (abort "Argument was not a buf or a charport")))) + (define (buf-posdata in) (cond ((buf? in) (buf-posdata (buf-src in))) diff --git a/source/parser.scm b/source/parser.scm index d9f326c..cf7f2f8 100644 --- a/source/parser.scm +++ b/source/parser.scm @@ -34,12 +34,12 @@ (define (dlang/expression in) (if (dlang/core-form? in) - (dlang/core-form in) - (let ((result (dlang/basic-expr in))) - (if (dlang/arg-list? in) - (syntree 'apply "" - (append (list result) (syntree-children (dlang/arg-list in)))) - result)))) + (dlang/core-form in) + (let ((result (dlang/basic-expr in))) + (if (dlang/arg-list? in) + (syntree 'apply "" + (append (list result) (syntree-children (dlang/arg-list in)))) + result)))) (define (dlang/core-form in) (define tok (buf-lookahead! in 1)) @@ -55,11 +55,7 @@ (define (dlang/core-form? in) (define tok (buf-lookahead! in 1)) (define text (if (eof-object? tok) "" (token-text tok))) - (or (string=? text "def") - (string=? text "set!") - (string=? text "if") - (string=? text "begin") - (string=? text "func"))) + (list? (member text '("def" "set!" "if" "begin" "func")))) (define (dlang/define in) (define node '()) @@ -139,13 +135,9 @@ (define (dlang/literal in) (define tok (buf-lookahead! in 1)) (define type (if (eof-object? tok) '() (token-type tok))) - (if (or (equal? 'id type) - (equal? 'character type) - (equal? 'string type) - (equal? 'symbol type) - (equal? 'number type)) - (set! tok (buf-consume! in)) - (abort "Expected a literal")) + (if (list? (member type '(id character string symbol number))) + (set! tok (buf-consume! in)) + (abort "Expected a literal")) (syntree (token-type tok) (token-text tok) '())) (define (dlang/arg-list in) @@ -182,8 +174,7 @@ (define (dlang/expr-block in term) (syntree 'block "" - (collect - in + (collect in (lambda (buf) (not (token-matches? buf term))) dlang/expression))) diff --git a/tests/test_lexer.scm b/tests/test_lexer.scm index 5c11374..818c4a3 100644 --- a/tests/test_lexer.scm +++ b/tests/test_lexer.scm @@ -52,7 +52,7 @@ (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) (token=? result - (token 'number "12" (posdata "(string)" 1 2)))))) + (token 'number "12" (posdata "(string)" 1 1)))))) (def-test "dlang/tokenize should recognize a character" (call-with-input-string "'a'" @@ -60,7 +60,7 @@ (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) (token=? result - (token 'character "'a'" (posdata "(string)" 1 2)))))) + (token 'character "'a'" (posdata "(string)" 1 1)))))) (def-test "dlang/tokenize should recognize a string" (call-with-input-string "\"\"" @@ -68,7 +68,7 @@ (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) (token=? result - (token 'string "\"\"" (posdata "(string)" 1 2)))))) + (token 'string "\"\"" (posdata "(string)" 1 1)))))) (def-test "dlang/tokenize should recognize a symbol" (call-with-input-string "$foobar" @@ -76,7 +76,7 @@ (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) (token=? result - (token 'symbol "$foobar" (posdata "(string)" 1 2)))))) + (token 'symbol "$foobar" (posdata "(string)" 1 1)))))) (def-test "dlang/tokenize should recognize an id" (call-with-input-string "foobar" @@ -84,7 +84,7 @@ (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) (token=? result - (token 'id "foobar" (posdata "(string)" 1 2)))))) + (token 'id "foobar" (posdata "(string)" 1 1)))))) (def-test "dlang/tokenize should recognize the EOF" (call-with-input-string "" @@ -130,7 +130,7 @@ (define buffer (dlang/char-buf input)) (define result (dlang/tokenize buffer)) (token=? result - (token 'term "end" (posdata "(string)" 1 2)))))) + (token 'term "end" (posdata "(string)" 1 1)))))) ; dlang/whitespace ;------------------------------------------------------------------------------ @@ -146,7 +146,7 @@ (define buffer (dlang/char-buf input)) (define result (dlang/whitespace buffer)) (token=? result - (token 'id "foo" (posdata "(string)" 2 2)))))) + (token 'id "foo" (posdata "(string)" 2 1)))))) ; dlang/comment ;------------------------------------------------------------------------------ @@ -180,8 +180,7 @@ (define buffer (dlang/char-buf input)) (define result (dlang/comment buffer)) (token=? result - (token 'id "bar" (posdata "(string)" 2 2))) - ))) + (token 'id "bar" (posdata "(string)" 2 1)))))) ; dlang/number ;------------------------------------------------------------------------------ diff --git a/tests/test_parse_utils.scm b/tests/test_parse_utils.scm index 2450e09..3554595 100644 --- a/tests/test_parse_utils.scm +++ b/tests/test_parse_utils.scm @@ -150,7 +150,7 @@ (lambda (input) (define port (charport input)) (define result (charport-read port)) - (and (chobj=? (chobj #\a (posdata "(string)" 1 2)) result) + (and (chobj=? (chobj #\a (posdata "(string)" 1 1)) result) (equal? 1 (charport-line port)) (equal? 2 (charport-column port)))))) @@ -159,7 +159,7 @@ (lambda (input) (define port (charport input)) (define result (charport-read port)) - (and (chobj=? (chobj #\newline (posdata "(string)" 2 1)) result) + (and (chobj=? (chobj #\newline (posdata "(string)" 1 1)) result) (equal? 2 (charport-line port)) (equal? 1 (charport-column port)))))) @@ -240,7 +240,7 @@ (define buffer (dlang/lexer input)) (token=? (token-match buffer 'id) - (token 'id "a" (posdata "(string)" 1 2)))))) + (token 'id "a" (posdata "(string)" 1 1)))))) (def-test "token-match should error when EOF received" (call-with-input-string "" @@ -284,7 +284,7 @@ (define buffer (dlang/lexer input)) (token=? (keyword-match buffer "abc") - (token 'id "abc" (posdata "(string)" 1 2)))))) + (token 'id "abc" (posdata "(string)" 1 1)))))) (def-test "keyword-match should error if next token not an id" (call-with-input-string "1.0" @@ -355,7 +355,7 @@ (lambda (input) (define buffer (buf (charport input) charport-read)) (consume-all buffer dlang/integer?) - (chobj=? (chobj #\a (posdata "(string)" 1 2)) + (chobj=? (chobj #\a (posdata "(string)" 1 1)) (buf-lookahead! buffer 1))))) (def-test "should consume an item at a time until predicate returns false" @@ -363,7 +363,7 @@ (lambda (input) (define buffer (buf (charport input) charport-read)) (consume-all buffer dlang/integer?) - (chobj=? (chobj #\a (posdata "(string)" 1 5)) + (chobj=? (chobj #\a (posdata "(string)" 1 4)) (buf-lookahead! buffer 1))))) ; collect