From aba269ee16f04cabede17018af4dd2e3ed84e36b Mon Sep 17 00:00:00 2001 From: "Michael D. Lowis" Date: Fri, 6 Jul 2012 17:56:12 -0400 Subject: [PATCH] First crack at dlang lexer --- Makefile | 4 +-- inc/loop.scm | 2 -- source/buf.scm | 4 ++- source/lex.scm | 15 +++++++++ source/lexer.scm | 82 ++++++++++++++++++++++++++++++++++++++++++++++ tests/test_buf.scm | 12 +++---- 6 files changed, 108 insertions(+), 11 deletions(-) create mode 100644 source/lex.scm create mode 100644 source/lexer.scm diff --git a/Makefile b/Makefile index bbc8363..f39e95a 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ SRC_OBJS = $(SRC_FILES:%.$(SRC_EXT)=%.o) TEST_OBJS = $(TEST_FILES:%.$(TEST_EXT)=%.o) # Include Directories -SRC_INCS = +SRC_INCS = -I inc TEST_INCS = -I inc # Compiler and Linker Options @@ -70,7 +70,7 @@ $(TEST_RUNNER): $(SRC_OBJS) $(TEST_OBJS) # Object Files $(SRC_OBJS): %.o : %.$(SRC_EXT) @echo $< - @$(CSC) $(CSCFLAGS) -o $@ $< + @$(CSC) $(CSCFLAGS) $(SRC_INCS) -o $@ $< $(TEST_OBJS): %.o : %.$(TEST_EXT) @echo $< diff --git a/inc/loop.scm b/inc/loop.scm index 717c48a..5f8ec26 100644 --- a/inc/loop.scm +++ b/inc/loop.scm @@ -1,5 +1,3 @@ -(declare (unit loops)) - ; For loop macro (define-syntax for (syntax-rules () diff --git a/source/buf.scm b/source/buf.scm index 6bc30e3..d5b7713 100644 --- a/source/buf.scm +++ b/source/buf.scm @@ -61,6 +61,7 @@ (- (+ (buf-pos b) n) 1))) (define (buf-consume! b) + (define current (buf-lookahead! b 1)) (buf-advance! b) (if (and @@ -69,5 +70,6 @@ (begin (buf-pos-set! b 0) (buf-data-set! b (vector)))) - (buf-sync! b 1)) + (buf-sync! b 1) + current) diff --git a/source/lex.scm b/source/lex.scm new file mode 100644 index 0000000..2bad620 --- /dev/null +++ b/source/lex.scm @@ -0,0 +1,15 @@ +(declare (unit lex)) + +(define-record token type text) + +(define token make-token) + +(define (match buf expect) + (define actual (buf-lookahead! buf 1)) + (if (equal? expect actual) + (buf-consume! buf) + (error + (string-append + "Expected '" expect "', received '" actual "'"))) + actual) + diff --git a/source/lexer.scm b/source/lexer.scm new file mode 100644 index 0000000..a9ff5c8 --- /dev/null +++ b/source/lexer.scm @@ -0,0 +1,82 @@ +(declare (unit lexer) + (uses buf)) +(include "loop.scm") + +(define (dlang/tokenize in) + (let ((ch (buf-lookahead! in 1))) + (cond + ; Whitespace + ((char-whitespace? ch) + (dlang/whitespace in) + (dlang/tokenize in)) + + ; Comment + ((char=? ch #\#) + (dlang/comment in) + (dlang/tokenize in)) + + ; Number + ((or + (and (char=? ch #\-) (char-numeric? (buf-lookahead! in 2))) + (char-numeric? ch)) + (dlang/number in)) + + ; Character + ((char=? ch #\') (dlang/character in "")) + + ; String + ((char=? ch #\") (dlang/string in "")) + + ; Symbol + ((char=? ch #\$) (dlang/symbol in "")) + + ; Parentheses + ((char=? ch #\() + (token 'lpar (string (buf-consume! in)))) + ((char=? ch #\)) + (token 'rpar (string (buf-consume! in)))) + + ; Id + (else + (dlang/id in))))) + +(define (dlang/whitespace in) + (while (char-whitespace? (buf-lookahead! in 1)) + (buf-consume! in))) + +(define (dlang/comment in) + (match in #\#) + (while (not (char=? (buf-lookahead! in) #\newline)) + (buf-consume! in))) + +(define (dlang/number in str) 'number) +(define (dlang/character in str) + (token 'character + (string-append + (string (match in #\')) + (string (buf-consume! in)) + (string (match in #\')) ))) + +(define (dlang/string in str) + (token 'string + (string-append + (string (match in #\")) + (accumulate-till in string-append "" #\") + (string (match in #\")) ))) + +(define (dlang/symbol in str) + (token 'symbol + (string-append + (match in #\$) + (token-text (dlang/id in))))) + +(define (dlang/id in) + (let loop ((acc "") + (ch (buf-lookahead! in 1))) + (if + (and (not (char-whitespace? ch)) + (not (eof-object? ch))) + (loop (string-append acc (buf-consume! in)) (buf-lookahead! in 1))) + acc)) + + diff --git a/tests/test_buf.scm b/tests/test_buf.scm index d52bfb3..86e5930 100644 --- a/tests/test_buf.scm +++ b/tests/test_buf.scm @@ -218,8 +218,8 @@ (define buffer (buf input read-char)) (buf-sync! buffer 3) (buf-pos-set! buffer 2) - (buf-consume! buffer) - (and (= 0 (buf-pos buffer)) + (and (char=? #\c (buf-consume! buffer)) + (= 0 (buf-pos buffer)) (= 1 (vector-length (buf-data buffer))))))) (def-test "buf-consume! should NOT clear the buffer if pos not equal to the buffer size" @@ -228,8 +228,8 @@ (define buffer (buf input read-char)) (buf-sync! buffer 3) (buf-pos-set! buffer 1) - (buf-consume! buffer) - (and (= 2 (buf-pos buffer)) + (and (char=? #\b (buf-consume! buffer)) + (= 2 (buf-pos buffer)) (= 3 (vector-length (buf-data buffer))))))) (def-test "buf-consume! should NOT clear the buffer if the buffer is marked" @@ -239,8 +239,8 @@ (buf-sync! buffer 3) (buf-pos-set! buffer 2) (buf-mark! buffer) - (buf-consume! buffer) - (and (= 3 (buf-pos buffer)) + (and (char=? #\c (buf-consume! buffer)) + (= 3 (buf-pos buffer)) (= 4 (vector-length (buf-data buffer))))))) -- 2.54.0