diff --git a/fudge-chez.ss b/fudge-chez.ss
index bc80ecd..087ce23 100644
--- a/fudge-chez.ss
+++ b/fudge-chez.ss
@@ -5,24 +5,74 @@
     (let ([c (lookahead-char file)])
       (cond
-        [(char-alphabetic? c)
-          (get-char file)
-          (string-append (string c) (get-word file))]
+        ; lookahead-char yields the eof object at end of input, and
+        ; char-alphabetic? rejects non-characters, so guard with char?.
+        [(and (char? c) (char-alphabetic? c))
+         (get-char file)
+         (string-append (string c) (get-word file))]
         [else ""]))))
 
-(define get-words
+; char->digit converts a decimal digit character to its integer value.
+(define char->digit
+  (lambda (c)
+    (- (char->integer c) (char->integer #\0))))
+
+; get-number supports only nonnegative integers.
+; It consumes digits up to the first non-digit or the end of input.
+(define get-number
+  (lambda (file)
+    (let f ([total 0])
+      (let ([c (lookahead-char file)])
+        ; char? guards against the eof object when a number ends the input.
+        (if (and (char? c) (char-numeric? c))
+            (begin
+              (get-char file)
+              (f (+ (* total 10) (char->digit c))))
+            total)))))
+
+; get-token skips whitespace and returns the next token: a string for a
+; word, an integer for a number, a symbol for an operator, or the symbol
+; end-of-file when the input is exhausted.
+(define get-token
   (lambda (file)
     (let ([c (lookahead-char file)])
       (cond
-        [(eof-object? c) '()]
+        [(eof-object? c) 'end-of-file]
         [(char-whitespace? c)
-          (get-char file)
-          (get-words file)]
+         (get-char file)
+         (get-token file)]
         [(char-alphabetic? c)
-          (cons (get-word file) (get-words file))]))))
+         (get-word file)]
+        [(char-numeric? c)
+         (get-number file)]
+        [(char=? c #\+) (get-char file) '+]
+        [(char=? c #\-) (get-char file) '-]
+        [(char=? c #\*) (get-char file) '*]
+        [(char=? c #\/) (get-char file) '/]
+        [(char=? c #\=) (get-char file) '=]
+        [else (error 'get-token "Bad character" c)]))))
 
+; get-tokens collects tokens until get-token reports end-of-file.
+(define get-tokens
+  (lambda (file)
+    (let ([token (get-token file)])
+      (if (eq? token 'end-of-file)
+          '()
+          (cons token (get-tokens file))))))
+
 (define tokenize
   (lambda (filename)
-    (let ([file (open-input-file filename)])
-      (get-words file))))
-
-(display (tokenize "tokens.txt"))
+    ; call-with-input-file closes the port once get-tokens returns.
+    (call-with-input-file filename get-tokens)))
+
+; test-lexer tokenizes input and raises an error unless the tokens match want.
+(define test-lexer
+  (lambda (input want)
+    (let ([port (open-string-input-port input)])
+      (let ([got (get-tokens port)])
+        ; Token lists are freshly allocated, so compare structurally.
+        (unless (equal? got want)
+          (error 'test-lexer "unexpected tokens" input want got))))))
+
+(test-lexer "hello world" '("hello" "world"))
+(test-lexer "+ * / -" '(+ * / -))
+(test-lexer "identifier = 14" '("identifier" = 14))