More elegant tokenizer
This commit is contained in:
@@ -1,28 +1,24 @@
|
|||||||
(import (rnrs))
|
(import (rnrs))
|
||||||
|
|
||||||
(define get-word
|
(define get-word
|
||||||
(lambda (file word)
|
(lambda (file)
|
||||||
(let ([c (lookahead-char file)])
|
(let ([c (lookahead-char file)])
|
||||||
(if (char-alphabetic? c)
|
(cond
|
||||||
(begin
|
[(char-alphabetic? c)
|
||||||
(get-char file)
|
(get-char file)
|
||||||
(get-word file (string-append word (string c))))
|
(string-append (string c) (get-word file))]
|
||||||
word))))
|
[else ""]))))
|
||||||
|
|
||||||
(define get-words
|
(define get-words
|
||||||
(lambda (file)
|
(lambda (file)
|
||||||
(letrec ([loop
|
|
||||||
(lambda (file words)
|
|
||||||
(let ([c (lookahead-char file)])
|
(let ([c (lookahead-char file)])
|
||||||
(cond
|
(cond
|
||||||
[(eof-object? c) words]
|
[(eof-object? c) '()]
|
||||||
[(char-whitespace? c)
|
[(char-whitespace? c)
|
||||||
(get-char file)
|
(get-char file)
|
||||||
(loop file words)]
|
(get-words file)]
|
||||||
[(char-alphabetic? c)
|
[(char-alphabetic? c)
|
||||||
(loop file (cons (get-word file "") words))]
|
(cons (get-word file) (get-words file))]))))
|
||||||
[else (error 'get-words "wtf" c)])))])
|
|
||||||
(reverse (loop file '())))))
|
|
||||||
|
|
||||||
(define tokenize
|
(define tokenize
|
||||||
(lambda (filename)
|
(lambda (filename)
|
||||||
|
|||||||
Reference in New Issue
Block a user