More elegant tokenizer

This commit is contained in:
2025-10-23 16:36:18 +02:00
parent 83a5208600
commit 9d0016561a

View File

@@ -1,28 +1,24 @@
(import (rnrs))
;; Read one word -- a maximal run of alphabetic characters -- from the
;; textual input port FILE, consuming exactly those characters.
;; Returns the word as a string; returns "" when the next character is
;; not alphabetic (or the port is at end of file).
(define get-word
  (lambda (file)
    (let ([c (lookahead-char file)])
      (cond
        ;; Check char? before char-alphabetic?: lookahead-char yields the
        ;; eof object at end of input, and char-alphabetic? on a non-char
        ;; raises &assertion -- this crashed on a word ending at EOF.
        [(and (char? c) (char-alphabetic? c))
         (get-char file)
         (string-append (string c) (get-word file))]
        [else ""]))))
;; Read every word from the textual input port FILE, skipping whitespace
;; between words. Returns the words, in order, as a list of strings.
;; Raises an error on any character that is neither whitespace nor
;; alphabetic.
(define get-words
  (lambda (file)
    (let ([c (lookahead-char file)])
      (cond
        [(eof-object? c) '()]
        [(char-whitespace? c)
         (get-char file)
         (get-words file)]
        [(char-alphabetic? c)
         (cons (get-word file) (get-words file))]
        ;; Without this clause the cond falls through with an unspecified
        ;; value on punctuation/digits; fail loudly instead.
        [else (error 'get-words "unexpected character" c)]))))
(define tokenize
(lambda (filename)