From a1c91a623b813f50e17d6a1988dbedfa0648106a Mon Sep 17 00:00:00 2001
From: Artsiom Dzenisiuk
Date: Thu, 23 Oct 2025 16:12:29 +0200
Subject: [PATCH] Tokenizer beginnings

---
 fudge-chez.ss | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 sigma         |  1 +
 2 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 sigma

diff --git a/fudge-chez.ss b/fudge-chez.ss
index 7a8ecc8..785108b 100644
--- a/fudge-chez.ss
+++ b/fudge-chez.ss
@@ -1 +1,45 @@
-(print "Hello, world!")
+(import (rnrs))
+
+;; get-word: read a maximal run of alphabetic characters from FILE.
+;;   file - textual input port positioned at the start of (or inside) a word
+;;   word - string holding the characters already collected for this word
+;; Returns WORD extended with every alphabetic character consumed.  The port
+;; is left at the first non-alphabetic character, or at end of input.
+(define get-word
+  (lambda (file word)
+    (let loop ([chars '()])
+      (let ([c (lookahead-char file)])
+        ;; lookahead-char yields the eof object at end of input, and
+        ;; char-alphabetic? accepts only characters, so rule out eof first.
+        (if (and (char? c) (char-alphabetic? c))
+            (begin
+              (get-char file)
+              (loop (cons c chars)))
+            ;; Build the result once instead of re-copying WORD per character.
+            (string-append word (list->string (reverse chars))))))))
+
+;; get-words: split the remaining contents of FILE into words.
+;;   file - textual input port
+;; Returns the list of words in input order.  Whitespace separates words;
+;; any other non-alphabetic character raises an error carrying that character.
+(define get-words
+  (lambda (file)
+    (let loop ([words '()])
+      (let ([c (lookahead-char file)])
+        (cond
+          [(eof-object? c) (reverse words)]
+          [(char-whitespace? c)
+           (get-char file)
+           (loop words)]
+          [(char-alphabetic? c)
+           (loop (cons (get-word file "") words))]
+          [else (error 'get-words "wtf" c)])))))
+
+;; tokenize: return the words of the file named FILENAME as a list of strings.
+(define tokenize
+  (lambda (filename)
+    ;; call-with-input-file closes the port once get-words returns, so the
+    ;; port is not leaked.
+    (call-with-input-file filename get-words)))
+
+(display (tokenize "tokens.txt"))
diff --git a/sigma b/sigma
new file mode 100644
index 0000000..5dd01c1
--- /dev/null
+++ b/sigma
@@ -0,0 +1 @@
+Hello, world!
\ No newline at end of file