diff --git a/fudge.scm b/fudge.scm new file mode 100644 index 0000000..4a92fe4 --- /dev/null +++ b/fudge.scm @@ -0,0 +1,21 @@ +(import (scheme base) + (scheme write) + (scheme file) + (scheme process-context) + (lexer)) + +(define usage + (lambda () + (display "usage: fudge.scm ") + (newline))) + +(unless (= (length (command-line)) 2) + (usage) + (exit)) + +(define input-string + (read-string + 100000000000000 + (open-input-file (car (cdr (command-line)))))) + +(display (tokenize input-string)) diff --git a/fudge.ss b/fudge.ss deleted file mode 100755 index fc39d49..0000000 --- a/fudge.ss +++ /dev/null @@ -1,140 +0,0 @@ -(import (scheme base) - (scheme file) - (scheme write) - (scheme char)) - -(define-syntax import-scheme - (syntax-rules () - ((_ module ...) - (import (scheme module) ...)))) - -(define keywords - '("true" "false" "struct" "enum" "if" "then" "else" "type" "let")) - -(define keyword? - (lambda (word) - (member word keywords))) - -(define get-word-string - (lambda (file) - (let ((c (peek-char file))) - (cond - ((eof-object? c) "") - ((char-alphabetic? c) - (read-char file) - (string-append (string c) (get-word file))) - (else ""))))) - -(define get-word - (lambda (file) - (let ((word (get-word-string file))) - (if (keyword? word) - (string->symbol word) - word)))) - -(define char->digit - (lambda (c) - (- (char->integer c) (char->integer #\0)))) - -; get-number supports only nonnegative integers. -(define get-number - (lambda (file) - (let f ((total 0)) - (let ((c (peek-char file))) - (cond - ((eof-object? c) total) - ((char-numeric? c) - (read-char file) - (f (+ (* total 10) (char->digit c)))) - (else total)))))) - -(define get-token - (lambda (file) - (let ((c (peek-char file))) - (cond - ((eof-object? c) 'end-of-file) - ((char-whitespace? c) - (read-char file) - (get-token file)) - ((char-alphabetic? c) - (get-word file)) - ((char-numeric? c) - (get-number file)) - ((eq? c #\+) (read-char file) '+) - ((eq? c #\-) (read-char file) '-) - ((eq? c #\*) (read-char file) '*) - ((eq? c #\/) (read-char file) '/) - ((eq? c #\=) (read-char file) '=) - ((eq? c #\() (read-char file) 'left-parenthesis) - ((eq? c #\)) (read-char file) 'right-parenthesis) - ((eq? c #\{) (read-char file) 'left-brace) - ((eq? c #\}) (read-char file) 'right-brace) - ((eq? c #\;) (read-char file) 'semicolon) - ((eq? c #\:) (read-char file) 'colon) - (else (error 'get-token "Bad character" c)))))) - -(define get-tokens - (lambda (file) - (let ((token (get-token file))) - (if (eq? token 'end-of-file) - '() - (cons token (get-tokens file)))))) - -(define tokenize - (lambda (filename) - (let ((file (open-input-file filename))) - (get-tokens file)))) - -(define test-lexer-fail - (lambda (input want got) - (display "text-lexer-fail: input=") - (write input) - (display "; want: ") - (write want) - (display "; got: ") - (write got) - (display ".") - (newline))) - - -(define test-lexer-success - (lambda (input got) - (display "text-lexer-success: input=") - (write input) - (display "; got: ") - (write got) - (display ".") - (newline))) - -(define test-lexer-fail-exception - (lambda (input x) - (display "test-lexer-fail: input ") - (write input) - (display " throws ") - (write (error-object-message x)) - (display ".") - (newline))) - -(define test-lexer - (lambda (input want) - (let ((port (open-input-string input))) - (call/cc - (lambda (i-got-exception) - (with-exception-handler - (lambda (x) - (test-lexer-fail-exception input x) - (i-got-exception x)) - (lambda () - (let ((got (get-tokens port))) - (if (equal? got want) - (test-lexer-success input got) - (test-lexer-fail input want got)))))))))) - -(test-lexer "hello world" '("hello" "world")) -(test-lexer "+ * / - = " '(+ * / - =)) -(test-lexer "( ) { }" '(left-parenthesis right-parenthesis left-brace right-brace)) -(test-lexer "true false" '(true false)) -(test-lexer "if then else" '(if then else)) -(test-lexer ": ;" '(colon semicolon)) -(test-lexer "identifier = 14" '("identifier" = 14)) -(test-lexer "" '()) diff --git a/input.txt b/input.txt new file mode 100644 index 0000000..0a9dd58 --- /dev/null +++ b/input.txt @@ -0,0 +1 @@ +true false Struct diff --git a/lexer-test.scm b/lexer-test.scm new file mode 100644 index 0000000..b4b9900 --- /dev/null +++ b/lexer-test.scm @@ -0,0 +1,19 @@ +(import (scheme base) + (scheme write) + (lexer)) + +(define test + (lambda (input want) + (let ((got (tokenize input))) + (unless (equal? got want) + (display "test fail: input = ") + (display input) + (display "; want = ") + (display want) + (display "; got = ") + (display got) + (newline))))) + +(test "Struct Enum" '(Struct Enum)) +(test "this is identifier" '("this" "is" "identifier")) +(test "+ - = != :=" '(+ - = != :=)) diff --git a/lexer.sld b/lexer.sld new file mode 100644 index 0000000..b20b5da --- /dev/null +++ b/lexer.sld @@ -0,0 +1,72 @@ +(define-library (lexer) + (export tokenize) + (import (scheme base) + (scheme write) + (scheme char)) + (begin + (define-record-type lexer + (make-lexer-with-index input index) + lexer? + (input lexer-input lexer-input-set!) + (index lexer-index lexer-index-set!)) + + (define-record-type lexer-error + (make-lexer-error type message) + lexer-error? + (type lexer-error-type) + (message lexer-error-message) + )) + + (define make-lexer + (lambda (input) + (make-lexer-with-index input 0))) + + (define lexer-end? + (lambda (lexer) + (>= (lexer-index lexer) + (string-length (lexer-input lexer))))) + + (define lexer-peek + (lambda (lexer) + (if (lexer-end? lexer) + (eof-object) + (string-ref (lexer-input lexer) + (lexer-index lexer))))) + + (define lexer-eat! + (lambda (lexer) + (if (lexer-end? lexer) + (eof-object) + (let ((index (lexer-index lexer))) + (lexer-index-set! lexer (+ index 1)) + (string-ref (lexer-input lexer) index))))) + + (define get-token + (lambda (lexer) + (let ((c (lexer-peek lexer))) + (cond + ((eof-object? c) "") + ((char-alphabetic? c) + (lexer-eat! lexer) + (string-append (string c) (get-token lexer))) + (else ""))))) + + (define get-tokens + (lambda (lexer) + (let loop ((tokens #())) + (let ((c (lexer-peek lexer))) + (cond + ((eof-object? c) tokens) + ((char-whitespace? c) + (lexer-eat! lexer) + (loop tokens)) + ((char-alphabetic? c) + (let ((token (get-token lexer))) + (if (lexer-error? token) + token + (loop ))))))) + + + (define tokenize + (lambda (input) + (get-tokens (make-lexer input))))))