From a9427d2ec5fefacacb649c5be128c3b869403101 Mon Sep 17 00:00:00 2001 From: Jesse McDonald Date: Wed, 26 May 2010 21:53:14 -0500 Subject: [PATCH] In-VM reader for high-level Scheme syntax, initial revision. Currently supports booleans, lists/pairs, fixnums (incl. 0x, 0b, 0, #x, #d, #o, and #b radix prefixes), basic byte strings, boxes, weak boxes, script headers (#!), and end-of-line comments. TODO: Floating-point, vectors, structs, symbols, cyclic references. --- src/lib/vectors.rls | 11 + src/reader.rls | 620 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 631 insertions(+) create mode 100644 src/lib/vectors.rls create mode 100644 src/reader.rls diff --git a/src/lib/vectors.rls b/src/lib/vectors.rls new file mode 100644 index 0000000..9051b7b --- /dev/null +++ b/src/lib/vectors.rls @@ -0,0 +1,11 @@ +(define (vector-eq? v1 v2) + (letrec ([iter (lambda (i) + (if (fix>= i (vector-size v1)) + (fix>= i (vector-size v2)) + (if (fix>= i (vector-size v2)) + #f + (if (eq? (vector-ref v1 i) + (vector-ref v2 i)) + (iter (fix+ i 1)) + #f))))]) + (iter 0))) diff --git a/src/reader.rls b/src/reader.rls new file mode 100644 index 0000000..ee2364e --- /dev/null +++ b/src/reader.rls @@ -0,0 +1,620 @@ +(define (make-structure-type supers nslots callable) + (let ([struct (make-struct structure)]) + (struct-set! struct 0 supers) + (struct-set! struct 1 nslots) + (struct-set! struct 2 callable) + struct)) + +(define s:symbol (make-structure-type '() 1 #f)) +(define *symbols* '()) + +(define (make-symbol name) + (let ([sym (make-struct s:symbol)]) + (struct-set! sym 0 name) + sym)) + +(define (symbol-name sym) + (struct-ref sym 0)) + +(define (findf fn lst) + (if (pair? lst) + (let ([x (car lst)]) + (if (fn x) + x + (findf fn (cdr lst)))) + #f)) + +(define (memq val lst) + (if (pair? lst) + (if (eq? (car lst) val) + lst + (memq val (cdr lst))) + #f)) + +(define (memq? val lst) + (and (memq val lst) #t)) + +(define (find-symbol name) + (findf (lambda (x) (byte-string= (symbol-name x) name)) *symbols*)) + +(define (intern name) + (let ([sym (find-symbol name)]) + (or sym + (let ([sym (make-symbol name)]) + (set! *symbols* (cons sym *symbols*)) + sym)))) + +(define (read-from-fd fd) + (let/cc toplevel-return + (let ([weak-list '()] + [line 1] + [column 0] + [eof? #f] + current-char) + + (define (read-one-value) + (skip-whitespace) + (when eof? (unexpected-eof)) + (cond + [(eq? current-char #\#) + (read-special)] + [(eq? current-char #\() + (read-list)] + [(or (eq? current-char #\-) + (eq? current-char #\+) + (numeric-char? current-char)) + (read-number)] + [(eq? current-char #\") + (read-string)] + [(symbol-char? current-char) + (read-symbol)] + [else + (unexpected-char)])) + + (define (read-special) + (next-char) + (when eof? (unexpected-eof)) + + (cond + [(eq? current-char #\!) + (unless (and (eq? line 1) (eq? column 2)) (unexpected-char)) + (define (skip-until-newline) + (next-char) + (unless (or eof? (eq? current-char #\Newline)) + (skip-until-newline))) + (skip-until-newline) + (read-one-value)] + [(memq? current-char '(#\F #\f)) + (next-char) + (when (symbol-char? current-char) (unexpected-char)) + #f] + [(memq? current-char '(#\T #\t)) + (next-char) + (when (symbol-char? current-char) (unexpected-char)) + #t] + [(eq? current-char #\&) + (read-box)] + [(eq? current-char #\() + (read-vector)] + [(memq? current-char '(#\S #\s)) + (next-char) + (unless (eq? current-char #\() (unexpected-char)) + (read-struct)] + [(memq? current-char '(#\W #\w)) + (next-char) + (unless (eq? current-char #\&) (unexpected-char)) + (read-weak-box)] + [(memq? current-char '(#\X #\x)) + (next-char) + (read-fixnum 16)] + [(memq? current-char '(#\D #\d)) + (next-char) + (read-fixnum 10)] + [(memq? current-char '(#\O #\o)) + (next-char) + (read-fixnum 8)] + [(memq? current-char '(#\B #\b)) + (next-char) + (read-fixnum 2)] + [else (unexpected-char)])) + + (define (read-list) + (define (read-rest) + (skip-whitespace) + (cond + [eof? + (unexpected-eof)] + [(eq? current-char #\.) + (next-char) + (let ([lstcdr (read-one-value)]) + (skip-whitespace) + (unless (eq? current-char #\)) (unexpected-char)) + (next-char) + lstcdr)] + [(eq? current-char #\)) + (next-char) + '()] + [else + (cons (read-one-value) (read-rest))])) + + (next-char) + (if (eq? current-char #\)) + '() + (cons (read-one-value) (read-rest)))) + + (define (read-fixnum [radix #f]) + (define neg? (eq? current-char #\-)) + + (when (or neg? (eq? current-char #\+)) + (next-char)) + + (unless radix + (unless (numeric-char? current-char) (unexpected-char)) + (if (eq? current-char #\0) + (begin + (next-char) + (cond + [(memq? current-char '(#\X #\x)) + (next-char) + (set! radix 16)] + [(memq? current-char '(#\B #\b)) + (next-char) + (set! radix 2)] + [else + (set! radix 8)]) + (unless (or (eq? radix 8) + (and (alphanumeric-char? current-char) + (fix< (digit->integer current-char) radix))) + (unexpected-char))) + (set! radix 10))) + + (define (iter accum) + (let ([val (digit->integer current-char)]) + (if (and val (fix< val radix)) + (begin + (next-char) + (iter (fix+ (fix* accum radix) val))) + accum))) + + (let ([pos-val (iter 0)]) + (if neg? (fix- pos-val) pos-val))) + + (define (read-number) + (read-fixnum)) + + (define (read-box) + (next-char) + (make-box (read-one-value))) + + (define (read-weak-box) + (next-char) + (let ([val (read-one-value)]) + (set! weak-list (cons val weak-list)) + (make-weak-box val))) + + (define (read-string) + (define (read-chars [accum '()] [len 0]) + (define (read-one-char) + (when (eq? current-char #\\) + (next-char) + (when eof? (unexpected-eof))) + current-char) + + (next-char) + (when eof? (unexpected-eof)) + (if (eq? current-char #\") + (begin + (next-char) + (values accum len)) + (read-chars (cons (read-one-char) accum) (fix+ len 1)))) + + (call-with-values + read-chars + (lambda (revchars len) + (let ([str (make-byte-string len #\Null)]) + (define (iter n rc) + (when (fix>= n 0) + (byte-string-set! str n (car rc)) + (iter (fix- n 1) (cdr rc)))) + (iter (fix- len 1) revchars) + str)))) + +; static value_t read_string(reader_state_t *state) +; { +; char *buffer = (char*)malloc(128); +; size_t buffer_size = 128; +; size_t length = 0; +; value_t value; +; +; release_assert(buffer != NULL); +; release_assert(state->ch == '"'); +; +; next_char(state); +; +; while (state->ch != '"') +; { +; bool skip_ws = false; +; char ch; +; +; release_assert(state->ch != EOF); +; +; if ((buffer_size - length) < 1) +; { +; release_assert(buffer_size <= INT32_MAX / 3); +; buffer_size = (3 * buffer_size) / 2; +; buffer = realloc(buffer, buffer_size); +; release_assert(buffer != NULL); +; } +; +; ch = state->ch; +; next_char(state); +; +; if (ch == '\\') +; { +; switch (state->ch) +; { +; case 'o': +; next_char(state); +; release_assert(('0' <= state->ch) && (state->ch <= '7')); +; /* fall through */ +; case '0' ... '7': +; ch = 0; +; +; /* One to three octal digits */ +; for (int i = 0; i < 3; ++i) +; { +; ch = 8 * ch + (state->ch - '0'); +; next_char(state); +; if ((state->ch < '0') || (state->ch > '7')) +; break; +; } +; break; +; case 'X': +; case 'x': +; ch = 0; +; +; next_char(state); +; release_assert(isxdigit(state->ch)); +; +; /* One or two hex digits */ +; for (int i = 0; i < 2; ++i) +; { +; int n = isdigit(state->ch) +; ? (state->ch - '0') +; : (10 + toupper(state->ch) - 'A'); +; +; ch = 16 * ch + n; +; next_char(state); +; +; if (!isxdigit(state->ch)) +; break; +; } +; break; +; case ' ': +; case '\t': +; case '\v': +; case '\n': +; skip_ws = true; +; break; +; case ';': +; /* Treats everything that follows on the same line as a comment, +; * and additionally skips leading whitespace on the next line. */ +; while (state->ch != '\n') +; { +; release_assert(state->ch != EOF); +; next_char(state); +; } +; skip_ws = true; +; break; +; case '\\': ch = '\\'; next_char(state); break; +; case '\'': ch = '\''; next_char(state); break; +; case '\"': ch = '\"'; next_char(state); break; +; case 'a': ch = '\a'; next_char(state); break; +; case 'b': ch = '\b'; next_char(state); break; +; case 'f': ch = '\f'; next_char(state); break; +; case 'n': ch = '\n'; next_char(state); break; +; case 'r': ch = '\r'; next_char(state); break; +; case 't': ch = '\t'; next_char(state); break; +; case 'v': ch = '\v'; next_char(state); break; +; default: +; release_assert(NOTREACHED("Invalid escape sequence in string.")); +; ch = '#'; +; next_char(state); +; break; +; } +; } +; +; if (skip_ws) +; { +; bool hit_eol = false; +; +; /* Slightly different from the normal skip_whitespace(); skips +; * whitespace through the _second_ EOL following the backslash. */ +; do { +; if (state->ch == '\n') +; { +; if (!hit_eol) +; { +; hit_eol = true; +; } +; else +; { +; next_char(state); +; break; +; } +; } +; +; next_char(state); +; } while (isspace(state->ch)); +; } +; else +; { +; buffer[length++] = ch; +; } +; } +; +; next_char(state); +; +; value = make_byte_string(length, '\0'); +; memcpy(_get_byte_string(value)->bytes, buffer, length); +; free(buffer); +; +; return value; +; } + + (define (read-vector) undefined) + (define (read-struct) undefined) + (define (read-symbol) undefined) + + (define (skip-whitespace) + (cond + [(whitespace? current-char) + (next-char) + (skip-whitespace)] + [(eq? current-char #\;) + (define (skip-until-newline) + (next-char) + (if (eq? current-char #\Newline) + (next-char) + (unless eof? (skip-until-newline)))) + (skip-until-newline)])) + + (define (next-char) + (if eof? + #f + (let* ([str (make-byte-string 1 0)] + [res (posix-read fd str 1)]) + (if (eq? res 1) + (let ([ch (byte-string-ref str 0)]) + (set! current-char ch) + (if (fix= ch #\Newline) + (begin + (set! line (fix+ line 1)) + (set! column 0)) + (set! column (fix+ column 1))) + ch) + (begin + (set! current-char #f) + (set! eof? #t) + #f))))) + + (define (unexpected-eof) + (toplevel-return "unexpected-eof")) + + (define (unexpected-char) + (toplevel-return "unexpected-char" current-char (list line column))) + + (next-char) + (values + (read-one-value) + current-char)))) + +(define (whitespace? ch) + (memq? ch '(#\Space #\Tab #\VTab #\Page #\Newline))) + +(define (numeric-char? ch) + (and (fix>= ch #\0) (fix<= ch #\9))) + +(define (upcase-char? ch) + (and (fix>= ch #\A) (fix<= ch #\Z))) + +(define (downcase-char? ch) + (and (fix>= ch #\a) (fix<= ch #\z))) + +(define (alphabetic-char? ch) + (or (upcase-char? ch) (downcase-char? ch))) + +(define (alphanumeric-char? ch) + (or (numeric-char? ch) (alphabetic-char? ch))) + +(define (symbol-char? ch) + (or (alphanumeric-char? ch) + (memq? ch '(#\! #\$ #\% #\& #\* #\+ + #\- #\/ #\< #\= #\> #\? + #\@ #\\ #\^ #\_ #\| #\~)))) + +(define (reverse lst [newcdr '()]) + (if (pair? lst) + (reverse (cdr lst) (cons (car lst) newcdr)) + newcdr)) + +(define (list-length lst) + (define (iter n rst) + (if (pair? rst) + (iter (fix+ n 1) (cdr rst)) + n)) + (iter 0 lst)) + +(define (digit->integer ch) + (cond + [(numeric-char? ch) (fix- ch #\0)] + [(upcase-char? ch) (fix+ 10 (fix- ch #\A))] + [(downcase-char? ch) (fix+ 10 (fix- ch #\a))] + [else #f])) + +(read-from-fd 0) + +; static value_t read_number(reader_state_t *state) +; { +; bool negative = false; +; fixnum_t num = 0; +; native_float_t flt; +; int radix; +; +; if (state->ch == '-') +; { +; negative = true; +; next_char(state); +; } +; else if (state->ch == '+') +; { +; next_char(state); +; } +; +; release_assert(isdigit(state->ch)); +; +; if (state->ch == '0') +; { +; next_char(state); +; +; switch (state->ch) +; { +; case 'X': +; case 'x': +; radix = 16; +; next_char(state); +; break; +; case 'B': +; case 'b': +; radix = 2; +; next_char(state); +; break; +; case '0' ... '9': +; radix = 8; +; break; +; default: +; radix = 10; +; break; +; } +; +; if (radix != 10) +; { +; release_assert(isalnum(state->ch)); +; release_assert(char_to_digit(state->ch) < radix); +; } +; } +; else +; { +; radix = 10; +; } +; +; while (isalnum(state->ch)) +; { +; fixnum_t digit = char_to_digit(state->ch); +; +; if (digit >= radix) +; break; +; +; release_assert(num <= (FIXNUM_MAX/radix)); +; +; num *= radix; +; num += digit; +; +; next_char(state); +; } +; +; if ((radix != 10) || ((state->ch != '.') && (state->ch != 'E') && (state->ch != 'e'))) +; { +; if (negative) +; num = -num; +; release_assert(!issymbol(state->ch)); +; release_assert((FIXNUM_MIN <= num) && (num <= FIXNUM_MAX)); +; return fixnum_value(num); +; } +; +; /* +; * Floating-point. No guarantees as to precision... really should use binary/hex. +; */ +; +; flt = num; +; +; if (state->ch == '.') +; { +; next_char(state); +; +; for (native_float_t pv = negative ? -0.1 : 0.1; isdigit(state->ch); pv /= 10) +; { +; flt += (state->ch - '0') * pv; +; next_char(state); +; } +; } +; +; if ((state->ch == 'E') || (state->ch == 'e')) +; { +; next_char(state); +; num = read_fixnum(state, 10); +; flt *= pow(10, _get_fixnum(num)); +; } +; +; if (negative) +; flt = -flt; +; +; release_assert(!issymbol(state->ch)); +; return make_float(flt); +; } +; +; static value_t read_vector(reader_state_t *state) +; { +; gc_root_t list_root; +; size_t length = 0; +; value_t value; +; value_t item; +; +; register_gc_root(&list_root, read_list(state)); +; +; for (value_t item = list_root.value; !is_nil(item); item = CDR(item)) +; ++length; +; +; value = make_vector(length, UNDEFINED); +; +; item = list_root.value; +; for (size_t i = 0; i < length; ++i) +; { +; _get_vector(value)->elements[i] = _CAR(item); +; /* No write barrier needed here. */ +; item = _CDR(item); +; } +; +; unregister_gc_root(&list_root); +; +; return value; +; } +; +; static value_t read_struct(reader_state_t *state) +; { +; gc_root_t list_root; +; size_t slots = 0; +; value_t value; +; value_t item; +; +; register_gc_root(&list_root, read_list(state)); +; +; for (item = CDR(list_root.value); !is_nil(item); item = CDR(item)) +; ++slots; +; +; value = make_struct(_CAR(list_root.value), slots); +; +; item = _CDR(list_root.value); +; for (size_t i = 0; i < slots; ++i) +; { +; _get_struct(value)->slots[i] = _CAR(item); +; /* No write barrier needed here. */ +; item = _CDR(item); +; } +; +; unregister_gc_root(&list_root); +; +; return value; +; } + +; vim:set syntax=scheme sw=2 expandtab: