In-VM reader for high-level Scheme syntax, initial revision.

Currently supports booleans, lists/pairs, fixnums (incl. 0x, 0b,
0, #x, #d, #o, and #b radix prefixes), basic byte strings, boxes,
weak boxes, script headers (#!), and end-of-line comments.
TODO: Floating-point, vectors, structs, symbols, cyclic references.
This commit is contained in:
Jesse D. McDonald 2010-05-26 21:53:14 -05:00
parent fd62415dee
commit a9427d2ec5
2 changed files with 631 additions and 0 deletions

11
src/lib/vectors.rls Normal file
View File

@ -0,0 +1,11 @@
(define (vector-eq? v1 v2)
(letrec ([iter (lambda (i)
(if (fix>= i (vector-size v1))
(fix>= i (vector-size v2))
(if (fix>= i (vector-size v2))
#f
(if (eq? (vector-ref v1 i)
(vector-ref v2 i))
(iter (fix+ i 1))
#f))))])
(iter 0)))

620
src/reader.rls Normal file
View File

@ -0,0 +1,620 @@
(define (make-structure-type supers nslots callable)
(let ([struct (make-struct structure)])
(struct-set! struct 0 supers)
(struct-set! struct 1 nslots)
(struct-set! struct 2 callable)
struct))
(define s:symbol (make-structure-type '() 1 #f))
(define *symbols* '())
(define (make-symbol name)
(let ([sym (make-struct s:symbol)])
(struct-set! sym 0 name)
sym))
(define (symbol-name sym)
(struct-ref sym 0))
(define (findf fn lst)
(if (pair? lst)
(let ([x (car lst)])
(if (fn x)
x
(findf fn (cdr lst))))
#f))
(define (memq val lst)
(if (pair? lst)
(if (eq? (car lst) val)
lst
(memq val (cdr lst)))
#f))
(define (memq? val lst)
(and (memq val lst) #t))
(define (find-symbol name)
(findf (lambda (x) (byte-string= (symbol-name x) name)) *symbols*))
(define (intern name)
(let ([sym (find-symbol name)])
(or sym
(let ([sym (make-symbol name)])
(set! *symbols* (cons sym *symbols*))
sym))))
(define (read-from-fd fd)
(let/cc toplevel-return
(let ([weak-list '()]
[line 1]
[column 0]
[eof? #f]
current-char)
(define (read-one-value)
(skip-whitespace)
(when eof? (unexpected-eof))
(cond
[(eq? current-char #\#)
(read-special)]
[(eq? current-char #\()
(read-list)]
[(or (eq? current-char #\-)
(eq? current-char #\+)
(numeric-char? current-char))
(read-number)]
[(eq? current-char #\")
(read-string)]
[(symbol-char? current-char)
(read-symbol)]
[else
(unexpected-char)]))
(define (read-special)
(next-char)
(when eof? (unexpected-eof))
(cond
[(eq? current-char #\!)
(unless (and (eq? line 1) (eq? column 2)) (unexpected-char))
(define (skip-until-newline)
(next-char)
(unless (or eof? (eq? current-char #\Newline))
(skip-until-newline)))
(skip-until-newline)
(read-one-value)]
[(memq? current-char '(#\F #\f))
(next-char)
(when (symbol-char? current-char) (unexpected-char))
#f]
[(memq? current-char '(#\T #\t))
(next-char)
(when (symbol-char? current-char) (unexpected-char))
#t]
[(eq? current-char #\&)
(read-box)]
[(eq? current-char #\()
(read-vector)]
[(memq? current-char '(#\S #\s))
(next-char)
(unless (eq? current-char #\() (unexpected-char))
(read-struct)]
[(memq? current-char '(#\W #\w))
(next-char)
(unless (eq? current-char #\&) (unexpected-char))
(read-weak-box)]
[(memq? current-char '(#\X #\x))
(next-char)
(read-fixnum 16)]
[(memq? current-char '(#\D #\d))
(next-char)
(read-fixnum 10)]
[(memq? current-char '(#\O #\o))
(next-char)
(read-fixnum 8)]
[(memq? current-char '(#\B #\b))
(next-char)
(read-fixnum 2)]
[else (unexpected-char)]))
(define (read-list)
(define (read-rest)
(skip-whitespace)
(cond
[eof?
(unexpected-eof)]
[(eq? current-char #\.)
(next-char)
(let ([lstcdr (read-one-value)])
(skip-whitespace)
(unless (eq? current-char #\)) (unexpected-char))
(next-char)
lstcdr)]
[(eq? current-char #\))
(next-char)
'()]
[else
(cons (read-one-value) (read-rest))]))
(next-char)
(if (eq? current-char #\))
'()
(cons (read-one-value) (read-rest))))
(define (read-fixnum [radix #f])
(define neg? (eq? current-char #\-))
(when (or neg? (eq? current-char #\+))
(next-char))
(unless radix
(unless (numeric-char? current-char) (unexpected-char))
(if (eq? current-char #\0)
(begin
(next-char)
(cond
[(memq? current-char '(#\X #\x))
(next-char)
(set! radix 16)]
[(memq? current-char '(#\B #\b))
(next-char)
(set! radix 2)]
[else
(set! radix 8)])
(unless (or (eq? radix 8)
(and (alphanumeric-char? current-char)
(fix< (digit->integer current-char) radix)))
(unexpected-char)))
(set! radix 10)))
(define (iter accum)
(let ([val (digit->integer current-char)])
(if (and val (fix< val radix))
(begin
(next-char)
(iter (fix+ (fix* accum radix) val)))
accum)))
(let ([pos-val (iter 0)])
(if neg? (fix- pos-val) pos-val)))
(define (read-number)
(read-fixnum))
(define (read-box)
(next-char)
(make-box (read-one-value)))
(define (read-weak-box)
(next-char)
(let ([val (read-one-value)])
(set! weak-list (cons val weak-list))
(make-weak-box val)))
(define (read-string)
(define (read-chars [accum '()] [len 0])
(define (read-one-char)
(when (eq? current-char #\\)
(next-char)
(when eof? (unexpected-eof)))
current-char)
(next-char)
(when eof? (unexpected-eof))
(if (eq? current-char #\")
(begin
(next-char)
(values accum len))
(read-chars (cons (read-one-char) accum) (fix+ len 1))))
(call-with-values
read-chars
(lambda (revchars len)
(let ([str (make-byte-string len #\Null)])
(define (iter n rc)
(when (fix>= n 0)
(byte-string-set! str n (car rc))
(iter (fix- n 1) (cdr rc))))
(iter (fix- len 1) revchars)
str))))
; static value_t read_string(reader_state_t *state)
; {
; char *buffer = (char*)malloc(128);
; size_t buffer_size = 128;
; size_t length = 0;
; value_t value;
;
; release_assert(buffer != NULL);
; release_assert(state->ch == '"');
;
; next_char(state);
;
; while (state->ch != '"')
; {
; bool skip_ws = false;
; char ch;
;
; release_assert(state->ch != EOF);
;
; if ((buffer_size - length) < 1)
; {
; release_assert(buffer_size <= INT32_MAX / 3);
; buffer_size = (3 * buffer_size) / 2;
; buffer = realloc(buffer, buffer_size);
; release_assert(buffer != NULL);
; }
;
; ch = state->ch;
; next_char(state);
;
; if (ch == '\\')
; {
; switch (state->ch)
; {
; case 'o':
; next_char(state);
; release_assert(('0' <= state->ch) && (state->ch <= '7'));
; /* fall through */
; case '0' ... '7':
; ch = 0;
;
; /* One to three octal digits */
; for (int i = 0; i < 3; ++i)
; {
; ch = 8 * ch + (state->ch - '0');
; next_char(state);
; if ((state->ch < '0') || (state->ch > '7'))
; break;
; }
; break;
; case 'X':
; case 'x':
; ch = 0;
;
; next_char(state);
; release_assert(isxdigit(state->ch));
;
; /* One or two hex digits */
; for (int i = 0; i < 2; ++i)
; {
; int n = isdigit(state->ch)
; ? (state->ch - '0')
; : (10 + toupper(state->ch) - 'A');
;
; ch = 16 * ch + n;
; next_char(state);
;
; if (!isxdigit(state->ch))
; break;
; }
; break;
; case ' ':
; case '\t':
; case '\v':
; case '\n':
; skip_ws = true;
; break;
; case ';':
; /* Treats everything that follows on the same line as a comment,
; * and additionally skips leading whitespace on the next line. */
; while (state->ch != '\n')
; {
; release_assert(state->ch != EOF);
; next_char(state);
; }
; skip_ws = true;
; break;
; case '\\': ch = '\\'; next_char(state); break;
; case '\'': ch = '\''; next_char(state); break;
; case '\"': ch = '\"'; next_char(state); break;
; case 'a': ch = '\a'; next_char(state); break;
; case 'b': ch = '\b'; next_char(state); break;
; case 'f': ch = '\f'; next_char(state); break;
; case 'n': ch = '\n'; next_char(state); break;
; case 'r': ch = '\r'; next_char(state); break;
; case 't': ch = '\t'; next_char(state); break;
; case 'v': ch = '\v'; next_char(state); break;
; default:
; release_assert(NOTREACHED("Invalid escape sequence in string."));
; ch = '#';
; next_char(state);
; break;
; }
; }
;
; if (skip_ws)
; {
; bool hit_eol = false;
;
; /* Slightly different from the normal skip_whitespace(); skips
; * whitespace through the _second_ EOL following the backslash. */
; do {
; if (state->ch == '\n')
; {
; if (!hit_eol)
; {
; hit_eol = true;
; }
; else
; {
; next_char(state);
; break;
; }
; }
;
; next_char(state);
; } while (isspace(state->ch));
; }
; else
; {
; buffer[length++] = ch;
; }
; }
;
; next_char(state);
;
; value = make_byte_string(length, '\0');
; memcpy(_get_byte_string(value)->bytes, buffer, length);
; free(buffer);
;
; return value;
; }
(define (read-vector) undefined)
(define (read-struct) undefined)
(define (read-symbol) undefined)
(define (skip-whitespace)
(cond
[(whitespace? current-char)
(next-char)
(skip-whitespace)]
[(eq? current-char #\;)
(define (skip-until-newline)
(next-char)
(if (eq? current-char #\Newline)
(next-char)
(unless eof? (skip-until-newline))))
(skip-until-newline)]))
(define (next-char)
(if eof?
#f
(let* ([str (make-byte-string 1 0)]
[res (posix-read fd str 1)])
(if (eq? res 1)
(let ([ch (byte-string-ref str 0)])
(set! current-char ch)
(if (fix= ch #\Newline)
(begin
(set! line (fix+ line 1))
(set! column 0))
(set! column (fix+ column 1)))
ch)
(begin
(set! current-char #f)
(set! eof? #t)
#f)))))
(define (unexpected-eof)
(toplevel-return "unexpected-eof"))
(define (unexpected-char)
(toplevel-return "unexpected-char" current-char (list line column)))
(next-char)
(values
(read-one-value)
current-char))))
(define (whitespace? ch)
(memq? ch '(#\Space #\Tab #\VTab #\Page #\Newline)))
(define (numeric-char? ch)
(and (fix>= ch #\0) (fix<= ch #\9)))
(define (upcase-char? ch)
(and (fix>= ch #\A) (fix<= ch #\Z)))
(define (downcase-char? ch)
(and (fix>= ch #\a) (fix<= ch #\z)))
(define (alphabetic-char? ch)
(or (upcase-char? ch) (downcase-char? ch)))
(define (alphanumeric-char? ch)
(or (numeric-char? ch) (alphabetic-char? ch)))
(define (symbol-char? ch)
(or (alphanumeric-char? ch)
(memq? ch '(#\! #\$ #\% #\& #\* #\+
#\- #\/ #\< #\= #\> #\?
#\@ #\\ #\^ #\_ #\| #\~))))
(define (reverse lst [newcdr '()])
(if (pair? lst)
(reverse (cdr lst) (cons (car lst) newcdr))
newcdr))
(define (list-length lst)
(define (iter n rst)
(if (pair? rst)
(iter (fix+ n 1) (cdr rst))
n))
(iter 0 lst))
(define (digit->integer ch)
(cond
[(numeric-char? ch) (fix- ch #\0)]
[(upcase-char? ch) (fix+ 10 (fix- ch #\A))]
[(downcase-char? ch) (fix+ 10 (fix- ch #\a))]
[else #f]))
(read-from-fd 0)
; static value_t read_number(reader_state_t *state)
; {
; bool negative = false;
; fixnum_t num = 0;
; native_float_t flt;
; int radix;
;
; if (state->ch == '-')
; {
; negative = true;
; next_char(state);
; }
; else if (state->ch == '+')
; {
; next_char(state);
; }
;
; release_assert(isdigit(state->ch));
;
; if (state->ch == '0')
; {
; next_char(state);
;
; switch (state->ch)
; {
; case 'X':
; case 'x':
; radix = 16;
; next_char(state);
; break;
; case 'B':
; case 'b':
; radix = 2;
; next_char(state);
; break;
; case '0' ... '9':
; radix = 8;
; break;
; default:
; radix = 10;
; break;
; }
;
; if (radix != 10)
; {
; release_assert(isalnum(state->ch));
; release_assert(char_to_digit(state->ch) < radix);
; }
; }
; else
; {
; radix = 10;
; }
;
; while (isalnum(state->ch))
; {
; fixnum_t digit = char_to_digit(state->ch);
;
; if (digit >= radix)
; break;
;
; release_assert(num <= (FIXNUM_MAX/radix));
;
; num *= radix;
; num += digit;
;
; next_char(state);
; }
;
; if ((radix != 10) || ((state->ch != '.') && (state->ch != 'E') && (state->ch != 'e')))
; {
; if (negative)
; num = -num;
; release_assert(!issymbol(state->ch));
; release_assert((FIXNUM_MIN <= num) && (num <= FIXNUM_MAX));
; return fixnum_value(num);
; }
;
; /*
; * Floating-point. No guarantees as to precision... really should use binary/hex.
; */
;
; flt = num;
;
; if (state->ch == '.')
; {
; next_char(state);
;
; for (native_float_t pv = negative ? -0.1 : 0.1; isdigit(state->ch); pv /= 10)
; {
; flt += (state->ch - '0') * pv;
; next_char(state);
; }
; }
;
; if ((state->ch == 'E') || (state->ch == 'e'))
; {
; next_char(state);
; num = read_fixnum(state, 10);
; flt *= pow(10, _get_fixnum(num));
; }
;
; if (negative)
; flt = -flt;
;
; release_assert(!issymbol(state->ch));
; return make_float(flt);
; }
;
; static value_t read_vector(reader_state_t *state)
; {
; gc_root_t list_root;
; size_t length = 0;
; value_t value;
; value_t item;
;
; register_gc_root(&list_root, read_list(state));
;
; for (value_t item = list_root.value; !is_nil(item); item = CDR(item))
; ++length;
;
; value = make_vector(length, UNDEFINED);
;
; item = list_root.value;
; for (size_t i = 0; i < length; ++i)
; {
; _get_vector(value)->elements[i] = _CAR(item);
; /* No write barrier needed here. */
; item = _CDR(item);
; }
;
; unregister_gc_root(&list_root);
;
; return value;
; }
;
; static value_t read_struct(reader_state_t *state)
; {
; gc_root_t list_root;
; size_t slots = 0;
; value_t value;
; value_t item;
;
; register_gc_root(&list_root, read_list(state));
;
; for (item = CDR(list_root.value); !is_nil(item); item = CDR(item))
; ++slots;
;
; value = make_struct(_CAR(list_root.value), slots);
;
; item = _CDR(list_root.value);
; for (size_t i = 0; i < slots; ++i)
; {
; _get_struct(value)->slots[i] = _CAR(item);
; /* No write barrier needed here. */
; item = _CDR(item);
; }
;
; unregister_gc_root(&list_root);
;
; return value;
; }
; vim:set syntax=scheme sw=2 expandtab: