In-VM reader for high-level Scheme syntax, initial revision.
Currently supports booleans, lists/pairs, fixnums (incl. 0x, 0b, 0, #x, #d, #o, and #b radix prefixes), basic byte strings, boxes, weak boxes, script headers (#!), and end-of-line comments. TODO: Floating-point, vectors, structs, symbols, cyclic references.
This commit is contained in:
parent
fd62415dee
commit
a9427d2ec5
|
|
@ -0,0 +1,11 @@
|
|||
(define (vector-eq? v1 v2)
|
||||
(letrec ([iter (lambda (i)
|
||||
(if (fix>= i (vector-size v1))
|
||||
(fix>= i (vector-size v2))
|
||||
(if (fix>= i (vector-size v2))
|
||||
#f
|
||||
(if (eq? (vector-ref v1 i)
|
||||
(vector-ref v2 i))
|
||||
(iter (fix+ i 1))
|
||||
#f))))])
|
||||
(iter 0)))
|
||||
|
|
@ -0,0 +1,620 @@
|
|||
(define (make-structure-type supers nslots callable)
|
||||
(let ([struct (make-struct structure)])
|
||||
(struct-set! struct 0 supers)
|
||||
(struct-set! struct 1 nslots)
|
||||
(struct-set! struct 2 callable)
|
||||
struct))
|
||||
|
||||
(define s:symbol (make-structure-type '() 1 #f))
|
||||
(define *symbols* '())
|
||||
|
||||
(define (make-symbol name)
|
||||
(let ([sym (make-struct s:symbol)])
|
||||
(struct-set! sym 0 name)
|
||||
sym))
|
||||
|
||||
(define (symbol-name sym)
|
||||
(struct-ref sym 0))
|
||||
|
||||
(define (findf fn lst)
|
||||
(if (pair? lst)
|
||||
(let ([x (car lst)])
|
||||
(if (fn x)
|
||||
x
|
||||
(findf fn (cdr lst))))
|
||||
#f))
|
||||
|
||||
(define (memq val lst)
|
||||
(if (pair? lst)
|
||||
(if (eq? (car lst) val)
|
||||
lst
|
||||
(memq val (cdr lst)))
|
||||
#f))
|
||||
|
||||
(define (memq? val lst)
|
||||
(and (memq val lst) #t))
|
||||
|
||||
(define (find-symbol name)
|
||||
(findf (lambda (x) (byte-string= (symbol-name x) name)) *symbols*))
|
||||
|
||||
(define (intern name)
|
||||
(let ([sym (find-symbol name)])
|
||||
(or sym
|
||||
(let ([sym (make-symbol name)])
|
||||
(set! *symbols* (cons sym *symbols*))
|
||||
sym))))
|
||||
|
||||
(define (read-from-fd fd)
|
||||
(let/cc toplevel-return
|
||||
(let ([weak-list '()]
|
||||
[line 1]
|
||||
[column 0]
|
||||
[eof? #f]
|
||||
current-char)
|
||||
|
||||
(define (read-one-value)
|
||||
(skip-whitespace)
|
||||
(when eof? (unexpected-eof))
|
||||
(cond
|
||||
[(eq? current-char #\#)
|
||||
(read-special)]
|
||||
[(eq? current-char #\()
|
||||
(read-list)]
|
||||
[(or (eq? current-char #\-)
|
||||
(eq? current-char #\+)
|
||||
(numeric-char? current-char))
|
||||
(read-number)]
|
||||
[(eq? current-char #\")
|
||||
(read-string)]
|
||||
[(symbol-char? current-char)
|
||||
(read-symbol)]
|
||||
[else
|
||||
(unexpected-char)]))
|
||||
|
||||
(define (read-special)
|
||||
(next-char)
|
||||
(when eof? (unexpected-eof))
|
||||
|
||||
(cond
|
||||
[(eq? current-char #\!)
|
||||
(unless (and (eq? line 1) (eq? column 2)) (unexpected-char))
|
||||
(define (skip-until-newline)
|
||||
(next-char)
|
||||
(unless (or eof? (eq? current-char #\Newline))
|
||||
(skip-until-newline)))
|
||||
(skip-until-newline)
|
||||
(read-one-value)]
|
||||
[(memq? current-char '(#\F #\f))
|
||||
(next-char)
|
||||
(when (symbol-char? current-char) (unexpected-char))
|
||||
#f]
|
||||
[(memq? current-char '(#\T #\t))
|
||||
(next-char)
|
||||
(when (symbol-char? current-char) (unexpected-char))
|
||||
#t]
|
||||
[(eq? current-char #\&)
|
||||
(read-box)]
|
||||
[(eq? current-char #\()
|
||||
(read-vector)]
|
||||
[(memq? current-char '(#\S #\s))
|
||||
(next-char)
|
||||
(unless (eq? current-char #\() (unexpected-char))
|
||||
(read-struct)]
|
||||
[(memq? current-char '(#\W #\w))
|
||||
(next-char)
|
||||
(unless (eq? current-char #\&) (unexpected-char))
|
||||
(read-weak-box)]
|
||||
[(memq? current-char '(#\X #\x))
|
||||
(next-char)
|
||||
(read-fixnum 16)]
|
||||
[(memq? current-char '(#\D #\d))
|
||||
(next-char)
|
||||
(read-fixnum 10)]
|
||||
[(memq? current-char '(#\O #\o))
|
||||
(next-char)
|
||||
(read-fixnum 8)]
|
||||
[(memq? current-char '(#\B #\b))
|
||||
(next-char)
|
||||
(read-fixnum 2)]
|
||||
[else (unexpected-char)]))
|
||||
|
||||
(define (read-list)
|
||||
(define (read-rest)
|
||||
(skip-whitespace)
|
||||
(cond
|
||||
[eof?
|
||||
(unexpected-eof)]
|
||||
[(eq? current-char #\.)
|
||||
(next-char)
|
||||
(let ([lstcdr (read-one-value)])
|
||||
(skip-whitespace)
|
||||
(unless (eq? current-char #\)) (unexpected-char))
|
||||
(next-char)
|
||||
lstcdr)]
|
||||
[(eq? current-char #\))
|
||||
(next-char)
|
||||
'()]
|
||||
[else
|
||||
(cons (read-one-value) (read-rest))]))
|
||||
|
||||
(next-char)
|
||||
(if (eq? current-char #\))
|
||||
'()
|
||||
(cons (read-one-value) (read-rest))))
|
||||
|
||||
(define (read-fixnum [radix #f])
|
||||
(define neg? (eq? current-char #\-))
|
||||
|
||||
(when (or neg? (eq? current-char #\+))
|
||||
(next-char))
|
||||
|
||||
(unless radix
|
||||
(unless (numeric-char? current-char) (unexpected-char))
|
||||
(if (eq? current-char #\0)
|
||||
(begin
|
||||
(next-char)
|
||||
(cond
|
||||
[(memq? current-char '(#\X #\x))
|
||||
(next-char)
|
||||
(set! radix 16)]
|
||||
[(memq? current-char '(#\B #\b))
|
||||
(next-char)
|
||||
(set! radix 2)]
|
||||
[else
|
||||
(set! radix 8)])
|
||||
(unless (or (eq? radix 8)
|
||||
(and (alphanumeric-char? current-char)
|
||||
(fix< (digit->integer current-char) radix)))
|
||||
(unexpected-char)))
|
||||
(set! radix 10)))
|
||||
|
||||
(define (iter accum)
|
||||
(let ([val (digit->integer current-char)])
|
||||
(if (and val (fix< val radix))
|
||||
(begin
|
||||
(next-char)
|
||||
(iter (fix+ (fix* accum radix) val)))
|
||||
accum)))
|
||||
|
||||
(let ([pos-val (iter 0)])
|
||||
(if neg? (fix- pos-val) pos-val)))
|
||||
|
||||
(define (read-number)
|
||||
(read-fixnum))
|
||||
|
||||
(define (read-box)
|
||||
(next-char)
|
||||
(make-box (read-one-value)))
|
||||
|
||||
(define (read-weak-box)
|
||||
(next-char)
|
||||
(let ([val (read-one-value)])
|
||||
(set! weak-list (cons val weak-list))
|
||||
(make-weak-box val)))
|
||||
|
||||
(define (read-string)
|
||||
(define (read-chars [accum '()] [len 0])
|
||||
(define (read-one-char)
|
||||
(when (eq? current-char #\\)
|
||||
(next-char)
|
||||
(when eof? (unexpected-eof)))
|
||||
current-char)
|
||||
|
||||
(next-char)
|
||||
(when eof? (unexpected-eof))
|
||||
(if (eq? current-char #\")
|
||||
(begin
|
||||
(next-char)
|
||||
(values accum len))
|
||||
(read-chars (cons (read-one-char) accum) (fix+ len 1))))
|
||||
|
||||
(call-with-values
|
||||
read-chars
|
||||
(lambda (revchars len)
|
||||
(let ([str (make-byte-string len #\Null)])
|
||||
(define (iter n rc)
|
||||
(when (fix>= n 0)
|
||||
(byte-string-set! str n (car rc))
|
||||
(iter (fix- n 1) (cdr rc))))
|
||||
(iter (fix- len 1) revchars)
|
||||
str))))
|
||||
|
||||
; static value_t read_string(reader_state_t *state)
|
||||
; {
|
||||
; char *buffer = (char*)malloc(128);
|
||||
; size_t buffer_size = 128;
|
||||
; size_t length = 0;
|
||||
; value_t value;
|
||||
;
|
||||
; release_assert(buffer != NULL);
|
||||
; release_assert(state->ch == '"');
|
||||
;
|
||||
; next_char(state);
|
||||
;
|
||||
; while (state->ch != '"')
|
||||
; {
|
||||
; bool skip_ws = false;
|
||||
; char ch;
|
||||
;
|
||||
; release_assert(state->ch != EOF);
|
||||
;
|
||||
; if ((buffer_size - length) < 1)
|
||||
; {
|
||||
; release_assert(buffer_size <= INT32_MAX / 3);
|
||||
; buffer_size = (3 * buffer_size) / 2;
|
||||
; buffer = realloc(buffer, buffer_size);
|
||||
; release_assert(buffer != NULL);
|
||||
; }
|
||||
;
|
||||
; ch = state->ch;
|
||||
; next_char(state);
|
||||
;
|
||||
; if (ch == '\\')
|
||||
; {
|
||||
; switch (state->ch)
|
||||
; {
|
||||
; case 'o':
|
||||
; next_char(state);
|
||||
; release_assert(('0' <= state->ch) && (state->ch <= '7'));
|
||||
; /* fall through */
|
||||
; case '0' ... '7':
|
||||
; ch = 0;
|
||||
;
|
||||
; /* One to three octal digits */
|
||||
; for (int i = 0; i < 3; ++i)
|
||||
; {
|
||||
; ch = 8 * ch + (state->ch - '0');
|
||||
; next_char(state);
|
||||
; if ((state->ch < '0') || (state->ch > '7'))
|
||||
; break;
|
||||
; }
|
||||
; break;
|
||||
; case 'X':
|
||||
; case 'x':
|
||||
; ch = 0;
|
||||
;
|
||||
; next_char(state);
|
||||
; release_assert(isxdigit(state->ch));
|
||||
;
|
||||
; /* One or two hex digits */
|
||||
; for (int i = 0; i < 2; ++i)
|
||||
; {
|
||||
; int n = isdigit(state->ch)
|
||||
; ? (state->ch - '0')
|
||||
; : (10 + toupper(state->ch) - 'A');
|
||||
;
|
||||
; ch = 16 * ch + n;
|
||||
; next_char(state);
|
||||
;
|
||||
; if (!isxdigit(state->ch))
|
||||
; break;
|
||||
; }
|
||||
; break;
|
||||
; case ' ':
|
||||
; case '\t':
|
||||
; case '\v':
|
||||
; case '\n':
|
||||
; skip_ws = true;
|
||||
; break;
|
||||
; case ';':
|
||||
; /* Treats everything that follows on the same line as a comment,
|
||||
; * and additionally skips leading whitespace on the next line. */
|
||||
; while (state->ch != '\n')
|
||||
; {
|
||||
; release_assert(state->ch != EOF);
|
||||
; next_char(state);
|
||||
; }
|
||||
; skip_ws = true;
|
||||
; break;
|
||||
; case '\\': ch = '\\'; next_char(state); break;
|
||||
; case '\'': ch = '\''; next_char(state); break;
|
||||
; case '\"': ch = '\"'; next_char(state); break;
|
||||
; case 'a': ch = '\a'; next_char(state); break;
|
||||
; case 'b': ch = '\b'; next_char(state); break;
|
||||
; case 'f': ch = '\f'; next_char(state); break;
|
||||
; case 'n': ch = '\n'; next_char(state); break;
|
||||
; case 'r': ch = '\r'; next_char(state); break;
|
||||
; case 't': ch = '\t'; next_char(state); break;
|
||||
; case 'v': ch = '\v'; next_char(state); break;
|
||||
; default:
|
||||
; release_assert(NOTREACHED("Invalid escape sequence in string."));
|
||||
; ch = '#';
|
||||
; next_char(state);
|
||||
; break;
|
||||
; }
|
||||
; }
|
||||
;
|
||||
; if (skip_ws)
|
||||
; {
|
||||
; bool hit_eol = false;
|
||||
;
|
||||
; /* Slightly different from the normal skip_whitespace(); skips
|
||||
; * whitespace through the _second_ EOL following the backslash. */
|
||||
; do {
|
||||
; if (state->ch == '\n')
|
||||
; {
|
||||
; if (!hit_eol)
|
||||
; {
|
||||
; hit_eol = true;
|
||||
; }
|
||||
; else
|
||||
; {
|
||||
; next_char(state);
|
||||
; break;
|
||||
; }
|
||||
; }
|
||||
;
|
||||
; next_char(state);
|
||||
; } while (isspace(state->ch));
|
||||
; }
|
||||
; else
|
||||
; {
|
||||
; buffer[length++] = ch;
|
||||
; }
|
||||
; }
|
||||
;
|
||||
; next_char(state);
|
||||
;
|
||||
; value = make_byte_string(length, '\0');
|
||||
; memcpy(_get_byte_string(value)->bytes, buffer, length);
|
||||
; free(buffer);
|
||||
;
|
||||
; return value;
|
||||
; }
|
||||
|
||||
(define (read-vector) undefined)
|
||||
(define (read-struct) undefined)
|
||||
(define (read-symbol) undefined)
|
||||
|
||||
(define (skip-whitespace)
|
||||
(cond
|
||||
[(whitespace? current-char)
|
||||
(next-char)
|
||||
(skip-whitespace)]
|
||||
[(eq? current-char #\;)
|
||||
(define (skip-until-newline)
|
||||
(next-char)
|
||||
(if (eq? current-char #\Newline)
|
||||
(next-char)
|
||||
(unless eof? (skip-until-newline))))
|
||||
(skip-until-newline)]))
|
||||
|
||||
(define (next-char)
|
||||
(if eof?
|
||||
#f
|
||||
(let* ([str (make-byte-string 1 0)]
|
||||
[res (posix-read fd str 1)])
|
||||
(if (eq? res 1)
|
||||
(let ([ch (byte-string-ref str 0)])
|
||||
(set! current-char ch)
|
||||
(if (fix= ch #\Newline)
|
||||
(begin
|
||||
(set! line (fix+ line 1))
|
||||
(set! column 0))
|
||||
(set! column (fix+ column 1)))
|
||||
ch)
|
||||
(begin
|
||||
(set! current-char #f)
|
||||
(set! eof? #t)
|
||||
#f)))))
|
||||
|
||||
(define (unexpected-eof)
|
||||
(toplevel-return "unexpected-eof"))
|
||||
|
||||
(define (unexpected-char)
|
||||
(toplevel-return "unexpected-char" current-char (list line column)))
|
||||
|
||||
(next-char)
|
||||
(values
|
||||
(read-one-value)
|
||||
current-char))))
|
||||
|
||||
(define (whitespace? ch)
|
||||
(memq? ch '(#\Space #\Tab #\VTab #\Page #\Newline)))
|
||||
|
||||
(define (numeric-char? ch)
|
||||
(and (fix>= ch #\0) (fix<= ch #\9)))
|
||||
|
||||
(define (upcase-char? ch)
|
||||
(and (fix>= ch #\A) (fix<= ch #\Z)))
|
||||
|
||||
(define (downcase-char? ch)
|
||||
(and (fix>= ch #\a) (fix<= ch #\z)))
|
||||
|
||||
(define (alphabetic-char? ch)
|
||||
(or (upcase-char? ch) (downcase-char? ch)))
|
||||
|
||||
(define (alphanumeric-char? ch)
|
||||
(or (numeric-char? ch) (alphabetic-char? ch)))
|
||||
|
||||
(define (symbol-char? ch)
|
||||
(or (alphanumeric-char? ch)
|
||||
(memq? ch '(#\! #\$ #\% #\& #\* #\+
|
||||
#\- #\/ #\< #\= #\> #\?
|
||||
#\@ #\\ #\^ #\_ #\| #\~))))
|
||||
|
||||
(define (reverse lst [newcdr '()])
|
||||
(if (pair? lst)
|
||||
(reverse (cdr lst) (cons (car lst) newcdr))
|
||||
newcdr))
|
||||
|
||||
(define (list-length lst)
|
||||
(define (iter n rst)
|
||||
(if (pair? rst)
|
||||
(iter (fix+ n 1) (cdr rst))
|
||||
n))
|
||||
(iter 0 lst))
|
||||
|
||||
(define (digit->integer ch)
|
||||
(cond
|
||||
[(numeric-char? ch) (fix- ch #\0)]
|
||||
[(upcase-char? ch) (fix+ 10 (fix- ch #\A))]
|
||||
[(downcase-char? ch) (fix+ 10 (fix- ch #\a))]
|
||||
[else #f]))
|
||||
|
||||
(read-from-fd 0)
|
||||
|
||||
; static value_t read_number(reader_state_t *state)
|
||||
; {
|
||||
; bool negative = false;
|
||||
; fixnum_t num = 0;
|
||||
; native_float_t flt;
|
||||
; int radix;
|
||||
;
|
||||
; if (state->ch == '-')
|
||||
; {
|
||||
; negative = true;
|
||||
; next_char(state);
|
||||
; }
|
||||
; else if (state->ch == '+')
|
||||
; {
|
||||
; next_char(state);
|
||||
; }
|
||||
;
|
||||
; release_assert(isdigit(state->ch));
|
||||
;
|
||||
; if (state->ch == '0')
|
||||
; {
|
||||
; next_char(state);
|
||||
;
|
||||
; switch (state->ch)
|
||||
; {
|
||||
; case 'X':
|
||||
; case 'x':
|
||||
; radix = 16;
|
||||
; next_char(state);
|
||||
; break;
|
||||
; case 'B':
|
||||
; case 'b':
|
||||
; radix = 2;
|
||||
; next_char(state);
|
||||
; break;
|
||||
; case '0' ... '9':
|
||||
; radix = 8;
|
||||
; break;
|
||||
; default:
|
||||
; radix = 10;
|
||||
; break;
|
||||
; }
|
||||
;
|
||||
; if (radix != 10)
|
||||
; {
|
||||
; release_assert(isalnum(state->ch));
|
||||
; release_assert(char_to_digit(state->ch) < radix);
|
||||
; }
|
||||
; }
|
||||
; else
|
||||
; {
|
||||
; radix = 10;
|
||||
; }
|
||||
;
|
||||
; while (isalnum(state->ch))
|
||||
; {
|
||||
; fixnum_t digit = char_to_digit(state->ch);
|
||||
;
|
||||
; if (digit >= radix)
|
||||
; break;
|
||||
;
|
||||
; release_assert(num <= (FIXNUM_MAX/radix));
|
||||
;
|
||||
; num *= radix;
|
||||
; num += digit;
|
||||
;
|
||||
; next_char(state);
|
||||
; }
|
||||
;
|
||||
; if ((radix != 10) || ((state->ch != '.') && (state->ch != 'E') && (state->ch != 'e')))
|
||||
; {
|
||||
; if (negative)
|
||||
; num = -num;
|
||||
; release_assert(!issymbol(state->ch));
|
||||
; release_assert((FIXNUM_MIN <= num) && (num <= FIXNUM_MAX));
|
||||
; return fixnum_value(num);
|
||||
; }
|
||||
;
|
||||
; /*
|
||||
; * Floating-point. No guarantees as to precision... really should use binary/hex.
|
||||
; */
|
||||
;
|
||||
; flt = num;
|
||||
;
|
||||
; if (state->ch == '.')
|
||||
; {
|
||||
; next_char(state);
|
||||
;
|
||||
; for (native_float_t pv = negative ? -0.1 : 0.1; isdigit(state->ch); pv /= 10)
|
||||
; {
|
||||
; flt += (state->ch - '0') * pv;
|
||||
; next_char(state);
|
||||
; }
|
||||
; }
|
||||
;
|
||||
; if ((state->ch == 'E') || (state->ch == 'e'))
|
||||
; {
|
||||
; next_char(state);
|
||||
; num = read_fixnum(state, 10);
|
||||
; flt *= pow(10, _get_fixnum(num));
|
||||
; }
|
||||
;
|
||||
; if (negative)
|
||||
; flt = -flt;
|
||||
;
|
||||
; release_assert(!issymbol(state->ch));
|
||||
; return make_float(flt);
|
||||
; }
|
||||
;
|
||||
; static value_t read_vector(reader_state_t *state)
|
||||
; {
|
||||
; gc_root_t list_root;
|
||||
; size_t length = 0;
|
||||
; value_t value;
|
||||
; value_t item;
|
||||
;
|
||||
; register_gc_root(&list_root, read_list(state));
|
||||
;
|
||||
; for (value_t item = list_root.value; !is_nil(item); item = CDR(item))
|
||||
; ++length;
|
||||
;
|
||||
; value = make_vector(length, UNDEFINED);
|
||||
;
|
||||
; item = list_root.value;
|
||||
; for (size_t i = 0; i < length; ++i)
|
||||
; {
|
||||
; _get_vector(value)->elements[i] = _CAR(item);
|
||||
; /* No write barrier needed here. */
|
||||
; item = _CDR(item);
|
||||
; }
|
||||
;
|
||||
; unregister_gc_root(&list_root);
|
||||
;
|
||||
; return value;
|
||||
; }
|
||||
;
|
||||
; static value_t read_struct(reader_state_t *state)
|
||||
; {
|
||||
; gc_root_t list_root;
|
||||
; size_t slots = 0;
|
||||
; value_t value;
|
||||
; value_t item;
|
||||
;
|
||||
; register_gc_root(&list_root, read_list(state));
|
||||
;
|
||||
; for (item = CDR(list_root.value); !is_nil(item); item = CDR(item))
|
||||
; ++slots;
|
||||
;
|
||||
; value = make_struct(_CAR(list_root.value), slots);
|
||||
;
|
||||
; item = _CDR(list_root.value);
|
||||
; for (size_t i = 0; i < slots; ++i)
|
||||
; {
|
||||
; _get_struct(value)->slots[i] = _CAR(item);
|
||||
; /* No write barrier needed here. */
|
||||
; item = _CDR(item);
|
||||
; }
|
||||
;
|
||||
; unregister_gc_root(&list_root);
|
||||
;
|
||||
; return value;
|
||||
; }
|
||||
|
||||
; vim:set syntax=scheme sw=2 expandtab:
|
||||
Loading…
Reference in New Issue