#include #include #include #include #include #include #include "gc.h" #include "builtin.h" typedef struct reader_state { FILE *file; int ch; int line; int column; gc_root_t ref_alist; gc_root_t weak_list; gc_root_t ref_list; } reader_state_t; static value_t read_one_value(reader_state_t *state); static value_t read_special(reader_state_t *state); static value_t read_list(reader_state_t *state); static value_t read_fixnum(reader_state_t *state, int radix); static value_t read_number(reader_state_t *state); static value_t read_string(reader_state_t *state); static value_t read_box(reader_state_t *state); static value_t read_vector(reader_state_t *state); static value_t read_struct(reader_state_t *state); static value_t read_weak_box(reader_state_t *state); static value_t read_definition(reader_state_t *state); static value_t read_placeholder(reader_state_t *state); static void next_char(reader_state_t *state); static void skip_whitespace(reader_state_t *state); static bool is_placeholder(reader_state_t *state, value_t value); static value_t get_placeholder(reader_state_t *state, fixnum_t ref); static void set_placeholder(reader_state_t *state, value_t place, value_t value); static void finalize_placeholders(reader_state_t *state); static value_t patch_placeholders(reader_state_t *state, value_t v); static void tree_replace(value_t *in, value_t oldval, value_t newval); value_t read_value(FILE *f) { reader_state_t state; value_t result; register_gc_root(&state.ref_alist, NIL); register_gc_root(&state.weak_list, NIL); register_gc_root(&state.ref_list, NIL); state.file = f; state.line = 1; state.column = 0; next_char(&state); result = read_one_value(&state); ungetc(state.ch, f); result = patch_placeholders(&state, result); unregister_gc_root(&state.ref_list); unregister_gc_root(&state.weak_list); unregister_gc_root(&state.ref_alist); return result; } static value_t read_one_value(reader_state_t *state) { skip_whitespace(state); release_assert(state->ch != EOF); switch (state->ch) { case '#': return read_special(state); case '(': return read_list(state); case '-': case '+': case '0' ... '9': return read_number(state); case '\"': return read_string(state); default: fprintf(stderr, "Unexpected character '%c' in input on line %d, column %d.\n", state->ch, state->line, state->column); release_assert(NOTREACHED("Unexpected character in input.")); return UNDEFINED; } } bool issymbol(int ch) { switch (ch) { case 'A' ... 'Z': case 'a' ... 'z': case '0' ... '9': case '!': case '$': case '%': case '&': case '*': case '+': case '-': case '/': case '<': case '=': case '>': case '?': case '@': case '\\': case '^': case '_': case '|': case '~': return true; default: return false; } } static value_t read_special(reader_state_t *state) { next_char(state); release_assert(state->ch != EOF); switch (state->ch) { case '!': release_assert((state->line == 1) && (state->column == 2)); do { next_char(state); } while (state->ch != '\n'); return read_one_value(state); case 'F': case 'f': next_char(state); release_assert(!issymbol(state->ch)); return FALSE_VALUE; case 'T': case 't': next_char(state); release_assert(!issymbol(state->ch)); return TRUE_VALUE; case '&': return read_box(state); case '(': return read_vector(state); case 'S': case 's': next_char(state); release_assert(state->ch == '('); return read_struct(state); case 'W': case 'w': next_char(state); release_assert(state->ch == '&'); return read_weak_box(state); case '0' ... '9': return read_definition(state); case '=': return read_placeholder(state); default: release_assert(NOTREACHED("Invalid character in special value.")); return UNDEFINED; } } static void reverse_list(value_t *list, value_t newcdr) { value_t lst = *list; while (is_pair(lst)) { value_t temp = _get_pair(lst)->cdr; _get_pair(lst)->cdr = newcdr; newcdr = lst; lst = temp; } *list = newcdr; } static value_t read_list(reader_state_t *state) { gc_root_t list_root; bool done = false; register_gc_root(&list_root, NIL); next_char(state); while (!done) { skip_whitespace(state); release_assert(state->ch != EOF); switch (state->ch) { case '.': { release_assert(!is_nil(list_root.value)); next_char(state); value_t temp = read_one_value(state); reverse_list(&list_root.value, temp); skip_whitespace(state); release_assert(state->ch == ')'); next_char(state); done = true; } break; case ')': { reverse_list(&list_root.value, NIL); next_char(state); done = true; } break; default: { value_t temp = read_one_value(state); list_root.value = cons(temp, list_root.value); } break; } } unregister_gc_root(&list_root); return list_root.value; } static short int char_to_digit(int ch) { if (isdigit(ch)) { return ch - '0'; } else { assert(isalpha(ch)); return 10 + (toupper(ch) - 'A'); } } static value_t read_fixnum(reader_state_t *state, int radix) { fixnum_t num = 0; bool negative = false; assert((0 <= radix) && (radix <= 36)); if (state->ch == '-') { negative = true; next_char(state); } else if (state->ch == '+') { next_char(state); } release_assert(isdigit(state->ch)); if (radix == 0) { if (state->ch == '0') { next_char(state); switch (state->ch) { case 'X': case 'x': next_char(state); radix = 16; break; case 'B': case 'b': next_char(state); radix = 2; break; default: radix = 8; break; } if (radix != 8) { /* Make sure we have at least one digit; if octal then the '0' counts instead */ release_assert(isalnum(state->ch)); release_assert(char_to_digit(state->ch) < radix); } } else { radix = 10; } } while (isalnum(state->ch)) { fixnum_t digit = char_to_digit(state->ch); if (digit >= radix) break; release_assert(num <= (FIXNUM_MAX/radix)); num *= radix; num += digit; next_char(state); } if (negative) num = -num; release_assert((FIXNUM_MIN <= num) && (num <= FIXNUM_MAX)); return fixnum_value(num); } static value_t read_number(reader_state_t *state) { bool negative = false; fixnum_t num = 0; native_float_t flt; int radix; if (state->ch == '-') { negative = true; next_char(state); } else if (state->ch == '+') { next_char(state); } release_assert(isdigit(state->ch)); if (state->ch == '0') { next_char(state); switch (state->ch) { case 'X': case 'x': next_char(state); radix = 16; break; case 'B': case 'b': next_char(state); radix = 2; break; case '0' ... '9': ungetc(state->ch, state->file); state->ch = '0'; radix = 8; break; default: ungetc(state->ch, state->file); state->ch = '0'; radix = 10; break; } if (radix != 8) { /* Make sure we have at least one digit; if octal then the '0' counts instead */ release_assert(isalnum(state->ch)); release_assert(char_to_digit(state->ch) < radix); } } else { radix = 10; } while (isalnum(state->ch)) { fixnum_t digit = char_to_digit(state->ch); if (digit >= radix) break; release_assert(num <= (FIXNUM_MAX/radix)); num *= radix; num += digit; next_char(state); } if ((radix != 10) || ((state->ch != '.') && (state->ch != 'E') && (state->ch != 'e'))) { if (negative) num = -num; release_assert(!issymbol(state->ch)); release_assert((FIXNUM_MIN <= num) && (num <= FIXNUM_MAX)); return fixnum_value(num); } /* * Floating-point. No guarantees as to precision... really should use binary/hex. */ flt = num; if (state->ch == '.') { next_char(state); for (native_float_t pv = negative ? -0.1 : 0.1; isdigit(state->ch); pv /= 10) { flt += (state->ch - '0') * pv; next_char(state); } } if ((state->ch == 'E') || (state->ch == 'e')) { next_char(state); num = read_fixnum(state, 10); flt *= pow(10, _get_fixnum(num)); } if (negative) flt = -flt; release_assert(!issymbol(state->ch)); return make_float(flt); } static value_t read_string(reader_state_t *state) { char *buffer = (char*)malloc(128); size_t buffer_size = 128; size_t length = 0; value_t value; release_assert(buffer != NULL); next_char(state); while (state->ch != '"') { bool skip_ws = false; char ch; if ((buffer_size - length) < 1) { release_assert(buffer_size <= INT32_MAX / 3); buffer_size = (3 * buffer_size) / 2; buffer = realloc(buffer, buffer_size); release_assert(buffer != NULL); } ch = state->ch; next_char(state); if (ch == '\\') { switch (state->ch) { case 'o': next_char(state); release_assert(('0' <= state->ch) && (state->ch <= '7')); /* fall through */ case '0' ... '7': ch = 0; /* One to three octal digits */ for (int i = 0; i < 3; ++i) { ch = 8 * ch + (state->ch - '0'); next_char(state); if ((state->ch < '0') || (state->ch > '7')) break; } break; case 'X': case 'x': ch = 0; next_char(state); release_assert(isxdigit(state->ch)); /* One or two hex digits */ for (int i = 0; i < 2; ++i) { int n = isdigit(state->ch) ? (state->ch - '0') : (10 + toupper(state->ch) - 'A'); ch = 16 * ch + n; next_char(state); if (!isxdigit(state->ch)) break; } break; case ' ': case '\t': case '\v': case '\n': skip_ws = true; break; case ';': /* Treats everything that follows on the same line as a comment, * and additionally skips leading whitespace on the next line. */ while (state->ch != '\n') next_char(state); skip_ws = true; break; case '\\': ch = '\\'; next_char(state); break; case '\'': ch = '\''; next_char(state); break; case '\"': ch = '\"'; next_char(state); break; case 'a': ch = '\a'; next_char(state); break; case 'b': ch = '\b'; next_char(state); break; case 'f': ch = '\f'; next_char(state); break; case 'n': ch = '\n'; next_char(state); break; case 'r': ch = '\r'; next_char(state); break; case 't': ch = '\t'; next_char(state); break; case 'v': ch = '\v'; next_char(state); break; default: release_assert(NOTREACHED("Invalid escape sequence in string.")); ch = '#'; next_char(state); break; } } if (skip_ws) { bool hit_eol = false; /* Slightly different from the normal skip_whitespace(); skips * whitespace through the _second_ EOL following the backslash. */ do { if (state->ch == '\n') { if (!hit_eol) { hit_eol = true; } else { next_char(state); break; } } next_char(state); } while (isspace(state->ch)); } else { buffer[length++] = ch; } } next_char(state); value = make_byte_string(length, '\0'); memcpy(_get_byte_string(value)->bytes, buffer, length); free(buffer); return value; } static value_t read_box(reader_state_t *state) { next_char(state); return make_box(read_one_value(state)); } static value_t read_vector(reader_state_t *state) { gc_root_t list_root; size_t length = 0; value_t value; value_t item; register_gc_root(&list_root, read_list(state)); for (value_t item = list_root.value; !is_nil(item); item = CDR(item)) ++length; value = make_vector(length, UNDEFINED); item = list_root.value; for (size_t i = 0; i < length; ++i) { _get_vector(value)->elements[i] = _CAR(item); item = _CDR(item); } unregister_gc_root(&list_root); return value; } static value_t read_struct(reader_state_t *state) { gc_root_t list_root; size_t slots = 0; value_t value; value_t item; register_gc_root(&list_root, read_list(state)); for (item = CDR(list_root.value); !is_nil(item); item = CDR(item)) ++slots; value = make_struct(_CAR(list_root.value), slots); item = _CDR(list_root.value); for (size_t i = 0; i < slots; ++i) { _get_struct(value)->slots[i] = _CAR(item); item = _CDR(item); } unregister_gc_root(&list_root); return value; } static value_t read_weak_box(reader_state_t *state) { value_t value; next_char(state); value = read_one_value(state); state->weak_list.value = cons(value, state->weak_list.value); return make_weak_box(value); } static value_t read_definition(reader_state_t *state) { fixnum_t ref = get_fixnum(read_fixnum(state, 0)); gc_root_t place_root; value_t v; release_assert(state->ch == '='); next_char(state); register_gc_root(&place_root, get_placeholder(state, ref)); v = read_one_value(state); set_placeholder(state, place_root.value, v); unregister_gc_root(&place_root); return v; } static value_t read_placeholder(reader_state_t *state) { next_char(state); skip_whitespace(state); release_assert(state->ch != EOF); if (state->ch == '"') { char *name = value_to_string(read_string(state)); value_t bi = lookup_builtin(name); free(name); release_assert(bi != FALSE_VALUE); return bi; } else { return get_placeholder(state, get_fixnum(read_fixnum(state, 0))); } } static bool is_placeholder(reader_state_t *state, value_t value) { for (value_t item = state->ref_alist.value; !is_nil(item); item = _CDDR(item)) { if (value == item) return true; } return false; } static value_t get_placeholder(reader_state_t *state, fixnum_t ref) { value_t refval = fixnum_value(ref); for (value_t item = state->ref_alist.value; !is_nil(item); item = _CDDR(item)) { if (_CAR(item) == refval) return item; } /* No existing placeholder with that number; create a new one. */ state->ref_alist.value = cons(UNDEFINED, state->ref_alist.value); state->ref_alist.value = cons(refval, state->ref_alist.value); return state->ref_alist.value; } static void set_placeholder(reader_state_t *state, value_t place, value_t value) { assert(is_placeholder(state, place)); release_assert(is_undefined(_CADR(place))); _CADR(place) = value; } static void finalize_placeholders(reader_state_t *state) { bool changed = true; /* We're done when no placeholders link to other placeholders. */ while (changed) { changed = false; /* Resolve one level of placeholder-to-placeholder links. * Self-links indicate cycles and are replaced with UNDEFINED. */ for (value_t item = state->ref_alist.value; !is_nil(item); item = _CDDR(item)) { if (_CADR(item) == item) { _CADR(item) = UNDEFINED; changed = true; } else if (is_placeholder(state, _CADR(item))) { _CADR(item) = _CADR(_CADR(item)); changed = true; } } } } static value_t patch_placeholders(reader_state_t *state, value_t v) { finalize_placeholders(state); for (value_t item = state->ref_alist.value; !is_nil(item); item = _CDDR(item)) { assert(!is_placeholder(state, _CADR(item))); tree_replace(&v, item, _CADR(item)); } return v; } typedef struct seen_value { value_t value; struct seen_value *prev; } seen_value_t; static void _tree_replace(value_t *in, value_t oldval, value_t newval, seen_value_t *seen) { seen_value_t this_seen = { *in, seen }; for (seen_value_t *item = seen; item; item = item->prev) { if (*in == item->value) return; } if (*in == oldval) { *in = newval; } else if (is_box(*in)) { _tree_replace(&_get_box(*in)->value, oldval, newval, &this_seen); } else if (is_pair(*in)) { _tree_replace(&_get_pair(*in)->car, oldval, newval, &this_seen); _tree_replace(&_get_pair(*in)->cdr, oldval, newval, &this_seen); } else if (is_vector(*in)) { for (size_t i = 0; i < _get_vector(*in)->size; ++i) { _tree_replace(&_get_vector(*in)->elements[i], oldval, newval, &this_seen); } } else if (is_struct(*in)) { _tree_replace(&_get_struct(*in)->type, oldval, newval, &this_seen); for (size_t i = 0; i < _get_struct(*in)->nslots; ++i) _tree_replace(&_get_struct(*in)->slots[i], oldval, newval, &this_seen); } else if (is_weak_box(*in)) { _tree_replace(&_get_weak_box(*in)->value, oldval, newval, &this_seen); } } static void tree_replace(value_t *in, value_t oldval, value_t newval) { _tree_replace(in, oldval, newval, NULL); } static void next_char(reader_state_t *state) { state->ch = fgetc(state->file); if (state->ch == '\n') { ++state->line; state->column = 0; } else { ++state->column; } } static void skip_whitespace(reader_state_t *state) { for (;;) { if (isspace(state->ch)) { next_char(state); } else if (state->ch == ';') { /* Comments count as whitespace */ do { next_char(state); if (state->ch == '\n') { next_char(state); break; } } while (state->ch != EOF); } else { break; } } } /* vim:set sw=2 expandtab: */