#define _XOPEN_SOURCE 500 #include #include #include #include #include #include #include #include #include #include #include "gc.h" #include "builtin.h" #include "reader.h" typedef struct reader_state { FILE *file; int ch; int line; int column; gc_root_t weak_list; gc_root_t ref_list; } reader_state_t; #define REFERENCE_SLOT_IDENT 0 #define REFERENCE_SLOT_VALUE 1 #define REFERENCE_SLOT_PATCHED 2 #define REFERENCE_SLOTS 3 #define REF_IDENT(ref) _SLOT_VALUE(REFERENCE, (ref), IDENT) #define REF_VALUE(ref) _SLOT_VALUE(REFERENCE, (ref), VALUE) #define REF_PATCHED(ref) _SLOT_VALUE(REFERENCE, (ref), PATCHED) /* Wraps values which should be made immutable after substitution */ #define IMMUTABLE_PH_SLOT_VALUE 0 #define IMMUTABLE_PH_SLOTS 1 #define IMMUTABLE_PH_VALUE(ph) _SLOT_VALUE(IMMUTABLE_PH, (ph), VALUE) /* Wraps structure instances, where the type is always immutable */ #define STRUCT_PH_SLOT_TYPE 0 #define STRUCT_PH_SLOT_VALUES 1 #define STRUCT_PH_SLOT_RESULT 2 #define STRUCT_PH_SLOTS 3 #define STRUCT_PH_TYPE(ph) _SLOT_VALUE(STRUCT_PH, (ph), TYPE) #define STRUCT_PH_VALUES(ph) _SLOT_VALUE(STRUCT_PH, (ph), VALUES) #define STRUCT_PH_RESULT(ph) _SLOT_VALUE(STRUCT_PH, (ph), RESULT) static gc_root_t reference_root; static gc_root_t struct_ph_root; static gc_root_t immutable_ph_root; static value_t read_one_value(reader_state_t *state); static value_t read_special(reader_state_t *state); static value_t read_list(reader_state_t *state); static value_t read_fixnum(reader_state_t *state, int radix); static value_t read_number(reader_state_t *state); static value_t read_string(reader_state_t *state); static value_t read_immutable(reader_state_t *state); static value_t read_box(reader_state_t *state); static value_t read_vector(reader_state_t *state); static value_t read_struct(reader_state_t *state); static value_t read_weak_box(reader_state_t *state); static value_t read_definition(reader_state_t *state); static value_t read_reference(reader_state_t *state); static value_t read_indirect(value_t path); static value_t freeze(value_t val); static void next_char(reader_state_t *state); static void skip_whitespace(reader_state_t *state); static bool is_reference(reader_state_t *state, value_t value); static value_t get_reference(reader_state_t *state, fixnum_t ref); static void set_reference(reader_state_t *state, value_t place, value_t value); static void finalize_references(reader_state_t *state); static value_t patch_placeholders(reader_state_t *state, value_t in); static inline void next_char(reader_state_t *state) { if (state->ch != EOF) { state->ch = fgetc(state->file); if (state->ch == '\n') { ++state->line; state->column = 0; } else { ++state->column; } } } void reader_init(void) { register_gc_root(&reference_root, make_struct_type(NIL, REFERENCE_SLOTS, FALSE_VALUE)); register_gc_root(&struct_ph_root, make_struct_type(NIL, STRUCT_PH_SLOTS, FALSE_VALUE)); register_gc_root(&immutable_ph_root, make_struct_type(NIL, IMMUTABLE_PH_SLOTS, FALSE_VALUE)); } value_t read_value_from_file(FILE *f) { reader_state_t state; value_t result; register_gc_root(&state.weak_list, NIL); register_gc_root(&state.ref_list, NIL); state.file = f; state.line = 1; state.column = 0; next_char(&state); result = read_one_value(&state); ungetc(state.ch, f); result = patch_placeholders(&state, result); unregister_gc_root(&state.weak_list); unregister_gc_root(&state.ref_list); return result; } value_t read_value_from_path(const char *path) { FILE *f = fopen(path, "r"); const char *last_slash; value_t v; int dirfd; release_assert(f != NULL); dirfd = open(".", O_RDONLY); release_assert(dirfd >= 0); last_slash = strrchr(path, '/'); if (last_slash) { size_t bytes = last_slash - path; char *dirname = (char*)malloc(bytes+1); memcpy(dirname, path, bytes); dirname[bytes] = '\0'; chdir(dirname); } v = read_value_from_file(f); fchdir(dirfd); close(dirfd); fclose(f); return v; } static value_t read_one_value(reader_state_t *state) { skip_whitespace(state); release_assert(state->ch != EOF); switch (state->ch) { case '#': return read_special(state); case '(': return read_list(state); case '-': case '+': case '0' ... '9': return read_number(state); case '\"': return read_string(state); default: fprintf(stderr, "Unexpected character '%c' in input on line %d, column %d.\n", state->ch, state->line, state->column); release_assert(NOTREACHED("Unexpected character in input.")); return UNDEFINED; } } bool issymbol(int ch) { switch (ch) { case 'A' ... 'Z': case 'a' ... 'z': case '0' ... '9': case '!': case '$': case '%': case '&': case '*': case '+': case '-': case '/': case '<': case '=': case '>': case '?': case '@': case '\\': case '^': case '_': case '|': case '~': return true; default: return false; } } static value_t read_special(reader_state_t *state) { next_char(state); release_assert(state->ch != EOF); switch (state->ch) { case '!': release_assert((state->line == 1) && (state->column == 2)); do { next_char(state); } while (state->ch != '\n'); return read_one_value(state); case 'F': case 'f': next_char(state); release_assert(!issymbol(state->ch)); return FALSE_VALUE; case 'T': case 't': next_char(state); release_assert(!issymbol(state->ch)); return TRUE_VALUE; case '&': return read_box(state); case '(': return read_vector(state); case 'S': case 's': next_char(state); release_assert(state->ch == '('); return read_struct(state); case 'W': case 'w': next_char(state); release_assert(state->ch == '&'); return read_weak_box(state); case '0' ... '9': return read_definition(state); case '=': return read_reference(state); case 'I': case 'i': next_char(state); return read_indirect(read_string(state)); case '@': next_char(state); return read_immutable(state); default: release_assert(NOTREACHED("Invalid character in special value.")); return UNDEFINED; } } static void reverse_list(value_t *list, value_t newcdr) { value_t lst = *list; while (is_pair(lst)) { value_t temp = _get_pair(lst)->cdr; _get_pair(lst)->cdr = newcdr; WRITE_BARRIER(lst); newcdr = lst; lst = temp; } *list = newcdr; } static value_t read_list(reader_state_t *state) { gc_root_t list_root; bool done = false; register_gc_root(&list_root, NIL); next_char(state); while (!done) { skip_whitespace(state); release_assert(state->ch != EOF); switch (state->ch) { case '.': { release_assert(!is_nil(list_root.value)); next_char(state); value_t temp = read_one_value(state); reverse_list(&list_root.value, temp); skip_whitespace(state); release_assert(state->ch == ')'); next_char(state); done = true; } break; case ')': { reverse_list(&list_root.value, NIL); next_char(state); done = true; } break; default: { value_t temp = read_one_value(state); list_root.value = cons(temp, list_root.value); } break; } } unregister_gc_root(&list_root); return list_root.value; } static short int char_to_digit(int ch) { if (isdigit(ch)) { return ch - '0'; } else { assert(isalpha(ch)); return 10 + (toupper(ch) - 'A'); } } static value_t read_fixnum(reader_state_t *state, int radix) { fixnum_t num = 0; bool negative = false; assert((0 <= radix) && (radix <= 36)); if (state->ch == '-') { negative = true; next_char(state); } else if (state->ch == '+') { next_char(state); } release_assert(isdigit(state->ch)); if (radix == 0) { if (state->ch == '0') { next_char(state); switch (state->ch) { case 'X': case 'x': next_char(state); radix = 16; break; case 'B': case 'b': next_char(state); radix = 2; break; default: radix = 8; break; } if (radix != 8) { /* Make sure we have at least one digit; if octal then the '0' counts instead */ release_assert(isalnum(state->ch)); release_assert(char_to_digit(state->ch) < radix); } } else { radix = 10; } } while (isalnum(state->ch)) { fixnum_t digit = char_to_digit(state->ch); if (digit >= radix) break; release_assert(num <= (FIXNUM_MAX/radix)); num *= radix; num += digit; next_char(state); } if (negative) num = -num; release_assert((FIXNUM_MIN <= num) && (num <= FIXNUM_MAX)); return fixnum_value(num); } static value_t read_number(reader_state_t *state) { bool negative = false; fixnum_t num = 0; native_float_t flt; int radix; if (state->ch == '-') { negative = true; next_char(state); } else if (state->ch == '+') { next_char(state); } release_assert(isdigit(state->ch)); if (state->ch == '0') { next_char(state); switch (state->ch) { case 'X': case 'x': radix = 16; next_char(state); break; case 'B': case 'b': radix = 2; next_char(state); break; case '0' ... '9': radix = 8; break; default: radix = 10; break; } if (radix != 10) { release_assert(isalnum(state->ch)); release_assert(char_to_digit(state->ch) < radix); } } else { radix = 10; } while (isalnum(state->ch)) { fixnum_t digit = char_to_digit(state->ch); if (digit >= radix) break; release_assert(num <= (FIXNUM_MAX/radix)); num *= radix; num += digit; next_char(state); } if ((radix != 10) || ((state->ch != '.') && (state->ch != 'E') && (state->ch != 'e'))) { if (negative) num = -num; release_assert(!issymbol(state->ch)); release_assert((FIXNUM_MIN <= num) && (num <= FIXNUM_MAX)); return fixnum_value(num); } /* * Floating-point. No guarantees as to precision... really should use binary/hex. */ flt = num; if (state->ch == '.') { next_char(state); for (native_float_t pv = negative ? -0.1 : 0.1; isdigit(state->ch); pv /= 10) { flt += (state->ch - '0') * pv; next_char(state); } } if ((state->ch == 'E') || (state->ch == 'e')) { next_char(state); num = read_fixnum(state, 10); flt *= pow(10, _get_fixnum(num)); } if (negative) flt = -flt; release_assert(!issymbol(state->ch)); return make_float(flt); } static value_t read_string(reader_state_t *state) { char *buffer = (char*)malloc(128); size_t buffer_size = 128; size_t length = 0; value_t value; release_assert(buffer != NULL); release_assert(state->ch == '"'); next_char(state); while (state->ch != '"') { bool skip_ws = false; char ch; release_assert(state->ch != EOF); if ((buffer_size - length) < 1) { release_assert(buffer_size <= INT32_MAX / 3); buffer_size = (3 * buffer_size) / 2; buffer = realloc(buffer, buffer_size); release_assert(buffer != NULL); } ch = state->ch; next_char(state); if (ch == '\\') { switch (state->ch) { case 'o': next_char(state); release_assert(('0' <= state->ch) && (state->ch <= '7')); /* fall through */ case '0' ... '7': ch = 0; /* One to three octal digits */ for (int i = 0; i < 3; ++i) { ch = 8 * ch + (state->ch - '0'); next_char(state); if ((state->ch < '0') || (state->ch > '7')) break; } break; case 'X': case 'x': ch = 0; next_char(state); release_assert(isxdigit(state->ch)); /* One or two hex digits */ for (int i = 0; i < 2; ++i) { int n = isdigit(state->ch) ? (state->ch - '0') : (10 + toupper(state->ch) - 'A'); ch = 16 * ch + n; next_char(state); if (!isxdigit(state->ch)) break; } break; case ' ': case '\t': case '\v': case '\n': skip_ws = true; break; case ';': /* Treats everything that follows on the same line as a comment, * and additionally skips leading whitespace on the next line. */ while (state->ch != '\n') { release_assert(state->ch != EOF); next_char(state); } skip_ws = true; break; case '\\': ch = '\\'; next_char(state); break; case '\'': ch = '\''; next_char(state); break; case '\"': ch = '\"'; next_char(state); break; case 'a': ch = '\a'; next_char(state); break; case 'b': ch = '\b'; next_char(state); break; case 'f': ch = '\f'; next_char(state); break; case 'n': ch = '\n'; next_char(state); break; case 'r': ch = '\r'; next_char(state); break; case 't': ch = '\t'; next_char(state); break; case 'v': ch = '\v'; next_char(state); break; default: release_assert(NOTREACHED("Invalid escape sequence in string.")); ch = '#'; next_char(state); break; } } if (skip_ws) { bool hit_eol = false; /* Slightly different from the normal skip_whitespace(); skips * whitespace through the _second_ EOL following the backslash. */ do { if (state->ch == '\n') { if (!hit_eol) { hit_eol = true; } else { next_char(state); break; } } next_char(state); } while (isspace(state->ch)); } else { buffer[length++] = ch; } } next_char(state); value = make_byte_string(length, '\0'); memcpy(_get_byte_string(value)->bytes, buffer, length); free(buffer); return value; } static value_t read_immutable(reader_state_t *state) { gc_root_t ph_root; value_t val; register_gc_root(&ph_root, make_struct(immutable_ph_root.value)); val = read_one_value(state); release_assert(!struct_is_a(val, reference_root.value)); IMMUTABLE_PH_VALUE(ph_root.value) = val; WRITE_BARRIER(ph_root.value); unregister_gc_root(&ph_root); return ph_root.value; } static value_t read_box(reader_state_t *state) { next_char(state); return make_box(read_one_value(state)); } static value_t read_vector(reader_state_t *state) { gc_root_t list_root; size_t length = 0; value_t value; value_t item; register_gc_root(&list_root, read_list(state)); for (value_t item = list_root.value; !is_nil(item); item = CDR(item)) ++length; value = make_vector(length, UNDEFINED); item = list_root.value; for (size_t i = 0; i < length; ++i) { _get_vector(value)->elements[i] = _CAR(item); /* No write barrier needed here. */ item = _CDR(item); } unregister_gc_root(&list_root); return value; } static value_t read_struct(reader_state_t *state) { gc_root_t ph_root; value_t values; register_gc_root(&ph_root, make_struct(struct_ph_root.value)); values = read_list(state); STRUCT_PH_TYPE(ph_root.value) = CAR(values); STRUCT_PH_VALUES(ph_root.value) = CDR(values); STRUCT_PH_RESULT(ph_root.value) = FALSE_VALUE; WRITE_BARRIER(ph_root.value); unregister_gc_root(&ph_root); return ph_root.value; } static value_t read_weak_box(reader_state_t *state) { value_t value; next_char(state); value = read_one_value(state); state->weak_list.value = cons(value, state->weak_list.value); return make_weak_box(value); } static value_t read_definition(reader_state_t *state) { fixnum_t ref = get_fixnum(read_fixnum(state, 0)); gc_root_t place_root; value_t v; release_assert(state->ch == '='); next_char(state); register_gc_root(&place_root, get_reference(state, ref)); v = read_one_value(state); set_reference(state, place_root.value, v); unregister_gc_root(&place_root); return v; } static value_t read_reference(reader_state_t *state) { next_char(state); skip_whitespace(state); release_assert(state->ch != EOF); if (state->ch == '"') { char *name = value_to_string(read_string(state)); value_t bi = lookup_builtin(name); if (bi == FALSE_VALUE) { fprintf(stderr, "Unable to locate \"%s\" builtin.\n", name); } free(name); release_assert(bi != FALSE_VALUE); return bi; } else { return get_reference(state, get_fixnum(read_fixnum(state, 0))); } } static value_t read_indirect(value_t path) { char *name = value_to_string(path); value_t v = read_value_from_path(name); free(name); return v; } static value_t freeze(value_t val) { if (is_vector(val)) { _get_vector(val)->immutable = true; } else if (is_byte_string(val)) { _get_byte_string(val)->immutable = true; } else if (is_struct(val)) { _get_struct(val)->immutable = true; } else { /* Error if value cannot be made immutable */ release_assert(!is_object(val) || is_float(val) || is_builtin_fn(val)); } return val; } static bool is_reference(reader_state_t *state, value_t value) { return struct_is_a(value, reference_root.value); } static value_t get_reference(reader_state_t *state, fixnum_t refid) { value_t refidval = fixnum_value(refid); for (value_t item = state->ref_list.value; !is_nil(item); item = _CDR(item)) { if (REF_IDENT(_CAR(item)) == refidval) return _CAR(item); } /* No existing reference with that number; create a new one. */ { value_t ref = make_struct(reference_root.value); REF_IDENT(ref) = refidval; REF_VALUE(ref) = UNDEFINED; REF_PATCHED(ref) = FALSE_VALUE; state->ref_list.value = cons(ref, state->ref_list.value); } return _CAR(state->ref_list.value); } static void set_reference(reader_state_t *state, value_t ref, value_t value) { assert(is_reference(state, ref)); release_assert(is_undefined(REF_VALUE(ref))); REF_VALUE(ref) = value; WRITE_BARRIER(ref); } static void finalize_references(reader_state_t *state) { bool changed = true; /* We're done when no placeholders link to other placeholders. */ while (changed) { changed = false; /* Resolve one level of placeholder-to-placeholder links. */ for (value_t item = state->ref_list.value; !is_nil(item); item = _CDR(item)) { value_t ref = _CAR(item); if (REF_VALUE(ref) == ref) { /* Self-links indicate cycles. */ REF_VALUE(ref) = UNDEFINED; changed = true; } else if (is_reference(state, REF_VALUE(ref))) { REF_VALUE(ref) = REF_VALUE(REF_VALUE(ref)); WRITE_BARRIER(ref); changed = true; } } } } static value_t _patch_placeholders(reader_state_t *state, value_t in, void *seen) { gc_root_t in_root; struct seen_value { gc_root_t *root; struct seen_value *prev; } this_seen = { &in_root, (struct seen_value*)seen }; if (struct_is_a(in, reference_root.value)) { if (!_get_boolean(REF_PATCHED(in))) { value_t val; REF_PATCHED(in) = TRUE_VALUE; register_gc_root(&in_root, in); val = _patch_placeholders(state, REF_VALUE(in_root.value), &this_seen); in = in_root.value; unregister_gc_root(&in_root); REF_VALUE(in) = val; WRITE_BARRIER(in); } return REF_VALUE(in); } for (struct seen_value *item = this_seen.prev; item; item = item->prev) { if (in == item->root->value) return in; } register_gc_root(&in_root, in); if (struct_is_a(in_root.value, immutable_ph_root.value)) { value_t val = _patch_placeholders(state, IMMUTABLE_PH_VALUE(in_root.value), &this_seen); in_root.value = freeze(val); } else if (struct_is_a(in_root.value, struct_ph_root.value)) { if (_get_boolean(STRUCT_PH_RESULT(in_root.value))) { in_root.value = STRUCT_PH_RESULT(in_root.value); } else { value_t sval; value_t values; STRUCT_PH_RESULT(in_root.value) = UNDEFINED; sval = make_struct(_patch_placeholders(state, STRUCT_PH_TYPE(in_root.value), &this_seen)); STRUCT_PH_RESULT(in_root.value) = sval; values = STRUCT_PH_VALUES(in_root.value); in_root.value = sval; for (int i = 0; i < _get_struct(in_root.value)->nslots; ++i) { if (is_nil(values)) break; _get_struct(in_root.value)->slots[i] = CAR(values); values = _CDR(values); } WRITE_BARRIER(in_root.value); for (int i = 0; i < _get_struct(in_root.value)->nslots; ++i) { _get_struct(in_root.value)->slots[i] = _patch_placeholders(state, _get_struct(in_root.value)->slots[i], &this_seen); WRITE_BARRIER(in_root.value); } } } else if (is_box(in_root.value)) { value_t val = _patch_placeholders(state, _get_box(in_root.value)->value, &this_seen); _get_box(in_root.value)->value = val; } else if (is_weak_box(in_root.value)) { value_t val = _patch_placeholders(state, _get_weak_box(in_root.value)->value, &this_seen); _get_weak_box(in_root.value)->value = val; } else if (is_pair(in_root.value)) { value_t val; val = _patch_placeholders(state, _CAR(in_root.value), &this_seen); _CAR(in_root.value) = val; val = _patch_placeholders(state, _CDR(in_root.value), &this_seen); _CDR(in_root.value) = val; } else if (is_vector(in_root.value)) { size_t nelem = _get_vector(in_root.value)->size; for (size_t i = 0; i < nelem; ++i) { value_t val = _patch_placeholders(state, _get_vector(in_root.value)->elements[i], &this_seen); _get_vector(in_root.value)->elements[i] = val; } } unregister_gc_root(&in_root); return in_root.value; } static value_t patch_placeholders(reader_state_t *state, value_t in) { gc_root_t root; register_gc_root(&root, in); finalize_references(state); root.value = _patch_placeholders(state, root.value, NULL); unregister_gc_root(&root); return root.value; } static void skip_whitespace(reader_state_t *state) { for (;;) { if (isspace(state->ch)) { next_char(state); } else if (state->ch == ';') { /* Comments count as whitespace */ do { next_char(state); if (state->ch == '\n') { next_char(state); break; } } while (state->ch != EOF); } else { break; } } } /* vim:set sw=2 expandtab: */