688 lines
15 KiB
C
688 lines
15 KiB
C
#include <ctype.h>
|
|
#include <inttypes.h>
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "gc.h"
|
|
#include "builtin.h"
|
|
|
|
typedef struct reader_state
|
|
{
|
|
FILE *file;
|
|
int ch;
|
|
int line;
|
|
int column;
|
|
gc_root_t ref_alist;
|
|
gc_root_t weak_list;
|
|
gc_root_t ref_list;
|
|
} reader_state_t;
|
|
|
|
static value_t read_one_value(reader_state_t *state);
|
|
static value_t read_special(reader_state_t *state);
|
|
static value_t read_list(reader_state_t *state);
|
|
static value_t read_number(reader_state_t *state);
|
|
static value_t read_string(reader_state_t *state);
|
|
|
|
static value_t read_box(reader_state_t *state);
|
|
static value_t read_vector(reader_state_t *state);
|
|
static value_t read_struct(reader_state_t *state);
|
|
static value_t read_weak_box(reader_state_t *state);
|
|
|
|
static value_t read_definition(reader_state_t *state);
|
|
static value_t read_placeholder(reader_state_t *state);
|
|
|
|
static void next_char(reader_state_t *state);
|
|
static void skip_whitespace(reader_state_t *state);
|
|
|
|
static bool is_placeholder(reader_state_t *state, value_t value);
|
|
static value_t get_placeholder(reader_state_t *state, fixnum_t ref);
|
|
static void set_placeholder(reader_state_t *state, value_t place, value_t value);
|
|
static void finalize_placeholders(reader_state_t *state);
|
|
static value_t patch_placeholders(reader_state_t *state, value_t v);
|
|
|
|
static void tree_replace(value_t *in, value_t oldval, value_t newval);
|
|
|
|
value_t read_value(FILE *f)
|
|
{
|
|
reader_state_t state;
|
|
value_t result;
|
|
|
|
register_gc_root(&state.ref_alist, NIL);
|
|
register_gc_root(&state.weak_list, NIL);
|
|
register_gc_root(&state.ref_list, NIL);
|
|
|
|
state.file = f;
|
|
state.line = state.column = 1;
|
|
next_char(&state);
|
|
result = read_one_value(&state);
|
|
ungetc(state.ch, f);
|
|
|
|
result = patch_placeholders(&state, result);
|
|
|
|
unregister_gc_root(&state.ref_list);
|
|
unregister_gc_root(&state.weak_list);
|
|
unregister_gc_root(&state.ref_alist);
|
|
|
|
return result;
|
|
}
|
|
|
|
static value_t read_one_value(reader_state_t *state)
|
|
{
|
|
skip_whitespace(state);
|
|
release_assert(state->ch != EOF);
|
|
|
|
switch (state->ch)
|
|
{
|
|
case '#':
|
|
return read_special(state);
|
|
case '(':
|
|
return read_list(state);
|
|
case '-':
|
|
case '0' ... '9':
|
|
return read_number(state);
|
|
case '\"':
|
|
return read_string(state);
|
|
default:
|
|
fprintf(stderr, "Unexpected character '%c' in input on line %d, column %d.\n",
|
|
state->ch, state->line, state->column);
|
|
release_assert(NOTREACHED("Unexpected character in input."));
|
|
return UNDEFINED;
|
|
}
|
|
}
|
|
|
|
bool issymbol(int ch)
|
|
{
|
|
switch (ch)
|
|
{
|
|
case 'A' ... 'Z':
|
|
case 'a' ... 'z':
|
|
case '0' ... '9':
|
|
case '!': case '$': case '%':
|
|
case '&': case '*': case '+':
|
|
case '-': case '/': case '<':
|
|
case '=': case '>': case '?':
|
|
case '@': case '\\': case '^':
|
|
case '_': case '|': case '~':
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static value_t read_special(reader_state_t *state)
|
|
{
|
|
next_char(state);
|
|
release_assert(state->ch != EOF);
|
|
|
|
switch (state->ch)
|
|
{
|
|
case 'F':
|
|
case 'f':
|
|
next_char(state);
|
|
release_assert(!issymbol(state->ch));
|
|
return FALSE_VALUE;
|
|
case 'T':
|
|
case 't':
|
|
next_char(state);
|
|
release_assert(!issymbol(state->ch));
|
|
return TRUE_VALUE;
|
|
case '&':
|
|
return read_box(state);
|
|
case '(':
|
|
return read_vector(state);
|
|
case 'S':
|
|
case 's':
|
|
next_char(state);
|
|
release_assert(state->ch == '(');
|
|
return read_struct(state);
|
|
case 'W':
|
|
case 'w':
|
|
next_char(state);
|
|
release_assert(state->ch == '&');
|
|
return read_weak_box(state);
|
|
case '0' ... '9':
|
|
return read_definition(state);
|
|
case '=':
|
|
return read_placeholder(state);
|
|
default:
|
|
release_assert(NOTREACHED("Invalid character in special value."));
|
|
return UNDEFINED;
|
|
}
|
|
}
|
|
|
|
static void reverse_list(value_t *list, value_t newcdr)
|
|
{
|
|
value_t lst = *list;
|
|
|
|
while (is_pair(lst))
|
|
{
|
|
value_t temp = _get_pair(lst)->cdr;
|
|
_get_pair(lst)->cdr = newcdr;
|
|
newcdr = lst;
|
|
lst = temp;
|
|
}
|
|
|
|
*list = newcdr;
|
|
}
|
|
|
|
static value_t read_list(reader_state_t *state)
|
|
{
|
|
gc_root_t list_root;
|
|
bool done = false;
|
|
|
|
register_gc_root(&list_root, NIL);
|
|
next_char(state);
|
|
|
|
while (!done)
|
|
{
|
|
skip_whitespace(state);
|
|
release_assert(state->ch != EOF);
|
|
|
|
switch (state->ch)
|
|
{
|
|
case '.':
|
|
{
|
|
release_assert(!is_nil(list_root.value));
|
|
next_char(state);
|
|
value_t temp = read_one_value(state);
|
|
reverse_list(&list_root.value, temp);
|
|
|
|
skip_whitespace(state);
|
|
release_assert(state->ch == ')');
|
|
next_char(state);
|
|
|
|
done = true;
|
|
}
|
|
break;
|
|
case ')':
|
|
{
|
|
reverse_list(&list_root.value, NIL);
|
|
next_char(state);
|
|
done = true;
|
|
}
|
|
break;
|
|
default:
|
|
{
|
|
value_t temp = read_one_value(state);
|
|
list_root.value = cons(temp, list_root.value);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
unregister_gc_root(&list_root);
|
|
return list_root.value;
|
|
}
|
|
|
|
static value_t read_number(reader_state_t *state)
|
|
{
|
|
fixnum_t radix = 10;
|
|
fixnum_t num = 0;
|
|
bool negative = false;
|
|
|
|
if (state->ch == '-')
|
|
{
|
|
negative = true;
|
|
next_char(state);
|
|
}
|
|
else if (state->ch == '+')
|
|
{
|
|
next_char(state);
|
|
}
|
|
|
|
release_assert(isdigit(state->ch));
|
|
|
|
if (state->ch == '0')
|
|
{
|
|
next_char(state);
|
|
|
|
switch (state->ch)
|
|
{
|
|
case 'X':
|
|
case 'x':
|
|
next_char(state);
|
|
radix = 16;
|
|
break;
|
|
case 'B':
|
|
case 'b':
|
|
next_char(state);
|
|
radix = 2;
|
|
break;
|
|
default:
|
|
radix = 8;
|
|
break;
|
|
}
|
|
}
|
|
|
|
while (isxdigit(state->ch))
|
|
{
|
|
fixnum_t digit = isdigit(state->ch)
|
|
? (state->ch - '0')
|
|
: (10 + (toupper(state->ch) - 'A'));
|
|
|
|
release_assert(digit < radix);
|
|
release_assert(num <= (FIXNUM_MAX/radix));
|
|
|
|
num *= radix;
|
|
num += digit;
|
|
|
|
next_char(state);
|
|
}
|
|
|
|
if (negative)
|
|
num = -num;
|
|
|
|
release_assert((FIXNUM_MIN <= num) && (num <= FIXNUM_MAX));
|
|
return fixnum_value(num);
|
|
}
|
|
|
|
static value_t read_string(reader_state_t *state)
|
|
{
|
|
char *buffer = (char*)malloc(128);
|
|
size_t buffer_size = 128;
|
|
size_t length = 0;
|
|
value_t value;
|
|
|
|
release_assert(buffer != NULL);
|
|
|
|
next_char(state);
|
|
|
|
while (state->ch != '"')
|
|
{
|
|
bool skip_ws = false;
|
|
char ch;
|
|
|
|
if ((buffer_size - length) < 1)
|
|
{
|
|
release_assert(buffer_size <= INT32_MAX / 3);
|
|
buffer_size = (3 * buffer_size) / 2;
|
|
buffer = realloc(buffer, buffer_size);
|
|
release_assert(buffer != NULL);
|
|
}
|
|
|
|
ch = state->ch;
|
|
next_char(state);
|
|
|
|
if (ch == '\\')
|
|
{
|
|
switch (state->ch)
|
|
{
|
|
case 'o':
|
|
next_char(state);
|
|
release_assert(('0' <= state->ch) && (state->ch <= '7'));
|
|
/* fall through */
|
|
case '0' ... '7':
|
|
ch = 0;
|
|
|
|
/* One to three octal digits */
|
|
for (int i = 0; i < 3; ++i)
|
|
{
|
|
ch = 8 * ch + (state->ch - '0');
|
|
next_char(state);
|
|
if ((state->ch < '0') || (state->ch > '7'))
|
|
break;
|
|
}
|
|
break;
|
|
case 'X':
|
|
case 'x':
|
|
ch = 0;
|
|
|
|
next_char(state);
|
|
release_assert(isxdigit(state->ch));
|
|
|
|
/* One or two hex digits */
|
|
for (int i = 0; i < 2; ++i)
|
|
{
|
|
int n = isdigit(state->ch)
|
|
? (state->ch - '0')
|
|
: (10 + toupper(state->ch) - 'A');
|
|
|
|
ch = 16 * ch + n;
|
|
next_char(state);
|
|
|
|
if (!isxdigit(state->ch))
|
|
break;
|
|
}
|
|
break;
|
|
case ' ':
|
|
case '\t':
|
|
case '\v':
|
|
case '\n':
|
|
skip_ws = true;
|
|
break;
|
|
case ';':
|
|
/* Treats everything that follows on the same line as a comment,
|
|
* and additionally skips leading whitespace on the next line. */
|
|
while (state->ch != '\n')
|
|
next_char(state);
|
|
skip_ws = true;
|
|
break;
|
|
case '\\': ch = '\\'; next_char(state); break;
|
|
case '\'': ch = '\''; next_char(state); break;
|
|
case '\"': ch = '\"'; next_char(state); break;
|
|
case 'a': ch = '\a'; next_char(state); break;
|
|
case 'b': ch = '\b'; next_char(state); break;
|
|
case 'f': ch = '\f'; next_char(state); break;
|
|
case 'n': ch = '\n'; next_char(state); break;
|
|
case 'r': ch = '\r'; next_char(state); break;
|
|
case 't': ch = '\t'; next_char(state); break;
|
|
case 'v': ch = '\v'; next_char(state); break;
|
|
default:
|
|
release_assert(NOTREACHED("Invalid escape sequence in string."));
|
|
ch = '#';
|
|
next_char(state);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (skip_ws)
|
|
{
|
|
bool hit_eol = false;
|
|
|
|
/* Slightly different from the normal skip_whitespace(); skips
|
|
* whitespace through the _second_ EOL following the backslash. */
|
|
do {
|
|
if (state->ch == '\n')
|
|
{
|
|
if (!hit_eol)
|
|
{
|
|
hit_eol = true;
|
|
}
|
|
else
|
|
{
|
|
next_char(state);
|
|
break;
|
|
}
|
|
}
|
|
|
|
next_char(state);
|
|
} while (isspace(state->ch));
|
|
}
|
|
else
|
|
{
|
|
buffer[length++] = ch;
|
|
}
|
|
}
|
|
|
|
next_char(state);
|
|
|
|
value = make_byte_string(length, '\0');
|
|
memcpy(_get_byte_string(value)->bytes, buffer, length);
|
|
free(buffer);
|
|
|
|
return value;
|
|
}
|
|
|
|
static value_t read_box(reader_state_t *state)
|
|
{
|
|
next_char(state);
|
|
return make_box(read_one_value(state));
|
|
}
|
|
|
|
static value_t read_vector(reader_state_t *state)
|
|
{
|
|
gc_root_t list_root;
|
|
size_t length = 0;
|
|
value_t value;
|
|
value_t item;
|
|
|
|
register_gc_root(&list_root, read_list(state));
|
|
|
|
for (value_t item = list_root.value; !is_nil(item); item = CDR(item))
|
|
++length;
|
|
|
|
value = make_vector(length, UNDEFINED);
|
|
|
|
item = list_root.value;
|
|
for (size_t i = 0; i < length; ++i)
|
|
{
|
|
_get_vector(value)->elements[i] = _CAR(item);
|
|
item = _CDR(item);
|
|
}
|
|
|
|
unregister_gc_root(&list_root);
|
|
|
|
return value;
|
|
}
|
|
|
|
static value_t read_struct(reader_state_t *state)
|
|
{
|
|
gc_root_t list_root;
|
|
size_t slots = 0;
|
|
value_t value;
|
|
value_t item;
|
|
|
|
register_gc_root(&list_root, read_list(state));
|
|
|
|
for (item = CDR(list_root.value); !is_nil(item); item = CDR(item))
|
|
++slots;
|
|
|
|
value = make_struct(_CAR(list_root.value), slots);
|
|
|
|
item = _CDR(list_root.value);
|
|
for (size_t i = 0; i < slots; ++i)
|
|
{
|
|
_get_struct(value)->slots[i] = _CAR(item);
|
|
item = _CDR(item);
|
|
}
|
|
|
|
unregister_gc_root(&list_root);
|
|
|
|
return value;
|
|
}
|
|
|
|
static value_t read_weak_box(reader_state_t *state)
|
|
{
|
|
value_t value;
|
|
|
|
next_char(state);
|
|
value = read_one_value(state);
|
|
state->weak_list.value = cons(value, state->weak_list.value);
|
|
return make_weak_box(value);
|
|
}
|
|
|
|
static value_t read_definition(reader_state_t *state)
|
|
{
|
|
fixnum_t ref = get_fixnum(read_number(state));
|
|
gc_root_t place_root;
|
|
value_t v;
|
|
|
|
release_assert(state->ch == '=');
|
|
next_char(state);
|
|
|
|
register_gc_root(&place_root, get_placeholder(state, ref));
|
|
v = read_one_value(state);
|
|
set_placeholder(state, place_root.value, v);
|
|
unregister_gc_root(&place_root);
|
|
|
|
return v;
|
|
}
|
|
|
|
static value_t read_placeholder(reader_state_t *state)
|
|
{
|
|
next_char(state);
|
|
skip_whitespace(state);
|
|
release_assert(state->ch != EOF);
|
|
|
|
if (state->ch == '"')
|
|
{
|
|
char *name = value_to_string(read_string(state));
|
|
value_t bi = lookup_builtin(name);
|
|
free(name);
|
|
return bi;
|
|
}
|
|
else
|
|
{
|
|
return get_placeholder(state, get_fixnum(read_number(state)));
|
|
}
|
|
}
|
|
|
|
static bool is_placeholder(reader_state_t *state, value_t value)
|
|
{
|
|
for (value_t item = state->ref_alist.value; !is_nil(item); item = _CDDR(item))
|
|
{
|
|
if (value == item)
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static value_t get_placeholder(reader_state_t *state, fixnum_t ref)
|
|
{
|
|
value_t refval = fixnum_value(ref);
|
|
|
|
for (value_t item = state->ref_alist.value; !is_nil(item); item = _CDDR(item))
|
|
{
|
|
if (_CAR(item) == refval)
|
|
return item;
|
|
}
|
|
|
|
/* No existing placeholder with that number; create a new one. */
|
|
state->ref_alist.value = cons(UNDEFINED, state->ref_alist.value);
|
|
state->ref_alist.value = cons(refval, state->ref_alist.value);
|
|
return state->ref_alist.value;
|
|
}
|
|
|
|
static void set_placeholder(reader_state_t *state, value_t place, value_t value)
|
|
{
|
|
assert(is_placeholder(state, place));
|
|
release_assert(is_undefined(_CADR(place)));
|
|
_CADR(place) = value;
|
|
}
|
|
|
|
static void finalize_placeholders(reader_state_t *state)
|
|
{
|
|
for (bool changed = true; changed; changed = false)
|
|
{
|
|
/* On each cycle, placeholder cycles/lists should come one link closer to self or actual value.
|
|
* Self-links indicate cycles and are replaced with UNDEFINED.
|
|
* We're done when no placeholders link to other placeholders. */
|
|
for (value_t item = state->ref_alist.value; !is_nil(item); item = _CDDR(item))
|
|
{
|
|
if (_CADR(item) == item)
|
|
{
|
|
_CADR(item) = UNDEFINED;
|
|
changed = true;
|
|
}
|
|
else if (is_placeholder(state, _CADR(item)))
|
|
{
|
|
_CADR(item) = _CADR(_CADR(item));
|
|
changed = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static value_t patch_placeholders(reader_state_t *state, value_t v)
|
|
{
|
|
finalize_placeholders(state);
|
|
|
|
for (value_t item = state->ref_alist.value; !is_nil(item); item = _CDDR(item))
|
|
{
|
|
tree_replace(&v, item, _CADR(item));
|
|
}
|
|
|
|
return v;
|
|
}
|
|
|
|
typedef struct seen_value
|
|
{
|
|
value_t value;
|
|
struct seen_value *prev;
|
|
} seen_value_t;
|
|
|
|
static void _tree_replace(value_t *in, value_t oldval, value_t newval,
|
|
seen_value_t *seen)
|
|
{
|
|
seen_value_t this_seen = { *in, seen };
|
|
|
|
for (seen_value_t *item = seen; item; item = item->prev)
|
|
{
|
|
if (*in == item->value)
|
|
return;
|
|
}
|
|
|
|
if (*in == oldval)
|
|
{
|
|
*in = newval;
|
|
}
|
|
else if (is_box(*in))
|
|
{
|
|
_tree_replace(&_get_box(*in)->value, oldval, newval, &this_seen);
|
|
}
|
|
else if (is_pair(*in))
|
|
{
|
|
_tree_replace(&_get_pair(*in)->car, oldval, newval, &this_seen);
|
|
_tree_replace(&_get_pair(*in)->cdr, oldval, newval, &this_seen);
|
|
}
|
|
else if (is_vector(*in))
|
|
{
|
|
for (size_t i = 0; i < _get_vector(*in)->size; ++i)
|
|
{
|
|
_tree_replace(&_get_vector(*in)->elements[i], oldval, newval, &this_seen);
|
|
}
|
|
}
|
|
else if (is_struct(*in))
|
|
{
|
|
_tree_replace(&_get_struct(*in)->type, oldval, newval, &this_seen);
|
|
|
|
for (size_t i = 0; i < _get_struct(*in)->nslots; ++i)
|
|
_tree_replace(&_get_struct(*in)->slots[i], oldval, newval, &this_seen);
|
|
}
|
|
else if (is_weak_box(*in))
|
|
{
|
|
_tree_replace(&_get_weak_box(*in)->value, oldval, newval, &this_seen);
|
|
}
|
|
}
|
|
|
|
static void tree_replace(value_t *in, value_t oldval, value_t newval)
|
|
{
|
|
_tree_replace(in, oldval, newval, NULL);
|
|
}
|
|
|
|
static void next_char(reader_state_t *state)
|
|
{
|
|
state->ch = fgetc(state->file);
|
|
|
|
if (state->ch == '\n')
|
|
{
|
|
++state->line;
|
|
state->column = 1;
|
|
}
|
|
else
|
|
{
|
|
++state->column;
|
|
}
|
|
}
|
|
|
|
static void skip_whitespace(reader_state_t *state)
|
|
{
|
|
for (;;)
|
|
{
|
|
if (isspace(state->ch))
|
|
{
|
|
next_char(state);
|
|
}
|
|
else if (state->ch == ';')
|
|
{
|
|
/* Comments count as whitespace */
|
|
do {
|
|
next_char(state);
|
|
|
|
if (state->ch == '\n')
|
|
{
|
|
next_char(state);
|
|
break;
|
|
}
|
|
} while (state->ch != EOF);
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* vim:set sw=2 expandtab: */
|