rosella/reader.c

729 lines
16 KiB
C

#include <ctype.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include "gc.h"
#include "builtin.h"
typedef struct reader_state
{
FILE *file;
int ch;
int line;
int column;
gc_root_t ref_alist;
gc_root_t weak_list;
gc_root_t ref_list;
} reader_state_t;
static value_t read_one_value(reader_state_t *state);
static value_t read_special(reader_state_t *state);
static value_t read_list(reader_state_t *state);
static value_t read_fixnum(reader_state_t *state, int radix);
static value_t read_number(reader_state_t *state);
static value_t read_string(reader_state_t *state);
static value_t read_box(reader_state_t *state);
static value_t read_vector(reader_state_t *state);
static value_t read_struct(reader_state_t *state);
static value_t read_weak_box(reader_state_t *state);
static value_t read_definition(reader_state_t *state);
static value_t read_placeholder(reader_state_t *state);
static void next_char(reader_state_t *state);
static void skip_whitespace(reader_state_t *state);
static bool is_placeholder(reader_state_t *state, value_t value);
static value_t get_placeholder(reader_state_t *state, fixnum_t ref);
static void set_placeholder(reader_state_t *state, value_t place, value_t value);
static void finalize_placeholders(reader_state_t *state);
static value_t patch_placeholders(reader_state_t *state, value_t v);
static void tree_replace(value_t *in, value_t oldval, value_t newval);
value_t read_value(FILE *f)
{
reader_state_t state;
value_t result;
register_gc_root(&state.ref_alist, NIL);
register_gc_root(&state.weak_list, NIL);
register_gc_root(&state.ref_list, NIL);
state.file = f;
state.line = state.column = 1;
next_char(&state);
result = read_one_value(&state);
ungetc(state.ch, f);
result = patch_placeholders(&state, result);
unregister_gc_root(&state.ref_list);
unregister_gc_root(&state.weak_list);
unregister_gc_root(&state.ref_alist);
return result;
}
static value_t read_one_value(reader_state_t *state)
{
skip_whitespace(state);
release_assert(state->ch != EOF);
switch (state->ch)
{
case '#':
return read_special(state);
case '(':
return read_list(state);
case '-':
case '+':
case '0' ... '9':
return read_number(state);
case '\"':
return read_string(state);
default:
fprintf(stderr, "Unexpected character '%c' in input on line %d, column %d.\n",
state->ch, state->line, state->column);
release_assert(NOTREACHED("Unexpected character in input."));
return UNDEFINED;
}
}
bool issymbol(int ch)
{
switch (ch)
{
case 'A' ... 'Z':
case 'a' ... 'z':
case '0' ... '9':
case '!': case '$': case '%':
case '&': case '*': case '+':
case '-': case '/': case '<':
case '=': case '>': case '?':
case '@': case '\\': case '^':
case '_': case '|': case '~':
return true;
default:
return false;
}
}
static value_t read_special(reader_state_t *state)
{
next_char(state);
release_assert(state->ch != EOF);
switch (state->ch)
{
case 'F':
case 'f':
next_char(state);
release_assert(!issymbol(state->ch));
return FALSE_VALUE;
case 'T':
case 't':
next_char(state);
release_assert(!issymbol(state->ch));
return TRUE_VALUE;
case '&':
return read_box(state);
case '(':
return read_vector(state);
case 'S':
case 's':
next_char(state);
release_assert(state->ch == '(');
return read_struct(state);
case 'W':
case 'w':
next_char(state);
release_assert(state->ch == '&');
return read_weak_box(state);
case '0' ... '9':
return read_definition(state);
case '=':
return read_placeholder(state);
default:
release_assert(NOTREACHED("Invalid character in special value."));
return UNDEFINED;
}
}
static void reverse_list(value_t *list, value_t newcdr)
{
value_t lst = *list;
while (is_pair(lst))
{
value_t temp = _get_pair(lst)->cdr;
_get_pair(lst)->cdr = newcdr;
newcdr = lst;
lst = temp;
}
*list = newcdr;
}
static value_t read_list(reader_state_t *state)
{
gc_root_t list_root;
bool done = false;
register_gc_root(&list_root, NIL);
next_char(state);
while (!done)
{
skip_whitespace(state);
release_assert(state->ch != EOF);
switch (state->ch)
{
case '.':
{
release_assert(!is_nil(list_root.value));
next_char(state);
value_t temp = read_one_value(state);
reverse_list(&list_root.value, temp);
skip_whitespace(state);
release_assert(state->ch == ')');
next_char(state);
done = true;
}
break;
case ')':
{
reverse_list(&list_root.value, NIL);
next_char(state);
done = true;
}
break;
default:
{
value_t temp = read_one_value(state);
list_root.value = cons(temp, list_root.value);
}
break;
}
}
unregister_gc_root(&list_root);
return list_root.value;
}
static short int char_to_digit(int ch)
{
if (isdigit(ch))
{
return ch - '0';
}
else
{
assert(isalpha(ch));
return 10 + (toupper(ch) - 'A');
}
}
static value_t read_fixnum(reader_state_t *state, int radix)
{
fixnum_t num = 0;
bool negative = false;
assert((0 <= radix) && (radix <= 36));
if (state->ch == '-')
{
negative = true;
next_char(state);
}
else if (state->ch == '+')
{
next_char(state);
}
release_assert(isdigit(state->ch));
if (radix == 0)
{
if (state->ch == '0')
{
next_char(state);
switch (state->ch)
{
case 'X':
case 'x':
next_char(state);
radix = 16;
break;
case 'B':
case 'b':
next_char(state);
radix = 2;
break;
default:
radix = 8;
break;
}
if (radix != 8)
{
/* Make sure we have at least one digit; if octal then the '0' counts instead */
release_assert(isalnum(state->ch));
release_assert(char_to_digit(state->ch) < radix);
}
}
else
{
radix = 10;
}
}
while (isalnum(state->ch))
{
fixnum_t digit = char_to_digit(state->ch);
if (digit >= radix)
break;
release_assert(num <= (FIXNUM_MAX/radix));
num *= radix;
num += digit;
next_char(state);
}
if (negative)
num = -num;
release_assert((FIXNUM_MIN <= num) && (num <= FIXNUM_MAX));
return fixnum_value(num);
}
static value_t read_number(reader_state_t *state)
{
value_t v = read_fixnum(state, 0);
release_assert(!issymbol(state->ch));
return v;
}
static value_t read_string(reader_state_t *state)
{
char *buffer = (char*)malloc(128);
size_t buffer_size = 128;
size_t length = 0;
value_t value;
release_assert(buffer != NULL);
next_char(state);
while (state->ch != '"')
{
bool skip_ws = false;
char ch;
if ((buffer_size - length) < 1)
{
release_assert(buffer_size <= INT32_MAX / 3);
buffer_size = (3 * buffer_size) / 2;
buffer = realloc(buffer, buffer_size);
release_assert(buffer != NULL);
}
ch = state->ch;
next_char(state);
if (ch == '\\')
{
switch (state->ch)
{
case 'o':
next_char(state);
release_assert(('0' <= state->ch) && (state->ch <= '7'));
/* fall through */
case '0' ... '7':
ch = 0;
/* One to three octal digits */
for (int i = 0; i < 3; ++i)
{
ch = 8 * ch + (state->ch - '0');
next_char(state);
if ((state->ch < '0') || (state->ch > '7'))
break;
}
break;
case 'X':
case 'x':
ch = 0;
next_char(state);
release_assert(isxdigit(state->ch));
/* One or two hex digits */
for (int i = 0; i < 2; ++i)
{
int n = isdigit(state->ch)
? (state->ch - '0')
: (10 + toupper(state->ch) - 'A');
ch = 16 * ch + n;
next_char(state);
if (!isxdigit(state->ch))
break;
}
break;
case ' ':
case '\t':
case '\v':
case '\n':
skip_ws = true;
break;
case ';':
/* Treats everything that follows on the same line as a comment,
* and additionally skips leading whitespace on the next line. */
while (state->ch != '\n')
next_char(state);
skip_ws = true;
break;
case '\\': ch = '\\'; next_char(state); break;
case '\'': ch = '\''; next_char(state); break;
case '\"': ch = '\"'; next_char(state); break;
case 'a': ch = '\a'; next_char(state); break;
case 'b': ch = '\b'; next_char(state); break;
case 'f': ch = '\f'; next_char(state); break;
case 'n': ch = '\n'; next_char(state); break;
case 'r': ch = '\r'; next_char(state); break;
case 't': ch = '\t'; next_char(state); break;
case 'v': ch = '\v'; next_char(state); break;
default:
release_assert(NOTREACHED("Invalid escape sequence in string."));
ch = '#';
next_char(state);
break;
}
}
if (skip_ws)
{
bool hit_eol = false;
/* Slightly different from the normal skip_whitespace(); skips
* whitespace through the _second_ EOL following the backslash. */
do {
if (state->ch == '\n')
{
if (!hit_eol)
{
hit_eol = true;
}
else
{
next_char(state);
break;
}
}
next_char(state);
} while (isspace(state->ch));
}
else
{
buffer[length++] = ch;
}
}
next_char(state);
value = make_byte_string(length, '\0');
memcpy(_get_byte_string(value)->bytes, buffer, length);
free(buffer);
return value;
}
static value_t read_box(reader_state_t *state)
{
next_char(state);
return make_box(read_one_value(state));
}
static value_t read_vector(reader_state_t *state)
{
gc_root_t list_root;
size_t length = 0;
value_t value;
value_t item;
register_gc_root(&list_root, read_list(state));
for (value_t item = list_root.value; !is_nil(item); item = CDR(item))
++length;
value = make_vector(length, UNDEFINED);
item = list_root.value;
for (size_t i = 0; i < length; ++i)
{
_get_vector(value)->elements[i] = _CAR(item);
item = _CDR(item);
}
unregister_gc_root(&list_root);
return value;
}
static value_t read_struct(reader_state_t *state)
{
gc_root_t list_root;
size_t slots = 0;
value_t value;
value_t item;
register_gc_root(&list_root, read_list(state));
for (item = CDR(list_root.value); !is_nil(item); item = CDR(item))
++slots;
value = make_struct(_CAR(list_root.value), slots);
item = _CDR(list_root.value);
for (size_t i = 0; i < slots; ++i)
{
_get_struct(value)->slots[i] = _CAR(item);
item = _CDR(item);
}
unregister_gc_root(&list_root);
return value;
}
static value_t read_weak_box(reader_state_t *state)
{
value_t value;
next_char(state);
value = read_one_value(state);
state->weak_list.value = cons(value, state->weak_list.value);
return make_weak_box(value);
}
static value_t read_definition(reader_state_t *state)
{
fixnum_t ref = get_fixnum(read_fixnum(state, 0));
gc_root_t place_root;
value_t v;
release_assert(state->ch == '=');
next_char(state);
register_gc_root(&place_root, get_placeholder(state, ref));
v = read_one_value(state);
set_placeholder(state, place_root.value, v);
unregister_gc_root(&place_root);
return v;
}
static value_t read_placeholder(reader_state_t *state)
{
next_char(state);
skip_whitespace(state);
release_assert(state->ch != EOF);
if (state->ch == '"')
{
char *name = value_to_string(read_string(state));
value_t bi = lookup_builtin(name);
free(name);
return bi;
}
else
{
return get_placeholder(state, get_fixnum(read_fixnum(state, 0)));
}
}
static bool is_placeholder(reader_state_t *state, value_t value)
{
for (value_t item = state->ref_alist.value; !is_nil(item); item = _CDDR(item))
{
if (value == item)
return true;
}
return false;
}
static value_t get_placeholder(reader_state_t *state, fixnum_t ref)
{
value_t refval = fixnum_value(ref);
for (value_t item = state->ref_alist.value; !is_nil(item); item = _CDDR(item))
{
if (_CAR(item) == refval)
return item;
}
/* No existing placeholder with that number; create a new one. */
state->ref_alist.value = cons(UNDEFINED, state->ref_alist.value);
state->ref_alist.value = cons(refval, state->ref_alist.value);
return state->ref_alist.value;
}
static void set_placeholder(reader_state_t *state, value_t place, value_t value)
{
assert(is_placeholder(state, place));
release_assert(is_undefined(_CADR(place)));
_CADR(place) = value;
}
static void finalize_placeholders(reader_state_t *state)
{
bool changed = true;
/* We're done when no placeholders link to other placeholders. */
while (changed)
{
changed = false;
/* Resolve one level of placeholder-to-placeholder links.
* Self-links indicate cycles and are replaced with UNDEFINED. */
for (value_t item = state->ref_alist.value; !is_nil(item); item = _CDDR(item))
{
if (_CADR(item) == item)
{
_CADR(item) = UNDEFINED;
changed = true;
}
else if (is_placeholder(state, _CADR(item)))
{
_CADR(item) = _CADR(_CADR(item));
changed = true;
}
}
}
}
static value_t patch_placeholders(reader_state_t *state, value_t v)
{
finalize_placeholders(state);
for (value_t item = state->ref_alist.value; !is_nil(item); item = _CDDR(item))
{
tree_replace(&v, item, _CADR(item));
}
return v;
}
typedef struct seen_value
{
value_t value;
struct seen_value *prev;
} seen_value_t;
static void _tree_replace(value_t *in, value_t oldval, value_t newval,
seen_value_t *seen)
{
seen_value_t this_seen = { *in, seen };
for (seen_value_t *item = seen; item; item = item->prev)
{
if (*in == item->value)
return;
}
if (*in == oldval)
{
*in = newval;
}
else if (is_box(*in))
{
_tree_replace(&_get_box(*in)->value, oldval, newval, &this_seen);
}
else if (is_pair(*in))
{
_tree_replace(&_get_pair(*in)->car, oldval, newval, &this_seen);
_tree_replace(&_get_pair(*in)->cdr, oldval, newval, &this_seen);
}
else if (is_vector(*in))
{
for (size_t i = 0; i < _get_vector(*in)->size; ++i)
{
_tree_replace(&_get_vector(*in)->elements[i], oldval, newval, &this_seen);
}
}
else if (is_struct(*in))
{
_tree_replace(&_get_struct(*in)->type, oldval, newval, &this_seen);
for (size_t i = 0; i < _get_struct(*in)->nslots; ++i)
_tree_replace(&_get_struct(*in)->slots[i], oldval, newval, &this_seen);
}
else if (is_weak_box(*in))
{
_tree_replace(&_get_weak_box(*in)->value, oldval, newval, &this_seen);
}
}
static void tree_replace(value_t *in, value_t oldval, value_t newval)
{
_tree_replace(in, oldval, newval, NULL);
}
static void next_char(reader_state_t *state)
{
state->ch = fgetc(state->file);
if (state->ch == '\n')
{
++state->line;
state->column = 1;
}
else
{
++state->column;
}
}
static void skip_whitespace(reader_state_t *state)
{
for (;;)
{
if (isspace(state->ch))
{
next_char(state);
}
else if (state->ch == ';')
{
/* Comments count as whitespace */
do {
next_char(state);
if (state->ch == '\n')
{
next_char(state);
break;
}
} while (state->ch != EOF);
}
else
{
break;
}
}
}
/* vim:set sw=2 expandtab: */