rosella/reader.c

1035 lines
22 KiB
C

#define _XOPEN_SOURCE 500
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <ctype.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "gc.h"
#include "builtin.h"
#include "reader.h"
typedef struct reader_state
{
FILE *file;
int ch;
int line;
int column;
gc_root_t weak_list;
gc_root_t ref_list;
} reader_state_t;
#define REFERENCE_SLOT_IDENT 0
#define REFERENCE_SLOT_VALUE 1
#define REFERENCE_SLOT_PATCHED 2
#define REFERENCE_SLOTS 3
#define REF_IDENT(ref) _SLOT_VALUE(REFERENCE, (ref), IDENT)
#define REF_VALUE(ref) _SLOT_VALUE(REFERENCE, (ref), VALUE)
#define REF_PATCHED(ref) _SLOT_VALUE(REFERENCE, (ref), PATCHED)
/* Wraps values which should be made immutable after substitution */
#define IMMUTABLE_PH_SLOT_VALUE 0
#define IMMUTABLE_PH_SLOTS 1
#define IMMUTABLE_PH_VALUE(ph) _SLOT_VALUE(IMMUTABLE_PH, (ph), VALUE)
/* Wraps structure instances, where the type is always immutable */
#define STRUCT_PH_SLOT_TYPE 0
#define STRUCT_PH_SLOT_VALUES 1
#define STRUCT_PH_SLOT_RESULT 2
#define STRUCT_PH_SLOTS 3
#define STRUCT_PH_TYPE(ph) _SLOT_VALUE(STRUCT_PH, (ph), TYPE)
#define STRUCT_PH_VALUES(ph) _SLOT_VALUE(STRUCT_PH, (ph), VALUES)
#define STRUCT_PH_RESULT(ph) _SLOT_VALUE(STRUCT_PH, (ph), RESULT)
static gc_root_t reference_root;
static gc_root_t struct_ph_root;
static gc_root_t immutable_ph_root;
static value_t read_one_value(reader_state_t *state);
static value_t read_special(reader_state_t *state);
static value_t read_list(reader_state_t *state);
static value_t read_fixnum(reader_state_t *state, int radix);
static value_t read_number(reader_state_t *state);
static value_t read_string(reader_state_t *state);
static value_t read_immutable(reader_state_t *state);
static value_t read_box(reader_state_t *state);
static value_t read_vector(reader_state_t *state);
static value_t read_struct(reader_state_t *state);
static value_t read_weak_box(reader_state_t *state);
static value_t read_definition(reader_state_t *state);
static value_t read_reference(reader_state_t *state);
static value_t read_indirect(value_t path);
static value_t freeze(value_t val);
static void next_char(reader_state_t *state);
static void skip_whitespace(reader_state_t *state);
static bool is_reference(reader_state_t *state, value_t value);
static value_t get_reference(reader_state_t *state, fixnum_t ref);
static void set_reference(reader_state_t *state, value_t place, value_t value);
static void finalize_references(reader_state_t *state);
static value_t patch_placeholders(reader_state_t *state, value_t in);
static inline void next_char(reader_state_t *state)
{
if (state->ch != EOF)
{
state->ch = fgetc(state->file);
if (state->ch == '\n')
{
++state->line;
state->column = 0;
}
else
{
++state->column;
}
}
}
void reader_init(void)
{
register_gc_root(&reference_root, make_struct_type(NIL, REFERENCE_SLOTS, FALSE_VALUE));
register_gc_root(&struct_ph_root, make_struct_type(NIL, STRUCT_PH_SLOTS, FALSE_VALUE));
register_gc_root(&immutable_ph_root, make_struct_type(NIL, IMMUTABLE_PH_SLOTS, FALSE_VALUE));
}
value_t read_value_from_file(FILE *f)
{
reader_state_t state;
value_t result;
register_gc_root(&state.weak_list, NIL);
register_gc_root(&state.ref_list, NIL);
state.file = f;
state.line = 1;
state.column = 0;
next_char(&state);
result = read_one_value(&state);
ungetc(state.ch, f);
result = patch_placeholders(&state, result);
unregister_gc_root(&state.weak_list);
unregister_gc_root(&state.ref_list);
return result;
}
value_t read_value_from_path(const char *path)
{
FILE *f = fopen(path, "r");
const char *last_slash;
value_t v;
int dirfd;
release_assert(f != NULL);
dirfd = open(".", O_RDONLY);
release_assert(dirfd >= 0);
last_slash = strrchr(path, '/');
if (last_slash)
{
size_t bytes = last_slash - path;
char *dirname = (char*)malloc(bytes+1);
memcpy(dirname, path, bytes);
dirname[bytes] = '\0';
chdir(dirname);
}
v = read_value_from_file(f);
fchdir(dirfd);
close(dirfd);
fclose(f);
return v;
}
static value_t read_one_value(reader_state_t *state)
{
skip_whitespace(state);
release_assert(state->ch != EOF);
switch (state->ch)
{
case '#':
return read_special(state);
case '(':
return read_list(state);
case '-':
case '+':
case '0' ... '9':
return read_number(state);
case '\"':
return read_string(state);
default:
fprintf(stderr, "Unexpected character '%c' in input on line %d, column %d.\n",
state->ch, state->line, state->column);
release_assert(NOTREACHED("Unexpected character in input."));
return UNDEFINED;
}
}
bool issymbol(int ch)
{
switch (ch)
{
case 'A' ... 'Z':
case 'a' ... 'z':
case '0' ... '9':
case '!': case '$': case '%':
case '&': case '*': case '+':
case '-': case '/': case '<':
case '=': case '>': case '?':
case '@': case '\\': case '^':
case '_': case '|': case '~':
return true;
default:
return false;
}
}
static value_t read_special(reader_state_t *state)
{
next_char(state);
release_assert(state->ch != EOF);
switch (state->ch)
{
case '!':
release_assert((state->line == 1) && (state->column == 2));
do {
next_char(state);
} while (state->ch != '\n');
return read_one_value(state);
case 'F':
case 'f':
next_char(state);
release_assert(!issymbol(state->ch));
return FALSE_VALUE;
case 'T':
case 't':
next_char(state);
release_assert(!issymbol(state->ch));
return TRUE_VALUE;
case '&':
return read_box(state);
case '(':
return read_vector(state);
case 'S':
case 's':
next_char(state);
release_assert(state->ch == '(');
return read_struct(state);
case 'W':
case 'w':
next_char(state);
release_assert(state->ch == '&');
return read_weak_box(state);
case '0' ... '9':
return read_definition(state);
case '=':
return read_reference(state);
case 'I':
case 'i':
next_char(state);
return read_indirect(read_string(state));
case '@':
next_char(state);
return read_immutable(state);
default:
release_assert(NOTREACHED("Invalid character in special value."));
return UNDEFINED;
}
}
static void reverse_list(value_t *list, value_t newcdr)
{
value_t lst = *list;
while (is_pair(lst))
{
value_t temp = _get_pair(lst)->cdr;
_get_pair(lst)->cdr = newcdr;
WRITE_BARRIER(lst);
newcdr = lst;
lst = temp;
}
*list = newcdr;
}
static value_t read_list(reader_state_t *state)
{
gc_root_t list_root;
bool done = false;
register_gc_root(&list_root, NIL);
next_char(state);
while (!done)
{
skip_whitespace(state);
release_assert(state->ch != EOF);
switch (state->ch)
{
case '.':
{
release_assert(!is_nil(list_root.value));
next_char(state);
value_t temp = read_one_value(state);
reverse_list(&list_root.value, temp);
skip_whitespace(state);
release_assert(state->ch == ')');
next_char(state);
done = true;
}
break;
case ')':
{
reverse_list(&list_root.value, NIL);
next_char(state);
done = true;
}
break;
default:
{
value_t temp = read_one_value(state);
list_root.value = cons(temp, list_root.value);
}
break;
}
}
unregister_gc_root(&list_root);
return list_root.value;
}
static short int char_to_digit(int ch)
{
if (isdigit(ch))
{
return ch - '0';
}
else
{
assert(isalpha(ch));
return 10 + (toupper(ch) - 'A');
}
}
static value_t read_fixnum(reader_state_t *state, int radix)
{
fixnum_t num = 0;
bool negative = false;
assert((0 <= radix) && (radix <= 36));
if (state->ch == '-')
{
negative = true;
next_char(state);
}
else if (state->ch == '+')
{
next_char(state);
}
release_assert(isdigit(state->ch));
if (radix == 0)
{
if (state->ch == '0')
{
next_char(state);
switch (state->ch)
{
case 'X':
case 'x':
next_char(state);
radix = 16;
break;
case 'B':
case 'b':
next_char(state);
radix = 2;
break;
default:
radix = 8;
break;
}
if (radix != 8)
{
/* Make sure we have at least one digit; if octal then the '0' counts instead */
release_assert(isalnum(state->ch));
release_assert(char_to_digit(state->ch) < radix);
}
}
else
{
radix = 10;
}
}
while (isalnum(state->ch))
{
fixnum_t digit = char_to_digit(state->ch);
if (digit >= radix)
break;
release_assert(num <= (FIXNUM_MAX/radix));
num *= radix;
num += digit;
next_char(state);
}
if (negative)
num = -num;
release_assert((FIXNUM_MIN <= num) && (num <= FIXNUM_MAX));
return fixnum_value(num);
}
static value_t read_number(reader_state_t *state)
{
bool negative = false;
fixnum_t num = 0;
native_float_t flt;
int radix;
if (state->ch == '-')
{
negative = true;
next_char(state);
}
else if (state->ch == '+')
{
next_char(state);
}
release_assert(isdigit(state->ch));
if (state->ch == '0')
{
next_char(state);
switch (state->ch)
{
case 'X':
case 'x':
radix = 16;
next_char(state);
break;
case 'B':
case 'b':
radix = 2;
next_char(state);
break;
case '0' ... '9':
radix = 8;
break;
default:
radix = 10;
break;
}
if (radix != 10)
{
release_assert(isalnum(state->ch));
release_assert(char_to_digit(state->ch) < radix);
}
}
else
{
radix = 10;
}
while (isalnum(state->ch))
{
fixnum_t digit = char_to_digit(state->ch);
if (digit >= radix)
break;
release_assert(num <= (FIXNUM_MAX/radix));
num *= radix;
num += digit;
next_char(state);
}
if ((radix != 10) || ((state->ch != '.') && (state->ch != 'E') && (state->ch != 'e')))
{
if (negative)
num = -num;
release_assert(!issymbol(state->ch));
release_assert((FIXNUM_MIN <= num) && (num <= FIXNUM_MAX));
return fixnum_value(num);
}
/*
* Floating-point. No guarantees as to precision... really should use binary/hex.
*/
flt = num;
if (state->ch == '.')
{
next_char(state);
for (native_float_t pv = negative ? -0.1 : 0.1; isdigit(state->ch); pv /= 10)
{
flt += (state->ch - '0') * pv;
next_char(state);
}
}
if ((state->ch == 'E') || (state->ch == 'e'))
{
next_char(state);
num = read_fixnum(state, 10);
flt *= pow(10, _get_fixnum(num));
}
if (negative)
flt = -flt;
release_assert(!issymbol(state->ch));
return make_float(flt);
}
static value_t read_string(reader_state_t *state)
{
char *buffer = (char*)malloc(128);
size_t buffer_size = 128;
size_t length = 0;
value_t value;
release_assert(buffer != NULL);
release_assert(state->ch == '"');
next_char(state);
while (state->ch != '"')
{
bool skip_ws = false;
char ch;
release_assert(state->ch != EOF);
if ((buffer_size - length) < 1)
{
release_assert(buffer_size <= INT32_MAX / 3);
buffer_size = (3 * buffer_size) / 2;
buffer = realloc(buffer, buffer_size);
release_assert(buffer != NULL);
}
ch = state->ch;
next_char(state);
if (ch == '\\')
{
switch (state->ch)
{
case 'o':
next_char(state);
release_assert(('0' <= state->ch) && (state->ch <= '7'));
/* fall through */
case '0' ... '7':
ch = 0;
/* One to three octal digits */
for (int i = 0; i < 3; ++i)
{
ch = 8 * ch + (state->ch - '0');
next_char(state);
if ((state->ch < '0') || (state->ch > '7'))
break;
}
break;
case 'X':
case 'x':
ch = 0;
next_char(state);
release_assert(isxdigit(state->ch));
/* One or two hex digits */
for (int i = 0; i < 2; ++i)
{
int n = isdigit(state->ch)
? (state->ch - '0')
: (10 + toupper(state->ch) - 'A');
ch = 16 * ch + n;
next_char(state);
if (!isxdigit(state->ch))
break;
}
break;
case ' ':
case '\t':
case '\v':
case '\n':
skip_ws = true;
break;
case ';':
/* Treats everything that follows on the same line as a comment,
* and additionally skips leading whitespace on the next line. */
while (state->ch != '\n')
{
release_assert(state->ch != EOF);
next_char(state);
}
skip_ws = true;
break;
case '\\': ch = '\\'; next_char(state); break;
case '\'': ch = '\''; next_char(state); break;
case '\"': ch = '\"'; next_char(state); break;
case 'a': ch = '\a'; next_char(state); break;
case 'b': ch = '\b'; next_char(state); break;
case 'f': ch = '\f'; next_char(state); break;
case 'n': ch = '\n'; next_char(state); break;
case 'r': ch = '\r'; next_char(state); break;
case 't': ch = '\t'; next_char(state); break;
case 'v': ch = '\v'; next_char(state); break;
default:
release_assert(NOTREACHED("Invalid escape sequence in string."));
ch = '#';
next_char(state);
break;
}
}
if (skip_ws)
{
bool hit_eol = false;
/* Slightly different from the normal skip_whitespace(); skips
* whitespace through the _second_ EOL following the backslash. */
do {
if (state->ch == '\n')
{
if (!hit_eol)
{
hit_eol = true;
}
else
{
next_char(state);
break;
}
}
next_char(state);
} while (isspace(state->ch));
}
else
{
buffer[length++] = ch;
}
}
next_char(state);
value = make_byte_string(length, '\0');
memcpy(_get_byte_string(value)->bytes, buffer, length);
free(buffer);
return value;
}
static value_t read_immutable(reader_state_t *state)
{
gc_root_t ph_root;
value_t val;
register_gc_root(&ph_root, make_struct(immutable_ph_root.value));
val = read_one_value(state);
IMMUTABLE_PH_VALUE(ph_root.value) = val;
WRITE_BARRIER(ph_root.value);
unregister_gc_root(&ph_root);
return ph_root.value;
}
static value_t read_box(reader_state_t *state)
{
next_char(state);
return make_box(read_one_value(state));
}
static value_t read_vector(reader_state_t *state)
{
gc_root_t list_root;
size_t length = 0;
value_t value;
value_t item;
register_gc_root(&list_root, read_list(state));
for (value_t item = list_root.value; !is_nil(item); item = CDR(item))
++length;
value = make_vector(length, UNDEFINED);
item = list_root.value;
for (size_t i = 0; i < length; ++i)
{
_get_vector(value)->elements[i] = _CAR(item);
/* No write barrier needed here. */
item = _CDR(item);
}
unregister_gc_root(&list_root);
return value;
}
static value_t read_struct(reader_state_t *state)
{
gc_root_t ph_root;
value_t values;
register_gc_root(&ph_root, make_struct(struct_ph_root.value));
values = read_list(state);
STRUCT_PH_TYPE(ph_root.value) = CAR(values);
STRUCT_PH_VALUES(ph_root.value) = CDR(values);
STRUCT_PH_RESULT(ph_root.value) = FALSE_VALUE;
WRITE_BARRIER(ph_root.value);
unregister_gc_root(&ph_root);
return ph_root.value;
}
static value_t read_weak_box(reader_state_t *state)
{
value_t value;
next_char(state);
value = read_one_value(state);
state->weak_list.value = cons(value, state->weak_list.value);
return make_weak_box(value);
}
static value_t read_definition(reader_state_t *state)
{
fixnum_t ref = get_fixnum(read_fixnum(state, 0));
gc_root_t place_root;
value_t v;
release_assert(state->ch == '=');
next_char(state);
register_gc_root(&place_root, get_reference(state, ref));
v = read_one_value(state);
set_reference(state, place_root.value, v);
unregister_gc_root(&place_root);
return v;
}
static value_t read_reference(reader_state_t *state)
{
next_char(state);
skip_whitespace(state);
release_assert(state->ch != EOF);
if (state->ch == '"')
{
char *name = value_to_string(read_string(state));
value_t bi = lookup_builtin(name);
if (bi == FALSE_VALUE)
{
fprintf(stderr, "Unable to locate \"%s\" builtin.\n", name);
}
free(name);
release_assert(bi != FALSE_VALUE);
return bi;
}
else
{
return get_reference(state, get_fixnum(read_fixnum(state, 0)));
}
}
static value_t read_indirect(value_t path)
{
char *name = value_to_string(path);
value_t v = read_value_from_path(name);
free(name);
return v;
}
static value_t freeze(value_t val)
{
if (is_vector(val))
{
_get_vector(val)->immutable = true;
}
else if (is_byte_string(val))
{
_get_byte_string(val)->immutable = true;
}
else if (is_struct(val))
{
_get_struct(val)->immutable = true;
}
else
{
/* Error if value cannot be made immutable */
release_assert(!is_object(val) || is_float(val) || is_builtin_fn(val));
}
return val;
}
static bool is_reference(reader_state_t *state, value_t value)
{
return struct_is_a(value, reference_root.value);
}
static value_t get_reference(reader_state_t *state, fixnum_t refid)
{
value_t refidval = fixnum_value(refid);
for (value_t item = state->ref_list.value; !is_nil(item); item = _CDR(item))
{
if (REF_IDENT(_CAR(item)) == refidval)
return _CAR(item);
}
/* No existing reference with that number; create a new one. */
{
value_t ref = make_struct(reference_root.value);
REF_IDENT(ref) = refidval;
REF_VALUE(ref) = UNDEFINED;
REF_PATCHED(ref) = FALSE_VALUE;
state->ref_list.value = cons(ref, state->ref_list.value);
}
return _CAR(state->ref_list.value);
}
static void set_reference(reader_state_t *state, value_t ref, value_t value)
{
assert(is_reference(state, ref));
release_assert(is_undefined(REF_VALUE(ref)));
REF_VALUE(ref) = value;
WRITE_BARRIER(ref);
}
static void finalize_references(reader_state_t *state)
{
bool changed = true;
/* We're done when no placeholders link to other placeholders. */
while (changed)
{
changed = false;
/* Resolve one level of placeholder-to-placeholder links. */
for (value_t item = state->ref_list.value; !is_nil(item); item = _CDR(item))
{
value_t ref = _CAR(item);
if (REF_VALUE(ref) == ref)
{
/* Self-links indicate cycles. */
REF_VALUE(ref) = UNDEFINED;
changed = true;
}
else if (is_reference(state, REF_VALUE(ref)))
{
REF_VALUE(ref) = REF_VALUE(REF_VALUE(ref));
WRITE_BARRIER(ref);
changed = true;
}
}
}
}
static value_t _patch_placeholders(reader_state_t *state, value_t in, void *seen)
{
gc_root_t in_root;
struct seen_value
{
gc_root_t *root;
struct seen_value *prev;
} this_seen = { &in_root, (struct seen_value*)seen };
if (struct_is_a(in, reference_root.value))
{
if (!_get_boolean(REF_PATCHED(in)))
{
value_t val;
REF_PATCHED(in) = TRUE_VALUE;
register_gc_root(&in_root, in);
val = _patch_placeholders(state, REF_VALUE(in_root.value), &this_seen);
in = in_root.value;
unregister_gc_root(&in_root);
REF_VALUE(in) = val;
WRITE_BARRIER(in);
}
return REF_VALUE(in);
}
for (struct seen_value *item = this_seen.prev; item; item = item->prev)
{
if (in == item->root->value)
return in;
}
register_gc_root(&in_root, in);
if (struct_is_a(in_root.value, immutable_ph_root.value))
{
value_t val = _patch_placeholders(state, IMMUTABLE_PH_VALUE(in_root.value), &this_seen);
in_root.value = freeze(val);
}
else if (struct_is_a(in_root.value, struct_ph_root.value))
{
if (_get_boolean(STRUCT_PH_RESULT(in_root.value)))
{
in_root.value = STRUCT_PH_RESULT(in_root.value);
}
else
{
value_t sval;
value_t values;
STRUCT_PH_RESULT(in_root.value) = UNDEFINED;
sval = make_struct(_patch_placeholders(state, STRUCT_PH_TYPE(in_root.value), &this_seen));
STRUCT_PH_RESULT(in_root.value) = sval;
values = STRUCT_PH_VALUES(in_root.value);
in_root.value = sval;
for (int i = 0; i < _get_struct(in_root.value)->nslots; ++i)
{
if (is_nil(values)) break;
_get_struct(in_root.value)->slots[i] = CAR(values);
values = _CDR(values);
}
WRITE_BARRIER(in_root.value);
for (int i = 0; i < _get_struct(in_root.value)->nslots; ++i)
{
_get_struct(in_root.value)->slots[i] =
_patch_placeholders(state, _get_struct(in_root.value)->slots[i], &this_seen);
WRITE_BARRIER(in_root.value);
}
}
}
else if (is_box(in_root.value))
{
value_t val = _patch_placeholders(state, _get_box(in_root.value)->value, &this_seen);
_get_box(in_root.value)->value = val;
}
else if (is_weak_box(in_root.value))
{
value_t val = _patch_placeholders(state, _get_weak_box(in_root.value)->value, &this_seen);
_get_weak_box(in_root.value)->value = val;
}
else if (is_pair(in_root.value))
{
value_t val;
val = _patch_placeholders(state, _CAR(in_root.value), &this_seen);
_CAR(in_root.value) = val;
val = _patch_placeholders(state, _CDR(in_root.value), &this_seen);
_CDR(in_root.value) = val;
}
else if (is_vector(in_root.value))
{
size_t nelem = _get_vector(in_root.value)->size;
for (size_t i = 0; i < nelem; ++i)
{
value_t val = _patch_placeholders(state, _get_vector(in_root.value)->elements[i], &this_seen);
_get_vector(in_root.value)->elements[i] = val;
}
}
unregister_gc_root(&in_root);
return in_root.value;
}
static value_t patch_placeholders(reader_state_t *state, value_t in)
{
gc_root_t root;
register_gc_root(&root, in);
finalize_references(state);
root.value = _patch_placeholders(state, root.value, NULL);
unregister_gc_root(&root);
return root.value;
}
static void skip_whitespace(reader_state_t *state)
{
for (;;)
{
if (isspace(state->ch))
{
next_char(state);
}
else if (state->ch == ';')
{
/* Comments count as whitespace */
do {
next_char(state);
if (state->ch == '\n')
{
next_char(state);
break;
}
} while (state->ch != EOF);
}
else
{
break;
}
}
}
/* vim:set sw=2 expandtab: */