rosella/gc.c

993 lines
22 KiB
C

#define _POSIX_C_SOURCE 199309L
#include <assert.h>
#include <ctype.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "gc.h"
#if _CLOCK_MONOTONIC
# define TIMING_CLOCK CLOCK_MONOTONIC
#else
# define TIMING_CLOCK CLOCK_REALTIME
#endif
gc_stats_t gc_stats;
/* Helper macros to reduce duplication */
#define VECTOR_BYTES(nelem) (sizeof(vector_t) + (sizeof(value_t) * (nelem)))
#define BYTESTR_BYTES(size) (sizeof(byte_string_t) + (size))
#define STRUCT_BYTES(nslots) (sizeof(struct_t) + (sizeof(value_t) * (nslots)))
/* Alignment must ensure each object has enough room to hold a forwarding object */
#define GC_ALIGNMENT ((size_t)(sizeof(object_t)))
/****************************************************************************/
static char *gc_ranges[2];
static size_t gc_min_size;
static size_t gc_max_size;
static size_t gc_soft_limit;
static bool gc_enabled;
static int gc_current_range;
static char *gc_free_ptr;
static char *gc_range_end;
static value_t gc_weak_box_list;
static value_t gc_will_list;
static value_t gc_will_active_list;
static gc_root_t gc_root_list = {
.value = UNDEFINED,
.prev = &gc_root_list,
.next = &gc_root_list
};
void register_gc_root(gc_root_t *root, value_t v)
{
root->value = v;
root->next = &gc_root_list;
gc_root_list.prev->next = root;
root->prev = gc_root_list.prev;
gc_root_list.prev = root;
}
void unregister_gc_root(gc_root_t *root)
{
assert(root && root->prev && root->next); /* Uninitialized */
assert((root->prev != root) && (root->next != root)); /* Already removed */
/* Cut the given root out of the list */
root->prev->next = root->next;
root->next->prev = root->prev;
/* Remove dead references to root list; protects against double-removal */
root->prev = root->next = root;
}
/****************************************************************************/
bool get_boolean(value_t v)
{
release_assert(is_boolean(v));
return (v != FALSE_VALUE);
}
fixnum_t get_fixnum(value_t v)
{
release_assert(is_fixnum(v));
return _get_fixnum(v);
}
object_t *get_object(value_t v)
{
release_assert(is_object(v));
return _get_object(v);
}
/* No one outside this module should care... */
static inline bool is_broken_heart(value_t v)
{
return is_object(v) && (_get_object(v)->tag == BROKEN_HEART);
}
value_t cons(value_t car, value_t cdr)
{
gc_root_t car_root, cdr_root;
pair_t *p;
register_gc_root(&car_root, car);
register_gc_root(&cdr_root, cdr);
p = (pair_t*)gc_alloc(sizeof(pair_t));
p->car = car_root.value;
p->cdr = cdr_root.value;
unregister_gc_root(&car_root);
unregister_gc_root(&cdr_root);
return pair_value(p);
}
pair_t *get_pair(value_t v)
{
release_assert(is_pair(v));
return _get_pair(v);
}
value_t make_box(value_t initial_value)
{
gc_root_t iv_root;
box_t *box;
register_gc_root(&iv_root, initial_value);
box = (box_t*)gc_alloc(sizeof(box_t));
box->tag = TYPE_TAG_BOX;
box->value = iv_root.value;
unregister_gc_root(&iv_root);
return object_value(box);
}
box_t *get_box(value_t v)
{
release_assert(is_box(v));
return _get_box(v);
}
value_t make_vector(size_t nelem, value_t initial_value)
{
gc_root_t iv_root;
vector_t *vec;
register_gc_root(&iv_root, initial_value);
vec = (vector_t*)gc_alloc(VECTOR_BYTES(nelem));
vec->tag = TYPE_TAG_VECTOR;
vec->size = nelem;
for (int i = 0; i < nelem; ++i)
vec->elements[i] = iv_root.value;
unregister_gc_root(&iv_root);
return object_value(vec);
}
vector_t *get_vector(value_t v)
{
release_assert(is_vector(v));
return _get_vector(v);
}
value_t make_byte_string(size_t size, int default_value)
{
const size_t nbytes = BYTESTR_BYTES(size);
byte_string_t *str;
str = (byte_string_t*)gc_alloc(nbytes);
str->tag = TYPE_TAG_BYTESTR;
str->size = size;
memset(str->bytes, default_value, size);
return object_value(str);
}
byte_string_t *get_byte_string(value_t v)
{
release_assert(is_byte_string(v));
return _get_byte_string(v);
}
value_t string_to_value(const char *s)
{
size_t len = strlen(s);
value_t v = make_byte_string(len, '\0');
memcpy(_get_byte_string(v)->bytes, s, len);
return v;
}
char *value_to_string(value_t v)
{
byte_string_t *str = get_byte_string(v);
char *s = (char*)malloc(str->size + 1);
memcpy(s, str->bytes, str->size);
s[str->size] = '\0';
return s;
}
int byte_strcmp(value_t s1, value_t s2)
{
byte_string_t *str1 = get_byte_string(s1);
byte_string_t *str2 = get_byte_string(s2);
if (str1->size < str2->size)
return -1;
else if (str1->size > str2->size)
return 1;
else
return memcmp(str1->bytes, str2->bytes, str1->size);
}
value_t make_struct(value_t type, size_t nslots)
{
gc_root_t type_root;
struct_t *s;
register_gc_root(&type_root, type);
s = (struct_t*)gc_alloc(STRUCT_BYTES(nslots));
s->tag = TYPE_TAG_STRUCT;
s->type = type_root.value;
s->nslots = nslots;
for (int i = 0; i < nslots; ++i)
s->slots[i] = UNDEFINED;
unregister_gc_root(&type_root);
return object_value(s);
}
struct_t *get_struct(value_t v)
{
release_assert(is_struct(v));
return _get_struct(v);
}
value_t make_weak_box(value_t initial_value)
{
gc_root_t iv_root;
weak_box_t *box;
register_gc_root(&iv_root, initial_value);
box = (weak_box_t*)gc_alloc(sizeof(weak_box_t));
box->tag = TYPE_TAG_WEAK_BOX;
box->value = iv_root.value;
box->next = gc_weak_box_list;
gc_weak_box_list = object_value(box);
unregister_gc_root(&iv_root);
return object_value(box);
}
weak_box_t *get_weak_box(value_t v)
{
release_assert(is_weak_box(v));
return _get_weak_box(v);
}
void register_finalizer(value_t value, value_t finalizer)
{
/* Non-objects are never GC'd, so their finalizers will never be invoked. */
if (is_object(value))
{
gc_root_t value_root, finalizer_root;
will_t *w;
register_gc_root(&value_root, value);
register_gc_root(&finalizer_root, finalizer);
w = (will_t*)gc_alloc(sizeof(will_t));
w->tag = TYPE_TAG_WILL;
w->value = value_root.value;
w->finalizer = finalizer_root.value;
w->next = gc_will_list;
gc_will_list = object_value(w);
unregister_gc_root(&value_root);
unregister_gc_root(&finalizer_root);
}
}
/* The 'will' accessors are private to the GC, unlike other similar routines. */
static inline will_t *_get_will(value_t v)
{
return (will_t*)_get_object(v);
}
static will_t *get_will(value_t v)
{
release_assert(is_will(v));
return _get_will(v);
}
value_t make_float(native_float_t value)
{
float_object_t *obj;
obj = (float_object_t*)gc_alloc(sizeof(float_object_t));
obj->tag = TYPE_TAG_FLOAT;
obj->value = value;
return object_value(obj);
}
native_float_t get_float(value_t v)
{
release_assert(is_float(v));
return _get_float(v);
}
value_t make_builtin_fn(builtin_fn_t *fn)
{
builtin_fn_object_t *obj;
obj = (builtin_fn_object_t*)gc_alloc(sizeof(builtin_fn_object_t));
obj->tag = TYPE_TAG_BUILTIN;
obj->fn = fn;
return object_value(obj);
}
builtin_fn_t *get_builtin_fn(value_t v)
{
release_assert(is_builtin_fn(v));
return _get_builtin_fn(v);
}
/****************************************************************************/
static inline size_t gc_align(size_t nbytes) __attribute__ ((const));
static int gc_range_of(void *object) __attribute__ ((const,unused));
static void transfer_object(value_t *value);
static size_t transfer_children(object_t *object);
static void _collect_garbage(size_t min_free);
static inline size_t gc_align(size_t nbytes)
{
return ((nbytes + GC_ALIGNMENT - 1) & ~(GC_ALIGNMENT - 1));
}
static int gc_range_of(void *object)
{
if (((value_t)object >= (value_t)gc_ranges[0]) &&
((value_t)object < (value_t)gc_ranges[1]))
return 0;
if (((value_t)object >= (value_t)gc_ranges[1]) &&
((value_t)object < (value_t)gc_ranges[2]))
return 1;
return -1;
}
static inline size_t gc_free_space(void)
{
return gc_range_end - gc_free_ptr;
}
void gc_init(size_t min_size, size_t max_size)
{
assert(min_size <= max_size);
gc_ranges[0] = (char*)malloc(max_size);
gc_ranges[1] = (char*)malloc(max_size);
assert(gc_ranges[0] && gc_ranges[1]);
gc_current_range = 0;
gc_free_ptr = gc_ranges[gc_current_range];
gc_min_size = min_size;
gc_max_size = max_size;
gc_soft_limit = gc_min_size;
gc_range_end = gc_free_ptr + gc_soft_limit;
gc_stats.collections = 0;
gc_stats.total_ns = 0;
gc_stats.total_freed = 0;
gc_stats.high_water = 0;
gc_stats.max_ns = 0;
gc_weak_box_list = NIL;
gc_will_list = NIL;
gc_will_active_list = NIL;
gc_enabled = true;
}
/* Preconditions: nbytes pre-aligned a la gc_align(), and space exists. */
static inline void *_gc_alloc(size_t nbytes)
{
void *p = gc_free_ptr;
assert(nbytes == gc_align(nbytes));
assert(nbytes <= gc_free_space());
gc_free_ptr += nbytes;
return p;
}
void *gc_alloc(size_t nbytes)
{
nbytes = gc_align(nbytes);
if (nbytes > gc_free_space())
_collect_garbage(nbytes);
return _gc_alloc(nbytes);
}
/* Precondition: *value refers to an object (or pair). */
static void transfer_object(value_t *value)
{
if (is_object(*value))
{
object_t *obj = _get_object(*value);
size_t nbytes;
void *newobj;
assert(gc_range_of(obj) != gc_current_range);
if (obj->tag == BROKEN_HEART)
{
/* Object has already been moved; just update the reference */
*value = obj->forward;
return;
}
switch (obj->tag)
{
case TYPE_TAG_BOX:
nbytes = sizeof(box_t);
break;
case TYPE_TAG_VECTOR:
nbytes = VECTOR_BYTES(((const vector_t*)obj)->size);
break;
case TYPE_TAG_BYTESTR:
nbytes = BYTESTR_BYTES(((const byte_string_t*)obj)->size);
break;
case TYPE_TAG_STRUCT:
nbytes = STRUCT_BYTES(((const struct_t*)obj)->nslots);
break;
case TYPE_TAG_WEAK_BOX:
nbytes = sizeof(weak_box_t);
break;
case TYPE_TAG_WILL:
nbytes = sizeof(will_t);
break;
case TYPE_TAG_FLOAT:
nbytes = sizeof(float_object_t);
break;
case TYPE_TAG_BUILTIN:
nbytes = sizeof(builtin_fn_object_t);
break;
default: /* pair */
nbytes = sizeof(pair_t);
break;
}
newobj = _gc_alloc(gc_align(nbytes));
memcpy(newobj, obj, nbytes);
/* Keep the original tag bits (pair or object) */
obj->tag = BROKEN_HEART;
*value = obj->forward = (object_value(newobj) & ~2) | (*value & 2);
}
}
static size_t transfer_vector(vector_t *vec)
{
assert(vec->tag == TYPE_TAG_VECTOR);
for (size_t i = 0; i < vec->size; ++i)
transfer_object(&vec->elements[i]);
return VECTOR_BYTES(vec->size);
}
static size_t transfer_struct(struct_t *s)
{
assert(s->tag == TYPE_TAG_STRUCT);
transfer_object(&s->type);
for (size_t i = 0; i < s->nslots; ++i)
transfer_object(&s->slots[i]);
return STRUCT_BYTES(s->nslots);
}
static size_t transfer_box(box_t *b)
{
transfer_object(&b->value);
return sizeof(box_t);
}
static size_t transfer_pair(pair_t *p)
{
transfer_object(&p->car);
transfer_object(&p->cdr);
return sizeof(pair_t);
}
static size_t transfer_will(will_t *w)
{
assert(w->tag == TYPE_TAG_WILL);
transfer_object(&w->finalizer);
/* Weak boxes are discarded when there are no other references,
* but wills need to remain until their finalizers are invoked. */
transfer_object(&w->next);
return sizeof(will_t);
}
static size_t transfer_children(object_t *obj)
{
switch (obj->tag)
{
case TYPE_TAG_BOX:
return transfer_box((box_t*)obj);
case TYPE_TAG_VECTOR:
return transfer_vector((vector_t*)obj);
case TYPE_TAG_BYTESTR:
return BYTESTR_BYTES(((const byte_string_t*)obj)->size);
case TYPE_TAG_STRUCT:
return transfer_struct((struct_t*)obj);
case TYPE_TAG_WEAK_BOX:
return sizeof(weak_box_t);
case TYPE_TAG_WILL:
return transfer_will((will_t*)obj);
case TYPE_TAG_FLOAT:
return sizeof(float_object_t);
case TYPE_TAG_BUILTIN:
return sizeof(builtin_fn_object_t);
default: /* pair */
return transfer_pair((pair_t*)obj);
}
}
static void swap_gc_ranges(void)
{
gc_current_range = 1 - gc_current_range;
gc_free_ptr = gc_ranges[gc_current_range];
gc_range_end = gc_free_ptr + gc_soft_limit;
}
static void transfer_roots(void)
{
/* Transfer registered GC roots */
for (gc_root_t *root = gc_root_list.next; root != &gc_root_list; root = root->next)
transfer_object(&root->value);
/* Ensure pending will list is transferred */
transfer_object(&gc_will_list);
/* The values associated with active wills are also roots */
for (value_t *will = &gc_will_active_list; !is_nil(*will); will = &_get_will(*will)->next)
transfer_object(&get_will(*will)->value);
/* Ensure active list itself is transferred */
transfer_object(&gc_will_active_list);
}
static void process_weak_boxes(void)
{
value_t wb = gc_weak_box_list;
while (!is_nil(wb))
{
weak_box_t *box;
if (is_broken_heart(wb))
{
/* Box has been moved; get a pointer to the new location, but don't update list yet. */
value_t fw = _get_object(wb)->forward;
assert(is_weak_box(fw));
box = _get_weak_box(fw);
}
else
{
/* Box hasn't been moved yet, but may live on as the value of a will. */
assert(is_weak_box(wb));
box = _get_weak_box(wb);
}
if (is_broken_heart(box->value))
{
/* The value in the box is reachable; update w/ new location. */
box->value = _get_object(box->value)->forward;
}
else if (is_object(box->value))
{
/* The value in the box is an unreachable object; change to #f. */
/* Note that an object is considered unreachable via weak box when it could be finalized,
* even though it will be kept alive until the finalizer(s) is/are removed from the 'active'
* list and the finalizer(s) itself/themselves may restore the object to a reachable state. */
/* This last behavior is not recommended. */
box->value = FALSE_VALUE;
}
/* Move on to this box's 'next' pointer */
wb = box->next;
}
}
/* Precondition: The wills themselves, and their finalizers,
* have already been transferred (recursively). */
static void process_wills(void)
{
/*
* Was value transferred (broken heart), and thus reachable?
* Yes ==> update value.
* No ==> transfer value and move will to active list.
*/
value_t *will = &gc_will_list;
while (!is_nil(*will))
{
will_t *w = get_will(*will);
if (is_broken_heart(w->value))
{
/* The value associated with the will is still reachable; update w/ new location. */
w->value = _get_object(w->value)->forward;
/* Move on to this will's 'next' pointer */
will = &w->next;
}
else
{
assert(is_object(w->value));
/*
* The will is associated with an unreachable object; activate it.
*/
/* First, ensure that the value remains reachable for the finalizer. */
transfer_object(&w->value);
/* Remove the will from the 'pending' list. */
*will = w->next;
/* Insert the will into the 'active' list. */
w->next = gc_will_active_list;
gc_will_active_list = object_value(w);
}
}
}
static void update_weak_box_list(void)
{
value_t *wb = &gc_weak_box_list;
while (!is_nil(*wb))
{
if (is_broken_heart(*wb))
{
/* The box itself is reachable; need to update 'next' pointer to new location */
*wb = _get_object(*wb)->forward;
/* Move on to next box's 'next' pointer */
assert(is_weak_box(*wb));
wb = &_get_weak_box(*wb)->next;
}
else
{
/* Box is no longer reachable; remove it from the list by updating 'next' pointer. */
assert(is_weak_box(*wb));
*wb = _get_weak_box(*wb)->next;
}
}
}
#define GC_DEFLATE_SIZE (64*1024)
static void update_soft_limit(size_t min_free)
{
size_t bytes_used = gc_free_ptr - gc_ranges[gc_current_range];
size_t min_limit = bytes_used + min_free;
size_t new_limit = (4 * min_limit) / 3;
if (gc_soft_limit > GC_DEFLATE_SIZE)
{
size_t deflate_limit = gc_soft_limit - GC_DEFLATE_SIZE;
if (new_limit < deflate_limit)
new_limit = deflate_limit;
}
if (new_limit > gc_max_size)
new_limit = gc_max_size;
else if (new_limit < gc_min_size)
new_limit = gc_min_size;
gc_soft_limit = new_limit;
/* Update end of range to reflect new limit */
gc_range_end = gc_ranges[gc_current_range] + gc_soft_limit;
#ifndef NO_STATS
if (gc_soft_limit > gc_stats.high_water)
{
gc_stats.high_water = gc_soft_limit;
}
#endif
}
static void _collect_garbage(size_t min_free)
{
if (gc_enabled)
{
char *object_ptr;
#ifndef NO_STATS
#ifndef NO_TIMING_STATS
struct timespec start_time;
clock_gettime(TIMING_CLOCK, &start_time);
#endif
gc_stats.total_freed -= gc_free_space();
++gc_stats.collections;
#endif
//debug(("Collecting garbage...\n"));
swap_gc_ranges();
/* New "current" range is initially empty, old one is full */
object_ptr = gc_free_ptr;
/* Prime the pump */
transfer_roots();
/* Keep transferring until no more objects in the new range refer to the old one,
* other than pending wills and weak boxes. */
while (object_ptr < gc_free_ptr)
{
object_ptr += gc_align(transfer_children((object_t*)object_ptr));
}
/* These have to be examined after normal reachability has been determined */
process_weak_boxes();
process_wills();
/* Keep transferring until no more objects in the new range refer to the old one.
* This is so that values which are otherwise unreachable, but have finalizers which
* may be able to reach them, are not collected prematurely. process_wills() transfers
* the value of any will newly placed on the active list. Note that these values may
* be finalized in any order, and that any weak references have already been cleared. */
while (object_ptr < gc_free_ptr)
{
object_ptr += gc_align(transfer_children((object_t*)object_ptr));
}
update_weak_box_list();
#ifndef NDEBUG
/* Clear old range, to make it easier to detect bugs. */
memset(gc_ranges[1-gc_current_range], 0, gc_soft_limit);
#endif
//debug(("Finished collection with %d bytes to spare (out of %d bytes).\n", gc_free_space(), gc_soft_limit));
#ifndef NO_STATS
#ifndef NO_TIMING_STATS
{
struct timespec end_time;
nsec_t nsec;
clock_gettime(TIMING_CLOCK, &end_time);
nsec = (end_time.tv_sec - start_time.tv_sec) * 1000000000LL;
nsec += (end_time.tv_nsec - start_time.tv_nsec);
gc_stats.total_ns += nsec;
if (nsec > gc_stats.max_ns)
gc_stats.max_ns = nsec;
}
}
#endif
gc_stats.total_freed += gc_free_space();
#endif
update_soft_limit(min_free);
if (gc_free_space() < min_free)
{
out_of_memory();
}
}
void collect_garbage(size_t min_free)
{
bool was_enabled = set_gc_enabled(true);
_collect_garbage(min_free);
set_gc_enabled(was_enabled);
}
bool set_gc_enabled(bool enable)
{
bool was_enabled = gc_enabled;
gc_enabled = enable;
return was_enabled;
}
bool are_finalizers_pending(void)
{
return !is_nil(gc_will_active_list);
}
/* Finalizer can be registered as #f, but value must be an object.
* Returning with value == #f means there are no more finalizers. */
void get_next_finalizer(value_t *value, value_t *finalizer)
{
assert(value && finalizer);
if (is_nil(gc_will_active_list))
{
*value = *finalizer = FALSE_VALUE;
}
else
{
will_t *w = get_will(gc_will_active_list);
*value = w->value;
*finalizer = w->finalizer;
/* Remove this finalizer from the list -- up to caller to keep values reachable. */
gc_will_active_list = w->next;
}
}
void _release_assert(bool expr, const char *str, const char *file, int line)
{
if (!expr)
{
fprintf(stderr, "ERROR: Invalid state detected in %s, line %d.\n"
"Assertion failed: %s\n",
file, line, str);
abort();
}
}
void fprint_value(FILE *f, value_t v)
{
if (v == NIL)
{
fputs("nil", f);
}
else if (v == FALSE_VALUE)
{
fputs("#f", f);
}
else if (v == TRUE_VALUE)
{
fputs("#t", f);
}
else if (v == UNDEFINED)
{
fputs("#<undefined>", f);
}
else if (is_fixnum(v))
{
fprintf(f, "%d", (int)get_fixnum(v));
}
else if (is_box(v))
{
fputs("#&", f);
fprint_value(f, _get_box(v)->value);
}
else if (is_pair(v))
{
fputc('(', f);
fprint_value(f, _get_pair(v)->car);
v = _get_pair(v)->cdr;
while (is_pair(v))
{
fputc(' ', f);
fprint_value(f, _get_pair(v)->car);
v = _get_pair(v)->cdr;
}
if (v != NIL)
{
fputs(" . ", f);
fprint_value(f, v);
}
fputc(')', f);
}
else if (is_vector(v))
{
fputs("#(", f);
for (size_t i = 0; i < _get_vector(v)->size; ++i)
{
if (i != 0) fputc(' ', f);
fprint_value(f, _get_vector(v)->elements[i]);
}
fputc(')', f);
}
else if (is_byte_string(v))
{
byte_string_t *str = _get_byte_string(v);
fputc('"', f);
for (size_t i = 0; i < str->size; ++i)
{
int ch = str->bytes[i];
if (isprint(ch) && (ch != '\\') && (ch != '\"'))
fputc(str->bytes[i], f);
else
fprintf(f, "\\x%.2X", (int)str->bytes[i]);
}
fputc('"', f);
}
else if (is_struct(v))
{
struct_t *meta = get_struct(_get_struct(v)->type);
byte_string_t *str = get_byte_string(meta->slots[0]);
fputs("#S(", f);
fwrite(str->bytes, str->size, 1, f);
for (size_t i = 0; i < _get_struct(v)->nslots; ++i)
{
fputc(' ', f);
fprint_value(f, _get_struct(v)->slots[i]);
}
fputc(')', f);
}
else if (is_weak_box(v))
{
fputs("#W&", f);
fprint_value(f, _get_weak_box(v)->value);
}
else if (is_float(v))
{
fprintf(f, "%f", (double)_get_float(v));
}
else if (is_builtin_fn(v))
{
fputs("#<builtin>", f);
}
else
{
fputs("#<unknown>", f);
}
}
void fprint_gc_stats(FILE *f)
{
const double total_time = gc_stats.total_ns / 1.0e9;
const double max_time = gc_stats.max_ns / 1.0e9;
fprintf(f, "%lld bytes freed in %0.6f sec => %0.3f MB/sec. (%d GCs.)\n",
gc_stats.total_freed,
total_time,
(gc_stats.total_freed / total_time) / (1024*1024),
gc_stats.collections);
fprintf(f, "Max GC time was %0.6f sec, avg. %0.6f sec; peak heap size was %d bytes.\n",
max_time, (total_time / gc_stats.collections), gc_stats.high_water);
}
/* vim:set sw=2 expandtab: */