From 7bd6e616ff30617b693436ceb16b19f659dbe71b Mon Sep 17 00:00:00 2001 From: Jesse McDonald Date: Sat, 7 Nov 2009 01:27:11 -0600 Subject: [PATCH] Add support for weak boxes and wills/finalizers. Also refactored some GC code into smaller functions. Weak boxes and wills will be useful for implementing weak hash tables, which in turn will be used to store dynamic parameters efficiently. --- gc.c | 417 ++++++++++++++++++++++++++++++++++++++++++++++------------- gc.h | 87 +++++++++++-- 2 files changed, 404 insertions(+), 100 deletions(-) diff --git a/gc.c b/gc.c index 45194d0..aea301d 100644 --- a/gc.c +++ b/gc.c @@ -30,6 +30,10 @@ static int gc_current_range; static char *gc_free_ptr; static char *gc_range_end; +static value_t gc_weak_box_list; +static value_t gc_will_list; +static value_t gc_will_active_list; + static gc_root_t gc_root_list = { .value = NIL, .prev = &gc_root_list, @@ -68,6 +72,12 @@ object_t *get_object(value_t v) abort(); } +/* No one outside this module should care... */ +static inline bool is_broken_heart(value_t v) +{ + return is_object(v) && (_get_object(v)->tag == BROKEN_HEART); +} + value_t cons(value_t car, value_t cdr) { gc_root_t car_root, cdr_root; @@ -76,7 +86,7 @@ value_t cons(value_t car, value_t cdr) register_gc_root(&car_root, car); register_gc_root(&cdr_root, cdr); - p = gc_alloc(sizeof(pair_t)); + p = (pair_t*)gc_alloc(sizeof(pair_t)); p->car = car_root.value; p->cdr = cdr_root.value; @@ -201,6 +211,70 @@ struct_t *get_struct(value_t v) abort(); } +value_t make_weak_box(value_t initial_value) +{ + gc_root_t iv_root; + weak_box_t *box; + + register_gc_root(&iv_root, initial_value); + + box = (weak_box_t*)gc_alloc(sizeof(weak_box_t)); + box->tag = TYPE_TAG_WEAK_BOX; + box->value = iv_root.value; + box->next = gc_weak_box_list; + gc_weak_box_list = object_value(box); + + unregister_gc_root(&iv_root); + + return object_value(box); +} + +weak_box_t *get_weak_box(value_t v) +{ + if (is_weak_box(v)) + return _get_weak_box(v); + else + abort(); +} + +void register_finalizer(value_t value, value_t finalizer) +{ + /* Non-objects are never GC'd, so their finalizers will never be invoked. */ + if (is_object(value)) + { + gc_root_t value_root, finalizer_root; + will_t *w; + + register_gc_root(&value_root, value); + register_gc_root(&finalizer_root, finalizer); + + w = (will_t*)gc_alloc(sizeof(will_t)); + w->tag = TYPE_TAG_WILL; + w->value = value_root.value; + w->finalizer = finalizer_root.value; + w->next = gc_will_list; + + gc_will_list = object_value(w); + + unregister_gc_root(&value_root); + unregister_gc_root(&finalizer_root); + } +} + +/* The 'will' accessors are private to the GC, unlike other similar routines. */ +static inline will_t *_get_will(value_t v) +{ + return (will_t*)_get_object(v); +} + +static will_t *get_will(value_t v) +{ + if (is_will(v)) + return _get_will(v); + else + abort(); +} + intptr_t get_fixnum(value_t v) { if (is_fixnum(v)) @@ -259,6 +333,10 @@ void gc_init(size_t min_size, size_t max_size) gc_stats.total_ticks = 0; gc_stats.high_water = 0; + gc_weak_box_list = NIL; + gc_will_list = NIL; + gc_will_active_list = NIL; + gc_enabled = true; } @@ -283,74 +361,87 @@ void *gc_alloc(size_t nbytes) /* Precondition: *value refers to an object (or pair). */ static void transfer_object(value_t *value) { - object_t *obj; - size_t nbytes; - void *newobj; - value_t new_value; - - assert(gc_range_of(obj) != gc_current_range); - assert(is_object(*value)); - - obj = _get_object(*value); - - if (obj->tag == BROKEN_HEART) + if (is_object(*value)) { - /* Object has already been moved; just update the reference */ - *value = obj->forward; - return; + object_t *obj; + size_t nbytes; + void *newobj; + value_t new_value; + + assert(gc_range_of(obj) != gc_current_range); + assert(is_object(*value)); + + obj = _get_object(*value); + + if (obj->tag == BROKEN_HEART) + { + /* Object has already been moved; just update the reference */ + *value = obj->forward; + return; + } + + switch (obj->tag) + { + case TYPE_TAG_VECTOR: + case TYPE_TAG_STRUCT: + nbytes = VECTOR_BYTES(((const vector_t*)obj)->size); + break; + case TYPE_TAG_BYTESTR: + nbytes = BYTESTR_BYTES(((const byte_string_t*)obj)->size); + break; + case TYPE_TAG_WEAK_BOX: + nbytes = sizeof(weak_box_t); + break; + case TYPE_TAG_WILL: + nbytes = sizeof(will_t); + break; + case TYPE_TAG_BOX: + default: /* pair */ + nbytes = sizeof(pair_t); + break; + } + + newobj = _gc_alloc(gc_align(nbytes)); + + memcpy(newobj, obj, nbytes); + + /* Keep the original tag bits (pair or object) */ + new_value = object_value(newobj) | (*value & 2); + + obj->tag = BROKEN_HEART; + obj->forward = new_value; + + *value = new_value; } - - switch (obj->tag) - { - case TYPE_TAG_VECTOR: - case TYPE_TAG_STRUCT: - nbytes = VECTOR_BYTES(((const vector_t*)obj)->size); - break; - case TYPE_TAG_BYTESTR: - nbytes = BYTESTR_BYTES(((const byte_string_t*)obj)->size); - break; - case TYPE_TAG_BOX: - default: /* pair */ - nbytes = sizeof(pair_t); - break; - } - - newobj = _gc_alloc(gc_align(nbytes)); - - memcpy(newobj, obj, nbytes); - - /* Keep the original tag bits (pair or object) */ - new_value = object_value(newobj) | (*value & 2); - - obj->tag = BROKEN_HEART; - obj->forward = new_value; - - *value = new_value; } /* Also works on structs, which share the same layout */ static size_t transfer_vector(vector_t *vec) { for (size_t i = 0; i < vec->size; ++i) - { - if (is_object(vec->elements[i])) - transfer_object(&vec->elements[i]); - } + transfer_object(&vec->elements[i]); return VECTOR_BYTES(vec->size); } static size_t transfer_pair(pair_t *p) { - if (is_object(p->car)) - transfer_object(&p->car); - - if (is_object(p->cdr)) - transfer_object(&p->cdr); - + transfer_object(&p->car); + transfer_object(&p->cdr); return sizeof(pair_t); } +static size_t transfer_will(will_t *w) +{ + transfer_object(&w->finalizer); + + /* Weak boxes are discarded when there are no other references, + * but wills need to remain until their finalizers are invoked. */ + transfer_object(&w->next); + + return sizeof(will_t); +} + static size_t transfer_children(object_t *obj) { switch (obj->tag) @@ -360,41 +451,180 @@ static size_t transfer_children(object_t *obj) return transfer_vector((vector_t*)obj); case TYPE_TAG_BYTESTR: return BYTESTR_BYTES(((const byte_string_t*)obj)->size); + case TYPE_TAG_WEAK_BOX: + return sizeof(weak_box_t); + case TYPE_TAG_WILL: + return transfer_will((will_t*)obj); case TYPE_TAG_BOX: default: /* pair */ return transfer_pair((pair_t*)obj); } } +static void swap_gc_ranges(void) +{ + gc_current_range = 1 - gc_current_range; + gc_free_ptr = gc_ranges[gc_current_range]; + gc_range_end = gc_free_ptr + gc_soft_limit; +} + +static void transfer_roots(void) +{ + /* Transfer registered GC roots */ + for (gc_root_t *root = gc_root_list.next; root != &gc_root_list; root = root->next) + transfer_object(&root->value); + + /* Ensure pending will list is transferred */ + transfer_object(&gc_will_list); + + /* The values associated with active wills are also roots */ + for (value_t *will = &gc_will_active_list; !is_nil(*will); will = &_get_will(*will)->next) + transfer_object(&get_will(*will)->value); + + /* Ensure active list itself is transferred */ + transfer_object(&gc_will_active_list); +} + +static void process_weak_boxes(void) +{ + value_t *wb = &gc_weak_box_list; + + while (!is_nil(*wb)) + { + if (is_broken_heart(*wb)) + { + /* The box itself is reachable; need to update 'next' pointer to new location */ + weak_box_t *box; + + *wb = _get_object(*wb)->forward; + assert(is_weak_box(*wb)); + box = _get_weak_box(*wb); + + if (is_broken_heart(box->value)) + { + /* The value in the box is also reachable; update w/ new location. */ + box->value = _get_object(box->value)->forward; + } + else if (is_object(box->value)) + { + /* The value in the box is an unreachable object; change to #f */ + box->value = FALSE_VALUE; + } + + /* Move on to this box's 'next' pointer */ + wb = &_get_weak_box(*wb)->next; + } + else + { + /* Box is no longer reachable; remove it from the list by updating 'next' pointer. */ + assert(is_weak_box(*wb)); + *wb = _get_weak_box(*wb)->next; + } + } +} + +/* Precondition: The wills themselves, and their finalizers, + * have already been transferred (recursively). */ +static void process_wills(void) +{ + /* + * Was value transferred (broken heart), and thus reachable? + * Yes ==> update value. + * No ==> transfer value and move will to active list. + */ + value_t *will = &gc_will_list; + + while (!is_nil(*will)) + { + will_t *w = get_will(*will); + + if (is_broken_heart(w->value)) + { + /* The value associated with the will is still reachable; update w/ new location. */ + w->value = _get_object(w->value)->forward; + + /* Move on to this will's 'next' pointer */ + will = &w->next; + } + else + { + assert(is_object(w->value)); + + /* + * The will is associated with an unreachable object; activate it. + */ + + /* First, ensure that the value remains reachable for the finalizer. */ + transfer_object(&w->value); + + /* Remove the will from the 'pending' list. */ + *will = w->next; + + /* Insert the will into the 'active' list. */ + w->next = gc_will_active_list; + gc_will_active_list = object_value(w); + } + } +} + +static void update_soft_limit(size_t min_free) +{ + size_t bytes_used = gc_free_ptr - gc_ranges[gc_current_range]; + size_t min_limit = bytes_used + min_free; + size_t new_limit = (5 * min_limit) / 3; + + if (new_limit > gc_max_size) + new_limit = gc_max_size; +#if 0 + else if (new_limit < gc_min_size) + new_limit = gc_min_size; + + gc_soft_limit = new_limit; +#else + if (new_limit > gc_soft_limit) + gc_soft_limit = new_limit; +#endif + + /* Update end of range to reflect new limit */ + gc_range_end = gc_ranges[gc_current_range] + gc_soft_limit; + + if (gc_soft_limit > gc_stats.high_water) + { + gc_stats.high_water = gc_soft_limit; + } +} + static void _collect_garbage(size_t min_free) { - gc_root_t *root; - char *object_ptr; - if (gc_enabled) { + char *object_ptr; + gc_stats.total_ticks -= clock(); ++gc_stats.collections; //debug(("Collecting garbage...\n")); /* Swap ranges; new "current" range is initially empty, old one is full */ - gc_current_range = 1 - gc_current_range; - gc_free_ptr = gc_ranges[gc_current_range]; - gc_range_end = gc_free_ptr + gc_soft_limit; - object_ptr = gc_free_ptr; + swap_gc_ranges(); + object_ptr = gc_free_ptr; - /* Transfer GC roots (if necessary) */ - root = gc_root_list.next; - - while (root != &gc_root_list) + transfer_roots(); + + /* Keep transferring until no more objects in the new range refer to the old one, + * other than pending wills and weak boxes. */ + while (object_ptr < gc_free_ptr) { - if (is_object(root->value)) - transfer_object(&root->value); - root = root->next; + object_ptr += gc_align(transfer_children((object_t*)object_ptr)); } - /* Keep transferring until no more objects in the new range refer to the old one */ + process_weak_boxes(); + process_wills(); + + /* Keep transferring until no more objects in the new range refer to the old one. + * This is so that values which are otherwise unreachable, but have finalizers which + * may be able to reach them, are not collected prematurely. Note that these values may + * be finalized in any order, and that any weak references have already been cleared. */ while (object_ptr < gc_free_ptr) { object_ptr += gc_align(transfer_children((object_t*)object_ptr)); @@ -405,36 +635,12 @@ static void _collect_garbage(size_t min_free) gc_stats.total_ticks += clock(); } - { - size_t bytes_used = gc_free_ptr - gc_ranges[gc_current_range]; - size_t min_limit = bytes_used + min_free; - size_t new_limit = (5 * min_limit) / 3; - - if (new_limit > gc_max_size) - new_limit = gc_max_size; -#if 0 - else if (new_limit < gc_min_size) - new_limit = gc_min_size; - - gc_soft_limit = new_limit; -#else - if (new_limit > gc_soft_limit) - gc_soft_limit = new_limit; -#endif - } - - /* Update end of range to reflect new limit */ - gc_range_end = gc_ranges[gc_current_range] + gc_soft_limit; + update_soft_limit(min_free); if (gc_free_space() < min_free) { out_of_memory(); } - - if (gc_soft_limit > gc_stats.high_water) - { - gc_stats.high_water = gc_soft_limit; - } } void collect_garbage(size_t min_free) @@ -452,4 +658,31 @@ bool set_gc_enabled(bool enable) return was_enabled; } +bool are_finalizers_pending(void) +{ + return !is_nil(gc_will_active_list); +} + +/* Finalizer can be registered as #f, but value must be an object. + * Returning with value == #f means there are no more finalizers. */ +void get_next_finalizer(value_t *value, value_t *finalizer) +{ + assert(value && finalizer); + + if (is_nil(gc_will_active_list)) + { + *value = *finalizer = FALSE_VALUE; + } + else + { + will_t *w = get_will(gc_will_active_list); + + *value = w->value; + *finalizer = w->finalizer; + + /* Remove this finalizer from the list -- up to caller to keep values reachable. */ + gc_will_active_list = w->next; + } +} + /* vim:set sw=2 expandtab: */ diff --git a/gc.h b/gc.h index 157993e..a010a29 100644 --- a/gc.h +++ b/gc.h @@ -19,18 +19,24 @@ typedef uintptr_t value_t; /* Pair: aaaaaaaa aaaaaaaa aaaaaaaa aaaaaa10 */ /* Fixnum: snnnnnnn nnnnnnnn nnnnnnnn nnnnnnn1 */ -#define NIL ((value_t)0) +#define NIL ((value_t)0) /* Special values (0 <= n < 1024) */ /* These correspond to objects within the first page of memory */ -#define SPECIAL_VALUE(n) ((value_t)(4*n+2)) -#define MAX_SPECIAL SPECIAL_VALUE(1023) +#define SPECIAL_VALUE(n) ((value_t)(4*n+2)) +#define MAX_SPECIAL SPECIAL_VALUE(1023) -#define BROKEN_HEART SPECIAL_VALUE(0) -#define TYPE_TAG_BOX SPECIAL_VALUE(1) -#define TYPE_TAG_VECTOR SPECIAL_VALUE(2) -#define TYPE_TAG_BYTESTR SPECIAL_VALUE(3) -#define TYPE_TAG_STRUCT SPECIAL_VALUE(4) +#define BROKEN_HEART SPECIAL_VALUE(0) +#define FALSE_VALUE SPECIAL_VALUE(1) +#define TRUE_VALUE SPECIAL_VALUE(2) +#define TYPE_TAG(n) SPECIAL_VALUE(2+(n)) + +#define TYPE_TAG_BOX TYPE_TAG(0) +#define TYPE_TAG_VECTOR TYPE_TAG(1) +#define TYPE_TAG_BYTESTR TYPE_TAG(2) +#define TYPE_TAG_STRUCT TYPE_TAG(3) +#define TYPE_TAG_WEAK_BOX TYPE_TAG(4) +#define TYPE_TAG_WILL TYPE_TAG(5) typedef struct object { @@ -73,6 +79,21 @@ typedef struct structure value_t slots[0]; } struct_t; +typedef struct weak_box +{ + value_t tag; + value_t value; + value_t next; +} weak_box_t; + +typedef struct will +{ + value_t tag; + value_t value; + value_t finalizer; + value_t next; +} will_t; + typedef struct gc_root { value_t value; @@ -107,10 +128,40 @@ byte_string_t *get_byte_string(value_t v); value_t make_struct(value_t type, size_t slots); struct_t *get_struct(value_t v); +value_t make_weak_box(value_t value); +weak_box_t *get_weak_box(value_t v); + +/* Finalizers are register-and-forget; there should be no external references to wills. */ +void register_finalizer(value_t value, value_t finalizer); +bool are_finalizers_pending(void); + +/* If *value == #f on return there are no more finalizers. */ +void get_next_finalizer(value_t *value, value_t *finalizer); + intptr_t get_fixnum(value_t v); /****************************************************************************/ +static inline bool is_nil(value_t v) +{ + return v == NIL; +} + +static inline bool is_false(value_t v) +{ + return v == FALSE_VALUE; +} + +static inline bool is_true(value_t v) +{ + return v != FALSE_VALUE; +} + +static inline bool is_boolean(value_t v) +{ + return (v == FALSE_VALUE) || (v == TRUE_VALUE); +} + static inline value_t object_value(void *obj) { assert((uintptr_t)obj >= 4096); @@ -147,6 +198,11 @@ static inline pair_t *_get_pair(value_t v) return (pair_t*)_get_object(v); } +static inline bool is_list(value_t v) +{ + return is_nil(v) || is_pair(v); +} + static inline bool is_box(value_t v) { return is_object(v) && (_get_object(v)->tag == TYPE_TAG_BOX); @@ -202,6 +258,21 @@ static inline size_t struct_type(value_t v) return get_struct(v)->slots[0]; } +static inline bool is_weak_box(value_t v) +{ + return is_object(v) && (_get_object(v)->tag == TYPE_TAG_WEAK_BOX); +} + +static inline weak_box_t *_get_weak_box(value_t v) +{ + return (weak_box_t*)_get_object(v); +} + +static inline bool is_will(value_t v) +{ + return is_object(v) && (_get_object(v)->tag == TYPE_TAG_WILL); +} + static inline bool is_fixnum(value_t v) { return (v & 1) != 0;