diff --git a/doc/bytecode.txt b/doc/bytecode.txt index 8d5da93..432c6ca 100644 --- a/doc/bytecode.txt +++ b/doc/bytecode.txt @@ -37,6 +37,7 @@ unary-expr: up to 255, 1 out, 1 in 29 (set! out (byte-string-size in)) 2a (set! out (struct-nslots in)) 2b (set! out (struct-type in)) + 2c (set! out (hash-value in)) ; ISO C floating-point 30 (set! out (acos in)) diff --git a/gc.c b/gc.c index 29a0382..d8ff277 100644 --- a/gc.c +++ b/gc.c @@ -33,6 +33,12 @@ gc_stats_t gc_stats; /* The default of 128 KB/block should require a 2 KB bitmap. */ #define GC_DIRTY_BLOCK_SIZE (128UL << 10) +typedef struct seen_value +{ + value_t value; + struct seen_value *prev; +} seen_value_t; + /****************************************************************************/ static value_t gc_weak_box_list; @@ -69,6 +75,13 @@ void unregister_gc_root(gc_root_t *root) /****************************************************************************/ +static value_t make_hash_value(void) +{ + static fixnum_t hash_seed = 0x67f76bc8; + hash_seed = (33 * hash_seed) ^ (fixnum_t)clock(); + return fixnum_value(hash_seed); +} + bool get_boolean(value_t v) { release_assert(is_boolean(v)); @@ -149,6 +162,7 @@ value_t make_vector(size_t nelem, value_t initial_value) vec = (vector_t*)gc_alloc(VECTOR_BYTES(nelem)); vec->tag = TYPE_TAG_VECTOR; vec->size = nelem; + vec->hash = make_hash_value(); for (int i = 0; i < nelem; ++i) vec->elements[i] = iv_root.value; @@ -227,6 +241,7 @@ value_t make_struct(value_t type, size_t nslots) s->tag = TYPE_TAG_STRUCT; s->type = type_root.value; s->nslots = nslots; + s->hash = make_hash_value(); for (int i = 0; i < nslots; ++i) s->slots[i] = UNDEFINED; @@ -343,6 +358,93 @@ builtin_fn_t *get_builtin_fn(value_t v) return _get_builtin_fn(v); } +static size_t dbj2_hash(uint8_t *bytes, size_t size) +{ + size_t hash = 5381; + + for (size_t i = 0; i < size; ++i) + { + hash = (33 * hash) ^ (size_t)bytes[i]; + } + + return hash; +} + +static value_t _get_hash_value(value_t v, seen_value_t *seen) +{ + seen_value_t new_seen = { v, seen }; + + if (is_object(v) && !(is_float(v) || is_byte_string(v) || is_builtin_fn(v))) + { + for (seen_value_t *sv = seen; sv; sv = sv->prev) + { + if (v == sv->value) + { + return 0; + } + } + } + + if (!is_object(v)) + { + /* Non-objects compare by value */ + return fixnum_value(v ^ (v >> 2)); + } + else if (is_float(v)) + { + double d = _get_float(v); + return fixnum_value(dbj2_hash((uint8_t*)&d, sizeof d)); + } + else if (is_builtin_fn(v)) + { + return fixnum_value((uintptr_t)_get_builtin_fn(v) >> 2); + } + else if (is_pair(v)) + { + /* Lists and trees compare by value, not reference. */ + value_t seed = fixnum_value(0); + seed = combine_hash_values(seed, _get_hash_value(_CAR(v)), &new_seen); + seed = combine_hash_values(seed, _get_hash_value(_CDR(v)), &new_seen); + return seed; + } + else if (is_box(v) || is_weak_box(v)) + { + /* Boxes compare by value, not by reference. */ + return _get_hash_value(_get_box(v)->value, &new_seen); + } + else if (is_byte_string(v)) + { + byte_string_t *str = _get_byte_string(v); + return fixnum_value(dbj2_hash(str->bytes, str->size)); + } + else if (is_vector(v)) + { + /* Vectors compare by reference. */ + return _get_vector(v)->hash; + } + else if (is_struct(v)) + { + /* Structures compare by reference. */ + return _get_struct(v)->hash; + } + else /* unknown object type */ + { + return _get_hash_value(_get_object(v)->tag, &new_seen); + } +} + +value_t get_hash_value(value_t v) +{ + return _get_hash_value(v, NULL); +} + +value_t combine_hash_values(value_t f1, value_t f2) +{ + fixnum_t h1 = _get_fixnum(f1); + fixnum_t h2 = _get_fixnum(f2); + return fixnum_value(h1 ^ (h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2))); +} + /*************************** Common Collector Code **************************/ static bool gc_enabled; @@ -796,6 +898,9 @@ static size_t transfer_vector(vector_t *vec) { assert(vec->tag == TYPE_TAG_VECTOR); + /* Should be fixnum, but just in case... */ + transfer_object(&vec->hash); + for (size_t i = 0; i < vec->size; ++i) transfer_object(&vec->elements[i]); @@ -808,6 +913,9 @@ static size_t transfer_struct(struct_t *s) transfer_object(&s->type); + /* Should be fixnum, but just in case... */ + transfer_object(&s->hash); + for (size_t i = 0; i < s->nslots; ++i) transfer_object(&s->slots[i]); @@ -1282,12 +1390,6 @@ void _release_assert(bool expr, const char *str, const char *file, int line) } } -typedef struct seen_value -{ - value_t value; - struct seen_value *prev; -} seen_value_t; - static void _fprint_value(FILE *f, value_t v, seen_value_t *seen) { seen_value_t new_seen = { v, seen }; diff --git a/gc.h b/gc.h index e35c4f6..82c1cda 100644 --- a/gc.h +++ b/gc.h @@ -109,6 +109,7 @@ typedef struct vector { value_t tag; /* TYPE_TAG_VECTOR */ size_t size; + value_t hash; value_t elements[0]; } vector_t; @@ -125,6 +126,7 @@ typedef struct structure value_t tag; /* TYPE_TAG_STRUCT */ value_t type; size_t nslots; + value_t hash; value_t slots[0]; } struct_t; @@ -208,6 +210,9 @@ char *value_to_string(value_t v); /* Like strcmp(), but for byte strings. */ int byte_strcmp(value_t s1, value_t s2); +value_t get_hash_value(value_t val); +value_t combine_hash_values(value_t h1, value_t h2); + value_t make_struct(value_t type, size_t nslots); struct_t *get_struct(value_t v); diff --git a/interp.c b/interp.c index b1c8995..d8271e8 100644 --- a/interp.c +++ b/interp.c @@ -417,6 +417,7 @@ static value_t eval_unary_expression(interp_state_t *state, uint8_t subcode, uin case 0x29: return fixnum_value(get_byte_string(ST1)->size); case 0x2a: return fixnum_value(get_struct(ST1)->nslots); case 0x2b: return get_struct(ST1)->type; + case 0x2c: return get_hash_value(ST1); case 0x30: return make_float(acos(get_float(ST1))); case 0x31: return make_float(asin(get_float(ST1))); case 0x32: return make_float(atan(get_float(ST1))); diff --git a/libcompiler/primitives.scm b/libcompiler/primitives.scm index beeb7a5..850eb58 100644 --- a/libcompiler/primitives.scm +++ b/libcompiler/primitives.scm @@ -44,6 +44,7 @@ (%byte-string-size #x29 byte-string-size) (%struct-nslots #x2a struct-nslots) (%struct-type #x2b struct-type) + (%hash-value #x2c hash-value) (%acos #x30 acos) (%asin #x31 asin) (%atan #x32 atan) diff --git a/src/lib/primitives.rls b/src/lib/primitives.rls index 39bd85d..8824a63 100644 --- a/src/lib/primitives.rls +++ b/src/lib/primitives.rls @@ -25,6 +25,7 @@ (define (byte-string-size x) (byte-string-size x)) (define (struct-nslots x) (struct-nslots x)) (define (struct-type x) (struct-type x)) +(define (hash-value x) (hash-value x)) (define (acos x) (acos x)) (define (asin x) (asin x)) (define (atan x) (atan x))