Add a primitive operation to calculate a 30-bit hash from any value.

Pairs, boxes, and strings are hashed by values; vectors and structures
are "hashed" by reference. Takes into account the possibility of cycles.
This is a prelude to implementing hash-tables.
This commit is contained in:
Jesse D. McDonald 2010-05-07 23:36:21 -05:00
parent 5d8a302225
commit 96b3bded17
6 changed files with 117 additions and 6 deletions

View File

@ -37,6 +37,7 @@ unary-expr: up to 255, 1 out, 1 in
29 (set! out (byte-string-size in))
2a (set! out (struct-nslots in))
2b (set! out (struct-type in))
2c (set! out (hash-value in))
; ISO C floating-point
30 (set! out (acos in))

114
gc.c
View File

@ -33,6 +33,12 @@ gc_stats_t gc_stats;
/* The default of 128 KB/block should require a 2 KB bitmap. */
#define GC_DIRTY_BLOCK_SIZE (128UL << 10)
typedef struct seen_value
{
value_t value;
struct seen_value *prev;
} seen_value_t;
/****************************************************************************/
static value_t gc_weak_box_list;
@ -69,6 +75,13 @@ void unregister_gc_root(gc_root_t *root)
/****************************************************************************/
static value_t make_hash_value(void)
{
static fixnum_t hash_seed = 0x67f76bc8;
hash_seed = (33 * hash_seed) ^ (fixnum_t)clock();
return fixnum_value(hash_seed);
}
bool get_boolean(value_t v)
{
release_assert(is_boolean(v));
@ -149,6 +162,7 @@ value_t make_vector(size_t nelem, value_t initial_value)
vec = (vector_t*)gc_alloc(VECTOR_BYTES(nelem));
vec->tag = TYPE_TAG_VECTOR;
vec->size = nelem;
vec->hash = make_hash_value();
for (int i = 0; i < nelem; ++i)
vec->elements[i] = iv_root.value;
@ -227,6 +241,7 @@ value_t make_struct(value_t type, size_t nslots)
s->tag = TYPE_TAG_STRUCT;
s->type = type_root.value;
s->nslots = nslots;
s->hash = make_hash_value();
for (int i = 0; i < nslots; ++i)
s->slots[i] = UNDEFINED;
@ -343,6 +358,93 @@ builtin_fn_t *get_builtin_fn(value_t v)
return _get_builtin_fn(v);
}
static size_t dbj2_hash(uint8_t *bytes, size_t size)
{
size_t hash = 5381;
for (size_t i = 0; i < size; ++i)
{
hash = (33 * hash) ^ (size_t)bytes[i];
}
return hash;
}
static value_t _get_hash_value(value_t v, seen_value_t *seen)
{
seen_value_t new_seen = { v, seen };
if (is_object(v) && !(is_float(v) || is_byte_string(v) || is_builtin_fn(v)))
{
for (seen_value_t *sv = seen; sv; sv = sv->prev)
{
if (v == sv->value)
{
return 0;
}
}
}
if (!is_object(v))
{
/* Non-objects compare by value */
return fixnum_value(v ^ (v >> 2));
}
else if (is_float(v))
{
double d = _get_float(v);
return fixnum_value(dbj2_hash((uint8_t*)&d, sizeof d));
}
else if (is_builtin_fn(v))
{
return fixnum_value((uintptr_t)_get_builtin_fn(v) >> 2);
}
else if (is_pair(v))
{
/* Lists and trees compare by value, not reference. */
value_t seed = fixnum_value(0);
seed = combine_hash_values(seed, _get_hash_value(_CAR(v)), &new_seen);
seed = combine_hash_values(seed, _get_hash_value(_CDR(v)), &new_seen);
return seed;
}
else if (is_box(v) || is_weak_box(v))
{
/* Boxes compare by value, not by reference. */
return _get_hash_value(_get_box(v)->value, &new_seen);
}
else if (is_byte_string(v))
{
byte_string_t *str = _get_byte_string(v);
return fixnum_value(dbj2_hash(str->bytes, str->size));
}
else if (is_vector(v))
{
/* Vectors compare by reference. */
return _get_vector(v)->hash;
}
else if (is_struct(v))
{
/* Structures compare by reference. */
return _get_struct(v)->hash;
}
else /* unknown object type */
{
return _get_hash_value(_get_object(v)->tag, &new_seen);
}
}
value_t get_hash_value(value_t v)
{
return _get_hash_value(v, NULL);
}
value_t combine_hash_values(value_t f1, value_t f2)
{
fixnum_t h1 = _get_fixnum(f1);
fixnum_t h2 = _get_fixnum(f2);
return fixnum_value(h1 ^ (h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2)));
}
/*************************** Common Collector Code **************************/
static bool gc_enabled;
@ -796,6 +898,9 @@ static size_t transfer_vector(vector_t *vec)
{
assert(vec->tag == TYPE_TAG_VECTOR);
/* Should be fixnum, but just in case... */
transfer_object(&vec->hash);
for (size_t i = 0; i < vec->size; ++i)
transfer_object(&vec->elements[i]);
@ -808,6 +913,9 @@ static size_t transfer_struct(struct_t *s)
transfer_object(&s->type);
/* Should be fixnum, but just in case... */
transfer_object(&s->hash);
for (size_t i = 0; i < s->nslots; ++i)
transfer_object(&s->slots[i]);
@ -1282,12 +1390,6 @@ void _release_assert(bool expr, const char *str, const char *file, int line)
}
}
typedef struct seen_value
{
value_t value;
struct seen_value *prev;
} seen_value_t;
static void _fprint_value(FILE *f, value_t v, seen_value_t *seen)
{
seen_value_t new_seen = { v, seen };

5
gc.h
View File

@ -109,6 +109,7 @@ typedef struct vector
{
value_t tag; /* TYPE_TAG_VECTOR */
size_t size;
value_t hash;
value_t elements[0];
} vector_t;
@ -125,6 +126,7 @@ typedef struct structure
value_t tag; /* TYPE_TAG_STRUCT */
value_t type;
size_t nslots;
value_t hash;
value_t slots[0];
} struct_t;
@ -208,6 +210,9 @@ char *value_to_string(value_t v);
/* Like strcmp(), but for byte strings. */
int byte_strcmp(value_t s1, value_t s2);
value_t get_hash_value(value_t val);
value_t combine_hash_values(value_t h1, value_t h2);
value_t make_struct(value_t type, size_t nslots);
struct_t *get_struct(value_t v);

View File

@ -417,6 +417,7 @@ static value_t eval_unary_expression(interp_state_t *state, uint8_t subcode, uin
case 0x29: return fixnum_value(get_byte_string(ST1)->size);
case 0x2a: return fixnum_value(get_struct(ST1)->nslots);
case 0x2b: return get_struct(ST1)->type;
case 0x2c: return get_hash_value(ST1);
case 0x30: return make_float(acos(get_float(ST1)));
case 0x31: return make_float(asin(get_float(ST1)));
case 0x32: return make_float(atan(get_float(ST1)));

View File

@ -44,6 +44,7 @@
(%byte-string-size #x29 byte-string-size)
(%struct-nslots #x2a struct-nslots)
(%struct-type #x2b struct-type)
(%hash-value #x2c hash-value)
(%acos #x30 acos)
(%asin #x31 asin)
(%atan #x32 atan)

View File

@ -25,6 +25,7 @@
(define (byte-string-size x) (byte-string-size x))
(define (struct-nslots x) (struct-nslots x))
(define (struct-type x) (struct-type x))
(define (hash-value x) (hash-value x))
(define (acos x) (acos x))
(define (asin x) (asin x))
(define (atan x) (atan x))