From 8cb67bf7bb79d2f4ec52a6a7d0188795049087d5 Mon Sep 17 00:00:00 2001 From: Jesse McDonald Date: Tue, 17 Jul 2012 19:43:53 -0500 Subject: [PATCH] Improve performance by grouping GC flag bits together. --- gc.c | 75 +++++++++++++++++++++++++++++++++++++++--------------------- gc.h | 2 +- 2 files changed, 50 insertions(+), 27 deletions(-) diff --git a/gc.c b/gc.c index 80a5b41..9a42797 100644 --- a/gc.c +++ b/gc.c @@ -26,11 +26,11 @@ # define TIMING_CLOCK CLOCK_REALTIME #endif -#define FLAG_PROC_BIT ((uint8_t)0x20) -#define FLAG_LIVE_BIT ((uint8_t)0x10) +#define FLAG_PROC_BIT ((uint32_t)0x2) +#define FLAG_LIVE_BIT ((uint32_t)0x1) -#define FLAG_GC_BITS ((uint8_t)0x30) -#define FLAG_TAG_BITS ((uint8_t)0x0f) +#define GC_FLAG_WORDS ((2 * (OBJECT_INDEX_MAX + 1) + 31) / 32) +#define GC_TAG_WORDS ((4 * (OBJECT_INDEX_MAX + 1) + 31) / 32) /* Helper macros to reduce duplication */ #define VECTOR_BYTES(nelem) (sizeof(vector_t) + (sizeof(value_t) * (nelem))) @@ -116,14 +116,40 @@ static value_t make_will(value_t value, value_t finalizer); /****************************************************************************/ -static inline uint8_t *_get_flags(value_t value) +static inline uint32_t get_gc_flag_bits(int blk, int idx) { - return &object_blocks[OBJECT_BLOCK(value)].flag_bits[OBJECT_INDEX(value)]; + uint32_t word = object_blocks[blk].flag_bits[idx / 16]; + return (int)((word >> (2 * (idx % 16))) & 0x3); +} + +static inline void set_gc_flag_bits(int blk, int idx, uint32_t mask, uint32_t bits) +{ + uint32_t *word = &object_blocks[blk].flag_bits[idx / 16]; + int shift = 2 * (idx % 16); + + mask = (mask & 0x3) << shift; + bits = (bits & 0x3) << shift; + *word = (*word & ~mask) | (bits & mask); +} + +static inline int get_gc_tag_bits(int blk, int idx) +{ + uint32_t word = object_blocks[blk].flag_bits[GC_FLAG_WORDS + (idx / 8)]; + return (int)((word >> (4 * (idx % 8))) & 0xf); +} + +static inline void set_gc_tag_bits(int blk, int idx, int tag) +{ + uint32_t *word = &object_blocks[blk].flag_bits[GC_FLAG_WORDS + (idx / 8)]; + int shift = 4 * (idx % 8); + uint32_t mask = ((uint32_t)0xf << shift); + + *word = (*word & ~mask) | (((uint32_t)tag << shift) & mask); } static inline value_t value_from_index(int blk, int idx) { - int tag = (int)object_blocks[blk].flag_bits[idx] & FLAG_TAG_BITS; + int tag = get_gc_tag_bits(blk, idx); /* Unallocated objects have zeroed flag bitfields */ if (tag == 0) return UNDEFINED; @@ -152,7 +178,7 @@ static void allocate_block(void) return; } - block->flag_bits = (uint8_t*)calloc(OBJECT_INDEX_MAX+1, sizeof(uint8_t)); + block->flag_bits = (uint32_t*)calloc(GC_FLAG_WORDS + GC_TAG_WORDS, sizeof(uint32_t)); if (!block->flag_bits) { @@ -211,7 +237,7 @@ static value_t allocate_object(int tag) gc_free_list = _get_object(gc_free_list)->next; gc_total_bytes += sizeof(object_t); - *_get_flags(result) = (uint8_t)tag; + set_gc_tag_bits(blk, idx, tag); memset(_get_object(result), 0, sizeof(object_t)); return result; @@ -243,7 +269,7 @@ static void free_object(value_t value) free(obj->will); } - *_get_flags(value) = 0; + set_gc_tag_bits(OBJECT_BLOCK(value), OBJECT_INDEX(value), 0); obj->next = gc_free_list; gc_total_bytes -= sizeof(object_t); @@ -258,49 +284,46 @@ static void clear_gc_flag_bits(void) for (blk = 0; (blk <= OBJECT_BLOCK_MAX) && object_blocks[blk].objects; ++blk) { object_block_t *block = &object_blocks[blk]; - int idx; - - for (idx = 0; idx <= OBJECT_INDEX_MAX; ++idx) - { - block->flag_bits[idx] &= ~FLAG_GC_BITS; - } + memset(block->flag_bits, 0, GC_FLAG_WORDS * sizeof(uint32_t)); } } static inline bool is_object_live(value_t v) { /* non-objects can't be added to the free list, so they're always "live" */ - return !is_object(v) || (*_get_flags(v) & FLAG_LIVE_BIT) != 0; + return !is_object(v) || + (get_gc_flag_bits(OBJECT_BLOCK(v), OBJECT_INDEX(v)) & FLAG_LIVE_BIT) != 0; } static inline bool is_object_processed(value_t v) { /* non-objects don't need to be processed */ - return !is_object(v) || (*_get_flags(v) & FLAG_PROC_BIT) != 0; + return !is_object(v) || + (get_gc_flag_bits(OBJECT_BLOCK(v), OBJECT_INDEX(v)) & FLAG_PROC_BIT) != 0; } static inline void set_object_live(value_t v) { assert(is_object(v)); - *_get_flags(v) |= FLAG_LIVE_BIT; + set_gc_flag_bits(OBJECT_BLOCK(v), OBJECT_INDEX(v), FLAG_LIVE_BIT, FLAG_LIVE_BIT); } static inline void set_object_processed(value_t v) { assert(is_object(v)); - *_get_flags(v) |= FLAG_PROC_BIT; + set_gc_flag_bits(OBJECT_BLOCK(v), OBJECT_INDEX(v), FLAG_PROC_BIT, FLAG_PROC_BIT); } static inline void clear_object_live(value_t v) { assert(is_object(v)); - *_get_flags(v) &= ~FLAG_LIVE_BIT; + set_gc_flag_bits(OBJECT_BLOCK(v), OBJECT_INDEX(v), FLAG_LIVE_BIT, 0); } static inline void clear_object_processed(value_t v) { assert(is_object(v)); - *_get_flags(v) &= ~FLAG_PROC_BIT; + set_gc_flag_bits(OBJECT_BLOCK(v), OBJECT_INDEX(v), FLAG_PROC_BIT, 0); } /****************************************************************************/ @@ -818,12 +841,12 @@ static value_t next_live_object(void) for (idx = OBJECT_INDEX(gc_next_live_value); idx <= OBJECT_INDEX_MAX; ++idx) { - uint8_t flags = object_blocks[blk].flag_bits[idx]; + uint32_t flags = get_gc_flag_bits(blk, idx); if ((flags & (FLAG_LIVE_BIT | FLAG_PROC_BIT)) == FLAG_LIVE_BIT) { - gc_next_live_value = value_from_index(blk, idx);; - return gc_next_live_value;; + gc_next_live_value = value_from_index(blk, idx); + return gc_next_live_value; } } @@ -831,7 +854,7 @@ static value_t next_live_object(void) { for (idx = 0; idx <= OBJECT_INDEX_MAX; ++idx) { - uint8_t flags = object_blocks[blk].flag_bits[idx]; + uint32_t flags = get_gc_flag_bits(blk, idx); if ((flags & (FLAG_LIVE_BIT | FLAG_PROC_BIT)) == FLAG_LIVE_BIT) { diff --git a/gc.h b/gc.h index d21a377..ad6ded8 100644 --- a/gc.h +++ b/gc.h @@ -184,7 +184,7 @@ typedef union object typedef struct object_block { object_t *objects; - uint8_t *flag_bits; + uint32_t *flag_bits; } object_block_t; typedef struct gc_root