Use chained scatter table

This commit is contained in:
Bruce Hill 2021-10-01 18:18:22 -07:00
parent b064b7e6af
commit b67cb4643c

77
match.c
View File

@ -17,14 +17,16 @@
#define MAX_CACHE_SIZE (1<<14)
typedef struct {
// Cache datastructures
typedef struct cache_hit_s {
pat_t *pat;
const char *start, *end;
// Cache entries use a chained scatter approach modeled after Lua's tables
struct cache_hit_s *next_probe;
} cache_hit_t;
// Cache datastructure
typedef struct {
size_t size, occupancy;
unsigned int size, occupancy, next_free;
cache_hit_t *hits;
} cache_t;
@ -88,14 +90,46 @@ static inline size_t hash(const char *str, size_t pat_id)
// given definitions. If a result has been memoized, return a pointer to the
// memoized cache entry, otherwise return NULL.
//
static cache_hit_t cache_get(match_ctx_t *ctx, const char *str, pat_t *pat)
static cache_hit_t *cache_get(match_ctx_t *ctx, const char *str, pat_t *pat)
{
if (!ctx->cache->hits) return (cache_hit_t){0};
size_t h = hash(str, pat->id) & (ctx->cache->size-1);
cache_hit_t c = ctx->cache->hits[h];
if (c.pat == pat && c.start == str)
return c;
return (cache_hit_t){0};
if (!ctx->cache->hits) return NULL;
for (cache_hit_t *hit = &ctx->cache->hits[hash(str, pat->id) & (ctx->cache->size-1)]; hit; hit = hit->next_probe) {
if (hit->pat == pat && hit->start == str)
return hit;
}
return NULL;
}
//
// Insert `hit` into the cache's chained scatter table. The new entry is always
// stored in its main slot (its hash masked by `size-1`, so `size` is expected
// to be a power of two). If that slot is occupied, the occupant is relocated
// to the next free slot and re-linked into whichever chain it belongs to.
// The incoming value of `hit.next_probe` is ignored and overwritten.
// If an entry with the same `pat` and `start` is already present anywhere in
// the chain, the table is left unchanged.
// Precondition: `cache->hits` is allocated and has at least one empty slot;
// the free-slot scan below is unbounded (cache_save() grows the table before
// inserting).
//
static void _hash_insert(cache_t *cache, cache_hit_t hit)
{
    size_t h = hash(hit.start, hit.pat->id) & (cache->size-1);
    cache_hit_t collision = cache->hits[h];
    if (collision.pat == NULL) { // No collision
        hit.next_probe = NULL;
        cache->hits[h] = hit;
        ++cache->occupancy;
        return;
    }

    size_t hcol = hash(collision.start, collision.pat->id) & (cache->size-1);
    if (hcol == h) {
        // The occupant heads the chain `hit` belongs to, so a duplicate could
        // be sitting anywhere along it, not just in the head slot.
        for (const cache_hit_t *probe = &cache->hits[h]; probe; probe = probe->next_probe) {
            if (probe->pat == hit.pat && probe->start == hit.start)
                return; // Duplicate entry, just leave it be
        }
    }
    // Otherwise the occupant is a displaced member of another chain, which
    // means no entry with main slot `h` exists yet and no duplicate is possible.

    // Shuffle the collision along to a free space:
    while (cache->hits[cache->next_free].pat) ++cache->next_free;
    cache_hit_t *moved = &cache->hits[cache->next_free];
    *moved = collision; // Keeps the occupant's own `next_probe` link intact

    if (hcol == h) { // Chain `collision` after `hit`
        hit.next_probe = moved;
    } else { // Keep `collision` in its own chain
        cache_hit_t *prev = &cache->hits[hcol]; // Where `collision` wanted to be originally
        while (prev->next_probe != &cache->hits[h]) prev = prev->next_probe;
        prev->next_probe = moved;
        hit.next_probe = NULL;
    }
    cache->hits[h] = hit;
    ++cache->next_free;
    ++cache->occupancy;
}
//
@ -104,27 +138,23 @@ static cache_hit_t cache_get(match_ctx_t *ctx, const char *str, pat_t *pat)
static void cache_save(match_ctx_t *ctx, const char *str, pat_t *pat, match_t *m)
{
cache_t *cache = ctx->cache;
if (cache->occupancy+1 > (cache->size*1)/5) {
// Grow the hash if needed (>99% utilization):
if (cache->occupancy+1 > (cache->size*99)/100) {
cache_hit_t *old_hits = cache->hits;
size_t old_size = cache->size;
cache->size = old_size == 0 ? 16 : 2*old_size;
cache->hits = new(cache_hit_t[cache->size]);
cache->next_free = 0;
// Rehash:
for (size_t i = 0; i < old_size; i++) {
if (old_hits[i].pat) {
size_t h = hash(old_hits[i].start, old_hits[i].pat->id) & (cache->size-1);
cache->hits[h] = old_hits[i];
}
if (old_hits[i].pat)
_hash_insert(cache, old_hits[i]);
}
if (old_hits) delete(&old_hits);
}
size_t h = hash(str, pat->id) & (cache->size-1);
if (!cache->hits[h].start) ++cache->occupancy;
cache->hits[h].start = str;
cache->hits[h].end = m ? m->end : NULL;
cache->hits[h].pat = pat;
_hash_insert(cache, (cache_hit_t){.pat = pat, .start = str, .end = m ? m->end : NULL});
}
//
@ -134,8 +164,7 @@ void cache_destroy(match_ctx_t *ctx)
{
cache_t *cache = ctx->cache;
if (cache->hits) delete(&cache->hits);
cache->occupancy = 0;
cache->size = 0;
memset(cache, 0, sizeof(cache_t));
}
//
@ -561,8 +590,8 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
return new_match(pat, str, p ? p->end : str, MATCHES(p));
}
case BP_REF: {
cache_hit_t hit = cache_get(ctx, str, pat);
if (hit.start && !hit.end)
cache_hit_t *hit = cache_get(ctx, str, pat);
if (hit && !hit->end)
return NULL;
pat_t *ref = lookup(ctx, pat->args.ref.name, pat->args.ref.len);