Use chained scatter table
This commit is contained in:
parent
b064b7e6af
commit
b67cb4643c
77
match.c
77
match.c
@ -17,14 +17,16 @@
|
||||
|
||||
#define MAX_CACHE_SIZE (1<<14)
|
||||
|
||||
// Cache datastructures
// A single memoized match result. Entries live directly inside the cache's
// `hits` array; entries whose keys hash to the same slot are linked into a
// chain through `next_probe`.
typedef struct cache_hit_s {
    pat_t *pat;               // Pattern that was matched (half of the cache key)
    const char *start, *end;  // `start`: where matching began (other half of the key);
                              // `end`: where the match ended, or NULL if the pattern
                              // failed to match at `start`
    // Cache entries use a chained scatter approach modeled after Lua's tables
    struct cache_hit_s *next_probe;
} cache_hit_t;

// Cache datastructure
typedef struct {
    // `size`: allocated slot count (kept a power of two so hashes can be
    //         masked with `size-1`); `occupancy`: number of slots in use;
    // `next_free`: forward-scan cursor used when relocating colliding entries
    unsigned int size, occupancy, next_free;
    cache_hit_t *hits;  // Array of `size` slots (NULL until the first insertion)
} cache_t;
|
||||
|
||||
@ -88,14 +90,46 @@ static inline size_t hash(const char *str, size_t pat_id)
|
||||
// Look up a memoized match result for matching `pat` at position `str`.
// Returns a pointer to the cache entry if one has been stored, or NULL if
// this (str, pat) combination has not been memoized. (A non-NULL entry with
// `end == NULL` records a memoized *failure* to match.)
//
static cache_hit_t *cache_get(match_ctx_t *ctx, const char *str, pat_t *pat)
{
    if (!ctx->cache->hits) return NULL;  // Table not allocated yet: nothing cached
    // Walk the collision chain rooted at this key's home slot:
    for (cache_hit_t *hit = &ctx->cache->hits[hash(str, pat->id) & (ctx->cache->size-1)]; hit; hit = hit->next_probe) {
        if (hit->pat == pat && hit->start == str)
            return hit;
    }
    return NULL;
}
|
||||
|
||||
// Insert `hit` into the chained scatter table: every entry is stored directly
// in the `hits` array, and entries whose keys hash to the same home slot are
// linked through `next_probe`. If the home slot is occupied by an entry that
// belongs to a *different* chain, that squatter is relocated so `hit` can
// take its home slot. Assumes at least one free slot exists (the caller grows
// the table before it can fill up).
static void _hash_insert(cache_t *cache, cache_hit_t hit)
{
    size_t h = hash(hit.start, hit.pat->id) & (cache->size-1);
    cache_hit_t collision = cache->hits[h];
    if (collision.pat == NULL) { // No collision
        hit.next_probe = NULL;
        cache->hits[h] = hit;
        ++cache->occupancy;
        return;
    }

    // NOTE(review): only the chain head is compared here, not the whole
    // chain — presumably callers only insert after a failed cache_get(), so
    // a deeper duplicate cannot occur; confirm against call sites.
    if (collision.pat == hit.pat && collision.start == hit.start)
        return; // Duplicate entry, just leave it be

    // Shuffle the collision along to a free space. Scanning forward from
    // `next_free` is safe because slots are never freed between rehashes,
    // so every slot below `next_free` is known to be occupied:
    while (cache->hits[cache->next_free].pat) ++cache->next_free;
    cache->hits[cache->next_free] = collision;

    size_t hcol = hash(collision.start, collision.pat->id) & (cache->size-1);
    if (hcol == h) { // Chain `collision` after `hit`
        // `collision` keeps its own next_probe, so the rest of its chain stays intact
        hit.next_probe = &cache->hits[cache->next_free];
    } else { // Keep `collision` in its own chain
        // `collision` was a squatter displaced into slot `h` earlier; find the
        // node in its home chain that points at slot `h` and repoint it to the
        // collision's new location:
        cache_hit_t *prev = &cache->hits[hcol]; // Where `collision` wanted to be originally
        while (prev->next_probe != &cache->hits[h]) prev = prev->next_probe;
        prev->next_probe = &cache->hits[cache->next_free];
        hit.next_probe = NULL; // `hit` starts a fresh chain for home slot `h`
    }
    cache->hits[h] = hit;
    ++cache->next_free;
    ++cache->occupancy;
}
|
||||
|
||||
//
|
||||
@ -104,27 +138,23 @@ static cache_hit_t cache_get(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
// Memoize the result of matching `pat` at `str`. A NULL `m` records a match
// *failure* (stored as a cache entry with end == NULL). Grows and rehashes
// the table when it approaches full utilization.
static void cache_save(match_ctx_t *ctx, const char *str, pat_t *pat, match_t *m)
{
    cache_t *cache = ctx->cache;
    // Grow the hash if needed (>99% utilization):
    if (cache->occupancy+1 > (cache->size*99)/100) {
        cache_hit_t *old_hits = cache->hits;
        size_t old_size = cache->size;
        cache->size = old_size == 0 ? 16 : 2*old_size;
        cache->hits = new(cache_hit_t[cache->size]);
        cache->next_free = 0;
        // BUGFIX: occupancy must be reset before rehashing. _hash_insert()
        // increments it for every entry it stores, so carrying the old count
        // over would double it on every rehash, triggering spurious growth
        // (the table would keep doubling at ever-lower real utilization).
        cache->occupancy = 0;

        // Rehash:
        for (size_t i = 0; i < old_size; i++) {
            if (old_hits[i].pat)
                _hash_insert(cache, old_hits[i]);
        }
        if (old_hits) delete(&old_hits);
    }

    _hash_insert(cache, (cache_hit_t){.pat = pat, .start = str, .end = m ? m->end : NULL});
}
|
||||
|
||||
//
|
||||
// Free the cache's entry array and reset the cache to its initial empty
// state so it can be reused safely.
void cache_destroy(match_ctx_t *ctx)
{
    cache_t *cache = ctx->cache;
    if (cache->hits) delete(&cache->hits);
    // Zero every field (size, occupancy, next_free, hits) in one step:
    memset(cache, 0, sizeof(cache_t));
}
|
||||
|
||||
//
|
||||
@ -561,8 +590,8 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
return new_match(pat, str, p ? p->end : str, MATCHES(p));
|
||||
}
|
||||
case BP_REF: {
|
||||
cache_hit_t hit = cache_get(ctx, str, pat);
|
||||
if (hit.start && !hit.end)
|
||||
cache_hit_t *hit = cache_get(ctx, str, pat);
|
||||
if (hit && !hit->end)
|
||||
return NULL;
|
||||
|
||||
pat_t *ref = lookup(ctx, pat->args.ref.name, pat->args.ref.len);
|
||||
|
Loading…
Reference in New Issue
Block a user