aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2021-10-01 18:18:22 -0700
committerBruce Hill <bruce@bruce-hill.com>2021-10-01 18:18:22 -0700
commitb67cb4643cc837728f5c527dbc5bf59958039735 (patch)
tree7b1e4a670882d5e792bcad3400b9fa9df041118e
parentb064b7e6af8ffb8c32b5c920a8a6c387e393f4a7 (diff)
Use chained scatter table
-rw-r--r--match.c77
1 files changed, 53 insertions, 24 deletions
diff --git a/match.c b/match.c
index aa00592..cd284a2 100644
--- a/match.c
+++ b/match.c
@@ -17,14 +17,16 @@
#define MAX_CACHE_SIZE (1<<14)
-typedef struct {
+// Cache datastructures
+typedef struct cache_hit_s {
pat_t *pat;
const char *start, *end;
+ // Cache entries use a chained scatter approach modeled after Lua's tables
+ struct cache_hit_s *next_probe;
} cache_hit_t;
-// Cache datastructure
typedef struct {
- size_t size, occupancy;
+ unsigned int size, occupancy, next_free;
cache_hit_t *hits;
} cache_t;
@@ -88,14 +90,46 @@ static inline size_t hash(const char *str, size_t pat_id)
// given definitions. If a result has been memoized, set *result to the
// memoized value and return true, otherwise return false.
//
-static cache_hit_t cache_get(match_ctx_t *ctx, const char *str, pat_t *pat)
+static cache_hit_t *cache_get(match_ctx_t *ctx, const char *str, pat_t *pat)
{
- if (!ctx->cache->hits) return (cache_hit_t){0};
- size_t h = hash(str, pat->id) & (ctx->cache->size-1);
- cache_hit_t c = ctx->cache->hits[h];
- if (c.pat == pat && c.start == str)
- return c;
- return (cache_hit_t){0};
+ if (!ctx->cache->hits) return NULL;
+ for (cache_hit_t *hit = &ctx->cache->hits[hash(str, pat->id) & (ctx->cache->size-1)]; hit; hit = hit->next_probe) {
+ if (hit->pat == pat && hit->start == str)
+ return hit;
+ }
+ return NULL;
+}
+
+static void _hash_insert(cache_t *cache, cache_hit_t hit)
+{
+ size_t h = hash(hit.start, hit.pat->id) & (cache->size-1);
+ cache_hit_t collision = cache->hits[h];
+ if (collision.pat == NULL) { // No collision
+ hit.next_probe = NULL;
+ cache->hits[h] = hit;
+ ++cache->occupancy;
+ return;
+ }
+
+ if (collision.pat == hit.pat && collision.start == hit.start)
+ return; // Duplicate entry, just leave it be
+
+ // Shuffle the collision along to a free space:
+ while (cache->hits[cache->next_free].pat) ++cache->next_free;
+ cache->hits[cache->next_free] = collision;
+
+ size_t hcol = hash(collision.start, collision.pat->id) & (cache->size-1);
+ if (hcol == h) { // Chain `collision` after `hit`
+ hit.next_probe = &cache->hits[cache->next_free];
+ } else { // Keep `collision` in its own chain
+ cache_hit_t *prev = &cache->hits[hcol]; // Where `collision` wanted to be originally
+ while (prev->next_probe != &cache->hits[h]) prev = prev->next_probe;
+ prev->next_probe = &cache->hits[cache->next_free];
+ hit.next_probe = NULL;
+ }
+ cache->hits[h] = hit;
+ ++cache->next_free;
+ ++cache->occupancy;
}
//
@@ -104,27 +138,23 @@ static cache_hit_t cache_get(match_ctx_t *ctx, const char *str, pat_t *pat)
static void cache_save(match_ctx_t *ctx, const char *str, pat_t *pat, match_t *m)
{
cache_t *cache = ctx->cache;
- if (cache->occupancy+1 > (cache->size*1)/5) {
+ // Grow the hash if needed (>99% utilization):
+ if (cache->occupancy+1 > (cache->size*99)/100) {
cache_hit_t *old_hits = cache->hits;
size_t old_size = cache->size;
cache->size = old_size == 0 ? 16 : 2*old_size;
cache->hits = new(cache_hit_t[cache->size]);
+ cache->next_free = 0;
// Rehash:
for (size_t i = 0; i < old_size; i++) {
- if (old_hits[i].pat) {
- size_t h = hash(old_hits[i].start, old_hits[i].pat->id) & (cache->size-1);
- cache->hits[h] = old_hits[i];
- }
+ if (old_hits[i].pat)
+ _hash_insert(cache, old_hits[i]);
}
if (old_hits) delete(&old_hits);
}
- size_t h = hash(str, pat->id) & (cache->size-1);
- if (!cache->hits[h].start) ++cache->occupancy;
- cache->hits[h].start = str;
- cache->hits[h].end = m ? m->end : NULL;
- cache->hits[h].pat = pat;
+ _hash_insert(cache, (cache_hit_t){.pat = pat, .start = str, .end = m ? m->end : NULL});
}
//
@@ -134,8 +164,7 @@ void cache_destroy(match_ctx_t *ctx)
{
cache_t *cache = ctx->cache;
if (cache->hits) delete(&cache->hits);
- cache->occupancy = 0;
- cache->size = 0;
+ memset(cache, 0, sizeof(cache_t));
}
//
@@ -561,8 +590,8 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
return new_match(pat, str, p ? p->end : str, MATCHES(p));
}
case BP_REF: {
- cache_hit_t hit = cache_get(ctx, str, pat);
- if (hit.start && !hit.end)
+ cache_hit_t *hit = cache_get(ctx, str, pat);
+ if (hit && !hit->end)
return NULL;
pat_t *ref = lookup(ctx, pat->args.ref.name, pat->args.ref.len);