diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2021-10-01 15:49:43 -0700 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2021-10-01 15:49:43 -0700 |
| commit | 1bdf8f4f40548dc1c273b09ebdd2a7153adf94ec (patch) | |
| tree | 36c4a1c7268b8a44c7e1351d76d7a51bbb9e36a3 | |
| parent | 89887711586d289db6fd013ffd8407d381992663 (diff) | |
Switch from chained buckets to just clobbering in the hash table
| -rw-r--r-- | match.c | 93 |
1 files changed, 33 insertions, 60 deletions
@@ -17,17 +17,15 @@ #define MAX_CACHE_SIZE (1<<14) -typedef struct cache_hit_s { - size_t pat_id; - const char *start; - bool does_match; - struct cache_hit_s *next; +typedef struct { + pat_t *pat; + const char *start, *end; } cache_hit_t; // Cache datastructure typedef struct { size_t size, occupancy; - cache_hit_t **matches; + cache_hit_t *hits; } cache_t; // Data structure for various ambient state for matching @@ -96,15 +94,14 @@ static inline size_t hash(const char *str, size_t pat_id) // given definitions. If a result has been memoized, set *result to the // memoized value and return true, otherwise return false. // -static enum {CACHE_MISSING, CACHE_MATCH, CACHE_NOMATCH} cache_get(match_ctx_t *ctx, const char *str, pat_t *pat) +static cache_hit_t cache_get(match_ctx_t *ctx, const char *str, pat_t *pat) { - if (!ctx->cache->matches) return CACHE_MISSING; + if (!ctx->cache->hits) return (cache_hit_t){0}; size_t h = hash(str, pat->id) & (ctx->cache->size-1); - for (cache_hit_t *c = ctx->cache->matches[h]; c; c = c->next) { - if (c->pat_id == pat->id && c->start == str) - return c->does_match ? CACHE_MATCH : CACHE_NOMATCH; - } - return CACHE_MISSING; + cache_hit_t c = ctx->cache->hits[h]; + if (c.pat == pat && c.start == str) + return c; + return (cache_hit_t){0}; } // @@ -113,42 +110,27 @@ static enum {CACHE_MISSING, CACHE_MATCH, CACHE_NOMATCH} cache_get(match_ctx_t *c static void cache_save(match_ctx_t *ctx, const char *str, pat_t *pat, match_t *m) { cache_t *cache = ctx->cache; - if (cache->occupancy+1 > 3*cache->size) { - if (cache->size == MAX_CACHE_SIZE) { - size_t h = hash(str, pat->id) & (cache->size-1); - for (int quota = 2; cache->matches[h] && quota > 0; quota--) { - cache_hit_t **last_home = &cache->matches[h]; - while ((*last_home)->next) - last_home = &(*last_home)->next; - delete(last_home); - } - } else { - cache_hit_t **old_matches = cache->matches; - size_t old_size = cache->size; - cache->size = old_size == 0 ? 16 : 2*old_size; - cache->matches = new(cache_hit_t*[cache->size]); - - // Rehash: - for (size_t i = 0; i < old_size; i++) { - for (cache_hit_t *o; (o = old_matches[i]); ) { - old_matches[i] = o->next; - size_t h = hash(o->start, o->pat_id) & (cache->size-1); - o->next = cache->matches[h]; - cache->matches[h] = o; - } + if (cache->occupancy+1 > (cache->size*1)/5) { + cache_hit_t *old_hits = cache->hits; + size_t old_size = cache->size; + cache->size = old_size == 0 ? 16 : 2*old_size; + cache->hits = new(cache_hit_t[cache->size]); + + // Rehash: + for (size_t i = 0; i < old_size; i++) { + if (old_hits[i].pat) { + size_t h = hash(old_hits[i].start, old_hits[i].pat->id) & (cache->size-1); + cache->hits[h] = old_hits[i]; } - if (old_matches) delete(&old_matches); } + if (old_hits) delete(&old_hits); } size_t h = hash(str, pat->id) & (cache->size-1); - cache_hit_t *hit = new(cache_hit_t); - hit->pat_id = pat->id; - hit->start = str; - hit->does_match = (m != NULL); - hit->next = cache->matches[h]; - cache->matches[h] = hit; - ++cache->occupancy; + if (!cache->hits[h].start) ++cache->occupancy; + cache->hits[h].start = str; + cache->hits[h].end = m ? m->end : NULL; + cache->hits[h].pat = pat; } // @@ -157,15 +139,8 @@ static void cache_save(match_ctx_t *ctx, const char *str, pat_t *pat, match_t *m void cache_destroy(match_ctx_t *ctx) { cache_t *cache = ctx->cache; - for (size_t i = 0; cache->occupancy > 0 && i < cache->size; i++) { - while (cache->matches[i]) { - cache_hit_t *next = cache->matches[i]->next; - delete(&cache->matches[i]); - cache->matches[i] = next; - } - } + if (cache->hits) delete(&cache->hits); cache->occupancy = 0; - if (cache->matches) delete(&cache->matches); cache->size = 0; } @@ -592,7 +567,8 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat) return new_match(pat, str, p ? p->end : str, MATCHES(p)); } case BP_REF: { - if (cache_get(ctx, str, pat) == CACHE_NOMATCH) + cache_hit_t hit = cache_get(ctx, str, pat); + if (hit.start && !hit.end) return NULL; pat_t *ref = lookup(ctx, pat->args.ref.name, pat->args.ref.len); @@ -647,17 +623,14 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat) m = m2; } + if (rec_op.args.leftrec.match) + recycle_match(&rec_op.args.leftrec.match); + // This match wrapper mainly exists for record-keeping purposes. - // However, it also keeps `m` from getting garbage collected with - // leftrec.match is GC'd. It also helps with visualization of match - // results. + // It also helps with visualization of match results. // OPTIMIZE: remove this if necessary match_t *wrap = new_match(pat, m->start, m->end, MATCHES(m)); cache_save(ctx, str, pat, wrap); - - if (rec_op.args.leftrec.match) - recycle_match(&rec_op.args.leftrec.match); - return wrap; } case BP_NODENT: { |
