Moved caching code onto the file, which fixed an issue with file slicing
having stale cache values.
This commit is contained in:
parent
8268e67875
commit
0b2de4721f
7
bp.c
7
bp.c
@ -406,7 +406,7 @@ static int process_file(def_t *defs, const char *filename, pat_t *pattern)
|
||||
}
|
||||
fflush(stdout);
|
||||
|
||||
cache_destroy();
|
||||
cache_destroy(f);
|
||||
if (recycle_all_matches() != 0)
|
||||
fprintf(stderr, "\033[33;1mMemory leak: there should no longer be any matches in use at this point.\033[0m\n");
|
||||
destroy_file(&f);
|
||||
@ -624,6 +624,10 @@ int main(int argc, char *argv[])
|
||||
tty_out = fopen("/dev/tty", "w");
|
||||
}
|
||||
|
||||
// No need for these caches anymore:
|
||||
for (file_t *f = loaded_files; f; f = f->next)
|
||||
cache_destroy(f);
|
||||
|
||||
int found = 0;
|
||||
if (options.mode == MODE_JSON) printf("[");
|
||||
if (options.git_mode) { // Get the list of files from `git --ls-files ...`
|
||||
@ -652,7 +656,6 @@ int main(int argc, char *argv[])
|
||||
// This code frees up all residual heap-allocated memory. Since the program
|
||||
// is about to exit, this step is unnecessary. However, it is useful for
|
||||
// tracking down memory leaks.
|
||||
cache_destroy();
|
||||
free_all_matches();
|
||||
defs = free_defs(defs, NULL);
|
||||
while (loaded_files) {
|
||||
|
@ -37,11 +37,8 @@ def_t *load_grammar(def_t *defs, file_t *f)
|
||||
pat_t *pat = bp_pattern(f, str);
|
||||
if (!pat) file_err(f, str, f->end, "Could not parse this file");
|
||||
if (pat->end < f->end) file_err(f, pat->end, f->end, "Could not parse this part of the file");
|
||||
for (pat_t *p = pat; p && p->type == BP_DEFINITION; p = p->args.def.pat) {
|
||||
// printf("Def '%.*s': %.*s\n", (int)p->args.def.namelen, p->args.def.name,
|
||||
// (int)(p->args.def.def->end - p->args.def.def->start), p->args.def.def->start);
|
||||
for (pat_t *p = pat; p && p->type == BP_DEFINITION; p = p->args.def.pat)
|
||||
defs = with_def(defs, p->args.def.namelen, p->args.def.name, p->args.def.def);
|
||||
}
|
||||
return defs;
|
||||
}
|
||||
|
||||
|
125
files.c
125
files.c
@ -14,6 +14,7 @@
|
||||
#include <unistd.h>
|
||||
|
||||
#include "files.h"
|
||||
#include "match.h"
|
||||
#include "pattern.h"
|
||||
#include "utils.h"
|
||||
|
||||
@ -183,6 +184,8 @@ void destroy_file(file_t **f)
|
||||
}
|
||||
}
|
||||
|
||||
cache_destroy(*f);
|
||||
|
||||
for (pat_t *next; (*f)->pats; (*f)->pats = next) {
|
||||
next = (*f)->pats->next;
|
||||
delete(&(*f)->pats);
|
||||
@ -270,4 +273,126 @@ void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, cons
|
||||
fprintf(dest, "\033[0m\n");
|
||||
}
|
||||
|
||||
//
|
||||
// Hash a string position/pattern.
|
||||
//
|
||||
static inline size_t hash(const char *str, pat_t *pat)
|
||||
{
|
||||
return (size_t)str + 2*pat->id;
|
||||
}
|
||||
|
||||
//
|
||||
// Check if we have memoized a pattern match at the given position for the
|
||||
// given definitions. If a result has been memoized, set *result to the
|
||||
// memoized value and return true, otherwise return false.
|
||||
//
|
||||
bool cache_get(file_t *f, def_t *defs, const char *str, pat_t *pat, match_t **result)
|
||||
{
|
||||
if (!f->cache.matches) return NULL;
|
||||
size_t h = hash(str, pat) & (f->cache.size-1);
|
||||
for (match_t *c = f->cache.matches[h]; c; c = c->cache.next) {
|
||||
if (c->pat == pat && c->defs_id == (defs?defs->id:0) && c->start == str) {
|
||||
// If c->end == NULL, that means no match occurs here
|
||||
*result = c->end == NULL ? NULL : c;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Remove an item from the cache.
|
||||
//
|
||||
static void cache_remove(file_t *f, match_t *m)
|
||||
{
|
||||
if (!m->cache.home) return;
|
||||
*m->cache.home = m->cache.next;
|
||||
if (m->cache.next) m->cache.next->cache.home = m->cache.home;
|
||||
m->cache.next = NULL;
|
||||
m->cache.home = NULL;
|
||||
if (--m->refcount == 0) recycle_if_unused(&m);
|
||||
--f->cache.occupancy;
|
||||
}
|
||||
|
||||
//
|
||||
// Save a match in the cache.
|
||||
//
|
||||
void cache_save(file_t *f, def_t *defs, const char *str, pat_t *pat, match_t *m)
|
||||
{
|
||||
// As a convention, a match with {.pat=pat, .start=str, .end==NULL} is used
|
||||
// to memoize the fact that `pat` will *not* match at `str`.
|
||||
if (m == NULL) m = new_match(defs, pat, str, NULL, NULL);
|
||||
|
||||
if (f->cache.occupancy+1 > 3*f->cache.size) {
|
||||
if (f->cache.size == MAX_CACHE_SIZE) {
|
||||
size_t h = hash(m->start, m->pat) & (f->cache.size-1);
|
||||
for (int quota = 2; f->cache.matches[h] && quota > 0; quota--) {
|
||||
match_t *last = f->cache.matches[h];
|
||||
while (last->cache.next) last = last->cache.next;
|
||||
cache_remove(f, last);
|
||||
}
|
||||
} else {
|
||||
match_t **old_matches = f->cache.matches;
|
||||
size_t old_size = f->cache.size;
|
||||
f->cache.size = old_size == 0 ? 16 : 2*old_size;
|
||||
f->cache.matches = new(match_t*[f->cache.size]);
|
||||
|
||||
// Rehash:
|
||||
if (old_matches) {
|
||||
for (size_t i = 0; i < old_size; i++) {
|
||||
for (match_t *o; (o = old_matches[i]); ) {
|
||||
*o->cache.home = o->cache.next;
|
||||
if (o->cache.next) o->cache.next->cache.home = o->cache.home;
|
||||
size_t h = hash(o->start, o->pat) & (f->cache.size-1);
|
||||
o->cache.home = &(f->cache.matches[h]);
|
||||
o->cache.next = f->cache.matches[h];
|
||||
if (f->cache.matches[h]) f->cache.matches[h]->cache.home = &o->cache.next;
|
||||
f->cache.matches[h] = o;
|
||||
}
|
||||
}
|
||||
free(old_matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t h = hash(m->start, m->pat) & (f->cache.size-1);
|
||||
m->cache.home = &(f->cache.matches[h]);
|
||||
m->cache.next = f->cache.matches[h];
|
||||
if (f->cache.matches[h]) f->cache.matches[h]->cache.home = &m->cache.next;
|
||||
f->cache.matches[h] = m;
|
||||
++m->refcount;
|
||||
++f->cache.occupancy;
|
||||
}
|
||||
|
||||
//
|
||||
// Remove all items from the cache that do not overlap `start` and `end`.
|
||||
// (This is used to remove useless items from the cache)
|
||||
//
|
||||
void cache_prune(file_t *f, const char *start, const char *end)
|
||||
{
|
||||
if (!f->cache.matches) return;
|
||||
for (size_t i = 0; i < f->cache.size; i++) {
|
||||
for (match_t *m = f->cache.matches[i], *next = NULL; m; m = next) {
|
||||
next = m->cache.next;
|
||||
if (m->start < start || (m->end ? m->end : m->start) > end)
|
||||
cache_remove(f, m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Clear and deallocate the cache.
|
||||
//
|
||||
void cache_destroy(file_t *f)
|
||||
{
|
||||
if (!f->cache.matches) return;
|
||||
for (size_t i = 0; i < f->cache.size; i++) {
|
||||
while (f->cache.matches[i])
|
||||
cache_remove(f, f->cache.matches[i]);
|
||||
}
|
||||
f->cache.occupancy = 0;
|
||||
delete(&f->cache.matches);
|
||||
f->cache.size = 0;
|
||||
}
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
16
files.h
16
files.h
@ -4,13 +4,15 @@
|
||||
#ifndef FILES__H
|
||||
#define FILES__H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define file_err(f, ...) do { fprint_line(stderr, f, __VA_ARGS__); exit(EXIT_FAILURE); } while(false)
|
||||
|
||||
struct pat_s; // declared in types.h
|
||||
#define MAX_CACHE_SIZE (1<<14)
|
||||
|
||||
typedef struct file_s {
|
||||
struct file_s *next;
|
||||
@ -18,6 +20,10 @@ typedef struct file_s {
|
||||
char *memory, **lines, *start, *end;
|
||||
size_t nlines;
|
||||
struct pat_s *pats;
|
||||
struct {
|
||||
size_t size, occupancy;
|
||||
match_t **matches;
|
||||
} cache;
|
||||
bool mmapped:1;
|
||||
} file_t;
|
||||
|
||||
@ -37,6 +43,14 @@ __attribute__((pure, nonnull))
|
||||
const char *get_line(file_t *f, size_t line_number);
|
||||
__attribute__((nonnull(1,2,3), format(printf,5,6)))
|
||||
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...);
|
||||
__attribute__((nonnull(1,3,4,5)))
|
||||
bool cache_get(file_t *f, def_t *defs, const char *str, pat_t *pat, match_t **result);
|
||||
__attribute__((nonnull(1,3,4)))
|
||||
void cache_save(file_t *f, def_t *defs, const char *str, pat_t *pat, match_t *m);
|
||||
__attribute__((nonnull))
|
||||
void cache_prune(file_t *f, const char *start, const char *end);
|
||||
__attribute__((nonnull))
|
||||
void cache_destroy(file_t *f);
|
||||
|
||||
#endif
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
128
match.c
128
match.c
@ -26,21 +26,10 @@
|
||||
static match_t *unused_matches = NULL;
|
||||
static match_t *in_use_matches = NULL;
|
||||
|
||||
typedef struct {
|
||||
size_t size, occupancy;
|
||||
match_t **matches;
|
||||
} cache_t;
|
||||
|
||||
#define MAX_CACHE_SIZE (1<<14)
|
||||
|
||||
#define MATCHES(...) (match_t*[]){__VA_ARGS__, NULL}
|
||||
|
||||
cache_t cache = {0, 0, NULL};
|
||||
|
||||
__attribute__((nonnull(1)))
|
||||
static inline pat_t *deref(def_t *defs, pat_t *pat);
|
||||
__attribute__((returns_nonnull))
|
||||
static match_t *new_match(def_t *defs, pat_t *pat, const char *start, const char *end, match_t *children[]);
|
||||
__attribute__((nonnull))
|
||||
static match_t *get_capture_by_num(match_t *m, int *n);
|
||||
__attribute__((nonnull, pure))
|
||||
@ -96,99 +85,6 @@ static inline void list_remove(match_t *m, match_dll_t *node)
|
||||
node->next = NULL;
|
||||
}
|
||||
|
||||
static inline size_t hash(const char *str, pat_t *pat)
|
||||
{
|
||||
return (size_t)str + 2*pat->id;
|
||||
}
|
||||
|
||||
static match_t *cache_lookup(def_t *defs, const char *str, pat_t *pat)
|
||||
{
|
||||
if (!cache.matches) return NULL;
|
||||
size_t h = hash(str, pat) & (cache.size-1);
|
||||
for (match_t *c = cache.matches[h]; c; c = c->cache.next) {
|
||||
if (c->pat == pat && c->defs_id == (defs?defs->id:0) && c->start == str)
|
||||
return c;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void cache_remove(match_t *m)
|
||||
{
|
||||
if (!m->cache.home) return;
|
||||
*m->cache.home = m->cache.next;
|
||||
if (m->cache.next) m->cache.next->cache.home = m->cache.home;
|
||||
m->cache.next = NULL;
|
||||
m->cache.home = NULL;
|
||||
remove_ownership(&m);
|
||||
--cache.occupancy;
|
||||
}
|
||||
|
||||
static void cache_save(match_t *m)
|
||||
{
|
||||
if (cache.occupancy+1 > 3*cache.size) {
|
||||
if (cache.size == MAX_CACHE_SIZE) {
|
||||
size_t h = hash(m->start, m->pat) & (cache.size-1);
|
||||
for (int quota = 2; cache.matches[h] && quota > 0; quota--) {
|
||||
match_t *last = cache.matches[h];
|
||||
while (last->cache.next) last = last->cache.next;
|
||||
cache_remove(last);
|
||||
}
|
||||
} else {
|
||||
cache_t old_cache = cache;
|
||||
cache.size = old_cache.size == 0 ? 16 : 2*old_cache.size;
|
||||
cache.matches = new(match_t*[cache.size]);
|
||||
|
||||
// Rehash:
|
||||
if (old_cache.matches) {
|
||||
for (size_t i = 0; i < old_cache.size; i++) {
|
||||
for (match_t *o; (o = old_cache.matches[i]); ) {
|
||||
*o->cache.home = o->cache.next;
|
||||
if (o->cache.next) o->cache.next->cache.home = o->cache.home;
|
||||
size_t h = hash(o->start, o->pat) & (cache.size-1);
|
||||
o->cache.home = &(cache.matches[h]);
|
||||
o->cache.next = cache.matches[h];
|
||||
if (cache.matches[h]) cache.matches[h]->cache.home = &o->cache.next;
|
||||
cache.matches[h] = o;
|
||||
}
|
||||
}
|
||||
free(old_cache.matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t h = hash(m->start, m->pat) & (cache.size-1);
|
||||
m->cache.home = &(cache.matches[h]);
|
||||
m->cache.next = cache.matches[h];
|
||||
if (cache.matches[h]) cache.matches[h]->cache.home = &m->cache.next;
|
||||
cache.matches[h] = NULL;
|
||||
add_owner(&cache.matches[h], m);
|
||||
++cache.occupancy;
|
||||
}
|
||||
|
||||
static void cache_prune(const char *start, const char *end)
|
||||
{
|
||||
if (!cache.matches) return;
|
||||
for (size_t i = 0; i < cache.size; i++) {
|
||||
for (match_t *m = cache.matches[i], *next = NULL; m; m = next) {
|
||||
next = m->cache.next;
|
||||
if (m->start < start || (m->end ? m->end : m->start) > end)
|
||||
cache_remove(m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cache_destroy(void)
|
||||
{
|
||||
if (!cache.matches) return;
|
||||
for (size_t i = 0; i < cache.size; i++) {
|
||||
while (cache.matches[i])
|
||||
cache_remove(cache.matches[i]);
|
||||
}
|
||||
cache.occupancy = 0;
|
||||
delete(&cache.matches);
|
||||
cache.size = 0;
|
||||
}
|
||||
|
||||
//
|
||||
// If the given pattern is a reference, look it up and return the referenced
|
||||
// pattern. This is used for an optimization to avoid repeated lookups.
|
||||
@ -243,7 +139,7 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk
|
||||
if (prev) {
|
||||
str = prev->end > prev->start ? prev->end : prev->end + 1;
|
||||
if (prev->refcount == 0) recycle_if_unused(&prev);
|
||||
cache_prune(str, f->end);
|
||||
cache_prune(f, str, f->end);
|
||||
} else {
|
||||
str = f->start;
|
||||
}
|
||||
@ -470,20 +366,24 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
file_t slice;
|
||||
slice_file(&slice, f, f->start, str);
|
||||
for (const char *pos = &str[-(long)back->min_matchlen];
|
||||
pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(long)back->max_matchlen]);
|
||||
pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]);
|
||||
pos = prev_char(f, pos)) {
|
||||
cache_destroy(&slice);
|
||||
slice.start = (char*)pos;
|
||||
match_t *m = match(defs, &slice, pos, back, ignorecase);
|
||||
// Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
|
||||
if (m && m->end != str)
|
||||
recycle_if_unused(&m);
|
||||
else if (m)
|
||||
else if (m) {
|
||||
cache_destroy(&slice);
|
||||
return new_match(defs, pat, str, str, MATCHES(m));
|
||||
}
|
||||
if (pos == f->start) break;
|
||||
// To prevent extreme performance degradation, don't keep
|
||||
// walking backwards endlessly over newlines.
|
||||
if (back->max_matchlen == -1 && *pos == '\n') break;
|
||||
}
|
||||
cache_destroy(&slice);
|
||||
return NULL;
|
||||
}
|
||||
case BP_BEFORE: {
|
||||
@ -548,9 +448,11 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase);
|
||||
if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
|
||||
if (m2) recycle_if_unused(&m2);
|
||||
cache_destroy(&slice);
|
||||
recycle_if_unused(&m1);
|
||||
return NULL;
|
||||
}
|
||||
cache_destroy(&slice);
|
||||
return new_match(defs, pat, m1->start, m1->end, (pat->type == BP_MATCH) ? MATCHES(m2) : NULL);
|
||||
}
|
||||
case BP_REPLACE: {
|
||||
@ -562,8 +464,9 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
return new_match(defs, pat, str, p ? p->end : str, MATCHES(p));
|
||||
}
|
||||
case BP_REF: {
|
||||
match_t *cached = cache_lookup(defs, str, pat);
|
||||
if (cached) return cached->end == NULL ? NULL : cached;
|
||||
match_t *cached;
|
||||
if (cache_get(f, defs, str, pat, &cached))
|
||||
return cached;
|
||||
|
||||
def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
|
||||
if (def == NULL)
|
||||
@ -593,8 +496,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
const char *prev = str;
|
||||
match_t *m = match(&defs2, f, str, ref, ignorecase);
|
||||
if (m == NULL) {
|
||||
// Store placeholder:
|
||||
cache_save(new_match(defs, pat, str, NULL, NULL));
|
||||
cache_save(f, defs, str, pat, NULL);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -618,7 +520,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
// results.
|
||||
// OPTIMIZE: remove this if necessary
|
||||
match_t *wrap = new_match(defs, pat, m->start, m->end, MATCHES(m));
|
||||
cache_save(wrap);
|
||||
cache_save(f, defs, str, pat, wrap);
|
||||
|
||||
if (rec_op.args.leftrec.match)
|
||||
remove_ownership(&rec_op.args.leftrec.match);
|
||||
@ -717,7 +619,7 @@ match_t *get_capture(match_t *m, const char **id)
|
||||
//
|
||||
// Return a match object which can be used (may be allocated or recycled).
|
||||
//
|
||||
static match_t *new_match(def_t *defs, pat_t *pat, const char *start, const char *end, match_t *children[])
|
||||
match_t *new_match(def_t *defs, pat_t *pat, const char *start, const char *end, match_t *children[])
|
||||
{
|
||||
match_t *m;
|
||||
if (unused_matches) {
|
||||
|
4
match.h
4
match.h
@ -7,8 +7,11 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "files.h"
|
||||
#include "types.h"
|
||||
|
||||
__attribute__((returns_nonnull))
|
||||
match_t *new_match(def_t *defs, pat_t *pat, const char *start, const char *end, match_t *children[]);
|
||||
__attribute__((nonnull(2,4)))
|
||||
match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *skip, bool ignorecase);
|
||||
__attribute__((nonnull))
|
||||
@ -17,7 +20,6 @@ __attribute__((nonnull))
|
||||
void recycle_if_unused(match_t **at_m);
|
||||
size_t free_all_matches(void);
|
||||
size_t recycle_all_matches(void);
|
||||
void cache_destroy(void);
|
||||
|
||||
#endif
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
Loading…
Reference in New Issue
Block a user