Overhaul of memory tracking and left recursion. Added explanation doc

for left recursion and fixed all visible memory leaks.
This commit is contained in:
Bruce Hill 2021-01-14 19:21:31 -08:00
parent 9238ffea88
commit 3cd61e3abc
8 changed files with 281 additions and 124 deletions

View File

@ -4,6 +4,7 @@ PREFIX=/usr/local
SYSCONFDIR=/etc
CFLAGS=-std=c99 -Werror -D_XOPEN_SOURCE=700 -D_GNU_SOURCE -D_POSIX_C_SOURCE=200809L
CWARN=-Wall -Wpedantic -Wextra -Wsign-conversion -Wtype-limits -Wunused-result
EXTRA_FLAGS=
G=
O=-O3
@ -13,10 +14,10 @@ OBJFILES=$(CFILES:.c=.o)
all: $(NAME)
%.o: %.c %.h types.h
$(CC) -c $(CFLAGS) $(CWARN) $(G) $(O) -o $@ $<
$(CC) -c $(CFLAGS) $(EXTRA_CFLAGS) $(CWARN) $(G) $(O) -o $@ $<
$(NAME): $(OBJFILES) $(NAME).c
$(CC) $(CFLAGS) $(CWARN) $(G) $(O) -o $@ $(OBJFILES) $(NAME).c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(CWARN) $(G) $(O) -o $@ $(OBJFILES) $(NAME).c
clean:
rm -f $(NAME) $(OBJFILES)

41
bp.c
View File

@ -119,9 +119,8 @@ static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, uns
}
}
if (m != NULL)
destroy_match(&m);
recycle_if_unused(&m);
check(recycle_all_matches() == 0, "Memory leak: there should no longer be any matches in use at this point.");
destroy_file(&f);
return success;
@ -134,12 +133,16 @@ int main(int argc, char *argv[])
unsigned int flags = 0;
char *flag = NULL;
char path[PATH_MAX] = {0};
const char *rule = "find-all";
const char *mode = "find-all";
def_t *defs = NULL;
file_t *loaded_files = NULL;
// Define an opcode that is just a reference to the rule `pattern`
file_t *pat_file = spoof_file(&loaded_files, "<pattern>", "pattern");
vm_op_t *pattern = bp_pattern(pat_file, pat_file->contents);
// Load builtins:
if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) {
file_t *f = load_file(&loaded_files, "/etc/xdg/bp/builtins.bp");
@ -176,13 +179,11 @@ int main(int argc, char *argv[])
} else if (FLAG("--replace") || FLAG("-r")) {
// TODO: spoof file as sprintf("pattern => '%s'", flag)
// except that would require handling edge cases like quotation marks etc.
file_t *pat_file = spoof_file(&loaded_files, "<pattern>", "pattern");
vm_op_t *patref = bp_pattern(pat_file, pat_file->contents);
file_t *replace_file = spoof_file(&loaded_files, "<replace argument>", flag);
vm_op_t *rep = bp_replacement(replace_file, patref, replace_file->contents);
vm_op_t *rep = bp_replacement(replace_file, pattern, replace_file->contents);
check(rep, "Replacement failed to compile: %s", flag);
defs = with_def(defs, replace_file, strlen("replacement"), "replacement", rep);
rule = "replace-all";
mode = "replace-all";
} else if (FLAG("--grammar") || FLAG("-g")) {
file_t *f = load_file(&loaded_files, flag);
if (f == NULL) {
@ -229,7 +230,7 @@ int main(int argc, char *argv[])
defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p);
++npatterns;
} else if (FLAG("--mode") || FLAG("-m")) {
rule = flag;
mode = flag;
} else if (argv[i][0] == '-' && argv[i][1] && argv[i][1] != '-') { // single-char flags
for (char *c = &argv[i][1]; *c; ++c) {
switch (*c) {
@ -269,16 +270,20 @@ int main(int argc, char *argv[])
print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS;
}
def_t *pattern_def = lookup(defs, rule);
check(pattern_def != NULL, "No such rule: '%s'", rule);
vm_op_t *pattern = pattern_def->op;
// Define an opcode that is just a reference to the overarching mode (e.g. find-all)
if (lookup(defs, mode) == NULL) {
printf("The mode '%s' is not defined.\n", mode);
return 1;
}
file_t *mode_file = spoof_file(&loaded_files, "<mode>", mode);
vm_op_t *mode_op = bp_pattern(mode_file, mode_file->contents);
int found = 0;
if (flags & BP_JSON) printf("[");
if (i < argc) {
// Files pass in as command line args:
for (int nfiles = 0; i < argc; nfiles++, i++) {
found += process_file(defs, argv[i], pattern, flags);
found += process_file(defs, argv[i], mode_op, flags);
}
} else if (isatty(STDIN_FILENO)) {
// No files, no piped in input, so use * **/*:
@ -286,21 +291,27 @@ int main(int argc, char *argv[])
glob("*", 0, NULL, &globbuf);
glob("**/*", GLOB_APPEND, NULL, &globbuf);
for (size_t i = 0; i < globbuf.gl_pathc; i++) {
found += process_file(defs, globbuf.gl_pathv[i], pattern, flags);
found += process_file(defs, globbuf.gl_pathv[i], mode_op, flags);
}
globfree(&globbuf);
} else {
// Piped in input:
found += process_file(defs, NULL, pattern, flags);
found += process_file(defs, NULL, mode_op, flags);
}
if (flags & BP_JSON) printf("]\n");
#ifdef FREE_ON_EXIT
// This code frees up all residual heap-allocated memory. Since the program
// is about to exit, this step is unnecessary. However, it is useful for
// tracking down memory leaks.
free_defs(&defs, NULL);
while (loaded_files) {
file_t *next = loaded_files->next;
destroy_file(&loaded_files);
loaded_files = next;
}
free_all_matches();
#endif
return (found > 0) ? 0 : 1;
}

View File

@ -89,7 +89,7 @@ file_t *load_file(file_t **files, const char *filename)
//
// Create a virtual file from a string.
//
file_t *spoof_file(file_t **files, const char *filename, char *text)
file_t *spoof_file(file_t **files, const char *filename, const char *text)
{
if (filename == NULL) filename = "";
file_t *f = new(file_t);

View File

@ -19,7 +19,7 @@ typedef struct file_s {
file_t *load_file(file_t **files, const char *filename);
__attribute__((nonnull(3), returns_nonnull))
file_t *spoof_file(file_t **files, const char *filename, char *text);
file_t *spoof_file(file_t **files, const char *filename, const char *text);
__attribute__((nonnull))
void intern_file(file_t *f);
__attribute__((nonnull))

68
left-recursion.md Normal file
View File

@ -0,0 +1,68 @@
# Left Recursion in BP
Left recursion presents a special difficulty for parsing expression grammars.
A regular recursive rule looks like this:
```
rec: "{" *rec "}"
```
This is a relatively simple case to handle, because we're always making forward
progress. In other words, calling `match(<rec>, str)` might call
`match(<rec>, str + 1)` but it will never recursively call the same function
with the same arguments: `match(<rec>, str)`.
Left-recursive rules, on the other hand, are rules that may attempt to match
themselves at the same position. For example:
```
laugh: (laugh "ha") / "Ha"
```
The rule above is functionally equivalent to `laugh: "Ha" (*"ha")`, but it's
written in a recursive style. It should be obvious by inspection that the
string `Hahaha` matches both versions of the rule, but a naive implementation
of `match()` would cause `match(<laugh>, str)` to call `match(<laugh>, str)`
before doing anything else, which results in infinite recursion and a stack
overflow. One option is to simply detect left recursion and cause a compilation
error (this is what LPEG does). However, it is possible to actually match left
recursive rules without overflowing the stack.
## Matching Left Recursion
The solution used in BP is the following:
1. Whenever matching a named rule, `foo`, against string `str`, first get the
originally defined pattern for that rule. Call this
`foo-original-definition`.
2. Temporarily define `match(<foo>, str) := signal left recursion and return
Fail` (In other words, `<foo>` is temporarily defined to signal left
recursion and then fail to match if matching against the string `str`.)
3. Run `result := match(<foo-original-definition>, str)`.
4. If the match failed, then return failure.
5. If the match was successful and did not trigger left recursion, return `result`.
6. If the match was successful, but did trigger left recursion, then:
a. If `result` is not longer than any previous `result`, stop looping and
return the longest `result` to avoid an infinite loop.
b. Otherwise: temporarily define `match(<foo>, str) := signal left recursion and return <result>`
c. Go to step 3.
This handles left recursion as a simple loop and successfully matches left
recursive patterns, even ones that are indirectly or nontrivially left
recursive.
## Example
Consider the rule `laugh: laugh "ha" / "Ha"` being applied to the input text `"Hahaha!"`:
|Definition of `match(<laugh>, "Hahaha!")` | Result of `match(<laugh "ha" / "Ha">, "Hahaha!")` |
|------------------------------------------|---------------------------------------------------|
|`Fail` | `Match{"Ha"}` |
|`Match{"Ha"}` | `Match{"Haha"}` |
|`Match{"Haha"}` | `Match{"Hahaha"}` |
|`Match{"Hahaha"}` | `Match{"Ha"}` (Forward progress stops being made) |
As you can see in this example, each successive iteration builds up a final
match incrementally until progress is no longer being made. At that point,
the longest match is returned: `Match{"Hahaha"}`.

View File

@ -39,7 +39,7 @@ enum VMOpcode {
VM_REF,
VM_BACKREF,
VM_NODENT,
VM_CANNED,
VM_LEFTRECURSION,
};
struct match_s; // forward declared to resolve circular struct defs
@ -80,7 +80,7 @@ typedef struct vm_op_s {
unsigned int visits;
const char *at;
struct vm_op_s *fallback;
} canned;
} leftrec;
struct vm_op_s *pat;
} args;
} vm_op_t;
@ -93,7 +93,9 @@ typedef struct match_s {
const char *start, *end;
struct match_s *child, *nextsibling;
vm_op_t *op;
unsigned int refcount;
// Intrusive linked list nodes for garbage collection:
struct match_s **atme, *next;
int refcount;
} match_t;
//

270
vm.c
View File

@ -12,25 +12,31 @@
#include "utils.h"
#include "vm.h"
// Linked list operations:
#define LL_PREPEND(head, node) do { (node)->atme = &(head); (node)->next = head; if (head) (head)->atme = &(node)->next; head = node; } while(0)
#define LL_REMOVE(node) do { *(node)->atme = (node)->next; if ((node)->next) (node)->next->atme = (node)->atme; } while(0)
// Refcounting ownership-setting macros:
#define ADD_OWNER(owner, m) do { owner = m; ++(m)->refcount; } while(0)
#define REMOVE_OWNERSHIP(owner) do { if (owner) { --(owner)->refcount; recycle_if_unused(&(owner)); owner = NULL; } } while(0)
// New match objects are either recycled from unused match objects or allocated
// from the heap. While it is in use, the match object is stored in the
// `in_use_matches` linked list. Once it is no longer needed, it is moved to
// the `unused_matches` linked list so it can be reused without the need for
// additional calls to malloc/free. Thus, it is an invariant that every match
// object is in one of these two lists:
static match_t *in_use_matches = NULL, *unused_matches = NULL;
__attribute__((nonnull, pure))
static inline const char *next_char(file_t *f, const char *str);
__attribute__((nonnull))
static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags);
__attribute__((hot, nonnull(2,3,4)))
static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
__attribute__((nonnull))
static match_t *get_capture_by_num(match_t *m, int *n);
__attribute__((nonnull, pure))
static match_t *get_capture_by_name(match_t *m, const char *name);
static inline void set_owner(match_t *m, match_t **owner)
{
check(owner != &m->child, "Circular ownership");
check(owner != &m->nextsibling, "Circular ownership");
*owner = m;
++m->refcount;
}
//
// Return the location of the next character or UTF8 codepoint.
// (i.e. skip forward one codepoint at a time, not one byte at a time)
@ -51,22 +57,6 @@ static inline const char *next_char(file_t *f, const char *str)
return str;
}
//
// Recursively deallocate a match object and set to NULL
//
void destroy_match(match_t **m)
{
if (*m == NULL) return;
if (--(*m)->refcount > 0) {
*m = NULL;
return;
}
destroy_match(&((*m)->child));
destroy_match(&((*m)->nextsibling));
free(*m);
*m = NULL;
}
//
// Attempt to match text against a previously captured value.
// Return the character position after the backref has matched, or NULL if no match has occurred.
@ -131,22 +121,26 @@ static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, uns
// a match struct, or NULL if no match is found.
// The returned value should be free()'d to avoid memory leaking.
//
static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
{
switch (op->type) {
case VM_CANNED: {
if (str == op->args.canned.at) {
++op->args.canned.visits;
// TODO: deep copy?
return op->args.canned.match;
case VM_LEFTRECURSION: {
// Left recursion occurs when a pattern directly or indirectly
// invokes itself at the same position in the text. It's handled as
// a special case, but if a pattern invokes itself at a later
// point, it can be handled with normal recursion.
// See: left-recursion.md for more details.
if (str == op->args.leftrec.at) {
++op->args.leftrec.visits;
return op->args.leftrec.match;
} else {
return _match(defs, f, str, op->args.canned.fallback, flags);
return match(defs, f, str, op->args.leftrec.fallback, flags);
}
}
case VM_ANYCHAR: {
if (str >= f->end || *str == '\n')
return NULL;
match_t *m = new(match_t);
match_t *m = new_match();
m->op = op;
m->start = str;
m->end = next_char(f, str);
@ -157,7 +151,7 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
if ((flags & BP_IGNORECASE) ? memicmp(str, op->args.s, (size_t)op->len) != 0
: memcmp(str, op->args.s, (size_t)op->len) != 0)
return NULL;
match_t *m = new(match_t);
match_t *m = new_match();
m->op = op;
m->start = str;
m->end = str + op->len;
@ -167,26 +161,26 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
if (str >= f->end) return NULL;
if ((unsigned char)*str < op->args.range.low || (unsigned char)*str > op->args.range.high)
return NULL;
match_t *m = new(match_t);
match_t *m = new_match();
m->op = op;
m->start = str;
m->end = str + 1;
return m;
}
case VM_NOT: {
match_t *m = _match(defs, f, str, op->args.pat, flags);
match_t *m = match(defs, f, str, op->args.pat, flags);
if (m != NULL) {
destroy_match(&m);
recycle_if_unused(&m);
return NULL;
}
m = new(match_t);
m = new_match();
m->op = op;
m->start = str;
m->end = str;
return m;
}
case VM_UPTO_AND: {
match_t *m = new(match_t);
match_t *m = new_match();
m->start = str;
m->op = op;
@ -201,9 +195,9 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
for (const char *prev = NULL; prev < str; ) {
prev = str;
if (pat) {
match_t *p = _match(defs, f, str, pat, flags);
match_t *p = match(defs, f, str, pat, flags);
if (p != NULL) {
set_owner(p, dest);
ADD_OWNER(*dest, p);
m->end = p->end;
return m;
}
@ -212,9 +206,9 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
return m;
}
if (skip) {
match_t *s = _match(defs, f, str, skip, flags);
match_t *s = match(defs, f, str, skip, flags);
if (s != NULL) {
set_owner(s, dest);
ADD_OWNER(*dest, s);
dest = &s->nextsibling;
str = s->end;
continue;
@ -226,11 +220,11 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
if (str < f->end && *str != '\n')
str = next_char(f, str);
}
destroy_match(&m);
recycle_if_unused(&m);
return NULL;
}
case VM_REPEAT: {
match_t *m = new(match_t);
match_t *m = new_match();
m->start = str;
m->end = str;
m->op = op;
@ -243,14 +237,14 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
// Separator
match_t *sep = NULL;
if (op->args.repetitions.sep != NULL && reps > 0) {
sep = _match(defs, f, str, op->args.repetitions.sep, flags);
sep = match(defs, f, str, op->args.repetitions.sep, flags);
if (sep == NULL) break;
str = sep->end;
}
match_t *p = _match(defs, f, str, op->args.repetitions.repeat_pat, flags);
match_t *p = match(defs, f, str, op->args.repetitions.repeat_pat, flags);
if (p == NULL) {
str = start;
destroy_match(&sep);
recycle_if_unused(&sep);
break;
}
if (p->end == start && reps > 0) {
@ -260,8 +254,8 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
// loop either. We know that this will continue to loop
// until reps==max, so let's just cut to the chase instead
// of looping infinitely.
destroy_match(&sep);
destroy_match(&p);
recycle_if_unused(&sep);
recycle_if_unused(&p);
if (op->args.repetitions.max == -1)
reps = ~(size_t)0;
else
@ -269,16 +263,16 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
break;
}
if (sep) {
set_owner(sep, dest);
ADD_OWNER(*dest, sep);
dest = &sep->nextsibling;
}
set_owner(p, dest);
ADD_OWNER(*dest, p);
dest = &p->nextsibling;
str = p->end;
}
if (reps < (size_t)op->args.repetitions.min) {
destroy_match(&m);
recycle_if_unused(&m);
return NULL;
}
m->end = str;
@ -288,75 +282,75 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
ssize_t backtrack = op->args.pat->len;
check(backtrack != -1, "'<' is only allowed for fixed-length operations");
if (str - backtrack < f->contents) return NULL;
match_t *before = _match(defs, f, str - backtrack, op->args.pat, flags);
match_t *before = match(defs, f, str - backtrack, op->args.pat, flags);
if (before == NULL) return NULL;
match_t *m = new(match_t);
match_t *m = new_match();
m->start = str;
m->end = str;
m->op = op;
set_owner(before, &m->child);
ADD_OWNER(m->child, before);
return m;
}
case VM_BEFORE: {
match_t *after = _match(defs, f, str, op->args.pat, flags);
match_t *after = match(defs, f, str, op->args.pat, flags);
if (after == NULL) return NULL;
match_t *m = new(match_t);
match_t *m = new_match();
m->start = str;
m->end = str;
m->op = op;
set_owner(after, &m->child);
ADD_OWNER(m->child, after);
return m;
}
case VM_CAPTURE: {
match_t *p = _match(defs, f, str, op->args.pat, flags);
match_t *p = match(defs, f, str, op->args.pat, flags);
if (p == NULL) return NULL;
match_t *m = new(match_t);
match_t *m = new_match();
m->start = str;
m->end = p->end;
m->op = op;
set_owner(p, &m->child);
ADD_OWNER(m->child, p);
return m;
}
case VM_HIDE: {
match_t *p = _match(defs, f, str, op->args.pat, flags);
match_t *p = match(defs, f, str, op->args.pat, flags);
if (p == NULL) return NULL;
match_t *m = new(match_t);
match_t *m = new_match();
m->start = str;
m->end = p->end;
m->op = op;
set_owner(p, &m->child);
ADD_OWNER(m->child, p);
return m;
}
case VM_OTHERWISE: {
match_t *m = _match(defs, f, str, op->args.multiple.first, flags);
if (m == NULL) m = _match(defs, f, str, op->args.multiple.second, flags);
match_t *m = match(defs, f, str, op->args.multiple.first, flags);
if (m == NULL) m = match(defs, f, str, op->args.multiple.second, flags);
return m;
}
case VM_CHAIN: {
match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags);
match_t *m1 = match(defs, f, str, op->args.multiple.first, flags);
if (m1 == NULL) return NULL;
match_t *m2;
{ // Push backrefs and run matching, then cleanup
def_t *defs2 = with_backrefs(defs, f, m1);
m2 = _match(defs2, f, m1->end, op->args.multiple.second, flags);
m2 = match(defs2, f, m1->end, op->args.multiple.second, flags);
free_defs(&defs2, defs);
}
if (m2 == NULL) {
destroy_match(&m1);
recycle_if_unused(&m1);
return NULL;
}
match_t *m = new(match_t);
match_t *m = new_match();
m->start = str;
m->end = m2->end;
m->op = op;
set_owner(m1, &m->child);
set_owner(m2, &m1->nextsibling);
ADD_OWNER(m->child, m1);
ADD_OWNER(m1->nextsibling, m2);
return m;
}
case VM_EQUAL: case VM_NOT_EQUAL: {
match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags);
match_t *m1 = match(defs, f, str, op->args.multiple.first, flags);
if (m1 == NULL) return NULL;
// <p1>==<p2> matches iff the text of <p1> matches <p2>
@ -368,35 +362,35 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
.nlines=1 + get_line_number(f, m1->end)-get_line_number(f, m1->start),
.mmapped=f->mmapped,
};
match_t *m2 = _match(defs, &inner, str, op->args.multiple.second, flags);
match_t *m2 = match(defs, &inner, str, op->args.multiple.second, flags);
if ((m2 == NULL) == (op->type == VM_EQUAL)) {
destroy_match(&m1);
if (m2 != NULL) destroy_match(&m2);
recycle_if_unused(&m1);
if (m2 != NULL) recycle_if_unused(&m2);
return NULL;
}
match_t *m = new(match_t);
match_t *m = new_match();
m->start = m1->start;
m->end = m1->end;
m->op = op;
set_owner(m1, &m->child);
ADD_OWNER(m->child, m1);
if (op->type == VM_EQUAL) {
set_owner(m2, &m1->nextsibling);
ADD_OWNER(m1->nextsibling, m2);
} else {
destroy_match(&m2);
recycle_if_unused(&m2);
}
return m;
}
case VM_REPLACE: {
match_t *p = NULL;
if (op->args.replace.pat) {
p = _match(defs, f, str, op->args.replace.pat, flags);
p = match(defs, f, str, op->args.replace.pat, flags);
if (p == NULL) return NULL;
}
match_t *m = new(match_t);
match_t *m = new_match();
m->start = str;
m->op = op;
if (p) {
set_owner(p, &m->child);
ADD_OWNER(m->child, p);
m->end = p->end;
} else {
m->end = m->start;
@ -408,12 +402,12 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
check(def != NULL, "Unknown identifier: '%s'", op->args.s);
vm_op_t *ref = def->op;
vm_op_t fail_op = {
.type = VM_CANNED,
vm_op_t rec_op = {
.type = VM_LEFTRECURSION,
.start = ref->start,
.end = ref->end,
.len = 0,
.args.canned = {
.args.leftrec = {
.match = NULL,
.visits = 0,
.at = str,
@ -424,29 +418,42 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
.namelen = def->namelen,
.name = def->name,
.file = def->file,
.op = &fail_op,
.op = &rec_op,
.next = defs,
};
const char *prev = str;
match_t *m = _match(&defs2, f, str, ref, flags);
match_t *m = match(&defs2, f, str, ref, flags);
if (m == NULL) return NULL;
while (fail_op.args.canned.visits > 0 && m->end > prev) {
fail_op.args.canned.visits = 0;
fail_op.args.canned.match = m;
while (rec_op.args.leftrec.visits > 0) {
rec_op.args.leftrec.visits = 0;
REMOVE_OWNERSHIP(rec_op.args.leftrec.match);
ADD_OWNER(rec_op.args.leftrec.match, m);
prev = m->end;
match_t *m2 = _match(&defs2, f, str, ref, flags);
match_t *m2 = match(&defs2, f, str, ref, flags);
if (m2 == NULL) break;
if (m2->end <= prev) {
recycle_if_unused(&m2);
break;
}
m = m2;
}
if (rec_op.args.leftrec.match) {
// Ensure that `m` isn't garbage collected right now, but do
// clean up the recursive match result if it's not needed.
++m->refcount;
REMOVE_OWNERSHIP(rec_op.args.leftrec.match);
--m->refcount;
}
return m;
}
case VM_BACKREF: {
const char *end = match_backref(str, op, op->args.backref, flags);
if (end == NULL) return NULL;
match_t *m = new(match_t);
match_t *m = new_match();
m->op = op;
m->start = str;
m->end = end;
@ -473,7 +480,7 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
if (str[i] != denter || &str[i] >= f->end) return NULL;
}
match_t *m = new(match_t);
match_t *m = new_match();
m->start = start;
m->end = &str[dents];
m->op = op;
@ -540,14 +547,79 @@ match_t *get_capture(match_t *m, const char **id)
}
//
// Wrapper function for _match() to kickstart the recursion info.
// Return a match object which can be used (may be allocated or recycled).
//
match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
match_t *new_match(void)
{
return _match(defs, f, str, op, flags);
match_t *m;
if (unused_matches) {
m = unused_matches;
LL_REMOVE(m);
memset(m, 0, sizeof(match_t));
} else {
m = new(match_t);
}
// Keep track of the object:
LL_PREPEND(in_use_matches, m);
return m;
}
//
// If the given match is not currently a child member of another match (or
// otherwise reserved) then put it back in the pool of unused match objects.
//
void recycle_if_unused(match_t **at_m)
{
match_t *m = *at_m;
if (m == NULL) return;
if (m->refcount > 0) {
*at_m = NULL;
return;
}
REMOVE_OWNERSHIP(m->child);
REMOVE_OWNERSHIP(m->nextsibling);
LL_REMOVE(m);
memset(m, 0, sizeof(match_t));
LL_PREPEND(unused_matches, m);
*at_m = NULL;
}
//
// Force all match objects into the pool of unused match objects.
//
size_t recycle_all_matches(void)
{
size_t count = 0;
while (in_use_matches) {
match_t *m = in_use_matches;
LL_REMOVE(m);
LL_PREPEND(unused_matches, m);
++count;
}
return count;
}
//
// Free all match objects in memory.
//
size_t free_all_matches(void)
{
size_t count = 0;
recycle_all_matches();
while (unused_matches) {
match_t *m = unused_matches;
LL_REMOVE(m);
free(m);
++count;
}
return count;
}
//
// Deallocate memory associated with an op
//
void destroy_op(vm_op_t *op)
{
switch (op->type) {

9
vm.h
View File

@ -8,14 +8,17 @@
#include "types.h"
__attribute__((nonnull(2,3,4)))
__attribute__((hot, nonnull(2,3,4)))
match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
__attribute__((nonnull))
void destroy_match(match_t **m);
__attribute__((nonnull))
match_t *get_capture(match_t *m, const char **id);
__attribute__((nonnull))
void destroy_op(vm_op_t *op);
match_t *new_match(void);
size_t free_all_matches(void);
__attribute__((nonnull))
void recycle_if_unused(match_t **at_m);
size_t recycle_all_matches(void);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1