diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2021-01-13 18:56:22 -0800 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2021-01-13 18:56:22 -0800 |
| commit | e7f94bbf50e7d68c3294efc4d437598b8b56b92d (patch) | |
| tree | 8b5a4f1318dec4690bc4ebb9ab48e8fdf2d5d8b7 | |
| parent | 9a6c0ee9ccf22d635a4bfdff6808b5716cdeca64 (diff) | |
Working towards zero memory leakage
| -rw-r--r-- | bp.c | 59 | ||||
| -rw-r--r-- | compiler.c | 18 | ||||
| -rw-r--r-- | file_loader.c | 19 | ||||
| -rw-r--r-- | file_loader.h | 9 | ||||
| -rw-r--r-- | grammar.c | 15 | ||||
| -rw-r--r-- | grammar.h | 6 | ||||
| -rw-r--r-- | types.h | 9 | ||||
| -rw-r--r-- | vm.c | 241 | ||||
| -rw-r--r-- | vm.h | 2 |
9 files changed, 222 insertions, 156 deletions
@@ -74,13 +74,14 @@ static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, uns { static int printed_matches = 0; int success = 0; - file_t *f = load_file(filename); + file_t *f = load_file(NULL, filename); check(f, "Could not open file: %s", filename); if (flags & BP_INPLACE) // Need to do this before matching intern_file(f); match_t *m = match(defs, f, f->contents, pattern, flags); if (m && print_errors(f, m, print_options) > 0) _exit(1); + if (m != NULL && m->end > m->start + 1) { success = 1; ++printed_matches; @@ -137,12 +138,18 @@ int main(int argc, char *argv[]) def_t *defs = NULL; + file_t *loaded_files = NULL; + // Load builtins: - if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) - defs = load_grammar(defs, load_file("/etc/xdg/bp/builtins.bp")); // Keep in memory for debugging output + if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) { + file_t *f = load_file(&loaded_files, "/etc/xdg/bp/builtins.bp"); + defs = load_grammar(defs, f); + } sprintf(path, "%s/.config/bp/builtins.bp", getenv("HOME")); - if (access(path, R_OK) != -1) - defs = load_grammar(defs, load_file(path)); // Keep in memory for debugging output + if (access(path, R_OK) != -1) { + file_t *f = load_file(&loaded_files, path); + defs = load_grammar(defs, f); + } int i, npatterns = 0; check(argc > 1, "%s", usage); @@ -169,22 +176,22 @@ int main(int argc, char *argv[]) } else if (FLAG("--replace") || FLAG("-r")) { // TODO: spoof file as sprintf("pattern => '%s'", flag) // except that would require handling edge cases like quotation marks etc. - file_t *pat_file = spoof_file("<pattern>", "pattern"); + file_t *pat_file = spoof_file(&loaded_files, "<pattern>", "pattern"); vm_op_t *patref = bp_pattern(pat_file, pat_file->contents); - file_t *replace_file = spoof_file("<replace argument>", flag); + file_t *replace_file = spoof_file(&loaded_files, "<replace argument>", flag); vm_op_t *rep = bp_replacement(replace_file, patref, replace_file->contents); check(rep, "Replacement failed to compile: %s", flag); - defs = with_def(defs, replace_file, "replacement", rep); + defs = with_def(defs, replace_file, strlen("replacement"), "replacement", rep); rule = "replace-all"; } else if (FLAG("--grammar") || FLAG("-g")) { - file_t *f = load_file(flag); + file_t *f = load_file(&loaded_files, flag); if (f == NULL) { sprintf(path, "%s/.config/bp/%s.bp", getenv("HOME"), flag); - f = load_file(path); + f = load_file(&loaded_files, path); } if (f == NULL) { sprintf(path, "/etc/xdg/bp/%s.bp", flag); - f = load_file(path); + f = load_file(&loaded_files, path); } check(f != NULL, "Couldn't find grammar: %s", flag); defs = load_grammar(defs, f); // Keep in memory for debug output @@ -194,32 +201,32 @@ int main(int argc, char *argv[]) check(eq, "Rule definitions must include an ':'\n\n%s", usage); *eq = '\0'; char *src = ++eq; - file_t *def_file = spoof_file(def, src); + file_t *def_file = spoof_file(&loaded_files, def, src); vm_op_t *pat = bp_pattern(def_file, def_file->contents); check(pat, "Failed to compile pattern: %s", flag); - defs = with_def(defs, def_file, def, pat); + defs = with_def(defs, def_file, strlen(def), def, pat); } else if (FLAG("--define-string") || FLAG("-D")) { char *def = flag; char *eq = strchr(def, ':'); check(eq, "Rule definitions must include an ':'\n\n%s", usage); *eq = '\0'; char *src = ++eq; - file_t *def_file = spoof_file(def, src); + file_t *def_file = spoof_file(&loaded_files, def, src); vm_op_t *pat = bp_stringpattern(def_file, def_file->contents); check(pat, "Failed to compile pattern: %s", src); - defs = with_def(defs, def_file, def, pat); + defs = with_def(defs, def_file, strlen(def), def, pat); } else if (FLAG("--pattern") || FLAG("-p")) { check(npatterns == 0, "Cannot define multiple patterns"); - file_t *arg_file = spoof_file("<pattern argument>", flag); + file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", flag); vm_op_t *p = bp_pattern(arg_file, arg_file->contents); check(p, "Pattern failed to compile: %s", flag); - defs = with_def(defs, arg_file, "pattern", p); + defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p); ++npatterns; } else if (FLAG("--pattern-string") || FLAG("-P")) { - file_t *arg_file = spoof_file("<pattern argument>", flag); + file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", flag); vm_op_t *p = bp_stringpattern(arg_file, arg_file->contents); check(p, "Pattern failed to compile: %s", flag); - defs = with_def(defs, arg_file, "pattern", p); + defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p); ++npatterns; } else if (FLAG("--mode") || FLAG("-m")) { rule = flag; @@ -241,10 +248,10 @@ int main(int argc, char *argv[]) } else if (argv[i][0] != '-') { if (npatterns > 0) break; // TODO: spoof file with quotation marks for better debugging - file_t *arg_file = spoof_file("<pattern argument>", argv[i]); + file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", argv[i]); vm_op_t *p = bp_stringpattern(arg_file, arg_file->contents); check(p, "Pattern failed to compile: %s", argv[i]); - defs = with_def(defs, arg_file, "pattern", p); + defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p); ++npatterns; } else { printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage); @@ -262,8 +269,9 @@ int main(int argc, char *argv[]) print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS; } - vm_op_t *pattern = lookup(defs, rule); - check(pattern != NULL, "No such rule: '%s'", rule); + def_t *pattern_def = lookup(defs, rule); + check(pattern_def != NULL, "No such rule: '%s'", rule); + vm_op_t *pattern = pattern_def->op; int found = 0; if (flags & BP_JSON) printf("["); @@ -288,6 +296,11 @@ int main(int argc, char *argv[]) if (flags & BP_JSON) printf("]\n"); free_defs(&defs, NULL); + while (loaded_files) { + file_t *next = loaded_files->next; + destroy_file(&loaded_files); + loaded_files = next; + } return (found > 0) ? 0 : 1; } @@ -140,7 +140,7 @@ static vm_op_t *chain_together(file_t *f, vm_op_t *first, vm_op_t *second) { if (first == NULL) return second; if (second == NULL) return first; - check(first->type != VM_CHAIN, "A chain should not be the first item in a chain.\n"); + check(first->type != VM_CHAIN, "A chain should not be the first item in a chain."); vm_op_t *chain = new_op(f, first->start, VM_CHAIN); chain->start = first->start; if (first->len >= 0 && second->len >= 0) @@ -160,7 +160,7 @@ vm_op_t *bp_simplepattern(file_t *f, const char *str) vm_op_t *op = _bp_simplepattern(f, str); if (op == NULL) return op; - check(op->end != NULL, "op->end is uninitialized!\n"); + check(op->end != NULL, "op->end is uninitialized!"); // Expand postfix operators (if any) str = after_spaces(op->end); @@ -516,8 +516,6 @@ vm_op_t *bp_stringpattern(file_t *f, const char *str) { vm_op_t *ret = NULL; while (*str) { - vm_op_t *strop = new_op(f, str, VM_STRING); - strop->len = 0; char *start = (char*)str; vm_op_t *interp = NULL; for (; *str; str++) { @@ -553,13 +551,11 @@ vm_op_t *bp_stringpattern(file_t *f, const char *str) // Note: an unescaped string is guaranteed to be no longer than the // escaped string, so this is safe to do inplace. len = unescape_string(literal, literal, len); - strop->len = (ssize_t)len; - strop->args.s = literal; - strop->end = str; - - if (strop->len == 0) { - xfree(&strop); - } else { + if (len > 0) { + vm_op_t *strop = new_op(f, str, VM_STRING); + strop->len = (ssize_t)len; + strop->args.s = literal; + strop->end = str; ret = chain_together(f, ret, strop); } if (interp) { diff --git a/file_loader.c b/file_loader.c index 27f871f..6e4aa3f 100644 --- a/file_loader.c +++ b/file_loader.c @@ -42,7 +42,7 @@ static void populate_lines(file_t *f) // // Read an entire file into memory. // -file_t *load_file(const char *filename) +file_t *load_file(file_t **files, const char *filename) { if (filename == NULL) filename = "-"; int fd = streq(filename, "-") ? STDIN_FILENO : open(filename, O_RDONLY); @@ -79,20 +79,28 @@ file_t *load_file(const char *filename) finished_loading: f->end = &f->contents[length]; populate_lines(f); + if (files != NULL) { + f->next = *files; + *files = f; + } return f; } // // Create a virtual file from a string. // -file_t *spoof_file(const char *filename, char *text) +file_t *spoof_file(file_t **files, const char *filename, char *text) { if (filename == NULL) filename = ""; file_t *f = new(file_t); f->filename = strdup(filename); - f->contents = text; + f->contents = strdup(text); f->end = &f->contents[strlen(text)]; populate_lines(f); + if (files != NULL) { + f->next = *files; + *files = f; + } return f; } @@ -122,24 +130,23 @@ void destroy_file(file_t **f) { if ((*f)->filename) { xfree(&((*f)->filename)); - (*f)->filename = NULL; } if ((*f)->lines) { xfree(&((*f)->lines)); - (*f)->lines = NULL; } if ((*f)->contents) { if ((*f)->mmapped) { munmap((*f)->contents, (size_t)((*f)->end - (*f)->contents)); + (*f)->contents = NULL; } else { xfree(&((*f)->contents)); } - (*f)->contents = NULL; } while ((*f)->ops) { + destroy_op(&(*f)->ops->op); allocated_op_t *tofree = (*f)->ops; (*f)->ops = tofree->next; memset(tofree, 'A', sizeof(allocated_op_t)); // Sentinel diff --git a/file_loader.h b/file_loader.h index 2a47d40..85bb920 100644 --- a/file_loader.h +++ b/file_loader.h @@ -8,7 +8,8 @@ struct allocated_op_s; // declared in types.h -typedef struct { +typedef struct file_s { + struct file_s *next; const char *filename; char *contents, **lines, *end; size_t nlines; @@ -16,9 +17,9 @@ typedef struct { unsigned int mmapped:1; } file_t; -file_t *load_file(const char *filename); -__attribute__((nonnull(2), returns_nonnull)) -file_t *spoof_file(const char *filename, char *text); +file_t *load_file(file_t **files, const char *filename); +__attribute__((nonnull(3), returns_nonnull)) +file_t *spoof_file(file_t **files, const char *filename, char *text); __attribute__((nonnull)) void intern_file(file_t *f); __attribute__((nonnull)) @@ -16,11 +16,12 @@ static def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m) // // Return a new list of definitions with one added to the front // -def_t *with_def(def_t *defs, file_t *f, const char *name, vm_op_t *op) +def_t *with_def(def_t *defs, file_t *f, size_t namelen, const char *name, vm_op_t *op) { def_t *def = new(def_t); def->next = defs; def->file = f; + def->namelen = namelen; def->name = name; def->op = op; return def; @@ -38,11 +39,11 @@ def_t *load_grammar(def_t *defs, file_t *f) const char *name = src; src = after_name(name); check(src > name, "Invalid name for definition: %s", name); - name = strndup(name, (size_t)(src-name)); + size_t namelen = (size_t)(src - name); check(matchchar(&src, ':'), "Expected ':' in definition"); vm_op_t *op = bp_pattern(f, src); if (op == NULL) break; - defs = with_def(defs, f, name, op); + defs = with_def(defs, f, namelen, name, op); src = op->end; src = after_spaces(src); if (matchchar(&src, ';')) @@ -58,11 +59,11 @@ def_t *load_grammar(def_t *defs, file_t *f) // // Look up a backreference or grammar definition by name // -vm_op_t *lookup(def_t *defs, const char *name) +def_t *lookup(def_t *defs, const char *name) { for ( ; defs; defs = defs->next) { - if (streq(defs->name, name)) - return defs->op; + if (strlen(name) == defs->namelen && strncmp(defs->name, name, defs->namelen) == 0) + return defs; } return NULL; } @@ -76,7 +77,7 @@ static def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m) op->end = m->end; op->len = -1; // TODO: maybe calculate this? (nontrivial because of replacements) op->args.backref = m; - return with_def(defs, f, name, op); + return with_def(defs, f, strlen(name), name, op); } // @@ -7,14 +7,14 @@ #include "file_loader.h" #include "types.h" -__attribute__((nonnull(2,3,4), returns_nonnull)) -def_t *with_def(def_t *defs, file_t *f, const char *name, vm_op_t *op); +__attribute__((nonnull(2,4,5), returns_nonnull)) +def_t *with_def(def_t *defs, file_t *f, size_t namelen, const char *name, vm_op_t *op); __attribute__((nonnull(2,3))) def_t *with_backrefs(def_t *defs, file_t *f, match_t *m); __attribute__((nonnull(2))) def_t *load_grammar(def_t *defs, file_t *f); __attribute__((pure, nonnull(2))) -vm_op_t *lookup(def_t *defs, const char *name); +def_t *lookup(def_t *defs, const char *name); __attribute__((nonnull(1))) void free_defs(def_t **defs, def_t *stop); @@ -39,6 +39,7 @@ enum VMOpcode { VM_REF, VM_BACKREF, VM_NODENT, + VM_CANNED, }; struct match_s; // forward declared to resolve circular struct defs @@ -74,6 +75,12 @@ typedef struct vm_op_s { char *name; } capture; struct match_s *backref; + struct { + struct match_s *match; + unsigned int visits; + const char *at; + struct vm_op_s *fallback; + } canned; struct vm_op_s *pat; } args; } vm_op_t; @@ -86,12 +93,14 @@ typedef struct match_s { const char *start, *end; struct match_s *child, *nextsibling; vm_op_t *op; + unsigned int refcount; } match_t; // // Pattern matching rule definition(s) // typedef struct def_s { + size_t namelen; const char *name; file_t *file; vm_op_t *op; @@ -12,25 +12,25 @@ #include "utils.h" #include "vm.h" -typedef struct recursive_ref_s { - const vm_op_t *op; - const char *pos; - struct recursive_ref_s *prev; - int hit; - match_t *result; -} recursive_ref_t; - __attribute__((nonnull, pure)) static inline const char *next_char(file_t *f, const char *str); __attribute__((nonnull)) static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags); __attribute__((hot, nonnull(2,3,4))) -static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec); +static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags); __attribute__((nonnull)) static match_t *get_capture_by_num(match_t *m, int *n); __attribute__((nonnull, pure)) static match_t *get_capture_by_name(match_t *m, const char *name); +static inline void set_owner(match_t *m, match_t **owner) +{ + check(owner != &m->child, "Circular ownership"); + check(owner != &m->nextsibling, "Circular ownership"); + *owner = m; + ++m->refcount; +} + // // Return the location of the next character or UTF8 codepoint. // (i.e. skip forward one codepoint at a time, not one byte at a time) @@ -56,9 +56,14 @@ static inline const char *next_char(file_t *f, const char *str) // void destroy_match(match_t **m) { - if (!*m) return; + if (*m == NULL) return; + if (--(*m)->refcount > 0) { + *m = NULL; + return; + } destroy_match(&((*m)->child)); destroy_match(&((*m)->nextsibling)); + free(*m); *m = NULL; } @@ -126,9 +131,18 @@ static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, uns // a match struct, or NULL if no match is found. // The returned value should be free()'d to avoid memory leaking. // -static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec) +static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags) { switch (op->type) { + case VM_CANNED: { + if (str == op->args.canned.at) { + ++op->args.canned.visits; + // TODO: deep copy? + return op->args.canned.match; + } else { + return _match(defs, f, str, op->args.canned.fallback, flags); + } + } case VM_ANYCHAR: { if (str >= f->end || *str == '\n') return NULL; @@ -141,7 +155,7 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns case VM_STRING: { if (&str[op->len] > f->end) return NULL; if ((flags & BP_IGNORECASE) ? memicmp(str, op->args.s, (size_t)op->len) != 0 - : memcmp(str, op->args.s, (size_t)op->len) != 0) + : memcmp(str, op->args.s, (size_t)op->len) != 0) return NULL; match_t *m = new(match_t); m->op = op; @@ -160,7 +174,7 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns return m; } case VM_NOT: { - match_t *m = _match(defs, f, str, op->args.pat, flags, rec); + match_t *m = _match(defs, f, str, op->args.pat, flags); if (m != NULL) { destroy_match(&m); return NULL; @@ -175,43 +189,45 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns match_t *m = new(match_t); m->start = str; m->op = op; - if (!op->args.multiple.first && !op->args.multiple.second) { + + vm_op_t *pat = op->args.multiple.first, *skip = op->args.multiple.second; + if (!pat && !skip) { while (str < f->end && *str != '\n') ++str; - } else { - match_t **dest = &m->child; - for (const char *prev = NULL; prev < str; ) { - prev = str; - if (op->args.multiple.first) { - match_t *p = _match(defs, f, str, op->args.multiple.first, flags, rec); - if (p) { - *dest = p; - m->end = p->end; - return m; - } - } else if (str == f->end) { - m->end = str; + m->end = str; + return m; + } + + match_t **dest = &m->child; + for (const char *prev = NULL; prev < str; ) { + prev = str; + if (pat) { + match_t *p = _match(defs, f, str, pat, flags); + if (p != NULL) { + set_owner(p, dest); + m->end = p->end; return m; } - if (op->args.multiple.second) { - match_t *p = _match(defs, f, str, op->args.multiple.second, flags, rec); - if (p) { - *dest = p; - dest = &p->nextsibling; - str = p->end; - continue; - } + } else if (str == f->end) { + m->end = str; + return m; + } + if (skip) { + match_t *s = _match(defs, f, str, skip, flags); + if (s != NULL) { + set_owner(s, dest); + dest = &s->nextsibling; + str = s->end; + continue; } - // This isn't in the for() structure because there needs to - // be at least once chance to match the pattern, even if - // we're at the end of the string already (e.g. "..$"). - if (str < f->end && *str != '\n') - str = next_char(f, str); } - destroy_match(&m); - return NULL; + // This isn't in the for() structure because there needs to + // be at least once chance to match the pattern, even if + // we're at the end of the string already (e.g. "..$"). + if (str < f->end && *str != '\n') + str = next_char(f, str); } - m->end = str; - return m; + destroy_match(&m); + return NULL; } case VM_REPEAT: { match_t *m = new(match_t); @@ -227,11 +243,11 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns // Separator match_t *sep = NULL; if (op->args.repetitions.sep != NULL && reps > 0) { - sep = _match(defs, f, str, op->args.repetitions.sep, flags, rec); + sep = _match(defs, f, str, op->args.repetitions.sep, flags); if (sep == NULL) break; str = sep->end; } - match_t *p = _match(defs, f, str, op->args.repetitions.repeat_pat, flags, rec); + match_t *p = _match(defs, f, str, op->args.repetitions.repeat_pat, flags); if (p == NULL) { str = start; destroy_match(&sep); @@ -253,10 +269,10 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns break; } if (sep) { - *dest = sep; + set_owner(sep, dest); dest = &sep->nextsibling; } - *dest = p; + set_owner(p, dest); dest = &p->nextsibling; str = p->end; } @@ -272,58 +288,58 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns ssize_t backtrack = op->args.pat->len; check(backtrack != -1, "'<' is only allowed for fixed-length operations"); if (str - backtrack < f->contents) return NULL; - match_t *before = _match(defs, f, str - backtrack, op->args.pat, flags, rec); + match_t *before = _match(defs, f, str - backtrack, op->args.pat, flags); if (before == NULL) return NULL; match_t *m = new(match_t); m->start = str; m->end = str; m->op = op; - m->child = before; + set_owner(before, &m->child); return m; } case VM_BEFORE: { - match_t *after = _match(defs, f, str, op->args.pat, flags, rec); + match_t *after = _match(defs, f, str, op->args.pat, flags); if (after == NULL) return NULL; match_t *m = new(match_t); m->start = str; m->end = str; m->op = op; - m->child = after; + set_owner(after, &m->child); return m; } case VM_CAPTURE: { - match_t *p = _match(defs, f, str, op->args.pat, flags, rec); + match_t *p = _match(defs, f, str, op->args.pat, flags); if (p == NULL) return NULL; match_t *m = new(match_t); m->start = str; m->end = p->end; m->op = op; - m->child = p; + set_owner(p, &m->child); return m; } case VM_HIDE: { - match_t *p = _match(defs, f, str, op->args.pat, flags, rec); + match_t *p = _match(defs, f, str, op->args.pat, flags); if (p == NULL) return NULL; match_t *m = new(match_t); m->start = str; m->end = p->end; m->op = op; - m->child = p; + set_owner(p, &m->child); return m; } case VM_OTHERWISE: { - match_t *m = _match(defs, f, str, op->args.multiple.first, flags, rec); - if (m == NULL) m = _match(defs, f, str, op->args.multiple.second, flags, rec); + match_t *m = _match(defs, f, str, op->args.multiple.first, flags); + if (m == NULL) m = _match(defs, f, str, op->args.multiple.second, flags); return m; } case VM_CHAIN: { - match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags, rec); + match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags); if (m1 == NULL) return NULL; match_t *m2; { // Push backrefs and run matching, then cleanup def_t *defs2 = with_backrefs(defs, f, m1); - m2 = _match(defs2, f, m1->end, op->args.multiple.second, flags, rec); + m2 = _match(defs2, f, m1->end, op->args.multiple.second, flags); free_defs(&defs2, defs); } @@ -335,12 +351,12 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns m->start = str; m->end = m2->end; m->op = op; - m->child = m1; - m1->nextsibling = m2; + set_owner(m1, &m->child); + set_owner(m2, &m1->nextsibling); return m; } case VM_EQUAL: case VM_NOT_EQUAL: { - match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags, rec); + match_t *m1 = _match(defs, f, str, op->args.multiple.first, flags); if (m1 == NULL) return NULL; // <p1>==<p2> matches iff the text of <p1> matches <p2> @@ -352,19 +368,19 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns .nlines=1 + get_line_number(f, m1->end)-get_line_number(f, m1->start), .mmapped=f->mmapped, }; - match_t *m2 = _match(defs, &inner, str, op->args.multiple.second, flags, rec); + match_t *m2 = _match(defs, &inner, str, op->args.multiple.second, flags); if ((m2 == NULL) == (op->type == VM_EQUAL)) { destroy_match(&m1); - destroy_match(&m2); + if (m2 != NULL) destroy_match(&m2); return NULL; } match_t *m = new(match_t); m->start = m1->start; m->end = m1->end; m->op = op; - m->child = m1; + set_owner(m1, &m->child); if (op->type == VM_EQUAL) { - m1->nextsibling = m2; + set_owner(m2, &m1->nextsibling); } else { destroy_match(&m2); } @@ -373,14 +389,14 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns case VM_REPLACE: { match_t *p = NULL; if (op->args.replace.pat) { - p = _match(defs, f, str, op->args.replace.pat, flags, rec); + p = _match(defs, f, str, op->args.replace.pat, flags); if (p == NULL) return NULL; } match_t *m = new(match_t); m->start = str; m->op = op; if (p) { - m->child = p; + set_owner(p, &m->child); m->end = p->end; } else { m->end = m->start; @@ -388,41 +404,43 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns return m; } case VM_REF: { - vm_op_t *r = lookup(defs, op->args.s); - check(r != NULL, "Unknown identifier: '%s'", op->args.s); - - // Prevent infinite left recursion: - for (recursive_ref_t *p = rec; p; p = p->prev) { - if (p->pos == str && p->op == r) { - ++p->hit; - return p->result; - } - } + def_t *def = lookup(defs, op->args.s); + check(def != NULL, "Unknown identifier: '%s'", op->args.s); + vm_op_t *ref = def->op; - recursive_ref_t wrap = { - .op = r, - .pos = str, - .prev = rec, - .hit = 0, - .result = NULL, + vm_op_t fail_op = { + .type = VM_CANNED, + .start = ref->start, + .end = ref->end, + .len = 0, + .args.canned = { + .match = NULL, + .visits = 0, + .at = str, + .fallback = ref, + }, + }; + def_t defs2 = { + .namelen = def->namelen, + .name = def->name, + .file = def->file, + .op = &fail_op, + .next = defs, }; - match_t *best = NULL; - left_recursive:; - match_t *p = _match(defs, f, str, r, flags, &wrap); - if (p == NULL) return best; - if (wrap.hit && (best == NULL || p->end > best->end)) { - best = p; - wrap.hit = 0; - wrap.result = p; - goto left_recursive; - } else if (best == NULL) { - best = p; + + const char *prev = str; + match_t *m = _match(&defs2, f, str, ref, flags); + if (m == NULL) return NULL; + + while (fail_op.args.canned.visits > 0 && m->end > prev) { + fail_op.args.canned.visits = 0; + fail_op.args.canned.match = m; + prev = m->end; + match_t *m2 = _match(&defs2, f, str, ref, flags); + if (m2 == NULL) break; + m = m2; } - match_t *m = new(match_t); - m->start = best->start; - m->end = best->end; - m->op = op; - m->child = best; + return m; } case VM_BACKREF: { @@ -526,7 +544,26 @@ match_t *get_capture(match_t *m, const char **id) // match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags) { - return _match(defs, f, str, op, flags, NULL); + return _match(defs, f, str, op, flags); +} + +// Deallocate memory associated with an op +void destroy_op(vm_op_t *op) +{ + switch (op->type) { + case VM_STRING: case VM_REF: + xfree(&op->args.s); + break; + case VM_CAPTURE: + if (op->args.capture.name) + xfree(&op->args.capture.name); + break; + case VM_REPLACE: + if (op->args.replace.text) + xfree(&op->args.replace.text); + break; + default: break; + } } // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -14,6 +14,8 @@ __attribute__((nonnull)) void destroy_match(match_t **m); __attribute__((nonnull)) match_t *get_capture(match_t *m, const char **id); +__attribute__((nonnull)) +void destroy_op(vm_op_t *op); #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 |
