From cbe1d97b37963a7ab75900463570cb6a109be8b4 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Fri, 15 Jan 2021 12:12:56 -0800 Subject: Fixed bug with backrefs. The backref pushing was overly greedy and has been updated to only push backrefs when a capture is directly in the chain and not recursively contained within it. --- grammar.c | 19 +------------------ grammar.h | 4 ++-- vm.c | 28 ++++++++++++++++++++-------- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/grammar.c b/grammar.c index 3b3d349..1c9c3b6 100644 --- a/grammar.c +++ b/grammar.c @@ -10,9 +10,6 @@ #include "grammar.h" #include "utils.h" -__attribute__((nonnull(2,3,4), returns_nonnull)) -static def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m); - // // Return a new list of definitions with one added to the front // @@ -71,7 +68,7 @@ def_t *lookup(def_t *defs, const char *name) // // Push a backreference onto the backreference stack // -static def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m) +def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m) { vm_op_t *op = new_op(f, m->start, VM_BACKREF); op->end = m->end; @@ -80,20 +77,6 @@ static def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m) return with_def(defs, f, strlen(name), name, op); } -// -// Push all the backreferences contained in a match onto the backreference stack -// -def_t *with_backrefs(def_t *defs, file_t *f, match_t *m) -{ - if (m->op->type != VM_REF) { - if (m->op->type == VM_CAPTURE && m->op->args.capture.name) - defs = with_backref(defs, f, m->op->args.capture.name, m->child); - if (m->child) defs = with_backrefs(defs, f, m->child); - if (m->nextsibling) defs = with_backrefs(defs, f, m->nextsibling); - } - return defs; -} - // // Free all the given definitions up till (but not including) `stop` // diff --git a/grammar.h b/grammar.h index 8d69e76..0055a54 100644 --- a/grammar.h +++ b/grammar.h @@ -9,8 +9,8 @@ __attribute__((nonnull(2,4,5), returns_nonnull)) def_t *with_def(def_t *defs, file_t *f, size_t namelen, const char *name, vm_op_t *op); -__attribute__((nonnull(2,3))) -def_t *with_backrefs(def_t *defs, file_t *f, match_t *m); +__attribute__((nonnull(2,3,4), returns_nonnull)) +def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m); __attribute__((nonnull(2))) def_t *load_grammar(def_t *defs, file_t *f); __attribute__((pure, nonnull(2))) diff --git a/vm.c b/vm.c index c9221a3..290cbbd 100644 --- a/vm.c +++ b/vm.c @@ -36,7 +36,7 @@ static match_t *in_use_matches = NULL; __attribute__((nonnull, pure)) static inline const char *next_char(file_t *f, const char *str); __attribute__((nonnull)) -static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int ignorecase); +static const char *match_backref(const char *str, match_t *cap, unsigned int ignorecase); __attribute__((nonnull)) static match_t *get_capture_by_num(match_t *m, int *n); __attribute__((nonnull, pure)) @@ -66,9 +66,8 @@ static inline const char *next_char(file_t *f, const char *str) // Attempt to match text against a previously captured value. // Return the character position after the backref has matched, or NULL if no match has occurred. // -static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int ignorecase) +static const char *match_backref(const char *str, match_t *cap, unsigned int ignorecase) { - check(op->type == VM_BACKREF, "Attempt to match backref against something that's not a backref"); if (cap->op->type == VM_REPLACE) { const char *text = cap->op->args.replace.text; const char *end = &text[cap->op->args.replace.len]; @@ -87,7 +86,7 @@ static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, uns ++r; match_t *value = get_capture(cap, &r); if (value != NULL) { - str = match_backref(str, op, value, ignorecase); + str = match_backref(str, value, ignorecase); if (str == NULL) return NULL; } } @@ -104,7 +103,7 @@ static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, uns prev = child->start; } if (child->start < prev) continue; - str = match_backref(str, op, child, ignorecase); + str = match_backref(str, child, ignorecase); if (str == NULL) return NULL; prev = child->end; } @@ -346,7 +345,10 @@ match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned in match_t *m2; { // Push backrefs and run matching, then cleanup - def_t *defs2 = with_backrefs(defs, f, m1); + def_t *defs2 = defs; + if (m1->op->type == VM_CAPTURE && m1->op->args.capture.name) + defs2 = with_backref(defs2, f, m1->op->args.capture.name, m1); + // def_t *defs2 = with_backrefs(defs, f, m1); m2 = match(defs2, f, m1->end, op->args.multiple.second, ignorecase); free_defs(&defs2, defs); } @@ -462,10 +464,20 @@ match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned in --m->refcount; } - return m; + check(m, "Match should be non-null at this point"); + // This match wrapper mainly exists for record-keeping purposes and + // does not affect correctness. It also helps with visualization of + // match results. + // OPTIMIZE: remove this if necessary + match_t *m2 = new_match(); + m2->op = op; + m2->start = m->start; + m2->end = m->end; + ADD_OWNER(m2->child, m); + return m2; } case VM_BACKREF: { - const char *end = match_backref(str, op, op->args.backref, ignorecase); + const char *end = match_backref(str, op->args.backref, ignorecase); if (end == NULL) return NULL; match_t *m = new_match(); m->op = op; -- cgit v1.2.3