diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2021-01-15 12:12:56 -0800 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2021-01-15 12:12:56 -0800 |
| commit | cbe1d97b37963a7ab75900463570cb6a109be8b4 (patch) | |
| tree | 70f89a2ce638aa96cf617b3badef13bad2179871 | |
| parent | b29060f38b30e6e379769582a0cf7cc3e9ae5056 (diff) | |
Fixed a bug with backrefs. Backref pushing was overly greedy: it walked
the whole match tree and pushed every named capture found anywhere inside it.
It now pushes a backref only when the chain's match is itself a named capture.
| -rw-r--r-- | grammar.c | 19 | ||||
| -rw-r--r-- | grammar.h | 4 | ||||
| -rw-r--r-- | vm.c | 28 |
3 files changed, 23 insertions, 28 deletions
@@ -10,9 +10,6 @@ #include "grammar.h" #include "utils.h" -__attribute__((nonnull(2,3,4), returns_nonnull)) -static def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m); - // // Return a new list of definitions with one added to the front // @@ -71,7 +68,7 @@ def_t *lookup(def_t *defs, const char *name) // // Push a backreference onto the backreference stack // -static def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m) +def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m) { vm_op_t *op = new_op(f, m->start, VM_BACKREF); op->end = m->end; @@ -81,20 +78,6 @@ static def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m) } // -// Push all the backreferences contained in a match onto the backreference stack -// -def_t *with_backrefs(def_t *defs, file_t *f, match_t *m) -{ - if (m->op->type != VM_REF) { - if (m->op->type == VM_CAPTURE && m->op->args.capture.name) - defs = with_backref(defs, f, m->op->args.capture.name, m->child); - if (m->child) defs = with_backrefs(defs, f, m->child); - if (m->nextsibling) defs = with_backrefs(defs, f, m->nextsibling); - } - return defs; -} - -// // Free all the given definitions up till (but not including) `stop` // void free_defs(def_t **defs, def_t *stop) @@ -9,8 +9,8 @@ __attribute__((nonnull(2,4,5), returns_nonnull)) def_t *with_def(def_t *defs, file_t *f, size_t namelen, const char *name, vm_op_t *op); -__attribute__((nonnull(2,3))) -def_t *with_backrefs(def_t *defs, file_t *f, match_t *m); +__attribute__((nonnull(2,3,4), returns_nonnull)) +def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m); __attribute__((nonnull(2))) def_t *load_grammar(def_t *defs, file_t *f); __attribute__((pure, nonnull(2))) @@ -36,7 +36,7 @@ static match_t *in_use_matches = NULL; __attribute__((nonnull, pure)) static inline const char *next_char(file_t *f, const char *str); __attribute__((nonnull)) -static const char *match_backref(const char *str, 
vm_op_t *op, match_t *cap, unsigned int ignorecase); +static const char *match_backref(const char *str, match_t *cap, unsigned int ignorecase); __attribute__((nonnull)) static match_t *get_capture_by_num(match_t *m, int *n); __attribute__((nonnull, pure)) @@ -66,9 +66,8 @@ static inline const char *next_char(file_t *f, const char *str) // Attempt to match text against a previously captured value. // Return the character position after the backref has matched, or NULL if no match has occurred. // -static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int ignorecase) +static const char *match_backref(const char *str, match_t *cap, unsigned int ignorecase) { - check(op->type == VM_BACKREF, "Attempt to match backref against something that's not a backref"); if (cap->op->type == VM_REPLACE) { const char *text = cap->op->args.replace.text; const char *end = &text[cap->op->args.replace.len]; @@ -87,7 +86,7 @@ static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, uns ++r; match_t *value = get_capture(cap, &r); if (value != NULL) { - str = match_backref(str, op, value, ignorecase); + str = match_backref(str, value, ignorecase); if (str == NULL) return NULL; } } @@ -104,7 +103,7 @@ static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, uns prev = child->start; } if (child->start < prev) continue; - str = match_backref(str, op, child, ignorecase); + str = match_backref(str, child, ignorecase); if (str == NULL) return NULL; prev = child->end; } @@ -346,7 +345,10 @@ match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned in match_t *m2; { // Push backrefs and run matching, then cleanup - def_t *defs2 = with_backrefs(defs, f, m1); + def_t *defs2 = defs; + if (m1->op->type == VM_CAPTURE && m1->op->args.capture.name) + defs2 = with_backref(defs2, f, m1->op->args.capture.name, m1); + // def_t *defs2 = with_backrefs(defs, f, m1); m2 = match(defs2, f, m1->end, op->args.multiple.second, 
ignorecase); free_defs(&defs2, defs); } @@ -462,10 +464,20 @@ match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned in --m->refcount; } - return m; + check(m, "Match should be non-null at this point"); + // This match wrapper mainly exists for record-keeping purposes and + // does not affect correctness. It also helps with visualization of + // match results. + // OPTIMIZE: remove this if necessary + match_t *m2 = new_match(); + m2->op = op; + m2->start = m->start; + m2->end = m->end; + ADD_OWNER(m2->child, m); + return m2; } case VM_BACKREF: { - const char *end = match_backref(str, op, op->args.backref, ignorecase); + const char *end = match_backref(str, op->args.backref, ignorecase); if (end == NULL) return NULL; match_t *m = new_match(); m->op = op; |
