From 659ed934d297d0d21d141bc4e9ecf8519a390eb1 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Tue, 12 Jan 2021 19:23:38 -0800 Subject: Simplified backref matching code and improved visualization of backrefs. --- printing.c | 19 ++++++++++++++++++- vm.c | 49 +++++++++++++++++-------------------------------- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/printing.c b/printing.c index e8cd178..02693b5 100644 --- a/printing.c +++ b/printing.c @@ -40,19 +40,36 @@ static void _visualize_matches(match_node_t *firstmatch, int depth, const char * const char *color = (depth % 2 == 0) ? "34" : "33"; match_t *viz = firstmatch->m; + // This is a heuristic: print matches first if they have more submatches. + // In general, this helps reduce the height of the final output by allowing + // for more rows that show the same rule matching in multiple places. + // TODO: there may be a better heuristic that optimizes for this factor + // while also printing earlier matches first when it doesn't affect overall + // output height. for (match_node_t *p = firstmatch; p; p = p->next) if (match_height(p->m) > match_height(viz)) viz = p->m; const char *viz_type = viz->op->start; size_t viz_typelen = (size_t)(viz->op->end - viz->op->start); - printf("\033[%ldG\033[%s;1m", 2*textlen+3, color); + // Backrefs use added dim quote marks to indicate that the pattern is a + // literal string being matched. (Backrefs have start/end inside the text + // input, instead of something the user typed in) + if (viz_type >= text && viz_type <= &text[textlen]) + printf("\033[%ldG\033[0;2m\"\033[%s;1m", 2*textlen+3, color); + else + printf("\033[%ldG\033[%s;1m", 2*textlen+3, color); + for (size_t i = 0; i < viz_typelen; i++) { switch (viz_type[i]) { case '\n': printf("↵"); break; default: printf("%c", viz_type[i]); break; } } + + if (viz_type >= text && viz_type <= &text[textlen]) + printf("\033[0;2m\""); + printf("\033[0m"); match_node_t *children = NULL; diff --git a/vm.c b/vm.c index 94e18fa..9757933 100644 --- a/vm.c +++ b/vm.c @@ -51,30 +51,22 @@ typedef struct recursive_ref_s { /* * Attempt to match text against a previously captured value. + * Return the character position after the backref has matched, or NULL if no match has occurred. */ -static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags) +static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags) { check(op->op == VM_BACKREF, "Attempt to match backref against something that's not a backref"); - match_t *ret = new(match_t); - ret->start = str; - ret->op = op; - match_t **dest = &ret->child; - if (cap->op->op == VM_REPLACE) { const char *text = cap->op->args.replace.text; const char *end = &text[cap->op->args.replace.len]; for (const char *r = text; r < end; ) { if (*r == '\\') { ++r; - if (*(str++) != unescapechar(r, &r)) { - destroy_match(&ret); + if (*(str++) != unescapechar(r, &r)) return NULL; - } } else if (*r != '@') { - if (*(str++) != *r) { - destroy_match(&ret); + if (*(str++) != *r) return NULL; - } ++r; continue; } @@ -82,13 +74,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign ++r; match_t *value = get_capture(cap, &r); if (value != NULL) { - *dest = match_backref(str, op, value, flags); - if (*dest == NULL) { - destroy_match(&ret); - return NULL; - } - str = (*dest)->end; - dest = &(*dest)->nextsibling; + str = match_backref(str, op, value, flags); + if (str == NULL) return NULL; } } } else { @@ -97,35 +84,27 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign if (child->start > prev) { size_t len = (size_t)(child->start - prev); if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0 - : memcmp(str, prev, len) != 0) { - destroy_match(&ret); + : memcmp(str, prev, len) != 0) { return NULL; } str += len; prev = child->start; } if (child->start < prev) continue; - *dest = match_backref(str, op, child, flags); - if (*dest == NULL) { - destroy_match(&ret); - return NULL; - } - str = (*dest)->end; - dest = &(*dest)->nextsibling; + str = match_backref(str, op, child, flags); + if (str == NULL) return NULL; prev = child->end; } if (cap->end > prev) { size_t len = (size_t)(cap->end - prev); if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0 : memcmp(str, prev, len) != 0) { - destroy_match(&ret); return NULL; } str += len; } } - ret->end = str; - return ret; + return str; } @@ -446,7 +425,13 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns return m; } case VM_BACKREF: { - return match_backref(str, op, op->args.backref, flags); + const char *end = match_backref(str, op, op->args.backref, flags); + if (end == NULL) return NULL; + match_t *m = new(match_t); + m->op = op; + m->start = str; + m->end = end; + return m; } case VM_NODENT: { if (*str != '\n') return NULL; -- cgit v1.2.3