diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2021-01-12 19:23:38 -0800 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2021-01-12 19:23:38 -0800 |
| commit | 659ed934d297d0d21d141bc4e9ecf8519a390eb1 (patch) | |
| tree | 6e513902ff4c201e11b8e2578afd7aa927a1d5b4 | |
| parent | dbe3f0b7269fa28ff459586ac25d765395cace16 (diff) | |
Simplified backref matching code and improved visualization of backrefs.
| -rw-r--r-- | printing.c | 19 |
| -rw-r--r-- | vm.c | 49 |
2 files changed, 35 insertions, 33 deletions
@@ -40,19 +40,36 @@ static void _visualize_matches(match_node_t *firstmatch, int depth, const char * const char *color = (depth % 2 == 0) ? "34" : "33"; match_t *viz = firstmatch->m; + // This is a heuristic: print matches first if they have more submatches. + // In general, this helps reduce the height of the final output by allowing + // for more rows that show the same rule matching in multiple places. + // TODO: there may be a better heuristic that optimizes for this factor + // while also printing earlier matches first when it doesn't affect overall + // output height. for (match_node_t *p = firstmatch; p; p = p->next) if (match_height(p->m) > match_height(viz)) viz = p->m; const char *viz_type = viz->op->start; size_t viz_typelen = (size_t)(viz->op->end - viz->op->start); - printf("\033[%ldG\033[%s;1m", 2*textlen+3, color); + // Backrefs use added dim quote marks to indicate that the pattern is a + // literal string being matched. (Backrefs have start/end inside the text + // input, instead of something the user typed in) + if (viz_type >= text && viz_type <= &text[textlen]) + printf("\033[%ldG\033[0;2m\"\033[%s;1m", 2*textlen+3, color); + else + printf("\033[%ldG\033[%s;1m", 2*textlen+3, color); + for (size_t i = 0; i < viz_typelen; i++) { switch (viz_type[i]) { case '\n': printf("↵"); break; default: printf("%c", viz_type[i]); break; } } + + if (viz_type >= text && viz_type <= &text[textlen]) + printf("\033[0;2m\""); + printf("\033[0m"); match_node_t *children = NULL; @@ -51,30 +51,22 @@ typedef struct recursive_ref_s { /* * Attempt to match text against a previously captured value. + * Return the character position after the backref has matched, or NULL if no match has occurred. 
*/ -static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags) +static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags) { check(op->op == VM_BACKREF, "Attempt to match backref against something that's not a backref"); - match_t *ret = new(match_t); - ret->start = str; - ret->op = op; - match_t **dest = &ret->child; - if (cap->op->op == VM_REPLACE) { const char *text = cap->op->args.replace.text; const char *end = &text[cap->op->args.replace.len]; for (const char *r = text; r < end; ) { if (*r == '\\') { ++r; - if (*(str++) != unescapechar(r, &r)) { - destroy_match(&ret); + if (*(str++) != unescapechar(r, &r)) return NULL; - } } else if (*r != '@') { - if (*(str++) != *r) { - destroy_match(&ret); + if (*(str++) != *r) return NULL; - } ++r; continue; } @@ -82,13 +74,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign ++r; match_t *value = get_capture(cap, &r); if (value != NULL) { - *dest = match_backref(str, op, value, flags); - if (*dest == NULL) { - destroy_match(&ret); - return NULL; - } - str = (*dest)->end; - dest = &(*dest)->nextsibling; + str = match_backref(str, op, value, flags); + if (str == NULL) return NULL; } } } else { @@ -97,35 +84,27 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign if (child->start > prev) { size_t len = (size_t)(child->start - prev); if ((flags & BP_IGNORECASE) ? 
memicmp(str, prev, len) != 0 - : memcmp(str, prev, len) != 0) { - destroy_match(&ret); + : memcmp(str, prev, len) != 0) { return NULL; } str += len; prev = child->start; } if (child->start < prev) continue; - *dest = match_backref(str, op, child, flags); - if (*dest == NULL) { - destroy_match(&ret); - return NULL; - } - str = (*dest)->end; - dest = &(*dest)->nextsibling; + str = match_backref(str, op, child, flags); + if (str == NULL) return NULL; prev = child->end; } if (cap->end > prev) { size_t len = (size_t)(cap->end - prev); if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0 : memcmp(str, prev, len) != 0) { - destroy_match(&ret); return NULL; } str += len; } } - ret->end = str; - return ret; + return str; } @@ -446,7 +425,13 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns return m; } case VM_BACKREF: { - return match_backref(str, op, op->args.backref, flags); + const char *end = match_backref(str, op, op->args.backref, flags); + if (end == NULL) return NULL; + match_t *m = new(match_t); + m->op = op; + m->start = str; + m->end = end; + return m; } case VM_NODENT: { if (*str != '\n') return NULL; |
