about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- printing.c 19
-rw-r--r-- vm.c 49
2 files changed, 35 insertions, 33 deletions
diff --git a/printing.c b/printing.c
index e8cd178..02693b5 100644
--- a/printing.c
+++ b/printing.c
@@ -40,19 +40,36 @@ static void _visualize_matches(match_node_t *firstmatch, int depth, const char *
const char *color = (depth % 2 == 0) ? "34" : "33";
match_t *viz = firstmatch->m;
+ // This is a heuristic: print matches first if they have more submatches.
+ // In general, this helps reduce the height of the final output by allowing
+ // for more rows that show the same rule matching in multiple places.
+ // TODO: there may be a better heuristic that optimizes for this factor
+ // while also printing earlier matches first when it doesn't affect overall
+ // output height.
for (match_node_t *p = firstmatch; p; p = p->next)
if (match_height(p->m) > match_height(viz))
viz = p->m;
const char *viz_type = viz->op->start;
size_t viz_typelen = (size_t)(viz->op->end - viz->op->start);
- printf("\033[%ldG\033[%s;1m", 2*textlen+3, color);
+ // Backrefs are printed with added dim quote marks to indicate that the
+ // pattern is a literal string being matched. (A backref's start/end point
+ // into the text input, rather than into a pattern the user typed in.)
+ if (viz_type >= text && viz_type <= &text[textlen])
+ printf("\033[%ldG\033[0;2m\"\033[%s;1m", 2*textlen+3, color);
+ else
+ printf("\033[%ldG\033[%s;1m", 2*textlen+3, color);
+
for (size_t i = 0; i < viz_typelen; i++) {
switch (viz_type[i]) {
case '\n': printf("↵"); break;
default: printf("%c", viz_type[i]); break;
}
}
+
+ if (viz_type >= text && viz_type <= &text[textlen])
+ printf("\033[0;2m\"");
+
printf("\033[0m");
match_node_t *children = NULL;
diff --git a/vm.c b/vm.c
index 94e18fa..9757933 100644
--- a/vm.c
+++ b/vm.c
@@ -51,30 +51,22 @@ typedef struct recursive_ref_s {
/*
* Attempt to match text against a previously captured value.
+ * Return a pointer to the position in `str` just past the matched backref text, or NULL if the backref does not match.
*/
-static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags)
+static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags)
{
check(op->op == VM_BACKREF, "Attempt to match backref against something that's not a backref");
- match_t *ret = new(match_t);
- ret->start = str;
- ret->op = op;
- match_t **dest = &ret->child;
-
if (cap->op->op == VM_REPLACE) {
const char *text = cap->op->args.replace.text;
const char *end = &text[cap->op->args.replace.len];
for (const char *r = text; r < end; ) {
if (*r == '\\') {
++r;
- if (*(str++) != unescapechar(r, &r)) {
- destroy_match(&ret);
+ if (*(str++) != unescapechar(r, &r))
return NULL;
- }
} else if (*r != '@') {
- if (*(str++) != *r) {
- destroy_match(&ret);
+ if (*(str++) != *r)
return NULL;
- }
++r;
continue;
}
@@ -82,13 +74,8 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign
++r;
match_t *value = get_capture(cap, &r);
if (value != NULL) {
- *dest = match_backref(str, op, value, flags);
- if (*dest == NULL) {
- destroy_match(&ret);
- return NULL;
- }
- str = (*dest)->end;
- dest = &(*dest)->nextsibling;
+ str = match_backref(str, op, value, flags);
+ if (str == NULL) return NULL;
}
}
} else {
@@ -97,35 +84,27 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign
if (child->start > prev) {
size_t len = (size_t)(child->start - prev);
if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
- : memcmp(str, prev, len) != 0) {
- destroy_match(&ret);
+ : memcmp(str, prev, len) != 0) {
return NULL;
}
str += len;
prev = child->start;
}
if (child->start < prev) continue;
- *dest = match_backref(str, op, child, flags);
- if (*dest == NULL) {
- destroy_match(&ret);
- return NULL;
- }
- str = (*dest)->end;
- dest = &(*dest)->nextsibling;
+ str = match_backref(str, op, child, flags);
+ if (str == NULL) return NULL;
prev = child->end;
}
if (cap->end > prev) {
size_t len = (size_t)(cap->end - prev);
if ((flags & BP_IGNORECASE) ? memicmp(str, prev, len) != 0
: memcmp(str, prev, len) != 0) {
- destroy_match(&ret);
return NULL;
}
str += len;
}
}
- ret->end = str;
- return ret;
+ return str;
}
@@ -446,7 +425,13 @@ static match_t *_match(def_t *defs, file_t *f, const char *str, vm_op_t *op, uns
return m;
}
case VM_BACKREF: {
- return match_backref(str, op, op->args.backref, flags);
+ const char *end = match_backref(str, op, op->args.backref, flags);
+ if (end == NULL) return NULL;
+ match_t *m = new(match_t);
+ m->op = op;
+ m->start = str;
+ m->end = end;
+ return m;
}
case VM_NODENT: {
if (*str != '\n') return NULL;