aboutsummaryrefslogtreecommitdiff
path: root/match.c
diff options
context:
space:
mode:
Diffstat (limited to 'match.c')
-rw-r--r--match.c702
1 files changed, 351 insertions, 351 deletions
diff --git a/match.c b/match.c
index ed26a62..c1b5f49 100644
--- a/match.c
+++ b/match.c
@@ -106,25 +106,25 @@ static pat_t *first_pat(def_t *defs, pat_t *pat)
{
for (pat_t *p = pat; p; ) {
switch (p->type) {
- case BP_BEFORE:
- p = p->args.pat; break;
- case BP_REPEAT:
- if (p->args.repetitions.min == 0)
- return p;
- p = p->args.repetitions.repeat_pat; break;
- case BP_CAPTURE:
- p = p->args.capture.capture_pat; break;
- case BP_CHAIN: case BP_MATCH: case BP_NOT_MATCH:
- p = p->args.multiple.first; break;
- case BP_REPLACE:
- p = p->args.replace.pat; break;
- case BP_REF: {
- pat_t *p2 = deref(defs, p);
- if (p2 == p) return p2;
- p = p2;
- break;
- }
- default: return p;
+ case BP_BEFORE:
+ p = p->args.pat; break;
+ case BP_REPEAT:
+ if (p->args.repetitions.min == 0)
+ return p;
+ p = p->args.repetitions.repeat_pat; break;
+ case BP_CAPTURE:
+ p = p->args.capture.capture_pat; break;
+ case BP_CHAIN: case BP_MATCH: case BP_NOT_MATCH:
+ p = p->args.multiple.first; break;
+ case BP_REPLACE:
+ p = p->args.replace.pat; break;
+ case BP_REF: {
+ pat_t *p2 = deref(defs, p);
+ if (p2 == p) return p2;
+ p = p2;
+ break;
+ }
+ default: return p;
}
}
return pat;
@@ -187,376 +187,376 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk
static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool ignorecase)
{
switch (pat->type) {
- case BP_DEFINITION: {
- def_t *defs2 = with_def(defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def);
- match_t *m = match(defs2, f, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def, ignorecase);
- defs = free_defs(defs2, defs);
- return m;
- }
- case BP_LEFTRECURSION: {
- // Left recursion occurs when a pattern directly or indirectly
- // invokes itself at the same position in the text. It's handled as
- // a special case, but if a pattern invokes itself at a later
- // point, it can be handled with normal recursion.
- // See: left-recursion.md for more details.
- if (str == pat->args.leftrec.at) {
- ++pat->args.leftrec.visits;
- return pat->args.leftrec.match;
- } else {
- return match(defs, f, str, pat->args.leftrec.fallback, ignorecase);
- }
- }
- case BP_ANYCHAR: {
- return (str < f->end && *str != '\n') ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
- }
- case BP_ID_START: {
- return (str < f->end && isidstart(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
- }
- case BP_ID_CONTINUE: {
- return (str < f->end && isidcontinue(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
- }
- case BP_START_OF_FILE: {
- return (str == f->start) ? new_match(defs, pat, str, str, NULL) : NULL;
- }
- case BP_START_OF_LINE: {
- return (str == f->start || str[-1] == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
- }
- case BP_END_OF_FILE: {
- return (str == f->end) ? new_match(defs, pat, str, str, NULL) : NULL;
- }
- case BP_END_OF_LINE: {
- return (str == f->end || *str == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
- }
- case BP_WORD_BOUNDARY: {
- return (str == f->start || isidcontinue(f, str) != isidcontinue(f, prev_char(f, str))) ? new_match(defs, pat, str, str, NULL) : NULL;
- }
- case BP_STRING: {
- if (&str[pat->min_matchlen] > f->end) return NULL;
- if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, pat->min_matchlen) != 0)
- return NULL;
- return new_match(defs, pat, str, str + pat->min_matchlen, NULL);
+ case BP_DEFINITION: {
+ def_t *defs2 = with_def(defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def);
+ match_t *m = match(defs2, f, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def, ignorecase);
+ defs = free_defs(defs2, defs);
+ return m;
+ }
+ case BP_LEFTRECURSION: {
+ // Left recursion occurs when a pattern directly or indirectly
+ // invokes itself at the same position in the text. It's handled as
+ // a special case, but if a pattern invokes itself at a later
+ // point, it can be handled with normal recursion.
+ // See: left-recursion.md for more details.
+ if (str == pat->args.leftrec.at) {
+ ++pat->args.leftrec.visits;
+ return pat->args.leftrec.match;
+ } else {
+ return match(defs, f, str, pat->args.leftrec.fallback, ignorecase);
}
- case BP_RANGE: {
- if (str >= f->end) return NULL;
- if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high)
- return NULL;
- return new_match(defs, pat, str, str+1, NULL);
+ }
+ case BP_ANYCHAR: {
+ return (str < f->end && *str != '\n') ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
+ }
+ case BP_ID_START: {
+ return (str < f->end && isidstart(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
+ }
+ case BP_ID_CONTINUE: {
+ return (str < f->end && isidcontinue(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
+ }
+ case BP_START_OF_FILE: {
+ return (str == f->start) ? new_match(defs, pat, str, str, NULL) : NULL;
+ }
+ case BP_START_OF_LINE: {
+ return (str == f->start || str[-1] == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
+ }
+ case BP_END_OF_FILE: {
+ return (str == f->end || (str == f->end-1 && *str == '\n')) ? new_match(defs, pat, str, str, NULL) : NULL;
+ }
+ case BP_END_OF_LINE: {
+ return (str == f->end || *str == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
+ }
+ case BP_WORD_BOUNDARY: {
+ return (str == f->start || isidcontinue(f, str) != isidcontinue(f, prev_char(f, str))) ? new_match(defs, pat, str, str, NULL) : NULL;
+ }
+ case BP_STRING: {
+ if (&str[pat->min_matchlen] > f->end) return NULL;
+ if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, pat->min_matchlen) != 0)
+ return NULL;
+ return new_match(defs, pat, str, str + pat->min_matchlen, NULL);
+ }
+ case BP_RANGE: {
+ if (str >= f->end) return NULL;
+ if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high)
+ return NULL;
+ return new_match(defs, pat, str, str+1, NULL);
+ }
+ case BP_NOT: {
+ match_t *m = match(defs, f, str, pat->args.pat, ignorecase);
+ if (m != NULL) {
+ recycle_if_unused(&m);
+ return NULL;
}
- case BP_NOT: {
- match_t *m = match(defs, f, str, pat->args.pat, ignorecase);
- if (m != NULL) {
- recycle_if_unused(&m);
- return NULL;
- }
- return new_match(defs, pat, str, str, NULL);
+ return new_match(defs, pat, str, str, NULL);
+ }
+ case BP_UPTO: case BP_UPTO_STRICT: {
+ match_t *m = new_match(defs, pat, str, str, NULL);
+ pat_t *target = deref(defs, pat->args.multiple.first),
+ *skip = deref(defs, pat->args.multiple.second);
+ if (!target && !skip) {
+ while (str < f->end && *str != '\n') ++str;
+ m->end = str;
+ return m;
}
- case BP_UPTO: case BP_UPTO_STRICT: {
- match_t *m = new_match(defs, pat, str, str, NULL);
- pat_t *target = deref(defs, pat->args.multiple.first),
- *skip = deref(defs, pat->args.multiple.second);
- if (!target && !skip) {
- while (str < f->end && *str != '\n') ++str;
- m->end = str;
- return m;
- }
- size_t child_cap = 0, nchildren = 0;
- for (const char *prev = NULL; prev < str; ) {
- prev = str;
- if (target) {
- match_t *p = match(defs, f, str, target, ignorecase);
- if (p != NULL) {
- recycle_if_unused(&p);
- m->end = str;
- return m;
- }
- } else if (str == f->end) {
+ size_t child_cap = 0, nchildren = 0;
+ for (const char *prev = NULL; prev < str; ) {
+ prev = str;
+ if (target) {
+ match_t *p = match(defs, f, str, target, ignorecase);
+ if (p != NULL) {
+ recycle_if_unused(&p);
m->end = str;
return m;
}
- if (skip) {
- match_t *s = match(defs, f, str, skip, ignorecase);
- if (s != NULL) {
- str = s->end;
- if (nchildren+2 >= child_cap) {
- m->children = grow(m->children, child_cap += 5);
- for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
- }
- add_owner(&m->children[nchildren++], s);
- continue;
- }
- }
- // This isn't in the for() structure because there needs to
- // be at least once chance to match the pattern, even if
- // we're at the end of the string already (e.g. "..$").
- if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
- str = next_char(f, str);
+ } else if (str == f->end) {
+ m->end = str;
+ return m;
}
- recycle_if_unused(&m);
- return NULL;
- }
- case BP_REPEAT: {
- match_t *m = new_match(defs, pat, str, str, NULL);
- size_t reps = 0;
- ssize_t max = pat->args.repetitions.max;
- pat_t *repeating = deref(defs, pat->args.repetitions.repeat_pat);
- pat_t *sep = deref(defs, pat->args.repetitions.sep);
- size_t child_cap = 0, nchildren = 0;
- for (reps = 0; max == -1 || reps < (size_t)max; ++reps) {
- const char *start = str;
- // Separator
- match_t *msep = NULL;
- if (sep != NULL && reps > 0) {
- msep = match(defs, f, str, sep, ignorecase);
- if (msep == NULL) break;
- str = msep->end;
- }
- match_t *mp = match(defs, f, str, repeating, ignorecase);
- if (mp == NULL) {
- str = start;
- if (msep) recycle_if_unused(&msep);
- break;
- }
- if (mp->end == start && reps > 0) {
- // Since no forward progress was made on either `repeating`
- // or `sep` and BP does not have mutable state, it's
- // guaranteed that no progress will be made on the next
- // loop either. We know that this will continue to loop
- // until reps==max, so let's just cut to the chase instead
- // of looping infinitely.
- if (msep) recycle_if_unused(&msep);
- recycle_if_unused(&mp);
- if (pat->args.repetitions.max == -1)
- reps = ~(size_t)0;
- else
- reps = (size_t)pat->args.repetitions.max;
- break;
- }
- if (msep) {
+ if (skip) {
+ match_t *s = match(defs, f, str, skip, ignorecase);
+ if (s != NULL) {
+ str = s->end;
if (nchildren+2 >= child_cap) {
m->children = grow(m->children, child_cap += 5);
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
}
- add_owner(&m->children[nchildren++], msep);
+ add_owner(&m->children[nchildren++], s);
+ continue;
}
-
+ }
+ // This isn't in the for() structure because there needs to
+ // be at least once chance to match the pattern, even if
+ // we're at the end of the string already (e.g. "..$").
+ if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
+ str = next_char(f, str);
+ }
+ recycle_if_unused(&m);
+ return NULL;
+ }
+ case BP_REPEAT: {
+ match_t *m = new_match(defs, pat, str, str, NULL);
+ size_t reps = 0;
+ ssize_t max = pat->args.repetitions.max;
+ pat_t *repeating = deref(defs, pat->args.repetitions.repeat_pat);
+ pat_t *sep = deref(defs, pat->args.repetitions.sep);
+ size_t child_cap = 0, nchildren = 0;
+ for (reps = 0; max == -1 || reps < (size_t)max; ++reps) {
+ const char *start = str;
+ // Separator
+ match_t *msep = NULL;
+ if (sep != NULL && reps > 0) {
+ msep = match(defs, f, str, sep, ignorecase);
+ if (msep == NULL) break;
+ str = msep->end;
+ }
+ match_t *mp = match(defs, f, str, repeating, ignorecase);
+ if (mp == NULL) {
+ str = start;
+ if (msep) recycle_if_unused(&msep);
+ break;
+ }
+ if (mp->end == start && reps > 0) {
+ // Since no forward progress was made on either `repeating`
+ // or `sep` and BP does not have mutable state, it's
+ // guaranteed that no progress will be made on the next
+ // loop either. We know that this will continue to loop
+ // until reps==max, so let's just cut to the chase instead
+ // of looping infinitely.
+ if (msep) recycle_if_unused(&msep);
+ recycle_if_unused(&mp);
+ if (pat->args.repetitions.max == -1)
+ reps = ~(size_t)0;
+ else
+ reps = (size_t)pat->args.repetitions.max;
+ break;
+ }
+ if (msep) {
if (nchildren+2 >= child_cap) {
m->children = grow(m->children, child_cap += 5);
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
}
- add_owner(&m->children[nchildren++], mp);
- str = mp->end;
+ add_owner(&m->children[nchildren++], msep);
}
- if (reps < (size_t)pat->args.repetitions.min) {
- recycle_if_unused(&m);
- return NULL;
+ if (nchildren+2 >= child_cap) {
+ m->children = grow(m->children, child_cap += 5);
+ for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
}
- m->end = str;
- return m;
+ add_owner(&m->children[nchildren++], mp);
+ str = mp->end;
}
- case BP_AFTER: {
- pat_t *back = deref(defs, pat->args.pat);
- if (!back) return NULL;
-
- // We only care about the region from the backtrack pos up to the
- // current pos, so mock it out as a file slice.
- // TODO: this breaks ^/^^/$/$$, but that can probably be ignored
- // because you rarely need to check those in a backtrack.
- file_t slice;
- slice_file(&slice, f, f->start, str);
- for (const char *pos = &str[-(long)back->min_matchlen];
- pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]);
- pos = prev_char(f, pos)) {
- cache_destroy(&slice);
- slice.start = (char*)pos;
- match_t *m = match(defs, &slice, pos, back, ignorecase);
- // Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
- if (m && m->end != str)
- recycle_if_unused(&m);
- else if (m) {
- cache_destroy(&slice);
- return new_match(defs, pat, str, str, MATCHES(m));
- }
- if (pos == f->start) break;
- // To prevent extreme performance degradation, don't keep
- // walking backwards endlessly over newlines.
- if (back->max_matchlen == -1 && *pos == '\n') break;
- }
- cache_destroy(&slice);
+
+ if (reps < (size_t)pat->args.repetitions.min) {
+ recycle_if_unused(&m);
return NULL;
}
- case BP_BEFORE: {
- match_t *after = match(defs, f, str, pat->args.pat, ignorecase);
- return after ? new_match(defs, pat, str, str, MATCHES(after)) : NULL;
- }
- case BP_CAPTURE: {
- match_t *p = match(defs, f, str, pat->args.pat, ignorecase);
- return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
- }
- case BP_OTHERWISE: {
- match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase);
- return m ? m : match(defs, f, str, pat->args.multiple.second, ignorecase);
+ m->end = str;
+ return m;
+ }
+ case BP_AFTER: {
+ pat_t *back = deref(defs, pat->args.pat);
+ if (!back) return NULL;
+
+ // We only care about the region from the backtrack pos up to the
+ // current pos, so mock it out as a file slice.
+ // TODO: this breaks ^/^^/$/$$, but that can probably be ignored
+ // because you rarely need to check those in a backtrack.
+ file_t slice;
+ slice_file(&slice, f, f->start, str);
+ for (const char *pos = &str[-(long)back->min_matchlen];
+ pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]);
+ pos = prev_char(f, pos)) {
+ cache_destroy(&slice);
+ slice.start = (char*)pos;
+ match_t *m = match(defs, &slice, pos, back, ignorecase);
+ // Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
+ if (m && m->end != str)
+ recycle_if_unused(&m);
+ else if (m) {
+ cache_destroy(&slice);
+ return new_match(defs, pat, str, str, MATCHES(m));
+ }
+ if (pos == f->start) break;
+ // To prevent extreme performance degradation, don't keep
+ // walking backwards endlessly over newlines.
+ if (back->max_matchlen == -1 && *pos == '\n') break;
}
- case BP_CHAIN: {
- match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
- if (m1 == NULL) return NULL;
-
- match_t *m2;
- // Push backrefs and run matching, then cleanup
- if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) {
- // Temporarily add a rule that the backref name matches the
- // exact string of the original match (no replacements)
- size_t len = (size_t)(m1->end - m1->start);
- pat_t *backref = new_pat(f, m1->start, m1->end, len, (ssize_t)len, BP_STRING);
- backref->args.string = m1->start;
-
- def_t *defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref);
- ++m1->refcount; {
- m2 = match(defs2, f, m1->end, pat->args.multiple.second, ignorecase);
- if (!m2) { // No need to keep the backref in memory if it didn't match
- for (pat_t **rem = &f->pats; *rem; rem = &(*rem)->next) {
- if ((*rem) == backref) {
- pat_t *tmp = *rem;
- *rem = (*rem)->next;
- free(tmp);
- break;
- }
+ cache_destroy(&slice);
+ return NULL;
+ }
+ case BP_BEFORE: {
+ match_t *after = match(defs, f, str, pat->args.pat, ignorecase);
+ return after ? new_match(defs, pat, str, str, MATCHES(after)) : NULL;
+ }
+ case BP_CAPTURE: {
+ match_t *p = match(defs, f, str, pat->args.pat, ignorecase);
+ return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
+ }
+ case BP_OTHERWISE: {
+ match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase);
+ return m ? m : match(defs, f, str, pat->args.multiple.second, ignorecase);
+ }
+ case BP_CHAIN: {
+ match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
+ if (m1 == NULL) return NULL;
+
+ match_t *m2;
+ // Push backrefs and run matching, then cleanup
+ if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) {
+ // Temporarily add a rule that the backref name matches the
+ // exact string of the original match (no replacements)
+ size_t len = (size_t)(m1->end - m1->start);
+ pat_t *backref = new_pat(f, m1->start, m1->end, len, (ssize_t)len, BP_STRING);
+ backref->args.string = m1->start;
+
+ def_t *defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref);
+ ++m1->refcount; {
+ m2 = match(defs2, f, m1->end, pat->args.multiple.second, ignorecase);
+ if (!m2) { // No need to keep the backref in memory if it didn't match
+ for (pat_t **rem = &f->pats; *rem; rem = &(*rem)->next) {
+ if ((*rem) == backref) {
+ pat_t *tmp = *rem;
+ *rem = (*rem)->next;
+ free(tmp);
+ break;
}
}
- defs = free_defs(defs2, defs);
- } --m1->refcount;
- } else {
- m2 = match(defs, f, m1->end, pat->args.multiple.second, ignorecase);
- }
-
- if (m2 == NULL) {
- recycle_if_unused(&m1);
- return NULL;
- }
+ }
+ defs = free_defs(defs2, defs);
+ } --m1->refcount;
+ } else {
+ m2 = match(defs, f, m1->end, pat->args.multiple.second, ignorecase);
+ }
- return new_match(defs, pat, str, m2->end, MATCHES(m1, m2));
+ if (m2 == NULL) {
+ recycle_if_unused(&m1);
+ return NULL;
}
- case BP_MATCH: case BP_NOT_MATCH: {
- match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
- if (m1 == NULL) return NULL;
-
- // <p1>~<p2> matches iff the text of <p1> matches <p2>
- // <p1>!~<p2> matches iff the text of <p1> does not match <p2>
- file_t slice;
- slice_file(&slice, f, m1->start, m1->end);
- match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase);
- if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
- if (m2) recycle_if_unused(&m2);
- cache_destroy(&slice);
- recycle_if_unused(&m1);
- return NULL;
- }
+
+ return new_match(defs, pat, str, m2->end, MATCHES(m1, m2));
+ }
+ case BP_MATCH: case BP_NOT_MATCH: {
+ match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
+ if (m1 == NULL) return NULL;
+
+ // <p1>~<p2> matches iff the text of <p1> matches <p2>
+ // <p1>!~<p2> matches iff the text of <p1> does not match <p2>
+ file_t slice;
+ slice_file(&slice, f, m1->start, m1->end);
+ match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase);
+ if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
+ if (m2) recycle_if_unused(&m2);
cache_destroy(&slice);
- return new_match(defs, pat, m1->start, m1->end, (pat->type == BP_MATCH) ? MATCHES(m1, m2) : NULL);
+ recycle_if_unused(&m1);
+ return NULL;
}
- case BP_REPLACE: {
- match_t *p = NULL;
- if (pat->args.replace.pat) {
- p = match(defs, f, str, pat->args.replace.pat, ignorecase);
- if (p == NULL) return NULL;
- }
- return new_match(defs, pat, str, p ? p->end : str, MATCHES(p));
+ cache_destroy(&slice);
+ return new_match(defs, pat, m1->start, m1->end, (pat->type == BP_MATCH) ? MATCHES(m1, m2) : NULL);
+ }
+ case BP_REPLACE: {
+ match_t *p = NULL;
+ if (pat->args.replace.pat) {
+ p = match(defs, f, str, pat->args.replace.pat, ignorecase);
+ if (p == NULL) return NULL;
}
- case BP_REF: {
- match_t *cached;
- if (cache_get(f, defs, str, pat, &cached))
- return cached;
-
- def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
- if (def == NULL)
- errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
- pat_t *ref = def->pat;
-
- pat_t rec_op = {
- .type = BP_LEFTRECURSION,
- .start = ref->start,
- .end = ref->end,
- .min_matchlen = 0,
- .max_matchlen = -1,
- .args.leftrec = {
- .match = NULL,
- .visits = 0,
- .at = str,
- .fallback = ref,
- },
- };
- def_t defs2 = {
- .namelen = def->namelen,
- .name = def->name,
- .pat = &rec_op,
- .next = defs,
- };
-
- const char *prev = str;
- match_t *m = match(&defs2, f, str, ref, ignorecase);
- if (m == NULL) {
- cache_save(f, defs, str, pat, NULL);
- return NULL;
- }
+ return new_match(defs, pat, str, p ? p->end : str, MATCHES(p));
+ }
+ case BP_REF: {
+ match_t *cached;
+ if (cache_get(f, defs, str, pat, &cached))
+ return cached;
- while (rec_op.args.leftrec.visits > 0) {
- rec_op.args.leftrec.visits = 0;
- remove_ownership(&rec_op.args.leftrec.match);
- add_owner(&rec_op.args.leftrec.match, m);
- prev = m->end;
- match_t *m2 = match(&defs2, f, str, ref, ignorecase);
- if (m2 == NULL) break;
- if (m2->end <= prev) {
- recycle_if_unused(&m2);
- break;
- }
- m = m2;
+ def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
+ if (def == NULL)
+ errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
+ pat_t *ref = def->pat;
+
+ pat_t rec_op = {
+ .type = BP_LEFTRECURSION,
+ .start = ref->start,
+ .end = ref->end,
+ .min_matchlen = 0,
+ .max_matchlen = -1,
+ .args.leftrec = {
+ .match = NULL,
+ .visits = 0,
+ .at = str,
+ .fallback = ref,
+ },
+ };
+ def_t defs2 = {
+ .namelen = def->namelen,
+ .name = def->name,
+ .pat = &rec_op,
+ .next = defs,
+ };
+
+ const char *prev = str;
+ match_t *m = match(&defs2, f, str, ref, ignorecase);
+ if (m == NULL) {
+ cache_save(f, defs, str, pat, NULL);
+ return NULL;
+ }
+
+ while (rec_op.args.leftrec.visits > 0) {
+ rec_op.args.leftrec.visits = 0;
+ remove_ownership(&rec_op.args.leftrec.match);
+ add_owner(&rec_op.args.leftrec.match, m);
+ prev = m->end;
+ match_t *m2 = match(&defs2, f, str, ref, ignorecase);
+ if (m2 == NULL) break;
+ if (m2->end <= prev) {
+ recycle_if_unused(&m2);
+ break;
}
+ m = m2;
+ }
- // This match wrapper mainly exists for record-keeping purposes.
- // However, it also keeps `m` from getting garbage collected with
- // leftrec.match is GC'd. It also helps with visualization of match
- // results.
- // OPTIMIZE: remove this if necessary
- match_t *wrap = new_match(defs, pat, m->start, m->end, MATCHES(m));
- cache_save(f, defs, str, pat, wrap);
+ // This match wrapper mainly exists for record-keeping purposes.
+ // However, it also keeps `m` from getting garbage collected with
+ // leftrec.match is GC'd. It also helps with visualization of match
+ // results.
+ // OPTIMIZE: remove this if necessary
+ match_t *wrap = new_match(defs, pat, m->start, m->end, MATCHES(m));
+ cache_save(f, defs, str, pat, wrap);
- if (rec_op.args.leftrec.match)
- remove_ownership(&rec_op.args.leftrec.match);
+ if (rec_op.args.leftrec.match)
+ remove_ownership(&rec_op.args.leftrec.match);
- return wrap;
+ return wrap;
+ }
+ case BP_NODENT: {
+ if (*str != '\n') return NULL;
+ const char *start = str;
+
+ size_t linenum = get_line_number(f, str);
+ const char *p = get_line(f, linenum);
+ if (p < f->start) p = f->start; // Can happen with recursive matching
+
+ // Current indentation:
+ char denter = *p;
+ int dents = 0;
+ if (denter == ' ' || denter == '\t') {
+ for (; *p == denter && p < f->end; ++p) ++dents;
}
- case BP_NODENT: {
- if (*str != '\n') return NULL;
- const char *start = str;
-
- size_t linenum = get_line_number(f, str);
- const char *p = get_line(f, linenum);
- if (p < f->start) p = f->start; // Can happen with recursive matching
-
- // Current indentation:
- char denter = *p;
- int dents = 0;
- if (denter == ' ' || denter == '\t') {
- for (; *p == denter && p < f->end; ++p) ++dents;
- }
- // Subsequent indentation:
- while (*str == '\n' || *str == '\n') ++str;
- for (int i = 0; i < dents; i++)
- if (&str[i] >= f->end || str[i] != denter) return NULL;
+ // Subsequent indentation:
+ while (*str == '\n' || *str == '\n') ++str;
+ for (int i = 0; i < dents; i++)
+ if (&str[i] >= f->end || str[i] != denter) return NULL;
- return new_match(defs, pat, start, &str[dents], NULL);
- }
- case BP_ERROR: {
- match_t *p = pat->args.pat ? match(defs, f, str, pat->args.pat, ignorecase) : NULL;
- return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
- }
- default: {
- errx(EXIT_FAILURE, "Unknown pattern type: %u", pat->type);
- return NULL;
- }
+ return new_match(defs, pat, start, &str[dents], NULL);
+ }
+ case BP_ERROR: {
+ match_t *p = pat->args.pat ? match(defs, f, str, pat->args.pat, ignorecase) : NULL;
+ return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
+ }
+ default: {
+ errx(EXIT_FAILURE, "Unknown pattern type: %u", pat->type);
+ return NULL;
+ }
}
}
@@ -703,4 +703,4 @@ size_t free_all_matches(void)
return count;
}
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0