diff options
Diffstat (limited to 'match.c')
| -rw-r--r-- | match.c | 702 |
1 files changed, 351 insertions, 351 deletions
@@ -106,25 +106,25 @@ static pat_t *first_pat(def_t *defs, pat_t *pat) { for (pat_t *p = pat; p; ) { switch (p->type) { - case BP_BEFORE: - p = p->args.pat; break; - case BP_REPEAT: - if (p->args.repetitions.min == 0) - return p; - p = p->args.repetitions.repeat_pat; break; - case BP_CAPTURE: - p = p->args.capture.capture_pat; break; - case BP_CHAIN: case BP_MATCH: case BP_NOT_MATCH: - p = p->args.multiple.first; break; - case BP_REPLACE: - p = p->args.replace.pat; break; - case BP_REF: { - pat_t *p2 = deref(defs, p); - if (p2 == p) return p2; - p = p2; - break; - } - default: return p; + case BP_BEFORE: + p = p->args.pat; break; + case BP_REPEAT: + if (p->args.repetitions.min == 0) + return p; + p = p->args.repetitions.repeat_pat; break; + case BP_CAPTURE: + p = p->args.capture.capture_pat; break; + case BP_CHAIN: case BP_MATCH: case BP_NOT_MATCH: + p = p->args.multiple.first; break; + case BP_REPLACE: + p = p->args.replace.pat; break; + case BP_REF: { + pat_t *p2 = deref(defs, p); + if (p2 == p) return p2; + p = p2; + break; + } + default: return p; } } return pat; @@ -187,376 +187,376 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool ignorecase) { switch (pat->type) { - case BP_DEFINITION: { - def_t *defs2 = with_def(defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def); - match_t *m = match(defs2, f, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def, ignorecase); - defs = free_defs(defs2, defs); - return m; - } - case BP_LEFTRECURSION: { - // Left recursion occurs when a pattern directly or indirectly - // invokes itself at the same position in the text. It's handled as - // a special case, but if a pattern invokes itself at a later - // point, it can be handled with normal recursion. - // See: left-recursion.md for more details. - if (str == pat->args.leftrec.at) { - ++pat->args.leftrec.visits; - return pat->args.leftrec.match; - } else { - return match(defs, f, str, pat->args.leftrec.fallback, ignorecase); - } - } - case BP_ANYCHAR: { - return (str < f->end && *str != '\n') ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL; - } - case BP_ID_START: { - return (str < f->end && isidstart(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL; - } - case BP_ID_CONTINUE: { - return (str < f->end && isidcontinue(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL; - } - case BP_START_OF_FILE: { - return (str == f->start) ? new_match(defs, pat, str, str, NULL) : NULL; - } - case BP_START_OF_LINE: { - return (str == f->start || str[-1] == '\n') ? new_match(defs, pat, str, str, NULL) : NULL; - } - case BP_END_OF_FILE: { - return (str == f->end) ? new_match(defs, pat, str, str, NULL) : NULL; - } - case BP_END_OF_LINE: { - return (str == f->end || *str == '\n') ? new_match(defs, pat, str, str, NULL) : NULL; - } - case BP_WORD_BOUNDARY: { - return (str == f->start || isidcontinue(f, str) != isidcontinue(f, prev_char(f, str))) ? new_match(defs, pat, str, str, NULL) : NULL; - } - case BP_STRING: { - if (&str[pat->min_matchlen] > f->end) return NULL; - if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, pat->min_matchlen) != 0) - return NULL; - return new_match(defs, pat, str, str + pat->min_matchlen, NULL); + case BP_DEFINITION: { + def_t *defs2 = with_def(defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def); + match_t *m = match(defs2, f, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def, ignorecase); + defs = free_defs(defs2, defs); + return m; + } + case BP_LEFTRECURSION: { + // Left recursion occurs when a pattern directly or indirectly + // invokes itself at the same position in the text. It's handled as + // a special case, but if a pattern invokes itself at a later + // point, it can be handled with normal recursion. + // See: left-recursion.md for more details. + if (str == pat->args.leftrec.at) { + ++pat->args.leftrec.visits; + return pat->args.leftrec.match; + } else { + return match(defs, f, str, pat->args.leftrec.fallback, ignorecase); } - case BP_RANGE: { - if (str >= f->end) return NULL; - if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high) - return NULL; - return new_match(defs, pat, str, str+1, NULL); + } + case BP_ANYCHAR: { + return (str < f->end && *str != '\n') ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL; + } + case BP_ID_START: { + return (str < f->end && isidstart(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL; + } + case BP_ID_CONTINUE: { + return (str < f->end && isidcontinue(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL; + } + case BP_START_OF_FILE: { + return (str == f->start) ? new_match(defs, pat, str, str, NULL) : NULL; + } + case BP_START_OF_LINE: { + return (str == f->start || str[-1] == '\n') ? new_match(defs, pat, str, str, NULL) : NULL; + } + case BP_END_OF_FILE: { + return (str == f->end || (str == f->end-1 && *str == '\n')) ? new_match(defs, pat, str, str, NULL) : NULL; + } + case BP_END_OF_LINE: { + return (str == f->end || *str == '\n') ? new_match(defs, pat, str, str, NULL) : NULL; + } + case BP_WORD_BOUNDARY: { + return (str == f->start || isidcontinue(f, str) != isidcontinue(f, prev_char(f, str))) ? new_match(defs, pat, str, str, NULL) : NULL; + } + case BP_STRING: { + if (&str[pat->min_matchlen] > f->end) return NULL; + if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, pat->min_matchlen) != 0) + return NULL; + return new_match(defs, pat, str, str + pat->min_matchlen, NULL); + } + case BP_RANGE: { + if (str >= f->end) return NULL; + if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high) + return NULL; + return new_match(defs, pat, str, str+1, NULL); + } + case BP_NOT: { + match_t *m = match(defs, f, str, pat->args.pat, ignorecase); + if (m != NULL) { + recycle_if_unused(&m); + return NULL; } - case BP_NOT: { - match_t *m = match(defs, f, str, pat->args.pat, ignorecase); - if (m != NULL) { - recycle_if_unused(&m); - return NULL; - } - return new_match(defs, pat, str, str, NULL); + return new_match(defs, pat, str, str, NULL); + } + case BP_UPTO: case BP_UPTO_STRICT: { + match_t *m = new_match(defs, pat, str, str, NULL); + pat_t *target = deref(defs, pat->args.multiple.first), + *skip = deref(defs, pat->args.multiple.second); + if (!target && !skip) { + while (str < f->end && *str != '\n') ++str; + m->end = str; + return m; } - case BP_UPTO: case BP_UPTO_STRICT: { - match_t *m = new_match(defs, pat, str, str, NULL); - pat_t *target = deref(defs, pat->args.multiple.first), - *skip = deref(defs, pat->args.multiple.second); - if (!target && !skip) { - while (str < f->end && *str != '\n') ++str; - m->end = str; - return m; - } - size_t child_cap = 0, nchildren = 0; - for (const char *prev = NULL; prev < str; ) { - prev = str; - if (target) { - match_t *p = match(defs, f, str, target, ignorecase); - if (p != NULL) { - recycle_if_unused(&p); - m->end = str; - return m; - } - } else if (str == f->end) { + size_t child_cap = 0, nchildren = 0; + for (const char *prev = NULL; prev < str; ) { + prev = str; + if (target) { + match_t *p = match(defs, f, str, target, ignorecase); + if (p != NULL) { + recycle_if_unused(&p); m->end = str; return m; } - if (skip) { - match_t *s = match(defs, f, str, skip, ignorecase); - if (s != NULL) { - str = s->end; - if (nchildren+2 >= child_cap) { - m->children = grow(m->children, child_cap += 5); - for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL; - } - add_owner(&m->children[nchildren++], s); - continue; - } - } - // This isn't in the for() structure because there needs to - // be at least once chance to match the pattern, even if - // we're at the end of the string already (e.g. "..$"). - if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT) - str = next_char(f, str); + } else if (str == f->end) { + m->end = str; + return m; } - recycle_if_unused(&m); - return NULL; - } - case BP_REPEAT: { - match_t *m = new_match(defs, pat, str, str, NULL); - size_t reps = 0; - ssize_t max = pat->args.repetitions.max; - pat_t *repeating = deref(defs, pat->args.repetitions.repeat_pat); - pat_t *sep = deref(defs, pat->args.repetitions.sep); - size_t child_cap = 0, nchildren = 0; - for (reps = 0; max == -1 || reps < (size_t)max; ++reps) { - const char *start = str; - // Separator - match_t *msep = NULL; - if (sep != NULL && reps > 0) { - msep = match(defs, f, str, sep, ignorecase); - if (msep == NULL) break; - str = msep->end; - } - match_t *mp = match(defs, f, str, repeating, ignorecase); - if (mp == NULL) { - str = start; - if (msep) recycle_if_unused(&msep); - break; - } - if (mp->end == start && reps > 0) { - // Since no forward progress was made on either `repeating` - // or `sep` and BP does not have mutable state, it's - // guaranteed that no progress will be made on the next - // loop either. We know that this will continue to loop - // until reps==max, so let's just cut to the chase instead - // of looping infinitely. - if (msep) recycle_if_unused(&msep); - recycle_if_unused(&mp); - if (pat->args.repetitions.max == -1) - reps = ~(size_t)0; - else - reps = (size_t)pat->args.repetitions.max; - break; - } - if (msep) { + if (skip) { + match_t *s = match(defs, f, str, skip, ignorecase); + if (s != NULL) { + str = s->end; if (nchildren+2 >= child_cap) { m->children = grow(m->children, child_cap += 5); for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL; } - add_owner(&m->children[nchildren++], msep); + add_owner(&m->children[nchildren++], s); + continue; } - + } + // This isn't in the for() structure because there needs to + // be at least once chance to match the pattern, even if + // we're at the end of the string already (e.g. "..$"). + if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT) + str = next_char(f, str); + } + recycle_if_unused(&m); + return NULL; + } + case BP_REPEAT: { + match_t *m = new_match(defs, pat, str, str, NULL); + size_t reps = 0; + ssize_t max = pat->args.repetitions.max; + pat_t *repeating = deref(defs, pat->args.repetitions.repeat_pat); + pat_t *sep = deref(defs, pat->args.repetitions.sep); + size_t child_cap = 0, nchildren = 0; + for (reps = 0; max == -1 || reps < (size_t)max; ++reps) { + const char *start = str; + // Separator + match_t *msep = NULL; + if (sep != NULL && reps > 0) { + msep = match(defs, f, str, sep, ignorecase); + if (msep == NULL) break; + str = msep->end; + } + match_t *mp = match(defs, f, str, repeating, ignorecase); + if (mp == NULL) { + str = start; + if (msep) recycle_if_unused(&msep); + break; + } + if (mp->end == start && reps > 0) { + // Since no forward progress was made on either `repeating` + // or `sep` and BP does not have mutable state, it's + // guaranteed that no progress will be made on the next + // loop either. We know that this will continue to loop + // until reps==max, so let's just cut to the chase instead + // of looping infinitely. + if (msep) recycle_if_unused(&msep); + recycle_if_unused(&mp); + if (pat->args.repetitions.max == -1) + reps = ~(size_t)0; + else + reps = (size_t)pat->args.repetitions.max; + break; + } + if (msep) { if (nchildren+2 >= child_cap) { m->children = grow(m->children, child_cap += 5); for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL; } - add_owner(&m->children[nchildren++], mp); - str = mp->end; + add_owner(&m->children[nchildren++], msep); } - if (reps < (size_t)pat->args.repetitions.min) { - recycle_if_unused(&m); - return NULL; + if (nchildren+2 >= child_cap) { + m->children = grow(m->children, child_cap += 5); + for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL; } - m->end = str; - return m; + add_owner(&m->children[nchildren++], mp); + str = mp->end; } - case BP_AFTER: { - pat_t *back = deref(defs, pat->args.pat); - if (!back) return NULL; - - // We only care about the region from the backtrack pos up to the - // current pos, so mock it out as a file slice. - // TODO: this breaks ^/^^/$/$$, but that can probably be ignored - // because you rarely need to check those in a backtrack. - file_t slice; - slice_file(&slice, f, f->start, str); - for (const char *pos = &str[-(long)back->min_matchlen]; - pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]); - pos = prev_char(f, pos)) { - cache_destroy(&slice); - slice.start = (char*)pos; - match_t *m = match(defs, &slice, pos, back, ignorecase); - // Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB") - if (m && m->end != str) - recycle_if_unused(&m); - else if (m) { - cache_destroy(&slice); - return new_match(defs, pat, str, str, MATCHES(m)); - } - if (pos == f->start) break; - // To prevent extreme performance degradation, don't keep - // walking backwards endlessly over newlines. - if (back->max_matchlen == -1 && *pos == '\n') break; - } - cache_destroy(&slice); + + if (reps < (size_t)pat->args.repetitions.min) { + recycle_if_unused(&m); return NULL; } - case BP_BEFORE: { - match_t *after = match(defs, f, str, pat->args.pat, ignorecase); - return after ? new_match(defs, pat, str, str, MATCHES(after)) : NULL; - } - case BP_CAPTURE: { - match_t *p = match(defs, f, str, pat->args.pat, ignorecase); - return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL; - } - case BP_OTHERWISE: { - match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase); - return m ? m : match(defs, f, str, pat->args.multiple.second, ignorecase); + m->end = str; + return m; + } + case BP_AFTER: { + pat_t *back = deref(defs, pat->args.pat); + if (!back) return NULL; + + // We only care about the region from the backtrack pos up to the + // current pos, so mock it out as a file slice. + // TODO: this breaks ^/^^/$/$$, but that can probably be ignored + // because you rarely need to check those in a backtrack. + file_t slice; + slice_file(&slice, f, f->start, str); + for (const char *pos = &str[-(long)back->min_matchlen]; + pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]); + pos = prev_char(f, pos)) { + cache_destroy(&slice); + slice.start = (char*)pos; + match_t *m = match(defs, &slice, pos, back, ignorecase); + // Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB") + if (m && m->end != str) + recycle_if_unused(&m); + else if (m) { + cache_destroy(&slice); + return new_match(defs, pat, str, str, MATCHES(m)); + } + if (pos == f->start) break; + // To prevent extreme performance degradation, don't keep + // walking backwards endlessly over newlines. + if (back->max_matchlen == -1 && *pos == '\n') break; } - case BP_CHAIN: { - match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase); - if (m1 == NULL) return NULL; - - match_t *m2; - // Push backrefs and run matching, then cleanup - if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) { - // Temporarily add a rule that the backref name matches the - // exact string of the original match (no replacements) - size_t len = (size_t)(m1->end - m1->start); - pat_t *backref = new_pat(f, m1->start, m1->end, len, (ssize_t)len, BP_STRING); - backref->args.string = m1->start; - - def_t *defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref); - ++m1->refcount; { - m2 = match(defs2, f, m1->end, pat->args.multiple.second, ignorecase); - if (!m2) { // No need to keep the backref in memory if it didn't match - for (pat_t **rem = &f->pats; *rem; rem = &(*rem)->next) { - if ((*rem) == backref) { - pat_t *tmp = *rem; - *rem = (*rem)->next; - free(tmp); - break; - } + cache_destroy(&slice); + return NULL; + } + case BP_BEFORE: { + match_t *after = match(defs, f, str, pat->args.pat, ignorecase); + return after ? new_match(defs, pat, str, str, MATCHES(after)) : NULL; + } + case BP_CAPTURE: { + match_t *p = match(defs, f, str, pat->args.pat, ignorecase); + return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL; + } + case BP_OTHERWISE: { + match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase); + return m ? m : match(defs, f, str, pat->args.multiple.second, ignorecase); + } + case BP_CHAIN: { + match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase); + if (m1 == NULL) return NULL; + + match_t *m2; + // Push backrefs and run matching, then cleanup + if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) { + // Temporarily add a rule that the backref name matches the + // exact string of the original match (no replacements) + size_t len = (size_t)(m1->end - m1->start); + pat_t *backref = new_pat(f, m1->start, m1->end, len, (ssize_t)len, BP_STRING); + backref->args.string = m1->start; + + def_t *defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref); + ++m1->refcount; { + m2 = match(defs2, f, m1->end, pat->args.multiple.second, ignorecase); + if (!m2) { // No need to keep the backref in memory if it didn't match + for (pat_t **rem = &f->pats; *rem; rem = &(*rem)->next) { + if ((*rem) == backref) { + pat_t *tmp = *rem; + *rem = (*rem)->next; + free(tmp); + break; } } - defs = free_defs(defs2, defs); - } --m1->refcount; - } else { - m2 = match(defs, f, m1->end, pat->args.multiple.second, ignorecase); - } - - if (m2 == NULL) { - recycle_if_unused(&m1); - return NULL; - } + } + defs = free_defs(defs2, defs); + } --m1->refcount; + } else { + m2 = match(defs, f, m1->end, pat->args.multiple.second, ignorecase); + } - return new_match(defs, pat, str, m2->end, MATCHES(m1, m2)); + if (m2 == NULL) { + recycle_if_unused(&m1); + return NULL; } - case BP_MATCH: case BP_NOT_MATCH: { - match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase); - if (m1 == NULL) return NULL; - - // <p1>~<p2> matches iff the text of <p1> matches <p2> - // <p1>!~<p2> matches iff the text of <p1> does not match <p2> - file_t slice; - slice_file(&slice, f, m1->start, m1->end); - match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase); - if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) { - if (m2) recycle_if_unused(&m2); - cache_destroy(&slice); - recycle_if_unused(&m1); - return NULL; - } + + return new_match(defs, pat, str, m2->end, MATCHES(m1, m2)); + } + case BP_MATCH: case BP_NOT_MATCH: { + match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase); + if (m1 == NULL) return NULL; + + // <p1>~<p2> matches iff the text of <p1> matches <p2> + // <p1>!~<p2> matches iff the text of <p1> does not match <p2> + file_t slice; + slice_file(&slice, f, m1->start, m1->end); + match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase); + if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) { + if (m2) recycle_if_unused(&m2); cache_destroy(&slice); - return new_match(defs, pat, m1->start, m1->end, (pat->type == BP_MATCH) ? MATCHES(m1, m2) : NULL); + recycle_if_unused(&m1); + return NULL; } - case BP_REPLACE: { - match_t *p = NULL; - if (pat->args.replace.pat) { - p = match(defs, f, str, pat->args.replace.pat, ignorecase); - if (p == NULL) return NULL; - } - return new_match(defs, pat, str, p ? p->end : str, MATCHES(p)); + cache_destroy(&slice); + return new_match(defs, pat, m1->start, m1->end, (pat->type == BP_MATCH) ? MATCHES(m1, m2) : NULL); + } + case BP_REPLACE: { + match_t *p = NULL; + if (pat->args.replace.pat) { + p = match(defs, f, str, pat->args.replace.pat, ignorecase); + if (p == NULL) return NULL; } - case BP_REF: { - match_t *cached; - if (cache_get(f, defs, str, pat, &cached)) - return cached; - - def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name); - if (def == NULL) - errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name); - pat_t *ref = def->pat; - - pat_t rec_op = { - .type = BP_LEFTRECURSION, - .start = ref->start, - .end = ref->end, - .min_matchlen = 0, - .max_matchlen = -1, - .args.leftrec = { - .match = NULL, - .visits = 0, - .at = str, - .fallback = ref, - }, - }; - def_t defs2 = { - .namelen = def->namelen, - .name = def->name, - .pat = &rec_op, - .next = defs, - }; - - const char *prev = str; - match_t *m = match(&defs2, f, str, ref, ignorecase); - if (m == NULL) { - cache_save(f, defs, str, pat, NULL); - return NULL; - } + return new_match(defs, pat, str, p ? p->end : str, MATCHES(p)); + } + case BP_REF: { + match_t *cached; + if (cache_get(f, defs, str, pat, &cached)) + return cached; - while (rec_op.args.leftrec.visits > 0) { - rec_op.args.leftrec.visits = 0; - remove_ownership(&rec_op.args.leftrec.match); - add_owner(&rec_op.args.leftrec.match, m); - prev = m->end; - match_t *m2 = match(&defs2, f, str, ref, ignorecase); - if (m2 == NULL) break; - if (m2->end <= prev) { - recycle_if_unused(&m2); - break; - } - m = m2; + def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name); + if (def == NULL) + errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name); + pat_t *ref = def->pat; + + pat_t rec_op = { + .type = BP_LEFTRECURSION, + .start = ref->start, + .end = ref->end, + .min_matchlen = 0, + .max_matchlen = -1, + .args.leftrec = { + .match = NULL, + .visits = 0, + .at = str, + .fallback = ref, + }, + }; + def_t defs2 = { + .namelen = def->namelen, + .name = def->name, + .pat = &rec_op, + .next = defs, + }; + + const char *prev = str; + match_t *m = match(&defs2, f, str, ref, ignorecase); + if (m == NULL) { + cache_save(f, defs, str, pat, NULL); + return NULL; + } + + while (rec_op.args.leftrec.visits > 0) { + rec_op.args.leftrec.visits = 0; + remove_ownership(&rec_op.args.leftrec.match); + add_owner(&rec_op.args.leftrec.match, m); + prev = m->end; + match_t *m2 = match(&defs2, f, str, ref, ignorecase); + if (m2 == NULL) break; + if (m2->end <= prev) { + recycle_if_unused(&m2); + break; } + m = m2; + } - // This match wrapper mainly exists for record-keeping purposes. - // However, it also keeps `m` from getting garbage collected with - // leftrec.match is GC'd. It also helps with visualization of match - // results. - // OPTIMIZE: remove this if necessary - match_t *wrap = new_match(defs, pat, m->start, m->end, MATCHES(m)); - cache_save(f, defs, str, pat, wrap); + // This match wrapper mainly exists for record-keeping purposes. + // However, it also keeps `m` from getting garbage collected with + // leftrec.match is GC'd. It also helps with visualization of match + // results. + // OPTIMIZE: remove this if necessary + match_t *wrap = new_match(defs, pat, m->start, m->end, MATCHES(m)); + cache_save(f, defs, str, pat, wrap); - if (rec_op.args.leftrec.match) - remove_ownership(&rec_op.args.leftrec.match); + if (rec_op.args.leftrec.match) + remove_ownership(&rec_op.args.leftrec.match); - return wrap; + return wrap; + } + case BP_NODENT: { + if (*str != '\n') return NULL; + const char *start = str; + + size_t linenum = get_line_number(f, str); + const char *p = get_line(f, linenum); + if (p < f->start) p = f->start; // Can happen with recursive matching + + // Current indentation: + char denter = *p; + int dents = 0; + if (denter == ' ' || denter == '\t') { + for (; *p == denter && p < f->end; ++p) ++dents; } - case BP_NODENT: { - if (*str != '\n') return NULL; - const char *start = str; - - size_t linenum = get_line_number(f, str); - const char *p = get_line(f, linenum); - if (p < f->start) p = f->start; // Can happen with recursive matching - - // Current indentation: - char denter = *p; - int dents = 0; - if (denter == ' ' || denter == '\t') { - for (; *p == denter && p < f->end; ++p) ++dents; - } - // Subsequent indentation: - while (*str == '\n' || *str == '\n') ++str; - for (int i = 0; i < dents; i++) - if (&str[i] >= f->end || str[i] != denter) return NULL; + // Subsequent indentation: + while (*str == '\n' || *str == '\n') ++str; + for (int i = 0; i < dents; i++) + if (&str[i] >= f->end || str[i] != denter) return NULL; - return new_match(defs, pat, start, &str[dents], NULL); - } - case BP_ERROR: { - match_t *p = pat->args.pat ? match(defs, f, str, pat->args.pat, ignorecase) : NULL; - return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL; - } - default: { - errx(EXIT_FAILURE, "Unknown pattern type: %u", pat->type); - return NULL; - } + return new_match(defs, pat, start, &str[dents], NULL); + } + case BP_ERROR: { + match_t *p = pat->args.pat ? match(defs, f, str, pat->args.pat, ignorecase) : NULL; + return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL; + } + default: { + errx(EXIT_FAILURE, "Unknown pattern type: %u", pat->type); + return NULL; + } } } @@ -703,4 +703,4 @@ size_t free_all_matches(void) return count; } -// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 |
