aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bp.c2
-rw-r--r--definitions.c2
-rw-r--r--definitions.h2
-rw-r--r--explain.c8
-rw-r--r--explain.h2
-rw-r--r--files.c2
-rw-r--r--files.h2
-rw-r--r--json.c12
-rw-r--r--json.h2
-rw-r--r--match.c702
-rw-r--r--match.h2
-rw-r--r--pattern.c534
-rw-r--r--pattern.h2
-rw-r--r--print.c2
-rw-r--r--print.h2
-rw-r--r--types.h2
-rw-r--r--utf8.c2
-rw-r--r--utf8.h2
-rw-r--r--utils.c83
-rw-r--r--utils.h2
20 files changed, 685 insertions, 684 deletions
diff --git a/bp.c b/bp.c
index b1ae071..3adc5f2 100644
--- a/bp.c
+++ b/bp.c
@@ -562,4 +562,4 @@ int main(int argc, char *argv[])
exit(found > 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/definitions.c b/definitions.c
index 717e1a5..5309c8b 100644
--- a/definitions.c
+++ b/definitions.c
@@ -68,4 +68,4 @@ def_t *free_defs(def_t *defs, def_t *stop)
return defs;
}
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/definitions.h b/definitions.h
index 5788ac0..6dd0920 100644
--- a/definitions.h
+++ b/definitions.h
@@ -17,4 +17,4 @@ __attribute__((nonnull(1)))
def_t *free_defs(def_t *defs, def_t *stop);
#endif
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/explain.c b/explain.c
index 2d54234..79ef8c7 100644
--- a/explain.c
+++ b/explain.c
@@ -65,9 +65,9 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
for (size_t i = 0; i < viz_typelen; i++) {
switch (viz_type[i]) {
- case '\n': printf("↵"); break;
- case '\t': printf("⇥"); break;
- default: printf("%c", viz_type[i]); break;
+ case '\n': printf("↵"); break;
+ case '\t': printf("⇥"); break;
+ default: printf("%c", viz_type[i]); break;
}
}
@@ -169,3 +169,5 @@ void explain_match(match_t *m)
_explain_matches(&first, 0, m->start, (size_t)(m->end - m->start));
printf("\033[?7h"); // Re-enable line wrapping
}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/explain.h b/explain.h
index 75cf184..dab52b1 100644
--- a/explain.h
+++ b/explain.h
@@ -10,4 +10,4 @@ __attribute__((nonnull))
void explain_match(match_t *m);
#endif
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/files.c b/files.c
index 79d2a03..7e55d27 100644
--- a/files.c
+++ b/files.c
@@ -383,4 +383,4 @@ void cache_destroy(file_t *f)
f->cache.size = 0;
}
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/files.h b/files.h
index 375e142..840412b 100644
--- a/files.h
+++ b/files.h
@@ -53,4 +53,4 @@ __attribute__((nonnull))
void cache_destroy(file_t *f);
#endif
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/json.c b/json.c
index 23079a7..e102cfe 100644
--- a/json.c
+++ b/json.c
@@ -29,11 +29,11 @@ static int _json_match(const char *text, match_t *m, int comma, bool verbose)
printf("{\"rule\":\"");
for (const char *c = m->pat->start; c < m->pat->end; c++) {
switch (*c) {
- case '"': printf("\\\""); break;
- case '\\': printf("\\\\"); break;
- case '\t': printf("\\t"); break;
- case '\n': printf("↵"); break;
- default: printf("%c", *c); break;
+ case '"': printf("\\\""); break;
+ case '\\': printf("\\\\"); break;
+ case '\t': printf("\\t"); break;
+ case '\n': printf("↵"); break;
+ default: printf("%c", *c); break;
}
}
printf("\",\"start\":%ld,\"end\":%ld,\"children\":[",
@@ -52,4 +52,4 @@ void json_match(const char *text, match_t *m, bool verbose)
(void)_json_match(text, m, 0, verbose);
}
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/json.h b/json.h
index 554c88b..4450387 100644
--- a/json.h
+++ b/json.h
@@ -12,4 +12,4 @@ __attribute__((nonnull))
void json_match(const char *text, match_t *m, bool verbose);
#endif
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/match.c b/match.c
index ed26a62..c1b5f49 100644
--- a/match.c
+++ b/match.c
@@ -106,25 +106,25 @@ static pat_t *first_pat(def_t *defs, pat_t *pat)
{
for (pat_t *p = pat; p; ) {
switch (p->type) {
- case BP_BEFORE:
- p = p->args.pat; break;
- case BP_REPEAT:
- if (p->args.repetitions.min == 0)
- return p;
- p = p->args.repetitions.repeat_pat; break;
- case BP_CAPTURE:
- p = p->args.capture.capture_pat; break;
- case BP_CHAIN: case BP_MATCH: case BP_NOT_MATCH:
- p = p->args.multiple.first; break;
- case BP_REPLACE:
- p = p->args.replace.pat; break;
- case BP_REF: {
- pat_t *p2 = deref(defs, p);
- if (p2 == p) return p2;
- p = p2;
- break;
- }
- default: return p;
+ case BP_BEFORE:
+ p = p->args.pat; break;
+ case BP_REPEAT:
+ if (p->args.repetitions.min == 0)
+ return p;
+ p = p->args.repetitions.repeat_pat; break;
+ case BP_CAPTURE:
+ p = p->args.capture.capture_pat; break;
+ case BP_CHAIN: case BP_MATCH: case BP_NOT_MATCH:
+ p = p->args.multiple.first; break;
+ case BP_REPLACE:
+ p = p->args.replace.pat; break;
+ case BP_REF: {
+ pat_t *p2 = deref(defs, p);
+ if (p2 == p) return p2;
+ p = p2;
+ break;
+ }
+ default: return p;
}
}
return pat;
@@ -187,376 +187,376 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk
static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool ignorecase)
{
switch (pat->type) {
- case BP_DEFINITION: {
- def_t *defs2 = with_def(defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def);
- match_t *m = match(defs2, f, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def, ignorecase);
- defs = free_defs(defs2, defs);
- return m;
- }
- case BP_LEFTRECURSION: {
- // Left recursion occurs when a pattern directly or indirectly
- // invokes itself at the same position in the text. It's handled as
- // a special case, but if a pattern invokes itself at a later
- // point, it can be handled with normal recursion.
- // See: left-recursion.md for more details.
- if (str == pat->args.leftrec.at) {
- ++pat->args.leftrec.visits;
- return pat->args.leftrec.match;
- } else {
- return match(defs, f, str, pat->args.leftrec.fallback, ignorecase);
- }
- }
- case BP_ANYCHAR: {
- return (str < f->end && *str != '\n') ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
- }
- case BP_ID_START: {
- return (str < f->end && isidstart(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
- }
- case BP_ID_CONTINUE: {
- return (str < f->end && isidcontinue(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
- }
- case BP_START_OF_FILE: {
- return (str == f->start) ? new_match(defs, pat, str, str, NULL) : NULL;
- }
- case BP_START_OF_LINE: {
- return (str == f->start || str[-1] == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
- }
- case BP_END_OF_FILE: {
- return (str == f->end) ? new_match(defs, pat, str, str, NULL) : NULL;
- }
- case BP_END_OF_LINE: {
- return (str == f->end || *str == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
- }
- case BP_WORD_BOUNDARY: {
- return (str == f->start || isidcontinue(f, str) != isidcontinue(f, prev_char(f, str))) ? new_match(defs, pat, str, str, NULL) : NULL;
- }
- case BP_STRING: {
- if (&str[pat->min_matchlen] > f->end) return NULL;
- if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, pat->min_matchlen) != 0)
- return NULL;
- return new_match(defs, pat, str, str + pat->min_matchlen, NULL);
+ case BP_DEFINITION: {
+ def_t *defs2 = with_def(defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def);
+ match_t *m = match(defs2, f, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def, ignorecase);
+ defs = free_defs(defs2, defs);
+ return m;
+ }
+ case BP_LEFTRECURSION: {
+ // Left recursion occurs when a pattern directly or indirectly
+ // invokes itself at the same position in the text. It's handled as
+ // a special case, but if a pattern invokes itself at a later
+ // point, it can be handled with normal recursion.
+ // See: left-recursion.md for more details.
+ if (str == pat->args.leftrec.at) {
+ ++pat->args.leftrec.visits;
+ return pat->args.leftrec.match;
+ } else {
+ return match(defs, f, str, pat->args.leftrec.fallback, ignorecase);
}
- case BP_RANGE: {
- if (str >= f->end) return NULL;
- if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high)
- return NULL;
- return new_match(defs, pat, str, str+1, NULL);
+ }
+ case BP_ANYCHAR: {
+ return (str < f->end && *str != '\n') ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
+ }
+ case BP_ID_START: {
+ return (str < f->end && isidstart(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
+ }
+ case BP_ID_CONTINUE: {
+ return (str < f->end && isidcontinue(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
+ }
+ case BP_START_OF_FILE: {
+ return (str == f->start) ? new_match(defs, pat, str, str, NULL) : NULL;
+ }
+ case BP_START_OF_LINE: {
+ return (str == f->start || str[-1] == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
+ }
+ case BP_END_OF_FILE: {
+ return (str == f->end || (str == f->end-1 && *str == '\n')) ? new_match(defs, pat, str, str, NULL) : NULL;
+ }
+ case BP_END_OF_LINE: {
+ return (str == f->end || *str == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
+ }
+ case BP_WORD_BOUNDARY: {
+ return (str == f->start || isidcontinue(f, str) != isidcontinue(f, prev_char(f, str))) ? new_match(defs, pat, str, str, NULL) : NULL;
+ }
+ case BP_STRING: {
+ if (&str[pat->min_matchlen] > f->end) return NULL;
+ if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, pat->min_matchlen) != 0)
+ return NULL;
+ return new_match(defs, pat, str, str + pat->min_matchlen, NULL);
+ }
+ case BP_RANGE: {
+ if (str >= f->end) return NULL;
+ if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high)
+ return NULL;
+ return new_match(defs, pat, str, str+1, NULL);
+ }
+ case BP_NOT: {
+ match_t *m = match(defs, f, str, pat->args.pat, ignorecase);
+ if (m != NULL) {
+ recycle_if_unused(&m);
+ return NULL;
}
- case BP_NOT: {
- match_t *m = match(defs, f, str, pat->args.pat, ignorecase);
- if (m != NULL) {
- recycle_if_unused(&m);
- return NULL;
- }
- return new_match(defs, pat, str, str, NULL);
+ return new_match(defs, pat, str, str, NULL);
+ }
+ case BP_UPTO: case BP_UPTO_STRICT: {
+ match_t *m = new_match(defs, pat, str, str, NULL);
+ pat_t *target = deref(defs, pat->args.multiple.first),
+ *skip = deref(defs, pat->args.multiple.second);
+ if (!target && !skip) {
+ while (str < f->end && *str != '\n') ++str;
+ m->end = str;
+ return m;
}
- case BP_UPTO: case BP_UPTO_STRICT: {
- match_t *m = new_match(defs, pat, str, str, NULL);
- pat_t *target = deref(defs, pat->args.multiple.first),
- *skip = deref(defs, pat->args.multiple.second);
- if (!target && !skip) {
- while (str < f->end && *str != '\n') ++str;
- m->end = str;
- return m;
- }
- size_t child_cap = 0, nchildren = 0;
- for (const char *prev = NULL; prev < str; ) {
- prev = str;
- if (target) {
- match_t *p = match(defs, f, str, target, ignorecase);
- if (p != NULL) {
- recycle_if_unused(&p);
- m->end = str;
- return m;
- }
- } else if (str == f->end) {
+ size_t child_cap = 0, nchildren = 0;
+ for (const char *prev = NULL; prev < str; ) {
+ prev = str;
+ if (target) {
+ match_t *p = match(defs, f, str, target, ignorecase);
+ if (p != NULL) {
+ recycle_if_unused(&p);
m->end = str;
return m;
}
- if (skip) {
- match_t *s = match(defs, f, str, skip, ignorecase);
- if (s != NULL) {
- str = s->end;
- if (nchildren+2 >= child_cap) {
- m->children = grow(m->children, child_cap += 5);
- for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
- }
- add_owner(&m->children[nchildren++], s);
- continue;
- }
- }
- // This isn't in the for() structure because there needs to
- // be at least once chance to match the pattern, even if
- // we're at the end of the string already (e.g. "..$").
- if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
- str = next_char(f, str);
+ } else if (str == f->end) {
+ m->end = str;
+ return m;
}
- recycle_if_unused(&m);
- return NULL;
- }
- case BP_REPEAT: {
- match_t *m = new_match(defs, pat, str, str, NULL);
- size_t reps = 0;
- ssize_t max = pat->args.repetitions.max;
- pat_t *repeating = deref(defs, pat->args.repetitions.repeat_pat);
- pat_t *sep = deref(defs, pat->args.repetitions.sep);
- size_t child_cap = 0, nchildren = 0;
- for (reps = 0; max == -1 || reps < (size_t)max; ++reps) {
- const char *start = str;
- // Separator
- match_t *msep = NULL;
- if (sep != NULL && reps > 0) {
- msep = match(defs, f, str, sep, ignorecase);
- if (msep == NULL) break;
- str = msep->end;
- }
- match_t *mp = match(defs, f, str, repeating, ignorecase);
- if (mp == NULL) {
- str = start;
- if (msep) recycle_if_unused(&msep);
- break;
- }
- if (mp->end == start && reps > 0) {
- // Since no forward progress was made on either `repeating`
- // or `sep` and BP does not have mutable state, it's
- // guaranteed that no progress will be made on the next
- // loop either. We know that this will continue to loop
- // until reps==max, so let's just cut to the chase instead
- // of looping infinitely.
- if (msep) recycle_if_unused(&msep);
- recycle_if_unused(&mp);
- if (pat->args.repetitions.max == -1)
- reps = ~(size_t)0;
- else
- reps = (size_t)pat->args.repetitions.max;
- break;
- }
- if (msep) {
+ if (skip) {
+ match_t *s = match(defs, f, str, skip, ignorecase);
+ if (s != NULL) {
+ str = s->end;
if (nchildren+2 >= child_cap) {
m->children = grow(m->children, child_cap += 5);
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
}
- add_owner(&m->children[nchildren++], msep);
+ add_owner(&m->children[nchildren++], s);
+ continue;
}
-
+ }
+ // This isn't in the for() structure because there needs to
+ // be at least once chance to match the pattern, even if
+ // we're at the end of the string already (e.g. "..$").
+ if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
+ str = next_char(f, str);
+ }
+ recycle_if_unused(&m);
+ return NULL;
+ }
+ case BP_REPEAT: {
+ match_t *m = new_match(defs, pat, str, str, NULL);
+ size_t reps = 0;
+ ssize_t max = pat->args.repetitions.max;
+ pat_t *repeating = deref(defs, pat->args.repetitions.repeat_pat);
+ pat_t *sep = deref(defs, pat->args.repetitions.sep);
+ size_t child_cap = 0, nchildren = 0;
+ for (reps = 0; max == -1 || reps < (size_t)max; ++reps) {
+ const char *start = str;
+ // Separator
+ match_t *msep = NULL;
+ if (sep != NULL && reps > 0) {
+ msep = match(defs, f, str, sep, ignorecase);
+ if (msep == NULL) break;
+ str = msep->end;
+ }
+ match_t *mp = match(defs, f, str, repeating, ignorecase);
+ if (mp == NULL) {
+ str = start;
+ if (msep) recycle_if_unused(&msep);
+ break;
+ }
+ if (mp->end == start && reps > 0) {
+ // Since no forward progress was made on either `repeating`
+ // or `sep` and BP does not have mutable state, it's
+ // guaranteed that no progress will be made on the next
+ // loop either. We know that this will continue to loop
+ // until reps==max, so let's just cut to the chase instead
+ // of looping infinitely.
+ if (msep) recycle_if_unused(&msep);
+ recycle_if_unused(&mp);
+ if (pat->args.repetitions.max == -1)
+ reps = ~(size_t)0;
+ else
+ reps = (size_t)pat->args.repetitions.max;
+ break;
+ }
+ if (msep) {
if (nchildren+2 >= child_cap) {
m->children = grow(m->children, child_cap += 5);
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
}
- add_owner(&m->children[nchildren++], mp);
- str = mp->end;
+ add_owner(&m->children[nchildren++], msep);
}
- if (reps < (size_t)pat->args.repetitions.min) {
- recycle_if_unused(&m);
- return NULL;
+ if (nchildren+2 >= child_cap) {
+ m->children = grow(m->children, child_cap += 5);
+ for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
}
- m->end = str;
- return m;
+ add_owner(&m->children[nchildren++], mp);
+ str = mp->end;
}
- case BP_AFTER: {
- pat_t *back = deref(defs, pat->args.pat);
- if (!back) return NULL;
-
- // We only care about the region from the backtrack pos up to the
- // current pos, so mock it out as a file slice.
- // TODO: this breaks ^/^^/$/$$, but that can probably be ignored
- // because you rarely need to check those in a backtrack.
- file_t slice;
- slice_file(&slice, f, f->start, str);
- for (const char *pos = &str[-(long)back->min_matchlen];
- pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]);
- pos = prev_char(f, pos)) {
- cache_destroy(&slice);
- slice.start = (char*)pos;
- match_t *m = match(defs, &slice, pos, back, ignorecase);
- // Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
- if (m && m->end != str)
- recycle_if_unused(&m);
- else if (m) {
- cache_destroy(&slice);
- return new_match(defs, pat, str, str, MATCHES(m));
- }
- if (pos == f->start) break;
- // To prevent extreme performance degradation, don't keep
- // walking backwards endlessly over newlines.
- if (back->max_matchlen == -1 && *pos == '\n') break;
- }
- cache_destroy(&slice);
+
+ if (reps < (size_t)pat->args.repetitions.min) {
+ recycle_if_unused(&m);
return NULL;
}
- case BP_BEFORE: {
- match_t *after = match(defs, f, str, pat->args.pat, ignorecase);
- return after ? new_match(defs, pat, str, str, MATCHES(after)) : NULL;
- }
- case BP_CAPTURE: {
- match_t *p = match(defs, f, str, pat->args.pat, ignorecase);
- return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
- }
- case BP_OTHERWISE: {
- match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase);
- return m ? m : match(defs, f, str, pat->args.multiple.second, ignorecase);
+ m->end = str;
+ return m;
+ }
+ case BP_AFTER: {
+ pat_t *back = deref(defs, pat->args.pat);
+ if (!back) return NULL;
+
+ // We only care about the region from the backtrack pos up to the
+ // current pos, so mock it out as a file slice.
+ // TODO: this breaks ^/^^/$/$$, but that can probably be ignored
+ // because you rarely need to check those in a backtrack.
+ file_t slice;
+ slice_file(&slice, f, f->start, str);
+ for (const char *pos = &str[-(long)back->min_matchlen];
+ pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]);
+ pos = prev_char(f, pos)) {
+ cache_destroy(&slice);
+ slice.start = (char*)pos;
+ match_t *m = match(defs, &slice, pos, back, ignorecase);
+ // Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
+ if (m && m->end != str)
+ recycle_if_unused(&m);
+ else if (m) {
+ cache_destroy(&slice);
+ return new_match(defs, pat, str, str, MATCHES(m));
+ }
+ if (pos == f->start) break;
+ // To prevent extreme performance degradation, don't keep
+ // walking backwards endlessly over newlines.
+ if (back->max_matchlen == -1 && *pos == '\n') break;
}
- case BP_CHAIN: {
- match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
- if (m1 == NULL) return NULL;
-
- match_t *m2;
- // Push backrefs and run matching, then cleanup
- if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) {
- // Temporarily add a rule that the backref name matches the
- // exact string of the original match (no replacements)
- size_t len = (size_t)(m1->end - m1->start);
- pat_t *backref = new_pat(f, m1->start, m1->end, len, (ssize_t)len, BP_STRING);
- backref->args.string = m1->start;
-
- def_t *defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref);
- ++m1->refcount; {
- m2 = match(defs2, f, m1->end, pat->args.multiple.second, ignorecase);
- if (!m2) { // No need to keep the backref in memory if it didn't match
- for (pat_t **rem = &f->pats; *rem; rem = &(*rem)->next) {
- if ((*rem) == backref) {
- pat_t *tmp = *rem;
- *rem = (*rem)->next;
- free(tmp);
- break;
- }
+ cache_destroy(&slice);
+ return NULL;
+ }
+ case BP_BEFORE: {
+ match_t *after = match(defs, f, str, pat->args.pat, ignorecase);
+ return after ? new_match(defs, pat, str, str, MATCHES(after)) : NULL;
+ }
+ case BP_CAPTURE: {
+ match_t *p = match(defs, f, str, pat->args.pat, ignorecase);
+ return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
+ }
+ case BP_OTHERWISE: {
+ match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase);
+ return m ? m : match(defs, f, str, pat->args.multiple.second, ignorecase);
+ }
+ case BP_CHAIN: {
+ match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
+ if (m1 == NULL) return NULL;
+
+ match_t *m2;
+ // Push backrefs and run matching, then cleanup
+ if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) {
+ // Temporarily add a rule that the backref name matches the
+ // exact string of the original match (no replacements)
+ size_t len = (size_t)(m1->end - m1->start);
+ pat_t *backref = new_pat(f, m1->start, m1->end, len, (ssize_t)len, BP_STRING);
+ backref->args.string = m1->start;
+
+ def_t *defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref);
+ ++m1->refcount; {
+ m2 = match(defs2, f, m1->end, pat->args.multiple.second, ignorecase);
+ if (!m2) { // No need to keep the backref in memory if it didn't match
+ for (pat_t **rem = &f->pats; *rem; rem = &(*rem)->next) {
+ if ((*rem) == backref) {
+ pat_t *tmp = *rem;
+ *rem = (*rem)->next;
+ free(tmp);
+ break;
}
}
- defs = free_defs(defs2, defs);
- } --m1->refcount;
- } else {
- m2 = match(defs, f, m1->end, pat->args.multiple.second, ignorecase);
- }
-
- if (m2 == NULL) {
- recycle_if_unused(&m1);
- return NULL;
- }
+ }
+ defs = free_defs(defs2, defs);
+ } --m1->refcount;
+ } else {
+ m2 = match(defs, f, m1->end, pat->args.multiple.second, ignorecase);
+ }
- return new_match(defs, pat, str, m2->end, MATCHES(m1, m2));
+ if (m2 == NULL) {
+ recycle_if_unused(&m1);
+ return NULL;
}
- case BP_MATCH: case BP_NOT_MATCH: {
- match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
- if (m1 == NULL) return NULL;
-
- // <p1>~<p2> matches iff the text of <p1> matches <p2>
- // <p1>!~<p2> matches iff the text of <p1> does not match <p2>
- file_t slice;
- slice_file(&slice, f, m1->start, m1->end);
- match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase);
- if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
- if (m2) recycle_if_unused(&m2);
- cache_destroy(&slice);
- recycle_if_unused(&m1);
- return NULL;
- }
+
+ return new_match(defs, pat, str, m2->end, MATCHES(m1, m2));
+ }
+ case BP_MATCH: case BP_NOT_MATCH: {
+ match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
+ if (m1 == NULL) return NULL;
+
+ // <p1>~<p2> matches iff the text of <p1> matches <p2>
+ // <p1>!~<p2> matches iff the text of <p1> does not match <p2>
+ file_t slice;
+ slice_file(&slice, f, m1->start, m1->end);
+ match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase);
+ if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
+ if (m2) recycle_if_unused(&m2);
cache_destroy(&slice);
- return new_match(defs, pat, m1->start, m1->end, (pat->type == BP_MATCH) ? MATCHES(m1, m2) : NULL);
+ recycle_if_unused(&m1);
+ return NULL;
}
- case BP_REPLACE: {
- match_t *p = NULL;
- if (pat->args.replace.pat) {
- p = match(defs, f, str, pat->args.replace.pat, ignorecase);
- if (p == NULL) return NULL;
- }
- return new_match(defs, pat, str, p ? p->end : str, MATCHES(p));
+ cache_destroy(&slice);
+ return new_match(defs, pat, m1->start, m1->end, (pat->type == BP_MATCH) ? MATCHES(m1, m2) : NULL);
+ }
+ case BP_REPLACE: {
+ match_t *p = NULL;
+ if (pat->args.replace.pat) {
+ p = match(defs, f, str, pat->args.replace.pat, ignorecase);
+ if (p == NULL) return NULL;
}
- case BP_REF: {
- match_t *cached;
- if (cache_get(f, defs, str, pat, &cached))
- return cached;
-
- def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
- if (def == NULL)
- errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
- pat_t *ref = def->pat;
-
- pat_t rec_op = {
- .type = BP_LEFTRECURSION,
- .start = ref->start,
- .end = ref->end,
- .min_matchlen = 0,
- .max_matchlen = -1,
- .args.leftrec = {
- .match = NULL,
- .visits = 0,
- .at = str,
- .fallback = ref,
- },
- };
- def_t defs2 = {
- .namelen = def->namelen,
- .name = def->name,
- .pat = &rec_op,
- .next = defs,
- };
-
- const char *prev = str;
- match_t *m = match(&defs2, f, str, ref, ignorecase);
- if (m == NULL) {
- cache_save(f, defs, str, pat, NULL);
- return NULL;
- }
+ return new_match(defs, pat, str, p ? p->end : str, MATCHES(p));
+ }
+ case BP_REF: {
+ match_t *cached;
+ if (cache_get(f, defs, str, pat, &cached))
+ return cached;
- while (rec_op.args.leftrec.visits > 0) {
- rec_op.args.leftrec.visits = 0;
- remove_ownership(&rec_op.args.leftrec.match);
- add_owner(&rec_op.args.leftrec.match, m);
- prev = m->end;
- match_t *m2 = match(&defs2, f, str, ref, ignorecase);
- if (m2 == NULL) break;
- if (m2->end <= prev) {
- recycle_if_unused(&m2);
- break;
- }
- m = m2;
+ def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
+ if (def == NULL)
+ errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
+ pat_t *ref = def->pat;
+
+ pat_t rec_op = {
+ .type = BP_LEFTRECURSION,
+ .start = ref->start,
+ .end = ref->end,
+ .min_matchlen = 0,
+ .max_matchlen = -1,
+ .args.leftrec = {
+ .match = NULL,
+ .visits = 0,
+ .at = str,
+ .fallback = ref,
+ },
+ };
+ def_t defs2 = {
+ .namelen = def->namelen,
+ .name = def->name,
+ .pat = &rec_op,
+ .next = defs,
+ };
+
+ const char *prev = str;
+ match_t *m = match(&defs2, f, str, ref, ignorecase);
+ if (m == NULL) {
+ cache_save(f, defs, str, pat, NULL);
+ return NULL;
+ }
+
+ while (rec_op.args.leftrec.visits > 0) {
+ rec_op.args.leftrec.visits = 0;
+ remove_ownership(&rec_op.args.leftrec.match);
+ add_owner(&rec_op.args.leftrec.match, m);
+ prev = m->end;
+ match_t *m2 = match(&defs2, f, str, ref, ignorecase);
+ if (m2 == NULL) break;
+ if (m2->end <= prev) {
+ recycle_if_unused(&m2);
+ break;
}
+ m = m2;
+ }
- // This match wrapper mainly exists for record-keeping purposes.
- // However, it also keeps `m` from getting garbage collected with
- // leftrec.match is GC'd. It also helps with visualization of match
- // results.
- // OPTIMIZE: remove this if necessary
- match_t *wrap = new_match(defs, pat, m->start, m->end, MATCHES(m));
- cache_save(f, defs, str, pat, wrap);
+ // This match wrapper mainly exists for record-keeping purposes.
+ // However, it also keeps `m` from getting garbage collected with
+ // leftrec.match is GC'd. It also helps with visualization of match
+ // results.
+ // OPTIMIZE: remove this if necessary
+ match_t *wrap = new_match(defs, pat, m->start, m->end, MATCHES(m));
+ cache_save(f, defs, str, pat, wrap);
- if (rec_op.args.leftrec.match)
- remove_ownership(&rec_op.args.leftrec.match);
+ if (rec_op.args.leftrec.match)
+ remove_ownership(&rec_op.args.leftrec.match);
- return wrap;
+ return wrap;
+ }
+ case BP_NODENT: {
+ if (*str != '\n') return NULL;
+ const char *start = str;
+
+ size_t linenum = get_line_number(f, str);
+ const char *p = get_line(f, linenum);
+ if (p < f->start) p = f->start; // Can happen with recursive matching
+
+ // Current indentation:
+ char denter = *p;
+ int dents = 0;
+ if (denter == ' ' || denter == '\t') {
+ for (; *p == denter && p < f->end; ++p) ++dents;
}
- case BP_NODENT: {
- if (*str != '\n') return NULL;
- const char *start = str;
-
- size_t linenum = get_line_number(f, str);
- const char *p = get_line(f, linenum);
- if (p < f->start) p = f->start; // Can happen with recursive matching
-
- // Current indentation:
- char denter = *p;
- int dents = 0;
- if (denter == ' ' || denter == '\t') {
- for (; *p == denter && p < f->end; ++p) ++dents;
- }
- // Subsequent indentation:
- while (*str == '\n' || *str == '\n') ++str;
- for (int i = 0; i < dents; i++)
- if (&str[i] >= f->end || str[i] != denter) return NULL;
+ // Subsequent indentation:
+ while (*str == '\n' || *str == '\n') ++str;
+ for (int i = 0; i < dents; i++)
+ if (&str[i] >= f->end || str[i] != denter) return NULL;
- return new_match(defs, pat, start, &str[dents], NULL);
- }
- case BP_ERROR: {
- match_t *p = pat->args.pat ? match(defs, f, str, pat->args.pat, ignorecase) : NULL;
- return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
- }
- default: {
- errx(EXIT_FAILURE, "Unknown pattern type: %u", pat->type);
- return NULL;
- }
+ return new_match(defs, pat, start, &str[dents], NULL);
+ }
+ case BP_ERROR: {
+ match_t *p = pat->args.pat ? match(defs, f, str, pat->args.pat, ignorecase) : NULL;
+ return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
+ }
+ default: {
+ errx(EXIT_FAILURE, "Unknown pattern type: %u", pat->type);
+ return NULL;
+ }
}
}
@@ -703,4 +703,4 @@ size_t free_all_matches(void)
return count;
}
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/match.h b/match.h
index 5d4bd26..74ae5ca 100644
--- a/match.h
+++ b/match.h
@@ -22,4 +22,4 @@ size_t free_all_matches(void);
size_t recycle_all_matches(void);
#endif
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/pattern.c b/pattern.c
index 5e4fb80..7e31bfc 100644
--- a/pattern.c
+++ b/pattern.c
@@ -234,291 +234,291 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
char c = *str;
str = next_char(f, str);
switch (c) {
- // Any char (dot)
- case '.': {
- if (*str == '.') { // ".."
- pat_t *skip = NULL;
- str = next_char(f, str);
- char skipper = *str;
- if (matchchar(&str, '%', false) || matchchar(&str, '=', false)) {
- skip = bp_simplepattern(f, str);
- if (!skip)
- file_err(f, str, str, "There should be a pattern to skip here after the '%c'", skipper);
- str = skip->end;
- }
- pat_t *upto = new_pat(f, start, str, 0, -1, skipper == '=' ? BP_UPTO_STRICT : BP_UPTO);
- upto->args.multiple.second = skip;
- return upto;
- } else {
- return new_pat(f, start, str, 1, UTF8_MAXCHARLEN, BP_ANYCHAR);
+ // Any char (dot)
+ case '.': {
+ if (*str == '.') { // ".."
+ pat_t *skip = NULL;
+ str = next_char(f, str);
+ char skipper = *str;
+ if (matchchar(&str, '%', false) || matchchar(&str, '=', false)) {
+ skip = bp_simplepattern(f, str);
+ if (!skip)
+ file_err(f, str, str, "There should be a pattern to skip here after the '%c'", skipper);
+ str = skip->end;
}
+ pat_t *upto = new_pat(f, start, str, 0, -1, skipper == '=' ? BP_UPTO_STRICT : BP_UPTO);
+ upto->args.multiple.second = skip;
+ return upto;
+ } else {
+ return new_pat(f, start, str, 1, UTF8_MAXCHARLEN, BP_ANYCHAR);
}
- // Char literals
- case '`': {
- pat_t *all = NULL;
- do { // Comma-separated items:
- if (str >= f->end || !*str || *str == '\n')
- file_err(f, str, str, "There should be a character here after the '`'");
+ }
+ // Char literals
+ case '`': {
+ pat_t *all = NULL;
+ do { // Comma-separated items:
+ if (str >= f->end || !*str || *str == '\n')
+ file_err(f, str, str, "There should be a character here after the '`'");
- const char *c1_loc = str;
- str = next_char(f, c1_loc);
- if (*str == '-') { // Range
- const char *c2_loc = ++str;
- if (next_char(f, c1_loc) > c1_loc+1 || next_char(f, c2_loc) > c2_loc+1)
- file_err(f, start, next_char(f, c2_loc), "Sorry, UTF-8 character ranges are not yet supported.");
- char c1 = *c1_loc, c2 = *c2_loc;
- if (!c2 || c2 == '\n')
- file_err(f, str, str, "There should be a character here to complete the character range.");
- if (c1 > c2) { // Swap order
- char tmp = c1;
- c1 = c2;
- c2 = tmp;
- }
- str = next_char(f, c2_loc);
- pat_t *pat = new_pat(f, start == c1_loc - 1 ? start : c1_loc, str, 1, 1, BP_RANGE);
- pat->args.range.low = (unsigned char)c1;
- pat->args.range.high = (unsigned char)c2;
- all = either_pat(f, all, pat);
- } else {
- size_t len = (size_t)(str - c1_loc);
- pat_t *pat = new_pat(f, start, str, len, (ssize_t)len, BP_STRING);
- pat->args.string = c1_loc;
- all = either_pat(f, all, pat);
+ const char *c1_loc = str;
+ str = next_char(f, c1_loc);
+ if (*str == '-') { // Range
+ const char *c2_loc = ++str;
+ if (next_char(f, c1_loc) > c1_loc+1 || next_char(f, c2_loc) > c2_loc+1)
+ file_err(f, start, next_char(f, c2_loc), "Sorry, UTF-8 character ranges are not yet supported.");
+ char c1 = *c1_loc, c2 = *c2_loc;
+ if (!c2 || c2 == '\n')
+ file_err(f, str, str, "There should be a character here to complete the character range.");
+ if (c1 > c2) { // Swap order
+ char tmp = c1;
+ c1 = c2;
+ c2 = tmp;
}
- } while (*str++ == ',');
+ str = next_char(f, c2_loc);
+ pat_t *pat = new_pat(f, start == c1_loc - 1 ? start : c1_loc, str, 1, 1, BP_RANGE);
+ pat->args.range.low = (unsigned char)c1;
+ pat->args.range.high = (unsigned char)c2;
+ all = either_pat(f, all, pat);
+ } else {
+ size_t len = (size_t)(str - c1_loc);
+ pat_t *pat = new_pat(f, start, str, len, (ssize_t)len, BP_STRING);
+ pat->args.string = c1_loc;
+ all = either_pat(f, all, pat);
+ }
+ } while (*str++ == ',');
- return all;
- }
- // Escapes
- case '\\': {
- if (!*str || *str == '\n')
- file_err(f, str, str, "There should be an escape sequence here after this backslash.");
+ return all;
+ }
+ // Escapes
+ case '\\': {
+ if (!*str || *str == '\n')
+ file_err(f, str, str, "There should be an escape sequence here after this backslash.");
- pat_t *all = NULL;
- do { // Comma-separated items:
- const char *itemstart = str-1;
- if (*str == 'N') { // \N (nodent)
- all = either_pat(f, all, new_pat(f, itemstart, ++str, 1, -1, BP_NODENT));
- continue;
- } else if (*str == 'i') { // \i (identifier char)
- all = either_pat(f, all, new_pat(f, itemstart, ++str, 1, -1, BP_ID_CONTINUE));
- continue;
- } else if (*str == 'I') { // \I (identifier char, not including numbers)
- all = either_pat(f, all, new_pat(f, itemstart, ++str, 1, -1, BP_ID_START));
- continue;
- } else if (*str == 'b') { // \b word boundary
- all = either_pat(f, all, new_pat(f, itemstart, ++str, 0, 0, BP_WORD_BOUNDARY));
- continue;
- }
+ pat_t *all = NULL;
+ do { // Comma-separated items:
+ const char *itemstart = str-1;
+ if (*str == 'N') { // \N (nodent)
+ all = either_pat(f, all, new_pat(f, itemstart, ++str, 1, -1, BP_NODENT));
+ continue;
+ } else if (*str == 'i') { // \i (identifier char)
+ all = either_pat(f, all, new_pat(f, itemstart, ++str, 1, -1, BP_ID_CONTINUE));
+ continue;
+ } else if (*str == 'I') { // \I (identifier char, not including numbers)
+ all = either_pat(f, all, new_pat(f, itemstart, ++str, 1, -1, BP_ID_START));
+ continue;
+ } else if (*str == 'b') { // \b word boundary
+ all = either_pat(f, all, new_pat(f, itemstart, ++str, 0, 0, BP_WORD_BOUNDARY));
+ continue;
+ }
- const char *opstart = str;
- unsigned char e_low = (unsigned char)unescapechar(str, &str);
- if (str == opstart)
- file_err(f, start, str+1, "This isn't a valid escape sequence.");
- unsigned char e_high = e_low;
- if (*str == '-') { // Escape range (e.g. \x00-\xFF)
- ++str;
- if (next_char(f, str) != str+1)
- file_err(f, start, next_char(f, str), "Sorry, UTF8 escape sequences are not supported in ranges.");
- const char *seqstart = str;
- e_high = (unsigned char)unescapechar(str, &str);
- if (str == seqstart)
- file_err(f, seqstart, str+1, "This value isn't a valid escape sequence");
- if (e_high < e_low)
- file_err(f, start, str, "Escape ranges should be low-to-high, but this is high-to-low.");
- }
- pat_t *esc = new_pat(f, start, str, 1, 1, BP_RANGE);
- esc->args.range.low = e_low;
- esc->args.range.high = e_high;
- all = either_pat(f, all, esc);
- } while (*str++ == ',');
+ const char *opstart = str;
+ unsigned char e_low = (unsigned char)unescapechar(str, &str);
+ if (str == opstart)
+ file_err(f, start, str+1, "This isn't a valid escape sequence.");
+ unsigned char e_high = e_low;
+ if (*str == '-') { // Escape range (e.g. \x00-\xFF)
+ ++str;
+ if (next_char(f, str) != str+1)
+ file_err(f, start, next_char(f, str), "Sorry, UTF8 escape sequences are not supported in ranges.");
+ const char *seqstart = str;
+ e_high = (unsigned char)unescapechar(str, &str);
+ if (str == seqstart)
+ file_err(f, seqstart, str+1, "This value isn't a valid escape sequence");
+ if (e_high < e_low)
+ file_err(f, start, str, "Escape ranges should be low-to-high, but this is high-to-low.");
+ }
+ pat_t *esc = new_pat(f, start, str, 1, 1, BP_RANGE);
+ esc->args.range.low = e_low;
+ esc->args.range.high = e_high;
+ all = either_pat(f, all, esc);
+ } while (*str++ == ',');
- return all;
- }
- // Word boundary
- case '|': {
- return new_pat(f, start, str, 0, 0, BP_WORD_BOUNDARY);
- }
- // String literal
- case '"': case '\'': case '\002': case '{': {
- char endquote = c == '\002' ? '\003' : (c == '{' ? '}' : c);
- char *litstart = (char*)str;
- while (str < f->end && *str != endquote)
- str = next_char(f, str);
- size_t len = (size_t)(str - litstart);
+ return all;
+ }
+ // Word boundary
+ case '|': {
+ return new_pat(f, start, str, 0, 0, BP_WORD_BOUNDARY);
+ }
+ // String literal
+ case '"': case '\'': case '\002': case '{': {
+ char endquote = c == '\002' ? '\003' : (c == '{' ? '}' : c);
+ char *litstart = (char*)str;
+ while (str < f->end && *str != endquote)
str = next_char(f, str);
+ size_t len = (size_t)(str - litstart);
+ str = next_char(f, str);
- pat_t *pat = new_pat(f, start, str, len, (ssize_t)len, BP_STRING);
- pat->args.string = litstart;
- return pat;
- }
- // Not <pat>
- case '!': {
- pat_t *p = bp_simplepattern(f, str);
- if (!p) file_err(f, str, str, "There should be a pattern after this '!'");
- pat_t *not = new_pat(f, start, p->end, 0, 0, BP_NOT);
- not->args.pat = p;
- return not;
+ pat_t *pat = new_pat(f, start, str, len, (ssize_t)len, BP_STRING);
+ pat->args.string = litstart;
+ return pat;
+ }
+ // Not <pat>
+ case '!': {
+ pat_t *p = bp_simplepattern(f, str);
+ if (!p) file_err(f, str, str, "There should be a pattern after this '!'");
+ pat_t *not = new_pat(f, start, p->end, 0, 0, BP_NOT);
+ not->args.pat = p;
+ return not;
+ }
+ // Number of repetitions: <N>(-<N> / - / + / "")
+ case '0': case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9': {
+ size_t min = 0;
+ ssize_t max = -1;
+ --str;
+ long n1 = strtol(str, (char**)&str, 10);
+ if (matchchar(&str, '-', false)) {
+ str = after_spaces(str, false);
+ const char *numstart = str;
+ long n2 = strtol(str, (char**)&str, 10);
+ if (str == numstart) min = 0, max = (ssize_t)n1;
+ else min = (size_t)n1, max = (ssize_t)n2;
+ } else if (matchchar(&str, '+', false)) {
+ min = (size_t)n1, max = -1;
+ } else {
+ min = (size_t)n1, max = (ssize_t)n1;
}
- // Number of repetitions: <N>(-<N> / - / + / "")
- case '0': case '1': case '2': case '3': case '4': case '5':
- case '6': case '7': case '8': case '9': {
- size_t min = 0;
- ssize_t max = -1;
- --str;
- long n1 = strtol(str, (char**)&str, 10);
- if (matchchar(&str, '-', false)) {
- str = after_spaces(str, false);
- const char *numstart = str;
- long n2 = strtol(str, (char**)&str, 10);
- if (str == numstart) min = 0, max = (ssize_t)n1;
- else min = (size_t)n1, max = (ssize_t)n2;
- } else if (matchchar(&str, '+', false)) {
- min = (size_t)n1, max = -1;
- } else {
- min = (size_t)n1, max = (ssize_t)n1;
- }
- pat_t *repeating = bp_simplepattern(f, str);
- if (!repeating)
- file_err(f, str, str, "There should be a pattern after this repetition count.");
+ pat_t *repeating = bp_simplepattern(f, str);
+ if (!repeating)
+ file_err(f, str, str, "There should be a pattern after this repetition count.");
+ str = repeating->end;
+ pat_t *sep = NULL;
+ if (matchchar(&str, '%', false)) {
+ sep = bp_simplepattern(f, str);
+ if (!sep)
+ file_err(f, str, str, "There should be a separator pattern after this '%%'");
+ str = sep->end;
+ } else {
str = repeating->end;
- pat_t *sep = NULL;
- if (matchchar(&str, '%', false)) {
- sep = bp_simplepattern(f, str);
- if (!sep)
- file_err(f, str, str, "There should be a separator pattern after this '%%'");
- str = sep->end;
- } else {
- str = repeating->end;
- }
- return new_range(f, start, str, min, max, repeating, sep);
- }
- // Lookbehind
- case '<': {
- pat_t *behind = bp_simplepattern(f, str);
- if (!behind)
- file_err(f, str, str, "There should be a pattern after this '<'");
- str = behind->end;
- str = behind->end;
- pat_t *pat = new_pat(f, start, str, 0, 0, BP_AFTER);
- pat->args.pat = behind;
- return pat;
}
- // Lookahead
- case '>': {
- pat_t *ahead = bp_simplepattern(f, str);
- if (!ahead)
- file_err(f, str, str, "There should be a pattern after this '>'");
- str = ahead->end;
- pat_t *pat = new_pat(f, start, str, 0, 0, BP_BEFORE);
- pat->args.pat = ahead;
- return pat;
+ return new_range(f, start, str, min, max, repeating, sep);
+ }
+ // Lookbehind
+ case '<': {
+ pat_t *behind = bp_simplepattern(f, str);
+ if (!behind)
+ file_err(f, str, str, "There should be a pattern after this '<'");
+ str = behind->end;
+ str = behind->end;
+ pat_t *pat = new_pat(f, start, str, 0, 0, BP_AFTER);
+ pat->args.pat = behind;
+ return pat;
+ }
+ // Lookahead
+ case '>': {
+ pat_t *ahead = bp_simplepattern(f, str);
+ if (!ahead)
+ file_err(f, str, str, "There should be a pattern after this '>'");
+ str = ahead->end;
+ pat_t *pat = new_pat(f, start, str, 0, 0, BP_BEFORE);
+ pat->args.pat = ahead;
+ return pat;
+ }
+ // Parentheses
+ case '(': {
+ if (start + 2 < f->end && strncmp(start, "(!)", 3) == 0) { // (!) errors
+ str = start + 3;
+ pat_t *pat = bp_simplepattern(f, str);
+ if (!pat) pat = new_pat(f, str, str, 0, 0, BP_STRING);
+ pat = expand_replacements(f, pat, false);
+ pat_t *error = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_ERROR);
+ error->args.pat = pat;
+ return error;
}
- // Parentheses
- case '(': {
- if (start + 2 < f->end && strncmp(start, "(!)", 3) == 0) { // (!) errors
- str = start + 3;
- pat_t *pat = bp_simplepattern(f, str);
- if (!pat) pat = new_pat(f, str, str, 0, 0, BP_STRING);
- pat = expand_replacements(f, pat, false);
- pat_t *error = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_ERROR);
- error->args.pat = pat;
- return error;
- }
- pat_t *pat = bp_pattern_nl(f, str, true);
- if (!pat)
- file_err(f, str, str, "There should be a valid pattern after this parenthesis.");
- str = pat->end;
- if (!matchchar(&str, ')', true)) file_err(f, str, str, "Missing paren: )");
- pat->start = start;
- pat->end = str;
- return pat;
- }
- // Square brackets
- case '[': {
- pat_t *maybe = bp_pattern_nl(f, str, true);
- if (!maybe)
- file_err(f, str, str, "There should be a valid pattern after this square bracket.");
- str = maybe->end;
- (void)matchchar(&str, ']', true);
- return new_range(f, start, str, 0, 1, maybe, NULL);
+ pat_t *pat = bp_pattern_nl(f, str, true);
+ if (!pat)
+ file_err(f, str, str, "There should be a valid pattern after this parenthesis.");
+ str = pat->end;
+ if (!matchchar(&str, ')', true)) file_err(f, str, str, "Missing paren: )");
+ pat->start = start;
+ pat->end = str;
+ return pat;
+ }
+ // Square brackets
+ case '[': {
+ pat_t *maybe = bp_pattern_nl(f, str, true);
+ if (!maybe)
+ file_err(f, str, str, "There should be a valid pattern after this square bracket.");
+ str = maybe->end;
+ (void)matchchar(&str, ']', true);
+ return new_range(f, start, str, 0, 1, maybe, NULL);
+ }
+ // Repeating
+ case '*': case '+': {
+ size_t min = (size_t)(c == '*' ? 0 : 1);
+ pat_t *repeating = bp_simplepattern(f, str);
+ if (!repeating)
+ file_err(f, str, str, "There should be a valid pattern here after the '%c'", c);
+ str = repeating->end;
+ pat_t *sep = NULL;
+ if (matchchar(&str, '%', false)) {
+ sep = bp_simplepattern(f, str);
+ if (!sep)
+ file_err(f, str, str, "There should be a separator pattern after the '%%' here.");
+ str = sep->end;
}
- // Repeating
- case '*': case '+': {
- size_t min = (size_t)(c == '*' ? 0 : 1);
- pat_t *repeating = bp_simplepattern(f, str);
- if (!repeating)
- file_err(f, str, str, "There should be a valid pattern here after the '%c'", c);
- str = repeating->end;
- pat_t *sep = NULL;
- if (matchchar(&str, '%', false)) {
- sep = bp_simplepattern(f, str);
- if (!sep)
- file_err(f, str, str, "There should be a separator pattern after the '%%' here.");
- str = sep->end;
- }
- return new_range(f, start, str, min, -1, repeating, sep);
+ return new_range(f, start, str, min, -1, repeating, sep);
+ }
+ // Capture
+ case '@': {
+ const char *name = NULL;
+ size_t namelen = 0;
+ const char *a = after_name(str);
+ const char *eq = a;
+ if (a > str && !matchstr(&eq, "=>", false) && matchchar(&eq, '=', false)) {
+ name = str;
+ namelen = (size_t)(a-str);
+ str = eq;
}
- // Capture
- case '@': {
- const char *name = NULL;
- size_t namelen = 0;
- const char *a = after_name(str);
- const char *eq = a;
- if (a > str && !matchstr(&eq, "=>", false) && matchchar(&eq, '=', false)) {
- name = str;
- namelen = (size_t)(a-str);
- str = eq;
- }
- pat_t *pat = bp_simplepattern(f, str);
- if (!pat)
- file_err(f, str, str, "There should be a valid pattern here to capture after the '@'");
+ pat_t *pat = bp_simplepattern(f, str);
+ if (!pat)
+ file_err(f, str, str, "There should be a valid pattern here to capture after the '@'");
- pat_t *capture = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_CAPTURE);
- capture->args.capture.capture_pat = pat;
- capture->args.capture.name = name;
- capture->args.capture.namelen = namelen;
- return capture;
- }
- // Start of file/line
- case '^': {
- if (*str == '^')
- return new_pat(f, start, ++str, 0, 0, BP_START_OF_FILE);
- return new_pat(f, start, str, 0, 0, BP_START_OF_LINE);
- }
- // End of file/line:
- case '$': {
- if (*str == '$')
- return new_pat(f, start, ++str, 0, 0, BP_END_OF_FILE);
- return new_pat(f, start, str, 0, 0, BP_END_OF_LINE);
- }
- default: {
- // Reference
- if (!isalpha(c) && c != '_') return NULL;
- str = after_name(start);
- size_t namelen = (size_t)(str - start);
- if (matchchar(&str, ':', false)) { // Definitions
- pat_t *def = bp_pattern_nl(f, str, false);
- if (!def) file_err(f, str, f->end, "Could not parse this definition.");
- str = def->end;
- (void)matchchar(&str, ';', false); // Optional semicolon
- str = after_spaces(str, true);
- pat_t *pat = bp_pattern_nl(f, str, false);
- if (pat) str = pat->end;
- else pat = def;
- pat_t *ret = new_pat(f, start, str, pat->min_matchlen, pat->max_matchlen, BP_DEFINITION);
- ret->args.def.name = start;
- ret->args.def.namelen = namelen;
- ret->args.def.def = def;
- ret->args.def.pat = pat;
- return ret;
- }
- pat_t *ref = new_pat(f, start, str, 0, -1, BP_REF);
- ref->args.ref.name = start;
- ref->args.ref.len = namelen;
- return ref;
+ pat_t *capture = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_CAPTURE);
+ capture->args.capture.capture_pat = pat;
+ capture->args.capture.name = name;
+ capture->args.capture.namelen = namelen;
+ return capture;
+ }
+ // Start of file/line
+ case '^': {
+ if (*str == '^')
+ return new_pat(f, start, ++str, 0, 0, BP_START_OF_FILE);
+ return new_pat(f, start, str, 0, 0, BP_START_OF_LINE);
+ }
+ // End of file/line:
+ case '$': {
+ if (*str == '$')
+ return new_pat(f, start, ++str, 0, 0, BP_END_OF_FILE);
+ return new_pat(f, start, str, 0, 0, BP_END_OF_LINE);
+ }
+ default: {
+ // Reference
+ if (!isalpha(c) && c != '_') return NULL;
+ str = after_name(start);
+ size_t namelen = (size_t)(str - start);
+ if (matchchar(&str, ':', false)) { // Definitions
+ pat_t *def = bp_pattern_nl(f, str, false);
+ if (!def) file_err(f, str, f->end, "Could not parse this definition.");
+ str = def->end;
+ (void)matchchar(&str, ';', false); // Optional semicolon
+ str = after_spaces(str, true);
+ pat_t *pat = bp_pattern_nl(f, str, false);
+ if (pat) str = pat->end;
+ else pat = def;
+ pat_t *ret = new_pat(f, start, str, pat->min_matchlen, pat->max_matchlen, BP_DEFINITION);
+ ret->args.def.name = start;
+ ret->args.def.namelen = namelen;
+ ret->args.def.def = def;
+ ret->args.def.pat = pat;
+ return ret;
}
+ pat_t *ref = new_pat(f, start, str, 0, -1, BP_REF);
+ ref->args.ref.name = start;
+ ref->args.ref.len = namelen;
+ return ref;
+ }
}
}
@@ -600,4 +600,4 @@ pat_t *bp_pattern(file_t *f, const char *str)
return bp_pattern_nl(f, str, false);
}
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/pattern.h b/pattern.h
index 47d0c63..39aba63 100644
--- a/pattern.h
+++ b/pattern.h
@@ -21,4 +21,4 @@ __attribute__((nonnull))
pat_t *bp_pattern(file_t *f, const char *str);
#endif
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/print.c b/print.c
index b5eb219..f3d5278 100644
--- a/print.c
+++ b/print.c
@@ -266,4 +266,4 @@ int print_errors(file_t *f, match_t *m)
return ret;
}
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/print.h b/print.h
index 2bc2948..56dcae6 100644
--- a/print.h
+++ b/print.h
@@ -28,4 +28,4 @@ __attribute__((nonnull))
int print_errors(file_t *f, match_t *m);
#endif
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/types.h b/types.h
index dd7de77..eb22e69 100644
--- a/types.h
+++ b/types.h
@@ -130,4 +130,4 @@ typedef struct def_s {
} def_t;
#endif
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/utf8.c b/utf8.c
index ad807bd..6180ffe 100644
--- a/utf8.c
+++ b/utf8.c
@@ -280,4 +280,4 @@ bool isidcontinue(file_t *f, const char *str)
|| find_in_ranges(codepoint, XID_Continue_only, ARRAY_LEN(XID_Continue_only)));
}
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/utf8.h b/utf8.h
index 9c43f13..97e259e 100644
--- a/utf8.h
+++ b/utf8.h
@@ -18,4 +18,4 @@ __attribute__((nonnull, pure))
bool isidcontinue(file_t *f, const char *str);
#endif
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/utils.c b/utils.c
index cb0719c..47958f7 100644
--- a/utils.c
+++ b/utils.c
@@ -20,18 +20,18 @@ const char *after_spaces(const char *str, bool skip_nl)
// Skip whitespace and comments:
skip_whitespace:
switch (*str) {
- case '\r': case '\n':
- if (!skip_nl) break;
- __attribute__ ((fallthrough));
- case ' ': case '\t': {
- ++str;
- goto skip_whitespace;
- }
- case '#': {
- while (*str && *str != '\n') ++str;
- goto skip_whitespace;
- }
- default: break;
+ case '\r': case '\n':
+ if (!skip_nl) break;
+ __attribute__ ((fallthrough));
+ case ' ': case '\t': {
+ ++str;
+ goto skip_whitespace;
+ }
+ case '#': {
+ while (*str && *str != '\n') ++str;
+ goto skip_whitespace;
+ }
+ default: break;
}
return str;
}
@@ -90,39 +90,38 @@ char unescapechar(const char *escaped, const char **end)
size_t len = 1;
unsigned char ret = (unsigned char)*escaped;
switch (*escaped) {
- case 'a': ret = '\a'; break; case 'b': ret = '\b'; break;
- case 'n': ret = '\n'; break; case 'r': ret = '\r'; break;
- case 't': ret = '\t'; break; case 'v': ret = '\v'; break;
- case 'e': ret = '\033'; break; case '\\': ret = '\\'; break;
- case 'x': { // Hex
- static const unsigned char hextable[255] = {
- ['0']=0x10, ['1']=0x1, ['2']=0x2, ['3']=0x3, ['4']=0x4,
- ['5']=0x5, ['6']=0x6, ['7']=0x7, ['8']=0x8, ['9']=0x9,
- ['a']=0xa, ['b']=0xb, ['c']=0xc, ['d']=0xd, ['e']=0xe, ['f']=0xf,
- ['A']=0xa, ['B']=0xb, ['C']=0xc, ['D']=0xd, ['E']=0xe, ['F']=0xf,
- };
- if (hextable[(int)escaped[1]] && hextable[(int)escaped[2]]) {
- ret = (hextable[(int)escaped[1]] << 4) | (hextable[(int)escaped[2]] & 0xF);
- len = 3;
- }
- break;
+ case 'a': ret = '\a'; break; case 'b': ret = '\b'; break;
+ case 'n': ret = '\n'; break; case 'r': ret = '\r'; break;
+ case 't': ret = '\t'; break; case 'v': ret = '\v'; break;
+ case 'e': ret = '\033'; break; case '\\': ret = '\\'; break;
+ case 'x': { // Hex
+ static const unsigned char hextable[255] = {
+ ['0']=0x10, ['1']=0x1, ['2']=0x2, ['3']=0x3, ['4']=0x4,
+ ['5']=0x5, ['6']=0x6, ['7']=0x7, ['8']=0x8, ['9']=0x9,
+ ['a']=0xa, ['b']=0xb, ['c']=0xc, ['d']=0xd, ['e']=0xe, ['f']=0xf,
+ ['A']=0xa, ['B']=0xb, ['C']=0xc, ['D']=0xd, ['E']=0xe, ['F']=0xf,
+ };
+ if (hextable[(int)escaped[1]] && hextable[(int)escaped[2]]) {
+ ret = (hextable[(int)escaped[1]] << 4) | (hextable[(int)escaped[2]] & 0xF);
+ len = 3;
}
- case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { // Octal
- ret = (unsigned char)(escaped[0] - '0');
- if ('0' <= escaped[1] && escaped[1] <= '7') {
+ break;
+ }
+ case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { // Octal
+ ret = (unsigned char)(escaped[0] - '0');
+ if ('0' <= escaped[1] && escaped[1] <= '7') {
+ ++len;
+ ret = (ret << 3) | (escaped[1] - '0');
+ if ('0' <= escaped[2] && escaped[2] <= '7') {
++len;
- ret = (ret << 3) | (escaped[1] - '0');
- if ('0' <= escaped[2] && escaped[2] <= '7') {
- ++len;
- ret = (ret << 3) | (escaped[2] - '0');
- }
+ ret = (ret << 3) | (escaped[2] - '0');
}
- break;
- }
- default: {
- if (end) *end = escaped;
- return (char)0;
}
+ break;
+ }
+ default:
+ if (end) *end = escaped;
+ return (char)0;
}
if (end) *end = &escaped[len];
return (char)ret;
@@ -151,4 +150,4 @@ void delete(void *p)
*((void**)p) = NULL;
}
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/utils.h b/utils.h
index 04df47a..59fedf7 100644
--- a/utils.h
+++ b/utils.h
@@ -63,4 +63,4 @@ __attribute__((nonnull))
void delete(void *p);
#endif
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0