Style change: added cino=:0 (i.e. case statements at the same indentation as
switch). Also fixed an issue where $$ would fail to match when the file ends with a trailing newline.
This commit is contained in:
parent
d44806f746
commit
f8860c385e
2
bp.c
2
bp.c
@ -562,4 +562,4 @@ int main(int argc, char *argv[])
|
|||||||
exit(found > 0 ? EXIT_SUCCESS : EXIT_FAILURE);
|
exit(found > 0 ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
@ -68,4 +68,4 @@ def_t *free_defs(def_t *defs, def_t *stop)
|
|||||||
return defs;
|
return defs;
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
@ -17,4 +17,4 @@ __attribute__((nonnull(1)))
|
|||||||
def_t *free_defs(def_t *defs, def_t *stop);
|
def_t *free_defs(def_t *defs, def_t *stop);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
@ -65,9 +65,9 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
|
|||||||
|
|
||||||
for (size_t i = 0; i < viz_typelen; i++) {
|
for (size_t i = 0; i < viz_typelen; i++) {
|
||||||
switch (viz_type[i]) {
|
switch (viz_type[i]) {
|
||||||
case '\n': printf("↵"); break;
|
case '\n': printf("↵"); break;
|
||||||
case '\t': printf("⇥"); break;
|
case '\t': printf("⇥"); break;
|
||||||
default: printf("%c", viz_type[i]); break;
|
default: printf("%c", viz_type[i]); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -169,3 +169,5 @@ void explain_match(match_t *m)
|
|||||||
_explain_matches(&first, 0, m->start, (size_t)(m->end - m->start));
|
_explain_matches(&first, 0, m->start, (size_t)(m->end - m->start));
|
||||||
printf("\033[?7h"); // Re-enable line wrapping
|
printf("\033[?7h"); // Re-enable line wrapping
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
@ -10,4 +10,4 @@ __attribute__((nonnull))
|
|||||||
void explain_match(match_t *m);
|
void explain_match(match_t *m);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
2
files.c
2
files.c
@ -383,4 +383,4 @@ void cache_destroy(file_t *f)
|
|||||||
f->cache.size = 0;
|
f->cache.size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
2
files.h
2
files.h
@ -53,4 +53,4 @@ __attribute__((nonnull))
|
|||||||
void cache_destroy(file_t *f);
|
void cache_destroy(file_t *f);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
12
json.c
12
json.c
@ -29,11 +29,11 @@ static int _json_match(const char *text, match_t *m, int comma, bool verbose)
|
|||||||
printf("{\"rule\":\"");
|
printf("{\"rule\":\"");
|
||||||
for (const char *c = m->pat->start; c < m->pat->end; c++) {
|
for (const char *c = m->pat->start; c < m->pat->end; c++) {
|
||||||
switch (*c) {
|
switch (*c) {
|
||||||
case '"': printf("\\\""); break;
|
case '"': printf("\\\""); break;
|
||||||
case '\\': printf("\\\\"); break;
|
case '\\': printf("\\\\"); break;
|
||||||
case '\t': printf("\\t"); break;
|
case '\t': printf("\\t"); break;
|
||||||
case '\n': printf("↵"); break;
|
case '\n': printf("↵"); break;
|
||||||
default: printf("%c", *c); break;
|
default: printf("%c", *c); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
printf("\",\"start\":%ld,\"end\":%ld,\"children\":[",
|
printf("\",\"start\":%ld,\"end\":%ld,\"children\":[",
|
||||||
@ -52,4 +52,4 @@ void json_match(const char *text, match_t *m, bool verbose)
|
|||||||
(void)_json_match(text, m, 0, verbose);
|
(void)_json_match(text, m, 0, verbose);
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
2
json.h
2
json.h
@ -12,4 +12,4 @@ __attribute__((nonnull))
|
|||||||
void json_match(const char *text, match_t *m, bool verbose);
|
void json_match(const char *text, match_t *m, bool verbose);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
720
match.c
720
match.c
@ -106,25 +106,25 @@ static pat_t *first_pat(def_t *defs, pat_t *pat)
|
|||||||
{
|
{
|
||||||
for (pat_t *p = pat; p; ) {
|
for (pat_t *p = pat; p; ) {
|
||||||
switch (p->type) {
|
switch (p->type) {
|
||||||
case BP_BEFORE:
|
case BP_BEFORE:
|
||||||
p = p->args.pat; break;
|
p = p->args.pat; break;
|
||||||
case BP_REPEAT:
|
case BP_REPEAT:
|
||||||
if (p->args.repetitions.min == 0)
|
if (p->args.repetitions.min == 0)
|
||||||
return p;
|
return p;
|
||||||
p = p->args.repetitions.repeat_pat; break;
|
p = p->args.repetitions.repeat_pat; break;
|
||||||
case BP_CAPTURE:
|
case BP_CAPTURE:
|
||||||
p = p->args.capture.capture_pat; break;
|
p = p->args.capture.capture_pat; break;
|
||||||
case BP_CHAIN: case BP_MATCH: case BP_NOT_MATCH:
|
case BP_CHAIN: case BP_MATCH: case BP_NOT_MATCH:
|
||||||
p = p->args.multiple.first; break;
|
p = p->args.multiple.first; break;
|
||||||
case BP_REPLACE:
|
case BP_REPLACE:
|
||||||
p = p->args.replace.pat; break;
|
p = p->args.replace.pat; break;
|
||||||
case BP_REF: {
|
case BP_REF: {
|
||||||
pat_t *p2 = deref(defs, p);
|
pat_t *p2 = deref(defs, p);
|
||||||
if (p2 == p) return p2;
|
if (p2 == p) return p2;
|
||||||
p = p2;
|
p = p2;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default: return p;
|
default: return p;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return pat;
|
return pat;
|
||||||
@ -187,376 +187,376 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk
|
|||||||
static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool ignorecase)
|
static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool ignorecase)
|
||||||
{
|
{
|
||||||
switch (pat->type) {
|
switch (pat->type) {
|
||||||
case BP_DEFINITION: {
|
case BP_DEFINITION: {
|
||||||
def_t *defs2 = with_def(defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def);
|
def_t *defs2 = with_def(defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def);
|
||||||
match_t *m = match(defs2, f, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def, ignorecase);
|
match_t *m = match(defs2, f, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def, ignorecase);
|
||||||
defs = free_defs(defs2, defs);
|
defs = free_defs(defs2, defs);
|
||||||
return m;
|
return m;
|
||||||
|
}
|
||||||
|
case BP_LEFTRECURSION: {
|
||||||
|
// Left recursion occurs when a pattern directly or indirectly
|
||||||
|
// invokes itself at the same position in the text. It's handled as
|
||||||
|
// a special case, but if a pattern invokes itself at a later
|
||||||
|
// point, it can be handled with normal recursion.
|
||||||
|
// See: left-recursion.md for more details.
|
||||||
|
if (str == pat->args.leftrec.at) {
|
||||||
|
++pat->args.leftrec.visits;
|
||||||
|
return pat->args.leftrec.match;
|
||||||
|
} else {
|
||||||
|
return match(defs, f, str, pat->args.leftrec.fallback, ignorecase);
|
||||||
}
|
}
|
||||||
case BP_LEFTRECURSION: {
|
}
|
||||||
// Left recursion occurs when a pattern directly or indirectly
|
case BP_ANYCHAR: {
|
||||||
// invokes itself at the same position in the text. It's handled as
|
return (str < f->end && *str != '\n') ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
|
||||||
// a special case, but if a pattern invokes itself at a later
|
}
|
||||||
// point, it can be handled with normal recursion.
|
case BP_ID_START: {
|
||||||
// See: left-recursion.md for more details.
|
return (str < f->end && isidstart(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
|
||||||
if (str == pat->args.leftrec.at) {
|
}
|
||||||
++pat->args.leftrec.visits;
|
case BP_ID_CONTINUE: {
|
||||||
return pat->args.leftrec.match;
|
return (str < f->end && isidcontinue(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
|
||||||
} else {
|
}
|
||||||
return match(defs, f, str, pat->args.leftrec.fallback, ignorecase);
|
case BP_START_OF_FILE: {
|
||||||
}
|
return (str == f->start) ? new_match(defs, pat, str, str, NULL) : NULL;
|
||||||
}
|
}
|
||||||
case BP_ANYCHAR: {
|
case BP_START_OF_LINE: {
|
||||||
return (str < f->end && *str != '\n') ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
|
return (str == f->start || str[-1] == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
|
||||||
}
|
}
|
||||||
case BP_ID_START: {
|
case BP_END_OF_FILE: {
|
||||||
return (str < f->end && isidstart(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
|
return (str == f->end || (str == f->end-1 && *str == '\n')) ? new_match(defs, pat, str, str, NULL) : NULL;
|
||||||
}
|
}
|
||||||
case BP_ID_CONTINUE: {
|
case BP_END_OF_LINE: {
|
||||||
return (str < f->end && isidcontinue(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
|
return (str == f->end || *str == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
|
||||||
}
|
}
|
||||||
case BP_START_OF_FILE: {
|
case BP_WORD_BOUNDARY: {
|
||||||
return (str == f->start) ? new_match(defs, pat, str, str, NULL) : NULL;
|
return (str == f->start || isidcontinue(f, str) != isidcontinue(f, prev_char(f, str))) ? new_match(defs, pat, str, str, NULL) : NULL;
|
||||||
}
|
}
|
||||||
case BP_START_OF_LINE: {
|
case BP_STRING: {
|
||||||
return (str == f->start || str[-1] == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
|
if (&str[pat->min_matchlen] > f->end) return NULL;
|
||||||
}
|
if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, pat->min_matchlen) != 0)
|
||||||
case BP_END_OF_FILE: {
|
return NULL;
|
||||||
return (str == f->end) ? new_match(defs, pat, str, str, NULL) : NULL;
|
return new_match(defs, pat, str, str + pat->min_matchlen, NULL);
|
||||||
}
|
}
|
||||||
case BP_END_OF_LINE: {
|
case BP_RANGE: {
|
||||||
return (str == f->end || *str == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
|
if (str >= f->end) return NULL;
|
||||||
}
|
if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high)
|
||||||
case BP_WORD_BOUNDARY: {
|
return NULL;
|
||||||
return (str == f->start || isidcontinue(f, str) != isidcontinue(f, prev_char(f, str))) ? new_match(defs, pat, str, str, NULL) : NULL;
|
return new_match(defs, pat, str, str+1, NULL);
|
||||||
}
|
}
|
||||||
case BP_STRING: {
|
case BP_NOT: {
|
||||||
if (&str[pat->min_matchlen] > f->end) return NULL;
|
match_t *m = match(defs, f, str, pat->args.pat, ignorecase);
|
||||||
if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, pat->min_matchlen) != 0)
|
if (m != NULL) {
|
||||||
return NULL;
|
|
||||||
return new_match(defs, pat, str, str + pat->min_matchlen, NULL);
|
|
||||||
}
|
|
||||||
case BP_RANGE: {
|
|
||||||
if (str >= f->end) return NULL;
|
|
||||||
if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high)
|
|
||||||
return NULL;
|
|
||||||
return new_match(defs, pat, str, str+1, NULL);
|
|
||||||
}
|
|
||||||
case BP_NOT: {
|
|
||||||
match_t *m = match(defs, f, str, pat->args.pat, ignorecase);
|
|
||||||
if (m != NULL) {
|
|
||||||
recycle_if_unused(&m);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
return new_match(defs, pat, str, str, NULL);
|
|
||||||
}
|
|
||||||
case BP_UPTO: case BP_UPTO_STRICT: {
|
|
||||||
match_t *m = new_match(defs, pat, str, str, NULL);
|
|
||||||
pat_t *target = deref(defs, pat->args.multiple.first),
|
|
||||||
*skip = deref(defs, pat->args.multiple.second);
|
|
||||||
if (!target && !skip) {
|
|
||||||
while (str < f->end && *str != '\n') ++str;
|
|
||||||
m->end = str;
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t child_cap = 0, nchildren = 0;
|
|
||||||
for (const char *prev = NULL; prev < str; ) {
|
|
||||||
prev = str;
|
|
||||||
if (target) {
|
|
||||||
match_t *p = match(defs, f, str, target, ignorecase);
|
|
||||||
if (p != NULL) {
|
|
||||||
recycle_if_unused(&p);
|
|
||||||
m->end = str;
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
} else if (str == f->end) {
|
|
||||||
m->end = str;
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
if (skip) {
|
|
||||||
match_t *s = match(defs, f, str, skip, ignorecase);
|
|
||||||
if (s != NULL) {
|
|
||||||
str = s->end;
|
|
||||||
if (nchildren+2 >= child_cap) {
|
|
||||||
m->children = grow(m->children, child_cap += 5);
|
|
||||||
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
|
|
||||||
}
|
|
||||||
add_owner(&m->children[nchildren++], s);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// This isn't in the for() structure because there needs to
|
|
||||||
// be at least one chance to match the pattern, even if
|
|
||||||
// we're at the end of the string already (e.g. "..$").
|
|
||||||
if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
|
|
||||||
str = next_char(f, str);
|
|
||||||
}
|
|
||||||
recycle_if_unused(&m);
|
recycle_if_unused(&m);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
case BP_REPEAT: {
|
return new_match(defs, pat, str, str, NULL);
|
||||||
match_t *m = new_match(defs, pat, str, str, NULL);
|
}
|
||||||
size_t reps = 0;
|
case BP_UPTO: case BP_UPTO_STRICT: {
|
||||||
ssize_t max = pat->args.repetitions.max;
|
match_t *m = new_match(defs, pat, str, str, NULL);
|
||||||
pat_t *repeating = deref(defs, pat->args.repetitions.repeat_pat);
|
pat_t *target = deref(defs, pat->args.multiple.first),
|
||||||
pat_t *sep = deref(defs, pat->args.repetitions.sep);
|
*skip = deref(defs, pat->args.multiple.second);
|
||||||
size_t child_cap = 0, nchildren = 0;
|
if (!target && !skip) {
|
||||||
for (reps = 0; max == -1 || reps < (size_t)max; ++reps) {
|
while (str < f->end && *str != '\n') ++str;
|
||||||
const char *start = str;
|
m->end = str;
|
||||||
// Separator
|
return m;
|
||||||
match_t *msep = NULL;
|
}
|
||||||
if (sep != NULL && reps > 0) {
|
|
||||||
msep = match(defs, f, str, sep, ignorecase);
|
size_t child_cap = 0, nchildren = 0;
|
||||||
if (msep == NULL) break;
|
for (const char *prev = NULL; prev < str; ) {
|
||||||
str = msep->end;
|
prev = str;
|
||||||
|
if (target) {
|
||||||
|
match_t *p = match(defs, f, str, target, ignorecase);
|
||||||
|
if (p != NULL) {
|
||||||
|
recycle_if_unused(&p);
|
||||||
|
m->end = str;
|
||||||
|
return m;
|
||||||
}
|
}
|
||||||
match_t *mp = match(defs, f, str, repeating, ignorecase);
|
} else if (str == f->end) {
|
||||||
if (mp == NULL) {
|
m->end = str;
|
||||||
str = start;
|
return m;
|
||||||
if (msep) recycle_if_unused(&msep);
|
}
|
||||||
break;
|
if (skip) {
|
||||||
}
|
match_t *s = match(defs, f, str, skip, ignorecase);
|
||||||
if (mp->end == start && reps > 0) {
|
if (s != NULL) {
|
||||||
// Since no forward progress was made on either `repeating`
|
str = s->end;
|
||||||
// or `sep` and BP does not have mutable state, it's
|
|
||||||
// guaranteed that no progress will be made on the next
|
|
||||||
// loop either. We know that this will continue to loop
|
|
||||||
// until reps==max, so let's just cut to the chase instead
|
|
||||||
// of looping infinitely.
|
|
||||||
if (msep) recycle_if_unused(&msep);
|
|
||||||
recycle_if_unused(&mp);
|
|
||||||
if (pat->args.repetitions.max == -1)
|
|
||||||
reps = ~(size_t)0;
|
|
||||||
else
|
|
||||||
reps = (size_t)pat->args.repetitions.max;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (msep) {
|
|
||||||
if (nchildren+2 >= child_cap) {
|
if (nchildren+2 >= child_cap) {
|
||||||
m->children = grow(m->children, child_cap += 5);
|
m->children = grow(m->children, child_cap += 5);
|
||||||
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
|
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
|
||||||
}
|
}
|
||||||
add_owner(&m->children[nchildren++], msep);
|
add_owner(&m->children[nchildren++], s);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
// This isn't in the for() structure because there needs to
|
||||||
|
// be at least one chance to match the pattern, even if
|
||||||
|
// we're at the end of the string already (e.g. "..$").
|
||||||
|
if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
|
||||||
|
str = next_char(f, str);
|
||||||
|
}
|
||||||
|
recycle_if_unused(&m);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
case BP_REPEAT: {
|
||||||
|
match_t *m = new_match(defs, pat, str, str, NULL);
|
||||||
|
size_t reps = 0;
|
||||||
|
ssize_t max = pat->args.repetitions.max;
|
||||||
|
pat_t *repeating = deref(defs, pat->args.repetitions.repeat_pat);
|
||||||
|
pat_t *sep = deref(defs, pat->args.repetitions.sep);
|
||||||
|
size_t child_cap = 0, nchildren = 0;
|
||||||
|
for (reps = 0; max == -1 || reps < (size_t)max; ++reps) {
|
||||||
|
const char *start = str;
|
||||||
|
// Separator
|
||||||
|
match_t *msep = NULL;
|
||||||
|
if (sep != NULL && reps > 0) {
|
||||||
|
msep = match(defs, f, str, sep, ignorecase);
|
||||||
|
if (msep == NULL) break;
|
||||||
|
str = msep->end;
|
||||||
|
}
|
||||||
|
match_t *mp = match(defs, f, str, repeating, ignorecase);
|
||||||
|
if (mp == NULL) {
|
||||||
|
str = start;
|
||||||
|
if (msep) recycle_if_unused(&msep);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (mp->end == start && reps > 0) {
|
||||||
|
// Since no forward progress was made on either `repeating`
|
||||||
|
// or `sep` and BP does not have mutable state, it's
|
||||||
|
// guaranteed that no progress will be made on the next
|
||||||
|
// loop either. We know that this will continue to loop
|
||||||
|
// until reps==max, so let's just cut to the chase instead
|
||||||
|
// of looping infinitely.
|
||||||
|
if (msep) recycle_if_unused(&msep);
|
||||||
|
recycle_if_unused(&mp);
|
||||||
|
if (pat->args.repetitions.max == -1)
|
||||||
|
reps = ~(size_t)0;
|
||||||
|
else
|
||||||
|
reps = (size_t)pat->args.repetitions.max;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (msep) {
|
||||||
if (nchildren+2 >= child_cap) {
|
if (nchildren+2 >= child_cap) {
|
||||||
m->children = grow(m->children, child_cap += 5);
|
m->children = grow(m->children, child_cap += 5);
|
||||||
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
|
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
|
||||||
}
|
}
|
||||||
add_owner(&m->children[nchildren++], mp);
|
add_owner(&m->children[nchildren++], msep);
|
||||||
str = mp->end;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (reps < (size_t)pat->args.repetitions.min) {
|
if (nchildren+2 >= child_cap) {
|
||||||
recycle_if_unused(&m);
|
m->children = grow(m->children, child_cap += 5);
|
||||||
return NULL;
|
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
|
||||||
}
|
}
|
||||||
m->end = str;
|
add_owner(&m->children[nchildren++], mp);
|
||||||
return m;
|
str = mp->end;
|
||||||
}
|
}
|
||||||
case BP_AFTER: {
|
|
||||||
pat_t *back = deref(defs, pat->args.pat);
|
|
||||||
if (!back) return NULL;
|
|
||||||
|
|
||||||
// We only care about the region from the backtrack pos up to the
|
if (reps < (size_t)pat->args.repetitions.min) {
|
||||||
// current pos, so mock it out as a file slice.
|
recycle_if_unused(&m);
|
||||||
// TODO: this breaks ^/^^/$/$$, but that can probably be ignored
|
|
||||||
// because you rarely need to check those in a backtrack.
|
|
||||||
file_t slice;
|
|
||||||
slice_file(&slice, f, f->start, str);
|
|
||||||
for (const char *pos = &str[-(long)back->min_matchlen];
|
|
||||||
pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]);
|
|
||||||
pos = prev_char(f, pos)) {
|
|
||||||
cache_destroy(&slice);
|
|
||||||
slice.start = (char*)pos;
|
|
||||||
match_t *m = match(defs, &slice, pos, back, ignorecase);
|
|
||||||
// Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
|
|
||||||
if (m && m->end != str)
|
|
||||||
recycle_if_unused(&m);
|
|
||||||
else if (m) {
|
|
||||||
cache_destroy(&slice);
|
|
||||||
return new_match(defs, pat, str, str, MATCHES(m));
|
|
||||||
}
|
|
||||||
if (pos == f->start) break;
|
|
||||||
// To prevent extreme performance degradation, don't keep
|
|
||||||
// walking backwards endlessly over newlines.
|
|
||||||
if (back->max_matchlen == -1 && *pos == '\n') break;
|
|
||||||
}
|
|
||||||
cache_destroy(&slice);
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
case BP_BEFORE: {
|
m->end = str;
|
||||||
match_t *after = match(defs, f, str, pat->args.pat, ignorecase);
|
return m;
|
||||||
return after ? new_match(defs, pat, str, str, MATCHES(after)) : NULL;
|
}
|
||||||
}
|
case BP_AFTER: {
|
||||||
case BP_CAPTURE: {
|
pat_t *back = deref(defs, pat->args.pat);
|
||||||
match_t *p = match(defs, f, str, pat->args.pat, ignorecase);
|
if (!back) return NULL;
|
||||||
return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
|
|
||||||
}
|
|
||||||
case BP_OTHERWISE: {
|
|
||||||
match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase);
|
|
||||||
return m ? m : match(defs, f, str, pat->args.multiple.second, ignorecase);
|
|
||||||
}
|
|
||||||
case BP_CHAIN: {
|
|
||||||
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
|
|
||||||
if (m1 == NULL) return NULL;
|
|
||||||
|
|
||||||
match_t *m2;
|
// We only care about the region from the backtrack pos up to the
|
||||||
// Push backrefs and run matching, then cleanup
|
// current pos, so mock it out as a file slice.
|
||||||
if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) {
|
// TODO: this breaks ^/^^/$/$$, but that can probably be ignored
|
||||||
// Temporarily add a rule that the backref name matches the
|
// because you rarely need to check those in a backtrack.
|
||||||
// exact string of the original match (no replacements)
|
file_t slice;
|
||||||
size_t len = (size_t)(m1->end - m1->start);
|
slice_file(&slice, f, f->start, str);
|
||||||
pat_t *backref = new_pat(f, m1->start, m1->end, len, (ssize_t)len, BP_STRING);
|
for (const char *pos = &str[-(long)back->min_matchlen];
|
||||||
backref->args.string = m1->start;
|
pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]);
|
||||||
|
pos = prev_char(f, pos)) {
|
||||||
|
cache_destroy(&slice);
|
||||||
|
slice.start = (char*)pos;
|
||||||
|
match_t *m = match(defs, &slice, pos, back, ignorecase);
|
||||||
|
// Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
|
||||||
|
if (m && m->end != str)
|
||||||
|
recycle_if_unused(&m);
|
||||||
|
else if (m) {
|
||||||
|
cache_destroy(&slice);
|
||||||
|
return new_match(defs, pat, str, str, MATCHES(m));
|
||||||
|
}
|
||||||
|
if (pos == f->start) break;
|
||||||
|
// To prevent extreme performance degradation, don't keep
|
||||||
|
// walking backwards endlessly over newlines.
|
||||||
|
if (back->max_matchlen == -1 && *pos == '\n') break;
|
||||||
|
}
|
||||||
|
cache_destroy(&slice);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
case BP_BEFORE: {
|
||||||
|
match_t *after = match(defs, f, str, pat->args.pat, ignorecase);
|
||||||
|
return after ? new_match(defs, pat, str, str, MATCHES(after)) : NULL;
|
||||||
|
}
|
||||||
|
case BP_CAPTURE: {
|
||||||
|
match_t *p = match(defs, f, str, pat->args.pat, ignorecase);
|
||||||
|
return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
|
||||||
|
}
|
||||||
|
case BP_OTHERWISE: {
|
||||||
|
match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase);
|
||||||
|
return m ? m : match(defs, f, str, pat->args.multiple.second, ignorecase);
|
||||||
|
}
|
||||||
|
case BP_CHAIN: {
|
||||||
|
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
|
||||||
|
if (m1 == NULL) return NULL;
|
||||||
|
|
||||||
def_t *defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref);
|
match_t *m2;
|
||||||
++m1->refcount; {
|
// Push backrefs and run matching, then cleanup
|
||||||
m2 = match(defs2, f, m1->end, pat->args.multiple.second, ignorecase);
|
if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) {
|
||||||
if (!m2) { // No need to keep the backref in memory if it didn't match
|
// Temporarily add a rule that the backref name matches the
|
||||||
for (pat_t **rem = &f->pats; *rem; rem = &(*rem)->next) {
|
// exact string of the original match (no replacements)
|
||||||
if ((*rem) == backref) {
|
size_t len = (size_t)(m1->end - m1->start);
|
||||||
pat_t *tmp = *rem;
|
pat_t *backref = new_pat(f, m1->start, m1->end, len, (ssize_t)len, BP_STRING);
|
||||||
*rem = (*rem)->next;
|
backref->args.string = m1->start;
|
||||||
free(tmp);
|
|
||||||
break;
|
def_t *defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref);
|
||||||
}
|
++m1->refcount; {
|
||||||
|
m2 = match(defs2, f, m1->end, pat->args.multiple.second, ignorecase);
|
||||||
|
if (!m2) { // No need to keep the backref in memory if it didn't match
|
||||||
|
for (pat_t **rem = &f->pats; *rem; rem = &(*rem)->next) {
|
||||||
|
if ((*rem) == backref) {
|
||||||
|
pat_t *tmp = *rem;
|
||||||
|
*rem = (*rem)->next;
|
||||||
|
free(tmp);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
defs = free_defs(defs2, defs);
|
|
||||||
} --m1->refcount;
|
|
||||||
} else {
|
|
||||||
m2 = match(defs, f, m1->end, pat->args.multiple.second, ignorecase);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m2 == NULL) {
|
|
||||||
recycle_if_unused(&m1);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return new_match(defs, pat, str, m2->end, MATCHES(m1, m2));
|
|
||||||
}
|
|
||||||
case BP_MATCH: case BP_NOT_MATCH: {
|
|
||||||
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
|
|
||||||
if (m1 == NULL) return NULL;
|
|
||||||
|
|
||||||
// <p1>~<p2> matches iff the text of <p1> matches <p2>
|
|
||||||
// <p1>!~<p2> matches iff the text of <p1> does not match <p2>
|
|
||||||
file_t slice;
|
|
||||||
slice_file(&slice, f, m1->start, m1->end);
|
|
||||||
match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase);
|
|
||||||
if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
|
|
||||||
if (m2) recycle_if_unused(&m2);
|
|
||||||
cache_destroy(&slice);
|
|
||||||
recycle_if_unused(&m1);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
cache_destroy(&slice);
|
|
||||||
return new_match(defs, pat, m1->start, m1->end, (pat->type == BP_MATCH) ? MATCHES(m1, m2) : NULL);
|
|
||||||
}
|
|
||||||
case BP_REPLACE: {
|
|
||||||
match_t *p = NULL;
|
|
||||||
if (pat->args.replace.pat) {
|
|
||||||
p = match(defs, f, str, pat->args.replace.pat, ignorecase);
|
|
||||||
if (p == NULL) return NULL;
|
|
||||||
}
|
|
||||||
return new_match(defs, pat, str, p ? p->end : str, MATCHES(p));
|
|
||||||
}
|
|
||||||
case BP_REF: {
|
|
||||||
match_t *cached;
|
|
||||||
if (cache_get(f, defs, str, pat, &cached))
|
|
||||||
return cached;
|
|
||||||
|
|
||||||
def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
|
|
||||||
if (def == NULL)
|
|
||||||
errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
|
|
||||||
pat_t *ref = def->pat;
|
|
||||||
|
|
||||||
pat_t rec_op = {
|
|
||||||
.type = BP_LEFTRECURSION,
|
|
||||||
.start = ref->start,
|
|
||||||
.end = ref->end,
|
|
||||||
.min_matchlen = 0,
|
|
||||||
.max_matchlen = -1,
|
|
||||||
.args.leftrec = {
|
|
||||||
.match = NULL,
|
|
||||||
.visits = 0,
|
|
||||||
.at = str,
|
|
||||||
.fallback = ref,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
def_t defs2 = {
|
|
||||||
.namelen = def->namelen,
|
|
||||||
.name = def->name,
|
|
||||||
.pat = &rec_op,
|
|
||||||
.next = defs,
|
|
||||||
};
|
|
||||||
|
|
||||||
const char *prev = str;
|
|
||||||
match_t *m = match(&defs2, f, str, ref, ignorecase);
|
|
||||||
if (m == NULL) {
|
|
||||||
cache_save(f, defs, str, pat, NULL);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (rec_op.args.leftrec.visits > 0) {
|
|
||||||
rec_op.args.leftrec.visits = 0;
|
|
||||||
remove_ownership(&rec_op.args.leftrec.match);
|
|
||||||
add_owner(&rec_op.args.leftrec.match, m);
|
|
||||||
prev = m->end;
|
|
||||||
match_t *m2 = match(&defs2, f, str, ref, ignorecase);
|
|
||||||
if (m2 == NULL) break;
|
|
||||||
if (m2->end <= prev) {
|
|
||||||
recycle_if_unused(&m2);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
m = m2;
|
defs = free_defs(defs2, defs);
|
||||||
}
|
} --m1->refcount;
|
||||||
|
} else {
|
||||||
// This match wrapper mainly exists for record-keeping purposes.
|
m2 = match(defs, f, m1->end, pat->args.multiple.second, ignorecase);
|
||||||
// However, it also keeps `m` from getting garbage collected when
|
|
||||||
// leftrec.match is GC'd. It also helps with visualization of match
|
|
||||||
// results.
|
|
||||||
// OPTIMIZE: remove this if necessary
|
|
||||||
match_t *wrap = new_match(defs, pat, m->start, m->end, MATCHES(m));
|
|
||||||
cache_save(f, defs, str, pat, wrap);
|
|
||||||
|
|
||||||
if (rec_op.args.leftrec.match)
|
|
||||||
remove_ownership(&rec_op.args.leftrec.match);
|
|
||||||
|
|
||||||
return wrap;
|
|
||||||
}
|
}
|
||||||
case BP_NODENT: {
|
|
||||||
if (*str != '\n') return NULL;
|
|
||||||
const char *start = str;
|
|
||||||
|
|
||||||
size_t linenum = get_line_number(f, str);
|
if (m2 == NULL) {
|
||||||
const char *p = get_line(f, linenum);
|
recycle_if_unused(&m1);
|
||||||
if (p < f->start) p = f->start; // Can happen with recursive matching
|
|
||||||
|
|
||||||
// Current indentation:
|
|
||||||
char denter = *p;
|
|
||||||
int dents = 0;
|
|
||||||
if (denter == ' ' || denter == '\t') {
|
|
||||||
for (; *p == denter && p < f->end; ++p) ++dents;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Subsequent indentation:
|
|
||||||
while (*str == '\n' || *str == '\n') ++str;
|
|
||||||
for (int i = 0; i < dents; i++)
|
|
||||||
if (&str[i] >= f->end || str[i] != denter) return NULL;
|
|
||||||
|
|
||||||
return new_match(defs, pat, start, &str[dents], NULL);
|
|
||||||
}
|
|
||||||
case BP_ERROR: {
|
|
||||||
match_t *p = pat->args.pat ? match(defs, f, str, pat->args.pat, ignorecase) : NULL;
|
|
||||||
return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
|
|
||||||
}
|
|
||||||
default: {
|
|
||||||
errx(EXIT_FAILURE, "Unknown pattern type: %u", pat->type);
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return new_match(defs, pat, str, m2->end, MATCHES(m1, m2));
|
||||||
|
}
|
||||||
|
case BP_MATCH: case BP_NOT_MATCH: {
|
||||||
|
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
|
||||||
|
if (m1 == NULL) return NULL;
|
||||||
|
|
||||||
|
// <p1>~<p2> matches iff the text of <p1> matches <p2>
|
||||||
|
// <p1>!~<p2> matches iff the text of <p1> does not match <p2>
|
||||||
|
file_t slice;
|
||||||
|
slice_file(&slice, f, m1->start, m1->end);
|
||||||
|
match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase);
|
||||||
|
if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
|
||||||
|
if (m2) recycle_if_unused(&m2);
|
||||||
|
cache_destroy(&slice);
|
||||||
|
recycle_if_unused(&m1);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
cache_destroy(&slice);
|
||||||
|
return new_match(defs, pat, m1->start, m1->end, (pat->type == BP_MATCH) ? MATCHES(m1, m2) : NULL);
|
||||||
|
}
|
||||||
|
case BP_REPLACE: {
|
||||||
|
match_t *p = NULL;
|
||||||
|
if (pat->args.replace.pat) {
|
||||||
|
p = match(defs, f, str, pat->args.replace.pat, ignorecase);
|
||||||
|
if (p == NULL) return NULL;
|
||||||
|
}
|
||||||
|
return new_match(defs, pat, str, p ? p->end : str, MATCHES(p));
|
||||||
|
}
|
||||||
|
case BP_REF: {
|
||||||
|
match_t *cached;
|
||||||
|
if (cache_get(f, defs, str, pat, &cached))
|
||||||
|
return cached;
|
||||||
|
|
||||||
|
def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
|
||||||
|
if (def == NULL)
|
||||||
|
errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
|
||||||
|
pat_t *ref = def->pat;
|
||||||
|
|
||||||
|
pat_t rec_op = {
|
||||||
|
.type = BP_LEFTRECURSION,
|
||||||
|
.start = ref->start,
|
||||||
|
.end = ref->end,
|
||||||
|
.min_matchlen = 0,
|
||||||
|
.max_matchlen = -1,
|
||||||
|
.args.leftrec = {
|
||||||
|
.match = NULL,
|
||||||
|
.visits = 0,
|
||||||
|
.at = str,
|
||||||
|
.fallback = ref,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
def_t defs2 = {
|
||||||
|
.namelen = def->namelen,
|
||||||
|
.name = def->name,
|
||||||
|
.pat = &rec_op,
|
||||||
|
.next = defs,
|
||||||
|
};
|
||||||
|
|
||||||
|
const char *prev = str;
|
||||||
|
match_t *m = match(&defs2, f, str, ref, ignorecase);
|
||||||
|
if (m == NULL) {
|
||||||
|
cache_save(f, defs, str, pat, NULL);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (rec_op.args.leftrec.visits > 0) {
|
||||||
|
rec_op.args.leftrec.visits = 0;
|
||||||
|
remove_ownership(&rec_op.args.leftrec.match);
|
||||||
|
add_owner(&rec_op.args.leftrec.match, m);
|
||||||
|
prev = m->end;
|
||||||
|
match_t *m2 = match(&defs2, f, str, ref, ignorecase);
|
||||||
|
if (m2 == NULL) break;
|
||||||
|
if (m2->end <= prev) {
|
||||||
|
recycle_if_unused(&m2);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
m = m2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This match wrapper mainly exists for record-keeping purposes.
|
||||||
|
// However, it also keeps `m` from getting garbage collected when
|
||||||
|
// leftrec.match is GC'd. It also helps with visualization of match
|
||||||
|
// results.
|
||||||
|
// OPTIMIZE: remove this if necessary
|
||||||
|
match_t *wrap = new_match(defs, pat, m->start, m->end, MATCHES(m));
|
||||||
|
cache_save(f, defs, str, pat, wrap);
|
||||||
|
|
||||||
|
if (rec_op.args.leftrec.match)
|
||||||
|
remove_ownership(&rec_op.args.leftrec.match);
|
||||||
|
|
||||||
|
return wrap;
|
||||||
|
}
|
||||||
|
case BP_NODENT: {
|
||||||
|
if (*str != '\n') return NULL;
|
||||||
|
const char *start = str;
|
||||||
|
|
||||||
|
size_t linenum = get_line_number(f, str);
|
||||||
|
const char *p = get_line(f, linenum);
|
||||||
|
if (p < f->start) p = f->start; // Can happen with recursive matching
|
||||||
|
|
||||||
|
// Current indentation:
|
||||||
|
char denter = *p;
|
||||||
|
int dents = 0;
|
||||||
|
if (denter == ' ' || denter == '\t') {
|
||||||
|
for (; *p == denter && p < f->end; ++p) ++dents;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Subsequent indentation:
|
||||||
|
while (*str == '\n' || *str == '\n') ++str;
|
||||||
|
for (int i = 0; i < dents; i++)
|
||||||
|
if (&str[i] >= f->end || str[i] != denter) return NULL;
|
||||||
|
|
||||||
|
return new_match(defs, pat, start, &str[dents], NULL);
|
||||||
|
}
|
||||||
|
case BP_ERROR: {
|
||||||
|
match_t *p = pat->args.pat ? match(defs, f, str, pat->args.pat, ignorecase) : NULL;
|
||||||
|
return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
errx(EXIT_FAILURE, "Unknown pattern type: %u", pat->type);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -703,4 +703,4 @@ size_t free_all_matches(void)
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
2
match.h
2
match.h
@ -22,4 +22,4 @@ size_t free_all_matches(void);
|
|||||||
size_t recycle_all_matches(void);
|
size_t recycle_all_matches(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
544
pattern.c
544
pattern.c
@ -234,291 +234,291 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
|
|||||||
char c = *str;
|
char c = *str;
|
||||||
str = next_char(f, str);
|
str = next_char(f, str);
|
||||||
switch (c) {
|
switch (c) {
|
||||||
// Any char (dot)
|
// Any char (dot)
|
||||||
case '.': {
|
case '.': {
|
||||||
if (*str == '.') { // ".."
|
if (*str == '.') { // ".."
|
||||||
pat_t *skip = NULL;
|
pat_t *skip = NULL;
|
||||||
str = next_char(f, str);
|
|
||||||
char skipper = *str;
|
|
||||||
if (matchchar(&str, '%', false) || matchchar(&str, '=', false)) {
|
|
||||||
skip = bp_simplepattern(f, str);
|
|
||||||
if (!skip)
|
|
||||||
file_err(f, str, str, "There should be a pattern to skip here after the '%c'", skipper);
|
|
||||||
str = skip->end;
|
|
||||||
}
|
|
||||||
pat_t *upto = new_pat(f, start, str, 0, -1, skipper == '=' ? BP_UPTO_STRICT : BP_UPTO);
|
|
||||||
upto->args.multiple.second = skip;
|
|
||||||
return upto;
|
|
||||||
} else {
|
|
||||||
return new_pat(f, start, str, 1, UTF8_MAXCHARLEN, BP_ANYCHAR);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Char literals
|
|
||||||
case '`': {
|
|
||||||
pat_t *all = NULL;
|
|
||||||
do { // Comma-separated items:
|
|
||||||
if (str >= f->end || !*str || *str == '\n')
|
|
||||||
file_err(f, str, str, "There should be a character here after the '`'");
|
|
||||||
|
|
||||||
const char *c1_loc = str;
|
|
||||||
str = next_char(f, c1_loc);
|
|
||||||
if (*str == '-') { // Range
|
|
||||||
const char *c2_loc = ++str;
|
|
||||||
if (next_char(f, c1_loc) > c1_loc+1 || next_char(f, c2_loc) > c2_loc+1)
|
|
||||||
file_err(f, start, next_char(f, c2_loc), "Sorry, UTF-8 character ranges are not yet supported.");
|
|
||||||
char c1 = *c1_loc, c2 = *c2_loc;
|
|
||||||
if (!c2 || c2 == '\n')
|
|
||||||
file_err(f, str, str, "There should be a character here to complete the character range.");
|
|
||||||
if (c1 > c2) { // Swap order
|
|
||||||
char tmp = c1;
|
|
||||||
c1 = c2;
|
|
||||||
c2 = tmp;
|
|
||||||
}
|
|
||||||
str = next_char(f, c2_loc);
|
|
||||||
pat_t *pat = new_pat(f, start == c1_loc - 1 ? start : c1_loc, str, 1, 1, BP_RANGE);
|
|
||||||
pat->args.range.low = (unsigned char)c1;
|
|
||||||
pat->args.range.high = (unsigned char)c2;
|
|
||||||
all = either_pat(f, all, pat);
|
|
||||||
} else {
|
|
||||||
size_t len = (size_t)(str - c1_loc);
|
|
||||||
pat_t *pat = new_pat(f, start, str, len, (ssize_t)len, BP_STRING);
|
|
||||||
pat->args.string = c1_loc;
|
|
||||||
all = either_pat(f, all, pat);
|
|
||||||
}
|
|
||||||
} while (*str++ == ',');
|
|
||||||
|
|
||||||
return all;
|
|
||||||
}
|
|
||||||
// Escapes
|
|
||||||
case '\\': {
|
|
||||||
if (!*str || *str == '\n')
|
|
||||||
file_err(f, str, str, "There should be an escape sequence here after this backslash.");
|
|
||||||
|
|
||||||
pat_t *all = NULL;
|
|
||||||
do { // Comma-separated items:
|
|
||||||
const char *itemstart = str-1;
|
|
||||||
if (*str == 'N') { // \N (nodent)
|
|
||||||
all = either_pat(f, all, new_pat(f, itemstart, ++str, 1, -1, BP_NODENT));
|
|
||||||
continue;
|
|
||||||
} else if (*str == 'i') { // \i (identifier char)
|
|
||||||
all = either_pat(f, all, new_pat(f, itemstart, ++str, 1, -1, BP_ID_CONTINUE));
|
|
||||||
continue;
|
|
||||||
} else if (*str == 'I') { // \I (identifier char, not including numbers)
|
|
||||||
all = either_pat(f, all, new_pat(f, itemstart, ++str, 1, -1, BP_ID_START));
|
|
||||||
continue;
|
|
||||||
} else if (*str == 'b') { // \b word boundary
|
|
||||||
all = either_pat(f, all, new_pat(f, itemstart, ++str, 0, 0, BP_WORD_BOUNDARY));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *opstart = str;
|
|
||||||
unsigned char e_low = (unsigned char)unescapechar(str, &str);
|
|
||||||
if (str == opstart)
|
|
||||||
file_err(f, start, str+1, "This isn't a valid escape sequence.");
|
|
||||||
unsigned char e_high = e_low;
|
|
||||||
if (*str == '-') { // Escape range (e.g. \x00-\xFF)
|
|
||||||
++str;
|
|
||||||
if (next_char(f, str) != str+1)
|
|
||||||
file_err(f, start, next_char(f, str), "Sorry, UTF8 escape sequences are not supported in ranges.");
|
|
||||||
const char *seqstart = str;
|
|
||||||
e_high = (unsigned char)unescapechar(str, &str);
|
|
||||||
if (str == seqstart)
|
|
||||||
file_err(f, seqstart, str+1, "This value isn't a valid escape sequence");
|
|
||||||
if (e_high < e_low)
|
|
||||||
file_err(f, start, str, "Escape ranges should be low-to-high, but this is high-to-low.");
|
|
||||||
}
|
|
||||||
pat_t *esc = new_pat(f, start, str, 1, 1, BP_RANGE);
|
|
||||||
esc->args.range.low = e_low;
|
|
||||||
esc->args.range.high = e_high;
|
|
||||||
all = either_pat(f, all, esc);
|
|
||||||
} while (*str++ == ',');
|
|
||||||
|
|
||||||
return all;
|
|
||||||
}
|
|
||||||
// Word boundary
|
|
||||||
case '|': {
|
|
||||||
return new_pat(f, start, str, 0, 0, BP_WORD_BOUNDARY);
|
|
||||||
}
|
|
||||||
// String literal
|
|
||||||
case '"': case '\'': case '\002': case '{': {
|
|
||||||
char endquote = c == '\002' ? '\003' : (c == '{' ? '}' : c);
|
|
||||||
char *litstart = (char*)str;
|
|
||||||
while (str < f->end && *str != endquote)
|
|
||||||
str = next_char(f, str);
|
|
||||||
size_t len = (size_t)(str - litstart);
|
|
||||||
str = next_char(f, str);
|
str = next_char(f, str);
|
||||||
|
char skipper = *str;
|
||||||
|
if (matchchar(&str, '%', false) || matchchar(&str, '=', false)) {
|
||||||
|
skip = bp_simplepattern(f, str);
|
||||||
|
if (!skip)
|
||||||
|
file_err(f, str, str, "There should be a pattern to skip here after the '%c'", skipper);
|
||||||
|
str = skip->end;
|
||||||
|
}
|
||||||
|
pat_t *upto = new_pat(f, start, str, 0, -1, skipper == '=' ? BP_UPTO_STRICT : BP_UPTO);
|
||||||
|
upto->args.multiple.second = skip;
|
||||||
|
return upto;
|
||||||
|
} else {
|
||||||
|
return new_pat(f, start, str, 1, UTF8_MAXCHARLEN, BP_ANYCHAR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Char literals
|
||||||
|
case '`': {
|
||||||
|
pat_t *all = NULL;
|
||||||
|
do { // Comma-separated items:
|
||||||
|
if (str >= f->end || !*str || *str == '\n')
|
||||||
|
file_err(f, str, str, "There should be a character here after the '`'");
|
||||||
|
|
||||||
pat_t *pat = new_pat(f, start, str, len, (ssize_t)len, BP_STRING);
|
const char *c1_loc = str;
|
||||||
pat->args.string = litstart;
|
str = next_char(f, c1_loc);
|
||||||
return pat;
|
if (*str == '-') { // Range
|
||||||
}
|
const char *c2_loc = ++str;
|
||||||
// Not <pat>
|
if (next_char(f, c1_loc) > c1_loc+1 || next_char(f, c2_loc) > c2_loc+1)
|
||||||
case '!': {
|
file_err(f, start, next_char(f, c2_loc), "Sorry, UTF-8 character ranges are not yet supported.");
|
||||||
pat_t *p = bp_simplepattern(f, str);
|
char c1 = *c1_loc, c2 = *c2_loc;
|
||||||
if (!p) file_err(f, str, str, "There should be a pattern after this '!'");
|
if (!c2 || c2 == '\n')
|
||||||
pat_t *not = new_pat(f, start, p->end, 0, 0, BP_NOT);
|
file_err(f, str, str, "There should be a character here to complete the character range.");
|
||||||
not->args.pat = p;
|
if (c1 > c2) { // Swap order
|
||||||
return not;
|
char tmp = c1;
|
||||||
}
|
c1 = c2;
|
||||||
// Number of repetitions: <N>(-<N> / - / + / "")
|
c2 = tmp;
|
||||||
case '0': case '1': case '2': case '3': case '4': case '5':
|
}
|
||||||
case '6': case '7': case '8': case '9': {
|
str = next_char(f, c2_loc);
|
||||||
size_t min = 0;
|
pat_t *pat = new_pat(f, start == c1_loc - 1 ? start : c1_loc, str, 1, 1, BP_RANGE);
|
||||||
ssize_t max = -1;
|
pat->args.range.low = (unsigned char)c1;
|
||||||
--str;
|
pat->args.range.high = (unsigned char)c2;
|
||||||
long n1 = strtol(str, (char**)&str, 10);
|
all = either_pat(f, all, pat);
|
||||||
if (matchchar(&str, '-', false)) {
|
|
||||||
str = after_spaces(str, false);
|
|
||||||
const char *numstart = str;
|
|
||||||
long n2 = strtol(str, (char**)&str, 10);
|
|
||||||
if (str == numstart) min = 0, max = (ssize_t)n1;
|
|
||||||
else min = (size_t)n1, max = (ssize_t)n2;
|
|
||||||
} else if (matchchar(&str, '+', false)) {
|
|
||||||
min = (size_t)n1, max = -1;
|
|
||||||
} else {
|
} else {
|
||||||
min = (size_t)n1, max = (ssize_t)n1;
|
size_t len = (size_t)(str - c1_loc);
|
||||||
|
pat_t *pat = new_pat(f, start, str, len, (ssize_t)len, BP_STRING);
|
||||||
|
pat->args.string = c1_loc;
|
||||||
|
all = either_pat(f, all, pat);
|
||||||
}
|
}
|
||||||
pat_t *repeating = bp_simplepattern(f, str);
|
} while (*str++ == ',');
|
||||||
if (!repeating)
|
|
||||||
file_err(f, str, str, "There should be a pattern after this repetition count.");
|
return all;
|
||||||
str = repeating->end;
|
}
|
||||||
pat_t *sep = NULL;
|
// Escapes
|
||||||
if (matchchar(&str, '%', false)) {
|
case '\\': {
|
||||||
sep = bp_simplepattern(f, str);
|
if (!*str || *str == '\n')
|
||||||
if (!sep)
|
file_err(f, str, str, "There should be an escape sequence here after this backslash.");
|
||||||
file_err(f, str, str, "There should be a separator pattern after this '%%'");
|
|
||||||
str = sep->end;
|
pat_t *all = NULL;
|
||||||
} else {
|
do { // Comma-separated items:
|
||||||
str = repeating->end;
|
const char *itemstart = str-1;
|
||||||
}
|
if (*str == 'N') { // \N (nodent)
|
||||||
return new_range(f, start, str, min, max, repeating, sep);
|
all = either_pat(f, all, new_pat(f, itemstart, ++str, 1, -1, BP_NODENT));
|
||||||
}
|
continue;
|
||||||
// Lookbehind
|
} else if (*str == 'i') { // \i (identifier char)
|
||||||
case '<': {
|
all = either_pat(f, all, new_pat(f, itemstart, ++str, 1, -1, BP_ID_CONTINUE));
|
||||||
pat_t *behind = bp_simplepattern(f, str);
|
continue;
|
||||||
if (!behind)
|
} else if (*str == 'I') { // \I (identifier char, not including numbers)
|
||||||
file_err(f, str, str, "There should be a pattern after this '<'");
|
all = either_pat(f, all, new_pat(f, itemstart, ++str, 1, -1, BP_ID_START));
|
||||||
str = behind->end;
|
continue;
|
||||||
str = behind->end;
|
} else if (*str == 'b') { // \b word boundary
|
||||||
pat_t *pat = new_pat(f, start, str, 0, 0, BP_AFTER);
|
all = either_pat(f, all, new_pat(f, itemstart, ++str, 0, 0, BP_WORD_BOUNDARY));
|
||||||
pat->args.pat = behind;
|
continue;
|
||||||
return pat;
|
|
||||||
}
|
|
||||||
// Lookahead
|
|
||||||
case '>': {
|
|
||||||
pat_t *ahead = bp_simplepattern(f, str);
|
|
||||||
if (!ahead)
|
|
||||||
file_err(f, str, str, "There should be a pattern after this '>'");
|
|
||||||
str = ahead->end;
|
|
||||||
pat_t *pat = new_pat(f, start, str, 0, 0, BP_BEFORE);
|
|
||||||
pat->args.pat = ahead;
|
|
||||||
return pat;
|
|
||||||
}
|
|
||||||
// Parentheses
|
|
||||||
case '(': {
|
|
||||||
if (start + 2 < f->end && strncmp(start, "(!)", 3) == 0) { // (!) errors
|
|
||||||
str = start + 3;
|
|
||||||
pat_t *pat = bp_simplepattern(f, str);
|
|
||||||
if (!pat) pat = new_pat(f, str, str, 0, 0, BP_STRING);
|
|
||||||
pat = expand_replacements(f, pat, false);
|
|
||||||
pat_t *error = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_ERROR);
|
|
||||||
error->args.pat = pat;
|
|
||||||
return error;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pat_t *pat = bp_pattern_nl(f, str, true);
|
const char *opstart = str;
|
||||||
if (!pat)
|
unsigned char e_low = (unsigned char)unescapechar(str, &str);
|
||||||
file_err(f, str, str, "There should be a valid pattern after this parenthesis.");
|
if (str == opstart)
|
||||||
str = pat->end;
|
file_err(f, start, str+1, "This isn't a valid escape sequence.");
|
||||||
if (!matchchar(&str, ')', true)) file_err(f, str, str, "Missing paren: )");
|
unsigned char e_high = e_low;
|
||||||
pat->start = start;
|
if (*str == '-') { // Escape range (e.g. \x00-\xFF)
|
||||||
pat->end = str;
|
++str;
|
||||||
return pat;
|
if (next_char(f, str) != str+1)
|
||||||
|
file_err(f, start, next_char(f, str), "Sorry, UTF8 escape sequences are not supported in ranges.");
|
||||||
|
const char *seqstart = str;
|
||||||
|
e_high = (unsigned char)unescapechar(str, &str);
|
||||||
|
if (str == seqstart)
|
||||||
|
file_err(f, seqstart, str+1, "This value isn't a valid escape sequence");
|
||||||
|
if (e_high < e_low)
|
||||||
|
file_err(f, start, str, "Escape ranges should be low-to-high, but this is high-to-low.");
|
||||||
|
}
|
||||||
|
pat_t *esc = new_pat(f, start, str, 1, 1, BP_RANGE);
|
||||||
|
esc->args.range.low = e_low;
|
||||||
|
esc->args.range.high = e_high;
|
||||||
|
all = either_pat(f, all, esc);
|
||||||
|
} while (*str++ == ',');
|
||||||
|
|
||||||
|
return all;
|
||||||
|
}
|
||||||
|
// Word boundary
|
||||||
|
case '|': {
|
||||||
|
return new_pat(f, start, str, 0, 0, BP_WORD_BOUNDARY);
|
||||||
|
}
|
||||||
|
// String literal
|
||||||
|
case '"': case '\'': case '\002': case '{': {
|
||||||
|
char endquote = c == '\002' ? '\003' : (c == '{' ? '}' : c);
|
||||||
|
char *litstart = (char*)str;
|
||||||
|
while (str < f->end && *str != endquote)
|
||||||
|
str = next_char(f, str);
|
||||||
|
size_t len = (size_t)(str - litstart);
|
||||||
|
str = next_char(f, str);
|
||||||
|
|
||||||
|
pat_t *pat = new_pat(f, start, str, len, (ssize_t)len, BP_STRING);
|
||||||
|
pat->args.string = litstart;
|
||||||
|
return pat;
|
||||||
|
}
|
||||||
|
// Not <pat>
|
||||||
|
case '!': {
|
||||||
|
pat_t *p = bp_simplepattern(f, str);
|
||||||
|
if (!p) file_err(f, str, str, "There should be a pattern after this '!'");
|
||||||
|
pat_t *not = new_pat(f, start, p->end, 0, 0, BP_NOT);
|
||||||
|
not->args.pat = p;
|
||||||
|
return not;
|
||||||
|
}
|
||||||
|
// Number of repetitions: <N>(-<N> / - / + / "")
|
||||||
|
case '0': case '1': case '2': case '3': case '4': case '5':
|
||||||
|
case '6': case '7': case '8': case '9': {
|
||||||
|
size_t min = 0;
|
||||||
|
ssize_t max = -1;
|
||||||
|
--str;
|
||||||
|
long n1 = strtol(str, (char**)&str, 10);
|
||||||
|
if (matchchar(&str, '-', false)) {
|
||||||
|
str = after_spaces(str, false);
|
||||||
|
const char *numstart = str;
|
||||||
|
long n2 = strtol(str, (char**)&str, 10);
|
||||||
|
if (str == numstart) min = 0, max = (ssize_t)n1;
|
||||||
|
else min = (size_t)n1, max = (ssize_t)n2;
|
||||||
|
} else if (matchchar(&str, '+', false)) {
|
||||||
|
min = (size_t)n1, max = -1;
|
||||||
|
} else {
|
||||||
|
min = (size_t)n1, max = (ssize_t)n1;
|
||||||
}
|
}
|
||||||
// Square brackets
|
pat_t *repeating = bp_simplepattern(f, str);
|
||||||
case '[': {
|
if (!repeating)
|
||||||
pat_t *maybe = bp_pattern_nl(f, str, true);
|
file_err(f, str, str, "There should be a pattern after this repetition count.");
|
||||||
if (!maybe)
|
str = repeating->end;
|
||||||
file_err(f, str, str, "There should be a valid pattern after this square bracket.");
|
pat_t *sep = NULL;
|
||||||
str = maybe->end;
|
if (matchchar(&str, '%', false)) {
|
||||||
(void)matchchar(&str, ']', true);
|
sep = bp_simplepattern(f, str);
|
||||||
return new_range(f, start, str, 0, 1, maybe, NULL);
|
if (!sep)
|
||||||
}
|
file_err(f, str, str, "There should be a separator pattern after this '%%'");
|
||||||
// Repeating
|
str = sep->end;
|
||||||
case '*': case '+': {
|
} else {
|
||||||
size_t min = (size_t)(c == '*' ? 0 : 1);
|
|
||||||
pat_t *repeating = bp_simplepattern(f, str);
|
|
||||||
if (!repeating)
|
|
||||||
file_err(f, str, str, "There should be a valid pattern here after the '%c'", c);
|
|
||||||
str = repeating->end;
|
str = repeating->end;
|
||||||
pat_t *sep = NULL;
|
|
||||||
if (matchchar(&str, '%', false)) {
|
|
||||||
sep = bp_simplepattern(f, str);
|
|
||||||
if (!sep)
|
|
||||||
file_err(f, str, str, "There should be a separator pattern after the '%%' here.");
|
|
||||||
str = sep->end;
|
|
||||||
}
|
|
||||||
return new_range(f, start, str, min, -1, repeating, sep);
|
|
||||||
}
|
}
|
||||||
// Capture
|
return new_range(f, start, str, min, max, repeating, sep);
|
||||||
case '@': {
|
}
|
||||||
const char *name = NULL;
|
// Lookbehind
|
||||||
size_t namelen = 0;
|
case '<': {
|
||||||
const char *a = after_name(str);
|
pat_t *behind = bp_simplepattern(f, str);
|
||||||
const char *eq = a;
|
if (!behind)
|
||||||
if (a > str && !matchstr(&eq, "=>", false) && matchchar(&eq, '=', false)) {
|
file_err(f, str, str, "There should be a pattern after this '<'");
|
||||||
name = str;
|
str = behind->end;
|
||||||
namelen = (size_t)(a-str);
|
str = behind->end;
|
||||||
str = eq;
|
pat_t *pat = new_pat(f, start, str, 0, 0, BP_AFTER);
|
||||||
}
|
pat->args.pat = behind;
|
||||||
|
return pat;
|
||||||
|
}
|
||||||
|
// Lookahead
|
||||||
|
case '>': {
|
||||||
|
pat_t *ahead = bp_simplepattern(f, str);
|
||||||
|
if (!ahead)
|
||||||
|
file_err(f, str, str, "There should be a pattern after this '>'");
|
||||||
|
str = ahead->end;
|
||||||
|
pat_t *pat = new_pat(f, start, str, 0, 0, BP_BEFORE);
|
||||||
|
pat->args.pat = ahead;
|
||||||
|
return pat;
|
||||||
|
}
|
||||||
|
// Parentheses
|
||||||
|
case '(': {
|
||||||
|
if (start + 2 < f->end && strncmp(start, "(!)", 3) == 0) { // (!) errors
|
||||||
|
str = start + 3;
|
||||||
pat_t *pat = bp_simplepattern(f, str);
|
pat_t *pat = bp_simplepattern(f, str);
|
||||||
if (!pat)
|
if (!pat) pat = new_pat(f, str, str, 0, 0, BP_STRING);
|
||||||
file_err(f, str, str, "There should be a valid pattern here to capture after the '@'");
|
pat = expand_replacements(f, pat, false);
|
||||||
|
pat_t *error = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_ERROR);
|
||||||
|
error->args.pat = pat;
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
pat_t *capture = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_CAPTURE);
|
pat_t *pat = bp_pattern_nl(f, str, true);
|
||||||
capture->args.capture.capture_pat = pat;
|
if (!pat)
|
||||||
capture->args.capture.name = name;
|
file_err(f, str, str, "There should be a valid pattern after this parenthesis.");
|
||||||
capture->args.capture.namelen = namelen;
|
str = pat->end;
|
||||||
return capture;
|
if (!matchchar(&str, ')', true)) file_err(f, str, str, "Missing paren: )");
|
||||||
|
pat->start = start;
|
||||||
|
pat->end = str;
|
||||||
|
return pat;
|
||||||
|
}
|
||||||
|
// Square brackets
|
||||||
|
case '[': {
|
||||||
|
pat_t *maybe = bp_pattern_nl(f, str, true);
|
||||||
|
if (!maybe)
|
||||||
|
file_err(f, str, str, "There should be a valid pattern after this square bracket.");
|
||||||
|
str = maybe->end;
|
||||||
|
(void)matchchar(&str, ']', true);
|
||||||
|
return new_range(f, start, str, 0, 1, maybe, NULL);
|
||||||
|
}
|
||||||
|
// Repeating
|
||||||
|
case '*': case '+': {
|
||||||
|
size_t min = (size_t)(c == '*' ? 0 : 1);
|
||||||
|
pat_t *repeating = bp_simplepattern(f, str);
|
||||||
|
if (!repeating)
|
||||||
|
file_err(f, str, str, "There should be a valid pattern here after the '%c'", c);
|
||||||
|
str = repeating->end;
|
||||||
|
pat_t *sep = NULL;
|
||||||
|
if (matchchar(&str, '%', false)) {
|
||||||
|
sep = bp_simplepattern(f, str);
|
||||||
|
if (!sep)
|
||||||
|
file_err(f, str, str, "There should be a separator pattern after the '%%' here.");
|
||||||
|
str = sep->end;
|
||||||
}
|
}
|
||||||
// Start of file/line
|
return new_range(f, start, str, min, -1, repeating, sep);
|
||||||
case '^': {
|
}
|
||||||
if (*str == '^')
|
// Capture
|
||||||
return new_pat(f, start, ++str, 0, 0, BP_START_OF_FILE);
|
case '@': {
|
||||||
return new_pat(f, start, str, 0, 0, BP_START_OF_LINE);
|
const char *name = NULL;
|
||||||
|
size_t namelen = 0;
|
||||||
|
const char *a = after_name(str);
|
||||||
|
const char *eq = a;
|
||||||
|
if (a > str && !matchstr(&eq, "=>", false) && matchchar(&eq, '=', false)) {
|
||||||
|
name = str;
|
||||||
|
namelen = (size_t)(a-str);
|
||||||
|
str = eq;
|
||||||
}
|
}
|
||||||
// End of file/line:
|
pat_t *pat = bp_simplepattern(f, str);
|
||||||
case '$': {
|
if (!pat)
|
||||||
if (*str == '$')
|
file_err(f, str, str, "There should be a valid pattern here to capture after the '@'");
|
||||||
return new_pat(f, start, ++str, 0, 0, BP_END_OF_FILE);
|
|
||||||
return new_pat(f, start, str, 0, 0, BP_END_OF_LINE);
|
pat_t *capture = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_CAPTURE);
|
||||||
}
|
capture->args.capture.capture_pat = pat;
|
||||||
default: {
|
capture->args.capture.name = name;
|
||||||
// Reference
|
capture->args.capture.namelen = namelen;
|
||||||
if (!isalpha(c) && c != '_') return NULL;
|
return capture;
|
||||||
str = after_name(start);
|
}
|
||||||
size_t namelen = (size_t)(str - start);
|
// Start of file/line
|
||||||
if (matchchar(&str, ':', false)) { // Definitions
|
case '^': {
|
||||||
pat_t *def = bp_pattern_nl(f, str, false);
|
if (*str == '^')
|
||||||
if (!def) file_err(f, str, f->end, "Could not parse this definition.");
|
return new_pat(f, start, ++str, 0, 0, BP_START_OF_FILE);
|
||||||
str = def->end;
|
return new_pat(f, start, str, 0, 0, BP_START_OF_LINE);
|
||||||
(void)matchchar(&str, ';', false); // Optional semicolon
|
}
|
||||||
str = after_spaces(str, true);
|
// End of file/line:
|
||||||
pat_t *pat = bp_pattern_nl(f, str, false);
|
case '$': {
|
||||||
if (pat) str = pat->end;
|
if (*str == '$')
|
||||||
else pat = def;
|
return new_pat(f, start, ++str, 0, 0, BP_END_OF_FILE);
|
||||||
pat_t *ret = new_pat(f, start, str, pat->min_matchlen, pat->max_matchlen, BP_DEFINITION);
|
return new_pat(f, start, str, 0, 0, BP_END_OF_LINE);
|
||||||
ret->args.def.name = start;
|
}
|
||||||
ret->args.def.namelen = namelen;
|
default: {
|
||||||
ret->args.def.def = def;
|
// Reference
|
||||||
ret->args.def.pat = pat;
|
if (!isalpha(c) && c != '_') return NULL;
|
||||||
return ret;
|
str = after_name(start);
|
||||||
}
|
size_t namelen = (size_t)(str - start);
|
||||||
pat_t *ref = new_pat(f, start, str, 0, -1, BP_REF);
|
if (matchchar(&str, ':', false)) { // Definitions
|
||||||
ref->args.ref.name = start;
|
pat_t *def = bp_pattern_nl(f, str, false);
|
||||||
ref->args.ref.len = namelen;
|
if (!def) file_err(f, str, f->end, "Could not parse this definition.");
|
||||||
return ref;
|
str = def->end;
|
||||||
|
(void)matchchar(&str, ';', false); // Optional semicolon
|
||||||
|
str = after_spaces(str, true);
|
||||||
|
pat_t *pat = bp_pattern_nl(f, str, false);
|
||||||
|
if (pat) str = pat->end;
|
||||||
|
else pat = def;
|
||||||
|
pat_t *ret = new_pat(f, start, str, pat->min_matchlen, pat->max_matchlen, BP_DEFINITION);
|
||||||
|
ret->args.def.name = start;
|
||||||
|
ret->args.def.namelen = namelen;
|
||||||
|
ret->args.def.def = def;
|
||||||
|
ret->args.def.pat = pat;
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
pat_t *ref = new_pat(f, start, str, 0, -1, BP_REF);
|
||||||
|
ref->args.ref.name = start;
|
||||||
|
ref->args.ref.len = namelen;
|
||||||
|
return ref;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -600,4 +600,4 @@ pat_t *bp_pattern(file_t *f, const char *str)
|
|||||||
return bp_pattern_nl(f, str, false);
|
return bp_pattern_nl(f, str, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
@ -21,4 +21,4 @@ __attribute__((nonnull))
|
|||||||
pat_t *bp_pattern(file_t *f, const char *str);
|
pat_t *bp_pattern(file_t *f, const char *str);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
2
print.c
2
print.c
@ -266,4 +266,4 @@ int print_errors(file_t *f, match_t *m)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
2
print.h
2
print.h
@ -28,4 +28,4 @@ __attribute__((nonnull))
|
|||||||
int print_errors(file_t *f, match_t *m);
|
int print_errors(file_t *f, match_t *m);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
2
types.h
2
types.h
@ -130,4 +130,4 @@ typedef struct def_s {
|
|||||||
} def_t;
|
} def_t;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
2
utf8.c
2
utf8.c
@ -280,4 +280,4 @@ bool isidcontinue(file_t *f, const char *str)
|
|||||||
|| find_in_ranges(codepoint, XID_Continue_only, ARRAY_LEN(XID_Continue_only)));
|
|| find_in_ranges(codepoint, XID_Continue_only, ARRAY_LEN(XID_Continue_only)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
2
utf8.h
2
utf8.h
@ -18,4 +18,4 @@ __attribute__((nonnull, pure))
|
|||||||
bool isidcontinue(file_t *f, const char *str);
|
bool isidcontinue(file_t *f, const char *str);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
83
utils.c
83
utils.c
@ -20,18 +20,18 @@ const char *after_spaces(const char *str, bool skip_nl)
|
|||||||
// Skip whitespace and comments:
|
// Skip whitespace and comments:
|
||||||
skip_whitespace:
|
skip_whitespace:
|
||||||
switch (*str) {
|
switch (*str) {
|
||||||
case '\r': case '\n':
|
case '\r': case '\n':
|
||||||
if (!skip_nl) break;
|
if (!skip_nl) break;
|
||||||
__attribute__ ((fallthrough));
|
__attribute__ ((fallthrough));
|
||||||
case ' ': case '\t': {
|
case ' ': case '\t': {
|
||||||
++str;
|
++str;
|
||||||
goto skip_whitespace;
|
goto skip_whitespace;
|
||||||
}
|
}
|
||||||
case '#': {
|
case '#': {
|
||||||
while (*str && *str != '\n') ++str;
|
while (*str && *str != '\n') ++str;
|
||||||
goto skip_whitespace;
|
goto skip_whitespace;
|
||||||
}
|
}
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
@ -90,39 +90,38 @@ char unescapechar(const char *escaped, const char **end)
|
|||||||
size_t len = 1;
|
size_t len = 1;
|
||||||
unsigned char ret = (unsigned char)*escaped;
|
unsigned char ret = (unsigned char)*escaped;
|
||||||
switch (*escaped) {
|
switch (*escaped) {
|
||||||
case 'a': ret = '\a'; break; case 'b': ret = '\b'; break;
|
case 'a': ret = '\a'; break; case 'b': ret = '\b'; break;
|
||||||
case 'n': ret = '\n'; break; case 'r': ret = '\r'; break;
|
case 'n': ret = '\n'; break; case 'r': ret = '\r'; break;
|
||||||
case 't': ret = '\t'; break; case 'v': ret = '\v'; break;
|
case 't': ret = '\t'; break; case 'v': ret = '\v'; break;
|
||||||
case 'e': ret = '\033'; break; case '\\': ret = '\\'; break;
|
case 'e': ret = '\033'; break; case '\\': ret = '\\'; break;
|
||||||
case 'x': { // Hex
|
case 'x': { // Hex
|
||||||
static const unsigned char hextable[255] = {
|
static const unsigned char hextable[255] = {
|
||||||
['0']=0x10, ['1']=0x1, ['2']=0x2, ['3']=0x3, ['4']=0x4,
|
['0']=0x10, ['1']=0x1, ['2']=0x2, ['3']=0x3, ['4']=0x4,
|
||||||
['5']=0x5, ['6']=0x6, ['7']=0x7, ['8']=0x8, ['9']=0x9,
|
['5']=0x5, ['6']=0x6, ['7']=0x7, ['8']=0x8, ['9']=0x9,
|
||||||
['a']=0xa, ['b']=0xb, ['c']=0xc, ['d']=0xd, ['e']=0xe, ['f']=0xf,
|
['a']=0xa, ['b']=0xb, ['c']=0xc, ['d']=0xd, ['e']=0xe, ['f']=0xf,
|
||||||
['A']=0xa, ['B']=0xb, ['C']=0xc, ['D']=0xd, ['E']=0xe, ['F']=0xf,
|
['A']=0xa, ['B']=0xb, ['C']=0xc, ['D']=0xd, ['E']=0xe, ['F']=0xf,
|
||||||
};
|
};
|
||||||
if (hextable[(int)escaped[1]] && hextable[(int)escaped[2]]) {
|
if (hextable[(int)escaped[1]] && hextable[(int)escaped[2]]) {
|
||||||
ret = (hextable[(int)escaped[1]] << 4) | (hextable[(int)escaped[2]] & 0xF);
|
ret = (hextable[(int)escaped[1]] << 4) | (hextable[(int)escaped[2]] & 0xF);
|
||||||
len = 3;
|
len = 3;
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { // Octal
|
break;
|
||||||
ret = (unsigned char)(escaped[0] - '0');
|
}
|
||||||
if ('0' <= escaped[1] && escaped[1] <= '7') {
|
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { // Octal
|
||||||
|
ret = (unsigned char)(escaped[0] - '0');
|
||||||
|
if ('0' <= escaped[1] && escaped[1] <= '7') {
|
||||||
|
++len;
|
||||||
|
ret = (ret << 3) | (escaped[1] - '0');
|
||||||
|
if ('0' <= escaped[2] && escaped[2] <= '7') {
|
||||||
++len;
|
++len;
|
||||||
ret = (ret << 3) | (escaped[1] - '0');
|
ret = (ret << 3) | (escaped[2] - '0');
|
||||||
if ('0' <= escaped[2] && escaped[2] <= '7') {
|
|
||||||
++len;
|
|
||||||
ret = (ret << 3) | (escaped[2] - '0');
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
|
||||||
default: {
|
|
||||||
if (end) *end = escaped;
|
|
||||||
return (char)0;
|
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
if (end) *end = escaped;
|
||||||
|
return (char)0;
|
||||||
}
|
}
|
||||||
if (end) *end = &escaped[len];
|
if (end) *end = &escaped[len];
|
||||||
return (char)ret;
|
return (char)ret;
|
||||||
@ -151,4 +150,4 @@ void delete(void *p)
|
|||||||
*((void**)p) = NULL;
|
*((void**)p) = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||||
|
Loading…
Reference in New Issue
Block a user