Left recursion correctness fixes

This commit is contained in:
Bruce Hill 2021-10-02 11:33:40 -07:00
parent 44f8608467
commit 304d5c35a8
2 changed files with 49 additions and 18 deletions

View File

@ -17,7 +17,7 @@ String-pattern: ..%(\n / Nodent / Identifier-char / Identifier-start / Escape /
pat: simple-pat !(__("!~"/"~")) / suffixed-pat
simple-pat: (Upto-and / Dot / Word-boundary/ String / Chars / Nodent
/ Identifier-char / Identifier-start / Escape-range
/ Escape / Repeat / Optional / No / After / Before / Capture / Empty-replacement
/ Escape / Repeat / Optional / No / After / Before / Capture
/ Start-of-File / Start-of-Line / End-of-File / End-of-Line / Ref / parens)
suffixed-pat: (
@ -63,10 +63,11 @@ After: `< __ pat
Before: `> __ pat
Capture: `@ [__ @capture-name=(id/`!) __ !"=>" `=] __ (@capture=pat / @error=(=>"Expected pattern to capture"))
Replace: (
@replace-pat=(Replace / Chain / pat) __ "=>" (__ @replacement=String / @error=(=>"Expected replacement string"))
@replace-pat=[Chain-noreplace / pat] __ "=>" (__ @replacement=String / @error=(=>"Expected replacement string"))
)
Replace-chain: Replace-chain __ "=>" (__ @replacement=String / @error=(=>"Expected replacement string"))
Empty-replacement: (
@replace-pat=(=>"''") "=>" (__ @replacement=String / @error=(=>"Expected replacement string"))
(=>"EMPTY") @replace-pat=(=>"''") "=>" (__ @replacement=String / @error=(=>"Expected replacement string"))
)
Ref: @name=id !(__`:)
Start-of-File: "^^"
@ -76,9 +77,10 @@ End-of-Line: "$"
parens: `( __ extended-pat (__ `) / @error=(=>"Expected closing parenthesis here"))
Chain: 2+@(pat !(__"=>") / Replace)%__
Otherwise: 2+@(Replace / Chain / pat)%(__`/__)
extended-pat: Otherwise / Replace / Chain / pat
Chain: @(Replace/pat) __ @(Chain/Replace/pat)
Chain-noreplace: @pat __ @(Chain-noreplace/pat)
Otherwise: 2+@(Chain / Replace / pat)%(__`/__)
extended-pat: Otherwise / Chain / Replace / pat
# Special-symbol rules:
_: *(` / \t)

53
match.c
View File

@ -56,6 +56,27 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat);
__attribute__((returns_nonnull))
static match_t *new_match(pat_t *pat, const char *start, const char *end, match_t *children[]);
static match_t *clone_match(match_t *m)
{
if (!m) return NULL;
match_t *ret = new_match(m->pat, m->start, m->end, NULL);
if (m->children) {
size_t child_cap = 0, nchildren = 0;
if (!m->children[0] || !m->children[1] || !m->children[2]) {
child_cap = 3;
ret->children = ret->_children;
}
for (int i = 0; m->children[i]; i++) {
if (nchildren+1 >= child_cap) {
ret->children = grow(ret->children, child_cap += 5);
for (size_t i = nchildren; i < child_cap; i++) ret->children[i] = NULL;
}
ret->children[nchildren++] = clone_match(m->children[i]);
}
}
return ret;
}
// Prepend to a doubly linked list
static inline void gc_list_prepend(match_t **head, match_t *m)
{
@ -227,7 +248,7 @@ static pat_t *get_prerequisite(match_ctx_t *ctx, pat_t *pat)
case BP_REPLACE:
p = p->args.replace.pat; break;
case BP_REF: {
if (++derefs > 10) return p;
if (++derefs > 10) return p; // In case of left recursion
pat_t *p2 = deref(ctx, p);
if (p2 == p) return p2;
p = p2;
@ -259,7 +280,6 @@ static match_t *_next_match(match_ctx_t *ctx, const char *str, pat_t *pat, pat_t
// Clear the cache so it's not full of old cache values from different parts of the file:
cache_destroy(ctx);
pat = deref(ctx, pat); // Avoid repeated lookups
pat_t *first = get_prerequisite(ctx, pat);
// Don't bother looping if this can only match at the start/end:
@ -316,7 +336,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
// See: left-recursion.md for more details.
if (str == pat->args.leftrec.at) {
++pat->args.leftrec.visits;
return pat->args.leftrec.match;
return clone_match(pat->args.leftrec.match);
} else {
return match(ctx, str, pat->args.leftrec.fallback);
}
@ -602,6 +622,9 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
if (ref == NULL)
errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
if (ref->type == BP_LEFTRECURSION)
return match(ctx, str, ref);
pat_t rec_op = {
.type = BP_LEFTRECURSION,
.start = ref->start, .end = ref->end,
@ -627,15 +650,21 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
},
};
match_t *m = NULL;
// Keep matching as long as left recursion keeps happening and forward progress is made
for (const char *prev = str; m == NULL || (rec_op.args.leftrec.visits > 0 && m->end > prev); prev = m->end) {
rec_op.args.leftrec.match = m;
rec_op.args.leftrec.visits = 0;
match_t *m2 = match(&ctx2, str, ref);
if (!m2) break;
if (m) recycle_match(&m);
m = m2;
match_t *m = match(&ctx2, str, ref);
// If left recursion was involved, keep retrying while forward progress can be made:
if (m && rec_op.args.leftrec.visits > 0) {
while (1) {
const char *prev = m->end;
rec_op.args.leftrec.match = m;
match_t *m2 = match(&ctx2, str, ref);
if (!m2) break;
if (m2->end <= prev) {
recycle_match(&m2);
break;
}
recycle_match(&m);
m = m2;
}
}
if (!m) {