aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--grammars/bp.bp14
-rw-r--r--match.c53
2 files changed, 49 insertions, 18 deletions
diff --git a/grammars/bp.bp b/grammars/bp.bp
index a6e6f0b..68d1ec8 100644
--- a/grammars/bp.bp
+++ b/grammars/bp.bp
@@ -17,7 +17,7 @@ String-pattern: ..%(\n / Nodent / Identifier-char / Identifier-start / Escape /
pat: simple-pat !(__("!~"/"~")) / suffixed-pat
simple-pat: (Upto-and / Dot / Word-boundary/ String / Chars / Nodent
/ Identifier-char / Identifier-start / Escape-range
- / Escape / Repeat / Optional / No / After / Before / Capture / Empty-replacement
+ / Escape / Repeat / Optional / No / After / Before / Capture
/ Start-of-File / Start-of-Line / End-of-File / End-of-Line / Ref / parens)
suffixed-pat: (
@@ -63,10 +63,11 @@ After: `< __ pat
Before: `> __ pat
Capture: `@ [__ @capture-name=(id/`!) __ !"=>" `=] __ (@capture=pat / @error=(=>"Expected pattern to capture"))
Replace: (
- @replace-pat=(Replace / Chain / pat) __ "=>" (__ @replacement=String / @error=(=>"Expected replacement string"))
+ @replace-pat=[Chain-noreplace / pat] __ "=>" (__ @replacement=String / @error=(=>"Expected replacement string"))
)
+Replace-chain: Replace-chain __ "=>" (__ @replacement=String / @error=(=>"Expected replacement string"))
Empty-replacement: (
- @replace-pat=(=>"''") "=>" (__ @replacement=String / @error=(=>"Expected replacement string"))
+ (=>"EMPTY") @replace-pat=(=>"''") "=>" (__ @replacement=String / @error=(=>"Expected replacement string"))
)
Ref: @name=id !(__`:)
Start-of-File: "^^"
@@ -76,9 +77,10 @@ End-of-Line: "$"
parens: `( __ extended-pat (__ `) / @error=(=>"Expected closing parenthesis here"))
-Chain: 2+@(pat !(__"=>") / Replace)%__
-Otherwise: 2+@(Replace / Chain / pat)%(__`/__)
-extended-pat: Otherwise / Replace / Chain / pat
+Chain: @(Replace/pat) __ @(Chain/Replace/pat)
+Chain-noreplace: @pat __ @(Chain-noreplace/pat)
+Otherwise: 2+@(Chain / Replace / pat)%(__`/__)
+extended-pat: Otherwise / Chain / Replace / pat
# Special-symbol rules:
_: *(` / \t)
diff --git a/match.c b/match.c
index 2156e05..17f88bb 100644
--- a/match.c
+++ b/match.c
@@ -56,6 +56,27 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat);
__attribute__((returns_nonnull))
static match_t *new_match(pat_t *pat, const char *start, const char *end, match_t *children[]);
+static match_t *clone_match(match_t *m)
+{
+ if (!m) return NULL;
+ match_t *ret = new_match(m->pat, m->start, m->end, NULL);
+ if (m->children) {
+ size_t child_cap = 0, nchildren = 0;
+ if (!m->children[0] || !m->children[1] || !m->children[2]) {
+ child_cap = 3;
+ ret->children = ret->_children;
+ }
+ for (int i = 0; m->children[i]; i++) {
+ if (nchildren+1 >= child_cap) {
+ ret->children = grow(ret->children, child_cap += 5);
+ for (size_t i = nchildren; i < child_cap; i++) ret->children[i] = NULL;
+ }
+ ret->children[nchildren++] = clone_match(m->children[i]);
+ }
+ }
+ return ret;
+}
+
// Prepend to a doubly linked list
static inline void gc_list_prepend(match_t **head, match_t *m)
{
@@ -227,7 +248,7 @@ static pat_t *get_prerequisite(match_ctx_t *ctx, pat_t *pat)
case BP_REPLACE:
p = p->args.replace.pat; break;
case BP_REF: {
- if (++derefs > 10) return p;
+ if (++derefs > 10) return p; // In case of left recursion
pat_t *p2 = deref(ctx, p);
if (p2 == p) return p2;
p = p2;
@@ -259,7 +280,6 @@ static match_t *_next_match(match_ctx_t *ctx, const char *str, pat_t *pat, pat_t
// Clear the cache so it's not full of old cache values from different parts of the file:
cache_destroy(ctx);
- pat = deref(ctx, pat); // Avoid repeated lookups
pat_t *first = get_prerequisite(ctx, pat);
// Don't bother looping if this can only match at the start/end:
@@ -316,7 +336,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
// See: left-recursion.md for more details.
if (str == pat->args.leftrec.at) {
++pat->args.leftrec.visits;
- return pat->args.leftrec.match;
+ return clone_match(pat->args.leftrec.match);
} else {
return match(ctx, str, pat->args.leftrec.fallback);
}
@@ -602,6 +622,9 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
if (ref == NULL)
errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
+ if (ref->type == BP_LEFTRECURSION)
+ return match(ctx, str, ref);
+
pat_t rec_op = {
.type = BP_LEFTRECURSION,
.start = ref->start, .end = ref->end,
@@ -627,15 +650,21 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
},
};
- match_t *m = NULL;
- // Keep matching as long as left recursion keeps happening and forward progress is made
- for (const char *prev = str; m == NULL || (rec_op.args.leftrec.visits > 0 && m->end > prev); prev = m->end) {
- rec_op.args.leftrec.match = m;
- rec_op.args.leftrec.visits = 0;
- match_t *m2 = match(&ctx2, str, ref);
- if (!m2) break;
- if (m) recycle_match(&m);
- m = m2;
+ match_t *m = match(&ctx2, str, ref);
+ // If left recursion was involved, keep retrying while forward progress can be made:
+ if (m && rec_op.args.leftrec.visits > 0) {
+ while (1) {
+ const char *prev = m->end;
+ rec_op.args.leftrec.match = m;
+ match_t *m2 = match(&ctx2, str, ref);
+ if (!m2) break;
+ if (m2->end <= prev) {
+ recycle_match(&m2);
+ break;
+ }
+ recycle_match(&m);
+ m = m2;
+ }
}
if (!m) {