aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2020-12-14 18:11:33 -0800
committerBruce Hill <bruce@bruce-hill.com>2020-12-14 18:11:33 -0800
commitc43e4781763ee3f3f148e821a88e99c6b80c58db (patch)
tree53efc5bdee0f16d6e39e5fcae9888c0b21f06ad2
parent41639915dedfb9e6ec15e2851538b34d0562660b (diff)
Added % operator to ..
-rw-r--r--bp.122
-rw-r--r--compiler.c9
-rw-r--r--vm.c16
3 files changed, 34 insertions, 13 deletions
diff --git a/bp.1 b/bp.1
index 60a5e8f..03874ef 100644
--- a/bp.1
+++ b/bp.1
@@ -72,9 +72,6 @@ A chain of patterns, pronounced \fI<pat1>\fB-then-\fI<pat2>\fR
A series of ordered choices (if one pattern matches, the following patterns
will not be attempted), pronounced \fI<pat1>\fB-or-\fI<pat2>\fR
-.B ..
-Any text \fBup-to-and-including\fR the following pattern, if any (multiline: \fB...\fR)
-
.B .
\fBAny\fR character (multiline: $.)
@@ -102,10 +99,10 @@ The literal \fBcharacter-\fI<c>\fR
.B `\fI<c1>\fB-\fI<c2>\fR
The \fBcharacter-range-\fI<c1>\fB-to-\fI<c2>\fR
-.B \\\fI<esc>\fR
+.B \\\\\fI<esc>\fR
The \fBescape-sequence-\fI<esc>\fR (\fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.)
-.B \\\fI<esc1>\fB-\fI<esc2>\fR
+.B \\\\\fI<esc1>\fB-\fI<esc2>\fR
The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
.B !\fI<pat>\fR
@@ -132,6 +129,13 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
\fI<repeating-pat>\fB-separated-by-\fI<sep>\fR (equivalent to \fI<pat>
\fB0+(\fI<sep><pat>\fB)\fR)
+.B .. \fI<pat>\fR
+Any text \fBup-to-and-including\fR \fI<pat>\fR (multiline: \fB...\fR)
+
+.B .. \fI<pat>\fB % \fI<skip>\fR
+Any text \fBup-to-and-including\fR \fI<pat>\fR, but skipping over instances of \fI<skip>\fR.
+E.g. \fB`"..`" % (`\\.)\fR
+
.B <\fI<pat>\fR
\fBJust-after-\fI<pat>\fR (lookbehind)
@@ -151,12 +155,12 @@ be a string, and it may contain references to captured values: \fB@0\fR
\fB@[\fIfoo\fR]\fR (the capture named \fIfoo\fR in \fI<pat>\fR), etc.
.B \fI<pat1>\fB == \fI<pat2>\fR
-Will match only if \fI<pat1>\fR and \fI<pat2>\fR both match and have the exact
-same length. Pronounced \fI<pat1>\fB-assuming-it-equals-\fI<pat2>\fR
+Will match only if \fI<pat1>\fR matches and \fI<pat2>\fR matches the text of \fI<pat1>\fR's
+match. Pronounced \fI<pat1>\fB-if-it-matches-\fI<pat2>\fR
.B \fI<pat1>\fB != \fI<pat2>\fR
-Will match only if \fI<pat1>\fR matches, but \fI<pat2>\fR doesn't also match with the
-same length. Pronounced \fI<pat1>\fB-unless-it-equals-\fI<pat2>\fR
+Will match only if \fI<pat1>\fR matches and \fI<pat2>\fR doesn't match the text of
+\fI<pat1>\fR's match. Pronounced \fI<pat1>\fB-unless-it-matches-\fI<pat2>\fR
.B \fI<pat1>\fB != \fI<pat2>\fR
Will match only if \fI<pat1>\fR and \fI<pat2>\fR don't both match and have the
diff --git a/compiler.c b/compiler.c
index 48d0023..97b1737 100644
--- a/compiler.c
+++ b/compiler.c
@@ -117,9 +117,16 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
vm_op_t *till = bpeg_simplepattern(f, str);
op->op = VM_UPTO_AND;
op->len = -1;
- op->args.pat = till;
+ op->args.multiple.first = till;
if (till)
str = till->end;
+ if (matchchar(&str, '%')) {
+ vm_op_t *skip = bpeg_simplepattern(f, str);
+ if (!skip)
+ file_err(f, str, str, "There should be a pattern to skip here after the '%%'");
+ op->args.multiple.second = skip;
+ str = skip->end;
+ }
break;
} else {
anychar:
diff --git a/vm.c b/vm.c
index 4f26d5c..ed151bf 100644
--- a/vm.c
+++ b/vm.c
@@ -144,15 +144,25 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->op = op;
- if (op->args.pat) {
+ if (op->args.multiple.first) {
+ match_t **dest = &m->child;
for (const char *prev = NULL; prev < str; ) {
prev = str;
- match_t *p = _match(g, f, str, op->args.pat, flags, rec);
+ match_t *p = _match(g, f, str, op->args.multiple.first, flags, rec);
if (p) {
- m->child = p;
+ *dest = p;
m->end = p->end;
return m;
}
+ if (op->args.multiple.second) {
+ p = _match(g, f, str, op->args.multiple.second, flags, rec);
+ if (p) {
+ *dest = p;
+ dest = &p->nextsibling;
+ str = p->end;
+ continue;
+ }
+ }
// This isn't in the for() structure because there needs to
// be at least one chance to match the pattern, even if
// we're at the end of the string already (e.g. "..$").