From c43e4781763ee3f3f148e821a88e99c6b80c58db Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Mon, 14 Dec 2020 18:11:33 -0800
Subject: Added % operator to ..

---
 bp.1       | 22 +++++++++++++---------
 compiler.c |  9 ++++++++-
 vm.c       | 16 +++++++++++++---
 3 files changed, 34 insertions(+), 13 deletions(-)
diff --git a/bp.1 b/bp.1
index 60a5e8f..03874ef 100644
--- a/bp.1
+++ b/bp.1
@@ -72,9 +72,6 @@ A chain of patterns, pronounced \fI<pat1>\fB-then-\fI<pat2>\fR
 A series of ordered choices (if one pattern matches, the following patterns
 will not be attempted), pronounced \fI<pat1>\fB-or-\fI<pat2>\fR
 
-.B ..
-Any text \fBup-to-and-including\fR the following pattern, if any (multiline: \fB...\fR)
-
 .B .
 \fBAny\fR character (multiline: $.)
 
@@ -102,10 +99,10 @@ The literal \fBcharacter-\fI<c>\fR
 .B `\fI<c1>\fB-\fI<c2>\fR
 The \fBcharacter-range-\fI<c1>\fB-to-\fI<c2>\fR
 
-.B \\\fI<esc>\fR
+.B \\\\\fI<esc>\fR
 The \fBescape-sequence-\fI<esc>\fR (\fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.)
 
-.B \\\fI<esc1>\fB-\fI<esc2>\fR
+.B \\\\\fI<esc1>\fB-\fI<esc2>\fR
 The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
 
 .B !\fI<pat>\fR
@@ -132,6 +129,13 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
 \fI<repeating-pat>\fB-separated-by-\fI<sep>\fR (equivalent to \fI<pat>
 \fB0+(\fI<sep><pat>\fB)\fR)
 
+.B .. \fI<pat>\fR
+Any text \fBup-to-and-including\fR \fI<pat>\fR (multiline: \fB...\fR)
+
+.B .. \fI<pat>\fB % \fI<skip>\fR
+Any text \fBup-to-and-including\fR \fI<pat>\fR, but skipping over instances of \fI<skip>\fR.
+E.g. \fB`"..`" % (`\\.)\fR
+
 .B <\fI<pat>\fR
 \fBJust-after-\fI<pat>\fR (lookbehind)
 
@@ -151,12 +155,12 @@ be a string, and it may contain references to captured values: \fB@0\fR
 \fB@[\fIfoo\fR]\fR (the capture named \fIfoo\fR in \fI<pat>\fR), etc.
 
 .B \fI<pat1>\fB == \fI<pat2>\fR
-Will match only if \fI<pat1>\fR and \fI<pat2>\fR both match and have the exact
-same length. Pronounced \fI<pat1>\fB-assuming-it-equals-\fI<pat2>\fR
+Will match only if \fI<pat1>\fR matches and \fI<pat2>\fR matches the text of \fI<pat1>\fR's
+match. Pronounced \fI<pat1>\fB-if-it-matches-\fI<pat2>\fR
 
 .B \fI<pat1>\fB != \fI<pat2>\fR
-Will match only if \fI<pat1>\fR matches, but \fI<pat2>\fR doesn't also match with the
-same length. Pronounced \fI<pat1>\fB-unless-it-equals-\fI<pat2>\fR
+Will match only if \fI<pat1>\fR matches and \fI<pat2>\fR doesn't match the text of
+\fI<pat1>\fR's match. Pronounced \fI<pat1>\fB-unless-it-matches-\fI<pat2>\fR
 
 .B \fI<pat1>\fB != \fI<pat2>\fR
 Will match only if \fI<pat1>\fR and \fI<pat2>\fR don't both match and have the
diff --git a/compiler.c b/compiler.c
index 48d0023..97b1737 100644
--- a/compiler.c
+++ b/compiler.c
@@ -117,9 +117,16 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
                 vm_op_t *till = bpeg_simplepattern(f, str);
                 op->op = VM_UPTO_AND;
                 op->len = -1;
-                op->args.pat = till;
+                op->args.multiple.first = till;
                 if (till)
                     str = till->end;
+                if (matchchar(&str, '%')) {
+                    vm_op_t *skip = bpeg_simplepattern(f, str);
+                    if (!skip)
+                        file_err(f, str, str, "There should be a pattern to skip here after the '%%'");
+                    op->args.multiple.second = skip;
+                    str = skip->end;
+                }
                 break;
             } else {
               anychar:
diff --git a/vm.c b/vm.c
index 4f26d5c..ed151bf 100644
--- a/vm.c
+++ b/vm.c
@@ -144,15 +144,25 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
             match_t *m = calloc(sizeof(match_t), 1);
             m->start = str;
             m->op = op;
-            if (op->args.pat) {
+            if (op->args.multiple.first) {
+                match_t **dest = &m->child;
                 for (const char *prev = NULL; prev < str; ) {
                     prev = str;
-                    match_t *p = _match(g, f, str, op->args.pat, flags, rec);
+                    match_t *p = _match(g, f, str, op->args.multiple.first, flags, rec);
                     if (p) {
-                        m->child = p;
+                        *dest = p;
                         m->end = p->end;
                         return m;
                     }
+                    if (op->args.multiple.second) {
+                        p = _match(g, f, str, op->args.multiple.second, flags, rec);
+                        if (p) {
+                            *dest = p;
+                            dest = &p->nextsibling;
+                            str = p->end;
+                            continue;
+                        }
+                    }
                     // This isn't in the for() structure because there needs to
                     // be at least once chance to match the pattern, even if
                     // we're at the end of the string already (e.g. "..$").
-- 
cgit v1.2.3