Added strict mode for upto operator: ..=Abc

author: Bruce Hill <bruce@bruce-hill.com> 2021-07-30 19:24:35 -0700
committer: Bruce Hill <bruce@bruce-hill.com> 2021-07-30 19:24:35 -0700
commit: ba6ee18ded5e76e852dd7eab89e6cc2b420b42d2 (patch)
tree: 8a238dbfcf91766ee3b882d8c87a9587ca107119
parent: 18e8a131f58a54008512b05062382900027bf1d9 (diff)
7 files changed, 45 insertions, 26 deletions
diff --git a/README.md b/README.md
index 3cfa038..4d4445a 100644
--- a/README.md
+++ b/README.md
@@ -41,8 +41,9 @@ Pattern            | Meaning
 `"foo"`, `'foo'`   | The literal string `foo`. There are no escape sequences within strings.
 `pat1 pat2`        | `pat1` followed by `pat2`
 `pat1 / pat2`      | `pat1` if it matches, otherwise `pat2`
-`..pat`            | Any text up to and including `pat` (except newlines)
+`.. pat`           | Any text up to and including `pat` (except newlines)
 `.. % skip pat`    | Any text up to and including `pat` (except newlines), skipping over instances of `skip`
+`.. = repeat pat`  | Any number of repetitions of `repeat` up to and including `pat`
 `.`                | Any single character (except newline)
 `^^`               | The start of the input
 `^`                | The start of a line
diff --git a/bp.1 b/bp.1
index 77966c0..8bab758 100644
--- a/bp.1
+++ b/bp.1
@@ -237,6 +237,15 @@ over instances of \f[I]skip\f[R]
 opening quote, up to closing quote, skipping over backslash followed by
 a single character)
 .TP
+\f[B].. =\f[R] \f[I]only\f[R] \f[I]pat\f[R]
+Any number of repetitions of the pattern \f[I]only\f[R] up to and
+including \f[I]pat\f[R] (e.g.\ \f[B]\[dq]f\[dq] ..=abc \[dq]k\[dq]\f[R]
+matches the letter \[lq]f\[rq] followed by some alphabetic characters
+and then a \[lq]k\[rq], which would match \[lq]fork\[rq], but not
+\[lq]free kit\[rq]). This is essentially a \[lq]non-greedy\[rq] version
+of \f[B]*\f[R], and \f[B].. pat\f[R] can be thought of as the special
+case of \f[B]..=. pat\f[R].
+.TP
 \f[B]<\f[R] \f[I]pat\f[R]
 Matches at the current position if \f[I]pat\f[R] matches immediately
 before the current position (lookbehind).
diff --git a/bp.1.md b/bp.1.md
index 60a51a9..1e02264 100644
--- a/bp.1.md
+++ b/bp.1.md
@@ -207,6 +207,13 @@ etc.)
 of *skip* (e.g. `'"' ..%('\' .) '"'` opening quote, up to closing quote,
 skipping over backslash followed by a single character)
 
+`.. =` *only* *pat*
+: Any number of repetitions of the pattern *only* up to and including *pat*
+(e.g. `"f" ..=abc "k"` matches the letter "f" followed by some alphabetic
+characters and then a "k", which would match "fork", but not "free kit"). This
+is essentially a "non-greedy" version of `*`, and `.. pat` can be thought of as
+the special case of `..=. pat`.
+
 `<` *pat*
 : Matches at the current position if *pat* matches immediately before the
 current position (lookbehind). Conceptually, you can think of this as creating
diff --git a/grammars/bp.bp b/grammars/bp.bp
index 37fa83f..977fce8 100644
--- a/grammars/bp.bp
+++ b/grammars/bp.bp
@@ -51,7 +51,7 @@ Nodent: "\N"
 Word-boundary: "\b"
 Identifier-char: "\i"
 Identifier-start: "\I"
-Upto-and: ".." [__`%__@second=simple-pat] [__@first=simple-pat] 
+Upto-and: ".." [__(`%/`=)__@second=simple-pat] [__@first=simple-pat] 
 Repeat: (
         @min=(=>'0') (`*=>"-") @max=(=>'∞')
       / @min=int __ `- __ @max=int
diff --git a/match.c b/match.c
index 8bcead7..b594c80 100644
--- a/match.c
+++ b/match.c
@@ -348,7 +348,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
             }
             return new_match(defs, pat, str, str, NULL);
         }
-        case BP_UPTO: {
+        case BP_UPTO: case BP_UPTO_STRICT: {
             match_t *m = new_match(defs, pat, str, str, NULL);
             pat_t *target = deref(defs, pat->args.multiple.first),
                   *skip = deref(defs, pat->args.multiple.second);
@@ -387,7 +387,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
                 // This isn't in the for() structure because there needs to
                 // be at least one chance to match the pattern, even if
                 // we're at the end of the string already (e.g. "..$").
-                if (str < f->end && *str != '\n')
+                if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
                     str = next_char(f, str);
             }
             recycle_if_unused(&m);
diff --git a/pattern.c b/pattern.c
index 70c88d9..861efdf 100644
--- a/pattern.c
+++ b/pattern.c
@@ -153,7 +153,7 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second)
     // If `first` is an UPTO operator (..) or contains one, then let it know
     // that `second` is what it's up *to*.
     for (pat_t *p = first; p; ) {
-        if (p->type == BP_UPTO) {
+        if (p->type == BP_UPTO || p->type == BP_UPTO_STRICT) {
             p->args.multiple.first = second;
             p->min_matchlen = second->min_matchlen;
             p->max_matchlen = -1;
@@ -238,13 +238,14 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
             if (*str == '.') { // ".."
                 pat_t *skip = NULL;
                 str = next_char(f, str);
-                if (matchchar(&str, '%')) {
+                char skipper = *str;
+                if (matchchar(&str, '%') || matchchar(&str, '=')) {
                     skip = bp_simplepattern(f, str);
                     if (!skip)
-                        file_err(f, str, str, "There should be a pattern to skip here after the '%%'");
+                        file_err(f, str, str, "There should be a pattern to skip here after the '%c'", skipper);
                     str = skip->end;
                 }
-                pat_t *upto = new_pat(f, start, str, 0, -1, BP_UPTO);
+                pat_t *upto = new_pat(f, start, str, 0, -1, skipper == '=' ? BP_UPTO_STRICT : BP_UPTO);
                 upto->args.multiple.second = skip;
                 return upto;
             } else {
diff --git a/types.h b/types.h
index 158178d..bb491bc 100644
--- a/types.h
+++ b/types.h
@@ -20,24 +20,25 @@ enum pattype_e {
     BP_RANGE         = 5,
     BP_NOT           = 6,
     BP_UPTO          = 7,
-    BP_REPEAT        = 8,
-    BP_BEFORE        = 9,
-    BP_AFTER         = 10,
-    BP_CAPTURE       = 11,
-    BP_OTHERWISE     = 12,
-    BP_CHAIN         = 13,
-    BP_MATCH         = 14,
-    BP_NOT_MATCH     = 15,
-    BP_REPLACE       = 16,
-    BP_REF           = 17,
-    BP_NODENT        = 18,
-    BP_START_OF_FILE = 19,
-    BP_START_OF_LINE = 20,
-    BP_END_OF_FILE   = 21,
-    BP_END_OF_LINE   = 22,
-    BP_WORD_BOUNDARY = 23,
-    BP_LEFTRECURSION = 24,
-    BP_ERROR         = 25,
+    BP_UPTO_STRICT   = 8,
+    BP_REPEAT        = 9,
+    BP_BEFORE        = 10,
+    BP_AFTER         = 11,
+    BP_CAPTURE       = 12,
+    BP_OTHERWISE     = 13,
+    BP_CHAIN         = 14,
+    BP_MATCH         = 15,
+    BP_NOT_MATCH     = 16,
+    BP_REPLACE       = 17,
+    BP_REF           = 18,
+    BP_NODENT        = 19,
+    BP_START_OF_FILE = 20,
+    BP_START_OF_LINE = 21,
+    BP_END_OF_FILE   = 22,
+    BP_END_OF_LINE   = 23,
+    BP_WORD_BOUNDARY = 24,
+    BP_LEFTRECURSION = 25,
+    BP_ERROR         = 26,
 };
 
 struct match_s; // forward declared to resolve circular struct defs
author	Bruce Hill <bruce@bruce-hill.com>	2021-07-30 19:24:35 -0700
committer	Bruce Hill <bruce@bruce-hill.com>	2021-07-30 19:24:35 -0700
commit	ba6ee18ded5e76e852dd7eab89e6cc2b420b42d2 (patch)
tree	8a238dbfcf91766ee3b882d8c87a9587ca107119
parent	18e8a131f58a54008512b05062382900027bf1d9 (diff)