aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2021-07-30 19:24:35 -0700
committerBruce Hill <bruce@bruce-hill.com>2021-07-30 19:24:35 -0700
commitba6ee18ded5e76e852dd7eab89e6cc2b420b42d2 (patch)
tree8a238dbfcf91766ee3b882d8c87a9587ca107119
parent18e8a131f58a54008512b05062382900027bf1d9 (diff)
Added strict mode for upto operator: ..=Abc
-rw-r--r--README.md3
-rw-r--r--bp.19
-rw-r--r--bp.1.md7
-rw-r--r--grammars/bp.bp2
-rw-r--r--match.c4
-rw-r--r--pattern.c9
-rw-r--r--types.h37
7 files changed, 45 insertions, 26 deletions
diff --git a/README.md b/README.md
index 3cfa038..4d4445a 100644
--- a/README.md
+++ b/README.md
@@ -41,8 +41,9 @@ Pattern | Meaning
`"foo"`, `'foo'` | The literal string `foo`. There are no escape sequences within strings.
`pat1 pat2` | `pat1` followed by `pat2`
`pat1 / pat2` | `pat1` if it matches, otherwise `pat2`
-`..pat` | Any text up to and including `pat` (except newlines)
+`.. pat` | Any text up to and including `pat` (except newlines)
`.. % skip pat` | Any text up to and including `pat` (except newlines), skipping over instances of `skip`
+`.. = repeat pat` | Any number of repetitions of `repeat` up to and including `pat`
`.` | Any single character (except newline)
`^^` | The start of the input
`^` | The start of a line
diff --git a/bp.1 b/bp.1
index 77966c0..8bab758 100644
--- a/bp.1
+++ b/bp.1
@@ -237,6 +237,15 @@ over instances of \f[I]skip\f[R]
opening quote, up to closing quote, skipping over backslash followed by
a single character)
.TP
+\f[B].. =\f[R] \f[I]only\f[R] \f[I]pat\f[R]
+Any number of repetitions of the pattern \f[I]only\f[R] up to and
+including \f[I]pat\f[R] (e.g.\ \f[B]\[dq]f\[dq] ..=abc \[dq]k\[dq]\f[R]
+matches the letter \[lq]f\[rq] followed by some alphabetic characters
+and then a \[lq]k\[rq], which would match \[lq]fork\[rq], but not
+\[lq]free kit\[rq]). This is essentially a \[lq]non-greedy\[rq] version
+of \f[B]*\f[R], and \f[B].. pat\f[R] can be thought of as the special
+case of \f[B]..=. pat\f[R]
+.TP
\f[B]<\f[R] \f[I]pat\f[R]
Matches at the current position if \f[I]pat\f[R] matches immediately
before the current position (lookbehind).
diff --git a/bp.1.md b/bp.1.md
index 60a51a9..1e02264 100644
--- a/bp.1.md
+++ b/bp.1.md
@@ -207,6 +207,13 @@ etc.)
of *skip* (e.g. `'"' ..%('\' .) '"'` opening quote, up to closing quote,
skipping over backslash followed by a single character)
+`.. =` *only* *pat*
+: Any number of repetitions of the pattern *only* up to and including *pat*
+(e.g. `"f" ..=abc "k"` matches the letter "f" followed by some alphabetic
+characters and then a "k", which would match "fork", but not "free kit"). This
+is essentially a "non-greedy" version of `*`, and `.. pat` can be thought of as
+the special case of `..=. pat`
+
`<` *pat*
: Matches at the current position if *pat* matches immediately before the
current position (lookbehind). Conceptually, you can think of this as creating
diff --git a/grammars/bp.bp b/grammars/bp.bp
index 37fa83f..977fce8 100644
--- a/grammars/bp.bp
+++ b/grammars/bp.bp
@@ -51,7 +51,7 @@ Nodent: "\N"
Word-boundary: "\b"
Identifier-char: "\i"
Identifier-start: "\I"
-Upto-and: ".." [__`%__@second=simple-pat] [__@first=simple-pat]
+Upto-and: ".." [__(`%/`=)__@second=simple-pat] [__@first=simple-pat]
Repeat: (
@min=(=>'0') (`*=>"-") @max=(=>'∞')
/ @min=int __ `- __ @max=int
diff --git a/match.c b/match.c
index 8bcead7..b594c80 100644
--- a/match.c
+++ b/match.c
@@ -348,7 +348,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
}
return new_match(defs, pat, str, str, NULL);
}
- case BP_UPTO: {
+ case BP_UPTO: case BP_UPTO_STRICT: {
match_t *m = new_match(defs, pat, str, str, NULL);
pat_t *target = deref(defs, pat->args.multiple.first),
*skip = deref(defs, pat->args.multiple.second);
@@ -387,7 +387,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
// This isn't in the for() structure because there needs to
// be at least one chance to match the pattern, even if
// we're at the end of the string already (e.g. "..$").
- if (str < f->end && *str != '\n')
+ if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
str = next_char(f, str);
}
recycle_if_unused(&m);
diff --git a/pattern.c b/pattern.c
index 70c88d9..861efdf 100644
--- a/pattern.c
+++ b/pattern.c
@@ -153,7 +153,7 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second)
// If `first` is an UPTO operator (..) or contains one, then let it know
// that `second` is what it's up *to*.
for (pat_t *p = first; p; ) {
- if (p->type == BP_UPTO) {
+ if (p->type == BP_UPTO || p->type == BP_UPTO_STRICT) {
p->args.multiple.first = second;
p->min_matchlen = second->min_matchlen;
p->max_matchlen = -1;
@@ -238,13 +238,14 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
if (*str == '.') { // ".."
pat_t *skip = NULL;
str = next_char(f, str);
- if (matchchar(&str, '%')) {
+ char skipper = *str;
+ if (matchchar(&str, '%') || matchchar(&str, '=')) {
skip = bp_simplepattern(f, str);
if (!skip)
- file_err(f, str, str, "There should be a pattern to skip here after the '%%'");
+ file_err(f, str, str, "There should be a pattern to skip here after the '%c'", skipper);
str = skip->end;
}
- pat_t *upto = new_pat(f, start, str, 0, -1, BP_UPTO);
+ pat_t *upto = new_pat(f, start, str, 0, -1, skipper == '=' ? BP_UPTO_STRICT : BP_UPTO);
upto->args.multiple.second = skip;
return upto;
} else {
diff --git a/types.h b/types.h
index 158178d..bb491bc 100644
--- a/types.h
+++ b/types.h
@@ -20,24 +20,25 @@ enum pattype_e {
BP_RANGE = 5,
BP_NOT = 6,
BP_UPTO = 7,
- BP_REPEAT = 8,
- BP_BEFORE = 9,
- BP_AFTER = 10,
- BP_CAPTURE = 11,
- BP_OTHERWISE = 12,
- BP_CHAIN = 13,
- BP_MATCH = 14,
- BP_NOT_MATCH = 15,
- BP_REPLACE = 16,
- BP_REF = 17,
- BP_NODENT = 18,
- BP_START_OF_FILE = 19,
- BP_START_OF_LINE = 20,
- BP_END_OF_FILE = 21,
- BP_END_OF_LINE = 22,
- BP_WORD_BOUNDARY = 23,
- BP_LEFTRECURSION = 24,
- BP_ERROR = 25,
+ BP_UPTO_STRICT = 8,
+ BP_REPEAT = 9,
+ BP_BEFORE = 10,
+ BP_AFTER = 11,
+ BP_CAPTURE = 12,
+ BP_OTHERWISE = 13,
+ BP_CHAIN = 14,
+ BP_MATCH = 15,
+ BP_NOT_MATCH = 16,
+ BP_REPLACE = 17,
+ BP_REF = 18,
+ BP_NODENT = 19,
+ BP_START_OF_FILE = 20,
+ BP_START_OF_LINE = 21,
+ BP_END_OF_FILE = 22,
+ BP_END_OF_LINE = 23,
+ BP_WORD_BOUNDARY = 24,
+ BP_LEFTRECURSION = 25,
+ BP_ERROR = 26,
};
struct match_s; // forward declared to resolve circular struct defs