From ba6ee18ded5e76e852dd7eab89e6cc2b420b42d2 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Fri, 30 Jul 2021 19:24:35 -0700 Subject: [PATCH] Added strict mode for upto operator: ..=Abc --- README.md | 3 ++- bp.1 | 9 +++++++++ bp.1.md | 7 +++++++ grammars/bp.bp | 2 +- match.c | 4 ++-- pattern.c | 9 +++++---- types.h | 37 +++++++++++++++++++------------------ 7 files changed, 45 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 3cfa038..4d4445a 100644 --- a/README.md +++ b/README.md @@ -41,8 +41,9 @@ Pattern | Meaning `"foo"`, `'foo'` | The literal string `foo`. There are no escape sequences within strings. `pat1 pat2` | `pat1` followed by `pat2` `pat1 / pat2` | `pat1` if it matches, otherwise `pat2` -`..pat` | Any text up to and including `pat` (except newlines) +`.. pat` | Any text up to and including `pat` (except newlines) `.. % skip pat` | Any text up to and including `pat` (except newlines), skipping over instances of `skip` +`.. = repeat pat` | Any number of repetitions of `repeat` up to and including `pat` `.` | Any single character (except newline) `^^` | The start of the input `^` | The start of a line diff --git a/bp.1 b/bp.1 index 77966c0..8bab758 100644 --- a/bp.1 +++ b/bp.1 @@ -237,6 +237,15 @@ over instances of \f[I]skip\f[R] opening quote, up to closing quote, skipping over backslash followed by a single character) .TP +\f[B].. =\f[R] \f[I]only\f[R] \f[I]pat\f[R] +Any number of repetitions of the pattern \f[I]only\f[R] up to and +including \f[I]pat\f[R] (e.g.\ \f[B]\[dq]f\[dq] ..=abc \[dq]k\[dq]\f[R] +matches the letter \[lq]f\[rq] followed by some alphabetic characters +and then a \[lq]k\[rq], which would match \[lq]fork\[rq], but not +\[lq]free kit\[rq]) This is essentially a \[lq]non-greedy\[rq] version +of \f[B]*\f[R], and \f[B].. pat\f[R] can be thought of as the special +case of \f[B]..=. 
pat\f[R] +.TP \f[B]<\f[R] \f[I]pat\f[R] Matches at the current position if \f[I]pat\f[R] matches immediately before the current position (lookbehind). diff --git a/bp.1.md b/bp.1.md index 60a51a9..1e02264 100644 --- a/bp.1.md +++ b/bp.1.md @@ -207,6 +207,13 @@ etc.) of *skip* (e.g. `'"' ..%('\' .) '"'` opening quote, up to closing quote, skipping over backslash followed by a single character) +`.. =` *only* *pat* +: Any number of repetitions of the pattern *only* up to and including *pat* +(e.g. `"f" ..=abc "k"` matches the letter "f" followed by some alphabetic +characters and then a "k", which would match "fork", but not "free kit"). This +is essentially a "non-greedy" version of `*`, and `.. pat` can be thought of as +the special case of `..=. pat`. + `<` *pat* : Matches at the current position if *pat* matches immediately before the current position (lookbehind). Conceptually, you can think of this as creating diff --git a/grammars/bp.bp b/grammars/bp.bp index 37fa83f..977fce8 100644 --- a/grammars/bp.bp +++ b/grammars/bp.bp @@ -51,7 +51,7 @@ Nodent: "\N" Word-boundary: "\b" Identifier-char: "\i" Identifier-start: "\I" -Upto-and: ".." [__`%__@second=simple-pat] [__@first=simple-pat] +Upto-and: ".." 
[__(`%/`=)__@second=simple-pat] [__@first=simple-pat] Repeat: ( @min=(=>'0') (`*=>"-") @max=(=>'∞') / @min=int __ `- __ @max=int diff --git a/match.c b/match.c index 8bcead7..b594c80 100644 --- a/match.c +++ b/match.c @@ -348,7 +348,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool } return new_match(defs, pat, str, str, NULL); } - case BP_UPTO: { + case BP_UPTO: case BP_UPTO_STRICT: { match_t *m = new_match(defs, pat, str, str, NULL); pat_t *target = deref(defs, pat->args.multiple.first), *skip = deref(defs, pat->args.multiple.second); @@ -387,7 +387,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool // This isn't in the for() structure because there needs to // be at least once chance to match the pattern, even if // we're at the end of the string already (e.g. "..$"). - if (str < f->end && *str != '\n') + if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT) str = next_char(f, str); } recycle_if_unused(&m); diff --git a/pattern.c b/pattern.c index 70c88d9..861efdf 100644 --- a/pattern.c +++ b/pattern.c @@ -153,7 +153,7 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second) // If `first` is an UPTO operator (..) or contains one, then let it know // that `second` is what it's up *to*. for (pat_t *p = first; p; ) { - if (p->type == BP_UPTO) { + if (p->type == BP_UPTO || p->type == BP_UPTO_STRICT) { p->args.multiple.first = second; p->min_matchlen = second->min_matchlen; p->max_matchlen = -1; @@ -238,13 +238,14 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) if (*str == '.') { // ".." 
pat_t *skip = NULL; str = next_char(f, str); - if (matchchar(&str, '%')) { + char skipper = *str; + if (matchchar(&str, '%') || matchchar(&str, '=')) { skip = bp_simplepattern(f, str); if (!skip) - file_err(f, str, str, "There should be a pattern to skip here after the '%%'"); + file_err(f, str, str, "There should be a pattern to skip here after the '%c'", skipper); str = skip->end; } - pat_t *upto = new_pat(f, start, str, 0, -1, BP_UPTO); + pat_t *upto = new_pat(f, start, str, 0, -1, skipper == '=' ? BP_UPTO_STRICT : BP_UPTO); upto->args.multiple.second = skip; return upto; } else { diff --git a/types.h b/types.h index 158178d..bb491bc 100644 --- a/types.h +++ b/types.h @@ -20,24 +20,25 @@ enum pattype_e { BP_RANGE = 5, BP_NOT = 6, BP_UPTO = 7, - BP_REPEAT = 8, - BP_BEFORE = 9, - BP_AFTER = 10, - BP_CAPTURE = 11, - BP_OTHERWISE = 12, - BP_CHAIN = 13, - BP_MATCH = 14, - BP_NOT_MATCH = 15, - BP_REPLACE = 16, - BP_REF = 17, - BP_NODENT = 18, - BP_START_OF_FILE = 19, - BP_START_OF_LINE = 20, - BP_END_OF_FILE = 21, - BP_END_OF_LINE = 22, - BP_WORD_BOUNDARY = 23, - BP_LEFTRECURSION = 24, - BP_ERROR = 25, + BP_UPTO_STRICT = 8, + BP_REPEAT = 9, + BP_BEFORE = 10, + BP_AFTER = 11, + BP_CAPTURE = 12, + BP_OTHERWISE = 13, + BP_CHAIN = 14, + BP_MATCH = 15, + BP_NOT_MATCH = 16, + BP_REPLACE = 17, + BP_REF = 18, + BP_NODENT = 19, + BP_START_OF_FILE = 20, + BP_START_OF_LINE = 21, + BP_END_OF_FILE = 22, + BP_END_OF_LINE = 23, + BP_WORD_BOUNDARY = 24, + BP_LEFTRECURSION = 25, + BP_ERROR = 26, }; struct match_s; // forward declared to resolve circular struct defs