Added strict mode for upto operator: ..=Abc
This commit is contained in:
parent
18e8a131f5
commit
ba6ee18ded
@ -41,8 +41,9 @@ Pattern | Meaning
|
|||||||
`"foo"`, `'foo'` | The literal string `foo`. There are no escape sequences within strings.
|
`"foo"`, `'foo'` | The literal string `foo`. There are no escape sequences within strings.
|
||||||
`pat1 pat2` | `pat1` followed by `pat2`
|
`pat1 pat2` | `pat1` followed by `pat2`
|
||||||
`pat1 / pat2` | `pat1` if it matches, otherwise `pat2`
|
`pat1 / pat2` | `pat1` if it matches, otherwise `pat2`
|
||||||
`..pat` | Any text up to and including `pat` (except newlines)
|
`.. pat` | Any text up to and including `pat` (except newlines)
|
||||||
`.. % skip pat` | Any text up to and including `pat` (except newlines), skipping over instances of `skip`
|
`.. % skip pat` | Any text up to and including `pat` (except newlines), skipping over instances of `skip`
|
||||||
|
`.. = repeat pat` | Any number of repetitions of `repeat` up to and including `pat`
|
||||||
`.` | Any single character (except newline)
|
`.` | Any single character (except newline)
|
||||||
`^^` | The start of the input
|
`^^` | The start of the input
|
||||||
`^` | The start of a line
|
`^` | The start of a line
|
||||||
|
9
bp.1
9
bp.1
@ -237,6 +237,15 @@ over instances of \f[I]skip\f[R]
|
|||||||
opening quote, up to closing quote, skipping over backslash followed by
|
opening quote, up to closing quote, skipping over backslash followed by
|
||||||
a single character)
|
a single character)
|
||||||
.TP
|
.TP
|
||||||
|
\f[B].. =\f[R] \f[I]only\f[R] \f[I]pat\f[R]
|
||||||
|
Any number of repetitions of the pattern \f[I]only\f[R] up to and
|
||||||
|
including \f[I]pat\f[R] (e.g.\ \f[B]\[dq]f\[dq] ..=abc \[dq]k\[dq]\f[R]
|
||||||
|
matches the letter \[lq]f\[rq] followed by some alphabetic characters
|
||||||
|
and then a \[lq]k\[rq], which would match \[lq]fork\[rq], but not
|
||||||
|
\[lq]free kit\[rq]). This is essentially a \[lq]non-greedy\[rq] version
|
||||||
|
of \f[B]*\f[R], and \f[B].. pat\f[R] can be thought of as the special
|
||||||
|
case of \f[B]..=. pat\f[R].
|
||||||
|
.TP
|
||||||
\f[B]<\f[R] \f[I]pat\f[R]
|
\f[B]<\f[R] \f[I]pat\f[R]
|
||||||
Matches at the current position if \f[I]pat\f[R] matches immediately
|
Matches at the current position if \f[I]pat\f[R] matches immediately
|
||||||
before the current position (lookbehind).
|
before the current position (lookbehind).
|
||||||
|
7
bp.1.md
7
bp.1.md
@ -207,6 +207,13 @@ etc.)
|
|||||||
of *skip* (e.g. `'"' ..%('\' .) '"'` opening quote, up to closing quote,
|
of *skip* (e.g. `'"' ..%('\' .) '"'` opening quote, up to closing quote,
|
||||||
skipping over backslash followed by a single character)
|
skipping over backslash followed by a single character)
|
||||||
|
|
||||||
|
`.. =` *only* *pat*
|
||||||
|
: Any number of repetitions of the pattern *only* up to and including *pat*
|
||||||
|
(e.g. `"f" ..=abc "k"` matches the letter "f" followed by some alphabetic
|
||||||
|
characters and then a "k", which would match "fork", but not "free kit"). This
|
||||||
|
is essentially a "non-greedy" version of `*`, and `.. pat` can be thought of as
|
||||||
|
the special case of `..=. pat`.
|
||||||
|
|
||||||
`<` *pat*
|
`<` *pat*
|
||||||
: Matches at the current position if *pat* matches immediately before the
|
: Matches at the current position if *pat* matches immediately before the
|
||||||
current position (lookbehind). Conceptually, you can think of this as creating
|
current position (lookbehind). Conceptually, you can think of this as creating
|
||||||
|
@ -51,7 +51,7 @@ Nodent: "\N"
|
|||||||
Word-boundary: "\b"
|
Word-boundary: "\b"
|
||||||
Identifier-char: "\i"
|
Identifier-char: "\i"
|
||||||
Identifier-start: "\I"
|
Identifier-start: "\I"
|
||||||
Upto-and: ".." [__`%__@second=simple-pat] [__@first=simple-pat]
|
Upto-and: ".." [__(`%/`=)__@second=simple-pat] [__@first=simple-pat]
|
||||||
Repeat: (
|
Repeat: (
|
||||||
@min=(=>'0') (`*=>"-") @max=(=>'∞')
|
@min=(=>'0') (`*=>"-") @max=(=>'∞')
|
||||||
/ @min=int __ `- __ @max=int
|
/ @min=int __ `- __ @max=int
|
||||||
|
4
match.c
4
match.c
@ -348,7 +348,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
|||||||
}
|
}
|
||||||
return new_match(defs, pat, str, str, NULL);
|
return new_match(defs, pat, str, str, NULL);
|
||||||
}
|
}
|
||||||
case BP_UPTO: {
|
case BP_UPTO: case BP_UPTO_STRICT: {
|
||||||
match_t *m = new_match(defs, pat, str, str, NULL);
|
match_t *m = new_match(defs, pat, str, str, NULL);
|
||||||
pat_t *target = deref(defs, pat->args.multiple.first),
|
pat_t *target = deref(defs, pat->args.multiple.first),
|
||||||
*skip = deref(defs, pat->args.multiple.second);
|
*skip = deref(defs, pat->args.multiple.second);
|
||||||
@ -387,7 +387,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
|||||||
// This isn't in the for() structure because there needs to
|
// This isn't in the for() structure because there needs to
|
||||||
// be at least one chance to match the pattern, even if
|
// be at least one chance to match the pattern, even if
|
||||||
// we're at the end of the string already (e.g. "..$").
|
// we're at the end of the string already (e.g. "..$").
|
||||||
if (str < f->end && *str != '\n')
|
if (str < f->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
|
||||||
str = next_char(f, str);
|
str = next_char(f, str);
|
||||||
}
|
}
|
||||||
recycle_if_unused(&m);
|
recycle_if_unused(&m);
|
||||||
|
@ -153,7 +153,7 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second)
|
|||||||
// If `first` is an UPTO operator (..) or contains one, then let it know
|
// If `first` is an UPTO operator (..) or contains one, then let it know
|
||||||
// that `second` is what it's up *to*.
|
// that `second` is what it's up *to*.
|
||||||
for (pat_t *p = first; p; ) {
|
for (pat_t *p = first; p; ) {
|
||||||
if (p->type == BP_UPTO) {
|
if (p->type == BP_UPTO || p->type == BP_UPTO_STRICT) {
|
||||||
p->args.multiple.first = second;
|
p->args.multiple.first = second;
|
||||||
p->min_matchlen = second->min_matchlen;
|
p->min_matchlen = second->min_matchlen;
|
||||||
p->max_matchlen = -1;
|
p->max_matchlen = -1;
|
||||||
@ -238,13 +238,14 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
|
|||||||
if (*str == '.') { // ".."
|
if (*str == '.') { // ".."
|
||||||
pat_t *skip = NULL;
|
pat_t *skip = NULL;
|
||||||
str = next_char(f, str);
|
str = next_char(f, str);
|
||||||
if (matchchar(&str, '%')) {
|
char skipper = *str;
|
||||||
|
if (matchchar(&str, '%') || matchchar(&str, '=')) {
|
||||||
skip = bp_simplepattern(f, str);
|
skip = bp_simplepattern(f, str);
|
||||||
if (!skip)
|
if (!skip)
|
||||||
file_err(f, str, str, "There should be a pattern to skip here after the '%%'");
|
file_err(f, str, str, "There should be a pattern to skip here after the '%c'", skipper);
|
||||||
str = skip->end;
|
str = skip->end;
|
||||||
}
|
}
|
||||||
pat_t *upto = new_pat(f, start, str, 0, -1, BP_UPTO);
|
pat_t *upto = new_pat(f, start, str, 0, -1, skipper == '=' ? BP_UPTO_STRICT : BP_UPTO);
|
||||||
upto->args.multiple.second = skip;
|
upto->args.multiple.second = skip;
|
||||||
return upto;
|
return upto;
|
||||||
} else {
|
} else {
|
||||||
|
37
types.h
37
types.h
@ -20,24 +20,25 @@ enum pattype_e {
|
|||||||
BP_RANGE = 5,
|
BP_RANGE = 5,
|
||||||
BP_NOT = 6,
|
BP_NOT = 6,
|
||||||
BP_UPTO = 7,
|
BP_UPTO = 7,
|
||||||
BP_REPEAT = 8,
|
BP_UPTO_STRICT = 8,
|
||||||
BP_BEFORE = 9,
|
BP_REPEAT = 9,
|
||||||
BP_AFTER = 10,
|
BP_BEFORE = 10,
|
||||||
BP_CAPTURE = 11,
|
BP_AFTER = 11,
|
||||||
BP_OTHERWISE = 12,
|
BP_CAPTURE = 12,
|
||||||
BP_CHAIN = 13,
|
BP_OTHERWISE = 13,
|
||||||
BP_MATCH = 14,
|
BP_CHAIN = 14,
|
||||||
BP_NOT_MATCH = 15,
|
BP_MATCH = 15,
|
||||||
BP_REPLACE = 16,
|
BP_NOT_MATCH = 16,
|
||||||
BP_REF = 17,
|
BP_REPLACE = 17,
|
||||||
BP_NODENT = 18,
|
BP_REF = 18,
|
||||||
BP_START_OF_FILE = 19,
|
BP_NODENT = 19,
|
||||||
BP_START_OF_LINE = 20,
|
BP_START_OF_FILE = 20,
|
||||||
BP_END_OF_FILE = 21,
|
BP_START_OF_LINE = 21,
|
||||||
BP_END_OF_LINE = 22,
|
BP_END_OF_FILE = 22,
|
||||||
BP_WORD_BOUNDARY = 23,
|
BP_END_OF_LINE = 23,
|
||||||
BP_LEFTRECURSION = 24,
|
BP_WORD_BOUNDARY = 24,
|
||||||
BP_ERROR = 25,
|
BP_LEFTRECURSION = 25,
|
||||||
|
BP_ERROR = 26,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct match_s; // forward declared to resolve circular struct defs
|
struct match_s; // forward declared to resolve circular struct defs
|
||||||
|
Loading…
Reference in New Issue
Block a user