Added % operator to ..
This commit is contained in:
parent
41639915de
commit
c43e478176
22
bp.1
22
bp.1
@ -72,9 +72,6 @@ A chain of patterns, pronounced \fI<pat1>\fB-then-\fI<pat2>\fR
|
||||
A series of ordered choices (if one pattern matches, the following patterns
|
||||
will not be attempted), pronounced \fI<pat1>\fB-or-\fI<pat2>\fR
|
||||
|
||||
.B ..
|
||||
Any text \fBup-to-and-including\fR the following pattern, if any (multiline: \fB...\fR)
|
||||
|
||||
.B .
|
||||
\fBAny\fR character (multiline: $.)
|
||||
|
||||
@ -102,10 +99,10 @@ The literal \fBcharacter-\fI<c>\fR
|
||||
.B `\fI<c1>\fB-\fI<c2>\fR
|
||||
The \fBcharacter-range-\fI<c1>\fB-to-\fI<c2>\fR
|
||||
|
||||
.B \\\fI<esc>\fR
|
||||
.B \\\\\fI<esc>\fR
|
||||
The \fBescape-sequence-\fI<esc>\fR (\fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.)
|
||||
|
||||
.B \\\fI<esc1>\fB-\fI<esc2>\fR
|
||||
.B \\\\\fI<esc1>\fB-\fI<esc2>\fR
|
||||
The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
|
||||
|
||||
.B !\fI<pat>\fR
|
||||
@ -132,6 +129,13 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
|
||||
\fI<repeating-pat>\fB-separated-by-\fI<sep>\fR (equivalent to \fI<pat>
|
||||
\fB0+(\fI<sep><pat>\fB)\fR)
|
||||
|
||||
.B .. \fI<pat>\fR
|
||||
Any text \fBup-to-and-including\fR \fI<pat>\fR (multiline: \fB...\fR)
|
||||
|
||||
.B .. \fI<pat>\fB % \fI<skip>\fR
|
||||
Any text \fBup-to-and-including\fR \fI<pat>\fR, but skipping over instances of \fI<skip>\fR.
|
||||
E.g. \fB`"..`" % (`\\.)\fR
|
||||
|
||||
.B <\fI<pat>\fR
|
||||
\fBJust-after-\fI<pat>\fR (lookbehind)
|
||||
|
||||
@ -151,12 +155,12 @@ be a string, and it may contain references to captured values: \fB@0\fR
|
||||
\fB@[\fIfoo\fB]\fR (the capture named \fIfoo\fR in \fI<pat>\fR), etc.
|
||||
|
||||
.B \fI<pat1>\fB == \fI<pat2>\fR
|
||||
Will match only if \fI<pat1>\fR and \fI<pat2>\fR both match and have the exact
|
||||
same length. Pronounced \fI<pat1>\fB-assuming-it-equals-\fI<pat2>\fR
|
||||
Will match only if \fI<pat1>\fR matches and \fI<pat2>\fR matches the text of \fI<pat1>\fR's
|
||||
match. Pronounced \fI<pat1>\fB-if-it-matches-\fI<pat2>\fR
|
||||
|
||||
.B \fI<pat1>\fB != \fI<pat2>\fR
|
||||
Will match only if \fI<pat1>\fR matches, but \fI<pat2>\fR doesn't also match with the
|
||||
same length. Pronounced \fI<pat1>\fB-unless-it-equals-\fI<pat2>\fR
|
||||
Will match only if \fI<pat1>\fR matches and \fI<pat2>\fR doesn't match the text of
|
||||
\fI<pat1>\fR's match. Pronounced \fI<pat1>\fB-unless-it-matches-\fI<pat2>\fR
|
||||
|
||||
.B \fI<pat1>\fB != \fI<pat2>\fR
|
||||
Will match only if \fI<pat1>\fR and \fI<pat2>\fR don't both match and have the
|
||||
|
@ -117,9 +117,16 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
vm_op_t *till = bpeg_simplepattern(f, str);
|
||||
op->op = VM_UPTO_AND;
|
||||
op->len = -1;
|
||||
op->args.pat = till;
|
||||
op->args.multiple.first = till;
|
||||
if (till)
|
||||
str = till->end;
|
||||
if (matchchar(&str, '%')) {
|
||||
vm_op_t *skip = bpeg_simplepattern(f, str);
|
||||
if (!skip)
|
||||
file_err(f, str, str, "There should be a pattern to skip here after the '%%'");
|
||||
op->args.multiple.second = skip;
|
||||
str = skip->end;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
anychar:
|
||||
|
16
vm.c
16
vm.c
@ -144,15 +144,25 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
m->op = op;
|
||||
if (op->args.pat) {
|
||||
if (op->args.multiple.first) {
|
||||
match_t **dest = &m->child;
|
||||
for (const char *prev = NULL; prev < str; ) {
|
||||
prev = str;
|
||||
match_t *p = _match(g, f, str, op->args.pat, flags, rec);
|
||||
match_t *p = _match(g, f, str, op->args.multiple.first, flags, rec);
|
||||
if (p) {
|
||||
m->child = p;
|
||||
*dest = p;
|
||||
m->end = p->end;
|
||||
return m;
|
||||
}
|
||||
if (op->args.multiple.second) {
|
||||
p = _match(g, f, str, op->args.multiple.second, flags, rec);
|
||||
if (p) {
|
||||
*dest = p;
|
||||
dest = &p->nextsibling;
|
||||
str = p->end;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// This isn't in the for() structure because there needs to
|
||||
// be at least one chance to match the pattern, even if
|
||||
// we're at the end of the string already (e.g. "..$").
|
||||
|
Loading…
Reference in New Issue
Block a user