From c43e4781763ee3f3f148e821a88e99c6b80c58db Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Mon, 14 Dec 2020 18:11:33 -0800 Subject: Added % operator to .. --- bp.1 | 22 +++++++++++++--------- compiler.c | 9 ++++++++- vm.c | 16 +++++++++++++--- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/bp.1 b/bp.1 index 60a5e8f..03874ef 100644 --- a/bp.1 +++ b/bp.1 @@ -72,9 +72,6 @@ A chain of patterns, pronounced \fI\fB-then-\fI\fR A series of ordered choices (if one pattern matches, the following patterns will not be attempted), pronounced \fI\fB-or-\fI\fR -.B .. -Any text \fBup-to-and-including\fR the following pattern, if any (multiline: \fB...\fR) - .B . \fBAny\fR character (multiline: $.) @@ -102,10 +99,10 @@ The literal \fBcharacter-\fI\fR .B `\fI\fB-\fI\fR The \fBcharacter-range-\fI\fB-to-\fI\fR -.B \\\fI\fR +.B \\\\\fI\fR The \fBescape-sequence-\fI\fR (\fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.) -.B \\\fI\fB-\fI\fR +.B \\\\\fI\fB-\fI\fR The \fBescape-sequence-range-\fI\fB-to-\fI\fR .B !\fI\fR @@ -132,6 +129,13 @@ The \fBescape-sequence-range-\fI\fB-to-\fI\fR \fI\fB-separated-by-\fI\fR (equivalent to \fI \fB0+(\fI\fB)\fR) +.B .. \fI\fR +Any text \fBup-to-and-including\fR \fI\fR (multiline: \fB...\fR) + +.B .. \fI\fB % \fI\fR +Any text \fBup-to-and-including\fR \fI\fR, but skipping over instances of \fI\fR. +E.g. \fB`"..`" % (`\\.) + .B <\fI\fR \fBJust-after-\fI\fR (lookbehind) @@ -151,12 +155,12 @@ be a string, and it may contain references to captured values: \fB@0\fR \fB@[\fIfoo\fR]\fR (the capture named \fIfoo\fR in \fI\fR), etc. .B \fI\fB == \fI\fR -Will match only if \fI\fR and \fI\fR both match and have the exact -same length. Pronounced \fI\fB-assuming-it-equals-\fI\fR +Will match only if \fI\fR matches and \fI\fR matches the text of \fI\fR's +match. Pronounced \fI\fB-if-it-matches-\fI\fR .B \fI\fB != \fI\fR -Will match only if \fI\fR matches, but \fI\fR doesn't also match with the -same length. Pronounced \fI\fB-unless-it-equals-\fI\fR +Will match only if \fI\fR matches and \fI\fR doesn't match the text of +\fI\fR's match. Pronounced \fI\fB-unless-it-matches-\fI\fR .B \fI\fB != \fI\fR Will match only if \fI\fR and \fI\fR don't both match and have the diff --git a/compiler.c b/compiler.c index 48d0023..97b1737 100644 --- a/compiler.c +++ b/compiler.c @@ -117,9 +117,16 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) vm_op_t *till = bpeg_simplepattern(f, str); op->op = VM_UPTO_AND; op->len = -1; - op->args.pat = till; + op->args.multiple.first = till; if (till) str = till->end; + if (matchchar(&str, '%')) { + vm_op_t *skip = bpeg_simplepattern(f, str); + if (!skip) + file_err(f, str, str, "There should be a pattern to skip here after the '%%'"); + op->args.multiple.second = skip; + str = skip->end; + } break; } else { anychar: diff --git a/vm.c b/vm.c index 4f26d5c..ed151bf 100644 --- a/vm.c +++ b/vm.c @@ -144,15 +144,25 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un match_t *m = calloc(sizeof(match_t), 1); m->start = str; m->op = op; - if (op->args.pat) { + if (op->args.multiple.first) { + match_t **dest = &m->child; for (const char *prev = NULL; prev < str; ) { prev = str; - match_t *p = _match(g, f, str, op->args.pat, flags, rec); + match_t *p = _match(g, f, str, op->args.multiple.first, flags, rec); if (p) { - m->child = p; + *dest = p; m->end = p->end; return m; } + if (op->args.multiple.second) { + p = _match(g, f, str, op->args.multiple.second, flags, rec); + if (p) { + *dest = p; + dest = &p->nextsibling; + str = p->end; + continue; + } + } // This isn't in the for() structure because there needs to // be at least once chance to match the pattern, even if // we're at the end of the string already (e.g. "..$"). -- cgit v1.2.3