diff options
-rw-r--r-- | bp.1 | 22
-rw-r--r-- | compiler.c | 9
-rw-r--r-- | vm.c | 16
3 files changed, 34 insertions, 13 deletions
@@ -72,9 +72,6 @@ A chain of patterns, pronounced \fI<pat1>\fB-then-\fI<pat2>\fR A series of ordered choices (if one pattern matches, the following patterns will not be attempted), pronounced \fI<pat1>\fB-or-\fI<pat2>\fR -.B .. -Any text \fBup-to-and-including\fR the following pattern, if any (multiline: \fB...\fR) - .B . \fBAny\fR character (multiline: $.) @@ -102,10 +99,10 @@ The literal \fBcharacter-\fI<c>\fR .B `\fI<c1>\fB-\fI<c2>\fR The \fBcharacter-range-\fI<c1>\fB-to-\fI<c2>\fR -.B \\\fI<esc>\fR +.B \\\\\fI<esc>\fR The \fBescape-sequence-\fI<esc>\fR (\fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.) -.B \\\fI<esc1>\fB-\fI<esc2>\fR +.B \\\\\fI<esc1>\fB-\fI<esc2>\fR The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR .B !\fI<pat>\fR @@ -132,6 +129,13 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR \fI<repeating-pat>\fB-separated-by-\fI<sep>\fR (equivalent to \fI<pat> \fB0+(\fI<sep><pat>\fB)\fR) +.B .. \fI<pat>\fR +Any text \fBup-to-and-including\fR \fI<pat>\fR (multiline: \fB...\fR) + +.B .. \fI<pat>\fB % \fI<skip>\fR +Any text \fBup-to-and-including\fR \fI<pat>\fR, but skipping over instances of \fI<skip>\fR. +E.g. \fB`"..`" % (`\\.) + .B <\fI<pat>\fR \fBJust-after-\fI<pat>\fR (lookbehind) @@ -151,12 +155,12 @@ be a string, and it may contain references to captured values: \fB@0\fR \fB@[\fIfoo\fR]\fR (the capture named \fIfoo\fR in \fI<pat>\fR), etc. .B \fI<pat1>\fB == \fI<pat2>\fR -Will match only if \fI<pat1>\fR and \fI<pat2>\fR both match and have the exact -same length. Pronounced \fI<pat1>\fB-assuming-it-equals-\fI<pat2>\fR +Will match only if \fI<pat1>\fR matches and \fI<pat2>\fR matches the text of \fI<pat1>\fR's +match. Pronounced \fI<pat1>\fB-if-it-matches-\fI<pat2>\fR .B \fI<pat1>\fB != \fI<pat2>\fR -Will match only if \fI<pat1>\fR matches, but \fI<pat2>\fR doesn't also match with the -same length. 
Pronounced \fI<pat1>\fB-unless-it-equals-\fI<pat2>\fR +Will match only if \fI<pat1>\fR matches and \fI<pat2>\fR doesn't match the text of +\fI<pat1>\fR's match. Pronounced \fI<pat1>\fB-unless-it-matches-\fI<pat2>\fR .B \fI<pat1>\fB != \fI<pat2>\fR Will match only if \fI<pat1>\fR and \fI<pat2>\fR don't both match and have the @@ -117,9 +117,16 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) vm_op_t *till = bpeg_simplepattern(f, str); op->op = VM_UPTO_AND; op->len = -1; - op->args.pat = till; + op->args.multiple.first = till; if (till) str = till->end; + if (matchchar(&str, '%')) { + vm_op_t *skip = bpeg_simplepattern(f, str); + if (!skip) + file_err(f, str, str, "There should be a pattern to skip here after the '%%'"); + op->args.multiple.second = skip; + str = skip->end; + } break; } else { anychar: @@ -144,15 +144,25 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un match_t *m = calloc(sizeof(match_t), 1); m->start = str; m->op = op; - if (op->args.pat) { + if (op->args.multiple.first) { + match_t **dest = &m->child; for (const char *prev = NULL; prev < str; ) { prev = str; - match_t *p = _match(g, f, str, op->args.pat, flags, rec); + match_t *p = _match(g, f, str, op->args.multiple.first, flags, rec); if (p) { - m->child = p; + *dest = p; m->end = p->end; return m; } + if (op->args.multiple.second) { + p = _match(g, f, str, op->args.multiple.second, flags, rec); + if (p) { + *dest = p; + dest = &p->nextsibling; + str = p->end; + continue; + } + } // This isn't in the for() structure because there needs to // be at least once chance to match the pattern, even if // we're at the end of the string already (e.g. "..$"). |
