From 544a88e9d4faa3f7e34316daaa01d2fa14d1d9f6 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Mon, 28 Sep 2020 17:42:38 -0700 Subject: Added +/*/? postfix operators --- README.md | 6 ++++-- bpeg.1 | 9 +++++++++ compiler.c | 28 ++++++++++++++++++++++++++-- grammars/bpeg.bpeg | 18 +++++++++++------- 4 files changed, 50 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 66cbccf..b641de5 100644 --- a/README.md +++ b/README.md @@ -44,11 +44,13 @@ Pattern | Meaning `\n`, `\033`, `\x0A`, etc. | An escape sequence character `\x00-xFF` | An escape sequence range (byte `0x00` through `0xFF` here) `!pat` | `pat` does not match at the current position -`[pat]` | Zero or one occurrences of `pat` (optional pattern) +`[pat]` or `pat?` | Zero or one occurrences of `pat` (optional pattern) `5 pat` | Exactly 5 occurrences of `pat` `2-4 pat` | Between 2 and 4 occurrences of `pat` (inclusive) `5+ pat` | 5 or more occurrences of `pat` -`0+ pat % sep` | 0 or more occurrences of `pat`, separated by `sep` (e.g. `0+ int % ","` matches `1,2,3`) +`5+ pat % sep` | 5 or more occurrences of `pat`, separated by `sep` (e.g. `0+ int % ","` matches `1,2,3`) +`pat*` `pat* % sep`| 0 or more occurrences of `pat` (optionally separated by `sep`) +`pat+` `pat+ % sep`| 1 or more occurrences of `pat` (optionally separated by `sep`) `pat` | `pat` matches just in front of the current position (lookahead) `@pat` | Capture `pat` (used for text replacement and backreferences) diff --git a/bpeg.1 b/bpeg.1 index 3948b10..26b2a64 100644 --- a/bpeg.1 +++ b/bpeg.1 @@ -114,11 +114,20 @@ The \fBescape-sequence-range-\fI\fB-to-\fI\fR .B [\fI\fR] \fBMaybe-\fI\fR +.B \fI\fR? +\fI\fB-or-not\fR + .B \fI \fR .B \fI\fB-\fI \fR .B \fI\fB+ \fI\fR \fI\fB-to-\fI\fB-\fI\fBs\fR (repetitions of a pattern) +.B \fI\fR* +\fI\fB-zero-or-more-times\fR + +.B \fI\fR+ +\fI\fB-one-or-more-times\fR + .B \fI\fR \fB%\fI \fR \fI\fB-separated-by-\fI\fR (equivalent to \fI \fB0+(\fI\fB)\fR) diff --git a/compiler.c b/compiler.c index ba53579..5faf0f2 100644 --- a/compiler.c +++ b/compiler.c @@ -24,6 +24,12 @@ static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op op->args.repetitions.max = max; op->args.repetitions.repeat_pat = pat; op->args.repetitions.sep = sep; + if (!op->start) op->start = pat->start; + if (!op->end) op->end = pat->end; + if (sep) { + if (sep->start < op->start) op->start = sep->start; + if (sep->end > op->end) op->end = sep->end; + } } /* @@ -365,8 +371,26 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) // Postfix operators: postfix: - if (strncmp(after_spaces(str), "==", 2) == 0) { - str = after_spaces(str)+2; + if (f ? str >= f->end : !*str) return op; + str = after_spaces(str); + if (*str == '*' || *str == '+' || *str == '?') { // Repetitions: *, +, ? + char operator = *str; + ++str; + vm_op_t *pat = op; + vm_op_t *sep = NULL; + if (operator != '?' && matchchar(&str, '%')) { + sep = bpeg_simplepattern(f, str); + check(sep, "Expected pattern for separator after '%%'"); + str = sep->end; + } + op = calloc(sizeof(vm_op_t), 1); + set_range(op, operator == '+' ? 1 : 0, operator == '?' ? 1 : -1, pat, sep); + op->start = pat->start; + op->end = str; + op->len = -1; + goto postfix; + } else if (strncmp(str, "==", 2) == 0) { // Equality == + str = after_spaces(str+2); vm_op_t *first = op; vm_op_t *second = bpeg_simplepattern(f, str); check(second, "Expected pattern after '=='"); diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg index bf2386e..ce10bc1 100644 --- a/grammars/bpeg.bpeg +++ b/grammars/bpeg.bpeg @@ -9,17 +9,21 @@ Def: @name=id _ `: __ ( # This is used for command line arguments: String-pattern: 0+(`\ (escape-sequence / pat [`;]) / .) -pat: suffixed-pat / simple-pat +pat: simple-pat !(__("=="/`*/`+/`?)) / suffixed-pat simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No / Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens -suffixed-pat: Eq-pat / simple-pat [ - @!={`* => "'*' is not a BPEG operator. Use 0+ instead of *"} - / @!={`+ => "'+' is not a BPEG operator. Use 1+ instead of +"} - / @!={`? => "'?' is not a BPEG operator. Use [] instead of ?"} -] +suffixed-pat: ( + Eq-pat + / Star-pat + / Plus-pat + / Question-pat +) -Eq-pat: @first=simple-pat "==" @second=pat +Eq-pat: @first=pat__"=="__@second=pat +Star-pat: pat __ `* @min={=>"0"} @max="" [__`%__@sep=pat] +Plus-pat: pat __ `+ @min={=>"1"} @max="" [__`%__@sep=pat] +Question-pat: pat __ `? Dot: `. !`. String: ( -- cgit v1.2.3