diff options
| -rw-r--r-- | README.md | 6 | ||||
| -rw-r--r-- | bpeg.1 | 9 | ||||
| -rw-r--r-- | compiler.c | 28 | ||||
| -rw-r--r-- | grammars/bpeg.bpeg | 18 |
4 files changed, 50 insertions, 11 deletions
@@ -44,11 +44,13 @@ Pattern | Meaning `\n`, `\033`, `\x0A`, etc. | An escape sequence character `\x00-xFF` | An escape sequence range (byte `0x00` through `0xFF` here) `!pat` | `pat` does not match at the current position -`[pat]` | Zero or one occurrences of `pat` (optional pattern) +`[pat]` or `pat?` | Zero or one occurrences of `pat` (optional pattern) `5 pat` | Exactly 5 occurrences of `pat` `2-4 pat` | Between 2 and 4 occurrences of `pat` (inclusive) `5+ pat` | 5 or more occurrences of `pat` -`0+ pat % sep` | 0 or more occurrences of `pat`, separated by `sep` (e.g. `0+ int % ","` matches `1,2,3`) +`5+ pat % sep` | 5 or more occurrences of `pat`, separated by `sep` (e.g. `0+ int % ","` matches `1,2,3`) +`pat*` `pat* % sep`| 0 or more occurrences of `pat` (optionally separated by `sep`) +`pat+` `pat+ % sep`| 1 or more occurrences of `pat` (optionally separated by `sep`) `<pat` | `pat` matches just before the current position (backref) `>pat` | `pat` matches just in front of the current position (lookahead) `@pat` | Capture `pat` (used for text replacement and backreferences) @@ -114,11 +114,20 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR .B [\fI<pat>\fR] \fBMaybe-\fI<pat>\fR +.B \fI<pat>\fR? +\fI<pat>\fB-or-not\fR + .B \fI<N> <pat>\fR .B \fI<MIN>\fB-\fI<MAX> <pat>\fR .B \fI<MIN>\fB+ \fI<pat>\fR \fI<MIN>\fB-to-\fI<MAX>\fB-\fI<pat>\fBs\fR (repetitions of a pattern) +.B \fI<pat>\fR* +\fI<pat>\fB-zero-or-more-times\fR + +.B \fI<pat>\fR+ +\fI<pat>\fB-one-or-more-times\fR + .B \fI<repeating-pat>\fR \fB%\fI <sep>\fR \fI<repeating-pat>\fB-separated-by-\fI<sep>\fR (equivalent to \fI<pat> \fB0+(\fI<sep><pat>\fB)\fR) @@ -24,6 +24,12 @@ static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op op->args.repetitions.max = max; op->args.repetitions.repeat_pat = pat; op->args.repetitions.sep = sep; + if (!op->start) op->start = pat->start; + if (!op->end) op->end = pat->end; + if (sep) { + if (sep->start < op->start) op->start = sep->start; + if (sep->end > op->end) op->end = sep->end; + } } /* @@ -365,8 +371,26 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) // Postfix operators: postfix: - if (strncmp(after_spaces(str), "==", 2) == 0) { - str = after_spaces(str)+2; + if (f ? str >= f->end : !*str) return op; + str = after_spaces(str); + if (*str == '*' || *str == '+' || *str == '?') { // Repetitions: <pat>*, <pat>+, <pat>? + char operator = *str; + ++str; + vm_op_t *pat = op; + vm_op_t *sep = NULL; + if (operator != '?' && matchchar(&str, '%')) { + sep = bpeg_simplepattern(f, str); + check(sep, "Expected pattern for separator after '%%'"); + str = sep->end; + } + op = calloc(sizeof(vm_op_t), 1); + set_range(op, operator == '+' ? 1 : 0, operator == '?' ? 1 : -1, pat, sep); + op->start = pat->start; + op->end = str; + op->len = -1; + goto postfix; + } else if (strncmp(str, "==", 2) == 0) { // Equality <pat1>==<pat2> + str = after_spaces(str+2); vm_op_t *first = op; vm_op_t *second = bpeg_simplepattern(f, str); check(second, "Expected pattern after '=='"); diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg index bf2386e..ce10bc1 100644 --- a/grammars/bpeg.bpeg +++ b/grammars/bpeg.bpeg @@ -9,17 +9,21 @@ Def: @name=id _ `: __ ( # This is used for command line arguments: String-pattern: 0+(`\ (escape-sequence / pat [`;]) / .) -pat: suffixed-pat / simple-pat +pat: simple-pat !(__("=="/`*/`+/`?)) / suffixed-pat simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No / Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens -suffixed-pat: Eq-pat / simple-pat [ - @!={`* => "'*' is not a BPEG operator. Use 0+<pat> instead of <pat>*"} - / @!={`+ => "'+' is not a BPEG operator. Use 1+<pat> instead of <pat>+"} - / @!={`? => "'?' is not a BPEG operator. Use [<pat>] instead of <pat>?"} -] +suffixed-pat: ( + Eq-pat + / Star-pat + / Plus-pat + / Question-pat +) -Eq-pat: @first=simple-pat "==" @second=pat +Eq-pat: @first=pat__"=="__@second=pat +Star-pat: pat __ `* @min={=>"0"} @max="" [__`%__@sep=pat] +Plus-pat: pat __ `+ @min={=>"1"} @max="" [__`%__@sep=pat] +Question-pat: pat __ `? Dot: `. !`. String: ( |
