aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2020-09-28 17:42:38 -0700
committer Bruce Hill <bruce@bruce-hill.com> 2020-09-28 17:42:38 -0700
commit 544a88e9d4faa3f7e34316daaa01d2fa14d1d9f6 (patch)
tree 77c3b3a35d04c7ab31aa5b7461d387ecf2a70b33
parent 54b14a35573aed89670228e5cbfbd820da24aeaf (diff)
Added +/*/? postfix operators
-rw-r--r-- README.md 6
-rw-r--r-- bpeg.1 9
-rw-r--r-- compiler.c 28
-rw-r--r-- grammars/bpeg.bpeg 18
4 files changed, 50 insertions, 11 deletions
diff --git a/README.md b/README.md
index 66cbccf..b641de5 100644
--- a/README.md
+++ b/README.md
@@ -44,11 +44,13 @@ Pattern | Meaning
`\n`, `\033`, `\x0A`, etc. | An escape sequence character
`\x00-xFF` | An escape sequence range (byte `0x00` through `0xFF` here)
`!pat` | `pat` does not match at the current position
-`[pat]` | Zero or one occurrences of `pat` (optional pattern)
+`[pat]` or `pat?` | Zero or one occurrences of `pat` (optional pattern)
`5 pat` | Exactly 5 occurrences of `pat`
`2-4 pat` | Between 2 and 4 occurrences of `pat` (inclusive)
`5+ pat` | 5 or more occurrences of `pat`
-`0+ pat % sep` | 0 or more occurrences of `pat`, separated by `sep` (e.g. `0+ int % ","` matches `1,2,3`)
+`5+ pat % sep` | 5 or more occurrences of `pat`, separated by `sep` (e.g. `5+ int % ","` matches `1,2,3,4,5`)
+`pat*` or `pat* % sep` | 0 or more occurrences of `pat` (optionally separated by `sep`)
+`pat+` or `pat+ % sep` | 1 or more occurrences of `pat` (optionally separated by `sep`)
`<pat` | `pat` matches just before the current position (lookbehind)
`>pat` | `pat` matches just in front of the current position (lookahead)
`@pat` | Capture `pat` (used for text replacement and backreferences)
diff --git a/bpeg.1 b/bpeg.1
index 3948b10..26b2a64 100644
--- a/bpeg.1
+++ b/bpeg.1
@@ -114,11 +114,20 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
.B [\fI<pat>\fR]
\fBMaybe-\fI<pat>\fR
+.B \fI<pat>\fR?
+\fI<pat>\fB-or-not\fR
+
.B \fI<N> <pat>\fR
.B \fI<MIN>\fB-\fI<MAX> <pat>\fR
.B \fI<MIN>\fB+ \fI<pat>\fR
\fI<MIN>\fB-to-\fI<MAX>\fB-\fI<pat>\fBs\fR (repetitions of a pattern)
+.B \fI<pat>\fR*
+\fI<pat>\fB-zero-or-more-times\fR
+
+.B \fI<pat>\fR+
+\fI<pat>\fB-one-or-more-times\fR
+
.B \fI<repeating-pat>\fR \fB%\fI <sep>\fR
\fI<repeating-pat>\fB-separated-by-\fI<sep>\fR (equivalent to \fI<pat>
\fB0+(\fI<sep><pat>\fB)\fR)
diff --git a/compiler.c b/compiler.c
index ba53579..5faf0f2 100644
--- a/compiler.c
+++ b/compiler.c
@@ -24,6 +24,12 @@ static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op
op->args.repetitions.max = max;
op->args.repetitions.repeat_pat = pat;
op->args.repetitions.sep = sep;
+ if (!op->start) op->start = pat->start;
+ if (!op->end) op->end = pat->end;
+ if (sep) {
+ if (sep->start < op->start) op->start = sep->start;
+ if (sep->end > op->end) op->end = sep->end;
+ }
}
/*
@@ -365,8 +371,26 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
// Postfix operators:
postfix:
- if (strncmp(after_spaces(str), "==", 2) == 0) {
- str = after_spaces(str)+2;
+ if (f ? str >= f->end : !*str) return op;
+ str = after_spaces(str);
+ if (*str == '*' || *str == '+' || *str == '?') { // Repetitions: <pat>*, <pat>+, <pat>?
+ char operator = *str;
+ ++str;
+ vm_op_t *pat = op;
+ vm_op_t *sep = NULL;
+ if (operator != '?' && matchchar(&str, '%')) {
+ sep = bpeg_simplepattern(f, str);
+ check(sep, "Expected pattern for separator after '%%'");
+ str = sep->end;
+ }
+ op = calloc(sizeof(vm_op_t), 1);
+ set_range(op, operator == '+' ? 1 : 0, operator == '?' ? 1 : -1, pat, sep);
+ op->start = pat->start;
+ op->end = str;
+ op->len = -1;
+ goto postfix;
+ } else if (strncmp(str, "==", 2) == 0) { // Equality <pat1>==<pat2>
+ str = after_spaces(str+2);
vm_op_t *first = op;
vm_op_t *second = bpeg_simplepattern(f, str);
check(second, "Expected pattern after '=='");
diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg
index bf2386e..ce10bc1 100644
--- a/grammars/bpeg.bpeg
+++ b/grammars/bpeg.bpeg
@@ -9,17 +9,21 @@ Def: @name=id _ `: __ (
# This is used for command line arguments:
String-pattern: 0+(`\ (escape-sequence / pat [`;]) / .)
-pat: suffixed-pat / simple-pat
+pat: simple-pat !(__("=="/`*/`+/`?)) / suffixed-pat
simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
/ Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens
-suffixed-pat: Eq-pat / simple-pat [
- @!={`* => "'*' is not a BPEG operator. Use 0+<pat> instead of <pat>*"}
- / @!={`+ => "'+' is not a BPEG operator. Use 1+<pat> instead of <pat>+"}
- / @!={`? => "'?' is not a BPEG operator. Use [<pat>] instead of <pat>?"}
-]
+suffixed-pat: (
+ Eq-pat
+ / Star-pat
+ / Plus-pat
+ / Question-pat
+)
-Eq-pat: @first=simple-pat "==" @second=pat
+Eq-pat: @first=pat__"=="__@second=pat
+Star-pat: pat __ `* @min={=>"0"} @max="" [__`%__@sep=pat]
+Plus-pat: pat __ `+ @min={=>"1"} @max="" [__`%__@sep=pat]
+Question-pat: pat __ `?
Dot: `. !`.
String: (