aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2020-09-28 17:56:02 -0700
committerBruce Hill <bruce@bruce-hill.com>2020-09-28 17:56:02 -0700
commit699e7c8b9869fa9737a3c61ef9bcc599751fb88b (patch)
tree13eb334ce0eb76c2a31bb2817ec86cbd45b81b81
parent544a88e9d4faa3f7e34316daaa01d2fa14d1d9f6 (diff)
Added !=
-rw-r--r--README.md1
-rw-r--r--bpeg.18
-rw-r--r--compiler.c17
-rw-r--r--grammars/bpeg.bpeg4
-rw-r--r--types.h1
-rw-r--r--vm.c16
6 files changed, 34 insertions, 13 deletions
diff --git a/README.md b/README.md
index b641de5..5167d4d 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,7 @@ Pattern | Meaning
`{pat @other => "@1"}` | Match `pat` followed by `other` and replace it with the text of `other`
`{pat @keep=other => "@keep"}` | Match `pat` followed by `other` and replace it with the text of `other`
`pat1==pat2` | `pat1`, assuming `pat2` also matches with the same length
+`pat1!=pat2` | `pat1`, unless `pat2` also matches with the same length
`#( block comment )#` | A block comment
`# line comment` | A line comment
diff --git a/bpeg.1 b/bpeg.1
index 26b2a64..833be35 100644
--- a/bpeg.1
+++ b/bpeg.1
@@ -154,6 +154,14 @@ be a string, and it may contain references to captured values: \fB@0\fR
Will match only if \fI<pat1>\fR and \fI<pat2>\fR both match and have the exact
same length. Pronounced \fI<pat1>\fB-assuming-it-equals-\fI<pat2>\fR
+.B \fI<pat1>\fB != \fI<pat2>\fR
+Will match only if \fI<pat1>\fR matches, but \fI<pat2>\fR doesn't also match with the
+same length. Pronounced \fI<pat1>\fB-unless-it-equals-\fI<pat2>\fR
+
+.B \fI<pat1>\fB != \fI<pat2>\fR
+Will match only if \fI<pat1>\fR matches and \fI<pat2>\fR does not also match with
+the exact same length. Pronounced \fI<pat1>\fB-assuming-it-doesn't-equal-\fI<pat2>\fR
+
.B |
This pattern matches the indentation at the beginning of a line that has the
same indentation as the line before (or zero indentation on the first line).
diff --git a/compiler.c b/compiler.c
index 5faf0f2..39a12b1 100644
--- a/compiler.c
+++ b/compiler.c
@@ -389,19 +389,22 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
op->end = str;
op->len = -1;
goto postfix;
- } else if (strncmp(str, "==", 2) == 0) { // Equality <pat1>==<pat2>
+ } else if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
+ int equal = str[0] == '=';
str = after_spaces(str+2);
vm_op_t *first = op;
vm_op_t *second = bpeg_simplepattern(f, str);
- check(second, "Expected pattern after '=='");
- check(first->len == -1 || second->len == -1 || first->len == second->len,
- "Two patterns cannot possibly match the same (different lengths: %ld != %ld)",
- first->len, second->len);
+ check(second, "Expected pattern after '%c='", equal? '=' : '!');
+ if (equal) {
+ check(first->len == -1 || second->len == -1 || first->len == second->len,
+ "Two patterns cannot possibly match the same (different lengths: %ld != %ld)",
+ first->len, second->len);
+ }
op = calloc(sizeof(vm_op_t), 1);
- op->op = VM_EQUAL;
+ op->op = equal ? VM_EQUAL : VM_NOT_EQUAL;
op->start = str;
op->end = second->end;
- op->len = (first->len == -1 || second->len == -1) ? -1 : first->len;
+ op->len = first->len != -1 ? first->len : second->len;
op->args.multiple.first = first;
op->args.multiple.second = second;
str = op->end;
diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg
index ce10bc1..0c43c0e 100644
--- a/grammars/bpeg.bpeg
+++ b/grammars/bpeg.bpeg
@@ -9,18 +9,20 @@ Def: @name=id _ `: __ (
# This is used for command line arguments:
String-pattern: 0+(`\ (escape-sequence / pat [`;]) / .)
-pat: simple-pat !(__("=="/`*/`+/`?)) / suffixed-pat
+pat: simple-pat !(__("!="/"=="/`*/`+/`?)) / suffixed-pat
simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
/ Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens
suffixed-pat: (
Eq-pat
+ / Not-eq-pat
/ Star-pat
/ Plus-pat
/ Question-pat
)
Eq-pat: @first=pat__"=="__@second=pat
+Not-eq-pat: @first=pat__"!="__@second=pat
Star-pat: pat __ `* @min={=>"0"} @max="" [__`%__@sep=pat]
Plus-pat: pat __ `+ @min={=>"1"} @max="" [__`%__@sep=pat]
Question-pat: pat __ `?
diff --git a/types.h b/types.h
index 2a1cc2a..21ed0e7 100644
--- a/types.h
+++ b/types.h
@@ -29,6 +29,7 @@ enum VMOpcode {
VM_OTHERWISE,
VM_CHAIN,
VM_EQUAL,
+ VM_NOT_EQUAL,
VM_REPLACE,
VM_REF,
VM_BACKREF,
diff --git a/vm.c b/vm.c
index 1db4481..158d58f 100644
--- a/vm.c
+++ b/vm.c
@@ -30,6 +30,7 @@ static const char *opcode_names[] = {
[VM_CHAIN] = "CHAIN",
[VM_REPLACE] = "REPLACE",
[VM_EQUAL] = "EQUAL",
+ [VM_NOT_EQUAL] = "NOT_EQUAL",
[VM_REF] = "REF",
[VM_BACKREF] = "BACKREF",
[VM_NODENT] = "NODENT",
@@ -280,23 +281,28 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
m1->nextsibling = m2;
return m;
}
- case VM_EQUAL: {
+ case VM_EQUAL: case VM_NOT_EQUAL: {
match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec);
if (m1 == NULL) return NULL;
- // <p1>==<p2> matches iff both have the same start and end point:
+ // <p1>==<p2> matches iff both have the same start and end point
+ // <p1>!=<p2> matches iff <p1> matches, but is not equal to <p2>
match_t *m2 = _match(g, f, str, op->args.multiple.second, flags, rec);
- if (m2 == NULL || m2->end != m1->end) {
+ if ((m2 == NULL || m2->end != m1->end) == (op->op == VM_EQUAL)) {
destroy_match(&m1);
destroy_match(&m2);
return NULL;
}
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
- m->end = m2->end;
+ m->end = m1->end;
m->op = op;
m->child = m1;
- m1->nextsibling = m2;
+ if (op->op == VM_EQUAL) {
+ m1->nextsibling = m2;
+ } else {
+ destroy_match(&m2);
+ }
return m;
}
case VM_REPLACE: {