From 699e7c8b9869fa9737a3c61ef9bcc599751fb88b Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Mon, 28 Sep 2020 17:56:02 -0700 Subject: Added != --- README.md | 1 + bpeg.1 | 8 ++++++++ compiler.c | 17 ++++++++++------- grammars/bpeg.bpeg | 4 +++- types.h | 1 + vm.c | 16 +++++++++++----- 6 files changed, 34 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index b641de5..5167d4d 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ Pattern | Meaning `{pat @other => "@1"}` | Match `pat` followed by `other` and replace it with the text of `other` `{pat @keep=other => "@keep"}` | Match `pat` followed by `other` and replace it with the text of `other` `pat1==pat2` | `pat1`, assuming `pat2` also matches with the same length +`pat1!=pat2` | `pat1`, unless `pat2` also matches with the same length `#( block comment )#` | A block comment `# line comment` | A line comment diff --git a/bpeg.1 b/bpeg.1 index 26b2a64..833be35 100644 --- a/bpeg.1 +++ b/bpeg.1 @@ -154,6 +154,14 @@ be a string, and it may contain references to captured values: \fB@0\fR Will match only if \fI<pat1>\fR and \fI<pat2>\fR both match and have the exact same length. Pronounced \fI<pat1>\fB-assuming-it-equals-\fI<pat2>\fR +.B \fI<pat1>\fB != \fI<pat2>\fR +Will match only if \fI<pat1>\fR matches, but \fI<pat2>\fR doesn't also match with the +same length. Pronounced \fI<pat1>\fB-unless-it-equals-\fI<pat2>\fR + +.B \fI<pat1>\fB != \fI<pat2>\fR +Will match only if \fI<pat1>\fR and \fI<pat2>\fR don't both match and have the +exact same length. Pronounced \fI<pat1>\fB-assuming-it-doesn't-equal-\fI<pat2>\fR + .B | This pattern matches the indentation at the beginning of a line that has the same indentation as the line before (or zero indentation on the first line). 
diff --git a/compiler.c b/compiler.c index 5faf0f2..39a12b1 100644 --- a/compiler.c +++ b/compiler.c @@ -389,19 +389,22 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) op->end = str; op->len = -1; goto postfix; - } else if (strncmp(str, "==", 2) == 0) { // Equality == + } else if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality == and inequality != + int equal = str[0] == '='; str = after_spaces(str+2); vm_op_t *first = op; vm_op_t *second = bpeg_simplepattern(f, str); - check(second, "Expected pattern after '=='"); - check(first->len == -1 || second->len == -1 || first->len == second->len, - "Two patterns cannot possibly match the same (different lengths: %ld != %ld)", - first->len, second->len); + check(second, "Expected pattern after '%c='", equal? '=' : '!'); + if (equal) { + check(first->len == -1 || second->len == -1 || first->len == second->len, + "Two patterns cannot possibly match the same (different lengths: %ld != %ld)", + first->len, second->len); + } op = calloc(sizeof(vm_op_t), 1); - op->op = VM_EQUAL; + op->op = equal ? VM_EQUAL : VM_NOT_EQUAL; op->start = str; op->end = second->end; - op->len = (first->len == -1 || second->len == -1) ? -1 : first->len; + op->len = first->len != -1 ? first->len : second->len; op->args.multiple.first = first; op->args.multiple.second = second; str = op->end; diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg index ce10bc1..0c43c0e 100644 --- a/grammars/bpeg.bpeg +++ b/grammars/bpeg.bpeg @@ -9,18 +9,20 @@ Def: @name=id _ `: __ ( # This is used for command line arguments: String-pattern: 0+(`\ (escape-sequence / pat [`;]) / .) 
-pat: simple-pat !(__("=="/`*/`+/`?)) / suffixed-pat +pat: simple-pat !(__("!="/"=="/`*/`+/`?)) / suffixed-pat simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No / Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens suffixed-pat: ( Eq-pat + / Not-eq-pat / Star-pat / Plus-pat / Question-pat ) Eq-pat: @first=pat__"=="__@second=pat +Not-eq-pat: @first=pat__"!="__@second=pat Star-pat: pat __ `* @min={=>"0"} @max="" [__`%__@sep=pat] Plus-pat: pat __ `+ @min={=>"1"} @max="" [__`%__@sep=pat] Question-pat: pat __ `? diff --git a/types.h b/types.h index 2a1cc2a..21ed0e7 100644 --- a/types.h +++ b/types.h @@ -29,6 +29,7 @@ enum VMOpcode { VM_OTHERWISE, VM_CHAIN, VM_EQUAL, + VM_NOT_EQUAL, VM_REPLACE, VM_REF, VM_BACKREF, diff --git a/vm.c b/vm.c index 1db4481..158d58f 100644 --- a/vm.c +++ b/vm.c @@ -30,6 +30,7 @@ static const char *opcode_names[] = { [VM_CHAIN] = "CHAIN", [VM_REPLACE] = "REPLACE", [VM_EQUAL] = "EQUAL", + [VM_NOT_EQUAL] = "NOT_EQUAL", [VM_REF] = "REF", [VM_BACKREF] = "BACKREF", [VM_NODENT] = "NODENT", @@ -280,23 +281,28 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un m1->nextsibling = m2; return m; } - case VM_EQUAL: { + case VM_EQUAL: case VM_NOT_EQUAL: { match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec); if (m1 == NULL) return NULL; - // == matches iff both have the same start and end point: + // == matches iff both have the same start and end point + // != matches iff matches, but is not equal to match_t *m2 = _match(g, f, str, op->args.multiple.second, flags, rec); - if (m2 == NULL || m2->end != m1->end) { + if ((m2 == NULL || m2->end != m1->end) == (op->op == VM_EQUAL)) { destroy_match(&m1); destroy_match(&m2); return NULL; } match_t *m = calloc(sizeof(match_t), 1); m->start = str; - m->end = m2->end; + m->end = m1->end; m->op = op; m->child = m1; - m1->nextsibling = m2; + if (op->op == VM_EQUAL) { + m1->nextsibling = m2; + } else { + 
destroy_match(&m2); + } return m; } case VM_REPLACE: { -- cgit v1.2.3