diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2020-09-28 17:56:02 -0700 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2020-09-28 17:56:02 -0700 |
| commit | 699e7c8b9869fa9737a3c61ef9bcc599751fb88b (patch) | |
| tree | 13eb334ce0eb76c2a31bb2817ec86cbd45b81b81 | |
| parent | 544a88e9d4faa3f7e34316daaa01d2fa14d1d9f6 (diff) | |
Added !=
| -rw-r--r-- | README.md | 1 | ||||
| -rw-r--r-- | bpeg.1 | 8 | ||||
| -rw-r--r-- | compiler.c | 17 | ||||
| -rw-r--r-- | grammars/bpeg.bpeg | 4 | ||||
| -rw-r--r-- | types.h | 1 | ||||
| -rw-r--r-- | vm.c | 16 |
6 files changed, 34 insertions, 13 deletions
@@ -59,6 +59,7 @@ Pattern | Meaning `{pat @other => "@1"}` | Match `pat` followed by `other` and replace it with the text of `other` `{pat @keep=other => "@keep"}` | Match `pat` followed by `other` and replace it with the text of `other` `pat1==pat2` | `pat1`, assuming `pat2` also matches with the same length +`pat1!=pat2` | `pat1`, unless `pat2` also matches with the same length `#( block comment )#` | A block comment `# line comment` | A line comment @@ -154,6 +154,14 @@ be a string, and it may contain references to captured values: \fB@0\fR Will match only if \fI<pat1>\fR and \fI<pat2>\fR both match and have the exact same length. Pronounced \fI<pat1>\fB-assuming-it-equals-\fI<pat2>\fR +.B \fI<pat1>\fB != \fI<pat2>\fR +Will match only if \fI<pat1>\fR matches, but \fI<pat2>\fR doesn't also match with the +same length. Pronounced \fI<pat1>\fB-unless-it-equals-\fI<pat2>\fR + +.B \fI<pat1>\fB != \fI<pat2>\fR +Will match only if \fI<pat1>\fR matches and \fI<pat2>\fR does not also match with the +exact same length. Pronounced \fI<pat1>\fB-assuming-it-doesn't-equal-\fI<pat2>\fR + .B | This pattern matches the indentation at the beginning of a line that has the same indentation as the line before (or zero indentation on the first line). @@ -389,19 +389,22 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) op->end = str; op->len = -1; goto postfix; - } else if (strncmp(str, "==", 2) == 0) { // Equality <pat1>==<pat2> + } else if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2> + int equal = str[0] == '='; str = after_spaces(str+2); vm_op_t *first = op; vm_op_t *second = bpeg_simplepattern(f, str); - check(second, "Expected pattern after '=='"); - check(first->len == -1 || second->len == -1 || first->len == second->len, - "Two patterns cannot possibly match the same (different lengths: %ld != %ld)", - first->len, second->len); + check(second, "Expected pattern after '%c='", equal? 
'=' : '!'); + if (equal) { + check(first->len == -1 || second->len == -1 || first->len == second->len, + "Two patterns cannot possibly match the same (different lengths: %ld != %ld)", + first->len, second->len); + } op = calloc(sizeof(vm_op_t), 1); - op->op = VM_EQUAL; + op->op = equal ? VM_EQUAL : VM_NOT_EQUAL; op->start = str; op->end = second->end; - op->len = (first->len == -1 || second->len == -1) ? -1 : first->len; + op->len = first->len != -1 ? first->len : second->len; op->args.multiple.first = first; op->args.multiple.second = second; str = op->end; diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg index ce10bc1..0c43c0e 100644 --- a/grammars/bpeg.bpeg +++ b/grammars/bpeg.bpeg @@ -9,18 +9,20 @@ Def: @name=id _ `: __ ( # This is used for command line arguments: String-pattern: 0+(`\ (escape-sequence / pat [`;]) / .) -pat: simple-pat !(__("=="/`*/`+/`?)) / suffixed-pat +pat: simple-pat !(__("!="/"=="/`*/`+/`?)) / suffixed-pat simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No / Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens suffixed-pat: ( Eq-pat + / Not-eq-pat / Star-pat / Plus-pat / Question-pat ) Eq-pat: @first=pat__"=="__@second=pat +Not-eq-pat: @first=pat__"!="__@second=pat Star-pat: pat __ `* @min={=>"0"} @max="" [__`%__@sep=pat] Plus-pat: pat __ `+ @min={=>"1"} @max="" [__`%__@sep=pat] Question-pat: pat __ `? 
@@ -29,6 +29,7 @@ enum VMOpcode { VM_OTHERWISE, VM_CHAIN, VM_EQUAL, + VM_NOT_EQUAL, VM_REPLACE, VM_REF, VM_BACKREF, @@ -30,6 +30,7 @@ static const char *opcode_names[] = { [VM_CHAIN] = "CHAIN", [VM_REPLACE] = "REPLACE", [VM_EQUAL] = "EQUAL", + [VM_NOT_EQUAL] = "NOT_EQUAL", [VM_REF] = "REF", [VM_BACKREF] = "BACKREF", [VM_NODENT] = "NODENT", @@ -280,23 +281,28 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un m1->nextsibling = m2; return m; } - case VM_EQUAL: { + case VM_EQUAL: case VM_NOT_EQUAL: { match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec); if (m1 == NULL) return NULL; - // <p1>==<p2> matches iff both have the same start and end point: + // <p1>==<p2> matches iff both have the same start and end point + // <p1>!=<p2> matches iff <p1> matches, but is not equal to <p2> match_t *m2 = _match(g, f, str, op->args.multiple.second, flags, rec); - if (m2 == NULL || m2->end != m1->end) { + if ((m2 == NULL || m2->end != m1->end) == (op->op == VM_EQUAL)) { destroy_match(&m1); destroy_match(&m2); return NULL; } match_t *m = calloc(sizeof(match_t), 1); m->start = str; - m->end = m2->end; + m->end = m1->end; m->op = op; m->child = m1; - m1->nextsibling = m2; + if (op->op == VM_EQUAL) { + m1->nextsibling = m2; + } else { + destroy_match(&m2); + } return m; } case VM_REPLACE: { |
