This commit is contained in:
Bruce Hill 2020-09-28 17:56:02 -07:00
parent 544a88e9d4
commit 699e7c8b98
6 changed files with 34 additions and 13 deletions

View File

@ -59,6 +59,7 @@ Pattern | Meaning
`{pat @other => "@1"}` | Match `pat` followed by `other` and replace it with the text of `other`
`{pat @keep=other => "@keep"}` | Match `pat` followed by `other` and replace it with the text of `other`
`pat1==pat2` | `pat1`, assuming `pat2` also matches with the same length
`pat1!=pat2` | `pat1`, unless `pat2` also matches with the same length
`#( block comment )#` | A block comment
`# line comment` | A line comment

8
bpeg.1
View File

@ -154,6 +154,14 @@ be a string, and it may contain references to captured values: \fB@0\fR
Will match only if \fI<pat1>\fR and \fI<pat2>\fR both match and have the exact
same length. Pronounced \fI<pat1>\fB-assuming-it-equals-\fI<pat2>\fR
.B \fI<pat1>\fB != \fI<pat2>\fR
Will match only if \fI<pat1>\fR matches, but \fI<pat2>\fR doesn't also match with the
same length. Pronounced \fI<pat1>\fB-unless-it-equals-\fI<pat2>\fR
.B \fI<pat1>\fB != \fI<pat2>\fR
Will match only if \fI<pat1>\fR matches and \fI<pat2>\fR does not also match with
the exact same length. Pronounced \fI<pat1>\fB-assuming-it-doesn't-equal-\fI<pat2>\fR
.B |
This pattern matches the indentation at the beginning of a line that has the
same indentation as the line before (or zero indentation on the first line).

View File

@ -389,19 +389,22 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
op->end = str;
op->len = -1;
goto postfix;
} else if (strncmp(str, "==", 2) == 0) { // Equality <pat1>==<pat2>
} else if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
int equal = str[0] == '=';
str = after_spaces(str+2);
vm_op_t *first = op;
vm_op_t *second = bpeg_simplepattern(f, str);
check(second, "Expected pattern after '=='");
check(first->len == -1 || second->len == -1 || first->len == second->len,
"Two patterns cannot possibly match the same (different lengths: %ld != %ld)",
first->len, second->len);
check(second, "Expected pattern after '%c='", equal? '=' : '!');
if (equal) {
check(first->len == -1 || second->len == -1 || first->len == second->len,
"Two patterns cannot possibly match the same (different lengths: %ld != %ld)",
first->len, second->len);
}
op = calloc(sizeof(vm_op_t), 1);
op->op = VM_EQUAL;
op->op = equal ? VM_EQUAL : VM_NOT_EQUAL;
op->start = str;
op->end = second->end;
op->len = (first->len == -1 || second->len == -1) ? -1 : first->len;
op->len = first->len != -1 ? first->len : second->len;
op->args.multiple.first = first;
op->args.multiple.second = second;
str = op->end;

View File

@ -9,18 +9,20 @@ Def: @name=id _ `: __ (
# This is used for command line arguments:
String-pattern: 0+(`\ (escape-sequence / pat [`;]) / .)
pat: simple-pat !(__("=="/`*/`+/`?)) / suffixed-pat
pat: simple-pat !(__("!="/"=="/`*/`+/`?)) / suffixed-pat
simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
/ Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens
suffixed-pat: (
Eq-pat
/ Not-eq-pat
/ Star-pat
/ Plus-pat
/ Question-pat
)
Eq-pat: @first=pat__"=="__@second=pat
Not-eq-pat: @first=pat__"!="__@second=pat
Star-pat: pat __ `* @min={=>"0"} @max="" [__`%__@sep=pat]
Plus-pat: pat __ `+ @min={=>"1"} @max="" [__`%__@sep=pat]
Question-pat: pat __ `?

View File

@ -29,6 +29,7 @@ enum VMOpcode {
VM_OTHERWISE,
VM_CHAIN,
VM_EQUAL,
VM_NOT_EQUAL,
VM_REPLACE,
VM_REF,
VM_BACKREF,

16
vm.c
View File

@ -30,6 +30,7 @@ static const char *opcode_names[] = {
[VM_CHAIN] = "CHAIN",
[VM_REPLACE] = "REPLACE",
[VM_EQUAL] = "EQUAL",
[VM_NOT_EQUAL] = "NOT_EQUAL",
[VM_REF] = "REF",
[VM_BACKREF] = "BACKREF",
[VM_NODENT] = "NODENT",
@ -280,23 +281,28 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
m1->nextsibling = m2;
return m;
}
case VM_EQUAL: {
case VM_EQUAL: case VM_NOT_EQUAL: {
match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec);
if (m1 == NULL) return NULL;
// <p1>==<p2> matches iff both have the same start and end point:
// <p1>==<p2> matches iff both have the same start and end point
// <p1>!=<p2> matches iff <p1> matches, but is not equal to <p2>
match_t *m2 = _match(g, f, str, op->args.multiple.second, flags, rec);
if (m2 == NULL || m2->end != m1->end) {
if ((m2 == NULL || m2->end != m1->end) == (op->op == VM_EQUAL)) {
destroy_match(&m1);
destroy_match(&m2);
return NULL;
}
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->end = m2->end;
m->end = m1->end;
m->op = op;
m->child = m1;
m1->nextsibling = m2;
if (op->op == VM_EQUAL) {
m1->nextsibling = m2;
} else {
destroy_match(&m2);
}
return m;
}
case VM_REPLACE: {