Added == operator

This commit is contained in:
Bruce Hill 2020-09-13 20:33:11 -07:00
parent ab5ef5a77a
commit 1d1c3d35aa
7 changed files with 89 additions and 18 deletions

View File

@ -4,10 +4,14 @@ Grammar = __ *Def%(__`;__) ?(`;__);
Def = @[name]Ref __ `= __ @[definition]extended-pat;
# This is used for command line arguments:
String-grammar = *(`\ pat ?`; / .);
String-pattern = *(`\ pat ?`; / .);
pat = Empty / Dot / String / Char-range / Char / Escape-range / Escape / No / Anything-but
pat = suffixed-pat / simple-pat;
simple-pat = Empty / Dot / String / Char-range / Char / Escape-range / Escape / No / Anything-but
/ Upto-and / Repeat / After / Before / Capture / Replace / Ref / parens;
suffixed-pat = Eq-pat;
Eq-pat = @[first]simple-pat "==" @[second]pat;
Empty = `/ >(__ (`)/`}));
Dot = `.;

21
bpeg.c
View File

@ -24,7 +24,8 @@
* ( <pat> ) <pat>
* @ <pat> capture <pat>
* @ [ <name> ] <pat> <pat> named <name>
* { <pat> => <str> } <pat> replaced with <str>
* { <pat> => <str> } <pat> replaced with <str>
* <pat1> == <pat2> <pat1>, but only if <pat2> also matches at the same position with the same length
* "@1" or "@[1]" first capture
* "@foo" or "@[foo]" capture named "foo"
* <pat1> <pat2> <pat1> followed by <pat2>
@ -50,7 +51,8 @@ static const char *usage = (
"Flags:\n"
" -h --help\t print the usage and quit\n"
" -v --verbose\t print verbose debugging info\n"
" -d --define <name>=<def> define a grammar rule\n"
" -d --define <name>=<def>\t define a grammar rule\n"
" -D --define-string <name>=<def>\t define a grammar rule (string-pattern)\n"
" -e --escaped <pat>\t provide an escaped pattern (equivalent to bpeg '\\(<pat>)')\n"
" -s --string <pat>\t provide a string pattern (equivalent to bpeg '<pat>', but may be useful if '<pat>' begins with a '-')\n"
" -r --replace <replacement> replace the input pattern with the given replacement\n"
@ -83,8 +85,10 @@ static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, int v
}
match_t *m = match(g, input, pattern);
if (m != NULL && m->end > m->start + 1) {
if (isatty(STDOUT_FILENO)) printf("\033[1;4;33m%s\033[0m\n", filename);
else printf("%s\n", filename);
if (filename != NULL) {
if (isatty(STDOUT_FILENO)) printf("\033[1;4;33m%s\033[0m\n", filename);
else printf("%s\n", filename);
}
print_match(m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, verbose);
freefile(input);
return 0;
@ -153,6 +157,15 @@ int main(int argc, char *argv[])
vm_op_t *pat = bpeg_pattern(src);
check(pat, "Failed to compile pattern");
add_def(g, src, def, pat);
} else if (FLAG("--define-string") || FLAG("-D")) {
char *def = flag;
char *eq = strchr(def, '=');
check(eq, usage);
*eq = '\0';
char *src = ++eq;
vm_op_t *pat = bpeg_stringpattern(src);
check(pat, "Failed to compile pattern");
add_def(g, src, def, pat);
} else if (FLAG("--escaped") || FLAG("-e")) {
check(npatterns == 0, "Cannot define multiple patterns");
vm_op_t *p = bpeg_pattern(flag);

View File

@ -372,8 +372,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
const char *refname = str;
str = after_name(str);
op->op = VM_REF;
op->len = (ssize_t)(str - refname);
op->args.s = strndup(refname, (size_t)op->len);
op->args.s = strndup(refname, (size_t)(str - refname));
break;
} else {
free(op);
@ -382,6 +381,29 @@ vm_op_t *bpeg_simplepattern(const char *str)
}
}
op->end = str;
// Postfix operators:
postfix:
str = after_spaces(str);
if (strncmp(str, "==", 2) == 0) {
str += 2;
vm_op_t *first = op;
vm_op_t *second = bpeg_simplepattern(str);
check(second, "Expected pattern after '=='");
check(first->len == -1 || second->len == -1 || first->len == second->len,
"Two patterns cannot possibly match the same (different lengths: %ld != %ld)",
first->len, second->len);
op = calloc(sizeof(vm_op_t), 1);
op->op = VM_EQUAL;
op->start = str;
op->end = second->end;
op->len = (first->len == -1 || second->len == -1) ? -1 : first->len;
op->args.multiple.first = first;
op->args.multiple.second = second;
str = op->end;
goto postfix;
}
return op;
}

View File

@ -2,7 +2,7 @@
pattern = !(/); # Not defined by default
replacement = {!(/)=>}; # Not defined by default
replace-all = +&&@replacement &&$$;
find-all = {&&>matching-line=>} +(matching-line/non-matching-line);
find-all = {&&>matching-line=>} +(matching-line/non-matching-line) ?{!<\n => "\n"};
only-matches = +{&&@pattern=>'@1\n'};
matching-line = +&@pattern *. $ ?\n;
non-matching-line = {&&(\n/$$)=>};
@ -14,7 +14,8 @@ anglebraces = `< *(anglebraces / ~~`>) `>;
brackets = `[ *(brackets / ~~`]) `];
braces = `{ *(braces / ~~`}) `};
parens = `( *(parens / ~~`)) `);
id = (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9);
id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9);
word = !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_);
HEX = `0-9/`A-F;
Hex = `0-9/`a-f/`A-F;
hex = `0-9/`a-f;

View File

@ -3,16 +3,25 @@ HTML = __ ?(doctype __) *html-element%__ __;
doctype = "<!DOCTYPE" &`>;
html-element = void-element / template-element / raw-text-element / normal-element;
html-element = (
>(`<("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr")) void-element
/ >(`<("script"/"style"/"textarea"/"title")) raw-element
/ >(`<("template")) template-element
/ normal-element);
void-element = `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr") *(__attribute) __ ?`/ __ `>;
void-element = `< @[tag](id==match-tag) __attributes__ `/? __ `>;
template-element = "<template" __`> __ *(~~`< / comment / html-element / ~~("</template"__`>)) ("</template"__`>);
template-element = `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(~~`< / comment / html-element / ~~("</"tag__`>)) ("</"tag__`>);
raw-text-element = `<@[tag]("script"/"style"/"textarea"/"title") *(__attribute) __ `> &("</"tag__`>);
raw-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]*~~("</"tag__`>) ("</"tag__`>);
normal-element = !raw-text-element `<@[tag]id *(__attribute) __ `> *(~~`< / comment / html-element / ~~("</"tag__`>)) "</"tag__`>;
normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]*(~~`< / comment / html-element / ~~("</"tag__`>)) "</"tag__`>;
comment = "<!--" &&"-->";
attribute = +id%`:__`=__(id / `" &`" / `' &`');
attributes = *(!(attribute==match-attribute))%__ __(attribute==match-attribute)__ *attribute%__;
attribute = (+id%`:)__`=__ (id / `" &`" / `' &`');
attribute = (+id%`:)__`=__ (id / `" *~`" `" / `' *~`' `');
match-attribute = attribute;
match-tag = id;
match-body = (/);

View File

@ -23,6 +23,7 @@ enum VMOpcode {
VM_CAPTURE,
VM_OTHERWISE,
VM_CHAIN,
VM_EQUAL,
VM_REPLACE,
VM_REF,
VM_BACKREF,

25
vm.c
View File

@ -28,6 +28,7 @@ static const char *opcode_names[] = {
[VM_OTHERWISE] = "OTHERWISE",
[VM_CHAIN] = "CHAIN",
[VM_REPLACE] = "REPLACE",
[VM_EQUAL] = "EQUAL",
[VM_REF] = "REF",
[VM_BACKREF] = "BACKREF",
};
@ -253,6 +254,25 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
m1->nextsibling = m2;
return m;
}
// Equality operator (<p1>==<p2>): succeeds only when both operand patterns
// match starting at `str` and their matches end at the same position.
case VM_EQUAL: {
// Try the first operand; if it fails there is nothing to compare against.
match_t *m1 = _match(g, str, op->args.multiple.first, rec);
if (m1 == NULL) return NULL;
// <p1>==<p2> matches iff both have the same start and end point:
match_t *m2 = _match(g, str, op->args.multiple.second, rec);
// Both matches start at `str`, so comparing the end pointers is enough to
// establish that they cover the same span.
if (m2 == NULL || m2->end != m1->end) {
destroy_match(&m1);
// NOTE(review): m2 may be NULL here; presumably destroy_match() tolerates
// a pointer to NULL -- confirm against its definition.
destroy_match(&m2);
return NULL;
}
// Wrap both sub-matches in a parent match for this op.
// NOTE(review): the calloc() result is used without a NULL check.
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->end = m2->end; // equal to m1->end, as checked above
m->op = op;
// Children are linked in operand order: m1 first, then m2 as its sibling.
m->child = m1;
m1->nextsibling = m2;
return m;
}
case VM_REPLACE: {
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
@ -512,8 +532,8 @@ void print_match(match_t *m, const char *color, int verbose)
const char *prev = m->start;
for (match_t *child = m->child; child; child = child->nextsibling) {
// Skip children from e.g. zero-width matches like >@foo
if (!(m->start <= child->start && child->start <= m->end &&
m->start <= child->end && child->end <= m->end))
if (!(prev <= child->start && child->start <= m->end &&
prev <= child->end && child->end <= m->end))
continue;
if (child->start > prev)
printf("%s%.*s", color ? color : "", (int)(child->start - prev), prev);
@ -608,6 +628,7 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
str += len;
prev = child->start;
}
if (child->start < prev) continue;
*dest = match_backref(str, op, child);
if (*dest == NULL) {
destroy_match(&ret);