Added == operator
This commit is contained in:
parent
ab5ef5a77a
commit
1d1c3d35aa
@ -4,10 +4,14 @@ Grammar = __ *Def%(__`;__) ?(`;__);
|
||||
Def = @[name]Ref __ `= __ @[definition]extended-pat;
|
||||
|
||||
# This is used for command line arguments:
|
||||
String-grammar = *(`\ pat ?`; / .);
|
||||
String-pattern = *(`\ pat ?`; / .);
|
||||
|
||||
pat = Empty / Dot / String / Char-range / Char / Escape-range / Escape / No / Anything-but
|
||||
pat = suffixed-pat / simple-pat;
|
||||
simple-pat = Empty / Dot / String / Char-range / Char / Escape-range / Escape / No / Anything-but
|
||||
/ Upto-and / Repeat / After / Before / Capture / Replace / Ref / parens;
|
||||
suffixed-pat = Eq-pat;
|
||||
|
||||
Eq-pat = @[first]simple-pat "==" @[second]pat;
|
||||
|
||||
Empty = `/ >(__ (`)/`}));
|
||||
Dot = `.;
|
||||
|
21
bpeg.c
21
bpeg.c
@ -24,7 +24,8 @@
|
||||
* ( <pat> ) <pat>
|
||||
* @ <pat> capture <pat>
|
||||
* @ [ <name> ] <pat> <pat> named <name>
|
||||
* { <pat> => <str> } <pat> replaced with <str>
|
||||
* { <pat> => <str> } <pat> replaced with <str>
|
||||
* <pat1> == <pat2> <pat1> iff <pat2> matches at the same spot for the same length
|
||||
* "@1" or "@[1]" first capture
|
||||
* "@foo" or "@[foo]" capture named "foo"
|
||||
* <pat1> <pat2> <pat1> followed by <pat2>
|
||||
@ -50,7 +51,8 @@ static const char *usage = (
|
||||
"Flags:\n"
|
||||
" -h --help\t print the usage and quit\n"
|
||||
" -v --verbose\t print verbose debugging info\n"
|
||||
" -d --define <name>=<def> define a grammar rule\n"
|
||||
" -d --define <name>=<def>\t define a grammar rule\n"
|
||||
" -D --define-string <name>=<def>\t define a grammar rule (string-pattern)\n"
|
||||
" -e --escaped <pat>\t provide an escaped pattern (equivalent to bpeg '\\(<pat>)')\n"
|
||||
" -s --string <pat>\t provide a string pattern (equivalent to bpeg '<pat>', but may be useful if '<pat>' begins with a '-')\n"
|
||||
" -r --replace <replacement> replace the input pattern with the given replacement\n"
|
||||
@ -83,8 +85,10 @@ static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, int v
|
||||
}
|
||||
match_t *m = match(g, input, pattern);
|
||||
if (m != NULL && m->end > m->start + 1) {
|
||||
if (isatty(STDOUT_FILENO)) printf("\033[1;4;33m%s\033[0m\n", filename);
|
||||
else printf("%s\n", filename);
|
||||
if (filename != NULL) {
|
||||
if (isatty(STDOUT_FILENO)) printf("\033[1;4;33m%s\033[0m\n", filename);
|
||||
else printf("%s\n", filename);
|
||||
}
|
||||
print_match(m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, verbose);
|
||||
freefile(input);
|
||||
return 0;
|
||||
@ -153,6 +157,15 @@ int main(int argc, char *argv[])
|
||||
vm_op_t *pat = bpeg_pattern(src);
|
||||
check(pat, "Failed to compile pattern");
|
||||
add_def(g, src, def, pat);
|
||||
} else if (FLAG("--define-string") || FLAG("-D")) {
|
||||
char *def = flag;
|
||||
char *eq = strchr(def, '=');
|
||||
check(eq, usage);
|
||||
*eq = '\0';
|
||||
char *src = ++eq;
|
||||
vm_op_t *pat = bpeg_stringpattern(src);
|
||||
check(pat, "Failed to compile pattern");
|
||||
add_def(g, src, def, pat);
|
||||
} else if (FLAG("--escaped") || FLAG("-e")) {
|
||||
check(npatterns == 0, "Cannot define multiple patterns");
|
||||
vm_op_t *p = bpeg_pattern(flag);
|
||||
|
26
compiler.c
26
compiler.c
@ -372,8 +372,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
const char *refname = str;
|
||||
str = after_name(str);
|
||||
op->op = VM_REF;
|
||||
op->len = (ssize_t)(str - refname);
|
||||
op->args.s = strndup(refname, (size_t)op->len);
|
||||
op->args.s = strndup(refname, (size_t)(str - refname));
|
||||
break;
|
||||
} else {
|
||||
free(op);
|
||||
@ -382,6 +381,29 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
}
|
||||
}
|
||||
op->end = str;
|
||||
|
||||
// Postfix operators:
|
||||
postfix:
|
||||
str = after_spaces(str);
|
||||
if (strncmp(str, "==", 2) == 0) {
|
||||
str += 2;
|
||||
vm_op_t *first = op;
|
||||
vm_op_t *second = bpeg_simplepattern(str);
|
||||
check(second, "Expected pattern after '=='");
|
||||
check(first->len == -1 || second->len == -1 || first->len == second->len,
|
||||
"Two patterns cannot possibly match the same (different lengths: %ld != %ld)",
|
||||
first->len, second->len);
|
||||
op = calloc(sizeof(vm_op_t), 1);
|
||||
op->op = VM_EQUAL;
|
||||
op->start = str;
|
||||
op->end = second->end;
|
||||
op->len = (first->len == -1 || second->len == -1) ? -1 : first->len;
|
||||
op->args.multiple.first = first;
|
||||
op->args.multiple.second = second;
|
||||
str = op->end;
|
||||
goto postfix;
|
||||
}
|
||||
|
||||
return op;
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
pattern = !(/); # Not defined by default
|
||||
replacement = {!(/)=>}; # Not defined by default
|
||||
replace-all = +&&@replacement &&$$;
|
||||
find-all = {&&>matching-line=>} +(matching-line/non-matching-line);
|
||||
find-all = {&&>matching-line=>} +(matching-line/non-matching-line) ?{!<\n => "\n"};
|
||||
only-matches = +{&&@pattern=>'@1\n'};
|
||||
matching-line = +&@pattern *. $ ?\n;
|
||||
non-matching-line = {&&(\n/$$)=>};
|
||||
@ -14,7 +14,8 @@ anglebraces = `< *(anglebraces / ~~`>) `>;
|
||||
brackets = `[ *(brackets / ~~`]) `];
|
||||
braces = `{ *(braces / ~~`}) `};
|
||||
parens = `( *(parens / ~~`)) `);
|
||||
id = (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9);
|
||||
id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9);
|
||||
word = !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_);
|
||||
HEX = `0-9/`A-F;
|
||||
Hex = `0-9/`a-f/`A-F;
|
||||
hex = `0-9/`a-f;
|
||||
|
@ -3,16 +3,25 @@ HTML = __ ?(doctype __) *html-element%__ __;
|
||||
|
||||
doctype = "<!DOCTYPE" &`>;
|
||||
|
||||
html-element = void-element / template-element / raw-text-element / normal-element;
|
||||
html-element = (
|
||||
>(`<("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr")) void-element
|
||||
/ >(`<("script"/"style"/"textarea"/"title")) raw-element
|
||||
/ >(`<("template")) template-element
|
||||
/ normal-element);
|
||||
|
||||
void-element = `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr") *(__attribute) __ ?`/ __ `>;
|
||||
# The self-closing slash is optional; `?` is a prefix operator, so it must come before the slash it applies to.
void-element = `< @[tag](id==match-tag) __attributes__ ?`/ __ `>;
|
||||
|
||||
template-element = "<template" __`> __ *(~~`< / comment / html-element / ~~("</template"__`>)) ("</template"__`>);
|
||||
template-element = `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(~~`< / comment / html-element / ~~("</"tag__`>)) ("</"tag__`>);
|
||||
|
||||
raw-text-element = `<@[tag]("script"/"style"/"textarea"/"title") *(__attribute) __ `> &("</"tag__`>);
|
||||
raw-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]*~~("</"tag__`>) ("</"tag__`>);
|
||||
|
||||
normal-element = !raw-text-element `<@[tag]id *(__attribute) __ `> *(~~`< / comment / html-element / ~~("</"tag__`>)) "</"tag__`>;
|
||||
normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]*(~~`< / comment / html-element / ~~("</"tag__`>)) "</"tag__`>;
|
||||
|
||||
comment = "<!--" &&"-->";
|
||||
|
||||
attribute = +id%`:__`=__(id / `" &`" / `' &`');
|
||||
attributes = *(!(attribute==match-attribute))%__ __(attribute==match-attribute)__ *attribute%__;
|
||||
attribute = (+id%`:)__`=__ (id / `" &`" / `' &`');
|
||||
attribute = (+id%`:)__`=__ (id / `" *~`" `" / `' *~`' `');
|
||||
match-attribute = attribute;
|
||||
match-tag = id;
|
||||
match-body = (/);
|
||||
|
1
types.h
1
types.h
@ -23,6 +23,7 @@ enum VMOpcode {
|
||||
VM_CAPTURE,
|
||||
VM_OTHERWISE,
|
||||
VM_CHAIN,
|
||||
VM_EQUAL,
|
||||
VM_REPLACE,
|
||||
VM_REF,
|
||||
VM_BACKREF,
|
||||
|
25
vm.c
25
vm.c
@ -28,6 +28,7 @@ static const char *opcode_names[] = {
|
||||
[VM_OTHERWISE] = "OTHERWISE",
|
||||
[VM_CHAIN] = "CHAIN",
|
||||
[VM_REPLACE] = "REPLACE",
|
||||
[VM_EQUAL] = "EQUAL",
|
||||
[VM_REF] = "REF",
|
||||
[VM_BACKREF] = "BACKREF",
|
||||
};
|
||||
@ -253,6 +254,25 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
m1->nextsibling = m2;
|
||||
return m;
|
||||
}
|
||||
case VM_EQUAL: {
|
||||
match_t *m1 = _match(g, str, op->args.multiple.first, rec);
|
||||
if (m1 == NULL) return NULL;
|
||||
|
||||
// <p1>==<p2> matches iff both have the same start and end point:
|
||||
match_t *m2 = _match(g, str, op->args.multiple.second, rec);
|
||||
if (m2 == NULL || m2->end != m1->end) {
|
||||
destroy_match(&m1);
|
||||
destroy_match(&m2);
|
||||
return NULL;
|
||||
}
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
m->end = m2->end;
|
||||
m->op = op;
|
||||
m->child = m1;
|
||||
m1->nextsibling = m2;
|
||||
return m;
|
||||
}
|
||||
case VM_REPLACE: {
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
@ -512,8 +532,8 @@ void print_match(match_t *m, const char *color, int verbose)
|
||||
const char *prev = m->start;
|
||||
for (match_t *child = m->child; child; child = child->nextsibling) {
|
||||
// Skip children from e.g. zero-width matches like >@foo
|
||||
if (!(m->start <= child->start && child->start <= m->end &&
|
||||
m->start <= child->end && child->end <= m->end))
|
||||
if (!(prev <= child->start && child->start <= m->end &&
|
||||
prev <= child->end && child->end <= m->end))
|
||||
continue;
|
||||
if (child->start > prev)
|
||||
printf("%s%.*s", color ? color : "", (int)(child->start - prev), prev);
|
||||
@ -608,6 +628,7 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap)
|
||||
str += len;
|
||||
prev = child->start;
|
||||
}
|
||||
if (child->start < prev) continue;
|
||||
*dest = match_backref(str, op, child);
|
||||
if (*dest == NULL) {
|
||||
destroy_match(&ret);
|
||||
|
Loading…
Reference in New Issue
Block a user