Consolidated repetition ops (instead of '+', '*', '?', etc., it's now all
number-based: '1+', '0+', '0-1') and reverted to UPTO_AND behavior instead of UPTO.
This commit is contained in:
parent
79efa8bf5e
commit
21807a663d
36
compiler.c
36
compiler.c
@ -101,10 +101,11 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
if (matchchar(&str, '.')) // "..."
|
||||
op->multiline = 1;
|
||||
vm_op_t *till = bpeg_simplepattern(str);
|
||||
// Don't advance str, the following pattern will be re-matched.
|
||||
op->op = VM_UPTO;
|
||||
op->op = VM_UPTO_AND;
|
||||
op->len = -1;
|
||||
op->args.pat = till;
|
||||
if (till)
|
||||
str = till->end;
|
||||
break;
|
||||
} else {
|
||||
anychar:
|
||||
@ -205,38 +206,15 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
check(pat, "Expected pattern after repetition count");
|
||||
str = pat->end;
|
||||
str = after_spaces(str);
|
||||
vm_op_t *sep = NULL;
|
||||
if (matchchar(&str, '%')) {
|
||||
vm_op_t *sep = bpeg_simplepattern(str);
|
||||
sep = bpeg_simplepattern(str);
|
||||
check(sep, "Expected pattern for separator after '%%'");
|
||||
str = sep->end;
|
||||
set_range(op, min, max, pat, sep);
|
||||
} else {
|
||||
str = pat->end;
|
||||
set_range(op, min, max, pat, NULL);
|
||||
}
|
||||
break;
|
||||
}
|
||||
// Special repetitions:
|
||||
case '+': case '*': case '?': {
|
||||
ssize_t min = -1, max = -1;
|
||||
switch (c) {
|
||||
case '+': min = 1, max = -1; break;
|
||||
case '*': min = 0, max = -1; break;
|
||||
case '?': min = 0, max = 1; break;
|
||||
}
|
||||
vm_op_t *pat = bpeg_simplepattern(str);
|
||||
check(pat, "Expected pattern after '%c'", c);
|
||||
str = pat->end;
|
||||
str = after_spaces(str);
|
||||
if (matchchar(&str, '%')) {
|
||||
vm_op_t *sep = bpeg_simplepattern(str);
|
||||
check(sep, "Expected pattern for separator after '%%'");
|
||||
str = sep->end;
|
||||
set_range(op, min, max, pat, sep);
|
||||
} else {
|
||||
str = pat->end;
|
||||
set_range(op, min, max, pat, NULL);
|
||||
}
|
||||
set_range(op, min, max, pat, sep);
|
||||
break;
|
||||
}
|
||||
// Lookbehind
|
||||
@ -269,7 +247,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
op = expand_choices(op);
|
||||
str = op->end;
|
||||
str = after_spaces(str);
|
||||
check(matchchar(&str, ')'), "Expected closing parenthesis");
|
||||
check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str);
|
||||
break;
|
||||
}
|
||||
// Capture
|
||||
|
@ -1,13 +1,13 @@
|
||||
# This is a file defining the BPEG grammar using BPEG syntax
|
||||
|
||||
Grammar = __ *Def%(__`;__) ?(`;__);
|
||||
Grammar = __ 0+Def%(__`;__) 0-1(`;__);
|
||||
Def = @[name]Ref __ `= __ @[definition]extended-pat;
|
||||
|
||||
# This is used for command line arguments:
|
||||
String-pattern = *(`\ pat ?`; / .);
|
||||
String-pattern = 0+(`\ pat 0-1`; / .);
|
||||
|
||||
pat = suffixed-pat / simple-pat;
|
||||
simple-pat = Empty / Upto / Dot / String / Char-range / Char / Escape-range / Escape / No
|
||||
simple-pat = Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
|
||||
/ Nodent / Repeat / After / Before / Capture / Replace / Ref / parens;
|
||||
suffixed-pat = Eq-pat;
|
||||
|
||||
@ -16,8 +16,8 @@ Eq-pat = @[first]simple-pat "==" @[second]pat;
|
||||
Empty = `/ >(__ (`)/`}));
|
||||
Dot = `. !`.;
|
||||
String = (
|
||||
`" @[s]*(Escape / !`"$.) `"
|
||||
/ `' @[s]*(Escape / !`'$.) `'
|
||||
`" @[s]0+(Escape / !`"$.) `"
|
||||
/ `' @[s]0+(Escape / !`'$.) `'
|
||||
);
|
||||
Char-range = `` @[low]. `- @[high].;
|
||||
Char = `` @[s].;
|
||||
@ -30,35 +30,31 @@ escape-sequence = (
|
||||
);
|
||||
No = `! _ @pat;
|
||||
Nodent = `|;
|
||||
Upto = 2-3`. ?>(_@pat);
|
||||
Upto-and = 2-3`. 0-1(_@pat);
|
||||
Repeat = (
|
||||
@[min]int _ `- _ @[max]int
|
||||
/{@[min]{=>"0"}=>} @[max]int _ `-
|
||||
/ @[min]int _ `+ @[max](/)
|
||||
/ @[min]@[max]int
|
||||
/{@[min]{=>"1"}=>} `+ @[max](/)
|
||||
/{@[min]{=>"0"}=>} `* @[max](/)
|
||||
/{@[min]{=>"0"}=>} `? {@[max]{=>"1"}=>}
|
||||
) _ @[repeat-pat]pat ?( __ `% __ @[sep]pat);
|
||||
) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat);
|
||||
After = `< _ pat;
|
||||
Before = `> _ pat;
|
||||
Capture = `@ ?(_ `[ @[capture-name]Ref `]) _ @[capture]pat;
|
||||
Capture = `@ 0-1(_ `[ @[capture-name]Ref `]) _ @[capture]pat;
|
||||
Replace = `{ __ (
|
||||
?(@[replace-pat]extended-pat __) "=>" ?(__ @[replacement]String)
|
||||
0-1(@[replace-pat]extended-pat __) "=>" 0-1(__ @[replacement]String)
|
||||
) __ `};
|
||||
Ref = @[name](
|
||||
"^^" / "^" / "__" / "_" / "$$" / "$" /
|
||||
(`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-));
|
||||
(`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-));
|
||||
|
||||
parens = `( __ extended-pat __ `);
|
||||
|
||||
Chain = +@pat % (__);
|
||||
Otherwise = +@(Chain/pat) % (__`/__);
|
||||
Chain = 2+@pat % (__);
|
||||
Otherwise = 2+@(Chain/pat) % (__`/__);
|
||||
extended-pat = Otherwise / Chain / pat;
|
||||
|
||||
# Special-symbol rules:
|
||||
_ = *(` / \t);
|
||||
__ = *(` / \t / \r / \n / comment);
|
||||
_ = 0+(` / \t);
|
||||
__ = 0+(` / \t / \r / \n / comment);
|
||||
$$ = !$.;
|
||||
$ = !.;
|
||||
^^ = !<$.;
|
@ -1,29 +1,29 @@
|
||||
# Meta-rules for acting on everything
|
||||
pattern = !(/); # Not defined by default
|
||||
replacement = !(/); # Not defined by default
|
||||
replace-all = +(...@replacement) ...;
|
||||
find-all = +find-next%\n ?{!<\n => "\n"};
|
||||
replace-all = 1+(...@replacement) ...;
|
||||
find-all = 1+find-next%\n 0-1{!<\n => "\n"};
|
||||
find-next = matching-line / {..\n =>} find-next;
|
||||
only-matches = +{...@pattern=>'@1\n'};
|
||||
matching-line = +(..@pattern) ..$;
|
||||
only-matches = 1+{...@pattern=>'@1\n'};
|
||||
matching-line = 1+(..@pattern) ..$;
|
||||
|
||||
# Helper definitions (commonly used)
|
||||
indent = \n|+(\t/' ');
|
||||
indent = \n|1+(\t/' ');
|
||||
dedent = $ !(\n|);
|
||||
indented-block = |` ..$ *(\n|..$);
|
||||
indented-block = |` ..$ 0+(\n|..$);
|
||||
crlf = \r\n;
|
||||
cr = \r; r = \r;
|
||||
anglebraces = `< *(anglebraces / !`>.) `>;
|
||||
brackets = `[ *(brackets / !`].) `];
|
||||
braces = `{ *(braces / !`}.) `};
|
||||
parens = `( *(parens / !`).) `);
|
||||
id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9);
|
||||
word = !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_);
|
||||
anglebraces = `< 0+(anglebraces / !`>.) `>;
|
||||
brackets = `[ 0+(brackets / !`].) `];
|
||||
braces = `{ 0+(braces / !`}.) `};
|
||||
parens = `( 0+(parens / !`).) `);
|
||||
id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9);
|
||||
word = !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_);
|
||||
HEX = `0-9/`A-F;
|
||||
Hex = `0-9/`a-f/`A-F;
|
||||
hex = `0-9/`a-f;
|
||||
number = +`0-9 ?(`. *`0-9) / `. +`0-9;
|
||||
int = +`0-9;
|
||||
number = 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9;
|
||||
int = 1+`0-9;
|
||||
digit = `0-9;
|
||||
Abc = `a-z/`A-Z;
|
||||
ABC = `A-Z;
|
||||
@ -42,5 +42,5 @@ $$ = !$.;
|
||||
$ = !.;
|
||||
^^ = !<$.;
|
||||
^ = !<.;
|
||||
__ = *(` /\t/\n/\r/comment);
|
||||
_ = *(` /\t);
|
||||
__ = 0+(` /\t/\n/\r/comment);
|
||||
_ = 0+(` /\t);
|
||||
|
@ -1,5 +1,5 @@
|
||||
# HTML grammar
|
||||
HTML = __ ?(doctype __) *html-element%__ __;
|
||||
HTML = __ 0-1(doctype __) 0+html-element%__ __;
|
||||
|
||||
doctype = "<!DOCTYPE" ..`>;
|
||||
|
||||
@ -9,19 +9,19 @@ html-element = (
|
||||
/ >(`<("template")) template-element
|
||||
/ normal-element);
|
||||
|
||||
void-element = `< @[tag](id==match-tag) __attributes__ ?`/ __ `>;
|
||||
void-element = `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>;
|
||||
|
||||
template-element = `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>);
|
||||
|
||||
raw-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>);
|
||||
|
||||
normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]*(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>;
|
||||
normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>;
|
||||
|
||||
comment = "<!--" ..."-->";
|
||||
|
||||
attributes = *(!(attribute==match-attribute))%__ __(attribute==match-attribute)__ *attribute%__;
|
||||
attribute = (+id%`:)__`=__ (id / `" ..`" / `' ..`');
|
||||
attribute = (+id%`:)__`=__ (id / `" ..`" / `' ..`');
|
||||
attributes = 0+(!(attribute==match-attribute))%__ __(attribute==match-attribute)__ 0+attribute%__;
|
||||
attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
|
||||
attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
|
||||
match-attribute = attribute;
|
||||
match-tag = id;
|
||||
match-body = (/);
|
||||
|
2
types.h
2
types.h
@ -20,7 +20,7 @@ enum VMOpcode {
|
||||
VM_STRING,
|
||||
VM_RANGE,
|
||||
VM_NOT,
|
||||
VM_UPTO,
|
||||
VM_UPTO_AND,
|
||||
VM_REPEAT,
|
||||
VM_BEFORE,
|
||||
VM_AFTER,
|
||||
|
11
vm.c
11
vm.c
@ -19,7 +19,7 @@ static const char *opcode_names[] = {
|
||||
[VM_STRING] = "STRING",
|
||||
[VM_RANGE] = "RANGE",
|
||||
[VM_NOT] = "NOT",
|
||||
[VM_UPTO] = "UPTO",
|
||||
[VM_UPTO_AND] = "UPTO_AND",
|
||||
[VM_REPEAT] = "REPEAT",
|
||||
[VM_BEFORE] = "BEFORE",
|
||||
[VM_AFTER] = "AFTER",
|
||||
@ -127,7 +127,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
m->end = str;
|
||||
return m;
|
||||
}
|
||||
case VM_UPTO: {
|
||||
case VM_UPTO_AND: {
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
m->op = op;
|
||||
@ -136,7 +136,8 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
|
||||
prev = str;
|
||||
match_t *p = _match(g, str, op->args.pat, flags, rec);
|
||||
if (p) {
|
||||
destroy_match(&p);
|
||||
m->child = p;
|
||||
str = p->end;
|
||||
break;
|
||||
}
|
||||
// This isn't in the for() structure because there needs to
|
||||
@ -410,8 +411,8 @@ void print_pattern(vm_op_t *op)
|
||||
fprintf(stderr, ")");
|
||||
break;
|
||||
}
|
||||
case VM_UPTO: {
|
||||
fprintf(stderr, "text up to (");
|
||||
case VM_UPTO_AND: {
|
||||
fprintf(stderr, "text up to and including (");
|
||||
print_pattern(op->args.pat);
|
||||
fprintf(stderr, ")");
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user