Consolidated repetition ops (instead of '+', '*', '?', etc., it's now all

number-based: '1+', '0+', '0-1') and reverted to UPTO_AND behavior
instead of UPTO.
This commit is contained in:
Bruce Hill 2020-09-16 17:57:56 -07:00
parent 79efa8bf5e
commit 21807a663d
6 changed files with 50 additions and 75 deletions

View File

@ -101,10 +101,11 @@ vm_op_t *bpeg_simplepattern(const char *str)
if (matchchar(&str, '.')) // "..."
op->multiline = 1;
vm_op_t *till = bpeg_simplepattern(str);
// Don't advance str, the following pattern will be re-matched.
op->op = VM_UPTO;
op->op = VM_UPTO_AND;
op->len = -1;
op->args.pat = till;
if (till)
str = till->end;
break;
} else {
anychar:
@ -205,38 +206,15 @@ vm_op_t *bpeg_simplepattern(const char *str)
check(pat, "Expected pattern after repetition count");
str = pat->end;
str = after_spaces(str);
vm_op_t *sep = NULL;
if (matchchar(&str, '%')) {
vm_op_t *sep = bpeg_simplepattern(str);
sep = bpeg_simplepattern(str);
check(sep, "Expected pattern for separator after '%%'");
str = sep->end;
set_range(op, min, max, pat, sep);
} else {
str = pat->end;
set_range(op, min, max, pat, NULL);
}
break;
}
// Special repetitions:
case '+': case '*': case '?': {
ssize_t min = -1, max = -1;
switch (c) {
case '+': min = 1, max = -1; break;
case '*': min = 0, max = -1; break;
case '?': min = 0, max = 1; break;
}
vm_op_t *pat = bpeg_simplepattern(str);
check(pat, "Expected pattern after '%c'", c);
str = pat->end;
str = after_spaces(str);
if (matchchar(&str, '%')) {
vm_op_t *sep = bpeg_simplepattern(str);
check(sep, "Expected pattern for separator after '%%'");
str = sep->end;
set_range(op, min, max, pat, sep);
} else {
str = pat->end;
set_range(op, min, max, pat, NULL);
}
set_range(op, min, max, pat, sep);
break;
}
// Lookbehind
@ -269,7 +247,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
op = expand_choices(op);
str = op->end;
str = after_spaces(str);
check(matchchar(&str, ')'), "Expected closing parenthesis");
check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str);
break;
}
// Capture

View File

@ -1,13 +1,13 @@
# This is a file defining the BPEG grammar using BPEG syntax
Grammar = __ *Def%(__`;__) ?(`;__);
Grammar = __ 0+Def%(__`;__) 0-1(`;__);
Def = @[name]Ref __ `= __ @[definition]extended-pat;
# This is used for command line arguments:
String-pattern = *(`\ pat ?`; / .);
String-pattern = 0+(`\ pat 0-1`; / .);
pat = suffixed-pat / simple-pat;
simple-pat = Empty / Upto / Dot / String / Char-range / Char / Escape-range / Escape / No
simple-pat = Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
/ Nodent / Repeat / After / Before / Capture / Replace / Ref / parens;
suffixed-pat = Eq-pat;
@ -16,8 +16,8 @@ Eq-pat = @[first]simple-pat "==" @[second]pat;
Empty = `/ >(__ (`)/`}));
Dot = `. !`.;
String = (
`" @[s]*(Escape / !`"$.) `"
/ `' @[s]*(Escape / !`'$.) `'
`" @[s]0+(Escape / !`"$.) `"
/ `' @[s]0+(Escape / !`'$.) `'
);
Char-range = `` @[low]. `- @[high].;
Char = `` @[s].;
@ -30,35 +30,31 @@ escape-sequence = (
);
No = `! _ @pat;
Nodent = `|;
Upto = 2-3`. ?>(_@pat);
Upto-and = 2-3`. 0-1(_@pat);
Repeat = (
@[min]int _ `- _ @[max]int
/{@[min]{=>"0"}=>} @[max]int _ `-
/ @[min]int _ `+ @[max](/)
/ @[min]@[max]int
/{@[min]{=>"1"}=>} `+ @[max](/)
/{@[min]{=>"0"}=>} `* @[max](/)
/{@[min]{=>"0"}=>} `? {@[max]{=>"1"}=>}
) _ @[repeat-pat]pat ?( __ `% __ @[sep]pat);
) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat);
After = `< _ pat;
Before = `> _ pat;
Capture = `@ ?(_ `[ @[capture-name]Ref `]) _ @[capture]pat;
Capture = `@ 0-1(_ `[ @[capture-name]Ref `]) _ @[capture]pat;
Replace = `{ __ (
?(@[replace-pat]extended-pat __) "=>" ?(__ @[replacement]String)
0-1(@[replace-pat]extended-pat __) "=>" 0-1(__ @[replacement]String)
) __ `};
Ref = @[name](
"^^" / "^" / "__" / "_" / "$$" / "$" /
(`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-));
(`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-));
parens = `( __ extended-pat __ `);
Chain = +@pat % (__);
Otherwise = +@(Chain/pat) % (__`/__);
Chain = 2+@pat % (__);
Otherwise = 2+@(Chain/pat) % (__`/__);
extended-pat = Otherwise / Chain / pat;
# Special-symbol rules:
_ = *(` / \t);
__ = *(` / \t / \r / \n / comment);
_ = 0+(` / \t);
__ = 0+(` / \t / \r / \n / comment);
$$ = !$.;
$ = !.;
^^ = !<$.;

View File

@ -1,29 +1,29 @@
# Meta-rules for acting on everything
pattern = !(/); # Not defined by default
replacement = !(/); # Not defined by default
replace-all = +(...@replacement) ...;
find-all = +find-next%\n ?{!<\n => "\n"};
replace-all = 1+(...@replacement) ...;
find-all = 1+find-next%\n 0-1{!<\n => "\n"};
find-next = matching-line / {..\n =>} find-next;
only-matches = +{...@pattern=>'@1\n'};
matching-line = +(..@pattern) ..$;
only-matches = 1+{...@pattern=>'@1\n'};
matching-line = 1+(..@pattern) ..$;
# Helper definitions (commonly used)
indent = \n|+(\t/' ');
indent = \n|1+(\t/' ');
dedent = $ !(\n|);
indented-block = |` ..$ *(\n|..$);
indented-block = |` ..$ 0+(\n|..$);
crlf = \r\n;
cr = \r; r = \r;
anglebraces = `< *(anglebraces / !`>.) `>;
brackets = `[ *(brackets / !`].) `];
braces = `{ *(braces / !`}.) `};
parens = `( *(parens / !`).) `);
id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9);
word = !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_);
anglebraces = `< 0+(anglebraces / !`>.) `>;
brackets = `[ 0+(brackets / !`].) `];
braces = `{ 0+(braces / !`}.) `};
parens = `( 0+(parens / !`).) `);
id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9);
word = !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_);
HEX = `0-9/`A-F;
Hex = `0-9/`a-f/`A-F;
hex = `0-9/`a-f;
number = +`0-9 ?(`. *`0-9) / `. +`0-9;
int = +`0-9;
number = 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9;
int = 1+`0-9;
digit = `0-9;
Abc = `a-z/`A-Z;
ABC = `A-Z;
@ -42,5 +42,5 @@ $$ = !$.;
$ = !.;
^^ = !<$.;
^ = !<.;
__ = *(` /\t/\n/\r/comment);
_ = *(` /\t);
__ = 0+(` /\t/\n/\r/comment);
_ = 0+(` /\t);

View File

@ -1,5 +1,5 @@
# HTML grammar
HTML = __ ?(doctype __) *html-element%__ __;
HTML = __ 0-1(doctype __) 0+html-element%__ __;
doctype = "<!DOCTYPE" ..`>;
@ -9,19 +9,19 @@ html-element = (
/ >(`<("template")) template-element
/ normal-element);
void-element = `< @[tag](id==match-tag) __attributes__ ?`/ __ `>;
void-element = `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>;
template-element = `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>);
raw-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>);
normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]*(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>;
normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>;
comment = "<!--" ..."-->";
attributes = *(!(attribute==match-attribute))%__ __(attribute==match-attribute)__ *attribute%__;
attribute = (+id%`:)__`=__ (id / `" ..`" / `' ..`');
attribute = (+id%`:)__`=__ (id / `" ..`" / `' ..`');
attributes = 0+(!(attribute==match-attribute))%__ __(attribute==match-attribute)__ 0+attribute%__;
attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
match-attribute = attribute;
match-tag = id;
match-body = (/);

View File

@ -20,7 +20,7 @@ enum VMOpcode {
VM_STRING,
VM_RANGE,
VM_NOT,
VM_UPTO,
VM_UPTO_AND,
VM_REPEAT,
VM_BEFORE,
VM_AFTER,

11
vm.c
View File

@ -19,7 +19,7 @@ static const char *opcode_names[] = {
[VM_STRING] = "STRING",
[VM_RANGE] = "RANGE",
[VM_NOT] = "NOT",
[VM_UPTO] = "UPTO",
[VM_UPTO_AND] = "UPTO_AND",
[VM_REPEAT] = "REPEAT",
[VM_BEFORE] = "BEFORE",
[VM_AFTER] = "AFTER",
@ -127,7 +127,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
m->end = str;
return m;
}
case VM_UPTO: {
case VM_UPTO_AND: {
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->op = op;
@ -136,7 +136,8 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
prev = str;
match_t *p = _match(g, str, op->args.pat, flags, rec);
if (p) {
destroy_match(&p);
m->child = p;
str = p->end;
break;
}
// This isn't in the for() structure because there needs to
@ -410,8 +411,8 @@ void print_pattern(vm_op_t *op)
fprintf(stderr, ")");
break;
}
case VM_UPTO: {
fprintf(stderr, "text up to (");
case VM_UPTO_AND: {
fprintf(stderr, "text up to and including (");
print_pattern(op->args.pat);
fprintf(stderr, ")");
break;