about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Bruce Hill <bruce@bruce-hill.com> 2020-09-16 17:57:56 -0700
committer: Bruce Hill <bruce@bruce-hill.com> 2020-09-16 17:57:56 -0700
commit: 21807a663d0ab1fc934e1bb3ad485fe1c3e9c821 (patch)
tree: 618f998a8073b0adce37cb0947718945dedf775d
parent: 79efa8bf5efed69fafc558968d51da4dbdd9cfd1 (diff)
Consolidated repetition ops (instead of '+', '*', '?', etc., they are now all
number-based: '1+', '0+', '0-1') and reverted to UPTO_AND behavior instead of UPTO.
-rw-r--r--  compiler.c                                   36
-rw-r--r--  grammars/bpeg.bpeg (renamed from bpeg.bpeg)  32
-rw-r--r--  grammars/builtins.bpeg                       32
-rw-r--r--  grammars/html.bpeg                           12
-rw-r--r--  types.h                                       2
-rw-r--r--  vm.c                                         11
6 files changed, 50 insertions, 75 deletions
diff --git a/compiler.c b/compiler.c
index 1c0a817..b27717b 100644
--- a/compiler.c
+++ b/compiler.c
@@ -101,10 +101,11 @@ vm_op_t *bpeg_simplepattern(const char *str)
if (matchchar(&str, '.')) // "..."
op->multiline = 1;
vm_op_t *till = bpeg_simplepattern(str);
- // Don't advance str, the following pattern will be re-matched.
- op->op = VM_UPTO;
+ op->op = VM_UPTO_AND;
op->len = -1;
op->args.pat = till;
+ if (till)
+ str = till->end;
break;
} else {
anychar:
@@ -205,38 +206,15 @@ vm_op_t *bpeg_simplepattern(const char *str)
check(pat, "Expected pattern after repetition count");
str = pat->end;
str = after_spaces(str);
+ vm_op_t *sep = NULL;
if (matchchar(&str, '%')) {
- vm_op_t *sep = bpeg_simplepattern(str);
+ sep = bpeg_simplepattern(str);
check(sep, "Expected pattern for separator after '%%'");
str = sep->end;
- set_range(op, min, max, pat, sep);
} else {
str = pat->end;
- set_range(op, min, max, pat, NULL);
- }
- break;
- }
- // Special repetitions:
- case '+': case '*': case '?': {
- ssize_t min = -1, max = -1;
- switch (c) {
- case '+': min = 1, max = -1; break;
- case '*': min = 0, max = -1; break;
- case '?': min = 0, max = 1; break;
- }
- vm_op_t *pat = bpeg_simplepattern(str);
- check(pat, "Expected pattern after '%c'", c);
- str = pat->end;
- str = after_spaces(str);
- if (matchchar(&str, '%')) {
- vm_op_t *sep = bpeg_simplepattern(str);
- check(sep, "Expected pattern for separator after '%%'");
- str = sep->end;
- set_range(op, min, max, pat, sep);
- } else {
- str = pat->end;
- set_range(op, min, max, pat, NULL);
}
+ set_range(op, min, max, pat, sep);
break;
}
// Lookbehind
@@ -269,7 +247,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
op = expand_choices(op);
str = op->end;
str = after_spaces(str);
- check(matchchar(&str, ')'), "Expected closing parenthesis");
+ check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str);
break;
}
// Capture
diff --git a/bpeg.bpeg b/grammars/bpeg.bpeg
index de0c676..dd3a3b4 100644
--- a/bpeg.bpeg
+++ b/grammars/bpeg.bpeg
@@ -1,13 +1,13 @@
# This is a file defining the BPEG grammar using BPEG syntax
-Grammar = __ *Def%(__`;__) ?(`;__);
+Grammar = __ 0+Def%(__`;__) 0-1(`;__);
Def = @[name]Ref __ `= __ @[definition]extended-pat;
# This is used for command line arguments:
-String-pattern = *(`\ pat ?`; / .);
+String-pattern = 0+(`\ pat 0-1`; / .);
pat = suffixed-pat / simple-pat;
-simple-pat = Empty / Upto / Dot / String / Char-range / Char / Escape-range / Escape / No
+simple-pat = Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
/ Nodent / Repeat / After / Before / Capture / Replace / Ref / parens;
suffixed-pat = Eq-pat;
@@ -16,8 +16,8 @@ Eq-pat = @[first]simple-pat "==" @[second]pat;
Empty = `/ >(__ (`)/`}));
Dot = `. !`.;
String = (
- `" @[s]*(Escape / !`"$.) `"
- / `' @[s]*(Escape / !`'$.) `'
+ `" @[s]0+(Escape / !`"$.) `"
+ / `' @[s]0+(Escape / !`'$.) `'
);
Char-range = `` @[low]. `- @[high].;
Char = `` @[s].;
@@ -30,35 +30,31 @@ escape-sequence = (
);
No = `! _ @pat;
Nodent = `|;
-Upto = 2-3`. ?>(_@pat);
+Upto-and = 2-3`. 0-1(_@pat);
Repeat = (
@[min]int _ `- _ @[max]int
- /{@[min]{=>"0"}=>} @[max]int _ `-
/ @[min]int _ `+ @[max](/)
/ @[min]@[max]int
- /{@[min]{=>"1"}=>} `+ @[max](/)
- /{@[min]{=>"0"}=>} `* @[max](/)
- /{@[min]{=>"0"}=>} `? {@[max]{=>"1"}=>}
- ) _ @[repeat-pat]pat ?( __ `% __ @[sep]pat);
+ ) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat);
After = `< _ pat;
Before = `> _ pat;
-Capture = `@ ?(_ `[ @[capture-name]Ref `]) _ @[capture]pat;
+Capture = `@ 0-1(_ `[ @[capture-name]Ref `]) _ @[capture]pat;
Replace = `{ __ (
- ?(@[replace-pat]extended-pat __) "=>" ?(__ @[replacement]String)
+ 0-1(@[replace-pat]extended-pat __) "=>" 0-1(__ @[replacement]String)
) __ `};
Ref = @[name](
"^^" / "^" / "__" / "_" / "$$" / "$" /
- (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-));
+ (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-));
parens = `( __ extended-pat __ `);
-Chain = +@pat % (__);
-Otherwise = +@(Chain/pat) % (__`/__);
+Chain = 2+@pat % (__);
+Otherwise = 2+@(Chain/pat) % (__`/__);
extended-pat = Otherwise / Chain / pat;
# Special-symbol rules:
-_ = *(` / \t);
-__ = *(` / \t / \r / \n / comment);
+_ = 0+(` / \t);
+__ = 0+(` / \t / \r / \n / comment);
$$ = !$.;
$ = !.;
^^ = !<$.;
diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg
index b90ac12..6ba31df 100644
--- a/grammars/builtins.bpeg
+++ b/grammars/builtins.bpeg
@@ -1,29 +1,29 @@
# Meta-rules for acting on everything
pattern = !(/); # Not defined by default
replacement = !(/); # Not defined by default
-replace-all = +(...@replacement) ...;
-find-all = +find-next%\n ?{!<\n => "\n"};
+replace-all = 1+(...@replacement) ...;
+find-all = 1+find-next%\n 0-1{!<\n => "\n"};
find-next = matching-line / {..\n =>} find-next;
-only-matches = +{...@pattern=>'@1\n'};
-matching-line = +(..@pattern) ..$;
+only-matches = 1+{...@pattern=>'@1\n'};
+matching-line = 1+(..@pattern) ..$;
# Helper definitions (commonly used)
-indent = \n|+(\t/' ');
+indent = \n|1+(\t/' ');
dedent = $ !(\n|);
-indented-block = |` ..$ *(\n|..$);
+indented-block = |` ..$ 0+(\n|..$);
crlf = \r\n;
cr = \r; r = \r;
-anglebraces = `< *(anglebraces / !`>.) `>;
-brackets = `[ *(brackets / !`].) `];
-braces = `{ *(braces / !`}.) `};
-parens = `( *(parens / !`).) `);
-id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9);
-word = !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_);
+anglebraces = `< 0+(anglebraces / !`>.) `>;
+brackets = `[ 0+(brackets / !`].) `];
+braces = `{ 0+(braces / !`}.) `};
+parens = `( 0+(parens / !`).) `);
+id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9);
+word = !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_);
HEX = `0-9/`A-F;
Hex = `0-9/`a-f/`A-F;
hex = `0-9/`a-f;
-number = +`0-9 ?(`. *`0-9) / `. +`0-9;
-int = +`0-9;
+number = 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9;
+int = 1+`0-9;
digit = `0-9;
Abc = `a-z/`A-Z;
ABC = `A-Z;
@@ -42,5 +42,5 @@ $$ = !$.;
$ = !.;
^^ = !<$.;
^ = !<.;
-__ = *(` /\t/\n/\r/comment);
-_ = *(` /\t);
+__ = 0+(` /\t/\n/\r/comment);
+_ = 0+(` /\t);
diff --git a/grammars/html.bpeg b/grammars/html.bpeg
index 7af1f63..451e61c 100644
--- a/grammars/html.bpeg
+++ b/grammars/html.bpeg
@@ -1,5 +1,5 @@
# HTML grammar
-HTML = __ ?(doctype __) *html-element%__ __;
+HTML = __ 0-1(doctype __) 0+html-element%__ __;
doctype = "<!DOCTYPE" ..`>;
@@ -9,19 +9,19 @@ html-element = (
/ >(`<("template")) template-element
/ normal-element);
-void-element = `< @[tag](id==match-tag) __attributes__ ?`/ __ `>;
+void-element = `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>;
template-element = `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>);
raw-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>);
-normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]*(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>;
+normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>;
comment = "<!--" ..."-->";
-attributes = *(!(attribute==match-attribute))%__ __(attribute==match-attribute)__ *attribute%__;
-attribute = (+id%`:)__`=__ (id / `" ..`" / `' ..`');
-attribute = (+id%`:)__`=__ (id / `" ..`" / `' ..`');
+attributes = 0+(!(attribute==match-attribute))%__ __(attribute==match-attribute)__ 0+attribute%__;
+attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
+attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
match-attribute = attribute;
match-tag = id;
match-body = (/);
diff --git a/types.h b/types.h
index b2461a2..3749156 100644
--- a/types.h
+++ b/types.h
@@ -20,7 +20,7 @@ enum VMOpcode {
VM_STRING,
VM_RANGE,
VM_NOT,
- VM_UPTO,
+ VM_UPTO_AND,
VM_REPEAT,
VM_BEFORE,
VM_AFTER,
diff --git a/vm.c b/vm.c
index 77d6d69..a245efb 100644
--- a/vm.c
+++ b/vm.c
@@ -19,7 +19,7 @@ static const char *opcode_names[] = {
[VM_STRING] = "STRING",
[VM_RANGE] = "RANGE",
[VM_NOT] = "NOT",
- [VM_UPTO] = "UPTO",
+ [VM_UPTO_AND] = "UPTO_AND",
[VM_REPEAT] = "REPEAT",
[VM_BEFORE] = "BEFORE",
[VM_AFTER] = "AFTER",
@@ -127,7 +127,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
m->end = str;
return m;
}
- case VM_UPTO: {
+ case VM_UPTO_AND: {
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->op = op;
@@ -136,7 +136,8 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
prev = str;
match_t *p = _match(g, str, op->args.pat, flags, rec);
if (p) {
- destroy_match(&p);
+ m->child = p;
+ str = p->end;
break;
}
// This isn't in the for() structure because there needs to
@@ -410,8 +411,8 @@ void print_pattern(vm_op_t *op)
fprintf(stderr, ")");
break;
}
- case VM_UPTO: {
- fprintf(stderr, "text up to (");
+ case VM_UPTO_AND: {
+ fprintf(stderr, "text up to and including (");
print_pattern(op->args.pat);
fprintf(stderr, ")");
break;