about | summary | refs | log | tree | commit | diff
path: root/grammars
diff options
context:
space:
mode:
author: Bruce Hill <bruce@bruce-hill.com> 2020-09-16 17:57:56 -0700
committer: Bruce Hill <bruce@bruce-hill.com> 2020-09-16 17:57:56 -0700
commit: 21807a663d0ab1fc934e1bb3ad485fe1c3e9c821 (patch)
tree: 618f998a8073b0adce37cb0947718945dedf775d /grammars
parent: 79efa8bf5efed69fafc558968d51da4dbdd9cfd1 (diff)
Consolidated repetition ops (instead of '+', '*', '?', etc., it's now all
number-based: '1+', '0+', '0-1') and reverted to UPTO_AND behavior instead of UPTO.
Diffstat (limited to 'grammars')
-rw-r--r-- grammars/bpeg.bpeg 66
-rw-r--r-- grammars/builtins.bpeg 32
-rw-r--r-- grammars/html.bpeg 12
3 files changed, 88 insertions, 22 deletions
diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg
new file mode 100644
index 0000000..dd3a3b4
--- /dev/null
+++ b/grammars/bpeg.bpeg
@@ -0,0 +1,66 @@
+# This is a file defining the BPEG grammar using BPEG syntax
+
+Grammar = __ 0+Def%(__`;__) 0-1(`;__);
+Def = @[name]Ref __ `= __ @[definition]extended-pat;
+
+# This is used for command line arguments:
+String-pattern = 0+(`\ pat 0-1`; / .);
+
+pat = suffixed-pat / simple-pat;
+simple-pat = Empty / Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
+ / Nodent / Repeat / After / Before / Capture / Replace / Ref / parens;
+suffixed-pat = Eq-pat;
+
+Eq-pat = @[first]simple-pat "==" @[second]pat;
+
+Empty = `/ >(__ (`)/`}));
+Dot = `. !`.;
+String = (
+ `" @[s]0+(Escape / !`"$.) `"
+ / `' @[s]0+(Escape / !`'$.) `'
+ );
+Char-range = `` @[low]. `- @[high].;
+Char = `` @[s].;
+Escape-range = `\ @[low]escape-sequence `- @[high]escape-sequence;
+Escape = `\ @[s]escape-sequence;
+escape-sequence = (
+ 1-3 `0-7
+ / `x 2 (`0-9/`a-f/`A-F)
+ /`a/`b/`e/`n/`r/`t/`v / . / \n
+ );
+No = `! _ @pat;
+Nodent = `|;
+Upto-and = 2-3`. 0-1(_@pat);
+Repeat = (
+ @[min]int _ `- _ @[max]int
+ / @[min]int _ `+ @[max](/)
+ / @[min]@[max]int
+ ) _ @[repeat-pat]pat 0-1( __ `% __ @[sep]pat);
+After = `< _ pat;
+Before = `> _ pat;
+Capture = `@ 0-1(_ `[ @[capture-name]Ref `]) _ @[capture]pat;
+Replace = `{ __ (
+ 0-1(@[replace-pat]extended-pat __) "=>" 0-1(__ @[replacement]String)
+ ) __ `};
+Ref = @[name](
+ "^^" / "^" / "__" / "_" / "$$" / "$" /
+ (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-));
+
+parens = `( __ extended-pat __ `);
+
+Chain = 2+@pat % (__);
+Otherwise = 2+@(Chain/pat) % (__`/__);
+extended-pat = Otherwise / Chain / pat;
+
+# Special-symbol rules:
+_ = 0+(` / \t);
+__ = 0+(` / \t / \r / \n / comment);
+$$ = !$.;
+$ = !.;
+^^ = !<$.;
+^ = !<.;
+
+hash-comment = `# .. $;
+
+# Note: comments are undefined by default in regular BPEG
+comment = hash-comment;
diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg
index b90ac12..6ba31df 100644
--- a/grammars/builtins.bpeg
+++ b/grammars/builtins.bpeg
@@ -1,29 +1,29 @@
# Meta-rules for acting on everything
pattern = !(/); # Not defined by default
replacement = !(/); # Not defined by default
-replace-all = +(...@replacement) ...;
-find-all = +find-next%\n ?{!<\n => "\n"};
+replace-all = 1+(...@replacement) ...;
+find-all = 1+find-next%\n 0-1{!<\n => "\n"};
find-next = matching-line / {..\n =>} find-next;
-only-matches = +{...@pattern=>'@1\n'};
-matching-line = +(..@pattern) ..$;
+only-matches = 1+{...@pattern=>'@1\n'};
+matching-line = 1+(..@pattern) ..$;
# Helper definitions (commonly used)
-indent = \n|+(\t/' ');
+indent = \n|1+(\t/' ');
dedent = $ !(\n|);
-indented-block = |` ..$ *(\n|..$);
+indented-block = |` ..$ 0+(\n|..$);
crlf = \r\n;
cr = \r; r = \r;
-anglebraces = `< *(anglebraces / !`>.) `>;
-brackets = `[ *(brackets / !`].) `];
-braces = `{ *(braces / !`}.) `};
-parens = `( *(parens / !`).) `);
-id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9);
-word = !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_);
+anglebraces = `< 0+(anglebraces / !`>.) `>;
+brackets = `[ 0+(brackets / !`].) `];
+braces = `{ 0+(braces / !`}.) `};
+parens = `( 0+(parens / !`).) `);
+id = !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9);
+word = !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_);
HEX = `0-9/`A-F;
Hex = `0-9/`a-f/`A-F;
hex = `0-9/`a-f;
-number = +`0-9 ?(`. *`0-9) / `. +`0-9;
-int = +`0-9;
+number = 1+`0-9 0-1(`. 0+`0-9) / `. 1+`0-9;
+int = 1+`0-9;
digit = `0-9;
Abc = `a-z/`A-Z;
ABC = `A-Z;
@@ -42,5 +42,5 @@ $$ = !$.;
$ = !.;
^^ = !<$.;
^ = !<.;
-__ = *(` /\t/\n/\r/comment);
-_ = *(` /\t);
+__ = 0+(` /\t/\n/\r/comment);
+_ = 0+(` /\t);
diff --git a/grammars/html.bpeg b/grammars/html.bpeg
index 7af1f63..451e61c 100644
--- a/grammars/html.bpeg
+++ b/grammars/html.bpeg
@@ -1,5 +1,5 @@
# HTML grammar
-HTML = __ ?(doctype __) *html-element%__ __;
+HTML = __ 0-1(doctype __) 0+html-element%__ __;
doctype = "<!DOCTYPE" ..`>;
@@ -9,19 +9,19 @@ html-element = (
/ >(`<("template")) template-element
/ normal-element);
-void-element = `< @[tag](id==match-tag) __attributes__ ?`/ __ `>;
+void-element = `< @[tag](id==match-tag) __attributes__ 0-1`/ __ `>;
template-element = `< @[tag](id==match-tag) __`> __ >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>);
raw-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body].. ("</"tag__`>);
-normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]*(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>;
+normal-element = `< @[tag](id==match-tag) __attributes__ `> >match-body @[body]0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>;
comment = "<!--" ..."-->";
-attributes = *(!(attribute==match-attribute))%__ __(attribute==match-attribute)__ *attribute%__;
-attribute = (+id%`:)__`=__ (id / `" ..`" / `' ..`');
-attribute = (+id%`:)__`=__ (id / `" ..`" / `' ..`');
+attributes = 0+(!(attribute==match-attribute))%__ __(attribute==match-attribute)__ 0+attribute%__;
+attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
+attribute = (1+id%`:)__`=__ (id / `" ..`" / `' ..`');
match-attribute = attribute;
match-tag = id;
match-body = (/);