From 51313c4773f0af62121b2192ff0cca9560b7ab44 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sun, 17 Jan 2021 19:42:11 -0800 Subject: Adding more language grammars --- grammars/README.md | 14 ++++++++++ grammars/bp.bp | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ grammars/bpeg.bp | 77 -------------------------------------------------- grammars/c.bp | 23 +++++++++++++++ grammars/lua.bp | 19 +++++++++++++ grammars/python.bp | 30 ++++++++++++++++++++ 6 files changed, 168 insertions(+), 77 deletions(-) create mode 100644 grammars/README.md create mode 100644 grammars/bp.bp delete mode 100644 grammars/bpeg.bp create mode 100644 grammars/c.bp create mode 100644 grammars/lua.bp create mode 100644 grammars/python.bp (limited to 'grammars') diff --git a/grammars/README.md b/grammars/README.md new file mode 100644 index 0000000..7fa486f --- /dev/null +++ b/grammars/README.md @@ -0,0 +1,14 @@ +# BP Grammars + +The files in this directory are predefined grammars for different languages and +contexts. They are intended to be used for common search patterns, and **not** +intended to be complete PEG definitions of language grammars, other than +[bp.bp](./bp.bp), which is included for stress-testing purposes, as well as a +showcase of some BP features. + +## Adding Grammars + +If you want to add your own grammar, the easiest way to do so is to create a +`.bp` file in `~/.config/bp/`. The syntax for grammar files is fully and +formally defined in [bp.bp](./bp.bp), but in short, it's a list of +whitespace-separated rule definitions of the form `id __ ":" __ pattern`. diff --git a/grammars/bp.bp b/grammars/bp.bp new file mode 100644 index 0000000..7379dfe --- /dev/null +++ b/grammars/bp.bp @@ -0,0 +1,82 @@ +# This is a file defining the BP grammar using BP syntax +# +# This is a complete definition of the grammar of BP grammar files, but it's +# mainly intended to be used as a proof-of-concept, and a stress-test for BP. +# The grammar files provided with BP are not otherwise intended to be full +# language grammars. + +Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(..$$%\n => "Could not parse this code")) +Def: @name=id __ `: __ ( + @definition=extended-pat + / $$ @!=(''=>"No definition for rule") + / @!=(..>(`;/id_`:/$)%\n => "Invalid definition: @0")) + +# This is used for command line arguments: +String-pattern: ..$$ % (\n / Nodent / Escape / `\ pat [`;]) + +pat: simple-pat !(__("!="/"==")) / suffixed-pat +simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range + / Escape / Repeat / Optional / No / After / Before / Capture + / Ref / parens + +suffixed-pat: ( + Eq-pat + / Not-eq-pat +) + +Eq-pat: @first=pat__"=="__@second=pat +Not-eq-pat: @first=pat__"!="__@second=pat + +Dot: `. !`. +String: ( + `" @s=*(Escape / !`".) (`" / @!=(''=> "Expected closing quote here")) + / `' @s=*(Escape / !`'.) (`' / @!=(''=> "Expected closing quote here")) + ) +Chars: `` @+(Char-range/Char) % `, +Char-range: @low=. `- (@high=. / @!=(''=> "Expected a second character to form a character range")) +Char: (@s=. / @!=(''=> "Expected a character following the '`'")) +Escape-range: `\ @low=escape-sequence `- @high=escape-sequence +Escape: `\ (@s=escape-sequence + / $ @!=(''=>"Backslashes are used for escape sequences, not splitting lines") + / @!=(. *(Abc/`0-9) => "Invalid escape sequence: '@0'") +) +escape-sequence: ( + `n,t,r,e,b,a,v + / 1-3 `0-7 + / `x 2 `0-9,a-f,A-F + ) +No: `! (__@pat / @!=(''=>"Expected a pattern after the exclamation mark")) +Nodent: `\ `N +Upto-and: ".." [__@first=simple-pat] [__`%__@second=simple-pat] +Repeat: ( + @min=(''=>'0') (`*=>"-") @max=(''=>'∞') + / @min=int __ `- __ @max=int + / @min=(int / ''=>'1') __ (`+=>"-") @max=(''=>'∞') + / @min=@max=int + ) __ @repeat-pat=pat [__`%__@sep=pat] +Optional: `[ __ extended-pat (__`] / @!=(''=> "Expected closing square bracket here")) +After: `< __ pat +Before: `> __ pat +Capture: `@ [__ @capture-name=(id/`!) __ !"=>" `=] __ (@capture=pat / @!=(''=> "Expected pattern to capture")) +Replace: ( + @replace-pat=(Replace / Chain / pat) __ "=>" (__ @replacement=String / @!=(''=> "Expected replacement string")) + ) +Ref: @name=id !(__`:) + +parens: `( __ extended-pat (__ `) / @!=(''=> "Expected closing parenthesis here")) + +Chain: 2+@(pat !(__"=>") / Replace)%__ +Otherwise: 2+@(Replace / Chain / pat)%(__`/__) +extended-pat: Otherwise / Replace / Chain / pat + +# Special-symbol rules: +_: *(` / \t) +__: *(` / \t / \r / \n / comment) +$$: !(./\n) +$: !. +^^: !<(./\n) +^: !<. + +id: "^^" / "^" / "__" / "_" / "$$" / "$" / "|" / `a-z,A-Z *`a-z,A-Z,0-9,- + +comment: `# .. $ diff --git a/grammars/bpeg.bp b/grammars/bpeg.bp deleted file mode 100644 index 0894e76..0000000 --- a/grammars/bpeg.bp +++ /dev/null @@ -1,77 +0,0 @@ -# This is a file defining the BP grammar using BP syntax - -Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(..$$%\n => "Could not parse this code")) -Def: @name=id __ `: __ ( - @definition=extended-pat - / $$ @!=(''=>"No definition for rule") - / @!=(..>(`;/id_`:/$)%\n => "Invalid definition: @0")) - -# This is used for command line arguments: -String-pattern: ..$$ % (\n / Nodent / Escape / `\ pat [`;]) - -pat: simple-pat !(__("!="/"==")) / suffixed-pat -simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range - / Escape / Repeat / Optional / No / After / Before / Capture - / Ref / parens - -suffixed-pat: ( - Eq-pat - / Not-eq-pat -) - -Eq-pat: @first=pat__"=="__@second=pat -Not-eq-pat: @first=pat__"!="__@second=pat - -Dot: `. !`. -String: ( - `" @s=*(Escape / !`".) (`" / @!=(''=> "Expected closing quote here")) - / `' @s=*(Escape / !`'.) (`' / @!=(''=> "Expected closing quote here")) - ) -Chars: `` @+(Char-range/Char) % `, -Char-range: @low=. `- (@high=. / @!=(''=> "Expected a second character to form a character range")) -Char: (@s=. / @!=(''=> "Expected a character following the '`'")) -Escape-range: `\ @low=escape-sequence `- @high=escape-sequence -Escape: `\ (@s=escape-sequence - / $ @!=(''=>"Backslashes are used for escape sequences, not splitting lines") - / @!=(. *(Abc/`0-9) => "Invalid escape sequence: '@0'") -) -escape-sequence: ( - `n,t,r,e,b,a,v - / 1-3 `0-7 - / `x 2 `0-9,a-f,A-F - ) -No: `! (__@pat / @!=(''=>"Expected a pattern after the exclamation mark")) -Nodent: `\ `N -Upto-and: ".." [__@first=simple-pat] [__`%__@second=simple-pat] -Repeat: ( - @min=(''=>'0') (`*=>"-") @max=(''=>'∞') - / @min=int __ `- __ @max=int - / @min=(int / ''=>'1') __ (`+=>"-") @max=(''=>'∞') - / @min=@max=int - ) __ @repeat-pat=pat [__`%__@sep=pat] -Optional: `[ __ extended-pat (__`] / @!=(''=> "Expected closing square bracket here")) -After: `< __ pat -Before: `> __ pat -Capture: `@ [__ @capture-name=(id/`!) __ !"=>" `=] __ (@capture=pat / @!=(''=> "Expected pattern to capture")) -Replace: ( - @replace-pat=(Replace / Chain / pat) __ "=>" (__ @replacement=String / @!=(''=> "Expected replacement string")) - ) -Ref: @name=id !(__`:) - -parens: `( __ extended-pat (__ `) / @!=(''=> "Expected closing parenthesis here")) - -Chain: 2+@(pat !(__"=>") / Replace)%__ -Otherwise: 2+@(Replace / Chain / pat)%(__`/__) -extended-pat: Otherwise / Replace / Chain / pat - -# Special-symbol rules: -_: *(` / \t) -__: *(` / \t / \r / \n / comment) -$$: !(./\n) -$: !. -^^: !<(./\n) -^: !<. - -id: "^^" / "^" / "__" / "_" / "$$" / "$" / "|" / `a-z,A-Z *`a-z,A-Z,0-9,- - -comment: `# .. $ diff --git a/grammars/c.bp b/grammars/c.bp new file mode 100644 index 0000000..2b2e1d9 --- /dev/null +++ b/grammars/c.bp @@ -0,0 +1,23 @@ +# Syntax definition for some C-specific patterns +# +# NOTE: this is *NOT* intended to be a complete definition of the language's +# syntax! Providing a full language grammar is overkill, because the intended +# use case is finding/replacing string patterns. This task does not require a +# full parse tree, and having one makes the task considerably more complicated. +# See the accompanying README.md for more info. + +comment: "//" .. $ / "/*" .. "*/" % \n +string: `"..`" % (`\.) +char: `' [`\] . `' +array: `{..`} % (array/comment/\n) +struct: array +keyword: |( + "auto" / "break" / "case" / "char" / "const" / "continue" / "default" / "do" / + "double" / "else" / "enum" / "extern" / "float" / "for" / "goto" / "if" / + "int" / "long" / "register" / "return" / "short" / "signed" / "sizeof" / + "static" / "struct" / "switch" / "typedef" / "union" / "unsigned" / "void" / + "volatile" / "while" +)| +function-def: ^_ 2+(keyword / id / `*) % __ parens (__`; / >(__`{)) +function: function-def __ braces +macro: ^"#define " ..$ *(<`\ \n..$) diff --git a/grammars/lua.bp b/grammars/lua.bp new file mode 100644 index 0000000..7ffdc73 --- /dev/null +++ b/grammars/lua.bp @@ -0,0 +1,19 @@ +# Syntax definitions for some Lua-specific patterns +# +# NOTE: this is *NOT* intended to be a complete definition of the language's +# syntax! Providing a full language grammar is overkill, because the intended +# use case is finding/replacing string patterns. This task does not require a +# full parse tree, and having one makes the task considerably more complicated. +# See the accompanying README.md for more info. + +comment: "--" (`[ @eqs=*`= `[ ..(`]eqs`]) % \n / ..$) +string: `"..`" % (`\.) / `'..`' % (`\.) / `[ @eqs=*`= `[ .. (`]eqs`]) % \n +table: `{..`} % (table/string/comment/\n) +keyword: |( + "and" / "break" / "do" / "else" / "elseif" / "end" / "false" / "for" / + "function" / "goto" / "if" / "in" / "local" / "nil" / "not" / "or" / + "repeat" / "return" / "then" / "true" / "until" / "while" +)| +function-def: |"function"|[_id (*(`.id)[`:id])]_ parens +block: function / |("do"/"then")| .. (|"end"|) % (comment/string/block/\n) +function: function-def .. (|"end"|) % (comment/string/block/\n) diff --git a/grammars/python.bp b/grammars/python.bp new file mode 100644 index 0000000..0683c0a --- /dev/null +++ b/grammars/python.bp @@ -0,0 +1,30 @@ +# Syntax definitions for some Python-specific patterns +# +# NOTE: this is *NOT* intended to be a complete definition of the language's +# syntax! Providing a full language grammar is overkill, because the intended +# use case is finding/replacing string patterns. This task does not require a +# full parse tree, and having one makes the task considerably more complicated. +# See the accompanying README.md for more info. + +comment: `# ..$ +string: quoted-string / `b quoted-string / `f quoted-string +quoted-string: "'''".."'''" % \n / '"""'..'"""' % \n / `"..`" % (`\.) / `'..`' % (`\.) +dict: `{__`} / `{ __ +(expr__`:__expr) % (__`,__) __ `} +set: `{ __ +expr % (__`,__) __ `} +list: `[ __ +expr % (__`,__) __ `] +keyword: |("and" / "as" / "assert" / "break" / "class" / "continue" / "def" / + "del" / "elif" / "else" / "except" / "finally" / "for" / "from" / + "global" / "if" / "import" / "in" / "is" / "lambda" / "None" / "nonlocal" / + "not" / "or" / "pass" / "raise" / "return" / "try" / "while" / + "with" / "yield")| +expr: (function-call / lambda / string / dict / set / list / id / number / `(__expr__`)) *(method/attr/index) +method: `. function-call +attr: `. id +index: `[__expr[__`:__expr [__`:__expr]]__`] +tuple: `(__`) / +(__expr__`,) [__expr] +empty-tuple: `(__`) +class: |"class"|_id`: +(\N ..$) +function: function-def +(\N ..$) +function-def: |"def"|_id parens `: +function-call: !<"def " id parens +lambda: |"lambda"| .. `:__expr -- cgit v1.2.3