Added pandoc lua filter to make code literals work better with manpages.
This commit is contained in:
parent
315aedc7cb
commit
aa1faea83c
9
.pandoc/bold-code.lua
Normal file
9
.pandoc/bold-code.lua
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
-- Convert code to bold
|
||||||
|
-- Pandoc Lua filter hook for inline code spans.
-- Replaces the Code element with a Strong (bold) element carrying the same
-- text, so code literals are emphasized in the rendered output.
--
-- @param el  the pandoc Code inline; only `el.text` (the raw code string) is read
-- @return    a pandoc Strong inline containing a single Str with the code text
function Code(el)
  -- Build the Str explicitly rather than handing a bare string to
  -- pandoc.Strong: pandoc's string-to-Inlines coercion splits on whitespace,
  -- which would not preserve the literal text of the code span.
  return pandoc.Strong({pandoc.Str(el.text)})
end
|
||||||
|
|
||||||
|
-- Convert code blocks to bold and indented
|
||||||
|
-- Pandoc Lua filter hook for fenced/indented code blocks.
-- Replaces the CodeBlock with a BlockQuote (indented) holding one paragraph
-- whose entire content is bold.
--
-- @param el  the pandoc CodeBlock; only `el.text` (the raw block text) is read
-- @return    a pandoc BlockQuote wrapping Para -> Strong -> code lines
function CodeBlock(el)
  local inlines = {}
  -- Emit each source line as its own Str, separated by hard LineBreaks, so a
  -- multi-line block keeps its line structure. Passing the raw string to
  -- pandoc.Strong would let pandoc split it on whitespace and reflow the
  -- lines into a single paragraph.
  for line in (el.text .. "\n"):gmatch("(.-)\n") do
    if #inlines > 0 then
      table.insert(inlines, pandoc.LineBreak())
    end
    table.insert(inlines, pandoc.Str(line))
  end
  return pandoc.BlockQuote({pandoc.Para({pandoc.Strong(inlines)})})
end
|
2
Makefile
2
Makefile
@ -21,7 +21,7 @@ $(NAME): $(OBJFILES) bp.c
|
|||||||
$(CC) $(ALL_FLAGS) -o $@ $(OBJFILES) bp.c
|
$(CC) $(ALL_FLAGS) -o $@ $(OBJFILES) bp.c
|
||||||
|
|
||||||
bp.1: bp.1.md
|
bp.1: bp.1.md
|
||||||
pandoc -s $< -t man -o $@
|
pandoc --lua-filter=.pandoc/bold-code.lua -s $< -t man -o $@
|
||||||
|
|
||||||
tags: $(CFILES) bp.c
|
tags: $(CFILES) bp.c
|
||||||
ctags *.c *.h
|
ctags *.c *.h
|
||||||
|
140
bp.1
140
bp.1
@ -7,7 +7,7 @@
|
|||||||
bp - Bruce\[aq]s Parsing Expression Grammar tool
|
bp - Bruce\[aq]s Parsing Expression Grammar tool
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
.PP
|
.PP
|
||||||
\f[B]bp\f[R] [\f[I]options\&...\f[R]] \f[I]pattern\f[R] [[--]
|
\f[B]bp\f[R] [\f[I]options\&...\f[R]] \f[I]pattern\f[R] [[\f[B]--\f[R]]
|
||||||
\f[I]files\&...\f[R]]
|
\f[I]files\&...\f[R]]
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.PP
|
.PP
|
||||||
@ -60,8 +60,7 @@ instead of treated as literal files.
|
|||||||
\f[B]-c\f[R], \f[B]--context\f[R] \f[I]N\f[R]
|
\f[B]-c\f[R], \f[B]--context\f[R] \f[I]N\f[R]
|
||||||
The number of lines of context to print.
|
The number of lines of context to print.
|
||||||
If \f[I]N\f[R] is 0, print only the exact text of the matches.
|
If \f[I]N\f[R] is 0, print only the exact text of the matches.
|
||||||
If \f[I]N\f[R] is \f[B]\f[CB]\[dq]all\[dq]\f[B]\f[R], print the entire
|
If \f[I]N\f[R] is \f[B]\[lq]all\[rq]\f[R], print the entire file.
|
||||||
file.
|
|
||||||
Otherwise, if \f[I]N\f[R] is a positive integer, print the whole line on
|
Otherwise, if \f[I]N\f[R] is a positive integer, print the whole line on
|
||||||
which matches occur, as well as the \f[I]N-1\f[R] lines before and after
|
which matches occur, as well as the \f[I]N-1\f[R] lines before and after
|
||||||
the match.
|
the match.
|
||||||
@ -97,7 +96,7 @@ with one or two patterns.
|
|||||||
The default mode for bp patterns is \[lq]string pattern mode\[rq].
|
The default mode for bp patterns is \[lq]string pattern mode\[rq].
|
||||||
In string pattern mode, all characters are interpreted literally except
|
In string pattern mode, all characters are interpreted literally except
|
||||||
for the backslash (\f[B]\[rs]\f[R]), which may be followed by a bp
|
for the backslash (\f[B]\[rs]\f[R]), which may be followed by a bp
|
||||||
pattern (see the \f[B]PATTERNS\f[R] section above).
|
pattern (see the \f[B]PATTERNS\f[R] section below).
|
||||||
Optionally, the bp pattern may be terminated by a semicolon
|
Optionally, the bp pattern may be terminated by a semicolon
|
||||||
(\f[B];\f[R]).
|
(\f[B];\f[R]).
|
||||||
.SH PATTERNS
|
.SH PATTERNS
|
||||||
@ -107,9 +106,12 @@ Expression Grammars and regular expression syntax.
|
|||||||
The syntax is designed to map closely to verbal descriptions of the
|
The syntax is designed to map closely to verbal descriptions of the
|
||||||
patterns, and prefix operators are preferred over suffix operators (as
|
patterns, and prefix operators are preferred over suffix operators (as
|
||||||
is common in regex syntax).
|
is common in regex syntax).
|
||||||
.PP
|
Patterns are whitespace-agnostic, so they work the same regardless of
|
||||||
Some patterns additionally have \[lq]multi-line\[rq] variants, which
|
whether whitespace is present or not, except for string literals
|
||||||
means that they include the newline character.
|
(\f[B]\[aq]...\[aq]\f[R] and \f[B]\[dq]...\[dq]\f[R]), character
|
||||||
|
literals (\f[B]\[ga]\f[R]), and escape sequences (\f[B]\[rs]\f[R]).
|
||||||
|
Whitespace between patterns or parts of a pattern should be used for
|
||||||
|
clarity, but it will not affect the meaning of the pattern.
|
||||||
.TP
|
.TP
|
||||||
\f[I]pat1 pat2\f[R]
|
\f[I]pat1 pat2\f[R]
|
||||||
A sequence: \f[I]pat1\f[R] followed by \f[I]pat2\f[R]
|
A sequence: \f[I]pat1\f[R] followed by \f[I]pat2\f[R]
|
||||||
@ -155,8 +157,8 @@ either end.
|
|||||||
Escape sequences are not allowed.
|
Escape sequences are not allowed.
|
||||||
.TP
|
.TP
|
||||||
\f[B]\[ga]\f[R]\f[I]c\f[R]
|
\f[B]\[ga]\f[R]\f[I]c\f[R]
|
||||||
The literal character \f[I]c\f[R] (e.g.\ **\[ga]\[at]** matches the
|
The literal character \f[I]c\f[R] (e.g.\ \f[B]\[ga]\[at]\f[R] matches
|
||||||
\[lq]\[at]\[rq] character)
|
the \[lq]\[at]\[rq] character)
|
||||||
.TP
|
.TP
|
||||||
\f[B]\[ga]\f[R]\f[I]c1\f[R]\f[B],\f[R]\f[I]c2\f[R]
|
\f[B]\[ga]\f[R]\f[I]c1\f[R]\f[B],\f[R]\f[I]c2\f[R]
|
||||||
The literal character \f[I]c1\f[R] or \f[I]c2\f[R]
|
The literal character \f[I]c1\f[R] or \f[I]c2\f[R]
|
||||||
@ -188,40 +190,43 @@ Not \f[I]pat\f[R]
|
|||||||
Maybe \f[I]pat\f[R]
|
Maybe \f[I]pat\f[R]
|
||||||
.TP
|
.TP
|
||||||
\f[I]N\f[R] \f[I]pat\f[R]
|
\f[I]N\f[R] \f[I]pat\f[R]
|
||||||
Exactly \f[I]N\f[R] repetitions of \f[I]pat\f[R] (e.g.\ \f[B]5
|
Exactly \f[I]N\f[R] repetitions of \f[I]pat\f[R]
|
||||||
\[ga]x\f[R] matches \f[B]\[lq]xxxxx\[rq]\f[R])
|
(e.g.\ \f[B]5 \[dq]x\[dq]\f[R] matches \f[B]\[lq]xxxxx\[rq]\f[R])
|
||||||
.TP
|
.TP
|
||||||
\f[I]N\f[R] \f[B]-\f[R] \f[I]M\f[R] \f[I]pat\f[R]
|
\f[I]N\f[R] \f[B]-\f[R] \f[I]M\f[R] \f[I]pat\f[R]
|
||||||
Between \f[I]N\f[R] and \f[I]M\f[R] repetitions of \f[I]pat\f[R]
|
Between \f[I]N\f[R] and \f[I]M\f[R] repetitions of \f[I]pat\f[R]
|
||||||
(e.g.\ \f[B]2-3 \[ga]x\f[R] matches \f[B]\[lq]xx\[rq]\f[R] or
|
(e.g.\ \f[B]2-3 \[dq]x\[dq]\f[R] matches \f[B]\[lq]xx\[rq]\f[R] or
|
||||||
\f[B]\[lq]xxx\[rq]\f[R])
|
\f[B]\[lq]xxx\[rq]\f[R])
|
||||||
.TP
|
.TP
|
||||||
\f[I]N\f[R]\f[B]+\f[R] \f[I]pat\f[R]
|
\f[I]N\f[R]\f[B]+\f[R] \f[I]pat\f[R]
|
||||||
At least \f[I]N\f[R] or more repetitions of \f[I]pat\f[R] (e.g.\ \f[B]2+
|
At least \f[I]N\f[R] repetitions of \f[I]pat\f[R]
|
||||||
\[ga]x\f[R] matches \f[B]\[lq]xx\[rq]\f[R], \f[B]\[lq]xxx\[rq]\f[R],
|
(e.g.\ \f[B]2+ \[dq]x\[dq]\f[R] matches \f[B]\[lq]xx\[rq]\f[R],
|
||||||
\f[B]\[lq]xxxx\[rq]\f[R], etc.)
|
\f[B]\[lq]xxx\[rq]\f[R], \f[B]\[lq]xxxx\[rq]\f[R], etc.)
|
||||||
.TP
|
.TP
|
||||||
\f[B]*\f[R] \f[I]pat\f[R]
|
\f[B]*\f[R] \f[I]pat\f[R]
|
||||||
Some \f[I]pat\f[R]s (zero or more, e.g.\ \f[B]* \[ga]x\f[R] matches
|
Some \f[I]pat\f[R]s (zero or more, e.g.\ \f[B]* \[dq]x\[dq]\f[R] matches
|
||||||
\f[B]\[lq]\[rq]\f[R], \f[B]\[lq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R],
|
\f[B]\[lq]\[rq]\f[R], \f[B]\[lq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R],
|
||||||
etc.)
|
etc.)
|
||||||
.TP
|
.TP
|
||||||
\f[B]+\f[R] \f[I]pat\f[R]
|
\f[B]+\f[R] \f[I]pat\f[R]
|
||||||
At least one \f[I]pat\f[R]s (e.g.\ \f[B]+ \[ga]x\f[R] matches
|
One or more \f[I]pat\f[R]s (e.g.\ \f[B]+ \[dq]x\[dq]\f[R] matches
|
||||||
\f[B]\[lq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R], \f[B]\[lq]xxx\[rq]\f[R],
|
\f[B]\[lq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R], \f[B]\[lq]xxx\[rq]\f[R],
|
||||||
etc.)
|
etc.)
|
||||||
.TP
|
.TP
|
||||||
\f[I]repeating-pat\f[R] \f[B]%\f[R] \f[I]sep\f[R]
|
\f[I]repeating-pat\f[R] \f[B]%\f[R] \f[I]sep\f[R]
|
||||||
\f[I]repeating-pat\f[R] separated by \f[I]sep\f[R] (e.g.\ \f[B]*word %
|
\f[I]repeating-pat\f[R] (see the examples above) separated by
|
||||||
\[ga],\f[R] matches zero or more comma-separated words)
|
\f[I]sep\f[R] (e.g.\ \f[B]*word % \[dq],\[dq]\f[R] matches zero or more
|
||||||
|
comma-separated words)
|
||||||
.TP
|
.TP
|
||||||
\f[B]..\f[R] \f[I]pat\f[R]
|
\f[B]..\f[R] \f[I]pat\f[R]
|
||||||
Any text (except newlines) up to and including \f[I]pat\f[R]
|
Any text (except newlines) up to and including \f[I]pat\f[R]
|
||||||
.TP
|
.TP
|
||||||
\f[B].. %\f[R] \f[I]skip\f[R] \f[I]pat\f[R]
|
\f[B].. %\f[R] \f[I]skip\f[R] \f[I]pat\f[R]
|
||||||
Any text (except newlines) up to and including \f[I]pat\f[R], skipping
|
Any text (except newlines) up to and including \f[I]pat\f[R], skipping
|
||||||
over instances of \f[I]skip\f[R] (e.g.\ \f[B]\[ga]\[dq]..\[ga]\[dq] %
|
over instances of \f[I]skip\f[R]
|
||||||
(\[ga]\[rs].)\f[R])
|
(e.g.\ \f[B]\[aq]\[dq]\[aq] ..%(\[aq]\[rs]\[aq] .) \[aq]\[dq]\[aq]\f[R]
|
||||||
|
matches an opening quote, up to a closing quote, skipping over a backslash followed by
|
||||||
|
a single character)
|
||||||
.TP
|
.TP
|
||||||
\f[B]<\f[R] \f[I]pat\f[R]
|
\f[B]<\f[R] \f[I]pat\f[R]
|
||||||
Matches at the current position if \f[I]pat\f[R] matches immediately
|
Matches at the current position if \f[I]pat\f[R] matches immediately
|
||||||
@ -234,13 +239,13 @@ match up to maximum number of characters \f[I]pat\f[R] can match (or the
|
|||||||
length of the current line up to the current position, whichever is
|
length of the current line up to the current position, whichever is
|
||||||
smaller).
|
smaller).
|
||||||
\f[B]Note:\f[R] For fixed-length lookbehinds, this is quite efficient
|
\f[B]Note:\f[R] For fixed-length lookbehinds, this is quite efficient
|
||||||
(e.g.\ \f[B]<(100\[ga]x)\f[R]), however this could cause performance
|
(e.g.\ \f[B]<(100 \[dq]x\[dq])\f[R]), however this could cause
|
||||||
problems with variable-length lookbehinds (e.g.\ \f[B]<(\[ga]x
|
performance problems with variable-length lookbehinds
|
||||||
0-100\[ga]y)\f[R]).
|
(e.g.\ \f[B]<(\[dq]x\[dq] 0-100\[dq]y\[dq])\f[R]).
|
||||||
Also, it is not advised to use \f[B]\[ha]\f[R], \f[B]\[ha]\[ha]\f[R],
|
Also, it is worth noting that \f[B]\[ha]\f[R], \f[B]\[ha]\[ha]\f[R],
|
||||||
\f[B]\[u2005]*\[u2005]*,\f[BI]o\f[B]\f[BI]r\f[B]\[u2005]*\[u2005]*$\f[R]
|
\f[B]$\f[R], and \f[B]$$\f[R] all match against the edges of the slice,
|
||||||
inside a lookbehind, as they will match against the edges of the
|
which may give false positives if you were expecting them to match only
|
||||||
lookbehind slice.
|
against the edges of the file or line.
|
||||||
.TP
|
.TP
|
||||||
\f[B]>\f[R] \f[I]pat\f[R]
|
\f[B]>\f[R] \f[I]pat\f[R]
|
||||||
Matches \f[I]pat\f[R], but does not consume any input (lookahead).
|
Matches \f[I]pat\f[R], but does not consume any input (lookahead).
|
||||||
@ -258,19 +263,24 @@ See the \f[B]GRAMMAR FILES\f[R] section for more info.
|
|||||||
.TP
|
.TP
|
||||||
\f[B]\[at]\f[R] \f[I]name\f[R] \f[B]=\f[R] \f[I]pat\f[R]
|
\f[B]\[at]\f[R] \f[I]name\f[R] \f[B]=\f[R] \f[I]pat\f[R]
|
||||||
Let \f[I]name\f[R] equal \f[I]pat\f[R] (named capture).
|
Let \f[I]name\f[R] equal \f[I]pat\f[R] (named capture).
|
||||||
Named captures can be used as backreferences like so: \f[B]\[at]foo=word
|
Named captures can be used as backreferences like so:
|
||||||
\[ga]( foo \[ga])\f[R] (matches \f[B]\[lq]asdf(asdf)\[rq]\f[R] or
|
\f[B]\[at]foo=word \[ga]( foo \[ga])\f[R] (matches
|
||||||
\f[B]\[lq]baz(baz)\[rq]\f[R], but not \f[B]\[lq]foo(baz)\[rq]\f[R])
|
\f[B]\[lq]asdf(asdf)\[rq]\f[R] or \f[B]\[lq]baz(baz)\[rq]\f[R], but not
|
||||||
|
\f[B]\[lq]foo(baz)\[rq]\f[R])
|
||||||
.TP
|
.TP
|
||||||
\f[I]pat\f[R] \f[B]=> \[aq]\f[R]\f[I]replacement\f[R]\f[B]\[aq]\f[R]
|
\f[I]pat\f[R] \f[B]=>\f[R] \f[B]\[dq]\f[R]\f[I]replacement\f[R]\f[B]\[dq]\f[R]
|
||||||
Replace \f[I]pat\f[R] with \f[I]replacement\f[R].
|
Replace \f[I]pat\f[R] with \f[I]replacement\f[R].
|
||||||
Note: \f[I]replacement\f[R] should be a string, and it may contain
|
Note: \f[I]replacement\f[R] should be a string (single or double
|
||||||
|
quoted), and it may contain escape sequences (e.g.\ \f[B]\[rs]n\f[R]) or
|
||||||
references to captured values: \f[B]\[at]0\f[R] (the whole of
|
references to captured values: \f[B]\[at]0\f[R] (the whole of
|
||||||
\f[I]pat\f[R]), \f[B]\[at]1\f[R] (the first capture in \f[I]pat\f[R]),
|
\f[I]pat\f[R]), \f[B]\[at]1\f[R] (the first capture in \f[I]pat\f[R]),
|
||||||
\f[B]\[at]\f[R]\f[I]foo\f[R] (the capture named \f[I]foo\f[R] in
|
\f[B]\[at]\f[R]\f[I]foo\f[R] (the capture named \f[I]foo\f[R] in
|
||||||
\f[I]pat\f[R]), etc.
|
\f[I]pat\f[R]), etc.
|
||||||
For example, \f[B]\[at]word _ \[at]rest=(*word % _) => \[dq]\[at]rest
|
For example,
|
||||||
\[at]1\[dq]\f[R]
|
\f[B]\[at]word _ \[at]rest=(*word % _) => \[dq]\[at]rest:\[rs]n\[rs]t\[at]1\[dq]\f[R]
|
||||||
|
matches a word followed by whitespace, followed by a series of words and
|
||||||
|
replaces it with the series of words, a colon, a newline, a tab, and
|
||||||
|
then the first word.
|
||||||
.TP
|
.TP
|
||||||
\f[I]pat1\f[R] \f[B]\[ti]\f[R] \f[I]pat2\f[R]
|
\f[I]pat1\f[R] \f[B]\[ti]\f[R] \f[I]pat2\f[R]
|
||||||
Matches when \f[I]pat1\f[R] matches and \f[I]pat2\f[R] can be found
|
Matches when \f[I]pat1\f[R] matches and \f[I]pat2\f[R] can be found
|
||||||
@ -305,47 +315,51 @@ The \f[B]builtins\f[R] grammar file is loaded by default, and it defines
|
|||||||
a few useful general-purpose patterns.
|
a few useful general-purpose patterns.
|
||||||
For example, it defines the \f[B]parens\f[R] rule, which matches pairs
|
For example, it defines the \f[B]parens\f[R] rule, which matches pairs
|
||||||
of matching parentheses, accounting for nested inner parentheses:
|
of matching parentheses, accounting for nested inner parentheses:
|
||||||
.IP
|
.RS
|
||||||
.nf
|
.PP
|
||||||
\f[C]
|
\f[B]bp -p \[aq]\[dq]my_func\[dq] parens\[aq]\f[R]
|
||||||
bp -p \[aq]\[dq]my_func\[dq] parens\[aq]
|
.RE
|
||||||
\f[R]
|
|
||||||
.fi
|
|
||||||
.PP
|
.PP
|
||||||
\f[B]bp\f[R] also comes with a few grammar files for common programming
|
\f[B]bp\f[R] also comes with a few grammar files for common programming
|
||||||
languages, which may be loaded on demand.
|
languages, which may be loaded on demand.
|
||||||
These grammar files are not comprehensive syntax definitions, but only
|
These grammar files are not comprehensive syntax definitions, but only
|
||||||
some common patterns.
|
some common patterns.
|
||||||
For example, the c++ grammar file contains definitions for
|
For example, the c++ grammar file contains definitions for
|
||||||
\f[B]//\f[R]-style line comments as well as \f[B]/*\&...*/\f[R]-style
|
\f[B]//\f[R]-style line comments as well as \f[B]/*...*/\f[R]-style
|
||||||
block comments.
|
block comments.
|
||||||
Thus, you can find all comments with the word \[lq]TODO\[rq] with the
|
Thus, you can find all comments with the word \[lq]TODO\[rq] with the
|
||||||
following command:
|
following command:
|
||||||
.IP
|
.RS
|
||||||
.nf
|
.PP
|
||||||
\f[C]
|
\f[B]bp -g c++ -p \[aq]comment \[ti] {TODO}\[aq] *.cpp\f[R]
|
||||||
bp -g c++ -p \[aq]comment\[ti]{TODO}\[aq] *.cpp
|
.RE
|
||||||
\f[R]
|
|
||||||
.fi
|
|
||||||
.SH EXAMPLES
|
.SH EXAMPLES
|
||||||
.TP
|
.PP
|
||||||
|
Find files containing the string \[lq]foo\[rq] (a string pattern):
|
||||||
|
.RS
|
||||||
|
.PP
|
||||||
\f[B]ls | bp foo\f[R]
|
\f[B]ls | bp foo\f[R]
|
||||||
Find files containing the string \[dq]foo\[dq] (a string pattern)
|
.RE
|
||||||
.TP
|
.PP
|
||||||
|
Find files ending with \[lq].c\[rq] and print the name with the
|
||||||
|
\[lq].c\[rq] replaced with \[lq].h\[rq]:
|
||||||
|
.RS
|
||||||
|
.PP
|
||||||
\f[B]ls | bp \[aq].c\[rs]$\[aq] -r \[aq].h\[aq]\f[R]
|
\f[B]ls | bp \[aq].c\[rs]$\[aq] -r \[aq].h\[aq]\f[R]
|
||||||
Find files ending with \[dq].c\[dq] and replace the extension with
|
.RE
|
||||||
\[dq].h\[dq]
|
.PP
|
||||||
.TP
|
Find the word \[lq]foobar\[rq], followed by a pair of matching
|
||||||
|
parentheses in the file \f[I]my_file.py\f[R]:
|
||||||
|
.RS
|
||||||
|
.PP
|
||||||
\f[B]bp -p \[aq]{foobar} parens\[aq] my_file.py\f[R]
|
\f[B]bp -p \[aq]{foobar} parens\[aq] my_file.py\f[R]
|
||||||
Find the word \f[B]\[dq]foobar\[dq]\f[R], followed by a pair of matching
|
.RE
|
||||||
parentheses in the file \f[I]my_file.py\f[R]
|
.PP
|
||||||
.TP
|
|
||||||
\f[B]bp -g html -p \[aq]element \[ti] (\[ha]\[ha]\[dq]<a \[dq])\[aq] foo.html\f[R]
|
|
||||||
Using the \f[I]html\f[R] grammar, find all \f[I]element\f[R]s matching
|
Using the \f[I]html\f[R] grammar, find all \f[I]element\f[R]s matching
|
||||||
the tag \f[I]a\f[R] in the file \f[I]foo.html\f[R]
|
the tag \f[I]a\f[R] in the file \f[I]foo.html\f[R]:
|
||||||
.TP
|
.RS
|
||||||
\f[B]bp -g python -p \[aq]comment\[ti]{TODO}\[aq] *.py\f[R]
|
.PP
|
||||||
Find all comments with the word \f[B]\[lq]TODO\[rq]\f[R] in local python
|
\f[B]bp -g html -p \[aq]element \[ti] (\[ha]\[ha]\[dq]<a \[dq])\[aq] foo.html\f[R]
|
||||||
files.
|
.RE
|
||||||
.SH AUTHORS
|
.SH AUTHORS
|
||||||
Bruce Hill (\f[I]bruce\[at]bruce-hill.com\f[R]).
|
Bruce Hill (\f[I]bruce\[at]bruce-hill.com\f[R]).
|
||||||
|
251
bp.1.md
251
bp.1.md
@ -8,71 +8,68 @@ bp - Bruce\'s Parsing Expression Grammar tool
|
|||||||
|
|
||||||
# SYNOPSIS
|
# SYNOPSIS
|
||||||
|
|
||||||
**bp**
|
`bp` \[*options...*\] *pattern* \[\[`--`\] *files...*\]
|
||||||
\[*options...*\]
|
|
||||||
*pattern*
|
|
||||||
\[\[\--\] *files...*\]
|
|
||||||
|
|
||||||
# DESCRIPTION
|
# DESCRIPTION
|
||||||
|
|
||||||
**bp** is a tool that matches parsing expression grammars using a custom
|
`bp` is a tool that matches parsing expression grammars using a custom
|
||||||
syntax.
|
syntax.
|
||||||
|
|
||||||
# OPTIONS
|
# OPTIONS
|
||||||
|
|
||||||
**-v**, **\--verbose**
|
`-v`, `--verbose`
|
||||||
: Print debugging information.
|
: Print debugging information.
|
||||||
|
|
||||||
**-e**, **\--explain**
|
`-e`, `--explain`
|
||||||
: Print a visual explanation of the matches.
|
: Print a visual explanation of the matches.
|
||||||
|
|
||||||
**-j**, **\--json**
|
`-j`, `--json`
|
||||||
: Print a JSON list of the matches. (Pairs with **\--verbose** for more detail)
|
: Print a JSON list of the matches. (Pairs with `--verbose` for more detail)
|
||||||
|
|
||||||
**-l**, **\--list-files**
|
`-l`, `--list-files`
|
||||||
: Print only the names of files containing matches instead of the matches
|
: Print only the names of files containing matches instead of the matches
|
||||||
themselves.
|
themselves.
|
||||||
|
|
||||||
**-i**, **\--ignore-case**
|
`-i`, `--ignore-case`
|
||||||
: Perform pattern matching case-insensitively.
|
: Perform pattern matching case-insensitively.
|
||||||
|
|
||||||
**-I**, **\--inplace**
|
`-I`, `--inplace`
|
||||||
: Perform filtering or replacement in-place (i.e. overwrite files with new
|
: Perform filtering or replacement in-place (i.e. overwrite files with new
|
||||||
content).
|
content).
|
||||||
|
|
||||||
**-C**, **\--confirm**
|
`-C`, `--confirm`
|
||||||
: During in-place modification of a file, confirm before each modification.
|
: During in-place modification of a file, confirm before each modification.
|
||||||
|
|
||||||
**-r**, **\--replace** *replacement*
|
`-r`, `--replace` *replacement*
|
||||||
: Replace all occurrences of the main pattern with the given string.
|
: Replace all occurrences of the main pattern with the given string.
|
||||||
|
|
||||||
**-s**, **\--skip** *pattern*
|
`-s`, `--skip` *pattern*
|
||||||
: While looking for matches, skip over *pattern* occurrences. This can be
|
: While looking for matches, skip over *pattern* occurrences. This can be
|
||||||
useful for behavior like **bp -s string** (avoiding matches inside string
|
useful for behavior like `bp -s string` (avoiding matches inside string
|
||||||
literals).
|
literals).
|
||||||
|
|
||||||
**-g**, **\--grammar** *grammar-file*
|
`-g`, `--grammar` *grammar-file*
|
||||||
: Load the grammar from the given file. See the **GRAMMAR FILES** section
|
: Load the grammar from the given file. See the `GRAMMAR FILES` section
|
||||||
for more info.
|
for more info.
|
||||||
|
|
||||||
**-G**, **\--git**
|
`-G`, `--git`
|
||||||
: Use **git** to get a list of files. Remaining file arguments (if any) are
|
: Use `git` to get a list of files. Remaining file arguments (if any) are
|
||||||
passed to **git \--ls-files** instead of treated as literal files.
|
passed to `git ls-files` instead of treated as literal files.
|
||||||
|
|
||||||
**-c**, **\--context** *N*
|
`-c`, `--context` *N*
|
||||||
: The number of lines of context to print. If *N* is 0, print only the
|
: The number of lines of context to print. If *N* is 0, print only the
|
||||||
exact text of the matches. If *N* is **`"all"`**, print the entire file.
|
exact text of the matches. If *N* is **"all"**, print the entire file.
|
||||||
Otherwise, if *N* is a positive integer, print the whole line on which
|
Otherwise, if *N* is a positive integer, print the whole line on which
|
||||||
matches occur, as well as the *N-1* lines before and after the match. The
|
matches occur, as well as the *N-1* lines before and after the match. The
|
||||||
default value for this argument is **1** (print whole lines where matches
|
default value for this argument is **1** (print whole lines where matches
|
||||||
occur).
|
occur).
|
||||||
|
|
||||||
**-f**, **\--format** *auto*\|*fancy*\|*plain*
|
`-f`, `--format` *auto*\|*fancy*\|*plain*
|
||||||
: Set the output format. *fancy* includes colors and line numbers, *plain*
|
: Set the output format. *fancy* includes colors and line numbers, *plain*
|
||||||
includes neither, and *auto* (the default) uses *fancy* formatting only when
|
includes neither, and *auto* (the default) uses *fancy* formatting only when
|
||||||
the output is a TTY.
|
the output is a TTY.
|
||||||
|
|
||||||
**\--help**
|
`--help`
|
||||||
: Print the usage and exit.
|
: Print the usage and exit.
|
||||||
|
|
||||||
*pattern*
|
*pattern*
|
||||||
@ -81,7 +78,7 @@ pattern (see the **STRING PATTERNS** section below).
|
|||||||
|
|
||||||
*files...*
|
*files...*
|
||||||
: The input files to search. If no input files are provided and data was piped
|
: The input files to search. If no input files are provided and data was piped
|
||||||
in, that data will be used instead. If neither are provided, **bp** will search
|
in, that data will be used instead. If neither are provided, `bp` will search
|
||||||
through all files in the current directory and its subdirectories
|
through all files in the current directory and its subdirectories
|
||||||
(recursively).
|
(recursively).
|
||||||
|
|
||||||
@ -90,118 +87,119 @@ through all files in the current directory and its subdirectories
|
|||||||
|
|
||||||
One of the most common use cases for pattern matching tools is matching plain,
|
One of the most common use cases for pattern matching tools is matching plain,
|
||||||
literal strings, or strings that are primarily plain strings, with one or two
|
literal strings, or strings that are primarily plain strings, with one or two
|
||||||
patterns. **bp** is designed around this fact. The default mode for bp patterns
|
patterns. `bp` is designed around this fact. The default mode for bp patterns
|
||||||
is "string pattern mode". In string pattern mode, all characters are
|
is "string pattern mode". In string pattern mode, all characters are
|
||||||
interpreted literally except for the backslash (**\\**), which may be followed
|
interpreted literally except for the backslash (`\`), which may be followed by
|
||||||
by a bp pattern (see the **PATTERNS** section above). Optionally, the bp
|
a bp pattern (see the **PATTERNS** section below). Optionally, the bp pattern
|
||||||
pattern may be terminated by a semicolon (**;**).
|
may be terminated by a semicolon (`;`).
|
||||||
|
|
||||||
|
|
||||||
# PATTERNS
|
# PATTERNS
|
||||||
|
|
||||||
**bp** patterns are based off of a combination of Parsing Expression Grammars
|
`bp` patterns are based off of a combination of Parsing Expression Grammars and
|
||||||
and regular expression syntax. The syntax is designed to map closely to verbal
|
regular expression syntax. The syntax is designed to map closely to verbal
|
||||||
descriptions of the patterns, and prefix operators are preferred over suffix
|
descriptions of the patterns, and prefix operators are preferred over suffix
|
||||||
operators (as is common in regex syntax).
|
operators (as is common in regex syntax). Patterns are whitespace-agnostic, so
|
||||||
|
they work the same regardless of whether whitespace is present or not, except
|
||||||
Some patterns additionally have "multi-line" variants, which means that they
|
for string literals (`'...'` and `"..."`), character literals (`` ` ``), and
|
||||||
include the newline character.
|
escape sequences (`\`). Whitespace between patterns or parts of a pattern
|
||||||
|
should be used for clarity, but it will not affect the meaning of the pattern.
|
||||||
|
|
||||||
*pat1 pat2*
|
*pat1 pat2*
|
||||||
: A sequence: *pat1* followed by *pat2*
|
: A sequence: *pat1* followed by *pat2*
|
||||||
|
|
||||||
*pat1* **/** *pat2*
|
*pat1* `/` *pat2*
|
||||||
: A choice: *pat1*, or if it doesn\'t match, then *pat2*
|
: A choice: *pat1*, or if it doesn\'t match, then *pat2*
|
||||||
|
|
||||||
**.**
|
`.`
|
||||||
: Any character (excluding newline)
|
: Any character (excluding newline)
|
||||||
|
|
||||||
**\^**
|
`^`
|
||||||
: Start of a line
|
: Start of a line
|
||||||
|
|
||||||
**\^\^**
|
`^^`
|
||||||
: Start of the text
|
: Start of the text
|
||||||
|
|
||||||
**\$**
|
`$`
|
||||||
: End of a line (does not include newline character)
|
: End of a line (does not include newline character)
|
||||||
|
|
||||||
**\$\$**
|
`$$`
|
||||||
: End of the text
|
: End of the text
|
||||||
|
|
||||||
**\_**
|
`_`
|
||||||
: Zero or more whitespace characters, including spaces and tabs, but not
|
: Zero or more whitespace characters, including spaces and tabs, but not
|
||||||
newlines.
|
newlines.
|
||||||
|
|
||||||
**\_\_**
|
`__`
|
||||||
: Zero or more whitespace characters, including spaces, tabs, newlines, and
|
: Zero or more whitespace characters, including spaces, tabs, newlines, and
|
||||||
comments. Comments are undefined by default, but may be defined by a separate
|
comments. Comments are undefined by default, but may be defined by a separate
|
||||||
grammar file. See the **GRAMMAR FILES** section for more info.
|
grammar file. See the **GRAMMAR FILES** section for more info.
|
||||||
|
|
||||||
**\"foo\"**, **\'foo\'**
|
`"foo"`, `'foo'`
|
||||||
: The literal string **"foo"**. Single and double quotes are treated the same.
|
: The literal string **"foo"**. Single and double quotes are treated the same.
|
||||||
Escape sequences are not allowed.
|
Escape sequences are not allowed.
|
||||||
|
|
||||||
**{foo}**
|
`{foo}`
|
||||||
: The literal string **"foo"** with word boundaries on either end. Escape
|
: The literal string **"foo"** with word boundaries on either end. Escape
|
||||||
sequences are not allowed.
|
sequences are not allowed.
|
||||||
|
|
||||||
**\`***c*
|
`` ` ``*c*
|
||||||
: The literal character *c* (e.g. **\`@** matches the "@" character)
|
: The literal character *c* (e.g. `` `@ `` matches the "@" character)
|
||||||
|
|
||||||
**\`***c1***,***c2*
|
`` ` ``*c1*`,`*c2*
|
||||||
: The literal character *c1* or *c2* (e.g. **\`a,e,i,o,u**)
|
: The literal character *c1* or *c2* (e.g. `` `a,e,i,o,u ``)
|
||||||
|
|
||||||
**\`***c1***-***c2*
|
`` ` ``*c1*`-`*c2*
|
||||||
: The character range *c1* to *c2* (e.g. **\`a-z**). Multiple ranges
|
: The character range *c1* to *c2* (e.g. `` `a-z ``). Multiple ranges
|
||||||
can be combined with a comma (e.g. **\`a-z,A-Z**).
|
can be combined with a comma (e.g. `` `a-z,A-Z ``).
|
||||||
|
|
||||||
**\\***esc*
|
`\`*esc*
|
||||||
: An escape sequence (e.g. **\\n**, **\\x1F**, **\\033**, etc.)
|
: An escape sequence (e.g. `\n`, `\x1F`, `\033`, etc.)
|
||||||
|
|
||||||
**\\***esc1***-***esc2*
|
`\`*esc1*`-`*esc2*
|
||||||
: An escape sequence range from *esc1* to *esc2* (e.g. **\\x00-x1F**)
|
: An escape sequence range from *esc1* to *esc2* (e.g. `\x00-x1F`)
|
||||||
|
|
||||||
**\\N**
|
`\N`
|
||||||
: A special case escape that matches a "nodent": one or more newlines followed
|
: A special case escape that matches a "nodent": one or more newlines followed
|
||||||
by the same indentation that occurs on the current line.
|
by the same indentation that occurs on the current line.
|
||||||
|
|
||||||
**!** *pat*
|
`!` *pat*
|
||||||
: Not *pat*
|
: Not *pat*
|
||||||
|
|
||||||
**\[** *pat* **\]**
|
`[` *pat* `]`
|
||||||
: Maybe *pat*
|
: Maybe *pat*
|
||||||
|
|
||||||
*N* *pat*
|
*N* *pat*
|
||||||
: Exactly *N* repetitions of *pat* (e.g. **5 \`x** matches **"xxxxx"**)
|
: Exactly *N* repetitions of *pat* (e.g. `5 "x"` matches **"xxxxx"**)
|
||||||
|
|
||||||
*N* **-** *M* *pat*
|
*N* `-` *M* *pat*
|
||||||
: Between *N* and *M* repetitions of *pat* (e.g. **2-3 \`x**
|
: Between *N* and *M* repetitions of *pat* (e.g. `2-3 "x"` matches **"xx"** or
|
||||||
matches **"xx"** or **"xxx"**)
|
**"xxx"**)
|
||||||
|
|
||||||
*N***+** *pat*
|
*N*`+` *pat*
|
||||||
: At least *N* or more repetitions of *pat* (e.g. **2+ \`x** matches
|
: At least *N* repetitions of *pat* (e.g. `2+ "x"` matches
|
||||||
**"xx"**, **"xxx"**, **"xxxx"**, etc.)
|
**"xx"**, **"xxx"**, **"xxxx"**, etc.)
|
||||||
|
|
||||||
**\*** *pat*
|
`*` *pat*
|
||||||
: Some *pat*s (zero or more, e.g. **\* \`x** matches **""**, **"x"**,
|
: Some *pat*s (zero or more, e.g. `* "x"` matches **""**, **"x"**, **"xx"**,
|
||||||
**"xx"**, etc.)
|
etc.)
|
||||||
|
|
||||||
**+** *pat*
|
`+` *pat*
|
||||||
: At least one *pat*s (e.g. **\+ \`x** matches **"x"**, **"xx"**,
|
: One or more *pat*s (e.g. `+ "x"` matches **"x"**, **"xx"**, **"xxx"**, etc.)
|
||||||
**"xxx"**, etc.)
|
|
||||||
|
|
||||||
*repeating-pat* **%** *sep*
|
*repeating-pat* `%` *sep*
|
||||||
: *repeating-pat* separated by *sep* (e.g. **\*word % \`,** matches
|
: *repeating-pat* (see the examples above) separated by *sep* (e.g. `*word %
|
||||||
zero or more comma-separated words)
|
","` matches zero or more comma-separated words)
|
||||||
|
|
||||||
**..** *pat*
|
`..` *pat*
|
||||||
: Any text (except newlines) up to and including *pat*
|
: Any text (except newlines) up to and including *pat*
|
||||||
|
|
||||||
**.. %** *skip* *pat*
|
`.. %` *skip* *pat*
|
||||||
: Any text (except newlines) up to and including *pat*, skipping over
|
: Any text (except newlines) up to and including *pat*, skipping over instances
|
||||||
instances of *skip* (e.g. **\`\"..\`\" % (\`\\.)**)
|
of *skip* (e.g. `'"' ..%('\' .) '"'` matches an opening quote, up to a closing quote,
|
||||||
|
skipping over a backslash followed by a single character)
|
||||||
|
|
||||||
**\<** *pat*
|
`<` *pat*
|
||||||
: Matches at the current position if *pat* matches immediately before the
|
: Matches at the current position if *pat* matches immediately before the
|
||||||
current position (lookbehind). Conceptually, you can think of this as creating
|
current position (lookbehind). Conceptually, you can think of this as creating
|
||||||
a file containing only the *N* characters immediately before the current
|
a file containing only the *N* characters immediately before the current
|
||||||
@ -209,56 +207,59 @@ position and attempting to match *pat* on that file, for all values of *N* from
|
|||||||
the minimum number of characters *pat* can match up to the maximum number of
|
the minimum number of characters *pat* can match up to the maximum number of
|
||||||
characters *pat* can match (or the length of the current line up to the current
|
characters *pat* can match (or the length of the current line up to the current
|
||||||
position, whichever is smaller). **Note:** For fixed-length lookbehinds, this
|
position, whichever is smaller). **Note:** For fixed-length lookbehinds, this
|
||||||
is quite efficient (e.g. **\<(100\`x)**), however this could cause performance
|
is quite efficient (e.g. `<(100 "x")`), however this could cause performance
|
||||||
problems with variable-length lookbehinds (e.g. **\<(\`x 0-100\`y)**). Also,
|
problems with variable-length lookbehinds (e.g. `<("x" 0-100"y")`). Also, it is
|
||||||
it is not advised to use **\^**, **\^\^**, **$**, or **$$** inside a lookbehind,
|
worth noting that `^`, `^^`, `$`, and `$$` all match against the edges of the
|
||||||
as they will match against the edges of the lookbehind slice.
|
slice, which may give false positives if you were expecting them to match only
|
||||||
|
against the edges of the file or line.
|
||||||
|
|
||||||
**\>** *pat*
|
`>` *pat*
|
||||||
: Matches *pat*, but does not consume any input (lookahead).
|
: Matches *pat*, but does not consume any input (lookahead).
|
||||||
|
|
||||||
**\@** *pat*
|
`@` *pat*
|
||||||
: Capture *pat*
|
: Capture *pat*
|
||||||
|
|
||||||
**foo**
|
`foo`
|
||||||
: The named pattern whose name is **"foo"**. Pattern names come from definitions in
|
: The named pattern whose name is **"foo"**. Pattern names come from
|
||||||
grammar files or from named captures. Pattern names may contain dashes (**-**),
|
definitions in grammar files or from named captures. Pattern names may contain
|
||||||
but not underscores (**\_**), since the underscore is used to match whitespace.
|
dashes (`-`), but not underscores (`_`), since the underscore is used to match
|
||||||
See the **GRAMMAR FILES** section for more info.
|
whitespace. See the **GRAMMAR FILES** section for more info.
|
||||||
|
|
||||||
**\@** *name* **=** *pat*
|
`@` *name* `=` *pat*
|
||||||
: Let *name* equal *pat* (named capture). Named captures can be used as
|
: Let *name* equal *pat* (named capture). Named captures can be used as
|
||||||
backreferences like so: **\@foo=word \`( foo \`)** (matches **"asdf(asdf)"** or
|
backreferences like so: `` @foo=word `( foo `) `` (matches **"asdf(asdf)"** or
|
||||||
**"baz(baz)"**, but not **"foo(baz)"**)
|
**"baz(baz)"**, but not **"foo(baz)"**)
|
||||||
|
|
||||||
*pat* **=\> \'***replacement***\'**
|
*pat* `=>` `"`*replacement*`"`
|
||||||
: Replace *pat* with *replacement*. Note: *replacement* should be a
|
: Replace *pat* with *replacement*. Note: *replacement* should be a string
|
||||||
string, and it may contain references to captured values: **\@0** (the whole of
|
(single or double quoted), and it may contain escape sequences (e.g. `\n`) or
|
||||||
*pat*), **\@1** (the first capture in *pat*), **\@***foo* (the capture
|
references to captured values: `@0` (the whole of *pat*), `@1` (the first
|
||||||
named *foo* in *pat*), etc. For example, **\@word \_ \@rest=(\*word % \_)
|
capture in *pat*), `@`*foo* (the capture named *foo* in *pat*), etc. For
|
||||||
=\> \"\@rest \@1\"**
|
example, `@word _ @rest=(*word % _) => "@rest:\n\t@1"` matches a word followed
|
||||||
|
by whitespace, followed by a series of words and replaces it with the series
|
||||||
|
of words, a colon, a newline, a tab, and then the first word.
|
||||||
|
|
||||||
*pat1* **~** *pat2*
|
*pat1* `~` *pat2*
|
||||||
: Matches when *pat1* matches and *pat2* can be found within the text of that
|
: Matches when *pat1* matches and *pat2* can be found within the text of that
|
||||||
match. (e.g. **comment ~ {TODO}** matches comments that contain the word
|
match. (e.g. `comment ~ {TODO}` matches comments that contain the word
|
||||||
**"TODO"**)
|
**"TODO"**)
|
||||||
|
|
||||||
*pat1* **!~** *pat2*
|
*pat1* `!~` *pat2*
|
||||||
: Matches when *pat1* matches, but *pat2* can not be found within the text of
|
: Matches when *pat1* matches, but *pat2* can not be found within the text of
|
||||||
that match. (e.g. **comment ~ {IGNORE}** matches only comments that do not
|
that match. (e.g. `comment !~ {IGNORE}` matches only comments that do not
|
||||||
contain the word **"IGNORE"**)
|
contain the word **"IGNORE"**)
|
||||||
|
|
||||||
*name***:** *pat*
|
*name*`:` *pat*
|
||||||
: Define *name* to mean *pat* (pattern definition)
|
: Define *name* to mean *pat* (pattern definition)
|
||||||
|
|
||||||
**(!)** *error-pat*
|
`(!)` *error-pat*
|
||||||
: If *error-pat* matches, **bp** will not print any results in this file and
|
: If *error-pat* matches, **bp** will not print any results in this file and
|
||||||
instead print an error message to **STDERR** highlighting the matching position
|
instead print an error message to **STDERR** highlighting the matching position
|
||||||
of *error-pat* in the file and printing the text of *error-pat* as an error
|
of *error-pat* in the file and printing the text of *error-pat* as an error
|
||||||
message. Then, **bp** will exit with a failure status and not process any
|
message. Then, **bp** will exit with a failure status and not process any
|
||||||
further files.
|
further files.
|
||||||
|
|
||||||
**\#** *comment*
|
`#` *comment*
|
||||||
: A line comment
|
: A line comment
|
||||||
|
|
||||||
|
|
||||||
@ -277,30 +278,36 @@ bp -p '"my_func" parens'
|
|||||||
**bp** also comes with a few grammar files for common programming languages,
|
**bp** also comes with a few grammar files for common programming languages,
|
||||||
which may be loaded on demand. These grammar files are not comprehensive syntax
|
which may be loaded on demand. These grammar files are not comprehensive syntax
|
||||||
definitions, but only some common patterns. For example, the c++ grammar file
|
definitions, but only some common patterns. For example, the c++ grammar file
|
||||||
contains definitions for **//**-style line comments as well as
|
contains definitions for `//`-style line comments as well as `/*...*/`-style
|
||||||
**/\*...\*/**-style block comments. Thus, you can find all comments with the
|
block comments. Thus, you can find all comments with the word "TODO" with the
|
||||||
word "TODO" with the following command:
|
following command:
|
||||||
|
|
||||||
```
|
```
|
||||||
bp -g c++ -p 'comment~{TODO}' *.cpp
|
bp -g c++ -p 'comment ~ {TODO}' *.cpp
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
# EXAMPLES
|
# EXAMPLES
|
||||||
|
|
||||||
**ls \| bp foo**
|
Find files containing the string "foo" (a string pattern):
|
||||||
: Find files containing the string \"foo\" (a string pattern)
|
```
|
||||||
|
ls | bp foo
|
||||||
|
```
|
||||||
|
|
||||||
**ls \| bp \'.c\\\$\' -r \'.h\'**
|
Find files ending with ".c" and print the name with the ".c" replaced with ".h":
|
||||||
: Find files ending with \".c\" and replace the extension with \".h\"
|
```
|
||||||
|
ls | bp '.c\$' -r '.h'
|
||||||
|
```
|
||||||
|
|
||||||
**bp -p \'{foobar} parens\' my_file.py**
|
Find the word "foobar", followed by a pair of matching parentheses in the file
|
||||||
: Find the word **\"foobar\"**, followed by a pair of matching parentheses in
|
*my_file.py*:
|
||||||
the file *my_file.py*
|
```
|
||||||
|
bp -p '{foobar} parens' my_file.py
|
||||||
|
```
|
||||||
|
|
||||||
**bp -g html -p \'element ~ (^^\"\<a \")\' foo.html**
|
Using the *html* grammar, find all *element*s matching the tag *a* in the file
|
||||||
: Using the *html* grammar, find all *element*s matching the tag *a* in the
|
*foo.html*:
|
||||||
file *foo.html*
|
```
|
||||||
|
bp -g html -p 'element ~ (^^"<a ")' foo.html
|
||||||
|
```
|
||||||
|
|
||||||
**bp -g python -p \'comment~{TODO}\' \*.py**
|
|
||||||
: Find all comments with the word **"TODO"** in local python files.
|
|
||||||
|
Loading…
Reference in New Issue
Block a user