Added pandoc lua filter to make code literals work better with manpages.
This commit is contained in:
parent
315aedc7cb
commit
aa1faea83c
9
.pandoc/bold-code.lua
Normal file
9
.pandoc/bold-code.lua
Normal file
@ -0,0 +1,9 @@
|
||||
-- Pandoc Lua filter hook for inline code spans.
-- Each inline `Code` element is replaced by a `Strong` (bold) element that
-- carries the same literal text, so code literals show up as bold when the
-- document is rendered through this filter (the Makefile applies it when
-- building the man page).
--
-- @param el  the inline Code element; only its `text` field is read
-- @return    a Strong inline wrapping the element's text
function Code(el)
    local literal = el.text
    local bold = pandoc.Strong(literal)
    return bold
end
|
||||
|
||||
-- Convert code blocks to bold and indented.
-- The block is replaced by a BlockQuote (for the indentation) holding a single
-- paragraph whose content is one Strong (bold) span.
--
-- The code text is split on newlines and the pieces are joined with explicit
-- LineBreak elements. Passing the raw multi-line string straight into Strong
-- would leave the newlines inside ordinary paragraph text, where the output
-- format is free to reflow them and the code's line structure is lost.
-- A single-line block yields exactly one Str, so its rendering is unchanged.
--
-- @param el  the CodeBlock element; only its `text` field is read
-- @return    a BlockQuote containing one bold paragraph
function CodeBlock(el)
    local inlines = {}
    -- Appending "\n" lets the pattern capture the final line (and any blank
    -- lines in between) without special-casing the end of the string.
    for line in (el.text .. "\n"):gmatch("(.-)\n") do
        if #inlines > 0 then
            inlines[#inlines + 1] = pandoc.LineBreak()
        end
        inlines[#inlines + 1] = pandoc.Str(line)
    end
    return pandoc.BlockQuote({pandoc.Para({pandoc.Strong(inlines)})})
end
|
2
Makefile
2
Makefile
@ -21,7 +21,7 @@ $(NAME): $(OBJFILES) bp.c
|
||||
$(CC) $(ALL_FLAGS) -o $@ $(OBJFILES) bp.c
|
||||
|
||||
bp.1: bp.1.md
|
||||
pandoc -s $< -t man -o $@
|
||||
pandoc --lua-filter=.pandoc/bold-code.lua -s $< -t man -o $@
|
||||
|
||||
tags: $(CFILES) bp.c
|
||||
ctags *.c *.h
|
||||
|
140
bp.1
140
bp.1
@ -7,7 +7,7 @@
|
||||
bp - Bruce\[aq]s Parsing Expression Grammar tool
|
||||
.SH SYNOPSIS
|
||||
.PP
|
||||
\f[B]bp\f[R] [\f[I]options\&...\f[R]] \f[I]pattern\f[R] [[--]
|
||||
\f[B]bp\f[R] [\f[I]options\&...\f[R]] \f[I]pattern\f[R] [[\f[B]--\f[R]]
|
||||
\f[I]files\&...\f[R]]
|
||||
.SH DESCRIPTION
|
||||
.PP
|
||||
@ -60,8 +60,7 @@ instead of treated as literal files.
|
||||
\f[B]-c\f[R], \f[B]--context\f[R] \f[I]N\f[R]
|
||||
The number of lines of context to print.
|
||||
If \f[I]N\f[R] is 0, print only the exact text of the matches.
|
||||
If \f[I]N\f[R] is \f[B]\f[CB]\[dq]all\[dq]\f[B]\f[R], print the entire
|
||||
file.
|
||||
If \f[I]N\f[R] is \f[B]\[lq]all\[rq]\f[R], print the entire file.
|
||||
Otherwise, if \f[I]N\f[R] is a positive integer, print the whole line on
|
||||
which matches occur, as well as the \f[I]N-1\f[R] lines before and after
|
||||
the match.
|
||||
@ -97,7 +96,7 @@ with one or two patterns.
|
||||
The default mode for bp patterns is \[lq]string pattern mode\[rq].
|
||||
In string pattern mode, all characters are interpreted literally except
|
||||
for the backslash (\f[B]\[rs]\f[R]), which may be followed by a bp
|
||||
pattern (see the \f[B]PATTERNS\f[R] section above).
|
||||
pattern (see the \f[B]PATTERNS\f[R] section below).
|
||||
Optionally, the bp pattern may be terminated by a semicolon
|
||||
(\f[B];\f[R]).
|
||||
.SH PATTERNS
|
||||
@ -107,9 +106,12 @@ Expression Grammars and regular expression syntax.
|
||||
The syntax is designed to map closely to verbal descriptions of the
|
||||
patterns, and prefix operators are preferred over suffix operators (as
|
||||
is common in regex syntax).
|
||||
.PP
|
||||
Some patterns additionally have \[lq]multi-line\[rq] variants, which
|
||||
means that they include the newline character.
|
||||
Patterns are whitespace-agnostic, so they work the same regardless of
|
||||
whether whitespace is present or not, except for string literals
|
||||
(\f[B]\[aq]...\[aq]\f[R] and \f[B]\[dq]...\[dq]\f[R]), character
|
||||
literals (\f[B]\[ga]\f[R]), and escape sequences (\f[B]\[rs]\f[R]).
|
||||
Whitespace between patterns or parts of a pattern should be used for
|
||||
clarity, but it will not affect the meaning of the pattern.
|
||||
.TP
|
||||
\f[I]pat1 pat2\f[R]
|
||||
A sequence: \f[I]pat1\f[R] followed by \f[I]pat2\f[R]
|
||||
@ -155,8 +157,8 @@ either end.
|
||||
Escape sequences are not allowed.
|
||||
.TP
|
||||
\f[B]\[ga]\f[R]\f[I]c\f[R]
|
||||
The literal character \f[I]c\f[R] (e.g.\ **\[ga]\[at]** matches the
|
||||
\[lq]\[at]\[rq] character)
|
||||
The literal character \f[I]c\f[R] (e.g.\ \f[B]\[ga]\[at]\f[R] matches
|
||||
the \[lq]\[at]\[rq] character)
|
||||
.TP
|
||||
\f[B]\[ga]\f[R]\f[I]c1\f[R]\f[B],\f[R]\f[I]c2\f[R]
|
||||
The literal character \f[I]c1\f[R] or \f[I]c2\f[R]
|
||||
@ -188,40 +190,43 @@ Not \f[I]pat\f[R]
|
||||
Maybe \f[I]pat\f[R]
|
||||
.TP
|
||||
\f[I]N\f[R] \f[I]pat\f[R]
|
||||
Exactly \f[I]N\f[R] repetitions of \f[I]pat\f[R] (e.g.\ \f[B]5
|
||||
\[ga]x\f[R] matches \f[B]\[lq]xxxxx\[rq]\f[R])
|
||||
Exactly \f[I]N\f[R] repetitions of \f[I]pat\f[R]
|
||||
(e.g.\ \f[B]5 \[dq]x\[dq]\f[R] matches \f[B]\[lq]xxxxx\[rq]\f[R])
|
||||
.TP
|
||||
\f[I]N\f[R] \f[B]-\f[R] \f[I]M\f[R] \f[I]pat\f[R]
|
||||
Between \f[I]N\f[R] and \f[I]M\f[R] repetitions of \f[I]pat\f[R]
|
||||
(e.g.\ \f[B]2-3 \[ga]x\f[R] matches \f[B]\[lq]xx\[rq]\f[R] or
|
||||
(e.g.\ \f[B]2-3 \[dq]x\[dq]\f[R] matches \f[B]\[lq]xx\[rq]\f[R] or
|
||||
\f[B]\[lq]xxx\[rq]\f[R])
|
||||
.TP
|
||||
\f[I]N\f[R]\f[B]+\f[R] \f[I]pat\f[R]
|
||||
At least \f[I]N\f[R] or more repetitions of \f[I]pat\f[R] (e.g.\ \f[B]2+
|
||||
\[ga]x\f[R] matches \f[B]\[lq]xx\[rq]\f[R], \f[B]\[lq]xxx\[rq]\f[R],
|
||||
\f[B]\[lq]xxxx\[rq]\f[R], etc.)
|
||||
At least \f[I]N\f[R] repetitions of \f[I]pat\f[R]
|
||||
(e.g.\ \f[B]2+ \[dq]x\[dq]\f[R] matches \f[B]\[lq]xx\[rq]\f[R],
|
||||
\f[B]\[lq]xxx\[rq]\f[R], \f[B]\[lq]xxxx\[rq]\f[R], etc.)
|
||||
.TP
|
||||
\f[B]*\f[R] \f[I]pat\f[R]
|
||||
Some \f[I]pat\f[R]s (zero or more, e.g.\ \f[B]* \[ga]x\f[R] matches
|
||||
Some \f[I]pat\f[R]s (zero or more, e.g.\ \f[B]* \[dq]x\[dq]\f[R] matches
|
||||
\f[B]\[lq]\[rq]\f[R], \f[B]\[lq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R],
|
||||
etc.)
|
||||
.TP
|
||||
\f[B]+\f[R] \f[I]pat\f[R]
|
||||
At least one \f[I]pat\f[R]s (e.g.\ \f[B]+ \[ga]x\f[R] matches
|
||||
One or more \f[I]pat\f[R]s (e.g.\ \f[B]+ \[dq]x\[dq]\f[R] matches
|
||||
\f[B]\[lq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R], \f[B]\[lq]xxx\[rq]\f[R],
|
||||
etc.)
|
||||
.TP
|
||||
\f[I]repeating-pat\f[R] \f[B]%\f[R] \f[I]sep\f[R]
|
||||
\f[I]repeating-pat\f[R] separated by \f[I]sep\f[R] (e.g.\ \f[B]*word %
|
||||
\[ga],\f[R] matches zero or more comma-separated words)
|
||||
\f[I]repeating-pat\f[R] (see the examples above) separated by
|
||||
\f[I]sep\f[R] (e.g.\ \f[B]*word % \[dq],\[dq]\f[R] matches zero or more
|
||||
comma-separated words)
|
||||
.TP
|
||||
\f[B]..\f[R] \f[I]pat\f[R]
|
||||
Any text (except newlines) up to and including \f[I]pat\f[R]
|
||||
.TP
|
||||
\f[B].. %\f[R] \f[I]skip\f[R] \f[I]pat\f[R]
|
||||
Any text (except newlines) up to and including \f[I]pat\f[R], skipping
|
||||
over instances of \f[I]skip\f[R] (e.g.\ \f[B]\[ga]\[dq]..\[ga]\[dq] %
|
||||
(\[ga]\[rs].)\f[R])
|
||||
over instances of \f[I]skip\f[R]
|
||||
(e.g.\ \f[B]\[aq]\[dq]\[aq] ..%(\[aq]\[rs]\[aq] .) \[aq]\[dq]\[aq]\f[R]
|
||||
opening quote, up to closing quote, skipping over backslash followed by
|
||||
a single character)
|
||||
.TP
|
||||
\f[B]<\f[R] \f[I]pat\f[R]
|
||||
Matches at the current position if \f[I]pat\f[R] matches immediately
|
||||
@ -234,13 +239,13 @@ match up to maximum number of characters \f[I]pat\f[R] can match (or the
|
||||
length of the current line up to the current position, whichever is
|
||||
smaller).
|
||||
\f[B]Note:\f[R] For fixed-length lookbehinds, this is quite efficient
|
||||
(e.g.\ \f[B]<(100\[ga]x)\f[R]), however this could cause performance
|
||||
problems with variable-length lookbehinds (e.g.\ \f[B]<(\[ga]x
|
||||
0-100\[ga]y)\f[R]).
|
||||
Also, it is not advised to use \f[B]\[ha]\f[R], \f[B]\[ha]\[ha]\f[R],
|
||||
\f[B]\[u2005]*\[u2005]*,\f[BI]o\f[B]\f[BI]r\f[B]\[u2005]*\[u2005]*$\f[R]
|
||||
inside a lookbehind, as they will match against the edges of the
|
||||
lookbehind slice.
|
||||
(e.g.\ \f[B]<(100 \[dq]x\[dq])\f[R]), however this could cause
|
||||
performance problems with variable-length lookbehinds
|
||||
(e.g.\ \f[B]<(\[dq]x\[dq] 0-100\[dq]y\[dq])\f[R]).
|
||||
Also, it is worth noting that \f[B]\[ha]\f[R], \f[B]\[ha]\[ha]\f[R],
|
||||
\f[B]$\f[R], and \f[B]$$\f[R] all match against the edges of the slice,
|
||||
which may give false positives if you were expecting them to match only
|
||||
against the edges of the file or line.
|
||||
.TP
|
||||
\f[B]>\f[R] \f[I]pat\f[R]
|
||||
Matches \f[I]pat\f[R], but does not consume any input (lookahead).
|
||||
@ -258,19 +263,24 @@ See the \f[B]GRAMMAR FILES\f[R] section for more info.
|
||||
.TP
|
||||
\f[B]\[at]\f[R] \f[I]name\f[R] \f[B]=\f[R] \f[I]pat\f[R]
|
||||
Let \f[I]name\f[R] equal \f[I]pat\f[R] (named capture).
|
||||
Named captures can be used as backreferences like so: \f[B]\[at]foo=word
|
||||
\[ga]( foo \[ga])\f[R] (matches \f[B]\[lq]asdf(asdf)\[rq]\f[R] or
|
||||
\f[B]\[lq]baz(baz)\[rq]\f[R], but not \f[B]\[lq]foo(baz)\[rq]\f[R])
|
||||
Named captures can be used as backreferences like so:
|
||||
\f[B]\[at]foo=word \[ga]( foo \[ga])\f[R] (matches
|
||||
\f[B]\[lq]asdf(asdf)\[rq]\f[R] or \f[B]\[lq]baz(baz)\[rq]\f[R], but not
|
||||
\f[B]\[lq]foo(baz)\[rq]\f[R])
|
||||
.TP
|
||||
\f[I]pat\f[R] \f[B]=> \[aq]\f[R]\f[I]replacement\f[R]\f[B]\[aq]\f[R]
|
||||
\f[I]pat\f[R] \f[B]=>\f[R] \f[B]\[dq]\f[R]\f[I]replacement\f[R]\f[B]\[dq]\f[R]
|
||||
Replace \f[I]pat\f[R] with \f[I]replacement\f[R].
|
||||
Note: \f[I]replacement\f[R] should be a string, and it may contain
|
||||
Note: \f[I]replacement\f[R] should be a string (single or double
|
||||
quoted), and it may contain escape sequences (e.g.\ \f[B]\[rs]n\f[R]) or
|
||||
references to captured values: \f[B]\[at]0\f[R] (the whole of
|
||||
\f[I]pat\f[R]), \f[B]\[at]1\f[R] (the first capture in \f[I]pat\f[R]),
|
||||
\f[B]\[at]\f[R]\f[I]foo\f[R] (the capture named \f[I]foo\f[R] in
|
||||
\f[I]pat\f[R]), etc.
|
||||
For example, \f[B]\[at]word _ \[at]rest=(*word % _) => \[dq]\[at]rest
|
||||
\[at]1\[dq]\f[R]
|
||||
For example,
|
||||
\f[B]\[at]word _ \[at]rest=(*word % _) => \[dq]\[at]rest:\[rs]n\[rs]t\[at]1\[dq]\f[R]
|
||||
matches a word followed by whitespace, followed by a series of words and
|
||||
replaces it with the series of words, a colon, a newline, a tab, and
|
||||
then the first word.
|
||||
.TP
|
||||
\f[I]pat1\f[R] \f[B]\[ti]\f[R] \f[I]pat2\f[R]
|
||||
Matches when \f[I]pat1\f[R] matches and \f[I]pat2\f[R] can be found
|
||||
@ -305,47 +315,51 @@ The \f[B]builtins\f[R] grammar file is loaded by default, and it defines
|
||||
a few useful general-purpose patterns.
|
||||
For example, it defines the \f[B]parens\f[R] rule, which matches pairs
|
||||
of matching parentheses, accounting for nested inner parentheses:
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
bp -p \[aq]\[dq]my_func\[dq] parens\[aq]
|
||||
\f[R]
|
||||
.fi
|
||||
.RS
|
||||
.PP
|
||||
\f[B]bp -p \[aq]\[dq]my_func\[dq] parens\[aq]\f[R]
|
||||
.RE
|
||||
.PP
|
||||
\f[B]bp\f[R] also comes with a few grammar files for common programming
|
||||
languages, which may be loaded on demand.
|
||||
These grammar files are not comprehensive syntax definitions, but only
|
||||
some common patterns.
|
||||
For example, the c++ grammar file contains definitions for
|
||||
\f[B]//\f[R]-style line comments as well as \f[B]/*\&...*/\f[R]-style
|
||||
\f[B]//\f[R]-style line comments as well as \f[B]/*...*/\f[R]-style
|
||||
block comments.
|
||||
Thus, you can find all comments with the word \[lq]TODO\[rq] with the
|
||||
following command:
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
bp -g c++ -p \[aq]comment\[ti]{TODO}\[aq] *.cpp
|
||||
\f[R]
|
||||
.fi
|
||||
.RS
|
||||
.PP
|
||||
\f[B]bp -g c++ -p \[aq]comment \[ti] {TODO}\[aq] *.cpp\f[R]
|
||||
.RE
|
||||
.SH EXAMPLES
|
||||
.TP
|
||||
.PP
|
||||
Find files containing the string \[lq]foo\[rq] (a string pattern):
|
||||
.RS
|
||||
.PP
|
||||
\f[B]ls | bp foo\f[R]
|
||||
Find files containing the string \[dq]foo\[dq] (a string pattern)
|
||||
.TP
|
||||
.RE
|
||||
.PP
|
||||
Find files ending with \[lq].c\[rq] and print the name with the
|
||||
\[lq].c\[rq] replaced with \[lq].h\[rq]:
|
||||
.RS
|
||||
.PP
|
||||
\f[B]ls | bp \[aq].c\[rs]$\[aq] -r \[aq].h\[aq]\f[R]
|
||||
Find files ending with \[dq].c\[dq] and replace the extension with
|
||||
\[dq].h\[dq]
|
||||
.TP
|
||||
.RE
|
||||
.PP
|
||||
Find the word \[lq]foobar\[rq], followed by a pair of matching
|
||||
parentheses in the file \f[I]my_file.py\f[R]:
|
||||
.RS
|
||||
.PP
|
||||
\f[B]bp -p \[aq]{foobar} parens\[aq] my_file.py\f[R]
|
||||
Find the word \f[B]\[dq]foobar\[dq]\f[R], followed by a pair of matching
|
||||
parentheses in the file \f[I]my_file.py\f[R]
|
||||
.TP
|
||||
\f[B]bp -g html -p \[aq]element \[ti] (\[ha]\[ha]\[dq]<a \[dq])\[aq] foo.html\f[R]
|
||||
.RE
|
||||
.PP
|
||||
Using the \f[I]html\f[R] grammar, find all \f[I]element\f[R]s matching
|
||||
the tag \f[I]a\f[R] in the file \f[I]foo.html\f[R]
|
||||
.TP
|
||||
\f[B]bp -g python -p \[aq]comment\[ti]{TODO}\[aq] *.py\f[R]
|
||||
Find all comments with the word \f[B]\[lq]TODO\[rq]\f[R] in local python
|
||||
files.
|
||||
the tag \f[I]a\f[R] in the file \f[I]foo.html\f[R]:
|
||||
.RS
|
||||
.PP
|
||||
\f[B]bp -g html -p \[aq]element \[ti] (\[ha]\[ha]\[dq]<a \[dq])\[aq] foo.html\f[R]
|
||||
.RE
|
||||
.SH AUTHORS
|
||||
Bruce Hill (\f[I]bruce\[at]bruce-hill.com\f[R]).
|
||||
|
251
bp.1.md
251
bp.1.md
@ -8,71 +8,68 @@ bp - Bruce\'s Parsing Expression Grammar tool
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
**bp**
|
||||
\[*options...*\]
|
||||
*pattern*
|
||||
\[\[\--\] *files...*\]
|
||||
`bp` \[*options...*\] *pattern* \[\[`--`\] *files...*\]
|
||||
|
||||
# DESCRIPTION
|
||||
|
||||
**bp** is a tool that matches parsing expression grammars using a custom
|
||||
`bp` is a tool that matches parsing expression grammars using a custom
|
||||
syntax.
|
||||
|
||||
# OPTIONS
|
||||
|
||||
**-v**, **\--verbose**
|
||||
`-v`, `--verbose`
|
||||
: Print debugging information.
|
||||
|
||||
**-e**, **\--explain**
|
||||
`-e`, `--explain`
|
||||
: Print a visual explanation of the matches.
|
||||
|
||||
**-j**, **\--json**
|
||||
: Print a JSON list of the matches. (Pairs with **\--verbose** for more detail)
|
||||
`-j`, `--json`
|
||||
: Print a JSON list of the matches. (Pairs with `--verbose` for more detail)
|
||||
|
||||
**-l**, **\--list-files**
|
||||
`-l`, `--list-files`
|
||||
: Print only the names of files containing matches instead of the matches
|
||||
themselves.
|
||||
|
||||
**-i**, **\--ignore-case**
|
||||
`-i`, `--ignore-case`
|
||||
: Perform pattern matching case-insensitively.
|
||||
|
||||
**-I**, **\--inplace**
|
||||
`-I`, `--inplace`
|
||||
: Perform filtering or replacement in-place (i.e. overwrite files with new
|
||||
content).
|
||||
|
||||
**-C**, **\--confirm**
|
||||
`-C`, `--confirm`
|
||||
: During in-place modification of a file, confirm before each modification.
|
||||
|
||||
**-r**, **\--replace** *replacement*
|
||||
`-r`, `--replace` *replacement*
|
||||
: Replace all occurrences of the main pattern with the given string.
|
||||
|
||||
**-s**, **\--skip** *pattern*
|
||||
`-s`, `--skip` *pattern*
|
||||
: While looking for matches, skip over *pattern* occurrences. This can be
|
||||
useful for behavior like **bp -s string** (avoiding matches inside string
|
||||
useful for behavior like `bp -s string` (avoiding matches inside string
|
||||
literals).
|
||||
|
||||
**-g**, **\--grammar** *grammar-file*
|
||||
: Load the grammar from the given file. See the **GRAMMAR FILES** section
|
||||
`-g`, `--grammar` *grammar-file*
|
||||
: Load the grammar from the given file. See the `GRAMMAR FILES` section
|
||||
for more info.
|
||||
|
||||
**-G**, **\--git**
|
||||
: Use **git** to get a list of files. Remaining file arguments (if any) are
|
||||
passed to **git \--ls-files** instead of treated as literal files.
|
||||
`-G`, `--git`
|
||||
: Use `git` to get a list of files. Remaining file arguments (if any) are
|
||||
passed to `git ls-files` instead of being treated as literal files.
|
||||
|
||||
**-c**, **\--context** *N*
|
||||
`-c`, `--context` *N*
|
||||
: The number of lines of context to print. If *N* is 0, print only the
|
||||
exact text of the matches. If *N* is **`"all"`**, print the entire file.
|
||||
exact text of the matches. If *N* is **"all"**, print the entire file.
|
||||
Otherwise, if *N* is a positive integer, print the whole line on which
|
||||
matches occur, as well as the *N-1* lines before and after the match. The
|
||||
default value for this argument is **1** (print whole lines where matches
|
||||
occur).
|
||||
|
||||
**-f**, **\--format** *auto*\|*fancy*\|*plain*
|
||||
`-f`, `--format` *auto*\|*fancy*\|*plain*
|
||||
: Set the output format. *fancy* includes colors and line numbers, *plain*
|
||||
includes neither, and *auto* (the default) uses *fancy* formatting only when
|
||||
the output is a TTY.
|
||||
|
||||
**\--help**
|
||||
`--help`
|
||||
: Print the usage and exit.
|
||||
|
||||
*pattern*
|
||||
@ -81,7 +78,7 @@ pattern (see the **STRING PATTERNS** section below).
|
||||
|
||||
*files...*
|
||||
: The input files to search. If no input files are provided and data was piped
|
||||
in, that data will be used instead. If neither are provided, **bp** will search
|
||||
in, that data will be used instead. If neither are provided, `bp` will search
|
||||
through all files in the current directory and its subdirectories
|
||||
(recursively).
|
||||
|
||||
@ -90,118 +87,119 @@ through all files in the current directory and its subdirectories
|
||||
|
||||
One of the most common use cases for pattern matching tools is matching plain,
|
||||
literal strings, or strings that are primarily plain strings, with one or two
|
||||
patterns. **bp** is designed around this fact. The default mode for bp patterns
|
||||
patterns. `bp` is designed around this fact. The default mode for bp patterns
|
||||
is "string pattern mode". In string pattern mode, all characters are
|
||||
interpreted literally except for the backslash (**\\**), which may be followed
|
||||
by a bp pattern (see the **PATTERNS** section above). Optionally, the bp
|
||||
pattern may be terminated by a semicolon (**;**).
|
||||
interpreted literally except for the backslash (`\`), which may be followed by
|
||||
a bp pattern (see the **PATTERNS** section below). Optionally, the bp pattern
|
||||
may be terminated by a semicolon (`;`).
|
||||
|
||||
|
||||
# PATTERNS
|
||||
|
||||
**bp** patterns are based off of a combination of Parsing Expression Grammars
|
||||
and regular expression syntax. The syntax is designed to map closely to verbal
|
||||
`bp` patterns are based off of a combination of Parsing Expression Grammars and
|
||||
regular expression syntax. The syntax is designed to map closely to verbal
|
||||
descriptions of the patterns, and prefix operators are preferred over suffix
|
||||
operators (as is common in regex syntax).
|
||||
|
||||
Some patterns additionally have "multi-line" variants, which means that they
|
||||
include the newline character.
|
||||
operators (as is common in regex syntax). Patterns are whitespace-agnostic, so
|
||||
they work the same regardless of whether whitespace is present or not, except
|
||||
for string literals (`'...'` and `"..."`), character literals (`` ` ``), and
|
||||
escape sequences (`\`). Whitespace between patterns or parts of a pattern
|
||||
should be used for clarity, but it will not affect the meaning of the pattern.
|
||||
|
||||
*pat1 pat2*
|
||||
: A sequence: *pat1* followed by *pat2*
|
||||
|
||||
*pat1* **/** *pat2*
|
||||
*pat1* `/` *pat2*
|
||||
: A choice: *pat1*, or if it doesn\'t match, then *pat2*
|
||||
|
||||
**.**
|
||||
`.`
|
||||
: Any character (excluding newline)
|
||||
|
||||
**\^**
|
||||
`^`
|
||||
: Start of a line
|
||||
|
||||
**\^\^**
|
||||
`^^`
|
||||
: Start of the text
|
||||
|
||||
**\$**
|
||||
`$`
|
||||
: End of a line (does not include newline character)
|
||||
|
||||
**\$\$**
|
||||
`$$`
|
||||
: End of the text
|
||||
|
||||
**\_**
|
||||
`_`
|
||||
: Zero or more whitespace characters, including spaces and tabs, but not
|
||||
newlines.
|
||||
|
||||
**\_\_**
|
||||
`__`
|
||||
: Zero or more whitespace characters, including spaces, tabs, newlines, and
|
||||
comments. Comments are undefined by default, but may be defined by a separate
|
||||
grammar file. See the **GRAMMAR FILES** section for more info.
|
||||
|
||||
**\"foo\"**, **\'foo\'**
|
||||
`"foo"`, `'foo'`
|
||||
: The literal string **"foo"**. Single and double quotes are treated the same.
|
||||
Escape sequences are not allowed.
|
||||
|
||||
**{foo}**
|
||||
`{foo}`
|
||||
: The literal string **"foo"** with word boundaries on either end. Escape
|
||||
sequences are not allowed.
|
||||
|
||||
**\`***c*
|
||||
: The literal character *c* (e.g. **\`@** matches the "@" character)
|
||||
`` ` ``*c*
|
||||
: The literal character *c* (e.g. `` `@ `` matches the "@" character)
|
||||
|
||||
**\`***c1***,***c2*
|
||||
: The literal character *c1* or *c2* (e.g. **\`a,e,i,o,u**)
|
||||
`` ` ``*c1*`,`*c2*
|
||||
: The literal character *c1* or *c2* (e.g. `` `a,e,i,o,u ``)
|
||||
|
||||
**\`***c1***-***c2*
|
||||
: The character range *c1* to *c2* (e.g. **\`a-z**). Multiple ranges
|
||||
can be combined with a comma (e.g. **\`a-z,A-Z**).
|
||||
`` ` ``*c1*`-`*c2*
|
||||
: The character range *c1* to *c2* (e.g. `` `a-z ``). Multiple ranges
|
||||
can be combined with a comma (e.g. `` `a-z,A-Z ``).
|
||||
|
||||
**\\***esc*
|
||||
: An escape sequence (e.g. **\\n**, **\\x1F**, **\\033**, etc.)
|
||||
`\`*esc*
|
||||
: An escape sequence (e.g. `\n`, `\x1F`, `\033`, etc.)
|
||||
|
||||
**\\***esc1***-***esc2*
|
||||
: An escape sequence range from *esc1* to *esc2* (e.g. **\\x00-x1F**)
|
||||
`\`*esc1*`-`*esc2*
|
||||
: An escape sequence range from *esc1* to *esc2* (e.g. `\x00-x1F`)
|
||||
|
||||
**\\N**
|
||||
`\N`
|
||||
: A special case escape that matches a "nodent": one or more newlines followed
|
||||
by the same indentation that occurs on the current line.
|
||||
|
||||
**!** *pat*
|
||||
`!` *pat*
|
||||
: Not *pat*
|
||||
|
||||
**\[** *pat* **\]**
|
||||
`[` *pat* `]`
|
||||
: Maybe *pat*
|
||||
|
||||
*N* *pat*
|
||||
: Exactly *N* repetitions of *pat* (e.g. **5 \`x** matches **"xxxxx"**)
|
||||
: Exactly *N* repetitions of *pat* (e.g. `5 "x"` matches **"xxxxx"**)
|
||||
|
||||
*N* **-** *M* *pat*
|
||||
: Between *N* and *M* repetitions of *pat* (e.g. **2-3 \`x**
|
||||
matches **"xx"** or **"xxx"**)
|
||||
*N* `-` *M* *pat*
|
||||
: Between *N* and *M* repetitions of *pat* (e.g. `2-3 "x"` matches **"xx"** or
|
||||
**"xxx"**)
|
||||
|
||||
*N***+** *pat*
|
||||
: At least *N* or more repetitions of *pat* (e.g. **2+ \`x** matches
|
||||
*N*`+` *pat*
|
||||
: At least *N* repetitions of *pat* (e.g. `2+ "x"` matches
|
||||
**"xx"**, **"xxx"**, **"xxxx"**, etc.)
|
||||
|
||||
**\*** *pat*
|
||||
: Some *pat*s (zero or more, e.g. **\* \`x** matches **""**, **"x"**,
|
||||
**"xx"**, etc.)
|
||||
`*` *pat*
|
||||
: Some *pat*s (zero or more, e.g. `* "x"` matches **""**, **"x"**, **"xx"**,
|
||||
etc.)
|
||||
|
||||
**+** *pat*
|
||||
: At least one *pat*s (e.g. **\+ \`x** matches **"x"**, **"xx"**,
|
||||
**"xxx"**, etc.)
|
||||
`+` *pat*
|
||||
: One or more *pat*s (e.g. `+ "x"` matches **"x"**, **"xx"**, **"xxx"**, etc.)
|
||||
|
||||
*repeating-pat* **%** *sep*
|
||||
: *repeating-pat* separated by *sep* (e.g. **\*word % \`,** matches
|
||||
zero or more comma-separated words)
|
||||
*repeating-pat* `%` *sep*
|
||||
: *repeating-pat* (see the examples above) separated by *sep* (e.g. `*word %
|
||||
","` matches zero or more comma-separated words)
|
||||
|
||||
**..** *pat*
|
||||
`..` *pat*
|
||||
: Any text (except newlines) up to and including *pat*
|
||||
|
||||
**.. %** *skip* *pat*
|
||||
: Any text (except newlines) up to and including *pat*, skipping over
|
||||
instances of *skip* (e.g. **\`\"..\`\" % (\`\\.)**)
|
||||
`.. %` *skip* *pat*
|
||||
: Any text (except newlines) up to and including *pat*, skipping over instances
|
||||
of *skip* (e.g. `'"' ..%('\' .) '"'` opening quote, up to closing quote,
|
||||
skipping over backslash followed by a single character)
|
||||
|
||||
**\<** *pat*
|
||||
`<` *pat*
|
||||
: Matches at the current position if *pat* matches immediately before the
|
||||
current position (lookbehind). Conceptually, you can think of this as creating
|
||||
a file containing only the *N* characters immediately before the current
|
||||
@ -209,56 +207,59 @@ position and attempting to match *pat* on that file, for all values of *N* from
|
||||
the minimum number of characters *pat* can match up to the maximum number of
|
||||
characters *pat* can match (or the length of the current line up to the current
|
||||
position, whichever is smaller). **Note:** For fixed-length lookbehinds, this
|
||||
is quite efficient (e.g. **\<(100\`x)**), however this could cause performance
|
||||
problems with variable-length lookbehinds (e.g. **\<(\`x 0-100\`y)**). Also,
|
||||
it is not advised to use **\^**, **\^\^**, **$**, or **$$** inside a lookbehind,
|
||||
as they will match against the edges of the lookbehind slice.
|
||||
is quite efficient (e.g. `<(100 "x")`), however this could cause performance
|
||||
problems with variable-length lookbehinds (e.g. `<("x" 0-100"y")`). Also, it is
|
||||
worth noting that `^`, `^^`, `$`, and `$$` all match against the edges of the
|
||||
slice, which may give false positives if you were expecting them to match only
|
||||
against the edges of the file or line.
|
||||
|
||||
**\>** *pat*
|
||||
`>` *pat*
|
||||
: Matches *pat*, but does not consume any input (lookahead).
|
||||
|
||||
**\@** *pat*
|
||||
`@` *pat*
|
||||
: Capture *pat*
|
||||
|
||||
**foo**
|
||||
: The named pattern whose name is **"foo"**. Pattern names come from definitions in
|
||||
grammar files or from named captures. Pattern names may contain dashes (**-**),
|
||||
but not underscores (**\_**), since the underscore is used to match whitespace.
|
||||
See the **GRAMMAR FILES** section for more info.
|
||||
`foo`
|
||||
: The named pattern whose name is **"foo"**. Pattern names come from
|
||||
definitions in grammar files or from named captures. Pattern names may contain
|
||||
dashes (`-`), but not underscores (`_`), since the underscore is used to match
|
||||
whitespace. See the **GRAMMAR FILES** section for more info.
|
||||
|
||||
**\@** *name* **=** *pat*
|
||||
`@` *name* `=` *pat*
|
||||
: Let *name* equal *pat* (named capture). Named captures can be used as
|
||||
backreferences like so: **\@foo=word \`( foo \`)** (matches **"asdf(asdf)"** or
|
||||
backreferences like so: `` @foo=word `( foo `) `` (matches **"asdf(asdf)"** or
|
||||
**"baz(baz)"**, but not **"foo(baz)"**)
|
||||
|
||||
*pat* **=\> \'***replacement***\'**
|
||||
: Replace *pat* with *replacement*. Note: *replacement* should be a
|
||||
string, and it may contain references to captured values: **\@0** (the whole of
|
||||
*pat*), **\@1** (the first capture in *pat*), **\@***foo* (the capture
|
||||
named *foo* in *pat*), etc. For example, **\@word \_ \@rest=(\*word % \_)
|
||||
=\> \"\@rest \@1\"**
|
||||
*pat* `=>` `"`*replacement*`"`
|
||||
: Replace *pat* with *replacement*. Note: *replacement* should be a string
|
||||
(single or double quoted), and it may contain escape sequences (e.g. `\n`) or
|
||||
references to captured values: `@0` (the whole of *pat*), `@1` (the first
|
||||
capture in *pat*), `@`*foo* (the capture named *foo* in *pat*), etc. For
|
||||
example, `@word _ @rest=(*word % _) => "@rest:\n\t@1"` matches a word followed
|
||||
by whitespace, followed by a series of words and replaces it with the series
|
||||
of words, a colon, a newline, a tab, and then the first word.
|
||||
|
||||
*pat1* **~** *pat2*
|
||||
*pat1* `~` *pat2*
|
||||
: Matches when *pat1* matches and *pat2* can be found within the text of that
|
||||
match. (e.g. **comment ~ {TODO}** matches comments that contain the word
|
||||
match. (e.g. `comment ~ {TODO}` matches comments that contain the word
|
||||
**"TODO"**)
|
||||
|
||||
*pat1* **!~** *pat2*
|
||||
*pat1* `!~` *pat2*
|
||||
: Matches when *pat1* matches, but *pat2* can not be found within the text of
|
||||
that match. (e.g. **comment ~ {IGNORE}** matches only comments that do not
|
||||
that match. (e.g. `comment !~ {IGNORE}` matches only comments that do not
|
||||
contain the word **"IGNORE"**)
|
||||
|
||||
*name***:** *pat*
|
||||
*name*`:` *pat*
|
||||
: Define *name* to mean *pat* (pattern definition)
|
||||
|
||||
**(!)** *error-pat*
|
||||
`(!)` *error-pat*
|
||||
: If *error-pat* matches, **bp** will not print any results in this file and
|
||||
instead print an error message to **STDERR** highlighting the matching position
|
||||
of *error-pat* in the file and printing the text of *error-pat* as an error
|
||||
message. Then, **bp** will exit with a failure status and not process any
|
||||
further files.
|
||||
|
||||
**\#** *comment*
|
||||
`#` *comment*
|
||||
: A line comment
|
||||
|
||||
|
||||
@ -277,30 +278,36 @@ bp -p '"my_func" parens'
|
||||
**bp** also comes with a few grammar files for common programming languages,
|
||||
which may be loaded on demand. These grammar files are not comprehensive syntax
|
||||
definitions, but only some common patterns. For example, the c++ grammar file
|
||||
contains definitions for **//**-style line comments as well as
|
||||
**/\*...\*/**-style block comments. Thus, you can find all comments with the
|
||||
word "TODO" with the following command:
|
||||
contains definitions for `//`-style line comments as well as `/*...*/`-style
|
||||
block comments. Thus, you can find all comments with the word "TODO" with the
|
||||
following command:
|
||||
|
||||
```
|
||||
bp -g c++ -p 'comment~{TODO}' *.cpp
|
||||
bp -g c++ -p 'comment ~ {TODO}' *.cpp
|
||||
```
|
||||
|
||||
|
||||
# EXAMPLES
|
||||
|
||||
**ls \| bp foo**
|
||||
: Find files containing the string \"foo\" (a string pattern)
|
||||
Find files containing the string "foo" (a string pattern):
|
||||
```
|
||||
ls | bp foo
|
||||
```
|
||||
|
||||
**ls \| bp \'.c\\\$\' -r \'.h\'**
|
||||
: Find files ending with \".c\" and replace the extension with \".h\"
|
||||
Find files ending with ".c" and print the name with the ".c" replaced with ".h":
|
||||
```
|
||||
ls | bp '.c\$' -r '.h'
|
||||
```
|
||||
|
||||
**bp -p \'{foobar} parens\' my_file.py**
|
||||
: Find the word **\"foobar\"**, followed by a pair of matching parentheses in
|
||||
the file *my_file.py*
|
||||
Find the word "foobar", followed by a pair of matching parentheses in the file
|
||||
*my_file.py*:
|
||||
```
|
||||
bp -p '{foobar} parens' my_file.py
|
||||
```
|
||||
|
||||
**bp -g html -p \'element ~ (^^\"\<a \")\' foo.html**
|
||||
: Using the *html* grammar, find all *element*s matching the tag *a* in the
|
||||
file *foo.html*
|
||||
Using the *html* grammar, find all *element*s matching the tag *a* in the file
|
||||
*foo.html*:
|
||||
```
|
||||
bp -g html -p 'element ~ (^^"<a ")' foo.html
|
||||
```
|
||||
|
||||
**bp -g python -p \'comment~{TODO}\' \*.py**
|
||||
: Find all comments with the word **"TODO"** in local python files.
|
||||
|
Loading…
Reference in New Issue
Block a user