2020-12-12 16:31:53 -08:00
|
|
|
.\" Manpage for bp.
|
2020-09-11 01:54:26 -07:00
|
|
|
.\" Contact bruce@bruce-hill.com to correct errors or typos.
|
2020-12-12 16:31:53 -08:00
|
|
|
.TH BP 1 "Sep 12, 2020" "0.1" "bp manual page"
|
2020-09-11 01:54:26 -07:00
|
|
|
.SH NAME
|
2020-12-12 16:31:53 -08:00
|
|
|
bp \- Bruce's Parsing Expression Grammar tool
|
2020-09-11 01:54:26 -07:00
|
|
|
.SH SYNOPSIS
|
2020-12-12 16:31:53 -08:00
|
|
|
.B bp
|
2020-09-11 01:54:26 -07:00
|
|
|
[\fI-h\fR|\fI--help\fR]
|
|
|
|
[\fI-v\fR|\fI--verbose\fR]
|
2020-12-14 22:32:47 -08:00
|
|
|
[\fI-e\fR|\fI--explain\fR]
|
|
|
|
[\fI-j\fR|\fI--json\fR]
|
2020-12-17 16:23:45 -08:00
|
|
|
[\fI-l\fR|\fI--list-files\fR]
|
2020-09-14 12:16:15 -07:00
|
|
|
[\fI-i\fR|\fI--ignore-case\fR]
|
2020-12-27 19:48:52 -08:00
|
|
|
[\fI-I\fR|\fI--inplace\fR]
|
2021-01-15 18:23:18 -08:00
|
|
|
[\fI-C\fR|\fI--confirm\fR]
|
2020-09-13 23:31:38 -07:00
|
|
|
[\fI-p\fR|\fI--pattern\fR \fI<pattern>\fR]
|
2020-09-11 01:54:26 -07:00
|
|
|
[\fI-r\fR|\fI--replace\fR \fI<replacement>\fR]
|
2021-01-20 16:12:46 -08:00
|
|
|
[\fI-s\fR|\fI--skip\fR \fI<skip pattern>\fR]
|
2020-09-11 01:54:26 -07:00
|
|
|
[\fI-g\fR|\fI--grammar\fR \fI<grammar file>\fR]
|
2021-01-17 09:21:58 -08:00
|
|
|
[\fI-G\fR|\fI--git\fR]
|
2021-01-31 17:41:07 -08:00
|
|
|
[\fI-c\fR|\fI--context\fR \fI<N>\fR]
|
2020-09-11 01:54:26 -07:00
|
|
|
\fI<pattern>\fR
|
2020-09-13 23:31:38 -07:00
|
|
|
[[--] \fI<input files...>\fR]
|
2020-09-11 01:54:26 -07:00
|
|
|
.SH DESCRIPTION
|
2020-12-12 16:31:53 -08:00
|
|
|
\fBbp\fR is a tool that matches parsing expression grammars using a custom syntax.
|
2020-09-11 01:54:26 -07:00
|
|
|
.SH OPTIONS
|
2020-09-13 23:31:38 -07:00
|
|
|
.B \-v\fR, \fB--verbose
|
2020-09-11 01:54:26 -07:00
|
|
|
Print debugging information.
|
|
|
|
|
2020-12-14 22:32:47 -08:00
|
|
|
.B \-e\fR, \fB--explain
|
|
|
|
Print a visual explanation of the matches.
|
|
|
|
|
|
|
|
.B \-j\fR, \fB--json
|
|
|
|
Print a JSON list of the matches. (Pairs with \fB--verbose\fR for more detail)
|
|
|
|
|
2020-12-17 16:23:45 -08:00
|
|
|
.B \-l\fR, \fB--list-files
|
|
|
|
Print only the names of files containing matches instead of the matches themselves.
|
|
|
|
|
2020-09-14 12:16:15 -07:00
|
|
|
.B \-i\fR, \fB--ignore-case
|
|
|
|
Perform pattern matching case-insensitively.
|
|
|
|
|
2020-12-27 19:48:52 -08:00
|
|
|
.B \-I\fR, \fB--inplace
|
|
|
|
Perform filtering or replacement in-place (i.e. overwrite files with new content).
|
|
|
|
|
2021-01-15 18:23:18 -08:00
|
|
|
.B \-C\fR, \fB--confirm
|
|
|
|
During in-place modification of a file, confirm before each modification.
|
|
|
|
|
2020-09-13 23:31:38 -07:00
|
|
|
.B \-r\fR, \fB--replace \fI<replacement>\fR
|
2020-09-11 01:54:26 -07:00
|
|
|
Replace all occurrences of the main pattern with the given string.
|
|
|
|
|
2021-01-20 16:12:46 -08:00
|
|
|
.B \-s\fR, \fB--skip \fI<skip pattern>\fR
|
|
|
|
While looking for matches, skip over \fB<skip pattern>\fR occurrences. This can
|
|
|
|
be useful for behavior like \fBbp -s string\fR (avoiding matches inside string
|
|
|
|
literals).
|
|
|
|
|
2020-09-13 23:31:38 -07:00
|
|
|
.B \-g\fR, \fB--grammar \fI<grammar file>\fR
|
2020-09-11 01:54:26 -07:00
|
|
|
Load the grammar from the given file.
|
|
|
|
|
2021-01-17 09:21:58 -08:00
|
|
|
.B \-G\fR, \fB--git\fR
|
|
|
|
Use \fBgit\fR to get a list of files. Remaining file arguments (if any) are
|
|
|
|
passed to \fBgit ls-files\fR instead of treated as literal files.
|
|
|
|
|
2021-01-15 01:19:10 -08:00
|
|
|
.B \-c\fR, \fB--context \fI<N>\fR
|
|
|
|
The number of lines of context to print. If \fI<N>\fR is 0, print only the
|
|
|
|
exact text of the matches. If \fI<N>\fR is "all", print the entire file.
|
|
|
|
Otherwise, if \fI<N>\fR is a positive integer, print the whole line on which
|
|
|
|
matches occur, as well as the \fI<N-1>\fR lines before and after the match. The
|
|
|
|
default value for this argument is 1 (print whole lines where matches occur).
|
2020-09-13 23:31:38 -07:00
|
|
|
|
2020-09-11 01:54:26 -07:00
|
|
|
.B \-h\fR, \fB--help
|
|
|
|
Print the usage and exit.
|
|
|
|
|
2020-09-13 23:31:38 -07:00
|
|
|
.B <string-pattern>
|
2020-12-12 16:31:53 -08:00
|
|
|
The main pattern for bp to match. By default, this pattern is a string
|
2020-09-13 23:31:38 -07:00
|
|
|
pattern (see the \fBSTRING PATTERNS\fR section below).
|
|
|
|
|
|
|
|
.B <input files...>
|
|
|
|
The input files to search. If no input files are provided and data was
|
|
|
|
piped in, that data will be used instead. If neither is provided,
|
2020-12-12 16:31:53 -08:00
|
|
|
\fBbp\fR will search through all files in the current directory and
|
2020-09-13 23:31:38 -07:00
|
|
|
its subdirectories (recursively).
|
|
|
|
|
|
|
|
.SH PATTERNS
|
2020-12-12 16:31:53 -08:00
|
|
|
bp patterns are based on a combination of Parsing Expression Grammars
|
2020-09-13 23:31:38 -07:00
|
|
|
and regular expression syntax. The syntax is designed to map closely to
|
|
|
|
verbal descriptions of the patterns, and prefix operators are preferred over
|
|
|
|
suffix operators (which are common in regex syntax).
|
|
|
|
|
|
|
|
Some patterns additionally have "multi-line" variants, which means that they
|
|
|
|
include the newline character.
|
|
|
|
|
|
|
|
.I <pat1> <pat2>
|
2021-05-10 21:30:31 -07:00
|
|
|
A sequence: \fI<pat1>\fR followed by \fI<pat2>\fR
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.I <pat1> \fB/\fI <pat2>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
A choice: \fI<pat1>\fR, or if it doesn't match, then \fI<pat2>\fR
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.B .
|
2021-05-10 21:30:31 -07:00
|
|
|
Any character (excluding newline)
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.B ^
|
2021-05-10 21:30:31 -07:00
|
|
|
Start of a line
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.B ^^
|
2021-05-10 21:30:31 -07:00
|
|
|
Start of the text
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.B $
|
2021-05-10 21:30:31 -07:00
|
|
|
End of a line (does not include newline character)
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.B $$
|
2021-05-10 21:30:31 -07:00
|
|
|
End of the text
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.B _
|
2021-05-10 21:30:31 -07:00
|
|
|
Zero or more whitespace characters (specifically, spaces and tabs)
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.B __
|
2021-05-10 21:30:31 -07:00
|
|
|
Zero or more whitespace or newline characters
|
2020-09-13 23:31:38 -07:00
|
|
|
|
2021-05-10 23:46:46 -07:00
|
|
|
.B "foo"
|
|
|
|
.B 'foo'
|
|
|
|
The literal string \fIfoo\fR. Escape sequences are not allowed.
|
|
|
|
|
|
|
|
.B {foo}
|
|
|
|
The literal string \fIfoo\fR with word boundaries on either end. Escape sequences are not allowed.
|
|
|
|
|
2020-09-13 23:31:38 -07:00
|
|
|
.B `\fI<c>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
The literal character \fI<c>\fR (e.g. \fB`@\fR matches the "@" character)
|
2020-09-13 23:31:38 -07:00
|
|
|
|
2020-12-19 18:53:51 -08:00
|
|
|
.B `\fI<c1>\fB,\fI<c2>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
The literal character \fI<c1>\fR or \fI<c2>\fR (e.g. \fB`a,e,i,o,u\fR)
|
|
|
|
|
|
|
|
.B `\fI<c1>\fB-\fI<c2>\fR
|
|
|
|
The character range \fI<c1>\fR to \fI<c2>\fR (e.g. \fB`a-z\fR).
|
|
|
|
Multiple ranges can be combined with a comma (e.g. \fB`a-z,A-Z\fR).
|
2020-12-19 18:53:51 -08:00
|
|
|
|
2020-12-14 18:11:33 -08:00
|
|
|
.B \\\\\fI<esc>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
An escape sequence (e.g. \fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.)
|
2020-09-11 01:54:26 -07:00
|
|
|
|
2020-12-14 18:11:33 -08:00
|
|
|
.B \\\\\fI<esc1>\fB-\fI<esc2>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
An escape sequence range from \fI<esc1>\fR to \fI<esc2>\fR (e.g. \fB\\x00-x1F\fR)
|
2020-09-13 23:31:38 -07:00
|
|
|
|
2021-05-10 21:30:31 -07:00
|
|
|
.B \\\\N
|
|
|
|
A special case escape that matches a "nodent": one or more newlines followed by
|
2021-01-17 22:37:58 -08:00
|
|
|
the same indentation that occurs on the current line.
|
|
|
|
|
2020-09-13 23:31:38 -07:00
|
|
|
.B !\fI<pat>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
Not \fI<pat>\fR
|
2020-09-13 23:31:38 -07:00
|
|
|
|
2021-05-10 21:30:31 -07:00
|
|
|
.B [\fI<pat>\fB]
|
|
|
|
Maybe \fI<pat>\fR
|
2020-09-28 16:14:06 -07:00
|
|
|
|
2020-09-13 23:31:38 -07:00
|
|
|
.B \fI<N> <pat>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
Exactly \fIN\fR repetitions of \fI<pat>\fR (e.g. \fB5 `*\fR matches "*****")
|
|
|
|
|
|
|
|
.B \fI<N>\fB-\fI<M> <pat>\fR
|
|
|
|
Between \fI<N>\fR and \fI<M>\fR repetitions of \fI<pat>\fR (e.g. \fB2-3 `*\fR)
|
|
|
|
|
|
|
|
.B \fI<N>\fB+ \fI<pat>\fR
|
|
|
|
\fI<N>\fR or more repetitions of \fI<pat>\fR (e.g. \fB2+ `*\fR)
|
2020-09-13 23:31:38 -07:00
|
|
|
|
2020-09-28 18:08:23 -07:00
|
|
|
.B *\fI<pat>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
Some \fI<pat>\fRs (zero or more)
|
2020-09-28 17:42:38 -07:00
|
|
|
|
2020-09-28 18:08:23 -07:00
|
|
|
.B +\fI<pat>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
One or more \fI<pat>\fRs
|
2020-09-28 17:42:38 -07:00
|
|
|
|
2020-09-13 23:31:38 -07:00
|
|
|
.B \fI<repeating-pat>\fR \fB%\fI <sep>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
\fI<repeating-pat>\fR separated by \fI<sep>\fR (e.g. \fB*word % `,\fR matches
|
|
|
|
zero or more comma-separated words)
|
2020-09-13 23:31:38 -07:00
|
|
|
|
2020-12-14 18:11:33 -08:00
|
|
|
.B .. \fI<pat>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
Any text (except newlines) up to and including \fI<pat>\fR
|
2020-12-14 18:11:33 -08:00
|
|
|
|
2021-01-20 15:23:57 -08:00
|
|
|
.B .. % \fI<skip>\fR \fI<pat>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
Any text (except newlines) up to and including \fI<pat>\fR, skipping over
|
|
|
|
instances of \fI<skip>\fR (e.g. \fB`"..`" % (`\\.)\fR)
|
2020-12-14 18:11:33 -08:00
|
|
|
|
2020-09-13 23:31:38 -07:00
|
|
|
.B <\fI<pat>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
Just after \fI<pat>\fR (lookbehind)
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.B >\fI<pat>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
Just before \fI<pat>\fR (lookahead)
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.B @\fI<pat>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
Capture \fI<pat>\fR
|
2020-09-13 23:31:38 -07:00
|
|
|
|
2020-09-28 16:14:06 -07:00
|
|
|
.B @\fI<name>\fB=\fI<pat>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
Let \fI<name>\fR equal \fI<pat>\fR (named capture). Named captures can be used
|
|
|
|
as backreferences like so: \fB@foo=word `( foo `)\fR (matches "asdf(asdf)" or
|
|
|
|
"baz(baz)", but not "foo(baz)")
|
2020-09-13 23:31:38 -07:00
|
|
|
|
2021-05-10 21:30:31 -07:00
|
|
|
.B \fI<pat>\fB => '\fI<replacement>\fB'
|
|
|
|
Replace \fI<pat>\fR with \fI<replacement>\fR. Note: \fI<replacement>\fR should
|
2020-09-13 23:31:38 -07:00
|
|
|
be a string, and it may contain references to captured values: \fB@0\fR
|
|
|
|
(the whole of \fI<pat>\fR), \fB@1\fR (the first capture in \fI<pat>\fR),
|
2021-05-10 21:30:31 -07:00
|
|
|
\fB@\fIfoo\fR (the capture named \fIfoo\fR in \fI<pat>\fR), etc.
|
|
|
|
For example, \fB@word _ @rest=(*word % _) => "@rest @1"\fR
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.B \fI<pat1>\fB == \fI<pat2>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
Matches \fI<pat1>\fR, if and only if \fI<pat2>\fR also matches the text of
|
|
|
|
\fI<pat1>\fR's match. (e.g. \fBword == ("foo_" *.)\fR matches words that start
|
|
|
|
with "foo_")
|
2020-09-28 17:56:02 -07:00
|
|
|
|
|
|
|
.B \fI<pat1>\fB != \fI<pat2>\fR
|
2021-05-10 21:30:31 -07:00
|
|
|
Matches \fI<pat1>\fR, if and only if \fI<pat2>\fR does not match the text of
|
|
|
|
\fI<pat1>\fR's match. (e.g. \fBword != ("foo_" *.)\fR matches words that do not
|
|
|
|
start with "foo_")
|
2020-09-28 17:56:02 -07:00
|
|
|
|
2021-05-10 21:30:31 -07:00
|
|
|
.B \fI<name>\fB: \fI<pat>\fR
|
|
|
|
Define \fI<name>\fR to mean \fI<pat>\fR (pattern definition)
|
2021-01-15 12:40:19 -08:00
|
|
|
|
2020-09-13 23:31:38 -07:00
|
|
|
.B # \fI<comment>\fR
|
2020-09-28 16:36:26 -07:00
|
|
|
A line comment
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.SH STRING PATTERNS
|
|
|
|
One of the most common use cases for pattern matching tools is matching plain,
|
|
|
|
literal strings, or strings that are primarily plain strings, with one or two
|
2020-12-12 16:31:53 -08:00
|
|
|
patterns. \fBbp\fR is designed around this fact. The default mode for bp
|
2020-09-13 23:31:38 -07:00
|
|
|
patterns is "string pattern mode". In string pattern mode, all characters
|
|
|
|
are interpreted literally except for the backslash (\fB\\\fR), which may be
|
2020-12-12 16:31:53 -08:00
|
|
|
followed by a bp pattern (see the \fBPATTERNS\fR section above). Optionally,
|
|
|
|
the bp pattern may be terminated by a semicolon (\fB;\fR).
|
2020-09-11 01:54:26 -07:00
|
|
|
|
|
|
|
.SH EXAMPLES
|
|
|
|
.TP
|
|
|
|
.B
|
2020-12-12 16:31:53 -08:00
|
|
|
ls | bp foo
|
2020-09-13 23:31:38 -07:00
|
|
|
Find files whose names contain the string "foo" (a string pattern)
|
2020-09-11 01:54:26 -07:00
|
|
|
|
|
|
|
.TP
|
|
|
|
.B
|
2020-12-12 16:31:53 -08:00
|
|
|
ls | bp '.c\\$' -r '.h'
|
2020-09-11 01:54:26 -07:00
|
|
|
Find files ending with ".c" and replace the extension with ".h"
|
|
|
|
|
|
|
|
.TP
|
|
|
|
.B
|
2021-05-11 12:38:58 -07:00
|
|
|
bp -p '{foobar} parens' my_file.py
|
2021-05-10 21:30:31 -07:00
|
|
|
Find the literal string \fB"foobar"\fR, assuming it's a complete word, followed
|
|
|
|
by a pair of matching parentheses in the file \fImy_file.py\fR
|
2020-09-13 23:31:38 -07:00
|
|
|
|
|
|
|
.TP
|
|
|
|
.B
|
2020-12-12 16:31:53 -08:00
|
|
|
bp -g html -p html-element -D matching-tag=a foo.html
|
2020-09-13 23:31:38 -07:00
|
|
|
Using the \fIhtml\fR grammar, find all \fIhtml-element\fRs matching
|
|
|
|
the tag \fIa\fR in the file \fIfoo.html\fR
|
|
|
|
|
2020-09-11 01:54:26 -07:00
|
|
|
|
|
|
|
.SH AUTHOR
|
|
|
|
Bruce Hill (bruce@bruce-hill.com)
|