Added markdown manpage, which converts to roff using pandoc.
This commit is contained in:
parent
5d5817c2a3
commit
f824d3f3e2
5
Makefile
5
Makefile
@ -12,7 +12,7 @@ ALL_FLAGS=$(CFLAGS) -DBP_NAME="\"$(NAME)\"" $(EXTRA) $(CWARN) $(G) $(O)
|
||||
CFILES=pattern.c definitions.c utils.c match.c files.c print.c json.c
|
||||
OBJFILES=$(CFILES:.c=.o)
|
||||
|
||||
all: $(NAME) tags
|
||||
all: $(NAME) bp.1
|
||||
|
||||
%.o: %.c %.h types.h
|
||||
$(CC) -c $(ALL_FLAGS) -o $@ $<
|
||||
@ -20,6 +20,9 @@ all: $(NAME) tags
|
||||
$(NAME): $(OBJFILES) bp.c
|
||||
$(CC) $(ALL_FLAGS) -o $@ $(OBJFILES) bp.c
|
||||
|
||||
bp.1: bp.1.md
|
||||
pandoc -s $< -t man -o $@
|
||||
|
||||
tags: $(CFILES) bp.c
|
||||
ctags *.c *.h
|
||||
|
||||
|
546
bp.1
546
bp.1
@ -1,256 +1,324 @@
|
||||
.\" Manpage for bp.
|
||||
.\" Contact bruce@bruce-hill.com to correct errors or typos.
|
||||
.TH man 1 "Sep 12, 2020" "0.1" "bp manual page"
|
||||
.\" Automatically generated by Pandoc 2.11.3
|
||||
.\"
|
||||
.TH "BP" "1" "May 17 2021" "" ""
|
||||
.hy
|
||||
.SH NAME
|
||||
bp \- Bruce's Parsing Expression Grammar tool
|
||||
.PP
|
||||
bp - Bruce\[aq]s Parsing Expression Grammar tool
|
||||
.SH SYNOPSIS
|
||||
.B bp
|
||||
[\fI-h\fR|\fI--help\fR]
|
||||
[\fI-v\fR|\fI--verbose\fR]
|
||||
[\fI-e\fR|\fI--explain\fR]
|
||||
[\fI-j\fR|\fI--json\fR]
|
||||
[\fI-l\fR|\fI--list-files\fR]
|
||||
[\fI-i\fR|\fI--ignore-case\fR]
|
||||
[\fI-I\fR|\fI--inplace\fR]
|
||||
[\fI-C\fR|\fI--confirm\fR]
|
||||
[\fI-p\fR|\fI--pattern\fR \fI<pattern>\fR]
|
||||
[\fI-r\fR|\fI--replace\fR \fI<replacement>\fR]
|
||||
[\fI-s\fR|\fI--skip\fR \fI<skip pattern>\fR]
|
||||
[\fI-g\fR|\fI--grammar\fR \fI<grammar file>\fR]
|
||||
[\fI-G\fR|\fI--git\fR]
|
||||
[\fI-c\fR|\fI--context\fR \fI<N>\fR]
|
||||
\fI<pattern>\fR
|
||||
[[--] \fI<input files...>\fR]
|
||||
|
||||
.PP
|
||||
\f[B]bp\f[R] [\f[I]options\&...\f[R]] \f[I]pattern\f[R] [[--]
|
||||
\f[I]files\&...\f[R]]
|
||||
.SH DESCRIPTION
|
||||
\fBbp\fR is a tool that matches parsing expression grammars using a custom syntax.
|
||||
|
||||
.PP
|
||||
\f[B]bp\f[R] is a tool that matches parsing expression grammars using a
|
||||
custom syntax.
|
||||
.SH OPTIONS
|
||||
.B \-v\fR, \fB--verbose
|
||||
.TP
|
||||
\f[B]-v\f[R], \f[B]--verbose\f[R]
|
||||
Print debugging information.
|
||||
|
||||
.B \-e\fR, \fB--explain
|
||||
.TP
|
||||
\f[B]-e\f[R], \f[B]--explain\f[R]
|
||||
Print a visual explanation of the matches.
|
||||
|
||||
.B \-j\fR, \fB--json
|
||||
Print a JSON list of the matches. (Pairs with \fB--verbose\fR for more detail)
|
||||
|
||||
.B \-l\fR, \fB--list-files
|
||||
Print only the names of files containing matches instead of the matches themselves.
|
||||
|
||||
.B \-i\fR, \fB--ignore-case
|
||||
.TP
|
||||
\f[B]-j\f[R], \f[B]--json\f[R]
|
||||
Print a JSON list of the matches.
|
||||
(Pairs with \f[B]--verbose\f[R] for more detail)
|
||||
.TP
|
||||
\f[B]-l\f[R], \f[B]--list-files\f[R]
|
||||
Print only the names of files containing matches instead of the matches
|
||||
themselves.
|
||||
.TP
|
||||
\f[B]-i\f[R], \f[B]--ignore-case\f[R]
|
||||
Perform pattern matching case-insensitively.
|
||||
|
||||
.B \-I\fR, \fB--inplace
|
||||
Perform filtering or replacement in-place (i.e. overwrite files with new content).
|
||||
|
||||
.B \-C\fR, \fB--confirm
|
||||
During in-place modification of a file, confirm before each modification.
|
||||
|
||||
.B \-r\fR, \fB--replace \fI<replacement>\fR
|
||||
.TP
|
||||
\f[B]-I\f[R], \f[B]--inplace\f[R]
|
||||
Perform filtering or replacement in-place (i.e.\ overwrite files with
|
||||
new content).
|
||||
.TP
|
||||
\f[B]-C\f[R], \f[B]--confirm\f[R]
|
||||
During in-place modification of a file, confirm before each
|
||||
modification.
|
||||
.TP
|
||||
\f[B]-r\f[R], \f[B]--replace\f[R] \f[I]replacement\f[R]
|
||||
Replace all occurrences of the main pattern with the given string.
|
||||
|
||||
.B \-s\fR, \fB--skip \fI<skip pattern>\fR
|
||||
While looking for matches, skip over \fB<skip pattern>\fR occurrences. This can
|
||||
be useful for behavior like \fBbp -s string\fR (avoiding matches inside string
|
||||
literals).
|
||||
|
||||
.B \-g\fR, \fB--grammar \fI<grammar file>\fR
|
||||
.TP
|
||||
\f[B]-s\f[R], \f[B]--skip\f[R] \f[I]pattern\f[R]
|
||||
While looking for matches, skip over \f[I]pattern\f[R] occurrences.
|
||||
This can be useful for behavior like \f[B]bp -s string\f[R] (avoiding
|
||||
matches inside string literals).
|
||||
.TP
|
||||
\f[B]-g\f[R], \f[B]--grammar\f[R] \f[I]grammar-file\f[R]
|
||||
Load the grammar from the given file.
|
||||
|
||||
.B \-G\fR, \fB--git\fR
|
||||
Use \fBgit\fR to get a list of files. Remaining file arguments (if any) are
|
||||
passed to \fBgit --ls-files\fR instead of treated as literal files.
|
||||
|
||||
.B \-c\fR, \fB--context \fI<N>\fR
|
||||
The number of lines of context to print. If \fI<N>\fR is 0, print only the
|
||||
exact text of the matches. If \fI<N>\fR is "all", print the entire file.
|
||||
Otherwise, if \fI<N>\fR is a positive integer, print the whole line on which
|
||||
matches occur, as well as the \fI<N-1>\fR lines before and after the match. The
|
||||
default value for this argument is 1 (print whole lines where matches occur).
|
||||
|
||||
.B \-f\fR, \fB\--format \fIauto|fancy|plain\fR
|
||||
Set the output format. \fIfancy\fR includes colors and line numbers,
|
||||
\fIplain\fR includes neither, and \fIauto\fR (the default) uses \fIfancy\fR
|
||||
See the \f[B]GRAMMAR FILES\f[R] section for more info.
|
||||
.TP
|
||||
\f[B]-G\f[R], \f[B]--git\f[R]
|
||||
Use \f[B]git\f[R] to get a list of files.
|
||||
Remaining file arguments (if any) are passed to \f[B]git --ls-files\f[R]
|
||||
instead of treated as literal files.
|
||||
.TP
|
||||
\f[B]-c\f[R], \f[B]--context\f[R] \f[I]N\f[R]
|
||||
The number of lines of context to print.
|
||||
If \f[I]N\f[R] is 0, print only the exact text of the matches.
|
||||
If \f[I]N\f[R] is \f[B]\f[CB]\[dq]all\[dq]\f[B]\f[R], print the entire
|
||||
file.
|
||||
Otherwise, if \f[I]N\f[R] is a positive integer, print the whole line on
|
||||
which matches occur, as well as the \f[I]N-1\f[R] lines before and after
|
||||
the match.
|
||||
The default value for this argument is \f[B]1\f[R] (print whole lines
|
||||
where matches occur).
|
||||
.TP
|
||||
\f[B]-f\f[R], \f[B]--format\f[R] \f[I]auto\f[R]|\f[I]fancy\f[R]|\f[I]plain\f[R]
|
||||
Set the output format.
|
||||
\f[I]fancy\f[R] includes colors and line numbers, \f[I]plain\f[R]
|
||||
includes neither, and \f[I]auto\f[R] (the default) uses \f[I]fancy\f[R]
|
||||
formatting only when the output is a TTY.
|
||||
|
||||
.B \--help
|
||||
.TP
|
||||
\f[B]--help\f[R]
|
||||
Print the usage and exit.
|
||||
|
||||
.B <string-pattern>
|
||||
The main pattern for bp to match. By default, this pattern is a string
|
||||
pattern (see the \fBSTRING PATTERNS\fR section below).
|
||||
|
||||
.B <input files...>
|
||||
The input files to search. If no input files are provided and data was
|
||||
piped in, that data will be used instead. If neither are provided,
|
||||
\fBbp\fR will search through all files in the current directory and
|
||||
its subdirectories (recursively).
|
||||
|
||||
.SH PATTERNS
|
||||
bp patterns are based off of a combination of Parsing Expression Grammars
|
||||
and regular expression syntax. The syntax is designed to map closely to
|
||||
verbal descriptions of the patterns, and prefix operators are preferred over
|
||||
suffix operators (as is common in regex syntax).
|
||||
|
||||
Some patterns additionally have "multi-line" variants, which means that they
|
||||
include the newline character.
|
||||
|
||||
.I <pat1> <pat2>
|
||||
A sequence: \fI<pat1>\fR followed by \fI<pat2>\fR
|
||||
|
||||
.I <pat1> \fB/\fI <pat2>\fR
|
||||
A choice: \fI<pat1>\fR, or if it doesn't match, then \fI<pat2>\fR
|
||||
|
||||
.B .
|
||||
Any character (excluding newline)
|
||||
|
||||
.B ^
|
||||
Start of a line
|
||||
|
||||
.B ^^
|
||||
Start of the text
|
||||
|
||||
.B $
|
||||
End of a line (does not include newline character)
|
||||
|
||||
.B $$
|
||||
End of the text
|
||||
|
||||
.B _
|
||||
Zero or more whitespace characters (specifically, spaces and tabs)
|
||||
|
||||
.B __
|
||||
Zero or more whitespace or newline characters
|
||||
|
||||
.B "foo"
|
||||
.B 'foo'
|
||||
The literal string \fIstring\fR. Escape sequences are not allowed.
|
||||
|
||||
.B {foo}
|
||||
The literal string \fIfoo\fR with word boundaries on either end. Escape sequences are not allowed.
|
||||
|
||||
.B `\fI<c>\fR
|
||||
The literal character \fI<c>\fR (e.g. \fB`@\fR matches the "@" character)
|
||||
|
||||
.B `\fI<c1>\fB,\fI<c2>\fR
|
||||
The literal character \fI<c1>\fR or \fI<c2>\fR (e.g. \fB`a,e,i,o,u\fR)
|
||||
|
||||
.B `\fI<c1>\fB-\fI<c2>\fR
|
||||
The character range \fI<c1>\fR to \fI<c2>\fR (e.g. \fB`a-z\fR).
|
||||
Multiple ranges can be combined with a comma (e.g. \fB`a-z,A-Z\fR).
|
||||
|
||||
.B \\\\\fI<esc>\fR
|
||||
An escape sequence (e.g. \fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.)
|
||||
|
||||
.B \\\\\fI<esc1>\fB-\fI<esc2>\fR
|
||||
An escape sequence range from \fI<esc1>\fR to \fI<esc2>\fR (e.g. \fB\\x00-x1F\fR)
|
||||
|
||||
.B \\\\N
|
||||
A special case escape that matches a "nodent": one or more newlines followed by
|
||||
the same indentation that occurs on the current line.
|
||||
|
||||
.B !\fI<pat>\fR
|
||||
Not \fI<pat>\fR
|
||||
|
||||
.B [\fI<pat>\fB]
|
||||
Maybe \fI<pat>\fR
|
||||
|
||||
.B \fI<N> <pat>\fR
|
||||
Exactly \fIN\fR repetitions of \fI<pat>\fR (e.g. \fB5 `*\fR matches "*****")
|
||||
|
||||
.B \fI<N>\fB-\fI<M> <pat>\fR
|
||||
Between \fI<N>\fR and \fI<M>\fR repetitions of \fI<pat>\fR (e.g. \fB2-3 `*\fR)
|
||||
|
||||
.B \fI<N>\fB+ \fI<pat>\fR
|
||||
At least \fI<N>\fR or more repetitions of \fI<pat>\fR (e.g. \fB 2+ `*\fR)
|
||||
|
||||
.B *\fI<pat>\fR
|
||||
Some \fI<pat>\fRs (zero or more)
|
||||
|
||||
.B +\fI<pat>\fR
|
||||
At least one \fI<pat>\fRs
|
||||
|
||||
.B \fI<repeating-pat>\fR \fB%\fI <sep>\fR
|
||||
\fI<repeating-pat>\fR separated by \fI<sep>\fR (e.g. \fB*word % `,\fR matches
|
||||
zero or more comma-separated words)
|
||||
|
||||
.B .. \fI<pat>\fR
|
||||
Any text (except newlines) up to and including \fI<pat>\fR
|
||||
|
||||
.B .. % \fI<skip>\fR \fI<pat>\fB
|
||||
Any text (except newlines) up to and including \fI<pat>\fR, skipping over
|
||||
instances of \fI<skip>\fR (e.g. \fB`"..`" % (`\\.)\fR)
|
||||
|
||||
.B <\fI<pat>\fR
|
||||
Just after \fI<pat>\fR (lookbehind)
|
||||
|
||||
.B >\fI<pat>\fR
|
||||
Just before \fI<pat>\fR (lookahead)
|
||||
|
||||
.B @\fI<pat>\fR
|
||||
Capture \fI<pat>\fR
|
||||
|
||||
.B @\fI<name>\fB=\fI<pat>\fR
|
||||
Let \fI<name>\fR equal \fI<pat>\fR (named capture). Named captures can be used
|
||||
as backreferences like so: \fB@foo=word `( foo `)\fR (matches "asdf(asdf)" or
|
||||
"baz(baz)", but not "foo(baz)")
|
||||
|
||||
.B \fI<pat>\fB => '\fI<replacement>\fB'
|
||||
Replace \fI<pat>\fR with \fI<replacement>\fR. Note: \fI<replacement>\fR should
|
||||
be a string, and it may contain references to captured values: \fB@0\fR
|
||||
(the whole of \fI<pat>\fR), \fB@1\fR (the first capture in \fI<pat>\fR),
|
||||
\fB@\fIfoo\fR (the capture named \fIfoo\fR in \fI<pat>\fR), etc.
|
||||
For example, \fB@word _ @rest=(*word % _) => "@rest @1"\fR
|
||||
|
||||
.B \fI<pat1>\fB == \fI<pat2>\fR
|
||||
Matches \fI<pat1>\fR, if and only if \fI<pat2>\fR also matches the text of
|
||||
\fI<pat1>\fR's match. (e.g. \fBword == ("foo_" *.)\fR matches words that start
|
||||
with "foo_")
|
||||
|
||||
.B \fI<pat1>\fB != \fI<pat2>\fR
|
||||
Matches \fI<pat1>\fR, if and only if \fI<pat2>\fR does not match the text of
|
||||
\fI<pat1>\fR's match. (e.g. \fBword == ("foo_" *.)\fR matches words that do not
|
||||
start with "foo_")
|
||||
|
||||
.B \fI<name>\fB: \fI<pat>\fR
|
||||
Define \fI<name>\fR to mean \fI<pat>\fR (pattern definition)
|
||||
|
||||
.B # \fI<comment>\fR
|
||||
A line comment
|
||||
|
||||
.TP
|
||||
\f[I]pattern\f[R]
|
||||
The main pattern for bp to match.
|
||||
By default, this pattern is a string pattern (see the \f[B]STRING
|
||||
PATTERNS\f[R] section below).
|
||||
.TP
|
||||
\f[I]files\&...\f[R]
|
||||
The input files to search.
|
||||
If no input files are provided and data was piped in, that data will be
|
||||
used instead.
|
||||
If neither are provided, \f[B]bp\f[R] will search through all files in
|
||||
the current directory and its subdirectories (recursively).
|
||||
.SH STRING PATTERNS
|
||||
One of the most common use cases for pattern matching tools is matching plain,
|
||||
literal strings, or strings that are primarily plain strings, with one or two
|
||||
patterns. \fBbp\fR is designed around this fact. The default mode for bp
|
||||
patterns is "string pattern mode". In string pattern mode, all characters
|
||||
are interpreted literally except for the backslash (\fB\\\fR), which may be
|
||||
followed by a bp pattern (see the \fBPATTERNS\fR section above). Optionally,
|
||||
the bp pattern may be terminated by a semicolon (\fB;\fR).
|
||||
|
||||
.PP
|
||||
One of the most common use cases for pattern matching tools is matching
|
||||
plain, literal strings, or strings that are primarily plain strings,
|
||||
with one or two patterns.
|
||||
\f[B]bp\f[R] is designed around this fact.
|
||||
The default mode for bp patterns is \[lq]string pattern mode\[rq].
|
||||
In string pattern mode, all characters are interpreted literally except
|
||||
for the backslash (\f[B]\[rs]\f[R]), which may be followed by a bp
|
||||
pattern (see the \f[B]PATTERNS\f[R] section below).
|
||||
Optionally, the bp pattern may be terminated by a semicolon
|
||||
(\f[B];\f[R]).
|
||||
.SH PATTERNS
|
||||
.PP
|
||||
\f[B]bp\f[R] patterns are based off of a combination of Parsing
|
||||
Expression Grammars and regular expression syntax.
|
||||
The syntax is designed to map closely to verbal descriptions of the
|
||||
patterns, and prefix operators are preferred over suffix operators (as
|
||||
is common in regex syntax).
|
||||
.PP
|
||||
Some patterns additionally have \[lq]multi-line\[rq] variants, which
|
||||
means that they include the newline character.
|
||||
.TP
|
||||
\f[I]pat1 pat2\f[R]
|
||||
A sequence: \f[I]pat1\f[R] followed by \f[I]pat2\f[R]
|
||||
.TP
|
||||
\f[I]pat1\f[R] \f[B]/\f[R] \f[I]pat2\f[R]
|
||||
A choice: \f[I]pat1\f[R], or if it doesn\[aq]t match, then
|
||||
\f[I]pat2\f[R]
|
||||
.TP
|
||||
\f[B].\f[R]
|
||||
Any character (excluding newline)
|
||||
.TP
|
||||
\f[B]\[ha]\f[R]
|
||||
Start of a line
|
||||
.TP
|
||||
\f[B]\[ha]\[ha]\f[R]
|
||||
Start of the text
|
||||
.TP
|
||||
\f[B]$\f[R]
|
||||
End of a line (does not include newline character)
|
||||
.TP
|
||||
\f[B]$$\f[R]
|
||||
End of the text
|
||||
.TP
|
||||
\f[B]_\f[R]
|
||||
Zero or more whitespace characters, including spaces and tabs, but not
|
||||
newlines.
|
||||
.TP
|
||||
\f[B]__\f[R]
|
||||
Zero or more whitespace characters, including spaces, tabs, newlines,
|
||||
and comments.
|
||||
Comments are undefined by default, but may be defined by a separate
|
||||
grammar file.
|
||||
See the \f[B]GRAMMAR FILES\f[R] section for more info.
|
||||
.TP
|
||||
\f[B]\[dq]foo\[dq]\f[R], \f[B]\[aq]foo\[aq]\f[R]
|
||||
The literal string \f[B]\[lq]foo\[rq]\f[R].
|
||||
Single and double quotes are treated the same.
|
||||
Escape sequences are not allowed.
|
||||
.TP
|
||||
\f[B]{foo}\f[R]
|
||||
The literal string \f[B]\[lq]foo\[rq]\f[R] with word boundaries on
|
||||
either end.
|
||||
Escape sequences are not allowed.
|
||||
.TP
|
||||
\f[B]\[ga]\f[R]\f[I]c\f[R]
|
||||
The literal character \f[I]c\f[R] (e.g.\ \f[B]\[ga]\[at]\f[R] matches the
|
||||
\[lq]\[at]\[rq] character)
|
||||
.TP
|
||||
\f[B]\[ga]\f[R]\f[I]c1\f[R]\f[B],\f[R]\f[I]c2\f[R]
|
||||
The literal character \f[I]c1\f[R] or \f[I]c2\f[R]
|
||||
(e.g.\ \f[B]\[ga]a,e,i,o,u\f[R])
|
||||
.TP
|
||||
\f[B]\[ga]\f[R]\f[I]c1\f[R]\f[B]-\f[R]\f[I]c2\f[R]
|
||||
The character range \f[I]c1\f[R] to \f[I]c2\f[R]
|
||||
(e.g.\ \f[B]\[ga]a-z\f[R]).
|
||||
Multiple ranges can be combined with a comma
|
||||
(e.g.\ \f[B]\[ga]a-z,A-Z\f[R]).
|
||||
.TP
|
||||
\f[B]\[rs]\f[R]\f[I]esc\f[R]
|
||||
An escape sequence (e.g.\ \f[B]\[rs]n\f[R], \f[B]\[rs]x1F\f[R],
|
||||
\f[B]\[rs]033\f[R], etc.)
|
||||
.TP
|
||||
\f[B]\[rs]\f[R]\f[I]esc1\f[R]\f[B]-\f[R]\f[I]esc2\f[R]
|
||||
An escape sequence range from \f[I]esc1\f[R] to \f[I]esc2\f[R]
|
||||
(e.g.\ \f[B]\[rs]x00-x1F\f[R])
|
||||
.TP
|
||||
\f[B]\[rs]N\f[R]
|
||||
A special case escape that matches a \[lq]nodent\[rq]: one or more
|
||||
newlines followed by the same indentation that occurs on the current
|
||||
line.
|
||||
.TP
|
||||
\f[B]!\f[R] \f[I]pat\f[R]
|
||||
Not \f[I]pat\f[R]
|
||||
.TP
|
||||
\f[B][\f[R] \f[I]pat\f[R] \f[B]]\f[R]
|
||||
Maybe \f[I]pat\f[R]
|
||||
.TP
|
||||
\f[I]N\f[R] \f[I]pat\f[R]
|
||||
Exactly \f[I]N\f[R] repetitions of \f[I]pat\f[R] (e.g.\ \f[B]5
|
||||
\[ga]x\f[R] matches \f[B]\[lq]xxxxx\[rq]\f[R])
|
||||
.TP
|
||||
\f[I]N\f[R] \f[B]-\f[R] \f[I]M\f[R] \f[I]pat\f[R]
|
||||
Between \f[I]N\f[R] and \f[I]M\f[R] repetitions of \f[I]pat\f[R]
|
||||
(e.g.\ \f[B]2-3 \[ga]x\f[R] matches \f[B]\[lq]xx\[rq]\f[R] or
|
||||
\f[B]\[lq]xxx\[rq]\f[R])
|
||||
.TP
|
||||
\f[I]N\f[R]\f[B]+\f[R] \f[I]pat\f[R]
|
||||
At least \f[I]N\f[R] repetitions of \f[I]pat\f[R] (e.g.\ \f[B]2+
|
||||
\[ga]x\f[R] matches \f[B]\[lq]xx\[rq]\f[R], \f[B]\[lq]xxx\[rq]\f[R],
|
||||
\f[B]\[lq]xxxx\[rq]\f[R], etc.)
|
||||
.TP
|
||||
\f[B]*\f[R] \f[I]pat\f[R]
|
||||
Some \f[I]pat\f[R]s (zero or more, e.g.\ \f[B]* \[ga]x\f[R] matches
|
||||
\f[B]\[lq]\[rq]\f[R], \f[B]\[lq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R],
|
||||
etc.)
|
||||
.TP
|
||||
\f[B]+\f[R] \f[I]pat\f[R]
|
||||
One or more \f[I]pat\f[R]s (e.g.\ \f[B]+ \[ga]x\f[R] matches
|
||||
\f[B]\[lq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R], \f[B]\[lq]xxx\[rq]\f[R],
|
||||
etc.)
|
||||
.TP
|
||||
\f[I]repeating-pat\f[R] \f[B]%\f[R] \f[I]sep\f[R]
|
||||
\f[I]repeating-pat\f[R] separated by \f[I]sep\f[R] (e.g.\ \f[B]*word %
|
||||
\[ga],\f[R] matches zero or more comma-separated words)
|
||||
.TP
|
||||
\f[B]..\f[R] \f[I]pat\f[R]
|
||||
Any text (except newlines) up to and including \f[I]pat\f[R]
|
||||
.TP
|
||||
\f[B].. %\f[R] \f[I]skip\f[R] \f[I]pat\f[R]
|
||||
Any text (except newlines) up to and including \f[I]pat\f[R], skipping
|
||||
over instances of \f[I]skip\f[R] (e.g.\ \f[B]\[ga]\[dq]..\[ga]\[dq] %
|
||||
(\[ga]\[rs].)\f[R])
|
||||
.TP
|
||||
\f[B]<\f[R] \f[I]pat\f[R]
|
||||
Just after \f[I]pat\f[R] (lookbehind)
|
||||
.TP
|
||||
\f[B]>\f[R] \f[I]pat\f[R]
|
||||
Just before \f[I]pat\f[R] (lookahead)
|
||||
.TP
|
||||
\f[B]\[at]\f[R] \f[I]pat\f[R]
|
||||
Capture \f[I]pat\f[R]
|
||||
.TP
|
||||
\f[B]foo\f[R]
|
||||
The named pattern whose name is \f[B]\[lq]foo\[rq]\f[R].
|
||||
Pattern names come from definitions in grammar files or from named
|
||||
captures.
|
||||
Pattern names may contain dashes (\f[B]-\f[R]), but not underscores
|
||||
(\f[B]_\f[R]), since the underscore is used to match whitespace.
|
||||
See the \f[B]GRAMMAR FILES\f[R] section for more info.
|
||||
.TP
|
||||
\f[B]\[at]\f[R] \f[I]name\f[R] \f[B]=\f[R] \f[I]pat\f[R]
|
||||
Let \f[I]name\f[R] equal \f[I]pat\f[R] (named capture).
|
||||
Named captures can be used as backreferences like so: \f[B]\[at]foo=word
|
||||
\[ga]( foo \[ga])\f[R] (matches \f[B]\[lq]asdf(asdf)\[rq]\f[R] or
|
||||
\f[B]\[lq]baz(baz)\[rq]\f[R], but not \f[B]\[lq]foo(baz)\[rq]\f[R])
|
||||
.TP
|
||||
\f[I]pat\f[R] \f[B]=> \[aq]\f[R]\f[I]replacement\f[R]\f[B]\[aq]\f[R]
|
||||
Replace \f[I]pat\f[R] with \f[I]replacement\f[R].
|
||||
Note: \f[I]replacement\f[R] should be a string, and it may contain
|
||||
references to captured values: \f[B]\[at]0\f[R] (the whole of
|
||||
\f[I]pat\f[R]), \f[B]\[at]1\f[R] (the first capture in \f[I]pat\f[R]),
|
||||
\f[B]\[at]\f[R]\f[I]foo\f[R] (the capture named \f[I]foo\f[R] in
|
||||
\f[I]pat\f[R]), etc.
|
||||
For example, \f[B]\[at]word _ \[at]rest=(*word % _) => \[dq]\[at]rest
|
||||
\[at]1\[dq]\f[R]
|
||||
.TP
|
||||
\f[I]pat1\f[R] \f[B]==\f[R] \f[I]pat2\f[R]
|
||||
Matches \f[I]pat1\f[R], if and only if \f[I]pat2\f[R] also matches the
|
||||
text of \f[I]pat1\f[R]\[aq]s match.
|
||||
(e.g.\ \f[B]word == (\[dq]foo_\[dq] *.)\f[R] matches words that start
|
||||
with \f[B]\[lq]foo_\[rq]\f[R])
|
||||
.TP
|
||||
\f[I]pat1\f[R] \f[B]!=\f[R] \f[I]pat2\f[R]
|
||||
Matches \f[I]pat1\f[R], if and only if \f[I]pat2\f[R] does not match the
|
||||
text of \f[I]pat1\f[R]\[aq]s match.
|
||||
(e.g.\ \f[B]word != (\[dq]foo_\[dq] *.)\f[R] matches words that do not
|
||||
start with \f[B]\[lq]foo_\[rq]\f[R])
|
||||
.TP
|
||||
\f[I]name\f[R]\f[B]:\f[R] \f[I]pat\f[R]
|
||||
Define \f[I]name\f[R] to mean \f[I]pat\f[R] (pattern definition)
|
||||
.TP
|
||||
\f[B]#\f[R] \f[I]comment\f[R]
|
||||
A line comment
|
||||
.SH GRAMMAR FILES
|
||||
.PP
|
||||
\f[B]bp\f[R] allows loading extra grammar files, which define patterns
|
||||
which may be used for matching.
|
||||
The \f[B]builtins\f[R] grammar file is loaded by default, and it defines
|
||||
a few useful general-purpose patterns.
|
||||
For example, it defines the \f[B]parens\f[R] rule, which matches pairs
|
||||
of matching parentheses, accounting for nested inner parentheses:
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
bp -p \[aq]\[dq]my_func\[dq] parens\[aq]
|
||||
\f[R]
|
||||
.fi
|
||||
.PP
|
||||
\f[B]bp\f[R] also comes with a few grammar files for common programming
|
||||
languages, which may be loaded on demand.
|
||||
These grammar files are not comprehensive syntax definitions, but only
|
||||
some common patterns.
|
||||
For example, the c++ grammar file contains definitions for
|
||||
\f[B]//\f[R]-style line comments as well as \f[B]/*\&...*/\f[R]-style
|
||||
block comments.
|
||||
Thus, you can find all comments with the string \[lq]TODO\[rq] with the
|
||||
following command:
|
||||
.IP
|
||||
.nf
|
||||
\f[C]
|
||||
bp -g c++ -p \[aq]comment==(..%\[rs]n \[dq]TODO\[dq] ..%\[rs]n$$)\[aq] *.cpp
|
||||
\f[R]
|
||||
.fi
|
||||
.SH EXAMPLES
|
||||
.TP
|
||||
.B
|
||||
ls | bp foo
|
||||
Find files containing the string "foo" (a string pattern)
|
||||
|
||||
\f[B]ls | bp foo\f[R]
|
||||
Find files containing the string \[dq]foo\[dq] (a string pattern)
|
||||
.TP
|
||||
.B
|
||||
ls | bp '.c\\$' -r '.h'
|
||||
Find files ending with ".c" and replace the extension with ".h"
|
||||
|
||||
\f[B]ls | bp \[aq].c\[rs]$\[aq] -r \[aq].h\[aq]\f[R]
|
||||
Find files ending with \[dq].c\[dq] and replace the extension with
|
||||
\[dq].h\[dq]
|
||||
.TP
|
||||
.B
|
||||
bp -p '{foobar} parens' my_file.py
|
||||
Find the literal string \fB"foobar"\fR, assuming it's a complete word, followed
|
||||
by a pair of matching parentheses in the file \fImy_file.py\fR
|
||||
|
||||
\f[B]bp -p \[aq]{foobar} parens\[aq] my_file.py\f[R]
|
||||
Find the literal string \f[B]\[dq]foobar\[dq]\f[R], assuming it\[aq]s a
|
||||
complete word, followed by a pair of matching parentheses in the file
|
||||
\f[I]my_file.py\f[R]
|
||||
.TP
|
||||
.B
|
||||
bp -g html -p html-element -D matching-tag=a foo.html
|
||||
Using the \fIhtml\fR grammar, find all \fIhtml-element\fRs matching
|
||||
the tag \fIa\fR in the file \fIfoo.html\fR
|
||||
|
||||
|
||||
.SH AUTHOR
|
||||
Bruce Hill (bruce@bruce-hill.com)
|
||||
\f[B]bp -g html -p \[aq]html-element==(\[dq]<a \[dq]..%\[rs]n$$)\[aq] foo.html\f[R]
|
||||
Using the \f[I]html\f[R] grammar, find all \f[I]html-element\f[R]s
|
||||
matching the tag \f[I]a\f[R] in the file \f[I]foo.html\f[R]
|
||||
.SH AUTHORS
|
||||
Bruce Hill (\f[I]bruce\[at]bruce-hill.com\f[R]).
|
||||
|
286
bp.1.md
Normal file
286
bp.1.md
Normal file
@ -0,0 +1,286 @@
|
||||
% BP(1)
|
||||
% Bruce Hill (*bruce@bruce-hill.com*)
|
||||
% May 17 2021
|
||||
|
||||
# NAME
|
||||
|
||||
bp - Bruce\'s Parsing Expression Grammar tool
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
**bp**
|
||||
\[*options...*\]
|
||||
*pattern*
|
||||
\[\[\--\] *files...*\]
|
||||
|
||||
# DESCRIPTION
|
||||
|
||||
**bp** is a tool that matches parsing expression grammars using a custom
|
||||
syntax.
|
||||
|
||||
# OPTIONS
|
||||
|
||||
**-v**, **\--verbose**
|
||||
: Print debugging information.
|
||||
|
||||
**-e**, **\--explain**
|
||||
: Print a visual explanation of the matches.
|
||||
|
||||
**-j**, **\--json**
|
||||
: Print a JSON list of the matches. (Pairs with **\--verbose** for more detail)
|
||||
|
||||
**-l**, **\--list-files**
|
||||
: Print only the names of files containing matches instead of the matches
|
||||
themselves.
|
||||
|
||||
**-i**, **\--ignore-case**
|
||||
: Perform pattern matching case-insensitively.
|
||||
|
||||
**-I**, **\--inplace**
|
||||
: Perform filtering or replacement in-place (i.e. overwrite files with new
|
||||
content).
|
||||
|
||||
**-C**, **\--confirm**
|
||||
: During in-place modification of a file, confirm before each modification.
|
||||
|
||||
**-r**, **\--replace** *replacement*
|
||||
: Replace all occurrences of the main pattern with the given string.
|
||||
|
||||
**-s**, **\--skip** *pattern*
|
||||
: While looking for matches, skip over *pattern* occurrences. This can be
|
||||
useful for behavior like **bp -s string** (avoiding matches inside string
|
||||
literals).
|
||||
|
||||
**-g**, **\--grammar** *grammar-file*
|
||||
: Load the grammar from the given file. See the **GRAMMAR FILES** section
|
||||
for more info.
|
||||
|
||||
**-G**, **\--git**
|
||||
: Use **git** to get a list of files. Remaining file arguments (if any) are
|
||||
passed to **git ls-files** instead of treated as literal files.
|
||||
|
||||
**-c**, **\--context** *N*
|
||||
: The number of lines of context to print. If *N* is 0, print only the
|
||||
exact text of the matches. If *N* is **`"all"`**, print the entire file.
|
||||
Otherwise, if *N* is a positive integer, print the whole line on which
|
||||
matches occur, as well as the *N-1* lines before and after the match. The
|
||||
default value for this argument is **1** (print whole lines where matches
|
||||
occur).
|
||||
|
||||
**-f**, **\--format** *auto*\|*fancy*\|*plain*
|
||||
: Set the output format. *fancy* includes colors and line numbers, *plain*
|
||||
includes neither, and *auto* (the default) uses *fancy* formatting only when
|
||||
the output is a TTY.
|
||||
|
||||
**\--help**
|
||||
: Print the usage and exit.
|
||||
|
||||
*pattern*
|
||||
: The main pattern for bp to match. By default, this pattern is a string
|
||||
pattern (see the **STRING PATTERNS** section below).
|
||||
|
||||
*files...*
|
||||
: The input files to search. If no input files are provided and data was piped
|
||||
in, that data will be used instead. If neither are provided, **bp** will search
|
||||
through all files in the current directory and its subdirectories
|
||||
(recursively).
|
||||
|
||||
|
||||
# STRING PATTERNS
|
||||
|
||||
One of the most common use cases for pattern matching tools is matching plain,
|
||||
literal strings, or strings that are primarily plain strings, with one or two
|
||||
patterns. **bp** is designed around this fact. The default mode for bp patterns
|
||||
is "string pattern mode". In string pattern mode, all characters are
|
||||
interpreted literally except for the backslash (**\\**), which may be followed
|
||||
by a bp pattern (see the **PATTERNS** section below). Optionally, the bp
|
||||
pattern may be terminated by a semicolon (**;**).
|
||||
|
||||
|
||||
# PATTERNS
|
||||
|
||||
**bp** patterns are based off of a combination of Parsing Expression Grammars
|
||||
and regular expression syntax. The syntax is designed to map closely to verbal
|
||||
descriptions of the patterns, and prefix operators are preferred over suffix
|
||||
operators (as is common in regex syntax).
|
||||
|
||||
Some patterns additionally have "multi-line" variants, which means that they
|
||||
include the newline character.
|
||||
|
||||
*pat1 pat2*
|
||||
: A sequence: *pat1* followed by *pat2*
|
||||
|
||||
*pat1* **/** *pat2*
|
||||
: A choice: *pat1*, or if it doesn\'t match, then *pat2*
|
||||
|
||||
**.**
|
||||
: Any character (excluding newline)
|
||||
|
||||
**\^**
|
||||
: Start of a line
|
||||
|
||||
**\^\^**
|
||||
: Start of the text
|
||||
|
||||
**\$**
|
||||
: End of a line (does not include newline character)
|
||||
|
||||
**\$\$**
|
||||
: End of the text
|
||||
|
||||
**\_**
|
||||
: Zero or more whitespace characters, including spaces and tabs, but not
|
||||
newlines.
|
||||
|
||||
**\_\_**
|
||||
: Zero or more whitespace characters, including spaces, tabs, newlines, and
|
||||
comments. Comments are undefined by default, but may be defined by a separate
|
||||
grammar file. See the **GRAMMAR FILES** section for more info.
|
||||
|
||||
**\"foo\"**, **\'foo\'**
|
||||
: The literal string **"foo"**. Single and double quotes are treated the same.
|
||||
Escape sequences are not allowed.
|
||||
|
||||
**{foo}**
|
||||
: The literal string **"foo"** with word boundaries on either end. Escape
|
||||
sequences are not allowed.
|
||||
|
||||
**\`***c*
|
||||
: The literal character *c* (e.g. **\`\@** matches the "@" character)
|
||||
|
||||
**\`***c1***,***c2*
|
||||
: The literal character *c1* or *c2* (e.g. **\`a,e,i,o,u**)
|
||||
|
||||
**\`***c1***-***c2*
|
||||
: The character range *c1* to *c2* (e.g. **\`a-z**). Multiple ranges
|
||||
can be combined with a comma (e.g. **\`a-z,A-Z**).
|
||||
|
||||
**\\***esc*
|
||||
: An escape sequence (e.g. **\\n**, **\\x1F**, **\\033**, etc.)
|
||||
|
||||
**\\***esc1***-***esc2*
|
||||
: An escape sequence range from *esc1* to *esc2* (e.g. **\\x00-x1F**)
|
||||
|
||||
**\\N**
|
||||
: A special case escape that matches a "nodent": one or more newlines followed
|
||||
by the same indentation that occurs on the current line.
|
||||
|
||||
**!** *pat*
|
||||
: Not *pat*
|
||||
|
||||
**\[** *pat* **\]**
|
||||
: Maybe *pat*
|
||||
|
||||
*N* *pat*
|
||||
: Exactly *N* repetitions of *pat* (e.g. **5 \`x** matches **"xxxxx"**)
|
||||
|
||||
*N* **-** *M* *pat*
|
||||
: Between *N* and *M* repetitions of *pat* (e.g. **2-3 \`x**
|
||||
matches **"xx"** or **"xxx"**)
|
||||
|
||||
*N***+** *pat*
|
||||
: At least *N* repetitions of *pat* (e.g. **2+ \`x** matches
|
||||
**"xx"**, **"xxx"**, **"xxxx"**, etc.)
|
||||
|
||||
**\*** *pat*
|
||||
: Some *pat*s (zero or more, e.g. **\* \`x** matches **\"\"**, **\"x\"**,
|
||||
**\"xx\"**, etc.)
|
||||
|
||||
**+** *pat*
|
||||
: One or more *pat*s (e.g. **\+ \`x** matches **"x"**, **"xx"**,
|
||||
**"xxx"**, etc.)
|
||||
|
||||
*repeating-pat* **%** *sep*
|
||||
: *repeating-pat* separated by *sep* (e.g. **\*word % \`,** matches
|
||||
zero or more comma-separated words)
|
||||
|
||||
**..** *pat*
|
||||
: Any text (except newlines) up to and including *pat*
|
||||
|
||||
**.. %** *skip* *pat*
|
||||
: Any text (except newlines) up to and including *pat*, skipping over
|
||||
instances of *skip* (e.g. **\`\"..\`\" % (\`\\.)**)
|
||||
|
||||
**\<** *pat*
|
||||
: Just after *pat* (lookbehind)
|
||||
|
||||
**\>** *pat*
|
||||
: Just before *pat* (lookahead)
|
||||
|
||||
**\@** *pat*
|
||||
: Capture *pat*
|
||||
|
||||
**foo**
|
||||
: The named pattern whose name is **"foo"**. Pattern names come from definitions in
|
||||
grammar files or from named captures. Pattern names may contain dashes (**-**),
|
||||
but not underscores (**\_**), since the underscore is used to match whitespace.
|
||||
See the **GRAMMAR FILES** section for more info.
|
||||
|
||||
**\@** *name* **=** *pat*
|
||||
: Let *name* equal *pat* (named capture). Named captures can be used as
|
||||
backreferences like so: **\@foo=word \`( foo \`)** (matches **"asdf(asdf)"** or
|
||||
**"baz(baz)"**, but not **"foo(baz)"**)
|
||||
|
||||
*pat* **=\> \'***replacement***\'**
|
||||
: Replace *pat* with *replacement*. Note: *replacement* should be a
|
||||
string, and it may contain references to captured values: **\@0** (the whole of
|
||||
*pat*), **\@1** (the first capture in *pat*), **\@***foo* (the capture
|
||||
named *foo* in *pat*), etc. For example, **\@word \_ \@rest=(\*word % \_)
|
||||
=\> \"\@rest \@1\"**
|
||||
|
||||
*pat1* **==** *pat2*
|
||||
: Matches *pat1*, if and only if *pat2* also matches the text of
|
||||
*pat1*\'s match. (e.g. **word == (\"foo\_\" \*.)** matches words that start
|
||||
with **"foo\_"**)
|
||||
|
||||
*pat1* **!=** *pat2*
|
||||
: Matches *pat1*, if and only if *pat2* does not match the text of
|
||||
*pat1*\'s match. (e.g. **word != (\"foo\_\" \*.)** matches words that do
|
||||
not start with **"foo\_"**)
|
||||
|
||||
*name***:** *pat*
|
||||
: Define *name* to mean *pat* (pattern definition)
|
||||
|
||||
**\#** *comment*
|
||||
: A line comment
|
||||
|
||||
|
||||
# GRAMMAR FILES
|
||||
|
||||
**bp** allows loading extra grammar files, which define patterns which may be
|
||||
used for matching. The **builtins** grammar file is loaded by default, and it
|
||||
defines a few useful general-purpose patterns. For example, it defines the
|
||||
**parens** rule, which matches pairs of matching parentheses, accounting for
|
||||
nested inner parentheses:
|
||||
|
||||
```
|
||||
bp -p '"my_func" parens'
|
||||
```
|
||||
|
||||
**bp** also comes with a few grammar files for common programming languages,
|
||||
which may be loaded on demand. These grammar files are not comprehensive syntax
|
||||
definitions, but only some common patterns. For example, the c++ grammar file
|
||||
contains definitions for **//**-style line comments as well as
|
||||
**/\*...\*/**-style block comments. Thus, you can find all comments with the
|
||||
string "TODO" with the following command:
|
||||
|
||||
```
|
||||
bp -g c++ -p 'comment==(..%\n "TODO" ..%\n$$)' *.cpp
|
||||
```
|
||||
|
||||
|
||||
# EXAMPLES
|
||||
|
||||
**ls \| bp foo**
|
||||
: Find files containing the string \"foo\" (a string pattern)
|
||||
|
||||
**ls \| bp \'.c\\\$\' -r \'.h\'**
|
||||
: Find files ending with \".c\" and replace the extension with \".h\"
|
||||
|
||||
**bp -p \'{foobar} parens\' my_file.py**
|
||||
: Find the literal string **\"foobar\"**, assuming it\'s a complete word,
|
||||
followed by a pair of matching parentheses in the file *my_file.py*
|
||||
|
||||
**bp -g html -p \'html-element==(\"\<a \"..%\\n\$\$)\' foo.html**
|
||||
: Using the *html* grammar, find all *html-element*s matching the tag *a* in
|
||||
the file *foo.html*
|
Loading…
Reference in New Issue
Block a user