3 files changed, 594 insertions, 237 deletions
diff --git a/Makefile b/Makefile
index 7ff7f5f..e4d0dfb 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ ALL_FLAGS=$(CFLAGS) -DBP_NAME="\"$(NAME)\"" $(EXTRA) $(CWARN) $(G) $(O)
 CFILES=pattern.c definitions.c utils.c match.c files.c print.c json.c
 OBJFILES=$(CFILES:.c=.o)
 
-all: $(NAME) tags
+all: $(NAME) bp.1 # build the program binary and the bp.1 man page
 
 %.o: %.c %.h types.h
 	$(CC) -c $(ALL_FLAGS) -o $@ $<
@@ -20,6 +20,9 @@ all: $(NAME) tags
 $(NAME): $(OBJFILES) bp.c
 	$(CC) $(ALL_FLAGS) -o $@ $(OBJFILES) bp.c
 
+bp.1: bp.1.md # regenerate the man page from its Markdown source; the recipe invokes pandoc
+	pandoc -s $< -t man -o $@
+
 tags: $(CFILES) bp.c
 	ctags *.c *.h
 
diff --git a/bp.1 b/bp.1
index ee920b1..2ff84dc 100644
--- a/bp.1
+++ b/bp.1
@@ -1,256 +1,324 @@
-.\" Manpage for bp.
-.\" Contact bruce@bruce-hill.com to correct errors or typos.
-.TH man 1 "Sep 12, 2020" "0.1" "bp manual page"
+.\" Automatically generated by Pandoc 2.11.3
+.\"
+.TH "BP" "1" "May 17 2021" "" ""
+.hy
 .SH NAME
-bp \- Bruce's Parsing Expression Grammar tool
+.PP
+bp - Bruce\[aq]s Parsing Expression Grammar tool
 .SH SYNOPSIS
-.B bp
-[\fI-h\fR|\fI--help\fR]
-[\fI-v\fR|\fI--verbose\fR]
-[\fI-e\fR|\fI--explain\fR]
-[\fI-j\fR|\fI--json\fR]
-[\fI-l\fR|\fI--list-files\fR]
-[\fI-i\fR|\fI--ignore-case\fR]
-[\fI-I\fR|\fI--inplace\fR]
-[\fI-C\fR|\fI--confirm\fR]
-[\fI-p\fR|\fI--pattern\fR \fI<pattern>\fR]
-[\fI-r\fR|\fI--replace\fR \fI<replacement>\fR]
-[\fI-s\fR|\fI--skip\fR \fI<skip pattern>\fR]
-[\fI-g\fR|\fI--grammar\fR \fI<grammar file>\fR]
-[\fI-G\fR|\fI--git\fR]
-[\fI-c\fR|\fI--context\fR \fI<N>\fR]
-\fI<pattern>\fR
-[[--] \fI<input files...>\fR]
-
+.PP
+\f[B]bp\f[R] [\f[I]options\&...\f[R]] \f[I]pattern\f[R] [[--]
+\f[I]files\&...\f[R]]
 .SH DESCRIPTION
-\fBbp\fR is a tool that matches parsing expression grammars using a custom syntax.
-
+.PP
+\f[B]bp\f[R] is a tool that matches parsing expression grammars using a
+custom syntax.
 .SH OPTIONS
-.B \-v\fR, \fB--verbose
+.TP
+\f[B]-v\f[R], \f[B]--verbose\f[R]
 Print debugging information.
-
-.B \-e\fR, \fB--explain
+.TP
+\f[B]-e\f[R], \f[B]--explain\f[R]
 Print a visual explanation of the matches.
-
-.B \-j\fR, \fB--json
-Print a JSON list of the matches. (Pairs with \fB--verbose\fR for more detail)
-
-.B \-l\fR, \fB--list-files
-Print only the names of files containing matches instead of the matches themselves.
-
-.B \-i\fR, \fB--ignore-case
+.TP
+\f[B]-j\f[R], \f[B]--json\f[R]
+Print a JSON list of the matches.
+(Pairs with \f[B]--verbose\f[R] for more detail)
+.TP
+\f[B]-l\f[R], \f[B]--list-files\f[R]
+Print only the names of files containing matches instead of the matches
+themselves.
+.TP
+\f[B]-i\f[R], \f[B]--ignore-case\f[R]
 Perform pattern matching case-insensitively.
-
-.B \-I\fR, \fB--inplace
-Perform filtering or replacement in-place (i.e. overwrite files with new content).
-
-.B \-C\fR, \fB--confirm
-During in-place modification of a file, confirm before each modification.
-
-.B \-r\fR, \fB--replace \fI<replacement>\fR
+.TP
+\f[B]-I\f[R], \f[B]--inplace\f[R]
+Perform filtering or replacement in-place (i.e.\ overwrite files with
+new content).
+.TP
+\f[B]-C\f[R], \f[B]--confirm\f[R]
+During in-place modification of a file, confirm before each
+modification.
+.TP
+\f[B]-r\f[R], \f[B]--replace\f[R] \f[I]replacement\f[R]
 Replace all occurrences of the main pattern with the given string.
-
-.B \-s\fR, \fB--skip \fI<skip pattern>\fR
-While looking for matches, skip over \fB<skip pattern>\fR occurrences. This can
-be useful for behavior like \fBbp -s string\fR (avoiding matches inside string
-literals).
-
-.B \-g\fR, \fB--grammar \fI<grammar file>\fR
+.TP
+\f[B]-s\f[R], \f[B]--skip\f[R] \f[I]pattern\f[R]
+While looking for matches, skip over \f[I]pattern\f[R] occurrences.
+This can be useful for behavior like \f[B]bp -s string\f[R] (avoiding
+matches inside string literals).
+.TP
+\f[B]-g\f[R], \f[B]--grammar\f[R] \f[I]grammar-file\f[R]
 Load the grammar from the given file.
-
-.B \-G\fR, \fB--git\fR
-Use \fBgit\fR to get a list of files. Remaining file arguments (if any) are
-passed to \fBgit --ls-files\fR instead of treated as literal files.
-
-.B \-c\fR, \fB--context \fI<N>\fR
-The number of lines of context to print. If \fI<N>\fR is 0, print only the
-exact text of the matches. If \fI<N>\fR is "all", print the entire file.
-Otherwise, if \fI<N>\fR is a positive integer, print the whole line on which
-matches occur, as well as the \fI<N-1>\fR lines before and after the match. The
-default value for this argument is 1 (print whole lines where matches occur).
-
-.B \-f\fR, \fB\--format \fIauto|fancy|plain\fR
-Set the output format. \fIfancy\fR includes colors and line numbers,
-\fIplain\fR includes neither, and \fIauto\fR (the default) uses \fIfancy\fR
+See the \f[B]GRAMMAR FILES\f[R] section for more info.
+.TP
+\f[B]-G\f[R], \f[B]--git\f[R]
+Use \f[B]git\f[R] to get a list of files.
+Remaining file arguments (if any) are passed to \f[B]git ls-files\f[R]
+instead of being treated as literal files.
+.TP
+\f[B]-c\f[R], \f[B]--context\f[R] \f[I]N\f[R]
+The number of lines of context to print.
+If \f[I]N\f[R] is 0, print only the exact text of the matches.
+If \f[I]N\f[R] is \f[B]\f[CB]\[dq]all\[dq]\f[B]\f[R], print the entire
+file.
+Otherwise, if \f[I]N\f[R] is a positive integer, print the whole line on
+which matches occur, as well as the \f[I]N-1\f[R] lines before and after
+the match.
+The default value for this argument is \f[B]1\f[R] (print whole lines
+where matches occur).
+.TP
+\f[B]-f\f[R], \f[B]--format\f[R] \f[I]auto\f[R]|\f[I]fancy\f[R]|\f[I]plain\f[R]
+Set the output format.
+\f[I]fancy\f[R] includes colors and line numbers, \f[I]plain\f[R]
+includes neither, and \f[I]auto\f[R] (the default) uses \f[I]fancy\f[R]
 formatting only when the output is a TTY.
-
-.B \--help
+.TP
+\f[B]--help\f[R]
 Print the usage and exit.
-
-.B <string-pattern>
-The main pattern for bp to match. By default, this pattern is a string
-pattern (see the \fBSTRING PATTERNS\fR section below).
-
-.B <input files...>
-The input files to search. If no input files are provided and data was
-piped in, that data will be used instead. If neither are provided,
-\fBbp\fR will search through all files in the current directory and
-its subdirectories (recursively).
-
+.TP
+\f[I]pattern\f[R]
+The main pattern for bp to match.
+By default, this pattern is a string pattern (see the \f[B]STRING
+PATTERNS\f[R] section below).
+.TP
+\f[I]files\&...\f[R]
+The input files to search.
+If no input files are provided and data was piped in, that data will be
+used instead.
+If neither is provided, \f[B]bp\f[R] will search through all files in
+the current directory and its subdirectories (recursively).
+.SH STRING PATTERNS
+.PP
+One of the most common use cases for pattern matching tools is matching
+plain, literal strings, or strings that are primarily plain strings,
+with one or two patterns.
+\f[B]bp\f[R] is designed around this fact.
+The default mode for bp patterns is \[lq]string pattern mode\[rq].
+In string pattern mode, all characters are interpreted literally except
+for the backslash (\f[B]\[rs]\f[R]), which may be followed by a bp
+pattern (see the \f[B]PATTERNS\f[R] section below).
+Optionally, the bp pattern may be terminated by a semicolon
+(\f[B];\f[R]).
 .SH PATTERNS
-bp patterns are based off of a combination of Parsing Expression Grammars
-and regular expression syntax. The syntax is designed to map closely to
-verbal descriptions of the patterns, and prefix operators are preferred over
-suffix operators (as is common in regex syntax).
-
-Some patterns additionally have "multi-line" variants, which means that they
-include the newline character.
-
-.I <pat1> <pat2>
-A sequence: \fI<pat1>\fR followed by \fI<pat2>\fR
-
-.I <pat1> \fB/\fI <pat2>\fR
-A choice: \fI<pat1>\fR, or if it doesn't match, then \fI<pat2>\fR
-
-.B .
+.PP
+\f[B]bp\f[R] patterns are based on a combination of Parsing
+Expression Grammars and regular expression syntax.
+The syntax is designed to map closely to verbal descriptions of the
+patterns, and prefix operators are preferred over the suffix operators
+that are common in regex syntax.
+.PP
+Some patterns additionally have \[lq]multi-line\[rq] variants, which
+means that they include the newline character.
+.TP
+\f[I]pat1 pat2\f[R]
+A sequence: \f[I]pat1\f[R] followed by \f[I]pat2\f[R]
+.TP
+\f[I]pat1\f[R] \f[B]/\f[R] \f[I]pat2\f[R]
+A choice: \f[I]pat1\f[R], or if it doesn\[aq]t match, then
+\f[I]pat2\f[R]
+.TP
+\f[B].\f[R]
 Any character (excluding newline)
-
-.B ^
+.TP
+\f[B]\[ha]\f[R]
 Start of a line
-
-.B ^^
+.TP
+\f[B]\[ha]\[ha]\f[R]
 Start of the text
-
-.B $
+.TP
+\f[B]$\f[R]
 End of a line (does not include newline character)
-
-.B $$
+.TP
+\f[B]$$\f[R]
 End of the text
-
-.B _
-Zero or more whitespace characters (specifically, spaces and tabs)
-
-.B __
-Zero or more whitespace or newline characters
-
-.B "foo"
-.B 'foo'
-The literal string \fIstring\fR. Escape sequences are not allowed.
-
-.B {foo}
-The literal string \fIfoo\fR with word boundaries on either end. Escape sequences are not allowed.
-
-.B `\fI<c>\fR
-The literal character \fI<c>\fR (e.g. \fB`@\fR matches the "@" character)
-
-.B `\fI<c1>\fB,\fI<c2>\fR
-The literal character \fI<c1>\fR or \fI<c2>\fR (e.g. \fB`a,e,i,o,u\fR)
-
-.B `\fI<c1>\fB-\fI<c2>\fR
-The character range \fI<c1>\fR to \fI<c2>\fR (e.g. \fB`a-z\fR).
-Multiple ranges can be combined with a comma (e.g. \fB`a-z,A-Z\fR).
-
-.B \\\\\fI<esc>\fR
-An escape sequence (e.g. \fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.)
-
-.B \\\\\fI<esc1>\fB-\fI<esc2>\fR
-An escape sequence range from \fI<esc1>\fR to \fI<esc2>\fR (e.g. \fB\\x00-x1F\fR)
-
-.B \\\\N
-A special case escape that matches a "nodent": one or more newlines followed by
-the same indentation that occurs on the current line.
-
-.B !\fI<pat>\fR
-Not \fI<pat>\fR
-
-.B [\fI<pat>\fB]
-Maybe \fI<pat>\fR
-
-.B \fI<N> <pat>\fR
-Exactly \fIN\fR repetitions of \fI<pat>\fR (e.g. \fB5 `*\fR matches "*****")
-
-.B \fI<N>\fB-\fI<M> <pat>\fR
-Between \fI<N>\fR and \fI<M>\fR repetitions of \fI<pat>\fR (e.g. \fB2-3 `*\fR)
-
-.B \fI<N>\fB+ \fI<pat>\fR
-At least \fI<N>\fR or more repetitions of \fI<pat>\fR (e.g. \fB 2+ `*\fR)
-
-.B *\fI<pat>\fR
-Some \fI<pat>\fRs (zero or more)
-
-.B +\fI<pat>\fR
-At least one \fI<pat>\fRs
-
-.B \fI<repeating-pat>\fR \fB%\fI <sep>\fR
-\fI<repeating-pat>\fR separated by \fI<sep>\fR (e.g. \fB*word % `,\fR matches
-zero or more comma-separated words)
-
-.B .. \fI<pat>\fR
-Any text (except newlines) up to and including \fI<pat>\fR
-
-.B .. % \fI<skip>\fR \fI<pat>\fB
-Any text (except newlines) up to and including \fI<pat>\fR, skipping over
-instances of \fI<skip>\fR (e.g. \fB`"..`" % (`\\.)\fR)
-
-.B <\fI<pat>\fR
-Just after \fI<pat>\fR (lookbehind)
-
-.B >\fI<pat>\fR
-Just before \fI<pat>\fR (lookahead)
-
-.B @\fI<pat>\fR
-Capture \fI<pat>\fR
-
-.B @\fI<name>\fB=\fI<pat>\fR
-Let \fI<name>\fR equal \fI<pat>\fR (named capture). Named captures can be used
-as backreferences like so: \fB@foo=word `( foo `)\fR (matches "asdf(asdf)" or
-"baz(baz)", but not "foo(baz)")
-
-.B \fI<pat>\fB => '\fI<replacement>\fB'
-Replace \fI<pat>\fR with \fI<replacement>\fR. Note: \fI<replacement>\fR should
-be a string, and it may contain references to captured values: \fB@0\fR
-(the whole of \fI<pat>\fR), \fB@1\fR (the first capture in \fI<pat>\fR),
-\fB@\fIfoo\fR (the capture named \fIfoo\fR in \fI<pat>\fR), etc.
-For example, \fB@word _ @rest=(*word % _) => "@rest @1"\fR
-
-.B \fI<pat1>\fB == \fI<pat2>\fR
-Matches \fI<pat1>\fR, if and only if \fI<pat2>\fR also matches the text of
-\fI<pat1>\fR's match. (e.g. \fBword == ("foo_" *.)\fR matches words that start
-with "foo_")
-
-.B \fI<pat1>\fB != \fI<pat2>\fR
-Matches \fI<pat1>\fR, if and only if \fI<pat2>\fR does not match the text of
-\fI<pat1>\fR's match. (e.g. \fBword == ("foo_" *.)\fR matches words that do not
-start with "foo_")
-
-.B \fI<name>\fB: \fI<pat>\fR
-Define \fI<name>\fR to mean \fI<pat>\fR (pattern definition)
-
-.B # \fI<comment>\fR
+.TP
+\f[B]_\f[R]
+Zero or more whitespace characters, including spaces and tabs, but not
+newlines.
+.TP
+\f[B]__\f[R]
+Zero or more whitespace characters, including spaces, tabs, newlines,
+and comments.
+Comments are undefined by default, but may be defined by a separate
+grammar file.
+See the \f[B]GRAMMAR FILES\f[R] section for more info.
+.TP
+\f[B]\[dq]foo\[dq]\f[R], \f[B]\[aq]foo\[aq]\f[R]
+The literal string \f[B]\[lq]foo\[rq]\f[R].
+Single and double quotes are treated the same.
+Escape sequences are not allowed.
+.TP
+\f[B]{foo}\f[R]
+The literal string \f[B]\[lq]foo\[rq]\f[R] with word boundaries on
+either end.
+Escape sequences are not allowed.
+.TP
+\f[B]\[ga]\f[R]\f[I]c\f[R]
+The literal character \f[I]c\f[R] (e.g.\ \f[B]\[ga]\[at]\f[R] matches the
+\[lq]\[at]\[rq] character)
+.TP
+\f[B]\[ga]\f[R]\f[I]c1\f[R]\f[B],\f[R]\f[I]c2\f[R]
+The literal character \f[I]c1\f[R] or \f[I]c2\f[R]
+(e.g.\ \f[B]\[ga]a,e,i,o,u\f[R])
+.TP
+\f[B]\[ga]\f[R]\f[I]c1\f[R]\f[B]-\f[R]\f[I]c2\f[R]
+The character range \f[I]c1\f[R] to \f[I]c2\f[R]
+(e.g.\ \f[B]\[ga]a-z\f[R]).
+Multiple ranges can be combined with a comma
+(e.g.\ \f[B]\[ga]a-z,A-Z\f[R]).
+.TP
+\f[B]\[rs]\f[R]\f[I]esc\f[R]
+An escape sequence (e.g.\ \f[B]\[rs]n\f[R], \f[B]\[rs]x1F\f[R],
+\f[B]\[rs]033\f[R], etc.)
+.TP
+\f[B]\[rs]\f[R]\f[I]esc1\f[R]\f[B]-\f[R]\f[I]esc2\f[R]
+An escape sequence range from \f[I]esc1\f[R] to \f[I]esc2\f[R]
+(e.g.\ \f[B]\[rs]x00-x1F\f[R])
+.TP
+\f[B]\[rs]N\f[R]
+A special case escape that matches a \[lq]nodent\[rq]: one or more
+newlines followed by the same indentation that occurs on the current
+line.
+.TP
+\f[B]!\f[R] \f[I]pat\f[R]
+Not \f[I]pat\f[R]
+.TP
+\f[B][\f[R] \f[I]pat\f[R] \f[B]]\f[R]
+Maybe \f[I]pat\f[R]
+.TP
+\f[I]N\f[R] \f[I]pat\f[R]
+Exactly \f[I]N\f[R] repetitions of \f[I]pat\f[R] (e.g.\ \f[B]5
+\[ga]x\f[R] matches \f[B]\[lq]xxxxx\[rq]\f[R])
+.TP
+\f[I]N\f[R] \f[B]-\f[R] \f[I]M\f[R] \f[I]pat\f[R]
+Between \f[I]N\f[R] and \f[I]M\f[R] repetitions of \f[I]pat\f[R]
+(e.g.\ \f[B]2-3 \[ga]x\f[R] matches \f[B]\[lq]xx\[rq]\f[R] or
+\f[B]\[lq]xxx\[rq]\f[R])
+.TP
+\f[I]N\f[R]\f[B]+\f[R] \f[I]pat\f[R]
+At least \f[I]N\f[R] repetitions of \f[I]pat\f[R] (e.g.\ \f[B]2+
+\[ga]x\f[R] matches \f[B]\[lq]xx\[rq]\f[R], \f[B]\[lq]xxx\[rq]\f[R],
+\f[B]\[lq]xxxx\[rq]\f[R], etc.)
+.TP
+\f[B]*\f[R] \f[I]pat\f[R]
+Some \f[I]pat\f[R]s (zero or more, e.g.\ \f[B]* \[ga]x\f[R] matches
+\f[B]\[lq]\[rq]\f[R], \f[B]\[lq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R],
+etc.)
+.TP
+\f[B]+\f[R] \f[I]pat\f[R]
+One or more \f[I]pat\f[R]s (e.g.\ \f[B]+ \[ga]x\f[R] matches
+\f[B]\[lq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R], \f[B]\[lq]xxx\[rq]\f[R],
+etc.)
+.TP
+\f[I]repeating-pat\f[R] \f[B]%\f[R] \f[I]sep\f[R]
+\f[I]repeating-pat\f[R] separated by \f[I]sep\f[R] (e.g.\ \f[B]*word %
+\[ga],\f[R] matches zero or more comma-separated words)
+.TP
+\f[B]..\f[R] \f[I]pat\f[R]
+Any text (except newlines) up to and including \f[I]pat\f[R]
+.TP
+\f[B].. %\f[R] \f[I]skip\f[R] \f[I]pat\f[R]
+Any text (except newlines) up to and including \f[I]pat\f[R], skipping
+over instances of \f[I]skip\f[R] (e.g.\ \f[B]\[ga]\[dq]..\[ga]\[dq] %
+(\[ga]\[rs].)\f[R])
+.TP
+\f[B]<\f[R] \f[I]pat\f[R]
+Just after \f[I]pat\f[R] (lookbehind)
+.TP
+\f[B]>\f[R] \f[I]pat\f[R]
+Just before \f[I]pat\f[R] (lookahead)
+.TP
+\f[B]\[at]\f[R] \f[I]pat\f[R]
+Capture \f[I]pat\f[R]
+.TP
+\f[B]foo\f[R]
+The named pattern whose name is \f[B]\[lq]foo\[rq]\f[R].
+Pattern names come from definitions in grammar files or from named
+captures.
+Pattern names may contain dashes (\f[B]-\f[R]), but not underscores
+(\f[B]_\f[R]), since the underscore is used to match whitespace.
+See the \f[B]GRAMMAR FILES\f[R] section for more info.
+.TP
+\f[B]\[at]\f[R] \f[I]name\f[R] \f[B]=\f[R] \f[I]pat\f[R]
+Let \f[I]name\f[R] equal \f[I]pat\f[R] (named capture).
+Named captures can be used as backreferences like so: \f[B]\[at]foo=word
+\[ga]( foo \[ga])\f[R] (matches \f[B]\[lq]asdf(asdf)\[rq]\f[R] or
+\f[B]\[lq]baz(baz)\[rq]\f[R], but not \f[B]\[lq]foo(baz)\[rq]\f[R])
+.TP
+\f[I]pat\f[R] \f[B]=> \[aq]\f[R]\f[I]replacement\f[R]\f[B]\[aq]\f[R]
+Replace \f[I]pat\f[R] with \f[I]replacement\f[R].
+Note: \f[I]replacement\f[R] should be a string, and it may contain
+references to captured values: \f[B]\[at]0\f[R] (the whole of
+\f[I]pat\f[R]), \f[B]\[at]1\f[R] (the first capture in \f[I]pat\f[R]),
+\f[B]\[at]\f[R]\f[I]foo\f[R] (the capture named \f[I]foo\f[R] in
+\f[I]pat\f[R]), etc.
+For example, \f[B]\[at]word _ \[at]rest=(*word % _) => \[dq]\[at]rest
+\[at]1\[dq]\f[R]
+.TP
+\f[I]pat1\f[R] \f[B]==\f[R] \f[I]pat2\f[R]
+Matches \f[I]pat1\f[R], if and only if \f[I]pat2\f[R] also matches the
+text of \f[I]pat1\f[R]\[aq]s match.
+(e.g.\ \f[B]word == (\[dq]foo_\[dq] *.)\f[R] matches words that start
+with \f[B]\[lq]foo_\[rq]\f[R])
+.TP
+\f[I]pat1\f[R] \f[B]!=\f[R] \f[I]pat2\f[R]
+Matches \f[I]pat1\f[R], if and only if \f[I]pat2\f[R] does not match the
+text of \f[I]pat1\f[R]\[aq]s match.
+(e.g.\ \f[B]word != (\[dq]foo_\[dq] *.)\f[R] matches words that do not
+start with \f[B]\[lq]foo_\[rq]\f[R])
+.TP
+\f[I]name\f[R]\f[B]:\f[R] \f[I]pat\f[R]
+Define \f[I]name\f[R] to mean \f[I]pat\f[R] (pattern definition)
+.TP
+\f[B]#\f[R] \f[I]comment\f[R]
 A line comment
-
-.SH STRING PATTERNS
-One of the most common use cases for pattern matching tools is matching plain,
-literal strings, or strings that are primarily plain strings, with one or two
-patterns. \fBbp\fR is designed around this fact. The default mode for bp
-patterns is "string pattern mode". In string pattern mode, all characters
-are interpreted literally except for the backslash (\fB\\\fR), which may be
-followed by a bp pattern (see the \fBPATTERNS\fR section above). Optionally,
-the bp pattern may be terminated by a semicolon (\fB;\fR).
-
+.SH GRAMMAR FILES
+.PP
+\f[B]bp\f[R] allows loading extra grammar files, which define patterns
+which may be used for matching.
+The \f[B]builtins\f[R] grammar file is loaded by default, and it defines
+a few useful general-purpose patterns.
+For example, it defines the \f[B]parens\f[R] rule, which matches pairs
+of matching parentheses, accounting for nested inner parentheses:
+.IP
+.nf
+\f[C]
+bp -p \[aq]\[dq]my_func\[dq] parens\[aq]
+\f[R]
+.fi
+.PP
+\f[B]bp\f[R] also comes with a few grammar files for common programming
+languages, which may be loaded on demand.
+These grammar files are not comprehensive syntax definitions, but only
+some common patterns.
+For example, the c++ grammar file contains definitions for
+\f[B]//\f[R]-style line comments as well as \f[B]/*\&...*/\f[R]-style
+block comments.
+Thus, you can find all comments with the string \[lq]TODO\[rq] with the
+following command:
+.IP
+.nf
+\f[C]
+bp -g c++ -p \[aq]comment==(..%\[rs]n \[dq]TODO\[dq] ..%\[rs]n$$)\[aq] *.cpp
+\f[R]
+.fi
 .SH EXAMPLES
 .TP
-.B
-ls | bp foo
-Find files containing the string "foo" (a string pattern)
-
-.TP
-.B
-ls | bp '.c\\$' -r '.h'
-Find files ending with ".c" and replace the extension with ".h"
-
-.TP
-.B
-bp -p '{foobar} parens' my_file.py
-Find the literal string \fB"foobar"\fR, assuming it's a complete word, followed
-by a pair of matching parentheses in the file \fImy_file.py\fR
-
-.TP
-.B
-bp -g html -p html-element -D matching-tag=a foo.html
-Using the \fIhtml\fR grammar, find all \fIhtml-element\fRs matching
-the tag \fIa\fR in the file \fIfoo.html\fR
-
-
-.SH AUTHOR
-Bruce Hill (bruce@bruce-hill.com)
+\f[B]ls | bp foo\f[R]
+Find files containing the string \[dq]foo\[dq] (a string pattern)
+.TP
+\f[B]ls | bp \[aq].c\[rs]$\[aq] -r \[aq].h\[aq]\f[R]
+Find files ending with \[dq].c\[dq] and replace the extension with
+\[dq].h\[dq]
+.TP
+\f[B]bp -p \[aq]{foobar} parens\[aq] my_file.py\f[R]
+Find the literal string \f[B]\[dq]foobar\[dq]\f[R], assuming it\[aq]s a
+complete word, followed by a pair of matching parentheses in the file
+\f[I]my_file.py\f[R]
+.TP
+\f[B]bp -g html -p \[aq]html-element==(\[dq]<a \[dq]..%\[rs]n$$)\[aq] foo.html\f[R]
+Using the \f[I]html\f[R] grammar, find all \f[I]html-element\f[R]s
+matching the tag \f[I]a\f[R] in the file \f[I]foo.html\f[R]
+.SH AUTHORS
+Bruce Hill (\f[I]bruce\[at]bruce-hill.com\f[R]).
diff --git a/bp.1.md b/bp.1.md
new file mode 100644
index 0000000..c58725c
--- /dev/null
+++ b/bp.1.md
@@ -0,0 +1,286 @@
+% BP(1)
+% Bruce Hill (*bruce@bruce-hill.com*)
+% May 17 2021
+
+# NAME
+
+bp - Bruce\'s Parsing Expression Grammar tool
+
+# SYNOPSIS
+
+**bp**
+\[*options...*\]
+*pattern*
+\[\[\--\] *files...*\]
+
+# DESCRIPTION
+
+**bp** is a tool that matches parsing expression grammars using a custom
+syntax.
+
+# OPTIONS
+
+**-v**, **\--verbose**
+: Print debugging information.
+
+**-e**, **\--explain**
+: Print a visual explanation of the matches.
+
+**-j**, **\--json**
+: Print a JSON list of the matches. (Pairs with **\--verbose** for more detail)
+
+**-l**, **\--list-files**
+: Print only the names of files containing matches instead of the matches
+themselves.
+
+**-i**, **\--ignore-case**
+: Perform pattern matching case-insensitively.
+
+**-I**, **\--inplace**
+: Perform filtering or replacement in-place (i.e. overwrite files with new
+content).
+
+**-C**, **\--confirm**
+: During in-place modification of a file, confirm before each modification.
+
+**-r**, **\--replace** *replacement*
+: Replace all occurrences of the main pattern with the given string.
+
+**-s**, **\--skip** *pattern*
+: While looking for matches, skip over *pattern* occurrences. This can be
+useful for behavior like **bp -s string** (avoiding matches inside string
+literals).
+
+**-g**, **\--grammar** *grammar-file*
+: Load the grammar from the given file. See the **GRAMMAR FILES** section
+for more info.
+
+**-G**, **\--git**
+: Use **git** to get a list of files. Remaining file arguments (if any) are
+passed to **git ls-files** instead of being treated as literal files.
+
+**-c**, **\--context** *N*
+: The number of lines of context to print. If *N* is 0, print only the
+exact text of the matches. If *N* is **`"all"`**, print the entire file.
+Otherwise, if *N* is a positive integer, print the whole line on which
+matches occur, as well as the *N-1* lines before and after the match. The
+default value for this argument is **1** (print whole lines where matches
+occur).
+
+**-f**, **\--format** *auto*\|*fancy*\|*plain*
+: Set the output format. *fancy* includes colors and line numbers, *plain*
+includes neither, and *auto* (the default) uses *fancy* formatting only when
+the output is a TTY.
+
+**\--help**
+: Print the usage and exit.
+
+*pattern*
+: The main pattern for bp to match. By default, this pattern is a string
+pattern (see the **STRING PATTERNS** section below).
+
+*files...*
+: The input files to search. If no input files are provided and data was piped
+in, that data will be used instead. If neither is provided, **bp** will search
+through all files in the current directory and its subdirectories
+(recursively).
+
+
+# STRING PATTERNS
+
+One of the most common use cases for pattern matching tools is matching plain,
+literal strings, or strings that are primarily plain strings, with one or two
+patterns. **bp** is designed around this fact. The default mode for bp patterns
+is "string pattern mode". In string pattern mode, all characters are
+interpreted literally except for the backslash (**\\**), which may be followed
+by a bp pattern (see the **PATTERNS** section below). Optionally, the bp
+pattern may be terminated by a semicolon (**;**).
+
+
+# PATTERNS
+
+**bp** patterns are based on a combination of Parsing Expression Grammars
+and regular expression syntax. The syntax is designed to map closely to verbal
+descriptions of the patterns, and prefix operators are preferred over the
+suffix operators that are common in regex syntax.
+
+Some patterns additionally have "multi-line" variants, which means that they
+include the newline character.
+
+*pat1 pat2*
+: A sequence: *pat1* followed by *pat2*
+
+*pat1* **/** *pat2*
+: A choice: *pat1*, or if it doesn\'t match, then *pat2*
+
+**.**
+: Any character (excluding newline)
+
+**\^**
+: Start of a line
+
+**\^\^**
+: Start of the text
+
+**\$**
+: End of a line (does not include newline character)
+
+**\$\$**
+: End of the text
+
+**\_**
+: Zero or more whitespace characters, including spaces and tabs, but not
+newlines.
+
+**\_\_**
+: Zero or more whitespace characters, including spaces, tabs, newlines, and
+comments. Comments are undefined by default, but may be defined by a separate
+grammar file. See the **GRAMMAR FILES** section for more info.
+
+**\"foo\"**, **\'foo\'**
+: The literal string **"foo"**. Single and double quotes are treated the same.
+Escape sequences are not allowed.
+
+**{foo}**
+: The literal string **"foo"** with word boundaries on either end. Escape
+sequences are not allowed.
+
+**\`***c*
+: The literal character *c* (e.g. **\`\@** matches the "@" character)
+
+**\`***c1***,***c2*
+: The literal character *c1* or *c2* (e.g. **\`a,e,i,o,u**)
+
+**\`***c1***-***c2*
+: The character range *c1* to *c2* (e.g. **\`a-z**). Multiple ranges
+can be combined with a comma (e.g. **\`a-z,A-Z**).
+
+**\\***esc*
+: An escape sequence (e.g. **\\n**, **\\x1F**, **\\033**, etc.)
+
+**\\***esc1***-***esc2*
+: An escape sequence range from *esc1* to *esc2* (e.g. **\\x00-x1F**)
+
+**\\N**
+: A special case escape that matches a "nodent": one or more newlines followed
+by the same indentation that occurs on the current line.
+
+**!** *pat*
+: Not *pat*
+
+**\[** *pat* **\]**
+: Maybe *pat*
+
+*N* *pat*
+: Exactly *N* repetitions of *pat* (e.g. **5 \`x** matches **"xxxxx"**)
+
+*N* **-** *M* *pat*
+: Between *N* and *M* repetitions of *pat* (e.g. **2-3 \`x**
+matches **"xx"** or **"xxx"**)
+
+*N***+** *pat*
+: At least *N* repetitions of *pat* (e.g. **2+ \`x** matches
+**"xx"**, **"xxx"**, **"xxxx"**, etc.)
+
+**\*** *pat*
+: Some *pat*s (zero or more, e.g. **\* \`x** matches **\"\"**, **\"x\"**,
+**\"xx\"**, etc.)
+
+**+** *pat*
+: One or more *pat*s (e.g. **\+ \`x** matches **"x"**, **"xx"**,
+**"xxx"**, etc.)
+
+*repeating-pat* **%** *sep*
+: *repeating-pat* separated by *sep* (e.g. **\*word % \`,** matches
+zero or more comma-separated words)
+
+**..** *pat*
+: Any text (except newlines) up to and including *pat*
+
+**.. %** *skip* *pat*
+: Any text (except newlines) up to and including *pat*, skipping over
+instances of *skip* (e.g. **\`\"..\`\" % (\`\\.)**)
+
+**\<** *pat*
+: Just after *pat* (lookbehind)
+
+**\>** *pat*
+: Just before *pat* (lookahead)
+
+**\@** *pat*
+: Capture *pat*
+
+**foo**
+: The named pattern whose name is **"foo"**. Pattern names come from definitions in
+grammar files or from named captures. Pattern names may contain dashes (**-**),
+but not underscores (**\_**), since the underscore is used to match whitespace.
+See the **GRAMMAR FILES** section for more info.
+
+**\@** *name* **=** *pat*
+: Let *name* equal *pat* (named capture). Named captures can be used as
+backreferences like so: **\@foo=word \`( foo \`)** (matches **"asdf(asdf)"** or
+**"baz(baz)"**, but not **"foo(baz)"**)
+
+*pat* **=\> \'***replacement***\'**
+: Replace *pat* with *replacement*. Note: *replacement* should be a
+string, and it may contain references to captured values: **\@0** (the whole of
+*pat*), **\@1** (the first capture in *pat*), **\@***foo* (the capture
+named *foo* in *pat*), etc. For example, **\@word \_ \@rest=(\*word % \_)
+=\> \"\@rest \@1\"**
+
+*pat1* **==** *pat2*
+: Matches *pat1*, if and only if *pat2* also matches the text of
+*pat1*\'s match. (e.g. **word == (\"foo\_\" \*.)** matches words that start
+with **"foo\_"**)
+
+*pat1* **!=** *pat2*
+: Matches *pat1*, if and only if *pat2* does not match the text of
+*pat1*\'s match. (e.g. **word != (\"foo\_\" \*.)** matches words that do
+not start with **"foo\_"**)
+
+*name***:** *pat*
+: Define *name* to mean *pat* (pattern definition)
+
+**\#** *comment*
+: A line comment
+
+
+# GRAMMAR FILES
+
+**bp** allows loading extra grammar files, which define patterns which may be
+used for matching. The **builtins** grammar file is loaded by default, and it
+defines a few useful general-purpose patterns. For example, it defines the
+**parens** rule, which matches pairs of matching parentheses, accounting for
+nested inner parentheses:
+
+```
+bp -p '"my_func" parens'
+```
+
+**bp** also comes with a few grammar files for common programming languages,
+which may be loaded on demand. These grammar files are not comprehensive syntax
+definitions, but only some common patterns. For example, the c++ grammar file
+contains definitions for **//**-style line comments as well as
+**/\*...\*/**-style block comments. Thus, you can find all comments with the
+string "TODO" with the following command:
+
+```
+bp -g c++ -p 'comment==(..%\n "TODO" ..%\n$$)' *.cpp
+```
+
+
+# EXAMPLES
+
+**ls \| bp foo**
+: Find files containing the string \"foo\" (a string pattern)
+
+**ls \| bp \'.c\\\$\' -r \'.h\'**
+: Find files ending with \".c\" and replace the extension with \".h\"
+
+**bp -p \'{foobar} parens\' my_file.py**
+: Find the literal string **\"foobar\"**, assuming it\'s a complete word,
+followed by a pair of matching parentheses in the file *my_file.py*
+
+**bp -g html -p \'html-element==(\"\<a \"..%\\n\$\$)\' foo.html**
+: Using the *html* grammar, find all *html-element*s matching the tag *a* in
+the file *foo.html*