diff --git a/Makefile b/Makefile index 7ff7f5f..e4d0dfb 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ ALL_FLAGS=$(CFLAGS) -DBP_NAME="\"$(NAME)\"" $(EXTRA) $(CWARN) $(G) $(O) CFILES=pattern.c definitions.c utils.c match.c files.c print.c json.c OBJFILES=$(CFILES:.c=.o) -all: $(NAME) tags +all: $(NAME) bp.1 %.o: %.c %.h types.h $(CC) -c $(ALL_FLAGS) -o $@ $< @@ -20,6 +20,9 @@ all: $(NAME) tags $(NAME): $(OBJFILES) bp.c $(CC) $(ALL_FLAGS) -o $@ $(OBJFILES) bp.c +bp.1: bp.1.md + pandoc -s $< -t man -o $@ + tags: $(CFILES) bp.c ctags *.c *.h diff --git a/bp.1 b/bp.1 index ee920b1..2ff84dc 100644 --- a/bp.1 +++ b/bp.1 @@ -1,256 +1,324 @@ -.\" Manpage for bp. -.\" Contact bruce@bruce-hill.com to correct errors or typos. -.TH man 1 "Sep 12, 2020" "0.1" "bp manual page" +.\" Automatically generated by Pandoc 2.11.3 +.\" +.TH "BP" "1" "May 17 2021" "" "" +.hy .SH NAME -bp \- Bruce's Parsing Expression Grammar tool +.PP +bp - Bruce\[aq]s Parsing Expression Grammar tool .SH SYNOPSIS -.B bp -[\fI-h\fR|\fI--help\fR] -[\fI-v\fR|\fI--verbose\fR] -[\fI-e\fR|\fI--explain\fR] -[\fI-j\fR|\fI--json\fR] -[\fI-l\fR|\fI--list-files\fR] -[\fI-i\fR|\fI--ignore-case\fR] -[\fI-I\fR|\fI--inplace\fR] -[\fI-C\fR|\fI--confirm\fR] -[\fI-p\fR|\fI--pattern\fR \fI\fR] -[\fI-r\fR|\fI--replace\fR \fI\fR] -[\fI-s\fR|\fI--skip\fR \fI\fR] -[\fI-g\fR|\fI--grammar\fR \fI\fR] -[\fI-G\fR|\fI--git\fR] -[\fI-c\fR|\fI--context\fR \fI\fR] -\fI\fR -[[--] \fI\fR] - +.PP +\f[B]bp\f[R] [\f[I]options\&...\f[R]] \f[I]pattern\f[R] [[--] +\f[I]files\&...\f[R]] .SH DESCRIPTION -\fBbp\fR is a tool that matches parsing expression grammars using a custom syntax. - +.PP +\f[B]bp\f[R] is a tool that matches parsing expression grammars using a +custom syntax. .SH OPTIONS -.B \-v\fR, \fB--verbose +.TP +\f[B]-v\f[R], \f[B]--verbose\f[R] Print debugging information. - -.B \-e\fR, \fB--explain +.TP +\f[B]-e\f[R], \f[B]--explain\f[R] Print a visual explanation of the matches. 
- -.B \-j\fR, \fB--json -Print a JSON list of the matches. (Pairs with \fB--verbose\fR for more detail) - -.B \-l\fR, \fB--list-files -Print only the names of files containing matches instead of the matches themselves. - -.B \-i\fR, \fB--ignore-case +.TP +\f[B]-j\f[R], \f[B]--json\f[R] +Print a JSON list of the matches. +(Pairs with \f[B]--verbose\f[R] for more detail) +.TP +\f[B]-l\f[R], \f[B]--list-files\f[R] +Print only the names of files containing matches instead of the matches +themselves. +.TP +\f[B]-i\f[R], \f[B]--ignore-case\f[R] Perform pattern matching case-insensitively. - -.B \-I\fR, \fB--inplace -Perform filtering or replacement in-place (i.e. overwrite files with new content). - -.B \-C\fR, \fB--confirm -During in-place modification of a file, confirm before each modification. - -.B \-r\fR, \fB--replace \fI\fR +.TP +\f[B]-I\f[R], \f[B]--inplace\f[R] +Perform filtering or replacement in-place (i.e.\ overwrite files with +new content). +.TP +\f[B]-C\f[R], \f[B]--confirm\f[R] +During in-place modification of a file, confirm before each +modification. +.TP +\f[B]-r\f[R], \f[B]--replace\f[R] \f[I]replacement\f[R] Replace all occurrences of the main pattern with the given string. - -.B \-s\fR, \fB--skip \fI\fR -While looking for matches, skip over \fB\fR occurrences. This can -be useful for behavior like \fBbp -s string\fR (avoiding matches inside string -literals). - -.B \-g\fR, \fB--grammar \fI\fR +.TP +\f[B]-s\f[R], \f[B]--skip\f[R] \f[I]pattern\f[R] +While looking for matches, skip over \f[I]pattern\f[R] occurrences. +This can be useful for behavior like \f[B]bp -s string\f[R] (avoiding +matches inside string literals). +.TP +\f[B]-g\f[R], \f[B]--grammar\f[R] \f[I]grammar-file\f[R] Load the grammar from the given file. - -.B \-G\fR, \fB--git\fR -Use \fBgit\fR to get a list of files. Remaining file arguments (if any) are -passed to \fBgit --ls-files\fR instead of treated as literal files. 
- -.B \-c\fR, \fB--context \fI\fR -The number of lines of context to print. If \fI\fR is 0, print only the -exact text of the matches. If \fI\fR is "all", print the entire file. -Otherwise, if \fI\fR is a positive integer, print the whole line on which -matches occur, as well as the \fI\fR lines before and after the match. The -default value for this argument is 1 (print whole lines where matches occur). - -.B \-f\fR, \fB\--format \fIauto|fancy|plain\fR -Set the output format. \fIfancy\fR includes colors and line numbers, -\fIplain\fR includes neither, and \fIauto\fR (the default) uses \fIfancy\fR +See the \f[B]GRAMMAR FILES\f[R] section for more info. +.TP +\f[B]-G\f[R], \f[B]--git\f[R] +Use \f[B]git\f[R] to get a list of files. +Remaining file arguments (if any) are passed to \f[B]git --ls-files\f[R] +instead of treated as literal files. +.TP +\f[B]-c\f[R], \f[B]--context\f[R] \f[I]N\f[R] +The number of lines of context to print. +If \f[I]N\f[R] is 0, print only the exact text of the matches. +If \f[I]N\f[R] is \f[B]\f[CB]\[dq]all\[dq]\f[B]\f[R], print the entire +file. +Otherwise, if \f[I]N\f[R] is a positive integer, print the whole line on +which matches occur, as well as the \f[I]N-1\f[R] lines before and after +the match. +The default value for this argument is \f[B]1\f[R] (print whole lines +where matches occur). +.TP +\f[B]-f\f[R], \f[B]--format\f[R] \f[I]auto\f[R]|\f[I]fancy\f[R]|\f[I]plain\f[R] +Set the output format. +\f[I]fancy\f[R] includes colors and line numbers, \f[I]plain\f[R] +includes neither, and \f[I]auto\f[R] (the default) uses \f[I]fancy\f[R] formatting only when the output is a TTY. - -.B \--help +.TP +\f[B]--help\f[R] Print the usage and exit. - -.B -The main pattern for bp to match. By default, this pattern is a string -pattern (see the \fBSTRING PATTERNS\fR section below). - -.B -The input files to search. If no input files are provided and data was -piped in, that data will be used instead. 
If neither are provided, -\fBbp\fR will search through all files in the current directory and -its subdirectories (recursively). - -.SH PATTERNS -bp patterns are based off of a combination of Parsing Expression Grammars -and regular expression syntax. The syntax is designed to map closely to -verbal descriptions of the patterns, and prefix operators are preferred over -suffix operators (as is common in regex syntax). - -Some patterns additionally have "multi-line" variants, which means that they -include the newline character. - -.I -A sequence: \fI\fR followed by \fI\fR - -.I \fB/\fI \fR -A choice: \fI\fR, or if it doesn't match, then \fI\fR - -.B . -Any character (excluding newline) - -.B ^ -Start of a line - -.B ^^ -Start of the text - -.B $ -End of a line (does not include newline character) - -.B $$ -End of the text - -.B _ -Zero or more whitespace characters (specifically, spaces and tabs) - -.B __ -Zero or more whitespace or newline characters - -.B "foo" -.B 'foo' -The literal string \fIstring\fR. Escape sequences are not allowed. - -.B {foo} -The literal string \fIfoo\fR with word boundaries on either end. Escape sequences are not allowed. - -.B `\fI\fR -The literal character \fI\fR (e.g. \fB`@\fR matches the "@" character) - -.B `\fI\fB,\fI\fR -The literal character \fI\fR or \fI\fR (e.g. \fB`a,e,i,o,u\fR) - -.B `\fI\fB-\fI\fR -The character range \fI\fR to \fI\fR (e.g. \fB`a-z\fR). -Multiple ranges can be combined with a comma (e.g. \fB`a-z,A-Z\fR). - -.B \\\\\fI\fR -An escape sequence (e.g. \fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.) - -.B \\\\\fI\fB-\fI\fR -An escape sequence range from \fI\fR to \fI\fR (e.g. \fB\\x00-x1F\fR) - -.B \\\\N -A special case escape that matches a "nodent": one or more newlines followed by -the same indentation that occurs on the current line. - -.B !\fI\fR -Not \fI\fR - -.B [\fI\fB] -Maybe \fI\fR - -.B \fI \fR -Exactly \fIN\fR repetitions of \fI\fR (e.g. 
\fB5 `*\fR matches "*****") - -.B \fI\fB-\fI \fR -Between \fI\fR and \fI\fR repetitions of \fI\fR (e.g. \fB2-3 `*\fR) - -.B \fI\fB+ \fI\fR -At least \fI\fR or more repetitions of \fI\fR (e.g. \fB 2+ `*\fR) - -.B *\fI\fR -Some \fI\fRs (zero or more) - -.B +\fI\fR -At least one \fI\fRs - -.B \fI\fR \fB%\fI \fR -\fI\fR separated by \fI\fR (e.g. \fB*word % `,\fR matches -zero or more comma-separated words) - -.B .. \fI\fR -Any text (except newlines) up to and including \fI\fR - -.B .. % \fI\fR \fI\fB -Any text (except newlines) up to and including \fI\fR, skipping over -instances of \fI\fR (e.g. \fB`"..`" % (`\\.)\fR) - -.B <\fI\fR -Just after \fI\fR (lookbehind) - -.B >\fI\fR -Just before \fI\fR (lookahead) - -.B @\fI\fR -Capture \fI\fR - -.B @\fI\fB=\fI\fR -Let \fI\fR equal \fI\fR (named capture). Named captures can be used -as backreferences like so: \fB@foo=word `( foo `)\fR (matches "asdf(asdf)" or -"baz(baz)", but not "foo(baz)") - -.B \fI\fB => '\fI\fB' -Replace \fI\fR with \fI\fR. Note: \fI\fR should -be a string, and it may contain references to captured values: \fB@0\fR -(the whole of \fI\fR), \fB@1\fR (the first capture in \fI\fR), -\fB@\fIfoo\fR (the capture named \fIfoo\fR in \fI\fR), etc. -For example, \fB@word _ @rest=(*word % _) => "@rest @1"\fR - -.B \fI\fB == \fI\fR -Matches \fI\fR, if and only if \fI\fR also matches the text of -\fI\fR's match. (e.g. \fBword == ("foo_" *.)\fR matches words that start -with "foo_") - -.B \fI\fB != \fI\fR -Matches \fI\fR, if and only if \fI\fR does not match the text of -\fI\fR's match. (e.g. \fBword == ("foo_" *.)\fR matches words that do not -start with "foo_") - -.B \fI\fB: \fI\fR -Define \fI\fR to mean \fI\fR (pattern definition) - -.B # \fI\fR -A line comment - +.TP +\f[I]pattern\f[R] +The main pattern for bp to match. +By default, this pattern is a string pattern (see the \f[B]STRING +PATTERNS\f[R] section below). +.TP +\f[I]files\&...\f[R] +The input files to search. 
+If no input files are provided and data was piped in, that data will be +used instead. +If neither is provided, \f[B]bp\f[R] will search through all files in +the current directory and its subdirectories (recursively). .SH STRING PATTERNS -One of the most common use cases for pattern matching tools is matching plain, -literal strings, or strings that are primarily plain strings, with one or two -patterns. \fBbp\fR is designed around this fact. The default mode for bp -patterns is "string pattern mode". In string pattern mode, all characters -are interpreted literally except for the backslash (\fB\\\fR), which may be -followed by a bp pattern (see the \fBPATTERNS\fR section above). Optionally, -the bp pattern may be terminated by a semicolon (\fB;\fR). - +.PP +One of the most common use cases for pattern matching tools is matching +plain, literal strings, or strings that are primarily plain strings, +with one or two patterns. +\f[B]bp\f[R] is designed around this fact. +The default mode for bp patterns is \[lq]string pattern mode\[rq]. +In string pattern mode, all characters are interpreted literally except +for the backslash (\f[B]\[rs]\f[R]), which may be followed by a bp +pattern (see the \f[B]PATTERNS\f[R] section below). +Optionally, the bp pattern may be terminated by a semicolon +(\f[B];\f[R]). +.SH PATTERNS +.PP +\f[B]bp\f[R] patterns are based on a combination of Parsing +Expression Grammars and regular expression syntax. +The syntax is designed to map closely to verbal descriptions of the +patterns, and prefix operators are preferred over suffix operators (as +is common in regex syntax). +.PP +Some patterns additionally have \[lq]multi-line\[rq] variants, which +means that they include the newline character. 
+.TP +\f[I]pat1 pat2\f[R] +A sequence: \f[I]pat1\f[R] followed by \f[I]pat2\f[R] +.TP +\f[I]pat1\f[R] \f[B]/\f[R] \f[I]pat2\f[R] +A choice: \f[I]pat1\f[R], or if it doesn\[aq]t match, then +\f[I]pat2\f[R] +.TP +\f[B].\f[R] +Any character (excluding newline) +.TP +\f[B]\[ha]\f[R] +Start of a line +.TP +\f[B]\[ha]\[ha]\f[R] +Start of the text +.TP +\f[B]$\f[R] +End of a line (does not include newline character) +.TP +\f[B]$$\f[R] +End of the text +.TP +\f[B]_\f[R] +Zero or more whitespace characters, including spaces and tabs, but not +newlines. +.TP +\f[B]__\f[R] +Zero or more whitespace characters, including spaces, tabs, newlines, +and comments. +Comments are undefined by default, but may be defined by a separate +grammar file. +See the \f[B]GRAMMAR FILES\f[R] section for more info. +.TP +\f[B]\[dq]foo\[dq]\f[R], \f[B]\[aq]foo\[aq]\f[R] +The literal string \f[B]\[lq]foo\[rq]\f[R]. +Single and double quotes are treated the same. +Escape sequences are not allowed. +.TP +\f[B]{foo}\f[R] +The literal string \f[B]\[lq]foo\[rq]\f[R] with word boundaries on +either end. +Escape sequences are not allowed. +.TP +\f[B]\[ga]\f[R]\f[I]c\f[R] +The literal character \f[I]c\f[R] (e.g.\ \f[B]\[ga]\[at]\f[R] matches the +\[lq]\[at]\[rq] character) +.TP +\f[B]\[ga]\f[R]\f[I]c1\f[R]\f[B],\f[R]\f[I]c2\f[R] +The literal character \f[I]c1\f[R] or \f[I]c2\f[R] +(e.g.\ \f[B]\[ga]a,e,i,o,u\f[R]) +.TP +\f[B]\[ga]\f[R]\f[I]c1\f[R]\f[B]-\f[R]\f[I]c2\f[R] +The character range \f[I]c1\f[R] to \f[I]c2\f[R] +(e.g.\ \f[B]\[ga]a-z\f[R]). +Multiple ranges can be combined with a comma +(e.g.\ \f[B]\[ga]a-z,A-Z\f[R]). +.TP +\f[B]\[rs]\f[R]\f[I]esc\f[R] +An escape sequence (e.g.\ \f[B]\[rs]n\f[R], \f[B]\[rs]x1F\f[R], +\f[B]\[rs]033\f[R], etc.) 
+.TP +\f[B]\[rs]\f[R]\f[I]esc1\f[R]\f[B]-\f[R]\f[I]esc2\f[R] +An escape sequence range from \f[I]esc1\f[R] to \f[I]esc2\f[R] +(e.g.\ \f[B]\[rs]x00-x1F\f[R]) +.TP +\f[B]\[rs]N\f[R] +A special case escape that matches a \[lq]nodent\[rq]: one or more +newlines followed by the same indentation that occurs on the current +line. +.TP +\f[B]!\f[R] \f[I]pat\f[R] +Not \f[I]pat\f[R] +.TP +\f[B][\f[R] \f[I]pat\f[R] \f[B]]\f[R] +Maybe \f[I]pat\f[R] +.TP +\f[I]N\f[R] \f[I]pat\f[R] +Exactly \f[I]N\f[R] repetitions of \f[I]pat\f[R] (e.g.\ \f[B]5 +\[ga]x\f[R] matches \f[B]\[lq]xxxxx\[rq]\f[R]) +.TP +\f[I]N\f[R] \f[B]-\f[R] \f[I]M\f[R] \f[I]pat\f[R] +Between \f[I]N\f[R] and \f[I]M\f[R] repetitions of \f[I]pat\f[R] +(e.g.\ \f[B]2-3 \[ga]x\f[R] matches \f[B]\[lq]xx\[rq]\f[R] or +\f[B]\[lq]xxx\[rq]\f[R]) +.TP +\f[I]N\f[R]\f[B]+\f[R] \f[I]pat\f[R] +At least \f[I]N\f[R] repetitions of \f[I]pat\f[R] (e.g.\ \f[B]2+ +\[ga]x\f[R] matches \f[B]\[lq]xx\[rq]\f[R], \f[B]\[lq]xxx\[rq]\f[R], +\f[B]\[lq]xxxx\[rq]\f[R], etc.) +.TP +\f[B]*\f[R] \f[I]pat\f[R] +Some \f[I]pat\f[R]s (zero or more, e.g.\ \f[B]* \[ga]x\f[R] matches +\f[B]\[lq]\[rq]\f[R], \f[B]\[lq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R], +etc.) +.TP +\f[B]+\f[R] \f[I]pat\f[R] +One or more \f[I]pat\f[R]s (e.g.\ \f[B]+ \[ga]x\f[R] matches +\f[B]\[lq]x\[rq]\f[R], \f[B]\[lq]xx\[rq]\f[R], \f[B]\[lq]xxx\[rq]\f[R], +etc.) +.TP +\f[I]repeating-pat\f[R] \f[B]%\f[R] \f[I]sep\f[R] +\f[I]repeating-pat\f[R] separated by \f[I]sep\f[R] (e.g.\ \f[B]*word % +\[ga],\f[R] matches zero or more comma-separated words) +.TP +\f[B]..\f[R] \f[I]pat\f[R] +Any text (except newlines) up to and including \f[I]pat\f[R] +.TP +\f[B].. 
%\f[R] \f[I]skip\f[R] \f[I]pat\f[R] +Any text (except newlines) up to and including \f[I]pat\f[R], skipping +over instances of \f[I]skip\f[R] (e.g.\ \f[B]\[ga]\[dq]..\[ga]\[dq] % +(\[ga]\[rs].)\f[R]) +.TP +\f[B]<\f[R] \f[I]pat\f[R] +Just after \f[I]pat\f[R] (lookbehind) +.TP +\f[B]>\f[R] \f[I]pat\f[R] +Just before \f[I]pat\f[R] (lookahead) +.TP +\f[B]\[at]\f[R] \f[I]pat\f[R] +Capture \f[I]pat\f[R] +.TP +\f[B]foo\f[R] +The named pattern whose name is \f[B]\[lq]foo\[rq]\f[R]. +Pattern names come from definitions in grammar files or from named +captures. +Pattern names may contain dashes (\f[B]-\f[R]), but not underscores +(\f[B]_\f[R]), since the underscore is used to match whitespace. +See the \f[B]GRAMMAR FILES\f[R] section for more info. +.TP +\f[B]\[at]\f[R] \f[I]name\f[R] \f[B]=\f[R] \f[I]pat\f[R] +Let \f[I]name\f[R] equal \f[I]pat\f[R] (named capture). +Named captures can be used as backreferences like so: \f[B]\[at]foo=word +\[ga]( foo \[ga])\f[R] (matches \f[B]\[lq]asdf(asdf)\[rq]\f[R] or +\f[B]\[lq]baz(baz)\[rq]\f[R], but not \f[B]\[lq]foo(baz)\[rq]\f[R]) +.TP +\f[I]pat\f[R] \f[B]=> \[aq]\f[R]\f[I]replacement\f[R]\f[B]\[aq]\f[R] +Replace \f[I]pat\f[R] with \f[I]replacement\f[R]. +Note: \f[I]replacement\f[R] should be a string, and it may contain +references to captured values: \f[B]\[at]0\f[R] (the whole of +\f[I]pat\f[R]), \f[B]\[at]1\f[R] (the first capture in \f[I]pat\f[R]), +\f[B]\[at]\f[R]\f[I]foo\f[R] (the capture named \f[I]foo\f[R] in +\f[I]pat\f[R]), etc. +For example, \f[B]\[at]word _ \[at]rest=(*word % _) => \[dq]\[at]rest +\[at]1\[dq]\f[R] +.TP +\f[I]pat1\f[R] \f[B]==\f[R] \f[I]pat2\f[R] +Matches \f[I]pat1\f[R], if and only if \f[I]pat2\f[R] also matches the +text of \f[I]pat1\f[R]\[aq]s match. 
+(e.g.\ \f[B]word == (\[dq]foo_\[dq] *.)\f[R] matches words that start +with \f[B]\[lq]foo_\[rq]\f[R]) +.TP +\f[I]pat1\f[R] \f[B]!=\f[R] \f[I]pat2\f[R] +Matches \f[I]pat1\f[R], if and only if \f[I]pat2\f[R] does not match the +text of \f[I]pat1\f[R]\[aq]s match. +(e.g.\ \f[B]word != (\[dq]foo_\[dq] *.)\f[R] matches words that do not +start with \f[B]\[lq]foo_\[rq]\f[R]) +.TP +\f[I]name\f[R]\f[B]:\f[R] \f[I]pat\f[R] +Define \f[I]name\f[R] to mean \f[I]pat\f[R] (pattern definition) +.TP +\f[B]#\f[R] \f[I]comment\f[R] +A line comment +.SH GRAMMAR FILES +.PP +\f[B]bp\f[R] allows loading extra grammar files, which define patterns +which may be used for matching. +The \f[B]builtins\f[R] grammar file is loaded by default, and it defines +a few useful general-purpose patterns. +For example, it defines the \f[B]parens\f[R] rule, which matches pairs +of matching parentheses, accounting for nested inner parentheses: +.IP +.nf +\f[C] +bp -p \[aq]\[dq]my_func\[dq] parens\[aq] +\f[R] +.fi +.PP +\f[B]bp\f[R] also comes with a few grammar files for common programming +languages, which may be loaded on demand. +These grammar files are not comprehensive syntax definitions, but only +some common patterns. +For example, the c++ grammar file contains definitions for +\f[B]//\f[R]-style line comments as well as \f[B]/*\&...*/\f[R]-style +block comments. 
+Thus, you can find all comments with the string \[lq]TODO\[rq] with the +following command: +.IP +.nf +\f[C] +bp -g c++ -p \[aq]comment==(..%\[rs]n \[dq]TODO\[dq] ..%\[rs]n$$)\[aq] *.cpp +\f[R] +.fi .SH EXAMPLES .TP -.B -ls | bp foo -Find files containing the string "foo" (a string pattern) - +\f[B]ls | bp foo\f[R] +Find files containing the string \[dq]foo\[dq] (a string pattern) .TP -.B -ls | bp '.c\\$' -r '.h' -Find files ending with ".c" and replace the extension with ".h" - +\f[B]ls | bp \[aq].c\[rs]$\[aq] -r \[aq].h\[aq]\f[R] +Find files ending with \[dq].c\[dq] and replace the extension with +\[dq].h\[dq] .TP -.B -bp -p '{foobar} parens' my_file.py -Find the literal string \fB"foobar"\fR, assuming it's a complete word, followed -by a pair of matching parentheses in the file \fImy_file.py\fR - +\f[B]bp -p \[aq]{foobar} parens\[aq] my_file.py\f[R] +Find the literal string \f[B]\[dq]foobar\[dq]\f[R], assuming it\[aq]s a +complete word, followed by a pair of matching parentheses in the file +\f[I]my_file.py\f[R] .TP -.B -bp -g html -p html-element -D matching-tag=a foo.html -Using the \fIhtml\fR grammar, find all \fIhtml-element\fRs matching -the tag \fIa\fR in the file \fIfoo.html\fR - - -.SH AUTHOR -Bruce Hill (bruce@bruce-hill.com) +\f[B]bp -g html -p `html-element==(\[dq]** *pat* +: Just before *pat* (lookahead) + +**\@** *pat* +: Capture *pat* + +**foo** +: The named pattern whose name is **"foo"**. Pattern names come from definitions in +grammar files or from named captures. Pattern names may contain dashes (**-**), +but not underscores (**\_**), since the underscore is used to match whitespace. +See the **GRAMMAR FILES** section for more info. + +**\@** *name* **=** *pat* +: Let *name* equal *pat* (named capture). Named captures can be used as +backreferences like so: **\@foo=word \`( foo \`)** (matches **"asdf(asdf)"** or +**"baz(baz)"**, but not **"foo(baz)"**) + +*pat* **=\> \'***replacement***\'** +: Replace *pat* with *replacement*. 
Note: *replacement* should be a +string, and it may contain references to captured values: **\@0** (the whole of +*pat*), **\@1** (the first capture in *pat*), **\@***foo* (the capture +named *foo* in *pat*), etc. For example, **\@word \_ \@rest=(\*word % \_) +=\> \"\@rest \@1\"** + +*pat1* **==** *pat2* +: Matches *pat1*, if and only if *pat2* also matches the text of +*pat1*\'s match. (e.g. **word == (\"foo\_\" \*.)** matches words that start +with **"foo\_"**) + +*pat1* **!=** *pat2* +: Matches *pat1*, if and only if *pat2* does not match the text of +*pat1*\'s match. (e.g. **word != (\"foo\_\" \*.)** matches words that do +not start with **"foo\_"**) + +*name***:** *pat* +: Define *name* to mean *pat* (pattern definition) + +**\#** *comment* +: A line comment + + +# GRAMMAR FILES + +**bp** allows loading extra grammar files, which define patterns which may be +used for matching. The **builtins** grammar file is loaded by default, and it +defines a few useful general-purpose patterns. For example, it defines the +**parens** rule, which matches pairs of matching parentheses, accounting for +nested inner parentheses: + +``` +bp -p '"my_func" parens' +``` + +**bp** also comes with a few grammar files for common programming languages, +which may be loaded on demand. These grammar files are not comprehensive syntax +definitions, but only some common patterns. For example, the c++ grammar file +contains definitions for **//**-style line comments as well as +**/\*...\*/**-style block comments. 
Thus, you can find all comments with the +string "TODO" with the following command: + +``` +bp -g c++ -p 'comment==(..%\n "TODO" ..%\n$$)' *.cpp +``` + + +# EXAMPLES + +**ls \| bp foo** +: Find files containing the string \"foo\" (a string pattern) + +**ls \| bp \'.c\\\$\' -r \'.h\'** +: Find files ending with \".c\" and replace the extension with \".h\" + +**bp -p \'{foobar} parens\' my_file.py** +: Find the literal string **\"foobar\"**, assuming it\'s a complete word, +followed by a pair of matching parentheses in the file *my_file.py* + +**bp -g html -p 'html-element==(\"\