about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- README.md 5
-rw-r--r-- bp.1 14
-rw-r--r-- bp.1.md 12
-rw-r--r-- grammars/bp.bp 4
-rw-r--r-- grammars/html.bp 4
-rw-r--r-- grammars/lua.bp 4
-rw-r--r-- grammars/shell.bp 2
-rw-r--r-- match.c 2
-rw-r--r-- pattern.c 9
-rw-r--r-- pattern.h 1
-rw-r--r-- tests/24-backref.sh 2
11 files changed, 39 insertions, 20 deletions
diff --git a/README.md b/README.md
index 5e7d716..7e7d714 100644
--- a/README.md
+++ b/README.md
@@ -74,8 +74,9 @@ Pattern | Meaning
`+pat` | 1 or more occurrences of `pat` (shorthand for `1+pat`)
`<pat` | `pat` matches just before the current position (lookbehind)
`>pat` | `pat` matches just in front of the current position (lookahead)
-`@pat` | Capture `pat` (used for text replacement and backreferences)
-`@foo=pat` | Let `foo` be the text of `pat` (used for text replacement and backreferences)
+`@pat` | Capture `pat` (used for text replacement)
+`@foo=pat` | Capture `pat` with the name `foo` attached (used for text replacement)
+`@foo:pat` | Let `foo` be the text of `pat` (used for backreferences)
`pat => "replacement"` | Match `pat` and replace it with `replacement`
`(pat1 @keep=pat2) => "@keep"` | Match `pat1` followed by `pat2` and replace it with the text of `pat2`
`pat1~pat2` | `pat1` when `pat2` can be found within the result
diff --git a/bp.1 b/bp.1
index 2524840..2c53ca6 100644
--- a/bp.1
+++ b/bp.1
@@ -305,7 +305,8 @@ against the edges file or line.
Matches \f[I]pat\f[R], but does not consume any input (lookahead).
.TP
\f[B]\[at]\f[R] \f[I]pat\f[R]
-Capture \f[I]pat\f[R]
+Capture \f[I]pat\f[R].
+Captured patterns can be used in replacements.
.TP
\f[B]foo\f[R]
The named pattern whose name is \f[B]\[lq]foo\[rq]\f[R].
@@ -315,11 +316,16 @@ Pattern names may contain dashes (\f[B]-\f[R]), but not underscores
(\f[B]_\f[R]), since the underscore is used to match whitespace.
See the \f[B]GRAMMAR FILES\f[R] section for more info.
.TP
+\f[B]\[at]\f[R] \f[I]name\f[R] \f[B]:\f[R] \f[I]pat\f[R]
+For the rest of the current chain, define \f[I]name\f[R] to match the
+exact text that \f[I]pat\f[R] matched, i.e.\ a backreference.
+For example, \f[B]\[at]foo:word \[ga]( foo \[ga])\f[R] matches
+\f[B]\[lq]asdf(asdf)\[rq]\f[R] or \f[B]\[lq]baz(baz)\[rq]\f[R], but not
+\f[B]\[lq]foo(baz)\[rq]\f[R].
+.TP
\f[B]\[at]\f[R] \f[I]name\f[R] \f[B]=\f[R] \f[I]pat\f[R]
Let \f[I]name\f[R] equal \f[I]pat\f[R] (named capture).
-Named captures can be used as backreferences like so: \f[B]\[at]foo=word
-\[ga]( foo \[ga])\f[R] (matches \f[B]\[lq]asdf(asdf)\[rq]\f[R] or
-\f[B]\[lq]baz(baz)\[rq]\f[R], but not \f[B]\[lq]foo(baz)\[rq]\f[R])
+Named captures can be used in text replacements.
.TP
\f[I]pat\f[R] \f[B]=>\f[R] \f[B]\[dq]\f[R]\f[I]replacement\f[R]\f[B]\[dq]\f[R]
Replace \f[I]pat\f[R] with \f[I]replacement\f[R].
diff --git a/bp.1.md b/bp.1.md
index 869ea42..9473d24 100644
--- a/bp.1.md
+++ b/bp.1.md
@@ -252,7 +252,7 @@ against the edges file or line.
: Matches *pat*, but does not consume any input (lookahead).
`@` *pat*
-: Capture *pat*
+: Capture *pat*. Captured patterns can be used in replacements.
`foo`
: The named pattern whose name is **"foo"**. Pattern names come from
@@ -260,10 +260,14 @@ definitions in grammar files or from named captures. Pattern names may contain
dashes (`-`), but not underscores (`_`), since the underscore is used to match
whitespace. See the **GRAMMAR FILES** section for more info.
+`@` *name* `:` *pat*
+: For the rest of the current chain, define *name* to match the exact text
+that *pat* matched, i.e. a backreference. For example, `` @foo:word `( foo `) ``
+matches **"asdf(asdf)"** or **"baz(baz)"**, but not **"foo(baz)"**.
+
`@` *name* `=` *pat*
-: Let *name* equal *pat* (named capture). Named captures can be used as
-backreferences like so: `` @foo=word `( foo `) `` (matches **"asdf(asdf)"** or
-**"baz(baz)"**, but not **"foo(baz)"**)
+: Let *name* equal *pat* (named capture). Named captures can be used in
+text replacements.
*pat* `=>` `"`*replacement*`"`
: Replace *pat* with *replacement*. Note: *replacement* should be a string
diff --git a/grammars/bp.bp b/grammars/bp.bp
index 2782934..a77881b 100644
--- a/grammars/bp.bp
+++ b/grammars/bp.bp
@@ -6,7 +6,7 @@
# language grammars.
Grammar: __ *(Def [__`;])%__ __ [@error=(+(./\n) => "Could not parse this code: @0")]
-Def: @name=id __ `: __ (
+Def: @name=id __ 1-2`: __ (
@definition=extended-pat
/ $$ @error=(=>"No definition for rule")
/ @error=(..%\n>(`;/id_`:/$) => "Invalid definition: @0"))
@@ -62,7 +62,7 @@ Repeat: (
Optional: `[ __ extended-pat (__`] / @error=(=>"Expected closing square bracket here"))
After: `< __ pat
Before: `> __ pat
-Capture: `@ [__ @capture-name=(id/`!) __ !"=>" `=] __ (@capture=pat / @error=(=>"Expected pattern to capture"))
+Capture: `@ [__ @capture-name=(id/`!) __ !"=>" `=,:] __ (@capture=pat / @error=(=>"Expected pattern to capture"))
Replace: (
@replace-pat=[Chain-noreplace / pat] __ "=>" (__ @replacement=String / @error=(=>"Expected replacement string"))
)
diff --git a/grammars/html.bp b/grammars/html.bp
index 1ed83df..1000670 100644
--- a/grammars/html.bp
+++ b/grammars/html.bp
@@ -14,9 +14,9 @@ void-element: `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"me
template-element: "<template>" ..%(\n / comment / element) "</template>"
-raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n ("</"tag__`>)
+raw-element: `< @tag:("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n ("</"tag__`>)
-normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) ("</"tag__`>)
+normal-element: `< @tag:id __attributes__ `> ..%(\n / comment / element) ("</"tag__`>)
comment: "<!--" ..%\n "-->"
diff --git a/grammars/lua.bp b/grammars/lua.bp
index 3d3b862..4f417d2 100644
--- a/grammars/lua.bp
+++ b/grammars/lua.bp
@@ -6,8 +6,8 @@
# full parse tree, and having one makes the task considerably more complicated.
# See the accompanying README.md for more info.
-comment: "--" (`[ @eqs=*`= `[ ..%\n (`]eqs`]) / ..$)
-string: `"..%string-escape `" / `' ..%string-escape `' / `[ @eqs=*`= `[ ..%\n (`]eqs`])
+comment: "--" (`[ @eqs:*`= `[ ..%\n (`]eqs`]) / ..$)
+string: `"..%string-escape `" / `' ..%string-escape `' / `[ @eqs:*`= `[ ..%\n (`]eqs`])
table: `{ ..%(table/string/comment/\n) `}
keyword: ("and" / "break" / "do" / "else" / "elseif" / "end" / "false" / "for" /
"function" / "goto" / "if" / "in" / "local" / "nil" / "not" / "or" /
diff --git a/grammars/shell.bp b/grammars/shell.bp
index 76cdcfa..d305d6c 100644
--- a/grammars/shell.bp
+++ b/grammars/shell.bp
@@ -7,7 +7,7 @@
# See the accompanying README.md for more info.
comment: `#..$
-string: `" ..%(string-escape / subcommand / \n) `" / `' ..%\n `' / "<<" _ @delim=id _$ ..%\n (^delim$)
+string: `" ..%(string-escape / subcommand / \n) `" / `' ..%\n `' / "<<" _ @delim:id _$ ..%\n (^delim$)
string-escape: `\ `",`
subcommand: `` ..%\n `` / "$" (parens/braces)
keyword: ("echo" / "read" / "set" / "unset" / "readonly" / "shift" / "export" / "if" / "fi" /
diff --git a/match.c b/match.c
index 474bd85..a9f78ec 100644
--- a/match.c
+++ b/match.c
@@ -562,7 +562,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
match_t *m2;
// Push backrefs and run matching, then cleanup
- if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) {
+ if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name && m1->pat->args.capture.backreffable) {
// Temporarily add a rule that the backref name matches the
// exact string of the original match (no replacements)
pat_t *backref;
diff --git a/pattern.c b/pattern.c
index 53970ac..edd007d 100644
--- a/pattern.c
+++ b/pattern.c
@@ -479,7 +479,13 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
size_t namelen = 0;
const char *a = after_name(str, end);
const char *eq = a;
- if (a > str && !matchstr(&eq, "=>", false, end) && matchchar(&eq, '=', false, end)) {
+ bool backreffable = false;
+ if (a > str && matchchar(&eq, ':', false, end)) {
+ name = str;
+ namelen = (size_t)(a-str);
+ str = eq;
+ backreffable = true;
+ } else if (a > str && !matchstr(&eq, "=>", false, end) && matchchar(&eq, '=', false, end)) {
name = str;
namelen = (size_t)(a-str);
str = eq;
@@ -492,6 +498,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
capture->args.capture.capture_pat = pat;
capture->args.capture.name = name;
capture->args.capture.namelen = namelen;
+ capture->args.capture.backreffable = backreffable;
return capture;
}
// Start of file/line
diff --git a/pattern.h b/pattern.h
index 89d57b8..16b9654 100644
--- a/pattern.h
+++ b/pattern.h
@@ -82,6 +82,7 @@ typedef struct pat_s {
struct pat_s *capture_pat;
const char *name;
size_t namelen;
+ bool backreffable;
} capture;
struct {
struct match_s *match;
diff --git a/tests/24-backref.sh b/tests/24-backref.sh
index 01a6280..28a203b 100644
--- a/tests/24-backref.sh
+++ b/tests/24-backref.sh
@@ -1,3 +1,3 @@
# With @-capturing, you can reference previous captures
# Example: bp -p '@first:`a-z .. first' matches "aba" and "xyzx", but not "abc"
-bp -p '@first=+Abc _ +Abc _ first'
+bp -p '@first:+Abc _ +Abc _ first'