diff --git a/README.md b/README.md
index 18e4fed..9e1d4d3 100644
--- a/README.md
+++ b/README.md
@@ -70,8 +70,8 @@ Pattern | Meaning
`@foo=pat` | Let `foo` be the text of `pat` (used for text replacement and backreferences)
`pat => "replacement"` | Match `pat` and replace it with `replacement`
`(pat1 @keep=pat2) => "@keep"` | Match `pat1` followed by `pat2` and replace it with the text of `pat2`
-`pat1==pat2` | `pat1`, assuming `pat2` also matches with the same length
-`pat1!=pat2` | `pat1`, unless `pat2` also matches with the same length
+`pat1~pat2` | `pat1` when `pat2` can be found within the text matched by `pat1`
+`pat1!~pat2` | `pat1` when `pat2` cannot be found within the text matched by `pat1`
`name:pat2` | `name` is defined to mean `pat`
`# line comment` | A line comment
diff --git a/bp.1 b/bp.1
index 2ff84dc..4921be7 100644
--- a/bp.1
+++ b/bp.1
@@ -256,17 +256,17 @@ references to captured values: \f[B]\[at]0\f[R] (the whole of
For example, \f[B]\[at]word _ \[at]rest=(*word % _) => \[dq]\[at]rest
\[at]1\[dq]\f[R]
.TP
-\f[I]pat1\f[R] \f[B]==\f[R] \f[I]pat2\f[R]
-Matches \f[I]pat1\f[R], if and only if \f[I]pat2\f[R] also matches the
-text of \f[I]pat1\f[R]\[aq]s match.
-(e.g.\ \f[B]word == (\[dq]foo_\[dq] *.)\f[R] matches words that start
-with \f[B]\[lq]foo_\[rq]\f[R])
+\f[I]pat1\f[R] \f[B]\[ti]\f[R] \f[I]pat2\f[R]
+Matches when \f[I]pat1\f[R] matches and \f[I]pat2\f[R] can be found
+within the text of that match.
+(e.g.\ \f[B]comment \[ti] {TODO}\f[R] matches comments that contain the
+word \f[B]\[lq]TODO\[rq]\f[R])
.TP
-\f[I]pat1\f[R] \f[B]!=\f[R] \f[I]pat2\f[R]
-Matches \f[I]pat1\f[R], if and only if \f[I]pat2\f[R] does not match the
-text of \f[I]pat1\f[R]\[aq]s match.
-(e.g.\ \f[B]word == (\[dq]foo_\[dq] *.)\f[R] matches words that do not
-start with \f[B]\[lq]foo_\[rq]\f[R])
+\f[I]pat1\f[R] \f[B]!\[ti]\f[R] \f[I]pat2\f[R]
+Matches when \f[I]pat1\f[R] matches, but \f[I]pat2\f[R] cannot be found
+within the text of that match.
+(e.g.\ \f[B]comment !\[ti] {IGNORE}\f[R] matches only comments that do
+not contain the word \f[B]\[lq]IGNORE\[rq]\f[R])
.TP
\f[I]name\f[R]\f[B]:\f[R] \f[I]pat\f[R]
Define \f[I]name\f[R] to mean \f[I]pat\f[R] (pattern definition)
@@ -295,12 +295,12 @@ some common patterns.
For example, the c++ grammar file contains definitions for
\f[B]//\f[R]-style line comments as well as \f[B]/*\&...*/\f[R]-style
block comments.
-Thus, you can find all comments with the string \[lq]TODO\[rq] with the
+Thus, you can find all comments with the word \[lq]TODO\[rq] with the
following command:
.IP
.nf
\f[C]
-bp -g c++ -p \[aq]comment==(..%\[rs]n \[dq]TODO\[dq] ..%\[rs]n$$)\[aq] *.cpp
+bp -g c++ -p \[aq]comment\[ti]{TODO}\[aq] *.cpp
\f[R]
.fi
.SH EXAMPLES
@@ -313,12 +313,15 @@ Find files ending with \[dq].c\[dq] and replace the extension with
\[dq].h\[dq]
.TP
\f[B]bp -p \[aq]{foobar} parens\[aq] my_file.py\f[R]
-Find the literal string \f[B]\[dq]foobar\[dq]\f[R], assuming it\[aq]s a
-complete word, followed by a pair of matching parentheses in the file
-\f[I]my_file.py\f[R]
+Find the word \f[B]\[dq]foobar\[dq]\f[R], followed by a pair of matching
+parentheses in the file \f[I]my_file.py\f[R]
.TP
-\f[B]bp -g html -p `html-element==(\[dq] \"\@rest \@1\"**
-*pat1* **==** *pat2*
-: Matches *pat1*, if and only if *pat2* also matches the text of
-*pat1*\'s match. (e.g. **word == (\"foo\_\" \*.)** matches words that start
-with **"foo\_"**)
+*pat1* **~** *pat2*
+: Matches when *pat1* matches and *pat2* can be found within the text of that
+match. (e.g. **comment ~ {TODO}** matches comments that contain the word
+**"TODO"**)
-*pat1* **!=** *pat2*
-: Matches *pat1*, if and only if *pat2* does not match the text of
-*pat1*\'s match. (e.g. **word == (\"foo\_\" \*.)** matches words that do
-not start with **"foo\_"**)
+*pat1* **!~** *pat2*
+: Matches when *pat1* matches, but *pat2* cannot be found within the text of
+that match. (e.g. **comment !~ {IGNORE}** matches only comments that do not
+contain the word **"IGNORE"**)
*name***:** *pat*
: Define *name* to mean *pat* (pattern definition)
@@ -262,10 +262,10 @@ which may be loaded on demand. These grammar files are not comprehensive syntax
definitions, but only some common patterns. For example, the c++ grammar file
contains definitions for **//**-style line comments as well as
**/\*...\*/**-style block comments. Thus, you can find all comments with the
-string "TODO" with the following command:
+word "TODO" with the following command:
```
-bp -g c++ -p 'comment==(..%\n "TODO" ..%\n$$)' *.cpp
+bp -g c++ -p 'comment~{TODO}' *.cpp
```
@@ -278,9 +278,12 @@ bp -g c++ -p 'comment==(..%\n "TODO" ..%\n$$)' *.cpp
: Find files ending with \".c\" and replace the extension with \".h\"
**bp -p \'{foobar} parens\' my_file.py**
-: Find the literal string **\"foobar\"**, assuming it\'s a complete word,
-followed by a pair of matching parentheses in the file *my_file.py*
+: Find the word **\"foobar\"**, followed by a pair of matching parentheses in
+the file *my_file.py*
-**bp -g html -p 'html-element==(\"\ "Expected pattern after '~'"))
+Not-match-pat: @first=(suffixed-pat / simple-pat)__"!~"__@second=(pat / @!=(''=> "Expected pattern after '!~'"))
Dot: `. !`.
String: (
diff --git a/grammars/builtins.bp b/grammars/builtins.bp
index 6c0fc75..42a41ac 100644
--- a/grammars/builtins.bp
+++ b/grammars/builtins.bp
@@ -17,15 +17,15 @@ braces: `{ ..%(\n/braces/string) `}
parens: `( ..%(\n/parens/string) `)
string: `" ..%string-escape `" / `' ..%string-escape `'
string-escape: `\ (`x 2 Hex / 1-3 `0-7 / `u 1-4 Hex / .)
-left-id-edge: ^ / <(\x00-x7f!=id-char) / <((\xc0-xdf \x80-xbf)!=id-char)
- / <((\xe0-xef 2\x80-xbf)!=id-char) / <((\xf0-xf7 3\x80-xbf)!=id-char)
+left-id-edge: ^ / <(\x00-x7f!~(^^id-char)) / <((\xc0-xdf \x80-xbf)!~(^^id-char))
+ / <((\xe0-xef 2\x80-xbf)!~(^^id-char)) / <((\xf0-xf7 3\x80-xbf)!~(^^id-char))
right-id-edge: !id-char
-id: left-id-edge !`0-9 (+id-char)!=keyword
+id: left-id-edge !`0-9 !(keyword left-id-edge) +id-char
id-char: `a-z,A-Z,_,0-9
var: id
keyword: !"" # No keywords defined by default
-left-word-edge: ^ / <(\x00-x7f!=word-char) / <((\xc0-xdf \x80-xbf)!=word-char)
- / <((\xe0-xef 2\x80-xbf)!=word-char) / <((\xf0-xf7 3\x80-xbf)!=word-char)
+left-word-edge: ^ / <(\x00-x7f!~(^^word-char)) / <((\xc0-xdf \x80-xbf)!~(^^word-char))
+ / <((\xe0-xef 2\x80-xbf)!~(^^word-char)) / <((\xf0-xf7 3\x80-xbf)!~(^^word-char))
right-word-edge: !word-char
word-char: `a-z,A-Z,_,0-9,-,'
word: left-word-edge +word-char
diff --git a/grammars/html.bp b/grammars/html.bp
index 483db6a..457a183 100644
--- a/grammars/html.bp
+++ b/grammars/html.bp
@@ -8,15 +8,15 @@
doctype: "
-html-element: void-element / raw-element / template-element / normal-element
+element: void-element / raw-element / template-element / normal-element
void-element: `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr") __attributes__ [`/] __ `>
-template-element: "" ..%(\n / comment / html-element) ""
+template-element: "" ..%(\n / comment / element) ""
raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n (""tag__`>)
-normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / html-element) (""tag`>)
+normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) (""tag`>)
comment: ""
diff --git a/match.c b/match.c
index d4d6d27..f8f0ab4 100644
--- a/match.c
+++ b/match.c
@@ -114,8 +114,7 @@ static const char *match_backref(const char *str, match_t *cap, bool ignorecase)
for (match_t *child = cap->child; child; child = child->nextsibling) {
if (child->start > prev) {
size_t len = (size_t)(child->start - prev);
- if (ignorecase ? memicmp(str, prev, len) != 0
- : memcmp(str, prev, len) != 0) {
+ if ((ignorecase ? memicmp : memcmp)(str, prev, len) != 0) {
return NULL;
}
str += len;
@@ -128,8 +127,7 @@ static const char *match_backref(const char *str, match_t *cap, bool ignorecase)
}
if (cap->end > prev) {
size_t len = (size_t)(cap->end - prev);
- if (ignorecase ? memicmp(str, prev, len) != 0
- : memcmp(str, prev, len) != 0) {
+ if ((ignorecase ? memicmp : memcmp)(str, prev, len) != 0) {
return NULL;
}
str += len;
@@ -151,9 +149,11 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk
} else {
str = f->contents;
}
+ bool only_start = pat->type == BP_START_OF_FILE || (pat->type == BP_CHAIN && pat->args.multiple.first->type == BP_START_OF_FILE);
while (str <= f->end) {
match_t *m = match(defs, f, str, pat, ignorecase);
if (m) return m;
+ if (only_start) return NULL;
match_t *s;
if (skip && (s = match(defs, f, str, skip, ignorecase))) {
str = s->end > str ? s->end : str + 1;
@@ -201,8 +201,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
}
case BP_STRING: {
if (&str[pat->len] > f->end) return NULL;
- if (ignorecase ? memicmp(str, pat->args.string, (size_t)pat->len) != 0
- : memcmp(str, pat->args.string, (size_t)pat->len) != 0)
+ if ((ignorecase ? memicmp : memcmp)(str, pat->args.string, (size_t)pat->len) != 0)
return NULL;
return new_match(pat, str, str + pat->len, NULL);
}
@@ -360,7 +359,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
ADD_OWNER(m1->nextsibling, m2);
return new_match(pat, str, m2->end, m1);
}
- case BP_EQUAL: case BP_NOT_EQUAL: {
+ case BP_MATCH: case BP_NOT_MATCH: {
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
if (m1 == NULL) return NULL;
@@ -374,17 +373,13 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
.mmapped=f->mmapped,
.pats = NULL, .next = NULL,
};
- match_t *m2 = match(defs, &inner, str, pat->args.multiple.second, ignorecase);
- if ((m2 == NULL || m2->end != m1->end) == (pat->type == BP_EQUAL)) {
+ match_t *m2 = next_match(defs, &inner, NULL, pat->args.multiple.second, NULL, ignorecase);
+ if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
+ recycle_if_unused(&m2);
recycle_if_unused(&m1);
- if (m2 != NULL) recycle_if_unused(&m2);
return NULL;
}
- if (pat->type == BP_EQUAL) {
- ADD_OWNER(m1->nextsibling, m2);
- } else {
- recycle_if_unused(&m2);
- }
+ if (pat->type == BP_MATCH) ADD_OWNER(m1->nextsibling, m2);
return new_match(pat, m1->start, m1->end, m1);
}
case BP_REPLACE: {
diff --git a/pattern.c b/pattern.c
index 7c4c669..bc34e4b 100644
--- a/pattern.c
+++ b/pattern.c
@@ -139,7 +139,7 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second)
p = p->args.capture.capture_pat;
} else if (p->type == BP_CHAIN) {
p = p->args.multiple.second;
- } else if (p->type == BP_EQUAL || p->type == BP_NOT_EQUAL) {
+ } else if (p->type == BP_MATCH || p->type == BP_NOT_MATCH) {
p = p->args.pat;
} else break;
}
@@ -174,19 +174,20 @@ static pat_t *bp_simplepattern(file_t *f, const char *str)
// Expand postfix operators (if any)
str = after_spaces(pat->end);
- while (str+2 < f->end && (matchstr(&str, "!=") || matchstr(&str, "=="))) { // Equality == and inequality !=
- bool equal = str[-2] == '=';
+ while (str+2 < f->end) {
+ enum pattype_e type;
+ if (matchchar(&str, '~'))
+ type = BP_MATCH;
+ else if (matchstr(&str, "!~"))
+ type = BP_NOT_MATCH;
+ else break;
+
pat_t *first = pat;
pat_t *second = bp_simplepattern(f, str);
if (!second)
- file_err(f, str, str, "The '%c=' operator expects a pattern before and after.", equal?'=':'!');
- if (equal) {
- if (!(first->len == -1 || second->len == -1 || first->len == second->len))
- file_err(f, pat->start, second->end,
- "These two patterns cannot possibly give the same result (different lengths: %ld != %ld)",
- first->len, second->len);
- }
- pat = new_pat(f, str, second->end, first->len, equal ? BP_EQUAL : BP_NOT_EQUAL);
+ file_err(f, str, str, "The '%s' operator expects a pattern before and after.", type == BP_MATCH ? "~" : "!~");
+
+ pat = new_pat(f, str, second->end, first->len, type);
pat->args.multiple.first = first;
pat->args.multiple.second = second;
str = pat->end;
diff --git a/types.h b/types.h
index 19c6829..ae41d4a 100644
--- a/types.h
+++ b/types.h
@@ -22,8 +22,8 @@ enum pattype_e {
BP_CAPTURE,
BP_OTHERWISE,
BP_CHAIN,
- BP_EQUAL,
- BP_NOT_EQUAL,
+ BP_MATCH,
+ BP_NOT_MATCH,
BP_REPLACE,
BP_REF,
BP_BACKREF,