Added ~ and !~ operators as replacements for == and !=

This commit is contained in:
Bruce Hill 2021-05-19 23:41:57 -07:00
parent 3f0ab96f7f
commit 355e06a58e
9 changed files with 78 additions and 76 deletions

View File

@ -70,8 +70,8 @@ Pattern | Meaning
`@foo=pat` | Let `foo` be the text of `pat` (used for text replacement and backreferences)
`pat => "replacement"` | Match `pat` and replace it with `replacement`
`(pat1 @keep=pat2) => "@keep"` | Match `pat1` followed by `pat2` and replace it with the text of `pat2`
`pat1==pat2` | `pat1`, assuming `pat2` also matches with the same length
`pat1!=pat2` | `pat1`, unless `pat2` also matches with the same length
`pat1~pat2` | `pat1` when `pat2` can be found within the result
`pat1!~pat2` | `pat1` when `pat2` cannot be found within the result
`name:pat` | `name` is defined to mean `pat`
`# line comment` | A line comment

39
bp.1
View File

@ -256,17 +256,17 @@ references to captured values: \f[B]\[at]0\f[R] (the whole of
For example, \f[B]\[at]word _ \[at]rest=(*word % _) => \[dq]\[at]rest
\[at]1\[dq]\f[R]
.TP
\f[I]pat1\f[R] \f[B]==\f[R] \f[I]pat2\f[R]
Matches \f[I]pat1\f[R], if and only if \f[I]pat2\f[R] also matches the
text of \f[I]pat1\f[R]\[aq]s match.
(e.g.\ \f[B]word == (\[dq]foo_\[dq] *.)\f[R] matches words that start
with \f[B]\[lq]foo_\[rq]\f[R])
\f[I]pat1\f[R] \f[B]\[ti]\f[R] \f[I]pat2\f[R]
Matches when \f[I]pat1\f[R] matches and \f[I]pat2\f[R] can be found
within the text of that match.
(e.g.\ \f[B]comment \[ti] {TODO}\f[R] matches comments that contain the
word \f[B]\[lq]TODO\[rq]\f[R])
.TP
\f[I]pat1\f[R] \f[B]!=\f[R] \f[I]pat2\f[R]
Matches \f[I]pat1\f[R], if and only if \f[I]pat2\f[R] does not match the
text of \f[I]pat1\f[R]\[aq]s match.
(e.g.\ \f[B]word != (\[dq]foo_\[dq] *.)\f[R] matches words that do not
start with \f[B]\[lq]foo_\[rq]\f[R])
\f[I]pat1\f[R] \f[B]!\[ti]\f[R] \f[I]pat2\f[R]
Matches when \f[I]pat1\f[R] matches, but \f[I]pat2\f[R] cannot be found
within the text of that match.
(e.g.\ \f[B]comment !\[ti] {IGNORE}\f[R] matches only comments that do
not contain the word \f[B]\[lq]IGNORE\[rq]\f[R])
.TP
\f[I]name\f[R]\f[B]:\f[R] \f[I]pat\f[R]
Define \f[I]name\f[R] to mean \f[I]pat\f[R] (pattern definition)
@ -295,12 +295,12 @@ some common patterns.
For example, the c++ grammar file contains definitions for
\f[B]//\f[R]-style line comments as well as \f[B]/*\&...*/\f[R]-style
block comments.
Thus, you can find all comments with the string \[lq]TODO\[rq] with the
Thus, you can find all comments with the word \[lq]TODO\[rq] with the
following command:
.IP
.nf
\f[C]
bp -g c++ -p \[aq]comment==(..%\[rs]n \[dq]TODO\[dq] ..%\[rs]n$$)\[aq] *.cpp
bp -g c++ -p \[aq]comment\[ti]{TODO}\[aq] *.cpp
\f[R]
.fi
.SH EXAMPLES
@ -313,12 +313,15 @@ Find files ending with \[dq].c\[dq] and replace the extension with
\[dq].h\[dq]
.TP
\f[B]bp -p \[aq]{foobar} parens\[aq] my_file.py\f[R]
Find the literal string \f[B]\[dq]foobar\[dq]\f[R], assuming it\[aq]s a
complete word, followed by a pair of matching parentheses in the file
\f[I]my_file.py\f[R]
Find the word \f[B]\[dq]foobar\[dq]\f[R], followed by a pair of matching
parentheses in the file \f[I]my_file.py\f[R]
.TP
\f[B]bp -g html -p `html-element==(\[dq]<a \[dq]..%\[rs]n$$)' foo.html\f[R]
Using the \f[I]html\f[R] grammar, find all \f[I]html-element\f[R]s
matching the tag \f[I]a\f[R] in the file \f[I]foo.html\f[R]
\f[B]bp -g html -p \[aq]element \[ti] (\[ha]\[ha]\[dq]<a \[dq])\[aq] foo.html\f[R]
Using the \f[I]html\f[R] grammar, find all \f[I]element\f[R]s matching
the tag \f[I]a\f[R] in the file \f[I]foo.html\f[R]
.TP
\f[B]bp -g python -p \[aq]comment\[ti]{TODO}\[aq] *.py\f[R]
Find all comments with the word \f[B]\[lq]TODO\[rq]\f[R] in local python
files.
.SH AUTHORS
Bruce Hill (\f[I]bruce\[at]bruce-hill.com\f[R]).

33
bp.1.md
View File

@ -228,15 +228,15 @@ string, and it may contain references to captured values: **\@0** (the whole of
named *foo* in *pat*), etc. For example, **\@word \_ \@rest=(\*word % \_)
=\> \"\@rest \@1\"**
*pat1* **==** *pat2*
: Matches *pat1*, if and only if *pat2* also matches the text of
*pat1*\'s match. (e.g. **word == (\"foo\_\" \*.)** matches words that start
with **"foo\_"**)
*pat1* **~** *pat2*
: Matches when *pat1* matches and *pat2* can be found within the text of that
match. (e.g. **comment ~ {TODO}** matches comments that contain the word
**"TODO"**)
*pat1* **!=** *pat2*
: Matches *pat1*, if and only if *pat2* does not match the text of
*pat1*\'s match. (e.g. **word != (\"foo\_\" \*.)** matches words that do
not start with **"foo\_"**)
*pat1* **!~** *pat2*
: Matches when *pat1* matches, but *pat2* cannot be found within the text of
that match. (e.g. **comment !~ {IGNORE}** matches only comments that do not
contain the word **"IGNORE"**)
*name***:** *pat*
: Define *name* to mean *pat* (pattern definition)
@ -262,10 +262,10 @@ which may be loaded on demand. These grammar files are not comprehensive syntax
definitions, but only some common patterns. For example, the c++ grammar file
contains definitions for **//**-style line comments as well as
**/\*...\*/**-style block comments. Thus, you can find all comments with the
string "TODO" with the following command:
word "TODO" with the following command:
```
bp -g c++ -p 'comment==(..%\n "TODO" ..%\n$$)' *.cpp
bp -g c++ -p 'comment~{TODO}' *.cpp
```
@ -278,9 +278,12 @@ bp -g c++ -p 'comment==(..%\n "TODO" ..%\n$$)' *.cpp
: Find files ending with \".c\" and replace the extension with \".h\"
**bp -p \'{foobar} parens\' my_file.py**
: Find the literal string **\"foobar\"**, assuming it\'s a complete word,
followed by a pair of matching parentheses in the file *my_file.py*
: Find the word **\"foobar\"**, followed by a pair of matching parentheses in
the file *my_file.py*
**bp -g html -p 'html-element==(\"\<a \"..%\\n\$\$)' foo.html**
: Using the *html* grammar, find all *html-element*s matching the tag *a* in
the file *foo.html*
**bp -g html -p \'element ~ (^^\"\<a \")\' foo.html**
: Using the *html* grammar, find all *element*s matching the tag *a* in the
file *foo.html*
**bp -g python -p \'comment~{TODO}\' \*.py**
: Find all comments with the word **"TODO"** in local python files.

View File

@ -14,18 +14,18 @@ Def: @name=id __ `: __ (
# This is used for command line arguments:
String-pattern: ..%(\n / Nodent / Escape / `\ pat [`;])$$
pat: simple-pat !(__("!="/"==")) / suffixed-pat
pat: simple-pat !(__("!~"/"~")) / suffixed-pat
simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range
/ Escape / Repeat / Optional / No / After / Before / Capture
/ Start-of-File / Start-of-Line / End-of-File / End-of-Line / Ref / parens
suffixed-pat: (
Eq-pat
/ Not-eq-pat
Match-pat
/ Not-match-pat
)
Eq-pat: @first=(suffixed-pat / simple-pat)__"=="__@second=pat
Not-eq-pat: @first=(suffixed-pat / simple-pat)__"!="__@second=pat
Match-pat: @first=(suffixed-pat / simple-pat)__"~"__@second=(pat / @!=(''=> "Expected pattern after '~'"))
Not-match-pat: @first=(suffixed-pat / simple-pat)__"!~"__@second=(pat / @!=(''=> "Expected pattern after '!~'"))
Dot: `. !`.
String: (

View File

@ -17,15 +17,15 @@ braces: `{ ..%(\n/braces/string) `}
parens: `( ..%(\n/parens/string) `)
string: `" ..%string-escape `" / `' ..%string-escape `'
string-escape: `\ (`x 2 Hex / 1-3 `0-7 / `u 1-4 Hex / .)
left-id-edge: ^ / <(\x00-x7f!=id-char) / <((\xc0-xdf \x80-xbf)!=id-char)
/ <((\xe0-xef 2\x80-xbf)!=id-char) / <((\xf0-xf7 3\x80-xbf)!=id-char)
left-id-edge: ^ / <(\x00-x7f!~(^^id-char)) / <((\xc0-xdf \x80-xbf)!~(^^id-char))
/ <((\xe0-xef 2\x80-xbf)!~(^^id-char)) / <((\xf0-xf7 3\x80-xbf)!~(^^id-char))
right-id-edge: !id-char
id: left-id-edge !`0-9 (+id-char)!=keyword
id: left-id-edge !`0-9 !(keyword left-id-edge) +id-char
id-char: `a-z,A-Z,_,0-9
var: id
keyword: !"" # No keywords defined by default
left-word-edge: ^ / <(\x00-x7f!=word-char) / <((\xc0-xdf \x80-xbf)!=word-char)
/ <((\xe0-xef 2\x80-xbf)!=word-char) / <((\xf0-xf7 3\x80-xbf)!=word-char)
left-word-edge: ^ / <(\x00-x7f!~(^^word-char)) / <((\xc0-xdf \x80-xbf)!~(^^word-char))
/ <((\xe0-xef 2\x80-xbf)!~(^^word-char)) / <((\xf0-xf7 3\x80-xbf)!~(^^word-char))
right-word-edge: !word-char
word-char: `a-z,A-Z,_,0-9,-,'
word: left-word-edge +word-char

View File

@ -8,15 +8,15 @@
doctype: "<!DOCTYPE" ..%\n `>
html-element: void-element / raw-element / template-element / normal-element
element: void-element / raw-element / template-element / normal-element
void-element: `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr") __attributes__ [`/] __ `>
template-element: "<template>" ..%(\n / comment / html-element) "</template>"
template-element: "<template>" ..%(\n / comment / element) "</template>"
raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n ("</"tag__`>)
normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / html-element) ("</"tag`>)
normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) ("</"tag`>)
comment: "<!--" ..%\n "-->"

25
match.c
View File

@ -114,8 +114,7 @@ static const char *match_backref(const char *str, match_t *cap, bool ignorecase)
for (match_t *child = cap->child; child; child = child->nextsibling) {
if (child->start > prev) {
size_t len = (size_t)(child->start - prev);
if (ignorecase ? memicmp(str, prev, len) != 0
: memcmp(str, prev, len) != 0) {
if ((ignorecase ? memicmp : memcmp)(str, prev, len) != 0) {
return NULL;
}
str += len;
@ -128,8 +127,7 @@ static const char *match_backref(const char *str, match_t *cap, bool ignorecase)
}
if (cap->end > prev) {
size_t len = (size_t)(cap->end - prev);
if (ignorecase ? memicmp(str, prev, len) != 0
: memcmp(str, prev, len) != 0) {
if ((ignorecase ? memicmp : memcmp)(str, prev, len) != 0) {
return NULL;
}
str += len;
@ -151,9 +149,11 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk
} else {
str = f->contents;
}
bool only_start = pat->type == BP_START_OF_FILE || (pat->type == BP_CHAIN && pat->args.multiple.first->type == BP_START_OF_FILE);
while (str <= f->end) {
match_t *m = match(defs, f, str, pat, ignorecase);
if (m) return m;
if (only_start) return NULL;
match_t *s;
if (skip && (s = match(defs, f, str, skip, ignorecase))) {
str = s->end > str ? s->end : str + 1;
@ -201,8 +201,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
}
case BP_STRING: {
if (&str[pat->len] > f->end) return NULL;
if (ignorecase ? memicmp(str, pat->args.string, (size_t)pat->len) != 0
: memcmp(str, pat->args.string, (size_t)pat->len) != 0)
if ((ignorecase ? memicmp : memcmp)(str, pat->args.string, (size_t)pat->len) != 0)
return NULL;
return new_match(pat, str, str + pat->len, NULL);
}
@ -360,7 +359,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
ADD_OWNER(m1->nextsibling, m2);
return new_match(pat, str, m2->end, m1);
}
case BP_EQUAL: case BP_NOT_EQUAL: {
case BP_MATCH: case BP_NOT_MATCH: {
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
if (m1 == NULL) return NULL;
@ -374,17 +373,13 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
.mmapped=f->mmapped,
.pats = NULL, .next = NULL,
};
match_t *m2 = match(defs, &inner, str, pat->args.multiple.second, ignorecase);
if ((m2 == NULL || m2->end != m1->end) == (pat->type == BP_EQUAL)) {
match_t *m2 = next_match(defs, &inner, NULL, pat->args.multiple.second, NULL, ignorecase);
if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
recycle_if_unused(&m2);
recycle_if_unused(&m1);
if (m2 != NULL) recycle_if_unused(&m2);
return NULL;
}
if (pat->type == BP_EQUAL) {
ADD_OWNER(m1->nextsibling, m2);
} else {
recycle_if_unused(&m2);
}
if (pat->type == BP_MATCH) ADD_OWNER(m1->nextsibling, m2);
return new_match(pat, m1->start, m1->end, m1);
}
case BP_REPLACE: {

View File

@ -139,7 +139,7 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second)
p = p->args.capture.capture_pat;
} else if (p->type == BP_CHAIN) {
p = p->args.multiple.second;
} else if (p->type == BP_EQUAL || p->type == BP_NOT_EQUAL) {
} else if (p->type == BP_MATCH || p->type == BP_NOT_MATCH) {
p = p->args.pat;
} else break;
}
@ -174,19 +174,20 @@ static pat_t *bp_simplepattern(file_t *f, const char *str)
// Expand postfix operators (if any)
str = after_spaces(pat->end);
while (str+2 < f->end && (matchstr(&str, "!=") || matchstr(&str, "=="))) { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
bool equal = str[-2] == '=';
while (str+2 < f->end) {
enum pattype_e type;
if (matchchar(&str, '~'))
type = BP_MATCH;
else if (matchstr(&str, "!~"))
type = BP_NOT_MATCH;
else break;
pat_t *first = pat;
pat_t *second = bp_simplepattern(f, str);
if (!second)
file_err(f, str, str, "The '%c=' operator expects a pattern before and after.", equal?'=':'!');
if (equal) {
if (!(first->len == -1 || second->len == -1 || first->len == second->len))
file_err(f, pat->start, second->end,
"These two patterns cannot possibly give the same result (different lengths: %ld != %ld)",
first->len, second->len);
}
pat = new_pat(f, str, second->end, first->len, equal ? BP_EQUAL : BP_NOT_EQUAL);
file_err(f, str, str, "The '%s' operator expects a pattern before and after.", type == BP_MATCH ? "~" : "!~");
pat = new_pat(f, str, second->end, first->len, type);
pat->args.multiple.first = first;
pat->args.multiple.second = second;
str = pat->end;

View File

@ -22,8 +22,8 @@ enum pattype_e {
BP_CAPTURE,
BP_OTHERWISE,
BP_CHAIN,
BP_EQUAL,
BP_NOT_EQUAL,
BP_MATCH,
BP_NOT_MATCH,
BP_REPLACE,
BP_REF,
BP_BACKREF,