Added --skip flag for skipping over patterns

author: Bruce Hill <bruce@bruce-hill.com> 2021-01-20 16:12:46 -0800
committer: Bruce Hill <bruce@bruce-hill.com> 2021-01-20 16:12:46 -0800
commit: c46a8227d0bfc31e4f71b6441303348f5c31174b (patch)
tree: a06e3747fa09c0d737f724000f0e516f9c853b32
parent: b50ad0cad099c99d4e739fc465b69779f661b77d (diff)
7 files changed, 56 insertions, 29 deletions
diff --git a/README.md b/README.md
index 1bf5301..0453771 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,8 @@ It's written in pure C with no dependencies.
 * `-j` `--json` print matches as JSON objects
 * `-l` `--list-files` print only filenames containing matches
 * `-p` `--pattern <pat>` provide a pattern (equivalent to `bp '\(<pat>)'`)
-* `-r` `--replace <replacement>`  replace the input pattern with the given replacement
+* `-r` `--replace <replacement>` replace the input pattern with the given replacement
+* `-s` `--skip <skip pattern>` skip over the given pattern when looking for matches
 * `-c` `--context <N>` change how many lines of context are printed (`0`: no context, `all`: the whole file, `<N>` matching lines and `<N-1>` lines before/after)
 * `-g` `--grammar <grammar file>` use the specified file as a grammar
 * `-G` `--git` get filenames from git
diff --git a/bp.1 b/bp.1
index d7a0c35..d26b814 100644
--- a/bp.1
+++ b/bp.1
@@ -15,6 +15,7 @@ bp \- Bruce's Parsing Expression Grammar tool
 [\fI-C\fR|\fI--confirm\fR]
 [\fI-p\fR|\fI--pattern\fR \fI<pattern>\fR]
 [\fI-r\fR|\fI--replace\fR \fI<replacement>\fR]
+[\fI-s\fR|\fI--skip\fR \fI<skip pattern>\fR]
 [\fI-g\fR|\fI--grammar\fR \fI<grammar file>\fR]
 [\fI-G\fR|\fI--git\fR]
 [\fI-c\fR|\fI--conntext\fR \fI<N>\fR]
@@ -47,6 +48,11 @@ During in-place modification of a file, confirm before each modification.
 .B \-r\fR, \fB--replace \fI<replacement>\fR
 Replace all occurrences of the main pattern with the given string.
 
+.B \-s\fR, \fB--skip \fI<skip pattern>\fR
+While looking for matches, skip over \fB<skip pattern>\fR occurrences. This can
+be useful for behavior like \fBbp -s string\fR (avoiding matches inside string
+literals).
+
 .B \-g\fR, \fB--grammar \fI<grammar file>\fR
 Load the grammar from the given file.
 
diff --git a/bp.c b/bp.c
index 66299f9..85f7176 100644
--- a/bp.c
+++ b/bp.c
@@ -45,6 +45,7 @@ static const char *usage = (
     " -l --list-files                  list filenames only\n"
     " -p --pattern <pat>               provide a pattern (equivalent to bp '\\(<pat>)')\n"
     " -r --replace <replacement>       replace the input pattern with the given replacement\n"
+    " -s --skip <skip pattern>         skip over the given pattern when looking for matches\n"
     " -c --context <context>           set number of lines of context to print (all: the whole file, 0: only the match, 1: the line, N: N lines of context)\n"
     " -g --grammar <grammar file>      use the specified file as a grammar\n");
 
@@ -60,6 +61,7 @@ static bool print_line_numbers = false;
 static bool ignorecase = false;
 static bool verbose = false;
 static bool git_mode = false;
+static pat_t *skip = NULL;
 typedef enum { CONFIRM_ASK, CONFIRM_ALL, CONFIRM_NONE } confirm_t;
 static confirm_t confirm = CONFIRM_ALL;
 static enum {
@@ -154,7 +156,7 @@ static int is_text_file(const char *filename)
 static int print_matches_as_json(def_t *defs, file_t *f, pat_t *pattern)
 {
     int matches = 0;
-    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, ignorecase)); ) {
+    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, skip, ignorecase)); ) {
         if (++matches > 1)
             printf(",\n");
         printf("{\"filename\":\"%s\",", f->filename);
@@ -172,7 +174,7 @@ static int print_matches_as_json(def_t *defs, file_t *f, pat_t *pattern)
 static int explain_matches(def_t *defs, file_t *f, pat_t *pattern)
 {
     int matches = 0;
-    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, ignorecase)); ) {
+    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, skip, ignorecase)); ) {
         if (++matches == 1) {
             fprint_filename(stdout, f->filename);
         } else {
@@ -278,7 +280,7 @@ static int inplace_modify_file(def_t *defs, file_t *f, pat_t *pattern)
     FILE *inplace_file = NULL; // Lazy-open this on the first match
     int matches = 0;
     confirm_t confirm_file = confirm;
-    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, ignorecase)); ) {
+    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, skip, ignorecase)); ) {
         ++matches;
         printer_t err_pr = {.file = f, .context_lines = true, .use_color = true, .print_line_numbers = true};
         if (print_errors(&err_pr, m) > 0)
@@ -331,7 +333,7 @@ static int print_matches(def_t *defs, file_t *f, pat_t *pattern)
     };
 
     confirm_t confirm_file = confirm;
-    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, ignorecase)); ) {
+    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, skip, ignorecase)); ) {
         printer_t err_pr = {.file = f, .context_lines = true, .use_color = true, .print_line_numbers = true};
         if (print_errors(&err_pr, m) > 0)
             exit(EXIT_FAILURE);
@@ -369,7 +371,7 @@ static int process_file(def_t *defs, const char *filename, pat_t *pattern)
     if (mode == MODE_EXPLAIN) {
         matches += explain_matches(defs, f, pattern);
     } else if (mode == MODE_LISTFILES) {
-        match_t *m = next_match(defs, f, NULL, pattern, ignorecase);
+        match_t *m = next_match(defs, f, NULL, pattern, skip, ignorecase);
         if (m) {
             recycle_if_unused(&m);
             printf("%s\n", f->filename);
@@ -497,6 +499,17 @@ int main(int argc, char *argv[])
                     str = after_spaces(p->end);
                 }
             }
+        } else if (FLAG("-s")     || FLAG("--skip")) {
+            file_t *arg_file = spoof_file(&loaded_files, "<skip argument>", flag);
+            pat_t *s = bp_pattern(arg_file, arg_file->contents);
+            if (!s) {
+                fprint_line(stdout, arg_file, arg_file->contents, arg_file->end,
+                            "Failed to compile the skip argument");
+            } else if (after_spaces(s->end) < arg_file->end) {
+                fprint_line(stdout, arg_file, s->end, arg_file->end,
+                            "Failed to compile part of the skip argument");
+            }
+            skip = either_pat(arg_file, skip, s);
         } else if (FLAG("-c")     || FLAG("--context")) {
             if (streq(flag, "all"))
                 context_lines = ALL_CONTEXT;
diff --git a/match.c b/match.c
index e89abd4..c57bfcf 100644
--- a/match.c
+++ b/match.c
@@ -128,7 +128,7 @@ static const char *match_backref(const char *str, match_t *cap, bool ignorecase)
 //
 // Find the next match after prev (or the first match if prev is NULL)
 //
-match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, bool ignorecase)
+match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *skip, bool ignorecase)
 {
     const char *str;
     if (prev) {
@@ -137,9 +137,14 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, bool igno
     } else {
         str = f->contents;
     }
-    for (; str < f->end; ++str) {
+    while (str < f->end) {
         match_t *m = match(defs, f, str, pat, ignorecase);
         if (m) return m;
+        match_t *s;
+        if (skip && (s = match(defs, f, str, skip, ignorecase))) {
+            str = s->end > str ? s->end : str + 1;
+            recycle_if_unused(&s);
+        } else ++str;
     }
     return NULL;
 }
diff --git a/match.h b/match.h
index ee6fe6a..8584f04 100644
--- a/match.h
+++ b/match.h
@@ -10,7 +10,7 @@
 #include "types.h"
 
 __attribute__((nonnull(2,4)))
-match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, bool ignorecase);
+match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *skip, bool ignorecase);
 __attribute__((nonnull))
 match_t *get_capture(match_t *m, const char **id);
 __attribute__((nonnull))
diff --git a/pattern.c b/pattern.c
index 17560b4..ff9841f 100644
--- a/pattern.c
+++ b/pattern.c
@@ -120,14 +120,7 @@ static pat_t *expand_choices(file_t *f, pat_t *first)
     if (!second)
         file_err(f, str, str, "There should be a pattern here after a '/'");
     second = expand_choices(f, second);
-    pat_t *choice = new_pat(f, first->start, BP_OTHERWISE);
-    if (first->len == second->len)
-        choice->len = first->len;
-    else choice->len = -1;
-    choice->end = second->end;
-    choice->args.multiple.first = first;
-    choice->args.multiple.second = second;
-    return choice;
+    return either_pat(f, first, second);
 }
 
 //
@@ -139,7 +132,6 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second)
     if (first == NULL) return second;
     if (second == NULL) return first;
     pat_t *chain = new_pat(f, first->start, BP_CHAIN);
-    chain->start = first->start;
     if (first->len >= 0 && second->len >= 0)
         chain->len = first->len + second->len;
     else chain->len = -1;
@@ -163,6 +155,24 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second)
 }
 
 //
+// Combine two patterns into one BP_OTHERWISE pattern that matches either the
+// first or the second. A NULL argument short-circuits: the other is returned as-is.
+//
+pat_t *either_pat(file_t *f, pat_t *first, pat_t *second)
+{
+    if (first == NULL) return second;
+    if (second == NULL) return first;
+    pat_t *either = new_pat(f, first->start, BP_OTHERWISE);
+    if (first->len == second->len) // Both alternatives report the same len, so the choice keeps it
+        either->len = first->len;
+    else either->len = -1; // Lens differ: store -1 (presumably "no fixed length", as chain_together also does)
+    either->end = second->end; // The combined pattern starts at first->start and ends where `second` ends
+    either->args.multiple.first = first;
+    either->args.multiple.second = second;
+    return either;
+}
+
+//
 // Wrapper for _bp_simplepattern() that expands any postfix operators
 //
 static pat_t *bp_simplepattern(file_t *f, const char *str)
@@ -262,17 +272,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
 
                 pat->len = 1;
                 pat->end = str;
-
-                if (all == NULL) {
-                    all = pat;
-                } else {
-                    pat_t *either = new_pat(f, all->start, BP_OTHERWISE);
-                    either->end = pat->end;
-                    either->args.multiple.first = all;
-                    either->args.multiple.second = pat;
-                    either->len = 1;
-                    all = either;
-                }
+                all = either_pat(f, all, pat);
                 pat = NULL;
             } while (matchchar(&str, ','));
 
diff --git a/pattern.h b/pattern.h
index bcb9eac..908d3b1 100644
--- a/pattern.h
+++ b/pattern.h
@@ -15,6 +15,8 @@ __attribute__((nonnull(1,2)))
 pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement);
 __attribute__((nonnull(1)))
 pat_t *chain_together(file_t *f, pat_t *first, pat_t *second);
+__attribute__((nonnull(1)))
+pat_t *either_pat(file_t *f, pat_t *first, pat_t *second);
 __attribute__((nonnull))
 pat_t *bp_pattern(file_t *f, const char *str);
 __attribute__((nonnull))
author	Bruce Hill <bruce@bruce-hill.com>	2021-01-20 16:12:46 -0800
committer	Bruce Hill <bruce@bruce-hill.com>	2021-01-20 16:12:46 -0800
commit	c46a8227d0bfc31e4f71b6441303348f5c31174b (patch)
tree	a06e3747fa09c0d737f724000f0e516f9c853b32
parent	b50ad0cad099c99d4e739fc465b69779f661b77d (diff)