aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md2
-rw-r--r--bp.112
-rw-r--r--bp.c252
-rw-r--r--compiler.c11
-rw-r--r--grammars/bpeg.bp3
-rw-r--r--grammars/builtins.bp20
-rw-r--r--printing.c229
-rw-r--r--printing.h18
-rw-r--r--types.h1
-rw-r--r--vm.c29
-rw-r--r--vm.h2
11 files changed, 367 insertions, 212 deletions
diff --git a/README.md b/README.md
index e3f41ed..f90b62d 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ It's written in pure C with no dependencies.
* `-p` `--pattern <pat>` provide a pattern (equivalent to `bp '\(<pat>)'`)
* `-P` `--pattern-string <pat>` provide a string pattern (equivalent to `bp '<pat>'`, but may be useful if `'<pat>'` begins with a '-')
* `-r` `--replace <replacement>` replace the input pattern with the given replacement
-* `-m` `--mode <mode>` set the behavior mode (defult: `find-all`)
+* `-c` `--context <N>` change how many lines of context are printed (`0`: no context, only the matched text; `all`: the whole file; `<N>`: each matching line plus `<N-1>` lines before and after it)
* `-g` `--grammar <grammar file>` use the specified file as a grammar
See `man ./bp.1` for more details.
diff --git a/bp.1 b/bp.1
index fb10a56..2c48799 100644
--- a/bp.1
+++ b/bp.1
@@ -18,7 +18,7 @@ bp \- Bruce's Parsing Expression Grammar tool
[\fI-D\fR|\fI--define-string\fR \fI<name>\fR:\fI<string-pattern>\fR]
[\fI-r\fR|\fI--replace\fR \fI<replacement>\fR]
[\fI-g\fR|\fI--grammar\fR \fI<grammar file>\fR]
-[\fI-m\fR|\fI--mode\fR \fI<mode>\fR]
+[\fI-c\fR|\fI--context\fR \fI<N>\fR]
\fI<pattern\fR
[[--] \fI<input files...>\fR]
.SH DESCRIPTION
@@ -54,10 +54,12 @@ Replace all occurrences of the main pattern with the given string.
.B \-g\fR, \fB--grammar \fI<grammar file>\fR
Load the grammar from the given file.
-.B \-m\fR, \fB--mode \fI<mode>\fR
-The mode to operate in. Options are: \fIfind-all\fR (the default),
-\fIonly-matches\fR, \fIpattern\fR, \fIreplacement\fR, \fIreplace-all\fR
-(implied by \fB--replace\fR), or any other grammar rule name.
+.B \-c\fR, \fB--context \fI<N>\fR
+The number of lines of context to print. If \fI<N>\fR is 0, print only the
+exact text of the matches. If \fI<N>\fR is "all", print the entire file.
+Otherwise, if \fI<N>\fR is a positive integer, print the whole line on which
+matches occur, as well as the \fI<N-1>\fR lines before and after the match. The
+default value for this argument is 1 (print whole lines where matches occur).
.B \--help
Print the usage and exit.
diff --git a/bp.c b/bp.c
index 3257d0f..941a2b0 100644
--- a/bp.c
+++ b/bp.c
@@ -36,15 +36,16 @@ static const char *usage = (
" -p --pattern <pat> provide a pattern (equivalent to bp '\\(<pat>)')\n"
" -P --pattern-string <pat> provide a string pattern (may be useful if '<pat>' begins with a '-')\n"
" -r --replace <replacement> replace the input pattern with the given replacement\n"
- " -m --mode <mode> set the behavior mode (defult: find-all)\n"
+ " -c --context <context> set number of lines of context to print (all: the whole file, 0: only the match, 1: the line, N: N lines of context)\n"
" -g --grammar <grammar file> use the specified file as a grammar\n");
-static print_options_t print_options = 0;
+#define USE_DEFAULT_CONTEXT -2
+#define ALL_CONTEXT -1
+static int print_color = 0;
+static int print_line_numbers = 0;
__attribute__((nonnull))
static char *getflag(const char *flag, char *argv[], int *i);
-__attribute__((nonnull(3)))
-static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags);
//
// Return a pointer to the value part of a flag, if present, otherwise NULL.
@@ -67,65 +68,172 @@ static char *getflag(const char *flag, char *argv[], int *i)
}
//
+// Scan the first few dozen bytes of a file and return 1 if the contents all
+// look like printable text characters, otherwise return 0.
+// Also returns 0 when the file cannot be opened or read. An empty file counts
+// as text. Accepted bytes are tab, newline, carriage return and anything
+// >= 0x20, so bytes above 0x7e (e.g. UTF-8 sequences) pass the check.
+//
+static int is_text_file(const char *filename)
+{
+    int fd = open(filename, O_RDONLY);
+    if (fd < 0) return 0;
+    unsigned char buf[64];
+    // read() returns ssize_t (-1 on error); keep the full type.
+    ssize_t len = read(fd, buf, sizeof(buf));
+    // Close before inspecting the result so a failed read (e.g. on a
+    // directory) does not leak the descriptor.
+    (void)close(fd);
+    if (len < 0) return 0;
+
+    for (ssize_t i = 0; i < len; i++) {
+        if (!(buf[i] == '\t' || buf[i] == '\n' || buf[i] == '\r'
+              || buf[i] >= '\x20'))
+            return 0;
+    }
+    return 1;
+}
+
+//
+// Print matches in JSON format.
+//
+static int print_matches_as_json(def_t *defs, file_t *f, vm_op_t *pattern, unsigned int flags)
+{
+    // Each match becomes one JSON object; objects after the first are
+    // preceded by a comma so the caller can wrap the output in [ ... ].
+    const int verbose = (flags & BP_VERBOSE) ? 1 : 0;
+    int count = 0;
+    match_t *cur = next_match(defs, f, NULL, pattern, flags);
+    while (cur != NULL) {
+        count += 1;
+        if (count > 1)
+            printf(",\n");
+        // A file without a name is reported as "-".
+        printf("{\"filename\":\"%s\",", f->filename ? f->filename : "-");
+        printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
+               0, f->end - f->contents);
+        json_match(f->contents, cur, verbose);
+        printf("]}}\n");
+        cur = next_match(defs, f, cur, pattern, flags);
+    }
+    // Number of matches printed for this file.
+    return count;
+}
+
+//
+// Print matches in a visual explanation style
+//
+static int explain_matches(def_t *defs, file_t *f, vm_op_t *pattern, unsigned int flags)
+{
+    // The filename header is printed once, before the first match; later
+    // matches are separated from the previous one by two newlines.
+    int count = 0;
+    match_t *cur = NULL;
+    while ((cur = next_match(defs, f, cur, pattern, flags)) != NULL) {
+        count++;
+        if (count > 1) {
+            printf("\n\n");
+        } else if (print_color) {
+            printf("\033[0;1;4;33m%s\033[0m\n", f->filename);
+        } else {
+            printf("%s:\n", f->filename);
+        }
+        visualize_match(cur);
+    }
+    // Number of matches that were visualized.
+    return count;
+}
+
+//
+// Replace a file's contents with the text version of its matches.
+// (Useful for replacements)
+// The file is only reopened for writing once a first match has been found, so
+// a file with no matches is left untouched. The name of every modified file
+// is printed to stdout. Returns the number of matches.
+//
+static int inplace_modify_file(def_t *defs, file_t *f, vm_op_t *pattern, int context, unsigned int flags)
+{
+    // Need to do this before matching:
+    // NOTE(review): presumably this detaches the contents from the file on
+    // disk so that fopen(..., "w") below cannot invalidate them -- confirm.
+    intern_file(f);
+
+    // Never emit color codes or line numbers into the rewritten file.
+    printer_t pr = {
+        .file = f,
+        .context_lines = context,
+        .use_color = 0,
+        .print_line_numbers = 0,
+    };
+
+    FILE *inplace_file = NULL; // Lazy-open this on the first match
+    int matches = 0;
+    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
+        ++matches;
+        if (print_errors(&pr, m) > 0)
+            exit(1);
+        // Lazy-open file for writing upon first match:
+        if (inplace_file == NULL) {
+            inplace_file = fopen(f->filename, "w");
+            check(inplace_file, "Could not open file for writing: %s\n", f->filename);
+        }
+        print_match(inplace_file, &pr, m);
+    }
+
+    if (inplace_file) {
+        // Write the text that follows the last match. Without this call the
+        // rewritten file would end right after the final match.
+        print_match(inplace_file, &pr, NULL);
+        printf("%s\n", f->filename);
+        fclose(inplace_file);
+    }
+    return matches;
+}
+
+//
+// Print all the matches in a file.
+//
+static int print_matches(def_t *defs, file_t *f, vm_op_t *pattern, int context, unsigned int flags)
+{
+    // Counts, across calls, how many files have had a header printed, so
+    // that a blank line separates the output of consecutive files.
+    static int files_with_output = 0;
+    printer_t pr = {
+        .file = f,
+        .context_lines = context,
+        .use_color = print_color,
+        .print_line_numbers = print_line_numbers,
+    };
+
+    int count = 0;
+    match_t *cur = next_match(defs, f, NULL, pattern, flags);
+    while (cur != NULL) {
+        // Abort the whole program if the match contains an error capture.
+        if (print_errors(&pr, cur) > 0)
+            exit(1);
+
+        count += 1;
+        if (count == 1) {
+            // First match in this file: emit the filename header.
+            if (files_with_output > 0) printf("\n");
+            files_with_output += 1;
+            if (print_color)
+                printf("\033[0;1;4;33m%s\033[0m\n", f->filename);
+            else
+                printf("%s:\n", f->filename);
+        }
+        print_match(stdout, &pr, cur);
+        cur = next_match(defs, f, cur, pattern, flags);
+    }
+
+    if (count == 0)
+        return 0;
+
+    // Print trailing context lines:
+    print_match(stdout, &pr, NULL);
+    // Ensure a trailing newline:
+    if (pr.pos > f->contents && pr.pos[-1] != '\n')
+        printf("\n");
+    return count;
+}
+
+//
// For a given filename, open the file and attempt to match the given pattern
// against it, printing any results according to the flags.
//
-static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags)
+static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, int context, unsigned int flags)
{
- static int printed_matches = 0;
- int success = 0;
file_t *f = load_file(NULL, filename);
check(f, "Could not open file: %s", filename);
- if (flags & BP_INPLACE) // Need to do this before matching
- intern_file(f);
- match_t *m = match(defs, f, f->contents, pattern, flags);
- if (m && print_errors(f, m, print_options) > 0)
- exit(1);
-
- if (m != NULL && m->end > m->start + 1) {
- success = 1;
- ++printed_matches;
-
- if (flags & BP_EXPLAIN) {
- if (filename)
- printf("\033[1;4m%s\033[0m\n", filename);
- visualize_match(m);
- } else if (flags & BP_LISTFILES) {
- printf("%s\n", filename);
- } else if (flags & BP_JSON) {
- if (printed_matches > 1)
- printf(",\n");
- printf("{\"filename\":\"%s\",", filename ? filename : "-");
- printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
- 0, f->end - f->contents);
- json_match(f->contents, m, (flags & BP_VERBOSE) ? 1 : 0);
- printf("]}}\n");
- } else if (flags & BP_INPLACE && filename) {
- FILE *out = fopen(filename, "w");
- print_match(out, f, m, 0);
- fclose(out);
- printf("%s\n", filename);
- } else {
- if (printed_matches > 1)
- fputc('\n', stdout);
- if (filename) {
- if (print_options & PRINT_COLOR)
- printf("\033[1;4;33m%s\033[0m\n", filename);
- else
- printf("%s:\n", filename);
- }
- print_match(stdout, f, m,
- filename ? print_options : print_options & (print_options_t)~PRINT_LINE_NUMBERS);
+
+ int matches = 0;
+ if (flags & BP_EXPLAIN) {
+ matches += explain_matches(defs, f, pattern, flags);
+ } else if (flags & BP_LISTFILES) {
+ match_t *m = next_match(defs, f, NULL, pattern, flags);
+ if (m) {
+ recycle_if_unused(&m);
+ printf("%s\n", f->filename);
+ matches += 1;
}
+ } else if (flags & BP_JSON) {
+ matches += print_matches_as_json(defs, f, pattern, flags);
+ } else if (flags & BP_INPLACE) {
+ matches += inplace_modify_file(defs, f, pattern, context, flags);
+ } else {
+ matches += print_matches(defs, f, pattern, context, flags);
}
- recycle_if_unused(&m);
#ifdef DEBUG_HEAP
check(recycle_all_matches() == 0, "Memory leak: there should no longer be any matches in use at this point.");
#endif
destroy_file(&f);
-
- return success;
+ fflush(stdout);
+ return matches;
}
#define FLAG(f) (flag=getflag((f), argv, &i))
@@ -133,9 +241,9 @@ static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, uns
int main(int argc, char *argv[])
{
unsigned int flags = 0;
+ int context = USE_DEFAULT_CONTEXT;
char *flag = NULL;
char path[PATH_MAX] = {0};
- const char *mode = "find-all";
def_t *defs = NULL;
@@ -145,6 +253,10 @@ int main(int argc, char *argv[])
file_t *pat_file = spoof_file(&loaded_files, "<pattern>", "pattern");
vm_op_t *pattern = bp_pattern(pat_file, pat_file->contents);
+ // Define an opcode that is just a reference to the rule `replacement`
+ file_t *rep_file = spoof_file(&loaded_files, "<replacement>", "replacement");
+ vm_op_t *replacement = bp_pattern(rep_file, rep_file->contents);
+
// Load builtins:
if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) {
file_t *f = load_file(&loaded_files, "/etc/xdg/bp/builtins.bp");
@@ -174,6 +286,7 @@ int main(int argc, char *argv[])
flags |= BP_JSON;
} else if (streq(argv[i], "--inplace")) {
flags |= BP_INPLACE;
+ context = ALL_CONTEXT;
} else if (streq(argv[i], "--ignore-case")) {
flags |= BP_IGNORECASE;
} else if (streq(argv[i], "--list-files")) {
@@ -185,7 +298,8 @@ int main(int argc, char *argv[])
vm_op_t *rep = bp_replacement(replace_file, pattern, replace_file->contents);
check(rep, "Replacement failed to compile: %s", flag);
defs = with_def(defs, replace_file, strlen("replacement"), "replacement", rep);
- mode = "replace-all";
+ pattern = replacement;
+ if (context == USE_DEFAULT_CONTEXT) context = 1;
} else if (FLAG("--grammar") || FLAG("-g")) {
file_t *f = load_file(&loaded_files, flag);
if (f == NULL) {
@@ -231,8 +345,11 @@ int main(int argc, char *argv[])
check(p, "Pattern failed to compile: %s", flag);
defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p);
++npatterns;
- } else if (FLAG("--mode") || FLAG("-m")) {
- mode = flag;
+ } else if (FLAG("--context") || FLAG("-c")) {
+ if (streq(flag, "all"))
+ context = ALL_CONTEXT;
+ else
+ context = (int)strtol(flag, NULL, 10);
} else if (argv[i][0] == '-' && argv[i][1] && argv[i][1] != '-') { // single-char flags
for (char *c = &argv[i][1]; *c; ++c) {
switch (*c) {
@@ -240,7 +357,7 @@ int main(int argc, char *argv[])
case 'v': flags |= BP_VERBOSE; break; // -v
case 'e': flags |= BP_EXPLAIN; break; // -e
case 'j': flags |= BP_JSON; break; // -j
- case 'I': flags |= BP_INPLACE; break; // -I
+ case 'I': flags |= BP_INPLACE; context = ALL_CONTEXT; break; // -I
case 'i': flags |= BP_IGNORECASE; break; // -i
case 'l': flags |= BP_LISTFILES; break; // -l
default:
@@ -268,24 +385,28 @@ int main(int argc, char *argv[])
return 1;
}
- if (isatty(STDOUT_FILENO)) {
- print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS;
+ if (context < 0) {
+ if (context == USE_DEFAULT_CONTEXT) context = 1;
+ else if (context != ALL_CONTEXT) context = 0;
}
- // Define an opcode that is just a reference to the overarching mode (e.g. find-all)
- if (lookup(defs, mode) == NULL) {
- printf("The mode '%s' is not defined.\n", mode);
+ if (flags & BP_INPLACE && context != ALL_CONTEXT) {
+ printf("--inplace and --context are mutually exclusive.\n"
+ "Please drop one of the two arguments and try again.\n");
return 1;
}
- file_t *mode_file = spoof_file(&loaded_files, "<mode>", mode);
- vm_op_t *mode_op = bp_pattern(mode_file, mode_file->contents);
+
+ if (isatty(STDOUT_FILENO)) {
+ print_color = 1;
+ print_line_numbers = 1;
+ }
int found = 0;
if (flags & BP_JSON) printf("[");
if (i < argc) {
// Files pass in as command line args:
for (int nfiles = 0; i < argc; nfiles++, i++) {
- found += process_file(defs, argv[i], mode_op, flags);
+ found += process_file(defs, argv[i], pattern, context, flags);
}
} else if (isatty(STDIN_FILENO)) {
// No files, no piped in input, so use * **/*:
@@ -293,12 +414,13 @@ int main(int argc, char *argv[])
glob("*", 0, NULL, &globbuf);
glob("**/*", GLOB_APPEND, NULL, &globbuf);
for (size_t i = 0; i < globbuf.gl_pathc; i++) {
- found += process_file(defs, globbuf.gl_pathv[i], mode_op, flags);
+ if (is_text_file(globbuf.gl_pathv[i]))
+ found += process_file(defs, globbuf.gl_pathv[i], pattern, context, flags);
}
globfree(&globbuf);
} else {
// Piped in input:
- found += process_file(defs, NULL, mode_op, flags);
+ found += process_file(defs, NULL, pattern, context, flags);
}
if (flags & BP_JSON) printf("]\n");
diff --git a/compiler.c b/compiler.c
index f36a7a4..951462a 100644
--- a/compiler.c
+++ b/compiler.c
@@ -465,17 +465,6 @@ static vm_op_t *_bp_simplepattern(file_t *f, const char *str)
op->end = pat->end;
return op;
}
- // Hide
- case '~': {
- vm_op_t *pat = bp_simplepattern(f, str);
- if (!pat)
- file_err(f, str, str, "There should be a pattern after this '~'");
- vm_op_t *op = new_op(f, start, VM_HIDE);
- op->len = 0;
- op->args.pat = pat;
- op->end = pat->end;
- return op;
- }
// Special rules:
case '_': case '^': case '$': case '|': {
const char *name = NULL;
diff --git a/grammars/bpeg.bp b/grammars/bpeg.bp
index 23a2250..e8d7259 100644
--- a/grammars/bpeg.bp
+++ b/grammars/bpeg.bp
@@ -11,7 +11,7 @@ String-pattern: ..$$ % (\n / Nodent / Escape / `\ pat [`;])
pat: simple-pat !(__("!="/"==")) / suffixed-pat
simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range
- / Escape / Repeat / Optional / No / Hide / After / Before / Capture
+ / Escape / Repeat / Optional / No / After / Before / Capture
/ Ref / parens
suffixed-pat: (
@@ -41,7 +41,6 @@ escape-sequence: (
/ `x 2 `0-9,a-f,A-F
)
No: `! (__@pat / @!=(''=>"Expected a pattern after the exclamation mark"))
-Hide: `~ (__@pat / @!=(''=>"Expected a pattern after the tilde"))
Nodent: `\ `N
Upto-and: ".." [__@first=simple-pat] [__`%__@second=simple-pat]
Repeat: (
diff --git a/grammars/builtins.bp b/grammars/builtins.bp
index 59104c3..fcc1f20 100644
--- a/grammars/builtins.bp
+++ b/grammars/builtins.bp
@@ -9,26 +9,6 @@ is-text-file: >32(\t/\n/\r/\x20-x7e/!\x00-x7f utf8-codepoint/$$)
# Meta-rules for acting on everything:
pattern: !'' # Not defined by default
replacement: !'' # Not defined by default
-replace-all: (
- (include-binary-files / is-text-file)
- +(..replacement%\n) ..%\n
-)
-find-all: (
- (include-binary-files / is-text-file)
- *(!..pattern ~(..\n))
- +(+(..@pattern) ..(\n/$$) / ~(..\n))
- [!<\n => "\n"]
-)
-find-lines: (
- (include-binary-files / is-text-file)
- *(!(pattern$) ~(..\n))
- +(@pattern (\n/$$) / ~(..\n))
- [!<\n => "\n"]
-)
-only-matches: (
- (include-binary-files / is-text-file)
- +(..@pattern%\n =>'@1\n')
-)
# Helper definitions (commonly used)
url: (
diff --git a/printing.c b/printing.c
index 833a580..487fc74 100644
--- a/printing.c
+++ b/printing.c
@@ -16,21 +16,15 @@ typedef struct match_node_s {
struct match_node_s *next;
} match_node_t;
-typedef struct {
- size_t line, printed_line;
- const char *color;
-} print_state_t;
+static const char *color_hl = "\033[0;31;1m";
+static const char *color_normal = "\033[0m";
__attribute__((nonnull, pure))
static int height_of_match(match_t *m);
__attribute__((nonnull))
static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen);
-__attribute__((nonnull))
-static void _visualize_patterns(match_t *m);
-__attribute__((nonnull))
-static void print_line_number(FILE *out, print_state_t *state, print_options_t options);
-__attribute__((nonnull))
-static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options);
+__attribute__((nonnull(1,2)))
+static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color);
//
// Return the height of a match object (i.e. the number of descendents of the
@@ -170,88 +164,139 @@ static void _visualize_matches(match_node_t *firstmatch, int depth, const char *
}
//
-// Recursively look for references to a rule called "pattern" and print an
-// explanation for each one.
+// Print a visualization of a match object.
//
-static void _visualize_patterns(match_t *m)
+void visualize_match(match_t *m)
{
- if (m->op->type == VM_REF && streq(m->op->args.s, "pattern")) {
- m = m->child;
- match_node_t first = {.m = m};
- _visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start));
+ printf("\033[?7l"); // Disable line wrapping
+ match_node_t first = {.m = m};
+ _visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start));
+ printf("\033[?7h"); // Re-enable line wrapping
+}
+
+//
+// Print a line number, if it needs to be printed.
+// line number of 0 means "just print an empty space for the number"
+//
+__attribute__((nonnull(1,2)))
+static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color)
+{
+ if (!pr->print_line_numbers) return;
+ if (!pr->needs_line_number) return;
+ if (line_number == 0) {
+ if (color) fprintf(out, "\033[0;2m \033(0\x78\033(B%s", color);
+ else fprintf(out, " |");
} else {
- for (match_t *c = m->child; c; c = c->nextsibling)
- _visualize_patterns(c);
+ if (color) fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", line_number, color);
+ else fprintf(out, "% 5ld|", line_number);
}
+ pr->needs_line_number = 0;
}
+//
+// Print a range of text from a file, adding line numbers if necessary.
//
-// For a match object, print a visual explanation for each "pattern" matched
-// inside it.
+// Writes the bytes in [start, end) to `out`, one source line at a time, and
+// finally records `end` in pr->pos. `color` may be NULL; when it is not, it is
+// written before each chunk of text and handed to print_line_number().
+__attribute__((nonnull(1,2,3,4)))
+static void print_between(FILE *out, printer_t *pr, const char *start, const char *end, const char *color)
+{
+ file_t *f = pr->file;
+ while (start < end) {
+ size_t line_num = get_line_number(f, start);
+ // Only prints something when pr->needs_line_number is set.
+ print_line_number(out, pr, line_num, color);
+ // NOTE(review): presumably get_line() returns the start of the given
+ // line, so this is the start of the next line -- confirm.
+ const char *eol = get_line(pr->file, line_num + 1);
+ // Clamp to the requested range (also when there is no next line).
+ if (!eol || eol > end) eol = end;
+ if (color) fprintf(out, "%s", color);
+ fprintf(out, "%.*s", (int)(eol - start), start);
+ // A chunk ending in a newline means the next output starts a new line,
+ // which needs its own line number.
+ if (eol[-1] == '\n')
+ pr->needs_line_number = 1;
+ start = eol;
+ }
+ pr->pos = end;
+}
+
//
-void visualize_match(match_t *m)
+// Return a pointer to the first character of context information before `pos`,
+// according to the context settings in `pr`
+//
+static const char *context_before(printer_t *pr, const char *pos)
{
- printf("\033[?7l");
- _visualize_patterns(m);
- printf("\033[?7h");
+ if (pr->context_lines == -1) {
+ return pr->pos;
+ } else if (pr->context_lines > 0) {
+ size_t n = get_line_number(pr->file, pos);
+ if (n >= (size_t)((pr->context_lines - 1) + 1))
+ n -= (size_t)(pr->context_lines - 1);
+ else
+ n = 1;
+ const char *sol = get_line(pr->file, n);
+ if (sol == NULL || sol < pr->pos) sol = pr->pos;
+ return sol;
+ } else {
+ return pos;
+ }
}
//
-// Print a line number.
+// Return a pointer to the last character of context information after `pos`,
+// according to the context settings in `pr`
//
-static void print_line_number(FILE *out, print_state_t *state, print_options_t options)
+static const char *context_after(printer_t *pr, const char *pos)
{
- state->printed_line = state->line;
- if (!(options & PRINT_LINE_NUMBERS)) return;
- if (options & PRINT_COLOR)
- fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", state->line, state->color);
- else
- fprintf(out, "% 5ld|", state->line);
+ if (pr->context_lines == -1) {
+ return pr->file->end;
+ } else if (pr->context_lines > 0) {
+ size_t n = get_line_number(pr->file, pos) + (size_t)(pr->context_lines - 1);
+ const char *eol = get_line(pr->file, n+1);
+ return eol ? eol : pr->file->end;
+ } else {
+ return pos;
+ }
}
//
-// Helper function for print_match(), using a struct to keep track of some state.
+// Print the text of a match (no context).
//
-static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options)
+void _print_match(FILE *out, printer_t *pr, match_t *m)
{
- static const char *hl = "\033[0;31;1m";
- const char *old_color = state->color;
- if (m->op->type == VM_HIDE) {
- // TODO: handle replacements?
- for (const char *p = m->start; p < m->end; p++) {
- if (*p == '\n') ++state->line;
- }
- } else if (m->op->type == VM_REPLACE) {
- if (options & PRINT_COLOR && state->color != hl) {
- state->color = hl;
- fprintf(out, "%s", state->color);
- }
+ pr->pos = m->start;
+ if (m->op->type == VM_REPLACE) {
+ size_t line_start = get_line_number(pr->file, m->start);
+ size_t line_end = get_line_number(pr->file, m->end);
+ size_t line = line_start;
+
+ if (pr->use_color) printf("%s", color_hl);
const char *text = m->op->args.replace.text;
const char *end = &text[m->op->args.replace.len];
+
+ // TODO: clean up the line numbering code
for (const char *r = text; r < end; ) {
+ print_line_number(out, pr, line > line_end ? 0 : line, pr->use_color ? color_hl : NULL);
+
+ // Capture substitution
if (*r == '@' && r[1] && r[1] != '@') {
++r;
match_t *cap = get_capture(m, &r);
if (cap != NULL) {
- _print_match(out, f, cap, state, options);
+ print_match(out, pr, cap);
continue;
} else {
--r;
}
}
- if (state->printed_line != state->line)
- print_line_number(out, state, options);
-
if (*r == '\\') {
++r;
unsigned char c = unescapechar(r, &r);
fputc(c, out);
- if (c == '\n') ++state->line;
+ if (c == '\n') {
+ ++line;
+ pr->needs_line_number = 1;
+ }
continue;
} else if (*r == '\n') {
fputc('\n', out);
- ++state->line;
+ ++line;
+ pr->needs_line_number = 1;
++r;
continue;
} else {
@@ -260,70 +305,74 @@ static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state,
continue;
}
}
+ print_line_number(out, pr, line > line_end ? 0 : line, pr->use_color ? color_hl : NULL);
} else {
- if (m->op->type == VM_CAPTURE) {
- if (options & PRINT_COLOR && state->color != hl) {
- state->color = hl;
- fprintf(out, "%s", state->color);
- }
- }
-
const char *prev = m->start;
for (match_t *child = m->child; child; child = child->nextsibling) {
// Skip children from e.g. zero-width matches like >@foo
if (!(prev <= child->start && child->start <= m->end &&
prev <= child->end && child->end <= m->end))
continue;
- if (child->start > prev) {
- for (const char *p = prev; p < child->start; ++p) {
- if (state->printed_line != state->line)
- print_line_number(out, state, options);
- fputc(*p, out);
- if (*p == '\n') ++state->line;
- }
- }
- _print_match(out, f, child, state, options);
+ if (child->start > prev)
+ print_between(out, pr, prev, child->start, pr->use_color ? color_hl : NULL);
+ print_match(out, pr, child);
prev = child->end;
}
- if (m->end > prev) {
- for (const char *p = prev; p < m->end; ++p) {
- if (state->printed_line != state->line)
- print_line_number(out, state, options);
- fputc(*p, out);
- if (*p == '\n') ++state->line;
- }
- }
- }
- if (options & PRINT_COLOR && old_color != state->color) {
- fprintf(out, "%s", old_color);
- state->color = old_color;
+ if (m->end > prev)
+ print_between(out, pr, prev, m->end, pr->use_color ? color_hl : NULL);
}
+ pr->pos = m->end;
}
//
-// Print a match with replacements and highlighting.
+// Print the text of a match and any context.
//
-void print_match(FILE *out, file_t *f, match_t *m, print_options_t options)
+void print_match(FILE *out, printer_t *pr, match_t *m)
{
- print_state_t state = {.line = 1, .color = "\033[0m"};
- _print_match(out, f, m, &state, options);
+ int first = (pr->pos == NULL);
+ if (first) { // First match printed:
+ pr->pos = pr->file->contents;
+ pr->needs_line_number = 1;
+ }
+ if (m) {
+ const char *before_m = context_before(pr, m->start);
+ if (!first) {
+ const char *after_last = context_after(pr, pr->pos);
+ if (after_last >= before_m) {
+ // Overlapping ranges:
+ before_m = pr->pos;
+ } else {
+ // Non-overlapping ranges:
+ print_between(out, pr, pr->pos, after_last, pr->use_color ? color_normal : NULL);
+ if (pr->context_lines > 1)
+ printf("\n"); // Gap between chunks
+ }
+ }
+ print_between(out, pr, before_m, m->start, pr->use_color ? color_normal : NULL);
+ _print_match(out, pr, m);
+ if (pr->use_color) printf("%s", color_normal);
+ } else {
+ // After the last match is printed, print the trailing context:
+ const char *after_last = context_after(pr, pr->pos);
+ print_between(out, pr, pr->pos, after_last, pr->use_color ? color_normal : NULL);
+ }
}
//
// Print any errors that are present in the given match object.
//
-int print_errors(file_t *f, match_t *m, print_options_t options)
+int print_errors(printer_t *pr, match_t *m)
{
int ret = 0;
if (m->op->type == VM_CAPTURE && m->op->args.capture.name && streq(m->op->args.capture.name, "!")) {
printf("\033[31;1m");
- print_match(stdout, f, m, options);
+ print_match(stdout, pr, m);
printf("\033[0m\n");
- fprint_line(stdout, f, m->start, m->end, " ");
+ fprint_line(stdout, pr->file, m->start, m->end, " ");
return 1;
}
- if (m->child) ret += print_errors(f, m->child, options);
- if (m->nextsibling) ret += print_errors(f, m->nextsibling, options);
+ if (m->child) ret += print_errors(pr, m->child);
+ if (m->nextsibling) ret += print_errors(pr, m->nextsibling);
return ret;
}
diff --git a/printing.h b/printing.h
index f81dcef..822285e 100644
--- a/printing.h
+++ b/printing.h
@@ -6,17 +6,21 @@
#include "types.h"
-typedef enum {
- PRINT_COLOR = 1<<0,
- PRINT_LINE_NUMBERS = 1<<1,
-} print_options_t;
+// State shared by the printing functions while printing matches from one file.
+typedef struct {
+ // The file whose text and line numbers are being printed.
+ file_t *file;
+ // End of the text printed so far; NULL until the first match is printed.
+ const char *pos;
+ // -1: whole file, 0: only the matched text, N > 0: the matching line plus
+ // N-1 lines before and after.
+ int context_lines;
+ // Set when the next output starts a new line and still needs its number.
+ unsigned int needs_line_number:1;
+ // When set, color escape sequences are emitted.
+ unsigned int use_color:1;
+ // When clear, no line numbers are printed at all.
+ unsigned int print_line_numbers:1;
+} printer_t;
__attribute__((nonnull))
void visualize_match(match_t *m);
+__attribute__((nonnull(1,2)))
+void print_match(FILE *out, printer_t *pr, match_t *m);
__attribute__((nonnull))
-void print_match(FILE *out, file_t *f, match_t *m, print_options_t options);
-__attribute__((nonnull))
-int print_errors(file_t *f, match_t *m, print_options_t options);
+int print_errors(printer_t *pr, match_t *m);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/types.h b/types.h
index 65dcb5a..244b4b7 100644
--- a/types.h
+++ b/types.h
@@ -30,7 +30,6 @@ enum VMOpcode {
VM_BEFORE,
VM_AFTER,
VM_CAPTURE,
- VM_HIDE,
VM_OTHERWISE,
VM_CHAIN,
VM_EQUAL,
diff --git a/vm.c b/vm.c
index b0bc267..346a61a 100644
--- a/vm.c
+++ b/vm.c
@@ -122,6 +122,25 @@ static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, uns
//
+// Find the next match after prev (or the first match if prev is NULL).
+// The search resumes at the end of prev, or one byte past it when prev is
+// empty, and tries every following position up to the end of the file.
+// prev is handed to recycle_if_unused() before searching.
+// Returns NULL when no further match exists.
+//
+match_t *next_match(def_t *defs, file_t *f, match_t *prev, vm_op_t *op, unsigned int flags)
+{
+    const char *cursor = f->contents;
+    if (prev != NULL) {
+        cursor = prev->end;
+        // An empty match must not be found again at the same position.
+        if (prev->end <= prev->start)
+            cursor += 1;
+        recycle_if_unused(&prev);
+    }
+    while (cursor < f->end) {
+        match_t *found = match(defs, f, cursor, op, flags);
+        if (found != NULL)
+            return found;
+        cursor++;
+    }
+    return NULL;
+}
+
+//
// Run virtual machine operation against a string and return
// a match struct, or NULL if no match is found.
// The returned value should be free()'d to avoid memory leaking.
@@ -316,16 +335,6 @@ match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned in
ADD_OWNER(m->child, p);
return m;
}
- case VM_HIDE: {
- match_t *p = match(defs, f, str, op->args.pat, flags);
- if (p == NULL) return NULL;
- match_t *m = new_match();
- m->start = str;
- m->end = p->end;
- m->op = op;
- ADD_OWNER(m->child, p);
- return m;
- }
case VM_OTHERWISE: {
match_t *m = match(defs, f, str, op->args.multiple.first, flags);
if (m == NULL) m = match(defs, f, str, op->args.multiple.second, flags);
diff --git a/vm.h b/vm.h
index 3777492..de167b4 100644
--- a/vm.h
+++ b/vm.h
@@ -8,6 +8,8 @@
#include "types.h"
+__attribute__((nonnull(2,4)))
+match_t *next_match(def_t *defs, file_t *f, match_t *prev, vm_op_t *op, unsigned int flags);
__attribute__((hot, nonnull(2,3,4)))
match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
__attribute__((nonnull))