diff options
| -rw-r--r-- | README.md | 2 | ||||
| -rw-r--r-- | bp.1 | 12 | ||||
| -rw-r--r-- | bp.c | 252 | ||||
| -rw-r--r-- | compiler.c | 11 | ||||
| -rw-r--r-- | grammars/bpeg.bp | 3 | ||||
| -rw-r--r-- | grammars/builtins.bp | 20 | ||||
| -rw-r--r-- | printing.c | 229 | ||||
| -rw-r--r-- | printing.h | 18 | ||||
| -rw-r--r-- | types.h | 1 | ||||
| -rw-r--r-- | vm.c | 29 | ||||
| -rw-r--r-- | vm.h | 2 |
11 files changed, 367 insertions, 212 deletions
@@ -19,7 +19,7 @@ It's written in pure C with no dependencies. * `-p` `--pattern <pat>` provide a pattern (equivalent to `bp '\(<pat>)'`) * `-P` `--pattern-string <pat>` provide a string pattern (equivalent to `bp '<pat>'`, but may be useful if `'<pat>'` begins with a '-') * `-r` `--replace <replacement>` replace the input pattern with the given replacement -* `-m` `--mode <mode>` set the behavior mode (defult: `find-all`) +* `-c` `--context <N>` change how many lines of context are printed (`0`: no context, `all`: the whole file, `<N>` matching lines and `<N-1>` lines before/after) * `-g` `--grammar <grammar file>` use the specified file as a grammar See `man ./bp.1` for more details. @@ -18,7 +18,7 @@ bp \- Bruce's Parsing Expression Grammar tool [\fI-D\fR|\fI--define-string\fR \fI<name>\fR:\fI<string-pattern>\fR] [\fI-r\fR|\fI--replace\fR \fI<replacement>\fR] [\fI-g\fR|\fI--grammar\fR \fI<grammar file>\fR] -[\fI-m\fR|\fI--mode\fR \fI<mode>\fR] +[\fI-c\fR|\fI--context\fR \fI<N>\fR] \fI<pattern\fR [[--] \fI<input files...>\fR] .SH DESCRIPTION @@ -54,10 +54,12 @@ Replace all occurrences of the main pattern with the given string. .B \-g\fR, \fB--grammar \fI<grammar file>\fR Load the grammar from the given file. -.B \-m\fR, \fB--mode \fI<mode>\fR -The mode to operate in. Options are: \fIfind-all\fR (the default), -\fIonly-matches\fR, \fIpattern\fR, \fIreplacement\fR, \fIreplace-all\fR -(implied by \fB--replace\fR), or any other grammar rule name. +.B \-c\fR, \fB--context \fI<N>\fR +The number of lines of context to print. If \fI<N>\fR is 0, print only the +exact text of the matches. If \fI<N>\fR is "all", print the entire file. +Otherwise, if \fI<N>\fR is a positive integer, print the whole line on which +matches occur, as well as the \fI<N-1>\fR lines before and after the match. The +default value for this argument is 1 (print whole lines where matches occur). .B \--help Print the usage and exit. 
@@ -36,15 +36,16 @@ static const char *usage = ( " -p --pattern <pat> provide a pattern (equivalent to bp '\\(<pat>)')\n" " -P --pattern-string <pat> provide a string pattern (may be useful if '<pat>' begins with a '-')\n" " -r --replace <replacement> replace the input pattern with the given replacement\n" - " -m --mode <mode> set the behavior mode (defult: find-all)\n" + " -c --context <context> set number of lines of context to print (all: the whole file, 0: only the match, 1: the line, N: N lines of context)\n" " -g --grammar <grammar file> use the specified file as a grammar\n"); -static print_options_t print_options = 0; +#define USE_DEFAULT_CONTEXT -2 +#define ALL_CONTEXT -1 +static int print_color = 0; +static int print_line_numbers = 0; __attribute__((nonnull)) static char *getflag(const char *flag, char *argv[], int *i); -__attribute__((nonnull(3))) -static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags); // // Return a pointer to the value part of a flag, if present, otherwise NULL. @@ -67,65 +68,172 @@ static char *getflag(const char *flag, char *argv[], int *i) } // +// Scan the first few dozen bytes of a file and return 1 if the contents all +// look like printable text characters, otherwise return 0. +// +static int is_text_file(const char *filename) +{ + int fd = open(filename, O_RDONLY); + if (fd < 0) return 0; + unsigned char buf[64]; + int len = read(fd, buf, sizeof(buf)/sizeof(unsigned char)); + if (len < 0) return 0; + (void)close(fd); + + for (int i = 0; i < len; i++) { + if (!(buf[i] == '\t' || buf[i] == '\n' || buf[i] == '\r' + || buf[i] >= '\x20')) + return 0; + } + return 1; +} + +// +// Print matches in JSON format. +// +static int print_matches_as_json(def_t *defs, file_t *f, vm_op_t *pattern, unsigned int flags) +{ + int matches = 0; + for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) { + if (++matches > 1) + printf(",\n"); + printf("{\"filename\":\"%s\",", f->filename ? 
f->filename : "-"); + printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[", + 0, f->end - f->contents); + json_match(f->contents, m, (flags & BP_VERBOSE) ? 1 : 0); + printf("]}}\n"); + } + return matches; +} + +// +// Print matches in a visual explanation style +// +static int explain_matches(def_t *defs, file_t *f, vm_op_t *pattern, unsigned int flags) +{ + int matches = 0; + for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) { + if (++matches == 1) { + if (print_color) + printf("\033[0;1;4;33m%s\033[0m\n", f->filename); + else + printf("%s:\n", f->filename); + } else { + printf("\n\n"); + } + visualize_match(m); + } + return matches; +} + +// +// Replace a file's contents with the text version of a match. +// (Useful for replacements) +// +static int inplace_modify_file(def_t *defs, file_t *f, vm_op_t *pattern, int context, unsigned int flags) +{ + // Need to do this before matching: + intern_file(f); + + printer_t pr = { + .file = f, + .context_lines = context, + .use_color = 0, + .print_line_numbers = 0, + }; + + FILE *inplace_file = NULL; // Lazy-open this on the first match + int matches = 0; + for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) { + ++matches; + if (print_errors(&pr, m) > 0) + exit(1); + // Lazy-open file for writing upon first match: + if (inplace_file == NULL) { + inplace_file = fopen(f->filename, "w"); + check(inplace_file, "Could not open file for writing: %s\n", f->filename); + } + print_match(inplace_file, &pr, m); + } + + if (inplace_file) { + printf("%s\n", f->filename); + fclose(inplace_file); + } + return matches; +} + +// +// Print all the matches in a file. 
+// +static int print_matches(def_t *defs, file_t *f, vm_op_t *pattern, int context, unsigned int flags) +{ + static int printed_filenames = 0; + int matches = 0; + printer_t pr = { + .file = f, + .context_lines = context, + .use_color = print_color, + .print_line_numbers = print_line_numbers, + }; + + for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) { + if (print_errors(&pr, m) > 0) + exit(1); + + if (++matches == 1) { + if (printed_filenames++ > 0) printf("\n"); + if (print_color) + printf("\033[0;1;4;33m%s\033[0m\n", f->filename); + else + printf("%s:\n", f->filename); + } + print_match(stdout, &pr, m); + } + + if (matches > 0) { + // Print trailing context lines: + print_match(stdout, &pr, NULL); + // Ensure a trailing newline: + if (pr.pos > f->contents && pr.pos[-1] != '\n') printf("\n"); + } + + return matches; +} + +// // For a given filename, open the file and attempt to match the given pattern // against it, printing any results according to the flags. // -static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags) +static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, int context, unsigned int flags) { - static int printed_matches = 0; - int success = 0; file_t *f = load_file(NULL, filename); check(f, "Could not open file: %s", filename); - if (flags & BP_INPLACE) // Need to do this before matching - intern_file(f); - match_t *m = match(defs, f, f->contents, pattern, flags); - if (m && print_errors(f, m, print_options) > 0) - exit(1); - - if (m != NULL && m->end > m->start + 1) { - success = 1; - ++printed_matches; - - if (flags & BP_EXPLAIN) { - if (filename) - printf("\033[1;4m%s\033[0m\n", filename); - visualize_match(m); - } else if (flags & BP_LISTFILES) { - printf("%s\n", filename); - } else if (flags & BP_JSON) { - if (printed_matches > 1) - printf(",\n"); - printf("{\"filename\":\"%s\",", filename ? 
filename : "-"); - printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[", - 0, f->end - f->contents); - json_match(f->contents, m, (flags & BP_VERBOSE) ? 1 : 0); - printf("]}}\n"); - } else if (flags & BP_INPLACE && filename) { - FILE *out = fopen(filename, "w"); - print_match(out, f, m, 0); - fclose(out); - printf("%s\n", filename); - } else { - if (printed_matches > 1) - fputc('\n', stdout); - if (filename) { - if (print_options & PRINT_COLOR) - printf("\033[1;4;33m%s\033[0m\n", filename); - else - printf("%s:\n", filename); - } - print_match(stdout, f, m, - filename ? print_options : print_options & (print_options_t)~PRINT_LINE_NUMBERS); + + int matches = 0; + if (flags & BP_EXPLAIN) { + matches += explain_matches(defs, f, pattern, flags); + } else if (flags & BP_LISTFILES) { + match_t *m = next_match(defs, f, NULL, pattern, flags); + if (m) { + recycle_if_unused(&m); + printf("%s\n", f->filename); + matches += 1; } + } else if (flags & BP_JSON) { + matches += print_matches_as_json(defs, f, pattern, flags); + } else if (flags & BP_INPLACE) { + matches += inplace_modify_file(defs, f, pattern, context, flags); + } else { + matches += print_matches(defs, f, pattern, context, flags); } - recycle_if_unused(&m); #ifdef DEBUG_HEAP check(recycle_all_matches() == 0, "Memory leak: there should no longer be any matches in use at this point."); #endif destroy_file(&f); - - return success; + fflush(stdout); + return matches; } #define FLAG(f) (flag=getflag((f), argv, &i)) @@ -133,9 +241,9 @@ static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, uns int main(int argc, char *argv[]) { unsigned int flags = 0; + int context = USE_DEFAULT_CONTEXT; char *flag = NULL; char path[PATH_MAX] = {0}; - const char *mode = "find-all"; def_t *defs = NULL; @@ -145,6 +253,10 @@ int main(int argc, char *argv[]) file_t *pat_file = spoof_file(&loaded_files, "<pattern>", "pattern"); vm_op_t *pattern = bp_pattern(pat_file, pat_file->contents); + // 
Define an opcode that is just a reference to the rule `replacement` + file_t *rep_file = spoof_file(&loaded_files, "<replacement>", "replacement"); + vm_op_t *replacement = bp_pattern(rep_file, rep_file->contents); + // Load builtins: if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) { file_t *f = load_file(&loaded_files, "/etc/xdg/bp/builtins.bp"); @@ -174,6 +286,7 @@ int main(int argc, char *argv[]) flags |= BP_JSON; } else if (streq(argv[i], "--inplace")) { flags |= BP_INPLACE; + context = ALL_CONTEXT; } else if (streq(argv[i], "--ignore-case")) { flags |= BP_IGNORECASE; } else if (streq(argv[i], "--list-files")) { @@ -185,7 +298,8 @@ int main(int argc, char *argv[]) vm_op_t *rep = bp_replacement(replace_file, pattern, replace_file->contents); check(rep, "Replacement failed to compile: %s", flag); defs = with_def(defs, replace_file, strlen("replacement"), "replacement", rep); - mode = "replace-all"; + pattern = replacement; + if (context == USE_DEFAULT_CONTEXT) context = 1; } else if (FLAG("--grammar") || FLAG("-g")) { file_t *f = load_file(&loaded_files, flag); if (f == NULL) { @@ -231,8 +345,11 @@ int main(int argc, char *argv[]) check(p, "Pattern failed to compile: %s", flag); defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p); ++npatterns; - } else if (FLAG("--mode") || FLAG("-m")) { - mode = flag; + } else if (FLAG("--context") || FLAG("-c")) { + if (streq(flag, "all")) + context = ALL_CONTEXT; + else + context = (int)strtol(flag, NULL, 10); } else if (argv[i][0] == '-' && argv[i][1] && argv[i][1] != '-') { // single-char flags for (char *c = &argv[i][1]; *c; ++c) { switch (*c) { @@ -240,7 +357,7 @@ int main(int argc, char *argv[]) case 'v': flags |= BP_VERBOSE; break; // -v case 'e': flags |= BP_EXPLAIN; break; // -e case 'j': flags |= BP_JSON; break; // -j - case 'I': flags |= BP_INPLACE; break; // -I + case 'I': flags |= BP_INPLACE; context = ALL_CONTEXT; break; // -I case 'i': flags |= BP_IGNORECASE; break; // -i case 'l': flags |= 
BP_LISTFILES; break; // -l default: @@ -268,24 +385,28 @@ int main(int argc, char *argv[]) return 1; } - if (isatty(STDOUT_FILENO)) { - print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS; + if (context < 0) { + if (context == USE_DEFAULT_CONTEXT) context = 1; + else if (context != ALL_CONTEXT) context = 0; } - // Define an opcode that is just a reference to the overarching mode (e.g. find-all) - if (lookup(defs, mode) == NULL) { - printf("The mode '%s' is not defined.\n", mode); + if (flags & BP_INPLACE && context != ALL_CONTEXT) { + printf("--inplace and --context are mutually exclusive.\n" + "Please drop one of the two arguments and try again.\n"); return 1; } - file_t *mode_file = spoof_file(&loaded_files, "<mode>", mode); - vm_op_t *mode_op = bp_pattern(mode_file, mode_file->contents); + + if (isatty(STDOUT_FILENO)) { + print_color = 1; + print_line_numbers = 1; + } int found = 0; if (flags & BP_JSON) printf("["); if (i < argc) { // Files pass in as command line args: for (int nfiles = 0; i < argc; nfiles++, i++) { - found += process_file(defs, argv[i], mode_op, flags); + found += process_file(defs, argv[i], pattern, context, flags); } } else if (isatty(STDIN_FILENO)) { // No files, no piped in input, so use * **/*: @@ -293,12 +414,13 @@ int main(int argc, char *argv[]) glob("*", 0, NULL, &globbuf); glob("**/*", GLOB_APPEND, NULL, &globbuf); for (size_t i = 0; i < globbuf.gl_pathc; i++) { - found += process_file(defs, globbuf.gl_pathv[i], mode_op, flags); + if (is_text_file(globbuf.gl_pathv[i])) + found += process_file(defs, globbuf.gl_pathv[i], pattern, context, flags); } globfree(&globbuf); } else { // Piped in input: - found += process_file(defs, NULL, mode_op, flags); + found += process_file(defs, NULL, pattern, context, flags); } if (flags & BP_JSON) printf("]\n"); @@ -465,17 +465,6 @@ static vm_op_t *_bp_simplepattern(file_t *f, const char *str) op->end = pat->end; return op; } - // Hide - case '~': { - vm_op_t *pat = bp_simplepattern(f, str); - if (!pat) 
- file_err(f, str, str, "There should be a pattern after this '~'"); - vm_op_t *op = new_op(f, start, VM_HIDE); - op->len = 0; - op->args.pat = pat; - op->end = pat->end; - return op; - } // Special rules: case '_': case '^': case '$': case '|': { const char *name = NULL; diff --git a/grammars/bpeg.bp b/grammars/bpeg.bp index 23a2250..e8d7259 100644 --- a/grammars/bpeg.bp +++ b/grammars/bpeg.bp @@ -11,7 +11,7 @@ String-pattern: ..$$ % (\n / Nodent / Escape / `\ pat [`;]) pat: simple-pat !(__("!="/"==")) / suffixed-pat simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range - / Escape / Repeat / Optional / No / Hide / After / Before / Capture + / Escape / Repeat / Optional / No / After / Before / Capture / Ref / parens suffixed-pat: ( @@ -41,7 +41,6 @@ escape-sequence: ( / `x 2 `0-9,a-f,A-F ) No: `! (__@pat / @!=(''=>"Expected a pattern after the exclamation mark")) -Hide: `~ (__@pat / @!=(''=>"Expected a pattern after the tilde")) Nodent: `\ `N Upto-and: ".." [__@first=simple-pat] [__`%__@second=simple-pat] Repeat: ( diff --git a/grammars/builtins.bp b/grammars/builtins.bp index 59104c3..fcc1f20 100644 --- a/grammars/builtins.bp +++ b/grammars/builtins.bp @@ -9,26 +9,6 @@ is-text-file: >32(\t/\n/\r/\x20-x7e/!\x00-x7f utf8-codepoint/$$) # Meta-rules for acting on everything: pattern: !'' # Not defined by default replacement: !'' # Not defined by default -replace-all: ( - (include-binary-files / is-text-file) - +(..replacement%\n) ..%\n -) -find-all: ( - (include-binary-files / is-text-file) - *(!..pattern ~(..\n)) - +(+(..@pattern) ..(\n/$$) / ~(..\n)) - [!<\n => "\n"] -) -find-lines: ( - (include-binary-files / is-text-file) - *(!(pattern$) ~(..\n)) - +(@pattern (\n/$$) / ~(..\n)) - [!<\n => "\n"] -) -only-matches: ( - (include-binary-files / is-text-file) - +(..@pattern%\n =>'@1\n') -) # Helper definitions (commonly used) url: ( @@ -16,21 +16,15 @@ typedef struct match_node_s { struct match_node_s *next; } match_node_t; -typedef struct { - size_t line, 
printed_line; - const char *color; -} print_state_t; +static const char *color_hl = "\033[0;31;1m"; +static const char *color_normal = "\033[0m"; __attribute__((nonnull, pure)) static int height_of_match(match_t *m); __attribute__((nonnull)) static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen); -__attribute__((nonnull)) -static void _visualize_patterns(match_t *m); -__attribute__((nonnull)) -static void print_line_number(FILE *out, print_state_t *state, print_options_t options); -__attribute__((nonnull)) -static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options); +__attribute__((nonnull(1,2))) +static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color); // // Return the height of a match object (i.e. the number of descendents of the @@ -170,88 +164,139 @@ static void _visualize_matches(match_node_t *firstmatch, int depth, const char * } // -// Recursively look for references to a rule called "pattern" and print an -// explanation for each one. +// Print a visualization of a match object. // -static void _visualize_patterns(match_t *m) +void visualize_match(match_t *m) { - if (m->op->type == VM_REF && streq(m->op->args.s, "pattern")) { - m = m->child; - match_node_t first = {.m = m}; - _visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start)); + printf("\033[?7l"); // Disable line wrapping + match_node_t first = {.m = m}; + _visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start)); + printf("\033[?7h"); // Re-enable line wrapping +} + +// +// Print a line number, if it needs to be printed. 
+// line number of 0 means "just print an empty space for the number" +// +__attribute__((nonnull(1,2))) +static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color) +{ + if (!pr->print_line_numbers) return; + if (!pr->needs_line_number) return; + if (line_number == 0) { + if (color) fprintf(out, "\033[0;2m \033(0\x78\033(B%s", color); + else fprintf(out, " |"); } else { - for (match_t *c = m->child; c; c = c->nextsibling) - _visualize_patterns(c); + if (color) fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", line_number, color); + else fprintf(out, "% 5ld|", line_number); } + pr->needs_line_number = 0; } +// +// Print a range of text from a file, adding line numbers if necessary. // -// For a match object, print a visual explanation for each "pattern" matched -// inside it. +__attribute__((nonnull(1,2,3,4))) +static void print_between(FILE *out, printer_t *pr, const char *start, const char *end, const char *color) +{ + file_t *f = pr->file; + while (start < end) { + size_t line_num = get_line_number(f, start); + print_line_number(out, pr, line_num, color); + const char *eol = get_line(pr->file, line_num + 1); + if (!eol || eol > end) eol = end; + if (color) fprintf(out, "%s", color); + fprintf(out, "%.*s", (int)(eol - start), start); + if (eol[-1] == '\n') + pr->needs_line_number = 1; + start = eol; + } + pr->pos = end; +} + // -void visualize_match(match_t *m) +// Return a pointer to the first character of context information before `pos`, +// according to the context settings in `pr` +// +static const char *context_before(printer_t *pr, const char *pos) { - printf("\033[?7l"); - _visualize_patterns(m); - printf("\033[?7h"); + if (pr->context_lines == -1) { + return pr->pos; + } else if (pr->context_lines > 0) { + size_t n = get_line_number(pr->file, pos); + if (n >= (size_t)((pr->context_lines - 1) + 1)) + n -= (size_t)(pr->context_lines - 1); + else + n = 1; + const char *sol = get_line(pr->file, n); + if (sol == NULL 
|| sol < pr->pos) sol = pr->pos; + return sol; + } else { + return pos; + } } // -// Print a line number. +// Return a pointer to the last character of context information after `pos`, +// according to the context settings in `pr` // -static void print_line_number(FILE *out, print_state_t *state, print_options_t options) +static const char *context_after(printer_t *pr, const char *pos) { - state->printed_line = state->line; - if (!(options & PRINT_LINE_NUMBERS)) return; - if (options & PRINT_COLOR) - fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", state->line, state->color); - else - fprintf(out, "% 5ld|", state->line); + if (pr->context_lines == -1) { + return pr->file->end; + } else if (pr->context_lines > 0) { + size_t n = get_line_number(pr->file, pos) + (size_t)(pr->context_lines - 1); + const char *eol = get_line(pr->file, n+1); + return eol ? eol : pr->file->end; + } else { + return pos; + } } // -// Helper function for print_match(), using a struct to keep track of some state. +// Print the text of a match (no context). // -static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options) +void _print_match(FILE *out, printer_t *pr, match_t *m) { - static const char *hl = "\033[0;31;1m"; - const char *old_color = state->color; - if (m->op->type == VM_HIDE) { - // TODO: handle replacements? 
- for (const char *p = m->start; p < m->end; p++) { - if (*p == '\n') ++state->line; - } - } else if (m->op->type == VM_REPLACE) { - if (options & PRINT_COLOR && state->color != hl) { - state->color = hl; - fprintf(out, "%s", state->color); - } + pr->pos = m->start; + if (m->op->type == VM_REPLACE) { + size_t line_start = get_line_number(pr->file, m->start); + size_t line_end = get_line_number(pr->file, m->end); + size_t line = line_start; + + if (pr->use_color) printf("%s", color_hl); const char *text = m->op->args.replace.text; const char *end = &text[m->op->args.replace.len]; + + // TODO: clean up the line numbering code for (const char *r = text; r < end; ) { + print_line_number(out, pr, line > line_end ? 0 : line, pr->use_color ? color_hl : NULL); + + // Capture substitution if (*r == '@' && r[1] && r[1] != '@') { ++r; match_t *cap = get_capture(m, &r); if (cap != NULL) { - _print_match(out, f, cap, state, options); + print_match(out, pr, cap); continue; } else { --r; } } - if (state->printed_line != state->line) - print_line_number(out, state, options); - if (*r == '\\') { ++r; unsigned char c = unescapechar(r, &r); fputc(c, out); - if (c == '\n') ++state->line; + if (c == '\n') { + ++line; + pr->needs_line_number = 1; + } continue; } else if (*r == '\n') { fputc('\n', out); - ++state->line; + ++line; + pr->needs_line_number = 1; ++r; continue; } else { @@ -260,70 +305,74 @@ static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, continue; } } + print_line_number(out, pr, line > line_end ? 0 : line, pr->use_color ? color_hl : NULL); } else { - if (m->op->type == VM_CAPTURE) { - if (options & PRINT_COLOR && state->color != hl) { - state->color = hl; - fprintf(out, "%s", state->color); - } - } - const char *prev = m->start; for (match_t *child = m->child; child; child = child->nextsibling) { // Skip children from e.g. 
zero-width matches like >@foo if (!(prev <= child->start && child->start <= m->end && prev <= child->end && child->end <= m->end)) continue; - if (child->start > prev) { - for (const char *p = prev; p < child->start; ++p) { - if (state->printed_line != state->line) - print_line_number(out, state, options); - fputc(*p, out); - if (*p == '\n') ++state->line; - } - } - _print_match(out, f, child, state, options); + if (child->start > prev) + print_between(out, pr, prev, child->start, pr->use_color ? color_hl : NULL); + print_match(out, pr, child); prev = child->end; } - if (m->end > prev) { - for (const char *p = prev; p < m->end; ++p) { - if (state->printed_line != state->line) - print_line_number(out, state, options); - fputc(*p, out); - if (*p == '\n') ++state->line; - } - } - } - if (options & PRINT_COLOR && old_color != state->color) { - fprintf(out, "%s", old_color); - state->color = old_color; + if (m->end > prev) + print_between(out, pr, prev, m->end, pr->use_color ? color_hl : NULL); } + pr->pos = m->end; } // -// Print a match with replacements and highlighting. +// Print the text of a match and any context. // -void print_match(FILE *out, file_t *f, match_t *m, print_options_t options) +void print_match(FILE *out, printer_t *pr, match_t *m) { - print_state_t state = {.line = 1, .color = "\033[0m"}; - _print_match(out, f, m, &state, options); + int first = (pr->pos == NULL); + if (first) { // First match printed: + pr->pos = pr->file->contents; + pr->needs_line_number = 1; + } + if (m) { + const char *before_m = context_before(pr, m->start); + if (!first) { + const char *after_last = context_after(pr, pr->pos); + if (after_last >= before_m) { + // Overlapping ranges: + before_m = pr->pos; + } else { + // Non-overlapping ranges: + print_between(out, pr, pr->pos, after_last, pr->use_color ? color_normal : NULL); + if (pr->context_lines > 1) + printf("\n"); // Gap between chunks + } + } + print_between(out, pr, before_m, m->start, pr->use_color ? 
color_normal : NULL); + _print_match(out, pr, m); + if (pr->use_color) printf("%s", color_normal); + } else { + // After the last match is printed, print the trailing context: + const char *after_last = context_after(pr, pr->pos); + print_between(out, pr, pr->pos, after_last, pr->use_color ? color_normal : NULL); + } } // // Print any errors that are present in the given match object. // -int print_errors(file_t *f, match_t *m, print_options_t options) +int print_errors(printer_t *pr, match_t *m) { int ret = 0; if (m->op->type == VM_CAPTURE && m->op->args.capture.name && streq(m->op->args.capture.name, "!")) { printf("\033[31;1m"); - print_match(stdout, f, m, options); + print_match(stdout, pr, m); printf("\033[0m\n"); - fprint_line(stdout, f, m->start, m->end, " "); + fprint_line(stdout, pr->file, m->start, m->end, " "); return 1; } - if (m->child) ret += print_errors(f, m->child, options); - if (m->nextsibling) ret += print_errors(f, m->nextsibling, options); + if (m->child) ret += print_errors(pr, m->child); + if (m->nextsibling) ret += print_errors(pr, m->nextsibling); return ret; } @@ -6,17 +6,21 @@ #include "types.h" -typedef enum { - PRINT_COLOR = 1<<0, - PRINT_LINE_NUMBERS = 1<<1, -} print_options_t; +typedef struct { + file_t *file; + const char *pos; + int context_lines; + unsigned int needs_line_number:1; + unsigned int use_color:1; + unsigned int print_line_numbers:1; +} printer_t; __attribute__((nonnull)) void visualize_match(match_t *m); +__attribute__((nonnull(1,2))) +void print_match(FILE *out, printer_t *pr, match_t *m); __attribute__((nonnull)) -void print_match(FILE *out, file_t *f, match_t *m, print_options_t options); -__attribute__((nonnull)) -int print_errors(file_t *f, match_t *m, print_options_t options); +int print_errors(printer_t *pr, match_t *m); #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -30,7 +30,6 @@ enum VMOpcode { VM_BEFORE, VM_AFTER, VM_CAPTURE, - VM_HIDE, VM_OTHERWISE, VM_CHAIN, VM_EQUAL, @@ -122,6 +122,25 @@ static const 
char *match_backref(const char *str, vm_op_t *op, match_t *cap, uns // +// Find the next match after prev (or the first match if prev is NULL) +// +match_t *next_match(def_t *defs, file_t *f, match_t *prev, vm_op_t *op, unsigned int flags) +{ + const char *str; + if (prev) { + str = prev->end > prev->start ? prev->end : prev->end + 1; + recycle_if_unused(&prev); + } else { + str = f->contents; + } + for (; str < f->end; ++str) { + match_t *m = match(defs, f, str, op, flags); + if (m) return m; + } + return NULL; +} + +// // Run virtual machine operation against a string and return // a match struct, or NULL if no match is found. // The returned value should be free()'d to avoid memory leaking. @@ -316,16 +335,6 @@ match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned in ADD_OWNER(m->child, p); return m; } - case VM_HIDE: { - match_t *p = match(defs, f, str, op->args.pat, flags); - if (p == NULL) return NULL; - match_t *m = new_match(); - m->start = str; - m->end = p->end; - m->op = op; - ADD_OWNER(m->child, p); - return m; - } case VM_OTHERWISE: { match_t *m = match(defs, f, str, op->args.multiple.first, flags); if (m == NULL) m = match(defs, f, str, op->args.multiple.second, flags); @@ -8,6 +8,8 @@ #include "types.h" +__attribute__((nonnull(2,4))) +match_t *next_match(def_t *defs, file_t *f, match_t *prev, vm_op_t *op, unsigned int flags); __attribute__((hot, nonnull(2,3,4))) match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags); __attribute__((nonnull)) |
