diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2021-01-15 01:19:10 -0800 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2021-01-15 01:19:10 -0800 |
| commit | 8ff80b09ccd7e680829d0911d965ad4b0d6f7939 (patch) | |
| tree | 399da1a15ed749fa10cec8bf62dcde5e93ef3f3f /bp.c | |
| parent | 9b70cb4f624aa19c09ea73b3d9e0f50c032602c5 (diff) | |
Major overhaul of how different modes of behavior work. Approximately 2x
speedup and 2x memory footprint reduction. Also removed --mode and
VM_HIDE (~ operator), and added --context. Printing works better now.
Diffstat (limited to 'bp.c')
| -rw-r--r-- | bp.c | 252 |
1 files changed, 187 insertions, 65 deletions
```diff
@@ -36,15 +36,16 @@ static const char *usage = (
     " -p --pattern <pat> provide a pattern (equivalent to bp '\\(<pat>)')\n"
     " -P --pattern-string <pat> provide a string pattern (may be useful if '<pat>' begins with a '-')\n"
     " -r --replace <replacement> replace the input pattern with the given replacement\n"
-    " -m --mode <mode> set the behavior mode (defult: find-all)\n"
+    " -c --context <context> set number of lines of context to print (all: the whole file, 0: only the match, 1: the line, N: N lines of context)\n"
     " -g --grammar <grammar file> use the specified file as a grammar\n");
 
-static print_options_t print_options = 0;
+#define USE_DEFAULT_CONTEXT -2
+#define ALL_CONTEXT -1
+static int print_color = 0;
+static int print_line_numbers = 0;
 
 __attribute__((nonnull))
 static char *getflag(const char *flag, char *argv[], int *i);
-__attribute__((nonnull(3)))
-static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags);
 
 //
 // Return a pointer to the value part of a flag, if present, otherwise NULL.
@@ -67,65 +68,172 @@ static char *getflag(const char *flag, char *argv[], int *i)
 }
 
 //
+// Scan the first few dozen bytes of a file and return 1 if the contents all
+// look like printable text characters, otherwise return 0.
+//
+static int is_text_file(const char *filename)
+{
+    int fd = open(filename, O_RDONLY);
+    if (fd < 0) return 0;
+    unsigned char buf[64];
+    int len = read(fd, buf, sizeof(buf)/sizeof(unsigned char));
+    if (len < 0) return 0;
+    (void)close(fd);
+
+    for (int i = 0; i < len; i++) {
+        if (!(buf[i] == '\t' || buf[i] == '\n' || buf[i] == '\r'
+            || buf[i] >= '\x20'))
+            return 0;
+    }
+    return 1;
+}
+
+//
+// Print matches in JSON format.
+//
+static int print_matches_as_json(def_t *defs, file_t *f, vm_op_t *pattern, unsigned int flags)
+{
+    int matches = 0;
+    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
+        if (++matches > 1)
+            printf(",\n");
+        printf("{\"filename\":\"%s\",", f->filename ? f->filename : "-");
+        printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
+               0, f->end - f->contents);
+        json_match(f->contents, m, (flags & BP_VERBOSE) ? 1 : 0);
+        printf("]}}\n");
+    }
+    return matches;
+}
+
+//
+// Print matches in a visual explanation style
+//
+static int explain_matches(def_t *defs, file_t *f, vm_op_t *pattern, unsigned int flags)
+{
+    int matches = 0;
+    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
+        if (++matches == 1) {
+            if (print_color)
+                printf("\033[0;1;4;33m%s\033[0m\n", f->filename);
+            else
+                printf("%s:\n", f->filename);
+        } else {
+            printf("\n\n");
+        }
+        visualize_match(m);
+    }
+    return matches;
+}
+
+//
+// Replace a file's contents with the text version of a match.
+// (Useful for replacements)
+//
+static int inplace_modify_file(def_t *defs, file_t *f, vm_op_t *pattern, int context, unsigned int flags)
+{
+    // Need to do this before matching:
+    intern_file(f);
+
+    printer_t pr = {
+        .file = f,
+        .context_lines = context,
+        .use_color = 0,
+        .print_line_numbers = 0,
+    };
+
+    FILE *inplace_file = NULL; // Lazy-open this on the first match
+    int matches = 0;
+    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
+        ++matches;
+        if (print_errors(&pr, m) > 0)
+            exit(1);
+        // Lazy-open file for writing upon first match:
+        if (inplace_file == NULL) {
+            inplace_file = fopen(f->filename, "w");
+            check(inplace_file, "Could not open file for writing: %s\n", f->filename);
+        }
+        print_match(inplace_file, &pr, m);
+    }
+
+    if (inplace_file) {
+        printf("%s\n", f->filename);
+        fclose(inplace_file);
+    }
+    return matches;
+}
+
+//
+// Print all the matches in a file.
+//
+static int print_matches(def_t *defs, file_t *f, vm_op_t *pattern, int context, unsigned int flags)
+{
+    static int printed_filenames = 0;
+    int matches = 0;
+    printer_t pr = {
+        .file = f,
+        .context_lines = context,
+        .use_color = print_color,
+        .print_line_numbers = print_line_numbers,
+    };
+
+    for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
+        if (print_errors(&pr, m) > 0)
+            exit(1);
+
+        if (++matches == 1) {
+            if (printed_filenames++ > 0) printf("\n");
+            if (print_color)
+                printf("\033[0;1;4;33m%s\033[0m\n", f->filename);
+            else
+                printf("%s:\n", f->filename);
+        }
+        print_match(stdout, &pr, m);
+    }
+
+    if (matches > 0) {
+        // Print trailing context lines:
+        print_match(stdout, &pr, NULL);
+        // Ensure a trailing newline:
+        if (pr.pos > f->contents && pr.pos[-1] != '\n') printf("\n");
+    }
+
+    return matches;
+}
+
+//
 // For a given filename, open the file and attempt to match the given pattern
 // against it, printing any results according to the flags.
 //
-static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags)
+static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, int context, unsigned int flags)
 {
-    static int printed_matches = 0;
-    int success = 0;
     file_t *f = load_file(NULL, filename);
     check(f, "Could not open file: %s", filename);
-    if (flags & BP_INPLACE) // Need to do this before matching
-        intern_file(f);
-    match_t *m = match(defs, f, f->contents, pattern, flags);
-    if (m && print_errors(f, m, print_options) > 0)
-        exit(1);
-
-    if (m != NULL && m->end > m->start + 1) {
-        success = 1;
-        ++printed_matches;
-
-        if (flags & BP_EXPLAIN) {
-            if (filename)
-                printf("\033[1;4m%s\033[0m\n", filename);
-            visualize_match(m);
-        } else if (flags & BP_LISTFILES) {
-            printf("%s\n", filename);
-        } else if (flags & BP_JSON) {
-            if (printed_matches > 1)
-                printf(",\n");
-            printf("{\"filename\":\"%s\",", filename ? filename : "-");
-            printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
-                   0, f->end - f->contents);
-            json_match(f->contents, m, (flags & BP_VERBOSE) ? 1 : 0);
-            printf("]}}\n");
-        } else if (flags & BP_INPLACE && filename) {
-            FILE *out = fopen(filename, "w");
-            print_match(out, f, m, 0);
-            fclose(out);
-            printf("%s\n", filename);
-        } else {
-            if (printed_matches > 1)
-                fputc('\n', stdout);
-            if (filename) {
-                if (print_options & PRINT_COLOR)
-                    printf("\033[1;4;33m%s\033[0m\n", filename);
-                else
-                    printf("%s:\n", filename);
-            }
-            print_match(stdout, f, m,
-                filename ? print_options : print_options & (print_options_t)~PRINT_LINE_NUMBERS);
+
+    int matches = 0;
+    if (flags & BP_EXPLAIN) {
+        matches += explain_matches(defs, f, pattern, flags);
+    } else if (flags & BP_LISTFILES) {
+        match_t *m = next_match(defs, f, NULL, pattern, flags);
+        if (m) {
+            recycle_if_unused(&m);
+            printf("%s\n", f->filename);
+            matches += 1;
         }
+    } else if (flags & BP_JSON) {
+        matches += print_matches_as_json(defs, f, pattern, flags);
+    } else if (flags & BP_INPLACE) {
+        matches += inplace_modify_file(defs, f, pattern, context, flags);
+    } else {
+        matches += print_matches(defs, f, pattern, context, flags);
     }
 
-    recycle_if_unused(&m);
 #ifdef DEBUG_HEAP
     check(recycle_all_matches() == 0, "Memory leak: there should no longer be any matches in use at this point.");
 #endif
     destroy_file(&f);
-
-    return success;
+    fflush(stdout);
+    return matches;
 }
 
 #define FLAG(f) (flag=getflag((f), argv, &i))
@@ -133,9 +241,9 @@ static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, uns
 int main(int argc, char *argv[])
 {
     unsigned int flags = 0;
+    int context = USE_DEFAULT_CONTEXT;
     char *flag = NULL;
     char path[PATH_MAX] = {0};
-    const char *mode = "find-all";
 
     def_t *defs = NULL;
 
@@ -145,6 +253,10 @@ int main(int argc, char *argv[])
     file_t *pat_file = spoof_file(&loaded_files, "<pattern>", "pattern");
     vm_op_t *pattern = bp_pattern(pat_file, pat_file->contents);
 
+    // Define an opcode that is just a reference to the rule `replacement`
+    file_t *rep_file = spoof_file(&loaded_files, "<replacement>", "replacement");
+    vm_op_t *replacement = bp_pattern(rep_file, rep_file->contents);
+
     // Load builtins:
     if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) {
         file_t *f = load_file(&loaded_files, "/etc/xdg/bp/builtins.bp");
@@ -174,6 +286,7 @@ int main(int argc, char *argv[])
             flags |= BP_JSON;
         } else if (streq(argv[i], "--inplace")) {
             flags |= BP_INPLACE;
+            context = ALL_CONTEXT;
         } else if (streq(argv[i], "--ignore-case")) {
             flags |= BP_IGNORECASE;
         } else if (streq(argv[i], "--list-files")) {
@@ -185,7 +298,8 @@ int main(int argc, char *argv[])
             vm_op_t *rep = bp_replacement(replace_file, pattern, replace_file->contents);
             check(rep, "Replacement failed to compile: %s", flag);
             defs = with_def(defs, replace_file, strlen("replacement"), "replacement", rep);
-            mode = "replace-all";
+            pattern = replacement;
+            if (context == USE_DEFAULT_CONTEXT) context = 1;
         } else if (FLAG("--grammar") || FLAG("-g")) {
             file_t *f = load_file(&loaded_files, flag);
             if (f == NULL) {
@@ -231,8 +345,11 @@ int main(int argc, char *argv[])
             check(p, "Pattern failed to compile: %s", flag);
             defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p);
             ++npatterns;
-        } else if (FLAG("--mode") || FLAG("-m")) {
-            mode = flag;
+        } else if (FLAG("--context") || FLAG("-c")) {
+            if (streq(flag, "all"))
+                context = ALL_CONTEXT;
+            else
+                context = (int)strtol(flag, NULL, 10);
         } else if (argv[i][0] == '-' && argv[i][1] && argv[i][1] != '-') { // single-char flags
             for (char *c = &argv[i][1]; *c; ++c) {
                 switch (*c) {
@@ -240,7 +357,7 @@ int main(int argc, char *argv[])
                     case 'v': flags |= BP_VERBOSE; break; // -v
                     case 'e': flags |= BP_EXPLAIN; break; // -e
                     case 'j': flags |= BP_JSON; break; // -j
-                    case 'I': flags |= BP_INPLACE; break; // -I
+                    case 'I': flags |= BP_INPLACE; context = ALL_CONTEXT; break; // -I
                     case 'i': flags |= BP_IGNORECASE; break; // -i
                     case 'l': flags |= BP_LISTFILES; break; // -l
                     default:
@@ -268,24 +385,28 @@ int main(int argc, char *argv[])
         return 1;
     }
 
-    if (isatty(STDOUT_FILENO)) {
-        print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS;
+    if (context < 0) {
+        if (context == USE_DEFAULT_CONTEXT) context = 1;
+        else if (context != ALL_CONTEXT) context = 0;
     }
 
-    // Define an opcode that is just a reference to the overarching mode (e.g. find-all)
-    if (lookup(defs, mode) == NULL) {
-        printf("The mode '%s' is not defined.\n", mode);
+    if (flags & BP_INPLACE && context != ALL_CONTEXT) {
+        printf("--inplace and --context are mutually exclusive.\n"
+               "Please drop one of the two arguments and try again.\n");
         return 1;
     }
-    file_t *mode_file = spoof_file(&loaded_files, "<mode>", mode);
-    vm_op_t *mode_op = bp_pattern(mode_file, mode_file->contents);
+
+    if (isatty(STDOUT_FILENO)) {
+        print_color = 1;
+        print_line_numbers = 1;
+    }
 
     int found = 0;
     if (flags & BP_JSON) printf("[");
     if (i < argc) {
         // Files pass in as command line args:
         for (int nfiles = 0; i < argc; nfiles++, i++) {
-            found += process_file(defs, argv[i], mode_op, flags);
+            found += process_file(defs, argv[i], pattern, context, flags);
         }
     } else if (isatty(STDIN_FILENO)) {
         // No files, no piped in input, so use * **/*:
@@ -293,12 +414,13 @@ int main(int argc, char *argv[])
         glob("*", 0, NULL, &globbuf);
         glob("**/*", GLOB_APPEND, NULL, &globbuf);
         for (size_t i = 0; i < globbuf.gl_pathc; i++) {
-            found += process_file(defs, globbuf.gl_pathv[i], mode_op, flags);
+            if (is_text_file(globbuf.gl_pathv[i]))
+                found += process_file(defs, globbuf.gl_pathv[i], pattern, context, flags);
         }
         globfree(&globbuf);
     } else {
         // Piped in input:
-        found += process_file(defs, NULL, mode_op, flags);
+        found += process_file(defs, NULL, pattern, context, flags);
     }
     if (flags & BP_JSON) printf("]\n");
 
```
