aboutsummaryrefslogtreecommitdiff
path: root/bp.c
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2021-01-15 01:19:10 -0800
committerBruce Hill <bruce@bruce-hill.com>2021-01-15 01:19:10 -0800
commit8ff80b09ccd7e680829d0911d965ad4b0d6f7939 (patch)
tree399da1a15ed749fa10cec8bf62dcde5e93ef3f3f /bp.c
parent9b70cb4f624aa19c09ea73b3d9e0f50c032602c5 (diff)
Major overhaul of how different modes of behavior work. Approximately 2x
speedup and 2x memory footprint reduction. Also removed --mode and VM_HIDE (~ operator), and added --context. Printing works better now.
Diffstat (limited to 'bp.c')
-rw-r--r--bp.c252
1 files changed, 187 insertions, 65 deletions
diff --git a/bp.c b/bp.c
index 3257d0f..941a2b0 100644
--- a/bp.c
+++ b/bp.c
@@ -36,15 +36,16 @@ static const char *usage = (
" -p --pattern <pat> provide a pattern (equivalent to bp '\\(<pat>)')\n"
" -P --pattern-string <pat> provide a string pattern (may be useful if '<pat>' begins with a '-')\n"
" -r --replace <replacement> replace the input pattern with the given replacement\n"
- " -m --mode <mode> set the behavior mode (defult: find-all)\n"
+ " -c --context <context> set number of lines of context to print (all: the whole file, 0: only the match, 1: the line, N: N lines of context)\n"
" -g --grammar <grammar file> use the specified file as a grammar\n");
-static print_options_t print_options = 0;
+#define USE_DEFAULT_CONTEXT -2
+#define ALL_CONTEXT -1
+static int print_color = 0;
+static int print_line_numbers = 0;
__attribute__((nonnull))
static char *getflag(const char *flag, char *argv[], int *i);
-__attribute__((nonnull(3)))
-static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags);
//
// Return a pointer to the value part of a flag, if present, otherwise NULL.
@@ -67,65 +68,172 @@ static char *getflag(const char *flag, char *argv[], int *i)
}
//
+// Scan the first few dozen bytes of a file and return 1 if the contents all
+// look like printable text characters, otherwise return 0.
+//
+static int is_text_file(const char *filename)
+{
+ int fd = open(filename, O_RDONLY);
+ if (fd < 0) return 0;
+ unsigned char buf[64];
+ int len = read(fd, buf, sizeof(buf)/sizeof(unsigned char));
+ if (len < 0) return 0;
+ (void)close(fd);
+
+ for (int i = 0; i < len; i++) {
+ if (!(buf[i] == '\t' || buf[i] == '\n' || buf[i] == '\r'
+ || buf[i] >= '\x20'))
+ return 0;
+ }
+ return 1;
+}
+
+//
+// Print matches in JSON format.
+//
+static int print_matches_as_json(def_t *defs, file_t *f, vm_op_t *pattern, unsigned int flags)
+{
+ int matches = 0;
+ for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
+ if (++matches > 1)
+ printf(",\n");
+ printf("{\"filename\":\"%s\",", f->filename ? f->filename : "-");
+ printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
+ 0, f->end - f->contents);
+ json_match(f->contents, m, (flags & BP_VERBOSE) ? 1 : 0);
+ printf("]}}\n");
+ }
+ return matches;
+}
+
+//
+// Print matches in a visual explanation style
+//
+static int explain_matches(def_t *defs, file_t *f, vm_op_t *pattern, unsigned int flags)
+{
+ int matches = 0;
+ for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
+ if (++matches == 1) {
+ if (print_color)
+ printf("\033[0;1;4;33m%s\033[0m\n", f->filename);
+ else
+ printf("%s:\n", f->filename);
+ } else {
+ printf("\n\n");
+ }
+ visualize_match(m);
+ }
+ return matches;
+}
+
+//
+// Replace a file's contents with the text version of a match.
+// (Useful for replacements)
+//
+static int inplace_modify_file(def_t *defs, file_t *f, vm_op_t *pattern, int context, unsigned int flags)
+{
+ // Need to do this before matching:
+ intern_file(f);
+
+ printer_t pr = {
+ .file = f,
+ .context_lines = context,
+ .use_color = 0,
+ .print_line_numbers = 0,
+ };
+
+ FILE *inplace_file = NULL; // Lazy-open this on the first match
+ int matches = 0;
+ for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
+ ++matches;
+ if (print_errors(&pr, m) > 0)
+ exit(1);
+ // Lazy-open file for writing upon first match:
+ if (inplace_file == NULL) {
+ inplace_file = fopen(f->filename, "w");
+ check(inplace_file, "Could not open file for writing: %s\n", f->filename);
+ }
+ print_match(inplace_file, &pr, m);
+ }
+
+ if (inplace_file) {
+ printf("%s\n", f->filename);
+ fclose(inplace_file);
+ }
+ return matches;
+}
+
+//
+// Print all the matches in a file.
+//
+static int print_matches(def_t *defs, file_t *f, vm_op_t *pattern, int context, unsigned int flags)
+{
+ static int printed_filenames = 0;
+ int matches = 0;
+ printer_t pr = {
+ .file = f,
+ .context_lines = context,
+ .use_color = print_color,
+ .print_line_numbers = print_line_numbers,
+ };
+
+ for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
+ if (print_errors(&pr, m) > 0)
+ exit(1);
+
+ if (++matches == 1) {
+ if (printed_filenames++ > 0) printf("\n");
+ if (print_color)
+ printf("\033[0;1;4;33m%s\033[0m\n", f->filename);
+ else
+ printf("%s:\n", f->filename);
+ }
+ print_match(stdout, &pr, m);
+ }
+
+ if (matches > 0) {
+ // Print trailing context lines:
+ print_match(stdout, &pr, NULL);
+ // Ensure a trailing newline:
+ if (pr.pos > f->contents && pr.pos[-1] != '\n') printf("\n");
+ }
+
+ return matches;
+}
+
+//
// For a given filename, open the file and attempt to match the given pattern
// against it, printing any results according to the flags.
//
-static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags)
+static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, int context, unsigned int flags)
{
- static int printed_matches = 0;
- int success = 0;
file_t *f = load_file(NULL, filename);
check(f, "Could not open file: %s", filename);
- if (flags & BP_INPLACE) // Need to do this before matching
- intern_file(f);
- match_t *m = match(defs, f, f->contents, pattern, flags);
- if (m && print_errors(f, m, print_options) > 0)
- exit(1);
-
- if (m != NULL && m->end > m->start + 1) {
- success = 1;
- ++printed_matches;
-
- if (flags & BP_EXPLAIN) {
- if (filename)
- printf("\033[1;4m%s\033[0m\n", filename);
- visualize_match(m);
- } else if (flags & BP_LISTFILES) {
- printf("%s\n", filename);
- } else if (flags & BP_JSON) {
- if (printed_matches > 1)
- printf(",\n");
- printf("{\"filename\":\"%s\",", filename ? filename : "-");
- printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
- 0, f->end - f->contents);
- json_match(f->contents, m, (flags & BP_VERBOSE) ? 1 : 0);
- printf("]}}\n");
- } else if (flags & BP_INPLACE && filename) {
- FILE *out = fopen(filename, "w");
- print_match(out, f, m, 0);
- fclose(out);
- printf("%s\n", filename);
- } else {
- if (printed_matches > 1)
- fputc('\n', stdout);
- if (filename) {
- if (print_options & PRINT_COLOR)
- printf("\033[1;4;33m%s\033[0m\n", filename);
- else
- printf("%s:\n", filename);
- }
- print_match(stdout, f, m,
- filename ? print_options : print_options & (print_options_t)~PRINT_LINE_NUMBERS);
+
+ int matches = 0;
+ if (flags & BP_EXPLAIN) {
+ matches += explain_matches(defs, f, pattern, flags);
+ } else if (flags & BP_LISTFILES) {
+ match_t *m = next_match(defs, f, NULL, pattern, flags);
+ if (m) {
+ recycle_if_unused(&m);
+ printf("%s\n", f->filename);
+ matches += 1;
}
+ } else if (flags & BP_JSON) {
+ matches += print_matches_as_json(defs, f, pattern, flags);
+ } else if (flags & BP_INPLACE) {
+ matches += inplace_modify_file(defs, f, pattern, context, flags);
+ } else {
+ matches += print_matches(defs, f, pattern, context, flags);
}
- recycle_if_unused(&m);
#ifdef DEBUG_HEAP
check(recycle_all_matches() == 0, "Memory leak: there should no longer be any matches in use at this point.");
#endif
destroy_file(&f);
-
- return success;
+ fflush(stdout);
+ return matches;
}
#define FLAG(f) (flag=getflag((f), argv, &i))
@@ -133,9 +241,9 @@ static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, uns
int main(int argc, char *argv[])
{
unsigned int flags = 0;
+ int context = USE_DEFAULT_CONTEXT;
char *flag = NULL;
char path[PATH_MAX] = {0};
- const char *mode = "find-all";
def_t *defs = NULL;
@@ -145,6 +253,10 @@ int main(int argc, char *argv[])
file_t *pat_file = spoof_file(&loaded_files, "<pattern>", "pattern");
vm_op_t *pattern = bp_pattern(pat_file, pat_file->contents);
+ // Define an opcode that is just a reference to the rule `replacement`
+ file_t *rep_file = spoof_file(&loaded_files, "<replacement>", "replacement");
+ vm_op_t *replacement = bp_pattern(rep_file, rep_file->contents);
+
// Load builtins:
if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) {
file_t *f = load_file(&loaded_files, "/etc/xdg/bp/builtins.bp");
@@ -174,6 +286,7 @@ int main(int argc, char *argv[])
flags |= BP_JSON;
} else if (streq(argv[i], "--inplace")) {
flags |= BP_INPLACE;
+ context = ALL_CONTEXT;
} else if (streq(argv[i], "--ignore-case")) {
flags |= BP_IGNORECASE;
} else if (streq(argv[i], "--list-files")) {
@@ -185,7 +298,8 @@ int main(int argc, char *argv[])
vm_op_t *rep = bp_replacement(replace_file, pattern, replace_file->contents);
check(rep, "Replacement failed to compile: %s", flag);
defs = with_def(defs, replace_file, strlen("replacement"), "replacement", rep);
- mode = "replace-all";
+ pattern = replacement;
+ if (context == USE_DEFAULT_CONTEXT) context = 1;
} else if (FLAG("--grammar") || FLAG("-g")) {
file_t *f = load_file(&loaded_files, flag);
if (f == NULL) {
@@ -231,8 +345,11 @@ int main(int argc, char *argv[])
check(p, "Pattern failed to compile: %s", flag);
defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p);
++npatterns;
- } else if (FLAG("--mode") || FLAG("-m")) {
- mode = flag;
+ } else if (FLAG("--context") || FLAG("-c")) {
+ if (streq(flag, "all"))
+ context = ALL_CONTEXT;
+ else
+ context = (int)strtol(flag, NULL, 10);
} else if (argv[i][0] == '-' && argv[i][1] && argv[i][1] != '-') { // single-char flags
for (char *c = &argv[i][1]; *c; ++c) {
switch (*c) {
@@ -240,7 +357,7 @@ int main(int argc, char *argv[])
case 'v': flags |= BP_VERBOSE; break; // -v
case 'e': flags |= BP_EXPLAIN; break; // -e
case 'j': flags |= BP_JSON; break; // -j
- case 'I': flags |= BP_INPLACE; break; // -I
+ case 'I': flags |= BP_INPLACE; context = ALL_CONTEXT; break; // -I
case 'i': flags |= BP_IGNORECASE; break; // -i
case 'l': flags |= BP_LISTFILES; break; // -l
default:
@@ -268,24 +385,28 @@ int main(int argc, char *argv[])
return 1;
}
- if (isatty(STDOUT_FILENO)) {
- print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS;
+ if (context < 0) {
+ if (context == USE_DEFAULT_CONTEXT) context = 1;
+ else if (context != ALL_CONTEXT) context = 0;
}
- // Define an opcode that is just a reference to the overarching mode (e.g. find-all)
- if (lookup(defs, mode) == NULL) {
- printf("The mode '%s' is not defined.\n", mode);
+ if (flags & BP_INPLACE && context != ALL_CONTEXT) {
+ printf("--inplace and --context are mutually exclusive.\n"
+ "Please drop one of the two arguments and try again.\n");
return 1;
}
- file_t *mode_file = spoof_file(&loaded_files, "<mode>", mode);
- vm_op_t *mode_op = bp_pattern(mode_file, mode_file->contents);
+
+ if (isatty(STDOUT_FILENO)) {
+ print_color = 1;
+ print_line_numbers = 1;
+ }
int found = 0;
if (flags & BP_JSON) printf("[");
if (i < argc) {
// Files pass in as command line args:
for (int nfiles = 0; i < argc; nfiles++, i++) {
- found += process_file(defs, argv[i], mode_op, flags);
+ found += process_file(defs, argv[i], pattern, context, flags);
}
} else if (isatty(STDIN_FILENO)) {
// No files, no piped in input, so use * **/*:
@@ -293,12 +414,13 @@ int main(int argc, char *argv[])
glob("*", 0, NULL, &globbuf);
glob("**/*", GLOB_APPEND, NULL, &globbuf);
for (size_t i = 0; i < globbuf.gl_pathc; i++) {
- found += process_file(defs, globbuf.gl_pathv[i], mode_op, flags);
+ if (is_text_file(globbuf.gl_pathv[i]))
+ found += process_file(defs, globbuf.gl_pathv[i], pattern, context, flags);
}
globfree(&globbuf);
} else {
// Piped in input:
- found += process_file(defs, NULL, mode_op, flags);
+ found += process_file(defs, NULL, pattern, context, flags);
}
if (flags & BP_JSON) printf("]\n");