Major overhaul of how different modes of behavior work. Approximately 2x
speedup and 2x memory footprint reduction. Also removed --mode and VM_HIDE (~ operator), and added --context. Printing works better now.
This commit is contained in:
parent
9b70cb4f62
commit
8ff80b09cc
@ -19,7 +19,7 @@ It's written in pure C with no dependencies.
|
||||
* `-p` `--pattern <pat>` provide a pattern (equivalent to `bp '\(<pat>)'`)
|
||||
* `-P` `--pattern-string <pat>` provide a string pattern (equivalent to `bp '<pat>'`, but may be useful if `'<pat>'` begins with a '-')
|
||||
* `-r` `--replace <replacement>` replace the input pattern with the given replacement
|
||||
* `-m` `--mode <mode>` set the behavior mode (default: `find-all`)
|
||||
* `-c` `--context <N>` change how many lines of context are printed (`0`: no context, `all`: the whole file, `<N>`: the matching lines plus `<N-1>` lines before/after)
|
||||
* `-g` `--grammar <grammar file>` use the specified file as a grammar
|
||||
|
||||
See `man ./bp.1` for more details.
|
||||
|
12
bp.1
12
bp.1
@ -18,7 +18,7 @@ bp \- Bruce's Parsing Expression Grammar tool
|
||||
[\fI-D\fR|\fI--define-string\fR \fI<name>\fR:\fI<string-pattern>\fR]
|
||||
[\fI-r\fR|\fI--replace\fR \fI<replacement>\fR]
|
||||
[\fI-g\fR|\fI--grammar\fR \fI<grammar file>\fR]
|
||||
[\fI-m\fR|\fI--mode\fR \fI<mode>\fR]
|
||||
[\fI-c\fR|\fI--context\fR \fI<N>\fR]
|
||||
\fI<pattern>\fR
|
||||
[[--] \fI<input files...>\fR]
|
||||
.SH DESCRIPTION
|
||||
@ -54,10 +54,12 @@ Replace all occurrences of the main pattern with the given string.
|
||||
.B \-g\fR, \fB--grammar \fI<grammar file>\fR
|
||||
Load the grammar from the given file.
|
||||
|
||||
.B \-m\fR, \fB--mode \fI<mode>\fR
|
||||
The mode to operate in. Options are: \fIfind-all\fR (the default),
|
||||
\fIonly-matches\fR, \fIpattern\fR, \fIreplacement\fR, \fIreplace-all\fR
|
||||
(implied by \fB--replace\fR), or any other grammar rule name.
|
||||
.B \-c\fR, \fB--context \fI<N>\fR
|
||||
The number of lines of context to print. If \fI<N>\fR is 0, print only the
|
||||
exact text of the matches. If \fI<N>\fR is "all", print the entire file.
|
||||
Otherwise, if \fI<N>\fR is a positive integer, print the whole line on which
|
||||
matches occur, as well as the \fI<N-1>\fR lines before and after the match. The
|
||||
default value for this argument is 1 (print whole lines where matches occur).
|
||||
|
||||
.B \--help
|
||||
Print the usage and exit.
|
||||
|
250
bp.c
250
bp.c
@ -36,15 +36,16 @@ static const char *usage = (
|
||||
" -p --pattern <pat> provide a pattern (equivalent to bp '\\(<pat>)')\n"
|
||||
" -P --pattern-string <pat> provide a string pattern (may be useful if '<pat>' begins with a '-')\n"
|
||||
" -r --replace <replacement> replace the input pattern with the given replacement\n"
|
||||
" -m --mode <mode> set the behavior mode (defult: find-all)\n"
|
||||
" -c --context <context> set number of lines of context to print (all: the whole file, 0: only the match, 1: the line, N: N lines of context)\n"
|
||||
" -g --grammar <grammar file> use the specified file as a grammar\n");
|
||||
|
||||
static print_options_t print_options = 0;
|
||||
#define USE_DEFAULT_CONTEXT -2
|
||||
#define ALL_CONTEXT -1
|
||||
static int print_color = 0;
|
||||
static int print_line_numbers = 0;
|
||||
|
||||
__attribute__((nonnull))
|
||||
static char *getflag(const char *flag, char *argv[], int *i);
|
||||
__attribute__((nonnull(3)))
|
||||
static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags);
|
||||
|
||||
//
|
||||
// Return a pointer to the value part of a flag, if present, otherwise NULL.
|
||||
@ -66,66 +67,173 @@ static char *getflag(const char *flag, char *argv[], int *i)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//
// Read up to the first 64 bytes of a file and return 1 if every byte read is
// a tab, newline, carriage return, or has a value of 0x20 or above; otherwise
// return 0. Also returns 0 if the file cannot be opened or read. A file that
// yields no bytes at all is reported as text.
//
static int is_text_file(const char *filename)
{
    int fd = open(filename, O_RDONLY);
    if (fd < 0) return 0;
    unsigned char buf[64];
    ssize_t len = read(fd, buf, sizeof(buf));
    // Close before looking at the result so the descriptor is released even
    // when read() fails (e.g. when `filename` names a directory).
    (void)close(fd);
    if (len < 0) return 0;

    for (ssize_t i = 0; i < len; i++) {
        if (!(buf[i] == '\t' || buf[i] == '\n' || buf[i] == '\r'
              || buf[i] >= '\x20'))
            return 0;
    }
    return 1;
}
|
||||
|
||||
//
|
||||
// Print matches in JSON format.
|
||||
//
|
||||
static int print_matches_as_json(def_t *defs, file_t *f, vm_op_t *pattern, unsigned int flags)
|
||||
{
|
||||
int matches = 0;
|
||||
for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
|
||||
if (++matches > 1)
|
||||
printf(",\n");
|
||||
printf("{\"filename\":\"%s\",", f->filename ? f->filename : "-");
|
||||
printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
|
||||
0, f->end - f->contents);
|
||||
json_match(f->contents, m, (flags & BP_VERBOSE) ? 1 : 0);
|
||||
printf("]}}\n");
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
//
|
||||
// Print matches in a visual explanation style
|
||||
//
|
||||
static int explain_matches(def_t *defs, file_t *f, vm_op_t *pattern, unsigned int flags)
|
||||
{
|
||||
int matches = 0;
|
||||
for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
|
||||
if (++matches == 1) {
|
||||
if (print_color)
|
||||
printf("\033[0;1;4;33m%s\033[0m\n", f->filename);
|
||||
else
|
||||
printf("%s:\n", f->filename);
|
||||
} else {
|
||||
printf("\n\n");
|
||||
}
|
||||
visualize_match(m);
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
//
|
||||
// Replace a file's contents with the text version of a match.
|
||||
// (Useful for replacements)
|
||||
//
|
||||
static int inplace_modify_file(def_t *defs, file_t *f, vm_op_t *pattern, int context, unsigned int flags)
|
||||
{
|
||||
// Need to do this before matching:
|
||||
intern_file(f);
|
||||
|
||||
printer_t pr = {
|
||||
.file = f,
|
||||
.context_lines = context,
|
||||
.use_color = 0,
|
||||
.print_line_numbers = 0,
|
||||
};
|
||||
|
||||
FILE *inplace_file = NULL; // Lazy-open this on the first match
|
||||
int matches = 0;
|
||||
for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
|
||||
++matches;
|
||||
if (print_errors(&pr, m) > 0)
|
||||
exit(1);
|
||||
// Lazy-open file for writing upon first match:
|
||||
if (inplace_file == NULL) {
|
||||
inplace_file = fopen(f->filename, "w");
|
||||
check(inplace_file, "Could not open file for writing: %s\n", f->filename);
|
||||
}
|
||||
print_match(inplace_file, &pr, m);
|
||||
}
|
||||
|
||||
if (inplace_file) {
|
||||
printf("%s\n", f->filename);
|
||||
fclose(inplace_file);
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
//
|
||||
// Print all the matches in a file.
|
||||
//
|
||||
static int print_matches(def_t *defs, file_t *f, vm_op_t *pattern, int context, unsigned int flags)
|
||||
{
|
||||
static int printed_filenames = 0;
|
||||
int matches = 0;
|
||||
printer_t pr = {
|
||||
.file = f,
|
||||
.context_lines = context,
|
||||
.use_color = print_color,
|
||||
.print_line_numbers = print_line_numbers,
|
||||
};
|
||||
|
||||
for (match_t *m = NULL; (m = next_match(defs, f, m, pattern, flags)); ) {
|
||||
if (print_errors(&pr, m) > 0)
|
||||
exit(1);
|
||||
|
||||
if (++matches == 1) {
|
||||
if (printed_filenames++ > 0) printf("\n");
|
||||
if (print_color)
|
||||
printf("\033[0;1;4;33m%s\033[0m\n", f->filename);
|
||||
else
|
||||
printf("%s:\n", f->filename);
|
||||
}
|
||||
print_match(stdout, &pr, m);
|
||||
}
|
||||
|
||||
if (matches > 0) {
|
||||
// Print trailing context lines:
|
||||
print_match(stdout, &pr, NULL);
|
||||
// Ensure a trailing newline:
|
||||
if (pr.pos > f->contents && pr.pos[-1] != '\n') printf("\n");
|
||||
}
|
||||
|
||||
return matches;
|
||||
}
|
||||
|
||||
//
|
||||
// For a given filename, open the file and attempt to match the given pattern
|
||||
// against it, printing any results according to the flags.
|
||||
//
|
||||
static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, unsigned int flags)
|
||||
static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, int context, unsigned int flags)
|
||||
{
|
||||
static int printed_matches = 0;
|
||||
int success = 0;
|
||||
file_t *f = load_file(NULL, filename);
|
||||
check(f, "Could not open file: %s", filename);
|
||||
if (flags & BP_INPLACE) // Need to do this before matching
|
||||
intern_file(f);
|
||||
match_t *m = match(defs, f, f->contents, pattern, flags);
|
||||
if (m && print_errors(f, m, print_options) > 0)
|
||||
exit(1);
|
||||
|
||||
if (m != NULL && m->end > m->start + 1) {
|
||||
success = 1;
|
||||
++printed_matches;
|
||||
|
||||
if (flags & BP_EXPLAIN) {
|
||||
if (filename)
|
||||
printf("\033[1;4m%s\033[0m\n", filename);
|
||||
visualize_match(m);
|
||||
} else if (flags & BP_LISTFILES) {
|
||||
printf("%s\n", filename);
|
||||
} else if (flags & BP_JSON) {
|
||||
if (printed_matches > 1)
|
||||
printf(",\n");
|
||||
printf("{\"filename\":\"%s\",", filename ? filename : "-");
|
||||
printf("\"tree\":{\"rule\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
|
||||
0, f->end - f->contents);
|
||||
json_match(f->contents, m, (flags & BP_VERBOSE) ? 1 : 0);
|
||||
printf("]}}\n");
|
||||
} else if (flags & BP_INPLACE && filename) {
|
||||
FILE *out = fopen(filename, "w");
|
||||
print_match(out, f, m, 0);
|
||||
fclose(out);
|
||||
printf("%s\n", filename);
|
||||
} else {
|
||||
if (printed_matches > 1)
|
||||
fputc('\n', stdout);
|
||||
if (filename) {
|
||||
if (print_options & PRINT_COLOR)
|
||||
printf("\033[1;4;33m%s\033[0m\n", filename);
|
||||
else
|
||||
printf("%s:\n", filename);
|
||||
}
|
||||
print_match(stdout, f, m,
|
||||
filename ? print_options : print_options & (print_options_t)~PRINT_LINE_NUMBERS);
|
||||
int matches = 0;
|
||||
if (flags & BP_EXPLAIN) {
|
||||
matches += explain_matches(defs, f, pattern, flags);
|
||||
} else if (flags & BP_LISTFILES) {
|
||||
match_t *m = next_match(defs, f, NULL, pattern, flags);
|
||||
if (m) {
|
||||
recycle_if_unused(&m);
|
||||
printf("%s\n", f->filename);
|
||||
matches += 1;
|
||||
}
|
||||
} else if (flags & BP_JSON) {
|
||||
matches += print_matches_as_json(defs, f, pattern, flags);
|
||||
} else if (flags & BP_INPLACE) {
|
||||
matches += inplace_modify_file(defs, f, pattern, context, flags);
|
||||
} else {
|
||||
matches += print_matches(defs, f, pattern, context, flags);
|
||||
}
|
||||
|
||||
recycle_if_unused(&m);
|
||||
#ifdef DEBUG_HEAP
|
||||
check(recycle_all_matches() == 0, "Memory leak: there should no longer be any matches in use at this point.");
|
||||
#endif
|
||||
destroy_file(&f);
|
||||
|
||||
return success;
|
||||
fflush(stdout);
|
||||
return matches;
|
||||
}
|
||||
|
||||
#define FLAG(f) (flag=getflag((f), argv, &i))
|
||||
@ -133,9 +241,9 @@ static int process_file(def_t *defs, const char *filename, vm_op_t *pattern, uns
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
int context = USE_DEFAULT_CONTEXT;
|
||||
char *flag = NULL;
|
||||
char path[PATH_MAX] = {0};
|
||||
const char *mode = "find-all";
|
||||
|
||||
def_t *defs = NULL;
|
||||
|
||||
@ -145,6 +253,10 @@ int main(int argc, char *argv[])
|
||||
file_t *pat_file = spoof_file(&loaded_files, "<pattern>", "pattern");
|
||||
vm_op_t *pattern = bp_pattern(pat_file, pat_file->contents);
|
||||
|
||||
// Define an opcode that is just a reference to the rule `replacement`
|
||||
file_t *rep_file = spoof_file(&loaded_files, "<replacement>", "replacement");
|
||||
vm_op_t *replacement = bp_pattern(rep_file, rep_file->contents);
|
||||
|
||||
// Load builtins:
|
||||
if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) {
|
||||
file_t *f = load_file(&loaded_files, "/etc/xdg/bp/builtins.bp");
|
||||
@ -174,6 +286,7 @@ int main(int argc, char *argv[])
|
||||
flags |= BP_JSON;
|
||||
} else if (streq(argv[i], "--inplace")) {
|
||||
flags |= BP_INPLACE;
|
||||
context = ALL_CONTEXT;
|
||||
} else if (streq(argv[i], "--ignore-case")) {
|
||||
flags |= BP_IGNORECASE;
|
||||
} else if (streq(argv[i], "--list-files")) {
|
||||
@ -185,7 +298,8 @@ int main(int argc, char *argv[])
|
||||
vm_op_t *rep = bp_replacement(replace_file, pattern, replace_file->contents);
|
||||
check(rep, "Replacement failed to compile: %s", flag);
|
||||
defs = with_def(defs, replace_file, strlen("replacement"), "replacement", rep);
|
||||
mode = "replace-all";
|
||||
pattern = replacement;
|
||||
if (context == USE_DEFAULT_CONTEXT) context = 1;
|
||||
} else if (FLAG("--grammar") || FLAG("-g")) {
|
||||
file_t *f = load_file(&loaded_files, flag);
|
||||
if (f == NULL) {
|
||||
@ -231,8 +345,11 @@ int main(int argc, char *argv[])
|
||||
check(p, "Pattern failed to compile: %s", flag);
|
||||
defs = with_def(defs, arg_file, strlen("pattern"), "pattern", p);
|
||||
++npatterns;
|
||||
} else if (FLAG("--mode") || FLAG("-m")) {
|
||||
mode = flag;
|
||||
} else if (FLAG("--context") || FLAG("-c")) {
|
||||
if (streq(flag, "all"))
|
||||
context = ALL_CONTEXT;
|
||||
else
|
||||
context = (int)strtol(flag, NULL, 10);
|
||||
} else if (argv[i][0] == '-' && argv[i][1] && argv[i][1] != '-') { // single-char flags
|
||||
for (char *c = &argv[i][1]; *c; ++c) {
|
||||
switch (*c) {
|
||||
@ -240,7 +357,7 @@ int main(int argc, char *argv[])
|
||||
case 'v': flags |= BP_VERBOSE; break; // -v
|
||||
case 'e': flags |= BP_EXPLAIN; break; // -e
|
||||
case 'j': flags |= BP_JSON; break; // -j
|
||||
case 'I': flags |= BP_INPLACE; break; // -I
|
||||
case 'I': flags |= BP_INPLACE; context = ALL_CONTEXT; break; // -I
|
||||
case 'i': flags |= BP_IGNORECASE; break; // -i
|
||||
case 'l': flags |= BP_LISTFILES; break; // -l
|
||||
default:
|
||||
@ -268,24 +385,28 @@ int main(int argc, char *argv[])
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (isatty(STDOUT_FILENO)) {
|
||||
print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS;
|
||||
if (context < 0) {
|
||||
if (context == USE_DEFAULT_CONTEXT) context = 1;
|
||||
else if (context != ALL_CONTEXT) context = 0;
|
||||
}
|
||||
|
||||
// Define an opcode that is just a reference to the overarching mode (e.g. find-all)
|
||||
if (lookup(defs, mode) == NULL) {
|
||||
printf("The mode '%s' is not defined.\n", mode);
|
||||
if (flags & BP_INPLACE && context != ALL_CONTEXT) {
|
||||
printf("--inplace and --context are mutually exclusive.\n"
|
||||
"Please drop one of the two arguments and try again.\n");
|
||||
return 1;
|
||||
}
|
||||
file_t *mode_file = spoof_file(&loaded_files, "<mode>", mode);
|
||||
vm_op_t *mode_op = bp_pattern(mode_file, mode_file->contents);
|
||||
|
||||
if (isatty(STDOUT_FILENO)) {
|
||||
print_color = 1;
|
||||
print_line_numbers = 1;
|
||||
}
|
||||
|
||||
int found = 0;
|
||||
if (flags & BP_JSON) printf("[");
|
||||
if (i < argc) {
|
||||
// Files pass in as command line args:
|
||||
for (int nfiles = 0; i < argc; nfiles++, i++) {
|
||||
found += process_file(defs, argv[i], mode_op, flags);
|
||||
found += process_file(defs, argv[i], pattern, context, flags);
|
||||
}
|
||||
} else if (isatty(STDIN_FILENO)) {
|
||||
// No files, no piped in input, so use * **/*:
|
||||
@ -293,12 +414,13 @@ int main(int argc, char *argv[])
|
||||
glob("*", 0, NULL, &globbuf);
|
||||
glob("**/*", GLOB_APPEND, NULL, &globbuf);
|
||||
for (size_t i = 0; i < globbuf.gl_pathc; i++) {
|
||||
found += process_file(defs, globbuf.gl_pathv[i], mode_op, flags);
|
||||
if (is_text_file(globbuf.gl_pathv[i]))
|
||||
found += process_file(defs, globbuf.gl_pathv[i], pattern, context, flags);
|
||||
}
|
||||
globfree(&globbuf);
|
||||
} else {
|
||||
// Piped in input:
|
||||
found += process_file(defs, NULL, mode_op, flags);
|
||||
found += process_file(defs, NULL, pattern, context, flags);
|
||||
}
|
||||
if (flags & BP_JSON) printf("]\n");
|
||||
|
||||
|
11
compiler.c
11
compiler.c
@ -465,17 +465,6 @@ static vm_op_t *_bp_simplepattern(file_t *f, const char *str)
|
||||
op->end = pat->end;
|
||||
return op;
|
||||
}
|
||||
// Hide
|
||||
case '~': {
|
||||
vm_op_t *pat = bp_simplepattern(f, str);
|
||||
if (!pat)
|
||||
file_err(f, str, str, "There should be a pattern after this '~'");
|
||||
vm_op_t *op = new_op(f, start, VM_HIDE);
|
||||
op->len = 0;
|
||||
op->args.pat = pat;
|
||||
op->end = pat->end;
|
||||
return op;
|
||||
}
|
||||
// Special rules:
|
||||
case '_': case '^': case '$': case '|': {
|
||||
const char *name = NULL;
|
||||
|
@ -11,7 +11,7 @@ String-pattern: ..$$ % (\n / Nodent / Escape / `\ pat [`;])
|
||||
|
||||
pat: simple-pat !(__("!="/"==")) / suffixed-pat
|
||||
simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range
|
||||
/ Escape / Repeat / Optional / No / Hide / After / Before / Capture
|
||||
/ Escape / Repeat / Optional / No / After / Before / Capture
|
||||
/ Ref / parens
|
||||
|
||||
suffixed-pat: (
|
||||
@ -41,7 +41,6 @@ escape-sequence: (
|
||||
/ `x 2 `0-9,a-f,A-F
|
||||
)
|
||||
No: `! (__@pat / @!=(''=>"Expected a pattern after the exclamation mark"))
|
||||
Hide: `~ (__@pat / @!=(''=>"Expected a pattern after the tilde"))
|
||||
Nodent: `\ `N
|
||||
Upto-and: ".." [__@first=simple-pat] [__`%__@second=simple-pat]
|
||||
Repeat: (
|
||||
|
@ -9,26 +9,6 @@ is-text-file: >32(\t/\n/\r/\x20-x7e/!\x00-x7f utf8-codepoint/$$)
|
||||
# Meta-rules for acting on everything:
|
||||
pattern: !'' # Not defined by default
|
||||
replacement: !'' # Not defined by default
|
||||
replace-all: (
|
||||
(include-binary-files / is-text-file)
|
||||
+(..replacement%\n) ..%\n
|
||||
)
|
||||
find-all: (
|
||||
(include-binary-files / is-text-file)
|
||||
*(!..pattern ~(..\n))
|
||||
+(+(..@pattern) ..(\n/$$) / ~(..\n))
|
||||
[!<\n => "\n"]
|
||||
)
|
||||
find-lines: (
|
||||
(include-binary-files / is-text-file)
|
||||
*(!(pattern$) ~(..\n))
|
||||
+(@pattern (\n/$$) / ~(..\n))
|
||||
[!<\n => "\n"]
|
||||
)
|
||||
only-matches: (
|
||||
(include-binary-files / is-text-file)
|
||||
+(..@pattern%\n =>'@1\n')
|
||||
)
|
||||
|
||||
# Helper definitions (commonly used)
|
||||
url: (
|
||||
|
237
printing.c
237
printing.c
@ -16,21 +16,15 @@ typedef struct match_node_s {
|
||||
struct match_node_s *next;
|
||||
} match_node_t;
|
||||
|
||||
typedef struct {
|
||||
size_t line, printed_line;
|
||||
const char *color;
|
||||
} print_state_t;
|
||||
static const char *color_hl = "\033[0;31;1m";
|
||||
static const char *color_normal = "\033[0m";
|
||||
|
||||
__attribute__((nonnull, pure))
|
||||
static int height_of_match(match_t *m);
|
||||
__attribute__((nonnull))
|
||||
static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen);
|
||||
__attribute__((nonnull))
|
||||
static void _visualize_patterns(match_t *m);
|
||||
__attribute__((nonnull))
|
||||
static void print_line_number(FILE *out, print_state_t *state, print_options_t options);
|
||||
__attribute__((nonnull))
|
||||
static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options);
|
||||
__attribute__((nonnull(1,2)))
|
||||
static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color);
|
||||
|
||||
//
|
||||
// Return the height of a match object (i.e. the number of descendents of the
|
||||
@ -170,88 +164,139 @@ static void _visualize_matches(match_node_t *firstmatch, int depth, const char *
|
||||
}
|
||||
|
||||
//
|
||||
// Recursively look for references to a rule called "pattern" and print an
|
||||
// explanation for each one.
|
||||
// Print a visualization of a match object.
|
||||
//
|
||||
static void _visualize_patterns(match_t *m)
|
||||
void visualize_match(match_t *m)
|
||||
{
|
||||
if (m->op->type == VM_REF && streq(m->op->args.s, "pattern")) {
|
||||
m = m->child;
|
||||
match_node_t first = {.m = m};
|
||||
_visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start));
|
||||
printf("\033[?7l"); // Disable line wrapping
|
||||
match_node_t first = {.m = m};
|
||||
_visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start));
|
||||
printf("\033[?7h"); // Re-enable line wrapping
|
||||
}
|
||||
|
||||
//
|
||||
// Print a line number, if it needs to be printed.
|
||||
// line number of 0 means "just print an empty space for the number"
|
||||
//
|
||||
__attribute__((nonnull(1,2)))
|
||||
static inline void print_line_number(FILE *out, printer_t *pr, size_t line_number, const char *color)
|
||||
{
|
||||
if (!pr->print_line_numbers) return;
|
||||
if (!pr->needs_line_number) return;
|
||||
if (line_number == 0) {
|
||||
if (color) fprintf(out, "\033[0;2m \033(0\x78\033(B%s", color);
|
||||
else fprintf(out, " |");
|
||||
} else {
|
||||
for (match_t *c = m->child; c; c = c->nextsibling)
|
||||
_visualize_patterns(c);
|
||||
if (color) fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", line_number, color);
|
||||
else fprintf(out, "% 5ld|", line_number);
|
||||
}
|
||||
pr->needs_line_number = 0;
|
||||
}
|
||||
|
||||
//
|
||||
// Print a range of text from a file, adding line numbers if necessary.
|
||||
//
|
||||
__attribute__((nonnull(1,2,3,4)))
|
||||
static void print_between(FILE *out, printer_t *pr, const char *start, const char *end, const char *color)
|
||||
{
|
||||
file_t *f = pr->file;
|
||||
while (start < end) {
|
||||
size_t line_num = get_line_number(f, start);
|
||||
print_line_number(out, pr, line_num, color);
|
||||
const char *eol = get_line(pr->file, line_num + 1);
|
||||
if (!eol || eol > end) eol = end;
|
||||
if (color) fprintf(out, "%s", color);
|
||||
fprintf(out, "%.*s", (int)(eol - start), start);
|
||||
if (eol[-1] == '\n')
|
||||
pr->needs_line_number = 1;
|
||||
start = eol;
|
||||
}
|
||||
pr->pos = end;
|
||||
}
|
||||
|
||||
//
|
||||
// Return a pointer to the first character of context information before `pos`,
|
||||
// according to the context settings in `pr`
|
||||
//
|
||||
static const char *context_before(printer_t *pr, const char *pos)
|
||||
{
|
||||
if (pr->context_lines == -1) {
|
||||
return pr->pos;
|
||||
} else if (pr->context_lines > 0) {
|
||||
size_t n = get_line_number(pr->file, pos);
|
||||
if (n >= (size_t)((pr->context_lines - 1) + 1))
|
||||
n -= (size_t)(pr->context_lines - 1);
|
||||
else
|
||||
n = 1;
|
||||
const char *sol = get_line(pr->file, n);
|
||||
if (sol == NULL || sol < pr->pos) sol = pr->pos;
|
||||
return sol;
|
||||
} else {
|
||||
return pos;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// For a match object, print a visual explanation for each "pattern" matched
|
||||
// inside it.
|
||||
// Return a pointer to the last character of context information after `pos`,
|
||||
// according to the context settings in `pr`
|
||||
//
|
||||
void visualize_match(match_t *m)
|
||||
static const char *context_after(printer_t *pr, const char *pos)
|
||||
{
|
||||
printf("\033[?7l");
|
||||
_visualize_patterns(m);
|
||||
printf("\033[?7h");
|
||||
if (pr->context_lines == -1) {
|
||||
return pr->file->end;
|
||||
} else if (pr->context_lines > 0) {
|
||||
size_t n = get_line_number(pr->file, pos) + (size_t)(pr->context_lines - 1);
|
||||
const char *eol = get_line(pr->file, n+1);
|
||||
return eol ? eol : pr->file->end;
|
||||
} else {
|
||||
return pos;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Print a line number.
|
||||
// Print the text of a match (no context).
|
||||
//
|
||||
static void print_line_number(FILE *out, print_state_t *state, print_options_t options)
|
||||
void _print_match(FILE *out, printer_t *pr, match_t *m)
|
||||
{
|
||||
state->printed_line = state->line;
|
||||
if (!(options & PRINT_LINE_NUMBERS)) return;
|
||||
if (options & PRINT_COLOR)
|
||||
fprintf(out, "\033[0;2m% 5ld\033(0\x78\033(B%s", state->line, state->color);
|
||||
else
|
||||
fprintf(out, "% 5ld|", state->line);
|
||||
}
|
||||
pr->pos = m->start;
|
||||
if (m->op->type == VM_REPLACE) {
|
||||
size_t line_start = get_line_number(pr->file, m->start);
|
||||
size_t line_end = get_line_number(pr->file, m->end);
|
||||
size_t line = line_start;
|
||||
|
||||
//
|
||||
// Helper function for print_match(), using a struct to keep track of some state.
|
||||
//
|
||||
static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state, print_options_t options)
|
||||
{
|
||||
static const char *hl = "\033[0;31;1m";
|
||||
const char *old_color = state->color;
|
||||
if (m->op->type == VM_HIDE) {
|
||||
// TODO: handle replacements?
|
||||
for (const char *p = m->start; p < m->end; p++) {
|
||||
if (*p == '\n') ++state->line;
|
||||
}
|
||||
} else if (m->op->type == VM_REPLACE) {
|
||||
if (options & PRINT_COLOR && state->color != hl) {
|
||||
state->color = hl;
|
||||
fprintf(out, "%s", state->color);
|
||||
}
|
||||
if (pr->use_color) printf("%s", color_hl);
|
||||
const char *text = m->op->args.replace.text;
|
||||
const char *end = &text[m->op->args.replace.len];
|
||||
|
||||
// TODO: clean up the line numbering code
|
||||
for (const char *r = text; r < end; ) {
|
||||
print_line_number(out, pr, line > line_end ? 0 : line, pr->use_color ? color_hl : NULL);
|
||||
|
||||
// Capture substitution
|
||||
if (*r == '@' && r[1] && r[1] != '@') {
|
||||
++r;
|
||||
match_t *cap = get_capture(m, &r);
|
||||
if (cap != NULL) {
|
||||
_print_match(out, f, cap, state, options);
|
||||
print_match(out, pr, cap);
|
||||
continue;
|
||||
} else {
|
||||
--r;
|
||||
}
|
||||
}
|
||||
|
||||
if (state->printed_line != state->line)
|
||||
print_line_number(out, state, options);
|
||||
|
||||
if (*r == '\\') {
|
||||
++r;
|
||||
unsigned char c = unescapechar(r, &r);
|
||||
fputc(c, out);
|
||||
if (c == '\n') ++state->line;
|
||||
if (c == '\n') {
|
||||
++line;
|
||||
pr->needs_line_number = 1;
|
||||
}
|
||||
continue;
|
||||
} else if (*r == '\n') {
|
||||
fputc('\n', out);
|
||||
++state->line;
|
||||
++line;
|
||||
pr->needs_line_number = 1;
|
||||
++r;
|
||||
continue;
|
||||
} else {
|
||||
@ -260,70 +305,74 @@ static void _print_match(FILE *out, file_t *f, match_t *m, print_state_t *state,
|
||||
continue;
|
||||
}
|
||||
}
|
||||
print_line_number(out, pr, line > line_end ? 0 : line, pr->use_color ? color_hl : NULL);
|
||||
} else {
|
||||
if (m->op->type == VM_CAPTURE) {
|
||||
if (options & PRINT_COLOR && state->color != hl) {
|
||||
state->color = hl;
|
||||
fprintf(out, "%s", state->color);
|
||||
}
|
||||
}
|
||||
|
||||
const char *prev = m->start;
|
||||
for (match_t *child = m->child; child; child = child->nextsibling) {
|
||||
// Skip children from e.g. zero-width matches like >@foo
|
||||
if (!(prev <= child->start && child->start <= m->end &&
|
||||
prev <= child->end && child->end <= m->end))
|
||||
continue;
|
||||
if (child->start > prev) {
|
||||
for (const char *p = prev; p < child->start; ++p) {
|
||||
if (state->printed_line != state->line)
|
||||
print_line_number(out, state, options);
|
||||
fputc(*p, out);
|
||||
if (*p == '\n') ++state->line;
|
||||
}
|
||||
}
|
||||
_print_match(out, f, child, state, options);
|
||||
if (child->start > prev)
|
||||
print_between(out, pr, prev, child->start, pr->use_color ? color_hl : NULL);
|
||||
print_match(out, pr, child);
|
||||
prev = child->end;
|
||||
}
|
||||
if (m->end > prev) {
|
||||
for (const char *p = prev; p < m->end; ++p) {
|
||||
if (state->printed_line != state->line)
|
||||
print_line_number(out, state, options);
|
||||
fputc(*p, out);
|
||||
if (*p == '\n') ++state->line;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (options & PRINT_COLOR && old_color != state->color) {
|
||||
fprintf(out, "%s", old_color);
|
||||
state->color = old_color;
|
||||
if (m->end > prev)
|
||||
print_between(out, pr, prev, m->end, pr->use_color ? color_hl : NULL);
|
||||
}
|
||||
pr->pos = m->end;
|
||||
}
|
||||
|
||||
//
|
||||
// Print a match with replacements and highlighting.
|
||||
// Print the text of a match and any context.
|
||||
//
|
||||
void print_match(FILE *out, file_t *f, match_t *m, print_options_t options)
|
||||
void print_match(FILE *out, printer_t *pr, match_t *m)
|
||||
{
|
||||
print_state_t state = {.line = 1, .color = "\033[0m"};
|
||||
_print_match(out, f, m, &state, options);
|
||||
int first = (pr->pos == NULL);
|
||||
if (first) { // First match printed:
|
||||
pr->pos = pr->file->contents;
|
||||
pr->needs_line_number = 1;
|
||||
}
|
||||
if (m) {
|
||||
const char *before_m = context_before(pr, m->start);
|
||||
if (!first) {
|
||||
const char *after_last = context_after(pr, pr->pos);
|
||||
if (after_last >= before_m) {
|
||||
// Overlapping ranges:
|
||||
before_m = pr->pos;
|
||||
} else {
|
||||
// Non-overlapping ranges:
|
||||
print_between(out, pr, pr->pos, after_last, pr->use_color ? color_normal : NULL);
|
||||
if (pr->context_lines > 1)
|
||||
printf("\n"); // Gap between chunks
|
||||
}
|
||||
}
|
||||
print_between(out, pr, before_m, m->start, pr->use_color ? color_normal : NULL);
|
||||
_print_match(out, pr, m);
|
||||
if (pr->use_color) printf("%s", color_normal);
|
||||
} else {
|
||||
// After the last match is printed, print the trailing context:
|
||||
const char *after_last = context_after(pr, pr->pos);
|
||||
print_between(out, pr, pr->pos, after_last, pr->use_color ? color_normal : NULL);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Print any errors that are present in the given match object.
|
||||
//
|
||||
int print_errors(file_t *f, match_t *m, print_options_t options)
|
||||
int print_errors(printer_t *pr, match_t *m)
|
||||
{
|
||||
int ret = 0;
|
||||
if (m->op->type == VM_CAPTURE && m->op->args.capture.name && streq(m->op->args.capture.name, "!")) {
|
||||
printf("\033[31;1m");
|
||||
print_match(stdout, f, m, options);
|
||||
print_match(stdout, pr, m);
|
||||
printf("\033[0m\n");
|
||||
fprint_line(stdout, f, m->start, m->end, " ");
|
||||
fprint_line(stdout, pr->file, m->start, m->end, " ");
|
||||
return 1;
|
||||
}
|
||||
if (m->child) ret += print_errors(f, m->child, options);
|
||||
if (m->nextsibling) ret += print_errors(f, m->nextsibling, options);
|
||||
if (m->child) ret += print_errors(pr, m->child);
|
||||
if (m->nextsibling) ret += print_errors(pr, m->nextsibling);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
18
printing.h
18
printing.h
@ -6,17 +6,21 @@
|
||||
|
||||
#include "types.h"
|
||||
|
||||
typedef enum {
|
||||
PRINT_COLOR = 1<<0,
|
||||
PRINT_LINE_NUMBERS = 1<<1,
|
||||
} print_options_t;
|
||||
// State shared by the printing functions while printing matches from one file.
typedef struct {
|
||||
// The file whose text is being printed.
file_t *file;
|
||||
// How far into the file's text printing has progressed. print_match()
// treats NULL as "nothing printed yet"; print_between() advances it.
const char *pos;
|
||||
// Context setting: -1 = the whole file, 0 = only the matched text,
// N > 0 = whole lines plus N-1 lines before and after each match.
int context_lines;
|
||||
// Set when the next text printed starts a new line; cleared by
// print_line_number() once it has handled that line.
unsigned int needs_line_number:1;
|
||||
// Nonzero to emit color escape sequences.
unsigned int use_color:1;
|
||||
// Nonzero to prefix printed lines with their line numbers.
unsigned int print_line_numbers:1;
|
||||
} printer_t;
|
||||
|
||||
__attribute__((nonnull))
|
||||
void visualize_match(match_t *m);
|
||||
__attribute__((nonnull(1,2)))
|
||||
void print_match(FILE *out, printer_t *pr, match_t *m);
|
||||
__attribute__((nonnull))
|
||||
void print_match(FILE *out, file_t *f, match_t *m, print_options_t options);
|
||||
__attribute__((nonnull))
|
||||
int print_errors(file_t *f, match_t *m, print_options_t options);
|
||||
int print_errors(printer_t *pr, match_t *m);
|
||||
|
||||
#endif
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|
||||
|
1
types.h
1
types.h
@ -30,7 +30,6 @@ enum VMOpcode {
|
||||
VM_BEFORE,
|
||||
VM_AFTER,
|
||||
VM_CAPTURE,
|
||||
VM_HIDE,
|
||||
VM_OTHERWISE,
|
||||
VM_CHAIN,
|
||||
VM_EQUAL,
|
||||
|
29
vm.c
29
vm.c
@ -121,6 +121,25 @@ static const char *match_backref(const char *str, vm_op_t *op, match_t *cap, uns
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Find the next match after prev (or the first match if prev is NULL)
|
||||
//
|
||||
match_t *next_match(def_t *defs, file_t *f, match_t *prev, vm_op_t *op, unsigned int flags)
|
||||
{
|
||||
const char *str;
|
||||
if (prev) {
|
||||
str = prev->end > prev->start ? prev->end : prev->end + 1;
|
||||
recycle_if_unused(&prev);
|
||||
} else {
|
||||
str = f->contents;
|
||||
}
|
||||
for (; str < f->end; ++str) {
|
||||
match_t *m = match(defs, f, str, op, flags);
|
||||
if (m) return m;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//
|
||||
// Run virtual machine operation against a string and return
|
||||
// a match struct, or NULL if no match is found.
|
||||
@ -316,16 +335,6 @@ match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned in
|
||||
ADD_OWNER(m->child, p);
|
||||
return m;
|
||||
}
|
||||
case VM_HIDE: {
|
||||
match_t *p = match(defs, f, str, op->args.pat, flags);
|
||||
if (p == NULL) return NULL;
|
||||
match_t *m = new_match();
|
||||
m->start = str;
|
||||
m->end = p->end;
|
||||
m->op = op;
|
||||
ADD_OWNER(m->child, p);
|
||||
return m;
|
||||
}
|
||||
case VM_OTHERWISE: {
|
||||
match_t *m = match(defs, f, str, op->args.multiple.first, flags);
|
||||
if (m == NULL) m = match(defs, f, str, op->args.multiple.second, flags);
|
||||
|
2
vm.h
2
vm.h
@ -8,6 +8,8 @@
|
||||
|
||||
#include "types.h"
|
||||
|
||||
__attribute__((nonnull(2,4)))
|
||||
match_t *next_match(def_t *defs, file_t *f, match_t *prev, vm_op_t *op, unsigned int flags);
|
||||
__attribute__((hot, nonnull(2,3,4)))
|
||||
match_t *match(def_t *defs, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
|
||||
__attribute__((nonnull))
|
||||
|
Loading…
Reference in New Issue
Block a user