diff options
| -rw-r--r-- | Makefile | 12 | ||||
| -rw-r--r-- | README.md | 12 | ||||
| -rw-r--r-- | bp.1 (renamed from bpeg.1) | 34 | ||||
| -rw-r--r-- | bpeg.c | 71 | ||||
| -rw-r--r-- | compiler.c | 13 | ||||
| -rw-r--r-- | grammars/bpeg.bp (renamed from grammars/bpeg.bpeg) | 0 | ||||
| -rw-r--r-- | grammars/builtins.bp (renamed from grammars/builtins.bpeg) | 23 | ||||
| -rw-r--r-- | grammars/html.bp (renamed from grammars/html.bpeg) | 0 | ||||
| -rw-r--r-- | grammars/utf8-id.bp (renamed from grammars/utf8-id.bpeg) | 0 | ||||
| -rw-r--r-- | types.h | 2 | ||||
| -rw-r--r-- | viz.c | 158 | ||||
| -rw-r--r-- | viz.h | 10 | ||||
| -rw-r--r-- | vm.c | 201 | ||||
| -rw-r--r-- | vm.h | 9 |
14 files changed, 449 insertions, 96 deletions
@@ -1,4 +1,4 @@ -NAME=bpeg +NAME=bp CC ?= gcc PREFIX=/usr/local CFLAGS=-std=c99 -D_XOPEN_SOURCE=500 -D_GNU_SOURCE -D_POSIX_C_SOURCE=200809L @@ -7,7 +7,7 @@ CWARN=-Wall -Wpedantic -Wextra -Wno-unknown-pragmas -Wno-missing-field-initializ G ?= O ?= -O3 -CFILES=compiler.c grammar.c utils.c vm.c file_loader.c +CFILES=compiler.c grammar.c utils.c vm.c file_loader.c viz.c OBJFILES=$(CFILES:.c=.o) all: $(NAME) @@ -15,7 +15,7 @@ all: $(NAME) .c.o: $(CC) -c $(CFLAGS) $(CWARN) $(G) $(O) -o $@ $< -$(NAME): $(OBJFILES) $(NAME).c +$(NAME): $(OBJFILES) bpeg.c $(CC) $(CFLAGS) $(CWARN) $(G) $(O) -o $@ $^ clean: @@ -29,8 +29,8 @@ install: $(NAME) fi; \ [ ! "$$prefix" ] && prefix="/usr/local"; \ [ ! "$$sysconfdir" ] && sysconfdir=/etc; \ - mkdir -pv -m 755 "$$prefix/share/man/man1" "$$prefix/bin" "$$sysconfdir/xdg/bpeg" \ - && cp -rv grammars/* "$$sysconfdir/xdg/bpeg/" \ + mkdir -pv -m 755 "$$prefix/share/man/man1" "$$prefix/bin" "$$sysconfdir/xdg/bp" \ + && cp -rv grammars/* "$$sysconfdir/xdg/bp/" \ && cp -v $(NAME).1 "$$prefix/share/man/man1/" \ && rm -f "$$prefix/bin/$(NAME)" \ && cp -v $(NAME) "$$prefix/bin/" @@ -44,7 +44,7 @@ uninstall: [ ! "$$prefix" ] && prefix="/usr/local"; \ [ ! "$$sysconfdir" ] && sysconfdir=/etc; \ echo "Deleting..."; \ - rm -rvf "$$prefix/bin/$(NAME)" "$$prefix/share/man/man1/$(NAME).1" "$$sysconfdir/xdg/bpeg"; \ + rm -rvf "$$prefix/bin/$(NAME)" "$$prefix/share/man/man1/$(NAME).1" "$$sysconfdir/xdg/bp"; \ printf "\033[1mIf you created any config files in ~/.config/$(NAME), you may want to delete them manually.\033[0m\n" .PHONY: all, clean, install, uninstall @@ -4,7 +4,7 @@ BPEG is a parsing expression grammar tool for the command line. It's written in pure C with no dependencies. ## Usage -`bpeg [flags] <pattern> [<input files>...]` +`bp [flags] <pattern> [<input files>...]` ### Flags * `-h` `--help` print the usage and quit @@ -12,13 +12,13 @@ It's written in pure C with no dependencies. * `-i` `--ignore-case` perform a case-insensitive match * `-d` `--define <name>:<def>` define a grammar rule * `-D` `--define-string <name>:<def>` define a grammar rule (string-pattern) -* `-p` `--pattern <pat>` provide a pattern (equivalent to bpeg ' -* `-P` `--pattern-string <pat>` provide a string pattern (equivalent to bpeg '<pat>', but may be useful if '<pat>' begins with a '-') +* `-p` `--pattern <pat>` provide a pattern (equivalent to `bp '\(<pat>)'`) +* `-P` `--pattern-string <pat>` provide a string pattern (equivalent to `bp '<pat>'`, but may be useful if `'<pat>'` begins with a '-') * `-r` `--replace <replacement>` replace the input pattern with the given replacement -* `-m` `--mode <mode>` set the behavior mode (defult: find-all) +* `-m` `--mode <mode>` set the behavior mode (defult: `find-all`) * `-g` `--grammar <grammar file>` use the specified file as a grammar -See `man ./bpeg.1` for more details. +See `man ./bp.1` for more details. ## BPEG Patterns BPEG patterns are a mixture of Parsing Expression Grammar and Regular @@ -63,7 +63,7 @@ Pattern | Meaning `#( block comment )#` | A block comment `# line comment` | A line comment -See `man ./bpeg.1` for more details. +See `man ./bp.1` for more details. ## License BPEG is provided under the MIT license with the [Commons Clause](https://commonsclause.com/) @@ -1,10 +1,10 @@ -.\" Manpage for bpeg. +.\" Manpage for bp. .\" Contact bruce@bruce-hill.com to correct errors or typos. -.TH man 1 "Sep 12, 2020" "0.1" "bpeg manual page" +.TH man 1 "Sep 12, 2020" "0.1" "bp manual page" .SH NAME -bpeg \- Bruce's Parsing Expression Grammar tool +bp \- Bruce's Parsing Expression Grammar tool .SH SYNOPSIS -.B bpeg +.B bp [\fI-h\fR|\fI--help\fR] [\fI-v\fR|\fI--verbose\fR] [\fI-i\fR|\fI--ignore-case\fR \fI<pattern>\fR] @@ -18,7 +18,7 @@ bpeg \- Bruce's Parsing Expression Grammar tool \fI<pattern\fR [[--] \fI<input files...>\fR] .SH DESCRIPTION -\fBbpeg\fR is a tool that matches parsing expression grammars using a custom syntax. +\fBbp\fR is a tool that matches parsing expression grammars using a custom syntax. .SH OPTIONS .B \-v\fR, \fB--verbose Print debugging information. @@ -27,10 +27,10 @@ Print debugging information. Perform pattern matching case-insensitively. .B \-d\fR, \fB--define \fI<name>\fR:\fI<pattern>\fR -Define a grammar rule using a bpeg pattern. +Define a grammar rule using a bp pattern. .B \-D\fR, \fB--define-string \fI<name>\fR:\fI<string-pattern>\fR -Define a grammar rule using a bpeg string pattern. +Define a grammar rule using a bp string pattern. .B \-r\fR, \fB--replace \fI<replacement>\fR Replace all occurrences of the main pattern with the given string. @@ -47,17 +47,17 @@ The mode to operate in. Options are: \fIfind-all\fR (the default), Print the usage and exit. .B <string-pattern> -The main pattern for bpeg to match. By default, this pattern is a string +The main pattern for bp to match. By default, this pattern is a string pattern (see the \fBSTRING PATTERNS\fR section below). .B <input files...> The input files to search. If no input files are provided and data was piped in, that data will be used instead. If neither are provided, -\fBbpeg\fR will search through all files in the current directory and +\fBbp\fR will search through all files in the current directory and its subdirectories (recursively). .SH PATTERNS -Bpeg patterns are based off of a combination of Parsing Expression Grammars +bp patterns are based off of a combination of Parsing Expression Grammars and regular expression syntax. The syntax is designed to map closely to verbal descriptions of the patterns, and prefix operators are preferred over suffix operators (as is common in regex syntax). @@ -175,32 +175,32 @@ A line comment .SH STRING PATTERNS One of the most common use cases for pattern matching tools is matching plain, literal strings, or strings that are primarily plain strings, with one or two -patterns. \fBbpeg\fR is designed around this fact. The default mode for bpeg +patterns. \fBbp\fR is designed around this fact. The default mode for bp patterns is "string pattern mode". In string pattern mode, all characters are interpreted literally except for the backslash (\fB\\\fR), which may be -followed by a bpeg pattern (see the \fBPATTERNS\fR section above). Optionally, -the bpeg pattern may be terminated by a semicolon (\fB;\fR). +followed by a bp pattern (see the \fBPATTERNS\fR section above). Optionally, +the bp pattern may be terminated by a semicolon (\fB;\fR). .SH EXAMPLES .TP .B -ls | bpeg foo +ls | bp foo Find files containing the string "foo" (a string pattern) .TP .B -ls | bpeg '.c\\$' -r '.h' +ls | bp '.c\\$' -r '.h' Find files ending with ".c" and replace the extension with ".h" .TP .B -bpeg -p '"foobar"==id parens' my_file.py +bp -p '"foobar"==id parens' my_file.py Find the literal string \fB"foobar"\fR, assuming it's a complete identifier, followed by a pair of matching parentheses in the file \fImy_file.py\fR .TP .B -bpeg -g html -p html-element -D matching-tag=a foo.html +bp -g html -p html-element -D matching-tag=a foo.html Using the \fIhtml\fR grammar, find all \fIhtml-element\fRs matching the tag \fIa\fR in the file \fIfoo.html\fR @@ -1,7 +1,7 @@ /* * bpeg.c - Source code for the bpeg parser * - * See `man ./bpeg.1` for more details + * See `man ./bp.1` for more details */ #include <fcntl.h> #include <glob.h> @@ -15,24 +15,28 @@ #include "file_loader.h" #include "grammar.h" #include "utils.h" +#include "viz.h" #include "vm.h" static const char *usage = ( - "BPEG - a Parsing Expression Grammar command line tool\n\n" + "BP - a Parsing Expression Grammar command line tool\n\n" "Usage:\n" - " bpeg [flags] <pattern> [<input files>...]\n\n" + " bp [flags] <pattern> [<input files>...]\n\n" "Flags:\n" " -h --help print the usage and quit\n" " -v --verbose print verbose debugging info\n" + " -e --explain explain the matches\n" " -i --ignore-case preform matching case-insensitively\n" " -d --define <name>:<def> define a grammar rule\n" " -D --define-string <name>:<def> define a grammar rule (string-pattern)\n" - " -p --pattern <pat> provide a pattern (equivalent to bpeg '\\(<pat>)')\n" + " -p --pattern <pat> provide a pattern (equivalent to bp '\\(<pat>)')\n" " -P --pattern-string <pat> provide a string pattern (may be useful if '<pat>' begins with a '-')\n" " -r --replace <replacement> replace the input pattern with the given replacement\n" " -m --mode <mode> set the behavior mode (defult: find-all)\n" " -g --grammar <grammar file> use the specified file as a grammar\n"); +static print_options_t print_options = 0; + static char *getflag(const char *flag, char *argv[], int *i) { size_t n = strlen(flag); @@ -54,7 +58,7 @@ static int print_errors(file_t *f, match_t *m) int ret = 0; if (m->op->op == VM_CAPTURE && m->value.name && streq(m->value.name, "!")) { printf("\033[31;1m"); - print_match(f, m); + print_match(f, m, print_options); printf("\033[0m\n"); fprint_line(stdout, f, m->start, m->end, " "); return 1; @@ -66,13 +70,49 @@ static int print_errors(file_t *f, match_t *m) static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags) { + static int printed_matches = 0; file_t *f = load_file(filename); check(f, "Could not open file: %s", filename); match_t *m = match(g, f, f->contents, pattern, flags); if (m && print_errors(f, m) > 0) _exit(1); if (m != NULL && m->end > m->start + 1) { - print_match(f, m); + ++printed_matches; + + if (flags & BPEG_EXPLAIN) { + if (filename) { + printf("\033[1;4m%s:\033[0m\n", filename); + } + /* + if (printed_matches > 1) + fprintf(stdout, ",\n"); + printf("{\"filename\":\"%s\",\"text\":\"", filename ? filename : "-"); + for (char *c = f->contents; c < f->end; c++) { + switch (*c) { + case '"': printf("\\\""); break; + case '\n': printf("\\n"); break; + case '\t': printf("\\t"); break; + case '\\': printf("\\\\"); break; + default: printf("%c", *c); break; + } + } + printf("\",\n\"tree\":{\"type\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[", + 0, f->end - f->contents); + json_match(stdout, f->contents, m); + printf("]}}\n"); + */ + visualize_match(m); + } else { + if (printed_matches > 1) + fputc('\n', stdout); + if (filename) { + if (print_options & PRINT_COLOR) + printf("\033[1;4;33m%s:\033[0m\n", filename); + else + printf("%s:\n", filename); + } + print_match(f, m, print_options); + } destroy_file(&f); return 0; } else { @@ -93,9 +133,9 @@ int main(int argc, char *argv[]) grammar_t *g = new_grammar(); // Load builtins: - if (access("/etc/xdg/bpeg/builtins.bpeg", R_OK) != -1) - load_grammar(g, load_file("/etc/xdg/bpeg/builtins.bpeg")); // Keep in memory for debugging output - sprintf(path, "%s/.config/bpeg/builtins.bpeg", getenv("HOME")); + if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) + load_grammar(g, load_file("/etc/xdg/bp/builtins.bp")); // Keep in memory for debugging output + sprintf(path, "%s/.config/bp/builtins.bp", getenv("HOME")); if (access(path, R_OK) != -1) load_grammar(g, load_file(path)); // Keep in memory for debugging output @@ -110,6 +150,8 @@ int main(int argc, char *argv[]) return 0; } else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) { flags |= BPEG_VERBOSE; + } else if (streq(argv[i], "--explain") || streq(argv[i], "-e")) { + flags |= BPEG_EXPLAIN; } else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) { flags |= BPEG_IGNORECASE; } else if (FLAG("--replace") || FLAG("-r")) { @@ -122,11 +164,11 @@ int main(int argc, char *argv[]) } else if (FLAG("--grammar") || FLAG("-g")) { file_t *f = load_file(flag); if (f == NULL) { - sprintf(path, "%s/.config/bpeg/%s.bpeg", getenv("HOME"), flag); + sprintf(path, "%s/.config/bp/%s.bp", getenv("HOME"), flag); f = load_file(path); } if (f == NULL) { - sprintf(path, "/etc/xdg/bpeg/%s.bpeg", flag); + sprintf(path, "/etc/xdg/bp/%s.bp", flag); f = load_file(path); } check(f != NULL, "Couldn't find grammar: %s", flag); @@ -180,11 +222,7 @@ int main(int argc, char *argv[]) } if (isatty(STDOUT_FILENO)) { - char *epsilon = "''"; - file_t *is_tty_file = spoof_file("<is-tty>", epsilon); - vm_op_t *p = bpeg_pattern(is_tty_file, epsilon); - check(p, "Failed to compile is-tty"); - add_def(g, is_tty_file, epsilon, "is-tty", p); + print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS; } vm_op_t *pattern = lookup(g, rule); @@ -210,7 +248,6 @@ int main(int argc, char *argv[]) ret &= run_match(g, NULL, pattern, flags); } - return ret; } @@ -281,6 +281,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) str = after_spaces(str); if (!matchchar(&str, ')')) file_err(f, origin, str, "This parenthesis group isn't properly closed."); + op->start = origin; + op->end = str; break; } // Square brackets @@ -330,6 +332,17 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) op->len = pat->len; break; } + // Hide + case '~': { + vm_op_t *pat = bpeg_simplepattern(f, str); + if (!pat) + file_err(f, str, str, "There should be a pattern after this '~'"); + str = pat->end; + op->op = VM_HIDE; + op->len = 0; + op->args.pat = pat; + break; + } // Replacement case '{': { str = after_spaces(str); diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bp index 288ceee..288ceee 100644 --- a/grammars/bpeg.bpeg +++ b/grammars/bpeg.bp diff --git a/grammars/builtins.bpeg b/grammars/builtins.bp index 697bf27..7de936f 100644 --- a/grammars/builtins.bpeg +++ b/grammars/builtins.bp @@ -3,9 +3,6 @@ no: !'' # Configurable options: is-tty: no # Defined as either always-match or always-fail, depending on stdout -print-line-numbers: is-tty -print-filenames: is-tty -highlight: is-tty include-binary-files: no; is-text-file: ^^ >32(\t/\n/\r/\x20-x7e/!\x00-x7f utf8-codepoint/$$) @@ -14,29 +11,18 @@ pattern: !'' # Not defined by default replacement: !'' # Not defined by default replace-all: ( (include-binary-files / is-text-file) - define-highlights - add-filename - *(...(>pattern hl-replacement)) ... + +(...(>pattern replacement)) ... ) find-all: ( (include-binary-files / is-text-file) - define-highlights - add-filename - *(!..pattern {..\n=>}) - +(>..pattern add-line-number +(..hl-pattern) ..(\n/$$) / {..\n=>}) + *(!..pattern ~(..\n)) + +(+(..@pattern) ..(\n/$$) / ~(..\n)) [{!<\n => "\n"}] ) only-matches: ( (include-binary-files / is-text-file) - define-highlights - add-filename - +{...@hl-pattern =>'@1\n'} + +{...@pattern =>'@1\n'} ) -add-filename: [print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})] -add-line-number: [print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})] -hl-pattern: {@match=pattern define-highlights => "@hl-start;@match;@hl-end;"} -hl-replacement: {@match=replacement define-highlights => "@hl-start;@match;@hl-end;" } -define-highlights: highlight @hl-start={=>"\033[31;1m"} @hl-end={=>"\033[0m"} / @hl-start="" @hl-end="" # Helper definitions (commonly used) url: ( @@ -70,6 +56,7 @@ parens: `( *(parens / $. != `)) `) id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9) id-char: `a-z/`A-Z/`_/`0-9 word: !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_) +edge: !<(`a-z/`A-Z/`_/`0-9) / !>(`0-9/`_) HEX: `0-9/`A-F Hex: `0-9/`a-f/`A-F hex: `0-9/`a-f diff --git a/grammars/html.bpeg b/grammars/html.bp index cea19c1..cea19c1 100644 --- a/grammars/html.bpeg +++ b/grammars/html.bp diff --git a/grammars/utf8-id.bpeg b/grammars/utf8-id.bp index 26e98ba..26e98ba 100644 --- a/grammars/utf8-id.bpeg +++ b/grammars/utf8-id.bp @@ -11,6 +11,7 @@ enum BPEGFlag { BPEG_VERBOSE = 1 << 0, BPEG_IGNORECASE = 1 << 1, + BPEG_EXPLAIN = 1 << 2, }; /* @@ -26,6 +27,7 @@ enum VMOpcode { VM_BEFORE, VM_AFTER, VM_CAPTURE, + VM_HIDE, VM_OTHERWISE, VM_CHAIN, VM_EQUAL, @@ -0,0 +1,158 @@ +/* + * viz.c - Visualize matches. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "types.h" +#include "viz.h" + + +/* +static size_t utf8_len(const char *s) +{ + size_t len = 0; + while (*s) { + len += (*s++ & 0xC0) != 0x80; + } + return len; +} +*/ + +static int match_height(match_t *m) +{ + int height = 0; + for (match_t *c = m->child; c; c = c->nextsibling) { + int childheight = match_height(c); + if (childheight > height) height = childheight; + } + return 1 + height; +} + +static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) +{ + if (!firstmatch) return; + + const char *V = "│"; // Vertical bar + const char *H = "─"; // Horizontal bar + const char *color = (depth % 2 == 0) ? "34" : "33"; + + match_t *viz = firstmatch->m; + for (match_node_t *p = firstmatch; p; p = p->next) + if (match_height(p->m) > match_height(viz)) + viz = p->m; + const char *viz_type = viz->op->start; + size_t viz_typelen = (size_t)(viz->op->end - viz->op->start); + + printf("\033[%ldG\033[%s;1m", 2*textlen+3, color); + for (size_t i = 0; i < viz_typelen; i++) { + switch (viz_type[i]) { + case '\n': printf("↵"); break; + default: printf("%c", viz_type[i]); break; + } + } + printf("\033[0m"); + + match_node_t *children = NULL; + match_node_t **nextchild = &children; + +#define RIGHT_TYPE(m) (m->m->op->end == m->m->op->start + viz_typelen && strncmp(m->m->op->start, viz_type, viz_typelen) == 0) + // Print nonzero-width first: + for (match_node_t *m = firstmatch; m; m = m->next) { + //tree_text = byteslice(text, tree['start'], tree['end']).replace('\n', '↵') + if (RIGHT_TYPE(m)) { + //if (m->m->op->op != VM_REF) { + for (match_t *c = m->m->child; c; c = c->nextsibling) { + *nextchild = calloc(1, sizeof(match_node_t)); + (*nextchild)->m = c; + nextchild = &((*nextchild)->next); + } + //} + if (m->m->end == m->m->start) continue; + printf("\033[%ldG\033[0;2m%s\033[0;7;%sm", 1+2*(m->m->start - text), V, color); + for (const char *c = m->m->start; c < m->m->end; ++c) { + // TODO: newline + if (c > m->m->start) printf(" "); + // TODO: utf8 + //while ((*c & 0xC0) != 0x80) printf("%c", *(c++)); + printf("%c", *c); + } + printf("\033[0;2m%s\033[0m", V); + } else { + *nextchild = calloc(1, sizeof(match_node_t)); + (*nextchild)->m = m->m; + nextchild = &((*nextchild)->next); + printf("\033[%ldG\033[0;2m%s", 1+2*(m->m->start - text), V); + for (ssize_t i = (ssize_t)(2*(m->m->end - m->m->start)-1); i > 0; i--) + printf(" "); + if (m->m->end > m->m->start) + printf("\033[0;2m%s", V); + printf("\033[0m"); + } + } + + // Print stars for zero-width: + for (match_node_t *m = firstmatch; m; m = m->next) { + if (m->m->end > m->m->start) continue; + if (RIGHT_TYPE(m)) { + printf("\033[%ldG\033[7;%sm▒\033[0m", 1+2*(m->m->start - text), color); + } else { + printf("\033[%ldG\033[0;2m%s\033[0m", 1+2*(m->m->start - text), V); + } + } + + printf("\n"); + + for (match_node_t *m = firstmatch; m; m = m->next) { + if (m->m->end == m->m->start) { + if (!RIGHT_TYPE(m)) + printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), V); + } else { + const char *l = "└"; + const char *r = "┘"; + for (match_node_t *c = children; c; c = c->next) { + if (c->m->start == m->m->start || c->m->end == m->m->start) l = V; + if (c->m->start == m->m->end || c->m->end == m->m->end) r = V; + } + printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), l); + const char *h = RIGHT_TYPE(m) ? H : " "; + for (ssize_t n = (ssize_t)(2*(m->m->end - m->m->start) - 1); n > 0; n--) + printf("%s", h); + printf("%s\033[0m", r); + } + } +#undef RIGHT_TYPE + + printf("\n"); + + if (children) + _visualize_matches(children, depth+1, text, textlen); + + for (match_node_t *c = children, *next = NULL; c; c = next) { + next = c->next; + free(c); + } +} + +static void _visualize_patterns(match_t *m) +{ + if (m->op->op == VM_REF && strcmp(m->op->args.s, "pattern") == 0) { + m = m->child; + match_node_t first = {.m = m}; + _visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start)); + } else { + for (match_t *c = m->child; c; c = c->nextsibling) + _visualize_patterns(c); + } +} + +void visualize_match(match_t *m) +{ + printf("\033[?7l"); + //match_node_t first = {.m = m}; + //_visualize_matches(&first, 0, m->start, (m->end - m->start)); + _visualize_patterns(m); + printf("\033[?7h"); +} @@ -0,0 +1,10 @@ +/* + * Header file for viz.c (visualizing matches) + */ + +typedef struct match_node_s { + match_t *m; + struct match_node_s *next; +} match_node_t; + +void visualize_match(match_t *m); @@ -26,6 +26,7 @@ static const char *opcode_names[] = { [VM_BEFORE] = "BEFORE", [VM_AFTER] = "AFTER", [VM_CAPTURE] = "CAPTURE", + [VM_HIDE] = "HIDE", [VM_OTHERWISE] = "OTHERWISE", [VM_CHAIN] = "CHAIN", [VM_REPLACE] = "REPLACE", @@ -257,6 +258,16 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un m->value.name = op->args.capture.name; return m; } + case VM_HIDE: { + match_t *p = _match(g, f, str, op->args.pat, flags, rec); + if (p == NULL) return NULL; + match_t *m = calloc(sizeof(match_t), 1); + m->start = str; + m->end = p->end; + m->op = op; + m->child = p; + return m; + } case VM_OTHERWISE: { match_t *m = _match(g, f, str, op->args.multiple.first, flags, rec); if (m == NULL) m = _match(g, f, str, op->args.multiple.second, flags, rec); @@ -450,6 +461,12 @@ void print_pattern(vm_op_t *op) fprintf(stderr, ")"); break; } + case VM_HIDE: { + fprintf(stderr, "hidden ("); + print_pattern(op->args.pat); + fprintf(stderr, ")"); + break; + } case VM_CAPTURE: { fprintf(stderr, "capture ("); print_pattern(op->args.pat); @@ -547,63 +564,185 @@ static match_t *get_cap(match_t *m, const char **r) return NULL; } +typedef struct { + size_t line, printed_line; + const char *color; +} print_state_t; + +static void print_line_number(print_state_t *state, print_options_t options) +{ + state->printed_line = state->line; + if (!(options & PRINT_LINE_NUMBERS)) return; + if (options & PRINT_COLOR) + printf("\033[0;2m% 5ld\033(0\x78\033(B%s", state->line, state->color); + else + printf("% 5ld|", state->line); +} + /* * Print a match with replacements and highlighting. */ -void print_match(file_t *f, match_t *m) +static void _print_match(file_t *f, match_t *m, print_state_t *state, print_options_t options) { - if (m->op->op == VM_REPLACE) { + static const char *hl = "\033[0;31;1m"; + const char *old_color = state->color; + if (m->op->op == VM_HIDE) { + // TODO: handle replacements? + for (const char *p = m->start; p < m->end; p++) { + if (*p == '\n') ++state->line; + } + } else if (m->op->op == VM_REPLACE) { + if (options & PRINT_COLOR && state->color != hl) { + state->color = hl; + printf("%s", state->color); + } for (const char *r = m->value.replacement; *r; ) { - if (*r == '\\') { - ++r; - fputc(unescapechar(r, &r), stdout); - continue; - } else if (*r != '@') { - fputc(*r, stdout); + if (*r == '@' && r[1] && r[1] != '@') { ++r; - continue; + match_t *cap = get_cap(m, &r); + if (cap != NULL) { + _print_match(f, cap, state, options); + continue; + } else { + --r; + } } - ++r; - if (*r == '@' || *r == '\0') { - fputc('@', stdout); - continue; - } - if (*r == '#') { + if (state->printed_line != state->line) + print_line_number(state, options); + + if (*r == '\\') { ++r; - printf("%ld", get_line_number(f, m->start)); + unsigned char c = unescapechar(r, &r); + fputc(c, stdout); + if (c == '\n') ++state->line; continue; - } else if (*r == ':') { + } else if (*r == '\n') { + fputc('\n', stdout); + ++state->line; ++r; - printf("%ld", get_char_number(f, m->start)); continue; - } else if (*r == '&') { + } else { + fputc(*r, stdout); ++r; - printf("%s", f->filename ? f->filename : "-"); continue; } - match_t *cap = get_cap(m, &r); - if (cap != NULL) { - print_match(f, cap); - } else { - fputc('@', stdout); - } } } else { + if (m->op->op == VM_CAPTURE) { + if (options & PRINT_COLOR && state->color != hl) { + state->color = hl; + printf("%s", state->color); + } + } + const char *prev = m->start; for (match_t *child = m->child; child; child = child->nextsibling) { // Skip children from e.g. zero-width matches like >@foo if (!(prev <= child->start && child->start <= m->end && prev <= child->end && child->end <= m->end)) continue; - if (child->start > prev) - printf("%.*s", (int)(child->start - prev), prev); - print_match(f, child); + if (child->start > prev) { + for (const char *p = prev; p < child->start; ++p) { + if (state->printed_line != state->line) + print_line_number(state, options); + fputc(*p, stdout); + if (*p == '\n') ++state->line; + } + } + _print_match(f, child, state, options); prev = child->end; } - if (m->end > prev) - printf("%.*s", (int)(m->end - prev), prev); + if (m->end > prev) { + for (const char *p = prev; p < m->end; ++p) { + if (state->printed_line != state->line) + print_line_number(state, options); + fputc(*p, stdout); + if (*p == '\n') ++state->line; + } + } + } + if (options & PRINT_COLOR && old_color != state->color) { + printf("%s", old_color); + state->color = old_color; + } +} + +void print_match(file_t *f, match_t *m, print_options_t options) +{ + print_state_t state = {.line = 1, .color = "\033[0m"}; + _print_match(f, m, &state, options); +} + +/* + * Print a match as JSON + */ +static int _json_match(FILE *f, const char *text, match_t *m, int comma) +#define VERBOSE_JSON 1 +#if VERBOSE_JSON +{ + if (comma) fprintf(f, ",\n"); + comma = 0; + fprintf(f, "{\"type\":\""); + for (const char *c = m->op->start; c < m->op->end; c++) { + switch (*c) { + case '"': fprintf(f, "\\\""); break; + case '\\': fprintf(f, "\\\\"); break; + case '\t': fprintf(f, "\\t"); break; + case '\n': fprintf(f, "↵"); break; + default: fprintf(f, "%c", *c); break; + } + } + fprintf(f, "\",\"start\":%ld,\"end\":%ld,\"children\":[", + m->start - text, m->end - text); + for (match_t *child = m->child; child; child = child->nextsibling) { + comma |= _json_match(f, text, child, comma); + } + fprintf(f, "]}"); + return 1; +} +#else +{ + if (m->op->op == VM_STRING) { + if (comma) fprintf(f, ",\n"); + comma = 0; + fprintf(f, "{\"type\":\"\\\""); + for (const char *c = m->op->args.s; *c; c++) { + switch (*c) { + case '"': fprintf(f, "\\\""); break; + case '\\': fprintf(f, "\\\\"); break; + case '\t': fprintf(f, "\\t"); break; + case '\n': fprintf(f, "↵"); break; + default: fprintf(f, "%c", *c); break; + } + } + fprintf(f, "\\\"\",\"start\":%ld,\"end\":%ld,\"children\":[", + m->start - text, m->end - text); + } else if (m->op->op == VM_REF) { + if (comma) fprintf(f, ",\n"); + comma = 0; + fprintf(f, "{\"type\":\"%s\",\"start\":%ld,\"end\":%ld,\"children\":[", + m->op->args.s, m->start - text, m->end - text); + } else if (m->op->op == VM_CAPTURE && m->value.name) { + if (comma) fprintf(f, ",\n"); + comma = 0; + fprintf(f, "{\"type\":\"@%s\",\"start\":%ld,\"end\":%ld,\"children\":[", + m->value.name, m->start - text, m->end - text); + } + for (match_t *child = m->child; child; child = child->nextsibling) { + comma |= _json_match(f, text, child, comma); } + if (m->op->op == VM_REF || m->op->op == VM_STRING || (m->op->op == VM_CAPTURE && m->value.name)) { + fprintf(f, "]}"); + return 1; + } + return comma; +} +#endif + +void json_match(FILE *f, const char *text, match_t *m) +{ + _json_match(f, text, m, 0); } static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags) @@ -11,6 +11,11 @@ #include "types.h" +typedef enum { + PRINT_COLOR = 1<<0, + PRINT_LINE_NUMBERS = 1<<1, +} print_options_t; + const char *opcode_name(enum VMOpcode o); __attribute__((hot, nonnull)) match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags); @@ -19,7 +24,9 @@ void destroy_match(match_t **m); __attribute__((nonnull)) void print_pattern(vm_op_t *op); __attribute__((nonnull)) -void print_match(file_t *f, match_t *m); +void print_match(file_t *f, match_t *m, print_options_t options); +__attribute__((nonnull)) +void json_match(FILE *f, const char *text, match_t *m); #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 |
