aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile12
-rw-r--r--README.md12
-rw-r--r--bp.1 (renamed from bpeg.1)34
-rw-r--r--bpeg.c71
-rw-r--r--compiler.c13
-rw-r--r--grammars/bpeg.bp (renamed from grammars/bpeg.bpeg)0
-rw-r--r--grammars/builtins.bp (renamed from grammars/builtins.bpeg)23
-rw-r--r--grammars/html.bp (renamed from grammars/html.bpeg)0
-rw-r--r--grammars/utf8-id.bp (renamed from grammars/utf8-id.bpeg)0
-rw-r--r--types.h2
-rw-r--r--viz.c158
-rw-r--r--viz.h10
-rw-r--r--vm.c201
-rw-r--r--vm.h9
14 files changed, 449 insertions, 96 deletions
diff --git a/Makefile b/Makefile
index 1d34cf3..568421f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-NAME=bpeg
+NAME=bp
CC ?= gcc
PREFIX=/usr/local
CFLAGS=-std=c99 -D_XOPEN_SOURCE=500 -D_GNU_SOURCE -D_POSIX_C_SOURCE=200809L
@@ -7,7 +7,7 @@ CWARN=-Wall -Wpedantic -Wextra -Wno-unknown-pragmas -Wno-missing-field-initializ
G ?=
O ?= -O3
-CFILES=compiler.c grammar.c utils.c vm.c file_loader.c
+CFILES=compiler.c grammar.c utils.c vm.c file_loader.c viz.c
OBJFILES=$(CFILES:.c=.o)
all: $(NAME)
@@ -15,7 +15,7 @@ all: $(NAME)
.c.o:
$(CC) -c $(CFLAGS) $(CWARN) $(G) $(O) -o $@ $<
-$(NAME): $(OBJFILES) $(NAME).c
+$(NAME): $(OBJFILES) bpeg.c
$(CC) $(CFLAGS) $(CWARN) $(G) $(O) -o $@ $^
clean:
@@ -29,8 +29,8 @@ install: $(NAME)
fi; \
[ ! "$$prefix" ] && prefix="/usr/local"; \
[ ! "$$sysconfdir" ] && sysconfdir=/etc; \
- mkdir -pv -m 755 "$$prefix/share/man/man1" "$$prefix/bin" "$$sysconfdir/xdg/bpeg" \
- && cp -rv grammars/* "$$sysconfdir/xdg/bpeg/" \
+ mkdir -pv -m 755 "$$prefix/share/man/man1" "$$prefix/bin" "$$sysconfdir/xdg/bp" \
+ && cp -rv grammars/* "$$sysconfdir/xdg/bp/" \
&& cp -v $(NAME).1 "$$prefix/share/man/man1/" \
&& rm -f "$$prefix/bin/$(NAME)" \
&& cp -v $(NAME) "$$prefix/bin/"
@@ -44,7 +44,7 @@ uninstall:
[ ! "$$prefix" ] && prefix="/usr/local"; \
[ ! "$$sysconfdir" ] && sysconfdir=/etc; \
echo "Deleting..."; \
- rm -rvf "$$prefix/bin/$(NAME)" "$$prefix/share/man/man1/$(NAME).1" "$$sysconfdir/xdg/bpeg"; \
+ rm -rvf "$$prefix/bin/$(NAME)" "$$prefix/share/man/man1/$(NAME).1" "$$sysconfdir/xdg/bp"; \
printf "\033[1mIf you created any config files in ~/.config/$(NAME), you may want to delete them manually.\033[0m\n"
.PHONY: all, clean, install, uninstall
diff --git a/README.md b/README.md
index 9eb3b03..537055c 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ BPEG is a parsing expression grammar tool for the command line.
It's written in pure C with no dependencies.
## Usage
-`bpeg [flags] <pattern> [<input files>...]`
+`bp [flags] <pattern> [<input files>...]`
### Flags
* `-h` `--help` print the usage and quit
@@ -12,13 +12,13 @@ It's written in pure C with no dependencies.
* `-i` `--ignore-case` perform a case-insensitive match
* `-d` `--define <name>:<def>` define a grammar rule
* `-D` `--define-string <name>:<def>` define a grammar rule (string-pattern)
-* `-p` `--pattern <pat>` provide a pattern (equivalent to bpeg '
-* `-P` `--pattern-string <pat>` provide a string pattern (equivalent to bpeg '<pat>', but may be useful if '<pat>' begins with a '-')
+* `-p` `--pattern <pat>` provide a pattern (equivalent to `bp '\(<pat>)'`)
+* `-P` `--pattern-string <pat>` provide a string pattern (equivalent to `bp '<pat>'`, but may be useful if `'<pat>'` begins with a '-')
* `-r` `--replace <replacement>` replace the input pattern with the given replacement
-* `-m` `--mode <mode>` set the behavior mode (defult: find-all)
+* `-m` `--mode <mode>` set the behavior mode (default: `find-all`)
* `-g` `--grammar <grammar file>` use the specified file as a grammar
-See `man ./bpeg.1` for more details.
+See `man ./bp.1` for more details.
## BPEG Patterns
BPEG patterns are a mixture of Parsing Expression Grammar and Regular
@@ -63,7 +63,7 @@ Pattern | Meaning
`#( block comment )#` | A block comment
`# line comment` | A line comment
-See `man ./bpeg.1` for more details.
+See `man ./bp.1` for more details.
## License
BPEG is provided under the MIT license with the [Commons Clause](https://commonsclause.com/)
diff --git a/bpeg.1 b/bp.1
index 9f36e0b..60a5e8f 100644
--- a/bpeg.1
+++ b/bp.1
@@ -1,10 +1,10 @@
-.\" Manpage for bpeg.
+.\" Manpage for bp.
.\" Contact bruce@bruce-hill.com to correct errors or typos.
-.TH man 1 "Sep 12, 2020" "0.1" "bpeg manual page"
+.TH man 1 "Sep 12, 2020" "0.1" "bp manual page"
.SH NAME
-bpeg \- Bruce's Parsing Expression Grammar tool
+bp \- Bruce's Parsing Expression Grammar tool
.SH SYNOPSIS
-.B bpeg
+.B bp
[\fI-h\fR|\fI--help\fR]
[\fI-v\fR|\fI--verbose\fR]
[\fI-i\fR|\fI--ignore-case\fR \fI<pattern>\fR]
@@ -18,7 +18,7 @@ bpeg \- Bruce's Parsing Expression Grammar tool
\fI<pattern\fR
[[--] \fI<input files...>\fR]
.SH DESCRIPTION
-\fBbpeg\fR is a tool that matches parsing expression grammars using a custom syntax.
+\fBbp\fR is a tool that matches parsing expression grammars using a custom syntax.
.SH OPTIONS
.B \-v\fR, \fB--verbose
Print debugging information.
@@ -27,10 +27,10 @@ Print debugging information.
Perform pattern matching case-insensitively.
.B \-d\fR, \fB--define \fI<name>\fR:\fI<pattern>\fR
-Define a grammar rule using a bpeg pattern.
+Define a grammar rule using a bp pattern.
.B \-D\fR, \fB--define-string \fI<name>\fR:\fI<string-pattern>\fR
-Define a grammar rule using a bpeg string pattern.
+Define a grammar rule using a bp string pattern.
.B \-r\fR, \fB--replace \fI<replacement>\fR
Replace all occurrences of the main pattern with the given string.
@@ -47,17 +47,17 @@ The mode to operate in. Options are: \fIfind-all\fR (the default),
Print the usage and exit.
.B <string-pattern>
-The main pattern for bpeg to match. By default, this pattern is a string
+The main pattern for bp to match. By default, this pattern is a string
pattern (see the \fBSTRING PATTERNS\fR section below).
.B <input files...>
The input files to search. If no input files are provided and data was
piped in, that data will be used instead. If neither are provided,
-\fBbpeg\fR will search through all files in the current directory and
+\fBbp\fR will search through all files in the current directory and
its subdirectories (recursively).
.SH PATTERNS
-Bpeg patterns are based off of a combination of Parsing Expression Grammars
+bp patterns are based off of a combination of Parsing Expression Grammars
and regular expression syntax. The syntax is designed to map closely to
verbal descriptions of the patterns, and prefix operators are preferred over
suffix operators (as is common in regex syntax).
@@ -175,32 +175,32 @@ A line comment
.SH STRING PATTERNS
One of the most common use cases for pattern matching tools is matching plain,
literal strings, or strings that are primarily plain strings, with one or two
-patterns. \fBbpeg\fR is designed around this fact. The default mode for bpeg
+patterns. \fBbp\fR is designed around this fact. The default mode for bp
patterns is "string pattern mode". In string pattern mode, all characters
are interpreted literally except for the backslash (\fB\\\fR), which may be
-followed by a bpeg pattern (see the \fBPATTERNS\fR section above). Optionally,
-the bpeg pattern may be terminated by a semicolon (\fB;\fR).
+followed by a bp pattern (see the \fBPATTERNS\fR section above). Optionally,
+the bp pattern may be terminated by a semicolon (\fB;\fR).
.SH EXAMPLES
.TP
.B
-ls | bpeg foo
+ls | bp foo
Find files containing the string "foo" (a string pattern)
.TP
.B
-ls | bpeg '.c\\$' -r '.h'
+ls | bp '.c\\$' -r '.h'
Find files ending with ".c" and replace the extension with ".h"
.TP
.B
-bpeg -p '"foobar"==id parens' my_file.py
+bp -p '"foobar"==id parens' my_file.py
Find the literal string \fB"foobar"\fR, assuming it's a complete identifier,
followed by a pair of matching parentheses in the file \fImy_file.py\fR
.TP
.B
-bpeg -g html -p html-element -D matching-tag=a foo.html
+bp -g html -p html-element -D matching-tag=a foo.html
Using the \fIhtml\fR grammar, find all \fIhtml-element\fRs matching
the tag \fIa\fR in the file \fIfoo.html\fR
diff --git a/bpeg.c b/bpeg.c
index e1b603f..4e3ef5b 100644
--- a/bpeg.c
+++ b/bpeg.c
@@ -1,7 +1,7 @@
/*
* bpeg.c - Source code for the bpeg parser
*
- * See `man ./bpeg.1` for more details
+ * See `man ./bp.1` for more details
*/
#include <fcntl.h>
#include <glob.h>
@@ -15,24 +15,28 @@
#include "file_loader.h"
#include "grammar.h"
#include "utils.h"
+#include "viz.h"
#include "vm.h"
static const char *usage = (
- "BPEG - a Parsing Expression Grammar command line tool\n\n"
+ "BP - a Parsing Expression Grammar command line tool\n\n"
"Usage:\n"
- " bpeg [flags] <pattern> [<input files>...]\n\n"
+ " bp [flags] <pattern> [<input files>...]\n\n"
"Flags:\n"
" -h --help print the usage and quit\n"
" -v --verbose print verbose debugging info\n"
+ " -e --explain explain the matches\n"
" -i --ignore-case preform matching case-insensitively\n"
" -d --define <name>:<def> define a grammar rule\n"
" -D --define-string <name>:<def> define a grammar rule (string-pattern)\n"
- " -p --pattern <pat> provide a pattern (equivalent to bpeg '\\(<pat>)')\n"
+ " -p --pattern <pat> provide a pattern (equivalent to bp '\\(<pat>)')\n"
" -P --pattern-string <pat> provide a string pattern (may be useful if '<pat>' begins with a '-')\n"
" -r --replace <replacement> replace the input pattern with the given replacement\n"
" -m --mode <mode> set the behavior mode (defult: find-all)\n"
" -g --grammar <grammar file> use the specified file as a grammar\n");
+static print_options_t print_options = 0;
+
static char *getflag(const char *flag, char *argv[], int *i)
{
size_t n = strlen(flag);
@@ -54,7 +58,7 @@ static int print_errors(file_t *f, match_t *m)
int ret = 0;
if (m->op->op == VM_CAPTURE && m->value.name && streq(m->value.name, "!")) {
printf("\033[31;1m");
- print_match(f, m);
+ print_match(f, m, print_options);
printf("\033[0m\n");
fprint_line(stdout, f, m->start, m->end, " ");
return 1;
@@ -66,13 +70,49 @@ static int print_errors(file_t *f, match_t *m)
static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags)
{
+ static int printed_matches = 0;
file_t *f = load_file(filename);
check(f, "Could not open file: %s", filename);
match_t *m = match(g, f, f->contents, pattern, flags);
if (m && print_errors(f, m) > 0)
_exit(1);
if (m != NULL && m->end > m->start + 1) {
- print_match(f, m);
+ ++printed_matches;
+
+ if (flags & BPEG_EXPLAIN) {
+ if (filename) {
+ printf("\033[1;4m%s:\033[0m\n", filename);
+ }
+ /*
+ if (printed_matches > 1)
+ fprintf(stdout, ",\n");
+ printf("{\"filename\":\"%s\",\"text\":\"", filename ? filename : "-");
+ for (char *c = f->contents; c < f->end; c++) {
+ switch (*c) {
+ case '"': printf("\\\""); break;
+ case '\n': printf("\\n"); break;
+ case '\t': printf("\\t"); break;
+ case '\\': printf("\\\\"); break;
+ default: printf("%c", *c); break;
+ }
+ }
+ printf("\",\n\"tree\":{\"type\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[",
+ 0, f->end - f->contents);
+ json_match(stdout, f->contents, m);
+ printf("]}}\n");
+ */
+ visualize_match(m);
+ } else {
+ if (printed_matches > 1)
+ fputc('\n', stdout);
+ if (filename) {
+ if (print_options & PRINT_COLOR)
+ printf("\033[1;4;33m%s:\033[0m\n", filename);
+ else
+ printf("%s:\n", filename);
+ }
+ print_match(f, m, print_options);
+ }
destroy_file(&f);
return 0;
} else {
@@ -93,9 +133,9 @@ int main(int argc, char *argv[])
grammar_t *g = new_grammar();
// Load builtins:
- if (access("/etc/xdg/bpeg/builtins.bpeg", R_OK) != -1)
- load_grammar(g, load_file("/etc/xdg/bpeg/builtins.bpeg")); // Keep in memory for debugging output
- sprintf(path, "%s/.config/bpeg/builtins.bpeg", getenv("HOME"));
+ if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1)
+ load_grammar(g, load_file("/etc/xdg/bp/builtins.bp")); // Keep in memory for debugging output
+ sprintf(path, "%s/.config/bp/builtins.bp", getenv("HOME"));
if (access(path, R_OK) != -1)
load_grammar(g, load_file(path)); // Keep in memory for debugging output
@@ -110,6 +150,8 @@ int main(int argc, char *argv[])
return 0;
} else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) {
flags |= BPEG_VERBOSE;
+ } else if (streq(argv[i], "--explain") || streq(argv[i], "-e")) {
+ flags |= BPEG_EXPLAIN;
} else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) {
flags |= BPEG_IGNORECASE;
} else if (FLAG("--replace") || FLAG("-r")) {
@@ -122,11 +164,11 @@ int main(int argc, char *argv[])
} else if (FLAG("--grammar") || FLAG("-g")) {
file_t *f = load_file(flag);
if (f == NULL) {
- sprintf(path, "%s/.config/bpeg/%s.bpeg", getenv("HOME"), flag);
+ sprintf(path, "%s/.config/bp/%s.bp", getenv("HOME"), flag);
f = load_file(path);
}
if (f == NULL) {
- sprintf(path, "/etc/xdg/bpeg/%s.bpeg", flag);
+ sprintf(path, "/etc/xdg/bp/%s.bp", flag);
f = load_file(path);
}
check(f != NULL, "Couldn't find grammar: %s", flag);
@@ -180,11 +222,7 @@ int main(int argc, char *argv[])
}
if (isatty(STDOUT_FILENO)) {
- char *epsilon = "''";
- file_t *is_tty_file = spoof_file("<is-tty>", epsilon);
- vm_op_t *p = bpeg_pattern(is_tty_file, epsilon);
- check(p, "Failed to compile is-tty");
- add_def(g, is_tty_file, epsilon, "is-tty", p);
+ print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS;
}
vm_op_t *pattern = lookup(g, rule);
@@ -210,7 +248,6 @@ int main(int argc, char *argv[])
ret &= run_match(g, NULL, pattern, flags);
}
-
return ret;
}
diff --git a/compiler.c b/compiler.c
index 6a8de58..48d0023 100644
--- a/compiler.c
+++ b/compiler.c
@@ -281,6 +281,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
str = after_spaces(str);
if (!matchchar(&str, ')'))
file_err(f, origin, str, "This parenthesis group isn't properly closed.");
+ op->start = origin;
+ op->end = str;
break;
}
// Square brackets
@@ -330,6 +332,17 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
op->len = pat->len;
break;
}
+ // Hide
+ case '~': {
+ vm_op_t *pat = bpeg_simplepattern(f, str);
+ if (!pat)
+ file_err(f, str, str, "There should be a pattern after this '~'");
+ str = pat->end;
+ op->op = VM_HIDE;
+ op->len = 0;
+ op->args.pat = pat;
+ break;
+ }
// Replacement
case '{': {
str = after_spaces(str);
diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bp
index 288ceee..288ceee 100644
--- a/grammars/bpeg.bpeg
+++ b/grammars/bpeg.bp
diff --git a/grammars/builtins.bpeg b/grammars/builtins.bp
index 697bf27..7de936f 100644
--- a/grammars/builtins.bpeg
+++ b/grammars/builtins.bp
@@ -3,9 +3,6 @@ no: !''
# Configurable options:
is-tty: no # Defined as either always-match or always-fail, depending on stdout
-print-line-numbers: is-tty
-print-filenames: is-tty
-highlight: is-tty
include-binary-files: no;
is-text-file: ^^ >32(\t/\n/\r/\x20-x7e/!\x00-x7f utf8-codepoint/$$)
@@ -14,29 +11,18 @@ pattern: !'' # Not defined by default
replacement: !'' # Not defined by default
replace-all: (
(include-binary-files / is-text-file)
- define-highlights
- add-filename
- *(...(>pattern hl-replacement)) ...
+ +(...(>pattern replacement)) ...
)
find-all: (
(include-binary-files / is-text-file)
- define-highlights
- add-filename
- *(!..pattern {..\n=>})
- +(>..pattern add-line-number +(..hl-pattern) ..(\n/$$) / {..\n=>})
+ *(!..pattern ~(..\n))
+ +(+(..@pattern) ..(\n/$$) / ~(..\n))
[{!<\n => "\n"}]
)
only-matches: (
(include-binary-files / is-text-file)
- define-highlights
- add-filename
- +{...@hl-pattern =>'@1\n'}
+ +{...@pattern =>'@1\n'}
)
-add-filename: [print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})]
-add-line-number: [print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})]
-hl-pattern: {@match=pattern define-highlights => "@hl-start;@match;@hl-end;"}
-hl-replacement: {@match=replacement define-highlights => "@hl-start;@match;@hl-end;" }
-define-highlights: highlight @hl-start={=>"\033[31;1m"} @hl-end={=>"\033[0m"} / @hl-start="" @hl-end=""
# Helper definitions (commonly used)
url: (
@@ -70,6 +56,7 @@ parens: `( *(parens / $. != `)) `)
id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9)
id-char: `a-z/`A-Z/`_/`0-9
word: !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_)
+edge: !<(`a-z/`A-Z/`_/`0-9) / !>(`0-9/`_)
HEX: `0-9/`A-F
Hex: `0-9/`a-f/`A-F
hex: `0-9/`a-f
diff --git a/grammars/html.bpeg b/grammars/html.bp
index cea19c1..cea19c1 100644
--- a/grammars/html.bpeg
+++ b/grammars/html.bp
diff --git a/grammars/utf8-id.bpeg b/grammars/utf8-id.bp
index 26e98ba..26e98ba 100644
--- a/grammars/utf8-id.bpeg
+++ b/grammars/utf8-id.bp
diff --git a/types.h b/types.h
index da1ce40..eb8a925 100644
--- a/types.h
+++ b/types.h
@@ -11,6 +11,7 @@
enum BPEGFlag {
BPEG_VERBOSE = 1 << 0,
BPEG_IGNORECASE = 1 << 1,
+ BPEG_EXPLAIN = 1 << 2,
};
/*
@@ -26,6 +27,7 @@ enum VMOpcode {
VM_BEFORE,
VM_AFTER,
VM_CAPTURE,
+ VM_HIDE,
VM_OTHERWISE,
VM_CHAIN,
VM_EQUAL,
diff --git a/viz.c b/viz.c
new file mode 100644
index 0000000..c133156
--- /dev/null
+++ b/viz.c
@@ -0,0 +1,158 @@
+/*
+ * viz.c - Visualize matches.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "types.h"
+#include "viz.h"
+
+
+/*
+static size_t utf8_len(const char *s)
+{
+ size_t len = 0;
+ while (*s) {
+ len += (*s++ & 0xC0) != 0x80;
+ }
+ return len;
+}
+*/
+
+/*
+ * Return the number of levels in the match tree rooted at `m`: 1 for a match
+ * with no children, otherwise 1 plus the height of its tallest child.
+ */
+static int match_height(match_t *m)
+{
+    int tallest = 0;
+    match_t *kid = m->child;
+    while (kid) {
+        int h = match_height(kid);
+        tallest = (h > tallest) ? h : tallest;
+        kid = kid->nextsibling;
+    }
+    return tallest + 1;
+}
+
+/*
+ * Append a freshly allocated list node wrapping `m` at `*tail` and return the
+ * address of the new node's `next` field (the new tail of the list).
+ * Exits the program if the allocation fails.
+ */
+static match_node_t **append_match_node(match_node_t **tail, match_t *m)
+{
+    match_node_t *node = calloc(1, sizeof *node);
+    if (node == NULL) {
+        perror("calloc");
+        exit(EXIT_FAILURE);
+    }
+    node->m = m;
+    *tail = node;
+    return &node->next;
+}
+
+/*
+ * Print one level of the match visualization (two terminal lines), then
+ * recurse to print the next level.
+ *
+ *   firstmatch: linked list of the matches to draw on this level
+ *   depth:      recursion depth; even levels use color 34, odd levels 33
+ *   text:       start of the visualized text; columns are computed from
+ *               byte offsets relative to this pointer
+ *   textlen:    length of the visualized text; the pattern label is printed
+ *               at column 2*textlen+3
+ *
+ * The tallest match in the list (per match_height) picks this level's "type",
+ * which is the source text of its pattern (op->start .. op->end). Matches
+ * whose pattern source text is identical have their matched text printed in
+ * reverse video and their children queued for the next level. All other
+ * matches are drawn as empty boxes and carried over to the next level as-is.
+ * Each byte of text takes two columns (the byte plus a separating space), so
+ * a match starting at offset N begins at column 1+2*N.
+ */
+static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen)
+{
+    if (!firstmatch) return;
+
+    const char *V = "│"; // Vertical bar
+    const char *H = "─"; // Horizontal bar
+    const char *color = (depth % 2 == 0) ? "34" : "33";
+
+    // Pick the tallest match; its height is computed once per candidate
+    match_t *viz = firstmatch->m;
+    int viz_height = match_height(viz);
+    for (match_node_t *p = firstmatch->next; p; p = p->next) {
+        int height = match_height(p->m);
+        if (height > viz_height) {
+            viz = p->m;
+            viz_height = height;
+        }
+    }
+    const char *viz_type = viz->op->start;
+    size_t viz_typelen = (size_t)(viz->op->end - viz->op->start);
+
+    // Label this level with the pattern's source text, to the right of the text
+    printf("\033[%zuG\033[%s;1m", 2*textlen+3, color);
+    for (size_t i = 0; i < viz_typelen; i++) {
+        switch (viz_type[i]) {
+            case '\n': printf("↵"); break;
+            default: printf("%c", viz_type[i]); break;
+        }
+    }
+    printf("\033[0m");
+
+    match_node_t *children = NULL;
+    match_node_t **nextchild = &children;
+
+// True when the node's pattern has exactly the same source text as this level's type
+#define RIGHT_TYPE(node) ((node)->m->op->end == (node)->m->op->start + viz_typelen \
+                          && strncmp((node)->m->op->start, viz_type, viz_typelen) == 0)
+    // Print nonzero-width first:
+    for (match_node_t *m = firstmatch; m; m = m->next) {
+        if (RIGHT_TYPE(m)) {
+            for (match_t *c = m->m->child; c; c = c->nextsibling)
+                nextchild = append_match_node(nextchild, c);
+            if (m->m->end == m->m->start) continue;
+            printf("\033[%tdG\033[0;2m%s\033[0;7;%sm", 1+2*(m->m->start - text), V, color);
+            for (const char *c = m->m->start; c < m->m->end; ++c) {
+                // TODO: newline
+                if (c > m->m->start) printf(" ");
+                // TODO: utf8
+                printf("%c", *c);
+            }
+            printf("\033[0;2m%s\033[0m", V);
+        } else {
+            // Not this level's type: defer the match itself to the next level
+            nextchild = append_match_node(nextchild, m->m);
+            printf("\033[%tdG\033[0;2m%s", 1+2*(m->m->start - text), V);
+            for (ssize_t i = (ssize_t)(2*(m->m->end - m->m->start)-1); i > 0; i--)
+                printf(" ");
+            if (m->m->end > m->m->start)
+                printf("\033[0;2m%s", V);
+            printf("\033[0m");
+        }
+    }
+
+    // Print stars for zero-width:
+    for (match_node_t *m = firstmatch; m; m = m->next) {
+        if (m->m->end > m->m->start) continue;
+        if (RIGHT_TYPE(m)) {
+            printf("\033[%tdG\033[7;%sm▒\033[0m", 1+2*(m->m->start - text), color);
+        } else {
+            printf("\033[%tdG\033[0;2m%s\033[0m", 1+2*(m->m->start - text), V);
+        }
+    }
+
+    printf("\n");
+
+    // Second line: close each box, keeping a vertical bar wherever an edge
+    // lines up with the edge of a match queued for the next level
+    for (match_node_t *m = firstmatch; m; m = m->next) {
+        if (m->m->end == m->m->start) {
+            if (!RIGHT_TYPE(m))
+                printf("\033[%tdG\033[0;2m%s", 1 + 2*(m->m->start - text), V);
+        } else {
+            const char *l = "└";
+            const char *r = "┘";
+            for (match_node_t *c = children; c; c = c->next) {
+                if (c->m->start == m->m->start || c->m->end == m->m->start) l = V;
+                if (c->m->start == m->m->end || c->m->end == m->m->end) r = V;
+            }
+            printf("\033[%tdG\033[0;2m%s", 1 + 2*(m->m->start - text), l);
+            const char *h = RIGHT_TYPE(m) ? H : " ";
+            for (ssize_t n = (ssize_t)(2*(m->m->end - m->m->start) - 1); n > 0; n--)
+                printf("%s", h);
+            printf("%s\033[0m", r);
+        }
+    }
+#undef RIGHT_TYPE
+
+    printf("\n");
+
+    if (children)
+        _visualize_matches(children, depth+1, text, textlen);
+
+    // The list nodes are owned by this call; the matches they point to are not
+    for (match_node_t *c = children, *next = NULL; c; c = next) {
+        next = c->next;
+        free(c);
+    }
+}
+
+/*
+ * Walk the match tree depth-first. For every VM_REF match whose referenced
+ * name is "pattern", visualize the match's child (using the child's own span
+ * as the visualized text) and do not descend further; for any other match,
+ * recurse into its children.
+ */
+static void _visualize_patterns(match_t *m)
+{
+    int is_pattern_ref = (m->op->op == VM_REF) && (strcmp(m->op->args.s, "pattern") == 0);
+    if (!is_pattern_ref) {
+        match_t *kid = m->child;
+        while (kid) {
+            _visualize_patterns(kid);
+            kid = kid->nextsibling;
+        }
+        return;
+    }
+
+    match_t *body = m->child;
+    match_node_t root = {.m = body};
+    _visualize_matches(&root, 0, body->start, (size_t)(body->end - body->start));
+}
+
+/*
+ * Print a visualization of every "pattern" match found inside `m`.
+ * Terminal auto-wrap is switched off (ESC[?7l) for the duration of the output
+ * and switched back on (ESC[?7h) afterwards.
+ */
+void visualize_match(match_t *m)
+{
+    fputs("\033[?7l", stdout);
+    _visualize_patterns(m);
+    fputs("\033[?7h", stdout);
+}
diff --git a/viz.h b/viz.h
new file mode 100644
index 0000000..80d8cee
--- /dev/null
+++ b/viz.h
@@ -0,0 +1,10 @@
+/*
+ * viz.h - Header file for viz.c (visualizing matches)
+ */
+#ifndef VIZ_H
+#define VIZ_H
+
+#include "types.h"
+
+/* Singly linked list node referring to a match; `next` is NULL at the end. */
+typedef struct match_node_s {
+    match_t *m;
+    struct match_node_s *next;
+} match_node_t;
+
+/* Print a visualization of the match tree rooted at `m` to stdout. */
+void visualize_match(match_t *m);
+
+#endif
diff --git a/vm.c b/vm.c
index e79505c..8f4d44f 100644
--- a/vm.c
+++ b/vm.c
@@ -26,6 +26,7 @@ static const char *opcode_names[] = {
[VM_BEFORE] = "BEFORE",
[VM_AFTER] = "AFTER",
[VM_CAPTURE] = "CAPTURE",
+ [VM_HIDE] = "HIDE",
[VM_OTHERWISE] = "OTHERWISE",
[VM_CHAIN] = "CHAIN",
[VM_REPLACE] = "REPLACE",
@@ -257,6 +258,16 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
m->value.name = op->args.capture.name;
return m;
}
+ case VM_HIDE: {
+ match_t *p = _match(g, f, str, op->args.pat, flags, rec);
+ if (p == NULL) return NULL;
+ match_t *m = calloc(sizeof(match_t), 1);
+ m->start = str;
+ m->end = p->end;
+ m->op = op;
+ m->child = p;
+ return m;
+ }
case VM_OTHERWISE: {
match_t *m = _match(g, f, str, op->args.multiple.first, flags, rec);
if (m == NULL) m = _match(g, f, str, op->args.multiple.second, flags, rec);
@@ -450,6 +461,12 @@ void print_pattern(vm_op_t *op)
fprintf(stderr, ")");
break;
}
+ case VM_HIDE: {
+ fprintf(stderr, "hidden (");
+ print_pattern(op->args.pat);
+ fprintf(stderr, ")");
+ break;
+ }
case VM_CAPTURE: {
fprintf(stderr, "capture (");
print_pattern(op->args.pat);
@@ -547,63 +564,185 @@ static match_t *get_cap(match_t *m, const char **r)
return NULL;
}
+/* Mutable state threaded through the recursive match printer. */
+typedef struct {
+    size_t line;         // Line number of the output position (bumped on each '\n')
+    size_t printed_line; // Value of `line` when print_line_number() last ran
+    const char *color;   // Escape sequence for the color currently in effect
+} print_state_t;
+
+/*
+ * Mark the current line as having had its prefix handled and, if
+ * PRINT_LINE_NUMBERS is set, print the line number prefix.
+ *
+ * With PRINT_COLOR the number is dimmed and followed by a line-drawing
+ * vertical bar ('x' in the DEC special graphics set, selected with ESC(0 and
+ * deselected with ESC(B), after which the current color is restored.
+ * Without PRINT_COLOR the number is followed by a plain '|'.
+ */
+static void print_line_number(print_state_t *state, print_options_t options)
+{
+    state->printed_line = state->line;
+    if (!(options & PRINT_LINE_NUMBERS)) return;
+    // `line` is a size_t: convert explicitly so the argument matches %ld
+    long lineno = (long)state->line;
+    if (options & PRINT_COLOR)
+        printf("\033[0;2m% 5ld\033(0\x78\033(B%s", lineno, state->color);
+    else
+        printf("% 5ld|", lineno);
+}
+
/*
* Print a match with replacements and highlighting.
*/
-void print_match(file_t *f, match_t *m)
+static void _print_match(file_t *f, match_t *m, print_state_t *state, print_options_t options)
{
- if (m->op->op == VM_REPLACE) {
+ static const char *hl = "\033[0;31;1m";
+ const char *old_color = state->color;
+ if (m->op->op == VM_HIDE) {
+ // TODO: handle replacements?
+ for (const char *p = m->start; p < m->end; p++) {
+ if (*p == '\n') ++state->line;
+ }
+ } else if (m->op->op == VM_REPLACE) {
+ if (options & PRINT_COLOR && state->color != hl) {
+ state->color = hl;
+ printf("%s", state->color);
+ }
for (const char *r = m->value.replacement; *r; ) {
- if (*r == '\\') {
- ++r;
- fputc(unescapechar(r, &r), stdout);
- continue;
- } else if (*r != '@') {
- fputc(*r, stdout);
+ if (*r == '@' && r[1] && r[1] != '@') {
++r;
- continue;
+ match_t *cap = get_cap(m, &r);
+ if (cap != NULL) {
+ _print_match(f, cap, state, options);
+ continue;
+ } else {
+ --r;
+ }
}
- ++r;
- if (*r == '@' || *r == '\0') {
- fputc('@', stdout);
- continue;
- }
- if (*r == '#') {
+ if (state->printed_line != state->line)
+ print_line_number(state, options);
+
+ if (*r == '\\') {
++r;
- printf("%ld", get_line_number(f, m->start));
+ unsigned char c = unescapechar(r, &r);
+ fputc(c, stdout);
+ if (c == '\n') ++state->line;
continue;
- } else if (*r == ':') {
+ } else if (*r == '\n') {
+ fputc('\n', stdout);
+ ++state->line;
++r;
- printf("%ld", get_char_number(f, m->start));
continue;
- } else if (*r == '&') {
+ } else {
+ fputc(*r, stdout);
++r;
- printf("%s", f->filename ? f->filename : "-");
continue;
}
- match_t *cap = get_cap(m, &r);
- if (cap != NULL) {
- print_match(f, cap);
- } else {
- fputc('@', stdout);
- }
}
} else {
+ if (m->op->op == VM_CAPTURE) {
+ if (options & PRINT_COLOR && state->color != hl) {
+ state->color = hl;
+ printf("%s", state->color);
+ }
+ }
+
const char *prev = m->start;
for (match_t *child = m->child; child; child = child->nextsibling) {
// Skip children from e.g. zero-width matches like >@foo
if (!(prev <= child->start && child->start <= m->end &&
prev <= child->end && child->end <= m->end))
continue;
- if (child->start > prev)
- printf("%.*s", (int)(child->start - prev), prev);
- print_match(f, child);
+ if (child->start > prev) {
+ for (const char *p = prev; p < child->start; ++p) {
+ if (state->printed_line != state->line)
+ print_line_number(state, options);
+ fputc(*p, stdout);
+ if (*p == '\n') ++state->line;
+ }
+ }
+ _print_match(f, child, state, options);
prev = child->end;
}
- if (m->end > prev)
- printf("%.*s", (int)(m->end - prev), prev);
+ if (m->end > prev) {
+ for (const char *p = prev; p < m->end; ++p) {
+ if (state->printed_line != state->line)
+ print_line_number(state, options);
+ fputc(*p, stdout);
+ if (*p == '\n') ++state->line;
+ }
+ }
+ }
+ if (options & PRINT_COLOR && old_color != state->color) {
+ printf("%s", old_color);
+ state->color = old_color;
+ }
+}
+
+/*
+ * Print the match `m` from file `f` to stdout, starting the line counter at 1
+ * and the current color at the reset sequence. `options` selects coloring and
+ * line numbering.
+ */
+void print_match(file_t *f, match_t *m, print_options_t options)
+{
+    print_state_t state = {0};
+    state.line = 1;
+    state.color = "\033[0m";
+    _print_match(f, m, &state, options);
+}
+
+/*
+ * Print a match as JSON
+ *
+ * Writes `m` and its descendants to `f` as nested objects of the form
+ * {"type":...,"start":...,"end":...,"children":[...]}, where start/end are
+ * byte offsets relative to `text`. If `comma` is nonzero, ",\n" is written
+ * before the first object emitted. Returns nonzero if an object was emitted
+ * (or if `comma` was already set), so that the caller knows to separate the
+ * next sibling with a comma.
+ *
+ * With VERBOSE_JSON every match becomes an object whose type is the source
+ * text of its pattern. Otherwise only string, rule-reference and named
+ * capture matches become objects, and the children of any other match are
+ * emitted in its place.
+ */
+#define VERBOSE_JSON 1
+static int _json_match(FILE *f, const char *text, match_t *m, int comma)
+#if VERBOSE_JSON
+{
+    if (comma) fprintf(f, ",\n");
+    comma = 0;
+    fprintf(f, "{\"type\":\"");
+    for (const char *c = m->op->start; c < m->op->end; c++) {
+        switch (*c) {
+            case '"': fprintf(f, "\\\""); break;
+            case '\\': fprintf(f, "\\\\"); break;
+            case '\t': fprintf(f, "\\t"); break;
+            case '\n': fprintf(f, "↵"); break;
+            default: fprintf(f, "%c", *c); break;
+        }
+    }
+    // Pointer differences are ptrdiff_t, hence %td
+    fprintf(f, "\",\"start\":%td,\"end\":%td,\"children\":[",
+            m->start - text, m->end - text);
+    for (match_t *child = m->child; child; child = child->nextsibling) {
+        comma |= _json_match(f, text, child, comma);
+    }
+    fprintf(f, "]}");
+    return 1;
+}
+#else
+{
+    if (m->op->op == VM_STRING) {
+        if (comma) fprintf(f, ",\n");
+        comma = 0;
+        fprintf(f, "{\"type\":\"\\\"");
+        for (const char *c = m->op->args.s; *c; c++) {
+            switch (*c) {
+                case '"': fprintf(f, "\\\""); break;
+                case '\\': fprintf(f, "\\\\"); break;
+                case '\t': fprintf(f, "\\t"); break;
+                case '\n': fprintf(f, "↵"); break;
+                default: fprintf(f, "%c", *c); break;
+            }
+        }
+        fprintf(f, "\\\"\",\"start\":%td,\"end\":%td,\"children\":[",
+                m->start - text, m->end - text);
+    } else if (m->op->op == VM_REF) {
+        if (comma) fprintf(f, ",\n");
+        comma = 0;
+        fprintf(f, "{\"type\":\"%s\",\"start\":%td,\"end\":%td,\"children\":[",
+                m->op->args.s, m->start - text, m->end - text);
+    } else if (m->op->op == VM_CAPTURE && m->value.name) {
+        if (comma) fprintf(f, ",\n");
+        comma = 0;
+        fprintf(f, "{\"type\":\"@%s\",\"start\":%td,\"end\":%td,\"children\":[",
+                m->value.name, m->start - text, m->end - text);
+    }
+    for (match_t *child = m->child; child; child = child->nextsibling) {
+        comma |= _json_match(f, text, child, comma);
+    }
+    if (m->op->op == VM_REF || m->op->op == VM_STRING || (m->op->op == VM_CAPTURE && m->value.name)) {
+        fprintf(f, "]}");
+        return 1;
+    }
+    return comma;
+}
+#endif
+
+/*
+ * Write the match tree rooted at `m` to `f` as JSON, with offsets measured
+ * from `text` and no separator before the first object.
+ */
+void json_match(FILE *f, const char *text, match_t *m)
+{
+    const int no_leading_comma = 0;
+    (void)_json_match(f, text, m, no_leading_comma);
+}
static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags)
diff --git a/vm.h b/vm.h
index e0e6b5d..e862b79 100644
--- a/vm.h
+++ b/vm.h
@@ -11,6 +11,11 @@
#include "types.h"
+/* Bit flags (combinable with |) selecting how print_match() renders output. */
+typedef enum {
+    PRINT_COLOR        = 0x1, /* emit terminal color escape sequences */
+    PRINT_LINE_NUMBERS = 0x2, /* prefix output lines with their line number */
+} print_options_t;
+
const char *opcode_name(enum VMOpcode o);
__attribute__((hot, nonnull))
match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
@@ -19,7 +24,9 @@ void destroy_match(match_t **m);
__attribute__((nonnull))
void print_pattern(vm_op_t *op);
__attribute__((nonnull))
-void print_match(file_t *f, match_t *m);
+void print_match(file_t *f, match_t *m, print_options_t options);
+__attribute__((nonnull))
+void json_match(FILE *f, const char *text, match_t *m);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1