From eb329bdac9fe56d67cb130fb6cdbb28743c6504b Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sat, 12 Dec 2020 16:31:53 -0800 Subject: Bunch of changes, including some bpeg->bp renaming, and adding visualizations --- Makefile | 12 +- README.md | 12 +- bp.1 | 209 ++++++++++++++ bpeg.1 | 209 -------------- bpeg.c | 71 +++-- compiler.c | 13 + grammars/bpeg.bp | 78 ++++++ grammars/bpeg.bpeg | 78 ------ grammars/builtins.bp | 84 ++++++ grammars/builtins.bpeg | 97 ------- grammars/html.bp | 26 ++ grammars/html.bpeg | 26 -- grammars/utf8-id.bp | 735 +++++++++++++++++++++++++++++++++++++++++++++++++ grammars/utf8-id.bpeg | 735 ------------------------------------------------- types.h | 2 + viz.c | 158 +++++++++++ viz.h | 10 + vm.c | 201 +++++++++++--- vm.h | 9 +- 19 files changed, 1559 insertions(+), 1206 deletions(-) create mode 100644 bp.1 delete mode 100644 bpeg.1 create mode 100644 grammars/bpeg.bp delete mode 100644 grammars/bpeg.bpeg create mode 100644 grammars/builtins.bp delete mode 100644 grammars/builtins.bpeg create mode 100644 grammars/html.bp delete mode 100644 grammars/html.bpeg create mode 100644 grammars/utf8-id.bp delete mode 100644 grammars/utf8-id.bpeg create mode 100644 viz.c create mode 100644 viz.h diff --git a/Makefile b/Makefile index 1d34cf3..568421f 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -NAME=bpeg +NAME=bp CC ?= gcc PREFIX=/usr/local CFLAGS=-std=c99 -D_XOPEN_SOURCE=500 -D_GNU_SOURCE -D_POSIX_C_SOURCE=200809L @@ -7,7 +7,7 @@ CWARN=-Wall -Wpedantic -Wextra -Wno-unknown-pragmas -Wno-missing-field-initializ G ?= O ?= -O3 -CFILES=compiler.c grammar.c utils.c vm.c file_loader.c +CFILES=compiler.c grammar.c utils.c vm.c file_loader.c viz.c OBJFILES=$(CFILES:.c=.o) all: $(NAME) @@ -15,7 +15,7 @@ all: $(NAME) .c.o: $(CC) -c $(CFLAGS) $(CWARN) $(G) $(O) -o $@ $< -$(NAME): $(OBJFILES) $(NAME).c +$(NAME): $(OBJFILES) bpeg.c $(CC) $(CFLAGS) $(CWARN) $(G) $(O) -o $@ $^ clean: @@ -29,8 +29,8 @@ install: $(NAME) fi; \ [ ! 
"$$prefix" ] && prefix="/usr/local"; \ [ ! "$$sysconfdir" ] && sysconfdir=/etc; \ - mkdir -pv -m 755 "$$prefix/share/man/man1" "$$prefix/bin" "$$sysconfdir/xdg/bpeg" \ - && cp -rv grammars/* "$$sysconfdir/xdg/bpeg/" \ + mkdir -pv -m 755 "$$prefix/share/man/man1" "$$prefix/bin" "$$sysconfdir/xdg/bp" \ + && cp -rv grammars/* "$$sysconfdir/xdg/bp/" \ && cp -v $(NAME).1 "$$prefix/share/man/man1/" \ && rm -f "$$prefix/bin/$(NAME)" \ && cp -v $(NAME) "$$prefix/bin/" @@ -44,7 +44,7 @@ uninstall: [ ! "$$prefix" ] && prefix="/usr/local"; \ [ ! "$$sysconfdir" ] && sysconfdir=/etc; \ echo "Deleting..."; \ - rm -rvf "$$prefix/bin/$(NAME)" "$$prefix/share/man/man1/$(NAME).1" "$$sysconfdir/xdg/bpeg"; \ + rm -rvf "$$prefix/bin/$(NAME)" "$$prefix/share/man/man1/$(NAME).1" "$$sysconfdir/xdg/bp"; \ printf "\033[1mIf you created any config files in ~/.config/$(NAME), you may want to delete them manually.\033[0m\n" .PHONY: all, clean, install, uninstall diff --git a/README.md b/README.md index 9eb3b03..537055c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ BPEG is a parsing expression grammar tool for the command line. It's written in pure C with no dependencies. ## Usage -`bpeg [flags] [...]` +`bp [flags] [...]` ### Flags * `-h` `--help` print the usage and quit @@ -12,13 +12,13 @@ It's written in pure C with no dependencies. 
* `-i` `--ignore-case` perform a case-insensitive match * `-d` `--define :` define a grammar rule * `-D` `--define-string :` define a grammar rule (string-pattern) -* `-p` `--pattern ` provide a pattern (equivalent to bpeg ' -* `-P` `--pattern-string ` provide a string pattern (equivalent to bpeg '', but may be useful if '' begins with a '-') +* `-p` `--pattern ` provide a pattern (equivalent to `bp '\()'`) +* `-P` `--pattern-string ` provide a string pattern (equivalent to `bp ''`, but may be useful if `''` begins with a '-') * `-r` `--replace ` replace the input pattern with the given replacement -* `-m` `--mode ` set the behavior mode (defult: find-all) +* `-m` `--mode ` set the behavior mode (default: `find-all`) * `-g` `--grammar ` use the specified file as a grammar -See `man ./bpeg.1` for more details. +See `man ./bp.1` for more details. ## BPEG Patterns BPEG patterns are a mixture of Parsing Expression Grammar and Regular @@ -63,7 +63,7 @@ Pattern | Meaning `#( block comment )#` | A block comment `# line comment` | A line comment -See `man ./bpeg.1` for more details. +See `man ./bp.1` for more details. ## License BPEG is provided under the MIT license with the [Commons Clause](https://commonsclause.com/) diff --git a/bp.1 b/bp.1 new file mode 100644 index 0000000..60a5e8f --- /dev/null +++ b/bp.1 @@ -0,0 +1,209 @@ +.\" Manpage for bp. +.\" Contact bruce@bruce-hill.com to correct errors or typos. 
+.TH man 1 "Sep 12, 2020" "0.1" "bp manual page" +.SH NAME +bp \- Bruce's Parsing Expression Grammar tool +.SH SYNOPSIS +.B bp +[\fI-h\fR|\fI--help\fR] +[\fI-v\fR|\fI--verbose\fR] +[\fI-i\fR|\fI--ignore-case\fR \fI\fR] +[\fI-p\fR|\fI--pattern\fR \fI\fR] +[\fI-P\fR|\fI--pattern-string\fR \fI\fR] +[\fI-d\fR|\fI--define\fR \fI\fR:\fI\fR] +[\fI-D\fR|\fI--define-string\fR \fI\fR:\fI\fR] +[\fI-r\fR|\fI--replace\fR \fI\fR] +[\fI-g\fR|\fI--grammar\fR \fI\fR] +[\fI-m\fR|\fI--mode\fR \fI\fR] +\fI\fR] +.SH DESCRIPTION +\fBbp\fR is a tool that matches parsing expression grammars using a custom syntax. +.SH OPTIONS +.B \-v\fR, \fB--verbose +Print debugging information. + +.B \-i\fR, \fB--ignore-case +Perform pattern matching case-insensitively. + +.B \-d\fR, \fB--define \fI\fR:\fI\fR +Define a grammar rule using a bp pattern. + +.B \-D\fR, \fB--define-string \fI\fR:\fI\fR +Define a grammar rule using a bp string pattern. + +.B \-r\fR, \fB--replace \fI\fR +Replace all occurrences of the main pattern with the given string. + +.B \-g\fR, \fB--grammar \fI\fR +Load the grammar from the given file. + +.B \-m\fR, \fB--mode \fI\fR +The mode to operate in. Options are: \fIfind-all\fR (the default), +\fIonly-matches\fR, \fIpattern\fR, \fIreplacement\fR, \fIreplace-all\fR +(implied by \fB--replace\fR), or any other grammar rule name. + +.B \--help +Print the usage and exit. + +.B +The main pattern for bp to match. By default, this pattern is a string +pattern (see the \fBSTRING PATTERNS\fR section below). + +.B +The input files to search. If no input files are provided and data was +piped in, that data will be used instead. If neither are provided, +\fBbp\fR will search through all files in the current directory and +its subdirectories (recursively). + +.SH PATTERNS +bp patterns are based off of a combination of Parsing Expression Grammars +and regular expression syntax. 
The syntax is designed to map closely to +verbal descriptions of the patterns, and prefix operators are preferred over +suffix operators (as is common in regex syntax). + +Some patterns additionally have "multi-line" variants, which means that they +include the newline character. + +.I +A chain of patterns, pronounced \fI\fB-then-\fI\fR + +.I \fB/\fI \fR +A series of ordered choices (if one pattern matches, the following patterns +will not be attempted), pronounced \fI\fB-or-\fI\fR + +.B .. +Any text \fBup-to-and-including\fR the following pattern, if any (multiline: \fB...\fR) + +.B . +\fBAny\fR character (multiline: $.) + +.B ^ +\fBStart-of-a-line\fR + +.B ^^ +\fBStart-of-the-text\fR + +.B $ +\fBEnd-of-a-line\fR (does not include newline character) + +.B $$ +\fBEnd-of-the-text\fR + +.B _ +Zero or more \fBwhitespace\fR characters (specifically, spaces and tabs) + +.B __ +Zero or more \fBwhitespace-or-newline\fR characters + +.B `\fI\fR +The literal \fBcharacter-\fI\fR + +.B `\fI\fB-\fI\fR +The \fBcharacter-range-\fI\fB-to-\fI\fR + +.B \\\fI\fR +The \fBescape-sequence-\fI\fR (\fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.) + +.B \\\fI\fB-\fI\fR +The \fBescape-sequence-range-\fI\fB-to-\fI\fR + +.B !\fI\fR +\fBNot-\fI\fR + +.B [\fI\fR] +\fBMaybe-\fI\fR + +.B \fI\fR? +\fI\fB-or-not\fR + +.B \fI \fR +.B \fI\fB-\fI \fR +.B \fI\fB+ \fI\fR +\fI\fB-to-\fI\fB-\fI\fBs\fR (repetitions of a pattern) + +.B *\fI\fR +\fBsome-\fI\fBs\fR + +.B +\fI\fR +\fBat-least-one-\fI\fBs\fR + +.B \fI\fR \fB%\fI \fR +\fI\fB-separated-by-\fI\fR (equivalent to \fI +\fB0+(\fI\fB)\fR) + +.B <\fI\fR +\fBJust-after-\fI\fR (lookbehind) + +.B >\fI\fR +\fBJust-before-\fI\fR (lookahead) + +.B @\fI\fR +\fBCapture-\fI\fR + +.B @\fI\fB=\fI\fR +\fBLet-\fI\fB-equal-\fI\fR (named capture) + +.B {\fI\fB => "\fI\fB"} +\fBReplace-\fI\fB-with-\fI\fR. 
Note: \fI\fR should +be a string, and it may contain references to captured values: \fB@0\fR +(the whole of \fI\fR), \fB@1\fR (the first capture in \fI\fR), +\fB@[\fIfoo\fR]\fR (the capture named \fIfoo\fR in \fI\fR), etc. + +.B \fI\fB == \fI\fR +Will match only if \fI\fR and \fI\fR both match and have the exact +same length. Pronounced \fI\fB-assuming-it-equals-\fI\fR + +.B \fI\fB != \fI\fR +Will match only if \fI\fR matches, but \fI\fR doesn't also match with the +same length. Pronounced \fI\fB-unless-it-equals-\fI\fR + +.B \fI\fB != \fI\fR +Will match only if \fI\fR and \fI\fR don't both match and have the +exact same length. Pronounced \fI\fB-assuming-it-doesn't-equal-\fI\fR + +.B | +This pattern matches the indentation at the beginning of a line that has the +same indentation as the line before (or zero indentation on the first line). + +.B #( \fI\fR )# +A block comment (can be nested) + +.B # \fI\fR +A line comment + +.SH STRING PATTERNS +One of the most common use cases for pattern matching tools is matching plain, +literal strings, or strings that are primarily plain strings, with one or two +patterns. \fBbp\fR is designed around this fact. The default mode for bp +patterns is "string pattern mode". In string pattern mode, all characters +are interpreted literally except for the backslash (\fB\\\fR), which may be +followed by a bp pattern (see the \fBPATTERNS\fR section above). Optionally, +the bp pattern may be terminated by a semicolon (\fB;\fR). 
+ +.SH EXAMPLES +.TP +.B +ls | bp foo +Find files containing the string "foo" (a string pattern) + +.TP +.B +ls | bp '.c\\$' -r '.h' +Find files ending with ".c" and replace the extension with ".h" + +.TP +.B +bp -p '"foobar"==id parens' my_file.py +Find the literal string \fB"foobar"\fR, assuming it's a complete identifier, +followed by a pair of matching parentheses in the file \fImy_file.py\fR + +.TP +.B +bp -g html -p html-element -D matching-tag=a foo.html +Using the \fIhtml\fR grammar, find all \fIhtml-element\fRs matching +the tag \fIa\fR in the file \fIfoo.html\fR + + +.SH AUTHOR +Bruce Hill (bruce@bruce-hill.com) diff --git a/bpeg.1 b/bpeg.1 deleted file mode 100644 index 9f36e0b..0000000 --- a/bpeg.1 +++ /dev/null @@ -1,209 +0,0 @@ -.\" Manpage for bpeg. -.\" Contact bruce@bruce-hill.com to correct errors or typos. -.TH man 1 "Sep 12, 2020" "0.1" "bpeg manual page" -.SH NAME -bpeg \- Bruce's Parsing Expression Grammar tool -.SH SYNOPSIS -.B bpeg -[\fI-h\fR|\fI--help\fR] -[\fI-v\fR|\fI--verbose\fR] -[\fI-i\fR|\fI--ignore-case\fR \fI\fR] -[\fI-p\fR|\fI--pattern\fR \fI\fR] -[\fI-P\fR|\fI--pattern-string\fR \fI\fR] -[\fI-d\fR|\fI--define\fR \fI\fR:\fI\fR] -[\fI-D\fR|\fI--define-string\fR \fI\fR:\fI\fR] -[\fI-r\fR|\fI--replace\fR \fI\fR] -[\fI-g\fR|\fI--grammar\fR \fI\fR] -[\fI-m\fR|\fI--mode\fR \fI\fR] -\fI\fR] -.SH DESCRIPTION -\fBbpeg\fR is a tool that matches parsing expression grammars using a custom syntax. -.SH OPTIONS -.B \-v\fR, \fB--verbose -Print debugging information. - -.B \-i\fR, \fB--ignore-case -Perform pattern matching case-insensitively. - -.B \-d\fR, \fB--define \fI\fR:\fI\fR -Define a grammar rule using a bpeg pattern. - -.B \-D\fR, \fB--define-string \fI\fR:\fI\fR -Define a grammar rule using a bpeg string pattern. - -.B \-r\fR, \fB--replace \fI\fR -Replace all occurrences of the main pattern with the given string. - -.B \-g\fR, \fB--grammar \fI\fR -Load the grammar from the given file. 
- -.B \-m\fR, \fB--mode \fI\fR -The mode to operate in. Options are: \fIfind-all\fR (the default), -\fIonly-matches\fR, \fIpattern\fR, \fIreplacement\fR, \fIreplace-all\fR -(implied by \fB--replace\fR), or any other grammar rule name. - -.B \--help -Print the usage and exit. - -.B -The main pattern for bpeg to match. By default, this pattern is a string -pattern (see the \fBSTRING PATTERNS\fR section below). - -.B -The input files to search. If no input files are provided and data was -piped in, that data will be used instead. If neither are provided, -\fBbpeg\fR will search through all files in the current directory and -its subdirectories (recursively). - -.SH PATTERNS -Bpeg patterns are based off of a combination of Parsing Expression Grammars -and regular expression syntax. The syntax is designed to map closely to -verbal descriptions of the patterns, and prefix operators are preferred over -suffix operators (as is common in regex syntax). - -Some patterns additionally have "multi-line" variants, which means that they -include the newline character. - -.I -A chain of patterns, pronounced \fI\fB-then-\fI\fR - -.I \fB/\fI \fR -A series of ordered choices (if one pattern matches, the following patterns -will not be attempted), pronounced \fI\fB-or-\fI\fR - -.B .. -Any text \fBup-to-and-including\fR the following pattern, if any (multiline: \fB...\fR) - -.B . -\fBAny\fR character (multiline: $.) - -.B ^ -\fBStart-of-a-line\fR - -.B ^^ -\fBStart-of-the-text\fR - -.B $ -\fBEnd-of-a-line\fR (does not include newline character) - -.B $$ -\fBEnd-of-the-text\fR - -.B _ -Zero or more \fBwhitespace\fR characters (specifically, spaces and tabs) - -.B __ -Zero or more \fBwhitespace-or-newline\fR characters - -.B `\fI\fR -The literal \fBcharacter-\fI\fR - -.B `\fI\fB-\fI\fR -The \fBcharacter-range-\fI\fB-to-\fI\fR - -.B \\\fI\fR -The \fBescape-sequence-\fI\fR (\fB\\n\fR, \fB\\x1F\fR, \fB\\033\fR, etc.) 
- -.B \\\fI\fB-\fI\fR -The \fBescape-sequence-range-\fI\fB-to-\fI\fR - -.B !\fI\fR -\fBNot-\fI\fR - -.B [\fI\fR] -\fBMaybe-\fI\fR - -.B \fI\fR? -\fI\fB-or-not\fR - -.B \fI \fR -.B \fI\fB-\fI \fR -.B \fI\fB+ \fI\fR -\fI\fB-to-\fI\fB-\fI\fBs\fR (repetitions of a pattern) - -.B *\fI\fR -\fBsome-\fI\fBs\fR - -.B +\fI\fR -\fBat-least-one-\fI\fBs\fR - -.B \fI\fR \fB%\fI \fR -\fI\fB-separated-by-\fI\fR (equivalent to \fI -\fB0+(\fI\fB)\fR) - -.B <\fI\fR -\fBJust-after-\fI\fR (lookbehind) - -.B >\fI\fR -\fBJust-before-\fI\fR (lookahead) - -.B @\fI\fR -\fBCapture-\fI\fR - -.B @\fI\fB=\fI\fR -\fBLet-\fI\fB-equal-\fI\fR (named capture) - -.B {\fI\fB => "\fI\fB"} -\fBReplace-\fI\fB-with-\fI\fR. Note: \fI\fR should -be a string, and it may contain references to captured values: \fB@0\fR -(the whole of \fI\fR), \fB@1\fR (the first capture in \fI\fR), -\fB@[\fIfoo\fR]\fR (the capture named \fIfoo\fR in \fI\fR), etc. - -.B \fI\fB == \fI\fR -Will match only if \fI\fR and \fI\fR both match and have the exact -same length. Pronounced \fI\fB-assuming-it-equals-\fI\fR - -.B \fI\fB != \fI\fR -Will match only if \fI\fR matches, but \fI\fR doesn't also match with the -same length. Pronounced \fI\fB-unless-it-equals-\fI\fR - -.B \fI\fB != \fI\fR -Will match only if \fI\fR and \fI\fR don't both match and have the -exact same length. Pronounced \fI\fB-assuming-it-doesn't-equal-\fI\fR - -.B | -This pattern matches the indentation at the beginning of a line that has the -same indentation as the line before (or zero indentation on the first line). - -.B #( \fI\fR )# -A block comment (can be nested) - -.B # \fI\fR -A line comment - -.SH STRING PATTERNS -One of the most common use cases for pattern matching tools is matching plain, -literal strings, or strings that are primarily plain strings, with one or two -patterns. \fBbpeg\fR is designed around this fact. The default mode for bpeg -patterns is "string pattern mode". 
In string pattern mode, all characters -are interpreted literally except for the backslash (\fB\\\fR), which may be -followed by a bpeg pattern (see the \fBPATTERNS\fR section above). Optionally, -the bpeg pattern may be terminated by a semicolon (\fB;\fR). - -.SH EXAMPLES -.TP -.B -ls | bpeg foo -Find files containing the string "foo" (a string pattern) - -.TP -.B -ls | bpeg '.c\\$' -r '.h' -Find files ending with ".c" and replace the extension with ".h" - -.TP -.B -bpeg -p '"foobar"==id parens' my_file.py -Find the literal string \fB"foobar"\fR, assuming it's a complete identifier, -followed by a pair of matching parentheses in the file \fImy_file.py\fR - -.TP -.B -bpeg -g html -p html-element -D matching-tag=a foo.html -Using the \fIhtml\fR grammar, find all \fIhtml-element\fRs matching -the tag \fIa\fR in the file \fIfoo.html\fR - - -.SH AUTHOR -Bruce Hill (bruce@bruce-hill.com) diff --git a/bpeg.c b/bpeg.c index e1b603f..4e3ef5b 100644 --- a/bpeg.c +++ b/bpeg.c @@ -1,7 +1,7 @@ /* * bpeg.c - Source code for the bpeg parser * - * See `man ./bpeg.1` for more details + * See `man ./bp.1` for more details */ #include #include @@ -15,24 +15,28 @@ #include "file_loader.h" #include "grammar.h" #include "utils.h" +#include "viz.h" #include "vm.h" static const char *usage = ( - "BPEG - a Parsing Expression Grammar command line tool\n\n" + "BP - a Parsing Expression Grammar command line tool\n\n" "Usage:\n" - " bpeg [flags] [...]\n\n" + " bp [flags] [...]\n\n" "Flags:\n" " -h --help print the usage and quit\n" " -v --verbose print verbose debugging info\n" + " -e --explain explain the matches\n" " -i --ignore-case preform matching case-insensitively\n" " -d --define : define a grammar rule\n" " -D --define-string : define a grammar rule (string-pattern)\n" - " -p --pattern provide a pattern (equivalent to bpeg '\\()')\n" + " -p --pattern provide a pattern (equivalent to bp '\\()')\n" " -P --pattern-string provide a string pattern (may be useful if '' begins with a 
'-')\n" " -r --replace replace the input pattern with the given replacement\n" " -m --mode set the behavior mode (defult: find-all)\n" " -g --grammar use the specified file as a grammar\n"); +static print_options_t print_options = 0; + static char *getflag(const char *flag, char *argv[], int *i) { size_t n = strlen(flag); @@ -54,7 +58,7 @@ static int print_errors(file_t *f, match_t *m) int ret = 0; if (m->op->op == VM_CAPTURE && m->value.name && streq(m->value.name, "!")) { printf("\033[31;1m"); - print_match(f, m); + print_match(f, m, print_options); printf("\033[0m\n"); fprint_line(stdout, f, m->start, m->end, " "); return 1; @@ -66,13 +70,49 @@ static int print_errors(file_t *f, match_t *m) static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags) { + static int printed_matches = 0; file_t *f = load_file(filename); check(f, "Could not open file: %s", filename); match_t *m = match(g, f, f->contents, pattern, flags); if (m && print_errors(f, m) > 0) _exit(1); if (m != NULL && m->end > m->start + 1) { - print_match(f, m); + ++printed_matches; + + if (flags & BPEG_EXPLAIN) { + if (filename) { + printf("\033[1;4m%s:\033[0m\n", filename); + } + /* + if (printed_matches > 1) + fprintf(stdout, ",\n"); + printf("{\"filename\":\"%s\",\"text\":\"", filename ? 
filename : "-"); + for (char *c = f->contents; c < f->end; c++) { + switch (*c) { + case '"': printf("\\\""); break; + case '\n': printf("\\n"); break; + case '\t': printf("\\t"); break; + case '\\': printf("\\\\"); break; + default: printf("%c", *c); break; + } + } + printf("\",\n\"tree\":{\"type\":\"text\",\"start\":%d,\"end\":%ld,\"children\":[", + 0, f->end - f->contents); + json_match(stdout, f->contents, m); + printf("]}}\n"); + */ + visualize_match(m); + } else { + if (printed_matches > 1) + fputc('\n', stdout); + if (filename) { + if (print_options & PRINT_COLOR) + printf("\033[1;4;33m%s:\033[0m\n", filename); + else + printf("%s:\n", filename); + } + print_match(f, m, print_options); + } destroy_file(&f); return 0; } else { @@ -93,9 +133,9 @@ int main(int argc, char *argv[]) grammar_t *g = new_grammar(); // Load builtins: - if (access("/etc/xdg/bpeg/builtins.bpeg", R_OK) != -1) - load_grammar(g, load_file("/etc/xdg/bpeg/builtins.bpeg")); // Keep in memory for debugging output - sprintf(path, "%s/.config/bpeg/builtins.bpeg", getenv("HOME")); + if (access("/etc/xdg/bp/builtins.bp", R_OK) != -1) + load_grammar(g, load_file("/etc/xdg/bp/builtins.bp")); // Keep in memory for debugging output + sprintf(path, "%s/.config/bp/builtins.bp", getenv("HOME")); if (access(path, R_OK) != -1) load_grammar(g, load_file(path)); // Keep in memory for debugging output @@ -110,6 +150,8 @@ int main(int argc, char *argv[]) return 0; } else if (streq(argv[i], "--verbose") || streq(argv[i], "-v")) { flags |= BPEG_VERBOSE; + } else if (streq(argv[i], "--explain") || streq(argv[i], "-e")) { + flags |= BPEG_EXPLAIN; } else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) { flags |= BPEG_IGNORECASE; } else if (FLAG("--replace") || FLAG("-r")) { @@ -122,11 +164,11 @@ int main(int argc, char *argv[]) } else if (FLAG("--grammar") || FLAG("-g")) { file_t *f = load_file(flag); if (f == NULL) { - sprintf(path, "%s/.config/bpeg/%s.bpeg", getenv("HOME"), flag); + sprintf(path, 
"%s/.config/bp/%s.bp", getenv("HOME"), flag); f = load_file(path); } if (f == NULL) { - sprintf(path, "/etc/xdg/bpeg/%s.bpeg", flag); + sprintf(path, "/etc/xdg/bp/%s.bp", flag); f = load_file(path); } check(f != NULL, "Couldn't find grammar: %s", flag); @@ -180,11 +222,7 @@ int main(int argc, char *argv[]) } if (isatty(STDOUT_FILENO)) { - char *epsilon = "''"; - file_t *is_tty_file = spoof_file("", epsilon); - vm_op_t *p = bpeg_pattern(is_tty_file, epsilon); - check(p, "Failed to compile is-tty"); - add_def(g, is_tty_file, epsilon, "is-tty", p); + print_options |= PRINT_COLOR | PRINT_LINE_NUMBERS; } vm_op_t *pattern = lookup(g, rule); @@ -210,7 +248,6 @@ int main(int argc, char *argv[]) ret &= run_match(g, NULL, pattern, flags); } - return ret; } diff --git a/compiler.c b/compiler.c index 6a8de58..48d0023 100644 --- a/compiler.c +++ b/compiler.c @@ -281,6 +281,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) str = after_spaces(str); if (!matchchar(&str, ')')) file_err(f, origin, str, "This parenthesis group isn't properly closed."); + op->start = origin; + op->end = str; break; } // Square brackets @@ -330,6 +332,17 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) op->len = pat->len; break; } + // Hide + case '~': { + vm_op_t *pat = bpeg_simplepattern(f, str); + if (!pat) + file_err(f, str, str, "There should be a pattern after this '~'"); + str = pat->end; + op->op = VM_HIDE; + op->len = 0; + op->args.pat = pat; + break; + } // Replacement case '{': { str = after_spaces(str); diff --git a/grammars/bpeg.bp b/grammars/bpeg.bp new file mode 100644 index 0000000..288ceee --- /dev/null +++ b/grammars/bpeg.bp @@ -0,0 +1,78 @@ +# This is a file defining the BPEG grammar using BPEG syntax + +Grammar: __ *(Def [__`;])%__ __ ($$ / @!={... 
=> "Could not parse this code"}) +Def: @name=id _ `: __ ( + @definition=extended-pat + / $$ @!={=>"No definition for rule"} + / @!={...>(`;/id_`:/$) => "Invalid definition: @0"}) + +# This is used for command line arguments: +String-pattern: *(`\ (escape-sequence / pat [`;]) / .) + +pat: simple-pat !(__("!="/"==")) / suffixed-pat +simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No + / Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens + +suffixed-pat: ( + Eq-pat + / Not-eq-pat +) + +Eq-pat: @first=pat__"=="__@second=pat +Not-eq-pat: @first=pat__"!="__@second=pat + +Dot: `. !`. +String: ( + `" @s=*(Escape / !`".) (`" / @!={=> "Expected closing quote here"}) + / `' @s=*(Escape / !`'.) (`' / @!={=> "Expected closing quote here"}) + ) +Char-range: `` @low=. `- (@high=. / @!={=> "Expected a second character to form a character range"}) +Char: `` (@s=. / @!={=> "Expected a character following the '`'"}) +Escape-range: `\ @low=escape-sequence `- @high=escape-sequence +Escape: `\ (@s=escape-sequence + / $ @!={=>"Backslashes are used for escape sequences, not splitting lines"} + / @!={. *(Abc/`0-9) => "Invalid escape sequence: '@0'"} +) +escape-sequence: ( + `n/`t/`r/`e/`b/`a/`v + / 1-3 `0-7 + / `x 2 (`0-9/`a-f/`A-F) + ) +No: `! (_@pat / @!={=>"Expected a pattern after the exclamation mark"}) +Nodent: `| +Upto-and: 2-3`. [_@simple-pat] +Repeat: ( + @min=int _ `- _ @max=int + / @min=int _ `+ @max='' + / @min=@max=int + ) __ @repeat-pat=pat [__`%__@sep=pat] +Optional: `[ __ extended-pat (__`] / @!={=> "Expected closing square bracket here"}) +After: `< _ pat +Before: `> _ pat +Capture: `@ [_ @capture-name=(id/`!) 
__ !"=>" `=] __ (@capture=pat / @!={=> "Expected pattern to capture"}) +Replace: `{ __ ( + [@replace-pat=extended-pat __] "=>" [__ @replacement=String] + ) __ (`} / @!={=> "Expected closing brace here"}) +Ref: @name=id !(_`:) + +parens: `( __ extended-pat (__ `) / @!={=> "Expected closing parenthesis here"}) + +Chain: 2+@pat%__ +Otherwise: 2+@(Chain/pat)%(__`/__) +extended-pat: Otherwise / Chain / pat + +# Special-symbol rules: +_: *(` / \t) +__: *(` / \t / \r / \n / comment) +$$: !$. +$: !. +^^: !<$. +^: !<. + +id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-) + +line-comment: `# .. $ +block-comment: "#(" *(block-comment / !")#" .) ")#" + +# Note: comments are undefined by default in regular BPEG +comment: block-comment / line-comment diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg deleted file mode 100644 index 288ceee..0000000 --- a/grammars/bpeg.bpeg +++ /dev/null @@ -1,78 +0,0 @@ -# This is a file defining the BPEG grammar using BPEG syntax - -Grammar: __ *(Def [__`;])%__ __ ($$ / @!={... => "Could not parse this code"}) -Def: @name=id _ `: __ ( - @definition=extended-pat - / $$ @!={=>"No definition for rule"} - / @!={...>(`;/id_`:/$) => "Invalid definition: @0"}) - -# This is used for command line arguments: -String-pattern: *(`\ (escape-sequence / pat [`;]) / .) - -pat: simple-pat !(__("!="/"==")) / suffixed-pat -simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No - / Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens - -suffixed-pat: ( - Eq-pat - / Not-eq-pat -) - -Eq-pat: @first=pat__"=="__@second=pat -Not-eq-pat: @first=pat__"!="__@second=pat - -Dot: `. !`. -String: ( - `" @s=*(Escape / !`".) (`" / @!={=> "Expected closing quote here"}) - / `' @s=*(Escape / !`'.) (`' / @!={=> "Expected closing quote here"}) - ) -Char-range: `` @low=. `- (@high=. / @!={=> "Expected a second character to form a character range"}) -Char: `` (@s=. 
/ @!={=> "Expected a character following the '`'"}) -Escape-range: `\ @low=escape-sequence `- @high=escape-sequence -Escape: `\ (@s=escape-sequence - / $ @!={=>"Backslashes are used for escape sequences, not splitting lines"} - / @!={. *(Abc/`0-9) => "Invalid escape sequence: '@0'"} -) -escape-sequence: ( - `n/`t/`r/`e/`b/`a/`v - / 1-3 `0-7 - / `x 2 (`0-9/`a-f/`A-F) - ) -No: `! (_@pat / @!={=>"Expected a pattern after the exclamation mark"}) -Nodent: `| -Upto-and: 2-3`. [_@simple-pat] -Repeat: ( - @min=int _ `- _ @max=int - / @min=int _ `+ @max='' - / @min=@max=int - ) __ @repeat-pat=pat [__`%__@sep=pat] -Optional: `[ __ extended-pat (__`] / @!={=> "Expected closing square bracket here"}) -After: `< _ pat -Before: `> _ pat -Capture: `@ [_ @capture-name=(id/`!) __ !"=>" `=] __ (@capture=pat / @!={=> "Expected pattern to capture"}) -Replace: `{ __ ( - [@replace-pat=extended-pat __] "=>" [__ @replacement=String] - ) __ (`} / @!={=> "Expected closing brace here"}) -Ref: @name=id !(_`:) - -parens: `( __ extended-pat (__ `) / @!={=> "Expected closing parenthesis here"}) - -Chain: 2+@pat%__ -Otherwise: 2+@(Chain/pat)%(__`/__) -extended-pat: Otherwise / Chain / pat - -# Special-symbol rules: -_: *(` / \t) -__: *(` / \t / \r / \n / comment) -$$: !$. -$: !. -^^: !<$. -^: !<. - -id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-) - -line-comment: `# .. $ -block-comment: "#(" *(block-comment / !")#" .) 
")#" - -# Note: comments are undefined by default in regular BPEG -comment: block-comment / line-comment diff --git a/grammars/builtins.bp b/grammars/builtins.bp new file mode 100644 index 0000000..7de936f --- /dev/null +++ b/grammars/builtins.bp @@ -0,0 +1,84 @@ +yes: '' +no: !'' + +# Configurable options: +is-tty: no # Defined as either always-match or always-fail, depending on stdout +include-binary-files: no; +is-text-file: ^^ >32(\t/\n/\r/\x20-x7e/!\x00-x7f utf8-codepoint/$$) + +# Meta-rules for acting on everything: +pattern: !'' # Not defined by default +replacement: !'' # Not defined by default +replace-all: ( + (include-binary-files / is-text-file) + +(...(>pattern replacement)) ... +) +find-all: ( + (include-binary-files / is-text-file) + *(!..pattern ~(..\n)) + +(+(..@pattern) ..(\n/$$) / ~(..\n)) + [{!<\n => "\n"}] +) +only-matches: ( + (include-binary-files / is-text-file) + +{...@pattern =>'@1\n'} +) + +# Helper definitions (commonly used) +url: ( + "file://" +(`/ *url-char) + / "mailto:" email + / ("https"/"http"/"ftp") "://" [>..`@ +url-char [`: +url-char] `@] + (ipv4/ipv6/domain) [`: int] [url-path] +) +url-path: +(`/ *url-char) [`? +(+url-char`=+url-char)%`&] +ipv4: 4 int % `. +ipv6: 8 (4 Hex) % `: +domain: +(+(Abc/digit/`-))%`. +url-char: Abc/digit/`$/`-/`_/`./`+/`!/`*/`'/`(/`)/`,/`% +email: +(Abc/digit/`./`-/`+) `@ domain + +nodent: | !(\t/` ) +indent: | (` /\t) +dedent: $ !(nodent/indent) +utf8-codepoint: ( + \x00-x7f + / \xc0-xdf 1\x80-xbf + / \xe0-xef 2\x80-xbf + / \xf0-xf7 3\x80-xbf +) +crlf: \r\n +cr: \r +anglebraces: `< *(anglebraces / $. != `>) `> +brackets: `[ *(brackets / $. != `]) `] +braces: `{ *(braces / $. != `}) `} +parens: `( *(parens / $. != `)) `) +id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9) +id-char: `a-z/`A-Z/`_/`0-9 +word: !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_) +edge: !<(`a-z/`A-Z/`_/`0-9) / !>(`0-9/`_) +HEX: `0-9/`A-F +Hex: `0-9/`a-f/`A-F +hex: `0-9/`a-f +number: +`0-9 [`. *`0-9] / `. 
+`0-9 +int: +`0-9 +digit: `0-9 +Abc: `a-z/`A-Z +ABC: `A-Z +abc: `a-z +esc: \e +tab: \t +nl: \n; lf: \n +c-block-comment: '/*' ... '*/' +c-line-comment: '//' ..$ +c-comment: c-line-comment / c-block-comment +hash-comment: `# ..$ +comment: !''; # No default definition, can be overridden +WS: ` /\t/\n/\r/comment +ws: ` /\t +$$: !$. +$: !. +^^: !<$. +^: !<. +__: *(` /\t/\n/\r/comment) +_: *(` /\t) diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg deleted file mode 100644 index 697bf27..0000000 --- a/grammars/builtins.bpeg +++ /dev/null @@ -1,97 +0,0 @@ -yes: '' -no: !'' - -# Configurable options: -is-tty: no # Defined as either always-match or always-fail, depending on stdout -print-line-numbers: is-tty -print-filenames: is-tty -highlight: is-tty -include-binary-files: no; -is-text-file: ^^ >32(\t/\n/\r/\x20-x7e/!\x00-x7f utf8-codepoint/$$) - -# Meta-rules for acting on everything: -pattern: !'' # Not defined by default -replacement: !'' # Not defined by default -replace-all: ( - (include-binary-files / is-text-file) - define-highlights - add-filename - *(...(>pattern hl-replacement)) ... 
-) -find-all: ( - (include-binary-files / is-text-file) - define-highlights - add-filename - *(!..pattern {..\n=>}) - +(>..pattern add-line-number +(..hl-pattern) ..(\n/$$) / {..\n=>}) - [{!<\n => "\n"}] -) -only-matches: ( - (include-binary-files / is-text-file) - define-highlights - add-filename - +{...@hl-pattern =>'@1\n'} -) -add-filename: [print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})] -add-line-number: [print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})] -hl-pattern: {@match=pattern define-highlights => "@hl-start;@match;@hl-end;"} -hl-replacement: {@match=replacement define-highlights => "@hl-start;@match;@hl-end;" } -define-highlights: highlight @hl-start={=>"\033[31;1m"} @hl-end={=>"\033[0m"} / @hl-start="" @hl-end="" - -# Helper definitions (commonly used) -url: ( - "file://" +(`/ *url-char) - / "mailto:" email - / ("https"/"http"/"ftp") "://" [>..`@ +url-char [`: +url-char] `@] - (ipv4/ipv6/domain) [`: int] [url-path] -) -url-path: +(`/ *url-char) [`? +(+url-char`=+url-char)%`&] -ipv4: 4 int % `. -ipv6: 8 (4 Hex) % `: -domain: +(+(Abc/digit/`-))%`. -url-char: Abc/digit/`$/`-/`_/`./`+/`!/`*/`'/`(/`)/`,/`% -email: +(Abc/digit/`./`-/`+) `@ domain - -nodent: | !(\t/` ) -indent: | (` /\t) -dedent: $ !(nodent/indent) -utf8-codepoint: ( - \x00-x7f - / \xc0-xdf 1\x80-xbf - / \xe0-xef 2\x80-xbf - / \xf0-xf7 3\x80-xbf -) -crlf: \r\n -cr: \r -anglebraces: `< *(anglebraces / $. != `>) `> -brackets: `[ *(brackets / $. != `]) `] -braces: `{ *(braces / $. != `}) `} -parens: `( *(parens / $. != `)) `) -id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9) -id-char: `a-z/`A-Z/`_/`0-9 -word: !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_) -HEX: `0-9/`A-F -Hex: `0-9/`a-f/`A-F -hex: `0-9/`a-f -number: +`0-9 [`. *`0-9] / `. +`0-9 -int: +`0-9 -digit: `0-9 -Abc: `a-z/`A-Z -ABC: `A-Z -abc: `a-z -esc: \e -tab: \t -nl: \n; lf: \n -c-block-comment: '/*' ... 
'*/' -c-line-comment: '//' ..$ -c-comment: c-line-comment / c-block-comment -hash-comment: `# ..$ -comment: !''; # No default definition, can be overridden -WS: ` /\t/\n/\r/comment -ws: ` /\t -$$: !$. -$: !. -^^: !<$. -^: !<. -__: *(` /\t/\n/\r/comment) -_: *(` /\t) diff --git a/grammars/html.bp b/grammars/html.bp new file mode 100644 index 0000000..cea19c1 --- /dev/null +++ b/grammars/html.bp @@ -0,0 +1,26 @@ +# HTML grammar +HTML: __ [doctype __] *html-element%__ __ + +doctype: " + +html-element: ( + >(`<("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr")) void-element + / >(`<("script"/"style"/"textarea"/"title")) raw-element + / >(`<("template")) template-element + / normal-element) + +void-element: `< @tag=(id==match-tag) __attributes__ [`/] __ `> + +template-element: `< @tag=(id==match-tag) __`> __ >match-body @body=*(!`<$. / comment / html-element / !(")$.) (") + +raw-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=.. (") + +normal-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=*(!`<$. / comment / html-element / !(")$.) " + +comment: "" + +attributes: *attribute%__ +attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`') +attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`') +match-tag: id +match-body: '' diff --git a/grammars/html.bpeg b/grammars/html.bpeg deleted file mode 100644 index cea19c1..0000000 --- a/grammars/html.bpeg +++ /dev/null @@ -1,26 +0,0 @@ -# HTML grammar -HTML: __ [doctype __] *html-element%__ __ - -doctype: " - -html-element: ( - >(`<("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr")) void-element - / >(`<("script"/"style"/"textarea"/"title")) raw-element - / >(`<("template")) template-element - / normal-element) - -void-element: `< @tag=(id==match-tag) __attributes__ [`/] __ `> - -template-element: `< @tag=(id==match-tag) __`> __ >match-body @body=*(!`<$. / comment / html-element / !(")$.) 
(") - -raw-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=.. (") - -normal-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=*(!`<$. / comment / html-element / !(")$.) " - -comment: "" - -attributes: *attribute%__ -attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`') -attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`') -match-tag: id -match-body: '' diff --git a/grammars/utf8-id.bp b/grammars/utf8-id.bp new file mode 100644 index 0000000..26e98ba --- /dev/null +++ b/grammars/utf8-id.bp @@ -0,0 +1,735 @@ +# Definitions of UTF8-compliant identifiers +utf8-id: utf8-id-start *utf8-id-cont + +utf8-id-start: `A-Z / `a-z / !\x00-x7F ( + \xc2 (\xaa / \xb5 / \xba) +/ \xc3 (\x80-x96 / \x98-xb6 / \xb8-xbf) +/ \xc4-xca\x80-xbf +/ \xcb (\x80-x81 / \x86-x91 / \xa0-xa4 / \xac / \xae) +/ \xcd (\xb0-xb4 / \xb6-xb7 / \xba-xbd / \xbf) +/ \xce (\x86 / \x88-x8a / \x8c / \x8e-xa1 / \xa3-xbf) +/ \xcf (\x80-xb5 / \xb7-xbf) +/ \xd0-xd1\x80-xbf +/ \xd2 (\x80-x81 / \x8a-xbf) +/ \xd3\x80-xbf +/ \xd4 (\x80-xaf / \xb1-xbf) +/ \xd5 (\x80-x96 / \x99 / \xa0-xbf) +/ \xd6\x80-x88 +/ \xd7 (\x90-xaa / \xaf-xb2) +/ \xd8\xa0-xbf +/ \xd9 (\x80-x8a / \xae-xaf / \xb1-xbf) +/ \xda\x80-xbf +/ \xdb (\x80-x93 / \x95 / \xa5-xa6 / \xae-xaf / \xba-xbc / \xbf) +/ \xdc (\x90 / \x92-xaf) +/ \xdd\x8d-xbf +/ \xde (\x80-xa5 / \xb1) +/ \xdf (\x8a-xaa / \xb4-xb5 / \xba) +/ \xe0 ( + \xa0 (\x80-x95 / \x9a / \xa4 / \xa8) + / \xa1 (\x80-x98 / \xa0-xaa) + / \xa2 (\xa0-xb4 / \xb6-xbd) + / \xa4 (\x84-xb9 / \xbd) + / \xa5 (\x90 / \x98-xa1 / \xb1-xbf) + / \xa6 (\x80 / \x85-x8c / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2 / \xb6-xb9 / \xbd) + / \xa7 (\x8e / \x9c-x9d / \x9f-xa1 / \xb0-xb1 / \xbc) + / \xa8 (\x85-x8a / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb6 / \xb8-xb9) + / \xa9 (\x99-x9c / \x9e / \xb2-xb4) + / \xaa (\x85-x8d / \x8f-x91 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb9 / \xbd) + / \xab (\x90 / \xa0-xa1 / \xb9) + / \xac (\x85-x8c / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / 
\xb5-xb9 / \xbd) + / \xad (\x9c-x9d / \x9f-xa1 / \xb1) + / \xae (\x83 / \x85-x8a / \x8e-x90 / \x92-x95 / \x99-x9a / \x9c / \x9e-x9f / \xa3-xa4 / \xa8-xaa / \xae-xb9) / \xaf\x90 + / \xb0 (\x85-x8c / \x8e-x90 / \x92-xa8 / \xaa-xb9 / \xbd) + / \xb1 (\x98-x9a / \xa0-xa1) + / \xb2 (\x80 / \x85-x8c / \x8e-x90 / \x92-xa8 / \xaa-xb3 / \xb5-xb9 / \xbd) + / \xb3 (\x9e / \xa0-xa1 / \xb1-xb2) + / \xb4 (\x85-x8c / \x8e-x90 / \x92-xba / \xbd) + / \xb5 (\x8e / \x94-x96 / \x9f-xa1 / \xba-xbf) + / \xb6 (\x85-x96 / \x9a-xb1 / \xb3-xbb / \xbd) + / \xb7\x80-x86 + / \xb8 (\x81-xb0 / \xb2-xb3) + / \xb9 (\x80-x85 / \x86) + / \xba (\x81-x82 / \x84 / \x86-x8a / \x8c-xa3 / \xa5 / \xa7-xb0 / \xb2-xb3 / \xbd) + / \xbb (\x80-x84 / \x86 / \x9c-x9f) + / \xbc\x80 + / \xbd (\x80-x87 / \x89-xac) + / \xbe\x88-x8c +) +/ \xe1 ( + \x80 (\x80-xaa / \xbf) + / \x81 (\x90-x95 / \x9a-x9d / \xa1 / \xa5-xa6 / \xae-xb0 / \xb5-xbf) + / \x82 (\x80-x81 / \x8e / \xa0-xbf) + / \x83 (\x80-x85 / \x87 / \x8d / \x90-xba / \xbc / \xbd-xbf) + / \x84-x88\x80-xbf + / \x89 (\x80-x88 / \x8a-x8d / \x90-x96 / \x98 / \x9a-x9d / \xa0-xbf) + / \x8a (\x80-x88 / \x8a-x8d / \x90-xb0 / \xb2-xb5 / \xb8-xbe) + / \x8b (\x80 / \x82-x85 / \x88-x96 / \x98-xbf) + / \x8c (\x80-x90 / \x92-x95 / \x98-xbf) + / \x8d\x80-x9a + / \x8e (\x80-x8f / \xa0-xbf) + / \x8f (\x80-xb5 / \xb8-xbd) + / \x90\x81-xbf + / \x91-x98\x80-xbf + / \x99 (\x80-xac / \xaf-xbf) + / \x9a (\x81-x9a / \xa0-xbf) + / \x9b (\x80-xaa / \xae-xb0 / \xb1-xb8) + / \x9c (\x80-x8c / \x8e-x91 / \xa0-xb1) + / \x9d (\x80-x91 / \xa0-xac / \xae-xb0) + / \x9e\x80-xb3 + / \x9f (\x97 / \x9c) + / \xa0\xa0-xbf + / \xa1\x80-xb8 + / \xa2 (\x80-xa8 / \xaa / \xb0-xbf) + / \xa3\x80-xb5 + / \xa4\x80-x9e + / \xa5 (\x90-xad / \xb0-xb4) + / \xa6 (\x80-xab / \xb0-xbf) + / \xa7\x80-x89 + / \xa8 (\x80-x96 / \xa0-xbf) + / \xa9\x80-x94 + / \xaa\xa7 + / \xac\x85-xb3 + / \xad\x85-x8b + / \xae (\x83-xa0 / \xae-xaf / \xba-xbf) + / \xaf\x80-xa5 + / \xb0\x80-xa3 + / \xb1 (\x8d-x8f / \x9a-xbd) + / \xb2 (\x80-x88 / 
\x90-xba / \xbd-xbf) + / \xb3 (\xa9-xac / \xae-xb3 / \xb5-xb6 / \xba) + / \xb4 (\x80-xab / \xac-xbf) + / \xb5-xbb\x80-xbf + / \xbc (\x80-x95 / \x98-x9d / \xa0-xbf) + / \xbd (\x80-x85 / \x88-x8d / \x90-x97 / \x99 / \x9b / \x9d / \x9f-xbd) + / \xbe (\x80-xb4 / \xb6-xbc / \xbe) + / \xbf (\x82-x84 / \x86-x8c / \x90-x93 / \x96-x9b / \xa0-xac / \xb2-xb4 / \xb6-xbc) +) +/ \xe2 ( + \x81 (\xb1 / \xbf) + / \x82\x90-x9c + / \x84 (\x82 / \x87 / \x8a-x93 / \x95 / \x98-x9d / \xa4 / \xa6 / \xa8 / \xaa-xb9 / \xbc-xbf) + / \x85 (\x85-x89 / \x8e / \xa0-xbf) + / \x86\x80-x88 + / \xb0 (\x80-xae / \xb0-xbf) + / \xb1 (\x80-x9e / \xa0-xbf) + / \xb2\x80-xbf + / \xb3 (\x80-xa4 / \xab-xae / \xb2-xb3) + / \xb4 (\x80-xa5 / \xa7 / \xad / \xb0-xbf) + / \xb5 (\x80-xa7 / \xaf) + / \xb6 (\x80-x96 / \xa0-xa6 / \xa8-xae / \xb0-xb6 / \xb8-xbe) + / \xb7 (\x80-x86 / \x88-x8e / \x90-x96 / \x98-x9e) +) +/ \xe3 ( + \x80 (\x85-x87 / \xa1-xa9 / \xb1-xb5 / \xb8-xba / \xbb-xbc) + / \x81\x81-xbf + / \x82 (\x80-x96 / \x9b-x9f / \xa1-xbf) + / \x83 (\x80-xba / \xbc-xbe / \xbf) + / \x84 (\x85-xaf / \xb1-xbf) + / \x85\x80-xbf + / \x86 (\x80-x8e / \xa0-xba) + / \x87\xb0-xbf + / \x90-xbf\x80-xbf +) +/ \xe4 (\x80-xb5\x80-xbf / \xb6\x80-xb5 / \xb8-xbf\x80-xbf) +/ \xe5-xe8\x80-xbf\x80-xbf +/ \xe9 (\x80-xbe\x80-xbf / \xbf\x80-xaf) +/ \xea ( + \x80-x91\x80-xbf + / \x92\x80-x8c + / \x93\x90-xbd + / \x94-x97\x80-xbf + / \x98 (\x80-x8c / \x90-x9f / \xaa-xab) + / \x99 (\x80-xae / \xbf) + / \x9a (\x80-x9d / \xa0-xbf) + / \x9b\x80-xaf + / \x9c (\x97-x9f / \xa2-xbf) + / \x9d\x80-xbf + / \x9e (\x80-x88 / \x8b-xbf) + / \x9f (\x82-x86 / \xb7-xbf) + / \xa0 (\x80-x81 / \x83-x85 / \x87-x8a / \x8c-xa2) + / \xa1\x80-xb3 + / \xa2\x82-xb3 + / \xa3 (\xb2-xb7 / \xbb / \xbd-xbe) + / \xa4 (\x8a-xa5 / \xb0-xbf) + / \xa5 (\x80-x86 / \xa0-xbc) + / \xa6\x84-xb2 + / \xa7 (\x8f / \xa0-xa4 / \xa6 / \xa7-xaf / \xba-xbe) + / \xa8\x80-xa8 + / \xa9 (\x80-x82 / \x84-x8b / \xa0-xb6 / \xba / \xbe-xbf) + / \xaa (\x80-xaf / \xb1 / \xb5-xb6 / \xb9-xbd) + / 
\xab (\x80 / \x82 / \x9b-x9d / \xa0-xaa / \xb2-xb4) + / \xac (\x81-x86 / \x89-x8e / \x91-x96 / \xa0-xa6 / \xa8-xae / \xb0-xbf) + / \xad (\x80-x9a / \x9c-x9f / \xa0-xa7 / \xb0-xbf) + / \xae\x80-xbf + / \xaf\x80-xa2 + / \xb0-xbf\x80-xbf +) +/ \xeb-xec\x80-xbf\x80-xbf +/ \xed ( + \x80-x9d\x80-xbf + / \x9e (\x80-xa3 / \xb0-xbf) + / \x9f (\x80-x86 / \x8b-xbb) +) +/ \xef ( + \xa4-xa8\x80-xbf + / \xa9 (\x80-xad / \xb0-xbf) + / \xaa\x80-xbf + / \xab\x80-x99 + / \xac (\x80-x86 / \x93-x97 / \x9d / \x9f-xa8 / \xaa-xb6 / \xb8-xbc / \xbe) + / \xad (\x80-x81 / \x83-x84 / \x86-xbf) + / \xae\x80-xb1 + / \xaf\x93-xbf + / \xb0-xb3\x80-xbf + / \xb4\x80-xbd + / \xb5\x90-xbf + / \xb6 (\x80-x8f / \x92-xbf) + / \xb7 (\x80-x87 / \xb0-xbb) + / \xb9 (\xb0-xb4 / \xb6-xbf) + / \xba\x80-xbf + / \xbb\x80-xbc + / \xbc\xa1-xba + / \xbd (\x81-x9a / \xa6-xaf / \xb0-xbf) + / \xbe\x80-xbe + / \xbf (\x82-x87 / \x8a-x8f / \x92-x97 / \x9a-x9c) +) +/ \xf0 ( + \x90 ( + \x80 (\x80-x8b / \x8d-xa6 / \xa8-xba / \xbc-xbd / \xbf) + / \x81 (\x80-x8d / \x90-x9d) + / \x82\x80-xbf + / \x83\x80-xba + / \x85\x80-xb4 + / \x8a (\x80-x9c / \xa0-xbf) + / \x8b\x80-x90 + / \x8c (\x80-x9f / \xad-xbf) + / \x8d (\x80-x8a / \x90-xb5) + / \x8e (\x80-x9d / \xa0-xbf) + / \x8f (\x80-x83 / \x88-x8f / \x91-x95) + / \x90-x91\x80-xbf + / \x92 (\x80-x9d / \xb0-xbf) + / \x93 (\x80-x93 / \x98-xbb) + / \x94 (\x80-xa7 / \xb0-xbf) + / \x95\x80-xa3 + / \x98-x9b\x80-xbf + / \x9c\x80-xb6 + / \x9d (\x80-x95 / \xa0-xa7) + / \xa0 (\x80-x85 / \x88 / \x8a-xb5 / \xb7-xb8 / \xbc / \xbf) + / \xa1 (\x80-x95 / \xa0-xb6) + / \xa2\x80-x9e + / \xa3 (\xa0-xb2 / \xb4-xb5) + / \xa4 (\x80-x95 / \xa0-xb9) + / \xa6 (\x80-xb7 / \xbe-xbf) + / \xa8 (\x80 / \x90-x93 / \x95-x97 / \x99-xb5) + / \xa9\xa0-xbc + / \xaa\x80-x9c + / \xab (\x80-x87 / \x89-xa4) + / \xac\x80-xb5 + / \xad (\x80-x95 / \xa0-xb2) + / \xae\x80-x91 + / \xb0\x80-xbf + / \xb1\x80-x88 + / \xb2-xb3\x80-xb2 + / \xb4\x80-xa3 + / \xbc (\x80-x9c / \xa7 / \xb0-xbf) + / \xbd\x80-x85 + / \xbf\xa0-xb6 + ) + / 
\x91 ( + \x80\x83-xb7 + / \x82\x83-xaf + / \x83\x90-xa8 + / \x84\x83-xa6 + / \x85 (\x84 / \x90-xb2 / \xb6) + / \x86\x83-xb2 + / \x87 (\x81-x84 / \x9a / \x9c) + / \x88 (\x80-x91 / \x93-xab) + / \x8a (\x80-x86 / \x88 / \x8a-x8d / \x8f-x9d / \x9f-xa8 / \xb0-xbf) + / \x8b\x80-x9e + / \x8c (\x85-x8c / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb9 / \xbd) + / \x8d (\x90 / \x9d-xa1) + / \x90\x80-xb4 + / \x91 (\x87-x8a / \x9f) + / \x92\x80-xaf + / \x93 (\x84-x85 / \x87) + / \x96\x80-xae + / \x97\x98-x9b + / \x98\x80-xaf + / \x99\x84 + / \x9a (\x80-xaa / \xb8) + / \x9c\x80-x9a + / \xa0\x80-xab + / \xa2\xa0-xbf + / \xa3 (\x80-x9f / \xbf) + / \xa6 (\xa0-xa7 / \xaa-xbf) + / \xa7 (\x80-x90 / \xa1 / \xa3) + / \xa8 (\x80 / \x8b-xb2 / \xba) + / \xa9 (\x90 / \x9c-xbf) + / \xaa (\x80-x89 / \x9d) + / \xab\x80-xb8 + / \xb0 (\x80-x88 / \x8a-xae) + / \xb1 (\x80 / \xb2-xbf) + / \xb2\x80-x8f + / \xb4 (\x80-x86 / \x88-x89 / \x8b-xb0) + / \xb5 (\x86 / \xa0-xa5 / \xa7-xa8 / \xaa-xbf) + / \xb6 (\x80-x89 / \x98) + / \xbb\xa0-xb2 + ) + / \x92 (\x80-x8d\x80-xbf / \x8e\x80-x99 / \x90\x80-xbf / \x91\x80-xae / \x92-x94\x80-xbf / \x95\x80-x83) + / \x93 (\x80-x8f\x80-xbf / \x90\x80-xae) + / \x94 (\x90-x98\x80-xbf / \x99\x80-x86) + / \x96 ( + \xa0-xa7\x80-xbf + / \xa8\x80-xb8 + / \xa9\x80-x9e + / \xab\x90-xad + / \xac\x80-xaf + / \xad (\x80-x83 / \xa3-xb7 / \xbd-xbf) + / \xae\x80-x8f + / \xb9-xbc\x80-xbf + / \xbd (\x80-x8a / \x90) + / \xbe\x93-x9f + / \xbf (\xa0-xa1 / \xa3) + ) + / \x97\x80-xbf\x80-xbf + / \x98 (\x80-x9e\x80-xbf / \x9f\x80-xb7 / \xa0-xaa\x80-xbf / \xab\x80-xb2) + / \x9b ( + \x80-x83\x80-xbf + / \x84\x80-x9e + / \x85 (\x90-x92 / \xa4-xa7 / \xb0-xbf) + / \x86-x8a\x80-xbf + / \x8b\x80-xbb + / \xb0\x80-xbf + / \xb1 (\x80-xaa / \xb0-xbc) + / \xb2 (\x80-x88 / \x90-x99) + ) + / \x9d ( + \x90\x80-xbf + / \x91 (\x80-x94 / \x96-xbf) + / \x92 (\x80-x9c / \x9e-x9f / \xa2 / \xa5-xa6 / \xa9-xac / \xae-xb9 / \xbb / \xbd-xbf) + / \x93 (\x80-x83 / \x85-xbf) + / \x94 (\x80-x85 / \x87-x8a / 
\x8d-x94 / \x96-x9c / \x9e-xb9 / \xbb-xbe) + / \x95 (\x80-x84 / \x86 / \x8a-x90 / \x92-xbf) + / \x96-x99\x80-xbf + / \x9a (\x80-xa5 / \xa8-xbf) + / \x9b (\x80 / \x82-x9a / \x9c-xba / \xbc-xbf) + / \x9c (\x80-x94 / \x96-xb4 / \xb6-xbf) + / \x9d (\x80-x8e / \x90-xae / \xb0-xbf) + / \x9e (\x80-x88 / \x8a-xa8 / \xaa-xbf) + / \x9f (\x80-x82 / \x84-x8b) + ) + / \x9e ( + \x84 (\x80-xac / \xb7-xbd) + / \x85\x8e + / \x8b\x80-xab + / \xa0-xa2\x80-xbf + / \xa3\x80-x84 + / \xa4\x80-xbf + / \xa5 (\x80-x83 / \x8b) + / \xb8 (\x80-x83 / \x85-x9f / \xa1-xa2 / \xa4 / \xa7 / \xa9-xb2 / \xb4-xb7 / \xb9 / \xbb) + / \xb9 (\x82 / \x87 / \x89 / \x8b / \x8d-x8f / \x91-x92 / \x94 / \x97 / \x99 / \x9b / \x9d / \x9f / \xa1-xa2 / \xa4 / \xa7-xaa / \xac-xb2 / \xb4-xb7 / \xb9-xbc / \xbe) + / \xba (\x80-x89 / \x8b-x9b / \xa1-xa3 / \xa5-xa9 / \xab-xbb) + ) + / \xa0-xa9\x80-xbf\x80-xbf + / \xaa (\x80-x9a\x80-xbf / \x9b\x80-x96 / \x9c-xbf\x80-xbf) + / \xab ( + \x80-x9b\x80-xbf + / \x9c\x80-xb4 + / \x9d-x9f\x80-xbf + / \xa0 (\x80-x9d / \xa0-xbf) + / \xa1-xbf\x80-xbf + ) + / \xac ( + \x80-xb9\x80-xbf + / \xba (\x80-xa1 / \xb0-xbf) + / \xbb-xbf\x80-xbf + ) + / \xad\x80-xbf\x80-xbf + / \xae (\x80-xae\x80-xbf / \xaf\x80-xa0) + / \xaf (\xa0-xa7\x80-xbf / \xa8\x80-x9d) +) +) + +utf8-id-cont: `0-9 / `A-Z / `_ / `a-z / !\x00-x7F ( + \xc2 (\xaa / \xb5 / \xb7 / \xba) +/ \xc3 (\x80-x96 / \x98-xb6 / \xb8-xbf) +/ \xc4-xca\x80-xbf +/ \xcb (\x80-x81 / \x86-x91 / \xa0-xa4 / \xac / \xae) +/ \xcc\x80-xbf +/ \xcd (\x80-xb4 / \xb6-xb7 / \xba-xbd / \xbf) +/ \xce (\x86-x8a / \x8c / \x8e-xa1 / \xa3-xbf) +/ \xcf (\x80-xb5 / \xb7-xbf) +/ \xd0-xd1\x80-xbf +/ \xd2 (\x80-x81 / \x83-x87 / \x8a-xbf) +/ \xd3\x80-xbf +/ \xd4 (\x80-xaf / \xb1-xbf) +/ \xd5 (\x80-x96 / \x99 / \xa0-xbf) +/ \xd6 (\x80-x88 / \x91-xbd / \xbf) +/ \xd7 (\x81-x82 / \x84-x85 / \x87 / \x90-xaa / \xaf-xb2) +/ \xd8 (\x90-x9a / \xa0-xbf) +/ \xd9 (\x80-xa9 / \xae-xbf) +/ \xda\x80-xbf +/ \xdb (\x80-x93 / \x95-x9c / \x9f-xa8 / \xaa-xbc / \xbf) +/ \xdc\x90-xbf +/ 
\xdd (\x80-x8a / \x8d-xbf) +/ \xde\x80-xb1 +/ \xdf (\x80-xb5 / \xba / \xbd) +/ \xe0 ( + \xa0\x80-xad + / \xa1 (\x80-x9b / \xa0-xaa) + / \xa2 (\xa0-xb4 / \xb6-xbd) + / \xa3 (\x93-xa1 / \xa3-xbf) + / \xa4\x80-xbf + / \xa5 (\x80-xa3 / \xa6-xaf / \xb1-xbf) + / \xa6 (\x80-x83 / \x85-x8c / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2 / \xb6-xb9 / \xbc-xbf) + / \xa7 (\x80-x84 / \x87-x88 / \x8b-x8e / \x97 / \x9c-x9d / \x9f-xa3 / \xa6-xb1 / \xbc / \xbe) + / \xa8 (\x81-x83 / \x85-x8a / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb6 / \xb8-xb9 / \xbc / \xbe-xbf) + / \xa9 (\x80-x82 / \x87-x88 / \x8b-x8d / \x91 / \x99-x9c / \x9e / \xa6-xb5) + / \xaa (\x81-x83 / \x85-x8d / \x8f-x91 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb9 / \xbc-xbf) + / \xab (\x80-x85 / \x87-x89 / \x8b-x8d / \x90 / \xa0-xa3 / \xa6-xaf / \xb9-xbf) + / \xac (\x81-x83 / \x85-x8c / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb9 / \xbc-xbf) + / \xad (\x80-x84 / \x87-x88 / \x8b-x8d / \x96-x97 / \x9c-x9d / \x9f-xa3 / \xa6-xaf / \xb1) + / \xae (\x82-x83 / \x85-x8a / \x8e-x90 / \x92-x95 / \x99-x9a / \x9c / \x9e-x9f / \xa3-xa4 / \xa8-xaa / \xae-xb9 / \xbe-xbf) + / \xaf (\x80-x82 / \x86-x88 / \x8a-x8d / \x90 / \x97 / \xa6-xaf) + / \xb0 (\x80-x8c / \x8e-x90 / \x92-xa8 / \xaa-xb9 / \xbd-xbf) + / \xb1 (\x80-x84 / \x86-x88 / \x8a-x8d / \x95-x96 / \x98-x9a / \xa0-xa3 / \xa6-xaf) + / \xb2 (\x80-x83 / \x85-x8c / \x8e-x90 / \x92-xa8 / \xaa-xb3 / \xb5-xb9 / \xbc-xbf) + / \xb3 (\x80-x84 / \x86-x88 / \x8a-x8d / \x95-x96 / \x9e / \xa0-xa3 / \xa6-xaf / \xb1-xb2) + / \xb4 (\x80-x83 / \x85-x8c / \x8e-x90 / \x92-xbf) + / \xb5 (\x80-x84 / \x86-x88 / \x8a-x8e / \x94-x97 / \x9f-xa3 / \xa6-xaf / \xba-xbf) + / \xb6 (\x82-x83 / \x85-x96 / \x9a-xb1 / \xb3-xbb / \xbd) + / \xb7 (\x80-x86 / \x8a / \x8f-x94 / \x96 / \x98-x9f / \xa6-xaf / \xb2-xb3) + / \xb8\x81-xba + / \xb9 (\x80-x8e / \x90-x99) + / \xba (\x81-x82 / \x84 / \x86-x8a / \x8c-xa3 / \xa5 / \xa7-xbd) + / \xbb (\x80-x84 / \x86 / \x88-x8d / \x90-x99 / \x9c-x9f) + / \xbc (\x80 / 
\x98-x99 / \xa0-xa9 / \xb5 / \xb7 / \xb9 / \xbe-xbf) + / \xbd (\x80-x87 / \x89-xac / \xb1-xbf) + / \xbe (\x80-x84 / \x86-x97 / \x99-xbc) + / \xbf\x86 +) +/ \xe1 ( + \x80\x80-xbf + / \x81 (\x80-x89 / \x90-xbf) + / \x82 (\x80-x9d / \xa0-xbf) + / \x83 (\x80-x85 / \x87 / \x8d / \x90-xba / \xbc-xbf) + / \x84-x88\x80-xbf + / \x89 (\x80-x88 / \x8a-x8d / \x90-x96 / \x98 / \x9a-x9d / \xa0-xbf) + / \x8a ( + \x80-x88 + / \x8a-x8d + / \x90-xb0 + / \xb2-xb5 + / \xb8-xbe + ) + / \x8b (\x80 / \x82-x85 / \x88-x96 / \x98-xbf) + / \x8c (\x80-x90 / \x92-x95 / \x98-xbf) + / \x8d (\x80-x9a / \x9d-x9f / \xa9-xb1) + / \x8e (\x80-x8f / \xa0-xbf) + / \x8f (\x80-xb5 / \xb8-xbd) + / \x90\x81-xbf + / \x91\x80-xbf + / \x99 (\x80-xac / \xaf-xbf) + / \x9a (\x81-x9a / \xa0-xbf) + / \x9b (\x80-xaa / \xae-xb8) + / \x9c (\x80-x8c / \x8e-x94 / \xa0-xb4) + / \x9d (\x80-x93 / \xa0-xac / \xae-xb0 / \xb2-xb3) + / \x9e\x80-xbf + / \x9f (\x80-x93 / \x97 / \x9c-x9d / \xa0-xa9) + / \xa0 (\x8b-x8d / \x90-x99 / \xa0-xbf) + / \xa1\x80-xb8 + / \xa2 (\x80-xaa / \xb0-xbf) + / \xa3\x80-xb5 + / \xa4 (\x80-x9e / \xa0-xab / \xb0-xbb) + / \xa5 (\x86-xad / \xb0-xb4) + / \xa6 (\x80-xab / \xb0-xbf) + / \xa7 (\x80-x89 / \x90-x9a) + / \xa8 (\x80-x9b / \xa0-xbf) + / \xa9 (\x80-x9e / \xa0-xbc / \xbf) + / \xaa (\x80-x89 / \x90-x99 / \xa7 / \xb0-xbd) + / \xac\x80-xbf + / \xad (\x80-x8b / \x90-x99 / \xab-xb3) + / \xae\x80-xbf + / \xaf\x80-xb3 + / \xb0\x80-xb7 + / \xb1 (\x80-x89 / \x8d-xbd) + / \xb2 (\x80-x88 / \x90-xba / \xbd-xbf) + / \xb3 (\x90-x92 / \x94-xba) + / \xb4-xb6\x80-xbf + / \xb7 (\x80-xb9 / \xbb-xbf) + / \xb8-xbb\x80-xbf + / \xbc (\x80-x95 / \x98-x9d / \xa0-xbf) + / \xbd (\x80-x85 / \x88-x8d / \x90-x97 / \x99 / \x9b / \x9d / \x9f-xbd) + / \xbe (\x80-xb4 / \xb6-xbc / \xbe) + / \xbf (\x82-x84 / \x86-x8c / \x90-x93 / \x96-x9b / \xa0-xac / \xb2-xb4 / \xb6-xbc) +) +/ \xe2 ( + \x80\xbf + / \x81 (\x80 / \x94 / \xb1 / \xbf) + / \x82\x90-x9c + / \x83 (\x90-x9c / \xa1 / \xa5-xb0) + / \x84 (\x82 / \x87 / \x8a-x93 / \x95 / 
\x98-x9d / \xa4 / \xa6 / \xa8 / \xaa-xb9 / \xbc-xbf) + / \x85 (\x85-x89 / \x8e / \xa0-xbf) + / \x86\x80-x88 + / \xb0 (\x80-xae / \xb0-xbf) + / \xb1 (\x80-x9e / \xa0-xbf) + / \xb2\x80-xbf + / \xb3 (\x80-xa4 / \xab-xb3) + / \xb4 (\x80-xa5 / \xa7 / \xad / \xb0-xbf) + / \xb5 (\x80-xa7 / \xaf / \xbf) + / \xb6 (\x80-x96 / \xa0-xa6 / \xa8-xae / \xb0-xb6 / \xb8-xbe) + / \xb7 (\x80-x86 / \x88-x8e / \x90-x96 / \x98-x9e / \xa0-xbf) +) +/ \xe3 ( + \x80 (\x85-x87 / \xa1-xaf / \xb1-xb5 / \xb8-xbc) + / \x81\x81-xbf + / \x82 (\x80-x96 / \x99-x9f / \xa1-xbf) + / \x83 (\x80-xba / \xbc-xbf) + / \x84 (\x85-xaf / \xb1-xbf) + / \x85\x80-xbf + / \x86 (\x80-x8e / \xa0-xba) + / \x87\xb0-xbf + / \x90-xbf\x80-xbf +) +/ \xe4 (\x80-xb5\x80-xbf / \xb6\x80-xb5 / \xb8-xbf\x80-xbf) +/ \xe5-xe8\x80-xbf\x80-xbf +/ \xe9 (\x80-xbe\x80-xbf / \xbf\x80-xaf) +/ \xea ( + \x80-x91\x80-xbf + / \x92\x80-x8c + / \x93\x90-xbd + / \x94-x97\x80-xbf + / \x98 (\x80-x8c / \x90-xab) + / \x99 (\x80-xaf / \xb4-xbd / \xbf) + / \x9a\x80-xbf + / \x9b\x80-xb1 + / \x9c (\x97-x9f / \xa2-xbf) + / \x9d\x80-xbf + / \x9e (\x80-x88 / \x8b-xbf) + / \x9f (\x82-x86 / \xb7-xbf) + / \xa0\x80-xa7 + / \xa1\x80-xb3 + / \xa2\x80-xbf + / \xa3 (\x80-x85 / \x90-x99 / \xa0-xb7 / \xbb / \xbd-xbf) + / \xa4 (\x80-xad / \xb0-xbf) + / \xa5 (\x80-x93 / \xa0-xbc) + / \xa6\x80-xbf + / \xa7 (\x80 / \x8f-x99 / \xa0-xbe) + / \xa8\x80-xb6 + / \xa9 (\x80-x8d / \x90-x99 / \xa0-xb6 / \xba-xbf) + / \xaa\x80-xbf + / \xab (\x80-x82 / \x9b-x9d / \xa0-xaf / \xb2-xb6) + / \xac (\x81-x86 / \x89-x8e / \x91-x96 / \xa0-xa6 / \xa8-xae / \xb0-xbf) + / \xad (\x80-x9a / \x9c-xa7 / \xb0-xbf) + / \xae\x80-xbf + / \xaf (\x80-xaa / \xac-xad / \xb0-xb9) + / \xb0-xbf\x80-xbf +) +/ \xeb\x80-xbf\x80-xbf +/ \xec\x80-xbf\x80-xbf +/ \xed ( + \x80-x9d\x80-xbf + / \x9e (\x80-xa3 / \xb0-xbf) + / \x9f (\x80-x86 / \x8b-xbb) +) +/ \xef ( + \xa4-xa8\x80-xbf + / \xa9 (\x80-xad / \xb0-xbf) + / \xaa\x80-xbf + / \xab\x80-x99 + / \xac (\x80-x86 / \x93-x97 / \x9d-xa8 / \xaa-xb6 / \xb8-xbc / 
\xbe) + / \xad (\x80-x81 / \x83-x84 / \x86-xbf) + / \xae\x80-xb1 + / \xaf\x93-xbf + / \xb0\x80-xbf + / \xb1 (\x80-x9d / \x80-xbf / \xa4-xbf) + / \xb2-xb3\x80-xbf + / \xb4\x80-xbd + / \xb5\x90-xbf + / \xb6 (\x80-x8f / \x92-xbf) + / \xb7 (\x80-x87 / \xb0-xb9 / \xb0-xbb) + / \xb8 (\x80-x8f / \xa0-xaf / \xb3-xb4) + / \xb9 (\x8d-x8f / \xb0-xb4 / \xb1 / \xb3 / \xb6-xbf / \xb7 / \xb9 / \xbb / \xbd / \xbf) + / \xba\x80-xbf + / \xbb\x80-xbc + / \xbc (\x90-x99 / \xa1-xba / \xbf) + / \xbd (\x81-x9a / \xa6-xbf) + / \xbe\x80-xbe + / \xbf (\x82-x87 / \x8a-x8f / \x92-x97 / \x9a-x9c) +) +/ \xf0 ( + \x90 ( + \x80 (\x80-x8b / \x8d-xa6 / \xa8-xba / \xbc-xbd / \xbf) + / \x81 (\x80-x8d / \x90-x9d) + / \x82\x80-xbf + / \x83\x80-xba + / \x85\x80-xb4 + / \x87\xbd + / \x8a (\x80-x9c / \xa0-xbf) + / \x8b (\x80-x90 / \xa0) + / \x8c (\x80-x9f / \xad-xbf) + / \x8d (\x80-x8a / \x90-xba) + / \x8e (\x80-x9d / \xa0-xbf) + / \x8f (\x80-x83 / \x88-x8f / \x91-x95) + / \x90-x91\x80-xbf + / \x92 (\x80-x9d / \xa0-xa9 / \xb0-xbf) + / \x93 (\x80-x93 / \x98-xbb) + / \x94 (\x80-xa7 / \xb0-xbf) + / \x95\x80-xa3 + / \x98-x9b\x80-xbf + / \x9c\x80-xb6 + / \x9d (\x80-x95 / \xa0-xa7) + / \xa0 (\x80-x85 / \x88 / \x8a-xb5 / \xb7-xb8 / \xbc / \xbf) + / \xa1 (\x80-x95 / \xa0-xb6) + / \xa2\x80-x9e + / \xa3 (\xa0-xb2 / \xb4-xb5) + / \xa4 (\x80-x95 / \xa0-xb9) + / \xa6 (\x80-xb7 / \xbe-xbf) + / \xa8 (\x80-x83 / \x85-x86 / \x8c-x93 / \x95-x97 / \x99-xb5 / \xb8-xba / \xbf) + / \xa9\xa0-xbc + / \xaa\x80-x9c + / \xab (\x80-x87 / \x89-xa6) + / \xac\x80-xb5 + / \xad (\x80-x95 / \xa0-xb2) + / \xae\x80-x91 + / \xb0\x80-xbf + / \xb1\x80-x88 + / \xb2\x80-xb2 + / \xb3\x80-xb2 + / \xb4 (\x80-xa7 / \xb0-xb9) + / \xbc (\x80-x9c / \xa7 / \xb0-xbf) + / \xbd\x80-x90 + / \xbf\xa0-xb6 + ) + / \x91 ( + \x80\x80-xbf + / \x81 (\x80-x86 / \xa6-xaf / \xbf) + / \x82\x80-xba + / \x83 (\x90-xa8 / \xb0-xb9) + / \x84 (\x80-xb4 / \xb6-xbf) + / \x85 (\x84-x86 / \x90-xb3 / \xb6) + / \x86\x80-xbf + / \x87 (\x80-x84 / \x89-x8c / \x90-x9a / \x9c) + / 
\x88 (\x80-x91 / \x93-xb7 / \xbe) + / \x8a (\x80-x86 / \x88 / \x8a-x8d / \x8f-x9d / \x9f-xa8 / \xb0-xbf) + / \x8b (\x80-xaa / \xb0-xb9) + / \x8c (\x80-x83 / \x85-x8c / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb9 / \xbb-xbf) + / \x8d (\x80-x84 / \x87-x88 / \x8b-x8d / \x90 / \x97 / \x9d-xa3 / \xa6-xac / \xb0-xb4) + / \x90\x80-xbf + / \x91 (\x80-x8a / \x90-x99 / \x9e-x9f) + / \x92\x80-xbf + / \x93 (\x80-x85 / \x87 / \x90-x99) + / \x96 (\x80-xb5 / \xb8-xbf) + / \x97 (\x80 / \x98-x9d) + / \x98\x80-xbf + / \x99 (\x80 / \x84 / \x90-x99) + / \x9a\x80-xb8 + / \x9b\x80-x89 + / \x9c (\x80-x9a / \x9d-xab / \xb0-xb9) + / \xa0\x80-xba + / \xa2\xa0-xbf + / \xa3 (\x80-xa9 / \xbf) + / \xa6 (\xa0-xa7 / \xaa-xbf) + / \xa7 (\x80-x97 / \x9a-xa1 / \xa3-xa4) + / \xa8\x80-xbe + / \xa9 (\x87 / \x90-xbf) + / \xaa (\x80-x99 / \x9d) + / \xab\x80-xb8 + / \xb0 (\x80-x88 / \x8a-xb6 / \xb8-xbf) + / \xb1 (\x80 / \x90-x99 / \xb2-xbf) + / \xb2 (\x80-x8f / \x92-xa7 / \xa9-xb6) + / \xb4 (\x80-x86 / \x88-x89 / \x8b-xb6 / \xba / \xbc-xbd / \xbf) + / \xb5 (\x80-x87 / \x90-x99 / \xa0-xa5 / \xa7-xa8 / \xaa-xbf) + / \xb6 (\x80-x8e / \x90-x91 / \x93-x98 / \xa0-xa9) + / \xbb\xa0-xb6 + ) + / \x92 (\x80-x8d\x80-xbf / \x8e\x80-x99 / \x90\x80-xbf / \x91\x80-xae / \x92-x94\x80-xbf / \x95\x80-x83) + / \x93 (\x80-x8f\x80-xbf / \x90\x80-xae) + / \x94 (\x90-x98\x80-xbf / \x99\x80-x86) + / \x96 ( + \xa0-xa7\x80-xbf + / \xa8\x80-xb8 + / \xa9 (\x80-x9e / \xa0-xa9) + / \xab (\x90-xad / \xb0-xb4) + / \xac\x80-xb6 + / \xad (\x80-x83 / \x90-x99 / \xa3-xb7 / \xbd-xbf) + / \xae\x80-x8f + / \xb9\x80-xbf + / \xbc\x80-xbf + / \xbd (\x80-x8a / \x8f-xbf) + / \xbe (\x80-x87 / \x8f-x9f) + / \xbf (\xa0-xa1 / \xa3) + ) + / \x97\x80-xbf\x80-xbf + / \x98 (\x80-x9e\x80-xbf / \x9f\x80-xb7 / \xa0-xaa\x80-xbf / \xab\x80-xb2) + / \x9b ( + \x80-x83\x80-xbf + / \x84\x80-x9e + / \x85 (\x90-x92 / \xa4-xa7 / \xb0-xbf) + / \x86-x8a\x80-xbf + / \x8b\x80-xbb + / \xb0\x80-xbf + / \xb1 (\x80-xaa / \xb0-xbc) + / \xb2 (\x80-x88 / \x90-x99 / 
\x9d-x9e) + ) + / \x9d ( + \x85 (\xa5-xa9 / \xad-xb2 / \xbb-xbf) + / \x86 (\x80-x82 / \x85-x8b / \xaa-xad) + / \x89\x82-x84 + / \x90\x80-xbf + / \x91 (\x80-x94 / \x96-xbf) + / \x92 (\x80-x9c / \x9e-x9f / \xa2 / \xa5-xa6 / \xa9-xac / \xae-xb9 / \xbb / \xbd-xbf) + / \x93 (\x80-x83 / \x85-xbf) + / \x94 (\x80-x85 / \x87-x8a / \x8d-x94 / \x96-x9c / \x9e-xb9 / \xbb-xbe) + / \x95 (\x80-x84 / \x86 / \x8a-x90 / \x92-xbf) + / \x96-x99\x80-xbf + / \x9a (\x80-xa5 / \xa8-xbf) + / \x9b (\x80 / \x82-x9a / \x9c-xba / \xbc-xbf) + / \x9c (\x80-x94 / \x96-xb4 / \xb6-xbf) + / \x9d (\x80-x8e / \x90-xae / \xb0-xbf) + / \x9e (\x80-x88 / \x8a-xa8 / \xaa-xbf) + / \x9f (\x80-x82 / \x84-x8b / \x8e-xbf) + / \xa8 (\x80-xb6 / \xbb-xbf) + / \xa9 (\x80-xac / \xb5) + / \xaa (\x84 / \x9b-x9f / \xa1-xaf) + ) + / \x9e ( + \x80 (\x80-x86 / \x88-x98 / \x9b-xa1 / \xa3-xa4 / \xa6-xaa) + / \x84 (\x80-xac / \xb0-xbd) + / \x85 (\x80-x89 / \x8e) + / \x8b\x80-xb9 + / \xa0-xa2\x80-xbf + / \xa3 (\x80-x84 / \x90-x96) + / \xa4\x80-xbf + / \xa5 (\x80-x8b / \x90-x99) + / \xb8 (\x80-x83 / \x85-x9f / \xa1-xa2 / \xa4 / \xa7 / \xa9-xb2 / \xb4-xb7 / \xb9 / \xbb) + / \xb9 (\x82 / \x87 / \x89 / \x8b / \x8d-x8f / \x91-x92 / \x94 / \x97 / \x99 / \x9b / \x9d / \x9f / \xa1-xa2 / \xa4 / \xa7-xaa / \xac-xb2 / \xb4-xb7 / \xb9-xbc / \xbe) + / \xba (\x80-x89 / \x8b-x9b / \xa1-xa3 / \xa5-xa9 / \xab-xbb) + ) + / \xa0\x80-xbf\x80-xbf + / \xa1\x80-xbf\x80-xbf + / \xa2\x80-xbf\x80-xbf + / \xa3\x80-xbf\x80-xbf + / \xa4\x80-xbf\x80-xbf + / \xa5\x80-xbf\x80-xbf + / \xa6\x80-xbf\x80-xbf + / \xa7\x80-xbf\x80-xbf + / \xa8\x80-xbf\x80-xbf + / \xa9\x80-xbf\x80-xbf + / \xaa (\x80-x9a\x80-xbf / \x9b\x80-x96 / \x9c-xbf\x80-xbf) + / \xab ( + \x80-x9b\x80-xbf + / \x9c\x80-xb4 + / \x9d-x9f\x80-xbf + / \xa0 (\x80-x9d / \xa0-xbf) + / \xa1-xbf\x80-xbf + ) + / \xac ( + \x80-xb9\x80-xbf + / \xba (\x80-xa1 / \xb0-xbf) + / \xbb-xbf\x80-xbf + ) + / \xad\x80-xbf\x80-xbf + / \xae (\x80-xae\x80-xbf / \xaf\x80-xa0) + / \xaf (\xa0-xa7\x80-xbf / \xa8\x80-x9d) +) 
+/ \xf3\xa0 (\x84-x86\x80-xbf / \x87\x80-xaf) +) diff --git a/grammars/utf8-id.bpeg b/grammars/utf8-id.bpeg deleted file mode 100644 index 26e98ba..0000000 --- a/grammars/utf8-id.bpeg +++ /dev/null @@ -1,735 +0,0 @@ -# Definitions of UTF8-compliant identifiers -utf8-id: utf8-id-start *utf8-id-cont - -utf8-id-start: `A-Z / `a-z / !\x00-x7F ( - \xc2 (\xaa / \xb5 / \xba) -/ \xc3 (\x80-x96 / \x98-xb6 / \xb8-xbf) -/ \xc4-xca\x80-xbf -/ \xcb (\x80-x81 / \x86-x91 / \xa0-xa4 / \xac / \xae) -/ \xcd (\xb0-xb4 / \xb6-xb7 / \xba-xbd / \xbf) -/ \xce (\x86 / \x88-x8a / \x8c / \x8e-xa1 / \xa3-xbf) -/ \xcf (\x80-xb5 / \xb7-xbf) -/ \xd0-xd2\x80-xbf -/ \xd2 (\x80-x81 / \x8a-xbf) -/ \xd3\x80-xbf -/ \xd4 (\x80-xaf / \xb1-xbf) -/ \xd5 (\x80-x96 / \x99 / \xa0-xbf) -/ \xd6\x80-x88 -/ \xd7 (\x90-xaa / \xaf-xb2) -/ \xd8\xa0-xbf -/ \xd9 (\x80-x8a / \xae-xaf / \xb1-xbf) -/ \xda\x80-xbf -/ \xdb (\x80-x93 / \x95 / \xa5-xa6 / \xae-xaf / \xba-xbc / \xbf) -/ \xdc (\x90 / \x92-xaf) -/ \xdd\x8d-xbf -/ \xde (\x80-xa5 / \xb1) -/ \xdf (\x8a-xaa / \xb4-xb5 / \xba) -/ \xe0 ( - \xa0 (\x80-x95 / \x9a / \xa4 / \xa8) - / \xa1 (\x80-x98 / \xa0-xaa) - / \xa2 (\xa0-xb4 / \xb6-xbd) - / \xa4 (\x84-xb9 / \xbd) - / \xa5 (\x90 / \x98-xa1 / \xb1-xbf) - / \xa6 (\x80 / \x85-x8c / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2 / \xb6-xb9 / \xbd) - / \xa7 (\x8e / \x9c-x9d / \x9f-xa1 / \xb0-xb1 / \xbc) - / \xa8 (\x85-x8a / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb6 / \xb8-xb9) - / \xa9 (\x99-x9c / \x9e / \xb2-xb4) - / \xaa (\x85-x8d / \x8f-x91 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb9 / \xbd) - / \xab (\x90 / \xa0-xa1 / \xb9) - / \xac (\x85-x8c / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb9 / \xbd) - / \xad (\x9c-x9d / \x9f-xa1 / \xb1) - / \xae (\x83 / \x85-x8a / \x8e-x90 / \x92-x95 / \x99-x9a / \x9c / \x9e-x9f / \xa3-xa4 / \xa8-xaa / \xae-xb9) / \xaf\x90 - / \xb0 (\x85-x8c / \x8e-x90 / \x92-xa8 / \xaa-xb9 / \xbd) - / \xb1 (\x98-x9a / \xa0-xa1) - / \xb2 (\x80 / \x85-x8c / \x8e-x90 / \x92-xa8 / \xaa-xb3 / 
\xb5-xb9 / \xbd) - / \xb3 (\x9e / \xa0-xa1 / \xb1-xb2) - / \xb4 (\x85-x8c / \x8e-x90 / \x92-xba / \xbd) - / \xb5 (\x8e / \x94-x96 / \x9f-xa1 / \xba-xbf) - / \xb6 (\x85-x96 / \x9a-xb1 / \xb3-xbb / \xbd) - / \xb7\x80-x86 - / \xb8 (\x81-xb0 / \xb2-xb3) - / \xb9 (\x80-x85 / \x86) - / \xba (\x81-x82 / \x84 / \x86-x8a / \x8c-xa3 / \xa5 / \xa7-xb0 / \xb2-xb3 / \xbd) - / \xbb (\x80-x84 / \x86 / \x9c-x9f) - / \xbc\x80 - / \xbd (\x80-x87 / \x89-xac) - / \xbe\x88-x8c -) -/ \xe1 ( - \x80 (\x80-xaa / \xbf) - / \x81 (\x90-x95 / \x9a-x9d / \xa1 / \xa5-xa6 / \xae-xb0 / \xb5-xbf) - / \x82 (\x80-x81 / \x8e / \xa0-xbf) - / \x83 (\x80-x85 / \x87 / \x8d / \x90-xba / \xbc / \xbd-xbf) - / \x84-x88\x80-xbf - / \x89 (\x80-x88 / \x8a-x8d / \x90-x96 / \x98 / \x9a-x9d / \xa0-xbf) - / \x8a (\x80-x88 / \x8a-x8d / \x90-xb0 / \xb2-xb5 / \xb8-xbe) - / \x8b (\x80 / \x82-x85 / \x88-x96 / \x98-xbf) - / \x8c (\x80-x90 / \x92-x95 / \x98-xbf) - / \x8d\x80-x9a - / \x8e (\x80-x8f / \xa0-xbf) - / \x8f (\x80-xb5 / \xb8-xbd) - / \x90\x81-xbf - / \x91-x98\x80-xbf - / \x99 (\x80-xac / \xaf-xbf) - / \x9a (\x81-x9a / \xa0-xbf) - / \x9b (\x80-xaa / \xae-xb0 / \xb1-xb8) - / \x9c (\x80-x8c / \x8e-x91 / \xa0-xb1) - / \x9d (\x80-x91 / \xa0-xac / \xae-xb0) - / \x9e\x80-xb3 - / \x9f (\x97 / \x9c) - / \xa0\xa0-xbf - / \xa1\x80-xb8 - / \xa2 (\x80-xa8 / \xaa / \xb0-xbf) - / \xa3\x80-xb5 - / \xa4\x80-x9e - / \xa5 (\x90-xad / \xb0-xb4) - / \xa6 (\x80-xab / \xb0-xbf) - / \xa7\x80-x89 - / \xa8 (\x80-x96 / \xa0-xbf) - / \xa9\x80-x94 - / \xaa\xa7 - / \xac\x85-xb3 - / \xad\x85-x8b - / \xae (\x83-xa0 / \xae-xaf / \xba-xbf) - / \xaf\x80-xa5 - / \xb0\x80-xa3 - / \xb1 (\x8d-x8f / \x9a-xbd) - / \xb2 (\x80-x88 / \x90-xba / \xbd-xbf) - / \xb3 (\xa9-xac / \xae-xb3 / \xb5-xb6 / \xba) - / \xb4 (\x80-xab / \xac-xbf) - / \xb5-xbb\x80-xbf - / \xbc (\x80-x95 / \x98-x9d / \xa0-xbf) - / \xbd (\x80-x85 / \x88-x8d / \x90-x97 / \x99 / \x9b / \x9d / \x9f-xbd) - / \xbe (\x80-xb4 / \xb6-xbc / \xbe) - / \xbf (\x82-x84 / \x86-x8c / \x90-x93 / \x96-x9b 
/ \xa0-xac / \xb2-xb4 / \xb6-xbc) -) -/ \xe2 ( - \x81 (\xb1 / \xbf) - / \x82\x90-x9c - / \x84 (\x82 / \x87 / \x8a-x93 / \x95 / \x98-x9d / \xa4 / \xa6 / \xa8 / \xaa-xb9 / \xbc-xbf) - / \x85 (\x85-x89 / \x8e / \xa0-xbf) - / \x86\x80-x88 - / \xb0 (\x80-xae / \xb0-xbf) - / \xb1 (\x80-x9e / \xa0-xbf) - / \xb2\x80-xbf - / \xb3 (\x80-xa4 / \xab-xae / \xb2-xb3) - / \xb4 (\x80-xa5 / \xa7 / \xad / \xb0-xbf) - / \xb5 (\x80-xa7 / \xaf) - / \xb6 (\x80-x96 / \xa0-xa6 / \xa8-xae / \xb0-xb6 / \xb8-xbe) - / \xb7 (\x80-x86 / \x88-x8e / \x90-x96 / \x98-x9e) -) -/ \xe3 ( - \x80 (\x85-x87 / \xa1-xa9 / \xb1-xb5 / \xb8-xba / \xbb-xbc) - / \x81\x81-xbf - / \x82 (\x80-x96 / \x9b-x9f / \xa1-xbf) - / \x83 (\x80-xba / \xbc-xbe / \xbf) - / \x84 (\x85-xaf / \xb1-xbf) - / \x85\x80-xbf - / \x86 (\x80-x8e / \xa0-xba) - / \x87\xb0-xbf - / \x90-xbf\x80-xbf -) -/ \xe4 (\x80-xb5\x80-xbf / \xb6\x80-xb5 / \xb8-xbf\x80-xbf) -/ \xe5-xe8\x80-xbf\x80-xbf -/ \xe9 (\x80-xbe\x80-xbf / \xbf\x80-xaf) -/ \xea ( - \x80-x91\x80-xbf - / \x92\x80-x8c - / \x93\x90-xbd - / \x94-x97\x80-xbf - / \x98 (\x80-x8c / \x90-x9f / \xaa-xab) - / \x99 (\x80-xae / \xbf) - / \x9a (\x80-x9d / \xa0-xbf) - / \x9b\x80-xaf - / \x9c (\x97-x9f / \xa2-xbf) - / \x9d\x80-xbf - / \x9e (\x80-x88 / \x8b-xbf) - / \x9f (\x82-x86 / \xb7-xbf) - / \xa0 (\x80-x81 / \x83-x85 / \x87-x8a / \x8c-xa2) - / \xa1\x80-xb3 - / \xa2\x82-xb3 - / \xa3 (\xb2-xb7 / \xbb / \xbd-xbe) - / \xa4 (\x8a-xa5 / \xb0-xbf) - / \xa5 (\x80-x86 / \xa0-xbc) - / \xa6\x84-xb2 - / \xa7 (\x8f / \xa0-xa4 / \xa6 / \xa7-xaf / \xba-xbe) - / \xa8\x80-xa8 - / \xa9 (\x80-x82 / \x84-x8b / \xa0-xb6 / \xba / \xbe-xbf) - / \xaa (\x80-xaf / \xb1 / \xb5-xb6 / \xb9-xbd) - / \xab (\x80 / \x82 / \x9b-x9d / \xa0-xaa / \xb2-xb4) - / \xac (\x81-x86 / \x89-x8e / \x91-x96 / \xa0-xa6 / \xa8-xae / \xb0-xbf) - / \xad (\x80-x9a / \x9c-x9f / \xa0-xa7 / \xb0-xbf) - / \xae\x80-xbf - / \xaf\x80-xa2 - / \xb0-xbf\x80-xbf -) -/ \xeb-xec\x80-xbf\x80-xbf -/ \xed ( - \x80-x9d\x80-xbf - / \x9e (\x80-xa3 / \xb0-xbf) - / 
\x9f (\x80-x86 / \x8b-xbb) -) -/ \xef ( - \xa4-xa8\x80-xbf - / \xa9 (\x80-xad / \xb0-xbf) - / \xaa\x80-xbf - / \xab\x80-x99 - / \xac (\x80-x86 / \x93-x97 / \x9d / \x9f-xa8 / \xaa-xb6 / \xb8-xbc / \xbe) - / \xad (\x80-x81 / \x83-x84 / \x86-xbf) - / \xae\x80-xb1 - / \xaf\x93-xbf - / \xb0-b3\x80-xbf - / \xb4\x80-xbd - / \xb5\x90-xbf - / \xb6 (\x80-x8f / \x92-xbf) - / \xb7 (\x80-x87 / \xb0-xbb) - / \xb9 (\xb0-xb4 / \xb6-xbf) - / \xba\x80-xbf - / \xbb\x80-xbc - / \xbc\xa1-xba - / \xbd (\x81-x9a / \xa6-xaf / \xb0-xbf) - / \xbe\x80-xbe - / \xbf (\x82-x87 / \x8a-x8f / \x92-x97 / \x9a-x9c) -) -/ \xf0 ( - \x90 ( - \x80 (\x80-x8b / \x8d-xa6 / \xa8-xba / \xbc-xbd / \xbf) - / \x81 (\x80-x8d / \x90-x9d) - / \x82\x80-xbf - / \x83\x80-xba - / \x85\x80-xb4 - / \x8a (\x80-x9c / \xa0-xbf) - / \x8b\x80-x90 - / \x8c (\x80-x9f / \xad-xbf) - / \x8d (\x80-x8a / \x90-xb5) - / \x8e (\x80-x9d / \xa0-xbf) - / \x8f (\x80-x83 / \x88-x8f / \x91-x95) - / \x90-x91\x80-xbf - / \x92 (\x80-x9d / \xb0-xbf) - / \x93 (\x80-x93 / \x98-xbb) - / \x94 (\x80-xa7 / \xb0-xbf) - / \x95\x80-xa3 - / \x98-x9b\x80-xbf - / \x9c\x80-xb6 - / \x9d (\x80-x95 / \xa0-xa7) - / \xa0 (\x80-x85 / \x88 / \x8a-xb5 / \xb7-xb8 / \xbc / \xbf) - / \xa1 (\x80-x95 / \xa0-xb6) - / \xa2\x80-x9e - / \xa3 (\xa0-xb2 / \xb4-xb5) - / \xa4 (\x80-x95 / \xa0-xb9) - / \xa6 (\x80-xb7 / \xbe-xbf) - / \xa8 (\x80 / \x90-x93 / \x95-x97 / \x99-xb5) - / \xa9\xa0-xbc - / \xaa\x80-x9c - / \xab (\x80-x87 / \x89-xa4) - / \xac\x80-xb5 - / \xad (\x80-x95 / \xa0-xb2) - / \xae\x80-x91 - / \xb0\x80-xbf - / \xb1\x80-x88 - / \xb2-xb3\x80-xb2 - / \xb4\x80-xa3 - / \xbc (\x80-x9c / \xa7 / \xb0-xbf) - / \xbd\x80-x85 - / \xbf\xa0-xb6 - ) - / \x91 ( - \x80\x83-xb7 - / \x82\x83-xaf - / \x83\x90-xa8 - / \x84\x83-xa6 - / \x85 (\x84 / \x90-xb2 / \xb6) - / \x86\x83-xb2 - / \x87 (\x81-x84 / \x9a / \x9c) - / \x88 (\x80-x91 / \x93-xab) - / \x8a (\x80-x86 / \x88 / \x8a-x8d / \x8f-x9d / \x9f-xa8 / \xb0-xbf) - / \x8b\x80-x9e - / \x8c (\x85-x8c / \x8f-x90 / \x93-xa8 / \xaa-xb0 / 
\xb2-xb3 / \xb5-xb9 / \xbd) - / \x8d (\x90 / \x9d-xa1) - / \x90\x80-xb4 - / \x91 (\x87-x8a / \x9f) - / \x92\x80-xaf - / \x93 (\x84-x85 / \x87) - / \x96\x80-xae - / \x97\x98-x9b - / \x98\x80-xaf - / \x99\x84 - / \x9a (\x80-xaa / \xb8) - / \x9c\x80-x9a - / \xa0\x80-xab - / \xa2\xa0-xbf - / \xa3 (\x80-x9f / \xbf) - / \xa6 (\xa0-xa7 / \xaa-xbf) - / \xa7 (\x80-x90 / \xa1 / \xa3) - / \xa8 (\x80 / \x8b-xb2 / \xba) - / \xa9 (\x90 / \x9c-xbf) - / \xaa (\x80-x89 / \x9d) - / \xab\x80-xb8 - / \xb0 (\x80-x88 / \x8a-xae) - / \xb1 (\x80 / \xb2-xbf) - / \xb2\x80-x8f - / \xb4 (\x80-x86 / \x88-x89 / \x8b-xb0) - / \xb5 (\x86 / \xa0-xa5 / \xa7-xa8 / \xaa-xbf) - / \xb6 (\x80-x89 / \x98) - / \xbb\xa0-xb2 - ) - / \x92 (\x80-x8d\x80-xbf / \x8e\x80-x99 / \x90\x80-xbf / \x91\x80-xae / \x92-x94\x80-xbf / \x95\x80-x83) - / \x93 (\x80-x8f\x80-xbf / \x90\x80-xae) - / \x94 (\x90-x98\x80-xbf / \x99\x80-x86) - / \x96 ( - \xa0-xa7\x80-xbf - / \xa8\x80-xb8 - / \xa9\x80-x9e - / \xab\x90-xad - / \xac\x80-xaf - / \xad (\x80-x83 / \xa3-xb7 / \xbd-xbf) - / \xae\x80-x8f - / \xb9-xbc\x80-xbf - / \xbd (\x80-x8a / \x90) - / \xbe\x93-x9f - / \xbf (\xa0-xa1 / \xa3) - ) - / \x97\x80-xbf\x80-xbf - / \x98 (\x80-x9e\x80-xbf / \x9f\x80-xb7 / \xa0-xaa\x80-xbf / \xab\x80-xb2) - / \x9b ( - \x80-x83\x80-xbf - / \x84\x80-x9e - / \x85 (\x90-x92 / \xa4-xa7 / \xb0-xbf) - / \x86-x8a\x80-xbf - / \x8b\x80-xbb - / \xb0\x80-xbf - / \xb1 (\x80-xaa / \xb0-xbc) - / \xb2 (\x80-x88 / \x90-x99) - ) - / \x9d ( - \x90\x80-xbf - / \x91 (\x80-x94 / \x96-xbf) - / \x92 (\x80-x9c / \x9e-x9f / \xa2 / \xa5-xa6 / \xa9-xac / \xae-xb9 / \xbb / \xbd-xbf) - / \x93 (\x80-x83 / \x85-xbf) - / \x94 (\x80-x85 / \x87-x8a / \x8d-x94 / \x96-x9c / \x9e-xb9 / \xbb-xbe) - / \x95 (\x80-x84 / \x86 / \x8a-x90 / \x92-xbf) - / \x96-x99\x80-xbf - / \x9a (\x80-xa5 / \xa8-xbf) - / \x9b (\x80 / \x82-x9a / \x9c-xba / \xbc-xbf) - / \x9c (\x80-x94 / \x96-xb4 / \xb6-xbf) - / \x9d (\x80-x8e / \x90-xae / \xb0-xbf) - / \x9e (\x80-x88 / \x8a-xa8 / \xaa-xbf) - / \x9f 
(\x80-x82 / \x84-x8b) - ) - / \x9e ( - \x84 (\x80-xac / \xb7-xbd) - / \x85\x8e - / \x8b\x80-xab - / \xa0-xa2\x80-xbf - / \xa3\x80-x84 - / \xa4\x80-xbf - / \xa5 (\x80-x83 / \x8b) - / \xb8 (\x80-x83 / \x85-x9f / \xa1-xa2 / \xa4 / \xa7 / \xa9-xb2 / \xb4-xb7 / \xb9 / \xbb) - / \xb9 (\x82 / \x87 / \x89 / \x8b / \x8d-x8f / \x91-x92 / \x94 / \x97 / \x99 / \x9b / \x9d / \x9f / \xa1-xa2 / \xa4 / \xa7-xaa / \xac-xb2 / \xb4-xb7 / \xb9-xbc / \xbe) - / \xba (\x80-x89 / \x8b-x9b / \xa1-xa3 / \xa5-xa9 / \xab-xbb) - ) - / \xa0-xa9\x80-xbf\x80-xbf - / \xaa (\x80-x9a\x80-xbf / \x9b\x80-x96 / \x9c-xbf\x80-xbf) - / \xab ( - \x80-x9b\x80-xbf - / \x9c\x80-xb4 - / \x9d-x9f\x80-xbf - / \xa0 (\x80-x9d / \xa0-xbf) - / \xa1-xbf\x80-xbf - ) - / \xac ( - \x80-xb9\x80-xbf - / \xba (\x80-xa1 / \xb0-xbf) - / \xbb-xbf\x80-xbf - ) - / \xad\x80-xbf\x80-xbf - / \xae (\x80-xae\x80-xbf / \xaf\x80-xa0) - / \xaf (\xa0-xa7\x80-xbf / \xa8\x80-x9d) -) -) - -utf8-id-cont: `0-9 / `A-Z / `_ / `a-z / !\x00-x7F ( - \xc2 (\xaa / \xb5 / \xb7 / \xba) -/ \xc3 (\x80-x96 / \x98-xb6 / \xb8-xbf) -/ \xc4-xca\x80-xbf -/ \xcb (\x80-x81 / \x86-x91 / \xa0-xa4 / \xac / \xae) -/ \xcc\x80-xbf -/ \xcd (\x80-xb4 / \xb6-xb7 / \xba-xbd / \xbf) -/ \xce (\x86-x8a / \x8c / \x8e-xa1 / \xa3-xbf) -/ \xcf (\x80-xb5 / \xb7-xbf) -/ \xd0-xd1\x80-xbf -/ \xd2 (\x80-x81 / \x83-x87 / \x8a-xbf) -/ \xd3\x80-xbf -/ \xd4 (\x80-xaf / \xb1-xbf) -/ \xd5 (\x80-x96 / \x99 / \xa0-xbf) -/ \xd6 (\x80-x88 / \x91-xbd / \xbf) -/ \xd7 (\x81-x82 / \x84-x85 / \x87 / \x90-xaa / \xaf-xb2) -/ \xd8 (\x90-x9a / \xa0-xbf) -/ \xd9 (\x80-xa9 / \xae-xbf) -/ \xda\x80-xbf -/ \xdb (\x80-x93 / \x95-x9c / \x9f-xa8 / \xaa-xbc / \xbf) -/ \xdc\x90-xbf -/ \xdd (\x80-x8a / \x8d-xbf) -/ \xde\x80-xb1 -/ \xdf (\x80-xb5 / \xba / \xbd) -/ \xe0 ( - \xa0\x80-xad - / \xa1 (\x80-x9b / \xa0-xaa) - / \xa2 (\xa0-xb4 / \xb6-xbd) - / \xa3 (\x93-xa1 / \xa3-xbf) - / \xa4\x80-xbf - / \xa5 (\x80-xa3 / \xa6-xaf / \xb1-xbf) - / \xa6 (\x80-x83 / \x85-x8c / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2 / 
\xb6-xb9 / \xbc-xbf) - / \xa7 (\x80-x84 / \x87-x88 / \x8b-x8e / \x97 / \x9c-x9d / \x9f-xa3 / \xa6-xb1 / \xbc / \xbe) - / \xa8 (\x81-x83 / \x85-x8a / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb6 / \xb8-xb9 / \xbc / \xbe-xbf) - / \xa9 (\x80-x82 / \x87-x88 / \x8b-x8d / \x91 / \x99-x9c / \x9e / \xa6-xb5) - / \xaa (\x81-x83 / \x85-x8d / \x8f-x91 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb9 / \xbc-xbf) - / \xab (\x80-x85 / \x87-x89 / \x8b-x8d / \x90 / \xa0-xa3 / \xa6-xaf / \xb9-xbf) - / \xac (\x81-x83 / \x85-x8c / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb9 / \xbc-xbf) - / \xad (\x80-x84 / \x87-x88 / \x8b-x8d / \x96-x97 / \x9c-x9d / \x9f-xa3 / \xa6-xaf / \xb1) - / \xae (\x82-x83 / \x85-x8a / \x8e-x90 / \x92-x95 / \x99-x9a / \x9c / \x9e-x9f / \xa3-xa4 / \xa8-xaa / \xae-xb9 / \xbe-xbf) - / \xaf (\x80-x82 / \x86-x88 / \x8a-x8d / \x90 / \x97 / \xa6-xaf) - / \xb0 (\x80-x8c / \x8e-x90 / \x92-xa8 / \xaa-xb9 / \xbd-xbf) - / \xb1 (\x80-x84 / \x86-x88 / \x8a-x8d / \x95-x96 / \x98-x9a / \xa0-xa3 / \xa6-xaf) - / \xb2 (\x80-x83 / \x85-x8c / \x8e-x90 / \x92-xa8 / \xaa-xb3 / \xb5-xb9 / \xbc-xbf) - / \xb3 (\x80-x84 / \x86-x88 / \x8a-x8d / \x95-x96 / \x9e / \xa0-xa3 / \xa6-xaf / \xb1-xb2) - / \xb4 (\x80-x83 / \x85-x8c / \x8e-x90 / \x92-xbf) - / \xb5 (\x80-x84 / \x86-x88 / \x8a-x8e / \x94-x97 / \x9f-xa3 / \xa6-xaf / \xba-xbf) - / \xb6 (\x82-x83 / \x85-x96 / \x9a-xb1 / \xb3-xbb / \xbd) - / \xb7 (\x80-x86 / \x8a / \x8f-x94 / \x96 / \x98-x9f / \xa6-xaf / \xb2-xb3) - / \xb8\x81-xba - / \xb9 (\x80-x8e / \x90-x99) - / \xba (\x81-x82 / \x84 / \x86-x8a / \x8c-xa3 / \xa5 / \xa7-xbd) - / \xbb (\x80-x84 / \x86 / \x88-x8d / \x90-x99 / \x9c-x9f) - / \xbc (\x80 / \x98-x99 / \xa0-xa9 / \xb5 / \xb7 / \xb9 / \xbe-xbf) - / \xbd (\x80-x87 / \x89-xac / \xb1-xbf) - / \xbe (\x80-x84 / \x86-x97 / \x99-xbc) - / \xbf\x86 -) -/ \xe1 ( - \x80\x80-xbf - / \x81 (\x80-x89 / \x90-xbf) - / \x82 (\x80-x9d / \xa0-xbf) - / \x83 (\x80-x85 / \x87 / \x8d / \x90-xba / \xbc-xbf) - / \x84-x88\x80-xbf - / \x89 
(\x80-x88 / \x8a-x8d / \x90-x96 / \x98 / \x9a-x9d / \xa0-xbf) - / \x8a ( - \x80-x88 - / \x8a-x8d - / \x90-xb0 - / \xb2-xb5 - / \xb8-xbe - ) - / \x8b (\x80 / \x82-x85 / \x88-x96 / \x98-xbf) - / \x8c (\x80-x90 / \x92-x95 / \x98-xbf) - / \x8d (\x80-x9a / \x9d-x9f / \xa9-xb1) - / \x8e (\x80-x8f / \xa0-xbf) - / \x8f (\x80-xb5 / \xb8-xbd) - / \x90\x81-xbf - / \x91\x80-xbf - / \x99 (\x80-xac / \xaf-xbf) - / \x9a (\x81-x9a / \xa0-xbf) - / \x9b (\x80-xaa / \xae-xb8) - / \x9c (\x80-x8c / \x8e-x94 / \xa0-xb4) - / \x9d (\x80-x93 / \xa0-xac / \xae-xb0 / \xb2-xb3) - / \x9e\x80-xbf - / \x9f (\x80-x93 / \x97 / \x9c-x9d / \xa0-xa9) - / \xa0 (\x8b-x8d / \x90-x99 / \xa0-xbf) - / \xa1\x80-xb8 - / \xa2 (\x80-xaa / \xb0-xbf) - / \xa3\x80-xb5 - / \xa4 (\x80-x9e / \xa0-xab / \xb0-xbb) - / \xa5 (\x86-xad / \xb0-xb4) - / \xa6 (\x80-xab / \xb0-xbf) - / \xa7 (\x80-x89 / \x90-x9a) - / \xa8 (\x80-x9b / \xa0-xbf) - / \xa9 (\x80-x9e / \xa0-xbc / \xbf) - / \xaa (\x80-x89 / \x90-x99 / \xa7 / \xb0-xbd) - / \xac\x80-xbf - / \xad (\x80-x8b / \x90-x99 / \xab-xb3) - / \xae\x80-xbf - / \xaf\x80-xb3 - / \xb0\x80-xb7 - / \xb1 (\x80-x89 / \x8d-xbd) - / \xb2 (\x80-x88 / \x90-xba / \xbd-xbf) - / \xb3 (\x90-x92 / \x94-xba) - / \xb4-xb6\x80-xbf - / \xb7 (\x80-xb9 / \xbb-xbf) - / \xb8-xbb\x80-xbf - / \xbc (\x80-x95 / \x98-x9d / \xa0-xbf) - / \xbd (\x80-x85 / \x88-x8d / \x90-x97 / \x99 / \x9b / \x9d / \x9f-xbd) - / \xbe (\x80-xb4 / \xb6-xbc / \xbe) - / \xbf (\x82-x84 / \x86-x8c / \x90-x93 / \x96-x9b / \xa0-xac / \xb2-xb4 / \xb6-xbc) -) -/ \xe2 ( - \x80\xbf - / \x81 (\x80 / \x94 / \xb1 / \xbf) - / \x82\x90-x9c - / \x83 (\x90-x9c / \xa1 / \xa5-xb0) - / \x84 (\x82 / \x87 / \x8a-x93 / \x95 / \x98-x9d / \xa4 / \xa6 / \xa8 / \xaa-xb9 / \xbc-xbf) - / \x85 (\x85-x89 / \x8e / \xa0-xbf) - / \x86\x80-x88 - / \xb0 (\x80-xae / \xb0-xbf) - / \xb1 (\x80-x9e / \xa0-xbf) - / \xb2\x80-xbf - / \xb3 (\x80-xa4 / \xab-xb3) - / \xb4 (\x80-xa5 / \xa7 / \xad / \xb0-xbf) - / \xb5 (\x80-xa7 / \xaf / \xbf) - / \xb6 (\x80-x96 / \xa0-xa6 / 
\xa8-xae / \xb0-xb6 / \xb8-xbe) - / \xb7 (\x80-x86 / \x88-x8e / \x90-x96 / \x98-x9e / \xa0-xbf) -) -/ \xe3 ( - \x80 (\x85-x87 / \xa1-xaf / \xb1-xb5 / \xb8-xbc) - / \x81\x81-xbf - / \x82 (\x80-x96 / \x99-x9f / \xa1-xbf) - / \x83 (\x80-xba / \xbc-xbf) - / \x84 (\x85-xaf / \xb1-xbf) - / \x85\x80-xbf - / \x86 (\x80-x8e / \xa0-xba) - / \x87\xb0-xbf - / \x90-xbf\x80-xbf -) -/ \xe4 (\x80-xb5\x80-xbf / \xb6\x80-xb5 / \xb8-xbf\x80-xbf) -/ \xe5-xe8\x80-xbf\x80-xbf -/ \xe9 (\x80-xbe\x80-xbf / \xbf\x80-xaf) -/ \xea ( - \x80-x91\x80-xbf - / \x92\x80-x8c - / \x93\x90-xbd - / \x94-x97\x80-xbf - / \x98 (\x80-x8c / \x90-xab) - / \x99 (\x80-xaf / \xb4-xbd / \xbf) - / \x9a\x80-xbf - / \x9b\x80-xb1 - / \x9c (\x97-x9f / \xa2-xbf) - / \x9d\x80-xbf - / \x9e (\x80-x88 / \x8b-xbf) - / \x9f (\x82-x86 / \xb7-xbf) - / \xa0\x80-xa7 - / \xa1\x80-xb3 - / \xa2\x80-xbf - / \xa3 (\x80-x85 / \x90-x99 / \xa0-xb7 / \xbb / \xbd-xbf) - / \xa4 (\x80-xad / \xb0-xbf) - / \xa5 (\x80-x93 / \xa0-xbc) - / \xa6\x80-xbf - / \xa7 (\x80 / \x8f-x99 / \xa0-xbe) - / \xa8\x80-xb6 - / \xa9 (\x80-x8d / \x90-x99 / \xa0-xb6 / \xba-xbf) - / \xaa\x80-xbf - / \xab (\x80-x82 / \x9b-x9d / \xa0-xaf / \xb2-xb6) - / \xac (\x81-x86 / \x89-x8e / \x91-x96 / \xa0-xa6 / \xa8-xae / \xb0-xbf) - / \xad (\x80-x9a / \x9c-xa7 / \xb0-xbf) - / \xae\x80-xbf - / \xaf (\x80-xaa / \xac-xad / \xb0-xb9) - / \xb0-xbf\x80-xbf -) -/ \xeb\x80-xbf\x80-xbf -/ \xec\x80-xbf\x80-xbf -/ \xed ( - \x80-x9d\x80-xbf - / \x9e (\x80-xa3 / \xb0-xbf) - / \x9f (\x80-x86 / \x8b-xbb) -) -/ \xef ( - \xa4-xa8\x80-xbf - / \xa9 (\x80-xad / \xb0-xbf) - / \xaa\x80-xbf - / \xab\x80-x99 - / \xac (\x80-x86 / \x93-x97 / \x9d-xa8 / \xaa-xb6 / \xb8-xbc / \xbe) - / \xad (\x80-x81 / \x83-x84 / \x86-xbf) - / \xae\x80-xb1 - / \xaf\x93-xbf - / \xb0\x80-xbf - / \xb1 (\x80-x9d / \x80-xbf / \xa4-xbf) - / \xb2-xb3\x80-xbf - / \xb4\x80-xbd - / \xb5\x90-xbf - / \xb6 (\x80-x8f / \x92-xbf) - / \xb7 (\x80-x87 / \xb0-xb9 / \xb0-xbb) - / \xb8 (\x80-x8f / \xa0-xaf / \xb3-xb4) - / \xb9 (\x8d-x8f / 
\xb0-xb4 / \xb1 / \xb3 / \xb6-xbf / \xb7 / \xb9 / \xbb / \xbd / \xbf) - / \xba\x80-xbf - / \xbb\x80-xbc - / \xbc (\x90-x99 / \xa1-xba / \xbf) - / \xbd (\x81-x9a / \xa6-xbf) - / \xbe\x80-xbe - / \xbf (\x82-x87 / \x8a-x8f / \x92-x97 / \x9a-x9c) -) -/ \xf0 ( - \x90 ( - \x80 (\x80-x8b / \x8d-xa6 / \xa8-xba / \xbc-xbd / \xbf) - / \x81 (\x80-x8d / \x90-x9d) - / \x82\x80-xbf - / \x83\x80-xba - / \x85\x80-xb4 - / \x87\xbd - / \x8a (\x80-x9c / \xa0-xbf) - / \x8b (\x80-x90 / \xa0) - / \x8c (\x80-x9f / \xad-xbf) - / \x8d (\x80-x8a / \x90-xba) - / \x8e (\x80-x9d / \xa0-xbf) - / \x8f (\x80-x83 / \x88-x8f / \x91-x95) - / \x90-x91\x80-xbf - / \x92 (\x80-x9d / \xa0-xa9 / \xb0-xbf) - / \x93 (\x80-x93 / \x98-xbb) - / \x94 (\x80-xa7 / \xb0-xbf) - / \x95\x80-xa3 - / \x98-x9b\x80-xbf - / \x9c\x80-xb6 - / \x9d (\x80-x95 / \xa0-xa7) - / \xa0 (\x80-x85 / \x88 / \x8a-xb5 / \xb7-xb8 / \xbc / \xbf) - / \xa1 (\x80-x95 / \xa0-xb6) - / \xa2\x80-x9e - / \xa3 (\xa0-xb2 / \xb4-xb5) - / \xa4 (\x80-x95 / \xa0-xb9) - / \xa6 (\x80-xb7 / \xbe-xbf) - / \xa8 (\x80-x83 / \x85-x86 / \x8c-x93 / \x95-x97 / \x99-xb5 / \xb8-xba / \xbf) - / \xa9\xa0-xbc - / \xaa\x80-x9c - / \xab (\x80-x87 / \x89-xa6) - / \xac\x80-xb5 - / \xad (\x80-x95 / \xa0-xb2) - / \xae\x80-x91 - / \xb0\x80-xbf - / \xb1\x80-x88 - / \xb2\x80-xb2 - / \xb3\x80-xb2 - / \xb4 (\x80-xa7 / \xb0-xb9) - / \xbc (\x80-x9c / \xa7 / \xb0-xbf) - / \xbd\x80-x90 - / \xbf\xa0-xb6 - ) - / \x91 ( - \x80\x80-xbf - / \x81 (\x80-x86 / \xa6-xaf / \xbf) - / \x82\x80-xba - / \x83 (\x90-xa8 / \xb0-xb9) - / \x84 (\x80-xb4 / \xb6-xbf) - / \x85 (\x84-x86 / \x90-xb3 / \xb6) - / \x86\x80-xbf - / \x87 (\x80-x84 / \x89-x8c / \x90-x9a / \x9c) - / \x88 (\x80-x91 / \x93-xb7 / \xbe) - / \x8a (\x80-x86 / \x88 / \x8a-x8d / \x8f-x9d / \x9f-xa8 / \xb0-xbf) - / \x8b (\x80-xaa / \xb0-xb9) - / \x8c (\x80-x83 / \x85-x8c / \x8f-x90 / \x93-xa8 / \xaa-xb0 / \xb2-xb3 / \xb5-xb9 / \xbb-xbf) - / \x8d (\x80-x84 / \x87-x88 / \x8b-x8d / \x90 / \x97 / \x9d-xa3 / \xa6-xac / \xb0-xb4) - / 
\x90\x80-xbf - / \x91 (\x80-x8a / \x90-x99 / \x9e-x9f) - / \x92\x80-xbf - / \x93 (\x80-x85 / \x87 / \x90-x99) - / \x96 (\x80-xb5 / \xb8-xbf) - / \x97 (\x80 / \x98-x9d) - / \x98\x80-xbf - / \x99 (\x80 / \x84 / \x90-x99) - / \x9a\x80-xb8 - / \x9b\x80-x89 - / \x9c (\x80-x9a / \x9d-xab / \xb0-xb9) - / \xa0\x80-xba - / \xa2\xa0-xbf - / \xa3 (\x80-xa9 / \xbf) - / \xa6 (\xa0-xa7 / \xaa-xbf) - / \xa7 (\x80-x97 / \x9a-xa1 / \xa3-xa4) - / \xa8\x80-xbe - / \xa9 (\x87 / \x90-xbf) - / \xaa (\x80-x99 / \x9d) - / \xab\x80-xb8 - / \xb0 (\x80-x88 / \x8a-xb6 / \xb8-xbf) - / \xb1 (\x80 / \x90-x99 / \xb2-xbf) - / \xb2 (\x80-x8f / \x92-xa7 / \xa9-xb6) - / \xb4 (\x80-x86 / \x88-x89 / \x8b-xb6 / \xba / \xbc-xbd / \xbf) - / \xb5 (\x80-x87 / \x90-x99 / \xa0-xa5 / \xa7-xa8 / \xaa-xbf) - / \xb6 (\x80-x8e / \x90-x91 / \x93-x98 / \xa0-xa9) - / \xbb\xa0-xb6 - ) - / \x92 (\x80-x8d\x80-xbf / \x8e\x80-x99 / \x90\x80-xbf / \x91\x80-xae / \x92-x94\x80-xbf / \x95\x80-x83) - / \x93 (\x80-x8f\x80-xbf / \x90\x80-xae) - / \x94 (\x90-x98\x80-xbf / \x99\x80-x86) - / \x96 ( - \xa0-xa7\x80-xbf - / \xa8\x80-xb8 - / \xa9 (\x80-x9e / \xa0-xa9) - / \xab (\x90-xad / \xb0-xb4) - / \xac\x80-xb6 - / \xad (\x80-x83 / \x90-x99 / \xa3-xb7 / \xbd-xbf) - / \xae\x80-x8f - / \xb9\x80-xbf - / \xbc\x80-xbf - / \xbd (\x80-x8a / \x8f-xbf) - / \xbe (\x80-x87 / \x8f-x9f) - / \xbf (\xa0-xa1 / \xa3) - ) - / \x97\x80-xbf\x80-xbf - / \x98 (\x80-x9e\x80-xbf / \x9f\x80-xb7 / \xa0-xaa\x80-xbf / \xab\x80-xb2) - / \x9b ( - \x80-x83\x80-xbf - / \x84\x80-x9e - / \x85 (\x90-x92 / \xa4-xa7 / \xb0-xbf) - / \x86-x8a\x80-xbf - / \x8b\x80-xbb - / \xb0\x80-xbf - / \xb1 (\x80-xaa / \xb0-xbc) - / \xb2 (\x80-x88 / \x90-x99 / \x9d-x9e) - ) - / \x9d ( - \x85 (\xa5-xa9 / \xad-xb2 / \xbb-xbf) - / \x86 (\x80-x82 / \x85-x8b / \xaa-xad) - / \x89\x82-x84 - / \x90\x80-xbf - / \x91 (\x80-x94 / \x96-xbf) - / \x92 (\x80-x9c / \x9e-x9f / \xa2 / \xa5-xa6 / \xa9-xac / \xae-xb9 / \xbb / \xbd-xbf) - / \x93 (\x80-x83 / \x85-xbf) - / \x94 (\x80-x85 / \x87-x8a / 
\x8d-x94 / \x96-x9c / \x9e-xb9 / \xbb-xbe) - / \x95 (\x80-x84 / \x86 / \x8a-x90 / \x92-xbf) - / \x96-x99\x80-xbf - / \x9a (\x80-xa5 / \xa8-xbf) - / \x9b (\x80 / \x82-x9a / \x9c-xba / \xbc-xbf) - / \x9c (\x80-x94 / \x96-xb4 / \xb6-xbf) - / \x9d (\x80-x8e / \x90-xae / \xb0-xbf) - / \x9e (\x80-x88 / \x8a-xa8 / \xaa-xbf) - / \x9f (\x80-x82 / \x84-x8b / \x8e-xbf) - / \xa8 (\x80-xb6 / \xbb-xbf) - / \xa9 (\x80-xac / \xb5) - / \xaa (\x84 / \x9b-x9f / \xa1-xaf) - ) - / \x9e ( - \x80 (\x80-x86 / \x88-x98 / \x9b-xa1 / \xa3-xa4 / \xa6-xaa) - / \x84 (\x80-xac / \xb0-xbd) - / \x85 (\x80-x89 / \x8e) - / \x8b\x80-xb9 - / \xa0-xa2\x80-xbf - / \xa3 (\x80-x84 / \x90-x96) - / \xa4\x80-xbf - / \xa5 (\x80-x8b / \x90-x99) - / \xb8 (\x80-x83 / \x85-x9f / \xa1-xa2 / \xa4 / \xa7 / \xa9-xb2 / \xb4-xb7 / \xb9 / \xbb) - / \xb9 (\x82 / \x87 / \x89 / \x8b / \x8d-x8f / \x91-x92 / \x94 / \x97 / \x99 / \x9b / \x9d / \x9f / \xa1-xa2 / \xa4 / \xa7-xaa / \xac-xb2 / \xb4-xb7 / \xb9-xbc / \xbe) - / \xba (\x80-x89 / \x8b-x9b / \xa1-xa3 / \xa5-xa9 / \xab-xbb) - ) - / \xa0\x80-xbf\x80-xbf - / \xa1\x80-xbf\x80-xbf - / \xa2\x80-xbf\x80-xbf - / \xa3\x80-xbf\x80-xbf - / \xa4\x80-xbf\x80-xbf - / \xa5\x80-xbf\x80-xbf - / \xa6\x80-xbf\x80-xbf - / \xa7\x80-xbf\x80-xbf - / \xa8\x80-xbf\x80-xbf - / \xa9\x80-xbf\x80-xbf - / \xaa (\x80-x9a\x80-xbf / \x9b\x80-x96 / \x9c-xbf\x80-xbf) - / \xab ( - \x80-x9b\x80-xbf - / \x9c\x80-xb4 - / \x9d-x9f\x80-xbf - / \xa0 (\x80-x9d / \xa0-xbf) - / \xa1-xbf\x80-xbf - ) - / \xac ( - \x80-xb9\x80-xbf - / \xba (\x80-xa1 / \xb0-xbf) - / \xbb-xbf\x80-xbf - ) - / \xad\x80-xbf\x80-xbf - / \xae (\x80-xae\x80-xbf / \xaf\x80-xa0) - / \xaf (\xa0-xa7\x80-xbf / \xa8\x80-x9d) -) -/ \xf3\xa0 (\x84-x86\x80-xbf / \x87\x80-xaf) -) diff --git a/types.h b/types.h index da1ce40..eb8a925 100644 --- a/types.h +++ b/types.h @@ -11,6 +11,7 @@ enum BPEGFlag { BPEG_VERBOSE = 1 << 0, BPEG_IGNORECASE = 1 << 1, + BPEG_EXPLAIN = 1 << 2, }; /* @@ -26,6 +27,7 @@ enum VMOpcode { VM_BEFORE, VM_AFTER, VM_CAPTURE, + 
VM_HIDE, VM_OTHERWISE, VM_CHAIN, VM_EQUAL, diff --git a/viz.c b/viz.c new file mode 100644 index 0000000..c133156 --- /dev/null +++ b/viz.c @@ -0,0 +1,158 @@ +/* + * viz.c - Visualize matches. + */ + +#include +#include +#include + +#include "types.h" +#include "viz.h" + + +/* +static size_t utf8_len(const char *s) +{ + size_t len = 0; + while (*s) { + len += (*s++ & 0xC0) != 0x80; + } + return len; +} +*/ + +static int match_height(match_t *m) +{ + int height = 0; + for (match_t *c = m->child; c; c = c->nextsibling) { + int childheight = match_height(c); + if (childheight > height) height = childheight; + } + return 1 + height; +} + +static void _visualize_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen) +{ + if (!firstmatch) return; + + const char *V = "│"; // Vertical bar + const char *H = "─"; // Horizontal bar + const char *color = (depth % 2 == 0) ? "34" : "33"; + + match_t *viz = firstmatch->m; + for (match_node_t *p = firstmatch; p; p = p->next) + if (match_height(p->m) > match_height(viz)) + viz = p->m; + const char *viz_type = viz->op->start; + size_t viz_typelen = (size_t)(viz->op->end - viz->op->start); + + printf("\033[%ldG\033[%s;1m", 2*textlen+3, color); + for (size_t i = 0; i < viz_typelen; i++) { + switch (viz_type[i]) { + case '\n': printf("↵"); break; + default: printf("%c", viz_type[i]); break; + } + } + printf("\033[0m"); + + match_node_t *children = NULL; + match_node_t **nextchild = &children; + +#define RIGHT_TYPE(m) (m->m->op->end == m->m->op->start + viz_typelen && strncmp(m->m->op->start, viz_type, viz_typelen) == 0) + // Print nonzero-width first: + for (match_node_t *m = firstmatch; m; m = m->next) { + //tree_text = byteslice(text, tree['start'], tree['end']).replace('\n', '↵') + if (RIGHT_TYPE(m)) { + //if (m->m->op->op != VM_REF) { + for (match_t *c = m->m->child; c; c = c->nextsibling) { + *nextchild = calloc(1, sizeof(match_node_t)); + (*nextchild)->m = c; + nextchild = &((*nextchild)->next); + } + //} 
+ if (m->m->end == m->m->start) continue; + printf("\033[%ldG\033[0;2m%s\033[0;7;%sm", 1+2*(m->m->start - text), V, color); + for (const char *c = m->m->start; c < m->m->end; ++c) { + // TODO: newline + if (c > m->m->start) printf(" "); + // TODO: utf8 + //while ((*c & 0xC0) != 0x80) printf("%c", *(c++)); + printf("%c", *c); + } + printf("\033[0;2m%s\033[0m", V); + } else { + *nextchild = calloc(1, sizeof(match_node_t)); + (*nextchild)->m = m->m; + nextchild = &((*nextchild)->next); + printf("\033[%ldG\033[0;2m%s", 1+2*(m->m->start - text), V); + for (ssize_t i = (ssize_t)(2*(m->m->end - m->m->start)-1); i > 0; i--) + printf(" "); + if (m->m->end > m->m->start) + printf("\033[0;2m%s", V); + printf("\033[0m"); + } + } + + // Print stars for zero-width: + for (match_node_t *m = firstmatch; m; m = m->next) { + if (m->m->end > m->m->start) continue; + if (RIGHT_TYPE(m)) { + printf("\033[%ldG\033[7;%sm▒\033[0m", 1+2*(m->m->start - text), color); + } else { + printf("\033[%ldG\033[0;2m%s\033[0m", 1+2*(m->m->start - text), V); + } + } + + printf("\n"); + + for (match_node_t *m = firstmatch; m; m = m->next) { + if (m->m->end == m->m->start) { + if (!RIGHT_TYPE(m)) + printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), V); + } else { + const char *l = "└"; + const char *r = "┘"; + for (match_node_t *c = children; c; c = c->next) { + if (c->m->start == m->m->start || c->m->end == m->m->start) l = V; + if (c->m->start == m->m->end || c->m->end == m->m->end) r = V; + } + printf("\033[%ldG\033[0;2m%s", 1 + 2*(m->m->start - text), l); + const char *h = RIGHT_TYPE(m) ? 
H : " "; + for (ssize_t n = (ssize_t)(2*(m->m->end - m->m->start) - 1); n > 0; n--) + printf("%s", h); + printf("%s\033[0m", r); + } + } +#undef RIGHT_TYPE + + printf("\n"); + + if (children) + _visualize_matches(children, depth+1, text, textlen); + + for (match_node_t *c = children, *next = NULL; c; c = next) { + next = c->next; + free(c); + } +} + +static void _visualize_patterns(match_t *m) +{ + if (m->op->op == VM_REF && strcmp(m->op->args.s, "pattern") == 0) { + m = m->child; + match_node_t first = {.m = m}; + _visualize_matches(&first, 0, m->start, (size_t)(m->end - m->start)); + } else { + for (match_t *c = m->child; c; c = c->nextsibling) + _visualize_patterns(c); + } +} + +void visualize_match(match_t *m) +{ + printf("\033[?7l"); + //match_node_t first = {.m = m}; + //_visualize_matches(&first, 0, m->start, (m->end - m->start)); + _visualize_patterns(m); + printf("\033[?7h"); +} diff --git a/viz.h b/viz.h new file mode 100644 index 0000000..80d8cee --- /dev/null +++ b/viz.h @@ -0,0 +1,10 @@ +/* + * Header file for viz.c (visualizing matches) + */ + +typedef struct match_node_s { + match_t *m; + struct match_node_s *next; +} match_node_t; + +void visualize_match(match_t *m); diff --git a/vm.c b/vm.c index e79505c..8f4d44f 100644 --- a/vm.c +++ b/vm.c @@ -26,6 +26,7 @@ static const char *opcode_names[] = { [VM_BEFORE] = "BEFORE", [VM_AFTER] = "AFTER", [VM_CAPTURE] = "CAPTURE", + [VM_HIDE] = "HIDE", [VM_OTHERWISE] = "OTHERWISE", [VM_CHAIN] = "CHAIN", [VM_REPLACE] = "REPLACE", @@ -257,6 +258,16 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un m->value.name = op->args.capture.name; return m; } + case VM_HIDE: { + match_t *p = _match(g, f, str, op->args.pat, flags, rec); + if (p == NULL) return NULL; + match_t *m = calloc(sizeof(match_t), 1); + m->start = str; + m->end = p->end; + m->op = op; + m->child = p; + return m; + } case VM_OTHERWISE: { match_t *m = _match(g, f, str, op->args.multiple.first, flags, rec); if (m == NULL) 
m = _match(g, f, str, op->args.multiple.second, flags, rec); @@ -450,6 +461,12 @@ void print_pattern(vm_op_t *op) fprintf(stderr, ")"); break; } + case VM_HIDE: { + fprintf(stderr, "hidden ("); + print_pattern(op->args.pat); + fprintf(stderr, ")"); + break; + } case VM_CAPTURE: { fprintf(stderr, "capture ("); print_pattern(op->args.pat); @@ -547,63 +564,185 @@ static match_t *get_cap(match_t *m, const char **r) return NULL; } +typedef struct { + size_t line, printed_line; + const char *color; +} print_state_t; + +static void print_line_number(print_state_t *state, print_options_t options) +{ + state->printed_line = state->line; + if (!(options & PRINT_LINE_NUMBERS)) return; + if (options & PRINT_COLOR) + printf("\033[0;2m% 5ld\033(0\x78\033(B%s", state->line, state->color); + else + printf("% 5ld|", state->line); +} + /* * Print a match with replacements and highlighting. */ -void print_match(file_t *f, match_t *m) +static void _print_match(file_t *f, match_t *m, print_state_t *state, print_options_t options) { - if (m->op->op == VM_REPLACE) { + static const char *hl = "\033[0;31;1m"; + const char *old_color = state->color; + if (m->op->op == VM_HIDE) { + // TODO: handle replacements? 
+ for (const char *p = m->start; p < m->end; p++) { + if (*p == '\n') ++state->line; + } + } else if (m->op->op == VM_REPLACE) { + if (options & PRINT_COLOR && state->color != hl) { + state->color = hl; + printf("%s", state->color); + } for (const char *r = m->value.replacement; *r; ) { - if (*r == '\\') { - ++r; - fputc(unescapechar(r, &r), stdout); - continue; - } else if (*r != '@') { - fputc(*r, stdout); + if (*r == '@' && r[1] && r[1] != '@') { ++r; - continue; + match_t *cap = get_cap(m, &r); + if (cap != NULL) { + _print_match(f, cap, state, options); + continue; + } else { + --r; + } } - ++r; - if (*r == '@' || *r == '\0') { - fputc('@', stdout); - continue; - } - if (*r == '#') { + if (state->printed_line != state->line) + print_line_number(state, options); + + if (*r == '\\') { ++r; - printf("%ld", get_line_number(f, m->start)); + unsigned char c = unescapechar(r, &r); + fputc(c, stdout); + if (c == '\n') ++state->line; continue; - } else if (*r == ':') { + } else if (*r == '\n') { + fputc('\n', stdout); + ++state->line; ++r; - printf("%ld", get_char_number(f, m->start)); continue; - } else if (*r == '&') { + } else { + fputc(*r, stdout); ++r; - printf("%s", f->filename ? f->filename : "-"); continue; } - match_t *cap = get_cap(m, &r); - if (cap != NULL) { - print_match(f, cap); - } else { - fputc('@', stdout); - } } } else { + if (m->op->op == VM_CAPTURE) { + if (options & PRINT_COLOR && state->color != hl) { + state->color = hl; + printf("%s", state->color); + } + } + const char *prev = m->start; for (match_t *child = m->child; child; child = child->nextsibling) { // Skip children from e.g. 
zero-width matches like >@foo if (!(prev <= child->start && child->start <= m->end && prev <= child->end && child->end <= m->end)) continue; - if (child->start > prev) - printf("%.*s", (int)(child->start - prev), prev); - print_match(f, child); + if (child->start > prev) { + for (const char *p = prev; p < child->start; ++p) { + if (state->printed_line != state->line) + print_line_number(state, options); + fputc(*p, stdout); + if (*p == '\n') ++state->line; + } + } + _print_match(f, child, state, options); prev = child->end; } - if (m->end > prev) - printf("%.*s", (int)(m->end - prev), prev); + if (m->end > prev) { + for (const char *p = prev; p < m->end; ++p) { + if (state->printed_line != state->line) + print_line_number(state, options); + fputc(*p, stdout); + if (*p == '\n') ++state->line; + } + } + } + if (options & PRINT_COLOR && old_color != state->color) { + printf("%s", old_color); + state->color = old_color; + } +} + +void print_match(file_t *f, match_t *m, print_options_t options) +{ + print_state_t state = {.line = 1, .color = "\033[0m"}; + _print_match(f, m, &state, options); +} + +/* + * Print a match as JSON + */ +static int _json_match(FILE *f, const char *text, match_t *m, int comma) +#define VERBOSE_JSON 1 +#if VERBOSE_JSON +{ + if (comma) fprintf(f, ",\n"); + comma = 0; + fprintf(f, "{\"type\":\""); + for (const char *c = m->op->start; c < m->op->end; c++) { + switch (*c) { + case '"': fprintf(f, "\\\""); break; + case '\\': fprintf(f, "\\\\"); break; + case '\t': fprintf(f, "\\t"); break; + case '\n': fprintf(f, "↵"); break; + default: fprintf(f, "%c", *c); break; + } + } + fprintf(f, "\",\"start\":%ld,\"end\":%ld,\"children\":[", + m->start - text, m->end - text); + for (match_t *child = m->child; child; child = child->nextsibling) { + comma |= _json_match(f, text, child, comma); + } + fprintf(f, "]}"); + return 1; +} +#else +{ + if (m->op->op == VM_STRING) { + if (comma) fprintf(f, ",\n"); + comma = 0; + fprintf(f, "{\"type\":\"\\\""); + for 
(const char *c = m->op->args.s; *c; c++) { + switch (*c) { + case '"': fprintf(f, "\\\""); break; + case '\\': fprintf(f, "\\\\"); break; + case '\t': fprintf(f, "\\t"); break; + case '\n': fprintf(f, "↵"); break; + default: fprintf(f, "%c", *c); break; + } + } + fprintf(f, "\\\"\",\"start\":%ld,\"end\":%ld,\"children\":[", + m->start - text, m->end - text); + } else if (m->op->op == VM_REF) { + if (comma) fprintf(f, ",\n"); + comma = 0; + fprintf(f, "{\"type\":\"%s\",\"start\":%ld,\"end\":%ld,\"children\":[", + m->op->args.s, m->start - text, m->end - text); + } else if (m->op->op == VM_CAPTURE && m->value.name) { + if (comma) fprintf(f, ",\n"); + comma = 0; + fprintf(f, "{\"type\":\"@%s\",\"start\":%ld,\"end\":%ld,\"children\":[", + m->value.name, m->start - text, m->end - text); + } + for (match_t *child = m->child; child; child = child->nextsibling) { + comma |= _json_match(f, text, child, comma); } + if (m->op->op == VM_REF || m->op->op == VM_STRING || (m->op->op == VM_CAPTURE && m->value.name)) { + fprintf(f, "]}"); + return 1; + } + return comma; +} +#endif + +void json_match(FILE *f, const char *text, match_t *m) +{ + _json_match(f, text, m, 0); } static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsigned int flags) diff --git a/vm.h b/vm.h index e0e6b5d..e862b79 100644 --- a/vm.h +++ b/vm.h @@ -11,6 +11,11 @@ #include "types.h" +typedef enum { + PRINT_COLOR = 1<<0, + PRINT_LINE_NUMBERS = 1<<1, +} print_options_t; + const char *opcode_name(enum VMOpcode o); __attribute__((hot, nonnull)) match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags); @@ -19,7 +24,9 @@ void destroy_match(match_t **m); __attribute__((nonnull)) void print_pattern(vm_op_t *op); __attribute__((nonnull)) -void print_match(file_t *f, match_t *m); +void print_match(file_t *f, match_t *m, print_options_t options); +__attribute__((nonnull)) +void json_match(FILE *f, const char *text, match_t *m); #endif // vim: ts=4 sw=0 et 
cino=L2,l1,(0,W4,m1 -- cgit v1.2.3