diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2021-08-01 12:40:27 -0700 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2021-08-01 12:40:27 -0700 |
| commit | 994c9c973e0fd771699c3a5c76bee11f9b744c84 (patch) | |
| tree | 8257c1abe2d18a50a46a1d712eadbd25fd7e4f8b | |
| parent | cb9b4c40d87480bc794b90c2a36ed0f4c3240d8a (diff) | |
Changed how definitions work
| -rw-r--r-- | bp.c | 17 | ||||
| -rw-r--r-- | definitions.c | 28 | ||||
| -rw-r--r-- | grammars/bp.bp | 8 | ||||
| -rw-r--r-- | grammars/c++.bp | 5 | ||||
| -rw-r--r-- | grammars/c.bp | 5 | ||||
| -rw-r--r-- | grammars/go.bp | 5 | ||||
| -rw-r--r-- | grammars/javascript.bp | 5 | ||||
| -rw-r--r-- | grammars/lua.bp | 7 | ||||
| -rw-r--r-- | grammars/python.bp | 4 | ||||
| -rw-r--r-- | grammars/rust.bp | 5 | ||||
| -rw-r--r-- | grammars/shell.bp | 5 | ||||
| -rw-r--r-- | match.c | 10 | ||||
| -rw-r--r-- | pattern.c | 116 | ||||
| -rw-r--r-- | pattern.h | 2 | ||||
| -rw-r--r-- | types.h | 10 | ||||
| -rw-r--r-- | utils.c | 3 |
16 files changed, 113 insertions, 122 deletions
@@ -546,20 +546,9 @@ int main(int argc, char *argv[]) defs = load_grammar(defs, f); // Keep in memory for debug output } else if (FLAG("-p") || FLAG("--pattern")) { file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", flag, -1); - for (const char *str = arg_file->start; str < arg_file->end; ) { - def_t *d = bp_definition(defs, arg_file, str); - if (d) { - defs = d; - str = after_spaces(d->pat->end); - } else { - pat_t *p = bp_pattern(arg_file, str); - if (!p) - file_err(arg_file, str, arg_file->end, - "Failed to compile this part of the argument"); - pattern = chain_together(arg_file, pattern, p); - str = after_spaces(p->end); - } - } + pat_t *p = bp_pattern(arg_file, arg_file->start); + if (!p) file_err(arg_file, arg_file->start, arg_file->end, "Failed to compile this part of the argument"); + pattern = chain_together(arg_file, pattern, p); } else if (FLAG("-w") || FLAG("--word")) { check_nonnegative(asprintf(&flag, "\\|%s\\|", flag), "Could not allocate memory"); file_t *arg_file = spoof_file(&loaded_files, "<word pattern>", flag, -1); diff --git a/definitions.c b/definitions.c index c85e227..8216e1d 100644 --- a/definitions.c +++ b/definitions.c @@ -33,26 +33,16 @@ def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat) // def_t *load_grammar(def_t *defs, file_t *f) { - const char *src = f->start; - src = after_spaces(src); - while (src < f->end) { - const char *name = src; - src = after_name(name); - if (src <= name) - file_err(f, name, src, "Invalid name for definition: %s", name); - size_t namelen = (size_t)(src - name); - if (!matchchar(&src, ':')) - errx(EXIT_FAILURE, "Expected ':' in definition"); - pat_t *pat = bp_pattern(f, src); - if (pat == NULL) break; - defs = with_def(defs, namelen, name, pat); - src = pat->end; - src = after_spaces(src); - if (matchchar(&src, ';')) - src = after_spaces(src); + const char *str = after_spaces(f->start); + while (*str == '\r' || *str == '\n') str = after_spaces(++str); + pat_t *pat = bp_pattern(f, str); + if (!pat) file_err(f, str, f->end, "Could not parse this file"); + if (pat->end < f->end) file_err(f, pat->end, f->end, "Could not parse this part of the file"); + for (pat_t *p = pat; p && p->type == BP_DEFINITION; p = p->args.def.pat) { + // printf("Def '%.*s': %.*s\n", (int)p->args.def.namelen, p->args.def.name, + // (int)(p->args.def.def->end - p->args.def.def->start), p->args.def.def->start); + defs = with_def(defs, p->args.def.namelen, p->args.def.name, p->args.def.def); } - if (src < f->end) - file_err(f, src, NULL, "Invalid BP pattern"); return defs; } diff --git a/grammars/bp.bp b/grammars/bp.bp index 3374f0c..2b0f4f9 100644 --- a/grammars/bp.bp +++ b/grammars/bp.bp @@ -12,13 +12,13 @@ Def: @name=id __ `: __ ( / (!)(..%\n>(`;/id_`:/$) => "Invalid definition: @0")) # This is used for command line arguments: - String-pattern: ..%(\n / Nodent / Identifier-char / Identifier-start / Escape / `\ pat [`;])$$ +String-pattern: ..%(\n / Nodent / Identifier-char / Identifier-start / Escape / `\ pat [`;])$$ pat: simple-pat !(__("!~"/"~")) / suffixed-pat -simple-pat: Upto-and / Dot / Word-boundary/ String / Chars / Nodent +simple-pat: (Upto-and / Dot / Word-boundary/ String / Chars / Nodent / Identifier-char / Identifier-start / Escape-range / Escape / Repeat / Optional / No / After / Before / Capture / Error / Empty-replacement - / Start-of-File / Start-of-Line / End-of-File / End-of-Line / Ref / parens + / Start-of-File / Start-of-Line / End-of-File / End-of-Line / Ref / parens) suffixed-pat: ( Match-pat @@ -51,7 +51,7 @@ Nodent: "\N" Word-boundary: `| / "\b" Identifier-char: "\i" Identifier-start: "\I" -Upto-and: ".." [__(`%/`=)__@second=simple-pat] [__@first=simple-pat] +Upto-and: ".." [__(`%/`=)__@second=simple-pat] [__@first=simple-pat] Repeat: ( @min=(=>'0') (`*=>"-") @max=(=>'∞') / @min=int __ `- __ @max=int diff --git a/grammars/c++.bp b/grammars/c++.bp index 1814320..d6135b2 100644 --- a/grammars/c++.bp +++ b/grammars/c++.bp @@ -8,8 +8,7 @@ comment: "//" .. $ / "/*" ..%\n "*/" string: `" ..%string-escape `" -keyword: - "alignas" / "alignof" / "and" / "and_eq" / "asm" / "atomic_cancel" / "atomic_commit" / +keyword: ("alignas" / "alignof" / "and" / "and_eq" / "asm" / "atomic_cancel" / "atomic_commit" / "atomic_noexcept" / "auto" / "bitand" / "bitor" / "bool" / "break" / "case" / "catch" / "char" / "char8_t" / "char16_t" / "char32_t" / "class" / "compl" / "concept" / "const" / "consteval" / "constexpr" / "constinit" / "const_cast" / "continue" / "co_await" / @@ -21,7 +20,7 @@ keyword: "requires" / "return" / "short" / "signed" / "sizeof" / "static" / "static_assert" / "static_cast" / "struct" / "switch" / "synchronized" / "template" / "this" / "thread_local" / "throw" / "true" / "try" / "typedef" / "typeid" / "typename" / "union" / - "unsigned" / "using" / "virtual" / "void" / "volatile" / "wchar_t" / "while" / "xor" / "xor_eq" + "unsigned" / "using" / "virtual" / "void" / "volatile" / "wchar_t" / "while" / "xor" / "xor_eq") function-def: ^_ 2+(id / keyword / anglebraces / `*) % __ parens (__`; / >(__`{)) function: function-def __ braces macro: ^"#define"| ..$ *(<`\ \n..$) diff --git a/grammars/c.bp b/grammars/c.bp index 03403eb..31e123a 100644 --- a/grammars/c.bp +++ b/grammars/c.bp @@ -9,12 +9,11 @@ comment: "//" .. $ / "/*" ..%\n "*/" string: `" ..%string-escape `" string-escape: `\ (`x 2 Hex / 1-3 `0-7 / .) -keyword: - "auto" / "break" / "case" / "char" / "const" / "continue" / "default" / "do" / +keyword: ("auto" / "break" / "case" / "char" / "const" / "continue" / "default" / "do" / "double" / "else" / "enum" / "extern" / "float" / "for" / "goto" / "if" / "int" / "long" / "register" / "return" / "short" / "signed" / "sizeof" / "static" / "struct" / "switch" / "typedef" / "union" / "unsigned" / "void" / - "volatile" / "while" + "volatile" / "while") function-def: ^_ 2+(id / keyword / `*) % __ parens (__`; / >(__`{)) function: function-def __ braces macro: ^"#define"| ..$ *(<`\ \n..$) diff --git a/grammars/go.bp b/grammars/go.bp index f31e21c..c6f41c1 100644 --- a/grammars/go.bp +++ b/grammars/go.bp @@ -8,10 +8,9 @@ comment: "//" .. $ / "/*" ..%\n "*/" string: `" ..%string-escape `" -keyword: - "break" / "default" / "func" / "interface" / "select" / "case" / "defer" / "go" / +keyword: ("break" / "default" / "func" / "interface" / "select" / "case" / "defer" / "go" / "map" / "struct" / "chan" / "else" / "goto" / "package" / "switch" / "const" / - "fallthrough" / "if" / "range" / "type" / "continue" / "for" / "import" / "return" / "var" + "fallthrough" / "if" / "range" / "type" / "continue" / "for" / "import" / "return" / "var") function-def: |"func"| __ id __ parens __ [id / parens] >(__`{) function: function-def __ braces import: |"import"| __ (parens / string) diff --git a/grammars/javascript.bp b/grammars/javascript.bp index 438ddab..2691c62 100644 --- a/grammars/javascript.bp +++ b/grammars/javascript.bp @@ -8,8 +8,7 @@ comment: "//" .. $ / "/*" ..%\n "*/" string: `" ..%string-escape `" / `' ..%string-escape `' / `/ ..%string-escape `/ -keyword: - "abstract" / "arguments" / "await" / "boolean" / "break" / "byte" / "case" / +keyword: ("abstract" / "arguments" / "await" / "boolean" / "break" / "byte" / "case" / "catch" / "char" / "class" / "const" / "continue" / "debugger" / "default" / "delete" / "do" / "double" / "else" / "enum" / "eval" / "export" / "extends" / "false" / "final" / "finally" / "float" / "for" / "function" / "goto" / "if" / @@ -17,7 +16,7 @@ keyword: "long" / "native" / "new" / "null" / "package" / "private" / "protected" / "public" / "return" / "short" / "static" / "super" / "switch" / "synchronized" / "this" / "throw" / "throws" / "transient" / "true" / "try" / "typeof" / "var" / - "void" / "volatile" / "while" / "with" / "yield" + "void" / "volatile" / "while" / "with" / "yield") function-def: |"function"| __ [id__] parens / (id / parens) __ "=>" function: function-def __ braces import: |"import"| ..%braces (`; / $) diff --git a/grammars/lua.bp b/grammars/lua.bp index 6005514..3d3b862 100644 --- a/grammars/lua.bp +++ b/grammars/lua.bp @@ -9,10 +9,9 @@ comment: "--" (`[ @eqs=*`= `[ ..%\n (`]eqs`]) / ..$) string: `"..%string-escape `" / `' ..%string-escape `' / `[ @eqs=*`= `[ ..%\n (`]eqs`]) table: `{ ..%(table/string/comment/\n) `} -keyword: - "and" / "break" / "do" / "else" / "elseif" / "end" / "false" / "for" / - "function" / "goto" / "if" / "in" / "local" / "nil" / "not" / "or" / - "repeat" / "return" / "then" / "true" / "until" / "while" +keyword: ("and" / "break" / "do" / "else" / "elseif" / "end" / "false" / "for" / + "function" / "goto" / "if" / "in" / "local" / "nil" / "not" / "or" / + "repeat" / "return" / "then" / "true" / "until" / "while") function-def: |"function"|[_id (*(`.id)[`:id])]_ parens block: function / if-block / while-block / for-block / repeat-block / do-block repeat-block: |"repeat"| ..%(comment/string/\n) (|"until"|) diff --git a/grammars/python.bp b/grammars/python.bp index 51c3c44..ee749d0 100644 --- a/grammars/python.bp +++ b/grammars/python.bp @@ -8,11 +8,11 @@ comment: `# ..$ string: "'''" ..%\n "'''" / '"""' ..%\n '"""' / `" ..%string-escape `" / `' ..%string-escape `' -keyword: "and" / "as" / "assert" / "break" / "class" / "continue" / "def" / +keyword: ("and" / "as" / "assert" / "break" / "class" / "continue" / "def" / "del" / "elif" / "else" / "except" / "finally" / "for" / "from" / "global" / "if" / "import" / "in" / "is" / "lambda" / "None" / "nonlocal" / "not" / "or" / "pass" / "raise" / "return" / "try" / "while" / - "with" / "yield" + "with" / "yield") class: class-def +(\N ..$) class-def: ^_"class"|_id[_parens]_`: function: function-def +(\N ..$) diff --git a/grammars/rust.bp b/grammars/rust.bp index 4df6b53..9ac459b 100644 --- a/grammars/rust.bp +++ b/grammars/rust.bp @@ -8,11 +8,10 @@ comment: "//" .. $ / "/*" ..%(comment / \n) "*/" string: `" ..%string-escape `" -keyword: - "as" / "break" / "const" / "continue" / "crate" / "else" / "enum" / "extern" / +keyword: ("as" / "break" / "const" / "continue" / "crate" / "else" / "enum" / "extern" / "false" / "fn" / "for" / "if" / "impl" / "in" / "let" / "loop" / "match" / "mod" / "move" / "mut" / "pub" / "ref" / "return" / "self" / "Self" / "static" / - "struct" / "super" / "trait" / "true" / "type" / "unsafe" / "use" / "where" / "while" + "struct" / "super" / "trait" / "true" / "type" / "unsafe" / "use" / "where" / "while") function-def: |"fn"| __ id __ parens __ ["->"__(id / parens)] >(__`{) function: function-def __ braces import: |"use"| _ *(id / braces) % "::" _ `; diff --git a/grammars/shell.bp b/grammars/shell.bp index 72b83cc..76cdcfa 100644 --- a/grammars/shell.bp +++ b/grammars/shell.bp @@ -10,10 +10,9 @@ comment: `#..$ string: `" ..%(string-escape / subcommand / \n) `" / `' ..%\n `' / "<<" _ @delim=id _$ ..%\n (^delim$) string-escape: `\ `",` subcommand: `` ..%\n `` / "$" (parens/braces) -keyword: - "echo" / "read" / "set" / "unset" / "readonly" / "shift" / "export" / "if" / "fi" / +keyword: ("echo" / "read" / "set" / "unset" / "readonly" / "shift" / "export" / "if" / "fi" / "else" / "while" / "do" / "done" / "for" / "until" / "case" / "esac" / "break" / - "continue" / "exit" / "return" / "trap" / "wait" / "eval" / "exec" / "ulimit" / "umask" + "continue" / "exit" / "return" / "trap" / "wait" / "eval" / "exec" / "ulimit" / "umask") function-def: ^_ ["function"_] id _ `(_`) >(__`{) function: function-def __ braces var: `$ (id / braces) @@ -106,7 +106,7 @@ static match_t *cache_lookup(def_t *defs, const char *str, pat_t *pat) if (!cache.matches) return NULL; size_t h = hash(str, pat) & (cache.size-1); for (match_t *c = cache.matches[h]; c; c = c->cache.next) { - if (c->pat == pat && c->defs_id == defs->id && c->start == str) + if (c->pat == pat && c->defs_id == (defs?defs->id:0) && c->start == str) return c; } return NULL; @@ -291,6 +291,12 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool ignorecase) { switch (pat->type) { + case BP_DEFINITION: { + def_t *defs2 = with_def(defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def); + match_t *m = match(defs2, f, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def, ignorecase); + defs = free_defs(defs2, defs); + return m; + } case BP_LEFTRECURSION: { // Left recursion occurs when a pattern directly or indirectly // invokes itself at the same position in the text. It's handled as @@ -727,7 +733,7 @@ static match_t *new_match(def_t *defs, pat_t *pat, const char *start, const char m->pat = pat; m->start = start; m->end = end; - m->defs_id = defs->id; + m->defs_id = (defs?defs->id:0); if (children) { for (int i = 0; children[i]; i++) @@ -8,18 +8,19 @@ #include <string.h> #include <unistd.h> -#include "definitions.h" #include "files.h" #include "pattern.h" #include "utils.h" #include "utf8.h" __attribute__((nonnull)) -static pat_t *expand_replacements(file_t *f, pat_t *replace_pat); +static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl); __attribute__((nonnull)) -static pat_t *expand_chain(file_t *f, pat_t *first); +static pat_t *expand_replacements(file_t *f, pat_t *replace_pat, bool allow_nl); __attribute__((nonnull)) -static pat_t *expand_choices(file_t *f, pat_t *first); +static pat_t *expand_chain(file_t *f, pat_t *first, bool allow_nl); +__attribute__((nonnull)) +static pat_t *expand_choices(file_t *f, pat_t *first, bool allow_nl); __attribute__((nonnull)) static pat_t *_bp_simplepattern(file_t *f, const char *str); __attribute__((nonnull(1,2,3,6))) @@ -27,6 +28,8 @@ static pat_t *new_range(file_t *f, const char *start, const char *end, size_t mi __attribute__((nonnull(1,2))) static pat_t *bp_simplepattern(file_t *f, const char *str); +#define SKIP_NL_SPACES(str) for (str = after_spaces(str); *str == '\n' || *str == '\r'; ) str = after_spaces(++str) + // // Allocate a new pattern for this file (ensuring it will be automatically // freed when the file is freed) @@ -68,11 +71,13 @@ static pat_t *new_range(file_t *f, const char *start, const char *end, size_t mi // Take a pattern and expand it into a chain of patterns if it's followed by // any patterns (e.g. "`x `y"), otherwise return the original input. // -static pat_t *expand_chain(file_t *f, pat_t *first) +static pat_t *expand_chain(file_t *f, pat_t *first, bool allow_nl) { - pat_t *second = bp_simplepattern(f, first->end); + const char *str = first->end; + if (allow_nl) SKIP_NL_SPACES(str); + pat_t *second = bp_simplepattern(f, str); if (second == NULL) return first; - second = expand_chain(f, second); + second = expand_chain(f, second, allow_nl); if (second->end <= first->end) file_err(f, second->end, second->end, "This chain is not parsing properly"); @@ -82,10 +87,12 @@ static pat_t *expand_chain(file_t *f, pat_t *first) // // Match trailing => replacements (with optional pattern beforehand) // -static pat_t *expand_replacements(file_t *f, pat_t *replace_pat) +static pat_t *expand_replacements(file_t *f, pat_t *replace_pat, bool allow_nl) { const char *str = replace_pat->end; + if (allow_nl) SKIP_NL_SPACES(str); while (matchstr(&str, "=>")) { + if (allow_nl) SKIP_NL_SPACES(str); const char *repstr; size_t replen; if (matchchar(&str, '"') || matchchar(&str, '\'')) { @@ -121,18 +128,24 @@ static pat_t *expand_replacements(file_t *f, pat_t *replace_pat) // chain of choices if it's followed by any "/"-separated patterns (e.g. // "`x/`y"), otherwise return the original input. // -static pat_t *expand_choices(file_t *f, pat_t *first) +static pat_t *expand_choices(file_t *f, pat_t *first, bool allow_nl) { - first = expand_chain(f, first); - first = expand_replacements(f, first); + first = expand_chain(f, first, allow_nl); + first = expand_replacements(f, first, allow_nl); const char *str = first->end; + if (allow_nl) SKIP_NL_SPACES(str); if (!matchchar(&str, '/')) return first; + if (allow_nl) SKIP_NL_SPACES(str); pat_t *second = bp_simplepattern(f, str); + if (second) { + str = second->end; + if (allow_nl) SKIP_NL_SPACES(str); + } if (matchstr(&str, "=>")) - second = expand_replacements(f, second ? second : new_pat(f, str-2, str-2, 0, 0, BP_STRING)); + second = expand_replacements(f, second ? second : new_pat(f, str-2, str-2, 0, 0, BP_STRING), allow_nl); if (!second) file_err(f, str, str, "There should be a pattern here after a '/'"); - second = expand_choices(f, second); + second = expand_choices(f, second, allow_nl); return either_pat(f, first, second); } @@ -187,7 +200,7 @@ pat_t *either_pat(file_t *f, pat_t *first, pat_t *second) } // -// Wrapper for _bp_simplepattern() that expands any postfix operators +// Wrapper for _bp_simplepattern() that expands any postfix operators (~, !~) // static pat_t *bp_simplepattern(file_t *f, const char *str) { @@ -416,27 +429,29 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) if (matchstr(&str, "!)")) { // (!) errors pat_t *pat = bp_simplepattern(f, str); if (!pat) pat = new_pat(f, str, str, 0, 0, BP_STRING); - pat = expand_replacements(f, pat); + pat = expand_replacements(f, pat, false); pat_t *error = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_ERROR); error->args.pat = pat; return error; } - pat_t *pat = bp_pattern(f, str); + pat_t *pat = bp_pattern_nl(f, str, true); if (!pat) file_err(f, str, str, "There should be a valid pattern after this parenthesis."); str = pat->end; - (void)matchchar(&str, ')'); + SKIP_NL_SPACES(str); + if (!matchchar(&str, ')')) file_err(f, str, str, "Missing paren: )"); pat->start = start; pat->end = str; return pat; } // Square brackets case '[': { - pat_t *maybe = bp_pattern(f, str); + pat_t *maybe = bp_pattern_nl(f, str, true); if (!maybe) file_err(f, str, str, "There should be a valid pattern after this square bracket."); str = maybe->end; + SKIP_NL_SPACES(str); (void)matchchar(&str, ']'); return new_range(f, start, str, 0, 1, maybe, NULL); } @@ -488,28 +503,30 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) return new_pat(f, start, str, 0, 0, BP_END_OF_FILE); return new_pat(f, start, str, 0, 0, BP_END_OF_LINE); } - // Whitespace: - case '_': { - size_t namelen = 1; - if (matchchar(&str, '_')) // double __ (whitespace with newlines) - ++namelen; - if (matchchar(&str, ':')) return NULL; // Don't match definitions - pat_t *ref = new_pat(f, start, str, 0, -1, BP_REF); - ref->args.ref.name = start; - ref->args.ref.len = namelen; - return ref; - } default: { // Reference - if (!isalpha(c)) return NULL; - --str; - const char *refname = str; - str = after_name(str); - if (matchchar(&str, ':')) // Don't match definitions - return NULL; + if (!isalpha(c) && c != '_') return NULL; + str = after_name(start); + size_t namelen = (size_t)(str - start); + if (matchchar(&str, ':')) { // Definitions + pat_t *def = bp_pattern_nl(f, str, false); + if (!def) file_err(f, str, f->end, "Could not parse this definition."); + str = def->end; + (void)matchchar(&str, ';'); // Optional semicolon + SKIP_NL_SPACES(str); + pat_t *pat = bp_pattern_nl(f, str, false); + if (pat) str = pat->end; + else pat = def; + pat_t *ret = new_pat(f, start, str, pat->min_matchlen, pat->max_matchlen, BP_DEFINITION); + ret->args.def.name = start; + ret->args.def.namelen = namelen; + ret->args.def.def = def; + ret->args.def.pat = pat; + return ret; + } pat_t *ref = new_pat(f, start, str, 0, -1, BP_REF); - ref->args.ref.name = refname; - ref->args.ref.len = (size_t)(str - refname); + ref->args.ref.name = start; + ref->args.ref.len = namelen; return ref; } } @@ -575,32 +592,23 @@ pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement) return pat; } -// -// Compile a string representing a BP pattern into a pattern object. -// -pat_t *bp_pattern(file_t *f, const char *str) +static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl) { + SKIP_NL_SPACES(str); pat_t *pat = bp_simplepattern(f, str); - if (pat != NULL) pat = expand_choices(f, pat); + if (pat != NULL) pat = expand_choices(f, pat, allow_nl); + SKIP_NL_SPACES(str); if (matchstr(&str, "=>")) - pat = expand_replacements(f, pat ? pat : new_pat(f, str-2, str-2, 0, 0, BP_STRING)); + pat = expand_replacements(f, pat ? pat : new_pat(f, str-2, str-2, 0, 0, BP_STRING), allow_nl); return pat; } // -// Match a definition (id__`:__pattern) +// Compile a string representing a BP pattern into a pattern object. // -def_t *bp_definition(def_t *defs, file_t *f, const char *str) +pat_t *bp_pattern(file_t *f, const char *str) { - const char *name = after_spaces(str); - str = after_name(name); - if (!str) return NULL; - size_t namelen = (size_t)(str - name); - if (!matchchar(&str, ':')) return NULL; - pat_t *defpat = bp_pattern(f, str); - if (!defpat) return NULL; - (void)matchchar(&defpat->end, ';'); // TODO: verify this is safe to mutate - return with_def(defs, namelen, name, defpat); + return bp_pattern_nl(f, str, false); } // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -19,8 +19,6 @@ __attribute__((nonnull(1))) pat_t *either_pat(file_t *f, pat_t *first, pat_t *second); __attribute__((nonnull)) pat_t *bp_pattern(file_t *f, const char *str); -__attribute__((nonnull)) -def_t *bp_definition(def_t *defs, file_t *f, const char *str); #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1 @@ -37,8 +37,9 @@ enum pattype_e { BP_END_OF_FILE = 22, BP_END_OF_LINE = 23, BP_WORD_BOUNDARY = 24, - BP_LEFTRECURSION = 25, - BP_ERROR = 26, + BP_DEFINITION = 25, + BP_LEFTRECURSION = 26, + BP_ERROR = 27, }; struct match_s; // forward declared to resolve circular struct defs @@ -60,6 +61,11 @@ typedef struct pat_s { size_t len; } ref; struct { + const char *name; + size_t namelen; + struct pat_s *def, *pat; + } def; + struct { unsigned char low, high; } range; struct { @@ -19,7 +19,8 @@ const char *after_spaces(const char *str) // Skip whitespace and comments: skip_whitespace: switch (*str) { - case ' ': case '\r': case '\n': case '\t': { + // case ' ': case '\r': case '\n': case '\t': { + case ' ': case '\t': { ++str; goto skip_whitespace; } |
