Changed how definitions work: a definition is now parsed as a BP_DEFINITION pattern, replacing bp_definition()

author: Bruce Hill <bruce@bruce-hill.com> 2021-08-01 12:40:27 -0700
committer: Bruce Hill <bruce@bruce-hill.com> 2021-08-01 12:40:27 -0700
commit: 994c9c973e0fd771699c3a5c76bee11f9b744c84 (patch)
tree: 8257c1abe2d18a50a46a1d712eadbd25fd7e4f8b
parent: cb9b4c40d87480bc794b90c2a36ed0f4c3240d8a (diff)
16 files changed, 113 insertions, 122 deletions
diff --git a/bp.c b/bp.c
index c9eb009..b909c2b 100644
--- a/bp.c
+++ b/bp.c
@@ -546,20 +546,9 @@ int main(int argc, char *argv[])
             defs = load_grammar(defs, f); // Keep in memory for debug output
         } else if (FLAG("-p")     || FLAG("--pattern")) {
             file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", flag, -1);
-            for (const char *str = arg_file->start; str < arg_file->end; ) {
-                def_t *d = bp_definition(defs, arg_file, str);
-                if (d) {
-                    defs = d;
-                    str = after_spaces(d->pat->end);
-                } else {
-                    pat_t *p = bp_pattern(arg_file, str);
-                    if (!p)
-                        file_err(arg_file, str, arg_file->end,
-                                 "Failed to compile this part of the argument");
-                    pattern = chain_together(arg_file, pattern, p);
-                    str = after_spaces(p->end);
-                }
-            }
+            pat_t *p = bp_pattern(arg_file, arg_file->start);
+            if (!p) file_err(arg_file, arg_file->start, arg_file->end, "Failed to compile this part of the argument");
+            pattern = chain_together(arg_file, pattern, p);
         } else if (FLAG("-w")     || FLAG("--word")) {
             check_nonnegative(asprintf(&flag, "\\|%s\\|", flag), "Could not allocate memory");
             file_t *arg_file = spoof_file(&loaded_files, "<word pattern>", flag, -1);
diff --git a/definitions.c b/definitions.c
index c85e227..8216e1d 100644
--- a/definitions.c
+++ b/definitions.c
@@ -33,26 +33,16 @@ def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat)
 //
 def_t *load_grammar(def_t *defs, file_t *f)
 {
-    const char *src = f->start;
-    src = after_spaces(src);
-    while (src < f->end) {
-        const char *name = src;
-        src = after_name(name);
-        if (src <= name)
-            file_err(f, name, src, "Invalid name for definition: %s", name);
-        size_t namelen = (size_t)(src - name);
-        if (!matchchar(&src, ':'))
-            errx(EXIT_FAILURE, "Expected ':' in definition");
-        pat_t *pat = bp_pattern(f, src);
-        if (pat == NULL) break;
-        defs = with_def(defs, namelen, name, pat);
-        src = pat->end;
-        src = after_spaces(src);
-        if (matchchar(&src, ';'))
-            src = after_spaces(src);
+    const char *str = after_spaces(f->start);
+    while (*str == '\r' || *str == '\n') str = after_spaces(++str);
+    pat_t *pat = bp_pattern(f, str);
+    if (!pat) file_err(f, str, f->end, "Could not parse this file");
+    if (pat->end < f->end) file_err(f, pat->end, f->end, "Could not parse this part of the file");
+    for (pat_t *p = pat; p && p->type == BP_DEFINITION; p = p->args.def.pat) {
+        // printf("Def '%.*s': %.*s\n", (int)p->args.def.namelen, p->args.def.name,
+        //        (int)(p->args.def.def->end - p->args.def.def->start), p->args.def.def->start);
+        defs = with_def(defs, p->args.def.namelen, p->args.def.name, p->args.def.def);
     }
-    if (src < f->end)
-        file_err(f, src, NULL, "Invalid BP pattern");
     return defs;
 }
 
diff --git a/grammars/bp.bp b/grammars/bp.bp
index 3374f0c..2b0f4f9 100644
--- a/grammars/bp.bp
+++ b/grammars/bp.bp
@@ -12,13 +12,13 @@ Def: @name=id __ `: __ (
     / (!)(..%\n>(`;/id_`:/$) => "Invalid definition: @0"))
 
 # This is used for command line arguments:
-    String-pattern: ..%(\n / Nodent / Identifier-char / Identifier-start / Escape / `\ pat [`;])$$
+String-pattern: ..%(\n / Nodent / Identifier-char / Identifier-start / Escape / `\ pat [`;])$$
 
 pat: simple-pat !(__("!~"/"~")) / suffixed-pat
-simple-pat: Upto-and / Dot / Word-boundary/ String / Chars / Nodent
+simple-pat: (Upto-and / Dot / Word-boundary/ String / Chars / Nodent
     / Identifier-char / Identifier-start / Escape-range
     / Escape / Repeat / Optional / No / After / Before / Capture  / Error / Empty-replacement
-    / Start-of-File / Start-of-Line / End-of-File / End-of-Line / Ref / parens
+    / Start-of-File / Start-of-Line / End-of-File / End-of-Line / Ref / parens)
 
 suffixed-pat: (
       Match-pat
@@ -51,7 +51,7 @@ Nodent: "\N"
 Word-boundary: `| / "\b"
 Identifier-char: "\i"
 Identifier-start: "\I"
-Upto-and: ".." [__(`%/`=)__@second=simple-pat] [__@first=simple-pat] 
+Upto-and: ".." [__(`%/`=)__@second=simple-pat] [__@first=simple-pat]
 Repeat: (
         @min=(=>'0') (`*=>"-") @max=(=>'∞')
       / @min=int __ `- __ @max=int
diff --git a/grammars/c++.bp b/grammars/c++.bp
index 1814320..d6135b2 100644
--- a/grammars/c++.bp
+++ b/grammars/c++.bp
@@ -8,8 +8,7 @@
 
 comment: "//" .. $ / "/*" ..%\n "*/"
 string: `" ..%string-escape `"
-keyword:
-    "alignas" / "alignof" / "and" / "and_eq" / "asm" / "atomic_cancel" / "atomic_commit" /
+keyword: ("alignas" / "alignof" / "and" / "and_eq" / "asm" / "atomic_cancel" / "atomic_commit" /
     "atomic_noexcept" / "auto" / "bitand" / "bitor" / "bool" / "break" / "case" / "catch" /
     "char" / "char8_t" / "char16_t" / "char32_t" / "class" / "compl" / "concept" / "const" /
     "consteval" / "constexpr" / "constinit" / "const_cast" / "continue" / "co_await" /
@@ -21,7 +20,7 @@ keyword:
     "requires" / "return" / "short" / "signed" / "sizeof" / "static" / "static_assert" /
     "static_cast" / "struct" / "switch" / "synchronized" / "template" / "this" /
     "thread_local" / "throw" / "true" / "try" / "typedef" / "typeid" / "typename" / "union" /
-    "unsigned" / "using" / "virtual" / "void" / "volatile" / "wchar_t" / "while" / "xor" / "xor_eq"
+    "unsigned" / "using" / "virtual" / "void" / "volatile" / "wchar_t" / "while" / "xor" / "xor_eq")
 function-def: ^_ 2+(id / keyword / anglebraces / `*) % __ parens (__`; / >(__`{))
 function: function-def __ braces
 macro: ^"#define"| ..$ *(<`\ \n..$)
diff --git a/grammars/c.bp b/grammars/c.bp
index 03403eb..31e123a 100644
--- a/grammars/c.bp
+++ b/grammars/c.bp
@@ -9,12 +9,11 @@
 comment: "//" .. $ / "/*" ..%\n "*/"
 string: `" ..%string-escape `"
 string-escape: `\ (`x 2 Hex / 1-3 `0-7 / .)
-keyword:
-    "auto" / "break" / "case" / "char" / "const" / "continue" / "default" / "do" /
+keyword: ("auto" / "break" / "case" / "char" / "const" / "continue" / "default" / "do" /
     "double" / "else" / "enum" / "extern" / "float" / "for" / "goto" / "if" /
     "int" / "long" / "register" / "return" / "short" / "signed" / "sizeof" /
     "static" / "struct" / "switch" / "typedef" / "union" / "unsigned" / "void" /
-    "volatile" / "while"
+    "volatile" / "while")
 function-def: ^_ 2+(id / keyword / `*) % __ parens (__`; / >(__`{))
 function: function-def __ braces
 macro: ^"#define"| ..$ *(<`\ \n..$)
diff --git a/grammars/go.bp b/grammars/go.bp
index f31e21c..c6f41c1 100644
--- a/grammars/go.bp
+++ b/grammars/go.bp
@@ -8,10 +8,9 @@
 
 comment: "//" .. $ / "/*" ..%\n "*/"
 string: `" ..%string-escape `"
-keyword:
-    "break" / "default" / "func" / "interface" / "select" / "case" / "defer" / "go" /
+keyword: ("break" / "default" / "func" / "interface" / "select" / "case" / "defer" / "go" /
     "map" / "struct" / "chan" / "else" / "goto" / "package" / "switch" / "const" /
-    "fallthrough" / "if" / "range" / "type" / "continue" / "for" / "import" / "return" / "var"
+    "fallthrough" / "if" / "range" / "type" / "continue" / "for" / "import" / "return" / "var")
 function-def: |"func"| __ id __ parens __ [id / parens] >(__`{)
 function: function-def __ braces
 import: |"import"| __ (parens / string)
diff --git a/grammars/javascript.bp b/grammars/javascript.bp
index 438ddab..2691c62 100644
--- a/grammars/javascript.bp
+++ b/grammars/javascript.bp
@@ -8,8 +8,7 @@
 
 comment: "//" .. $ / "/*" ..%\n "*/"
 string: `" ..%string-escape `" / `' ..%string-escape `' / `/ ..%string-escape `/
-keyword:
-    "abstract" / "arguments" / "await" / "boolean" / "break" / "byte" / "case" /
+keyword: ("abstract" / "arguments" / "await" / "boolean" / "break" / "byte" / "case" /
     "catch" / "char" / "class" / "const" / "continue" / "debugger" / "default" /
     "delete" / "do" / "double" / "else" / "enum" / "eval" / "export" / "extends" /
     "false" / "final" / "finally" / "float" / "for" / "function" / "goto" / "if" /
@@ -17,7 +16,7 @@ keyword:
     "long" / "native" / "new" / "null" / "package" / "private" / "protected" /
     "public" / "return" / "short" / "static" / "super" / "switch" / "synchronized" /
     "this" / "throw" / "throws" / "transient" / "true" / "try" / "typeof" / "var" /
-    "void" / "volatile" / "while" / "with" / "yield"
+    "void" / "volatile" / "while" / "with" / "yield")
 function-def: |"function"| __ [id__] parens / (id / parens) __ "=>"
 function: function-def __ braces
 import: |"import"| ..%braces (`; / $)
diff --git a/grammars/lua.bp b/grammars/lua.bp
index 6005514..3d3b862 100644
--- a/grammars/lua.bp
+++ b/grammars/lua.bp
@@ -9,10 +9,9 @@
 comment: "--" (`[ @eqs=*`= `[ ..%\n (`]eqs`]) / ..$)
 string: `"..%string-escape `" / `' ..%string-escape `' / `[ @eqs=*`= `[ ..%\n (`]eqs`])
 table: `{ ..%(table/string/comment/\n) `}
-keyword:
- "and" / "break" / "do" / "else" / "elseif" / "end" / "false" / "for" /
- "function" / "goto" / "if" / "in" / "local" / "nil" / "not" / "or" /
- "repeat" / "return" / "then" / "true" / "until" / "while"
+keyword: ("and" / "break" / "do" / "else" / "elseif" / "end" / "false" / "for" /
+    "function" / "goto" / "if" / "in" / "local" / "nil" / "not" / "or" /
+    "repeat" / "return" / "then" / "true" / "until" / "while")
 function-def: |"function"|[_id (*(`.id)[`:id])]_ parens
 block: function / if-block / while-block / for-block / repeat-block / do-block
 repeat-block: |"repeat"| ..%(comment/string/\n) (|"until"|)
diff --git a/grammars/python.bp b/grammars/python.bp
index 51c3c44..ee749d0 100644
--- a/grammars/python.bp
+++ b/grammars/python.bp
@@ -8,11 +8,11 @@
 
 comment: `# ..$
 string: "'''" ..%\n "'''" / '"""' ..%\n '"""' / `" ..%string-escape `" / `' ..%string-escape `'
-keyword: "and" / "as" / "assert" / "break" / "class" / "continue" / "def" /
+keyword: ("and" / "as" / "assert" / "break" / "class" / "continue" / "def" /
     "del" / "elif" / "else" / "except" / "finally" / "for" / "from" /
     "global" / "if" / "import" / "in" / "is" / "lambda" / "None" / "nonlocal" /
     "not" / "or" / "pass" / "raise" / "return" / "try" / "while" /
-    "with" / "yield"
+    "with" / "yield")
 class: class-def +(\N ..$)
 class-def: ^_"class"|_id[_parens]_`:
 function: function-def +(\N ..$)
diff --git a/grammars/rust.bp b/grammars/rust.bp
index 4df6b53..9ac459b 100644
--- a/grammars/rust.bp
+++ b/grammars/rust.bp
@@ -8,11 +8,10 @@
 
 comment: "//" .. $ / "/*" ..%(comment / \n) "*/"
 string: `" ..%string-escape `"
-keyword:
-    "as" / "break" / "const" / "continue" / "crate" / "else" / "enum" / "extern" /
+keyword: ("as" / "break" / "const" / "continue" / "crate" / "else" / "enum" / "extern" /
     "false" / "fn" / "for" / "if" / "impl" / "in" / "let" / "loop" / "match" /
     "mod" / "move" / "mut" / "pub" / "ref" / "return" / "self" / "Self" / "static" /
-    "struct" / "super" / "trait" / "true" / "type" / "unsafe" / "use" / "where" / "while"
+    "struct" / "super" / "trait" / "true" / "type" / "unsafe" / "use" / "where" / "while")
 function-def: |"fn"| __ id __ parens __ ["->"__(id / parens)] >(__`{)
 function: function-def __ braces
 import: |"use"| _ *(id / braces) % "::" _ `;
diff --git a/grammars/shell.bp b/grammars/shell.bp
index 72b83cc..76cdcfa 100644
--- a/grammars/shell.bp
+++ b/grammars/shell.bp
@@ -10,10 +10,9 @@ comment: `#..$
 string: `" ..%(string-escape / subcommand / \n) `" / `' ..%\n `' / "<<" _ @delim=id _$ ..%\n (^delim$)
 string-escape: `\ `",`
 subcommand: `` ..%\n `` / "$" (parens/braces)
-keyword:
-    "echo" / "read" / "set" / "unset" / "readonly" / "shift" / "export" / "if" / "fi" /
+keyword: ("echo" / "read" / "set" / "unset" / "readonly" / "shift" / "export" / "if" / "fi" /
     "else" / "while" / "do" / "done" / "for" / "until" / "case" / "esac" / "break" /
-    "continue" / "exit" / "return" / "trap" / "wait" / "eval" / "exec" / "ulimit" / "umask"
+    "continue" / "exit" / "return" / "trap" / "wait" / "eval" / "exec" / "ulimit" / "umask")
 function-def: ^_ ["function"_] id _ `(_`) >(__`{)
 function: function-def __ braces
 var: `$ (id / braces)
diff --git a/match.c b/match.c
index b594c80..d092d8b 100644
--- a/match.c
+++ b/match.c
@@ -106,7 +106,7 @@ static match_t *cache_lookup(def_t *defs, const char *str, pat_t *pat)
     if (!cache.matches) return NULL;
     size_t h = hash(str, pat) & (cache.size-1);
     for (match_t *c = cache.matches[h]; c; c = c->cache.next) {
-        if (c->pat == pat && c->defs_id == defs->id && c->start == str)
+        if (c->pat == pat && c->defs_id == (defs?defs->id:0) && c->start == str)
             return c;
     }
     return NULL;
@@ -291,6 +291,12 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk
 static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool ignorecase)
 {
     switch (pat->type) {
+        case BP_DEFINITION: {
+            def_t *defs2 = with_def(defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def);
+            match_t *m = match(defs2, f, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def, ignorecase);
+            defs = free_defs(defs2, defs);
+            return m;
+        }
         case BP_LEFTRECURSION: {
             // Left recursion occurs when a pattern directly or indirectly
             // invokes itself at the same position in the text. It's handled as
@@ -727,7 +733,7 @@ static match_t *new_match(def_t *defs, pat_t *pat, const char *start, const char
     m->pat = pat;
     m->start = start;
     m->end = end;
-    m->defs_id = defs->id;
+    m->defs_id = (defs?defs->id:0);
 
     if (children) {
         for (int i = 0; children[i]; i++)
diff --git a/pattern.c b/pattern.c
index 785b45a..48a45a4 100644
--- a/pattern.c
+++ b/pattern.c
@@ -8,18 +8,19 @@
 #include <string.h>
 #include <unistd.h>
 
-#include "definitions.h"
 #include "files.h"
 #include "pattern.h"
 #include "utils.h"
 #include "utf8.h"
 
 __attribute__((nonnull))
-static pat_t *expand_replacements(file_t *f, pat_t *replace_pat);
+static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl);
 __attribute__((nonnull))
-static pat_t *expand_chain(file_t *f, pat_t *first);
+static pat_t *expand_replacements(file_t *f, pat_t *replace_pat, bool allow_nl);
 __attribute__((nonnull))
-static pat_t *expand_choices(file_t *f, pat_t *first);
+static pat_t *expand_chain(file_t *f, pat_t *first, bool allow_nl);
+__attribute__((nonnull))
+static pat_t *expand_choices(file_t *f, pat_t *first, bool allow_nl);
 __attribute__((nonnull))
 static pat_t *_bp_simplepattern(file_t *f, const char *str);
 __attribute__((nonnull(1,2,3,6)))
@@ -27,6 +28,8 @@ static pat_t *new_range(file_t *f, const char *start, const char *end, size_t mi
 __attribute__((nonnull(1,2)))
 static pat_t *bp_simplepattern(file_t *f, const char *str);
 
+#define SKIP_NL_SPACES(str) for (str = after_spaces(str); *str == '\n' || *str == '\r'; ) str = after_spaces(++str)
+
 //
 // Allocate a new pattern for this file (ensuring it will be automatically
 // freed when the file is freed)
@@ -68,11 +71,13 @@ static pat_t *new_range(file_t *f, const char *start, const char *end, size_t mi
 // Take a pattern and expand it into a chain of patterns if it's followed by
 // any patterns (e.g. "`x `y"), otherwise return the original input.
 //
-static pat_t *expand_chain(file_t *f, pat_t *first)
+static pat_t *expand_chain(file_t *f, pat_t *first, bool allow_nl)
 {
-    pat_t *second = bp_simplepattern(f, first->end);
+    const char *str = first->end;
+    if (allow_nl) SKIP_NL_SPACES(str);
+    pat_t *second = bp_simplepattern(f, str);
     if (second == NULL) return first;
-    second = expand_chain(f, second);
+    second = expand_chain(f, second, allow_nl);
     if (second->end <= first->end)
         file_err(f, second->end, second->end,
                  "This chain is not parsing properly");
@@ -82,10 +87,12 @@ static pat_t *expand_chain(file_t *f, pat_t *first)
 //
 // Match trailing => replacements (with optional pattern beforehand)
 //
-static pat_t *expand_replacements(file_t *f, pat_t *replace_pat)
+static pat_t *expand_replacements(file_t *f, pat_t *replace_pat, bool allow_nl)
 {
     const char *str = replace_pat->end;
+    if (allow_nl) SKIP_NL_SPACES(str);
     while (matchstr(&str, "=>")) {
+        if (allow_nl) SKIP_NL_SPACES(str);
         const char *repstr;
         size_t replen;
         if (matchchar(&str, '"') || matchchar(&str, '\'')) {
@@ -121,18 +128,24 @@ static pat_t *expand_replacements(file_t *f, pat_t *replace_pat)
 // chain of choices if it's followed by any "/"-separated patterns (e.g.
 // "`x/`y"), otherwise return the original input.
 //
-static pat_t *expand_choices(file_t *f, pat_t *first)
+static pat_t *expand_choices(file_t *f, pat_t *first, bool allow_nl)
 {
-    first = expand_chain(f, first);
-    first = expand_replacements(f, first);
+    first = expand_chain(f, first, allow_nl);
+    first = expand_replacements(f, first, allow_nl);
     const char *str = first->end;
+    if (allow_nl) SKIP_NL_SPACES(str);
     if (!matchchar(&str, '/')) return first;
+    if (allow_nl) SKIP_NL_SPACES(str);
     pat_t *second = bp_simplepattern(f, str);
+    if (second) {
+        str = second->end;
+        if (allow_nl) SKIP_NL_SPACES(str);
+    }
     if (matchstr(&str, "=>"))
-        second = expand_replacements(f, second ? second : new_pat(f, str-2, str-2, 0, 0, BP_STRING));
+        second = expand_replacements(f, second ? second : new_pat(f, str-2, str-2, 0, 0, BP_STRING), allow_nl);
     if (!second)
         file_err(f, str, str, "There should be a pattern here after a '/'");
-    second = expand_choices(f, second);
+    second = expand_choices(f, second, allow_nl);
     return either_pat(f, first, second);
 }
 
@@ -187,7 +200,7 @@ pat_t *either_pat(file_t *f, pat_t *first, pat_t *second)
 }
 
 //
-// Wrapper for _bp_simplepattern() that expands any postfix operators
+// Wrapper for _bp_simplepattern() that expands any postfix operators (~, !~)
 //
 static pat_t *bp_simplepattern(file_t *f, const char *str)
 {
@@ -416,27 +429,29 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
             if (matchstr(&str, "!)")) { // (!) errors
                 pat_t *pat = bp_simplepattern(f, str);
                 if (!pat) pat = new_pat(f, str, str, 0, 0, BP_STRING);
-                pat = expand_replacements(f, pat);
+                pat = expand_replacements(f, pat, false);
                 pat_t *error = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_ERROR);
                 error->args.pat = pat;
                 return error;
             }
 
-            pat_t *pat = bp_pattern(f, str);
+            pat_t *pat = bp_pattern_nl(f, str, true);
             if (!pat)
                 file_err(f, str, str, "There should be a valid pattern after this parenthesis.");
             str = pat->end;
-            (void)matchchar(&str, ')');
+            SKIP_NL_SPACES(str);
+            if (!matchchar(&str, ')')) file_err(f, str, str, "Missing paren: )");
             pat->start = start;
             pat->end = str;
             return pat;
         }
         // Square brackets
         case '[': {
-            pat_t *maybe = bp_pattern(f, str);
+            pat_t *maybe = bp_pattern_nl(f, str, true);
             if (!maybe)
                 file_err(f, str, str, "There should be a valid pattern after this square bracket.");
             str = maybe->end;
+            SKIP_NL_SPACES(str);
             (void)matchchar(&str, ']');
             return new_range(f, start, str, 0, 1, maybe, NULL);
         }
@@ -488,28 +503,30 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
                 return new_pat(f, start, str, 0, 0, BP_END_OF_FILE);
             return new_pat(f, start, str, 0, 0, BP_END_OF_LINE);
         }
-        // Whitespace:
-        case '_': {
-            size_t namelen = 1;
-            if (matchchar(&str, '_')) // double __ (whitespace with newlines)
-                ++namelen;
-            if (matchchar(&str, ':')) return NULL; // Don't match definitions
-            pat_t *ref = new_pat(f, start, str, 0, -1, BP_REF);
-            ref->args.ref.name = start;
-            ref->args.ref.len = namelen;
-            return ref;
-        }
         default: {
             // Reference
-            if (!isalpha(c)) return NULL;
-            --str;
-            const char *refname = str;
-            str = after_name(str);
-            if (matchchar(&str, ':')) // Don't match definitions
-                return NULL;
+            if (!isalpha(c) && c != '_') return NULL;
+            str = after_name(start);
+            size_t namelen = (size_t)(str - start);
+            if (matchchar(&str, ':')) { // Definitions
+                pat_t *def = bp_pattern_nl(f, str, false);
+                if (!def) file_err(f, str, f->end, "Could not parse this definition.");
+                str = def->end;
+                (void)matchchar(&str, ';'); // Optional semicolon
+                SKIP_NL_SPACES(str);
+                pat_t *pat = bp_pattern_nl(f, str, false);
+                if (pat) str = pat->end;
+                else pat = def;
+                pat_t *ret = new_pat(f, start, str, pat->min_matchlen, pat->max_matchlen, BP_DEFINITION);
+                ret->args.def.name = start;
+                ret->args.def.namelen = namelen;
+                ret->args.def.def = def;
+                ret->args.def.pat = pat;
+                return ret;
+            }
             pat_t *ref = new_pat(f, start, str, 0, -1, BP_REF);
-            ref->args.ref.name = refname;
-            ref->args.ref.len = (size_t)(str - refname);
+            ref->args.ref.name = start;
+            ref->args.ref.len = namelen;
             return ref;
         }
     }
@@ -575,32 +592,23 @@ pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement)
     return pat;
 }
 
-//
-// Compile a string representing a BP pattern into a pattern object.
-//
-pat_t *bp_pattern(file_t *f, const char *str)
+static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl)
 {
+    SKIP_NL_SPACES(str);
     pat_t *pat = bp_simplepattern(f, str);
-    if (pat != NULL) pat = expand_choices(f, pat);
+    if (pat != NULL) pat = expand_choices(f, pat, allow_nl);
+    SKIP_NL_SPACES(str);
     if (matchstr(&str, "=>"))
-        pat = expand_replacements(f, pat ? pat : new_pat(f, str-2, str-2, 0, 0, BP_STRING));
+        pat = expand_replacements(f, pat ? pat : new_pat(f, str-2, str-2, 0, 0, BP_STRING), allow_nl);
     return pat;
 }
 
 //
-// Match a definition (id__`:__pattern)
+// Compile a string representing a BP pattern into a pattern object.
 //
-def_t *bp_definition(def_t *defs, file_t *f, const char *str)
+pat_t *bp_pattern(file_t *f, const char *str)
 {
-    const char *name = after_spaces(str);
-    str = after_name(name);
-    if (!str) return NULL;
-    size_t namelen = (size_t)(str - name);
-    if (!matchchar(&str, ':')) return NULL;
-    pat_t *defpat = bp_pattern(f, str);
-    if (!defpat) return NULL;
-    (void)matchchar(&defpat->end, ';'); // TODO: verify this is safe to mutate
-    return with_def(defs, namelen, name, defpat);
+    return bp_pattern_nl(f, str, false);
 }
 
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/pattern.h b/pattern.h
index e9bebf7..47d0c63 100644
--- a/pattern.h
+++ b/pattern.h
@@ -19,8 +19,6 @@ __attribute__((nonnull(1)))
 pat_t *either_pat(file_t *f, pat_t *first, pat_t *second);
 __attribute__((nonnull))
 pat_t *bp_pattern(file_t *f, const char *str);
-__attribute__((nonnull))
-def_t *bp_definition(def_t *defs, file_t *f, const char *str);
 
 #endif
 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/types.h b/types.h
index bb491bc..495234f 100644
--- a/types.h
+++ b/types.h
@@ -37,8 +37,9 @@ enum pattype_e {
     BP_END_OF_FILE   = 22,
     BP_END_OF_LINE   = 23,
     BP_WORD_BOUNDARY = 24,
-    BP_LEFTRECURSION = 25,
-    BP_ERROR         = 26,
+    BP_DEFINITION    = 25,
+    BP_LEFTRECURSION = 26,
+    BP_ERROR         = 27,
 };
 
 struct match_s; // forward declared to resolve circular struct defs
@@ -60,6 +61,11 @@ typedef struct pat_s {
             size_t len;
         } ref;
         struct {
+            const char *name;
+            size_t namelen;
+            struct pat_s *def, *pat;
+        } def;
+        struct {
             unsigned char low, high;
         } range;
         struct {
diff --git a/utils.c b/utils.c
index 39ae4cc..98eb8b3 100644
--- a/utils.c
+++ b/utils.c
@@ -19,7 +19,8 @@ const char *after_spaces(const char *str)
     // Skip whitespace and comments:
   skip_whitespace:
     switch (*str) {
-        case ' ': case '\r': case '\n': case '\t': {
+        // case ' ': case '\r': case '\n': case '\t': {
+        case ' ': case '\t': {
             ++str;
             goto skip_whitespace;
         }
author	Bruce Hill <bruce@bruce-hill.com>	2021-08-01 12:40:27 -0700
committer	Bruce Hill <bruce@bruce-hill.com>	2021-08-01 12:40:27 -0700
commit	994c9c973e0fd771699c3a5c76bee11f9b744c84 (patch)
tree	8257c1abe2d18a50a46a1d712eadbd25fd7e4f8b
parent	cb9b4c40d87480bc794b90c2a36ed0f4c3240d8a (diff)