about summary refs log tree commit diff
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2021-08-01 12:40:27 -0700
committer Bruce Hill <bruce@bruce-hill.com> 2021-08-01 12:40:27 -0700
commit 994c9c973e0fd771699c3a5c76bee11f9b744c84 (patch)
tree 8257c1abe2d18a50a46a1d712eadbd25fd7e4f8b
parent cb9b4c40d87480bc794b90c2a36ed0f4c3240d8a (diff)
Changed how definitions work
-rw-r--r-- bp.c 17
-rw-r--r-- definitions.c 28
-rw-r--r-- grammars/bp.bp 8
-rw-r--r-- grammars/c++.bp 5
-rw-r--r-- grammars/c.bp 5
-rw-r--r-- grammars/go.bp 5
-rw-r--r-- grammars/javascript.bp 5
-rw-r--r-- grammars/lua.bp 7
-rw-r--r-- grammars/python.bp 4
-rw-r--r-- grammars/rust.bp 5
-rw-r--r-- grammars/shell.bp 5
-rw-r--r-- match.c 10
-rw-r--r-- pattern.c 116
-rw-r--r-- pattern.h 2
-rw-r--r-- types.h 10
-rw-r--r-- utils.c 3
16 files changed, 113 insertions, 122 deletions
diff --git a/bp.c b/bp.c
index c9eb009..b909c2b 100644
--- a/bp.c
+++ b/bp.c
@@ -546,20 +546,9 @@ int main(int argc, char *argv[])
defs = load_grammar(defs, f); // Keep in memory for debug output
} else if (FLAG("-p") || FLAG("--pattern")) {
file_t *arg_file = spoof_file(&loaded_files, "<pattern argument>", flag, -1);
- for (const char *str = arg_file->start; str < arg_file->end; ) {
- def_t *d = bp_definition(defs, arg_file, str);
- if (d) {
- defs = d;
- str = after_spaces(d->pat->end);
- } else {
- pat_t *p = bp_pattern(arg_file, str);
- if (!p)
- file_err(arg_file, str, arg_file->end,
- "Failed to compile this part of the argument");
- pattern = chain_together(arg_file, pattern, p);
- str = after_spaces(p->end);
- }
- }
+ pat_t *p = bp_pattern(arg_file, arg_file->start);
+ if (!p) file_err(arg_file, arg_file->start, arg_file->end, "Failed to compile this part of the argument");
+ pattern = chain_together(arg_file, pattern, p);
} else if (FLAG("-w") || FLAG("--word")) {
check_nonnegative(asprintf(&flag, "\\|%s\\|", flag), "Could not allocate memory");
file_t *arg_file = spoof_file(&loaded_files, "<word pattern>", flag, -1);
diff --git a/definitions.c b/definitions.c
index c85e227..8216e1d 100644
--- a/definitions.c
+++ b/definitions.c
@@ -33,26 +33,16 @@ def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat)
//
def_t *load_grammar(def_t *defs, file_t *f)
{
- const char *src = f->start;
- src = after_spaces(src);
- while (src < f->end) {
- const char *name = src;
- src = after_name(name);
- if (src <= name)
- file_err(f, name, src, "Invalid name for definition: %s", name);
- size_t namelen = (size_t)(src - name);
- if (!matchchar(&src, ':'))
- errx(EXIT_FAILURE, "Expected ':' in definition");
- pat_t *pat = bp_pattern(f, src);
- if (pat == NULL) break;
- defs = with_def(defs, namelen, name, pat);
- src = pat->end;
- src = after_spaces(src);
- if (matchchar(&src, ';'))
- src = after_spaces(src);
+ const char *str = after_spaces(f->start);
+ while (*str == '\r' || *str == '\n') str = after_spaces(++str);
+ pat_t *pat = bp_pattern(f, str);
+ if (!pat) file_err(f, str, f->end, "Could not parse this file");
+ if (pat->end < f->end) file_err(f, pat->end, f->end, "Could not parse this part of the file");
+ for (pat_t *p = pat; p && p->type == BP_DEFINITION; p = p->args.def.pat) {
+ // printf("Def '%.*s': %.*s\n", (int)p->args.def.namelen, p->args.def.name,
+ // (int)(p->args.def.def->end - p->args.def.def->start), p->args.def.def->start);
+ defs = with_def(defs, p->args.def.namelen, p->args.def.name, p->args.def.def);
}
- if (src < f->end)
- file_err(f, src, NULL, "Invalid BP pattern");
return defs;
}
diff --git a/grammars/bp.bp b/grammars/bp.bp
index 3374f0c..2b0f4f9 100644
--- a/grammars/bp.bp
+++ b/grammars/bp.bp
@@ -12,13 +12,13 @@ Def: @name=id __ `: __ (
/ (!)(..%\n>(`;/id_`:/$) => "Invalid definition: @0"))
# This is used for command line arguments:
- String-pattern: ..%(\n / Nodent / Identifier-char / Identifier-start / Escape / `\ pat [`;])$$
+String-pattern: ..%(\n / Nodent / Identifier-char / Identifier-start / Escape / `\ pat [`;])$$
pat: simple-pat !(__("!~"/"~")) / suffixed-pat
-simple-pat: Upto-and / Dot / Word-boundary/ String / Chars / Nodent
+simple-pat: (Upto-and / Dot / Word-boundary/ String / Chars / Nodent
/ Identifier-char / Identifier-start / Escape-range
/ Escape / Repeat / Optional / No / After / Before / Capture / Error / Empty-replacement
- / Start-of-File / Start-of-Line / End-of-File / End-of-Line / Ref / parens
+ / Start-of-File / Start-of-Line / End-of-File / End-of-Line / Ref / parens)
suffixed-pat: (
Match-pat
@@ -51,7 +51,7 @@ Nodent: "\N"
Word-boundary: `| / "\b"
Identifier-char: "\i"
Identifier-start: "\I"
-Upto-and: ".." [__(`%/`=)__@second=simple-pat] [__@first=simple-pat]
+Upto-and: ".." [__(`%/`=)__@second=simple-pat] [__@first=simple-pat]
Repeat: (
@min=(=>'0') (`*=>"-") @max=(=>'∞')
/ @min=int __ `- __ @max=int
diff --git a/grammars/c++.bp b/grammars/c++.bp
index 1814320..d6135b2 100644
--- a/grammars/c++.bp
+++ b/grammars/c++.bp
@@ -8,8 +8,7 @@
comment: "//" .. $ / "/*" ..%\n "*/"
string: `" ..%string-escape `"
-keyword:
- "alignas" / "alignof" / "and" / "and_eq" / "asm" / "atomic_cancel" / "atomic_commit" /
+keyword: ("alignas" / "alignof" / "and" / "and_eq" / "asm" / "atomic_cancel" / "atomic_commit" /
"atomic_noexcept" / "auto" / "bitand" / "bitor" / "bool" / "break" / "case" / "catch" /
"char" / "char8_t" / "char16_t" / "char32_t" / "class" / "compl" / "concept" / "const" /
"consteval" / "constexpr" / "constinit" / "const_cast" / "continue" / "co_await" /
@@ -21,7 +20,7 @@ keyword:
"requires" / "return" / "short" / "signed" / "sizeof" / "static" / "static_assert" /
"static_cast" / "struct" / "switch" / "synchronized" / "template" / "this" /
"thread_local" / "throw" / "true" / "try" / "typedef" / "typeid" / "typename" / "union" /
- "unsigned" / "using" / "virtual" / "void" / "volatile" / "wchar_t" / "while" / "xor" / "xor_eq"
+ "unsigned" / "using" / "virtual" / "void" / "volatile" / "wchar_t" / "while" / "xor" / "xor_eq")
function-def: ^_ 2+(id / keyword / anglebraces / `*) % __ parens (__`; / >(__`{))
function: function-def __ braces
macro: ^"#define"| ..$ *(<`\ \n..$)
diff --git a/grammars/c.bp b/grammars/c.bp
index 03403eb..31e123a 100644
--- a/grammars/c.bp
+++ b/grammars/c.bp
@@ -9,12 +9,11 @@
comment: "//" .. $ / "/*" ..%\n "*/"
string: `" ..%string-escape `"
string-escape: `\ (`x 2 Hex / 1-3 `0-7 / .)
-keyword:
- "auto" / "break" / "case" / "char" / "const" / "continue" / "default" / "do" /
+keyword: ("auto" / "break" / "case" / "char" / "const" / "continue" / "default" / "do" /
"double" / "else" / "enum" / "extern" / "float" / "for" / "goto" / "if" /
"int" / "long" / "register" / "return" / "short" / "signed" / "sizeof" /
"static" / "struct" / "switch" / "typedef" / "union" / "unsigned" / "void" /
- "volatile" / "while"
+ "volatile" / "while")
function-def: ^_ 2+(id / keyword / `*) % __ parens (__`; / >(__`{))
function: function-def __ braces
macro: ^"#define"| ..$ *(<`\ \n..$)
diff --git a/grammars/go.bp b/grammars/go.bp
index f31e21c..c6f41c1 100644
--- a/grammars/go.bp
+++ b/grammars/go.bp
@@ -8,10 +8,9 @@
comment: "//" .. $ / "/*" ..%\n "*/"
string: `" ..%string-escape `"
-keyword:
- "break" / "default" / "func" / "interface" / "select" / "case" / "defer" / "go" /
+keyword: ("break" / "default" / "func" / "interface" / "select" / "case" / "defer" / "go" /
"map" / "struct" / "chan" / "else" / "goto" / "package" / "switch" / "const" /
- "fallthrough" / "if" / "range" / "type" / "continue" / "for" / "import" / "return" / "var"
+ "fallthrough" / "if" / "range" / "type" / "continue" / "for" / "import" / "return" / "var")
function-def: |"func"| __ id __ parens __ [id / parens] >(__`{)
function: function-def __ braces
import: |"import"| __ (parens / string)
diff --git a/grammars/javascript.bp b/grammars/javascript.bp
index 438ddab..2691c62 100644
--- a/grammars/javascript.bp
+++ b/grammars/javascript.bp
@@ -8,8 +8,7 @@
comment: "//" .. $ / "/*" ..%\n "*/"
string: `" ..%string-escape `" / `' ..%string-escape `' / `/ ..%string-escape `/
-keyword:
- "abstract" / "arguments" / "await" / "boolean" / "break" / "byte" / "case" /
+keyword: ("abstract" / "arguments" / "await" / "boolean" / "break" / "byte" / "case" /
"catch" / "char" / "class" / "const" / "continue" / "debugger" / "default" /
"delete" / "do" / "double" / "else" / "enum" / "eval" / "export" / "extends" /
"false" / "final" / "finally" / "float" / "for" / "function" / "goto" / "if" /
@@ -17,7 +16,7 @@ keyword:
"long" / "native" / "new" / "null" / "package" / "private" / "protected" /
"public" / "return" / "short" / "static" / "super" / "switch" / "synchronized" /
"this" / "throw" / "throws" / "transient" / "true" / "try" / "typeof" / "var" /
- "void" / "volatile" / "while" / "with" / "yield"
+ "void" / "volatile" / "while" / "with" / "yield")
function-def: |"function"| __ [id__] parens / (id / parens) __ "=>"
function: function-def __ braces
import: |"import"| ..%braces (`; / $)
diff --git a/grammars/lua.bp b/grammars/lua.bp
index 6005514..3d3b862 100644
--- a/grammars/lua.bp
+++ b/grammars/lua.bp
@@ -9,10 +9,9 @@
comment: "--" (`[ @eqs=*`= `[ ..%\n (`]eqs`]) / ..$)
string: `"..%string-escape `" / `' ..%string-escape `' / `[ @eqs=*`= `[ ..%\n (`]eqs`])
table: `{ ..%(table/string/comment/\n) `}
-keyword:
- "and" / "break" / "do" / "else" / "elseif" / "end" / "false" / "for" /
- "function" / "goto" / "if" / "in" / "local" / "nil" / "not" / "or" /
- "repeat" / "return" / "then" / "true" / "until" / "while"
+keyword: ("and" / "break" / "do" / "else" / "elseif" / "end" / "false" / "for" /
+ "function" / "goto" / "if" / "in" / "local" / "nil" / "not" / "or" /
+ "repeat" / "return" / "then" / "true" / "until" / "while")
function-def: |"function"|[_id (*(`.id)[`:id])]_ parens
block: function / if-block / while-block / for-block / repeat-block / do-block
repeat-block: |"repeat"| ..%(comment/string/\n) (|"until"|)
diff --git a/grammars/python.bp b/grammars/python.bp
index 51c3c44..ee749d0 100644
--- a/grammars/python.bp
+++ b/grammars/python.bp
@@ -8,11 +8,11 @@
comment: `# ..$
string: "'''" ..%\n "'''" / '"""' ..%\n '"""' / `" ..%string-escape `" / `' ..%string-escape `'
-keyword: "and" / "as" / "assert" / "break" / "class" / "continue" / "def" /
+keyword: ("and" / "as" / "assert" / "break" / "class" / "continue" / "def" /
"del" / "elif" / "else" / "except" / "finally" / "for" / "from" /
"global" / "if" / "import" / "in" / "is" / "lambda" / "None" / "nonlocal" /
"not" / "or" / "pass" / "raise" / "return" / "try" / "while" /
- "with" / "yield"
+ "with" / "yield")
class: class-def +(\N ..$)
class-def: ^_"class"|_id[_parens]_`:
function: function-def +(\N ..$)
diff --git a/grammars/rust.bp b/grammars/rust.bp
index 4df6b53..9ac459b 100644
--- a/grammars/rust.bp
+++ b/grammars/rust.bp
@@ -8,11 +8,10 @@
comment: "//" .. $ / "/*" ..%(comment / \n) "*/"
string: `" ..%string-escape `"
-keyword:
- "as" / "break" / "const" / "continue" / "crate" / "else" / "enum" / "extern" /
+keyword: ("as" / "break" / "const" / "continue" / "crate" / "else" / "enum" / "extern" /
"false" / "fn" / "for" / "if" / "impl" / "in" / "let" / "loop" / "match" /
"mod" / "move" / "mut" / "pub" / "ref" / "return" / "self" / "Self" / "static" /
- "struct" / "super" / "trait" / "true" / "type" / "unsafe" / "use" / "where" / "while"
+ "struct" / "super" / "trait" / "true" / "type" / "unsafe" / "use" / "where" / "while")
function-def: |"fn"| __ id __ parens __ ["->"__(id / parens)] >(__`{)
function: function-def __ braces
import: |"use"| _ *(id / braces) % "::" _ `;
diff --git a/grammars/shell.bp b/grammars/shell.bp
index 72b83cc..76cdcfa 100644
--- a/grammars/shell.bp
+++ b/grammars/shell.bp
@@ -10,10 +10,9 @@ comment: `#..$
string: `" ..%(string-escape / subcommand / \n) `" / `' ..%\n `' / "<<" _ @delim=id _$ ..%\n (^delim$)
string-escape: `\ `",`
subcommand: `` ..%\n `` / "$" (parens/braces)
-keyword:
- "echo" / "read" / "set" / "unset" / "readonly" / "shift" / "export" / "if" / "fi" /
+keyword: ("echo" / "read" / "set" / "unset" / "readonly" / "shift" / "export" / "if" / "fi" /
"else" / "while" / "do" / "done" / "for" / "until" / "case" / "esac" / "break" /
- "continue" / "exit" / "return" / "trap" / "wait" / "eval" / "exec" / "ulimit" / "umask"
+ "continue" / "exit" / "return" / "trap" / "wait" / "eval" / "exec" / "ulimit" / "umask")
function-def: ^_ ["function"_] id _ `(_`) >(__`{)
function: function-def __ braces
var: `$ (id / braces)
diff --git a/match.c b/match.c
index b594c80..d092d8b 100644
--- a/match.c
+++ b/match.c
@@ -106,7 +106,7 @@ static match_t *cache_lookup(def_t *defs, const char *str, pat_t *pat)
if (!cache.matches) return NULL;
size_t h = hash(str, pat) & (cache.size-1);
for (match_t *c = cache.matches[h]; c; c = c->cache.next) {
- if (c->pat == pat && c->defs_id == defs->id && c->start == str)
+ if (c->pat == pat && c->defs_id == (defs?defs->id:0) && c->start == str)
return c;
}
return NULL;
@@ -291,6 +291,12 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk
static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool ignorecase)
{
switch (pat->type) {
+ case BP_DEFINITION: {
+ def_t *defs2 = with_def(defs, pat->args.def.namelen, pat->args.def.name, pat->args.def.def);
+ match_t *m = match(defs2, f, str, pat->args.def.pat ? pat->args.def.pat : pat->args.def.def, ignorecase);
+ defs = free_defs(defs2, defs);
+ return m;
+ }
case BP_LEFTRECURSION: {
// Left recursion occurs when a pattern directly or indirectly
// invokes itself at the same position in the text. It's handled as
@@ -727,7 +733,7 @@ static match_t *new_match(def_t *defs, pat_t *pat, const char *start, const char
m->pat = pat;
m->start = start;
m->end = end;
- m->defs_id = defs->id;
+ m->defs_id = (defs?defs->id:0);
if (children) {
for (int i = 0; children[i]; i++)
diff --git a/pattern.c b/pattern.c
index 785b45a..48a45a4 100644
--- a/pattern.c
+++ b/pattern.c
@@ -8,18 +8,19 @@
#include <string.h>
#include <unistd.h>
-#include "definitions.h"
#include "files.h"
#include "pattern.h"
#include "utils.h"
#include "utf8.h"
__attribute__((nonnull))
-static pat_t *expand_replacements(file_t *f, pat_t *replace_pat);
+static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl);
__attribute__((nonnull))
-static pat_t *expand_chain(file_t *f, pat_t *first);
+static pat_t *expand_replacements(file_t *f, pat_t *replace_pat, bool allow_nl);
__attribute__((nonnull))
-static pat_t *expand_choices(file_t *f, pat_t *first);
+static pat_t *expand_chain(file_t *f, pat_t *first, bool allow_nl);
+__attribute__((nonnull))
+static pat_t *expand_choices(file_t *f, pat_t *first, bool allow_nl);
__attribute__((nonnull))
static pat_t *_bp_simplepattern(file_t *f, const char *str);
__attribute__((nonnull(1,2,3,6)))
@@ -27,6 +28,8 @@ static pat_t *new_range(file_t *f, const char *start, const char *end, size_t mi
__attribute__((nonnull(1,2)))
static pat_t *bp_simplepattern(file_t *f, const char *str);
+#define SKIP_NL_SPACES(str) for (str = after_spaces(str); *str == '\n' || *str == '\r'; ) str = after_spaces(++str)
+
//
// Allocate a new pattern for this file (ensuring it will be automatically
// freed when the file is freed)
@@ -68,11 +71,13 @@ static pat_t *new_range(file_t *f, const char *start, const char *end, size_t mi
// Take a pattern and expand it into a chain of patterns if it's followed by
// any patterns (e.g. "`x `y"), otherwise return the original input.
//
-static pat_t *expand_chain(file_t *f, pat_t *first)
+static pat_t *expand_chain(file_t *f, pat_t *first, bool allow_nl)
{
- pat_t *second = bp_simplepattern(f, first->end);
+ const char *str = first->end;
+ if (allow_nl) SKIP_NL_SPACES(str);
+ pat_t *second = bp_simplepattern(f, str);
if (second == NULL) return first;
- second = expand_chain(f, second);
+ second = expand_chain(f, second, allow_nl);
if (second->end <= first->end)
file_err(f, second->end, second->end,
"This chain is not parsing properly");
@@ -82,10 +87,12 @@ static pat_t *expand_chain(file_t *f, pat_t *first)
//
// Match trailing => replacements (with optional pattern beforehand)
//
-static pat_t *expand_replacements(file_t *f, pat_t *replace_pat)
+static pat_t *expand_replacements(file_t *f, pat_t *replace_pat, bool allow_nl)
{
const char *str = replace_pat->end;
+ if (allow_nl) SKIP_NL_SPACES(str);
while (matchstr(&str, "=>")) {
+ if (allow_nl) SKIP_NL_SPACES(str);
const char *repstr;
size_t replen;
if (matchchar(&str, '"') || matchchar(&str, '\'')) {
@@ -121,18 +128,24 @@ static pat_t *expand_replacements(file_t *f, pat_t *replace_pat)
// chain of choices if it's followed by any "/"-separated patterns (e.g.
// "`x/`y"), otherwise return the original input.
//
-static pat_t *expand_choices(file_t *f, pat_t *first)
+static pat_t *expand_choices(file_t *f, pat_t *first, bool allow_nl)
{
- first = expand_chain(f, first);
- first = expand_replacements(f, first);
+ first = expand_chain(f, first, allow_nl);
+ first = expand_replacements(f, first, allow_nl);
const char *str = first->end;
+ if (allow_nl) SKIP_NL_SPACES(str);
if (!matchchar(&str, '/')) return first;
+ if (allow_nl) SKIP_NL_SPACES(str);
pat_t *second = bp_simplepattern(f, str);
+ if (second) {
+ str = second->end;
+ if (allow_nl) SKIP_NL_SPACES(str);
+ }
if (matchstr(&str, "=>"))
- second = expand_replacements(f, second ? second : new_pat(f, str-2, str-2, 0, 0, BP_STRING));
+ second = expand_replacements(f, second ? second : new_pat(f, str-2, str-2, 0, 0, BP_STRING), allow_nl);
if (!second)
file_err(f, str, str, "There should be a pattern here after a '/'");
- second = expand_choices(f, second);
+ second = expand_choices(f, second, allow_nl);
return either_pat(f, first, second);
}
@@ -187,7 +200,7 @@ pat_t *either_pat(file_t *f, pat_t *first, pat_t *second)
}
//
-// Wrapper for _bp_simplepattern() that expands any postfix operators
+// Wrapper for _bp_simplepattern() that expands any postfix operators (~, !~)
//
static pat_t *bp_simplepattern(file_t *f, const char *str)
{
@@ -416,27 +429,29 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
if (matchstr(&str, "!)")) { // (!) errors
pat_t *pat = bp_simplepattern(f, str);
if (!pat) pat = new_pat(f, str, str, 0, 0, BP_STRING);
- pat = expand_replacements(f, pat);
+ pat = expand_replacements(f, pat, false);
pat_t *error = new_pat(f, start, pat->end, pat->min_matchlen, pat->max_matchlen, BP_ERROR);
error->args.pat = pat;
return error;
}
- pat_t *pat = bp_pattern(f, str);
+ pat_t *pat = bp_pattern_nl(f, str, true);
if (!pat)
file_err(f, str, str, "There should be a valid pattern after this parenthesis.");
str = pat->end;
- (void)matchchar(&str, ')');
+ SKIP_NL_SPACES(str);
+ if (!matchchar(&str, ')')) file_err(f, str, str, "Missing paren: )");
pat->start = start;
pat->end = str;
return pat;
}
// Square brackets
case '[': {
- pat_t *maybe = bp_pattern(f, str);
+ pat_t *maybe = bp_pattern_nl(f, str, true);
if (!maybe)
file_err(f, str, str, "There should be a valid pattern after this square bracket.");
str = maybe->end;
+ SKIP_NL_SPACES(str);
(void)matchchar(&str, ']');
return new_range(f, start, str, 0, 1, maybe, NULL);
}
@@ -488,28 +503,30 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
return new_pat(f, start, str, 0, 0, BP_END_OF_FILE);
return new_pat(f, start, str, 0, 0, BP_END_OF_LINE);
}
- // Whitespace:
- case '_': {
- size_t namelen = 1;
- if (matchchar(&str, '_')) // double __ (whitespace with newlines)
- ++namelen;
- if (matchchar(&str, ':')) return NULL; // Don't match definitions
- pat_t *ref = new_pat(f, start, str, 0, -1, BP_REF);
- ref->args.ref.name = start;
- ref->args.ref.len = namelen;
- return ref;
- }
default: {
// Reference
- if (!isalpha(c)) return NULL;
- --str;
- const char *refname = str;
- str = after_name(str);
- if (matchchar(&str, ':')) // Don't match definitions
- return NULL;
+ if (!isalpha(c) && c != '_') return NULL;
+ str = after_name(start);
+ size_t namelen = (size_t)(str - start);
+ if (matchchar(&str, ':')) { // Definitions
+ pat_t *def = bp_pattern_nl(f, str, false);
+ if (!def) file_err(f, str, f->end, "Could not parse this definition.");
+ str = def->end;
+ (void)matchchar(&str, ';'); // Optional semicolon
+ SKIP_NL_SPACES(str);
+ pat_t *pat = bp_pattern_nl(f, str, false);
+ if (pat) str = pat->end;
+ else pat = def;
+ pat_t *ret = new_pat(f, start, str, pat->min_matchlen, pat->max_matchlen, BP_DEFINITION);
+ ret->args.def.name = start;
+ ret->args.def.namelen = namelen;
+ ret->args.def.def = def;
+ ret->args.def.pat = pat;
+ return ret;
+ }
pat_t *ref = new_pat(f, start, str, 0, -1, BP_REF);
- ref->args.ref.name = refname;
- ref->args.ref.len = (size_t)(str - refname);
+ ref->args.ref.name = start;
+ ref->args.ref.len = namelen;
return ref;
}
}
@@ -575,32 +592,23 @@ pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement)
return pat;
}
-//
-// Compile a string representing a BP pattern into a pattern object.
-//
-pat_t *bp_pattern(file_t *f, const char *str)
+static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl)
{
+ SKIP_NL_SPACES(str);
pat_t *pat = bp_simplepattern(f, str);
- if (pat != NULL) pat = expand_choices(f, pat);
+ if (pat != NULL) pat = expand_choices(f, pat, allow_nl);
+ SKIP_NL_SPACES(str);
if (matchstr(&str, "=>"))
- pat = expand_replacements(f, pat ? pat : new_pat(f, str-2, str-2, 0, 0, BP_STRING));
+ pat = expand_replacements(f, pat ? pat : new_pat(f, str-2, str-2, 0, 0, BP_STRING), allow_nl);
return pat;
}
//
-// Match a definition (id__`:__pattern)
+// Compile a string representing a BP pattern into a pattern object.
//
-def_t *bp_definition(def_t *defs, file_t *f, const char *str)
+pat_t *bp_pattern(file_t *f, const char *str)
{
- const char *name = after_spaces(str);
- str = after_name(name);
- if (!str) return NULL;
- size_t namelen = (size_t)(str - name);
- if (!matchchar(&str, ':')) return NULL;
- pat_t *defpat = bp_pattern(f, str);
- if (!defpat) return NULL;
- (void)matchchar(&defpat->end, ';'); // TODO: verify this is safe to mutate
- return with_def(defs, namelen, name, defpat);
+ return bp_pattern_nl(f, str, false);
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/pattern.h b/pattern.h
index e9bebf7..47d0c63 100644
--- a/pattern.h
+++ b/pattern.h
@@ -19,8 +19,6 @@ __attribute__((nonnull(1)))
pat_t *either_pat(file_t *f, pat_t *first, pat_t *second);
__attribute__((nonnull))
pat_t *bp_pattern(file_t *f, const char *str);
-__attribute__((nonnull))
-def_t *bp_definition(def_t *defs, file_t *f, const char *str);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/types.h b/types.h
index bb491bc..495234f 100644
--- a/types.h
+++ b/types.h
@@ -37,8 +37,9 @@ enum pattype_e {
BP_END_OF_FILE = 22,
BP_END_OF_LINE = 23,
BP_WORD_BOUNDARY = 24,
- BP_LEFTRECURSION = 25,
- BP_ERROR = 26,
+ BP_DEFINITION = 25,
+ BP_LEFTRECURSION = 26,
+ BP_ERROR = 27,
};
struct match_s; // forward declared to resolve circular struct defs
@@ -60,6 +61,11 @@ typedef struct pat_s {
size_t len;
} ref;
struct {
+ const char *name;
+ size_t namelen;
+ struct pat_s *def, *pat;
+ } def;
+ struct {
unsigned char low, high;
} range;
struct {
diff --git a/utils.c b/utils.c
index 39ae4cc..98eb8b3 100644
--- a/utils.c
+++ b/utils.c
@@ -19,7 +19,8 @@ const char *after_spaces(const char *str)
// Skip whitespace and comments:
skip_whitespace:
switch (*str) {
- case ' ': case '\r': case '\n': case '\t': {
+ // case ' ': case '\r': case '\n': case '\t': {
+ case ' ': case '\t': {
++str;
goto skip_whitespace;
}