diff --git a/compiler.c b/compiler.c index 97b1737..f178ce6 100644 --- a/compiler.c +++ b/compiler.c @@ -162,6 +162,12 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) case '\\': { if (!*str || *str == '\n') file_err(f, str, str, "There should be an escape sequence here after this backslash."); + + if (matchchar(&str, 'N')) { // \N (nodent) + op->op = VM_NODENT; + break; + } + op->len = 1; unsigned char e = unescapechar(str, &str); if (*str == '-') { // Escape range (e.g. \x00-\xFF) @@ -397,7 +403,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) break; } // Special rules: - case '_': case '^': case '$': { + case '_': case '^': case '$': case '|': { if (matchchar(&str, c)) { // double __, ^^, $$ char tmp[3] = {c, c, '\0'}; op->args.s = strdup(tmp); @@ -415,10 +421,6 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) op->op = VM_REF; break; } - case '|': { - op->op = VM_NODENT; - break; - } default: { // Reference if (isalpha(c)) { @@ -488,6 +490,13 @@ vm_op_t *bpeg_stringpattern(file_t *f, const char *str) if (*str == '\\') { if (!str[1] || str[1] == '\n') file_err(f, str, str, "There should be an escape sequence or pattern here after this backslash."); + + if (matchchar(&str, 'N')) { // \N (nodent) + interp = calloc(sizeof(vm_op_t), 1); + interp->op = VM_NODENT; + break; + } + const char *after_escape; unsigned char e = unescapechar(&str[1], &after_escape); if (e != str[1]) { diff --git a/grammar.c b/grammar.c index 50f2306..d55865a 100644 --- a/grammar.c +++ b/grammar.c @@ -39,7 +39,7 @@ vm_op_t *load_grammar(grammar_t *g, file_t *f) while (*src) { const char *name = src; const char *name_end = after_name(name); - check(name_end > name, "Invalid name for definition"); + check(name_end > name, "Invalid name for definition: %s", name); name = strndup(name, (size_t)(name_end-name)); src = after_spaces(name_end); check(matchchar(&src, ':'), "Expected ':' in definition"); diff --git a/grammars/bpeg.bp b/grammars/bpeg.bp index 88d986a..ad98240 100644 --- a/grammars/bpeg.bp +++ b/grammars/bpeg.bp @@ -7,11 +7,12 @@ Def: @name=id _ `: __ ( / @!={...>(`;/id_`:/$) => "Invalid definition: @0"}) # This is used for command line arguments: -String-pattern: ... % (`\ (escape-sequence / pat [`;])) +String-pattern: ... % (Nodent / Escape / `\ pat [`;]) pat: simple-pat !(__("!="/"==")) / suffixed-pat -simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No - / Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens +simple-pat: Upto-and / Dot / String / Char-range / Char / Nodent / Escape-range + / Escape / Repeat / Optional / After / Before / Capture / Replace + / Ref / parens suffixed-pat: ( Eq-pat @@ -39,7 +40,7 @@ escape-sequence: ( / `x 2 (`0-9/`a-f/`A-F) ) No: `! (_@pat / @!={=>"Expected a pattern after the exclamation mark"}) -Nodent: `| +Nodent: `\ `N Upto-and: 2-3`. [_@first=simple-pat] [__`%__@second=simple-pat] Repeat: ( @min='' `* @max='' @@ -70,7 +71,7 @@ $: !. ^^: !<$. ^: !<. -id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-) +id: "^^" / "^" / "__" / "_" / "$$" / "$" / "|" / (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-) line-comment: `# .. $ block-comment: "#("..")#" % block-comment diff --git a/utils.c b/utils.c index 1452cd6..05eaec3 100644 --- a/utils.c +++ b/utils.c @@ -44,6 +44,7 @@ const char *after_spaces(const char *str) */ const char *after_name(const char *str) { + if (*str == '|') return &str[1]; if (*str == '^' || *str == '_' || *str == '$') { return (str[1] == *str) ? &str[2] : &str[1]; }