diff --git a/README.md b/README.md index 04bfba4..e3f41ed 100644 --- a/README.md +++ b/README.md @@ -33,10 +33,9 @@ Pattern | Meaning -------------------|--------------------- `pat1 pat2` | `pat1` followed by `pat2` `pat1 / pat2` | `pat1` if it matches, otherwise `pat2` -`...pat` | Any text up to and including `pat` (including newlines) `..pat` | Any text up to and including `pat` (except newlines) +`..pat1 % pat2` | Any text up to and including `pat1` (except newlines), skipping over instances of `pat2` `.` | Any single character (except newline) -`$.` | Any single character (including newline) `^^` | The start of the input `^` | The start of a line `$$` | The end of the input diff --git a/bp.1 b/bp.1 index 1dfe7e0..fb10a56 100644 --- a/bp.1 +++ b/bp.1 @@ -89,7 +89,7 @@ A series of ordered choices (if one pattern matches, the following patterns will not be attempted), pronounced \fI\fB-or-\fI\fR .B . -\fBAny\fR character (multiline: $.) +\fBAny\fR character (excluding newline) .B ^ \fBStart-of-a-line\fR @@ -150,7 +150,7 @@ The \fBescape-sequence-range-\fI\fB-to-\fI\fR \fB0+(\fI\fB)\fR) .B .. \fI\fR -Any text \fBup-to-and-including\fR \fI\fR (multiline: \fB...\fR) +Any text \fBup-to-and-including\fR \fI\fR (excluding newline) .B .. \fI\fB % \fI\fR Any text \fBup-to-and-including\fR \fI\fR, but skipping over instances of \fI\fR. diff --git a/compiler.c b/compiler.c index 7ee5fbe..69b2f10 100644 --- a/compiler.c +++ b/compiler.c @@ -111,14 +111,10 @@ vm_op_t *bp_simplepattern(file_t *f, const char *str) const char *origin = str; ++str; switch (c) { - // Any char (dot) ($. is multiline anychar) + // Any char (dot) case '.': { if (*str == '.') { // ".." ++str; - if (*str == '.') { // "..." - ++str; - op->multiline = 1; - } vm_op_t *till = bp_simplepattern(f, str); op->op = VM_UPTO_AND; op->len = -1; @@ -134,7 +130,6 @@ vm_op_t *bp_simplepattern(file_t *f, const char *str) } break; } else { - anychar: op->op = VM_ANYCHAR; op->len = 1; break; @@ -400,9 +395,6 @@ vm_op_t *bp_simplepattern(file_t *f, const char *str) if (matchchar(&str, c)) { // double __, ^^, $$ char tmp[3] = {c, c, '\0'}; op->args.s = strdup(tmp); - } else if (c == '$' && matchchar(&str, '.')) { // $. (multi-line anychar) - op->multiline = 1; - goto anychar; } else { op->args.s = strndup(&c, 1); } diff --git a/grammars/bpeg.bp b/grammars/bpeg.bp index 007c209..8a223b2 100644 --- a/grammars/bpeg.bp +++ b/grammars/bpeg.bp @@ -1,13 +1,13 @@ # This is a file defining the BP grammar using BP syntax -Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(... => "Could not parse this code")) +Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(..%\n => "Could not parse this code")) Def: @name=id _ `: __ ( @definition=extended-pat / $$ @!=(''=>"No definition for rule") - / @!=(...>(`;/id_`:/$) => "Invalid definition: @0")) + / @!=(..>(`;/id_`:/$)%\n => "Invalid definition: @0")) # This is used for command line arguments: -String-pattern: ... % (Nodent / Escape / `\ pat [`;]) +String-pattern: .. % (\n / Nodent / Escape / `\ pat [`;]) pat: simple-pat !(__("!="/"=="/"=>")) / suffixed-pat simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range @@ -68,9 +68,9 @@ extended-pat: Otherwise / Chain / pat # Special-symbol rules: _: *(` / \t) __: *(` / \t / \r / \n / comment) -$$: !$. +$$: !(./\n) $: !. -^^: !<$. +^^: !<(./\n) ^: !<. id: "^^" / "^" / "__" / "_" / "$$" / "$" / "|" / `a-z,A-Z *`a-z,A-Z,0-9,- diff --git a/grammars/builtins.bp b/grammars/builtins.bp index 9c34fe4..f68b2a7 100644 --- a/grammars/builtins.bp +++ b/grammars/builtins.bp @@ -11,7 +11,7 @@ pattern: !'' # Not defined by default replacement: !'' # Not defined by default replace-all: ( (include-binary-files / is-text-file) - +(...(>pattern replacement)) ... + +(..(>pattern replacement)%\n) ..%\n ) find-all: ( (include-binary-files / is-text-file) @@ -21,7 +21,7 @@ find-all: ( ) only-matches: ( (include-binary-files / is-text-file) - +(...@pattern =>'@1\n') + +(..@pattern%\n =>'@1\n') ) # Helper definitions (commonly used) @@ -49,10 +49,10 @@ utf8-codepoint: ( ) crlf: \r\n cr: \r -anglebraces: `<...`> % (anglebraces/string) -brackets: `[...`] % (brackets/string) -braces: `{...`} % (braces/string) -parens: `(...`) % (parens/string) +anglebraces: `<..`> % (\n/anglebraces/string) +brackets: `[..`] % (\n/brackets/string) +braces: `{..`} % (\n/braces/string) +parens: `(..`) % (\n/parens/string) string: `"..`" % (`\.) / `'..`' % (`\.) id: !<`a-z,A-Z,_,0-9 `a-z,A-Z,_ *`a-z,A-Z,_,0-9 id-char: `a-z,A-Z,_,0-9 @@ -70,16 +70,16 @@ abc: `a-z esc: \e tab: \t nl: \n; lf: \n -c-block-comment: '/*' ... '*/' -c-line-comment: '//' ..$ +c-block-comment: '/*'..'*/'%\n +c-line-comment: '//'..$ c-comment: c-line-comment / c-block-comment hash-comment: `# ..$ comment: !''; # No default definition, can be overridden WS: ` /\t/\n/\r/comment ws: ` /\t -$$: !$. +$$: !(./\n) $: !. -^^: !<$. +^^: !<(./\n) ^: !<. __: *(` /\t/\n/\r/comment) _: *(` /\t) diff --git a/grammars/html.bp b/grammars/html.bp index 9ec33b6..c820b4b 100644 --- a/grammars/html.bp +++ b/grammars/html.bp @@ -7,13 +7,13 @@ html-element: void-element / raw-element / template-element / normal-element void-element: `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr") __attributes__ [`/] __ `> -template-element: "") % (comment / html-element) +template-element: "" % (\n / comment / html-element) -raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `>...(") +raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `>..(")%\n -normal-element: `< @tag=id __attributes__ `>...(") % (comment / html-element) +normal-element: `< @tag=id __attributes__ `>..(") % (\n / comment / html-element) -comment: "" +comment: "" % \n attributes: *attribute%__ attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`') diff --git a/types.h b/types.h index 24b66a9..56c6e28 100644 --- a/types.h +++ b/types.h @@ -46,7 +46,6 @@ enum VMOpcode { */ typedef struct vm_op_s { enum VMOpcode op; - unsigned int multiline:1, negate:1; const char *start, *end; // Length of the match, if constant, otherwise -1 ssize_t len; diff --git a/vm.c b/vm.c index 9412b0c..32cabe4 100644 --- a/vm.c +++ b/vm.c @@ -106,7 +106,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un { switch (op->op) { case VM_ANYCHAR: { - if (str >= f->end || (!op->multiline && *str == '\n')) + if (str >= f->end || *str == '\n') return NULL; match_t *m = new(match_t); m->op = op; @@ -152,11 +152,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un m->start = str; m->op = op; if (!op->args.multiple.first && !op->args.multiple.second) { - if (op->multiline) { - str = f->end; - } else { - while (str < f->end && *str != '\n') ++str; - } + while (str < f->end && *str != '\n') ++str; } else { match_t **dest = &m->child; for (const char *prev = NULL; prev < str; ) { @@ -181,7 +177,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un // This isn't in the for() structure because there needs to // be at least once chance to match the pattern, even if // we're at the end of the string already (e.g. "..$"). - if (str < f->end && (op->multiline || *str != '\n')) + if (str < f->end && *str != '\n') str = next_char(f, str); } destroy_match(&m);