Simplified `...` to `..%\n` and `$.` to `./\n`

author: Bruce Hill <bruce@bruce-hill.com> 2021-01-05 00:09:30 -0800
committer: Bruce Hill <bruce@bruce-hill.com> 2021-01-05 00:09:30 -0800
commit: 4350d996d1f4987ae83569acfdec2e25b996f599 (patch)
tree: 835ab3332ec593c748f4eabd7490266b2e22aa30
parent: 8d14bf01bc3c3cfbb3613487b59fe0bc4d6efba6 (diff)
8 files changed, 26 insertions, 40 deletions
diff --git a/README.md b/README.md
index 04bfba4..e3f41ed 100644
--- a/README.md
+++ b/README.md
@@ -33,10 +33,9 @@ Pattern            | Meaning
 -------------------|---------------------
 `pat1 pat2`        | `pat1` followed by `pat2`
 `pat1 / pat2`      | `pat1` if it matches, otherwise `pat2`
-`...pat`           | Any text up to and including `pat` (including newlines)
 `..pat`            | Any text up to and including `pat` (except newlines)
+`..pat1 % pat2`    | Any text up to and including `pat1` (except newlines), skipping over instances of `pat2`
 `.`                | Any single character (except newline)
-`$.`               | Any single character (including newline)
 `^^`               | The start of the input
 `^`                | The start of a line
 `$$`               | The end of the input
diff --git a/bp.1 b/bp.1
index 1dfe7e0..fb10a56 100644
--- a/bp.1
+++ b/bp.1
@@ -89,7 +89,7 @@ A series of ordered choices (if one pattern matches, the following patterns
 will not be attempted), pronounced \fI<pat1>\fB-or-\fI<pat2>\fR
 
 .B .
-\fBAny\fR character (multiline: $.)
+\fBAny\fR character (excluding newline)
 
 .B ^
 \fBStart-of-a-line\fR
@@ -150,7 +150,7 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
 \fB0+(\fI<sep><pat>\fB)\fR)
 
 .B .. \fI<pat>\fR
-Any text \fBup-to-and-including\fR \fI<pat>\fR (multiline: \fB...\fR)
+Any text \fBup-to-and-including\fR \fI<pat>\fR (excluding newline)
 
 .B .. \fI<pat>\fB % \fI<skip>\fR
 Any text \fBup-to-and-including\fR \fI<pat>\fR, but skipping over instances of \fI<skip>\fR.
diff --git a/compiler.c b/compiler.c
index 7ee5fbe..69b2f10 100644
--- a/compiler.c
+++ b/compiler.c
@@ -111,14 +111,10 @@ vm_op_t *bp_simplepattern(file_t *f, const char *str)
     const char *origin = str;
     ++str;
     switch (c) {
-        // Any char (dot) ($. is multiline anychar)
+        // Any char (dot)
         case '.': {
             if (*str == '.') { // ".."
                 ++str;
-                if (*str == '.') { // "..."
-                    ++str;
-                    op->multiline = 1;
-                }
                 vm_op_t *till = bp_simplepattern(f, str);
                 op->op = VM_UPTO_AND;
                 op->len = -1;
@@ -134,7 +130,6 @@ vm_op_t *bp_simplepattern(file_t *f, const char *str)
                 }
                 break;
             } else {
-              anychar:
                 op->op = VM_ANYCHAR;
                 op->len = 1;
                 break;
@@ -400,9 +395,6 @@ vm_op_t *bp_simplepattern(file_t *f, const char *str)
             if (matchchar(&str, c)) { // double __, ^^, $$
                 char tmp[3] = {c, c, '\0'};
                 op->args.s = strdup(tmp);
-            } else if (c == '$' && matchchar(&str, '.')) { // $. (multi-line anychar)
-                op->multiline = 1;
-                goto anychar;
             } else {
                 op->args.s = strndup(&c, 1);
             }
diff --git a/grammars/bpeg.bp b/grammars/bpeg.bp
index 007c209..8a223b2 100644
--- a/grammars/bpeg.bp
+++ b/grammars/bpeg.bp
@@ -1,13 +1,13 @@
 # This is a file defining the BP grammar using BP syntax
 
-Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(... => "Could not parse this code"))
+Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(..%\n => "Could not parse this code"))
 Def: @name=id _ `: __ (
       @definition=extended-pat
     / $$ @!=(''=>"No definition for rule")
-    / @!=(...>(`;/id_`:/$) => "Invalid definition: @0"))
+    / @!=(..>(`;/id_`:/$)%\n => "Invalid definition: @0"))
 
 # This is used for command line arguments:
-String-pattern: ... % (Nodent / Escape / `\ pat [`;])
+String-pattern: .. % (\n / Nodent / Escape / `\ pat [`;])
 
 pat: simple-pat !(__("!="/"=="/"=>")) / suffixed-pat
 simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range
@@ -68,9 +68,9 @@ extended-pat: Otherwise / Chain / pat
 # Special-symbol rules:
 _:  *(`  / \t)
 __: *(`  / \t / \r / \n / comment)
-$$: !$.
+$$: !(./\n)
 $:  !.
-^^: !<$.
+^^: !<(./\n)
 ^:  !<.
 
 id: "^^" / "^" / "__" / "_" / "$$" / "$" / "|" / `a-z,A-Z *`a-z,A-Z,0-9,-
diff --git a/grammars/builtins.bp b/grammars/builtins.bp
index 9c34fe4..f68b2a7 100644
--- a/grammars/builtins.bp
+++ b/grammars/builtins.bp
@@ -11,7 +11,7 @@ pattern: !'' # Not defined by default
 replacement: !'' # Not defined by default
 replace-all: (
     (include-binary-files / is-text-file)
-    +(...(>pattern replacement)) ...
+    +(..(>pattern replacement)%\n) ..%\n
 )
 find-all: (
     (include-binary-files / is-text-file)
@@ -21,7 +21,7 @@ find-all: (
 )
 only-matches: (
     (include-binary-files / is-text-file)
-    +(...@pattern =>'@1\n')
+    +(..@pattern%\n =>'@1\n')
 )
 
 # Helper definitions (commonly used)
@@ -49,10 +49,10 @@ utf8-codepoint: (
 )
 crlf: \r\n
 cr: \r
-anglebraces: `<...`> % (anglebraces/string)
-brackets: `[...`] % (brackets/string)
-braces: `{...`} % (braces/string)
-parens: `(...`) % (parens/string)
+anglebraces: `<..`> % (\n/anglebraces/string)
+brackets: `[..`] % (\n/brackets/string)
+braces: `{..`} % (\n/braces/string)
+parens: `(..`) % (\n/parens/string)
 string: `"..`" % (`\.) / `'..`' % (`\.)
 id: !<`a-z,A-Z,_,0-9 `a-z,A-Z,_ *`a-z,A-Z,_,0-9
 id-char: `a-z,A-Z,_,0-9
@@ -70,16 +70,16 @@ abc: `a-z
 esc: \e
 tab: \t
 nl: \n; lf: \n
-c-block-comment: '/*' ... '*/'
-c-line-comment: '//' ..$
+c-block-comment: '/*'..'*/'%\n
+c-line-comment: '//'..$
 c-comment: c-line-comment / c-block-comment
 hash-comment: `# ..$
 comment: !''; # No default definition, can be overridden
 WS: ` /\t/\n/\r/comment
 ws: ` /\t
-$$: !$.
+$$: !(./\n)
 $:  !.
-^^: !<$.
+^^: !<(./\n)
 ^:  !<.
 __: *(` /\t/\n/\r/comment)
 _:  *(` /\t)
diff --git a/grammars/html.bp b/grammars/html.bp
index 9ec33b6..c820b4b 100644
--- a/grammars/html.bp
+++ b/grammars/html.bp
@@ -7,13 +7,13 @@ html-element: void-element / raw-element / template-element / normal-element
 
 void-element: `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr") __attributes__ [`/] __ `>
 
-template-element: "<template>"...("</template>") % (comment / html-element)
+template-element: "<template>".."</template>" % (\n / comment / html-element)
 
-raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `>...("</"tag__`>)
+raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `>..("</"tag__`>)%\n
 
-normal-element: `< @tag=id __attributes__ `>...("</"tag`>) % (comment / html-element)
+normal-element: `< @tag=id __attributes__ `>..("</"tag`>) % (\n / comment / html-element)
 
-comment: "<!--" ..."-->"
+comment: "<!--" .."-->" % \n
 
 attributes: *attribute%__
 attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`')
diff --git a/types.h b/types.h
index 24b66a9..56c6e28 100644
--- a/types.h
+++ b/types.h
@@ -46,7 +46,6 @@ enum VMOpcode {
  */
 typedef struct vm_op_s {
     enum VMOpcode op;
-    unsigned int multiline:1, negate:1;
     const char *start, *end;
     // Length of the match, if constant, otherwise -1
     ssize_t len;
diff --git a/vm.c b/vm.c
index 9412b0c..32cabe4 100644
--- a/vm.c
+++ b/vm.c
@@ -106,7 +106,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
 {
     switch (op->op) {
         case VM_ANYCHAR: {
-            if (str >= f->end || (!op->multiline && *str == '\n'))
+            if (str >= f->end || *str == '\n')
                 return NULL;
             match_t *m = new(match_t);
             m->op = op;
@@ -152,11 +152,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
             m->start = str;
             m->op = op;
             if (!op->args.multiple.first && !op->args.multiple.second) {
-                if (op->multiline) {
-                    str = f->end;
-                } else {
-                    while (str < f->end && *str != '\n') ++str;
-                }
+                while (str < f->end && *str != '\n') ++str;
             } else {
                 match_t **dest = &m->child;
                 for (const char *prev = NULL; prev < str; ) {
@@ -181,7 +177,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
                     // This isn't in the for() structure because there needs to
                     // be at least once chance to match the pattern, even if
                     // we're at the end of the string already (e.g. "..$").
-                    if (str < f->end && (op->multiline || *str != '\n'))
+                    if (str < f->end && *str != '\n')
                         str = next_char(f, str);
                 }
                 destroy_match(&m);
author	Bruce Hill <bruce@bruce-hill.com>	2021-01-05 00:09:30 -0800
committer	Bruce Hill <bruce@bruce-hill.com>	2021-01-05 00:09:30 -0800
commit	4350d996d1f4987ae83569acfdec2e25b996f599 (patch)
tree	835ab3332ec593c748f4eabd7490266b2e22aa30
parent	8d14bf01bc3c3cfbb3613487b59fe0bc4d6efba6 (diff)