aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2021-01-05 00:09:30 -0800
committerBruce Hill <bruce@bruce-hill.com>2021-01-05 00:09:30 -0800
commit4350d996d1f4987ae83569acfdec2e25b996f599 (patch)
tree835ab3332ec593c748f4eabd7490266b2e22aa30
parent8d14bf01bc3c3cfbb3613487b59fe0bc4d6efba6 (diff)
Simplified `...` to `..%\n` and `$.` to `./\n`
-rw-r--r--README.md3
-rw-r--r--bp.14
-rw-r--r--compiler.c10
-rw-r--r--grammars/bpeg.bp10
-rw-r--r--grammars/builtins.bp20
-rw-r--r--grammars/html.bp8
-rw-r--r--types.h1
-rw-r--r--vm.c10
8 files changed, 26 insertions, 40 deletions
diff --git a/README.md b/README.md
index 04bfba4..e3f41ed 100644
--- a/README.md
+++ b/README.md
@@ -33,10 +33,9 @@ Pattern | Meaning
-------------------|---------------------
`pat1 pat2` | `pat1` followed by `pat2`
`pat1 / pat2` | `pat1` if it matches, otherwise `pat2`
-`...pat` | Any text up to and including `pat` (including newlines)
`..pat` | Any text up to and including `pat` (except newlines)
+`..pat1 % pat2` | Any text up to and including `pat1` (except newlines), skipping over instances of `pat2`
`.` | Any single character (except newline)
-`$.` | Any single character (including newline)
`^^` | The start of the input
`^` | The start of a line
`$$` | The end of the input
diff --git a/bp.1 b/bp.1
index 1dfe7e0..fb10a56 100644
--- a/bp.1
+++ b/bp.1
@@ -89,7 +89,7 @@ A series of ordered choices (if one pattern matches, the following patterns
will not be attempted), pronounced \fI<pat1>\fB-or-\fI<pat2>\fR
.B .
-\fBAny\fR character (multiline: $.)
+\fBAny\fR character (excluding newline)
.B ^
\fBStart-of-a-line\fR
@@ -150,7 +150,7 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
\fB0+(\fI<sep><pat>\fB)\fR)
.B .. \fI<pat>\fR
-Any text \fBup-to-and-including\fR \fI<pat>\fR (multiline: \fB...\fR)
+Any text \fBup-to-and-including\fR \fI<pat>\fR (excluding newline)
.B .. \fI<pat>\fB % \fI<skip>\fR
Any text \fBup-to-and-including\fR \fI<pat>\fR, but skipping over instances of \fI<skip>\fR.
diff --git a/compiler.c b/compiler.c
index 7ee5fbe..69b2f10 100644
--- a/compiler.c
+++ b/compiler.c
@@ -111,14 +111,10 @@ vm_op_t *bp_simplepattern(file_t *f, const char *str)
const char *origin = str;
++str;
switch (c) {
- // Any char (dot) ($. is multiline anychar)
+ // Any char (dot)
case '.': {
if (*str == '.') { // ".."
++str;
- if (*str == '.') { // "..."
- ++str;
- op->multiline = 1;
- }
vm_op_t *till = bp_simplepattern(f, str);
op->op = VM_UPTO_AND;
op->len = -1;
@@ -134,7 +130,6 @@ vm_op_t *bp_simplepattern(file_t *f, const char *str)
}
break;
} else {
- anychar:
op->op = VM_ANYCHAR;
op->len = 1;
break;
@@ -400,9 +395,6 @@ vm_op_t *bp_simplepattern(file_t *f, const char *str)
if (matchchar(&str, c)) { // double __, ^^, $$
char tmp[3] = {c, c, '\0'};
op->args.s = strdup(tmp);
- } else if (c == '$' && matchchar(&str, '.')) { // $. (multi-line anychar)
- op->multiline = 1;
- goto anychar;
} else {
op->args.s = strndup(&c, 1);
}
diff --git a/grammars/bpeg.bp b/grammars/bpeg.bp
index 007c209..8a223b2 100644
--- a/grammars/bpeg.bp
+++ b/grammars/bpeg.bp
@@ -1,13 +1,13 @@
# This is a file defining the BP grammar using BP syntax
-Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(... => "Could not parse this code"))
+Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(..%\n => "Could not parse this code"))
Def: @name=id _ `: __ (
@definition=extended-pat
/ $$ @!=(''=>"No definition for rule")
- / @!=(...>(`;/id_`:/$) => "Invalid definition: @0"))
+ / @!=(..>(`;/id_`:/$)%\n => "Invalid definition: @0"))
# This is used for command line arguments:
-String-pattern: ... % (Nodent / Escape / `\ pat [`;])
+String-pattern: .. % (\n / Nodent / Escape / `\ pat [`;])
pat: simple-pat !(__("!="/"=="/"=>")) / suffixed-pat
simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range
@@ -68,9 +68,9 @@ extended-pat: Otherwise / Chain / pat
# Special-symbol rules:
_: *(` / \t)
__: *(` / \t / \r / \n / comment)
-$$: !$.
+$$: !(./\n)
$: !.
-^^: !<$.
+^^: !<(./\n)
^: !<.
id: "^^" / "^" / "__" / "_" / "$$" / "$" / "|" / `a-z,A-Z *`a-z,A-Z,0-9,-
diff --git a/grammars/builtins.bp b/grammars/builtins.bp
index 9c34fe4..f68b2a7 100644
--- a/grammars/builtins.bp
+++ b/grammars/builtins.bp
@@ -11,7 +11,7 @@ pattern: !'' # Not defined by default
replacement: !'' # Not defined by default
replace-all: (
(include-binary-files / is-text-file)
- +(...(>pattern replacement)) ...
+ +(..(>pattern replacement)%\n) ..%\n
)
find-all: (
(include-binary-files / is-text-file)
@@ -21,7 +21,7 @@ find-all: (
)
only-matches: (
(include-binary-files / is-text-file)
- +(...@pattern =>'@1\n')
+ +(..@pattern%\n =>'@1\n')
)
# Helper definitions (commonly used)
@@ -49,10 +49,10 @@ utf8-codepoint: (
)
crlf: \r\n
cr: \r
-anglebraces: `<...`> % (anglebraces/string)
-brackets: `[...`] % (brackets/string)
-braces: `{...`} % (braces/string)
-parens: `(...`) % (parens/string)
+anglebraces: `<..`> % (\n/anglebraces/string)
+brackets: `[..`] % (\n/brackets/string)
+braces: `{..`} % (\n/braces/string)
+parens: `(..`) % (\n/parens/string)
string: `"..`" % (`\.) / `'..`' % (`\.)
id: !<`a-z,A-Z,_,0-9 `a-z,A-Z,_ *`a-z,A-Z,_,0-9
id-char: `a-z,A-Z,_,0-9
@@ -70,16 +70,16 @@ abc: `a-z
esc: \e
tab: \t
nl: \n; lf: \n
-c-block-comment: '/*' ... '*/'
-c-line-comment: '//' ..$
+c-block-comment: '/*'..'*/'%\n
+c-line-comment: '//'..$
c-comment: c-line-comment / c-block-comment
hash-comment: `# ..$
comment: !''; # No default definition, can be overridden
WS: ` /\t/\n/\r/comment
ws: ` /\t
-$$: !$.
+$$: !(./\n)
$: !.
-^^: !<$.
+^^: !<(./\n)
^: !<.
__: *(` /\t/\n/\r/comment)
_: *(` /\t)
diff --git a/grammars/html.bp b/grammars/html.bp
index 9ec33b6..c820b4b 100644
--- a/grammars/html.bp
+++ b/grammars/html.bp
@@ -7,13 +7,13 @@ html-element: void-element / raw-element / template-element / normal-element
void-element: `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"meta"/"param"/"source"/"track"/"wbr") __attributes__ [`/] __ `>
-template-element: "<template>"...("</template>") % (comment / html-element)
+template-element: "<template>".."</template>" % (\n / comment / html-element)
-raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `>...("</"tag__`>)
+raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `>..("</"tag__`>)%\n
-normal-element: `< @tag=id __attributes__ `>...("</"tag`>) % (comment / html-element)
+normal-element: `< @tag=id __attributes__ `>..("</"tag`>) % (\n / comment / html-element)
-comment: "<!--" ..."-->"
+comment: "<!--" .."-->" % \n
attributes: *attribute%__
attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`')
diff --git a/types.h b/types.h
index 24b66a9..56c6e28 100644
--- a/types.h
+++ b/types.h
@@ -46,7 +46,6 @@ enum VMOpcode {
*/
typedef struct vm_op_s {
enum VMOpcode op;
- unsigned int multiline:1, negate:1;
const char *start, *end;
// Length of the match, if constant, otherwise -1
ssize_t len;
diff --git a/vm.c b/vm.c
index 9412b0c..32cabe4 100644
--- a/vm.c
+++ b/vm.c
@@ -106,7 +106,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
{
switch (op->op) {
case VM_ANYCHAR: {
- if (str >= f->end || (!op->multiline && *str == '\n'))
+ if (str >= f->end || *str == '\n')
return NULL;
match_t *m = new(match_t);
m->op = op;
@@ -152,11 +152,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
m->start = str;
m->op = op;
if (!op->args.multiple.first && !op->args.multiple.second) {
- if (op->multiline) {
- str = f->end;
- } else {
- while (str < f->end && *str != '\n') ++str;
- }
+ while (str < f->end && *str != '\n') ++str;
} else {
match_t **dest = &m->child;
for (const char *prev = NULL; prev < str; ) {
@@ -181,7 +177,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
// This isn't in the for() structure because there needs to
// be at least one chance to match the pattern, even if
// we're at the end of the string already (e.g. "..$").
- if (str < f->end && (op->multiline || *str != '\n'))
+ if (str < f->end && *str != '\n')
str = next_char(f, str);
}
destroy_match(&m);