about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- bpeg.bpeg 10
-rw-r--r-- bpeg.c 2
-rw-r--r-- compiler.c 30
-rw-r--r-- grammars/builtins.bpeg 16
-rw-r--r-- types.h 2
-rw-r--r-- vm.c 40
6 files changed, 53 insertions, 47 deletions
diff --git a/bpeg.bpeg b/bpeg.bpeg
index 3301155..39e0f3f 100644
--- a/bpeg.bpeg
+++ b/bpeg.bpeg
@@ -7,14 +7,14 @@ Def = @[name]Ref __ `= __ @[definition]extended-pat;
String-pattern = *(`\ pat ?`; / .);
pat = suffixed-pat / simple-pat;
-simple-pat = Empty / Dot / String / Char-range / Char / Escape-range / Escape / No / Anything-but
- / Upto-and / Repeat / After / Before / Capture / Replace / Ref / parens;
+simple-pat = Empty / Upto / Dot / String / Char-range / Char / Escape-range / Escape / No / Anything-but
+ / Repeat / After / Before / Capture / Replace / Ref / parens;
suffixed-pat = Eq-pat;
Eq-pat = @[first]simple-pat "==" @[second]pat;
Empty = `/ >(__ (`)/`}));
-Dot = `.;
+Dot = `. !`.;
String = (
`" @[s]*(Escape / ~`") `"
/ `' @[s]*(Escape / ~`') `'
@@ -30,7 +30,7 @@ escape-sequence = (
);
No = `! _ @pat;
Anything-but = `~ ?`~ _ @pat;
-Upto-and = `& ?`& _ @pat;
+Upto = 2-3`. ?>(_@pat);
Repeat = (
@[min]int _ `- _ @[max]int
/{@[min]{=>"0"}=>} @[max]int _ `-
@@ -64,7 +64,7 @@ $ = !.;
^^ = !<$.;
^ = !<.;
-hash-comment = `# *.;
+hash-comment = `# .. $;
# Note: comments are undefined by default in regular BPEG
comment = hash-comment;
diff --git a/bpeg.c b/bpeg.c
index 89f35e7..2245ff8 100644
--- a/bpeg.c
+++ b/bpeg.c
@@ -3,6 +3,7 @@
*
* Grammar:
* # <comment> comment
+ * .. any text up to the following pattern (if any); (multiline: ...)
* . any character (multiline: $.)
* ^ beginning of a line (^^: beginning of file)
* $ end of a line ($$: end of file)
@@ -13,7 +14,6 @@
* \<e1>-<e2> escape sequence range (e.g. \x00-\xF0)
* ! <pat> no <pat>
* ~ <pat> any character as long as it doesn't match <pat> (multiline: ~~<pat>)
- * & <pat> upto and including <pat> (aka *~<pat> <pat>) (multiline: &&<pat>)
* <N=1> + <pat> [% <sep="">] <N> or more <pat>s (separated by <sep>)
* * <pat> [% <sep="">] sugar for "0+ <pat> [% <sep>]"
* <N=1> - <pat> [% <sep="">] <N> or fewer <pat>s (separated by <sep>)
diff --git a/compiler.c b/compiler.c
index 4e2b185..df34e44 100644
--- a/compiler.c
+++ b/compiler.c
@@ -97,10 +97,21 @@ vm_op_t *bpeg_simplepattern(const char *str)
switch (c) {
// Any char (dot) ($. is multiline anychar)
case '.': {
- anychar:
- op->op = VM_ANYCHAR;
- op->len = 1;
- break;
+ if (matchchar(&str, '.')) { // ".."
+ if (matchchar(&str, '.')) // "..."
+ op->multiline = 1;
+ vm_op_t *till = bpeg_simplepattern(str);
+ str = str; // Don't advance str, the following pattern will be re-matched.
+ op->op = VM_UPTO;
+ op->len = -1;
+ op->args.pat = till;
+ break;
+ } else {
+ anychar:
+ op->op = VM_ANYCHAR;
+ op->len = 1;
+ break;
+ }
}
// Char literals
case '`': {
@@ -184,17 +195,6 @@ vm_op_t *bpeg_simplepattern(const char *str)
op->args.pat = p;
break;
}
- // Upto and including <pat>
- case '&': {
- if (matchchar(&str, '&')) op->multiline = 1;
- vm_op_t *p = bpeg_simplepattern(str);
- check(p, "Expected pattern after '&'\n");
- str = p->end;
- op->op = VM_UPTO_AND;
- op->len = -1;
- op->args.pat = p;
- break;
- }
// Number of repetitions: <N>(-<N> / - / + / "")
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9': {
diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg
index f92e0aa..c558c21 100644
--- a/grammars/builtins.bpeg
+++ b/grammars/builtins.bpeg
@@ -1,11 +1,11 @@
# Meta-rules for acting on everything
pattern = !(/); # Not defined by default
replacement = {!(/)=>}; # Not defined by default
-replace-all = +&&@replacement &&$$;
-find-all = {&&>matching-line=>} +(matching-line/non-matching-line) ?{!<\n => "\n"};
-only-matches = +{&&@pattern=>'@1\n'};
-matching-line = +&@pattern *. $ ?\n;
-non-matching-line = {&&(\n/$$)=>};
+replace-all = +(...@replacement) ...;
+find-all = {... >matching-line =>} +(matching-line/non-matching-line) ?{!<\n => "\n"};
+only-matches = +{...@pattern=>'@1\n'};
+matching-line = +(..@pattern)..$ ?\n;
+non-matching-line = {..$=>};
# Helper definitions (commonly used)
crlf = \r\n;
@@ -28,10 +28,10 @@ abc = `a-z;
esc = \e; e = \e;
tab = \t; t = \t;
nl = \n; lf = \n; n = \n;
-c-block-comment = '/*' &&'*/';
-c-line-comment = '//' &$;
+c-block-comment = '/*' ... '*/';
+c-line-comment = '//' ..$;
c-comment = c-line-comment / c-block-comment;
-hash-comment = `# &$;
+hash-comment = `# ..$;
comment = !(/); # No default definition, can be overridden
WS = ` /\t/\n/\r/comment;
ws = ` /\t;
diff --git a/types.h b/types.h
index f335285..6346342 100644
--- a/types.h
+++ b/types.h
@@ -16,7 +16,7 @@ enum VMOpcode {
VM_STRING,
VM_RANGE,
VM_NOT,
- VM_UPTO_AND,
+ VM_UPTO,
VM_REPEAT,
VM_BEFORE,
VM_AFTER,
diff --git a/vm.c b/vm.c
index 8e0957d..b69b3cb 100644
--- a/vm.c
+++ b/vm.c
@@ -20,7 +20,7 @@ static const char *opcode_names[] = {
[VM_STRING] = "STRING",
[VM_RANGE] = "RANGE",
[VM_NOT] = "NOT",
- [VM_UPTO_AND] = "UPTO_AND",
+ [VM_UPTO] = "UPTO",
[VM_REPEAT] = "REPEAT",
[VM_BEFORE] = "BEFORE",
[VM_AFTER] = "AFTER",
@@ -130,24 +130,30 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
m->end = str;
return m;
}
- case VM_UPTO_AND: {
+ case VM_UPTO: {
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->op = op;
- match_t *p = NULL;
- for (const char *prev = NULL; p == NULL && prev < str; ) {
- prev = str;
- p = _match(g, str, op->args.pat, rec);
- if (*str && (op->multiline || *str != '\n'))
- ++str;
- }
- if (p) {
- m->end = p->end;
- m->child = p;
- return m;
+ if (op->args.pat) {
+ for (const char *prev = NULL; prev < str; ) {
+ prev = str;
+ match_t *p = _match(g, str, op->args.pat, rec);
+ if (p) {
+ destroy_match(&p);
+ break;
+ }
+ // This isn't in the for() structure because there needs to
+ // be at least one chance to match the pattern, even if
+ // we're at the end of the string already (e.g. "..$").
+ if (*str && (op->multiline || *str != '\n')) ++str;
+ }
+ } else if (op->multiline) {
+ while (*str) ++str;
+ } else {
+ while (*str && *str != '\n') ++str;
}
- destroy_match(&m);
- return NULL;
+ m->end = str;
+ return m;
}
case VM_REPEAT: {
match_t *m = calloc(sizeof(match_t), 1);
@@ -374,8 +380,8 @@ void print_pattern(vm_op_t *op)
fprintf(stderr, ")");
break;
}
- case VM_UPTO_AND: {
- fprintf(stderr, "text up to and including (");
+ case VM_UPTO: {
+ fprintf(stderr, "text up to (");
print_pattern(op->args.pat);
fprintf(stderr, ")");
break;