Changed "upto-and" syntax to ".."/"..."

This commit is contained in:
Bruce Hill 2020-09-13 22:04:51 -07:00
parent 1d1c3d35aa
commit 1570dd55e8
6 changed files with 53 additions and 47 deletions

View File

@ -7,14 +7,14 @@ Def = @[name]Ref __ `= __ @[definition]extended-pat;
String-pattern = *(`\ pat ?`; / .);
pat = suffixed-pat / simple-pat;
simple-pat = Empty / Dot / String / Char-range / Char / Escape-range / Escape / No / Anything-but
/ Upto-and / Repeat / After / Before / Capture / Replace / Ref / parens;
simple-pat = Empty / Upto / Dot / String / Char-range / Char / Escape-range / Escape / No / Anything-but
/ Repeat / After / Before / Capture / Replace / Ref / parens;
suffixed-pat = Eq-pat;
Eq-pat = @[first]simple-pat "==" @[second]pat;
Empty = `/ >(__ (`)/`}));
Dot = `.;
Dot = `. !`.;
String = (
`" @[s]*(Escape / ~`") `"
/ `' @[s]*(Escape / ~`') `'
@ -30,7 +30,7 @@ escape-sequence = (
);
No = `! _ @pat;
Anything-but = `~ ?`~ _ @pat;
Upto-and = `& ?`& _ @pat;
Upto = 2-3`. ?>(_@pat);
Repeat = (
@[min]int _ `- _ @[max]int
/{@[min]{=>"0"}=>} @[max]int _ `-
@ -64,7 +64,7 @@ $ = !.;
^^ = !<$.;
^ = !<.;
hash-comment = `# *.;
hash-comment = `# .. $;
# Note: comments are undefined by default in regular BPEG
comment = hash-comment;

2
bpeg.c
View File

@ -3,6 +3,7 @@
*
* Grammar:
* # <comment> comment
* .. any text up to the following pattern (if any); (multiline: ...)
* . any character (multiline: $.)
* ^ beginning of a line (^^: beginning of file)
* $ end of a line ($$: end of file)
@ -13,7 +14,6 @@
* \<e1>-<e2> escape sequence range (e.g. \x00-\xF0)
* ! <pat> no <pat>
* ~ <pat> any character as long as it doesn't match <pat> (multiline: ~~<pat>)
* & <pat> upto and including <pat> (aka *~<pat> <pat>) (multiline: &&<pat>)
* <N=1> + <pat> [% <sep="">] <N> or more <pat>s (separated by <sep>)
* * <pat> [% <sep="">] sugar for "0+ <pat> [% <sep>]"
* <N=1> - <pat> [% <sep="">] <N> or fewer <pat>s (separated by <sep>)

View File

@ -97,10 +97,21 @@ vm_op_t *bpeg_simplepattern(const char *str)
switch (c) {
// Any char (dot) ($. is multiline anychar), or ".."/"..." (text up to the following pattern, if any)
case '.': {
anychar:
op->op = VM_ANYCHAR;
op->len = 1;
break;
if (matchchar(&str, '.')) { // ".."
if (matchchar(&str, '.')) // "..."
op->multiline = 1;
vm_op_t *till = bpeg_simplepattern(str);
str = str; // Don't advance str, the following pattern will be re-matched.
op->op = VM_UPTO;
op->len = -1;
op->args.pat = till;
break;
} else {
anychar:
op->op = VM_ANYCHAR;
op->len = 1;
break;
}
}
// Char literals
case '`': {
@ -184,17 +195,6 @@ vm_op_t *bpeg_simplepattern(const char *str)
op->args.pat = p;
break;
}
// Upto and including <pat>
case '&': {
if (matchchar(&str, '&')) op->multiline = 1;
vm_op_t *p = bpeg_simplepattern(str);
check(p, "Expected pattern after '&'\n");
str = p->end;
op->op = VM_UPTO_AND;
op->len = -1;
op->args.pat = p;
break;
}
// Number of repetitions: <N>(-<N> / - / + / "")
case '0': case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9': {

View File

@ -1,11 +1,11 @@
# Meta-rules for acting on everything
pattern = !(/); # Not defined by default
replacement = {!(/)=>}; # Not defined by default
replace-all = +&&@replacement &&$$;
find-all = {&&>matching-line=>} +(matching-line/non-matching-line) ?{!<\n => "\n"};
only-matches = +{&&@pattern=>'@1\n'};
matching-line = +&@pattern *. $ ?\n;
non-matching-line = {&&(\n/$$)=>};
replace-all = +(...@replacement) ...;
find-all = {... >matching-line =>} +(matching-line/non-matching-line) ?{!<\n => "\n"};
only-matches = +{...@pattern=>'@1\n'};
matching-line = +(..@pattern)..$ ?\n;
non-matching-line = {..$=>};
# Helper definitions (commonly used)
crlf = \r\n;
@ -28,10 +28,10 @@ abc = `a-z;
esc = \e; e = \e;
tab = \t; t = \t;
nl = \n; lf = \n; n = \n;
c-block-comment = '/*' &&'*/';
c-line-comment = '//' &$;
c-block-comment = '/*' ... '*/';
c-line-comment = '//' ..$;
c-comment = c-line-comment / c-block-comment;
hash-comment = `# &$;
hash-comment = `# ..$;
comment = !(/); # No default definition, can be overridden
WS = ` /\t/\n/\r/comment;
ws = ` /\t;

View File

@ -16,7 +16,7 @@ enum VMOpcode {
VM_STRING,
VM_RANGE,
VM_NOT,
VM_UPTO_AND,
VM_UPTO,
VM_REPEAT,
VM_BEFORE,
VM_AFTER,

40
vm.c
View File

@ -20,7 +20,7 @@ static const char *opcode_names[] = {
[VM_STRING] = "STRING",
[VM_RANGE] = "RANGE",
[VM_NOT] = "NOT",
[VM_UPTO_AND] = "UPTO_AND",
[VM_UPTO] = "UPTO",
[VM_REPEAT] = "REPEAT",
[VM_BEFORE] = "BEFORE",
[VM_AFTER] = "AFTER",
@ -130,24 +130,30 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
m->end = str;
return m;
}
case VM_UPTO_AND: {
case VM_UPTO: {
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->op = op;
match_t *p = NULL;
for (const char *prev = NULL; p == NULL && prev < str; ) {
prev = str;
p = _match(g, str, op->args.pat, rec);
if (*str && (op->multiline || *str != '\n'))
++str;
if (op->args.pat) {
for (const char *prev = NULL; prev < str; ) {
prev = str;
match_t *p = _match(g, str, op->args.pat, rec);
if (p) {
destroy_match(&p);
break;
}
// This isn't in the for() structure because there needs to
// be at least one chance to match the pattern, even if
// we're at the end of the string already (e.g. "..$").
if (*str && (op->multiline || *str != '\n')) ++str;
}
} else if (op->multiline) {
while (*str) ++str;
} else {
while (*str && *str != '\n') ++str;
}
if (p) {
m->end = p->end;
m->child = p;
return m;
}
destroy_match(&m);
return NULL;
m->end = str;
return m;
}
case VM_REPEAT: {
match_t *m = calloc(sizeof(match_t), 1);
@ -374,8 +380,8 @@ void print_pattern(vm_op_t *op)
fprintf(stderr, ")");
break;
}
case VM_UPTO_AND: {
fprintf(stderr, "text up to and including (");
case VM_UPTO: {
fprintf(stderr, "text up to (");
print_pattern(op->args.pat);
fprintf(stderr, ")");
break;