Changed "upto-and" syntax to ".."/"..."
This commit is contained in:
parent
1d1c3d35aa
commit
1570dd55e8
10
bpeg.bpeg
10
bpeg.bpeg
@ -7,14 +7,14 @@ Def = @[name]Ref __ `= __ @[definition]extended-pat;
|
||||
String-pattern = *(`\ pat ?`; / .);
|
||||
|
||||
pat = suffixed-pat / simple-pat;
|
||||
simple-pat = Empty / Dot / String / Char-range / Char / Escape-range / Escape / No / Anything-but
|
||||
/ Upto-and / Repeat / After / Before / Capture / Replace / Ref / parens;
|
||||
simple-pat = Empty / Upto / Dot / String / Char-range / Char / Escape-range / Escape / No / Anything-but
|
||||
/ Repeat / After / Before / Capture / Replace / Ref / parens;
|
||||
suffixed-pat = Eq-pat;
|
||||
|
||||
Eq-pat = @[first]simple-pat "==" @[second]pat;
|
||||
|
||||
Empty = `/ >(__ (`)/`}));
|
||||
Dot = `.;
|
||||
Dot = `. !`.;
|
||||
String = (
|
||||
`" @[s]*(Escape / ~`") `"
|
||||
/ `' @[s]*(Escape / ~`') `'
|
||||
@ -30,7 +30,7 @@ escape-sequence = (
|
||||
);
|
||||
No = `! _ @pat;
|
||||
Anything-but = `~ ?`~ _ @pat;
|
||||
Upto-and = `& ?`& _ @pat;
|
||||
Upto = 2-3`. ?>(_@pat);
|
||||
Repeat = (
|
||||
@[min]int _ `- _ @[max]int
|
||||
/{@[min]{=>"0"}=>} @[max]int _ `-
|
||||
@ -64,7 +64,7 @@ $ = !.;
|
||||
^^ = !<$.;
|
||||
^ = !<.;
|
||||
|
||||
hash-comment = `# *.;
|
||||
hash-comment = `# .. $;
|
||||
|
||||
# Note: comments are undefined by default in regular BPEG
|
||||
comment = hash-comment;
|
||||
|
2
bpeg.c
2
bpeg.c
@ -3,6 +3,7 @@
|
||||
*
|
||||
* Grammar:
|
||||
* # <comment> comment
|
||||
* .. any text up to the following pattern (if any); (multiline: ...)
|
||||
* . any character (multiline: $.)
|
||||
* ^ beginning of a line (^^: beginning of file)
|
||||
* $ end of a line ($$: end of file)
|
||||
@ -13,7 +14,6 @@
|
||||
* \<e1>-<e2> escape sequence range (e.g. \x00-\xF0)
|
||||
* ! <pat> no <pat>
|
||||
* ~ <pat> any character as long as it doesn't match <pat> (multiline: ~~<pat>)
|
||||
* & <pat> upto and including <pat> (aka *~<pat> <pat>) (multiline: &&<pat>)
|
||||
* <N=1> + <pat> [% <sep="">] <N> or more <pat>s (separated by <sep>)
|
||||
* * <pat> [% <sep="">] sugar for "0+ <pat> [% <sep>]"
|
||||
* <N=1> - <pat> [% <sep="">] <N> or fewer <pat>s (separated by <sep>)
|
||||
|
30
compiler.c
30
compiler.c
@ -97,10 +97,21 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
switch (c) {
|
||||
// Any char (dot) ($. is multiline anychar)
|
||||
case '.': {
|
||||
anychar:
|
||||
op->op = VM_ANYCHAR;
|
||||
op->len = 1;
|
||||
break;
|
||||
if (matchchar(&str, '.')) { // ".."
|
||||
if (matchchar(&str, '.')) // "..."
|
||||
op->multiline = 1;
|
||||
vm_op_t *till = bpeg_simplepattern(str);
|
||||
str = str; // Don't advance str, the following pattern will be re-matched.
|
||||
op->op = VM_UPTO;
|
||||
op->len = -1;
|
||||
op->args.pat = till;
|
||||
break;
|
||||
} else {
|
||||
anychar:
|
||||
op->op = VM_ANYCHAR;
|
||||
op->len = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Char literals
|
||||
case '`': {
|
||||
@ -184,17 +195,6 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
op->args.pat = p;
|
||||
break;
|
||||
}
|
||||
// Upto and including <pat>
|
||||
case '&': {
|
||||
if (matchchar(&str, '&')) op->multiline = 1;
|
||||
vm_op_t *p = bpeg_simplepattern(str);
|
||||
check(p, "Expected pattern after '&'\n");
|
||||
str = p->end;
|
||||
op->op = VM_UPTO_AND;
|
||||
op->len = -1;
|
||||
op->args.pat = p;
|
||||
break;
|
||||
}
|
||||
// Number of repetitions: <N>(-<N> / - / + / "")
|
||||
case '0': case '1': case '2': case '3': case '4': case '5':
|
||||
case '6': case '7': case '8': case '9': {
|
||||
|
@ -1,11 +1,11 @@
|
||||
# Meta-rules for acting on everything
|
||||
pattern = !(/); # Not defined by default
|
||||
replacement = {!(/)=>}; # Not defined by default
|
||||
replace-all = +&&@replacement &&$$;
|
||||
find-all = {&&>matching-line=>} +(matching-line/non-matching-line) ?{!<\n => "\n"};
|
||||
only-matches = +{&&@pattern=>'@1\n'};
|
||||
matching-line = +&@pattern *. $ ?\n;
|
||||
non-matching-line = {&&(\n/$$)=>};
|
||||
replace-all = +(...@replacement) ...;
|
||||
find-all = {... >matching-line =>} +(matching-line/non-matching-line) ?{!<\n => "\n"};
|
||||
only-matches = +{...@pattern=>'@1\n'};
|
||||
matching-line = +(..@pattern)..$ ?\n;
|
||||
non-matching-line = {..$=>};
|
||||
|
||||
# Helper definitions (commonly used)
|
||||
crlf = \r\n;
|
||||
@ -28,10 +28,10 @@ abc = `a-z;
|
||||
esc = \e; e = \e;
|
||||
tab = \t; t = \t;
|
||||
nl = \n; lf = \n; n = \n;
|
||||
c-block-comment = '/*' &&'*/';
|
||||
c-line-comment = '//' &$;
|
||||
c-block-comment = '/*' ... '*/';
|
||||
c-line-comment = '//' ..$;
|
||||
c-comment = c-line-comment / c-block-comment;
|
||||
hash-comment = `# &$;
|
||||
hash-comment = `# ..$;
|
||||
comment = !(/); # No default definition, can be overridden
|
||||
WS = ` /\t/\n/\r/comment;
|
||||
ws = ` /\t;
|
||||
|
2
types.h
2
types.h
@ -16,7 +16,7 @@ enum VMOpcode {
|
||||
VM_STRING,
|
||||
VM_RANGE,
|
||||
VM_NOT,
|
||||
VM_UPTO_AND,
|
||||
VM_UPTO,
|
||||
VM_REPEAT,
|
||||
VM_BEFORE,
|
||||
VM_AFTER,
|
||||
|
40
vm.c
40
vm.c
@ -20,7 +20,7 @@ static const char *opcode_names[] = {
|
||||
[VM_STRING] = "STRING",
|
||||
[VM_RANGE] = "RANGE",
|
||||
[VM_NOT] = "NOT",
|
||||
[VM_UPTO_AND] = "UPTO_AND",
|
||||
[VM_UPTO] = "UPTO",
|
||||
[VM_REPEAT] = "REPEAT",
|
||||
[VM_BEFORE] = "BEFORE",
|
||||
[VM_AFTER] = "AFTER",
|
||||
@ -130,24 +130,30 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
m->end = str;
|
||||
return m;
|
||||
}
|
||||
case VM_UPTO_AND: {
|
||||
case VM_UPTO: {
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
m->op = op;
|
||||
match_t *p = NULL;
|
||||
for (const char *prev = NULL; p == NULL && prev < str; ) {
|
||||
prev = str;
|
||||
p = _match(g, str, op->args.pat, rec);
|
||||
if (*str && (op->multiline || *str != '\n'))
|
||||
++str;
|
||||
if (op->args.pat) {
|
||||
for (const char *prev = NULL; prev < str; ) {
|
||||
prev = str;
|
||||
match_t *p = _match(g, str, op->args.pat, rec);
|
||||
if (p) {
|
||||
destroy_match(&p);
|
||||
break;
|
||||
}
|
||||
// This isn't in the for() structure because there needs to
|
||||
// be at least one chance to match the pattern, even if
|
||||
// we're at the end of the string already (e.g. "..$").
|
||||
if (*str && (op->multiline || *str != '\n')) ++str;
|
||||
}
|
||||
} else if (op->multiline) {
|
||||
while (*str) ++str;
|
||||
} else {
|
||||
while (*str && *str != '\n') ++str;
|
||||
}
|
||||
if (p) {
|
||||
m->end = p->end;
|
||||
m->child = p;
|
||||
return m;
|
||||
}
|
||||
destroy_match(&m);
|
||||
return NULL;
|
||||
m->end = str;
|
||||
return m;
|
||||
}
|
||||
case VM_REPEAT: {
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
@ -374,8 +380,8 @@ void print_pattern(vm_op_t *op)
|
||||
fprintf(stderr, ")");
|
||||
break;
|
||||
}
|
||||
case VM_UPTO_AND: {
|
||||
fprintf(stderr, "text up to and including (");
|
||||
case VM_UPTO: {
|
||||
fprintf(stderr, "text up to (");
|
||||
print_pattern(op->args.pat);
|
||||
fprintf(stderr, ")");
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user