Added nodent support (|)
This commit is contained in:
parent
9f2d5464d6
commit
a82164505e
@ -8,7 +8,7 @@ String-pattern = *(`\ pat ?`; / .);
|
||||
|
||||
pat = suffixed-pat / simple-pat;
|
||||
simple-pat = Empty / Upto / Dot / String / Char-range / Char / Escape-range / Escape / No
|
||||
/ Repeat / After / Before / Capture / Replace / Ref / parens;
|
||||
/ Nodent / Repeat / After / Before / Capture / Replace / Ref / parens;
|
||||
suffixed-pat = Eq-pat;
|
||||
|
||||
Eq-pat = @[first]simple-pat "==" @[second]pat;
|
||||
@ -29,6 +29,7 @@ escape-sequence = (
|
||||
/`a/`b/`e/`n/`r/`t/`v / . / \n
|
||||
);
|
||||
No = `! _ @pat;
|
||||
Nodent = `|;
|
||||
Upto = 2-3`. ?>(_@pat);
|
||||
Repeat = (
|
||||
@[min]int _ `- _ @[max]int
|
||||
|
15
compiler.c
15
compiler.c
@ -211,6 +211,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
str = sep->end;
|
||||
set_range(op, min, max, pat, sep);
|
||||
} else {
|
||||
str = pat->end;
|
||||
set_range(op, min, max, pat, NULL);
|
||||
}
|
||||
break;
|
||||
@ -233,6 +234,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
str = sep->end;
|
||||
set_range(op, min, max, pat, sep);
|
||||
} else {
|
||||
str = pat->end;
|
||||
set_range(op, min, max, pat, NULL);
|
||||
}
|
||||
break;
|
||||
@ -345,8 +347,8 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
}
|
||||
// Empty choice (/) or {/}
|
||||
case '/': {
|
||||
str = after_spaces(str);
|
||||
if (*str == ')' || *str == '}') {
|
||||
const char *next = after_spaces(str);
|
||||
if (*next == ')' || *next == '}') {
|
||||
op->op = VM_EMPTY;
|
||||
} else {
|
||||
free(op);
|
||||
@ -354,6 +356,10 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
}
|
||||
break;
|
||||
}
|
||||
case '|': {
|
||||
op->op = VM_NODENT;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
// Reference
|
||||
if (isalpha(c)) {
|
||||
@ -373,9 +379,8 @@ vm_op_t *bpeg_simplepattern(const char *str)
|
||||
|
||||
// Postfix operators:
|
||||
postfix:
|
||||
str = after_spaces(str);
|
||||
if (strncmp(str, "==", 2) == 0) {
|
||||
str += 2;
|
||||
if (strncmp(after_spaces(str), "==", 2) == 0) {
|
||||
str = after_spaces(str)+2;
|
||||
vm_op_t *first = op;
|
||||
vm_op_t *second = bpeg_simplepattern(str);
|
||||
check(second, "Expected pattern after '=='");
|
||||
|
@ -2,12 +2,15 @@
|
||||
pattern = !(/); # Not defined by default
|
||||
replacement = !(/); # Not defined by default
|
||||
replace-all = +(...@replacement) ...;
|
||||
find-all = {... >matching-line =>} +(matching-line/non-matching-line) ?{!<\n => "\n"};
|
||||
find-all = +find-next%\n ?{!<\n => "\n"};
|
||||
find-next = matching-line / {..\n =>} find-next;
|
||||
only-matches = +{...@pattern=>'@1\n'};
|
||||
matching-line = +(..@pattern)..$ ?\n;
|
||||
non-matching-line = {..$=>};
|
||||
matching-line = +(..@pattern) ..$;
|
||||
|
||||
# Helper definitions (commonly used)
|
||||
indent = \n |+(\t/' ');
|
||||
dedent = >(\n !|);
|
||||
indented-block = |` ..$ *(\n|..$);
|
||||
crlf = \r\n;
|
||||
cr = \r; r = \r;
|
||||
anglebraces = `< *(anglebraces / !`>.) `>;
|
||||
|
1
types.h
1
types.h
@ -26,6 +26,7 @@ enum VMOpcode {
|
||||
VM_REPLACE,
|
||||
VM_REF,
|
||||
VM_BACKREF,
|
||||
VM_NODENT,
|
||||
};
|
||||
|
||||
/*
|
||||
|
6
utils.c
6
utils.c
@ -46,9 +46,9 @@ const char *after_name(const char *str)
|
||||
*/
|
||||
int matchchar(const char **str, char c)
|
||||
{
|
||||
*str = after_spaces(*str);
|
||||
if (**str == c) {
|
||||
++(*str);
|
||||
const char *next = after_spaces(*str);
|
||||
if (*next == c) {
|
||||
*str = &next[1];
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
|
30
vm.c
30
vm.c
@ -30,6 +30,7 @@ static const char *opcode_names[] = {
|
||||
[VM_EQUAL] = "EQUAL",
|
||||
[VM_REF] = "REF",
|
||||
[VM_BACKREF] = "BACKREF",
|
||||
[VM_NODENT] = "NODENT",
|
||||
};
|
||||
|
||||
const char *opcode_name(enum VMOpcode o)
|
||||
@ -337,6 +338,35 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
|
||||
case VM_BACKREF: {
|
||||
return match_backref(str, op, (match_t*)op->args.backref);
|
||||
}
|
||||
case VM_NODENT: {
|
||||
if (str[-1] == '\0') { // First line
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
m->end = str;
|
||||
m->op = op;
|
||||
return m;
|
||||
} else if (str[-1] != '\n') {
|
||||
return NULL; // Not at beginning of line
|
||||
}
|
||||
const char *p = &str[-1];
|
||||
while (*p == '\n') --p; // Skip blank lines
|
||||
while (p[-1] && p[-1] != '\n') --p; // Backtrack to start of last (nonblank) line
|
||||
// Count indentation:
|
||||
char denter = *p;
|
||||
int dents = 0;
|
||||
if (denter == ' ' || denter == '\t') {
|
||||
for (; *p == denter; ++p) ++dents;
|
||||
}
|
||||
for (int i = 0; i < dents; i++) {
|
||||
if (str[i] != denter) return NULL;
|
||||
}
|
||||
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->start = str;
|
||||
m->end = &str[dents];
|
||||
m->op = op;
|
||||
return m;
|
||||
}
|
||||
default: {
|
||||
fprintf(stderr, "Unknown opcode: %d", op->op);
|
||||
_exit(1);
|
||||
|
Loading…
Reference in New Issue
Block a user