Added nodent support (|): at the start of a line, `|` matches the same leading indentation as the previous non-blank line

This commit is contained in:
Bruce Hill 2020-09-14 01:21:49 -07:00
parent 9f2d5464d6
commit a82164505e
6 changed files with 52 additions and 12 deletions

View File

@ -8,7 +8,7 @@ String-pattern = *(`\ pat ?`; / .);
pat = suffixed-pat / simple-pat;
simple-pat = Empty / Upto / Dot / String / Char-range / Char / Escape-range / Escape / No
/ Repeat / After / Before / Capture / Replace / Ref / parens;
/ Nodent / Repeat / After / Before / Capture / Replace / Ref / parens;
suffixed-pat = Eq-pat;
Eq-pat = @[first]simple-pat "==" @[second]pat;
@ -29,6 +29,7 @@ escape-sequence = (
/`a/`b/`e/`n/`r/`t/`v / . / \n
);
No = `! _ @pat;
Nodent = `|;
Upto = 2-3`. ?>(_@pat);
Repeat = (
@[min]int _ `- _ @[max]int

View File

@ -211,6 +211,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
str = sep->end;
set_range(op, min, max, pat, sep);
} else {
str = pat->end;
set_range(op, min, max, pat, NULL);
}
break;
@ -233,6 +234,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
str = sep->end;
set_range(op, min, max, pat, sep);
} else {
str = pat->end;
set_range(op, min, max, pat, NULL);
}
break;
@ -345,8 +347,8 @@ vm_op_t *bpeg_simplepattern(const char *str)
}
// Empty choice (/) or {/}
case '/': {
str = after_spaces(str);
if (*str == ')' || *str == '}') {
const char *next = after_spaces(str);
if (*next == ')' || *next == '}') {
op->op = VM_EMPTY;
} else {
free(op);
@ -354,6 +356,10 @@ vm_op_t *bpeg_simplepattern(const char *str)
}
break;
}
case '|': {
op->op = VM_NODENT;
break;
}
default: {
// Reference
if (isalpha(c)) {
@ -373,9 +379,8 @@ vm_op_t *bpeg_simplepattern(const char *str)
// Postfix operators:
postfix:
str = after_spaces(str);
if (strncmp(str, "==", 2) == 0) {
str += 2;
if (strncmp(after_spaces(str), "==", 2) == 0) {
str = after_spaces(str)+2;
vm_op_t *first = op;
vm_op_t *second = bpeg_simplepattern(str);
check(second, "Expected pattern after '=='");

View File

@ -2,12 +2,15 @@
pattern = !(/); # Not defined by default
replacement = !(/); # Not defined by default
replace-all = +(...@replacement) ...;
find-all = {... >matching-line =>} +(matching-line/non-matching-line) ?{!<\n => "\n"};
find-all = +find-next%\n ?{!<\n => "\n"};
find-next = matching-line / {..\n =>} find-next;
only-matches = +{...@pattern=>'@1\n'};
matching-line = +(..@pattern)..$ ?\n;
non-matching-line = {..$=>};
matching-line = +(..@pattern) ..$;
# Helper definitions (commonly used)
# `|` (nodent) only matches at the start of a line; it consumes the same run of
# leading spaces or tabs as the previous non-blank line (empty on the first line).
indent = \n |+(\t/' ');
dedent = >(\n !|);
indented-block = |` ..$ *(\n|..$);
crlf = \r\n;
cr = \r; r = \r;
anglebraces = `< *(anglebraces / !`>.) `>;

View File

@ -26,6 +26,7 @@ enum VMOpcode {
VM_REPLACE,
VM_REF,
VM_BACKREF,
VM_NODENT,
};
/*

View File

@ -46,9 +46,9 @@ const char *after_name(const char *str)
*/
int matchchar(const char **str, char c)
{
*str = after_spaces(*str);
if (**str == c) {
++(*str);
const char *next = after_spaces(*str);
if (*next == c) {
*str = &next[1];
return 1;
} else {
return 0;

30
vm.c
View File

@ -30,6 +30,7 @@ static const char *opcode_names[] = {
[VM_EQUAL] = "EQUAL",
[VM_REF] = "REF",
[VM_BACKREF] = "BACKREF",
[VM_NODENT] = "NODENT",
};
const char *opcode_name(enum VMOpcode o)
@ -337,6 +338,35 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
case VM_BACKREF: {
return match_backref(str, op, (match_t*)op->args.backref);
}
case VM_NODENT: {
if (str[-1] == '\0') { // First line
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->end = str;
m->op = op;
return m;
} else if (str[-1] != '\n') {
return NULL; // Not at beginning of line
}
const char *p = &str[-1];
while (*p == '\n') --p; // Skip blank lines
while (p[-1] && p[-1] != '\n') --p; // Backtrack to start of last (nonblank) line
// Count indentation:
char denter = *p;
int dents = 0;
if (denter == ' ' || denter == '\t') {
for (; *p == denter; ++p) ++dents;
}
for (int i = 0; i < dents; i++) {
if (str[i] != denter) return NULL;
}
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->end = &str[dents];
m->op = op;
return m;
}
default: {
fprintf(stderr, "Unknown opcode: %d", op->op);
_exit(1);