about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- bpeg.bpeg 3
-rw-r--r-- compiler.c 15
-rw-r--r-- grammars/builtins.bpeg 9
-rw-r--r-- types.h 1
-rw-r--r-- utils.c 6
-rw-r--r-- vm.c 30
6 files changed, 52 insertions, 12 deletions
diff --git a/bpeg.bpeg b/bpeg.bpeg
index ae2d3fb..de0c676 100644
--- a/bpeg.bpeg
+++ b/bpeg.bpeg
@@ -8,7 +8,7 @@ String-pattern = *(`\ pat ?`; / .);
pat = suffixed-pat / simple-pat;
simple-pat = Empty / Upto / Dot / String / Char-range / Char / Escape-range / Escape / No
- / Repeat / After / Before / Capture / Replace / Ref / parens;
+ / Nodent / Repeat / After / Before / Capture / Replace / Ref / parens;
suffixed-pat = Eq-pat;
Eq-pat = @[first]simple-pat "==" @[second]pat;
@@ -29,6 +29,7 @@ escape-sequence = (
/`a/`b/`e/`n/`r/`t/`v / . / \n
);
No = `! _ @pat;
+Nodent = `|;
Upto = 2-3`. ?>(_@pat);
Repeat = (
@[min]int _ `- _ @[max]int
diff --git a/compiler.c b/compiler.c
index 267ea27..8279285 100644
--- a/compiler.c
+++ b/compiler.c
@@ -211,6 +211,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
str = sep->end;
set_range(op, min, max, pat, sep);
} else {
+ str = pat->end;
set_range(op, min, max, pat, NULL);
}
break;
@@ -233,6 +234,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
str = sep->end;
set_range(op, min, max, pat, sep);
} else {
+ str = pat->end;
set_range(op, min, max, pat, NULL);
}
break;
@@ -345,8 +347,8 @@ vm_op_t *bpeg_simplepattern(const char *str)
}
// Empty choice (/) or {/}
case '/': {
- str = after_spaces(str);
- if (*str == ')' || *str == '}') {
+ const char *next = after_spaces(str);
+ if (*next == ')' || *next == '}') {
op->op = VM_EMPTY;
} else {
free(op);
@@ -354,6 +356,10 @@ vm_op_t *bpeg_simplepattern(const char *str)
}
break;
}
+ case '|': { // Nodent pattern: a bare '|'
+ op->op = VM_NODENT; // Only the opcode is set here; no arguments are filled in
+ break;
+ }
default: {
// Reference
if (isalpha(c)) {
@@ -373,9 +379,8 @@ vm_op_t *bpeg_simplepattern(const char *str)
// Postfix operators:
postfix:
- str = after_spaces(str);
- if (strncmp(str, "==", 2) == 0) {
- str += 2;
+ if (strncmp(after_spaces(str), "==", 2) == 0) {
+ str = after_spaces(str)+2;
vm_op_t *first = op;
vm_op_t *second = bpeg_simplepattern(str);
check(second, "Expected pattern after '=='");
diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg
index c871408..df03104 100644
--- a/grammars/builtins.bpeg
+++ b/grammars/builtins.bpeg
@@ -2,12 +2,15 @@
pattern = !(/); # Not defined by default
replacement = !(/); # Not defined by default
replace-all = +(...@replacement) ...;
-find-all = {... >matching-line =>} +(matching-line/non-matching-line) ?{!<\n => "\n"};
+find-all = +find-next%\n ?{!<\n => "\n"};
+find-next = matching-line / {..\n =>} find-next;
only-matches = +{...@pattern=>'@1\n'};
-matching-line = +(..@pattern)..$ ?\n;
-non-matching-line = {..$=>};
+matching-line = +(..@pattern) ..$;
# Helper definitions (commonly used)
+indent = \n |+(\t/' ');
+dedent = >(\n !|);
+indented-block = |` ..$ *(\n|..$);
crlf = \r\n;
cr = \r; r = \r;
anglebraces = `< *(anglebraces / !`>.) `>;
diff --git a/types.h b/types.h
index c8f7a17..36e1aa6 100644
--- a/types.h
+++ b/types.h
@@ -26,6 +26,7 @@ enum VMOpcode {
VM_REPLACE,
VM_REF,
VM_BACKREF,
+ VM_NODENT,
};
/*
diff --git a/utils.c b/utils.c
index 5078c01..7844934 100644
--- a/utils.c
+++ b/utils.c
@@ -46,9 +46,9 @@ const char *after_name(const char *str)
*/
int matchchar(const char **str, char c)
{
- *str = after_spaces(*str);
- if (**str == c) {
- ++(*str);
+ const char *next = after_spaces(*str);
+ if (*next == c) {
+ *str = &next[1];
return 1;
} else {
return 0;
diff --git a/vm.c b/vm.c
index 2a44dd7..9974547 100644
--- a/vm.c
+++ b/vm.c
@@ -30,6 +30,7 @@ static const char *opcode_names[] = {
[VM_EQUAL] = "EQUAL",
[VM_REF] = "REF",
[VM_BACKREF] = "BACKREF",
+ [VM_NODENT] = "NODENT",
};
const char *opcode_name(enum VMOpcode o)
@@ -337,6 +338,35 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, recursive_ref
case VM_BACKREF: {
return match_backref(str, op, (match_t*)op->args.backref);
}
+ case VM_NODENT: {
+     // Nodent: succeeds only at the start of a line whose leading whitespace
+     // repeats the indentation of the closest preceding non-empty line, and
+     // consumes that indentation. On the first line it matches zero-width.
+     // Presumably the caller keeps a '\0' byte just before the text (str[-1]).
+     if (str[-1] != '\0' && str[-1] != '\n') {
+         return NULL; // Not at beginning of line
+     }
+     const char *p = &str[-1];
+     while (*p == '\n') --p; // Skip blank lines
+     // Backtrack to the start of the last nonblank line. The *p test stops at
+     // the leading '\0' when str is on the first line or only blank lines
+     // precede it; without it p[-1] would be read from before the buffer.
+     while (*p && p[-1] && p[-1] != '\n') --p;
+     // Count indentation (a run of one repeated character, ' ' or '\t'):
+     char denter = *p;
+     int dents = 0;
+     if (denter == ' ' || denter == '\t') {
+         for (; *p == denter; ++p) ++dents;
+     }
+     for (int i = 0; i < dents; i++) {
+         if (str[i] != denter) return NULL; // Current line lacks that indentation
+     }
+     match_t *m = calloc(1, sizeof(match_t)); // NOTE(review): result is not checked for NULL
+     m->start = str;
+     m->end = &str[dents]; // dents is 0 on the first line, giving an empty match
+     m->op = op;
+     return m;
+ }
default: {
fprintf(stderr, "Unknown opcode: %d", op->op);
_exit(1);