aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2020-09-10 22:42:47 -0700
committer Bruce Hill <bruce@bruce-hill.com> 2020-09-10 22:42:47 -0700
commit b93d8979bd9ea4148ea7e1d08d00ca846e151cf1 (patch)
tree 5762fbe37037115cdcee606b110000ce16232fcc
parent 95ec009a7c6cfb2ae43e63e25744ff5f76b88bf5 (diff)
Misc. tweaks and visualization changes, capitalized some rules
-rw-r--r-- bpeg.bpeg 78
-rw-r--r-- bpeg.c 39
-rw-r--r-- bpeg.h 21
3 files changed, 87 insertions, 51 deletions
diff --git a/bpeg.bpeg b/bpeg.bpeg
index 4f55383..f5d490d 100644
--- a/bpeg.bpeg
+++ b/bpeg.bpeg
@@ -1,60 +1,66 @@
# This is a file defining the BPEG grammar using BPEG syntax
-grammar;
-grammar = __ @[main-pattern]extended-pat __ *((__`;__) def) ?(`;__);
-def = @[name]ref __ `= __ @[definition]extended-pat;
+Grammar;
+Grammar = __ @[main-pattern]extended-pat __ *((__`;__) Def) ?(`;__);
+Def = @[name]Ref __ `= __ @[definition]extended-pat;
# This is used for command line arguments:
-string-grammar = *(`\ pat ?`; / .);
+String-grammar = *(`\ pat ?`; / .);
-pat = empty / dot / string / char-range / char / escape-range / escape / no / anything-but
- / upto-and / repeat / after / before / capture / replace / ref / parens;
+pat = Empty / Dot / String / Char-range / Char / Escape-range / Escape / No / Anything-but
+ / Upto-and / Repeat / After / Before / Capture / Replace / Ref / parens;
-empty = `/ >(__ (`)/`}));
-dot = `.;
-string = (
- `" @[s]*(escape / ~`") `"
- / `' @[s]*(escape / ~`') `'
+Empty = `/ >(__ (`)/`}));
+Dot = `.;
+String = (
+ `" @[s]*(Escape / ~`") `"
+ / `' @[s]*(Escape / ~`') `'
);
-char-range = `` @[low]. `- @[high].;
-char = `` @[s].;
-escape-range = `\ @[low]escape-sequence `- @[high]escape-sequence;
-escape = `\ @[s]escape-sequence;
+Char-range = `` @[low]. `- @[high].;
+Char = `` @[s].;
+Escape-range = `\ @[low]escape-sequence `- @[high]escape-sequence;
+Escape = `\ @[s]escape-sequence;
escape-sequence = (
1-3 `0-7
/ `x 2 (`0-9/`a-f/`A-F)
/`a/`b/`e/`n/`r/`t/`v / . / \n
);
-no = `! _ @pat;
-anything-but = `~ ?`~ _ @pat;
-upto-and = `& ?`& _ @pat;
-repeat = (
- @[min]int _ `- _ @[max]int
- / @[min]{=>"0"} @[max]int _ `-
- / @[min]int _ `+ @[max](/)
- / @[min]@[max]int
- / `+ @[min]{=>"1"} @[max](/)
- / `* @[min]{=>"0"} @[max](/)
- / `? @[min]{=>"0"} @[max]{=>"1"}
+No = `! _ @pat;
+Anything-but = `~ ?`~ _ @pat;
+Upto-and = `& ?`& _ @pat;
+Repeat = (
+ @[min]int _ `- _ @[max]int
+ /{@[min]{=>"0"}=>} @[max]int _ `-
+ / @[min]int _ `+ @[max](/)
+ / @[min]@[max]int
+ /{@[min]{=>"1"}=>} `+ @[max](/)
+ /{@[min]{=>"0"}=>} `* @[max](/)
+ /{@[min]{=>"0"}=>} `? {@[max]{=>"1"}=>}
) _ @[repeat-pat]pat ?( __ `% __ @[sep]pat);
-after = `< _ pat;
-before = `> _ pat;
-capture = `@ ?(_ `[ @[capture-name]ref `]) _ @[capture]pat;
-replace = `{ __ (
- ?(@[replace-pat]extended-pat __) "=>" ?(__ @[replacement]string)
+After = `< _ pat;
+Before = `> _ pat;
+Capture = `@ ?(_ `[ @[capture-name]Ref `]) _ @[capture]pat;
+Replace = `{ __ (
+ ?(@[replace-pat]extended-pat __) "=>" ?(__ @[replacement]String)
) __ `};
-ref = @[name](
+Ref = @[name](
"^^" / "^" / "__" / "_" / "$$" / "$" /
(`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-));
parens = `( __ extended-pat __ `);
-chain = +@pat % (__);
-otherwise = +@(chain/pat) % (__`/__);
-extended-pat = otherwise / chain / pat;
+Chain = +@pat % (__);
+Otherwise = +@(Chain/pat) % (__`/__);
+extended-pat = Otherwise / Chain / pat;
-_ = *(` / \t);
+# Special-symbol rules:
+_ = *(` / \t);
__ = *(` / \t / \r / \n / comment);
+$$ = !$.;
+$ = !.;
+^^ = !<$.;
+^ = !<.;
+
hash-comment = `# *.;
# Note: comments are undefined by default in regular BPEG
diff --git a/bpeg.c b/bpeg.c
index 8a8c1f9..6607527 100644
--- a/bpeg.c
+++ b/bpeg.c
@@ -52,10 +52,11 @@ static match_t *free_match(match_t *m)
*/
static match_t *match(const char *str, vm_op_t *op)
{
- tailcall:
+ //tailcall:
switch (op->op) {
case VM_EMPTY: {
match_t *m = calloc(sizeof(match_t), 1);
+ m->op = op;
m->start = str;
m->end = str;
return m;
@@ -64,6 +65,7 @@ static match_t *match(const char *str, vm_op_t *op)
if (!*str || (!op->multiline && *str == '\n'))
return NULL;
match_t *m = calloc(sizeof(match_t), 1);
+ m->op = op;
m->start = str;
m->end = str+1;
return m;
@@ -72,6 +74,7 @@ static match_t *match(const char *str, vm_op_t *op)
if (strncmp(str, op->args.s, op->len) != 0)
return NULL;
match_t *m = calloc(sizeof(match_t), 1);
+ m->op = op;
m->start = str;
m->end = str + op->len;
return m;
@@ -80,6 +83,7 @@ static match_t *match(const char *str, vm_op_t *op)
if (*str < op->args.range.low || *str > op->args.range.high)
return NULL;
match_t *m = calloc(sizeof(match_t), 1);
+ m->op = op;
m->start = str;
m->end = str + 1;
return m;
@@ -94,6 +98,7 @@ static match_t *match(const char *str, vm_op_t *op)
return NULL;
}
m = calloc(sizeof(match_t), 1);
+ m->op = op;
m->start = str;
if (op->op == VM_ANYTHING_BUT) ++str;
m->end = str;
@@ -102,6 +107,7 @@ static match_t *match(const char *str, vm_op_t *op)
case VM_UPTO_AND: {
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
+ m->op = op;
match_t *p = NULL;
for (const char *prev = NULL; p == NULL && prev < str; ) {
prev = str;
@@ -121,6 +127,7 @@ static match_t *match(const char *str, vm_op_t *op)
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->end = str;
+ m->op = op;
if (op->args.repetitions.max == 0) return m;
match_t **dest = &m->child;
@@ -171,6 +178,7 @@ static match_t *match(const char *str, vm_op_t *op)
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->end = str;
+ m->op = op;
return m;
}
case VM_BEFORE: {
@@ -180,6 +188,7 @@ static match_t *match(const char *str, vm_op_t *op)
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->end = str;
+ m->op = op;
return m;
}
case VM_CAPTURE: {
@@ -188,6 +197,7 @@ static match_t *match(const char *str, vm_op_t *op)
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->end = p->end;
+ m->op = op;
m->child = p;
m->is_capture = 1;
if (op->args.capture.name)
@@ -210,6 +220,7 @@ static match_t *match(const char *str, vm_op_t *op)
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->end = m2->end;
+ m->op = op;
m->child = m1;
m1->nextsibling = m2;
return m;
@@ -217,6 +228,7 @@ static match_t *match(const char *str, vm_op_t *op)
case VM_REPLACE: {
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
+ m->op = op;
if (op->args.replace.replace_pat) {
match_t *p = match(str, op->args.replace.replace_pat);
if (p == NULL) return NULL;
@@ -234,8 +246,20 @@ static match_t *match(const char *str, vm_op_t *op)
for (int i = ndefs-1; i >= 0; i--) {
if (streq(defs[i].name, op->args.s)) {
// Bingo!
+ /*
op = defs[i].op;
goto tailcall;
+ */
+ match_t *p = match(str, defs[i].op);
+ if (p == NULL) return NULL;
+ match_t *m = calloc(sizeof(match_t), 1);
+ m->start = p->start;
+ m->end = p->end;
+ m->op = op;
+ m->child = p;
+ m->name_or_replacement = defs[i].name;
+ m->is_ref = 1;
+ return m;
}
}
check(0, "Unknown identifier: '%s'", op->args.s);
@@ -905,8 +929,11 @@ static void print_match(match_t *m, const char *color)
}
}
} else {
- if (m->is_capture && m->name_or_replacement)
- printf("\033[0;2;33m[%s:", m->name_or_replacement);
+ const char *name = m->name_or_replacement;
+ if (verbose && m->is_ref && name && isupper(name[0]))
+ printf("\033[0;2;35m{%s:", name);
+ //if (m->is_capture && name)
+ // printf("\033[0;2;33m[%s:", name);
const char *prev = m->start;
for (match_t *child = m->child; child; child = child->nextsibling) {
if (child->start > prev)
@@ -916,8 +943,10 @@ static void print_match(match_t *m, const char *color)
}
if (m->end > prev)
printf("%s%.*s", color, (int)(m->end - prev), prev);
- if (m->is_capture && m->name_or_replacement)
- printf("\033[0;2;33m]");
+ if (verbose && m->is_ref && name && isupper(name[0]))
+ printf("\033[0;2;35m}");
+ //if (m->is_capture && name)
+ // printf("\033[0;2;33m]");
}
}
diff --git a/bpeg.h b/bpeg.h
index 7836250..459bd65 100644
--- a/bpeg.h
+++ b/bpeg.h
@@ -19,16 +19,6 @@ const char *usage = (
" -s --slow\t run in slow mode for debugging\n"
" -r --replace <replacement> replace the input pattern with the given replacement\n"
" -g --grammar <grammar file> use the specified file as a grammar\n");
-/*
- * Pattern matching result object
- */
-typedef struct match_s {
- // Where the match starts and ends (end is after the last character)
- const char *start, *end;
- unsigned int is_capture:1, is_replacement:1;
- const char *name_or_replacement;
- struct match_s *child, *nextsibling;
-} match_t;
/*
* BPEG virtual machine opcodes
@@ -85,6 +75,17 @@ typedef struct vm_op_s {
} args;
} vm_op_t;
+/*
+ * Pattern matching result object
+ */
+typedef struct match_s {
+ // Where the match starts and ends (end is after the last character)
+ const char *start, *end;
+ unsigned int is_capture:1, is_replacement:1, is_ref:1;
+ const char *name_or_replacement;
+ struct match_s *child, *nextsibling;
+ vm_op_t *op;
+} match_t;
static inline const char *after_spaces(const char *str);
static match_t *free_match(match_t *m);