Some cool debug viz stuff
This commit is contained in:
parent
9453ac360a
commit
64659a1566
257
bpeg.c
257
bpeg.c
@ -258,12 +258,13 @@ static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op
|
|||||||
* followed by any patterns (e.g. "`x `y"), otherwise return
|
* followed by any patterns (e.g. "`x `y"), otherwise return
|
||||||
* the original input.
|
* the original input.
|
||||||
*/
|
*/
|
||||||
static vm_op_t *expand_chain(vm_op_t *first)
|
static vm_op_t *expand_chain(const char *source, vm_op_t *first)
|
||||||
{
|
{
|
||||||
vm_op_t *second = compile_bpeg(first->end);
|
vm_op_t *second = compile_bpeg(source, first->end);
|
||||||
if (second == NULL) return first;
|
if (second == NULL) return first;
|
||||||
check(second->end > first->end, "No forward progress in chain!");
|
check(second->end > first->end, "No forward progress in chain!");
|
||||||
second = expand_chain(second);
|
visualize(source, first->end, "Expanding chain...");
|
||||||
|
second = expand_chain(source, second);
|
||||||
vm_op_t *chain = calloc(sizeof(vm_op_t), 1);
|
vm_op_t *chain = calloc(sizeof(vm_op_t), 1);
|
||||||
chain->op = VM_CHAIN;
|
chain->op = VM_CHAIN;
|
||||||
chain->start = first->start;
|
chain->start = first->start;
|
||||||
@ -273,6 +274,7 @@ static vm_op_t *expand_chain(vm_op_t *first)
|
|||||||
chain->end = second->end;
|
chain->end = second->end;
|
||||||
chain->args.multiple.first = first;
|
chain->args.multiple.first = first;
|
||||||
chain->args.multiple.second = second;
|
chain->args.multiple.second = second;
|
||||||
|
visualize(source, chain->end, "Got chained pair.");
|
||||||
return chain;
|
return chain;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -281,15 +283,16 @@ static vm_op_t *expand_chain(vm_op_t *first)
|
|||||||
* followed by any "/"-separated patterns (e.g. "`x/`y"), otherwise
|
* followed by any "/"-separated patterns (e.g. "`x/`y"), otherwise
|
||||||
* return the original input.
|
* return the original input.
|
||||||
*/
|
*/
|
||||||
static vm_op_t *expand_choices(vm_op_t *first)
|
static vm_op_t *expand_choices(const char *source, vm_op_t *first)
|
||||||
{
|
{
|
||||||
first = expand_chain(first);
|
first = expand_chain(source, first);
|
||||||
const char *str = first->end;
|
const char *str = first->end;
|
||||||
if (!matchchar(&str, '/')) return first;
|
if (!matchchar(&str, '/')) return first;
|
||||||
debug("Otherwise:\n");
|
visualize(source, str, "Expanding choices...");
|
||||||
vm_op_t *second = compile_bpeg(str);
|
//debug("Otherwise:\n");
|
||||||
|
vm_op_t *second = compile_bpeg(source, str);
|
||||||
check(second, "Expected pattern after '/'");
|
check(second, "Expected pattern after '/'");
|
||||||
second = expand_choices(second);
|
second = expand_choices(source, second);
|
||||||
vm_op_t *choice = calloc(sizeof(vm_op_t), 1);
|
vm_op_t *choice = calloc(sizeof(vm_op_t), 1);
|
||||||
choice->op = VM_OTHERWISE;
|
choice->op = VM_OTHERWISE;
|
||||||
choice->start = first->start;
|
choice->start = first->start;
|
||||||
@ -299,6 +302,7 @@ static vm_op_t *expand_choices(vm_op_t *first)
|
|||||||
choice->end = second->end;
|
choice->end = second->end;
|
||||||
choice->args.multiple.first = first;
|
choice->args.multiple.first = first;
|
||||||
choice->args.multiple.second = second;
|
choice->args.multiple.second = second;
|
||||||
|
visualize(source, choice->end, "Got two choices");
|
||||||
return choice;
|
return choice;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -344,10 +348,11 @@ static char escapechar(const char *escaped, const char **end)
|
|||||||
/*
|
/*
|
||||||
* Compile a string of BPEG code into virtual machine opcodes
|
* Compile a string of BPEG code into virtual machine opcodes
|
||||||
*/
|
*/
|
||||||
static vm_op_t *compile_bpeg(const char *str)
|
static vm_op_t *compile_bpeg(const char *source, const char *str)
|
||||||
{
|
{
|
||||||
if (!*str) return NULL;
|
if (!*str) return NULL;
|
||||||
debug("Parsing \"%s\"...\n", str);
|
visualize(source, str, "Compiling...");
|
||||||
|
//debug("Parsing \"%s\"...\n", str);
|
||||||
str = after_spaces(str);
|
str = after_spaces(str);
|
||||||
check(*str, "Expected a pattern");
|
check(*str, "Expected a pattern");
|
||||||
vm_op_t *op = calloc(sizeof(vm_op_t), 1);
|
vm_op_t *op = calloc(sizeof(vm_op_t), 1);
|
||||||
@ -358,7 +363,8 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
switch (c) {
|
switch (c) {
|
||||||
// Any char (dot)
|
// Any char (dot)
|
||||||
case '.': {
|
case '.': {
|
||||||
debug("Dot\n");
|
visualize(source, str, "Dot");
|
||||||
|
//debug("Dot\n");
|
||||||
op->op = VM_ANYCHAR;
|
op->op = VM_ANYCHAR;
|
||||||
op->len = 1;
|
op->len = 1;
|
||||||
break;
|
break;
|
||||||
@ -367,10 +373,12 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
case '`': {
|
case '`': {
|
||||||
char literal[2] = {*str, '\0'};
|
char literal[2] = {*str, '\0'};
|
||||||
++str;
|
++str;
|
||||||
|
visualize(source, str, "Char literal");
|
||||||
check(literal[0], "Expected character after '`'\n");
|
check(literal[0], "Expected character after '`'\n");
|
||||||
op->len = 1;
|
op->len = 1;
|
||||||
if (matchchar(&str, ',')) { // Range
|
if (matchchar(&str, ',')) { // Range
|
||||||
debug("Char range\n");
|
visualize(source, str, "Char range");
|
||||||
|
//debug("Char range\n");
|
||||||
char c2 = *str;
|
char c2 = *str;
|
||||||
check(c2, "Expected character after ','");
|
check(c2, "Expected character after ','");
|
||||||
op->op = VM_RANGE;
|
op->op = VM_RANGE;
|
||||||
@ -378,7 +386,7 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
op->args.range.high = c2;
|
op->args.range.high = c2;
|
||||||
++str;
|
++str;
|
||||||
} else {
|
} else {
|
||||||
debug("Char literal\n");
|
//debug("Char literal\n");
|
||||||
op->op = VM_STRING;
|
op->op = VM_STRING;
|
||||||
op->args.s = strdup(literal);
|
op->args.s = strdup(literal);
|
||||||
}
|
}
|
||||||
@ -386,7 +394,8 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
}
|
}
|
||||||
// Escapes
|
// Escapes
|
||||||
case '\\': {
|
case '\\': {
|
||||||
debug("Escape sequence\n");
|
//debug("Escape sequence\n");
|
||||||
|
visualize(source, str, "Escape sequence");
|
||||||
check(*str, "Expected escape after '\\'");
|
check(*str, "Expected escape after '\\'");
|
||||||
op->op = VM_STRING;
|
op->op = VM_STRING;
|
||||||
op->len = 1;
|
op->len = 1;
|
||||||
@ -396,6 +405,7 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
}
|
}
|
||||||
// String literal
|
// String literal
|
||||||
case '"': case '\'': {
|
case '"': case '\'': {
|
||||||
|
visualize(source, str, "String literal");
|
||||||
char quote = c;
|
char quote = c;
|
||||||
const char *literal = str;
|
const char *literal = str;
|
||||||
for (; *str && *str != quote; str++) {
|
for (; *str && *str != quote; str++) {
|
||||||
@ -403,19 +413,21 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
check(str[1], "Expected more string contents after backslash");
|
check(str[1], "Expected more string contents after backslash");
|
||||||
++str;
|
++str;
|
||||||
}
|
}
|
||||||
|
visualize(source, str, "String literal");
|
||||||
}
|
}
|
||||||
op->op = VM_STRING;
|
op->op = VM_STRING;
|
||||||
op->len = (ssize_t)(str - literal);
|
op->len = (ssize_t)(str - literal);
|
||||||
op->args.s = strndup(literal, (size_t)op->len);
|
op->args.s = strndup(literal, (size_t)op->len);
|
||||||
// TODO: handle escape chars like \n
|
// TODO: handle escape chars like \n
|
||||||
debug("String literal: %c%s%c\n", quote, op->args.s, quote);
|
//debug("String literal: %c%s%c\n", quote, op->args.s, quote);
|
||||||
check(matchchar(&str, quote), "Missing closing quote");
|
check(matchchar(&str, quote), "Missing closing quote");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Not <pat>
|
// Not <pat>
|
||||||
case '!': {
|
case '!': {
|
||||||
debug("Not pattern\n");
|
// debug("Not pattern\n");
|
||||||
vm_op_t *p = compile_bpeg(str);
|
visualize(source, str, "Not <pat>");
|
||||||
|
vm_op_t *p = compile_bpeg(source, str);
|
||||||
check(p, "Expected pattern after '!'\n");
|
check(p, "Expected pattern after '!'\n");
|
||||||
str = p->end;
|
str = p->end;
|
||||||
op->op = VM_NOT;
|
op->op = VM_NOT;
|
||||||
@ -425,8 +437,9 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
}
|
}
|
||||||
// Upto <pat>
|
// Upto <pat>
|
||||||
case '^': {
|
case '^': {
|
||||||
debug("Upto pattern\n");
|
visualize(source, str, "Upto <pat>");
|
||||||
vm_op_t *p = compile_bpeg(str);
|
//debug("Upto pattern\n");
|
||||||
|
vm_op_t *p = compile_bpeg(source, str);
|
||||||
check(p, "Expected pattern after '^'\n");
|
check(p, "Expected pattern after '^'\n");
|
||||||
str = p->end;
|
str = p->end;
|
||||||
op->op = VM_UPTO;
|
op->op = VM_UPTO;
|
||||||
@ -436,8 +449,9 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
}
|
}
|
||||||
// Upto and including <pat>
|
// Upto and including <pat>
|
||||||
case '&': {
|
case '&': {
|
||||||
debug("Upto-and pattern\n");
|
//debug("Upto-and pattern\n");
|
||||||
vm_op_t *p = compile_bpeg(str);
|
visualize(source, str, "Upto and including <pat>");
|
||||||
|
vm_op_t *p = compile_bpeg(source, str);
|
||||||
check(p, "Expected pattern after '&'\n");
|
check(p, "Expected pattern after '&'\n");
|
||||||
str = p->end;
|
str = p->end;
|
||||||
op->op = VM_UPTO_AND;
|
op->op = VM_UPTO_AND;
|
||||||
@ -448,7 +462,7 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
// Number of repetitions: <N>(-<N> / - / + / "")
|
// Number of repetitions: <N>(-<N> / - / + / "")
|
||||||
case '0': case '1': case '2': case '3': case '4': case '5':
|
case '0': case '1': case '2': case '3': case '4': case '5':
|
||||||
case '6': case '7': case '8': case '9': {
|
case '6': case '7': case '8': case '9': {
|
||||||
debug("Repetitions\n");
|
visualize(source, str, "Repeat <pat>");
|
||||||
ssize_t min = -1, max = -1;
|
ssize_t min = -1, max = -1;
|
||||||
--str;
|
--str;
|
||||||
long n1 = strtol(str, (char**)&str, 10);
|
long n1 = strtol(str, (char**)&str, 10);
|
||||||
@ -463,49 +477,56 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
} else {
|
} else {
|
||||||
min = n1, max = n1;
|
min = n1, max = n1;
|
||||||
}
|
}
|
||||||
vm_op_t *pat = compile_bpeg(str);
|
visualize(source, str, NULL);
|
||||||
|
vm_op_t *pat = compile_bpeg(source, str);
|
||||||
check(pat, "Expected pattern after repetition count");
|
check(pat, "Expected pattern after repetition count");
|
||||||
str = pat->end;
|
str = pat->end;
|
||||||
str = after_spaces(str);
|
str = after_spaces(str);
|
||||||
if (matchchar(&str, '%')) {
|
if (matchchar(&str, '%')) {
|
||||||
vm_op_t *sep = compile_bpeg(str);
|
visualize(source, str, "Repeat <pat> with separator");
|
||||||
|
vm_op_t *sep = compile_bpeg(source, str);
|
||||||
check(sep, "Expected pattern for separator after '%%'");
|
check(sep, "Expected pattern for separator after '%%'");
|
||||||
str = sep->end;
|
str = sep->end;
|
||||||
set_range(op, min, max, pat, sep);
|
set_range(op, min, max, pat, sep);
|
||||||
} else {
|
} else {
|
||||||
set_range(op, min, max, pat, NULL);
|
set_range(op, min, max, pat, NULL);
|
||||||
}
|
}
|
||||||
debug("min = %lld max = %lld\n", (long long)op->args.repetitions.min, (long long)op->args.repetitions.max);
|
visualize(source, str, NULL);
|
||||||
|
//debug("min = %lld max = %lld\n", (long long)op->args.repetitions.min, (long long)op->args.repetitions.max);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Special repetitions:
|
// Special repetitions:
|
||||||
case '+': case '*': case '?': {
|
case '+': case '*': case '?': {
|
||||||
debug("Special repetitions\n");
|
//debug("Special repetitions\n");
|
||||||
|
visualize(source, str, "Repeat <pat>");
|
||||||
ssize_t min = -1, max = -1;
|
ssize_t min = -1, max = -1;
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '+': min = 1, max = -1; break;
|
case '+': min = 1, max = -1; break;
|
||||||
case '*': min = 0, max = -1; break;
|
case '*': min = 0, max = -1; break;
|
||||||
case '?': min = 0, max = 1; break;
|
case '?': min = 0, max = 1; break;
|
||||||
}
|
}
|
||||||
vm_op_t *pat = compile_bpeg(str);
|
vm_op_t *pat = compile_bpeg(source, str);
|
||||||
check(pat, "Expected pattern after +");
|
check(pat, "Expected pattern after +");
|
||||||
str = pat->end;
|
str = pat->end;
|
||||||
str = after_spaces(str);
|
str = after_spaces(str);
|
||||||
if (matchchar(&str, '%')) {
|
if (matchchar(&str, '%')) {
|
||||||
vm_op_t *sep = compile_bpeg(str);
|
visualize(source, str, "Repeat <pat> with separator");
|
||||||
|
vm_op_t *sep = compile_bpeg(source, str);
|
||||||
check(sep, "Expected pattern for separator after '%%'");
|
check(sep, "Expected pattern for separator after '%%'");
|
||||||
str = sep->end;
|
str = sep->end;
|
||||||
set_range(op, min, max, pat, sep);
|
set_range(op, min, max, pat, sep);
|
||||||
} else {
|
} else {
|
||||||
set_range(op, min, max, pat, NULL);
|
set_range(op, min, max, pat, NULL);
|
||||||
}
|
}
|
||||||
debug("min = %lld max = %lld\n", (long long)op->args.repetitions.min, (long long)op->args.repetitions.max);
|
visualize(source, str, NULL);
|
||||||
|
//debug("min = %lld max = %lld\n", (long long)op->args.repetitions.min, (long long)op->args.repetitions.max);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Lookbehind
|
// Lookbehind
|
||||||
case '<': {
|
case '<': {
|
||||||
debug("Lookbehind\n");
|
visualize(source, str, "After <pat>");
|
||||||
vm_op_t *pat = compile_bpeg(str);
|
//debug("Lookbehind\n");
|
||||||
|
vm_op_t *pat = compile_bpeg(source, str);
|
||||||
check(pat, "Expected pattern after <");
|
check(pat, "Expected pattern after <");
|
||||||
str = pat->end;
|
str = pat->end;
|
||||||
check(pat->len != -1, "Lookbehind patterns must have a fixed length");
|
check(pat->len != -1, "Lookbehind patterns must have a fixed length");
|
||||||
@ -517,8 +538,9 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
}
|
}
|
||||||
// Lookahead
|
// Lookahead
|
||||||
case '>': {
|
case '>': {
|
||||||
debug("Lookahead\n");
|
visualize(source, str, "Before <pat>");
|
||||||
vm_op_t *pat = compile_bpeg(str);
|
//debug("Lookahead\n");
|
||||||
|
vm_op_t *pat = compile_bpeg(source, str);
|
||||||
check(pat, "Expected pattern after >");
|
check(pat, "Expected pattern after >");
|
||||||
str = pat->end;
|
str = pat->end;
|
||||||
op->op = VM_BEFORE;
|
op->op = VM_BEFORE;
|
||||||
@ -528,50 +550,57 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
}
|
}
|
||||||
// Parentheses
|
// Parentheses
|
||||||
case '(': {
|
case '(': {
|
||||||
debug("Open paren (\n");
|
visualize(source, str, NULL);
|
||||||
|
// debug("Open paren (\n");
|
||||||
free(op);
|
free(op);
|
||||||
op = compile_bpeg(str);
|
op = compile_bpeg(source, str);
|
||||||
check(op, "Expected pattern inside parentheses");
|
check(op, "Expected pattern inside parentheses");
|
||||||
op = expand_choices(op);
|
op = expand_choices(source, op);
|
||||||
str = op->end;
|
str = op->end;
|
||||||
str = after_spaces(str);
|
str = after_spaces(str);
|
||||||
check(matchchar(&str, ')'), "Expected closing parenthesis");
|
check(matchchar(&str, ')'), "Expected closing parenthesis");
|
||||||
debug(")\n");
|
visualize(source, str, NULL);
|
||||||
|
// debug(")\n");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Capture
|
// Capture
|
||||||
case '@': {
|
case '@': {
|
||||||
debug("Capture\n");
|
//debug("Capture\n");
|
||||||
|
visualize(source, str, "Capture");
|
||||||
op->op = VM_CAPTURE;
|
op->op = VM_CAPTURE;
|
||||||
str = after_spaces(str);
|
str = after_spaces(str);
|
||||||
if (matchchar(&str, '[')) {
|
if (matchchar(&str, '[')) {
|
||||||
char *closing = strchr(str, ']');
|
char *closing = strchr(str, ']');
|
||||||
check(closing, "Expected closing ']'");
|
check(closing, "Expected closing ']'");
|
||||||
op->args.capture.name = strndup(str, (size_t)(closing-str));
|
op->args.capture.name = strndup(str, (size_t)(closing-str));
|
||||||
debug("named \"%s\"\n", op->args.capture.name);
|
visualize(source, str, "Named capture");
|
||||||
|
//debug("named \"%s\"\n", op->args.capture.name);
|
||||||
str = closing;
|
str = closing;
|
||||||
check(matchchar(&str, ']'), "Expected closing ']'");
|
check(matchchar(&str, ']'), "Expected closing ']'");
|
||||||
}
|
}
|
||||||
vm_op_t *pat = compile_bpeg(str);
|
vm_op_t *pat = compile_bpeg(source, str);
|
||||||
check(pat, "Expected pattern after @");
|
check(pat, "Expected pattern after @");
|
||||||
str = pat->end;
|
str = pat->end;
|
||||||
op->args.capture.capture_pat = pat;
|
op->args.capture.capture_pat = pat;
|
||||||
op->len = pat->len;
|
op->len = pat->len;
|
||||||
|
visualize(source, str, NULL);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Replacement
|
// Replacement
|
||||||
case '{': {
|
case '{': {
|
||||||
debug("Replacement {\n");
|
//debug("Replacement {\n");
|
||||||
|
visualize(source, str, "Replacement");
|
||||||
str = after_spaces(str);
|
str = after_spaces(str);
|
||||||
vm_op_t *pat = NULL;
|
vm_op_t *pat = NULL;
|
||||||
if (!matchchar(&str, '~')) {
|
if (!matchchar(&str, '~')) {
|
||||||
pat = compile_bpeg(str);
|
pat = compile_bpeg(source, str);
|
||||||
check(pat, "Expected pattern after '{'");
|
check(pat, "Expected pattern after '{'");
|
||||||
pat = expand_choices(pat);
|
pat = expand_choices(source, pat);
|
||||||
str = pat->end;
|
str = pat->end;
|
||||||
str = after_spaces(str);
|
str = after_spaces(str);
|
||||||
check(matchchar(&str, '~'), "Expected '~' after pattern in replacement");
|
check(matchchar(&str, '~'), "Expected '~' after pattern in replacement");
|
||||||
}
|
}
|
||||||
|
visualize(source, str, NULL);
|
||||||
str = after_spaces(str);
|
str = after_spaces(str);
|
||||||
|
|
||||||
char quote = *str;
|
char quote = *str;
|
||||||
@ -587,22 +616,26 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
check(str[1], "Expected more string contents after backslash");
|
check(str[1], "Expected more string contents after backslash");
|
||||||
++str;
|
++str;
|
||||||
}
|
}
|
||||||
|
visualize(source, str, NULL);
|
||||||
}
|
}
|
||||||
replacement = strndup(replacement, (size_t)(str-replacement));
|
replacement = strndup(replacement, (size_t)(str-replacement));
|
||||||
check(matchchar(&str, quote), "Expected closing quote");
|
check(matchchar(&str, quote), "Expected closing quote");
|
||||||
}
|
}
|
||||||
|
visualize(source, str, NULL);
|
||||||
check(matchchar(&str, '}'), "Expected a closing '}'");
|
check(matchchar(&str, '}'), "Expected a closing '}'");
|
||||||
op->op = VM_REPLACE;
|
op->op = VM_REPLACE;
|
||||||
op->args.replace.replace_pat = pat;
|
op->args.replace.replace_pat = pat;
|
||||||
op->args.replace.replacement = replacement;
|
op->args.replace.replacement = replacement;
|
||||||
debug(" rep = \"%s\"\n", replacement);
|
//debug(" rep = \"%s\"\n", replacement);
|
||||||
debug("}\n");
|
//debug("}\n");
|
||||||
if (pat != NULL) op->len = pat->len;
|
if (pat != NULL) op->len = pat->len;
|
||||||
|
visualize(source, str, NULL);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Whitespace
|
// Whitespace
|
||||||
case '_': {
|
case '_': {
|
||||||
debug("Whitespace\n");
|
//debug("Whitespace\n");
|
||||||
|
visualize(source, str, NULL);
|
||||||
op->op = VM_REF;
|
op->op = VM_REF;
|
||||||
op->args.s = strdup("_");
|
op->args.s = strdup("_");
|
||||||
break;
|
break;
|
||||||
@ -610,16 +643,20 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
default: {
|
default: {
|
||||||
// Reference
|
// Reference
|
||||||
if (isalpha(c)) {
|
if (isalpha(c)) {
|
||||||
|
visualize(source, str, "Ref");
|
||||||
--str;
|
--str;
|
||||||
const char *refname = str;
|
const char *refname = str;
|
||||||
size_t len = 1;
|
size_t len = 1;
|
||||||
for (++str; isalnum(*str); ++str)
|
for (++str; isalnum(*str); ++str) {
|
||||||
++len;
|
++len;
|
||||||
|
visualize(source, str, NULL);
|
||||||
|
}
|
||||||
op->op = VM_REF;
|
op->op = VM_REF;
|
||||||
debug("Ref: %s\n", refname);
|
//debug("Ref: %s\n", refname);
|
||||||
op->args.s = strndup(refname, len);
|
op->args.s = strndup(refname, len);
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
|
visualize(source, str, "No match");
|
||||||
free(op);
|
free(op);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -629,11 +666,12 @@ static vm_op_t *compile_bpeg(const char *str)
|
|||||||
return op;
|
return op;
|
||||||
}
|
}
|
||||||
|
|
||||||
static vm_op_t *load_def(const char *name, const char *def)
|
static vm_op_t *load_def(const char *name, const char *source)
|
||||||
{
|
{
|
||||||
defs[ndefs].name = name;
|
defs[ndefs].name = name;
|
||||||
vm_op_t *op = compile_bpeg(def);
|
defs[ndefs].source = source;
|
||||||
op = expand_choices(op);
|
vm_op_t *op = compile_bpeg(source, source);
|
||||||
|
op = expand_choices(source, op);
|
||||||
defs[ndefs].op = op;
|
defs[ndefs].op = op;
|
||||||
++ndefs;
|
++ndefs;
|
||||||
return op;
|
return op;
|
||||||
@ -726,7 +764,7 @@ static void print_match(match_t *m, const char *color)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (m->is_capture) printf("\033[0;33m{");
|
if (m->is_capture) printf("\033[0;2;33m{");
|
||||||
const char *prev = m->start;
|
const char *prev = m->start;
|
||||||
for (match_t *child = m->child; child; child = child->nextsibling) {
|
for (match_t *child = m->child; child; child = child->nextsibling) {
|
||||||
if (child->start > prev)
|
if (child->start > prev)
|
||||||
@ -736,7 +774,7 @@ static void print_match(match_t *m, const char *color)
|
|||||||
}
|
}
|
||||||
if (m->end > prev)
|
if (m->end > prev)
|
||||||
printf("%s%.*s", color, (int)(m->end - prev), prev);
|
printf("%s%.*s", color, (int)(m->end - prev), prev);
|
||||||
if (m->is_capture) printf("\033[0;33m}");
|
if (m->is_capture) printf("\033[0;2;33m}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -756,18 +794,123 @@ static char *readfile(int fd)
|
|||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Print an English-like description of a compiled pattern to stderr,
 * recursing into any sub-patterns. Nothing is printed for opcodes that
 * have no case below, and no trailing newline is written.
 */
static void print_grammar(vm_op_t *op)
{
    switch (op->op) {
        case VM_REF: fprintf(stderr, "a $%s", op->args.s); break;
        case VM_EMPTY: fprintf(stderr, "empty"); break;
        case VM_ANYCHAR: fprintf(stderr, "any char"); break;
        case VM_STRING: fprintf(stderr, "string \"%s\"", op->args.s); break;
        case VM_RANGE: {
            fprintf(stderr, "char from %c-%c", op->args.range.low, op->args.range.high);
            break;
        }
        case VM_REPEAT: {
            // The counts are cast to long long so the format specifier is
            // correct whatever integer width the fields have.
            // A max of -1 is treated as "no upper bound".
            if (op->args.repetitions.max == -1)
                fprintf(stderr, "%lld or more (", (long long)op->args.repetitions.min);
            else
                fprintf(stderr, "%lld-%lld of (",
                        (long long)op->args.repetitions.min,
                        (long long)op->args.repetitions.max);
            print_grammar(op->args.repetitions.repeat_pat);
            // Both branches above open a parenthesis, so this always balances.
            fprintf(stderr, ")");
            if (op->args.repetitions.sep) {
                fprintf(stderr, " separated by (");
                print_grammar(op->args.repetitions.sep);
                fprintf(stderr, ")");
            }
            break;
        }
        case VM_NOT: {
            fprintf(stderr, "not (");
            print_grammar(op->args.pat);
            fprintf(stderr, ")");
            break;
        }
        case VM_UPTO: {
            fprintf(stderr, "text up to (");
            print_grammar(op->args.pat);
            fprintf(stderr, ")");
            break;
        }
        case VM_UPTO_AND: {
            fprintf(stderr, "text up to and including (");
            print_grammar(op->args.pat);
            fprintf(stderr, ")");
            break;
        }
        case VM_AFTER: {
            fprintf(stderr, "after (");
            print_grammar(op->args.pat);
            fprintf(stderr, ")");
            break;
        }
        case VM_BEFORE: {
            fprintf(stderr, "before (");
            print_grammar(op->args.pat);
            fprintf(stderr, ")");
            break;
        }
        case VM_CAPTURE: {
            fprintf(stderr, "capture (");
            // Read the captured pattern from the capture-specific field,
            // which is where the compiler stores it for VM_CAPTURE ops.
            print_grammar(op->args.capture.capture_pat);
            fprintf(stderr, ")");
            if (op->args.capture.name)
                fprintf(stderr, " and call it %s", op->args.capture.name);
            break;
        }
        case VM_OTHERWISE: {
            fprintf(stderr, "(");
            print_grammar(op->args.multiple.first);
            fprintf(stderr, ") or else ");
            // A nested VM_OTHERWISE on the right is printed without extra
            // parentheses, so "a/b/c" reads as one flat list of alternatives.
            if (op->args.multiple.second->op != VM_OTHERWISE)
                fprintf(stderr, "(");
            print_grammar(op->args.multiple.second);
            if (op->args.multiple.second->op != VM_OTHERWISE)
                fprintf(stderr, ")");
            break;
        }
        case VM_CHAIN: {
            fprintf(stderr, "(");
            print_grammar(op->args.multiple.first);
            fprintf(stderr, ") then ");
            // Same flattening as VM_OTHERWISE, for right-nested chains.
            if (op->args.multiple.second->op != VM_CHAIN)
                fprintf(stderr, "(");
            print_grammar(op->args.multiple.second);
            if (op->args.multiple.second->op != VM_CHAIN)
                fprintf(stderr, ")");
            break;
        }
        case VM_REPLACE: {
            fprintf(stderr, "replace ");
            if (op->args.replace.replace_pat) {
                fprintf(stderr, "(");
                print_grammar(op->args.replace.replace_pat);
                fprintf(stderr, ")");
            } else {
                // No pattern to replace: shown as an empty string.
                fprintf(stderr, "\"\"");
            }
            fprintf(stderr, " with \"%s\"", op->args.replace.replacement);
            break;
        }
        default: break;
    }
}
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
check(argc >= 2, "Usage: bpeg <pat> [<file>]");
|
check(argc >= 2, "Usage: bpeg <pat> [<file>]");
|
||||||
|
fprintf(stderr, "========== Compiling ===========\n\n\n\n");
|
||||||
load_defs();
|
load_defs();
|
||||||
|
|
||||||
const char *lang = argv[1];
|
const char *lang = argv[1];
|
||||||
vm_op_t *op = compile_bpeg(lang);
|
visualize_delay = 100000;
|
||||||
|
vm_op_t *op = compile_bpeg(lang, lang);
|
||||||
check(op, "Failed to compile_bpeg input");
|
check(op, "Failed to compile_bpeg input");
|
||||||
op = expand_choices(op);
|
op = expand_choices(lang, op);
|
||||||
|
|
||||||
const char *defs = op->end;
|
const char *defs = op->end;
|
||||||
while (matchchar(&defs, ';')) {
|
while (matchchar(&defs, ';')) {
|
||||||
|
fprintf(stderr, "\n");
|
||||||
defs = after_spaces(defs);
|
defs = after_spaces(defs);
|
||||||
const char *name = defs;
|
const char *name = defs;
|
||||||
check(isalpha(*name), "Definition must begin with a name");
|
check(isalpha(*name), "Definition must begin with a name");
|
||||||
@ -780,6 +923,10 @@ int main(int argc, char *argv[])
|
|||||||
defs = def->end;
|
defs = def->end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "\n\n");
|
||||||
|
print_grammar(op);
|
||||||
|
fprintf(stderr, "\n\n");
|
||||||
|
|
||||||
char *input;
|
char *input;
|
||||||
if (argc >= 3) {
|
if (argc >= 3) {
|
||||||
int fd = open(argv[2], O_RDONLY);
|
int fd = open(argv[2], O_RDONLY);
|
||||||
|
11
bpeg.h
11
bpeg.h
@ -78,19 +78,20 @@ typedef struct vm_op_s {
|
|||||||
static inline const char *after_spaces(const char *str);
|
static inline const char *after_spaces(const char *str);
|
||||||
static match_t *free_match(match_t *m);
|
static match_t *free_match(match_t *m);
|
||||||
static match_t *match(const char *str, vm_op_t *op);
|
static match_t *match(const char *str, vm_op_t *op);
|
||||||
static vm_op_t *compile_bpeg(const char *str);
|
static vm_op_t *compile_bpeg(const char *source, const char *str);
|
||||||
static vm_op_t *expand_chain(vm_op_t *first);
|
static vm_op_t *expand_chain(const char *source, vm_op_t *first);
|
||||||
static vm_op_t *expand_choices(vm_op_t *op);
|
static vm_op_t *expand_choices(const char *source, vm_op_t *op);
|
||||||
static void print_match(match_t *m, const char *color);
|
static void print_match(match_t *m, const char *color);
|
||||||
static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op_t *sep);
|
static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op_t *sep);
|
||||||
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const char *name;
|
const char *name;
|
||||||
|
const char *source;
|
||||||
vm_op_t *op;
|
vm_op_t *op;
|
||||||
} def_t;
|
} def_t;
|
||||||
|
|
||||||
static def_t defs[1024] = {{NULL, NULL}};
|
static def_t defs[1024] = {{NULL, NULL, NULL}};
|
||||||
size_t ndefs = 0;
|
size_t ndefs = 0;
|
||||||
static int verbose = 1;
|
//static int verbose = 1;
|
||||||
|
|
||||||
|
15
utils.h
15
utils.h
@ -6,6 +6,8 @@
|
|||||||
#define check(cond, ...) do { if (!(cond)) { fprintf(stderr, __VA_ARGS__); fwrite("\n", 1, 1, stderr); _exit(1); } } while(0)
|
#define check(cond, ...) do { if (!(cond)) { fprintf(stderr, __VA_ARGS__); fwrite("\n", 1, 1, stderr); _exit(1); } } while(0)
|
||||||
#define debug(...) do { if (verbose) fprintf(stderr, __VA_ARGS__); } while(0)
|
#define debug(...) do { if (verbose) fprintf(stderr, __VA_ARGS__); } while(0)
|
||||||
|
|
||||||
|
static int visualize_delay = -1;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Helper function to skip past all spaces (and comments)
|
* Helper function to skip past all spaces (and comments)
|
||||||
* Returns a pointer to the first non-space character.
|
* Returns a pointer to the first non-space character.
|
||||||
@ -37,3 +39,16 @@ static inline int matchchar(const char **str, char c)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void visualize(const char *source, const char *ptr, const char *msg)
|
||||||
|
{
|
||||||
|
if (visualize_delay < 0) return;
|
||||||
|
fprintf(stderr, "\033[0;1m\r\033[2A\033[K%.*s\033[0;2m%s\033[0m\n",
|
||||||
|
(int)(ptr-source), source, ptr);
|
||||||
|
fprintf(stderr, "\033[0;1m");
|
||||||
|
for (--ptr ; ptr > source; --ptr) putc(' ', stderr);
|
||||||
|
fprintf(stderr, "^\033[K\n");
|
||||||
|
if (msg)
|
||||||
|
fprintf(stderr, "\033[K\033[33;1m%s\033[0m", msg);
|
||||||
|
usleep(visualize_delay);
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user