Starting to add better error messages

This commit is contained in:
Bruce Hill 2020-09-28 21:30:43 -07:00
parent 90b8db84a4
commit 1a8095c6d7
9 changed files with 117 additions and 59 deletions

37
bpeg.c
View File

@ -56,7 +56,7 @@ static int print_errors(file_t *f, match_t *m)
printf("\033[31;1m");
print_match(f, m);
printf("\033[0m\n");
fprint_line(stdout, f, m->start, m->end, "");
fprint_line(stdout, f, m->start, m->end, " ");
return 1;
}
if (m->child) ret += print_errors(f, m->child);
@ -113,9 +113,10 @@ int main(int argc, char *argv[])
} else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) {
flags |= BPEG_IGNORECASE;
} else if (FLAG("--replace") || FLAG("-r")) {
vm_op_t *p = bpeg_replacement(bpeg_pattern(NULL, "pattern"), flag);
file_t *replace_file = spoof_file("<replace argument>", flag);
vm_op_t *p = bpeg_replacement(bpeg_pattern(replace_file, "pattern"), flag);
check(p, "Replacement failed to compile");
add_def(g, NULL, flag, "replacement", p);
add_def(g, replace_file, flag, "replacement", p);
rule = "replace-all";
} else if (FLAG("--grammar") || FLAG("-g")) {
file_t *f = load_file(flag);
@ -135,36 +136,41 @@ int main(int argc, char *argv[])
check(eq, "Rule definitions must include an ':'\n\n%s", usage);
*eq = '\0';
char *src = ++eq;
vm_op_t *pat = bpeg_pattern(NULL, src);
file_t *def_file = spoof_file(def, flag);
vm_op_t *pat = bpeg_pattern(def_file, src);
check(pat, "Failed to compile pattern");
add_def(g, NULL, src, def, pat);
add_def(g, def_file, src, def, pat);
} else if (FLAG("--define-string") || FLAG("-D")) {
char *def = flag;
char *eq = strchr(def, ':');
check(eq, "Rule definitions must include an ':'\n\n%s", usage);
*eq = '\0';
char *src = ++eq;
vm_op_t *pat = bpeg_stringpattern(NULL, src);
file_t *def_file = spoof_file(def, flag);
vm_op_t *pat = bpeg_stringpattern(def_file, src);
check(pat, "Failed to compile pattern");
add_def(g, NULL, src, def, pat);
add_def(g, def_file, src, def, pat);
} else if (FLAG("--pattern") || FLAG("-p")) {
check(npatterns == 0, "Cannot define multiple patterns");
vm_op_t *p = bpeg_pattern(NULL, flag);
file_t *arg_file = spoof_file("<pattern argument>", flag);
vm_op_t *p = bpeg_pattern(arg_file, flag);
check(p, "Pattern failed to compile: '%s'", flag);
add_def(g, NULL, flag, "pattern", p);
add_def(g, arg_file, flag, "pattern", p);
++npatterns;
} else if (FLAG("--pattern-string") || FLAG("-P")) {
vm_op_t *p = bpeg_stringpattern(NULL, flag);
file_t *arg_file = spoof_file("<pattern argument>", flag);
vm_op_t *p = bpeg_stringpattern(arg_file, flag);
check(p, "Pattern failed to compile");
add_def(g, NULL, flag, "pattern", p);
add_def(g, arg_file, flag, "pattern", p);
++npatterns;
} else if (FLAG("--mode") || FLAG("-m")) {
rule = flag;
} else if (argv[i][0] != '-') {
if (npatterns > 0) break;
vm_op_t *p = bpeg_stringpattern(NULL, argv[i]);
file_t *arg_file = spoof_file("<pattern argument>", flag);
vm_op_t *p = bpeg_stringpattern(arg_file, argv[i]);
check(p, "Pattern failed to compile");
add_def(g, NULL, argv[i], "pattern", p);
add_def(g, arg_file, argv[i], "pattern", p);
++npatterns;
} else {
printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage);
@ -173,9 +179,10 @@ int main(int argc, char *argv[])
}
if (isatty(STDOUT_FILENO)) {
vm_op_t *p = bpeg_pattern(NULL, "''");
file_t *is_tty_file = spoof_file("<is-tty>", flag);
vm_op_t *p = bpeg_pattern(is_tty_file, "''");
check(p, "Failed to compile is-tty");
add_def(g, NULL, "''", "is-tty", p);
add_def(g, is_tty_file, "''", "is-tty", p);
}
vm_op_t *pattern = lookup(g, rule);

View File

@ -5,6 +5,8 @@
#include "compiler.h"
#include "utils.h"
#define file_err(f, ...) do { fprint_line(stderr, f, __VA_ARGS__); _exit(1); } while(0)
static vm_op_t *expand_chain(file_t *f, vm_op_t *first);
static vm_op_t *expand_choices(file_t *f, vm_op_t *first);
static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second);
@ -42,7 +44,9 @@ static vm_op_t *expand_chain(file_t *f, vm_op_t *first)
vm_op_t *second = bpeg_simplepattern(f, first->end);
if (second == NULL) return first;
second = expand_chain(f, second);
check(second->end > first->end, "No forward progress in chain!");
if (second->end <= first->end)
file_err(f, second->end, second->end,
"This chain is not parsing properly");
return chain_together(first, second);
}
@ -57,7 +61,8 @@ static vm_op_t *expand_choices(file_t *f, vm_op_t *first)
const char *str = first->end;
if (!matchchar(&str, '/')) return first;
vm_op_t *second = bpeg_simplepattern(f, str);
check(second, "Expected pattern after '/'");
if (!second)
file_err(f, str, str, "There should be a pattern here after a '/'");
second = expand_choices(f, second);
vm_op_t *choice = calloc(sizeof(vm_op_t), 1);
choice->op = VM_OTHERWISE;
@ -98,6 +103,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
op->start = str;
op->len = -1;
char c = *str;
const char *origin = str;
++str;
switch (c) {
// Any char (dot) ($. is multiline anychar)
@ -126,12 +132,15 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
case '`': {
char literal[2] = {*str, '\0'};
++str;
check(literal[0], "Expected character after '`'\n");
if (!literal[0] || literal[0] == '\n')
file_err(f, str, str, "There should be a character here after the '`'");
op->len = 1;
if (matchchar(&str, '-')) { // Range
char c2 = *str;
check(c2, "Expected character after '-'");
check(c2 >= literal[0], "Character range must be low-to-high");
if (!c2 || c2 == '\n')
file_err(f, str, str, "There should be a character here to complete the character range.");
if (c2 < literal[0])
file_err(f, origin, str+1, "Character ranges must be low-to-high, but this is high-to-low.");
op->op = VM_RANGE;
op->args.range.low = literal[0];
op->args.range.high = c2;
@ -145,13 +154,19 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
// Escapes
case '\\': {
check(*str && *str != '\n', "Expected escape after '\\'");
if (!*str || *str == '\n')
file_err(f, str, str, "There should be an escape sequence here after this backslash.");
op->len = 1;
char e = unescapechar(str, &str);
unsigned char e = unescapechar(str, &str);
if (*str == '-') { // Escape range (e.g. \x00-\xFF)
++str;
char e2 = unescapechar(str, &str);
check(e2, "Expected character after '-'");
check(e2 >= e, "Character range must be low-to-high");
const char *seqstart = str;
unsigned char e2 = unescapechar(str, &str);
if (str == seqstart)
file_err(f, seqstart, str+1, "This value isn't a valid escape sequence");
if (e2 < e)
file_err(f, origin, str, "Escape ranges should be low-to-high, but this is high-to-low.");
printf("%d - %d\n", (int)e, (int)e2);
op->op = VM_RANGE;
op->args.range.low = e;
op->args.range.high = e2;
@ -228,9 +243,13 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
// Lookbehind
case '<': {
vm_op_t *pat = bpeg_simplepattern(f, str);
check(pat, "Expected pattern after <");
if (!pat)
file_err(f, str, str, "There should be a pattern after this '<'");
str = pat->end;
check(pat->len != -1, "Lookbehind patterns must have a fixed length");
if (pat->len == -1)
file_err(f, origin, pat->end,
"Sorry, variable-length lookbehind patterns like this are not supported.\n"
"Please use a fixed-length lookbehind pattern instead.");
str = pat->end;
op->op = VM_AFTER;
op->len = 0;
@ -240,7 +259,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
// Lookahead
case '>': {
vm_op_t *pat = bpeg_simplepattern(f, str);
check(pat, "Expected pattern after >");
if (!pat)
file_err(f, str, str, "There should be a pattern after this '>'");
str = pat->end;
op->op = VM_BEFORE;
op->len = 0;
@ -251,11 +271,13 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
case '(': {
free(op);
op = bpeg_simplepattern(f, str);
check(op, "Expected pattern inside parentheses");
if (!op)
file_err(f, str, str, "Expected a pattern inside parentheses");
op = expand_choices(f, op);
str = op->end;
str = after_spaces(str);
check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str);
if (!matchchar(&str, ')'))
file_err(f, origin, str, "This parenthesis is not closed");
break;
}
// Square brackets
@ -279,7 +301,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
vm_op_t *sep = NULL;
if (matchchar(&str, '%')) {
sep = bpeg_simplepattern(f, str);
check(sep, "Expected pattern for separator after '%%'");
if (!sep)
file_err(f, str, str, "Expected pattern for separator after '%%'");
str = sep->end;
}
set_range(op, min, -1, pat, sep);
@ -387,7 +410,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
// Postfix operators:
postfix:
if (f ? str >= f->end : !*str) return op;
if (str >= f->end) return op;
str = after_spaces(str);
if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
int equal = str[0] == '=';
@ -431,7 +454,7 @@ vm_op_t *bpeg_stringpattern(file_t *f, const char *str)
if (*str == '\\') {
check(str[1], "Expected more string contents after backslash");
const char *after_escape;
char e = unescapechar(&str[1], &after_escape);
unsigned char e = unescapechar(&str[1], &after_escape);
if (e != str[1]) {
str = after_escape - 1;
continue;

View File

@ -9,13 +9,13 @@
#include "types.h"
#include "file_loader.h"
__attribute__((nonnull(2)))
__attribute__((nonnull(1,2)))
vm_op_t *bpeg_simplepattern(file_t *f, const char *str);
__attribute__((nonnull(2)))
__attribute__((nonnull(1,2)))
vm_op_t *bpeg_stringpattern(file_t *f, const char *str);
__attribute__((nonnull(1,2)))
vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement);
__attribute__((nonnull(2)))
__attribute__((nonnull(1,2)))
vm_op_t *bpeg_pattern(file_t *f, const char *str);
#endif

View File

@ -5,6 +5,7 @@
#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
@ -13,6 +14,23 @@
#include "file_loader.h"
static void populate_lines(file_t *f)
{
// Calculate line numbers:
size_t linecap = 10;
f->lines = calloc(sizeof(const char*), linecap);
f->nlines = 0;
char *p = f->contents;
for (size_t n = 0; p && p < f->end; ++n) {
++f->nlines;
if (n >= linecap)
f->lines = realloc(f->lines, sizeof(const char*)*(linecap *= 2));
f->lines[n] = p;
p = strchr(p, '\n');
if (p) ++p;
}
}
/*
* Read an entire file into memory.
*/
@ -23,7 +41,7 @@ file_t *load_file(const char *filename)
if (fd < 0) return NULL;
file_t *f = calloc(sizeof(file_t), 1);
f->filename = strdup(filename);
// TODO: use mmap when possible
struct stat sb;
if (fstat(fd, &sb) == -1)
goto skip_mmap;
@ -51,21 +69,22 @@ file_t *load_file(const char *filename)
finished_loading:
f->end = &f->contents[f->length];
// Calculate line numbers:
size_t linecap = 10;
f->lines = calloc(sizeof(const char*), linecap);
f->nlines = 0;
char *p = f->contents;
for (size_t n = 0; p && *p; ++n) {
++f->nlines;
if (n >= linecap)
f->lines = realloc(f->lines, sizeof(const char*)*(linecap *= 2));
f->lines[n] = p;
p = strchr(p, '\n');
if (p) ++p;
}
populate_lines(f);
return f;
}
/*
* Create a virtual file from a string.
*/
file_t *spoof_file(const char *filename, char *text)
{
if (filename == NULL) filename = "";
file_t *f = calloc(sizeof(file_t), 1);
f->filename = strdup(filename);
f->contents = text;
f->length = strlen(text);
f->end = &f->contents[f->length];
populate_lines(f);
return f;
}
@ -113,12 +132,19 @@ const char *get_line(file_t *f, size_t line_number)
return f->lines[line_number - 1];
}
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg)
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...)
{
size_t linenum = get_line_number(f, start);
const char *line = get_line(f, linenum);
size_t charnum = get_char_number(f, start);
fprintf(dest, "\033[1m%s:%ld:\033[0m %s\n", f->filename, linenum, msg);
fprintf(dest, "\033[1m%s:%ld:\033[0m ", f->filename, linenum);
va_list args;
va_start(args, fmt);
vfprintf(dest, fmt, args);
va_end(args);
fputc('\n', dest);
const char *eol = linenum == f->nlines ? strchr(line, '\0') : strchr(line, '\n');
if (end == NULL || end > eol) end = eol;
fprintf(dest, "\033[2m% 5ld |\033[0m %.*s\033[41;30m%.*s\033[0m%.*s\n",

View File

@ -14,6 +14,7 @@ typedef struct {
} file_t;
file_t *load_file(const char *filename);
file_t *spoof_file(const char *filename, char *text);
__attribute__((nonnull))
void destroy_file(file_t **f);
__attribute__((pure, nonnull))
@ -22,6 +23,7 @@ __attribute__((pure, nonnull))
size_t get_char_number(file_t *f, const char *p);
__attribute__((pure, nonnull))
const char *get_line(file_t *f, size_t line_number);
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg);
__attribute__((format (printf, 5, 6)))
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...);
#endif

View File

@ -48,7 +48,7 @@ typedef struct vm_op_s {
union {
const char *s;
struct {
char low, high;
unsigned char low, high;
} range;
struct {
ssize_t min, max;

View File

@ -74,17 +74,17 @@ int matchchar(const char **str, char c)
* character that was escaped.
* Set *end = the first character past the end of the escape sequence.
*/
char unescapechar(const char *escaped, const char **end)
unsigned char unescapechar(const char *escaped, const char **end)
{
size_t len = 1;
char ret = *escaped;
unsigned char ret = *escaped;
switch (*escaped) {
case 'a': ret = '\a'; break; case 'b': ret = '\b'; break;
case 'n': ret = '\n'; break; case 'r': ret = '\r'; break;
case 't': ret = '\t'; break; case 'v': ret = '\v'; break;
case 'e': ret = '\033'; break;
case 'x': { // Hex
static const char hextable[255] = {
static const unsigned char hextable[255] = {
['0']=0x10, ['1']=0x1, ['2']=0x2, ['3']=0x3, ['4']=0x4,
['5']=0x5, ['6']=0x6, ['7']=0x7, ['8']=0x8, ['9']=0x9,
['a']=0xa, ['b']=0xb, ['c']=0xc, ['d']=0xd, ['e']=0xe, ['f']=0xf,

View File

@ -18,7 +18,7 @@
#define debug(...) do { if (verbose) fprintf(stderr, __VA_ARGS__); } while(0)
__attribute__((nonnull))
char unescapechar(const char *escaped, const char **end);
unsigned char unescapechar(const char *escaped, const char **end);
__attribute__((pure, nonnull, returns_nonnull))
const char *after_name(const char *str);
__attribute__((pure, nonnull, returns_nonnull))

2
vm.c
View File

@ -119,7 +119,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
return m;
}
case VM_RANGE: {
if (*str < op->args.range.low || *str > op->args.range.high)
if ((unsigned char)*str < op->args.range.low || (unsigned char)*str > op->args.range.high)
return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->op = op;