From 1a8095c6d774e3448f6afa1aeec2bc79c5bb545e Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Mon, 28 Sep 2020 21:30:43 -0700 Subject: [PATCH] Starting to add better error messages --- bpeg.c | 37 ++++++++++++++++++------------- compiler.c | 57 +++++++++++++++++++++++++++++++++--------------- compiler.h | 6 +++--- file_loader.c | 60 ++++++++++++++++++++++++++++++++++++--------------- file_loader.h | 4 +++- types.h | 2 +- utils.c | 6 +++--- utils.h | 2 +- vm.c | 2 +- 9 files changed, 117 insertions(+), 59 deletions(-) diff --git a/bpeg.c b/bpeg.c index 1c934ab..071f5d2 100644 --- a/bpeg.c +++ b/bpeg.c @@ -56,7 +56,7 @@ static int print_errors(file_t *f, match_t *m) printf("\033[31;1m"); print_match(f, m); printf("\033[0m\n"); - fprint_line(stdout, f, m->start, m->end, ""); + fprint_line(stdout, f, m->start, m->end, " "); return 1; } if (m->child) ret += print_errors(f, m->child); @@ -113,9 +113,10 @@ int main(int argc, char *argv[]) } else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) { flags |= BPEG_IGNORECASE; } else if (FLAG("--replace") || FLAG("-r")) { - vm_op_t *p = bpeg_replacement(bpeg_pattern(NULL, "pattern"), flag); + file_t *replace_file = spoof_file("", flag); + vm_op_t *p = bpeg_replacement(bpeg_pattern(replace_file, "pattern"), flag); check(p, "Replacement failed to compile"); - add_def(g, NULL, flag, "replacement", p); + add_def(g, replace_file, flag, "replacement", p); rule = "replace-all"; } else if (FLAG("--grammar") || FLAG("-g")) { file_t *f = load_file(flag); @@ -135,36 +136,41 @@ int main(int argc, char *argv[]) check(eq, "Rule definitions must include an ':'\n\n%s", usage); *eq = '\0'; char *src = ++eq; - vm_op_t *pat = bpeg_pattern(NULL, src); + file_t *def_file = spoof_file(def, flag); + vm_op_t *pat = bpeg_pattern(def_file, src); check(pat, "Failed to compile pattern"); - add_def(g, NULL, src, def, pat); + add_def(g, def_file, src, def, pat); } else if (FLAG("--define-string") || FLAG("-D")) { char *def = flag; char *eq = strchr(def, ':'); check(eq, "Rule definitions must include an ':'\n\n%s", usage); *eq = '\0'; char *src = ++eq; - vm_op_t *pat = bpeg_stringpattern(NULL, src); + file_t *def_file = spoof_file(def, flag); + vm_op_t *pat = bpeg_stringpattern(def_file, src); check(pat, "Failed to compile pattern"); - add_def(g, NULL, src, def, pat); + add_def(g, def_file, src, def, pat); } else if (FLAG("--pattern") || FLAG("-p")) { check(npatterns == 0, "Cannot define multiple patterns"); - vm_op_t *p = bpeg_pattern(NULL, flag); + file_t *arg_file = spoof_file("", flag); + vm_op_t *p = bpeg_pattern(arg_file, flag); check(p, "Pattern failed to compile: '%s'", flag); - add_def(g, NULL, flag, "pattern", p); + add_def(g, arg_file, flag, "pattern", p); ++npatterns; } else if (FLAG("--pattern-string") || FLAG("-P")) { - vm_op_t *p = bpeg_stringpattern(NULL, flag); + file_t *arg_file = spoof_file("", flag); + vm_op_t *p = bpeg_stringpattern(arg_file, flag); check(p, "Pattern failed to compile"); - add_def(g, NULL, flag, "pattern", p); + add_def(g, arg_file, flag, "pattern", p); ++npatterns; } else if (FLAG("--mode") || FLAG("-m")) { rule = flag; } else if (argv[i][0] != '-') { if (npatterns > 0) break; - vm_op_t *p = bpeg_stringpattern(NULL, argv[i]); + file_t *arg_file = spoof_file("", flag); + vm_op_t *p = bpeg_stringpattern(arg_file, argv[i]); check(p, "Pattern failed to compile"); - add_def(g, NULL, argv[i], "pattern", p); + add_def(g, arg_file, argv[i], "pattern", p); ++npatterns; } else { printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage); @@ -173,9 +179,10 @@ int main(int argc, char *argv[]) } if (isatty(STDOUT_FILENO)) { - vm_op_t *p = bpeg_pattern(NULL, "''"); + file_t *is_tty_file = spoof_file("", flag); + vm_op_t *p = bpeg_pattern(is_tty_file, "''"); check(p, "Failed to compile is-tty"); - add_def(g, NULL, "''", "is-tty", p); + add_def(g, is_tty_file, "''", "is-tty", p); } vm_op_t *pattern = lookup(g, rule); diff --git a/compiler.c b/compiler.c index b7a2458..b6f5e6f 100644 --- a/compiler.c +++ b/compiler.c @@ -5,6 +5,8 @@ #include "compiler.h" #include "utils.h" +#define file_err(f, ...) do { fprint_line(stderr, f, __VA_ARGS__); _exit(1); } while(0) + static vm_op_t *expand_chain(file_t *f, vm_op_t *first); static vm_op_t *expand_choices(file_t *f, vm_op_t *first); static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second); @@ -42,7 +44,9 @@ static vm_op_t *expand_chain(file_t *f, vm_op_t *first) vm_op_t *second = bpeg_simplepattern(f, first->end); if (second == NULL) return first; second = expand_chain(f, second); - check(second->end > first->end, "No forward progress in chain!"); + if (second->end <= first->end) + file_err(f, second->end, second->end, + "This chain is not parsing properly"); return chain_together(first, second); } @@ -57,7 +61,8 @@ static vm_op_t *expand_choices(file_t *f, vm_op_t *first) const char *str = first->end; if (!matchchar(&str, '/')) return first; vm_op_t *second = bpeg_simplepattern(f, str); - check(second, "Expected pattern after '/'"); + if (!second) + file_err(f, str, str, "There should be a pattern here after a '/'"); second = expand_choices(f, second); vm_op_t *choice = calloc(sizeof(vm_op_t), 1); choice->op = VM_OTHERWISE; @@ -98,6 +103,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) op->start = str; op->len = -1; char c = *str; + const char *origin = str; ++str; switch (c) { // Any char (dot) ($. is multiline anychar) @@ -126,12 +132,15 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) case '`': { char literal[2] = {*str, '\0'}; ++str; - check(literal[0], "Expected character after '`'\n"); + if (!literal[0] || literal[0] == '\n') + file_err(f, str, str, "There should be a character here after the '`'"); op->len = 1; if (matchchar(&str, '-')) { // Range char c2 = *str; - check(c2, "Expected character after '-'"); - check(c2 >= literal[0], "Character range must be low-to-high"); + if (!c2 || c2 == '\n') + file_err(f, str, str, "There should be a character here to complete the character range."); + if (c2 < literal[0]) + file_err(f, origin, str+1, "Character ranges must be low-to-high, but this is high-to-low."); op->op = VM_RANGE; op->args.range.low = literal[0]; op->args.range.high = c2; @@ -145,13 +154,19 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) // Escapes case '\\': { check(*str && *str != '\n', "Expected escape after '\\'"); + if (!*str || *str == '\n') + file_err(f, str, str, "There should be an escape sequence here after this backslash."); op->len = 1; - char e = unescapechar(str, &str); + unsigned char e = unescapechar(str, &str); if (*str == '-') { // Escape range (e.g. \x00-\xFF) ++str; - char e2 = unescapechar(str, &str); - check(e2, "Expected character after '-'"); - check(e2 >= e, "Character range must be low-to-high"); + const char *seqstart = str; + unsigned char e2 = unescapechar(str, &str); + if (str == seqstart) + file_err(f, seqstart, str+1, "This value isn't a valid escape sequence"); + if (e2 < e) + file_err(f, origin, str, "Escape ranges should be low-to-high, but this is high-to-low."); + printf("%d - %d\n", (int)e, (int)e2); op->op = VM_RANGE; op->args.range.low = e; op->args.range.high = e2; @@ -228,9 +243,13 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) // Lookbehind case '<': { vm_op_t *pat = bpeg_simplepattern(f, str); - check(pat, "Expected pattern after <"); + if (!pat) + file_err(f, str, str, "There should be a pattern after this '<'"); str = pat->end; - check(pat->len != -1, "Lookbehind patterns must have a fixed length"); + if (pat->len == -1) + file_err(f, origin, pat->end, + "Sorry, variable-length lookbehind patterns like this are not supported.\n" + "Please use a fixed-length lookbehind pattern instead."); str = pat->end; op->op = VM_AFTER; op->len = 0; @@ -240,7 +259,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) // Lookahead case '>': { vm_op_t *pat = bpeg_simplepattern(f, str); - check(pat, "Expected pattern after >"); + if (!pat) + file_err(f, str, str, "There should be a pattern after this '>'"); str = pat->end; op->op = VM_BEFORE; op->len = 0; @@ -251,11 +271,13 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) case '(': { free(op); op = bpeg_simplepattern(f, str); - check(op, "Expected pattern inside parentheses"); + if (!op) + file_err(f, str, str, "Expected a pattern inside parentheses"); op = expand_choices(f, op); str = op->end; str = after_spaces(str); - check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str); + if (!matchchar(&str, ')')) + file_err(f, origin, str, "This parenthesis is not closed"); break; } // Square brackets @@ -279,7 +301,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) vm_op_t *sep = NULL; if (matchchar(&str, '%')) { sep = bpeg_simplepattern(f, str); - check(sep, "Expected pattern for separator after '%%'"); + if (!sep) + file_err(f, str, str, "Expected pattern for separator after '%%'"); str = sep->end; } set_range(op, min, -1, pat, sep); @@ -387,7 +410,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) // Postfix operators: postfix: - if (f ? str >= f->end : !*str) return op; + if (str >= f->end) return op; str = after_spaces(str); if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality == and inequality != int equal = str[0] == '='; @@ -431,7 +454,7 @@ vm_op_t *bpeg_stringpattern(file_t *f, const char *str) if (*str == '\\') { check(str[1], "Expected more string contents after backslash"); const char *after_escape; - char e = unescapechar(&str[1], &after_escape); + unsigned char e = unescapechar(&str[1], &after_escape); if (e != str[1]) { str = after_escape - 1; continue; diff --git a/compiler.h b/compiler.h index d5b4ff1..89693af 100644 --- a/compiler.h +++ b/compiler.h @@ -9,13 +9,13 @@ #include "types.h" #include "file_loader.h" -__attribute__((nonnull(2))) +__attribute__((nonnull(1,2))) vm_op_t *bpeg_simplepattern(file_t *f, const char *str); -__attribute__((nonnull(2))) +__attribute__((nonnull(1,2))) vm_op_t *bpeg_stringpattern(file_t *f, const char *str); __attribute__((nonnull(1,2))) vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement); -__attribute__((nonnull(2))) +__attribute__((nonnull(1,2))) vm_op_t *bpeg_pattern(file_t *f, const char *str); #endif diff --git a/file_loader.c b/file_loader.c index 82f594b..20ed253 100644 --- a/file_loader.c +++ b/file_loader.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -13,6 +14,23 @@ #include "file_loader.h" +static void populate_lines(file_t *f) +{ + // Calculate line numbers: + size_t linecap = 10; + f->lines = calloc(sizeof(const char*), linecap); + f->nlines = 0; + char *p = f->contents; + for (size_t n = 0; p && p < f->end; ++n) { + ++f->nlines; + if (n >= linecap) + f->lines = realloc(f->lines, sizeof(const char*)*(linecap *= 2)); + f->lines[n] = p; + p = strchr(p, '\n'); + if (p) ++p; + } +} + /* * Read an entire file into memory. */ @@ -23,7 +41,7 @@ file_t *load_file(const char *filename) if (fd < 0) return NULL; file_t *f = calloc(sizeof(file_t), 1); f->filename = strdup(filename); - // TODO: use mmap when possible + struct stat sb; if (fstat(fd, &sb) == -1) goto skip_mmap; @@ -51,21 +69,22 @@ file_t *load_file(const char *filename) finished_loading: f->end = &f->contents[f->length]; - - // Calculate line numbers: - size_t linecap = 10; - f->lines = calloc(sizeof(const char*), linecap); - f->nlines = 0; - char *p = f->contents; - for (size_t n = 0; p && *p; ++n) { - ++f->nlines; - if (n >= linecap) - f->lines = realloc(f->lines, sizeof(const char*)*(linecap *= 2)); - f->lines[n] = p; - p = strchr(p, '\n'); - if (p) ++p; - } + populate_lines(f); + return f; +} +/* + * Create a virtual file from a string. + */ +file_t *spoof_file(const char *filename, char *text) +{ + if (filename == NULL) filename = ""; + file_t *f = calloc(sizeof(file_t), 1); + f->filename = strdup(filename); + f->contents = text; + f->length = strlen(text); + f->end = &f->contents[f->length]; + populate_lines(f); return f; } @@ -113,12 +132,19 @@ const char *get_line(file_t *f, size_t line_number) return f->lines[line_number - 1]; } -void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg) +void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...) { size_t linenum = get_line_number(f, start); const char *line = get_line(f, linenum); size_t charnum = get_char_number(f, start); - fprintf(dest, "\033[1m%s:%ld:\033[0m %s\n", f->filename, linenum, msg); + fprintf(dest, "\033[1m%s:%ld:\033[0m ", f->filename, linenum); + + va_list args; + va_start(args, fmt); + vfprintf(dest, fmt, args); + va_end(args); + fputc('\n', dest); + const char *eol = linenum == f->nlines ? strchr(line, '\0') : strchr(line, '\n'); if (end == NULL || end > eol) end = eol; fprintf(dest, "\033[2m% 5ld |\033[0m %.*s\033[41;30m%.*s\033[0m%.*s\n", diff --git a/file_loader.h b/file_loader.h index 1634f15..cda8dc3 100644 --- a/file_loader.h +++ b/file_loader.h @@ -14,6 +14,7 @@ typedef struct { } file_t; file_t *load_file(const char *filename); +file_t *spoof_file(const char *filename, char *text); __attribute__((nonnull)) void destroy_file(file_t **f); __attribute__((pure, nonnull)) @@ -22,6 +23,7 @@ __attribute__((pure, nonnull)) size_t get_char_number(file_t *f, const char *p); __attribute__((pure, nonnull)) const char *get_line(file_t *f, size_t line_number); -void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg); +__attribute__((format (printf, 5, 6))) +void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...); #endif diff --git a/types.h b/types.h index 21ed0e7..da1ce40 100644 --- a/types.h +++ b/types.h @@ -48,7 +48,7 @@ typedef struct vm_op_s { union { const char *s; struct { - char low, high; + unsigned char low, high; } range; struct { ssize_t min, max; diff --git a/utils.c b/utils.c index 012c1eb..0c0fa31 100644 --- a/utils.c +++ b/utils.c @@ -74,17 +74,17 @@ int matchchar(const char **str, char c) * character that was escaped. * Set *end = the first character past the end of the escape sequence. */ -char unescapechar(const char *escaped, const char **end) +unsigned char unescapechar(const char *escaped, const char **end) { size_t len = 1; - char ret = *escaped; + unsigned char ret = *escaped; switch (*escaped) { case 'a': ret = '\a'; break; case 'b': ret = '\b'; break; case 'n': ret = '\n'; break; case 'r': ret = '\r'; break; case 't': ret = '\t'; break; case 'v': ret = '\v'; break; case 'e': ret = '\033'; break; case 'x': { // Hex - static const char hextable[255] = { + static const unsigned char hextable[255] = { ['0']=0x10, ['1']=0x1, ['2']=0x2, ['3']=0x3, ['4']=0x4, ['5']=0x5, ['6']=0x6, ['7']=0x7, ['8']=0x8, ['9']=0x9, ['a']=0xa, ['b']=0xb, ['c']=0xc, ['d']=0xd, ['e']=0xe, ['f']=0xf, diff --git a/utils.h b/utils.h index 84456a1..d9636ce 100644 --- a/utils.h +++ b/utils.h @@ -18,7 +18,7 @@ #define debug(...) do { if (verbose) fprintf(stderr, __VA_ARGS__); } while(0) __attribute__((nonnull)) -char unescapechar(const char *escaped, const char **end); +unsigned char unescapechar(const char *escaped, const char **end); __attribute__((pure, nonnull, returns_nonnull)) const char *after_name(const char *str); __attribute__((pure, nonnull, returns_nonnull)) diff --git a/vm.c b/vm.c index 158d58f..ac0a52a 100644 --- a/vm.c +++ b/vm.c @@ -119,7 +119,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un return m; } case VM_RANGE: { - if (*str < op->args.range.low || *str > op->args.range.high) + if ((unsigned char)*str < op->args.range.low || (unsigned char)*str > op->args.range.high) return NULL; match_t *m = calloc(sizeof(match_t), 1); m->op = op;