Starting to add better error messages
This commit is contained in:
parent
90b8db84a4
commit
1a8095c6d7
37
bpeg.c
37
bpeg.c
@ -56,7 +56,7 @@ static int print_errors(file_t *f, match_t *m)
|
||||
printf("\033[31;1m");
|
||||
print_match(f, m);
|
||||
printf("\033[0m\n");
|
||||
fprint_line(stdout, f, m->start, m->end, "");
|
||||
fprint_line(stdout, f, m->start, m->end, " ");
|
||||
return 1;
|
||||
}
|
||||
if (m->child) ret += print_errors(f, m->child);
|
||||
@ -113,9 +113,10 @@ int main(int argc, char *argv[])
|
||||
} else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) {
|
||||
flags |= BPEG_IGNORECASE;
|
||||
} else if (FLAG("--replace") || FLAG("-r")) {
|
||||
vm_op_t *p = bpeg_replacement(bpeg_pattern(NULL, "pattern"), flag);
|
||||
file_t *replace_file = spoof_file("<replace argument>", flag);
|
||||
vm_op_t *p = bpeg_replacement(bpeg_pattern(replace_file, "pattern"), flag);
|
||||
check(p, "Replacement failed to compile");
|
||||
add_def(g, NULL, flag, "replacement", p);
|
||||
add_def(g, replace_file, flag, "replacement", p);
|
||||
rule = "replace-all";
|
||||
} else if (FLAG("--grammar") || FLAG("-g")) {
|
||||
file_t *f = load_file(flag);
|
||||
@ -135,36 +136,41 @@ int main(int argc, char *argv[])
|
||||
check(eq, "Rule definitions must include an ':'\n\n%s", usage);
|
||||
*eq = '\0';
|
||||
char *src = ++eq;
|
||||
vm_op_t *pat = bpeg_pattern(NULL, src);
|
||||
file_t *def_file = spoof_file(def, flag);
|
||||
vm_op_t *pat = bpeg_pattern(def_file, src);
|
||||
check(pat, "Failed to compile pattern");
|
||||
add_def(g, NULL, src, def, pat);
|
||||
add_def(g, def_file, src, def, pat);
|
||||
} else if (FLAG("--define-string") || FLAG("-D")) {
|
||||
char *def = flag;
|
||||
char *eq = strchr(def, ':');
|
||||
check(eq, "Rule definitions must include an ':'\n\n%s", usage);
|
||||
*eq = '\0';
|
||||
char *src = ++eq;
|
||||
vm_op_t *pat = bpeg_stringpattern(NULL, src);
|
||||
file_t *def_file = spoof_file(def, flag);
|
||||
vm_op_t *pat = bpeg_stringpattern(def_file, src);
|
||||
check(pat, "Failed to compile pattern");
|
||||
add_def(g, NULL, src, def, pat);
|
||||
add_def(g, def_file, src, def, pat);
|
||||
} else if (FLAG("--pattern") || FLAG("-p")) {
|
||||
check(npatterns == 0, "Cannot define multiple patterns");
|
||||
vm_op_t *p = bpeg_pattern(NULL, flag);
|
||||
file_t *arg_file = spoof_file("<pattern argument>", flag);
|
||||
vm_op_t *p = bpeg_pattern(arg_file, flag);
|
||||
check(p, "Pattern failed to compile: '%s'", flag);
|
||||
add_def(g, NULL, flag, "pattern", p);
|
||||
add_def(g, arg_file, flag, "pattern", p);
|
||||
++npatterns;
|
||||
} else if (FLAG("--pattern-string") || FLAG("-P")) {
|
||||
vm_op_t *p = bpeg_stringpattern(NULL, flag);
|
||||
file_t *arg_file = spoof_file("<pattern argument>", flag);
|
||||
vm_op_t *p = bpeg_stringpattern(arg_file, flag);
|
||||
check(p, "Pattern failed to compile");
|
||||
add_def(g, NULL, flag, "pattern", p);
|
||||
add_def(g, arg_file, flag, "pattern", p);
|
||||
++npatterns;
|
||||
} else if (FLAG("--mode") || FLAG("-m")) {
|
||||
rule = flag;
|
||||
} else if (argv[i][0] != '-') {
|
||||
if (npatterns > 0) break;
|
||||
vm_op_t *p = bpeg_stringpattern(NULL, argv[i]);
|
||||
file_t *arg_file = spoof_file("<pattern argument>", flag);
|
||||
vm_op_t *p = bpeg_stringpattern(arg_file, argv[i]);
|
||||
check(p, "Pattern failed to compile");
|
||||
add_def(g, NULL, argv[i], "pattern", p);
|
||||
add_def(g, arg_file, argv[i], "pattern", p);
|
||||
++npatterns;
|
||||
} else {
|
||||
printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage);
|
||||
@ -173,9 +179,10 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
if (isatty(STDOUT_FILENO)) {
|
||||
vm_op_t *p = bpeg_pattern(NULL, "''");
|
||||
file_t *is_tty_file = spoof_file("<is-tty>", flag);
|
||||
vm_op_t *p = bpeg_pattern(is_tty_file, "''");
|
||||
check(p, "Failed to compile is-tty");
|
||||
add_def(g, NULL, "''", "is-tty", p);
|
||||
add_def(g, is_tty_file, "''", "is-tty", p);
|
||||
}
|
||||
|
||||
vm_op_t *pattern = lookup(g, rule);
|
||||
|
57
compiler.c
57
compiler.c
@ -5,6 +5,8 @@
|
||||
#include "compiler.h"
|
||||
#include "utils.h"
|
||||
|
||||
#define file_err(f, ...) do { fprint_line(stderr, f, __VA_ARGS__); _exit(1); } while(0)
|
||||
|
||||
static vm_op_t *expand_chain(file_t *f, vm_op_t *first);
|
||||
static vm_op_t *expand_choices(file_t *f, vm_op_t *first);
|
||||
static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second);
|
||||
@ -42,7 +44,9 @@ static vm_op_t *expand_chain(file_t *f, vm_op_t *first)
|
||||
vm_op_t *second = bpeg_simplepattern(f, first->end);
|
||||
if (second == NULL) return first;
|
||||
second = expand_chain(f, second);
|
||||
check(second->end > first->end, "No forward progress in chain!");
|
||||
if (second->end <= first->end)
|
||||
file_err(f, second->end, second->end,
|
||||
"This chain is not parsing properly");
|
||||
return chain_together(first, second);
|
||||
}
|
||||
|
||||
@ -57,7 +61,8 @@ static vm_op_t *expand_choices(file_t *f, vm_op_t *first)
|
||||
const char *str = first->end;
|
||||
if (!matchchar(&str, '/')) return first;
|
||||
vm_op_t *second = bpeg_simplepattern(f, str);
|
||||
check(second, "Expected pattern after '/'");
|
||||
if (!second)
|
||||
file_err(f, str, str, "There should be a pattern here after a '/'");
|
||||
second = expand_choices(f, second);
|
||||
vm_op_t *choice = calloc(sizeof(vm_op_t), 1);
|
||||
choice->op = VM_OTHERWISE;
|
||||
@ -98,6 +103,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
op->start = str;
|
||||
op->len = -1;
|
||||
char c = *str;
|
||||
const char *origin = str;
|
||||
++str;
|
||||
switch (c) {
|
||||
// Any char (dot) ($. is multiline anychar)
|
||||
@ -126,12 +132,15 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
case '`': {
|
||||
char literal[2] = {*str, '\0'};
|
||||
++str;
|
||||
check(literal[0], "Expected character after '`'\n");
|
||||
if (!literal[0] || literal[0] == '\n')
|
||||
file_err(f, str, str, "There should be a character here after the '`'");
|
||||
op->len = 1;
|
||||
if (matchchar(&str, '-')) { // Range
|
||||
char c2 = *str;
|
||||
check(c2, "Expected character after '-'");
|
||||
check(c2 >= literal[0], "Character range must be low-to-high");
|
||||
if (!c2 || c2 == '\n')
|
||||
file_err(f, str, str, "There should be a character here to complete the character range.");
|
||||
if (c2 < literal[0])
|
||||
file_err(f, origin, str+1, "Character ranges must be low-to-high, but this is high-to-low.");
|
||||
op->op = VM_RANGE;
|
||||
op->args.range.low = literal[0];
|
||||
op->args.range.high = c2;
|
||||
@ -145,13 +154,19 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
// Escapes
|
||||
case '\\': {
|
||||
check(*str && *str != '\n', "Expected escape after '\\'");
|
||||
if (!*str || *str == '\n')
|
||||
file_err(f, str, str, "There should be an escape sequence here after this backslash.");
|
||||
op->len = 1;
|
||||
char e = unescapechar(str, &str);
|
||||
unsigned char e = unescapechar(str, &str);
|
||||
if (*str == '-') { // Escape range (e.g. \x00-\xFF)
|
||||
++str;
|
||||
char e2 = unescapechar(str, &str);
|
||||
check(e2, "Expected character after '-'");
|
||||
check(e2 >= e, "Character range must be low-to-high");
|
||||
const char *seqstart = str;
|
||||
unsigned char e2 = unescapechar(str, &str);
|
||||
if (str == seqstart)
|
||||
file_err(f, seqstart, str+1, "This value isn't a valid escape sequence");
|
||||
if (e2 < e)
|
||||
file_err(f, origin, str, "Escape ranges should be low-to-high, but this is high-to-low.");
|
||||
printf("%d - %d\n", (int)e, (int)e2);
|
||||
op->op = VM_RANGE;
|
||||
op->args.range.low = e;
|
||||
op->args.range.high = e2;
|
||||
@ -228,9 +243,13 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
// Lookbehind
|
||||
case '<': {
|
||||
vm_op_t *pat = bpeg_simplepattern(f, str);
|
||||
check(pat, "Expected pattern after <");
|
||||
if (!pat)
|
||||
file_err(f, str, str, "There should be a pattern after this '<'");
|
||||
str = pat->end;
|
||||
check(pat->len != -1, "Lookbehind patterns must have a fixed length");
|
||||
if (pat->len == -1)
|
||||
file_err(f, origin, pat->end,
|
||||
"Sorry, variable-length lookbehind patterns like this are not supported.\n"
|
||||
"Please use a fixed-length lookbehind pattern instead.");
|
||||
str = pat->end;
|
||||
op->op = VM_AFTER;
|
||||
op->len = 0;
|
||||
@ -240,7 +259,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
// Lookahead
|
||||
case '>': {
|
||||
vm_op_t *pat = bpeg_simplepattern(f, str);
|
||||
check(pat, "Expected pattern after >");
|
||||
if (!pat)
|
||||
file_err(f, str, str, "There should be a pattern after this '>'");
|
||||
str = pat->end;
|
||||
op->op = VM_BEFORE;
|
||||
op->len = 0;
|
||||
@ -251,11 +271,13 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
case '(': {
|
||||
free(op);
|
||||
op = bpeg_simplepattern(f, str);
|
||||
check(op, "Expected pattern inside parentheses");
|
||||
if (!op)
|
||||
file_err(f, str, str, "Expected a pattern inside parentheses");
|
||||
op = expand_choices(f, op);
|
||||
str = op->end;
|
||||
str = after_spaces(str);
|
||||
check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str);
|
||||
if (!matchchar(&str, ')'))
|
||||
file_err(f, origin, str, "This parenthesis is not closed");
|
||||
break;
|
||||
}
|
||||
// Square brackets
|
||||
@ -279,7 +301,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
vm_op_t *sep = NULL;
|
||||
if (matchchar(&str, '%')) {
|
||||
sep = bpeg_simplepattern(f, str);
|
||||
check(sep, "Expected pattern for separator after '%%'");
|
||||
if (!sep)
|
||||
file_err(f, str, str, "Expected pattern for separator after '%%'");
|
||||
str = sep->end;
|
||||
}
|
||||
set_range(op, min, -1, pat, sep);
|
||||
@ -387,7 +410,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
||||
|
||||
// Postfix operators:
|
||||
postfix:
|
||||
if (f ? str >= f->end : !*str) return op;
|
||||
if (str >= f->end) return op;
|
||||
str = after_spaces(str);
|
||||
if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
|
||||
int equal = str[0] == '=';
|
||||
@ -431,7 +454,7 @@ vm_op_t *bpeg_stringpattern(file_t *f, const char *str)
|
||||
if (*str == '\\') {
|
||||
check(str[1], "Expected more string contents after backslash");
|
||||
const char *after_escape;
|
||||
char e = unescapechar(&str[1], &after_escape);
|
||||
unsigned char e = unescapechar(&str[1], &after_escape);
|
||||
if (e != str[1]) {
|
||||
str = after_escape - 1;
|
||||
continue;
|
||||
|
@ -9,13 +9,13 @@
|
||||
#include "types.h"
|
||||
#include "file_loader.h"
|
||||
|
||||
__attribute__((nonnull(2)))
|
||||
__attribute__((nonnull(1,2)))
|
||||
vm_op_t *bpeg_simplepattern(file_t *f, const char *str);
|
||||
__attribute__((nonnull(2)))
|
||||
__attribute__((nonnull(1,2)))
|
||||
vm_op_t *bpeg_stringpattern(file_t *f, const char *str);
|
||||
__attribute__((nonnull(1,2)))
|
||||
vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement);
|
||||
__attribute__((nonnull(2)))
|
||||
__attribute__((nonnull(1,2)))
|
||||
vm_op_t *bpeg_pattern(file_t *f, const char *str);
|
||||
|
||||
#endif
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <ctype.h>
|
||||
#include <fcntl.h>
|
||||
#include <limits.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
@ -13,6 +14,23 @@
|
||||
|
||||
#include "file_loader.h"
|
||||
|
||||
static void populate_lines(file_t *f)
|
||||
{
|
||||
// Calculate line numbers:
|
||||
size_t linecap = 10;
|
||||
f->lines = calloc(sizeof(const char*), linecap);
|
||||
f->nlines = 0;
|
||||
char *p = f->contents;
|
||||
for (size_t n = 0; p && p < f->end; ++n) {
|
||||
++f->nlines;
|
||||
if (n >= linecap)
|
||||
f->lines = realloc(f->lines, sizeof(const char*)*(linecap *= 2));
|
||||
f->lines[n] = p;
|
||||
p = strchr(p, '\n');
|
||||
if (p) ++p;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Read an entire file into memory.
|
||||
*/
|
||||
@ -23,7 +41,7 @@ file_t *load_file(const char *filename)
|
||||
if (fd < 0) return NULL;
|
||||
file_t *f = calloc(sizeof(file_t), 1);
|
||||
f->filename = strdup(filename);
|
||||
// TODO: use mmap when possible
|
||||
|
||||
struct stat sb;
|
||||
if (fstat(fd, &sb) == -1)
|
||||
goto skip_mmap;
|
||||
@ -51,21 +69,22 @@ file_t *load_file(const char *filename)
|
||||
|
||||
finished_loading:
|
||||
f->end = &f->contents[f->length];
|
||||
|
||||
// Calculate line numbers:
|
||||
size_t linecap = 10;
|
||||
f->lines = calloc(sizeof(const char*), linecap);
|
||||
f->nlines = 0;
|
||||
char *p = f->contents;
|
||||
for (size_t n = 0; p && *p; ++n) {
|
||||
++f->nlines;
|
||||
if (n >= linecap)
|
||||
f->lines = realloc(f->lines, sizeof(const char*)*(linecap *= 2));
|
||||
f->lines[n] = p;
|
||||
p = strchr(p, '\n');
|
||||
if (p) ++p;
|
||||
}
|
||||
populate_lines(f);
|
||||
return f;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a virtual file from a string.
|
||||
*/
|
||||
file_t *spoof_file(const char *filename, char *text)
|
||||
{
|
||||
if (filename == NULL) filename = "";
|
||||
file_t *f = calloc(sizeof(file_t), 1);
|
||||
f->filename = strdup(filename);
|
||||
f->contents = text;
|
||||
f->length = strlen(text);
|
||||
f->end = &f->contents[f->length];
|
||||
populate_lines(f);
|
||||
return f;
|
||||
}
|
||||
|
||||
@ -113,12 +132,19 @@ const char *get_line(file_t *f, size_t line_number)
|
||||
return f->lines[line_number - 1];
|
||||
}
|
||||
|
||||
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg)
|
||||
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...)
|
||||
{
|
||||
size_t linenum = get_line_number(f, start);
|
||||
const char *line = get_line(f, linenum);
|
||||
size_t charnum = get_char_number(f, start);
|
||||
fprintf(dest, "\033[1m%s:%ld:\033[0m %s\n", f->filename, linenum, msg);
|
||||
fprintf(dest, "\033[1m%s:%ld:\033[0m ", f->filename, linenum);
|
||||
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
vfprintf(dest, fmt, args);
|
||||
va_end(args);
|
||||
fputc('\n', dest);
|
||||
|
||||
const char *eol = linenum == f->nlines ? strchr(line, '\0') : strchr(line, '\n');
|
||||
if (end == NULL || end > eol) end = eol;
|
||||
fprintf(dest, "\033[2m% 5ld |\033[0m %.*s\033[41;30m%.*s\033[0m%.*s\n",
|
||||
|
@ -14,6 +14,7 @@ typedef struct {
|
||||
} file_t;
|
||||
|
||||
file_t *load_file(const char *filename);
|
||||
file_t *spoof_file(const char *filename, char *text);
|
||||
__attribute__((nonnull))
|
||||
void destroy_file(file_t **f);
|
||||
__attribute__((pure, nonnull))
|
||||
@ -22,6 +23,7 @@ __attribute__((pure, nonnull))
|
||||
size_t get_char_number(file_t *f, const char *p);
|
||||
__attribute__((pure, nonnull))
|
||||
const char *get_line(file_t *f, size_t line_number);
|
||||
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg);
|
||||
__attribute__((format (printf, 5, 6)))
|
||||
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...);
|
||||
|
||||
#endif
|
||||
|
2
types.h
2
types.h
@ -48,7 +48,7 @@ typedef struct vm_op_s {
|
||||
union {
|
||||
const char *s;
|
||||
struct {
|
||||
char low, high;
|
||||
unsigned char low, high;
|
||||
} range;
|
||||
struct {
|
||||
ssize_t min, max;
|
||||
|
6
utils.c
6
utils.c
@ -74,17 +74,17 @@ int matchchar(const char **str, char c)
|
||||
* character that was escaped.
|
||||
* Set *end = the first character past the end of the escape sequence.
|
||||
*/
|
||||
char unescapechar(const char *escaped, const char **end)
|
||||
unsigned char unescapechar(const char *escaped, const char **end)
|
||||
{
|
||||
size_t len = 1;
|
||||
char ret = *escaped;
|
||||
unsigned char ret = *escaped;
|
||||
switch (*escaped) {
|
||||
case 'a': ret = '\a'; break; case 'b': ret = '\b'; break;
|
||||
case 'n': ret = '\n'; break; case 'r': ret = '\r'; break;
|
||||
case 't': ret = '\t'; break; case 'v': ret = '\v'; break;
|
||||
case 'e': ret = '\033'; break;
|
||||
case 'x': { // Hex
|
||||
static const char hextable[255] = {
|
||||
static const unsigned char hextable[255] = {
|
||||
['0']=0x10, ['1']=0x1, ['2']=0x2, ['3']=0x3, ['4']=0x4,
|
||||
['5']=0x5, ['6']=0x6, ['7']=0x7, ['8']=0x8, ['9']=0x9,
|
||||
['a']=0xa, ['b']=0xb, ['c']=0xc, ['d']=0xd, ['e']=0xe, ['f']=0xf,
|
||||
|
2
utils.h
2
utils.h
@ -18,7 +18,7 @@
|
||||
#define debug(...) do { if (verbose) fprintf(stderr, __VA_ARGS__); } while(0)
|
||||
|
||||
__attribute__((nonnull))
|
||||
char unescapechar(const char *escaped, const char **end);
|
||||
unsigned char unescapechar(const char *escaped, const char **end);
|
||||
__attribute__((pure, nonnull, returns_nonnull))
|
||||
const char *after_name(const char *str);
|
||||
__attribute__((pure, nonnull, returns_nonnull))
|
||||
|
2
vm.c
2
vm.c
@ -119,7 +119,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
|
||||
return m;
|
||||
}
|
||||
case VM_RANGE: {
|
||||
if (*str < op->args.range.low || *str > op->args.range.high)
|
||||
if ((unsigned char)*str < op->args.range.low || (unsigned char)*str > op->args.range.high)
|
||||
return NULL;
|
||||
match_t *m = calloc(sizeof(match_t), 1);
|
||||
m->op = op;
|
||||
|
Loading…
Reference in New Issue
Block a user