aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bpeg.c37
-rw-r--r--compiler.c57
-rw-r--r--compiler.h6
-rw-r--r--file_loader.c60
-rw-r--r--file_loader.h4
-rw-r--r--types.h2
-rw-r--r--utils.c6
-rw-r--r--utils.h2
-rw-r--r--vm.c2
9 files changed, 117 insertions, 59 deletions
diff --git a/bpeg.c b/bpeg.c
index 1c934ab..071f5d2 100644
--- a/bpeg.c
+++ b/bpeg.c
@@ -56,7 +56,7 @@ static int print_errors(file_t *f, match_t *m)
printf("\033[31;1m");
print_match(f, m);
printf("\033[0m\n");
- fprint_line(stdout, f, m->start, m->end, "");
+ fprint_line(stdout, f, m->start, m->end, " ");
return 1;
}
if (m->child) ret += print_errors(f, m->child);
@@ -113,9 +113,10 @@ int main(int argc, char *argv[])
} else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) {
flags |= BPEG_IGNORECASE;
} else if (FLAG("--replace") || FLAG("-r")) {
- vm_op_t *p = bpeg_replacement(bpeg_pattern(NULL, "pattern"), flag);
+ file_t *replace_file = spoof_file("<replace argument>", flag);
+ vm_op_t *p = bpeg_replacement(bpeg_pattern(replace_file, "pattern"), flag);
check(p, "Replacement failed to compile");
- add_def(g, NULL, flag, "replacement", p);
+ add_def(g, replace_file, flag, "replacement", p);
rule = "replace-all";
} else if (FLAG("--grammar") || FLAG("-g")) {
file_t *f = load_file(flag);
@@ -135,36 +136,41 @@ int main(int argc, char *argv[])
check(eq, "Rule definitions must include an ':'\n\n%s", usage);
*eq = '\0';
char *src = ++eq;
- vm_op_t *pat = bpeg_pattern(NULL, src);
+ file_t *def_file = spoof_file(def, flag);
+ vm_op_t *pat = bpeg_pattern(def_file, src);
check(pat, "Failed to compile pattern");
- add_def(g, NULL, src, def, pat);
+ add_def(g, def_file, src, def, pat);
} else if (FLAG("--define-string") || FLAG("-D")) {
char *def = flag;
char *eq = strchr(def, ':');
check(eq, "Rule definitions must include an ':'\n\n%s", usage);
*eq = '\0';
char *src = ++eq;
- vm_op_t *pat = bpeg_stringpattern(NULL, src);
+ file_t *def_file = spoof_file(def, flag);
+ vm_op_t *pat = bpeg_stringpattern(def_file, src);
check(pat, "Failed to compile pattern");
- add_def(g, NULL, src, def, pat);
+ add_def(g, def_file, src, def, pat);
} else if (FLAG("--pattern") || FLAG("-p")) {
check(npatterns == 0, "Cannot define multiple patterns");
- vm_op_t *p = bpeg_pattern(NULL, flag);
+ file_t *arg_file = spoof_file("<pattern argument>", flag);
+ vm_op_t *p = bpeg_pattern(arg_file, flag);
check(p, "Pattern failed to compile: '%s'", flag);
- add_def(g, NULL, flag, "pattern", p);
+ add_def(g, arg_file, flag, "pattern", p);
++npatterns;
} else if (FLAG("--pattern-string") || FLAG("-P")) {
- vm_op_t *p = bpeg_stringpattern(NULL, flag);
+ file_t *arg_file = spoof_file("<pattern argument>", flag);
+ vm_op_t *p = bpeg_stringpattern(arg_file, flag);
check(p, "Pattern failed to compile");
- add_def(g, NULL, flag, "pattern", p);
+ add_def(g, arg_file, flag, "pattern", p);
++npatterns;
} else if (FLAG("--mode") || FLAG("-m")) {
rule = flag;
} else if (argv[i][0] != '-') {
if (npatterns > 0) break;
- vm_op_t *p = bpeg_stringpattern(NULL, argv[i]);
+ file_t *arg_file = spoof_file("<pattern argument>", flag);
+ vm_op_t *p = bpeg_stringpattern(arg_file, argv[i]);
check(p, "Pattern failed to compile");
- add_def(g, NULL, argv[i], "pattern", p);
+ add_def(g, arg_file, argv[i], "pattern", p);
++npatterns;
} else {
printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage);
@@ -173,9 +179,10 @@ int main(int argc, char *argv[])
}
if (isatty(STDOUT_FILENO)) {
- vm_op_t *p = bpeg_pattern(NULL, "''");
+ file_t *is_tty_file = spoof_file("<is-tty>", flag);
+ vm_op_t *p = bpeg_pattern(is_tty_file, "''");
check(p, "Failed to compile is-tty");
- add_def(g, NULL, "''", "is-tty", p);
+ add_def(g, is_tty_file, "''", "is-tty", p);
}
vm_op_t *pattern = lookup(g, rule);
diff --git a/compiler.c b/compiler.c
index b7a2458..b6f5e6f 100644
--- a/compiler.c
+++ b/compiler.c
@@ -5,6 +5,8 @@
#include "compiler.h"
#include "utils.h"
+#define file_err(f, ...) do { fprint_line(stderr, f, __VA_ARGS__); _exit(1); } while(0)
+
static vm_op_t *expand_chain(file_t *f, vm_op_t *first);
static vm_op_t *expand_choices(file_t *f, vm_op_t *first);
static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second);
@@ -42,7 +44,9 @@ static vm_op_t *expand_chain(file_t *f, vm_op_t *first)
vm_op_t *second = bpeg_simplepattern(f, first->end);
if (second == NULL) return first;
second = expand_chain(f, second);
- check(second->end > first->end, "No forward progress in chain!");
+ if (second->end <= first->end)
+ file_err(f, second->end, second->end,
+ "This chain is not parsing properly");
return chain_together(first, second);
}
@@ -57,7 +61,8 @@ static vm_op_t *expand_choices(file_t *f, vm_op_t *first)
const char *str = first->end;
if (!matchchar(&str, '/')) return first;
vm_op_t *second = bpeg_simplepattern(f, str);
- check(second, "Expected pattern after '/'");
+ if (!second)
+ file_err(f, str, str, "There should be a pattern here after a '/'");
second = expand_choices(f, second);
vm_op_t *choice = calloc(sizeof(vm_op_t), 1);
choice->op = VM_OTHERWISE;
@@ -98,6 +103,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
op->start = str;
op->len = -1;
char c = *str;
+ const char *origin = str;
++str;
switch (c) {
// Any char (dot) ($. is multiline anychar)
@@ -126,12 +132,15 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
case '`': {
char literal[2] = {*str, '\0'};
++str;
- check(literal[0], "Expected character after '`'\n");
+ if (!literal[0] || literal[0] == '\n')
+ file_err(f, str, str, "There should be a character here after the '`'");
op->len = 1;
if (matchchar(&str, '-')) { // Range
char c2 = *str;
- check(c2, "Expected character after '-'");
- check(c2 >= literal[0], "Character range must be low-to-high");
+ if (!c2 || c2 == '\n')
+ file_err(f, str, str, "There should be a character here to complete the character range.");
+ if (c2 < literal[0])
+ file_err(f, origin, str+1, "Character ranges must be low-to-high, but this is high-to-low.");
op->op = VM_RANGE;
op->args.range.low = literal[0];
op->args.range.high = c2;
@@ -145,13 +154,19 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
// Escapes
case '\\': {
check(*str && *str != '\n', "Expected escape after '\\'");
+ if (!*str || *str == '\n')
+ file_err(f, str, str, "There should be an escape sequence here after this backslash.");
op->len = 1;
- char e = unescapechar(str, &str);
+ unsigned char e = unescapechar(str, &str);
if (*str == '-') { // Escape range (e.g. \x00-\xFF)
++str;
- char e2 = unescapechar(str, &str);
- check(e2, "Expected character after '-'");
- check(e2 >= e, "Character range must be low-to-high");
+ const char *seqstart = str;
+ unsigned char e2 = unescapechar(str, &str);
+ if (str == seqstart)
+ file_err(f, seqstart, str+1, "This value isn't a valid escape sequence");
+ if (e2 < e)
+ file_err(f, origin, str, "Escape ranges should be low-to-high, but this is high-to-low.");
+ printf("%d - %d\n", (int)e, (int)e2);
op->op = VM_RANGE;
op->args.range.low = e;
op->args.range.high = e2;
@@ -228,9 +243,13 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
// Lookbehind
case '<': {
vm_op_t *pat = bpeg_simplepattern(f, str);
- check(pat, "Expected pattern after <");
+ if (!pat)
+ file_err(f, str, str, "There should be a pattern after this '<'");
str = pat->end;
- check(pat->len != -1, "Lookbehind patterns must have a fixed length");
+ if (pat->len == -1)
+ file_err(f, origin, pat->end,
+ "Sorry, variable-length lookbehind patterns like this are not supported.\n"
+ "Please use a fixed-length lookbehind pattern instead.");
str = pat->end;
op->op = VM_AFTER;
op->len = 0;
@@ -240,7 +259,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
// Lookahead
case '>': {
vm_op_t *pat = bpeg_simplepattern(f, str);
- check(pat, "Expected pattern after >");
+ if (!pat)
+ file_err(f, str, str, "There should be a pattern after this '>'");
str = pat->end;
op->op = VM_BEFORE;
op->len = 0;
@@ -251,11 +271,13 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
case '(': {
free(op);
op = bpeg_simplepattern(f, str);
- check(op, "Expected pattern inside parentheses");
+ if (!op)
+ file_err(f, str, str, "Expected a pattern inside parentheses");
op = expand_choices(f, op);
str = op->end;
str = after_spaces(str);
- check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str);
+ if (!matchchar(&str, ')'))
+ file_err(f, origin, str, "This parenthesis is not closed");
break;
}
// Square brackets
@@ -279,7 +301,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
vm_op_t *sep = NULL;
if (matchchar(&str, '%')) {
sep = bpeg_simplepattern(f, str);
- check(sep, "Expected pattern for separator after '%%'");
+ if (!sep)
+ file_err(f, str, str, "Expected pattern for separator after '%%'");
str = sep->end;
}
set_range(op, min, -1, pat, sep);
@@ -387,7 +410,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
// Postfix operators:
postfix:
- if (f ? str >= f->end : !*str) return op;
+ if (str >= f->end) return op;
str = after_spaces(str);
if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
int equal = str[0] == '=';
@@ -431,7 +454,7 @@ vm_op_t *bpeg_stringpattern(file_t *f, const char *str)
if (*str == '\\') {
check(str[1], "Expected more string contents after backslash");
const char *after_escape;
- char e = unescapechar(&str[1], &after_escape);
+ unsigned char e = unescapechar(&str[1], &after_escape);
if (e != str[1]) {
str = after_escape - 1;
continue;
diff --git a/compiler.h b/compiler.h
index d5b4ff1..89693af 100644
--- a/compiler.h
+++ b/compiler.h
@@ -9,13 +9,13 @@
#include "types.h"
#include "file_loader.h"
-__attribute__((nonnull(2)))
+__attribute__((nonnull(1,2)))
vm_op_t *bpeg_simplepattern(file_t *f, const char *str);
-__attribute__((nonnull(2)))
+__attribute__((nonnull(1,2)))
vm_op_t *bpeg_stringpattern(file_t *f, const char *str);
__attribute__((nonnull(1,2)))
vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement);
-__attribute__((nonnull(2)))
+__attribute__((nonnull(1,2)))
vm_op_t *bpeg_pattern(file_t *f, const char *str);
#endif
diff --git a/file_loader.c b/file_loader.c
index 82f594b..20ed253 100644
--- a/file_loader.c
+++ b/file_loader.c
@@ -5,6 +5,7 @@
#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
+#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
@@ -13,6 +14,23 @@
#include "file_loader.h"
+static void populate_lines(file_t *f)
+{
+ // Calculate line numbers:
+ size_t linecap = 10;
+ f->lines = calloc(sizeof(const char*), linecap);
+ f->nlines = 0;
+ char *p = f->contents;
+ for (size_t n = 0; p && p < f->end; ++n) {
+ ++f->nlines;
+ if (n >= linecap)
+ f->lines = realloc(f->lines, sizeof(const char*)*(linecap *= 2));
+ f->lines[n] = p;
+ p = strchr(p, '\n');
+ if (p) ++p;
+ }
+}
+
/*
* Read an entire file into memory.
*/
@@ -23,7 +41,7 @@ file_t *load_file(const char *filename)
if (fd < 0) return NULL;
file_t *f = calloc(sizeof(file_t), 1);
f->filename = strdup(filename);
- // TODO: use mmap when possible
+
struct stat sb;
if (fstat(fd, &sb) == -1)
goto skip_mmap;
@@ -51,21 +69,22 @@ file_t *load_file(const char *filename)
finished_loading:
f->end = &f->contents[f->length];
-
- // Calculate line numbers:
- size_t linecap = 10;
- f->lines = calloc(sizeof(const char*), linecap);
- f->nlines = 0;
- char *p = f->contents;
- for (size_t n = 0; p && *p; ++n) {
- ++f->nlines;
- if (n >= linecap)
- f->lines = realloc(f->lines, sizeof(const char*)*(linecap *= 2));
- f->lines[n] = p;
- p = strchr(p, '\n');
- if (p) ++p;
- }
+ populate_lines(f);
+ return f;
+}
+/*
+ * Create a virtual file from a string.
+ */
+file_t *spoof_file(const char *filename, char *text)
+{
+ if (filename == NULL) filename = "";
+ file_t *f = calloc(sizeof(file_t), 1);
+ f->filename = strdup(filename);
+ f->contents = text;
+ f->length = strlen(text);
+ f->end = &f->contents[f->length];
+ populate_lines(f);
return f;
}
@@ -113,12 +132,19 @@ const char *get_line(file_t *f, size_t line_number)
return f->lines[line_number - 1];
}
-void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg)
+void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...)
{
size_t linenum = get_line_number(f, start);
const char *line = get_line(f, linenum);
size_t charnum = get_char_number(f, start);
- fprintf(dest, "\033[1m%s:%ld:\033[0m %s\n", f->filename, linenum, msg);
+ fprintf(dest, "\033[1m%s:%ld:\033[0m ", f->filename, linenum);
+
+ va_list args;
+ va_start(args, fmt);
+ vfprintf(dest, fmt, args);
+ va_end(args);
+ fputc('\n', dest);
+
const char *eol = linenum == f->nlines ? strchr(line, '\0') : strchr(line, '\n');
if (end == NULL || end > eol) end = eol;
fprintf(dest, "\033[2m% 5ld |\033[0m %.*s\033[41;30m%.*s\033[0m%.*s\n",
diff --git a/file_loader.h b/file_loader.h
index 1634f15..cda8dc3 100644
--- a/file_loader.h
+++ b/file_loader.h
@@ -14,6 +14,7 @@ typedef struct {
} file_t;
file_t *load_file(const char *filename);
+file_t *spoof_file(const char *filename, char *text);
__attribute__((nonnull))
void destroy_file(file_t **f);
__attribute__((pure, nonnull))
@@ -22,6 +23,7 @@ __attribute__((pure, nonnull))
size_t get_char_number(file_t *f, const char *p);
__attribute__((pure, nonnull))
const char *get_line(file_t *f, size_t line_number);
-void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg);
+__attribute__((format (printf, 5, 6)))
+void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *fmt, ...);
#endif
diff --git a/types.h b/types.h
index 21ed0e7..da1ce40 100644
--- a/types.h
+++ b/types.h
@@ -48,7 +48,7 @@ typedef struct vm_op_s {
union {
const char *s;
struct {
- char low, high;
+ unsigned char low, high;
} range;
struct {
ssize_t min, max;
diff --git a/utils.c b/utils.c
index 012c1eb..0c0fa31 100644
--- a/utils.c
+++ b/utils.c
@@ -74,17 +74,17 @@ int matchchar(const char **str, char c)
* character that was escaped.
* Set *end = the first character past the end of the escape sequence.
*/
-char unescapechar(const char *escaped, const char **end)
+unsigned char unescapechar(const char *escaped, const char **end)
{
size_t len = 1;
- char ret = *escaped;
+ unsigned char ret = *escaped;
switch (*escaped) {
case 'a': ret = '\a'; break; case 'b': ret = '\b'; break;
case 'n': ret = '\n'; break; case 'r': ret = '\r'; break;
case 't': ret = '\t'; break; case 'v': ret = '\v'; break;
case 'e': ret = '\033'; break;
case 'x': { // Hex
- static const char hextable[255] = {
+ static const unsigned char hextable[255] = {
['0']=0x10, ['1']=0x1, ['2']=0x2, ['3']=0x3, ['4']=0x4,
['5']=0x5, ['6']=0x6, ['7']=0x7, ['8']=0x8, ['9']=0x9,
['a']=0xa, ['b']=0xb, ['c']=0xc, ['d']=0xd, ['e']=0xe, ['f']=0xf,
diff --git a/utils.h b/utils.h
index 84456a1..d9636ce 100644
--- a/utils.h
+++ b/utils.h
@@ -18,7 +18,7 @@
#define debug(...) do { if (verbose) fprintf(stderr, __VA_ARGS__); } while(0)
__attribute__((nonnull))
-char unescapechar(const char *escaped, const char **end);
+unsigned char unescapechar(const char *escaped, const char **end);
__attribute__((pure, nonnull, returns_nonnull))
const char *after_name(const char *str);
__attribute__((pure, nonnull, returns_nonnull))
diff --git a/vm.c b/vm.c
index 158d58f..ac0a52a 100644
--- a/vm.c
+++ b/vm.c
@@ -119,7 +119,7 @@ static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, un
return m;
}
case VM_RANGE: {
- if (*str < op->args.range.low || *str > op->args.range.high)
+ if ((unsigned char)*str < op->args.range.low || (unsigned char)*str > op->args.range.high)
return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->op = op;