aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2020-09-16 19:35:43 -0700
committer Bruce Hill <bruce@bruce-hill.com> 2020-09-16 19:41:28 -0700
commit 6c237850e90dce317ede7b0d4e53125df15ab62b (patch)
tree c783ab61954de0b3120727245327843a82233542
parent 3483cd75cb0a67d50bdcf9d03a15dc5af67a1986 (diff)
WIP
-rw-r--r-- Makefile 2
-rw-r--r-- bpeg.c 74
-rw-r--r-- compiler.c 54
-rw-r--r-- compiler.h 7
-rw-r--r-- file_loader.c 102
-rw-r--r-- file_loader.h 22
-rw-r--r-- grammar.c 11
-rw-r--r-- grammar.h 5
-rw-r--r-- types.h 3
-rw-r--r-- utils.c 23
-rw-r--r-- utils.h 2
-rw-r--r-- vm.c 50
-rw-r--r-- vm.h 2
13 files changed, 225 insertions, 132 deletions
diff --git a/Makefile b/Makefile
index 2eb7f56..e914372 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ CWARN=-Wall -Wpedantic -Wextra -Wno-unknown-pragmas -Wno-missing-field-initializ
G ?=
O ?= -O3
-CFILES=compiler.c grammar.c utils.c vm.c
+CFILES=compiler.c grammar.c utils.c vm.c file_loader.c
OBJFILES=$(CFILES:.c=.o)
all: $(NAME)
diff --git a/bpeg.c b/bpeg.c
index 7e89dba..b8b4c0d 100644
--- a/bpeg.c
+++ b/bpeg.c
@@ -12,6 +12,7 @@
#include <unistd.h>
#include "compiler.h"
+#include "file_loader.h"
#include "grammar.h"
#include "utils.h"
#include "vm.h"
@@ -50,25 +51,18 @@ static char *getflag(const char *flag, char *argv[], int *i)
static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags)
{
- char *input;
- if (filename == NULL || streq(filename, "-")) {
- input = readfile(STDIN_FILENO);
- } else {
- int fd = open(filename, O_RDONLY);
- check(fd >= 0, "Couldn't open file: %s", filename);
- input = readfile(fd);
- }
- match_t *m = match(g, input, pattern, flags);
+ file_t *f = load_file(filename);
+ match_t *m = match(g, f, f->contents, pattern, flags);
if (m != NULL && m->end > m->start + 1) {
if (filename != NULL) {
if (isatty(STDOUT_FILENO)) printf("\033[1;4;33m%s\033[0m\n", filename);
else printf("%s\n", filename);
}
print_match(m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, (flags & BPEG_VERBOSE) != 0);
- freefile(input);
+ destroy_file(&f);
return 0;
} else {
- freefile(input);
+ destroy_file(&f);
return 1;
}
}
@@ -85,12 +79,11 @@ int main(int argc, char *argv[])
grammar_t *g = new_grammar();
// Load builtins:
- int fd;
- if ((fd=open("/etc/xdg/bpeg/builtins.bpeg", O_RDONLY)) >= 0)
- load_grammar(g, readfile(fd)); // Keep in memory for debugging output
+ if (access("/etc/xdg/bpeg/builtins.bpeg", R_OK) != -1)
+ load_grammar(g, load_file("/etc/xdg/bpeg/builtins.bpeg")); // Keep in memory for debugging output
sprintf(path, "%s/.config/bpeg/builtins.bpeg", getenv("HOME"));
- if ((fd=open(path, O_RDONLY)) >= 0)
- load_grammar(g, readfile(fd)); // Keep in memory for debugging output
+ if (access(path, R_OK) != -1)
+ load_grammar(g, load_file(path)); // Keep in memory for debugging output
int i, npatterns = 0;
check(argc > 1, "%s", usage);
@@ -106,63 +99,58 @@ int main(int argc, char *argv[])
} else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) {
flags |= BPEG_IGNORECASE;
} else if (FLAG("--replace") || FLAG("-r")) {
- vm_op_t *p = bpeg_replacement(bpeg_pattern("pattern"), flag);
+ vm_op_t *p = bpeg_replacement(bpeg_pattern(NULL, "pattern"), flag);
check(p, "Replacement failed to compile");
- add_def(g, flag, "replacement", p);
+ add_def(g, NULL, flag, "replacement", p);
rule = "replace-all";
} else if (FLAG("--grammar") || FLAG("-g")) {
- int fd;
- if (streq(flag, "-")) {
- fd = STDIN_FILENO;
- } else {
- fd = open(flag, O_RDONLY);
- if (fd < 0) {
- sprintf(path, "%s/.config/bpeg/%s.bpeg", getenv("HOME"), flag);
- fd = open(path, O_RDONLY);
- }
- if (fd < 0) {
- sprintf(path, "/etc/xdg/bpeg/%s.bpeg", flag);
- fd = open(path, O_RDONLY);
- }
- check(fd >= 0, "Couldn't find grammar: %s", flag);
+ file_t *f = load_file(flag);
+ if (f == NULL) {
+ sprintf(path, "%s/.config/bpeg/%s.bpeg", getenv("HOME"), flag);
+ f = load_file(path);
+ }
+ if (f == NULL) {
+ sprintf(path, "/etc/xdg/bpeg/%s.bpeg", flag);
+ f = load_file(path);
}
- load_grammar(g, readfile(fd)); // Keep in memory for debug output
+ check(f != NULL, "Couldn't find grammar: %s", flag);
+ load_grammar(g, f); // Keep in memory for debug output
} else if (FLAG("--define") || FLAG("-d")) {
char *def = flag;
char *eq = strchr(def, '=');
check(eq, "Rule definitions must include an '='\n\n%s", usage);
*eq = '\0';
char *src = ++eq;
- vm_op_t *pat = bpeg_pattern(src);
+ vm_op_t *pat = bpeg_pattern(NULL, src);
check(pat, "Failed to compile pattern");
- add_def(g, src, def, pat);
+ add_def(g, NULL, src, def, pat);
} else if (FLAG("--define-string") || FLAG("-D")) {
char *def = flag;
char *eq = strchr(def, '=');
check(eq, "Rule definitions must include an '='\n\n%s", usage);
*eq = '\0';
char *src = ++eq;
- vm_op_t *pat = bpeg_stringpattern(src);
+ vm_op_t *pat = bpeg_stringpattern(NULL, src);
check(pat, "Failed to compile pattern");
- add_def(g, src, def, pat);
+ add_def(g, NULL, src, def, pat);
} else if (FLAG("--pattern") || FLAG("-p")) {
check(npatterns == 0, "Cannot define multiple patterns");
- vm_op_t *p = bpeg_pattern(flag);
+ vm_op_t *p = bpeg_pattern(NULL, flag);
check(p, "Pattern failed to compile: '%s'", flag);
- add_def(g, flag, "pattern", p);
+ add_def(g, NULL, flag, "pattern", p);
++npatterns;
} else if (FLAG("--pattern-string") || FLAG("-P")) {
- vm_op_t *p = bpeg_stringpattern(flag);
+ vm_op_t *p = bpeg_stringpattern(NULL, flag);
check(p, "Pattern failed to compile");
- add_def(g, flag, "pattern", p);
+ add_def(g, NULL, flag, "pattern", p);
++npatterns;
} else if (FLAG("--mode") || FLAG("-m")) {
rule = flag;
} else if (argv[i][0] != '-') {
if (npatterns > 0) break;
- vm_op_t *p = bpeg_stringpattern(argv[i]);
+ vm_op_t *p = bpeg_stringpattern(NULL, argv[i]);
check(p, "Pattern failed to compile");
- add_def(g, argv[i], "pattern", p);
+ add_def(g, NULL, argv[i], "pattern", p);
++npatterns;
} else {
printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage);
diff --git a/compiler.c b/compiler.c
index b27717b..fe7ad3f 100644
--- a/compiler.c
+++ b/compiler.c
@@ -5,8 +5,8 @@
#include "compiler.h"
#include "utils.h"
-static vm_op_t *expand_chain(vm_op_t *first);
-static vm_op_t *expand_choices(vm_op_t *first);
+static vm_op_t *expand_chain(file_t *f, vm_op_t *first);
+static vm_op_t *expand_choices(file_t *f, vm_op_t *first);
static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second);
static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op_t *sep);
@@ -31,11 +31,11 @@ static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op
* followed by any patterns (e.g. "`x `y"), otherwise return
* the original input.
*/
-static vm_op_t *expand_chain(vm_op_t *first)
+static vm_op_t *expand_chain(file_t *f, vm_op_t *first)
{
- vm_op_t *second = bpeg_simplepattern(first->end);
+ vm_op_t *second = bpeg_simplepattern(f, first->end);
if (second == NULL) return first;
- second = expand_chain(second);
+ second = expand_chain(f, second);
check(second->end > first->end, "No forward progress in chain!");
return chain_together(first, second);
}
@@ -45,14 +45,14 @@ static vm_op_t *expand_chain(vm_op_t *first)
* followed by any "/"-separated patterns (e.g. "`x/`y"), otherwise
* return the original input.
*/
-static vm_op_t *expand_choices(vm_op_t *first)
+static vm_op_t *expand_choices(file_t *f, vm_op_t *first)
{
- first = expand_chain(first);
+ first = expand_chain(f, first);
const char *str = first->end;
if (!matchchar(&str, '/')) return first;
- vm_op_t *second = bpeg_simplepattern(str);
+ vm_op_t *second = bpeg_simplepattern(f, str);
check(second, "Expected pattern after '/'");
- second = expand_choices(second);
+ second = expand_choices(f, second);
vm_op_t *choice = calloc(sizeof(vm_op_t), 1);
choice->op = VM_OTHERWISE;
choice->start = first->start;
@@ -84,7 +84,7 @@ static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second)
/*
* Compile a string of BPEG code into virtual machine opcodes
*/
-vm_op_t *bpeg_simplepattern(const char *str)
+vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
{
if (!*str) return NULL;
str = after_spaces(str);
@@ -100,7 +100,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
if (matchchar(&str, '.')) { // ".."
if (matchchar(&str, '.')) // "..."
op->multiline = 1;
- vm_op_t *till = bpeg_simplepattern(str);
+ vm_op_t *till = bpeg_simplepattern(f, str);
op->op = VM_UPTO_AND;
op->len = -1;
op->args.pat = till;
@@ -177,7 +177,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
}
// Not <pat>
case '!': {
- vm_op_t *p = bpeg_simplepattern(str);
+ vm_op_t *p = bpeg_simplepattern(f, str);
check(p, "Expected pattern after '!'\n");
str = p->end;
op->op = VM_NOT;
@@ -202,13 +202,13 @@ vm_op_t *bpeg_simplepattern(const char *str)
} else {
min = n1, max = n1;
}
- vm_op_t *pat = bpeg_simplepattern(str);
+ vm_op_t *pat = bpeg_simplepattern(f, str);
check(pat, "Expected pattern after repetition count");
str = pat->end;
str = after_spaces(str);
vm_op_t *sep = NULL;
if (matchchar(&str, '%')) {
- sep = bpeg_simplepattern(str);
+ sep = bpeg_simplepattern(f, str);
check(sep, "Expected pattern for separator after '%%'");
str = sep->end;
} else {
@@ -219,7 +219,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
}
// Lookbehind
case '<': {
- vm_op_t *pat = bpeg_simplepattern(str);
+ vm_op_t *pat = bpeg_simplepattern(f, str);
check(pat, "Expected pattern after <");
str = pat->end;
check(pat->len != -1, "Lookbehind patterns must have a fixed length");
@@ -231,7 +231,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
}
// Lookahead
case '>': {
- vm_op_t *pat = bpeg_simplepattern(str);
+ vm_op_t *pat = bpeg_simplepattern(f, str);
check(pat, "Expected pattern after >");
str = pat->end;
op->op = VM_BEFORE;
@@ -242,9 +242,9 @@ vm_op_t *bpeg_simplepattern(const char *str)
// Parentheses
case '(': {
free(op);
- op = bpeg_simplepattern(str);
+ op = bpeg_simplepattern(f, str);
check(op, "Expected pattern inside parentheses");
- op = expand_choices(op);
+ op = expand_choices(f, op);
str = op->end;
str = after_spaces(str);
check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str);
@@ -261,7 +261,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
str = closing;
check(matchchar(&str, ']'), "Expected closing ']'");
}
- vm_op_t *pat = bpeg_simplepattern(str);
+ vm_op_t *pat = bpeg_simplepattern(f, str);
check(pat, "Expected pattern after @");
str = pat->end;
op->args.capture.capture_pat = pat;
@@ -275,9 +275,9 @@ vm_op_t *bpeg_simplepattern(const char *str)
if (strncmp(str, "=>", 2) == 0) {
str += strlen("=>");
} else {
- pat = bpeg_simplepattern(str);
+ pat = bpeg_simplepattern(f, str);
check(pat, "Invalid pattern after '{'");
- pat = expand_choices(pat);
+ pat = expand_choices(f, pat);
str = pat->end;
str = after_spaces(str);
check(matchchar(&str, '=') && matchchar(&str, '>'),
@@ -360,7 +360,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
if (strncmp(after_spaces(str), "==", 2) == 0) {
str = after_spaces(str)+2;
vm_op_t *first = op;
- vm_op_t *second = bpeg_simplepattern(str);
+ vm_op_t *second = bpeg_simplepattern(f, str);
check(second, "Expected pattern after '=='");
check(first->len == -1 || second->len == -1 || first->len == second->len,
"Two patterns cannot possibly match the same (different lengths: %ld != %ld)",
@@ -382,7 +382,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
/*
* Similar to bpeg_simplepattern, except that the pattern begins with an implicit, unclosable quote.
*/
-vm_op_t *bpeg_stringpattern(const char *str)
+vm_op_t *bpeg_stringpattern(file_t *f, const char *str)
{
vm_op_t *ret = NULL;
while (*str) {
@@ -395,7 +395,7 @@ vm_op_t *bpeg_stringpattern(const char *str)
for (; *str; str++) {
if (*str == '\\') {
check(str[1], "Expected more string contents after backslash");
- interp = bpeg_simplepattern(str + 1);
+ interp = bpeg_simplepattern(f, str + 1);
check(interp != NULL, "No valid BPEG pattern detected after backslash");
break;
}
@@ -448,10 +448,10 @@ vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement)
return op;
}
-vm_op_t *bpeg_pattern(const char *str)
+vm_op_t *bpeg_pattern(file_t *f, const char *str)
{
- vm_op_t *op = bpeg_simplepattern(str);
- if (op != NULL) op = expand_choices(op);
+ vm_op_t *op = bpeg_simplepattern(f, str);
+ if (op != NULL) op = expand_choices(f, op);
return op;
}
diff --git a/compiler.h b/compiler.h
index 855b05d..005eacc 100644
--- a/compiler.h
+++ b/compiler.h
@@ -7,11 +7,12 @@
#include <stdlib.h>
#include "types.h"
+#include "file_loader.h"
-vm_op_t *bpeg_simplepattern(const char *str);
-vm_op_t *bpeg_stringpattern(const char *str);
+vm_op_t *bpeg_simplepattern(file_t *f, const char *str);
+vm_op_t *bpeg_stringpattern(file_t *f, const char *str);
vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement);
-vm_op_t *bpeg_pattern(const char *str);
+vm_op_t *bpeg_pattern(file_t *f, const char *str);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/file_loader.c b/file_loader.c
new file mode 100644
index 0000000..b7c3817
--- /dev/null
+++ b/file_loader.c
@@ -0,0 +1,102 @@
+/*
+ * file_loader.c - Implementation of some file loading functionality.
+ */
+
+#include <ctype.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "file_loader.h"
+
+/*
+ * Read an entire file into memory and record where each line begins.
+ *
+ * filename: path to open read-only; NULL or "-" reads from standard input.
+ *
+ * Returns a newly allocated file_t, or NULL if the file could not be opened
+ * or an allocation failed. The descriptor is closed before returning (this
+ * includes standard input). A read error is treated like end of input, so a
+ * partially read file is still returned.
+ *
+ * On success `contents` is NUL-terminated, `lines[i]` points at the first
+ * byte of line i+1, and `nlines` is the number of entries in `lines`. There
+ * is always at least one entry, so an empty file counts as one empty line.
+ */
+file_t *load_file(const char *filename)
+{
+    if (filename == NULL) filename = "-";
+    int fd = strcmp(filename, "-") != 0 ? open(filename, O_RDONLY) : STDIN_FILENO;
+    if (fd < 0) return NULL;
+
+    size_t capacity = 1000, linecap = 10;
+    file_t *f = calloc(1, sizeof(file_t));
+    if (f == NULL) goto fail;
+    f->filename = strdup(filename);
+    // TODO: use mmap when possible
+    f->mmapped = 0;
+    f->length = 0;
+    // One spare byte is always kept for the NUL terminator
+    f->contents = calloc(capacity + 1, sizeof(char));
+    f->lines = calloc(linecap, sizeof(char*));
+    if (f->filename == NULL || f->contents == NULL || f->lines == NULL) goto fail;
+
+    ssize_t just_read;
+    while ((just_read=read(fd, &f->contents[f->length], capacity - f->length)) > 0) {
+        f->length += (size_t)just_read;
+        if (f->length >= capacity) {
+            capacity *= 2;
+            // Grow through a temporary so the old buffer is not lost on failure
+            char *bigger = realloc(f->contents, capacity + 1);
+            if (bigger == NULL) goto fail;
+            f->contents = bigger;
+        }
+    }
+    f->contents[f->length] = '\0';
+
+    // Build the line table. The do/while guarantees an entry for line 1
+    // even when the file is empty, and nlines counts every entry stored.
+    char *p = f->contents;
+    do {
+        if (f->nlines >= linecap) {
+            linecap *= 2;
+            char **more = realloc(f->lines, linecap * sizeof(char*));
+            if (more == NULL) goto fail;
+            f->lines = more;
+        }
+        f->lines[f->nlines++] = p;
+        p = strchr(p, '\n');
+        if (p) ++p;
+    } while (p && *p);
+
+    close(fd);
+    return f;
+
+fail:
+    close(fd);
+    if (f != NULL) {
+        free(f->lines);
+        free(f->contents);
+        free((char*)f->filename);
+        free(f);
+    }
+    return NULL;
+}
+
+/*
+ * Free a file_t and everything it owns, then clear the caller's handle.
+ *
+ * f: address of the handle to release. A NULL address, or a handle that is
+ *    already NULL, is ignored, so destroying the same handle twice is safe.
+ */
+void destroy_file(file_t **f)
+{
+    if (f == NULL || *f == NULL) return;
+    // filename is const-qualified in the struct, hence the cast for free()
+    free((char*)(*f)->filename);
+    free((*f)->lines);
+    free((*f)->contents);
+    free(*f);
+    *f = NULL;
+}
+
+/*
+ * Find which line of a loaded file a position belongs to.
+ *
+ * f: the file; its line table must be ordered by address.
+ * p: a pointer into f->contents (the terminating NUL position is accepted
+ *    and is attributed to the last line).
+ *
+ * Returns the 1-based line number, or 0 if p lies outside the file.
+ */
+size_t get_line_number(file_t *f, const char *p)
+{
+    if (f->nlines == 0 || p < f->lines[0] || p > f->contents + f->length)
+        return 0;
+    // Binary search for the last line that starts at or before p.
+    // Invariant: lines[lo] <= p, and hi is either nlines or lines[hi] > p.
+    size_t lo = 0, hi = f->nlines;
+    while (hi - lo > 1) {
+        size_t mid = lo + (hi - lo)/2;
+        if (f->lines[mid] <= p) lo = mid;
+        else hi = mid;
+    }
+    return lo + 1;
+}
+
+/*
+ * Look up the start of a line by its 1-based number.
+ * Returns NULL when line_number is 0 or greater than f->nlines.
+ */
+const char *get_line(file_t *f, size_t line_number)
+{
+    const char *line_start = NULL;
+    if (line_number >= 1 && line_number <= f->nlines) {
+        size_t index = line_number - 1;
+        line_start = f->lines[index];
+    }
+    return line_start;
+}
+
+/*
+ * Print a message about a span of a file, followed by the source line with
+ * the span highlighted using ANSI escape sequences.
+ *
+ * dest:  stream to write to.
+ * f:     file that the span belongs to.
+ * start: first byte of the span; its line is the one that gets printed.
+ * end:   one past the last byte of the span. NULL, or anything beyond the
+ *        end of the line, highlights through the end of the line.
+ * msg:   text printed after the "filename:line:column:" prefix.
+ *
+ * If start cannot be mapped to a line, only the filename and message are
+ * printed.
+ */
+void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg)
+{
+    size_t linenum = get_line_number(f, start);
+    const char *line = get_line(f, linenum);
+    if (line == NULL) {
+        fprintf(dest, "\033[1m%s:\033[0m %s\n", f->filename, msg);
+        return;
+    }
+    size_t charnum = 1 + (size_t)(start - line);
+    fprintf(dest, "\033[1m%s:%zu:%zu:\033[0m %s\n",
+            f->filename, linenum, charnum, msg);
+    // The final line may have no trailing newline; fall back to the NUL.
+    const char *eol = strchr(line, '\n');
+    if (eol == NULL) eol = strchr(line, '\0');
+    if (end == NULL || end > eol) end = eol;
+    // A negative "%.*s" precision would print the whole rest of the buffer
+    if (end < start) end = start;
+    fprintf(dest, "\033[2m%5zu |\033[0m %.*s\033[31;4;1m%.*s\033[0m%.*s\n",
+            linenum,
+            (int)(start - line), line,
+            (int)(end - start), start,
+            (int)(eol - end), end);
+}
diff --git a/file_loader.h b/file_loader.h
new file mode 100644
index 0000000..cb49373
--- /dev/null
+++ b/file_loader.h
@@ -0,0 +1,22 @@
+/*
+ * file_loader.h - Definitions of an API for loading files.
+ */
+#ifndef FILE_LOADER__H
+#define FILE_LOADER__H
+
+#include <stdio.h>
+
+/*
+ * An input file held entirely in memory, as produced by load_file().
+ */
+typedef struct {
+ const char *filename; // name the file was loaded under (heap copy made by load_file)
+ char *contents, **lines; // contents: NUL-terminated file data; lines: pointers into contents marking line starts
+ size_t length, nlines; // length: bytes in contents, not counting the terminator; nlines: line count that get_line() bounds-checks against
+ unsigned int mmapped:1; // load_file() currently always sets this to 0 (mmap support is a TODO there)
+} file_t;
+
+file_t *load_file(const char *filename);
+void destroy_file(file_t **f);
+size_t get_line_number(file_t *f, const char *p);
+const char *get_line(file_t *f, size_t line_number);
+void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg);
+
+#endif
diff --git a/grammar.c b/grammar.c
index 61c82c9..b4621ee 100644
--- a/grammar.c
+++ b/grammar.c
@@ -4,6 +4,7 @@
#include "grammar.h"
#include "compiler.h"
+#include "file_loader.h"
#include "utils.h"
grammar_t *new_grammar(void)
@@ -13,12 +14,13 @@ grammar_t *new_grammar(void)
return g;
}
-void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op)
+void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t *op)
{
if (g->defcount >= g->defcapacity) {
g->definitions = realloc(g->definitions, sizeof(&g->definitions[0])*(g->defcapacity += 32));
}
int i = g->defcount;
+ g->definitions[i].file = f;
g->definitions[i].source = src;
g->definitions[i].name = name;
g->definitions[i].op = op;
@@ -29,9 +31,10 @@ void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op)
* Load the given grammar (semicolon-separated definitions)
* and return the first rule defined.
*/
-vm_op_t *load_grammar(grammar_t *g, const char *src)
+vm_op_t *load_grammar(grammar_t *g, file_t *f)
{
vm_op_t *ret = NULL;
+ const char *src = f->contents;
do {
src = after_spaces(src);
if (!*src) break;
@@ -41,9 +44,9 @@ vm_op_t *load_grammar(grammar_t *g, const char *src)
name = strndup(name, (size_t)(name_end-name));
src = after_spaces(name_end);
check(matchchar(&src, '='), "Expected '=' in definition");
- vm_op_t *op = bpeg_pattern(src);
+ vm_op_t *op = bpeg_pattern(f, src);
check(op, "Couldn't load definition");
- add_def(g, src, name, op);
+ add_def(g, f, src, name, op);
if (ret == NULL) {
ret = op;
}
diff --git a/grammar.h b/grammar.h
index f75b111..0f57616 100644
--- a/grammar.h
+++ b/grammar.h
@@ -7,13 +7,14 @@
#include <stdlib.h>
#include <string.h>
+#include "file_loader.h"
#include "types.h"
grammar_t *new_grammar(void);
-void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op);
+void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t *op);
void push_backref(grammar_t *g, const char *name, match_t *capture);
void pop_backrefs(grammar_t *g, size_t count);
-vm_op_t *load_grammar(grammar_t *g, const char *source);
+vm_op_t *load_grammar(grammar_t *g, file_t *f);
vm_op_t *lookup(grammar_t *g, const char *name);
#endif
diff --git a/types.h b/types.h
index 3749156..de408cc 100644
--- a/types.h
+++ b/types.h
@@ -6,6 +6,8 @@
#include <sys/types.h>
+#include "file_loader.h"
+
enum BPEGFlag {
BPEG_VERBOSE = 1 << 0,
BPEG_IGNORECASE = 1 << 1,
@@ -85,6 +87,7 @@ typedef struct match_s {
typedef struct {
const char *name;
const char *source;
+ file_t *file;
vm_op_t *op;
} def_t;
diff --git a/utils.c b/utils.c
index 7844934..6b9af5c 100644
--- a/utils.c
+++ b/utils.c
@@ -153,28 +153,5 @@ size_t unescape_string(char *dest, const char *src, size_t bufsize)
#undef PUT
}
-/*
- * Read an entire file into memory. (Guaranteeing that ret[-1] == '\0')
- */
-char *readfile(int fd)
-{
- size_t capacity = 1000, len = 0;
- char *buf = calloc(sizeof(char), capacity+1);
- buf[len++] = '\0';
- ssize_t just_read;
- while ((just_read=read(fd, &buf[len], capacity-len)) > 0) {
- len += (size_t)just_read;
- if (len >= capacity)
- buf = realloc(buf, sizeof(char)*(capacity *= 2));
- }
- buf[len] = '\0';
- close(fd);
- return &buf[1];
-}
-
-void freefile(char *f)
-{
- free(&f[-1]);
-}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/utils.h b/utils.h
index 8588d24..28bfe05 100644
--- a/utils.h
+++ b/utils.h
@@ -17,8 +17,6 @@
#define check(cond, ...) do { if (!(cond)) { fprintf(stderr, __VA_ARGS__); fwrite("\n", 1, 1, stderr); _exit(1); } } while(0)
#define debug(...) do { if (verbose) fprintf(stderr, __VA_ARGS__); } while(0)
-char *readfile(int fd);
-void freefile(char *f);
char unescapechar(const char *escaped, const char **end);
const char *after_name(const char *str);
const char *after_spaces(const char *str);
diff --git a/vm.c b/vm.c
index 7b6a04b..637c4b8 100644
--- a/vm.c
+++ b/vm.c
@@ -77,7 +77,7 @@ typedef struct recursive_ref_s {
* a match struct, or NULL if no match is found.
* The returned value should be free()'d to avoid memory leaking.
*/
-static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec)
+static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec)
{
switch (op->op) {
case VM_EMPTY: {
@@ -116,7 +116,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
return m;
}
case VM_NOT: {
- match_t *m = _match(g, str, op->args.pat, flags, rec);
+ match_t *m = _match(g, f, str, op->args.pat, flags, rec);
if (m != NULL) {
destroy_match(&m);
return NULL;
@@ -134,7 +134,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
if (op->args.pat) {
for (const char *prev = NULL; prev < str; ) {
prev = str;
- match_t *p = _match(g, str, op->args.pat, flags, rec);
+ match_t *p = _match(g, f, str, op->args.pat, flags, rec);
if (p) {
m->child = p;
m->end = p->end;
@@ -168,11 +168,11 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
// Separator
match_t *sep = NULL;
if (op->args.repetitions.sep != NULL && reps > 0) {
- sep = _match(g, str, op->args.repetitions.sep, flags, rec);
+ sep = _match(g, f, str, op->args.repetitions.sep, flags, rec);
if (sep == NULL) break;
str = sep->end;
}
- match_t *p = _match(g, str, op->args.repetitions.repeat_pat, flags, rec);
+ match_t *p = _match(g, f, str, op->args.repetitions.repeat_pat, flags, rec);
if (p == NULL || (p->end == prev && reps > 0)) { // Prevent infinite loops
destroy_match(&sep);
destroy_match(&p);
@@ -204,11 +204,8 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
case VM_AFTER: {
ssize_t backtrack = op->args.pat->len;
check(backtrack != -1, "'<' is only allowed for fixed-length operations");
- // Check for necessary space:
- for (int i = 0; i < backtrack; i++) {
- if (str[-i] == '\0') return NULL;
- }
- match_t *before = _match(g, str - backtrack, op->args.pat, flags, rec);
+ if (str - backtrack < f->contents) return NULL;
+ match_t *before = _match(g, f, str - backtrack, op->args.pat, flags, rec);
if (before == NULL) return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
@@ -218,7 +215,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
return m;
}
case VM_BEFORE: {
- match_t *after = _match(g, str, op->args.pat, flags, rec);
+ match_t *after = _match(g, f, str, op->args.pat, flags, rec);
if (after == NULL) return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
@@ -228,7 +225,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
return m;
}
case VM_CAPTURE: {
- match_t *p = _match(g, str, op->args.pat, flags, rec);
+ match_t *p = _match(g, f, str, op->args.pat, flags, rec);
if (p == NULL) return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
@@ -241,16 +238,16 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
return m;
}
case VM_OTHERWISE: {
- match_t *m = _match(g, str, op->args.multiple.first, flags, rec);
- if (m == NULL) m = _match(g, str, op->args.multiple.second, flags, rec);
+ match_t *m = _match(g, f, str, op->args.multiple.first, flags, rec);
+ if (m == NULL) m = _match(g, f, str, op->args.multiple.second, flags, rec);
return m;
}
case VM_CHAIN: {
- match_t *m1 = _match(g, str, op->args.multiple.first, flags, rec);
+ match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec);
if (m1 == NULL) return NULL;
size_t nbackrefs = push_backrefs(g, m1);
- match_t *m2 = _match(g, m1->end, op->args.multiple.second, flags, rec);
+ match_t *m2 = _match(g, f, m1->end, op->args.multiple.second, flags, rec);
pop_backrefs(g, nbackrefs);
if (m2 == NULL) {
destroy_match(&m1);
@@ -265,11 +262,11 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
return m;
}
case VM_EQUAL: {
- match_t *m1 = _match(g, str, op->args.multiple.first, flags, rec);
+ match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec);
if (m1 == NULL) return NULL;
// <p1>==<p2> matches iff both have the same start and end point:
- match_t *m2 = _match(g, str, op->args.multiple.second, flags, rec);
+ match_t *m2 = _match(g, f, str, op->args.multiple.second, flags, rec);
if (m2 == NULL || m2->end != m1->end) {
destroy_match(&m1);
destroy_match(&m2);
@@ -288,7 +285,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
m->start = str;
m->op = op;
if (op->args.replace.replace_pat) {
- match_t *p = _match(g, str, op->args.replace.replace_pat, flags, rec);
+ match_t *p = _match(g, f, str, op->args.replace.replace_pat, flags, rec);
if (p == NULL) return NULL;
m->child = p;
m->end = p->end;
@@ -320,7 +317,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
};
match_t *best = NULL;
left_recursive:;
- match_t *p = _match(g, str, r, flags, &wrap);
+ match_t *p = _match(g, f, str, r, flags, &wrap);
if (p == NULL) return best;
if (wrap.hit && (best == NULL || p->end > best->end)) {
best = p;
@@ -343,7 +340,10 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
return match_backref(str, op, (match_t*)op->args.backref, flags);
}
case VM_NODENT: {
- if (str[-1] == '\0') { // First line
+ size_t linenum = get_line_number(f, str);
+ if (linenum == 1) { // First line
+ if (str > f->contents)
+ return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->end = str;
@@ -352,9 +352,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
} else if (str[-1] != '\n') {
return NULL; // Not at beginning of line
}
- const char *p = &str[-1];
- while (*p == '\n') --p; // Skip blank lines
- while (p[-1] && p[-1] != '\n') --p; // Backtrack to start of last (nonblank) line
+ const char *p = get_line(f, linenum - 1);
// Count indentation:
char denter = *p;
int dents = 0;
@@ -686,9 +684,9 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign
return ret;
}
-match_t *match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags)
+match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
{
- return _match(g, str, op, flags, NULL);
+ return _match(g, f, str, op, flags, NULL);
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
diff --git a/vm.h b/vm.h
index ac0269f..feb8eeb 100644
--- a/vm.h
+++ b/vm.h
@@ -12,7 +12,7 @@
#include "types.h"
const char *opcode_name(enum VMOpcode o);
-match_t *match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags);
+match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
void destroy_match(match_t **m);
void print_pattern(vm_op_t *op);
void print_match(match_t *m, const char *color, int verbose);