This commit is contained in:
Bruce Hill 2020-09-16 19:35:43 -07:00
parent 3483cd75cb
commit 6c237850e9
13 changed files with 225 additions and 132 deletions

View File

@ -6,7 +6,7 @@ CWARN=-Wall -Wpedantic -Wextra -Wno-unknown-pragmas -Wno-missing-field-initializ
G ?=
O ?= -O3
CFILES=compiler.c grammar.c utils.c vm.c
CFILES=compiler.c grammar.c utils.c vm.c file_loader.c
OBJFILES=$(CFILES:.c=.o)
all: $(NAME)

74
bpeg.c
View File

@ -12,6 +12,7 @@
#include <unistd.h>
#include "compiler.h"
#include "file_loader.h"
#include "grammar.h"
#include "utils.h"
#include "vm.h"
@ -50,25 +51,18 @@ static char *getflag(const char *flag, char *argv[], int *i)
static int run_match(grammar_t *g, const char *filename, vm_op_t *pattern, unsigned int flags)
{
char *input;
if (filename == NULL || streq(filename, "-")) {
input = readfile(STDIN_FILENO);
} else {
int fd = open(filename, O_RDONLY);
check(fd >= 0, "Couldn't open file: %s", filename);
input = readfile(fd);
}
match_t *m = match(g, input, pattern, flags);
file_t *f = load_file(filename);
match_t *m = match(g, f, f->contents, pattern, flags);
if (m != NULL && m->end > m->start + 1) {
if (filename != NULL) {
if (isatty(STDOUT_FILENO)) printf("\033[1;4;33m%s\033[0m\n", filename);
else printf("%s\n", filename);
}
print_match(m, isatty(STDOUT_FILENO) ? "\033[0m" : NULL, (flags & BPEG_VERBOSE) != 0);
freefile(input);
destroy_file(&f);
return 0;
} else {
freefile(input);
destroy_file(&f);
return 1;
}
}
@ -85,12 +79,11 @@ int main(int argc, char *argv[])
grammar_t *g = new_grammar();
// Load builtins:
int fd;
if ((fd=open("/etc/xdg/bpeg/builtins.bpeg", O_RDONLY)) >= 0)
load_grammar(g, readfile(fd)); // Keep in memory for debugging output
if (access("/etc/xdg/bpeg/builtins.bpeg", R_OK) != -1)
load_grammar(g, load_file("/etc/xdg/bpeg/builtins.bpeg")); // Keep in memory for debugging output
sprintf(path, "%s/.config/bpeg/builtins.bpeg", getenv("HOME"));
if ((fd=open(path, O_RDONLY)) >= 0)
load_grammar(g, readfile(fd)); // Keep in memory for debugging output
if (access(path, R_OK) != -1)
load_grammar(g, load_file(path)); // Keep in memory for debugging output
int i, npatterns = 0;
check(argc > 1, "%s", usage);
@ -106,63 +99,58 @@ int main(int argc, char *argv[])
} else if (streq(argv[i], "--ignore-case") || streq(argv[i], "-i")) {
flags |= BPEG_IGNORECASE;
} else if (FLAG("--replace") || FLAG("-r")) {
vm_op_t *p = bpeg_replacement(bpeg_pattern("pattern"), flag);
vm_op_t *p = bpeg_replacement(bpeg_pattern(NULL, "pattern"), flag);
check(p, "Replacement failed to compile");
add_def(g, flag, "replacement", p);
add_def(g, NULL, flag, "replacement", p);
rule = "replace-all";
} else if (FLAG("--grammar") || FLAG("-g")) {
int fd;
if (streq(flag, "-")) {
fd = STDIN_FILENO;
} else {
fd = open(flag, O_RDONLY);
if (fd < 0) {
sprintf(path, "%s/.config/bpeg/%s.bpeg", getenv("HOME"), flag);
fd = open(path, O_RDONLY);
}
if (fd < 0) {
sprintf(path, "/etc/xdg/bpeg/%s.bpeg", flag);
fd = open(path, O_RDONLY);
}
check(fd >= 0, "Couldn't find grammar: %s", flag);
file_t *f = load_file(flag);
if (f == NULL) {
sprintf(path, "%s/.config/bpeg/%s.bpeg", getenv("HOME"), flag);
f = load_file(path);
}
load_grammar(g, readfile(fd)); // Keep in memory for debug output
if (f == NULL) {
sprintf(path, "/etc/xdg/bpeg/%s.bpeg", flag);
f = load_file(path);
}
check(f != NULL, "Couldn't find grammar: %s", flag);
load_grammar(g, f); // Keep in memory for debug output
} else if (FLAG("--define") || FLAG("-d")) {
char *def = flag;
char *eq = strchr(def, '=');
check(eq, "Rule definitions must include an '='\n\n%s", usage);
*eq = '\0';
char *src = ++eq;
vm_op_t *pat = bpeg_pattern(src);
vm_op_t *pat = bpeg_pattern(NULL, src);
check(pat, "Failed to compile pattern");
add_def(g, src, def, pat);
add_def(g, NULL, src, def, pat);
} else if (FLAG("--define-string") || FLAG("-D")) {
char *def = flag;
char *eq = strchr(def, '=');
check(eq, "Rule definitions must include an '='\n\n%s", usage);
*eq = '\0';
char *src = ++eq;
vm_op_t *pat = bpeg_stringpattern(src);
vm_op_t *pat = bpeg_stringpattern(NULL, src);
check(pat, "Failed to compile pattern");
add_def(g, src, def, pat);
add_def(g, NULL, src, def, pat);
} else if (FLAG("--pattern") || FLAG("-p")) {
check(npatterns == 0, "Cannot define multiple patterns");
vm_op_t *p = bpeg_pattern(flag);
vm_op_t *p = bpeg_pattern(NULL, flag);
check(p, "Pattern failed to compile: '%s'", flag);
add_def(g, flag, "pattern", p);
add_def(g, NULL, flag, "pattern", p);
++npatterns;
} else if (FLAG("--pattern-string") || FLAG("-P")) {
vm_op_t *p = bpeg_stringpattern(flag);
vm_op_t *p = bpeg_stringpattern(NULL, flag);
check(p, "Pattern failed to compile");
add_def(g, flag, "pattern", p);
add_def(g, NULL, flag, "pattern", p);
++npatterns;
} else if (FLAG("--mode") || FLAG("-m")) {
rule = flag;
} else if (argv[i][0] != '-') {
if (npatterns > 0) break;
vm_op_t *p = bpeg_stringpattern(argv[i]);
vm_op_t *p = bpeg_stringpattern(NULL, argv[i]);
check(p, "Pattern failed to compile");
add_def(g, argv[i], "pattern", p);
add_def(g, NULL, argv[i], "pattern", p);
++npatterns;
} else {
printf("Unrecognized flag: %s\n\n%s\n", argv[i], usage);

View File

@ -5,8 +5,8 @@
#include "compiler.h"
#include "utils.h"
static vm_op_t *expand_chain(vm_op_t *first);
static vm_op_t *expand_choices(vm_op_t *first);
static vm_op_t *expand_chain(file_t *f, vm_op_t *first);
static vm_op_t *expand_choices(file_t *f, vm_op_t *first);
static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second);
static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op_t *sep);
@ -31,11 +31,11 @@ static void set_range(vm_op_t *op, ssize_t min, ssize_t max, vm_op_t *pat, vm_op
* followed by any patterns (e.g. "`x `y"), otherwise return
* the original input.
*/
static vm_op_t *expand_chain(vm_op_t *first)
static vm_op_t *expand_chain(file_t *f, vm_op_t *first)
{
vm_op_t *second = bpeg_simplepattern(first->end);
vm_op_t *second = bpeg_simplepattern(f, first->end);
if (second == NULL) return first;
second = expand_chain(second);
second = expand_chain(f, second);
check(second->end > first->end, "No forward progress in chain!");
return chain_together(first, second);
}
@ -45,14 +45,14 @@ static vm_op_t *expand_chain(vm_op_t *first)
* followed by any "/"-separated patterns (e.g. "`x/`y"), otherwise
* return the original input.
*/
static vm_op_t *expand_choices(vm_op_t *first)
static vm_op_t *expand_choices(file_t *f, vm_op_t *first)
{
first = expand_chain(first);
first = expand_chain(f, first);
const char *str = first->end;
if (!matchchar(&str, '/')) return first;
vm_op_t *second = bpeg_simplepattern(str);
vm_op_t *second = bpeg_simplepattern(f, str);
check(second, "Expected pattern after '/'");
second = expand_choices(second);
second = expand_choices(f, second);
vm_op_t *choice = calloc(sizeof(vm_op_t), 1);
choice->op = VM_OTHERWISE;
choice->start = first->start;
@ -84,7 +84,7 @@ static vm_op_t *chain_together(vm_op_t *first, vm_op_t *second)
/*
* Compile a string of BPEG code into virtual machine opcodes
*/
vm_op_t *bpeg_simplepattern(const char *str)
vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
{
if (!*str) return NULL;
str = after_spaces(str);
@ -100,7 +100,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
if (matchchar(&str, '.')) { // ".."
if (matchchar(&str, '.')) // "..."
op->multiline = 1;
vm_op_t *till = bpeg_simplepattern(str);
vm_op_t *till = bpeg_simplepattern(f, str);
op->op = VM_UPTO_AND;
op->len = -1;
op->args.pat = till;
@ -177,7 +177,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
}
// Not <pat>
case '!': {
vm_op_t *p = bpeg_simplepattern(str);
vm_op_t *p = bpeg_simplepattern(f, str);
check(p, "Expected pattern after '!'\n");
str = p->end;
op->op = VM_NOT;
@ -202,13 +202,13 @@ vm_op_t *bpeg_simplepattern(const char *str)
} else {
min = n1, max = n1;
}
vm_op_t *pat = bpeg_simplepattern(str);
vm_op_t *pat = bpeg_simplepattern(f, str);
check(pat, "Expected pattern after repetition count");
str = pat->end;
str = after_spaces(str);
vm_op_t *sep = NULL;
if (matchchar(&str, '%')) {
sep = bpeg_simplepattern(str);
sep = bpeg_simplepattern(f, str);
check(sep, "Expected pattern for separator after '%%'");
str = sep->end;
} else {
@ -219,7 +219,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
}
// Lookbehind
case '<': {
vm_op_t *pat = bpeg_simplepattern(str);
vm_op_t *pat = bpeg_simplepattern(f, str);
check(pat, "Expected pattern after <");
str = pat->end;
check(pat->len != -1, "Lookbehind patterns must have a fixed length");
@ -231,7 +231,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
}
// Lookahead
case '>': {
vm_op_t *pat = bpeg_simplepattern(str);
vm_op_t *pat = bpeg_simplepattern(f, str);
check(pat, "Expected pattern after >");
str = pat->end;
op->op = VM_BEFORE;
@ -242,9 +242,9 @@ vm_op_t *bpeg_simplepattern(const char *str)
// Parentheses
case '(': {
free(op);
op = bpeg_simplepattern(str);
op = bpeg_simplepattern(f, str);
check(op, "Expected pattern inside parentheses");
op = expand_choices(op);
op = expand_choices(f, op);
str = op->end;
str = after_spaces(str);
check(matchchar(&str, ')'), "Expected closing ')' instead of \"%s\"", str);
@ -261,7 +261,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
str = closing;
check(matchchar(&str, ']'), "Expected closing ']'");
}
vm_op_t *pat = bpeg_simplepattern(str);
vm_op_t *pat = bpeg_simplepattern(f, str);
check(pat, "Expected pattern after @");
str = pat->end;
op->args.capture.capture_pat = pat;
@ -275,9 +275,9 @@ vm_op_t *bpeg_simplepattern(const char *str)
if (strncmp(str, "=>", 2) == 0) {
str += strlen("=>");
} else {
pat = bpeg_simplepattern(str);
pat = bpeg_simplepattern(f, str);
check(pat, "Invalid pattern after '{'");
pat = expand_choices(pat);
pat = expand_choices(f, pat);
str = pat->end;
str = after_spaces(str);
check(matchchar(&str, '=') && matchchar(&str, '>'),
@ -360,7 +360,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
if (strncmp(after_spaces(str), "==", 2) == 0) {
str = after_spaces(str)+2;
vm_op_t *first = op;
vm_op_t *second = bpeg_simplepattern(str);
vm_op_t *second = bpeg_simplepattern(f, str);
check(second, "Expected pattern after '=='");
check(first->len == -1 || second->len == -1 || first->len == second->len,
"Two patterns cannot possibly match the same (different lengths: %ld != %ld)",
@ -382,7 +382,7 @@ vm_op_t *bpeg_simplepattern(const char *str)
/*
* Similar to bpeg_simplepattern, except that the pattern begins with an implicit, unclosable quote.
*/
vm_op_t *bpeg_stringpattern(const char *str)
vm_op_t *bpeg_stringpattern(file_t *f, const char *str)
{
vm_op_t *ret = NULL;
while (*str) {
@ -395,7 +395,7 @@ vm_op_t *bpeg_stringpattern(const char *str)
for (; *str; str++) {
if (*str == '\\') {
check(str[1], "Expected more string contents after backslash");
interp = bpeg_simplepattern(str + 1);
interp = bpeg_simplepattern(f, str + 1);
check(interp != NULL, "No valid BPEG pattern detected after backslash");
break;
}
@ -448,10 +448,10 @@ vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement)
return op;
}
vm_op_t *bpeg_pattern(const char *str)
vm_op_t *bpeg_pattern(file_t *f, const char *str)
{
vm_op_t *op = bpeg_simplepattern(str);
if (op != NULL) op = expand_choices(op);
vm_op_t *op = bpeg_simplepattern(f, str);
if (op != NULL) op = expand_choices(f, op);
return op;
}

View File

@ -7,11 +7,12 @@
#include <stdlib.h>
#include "types.h"
#include "file_loader.h"
vm_op_t *bpeg_simplepattern(const char *str);
vm_op_t *bpeg_stringpattern(const char *str);
vm_op_t *bpeg_simplepattern(file_t *f, const char *str);
vm_op_t *bpeg_stringpattern(file_t *f, const char *str);
vm_op_t *bpeg_replacement(vm_op_t *pat, const char *replacement);
vm_op_t *bpeg_pattern(const char *str);
vm_op_t *bpeg_pattern(file_t *f, const char *str);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

102
file_loader.c Normal file
View File

@ -0,0 +1,102 @@
/*
* file_loader.c - Implementation of some file loading functionality.
*/
#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "file_loader.h"
/*
 * Read an entire file into memory.
 *
 * `filename` may be NULL or "-", in which case standard input is read.
 * Returns a newly allocated file_t (release it with destroy_file()), or NULL
 * if the file could not be opened or memory could not be allocated.
 *
 * On success:
 *  - f->contents holds the bytes read, followed by a terminating '\0'
 *  - f->length is the number of bytes read
 *  - f->lines[i] points at the first character of line i+1
 *  - f->nlines is the number of entries in f->lines (always at least 1, so
 *    an empty file is treated as having a single empty line)
 * A trailing newline does not start an additional (empty) line.
 */
file_t *load_file(const char *filename)
{
    size_t capacity = 1000, linecap = 10;
    ssize_t just_read;
    char *p;

    if (filename == NULL) filename = "-";
    int fd = strcmp(filename, "-") != 0 ? open(filename, O_RDONLY) : STDIN_FILENO;
    if (fd < 0) return NULL;

    file_t *f = calloc(1, sizeof(file_t));
    if (f == NULL) goto fail;
    f->filename = strdup(filename);
    if (f->filename == NULL) goto fail;
    // TODO: use mmap when possible
    f->mmapped = 0;

    // One extra byte is always reserved for the terminating '\0'
    f->length = 0;
    f->contents = calloc(capacity + 1, sizeof(char));
    if (f->contents == NULL) goto fail;
    // A read error (-1) ends the loop just like EOF; whatever was read is kept
    while ((just_read = read(fd, &f->contents[f->length], capacity - f->length)) > 0) {
        f->length += (size_t)just_read;
        if (f->length >= capacity) {
            capacity *= 2;
            // Use a temporary so the old buffer isn't leaked if realloc fails
            char *grown = realloc(f->contents, sizeof(char)*capacity + 1);
            if (grown == NULL) goto fail;
            f->contents = grown;
        }
    }
    f->contents[f->length] = '\0';
    close(fd);
    fd = -1;

    // Calculate line numbers:
    f->lines = calloc(linecap, sizeof(char*));
    if (f->lines == NULL) goto fail;
    f->nlines = 0;
    p = f->contents;
    do {
        if (f->nlines >= linecap) {
            linecap *= 2;
            char **grown = realloc(f->lines, sizeof(char*)*linecap);
            if (grown == NULL) goto fail;
            f->lines = grown;
        }
        // Record the line start and keep the count in sync with the array
        f->lines[f->nlines++] = p;
        p = strchr(p, '\n');
        if (p) ++p;
    } while (p && *p);
    return f;

  fail:
    if (fd >= 0) close(fd);
    if (f != NULL) {
        free(f->lines);
        free(f->contents);
        free((char*)f->filename);
        free(f);
    }
    return NULL;
}
/*
 * Free a file_t along with the filename, line table and contents it owns,
 * then set the caller's pointer to NULL.
 * Calling this with a NULL `f`, or with `*f == NULL` (e.g. after a failed
 * load_file()), is a no-op.
 * NOTE(review): the `mmapped` flag is not consulted here; contents are always
 * released with free(). Revisit if mmap-backed loading is ever implemented.
 */
void destroy_file(file_t **f)
{
    if (f == NULL || *f == NULL) return;
    // free(NULL) is a no-op, so no per-field guards are needed
    free((char*)(*f)->filename);
    free((*f)->lines);
    free((*f)->contents);
    free(*f);
    *f = NULL;
}
/*
 * Given a pointer `p`, return the 1-indexed number of the line it falls on,
 * judged by comparing it against the recorded line-start pointers.
 * Assumes `p` points into f->contents (not checked here).
 * A pointer that is not before any later line start is on the last line, so
 * f->nlines is returned in that case.
 */
size_t get_line_number(file_t *f, const char *p)
{
    // TODO: binary search
    // f->lines[n] is the start of line n+1, so the first line start that lies
    // beyond p means p is on line n.
    for (size_t n = 1; n < f->nlines; n++) {
        if (f->lines[n] > p)
            return n;
    }
    return f->nlines;
}
/*
 * Return a pointer to the first character of the given 1-indexed line,
 * or NULL when `line_number` is 0 or greater than f->nlines.
 */
const char *get_line(file_t *f, size_t line_number)
{
    int in_range = line_number >= 1 && line_number <= f->nlines;
    return in_range ? f->lines[line_number - 1] : NULL;
}
/*
 * Print a message about a span of a file to `dest`, in the form
 *     filename:line:column: msg
 * followed by the source line containing `start`, with the text from `start`
 * up to `end` highlighted using ANSI escape codes.
 *
 * `end` may be NULL, or may point past the end of the line; in both cases the
 * highlight runs to the end of the line. If the line containing `start`
 * cannot be looked up, only the filename and message are printed.
 */
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg)
{
    size_t linenum = get_line_number(f, start);
    const char *line = get_line(f, linenum);
    if (line == NULL) {
        // No line to show: avoid doing pointer arithmetic on NULL
        fprintf(dest, "\033[1m%s:\033[0m %s\n", f->filename, msg);
        return;
    }
    size_t charnum = 1 + (size_t)(start - line);
    // size_t values must be printed with %zu (not %ld)
    fprintf(dest, "\033[1m%s:%zu:%zu:\033[0m %s\n",
            f->filename, linenum, charnum, msg);
    // End of line is the next '\n', or the end of the contents if there is none
    const char *eol = line + strcspn(line, "\n");
    if (end == NULL || end > eol) end = eol;
    // A negative precision for %.*s means "print everything", so clamp the spans
    if (end < start) end = start;
    int tail = eol > end ? (int)(eol - end) : 0;
    fprintf(dest, "\033[2m%5zu |\033[0m %.*s\033[31;4;1m%.*s\033[0m%.*s\n",
            linenum,
            (int)(start - line), line,
            (int)(end - start), start,
            tail, end);
}

22
file_loader.h Normal file
View File

@ -0,0 +1,22 @@
/*
* file_loader.h - Definitions of an API for loading files.
*/
#ifndef FILE_LOADER__H
#define FILE_LOADER__H
#include <stdio.h>
/*
 * An in-memory copy of a file, as produced by load_file().
 */
typedef struct {
    // Name the file was loaded under ("-" when read from standard input)
    const char *filename;
    // The bytes of the file, followed by a terminating '\0'
    char *contents;
    // Pointers into `contents`: lines[i] is the start of line i+1
    char **lines;
    // Number of bytes read into `contents` (not counting the terminator)
    size_t length;
    // Line count associated with `lines`, as maintained by load_file()
    size_t nlines;
    // Whether `contents` is memory-mapped (load_file() currently always sets 0)
    unsigned int mmapped:1;
} file_t;
// Read a whole file into memory. A NULL filename or "-" means standard input.
// Returns NULL if the file cannot be opened.
file_t *load_file(const char *filename);
// Free a file and the memory it owns, then set `*f` to NULL.
void destroy_file(file_t **f);
// Look up the line number for a position `p` within the file's contents.
size_t get_line_number(file_t *f, const char *p);
// Return the start of the given 1-indexed line, or NULL if it is out of range.
const char *get_line(file_t *f, size_t line_number);
// Print `msg` with the file name and position of `start`, followed by the
// source line with the span from `start` to `end` highlighted.
void fprint_line(FILE *dest, file_t *f, const char *start, const char *end, const char *msg);
#endif

View File

@ -4,6 +4,7 @@
#include "grammar.h"
#include "compiler.h"
#include "file_loader.h"
#include "utils.h"
grammar_t *new_grammar(void)
@ -13,12 +14,13 @@ grammar_t *new_grammar(void)
return g;
}
void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op)
void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t *op)
{
if (g->defcount >= g->defcapacity) {
g->definitions = realloc(g->definitions, sizeof(&g->definitions[0])*(g->defcapacity += 32));
}
int i = g->defcount;
g->definitions[i].file = f;
g->definitions[i].source = src;
g->definitions[i].name = name;
g->definitions[i].op = op;
@ -29,9 +31,10 @@ void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op)
* Load the given grammar (semicolon-separated definitions)
* and return the first rule defined.
*/
vm_op_t *load_grammar(grammar_t *g, const char *src)
vm_op_t *load_grammar(grammar_t *g, file_t *f)
{
vm_op_t *ret = NULL;
const char *src = f->contents;
do {
src = after_spaces(src);
if (!*src) break;
@ -41,9 +44,9 @@ vm_op_t *load_grammar(grammar_t *g, const char *src)
name = strndup(name, (size_t)(name_end-name));
src = after_spaces(name_end);
check(matchchar(&src, '='), "Expected '=' in definition");
vm_op_t *op = bpeg_pattern(src);
vm_op_t *op = bpeg_pattern(f, src);
check(op, "Couldn't load definition");
add_def(g, src, name, op);
add_def(g, f, src, name, op);
if (ret == NULL) {
ret = op;
}

View File

@ -7,13 +7,14 @@
#include <stdlib.h>
#include <string.h>
#include "file_loader.h"
#include "types.h"
grammar_t *new_grammar(void);
void add_def(grammar_t *g, const char *src, const char *name, vm_op_t *op);
void add_def(grammar_t *g, file_t *f, const char *src, const char *name, vm_op_t *op);
void push_backref(grammar_t *g, const char *name, match_t *capture);
void pop_backrefs(grammar_t *g, size_t count);
vm_op_t *load_grammar(grammar_t *g, const char *source);
vm_op_t *load_grammar(grammar_t *g, file_t *f);
vm_op_t *lookup(grammar_t *g, const char *name);
#endif

View File

@ -6,6 +6,8 @@
#include <sys/types.h>
#include "file_loader.h"
enum BPEGFlag {
BPEG_VERBOSE = 1 << 0,
BPEG_IGNORECASE = 1 << 1,
@ -85,6 +87,7 @@ typedef struct match_s {
typedef struct {
const char *name;
const char *source;
file_t *file;
vm_op_t *op;
} def_t;

23
utils.c
View File

@ -153,28 +153,5 @@ size_t unescape_string(char *dest, const char *src, size_t bufsize)
#undef PUT
}
/*
* Read an entire file into memory. (Guaranteeing that ret[-1] == '\0')
*/
char *readfile(int fd)
{
size_t capacity = 1000, len = 0;
char *buf = calloc(sizeof(char), capacity+1);
buf[len++] = '\0';
ssize_t just_read;
while ((just_read=read(fd, &buf[len], capacity-len)) > 0) {
len += (size_t)just_read;
if (len >= capacity)
buf = realloc(buf, sizeof(char)*(capacity *= 2));
}
buf[len] = '\0';
close(fd);
return &buf[1];
}
void freefile(char *f)
{
free(&f[-1]);
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

View File

@ -17,8 +17,6 @@
#define check(cond, ...) do { if (!(cond)) { fprintf(stderr, __VA_ARGS__); fwrite("\n", 1, 1, stderr); _exit(1); } } while(0)
#define debug(...) do { if (verbose) fprintf(stderr, __VA_ARGS__); } while(0)
char *readfile(int fd);
void freefile(char *f);
char unescapechar(const char *escaped, const char **end);
const char *after_name(const char *str);
const char *after_spaces(const char *str);

50
vm.c
View File

@ -77,7 +77,7 @@ typedef struct recursive_ref_s {
* a match struct, or NULL if no match is found.
* The returned value should be free()'d to avoid memory leaking.
*/
static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec)
static match_t *_match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags, recursive_ref_t *rec)
{
switch (op->op) {
case VM_EMPTY: {
@ -116,7 +116,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
return m;
}
case VM_NOT: {
match_t *m = _match(g, str, op->args.pat, flags, rec);
match_t *m = _match(g, f, str, op->args.pat, flags, rec);
if (m != NULL) {
destroy_match(&m);
return NULL;
@ -134,7 +134,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
if (op->args.pat) {
for (const char *prev = NULL; prev < str; ) {
prev = str;
match_t *p = _match(g, str, op->args.pat, flags, rec);
match_t *p = _match(g, f, str, op->args.pat, flags, rec);
if (p) {
m->child = p;
m->end = p->end;
@ -168,11 +168,11 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
// Separator
match_t *sep = NULL;
if (op->args.repetitions.sep != NULL && reps > 0) {
sep = _match(g, str, op->args.repetitions.sep, flags, rec);
sep = _match(g, f, str, op->args.repetitions.sep, flags, rec);
if (sep == NULL) break;
str = sep->end;
}
match_t *p = _match(g, str, op->args.repetitions.repeat_pat, flags, rec);
match_t *p = _match(g, f, str, op->args.repetitions.repeat_pat, flags, rec);
if (p == NULL || (p->end == prev && reps > 0)) { // Prevent infinite loops
destroy_match(&sep);
destroy_match(&p);
@ -204,11 +204,8 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
case VM_AFTER: {
ssize_t backtrack = op->args.pat->len;
check(backtrack != -1, "'<' is only allowed for fixed-length operations");
// Check for necessary space:
for (int i = 0; i < backtrack; i++) {
if (str[-i] == '\0') return NULL;
}
match_t *before = _match(g, str - backtrack, op->args.pat, flags, rec);
if (str - backtrack < f->contents) return NULL;
match_t *before = _match(g, f, str - backtrack, op->args.pat, flags, rec);
if (before == NULL) return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
@ -218,7 +215,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
return m;
}
case VM_BEFORE: {
match_t *after = _match(g, str, op->args.pat, flags, rec);
match_t *after = _match(g, f, str, op->args.pat, flags, rec);
if (after == NULL) return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
@ -228,7 +225,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
return m;
}
case VM_CAPTURE: {
match_t *p = _match(g, str, op->args.pat, flags, rec);
match_t *p = _match(g, f, str, op->args.pat, flags, rec);
if (p == NULL) return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
@ -241,16 +238,16 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
return m;
}
case VM_OTHERWISE: {
match_t *m = _match(g, str, op->args.multiple.first, flags, rec);
if (m == NULL) m = _match(g, str, op->args.multiple.second, flags, rec);
match_t *m = _match(g, f, str, op->args.multiple.first, flags, rec);
if (m == NULL) m = _match(g, f, str, op->args.multiple.second, flags, rec);
return m;
}
case VM_CHAIN: {
match_t *m1 = _match(g, str, op->args.multiple.first, flags, rec);
match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec);
if (m1 == NULL) return NULL;
size_t nbackrefs = push_backrefs(g, m1);
match_t *m2 = _match(g, m1->end, op->args.multiple.second, flags, rec);
match_t *m2 = _match(g, f, m1->end, op->args.multiple.second, flags, rec);
pop_backrefs(g, nbackrefs);
if (m2 == NULL) {
destroy_match(&m1);
@ -265,11 +262,11 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
return m;
}
case VM_EQUAL: {
match_t *m1 = _match(g, str, op->args.multiple.first, flags, rec);
match_t *m1 = _match(g, f, str, op->args.multiple.first, flags, rec);
if (m1 == NULL) return NULL;
// <p1>==<p2> matches iff both have the same start and end point:
match_t *m2 = _match(g, str, op->args.multiple.second, flags, rec);
match_t *m2 = _match(g, f, str, op->args.multiple.second, flags, rec);
if (m2 == NULL || m2->end != m1->end) {
destroy_match(&m1);
destroy_match(&m2);
@ -288,7 +285,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
m->start = str;
m->op = op;
if (op->args.replace.replace_pat) {
match_t *p = _match(g, str, op->args.replace.replace_pat, flags, rec);
match_t *p = _match(g, f, str, op->args.replace.replace_pat, flags, rec);
if (p == NULL) return NULL;
m->child = p;
m->end = p->end;
@ -320,7 +317,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
};
match_t *best = NULL;
left_recursive:;
match_t *p = _match(g, str, r, flags, &wrap);
match_t *p = _match(g, f, str, r, flags, &wrap);
if (p == NULL) return best;
if (wrap.hit && (best == NULL || p->end > best->end)) {
best = p;
@ -343,7 +340,10 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
return match_backref(str, op, (match_t*)op->args.backref, flags);
}
case VM_NODENT: {
if (str[-1] == '\0') { // First line
size_t linenum = get_line_number(f, str);
if (linenum == 1) { // First line
if (str > f->contents)
return NULL;
match_t *m = calloc(sizeof(match_t), 1);
m->start = str;
m->end = str;
@ -352,9 +352,7 @@ static match_t *_match(grammar_t *g, const char *str, vm_op_t *op, unsigned int
} else if (str[-1] != '\n') {
return NULL; // Not at beginning of line
}
const char *p = &str[-1];
while (*p == '\n') --p; // Skip blank lines
while (p[-1] && p[-1] != '\n') --p; // Backtrack to start of last (nonblank) line
const char *p = get_line(f, linenum - 1);
// Count indentation:
char denter = *p;
int dents = 0;
@ -686,9 +684,9 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign
return ret;
}
match_t *match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags)
match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags)
{
return _match(g, str, op, flags, NULL);
return _match(g, f, str, op, flags, NULL);
}
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1

2
vm.h
View File

@ -12,7 +12,7 @@
#include "types.h"
const char *opcode_name(enum VMOpcode o);
match_t *match(grammar_t *g, const char *str, vm_op_t *op, unsigned int flags);
match_t *match(grammar_t *g, file_t *f, const char *str, vm_op_t *op, unsigned int flags);
void destroy_match(match_t **m);
void print_pattern(vm_op_t *op);
void print_match(match_t *m, const char *color, int verbose);