Renamed VM_ to BP_

This commit is contained in:
Bruce Hill 2021-01-16 10:29:09 -08:00
parent a2f30332cc
commit 3f6e7c2beb
7 changed files with 74 additions and 74 deletions

2
bp.c
View File

@ -179,7 +179,7 @@ static void sig_handler(int sig) { (void)sig; cleanup(); }
static void confirm_replacements(file_t *f, match_t *m, confirm_t *confirm)
{
if (*confirm == CONFIRM_ALL) return;
if (m->pat->type == VM_REPLACE) {
if (m->pat->type == BP_REPLACE) {
if (*confirm == CONFIRM_NONE) {
m->skip_replacement = 1;
goto check_children;

View File

@ -70,7 +70,7 @@ def_t *lookup(def_t *defs, const char *name)
//
def_t *with_backref(def_t *defs, file_t *f, const char *name, match_t *m)
{
pat_t *backref = new_pat(f, m->start, VM_BACKREF);
pat_t *backref = new_pat(f, m->start, BP_BACKREF);
backref->end = m->end;
backref->len = -1; // TODO: maybe calculate this? (nontrivial because of replacements)
backref->args.backref = m;

2
json.c
View File

@ -17,7 +17,7 @@ static int _json_match(const char *text, match_t *m, int comma, unsigned int ver
static int _json_match(const char *text, match_t *m, int comma, unsigned int verbose)
{
if (!verbose) {
if (m->pat->type != VM_REF) {
if (m->pat->type != BP_REF) {
for (match_t *child = m->child; child; child = child->nextsibling) {
comma |= _json_match(text, child, comma, verbose);
}

50
match.c
View File

@ -68,7 +68,7 @@ static inline const char *next_char(file_t *f, const char *str)
//
static const char *match_backref(const char *str, match_t *cap, unsigned int ignorecase)
{
if (cap->pat->type == VM_REPLACE) {
if (cap->pat->type == BP_REPLACE) {
const char *text = cap->pat->args.replace.text;
const char *end = &text[cap->pat->args.replace.len];
for (const char *r = text; r < end; ) {
@ -147,7 +147,7 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, unsigned
match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int ignorecase)
{
switch (pat->type) {
case VM_LEFTRECURSION: {
case BP_LEFTRECURSION: {
// Left recursion occurs when a pattern directly or indirectly
// invokes itself at the same position in the text. It's handled as
// a special case, but if a pattern invokes itself at a later
@ -160,7 +160,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
return match(defs, f, str, pat->args.leftrec.fallback, ignorecase);
}
}
case VM_ANYCHAR: {
case BP_ANYCHAR: {
if (str >= f->end || *str == '\n')
return NULL;
match_t *m = new_match();
@ -169,7 +169,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
m->end = next_char(f, str);
return m;
}
case VM_STRING: {
case BP_STRING: {
if (&str[pat->len] > f->end) return NULL;
if (ignorecase ? memicmp(str, pat->args.s, (size_t)pat->len) != 0
: memcmp(str, pat->args.s, (size_t)pat->len) != 0)
@ -180,7 +180,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
m->end = str + pat->len;
return m;
}
case VM_RANGE: {
case BP_RANGE: {
if (str >= f->end) return NULL;
if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high)
return NULL;
@ -190,7 +190,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
m->end = str + 1;
return m;
}
case VM_NOT: {
case BP_NOT: {
match_t *m = match(defs, f, str, pat->args.pat, ignorecase);
if (m != NULL) {
recycle_if_unused(&m);
@ -202,7 +202,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
m->end = str;
return m;
}
case VM_UPTO_AND: {
case BP_UPTO_AND: {
match_t *m = new_match();
m->start = str;
m->pat = pat;
@ -246,7 +246,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
recycle_if_unused(&m);
return NULL;
}
case VM_REPEAT: {
case BP_REPEAT: {
match_t *m = new_match();
m->start = str;
m->end = str;
@ -301,7 +301,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
m->end = str;
return m;
}
case VM_AFTER: {
case BP_AFTER: {
ssize_t backtrack = pat->args.pat->len;
check(backtrack != -1, "'<' is only allowed for fixed-length operations");
if (str - backtrack < f->contents) return NULL;
@ -314,7 +314,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
ADD_OWNER(m->child, before);
return m;
}
case VM_BEFORE: {
case BP_BEFORE: {
match_t *after = match(defs, f, str, pat->args.pat, ignorecase);
if (after == NULL) return NULL;
match_t *m = new_match();
@ -324,7 +324,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
ADD_OWNER(m->child, after);
return m;
}
case VM_CAPTURE: {
case BP_CAPTURE: {
match_t *p = match(defs, f, str, pat->args.pat, ignorecase);
if (p == NULL) return NULL;
match_t *m = new_match();
@ -334,19 +334,19 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
ADD_OWNER(m->child, p);
return m;
}
case VM_OTHERWISE: {
case BP_OTHERWISE: {
match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase);
if (m == NULL) m = match(defs, f, str, pat->args.multiple.second, ignorecase);
return m;
}
case VM_CHAIN: {
case BP_CHAIN: {
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
if (m1 == NULL) return NULL;
match_t *m2;
{ // Push backrefs and run matching, then cleanup
def_t *defs2 = defs;
if (m1->pat->type == VM_CAPTURE && m1->pat->args.capture.name)
if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name)
defs2 = with_backref(defs2, f, m1->pat->args.capture.name, m1);
// def_t *defs2 = with_backrefs(defs, f, m1);
m2 = match(defs2, f, m1->end, pat->args.multiple.second, ignorecase);
@ -365,7 +365,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
ADD_OWNER(m1->nextsibling, m2);
return m;
}
case VM_EQUAL: case VM_NOT_EQUAL: {
case BP_EQUAL: case BP_NOT_EQUAL: {
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
if (m1 == NULL) return NULL;
@ -379,7 +379,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
.mmapped=f->mmapped,
};
match_t *m2 = match(defs, &inner, str, pat->args.multiple.second, ignorecase);
if ((m2 == NULL) == (pat->type == VM_EQUAL)) {
if ((m2 == NULL) == (pat->type == BP_EQUAL)) {
recycle_if_unused(&m1);
if (m2 != NULL) recycle_if_unused(&m2);
return NULL;
@ -389,14 +389,14 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
m->end = m1->end;
m->pat = pat;
ADD_OWNER(m->child, m1);
if (pat->type == VM_EQUAL) {
if (pat->type == BP_EQUAL) {
ADD_OWNER(m1->nextsibling, m2);
} else {
recycle_if_unused(&m2);
}
return m;
}
case VM_REPLACE: {
case BP_REPLACE: {
match_t *p = NULL;
if (pat->args.replace.pat) {
p = match(defs, f, str, pat->args.replace.pat, ignorecase);
@ -413,13 +413,13 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
}
return m;
}
case VM_REF: {
case BP_REF: {
def_t *def = lookup(defs, pat->args.s);
check(def != NULL, "Unknown identifier: '%s'", pat->args.s);
pat_t *ref = def->pat;
pat_t rec_op = {
.type = VM_LEFTRECURSION,
.type = BP_LEFTRECURSION,
.start = ref->start,
.end = ref->end,
.len = 0,
@ -476,7 +476,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
ADD_OWNER(m2->child, m);
return m2;
}
case VM_BACKREF: {
case BP_BACKREF: {
const char *end = match_backref(str, pat->args.backref, ignorecase);
if (end == NULL) return NULL;
match_t *m = new_match();
@ -485,7 +485,7 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
m->end = end;
return m;
}
case VM_NODENT: {
case BP_NODENT: {
if (*str != '\n') return NULL;
const char *start = str;
@ -526,8 +526,8 @@ match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, unsigned int
static match_t *get_capture_by_num(match_t *m, int *n)
{
if (*n == 0) return m;
if (m->pat->type == VM_CAPTURE && *n == 1) return m;
if (m->pat->type == VM_CAPTURE) --(*n);
if (m->pat->type == BP_CAPTURE && *n == 1) return m;
if (m->pat->type == BP_CAPTURE) --(*n);
for (match_t *c = m->child; c; c = c->nextsibling) {
match_t *cap = get_capture_by_num(c, n);
if (cap) return cap;
@ -540,7 +540,7 @@ static match_t *get_capture_by_num(match_t *m, int *n)
//
static match_t *get_capture_by_name(match_t *m, const char *name)
{
if (m->pat->type == VM_CAPTURE && m->pat->args.capture.name
if (m->pat->type == BP_CAPTURE && m->pat->args.capture.name
&& streq(m->pat->args.capture.name, name))
return m;
for (match_t *c = m->child; c; c = c->nextsibling) {

View File

@ -43,7 +43,7 @@ pat_t *new_pat(file_t *f, const char *start, enum pattype_e type)
//
static pat_t *new_range(file_t *f, const char *start, const char *end, ssize_t min, ssize_t max, pat_t *repeating, pat_t *sep)
{
pat_t *range = new_pat(f, start, VM_REPEAT);
pat_t *range = new_pat(f, start, BP_REPEAT);
if (repeating->len >= 0 && (sep == NULL || sep->len >= 0) && min == max && min >= 0)
range->len = repeating->len * min + (sep == NULL || min == 0 ? 0 : sep->len * (min-1));
else
@ -109,7 +109,7 @@ static pat_t *expand_choices(file_t *f, pat_t *first)
memcpy((void*)replacement, repstr, replace_len);
pat_t *replacepat = first;
first = new_pat(f, replacepat->start, VM_REPLACE);
first = new_pat(f, replacepat->start, BP_REPLACE);
first->args.replace.pat = replacepat;
first->args.replace.text = replacement;
first->args.replace.len = replace_len;
@ -122,7 +122,7 @@ static pat_t *expand_choices(file_t *f, pat_t *first)
if (!second)
file_err(f, str, str, "There should be a pattern here after a '/'");
second = expand_choices(f, second);
pat_t *choice = new_pat(f, first->start, VM_OTHERWISE);
pat_t *choice = new_pat(f, first->start, BP_OTHERWISE);
if (first->len == second->len)
choice->len = first->len;
else choice->len = -1;
@ -140,7 +140,7 @@ static pat_t *chain_together(file_t *f, pat_t *first, pat_t *second)
{
if (first == NULL) return second;
if (second == NULL) return first;
pat_t *chain = new_pat(f, first->start, VM_CHAIN);
pat_t *chain = new_pat(f, first->start, BP_CHAIN);
chain->start = first->start;
if (first->len >= 0 && second->len >= 0)
chain->len = first->len + second->len;
@ -175,7 +175,7 @@ pat_t *bp_simplepattern(file_t *f, const char *str)
"These two patterns cannot possibly give the same result (different lengths: %ld != %ld)",
first->len, second->len);
}
pat = new_pat(f, str, equal ? VM_EQUAL : VM_NOT_EQUAL);
pat = new_pat(f, str, equal ? BP_EQUAL : BP_NOT_EQUAL);
pat->end = second->end;
pat->len = first->len != -1 ? first->len : second->len;
pat->args.multiple.first = first;
@ -201,7 +201,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
// Any char (dot)
case '.': {
if (*str == '.') { // ".."
pat_t *upto = new_pat(f, start, VM_UPTO_AND);
pat_t *upto = new_pat(f, start, BP_UPTO_AND);
++str;
pat_t *till = bp_simplepattern(f, str);
upto->args.multiple.first = till;
@ -217,7 +217,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
upto->end = str;
return upto;
} else {
pat_t *dot = new_pat(f, start, VM_ANYCHAR);
pat_t *dot = new_pat(f, start, BP_ANYCHAR);
dot->len = 1;
dot->end = str;
return dot;
@ -238,7 +238,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
char c2 = *str;
if (!c2 || c2 == '\n')
file_err(f, str, str, "There should be a character here to complete the character range.");
pat = new_pat(f, opstart, VM_RANGE);
pat = new_pat(f, opstart, BP_RANGE);
if (c < c2) {
pat->args.range.low = (unsigned char)c;
pat->args.range.high = (unsigned char)c2;
@ -248,7 +248,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
}
++str;
} else {
pat = new_pat(f, opstart, VM_STRING);
pat = new_pat(f, opstart, BP_STRING);
char *s = xcalloc(sizeof(char), 2);
s[0] = c;
pat->args.s = s;
@ -260,7 +260,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
if (all == NULL) {
all = pat;
} else {
pat_t *either = new_pat(f, all->start, VM_OTHERWISE);
pat_t *either = new_pat(f, all->start, BP_OTHERWISE);
either->end = pat->end;
either->args.multiple.first = all;
either->args.multiple.second = pat;
@ -278,7 +278,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
file_err(f, str, str, "There should be an escape sequence here after this backslash.");
if (matchchar(&str, 'N')) { // \N (nodent)
pat_t *nodent = new_pat(f, start, VM_NODENT);
pat_t *nodent = new_pat(f, start, BP_NODENT);
nodent->end = str;
return nodent;
}
@ -294,11 +294,11 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
file_err(f, seqstart, str+1, "This value isn't a valid escape sequence");
if (e2 < e)
file_err(f, start, str, "Escape ranges should be low-to-high, but this is high-to-low.");
esc = new_pat(f, opstart, VM_RANGE);
esc = new_pat(f, opstart, BP_RANGE);
esc->args.range.low = e;
esc->args.range.high = e2;
} else {
esc = new_pat(f, opstart, VM_STRING);
esc = new_pat(f, opstart, BP_STRING);
char *s = xcalloc(sizeof(char), 2);
s[0] = (char)e;
esc->args.s = s;
@ -326,7 +326,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
// escaped string, so this is safe to do inplace.
len = unescape_string(literal, literal, len);
pat_t *pat = new_pat(f, start, VM_STRING);
pat_t *pat = new_pat(f, start, BP_STRING);
pat->len = (ssize_t)len;
pat->args.s = literal;
@ -340,7 +340,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
case '!': {
pat_t *p = bp_simplepattern(f, str);
if (!p) file_err(f, str, str, "There should be a pattern after this '!'");
pat_t *not = new_pat(f, start, VM_NOT);
pat_t *not = new_pat(f, start, BP_NOT);
not->len = 0;
not->args.pat = p;
not->end = p->end;
@ -389,7 +389,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
"Sorry, variable-length lookbehind patterns like this are not supported.\n"
"Please use a fixed-length lookbehind pattern instead.");
str = behind->end;
pat_t *pat = new_pat(f, start, VM_AFTER);
pat_t *pat = new_pat(f, start, BP_AFTER);
pat->len = 0;
pat->args.pat = behind;
pat->end = str;
@ -401,7 +401,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
if (!ahead)
file_err(f, str, str, "There should be a pattern after this '>'");
str = ahead->end;
pat_t *pat = new_pat(f, start, VM_BEFORE);
pat_t *pat = new_pat(f, start, BP_BEFORE);
pat->len = 0;
pat->args.pat = ahead;
pat->end = str;
@ -450,7 +450,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
}
// Capture
case '@': {
pat_t *capture = new_pat(f, start, VM_CAPTURE);
pat_t *capture = new_pat(f, start, BP_CAPTURE);
const char *a = *str == '!' ? &str[1] : after_name(str);
if (a > str && after_spaces(a)[0] == '=' && after_spaces(a)[1] != '>') {
capture->args.capture.name = strndup(str, (size_t)(a-str));
@ -475,7 +475,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
if (matchchar(&str, ':')) return NULL; // Don't match definitions
name = strndup(&c, 1);
}
pat_t *ref = new_pat(f, start, VM_REF);
pat_t *ref = new_pat(f, start, BP_REF);
ref->args.s = name;
ref->end = str;
return ref;
@ -488,7 +488,7 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
str = after_name(str);
if (matchchar(&str, ':')) // Don't match definitions
return NULL;
pat_t *ref = new_pat(f, start, VM_REF);
pat_t *ref = new_pat(f, start, BP_REF);
ref->args.s = strndup(refname, (size_t)(str - refname));
ref->end = str;
return ref;
@ -512,7 +512,7 @@ pat_t *bp_stringpattern(file_t *f, const char *str)
file_err(f, str, str, "There should be an escape sequence or pattern here after this backslash.");
if (matchchar(&str, 'N')) { // \N (nodent)
interp = new_pat(f, str-2, VM_NODENT);
interp = new_pat(f, str-2, BP_NODENT);
break;
}
@ -540,7 +540,7 @@ pat_t *bp_stringpattern(file_t *f, const char *str)
// escaped string, so this is safe to do inplace.
len = unescape_string(literal, literal, len);
if (len > 0) {
pat_t *strop = new_pat(f, str, VM_STRING);
pat_t *strop = new_pat(f, str, BP_STRING);
strop->len = (ssize_t)len;
strop->args.s = literal;
strop->end = str;
@ -562,7 +562,7 @@ pat_t *bp_stringpattern(file_t *f, const char *str)
//
pat_t *bp_replacement(file_t *f, pat_t *replacepat, const char *replacement)
{
pat_t *pat = new_pat(f, replacepat->start, VM_REPLACE);
pat_t *pat = new_pat(f, replacepat->start, BP_REPLACE);
pat->end = replacepat->end;
pat->len = replacepat->len;
pat->args.replace.pat = replacepat;
@ -619,14 +619,14 @@ def_t *bp_definition(file_t *f, const char *str)
void destroy_pat(pat_t *pat)
{
switch (pat->type) {
case VM_STRING: case VM_REF:
case BP_STRING: case BP_REF:
xfree(&pat->args.s);
break;
case VM_CAPTURE:
case BP_CAPTURE:
if (pat->args.capture.name)
xfree(&pat->args.capture.name);
break;
case VM_REPLACE:
case BP_REPLACE:
if (pat->args.replace.text)
xfree(&pat->args.replace.text);
break;

View File

@ -257,7 +257,7 @@ static const char *context_after(printer_t *pr, const char *pos)
void _print_match(FILE *out, printer_t *pr, match_t *m)
{
pr->pos = m->start;
if (m->pat->type == VM_REPLACE) {
if (m->pat->type == BP_REPLACE) {
if (m->skip_replacement) {
_print_match(out, pr, m->child);
return;
@ -369,7 +369,7 @@ void print_match(FILE *out, printer_t *pr, match_t *m)
int print_errors(printer_t *pr, match_t *m)
{
int ret = 0;
if (m->pat->type == VM_CAPTURE && m->pat->args.capture.name && streq(m->pat->args.capture.name, "!")) {
if (m->pat->type == BP_CAPTURE && m->pat->args.capture.name && streq(m->pat->args.capture.name, "!")) {
printf("\033[31;1m");
print_match(stdout, pr, m);
printf("\033[0m\n");

36
types.h
View File

@ -10,24 +10,24 @@
// BP virtual machine pattern types
enum pattype_e {
// Former VM_-prefixed spellings; per the commit message these were renamed to BP_.
VM_ANYCHAR = 1,
VM_STRING,
VM_RANGE,
VM_NOT,
VM_UPTO_AND,
VM_REPEAT,
VM_BEFORE,
VM_AFTER,
VM_CAPTURE,
VM_OTHERWISE,
VM_CHAIN,
VM_EQUAL,
VM_NOT_EQUAL,
VM_REPLACE,
VM_REF,
VM_BACKREF,
VM_NODENT,
VM_LEFTRECURSION,
// BP_-prefixed spellings. Numbering starts at 1, so no enumerator has the value 0.
BP_ANYCHAR = 1, // '.': any one character; match() rejects '\n' and end of file
BP_STRING, // literal text in args.s, compared with memcmp (memicmp when ignoring case)
BP_RANGE, // one byte between args.range.low and args.range.high, inclusive
BP_NOT, // '!': negation of args.pat; the parser gives it length 0
BP_UPTO_AND, // "..": created when a second '.' follows the first; target pattern in args.multiple.first
BP_REPEAT, // repetition built by new_range() from min/max counts, a repeated pattern and an optional separator
BP_BEFORE, // '>': args.pat must match starting at the current position; the parser gives it length 0
BP_AFTER, // '<' lookbehind: args.pat must be fixed-length (len != -1)
BP_CAPTURE, // '@': capture of args.pat, optionally named via args.capture.name
BP_OTHERWISE, // '/': args.multiple.first, falling back to args.multiple.second if it fails
BP_CHAIN, // args.multiple.first followed by args.multiple.second, which starts where first ended
BP_EQUAL, // args.multiple.first must match and args.multiple.second must match too
BP_NOT_EQUAL, // args.multiple.first must match and args.multiple.second must not
BP_REPLACE, // optional args.replace.pat plus replacement text in args.replace.text / args.replace.len
BP_REF, // reference by name (args.s) to a definition, resolved with lookup() at match time
BP_BACKREF, // refers to an earlier match (args.backref); created by with_backref()
BP_NODENT, // \N escape: match() requires a '\n' at the current position
BP_LEFTRECURSION, // type of the temporary rec_op pattern built in match()'s BP_REF case to handle left recursion
};
struct match_s; // forward declared to resolve circular struct defs