Rename match_t -> bp_match_t

This commit is contained in:
Bruce Hill 2024-05-29 13:14:35 -04:00
parent f271863601
commit da067957c8
9 changed files with 94 additions and 93 deletions

View File

@ -30,7 +30,7 @@ static const char *builtins_source = (
static int MATCH_METATABLE = 0, PAT_METATABLE = 0; static int MATCH_METATABLE = 0, PAT_METATABLE = 0;
static bp_pat_t *builtins; static bp_pat_t *builtins;
static void push_match(lua_State *L, match_t *m, const char *start); static void push_match(lua_State *L, bp_match_t *m, const char *start);
lua_State *cur_state = NULL; lua_State *cur_state = NULL;
@ -70,7 +70,7 @@ static int Lcompile(lua_State *L)
return 1; return 1;
} }
static void push_matchstring(lua_State *L, match_t *m) static void push_matchstring(lua_State *L, bp_match_t *m)
{ {
char *buf = NULL; char *buf = NULL;
size_t size = 0; size_t size = 0;
@ -81,7 +81,7 @@ static void push_matchstring(lua_State *L, match_t *m)
fclose(out); fclose(out);
} }
static match_t *get_first_capture(match_t *m) static bp_match_t *get_first_capture(bp_match_t *m)
{ {
if (m->pat->type == BP_TAGGED) { if (m->pat->type == BP_TAGGED) {
return m; return m;
@ -89,17 +89,17 @@ static match_t *get_first_capture(match_t *m)
return m; return m;
} else if (m->children) { } else if (m->children) {
for (int i = 0; m->children[i]; i++) { for (int i = 0; m->children[i]; i++) {
match_t *cap = get_first_capture(m->children[i]); bp_match_t *cap = get_first_capture(m->children[i]);
if (cap) return cap; if (cap) return cap;
} }
} }
return NULL; return NULL;
} }
static void set_capture_fields(lua_State *L, match_t *m, int *n, const char *start) static void set_capture_fields(lua_State *L, bp_match_t *m, int *n, const char *start)
{ {
if (m->pat->type == BP_CAPTURE) { if (m->pat->type == BP_CAPTURE) {
match_t *cap = get_first_capture(m->children[0]); bp_match_t *cap = get_first_capture(m->children[0]);
if (!cap) cap = m->children[0]; if (!cap) cap = m->children[0];
auto capture = When(m->pat, BP_CAPTURE); auto capture = When(m->pat, BP_CAPTURE);
if (capture->namelen > 0) { if (capture->namelen > 0) {
@ -119,7 +119,7 @@ static void set_capture_fields(lua_State *L, match_t *m, int *n, const char *sta
} }
} }
static void push_match(lua_State *L, match_t *m, const char *start) static void push_match(lua_State *L, bp_match_t *m, const char *start)
{ {
lua_createtable(L, 1, 2); lua_createtable(L, 1, 2);
lua_pushlightuserdata(L, (void*)&MATCH_METATABLE); lua_pushlightuserdata(L, (void*)&MATCH_METATABLE);
@ -170,7 +170,7 @@ static int Lmatch(lua_State *L)
if (index > (lua_Integer)strlen(text)+1) if (index > (lua_Integer)strlen(text)+1)
return 0; return 0;
match_t *m = NULL; bp_match_t *m = NULL;
int ret = 0; int ret = 0;
cur_state = L; cur_state = L;
bp_errhand_t old = bp_set_error_handler(match_error); bp_errhand_t old = bp_set_error_handler(match_error);
@ -215,7 +215,7 @@ static int Lreplace(lua_State *L)
bp_pat_t *rep_pat = maybe_replacement.value.pat; bp_pat_t *rep_pat = maybe_replacement.value.pat;
cur_state = L; cur_state = L;
bp_errhand_t old = bp_set_error_handler(match_error); bp_errhand_t old = bp_set_error_handler(match_error);
for (match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, builtins, NULL, false); ) { for (bp_match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, builtins, NULL, false); ) {
fwrite(prev, sizeof(char), (size_t)(m->start - prev), out); fwrite(prev, sizeof(char), (size_t)(m->start - prev), out);
fprint_match(out, text, m, NULL); fprint_match(out, text, m, NULL);
prev = m->end; prev = m->end;

10
bp.c
View File

@ -211,7 +211,7 @@ static int is_text_file(const char *filename)
static int print_matches_as_json(file_t *f, bp_pat_t *pattern, bp_pat_t *defs) static int print_matches_as_json(file_t *f, bp_pat_t *pattern, bp_pat_t *defs)
{ {
int nmatches = 0; int nmatches = 0;
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) { for (bp_match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
if (++nmatches > 1) if (++nmatches > 1)
printf(",\n"); printf(",\n");
printf("{\"filename\":\"%s\",\"match\":", f->filename); printf("{\"filename\":\"%s\",\"match\":", f->filename);
@ -227,7 +227,7 @@ static int print_matches_as_json(file_t *f, bp_pat_t *pattern, bp_pat_t *defs)
static int explain_matches(file_t *f, bp_pat_t *pattern, bp_pat_t *defs) static int explain_matches(file_t *f, bp_pat_t *pattern, bp_pat_t *defs)
{ {
int nmatches = 0; int nmatches = 0;
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) { for (bp_match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
if (++nmatches == 1) { if (++nmatches == 1) {
if (options.print_filenames) if (options.print_filenames)
fprint_filename(stdout, f->filename); fprint_filename(stdout, f->filename);
@ -370,7 +370,7 @@ static int print_matches(FILE *out, file_t *f, bp_pat_t *pattern, bp_pat_t *defs
print_opts.replace_color = "\033[0;34;1m"; print_opts.replace_color = "\033[0;34;1m";
print_opts.normal_color = "\033[m"; print_opts.normal_color = "\033[m";
} }
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) { for (bp_match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
if (++matches == 1 && options.print_filenames) { if (++matches == 1 && options.print_filenames) {
if (printed_filenames++ > 0) printf("\n"); if (printed_filenames++ > 0) printf("\n");
fprint_filename(out, f->filename); fprint_filename(out, f->filename);
@ -412,7 +412,7 @@ static int process_file(const char *filename, bp_pat_t *pattern, bp_pat_t *defs)
if (options.mode == MODE_EXPLAIN) { if (options.mode == MODE_EXPLAIN) {
matches += explain_matches(f, pattern, defs); matches += explain_matches(f, pattern, defs);
} else if (options.mode == MODE_LISTFILES) { } else if (options.mode == MODE_LISTFILES) {
match_t *m = NULL; bp_match_t *m = NULL;
if (next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase)) { if (next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase)) {
printf("%s\n", f->filename); printf("%s\n", f->filename);
matches += 1; matches += 1;
@ -421,7 +421,7 @@ static int process_file(const char *filename, bp_pat_t *pattern, bp_pat_t *defs)
} else if (options.mode == MODE_JSON) { } else if (options.mode == MODE_JSON) {
matches += print_matches_as_json(f, pattern, defs); matches += print_matches_as_json(f, pattern, defs);
} else if (options.mode == MODE_INPLACE) { } else if (options.mode == MODE_INPLACE) {
match_t *m = NULL; bp_match_t *m = NULL;
bool found = next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); bool found = next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase);
stop_matching(&m); stop_matching(&m);
if (!found) return 0; if (!found) return 0;

6
json.c
View File

@ -8,14 +8,14 @@
#include "utils.h" #include "utils.h"
__attribute__((nonnull)) __attribute__((nonnull))
static int _json_match(const char *text, match_t *m, int comma, bool verbose); static int _json_match(const char *text, bp_match_t *m, int comma, bool verbose);
// //
// Helper function for json_match(). // Helper function for json_match().
// `comma` is used to track whether a comma will need to be printed before the // `comma` is used to track whether a comma will need to be printed before the
// next object or not. // next object or not.
// //
static int _json_match(const char *text, match_t *m, int comma, bool verbose) static int _json_match(const char *text, bp_match_t *m, int comma, bool verbose)
{ {
if (!verbose && m->pat->type != BP_TAGGED) { if (!verbose && m->pat->type != BP_TAGGED) {
if (m->children) { if (m->children) {
@ -78,7 +78,7 @@ static int _json_match(const char *text, match_t *m, int comma, bool verbose)
// //
// Print a match object as a JSON object. // Print a match object as a JSON object.
// //
public void json_match(const char *text, match_t *m, bool verbose) public void json_match(const char *text, bp_match_t *m, bool verbose)
{ {
(void)_json_match(text, m, 0, verbose); (void)_json_match(text, m, 0, verbose);
} }

2
json.h
View File

@ -8,6 +8,6 @@
#include "match.h" #include "match.h"
__attribute__((nonnull)) __attribute__((nonnull))
void json_match(const char *text, match_t *m, bool verbose); void json_match(const char *text, bp_match_t *m, bool verbose);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0

104
match.c
View File

@ -48,8 +48,8 @@ typedef struct match_ctx_s {
// the `unused_matches` linked list so it can be reused without the need for // the `unused_matches` linked list so it can be reused without the need for
// additional calls to malloc/free. Thus, it is an invariant that every match // additional calls to malloc/free. Thus, it is an invariant that every match
// object is in one of these two lists: // object is in one of these two lists:
static match_t *unused_matches = NULL; static bp_match_t *unused_matches = NULL;
static match_t *in_use_matches = NULL; static bp_match_t *in_use_matches = NULL;
static void default_error_handler(char **msg) { static void default_error_handler(char **msg) {
errx(EXIT_FAILURE, "%s", *msg); errx(EXIT_FAILURE, "%s", *msg);
@ -64,12 +64,12 @@ public bp_errhand_t bp_set_error_handler(bp_errhand_t new_handler)
return old_handler; return old_handler;
} }
#define MATCHES(...) (match_t*[]){__VA_ARGS__, NULL} #define MATCHES(...) (bp_match_t*[]){__VA_ARGS__, NULL}
__attribute__((hot, nonnull(1,2,3))) __attribute__((hot, nonnull(1,2,3)))
static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat); static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat);
__attribute__((returns_nonnull)) __attribute__((returns_nonnull))
static match_t *new_match(bp_pat_t *pat, const char *start, const char *end, match_t *children[]); static bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[]);
char *error_message = NULL; char *error_message = NULL;
@ -84,10 +84,10 @@ static inline void match_error(match_ctx_t *ctx, const char *fmt, ...)
longjmp(ctx->error_jump, 1); longjmp(ctx->error_jump, 1);
} }
static match_t *clone_match(match_t *m) static bp_match_t *clone_match(bp_match_t *m)
{ {
if (!m) return NULL; if (!m) return NULL;
match_t *ret = new_match(m->pat, m->start, m->end, NULL); bp_match_t *ret = new_match(m->pat, m->start, m->end, NULL);
if (m->children) { if (m->children) {
size_t child_cap = 0, nchildren = 0; size_t child_cap = 0, nchildren = 0;
if (!m->children[0] || !m->children[1] || !m->children[2]) { if (!m->children[0] || !m->children[1] || !m->children[2]) {
@ -106,7 +106,7 @@ static match_t *clone_match(match_t *m)
} }
// Prepend to a doubly linked list // Prepend to a doubly linked list
static inline void gc_list_prepend(match_t **head, match_t *m) static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m)
{ {
if (m->gc.home) if (m->gc.home)
errx(1, "Node already has a home"); errx(1, "Node already has a home");
@ -117,7 +117,7 @@ static inline void gc_list_prepend(match_t **head, match_t *m)
} }
// Remove from a doubly linked list // Remove from a doubly linked list
static inline void gc_list_remove(match_t *m) static inline void gc_list_remove(bp_match_t *m)
{ {
if (!m->gc.home) if (!m->gc.home)
errx(1, "Attempt to remove something that isn't in a list"); errx(1, "Attempt to remove something that isn't in a list");
@ -321,7 +321,7 @@ static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat)
// Find the next match at or after str (stepping over any text matched by skip) // Find the next match at or after str (stepping over any text matched by skip)
// //
__attribute__((nonnull(1,2,3))) __attribute__((nonnull(1,2,3)))
static match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, bp_pat_t *skip) static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, bp_pat_t *skip)
{ {
// Clear the cache so it's not full of old cache values from different parts of the file: // Clear the cache so it's not full of old cache values from different parts of the file:
cache_destroy(ctx); cache_destroy(ctx);
@ -348,9 +348,9 @@ static match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, bp
} }
do { do {
match_t *m = match(ctx, str, pat); bp_match_t *m = match(ctx, str, pat);
if (m) return m; if (m) return m;
match_t *skipped = skip ? match(ctx, str, skip) : NULL; bp_match_t *skipped = skip ? match(ctx, str, skip) : NULL;
if (skipped) { if (skipped) {
str = skipped->end > str ? skipped->end : str + 1; str = skipped->end > str ? skipped->end : str + 1;
recycle_match(&skipped); recycle_match(&skipped);
@ -364,7 +364,7 @@ static match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, bp
// match object, or NULL if no match is found. // match object, or NULL if no match is found.
// The returned value should be released with recycle_match() (not free()) to avoid memory leaking. // The returned value should be released with recycle_match() (not free()) to avoid memory leaking.
// //
static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
{ {
switch (pat->type) { switch (pat->type) {
case BP_DEFINITIONS: { case BP_DEFINITIONS: {
@ -372,7 +372,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
ctx2.cache = &(cache_t){0}; ctx2.cache = &(cache_t){0};
ctx2.parent_ctx = ctx; ctx2.parent_ctx = ctx;
ctx2.defs = pat; ctx2.defs = pat;
match_t *m = match(&ctx2, str, When(pat, BP_DEFINITIONS)->meaning); bp_match_t *m = match(&ctx2, str, When(pat, BP_DEFINITIONS)->meaning);
cache_destroy(&ctx2); cache_destroy(&ctx2);
return m; return m;
} }
@ -429,7 +429,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return new_match(pat, str, str+1, NULL); return new_match(pat, str, str+1, NULL);
} }
case BP_NOT: { case BP_NOT: {
match_t *m = match(ctx, str, When(pat, BP_NOT)->pat); bp_match_t *m = match(ctx, str, When(pat, BP_NOT)->pat);
if (m != NULL) { if (m != NULL) {
recycle_match(&m); recycle_match(&m);
return NULL; return NULL;
@ -437,7 +437,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return new_match(pat, str, str, NULL); return new_match(pat, str, str, NULL);
} }
case BP_UPTO: case BP_UPTO_STRICT: { case BP_UPTO: case BP_UPTO_STRICT: {
match_t *m = new_match(pat, str, str, NULL); bp_match_t *m = new_match(pat, str, str, NULL);
bp_pat_t *target = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target), bp_pat_t *target = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target),
*skip = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->skip : When(pat, BP_UPTO_STRICT)->skip); *skip = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->skip : When(pat, BP_UPTO_STRICT)->skip);
if (!target && !skip) { if (!target && !skip) {
@ -450,7 +450,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
for (const char *prev = NULL; prev < str; ) { for (const char *prev = NULL; prev < str; ) {
prev = str; prev = str;
if (target) { if (target) {
match_t *p = match(ctx, str, target); bp_match_t *p = match(ctx, str, target);
if (p != NULL) { if (p != NULL) {
recycle_match(&p); recycle_match(&p);
m->end = str; m->end = str;
@ -461,7 +461,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return m; return m;
} }
if (skip) { if (skip) {
match_t *s = match(ctx, str, skip); bp_match_t *s = match(ctx, str, skip);
if (s != NULL) { if (s != NULL) {
str = s->end; str = s->end;
if (nchildren+2 >= child_cap) { if (nchildren+2 >= child_cap) {
@ -482,7 +482,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return NULL; return NULL;
} }
case BP_REPEAT: { case BP_REPEAT: {
match_t *m = new_match(pat, str, str, NULL); bp_match_t *m = new_match(pat, str, str, NULL);
size_t reps = 0; size_t reps = 0;
auto repeat = When(pat, BP_REPEAT); auto repeat = When(pat, BP_REPEAT);
bp_pat_t *repeating = deref(ctx, repeat->repeat_pat); bp_pat_t *repeating = deref(ctx, repeat->repeat_pat);
@ -491,13 +491,13 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
for (reps = 0; repeat->max == -1 || reps < (size_t)repeat->max; ++reps) { for (reps = 0; repeat->max == -1 || reps < (size_t)repeat->max; ++reps) {
const char *start = str; const char *start = str;
// Separator // Separator
match_t *msep = NULL; bp_match_t *msep = NULL;
if (sep != NULL && reps > 0) { if (sep != NULL && reps > 0) {
msep = match(ctx, str, sep); msep = match(ctx, str, sep);
if (msep == NULL) break; if (msep == NULL) break;
str = msep->end; str = msep->end;
} }
match_t *mp = match(ctx, str, repeating); bp_match_t *mp = match(ctx, str, repeating);
if (mp == NULL) { if (mp == NULL) {
str = start; str = start;
if (msep) recycle_match(&msep); if (msep) recycle_match(&msep);
@ -558,7 +558,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
pos = prev_char(ctx->start, pos)) { pos = prev_char(ctx->start, pos)) {
cache_destroy(&slice_ctx); cache_destroy(&slice_ctx);
slice_ctx.start = (char*)pos; slice_ctx.start = (char*)pos;
match_t *m = match(&slice_ctx, pos, back); bp_match_t *m = match(&slice_ctx, pos, back);
// Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB") // Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
if (m && m->end != str) if (m && m->end != str)
recycle_match(&m); recycle_match(&m);
@ -575,18 +575,18 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return NULL; return NULL;
} }
case BP_BEFORE: { case BP_BEFORE: {
match_t *after = match(ctx, str, When(pat, BP_BEFORE)->pat); bp_match_t *after = match(ctx, str, When(pat, BP_BEFORE)->pat);
return after ? new_match(pat, str, str, MATCHES(after)) : NULL; return after ? new_match(pat, str, str, MATCHES(after)) : NULL;
} }
case BP_CAPTURE: case BP_TAGGED: { case BP_CAPTURE: case BP_TAGGED: {
bp_pat_t *to_match = pat->type == BP_CAPTURE ? When(pat, BP_CAPTURE)->pat : When(pat, BP_TAGGED)->pat; bp_pat_t *to_match = pat->type == BP_CAPTURE ? When(pat, BP_CAPTURE)->pat : When(pat, BP_TAGGED)->pat;
if (!to_match) if (!to_match)
return new_match(pat, str, str, NULL); return new_match(pat, str, str, NULL);
match_t *p = match(ctx, str, to_match); bp_match_t *p = match(ctx, str, to_match);
return p ? new_match(pat, str, p->end, MATCHES(p)) : NULL; return p ? new_match(pat, str, p->end, MATCHES(p)) : NULL;
} }
case BP_OTHERWISE: { case BP_OTHERWISE: {
match_t *m = match(ctx, str, When(pat, BP_OTHERWISE)->first); bp_match_t *m = match(ctx, str, When(pat, BP_OTHERWISE)->first);
return m ? m : match(ctx, str, When(pat, BP_OTHERWISE)->second); return m ? m : match(ctx, str, When(pat, BP_OTHERWISE)->second);
} }
case BP_CHAIN: { case BP_CHAIN: {
@ -596,15 +596,15 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
ctx2.cache = &(cache_t){0}; ctx2.cache = &(cache_t){0};
ctx2.parent_ctx = ctx; ctx2.parent_ctx = ctx;
ctx2.defs = chain->first; ctx2.defs = chain->first;
match_t *m = match(&ctx2, str, chain->second); bp_match_t *m = match(&ctx2, str, chain->second);
cache_destroy(&ctx2); cache_destroy(&ctx2);
return m; return m;
} }
match_t *m1 = match(ctx, str, chain->first); bp_match_t *m1 = match(ctx, str, chain->first);
if (m1 == NULL) return NULL; if (m1 == NULL) return NULL;
match_t *m2; bp_match_t *m2;
// Push backrefs and run matching, then cleanup // Push backrefs and run matching, then cleanup
if (m1->pat->type == BP_CAPTURE && When(m1->pat, BP_CAPTURE)->name && When(m1->pat, BP_CAPTURE)->backreffable) { if (m1->pat->type == BP_CAPTURE && When(m1->pat, BP_CAPTURE)->name && When(m1->pat, BP_CAPTURE)->backreffable) {
// Temporarily add a rule that the backref name matches the // Temporarily add a rule that the backref name matches the
@ -654,7 +654,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
} }
case BP_MATCH: case BP_NOT_MATCH: { case BP_MATCH: case BP_NOT_MATCH: {
bp_pat_t *target = pat->type == BP_MATCH ? When(pat, BP_MATCH)->pat : When(pat, BP_NOT_MATCH)->pat; bp_pat_t *target = pat->type == BP_MATCH ? When(pat, BP_MATCH)->pat : When(pat, BP_NOT_MATCH)->pat;
match_t *m1 = match(ctx, str, target); bp_match_t *m1 = match(ctx, str, target);
if (m1 == NULL) return NULL; if (m1 == NULL) return NULL;
// <p1>~<p2> matches iff the text of <p1> matches <p2> // <p1>~<p2> matches iff the text of <p1> matches <p2>
@ -663,7 +663,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
slice_ctx.cache = &(cache_t){0}; slice_ctx.cache = &(cache_t){0};
slice_ctx.start = m1->start; slice_ctx.start = m1->start;
slice_ctx.end = m1->end; slice_ctx.end = m1->end;
match_t *ret = NULL, *m2 = NULL; bp_match_t *ret = NULL, *m2 = NULL;
if (pat->type == BP_MATCH) { if (pat->type == BP_MATCH) {
m2 = _next_match(&slice_ctx, slice_ctx.start, When(pat, BP_MATCH)->must_match, NULL); m2 = _next_match(&slice_ctx, slice_ctx.start, When(pat, BP_MATCH)->must_match, NULL);
if (m2) ret = new_match(pat, m1->start, m1->end, MATCHES(m1, m2)); if (m2) ret = new_match(pat, m1->start, m1->end, MATCHES(m1, m2));
@ -679,7 +679,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return ret; return ret;
} }
case BP_REPLACE: { case BP_REPLACE: {
match_t *p = NULL; bp_match_t *p = NULL;
auto replace = When(pat, BP_REPLACE); auto replace = When(pat, BP_REPLACE);
if (replace->pat) { if (replace->pat) {
p = match(ctx, str, replace->pat); p = match(ctx, str, replace->pat);
@ -725,14 +725,14 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
}, },
}; };
match_t *m = match(&ctx2, str, ref); bp_match_t *m = match(&ctx2, str, ref);
// If left recursion was involved, keep retrying while forward progress can be made: // If left recursion was involved, keep retrying while forward progress can be made:
if (m && rec_op.__tagged.BP_LEFTRECURSION.visited) { if (m && rec_op.__tagged.BP_LEFTRECURSION.visited) {
while (1) { while (1) {
const char *prev = m->end; const char *prev = m->end;
rec_op.__tagged.BP_LEFTRECURSION.match = m; rec_op.__tagged.BP_LEFTRECURSION.match = m;
ctx2.cache = &(cache_t){0}; ctx2.cache = &(cache_t){0};
match_t *m2 = match(&ctx2, str, ref); bp_match_t *m2 = match(&ctx2, str, ref);
cache_destroy(&ctx2); cache_destroy(&ctx2);
if (!m2) break; if (!m2) break;
if (m2->end <= prev) { if (m2->end <= prev) {
@ -788,15 +788,15 @@ static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
// //
// Return a match object which can be used (may be allocated or recycled). // Return a match object which can be used (may be allocated or recycled).
// //
match_t *new_match(bp_pat_t *pat, const char *start, const char *end, match_t *children[]) bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[])
{ {
match_t *m; bp_match_t *m;
if (unused_matches) { if (unused_matches) {
m = unused_matches; m = unused_matches;
gc_list_remove(m); gc_list_remove(m);
memset(m, 0, sizeof(match_t)); memset(m, 0, sizeof(bp_match_t));
} else { } else {
m = new(match_t); m = new(bp_match_t);
} }
// Keep track of the object: // Keep track of the object:
gc_list_prepend(&in_use_matches, m); gc_list_prepend(&in_use_matches, m);
@ -817,9 +817,9 @@ match_t *new_match(bp_pat_t *pat, const char *start, const char *end, match_t *c
// If the given match is not currently a child member of another match (or // If the given match is not currently a child member of another match (or
// otherwise reserved) then put it back in the pool of unused match objects. // otherwise reserved) then put it back in the pool of unused match objects.
// //
public void recycle_match(match_t **at_m) public void recycle_match(bp_match_t **at_m)
{ {
match_t *m = *at_m; bp_match_t *m = *at_m;
if (m->children) { if (m->children) {
for (int i = 0; m->children[i]; i++) for (int i = 0; m->children[i]; i++)
recycle_match(&m->children[i]); recycle_match(&m->children[i]);
@ -828,7 +828,7 @@ public void recycle_match(match_t **at_m)
} }
gc_list_remove(m); gc_list_remove(m);
(void)memset(m, 0, sizeof(match_t)); (void)memset(m, 0, sizeof(bp_match_t));
gc_list_prepend(&unused_matches, m); gc_list_prepend(&unused_matches, m);
*at_m = NULL; *at_m = NULL;
} }
@ -839,7 +839,7 @@ public void recycle_match(match_t **at_m)
public size_t recycle_all_matches(void) public size_t recycle_all_matches(void)
{ {
size_t count = 0; size_t count = 0;
for (match_t *m; (m = in_use_matches); ++count) { for (bp_match_t *m; (m = in_use_matches); ++count) {
gc_list_remove(m); gc_list_remove(m);
if (m->children && m->children != m->_children) if (m->children && m->children != m->_children)
delete(&m->children); delete(&m->children);
@ -855,7 +855,7 @@ public size_t free_all_matches(void)
{ {
size_t count = 0; size_t count = 0;
recycle_all_matches(); recycle_all_matches();
for (match_t *m; (m = unused_matches); ++count) { for (bp_match_t *m; (m = unused_matches); ++count) {
gc_list_remove(m); gc_list_remove(m);
delete(&m); delete(&m);
} }
@ -864,9 +864,9 @@ public size_t free_all_matches(void)
// //
// Iterate over matches. // Iterate over matches.
// Usage: for (match_t *m = NULL; next_match(&m, ...); ) {...} // Usage: for (bp_match_t *m = NULL; next_match(&m, ...); ) {...}
// //
public bool next_match(match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, bool ignorecase) public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, bool ignorecase)
{ {
const char *pos; const char *pos;
if (*m) { if (*m) {
@ -911,7 +911,7 @@ public bool next_match(match_t **m, const char *start, const char *end, bp_pat_t
// Helper function to track state while doing a depth-first search. // Helper function to track state while doing a depth-first search.
// //
__attribute__((nonnull)) __attribute__((nonnull))
static match_t *_get_numbered_capture(match_t *m, int *n) static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n)
{ {
if ((m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) || m->pat->type == BP_TAGGED) { if ((m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) || m->pat->type == BP_TAGGED) {
if (*n == 1) { if (*n == 1) {
@ -927,7 +927,7 @@ static match_t *_get_numbered_capture(match_t *m, int *n)
if (m->children) { if (m->children) {
for (int i = 0; m->children[i]; i++) { for (int i = 0; m->children[i]; i++) {
match_t *cap = _get_numbered_capture(m->children[i], n); bp_match_t *cap = _get_numbered_capture(m->children[i], n);
if (cap) return cap; if (cap) return cap;
} }
} }
@ -937,14 +937,14 @@ static match_t *_get_numbered_capture(match_t *m, int *n)
// //
// Get a specific numbered pattern capture. // Get a specific numbered pattern capture.
// //
public match_t *get_numbered_capture(match_t *m, int n) public bp_match_t *get_numbered_capture(bp_match_t *m, int n)
{ {
if (n <= 0) return m; if (n <= 0) return m;
if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) { if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) {
if (n == 1 && m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) return m; if (n == 1 && m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) return m;
if (m->children) { if (m->children) {
for (int i = 0; m->children[i]; i++) { for (int i = 0; m->children[i]; i++) {
match_t *cap = _get_numbered_capture(m->children[i], &n); bp_match_t *cap = _get_numbered_capture(m->children[i], &n);
if (cap) return cap; if (cap) return cap;
} }
} }
@ -957,7 +957,7 @@ public match_t *get_numbered_capture(match_t *m, int n)
// //
// Helper function for get_named_capture() // Helper function for get_named_capture()
// //
match_t *_get_named_capture(match_t *m, const char *name, size_t namelen) bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen)
{ {
if (m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->name if (m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->name
&& When(m->pat, BP_CAPTURE)->namelen == namelen && When(m->pat, BP_CAPTURE)->namelen == namelen
@ -969,7 +969,7 @@ match_t *_get_named_capture(match_t *m, const char *name, size_t namelen)
if (m->children) { if (m->children) {
for (int i = 0; m->children[i]; i++) { for (int i = 0; m->children[i]; i++) {
match_t *cap = _get_named_capture(m->children[i], name, namelen); bp_match_t *cap = _get_named_capture(m->children[i], name, namelen);
if (cap) return cap; if (cap) return cap;
} }
} }
@ -979,13 +979,13 @@ match_t *_get_named_capture(match_t *m, const char *name, size_t namelen)
// //
// Get a capture with a specific name. // Get a capture with a specific name.
// //
public match_t *get_named_capture(match_t *m, const char *name, ssize_t _namelen) public bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t _namelen)
{ {
size_t namelen = _namelen < 0 ? strlen(name) : (size_t)_namelen; size_t namelen = _namelen < 0 ? strlen(name) : (size_t)_namelen;
if (m->pat->type == BP_TAGGED) {// || (m->pat->type == BP_CAPTURE && m->pat->args.capture.namelen > 0)) { if (m->pat->type == BP_TAGGED) {// || (m->pat->type == BP_CAPTURE && m->pat->args.capture.namelen > 0)) {
if (m->children) { if (m->children) {
for (int i = 0; m->children[i]; i++) { for (int i = 0; m->children[i]; i++) {
match_t *cap = _get_named_capture(m->children[i], name, namelen); bp_match_t *cap = _get_named_capture(m->children[i], name, namelen);
if (cap) return cap; if (cap) return cap;
} }
} }

19
match.h
View File

@ -12,30 +12,31 @@
// //
// Pattern matching result object // Pattern matching result object
// //
typedef struct match_s { typedef struct bp_match_s bp_match_t;
struct bp_match_s {
// Where the match starts and ends (end is after the last character) // Where the match starts and ends (end is after the last character)
const char *start, *end; const char *start, *end;
bp_pat_t *pat; bp_pat_t *pat;
// Intrusive linked list node for garbage collection: // Intrusive linked list node for garbage collection:
struct { struct {
struct match_s **home, *next; bp_match_t **home, *next;
} gc; } gc;
struct match_s **children; bp_match_t **children;
struct match_s *_children[3]; bp_match_t *_children[3];
} match_t; };
typedef void (*bp_errhand_t)(char **err_msg); typedef void (*bp_errhand_t)(char **err_msg);
__attribute__((nonnull)) __attribute__((nonnull))
void recycle_match(match_t **at_m); void recycle_match(bp_match_t **at_m);
size_t free_all_matches(void); size_t free_all_matches(void);
size_t recycle_all_matches(void); size_t recycle_all_matches(void);
bool next_match(match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, bool ignorecase); bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, bool ignorecase);
#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, NULL, 0) #define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, NULL, 0)
bp_errhand_t bp_set_error_handler(bp_errhand_t handler); bp_errhand_t bp_set_error_handler(bp_errhand_t handler);
__attribute__((nonnull)) __attribute__((nonnull))
match_t *get_numbered_capture(match_t *m, int n); bp_match_t *get_numbered_capture(bp_match_t *m, int n);
__attribute__((nonnull, pure)) __attribute__((nonnull, pure))
match_t *get_named_capture(match_t *m, const char *name, ssize_t namelen); bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t namelen);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0

View File

@ -116,7 +116,7 @@ struct bp_pat_s {
bool backreffable; bool backreffable;
} BP_TAGGED; } BP_TAGGED;
struct { struct {
struct match_s *match; struct bp_match_s *match;
const char *at; const char *at;
bp_pat_t *fallback; bp_pat_t *fallback;
void *ctx; void *ctx;
@ -126,7 +126,7 @@ struct bp_pat_s {
}; };
typedef struct leftrec_info_s { typedef struct leftrec_info_s {
struct match_s *match; struct bp_match_s *match;
const char *at; const char *at;
bp_pat_t *fallback; bp_pat_t *fallback;
void *ctx; void *ctx;

View File

@ -11,12 +11,12 @@
#include "utils.h" #include "utils.h"
typedef struct match_node_s { typedef struct match_node_s {
match_t *m; bp_match_t *m;
struct match_node_s *next; struct match_node_s *next;
} match_node_t; } match_node_t;
__attribute__((nonnull, pure)) __attribute__((nonnull, pure))
static int height_of_match(match_t *m); static int height_of_match(bp_match_t *m);
__attribute__((nonnull)) __attribute__((nonnull))
static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen); static void _explain_matches(match_node_t *firstmatch, int depth, const char *text, size_t textlen);
@ -24,11 +24,11 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
// Return the height of a match object (i.e. the number of descendents of the // Return the height of a match object (i.e. the number of descendents of the
// structure). // structure).
// //
static int height_of_match(match_t *m) static int height_of_match(bp_match_t *m)
{ {
int height = 0; int height = 0;
for (int i = 0; m->children && m->children[i]; i++) { for (int i = 0; m->children && m->children[i]; i++) {
match_t *child = m->children[i]; bp_match_t *child = m->children[i];
int childheight = height_of_match(child); int childheight = height_of_match(child);
if (childheight > height) height = childheight; if (childheight > height) height = childheight;
} }
@ -44,7 +44,7 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
const char *H = ""; // Horizontal bar const char *H = ""; // Horizontal bar
const char *color = (depth % 2 == 0) ? "34" : "33"; const char *color = (depth % 2 == 0) ? "34" : "33";
match_t *viz = firstmatch->m; bp_match_t *viz = firstmatch->m;
// This is a heuristic: print matches first if they have more submatches. // This is a heuristic: print matches first if they have more submatches.
// In general, this helps reduce the height of the final output by allowing // In general, this helps reduce the height of the final output by allowing
// for more rows that show the same rule matching in multiple places. // for more rows that show the same rule matching in multiple places.
@ -87,7 +87,7 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
if (RIGHT_TYPE(m)) { if (RIGHT_TYPE(m)) {
// Instead of printing each subchain on its own line, flatten them all out at once: // Instead of printing each subchain on its own line, flatten them all out at once:
if (m->m->pat->type == BP_CHAIN) { if (m->m->pat->type == BP_CHAIN) {
match_t *tmp = m->m; bp_match_t *tmp = m->m;
while (tmp->pat->type == BP_CHAIN) { while (tmp->pat->type == BP_CHAIN) {
*nextchild = new(match_node_t); *nextchild = new(match_node_t);
(*nextchild)->m = tmp->children[0]; (*nextchild)->m = tmp->children[0];
@ -178,7 +178,7 @@ static void _explain_matches(match_node_t *firstmatch, int depth, const char *te
// //
// Print a visualization of a match object. // Print a visualization of a match object.
// //
public void explain_match(match_t *m) public void explain_match(bp_match_t *m)
{ {
printf("\033[?7l"); // Disable line wrapping printf("\033[?7l"); // Disable line wrapping
match_node_t first = {.m = m}; match_node_t first = {.m = m};
@ -196,7 +196,7 @@ static inline int fputc_safe(FILE *out, char c, print_options_t *opts)
return printed; return printed;
} }
public int fprint_match(FILE *out, const char *file_start, match_t *m, print_options_t *opts) public int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_options_t *opts)
{ {
int printed = 0; int printed = 0;
if (m->pat->type == BP_REPLACE) { if (m->pat->type == BP_REPLACE) {
@ -211,7 +211,7 @@ public int fprint_match(FILE *out, const char *file_start, match_t *m, print_opt
if (*r == '@' && r+1 < end && r[1] != '@') { if (*r == '@' && r+1 < end && r[1] != '@') {
const char *next = r+1; const char *next = r+1;
// Retrieve the capture value: // Retrieve the capture value:
match_t *cap = NULL; bp_match_t *cap = NULL;
if (isdigit(*next)) { if (isdigit(*next)) {
int n = (int)strtol(next, (char**)&next, 10); int n = (int)strtol(next, (char**)&next, 10);
cap = get_numbered_capture(m->children[0], n); cap = get_numbered_capture(m->children[0], n);
@ -256,7 +256,7 @@ public int fprint_match(FILE *out, const char *file_start, match_t *m, print_opt
if (opts && opts->match_color) printed += fprintf(out, "%s", opts->match_color); if (opts && opts->match_color) printed += fprintf(out, "%s", opts->match_color);
const char *prev = m->start; const char *prev = m->start;
for (int i = 0; m->children && m->children[i]; i++) { for (int i = 0; m->children && m->children[i]; i++) {
match_t *child = m->children[i]; bp_match_t *child = m->children[i];
// Skip children from e.g. zero-width matches like >@foo // Skip children from e.g. zero-width matches like >@foo
if (!(prev <= child->start && child->start <= m->end && if (!(prev <= child->start && child->start <= m->end &&
prev <= child->end && child->end <= m->end)) prev <= child->end && child->end <= m->end))

View File

@ -11,9 +11,9 @@ typedef struct {
void (*on_nl)(FILE *out); void (*on_nl)(FILE *out);
} print_options_t; } print_options_t;
__attribute__((nonnull(1,2,3))) __attribute__((nonnull(1,2,3)))
int fprint_match(FILE *out, const char *file_start, match_t *m, print_options_t *opts); int fprint_match(FILE *out, const char *file_start, bp_match_t *m, print_options_t *opts);
__attribute__((nonnull)) __attribute__((nonnull))
void explain_match(match_t *m); void explain_match(bp_match_t *m);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0