Rename 'pat_t' -> 'bp_pat_t'
This commit is contained in:
parent
1597b34a95
commit
f271863601
24
Lua/lbp.c
24
Lua/lbp.c
@ -28,7 +28,7 @@ static const char *builtins_source = (
|
||||
#include "builtins.h"
|
||||
);
|
||||
static int MATCH_METATABLE = 0, PAT_METATABLE = 0;
|
||||
static pat_t *builtins;
|
||||
static bp_pat_t *builtins;
|
||||
|
||||
static void push_match(lua_State *L, match_t *m, const char *start);
|
||||
|
||||
@ -60,7 +60,7 @@ static int Lcompile(lua_State *L)
|
||||
raise_parse_error(L, maybe_pat);
|
||||
return 0;
|
||||
}
|
||||
pat_t **pat_storage = (pat_t**)lua_newuserdatauv(L, sizeof(pat_t*), 1);
|
||||
bp_pat_t **pat_storage = (bp_pat_t**)lua_newuserdatauv(L, sizeof(bp_pat_t*), 1);
|
||||
*pat_storage = maybe_pat.value.pat;
|
||||
lua_pushvalue(L, 1);
|
||||
lua_setiuservalue(L, -2, 1);
|
||||
@ -85,7 +85,7 @@ static match_t *get_first_capture(match_t *m)
|
||||
{
|
||||
if (m->pat->type == BP_TAGGED) {
|
||||
return m;
|
||||
} else if (m->pat->type == BP_CAPTURE && !Match(m->pat, BP_CAPTURE)->name) {
|
||||
} else if (m->pat->type == BP_CAPTURE && !When(m->pat, BP_CAPTURE)->name) {
|
||||
return m;
|
||||
} else if (m->children) {
|
||||
for (int i = 0; m->children[i]; i++) {
|
||||
@ -101,7 +101,7 @@ static void set_capture_fields(lua_State *L, match_t *m, int *n, const char *sta
|
||||
if (m->pat->type == BP_CAPTURE) {
|
||||
match_t *cap = get_first_capture(m->children[0]);
|
||||
if (!cap) cap = m->children[0];
|
||||
auto capture = Match(m->pat, BP_CAPTURE);
|
||||
auto capture = When(m->pat, BP_CAPTURE);
|
||||
if (capture->namelen > 0) {
|
||||
lua_pushlstring(L, capture->name, capture->namelen);
|
||||
push_match(L, cap, start);
|
||||
@ -129,7 +129,7 @@ static void push_match(lua_State *L, match_t *m, const char *start)
|
||||
lua_seti(L, -2, 0);
|
||||
|
||||
if (m->pat->type == BP_TAGGED) {
|
||||
auto tagged = Match(m->pat, BP_TAGGED);
|
||||
auto tagged = When(m->pat, BP_TAGGED);
|
||||
lua_pushlstring(L, tagged->name, tagged->namelen);
|
||||
lua_setfield(L, -2, "__tag");
|
||||
}
|
||||
@ -151,8 +151,8 @@ static int Lmatch(lua_State *L)
|
||||
return 0;
|
||||
lua_replace(L, 1);
|
||||
}
|
||||
pat_t **at_pat = lua_touserdata(L, 1);
|
||||
pat_t *pat = at_pat ? *at_pat : NULL;
|
||||
bp_pat_t **at_pat = lua_touserdata(L, 1);
|
||||
bp_pat_t *pat = at_pat ? *at_pat : NULL;
|
||||
if (!pat) luaL_error(L, "Not a valid pattern");
|
||||
|
||||
size_t textlen;
|
||||
@ -190,8 +190,8 @@ static int Lreplace(lua_State *L)
|
||||
return 0;
|
||||
lua_replace(L, 1);
|
||||
}
|
||||
pat_t **at_pat = lua_touserdata(L, 1);
|
||||
pat_t *pat = at_pat ? *at_pat : NULL;
|
||||
bp_pat_t **at_pat = lua_touserdata(L, 1);
|
||||
bp_pat_t *pat = at_pat ? *at_pat : NULL;
|
||||
if (!pat) luaL_error(L, "Not a valid pattern");
|
||||
|
||||
size_t replen, textlen;
|
||||
@ -212,7 +212,7 @@ static int Lreplace(lua_State *L)
|
||||
FILE *out = open_memstream(&buf, &size);
|
||||
int replacements = 0;
|
||||
const char *prev = text;
|
||||
pat_t *rep_pat = maybe_replacement.value.pat;
|
||||
bp_pat_t *rep_pat = maybe_replacement.value.pat;
|
||||
cur_state = L;
|
||||
bp_errhand_t old = bp_set_error_handler(match_error);
|
||||
for (match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, builtins, NULL, false); ) {
|
||||
@ -289,8 +289,8 @@ static int Lpat_tostring(lua_State *L)
|
||||
static int Lpat_gc(lua_State *L)
|
||||
{
|
||||
(void)L;
|
||||
pat_t **at_pat = lua_touserdata(L, 1);
|
||||
pat_t *pat = *at_pat;
|
||||
bp_pat_t **at_pat = lua_touserdata(L, 1);
|
||||
bp_pat_t *pat = *at_pat;
|
||||
if (pat) delete_pat(at_pat, true);
|
||||
(void)pat;
|
||||
return 0;
|
||||
|
28
bp.c
28
bp.c
@ -67,7 +67,7 @@ static struct {
|
||||
bool ignorecase, verbose, git_mode, print_filenames;
|
||||
enum { MODE_NORMAL, MODE_LISTFILES, MODE_INPLACE, MODE_JSON, MODE_EXPLAIN } mode;
|
||||
enum { FORMAT_AUTO, FORMAT_FANCY, FORMAT_PLAIN, FORMAT_BARE, FORMAT_FILE_LINE } format;
|
||||
pat_t *skip;
|
||||
bp_pat_t *skip;
|
||||
} options = {
|
||||
.context_before = USE_DEFAULT_CONTEXT,
|
||||
.context_after = USE_DEFAULT_CONTEXT,
|
||||
@ -103,7 +103,7 @@ static inline void fprint_filename(FILE *out, const char *filename)
|
||||
//
|
||||
// If there was a parse error while building a pattern, print an error message and exit.
|
||||
//
|
||||
static inline pat_t *assert_pat(const char *start, const char *end, maybe_pat_t maybe_pat)
|
||||
static inline bp_pat_t *assert_pat(const char *start, const char *end, maybe_pat_t maybe_pat)
|
||||
{
|
||||
if (!end) end = start + strlen(start);
|
||||
if (!maybe_pat.success) {
|
||||
@ -208,7 +208,7 @@ static int is_text_file(const char *filename)
|
||||
//
|
||||
// Print matches in JSON format.
|
||||
//
|
||||
static int print_matches_as_json(file_t *f, pat_t *pattern, pat_t *defs)
|
||||
static int print_matches_as_json(file_t *f, bp_pat_t *pattern, bp_pat_t *defs)
|
||||
{
|
||||
int nmatches = 0;
|
||||
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
|
||||
@ -224,7 +224,7 @@ static int print_matches_as_json(file_t *f, pat_t *pattern, pat_t *defs)
|
||||
//
|
||||
// Print matches in a visual explanation style
|
||||
//
|
||||
static int explain_matches(file_t *f, pat_t *pattern, pat_t *defs)
|
||||
static int explain_matches(file_t *f, bp_pat_t *pattern, bp_pat_t *defs)
|
||||
{
|
||||
int nmatches = 0;
|
||||
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
|
||||
@ -355,7 +355,7 @@ static void on_nl(FILE *out)
|
||||
//
|
||||
// Print all the matches in a file.
|
||||
//
|
||||
static int print_matches(FILE *out, file_t *f, pat_t *pattern, pat_t *defs)
|
||||
static int print_matches(FILE *out, file_t *f, bp_pat_t *pattern, bp_pat_t *defs)
|
||||
{
|
||||
static int printed_filenames = 0;
|
||||
int matches = 0;
|
||||
@ -400,7 +400,7 @@ static int print_matches(FILE *out, file_t *f, pat_t *pattern, pat_t *defs)
|
||||
// against it, printing any results according to the flags.
|
||||
//
|
||||
__attribute__((nonnull))
|
||||
static int process_file(const char *filename, pat_t *pattern, pat_t *defs)
|
||||
static int process_file(const char *filename, bp_pat_t *pattern, bp_pat_t *defs)
|
||||
{
|
||||
file_t *f = load_file(NULL, filename);
|
||||
if (f == NULL) {
|
||||
@ -458,7 +458,7 @@ static int process_file(const char *filename, pat_t *pattern, pat_t *defs)
|
||||
// Recursively process all non-dotfile files in the given directory.
|
||||
//
|
||||
__attribute__((nonnull))
|
||||
static int process_dir(const char *dirname, pat_t *pattern, pat_t *defs)
|
||||
static int process_dir(const char *dirname, bp_pat_t *pattern, bp_pat_t *defs)
|
||||
{
|
||||
int matches = 0;
|
||||
glob_t globbuf;
|
||||
@ -488,7 +488,7 @@ static int process_dir(const char *dirname, pat_t *pattern, pat_t *defs)
|
||||
// Process git files using `git ls-files ...`
|
||||
//
|
||||
__attribute__((nonnull(1)))
|
||||
static int process_git_files(pat_t *pattern, pat_t *defs, int argc, char *argv[])
|
||||
static int process_git_files(bp_pat_t *pattern, bp_pat_t *defs, int argc, char *argv[])
|
||||
{
|
||||
int fds[2];
|
||||
require(pipe(fds), "Failed to create pipe");
|
||||
@ -525,7 +525,7 @@ static int process_git_files(pat_t *pattern, pat_t *defs, int argc, char *argv[]
|
||||
// Load the given grammar (semicolon-separated definitions)
|
||||
// and return the first rule defined.
|
||||
//
|
||||
static pat_t *load_grammar(pat_t *defs, file_t *f)
|
||||
static bp_pat_t *load_grammar(bp_pat_t *defs, file_t *f)
|
||||
{
|
||||
return chain_together(defs, assert_pat(f->start, f->end, bp_pattern(f->start, f->end)));
|
||||
}
|
||||
@ -562,9 +562,9 @@ int main(int argc, char *argv[])
|
||||
if (set_pattern_printf_specifier('P'))
|
||||
errx(1, "Couldn't set printf specifier");
|
||||
|
||||
pat_t *defs = NULL;
|
||||
bp_pat_t *defs = NULL;
|
||||
file_t *loaded_files = NULL;
|
||||
pat_t *pattern = NULL;
|
||||
bp_pat_t *pattern = NULL;
|
||||
|
||||
// Load builtins:
|
||||
file_t *builtins_file = load_file(&loaded_files, "/etc/"BP_NAME"/builtins.bp");
|
||||
@ -627,10 +627,10 @@ int main(int argc, char *argv[])
|
||||
if (!explicit_case_sensitivity)
|
||||
options.ignorecase = !any_uppercase(flag);
|
||||
delete(&flag);
|
||||
pat_t *p = assert_pat(arg_file->start, arg_file->end, bp_stringpattern(arg_file->start, arg_file->end));
|
||||
bp_pat_t *p = assert_pat(arg_file->start, arg_file->end, bp_stringpattern(arg_file->start, arg_file->end));
|
||||
pattern = chain_together(pattern, p);
|
||||
} else if (FLAG("-s") || FLAG("--skip")) {
|
||||
pat_t *s = assert_pat(flag, NULL, bp_pattern(flag, flag+strlen(flag)));
|
||||
bp_pat_t *s = assert_pat(flag, NULL, bp_pattern(flag, flag+strlen(flag)));
|
||||
options.skip = either_pat(options.skip, s);
|
||||
} else if (FLAG("-C") || FLAG("--context")) {
|
||||
options.context_before = options.context_after = context_from_flag(flag);
|
||||
@ -651,7 +651,7 @@ int main(int argc, char *argv[])
|
||||
errx(EXIT_FAILURE, "Unrecognized flag: -%c\n\n%s", argv[0][1], usage);
|
||||
} else if (argv[0][0] != '-') {
|
||||
if (pattern != NULL) break;
|
||||
pat_t *p = assert_pat(argv[0], NULL, bp_stringpattern(argv[0], argv[0]+strlen(argv[0])));
|
||||
bp_pat_t *p = assert_pat(argv[0], NULL, bp_stringpattern(argv[0], argv[0]+strlen(argv[0])));
|
||||
if (!explicit_case_sensitivity)
|
||||
options.ignorecase = !any_uppercase(argv[0]);
|
||||
pattern = chain_together(pattern, p);
|
||||
|
64
match.c
64
match.c
@ -20,7 +20,7 @@
|
||||
|
||||
// Cache entries for results of matching a pattern at a string position
|
||||
typedef struct cache_entry_s {
|
||||
pat_t *pat;
|
||||
bp_pat_t *pat;
|
||||
const char *start;
|
||||
// Cache entries use a chained scatter approach modeled after Lua's tables
|
||||
struct cache_entry_s *next_probe;
|
||||
@ -35,7 +35,7 @@ typedef struct {
|
||||
// Data structure for holding ambient state values during matching
|
||||
typedef struct match_ctx_s {
|
||||
struct match_ctx_s *parent_ctx;
|
||||
pat_t *defs;
|
||||
bp_pat_t *defs;
|
||||
cache_t *cache;
|
||||
const char *start, *end;
|
||||
jmp_buf error_jump;
|
||||
@ -67,9 +67,9 @@ public bp_errhand_t bp_set_error_handler(bp_errhand_t new_handler)
|
||||
#define MATCHES(...) (match_t*[]){__VA_ARGS__, NULL}
|
||||
|
||||
__attribute__((hot, nonnull(1,2,3)))
|
||||
static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat);
|
||||
static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat);
|
||||
__attribute__((returns_nonnull))
|
||||
static match_t *new_match(pat_t *pat, const char *start, const char *end, match_t *children[]);
|
||||
static match_t *new_match(bp_pat_t *pat, const char *start, const char *end, match_t *children[]);
|
||||
|
||||
char *error_message = NULL;
|
||||
|
||||
@ -138,7 +138,7 @@ static inline size_t hash(const char *str, size_t pat_id)
|
||||
//
|
||||
// Check if we have cached a failure to match a given pattern at the given position.
|
||||
//
|
||||
static bool has_cached_failure(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
|
||||
{
|
||||
if (!ctx->cache->fails) return false;
|
||||
for (cache_entry_t *fail = &ctx->cache->fails[hash(str, pat->id) & (ctx->cache->size-1)]; fail; fail = fail->next_probe) {
|
||||
@ -151,7 +151,7 @@ static bool has_cached_failure(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
//
|
||||
// Insert into the hash table using a chained scatter table approach.
|
||||
//
|
||||
static void _hash_insert(cache_t *cache, const char *str, pat_t *pat)
|
||||
static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat)
|
||||
{
|
||||
size_t h = hash(str, pat->id) & (cache->size-1);
|
||||
if (cache->fails[h].pat == NULL) { // No collision
|
||||
@ -187,7 +187,7 @@ static void _hash_insert(cache_t *cache, const char *str, pat_t *pat)
|
||||
//
|
||||
// Record in the cache that a pattern failed to match at a given position.
|
||||
//
|
||||
static void cache_failure(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
|
||||
{
|
||||
cache_t *cache = ctx->cache;
|
||||
// Grow the hash if needed (>99% utilization):
|
||||
@ -223,12 +223,12 @@ void cache_destroy(match_ctx_t *ctx)
|
||||
// Look up a pattern definition by name from a definition pattern.
|
||||
//
|
||||
__attribute__((nonnull(2)))
|
||||
static pat_t *_lookup_def(match_ctx_t *ctx, pat_t *defs, const char *name, size_t namelen)
|
||||
static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, size_t namelen)
|
||||
{
|
||||
while (defs) {
|
||||
if (defs->type == BP_CHAIN) {
|
||||
auto chain = When(defs, BP_CHAIN);
|
||||
pat_t *second = _lookup_def(ctx, chain->second, name, namelen);
|
||||
bp_pat_t *second = _lookup_def(ctx, chain->second, name, namelen);
|
||||
if (second) return second;
|
||||
defs = chain->first;
|
||||
} else if (defs->type == BP_DEFINITIONS) {
|
||||
@ -248,10 +248,10 @@ static pat_t *_lookup_def(match_ctx_t *ctx, pat_t *defs, const char *name, size_
|
||||
// Look up a pattern definition by name from a context.
|
||||
//
|
||||
__attribute__((nonnull))
|
||||
pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen)
|
||||
bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen)
|
||||
{
|
||||
for (; ctx; ctx = ctx->parent_ctx) {
|
||||
pat_t *def = _lookup_def(ctx, ctx->defs, name, namelen);
|
||||
bp_pat_t *def = _lookup_def(ctx, ctx->defs, name, namelen);
|
||||
if (def) return def;
|
||||
}
|
||||
return NULL;
|
||||
@ -262,11 +262,11 @@ pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen)
|
||||
// pattern. This is used for an optimization to avoid repeated lookups.
|
||||
//
|
||||
__attribute__((nonnull(1)))
|
||||
static inline pat_t *deref(match_ctx_t *ctx, pat_t *pat)
|
||||
static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat)
|
||||
{
|
||||
if (pat && pat->type == BP_REF) {
|
||||
auto ref = When(pat, BP_REF);
|
||||
pat_t *def = lookup_ctx(ctx, ref->name, ref->len);
|
||||
bp_pat_t *def = lookup_ctx(ctx, ref->name, ref->len);
|
||||
if (def) return def;
|
||||
}
|
||||
return pat;
|
||||
@ -277,10 +277,10 @@ static inline pat_t *deref(match_ctx_t *ctx, pat_t *pat)
|
||||
// match for the whole pattern to match (if any). Ideally, this would be a
|
||||
// string literal that can be quickly scanned for.
|
||||
//
|
||||
static pat_t *get_prerequisite(match_ctx_t *ctx, pat_t *pat)
|
||||
static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat)
|
||||
{
|
||||
int derefs = 0;
|
||||
for (pat_t *p = pat; p; ) {
|
||||
for (bp_pat_t *p = pat; p; ) {
|
||||
switch (p->type) {
|
||||
case BP_BEFORE:
|
||||
p = When(p, BP_BEFORE)->pat; break;
|
||||
@ -306,7 +306,7 @@ static pat_t *get_prerequisite(match_ctx_t *ctx, pat_t *pat)
|
||||
p = When(p, BP_REPLACE)->pat; break;
|
||||
case BP_REF: {
|
||||
if (++derefs > 10) return p; // In case of left recursion
|
||||
pat_t *p2 = deref(ctx, p);
|
||||
bp_pat_t *p2 = deref(ctx, p);
|
||||
if (p2 == p) return p2;
|
||||
p = p2;
|
||||
break;
|
||||
@ -321,12 +321,12 @@ static pat_t *get_prerequisite(match_ctx_t *ctx, pat_t *pat)
|
||||
// Find the next match for pat, starting the search at str
|
||||
//
|
||||
__attribute__((nonnull(1,2,3)))
|
||||
static match_t *_next_match(match_ctx_t *ctx, const char *str, pat_t *pat, pat_t *skip)
|
||||
static match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, bp_pat_t *skip)
|
||||
{
|
||||
// Clear the cache so it's not full of old cache values from different parts of the file:
|
||||
cache_destroy(ctx);
|
||||
|
||||
pat_t *first = get_prerequisite(ctx, pat);
|
||||
bp_pat_t *first = get_prerequisite(ctx, pat);
|
||||
|
||||
// Don't bother looping if this can only match at the start/end:
|
||||
if (first->type == BP_START_OF_FILE)
|
||||
@ -364,7 +364,7 @@ static match_t *_next_match(match_ctx_t *ctx, const char *str, pat_t *pat, pat_t
|
||||
// match object, or NULL if no match is found.
|
||||
// The returned value should be free()'d to avoid memory leaking.
|
||||
//
|
||||
static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
static match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
|
||||
{
|
||||
switch (pat->type) {
|
||||
case BP_DEFINITIONS: {
|
||||
@ -438,7 +438,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
}
|
||||
case BP_UPTO: case BP_UPTO_STRICT: {
|
||||
match_t *m = new_match(pat, str, str, NULL);
|
||||
pat_t *target = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target),
|
||||
bp_pat_t *target = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target),
|
||||
*skip = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->skip : When(pat, BP_UPTO_STRICT)->skip);
|
||||
if (!target && !skip) {
|
||||
while (str < ctx->end && *str != '\n') ++str;
|
||||
@ -485,8 +485,8 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
match_t *m = new_match(pat, str, str, NULL);
|
||||
size_t reps = 0;
|
||||
auto repeat = When(pat, BP_REPEAT);
|
||||
pat_t *repeating = deref(ctx, repeat->repeat_pat);
|
||||
pat_t *sep = deref(ctx, repeat->sep);
|
||||
bp_pat_t *repeating = deref(ctx, repeat->repeat_pat);
|
||||
bp_pat_t *sep = deref(ctx, repeat->sep);
|
||||
size_t child_cap = 0, nchildren = 0;
|
||||
for (reps = 0; repeat->max == -1 || reps < (size_t)repeat->max; ++reps) {
|
||||
const char *start = str;
|
||||
@ -542,7 +542,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
return m;
|
||||
}
|
||||
case BP_AFTER: {
|
||||
pat_t *back = deref(ctx, When(pat, BP_AFTER)->pat);
|
||||
bp_pat_t *back = deref(ctx, When(pat, BP_AFTER)->pat);
|
||||
if (!back) return NULL;
|
||||
|
||||
// We only care about the region from the backtrack pos up to the
|
||||
@ -579,7 +579,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
return after ? new_match(pat, str, str, MATCHES(after)) : NULL;
|
||||
}
|
||||
case BP_CAPTURE: case BP_TAGGED: {
|
||||
pat_t *to_match = pat->type == BP_CAPTURE ? When(pat, BP_CAPTURE)->pat : When(pat, BP_TAGGED)->pat;
|
||||
bp_pat_t *to_match = pat->type == BP_CAPTURE ? When(pat, BP_CAPTURE)->pat : When(pat, BP_TAGGED)->pat;
|
||||
if (!to_match)
|
||||
return new_match(pat, str, str, NULL);
|
||||
match_t *p = match(ctx, str, to_match);
|
||||
@ -609,7 +609,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
if (m1->pat->type == BP_CAPTURE && When(m1->pat, BP_CAPTURE)->name && When(m1->pat, BP_CAPTURE)->backreffable) {
|
||||
// Temporarily add a rule that the backref name matches the
|
||||
// exact string of the original match (no replacements)
|
||||
pat_t *backref;
|
||||
bp_pat_t *backref;
|
||||
if (m1->children && m1->children[0]->pat->type == BP_CURDENT) {
|
||||
const char *linestart = m1->start;
|
||||
while (linestart > ctx->start && linestart[-1] != '\n') --linestart;
|
||||
@ -628,7 +628,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
match_ctx_t ctx2 = *ctx;
|
||||
ctx2.cache = &(cache_t){0};
|
||||
ctx2.parent_ctx = ctx;
|
||||
ctx2.defs = &(pat_t){
|
||||
ctx2.defs = &(bp_pat_t){
|
||||
.type = BP_DEFINITIONS,
|
||||
.start = m1->pat->start, .end = m1->pat->end,
|
||||
.__tagged.BP_DEFINITIONS = {
|
||||
@ -653,7 +653,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
return new_match(pat, str, m2->end, MATCHES(m1, m2));
|
||||
}
|
||||
case BP_MATCH: case BP_NOT_MATCH: {
|
||||
pat_t *target = pat->type == BP_MATCH ? When(pat, BP_MATCH)->pat : When(pat, BP_NOT_MATCH)->pat;
|
||||
bp_pat_t *target = pat->type == BP_MATCH ? When(pat, BP_MATCH)->pat : When(pat, BP_NOT_MATCH)->pat;
|
||||
match_t *m1 = match(ctx, str, target);
|
||||
if (m1 == NULL) return NULL;
|
||||
|
||||
@ -692,7 +692,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
return NULL;
|
||||
|
||||
auto ref_pat = When(pat, BP_REF);
|
||||
pat_t *ref = lookup_ctx(ctx, ref_pat->name, ref_pat->len);
|
||||
bp_pat_t *ref = lookup_ctx(ctx, ref_pat->name, ref_pat->len);
|
||||
if (ref == NULL) {
|
||||
match_error(ctx, "Unknown pattern: '%.*s'", (int)ref_pat->len, ref_pat->name);
|
||||
return NULL;
|
||||
@ -701,7 +701,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
if (ref->type == BP_LEFTRECURSION)
|
||||
return match(ctx, str, ref);
|
||||
|
||||
pat_t rec_op = {
|
||||
bp_pat_t rec_op = {
|
||||
.type = BP_LEFTRECURSION,
|
||||
.start = ref->start, .end = ref->end,
|
||||
.min_matchlen = 0, .max_matchlen = -1,
|
||||
@ -715,7 +715,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
};
|
||||
match_ctx_t ctx2 = *ctx;
|
||||
ctx2.parent_ctx = ctx;
|
||||
ctx2.defs = &(pat_t){
|
||||
ctx2.defs = &(bp_pat_t){
|
||||
.type = BP_DEFINITIONS,
|
||||
.start = pat->start, .end = pat->end,
|
||||
.__tagged.BP_DEFINITIONS = {
|
||||
@ -788,7 +788,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
//
|
||||
// Return a match object which can be used (may be allocated or recycled).
|
||||
//
|
||||
match_t *new_match(pat_t *pat, const char *start, const char *end, match_t *children[])
|
||||
match_t *new_match(bp_pat_t *pat, const char *start, const char *end, match_t *children[])
|
||||
{
|
||||
match_t *m;
|
||||
if (unused_matches) {
|
||||
@ -866,7 +866,7 @@ public size_t free_all_matches(void)
|
||||
// Iterate over matches.
|
||||
// Usage: for (match_t *m = NULL; next_match(&m, ...); ) {...}
|
||||
//
|
||||
public bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *defs, pat_t *skip, bool ignorecase)
|
||||
public bool next_match(match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, bool ignorecase)
|
||||
{
|
||||
const char *pos;
|
||||
if (*m) {
|
||||
|
4
match.h
4
match.h
@ -15,7 +15,7 @@
|
||||
typedef struct match_s {
|
||||
// Where the match starts and ends (end is after the last character)
|
||||
const char *start, *end;
|
||||
pat_t *pat;
|
||||
bp_pat_t *pat;
|
||||
// Intrusive linked list node for garbage collection:
|
||||
struct {
|
||||
struct match_s **home, *next;
|
||||
@ -30,7 +30,7 @@ __attribute__((nonnull))
|
||||
void recycle_match(match_t **at_m);
|
||||
size_t free_all_matches(void);
|
||||
size_t recycle_all_matches(void);
|
||||
bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *defs, pat_t *skip, bool ignorecase);
|
||||
bool next_match(match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, bool ignorecase);
|
||||
#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, NULL, 0)
|
||||
bp_errhand_t bp_set_error_handler(bp_errhand_t handler);
|
||||
__attribute__((nonnull))
|
||||
|
110
pattern.c
110
pattern.c
@ -15,16 +15,16 @@
|
||||
#include "utils.h"
|
||||
#include "utf8.h"
|
||||
|
||||
#define Pattern(_tag, _start, _end, _min, _max, ...) allocate_pat((pat_t){.type=_tag, .start=_start, .end=_end, \
|
||||
#define Pattern(_tag, _start, _end, _min, _max, ...) allocate_pat((bp_pat_t){.type=_tag, .start=_start, .end=_end, \
|
||||
.min_matchlen=_min, .max_matchlen=_max, .__tagged._tag={__VA_ARGS__}})
|
||||
#define UNBOUNDED(pat) ((pat)->max_matchlen == -1)
|
||||
|
||||
static pat_t *allocated_pats = NULL;
|
||||
static bp_pat_t *allocated_pats = NULL;
|
||||
|
||||
__attribute__((nonnull))
|
||||
static pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl);
|
||||
static bp_pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl);
|
||||
__attribute__((nonnull))
|
||||
static pat_t *bp_simplepattern(const char *str, const char *end);
|
||||
static bp_pat_t *bp_simplepattern(const char *str, const char *end);
|
||||
|
||||
// For error-handling purposes, use setjmp/longjmp to break out of deeply
|
||||
// recursive function calls when a parse error occurs.
|
||||
@ -52,10 +52,10 @@ static inline void parse_err(const char *start, const char *end, const char *msg
|
||||
// Allocate a new heap copy of the given pattern and link it into the global
|
||||
// `allocated_pats` list (presumably so all patterns can be freed in bulk later)
|
||||
//
|
||||
public pat_t *allocate_pat(pat_t pat)
|
||||
public bp_pat_t *allocate_pat(bp_pat_t pat)
|
||||
{
|
||||
static size_t next_pat_id = 1;
|
||||
pat_t *allocated = new(pat_t);
|
||||
bp_pat_t *allocated = new(bp_pat_t);
|
||||
*allocated = pat;
|
||||
allocated->home = &allocated_pats;
|
||||
allocated->next = allocated_pats;
|
||||
@ -69,7 +69,7 @@ public pat_t *allocate_pat(pat_t pat)
|
||||
// Helper function to initialize a range object.
|
||||
//
|
||||
__attribute__((nonnull(1,2,5)))
|
||||
static pat_t *new_range(const char *start, const char *end, size_t min, ssize_t max, pat_t *repeating, pat_t *sep)
|
||||
static bp_pat_t *new_range(const char *start, const char *end, size_t min, ssize_t max, bp_pat_t *repeating, bp_pat_t *sep)
|
||||
{
|
||||
size_t minlen = min*repeating->min_matchlen + (min > 0 ? min-1 : 0)*(sep ? sep->min_matchlen : 0);
|
||||
ssize_t maxlen = (max == -1 || UNBOUNDED(repeating) || (max != 0 && max != 1 && sep && UNBOUNDED(sep))) ? (ssize_t)-1
|
||||
@ -83,10 +83,10 @@ static pat_t *new_range(const char *start, const char *end, size_t min, ssize_t
|
||||
// any patterns (e.g. "`x `y"), otherwise return the original input.
|
||||
//
|
||||
__attribute__((nonnull))
|
||||
static pat_t *expand_chain(pat_t *first, const char *end, bool allow_nl)
|
||||
static bp_pat_t *expand_chain(bp_pat_t *first, const char *end, bool allow_nl)
|
||||
{
|
||||
const char *str = after_spaces(first->end, allow_nl, end);
|
||||
pat_t *second = bp_simplepattern(str, end);
|
||||
bp_pat_t *second = bp_simplepattern(str, end);
|
||||
if (second == NULL) return first;
|
||||
second = expand_chain(second, end, allow_nl);
|
||||
return chain_together(first, second);
|
||||
@ -96,7 +96,7 @@ static pat_t *expand_chain(pat_t *first, const char *end, bool allow_nl)
|
||||
// Match trailing => replacements (with optional pattern beforehand)
|
||||
//
|
||||
__attribute__((nonnull))
|
||||
static pat_t *expand_replacements(pat_t *replace_pat, const char *end, bool allow_nl)
|
||||
static bp_pat_t *expand_replacements(bp_pat_t *replace_pat, const char *end, bool allow_nl)
|
||||
{
|
||||
const char *str = replace_pat->end;
|
||||
while (matchstr(&str, "=>", allow_nl, end)) {
|
||||
@ -134,14 +134,14 @@ static pat_t *expand_replacements(pat_t *replace_pat, const char *end, bool allo
|
||||
// "`x/`y"), otherwise return the original input.
|
||||
//
|
||||
__attribute__((nonnull))
|
||||
static pat_t *expand_choices(pat_t *first, const char *end, bool allow_nl)
|
||||
static bp_pat_t *expand_choices(bp_pat_t *first, const char *end, bool allow_nl)
|
||||
{
|
||||
first = expand_chain(first, end, allow_nl);
|
||||
first = expand_replacements(first, end, allow_nl);
|
||||
const char *str = first->end;
|
||||
if (!matchchar(&str, '/', allow_nl, end)) return first;
|
||||
str = after_spaces(str, allow_nl, end);
|
||||
pat_t *second = bp_simplepattern(str, end);
|
||||
bp_pat_t *second = bp_simplepattern(str, end);
|
||||
if (second) str = second->end;
|
||||
if (matchstr(&str, "=>", allow_nl, end))
|
||||
second = expand_replacements(second ? second : Pattern(BP_STRING, str-2, str-2, 0, 0), end, allow_nl);
|
||||
@ -155,7 +155,7 @@ static pat_t *expand_choices(pat_t *first, const char *end, bool allow_nl)
|
||||
// Given two patterns, return a new pattern for the first pattern followed by
|
||||
// the second. If either pattern is NULL, return the other.
|
||||
//
|
||||
public pat_t *chain_together(pat_t *first, pat_t *second)
|
||||
public bp_pat_t *chain_together(bp_pat_t *first, bp_pat_t *second)
|
||||
{
|
||||
if (first == NULL) return second;
|
||||
if (second == NULL) return first;
|
||||
@ -173,7 +173,7 @@ public pat_t *chain_together(pat_t *first, pat_t *second)
|
||||
// Given two patterns, return a new pattern for matching either the first
|
||||
// pattern or the second. If either pattern is NULL, return the other.
|
||||
//
|
||||
public pat_t *either_pat(pat_t *first, pat_t *second)
|
||||
public bp_pat_t *either_pat(bp_pat_t *first, bp_pat_t *second)
|
||||
{
|
||||
if (first == NULL) return second;
|
||||
if (second == NULL) return first;
|
||||
@ -187,14 +187,14 @@ public pat_t *either_pat(pat_t *first, pat_t *second)
|
||||
// Parse a definition
|
||||
//
|
||||
__attribute__((nonnull))
|
||||
static pat_t *_bp_definition(const char *start, const char *end)
|
||||
static bp_pat_t *_bp_definition(const char *start, const char *end)
|
||||
{
|
||||
if (start >= end || !(isalpha(*start) || *start == '_')) return NULL;
|
||||
const char *str = after_name(start, end);
|
||||
size_t namelen = (size_t)(str - start);
|
||||
if (!matchchar(&str, ':', false, end)) return NULL;
|
||||
bool is_tagged = str < end && *str == ':' && matchchar(&str, ':', false, end);
|
||||
pat_t *def = bp_pattern_nl(str, end, false);
|
||||
bp_pat_t *def = bp_pattern_nl(str, end, false);
|
||||
if (!def) parse_err(str, end, "Could not parse this definition.");
|
||||
str = def->end;
|
||||
(void)matchchar(&str, ';', false, end); // Optional semicolon
|
||||
@ -202,7 +202,7 @@ static pat_t *_bp_definition(const char *start, const char *end)
|
||||
def = Pattern(BP_TAGGED, def->start, def->end, def->min_matchlen, def->max_matchlen,
|
||||
.pat=def, .name=start, .namelen=namelen);
|
||||
}
|
||||
pat_t *next_def = _bp_definition(after_spaces(str, true, end), end);
|
||||
bp_pat_t *next_def = _bp_definition(after_spaces(str, true, end), end);
|
||||
return Pattern(BP_DEFINITIONS, start, next_def ? next_def->end : str, 0, -1,
|
||||
.name=start, .namelen=namelen, .meaning=def, .next_def=next_def);
|
||||
}
|
||||
@ -211,7 +211,7 @@ static pat_t *_bp_definition(const char *start, const char *end)
|
||||
// Compile a string of BP code into a BP pattern object.
|
||||
//
|
||||
__attribute__((nonnull))
|
||||
static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_stringpattern)
|
||||
static bp_pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_stringpattern)
|
||||
{
|
||||
str = after_spaces(str, false, end);
|
||||
if (!*str) return NULL;
|
||||
@ -223,8 +223,8 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
case '.': {
|
||||
if (*str == '.') { // ".."
|
||||
str = next_char(str, end);
|
||||
enum pattype_e type = BP_UPTO;
|
||||
pat_t *extra_arg = NULL;
|
||||
enum bp_pattype_e type = BP_UPTO;
|
||||
bp_pat_t *extra_arg = NULL;
|
||||
if (matchchar(&str, '%', false, end)) {
|
||||
extra_arg = bp_simplepattern(str, end);
|
||||
if (extra_arg)
|
||||
@ -239,7 +239,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
parse_err(str, str, "There should be a pattern here after the '='");
|
||||
type = BP_UPTO_STRICT;
|
||||
}
|
||||
pat_t *target;
|
||||
bp_pat_t *target;
|
||||
if (inside_stringpattern) {
|
||||
target = NULL;
|
||||
} else {
|
||||
@ -254,7 +254,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
}
|
||||
// Char literals
|
||||
case '`': {
|
||||
pat_t *all = NULL;
|
||||
bp_pat_t *all = NULL;
|
||||
do { // Comma-separated items:
|
||||
if (str >= end || !*str || *str == '\n')
|
||||
parse_err(str, str, "There should be a character here after the '`'");
|
||||
@ -274,11 +274,11 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
c2 = tmp;
|
||||
}
|
||||
str = next_char(c2_loc, end);
|
||||
pat_t *pat = Pattern(BP_RANGE, start == c1_loc - 1 ? start : c1_loc, str, 1, 1, .low=c1, .high=c2);
|
||||
bp_pat_t *pat = Pattern(BP_RANGE, start == c1_loc - 1 ? start : c1_loc, str, 1, 1, .low=c1, .high=c2);
|
||||
all = either_pat(all, pat);
|
||||
} else {
|
||||
size_t len = (size_t)(str - c1_loc);
|
||||
pat_t *pat = Pattern(BP_STRING, start, str, len, (ssize_t)len, .string=strndup(c1_loc, len));
|
||||
bp_pat_t *pat = Pattern(BP_STRING, start, str, len, (ssize_t)len, .string=strndup(c1_loc, len));
|
||||
all = either_pat(all, pat);
|
||||
}
|
||||
} while (*str++ == ',');
|
||||
@ -290,7 +290,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
if (!*str || *str == '\n')
|
||||
parse_err(str, str, "There should be an escape sequence here after this backslash.");
|
||||
|
||||
pat_t *all = NULL;
|
||||
bp_pat_t *all = NULL;
|
||||
do { // Comma-separated items:
|
||||
const char *itemstart = str-1;
|
||||
if (*str == 'N') { // \N (nodent)
|
||||
@ -326,7 +326,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
if (e_high < e_low)
|
||||
parse_err(start, str, "Escape ranges should be low-to-high, but this is high-to-low.");
|
||||
}
|
||||
pat_t *esc = Pattern(BP_RANGE, start, str, 1, 1, .low=e_low, .high=e_high);
|
||||
bp_pat_t *esc = Pattern(BP_RANGE, start, str, 1, 1, .low=e_low, .high=e_high);
|
||||
all = either_pat(all, esc);
|
||||
} while (*str == ',' && str++ < end);
|
||||
|
||||
@ -348,7 +348,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
}
|
||||
// Not <pat>
|
||||
case '!': {
|
||||
pat_t *p = bp_simplepattern(str, end);
|
||||
bp_pat_t *p = bp_simplepattern(str, end);
|
||||
if (!p) parse_err(str, str, "There should be a pattern after this '!'");
|
||||
return Pattern(BP_NOT, start, p->end, 0, 0, .pat=p);
|
||||
}
|
||||
@ -370,11 +370,11 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
} else {
|
||||
min = (size_t)n1, max = (ssize_t)n1;
|
||||
}
|
||||
pat_t *repeating = bp_simplepattern(str, end);
|
||||
bp_pat_t *repeating = bp_simplepattern(str, end);
|
||||
if (!repeating)
|
||||
parse_err(str, str, "There should be a pattern after this repetition count.");
|
||||
str = repeating->end;
|
||||
pat_t *sep = NULL;
|
||||
bp_pat_t *sep = NULL;
|
||||
if (matchchar(&str, '%', false, end)) {
|
||||
sep = bp_simplepattern(str, end);
|
||||
if (!sep)
|
||||
@ -387,21 +387,21 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
}
|
||||
// Lookbehind
|
||||
case '<': {
|
||||
pat_t *behind = bp_simplepattern(str, end);
|
||||
bp_pat_t *behind = bp_simplepattern(str, end);
|
||||
if (!behind)
|
||||
parse_err(str, str, "There should be a pattern after this '<'");
|
||||
return Pattern(BP_AFTER, start, behind->end, 0, 0, .pat=behind);
|
||||
}
|
||||
// Lookahead
|
||||
case '>': {
|
||||
pat_t *ahead = bp_simplepattern(str, end);
|
||||
bp_pat_t *ahead = bp_simplepattern(str, end);
|
||||
if (!ahead)
|
||||
parse_err(str, str, "There should be a pattern after this '>'");
|
||||
return Pattern(BP_BEFORE, start, ahead->end, 0, 0, .pat=ahead);
|
||||
}
|
||||
// Parentheses
|
||||
case '(': {
|
||||
pat_t *pat = bp_pattern_nl(str, end, true);
|
||||
bp_pat_t *pat = bp_pattern_nl(str, end, true);
|
||||
if (!pat)
|
||||
parse_err(str, str, "There should be a valid pattern after this parenthesis.");
|
||||
str = pat->end;
|
||||
@ -412,7 +412,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
}
|
||||
// Square brackets
|
||||
case '[': {
|
||||
pat_t *maybe = bp_pattern_nl(str, end, true);
|
||||
bp_pat_t *maybe = bp_pattern_nl(str, end, true);
|
||||
if (!maybe)
|
||||
parse_err(str, str, "There should be a valid pattern after this square bracket.");
|
||||
str = maybe->end;
|
||||
@ -422,11 +422,11 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
// Repeating
|
||||
case '*': case '+': {
|
||||
size_t min = (size_t)(c == '*' ? 0 : 1);
|
||||
pat_t *repeating = bp_simplepattern(str, end);
|
||||
bp_pat_t *repeating = bp_simplepattern(str, end);
|
||||
if (!repeating)
|
||||
parse_err(str, str, "There should be a valid pattern to repeat here");
|
||||
str = repeating->end;
|
||||
pat_t *sep = NULL;
|
||||
bp_pat_t *sep = NULL;
|
||||
if (matchchar(&str, '%', false, end)) {
|
||||
sep = bp_simplepattern(str, end);
|
||||
if (!sep)
|
||||
@ -443,7 +443,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
if (str <= name)
|
||||
parse_err(start, str, "There should be an identifier after this '@:'");
|
||||
size_t namelen = (size_t)(str - name);
|
||||
pat_t *p = NULL;
|
||||
bp_pat_t *p = NULL;
|
||||
if (matchchar(&str, '=', false, end)) {
|
||||
p = bp_simplepattern(str, end);
|
||||
if (p) str = p->end;
|
||||
@ -467,7 +467,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
namelen = (size_t)(a-str);
|
||||
str = eq;
|
||||
}
|
||||
pat_t *pat = bp_simplepattern(str, end);
|
||||
bp_pat_t *pat = bp_simplepattern(str, end);
|
||||
if (!pat)
|
||||
parse_err(str, str, "There should be a valid pattern here to capture after the '@'");
|
||||
|
||||
@ -487,7 +487,7 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
|
||||
return Pattern(BP_END_OF_LINE, start, str, 0, 0);
|
||||
}
|
||||
default: {
|
||||
pat_t *def = _bp_definition(start, end);
|
||||
bp_pat_t *def = _bp_definition(start, end);
|
||||
if (def) return def;
|
||||
// Reference
|
||||
if (!isalpha(c) && c != '_') return NULL;
|
||||
@ -510,10 +510,10 @@ public maybe_pat_t bp_stringpattern(const char *str, const char *end)
|
||||
while (str < end && *str != '{')
|
||||
str = next_char(str, end);
|
||||
size_t len = (size_t)(str - start);
|
||||
pat_t *pat = len > 0 ? Pattern(BP_STRING, start, str, len, (ssize_t)len, .string=strndup(start, len)) : NULL;
|
||||
bp_pat_t *pat = len > 0 ? Pattern(BP_STRING, start, str, len, (ssize_t)len, .string=strndup(start, len)) : NULL;
|
||||
str += 1;
|
||||
if (str < end) {
|
||||
pat_t *interp = bp_pattern_nl(str, end, true);
|
||||
bp_pat_t *interp = bp_pattern_nl(str, end, true);
|
||||
if (interp)
|
||||
pat = chain_together(pat, interp);
|
||||
}
|
||||
@ -524,24 +524,24 @@ public maybe_pat_t bp_stringpattern(const char *str, const char *end)
|
||||
//
|
||||
// Wrapper for _bp_simplepattern() that expands any postfix operators (~, !~)
|
||||
//
|
||||
static pat_t *bp_simplepattern(const char *str, const char *end)
|
||||
static bp_pat_t *bp_simplepattern(const char *str, const char *end)
|
||||
{
|
||||
const char *start = str;
|
||||
pat_t *pat = _bp_simplepattern(str, end, false);
|
||||
bp_pat_t *pat = _bp_simplepattern(str, end, false);
|
||||
if (pat == NULL) return pat;
|
||||
str = pat->end;
|
||||
|
||||
// Expand postfix operators (if any)
|
||||
while (str < end) {
|
||||
enum pattype_e type;
|
||||
enum bp_pattype_e type;
|
||||
if (matchchar(&str, '~', false, end))
|
||||
type = BP_MATCH;
|
||||
else if (matchstr(&str, "!~", false, end))
|
||||
type = BP_NOT_MATCH;
|
||||
else break;
|
||||
|
||||
pat_t *first = pat;
|
||||
pat_t *second = bp_simplepattern(str, end);
|
||||
bp_pat_t *first = pat;
|
||||
bp_pat_t *second = bp_simplepattern(str, end);
|
||||
if (!second)
|
||||
parse_err(str, str, "There should be a valid pattern here");
|
||||
|
||||
@ -558,7 +558,7 @@ static pat_t *bp_simplepattern(const char *str, const char *end)
|
||||
// Given a pattern and a replacement string, compile the two into a BP
|
||||
// replace pattern.
|
||||
//
|
||||
public maybe_pat_t bp_replacement(pat_t *replacepat, const char *replacement, const char *end)
|
||||
public maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement, const char *end)
|
||||
{
|
||||
const char *p = replacement;
|
||||
if (!end) end = replacement + strlen(replacement);
|
||||
@ -574,15 +574,15 @@ public maybe_pat_t bp_replacement(pat_t *replacepat, const char *replacement, co
|
||||
size_t rlen = (size_t)(p-replacement);
|
||||
char *rcpy = new(char[rlen + 1]);
|
||||
memcpy(rcpy, replacement, rlen);
|
||||
pat_t *pat = Pattern(BP_REPLACE, replacepat->start, replacepat->end, replacepat->min_matchlen, replacepat->max_matchlen,
|
||||
bp_pat_t *pat = Pattern(BP_REPLACE, replacepat->start, replacepat->end, replacepat->min_matchlen, replacepat->max_matchlen,
|
||||
.pat=replacepat, .text=rcpy, .len=rlen);
|
||||
return (maybe_pat_t){.success = true, .value.pat = pat};
|
||||
}
|
||||
|
||||
static pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl)
|
||||
static bp_pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl)
|
||||
{
|
||||
str = after_spaces(str, allow_nl, end);
|
||||
pat_t *pat = bp_simplepattern(str, end);
|
||||
bp_pat_t *pat = bp_simplepattern(str, end);
|
||||
if (pat != NULL) pat = expand_choices(pat, end, allow_nl);
|
||||
if (matchstr(&str, "=>", allow_nl, end))
|
||||
pat = expand_replacements(pat ? pat : Pattern(BP_STRING, str-2, str-2, 0, 0), end, allow_nl);
|
||||
@ -592,7 +592,7 @@ static pat_t *bp_pattern_nl(const char *str, const char *end, bool allow_nl)
|
||||
//
|
||||
// Return a new pattern that matches the given text as a literal string.
|
||||
//
|
||||
public pat_t *bp_raw_literal(const char *str, size_t len)
|
||||
public bp_pat_t *bp_raw_literal(const char *str, size_t len)
|
||||
{
|
||||
return Pattern(BP_STRING, str, &str[len], len, (ssize_t)len, .string=strndup(str, len));
|
||||
}
|
||||
@ -605,7 +605,7 @@ public maybe_pat_t bp_pattern(const char *str, const char *end)
|
||||
str = after_spaces(str, true, end);
|
||||
if (!end) end = str + strlen(str);
|
||||
__TRY_PATTERN__
|
||||
pat_t *ret = bp_pattern_nl(str, end, false);
|
||||
bp_pat_t *ret = bp_pattern_nl(str, end, false);
|
||||
__END_TRY_PATTERN__
|
||||
if (ret && after_spaces(ret->end, true, end) < end)
|
||||
return (maybe_pat_t){.success = false, .value.error.start = ret->end, .value.error.end = end, .value.error.msg = "Failed to parse this part of the pattern"};
|
||||
@ -618,15 +618,15 @@ public maybe_pat_t bp_pattern(const char *str, const char *end)
|
||||
public void free_all_pats(void)
|
||||
{
|
||||
while (allocated_pats) {
|
||||
pat_t *tofree = allocated_pats;
|
||||
bp_pat_t *tofree = allocated_pats;
|
||||
allocated_pats = tofree->next;
|
||||
delete(&tofree);
|
||||
}
|
||||
}
|
||||
|
||||
public void delete_pat(pat_t **at_pat, bool recursive)
|
||||
public void delete_pat(bp_pat_t **at_pat, bool recursive)
|
||||
{
|
||||
pat_t *pat = *at_pat;
|
||||
bp_pat_t *pat = *at_pat;
|
||||
if (!pat) return;
|
||||
|
||||
#define T(tag, ...) case tag: { auto _data = When(pat, tag); __VA_ARGS__; break; }
|
||||
@ -672,7 +672,7 @@ static int printf_pattern_size(const struct printf_info *info, size_t n, int arg
|
||||
static int printf_pattern(FILE *stream, const struct printf_info *info, const void *const args[])
|
||||
{
|
||||
(void)info;
|
||||
pat_t *pat = *(pat_t**)args[0];
|
||||
bp_pat_t *pat = *(bp_pat_t**)args[0];
|
||||
if (!pat) return fputs("(null)", stream);
|
||||
|
||||
switch (pat->type) {
|
||||
|
61
pattern.h
61
pattern.h
@ -10,12 +10,8 @@
|
||||
#include <sys/types.h>
|
||||
#include <err.h>
|
||||
|
||||
#ifndef auto
|
||||
#define auto __auto_type
|
||||
#endif
|
||||
|
||||
// BP virtual machine pattern types
|
||||
enum pattype_e {
|
||||
enum bp_pattype_e {
|
||||
BP_ERROR = 0,
|
||||
BP_ANYCHAR = 1,
|
||||
BP_ID_START = 2,
|
||||
@ -50,9 +46,10 @@ enum pattype_e {
|
||||
//
|
||||
// A struct representing a BP virtual machine operation
|
||||
//
|
||||
typedef struct pat_s {
|
||||
struct pat_s *next, **home;
|
||||
enum pattype_e type;
|
||||
typedef struct bp_pat_s bp_pat_t;
|
||||
struct bp_pat_s {
|
||||
bp_pat_t *next, **home;
|
||||
enum bp_pattype_e type;
|
||||
uint32_t id;
|
||||
const char *start, *end;
|
||||
// The bounds of the match length (used for backtracking)
|
||||
@ -67,32 +64,32 @@ typedef struct pat_s {
|
||||
struct {} BP_ID_CONTINUE;
|
||||
struct {const char *string; size_t len; } BP_STRING;
|
||||
struct {unsigned char low, high; } BP_RANGE;
|
||||
struct {struct pat_s *pat;} BP_NOT;
|
||||
struct {struct pat_s *target, *skip;} BP_UPTO;
|
||||
struct {struct pat_s *target, *skip;} BP_UPTO_STRICT;
|
||||
struct {bp_pat_t *pat;} BP_NOT;
|
||||
struct {bp_pat_t *target, *skip;} BP_UPTO;
|
||||
struct {bp_pat_t *target, *skip;} BP_UPTO_STRICT;
|
||||
struct {
|
||||
uint32_t min;
|
||||
int32_t max;
|
||||
struct pat_s *sep, *repeat_pat;
|
||||
bp_pat_t *sep, *repeat_pat;
|
||||
} BP_REPEAT;
|
||||
struct {struct pat_s *pat;} BP_BEFORE;
|
||||
struct {struct pat_s *pat;} BP_AFTER;
|
||||
struct {bp_pat_t *pat;} BP_BEFORE;
|
||||
struct {bp_pat_t *pat;} BP_AFTER;
|
||||
struct {
|
||||
struct pat_s *pat;
|
||||
bp_pat_t *pat;
|
||||
const char *name;
|
||||
uint16_t namelen;
|
||||
bool backreffable;
|
||||
} BP_CAPTURE;
|
||||
struct {
|
||||
struct pat_s *first, *second;
|
||||
bp_pat_t *first, *second;
|
||||
} BP_OTHERWISE;
|
||||
struct {
|
||||
struct pat_s *first, *second;
|
||||
bp_pat_t *first, *second;
|
||||
} BP_CHAIN;
|
||||
struct {struct pat_s *pat, *must_match;} BP_MATCH;
|
||||
struct {struct pat_s *pat, *must_not_match;} BP_NOT_MATCH;
|
||||
struct {bp_pat_t *pat, *must_match;} BP_MATCH;
|
||||
struct {bp_pat_t *pat, *must_not_match;} BP_NOT_MATCH;
|
||||
struct {
|
||||
struct pat_s *pat;
|
||||
bp_pat_t *pat;
|
||||
const char *text;
|
||||
uint32_t len;
|
||||
} BP_REPLACE;
|
||||
@ -110,10 +107,10 @@ typedef struct pat_s {
|
||||
struct {
|
||||
const char *name;
|
||||
uint32_t namelen;
|
||||
struct pat_s *meaning, *next_def;
|
||||
bp_pat_t *meaning, *next_def;
|
||||
} BP_DEFINITIONS;
|
||||
struct {
|
||||
struct pat_s *pat;
|
||||
bp_pat_t *pat;
|
||||
const char *name;
|
||||
uint16_t namelen;
|
||||
bool backreffable;
|
||||
@ -121,17 +118,17 @@ typedef struct pat_s {
|
||||
struct {
|
||||
struct match_s *match;
|
||||
const char *at;
|
||||
struct pat_s *fallback;
|
||||
bp_pat_t *fallback;
|
||||
void *ctx;
|
||||
bool visited;
|
||||
} BP_LEFTRECURSION;
|
||||
} __tagged;
|
||||
} pat_t;
|
||||
};
|
||||
|
||||
typedef struct leftrec_info_s {
|
||||
struct match_s *match;
|
||||
const char *at;
|
||||
struct pat_s *fallback;
|
||||
bp_pat_t *fallback;
|
||||
void *ctx;
|
||||
bool visited;
|
||||
} leftrec_info_t;
|
||||
@ -139,7 +136,7 @@ typedef struct leftrec_info_s {
|
||||
typedef struct {
|
||||
bool success;
|
||||
union {
|
||||
pat_t *pat;
|
||||
bp_pat_t *pat;
|
||||
struct {
|
||||
const char *start, *end, *msg;
|
||||
} error;
|
||||
@ -147,20 +144,20 @@ typedef struct {
|
||||
} maybe_pat_t;
|
||||
|
||||
__attribute__((returns_nonnull))
|
||||
pat_t *allocate_pat(pat_t pat);
|
||||
bp_pat_t *allocate_pat(bp_pat_t pat);
|
||||
__attribute__((nonnull, returns_nonnull))
|
||||
pat_t *bp_raw_literal(const char *str, size_t len);
|
||||
bp_pat_t *bp_raw_literal(const char *str, size_t len);
|
||||
__attribute__((nonnull(1)))
|
||||
maybe_pat_t bp_stringpattern(const char *str, const char *end);
|
||||
__attribute__((nonnull(1,2)))
|
||||
maybe_pat_t bp_replacement(pat_t *replacepat, const char *replacement, const char *end);
|
||||
pat_t *chain_together(pat_t *first, pat_t *second);
|
||||
pat_t *either_pat(pat_t *first, pat_t *second);
|
||||
maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement, const char *end);
|
||||
bp_pat_t *chain_together(bp_pat_t *first, bp_pat_t *second);
|
||||
bp_pat_t *either_pat(bp_pat_t *first, bp_pat_t *second);
|
||||
__attribute__((nonnull(1)))
|
||||
maybe_pat_t bp_pattern(const char *str, const char *end);
|
||||
void free_all_pats(void);
|
||||
__attribute__((nonnull))
|
||||
void delete_pat(pat_t **at_pat, bool recursive);
|
||||
void delete_pat(bp_pat_t **at_pat, bool recursive);
|
||||
int set_pattern_printf_specifier(char specifier);
|
||||
|
||||
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
|
||||
|
Loading…
Reference in New Issue
Block a user