aboutsummaryrefslogtreecommitdiff
path: root/match.c
diff options
context:
space:
mode:
Diffstat (limited to 'match.c')
-rw-r--r--match.c352
1 files changed, 163 insertions, 189 deletions
diff --git a/match.c b/match.c
index 85cbccc..5af90ce 100644
--- a/match.c
+++ b/match.c
@@ -14,10 +14,10 @@
#include "match.h"
#include "pattern.h"
-#include "utils.h"
#include "utf8.h"
+#include "utils.h"
-#define MAX_CACHE_SIZE (1<<14)
+#define MAX_CACHE_SIZE (1 << 14)
// Cache entries for results of matching a pattern at a string position
typedef struct cache_entry_s {
@@ -52,31 +52,27 @@ typedef struct match_ctx_s {
static bp_match_t *unused_matches = NULL;
static bp_match_t *in_use_matches = NULL;
-static void default_error_handler(char **msg) {
- errx(EXIT_FAILURE, "%s", *msg);
-}
+static void default_error_handler(char **msg) { errx(EXIT_FAILURE, "%s", *msg); }
static bp_errhand_t error_handler = default_error_handler;
-public bp_errhand_t bp_set_error_handler(bp_errhand_t new_handler)
-{
+public
+bp_errhand_t bp_set_error_handler(bp_errhand_t new_handler) {
bp_errhand_t old_handler = error_handler;
error_handler = new_handler;
return old_handler;
}
-#define MATCHES(...) (bp_match_t*[]){__VA_ARGS__, NULL}
+#define MATCHES(...) \
+ (bp_match_t *[]) { __VA_ARGS__, NULL }
-__attribute__((hot, nonnull(1,2,3)))
-static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat);
-__attribute__((returns_nonnull))
-static bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[]);
+__attribute__((hot, nonnull(1, 2, 3))) static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat);
+__attribute__((returns_nonnull)) static bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end,
+ bp_match_t *children[]);
char *error_message = NULL;
-__attribute__((format(printf,2,3)))
-static inline void match_error(match_ctx_t *ctx, const char *fmt, ...)
-{
+__attribute__((format(printf, 2, 3))) static inline void match_error(match_ctx_t *ctx, const char *fmt, ...) {
va_list args;
va_start(args, fmt);
if (error_message) free(error_message);
@@ -85,8 +81,7 @@ static inline void match_error(match_ctx_t *ctx, const char *fmt, ...)
longjmp(ctx->error_jump, 1);
}
-static bp_match_t *clone_match(bp_match_t *m)
-{
+static bp_match_t *clone_match(bp_match_t *m) {
if (!m) return NULL;
bp_match_t *ret = new_match(m->pat, m->start, m->end, NULL);
if (m->children) {
@@ -96,9 +91,10 @@ static bp_match_t *clone_match(bp_match_t *m)
ret->children = ret->_children;
}
for (int i = 0; m->children[i]; i++) {
- if (nchildren+1 >= child_cap) {
+ if (nchildren + 1 >= child_cap) {
ret->children = grow(ret->children, child_cap += 5);
- for (size_t j = nchildren; j < child_cap; j++) ret->children[j] = NULL;
+ for (size_t j = nchildren; j < child_cap; j++)
+ ret->children[j] = NULL;
}
ret->children[nchildren++] = clone_match(m->children[i]);
}
@@ -107,10 +103,8 @@ static bp_match_t *clone_match(bp_match_t *m)
}
// Prepend to a doubly linked list
-static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m)
-{
- if (m->gc.home)
- errx(1, "Node already has a home");
+static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m) {
+ if (m->gc.home) errx(1, "Node already has a home");
m->gc.home = head;
m->gc.next = *head;
if (*head) (*head)->gc.home = &m->gc.next;
@@ -118,10 +112,8 @@ static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m)
}
// Remove from a doubly linked list
-static inline void gc_list_remove(bp_match_t *m)
-{
- if (!m->gc.home)
- errx(1, "Attempt to remove something that isn't in a list");
+static inline void gc_list_remove(bp_match_t *m) {
+ if (!m->gc.home) errx(1, "Attempt to remove something that isn't in a list");
*m->gc.home = m->gc.next;
if (m->gc.next) m->gc.next->gc.home = m->gc.home;
m->gc.home = NULL;
@@ -131,20 +123,16 @@ static inline void gc_list_remove(bp_match_t *m)
//
// Hash a string position/pattern.
//
-static inline size_t hash(const char *str, size_t pat_id)
-{
- return (size_t)str + 2*pat_id;
-}
+static inline size_t hash(const char *str, size_t pat_id) { return (size_t)str + 2 * pat_id; }
//
// Check if we have cached a failure to match a given pattern at the given position.
//
-static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
-{
+static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) {
if (!ctx->cache->fails) return false;
- for (cache_entry_t *fail = &ctx->cache->fails[hash(str, pat->id) & (ctx->cache->size-1)]; fail; fail = fail->next_probe) {
- if (fail->pat == pat && fail->start == str)
- return true;
+ for (cache_entry_t *fail = &ctx->cache->fails[hash(str, pat->id) & (ctx->cache->size - 1)]; fail;
+ fail = fail->next_probe) {
+ if (fail->pat == pat && fail->start == str) return true;
}
return false;
}
@@ -152,9 +140,8 @@ static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
//
// Insert into the hash table using a chained scatter table approach.
//
-static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat)
-{
- size_t h = hash(str, pat->id) & (cache->size-1);
+static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat) {
+ size_t h = hash(str, pat->id) & (cache->size - 1);
if (cache->fails[h].pat == NULL) { // No collision
cache->fails[h].pat = pat;
cache->fails[h].start = str;
@@ -163,14 +150,14 @@ static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat)
return;
}
- if (cache->fails[h].pat == pat && cache->fails[h].start == str)
- return; // Duplicate entry, just leave it be
+ if (cache->fails[h].pat == pat && cache->fails[h].start == str) return; // Duplicate entry, just leave it be
// Shuffle the colliding entry along to a free space:
- while (cache->fails[cache->next_free].pat) ++cache->next_free;
+ while (cache->fails[cache->next_free].pat)
+ ++cache->next_free;
cache_entry_t *free_slot = &cache->fails[cache->next_free];
*free_slot = cache->fails[h];
- size_t h_orig = hash(free_slot->start, free_slot->pat->id) & (cache->size-1);
+ size_t h_orig = hash(free_slot->start, free_slot->pat->id) & (cache->size - 1);
// Put the new entry in its desired slot
cache->fails[h].pat = pat;
@@ -180,7 +167,8 @@ static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat)
if (h_orig != h) { // Maintain the chain that points to the colliding entry
cache_entry_t *prev = &cache->fails[h_orig]; // Start of the chain
- while (prev->next_probe != &cache->fails[h]) prev = prev->next_probe;
+ while (prev->next_probe != &cache->fails[h])
+ prev = prev->next_probe;
prev->next_probe = free_slot;
}
}
@@ -188,23 +176,21 @@ static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat)
//
// Save a match in the cache.
//
-static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
-{
+static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) {
cache_t *cache = ctx->cache;
// Grow the hash if needed (>99% utilization):
- if (cache->occupancy+1 > (cache->size*99)/100) {
+ if (cache->occupancy + 1 > (cache->size * 99) / 100) {
cache_entry_t *old_fails = cache->fails;
size_t old_size = cache->size;
- cache->size = old_size == 0 ? 16 : 2*old_size;
- cache->fails = new(cache_entry_t[cache->size]);
+ cache->size = old_size == 0 ? 16 : 2 * old_size;
+ cache->fails = new (cache_entry_t[cache->size]);
cache->next_free = 0;
// Rehash:
for (size_t i = 0; i < old_size; i++) {
- if (old_fails[i].pat)
- _hash_insert(cache, old_fails[i].start, old_fails[i].pat);
+ if (old_fails[i].pat) _hash_insert(cache, old_fails[i].start, old_fails[i].pat);
}
- if (old_fails) delete(&old_fails);
+ if (old_fails) delete (&old_fails);
}
_hash_insert(cache, str, pat);
@@ -213,18 +199,16 @@ static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
//
// Clear and deallocate the cache.
//
-void cache_destroy(match_ctx_t *ctx)
-{
+void cache_destroy(match_ctx_t *ctx) {
cache_t *cache = ctx->cache;
- if (cache->fails) delete(&cache->fails);
+ if (cache->fails) delete (&cache->fails);
memset(cache, 0, sizeof(cache_t));
}
//
// Look up a pattern definition by name from a definition pattern.
//
-static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, size_t namelen)
-{
+static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, size_t namelen) {
while (defs != NULL) {
if (defs->type == BP_CHAIN) {
auto chain = When(defs, BP_CHAIN);
@@ -233,8 +217,7 @@ static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name,
defs = chain->first;
} else if (defs->type == BP_DEFINITIONS) {
auto def = When(defs, BP_DEFINITIONS);
- if (namelen == def->namelen && strncmp(def->name, name, namelen) == 0)
- return def->meaning;
+ if (namelen == def->namelen && strncmp(def->name, name, namelen) == 0) return def->meaning;
defs = def->next_def;
} else {
match_error(ctx, "Invalid pattern type in definitions");
@@ -247,9 +230,7 @@ static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name,
//
// Look up a pattern definition by name from a context.
//
-__attribute__((nonnull(2)))
-bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen)
-{
+__attribute__((nonnull(2))) bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen) {
for (; ctx; ctx = ctx->parent_ctx) {
bp_pat_t *def = _lookup_def(ctx, ctx->defs, name, namelen);
if (def) return def;
@@ -261,9 +242,7 @@ bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen)
// If the given pattern is a reference, look it up and return the referenced
// pattern. This is used for an optimization to avoid repeated lookups.
//
-__attribute__((nonnull(1)))
-static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat)
-{
+__attribute__((nonnull(1))) static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat) {
if (pat && pat->type == BP_REF) {
auto ref = When(pat, BP_REF);
bp_pat_t *def = lookup_ctx(ctx, ref->name, ref->len);
@@ -277,33 +256,26 @@ static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat)
// match for the whole pattern to match (if any). Ideally, this would be a
// string literal that can be quickly scanned for.
//
-static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat)
-{
+static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat) {
int derefs = 0;
- for (bp_pat_t *p = pat; p; ) {
+ for (bp_pat_t *p = pat; p;) {
switch (p->type) {
- case BP_BEFORE:
- p = When(p, BP_BEFORE)->pat; break;
+ case BP_BEFORE: p = When(p, BP_BEFORE)->pat; break;
case BP_REPEAT:
- if (When(p, BP_REPEAT)->min == 0)
- return p;
- p = When(p, BP_REPEAT)->repeat_pat; break;
- case BP_CAPTURE:
- p = When(p, BP_CAPTURE)->pat; break;
- case BP_TAGGED:
- p = When(p, BP_TAGGED)->pat; break;
+ if (When(p, BP_REPEAT)->min == 0) return p;
+ p = When(p, BP_REPEAT)->repeat_pat;
+ break;
+ case BP_CAPTURE: p = When(p, BP_CAPTURE)->pat; break;
+ case BP_TAGGED: p = When(p, BP_TAGGED)->pat; break;
case BP_CHAIN: {
auto chain = When(p, BP_CHAIN);
// If pattern is something like (|"foo"|), then use "foo" as the first thing to scan for
p = chain->first->max_matchlen == 0 ? chain->second : chain->first;
break;
}
- case BP_MATCH:
- p = When(p, BP_MATCH)->pat; break;
- case BP_NOT_MATCH:
- p = When(p, BP_NOT_MATCH)->pat; break;
- case BP_REPLACE:
- p = When(p, BP_REPLACE)->pat; break;
+ case BP_MATCH: p = When(p, BP_MATCH)->pat; break;
+ case BP_NOT_MATCH: p = When(p, BP_NOT_MATCH)->pat; break;
+ case BP_REPLACE: p = When(p, BP_REPLACE)->pat; break;
case BP_REF: {
if (++derefs > 10) return p; // In case of left recursion
bp_pat_t *p2 = deref(ctx, p);
@@ -320,31 +292,28 @@ static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat)
//
// Find the next match after prev (or the first match if prev is NULL)
//
-__attribute__((nonnull(1,2,3)))
-static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, bp_pat_t *skip)
-{
+__attribute__((nonnull(1, 2, 3))) static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat,
+ bp_pat_t *skip) {
// Clear the cache so it's not full of old cache values from different parts of the file:
cache_destroy(ctx);
bp_pat_t *first = get_prerequisite(ctx, pat);
// Don't bother looping if this can only match at the start/end:
- if (first->type == BP_START_OF_FILE)
- return match(ctx, str, pat);
- else if (first->type == BP_END_OF_FILE)
- return match(ctx, ctx->end, pat);
+ if (first->type == BP_START_OF_FILE) return match(ctx, str, pat);
+ else if (first->type == BP_END_OF_FILE) return match(ctx, ctx->end, pat);
// Performance optimization: if the pattern starts with a string literal,
// we can just rely on the highly optimized memmem() implementation to skip
// past areas where we know we won't find a match.
if (!skip && first->type == BP_STRING && first->min_matchlen > 0) {
- char *found = ctx->ignorecase ?
- strcasestr(str, When(first, BP_STRING)->string)
- : strnstr(str, When(first, BP_STRING)->string, MIN((size_t)(ctx->end - str), first->min_matchlen));
+ char *found = ctx->ignorecase ? strcasestr(str, When(first, BP_STRING)->string)
+ : strnstr(str, When(first, BP_STRING)->string,
+ MIN((size_t)(ctx->end - str), first->min_matchlen));
str = found ? found : ctx->end;
} else if (!skip && str > ctx->start && (first->type == BP_START_OF_LINE || first->type == BP_END_OF_LINE)) {
char *found = memchr(str, '\n', (size_t)(ctx->end - str));
- str = found ? (first->type == BP_START_OF_LINE ? found+1 : found) : ctx->end;
+ str = found ? (first->type == BP_START_OF_LINE ? found + 1 : found) : ctx->end;
}
do {
@@ -364,8 +333,7 @@ static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat,
// match object, or NULL if no match is found.
// The returned value should be free()'d to avoid memory leaking.
//
-static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
-{
+static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) {
switch (pat->type) {
case BP_DEFINITIONS: {
match_ctx_t ctx2 = *ctx;
@@ -394,10 +362,12 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return (str < ctx->end && *str != '\n') ? new_match(pat, str, next_char(str, ctx->end), NULL) : NULL;
}
case BP_ID_START: {
- return (str < ctx->end && isidstart(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL) : NULL;
+ return (str < ctx->end && isidstart(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL)
+ : NULL;
}
case BP_ID_CONTINUE: {
- return (str < ctx->end && isidcontinue(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL) : NULL;
+ return (str < ctx->end && isidcontinue(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL)
+ : NULL;
}
case BP_START_OF_FILE: {
return (str == ctx->start) ? new_match(pat, str, str, NULL) : NULL;
@@ -406,27 +376,28 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return (str == ctx->start || str[-1] == '\n') ? new_match(pat, str, str, NULL) : NULL;
}
case BP_END_OF_FILE: {
- return (str == ctx->end || (str == ctx->end-1 && *str == '\n')) ? new_match(pat, str, str, NULL) : NULL;
+ return (str == ctx->end || (str == ctx->end - 1 && *str == '\n')) ? new_match(pat, str, str, NULL) : NULL;
}
case BP_END_OF_LINE: {
return (str == ctx->end || *str == '\n') ? new_match(pat, str, str, NULL) : NULL;
}
case BP_WORD_BOUNDARY: {
- return (str == ctx->start || isidcontinue(str, ctx->end) != isidcontinue(prev_char(ctx->start, str), ctx->end)) ?
- new_match(pat, str, str, NULL) : NULL;
+ return (str == ctx->start || isidcontinue(str, ctx->end) != isidcontinue(prev_char(ctx->start, str), ctx->end))
+ ? new_match(pat, str, str, NULL)
+ : NULL;
}
case BP_STRING: {
if (&str[pat->min_matchlen] > ctx->end) return NULL;
- if (pat->min_matchlen > 0 && (ctx->ignorecase ? strncasecmp : strncmp)(str, When(pat, BP_STRING)->string, pat->min_matchlen) != 0)
+ if (pat->min_matchlen > 0
+ && (ctx->ignorecase ? strncasecmp : strncmp)(str, When(pat, BP_STRING)->string, pat->min_matchlen) != 0)
return NULL;
return new_match(pat, str, str + pat->min_matchlen, NULL);
}
case BP_RANGE: {
if (str >= ctx->end) return NULL;
auto range = When(pat, BP_RANGE);
- if ((unsigned char)*str < range->low || (unsigned char)*str > range->high)
- return NULL;
- return new_match(pat, str, str+1, NULL);
+ if ((unsigned char)*str < range->low || (unsigned char)*str > range->high) return NULL;
+ return new_match(pat, str, str + 1, NULL);
}
case BP_NOT: {
bp_match_t *m = match(ctx, str, When(pat, BP_NOT)->pat);
@@ -436,18 +407,21 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
}
return new_match(pat, str, str, NULL);
}
- case BP_UPTO: case BP_UPTO_STRICT: {
+ case BP_UPTO:
+ case BP_UPTO_STRICT: {
bp_match_t *m = new_match(pat, str, str, NULL);
- bp_pat_t *target = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target),
- *skip = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->skip : When(pat, BP_UPTO_STRICT)->skip);
+ bp_pat_t *target =
+ deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target),
+ *skip = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->skip : When(pat, BP_UPTO_STRICT)->skip);
if (!target && !skip) {
- while (str < ctx->end && *str != '\n') ++str;
+ while (str < ctx->end && *str != '\n')
+ ++str;
m->end = str;
return m;
}
size_t child_cap = 0, nchildren = 0;
- for (const char *prev = NULL; prev < str; ) {
+ for (const char *prev = NULL; prev < str;) {
prev = str;
if (target) {
bp_match_t *p = match(ctx, str, target);
@@ -464,9 +438,10 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
bp_match_t *s = match(ctx, str, skip);
if (s != NULL) {
str = s->end;
- if (nchildren+2 >= child_cap) {
+ if (nchildren + 2 >= child_cap) {
m->children = grow(m->children, child_cap += 5);
- for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
+ for (size_t i = nchildren; i < child_cap; i++)
+ m->children[i] = NULL;
}
m->children[nchildren++] = s;
continue;
@@ -475,8 +450,7 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
// This isn't in the for() structure because there needs to
// be at least once chance to match the pattern, even if
// we're at the end of the string already (e.g. "..$").
- if (str < ctx->end && *str != '\n' && pat->type != BP_UPTO_STRICT)
- str = next_char(str, ctx->end);
+ if (str < ctx->end && *str != '\n' && pat->type != BP_UPTO_STRICT) str = next_char(str, ctx->end);
}
recycle_match(&m);
return NULL;
@@ -512,23 +486,23 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
// of looping infinitely.
if (msep) recycle_match(&msep);
recycle_match(&mp);
- if (repeat->max == -1)
- reps = ~(size_t)0;
- else
- reps = (size_t)repeat->max;
+ if (repeat->max == -1) reps = ~(size_t)0;
+ else reps = (size_t)repeat->max;
break;
}
if (msep) {
- if (nchildren+2 >= child_cap) {
+ if (nchildren + 2 >= child_cap) {
m->children = grow(m->children, child_cap += 5);
- for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
+ for (size_t i = nchildren; i < child_cap; i++)
+ m->children[i] = NULL;
}
m->children[nchildren++] = msep;
}
- if (nchildren+2 >= child_cap) {
+ if (nchildren + 2 >= child_cap) {
m->children = grow(m->children, child_cap += 5);
- for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
+ for (size_t i = nchildren; i < child_cap; i++)
+ m->children[i] = NULL;
}
m->children[nchildren++] = mp;
str = mp->end;
@@ -557,11 +531,10 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
pos >= ctx->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]);
pos = prev_char(ctx->start, pos)) {
cache_destroy(&slice_ctx);
- slice_ctx.start = (char*)pos;
+ slice_ctx.start = (char *)pos;
bp_match_t *m = match(&slice_ctx, pos, back);
// Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
- if (m && m->end != str)
- recycle_match(&m);
+ if (m && m->end != str) recycle_match(&m);
else if (m) {
cache_destroy(&slice_ctx);
return new_match(pat, str, str, MATCHES(m));
@@ -578,10 +551,10 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
bp_match_t *after = match(ctx, str, When(pat, BP_BEFORE)->pat);
return after ? new_match(pat, str, str, MATCHES(after)) : NULL;
}
- case BP_CAPTURE: case BP_TAGGED: {
+ case BP_CAPTURE:
+ case BP_TAGGED: {
bp_pat_t *to_match = pat->type == BP_CAPTURE ? When(pat, BP_CAPTURE)->pat : When(pat, BP_TAGGED)->pat;
- if (!to_match)
- return new_match(pat, str, str, NULL);
+ if (!to_match) return new_match(pat, str, str, NULL);
bp_match_t *p = match(ctx, str, to_match);
return p ? new_match(pat, str, p->end, MATCHES(p)) : NULL;
}
@@ -612,7 +585,8 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
bp_pat_t *backref;
if (m1->children && m1->children[0]->pat->type == BP_CURDENT) {
const char *linestart = m1->start;
- while (linestart > ctx->start && linestart[-1] != '\n') --linestart;
+ while (linestart > ctx->start && linestart[-1] != '\n')
+ --linestart;
// Current indentation:
char denter = *linestart;
@@ -630,12 +604,14 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
ctx2.parent_ctx = ctx;
ctx2.defs = &(bp_pat_t){
.type = BP_DEFINITIONS,
- .start = m1->pat->start, .end = m1->pat->end,
- .__tagged.BP_DEFINITIONS = {
- .name = When(m1->pat, BP_CAPTURE)->name,
- .namelen = When(m1->pat, BP_CAPTURE)->namelen,
- .meaning = backref,
- },
+ .start = m1->pat->start,
+ .end = m1->pat->end,
+ .__tagged.BP_DEFINITIONS =
+ {
+ .name = When(m1->pat, BP_CAPTURE)->name,
+ .namelen = When(m1->pat, BP_CAPTURE)->namelen,
+ .meaning = backref,
+ },
};
m2 = match(&ctx2, m1->end, chain->second);
if (!m2) // No need to keep the backref in memory if it didn't match
@@ -652,7 +628,8 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return new_match(pat, str, m2->end, MATCHES(m1, m2));
}
- case BP_MATCH: case BP_NOT_MATCH: {
+ case BP_MATCH:
+ case BP_NOT_MATCH: {
bp_pat_t *target = pat->type == BP_MATCH ? When(pat, BP_MATCH)->pat : When(pat, BP_NOT_MATCH)->pat;
bp_match_t *m1 = match(ctx, str, target);
if (m1 == NULL) return NULL;
@@ -688,8 +665,7 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return new_match(pat, str, p ? p->end : str, MATCHES(p));
}
case BP_REF: {
- if (has_cached_failure(ctx, str, pat))
- return NULL;
+ if (has_cached_failure(ctx, str, pat)) return NULL;
auto ref_pat = When(pat, BP_REF);
bp_pat_t *ref = lookup_ctx(ctx, ref_pat->name, ref_pat->len);
@@ -698,27 +674,31 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
return NULL;
}
- if (ref->type == BP_LEFTRECURSION)
- return match(ctx, str, ref);
+ if (ref->type == BP_LEFTRECURSION) return match(ctx, str, ref);
bp_pat_t rec_op = {
.type = BP_LEFTRECURSION,
- .start = ref->start, .end = ref->end,
- .min_matchlen = 0, .max_matchlen = -1,
- .__tagged.BP_LEFTRECURSION = {
- .match = NULL,
- .visited = false,
- .at = str,
- .fallback = pat,
- .ctx = (void*)ctx,
- },
+ .start = ref->start,
+ .end = ref->end,
+ .min_matchlen = 0,
+ .max_matchlen = -1,
+ .__tagged.BP_LEFTRECURSION =
+ {
+ .match = NULL,
+ .visited = false,
+ .at = str,
+ .fallback = pat,
+ .ctx = (void *)ctx,
+ },
};
match_ctx_t ctx2 = *ctx;
ctx2.parent_ctx = ctx;
ctx2.defs = &(bp_pat_t){
.type = BP_DEFINITIONS,
- .start = pat->start, .end = pat->end,
- .__tagged.BP_DEFINITIONS = {
+ .start = pat->start,
+ .end = pat->end,
+ .__tagged.BP_DEFINITIONS =
+ {
.name = ref_pat->name,
.namelen = ref_pat->len,
.meaning = &rec_op,
@@ -759,17 +739,20 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
const char *start = str;
const char *p = str;
- while (p > ctx->start && p[-1] != '\n') --p;
+ while (p > ctx->start && p[-1] != '\n')
+ --p;
// Current indentation:
char denter = *p;
int dents = 0;
if (denter == ' ' || denter == '\t') {
- for (; *p == denter && p < ctx->end; ++p) ++dents;
+ for (; *p == denter && p < ctx->end; ++p)
+ ++dents;
}
// Subsequent indentation:
- while (*str == '\n' || *str == '\n') ++str;
+ while (*str == '\n' || *str == '\n')
+ ++str;
for (int i = 0; i < dents; i++)
if (&str[i] >= ctx->end || str[i] != denter) return NULL;
@@ -788,15 +771,14 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat)
//
// Return a match object which can be used (may be allocated or recycled).
//
-bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[])
-{
+bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[]) {
bp_match_t *m;
if (unused_matches) {
m = unused_matches;
gc_list_remove(m);
memset(m, 0, sizeof(bp_match_t));
} else {
- m = new(bp_match_t);
+ m = new (bp_match_t);
}
// Keep track of the object:
gc_list_prepend(&in_use_matches, m);
@@ -817,14 +799,13 @@ bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_matc
// If the given match is not currently a child member of another match (or
// otherwise reserved) then put it back in the pool of unused match objects.
//
-public void recycle_match(bp_match_t **at_m)
-{
+public
+void recycle_match(bp_match_t **at_m) {
bp_match_t *m = *at_m;
if (m->children) {
for (int i = 0; m->children[i]; i++)
recycle_match(&m->children[i]);
- if (m->children != m->_children)
- delete(&m->children);
+ if (m->children != m->_children) delete (&m->children);
}
gc_list_remove(m);
@@ -836,13 +817,12 @@ public void recycle_match(bp_match_t **at_m)
//
// Force all match objects into the pool of unused match objects.
//
-public size_t recycle_all_matches(void)
-{
+public
+size_t recycle_all_matches(void) {
size_t count = 0;
for (bp_match_t *m; (m = in_use_matches); ++count) {
gc_list_remove(m);
- if (m->children && m->children != m->_children)
- delete(&m->children);
+ if (m->children && m->children != m->_children) delete (&m->children);
gc_list_prepend(&unused_matches, m);
}
return count;
@@ -851,13 +831,13 @@ public size_t recycle_all_matches(void)
//
// Free all match objects in memory.
//
-public size_t free_all_matches(void)
-{
+public
+size_t free_all_matches(void) {
size_t count = 0;
recycle_all_matches();
for (bp_match_t *m; (m = unused_matches); ++count) {
gc_list_remove(m);
- delete(&m);
+ delete (&m);
}
return count;
}
@@ -866,12 +846,13 @@ public size_t free_all_matches(void)
// Iterate over matches.
// Usage: for (bp_match_t *m = NULL; next_match(&m, ...); ) {...}
//
-public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, bool ignorecase)
-{
+public
+bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip,
+ bool ignorecase) {
const char *pos;
if (*m) {
// Make sure forward progress is occurring, even after zero-width matches:
- pos = ((*m)->end > (*m)->start) ? (*m)->end : (*m)->end+1;
+ pos = ((*m)->end > (*m)->start) ? (*m)->end : (*m)->end + 1;
recycle_match(m);
} else {
pos = start;
@@ -896,8 +877,7 @@ public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pa
recycle_all_matches();
cache_destroy(&ctx);
*m = NULL;
- if (error_handler)
- error_handler(&error_message);
+ if (error_handler) error_handler(&error_message);
if (error_message) {
free(error_message);
@@ -910,9 +890,7 @@ public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pa
//
// Helper function to track state while doing a depth-first search.
//
-__attribute__((nonnull))
-static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n)
-{
+__attribute__((nonnull)) static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n) {
if ((m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) || m->pat->type == BP_TAGGED) {
if (*n == 1) {
return m;
@@ -922,8 +900,7 @@ static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n)
}
}
- if (m->pat->type == BP_CAPTURE || m->pat->type == BP_TAGGED)
- return NULL;
+ if (m->pat->type == BP_CAPTURE || m->pat->type == BP_TAGGED) return NULL;
if (m->children) {
for (int i = 0; m->children[i]; i++) {
@@ -937,8 +914,8 @@ static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n)
//
// Get a specific numbered pattern capture.
//
-public bp_match_t *get_numbered_capture(bp_match_t *m, int n)
-{
+public
+bp_match_t *get_numbered_capture(bp_match_t *m, int n) {
if (n <= 0) return m;
if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) {
if (n == 1 && m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) return m;
@@ -957,15 +934,12 @@ public bp_match_t *get_numbered_capture(bp_match_t *m, int n)
//
// Helper function for get_named_capture()
//
-bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen)
-{
- if (m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->name
- && When(m->pat, BP_CAPTURE)->namelen == namelen
+bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen) {
+ if (m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->name && When(m->pat, BP_CAPTURE)->namelen == namelen
&& strncmp(When(m->pat, BP_CAPTURE)->name, name, When(m->pat, BP_CAPTURE)->namelen) == 0)
return m;
- if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE)
- return NULL;
+ if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) return NULL;
if (m->children) {
for (int i = 0; m->children[i]; i++) {
@@ -979,10 +953,10 @@ bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen)
//
// Get a capture with a specific name.
//
-public bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t _namelen)
-{
+public
+bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t _namelen) {
size_t namelen = _namelen < 0 ? strlen(name) : (size_t)_namelen;
- if (m->pat->type == BP_TAGGED) {// || (m->pat->type == BP_CAPTURE && m->pat->args.capture.namelen > 0)) {
+ if (m->pat->type == BP_TAGGED) { // || (m->pat->type == BP_CAPTURE && m->pat->args.capture.namelen > 0)) {
if (m->children) {
for (int i = 0; m->children[i]; i++) {
bp_match_t *cap = _get_named_capture(m->children[i], name, namelen);