diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2025-09-24 20:22:00 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2025-09-24 20:22:00 -0400 |
| commit | 3d5944a732e34b6dd01921dee991dee54af47e18 (patch) | |
| tree | 97d17a4e7feb97d367060a184907a6978352d5ec /match.c | |
| parent | 20c11b29b3a63c221cac942a17bf9abcf8b9bafe (diff) | |
Autoformatting with clang-format
Diffstat (limited to 'match.c')
| -rw-r--r-- | match.c | 352 |
1 files changed, 163 insertions, 189 deletions
@@ -13,10 +13,10 @@ #include "match.h" #include "pattern.h" -#include "utils.h" #include "utf8.h" +#include "utils.h" -#define MAX_CACHE_SIZE (1<<14) +#define MAX_CACHE_SIZE (1 << 14) // Cache entries for results of matching a pattern at a string position typedef struct cache_entry_s { @@ -51,31 +51,27 @@ typedef struct match_ctx_s { static bp_match_t *unused_matches = NULL; static bp_match_t *in_use_matches = NULL; -static void default_error_handler(char **msg) { - errx(EXIT_FAILURE, "%s", *msg); -} +static void default_error_handler(char **msg) { errx(EXIT_FAILURE, "%s", *msg); } static bp_errhand_t error_handler = default_error_handler; -public bp_errhand_t bp_set_error_handler(bp_errhand_t new_handler) -{ +public +bp_errhand_t bp_set_error_handler(bp_errhand_t new_handler) { bp_errhand_t old_handler = error_handler; error_handler = new_handler; return old_handler; } -#define MATCHES(...) (bp_match_t*[]){__VA_ARGS__, NULL} +#define MATCHES(...) \ + (bp_match_t *[]) { __VA_ARGS__, NULL } -__attribute__((hot, nonnull(1,2,3))) -static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat); -__attribute__((returns_nonnull)) -static bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[]); +__attribute__((hot, nonnull(1, 2, 3))) static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat); +__attribute__((returns_nonnull)) static bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, + bp_match_t *children[]); char *error_message = NULL; -__attribute__((format(printf,2,3))) -static inline void match_error(match_ctx_t *ctx, const char *fmt, ...) -{ +__attribute__((format(printf, 2, 3))) static inline void match_error(match_ctx_t *ctx, const char *fmt, ...) { va_list args; va_start(args, fmt); if (error_message) free(error_message); @@ -84,8 +80,7 @@ static inline void match_error(match_ctx_t *ctx, const char *fmt, ...) longjmp(ctx->error_jump, 1); } -static bp_match_t *clone_match(bp_match_t *m) -{ +static bp_match_t *clone_match(bp_match_t *m) { if (!m) return NULL; bp_match_t *ret = new_match(m->pat, m->start, m->end, NULL); if (m->children) { @@ -95,9 +90,10 @@ static bp_match_t *clone_match(bp_match_t *m) ret->children = ret->_children; } for (int i = 0; m->children[i]; i++) { - if (nchildren+1 >= child_cap) { + if (nchildren + 1 >= child_cap) { ret->children = grow(ret->children, child_cap += 5); - for (size_t j = nchildren; j < child_cap; j++) ret->children[j] = NULL; + for (size_t j = nchildren; j < child_cap; j++) + ret->children[j] = NULL; } ret->children[nchildren++] = clone_match(m->children[i]); } @@ -106,10 +102,8 @@ static bp_match_t *clone_match(bp_match_t *m) } // Prepend to a doubly linked list -static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m) -{ - if (m->gc.home) - errx(1, "Node already has a home"); +static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m) { + if (m->gc.home) errx(1, "Node already has a home"); m->gc.home = head; m->gc.next = *head; if (*head) (*head)->gc.home = &m->gc.next; @@ -117,10 +111,8 @@ static inline void gc_list_prepend(bp_match_t **head, bp_match_t *m) } // Remove from a doubly linked list -static inline void gc_list_remove(bp_match_t *m) -{ - if (!m->gc.home) - errx(1, "Attempt to remove something that isn't in a list"); +static inline void gc_list_remove(bp_match_t *m) { + if (!m->gc.home) errx(1, "Attempt to remove something that isn't in a list"); *m->gc.home = m->gc.next; if (m->gc.next) m->gc.next->gc.home = m->gc.home; m->gc.home = NULL; @@ -130,20 +122,16 @@ static inline void gc_list_remove(bp_match_t *m) // // Hash a string position/pattern. // -static inline size_t hash(const char *str, size_t pat_id) -{ - return (size_t)str + 2*pat_id; -} +static inline size_t hash(const char *str, size_t pat_id) { return (size_t)str + 2 * pat_id; } // // Check if we have cached a failure to match a given pattern at the given position. // -static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) -{ +static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) { if (!ctx->cache->fails) return false; - for (cache_entry_t *fail = &ctx->cache->fails[hash(str, pat->id) & (ctx->cache->size-1)]; fail; fail = fail->next_probe) { - if (fail->pat == pat && fail->start == str) - return true; + for (cache_entry_t *fail = &ctx->cache->fails[hash(str, pat->id) & (ctx->cache->size - 1)]; fail; + fail = fail->next_probe) { + if (fail->pat == pat && fail->start == str) return true; } return false; } @@ -151,9 +139,8 @@ static bool has_cached_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) // // Insert into the hash table using a chained scatter table approach. // -static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat) -{ - size_t h = hash(str, pat->id) & (cache->size-1); +static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat) { + size_t h = hash(str, pat->id) & (cache->size - 1); if (cache->fails[h].pat == NULL) { // No collision cache->fails[h].pat = pat; cache->fails[h].start = str; @@ -162,14 +149,14 @@ static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat) return; } - if (cache->fails[h].pat == pat && cache->fails[h].start == str) - return; // Duplicate entry, just leave it be + if (cache->fails[h].pat == pat && cache->fails[h].start == str) return; // Duplicate entry, just leave it be // Shuffle the colliding entry along to a free space: - while (cache->fails[cache->next_free].pat) ++cache->next_free; + while (cache->fails[cache->next_free].pat) + ++cache->next_free; cache_entry_t *free_slot = &cache->fails[cache->next_free]; *free_slot = cache->fails[h]; - size_t h_orig = hash(free_slot->start, free_slot->pat->id) & (cache->size-1); + size_t h_orig = hash(free_slot->start, free_slot->pat->id) & (cache->size - 1); // Put the new entry in its desired slot cache->fails[h].pat = pat; @@ -179,7 +166,8 @@ static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat) if (h_orig != h) { // Maintain the chain that points to the colliding entry cache_entry_t *prev = &cache->fails[h_orig]; // Start of the chain - while (prev->next_probe != &cache->fails[h]) prev = prev->next_probe; + while (prev->next_probe != &cache->fails[h]) + prev = prev->next_probe; prev->next_probe = free_slot; } } @@ -187,23 +175,21 @@ static void _hash_insert(cache_t *cache, const char *str, bp_pat_t *pat) // // Save a match in the cache. // -static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) -{ +static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) { cache_t *cache = ctx->cache; // Grow the hash if needed (>99% utilization): - if (cache->occupancy+1 > (cache->size*99)/100) { + if (cache->occupancy + 1 > (cache->size * 99) / 100) { cache_entry_t *old_fails = cache->fails; size_t old_size = cache->size; - cache->size = old_size == 0 ? 16 : 2*old_size; - cache->fails = new(cache_entry_t[cache->size]); + cache->size = old_size == 0 ? 16 : 2 * old_size; + cache->fails = new (cache_entry_t[cache->size]); cache->next_free = 0; // Rehash: for (size_t i = 0; i < old_size; i++) { - if (old_fails[i].pat) - _hash_insert(cache, old_fails[i].start, old_fails[i].pat); + if (old_fails[i].pat) _hash_insert(cache, old_fails[i].start, old_fails[i].pat); } - if (old_fails) delete(&old_fails); + if (old_fails) delete (&old_fails); } _hash_insert(cache, str, pat); @@ -212,18 +198,16 @@ static void cache_failure(match_ctx_t *ctx, const char *str, bp_pat_t *pat) // // Clear and deallocate the cache. // -void cache_destroy(match_ctx_t *ctx) -{ +void cache_destroy(match_ctx_t *ctx) { cache_t *cache = ctx->cache; - if (cache->fails) delete(&cache->fails); + if (cache->fails) delete (&cache->fails); memset(cache, 0, sizeof(cache_t)); } // // Look up a pattern definition by name from a definition pattern. // -static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, size_t namelen) -{ +static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, size_t namelen) { while (defs != NULL) { if (defs->type == BP_CHAIN) { auto chain = When(defs, BP_CHAIN); @@ -232,8 +216,7 @@ static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, defs = chain->first; } else if (defs->type == BP_DEFINITIONS) { auto def = When(defs, BP_DEFINITIONS); - if (namelen == def->namelen && strncmp(def->name, name, namelen) == 0) - return def->meaning; + if (namelen == def->namelen && strncmp(def->name, name, namelen) == 0) return def->meaning; defs = def->next_def; } else { match_error(ctx, "Invalid pattern type in definitions"); @@ -246,9 +229,7 @@ static bp_pat_t *_lookup_def(match_ctx_t *ctx, bp_pat_t *defs, const char *name, // // Look up a pattern definition by name from a context. // -__attribute__((nonnull(2))) -bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen) -{ +__attribute__((nonnull(2))) bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen) { for (; ctx; ctx = ctx->parent_ctx) { bp_pat_t *def = _lookup_def(ctx, ctx->defs, name, namelen); if (def) return def; @@ -260,9 +241,7 @@ bp_pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen) // If the given pattern is a reference, look it up and return the referenced // pattern. This is used for an optimization to avoid repeated lookups. // -__attribute__((nonnull(1))) -static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat) -{ +__attribute__((nonnull(1))) static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat) { if (pat && pat->type == BP_REF) { auto ref = When(pat, BP_REF); bp_pat_t *def = lookup_ctx(ctx, ref->name, ref->len); @@ -276,33 +255,26 @@ static inline bp_pat_t *deref(match_ctx_t *ctx, bp_pat_t *pat) // match for the whole pattern to match (if any). Ideally, this would be a // string literal that can be quickly scanned for. // -static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat) -{ +static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat) { int derefs = 0; - for (bp_pat_t *p = pat; p; ) { + for (bp_pat_t *p = pat; p;) { switch (p->type) { - case BP_BEFORE: - p = When(p, BP_BEFORE)->pat; break; + case BP_BEFORE: p = When(p, BP_BEFORE)->pat; break; case BP_REPEAT: - if (When(p, BP_REPEAT)->min == 0) - return p; - p = When(p, BP_REPEAT)->repeat_pat; break; - case BP_CAPTURE: - p = When(p, BP_CAPTURE)->pat; break; - case BP_TAGGED: - p = When(p, BP_TAGGED)->pat; break; + if (When(p, BP_REPEAT)->min == 0) return p; + p = When(p, BP_REPEAT)->repeat_pat; + break; + case BP_CAPTURE: p = When(p, BP_CAPTURE)->pat; break; + case BP_TAGGED: p = When(p, BP_TAGGED)->pat; break; case BP_CHAIN: { auto chain = When(p, BP_CHAIN); // If pattern is something like (|"foo"|), then use "foo" as the first thing to scan for p = chain->first->max_matchlen == 0 ? chain->second : chain->first; break; } - case BP_MATCH: - p = When(p, BP_MATCH)->pat; break; - case BP_NOT_MATCH: - p = When(p, BP_NOT_MATCH)->pat; break; - case BP_REPLACE: - p = When(p, BP_REPLACE)->pat; break; + case BP_MATCH: p = When(p, BP_MATCH)->pat; break; + case BP_NOT_MATCH: p = When(p, BP_NOT_MATCH)->pat; break; + case BP_REPLACE: p = When(p, BP_REPLACE)->pat; break; case BP_REF: { if (++derefs > 10) return p; // In case of left recursion bp_pat_t *p2 = deref(ctx, p); @@ -319,31 +291,28 @@ static bp_pat_t *get_prerequisite(match_ctx_t *ctx, bp_pat_t *pat) // // Find the next match after prev (or the first match if prev is NULL) // -__attribute__((nonnull(1,2,3))) -static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, bp_pat_t *skip) -{ +__attribute__((nonnull(1, 2, 3))) static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, + bp_pat_t *skip) { // Clear the cache so it's not full of old cache values from different parts of the file: cache_destroy(ctx); bp_pat_t *first = get_prerequisite(ctx, pat); // Don't bother looping if this can only match at the start/end: - if (first->type == BP_START_OF_FILE) - return match(ctx, str, pat); - else if (first->type == BP_END_OF_FILE) - return match(ctx, ctx->end, pat); + if (first->type == BP_START_OF_FILE) return match(ctx, str, pat); + else if (first->type == BP_END_OF_FILE) return match(ctx, ctx->end, pat); // Performance optimization: if the pattern starts with a string literal, // we can just rely on the highly optimized memmem() implementation to skip // past areas where we know we won't find a match. if (!skip && first->type == BP_STRING && first->min_matchlen > 0) { - char *found = ctx->ignorecase ? - strcasestr(str, When(first, BP_STRING)->string) - : memmem(str, (size_t)(ctx->end - str), When(first, BP_STRING)->string, first->min_matchlen); + char *found = ctx->ignorecase + ? strcasestr(str, When(first, BP_STRING)->string) + : memmem(str, (size_t)(ctx->end - str), When(first, BP_STRING)->string, first->min_matchlen); str = found ? found : ctx->end; } else if (!skip && str > ctx->start && (first->type == BP_START_OF_LINE || first->type == BP_END_OF_LINE)) { char *found = memchr(str, '\n', (size_t)(ctx->end - str)); - str = found ? (first->type == BP_START_OF_LINE ? found+1 : found) : ctx->end; + str = found ? (first->type == BP_START_OF_LINE ? found + 1 : found) : ctx->end; } do { @@ -363,8 +332,7 @@ static bp_match_t *_next_match(match_ctx_t *ctx, const char *str, bp_pat_t *pat, // match object, or NULL if no match is found. // The returned value should be free()'d to avoid memory leaking. // -static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) -{ +static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) { switch (pat->type) { case BP_DEFINITIONS: { match_ctx_t ctx2 = *ctx; @@ -393,10 +361,12 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) return (str < ctx->end && *str != '\n') ? new_match(pat, str, next_char(str, ctx->end), NULL) : NULL; } case BP_ID_START: { - return (str < ctx->end && isidstart(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL) : NULL; + return (str < ctx->end && isidstart(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL) + : NULL; } case BP_ID_CONTINUE: { - return (str < ctx->end && isidcontinue(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL) : NULL; + return (str < ctx->end && isidcontinue(str, ctx->end)) ? new_match(pat, str, next_char(str, ctx->end), NULL) + : NULL; } case BP_START_OF_FILE: { return (str == ctx->start) ? new_match(pat, str, str, NULL) : NULL; @@ -405,27 +375,28 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) return (str == ctx->start || str[-1] == '\n') ? new_match(pat, str, str, NULL) : NULL; } case BP_END_OF_FILE: { - return (str == ctx->end || (str == ctx->end-1 && *str == '\n')) ? new_match(pat, str, str, NULL) : NULL; + return (str == ctx->end || (str == ctx->end - 1 && *str == '\n')) ? new_match(pat, str, str, NULL) : NULL; } case BP_END_OF_LINE: { return (str == ctx->end || *str == '\n') ? new_match(pat, str, str, NULL) : NULL; } case BP_WORD_BOUNDARY: { - return (str == ctx->start || isidcontinue(str, ctx->end) != isidcontinue(prev_char(ctx->start, str), ctx->end)) ? - new_match(pat, str, str, NULL) : NULL; + return (str == ctx->start || isidcontinue(str, ctx->end) != isidcontinue(prev_char(ctx->start, str), ctx->end)) + ? new_match(pat, str, str, NULL) + : NULL; } case BP_STRING: { if (&str[pat->min_matchlen] > ctx->end) return NULL; - if (pat->min_matchlen > 0 && (ctx->ignorecase ? strncasecmp : strncmp)(str, When(pat, BP_STRING)->string, pat->min_matchlen) != 0) + if (pat->min_matchlen > 0 + && (ctx->ignorecase ? strncasecmp : strncmp)(str, When(pat, BP_STRING)->string, pat->min_matchlen) != 0) return NULL; return new_match(pat, str, str + pat->min_matchlen, NULL); } case BP_RANGE: { if (str >= ctx->end) return NULL; auto range = When(pat, BP_RANGE); - if ((unsigned char)*str < range->low || (unsigned char)*str > range->high) - return NULL; - return new_match(pat, str, str+1, NULL); + if ((unsigned char)*str < range->low || (unsigned char)*str > range->high) return NULL; + return new_match(pat, str, str + 1, NULL); } case BP_NOT: { bp_match_t *m = match(ctx, str, When(pat, BP_NOT)->pat); @@ -435,18 +406,21 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) } return new_match(pat, str, str, NULL); } - case BP_UPTO: case BP_UPTO_STRICT: { + case BP_UPTO: + case BP_UPTO_STRICT: { bp_match_t *m = new_match(pat, str, str, NULL); - bp_pat_t *target = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target), - *skip = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->skip : When(pat, BP_UPTO_STRICT)->skip); + bp_pat_t *target = + deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target), + *skip = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->skip : When(pat, BP_UPTO_STRICT)->skip); if (!target && !skip) { - while (str < ctx->end && *str != '\n') ++str; + while (str < ctx->end && *str != '\n') + ++str; m->end = str; return m; } size_t child_cap = 0, nchildren = 0; - for (const char *prev = NULL; prev < str; ) { + for (const char *prev = NULL; prev < str;) { prev = str; if (target) { bp_match_t *p = match(ctx, str, target); @@ -463,9 +437,10 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) bp_match_t *s = match(ctx, str, skip); if (s != NULL) { str = s->end; - if (nchildren+2 >= child_cap) { + if (nchildren + 2 >= child_cap) { m->children = grow(m->children, child_cap += 5); - for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL; + for (size_t i = nchildren; i < child_cap; i++) + m->children[i] = NULL; } m->children[nchildren++] = s; continue; @@ -474,8 +449,7 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) // This isn't in the for() structure because there needs to // be at least once chance to match the pattern, even if // we're at the end of the string already (e.g. "..$"). - if (str < ctx->end && *str != '\n' && pat->type != BP_UPTO_STRICT) - str = next_char(str, ctx->end); + if (str < ctx->end && *str != '\n' && pat->type != BP_UPTO_STRICT) str = next_char(str, ctx->end); } recycle_match(&m); return NULL; @@ -511,23 +485,23 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) // of looping infinitely. if (msep) recycle_match(&msep); recycle_match(&mp); - if (repeat->max == -1) - reps = ~(size_t)0; - else - reps = (size_t)repeat->max; + if (repeat->max == -1) reps = ~(size_t)0; + else reps = (size_t)repeat->max; break; } if (msep) { - if (nchildren+2 >= child_cap) { + if (nchildren + 2 >= child_cap) { m->children = grow(m->children, child_cap += 5); - for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL; + for (size_t i = nchildren; i < child_cap; i++) + m->children[i] = NULL; } m->children[nchildren++] = msep; } - if (nchildren+2 >= child_cap) { + if (nchildren + 2 >= child_cap) { m->children = grow(m->children, child_cap += 5); - for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL; + for (size_t i = nchildren; i < child_cap; i++) + m->children[i] = NULL; } m->children[nchildren++] = mp; str = mp->end; @@ -556,11 +530,10 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) pos >= ctx->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]); pos = prev_char(ctx->start, pos)) { cache_destroy(&slice_ctx); - slice_ctx.start = (char*)pos; + slice_ctx.start = (char *)pos; bp_match_t *m = match(&slice_ctx, pos, back); // Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB") - if (m && m->end != str) - recycle_match(&m); + if (m && m->end != str) recycle_match(&m); else if (m) { cache_destroy(&slice_ctx); return new_match(pat, str, str, MATCHES(m)); @@ -577,10 +550,10 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) bp_match_t *after = match(ctx, str, When(pat, BP_BEFORE)->pat); return after ? new_match(pat, str, str, MATCHES(after)) : NULL; } - case BP_CAPTURE: case BP_TAGGED: { + case BP_CAPTURE: + case BP_TAGGED: { bp_pat_t *to_match = pat->type == BP_CAPTURE ? When(pat, BP_CAPTURE)->pat : When(pat, BP_TAGGED)->pat; - if (!to_match) - return new_match(pat, str, str, NULL); + if (!to_match) return new_match(pat, str, str, NULL); bp_match_t *p = match(ctx, str, to_match); return p ? new_match(pat, str, p->end, MATCHES(p)) : NULL; } @@ -611,7 +584,8 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) bp_pat_t *backref; if (m1->children && m1->children[0]->pat->type == BP_CURDENT) { const char *linestart = m1->start; - while (linestart > ctx->start && linestart[-1] != '\n') --linestart; + while (linestart > ctx->start && linestart[-1] != '\n') + --linestart; // Current indentation: char denter = *linestart; @@ -629,12 +603,14 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) ctx2.parent_ctx = ctx; ctx2.defs = &(bp_pat_t){ .type = BP_DEFINITIONS, - .start = m1->pat->start, .end = m1->pat->end, - .__tagged.BP_DEFINITIONS = { - .name = When(m1->pat, BP_CAPTURE)->name, - .namelen = When(m1->pat, BP_CAPTURE)->namelen, - .meaning = backref, - }, + .start = m1->pat->start, + .end = m1->pat->end, + .__tagged.BP_DEFINITIONS = + { + .name = When(m1->pat, BP_CAPTURE)->name, + .namelen = When(m1->pat, BP_CAPTURE)->namelen, + .meaning = backref, + }, }; m2 = match(&ctx2, m1->end, chain->second); if (!m2) // No need to keep the backref in memory if it didn't match @@ -651,7 +627,8 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) return new_match(pat, str, m2->end, MATCHES(m1, m2)); } - case BP_MATCH: case BP_NOT_MATCH: { + case BP_MATCH: + case BP_NOT_MATCH: { bp_pat_t *target = pat->type == BP_MATCH ? When(pat, BP_MATCH)->pat : When(pat, BP_NOT_MATCH)->pat; bp_match_t *m1 = match(ctx, str, target); if (m1 == NULL) return NULL; @@ -687,8 +664,7 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) return new_match(pat, str, p ? p->end : str, MATCHES(p)); } case BP_REF: { - if (has_cached_failure(ctx, str, pat)) - return NULL; + if (has_cached_failure(ctx, str, pat)) return NULL; auto ref_pat = When(pat, BP_REF); bp_pat_t *ref = lookup_ctx(ctx, ref_pat->name, ref_pat->len); @@ -697,27 +673,31 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) return NULL; } - if (ref->type == BP_LEFTRECURSION) - return match(ctx, str, ref); + if (ref->type == BP_LEFTRECURSION) return match(ctx, str, ref); bp_pat_t rec_op = { .type = BP_LEFTRECURSION, - .start = ref->start, .end = ref->end, - .min_matchlen = 0, .max_matchlen = -1, - .__tagged.BP_LEFTRECURSION = { - .match = NULL, - .visited = false, - .at = str, - .fallback = pat, - .ctx = (void*)ctx, - }, + .start = ref->start, + .end = ref->end, + .min_matchlen = 0, + .max_matchlen = -1, + .__tagged.BP_LEFTRECURSION = + { + .match = NULL, + .visited = false, + .at = str, + .fallback = pat, + .ctx = (void *)ctx, + }, }; match_ctx_t ctx2 = *ctx; ctx2.parent_ctx = ctx; ctx2.defs = &(bp_pat_t){ .type = BP_DEFINITIONS, - .start = pat->start, .end = pat->end, - .__tagged.BP_DEFINITIONS = { + .start = pat->start, + .end = pat->end, + .__tagged.BP_DEFINITIONS = + { .name = ref_pat->name, .namelen = ref_pat->len, .meaning = &rec_op, @@ -758,17 +738,20 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) const char *start = str; const char *p = str; - while (p > ctx->start && p[-1] != '\n') --p; + while (p > ctx->start && p[-1] != '\n') + --p; // Current indentation: char denter = *p; int dents = 0; if (denter == ' ' || denter == '\t') { - for (; *p == denter && p < ctx->end; ++p) ++dents; + for (; *p == denter && p < ctx->end; ++p) + ++dents; } // Subsequent indentation: - while (*str == '\n' || *str == '\n') ++str; + while (*str == '\n' || *str == '\n') + ++str; for (int i = 0; i < dents; i++) if (&str[i] >= ctx->end || str[i] != denter) return NULL; @@ -787,15 +770,14 @@ static bp_match_t *match(match_ctx_t *ctx, const char *str, bp_pat_t *pat) // // Return a match object which can be used (may be allocated or recycled). // -bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[]) -{ +bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_match_t *children[]) { bp_match_t *m; if (unused_matches) { m = unused_matches; gc_list_remove(m); memset(m, 0, sizeof(bp_match_t)); } else { - m = new(bp_match_t); + m = new (bp_match_t); } // Keep track of the object: gc_list_prepend(&in_use_matches, m); @@ -816,14 +798,13 @@ bp_match_t *new_match(bp_pat_t *pat, const char *start, const char *end, bp_matc // If the given match is not currently a child member of another match (or // otherwise reserved) then put it back in the pool of unused match objects. // -public void recycle_match(bp_match_t **at_m) -{ +public +void recycle_match(bp_match_t **at_m) { bp_match_t *m = *at_m; if (m->children) { for (int i = 0; m->children[i]; i++) recycle_match(&m->children[i]); - if (m->children != m->_children) - delete(&m->children); + if (m->children != m->_children) delete (&m->children); } gc_list_remove(m); @@ -835,13 +816,12 @@ public void recycle_match(bp_match_t **at_m) // // Force all match objects into the pool of unused match objects. // -public size_t recycle_all_matches(void) -{ +public +size_t recycle_all_matches(void) { size_t count = 0; for (bp_match_t *m; (m = in_use_matches); ++count) { gc_list_remove(m); - if (m->children && m->children != m->_children) - delete(&m->children); + if (m->children && m->children != m->_children) delete (&m->children); gc_list_prepend(&unused_matches, m); } return count; @@ -850,13 +830,13 @@ public size_t recycle_all_matches(void) // // Free all match objects in memory. // -public size_t free_all_matches(void) -{ +public +size_t free_all_matches(void) { size_t count = 0; recycle_all_matches(); for (bp_match_t *m; (m = unused_matches); ++count) { gc_list_remove(m); - delete(&m); + delete (&m); } return count; } @@ -865,12 +845,13 @@ public size_t free_all_matches(void) // Iterate over matches. // Usage: for (bp_match_t *m = NULL; next_match(&m, ...); ) {...} // -public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, bool ignorecase) -{ +public +bool next_match(bp_match_t **m, const char *start, const char *end, bp_pat_t *pat, bp_pat_t *defs, bp_pat_t *skip, + bool ignorecase) { const char *pos; if (*m) { // Make sure forward progress is occurring, even after zero-width matches: - pos = ((*m)->end > (*m)->start) ? (*m)->end : (*m)->end+1; + pos = ((*m)->end > (*m)->start) ? (*m)->end : (*m)->end + 1; recycle_match(m); } else { pos = start; @@ -895,8 +876,7 @@ public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pa recycle_all_matches(); cache_destroy(&ctx); *m = NULL; - if (error_handler) - error_handler(&error_message); + if (error_handler) error_handler(&error_message); if (error_message) { free(error_message); @@ -909,9 +889,7 @@ public bool next_match(bp_match_t **m, const char *start, const char *end, bp_pa // // Helper function to track state while doing a depth-first search. // -__attribute__((nonnull)) -static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n) -{ +__attribute__((nonnull)) static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n) { if ((m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) || m->pat->type == BP_TAGGED) { if (*n == 1) { return m; @@ -921,8 +899,7 @@ static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n) } } - if (m->pat->type == BP_CAPTURE || m->pat->type == BP_TAGGED) - return NULL; + if (m->pat->type == BP_CAPTURE || m->pat->type == BP_TAGGED) return NULL; if (m->children) { for (int i = 0; m->children[i]; i++) { @@ -936,8 +913,8 @@ static bp_match_t *_get_numbered_capture(bp_match_t *m, int *n) // // Get a specific numbered pattern capture. // -public bp_match_t *get_numbered_capture(bp_match_t *m, int n) -{ +public +bp_match_t *get_numbered_capture(bp_match_t *m, int n) { if (n <= 0) return m; if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) { if (n == 1 && m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) return m; @@ -956,15 +933,12 @@ public bp_match_t *get_numbered_capture(bp_match_t *m, int n) // // Helper function for get_named_capture() // -bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen) -{ - if (m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->name - && When(m->pat, BP_CAPTURE)->namelen == namelen +bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen) { + if (m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->name && When(m->pat, BP_CAPTURE)->namelen == namelen && strncmp(When(m->pat, BP_CAPTURE)->name, name, When(m->pat, BP_CAPTURE)->namelen) == 0) return m; - if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) - return NULL; + if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) return NULL; if (m->children) { for (int i = 0; m->children[i]; i++) { @@ -978,10 +952,10 @@ bp_match_t *_get_named_capture(bp_match_t *m, const char *name, size_t namelen) // // Get a capture with a specific name. // -public bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t _namelen) -{ +public +bp_match_t *get_named_capture(bp_match_t *m, const char *name, ssize_t _namelen) { size_t namelen = _namelen < 0 ? strlen(name) : (size_t)_namelen; - if (m->pat->type == BP_TAGGED) {// || (m->pat->type == BP_CAPTURE && m->pat->args.capture.namelen > 0)) { + if (m->pat->type == BP_TAGGED) { // || (m->pat->type == BP_CAPTURE && m->pat->args.capture.namelen > 0)) { if (m->children) { for (int i = 0; m->children[i]; i++) { bp_match_t *cap = _get_named_capture(m->children[i], name, namelen); |
