bp - Bruce's Parsing Expression Grammar tool for matching PEGs

// // match.c - Code for the BP virtual machine that performs the matching. //

#include #include #include #include #include #include #include #include

#include “match.h” #include “pattern.h” #include “utf8.h” #include “utils.h”

#define MAXCACHESIZE (1 << 14)

// Cache entries for results of matching a pattern at a string position typedef struct cacheentrys { bppatt *pat; const char *start; // Cache entries use a chained scatter approach modeled after Lua’s tables struct cacheentrys *nextprobe; } cacheentry_t;

// Cache uses a hash table to store places where matches will always fail typedef struct { unsigned int size, occupancy, nextfree; cacheentryt *fails; } cachet;

// Data structure for holding ambient state values during matching typedef struct matchctxs { struct matchctxs *parentctx; bppatt *defs; cachet *cache; const char *start, *end; jmpbuf errorjump; bool ignorecase; } matchctxt;

// New match objects are either recycled from unused match objects or allocated // from the heap. While it is in use, the match object is stored in the // in_use_matches linked list. Once it is no longer needed, it is moved to // the unused_matches linked list so it can be reused without the need for // additional calls to malloc/free. Thus, it is an invariant that every match // object is in one of these two lists: static bpmatcht *unusedmatches = NULL; static bpmatcht *inuse_matches = NULL;

static void defaulterrorhandler(char **msg) { errx(EXIT_FAILURE, “%s”, *msg); }

static bperrhandt errorhandler = defaulterror_handler;

public bperrhandt bpseterrorhandler(bperrhandt newhandler) { bperrhandt oldhandler = errorhandler; errorhandler = newhandler; return old_handler; }

#define MATCHES(…)
(bpmatcht *[]) { VA_ARGS, NULL }

attribute((hot, nonnull(1, 2, 3))) static bpmatcht *match(matchctxt *ctx, const char *str, bppatt *pat); attribute((returnsnonnull)) static bpmatcht *newmatch(bppatt *pat, const char *start, const char *end, bpmatcht *children[]);

char *error_message = NULL;

attribute((format(printf, 2, 3))) static inline void matcherror(matchctxt *ctx, const char *fmt, …) { valist args; vastart(args, fmt); if (errormessage) free(errormessage); vasprintf(&errormessage, fmt, args); vaend(args); longjmp(ctx->errorjump, 1); }

static bpmatcht *clonematch(bpmatcht *m) { if (!m) return NULL; bpmatcht *ret = newmatch(m->pat, m->start, m->end, NULL); if (m->children) { sizet childcap = 0, nchildren = 0; if (!m->children[0] || !m->children[1] || !m->children[2]) { childcap = 3; ret->children = ret->children; } for (int i = 0; m->children[i]; i++) { if (nchildren + 1 >= childcap) { ret->children = grow(ret->children, childcap += 5); for (sizet j = nchildren; j < childcap; j++) ret->children[j] = NULL; } ret->children[nchildren++] = clone_match(m->children[i]); } } return ret; }

// Prepend to a doubly linked list static inline void gclistprepend(bpmatcht **head, bpmatcht *m) { if (m->gc.home) errx(1, “Node already has a home”); m->gc.home = head; m->gc.next = head; if (head) (*head)->gc.home = &m->gc.next; *head = m; }

// Remove from a doubly linked list static inline void gclistremove(bpmatcht *m) { if (!m->gc.home) errx(1, “Attempt to remove something that isn’t in a list”); *m->gc.home = m->gc.next; if (m->gc.next) m->gc.next->gc.home = m->gc.home; m->gc.home = NULL; m->gc.next = NULL; }

// // Hash a string position/pattern. // static inline sizet hash(const char *str, sizet patid) { return (sizet)str + 2 * pat_id; }

// // Check if we have cached a failure to match a given pattern at the given position. // static bool hascachedfailure(matchctxt *ctx, const char *str, bppatt *pat) { if (!ctx->cache->fails) return false; for (cacheentryt *fail = &ctx->cache->fails[hash(str, pat->id) & (ctx->cache->size - 1)]; fail; fail = fail->next_probe) { if (fail->pat == pat && fail->start == str) return true; } return false; }

// // Insert into the hash table using a chained scatter table approach. // static void hashinsert(cachet *cache, const char *str, bppatt *pat) { sizet h = hash(str, pat->id) & (cache->size - 1); if (cache->fails[h].pat == NULL) { // No collision cache->fails[h].pat = pat; cache->fails[h].start = str; cache->fails[h].next_probe = NULL; ++cache->occupancy; return; }

if (cache->fails[h].pat == pat && cache->fails[h].start == str) return; // Duplicate entry, just leave it be

// Shuffle the colliding entry along to a free space:
while (cache->fails[cache->next_free].pat)
    ++cache->next_free;
cache_entry_t *free_slot = &cache->fails[cache->next_free];
*free_slot = cache->fails[h];
size_t h_orig = hash(free_slot->start, free_slot->pat->id) & (cache->size - 1);

// Put the new entry in its desired slot
cache->fails[h].pat = pat;
cache->fails[h].start = str;
cache->fails[h].next_probe = h_orig == h ? free_slot : NULL;
++cache->occupancy;

if (h_orig != h) { // Maintain the chain that points to the colliding entry
    cache_entry_t *prev = &cache->fails[h_orig]; // Start of the chain
    while (prev->next_probe != &cache->fails[h])
        prev = prev->next_probe;
    prev->next_probe = free_slot;
}

}

// // Save a match in the cache. // static void cachefailure(matchctxt *ctx, const char *str, bppatt *pat) { cachet *cache = ctx->cache; // Grow the hash if needed (>99% utilization): if (cache->occupancy + 1 > (cache->size * 99) / 100) { cacheentryt *oldfails = cache->fails; sizet oldsize = cache->size; cache->size = oldsize == 0 ? 16 : 2 * oldsize; cache->fails = new (cacheentryt[cache->size]); cache->nextfree = 0;

    // Rehash:
    for (size_t i = 0; i < old_size; i++) {
        if (old_fails[i].pat) _hash_insert(cache, old_fails[i].start, old_fails[i].pat);
    }
    if (old_fails) delete (&old_fails);
}

_hash_insert(cache, str, pat);

}

// // Clear and deallocate the cache. // void cachedestroy(matchctxt *ctx) { cachet *cache = ctx->cache; if (cache->fails) delete (&cache->fails); memset(cache, 0, sizeof(cache_t)); }

// // Look up a pattern definition by name from a definition pattern. // static bppatt *lookupdef(matchctxt *ctx, bppatt *defs, const char *name, sizet namelen) { while (defs != NULL) { if (defs->type == BPCHAIN) { auto chain = When(defs, BPCHAIN); bppat_t *second = lookupdef(ctx, chain->second, name, namelen); if (second) return second; defs = chain->first; } else if (defs->type == BPDEFINITIONS) { auto def = When(defs, BPDEFINITIONS); if (namelen == def->namelen && strncmp(def->name, name, namelen) == 0) return def->meaning; defs = def->nextdef; } else { matcherror(ctx, “Invalid pattern type in definitions”); return NULL; } } return NULL; }

// // Look up a pattern definition by name from a context. // attribute((nonnull(2))) bppatt *lookupctx(matchctxt *ctx, const char *name, sizet namelen) { for (; ctx; ctx = ctx->parentctx) { bppat_t *def = lookupdef(ctx, ctx->defs, name, namelen); if (def) return def; } return NULL; }

// // If the given pattern is a reference, look it up and return the referenced // pattern. This is used for an optimization to avoid repeated lookups. // attribute((nonnull(1))) static inline bppatt *deref(matchctxt *ctx, bppatt *pat) { if (pat && pat->type == BPREF) { auto ref = When(pat, BPREF); bppatt *def = lookup_ctx(ctx, ref->name, ref->len); if (def) return def; } return pat; }

// // Find and return the first and simplest pattern that will definitely have to // match for the whole pattern to match (if any). Ideally, this would be a // string literal that can be quickly scanned for. // static bppatt *getprerequisite(matchctxt *ctx, bppatt *pat) { int derefs = 0; for (bppatt *p = pat; p;) { switch (p->type) { case BPBEFORE: p = When(p, BPBEFORE)->pat; break; case BPREPEAT: if (When(p, BPREPEAT)->min == 0) return p; p = When(p, BPREPEAT)->repeatpat; break; case BPCAPTURE: p = When(p, BPCAPTURE)->pat; break; case BPTAGGED: p = When(p, BPTAGGED)->pat; break; case BPCHAIN: { auto chain = When(p, BPCHAIN); // If pattern is something like (|"foo”|), then use “foo” as the first thing to scan for p = chain->first->maxmatchlen == 0 ? chain->second : chain->first; break; } case BPMATCH: p = When(p, BPMATCH)->pat; break; case BPNOTMATCH: p = When(p, BPNOTMATCH)->pat; break; case BPREPLACE: p = When(p, BPREPLACE)->pat; break; case BPREF: { if (++derefs > 10) return p; // In case of left recursion bppat_t *p2 = deref(ctx, p); if (p2 == p) return p2; p = p2; break; } default: return p; } } return pat; }

// // Find the next match after prev (or the first match if prev is NULL) // attribute((nonnull(1, 2, 3))) static bpmatcht *nextmatch(matchctxt *ctx, const char *str, bppatt *pat, bppatt *skip) { // Clear the cache so it’s not full of old cache values from different parts of the file: cache_destroy(ctx);

bp_pat_t *first = get_prerequisite(ctx, pat);

// Don't bother looping if this can only match at the start/end:
if (first->type == BP_START_OF_FILE) return match(ctx, str, pat);
else if (first->type == BP_END_OF_FILE) return match(ctx, ctx->end, pat);

// Performance optimization: if the pattern starts with a string literal,
// we can just rely on the highly optimized memmem() implementation to skip
// past areas where we know we won't find a match.
if (!skip && first->type == BP_STRING && first->min_matchlen > 0) {
    char *found = ctx->ignorecase ? strcasestr(str, When(first, BP_STRING)->string)
                                  : memmem(str, (size_t)(ctx->end - str), When(first, BP_STRING)->string,
                                           strlen(When(first, BP_STRING)->string));
    str = found ? found : ctx->end;
} else if (!skip && str > ctx->start && (first->type == BP_START_OF_LINE || first->type == BP_END_OF_LINE)) {
    char *found = memchr(str, '\n', (size_t)(ctx->end - str));
    str = found ? (first->type == BP_START_OF_LINE ? found + 1 : found) : ctx->end;
}

do {
    bp_match_t *m = match(ctx, str, pat);
    if (m) return m;
    bp_match_t *skipped = skip ? match(ctx, str, skip) : NULL;
    if (skipped) {
        str = skipped->end > str ? skipped->end : str + 1;
        recycle_match(&skipped);
    } else str = next_char(str, ctx->end);
} while (str < ctx->end);
return NULL;

}

// // Attempt to match the given pattern against the input string and return a // match object, or NULL if no match is found. // The returned value should be free()’d to avoid memory leaking. // static bpmatcht *match(matchctxt *ctx, const char *str, bppatt *pat) { switch (pat->type) { case BPDEFINITIONS: { matchctxt ctx2 = *ctx; ctx2.cache = &(cachet){0}; ctx2.parentctx = ctx; ctx2.defs = pat; bpmatcht *m = match(&ctx2, str, When(pat, BPDEFINITIONS)->meaning); cachedestroy(&ctx2); return m; } case BPLEFTRECURSION: { // Left recursion occurs when a pattern directly or indirectly // invokes itself at the same position in the text. It’s handled as // a special case, but if a pattern invokes itself at a later // point, it can be handled with normal recursion. // See: left-recursion.md for more details. auto leftrec = When(pat, BPLEFTRECURSION); if (str == leftrec->at) { leftrec->visited = true; return clonematch(leftrec->match); } else { return match(leftrec->ctx, str, leftrec->fallback); } } case BPANYCHAR: { return (str < ctx->end && *str != ‘\n’) ? newmatch(pat, str, nextchar(str, ctx->end), NULL) : NULL; } case BPIDSTART: { return (str < ctx->end && isidstart(str, ctx->end)) ? newmatch(pat, str, nextchar(str, ctx->end), NULL) : NULL; } case BPIDCONTINUE: { return (str < ctx->end && isidcontinue(str, ctx->end)) ? newmatch(pat, str, nextchar(str, ctx->end), NULL) : NULL; } case BPSTARTOFFILE: { return (str == ctx->start) ? newmatch(pat, str, str, NULL) : NULL; } case BPSTARTOFLINE: { return (str == ctx->start || str[-1] == ‘\n’) ? newmatch(pat, str, str, NULL) : NULL; } case BPENDOFFILE: { return (str == ctx->end || (str == ctx->end - 1 && *str == ‘\n’)) ? newmatch(pat, str, str, NULL) : NULL; } case BPENDOFLINE: { return (str == ctx->end || str == ‘\n’) ? newmatch(pat, str, str, NULL) : NULL; } case BPWORDBOUNDARY: { return (str == ctx->start || isidcontinue(str, ctx->end) != isidcontinue(prevchar(ctx->start, str), ctx->end)) ? newmatch(pat, str, str, NULL) : NULL; } case BPSTRING: { if (&str[pat->minmatchlen] > ctx->end) return NULL; if (pat->minmatchlen > 0 && (ctx->ignorecase ? strncasecmp : strncmp)(str, When(pat, BPSTRING)->string, pat->minmatchlen) != 0) return NULL; return newmatch(pat, str, str + pat->minmatchlen, NULL); } case BPRANGE: { if (str >= ctx->end) return NULL; auto range = When(pat, BPRANGE); if ((unsigned char)str < range->low || (unsigned char)*str > range->high) return NULL; return newmatch(pat, str, str + 1, NULL); } case BPNOT: { bpmatcht *m = match(ctx, str, When(pat, BPNOT)->pat); if (m != NULL) { recyclematch(&m); return NULL; } return newmatch(pat, str, str, NULL); } case BPUPTO: case BPUPTOSTRICT: { bpmatcht *m = newmatch(pat, str, str, NULL); bppatt *target = deref(ctx, pat->type == BPUPTO ? When(pat, BPUPTO)->target : When(pat, BPUPTOSTRICT)->target), *skip = deref(ctx, pat->type == BPUPTO ? When(pat, BPUPTO)->skip : When(pat, BPUPTO_STRICT)->skip); if (!target && !skip) { while (str < ctx->end && *str != ‘\n’) ++str; m->end = str; return m; }

    size_t child_cap = 0, nchildren = 0;
    for (const char *prev = NULL; prev < str;) {
        prev = str;
        if (target) {
            bp_match_t *p = match(ctx, str, target);
            if (p != NULL) {
                recycle_match(&p);
                m->end = str;
                return m;
            }
        } else if (str == ctx->end || *str == '\n') {
            m->end = str;
            return m;
        }
        if (skip) {
            bp_match_t *s = match(ctx, str, skip);
            if (s != NULL) {
                str = s->end;
                if (nchildren + 2 >= child_cap) {
                    m->children = grow(m->children, child_cap += 5);
                    for (size_t i = nchildren; i < child_cap; i++)
                        m->children[i] = NULL;
                }
                m->children[nchildren++] = s;
                continue;
            }
        }
        // This isn't in the for() structure because there needs to
        // be at least once chance to match the pattern, even if
        // we're at the end of the string already (e.g. "..$").
        if (str < ctx->end && *str != '\n' && pat->type != BP_UPTO_STRICT) str = next_char(str, ctx->end);
    }
    recycle_match(&m);
    return NULL;
}
case BP_REPEAT: {
    bp_match_t *m = new_match(pat, str, str, NULL);
    size_t reps = 0;
    auto repeat = When(pat, BP_REPEAT);
    bp_pat_t *repeating = deref(ctx, repeat->repeat_pat);
    bp_pat_t *sep = deref(ctx, repeat->sep);
    size_t child_cap = 0, nchildren = 0;
    for (reps = 0; repeat->max == -1 || reps < (size_t)repeat->max; ++reps) {
        const char *start = str;
        // Separator
        bp_match_t *msep = NULL;
        if (sep != NULL && reps > 0) {
            msep = match(ctx, str, sep);
            if (msep == NULL) break;
            str = msep->end;
        }
        bp_match_t *mp = match(ctx, str, repeating);
        if (mp == NULL) {
            str = start;
            if (msep) recycle_match(&msep);
            break;
        }
        if (mp->end == start && reps > 0) {
            // Since no forward progress was made on either `repeating`
            // or `sep` and BP does not have mutable state, it's
            // guaranteed that no progress will be made on the next
            // loop either. We know that this will continue to loop
            // until reps==max, so let's just cut to the chase instead
            // of looping infinitely.
            if (msep) recycle_match(&msep);
            recycle_match(&mp);
            if (repeat->max == -1) reps = ~(size_t)0;
            else reps = (size_t)repeat->max;
            break;
        }
        if (msep) {
            if (nchildren + 2 >= child_cap) {
                m->children = grow(m->children, child_cap += 5);
                for (size_t i = nchildren; i < child_cap; i++)
                    m->children[i] = NULL;
            }
            m->children[nchildren++] = msep;
        }

        if (nchildren + 2 >= child_cap) {
            m->children = grow(m->children, child_cap += 5);
            for (size_t i = nchildren; i < child_cap; i++)
                m->children[i] = NULL;
        }
        m->children[nchildren++] = mp;
        str = mp->end;
    }

    if (reps < (size_t)repeat->min) {
        recycle_match(&m);
        return NULL;
    }
    m->end = str;
    return m;
}
case BP_AFTER: {
    bp_pat_t *back = deref(ctx, When(pat, BP_AFTER)->pat);
    if (!back) return NULL;

    // We only care about the region from the backtrack pos up to the
    // current pos, so mock it out as a file slice.
    // TODO: this breaks ^/^^/$/$$, but that can probably be ignored
    // because you rarely need to check those in a backtrack.
    match_ctx_t slice_ctx = *ctx;
    slice_ctx.cache = &(cache_t){0};
    slice_ctx.start = ctx->start;
    slice_ctx.end = str;
    for (const char *pos = &str[-(long)back->min_matchlen];
         pos >= ctx->start && (back->max_matchlen == -1 || pos >= &str[-(int)back->max_matchlen]);
         pos = prev_char(ctx->start, pos)) {
        cache_destroy(&slice_ctx);
        slice_ctx.start = (char *)pos;
        bp_match_t *m = match(&slice_ctx, pos, back);
        // Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
        if (m && m->end != str) recycle_match(&m);
        else if (m) {
            cache_destroy(&slice_ctx);
            return new_match(pat, str, str, MATCHES(m));
        }
        if (pos == ctx->start) break;
        // To prevent extreme performance degradation, don't keep
        // walking backwards endlessly over newlines.
        if (back->max_matchlen == -1 && *pos == '\n') break;
    }
    cache_destroy(&slice_ctx);
    return NULL;
}
case BP_BEFORE: {
    bp_match_t *after = match(ctx, str, When(pat, BP_BEFORE)->pat);
    return after ? new_match(pat, str, str, MATCHES(after)) : NULL;
}
case BP_CAPTURE:
case BP_TAGGED: {
    bp_pat_t *to_match = pat->type == BP_CAPTURE ? When(pat, BP_CAPTURE)->pat : When(pat, BP_TAGGED)->pat;
    if (!to_match) return new_match(pat, str, str, NULL);
    bp_match_t *p = match(ctx, str, to_match);
    return p ? new_match(pat, str, p->end, MATCHES(p)) : NULL;
}
case BP_OTHERWISE: {
    bp_match_t *m = match(ctx, str, When(pat, BP_OTHERWISE)->first);
    return m ? m : match(ctx, str, When(pat, BP_OTHERWISE)->second);
}
case BP_CHAIN: {
    auto chain = When(pat, BP_CHAIN);
    if (chain->first->type == BP_DEFINITIONS) {
        match_ctx_t ctx2 = *ctx;
        ctx2.cache = &(cache_t){0};
        ctx2.parent_ctx = ctx;
        ctx2.defs = chain->first;
        bp_match_t *m = match(&ctx2, str, chain->second);
        cache_destroy(&ctx2);
        return m;
    }

    bp_match_t *m1 = match(ctx, str, chain->first);
    if (m1 == NULL) return NULL;

    bp_match_t *m2;
    // Push backrefs and run matching, then cleanup
    if (m1->pat->type == BP_CAPTURE && When(m1->pat, BP_CAPTURE)->name && When(m1->pat, BP_CAPTURE)->backreffable) {
        // Temporarily add a rule that the backref name matches the
        // exact string of the original match (no replacements)
        bp_pat_t *backref;
        if (m1->children && m1->children[0]->pat->type == BP_CURDENT) {
            const char *linestart = m1->start;
            while (linestart > ctx->start && linestart[-1] != '\n')
                --linestart;

            // Current indentation:
            char denter = *linestart;
            size_t dents = 0;
            if (denter == ' ' || denter == '\t') {
                while (linestart[dents] == denter && &linestart[dents] < ctx->end)
                    ++dents;
            }
            backref = bp_raw_literal(linestart, dents);
        } else {
            backref = bp_raw_literal(m1->start, (size_t)(m1->end - m1->start));
        }
        match_ctx_t ctx2 = *ctx;
        ctx2.cache = &(cache_t){0};
        ctx2.parent_ctx = ctx;
        ctx2.defs = &(bp_pat_t){
            .type = BP_DEFINITIONS,
            .start = m1->pat->start,
            .end = m1->pat->end,
            .__tagged.BP_DEFINITIONS =
                {
                    .name = When(m1->pat, BP_CAPTURE)->name,
                    .namelen = When(m1->pat, BP_CAPTURE)->namelen,
                    .meaning = backref,
                },
        };
        m2 = match(&ctx2, m1->end, chain->second);
        if (!m2) // No need to keep the backref in memory if it didn't match
            delete_pat(&backref, false);
        cache_destroy(&ctx2);
    } else {
        m2 = match(ctx, m1->end, chain->second);
    }

    if (m2 == NULL) {
        recycle_match(&m1);
        return NULL;
    }

    return new_match(pat, str, m2->end, MATCHES(m1, m2));
}
case BP_MATCH:
case BP_NOT_MATCH: {
    bp_pat_t *target = pat->type == BP_MATCH ? When(pat, BP_MATCH)->pat : When(pat, BP_NOT_MATCH)->pat;
    bp_match_t *m1 = match(ctx, str, target);
    if (m1 == NULL) return NULL;

    // <p1>~<p2> matches iff the text of <p1> matches <p2>
    // <p1>!~<p2> matches iff the text of <p1> does not match <p2>
    match_ctx_t slice_ctx = *ctx;
    slice_ctx.cache = &(cache_t){0};
    slice_ctx.start = m1->start;
    slice_ctx.end = m1->end;
    bp_match_t *ret = NULL, *m2 = NULL;
    if (pat->type == BP_MATCH) {
        m2 = _next_match(&slice_ctx, slice_ctx.start, When(pat, BP_MATCH)->must_match, NULL);
        if (m2) ret = new_match(pat, m1->start, m1->end, MATCHES(m1, m2));
    } else {
        m2 = _next_match(&slice_ctx, slice_ctx.start, When(pat, BP_NOT_MATCH)->must_not_match, NULL);
        if (!m2) ret = new_match(pat, m1->start, m1->end, MATCHES(m1));
    }
    cache_destroy(&slice_ctx);
    if (!ret) {
        if (m2) recycle_match(&m2);
        recycle_match(&m1);
    }
    return ret;
}
case BP_REPLACE: {
    bp_match_t *p = NULL;
    auto replace = When(pat, BP_REPLACE);
    if (replace->pat) {
        p = match(ctx, str, replace->pat);
        if (p == NULL) return NULL;
    }
    return new_match(pat, str, p ? p->end : str, MATCHES(p));
}
case BP_REF: {
    if (has_cached_failure(ctx, str, pat)) return NULL;

    auto ref_pat = When(pat, BP_REF);
    bp_pat_t *ref = lookup_ctx(ctx, ref_pat->name, ref_pat->len);
    if (ref == NULL) {
        match_error(ctx, "Unknown pattern: '%.*s'", (int)ref_pat->len, ref_pat->name);
        return NULL;
    }

    if (ref->type == BP_LEFTRECURSION) return match(ctx, str, ref);

    bp_pat_t rec_op = {
        .type = BP_LEFTRECURSION,
        .start = ref->start,
        .end = ref->end,
        .min_matchlen = 0,
        .max_matchlen = -1,
        .__tagged.BP_LEFTRECURSION =
            {
                .match = NULL,
                .visited = false,
                .at = str,
                .fallback = pat,
                .ctx = (void *)ctx,
            },
    };
    match_ctx_t ctx2 = *ctx;
    ctx2.parent_ctx = ctx;
    ctx2.defs = &(bp_pat_t){
        .type = BP_DEFINITIONS,
        .start = pat->start,
        .end = pat->end,
        .__tagged.BP_DEFINITIONS =
            {
                .name = ref_pat->name,
                .namelen = ref_pat->len,
                .meaning = &rec_op,
            },
    };

    bp_match_t *m = match(&ctx2, str, ref);
    // If left recursion was involved, keep retrying while forward progress can be made:
    if (m && rec_op.__tagged.BP_LEFTRECURSION.visited) {
        while (1) {
            const char *prev = m->end;
            rec_op.__tagged.BP_LEFTRECURSION.match = m;
            ctx2.cache = &(cache_t){0};
            bp_match_t *m2 = match(&ctx2, str, ref);
            cache_destroy(&ctx2);
            if (!m2) break;
            if (m2->end <= prev) {
                recycle_match(&m2);
                break;
            }
            recycle_match(&m);
            m = m2;
        }
    }

    if (!m) {
        cache_failure(ctx, str, pat);
        return NULL;
    }

    // This match wrapper mainly exists for record-keeping purposes.
    // It also helps with visualization of match results.
    // OPTIMIZE: remove this if necessary
    return new_match(pat, m->start, m->end, MATCHES(m));
}
case BP_NODENT: {
    if (*str != '\n') return NULL;
    const char *start = str;

    const char *p = str;
    while (p > ctx->start && p[-1] != '\n')
        --p;

    // Current indentation:
    char denter = *p;
    int dents = 0;
    if (denter == ' ' || denter == '\t') {
        for (; *p == denter && p < ctx->end; ++p)
            ++dents;
    }

    // Subsequent indentation:
    while (*str == '\n' || *str == '\n')
        ++str;
    for (int i = 0; i < dents; i++)
        if (&str[i] >= ctx->end || str[i] != denter) return NULL;

    return new_match(pat, start, &str[dents], NULL);
}
case BP_CURDENT: {
    return new_match(pat, str, str, NULL);
}
default: {
    match_error(ctx, "Unknown pattern type: %u", pat->type);
    return NULL;
}
}

}

// // Return a match object which can be used (may be allocated or recycled). // bpmatcht *newmatch(bppatt *pat, const char *start, const char *end, bpmatcht *children[]) { bpmatcht *m; if (unusedmatches) { m = unusedmatches; gclistremove(m); memset(m, 0, sizeof(bpmatcht)); } else { m = new (bpmatcht); } // Keep track of the object: gclistprepend(&inuse_matches, m);

m->pat = pat;
m->start = start;
m->end = end;

if (children) {
    for (int i = 0; children[i]; i++)
        m->_children[i] = children[i];
    m->children = m->_children;
}
return m;

}

// // If the given match is not currently a child member of another match (or // otherwise reserved) then put it back in the pool of unused match objects. // public void recyclematch(bpmatcht **atm) { bpmatcht *m = *atm; if (m->children) { for (int i = 0; m->children[i]; i++) recyclematch(&m->children[i]); if (m->children != m->_children) delete (&m->children); }

gc_list_remove(m);
(void)memset(m, 0, sizeof(bp_match_t));
gc_list_prepend(&unused_matches, m);
*at_m = NULL;

}

// // Force all match objects into the pool of unused match objects. // public sizet recycleallmatches(void) { sizet count = 0; for (bpmatcht *m; (m = inusematches); ++count) { gclistremove(m); if (m->children && m->children != m->children) delete (&m->children); gclistprepend(&unusedmatches, m); } return count; }

// // Free all match objects in memory. // public sizet freeallmatches(void) { sizet count = 0; recycleallmatches(); for (bpmatcht *m; (m = unusedmatches); ++count) { gclist_remove(m); delete (&m); } return count; }

// // Iterate over matches. // Usage: for (bpmatcht *m = NULL; nextmatch(&m, …); ) {…} // public bool nextmatch(bpmatcht **m, const char *start, const char *end, bppatt *pat, bppatt *defs, bppatt *skip, bool ignorecase) { const char pos; if (m) { // Make sure forward progress is occurring, even after zero-width matches: pos = ((m)->end > (m)->start) ? (m)->end : (m)->end + 1; recycle_match(m); } else { pos = start; }

if (!pat) {
    error_handler = default_error_handler;
    return false;
}

match_ctx_t ctx = {
    .cache = &(cache_t){0},
    .start = start,
    .end = end,
    .ignorecase = ignorecase,
    .defs = defs,
};
if (setjmp(ctx.error_jump) == 0) {
    *m = (pos <= end) ? _next_match(&ctx, pos, pat, skip) : NULL;
    cache_destroy(&ctx);
} else {
    recycle_all_matches();
    cache_destroy(&ctx);
    *m = NULL;
    if (error_handler) error_handler(&error_message);

    if (error_message) {
        free(error_message);
        error_message = NULL;
    }
}
return *m != NULL;

}

// // Helper function to track state while doing a depth-first search. // attribute((nonnull)) static bpmatcht *getnumberedcapture(bpmatcht *m, int *n) { if ((m->pat->type == BPCAPTURE && When(m->pat, BPCAPTURE)->namelen == 0) || m->pat->type == BPTAGGED) { if (n == 1) { return m; } else { –(n); return NULL; } }

if (m->pat->type == BP_CAPTURE || m->pat->type == BP_TAGGED) return NULL;

if (m->children) {
    for (int i = 0; m->children[i]; i++) {
        bp_match_t *cap = _get_numbered_capture(m->children[i], n);
        if (cap) return cap;
    }
}
return NULL;

}

// // Get a specific numbered pattern capture. // public bpmatcht *getnumberedcapture(bpmatcht *m, int n) { if (n <= 0) return m; if (m->pat->type == BPTAGGED || m->pat->type == BPCAPTURE) { if (n == 1 && m->pat->type == BPCAPTURE && When(m->pat, BPCAPTURE)->namelen == 0) return m; if (m->children) { for (int i = 0; m->children[i]; i++) { bpmatcht *cap = getnumbered_capture(m->children[i], &n); if (cap) return cap; } } return NULL; } else { return getnumbered_capture(m, &n); } }

// // Helper function for getnamedcapture() // bpmatcht *getnamedcapture(bpmatcht *m, const char *name, sizet namelen) { if (m->pat->type == BPCAPTURE && When(m->pat, BPCAPTURE)->name && When(m->pat, BPCAPTURE)->namelen == namelen && strncmp(When(m->pat, BPCAPTURE)->name, name, When(m->pat, BP_CAPTURE)->namelen) == 0) return m;

if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) return NULL;

if (m->children) {
    for (int i = 0; m->children[i]; i++) {
        bp_match_t *cap = _get_named_capture(m->children[i], name, namelen);
        if (cap) return cap;
    }
}
return NULL;

}

// // Get a capture with a specific name. // public bpmatcht *getnamedcapture(bpmatcht *m, const char *name, ssize_t namelen) { sizet namelen = namelen < 0 ? strlen(name) : (sizet)namelen; if (m->pat->type == BPTAGGED) { // || (m->pat->type == BPCAPTURE && m->pat->args.capture.namelen > 0)) { if (m->children) { for (int i = 0; m->children[i]; i++) { bpmatch_t *cap = getnamed_capture(m->children[i], name, namelen); if (cap) return cap; } } return NULL; } else { return getnamed_capture(m, name, namelen); } return NULL; }

// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,:0