2021-01-12 21:04:43 -08:00
|
|
|
//
|
2021-01-15 19:30:21 -08:00
|
|
|
// match.c - Code for the BP virtual machine that performs the matching.
|
2021-01-12 21:04:43 -08:00
|
|
|
//
|
2020-09-16 20:38:58 -07:00
|
|
|
|
|
|
|
#include <ctype.h>
|
2021-01-26 17:54:23 -08:00
|
|
|
#include <err.h>
|
2021-07-26 20:59:45 -07:00
|
|
|
#include <limits.h>
|
2021-01-18 11:39:20 -08:00
|
|
|
#include <stdbool.h>
|
2020-12-27 19:48:52 -08:00
|
|
|
#include <stdio.h>
|
2020-12-14 22:13:47 -08:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2020-09-16 20:38:58 -07:00
|
|
|
|
2021-01-15 19:35:39 -08:00
|
|
|
#include "definitions.h"
|
2021-01-15 19:44:16 -08:00
|
|
|
#include "match.h"
|
2021-07-17 13:54:26 -07:00
|
|
|
#include "pattern.h"
|
2020-12-14 22:01:50 -08:00
|
|
|
#include "types.h"
|
2020-09-11 01:28:06 -07:00
|
|
|
#include "utils.h"
|
2021-05-20 15:27:24 -07:00
|
|
|
#include "utf8.h"
|
2020-09-11 01:28:06 -07:00
|
|
|
|
2021-01-14 19:21:31 -08:00
|
|
|
// New match objects are either recycled from unused match objects or allocated
|
|
|
|
// from the heap. While it is in use, the match object is stored in the
|
|
|
|
// `in_use_matches` linked list. Once it is no longer needed, it is moved to
|
|
|
|
// the `unused_matches` linked list so it can be reused without the need for
|
|
|
|
// additional calls to malloc/free. Thus, it is an invariant that every match
|
|
|
|
// object is in one of these two lists:
|
2021-01-14 19:43:30 -08:00
|
|
|
static match_t *unused_matches = NULL;
|
|
|
|
static match_t *in_use_matches = NULL;
|
2021-01-14 19:21:31 -08:00
|
|
|
|
2021-07-26 20:59:45 -07:00
|
|
|
typedef struct {
|
|
|
|
size_t size, occupancy;
|
|
|
|
match_t **matches;
|
|
|
|
} cache_t;
|
|
|
|
|
2021-07-26 23:56:04 -07:00
|
|
|
#define MAX_CACHE_SIZE (1<<14)
|
2021-07-26 20:59:45 -07:00
|
|
|
|
2021-07-30 13:36:29 -07:00
|
|
|
#define MATCHES(...) (match_t*[]){__VA_ARGS__, NULL}
|
|
|
|
|
2021-07-26 20:59:45 -07:00
|
|
|
cache_t cache = {0, 0, NULL};
|
|
|
|
|
2021-05-31 12:38:42 -07:00
|
|
|
__attribute__((nonnull(1)))
|
2021-05-11 12:38:58 -07:00
|
|
|
static inline pat_t *deref(def_t *defs, pat_t *pat);
|
2021-01-18 10:30:17 -08:00
|
|
|
__attribute__((returns_nonnull))
|
2021-07-26 20:59:45 -07:00
|
|
|
static match_t *new_match(def_t *defs, pat_t *pat, const char *start, const char *end, match_t *children[]);
|
2021-01-12 22:33:28 -08:00
|
|
|
__attribute__((nonnull))
|
|
|
|
static match_t *get_capture_by_num(match_t *m, int *n);
|
|
|
|
__attribute__((nonnull, pure))
|
|
|
|
static match_t *get_capture_by_name(match_t *m, const char *name);
|
2021-01-18 09:15:25 -08:00
|
|
|
__attribute__((hot, nonnull(2,3,4)))
|
2021-01-18 10:30:17 -08:00
|
|
|
static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool ignorecase);
|
2020-09-12 18:20:13 -07:00
|
|
|
|
2021-07-26 20:59:45 -07:00
|
|
|
// Store a value and update its refcount
|
|
|
|
static inline void add_owner(match_t** owner, match_t* owned)
|
|
|
|
{
|
|
|
|
if (*owner != NULL)
|
|
|
|
errx(EXIT_FAILURE, "Ownership is being overwritten");
|
|
|
|
*owner = owned;
|
|
|
|
++owned->refcount;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Unstore a value and update its refcount
|
|
|
|
static inline void remove_ownership(match_t** owner)
|
|
|
|
{
|
|
|
|
if (*owner) {
|
|
|
|
--(*owner)->refcount;
|
|
|
|
if ((*owner)->refcount == 0)
|
|
|
|
recycle_if_unused(owner);
|
|
|
|
*owner = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-30 13:36:29 -07:00
|
|
|
// Prepend to a doubly linked list
|
|
|
|
static inline void list_prepend(match_t **head, match_t *m, match_dll_t *node)
|
|
|
|
{
|
|
|
|
if (node->home)
|
|
|
|
errx(1, "Node already has a home");
|
|
|
|
node->home = head;
|
|
|
|
node->next = *head;
|
|
|
|
if (*head) {
|
|
|
|
match_dll_t *head_node = (match_dll_t*)((char*)(*head) + ((char*)node - (char*)m));
|
|
|
|
head_node->home = &node->next;
|
|
|
|
}
|
|
|
|
*head = m;
|
|
|
|
}
|
2021-07-26 20:59:45 -07:00
|
|
|
|
2021-07-30 13:36:29 -07:00
|
|
|
// Remove from a doubly linked list
|
|
|
|
static inline void list_remove(match_t *m, match_dll_t *node)
|
|
|
|
{
|
|
|
|
if (!node->home)
|
|
|
|
errx(1, "Attempt to remove something that isn't in a list");
|
|
|
|
*node->home = node->next;
|
|
|
|
if (node->next) {
|
|
|
|
match_dll_t *next_node = (match_dll_t*)((char*)(node->next) + ((char*)node - (char*)m));
|
|
|
|
next_node->home = node->home;
|
|
|
|
}
|
|
|
|
node->home = NULL;
|
|
|
|
node->next = NULL;
|
|
|
|
}
|
2021-07-26 20:59:45 -07:00
|
|
|
|
2021-07-27 00:09:04 -07:00
|
|
|
// Hash a (text position, pattern) pair for the match cache. The result is not
// reduced to a bucket index; callers mask it with `cache.size-1`.
static inline size_t hash(const char *str, pat_t *pat)
{
    size_t position = (size_t)str;
    return position + 2*pat->id;
}
|
|
|
|
|
|
|
|
// Look for a cached match of `pat` starting at `str` that was produced under
// the definitions identified by `defs->id`. Returns the cached match, or NULL
// if there is no such entry (or no cache has been allocated yet).
static match_t *cache_lookup(def_t *defs, const char *str, pat_t *pat)
{
    if (cache.matches == NULL)
        return NULL;

    size_t bucket = hash(str, pat) & (cache.size-1);
    match_t *entry = cache.matches[bucket];
    while (entry != NULL) {
        if (entry->pat == pat && entry->defs_id == defs->id && entry->start == str)
            break;
        entry = entry->cache.next;
    }
    return entry;
}
|
|
|
|
|
|
|
|
// Take `m` out of the cache and drop the reference that was taken when it was
// saved. Does nothing if `m` is not linked into a cache bucket.
static void cache_remove(match_t *m)
{
    match_t **slot = m->cache.home;
    if (slot == NULL)
        return;

    // Splice `m` out of its bucket chain:
    match_t *successor = m->cache.next;
    *slot = successor;
    if (successor != NULL)
        successor->cache.home = slot;
    m->cache.home = NULL;
    m->cache.next = NULL;

    remove_ownership(&m);
    --cache.occupancy;
}
|
|
|
|
|
|
|
|
// Store `m` in the cache, keyed on its start position and pattern.
// When the table would hold more than 3 entries per bucket on average, it is
// first either doubled in size (starting at 16 buckets) and rehashed, or, if
// it has already reached MAX_CACHE_SIZE, up to two entries are evicted from
// the tail of the bucket that `m` is about to go into.
static void cache_save(match_t *m)
{
    if (cache.occupancy+1 > 3*cache.size) {
        if (cache.size != MAX_CACHE_SIZE) {
            // Grow the table:
            cache_t old = cache;
            cache.size = (old.size == 0) ? 16 : 2*old.size;
            cache.matches = new(match_t*[cache.size]);

            // Rehash: pop entries off the front of each old bucket and push
            // them onto the front of their new bucket.
            if (old.matches != NULL) {
                for (size_t i = 0; i < old.size; i++) {
                    match_t *entry;
                    while ((entry = old.matches[i]) != NULL) {
                        match_t *rest = entry->cache.next;
                        *entry->cache.home = rest;
                        if (rest != NULL)
                            rest->cache.home = entry->cache.home;

                        match_t **new_slot = &cache.matches[hash(entry->start, entry->pat) & (cache.size-1)];
                        entry->cache.home = new_slot;
                        entry->cache.next = *new_slot;
                        if (*new_slot != NULL)
                            (*new_slot)->cache.home = &entry->cache.next;
                        *new_slot = entry;
                    }
                }
                free(old.matches);
            }
        } else {
            // The table can't grow any further, so make room by evicting the
            // entries at the tail end of the target bucket.
            size_t bucket = hash(m->start, m->pat) & (cache.size-1);
            int quota = 2;
            while (quota > 0 && cache.matches[bucket] != NULL) {
                match_t *tail = cache.matches[bucket];
                while (tail->cache.next != NULL)
                    tail = tail->cache.next;
                cache_remove(tail);
                --quota;
            }
        }
    }

    // Push `m` onto the front of its bucket:
    match_t **slot = &cache.matches[hash(m->start, m->pat) & (cache.size-1)];
    match_t *old_first = *slot;
    m->cache.home = slot;
    m->cache.next = old_first;
    if (old_first != NULL)
        old_first->cache.home = &m->cache.next;
    *slot = NULL; // add_owner() insists on an empty slot
    add_owner(slot, m);
    ++cache.occupancy;
}
|
|
|
|
|
|
|
|
static void cache_prune(const char *start, const char *end)
|
|
|
|
{
|
|
|
|
if (!cache.matches) return;
|
|
|
|
for (size_t i = 0; i < cache.size; i++) {
|
|
|
|
for (match_t *m = cache.matches[i], *next = NULL; m; m = next) {
|
2021-07-30 13:36:29 -07:00
|
|
|
next = m->cache.next;
|
2021-07-26 20:59:45 -07:00
|
|
|
if (m->start < start || (m->end ? m->end : m->start) > end)
|
|
|
|
cache_remove(m);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void cache_destroy(void)
|
|
|
|
{
|
|
|
|
if (!cache.matches) return;
|
|
|
|
for (size_t i = 0; i < cache.size; i++) {
|
|
|
|
while (cache.matches[i])
|
|
|
|
cache_remove(cache.matches[i]);
|
|
|
|
}
|
|
|
|
cache.occupancy = 0;
|
|
|
|
xfree(&cache.matches);
|
|
|
|
cache.size = 0;
|
|
|
|
}
|
|
|
|
|
2021-05-11 12:38:58 -07:00
|
|
|
//
|
|
|
|
// If the given pattern is a reference, look it up and return the referenced
|
|
|
|
// pattern. This is used for an optimization to avoid repeated lookups.
|
|
|
|
//
|
|
|
|
static inline pat_t *deref(def_t *defs, pat_t *pat)
|
|
|
|
{
|
2021-05-31 12:38:42 -07:00
|
|
|
if (pat && pat->type == BP_REF) {
|
2021-05-11 12:38:58 -07:00
|
|
|
def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
|
|
|
|
if (def) pat = def->pat;
|
|
|
|
}
|
|
|
|
return pat;
|
|
|
|
}
|
2020-09-25 08:41:29 -07:00
|
|
|
|
2021-07-17 15:25:24 -07:00
|
|
|
//
|
2021-07-17 16:01:18 -07:00
|
|
|
// Find and return the first and simplest pattern that will definitely have to
|
|
|
|
// match for the whole pattern to match (if any)
|
2021-07-17 15:25:24 -07:00
|
|
|
//
|
2021-07-17 16:01:18 -07:00
|
|
|
static pat_t *first_pat(def_t *defs, pat_t *pat)
|
2021-07-17 15:25:24 -07:00
|
|
|
{
|
|
|
|
for (pat_t *p = pat; p; ) {
|
2021-07-17 16:01:18 -07:00
|
|
|
switch (p->type) {
|
|
|
|
case BP_BEFORE:
|
|
|
|
p = p->args.pat; break;
|
|
|
|
case BP_REPEAT:
|
|
|
|
if (p->args.repetitions.min == 0)
|
|
|
|
return p;
|
|
|
|
p = p->args.repetitions.repeat_pat; break;
|
|
|
|
case BP_CAPTURE:
|
|
|
|
p = p->args.capture.capture_pat; break;
|
|
|
|
case BP_CHAIN: case BP_MATCH: case BP_NOT_MATCH:
|
|
|
|
p = p->args.multiple.first; break;
|
|
|
|
case BP_REPLACE:
|
|
|
|
p = p->args.replace.pat; break;
|
2021-07-19 19:40:43 -07:00
|
|
|
case BP_REF: {
|
|
|
|
pat_t *p2 = deref(defs, p);
|
|
|
|
if (p2 == p) return p2;
|
|
|
|
p = p2;
|
|
|
|
break;
|
|
|
|
}
|
2021-07-17 16:01:18 -07:00
|
|
|
default: return p;
|
|
|
|
}
|
2021-07-17 15:25:24 -07:00
|
|
|
}
|
2021-07-17 16:01:18 -07:00
|
|
|
return pat;
|
2021-07-17 15:25:24 -07:00
|
|
|
}
|
|
|
|
|
2021-01-15 01:19:10 -08:00
|
|
|
//
|
|
|
|
// Find the next match after prev (or the first match if prev is NULL)
|
|
|
|
//
|
2021-01-20 16:12:46 -08:00
|
|
|
match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *skip, bool ignorecase)
|
2021-01-15 01:19:10 -08:00
|
|
|
{
|
|
|
|
const char *str;
|
|
|
|
if (prev) {
|
|
|
|
str = prev->end > prev->start ? prev->end : prev->end + 1;
|
2021-07-26 20:59:45 -07:00
|
|
|
if (prev->refcount == 0) recycle_if_unused(&prev);
|
|
|
|
cache_prune(str, f->end);
|
2021-01-15 01:19:10 -08:00
|
|
|
} else {
|
2021-05-20 18:31:28 -07:00
|
|
|
str = f->start;
|
2021-01-15 01:19:10 -08:00
|
|
|
}
|
2021-07-17 16:01:18 -07:00
|
|
|
|
|
|
|
pat = deref(defs, pat);
|
|
|
|
pat_t *first = first_pat(defs, pat);
|
2021-07-17 15:25:24 -07:00
|
|
|
|
|
|
|
// Performance optimization: if the pattern starts with a string literal,
|
|
|
|
// we can just rely on the highly optimized strstr()/strcasestr()
|
|
|
|
// implementations to skip past areas where we know we won't find a match.
|
2021-07-19 13:33:51 -07:00
|
|
|
if (!skip && first->type == BP_STRING) {
|
2021-07-17 16:01:18 -07:00
|
|
|
for (size_t i = 0; i < first->min_matchlen; i++)
|
|
|
|
if (first->args.string[i] == '\0')
|
2021-07-17 15:25:24 -07:00
|
|
|
goto pattern_search;
|
2021-07-17 16:01:18 -07:00
|
|
|
char *tmp = strndup(first->args.string, first->min_matchlen);
|
2021-07-17 15:25:24 -07:00
|
|
|
char *found = (ignorecase ? strcasestr : strstr)(str, tmp);
|
|
|
|
if (found)
|
|
|
|
str = found;
|
2021-07-17 16:01:18 -07:00
|
|
|
else
|
2021-07-17 16:47:05 -07:00
|
|
|
str += strlen(str); // Use += strlen here instead of f->end to handle files with NULL bytes
|
2021-07-17 15:25:24 -07:00
|
|
|
free(tmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
pattern_search:
|
2021-07-19 19:40:43 -07:00
|
|
|
if (str > f->end) return NULL;
|
|
|
|
|
|
|
|
do {
|
2021-01-15 19:21:41 -08:00
|
|
|
match_t *m = match(defs, f, str, pat, ignorecase);
|
2021-01-15 01:19:10 -08:00
|
|
|
if (m) return m;
|
2021-07-17 16:01:18 -07:00
|
|
|
if (first->type == BP_START_OF_FILE) return NULL;
|
2021-01-20 16:12:46 -08:00
|
|
|
match_t *s;
|
|
|
|
if (skip && (s = match(defs, f, str, skip, ignorecase))) {
|
|
|
|
str = s->end > str ? s->end : str + 1;
|
|
|
|
recycle_if_unused(&s);
|
2021-07-19 19:40:43 -07:00
|
|
|
} else str = next_char(f, str);
|
|
|
|
} while (str < f->end);
|
2021-01-15 01:19:10 -08:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2021-01-12 21:04:43 -08:00
|
|
|
//
|
2021-01-15 19:21:41 -08:00
|
|
|
// Attempt to match the given pattern against the input string and return a
|
|
|
|
// match object, or NULL if no match is found.
|
2021-01-12 21:04:43 -08:00
|
|
|
// The returned value should be free()'d to avoid memory leaking.
|
|
|
|
//
|
2021-01-18 10:30:17 -08:00
|
|
|
static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool ignorecase)
|
2020-09-11 01:28:06 -07:00
|
|
|
{
|
2021-01-15 19:21:41 -08:00
|
|
|
switch (pat->type) {
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_LEFTRECURSION: {
|
2021-01-14 19:21:31 -08:00
|
|
|
// Left recursion occurs when a pattern directly or indirectly
|
|
|
|
// invokes itself at the same position in the text. It's handled as
|
|
|
|
// a special case, but if a pattern invokes itself at a later
|
|
|
|
// point, it can be handled with normal recursion.
|
|
|
|
// See: left-recursion.md for more details.
|
2021-01-15 19:21:41 -08:00
|
|
|
if (str == pat->args.leftrec.at) {
|
|
|
|
++pat->args.leftrec.visits;
|
|
|
|
return pat->args.leftrec.match;
|
2021-01-13 18:56:22 -08:00
|
|
|
} else {
|
2021-01-15 19:21:41 -08:00
|
|
|
return match(defs, f, str, pat->args.leftrec.fallback, ignorecase);
|
2021-01-13 18:56:22 -08:00
|
|
|
}
|
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_ANYCHAR: {
|
2021-07-26 20:59:45 -07:00
|
|
|
return (str < f->end && *str != '\n') ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
|
2021-05-11 11:39:42 -07:00
|
|
|
}
|
2021-07-19 19:40:43 -07:00
|
|
|
case BP_ID_START: {
|
2021-07-26 20:59:45 -07:00
|
|
|
return (str < f->end && isidstart(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
|
2021-07-19 19:40:43 -07:00
|
|
|
}
|
|
|
|
case BP_ID_CONTINUE: {
|
2021-07-26 20:59:45 -07:00
|
|
|
return (str < f->end && isidcontinue(f, str)) ? new_match(defs, pat, str, next_char(f, str), NULL) : NULL;
|
2021-07-19 19:40:43 -07:00
|
|
|
}
|
2021-05-11 11:39:42 -07:00
|
|
|
case BP_START_OF_FILE: {
|
2021-07-26 20:59:45 -07:00
|
|
|
return (str == f->start) ? new_match(defs, pat, str, str, NULL) : NULL;
|
2021-05-11 11:39:42 -07:00
|
|
|
}
|
|
|
|
case BP_START_OF_LINE: {
|
2021-07-26 20:59:45 -07:00
|
|
|
return (str == f->start || str[-1] == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
|
2021-05-11 11:39:42 -07:00
|
|
|
}
|
|
|
|
case BP_END_OF_FILE: {
|
2021-07-26 20:59:45 -07:00
|
|
|
return (str == f->end) ? new_match(defs, pat, str, str, NULL) : NULL;
|
2021-05-11 11:39:42 -07:00
|
|
|
}
|
|
|
|
case BP_END_OF_LINE: {
|
2021-07-26 20:59:45 -07:00
|
|
|
return (str == f->end || *str == '\n') ? new_match(defs, pat, str, str, NULL) : NULL;
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-07-19 19:40:43 -07:00
|
|
|
case BP_WORD_BOUNDARY: {
|
2021-07-26 20:59:45 -07:00
|
|
|
return (isidcontinue(f, str) != isidcontinue(f, prev_char(f, str))) ? new_match(defs, pat, str, str, NULL) : NULL;
|
2021-07-19 19:40:43 -07:00
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_STRING: {
|
2021-05-20 15:27:24 -07:00
|
|
|
if (&str[pat->min_matchlen] > f->end) return NULL;
|
2021-05-20 16:21:33 -07:00
|
|
|
if (pat->min_matchlen > 0 && (ignorecase ? memicmp : memcmp)(str, pat->args.string, pat->min_matchlen) != 0)
|
2020-09-11 01:28:06 -07:00
|
|
|
return NULL;
|
2021-07-26 20:59:45 -07:00
|
|
|
return new_match(defs, pat, str, str + pat->min_matchlen, NULL);
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_RANGE: {
|
2020-12-19 18:51:30 -08:00
|
|
|
if (str >= f->end) return NULL;
|
2021-01-15 19:21:41 -08:00
|
|
|
if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high)
|
2020-09-11 01:28:06 -07:00
|
|
|
return NULL;
|
2021-07-26 20:59:45 -07:00
|
|
|
return new_match(defs, pat, str, str+1, NULL);
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_NOT: {
|
2021-01-15 19:21:41 -08:00
|
|
|
match_t *m = match(defs, f, str, pat->args.pat, ignorecase);
|
2020-09-11 01:28:06 -07:00
|
|
|
if (m != NULL) {
|
2021-01-14 19:21:31 -08:00
|
|
|
recycle_if_unused(&m);
|
2020-09-11 01:28:06 -07:00
|
|
|
return NULL;
|
|
|
|
}
|
2021-07-26 20:59:45 -07:00
|
|
|
return new_match(defs, pat, str, str, NULL);
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-01-20 15:23:57 -08:00
|
|
|
case BP_UPTO: {
|
2021-07-26 20:59:45 -07:00
|
|
|
match_t *m = new_match(defs, pat, str, str, NULL);
|
2021-05-11 12:38:58 -07:00
|
|
|
pat_t *target = deref(defs, pat->args.multiple.first),
|
|
|
|
*skip = deref(defs, pat->args.multiple.second);
|
2021-01-15 19:21:41 -08:00
|
|
|
if (!target && !skip) {
|
2021-01-05 00:09:30 -08:00
|
|
|
while (str < f->end && *str != '\n') ++str;
|
2021-01-13 18:56:22 -08:00
|
|
|
m->end = str;
|
|
|
|
return m;
|
|
|
|
}
|
|
|
|
|
2021-07-26 20:59:45 -07:00
|
|
|
size_t child_cap = 0, nchildren = 0;
|
2021-01-13 18:56:22 -08:00
|
|
|
for (const char *prev = NULL; prev < str; ) {
|
|
|
|
prev = str;
|
2021-01-15 19:21:41 -08:00
|
|
|
if (target) {
|
|
|
|
match_t *p = match(defs, f, str, target, ignorecase);
|
2021-01-13 18:56:22 -08:00
|
|
|
if (p != NULL) {
|
2021-01-20 15:23:57 -08:00
|
|
|
recycle_if_unused(&p);
|
|
|
|
m->end = str;
|
2021-01-08 00:46:47 -08:00
|
|
|
return m;
|
2020-09-13 22:04:51 -07:00
|
|
|
}
|
2021-01-13 18:56:22 -08:00
|
|
|
} else if (str == f->end) {
|
|
|
|
m->end = str;
|
|
|
|
return m;
|
|
|
|
}
|
|
|
|
if (skip) {
|
2021-01-15 02:05:17 -08:00
|
|
|
match_t *s = match(defs, f, str, skip, ignorecase);
|
2021-01-13 18:56:22 -08:00
|
|
|
if (s != NULL) {
|
|
|
|
str = s->end;
|
2021-07-26 20:59:45 -07:00
|
|
|
if (nchildren+2 >= child_cap) {
|
2021-07-30 14:44:00 -07:00
|
|
|
m->children = grow(m->children, child_cap += 5);
|
2021-07-26 20:59:45 -07:00
|
|
|
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
|
|
|
|
}
|
|
|
|
add_owner(&m->children[nchildren++], s);
|
2021-01-13 18:56:22 -08:00
|
|
|
continue;
|
2020-12-14 18:11:33 -08:00
|
|
|
}
|
2020-09-13 22:04:51 -07:00
|
|
|
}
|
2021-01-13 18:56:22 -08:00
|
|
|
// This isn't in the for() structure because there needs to
|
|
|
|
// be at least once chance to match the pattern, even if
|
|
|
|
// we're at the end of the string already (e.g. "..$").
|
|
|
|
if (str < f->end && *str != '\n')
|
|
|
|
str = next_char(f, str);
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-01-14 19:21:31 -08:00
|
|
|
recycle_if_unused(&m);
|
2021-01-13 18:56:22 -08:00
|
|
|
return NULL;
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_REPEAT: {
|
2021-07-26 20:59:45 -07:00
|
|
|
match_t *m = new_match(defs, pat, str, str, NULL);
|
2020-09-13 23:31:38 -07:00
|
|
|
size_t reps = 0;
|
2021-01-15 19:21:41 -08:00
|
|
|
ssize_t max = pat->args.repetitions.max;
|
2021-05-11 12:38:58 -07:00
|
|
|
pat_t *repeating = deref(defs, pat->args.repetitions.repeat_pat);
|
|
|
|
pat_t *sep = deref(defs, pat->args.repetitions.sep);
|
2021-07-26 20:59:45 -07:00
|
|
|
size_t child_cap = 0, nchildren = 0;
|
2020-09-18 22:32:36 -07:00
|
|
|
for (reps = 0; max == -1 || reps < (size_t)max; ++reps) {
|
|
|
|
const char *start = str;
|
2020-09-11 01:28:06 -07:00
|
|
|
// Separator
|
2021-01-15 19:21:41 -08:00
|
|
|
match_t *msep = NULL;
|
2021-05-11 12:38:58 -07:00
|
|
|
if (sep != NULL && reps > 0) {
|
|
|
|
msep = match(defs, f, str, sep, ignorecase);
|
2021-01-15 19:21:41 -08:00
|
|
|
if (msep == NULL) break;
|
|
|
|
str = msep->end;
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-05-11 12:38:58 -07:00
|
|
|
match_t *mp = match(defs, f, str, repeating, ignorecase);
|
2021-01-15 19:21:41 -08:00
|
|
|
if (mp == NULL) {
|
2021-01-12 20:01:46 -08:00
|
|
|
str = start;
|
2021-01-18 11:28:39 -08:00
|
|
|
if (msep) recycle_if_unused(&msep);
|
2020-09-18 22:32:36 -07:00
|
|
|
break;
|
|
|
|
}
|
2021-01-15 19:21:41 -08:00
|
|
|
if (mp->end == start && reps > 0) {
|
2021-05-11 12:38:58 -07:00
|
|
|
// Since no forward progress was made on either `repeating`
|
|
|
|
// or `sep` and BP does not have mutable state, it's
|
|
|
|
// guaranteed that no progress will be made on the next
|
|
|
|
// loop either. We know that this will continue to loop
|
|
|
|
// until reps==max, so let's just cut to the chase instead
|
|
|
|
// of looping infinitely.
|
2021-01-18 11:28:39 -08:00
|
|
|
if (msep) recycle_if_unused(&msep);
|
2021-01-15 19:21:41 -08:00
|
|
|
recycle_if_unused(&mp);
|
|
|
|
if (pat->args.repetitions.max == -1)
|
2020-09-18 22:32:36 -07:00
|
|
|
reps = ~(size_t)0;
|
|
|
|
else
|
2021-01-15 19:21:41 -08:00
|
|
|
reps = (size_t)pat->args.repetitions.max;
|
2020-09-11 01:28:06 -07:00
|
|
|
break;
|
|
|
|
}
|
2021-01-15 19:21:41 -08:00
|
|
|
if (msep) {
|
2021-07-26 20:59:45 -07:00
|
|
|
if (nchildren+2 >= child_cap) {
|
2021-07-30 14:44:00 -07:00
|
|
|
m->children = grow(m->children, child_cap += 5);
|
2021-07-26 20:59:45 -07:00
|
|
|
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
|
|
|
|
}
|
|
|
|
add_owner(&m->children[nchildren++], msep);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nchildren+2 >= child_cap) {
|
2021-07-30 14:44:00 -07:00
|
|
|
m->children = grow(m->children, child_cap += 5);
|
2021-07-26 20:59:45 -07:00
|
|
|
for (size_t i = nchildren; i < child_cap; i++) m->children[i] = NULL;
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-07-26 20:59:45 -07:00
|
|
|
add_owner(&m->children[nchildren++], mp);
|
2021-01-15 19:21:41 -08:00
|
|
|
str = mp->end;
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
|
|
|
|
2021-01-15 19:21:41 -08:00
|
|
|
if (reps < (size_t)pat->args.repetitions.min) {
|
2021-01-14 19:21:31 -08:00
|
|
|
recycle_if_unused(&m);
|
2020-09-11 01:28:06 -07:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
m->end = str;
|
|
|
|
return m;
|
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_AFTER: {
|
2021-05-20 15:27:24 -07:00
|
|
|
pat_t *back = deref(defs, pat->args.pat);
|
2021-05-31 12:38:42 -07:00
|
|
|
if (!back) return NULL;
|
2021-05-20 16:59:42 -07:00
|
|
|
|
|
|
|
// We only care about the region from the backtrack pos up to the
|
|
|
|
// current pos, so mock it out as a file slice.
|
|
|
|
// TODO: this breaks ^/^^/$/$$, but that can probably be ignored
|
|
|
|
// because you rarely need to check those in a backtrack.
|
|
|
|
file_t slice;
|
2021-05-20 18:31:28 -07:00
|
|
|
slice_file(&slice, f, f->start, str);
|
2021-05-20 16:21:33 -07:00
|
|
|
for (const char *pos = &str[-(long)back->min_matchlen];
|
2021-05-20 18:31:28 -07:00
|
|
|
pos >= f->start && (back->max_matchlen == -1 || pos >= &str[-(long)back->max_matchlen]);
|
2021-05-20 15:27:24 -07:00
|
|
|
pos = prev_char(f, pos)) {
|
2021-05-20 18:31:28 -07:00
|
|
|
slice.start = (char*)pos;
|
2021-05-20 16:59:42 -07:00
|
|
|
match_t *m = match(defs, &slice, pos, back, ignorecase);
|
2021-05-20 16:21:33 -07:00
|
|
|
// Match should not go past str (i.e. (<"AB" "B") should match "ABB", but not "AB")
|
|
|
|
if (m && m->end != str)
|
|
|
|
recycle_if_unused(&m);
|
|
|
|
else if (m)
|
2021-07-26 20:59:45 -07:00
|
|
|
return new_match(defs, pat, str, str, MATCHES(m));
|
2021-05-20 18:31:28 -07:00
|
|
|
if (pos == f->start) break;
|
2021-05-20 16:59:42 -07:00
|
|
|
// To prevent extreme performance degradation, don't keep
|
|
|
|
// walking backwards endlessly over newlines.
|
|
|
|
if (back->max_matchlen == -1 && *pos == '\n') break;
|
2021-05-20 15:27:24 -07:00
|
|
|
}
|
|
|
|
return NULL;
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_BEFORE: {
|
2021-01-15 19:21:41 -08:00
|
|
|
match_t *after = match(defs, f, str, pat->args.pat, ignorecase);
|
2021-07-26 20:59:45 -07:00
|
|
|
return after ? new_match(defs, pat, str, str, MATCHES(after)) : NULL;
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_CAPTURE: {
|
2021-01-15 19:21:41 -08:00
|
|
|
match_t *p = match(defs, f, str, pat->args.pat, ignorecase);
|
2021-07-26 20:59:45 -07:00
|
|
|
return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_OTHERWISE: {
|
2021-01-15 19:21:41 -08:00
|
|
|
match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase);
|
2021-05-20 16:21:33 -07:00
|
|
|
return m ? m : match(defs, f, str, pat->args.multiple.second, ignorecase);
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_CHAIN: {
|
2021-01-15 19:21:41 -08:00
|
|
|
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
|
2020-09-11 01:28:06 -07:00
|
|
|
if (m1 == NULL) return NULL;
|
2020-09-12 18:20:13 -07:00
|
|
|
|
2021-01-10 01:45:40 -08:00
|
|
|
match_t *m2;
|
2021-07-26 20:59:45 -07:00
|
|
|
// Push backrefs and run matching, then cleanup
|
|
|
|
if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) {
|
|
|
|
// Temporarily add a rule that the backref name matches the
|
|
|
|
// exact string of the original match (no replacements)
|
|
|
|
size_t len = (size_t)(m1->end - m1->start);
|
|
|
|
pat_t *backref = new_pat(f, m1->start, m1->end, len, (ssize_t)len, BP_STRING);
|
|
|
|
backref->args.string = m1->start;
|
|
|
|
|
2021-07-29 12:45:37 -07:00
|
|
|
def_t *defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref);
|
2021-07-26 20:59:45 -07:00
|
|
|
++m1->refcount; {
|
|
|
|
m2 = match(defs2, f, m1->end, pat->args.multiple.second, ignorecase);
|
|
|
|
if (!m2) { // No need to keep the backref in memory if it didn't match
|
2021-07-29 12:45:37 -07:00
|
|
|
for (pat_t **rem = &f->pats; *rem; rem = &(*rem)->next) {
|
|
|
|
if ((*rem) == backref) {
|
|
|
|
pat_t *tmp = *rem;
|
2021-07-26 20:59:45 -07:00
|
|
|
*rem = (*rem)->next;
|
|
|
|
free(tmp);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
defs = free_defs(defs2, defs);
|
|
|
|
} --m1->refcount;
|
|
|
|
} else {
|
|
|
|
m2 = match(defs, f, m1->end, pat->args.multiple.second, ignorecase);
|
2021-01-10 01:45:40 -08:00
|
|
|
}
|
|
|
|
|
2020-09-11 01:28:06 -07:00
|
|
|
if (m2 == NULL) {
|
2021-01-14 19:21:31 -08:00
|
|
|
recycle_if_unused(&m1);
|
2020-09-11 01:28:06 -07:00
|
|
|
return NULL;
|
|
|
|
}
|
2021-07-26 20:59:45 -07:00
|
|
|
|
|
|
|
return new_match(defs, pat, str, m2->end, MATCHES(m1, m2));
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-05-19 23:41:57 -07:00
|
|
|
case BP_MATCH: case BP_NOT_MATCH: {
|
2021-01-15 19:21:41 -08:00
|
|
|
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
|
2020-09-13 20:33:11 -07:00
|
|
|
if (m1 == NULL) return NULL;
|
|
|
|
|
2021-07-17 16:01:18 -07:00
|
|
|
// <p1>~<p2> matches iff the text of <p1> matches <p2>
|
|
|
|
// <p1>!~<p2> matches iff the text of <p1> does not match <p2>
|
2021-05-20 16:59:42 -07:00
|
|
|
file_t slice;
|
2021-05-20 18:31:28 -07:00
|
|
|
slice_file(&slice, f, m1->start, m1->end);
|
2021-05-20 16:59:42 -07:00
|
|
|
match_t *m2 = next_match(defs, &slice, NULL, pat->args.multiple.second, NULL, ignorecase);
|
2021-05-19 23:41:57 -07:00
|
|
|
if ((!m2 && pat->type == BP_MATCH) || (m2 && pat->type == BP_NOT_MATCH)) {
|
2021-07-26 20:59:45 -07:00
|
|
|
if (m2) recycle_if_unused(&m2);
|
2021-01-14 19:21:31 -08:00
|
|
|
recycle_if_unused(&m1);
|
2020-09-13 20:33:11 -07:00
|
|
|
return NULL;
|
|
|
|
}
|
2021-07-26 20:59:45 -07:00
|
|
|
return new_match(defs, pat, m1->start, m1->end, (pat->type == BP_MATCH) ? MATCHES(m2) : NULL);
|
2020-09-13 20:33:11 -07:00
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_REPLACE: {
|
2020-09-17 01:00:06 -07:00
|
|
|
match_t *p = NULL;
|
2021-01-15 19:21:41 -08:00
|
|
|
if (pat->args.replace.pat) {
|
|
|
|
p = match(defs, f, str, pat->args.replace.pat, ignorecase);
|
2020-09-17 01:00:06 -07:00
|
|
|
if (p == NULL) return NULL;
|
|
|
|
}
|
2021-07-26 20:59:45 -07:00
|
|
|
return new_match(defs, pat, str, p ? p->end : str, MATCHES(p));
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_REF: {
|
2021-07-26 20:59:45 -07:00
|
|
|
match_t *cached = cache_lookup(defs, str, pat);
|
|
|
|
if (cached) return cached->end == NULL ? NULL : cached;
|
|
|
|
|
2021-05-11 11:39:42 -07:00
|
|
|
def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
|
2021-01-26 17:54:23 -08:00
|
|
|
if (def == NULL)
|
2021-05-11 11:39:42 -07:00
|
|
|
errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
|
2021-01-15 19:02:36 -08:00
|
|
|
pat_t *ref = def->pat;
|
2020-09-13 00:48:39 -07:00
|
|
|
|
2021-01-15 18:38:06 -08:00
|
|
|
pat_t rec_op = {
|
2021-01-16 10:29:09 -08:00
|
|
|
.type = BP_LEFTRECURSION,
|
2021-01-13 18:56:22 -08:00
|
|
|
.start = ref->start,
|
|
|
|
.end = ref->end,
|
2021-05-20 15:27:24 -07:00
|
|
|
.min_matchlen = 0,
|
|
|
|
.max_matchlen = -1,
|
2021-01-14 19:21:31 -08:00
|
|
|
.args.leftrec = {
|
2021-01-13 18:56:22 -08:00
|
|
|
.match = NULL,
|
|
|
|
.visits = 0,
|
|
|
|
.at = str,
|
|
|
|
.fallback = ref,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
def_t defs2 = {
|
|
|
|
.namelen = def->namelen,
|
|
|
|
.name = def->name,
|
2021-01-15 19:02:36 -08:00
|
|
|
.pat = &rec_op,
|
2021-01-13 18:56:22 -08:00
|
|
|
.next = defs,
|
2020-09-13 00:48:39 -07:00
|
|
|
};
|
2021-01-13 18:56:22 -08:00
|
|
|
|
|
|
|
const char *prev = str;
|
2021-01-15 02:05:17 -08:00
|
|
|
match_t *m = match(&defs2, f, str, ref, ignorecase);
|
2021-07-26 20:59:45 -07:00
|
|
|
if (m == NULL) {
|
|
|
|
// Store placeholder:
|
|
|
|
cache_save(new_match(defs, pat, str, NULL, NULL));
|
|
|
|
return NULL;
|
|
|
|
}
|
2021-01-13 18:56:22 -08:00
|
|
|
|
2021-01-14 19:21:31 -08:00
|
|
|
while (rec_op.args.leftrec.visits > 0) {
|
|
|
|
rec_op.args.leftrec.visits = 0;
|
2021-07-26 20:59:45 -07:00
|
|
|
remove_ownership(&rec_op.args.leftrec.match);
|
|
|
|
add_owner(&rec_op.args.leftrec.match, m);
|
2021-01-13 18:56:22 -08:00
|
|
|
prev = m->end;
|
2021-01-15 02:05:17 -08:00
|
|
|
match_t *m2 = match(&defs2, f, str, ref, ignorecase);
|
2021-01-13 18:56:22 -08:00
|
|
|
if (m2 == NULL) break;
|
2021-01-14 19:21:31 -08:00
|
|
|
if (m2->end <= prev) {
|
|
|
|
recycle_if_unused(&m2);
|
|
|
|
break;
|
|
|
|
}
|
2021-01-13 18:56:22 -08:00
|
|
|
m = m2;
|
2020-09-13 00:48:39 -07:00
|
|
|
}
|
2021-01-13 18:56:22 -08:00
|
|
|
|
2021-07-26 20:59:45 -07:00
|
|
|
// This match wrapper mainly exists for record-keeping purposes.
|
|
|
|
// However, it also keeps `m` from getting garbage collected with
|
|
|
|
// leftrec.match is GC'd. It also helps with visualization of match
|
|
|
|
// results.
|
2021-01-15 12:12:56 -08:00
|
|
|
// OPTIMIZE: remove this if necessary
|
2021-07-26 20:59:45 -07:00
|
|
|
match_t *wrap = new_match(defs, pat, m->start, m->end, MATCHES(m));
|
|
|
|
cache_save(wrap);
|
|
|
|
|
|
|
|
if (rec_op.args.leftrec.match)
|
|
|
|
remove_ownership(&rec_op.args.leftrec.match);
|
|
|
|
|
|
|
|
return wrap;
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
2021-01-16 10:29:09 -08:00
|
|
|
case BP_NODENT: {
|
2020-10-13 17:12:37 -07:00
|
|
|
if (*str != '\n') return NULL;
|
|
|
|
const char *start = str;
|
|
|
|
|
2020-09-16 19:35:43 -07:00
|
|
|
size_t linenum = get_line_number(f, str);
|
2020-10-13 17:12:37 -07:00
|
|
|
const char *p = get_line(f, linenum);
|
2021-05-20 18:31:28 -07:00
|
|
|
if (p < f->start) p = f->start; // Can happen with recursive matching
|
2020-10-13 17:12:37 -07:00
|
|
|
|
|
|
|
// Current indentation:
|
2020-09-14 01:21:49 -07:00
|
|
|
char denter = *p;
|
|
|
|
int dents = 0;
|
|
|
|
if (denter == ' ' || denter == '\t') {
|
2020-12-14 22:18:04 -08:00
|
|
|
for (; *p == denter && p < f->end; ++p) ++dents;
|
2020-09-14 01:21:49 -07:00
|
|
|
}
|
2020-10-13 17:12:37 -07:00
|
|
|
|
|
|
|
// Subsequent indentation:
|
|
|
|
while (*str == '\n') ++str;
|
2020-09-14 01:21:49 -07:00
|
|
|
for (int i = 0; i < dents; i++) {
|
2020-12-14 22:18:04 -08:00
|
|
|
if (str[i] != denter || &str[i] >= f->end) return NULL;
|
2020-09-14 01:21:49 -07:00
|
|
|
}
|
|
|
|
|
2021-07-26 20:59:45 -07:00
|
|
|
return new_match(defs, pat, start, &str[dents], NULL);
|
2020-09-14 01:21:49 -07:00
|
|
|
}
|
2021-05-20 00:33:11 -07:00
|
|
|
case BP_ERROR: {
|
2021-05-22 13:23:10 -07:00
|
|
|
match_t *p = pat->args.pat ? match(defs, f, str, pat->args.pat, ignorecase) : NULL;
|
2021-07-26 20:59:45 -07:00
|
|
|
return p ? new_match(defs, pat, str, p->end, MATCHES(p)) : NULL;
|
2021-05-20 00:33:11 -07:00
|
|
|
}
|
2020-09-11 01:28:06 -07:00
|
|
|
default: {
|
2021-05-31 12:56:49 -07:00
|
|
|
errx(EXIT_FAILURE, "Unknown pattern type: %u", pat->type);
|
2020-09-11 01:28:06 -07:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-12 21:04:43 -08:00
|
|
|
//
|
|
|
|
// Get a specific numbered pattern capture.
|
|
|
|
//
|
2021-01-08 01:00:27 -08:00
|
|
|
static match_t *get_capture_by_num(match_t *m, int *n)
|
2020-09-11 01:28:06 -07:00
|
|
|
{
|
|
|
|
if (*n == 0) return m;
|
2021-01-16 10:29:09 -08:00
|
|
|
if (m->pat->type == BP_CAPTURE && *n == 1) return m;
|
|
|
|
if (m->pat->type == BP_CAPTURE) --(*n);
|
2021-07-26 20:59:45 -07:00
|
|
|
if (m->children) {
|
|
|
|
for (int i = 0; m->children[i]; i++) {
|
|
|
|
match_t *cap = get_capture_by_num(m->children[i], n);
|
|
|
|
if (cap) return cap;
|
|
|
|
}
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2021-01-12 21:04:43 -08:00
|
|
|
//
|
|
|
|
// Get a capture with a specific name.
|
|
|
|
//
|
2021-01-08 01:00:27 -08:00
|
|
|
static match_t *get_capture_by_name(match_t *m, const char *name)
|
2020-09-11 01:28:06 -07:00
|
|
|
{
|
2021-01-16 10:29:09 -08:00
|
|
|
if (m->pat->type == BP_CAPTURE && m->pat->args.capture.name
|
2021-01-17 23:28:19 -08:00
|
|
|
&& strncmp(m->pat->args.capture.name, name, m->pat->args.capture.namelen) == 0)
|
2020-09-11 01:28:06 -07:00
|
|
|
return m;
|
2021-07-26 20:59:45 -07:00
|
|
|
if (m->children) {
|
|
|
|
for (int i = 0; m->children[i]; i++) {
|
|
|
|
match_t *cap = get_capture_by_name(m->children[i], name);
|
|
|
|
if (cap) return cap;
|
|
|
|
}
|
2020-09-11 01:28:06 -07:00
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2021-01-12 21:04:43 -08:00
|
|
|
//
|
2021-01-12 22:22:38 -08:00
|
|
|
// Get a capture by identifier (name or number).
|
|
|
|
// Update *id to point to after the identifier (if found).
|
2021-01-12 21:04:43 -08:00
|
|
|
//
|
2021-01-12 22:22:38 -08:00
|
|
|
match_t *get_capture(match_t *m, const char **id)
|
2020-09-16 20:38:58 -07:00
|
|
|
{
|
2021-01-12 22:22:38 -08:00
|
|
|
if (isdigit(**id)) {
|
|
|
|
int n = (int)strtol(*id, (char**)id, 10);
|
2021-07-26 20:59:45 -07:00
|
|
|
return get_capture_by_num(m, &n);
|
2020-09-28 16:35:22 -07:00
|
|
|
} else {
|
2021-01-12 22:22:38 -08:00
|
|
|
const char *end = after_name(*id);
|
|
|
|
if (end == *id) return NULL;
|
|
|
|
char *name = strndup(*id, (size_t)(end-*id));
|
2021-01-08 01:00:27 -08:00
|
|
|
match_t *cap = get_capture_by_name(m, name);
|
2021-01-10 00:24:24 -08:00
|
|
|
xfree(&name);
|
2021-01-12 22:22:38 -08:00
|
|
|
*id = end;
|
|
|
|
if (**id == ';') ++(*id);
|
2020-09-16 20:38:58 -07:00
|
|
|
return cap;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-01-12 21:28:44 -08:00
|
|
|
//
|
2021-01-14 19:21:31 -08:00
|
|
|
// Return a match object which can be used (may be allocated or recycled).
|
2021-01-12 21:28:44 -08:00
|
|
|
//
|
2021-07-26 20:59:45 -07:00
|
|
|
static match_t *new_match(def_t *defs, pat_t *pat, const char *start, const char *end, match_t *children[])
|
2021-01-14 19:21:31 -08:00
|
|
|
{
|
|
|
|
match_t *m;
|
|
|
|
if (unused_matches) {
|
|
|
|
m = unused_matches;
|
2021-07-30 13:36:29 -07:00
|
|
|
list_remove(m, &m->gc);
|
2021-01-14 19:21:31 -08:00
|
|
|
memset(m, 0, sizeof(match_t));
|
|
|
|
} else {
|
|
|
|
m = new(match_t);
|
|
|
|
}
|
|
|
|
// Keep track of the object:
|
2021-07-30 13:36:29 -07:00
|
|
|
list_prepend(&in_use_matches, m, &m->gc);
|
2021-01-14 19:43:30 -08:00
|
|
|
|
2021-05-11 11:39:42 -07:00
|
|
|
m->pat = pat;
|
|
|
|
m->start = start;
|
|
|
|
m->end = end;
|
2021-07-26 20:59:45 -07:00
|
|
|
m->defs_id = defs->id;
|
|
|
|
|
|
|
|
if (children) {
|
|
|
|
for (int i = 0; children[i]; i++)
|
|
|
|
add_owner(&m->_children[i], children[i]);
|
|
|
|
m->children = m->_children;
|
|
|
|
}
|
2021-01-14 19:21:31 -08:00
|
|
|
return m;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// If the given match is not currently a child member of another match (or
|
|
|
|
// otherwise reserved) then put it back in the pool of unused match objects.
|
|
|
|
//
|
|
|
|
void recycle_if_unused(match_t **at_m)
|
2020-09-13 00:48:39 -07:00
|
|
|
{
|
2021-01-14 19:21:31 -08:00
|
|
|
match_t *m = *at_m;
|
|
|
|
if (m == NULL) return;
|
|
|
|
if (m->refcount > 0) {
|
|
|
|
*at_m = NULL;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-07-26 20:59:45 -07:00
|
|
|
if (m->children) {
|
|
|
|
for (int i = 0; m->children[i]; i++)
|
|
|
|
remove_ownership(&m->children[i]);
|
|
|
|
if (m->children != m->_children)
|
|
|
|
xfree(&m->children);
|
|
|
|
}
|
2021-01-14 19:21:31 -08:00
|
|
|
|
2021-07-30 13:36:29 -07:00
|
|
|
list_remove(m, &m->gc);
|
2021-01-18 09:52:35 -08:00
|
|
|
(void)memset(m, 0, sizeof(match_t));
|
2021-07-30 13:36:29 -07:00
|
|
|
list_prepend(&unused_matches, m, &m->gc);
|
2021-01-14 19:21:31 -08:00
|
|
|
*at_m = NULL;
|
2021-01-13 18:56:22 -08:00
|
|
|
}
|
|
|
|
|
2021-01-14 19:21:31 -08:00
|
|
|
//
|
|
|
|
// Force all match objects into the pool of unused match objects.
|
|
|
|
//
|
|
|
|
size_t recycle_all_matches(void)
|
|
|
|
{
|
|
|
|
size_t count = 0;
|
|
|
|
while (in_use_matches) {
|
|
|
|
match_t *m = in_use_matches;
|
2021-07-30 13:36:29 -07:00
|
|
|
list_remove(m, &m->gc);
|
2021-07-26 20:59:45 -07:00
|
|
|
if (m->children && m->children != m->_children)
|
|
|
|
xfree(&m->children);
|
2021-07-30 13:36:29 -07:00
|
|
|
list_prepend(&unused_matches, m, &m->gc);
|
2021-01-14 19:21:31 -08:00
|
|
|
++count;
|
|
|
|
}
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// Free all match objects in memory.
|
|
|
|
//
|
|
|
|
size_t free_all_matches(void)
|
|
|
|
{
|
|
|
|
size_t count = 0;
|
|
|
|
recycle_all_matches();
|
|
|
|
while (unused_matches) {
|
|
|
|
match_t *m = unused_matches;
|
2021-07-30 13:36:29 -07:00
|
|
|
list_remove(m, &m->gc);
|
2021-01-14 19:21:31 -08:00
|
|
|
free(m);
|
|
|
|
++count;
|
|
|
|
}
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
2020-09-11 01:38:44 -07:00
|
|
|
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1
|