Rename 'Match' macro to avoid confusion

This commit is contained in:
Bruce Hill 2024-05-28 02:05:57 -04:00
parent acdb6ff91b
commit 2ec9f76b72
6 changed files with 50 additions and 47 deletions

2
json.c
View File

@ -29,7 +29,7 @@ static int _json_match(const char *text, match_t *m, int comma, bool verbose)
comma = 0; comma = 0;
printf("{"); printf("{");
if (m->pat->type == BP_TAGGED) { if (m->pat->type == BP_TAGGED) {
printf("\"tag\":\"%.*s\"", (int)Match(m->pat, BP_TAGGED)->namelen, Match(m->pat, BP_TAGGED)->name); printf("\"tag\":\"%.*s\"", (int)When(m->pat, BP_TAGGED)->namelen, When(m->pat, BP_TAGGED)->name);
comma = 1; comma = 1;
} }
if (verbose) { if (verbose) {

82
match.c
View File

@ -227,12 +227,12 @@ static pat_t *_lookup_def(match_ctx_t *ctx, pat_t *defs, const char *name, size_
{ {
while (defs) { while (defs) {
if (defs->type == BP_CHAIN) { if (defs->type == BP_CHAIN) {
auto chain = Match(defs, BP_CHAIN); auto chain = When(defs, BP_CHAIN);
pat_t *second = _lookup_def(ctx, chain->second, name, namelen); pat_t *second = _lookup_def(ctx, chain->second, name, namelen);
if (second) return second; if (second) return second;
defs = chain->first; defs = chain->first;
} else if (defs->type == BP_DEFINITIONS) { } else if (defs->type == BP_DEFINITIONS) {
auto def = Match(defs, BP_DEFINITIONS); auto def = When(defs, BP_DEFINITIONS);
if (namelen == def->namelen && strncmp(def->name, name, namelen) == 0) if (namelen == def->namelen && strncmp(def->name, name, namelen) == 0)
return def->meaning; return def->meaning;
defs = def->next_def; defs = def->next_def;
@ -265,7 +265,7 @@ __attribute__((nonnull(1)))
static inline pat_t *deref(match_ctx_t *ctx, pat_t *pat) static inline pat_t *deref(match_ctx_t *ctx, pat_t *pat)
{ {
if (pat && pat->type == BP_REF) { if (pat && pat->type == BP_REF) {
auto ref = Match(pat, BP_REF); auto ref = When(pat, BP_REF);
pat_t *def = lookup_ctx(ctx, ref->name, ref->len); pat_t *def = lookup_ctx(ctx, ref->name, ref->len);
if (def) return def; if (def) return def;
} }
@ -283,27 +283,27 @@ static pat_t *get_prerequisite(match_ctx_t *ctx, pat_t *pat)
for (pat_t *p = pat; p; ) { for (pat_t *p = pat; p; ) {
switch (p->type) { switch (p->type) {
case BP_BEFORE: case BP_BEFORE:
p = Match(p, BP_BEFORE)->pat; break; p = When(p, BP_BEFORE)->pat; break;
case BP_REPEAT: case BP_REPEAT:
if (Match(p, BP_REPEAT)->min == 0) if (When(p, BP_REPEAT)->min == 0)
return p; return p;
p = Match(p, BP_REPEAT)->repeat_pat; break; p = When(p, BP_REPEAT)->repeat_pat; break;
case BP_CAPTURE: case BP_CAPTURE:
p = Match(p, BP_CAPTURE)->pat; break; p = When(p, BP_CAPTURE)->pat; break;
case BP_TAGGED: case BP_TAGGED:
p = Match(p, BP_TAGGED)->pat; break; p = When(p, BP_TAGGED)->pat; break;
case BP_CHAIN: { case BP_CHAIN: {
auto chain = Match(p, BP_CHAIN); auto chain = When(p, BP_CHAIN);
// If pattern is something like (|"foo"|), then use "foo" as the first thing to scan for // If pattern is something like (|"foo"|), then use "foo" as the first thing to scan for
p = chain->first->max_matchlen == 0 ? chain->second : chain->first; p = chain->first->max_matchlen == 0 ? chain->second : chain->first;
break; break;
} }
case BP_MATCH: case BP_MATCH:
p = Match(p, BP_MATCH)->pat; break; p = When(p, BP_MATCH)->pat; break;
case BP_NOT_MATCH: case BP_NOT_MATCH:
p = Match(p, BP_NOT_MATCH)->pat; break; p = When(p, BP_NOT_MATCH)->pat; break;
case BP_REPLACE: case BP_REPLACE:
p = Match(p, BP_REPLACE)->pat; break; p = When(p, BP_REPLACE)->pat; break;
case BP_REF: { case BP_REF: {
if (++derefs > 10) return p; // In case of left recursion if (++derefs > 10) return p; // In case of left recursion
pat_t *p2 = deref(ctx, p); pat_t *p2 = deref(ctx, p);
@ -339,8 +339,8 @@ static match_t *_next_match(match_ctx_t *ctx, const char *str, pat_t *pat, pat_t
// past areas where we know we won't find a match. // past areas where we know we won't find a match.
if (!skip && first->type == BP_STRING && first->min_matchlen > 0) { if (!skip && first->type == BP_STRING && first->min_matchlen > 0) {
char *found = ctx->ignorecase ? char *found = ctx->ignorecase ?
strcasestr(str, Match(first, BP_STRING)->string) strcasestr(str, When(first, BP_STRING)->string)
: memmem(str, (size_t)(ctx->end - str), Match(first, BP_STRING)->string, first->min_matchlen); : memmem(str, (size_t)(ctx->end - str), When(first, BP_STRING)->string, first->min_matchlen);
str = found ? found : ctx->end; str = found ? found : ctx->end;
} else if (!skip && str > ctx->start && (first->type == BP_START_OF_LINE || first->type == BP_END_OF_LINE)) { } else if (!skip && str > ctx->start && (first->type == BP_START_OF_LINE || first->type == BP_END_OF_LINE)) {
char *found = memchr(str, '\n', (size_t)(ctx->end - str)); char *found = memchr(str, '\n', (size_t)(ctx->end - str));
@ -372,7 +372,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
ctx2.cache = &(cache_t){0}; ctx2.cache = &(cache_t){0};
ctx2.parent_ctx = ctx; ctx2.parent_ctx = ctx;
ctx2.defs = pat; ctx2.defs = pat;
match_t *m = match(&ctx2, str, Match(pat, BP_DEFINITIONS)->meaning); match_t *m = match(&ctx2, str, When(pat, BP_DEFINITIONS)->meaning);
cache_destroy(&ctx2); cache_destroy(&ctx2);
return m; return m;
} }
@ -382,7 +382,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
// a special case, but if a pattern invokes itself at a later // a special case, but if a pattern invokes itself at a later
// point, it can be handled with normal recursion. // point, it can be handled with normal recursion.
// See: left-recursion.md for more details. // See: left-recursion.md for more details.
auto leftrec = Match(pat, BP_LEFTRECURSION); auto leftrec = When(pat, BP_LEFTRECURSION);
if (str == leftrec->at) { if (str == leftrec->at) {
leftrec->visited = true; leftrec->visited = true;
return clone_match(leftrec->match); return clone_match(leftrec->match);
@ -417,19 +417,19 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
} }
case BP_STRING: { case BP_STRING: {
if (&str[pat->min_matchlen] > ctx->end) return NULL; if (&str[pat->min_matchlen] > ctx->end) return NULL;
if (pat->min_matchlen > 0 && (ctx->ignorecase ? strncasecmp : strncmp)(str, Match(pat, BP_STRING)->string, pat->min_matchlen) != 0) if (pat->min_matchlen > 0 && (ctx->ignorecase ? strncasecmp : strncmp)(str, When(pat, BP_STRING)->string, pat->min_matchlen) != 0)
return NULL; return NULL;
return new_match(pat, str, str + pat->min_matchlen, NULL); return new_match(pat, str, str + pat->min_matchlen, NULL);
} }
case BP_RANGE: { case BP_RANGE: {
if (str >= ctx->end) return NULL; if (str >= ctx->end) return NULL;
auto range = Match(pat, BP_RANGE); auto range = When(pat, BP_RANGE);
if ((unsigned char)*str < range->low || (unsigned char)*str > range->high) if ((unsigned char)*str < range->low || (unsigned char)*str > range->high)
return NULL; return NULL;
return new_match(pat, str, str+1, NULL); return new_match(pat, str, str+1, NULL);
} }
case BP_NOT: { case BP_NOT: {
match_t *m = match(ctx, str, Match(pat, BP_NOT)->pat); match_t *m = match(ctx, str, When(pat, BP_NOT)->pat);
if (m != NULL) { if (m != NULL) {
recycle_match(&m); recycle_match(&m);
return NULL; return NULL;
@ -438,8 +438,8 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
} }
case BP_UPTO: case BP_UPTO_STRICT: { case BP_UPTO: case BP_UPTO_STRICT: {
match_t *m = new_match(pat, str, str, NULL); match_t *m = new_match(pat, str, str, NULL);
pat_t *target = deref(ctx, pat->type == BP_UPTO ? Match(pat, BP_UPTO)->target : Match(pat, BP_UPTO_STRICT)->target), pat_t *target = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->target : When(pat, BP_UPTO_STRICT)->target),
*skip = deref(ctx, pat->type == BP_UPTO ? Match(pat, BP_UPTO)->skip : Match(pat, BP_UPTO_STRICT)->skip); *skip = deref(ctx, pat->type == BP_UPTO ? When(pat, BP_UPTO)->skip : When(pat, BP_UPTO_STRICT)->skip);
if (!target && !skip) { if (!target && !skip) {
while (str < ctx->end && *str != '\n') ++str; while (str < ctx->end && *str != '\n') ++str;
m->end = str; m->end = str;
@ -484,7 +484,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
case BP_REPEAT: { case BP_REPEAT: {
match_t *m = new_match(pat, str, str, NULL); match_t *m = new_match(pat, str, str, NULL);
size_t reps = 0; size_t reps = 0;
auto repeat = Match(pat, BP_REPEAT); auto repeat = When(pat, BP_REPEAT);
pat_t *repeating = deref(ctx, repeat->repeat_pat); pat_t *repeating = deref(ctx, repeat->repeat_pat);
pat_t *sep = deref(ctx, repeat->sep); pat_t *sep = deref(ctx, repeat->sep);
size_t child_cap = 0, nchildren = 0; size_t child_cap = 0, nchildren = 0;
@ -542,7 +542,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
return m; return m;
} }
case BP_AFTER: { case BP_AFTER: {
pat_t *back = deref(ctx, Match(pat, BP_AFTER)->pat); pat_t *back = deref(ctx, When(pat, BP_AFTER)->pat);
if (!back) return NULL; if (!back) return NULL;
// We only care about the region from the backtrack pos up to the // We only care about the region from the backtrack pos up to the
@ -575,22 +575,22 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
return NULL; return NULL;
} }
case BP_BEFORE: { case BP_BEFORE: {
match_t *after = match(ctx, str, Match(pat, BP_BEFORE)->pat); match_t *after = match(ctx, str, When(pat, BP_BEFORE)->pat);
return after ? new_match(pat, str, str, MATCHES(after)) : NULL; return after ? new_match(pat, str, str, MATCHES(after)) : NULL;
} }
case BP_CAPTURE: case BP_TAGGED: { case BP_CAPTURE: case BP_TAGGED: {
pat_t *to_match = pat->type == BP_CAPTURE ? Match(pat, BP_CAPTURE)->pat : Match(pat, BP_TAGGED)->pat; pat_t *to_match = pat->type == BP_CAPTURE ? When(pat, BP_CAPTURE)->pat : When(pat, BP_TAGGED)->pat;
if (!to_match) if (!to_match)
return new_match(pat, str, str, NULL); return new_match(pat, str, str, NULL);
match_t *p = match(ctx, str, to_match); match_t *p = match(ctx, str, to_match);
return p ? new_match(pat, str, p->end, MATCHES(p)) : NULL; return p ? new_match(pat, str, p->end, MATCHES(p)) : NULL;
} }
case BP_OTHERWISE: { case BP_OTHERWISE: {
match_t *m = match(ctx, str, Match(pat, BP_OTHERWISE)->first); match_t *m = match(ctx, str, When(pat, BP_OTHERWISE)->first);
return m ? m : match(ctx, str, Match(pat, BP_OTHERWISE)->second); return m ? m : match(ctx, str, When(pat, BP_OTHERWISE)->second);
} }
case BP_CHAIN: { case BP_CHAIN: {
auto chain = Match(pat, BP_CHAIN); auto chain = When(pat, BP_CHAIN);
if (chain->first->type == BP_DEFINITIONS) { if (chain->first->type == BP_DEFINITIONS) {
match_ctx_t ctx2 = *ctx; match_ctx_t ctx2 = *ctx;
ctx2.cache = &(cache_t){0}; ctx2.cache = &(cache_t){0};
@ -606,7 +606,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
match_t *m2; match_t *m2;
// Push backrefs and run matching, then cleanup // Push backrefs and run matching, then cleanup
if (m1->pat->type == BP_CAPTURE && Match(m1->pat, BP_CAPTURE)->name && Match(m1->pat, BP_CAPTURE)->backreffable) { if (m1->pat->type == BP_CAPTURE && When(m1->pat, BP_CAPTURE)->name && When(m1->pat, BP_CAPTURE)->backreffable) {
// Temporarily add a rule that the backref name matches the // Temporarily add a rule that the backref name matches the
// exact string of the original match (no replacements) // exact string of the original match (no replacements)
pat_t *backref; pat_t *backref;
@ -632,8 +632,8 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
.type = BP_DEFINITIONS, .type = BP_DEFINITIONS,
.start = m1->pat->start, .end = m1->pat->end, .start = m1->pat->start, .end = m1->pat->end,
.__tagged.BP_DEFINITIONS = { .__tagged.BP_DEFINITIONS = {
.name = Match(m1->pat, BP_CAPTURE)->name, .name = When(m1->pat, BP_CAPTURE)->name,
.namelen = Match(m1->pat, BP_CAPTURE)->namelen, .namelen = When(m1->pat, BP_CAPTURE)->namelen,
.meaning = backref, .meaning = backref,
}, },
}; };
@ -653,7 +653,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
return new_match(pat, str, m2->end, MATCHES(m1, m2)); return new_match(pat, str, m2->end, MATCHES(m1, m2));
} }
case BP_MATCH: case BP_NOT_MATCH: { case BP_MATCH: case BP_NOT_MATCH: {
pat_t *target = pat->type == BP_MATCH ? Match(pat, BP_MATCH)->pat : Match(pat, BP_NOT_MATCH)->pat; pat_t *target = pat->type == BP_MATCH ? When(pat, BP_MATCH)->pat : When(pat, BP_NOT_MATCH)->pat;
match_t *m1 = match(ctx, str, target); match_t *m1 = match(ctx, str, target);
if (m1 == NULL) return NULL; if (m1 == NULL) return NULL;
@ -665,10 +665,10 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
slice_ctx.end = m1->end; slice_ctx.end = m1->end;
match_t *ret = NULL, *m2 = NULL; match_t *ret = NULL, *m2 = NULL;
if (pat->type == BP_MATCH) { if (pat->type == BP_MATCH) {
m2 = _next_match(&slice_ctx, slice_ctx.start, Match(pat, BP_MATCH)->must_match, NULL); m2 = _next_match(&slice_ctx, slice_ctx.start, When(pat, BP_MATCH)->must_match, NULL);
if (m2) ret = new_match(pat, m1->start, m1->end, MATCHES(m1, m2)); if (m2) ret = new_match(pat, m1->start, m1->end, MATCHES(m1, m2));
} else { } else {
m2 = _next_match(&slice_ctx, slice_ctx.start, Match(pat, BP_NOT_MATCH)->must_not_match, NULL); m2 = _next_match(&slice_ctx, slice_ctx.start, When(pat, BP_NOT_MATCH)->must_not_match, NULL);
if (!m2) ret = new_match(pat, m1->start, m1->end, MATCHES(m1)); if (!m2) ret = new_match(pat, m1->start, m1->end, MATCHES(m1));
} }
cache_destroy(&slice_ctx); cache_destroy(&slice_ctx);
@ -680,7 +680,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
} }
case BP_REPLACE: { case BP_REPLACE: {
match_t *p = NULL; match_t *p = NULL;
auto replace = Match(pat, BP_REPLACE); auto replace = When(pat, BP_REPLACE);
if (replace->pat) { if (replace->pat) {
p = match(ctx, str, replace->pat); p = match(ctx, str, replace->pat);
if (p == NULL) return NULL; if (p == NULL) return NULL;
@ -691,7 +691,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
if (has_cached_failure(ctx, str, pat)) if (has_cached_failure(ctx, str, pat))
return NULL; return NULL;
auto ref_pat = Match(pat, BP_REF); auto ref_pat = When(pat, BP_REF);
pat_t *ref = lookup_ctx(ctx, ref_pat->name, ref_pat->len); pat_t *ref = lookup_ctx(ctx, ref_pat->name, ref_pat->len);
if (ref == NULL) { if (ref == NULL) {
match_error(ctx, "Unknown pattern: '%.*s'", (int)ref_pat->len, ref_pat->name); match_error(ctx, "Unknown pattern: '%.*s'", (int)ref_pat->len, ref_pat->name);
@ -913,7 +913,7 @@ public bool next_match(match_t **m, const char *start, const char *end, pat_t *p
__attribute__((nonnull)) __attribute__((nonnull))
static match_t *_get_numbered_capture(match_t *m, int *n) static match_t *_get_numbered_capture(match_t *m, int *n)
{ {
if ((m->pat->type == BP_CAPTURE && Match(m->pat, BP_CAPTURE)->namelen == 0) || m->pat->type == BP_TAGGED) { if ((m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) || m->pat->type == BP_TAGGED) {
if (*n == 1) { if (*n == 1) {
return m; return m;
} else { } else {
@ -941,7 +941,7 @@ public match_t *get_numbered_capture(match_t *m, int n)
{ {
if (n <= 0) return m; if (n <= 0) return m;
if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) { if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) {
if (n == 1 && m->pat->type == BP_CAPTURE && Match(m->pat, BP_CAPTURE)->namelen == 0) return m; if (n == 1 && m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->namelen == 0) return m;
if (m->children) { if (m->children) {
for (int i = 0; m->children[i]; i++) { for (int i = 0; m->children[i]; i++) {
match_t *cap = _get_numbered_capture(m->children[i], &n); match_t *cap = _get_numbered_capture(m->children[i], &n);
@ -959,9 +959,9 @@ public match_t *get_numbered_capture(match_t *m, int n)
// //
match_t *_get_named_capture(match_t *m, const char *name, size_t namelen) match_t *_get_named_capture(match_t *m, const char *name, size_t namelen)
{ {
if (m->pat->type == BP_CAPTURE && Match(m->pat, BP_CAPTURE)->name if (m->pat->type == BP_CAPTURE && When(m->pat, BP_CAPTURE)->name
&& Match(m->pat, BP_CAPTURE)->namelen == namelen && When(m->pat, BP_CAPTURE)->namelen == namelen
&& strncmp(Match(m->pat, BP_CAPTURE)->name, name, Match(m->pat, BP_CAPTURE)->namelen) == 0) && strncmp(When(m->pat, BP_CAPTURE)->name, name, When(m->pat, BP_CAPTURE)->namelen) == 0)
return m; return m;
if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE) if (m->pat->type == BP_TAGGED || m->pat->type == BP_CAPTURE)

View File

@ -15,6 +15,9 @@
#include "utils.h" #include "utils.h"
#include "utf8.h" #include "utf8.h"
// Construct a pattern of kind `_tag`: builds a pat_t compound literal whose
// type, start, end, min_matchlen and max_matchlen fields come from the first
// five arguments, whose `__tagged._tag` payload is brace-initialized from the
// remaining (variadic) arguments, and passes that value to allocate_pat().
// The macro evaluates to whatever allocate_pat() returns.
#define Pattern(_tag, _start, _end, _min, _max, ...) allocate_pat((pat_t){.type=_tag, .start=_start, .end=_end, \
.min_matchlen=_min, .max_matchlen=_max, .__tagged._tag={__VA_ARGS__}})
static pat_t *allocated_pats = NULL; static pat_t *allocated_pats = NULL;
__attribute__((nonnull)) __attribute__((nonnull))
@ -625,7 +628,7 @@ public void delete_pat(pat_t **at_pat, bool recursive)
pat_t *pat = *at_pat; pat_t *pat = *at_pat;
if (!pat) return; if (!pat) return;
#define T(tag, ...) case tag: { auto _data = Match(pat, tag); __VA_ARGS__; break; } #define T(tag, ...) case tag: { auto _data = When(pat, tag); __VA_ARGS__; break; }
#define F(field) delete_pat(&_data->field, true) #define F(field) delete_pat(&_data->field, true)
if (recursive) { if (recursive) {
switch (pat->type) { switch (pat->type) {

View File

@ -15,9 +15,7 @@
#endif #endif
#define UNBOUNDED(pat) ((pat)->max_matchlen == -1) #define UNBOUNDED(pat) ((pat)->max_matchlen == -1)
#define Match(x, _tag) ((x)->type == _tag ? &(x)->__tagged._tag : (errx(1, __FILE__ ":%d This was supposed to be a " # _tag "\n", __LINE__), &(x)->__tagged._tag))
#define Pattern(_tag, _start, _end, _min, _max, ...) allocate_pat((pat_t){.type=_tag, .start=_start, .end=_end, \
.min_matchlen=_min, .max_matchlen=_max, .__tagged._tag={__VA_ARGS__}})
// BP virtual machine pattern types // BP virtual machine pattern types
enum pattype_e { enum pattype_e {
BP_ERROR = 0, BP_ERROR = 0,

View File

@ -200,7 +200,7 @@ public int fprint_match(FILE *out, const char *file_start, match_t *m, print_opt
{ {
int printed = 0; int printed = 0;
if (m->pat->type == BP_REPLACE) { if (m->pat->type == BP_REPLACE) {
auto rep = Match(m->pat, BP_REPLACE); auto rep = When(m->pat, BP_REPLACE);
const char *text = rep->text; const char *text = rep->text;
const char *end = &text[rep->len]; const char *end = &text[rep->len];
if (opts && opts->replace_color) printed += fprintf(out, "%s", opts->replace_color); if (opts && opts->replace_color) printed += fprintf(out, "%s", opts->replace_color);

View File

@ -20,6 +20,8 @@
__expr; \ __expr; \
}) })
// Checked accessor for a tagged value: when (x)->type == _tag, evaluates to a
// pointer to the `_tag` member of (x)->__tagged. Otherwise it calls errx() with
// exit status 1 and a message giving the file, line and the expected tag name
// (stringized from `_tag`); the `&(x)->__tagged._tag` after the comma is there
// so both arms of the conditional have the same pointer type.
// `x` is expanded more than once, so pass an expression without side effects.
// NOTE(review): errx() terminates its own output with a newline, so the
// explicit "\n" in the format string looks redundant -- confirm before changing.
#define When(x, _tag) ((x)->type == _tag ? &(x)->__tagged._tag : (errx(1, __FILE__ ":%d This was supposed to be a " # _tag "\n", __LINE__), &(x)->__tagged._tag))
#ifndef public #ifndef public
#define public __attribute__ ((visibility ("default"))) #define public __attribute__ ((visibility ("default")))
#endif #endif