Simplified backrefs: a backreference now matches the exact text of the
original capture by direct substring comparison, instead of accounting for replacement strings.
This commit is contained in:
parent
26b683ca74
commit
0f05961578
@ -64,7 +64,7 @@ Pattern | Meaning
|
||||
`5+ pat % sep` | 5 or more occurrences of `pat`, separated by `sep` (e.g. `0+ int % ","` matches `1,2,3`)
|
||||
`*pat` | 0 or more occurrences of `pat` (shorthand for `0+pat`)
|
||||
`+pat` | 1 or more occurrences of `pat` (shorthand for `1+pat`)
|
||||
`<pat` | `pat` matches just before the current position (backref)
|
||||
`<pat` | `pat` matches just before the current position (lookbehind)
|
||||
`>pat` | `pat` matches just in front of the current position (lookahead)
|
||||
`@pat` | Capture `pat` (used for text replacement and backreferences)
|
||||
`@foo=pat` | Let `foo` be the text of `pat` (used for text replacement and backreferences)
|
||||
|
@ -65,17 +65,6 @@ def_t *lookup(def_t *defs, size_t namelen, const char *name)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//
|
||||
// Push a backreference onto the backreference stack
|
||||
//
|
||||
def_t *with_backref(def_t *defs, file_t *f, size_t namelen, const char *name, match_t *m)
|
||||
{
|
||||
// TODO: maybe calculate length? (nontrivial because of replacements)
|
||||
pat_t *backref = new_pat(f, m->start, m->end, 0, -1, BP_BACKREF);
|
||||
backref->args.backref = m;
|
||||
return with_def(defs, namelen, name, backref);
|
||||
}
|
||||
|
||||
//
|
||||
// Free all the given definitions up till (but not including) `stop`
|
||||
//
|
||||
|
@ -9,8 +9,6 @@
|
||||
|
||||
__attribute__((nonnull(3,4), returns_nonnull))
|
||||
def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat);
|
||||
__attribute__((nonnull(2,4,5), returns_nonnull))
|
||||
def_t *with_backref(def_t *defs, file_t *f, size_t namelen, const char *name, match_t *m);
|
||||
__attribute__((nonnull(2)))
|
||||
def_t *load_grammar(def_t *defs, file_t *f);
|
||||
__attribute__((pure, nonnull(3)))
|
||||
|
74
match.c
74
match.c
@ -11,6 +11,7 @@
|
||||
|
||||
#include "definitions.h"
|
||||
#include "match.h"
|
||||
#include "pattern.h"
|
||||
#include "types.h"
|
||||
#include "utils.h"
|
||||
#include "utf8.h"
|
||||
@ -41,8 +42,6 @@ static inline pat_t *deref(def_t *defs, pat_t *pat);
|
||||
__attribute__((returns_nonnull))
|
||||
static match_t *new_match(pat_t *pat, const char *start, const char *end, match_t *child);
|
||||
__attribute__((nonnull))
|
||||
static const char *match_backref(const char *str, match_t *cap, bool ignorecase);
|
||||
__attribute__((nonnull))
|
||||
static match_t *get_capture_by_num(match_t *m, int *n);
|
||||
__attribute__((nonnull, pure))
|
||||
static match_t *get_capture_by_name(match_t *m, const char *name);
|
||||
@ -62,62 +61,6 @@ static inline pat_t *deref(def_t *defs, pat_t *pat)
|
||||
return pat;
|
||||
}
|
||||
|
||||
//
|
||||
// Attempt to match text against a previously captured value.
|
||||
// Return the character position after the backref has matched, or NULL if no match has occurred.
|
||||
//
|
||||
static const char *match_backref(const char *str, match_t *cap, bool ignorecase)
|
||||
{
|
||||
if (cap->pat->type == BP_REPLACE) {
|
||||
const char *text = cap->pat->args.replace.text;
|
||||
const char *end = &text[cap->pat->args.replace.len];
|
||||
for (const char *r = text; r < end; ) {
|
||||
if (*r == '\\') {
|
||||
++r;
|
||||
if (*(str++) != unescapechar(r, &r))
|
||||
return NULL;
|
||||
} else if (*r != '@') {
|
||||
if (*(str++) != *r)
|
||||
return NULL;
|
||||
++r;
|
||||
continue;
|
||||
}
|
||||
|
||||
++r;
|
||||
match_t *value = get_capture(cap, &r);
|
||||
if (value != NULL) {
|
||||
str = match_backref(str, value, ignorecase);
|
||||
if (str == NULL) return NULL;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const char *prev = cap->start;
|
||||
for (match_t *child = cap->child; child; child = child->nextsibling) {
|
||||
if (child->start > prev) {
|
||||
size_t len = (size_t)(child->start - prev);
|
||||
if ((ignorecase ? memicmp : memcmp)(str, prev, len) != 0) {
|
||||
return NULL;
|
||||
}
|
||||
str += len;
|
||||
prev = child->start;
|
||||
}
|
||||
if (child->start < prev) continue;
|
||||
str = match_backref(str, child, ignorecase);
|
||||
if (str == NULL) return NULL;
|
||||
prev = child->end;
|
||||
}
|
||||
if (cap->end > prev) {
|
||||
size_t len = (size_t)(cap->end - prev);
|
||||
if ((ignorecase ? memicmp : memcmp)(str, prev, len) != 0) {
|
||||
return NULL;
|
||||
}
|
||||
str += len;
|
||||
}
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Find the next match after prev (or the first match if prev is NULL)
|
||||
//
|
||||
@ -341,9 +284,14 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
match_t *m2;
|
||||
{ // Push backrefs and run matching, then cleanup
|
||||
def_t *defs2 = defs;
|
||||
if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name)
|
||||
defs2 = with_backref(defs2, f, m1->pat->args.capture.namelen, m1->pat->args.capture.name, m1);
|
||||
// def_t *defs2 = with_backrefs(defs, f, m1);
|
||||
if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) {
|
||||
// Temporarily add a rule that the backref name matches the
|
||||
// exact string of the original match (no replacements)
|
||||
ssize_t len = (ssize_t)(m1->end - m1->start);
|
||||
pat_t *backref = new_pat(f, m1->start, m1->end, len, len, BP_STRING);
|
||||
backref->args.string = m1->start;
|
||||
defs2 = with_def(defs, pat->args.ref.len, pat->args.ref.name, backref);
|
||||
}
|
||||
m2 = match(defs2, f, m1->end, pat->args.multiple.second, ignorecase);
|
||||
free_defs(&defs2, defs);
|
||||
}
|
||||
@ -440,10 +388,6 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
// OPTIMIZE: remove this if necessary
|
||||
return new_match(pat, m->start, m->end, m);
|
||||
}
|
||||
case BP_BACKREF: {
|
||||
const char *end = match_backref(str, pat->args.backref, ignorecase);
|
||||
return end ? new_match(pat, str, end, NULL) : NULL;
|
||||
}
|
||||
case BP_NODENT: {
|
||||
if (*str != '\n') return NULL;
|
||||
const char *start = str;
|
||||
|
Loading…
Reference in New Issue
Block a user