diff --git a/README.md b/README.md index 94f432f..9959a8a 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ Pattern | Meaning `5+ pat % sep` | 5 or more occurrences of `pat`, separated by `sep` (e.g. `0+ int % ","` matches `1,2,3`) `*pat` | 0 or more occurrences of `pat` (shorthand for `0+pat`) `+pat` | 1 or more occurrences of `pat` (shorthand for `1+pat`) -`pat` | `pat` matches just in front of the current position (lookahead) `@pat` | Capture `pat` (used for text replacement and backreferences) `@foo=pat` | Let `foo` be the text of `pat` (used for text replacement and backreferences) diff --git a/definitions.c b/definitions.c index 3e5f0e2..18bd612 100644 --- a/definitions.c +++ b/definitions.c @@ -65,17 +65,6 @@ def_t *lookup(def_t *defs, size_t namelen, const char *name) return NULL; } -// -// Push a backreference onto the backreference stack -// -def_t *with_backref(def_t *defs, file_t *f, size_t namelen, const char *name, match_t *m) -{ - // TODO: maybe calculate length? (nontrivial because of replacements) - pat_t *backref = new_pat(f, m->start, m->end, 0, -1, BP_BACKREF); - backref->args.backref = m; - return with_def(defs, namelen, name, backref); -} - // // Free all the given definitions up till (but not including) `stop` // diff --git a/definitions.h b/definitions.h index 6017b59..d5b108d 100644 --- a/definitions.h +++ b/definitions.h @@ -9,8 +9,6 @@ __attribute__((nonnull(3,4), returns_nonnull)) def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat); -__attribute__((nonnull(2,4,5), returns_nonnull)) -def_t *with_backref(def_t *defs, file_t *f, size_t namelen, const char *name, match_t *m); __attribute__((nonnull(2))) def_t *load_grammar(def_t *defs, file_t *f); __attribute__((pure, nonnull(3))) diff --git a/match.c b/match.c index f7ad3ad..514d8ca 100644 --- a/match.c +++ b/match.c @@ -11,6 +11,7 @@ #include "definitions.h" #include "match.h" +#include "pattern.h" #include "types.h" #include "utils.h" #include "utf8.h" @@ 
-41,8 +42,6 @@ static inline pat_t *deref(def_t *defs, pat_t *pat); __attribute__((returns_nonnull)) static match_t *new_match(pat_t *pat, const char *start, const char *end, match_t *child); __attribute__((nonnull)) -static const char *match_backref(const char *str, match_t *cap, bool ignorecase); -__attribute__((nonnull)) static match_t *get_capture_by_num(match_t *m, int *n); __attribute__((nonnull, pure)) static match_t *get_capture_by_name(match_t *m, const char *name); @@ -62,62 +61,6 @@ static inline pat_t *deref(def_t *defs, pat_t *pat) return pat; } -// -// Attempt to match text against a previously captured value. -// Return the character position after the backref has matched, or NULL if no match has occurred. -// -static const char *match_backref(const char *str, match_t *cap, bool ignorecase) -{ - if (cap->pat->type == BP_REPLACE) { - const char *text = cap->pat->args.replace.text; - const char *end = &text[cap->pat->args.replace.len]; - for (const char *r = text; r < end; ) { - if (*r == '\\') { - ++r; - if (*(str++) != unescapechar(r, &r)) - return NULL; - } else if (*r != '@') { - if (*(str++) != *r) - return NULL; - ++r; - continue; - } - - ++r; - match_t *value = get_capture(cap, &r); - if (value != NULL) { - str = match_backref(str, value, ignorecase); - if (str == NULL) return NULL; - } - } - } else { - const char *prev = cap->start; - for (match_t *child = cap->child; child; child = child->nextsibling) { - if (child->start > prev) { - size_t len = (size_t)(child->start - prev); - if ((ignorecase ? memicmp : memcmp)(str, prev, len) != 0) { - return NULL; - } - str += len; - prev = child->start; - } - if (child->start < prev) continue; - str = match_backref(str, child, ignorecase); - if (str == NULL) return NULL; - prev = child->end; - } - if (cap->end > prev) { - size_t len = (size_t)(cap->end - prev); - if ((ignorecase ? 
memicmp : memcmp)(str, prev, len) != 0) { - return NULL; - } - str += len; - } - } - return str; -} - - // // Find the next match after prev (or the first match if prev is NULL) // @@ -341,9 +284,14 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool match_t *m2; { // Push backrefs and run matching, then cleanup def_t *defs2 = defs; - if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) - defs2 = with_backref(defs2, f, m1->pat->args.capture.namelen, m1->pat->args.capture.name, m1); - // def_t *defs2 = with_backrefs(defs, f, m1); + if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) { + // Temporarily bind the capture's own name to a rule that matches the + // exact string of the original match (no replacements) + ssize_t len = (ssize_t)(m1->end - m1->start); + pat_t *backref = new_pat(f, m1->start, m1->end, len, len, BP_STRING); + backref->args.string = m1->start; + defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref); + } m2 = match(defs2, f, m1->end, pat->args.multiple.second, ignorecase); free_defs(&defs2, defs); } @@ -440,10 +388,6 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool // OPTIMIZE: remove this if necessary return new_match(pat, m->start, m->end, m); } - case BP_BACKREF: { - const char *end = match_backref(str, pat->args.backref, ignorecase); - return end ? new_match(pat, str, end, NULL) : NULL; - } case BP_NODENT: { if (*str != '\n') return NULL; const char *start = str; diff --git a/types.h b/types.h index 96e0727..78232ef 100644 --- a/types.h +++ b/types.h @@ -28,7 +28,6 @@ enum pattype_e { BP_NOT_MATCH, BP_REPLACE, BP_REF, - BP_BACKREF, BP_NODENT, BP_START_OF_FILE, BP_START_OF_LINE,