From 85f6cb8e769fa9bb4e90dcf93c02b061401f1ad1 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sat, 17 Jul 2021 15:25:24 -0700 Subject: Performance optimization for common case where pattern starts with string --- match.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'match.c') diff --git a/match.c b/match.c index 2ab3a90..7edf36f 100644 --- a/match.c +++ b/match.c @@ -61,11 +61,35 @@ static inline pat_t *deref(def_t *defs, pat_t *pat) return pat; } +// +// Find and return the first string literal to be matched (if any) +// +static pat_t *first_literal(def_t *defs, pat_t *pat) +{ + for (pat_t *p = pat; p; ) { + if (p->type == BP_STRING) + return p; + else if (p->type == BP_BEFORE) + p = p->args.pat; + else if (p->type == BP_CAPTURE) + p = p->args.capture.capture_pat; + else if (p->type == BP_CHAIN) + p = p->args.multiple.first; + else if (p->type == BP_REPLACE) + p = p->args.replace.pat; + else if (p->type == BP_REF) + p = deref(defs, p); + else break; + } + return NULL; +} + // // Find the next match after prev (or the first match if prev is NULL) // match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *skip, bool ignorecase) { + pat = deref(defs, pat); const char *str; if (prev) { str = prev->end > prev->start ? prev->end : prev->end + 1; @@ -74,6 +98,25 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk str = f->start; } bool only_start = pat->type == BP_START_OF_FILE || (pat->type == BP_CHAIN && pat->args.multiple.first->type == BP_START_OF_FILE); + + // Performance optimization: if the pattern starts with a string literal, + // we can just rely on the highly optimized strstr()/strcasestr() + // implementations to skip past areas where we know we won't find a match. + pat_t *first_str = first_literal(defs, pat); + if (first_str) { + for (size_t i = 0; i < first_str->min_matchlen; i++) + if (first_str->args.string[i] == '\0') + goto pattern_search; + char *tmp = strndup(first_str->args.string, first_str->min_matchlen); + char *found = (ignorecase ? strcasestr : strstr)(str, tmp); + if (found) + str = found; + else if (&str[strlen(str)] == f->end) + str = f->end+1; + free(tmp); + } + + pattern_search: while (str <= f->end) { match_t *m = match(defs, f, str, pat, ignorecase); if (m) return m; -- cgit v1.2.3