Performance optimization for the common case where a pattern starts with a
string literal
This commit is contained in:
parent
ad85fb1da5
commit
85f6cb8e76
@ -14,9 +14,9 @@ void-element: `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"me
|
||||
|
||||
template-element: "<template>" ..%(\n / comment / element) "</template>"
|
||||
|
||||
raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n ("</"tag__`>)
|
||||
raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n "</"tag__`>
|
||||
|
||||
normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) ("</"tag`>)
|
||||
normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) "</"tag__`>
|
||||
|
||||
comment: "<!--" ..%\n "-->"
|
||||
|
||||
|
43
match.c
43
match.c
@ -61,11 +61,35 @@ static inline pat_t *deref(def_t *defs, pat_t *pat)
|
||||
return pat;
|
||||
}
|
||||
|
||||
//
|
||||
// Find and return the first string literal to be matched (if any)
|
||||
//
|
||||
static pat_t *first_literal(def_t *defs, pat_t *pat)
|
||||
{
|
||||
for (pat_t *p = pat; p; ) {
|
||||
if (p->type == BP_STRING)
|
||||
return p;
|
||||
else if (p->type == BP_BEFORE)
|
||||
p = p->args.pat;
|
||||
else if (p->type == BP_CAPTURE)
|
||||
p = p->args.capture.capture_pat;
|
||||
else if (p->type == BP_CHAIN)
|
||||
p = p->args.multiple.first;
|
||||
else if (p->type == BP_REPLACE)
|
||||
p = p->args.replace.pat;
|
||||
else if (p->type == BP_REF)
|
||||
p = deref(defs, p);
|
||||
else break;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//
|
||||
// Find the next match after prev (or the first match if prev is NULL)
|
||||
//
|
||||
match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *skip, bool ignorecase)
|
||||
{
|
||||
pat = deref(defs, pat);
|
||||
const char *str;
|
||||
if (prev) {
|
||||
str = prev->end > prev->start ? prev->end : prev->end + 1;
|
||||
@ -74,6 +98,25 @@ match_t *next_match(def_t *defs, file_t *f, match_t *prev, pat_t *pat, pat_t *sk
|
||||
str = f->start;
|
||||
}
|
||||
bool only_start = pat->type == BP_START_OF_FILE || (pat->type == BP_CHAIN && pat->args.multiple.first->type == BP_START_OF_FILE);
|
||||
|
||||
// Performance optimization: if the pattern starts with a string literal,
|
||||
// we can just rely on the highly optimized strstr()/strcasestr()
|
||||
// implementations to skip past areas where we know we won't find a match.
|
||||
pat_t *first_str = first_literal(defs, pat);
|
||||
if (first_str) {
|
||||
for (size_t i = 0; i < first_str->min_matchlen; i++)
|
||||
if (first_str->args.string[i] == '\0')
|
||||
goto pattern_search;
|
||||
char *tmp = strndup(first_str->args.string, first_str->min_matchlen);
|
||||
char *found = (ignorecase ? strcasestr : strstr)(str, tmp);
|
||||
if (found)
|
||||
str = found;
|
||||
else if (&str[strlen(str)] == f->end)
|
||||
str = f->end+1;
|
||||
free(tmp);
|
||||
}
|
||||
|
||||
pattern_search:
|
||||
while (str <= f->end) {
|
||||
match_t *m = match(defs, f, str, pat, ignorecase);
|
||||
if (m) return m;
|
||||
|
Loading…
Reference in New Issue
Block a user