diff options
| -rw-r--r-- | Lua/lbp.c | 17 | ||||
| -rw-r--r-- | bp.c | 51 | ||||
| -rw-r--r-- | grammars/html.bp | 4 | ||||
| -rw-r--r-- | match.c | 51 | ||||
| -rw-r--r-- | match.h | 4 | ||||
| -rw-r--r-- | pattern.c | 40 | ||||
| -rw-r--r-- | pattern.h | 1 |
7 files changed, 81 insertions, 87 deletions
@@ -134,13 +134,11 @@ static int Lmatch(lua_State *L) match_t *m = NULL; int ret = 0; - pat_t *def_pat = chain_together(builtins, pat); - if (next_match(&m, text+index-1, &text[textlen], def_pat, NULL, false)) { + if (next_match(&m, text+index-1, &text[textlen], pat, builtins, NULL, false)) { push_match(L, m, text); stop_matching(&m); ret = 1; } - delete_pat(&def_pat, false); return ret; } @@ -173,8 +171,8 @@ static int Lreplace(lua_State *L) FILE *out = open_memstream(&buf, &size); int replacements = 0; const char *prev = text; - pat_t *rep_pat = chain_together(builtins, maybe_replacement.value.pat); - for (match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, NULL, false); ) { + pat_t *rep_pat = maybe_replacement.value.pat; + for (match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, builtins, NULL, false); ) { fwrite(prev, sizeof(char), (size_t)(m->start - prev), out); fprint_match(out, text, m, NULL); prev = m->end; @@ -186,7 +184,7 @@ static int Lreplace(lua_State *L) lua_pushinteger(L, replacements); fclose(out); - delete_pat(&maybe_replacement.value.pat, false); + delete_pat(&rep_pat, false); return 2; } @@ -247,9 +245,10 @@ static int Lpat_tostring(lua_State *L) static int Lpat_gc(lua_State *L) { (void)L; - // pat_t **at_pat = lua_touserdata(L, 1); - // pat_t *pat = *at_pat; - // if (pat) delete_pat(at_pat, true); + pat_t **at_pat = lua_touserdata(L, 1); + pat_t *pat = *at_pat; + if (pat) delete_pat(at_pat, true); + (void)pat; return 0; } @@ -207,10 +207,10 @@ static int is_text_file(const char *filename) // // Print matches in JSON format. // -static int print_matches_as_json(file_t *f, pat_t *pattern) +static int print_matches_as_json(file_t *f, pat_t *pattern, pat_t *defs) { int nmatches = 0; - for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); ) { + for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) { if (++nmatches > 1) printf(",\n"); printf("{\"filename\":\"%s\",\"match\":", f->filename); @@ -223,10 +223,10 @@ static int print_matches_as_json(file_t *f, pat_t *pattern) // // Print matches in a visual explanation style // -static int explain_matches(file_t *f, pat_t *pattern) +static int explain_matches(file_t *f, pat_t *pattern, pat_t *defs) { int nmatches = 0; - for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); ) { + for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) { if (++nmatches == 1) { if (options.print_filenames) fprint_filename(stdout, f->filename); @@ -349,7 +349,7 @@ static void on_nl(FILE *out) // // Print all the matches in a file. // -static int print_matches(FILE *out, file_t *f, pat_t *pattern) +static int print_matches(FILE *out, file_t *f, pat_t *pattern, pat_t *defs) { static int printed_filenames = 0; int matches = 0; @@ -364,7 +364,7 @@ static int print_matches(FILE *out, file_t *f, pat_t *pattern) print_opts.replace_color = "\033[0;34;1m"; print_opts.normal_color = "\033[m"; } - for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); ) { + for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) { if (++matches == 1 && options.print_filenames) { if (printed_filenames++ > 0) printf("\n"); fprint_filename(out, f->filename); @@ -394,7 +394,7 @@ static int print_matches(FILE *out, file_t *f, pat_t *pattern) // against it, printing any results according to the flags. // __attribute__((nonnull)) -static int process_file(const char *filename, pat_t *pattern) +static int process_file(const char *filename, pat_t *pattern, pat_t *defs) { file_t *f = load_file(NULL, filename); if (f == NULL) { @@ -404,19 +404,19 @@ static int process_file(const char *filename, pat_t *pattern) int matches = 0; if (options.mode == MODE_EXPLAIN) { - matches += explain_matches(f, pattern); + matches += explain_matches(f, pattern, defs); } else if (options.mode == MODE_LISTFILES) { match_t *m = NULL; - if (next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase)) { + if (next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase)) { printf("%s\n", f->filename); matches += 1; } stop_matching(&m); } else if (options.mode == MODE_JSON) { - matches += print_matches_as_json(f, pattern); + matches += print_matches_as_json(f, pattern, defs); } else if (options.mode == MODE_INPLACE) { match_t *m = NULL; - bool found = next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); + bool found = next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); stop_matching(&m); if (!found) return 0; @@ -432,12 +432,12 @@ static int process_file(const char *filename, pat_t *pattern) // are used to restore the original file contents. modifying_file = out; backup_file = f; { - matches += print_matches(out, f, pattern); + matches += print_matches(out, f, pattern, defs); } modifying_file = NULL; backup_file = NULL; fclose(out); } else { - matches += print_matches(stdout, f, pattern); + matches += print_matches(stdout, f, pattern, defs); } fflush(stdout); @@ -452,7 +452,7 @@ static int process_file(const char *filename, pat_t *pattern) // Recursively process all non-dotfile files in the given directory. // __attribute__((nonnull)) -static int process_dir(const char *dirname, pat_t *pattern) +static int process_dir(const char *dirname, pat_t *pattern, pat_t *defs) { int matches = 0; glob_t globbuf; @@ -469,9 +469,9 @@ static int process_dir(const char *dirname, pat_t *pattern) if (S_ISLNK(statbuf.st_mode)) continue; // Skip symbolic links else if (S_ISDIR(statbuf.st_mode)) - matches += process_dir(globbuf.gl_pathv[i], pattern); + matches += process_dir(globbuf.gl_pathv[i], pattern, defs); else if (is_text_file(globbuf.gl_pathv[i])) - matches += process_file(globbuf.gl_pathv[i], pattern); + matches += process_file(globbuf.gl_pathv[i], pattern, defs); } } globfree(&globbuf); @@ -482,7 +482,7 @@ static int process_dir(const char *dirname, pat_t *pattern) // Process git files using `git ls-files ...` // __attribute__((nonnull(1))) -static int process_git_files(pat_t *pattern, int argc, char *argv[]) +static int process_git_files(pat_t *pattern, pat_t *defs, int argc, char *argv[]) { int fds[2]; require(pipe(fds), "Failed to create pipe"); @@ -505,7 +505,7 @@ static int process_git_files(pat_t *pattern, int argc, char *argv[]) size_t path_size = 0; int found = 0; while (getdelim(&path, &path_size, '\0', fp) > 0) - found += process_file(path, pattern); + found += process_file(path, pattern, defs); if (path) delete(&path); require(fclose(fp), "Failed to close read end of pipe"); int status; @@ -521,7 +521,7 @@ static int process_git_files(pat_t *pattern, int argc, char *argv[]) // static pat_t *load_grammar(pat_t *defs, file_t *f) { - return chain_together(assert_pat(f->start, f->end, bp_pattern(f->start, f->end)), defs); + return chain_together(defs, assert_pat(f->start, f->end, bp_pattern(f->start, f->end))); } // @@ -636,9 +636,6 @@ int main(int argc, char *argv[]) if (pattern == NULL) errx(EXIT_FAILURE, "No pattern provided.\n\n%s", usage); - // Hook up definitions: - pattern = chain_together(defs, pattern); - for (argc = 0; argv[argc]; ++argc) ; // update argc if (options.context_before == USE_DEFAULT_CONTEXT) options.context_before = 0; @@ -660,7 +657,7 @@ int main(int argc, char *argv[]) int found = 0; if (options.mode == MODE_JSON) printf("["); if (options.git_mode) { // Get the list of files from `git --ls-files ...` - found = process_git_files(pattern, argc, argv); + found = process_git_files(pattern, defs, argc, argv); } else if (argv[0]) { // Files pass in as command line args: struct stat statbuf; @@ -668,17 +665,17 @@ int main(int argc, char *argv[]) options.print_filenames = false; for ( ; argv[0]; argv++) { if (stat(argv[0], &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) // Symlinks are okay if manually specified - found += process_dir(argv[0], pattern); + found += process_dir(argv[0], pattern, defs); else - found += process_file(argv[0], pattern); + found += process_file(argv[0], pattern, defs); } } else if (isatty(STDIN_FILENO)) { // No files, no piped in input, so use files in current dir, recursively - found += process_dir(".", pattern); + found += process_dir(".", pattern, defs); } else { // Piped in input: options.print_filenames = false; // Don't print filename on stdin - found += process_file("", pattern); + found += process_file("", pattern, defs); } if (options.mode == MODE_JSON) printf("]\n"); diff --git a/grammars/html.bp b/grammars/html.bp index 29e4566..1ed83df 100644 --- a/grammars/html.bp +++ b/grammars/html.bp @@ -14,9 +14,9 @@ void-element: `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"me template-element: "<template>" ..%(\n / comment / element) "</template>" -raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n "</"tag__`> +raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n ("</"tag__`>) -normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) "</"tag__`> +normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) ("</"tag__`>) comment: "<!--" ..%\n "-->" @@ -192,16 +192,37 @@ void cache_destroy(match_ctx_t *ctx) } // -// Look up a pattern definition by name. +// Look up a pattern definition by name from a definition pattern. +// +__attribute__((nonnull(2))) +static pat_t *_lookup_def(pat_t *defs, const char *name, size_t namelen) +{ + while (defs) { + if (defs->type == BP_CHAIN) { + pat_t *second = _lookup_def(defs->args.multiple.second, name, namelen); + if (second) return second; + defs = defs->args.multiple.first; + } else if (defs->type == BP_DEFINITIONS) { + if (namelen == defs->args.def.namelen && strncmp(defs->args.def.name, name, namelen) == 0) + return defs->args.def.meaning; + defs = defs->args.def.next_def; + } else { + errx(1, "Invalid pattern type in definitions"); + } + } + return NULL; +} + +// +// Look up a pattern definition by name from a context. // __attribute__((nonnull)) -pat_t *lookup(match_ctx_t *ctx, const char *name, size_t namelen) +pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen) { - for (pat_t *def = ctx->defs; def; def = def->args.def.next_def) { - if (namelen == def->args.def.namelen && strncmp(def->args.def.name, name, namelen) == 0) - return def->args.def.meaning; + for (; ctx; ctx = ctx->parent_ctx) { + pat_t *def = _lookup_def(ctx->defs, name, namelen); + if (def) return def; } - if (ctx->parent_ctx) return lookup(ctx->parent_ctx, name, namelen); return NULL; } @@ -213,7 +234,7 @@ __attribute__((nonnull(1))) static inline pat_t *deref(match_ctx_t *ctx, pat_t *pat) { if (pat && pat->type == BP_REF) { - pat_t *def = lookup(ctx, pat->args.ref.name, pat->args.ref.len); + pat_t *def = lookup_ctx(ctx, pat->args.ref.name, pat->args.ref.len); if (def) return def; } return pat; @@ -266,17 +287,6 @@ static pat_t *get_prerequisite(match_ctx_t *ctx, pat_t *pat) __attribute__((nonnull(1,2,3))) static match_t *_next_match(match_ctx_t *ctx, const char *str, pat_t *pat, pat_t *skip) { - if (pat->type == BP_DEFINITIONS || (pat->type == BP_CHAIN && pat->args.multiple.first->type == BP_DEFINITIONS)) { - match_ctx_t ctx2 = *ctx; - ctx2.cache = &(cache_t){0}; - ctx2.parent_ctx = ctx; - ctx2.defs = pat->type == BP_DEFINITIONS ? pat : pat->args.multiple.first; - pat_t *match_pat = pat->type == BP_DEFINITIONS ? pat->args.def.meaning : pat->args.multiple.second; - match_t *m = _next_match(&ctx2, str, match_pat, skip); - cache_destroy(&ctx2); - return m; - } - // Clear the cache so it's not full of old cache values from different parts of the file: cache_destroy(ctx); @@ -618,7 +628,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat) if (has_cached_failure(ctx, str, pat)) return NULL; - pat_t *ref = lookup(ctx, pat->args.ref.name, pat->args.ref.len); + pat_t *ref = lookup_ctx(ctx, pat->args.ref.name, pat->args.ref.len); if (ref == NULL) errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name); @@ -787,7 +797,7 @@ size_t free_all_matches(void) // Iterate over matches. // Usage: for (match_t *m = NULL; next_match(&m, ...); ) {...} // -bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *skip, bool ignorecase) +bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *defs, pat_t *skip, bool ignorecase) { const char *pos; if (*m) { @@ -805,6 +815,7 @@ bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat .start = start, .end = end, .ignorecase = ignorecase, + .defs = defs, }; *m = (pos <= end) ? _next_match(&ctx, pos, pat, skip) : NULL; cache_destroy(&ctx); @@ -28,8 +28,8 @@ __attribute__((nonnull)) void recycle_match(match_t **at_m); size_t free_all_matches(void); size_t recycle_all_matches(void); -bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *skip, bool ignorecase); -#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, 0) +bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *defs, pat_t *skip, bool ignorecase); +#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, NULL, 0) __attribute__((nonnull)) match_t *get_numbered_capture(match_t *m, int n); __attribute__((nonnull, pure)) @@ -169,11 +169,10 @@ pat_t *chain_together(pat_t *first, pat_t *second) if (second == NULL) return first; if (first->type == BP_DEFINITIONS && second->type == BP_DEFINITIONS) { - pat_t *second_end = second; - while (second_end->args.def.next_def != NULL) - second_end = second_end->args.def.next_def; - second_end->args.def.next_def = first; - return second; + pat_t *chain = new_pat(BP_CHAIN, first->start, second->end, second->min_matchlen, second->max_matchlen); + chain->args.multiple.first = first; + chain->args.multiple.second = second; + return chain; } size_t minlen = first->min_matchlen + second->min_matchlen; @@ -181,23 +180,6 @@ pat_t *chain_together(pat_t *first, pat_t *second) pat_t *chain = new_pat(BP_CHAIN, first->start, second->end, minlen, maxlen); chain->args.multiple.first = first; chain->args.multiple.second = second; - - // If `first` is an UPTO operator (..) or contains one, then let it know - // that `second` is what it's up *to*. - for (pat_t *p = first; p; ) { - if (p->type == BP_UPTO || p->type == BP_UPTO_STRICT) { - p->args.multiple.first = second; - p->min_matchlen = second->min_matchlen; - p->max_matchlen = -1; - break; - } else if (p->type == BP_CAPTURE) { - p = p->args.capture.capture_pat; - } else if (p->type == BP_CHAIN) { - p = p->args.multiple.second; - } else if (p->type == BP_MATCH || p->type == BP_NOT_MATCH) { - p = p->args.multiple.first; - } else break; - } return chain; } @@ -246,7 +228,7 @@ static pat_t *_bp_definition(const char *start, const char *end) // Compile a string of BP code into a BP pattern object. // __attribute__((nonnull)) -static pat_t *_bp_simplepattern(const char *str, const char *end) +static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_stringpattern) { str = after_spaces(str, false, end); if (!*str) return NULL; @@ -272,6 +254,12 @@ static pat_t *_bp_simplepattern(const char *str, const char *end) } pat_t *upto = new_pat(type, start, extra_arg ? extra_arg->end : str, 0, -1); upto->args.multiple.second = extra_arg; + if (inside_stringpattern) { + maybe_pat_t target = bp_stringpattern(upto->end, end); + upto->args.multiple.first = target.success ? target.value.pat : NULL; + } else { + upto->args.multiple.first = bp_simplepattern(upto->end, end); + } return upto; } else { return new_pat(BP_ANYCHAR, start, str, 1, UTF8_MAXCHARLEN); @@ -536,9 +524,9 @@ maybe_pat_t bp_stringpattern(const char *str, const char *end) for (; str < end; str = next_char(str, end)) { if (*str == '\\' && str+1 < end) { if (str[1] == '\\' || isalnum(str[1])) - interp = bp_simplepattern(str, end); + interp = _bp_simplepattern(str, end, true); else - interp = bp_simplepattern(str+1, end); + interp = _bp_simplepattern(str+1, end, true); if (interp) break; // If there is no interpolated value, this is just a plain ol' regular backslash } @@ -567,7 +555,7 @@ maybe_pat_t bp_stringpattern(const char *str, const char *end) // static pat_t *bp_simplepattern(const char *str, const char *end) { - pat_t *pat = _bp_simplepattern(str, end); + pat_t *pat = _bp_simplepattern(str, end, false); if (pat == NULL) return pat; str = pat->end; @@ -120,6 +120,5 @@ void free_all_pats(void); __attribute__((nonnull)) void delete_pat(pat_t **at_pat, bool recursive); - #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 |
