Simplified things by passing a def argument to next_match instead of
chaining defs together. Also simplified `..` by just using a lookahead instead of retconning it. Immutability invariants are now enforced better.
This commit is contained in:
parent
e5c0d09893
commit
24ed834317
17
Lua/lbp.c
17
Lua/lbp.c
@ -134,13 +134,11 @@ static int Lmatch(lua_State *L)
|
||||
|
||||
match_t *m = NULL;
|
||||
int ret = 0;
|
||||
pat_t *def_pat = chain_together(builtins, pat);
|
||||
if (next_match(&m, text+index-1, &text[textlen], def_pat, NULL, false)) {
|
||||
if (next_match(&m, text+index-1, &text[textlen], pat, builtins, NULL, false)) {
|
||||
push_match(L, m, text);
|
||||
stop_matching(&m);
|
||||
ret = 1;
|
||||
}
|
||||
delete_pat(&def_pat, false);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -173,8 +171,8 @@ static int Lreplace(lua_State *L)
|
||||
FILE *out = open_memstream(&buf, &size);
|
||||
int replacements = 0;
|
||||
const char *prev = text;
|
||||
pat_t *rep_pat = chain_together(builtins, maybe_replacement.value.pat);
|
||||
for (match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, NULL, false); ) {
|
||||
pat_t *rep_pat = maybe_replacement.value.pat;
|
||||
for (match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, builtins, NULL, false); ) {
|
||||
fwrite(prev, sizeof(char), (size_t)(m->start - prev), out);
|
||||
fprint_match(out, text, m, NULL);
|
||||
prev = m->end;
|
||||
@ -186,7 +184,7 @@ static int Lreplace(lua_State *L)
|
||||
lua_pushinteger(L, replacements);
|
||||
fclose(out);
|
||||
|
||||
delete_pat(&maybe_replacement.value.pat, false);
|
||||
delete_pat(&rep_pat, false);
|
||||
|
||||
return 2;
|
||||
}
|
||||
@ -247,9 +245,10 @@ static int Lpat_tostring(lua_State *L)
|
||||
static int Lpat_gc(lua_State *L)
|
||||
{
|
||||
(void)L;
|
||||
// pat_t **at_pat = lua_touserdata(L, 1);
|
||||
// pat_t *pat = *at_pat;
|
||||
// if (pat) delete_pat(at_pat, true);
|
||||
pat_t **at_pat = lua_touserdata(L, 1);
|
||||
pat_t *pat = *at_pat;
|
||||
if (pat) delete_pat(at_pat, true);
|
||||
(void)pat;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
51
bp.c
51
bp.c
@ -207,10 +207,10 @@ static int is_text_file(const char *filename)
|
||||
//
|
||||
// Print matches in JSON format.
|
||||
//
|
||||
static int print_matches_as_json(file_t *f, pat_t *pattern)
|
||||
static int print_matches_as_json(file_t *f, pat_t *pattern, pat_t *defs)
|
||||
{
|
||||
int nmatches = 0;
|
||||
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); ) {
|
||||
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
|
||||
if (++nmatches > 1)
|
||||
printf(",\n");
|
||||
printf("{\"filename\":\"%s\",\"match\":", f->filename);
|
||||
@ -223,10 +223,10 @@ static int print_matches_as_json(file_t *f, pat_t *pattern)
|
||||
//
|
||||
// Print matches in a visual explanation style
|
||||
//
|
||||
static int explain_matches(file_t *f, pat_t *pattern)
|
||||
static int explain_matches(file_t *f, pat_t *pattern, pat_t *defs)
|
||||
{
|
||||
int nmatches = 0;
|
||||
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); ) {
|
||||
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
|
||||
if (++nmatches == 1) {
|
||||
if (options.print_filenames)
|
||||
fprint_filename(stdout, f->filename);
|
||||
@ -349,7 +349,7 @@ static void on_nl(FILE *out)
|
||||
//
|
||||
// Print all the matches in a file.
|
||||
//
|
||||
static int print_matches(FILE *out, file_t *f, pat_t *pattern)
|
||||
static int print_matches(FILE *out, file_t *f, pat_t *pattern, pat_t *defs)
|
||||
{
|
||||
static int printed_filenames = 0;
|
||||
int matches = 0;
|
||||
@ -364,7 +364,7 @@ static int print_matches(FILE *out, file_t *f, pat_t *pattern)
|
||||
print_opts.replace_color = "\033[0;34;1m";
|
||||
print_opts.normal_color = "\033[m";
|
||||
}
|
||||
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); ) {
|
||||
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
|
||||
if (++matches == 1 && options.print_filenames) {
|
||||
if (printed_filenames++ > 0) printf("\n");
|
||||
fprint_filename(out, f->filename);
|
||||
@ -394,7 +394,7 @@ static int print_matches(FILE *out, file_t *f, pat_t *pattern)
|
||||
// against it, printing any results according to the flags.
|
||||
//
|
||||
__attribute__((nonnull))
|
||||
static int process_file(const char *filename, pat_t *pattern)
|
||||
static int process_file(const char *filename, pat_t *pattern, pat_t *defs)
|
||||
{
|
||||
file_t *f = load_file(NULL, filename);
|
||||
if (f == NULL) {
|
||||
@ -404,19 +404,19 @@ static int process_file(const char *filename, pat_t *pattern)
|
||||
|
||||
int matches = 0;
|
||||
if (options.mode == MODE_EXPLAIN) {
|
||||
matches += explain_matches(f, pattern);
|
||||
matches += explain_matches(f, pattern, defs);
|
||||
} else if (options.mode == MODE_LISTFILES) {
|
||||
match_t *m = NULL;
|
||||
if (next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase)) {
|
||||
if (next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase)) {
|
||||
printf("%s\n", f->filename);
|
||||
matches += 1;
|
||||
}
|
||||
stop_matching(&m);
|
||||
} else if (options.mode == MODE_JSON) {
|
||||
matches += print_matches_as_json(f, pattern);
|
||||
matches += print_matches_as_json(f, pattern, defs);
|
||||
} else if (options.mode == MODE_INPLACE) {
|
||||
match_t *m = NULL;
|
||||
bool found = next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase);
|
||||
bool found = next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase);
|
||||
stop_matching(&m);
|
||||
if (!found) return 0;
|
||||
|
||||
@ -432,12 +432,12 @@ static int process_file(const char *filename, pat_t *pattern)
|
||||
// are used to restore the original file contents.
|
||||
modifying_file = out; backup_file = f;
|
||||
{
|
||||
matches += print_matches(out, f, pattern);
|
||||
matches += print_matches(out, f, pattern, defs);
|
||||
}
|
||||
modifying_file = NULL; backup_file = NULL;
|
||||
fclose(out);
|
||||
} else {
|
||||
matches += print_matches(stdout, f, pattern);
|
||||
matches += print_matches(stdout, f, pattern, defs);
|
||||
}
|
||||
fflush(stdout);
|
||||
|
||||
@ -452,7 +452,7 @@ static int process_file(const char *filename, pat_t *pattern)
|
||||
// Recursively process all non-dotfile files in the given directory.
|
||||
//
|
||||
__attribute__((nonnull))
|
||||
static int process_dir(const char *dirname, pat_t *pattern)
|
||||
static int process_dir(const char *dirname, pat_t *pattern, pat_t *defs)
|
||||
{
|
||||
int matches = 0;
|
||||
glob_t globbuf;
|
||||
@ -469,9 +469,9 @@ static int process_dir(const char *dirname, pat_t *pattern)
|
||||
if (S_ISLNK(statbuf.st_mode))
|
||||
continue; // Skip symbolic links
|
||||
else if (S_ISDIR(statbuf.st_mode))
|
||||
matches += process_dir(globbuf.gl_pathv[i], pattern);
|
||||
matches += process_dir(globbuf.gl_pathv[i], pattern, defs);
|
||||
else if (is_text_file(globbuf.gl_pathv[i]))
|
||||
matches += process_file(globbuf.gl_pathv[i], pattern);
|
||||
matches += process_file(globbuf.gl_pathv[i], pattern, defs);
|
||||
}
|
||||
}
|
||||
globfree(&globbuf);
|
||||
@ -482,7 +482,7 @@ static int process_dir(const char *dirname, pat_t *pattern)
|
||||
// Process git files using `git ls-files ...`
|
||||
//
|
||||
__attribute__((nonnull(1)))
|
||||
static int process_git_files(pat_t *pattern, int argc, char *argv[])
|
||||
static int process_git_files(pat_t *pattern, pat_t *defs, int argc, char *argv[])
|
||||
{
|
||||
int fds[2];
|
||||
require(pipe(fds), "Failed to create pipe");
|
||||
@ -505,7 +505,7 @@ static int process_git_files(pat_t *pattern, int argc, char *argv[])
|
||||
size_t path_size = 0;
|
||||
int found = 0;
|
||||
while (getdelim(&path, &path_size, '\0', fp) > 0)
|
||||
found += process_file(path, pattern);
|
||||
found += process_file(path, pattern, defs);
|
||||
if (path) delete(&path);
|
||||
require(fclose(fp), "Failed to close read end of pipe");
|
||||
int status;
|
||||
@ -521,7 +521,7 @@ static int process_git_files(pat_t *pattern, int argc, char *argv[])
|
||||
//
|
||||
static pat_t *load_grammar(pat_t *defs, file_t *f)
|
||||
{
|
||||
return chain_together(assert_pat(f->start, f->end, bp_pattern(f->start, f->end)), defs);
|
||||
return chain_together(defs, assert_pat(f->start, f->end, bp_pattern(f->start, f->end)));
|
||||
}
|
||||
|
||||
//
|
||||
@ -636,9 +636,6 @@ int main(int argc, char *argv[])
|
||||
if (pattern == NULL)
|
||||
errx(EXIT_FAILURE, "No pattern provided.\n\n%s", usage);
|
||||
|
||||
// Hook up definitions:
|
||||
pattern = chain_together(defs, pattern);
|
||||
|
||||
for (argc = 0; argv[argc]; ++argc) ; // update argc
|
||||
|
||||
if (options.context_before == USE_DEFAULT_CONTEXT) options.context_before = 0;
|
||||
@ -660,7 +657,7 @@ int main(int argc, char *argv[])
|
||||
int found = 0;
|
||||
if (options.mode == MODE_JSON) printf("[");
|
||||
if (options.git_mode) { // Get the list of files from `git --ls-files ...`
|
||||
found = process_git_files(pattern, argc, argv);
|
||||
found = process_git_files(pattern, defs, argc, argv);
|
||||
} else if (argv[0]) {
|
||||
// Files passed in as command line args:
|
||||
struct stat statbuf;
|
||||
@ -668,17 +665,17 @@ int main(int argc, char *argv[])
|
||||
options.print_filenames = false;
|
||||
for ( ; argv[0]; argv++) {
|
||||
if (stat(argv[0], &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) // Symlinks are okay if manually specified
|
||||
found += process_dir(argv[0], pattern);
|
||||
found += process_dir(argv[0], pattern, defs);
|
||||
else
|
||||
found += process_file(argv[0], pattern);
|
||||
found += process_file(argv[0], pattern, defs);
|
||||
}
|
||||
} else if (isatty(STDIN_FILENO)) {
|
||||
// No files, no piped in input, so use files in current dir, recursively
|
||||
found += process_dir(".", pattern);
|
||||
found += process_dir(".", pattern, defs);
|
||||
} else {
|
||||
// Piped in input:
|
||||
options.print_filenames = false; // Don't print filename on stdin
|
||||
found += process_file("", pattern);
|
||||
found += process_file("", pattern, defs);
|
||||
}
|
||||
if (options.mode == MODE_JSON) printf("]\n");
|
||||
|
||||
|
@ -14,9 +14,9 @@ void-element: `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"me
|
||||
|
||||
template-element: "<template>" ..%(\n / comment / element) "</template>"
|
||||
|
||||
raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n "</"tag__`>
|
||||
raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n ("</"tag__`>)
|
||||
|
||||
normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) "</"tag__`>
|
||||
normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) ("</"tag__`>)
|
||||
|
||||
comment: "<!--" ..%\n "-->"
|
||||
|
||||
|
51
match.c
51
match.c
@ -192,16 +192,37 @@ void cache_destroy(match_ctx_t *ctx)
|
||||
}
|
||||
|
||||
//
|
||||
// Look up a pattern definition by name.
|
||||
// Look up a pattern definition by name from a definition pattern.
|
||||
//
|
||||
// Search `defs` for a definition whose name is exactly the `namelen` bytes at
// `name`, and return that definition's `meaning` pattern, or NULL if none of
// the visited nodes matches.
//   defs:    BP_DEFINITIONS node, BP_CHAIN of such nodes, or NULL (it is the
//            one parameter not covered by the nonnull attribute below).
//   name:    bytes of the identifier; only compared via length + strncmp().
//   namelen: number of bytes of `name` to compare.
// Any other node type reached during the walk is reported through errx().
__attribute__((nonnull(2)))
|
||||
static pat_t *_lookup_def(pat_t *defs, const char *name, size_t namelen)
|
||||
{
|
||||
while (defs) {
|
||||
if (defs->type == BP_CHAIN) {
|
||||
// For a chain, the `second` half is searched (recursively) before the
|
||||
// `first` half, so a hit in `second` takes precedence.
|
||||
pat_t *second = _lookup_def(defs->args.multiple.second, name, namelen);
|
||||
if (second) return second;
|
||||
// No hit in `second`: keep walking iteratively through `first`.
|
||||
defs = defs->args.multiple.first;
|
||||
} else if (defs->type == BP_DEFINITIONS) {
|
||||
// Lengths must be equal before the byte comparison, so a stored name that
|
||||
// merely shares a prefix with `name` does not count as a match.
|
||||
if (namelen == defs->args.def.namelen && strncmp(defs->args.def.name, name, namelen) == 0)
|
||||
return defs->args.def.meaning;
|
||||
// Not this one: advance along the `next_def` link.
|
||||
defs = defs->args.def.next_def;
|
||||
} else {
|
||||
// Only BP_CHAIN and BP_DEFINITIONS nodes are accepted in a definitions pattern.
|
||||
errx(1, "Invalid pattern type in definitions");
|
||||
}
|
||||
}
|
||||
// Ran off the end of the structure (or `defs` was NULL to begin with).
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//
|
||||
// Look up a pattern definition by name from a context.
|
||||
//
|
||||
__attribute__((nonnull))
|
||||
pat_t *lookup(match_ctx_t *ctx, const char *name, size_t namelen)
|
||||
pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen)
|
||||
{
|
||||
for (pat_t *def = ctx->defs; def; def = def->args.def.next_def) {
|
||||
if (namelen == def->args.def.namelen && strncmp(def->args.def.name, name, namelen) == 0)
|
||||
return def->args.def.meaning;
|
||||
for (; ctx; ctx = ctx->parent_ctx) {
|
||||
pat_t *def = _lookup_def(ctx->defs, name, namelen);
|
||||
if (def) return def;
|
||||
}
|
||||
if (ctx->parent_ctx) return lookup(ctx->parent_ctx, name, namelen);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -213,7 +234,7 @@ __attribute__((nonnull(1)))
|
||||
static inline pat_t *deref(match_ctx_t *ctx, pat_t *pat)
|
||||
{
|
||||
if (pat && pat->type == BP_REF) {
|
||||
pat_t *def = lookup(ctx, pat->args.ref.name, pat->args.ref.len);
|
||||
pat_t *def = lookup_ctx(ctx, pat->args.ref.name, pat->args.ref.len);
|
||||
if (def) return def;
|
||||
}
|
||||
return pat;
|
||||
@ -266,17 +287,6 @@ static pat_t *get_prerequisite(match_ctx_t *ctx, pat_t *pat)
|
||||
__attribute__((nonnull(1,2,3)))
|
||||
static match_t *_next_match(match_ctx_t *ctx, const char *str, pat_t *pat, pat_t *skip)
|
||||
{
|
||||
if (pat->type == BP_DEFINITIONS || (pat->type == BP_CHAIN && pat->args.multiple.first->type == BP_DEFINITIONS)) {
|
||||
match_ctx_t ctx2 = *ctx;
|
||||
ctx2.cache = &(cache_t){0};
|
||||
ctx2.parent_ctx = ctx;
|
||||
ctx2.defs = pat->type == BP_DEFINITIONS ? pat : pat->args.multiple.first;
|
||||
pat_t *match_pat = pat->type == BP_DEFINITIONS ? pat->args.def.meaning : pat->args.multiple.second;
|
||||
match_t *m = _next_match(&ctx2, str, match_pat, skip);
|
||||
cache_destroy(&ctx2);
|
||||
return m;
|
||||
}
|
||||
|
||||
// Clear the cache so it's not full of old cache values from different parts of the file:
|
||||
cache_destroy(ctx);
|
||||
|
||||
@ -618,7 +628,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
|
||||
if (has_cached_failure(ctx, str, pat))
|
||||
return NULL;
|
||||
|
||||
pat_t *ref = lookup(ctx, pat->args.ref.name, pat->args.ref.len);
|
||||
pat_t *ref = lookup_ctx(ctx, pat->args.ref.name, pat->args.ref.len);
|
||||
if (ref == NULL)
|
||||
errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
|
||||
|
||||
@ -787,7 +797,7 @@ size_t free_all_matches(void)
|
||||
// Iterate over matches.
|
||||
// Usage: for (match_t *m = NULL; next_match(&m, ...); ) {...}
|
||||
//
|
||||
bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *skip, bool ignorecase)
|
||||
bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *defs, pat_t *skip, bool ignorecase)
|
||||
{
|
||||
const char *pos;
|
||||
if (*m) {
|
||||
@ -805,6 +815,7 @@ bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat
|
||||
.start = start,
|
||||
.end = end,
|
||||
.ignorecase = ignorecase,
|
||||
.defs = defs,
|
||||
};
|
||||
*m = (pos <= end) ? _next_match(&ctx, pos, pat, skip) : NULL;
|
||||
cache_destroy(&ctx);
|
||||
|
4
match.h
4
match.h
@ -28,8 +28,8 @@ __attribute__((nonnull))
|
||||
void recycle_match(match_t **at_m);
|
||||
size_t free_all_matches(void);
|
||||
size_t recycle_all_matches(void);
|
||||
bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *skip, bool ignorecase);
|
||||
#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, 0)
|
||||
bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *defs, pat_t *skip, bool ignorecase);
|
||||
#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, NULL, 0)
|
||||
__attribute__((nonnull))
|
||||
match_t *get_numbered_capture(match_t *m, int n);
|
||||
__attribute__((nonnull, pure))
|
||||
|
40
pattern.c
40
pattern.c
@ -169,11 +169,10 @@ pat_t *chain_together(pat_t *first, pat_t *second)
|
||||
if (second == NULL) return first;
|
||||
|
||||
if (first->type == BP_DEFINITIONS && second->type == BP_DEFINITIONS) {
|
||||
pat_t *second_end = second;
|
||||
while (second_end->args.def.next_def != NULL)
|
||||
second_end = second_end->args.def.next_def;
|
||||
second_end->args.def.next_def = first;
|
||||
return second;
|
||||
pat_t *chain = new_pat(BP_CHAIN, first->start, second->end, second->min_matchlen, second->max_matchlen);
|
||||
chain->args.multiple.first = first;
|
||||
chain->args.multiple.second = second;
|
||||
return chain;
|
||||
}
|
||||
|
||||
size_t minlen = first->min_matchlen + second->min_matchlen;
|
||||
@ -181,23 +180,6 @@ pat_t *chain_together(pat_t *first, pat_t *second)
|
||||
pat_t *chain = new_pat(BP_CHAIN, first->start, second->end, minlen, maxlen);
|
||||
chain->args.multiple.first = first;
|
||||
chain->args.multiple.second = second;
|
||||
|
||||
// If `first` is an UPTO operator (..) or contains one, then let it know
|
||||
// that `second` is what it's up *to*.
|
||||
for (pat_t *p = first; p; ) {
|
||||
if (p->type == BP_UPTO || p->type == BP_UPTO_STRICT) {
|
||||
p->args.multiple.first = second;
|
||||
p->min_matchlen = second->min_matchlen;
|
||||
p->max_matchlen = -1;
|
||||
break;
|
||||
} else if (p->type == BP_CAPTURE) {
|
||||
p = p->args.capture.capture_pat;
|
||||
} else if (p->type == BP_CHAIN) {
|
||||
p = p->args.multiple.second;
|
||||
} else if (p->type == BP_MATCH || p->type == BP_NOT_MATCH) {
|
||||
p = p->args.multiple.first;
|
||||
} else break;
|
||||
}
|
||||
return chain;
|
||||
}
|
||||
|
||||
@ -246,7 +228,7 @@ static pat_t *_bp_definition(const char *start, const char *end)
|
||||
// Compile a string of BP code into a BP pattern object.
|
||||
//
|
||||
__attribute__((nonnull))
|
||||
static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_stringpattern)
|
||||
{
|
||||
str = after_spaces(str, false, end);
|
||||
if (!*str) return NULL;
|
||||
@ -272,6 +254,12 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
|
||||
}
|
||||
pat_t *upto = new_pat(type, start, extra_arg ? extra_arg->end : str, 0, -1);
|
||||
upto->args.multiple.second = extra_arg;
|
||||
if (inside_stringpattern) {
|
||||
maybe_pat_t target = bp_stringpattern(upto->end, end);
|
||||
upto->args.multiple.first = target.success ? target.value.pat : NULL;
|
||||
} else {
|
||||
upto->args.multiple.first = bp_simplepattern(upto->end, end);
|
||||
}
|
||||
return upto;
|
||||
} else {
|
||||
return new_pat(BP_ANYCHAR, start, str, 1, UTF8_MAXCHARLEN);
|
||||
@ -536,9 +524,9 @@ maybe_pat_t bp_stringpattern(const char *str, const char *end)
|
||||
for (; str < end; str = next_char(str, end)) {
|
||||
if (*str == '\\' && str+1 < end) {
|
||||
if (str[1] == '\\' || isalnum(str[1]))
|
||||
interp = bp_simplepattern(str, end);
|
||||
interp = _bp_simplepattern(str, end, true);
|
||||
else
|
||||
interp = bp_simplepattern(str+1, end);
|
||||
interp = _bp_simplepattern(str+1, end, true);
|
||||
if (interp) break;
|
||||
// If there is no interpolated value, this is just a plain ol' regular backslash
|
||||
}
|
||||
@ -567,7 +555,7 @@ maybe_pat_t bp_stringpattern(const char *str, const char *end)
|
||||
//
|
||||
static pat_t *bp_simplepattern(const char *str, const char *end)
|
||||
{
|
||||
pat_t *pat = _bp_simplepattern(str, end);
|
||||
pat_t *pat = _bp_simplepattern(str, end, false);
|
||||
if (pat == NULL) return pat;
|
||||
str = pat->end;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user