Simplified things by passing a def argument to next_match instead of chaining defs together.

Also simplified `..` by just using a lookahead
instead of retconning it. Immutability invariants are now enforced
better.
This commit is contained in:
Bruce Hill 2022-04-30 14:19:08 -04:00
parent e5c0d09893
commit 24ed834317
7 changed files with 81 additions and 87 deletions

View File

@ -134,13 +134,11 @@ static int Lmatch(lua_State *L)
match_t *m = NULL;
int ret = 0;
pat_t *def_pat = chain_together(builtins, pat);
if (next_match(&m, text+index-1, &text[textlen], def_pat, NULL, false)) {
if (next_match(&m, text+index-1, &text[textlen], pat, builtins, NULL, false)) {
push_match(L, m, text);
stop_matching(&m);
ret = 1;
}
delete_pat(&def_pat, false);
return ret;
}
@ -173,8 +171,8 @@ static int Lreplace(lua_State *L)
FILE *out = open_memstream(&buf, &size);
int replacements = 0;
const char *prev = text;
pat_t *rep_pat = chain_together(builtins, maybe_replacement.value.pat);
for (match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, NULL, false); ) {
pat_t *rep_pat = maybe_replacement.value.pat;
for (match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, builtins, NULL, false); ) {
fwrite(prev, sizeof(char), (size_t)(m->start - prev), out);
fprint_match(out, text, m, NULL);
prev = m->end;
@ -186,7 +184,7 @@ static int Lreplace(lua_State *L)
lua_pushinteger(L, replacements);
fclose(out);
delete_pat(&maybe_replacement.value.pat, false);
delete_pat(&rep_pat, false);
return 2;
}
@ -247,9 +245,10 @@ static int Lpat_tostring(lua_State *L)
static int Lpat_gc(lua_State *L)
{
(void)L;
// pat_t **at_pat = lua_touserdata(L, 1);
// pat_t *pat = *at_pat;
// if (pat) delete_pat(at_pat, true);
pat_t **at_pat = lua_touserdata(L, 1);
pat_t *pat = *at_pat;
if (pat) delete_pat(at_pat, true);
(void)pat;
return 0;
}

51
bp.c
View File

@ -207,10 +207,10 @@ static int is_text_file(const char *filename)
//
// Print matches in JSON format.
//
static int print_matches_as_json(file_t *f, pat_t *pattern)
static int print_matches_as_json(file_t *f, pat_t *pattern, pat_t *defs)
{
int nmatches = 0;
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); ) {
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
if (++nmatches > 1)
printf(",\n");
printf("{\"filename\":\"%s\",\"match\":", f->filename);
@ -223,10 +223,10 @@ static int print_matches_as_json(file_t *f, pat_t *pattern)
//
// Print matches in a visual explanation style
//
static int explain_matches(file_t *f, pat_t *pattern)
static int explain_matches(file_t *f, pat_t *pattern, pat_t *defs)
{
int nmatches = 0;
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); ) {
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
if (++nmatches == 1) {
if (options.print_filenames)
fprint_filename(stdout, f->filename);
@ -349,7 +349,7 @@ static void on_nl(FILE *out)
//
// Print all the matches in a file.
//
static int print_matches(FILE *out, file_t *f, pat_t *pattern)
static int print_matches(FILE *out, file_t *f, pat_t *pattern, pat_t *defs)
{
static int printed_filenames = 0;
int matches = 0;
@ -364,7 +364,7 @@ static int print_matches(FILE *out, file_t *f, pat_t *pattern)
print_opts.replace_color = "\033[0;34;1m";
print_opts.normal_color = "\033[m";
}
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); ) {
for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
if (++matches == 1 && options.print_filenames) {
if (printed_filenames++ > 0) printf("\n");
fprint_filename(out, f->filename);
@ -394,7 +394,7 @@ static int print_matches(FILE *out, file_t *f, pat_t *pattern)
// against it, printing any results according to the flags.
//
__attribute__((nonnull))
static int process_file(const char *filename, pat_t *pattern)
static int process_file(const char *filename, pat_t *pattern, pat_t *defs)
{
file_t *f = load_file(NULL, filename);
if (f == NULL) {
@ -404,19 +404,19 @@ static int process_file(const char *filename, pat_t *pattern)
int matches = 0;
if (options.mode == MODE_EXPLAIN) {
matches += explain_matches(f, pattern);
matches += explain_matches(f, pattern, defs);
} else if (options.mode == MODE_LISTFILES) {
match_t *m = NULL;
if (next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase)) {
if (next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase)) {
printf("%s\n", f->filename);
matches += 1;
}
stop_matching(&m);
} else if (options.mode == MODE_JSON) {
matches += print_matches_as_json(f, pattern);
matches += print_matches_as_json(f, pattern, defs);
} else if (options.mode == MODE_INPLACE) {
match_t *m = NULL;
bool found = next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase);
bool found = next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase);
stop_matching(&m);
if (!found) return 0;
@ -432,12 +432,12 @@ static int process_file(const char *filename, pat_t *pattern)
// are used to restore the original file contents.
modifying_file = out; backup_file = f;
{
matches += print_matches(out, f, pattern);
matches += print_matches(out, f, pattern, defs);
}
modifying_file = NULL; backup_file = NULL;
fclose(out);
} else {
matches += print_matches(stdout, f, pattern);
matches += print_matches(stdout, f, pattern, defs);
}
fflush(stdout);
@ -452,7 +452,7 @@ static int process_file(const char *filename, pat_t *pattern)
// Recursively process all non-dotfile files in the given directory.
//
__attribute__((nonnull))
static int process_dir(const char *dirname, pat_t *pattern)
static int process_dir(const char *dirname, pat_t *pattern, pat_t *defs)
{
int matches = 0;
glob_t globbuf;
@ -469,9 +469,9 @@ static int process_dir(const char *dirname, pat_t *pattern)
if (S_ISLNK(statbuf.st_mode))
continue; // Skip symbolic links
else if (S_ISDIR(statbuf.st_mode))
matches += process_dir(globbuf.gl_pathv[i], pattern);
matches += process_dir(globbuf.gl_pathv[i], pattern, defs);
else if (is_text_file(globbuf.gl_pathv[i]))
matches += process_file(globbuf.gl_pathv[i], pattern);
matches += process_file(globbuf.gl_pathv[i], pattern, defs);
}
}
globfree(&globbuf);
@ -482,7 +482,7 @@ static int process_dir(const char *dirname, pat_t *pattern)
// Process git files using `git ls-files ...`
//
__attribute__((nonnull(1)))
static int process_git_files(pat_t *pattern, int argc, char *argv[])
static int process_git_files(pat_t *pattern, pat_t *defs, int argc, char *argv[])
{
int fds[2];
require(pipe(fds), "Failed to create pipe");
@ -505,7 +505,7 @@ static int process_git_files(pat_t *pattern, int argc, char *argv[])
size_t path_size = 0;
int found = 0;
while (getdelim(&path, &path_size, '\0', fp) > 0)
found += process_file(path, pattern);
found += process_file(path, pattern, defs);
if (path) delete(&path);
require(fclose(fp), "Failed to close read end of pipe");
int status;
@ -521,7 +521,7 @@ static int process_git_files(pat_t *pattern, int argc, char *argv[])
//
static pat_t *load_grammar(pat_t *defs, file_t *f)
{
return chain_together(assert_pat(f->start, f->end, bp_pattern(f->start, f->end)), defs);
return chain_together(defs, assert_pat(f->start, f->end, bp_pattern(f->start, f->end)));
}
//
@ -636,9 +636,6 @@ int main(int argc, char *argv[])
if (pattern == NULL)
errx(EXIT_FAILURE, "No pattern provided.\n\n%s", usage);
// Hook up definitions:
pattern = chain_together(defs, pattern);
for (argc = 0; argv[argc]; ++argc) ; // update argc
if (options.context_before == USE_DEFAULT_CONTEXT) options.context_before = 0;
@ -660,7 +657,7 @@ int main(int argc, char *argv[])
int found = 0;
if (options.mode == MODE_JSON) printf("[");
if (options.git_mode) { // Get the list of files from `git --ls-files ...`
found = process_git_files(pattern, argc, argv);
found = process_git_files(pattern, defs, argc, argv);
} else if (argv[0]) {
// Files passed in as command line args:
struct stat statbuf;
@ -668,17 +665,17 @@ int main(int argc, char *argv[])
options.print_filenames = false;
for ( ; argv[0]; argv++) {
if (stat(argv[0], &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) // Symlinks are okay if manually specified
found += process_dir(argv[0], pattern);
found += process_dir(argv[0], pattern, defs);
else
found += process_file(argv[0], pattern);
found += process_file(argv[0], pattern, defs);
}
} else if (isatty(STDIN_FILENO)) {
// No files, no piped in input, so use files in current dir, recursively
found += process_dir(".", pattern);
found += process_dir(".", pattern, defs);
} else {
// Piped in input:
options.print_filenames = false; // Don't print filename on stdin
found += process_file("", pattern);
found += process_file("", pattern, defs);
}
if (options.mode == MODE_JSON) printf("]\n");

View File

@ -14,9 +14,9 @@ void-element: `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"me
template-element: "<template>" ..%(\n / comment / element) "</template>"
raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n "</"tag__`>
raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n ("</"tag__`>)
normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) "</"tag__`>
normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) ("</"tag__`>)
comment: "<!--" ..%\n "-->"

51
match.c
View File

@ -192,16 +192,37 @@ void cache_destroy(match_ctx_t *ctx)
}
//
// Look up a pattern definition by name.
// Look up a pattern definition by name from a definition pattern.
//
//
// Search a definitions pattern for the definition called `name` (which is
// `namelen` bytes long) and return that definition's meaning, or NULL if no
// definition has that name. `defs` may be NULL (nothing to search), a single
// BP_DEFINITIONS node linked through `next_def`, or a BP_CHAIN node joining
// two such structures. In a chain, the second half is searched before the
// first half. Any other pattern type aborts the program via errx().
//
__attribute__((nonnull(2)))
static pat_t *_lookup_def(pat_t *defs, const char *name, size_t namelen)
{
    for (pat_t *cur = defs; cur != NULL; ) {
        switch (cur->type) {
        case BP_CHAIN: {
            // Recurse into the second half, then keep iterating on the first.
            pat_t *found = _lookup_def(cur->args.multiple.second, name, namelen);
            if (found != NULL)
                return found;
            cur = cur->args.multiple.first;
            break;
        }
        case BP_DEFINITIONS: {
            // Compare lengths first so that a shared prefix is not a match.
            if (cur->args.def.namelen == namelen
                && strncmp(cur->args.def.name, name, namelen) == 0)
                return cur->args.def.meaning;
            cur = cur->args.def.next_def;
            break;
        }
        default:
            errx(1, "Invalid pattern type in definitions");
        }
    }
    return NULL;
}
//
// Look up a pattern definition by name from a context.
//
__attribute__((nonnull))
pat_t *lookup(match_ctx_t *ctx, const char *name, size_t namelen)
pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen)
{
for (pat_t *def = ctx->defs; def; def = def->args.def.next_def) {
if (namelen == def->args.def.namelen && strncmp(def->args.def.name, name, namelen) == 0)
return def->args.def.meaning;
for (; ctx; ctx = ctx->parent_ctx) {
pat_t *def = _lookup_def(ctx->defs, name, namelen);
if (def) return def;
}
if (ctx->parent_ctx) return lookup(ctx->parent_ctx, name, namelen);
return NULL;
}
@ -213,7 +234,7 @@ __attribute__((nonnull(1)))
static inline pat_t *deref(match_ctx_t *ctx, pat_t *pat)
{
if (pat && pat->type == BP_REF) {
pat_t *def = lookup(ctx, pat->args.ref.name, pat->args.ref.len);
pat_t *def = lookup_ctx(ctx, pat->args.ref.name, pat->args.ref.len);
if (def) return def;
}
return pat;
@ -266,17 +287,6 @@ static pat_t *get_prerequisite(match_ctx_t *ctx, pat_t *pat)
__attribute__((nonnull(1,2,3)))
static match_t *_next_match(match_ctx_t *ctx, const char *str, pat_t *pat, pat_t *skip)
{
if (pat->type == BP_DEFINITIONS || (pat->type == BP_CHAIN && pat->args.multiple.first->type == BP_DEFINITIONS)) {
match_ctx_t ctx2 = *ctx;
ctx2.cache = &(cache_t){0};
ctx2.parent_ctx = ctx;
ctx2.defs = pat->type == BP_DEFINITIONS ? pat : pat->args.multiple.first;
pat_t *match_pat = pat->type == BP_DEFINITIONS ? pat->args.def.meaning : pat->args.multiple.second;
match_t *m = _next_match(&ctx2, str, match_pat, skip);
cache_destroy(&ctx2);
return m;
}
// Clear the cache so it's not full of old cache values from different parts of the file:
cache_destroy(ctx);
@ -618,7 +628,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
if (has_cached_failure(ctx, str, pat))
return NULL;
pat_t *ref = lookup(ctx, pat->args.ref.name, pat->args.ref.len);
pat_t *ref = lookup_ctx(ctx, pat->args.ref.name, pat->args.ref.len);
if (ref == NULL)
errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
@ -787,7 +797,7 @@ size_t free_all_matches(void)
// Iterate over matches.
// Usage: for (match_t *m = NULL; next_match(&m, ...); ) {...}
//
bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *skip, bool ignorecase)
bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *defs, pat_t *skip, bool ignorecase)
{
const char *pos;
if (*m) {
@ -805,6 +815,7 @@ bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat
.start = start,
.end = end,
.ignorecase = ignorecase,
.defs = defs,
};
*m = (pos <= end) ? _next_match(&ctx, pos, pat, skip) : NULL;
cache_destroy(&ctx);

View File

@ -28,8 +28,8 @@ __attribute__((nonnull))
void recycle_match(match_t **at_m);
size_t free_all_matches(void);
size_t recycle_all_matches(void);
bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *skip, bool ignorecase);
#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, 0)
bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *defs, pat_t *skip, bool ignorecase);
#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, NULL, 0)
__attribute__((nonnull))
match_t *get_numbered_capture(match_t *m, int n);
__attribute__((nonnull, pure))

View File

@ -169,11 +169,10 @@ pat_t *chain_together(pat_t *first, pat_t *second)
if (second == NULL) return first;
if (first->type == BP_DEFINITIONS && second->type == BP_DEFINITIONS) {
pat_t *second_end = second;
while (second_end->args.def.next_def != NULL)
second_end = second_end->args.def.next_def;
second_end->args.def.next_def = first;
return second;
pat_t *chain = new_pat(BP_CHAIN, first->start, second->end, second->min_matchlen, second->max_matchlen);
chain->args.multiple.first = first;
chain->args.multiple.second = second;
return chain;
}
size_t minlen = first->min_matchlen + second->min_matchlen;
@ -181,23 +180,6 @@ pat_t *chain_together(pat_t *first, pat_t *second)
pat_t *chain = new_pat(BP_CHAIN, first->start, second->end, minlen, maxlen);
chain->args.multiple.first = first;
chain->args.multiple.second = second;
// If `first` is an UPTO operator (..) or contains one, then let it know
// that `second` is what it's up *to*.
for (pat_t *p = first; p; ) {
if (p->type == BP_UPTO || p->type == BP_UPTO_STRICT) {
p->args.multiple.first = second;
p->min_matchlen = second->min_matchlen;
p->max_matchlen = -1;
break;
} else if (p->type == BP_CAPTURE) {
p = p->args.capture.capture_pat;
} else if (p->type == BP_CHAIN) {
p = p->args.multiple.second;
} else if (p->type == BP_MATCH || p->type == BP_NOT_MATCH) {
p = p->args.multiple.first;
} else break;
}
return chain;
}
@ -246,7 +228,7 @@ static pat_t *_bp_definition(const char *start, const char *end)
// Compile a string of BP code into a BP pattern object.
//
__attribute__((nonnull))
static pat_t *_bp_simplepattern(const char *str, const char *end)
static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_stringpattern)
{
str = after_spaces(str, false, end);
if (!*str) return NULL;
@ -272,6 +254,12 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
}
pat_t *upto = new_pat(type, start, extra_arg ? extra_arg->end : str, 0, -1);
upto->args.multiple.second = extra_arg;
if (inside_stringpattern) {
maybe_pat_t target = bp_stringpattern(upto->end, end);
upto->args.multiple.first = target.success ? target.value.pat : NULL;
} else {
upto->args.multiple.first = bp_simplepattern(upto->end, end);
}
return upto;
} else {
return new_pat(BP_ANYCHAR, start, str, 1, UTF8_MAXCHARLEN);
@ -536,9 +524,9 @@ maybe_pat_t bp_stringpattern(const char *str, const char *end)
for (; str < end; str = next_char(str, end)) {
if (*str == '\\' && str+1 < end) {
if (str[1] == '\\' || isalnum(str[1]))
interp = bp_simplepattern(str, end);
interp = _bp_simplepattern(str, end, true);
else
interp = bp_simplepattern(str+1, end);
interp = _bp_simplepattern(str+1, end, true);
if (interp) break;
// If there is no interpolated value, this is just a plain ol' regular backslash
}
@ -567,7 +555,7 @@ maybe_pat_t bp_stringpattern(const char *str, const char *end)
//
static pat_t *bp_simplepattern(const char *str, const char *end)
{
pat_t *pat = _bp_simplepattern(str, end);
pat_t *pat = _bp_simplepattern(str, end, false);
if (pat == NULL) return pat;
str = pat->end;

View File

@ -120,6 +120,5 @@ void free_all_pats(void);
__attribute__((nonnull))
void delete_pat(pat_t **at_pat, bool recursive);
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0