aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2022-04-30 14:19:08 -0400
committer Bruce Hill <bruce@bruce-hill.com> 2022-04-30 14:19:08 -0400
commit 24ed834317f3fda8f0f55489f54a2df1aca5de17 (patch)
tree 823b9f7c7c1c28e1d0b42535955d211b1073bec2
parent e5c0d09893401b82855872d4f150a0acf56d76d4 (diff)
Simplified things by passing a `defs` argument to next_match instead of chaining defs together.
Also simplified `..` by just using a lookahead instead of retconning it (i.e. having chain_together patch the `..` pattern's target in after the fact). Immutability invariants are now enforced better.
-rw-r--r-- Lua/lbp.c 17
-rw-r--r-- bp.c 51
-rw-r--r-- grammars/html.bp 4
-rw-r--r-- match.c 51
-rw-r--r-- match.h 4
-rw-r--r-- pattern.c 40
-rw-r--r-- pattern.h 1
7 files changed, 81 insertions, 87 deletions
diff --git a/Lua/lbp.c b/Lua/lbp.c
index 89cb12a..a8b573e 100644
--- a/Lua/lbp.c
+++ b/Lua/lbp.c
@@ -134,13 +134,11 @@ static int Lmatch(lua_State *L)
match_t *m = NULL;
int ret = 0;
- pat_t *def_pat = chain_together(builtins, pat);
- if (next_match(&m, text+index-1, &text[textlen], def_pat, NULL, false)) {
+ if (next_match(&m, text+index-1, &text[textlen], pat, builtins, NULL, false)) {
push_match(L, m, text);
stop_matching(&m);
ret = 1;
}
- delete_pat(&def_pat, false);
return ret;
}
@@ -173,8 +171,8 @@ static int Lreplace(lua_State *L)
FILE *out = open_memstream(&buf, &size);
int replacements = 0;
const char *prev = text;
- pat_t *rep_pat = chain_together(builtins, maybe_replacement.value.pat);
- for (match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, NULL, false); ) {
+ pat_t *rep_pat = maybe_replacement.value.pat;
+ for (match_t *m = NULL; next_match(&m, text, &text[textlen], rep_pat, builtins, NULL, false); ) {
fwrite(prev, sizeof(char), (size_t)(m->start - prev), out);
fprint_match(out, text, m, NULL);
prev = m->end;
@@ -186,7 +184,7 @@ static int Lreplace(lua_State *L)
lua_pushinteger(L, replacements);
fclose(out);
- delete_pat(&maybe_replacement.value.pat, false);
+ delete_pat(&rep_pat, false);
return 2;
}
@@ -247,9 +245,10 @@ static int Lpat_tostring(lua_State *L)
static int Lpat_gc(lua_State *L)
{
(void)L;
- // pat_t **at_pat = lua_touserdata(L, 1);
- // pat_t *pat = *at_pat;
- // if (pat) delete_pat(at_pat, true);
+ pat_t **at_pat = lua_touserdata(L, 1);
+ pat_t *pat = *at_pat;
+ if (pat) delete_pat(at_pat, true);
+ (void)pat;
return 0;
}
diff --git a/bp.c b/bp.c
index dbd72bd..b863e24 100644
--- a/bp.c
+++ b/bp.c
@@ -207,10 +207,10 @@ static int is_text_file(const char *filename)
//
// Print matches in JSON format.
//
-static int print_matches_as_json(file_t *f, pat_t *pattern)
+static int print_matches_as_json(file_t *f, pat_t *pattern, pat_t *defs)
{
int nmatches = 0;
- for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); ) {
+ for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
if (++nmatches > 1)
printf(",\n");
printf("{\"filename\":\"%s\",\"match\":", f->filename);
@@ -223,10 +223,10 @@ static int print_matches_as_json(file_t *f, pat_t *pattern)
//
// Print matches in a visual explanation style
//
-static int explain_matches(file_t *f, pat_t *pattern)
+static int explain_matches(file_t *f, pat_t *pattern, pat_t *defs)
{
int nmatches = 0;
- for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); ) {
+ for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
if (++nmatches == 1) {
if (options.print_filenames)
fprint_filename(stdout, f->filename);
@@ -349,7 +349,7 @@ static void on_nl(FILE *out)
//
// Print all the matches in a file.
//
-static int print_matches(FILE *out, file_t *f, pat_t *pattern)
+static int print_matches(FILE *out, file_t *f, pat_t *pattern, pat_t *defs)
{
static int printed_filenames = 0;
int matches = 0;
@@ -364,7 +364,7 @@ static int print_matches(FILE *out, file_t *f, pat_t *pattern)
print_opts.replace_color = "\033[0;34;1m";
print_opts.normal_color = "\033[m";
}
- for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase); ) {
+ for (match_t *m = NULL; next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase); ) {
if (++matches == 1 && options.print_filenames) {
if (printed_filenames++ > 0) printf("\n");
fprint_filename(out, f->filename);
@@ -394,7 +394,7 @@ static int print_matches(FILE *out, file_t *f, pat_t *pattern)
// against it, printing any results according to the flags.
//
__attribute__((nonnull))
-static int process_file(const char *filename, pat_t *pattern)
+static int process_file(const char *filename, pat_t *pattern, pat_t *defs)
{
file_t *f = load_file(NULL, filename);
if (f == NULL) {
@@ -404,19 +404,19 @@ static int process_file(const char *filename, pat_t *pattern)
int matches = 0;
if (options.mode == MODE_EXPLAIN) {
- matches += explain_matches(f, pattern);
+ matches += explain_matches(f, pattern, defs);
} else if (options.mode == MODE_LISTFILES) {
match_t *m = NULL;
- if (next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase)) {
+ if (next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase)) {
printf("%s\n", f->filename);
matches += 1;
}
stop_matching(&m);
} else if (options.mode == MODE_JSON) {
- matches += print_matches_as_json(f, pattern);
+ matches += print_matches_as_json(f, pattern, defs);
} else if (options.mode == MODE_INPLACE) {
match_t *m = NULL;
- bool found = next_match(&m, f->start, f->end, pattern, options.skip, options.ignorecase);
+ bool found = next_match(&m, f->start, f->end, pattern, defs, options.skip, options.ignorecase);
stop_matching(&m);
if (!found) return 0;
@@ -432,12 +432,12 @@ static int process_file(const char *filename, pat_t *pattern)
// are used to restore the original file contents.
modifying_file = out; backup_file = f;
{
- matches += print_matches(out, f, pattern);
+ matches += print_matches(out, f, pattern, defs);
}
modifying_file = NULL; backup_file = NULL;
fclose(out);
} else {
- matches += print_matches(stdout, f, pattern);
+ matches += print_matches(stdout, f, pattern, defs);
}
fflush(stdout);
@@ -452,7 +452,7 @@ static int process_file(const char *filename, pat_t *pattern)
// Recursively process all non-dotfile files in the given directory.
//
__attribute__((nonnull))
-static int process_dir(const char *dirname, pat_t *pattern)
+static int process_dir(const char *dirname, pat_t *pattern, pat_t *defs)
{
int matches = 0;
glob_t globbuf;
@@ -469,9 +469,9 @@ static int process_dir(const char *dirname, pat_t *pattern)
if (S_ISLNK(statbuf.st_mode))
continue; // Skip symbolic links
else if (S_ISDIR(statbuf.st_mode))
- matches += process_dir(globbuf.gl_pathv[i], pattern);
+ matches += process_dir(globbuf.gl_pathv[i], pattern, defs);
else if (is_text_file(globbuf.gl_pathv[i]))
- matches += process_file(globbuf.gl_pathv[i], pattern);
+ matches += process_file(globbuf.gl_pathv[i], pattern, defs);
}
}
globfree(&globbuf);
@@ -482,7 +482,7 @@ static int process_dir(const char *dirname, pat_t *pattern)
// Process git files using `git ls-files ...`
//
__attribute__((nonnull(1)))
-static int process_git_files(pat_t *pattern, int argc, char *argv[])
+static int process_git_files(pat_t *pattern, pat_t *defs, int argc, char *argv[])
{
int fds[2];
require(pipe(fds), "Failed to create pipe");
@@ -505,7 +505,7 @@ static int process_git_files(pat_t *pattern, int argc, char *argv[])
size_t path_size = 0;
int found = 0;
while (getdelim(&path, &path_size, '\0', fp) > 0)
- found += process_file(path, pattern);
+ found += process_file(path, pattern, defs);
if (path) delete(&path);
require(fclose(fp), "Failed to close read end of pipe");
int status;
@@ -521,7 +521,7 @@ static int process_git_files(pat_t *pattern, int argc, char *argv[])
//
static pat_t *load_grammar(pat_t *defs, file_t *f)
{
- return chain_together(assert_pat(f->start, f->end, bp_pattern(f->start, f->end)), defs);
+ return chain_together(defs, assert_pat(f->start, f->end, bp_pattern(f->start, f->end)));
}
//
@@ -636,9 +636,6 @@ int main(int argc, char *argv[])
if (pattern == NULL)
errx(EXIT_FAILURE, "No pattern provided.\n\n%s", usage);
- // Hook up definitions:
- pattern = chain_together(defs, pattern);
-
for (argc = 0; argv[argc]; ++argc) ; // update argc
if (options.context_before == USE_DEFAULT_CONTEXT) options.context_before = 0;
@@ -660,7 +657,7 @@ int main(int argc, char *argv[])
int found = 0;
if (options.mode == MODE_JSON) printf("[");
if (options.git_mode) { // Get the list of files from `git --ls-files ...`
- found = process_git_files(pattern, argc, argv);
+ found = process_git_files(pattern, defs, argc, argv);
} else if (argv[0]) {
// Files pass in as command line args:
struct stat statbuf;
@@ -668,17 +665,17 @@ int main(int argc, char *argv[])
options.print_filenames = false;
for ( ; argv[0]; argv++) {
if (stat(argv[0], &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) // Symlinks are okay if manually specified
- found += process_dir(argv[0], pattern);
+ found += process_dir(argv[0], pattern, defs);
else
- found += process_file(argv[0], pattern);
+ found += process_file(argv[0], pattern, defs);
}
} else if (isatty(STDIN_FILENO)) {
// No files, no piped in input, so use files in current dir, recursively
- found += process_dir(".", pattern);
+ found += process_dir(".", pattern, defs);
} else {
// Piped in input:
options.print_filenames = false; // Don't print filename on stdin
- found += process_file("", pattern);
+ found += process_file("", pattern, defs);
}
if (options.mode == MODE_JSON) printf("]\n");
diff --git a/grammars/html.bp b/grammars/html.bp
index 29e4566..1ed83df 100644
--- a/grammars/html.bp
+++ b/grammars/html.bp
@@ -14,9 +14,9 @@ void-element: `< ("area"/"base"/"br"/"col"/"embed"/"hr"/"img"/"input"/"link"/"me
template-element: "<template>" ..%(\n / comment / element) "</template>"
-raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n "</"tag__`>
+raw-element: `< @tag=("script"/"style"/"textarea"/"title") __attributes__ `> ..%\n ("</"tag__`>)
-normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) "</"tag__`>
+normal-element: `< @tag=id __attributes__ `> ..%(\n / comment / element) ("</"tag__`>)
comment: "<!--" ..%\n "-->"
diff --git a/match.c b/match.c
index 80296e9..fcea58b 100644
--- a/match.c
+++ b/match.c
@@ -192,16 +192,37 @@ void cache_destroy(match_ctx_t *ctx)
}
//
-// Look up a pattern definition by name.
+// Look up a pattern definition by name from a definition pattern.
+//
+__attribute__((nonnull(2)))
+static pat_t *_lookup_def(pat_t *defs, const char *name, size_t namelen)
+{
+ while (defs) {
+ if (defs->type == BP_CHAIN) {
+ pat_t *second = _lookup_def(defs->args.multiple.second, name, namelen);
+ if (second) return second;
+ defs = defs->args.multiple.first;
+ } else if (defs->type == BP_DEFINITIONS) {
+ if (namelen == defs->args.def.namelen && strncmp(defs->args.def.name, name, namelen) == 0)
+ return defs->args.def.meaning;
+ defs = defs->args.def.next_def;
+ } else {
+ errx(1, "Invalid pattern type in definitions");
+ }
+ }
+ return NULL;
+}
+
+//
+// Look up a pattern definition by name from a context.
//
__attribute__((nonnull))
-pat_t *lookup(match_ctx_t *ctx, const char *name, size_t namelen)
+pat_t *lookup_ctx(match_ctx_t *ctx, const char *name, size_t namelen)
{
- for (pat_t *def = ctx->defs; def; def = def->args.def.next_def) {
- if (namelen == def->args.def.namelen && strncmp(def->args.def.name, name, namelen) == 0)
- return def->args.def.meaning;
+ for (; ctx; ctx = ctx->parent_ctx) {
+ pat_t *def = _lookup_def(ctx->defs, name, namelen);
+ if (def) return def;
}
- if (ctx->parent_ctx) return lookup(ctx->parent_ctx, name, namelen);
return NULL;
}
@@ -213,7 +234,7 @@ __attribute__((nonnull(1)))
static inline pat_t *deref(match_ctx_t *ctx, pat_t *pat)
{
if (pat && pat->type == BP_REF) {
- pat_t *def = lookup(ctx, pat->args.ref.name, pat->args.ref.len);
+ pat_t *def = lookup_ctx(ctx, pat->args.ref.name, pat->args.ref.len);
if (def) return def;
}
return pat;
@@ -266,17 +287,6 @@ static pat_t *get_prerequisite(match_ctx_t *ctx, pat_t *pat)
__attribute__((nonnull(1,2,3)))
static match_t *_next_match(match_ctx_t *ctx, const char *str, pat_t *pat, pat_t *skip)
{
- if (pat->type == BP_DEFINITIONS || (pat->type == BP_CHAIN && pat->args.multiple.first->type == BP_DEFINITIONS)) {
- match_ctx_t ctx2 = *ctx;
- ctx2.cache = &(cache_t){0};
- ctx2.parent_ctx = ctx;
- ctx2.defs = pat->type == BP_DEFINITIONS ? pat : pat->args.multiple.first;
- pat_t *match_pat = pat->type == BP_DEFINITIONS ? pat->args.def.meaning : pat->args.multiple.second;
- match_t *m = _next_match(&ctx2, str, match_pat, skip);
- cache_destroy(&ctx2);
- return m;
- }
-
// Clear the cache so it's not full of old cache values from different parts of the file:
cache_destroy(ctx);
@@ -618,7 +628,7 @@ static match_t *match(match_ctx_t *ctx, const char *str, pat_t *pat)
if (has_cached_failure(ctx, str, pat))
return NULL;
- pat_t *ref = lookup(ctx, pat->args.ref.name, pat->args.ref.len);
+ pat_t *ref = lookup_ctx(ctx, pat->args.ref.name, pat->args.ref.len);
if (ref == NULL)
errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
@@ -787,7 +797,7 @@ size_t free_all_matches(void)
// Iterate over matches.
// Usage: for (match_t *m = NULL; next_match(&m, ...); ) {...}
//
-bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *skip, bool ignorecase)
+bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *defs, pat_t *skip, bool ignorecase)
{
const char *pos;
if (*m) {
@@ -805,6 +815,7 @@ bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat
.start = start,
.end = end,
.ignorecase = ignorecase,
+ .defs = defs,
};
*m = (pos <= end) ? _next_match(&ctx, pos, pat, skip) : NULL;
cache_destroy(&ctx);
diff --git a/match.h b/match.h
index 7439b27..5209977 100644
--- a/match.h
+++ b/match.h
@@ -28,8 +28,8 @@ __attribute__((nonnull))
void recycle_match(match_t **at_m);
size_t free_all_matches(void);
size_t recycle_all_matches(void);
-bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *skip, bool ignorecase);
-#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, 0)
+bool next_match(match_t **m, const char *start, const char *end, pat_t *pat, pat_t *defs, pat_t *skip, bool ignorecase);
+#define stop_matching(m) next_match(m, NULL, NULL, NULL, NULL, NULL, 0)
__attribute__((nonnull))
match_t *get_numbered_capture(match_t *m, int n);
__attribute__((nonnull, pure))
diff --git a/pattern.c b/pattern.c
index 379a830..78cf6e6 100644
--- a/pattern.c
+++ b/pattern.c
@@ -169,11 +169,10 @@ pat_t *chain_together(pat_t *first, pat_t *second)
if (second == NULL) return first;
if (first->type == BP_DEFINITIONS && second->type == BP_DEFINITIONS) {
- pat_t *second_end = second;
- while (second_end->args.def.next_def != NULL)
- second_end = second_end->args.def.next_def;
- second_end->args.def.next_def = first;
- return second;
+ pat_t *chain = new_pat(BP_CHAIN, first->start, second->end, second->min_matchlen, second->max_matchlen);
+ chain->args.multiple.first = first;
+ chain->args.multiple.second = second;
+ return chain;
}
size_t minlen = first->min_matchlen + second->min_matchlen;
@@ -181,23 +180,6 @@ pat_t *chain_together(pat_t *first, pat_t *second)
pat_t *chain = new_pat(BP_CHAIN, first->start, second->end, minlen, maxlen);
chain->args.multiple.first = first;
chain->args.multiple.second = second;
-
- // If `first` is an UPTO operator (..) or contains one, then let it know
- // that `second` is what it's up *to*.
- for (pat_t *p = first; p; ) {
- if (p->type == BP_UPTO || p->type == BP_UPTO_STRICT) {
- p->args.multiple.first = second;
- p->min_matchlen = second->min_matchlen;
- p->max_matchlen = -1;
- break;
- } else if (p->type == BP_CAPTURE) {
- p = p->args.capture.capture_pat;
- } else if (p->type == BP_CHAIN) {
- p = p->args.multiple.second;
- } else if (p->type == BP_MATCH || p->type == BP_NOT_MATCH) {
- p = p->args.multiple.first;
- } else break;
- }
return chain;
}
@@ -246,7 +228,7 @@ static pat_t *_bp_definition(const char *start, const char *end)
// Compile a string of BP code into a BP pattern object.
//
__attribute__((nonnull))
-static pat_t *_bp_simplepattern(const char *str, const char *end)
+static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_stringpattern)
{
str = after_spaces(str, false, end);
if (!*str) return NULL;
@@ -272,6 +254,12 @@ static pat_t *_bp_simplepattern(const char *str, const char *end)
}
pat_t *upto = new_pat(type, start, extra_arg ? extra_arg->end : str, 0, -1);
upto->args.multiple.second = extra_arg;
+ if (inside_stringpattern) {
+ maybe_pat_t target = bp_stringpattern(upto->end, end);
+ upto->args.multiple.first = target.success ? target.value.pat : NULL;
+ } else {
+ upto->args.multiple.first = bp_simplepattern(upto->end, end);
+ }
return upto;
} else {
return new_pat(BP_ANYCHAR, start, str, 1, UTF8_MAXCHARLEN);
@@ -536,9 +524,9 @@ maybe_pat_t bp_stringpattern(const char *str, const char *end)
for (; str < end; str = next_char(str, end)) {
if (*str == '\\' && str+1 < end) {
if (str[1] == '\\' || isalnum(str[1]))
- interp = bp_simplepattern(str, end);
+ interp = _bp_simplepattern(str, end, true);
else
- interp = bp_simplepattern(str+1, end);
+ interp = _bp_simplepattern(str+1, end, true);
if (interp) break;
// If there is no interpolated value, this is just a plain ol' regular backslash
}
@@ -567,7 +555,7 @@ maybe_pat_t bp_stringpattern(const char *str, const char *end)
//
static pat_t *bp_simplepattern(const char *str, const char *end)
{
- pat_t *pat = _bp_simplepattern(str, end);
+ pat_t *pat = _bp_simplepattern(str, end, false);
if (pat == NULL) return pat;
str = pat->end;
diff --git a/pattern.h b/pattern.h
index a39f5bf..218a997 100644
--- a/pattern.h
+++ b/pattern.h
@@ -120,6 +120,5 @@ void free_all_pats(void);
__attribute__((nonnull))
void delete_pat(pat_t **at_pat, bool recursive);
-
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0