From 8e1e6572feabd291cbd5048459c0a58c6460ff91 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Thu, 23 Sep 2021 15:15:48 -0700 Subject: Moved type defs into their own files instead of types.h --- Makefile | 2 +- bp.c | 1 + definitions.h | 13 +++++- explain.c | 2 +- explain.h | 2 +- files.c | 5 --- files.h | 3 -- json.h | 2 +- match.c | 13 ++---- match.h | 26 ++++++++++- pattern.c | 36 +++++++++++++--- pattern.h | 96 ++++++++++++++++++++++++++++++++++++++++- print.c | 1 - print.h | 2 +- types.h | 135 ---------------------------------------------------------- 15 files changed, 169 insertions(+), 170 deletions(-) delete mode 100644 types.h diff --git a/Makefile b/Makefile index 642246b..db2f57f 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ OBJFILES=$(CFILES:.c=.o) all: $(NAME) bp.1 -%.o: %.c %.h types.h utf8.h +%.o: %.c %.h utf8.h $(CC) -c $(ALL_FLAGS) -o $@ $< $(NAME): $(OBJFILES) bp.c diff --git a/bp.c b/bp.c index 5fb8ed9..ffb286e 100644 --- a/bp.c +++ b/bp.c @@ -557,6 +557,7 @@ int main(int argc, char *argv[]) // tracking down memory leaks. free_all_matches(); defs = free_defs(defs, NULL); + free_pat(NULL); while (loaded_files) { file_t *next = loaded_files->next; destroy_file(&loaded_files); diff --git a/definitions.h b/definitions.h index 6dd0920..a171de5 100644 --- a/definitions.h +++ b/definitions.h @@ -5,7 +5,18 @@ #define DEFINITIONS__H #include "files.h" -#include "types.h" +#include "pattern.h" + +// +// Pattern matching rule definition(s) +// +typedef struct def_s { + size_t id; + size_t namelen; + const char *name; + pat_t *pat; + struct def_s *next; +} def_t; __attribute__((nonnull(3,4), returns_nonnull)) def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat); diff --git a/explain.c b/explain.c index 58ddcc1..41939c3 100644 --- a/explain.c +++ b/explain.c @@ -5,7 +5,7 @@ #include #include -#include "types.h" +#include "match.h" #include "utils.h" typedef struct match_node_s { diff --git a/explain.h b/explain.h index dab52b1..fbb2abf 100644 --- a/explain.h +++ b/explain.h @@ -4,7 +4,7 @@ #ifndef EXPLAIN__H #define EXPLAIN__H -#include "types.h" +#include "match.h" __attribute__((nonnull)) void explain_match(match_t *m); diff --git a/files.c b/files.c index 5e9b40e..d391eaf 100644 --- a/files.c +++ b/files.c @@ -182,11 +182,6 @@ void destroy_file(file_t **at_f) f->mmapped = NULL; } - for (pat_t *next; f->pats; f->pats = next) { - next = f->pats->next; - delete(&f->pats); - } - delete(at_f); } diff --git a/files.h b/files.h index 3735348..41c8398 100644 --- a/files.h +++ b/files.h @@ -4,8 +4,6 @@ #ifndef FILES__H #define FILES__H -#include "types.h" - #include #include @@ -17,7 +15,6 @@ typedef struct file_s { char *mmapped, *allocated; char **lines, *start, *end; size_t nlines; - struct pat_s *pats; } file_t; __attribute__((nonnull(2))) diff --git a/json.h b/json.h index 4450387..538caa6 100644 --- a/json.h +++ b/json.h @@ -6,7 +6,7 @@ #include -#include "types.h" +#include "match.h" __attribute__((nonnull)) void json_match(const char *text, match_t *m, bool verbose); diff --git a/match.c b/match.c index 6089a2d..9e8e388 100644 --- a/match.c +++ b/match.c @@ -12,7 +12,6 @@ #include "definitions.h" #include "match.h" #include "pattern.h" -#include "types.h" #include "utils.h" #include "utf8.h" @@ -518,19 +517,13 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) { // Temporarily add a rule that the backref name matches the // exact string of the original match (no replacements) - pat_t *backref = bp_backref(f, m1); + pat_t *backref = bp_raw_literal(f, m1->start, (size_t)(m1->end - m1->start)); def_t *defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref); ++m1->refcount; { m2 = match(defs2, cache, f, m1->end, pat->args.multiple.second, ignorecase); if (!m2) { // No need to keep the backref in memory if it didn't match - for (pat_t **rem = &f->pats; *rem; rem = &(*rem)->next) { - if ((*rem) == backref) { - pat_t *tmp = *rem; - *rem = (*rem)->next; - free(tmp); - break; - } - } + free_pat(backref); + backref = NULL; } defs = free_defs(defs2, defs); } --m1->refcount; diff --git a/match.h b/match.h index b6e7392..db948b8 100644 --- a/match.h +++ b/match.h @@ -8,7 +8,31 @@ #include #include "files.h" -#include "types.h" +#include "pattern.h" +#include "definitions.h" + +struct match_s; // forward declared to resolve circular struct defs + +typedef struct { + struct match_s **home, *next; +} match_dll_t; + +// #define MATCH_FROM(node, name) ((match_t*)((char*)node + (size_t)(&((match_t*)0)->name))) + +// +// Pattern matching result object +// +typedef struct match_s { + // Where the match starts and ends (end is after the last character) + const char *start, *end; + pat_t *pat; + // Intrusive linked list nodes for garbage collection and cache buckets: + match_dll_t gc, cache; + size_t defs_id; + int refcount; + struct match_s **children; + struct match_s *_children[3]; +} match_t; __attribute__((returns_nonnull)) match_t *new_match(def_t *defs, pat_t *pat, const char *start, const char *end, match_t *children[]); diff --git a/pattern.c b/pattern.c index 67f6ae1..dbd48a1 100644 --- a/pattern.c +++ b/pattern.c @@ -14,6 +14,8 @@ #include "utils.h" #include "utf8.h" +static pat_t *allocated_pats = NULL; + __attribute__((nonnull)) static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl); __attribute__((nonnull)) @@ -48,10 +50,11 @@ static inline void parse_err(const char *start, const char *end, const char *msg __attribute__((returns_nonnull, nonnull(1,2))) static pat_t *new_pat(file_t *f, const char *start, const char *end, size_t minlen, ssize_t maxlen, enum pattype_e type) { + (void)f; static size_t next_pat_id = 1; pat_t *pat = new(pat_t); *pat = (pat_t){ - .next = f->pats, + .next = allocated_pats, .type = type, .start = start, .end = end, @@ -59,7 +62,7 @@ static pat_t *new_pat(file_t *f, const char *start, const char *end, size_t minl .max_matchlen = maxlen, .id = next_pat_id++, }; - f->pats = pat; + allocated_pats = pat; return pat; } @@ -610,12 +613,11 @@ static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl) // // Return a new back reference to an existing match. // -pat_t *bp_backref(file_t *f, match_t *m) +pat_t *bp_raw_literal(file_t *f, const char *str, size_t len) { - size_t len = (size_t)(m->end - m->start); - pat_t *backref = new_pat(f, m->start, m->end, len, (ssize_t)len, BP_STRING); - backref->args.string = m->start; - return backref; + pat_t *lit = new_pat(f, str, &str[len], len, (ssize_t)len, BP_STRING); + lit->args.string = str; + return lit; } // @@ -634,4 +636,24 @@ maybe_pat_t bp_pattern(file_t *f, const char *str) return (maybe_pat_t){.success = false, .value.error.start = str, .value.error.end = f->end, .value.error.msg = "Failed to parse this pattern"}; } +void free_pat(pat_t *target) +{ + if (target) { + for (pat_t **rem = &allocated_pats; *rem; rem = &(*rem)->next) { + if ((*rem) == target) { + pat_t *tmp = *rem; + *rem = (*rem)->next; + free(tmp); + break; + } + } + } else { + while (allocated_pats) { + pat_t *tofree = allocated_pats; + allocated_pats = tofree->next; + free(tofree); + } + } +} + // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/pattern.h b/pattern.h index 005e658..1407cd1 100644 --- a/pattern.h +++ b/pattern.h @@ -4,8 +4,98 @@ #ifndef PATTERN__H #define PATTERN__H +#include + #include "files.h" -#include "types.h" + +#define UNBOUNDED(pat) ((pat)->max_matchlen == -1) + +// BP virtual machine pattern types +enum pattype_e { + BP_ANYCHAR = 1, + BP_ID_START = 2, + BP_ID_CONTINUE = 3, + BP_STRING = 4, + BP_RANGE = 5, + BP_NOT = 6, + BP_UPTO = 7, + BP_UPTO_STRICT = 8, + BP_REPEAT = 9, + BP_BEFORE = 10, + BP_AFTER = 11, + BP_CAPTURE = 12, + BP_OTHERWISE = 13, + BP_CHAIN = 14, + BP_MATCH = 15, + BP_NOT_MATCH = 16, + BP_REPLACE = 17, + BP_REF = 18, + BP_NODENT = 19, + BP_START_OF_FILE = 20, + BP_START_OF_LINE = 21, + BP_END_OF_FILE = 22, + BP_END_OF_LINE = 23, + BP_WORD_BOUNDARY = 24, + BP_DEFINITION = 25, + BP_LEFTRECURSION = 26, +}; + +// +// A struct reperesenting a BP virtual machine operation +// +typedef struct pat_s { + struct pat_s *next; + enum pattype_e type; + const char *start, *end; + // The bounds of the match length (used for backtracking) + size_t min_matchlen; + ssize_t max_matchlen; // -1 means unbounded length + union { + const char *string; + struct { + const char *name; + size_t len; + } ref; + struct { + const char *name; + size_t namelen; + struct pat_s *def, *pat; + } def; + struct { + unsigned char low, high; + } range; + struct { + size_t min; + ssize_t max; + struct pat_s *sep, *repeat_pat; + } repetitions; + // TODO: use a linked list instead of a binary tree + struct { + struct pat_s *first, *second; + } multiple; + struct { + struct pat_s *pat; + const char *text; + size_t len; + } replace; + struct { + struct pat_s *capture_pat; + const char *name; + size_t namelen; + } capture; + struct { + struct match_s *match; + unsigned int visits; + const char *at; + struct pat_s *fallback; + } leftrec; + struct { + const char *start, *end, *msg; + } error; + struct pat_s *pat; + } args; + size_t id; +} pat_t; typedef struct { bool success; @@ -18,7 +108,7 @@ typedef struct { } maybe_pat_t; __attribute__((nonnull, returns_nonnull)) -pat_t *bp_backref(file_t *f, match_t *m); +pat_t *bp_raw_literal(file_t *f, const char *str, size_t len); __attribute__((nonnull)) maybe_pat_t bp_stringpattern(file_t *f, const char *str); __attribute__((nonnull(1,2))) @@ -30,6 +120,8 @@ pat_t *either_pat(file_t *f, pat_t *first, pat_t *second); __attribute__((nonnull)) maybe_pat_t bp_pattern(file_t *f, const char *str); +void free_pat(pat_t *pat); + #endif // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/print.c b/print.c index a5d3b33..a5c27af 100644 --- a/print.c +++ b/print.c @@ -11,7 +11,6 @@ #include "files.h" #include "match.h" #include "print.h" -#include "types.h" #include "utils.h" static const char *color_match = "\033[0;31;1m"; diff --git a/print.h b/print.h index 70427a4..b690d69 100644 --- a/print.h +++ b/print.h @@ -6,8 +6,8 @@ #include -#include "types.h" #include "files.h" +#include "match.h" #define USE_DEFAULT_CONTEXT -3 #define ALL_CONTEXT -2 diff --git a/types.h b/types.h deleted file mode 100644 index 2dda92d..0000000 --- a/types.h +++ /dev/null @@ -1,135 +0,0 @@ -// -// types.h - Datatypes used by BP -// -#ifndef TYPES__H -#define TYPES__H - -#include -#include - -#define UNBOUNDED(pat) ((pat)->max_matchlen == -1) - -// BP virtual machine pattern types -enum pattype_e { - BP_ANYCHAR = 1, - BP_ID_START = 2, - BP_ID_CONTINUE = 3, - BP_STRING = 4, - BP_RANGE = 5, - BP_NOT = 6, - BP_UPTO = 7, - BP_UPTO_STRICT = 8, - BP_REPEAT = 9, - BP_BEFORE = 10, - BP_AFTER = 11, - BP_CAPTURE = 12, - BP_OTHERWISE = 13, - BP_CHAIN = 14, - BP_MATCH = 15, - BP_NOT_MATCH = 16, - BP_REPLACE = 17, - BP_REF = 18, - BP_NODENT = 19, - BP_START_OF_FILE = 20, - BP_START_OF_LINE = 21, - BP_END_OF_FILE = 22, - BP_END_OF_LINE = 23, - BP_WORD_BOUNDARY = 24, - BP_DEFINITION = 25, - BP_LEFTRECURSION = 26, -}; - -struct match_s; // forward declared to resolve circular struct defs - -// -// A struct reperesenting a BP virtual machine operation -// -typedef struct pat_s { - struct pat_s *next; - enum pattype_e type; - const char *start, *end; - // The bounds of the match length (used for backtracking) - size_t min_matchlen; - ssize_t max_matchlen; // -1 means unbounded length - union { - const char *string; - struct { - const char *name; - size_t len; - } ref; - struct { - const char *name; - size_t namelen; - struct pat_s *def, *pat; - } def; - struct { - unsigned char low, high; - } range; - struct { - size_t min; - ssize_t max; - struct pat_s *sep, *repeat_pat; - } repetitions; - // TODO: use a linked list instead of a binary tree - struct { - struct pat_s *first, *second; - } multiple; - struct { - struct pat_s *pat; - const char *text; - size_t len; - } replace; - struct { - struct pat_s *capture_pat; - const char *name; - size_t namelen; - } capture; - struct match_s *backref; - struct { - struct match_s *match; - unsigned int visits; - const char *at; - struct pat_s *fallback; - } leftrec; - struct { - const char *start, *end, *msg; - } error; - struct pat_s *pat; - } args; - size_t id; -} pat_t; - -typedef struct { - struct match_s **home, *next; -} match_dll_t; - -// #define MATCH_FROM(node, name) ((match_t*)((char*)node + (size_t)(&((match_t*)0)->name))) - -// -// Pattern matching result object -// -typedef struct match_s { - // Where the match starts and ends (end is after the last character) - const char *start, *end; - pat_t *pat; - // Intrusive linked list nodes for garbage collection and cache buckets: - match_dll_t gc, cache; - size_t defs_id; - int refcount; - struct match_s **children; - struct match_s *_children[3]; -} match_t; - -// -// Pattern matching rule definition(s) -// -typedef struct def_s { - size_t id; - size_t namelen; - const char *name; - pat_t *pat; - struct def_s *next; -} def_t; - -#endif -// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 -- cgit v1.2.3