aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--bp.c1
-rw-r--r--definitions.h13
-rw-r--r--explain.c2
-rw-r--r--explain.h2
-rw-r--r--files.c5
-rw-r--r--files.h3
-rw-r--r--json.h2
-rw-r--r--match.c13
-rw-r--r--match.h26
-rw-r--r--pattern.c36
-rw-r--r--pattern.h96
-rw-r--r--print.c1
-rw-r--r--print.h2
-rw-r--r--types.h135
15 files changed, 169 insertions, 170 deletions
diff --git a/Makefile b/Makefile
index 642246b..db2f57f 100644
--- a/Makefile
+++ b/Makefile
@@ -27,7 +27,7 @@ OBJFILES=$(CFILES:.c=.o)
all: $(NAME) bp.1
-%.o: %.c %.h types.h utf8.h
+%.o: %.c %.h utf8.h
$(CC) -c $(ALL_FLAGS) -o $@ $<
$(NAME): $(OBJFILES) bp.c
diff --git a/bp.c b/bp.c
index 5fb8ed9..ffb286e 100644
--- a/bp.c
+++ b/bp.c
@@ -557,6 +557,7 @@ int main(int argc, char *argv[])
// tracking down memory leaks.
free_all_matches();
defs = free_defs(defs, NULL);
+ free_pat(NULL);
while (loaded_files) {
file_t *next = loaded_files->next;
destroy_file(&loaded_files);
diff --git a/definitions.h b/definitions.h
index 6dd0920..a171de5 100644
--- a/definitions.h
+++ b/definitions.h
@@ -5,7 +5,18 @@
#define DEFINITIONS__H
#include "files.h"
-#include "types.h"
+#include "pattern.h"
+
+//
+// Pattern matching rule definition(s)
+//
+typedef struct def_s {
+ size_t id;
+ size_t namelen;
+ const char *name;
+ pat_t *pat;
+ struct def_s *next;
+} def_t;
__attribute__((nonnull(3,4), returns_nonnull))
def_t *with_def(def_t *defs, size_t namelen, const char *name, pat_t *pat);
diff --git a/explain.c b/explain.c
index 58ddcc1..41939c3 100644
--- a/explain.c
+++ b/explain.c
@@ -5,7 +5,7 @@
#include <stdio.h>
#include <string.h>
-#include "types.h"
+#include "match.h"
#include "utils.h"
typedef struct match_node_s {
diff --git a/explain.h b/explain.h
index dab52b1..fbb2abf 100644
--- a/explain.h
+++ b/explain.h
@@ -4,7 +4,7 @@
#ifndef EXPLAIN__H
#define EXPLAIN__H
-#include "types.h"
+#include "match.h"
__attribute__((nonnull))
void explain_match(match_t *m);
diff --git a/files.c b/files.c
index 5e9b40e..d391eaf 100644
--- a/files.c
+++ b/files.c
@@ -182,11 +182,6 @@ void destroy_file(file_t **at_f)
f->mmapped = NULL;
}
- for (pat_t *next; f->pats; f->pats = next) {
- next = f->pats->next;
- delete(&f->pats);
- }
-
delete(at_f);
}
diff --git a/files.h b/files.h
index 3735348..41c8398 100644
--- a/files.h
+++ b/files.h
@@ -4,8 +4,6 @@
#ifndef FILES__H
#define FILES__H
-#include "types.h"
-
#include <stdio.h>
#include <unistd.h>
@@ -17,7 +15,6 @@ typedef struct file_s {
char *mmapped, *allocated;
char **lines, *start, *end;
size_t nlines;
- struct pat_s *pats;
} file_t;
__attribute__((nonnull(2)))
diff --git a/json.h b/json.h
index 4450387..538caa6 100644
--- a/json.h
+++ b/json.h
@@ -6,7 +6,7 @@
#include <stdbool.h>
-#include "types.h"
+#include "match.h"
__attribute__((nonnull))
void json_match(const char *text, match_t *m, bool verbose);
diff --git a/match.c b/match.c
index 6089a2d..9e8e388 100644
--- a/match.c
+++ b/match.c
@@ -12,7 +12,6 @@
#include "definitions.h"
#include "match.h"
#include "pattern.h"
-#include "types.h"
#include "utils.h"
#include "utf8.h"
@@ -518,19 +517,13 @@ static match_t *match(def_t *defs, cache_t *cache, file_t *f, const char *str, p
if (m1->pat->type == BP_CAPTURE && m1->pat->args.capture.name) {
// Temporarily add a rule that the backref name matches the
// exact string of the original match (no replacements)
- pat_t *backref = bp_backref(f, m1);
+ pat_t *backref = bp_raw_literal(f, m1->start, (size_t)(m1->end - m1->start));
def_t *defs2 = with_def(defs, m1->pat->args.capture.namelen, m1->pat->args.capture.name, backref);
++m1->refcount; {
m2 = match(defs2, cache, f, m1->end, pat->args.multiple.second, ignorecase);
if (!m2) { // No need to keep the backref in memory if it didn't match
- for (pat_t **rem = &f->pats; *rem; rem = &(*rem)->next) {
- if ((*rem) == backref) {
- pat_t *tmp = *rem;
- *rem = (*rem)->next;
- free(tmp);
- break;
- }
- }
+ free_pat(backref);
+ backref = NULL;
}
defs = free_defs(defs2, defs);
} --m1->refcount;
diff --git a/match.h b/match.h
index b6e7392..db948b8 100644
--- a/match.h
+++ b/match.h
@@ -8,7 +8,31 @@
#include <stdio.h>
#include "files.h"
-#include "types.h"
+#include "pattern.h"
+#include "definitions.h"
+
+struct match_s; // forward declared to resolve circular struct defs
+
+typedef struct {
+ struct match_s **home, *next;
+} match_dll_t;
+
+// #define MATCH_FROM(node, name) ((match_t*)((char*)node + (size_t)(&((match_t*)0)->name)))
+
+//
+// Pattern matching result object
+//
+typedef struct match_s {
+ // Where the match starts and ends (end is after the last character)
+ const char *start, *end;
+ pat_t *pat;
+ // Intrusive linked list nodes for garbage collection and cache buckets:
+ match_dll_t gc, cache;
+ size_t defs_id;
+ int refcount;
+ struct match_s **children;
+ struct match_s *_children[3];
+} match_t;
__attribute__((returns_nonnull))
match_t *new_match(def_t *defs, pat_t *pat, const char *start, const char *end, match_t *children[]);
diff --git a/pattern.c b/pattern.c
index 67f6ae1..dbd48a1 100644
--- a/pattern.c
+++ b/pattern.c
@@ -14,6 +14,8 @@
#include "utils.h"
#include "utf8.h"
+static pat_t *allocated_pats = NULL;
+
__attribute__((nonnull))
static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl);
__attribute__((nonnull))
@@ -48,10 +50,11 @@ static inline void parse_err(const char *start, const char *end, const char *msg
__attribute__((returns_nonnull, nonnull(1,2)))
static pat_t *new_pat(file_t *f, const char *start, const char *end, size_t minlen, ssize_t maxlen, enum pattype_e type)
{
+ (void)f;
static size_t next_pat_id = 1;
pat_t *pat = new(pat_t);
*pat = (pat_t){
- .next = f->pats,
+ .next = allocated_pats,
.type = type,
.start = start,
.end = end,
@@ -59,7 +62,7 @@ static pat_t *new_pat(file_t *f, const char *start, const char *end, size_t minl
.max_matchlen = maxlen,
.id = next_pat_id++,
};
- f->pats = pat;
+ allocated_pats = pat;
return pat;
}
@@ -610,12 +613,11 @@ static pat_t *bp_pattern_nl(file_t *f, const char *str, bool allow_nl)
//
// Return a new back reference to an existing match.
//
-pat_t *bp_backref(file_t *f, match_t *m)
+pat_t *bp_raw_literal(file_t *f, const char *str, size_t len)
{
- size_t len = (size_t)(m->end - m->start);
- pat_t *backref = new_pat(f, m->start, m->end, len, (ssize_t)len, BP_STRING);
- backref->args.string = m->start;
- return backref;
+ pat_t *lit = new_pat(f, str, &str[len], len, (ssize_t)len, BP_STRING);
+ lit->args.string = str;
+ return lit;
}
//
@@ -634,4 +636,24 @@ maybe_pat_t bp_pattern(file_t *f, const char *str)
return (maybe_pat_t){.success = false, .value.error.start = str, .value.error.end = f->end, .value.error.msg = "Failed to parse this pattern"};
}
+void free_pat(pat_t *target)
+{
+ if (target) {
+ for (pat_t **rem = &allocated_pats; *rem; rem = &(*rem)->next) {
+ if ((*rem) == target) {
+ pat_t *tmp = *rem;
+ *rem = (*rem)->next;
+ free(tmp);
+ break;
+ }
+ }
+ } else {
+ while (allocated_pats) {
+ pat_t *tofree = allocated_pats;
+ allocated_pats = tofree->next;
+ free(tofree);
+ }
+ }
+}
+
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/pattern.h b/pattern.h
index 005e658..1407cd1 100644
--- a/pattern.h
+++ b/pattern.h
@@ -4,8 +4,98 @@
#ifndef PATTERN__H
#define PATTERN__H
+#include <stdbool.h>
+
#include "files.h"
-#include "types.h"
+
+#define UNBOUNDED(pat) ((pat)->max_matchlen == -1)
+
+// BP virtual machine pattern types
+enum pattype_e {
+ BP_ANYCHAR = 1,
+ BP_ID_START = 2,
+ BP_ID_CONTINUE = 3,
+ BP_STRING = 4,
+ BP_RANGE = 5,
+ BP_NOT = 6,
+ BP_UPTO = 7,
+ BP_UPTO_STRICT = 8,
+ BP_REPEAT = 9,
+ BP_BEFORE = 10,
+ BP_AFTER = 11,
+ BP_CAPTURE = 12,
+ BP_OTHERWISE = 13,
+ BP_CHAIN = 14,
+ BP_MATCH = 15,
+ BP_NOT_MATCH = 16,
+ BP_REPLACE = 17,
+ BP_REF = 18,
+ BP_NODENT = 19,
+ BP_START_OF_FILE = 20,
+ BP_START_OF_LINE = 21,
+ BP_END_OF_FILE = 22,
+ BP_END_OF_LINE = 23,
+ BP_WORD_BOUNDARY = 24,
+ BP_DEFINITION = 25,
+ BP_LEFTRECURSION = 26,
+};
+
+//
+// A struct reperesenting a BP virtual machine operation
+//
+typedef struct pat_s {
+ struct pat_s *next;
+ enum pattype_e type;
+ const char *start, *end;
+ // The bounds of the match length (used for backtracking)
+ size_t min_matchlen;
+ ssize_t max_matchlen; // -1 means unbounded length
+ union {
+ const char *string;
+ struct {
+ const char *name;
+ size_t len;
+ } ref;
+ struct {
+ const char *name;
+ size_t namelen;
+ struct pat_s *def, *pat;
+ } def;
+ struct {
+ unsigned char low, high;
+ } range;
+ struct {
+ size_t min;
+ ssize_t max;
+ struct pat_s *sep, *repeat_pat;
+ } repetitions;
+ // TODO: use a linked list instead of a binary tree
+ struct {
+ struct pat_s *first, *second;
+ } multiple;
+ struct {
+ struct pat_s *pat;
+ const char *text;
+ size_t len;
+ } replace;
+ struct {
+ struct pat_s *capture_pat;
+ const char *name;
+ size_t namelen;
+ } capture;
+ struct {
+ struct match_s *match;
+ unsigned int visits;
+ const char *at;
+ struct pat_s *fallback;
+ } leftrec;
+ struct {
+ const char *start, *end, *msg;
+ } error;
+ struct pat_s *pat;
+ } args;
+ size_t id;
+} pat_t;
typedef struct {
bool success;
@@ -18,7 +108,7 @@ typedef struct {
} maybe_pat_t;
__attribute__((nonnull, returns_nonnull))
-pat_t *bp_backref(file_t *f, match_t *m);
+pat_t *bp_raw_literal(file_t *f, const char *str, size_t len);
__attribute__((nonnull))
maybe_pat_t bp_stringpattern(file_t *f, const char *str);
__attribute__((nonnull(1,2)))
@@ -30,6 +120,8 @@ pat_t *either_pat(file_t *f, pat_t *first, pat_t *second);
__attribute__((nonnull))
maybe_pat_t bp_pattern(file_t *f, const char *str);
+void free_pat(pat_t *pat);
+
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/print.c b/print.c
index a5d3b33..a5c27af 100644
--- a/print.c
+++ b/print.c
@@ -11,7 +11,6 @@
#include "files.h"
#include "match.h"
#include "print.h"
-#include "types.h"
#include "utils.h"
static const char *color_match = "\033[0;31;1m";
diff --git a/print.h b/print.h
index 70427a4..b690d69 100644
--- a/print.h
+++ b/print.h
@@ -6,8 +6,8 @@
#include <stdbool.h>
-#include "types.h"
#include "files.h"
+#include "match.h"
#define USE_DEFAULT_CONTEXT -3
#define ALL_CONTEXT -2
diff --git a/types.h b/types.h
deleted file mode 100644
index 2dda92d..0000000
--- a/types.h
+++ /dev/null
@@ -1,135 +0,0 @@
-//
-// types.h - Datatypes used by BP
-//
-#ifndef TYPES__H
-#define TYPES__H
-
-#include <stdbool.h>
-#include <sys/types.h>
-
-#define UNBOUNDED(pat) ((pat)->max_matchlen == -1)
-
-// BP virtual machine pattern types
-enum pattype_e {
- BP_ANYCHAR = 1,
- BP_ID_START = 2,
- BP_ID_CONTINUE = 3,
- BP_STRING = 4,
- BP_RANGE = 5,
- BP_NOT = 6,
- BP_UPTO = 7,
- BP_UPTO_STRICT = 8,
- BP_REPEAT = 9,
- BP_BEFORE = 10,
- BP_AFTER = 11,
- BP_CAPTURE = 12,
- BP_OTHERWISE = 13,
- BP_CHAIN = 14,
- BP_MATCH = 15,
- BP_NOT_MATCH = 16,
- BP_REPLACE = 17,
- BP_REF = 18,
- BP_NODENT = 19,
- BP_START_OF_FILE = 20,
- BP_START_OF_LINE = 21,
- BP_END_OF_FILE = 22,
- BP_END_OF_LINE = 23,
- BP_WORD_BOUNDARY = 24,
- BP_DEFINITION = 25,
- BP_LEFTRECURSION = 26,
-};
-
-struct match_s; // forward declared to resolve circular struct defs
-
-//
-// A struct reperesenting a BP virtual machine operation
-//
-typedef struct pat_s {
- struct pat_s *next;
- enum pattype_e type;
- const char *start, *end;
- // The bounds of the match length (used for backtracking)
- size_t min_matchlen;
- ssize_t max_matchlen; // -1 means unbounded length
- union {
- const char *string;
- struct {
- const char *name;
- size_t len;
- } ref;
- struct {
- const char *name;
- size_t namelen;
- struct pat_s *def, *pat;
- } def;
- struct {
- unsigned char low, high;
- } range;
- struct {
- size_t min;
- ssize_t max;
- struct pat_s *sep, *repeat_pat;
- } repetitions;
- // TODO: use a linked list instead of a binary tree
- struct {
- struct pat_s *first, *second;
- } multiple;
- struct {
- struct pat_s *pat;
- const char *text;
- size_t len;
- } replace;
- struct {
- struct pat_s *capture_pat;
- const char *name;
- size_t namelen;
- } capture;
- struct match_s *backref;
- struct {
- struct match_s *match;
- unsigned int visits;
- const char *at;
- struct pat_s *fallback;
- } leftrec;
- struct {
- const char *start, *end, *msg;
- } error;
- struct pat_s *pat;
- } args;
- size_t id;
-} pat_t;
-
-typedef struct {
- struct match_s **home, *next;
-} match_dll_t;
-
-// #define MATCH_FROM(node, name) ((match_t*)((char*)node + (size_t)(&((match_t*)0)->name)))
-
-//
-// Pattern matching result object
-//
-typedef struct match_s {
- // Where the match starts and ends (end is after the last character)
- const char *start, *end;
- pat_t *pat;
- // Intrusive linked list nodes for garbage collection and cache buckets:
- match_dll_t gc, cache;
- size_t defs_id;
- int refcount;
- struct match_s **children;
- struct match_s *_children[3];
-} match_t;
-
-//
-// Pattern matching rule definition(s)
-//
-typedef struct def_s {
- size_t id;
- size_t namelen;
- const char *name;
- pat_t *pat;
- struct def_s *next;
-} def_t;
-
-#endif
-// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0