// code / bp
//
// Lines 4.3K: C 3.3K, Markdown 541, YAML 273, make 110, Shell 77, Lua 54
// (186 lines)
//
// pattern.h - Header file for BP pattern compilation.
//
#pragma once

// <stdbool.h>, <stdint.h> and <stdio.h> supply the `bool`, fixed-width integer
// and `FILE` types used by the declarations in this header; `size_t` is
// available through <stdio.h> / <sys/types.h>.
#include <err.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
// BP virtual machine pattern types
//
// Every enumerator has an explicit value, so inserting or reordering entries
// cannot silently renumber the others. BP_ERROR is 0, which means a
// zero-initialized `enum bp_pattype_e` reads as BP_ERROR.
enum bp_pattype_e {
    BP_ERROR = 0,
    BP_ANYCHAR = 1,
    BP_ID_START = 2,
    BP_ID_CONTINUE = 3,
    BP_STRING = 4,
    BP_RANGE = 5,
    BP_NOT = 6,
    BP_UPTO = 7,
    BP_UPTO_STRICT = 8,
    BP_REPEAT = 9,
    BP_BEFORE = 10,
    BP_AFTER = 11,
    BP_CAPTURE = 12,
    BP_OTHERWISE = 13,
    BP_CHAIN = 14,
    BP_MATCH = 15,
    BP_NOT_MATCH = 16,
    BP_REPLACE = 17,
    BP_REF = 18,
    BP_NODENT = 19,
    BP_CURDENT = 20,
    BP_START_OF_FILE = 21,
    BP_START_OF_LINE = 22,
    BP_END_OF_FILE = 23,
    BP_END_OF_LINE = 24,
    BP_WORD_BOUNDARY = 25,
    BP_DEFINITIONS = 26,
    BP_TAGGED = 27,
    BP_LEFTRECURSION = 28,
};
45 //
46 // A struct reperesenting a BP virtual machine operation
47 //
48 typedef struct bp_pat_s bp_pat_t;
49 struct bp_pat_s {
50 bp_pat_t *next, **home;
51 enum bp_pattype_e type;
52 uint32_t id;
53 const char *start, *end;
54 // The bounds of the match length (used for backtracking)
55 uint32_t min_matchlen;
56 int32_t max_matchlen; // -1 means unbounded length
57 union {
58 struct {
59 const char *start, *end, *msg;
60 } BP_ERROR;
61 struct {
62 } BP_ANYCHAR;
63 struct {
64 } BP_ID_START;
65 struct {
66 } BP_ID_CONTINUE;
67 struct {
68 const char *string;
69 size_t len;
70 } BP_STRING;
71 struct {
72 unsigned char low, high;
73 } BP_RANGE;
74 struct {
75 bp_pat_t *pat;
76 } BP_NOT;
77 struct {
78 bp_pat_t *target, *skip;
79 } BP_UPTO;
80 struct {
81 bp_pat_t *target, *skip;
82 } BP_UPTO_STRICT;
83 struct {
84 uint32_t min;
85 int32_t max;
86 bp_pat_t *sep, *repeat_pat;
87 } BP_REPEAT;
88 struct {
89 bp_pat_t *pat;
90 } BP_BEFORE;
91 struct {
92 bp_pat_t *pat;
93 } BP_AFTER;
94 struct {
95 bp_pat_t *pat;
96 const char *name;
97 uint16_t namelen;
98 bool backreffable;
99 } BP_CAPTURE;
100 struct {
101 bp_pat_t *first, *second;
102 } BP_OTHERWISE;
103 struct {
104 bp_pat_t *first, *second;
105 } BP_CHAIN;
106 struct {
107 bp_pat_t *pat, *must_match;
108 } BP_MATCH;
109 struct {
110 bp_pat_t *pat, *must_not_match;
111 } BP_NOT_MATCH;
112 struct {
113 bp_pat_t *pat;
114 const char *text;
115 uint32_t len;
116 } BP_REPLACE;
117 struct {
118 const char *name;
119 uint32_t len;
120 } BP_REF;
121 struct {
122 } BP_NODENT;
123 struct {
124 } BP_CURDENT;
125 struct {
126 } BP_START_OF_FILE;
127 struct {
128 } BP_START_OF_LINE;
129 struct {
130 } BP_END_OF_FILE;
131 struct {
132 } BP_END_OF_LINE;
133 struct {
134 } BP_WORD_BOUNDARY;
135 struct {
136 const char *name;
137 uint32_t namelen;
138 bp_pat_t *meaning, *next_def;
139 } BP_DEFINITIONS;
140 struct {
141 bp_pat_t *pat;
142 const char *name;
143 uint16_t namelen;
144 bool backreffable;
145 } BP_TAGGED;
146 struct {
147 struct bp_match_s *match;
148 const char *at;
149 bp_pat_t *fallback;
150 void *ctx;
151 bool visited;
152 } BP_LEFTRECURSION;
153 } __tagged;
156 typedef struct leftrec_info_s {
157 struct bp_match_s *match;
158 const char *at;
159 bp_pat_t *fallback;
160 void *ctx;
161 bool visited;
162 } leftrec_info_t;
164 typedef struct {
165 bool success;
166 union {
167 bp_pat_t *pat;
168 struct {
169 const char *start, *end, *msg;
170 } error;
171 } value;
172 } maybe_pat_t;
174 __attribute__((returns_nonnull)) bp_pat_t *allocate_pat(bp_pat_t pat);
175 __attribute__((nonnull, returns_nonnull)) bp_pat_t *bp_raw_literal(const char *str, size_t len);
176 __attribute__((nonnull(1))) maybe_pat_t bp_stringpattern(const char *str, const char *end);
177 __attribute__((nonnull(1, 2))) maybe_pat_t bp_replacement(bp_pat_t *replacepat, const char *replacement,
178 const char *end);
179 bp_pat_t *chain_together(bp_pat_t *first, bp_pat_t *second);
180 bp_pat_t *either_pat(bp_pat_t *first, bp_pat_t *second);
181 __attribute__((nonnull(1))) maybe_pat_t bp_pattern(const char *str, const char *end);
182 void free_all_pats(void);
183 __attribute__((nonnull)) void delete_pat(bp_pat_t **at_pat, bool recursive);
184 int fprint_pattern(FILE *stream, bp_pat_t *pat);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0