Converted ^/^^/$/$$ into pattern types instead of builtin definitions
This commit is contained in:
parent
b09bd4c746
commit
3359a804c8
@ -17,7 +17,7 @@ String-pattern: ..%(\n / Nodent / Escape / `\ pat [`;])$$
|
||||
pat: simple-pat !(__("!="/"==")) / suffixed-pat
|
||||
simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range
|
||||
/ Escape / Repeat / Optional / No / After / Before / Capture
|
||||
/ Ref / parens
|
||||
/ Start-of-File / Start-of-Line / End-of-File / End-of-Line / Ref / parens
|
||||
|
||||
suffixed-pat: (
|
||||
Eq-pat
|
||||
@ -63,6 +63,10 @@ Replace: (
|
||||
@replace-pat=(Replace / Chain / pat) __ "=>" (__ @replacement=String / @!=(''=> "Expected replacement string"))
|
||||
)
|
||||
Ref: @name=id !(__`:)
|
||||
Start-of-File: "^^"
|
||||
Start-of-Line: "^"
|
||||
End-of-File: "$$"
|
||||
End-of-Line: "$"
|
||||
|
||||
parens: `( __ extended-pat (__ `) / @!=(''=> "Expected closing parenthesis here"))
|
||||
|
||||
@ -73,11 +77,7 @@ extended-pat: Otherwise / Replace / Chain / pat
|
||||
# Special-symbol rules:
|
||||
_: *(` / \t)
|
||||
__: *(` / \t / \r / \n / comment)
|
||||
$$: !(./\n)
|
||||
$: !.
|
||||
^^: !<(./\n)
|
||||
^: !<.
|
||||
|
||||
id: "^^" / "^" / "__" / "_" / "$$" / "$" / "|" / `a-z,A-Z *`a-z,A-Z,0-9,-
|
||||
id: "__" / "_" / "|" / `a-z,A-Z *`a-z,A-Z,0-9,-
|
||||
|
||||
comment: `# .. $
|
||||
|
@ -38,9 +38,5 @@ esc: \e
|
||||
tab: \t
|
||||
nl: \n; lf: \n
|
||||
comment: !''; # No default definition, can be overridden
|
||||
$$: !(./\n)
|
||||
$: !.
|
||||
^^: !<(./\n)
|
||||
^: !<.
|
||||
__: *(` /\t/\n/\r/comment)
|
||||
_: *(` /\t)
|
||||
|
127
match.c
127
match.c
@ -36,7 +36,7 @@ static match_t *in_use_matches = NULL;
|
||||
#endif
|
||||
|
||||
__attribute__((returns_nonnull))
|
||||
static match_t *new_match(void);
|
||||
static match_t *new_match(pat_t *pat, const char *start, const char *end, match_t *child);
|
||||
__attribute__((nonnull, pure))
|
||||
static inline const char *next_char(file_t *f, const char *str);
|
||||
__attribute__((nonnull))
|
||||
@ -172,34 +172,32 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
}
|
||||
}
|
||||
case BP_ANYCHAR: {
|
||||
if (str >= f->end || *str == '\n')
|
||||
return NULL;
|
||||
match_t *m = new_match();
|
||||
m->pat = pat;
|
||||
m->start = str;
|
||||
m->end = next_char(f, str);
|
||||
return m;
|
||||
return (str < f->end && *str != '\n') ? new_match(pat, str, next_char(f, str), NULL) : NULL;
|
||||
}
|
||||
case BP_START_OF_FILE: {
|
||||
return (str == f->contents) ? new_match(pat, str, str, NULL) : NULL;
|
||||
}
|
||||
case BP_START_OF_LINE: {
|
||||
return (str == f->contents || str[-1] == '\n') ? new_match(pat, str, str, NULL) : NULL;
|
||||
}
|
||||
case BP_END_OF_FILE: {
|
||||
return (str == f->end) ? new_match(pat, str, str, NULL) : NULL;
|
||||
}
|
||||
case BP_END_OF_LINE: {
|
||||
return (str == f->end || *str == '\n') ? new_match(pat, str, str, NULL) : NULL;
|
||||
}
|
||||
case BP_STRING: {
|
||||
if (&str[pat->len] > f->end) return NULL;
|
||||
if (ignorecase ? memicmp(str, pat->args.string, (size_t)pat->len) != 0
|
||||
: memcmp(str, pat->args.string, (size_t)pat->len) != 0)
|
||||
return NULL;
|
||||
match_t *m = new_match();
|
||||
m->pat = pat;
|
||||
m->start = str;
|
||||
m->end = str + pat->len;
|
||||
return m;
|
||||
return new_match(pat, str, str + pat->len, NULL);
|
||||
}
|
||||
case BP_RANGE: {
|
||||
if (str >= f->end) return NULL;
|
||||
if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high)
|
||||
return NULL;
|
||||
match_t *m = new_match();
|
||||
m->pat = pat;
|
||||
m->start = str;
|
||||
m->end = str + 1;
|
||||
return m;
|
||||
return new_match(pat, str, str+1, NULL);
|
||||
}
|
||||
case BP_NOT: {
|
||||
match_t *m = match(defs, f, str, pat->args.pat, ignorecase);
|
||||
@ -207,17 +205,10 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
recycle_if_unused(&m);
|
||||
return NULL;
|
||||
}
|
||||
m = new_match();
|
||||
m->pat = pat;
|
||||
m->start = str;
|
||||
m->end = str;
|
||||
return m;
|
||||
return new_match(pat, str, str, NULL);
|
||||
}
|
||||
case BP_UPTO: {
|
||||
match_t *m = new_match();
|
||||
m->start = str;
|
||||
m->pat = pat;
|
||||
|
||||
match_t *m = new_match(pat, str, str, NULL);
|
||||
pat_t *target = pat->args.multiple.first, *skip = pat->args.multiple.second;
|
||||
if (!target && !skip) {
|
||||
while (str < f->end && *str != '\n') ++str;
|
||||
@ -258,11 +249,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
return NULL;
|
||||
}
|
||||
case BP_REPEAT: {
|
||||
match_t *m = new_match();
|
||||
m->start = str;
|
||||
m->end = str;
|
||||
m->pat = pat;
|
||||
|
||||
match_t *m = new_match(pat, str, str, NULL);
|
||||
match_t **dest = &m->child;
|
||||
size_t reps = 0;
|
||||
ssize_t max = pat->args.repetitions.max;
|
||||
@ -319,32 +306,17 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
if (str - backtrack < f->contents) return NULL;
|
||||
match_t *before = match(defs, f, str - backtrack, pat->args.pat, ignorecase);
|
||||
if (before == NULL) return NULL;
|
||||
match_t *m = new_match();
|
||||
m->start = str;
|
||||
m->end = str;
|
||||
m->pat = pat;
|
||||
ADD_OWNER(m->child, before);
|
||||
return m;
|
||||
return new_match(pat, str, str, before);
|
||||
}
|
||||
case BP_BEFORE: {
|
||||
match_t *after = match(defs, f, str, pat->args.pat, ignorecase);
|
||||
if (after == NULL) return NULL;
|
||||
match_t *m = new_match();
|
||||
m->start = str;
|
||||
m->end = str;
|
||||
m->pat = pat;
|
||||
ADD_OWNER(m->child, after);
|
||||
return m;
|
||||
return new_match(pat, str, str, after);
|
||||
}
|
||||
case BP_CAPTURE: {
|
||||
match_t *p = match(defs, f, str, pat->args.pat, ignorecase);
|
||||
if (p == NULL) return NULL;
|
||||
match_t *m = new_match();
|
||||
m->start = str;
|
||||
m->end = p->end;
|
||||
m->pat = pat;
|
||||
ADD_OWNER(m->child, p);
|
||||
return m;
|
||||
return new_match(pat, str, p->end, p);
|
||||
}
|
||||
case BP_OTHERWISE: {
|
||||
match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase);
|
||||
@ -369,13 +341,8 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
recycle_if_unused(&m1);
|
||||
return NULL;
|
||||
}
|
||||
match_t *m = new_match();
|
||||
m->start = str;
|
||||
m->end = m2->end;
|
||||
m->pat = pat;
|
||||
ADD_OWNER(m->child, m1);
|
||||
ADD_OWNER(m1->nextsibling, m2);
|
||||
return m;
|
||||
return new_match(pat, str, m2->end, m1);
|
||||
}
|
||||
case BP_EQUAL: case BP_NOT_EQUAL: {
|
||||
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
|
||||
@ -397,17 +364,12 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
if (m2 != NULL) recycle_if_unused(&m2);
|
||||
return NULL;
|
||||
}
|
||||
match_t *m = new_match();
|
||||
m->start = m1->start;
|
||||
m->end = m1->end;
|
||||
m->pat = pat;
|
||||
ADD_OWNER(m->child, m1);
|
||||
if (pat->type == BP_EQUAL) {
|
||||
ADD_OWNER(m1->nextsibling, m2);
|
||||
} else {
|
||||
recycle_if_unused(&m2);
|
||||
}
|
||||
return m;
|
||||
return new_match(pat, m1->start, m1->end, m1);
|
||||
}
|
||||
case BP_REPLACE: {
|
||||
match_t *p = NULL;
|
||||
@ -415,21 +377,12 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
p = match(defs, f, str, pat->args.replace.pat, ignorecase);
|
||||
if (p == NULL) return NULL;
|
||||
}
|
||||
match_t *m = new_match();
|
||||
m->start = str;
|
||||
m->pat = pat;
|
||||
if (p) {
|
||||
ADD_OWNER(m->child, p);
|
||||
m->end = p->end;
|
||||
} else {
|
||||
m->end = m->start;
|
||||
}
|
||||
return m;
|
||||
return new_match(pat, str, p ? p->end : str, p);
|
||||
}
|
||||
case BP_REF: {
|
||||
def_t *def = lookup(defs, pat->args.name.len, pat->args.name.name);
|
||||
def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
|
||||
if (def == NULL)
|
||||
errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.name.len, pat->args.name.name);
|
||||
errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
|
||||
pat_t *ref = def->pat;
|
||||
|
||||
pat_t rec_op = {
|
||||
@ -483,21 +436,11 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
// does not affect correctness. It also helps with visualization of
|
||||
// match results.
|
||||
// OPTIMIZE: remove this if necessary
|
||||
match_t *m2 = new_match();
|
||||
m2->pat = pat;
|
||||
m2->start = m->start;
|
||||
m2->end = m->end;
|
||||
ADD_OWNER(m2->child, m);
|
||||
return m2;
|
||||
return new_match(pat, m->start, m->end, m);
|
||||
}
|
||||
case BP_BACKREF: {
|
||||
const char *end = match_backref(str, pat->args.backref, ignorecase);
|
||||
if (end == NULL) return NULL;
|
||||
match_t *m = new_match();
|
||||
m->pat = pat;
|
||||
m->start = str;
|
||||
m->end = end;
|
||||
return m;
|
||||
return end ? new_match(pat, str, end, NULL) : NULL;
|
||||
}
|
||||
case BP_NODENT: {
|
||||
if (*str != '\n') return NULL;
|
||||
@ -520,11 +463,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
|
||||
if (str[i] != denter || &str[i] >= f->end) return NULL;
|
||||
}
|
||||
|
||||
match_t *m = new_match();
|
||||
m->start = start;
|
||||
m->end = &str[dents];
|
||||
m->pat = pat;
|
||||
return m;
|
||||
return new_match(pat, start, &str[dents], NULL);
|
||||
}
|
||||
default: {
|
||||
errx(EXIT_FAILURE, "Unknown pattern type: %d", pat->type);
|
||||
@ -587,7 +526,7 @@ match_t *get_capture(match_t *m, const char **id)
|
||||
//
|
||||
// Return a match object which can be used (may be allocated or recycled).
|
||||
//
|
||||
static match_t *new_match(void)
|
||||
static match_t *new_match(pat_t *pat, const char *start, const char *end, match_t *child)
|
||||
{
|
||||
match_t *m;
|
||||
|
||||
@ -611,6 +550,10 @@ static match_t *new_match(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
m->pat = pat;
|
||||
m->start = start;
|
||||
m->end = end;
|
||||
if (child) ADD_OWNER(m->child, child);
|
||||
return m;
|
||||
}
|
||||
|
||||
|
32
pattern.c
32
pattern.c
@ -305,12 +305,12 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
|
||||
|
||||
if (c == '{') { // Surround with `|` word boundaries
|
||||
pat_t *left = new_pat(f, start, start+1, -1, BP_REF);
|
||||
left->args.name.name = "|";
|
||||
left->args.name.len = 1;
|
||||
left->args.ref.name = "|";
|
||||
left->args.ref.len = 1;
|
||||
|
||||
pat_t *right = new_pat(f, str, str+1, -1, BP_REF);
|
||||
right->args.name.name = "|";
|
||||
right->args.name.len = 1;
|
||||
right->args.ref.name = "|";
|
||||
right->args.ref.len = 1;
|
||||
|
||||
pat = chain_together(f, left, chain_together(f, pat, right));
|
||||
}
|
||||
@ -442,15 +442,27 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
|
||||
capture->args.capture.namelen = namelen;
|
||||
return capture;
|
||||
}
|
||||
// Start of file/line:
|
||||
case '^': {
|
||||
if (matchchar(&str, '^'))
|
||||
return new_pat(f, start, str, 0, BP_START_OF_FILE);
|
||||
return new_pat(f, start, str, 0, BP_START_OF_LINE);
|
||||
}
|
||||
// End of file/line:
|
||||
case '$': {
|
||||
if (matchchar(&str, '$'))
|
||||
return new_pat(f, start, str, 0, BP_END_OF_FILE);
|
||||
return new_pat(f, start, str, 0, BP_END_OF_LINE);
|
||||
}
|
||||
// Special rules:
|
||||
case '_': case '^': case '$': case '|': {
|
||||
case '_': case '|': {
|
||||
size_t namelen = 1;
|
||||
if (matchchar(&str, c)) // double __, ^^, $$
|
||||
if (c == '_' && matchchar(&str, c)) // double __
|
||||
++namelen;
|
||||
if (matchchar(&str, ':')) return NULL; // Don't match definitions
|
||||
pat_t *ref = new_pat(f, start, str, -1, BP_REF);
|
||||
ref->args.name.name = start;
|
||||
ref->args.name.len = namelen;
|
||||
ref->args.ref.name = start;
|
||||
ref->args.ref.len = namelen;
|
||||
return ref;
|
||||
}
|
||||
default: {
|
||||
@ -462,8 +474,8 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
|
||||
if (matchchar(&str, ':')) // Don't match definitions
|
||||
return NULL;
|
||||
pat_t *ref = new_pat(f, start, str, -1, BP_REF);
|
||||
ref->args.name.name = refname;
|
||||
ref->args.name.len = (size_t)(str - refname);
|
||||
ref->args.ref.name = refname;
|
||||
ref->args.ref.len = (size_t)(str - refname);
|
||||
return ref;
|
||||
}
|
||||
}
|
||||
|
6
types.h
6
types.h
@ -28,6 +28,10 @@ enum pattype_e {
|
||||
BP_REF,
|
||||
BP_BACKREF,
|
||||
BP_NODENT,
|
||||
BP_START_OF_FILE,
|
||||
BP_START_OF_LINE,
|
||||
BP_END_OF_FILE,
|
||||
BP_END_OF_LINE,
|
||||
BP_LEFTRECURSION,
|
||||
};
|
||||
|
||||
@ -46,7 +50,7 @@ typedef struct pat_s {
|
||||
struct {
|
||||
const char *name;
|
||||
size_t len;
|
||||
} name;
|
||||
} ref;
|
||||
struct {
|
||||
unsigned char low, high;
|
||||
} range;
|
||||
|
Loading…
Reference in New Issue
Block a user