aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--grammars/bp.bp12
-rw-r--r--grammars/builtins.bp4
-rw-r--r--match.c127
-rw-r--r--pattern.c32
-rw-r--r--types.h6
5 files changed, 68 insertions, 113 deletions
diff --git a/grammars/bp.bp b/grammars/bp.bp
index 6d98187..5e4dcc8 100644
--- a/grammars/bp.bp
+++ b/grammars/bp.bp
@@ -17,7 +17,7 @@ String-pattern: ..%(\n / Nodent / Escape / `\ pat [`;])$$
pat: simple-pat !(__("!="/"==")) / suffixed-pat
simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range
/ Escape / Repeat / Optional / No / After / Before / Capture
- / Ref / parens
+ / Start-of-File / Start-of-Line / End-of-File / End-of-Line / Ref / parens
suffixed-pat: (
Eq-pat
@@ -63,6 +63,10 @@ Replace: (
@replace-pat=(Replace / Chain / pat) __ "=>" (__ @replacement=String / @!=(''=> "Expected replacement string"))
)
Ref: @name=id !(__`:)
+Start-of-File: "^^"
+Start-of-Line: "^"
+End-of-File: "$$"
+End-of-Line: "$"
parens: `( __ extended-pat (__ `) / @!=(''=> "Expected closing parenthesis here"))
@@ -73,11 +77,7 @@ extended-pat: Otherwise / Replace / Chain / pat
# Special-symbol rules:
_: *(` / \t)
__: *(` / \t / \r / \n / comment)
-$$: !(./\n)
-$: !.
-^^: !<(./\n)
-^: !<.
-id: "^^" / "^" / "__" / "_" / "$$" / "$" / "|" / `a-z,A-Z *`a-z,A-Z,0-9,-
+id: "__" / "_" / "|" / `a-z,A-Z *`a-z,A-Z,0-9,-
comment: `# .. $
diff --git a/grammars/builtins.bp b/grammars/builtins.bp
index 76bf4f5..354ac50 100644
--- a/grammars/builtins.bp
+++ b/grammars/builtins.bp
@@ -38,9 +38,5 @@ esc: \e
tab: \t
nl: \n; lf: \n
comment: !''; # No default definition, can be overridden
-$$: !(./\n)
-$: !.
-^^: !<(./\n)
-^: !<.
__: *(` /\t/\n/\r/comment)
_: *(` /\t)
diff --git a/match.c b/match.c
index dd9492f..02bb753 100644
--- a/match.c
+++ b/match.c
@@ -36,7 +36,7 @@ static match_t *in_use_matches = NULL;
#endif
__attribute__((returns_nonnull))
-static match_t *new_match(void);
+static match_t *new_match(pat_t *pat, const char *start, const char *end, match_t *child);
__attribute__((nonnull, pure))
static inline const char *next_char(file_t *f, const char *str);
__attribute__((nonnull))
@@ -172,34 +172,32 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
}
}
case BP_ANYCHAR: {
- if (str >= f->end || *str == '\n')
- return NULL;
- match_t *m = new_match();
- m->pat = pat;
- m->start = str;
- m->end = next_char(f, str);
- return m;
+ return (str < f->end && *str != '\n') ? new_match(pat, str, next_char(f, str), NULL) : NULL;
+ }
+ case BP_START_OF_FILE: {
+ return (str == f->contents) ? new_match(pat, str, str, NULL) : NULL;
+ }
+ case BP_START_OF_LINE: {
+ return (str == f->contents || str[-1] == '\n') ? new_match(pat, str, str, NULL) : NULL;
+ }
+ case BP_END_OF_FILE: {
+ return (str == f->end) ? new_match(pat, str, str, NULL) : NULL;
+ }
+ case BP_END_OF_LINE: {
+ return (str == f->end || *str == '\n') ? new_match(pat, str, str, NULL) : NULL;
}
case BP_STRING: {
if (&str[pat->len] > f->end) return NULL;
if (ignorecase ? memicmp(str, pat->args.string, (size_t)pat->len) != 0
: memcmp(str, pat->args.string, (size_t)pat->len) != 0)
return NULL;
- match_t *m = new_match();
- m->pat = pat;
- m->start = str;
- m->end = str + pat->len;
- return m;
+ return new_match(pat, str, str + pat->len, NULL);
}
case BP_RANGE: {
if (str >= f->end) return NULL;
if ((unsigned char)*str < pat->args.range.low || (unsigned char)*str > pat->args.range.high)
return NULL;
- match_t *m = new_match();
- m->pat = pat;
- m->start = str;
- m->end = str + 1;
- return m;
+ return new_match(pat, str, str+1, NULL);
}
case BP_NOT: {
match_t *m = match(defs, f, str, pat->args.pat, ignorecase);
@@ -207,17 +205,10 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
recycle_if_unused(&m);
return NULL;
}
- m = new_match();
- m->pat = pat;
- m->start = str;
- m->end = str;
- return m;
+ return new_match(pat, str, str, NULL);
}
case BP_UPTO: {
- match_t *m = new_match();
- m->start = str;
- m->pat = pat;
-
+ match_t *m = new_match(pat, str, str, NULL);
pat_t *target = pat->args.multiple.first, *skip = pat->args.multiple.second;
if (!target && !skip) {
while (str < f->end && *str != '\n') ++str;
@@ -258,11 +249,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
return NULL;
}
case BP_REPEAT: {
- match_t *m = new_match();
- m->start = str;
- m->end = str;
- m->pat = pat;
-
+ match_t *m = new_match(pat, str, str, NULL);
match_t **dest = &m->child;
size_t reps = 0;
ssize_t max = pat->args.repetitions.max;
@@ -319,32 +306,17 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
if (str - backtrack < f->contents) return NULL;
match_t *before = match(defs, f, str - backtrack, pat->args.pat, ignorecase);
if (before == NULL) return NULL;
- match_t *m = new_match();
- m->start = str;
- m->end = str;
- m->pat = pat;
- ADD_OWNER(m->child, before);
- return m;
+ return new_match(pat, str, str, before);
}
case BP_BEFORE: {
match_t *after = match(defs, f, str, pat->args.pat, ignorecase);
if (after == NULL) return NULL;
- match_t *m = new_match();
- m->start = str;
- m->end = str;
- m->pat = pat;
- ADD_OWNER(m->child, after);
- return m;
+ return new_match(pat, str, str, after);
}
case BP_CAPTURE: {
match_t *p = match(defs, f, str, pat->args.pat, ignorecase);
if (p == NULL) return NULL;
- match_t *m = new_match();
- m->start = str;
- m->end = p->end;
- m->pat = pat;
- ADD_OWNER(m->child, p);
- return m;
+ return new_match(pat, str, p->end, p);
}
case BP_OTHERWISE: {
match_t *m = match(defs, f, str, pat->args.multiple.first, ignorecase);
@@ -369,13 +341,8 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
recycle_if_unused(&m1);
return NULL;
}
- match_t *m = new_match();
- m->start = str;
- m->end = m2->end;
- m->pat = pat;
- ADD_OWNER(m->child, m1);
ADD_OWNER(m1->nextsibling, m2);
- return m;
+ return new_match(pat, str, m2->end, m1);
}
case BP_EQUAL: case BP_NOT_EQUAL: {
match_t *m1 = match(defs, f, str, pat->args.multiple.first, ignorecase);
@@ -397,17 +364,12 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
if (m2 != NULL) recycle_if_unused(&m2);
return NULL;
}
- match_t *m = new_match();
- m->start = m1->start;
- m->end = m1->end;
- m->pat = pat;
- ADD_OWNER(m->child, m1);
if (pat->type == BP_EQUAL) {
ADD_OWNER(m1->nextsibling, m2);
} else {
recycle_if_unused(&m2);
}
- return m;
+ return new_match(pat, m1->start, m1->end, m1);
}
case BP_REPLACE: {
match_t *p = NULL;
@@ -415,21 +377,12 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
p = match(defs, f, str, pat->args.replace.pat, ignorecase);
if (p == NULL) return NULL;
}
- match_t *m = new_match();
- m->start = str;
- m->pat = pat;
- if (p) {
- ADD_OWNER(m->child, p);
- m->end = p->end;
- } else {
- m->end = m->start;
- }
- return m;
+ return new_match(pat, str, p ? p->end : str, p);
}
case BP_REF: {
- def_t *def = lookup(defs, pat->args.name.len, pat->args.name.name);
+ def_t *def = lookup(defs, pat->args.ref.len, pat->args.ref.name);
if (def == NULL)
- errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.name.len, pat->args.name.name);
+ errx(EXIT_FAILURE, "Unknown identifier: '%.*s'", (int)pat->args.ref.len, pat->args.ref.name);
pat_t *ref = def->pat;
pat_t rec_op = {
@@ -483,21 +436,11 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
// does not affect correctness. It also helps with visualization of
// match results.
// OPTIMIZE: remove this if necessary
- match_t *m2 = new_match();
- m2->pat = pat;
- m2->start = m->start;
- m2->end = m->end;
- ADD_OWNER(m2->child, m);
- return m2;
+ return new_match(pat, m->start, m->end, m);
}
case BP_BACKREF: {
const char *end = match_backref(str, pat->args.backref, ignorecase);
- if (end == NULL) return NULL;
- match_t *m = new_match();
- m->pat = pat;
- m->start = str;
- m->end = end;
- return m;
+ return end ? new_match(pat, str, end, NULL) : NULL;
}
case BP_NODENT: {
if (*str != '\n') return NULL;
@@ -520,11 +463,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool
if (str[i] != denter || &str[i] >= f->end) return NULL;
}
- match_t *m = new_match();
- m->start = start;
- m->end = &str[dents];
- m->pat = pat;
- return m;
+ return new_match(pat, start, &str[dents], NULL);
}
default: {
errx(EXIT_FAILURE, "Unknown pattern type: %d", pat->type);
@@ -587,7 +526,7 @@ match_t *get_capture(match_t *m, const char **id)
//
// Return a match object which can be used (may be allocated or recycled).
//
-static match_t *new_match(void)
+static match_t *new_match(pat_t *pat, const char *start, const char *end, match_t *child)
{
match_t *m;
@@ -611,6 +550,10 @@ static match_t *new_match(void)
}
#endif
+ m->pat = pat;
+ m->start = start;
+ m->end = end;
+ if (child) ADD_OWNER(m->child, child);
return m;
}
diff --git a/pattern.c b/pattern.c
index 1476e85..bb1b946 100644
--- a/pattern.c
+++ b/pattern.c
@@ -305,12 +305,12 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
if (c == '{') { // Surround with `|` word boundaries
pat_t *left = new_pat(f, start, start+1, -1, BP_REF);
- left->args.name.name = "|";
- left->args.name.len = 1;
+ left->args.ref.name = "|";
+ left->args.ref.len = 1;
pat_t *right = new_pat(f, str, str+1, -1, BP_REF);
- right->args.name.name = "|";
- right->args.name.len = 1;
+ right->args.ref.name = "|";
+ right->args.ref.len = 1;
pat = chain_together(f, left, chain_together(f, pat, right));
}
@@ -442,15 +442,27 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
capture->args.capture.namelen = namelen;
return capture;
}
+ // Start of file/line:
+ case '^': {
+ if (matchchar(&str, '^'))
+ return new_pat(f, start, str, 0, BP_START_OF_FILE);
+ return new_pat(f, start, str, 0, BP_START_OF_LINE);
+ }
+ // End of file/line:
+ case '$': {
+ if (matchchar(&str, '$'))
+ return new_pat(f, start, str, 0, BP_END_OF_FILE);
+ return new_pat(f, start, str, 0, BP_END_OF_LINE);
+ }
// Special rules:
- case '_': case '^': case '$': case '|': {
+ case '_': case '|': {
size_t namelen = 1;
- if (matchchar(&str, c)) // double __, ^^, $$
+ if (c == '_' && matchchar(&str, c)) // double __, ^^, $$
++namelen;
if (matchchar(&str, ':')) return NULL; // Don't match definitions
pat_t *ref = new_pat(f, start, str, -1, BP_REF);
- ref->args.name.name = start;
- ref->args.name.len = namelen;
+ ref->args.ref.name = start;
+ ref->args.ref.len = namelen;
return ref;
}
default: {
@@ -462,8 +474,8 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
if (matchchar(&str, ':')) // Don't match definitions
return NULL;
pat_t *ref = new_pat(f, start, str, -1, BP_REF);
- ref->args.name.name = refname;
- ref->args.name.len = (size_t)(str - refname);
+ ref->args.ref.name = refname;
+ ref->args.ref.len = (size_t)(str - refname);
return ref;
}
}
diff --git a/types.h b/types.h
index 9cbc665..19c6829 100644
--- a/types.h
+++ b/types.h
@@ -28,6 +28,10 @@ enum pattype_e {
BP_REF,
BP_BACKREF,
BP_NODENT,
+ BP_START_OF_FILE,
+ BP_START_OF_LINE,
+ BP_END_OF_FILE,
+ BP_END_OF_LINE,
BP_LEFTRECURSION,
};
@@ -46,7 +50,7 @@ typedef struct pat_s {
struct {
const char *name;
size_t len;
- } name;
+ } ref;
struct {
unsigned char low, high;
} range;