Changed how tags work, changed Lua API for handling match captures

This commit is contained in:
Bruce Hill 2022-05-14 22:43:13 -04:00
parent 5fd2f6b8c5
commit 23c64e386c
6 changed files with 55 additions and 48 deletions

View File

@ -80,39 +80,45 @@ static void push_matchstring(lua_State *L, match_t *m)
fclose(out);
}
static void set_capture_fields(lua_State *L, match_t *m, int *n, const char *start)
static match_t *get_first_capture(match_t *m)
{
if (m->pat->type == BP_TAGGED) {
push_match(L, m, start);
lua_seti(L, -2, (*n)++);
} else if (m->pat->type == BP_CAPTURE) {
if (m->pat->args.capture.namelen > 0) {
lua_pushlstring(L, m->pat->args.capture.name, m->pat->args.capture.namelen);
push_match(L, m->children[0], start);
lua_settable(L, -3);
} else {
push_match(L, m->children[0], start);
lua_seti(L, -2, (*n)++);
}
return m;
} else if (m->pat->type == BP_CAPTURE && !m->pat->args.capture.name) {
return m;
} else if (m->children) {
for (int i = 0; m->children[i]; i++) {
if (m->children[i]->pat->type == BP_TAGGED) {
push_match(L, m->children[i], start);
lua_seti(L, -2, (*n)++);
} else {
set_capture_fields(L, m->children[i], n, start);
}
match_t *cap = get_first_capture(m->children[i]);
if (cap) return cap;
}
}
return NULL;
}
//
// Walk the match tree rooted at `m` and store each capture it contains into
// the Lua table that is currently on top of the stack.
//   - A tagged match is stored at the next positional index (*n), which is
//     then incremented.
//   - A capture is stored under its name when it has one (namelen > 0),
//     otherwise at the next positional index. The stored value is built from
//     the first capture found inside the capture's first child, or from that
//     child itself when get_first_capture() finds nothing.
//   - Any other match is only traversed: its children are visited in order.
// `start` is passed through unchanged to push_match().
//
static void set_capture_fields(lua_State *L, match_t *m, int *n, const char *start)
{
    // Tagged matches always become positional entries.
    if (m->pat->type == BP_TAGGED) {
        push_match(L, m, start);
        lua_seti(L, -2, (*n)++);
        return;
    }

    // Neither a tag nor a capture: recurse into the NULL-terminated child list (if any).
    if (m->pat->type != BP_CAPTURE) {
        if (!m->children)
            return;
        for (match_t **child = m->children; *child; child++)
            set_capture_fields(L, *child, n, start);
        return;
    }

    // Capture: prefer the first nested capture as the value, and fall back
    // to the captured child match itself.
    match_t *inner = m->children[0];
    match_t *value = get_first_capture(inner);
    if (!value)
        value = inner;

    if (m->pat->args.capture.namelen > 0) {
        // Named capture: table[name] = value (key is pushed before the value)
        lua_pushlstring(L, m->pat->args.capture.name, m->pat->args.capture.namelen);
        push_match(L, value, start);
        lua_settable(L, -3);
        return;
    }

    // Anonymous capture: table[(*n)++] = value
    push_match(L, value, start);
    lua_seti(L, -2, (*n)++);
}
static void push_match(lua_State *L, match_t *m, const char *start)
{
// Given tag::id,
// Case 1: (@id _ tag _ @id) -> {[0]="foo baz qux", 1={[0]="foo", __tag="tag"}, 2={[0]="qux"}}
// Case 2: (@id _ @tag _ @id) -> {[0]="foo baz qux", 1={[0]="foo"}, 2={[0]="baz", 1={[0]="baz", __tag="tag"}}, 3={[0]="qux"}}
// Case 3: (@id @(_tag_) @id) -> {[0]="foo baz qux", 1={[0]="foo"}, 2={[0]=" baz ", 1={[0]="baz", __tag="tag"}}, 3={[0]="qux"}}
// Case 4: (@first=id _ @second=tag _ @third=id) -> {[0]="foo baz qux", first={[0]="foo"}, second={[0]="baz", 1={[0]="baz", __tag="tag"}}, third={[0]="qux"}}
lua_createtable(L, 1, 2);
lua_pushlightuserdata(L, (void*)&MATCH_METATABLE);
lua_gettable(L, LUA_REGISTRYINDEX);

8
bp.1
View File

@ -355,13 +355,13 @@ not contain the word \f[B]\[lq]IGNORE\[rq]\f[R])
\f[I]name\f[R]\f[B]:\f[R] \f[I]pat\f[R]
Define \f[I]name\f[R] to mean \f[I]pat\f[R] (pattern definition)
.TP
\f[B]:\f[R]\f[I]name\f[R] \f[I]pat\f[R]
\f[B]\[at]:\f[R]\f[I]name\f[R] \f[B]=\f[R] \f[I]pat\f[R]
Match \f[I]pat\f[R] and tag it with the given name as metadata.
.TP
\f[I]name\f[R]\f[B]::\f[R] \f[I]pat\f[R]
Syntactic sugar for \f[I]name\f[R]\f[B]::\f[R] \f[B]:\f[R]\f[I]name\f[R]
\f[I]pat\f[R] (define a pattern that also attaches a metadata tag of the
same name)
Syntactic sugar for \f[I]name\f[R]\f[B]:\f[R]
\f[B]\[at]:\f[R]\f[I]name\f[R]\f[B]=\f[R]\f[I]pat\f[R] (define a pattern
that also attaches a metadata tag of the same name)
.TP
\f[B]#\f[R] \f[I]comment\f[R]
A line comment

View File

@ -291,11 +291,11 @@ contain the word **"IGNORE"**)
*name*`:` *pat*
: Define *name* to mean *pat* (pattern definition)
`:`*name* *pat*
`@:`*name* `=` *pat*
: Match *pat* and tag it with the given name as metadata.
*name*`::` *pat*
: Syntactic sugar for *name*`::` `:`*name* *pat* (define a pattern that also
: Syntactic sugar for *name*`:` `@:`*name*`=`*pat* (define a pattern that also
attaches a metadata tag of the same name)
`#` *comment*

View File

@ -14,7 +14,7 @@ string-escape: `\ (`x 2 Hex / 1-3 `0-7 / `u 1-4 Hex / .)
id: \I *\i
var: \I *\i
keyword: !"" # No keywords defined by default
word: | +\i
word: \b +\i
HEX: `0-9,A-F
Hex: `0-9,a-f,A-F
hex: `0-9,a-f

View File

@ -475,6 +475,24 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
}
// Capture
case '@': {
if (matchchar(&str, ':', false, end)) { // Tagged capture @:Foo=pat
const char *name = str;
str = after_name(name, end);
if (str <= name)
parse_err(start, str, "There should be an identifier after this '@:'");
size_t namelen = (size_t)(str - name);
pat_t *p = NULL;
if (matchchar(&str, '=', false, end)) {
p = bp_simplepattern(str, end);
if (p) str = p->end;
}
pat_t *tagged = new_pat(BP_TAGGED, start, str, p ? p->min_matchlen : 0, p ? p->max_matchlen : 0);
tagged->args.capture.capture_pat = p;
tagged->args.capture.name = name;
tagged->args.capture.namelen = namelen;
return tagged;
}
const char *name = NULL;
size_t namelen = 0;
const char *a = after_name(str, end);
@ -513,23 +531,6 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st
return new_pat(BP_END_OF_FILE, start, ++str, 0, 0);
return new_pat(BP_END_OF_LINE, start, str, 0, 0);
}
// Tagged pattern :Tag:pat...
case ':': {
const char *name = str;
str = after_name(name, end);
if (str == name)
parse_err(start, str, "There should be an identifier after this ':'");
size_t namelen = (size_t)(str - name);
(void)matchchar(&str, ':', false, end); // Optional second colon for :Tag:foo instead of :Tag(foo)
pat_t *p = bp_simplepattern(str, end);
if (p) str = p->end;
pat_t *tagged = new_pat(BP_TAGGED, start, str, p ? p->min_matchlen : 0, p ? p->max_matchlen : 0);
tagged->args.capture.capture_pat = p;
tagged->args.capture.name = start;
tagged->args.capture.namelen = namelen;
return tagged;
}
default: {
pat_t *def = _bp_definition(start, end);
if (def) return def;

View File

@ -43,7 +43,7 @@ const char *after_spaces(const char *str, bool skip_nl, const char *end)
//
const char *after_name(const char *str, const char *end)
{
if (str >= end) return NULL;
if (str >= end) return end;
if (*str == '|') return &str[1];
if (*str == '^' || *str == '_' || *str == '$') {
return (&str[1] < end && str[1] == *str) ? &str[2] : &str[1];