diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2022-05-14 22:43:13 -0400 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2022-05-14 22:43:13 -0400 |
| commit | 23c64e386c7ea1d0054c37f945a6467dccdf2980 (patch) | |
| tree | da7b8d914f999f19188a1cda15d0cf43b5d8570a | |
| parent | 5fd2f6b8c594c6cbc1313efbcc28b53c15ba85d5 (diff) | |
Changed how tags work, changed Lua API for handling match captures
| -rw-r--r-- | Lua/lbp.c | 44 | ||||
| -rw-r--r-- | bp.1 | 8 | ||||
| -rw-r--r-- | bp.1.md | 4 | ||||
| -rw-r--r-- | grammars/builtins.bp | 2 | ||||
| -rw-r--r-- | pattern.c | 35 | ||||
| -rw-r--r-- | utils.c | 2 |
6 files changed, 51 insertions, 44 deletions
@@ -80,39 +80,45 @@ static void push_matchstring(lua_State *L, match_t *m) fclose(out); } -static void set_capture_fields(lua_State *L, match_t *m, int *n, const char *start) +static match_t *get_first_capture(match_t *m) { if (m->pat->type == BP_TAGGED) { - push_match(L, m, start); - lua_seti(L, -2, (*n)++); - } else if (m->pat->type == BP_CAPTURE) { + return m; + } else if (m->pat->type == BP_CAPTURE && !m->pat->args.capture.name) { + return m; + } else if (m->children) { + for (int i = 0; m->children[i]; i++) { + match_t *cap = get_first_capture(m->children[i]); + if (cap) return cap; + } + } + return NULL; +} + +static void set_capture_fields(lua_State *L, match_t *m, int *n, const char *start) +{ + if (m->pat->type == BP_CAPTURE) { + match_t *cap = get_first_capture(m->children[0]); + if (!cap) cap = m->children[0]; if (m->pat->args.capture.namelen > 0) { lua_pushlstring(L, m->pat->args.capture.name, m->pat->args.capture.namelen); - push_match(L, m->children[0], start); + push_match(L, cap, start); lua_settable(L, -3); } else { - push_match(L, m->children[0], start); + push_match(L, cap, start); lua_seti(L, -2, (*n)++); } + } else if (m->pat->type == BP_TAGGED) { + push_match(L, m, start); + lua_seti(L, -2, (*n)++); } else if (m->children) { - for (int i = 0; m->children[i]; i++) { - if (m->children[i]->pat->type == BP_TAGGED) { - push_match(L, m->children[i], start); - lua_seti(L, -2, (*n)++); - } else { - set_capture_fields(L, m->children[i], n, start); - } - } + for (int i = 0; m->children[i]; i++) + set_capture_fields(L, m->children[i], n, start); } } static void push_match(lua_State *L, match_t *m, const char *start) { - // Given tag::id, - // Case 1: (@id _ tag _ @id) -> {[0]="foo baz qux", 1={[0]="foo", __tag="tag"}, 2={[0]="qux"}} - // Case 2: (@id _ @tag _ @id) -> {[0]="foo baz qux", 1={[0]="foo"}, 2={[0]="baz", 1={[0]="baz", __tag="tag"}}, 3={[0]="qux"}} - // Case 3: (@id @(_tag_) @id) -> {[0]="foo baz qux", 1={[0]="foo"}, 2={[0]=" baz ", 1={[0]="baz", __tag="tag"}}, 3={[0]="qux"}} - // Case 4: (@first=id _ @second=tag _ @third=id) -> {[0]="foo baz qux", first={[0]="foo"}, second={[0]="baz", 1={[0]="baz", __tag="tag"}}, third={[0]="qux"}} lua_createtable(L, 1, 2); lua_pushlightuserdata(L, (void*)&MATCH_METATABLE); lua_gettable(L, LUA_REGISTRYINDEX); @@ -355,13 +355,13 @@ not contain the word \f[B]\[lq]IGNORE\[rq]\f[R]) \f[I]name\f[R]\f[B]:\f[R] \f[I]pat\f[R] Define \f[I]name\f[R] to mean \f[I]pat\f[R] (pattern definition) .TP -\f[B]:\f[R]\f[I]name\f[R] \f[I]pat\f[R] +\f[B]\[at]:\f[R]\f[I]name\f[R] \f[B]=\f[R] \f[I]pat\f[R] Match \f[I]pat\f[R] and tag it with the given name as metadata. .TP \f[I]name\f[R]\f[B]::\f[R] \f[I]pat\f[R] -Syntactic sugar for \f[I]name\f[R]\f[B]::\f[R] \f[B]:\f[R]\f[I]name\f[R] -\f[I]pat\f[R] (define a pattern that also attaches a metadata tag of the -same name) +Syntactic sugar for \f[I]name\f[R]\f[B]:\f[R] +\f[B]\[at]:\f[R]\f[I]name\f[R]\f[B]=\f[R]\f[I]pat\f[R] (define a pattern +that also attaches a metadata tag of the same name) .TP \f[B]#\f[R] \f[I]comment\f[R] A line comment @@ -291,11 +291,11 @@ contain the word **"IGNORE"**) *name*`:` *pat* : Define *name* to mean *pat* (pattern definition) -`:`*name* *pat* +`@:`*name* `=` *pat* : Match *pat* and tag it with the given name as metadata. *name*`::` *pat* -: Syntactic sugar for *name*`::` `:`*name* *pat* (define a pattern that also +: Syntactic sugar for *name*`:` `@:`*name*`=`*pat* (define a pattern that also attaches a metadata tag of the same name) `#` *comment* diff --git a/grammars/builtins.bp b/grammars/builtins.bp index 16df6e6..b846f3b 100644 --- a/grammars/builtins.bp +++ b/grammars/builtins.bp @@ -14,7 +14,7 @@ string-escape: `\ (`x 2 Hex / 1-3 `0-7 / `u 1-4 Hex / .) id: \I *\i var: \I *\i keyword: !"" # No keywords defined by default -word: | +\i +word: \b +\i HEX: `0-9,A-F Hex: `0-9,a-f,A-F hex: `0-9,a-f @@ -475,6 +475,24 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st } // Capture case '@': { + if (matchchar(&str, ':', false, end)) { // Tagged capture @:Foo=pat + const char *name = str; + str = after_name(name, end); + if (str <= name) + parse_err(start, str, "There should be an identifier after this '@:'"); + size_t namelen = (size_t)(str - name); + pat_t *p = NULL; + if (matchchar(&str, '=', false, end)) { + p = bp_simplepattern(str, end); + if (p) str = p->end; + } + pat_t *tagged = new_pat(BP_TAGGED, start, str, p ? p->min_matchlen : 0, p ? p->max_matchlen : 0); + tagged->args.capture.capture_pat = p; + tagged->args.capture.name = name; + tagged->args.capture.namelen = namelen; + return tagged; + } + const char *name = NULL; size_t namelen = 0; const char *a = after_name(str, end); @@ -513,23 +531,6 @@ static pat_t *_bp_simplepattern(const char *str, const char *end, bool inside_st return new_pat(BP_END_OF_FILE, start, ++str, 0, 0); return new_pat(BP_END_OF_LINE, start, str, 0, 0); } - // Tagged pattern :Tag:pat... - case ':': { - const char *name = str; - str = after_name(name, end); - if (str == name) - parse_err(start, str, "There should be an identifier after this ':'"); - size_t namelen = (size_t)(str - name); - (void)matchchar(&str, ':', false, end); // Optional second colon for :Tag:foo instead of :Tag(foo) - - pat_t *p = bp_simplepattern(str, end); - if (p) str = p->end; - pat_t *tagged = new_pat(BP_TAGGED, start, str, p ? p->min_matchlen : 0, p ? p->max_matchlen : 0); - tagged->args.capture.capture_pat = p; - tagged->args.capture.name = start; - tagged->args.capture.namelen = namelen; - return tagged; - } default: { pat_t *def = _bp_definition(start, end); if (def) return def; @@ -43,7 +43,7 @@ const char *after_spaces(const char *str, bool skip_nl, const char *end) // const char *after_name(const char *str, const char *end) { - if (str >= end) return NULL; + if (str >= end) return end; if (*str == '|') return &str[1]; if (*str == '^' || *str == '_' || *str == '$') { return (&str[1] < end && str[1] == *str) ? &str[2] : &str[1]; |
