Fix captures and backrefs, and add block comments

This commit is contained in:
Bruce Hill 2020-09-28 16:35:22 -07:00
parent 5049bd7cad
commit 907101b421
5 changed files with 34 additions and 28 deletions

View File

@ -267,7 +267,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
case '@': {
op->op = VM_CAPTURE;
const char *a = *str == '!' ? &str[1] : after_name(str);
if (a > str && *after_spaces(a) == '=') {
if (a > str && a[0] == '=' && a[1] != '>') {
op->args.capture.name = strndup(str, (size_t)(a-str));
str = a + 1;
}

View File

@ -49,7 +49,7 @@ Repeat: (
Optional: `[ __ extended-pat (__`] / @!={=> "Expected closing square bracket here"})
After: `< _ pat
Before: `> _ pat
Capture: `@ [_ `[ @capture-name=(...>(`]/$$)) (`] / @!={=>"Expected closing bracket here"})] _ @capture=pat
Capture: `@ [_ @capture-name=(id/`!) !"=>" `=] _ (@capture=pat / @!={=> "Expected pattern to capture"})
Replace: `{ __ (
[@replace-pat=extended-pat __] "=>" [__ @replacement=String]
) __ (`} / @!={=> "Expected closing brace here"})
@ -71,7 +71,8 @@ $: !.
id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-)
hash-comment: `# .. $
line-comment: `# .. $
block-comment: "#(" 0+(block-comment / !")#" .) ")#"
# Note: comments are undefined by default in regular BPEG
comment: hash-comment
comment: block-comment / line-comment

View File

@ -30,7 +30,7 @@ only-matches: (
(include-binary-files / is-text-file)
define-highlights
add-filename
1+{...@hl-pattern=>'@1\n'}
1+{...@hl-pattern =>'@1\n'}
)
add-filename: [print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})]
add-line-number: [print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})]

16
utils.c
View File

@ -9,6 +9,7 @@
*/
const char *after_spaces(const char *str)
{
int block_comment_depth = 0;
// Skip whitespace and comments:
skip_whitespace:
switch (*str) {
@ -17,7 +18,20 @@ const char *after_spaces(const char *str)
goto skip_whitespace;
}
case '#': {
while (*str && *str != '\n') ++str;
if (str[1] == '(') {
++block_comment_depth;
for (str += 2; *str && block_comment_depth > 0; ++str) {
if (str[0] == '#' && str[1] == '(') {
++block_comment_depth;
++str;
} else if (str[0] == ')' && str[1] == '#') {
--block_comment_depth;
++str;
}
}
} else {
while (*str && *str != '\n') ++str;
}
goto skip_whitespace;
}
}

35
vm.c
View File

@ -532,14 +532,14 @@ static match_t *get_cap(match_t *m, const char **r)
if (isdigit(**r)) {
int n = (int)strtol(*r, (char**)r, 10);
return get_capture_n(m->child, &n);
} else if (**r == '[') {
char *closing = strchr(*r+1, ']');
if (!closing) return NULL;
++(*r);
char *name = strndup(*r, (size_t)(closing-*r));
} else {
const char *end = after_name(*r);
if (end == *r) return NULL;
char *name = strndup(*r, (size_t)(end-*r));
match_t *cap = get_capture_named(m, name);
free(name);
*r = closing + 1;
*r = end;
if (*end == ';') ++(*r);
return cap;
}
return NULL;
@ -638,26 +638,17 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign
cap = get_capture_n(cap->child, &n);
break;
}
case '[': {
char *closing = strchr(r+1, ']');
if (!closing) {
if (*(str++) != '@') {
destroy_match(&ret);
return NULL;
}
}
++r;
char *name = strndup(r, (size_t)(closing-r));
cap = get_capture_named(cap, name);
free(name);
r = closing + 1;
break;
}
default: {
if (*(str++) != '@') {
const char *end = after_name(r);
if (end == r) {
destroy_match(&ret);
return NULL;
}
char *name = strndup(r, (size_t)(end-r));
cap = get_capture_named(cap, name);
free(name);
r = end;
if (*r == ';') ++r;
break;
}
}