From 907101b42159c3d2a8ee74540fb8e9259b36db7e Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Mon, 28 Sep 2020 16:35:22 -0700 Subject: Fixes for captures and backrefs, and added block comments --- compiler.c | 2 +- grammars/bpeg.bpeg | 7 ++++--- grammars/builtins.bpeg | 2 +- utils.c | 16 +++++++++++++++- vm.c | 35 +++++++++++++---------------------- 5 files changed, 34 insertions(+), 28 deletions(-) diff --git a/compiler.c b/compiler.c index 49c4996..1f2d9ff 100644 --- a/compiler.c +++ b/compiler.c @@ -267,7 +267,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) case '@': { op->op = VM_CAPTURE; const char *a = *str == '!' ? &str[1] : after_name(str); - if (a > str && *after_spaces(a) == '=') { + if (a > str && a[0] == '=' && a[1] != '>') { op->args.capture.name = strndup(str, (size_t)(a-str)); str = a + 1; } diff --git a/grammars/bpeg.bpeg b/grammars/bpeg.bpeg index 380bf9c..88dc6b1 100644 --- a/grammars/bpeg.bpeg +++ b/grammars/bpeg.bpeg @@ -49,7 +49,7 @@ Repeat: ( Optional: `[ __ extended-pat (__`] / @!={=> "Expected closing square bracket here"}) After: `< _ pat Before: `> _ pat -Capture: `@ [_ `[ @capture-name=(...>(`]/$$)) (`] / @!={=>"Expected closing bracket here"})] _ @capture=pat +Capture: `@ [_ @capture-name=(id/`!) !"=>" `=] _ (@capture=pat / @!={=> "Expected pattern to capture"}) Replace: `{ __ ( [@replace-pat=extended-pat __] "=>" [__ @replacement=String] ) __ (`} / @!={=> "Expected closing brace here"}) @@ -71,7 +71,8 @@ $: !. id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-) -hash-comment: `# .. $ +line-comment: `# .. $ +block-comment: "#(" 0+(block-comment / !")#" .) ")#" # Note: comments are undefined by default in regular BPEG -comment: hash-comment +comment: block-comment / line-comment diff --git a/grammars/builtins.bpeg b/grammars/builtins.bpeg index 1633a30..c05b4c7 100644 --- a/grammars/builtins.bpeg +++ b/grammars/builtins.bpeg @@ -30,7 +30,7 @@ only-matches: ( (include-binary-files / is-text-file) define-highlights add-filename - 1+{...@hl-pattern=>'@1\n'} + 1+{...@hl-pattern =>'@1\n'} ) add-filename: [print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})] add-line-number: [print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})] diff --git a/utils.c b/utils.c index 6b9af5c..012c1eb 100644 --- a/utils.c +++ b/utils.c @@ -9,6 +9,7 @@ */ const char *after_spaces(const char *str) { + int block_comment_depth = 0; // Skip whitespace and comments: skip_whitespace: switch (*str) { @@ -17,7 +18,20 @@ const char *after_spaces(const char *str) goto skip_whitespace; } case '#': { - while (*str && *str != '\n') ++str; + if (str[1] == '(') { + ++block_comment_depth; + for (str += 2; *str && block_comment_depth > 0; ++str) { + if (str[0] == '#' && str[1] == '(') { + ++block_comment_depth; + ++str; + } else if (str[0] == ')' && str[1] == '#') { + --block_comment_depth; + ++str; + } + } + } else { + while (*str && *str != '\n') ++str; + } goto skip_whitespace; } } diff --git a/vm.c b/vm.c index 5d749ec..1db4481 100644 --- a/vm.c +++ b/vm.c @@ -532,14 +532,14 @@ static match_t *get_cap(match_t *m, const char **r) if (isdigit(**r)) { int n = (int)strtol(*r, (char**)r, 10); return get_capture_n(m->child, &n); - } else if (**r == '[') { - char *closing = strchr(*r+1, ']'); - if (!closing) return NULL; - ++(*r); - char *name = strndup(*r, (size_t)(closing-*r)); + } else { + const char *end = after_name(*r); + if (end == *r) return NULL; + char *name = strndup(*r, (size_t)(end-*r)); match_t *cap = get_capture_named(m, name); free(name); - *r = closing + 1; + *r = end; + if (*end == ';') ++(*r); return cap; } return NULL; @@ -638,26 +638,17 @@ static match_t *match_backref(const char *str, vm_op_t *op, match_t *cap, unsign cap = get_capture_n(cap->child, &n); break; } - case '[': { - char *closing = strchr(r+1, ']'); - if (!closing) { - if (*(str++) != '@') { - destroy_match(&ret); - return NULL; - } - } - ++r; - char *name = strndup(r, (size_t)(closing-r)); - cap = get_capture_named(cap, name); - free(name); - r = closing + 1; - break; - } default: { - if (*(str++) != '@') { + const char *end = after_name(r); + if (end == r) { destroy_match(&ret); return NULL; } + char *name = strndup(r, (size_t)(end-r)); + cap = get_capture_named(cap, name); + free(name); + r = end; + if (*r == ';') ++r; break; } } -- cgit v1.2.3