From a07da1989d7ec515d4bb08051ea6940960577446 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Wed, 30 Dec 2020 15:30:19 -0800 Subject: Simplified syntax for replacement: just =>, no need for {} --- README.md | 5 ++- bp.1 | 2 +- compiler.c | 94 +++++++++++++++++++++------------------------------- grammars/bpeg.bp | 37 +++++++++++---------- grammars/builtins.bp | 4 +-- utils.c | 14 ++++++++ utils.h | 2 ++ 7 files changed, 77 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index 784296a..cd290e1 100644 --- a/README.md +++ b/README.md @@ -60,9 +60,8 @@ Pattern | Meaning `>pat` | `pat` matches just in front of the current position (lookahead) `@pat` | Capture `pat` (used for text replacement and backreferences) `@foo=pat` | Let `foo` be the text of `pat` (used for text replacement and backreferences) -`{pat => "replacement"}` | Match `pat` and replace it with `replacement` -`{pat @other => "@1"}` | Match `pat` followed by `other` and replace it with the text of `other` -`{pat @keep=other => "@keep"}` | Match `pat` followed by `other` and replace it with the text of `other` +`pat => "replacement"` | Match `pat` and replace it with `replacement` +`(pat1 @keep=pat2) => "@keep"` | Match `pat1` followed by `pat2` and replace it with the text of `pat2` `pat1==pat2` | `pat1`, assuming `pat2` also matches with the same length `pat1!=pat2` | `pat1`, unless `pat2` also matches with the same length `#( block comment )#` | A block comment diff --git a/bp.1 b/bp.1 index 2af9967..1dfe7e0 100644 --- a/bp.1 +++ b/bp.1 @@ -168,7 +168,7 @@ E.g. \fB`"..`" % (`\\.) .B @\fI\fB=\fI\fR \fBLet-\fI\fB-equal-\fI\fR (named capture) -.B {\fI\fB => "\fI\fB"} +.B \fI\fB => "\fI\fB" \fBReplace-\fI\fB-with-\fI\fR. Note: \fI\fR should be a string, and it may contain references to captured values: \fB@0\fR (the whole of \fI\fR), \fB@1\fR (the first capture in \fI\fR), diff --git a/compiler.c b/compiler.c index 937407d..2ad0e32 100644 --- a/compiler.c +++ b/compiler.c @@ -322,7 +322,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) break; } // Parentheses - case '(': { + case '(': case '{': { + char closing = c == '(' ? ')' : '}'; free(op); op = bpeg_simplepattern(f, str); if (!op) @@ -330,7 +331,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) op = expand_choices(f, op); str = op->end; str = after_spaces(str); - if (!matchchar(&str, ')')) + if (!matchchar(&str, closing)) file_err(f, origin, str, "This parenthesis group isn't properly closed."); op->start = origin; op->end = str; @@ -394,57 +395,6 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) op->args.pat = pat; break; } - // Replacement - case '{': { - str = after_spaces(str); - vm_op_t *pat = NULL; - if (strncmp(str, "=>", 2) == 0) { - str += strlen("=>"); - } else { - pat = bpeg_simplepattern(f, str); - if (!pat) - file_err(f, str, str, "There should be a valid pattern inside this replacement."); - pat = expand_choices(f, pat); - str = pat->end; - str = after_spaces(str); - if (!(matchchar(&str, '=') && matchchar(&str, '>'))) - file_err(f, str, str, "There should be a '=>' after a pattern inside a replacement."); - } - str = after_spaces(str); - - char quote = *str; - const char *replacement; - size_t replace_len; - if (matchchar(&str, '}')) { - replacement = strdup(""); - replace_len = 0; - } else { - if (!(matchchar(&str, '"') || matchchar(&str, '\''))) - file_err(f, str, str, "There should be a string literal as a replacement here."); - const char *repstr = str; - for (; *str && *str != quote; str++) { - if (*str == '\\') { - if (!str[1] || str[1] == '\n') - file_err(f, str, str+1, - "There should be an escape sequence after this backslash."); - ++str; - } - } - replace_len = (size_t)(str-repstr); - replacement = xcalloc(sizeof(char), replace_len+1); - memcpy((void*)replacement, repstr, (size_t)(str-repstr)); - if (!matchchar(&str, quote)) - file_err(f, &repstr[-1], str, "This string doesn't have a closing quote."); - if (!matchchar(&str, '}')) - file_err(f, origin, str, "This replacement doesn't have a closing '}'"); - } - op->op = VM_REPLACE; - op->args.replace.pat = pat; - op->args.replace.text = replacement; - op->args.replace.len = replace_len; - if (pat != NULL) op->len = pat->len; - break; - } // Special rules: case '_': case '^': case '$': case '|': { if (matchchar(&str, c)) { // double __, ^^, $$ @@ -487,11 +437,41 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str) // Postfix operators: postfix: - if (str >= f->end) return op; str = after_spaces(str); - if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality == and inequality != - int equal = str[0] == '='; - str = after_spaces(str+2); + if (str+2 < f->end && matchstr(&str, "=>")) { // Replacement => + str = after_spaces(str); + char quote = *str; + if (!(matchchar(&str, '"') || matchchar(&str, '\''))) + file_err(f, str, str, "There should be a string literal as a replacement here."); + const char *repstr = str; + for (; *str && *str != quote; str++) { + if (*str == '\\') { + if (!str[1] || str[1] == '\n') + file_err(f, str, str+1, + "There should be an escape sequence after this backslash."); + ++str; + } + } + if (!matchchar(&str, quote)) + file_err(f, &repstr[-1], str, "This string doesn't have a closing quote."); + + size_t replace_len = (size_t)(str-repstr-1); + const char *replacement = xcalloc(sizeof(char), replace_len+1); + memcpy((void*)replacement, repstr, replace_len); + + vm_op_t *pat = op; + op = new(vm_op_t); + op->op = VM_REPLACE; + op->args.replace.pat = pat; + op->args.replace.text = replacement; + op->args.replace.len = replace_len; + op->len = pat->len; + op->start = pat->start; + op->end = str; + goto postfix; + + } else if (str+2 < f->end && (matchstr(&str, "!=") || matchstr(&str, "=="))) { // Equality == and inequality != + int equal = str[-2] == '='; vm_op_t *first = op; vm_op_t *second = bpeg_simplepattern(f, str); if (!second) diff --git a/grammars/bpeg.bp b/grammars/bpeg.bp index c9422a8..c7960dd 100644 --- a/grammars/bpeg.bp +++ b/grammars/bpeg.bp @@ -1,22 +1,23 @@ # This is a file defining the BPEG grammar using BPEG syntax -Grammar: __ *(Def [__`;])%__ __ ($$ / @!={... => "Could not parse this code"}) +Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(... => "Could not parse this code")) Def: @name=id _ `: __ ( @definition=extended-pat - / $$ @!={=>"No definition for rule"} - / @!={...>(`;/id_`:/$) => "Invalid definition: @0"}) + / $$ @!=(''=>"No definition for rule") + / @!=(...>(`;/id_`:/$) => "Invalid definition: @0")) # This is used for command line arguments: String-pattern: ... % (Nodent / Escape / `\ pat [`;]) -pat: simple-pat !(__("!="/"==")) / suffixed-pat +pat: simple-pat !(__("!="/"=="/"=>")) / suffixed-pat simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range - / Escape / Repeat / Optional / After / Before / Capture / Replace + / Escape / Repeat / Optional / After / Before / Capture / Ref / parens suffixed-pat: ( Eq-pat / Not-eq-pat + / Replace ) Eq-pat: @first=pat__"=="__@second=pat @@ -24,23 +25,23 @@ Not-eq-pat: @first=pat__"!="__@second=pat Dot: `. !`. String: ( - `" @s=*(Escape / !`".) (`" / @!={=> "Expected closing quote here"}) - / `' @s=*(Escape / !`'.) (`' / @!={=> "Expected closing quote here"}) + `" @s=*(Escape / !`".) (`" / @!=(''=> "Expected closing quote here")) + / `' @s=*(Escape / !`'.) (`' / @!=(''=> "Expected closing quote here")) ) Chars: `` @+(Char/Char-range) % `, -Char-range: @low=. `- (@high=. / @!={=> "Expected a second character to form a character range"}) -Char: (@s=. / @!={=> "Expected a character following the '`'"}) +Char-range: @low=. `- (@high=. / @!=(''=> "Expected a second character to form a character range")) +Char: (@s=. / @!=(''=> "Expected a character following the '`'")) Escape-range: `\ @low=escape-sequence `- @high=escape-sequence Escape: `\ (@s=escape-sequence - / $ @!={=>"Backslashes are used for escape sequences, not splitting lines"} - / @!={. *(Abc/`0-9) => "Invalid escape sequence: '@0'"} + / $ @!=(''=>"Backslashes are used for escape sequences, not splitting lines") + / @!=((. *(Abc/`0-9)) => "Invalid escape sequence: '@0'") ) escape-sequence: ( `n,t,r,e,b,a,v / 1-3 `0-7 / `x 2 `0-9,a-f,A-F ) -No: `! (_@pat / @!={=>"Expected a pattern after the exclamation mark"}) +No: `! (_@pat / @!=(''=>"Expected a pattern after the exclamation mark")) Nodent: `\ `N Upto-and: 2-3`. [_@first=simple-pat] [__`%__@second=simple-pat] Repeat: ( @@ -49,16 +50,16 @@ Repeat: ( / @min=int _ `+ @max='' / @min=@max=int ) __ @repeat-pat=pat [__`%__@sep=pat] -Optional: `[ __ extended-pat (__`] / @!={=> "Expected closing square bracket here"}) +Optional: `[ __ extended-pat (__`] / @!=(''=> "Expected closing square bracket here")) After: `< _ pat Before: `> _ pat -Capture: `@ [_ @capture-name=(id/`!) __ !"=>" `=] __ (@capture=pat / @!={=> "Expected pattern to capture"}) -Replace: `{ __ ( - [@replace-pat=extended-pat __] "=>" [__ @replacement=String] - ) __ (`} / @!={=> "Expected closing brace here"}) +Capture: `@ [_ @capture-name=(id/`!) __ !"=>" `=] __ (@capture=pat / @!=(''=> "Expected pattern to capture")) +Replace: ( + @replace-pat=pat __ "=>" (__ @replacement=String / @!=(''=> "Expected replacement string")) + ) Ref: @name=id !(_`:) -parens: `( __ extended-pat (__ `) / @!={=> "Expected closing parenthesis here"}) +parens: `( __ extended-pat (__ `) / @!=(''=> "Expected closing parenthesis here")) Chain: 2+@pat%__ Otherwise: 2+@(Chain/pat)%(__`/__) diff --git a/grammars/builtins.bp b/grammars/builtins.bp index f69f988..9c34fe4 100644 --- a/grammars/builtins.bp +++ b/grammars/builtins.bp @@ -17,11 +17,11 @@ find-all: ( (include-binary-files / is-text-file) *(!..pattern ~(..\n)) +(+(..@pattern) ..(\n/$$) / ~(..\n)) - [{!<\n => "\n"}] + [!<\n => "\n"] ) only-matches: ( (include-binary-files / is-text-file) - +{...@pattern =>'@1\n'} + +(...@pattern =>'@1\n') ) # Helper definitions (commonly used) diff --git a/utils.c b/utils.c index 53bc3fc..c2473d2 100644 --- a/utils.c +++ b/utils.c @@ -74,6 +74,20 @@ int matchchar(const char **str, char c) } } +/* + * Check if a string is found and if so, move past it. + */ +int matchstr(const char **str, const char *target) +{ + const char *next = after_spaces(*str); + if (strncmp(next, target, strlen(target)) == 0) { + *str = &next[strlen(target)]; + return 1; + } else { + return 0; + } +} + /* * Process a string escape sequence for a character and return the * character that was escaped. diff --git a/utils.h b/utils.h index 2912426..83659d9 100644 --- a/utils.h +++ b/utils.h @@ -27,6 +27,8 @@ const char *after_spaces(const char *str); __attribute__((nonnull)) int matchchar(const char **str, char c); __attribute__((nonnull)) +int matchstr(const char **str, const char *target); +__attribute__((nonnull)) size_t unescape_string(char *dest, const char *src, size_t bufsize); void *memcheck(void *p); int memicmp(const void *s1, const void *s2, size_t n); -- cgit v1.2.3