From a07da1989d7ec515d4bb08051ea6940960577446 Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Wed, 30 Dec 2020 15:30:19 -0800
Subject: Simplified syntax for replacement: just =>, no need for {}

---
 README.md            |  5 ++-
 bp.1                 |  2 +-
 compiler.c           | 94 +++++++++++++++++++++-------------------------------
 grammars/bpeg.bp     | 37 +++++++++++----------
 grammars/builtins.bp |  4 +--
 utils.c              | 14 ++++++++
 utils.h              |  2 ++
 7 files changed, 77 insertions(+), 81 deletions(-)
diff --git a/README.md b/README.md
index 784296a..cd290e1 100644
--- a/README.md
+++ b/README.md
@@ -60,9 +60,8 @@ Pattern            | Meaning
 `>pat`             | `pat` matches just in front of the current position (lookahead)
 `@pat`             | Capture `pat` (used for text replacement and backreferences)
 `@foo=pat`         | Let `foo` be the text of `pat` (used for text replacement and backreferences)
-`{pat => "replacement"}` | Match `pat` and replace it with `replacement`
-`{pat @other => "@1"}` | Match `pat` followed by `other` and replace it with the text of `other`
-`{pat @keep=other => "@keep"}` | Match `pat` followed by `other` and replace it with the text of `other`
+`pat => "replacement"` | Match `pat` and replace it with `replacement`
+`(pat1 @keep=pat2) => "@keep"` | Match `pat1` followed by `pat2` and replace it with the text of `pat2`
 `pat1==pat2`       | `pat1`, assuming `pat2` also matches with the same length
 `pat1!=pat2`       | `pat1`, unless `pat2` also matches with the same length
 `#( block comment )#` | A block comment
diff --git a/bp.1 b/bp.1
index 2af9967..1dfe7e0 100644
--- a/bp.1
+++ b/bp.1
@@ -168,7 +168,7 @@ E.g. \fB`"..`" % (`\\.)
 .B @\fI<name>\fB=\fI<pat>\fR
 \fBLet-\fI<name>\fB-equal-\fI<pat>\fR (named capture)
 
-.B {\fI<pat>\fB => "\fI<replacement>\fB"}
+.B \fI<pat>\fB => "\fI<replacement>\fB"
 \fBReplace-\fI<pat>\fB-with-\fI<replacement>\fR. Note: \fI<replacement>\fR should
 be a string, and it may contain references to captured values: \fB@0\fR
 (the whole of \fI<pat>\fR), \fB@1\fR (the first capture in \fI<pat>\fR),
diff --git a/compiler.c b/compiler.c
index 937407d..2ad0e32 100644
--- a/compiler.c
+++ b/compiler.c
@@ -322,7 +322,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
             break;
         }
         // Parentheses
-        case '(': {
+        case '(': case '{': {
+            char closing = c == '(' ? ')' : '}';
             free(op);
             op = bpeg_simplepattern(f, str);
             if (!op)
@@ -330,7 +331,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
             op = expand_choices(f, op);
             str = op->end;
             str = after_spaces(str);
-            if (!matchchar(&str, ')'))
+            if (!matchchar(&str, closing))
                 file_err(f, origin, str, "This parenthesis group isn't properly closed.");
             op->start = origin;
             op->end = str;
@@ -394,57 +395,6 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
             op->args.pat = pat;
             break;
         }
-        // Replacement
-        case '{': {
-            str = after_spaces(str);
-            vm_op_t *pat = NULL;
-            if (strncmp(str, "=>", 2) == 0) {
-                str += strlen("=>");
-            } else {
-                pat = bpeg_simplepattern(f, str);
-                if (!pat)
-                    file_err(f, str, str, "There should be a valid pattern inside this replacement.");
-                pat = expand_choices(f, pat);
-                str = pat->end;
-                str = after_spaces(str);
-                if (!(matchchar(&str, '=') && matchchar(&str, '>')))
-                    file_err(f, str, str, "There should be a '=>' after a pattern inside a replacement.");
-            }
-            str = after_spaces(str);
-
-            char quote = *str;
-            const char *replacement;
-            size_t replace_len;
-            if (matchchar(&str, '}')) {
-                replacement = strdup("");
-                replace_len = 0;
-            } else {
-                if (!(matchchar(&str, '"') || matchchar(&str, '\'')))
-                    file_err(f, str, str, "There should be a string literal as a replacement here.");
-                const char *repstr = str;
-                for (; *str && *str != quote; str++) {
-                    if (*str == '\\') {
-                        if (!str[1] || str[1] == '\n')
-                            file_err(f, str, str+1,
-                                     "There should be an escape sequence after this backslash.");
-                        ++str;
-                    }
-                }
-                replace_len = (size_t)(str-repstr);
-                replacement = xcalloc(sizeof(char), replace_len+1);
-                memcpy((void*)replacement, repstr, (size_t)(str-repstr));
-                if (!matchchar(&str, quote))
-                    file_err(f, &repstr[-1], str, "This string doesn't have a closing quote.");
-                if (!matchchar(&str, '}'))
-                    file_err(f, origin, str, "This replacement doesn't have a closing '}'");
-            }
-            op->op = VM_REPLACE;
-            op->args.replace.pat = pat;
-            op->args.replace.text = replacement;
-            op->args.replace.len = replace_len;
-            if (pat != NULL) op->len = pat->len;
-            break;
-        }
         // Special rules:
         case '_': case '^': case '$': case '|': {
             if (matchchar(&str, c)) { // double __, ^^, $$
@@ -487,11 +437,41 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
 
     // Postfix operators:
   postfix:
-    if (str >= f->end) return op;
     str = after_spaces(str);
-    if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
-        int equal = str[0] == '=';
-        str = after_spaces(str+2);
+    if (str+2 < f->end && matchstr(&str, "=>")) { // Replacement <pat> => <pat>
+        str = after_spaces(str);
+        char quote = *str;
+        if (!(matchchar(&str, '"') || matchchar(&str, '\'')))
+            file_err(f, str, str, "There should be a string literal as a replacement here.");
+        const char *repstr = str;
+        for (; *str && *str != quote; str++) {
+            if (*str == '\\') {
+                if (!str[1] || str[1] == '\n')
+                    file_err(f, str, str+1,
+                             "There should be an escape sequence after this backslash.");
+                ++str;
+            }
+        }
+        if (!matchchar(&str, quote))
+            file_err(f, &repstr[-1], str, "This string doesn't have a closing quote.");
+
+        size_t replace_len = (size_t)(str-repstr-1);
+        const char *replacement = xcalloc(sizeof(char), replace_len+1);
+        memcpy((void*)replacement, repstr, replace_len);
+        
+        vm_op_t *pat = op;
+        op = new(vm_op_t);
+        op->op = VM_REPLACE;
+        op->args.replace.pat = pat;
+        op->args.replace.text = replacement;
+        op->args.replace.len = replace_len;
+        op->len = pat->len;
+        op->start = pat->start;
+        op->end = str;
+        goto postfix;
+
+    } else if (str+2 < f->end && (matchstr(&str, "!=") || matchstr(&str, "=="))) { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
+        int equal = str[-2] == '=';
         vm_op_t *first = op;
         vm_op_t *second = bpeg_simplepattern(f, str);
         if (!second)
diff --git a/grammars/bpeg.bp b/grammars/bpeg.bp
index c9422a8..c7960dd 100644
--- a/grammars/bpeg.bp
+++ b/grammars/bpeg.bp
@@ -1,22 +1,23 @@
 # This is a file defining the BPEG grammar using BPEG syntax
 
-Grammar: __ *(Def [__`;])%__ __ ($$ / @!={... => "Could not parse this code"})
+Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(... => "Could not parse this code"))
 Def: @name=id _ `: __ (
       @definition=extended-pat
-    / $$ @!={=>"No definition for rule"}
-    / @!={...>(`;/id_`:/$) => "Invalid definition: @0"})
+    / $$ @!=(''=>"No definition for rule")
+    / @!=(...>(`;/id_`:/$) => "Invalid definition: @0"))
 
 # This is used for command line arguments:
 String-pattern: ... % (Nodent / Escape / `\ pat [`;])
 
-pat: simple-pat !(__("!="/"==")) / suffixed-pat
+pat: simple-pat !(__("!="/"=="/"=>")) / suffixed-pat
 simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range
-    / Escape / Repeat / Optional / After / Before / Capture / Replace
+    / Escape / Repeat / Optional / After / Before / Capture
     / Ref / parens
 
 suffixed-pat: (
       Eq-pat
     / Not-eq-pat
+    / Replace
 )
 
 Eq-pat: @first=pat__"=="__@second=pat
@@ -24,23 +25,23 @@ Not-eq-pat: @first=pat__"!="__@second=pat
 
 Dot: `. !`.
 String: (
-        `" @s=*(Escape / !`".) (`" / @!={=> "Expected closing quote here"})
-      / `' @s=*(Escape / !`'.) (`' / @!={=> "Expected closing quote here"})
+        `" @s=*(Escape / !`".) (`" / @!=(''=> "Expected closing quote here"))
+      / `' @s=*(Escape / !`'.) (`' / @!=(''=> "Expected closing quote here"))
     )
 Chars: `` @+(Char/Char-range) % `,
-Char-range: @low=. `- (@high=. / @!={=> "Expected a second character to form a character range"})
-Char: (@s=. / @!={=> "Expected a character following the '`'"})
+Char-range: @low=. `- (@high=. / @!=(''=> "Expected a second character to form a character range"))
+Char: (@s=. / @!=(''=> "Expected a character following the '`'"))
 Escape-range: `\ @low=escape-sequence `- @high=escape-sequence
 Escape: `\ (@s=escape-sequence
-    / $ @!={=>"Backslashes are used for escape sequences, not splitting lines"}
-    / @!={. *(Abc/`0-9) => "Invalid escape sequence: '@0'"}
+    / $ @!=(''=>"Backslashes are used for escape sequences, not splitting lines")
+    / @!=((. *(Abc/`0-9)) => "Invalid escape sequence: '@0'")
 )
 escape-sequence: (
        `n,t,r,e,b,a,v
       / 1-3 `0-7
       / `x 2 `0-9,a-f,A-F
     )
-No: `! (_@pat / @!={=>"Expected a pattern after the exclamation mark"})
+No: `! (_@pat / @!=(''=>"Expected a pattern after the exclamation mark"))
 Nodent: `\ `N
 Upto-and: 2-3`. [_@first=simple-pat] [__`%__@second=simple-pat]
 Repeat: (
@@ -49,16 +50,16 @@ Repeat: (
       / @min=int _ `+   @max=''
       / @min=@max=int
     ) __ @repeat-pat=pat [__`%__@sep=pat]
-Optional: `[ __ extended-pat (__`] / @!={=> "Expected closing square bracket here"})
+Optional: `[ __ extended-pat (__`] / @!=(''=> "Expected closing square bracket here"))
 After: `< _ pat
 Before: `> _ pat
-Capture: `@ [_ @capture-name=(id/`!) __ !"=>" `=] __ (@capture=pat / @!={=> "Expected pattern to capture"})
-Replace: `{ __ (
-      [@replace-pat=extended-pat __] "=>" [__ @replacement=String]
-    ) __ (`} / @!={=> "Expected closing brace here"})
+Capture: `@ [_ @capture-name=(id/`!) __ !"=>" `=] __ (@capture=pat / @!=(''=> "Expected pattern to capture"))
+Replace: (
+      @replace-pat=pat __ "=>" (__ @replacement=String / @!=(''=> "Expected replacement string"))
+    )
 Ref: @name=id !(_`:)
 
-parens: `( __ extended-pat (__ `) / @!={=> "Expected closing parenthesis here"})
+parens: `( __ extended-pat (__ `) / @!=(''=> "Expected closing parenthesis here"))
 
 Chain: 2+@pat%__
 Otherwise: 2+@(Chain/pat)%(__`/__)
diff --git a/grammars/builtins.bp b/grammars/builtins.bp
index f69f988..9c34fe4 100644
--- a/grammars/builtins.bp
+++ b/grammars/builtins.bp
@@ -17,11 +17,11 @@ find-all: (
     (include-binary-files / is-text-file)
     *(!..pattern ~(..\n))
     +(+(..@pattern) ..(\n/$$) / ~(..\n))
-    [{!<\n => "\n"}]
+    [!<\n => "\n"]
 )
 only-matches: (
     (include-binary-files / is-text-file)
-    +{...@pattern =>'@1\n'}
+    +(...@pattern =>'@1\n')
 )
 
 # Helper definitions (commonly used)
diff --git a/utils.c b/utils.c
index 53bc3fc..c2473d2 100644
--- a/utils.c
+++ b/utils.c
@@ -74,6 +74,20 @@ int matchchar(const char **str, char c)
     }
 }
 
+/*
+ * If `target` occurs at after_spaces(*str), set *str just past it and return 1; else return 0 (*str untouched).
+ */
+int matchstr(const char **str, const char *target)
+{
+    const char *next = after_spaces(*str); // skip ahead first (presumably past whitespace; see after_spaces)
+    if (strncmp(next, target, strlen(target)) == 0) { // prefix compare: only strlen(target) bytes are checked
+        *str = &next[strlen(target)]; // advance the caller's cursor past the matched text
+        return 1;
+    } else {
+        return 0; // no match: the caller's cursor is not modified
+    }
+}
+
 /*
  * Process a string escape sequence for a character and return the
  * character that was escaped.
diff --git a/utils.h b/utils.h
index 2912426..83659d9 100644
--- a/utils.h
+++ b/utils.h
@@ -27,6 +27,8 @@ const char *after_spaces(const char *str);
 __attribute__((nonnull))
 int matchchar(const char **str, char c);
 __attribute__((nonnull))
+int matchstr(const char **str, const char *target);
+__attribute__((nonnull))
 size_t unescape_string(char *dest, const char *src, size_t bufsize);
 void *memcheck(void *p);
 int memicmp(const void *s1, const void *s2, size_t n);
-- 
cgit v1.2.3