about summary refs log tree commit diff
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2020-12-30 15:30:19 -0800
committer Bruce Hill <bruce@bruce-hill.com> 2020-12-30 15:30:19 -0800
commit a07da1989d7ec515d4bb08051ea6940960577446 (patch)
tree 6f506818554fc205ddcc46a3c9bfe6054ae9cea7
parent 7d4bc36949cec8e5c791c352a264cd1dea4f8a1e (diff)
Simplified syntax for replacement: just =>, no need for {}
-rw-r--r-- README.md 5
-rw-r--r-- bp.1 2
-rw-r--r-- compiler.c 94
-rw-r--r-- grammars/bpeg.bp 37
-rw-r--r-- grammars/builtins.bp 4
-rw-r--r-- utils.c 14
-rw-r--r-- utils.h 2
7 files changed, 77 insertions, 81 deletions
diff --git a/README.md b/README.md
index 784296a..cd290e1 100644
--- a/README.md
+++ b/README.md
@@ -60,9 +60,8 @@ Pattern | Meaning
`>pat` | `pat` matches just in front of the current position (lookahead)
`@pat` | Capture `pat` (used for text replacement and backreferences)
`@foo=pat` | Let `foo` be the text of `pat` (used for text replacement and backreferences)
-`{pat => "replacement"}` | Match `pat` and replace it with `replacement`
-`{pat @other => "@1"}` | Match `pat` followed by `other` and replace it with the text of `other`
-`{pat @keep=other => "@keep"}` | Match `pat` followed by `other` and replace it with the text of `other`
+`pat => "replacement"` | Match `pat` and replace it with `replacement`
+`(pat1 @keep=pat2) => "@keep"` | Match `pat1` followed by `pat2` and replace it with the text of `pat2`
`pat1==pat2` | `pat1`, assuming `pat2` also matches with the same length
`pat1!=pat2` | `pat1`, unless `pat2` also matches with the same length
`#( block comment )#` | A block comment
diff --git a/bp.1 b/bp.1
index 2af9967..1dfe7e0 100644
--- a/bp.1
+++ b/bp.1
@@ -168,7 +168,7 @@ E.g. \fB`"..`" % (`\\.)
.B @\fI<name>\fB=\fI<pat>\fR
\fBLet-\fI<name>\fB-equal-\fI<pat>\fR (named capture)
-.B {\fI<pat>\fB => "\fI<replacement>\fB"}
+.B \fI<pat>\fB => "\fI<replacement>\fB"
\fBReplace-\fI<pat>\fB-with-\fI<replacement>\fR. Note: \fI<replacement>\fR should
be a string, and it may contain references to captured values: \fB@0\fR
(the whole of \fI<pat>\fR), \fB@1\fR (the first capture in \fI<pat>\fR),
diff --git a/compiler.c b/compiler.c
index 937407d..2ad0e32 100644
--- a/compiler.c
+++ b/compiler.c
@@ -322,7 +322,8 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
break;
}
// Parentheses
- case '(': {
+ case '(': case '{': {
+ char closing = c == '(' ? ')' : '}';
free(op);
op = bpeg_simplepattern(f, str);
if (!op)
@@ -330,7 +331,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
op = expand_choices(f, op);
str = op->end;
str = after_spaces(str);
- if (!matchchar(&str, ')'))
+ if (!matchchar(&str, closing))
file_err(f, origin, str, "This parenthesis group isn't properly closed.");
op->start = origin;
op->end = str;
@@ -394,57 +395,6 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
op->args.pat = pat;
break;
}
- // Replacement
- case '{': {
- str = after_spaces(str);
- vm_op_t *pat = NULL;
- if (strncmp(str, "=>", 2) == 0) {
- str += strlen("=>");
- } else {
- pat = bpeg_simplepattern(f, str);
- if (!pat)
- file_err(f, str, str, "There should be a valid pattern inside this replacement.");
- pat = expand_choices(f, pat);
- str = pat->end;
- str = after_spaces(str);
- if (!(matchchar(&str, '=') && matchchar(&str, '>')))
- file_err(f, str, str, "There should be a '=>' after a pattern inside a replacement.");
- }
- str = after_spaces(str);
-
- char quote = *str;
- const char *replacement;
- size_t replace_len;
- if (matchchar(&str, '}')) {
- replacement = strdup("");
- replace_len = 0;
- } else {
- if (!(matchchar(&str, '"') || matchchar(&str, '\'')))
- file_err(f, str, str, "There should be a string literal as a replacement here.");
- const char *repstr = str;
- for (; *str && *str != quote; str++) {
- if (*str == '\\') {
- if (!str[1] || str[1] == '\n')
- file_err(f, str, str+1,
- "There should be an escape sequence after this backslash.");
- ++str;
- }
- }
- replace_len = (size_t)(str-repstr);
- replacement = xcalloc(sizeof(char), replace_len+1);
- memcpy((void*)replacement, repstr, (size_t)(str-repstr));
- if (!matchchar(&str, quote))
- file_err(f, &repstr[-1], str, "This string doesn't have a closing quote.");
- if (!matchchar(&str, '}'))
- file_err(f, origin, str, "This replacement doesn't have a closing '}'");
- }
- op->op = VM_REPLACE;
- op->args.replace.pat = pat;
- op->args.replace.text = replacement;
- op->args.replace.len = replace_len;
- if (pat != NULL) op->len = pat->len;
- break;
- }
// Special rules:
case '_': case '^': case '$': case '|': {
if (matchchar(&str, c)) { // double __, ^^, $$
@@ -487,11 +437,41 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
// Postfix operators:
postfix:
- if (str >= f->end) return op;
str = after_spaces(str);
- if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
- int equal = str[0] == '=';
- str = after_spaces(str+2);
+ if (str+2 < f->end && matchstr(&str, "=>")) { // Replacement <pat> => "<replacement>" (a quoted string literal, not a second pattern)
+ str = after_spaces(str);
+ char quote = *str;
+ if (!(matchchar(&str, '"') || matchchar(&str, '\'')))
+ file_err(f, str, str, "There should be a string literal as a replacement here.");
+ const char *repstr = str;
+ for (; *str && *str != quote; str++) {
+ if (*str == '\\') {
+ if (!str[1] || str[1] == '\n')
+ file_err(f, str, str+1,
+ "There should be an escape sequence after this backslash.");
+ ++str;
+ }
+ }
+ if (!matchchar(&str, quote))
+ file_err(f, &repstr[-1], str, "This string doesn't have a closing quote.");
+
+ size_t replace_len = (size_t)(str-repstr-1);
+ const char *replacement = xcalloc(sizeof(char), replace_len+1);
+ memcpy((void*)replacement, repstr, replace_len);
+
+ vm_op_t *pat = op;
+ op = new(vm_op_t);
+ op->op = VM_REPLACE;
+ op->args.replace.pat = pat;
+ op->args.replace.text = replacement;
+ op->args.replace.len = replace_len;
+ op->len = pat->len;
+ op->start = pat->start;
+ op->end = str;
+ goto postfix;
+
+ } else if (str+2 < f->end && (matchstr(&str, "!=") || matchstr(&str, "=="))) { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
+ int equal = str[-2] == '=';
vm_op_t *first = op;
vm_op_t *second = bpeg_simplepattern(f, str);
if (!second)
diff --git a/grammars/bpeg.bp b/grammars/bpeg.bp
index c9422a8..c7960dd 100644
--- a/grammars/bpeg.bp
+++ b/grammars/bpeg.bp
@@ -1,22 +1,23 @@
# This is a file defining the BPEG grammar using BPEG syntax
-Grammar: __ *(Def [__`;])%__ __ ($$ / @!={... => "Could not parse this code"})
+Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(... => "Could not parse this code"))
Def: @name=id _ `: __ (
@definition=extended-pat
- / $$ @!={=>"No definition for rule"}
- / @!={...>(`;/id_`:/$) => "Invalid definition: @0"})
+ / $$ @!=(''=>"No definition for rule")
+ / @!=(...>(`;/id_`:/$) => "Invalid definition: @0"))
# This is used for command line arguments:
String-pattern: ... % (Nodent / Escape / `\ pat [`;])
-pat: simple-pat !(__("!="/"==")) / suffixed-pat
+pat: simple-pat !(__("!="/"=="/"=>")) / suffixed-pat
simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range
- / Escape / Repeat / Optional / After / Before / Capture / Replace
+ / Escape / Repeat / Optional / After / Before / Capture
/ Ref / parens
suffixed-pat: (
Eq-pat
/ Not-eq-pat
+ / Replace
)
Eq-pat: @first=pat__"=="__@second=pat
@@ -24,23 +25,23 @@ Not-eq-pat: @first=pat__"!="__@second=pat
Dot: `. !`.
String: (
- `" @s=*(Escape / !`".) (`" / @!={=> "Expected closing quote here"})
- / `' @s=*(Escape / !`'.) (`' / @!={=> "Expected closing quote here"})
+ `" @s=*(Escape / !`".) (`" / @!=(''=> "Expected closing quote here"))
+ / `' @s=*(Escape / !`'.) (`' / @!=(''=> "Expected closing quote here"))
)
Chars: `` @+(Char/Char-range) % `,
-Char-range: @low=. `- (@high=. / @!={=> "Expected a second character to form a character range"})
-Char: (@s=. / @!={=> "Expected a character following the '`'"})
+Char-range: @low=. `- (@high=. / @!=(''=> "Expected a second character to form a character range"))
+Char: (@s=. / @!=(''=> "Expected a character following the '`'"))
Escape-range: `\ @low=escape-sequence `- @high=escape-sequence
Escape: `\ (@s=escape-sequence
- / $ @!={=>"Backslashes are used for escape sequences, not splitting lines"}
- / @!={. *(Abc/`0-9) => "Invalid escape sequence: '@0'"}
+ / $ @!=(''=>"Backslashes are used for escape sequences, not splitting lines")
+ / @!=((. *(Abc/`0-9)) => "Invalid escape sequence: '@0'")
)
escape-sequence: (
`n,t,r,e,b,a,v
/ 1-3 `0-7
/ `x 2 `0-9,a-f,A-F
)
-No: `! (_@pat / @!={=>"Expected a pattern after the exclamation mark"})
+No: `! (_@pat / @!=(''=>"Expected a pattern after the exclamation mark"))
Nodent: `\ `N
Upto-and: 2-3`. [_@first=simple-pat] [__`%__@second=simple-pat]
Repeat: (
@@ -49,16 +50,16 @@ Repeat: (
/ @min=int _ `+ @max=''
/ @min=@max=int
) __ @repeat-pat=pat [__`%__@sep=pat]
-Optional: `[ __ extended-pat (__`] / @!={=> "Expected closing square bracket here"})
+Optional: `[ __ extended-pat (__`] / @!=(''=> "Expected closing square bracket here"))
After: `< _ pat
Before: `> _ pat
-Capture: `@ [_ @capture-name=(id/`!) __ !"=>" `=] __ (@capture=pat / @!={=> "Expected pattern to capture"})
-Replace: `{ __ (
- [@replace-pat=extended-pat __] "=>" [__ @replacement=String]
- ) __ (`} / @!={=> "Expected closing brace here"})
+Capture: `@ [_ @capture-name=(id/`!) __ !"=>" `=] __ (@capture=pat / @!=(''=> "Expected pattern to capture"))
+Replace: (
+ @replace-pat=pat __ "=>" (__ @replacement=String / @!=(''=> "Expected replacement string"))
+ )
Ref: @name=id !(_`:)
-parens: `( __ extended-pat (__ `) / @!={=> "Expected closing parenthesis here"})
+parens: `( __ extended-pat (__ `) / @!=(''=> "Expected closing parenthesis here"))
Chain: 2+@pat%__
Otherwise: 2+@(Chain/pat)%(__`/__)
diff --git a/grammars/builtins.bp b/grammars/builtins.bp
index f69f988..9c34fe4 100644
--- a/grammars/builtins.bp
+++ b/grammars/builtins.bp
@@ -17,11 +17,11 @@ find-all: (
(include-binary-files / is-text-file)
*(!..pattern ~(..\n))
+(+(..@pattern) ..(\n/$$) / ~(..\n))
- [{!<\n => "\n"}]
+ [!<\n => "\n"]
)
only-matches: (
(include-binary-files / is-text-file)
- +{...@pattern =>'@1\n'}
+ +(...@pattern =>'@1\n')
)
# Helper definitions (commonly used)
diff --git a/utils.c b/utils.c
index 53bc3fc..c2473d2 100644
--- a/utils.c
+++ b/utils.c
@@ -75,6 +75,20 @@ int matchchar(const char **str, char c)
}
/*
+ * Check if target is found at *str (after after_spaces() skips ahead) and if so, move *str past it. Returns 1 on a match, else 0.
+ */
+int matchstr(const char **str, const char *target)
+{
+ const char *next = after_spaces(*str);
+ if (strncmp(next, target, strlen(target)) == 0) {
+ *str = &next[strlen(target)];
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/*
* Process a string escape sequence for a character and return the
* character that was escaped.
* Set *end = the first character past the end of the escape sequence.
diff --git a/utils.h b/utils.h
index 2912426..83659d9 100644
--- a/utils.h
+++ b/utils.h
@@ -27,6 +27,8 @@ const char *after_spaces(const char *str);
__attribute__((nonnull))
int matchchar(const char **str, char c);
__attribute__((nonnull))
+int matchstr(const char **str, const char *target);
+__attribute__((nonnull))
size_t unescape_string(char *dest, const char *src, size_t bufsize);
void *memcheck(void *p);
int memicmp(const void *s1, const void *s2, size_t n);