From b4c8a33a0cbf4938b53458ded4d46efc6e8820ab Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Wed, 20 Jan 2021 15:23:57 -0800 Subject: Tweaked `..` syntax to make it more flexible (now: `.. % skip pat`) --- README.md | 2 +- bp.1 | 2 +- grammars/bp.bp | 8 ++++---- grammars/builtins.bp | 10 +++++----- match.c | 6 +++--- pattern.c | 19 ++++++++++++++----- types.h | 2 +- 7 files changed, 29 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 14f6b57..1bf5301 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ Pattern | Meaning `pat1 pat2` | `pat1` followed by `pat2` `pat1 / pat2` | `pat1` if it matches, otherwise `pat2` `..pat` | Any text up to and including `pat` (except newlines) -`..pat1 % pat2` | Any text up to and including `pat1` (except newlines), skipping over instances of `pat2` +`.. % skip pat` | Any text up to and including `pat` (except newlines), skipping over instances of `skip` `.` | Any single character (except newline) `^^` | The start of the input `^` | The start of a line diff --git a/bp.1 b/bp.1 index 8bc1dbe..d7a0c35 100644 --- a/bp.1 +++ b/bp.1 @@ -158,7 +158,7 @@ the same indentation that occurs on the current line. .B .. \fI\fR Any text \fBup-to-and-including\fR \fI\fR (excluding newline) -.B .. \fI\fB % \fI\fR +.B .. % \fI\fR \fI\fB Any text \fBup-to-and-including\fR \fI\fR, but skipping over instances of \fI\fR. E.g. \fB`"..`" % (`\\.) diff --git a/grammars/bp.bp b/grammars/bp.bp index 0b222ca..725d213 100644 --- a/grammars/bp.bp +++ b/grammars/bp.bp @@ -5,14 +5,14 @@ # The grammar files provided with BP are not otherwise intended to be full # language grammars. -Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(..$$%\n => "Could not parse this code")) +Grammar: __ *(Def [__`;])%__ __ ($$ / @!=(..%\n$$ => "Could not parse this code")) Def: @name=id __ `: __ ( @definition=extended-pat / $$ @!=(''=>"No definition for rule") - / @!=(..>(`;/id_`:/$)%\n => "Invalid definition: @0")) + / @!=(..%\n>(`;/id_`:/$) => "Invalid definition: @0")) # This is used for command line arguments: -String-pattern: ..$$ % (\n / Nodent / Escape / `\ pat [`;]) +String-pattern: ..%(\n / Nodent / Escape / `\ pat [`;])$$ pat: simple-pat !(__("!="/"==")) / suffixed-pat simple-pat: Upto-and / Dot / String / Chars / Nodent / Escape-range @@ -47,7 +47,7 @@ escape-sequence: ( ) No: `! (__@pat / @!=(''=>"Expected a pattern after the exclamation mark")) Nodent: `\ `N -Upto-and: ".." [__@first=simple-pat] [__`%__@second=simple-pat] +Upto-and: ".." [__`%__@second=simple-pat] [__@first=simple-pat] Repeat: ( @min=(''=>'0') (`*=>"-") @max=(''=>'∞') / @min=int __ `- __ @max=int diff --git a/grammars/builtins.bp b/grammars/builtins.bp index 937db3c..76bf4f5 100644 --- a/grammars/builtins.bp +++ b/grammars/builtins.bp @@ -11,11 +11,11 @@ utf8-codepoint: ( ) crlf: \r\n cr: \r -anglebraces: `<..`> % (\n/anglebraces/string) -brackets: `[..`] % (\n/brackets/string) -braces: `{..`} % (\n/braces/string) -parens: `(..`) % (\n/parens/string) -string: `"..`" % (`\.) / `'..`' % (`\.) +anglebraces: `< ..%(\n/anglebraces/string) `> +brackets: `[ ..%(\n/brackets/string) `] +braces: `{ ..%(\n/braces/string) `} +parens: `( ..%(\n/parens/string) `) +string: `" ..%(`\.) `" / `' ..%(`\.) `' id: !<`a-z,A-Z,_,0-9 (`a-z,A-Z,_ *`a-z,A-Z,_,0-9)!=keyword | id-char: `a-z,A-Z,_,0-9 |: !<`a-z,A-Z,_,0-9 / !`a-z,A-Z,_,0-9 diff --git a/match.c b/match.c index 05d8a8a..e89abd4 100644 --- a/match.c +++ b/match.c @@ -207,7 +207,7 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool m->end = str; return m; } - case BP_UPTO_AND: { + case BP_UPTO: { match_t *m = new_match(); m->start = str; m->pat = pat; @@ -225,8 +225,8 @@ static match_t *match(def_t *defs, file_t *f, const char *str, pat_t *pat, bool if (target) { match_t *p = match(defs, f, str, target, ignorecase); if (p != NULL) { - ADD_OWNER(*dest, p); - m->end = p->end; + recycle_if_unused(&p); + m->end = str; return m; } } else if (str == f->end) { diff --git a/pattern.c b/pattern.c index 658f132..17560b4 100644 --- a/pattern.c +++ b/pattern.c @@ -146,6 +146,19 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second) chain->end = second->end; chain->args.multiple.first = first; chain->args.multiple.second = second; + + // If `first` is an UPTO operator (..) or contains one, then let it know + // that `second` is what it's up *to*. + for (pat_t *p = first; p; ) { + if (p->type == BP_UPTO) { + p->args.multiple.first = second; + break; + } else if (p->type == BP_CAPTURE) { + p = p->args.capture.capture_pat; + } else if (p->type == BP_EQUAL || p->type == BP_NOT_EQUAL) { + p = p->args.pat; + } else break; + } return chain; } @@ -199,12 +212,8 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) // Any char (dot) case '.': { if (*str == '.') { // ".." - pat_t *upto = new_pat(f, start, BP_UPTO_AND); + pat_t *upto = new_pat(f, start, BP_UPTO); ++str; - pat_t *till = bp_simplepattern(f, str); - upto->args.multiple.first = till; - if (till) - str = till->end; if (matchchar(&str, '%')) { pat_t *skip = bp_simplepattern(f, str); if (!skip) diff --git a/types.h b/types.h index f0e9ff6..9cbc665 100644 --- a/types.h +++ b/types.h @@ -15,7 +15,7 @@ enum pattype_e { BP_STRING, BP_RANGE, BP_NOT, - BP_UPTO_AND, + BP_UPTO, BP_REPEAT, BP_BEFORE, BP_AFTER, -- cgit v1.2.3