Moved */+ back to prefix, and dropped ?

2020-09-28 18:08:23 -07:00 · 2020-09-28 18:08:23 -07:00 · 90b8db84a4
commit 90b8db84a4
parent 699e7c8b98
7 changed files with 60 additions and 66 deletions
--- a/README.md
+++ b/README.md
@@ -49,8 +49,8 @@ Pattern            | Meaning
 `2-4 pat`          | Between 2 and 4 occurrences of `pat` (inclusive)
 `5+ pat`           | 5 or more occurrences of `pat`
 `5+ pat % sep`     | 5 or more occurrences of `pat`, separated by `sep` (e.g. `0+ int % ","` matches `1,2,3`)
-`pat*` `pat* % sep`| 0 or more occurrences of `pat` (optionally separated by `sep`)
-`pat+` `pat+ % sep`| 1 or more occurrences of `pat` (optionally separated by `sep`)
+`*pat`             | 0 or more occurrences of `pat` (shorthand for `0+pat`)
+`+pat`             | 1 or more occurrences of `pat` (shorthand for `1+pat`)
 `<pat`             | `pat` matches just before the current position (backref)
 `>pat`             | `pat` matches just in front of the current position (lookahead)
 `@pat`             | Capture `pat` (used for text replacement and backreferences)
--- a/bpeg.1
+++ b/bpeg.1
@@ -122,11 +122,11 @@ The \fBescape-sequence-range-\fI<esc1>\fB-to-\fI<esc2>\fR
 .B \fI<MIN>\fB+ \fI<pat>\fR
 \fI<MIN>\fB-to-\fI<MAX>\fB-\fI<pat>\fBs\fR (repetitions of a pattern)

-.B \fI<pat>\fR*
-\fI<pat>\fB-zero-or-more-times\fR
+.B *\fI<pat>\fR
+\fBsome-\fI<pat>\fBs\fR

-.B \fI<pat>\fR+
-\fI<pat>\fB-one-or-more-times\fR
+.B +\fI<pat>\fR
+\fBat-least-one-\fI<pat>\fBs\fR

 .B \fI<repeating-pat>\fR \fB%\fI <sep>\fR
 \fI<repeating-pat>\fB-separated-by-\fI<sep>\fR (equivalent to \fI<pat>
--- a/compiler.c
+++ b/compiler.c
@@ -269,6 +269,22 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
            set_range(op, 0, 1, pat, NULL);
            break;
        }
+        // Repeating
+        case '*': case '+': {
+            ssize_t min = c == '*' ? 0 : 1;
+            vm_op_t *pat = bpeg_simplepattern(f, str);
+            check(pat, "Expected pattern after '%c'", *str);
+            str = pat->end;
+            str = after_spaces(str);
+            vm_op_t *sep = NULL;
+            if (matchchar(&str, '%')) {
+                sep = bpeg_simplepattern(f, str);
+                check(sep, "Expected pattern for separator after '%%'");
+                str = sep->end;
+            }
+            set_range(op, min, -1, pat, sep);
+            break;
+        }
        // Capture
        case '@': {
            op->op = VM_CAPTURE;
@@ -373,23 +389,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
  postfix:
    if (f ? str >= f->end : !*str) return op;
    str = after_spaces(str);
-    if (*str == '*' || *str == '+' || *str == '?') { // Repetitions: <pat>*, <pat>+, <pat>?
-        char operator = *str;
-        ++str;
-        vm_op_t *pat = op;
-        vm_op_t *sep = NULL;
-        if (operator != '?' && matchchar(&str, '%')) {
-            sep = bpeg_simplepattern(f, str);
-            check(sep, "Expected pattern for separator after '%%'");
-            str = sep->end;
-        }
-        op = calloc(sizeof(vm_op_t), 1);
-        set_range(op, operator == '+' ? 1 : 0, operator == '?' ? 1 : -1, pat, sep);
-        op->start = pat->start;
-        op->end = str;
-        op->len = -1;
-        goto postfix;
-    } else if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
+    if ((str[0] == '=' || str[0] == '!') && str[1] == '=') { // Equality <pat1>==<pat2> and inequality <pat1>!=<pat2>
        int equal = str[0] == '=';
        str = after_spaces(str+2);
        vm_op_t *first = op;
--- a/grammars/bpeg.bpeg
+++ b/grammars/bpeg.bpeg
@@ -1,43 +1,37 @@
 # This is a file defining the BPEG grammar using BPEG syntax

-Grammar: __ 0+(Def [__`;])%__ __ ($$ / @!={... => "Could not parse this code"})
+Grammar: __ *(Def [__`;])%__ __ ($$ / @!={... => "Could not parse this code"})
 Def: @name=id _ `: __ (
      @definition=extended-pat
    / $$ @!={=>"No definition for rule"}
    / @!={...>(`;/id_`:/$) => "Invalid definition: @0"})

 # This is used for command line arguments:
-String-pattern: 0+(`\ (escape-sequence / pat [`;]) / .)
+String-pattern: *(`\ (escape-sequence / pat [`;]) / .)

-pat: simple-pat !(__("!="/"=="/`*/`+/`?)) / suffixed-pat
+pat: simple-pat !(__("!="/"==")) / suffixed-pat
 simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
    / Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens

 suffixed-pat: (
      Eq-pat
    / Not-eq-pat
-    / Star-pat
-    / Plus-pat
-    / Question-pat
 )

 Eq-pat: @first=pat__"=="__@second=pat
 Not-eq-pat: @first=pat__"!="__@second=pat
-Star-pat: pat __ `* @min={=>"0"} @max="" [__`%__@sep=pat]
-Plus-pat: pat __ `+ @min={=>"1"} @max="" [__`%__@sep=pat]
-Question-pat: pat __ `?

 Dot: `. !`.
 String: (
-        `" @s=0+(Escape / !`".) (`" / @!={=> "Expected closing quote here"})
-      / `' @s=0+(Escape / !`'.) (`' / @!={=> "Expected closing quote here"})
+        `" @s=*(Escape / !`".) (`" / @!={=> "Expected closing quote here"})
+      / `' @s=*(Escape / !`'.) (`' / @!={=> "Expected closing quote here"})
    )
 Char-range: `` @low=. `- (@high=. / @!={=> "Expected a second character to form a character range"})
 Char: `` (@s=. / @!={=> "Expected a character following the '`'"})
 Escape-range: `\ @low=escape-sequence `- @high=escape-sequence
 Escape: `\ (@s=escape-sequence
    / $ @!={=>"Backslashes are used for escape sequences, not splitting lines"}
-    / @!={. 0+(Abc/`0-9) => "Invalid escape sequence: '@0'"}
+    / @!={. *(Abc/`0-9) => "Invalid escape sequence: '@0'"}
 )
 escape-sequence: (
       `n/`t/`r/`e/`b/`a/`v
@@ -68,17 +62,17 @@ Otherwise: 2+@(Chain/pat)%(__`/__)
 extended-pat: Otherwise / Chain / pat

 # Special-symbol rules:
-_:  0+(`  / \t)
-__: 0+(`  / \t / \r / \n / comment)
+_:  *(`  / \t)
+__: *(`  / \t / \r / \n / comment)
 $$: !$.
 $:  !.
 ^^: !<$.
 ^:  !<.

-id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) 0+(`a-z/`A-Z/`0-9/`-)
+id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-)

 line-comment: `# .. $
-block-comment: "#(" 0+(block-comment / !")#" .) ")#"
+block-comment: "#(" *(block-comment / !")#" .) ")#"

 # Note: comments are undefined by default in regular BPEG
 comment: block-comment / line-comment
--- a/grammars/builtins.bpeg
+++ b/grammars/builtins.bpeg
@@ -16,21 +16,21 @@ replace-all: (
    (include-binary-files / is-text-file)
    define-highlights
    add-filename
-    0+(...(>pattern hl-replacement)) ...
+    *(...(>pattern hl-replacement)) ...
 )
 find-all: (
    (include-binary-files / is-text-file)
    define-highlights
    add-filename
-    0+ (!..pattern {..\n=>})
-    1+ (>..pattern add-line-number 1+(..hl-pattern) ..\n / {..\n=>})
+    *(!..pattern {..\n=>})
+    +(>..pattern add-line-number +(..hl-pattern) ..\n / {..\n=>})
    [{!<\n => "\n"}]
 )
 only-matches: (
    (include-binary-files / is-text-file)
    define-highlights
    add-filename
-    1+{...@hl-pattern =>'@1\n'}
+    +{...@hl-pattern =>'@1\n'}
 )
 add-filename: [print-filenames (is-tty {=>"\033[33;1;4m@&:\033[0m\n"} / {=>"@&:\n"})]
 add-line-number: [print-line-numbers (is-tty {=>"\033[2m@#\033[5G|\033[0m "} / {=>"@#| "})]
@@ -41,21 +41,21 @@ define-highlights: highlight @hl-start={=>"\033[31;1m"} @hl-end={=>"\033[0m"} /
 # Helper definitions (commonly used)
 #(
 url: (
-    "file://" 1+(`/ 0+url-char)
+    "file://" +(`/ *url-char)
    / "mailto:" email
-    / ("https"/"http"/"ftp") "://" [1+url-char [`: 1+url-char] `@] (ipv4/ipv6/domain) [`: int] [url-path]
+    / ("https"/"http"/"ftp") "://" [+url-char [`: +url-char] `@] (ipv4/ipv6/domain) [`: int] [url-path]
 )
-url-path: 1+(`/ 0+url-char) [`? 1+(1+url-char`=1+url-char]
+url-path: +(`/ *url-char) [`? +(+url-char`=+url-char]
 ipv4: 4 int % `.
 ipv6: 8 (4 Hex) % `:
-domain: 1+(Abc/digit/`-)%`.
+domain: +(Abc/digit/`-)%`.
 url-char: Abc/digit/`$/`-/`_/`./`+/`!/`*/`'/`(/`)/`,/`%

 url: @(https?|ftp)://(-\.)?([^\s/?\.#-]+\.?)+(/[^\s]*)?$@iS
 )#
-indent: \n|1+(\t/' ')
+indent: \n|+(\t/' ')
 dedent: $ !(\n|)
-indented-block: |` ..$ 0+(\n|..$)
+indented-block: |` ..$ *(\n|..$)
 utf8-codepoint: (
      \x00-x7f
    / \xc0-xdf 1\x80-xbf
@@ -64,18 +64,18 @@ utf8-codepoint: (
 )
 crlf: \r\n
 cr: \r
-anglebraces: `< 0+(anglebraces / !`>$.) `>
-brackets: `[ 0+(brackets / !`]$.) `]
-braces: `{ 0+(braces / !`}$.) `}
-parens: `( 0+(parens / !`)$.) `)
-id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) 0+(`a-z/`A-Z/`_/`0-9)
+anglebraces: `< *(anglebraces / !`>$.) `>
+brackets: `[ *(brackets / !`]$.) `]
+braces: `{ *(braces / !`}$.) `}
+parens: `( *(parens / !`)$.) `)
+id: !<(`a-z/`A-Z/`_/`0-9) (`a-z/`A-Z/`_) *(`a-z/`A-Z/`_/`0-9)
 id-char: `a-z/`A-Z/`_/`0-9
-word: !<(`a-z/`A-Z/`_/`0-9) 1+(`a-z/`A-Z) !>(`0-9/`_)
+word: !<(`a-z/`A-Z/`_/`0-9) +(`a-z/`A-Z) !>(`0-9/`_)
 HEX: `0-9/`A-F
 Hex: `0-9/`a-f/`A-F
 hex: `0-9/`a-f
-number: 1+`0-9 [`. 0+`0-9] / `. 1+`0-9
-int: 1+`0-9
+number: +`0-9 [`. *`0-9] / `. +`0-9
+int: +`0-9
 digit: `0-9
 Abc: `a-z/`A-Z
 ABC: `A-Z
@@ -94,5 +94,5 @@ $$: !$.
 $:  !.
 ^^: !<$.
 ^:  !<.
-__: 0+(` /\t/\n/\r/comment)
-_:  0+(` /\t)
+__: *(` /\t/\n/\r/comment)
+_:  *(` /\t)
--- a/grammars/html.bpeg
+++ b/grammars/html.bpeg
@@ -1,5 +1,5 @@
 # HTML grammar
-HTML: __ [doctype __] 0+html-element%__ __
+HTML: __ [doctype __] *html-element%__ __

 doctype: "<!DOCTYPE" ..`>

@@ -11,16 +11,16 @@ html-element: (

 void-element: `< @tag=(id==match-tag) __attributes__ [`/] __ `>

-template-element: `< @tag=(id==match-tag) __`> __ >match-body @body=0+(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>)
+template-element: `< @tag=(id==match-tag) __`> __ >match-body @body=*(!`<$. / comment / html-element / !("</"tag__`>)$.) ("</"tag__`>)

 raw-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=.. ("</"tag__`>)

-normal-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=0+(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>
+normal-element: `< @tag=(id==match-tag) __attributes__ `> >match-body @body=*(!`<$. / comment / html-element / !("</"tag__`>)$.) "</"tag__`>

 comment: "<!--" ..."-->"

-attributes: 0+attribute%__
-attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`')
-attribute: (1+id%`:)__`=__ (id / `" ..`" / `' ..`')
+attributes: *attribute%__
+attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`')
+attribute: (+id%`:)__`=__ (id / `" ..`" / `' ..`')
 match-tag: id
 match-body: ''
--- a/grammars/utf8-id.bpeg
+++ b/grammars/utf8-id.bpeg
@@ -1,5 +1,5 @@
 # Definitions of UTF8-compliant identifiers
-utf8-id: utf8-id-start 0+utf8-id-cont
+utf8-id: utf8-id-start *utf8-id-cont

 utf8-id-start: `A-Z / `a-z / !\x00-x7F (
  \xc2 (\xaa / \xb5 / \xba)