diff options
| author | Bruce Hill <bruce@bruce-hill.com> | 2021-05-20 16:46:14 -0700 |
|---|---|---|
| committer | Bruce Hill <bruce@bruce-hill.com> | 2021-05-20 16:46:14 -0700 |
| commit | ec1a3e08354d27742e89fd8437a5a1d39153c53e (patch) | |
| tree | b6d434441179ba090da89b83a34ad243ba9215a9 | |
| parent | cd30e89042f820c7e6dc7c40890eb21ddf0cd94f (diff) | |
Fixes for char sets/ranges and cleanups
| -rw-r--r-- | grammars/bp.bp | 2 |
| -rw-r--r-- | grammars/builtins.bp | 3 |
| -rw-r--r-- | pattern.c | 17 |
3 files changed, 11 insertions, 11 deletions
diff --git a/grammars/bp.bp b/grammars/bp.bp index fc1e794..228f7d0 100644 --- a/grammars/bp.bp +++ b/grammars/bp.bp @@ -5,7 +5,7 @@ # The grammar files provided with BP are not otherwise intended to be full # language grammars. -Grammar: __ *(Def [__`;])%__ __ ($$ / (!)(..%\n$$ => "Could not parse this code")) +Grammar: __ *(Def [__`;])%__ __ [(!)(+(./\n) => "Could not parse this code")] Def: @name=id __ `: __ ( @definition=extended-pat / $$ (!)=>"No definition for rule" diff --git a/grammars/builtins.bp b/grammars/builtins.bp index cf642e2..888cc2c 100644 --- a/grammars/builtins.bp +++ b/grammars/builtins.bp @@ -17,8 +17,7 @@ braces: `{ ..%(\n/braces/string) `} parens: `( ..%(\n/parens/string) `) string: `" ..%string-escape `" / `' ..%string-escape `' string-escape: `\ (`x 2 Hex / 1-3 `0-7 / `u 1-4 Hex / .) -left-id-edge: ^ / <(\x00-x7f!~(^^id-char)) / <((\xc0-xdf \x80-xbf)!~(^^id-char)) - / <((\xe0-xef 2\x80-xbf)!~(^^id-char)) / <((\xf0-xf7 3\x80-xbf)!~(^^id-char)) +left-id-edge: !<id-char right-id-edge: !id-char id: left-id-edge !`0-9 !(keyword left-id-edge) +id-char id-char: `a-z,A-Z,_,0-9 @@ -146,6 +146,7 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second) if (p->type == BP_UPTO) { p->args.multiple.first = second; p->min_matchlen = second->min_matchlen; + p->max_matchlen = -1; break; } else if (p->type == BP_CAPTURE) { p = p->args.capture.capture_pat; @@ -250,10 +251,10 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) const char *c1_loc = str; str = next_char(f, c1_loc); if (matchchar(&str, '-')) { // Range - if (&str[-1] - c1_loc > 1 || next_char(f, str) > str+1) - file_err(f, start, next_char(f, str), "Sorry, UTF-8 character ranges are not yet supported."); - char c1 = *c1_loc; - char c2 = *str; + const char *c2_loc = str; + if (next_char(f, c1_loc) > c1_loc+1 || next_char(f, c2_loc) > c2_loc+1) + file_err(f, start, next_char(f, c2_loc), "Sorry, UTF-8 character ranges are not yet supported."); + char c1 = *c1_loc, c2 = *c2_loc; if 
(!c2 || c2 == '\n') file_err(f, str, str, "There should be a character here to complete the character range."); if (c1 > c2) { // Swap order @@ -261,14 +262,14 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str) c1 = c2; c2 = tmp; } - str = next_char(f, str); - pat_t *pat = new_pat(f, start, str, 1, 1, BP_RANGE); + str = next_char(f, c2_loc); + pat_t *pat = new_pat(f, start == c1_loc - 1 ? start : c1_loc, str, 1, 1, BP_RANGE); pat->args.range.low = (unsigned char)c1; pat->args.range.high = (unsigned char)c2; all = either_pat(f, all, pat); } else { - size_t len = (size_t)(str - start - 1); - pat_t *pat = new_pat(f, start, str, len, (ssize_t)len, BP_STRING); + size_t len = (size_t)(str - c1_loc); + pat_t *pat = new_pat(f, c1_loc, str, len, (ssize_t)len, BP_STRING); pat->args.string = c1_loc; all = either_pat(f, all, pat); } |
