Fixes for char sets/ranges and cleanups
This commit is contained in:
parent
cd30e89042
commit
ec1a3e0835
@ -5,7 +5,7 @@
|
||||
# The grammar files provided with BP are not otherwise intended to be full
|
||||
# language grammars.
|
||||
|
||||
Grammar: __ *(Def [__`;])%__ __ ($$ / (!)(..%\n$$ => "Could not parse this code"))
|
||||
Grammar: __ *(Def [__`;])%__ __ [(!)(+(./\n) => "Could not parse this code")]
|
||||
Def: @name=id __ `: __ (
|
||||
@definition=extended-pat
|
||||
/ $$ (!)=>"No definition for rule"
|
||||
|
@ -17,8 +17,7 @@ braces: `{ ..%(\n/braces/string) `}
|
||||
parens: `( ..%(\n/parens/string) `)
|
||||
string: `" ..%string-escape `" / `' ..%string-escape `'
|
||||
string-escape: `\ (`x 2 Hex / 1-3 `0-7 / `u 1-4 Hex / .)
|
||||
left-id-edge: ^ / <(\x00-x7f!~(^^id-char)) / <((\xc0-xdf \x80-xbf)!~(^^id-char))
|
||||
/ <((\xe0-xef 2\x80-xbf)!~(^^id-char)) / <((\xf0-xf7 3\x80-xbf)!~(^^id-char))
|
||||
left-id-edge: !<id-char
|
||||
right-id-edge: !id-char
|
||||
id: left-id-edge !`0-9 !(keyword left-id-edge) +id-char
|
||||
id-char: `a-z,A-Z,_,0-9
|
||||
|
17
pattern.c
17
pattern.c
@ -146,6 +146,7 @@ pat_t *chain_together(file_t *f, pat_t *first, pat_t *second)
|
||||
if (p->type == BP_UPTO) {
|
||||
p->args.multiple.first = second;
|
||||
p->min_matchlen = second->min_matchlen;
|
||||
p->max_matchlen = -1;
|
||||
break;
|
||||
} else if (p->type == BP_CAPTURE) {
|
||||
p = p->args.capture.capture_pat;
|
||||
@ -250,10 +251,10 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
|
||||
const char *c1_loc = str;
|
||||
str = next_char(f, c1_loc);
|
||||
if (matchchar(&str, '-')) { // Range
|
||||
if (&str[-1] - c1_loc > 1 || next_char(f, str) > str+1)
|
||||
file_err(f, start, next_char(f, str), "Sorry, UTF-8 character ranges are not yet supported.");
|
||||
char c1 = *c1_loc;
|
||||
char c2 = *str;
|
||||
const char *c2_loc = str;
|
||||
if (next_char(f, c1_loc) > c1_loc+1 || next_char(f, c2_loc) > c2_loc+1)
|
||||
file_err(f, start, next_char(f, c2_loc), "Sorry, UTF-8 character ranges are not yet supported.");
|
||||
char c1 = *c1_loc, c2 = *c2_loc;
|
||||
if (!c2 || c2 == '\n')
|
||||
file_err(f, str, str, "There should be a character here to complete the character range.");
|
||||
if (c1 > c2) { // Swap order
|
||||
@ -261,14 +262,14 @@ static pat_t *_bp_simplepattern(file_t *f, const char *str)
|
||||
c1 = c2;
|
||||
c2 = tmp;
|
||||
}
|
||||
str = next_char(f, str);
|
||||
pat_t *pat = new_pat(f, start, str, 1, 1, BP_RANGE);
|
||||
str = next_char(f, c2_loc);
|
||||
pat_t *pat = new_pat(f, start == c1_loc - 1 ? start : c1_loc, str, 1, 1, BP_RANGE);
|
||||
pat->args.range.low = (unsigned char)c1;
|
||||
pat->args.range.high = (unsigned char)c2;
|
||||
all = either_pat(f, all, pat);
|
||||
} else {
|
||||
size_t len = (size_t)(str - start - 1);
|
||||
pat_t *pat = new_pat(f, start, str, len, (ssize_t)len, BP_STRING);
|
||||
size_t len = (size_t)(str - c1_loc);
|
||||
pat_t *pat = new_pat(f, c1_loc, str, len, (ssize_t)len, BP_STRING);
|
||||
pat->args.string = c1_loc;
|
||||
all = either_pat(f, all, pat);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user