Added escape ranges

This commit is contained in:
Bruce Hill 2020-09-10 02:14:47 -07:00
parent 8126489f81
commit ace2f9f178
2 changed files with 20 additions and 8 deletions

View File

@ -7,7 +7,7 @@ def = @[name]ref __ `= __ @[definition]extendedPat;
# This is used for command line arguments:
stringGrammar = *(`\ pat ?`; / .);
pat = empty / dot / string / charRange / char / escape / no / anythingBut
pat = empty / dot / string / charRange / char / escapeRange / escape / no / anythingBut
/ uptoAnd / repeat / after / before / capture / replace / ref / parens;
empty = `/ >(__ (`}/`}));
@ -18,7 +18,9 @@ string = (
);
charRange = `` @[low]. `- @[high].;
char = `` @[s].;
escape = `\ @[s](
escapeRange = `\ @[low]escapeSequence `- @[high]escapeSequence;
escape = `\ @[s]escapeSequence;
escapeSequence = (
1-3 `0-7
/ `x 2 (`0-9/`a-f/`A-F)
/`a/`b/`e/`n/`r/`t/`v / . / \n

22
bpeg.c
View File

@ -10,6 +10,7 @@
* `<c> character <c>
* `<a>-<z> character between <a> and <z>
* \<e> escape sequence (e.g. \n, \033)
* \<e1>-<e2> escape sequence range (e.g. \x00-\xF0)
* ! <pat> no <pat>
* ~ <pat> any character as long as it doesn't match <pat>
* & <pat> up to and including <pat> (aka *~<pat> <pat>)
@ -397,13 +398,11 @@ static vm_op_t *compile_bpeg(const char *source, const char *str)
visualize(source, str, "Char range");
char c2 = *str;
check(c2, "Expected character after '-'");
check(c2 >= literal[0], "Character range must be low-to-high");
op->op = VM_RANGE;
op->args.range.low = literal[0];
op->args.range.high = c2;
++str;
} else if (matchchar(&str, ',')) { // Set
// TODO: implement
check(0, "Sorry, character sets are not yet implemented!");
} else {
//debug("Char literal\n");
op->op = VM_STRING;
@ -416,10 +415,21 @@ static vm_op_t *compile_bpeg(const char *source, const char *str)
//debug("Escape sequence\n");
visualize(source, str, "Escape sequence");
check(*str, "Expected escape after '\\'");
op->op = VM_STRING;
op->len = 1;
char literal[2] = {unescapechar(str, &str), '\0'};
op->args.s = strdup(literal);
char e = unescapechar(str, &str);
if (*str == '-') { // Escape range (e.g. \x00-\xFF)
++str;
char e2 = unescapechar(str, &str);
check(e2, "Expected character after '-'");
check(e2 >= e, "Character range must be low-to-high");
op->op = VM_RANGE;
op->args.range.low = e;
op->args.range.high = e2;
} else {
char literal[2] = {e, '\0'};
op->op = VM_STRING;
op->args.s = strdup(literal);
}
break;
}
// String literal