diff options
| -rw-r--r-- | bpeg.bpeg | 6 | ||||
| -rw-r--r-- | bpeg.c | 22 |
2 files changed, 20 insertions, 8 deletions
@@ -7,7 +7,7 @@ def = @[name]ref __ `= __ @[definition]extendedPat; # This is used for command line arguments: stringGrammar = *(`\ pat ?`; / .); -pat = empty / dot / string / charRange / char / escape / no / anythingBut +pat = empty / dot / string / charRange / char / escapeRange / escape / no / anythingBut / uptoAnd / repeat / after / before / capture / replace / ref / parens; empty = `/ >(__ (`}/`})); @@ -18,7 +18,9 @@ string = ( ); charRange = `` @[low]. `- @[high].; char = `` @[s].; -escape = `\ @[s]( +escapeRange = `\ @[low]escapeSequence `- @[high]escapeSequence; +escape = `\ @[s]escapeSequence; +escapeSequence = ( 1-3 `0-7 / `x 2 (`0-9/`a-f/`A-F) /`a/`b/`e/`n/`r/`t/`v / . / \n @@ -10,6 +10,7 @@ * `<c> character <c> * `<a>-<z> character between <a> and <z> * \<e> escape sequence (e.g. \n, \033) + * \<e1>-<e2> escape sequence range (e.g. \x00-\xF0) * ! <pat> no <pat> * ~ <pat> any character as long as it doesn't match <pat> * & <pat> upto and including <pat> (aka *~<pat> <pat>) @@ -397,13 +398,11 @@ static vm_op_t *compile_bpeg(const char *source, const char *str) visualize(source, str, "Char range"); char c2 = *str; check(c2, "Expected character after '-'"); + check(c2 >= literal[0], "Character range must be low-to-high"); op->op = VM_RANGE; op->args.range.low = literal[0]; op->args.range.high = c2; ++str; - } else if (matchchar(&str, ',')) { // Set - // TODO: implement - check(0, "Sorry, character sets are not yet implemented!"); } else { //debug("Char literal\n"); op->op = VM_STRING; @@ -416,10 +415,21 @@ static vm_op_t *compile_bpeg(const char *source, const char *str) //debug("Escape sequence\n"); visualize(source, str, "Escape sequence"); check(*str, "Expected escape after '\\'"); - op->op = VM_STRING; op->len = 1; - char literal[2] = {unescapechar(str, &str), '\0'}; - op->args.s = strdup(literal); + char e = unescapechar(str, &str); + if (*str == '-') { // Escape range (e.g. \x00-\xFF) + ++str; + char e2 = unescapechar(str, &str); + check(e2, "Expected character after '-'"); + check(e2 >= e, "Character range must be low-to-high"); + op->op = VM_RANGE; + op->args.range.low = e; + op->args.range.high = e2; + } else { + char literal[2] = {e, '\0'}; + op->op = VM_STRING; + op->args.s = strdup(literal); + } break; } // String literal |
