From ace2f9f178dd25bee4d9de661050c7ca6a9187e6 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Thu, 10 Sep 2020 02:14:47 -0700 Subject: Added escape ranges --- bpeg.bpeg | 6 ++++-- bpeg.c | 22 ++++++++++++++++------ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/bpeg.bpeg b/bpeg.bpeg index b90f7d0..ef08239 100644 --- a/bpeg.bpeg +++ b/bpeg.bpeg @@ -7,7 +7,7 @@ def = @[name]ref __ `= __ @[definition]extendedPat; # This is used for command line arguments: stringGrammar = *(`\ pat ?`; / .); -pat = empty / dot / string / charRange / char / escape / no / anythingBut +pat = empty / dot / string / charRange / char / escapeRange / escape / no / anythingBut / uptoAnd / repeat / after / before / capture / replace / ref / parens; empty = `/ >(__ (`}/`})); @@ -18,7 +18,9 @@ string = ( ); charRange = `` @[low]. `- @[high].; char = `` @[s].; -escape = `\ @[s]( +escapeRange = `\ @[low]escapeSequence `- @[high]escapeSequence; +escape = `\ @[s]escapeSequence; +escapeSequence = ( 1-3 `0-7 / `x 2 (`0-9/`a-f/`A-F) /`a/`b/`e/`n/`r/`t/`v / . / \n diff --git a/bpeg.c b/bpeg.c index 7dc0a6f..101f85c 100644 --- a/bpeg.c +++ b/bpeg.c @@ -10,6 +10,7 @@ * ` character * `- character between and * \ escape sequence (e.g. \n, \033) + * \- escape sequence range (e.g. \x00-\xF0) * ! no * ~ any character as long as it doesn't match * & upto and including (aka *~ ) @@ -397,13 +398,11 @@ static vm_op_t *compile_bpeg(const char *source, const char *str) visualize(source, str, "Char range"); char c2 = *str; check(c2, "Expected character after '-'"); + check(c2 >= literal[0], "Character range must be low-to-high"); op->op = VM_RANGE; op->args.range.low = literal[0]; op->args.range.high = c2; ++str; - } else if (matchchar(&str, ',')) { // Set - // TODO: implement - check(0, "Sorry, character sets are not yet implemented!"); } else { //debug("Char literal\n"); op->op = VM_STRING; @@ -416,10 +415,21 @@ static vm_op_t *compile_bpeg(const char *source, const char *str) //debug("Escape sequence\n"); visualize(source, str, "Escape sequence"); check(*str, "Expected escape after '\\'"); - op->op = VM_STRING; op->len = 1; - char literal[2] = {unescapechar(str, &str), '\0'}; - op->args.s = strdup(literal); + char e = unescapechar(str, &str); + if (*str == '-') { // Escape range (e.g. \x00-\xFF) + ++str; + char e2 = unescapechar(str, &str); + check(e2, "Expected character after '-'"); + check(e2 >= e, "Character range must be low-to-high"); + op->op = VM_RANGE; + op->args.range.low = e; + op->args.range.high = e2; + } else { + char literal[2] = {e, '\0'}; + op->op = VM_STRING; + op->args.s = strdup(literal); + } break; } // String literal -- cgit v1.2.3