Updated '|' to mean word boundary and \N for nodent
This commit is contained in:
parent
8b50fc5d1d
commit
3753bc1be0
19
compiler.c
19
compiler.c
@ -162,6 +162,12 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
|||||||
case '\\': {
|
case '\\': {
|
||||||
if (!*str || *str == '\n')
|
if (!*str || *str == '\n')
|
||||||
file_err(f, str, str, "There should be an escape sequence here after this backslash.");
|
file_err(f, str, str, "There should be an escape sequence here after this backslash.");
|
||||||
|
|
||||||
|
if (matchchar(&str, 'N')) { // \N (nodent)
|
||||||
|
op->op = VM_NODENT;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
op->len = 1;
|
op->len = 1;
|
||||||
unsigned char e = unescapechar(str, &str);
|
unsigned char e = unescapechar(str, &str);
|
||||||
if (*str == '-') { // Escape range (e.g. \x00-\xFF)
|
if (*str == '-') { // Escape range (e.g. \x00-\xFF)
|
||||||
@ -397,7 +403,7 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Special rules:
|
// Special rules:
|
||||||
case '_': case '^': case '$': {
|
case '_': case '^': case '$': case '|': {
|
||||||
if (matchchar(&str, c)) { // double __, ^^, $$
|
if (matchchar(&str, c)) { // double __, ^^, $$
|
||||||
char tmp[3] = {c, c, '\0'};
|
char tmp[3] = {c, c, '\0'};
|
||||||
op->args.s = strdup(tmp);
|
op->args.s = strdup(tmp);
|
||||||
@ -415,10 +421,6 @@ vm_op_t *bpeg_simplepattern(file_t *f, const char *str)
|
|||||||
op->op = VM_REF;
|
op->op = VM_REF;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case '|': {
|
|
||||||
op->op = VM_NODENT;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default: {
|
default: {
|
||||||
// Reference
|
// Reference
|
||||||
if (isalpha(c)) {
|
if (isalpha(c)) {
|
||||||
@ -488,6 +490,13 @@ vm_op_t *bpeg_stringpattern(file_t *f, const char *str)
|
|||||||
if (*str == '\\') {
|
if (*str == '\\') {
|
||||||
if (!str[1] || str[1] == '\n')
|
if (!str[1] || str[1] == '\n')
|
||||||
file_err(f, str, str, "There should be an escape sequence or pattern here after this backslash.");
|
file_err(f, str, str, "There should be an escape sequence or pattern here after this backslash.");
|
||||||
|
|
||||||
|
if (matchchar(&str, 'N')) { // \N (nodent)
|
||||||
|
interp = calloc(sizeof(vm_op_t), 1);
|
||||||
|
interp->op = VM_NODENT;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
const char *after_escape;
|
const char *after_escape;
|
||||||
unsigned char e = unescapechar(&str[1], &after_escape);
|
unsigned char e = unescapechar(&str[1], &after_escape);
|
||||||
if (e != str[1]) {
|
if (e != str[1]) {
|
||||||
|
@ -39,7 +39,7 @@ vm_op_t *load_grammar(grammar_t *g, file_t *f)
|
|||||||
while (*src) {
|
while (*src) {
|
||||||
const char *name = src;
|
const char *name = src;
|
||||||
const char *name_end = after_name(name);
|
const char *name_end = after_name(name);
|
||||||
check(name_end > name, "Invalid name for definition");
|
check(name_end > name, "Invalid name for definition: %s", name);
|
||||||
name = strndup(name, (size_t)(name_end-name));
|
name = strndup(name, (size_t)(name_end-name));
|
||||||
src = after_spaces(name_end);
|
src = after_spaces(name_end);
|
||||||
check(matchchar(&src, ':'), "Expected ':' in definition");
|
check(matchchar(&src, ':'), "Expected ':' in definition");
|
||||||
|
@ -7,11 +7,12 @@ Def: @name=id _ `: __ (
|
|||||||
/ @!={...>(`;/id_`:/$) => "Invalid definition: @0"})
|
/ @!={...>(`;/id_`:/$) => "Invalid definition: @0"})
|
||||||
|
|
||||||
# This is used for command line arguments:
|
# This is used for command line arguments:
|
||||||
String-pattern: ... % (`\ (escape-sequence / pat [`;]))
|
String-pattern: ... % (Nodent / Escape / `\ pat [`;])
|
||||||
|
|
||||||
pat: simple-pat !(__("!="/"==")) / suffixed-pat
|
pat: simple-pat !(__("!="/"==")) / suffixed-pat
|
||||||
simple-pat: Upto-and / Dot / String / Char-range / Char / Escape-range / Escape / No
|
simple-pat: Upto-and / Dot / String / Char-range / Char / Nodent / Escape-range
|
||||||
/ Nodent / Repeat / Optional / After / Before / Capture / Replace / Ref / parens
|
/ Escape / Repeat / Optional / After / Before / Capture / Replace
|
||||||
|
/ Ref / parens
|
||||||
|
|
||||||
suffixed-pat: (
|
suffixed-pat: (
|
||||||
Eq-pat
|
Eq-pat
|
||||||
@ -39,7 +40,7 @@ escape-sequence: (
|
|||||||
/ `x 2 (`0-9/`a-f/`A-F)
|
/ `x 2 (`0-9/`a-f/`A-F)
|
||||||
)
|
)
|
||||||
No: `! (_@pat / @!={=>"Expected a pattern after the exclamation mark"})
|
No: `! (_@pat / @!={=>"Expected a pattern after the exclamation mark"})
|
||||||
Nodent: `|
|
Nodent: `\ `N
|
||||||
Upto-and: 2-3`. [_@first=simple-pat] [__`%__@second=simple-pat]
|
Upto-and: 2-3`. [_@first=simple-pat] [__`%__@second=simple-pat]
|
||||||
Repeat: (
|
Repeat: (
|
||||||
@min='' `* @max=''
|
@min='' `* @max=''
|
||||||
@ -70,7 +71,7 @@ $: !.
|
|||||||
^^: !<$.
|
^^: !<$.
|
||||||
^: !<.
|
^: !<.
|
||||||
|
|
||||||
id: "^^" / "^" / "__" / "_" / "$$" / "$" / (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-)
|
id: "^^" / "^" / "__" / "_" / "$$" / "$" / "|" / (`a-z/`A-Z) *(`a-z/`A-Z/`0-9/`-)
|
||||||
|
|
||||||
line-comment: `# .. $
|
line-comment: `# .. $
|
||||||
block-comment: "#("..")#" % block-comment
|
block-comment: "#("..")#" % block-comment
|
||||||
|
1
utils.c
1
utils.c
@ -44,6 +44,7 @@ const char *after_spaces(const char *str)
|
|||||||
*/
|
*/
|
||||||
const char *after_name(const char *str)
|
const char *after_name(const char *str)
|
||||||
{
|
{
|
||||||
|
if (*str == '|') return &str[1];
|
||||||
if (*str == '^' || *str == '_' || *str == '$') {
|
if (*str == '^' || *str == '_' || *str == '$') {
|
||||||
return (str[1] == *str) ? &str[2] : &str[1];
|
return (str[1] == *str) ? &str[2] : &str[1];
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user