//
// pattern.h - Header file for BP pattern compilation.
//
2022-11-07 19:54:59 -08:00
# pragma once
2020-09-11 01:28:06 -07:00
2024-02-11 13:46:07 -08:00
# include <printf.h>
2021-09-23 15:15:48 -07:00
# include <stdbool.h>
2022-10-26 10:38:38 -07:00
# include <stdint.h>
2024-02-11 13:46:07 -08:00
# include <stdio.h>
2022-05-14 11:31:34 -07:00
# include <sys/types.h>
2023-05-06 10:43:32 -07:00
# include <err.h>
2021-09-23 15:15:48 -07:00
2023-05-06 10:43:32 -07:00
// Provide `auto` as a shorthand for GNU C's `__auto_type` (type inferred from
// the initializer), unless something has already defined an `auto` macro.
#if !defined(auto)
#  define auto __auto_type
#endif
2021-09-23 15:15:48 -07:00
2023-05-06 10:43:32 -07:00
// True when the pattern pointed to by `pat` has no upper bound on its match
// length, which is encoded as max_matchlen == -1.
#define UNBOUNDED(pat) ((pat)->max_matchlen == -1)
// Checked accessor for the tagged payload of a pattern.
//
// Evaluates to a pointer to (x)->__tagged._tag when (x)->type == _tag.
// Otherwise it calls errx() with exit status 1 and a message naming the
// file and line of the offending use, so the program terminates.
//
// `x` is evaluated exactly once (it is captured in a local first), so an
// argument with side effects, e.g. Match(p++, BP_STRING), is checked and
// dereferenced consistently. This uses GNU statement expressions, so the
// macro can only be used inside a function body.
#define Match(x, _tag) (__extension__ ({ \
    __auto_type match_subject_ = (x); \
    if (match_subject_->type != _tag) \
        errx(1, __FILE__ ":%d This was supposed to be a " # _tag "\n", __LINE__); \
    &match_subject_->__tagged._tag; \
}))
// Build a pat_t of type `_tag` and hand it (by value) to allocate_pat(),
// evaluating to the pat_t pointer that allocate_pat() returns.
//   _start, _end : stored in the pattern's `start` / `end` fields
//   _min, _max   : stored in `min_matchlen` / `max_matchlen`
//   ...          : initializers for the `_tag` member of the `__tagged` union
#define Pattern(_tag, _start, _end, _min, _max, ...) \
    allocate_pat((pat_t){ \
        .type = _tag, \
        .start = _start, \
        .end = _end, \
        .min_matchlen = _min, \
        .max_matchlen = _max, \
        .__tagged._tag = {__VA_ARGS__}, \
    })
2021-09-23 15:15:48 -07:00
// BP virtual machine pattern types
//
// Each enumerator has a same-named member in the `__tagged` union of
// `struct pat_s`; the Match() and Pattern() macros rely on that one-to-one
// naming. The numeric values are spelled out explicitly so they stay stable.
// BP_ERROR is 0, so a zero-initialized pat_t has type BP_ERROR.
enum pattype_e {
    BP_ERROR         = 0,
    BP_ANYCHAR       = 1,
    BP_ID_START      = 2,
    BP_ID_CONTINUE   = 3,
    BP_STRING        = 4,
    BP_RANGE         = 5,
    BP_NOT           = 6,
    BP_UPTO          = 7,
    BP_UPTO_STRICT   = 8,
    BP_REPEAT        = 9,
    BP_BEFORE        = 10,
    BP_AFTER         = 11,
    BP_CAPTURE       = 12,
    BP_OTHERWISE     = 13,
    BP_CHAIN         = 14,
    BP_MATCH         = 15,
    BP_NOT_MATCH     = 16,
    BP_REPLACE       = 17,
    BP_REF           = 18,
    BP_NODENT        = 19,
    BP_CURDENT       = 20,
    BP_START_OF_FILE = 21,
    BP_START_OF_LINE = 22,
    BP_END_OF_FILE   = 23,
    BP_END_OF_LINE   = 24,
    BP_WORD_BOUNDARY = 25,
    BP_DEFINITIONS   = 26,
    BP_TAGGED        = 27,
    BP_LEFTRECURSION = 28,
};
//
// A struct reperesenting a BP virtual machine operation
//
typedef struct pat_s {
2021-09-26 13:12:02 -07:00
struct pat_s * next , * * home ;
2021-09-23 15:15:48 -07:00
enum pattype_e type ;
2022-10-26 10:38:38 -07:00
uint32_t id ;
2021-09-23 15:15:48 -07:00
const char * start , * end ;
// The bounds of the match length (used for backtracking)
2022-10-26 10:38:38 -07:00
uint32_t min_matchlen ;
int32_t max_matchlen ; // -1 means unbounded length
2021-09-23 15:15:48 -07:00
union {
struct {
2023-05-06 10:43:32 -07:00
const char * start , * end , * msg ;
} BP_ERROR ;
struct { } BP_ANYCHAR ;
struct { } BP_ID_START ;
struct { } BP_ID_CONTINUE ;
2024-02-11 13:46:07 -08:00
struct { const char * string ; size_t len ; } BP_STRING ;
2023-05-06 10:43:32 -07:00
struct { unsigned char low , high ; } BP_RANGE ;
struct { struct pat_s * pat ; } BP_NOT ;
struct { struct pat_s * target , * skip ; } BP_UPTO ;
struct { struct pat_s * target , * skip ; } BP_UPTO_STRICT ;
2021-09-23 15:15:48 -07:00
struct {
2022-10-26 10:38:38 -07:00
uint32_t min ;
int32_t max ;
2021-09-23 15:15:48 -07:00
struct pat_s * sep , * repeat_pat ;
2023-05-06 10:43:32 -07:00
} BP_REPEAT ;
struct { struct pat_s * pat ; } BP_BEFORE ;
struct { struct pat_s * pat ; } BP_AFTER ;
struct {
struct pat_s * pat ;
const char * name ;
uint16_t namelen ;
bool backreffable ;
} BP_CAPTURE ;
struct {
struct pat_s * first , * second ;
} BP_OTHERWISE ;
2021-09-23 15:15:48 -07:00
struct {
struct pat_s * first , * second ;
2023-05-06 10:43:32 -07:00
} BP_CHAIN ;
struct { struct pat_s * pat , * must_match ; } BP_MATCH ;
struct { struct pat_s * pat , * must_not_match ; } BP_NOT_MATCH ;
2021-09-23 15:15:48 -07:00
struct {
struct pat_s * pat ;
const char * text ;
2022-10-26 10:38:38 -07:00
uint32_t len ;
2023-05-06 10:43:32 -07:00
} BP_REPLACE ;
2021-09-23 15:15:48 -07:00
struct {
2023-05-06 10:43:32 -07:00
const char * name ;
uint32_t len ;
} BP_REF ;
struct { } BP_NODENT ;
struct { } BP_CURDENT ;
struct { } BP_START_OF_FILE ;
struct { } BP_START_OF_LINE ;
struct { } BP_END_OF_FILE ;
struct { } BP_END_OF_LINE ;
struct { } BP_WORD_BOUNDARY ;
struct {
const char * name ;
uint32_t namelen ;
struct pat_s * meaning , * next_def ;
} BP_DEFINITIONS ;
struct {
struct pat_s * pat ;
2021-09-23 15:15:48 -07:00
const char * name ;
2022-10-26 10:38:38 -07:00
uint16_t namelen ;
2022-05-12 09:11:28 -07:00
bool backreffable ;
2023-05-06 10:43:32 -07:00
} BP_TAGGED ;
2021-09-23 15:15:48 -07:00
struct {
2023-05-06 10:43:32 -07:00
struct match_s * match ;
const char * at ;
struct pat_s * fallback ;
void * ctx ;
bool visited ;
} BP_LEFTRECURSION ;
} __tagged ;
2021-09-23 15:15:48 -07:00
} pat_t ;
2020-09-11 01:28:06 -07:00
2022-10-26 10:38:38 -07:00
// Left-recursion bookkeeping record. Its members mirror, in the same order
// and with the same types, the BP_LEFTRECURSION payload of `struct pat_s`.
typedef struct leftrec_info_s {
    struct match_s *match;
    const char     *at;
    struct pat_s   *fallback;
    void           *ctx;
    bool            visited;
} leftrec_info_t;
2021-09-23 14:38:46 -07:00
// Return type of the pattern-compiling functions declared in this header:
// a success flag plus a union holding either a pattern or an error span.
// NOTE(review): presumably `value.pat` is the valid member when `success` is
// true and `value.error` otherwise -- confirm against the implementation.
typedef struct {
    bool success;
    union {
        pat_t *pat;
        struct {
            const char *start;
            const char *end;
            const char *msg;
        } error;
    } value;
} maybe_pat_t;
2023-05-06 10:43:32 -07:00
__attribute__ ( ( returns_nonnull ) )
pat_t * allocate_pat ( pat_t pat ) ;
2021-09-23 14:55:30 -07:00
__attribute__ ( ( nonnull , returns_nonnull ) )
2021-09-23 15:24:08 -07:00
pat_t * bp_raw_literal ( const char * str , size_t len ) ;
2022-04-21 09:54:09 -07:00
__attribute__ ( ( nonnull ( 1 ) ) )
2021-09-23 15:40:45 -07:00
maybe_pat_t bp_stringpattern ( const char * str , const char * end ) ;
2022-04-21 09:54:09 -07:00
__attribute__ ( ( nonnull ( 1 , 2 ) ) )
2021-09-23 15:40:45 -07:00
maybe_pat_t bp_replacement ( pat_t * replacepat , const char * replacement , const char * end ) ;
2021-09-23 15:24:08 -07:00
pat_t * chain_together ( pat_t * first , pat_t * second ) ;
pat_t * either_pat ( pat_t * first , pat_t * second ) ;
2022-04-21 09:54:09 -07:00
__attribute__ ( ( nonnull ( 1 ) ) )
2021-09-23 15:40:45 -07:00
maybe_pat_t bp_pattern ( const char * str , const char * end ) ;
2021-09-26 13:12:02 -07:00
void free_all_pats ( void ) ;
__attribute__ ( ( nonnull ) )
void delete_pat ( pat_t * * at_pat , bool recursive ) ;
2024-02-11 13:46:07 -08:00
int set_pattern_printf_specifier ( char specifier ) ;
2021-09-23 15:15:48 -07:00
2021-08-28 16:05:30 -07:00
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0