about summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
author Bruce Hill <bruce@bruce-hill.com> 2025-08-23 19:28:08 -0400
committer Bruce Hill <bruce@bruce-hill.com> 2025-08-23 19:28:08 -0400
commit fcda36561d668f43bac91ea31cd55cbbd605d330 (patch)
tree eb74c0b17df584af0fd8154422ad924e04c96cc2 /lib
parent 414b0c7472c87c5a013029aefef49e2dbc41e700 (diff)
Autoformat everything with clang-format
Diffstat (limited to 'lib')
-rw-r--r-- lib/commands/commands.c 107
-rw-r--r-- lib/patterns/match_type.h 1
-rw-r--r-- lib/patterns/patterns.c 715
-rw-r--r-- lib/random/chacha.h 304
-rw-r--r-- lib/random/sysrandom.h 2
-rw-r--r-- lib/time/time_defs.h 31
6 files changed, 519 insertions, 641 deletions
diff --git a/lib/commands/commands.c b/lib/commands/commands.c
index 80710387..4a6272b9 100644
--- a/lib/commands/commands.c
+++ b/lib/commands/commands.c
@@ -18,16 +18,14 @@
#define WRITE_END 1
static void xpipe(int fd[2]) {
- if (pipe(fd) != 0)
- fail("Failed to create pipe: ", strerror(errno));
+ if (pipe(fd) != 0) fail("Failed to create pipe: ", strerror(errno));
}
-int run_command(Text_t exe, List_t arg_list, Table_t env_table,
- OptionalList_t input_bytes, List_t *output_bytes, List_t *error_bytes)
-{
+int run_command(Text_t exe, List_t arg_list, Table_t env_table, OptionalList_t input_bytes, List_t *output_bytes,
+ List_t *error_bytes) {
pthread_testcancel();
- struct sigaction sa = { .sa_handler = SIG_IGN }, oldint, oldquit;
+ struct sigaction sa = {.sa_handler = SIG_IGN}, oldint, oldquit;
sigaction(SIGINT, &sa, &oldint);
sigaction(SIGQUIT, &sa, &oldquit);
sigaddset(&sa.sa_mask, SIGCHLD);
@@ -40,7 +38,7 @@ int run_command(Text_t exe, List_t arg_list, Table_t env_table,
posix_spawnattr_init(&attr);
posix_spawnattr_setsigmask(&attr, &old);
posix_spawnattr_setsigdefault(&attr, &reset);
- posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETSIGDEF|POSIX_SPAWN_SETSIGMASK);
+ posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK);
int child_inpipe[2], child_outpipe[2], child_errpipe[2];
if (input_bytes.length >= 0) xpipe(child_inpipe);
@@ -65,10 +63,11 @@ int run_command(Text_t exe, List_t arg_list, Table_t env_table,
const char *exe_str = Text$as_c_string(exe);
List_t arg_strs = {};
- List$insert_value(&arg_strs, exe_str, I(0), sizeof(char*));
+ List$insert_value(&arg_strs, exe_str, I(0), sizeof(char *));
for (int64_t i = 0; i < arg_list.length; i++)
- List$insert_value(&arg_strs, Text$as_c_string(*(Text_t*)(arg_list.data + i*arg_list.stride)), I(0), sizeof(char*));
- List$insert_value(&arg_strs, NULL, I(0), sizeof(char*));
+ List$insert_value(&arg_strs, Text$as_c_string(*(Text_t *)(arg_list.data + i * arg_list.stride)), I(0),
+ sizeof(char *));
+ List$insert_value(&arg_strs, NULL, I(0), sizeof(char *));
char **args = arg_strs.data;
extern char **environ;
@@ -76,24 +75,24 @@ int run_command(Text_t exe, List_t arg_list, Table_t env_table,
if (env_table.entries.length > 0) {
List_t env_list = {}; // List of const char*
for (char **e = environ; *e; e++)
- List$insert(&env_list, e, I(0), sizeof(char*));
+ List$insert(&env_list, e, I(0), sizeof(char *));
for (int64_t i = 0; i < env_table.entries.length; i++) {
- struct { Text_t key, value; } *entry = env_table.entries.data + env_table.entries.stride*i;
+ struct {
+ Text_t key, value;
+ } *entry = env_table.entries.data + env_table.entries.stride * i;
const char *env_entry = String(entry->key, "=", entry->value);
- List$insert(&env_list, &env_entry, I(0), sizeof(char*));
+ List$insert(&env_list, &env_entry, I(0), sizeof(char *));
}
- List$insert_value(&env_list, NULL, I(0), sizeof(char*));
- assert(env_list.stride == sizeof(char*));
+ List$insert_value(&env_list, NULL, I(0), sizeof(char *));
+ assert(env_list.stride == sizeof(char *));
env = env_list.data;
}
pid_t pid;
- int ret = exe_str[0] == '/' ?
- posix_spawn(&pid, exe_str, &actions, &attr, args, env)
- : posix_spawnp(&pid, exe_str, &actions, &attr, args, env);
- if (ret != 0)
- return -1;
+ int ret = exe_str[0] == '/' ? posix_spawn(&pid, exe_str, &actions, &attr, args, env)
+ : posix_spawnp(&pid, exe_str, &actions, &attr, args, env);
+ if (ret != 0) return -1;
posix_spawnattr_destroy(&attr);
posix_spawn_file_actions_destroy(&actions);
@@ -103,19 +102,16 @@ int run_command(Text_t exe, List_t arg_list, Table_t env_table,
if (error_bytes) close(child_errpipe[WRITE_END]);
struct pollfd pollfds[3] = {};
- if (input_bytes.length >= 0) pollfds[0] = (struct pollfd){.fd=child_inpipe[WRITE_END], .events=POLLOUT};
- if (output_bytes) pollfds[1] = (struct pollfd){.fd=child_outpipe[WRITE_END], .events=POLLIN};
- if (error_bytes) pollfds[2] = (struct pollfd){.fd=child_errpipe[WRITE_END], .events=POLLIN};
+ if (input_bytes.length >= 0) pollfds[0] = (struct pollfd){.fd = child_inpipe[WRITE_END], .events = POLLOUT};
+ if (output_bytes) pollfds[1] = (struct pollfd){.fd = child_outpipe[WRITE_END], .events = POLLIN};
+ if (error_bytes) pollfds[2] = (struct pollfd){.fd = child_errpipe[WRITE_END], .events = POLLIN};
- if (input_bytes.length > 0 && input_bytes.stride != 1)
- List$compact(&input_bytes, sizeof(char));
- if (output_bytes)
- *output_bytes = (List_t){.atomic=1, .stride=1, .length=0};
- if (error_bytes)
- *error_bytes = (List_t){.atomic=1, .stride=1, .length=0};
+ if (input_bytes.length > 0 && input_bytes.stride != 1) List$compact(&input_bytes, sizeof(char));
+ if (output_bytes) *output_bytes = (List_t){.atomic = 1, .stride = 1, .length = 0};
+ if (error_bytes) *error_bytes = (List_t){.atomic = 1, .stride = 1, .length = 0};
while (input_bytes.length > 0 || output_bytes || error_bytes) {
- (void)poll(pollfds, sizeof(pollfds)/sizeof(pollfds[0]), -1); // Wait for data or readiness
+ (void)poll(pollfds, sizeof(pollfds) / sizeof(pollfds[0]), -1); // Wait for data or readiness
bool did_something = false;
if (input_bytes.length >= 0 && pollfds[0].revents) {
if (input_bytes.length > 0) {
@@ -171,10 +167,8 @@ int run_command(Text_t exe, List_t arg_list, Table_t env_table,
int status = 0;
if (ret == 0) {
while (waitpid(pid, &status, 0) < 0 && errno == EINTR) {
- if (WIFEXITED(status) || WIFSIGNALED(status))
- break;
- else if (WIFSTOPPED(status))
- kill(pid, SIGCONT);
+ if (WIFEXITED(status) || WIFSIGNALED(status)) break;
+ else if (WIFSTOPPED(status)) kill(pid, SIGCONT);
}
}
@@ -195,8 +189,7 @@ typedef struct {
FILE *out;
} child_info_t;
-static void _line_reader_cleanup(child_info_t *child)
-{
+static void _line_reader_cleanup(child_info_t *child) {
if (child && child->out) {
fclose(child->out);
child->out = NULL;
@@ -207,8 +200,7 @@ static void _line_reader_cleanup(child_info_t *child)
}
}
-static Text_t _next_line(child_info_t *child)
-{
+static Text_t _next_line(child_info_t *child) {
if (!child || !child->out) return NONE_TEXT;
char *line = NULL;
@@ -219,19 +211,17 @@ static Text_t _next_line(child_info_t *child)
return NONE_TEXT;
}
- while (len > 0 && (line[len-1] == '\r' || line[len-1] == '\n'))
+ while (len > 0 && (line[len - 1] == '\r' || line[len - 1] == '\n'))
--len;
- if (u8_check((uint8_t*)line, (size_t)len) != NULL)
- fail("Invalid UTF8!");
+ if (u8_check((uint8_t *)line, (size_t)len) != NULL) fail("Invalid UTF8!");
Text_t line_text = Text$from_strn(line, len);
free(line);
return line_text;
}
-OptionalClosure_t command_by_line(Text_t exe, List_t arg_list, Table_t env_table)
-{
+OptionalClosure_t command_by_line(Text_t exe, List_t arg_list, Table_t env_table) {
posix_spawnattr_t attr;
posix_spawnattr_init(&attr);
@@ -246,10 +236,11 @@ OptionalClosure_t command_by_line(Text_t exe, List_t arg_list, Table_t env_table
const char *exe_str = Text$as_c_string(exe);
List_t arg_strs = {};
- List$insert_value(&arg_strs, exe_str, I(0), sizeof(char*));
+ List$insert_value(&arg_strs, exe_str, I(0), sizeof(char *));
for (int64_t i = 0; i < arg_list.length; i++)
- List$insert_value(&arg_strs, Text$as_c_string(*(Text_t*)(arg_list.data + i*arg_list.stride)), I(0), sizeof(char*));
- List$insert_value(&arg_strs, NULL, I(0), sizeof(char*));
+ List$insert_value(&arg_strs, Text$as_c_string(*(Text_t *)(arg_list.data + i * arg_list.stride)), I(0),
+ sizeof(char *));
+ List$insert_value(&arg_strs, NULL, I(0), sizeof(char *));
char **args = arg_strs.data;
extern char **environ;
@@ -257,24 +248,24 @@ OptionalClosure_t command_by_line(Text_t exe, List_t arg_list, Table_t env_table
if (env_table.entries.length > 0) {
List_t env_list = {}; // List of const char*
for (char **e = environ; *e; e++)
- List$insert(&env_list, e, I(0), sizeof(char*));
+ List$insert(&env_list, e, I(0), sizeof(char *));
for (int64_t i = 0; i < env_table.entries.length; i++) {
- struct { Text_t key, value; } *entry = env_table.entries.data + env_table.entries.stride*i;
+ struct {
+ Text_t key, value;
+ } *entry = env_table.entries.data + env_table.entries.stride * i;
const char *env_entry = String(entry->key, "=", entry->value);
- List$insert(&env_list, &env_entry, I(0), sizeof(char*));
+ List$insert(&env_list, &env_entry, I(0), sizeof(char *));
}
- List$insert_value(&env_list, NULL, I(0), sizeof(char*));
- assert(env_list.stride == sizeof(char*));
+ List$insert_value(&env_list, NULL, I(0), sizeof(char *));
+ assert(env_list.stride == sizeof(char *));
env = env_list.data;
}
pid_t pid;
- int ret = exe_str[0] == '/' ?
- posix_spawn(&pid, exe_str, &actions, &attr, args, env)
- : posix_spawnp(&pid, exe_str, &actions, &attr, args, env);
- if (ret != 0)
- return NONE_CLOSURE;
+ int ret = exe_str[0] == '/' ? posix_spawn(&pid, exe_str, &actions, &attr, args, env)
+ : posix_spawnp(&pid, exe_str, &actions, &attr, args, env);
+ if (ret != 0) return NONE_CLOSURE;
posix_spawnattr_destroy(&attr);
posix_spawn_file_actions_destroy(&actions);
@@ -284,8 +275,8 @@ OptionalClosure_t command_by_line(Text_t exe, List_t arg_list, Table_t env_table
child_info_t *child_info = GC_MALLOC(sizeof(child_info_t));
child_info->out = fdopen(child_outpipe[READ_END], "r");
child_info->pid = pid;
- GC_register_finalizer(child_info, (void*)_line_reader_cleanup, NULL, NULL, NULL);
- return (Closure_t){.fn=(void*)_next_line, .userdata=child_info};
+ GC_register_finalizer(child_info, (void *)_line_reader_cleanup, NULL, NULL, NULL);
+ return (Closure_t){.fn = (void *)_next_line, .userdata = child_info};
}
#undef READ_END
diff --git a/lib/patterns/match_type.h b/lib/patterns/match_type.h
index abbc4fce..911d66fc 100644
--- a/lib/patterns/match_type.h
+++ b/lib/patterns/match_type.h
@@ -5,4 +5,3 @@ typedef struct {
Int_t index;
List_t captures;
} XMatch;
-
diff --git a/lib/patterns/patterns.c b/lib/patterns/patterns.c
index 224a00a0..0bdbbf58 100644
--- a/lib/patterns/patterns.c
+++ b/lib/patterns/patterns.c
@@ -20,10 +20,10 @@ typedef struct {
Text_t text;
Int_t index;
List_t captures;
- bool is_none:1;
+ bool is_none : 1;
} OptionalPatternMatch;
-#define NONE_MATCH ((OptionalPatternMatch){.is_none=true})
+#define NONE_MATCH ((OptionalPatternMatch){.is_none = true})
typedef struct {
int64_t index, length;
@@ -45,18 +45,15 @@ typedef struct {
static Text_t replace_list(Text_t text, List_t replacements, Text_t backref_pat, bool recursive);
-static INLINE void skip_whitespace(TextIter_t *state, int64_t *i)
-{
+static INLINE void skip_whitespace(TextIter_t *state, int64_t *i) {
while (*i < state->stack[0].text.length) {
int32_t grapheme = Text$get_grapheme_fast(state, *i);
- if (grapheme > 0 && !uc_is_property_white_space((ucs4_t)grapheme))
- return;
+ if (grapheme > 0 && !uc_is_property_white_space((ucs4_t)grapheme)) return;
*i += 1;
}
}
-static INLINE bool match_grapheme(TextIter_t *state, int64_t *i, int32_t grapheme)
-{
+static INLINE bool match_grapheme(TextIter_t *state, int64_t *i, int32_t grapheme) {
if (*i < state->stack[0].text.length && Text$get_grapheme_fast(state, *i) == grapheme) {
*i += 1;
return true;
@@ -64,8 +61,7 @@ static INLINE bool match_grapheme(TextIter_t *state, int64_t *i, int32_t graphem
return false;
}
-static INLINE bool match_str(TextIter_t *state, int64_t *i, const char *str)
-{
+static INLINE bool match_str(TextIter_t *state, int64_t *i, const char *str) {
int64_t matched = 0;
while (matched[str]) {
if (*i + matched >= state->stack[0].text.length || Text$get_grapheme_fast(state, *i + matched) != str[matched])
@@ -76,21 +72,19 @@ static INLINE bool match_str(TextIter_t *state, int64_t *i, const char *str)
return true;
}
-static int64_t parse_int(TextIter_t *state, int64_t *i)
-{
+static int64_t parse_int(TextIter_t *state, int64_t *i) {
int64_t value = 0;
for (;; *i += 1) {
uint32_t grapheme = Text$get_main_grapheme_fast(state, *i);
int digit = uc_digit_value(grapheme);
if (digit < 0) break;
- if (value >= INT64_MAX/10) break;
- value = 10*value + digit;
+ if (value >= INT64_MAX / 10) break;
+ value = 10 * value + digit;
}
return value;
}
-static const char *get_property_name(TextIter_t *state, int64_t *i)
-{
+static const char *get_property_name(TextIter_t *state, int64_t *i) {
skip_whitespace(state, i);
char *name = GC_MALLOC_ATOMIC(UNINAME_MAX);
char *dest = name;
@@ -99,8 +93,7 @@ static const char *get_property_name(TextIter_t *state, int64_t *i)
if (!(grapheme & ~0xFF) && (isalnum(grapheme) || grapheme == ' ' || grapheme == '_' || grapheme == '-')) {
*dest = (char)grapheme;
++dest;
- if (dest >= name + UNINAME_MAX - 1)
- break;
+ if (dest >= name + UNINAME_MAX - 1) break;
} else {
break;
}
@@ -115,28 +108,36 @@ static const char *get_property_name(TextIter_t *state, int64_t *i)
return name;
}
-#define EAT1(state, index, cond) ({\
- int32_t grapheme = Text$get_grapheme_fast(state, index); \
- bool success = (cond); \
- if (success) index += 1; \
- success; })
-
-#define EAT2(state, index, cond1, cond2) ({\
- int32_t grapheme = Text$get_grapheme_fast(state, index); \
- bool success = (cond1); \
- if (success) { \
- grapheme = Text$get_grapheme_fast(state, index + 1); \
- success = (cond2); \
- if (success) \
- index += 2; \
- } \
- success; })
-
-
-#define EAT_MANY(state, index, cond) ({ int64_t _n = 0; while (EAT1(state, index, cond)) { _n += 1; } _n; })
-
-static int64_t match_email(TextIter_t *state, int64_t index)
-{
+#define EAT1(state, index, cond) \
+ ({ \
+ int32_t grapheme = Text$get_grapheme_fast(state, index); \
+ bool success = (cond); \
+ if (success) index += 1; \
+ success; \
+ })
+
+#define EAT2(state, index, cond1, cond2) \
+ ({ \
+ int32_t grapheme = Text$get_grapheme_fast(state, index); \
+ bool success = (cond1); \
+ if (success) { \
+ grapheme = Text$get_grapheme_fast(state, index + 1); \
+ success = (cond2); \
+ if (success) index += 2; \
+ } \
+ success; \
+ })
+
+#define EAT_MANY(state, index, cond) \
+ ({ \
+ int64_t _n = 0; \
+ while (EAT1(state, index, cond)) { \
+ _n += 1; \
+ } \
+ _n; \
+ })
+
+static int64_t match_email(TextIter_t *state, int64_t index) {
// email = local "@" domain
// local = 1-64 ([a-zA-Z0-9!#$%&‘*+–/=?^_`.{|}~] | non-ascii)
// domain = dns-label ("." dns-label)*
@@ -144,8 +145,7 @@ static int64_t match_email(TextIter_t *state, int64_t index)
if (index > 0) {
uint32_t prev_codepoint = Text$get_main_grapheme_fast(state, index - 1);
- if (uc_is_property_alphabetic(prev_codepoint))
- return -1;
+ if (uc_is_property_alphabetic(prev_codepoint)) return -1;
}
int64_t start_index = index;
@@ -153,78 +153,65 @@ static int64_t match_email(TextIter_t *state, int64_t index)
// Local part:
int64_t local_len = 0;
static const char *allowed_local = "!#$%&‘*+–/=?^_`.{|}~";
- while (EAT1(state, index,
- (grapheme & ~0x7F) || isalnum((char)grapheme) || strchr(allowed_local, (char)grapheme))) {
+ while (EAT1(state, index, (grapheme & ~0x7F) || isalnum((char)grapheme) || strchr(allowed_local, (char)grapheme))) {
local_len += 1;
if (local_len > 64) return -1;
}
-
- if (!EAT1(state, index, grapheme == '@'))
- return -1;
+
+ if (!EAT1(state, index, grapheme == '@')) return -1;
// Host
int64_t host_len = 0;
do {
int64_t label_len = 0;
- while (EAT1(state, index,
- (grapheme & ~0x7F) || isalnum((char)grapheme) || grapheme == '-')) {
+ while (EAT1(state, index, (grapheme & ~0x7F) || isalnum((char)grapheme) || grapheme == '-')) {
label_len += 1;
if (label_len > 63) return -1;
}
- if (label_len == 0)
- return -1;
+ if (label_len == 0) return -1;
host_len += label_len;
- if (host_len > 255)
- return -1;
+ if (host_len > 255) return -1;
host_len += 1;
} while (EAT1(state, index, grapheme == '.'));
return index - start_index;
}
-static int64_t match_ipv6(TextIter_t *state, int64_t index)
-{
+static int64_t match_ipv6(TextIter_t *state, int64_t index) {
if (index > 0) {
int32_t prev_codepoint = Text$get_grapheme_fast(state, index - 1);
- if ((prev_codepoint & ~0x7F) && (isxdigit(prev_codepoint) || prev_codepoint == ':'))
- return -1;
+ if ((prev_codepoint & ~0x7F) && (isxdigit(prev_codepoint) || prev_codepoint == ':')) return -1;
}
int64_t start_index = index;
const int NUM_CLUSTERS = 8;
bool double_colon_used = false;
for (int cluster = 0; cluster < NUM_CLUSTERS; cluster++) {
for (int digits = 0; digits < 4; digits++) {
- if (!EAT1(state, index, ~(grapheme & ~0x7F) && isxdigit((char)grapheme)))
- break;
+ if (!EAT1(state, index, ~(grapheme & ~0x7F) && isxdigit((char)grapheme))) break;
}
- if (EAT1(state, index, ~(grapheme & ~0x7F) && isxdigit((char)grapheme)))
- return -1; // Too many digits
+ if (EAT1(state, index, ~(grapheme & ~0x7F) && isxdigit((char)grapheme))) return -1; // Too many digits
- if (cluster == NUM_CLUSTERS-1) {
+ if (cluster == NUM_CLUSTERS - 1) {
break;
} else if (!EAT1(state, index, grapheme == ':')) {
- if (double_colon_used)
- break;
+ if (double_colon_used) break;
return -1;
}
if (EAT1(state, index, grapheme == ':')) {
- if (double_colon_used)
- return -1;
+ if (double_colon_used) return -1;
double_colon_used = true;
}
}
return index - start_index;
}
-static int64_t match_ipv4(TextIter_t *state, int64_t index)
-{
+static int64_t match_ipv4(TextIter_t *state, int64_t index) {
if (index > 0) {
int32_t prev_codepoint = Text$get_grapheme_fast(state, index - 1);
- if ((prev_codepoint & ~0x7F) && (isdigit(prev_codepoint) || prev_codepoint == '.'))
- return -1;
+ if ((prev_codepoint & ~0x7F) && (isdigit(prev_codepoint) || prev_codepoint == '.')) return -1;
}
int64_t start_index = index;
@@ -237,27 +224,22 @@ static int64_t match_ipv4(TextIter_t *state, int64_t index)
}
}
- if (EAT1(state, index, ~(grapheme & ~0x7F) && isdigit((char)grapheme)))
- return -1; // Too many digits
+ if (EAT1(state, index, ~(grapheme & ~0x7F) && isdigit((char)grapheme))) return -1; // Too many digits
- if (cluster == NUM_CLUSTERS-1)
- break;
- else if (!EAT1(state, index, grapheme == '.'))
- return -1;
+ if (cluster == NUM_CLUSTERS - 1) break;
+ else if (!EAT1(state, index, grapheme == '.')) return -1;
}
return (index - start_index);
}
-static int64_t match_ip(TextIter_t *state, int64_t index)
-{
+static int64_t match_ip(TextIter_t *state, int64_t index) {
int64_t len = match_ipv6(state, index);
if (len >= 0) return len;
len = match_ipv4(state, index);
return (len >= 0) ? len : -1;
}
-static int64_t match_host(TextIter_t *state, int64_t index)
-{
+static int64_t match_host(TextIter_t *state, int64_t index) {
int64_t ip_len = match_ip(state, index);
if (ip_len > 0) return ip_len;
@@ -266,28 +248,24 @@ static int64_t match_host(TextIter_t *state, int64_t index)
ip_len = match_ip(state, index);
if (ip_len <= 0) return -1;
index += ip_len;
- if (match_grapheme(state, &index, ']'))
- return (index - start_index);
+ if (match_grapheme(state, &index, ']')) return (index - start_index);
return -1;
}
- if (!EAT1(state, index, isalpha(grapheme)))
- return -1;
+ if (!EAT1(state, index, isalpha(grapheme))) return -1;
static const char *non_host_chars = "/#?:@ \t\r\n<>[]{}\\^|\"`";
EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_host_chars, (char)grapheme));
return (index - start_index);
}
-static int64_t match_authority(TextIter_t *state, int64_t index)
-{
+static int64_t match_authority(TextIter_t *state, int64_t index) {
int64_t authority_start = index;
static const char *non_segment_chars = "/#?:@ \t\r\n<>[]{}\\^|\"`.";
// Optional user@ prefix:
int64_t username_len = EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_segment_chars, (char)grapheme));
- if (username_len < 1 || !EAT1(state, index, grapheme == '@'))
- index = authority_start; // No user@ part
+ if (username_len < 1 || !EAT1(state, index, grapheme == '@')) index = authority_start; // No user@ part
// Host:
int64_t host_len = match_host(state, index);
@@ -296,14 +274,12 @@ static int64_t match_authority(TextIter_t *state, int64_t index)
// Port:
if (EAT1(state, index, grapheme == ':')) {
- if (EAT_MANY(state, index, !(grapheme & ~0x7F) && isdigit(grapheme)) == 0)
- return -1;
+ if (EAT_MANY(state, index, !(grapheme & ~0x7F) && isdigit(grapheme)) == 0) return -1;
}
return (index - authority_start);
}
-static int64_t match_uri(TextIter_t *state, int64_t index)
-{
+static int64_t match_uri(TextIter_t *state, int64_t index) {
// URI = scheme ":" ["//" authority] path ["?" query] ["#" fragment]
// scheme = [a-zA-Z] [a-zA-Z0-9+.-]
// authority = [userinfo "@"] host [":" port]
@@ -311,25 +287,22 @@ static int64_t match_uri(TextIter_t *state, int64_t index)
if (index > 0) {
// Don't match if we're not at a word edge:
uint32_t prev_codepoint = Text$get_main_grapheme_fast(state, index - 1);
- if (uc_is_property_alphabetic(prev_codepoint))
- return -1;
+ if (uc_is_property_alphabetic(prev_codepoint)) return -1;
}
int64_t start_index = index;
// Scheme:
- if (!EAT1(state, index, isalpha(grapheme)))
- return -1;
- EAT_MANY(state, index, !(grapheme & ~0x7F) && (isalnum(grapheme) || grapheme == '+' || grapheme == '.' || grapheme == '-'));
- if (!match_grapheme(state, &index, ':'))
- return -1;
+ if (!EAT1(state, index, isalpha(grapheme))) return -1;
+ EAT_MANY(state, index,
+ !(grapheme & ~0x7F) && (isalnum(grapheme) || grapheme == '+' || grapheme == '.' || grapheme == '-'));
+ if (!match_grapheme(state, &index, ':')) return -1;
// Authority:
int64_t authority_len;
if (match_str(state, &index, "//")) {
authority_len = match_authority(state, index);
- if (authority_len > 0)
- index += authority_len;
+ if (authority_len > 0) index += authority_len;
} else {
authority_len = 0;
}
@@ -344,92 +317,75 @@ static int64_t match_uri(TextIter_t *state, int64_t index)
static const char *non_query = " \"#<>[]{}\\^`|";
EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_query, (char)grapheme));
}
-
+
if (EAT1(state, index, grapheme == '#')) { // Fragment
static const char *non_fragment = " \"#<>[]{}\\^`|";
EAT_MANY(state, index, (grapheme & ~0x7F) || !strchr(non_fragment, (char)grapheme));
}
}
- if (authority_len <= 0 && index == path_start)
- return -1;
+ if (authority_len <= 0 && index == path_start) return -1;
return index - start_index;
}
-static int64_t match_url(TextIter_t *state, int64_t index)
-{
+static int64_t match_url(TextIter_t *state, int64_t index) {
int64_t lookahead = index;
- if (!(match_str(state, &lookahead, "https:")
- || match_str(state, &lookahead, "http:")
- || match_str(state, &lookahead, "ftp:")
- || match_str(state, &lookahead, "wss:")
- || match_str(state, &lookahead, "ws:")))
+ if (!(match_str(state, &lookahead, "https:") || match_str(state, &lookahead, "http:")
+ || match_str(state, &lookahead, "ftp:") || match_str(state, &lookahead, "wss:")
+ || match_str(state, &lookahead, "ws:")))
return -1;
return match_uri(state, index);
}
-static int64_t match_id(TextIter_t *state, int64_t index)
-{
- if (!EAT1(state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_START)))
- return -1;
+static int64_t match_id(TextIter_t *state, int64_t index) {
+ if (!EAT1(state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_START))) return -1;
return 1 + EAT_MANY(state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_XID_CONTINUE));
}
-static int64_t match_int(TextIter_t *state, int64_t index)
-{
+static int64_t match_int(TextIter_t *state, int64_t index) {
int64_t negative = EAT1(state, index, grapheme == '-') ? 1 : 0;
int64_t len = EAT_MANY(state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
return len > 0 ? negative + len : -1;
}
-static int64_t match_alphanumeric(TextIter_t *state, int64_t index)
-{
+static int64_t match_alphanumeric(TextIter_t *state, int64_t index) {
return EAT1(state, index, uc_is_property_alphabetic((ucs4_t)grapheme) || uc_is_property_numeric((ucs4_t)grapheme))
- ? 1 : -1;
+ ? 1
+ : -1;
}
-static int64_t match_num(TextIter_t *state, int64_t index)
-{
+static int64_t match_num(TextIter_t *state, int64_t index) {
bool negative = EAT1(state, index, grapheme == '-') ? 1 : 0;
- int64_t pre_decimal = EAT_MANY(state, index,
- uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
+ int64_t pre_decimal = EAT_MANY(state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT));
bool decimal = (EAT1(state, index, grapheme == '.') == 1);
- int64_t post_decimal = decimal ? EAT_MANY(state, index,
- uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT)) : 0;
- if (pre_decimal == 0 && post_decimal == 0)
- return -1;
+ int64_t post_decimal =
+ decimal ? EAT_MANY(state, index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_DECIMAL_DIGIT)) : 0;
+ if (pre_decimal == 0 && post_decimal == 0) return -1;
return negative + pre_decimal + decimal + post_decimal;
}
-static int64_t match_newline(TextIter_t *state, int64_t index)
-{
- if (index >= state->stack[0].text.length)
- return -1;
+static int64_t match_newline(TextIter_t *state, int64_t index) {
+ if (index >= state->stack[0].text.length) return -1;
uint32_t grapheme = index >= state->stack[0].text.length ? 0 : Text$get_main_grapheme_fast(state, index);
- if (grapheme == '\n')
- return 1;
- if (grapheme == '\r' && Text$get_grapheme_fast(state, index + 1) == '\n')
- return 2;
+ if (grapheme == '\n') return 1;
+ if (grapheme == '\r' && Text$get_grapheme_fast(state, index + 1) == '\n') return 2;
return -1;
}
-static int64_t match_pat(TextIter_t *state, int64_t index, pat_t pat)
-{
+static int64_t match_pat(TextIter_t *state, int64_t index, pat_t pat) {
Text_t text = state->stack[0].text;
int32_t grapheme = index >= text.length ? 0 : Text$get_grapheme_fast(state, index);
switch (pat.tag) {
case PAT_START: {
- if (index == 0)
- return pat.negated ? -1 : 0;
+ if (index == 0) return pat.negated ? -1 : 0;
return pat.negated ? 0 : -1;
}
case PAT_END: {
- if (index >= text.length)
- return pat.negated ? -1 : 0;
+ if (index >= text.length) return pat.negated ? -1 : 0;
return pat.negated ? 0 : -1;
}
case PAT_ANY: {
@@ -437,51 +393,40 @@ static int64_t match_pat(TextIter_t *state, int64_t index, pat_t pat)
return (index < text.length) ? 1 : -1;
}
case PAT_GRAPHEME: {
- if (index >= text.length)
- return -1;
- else if (grapheme == pat.grapheme)
- return pat.negated ? -1 : 1;
+ if (index >= text.length) return -1;
+ else if (grapheme == pat.grapheme) return pat.negated ? -1 : 1;
return pat.negated ? 1 : -1;
}
case PAT_PROPERTY: {
- if (index >= text.length)
- return -1;
- else if (uc_is_property((ucs4_t)grapheme, pat.property))
- return pat.negated ? -1 : 1;
+ if (index >= text.length) return -1;
+ else if (uc_is_property((ucs4_t)grapheme, pat.property)) return pat.negated ? -1 : 1;
return pat.negated ? 1 : -1;
}
case PAT_PAIR: {
// Nested punctuation: (?), [?], etc
- if (index >= text.length)
- return -1;
+ if (index >= text.length) return -1;
int32_t open = pat.pair_graphemes[0];
- if (grapheme != open)
- return pat.negated ? 1 : -1;
+ if (grapheme != open) return pat.negated ? 1 : -1;
int32_t close = pat.pair_graphemes[1];
int64_t depth = 1;
int64_t match_len = 1;
for (; depth > 0; match_len++) {
- if (index + match_len >= text.length)
- return pat.negated ? 1 : -1;
+ if (index + match_len >= text.length) return pat.negated ? 1 : -1;
int32_t c = Text$get_grapheme_fast(state, index + match_len);
- if (c == open)
- depth += 1;
- else if (c == close)
- depth -= 1;
+ if (c == open) depth += 1;
+ else if (c == close) depth -= 1;
}
return pat.negated ? -1 : match_len;
}
case PAT_QUOTE: {
// Nested quotes: "?", '?', etc
- if (index >= text.length)
- return -1;
+ if (index >= text.length) return -1;
int32_t open = pat.quote_graphemes[0];
- if (grapheme != open)
- return pat.negated ? 1 : -1;
+ if (grapheme != open) return pat.negated ? 1 : -1;
int32_t close = pat.quote_graphemes[1];
for (int64_t i = index + 1; i < text.length; i++) {
@@ -496,8 +441,7 @@ static int64_t match_pat(TextIter_t *state, int64_t index, pat_t pat)
}
case PAT_FUNCTION: {
int64_t match_len = pat.fn(state, index);
- if (match_len >= 0)
- return pat.negated ? -1 : match_len;
+ if (match_len >= 0) return pat.negated ? -1 : match_len;
return pat.negated ? 1 : -1;
}
default: errx(1, "Invalid pattern");
@@ -506,37 +450,32 @@ static int64_t match_pat(TextIter_t *state, int64_t index, pat_t pat)
return 0;
}
-static pat_t parse_next_pat(TextIter_t *state, int64_t *index)
-{
- if (EAT2(state, *index,
- uc_is_property((ucs4_t)grapheme, UC_PROPERTY_QUOTATION_MARK),
- grapheme == '?')) {
+static pat_t parse_next_pat(TextIter_t *state, int64_t *index) {
+ if (EAT2(state, *index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_QUOTATION_MARK), grapheme == '?')) {
// Quotations: "?", '?', etc
- int32_t open = Text$get_grapheme_fast(state, *index-2);
+ int32_t open = Text$get_grapheme_fast(state, *index - 2);
int32_t close = open;
- uc_mirror_char((ucs4_t)open, (ucs4_t*)&close);
- if (!match_grapheme(state, index, close))
- fail("Pattern's closing quote is missing: ", state->stack[0].text);
+ uc_mirror_char((ucs4_t)open, (ucs4_t *)&close);
+ if (!match_grapheme(state, index, close)) fail("Pattern's closing quote is missing: ", state->stack[0].text);
return (pat_t){
- .tag=PAT_QUOTE,
- .min=1, .max=1,
- .quote_graphemes={open, close},
+ .tag = PAT_QUOTE,
+ .min = 1,
+ .max = 1,
+ .quote_graphemes = {open, close},
};
- } else if (EAT2(state, *index,
- uc_is_property((ucs4_t)grapheme, UC_PROPERTY_PAIRED_PUNCTUATION),
- grapheme == '?')) {
+ } else if (EAT2(state, *index, uc_is_property((ucs4_t)grapheme, UC_PROPERTY_PAIRED_PUNCTUATION), grapheme == '?')) {
// Nested punctuation: (?), [?], etc
- int32_t open = Text$get_grapheme_fast(state, *index-2);
+ int32_t open = Text$get_grapheme_fast(state, *index - 2);
int32_t close = open;
- uc_mirror_char((ucs4_t)open, (ucs4_t*)&close);
- if (!match_grapheme(state, index, close))
- fail("Pattern's closing brace is missing: ", state->stack[0].text);
-
+ uc_mirror_char((ucs4_t)open, (ucs4_t *)&close);
+ if (!match_grapheme(state, index, close)) fail("Pattern's closing brace is missing: ", state->stack[0].text);
+
return (pat_t){
- .tag=PAT_PAIR,
- .min=1, .max=1,
- .pair_graphemes={open, close},
+ .tag = PAT_PAIR,
+ .min = 1,
+ .max = 1,
+ .pair_graphemes = {open, close},
};
} else if (EAT1(state, *index, grapheme == '{')) { // named patterns {id}, {2-3 hex}, etc.
skip_whitespace(state, index);
@@ -559,133 +498,127 @@ static pat_t parse_next_pat(TextIter_t *state, int64_t *index)
skip_whitespace(state, index);
bool negated = match_grapheme(state, index, '!');
-#define PAT(_tag, ...) ((pat_t){.min=min, .max=max, .negated=negated, .tag=_tag, __VA_ARGS__})
+#define PAT(_tag, ...) ((pat_t){.min = min, .max = max, .negated = negated, .tag = _tag, __VA_ARGS__})
const char *prop_name;
- if (match_str(state, index, ".."))
- prop_name = "..";
- else
- prop_name = get_property_name(state, index);
+ if (match_str(state, index, "..")) prop_name = "..";
+ else prop_name = get_property_name(state, index);
if (!prop_name) {
// Literal character, e.g. {1?}
skip_whitespace(state, index);
int32_t grapheme = Text$get_grapheme_fast(state, (*index)++);
- if (!match_grapheme(state, index, '}'))
- fail("Missing closing '}' in pattern: ", state->stack[0].text);
- return PAT(PAT_GRAPHEME, .grapheme=grapheme);
+ if (!match_grapheme(state, index, '}')) fail("Missing closing '}' in pattern: ", state->stack[0].text);
+ return PAT(PAT_GRAPHEME, .grapheme = grapheme);
} else if (strlen(prop_name) == 1) {
// Single letter names: {1+ A}
skip_whitespace(state, index);
- if (!match_grapheme(state, index, '}'))
- fail("Missing closing '}' in pattern: ", state->stack[0].text);
- return PAT(PAT_GRAPHEME, .grapheme=prop_name[0]);
+ if (!match_grapheme(state, index, '}')) fail("Missing closing '}' in pattern: ", state->stack[0].text);
+ return PAT(PAT_GRAPHEME, .grapheme = prop_name[0]);
}
skip_whitespace(state, index);
- if (!match_grapheme(state, index, '}'))
- fail("Missing closing '}' in pattern: ", state->stack[0].text);
+ if (!match_grapheme(state, index, '}')) fail("Missing closing '}' in pattern: ", state->stack[0].text);
switch (tolower(prop_name[0])) {
case '.':
if (prop_name[1] == '.') {
- if (negated)
- return ((pat_t){.tag=PAT_END, .min=min, .max=max, .non_capturing=true});
- else
- return PAT(PAT_ANY);
+ if (negated) return ((pat_t){.tag = PAT_END, .min = min, .max = max, .non_capturing = true});
+ else return PAT(PAT_ANY);
}
break;
case 'a':
if (strcasecmp(prop_name, "authority") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_authority);
+ return PAT(PAT_FUNCTION, .fn = match_authority);
} else if (strcasecmp(prop_name, "alphanum") == 0 || strcasecmp(prop_name, "anum") == 0
|| strcasecmp(prop_name, "alphanumeric") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_alphanumeric);
+ return PAT(PAT_FUNCTION, .fn = match_alphanumeric);
}
break;
- case 'c':
- if (strcasecmp(prop_name, "crlf") == 0)
- return PAT(PAT_FUNCTION, .fn=match_newline);
+ case 'c':
+ if (strcasecmp(prop_name, "crlf") == 0) return PAT(PAT_FUNCTION, .fn = match_newline);
break;
case 'd':
if (strcasecmp(prop_name, "digit") == 0) {
- return PAT(PAT_PROPERTY, .property=UC_PROPERTY_DECIMAL_DIGIT);
+ return PAT(PAT_PROPERTY, .property = UC_PROPERTY_DECIMAL_DIGIT);
}
break;
case 'e':
if (strcasecmp(prop_name, "end") == 0) {
- return PAT(PAT_END, .non_capturing=!negated);
+ return PAT(PAT_END, .non_capturing = !negated);
} else if (strcasecmp(prop_name, "email") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_email);
+ return PAT(PAT_FUNCTION, .fn = match_email);
}
#if _LIBUNISTRING_VERSION >= 0x0100000
else if (strcasecmp(prop_name, "emoji") == 0) {
- return PAT(PAT_PROPERTY, .property=UC_PROPERTY_EMOJI);
+ return PAT(PAT_PROPERTY, .property = UC_PROPERTY_EMOJI);
}
#endif
break;
case 'h':
if (strcasecmp(prop_name, "host") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_host);
+ return PAT(PAT_FUNCTION, .fn = match_host);
}
break;
case 'i':
if (strcasecmp(prop_name, "id") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_id);
+ return PAT(PAT_FUNCTION, .fn = match_id);
} else if (strcasecmp(prop_name, "int") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_int);
+ return PAT(PAT_FUNCTION, .fn = match_int);
} else if (strcasecmp(prop_name, "ipv4") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_ipv4);
+ return PAT(PAT_FUNCTION, .fn = match_ipv4);
} else if (strcasecmp(prop_name, "ipv6") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_ipv6);
+ return PAT(PAT_FUNCTION, .fn = match_ipv6);
} else if (strcasecmp(prop_name, "ip") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_ip);
+ return PAT(PAT_FUNCTION, .fn = match_ip);
}
break;
case 'n':
if (strcasecmp(prop_name, "nl") == 0 || strcasecmp(prop_name, "newline") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_newline);
+ return PAT(PAT_FUNCTION, .fn = match_newline);
} else if (strcasecmp(prop_name, "num") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_num);
+ return PAT(PAT_FUNCTION, .fn = match_num);
}
break;
case 's':
if (strcasecmp(prop_name, "start") == 0) {
- return PAT(PAT_START, .non_capturing=!negated);
+ return PAT(PAT_START, .non_capturing = !negated);
}
break;
case 'u':
if (strcasecmp(prop_name, "uri") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_uri);
+ return PAT(PAT_FUNCTION, .fn = match_uri);
} else if (strcasecmp(prop_name, "url") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_url);
+ return PAT(PAT_FUNCTION, .fn = match_url);
}
break;
case 'w':
if (strcasecmp(prop_name, "word") == 0) {
- return PAT(PAT_FUNCTION, .fn=match_id);
+ return PAT(PAT_FUNCTION, .fn = match_id);
} else if (strcasecmp(prop_name, "ws") == 0 || strcasecmp(prop_name, "whitespace") == 0) {
- return PAT(PAT_PROPERTY, .property=UC_PROPERTY_WHITE_SPACE);
+ return PAT(PAT_PROPERTY, .property = UC_PROPERTY_WHITE_SPACE);
}
break;
default: break;
}
uc_property_t prop = uc_property_byname(prop_name);
- if (uc_property_is_valid(prop))
- return PAT(PAT_PROPERTY, .property=prop);
+ if (uc_property_is_valid(prop)) return PAT(PAT_PROPERTY, .property = prop);
ucs4_t grapheme = unicode_name_character(prop_name);
- if (grapheme == UNINAME_INVALID)
- fail("Not a valid property or character name: ", prop_name);
- return PAT(PAT_GRAPHEME, .grapheme=(int32_t)grapheme);
+ if (grapheme == UNINAME_INVALID) fail("Not a valid property or character name: ", prop_name);
+ return PAT(PAT_GRAPHEME, .grapheme = (int32_t)grapheme);
#undef PAT
} else {
- return (pat_t){.tag=PAT_GRAPHEME, .non_capturing=true, .min=1, .max=1, .grapheme=Text$get_grapheme_fast(state, (*index)++)};
+ return (pat_t){.tag = PAT_GRAPHEME,
+ .non_capturing = true,
+ .min = 1,
+ .max = 1,
+ .grapheme = Text$get_grapheme_fast(state, (*index)++)};
}
}
-static int64_t match(Text_t text, int64_t text_index, Text_t pattern, int64_t pattern_index, capture_t *captures, int64_t capture_index)
-{
+static int64_t match(Text_t text, int64_t text_index, Text_t pattern, int64_t pattern_index, capture_t *captures,
+ int64_t capture_index) {
if (pattern_index >= pattern.length) // End of the pattern
return 0;
@@ -713,7 +646,8 @@ static int64_t match(Text_t text, int64_t text_index, Text_t pattern, int64_t pa
}
if (pat.min == 0 && pattern_index < pattern.length) {
- next_match_len = match(text, text_index, pattern, pattern_index, captures, capture_index + (pat.non_capturing ? 0 : 1));
+ next_match_len =
+ match(text, text_index, pattern, pattern_index, captures, capture_index + (pat.non_capturing ? 0 : 1));
if (next_match_len >= 0) {
capture_len = 0;
goto success;
@@ -722,17 +656,16 @@ static int64_t match(Text_t text, int64_t text_index, Text_t pattern, int64_t pa
while (count < pat.max) {
int64_t match_len = match_pat(&text_state, text_index, pat);
- if (match_len < 0)
- break;
+ if (match_len < 0) break;
capture_len += match_len;
text_index += match_len;
count += 1;
if (pattern_index < pattern.length) { // More stuff after this
- if (count < pat.min)
- next_match_len = -1;
+ if (count < pat.min) next_match_len = -1;
else
- next_match_len = match(text, text_index, pattern, pattern_index, captures, capture_index + (pat.non_capturing ? 0 : 1));
+ next_match_len = match(text, text_index, pattern, pattern_index, captures,
+ capture_index + (pat.non_capturing ? 0 : 1));
} else {
next_match_len = 0;
}
@@ -748,32 +681,29 @@ static int64_t match(Text_t text, int64_t text_index, Text_t pattern, int64_t pa
}
}
- if (pattern_index < pattern.length && next_match_len >= 0)
- break; // Next guy exists and wants to stop here
+ if (pattern_index < pattern.length && next_match_len >= 0) break; // Next guy exists and wants to stop here
- if (text_index >= text.length)
- break;
+ if (text_index >= text.length) break;
}
- if (count < pat.min || next_match_len < 0)
- return -1;
+ if (count < pat.min || next_match_len < 0) return -1;
- success:
+success:
if (captures && capture_index < MAX_BACKREFS && !pat.non_capturing) {
if (pat.tag == PAT_PAIR || pat.tag == PAT_QUOTE) {
assert(capture_len > 0);
captures[capture_index] = (capture_t){
- .index=capture_start + 1, // Skip leading quote/paren
- .length=capture_len - 2, // Skip open/close
- .occupied=true,
- .recursive=(pat.tag == PAT_PAIR),
+ .index = capture_start + 1, // Skip leading quote/paren
+ .length = capture_len - 2, // Skip open/close
+ .occupied = true,
+ .recursive = (pat.tag == PAT_PAIR),
};
} else {
captures[capture_index] = (capture_t){
- .index=capture_start,
- .length=capture_len,
- .occupied=true,
- .recursive=false,
+ .index = capture_start,
+ .length = capture_len,
+ .occupied = true,
+ .recursive = false,
};
}
}
@@ -784,11 +714,10 @@ static int64_t match(Text_t text, int64_t text_index, Text_t pattern, int64_t pa
#undef EAT2
#undef EAT_MANY
-static int64_t _find(Text_t text, Text_t pattern, int64_t first, int64_t last, int64_t *match_length, capture_t *captures)
-{
+static int64_t _find(Text_t text, Text_t pattern, int64_t first, int64_t last, int64_t *match_length,
+ capture_t *captures) {
int32_t first_grapheme = Text$get_grapheme(pattern, 0);
- bool find_first = (first_grapheme != '{'
- && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+ bool find_first = (first_grapheme != '{' && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
&& !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
@@ -801,111 +730,97 @@ static int64_t _find(Text_t text, Text_t pattern, int64_t first, int64_t last, i
int64_t m = match(text, i, pattern, 0, captures, 0);
if (m >= 0) {
- if (match_length)
- *match_length = m;
+ if (match_length) *match_length = m;
return i;
}
}
- if (match_length)
- *match_length = -1;
+ if (match_length) *match_length = -1;
return -1;
}
-static OptionalPatternMatch find(Text_t text, Text_t pattern, Int_t from_index)
-{
+static OptionalPatternMatch find(Text_t text, Text_t pattern, Int_t from_index) {
int64_t first = Int64$from_int(from_index, false);
if (first == 0) fail("Invalid index: 0");
if (first < 0) first = text.length + first + 1;
- if (first > text.length || first < 1)
- return NONE_MATCH;
+ if (first > text.length || first < 1) return NONE_MATCH;
capture_t captures[MAX_BACKREFS] = {};
int64_t len = 0;
- int64_t found = _find(text, pattern, first-1, text.length-1, &len, captures);
- if (found == -1)
- return NONE_MATCH;
+ int64_t found = _find(text, pattern, first - 1, text.length - 1, &len, captures);
+ if (found == -1) return NONE_MATCH;
List_t capture_list = {};
for (int i = 0; captures[i].occupied; i++) {
- Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
+ Text_t capture = Text$slice(text, I(captures[i].index + 1), I(captures[i].index + captures[i].length));
List$insert(&capture_list, &capture, I(0), sizeof(Text_t));
}
return (OptionalPatternMatch){
- .text=Text$slice(text, I(found+1), I(found+len)),
- .index=I(found+1),
- .captures=capture_list,
+ .text = Text$slice(text, I(found + 1), I(found + len)),
+ .index = I(found + 1),
+ .captures = capture_list,
};
}
-PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern)
-{
+PUREFUNC static bool Pattern$has(Text_t text, Text_t pattern) {
if (Text$starts_with(pattern, Text("{start}"), &pattern)) {
int64_t m = match(text, 0, pattern, 0, NULL, 0);
return m >= 0;
} else if (Text$ends_with(text, Text("{end}"), NULL)) {
- for (int64_t i = text.length-1; i >= 0; i--) {
+ for (int64_t i = text.length - 1; i >= 0; i--) {
int64_t match_len = match(text, i, pattern, 0, NULL, 0);
- if (match_len >= 0 && i + match_len == text.length)
- return true;
+ if (match_len >= 0 && i + match_len == text.length) return true;
}
return false;
} else {
- int64_t found = _find(text, pattern, 0, text.length-1, NULL, NULL);
+ int64_t found = _find(text, pattern, 0, text.length - 1, NULL, NULL);
return (found >= 0);
}
}
-static bool Pattern$matches(Text_t text, Text_t pattern)
-{
+static bool Pattern$matches(Text_t text, Text_t pattern) {
capture_t captures[MAX_BACKREFS] = {};
int64_t match_len = match(text, 0, pattern, 0, NULL, 0);
return (match_len == text.length);
}
-static bool Pattern$match_at(Text_t text, Text_t pattern, Int_t pos, PatternMatch *dest)
-{
+static bool Pattern$match_at(Text_t text, Text_t pattern, Int_t pos, PatternMatch *dest) {
int64_t start = Int64$from_int(pos, false) - 1;
capture_t captures[MAX_BACKREFS] = {};
int64_t match_len = match(text, start, pattern, 0, captures, 0);
- if (match_len < 0)
- return false;
+ if (match_len < 0) return false;
List_t capture_list = {};
for (int i = 0; captures[i].occupied; i++) {
- Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
+ Text_t capture = Text$slice(text, I(captures[i].index + 1), I(captures[i].index + captures[i].length));
List$insert(&capture_list, &capture, I(0), sizeof(Text_t));
}
- dest->text = Text$slice(text, I(start+1), I(start+match_len));
- dest->index = I(start+1);
+ dest->text = Text$slice(text, I(start + 1), I(start + match_len));
+ dest->index = I(start + 1);
dest->captures = capture_list;
return true;
}
-static OptionalList_t Pattern$captures(Text_t text, Text_t pattern)
-{
+static OptionalList_t Pattern$captures(Text_t text, Text_t pattern) {
capture_t captures[MAX_BACKREFS] = {};
int64_t match_len = match(text, 0, pattern, 0, captures, 0);
- if (match_len != text.length)
- return NONE_LIST;
+ if (match_len != text.length) return NONE_LIST;
List_t capture_list = {};
for (int i = 0; captures[i].occupied; i++) {
- Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
+ Text_t capture = Text$slice(text, I(captures[i].index + 1), I(captures[i].index + captures[i].length));
List$insert(&capture_list, &capture, I(0), sizeof(Text_t));
}
return capture_list;
}
-static List_t Pattern$find_all(Text_t text, Text_t pattern)
-{
+static List_t Pattern$find_all(Text_t text, Text_t pattern) {
if (pattern.length == 0) // special case
- return (List_t){.length=0};
+ return (List_t){.length = 0};
List_t matches = {};
- for (int64_t i = 1; ; ) {
+ for (int64_t i = 1;;) {
OptionalPatternMatch m = find(text, pattern, I(i));
- if (m.is_none)
- break;
+ if (m.is_none) break;
i = Int64$from_int(m.index, false) + m.text.length;
List$insert(&matches, &m, I_small(0), sizeof(PatternMatch));
}
@@ -918,41 +833,35 @@ typedef struct {
Text_t pattern;
} match_iter_state_t;
-static OptionalPatternMatch next_match(match_iter_state_t *state)
-{
- if (Int64$from_int(state->i, false) > state->state.stack[0].text.length)
- return NONE_MATCH;
+static OptionalPatternMatch next_match(match_iter_state_t *state) {
+ if (Int64$from_int(state->i, false) > state->state.stack[0].text.length) return NONE_MATCH;
OptionalPatternMatch m = find(state->state.stack[0].text, state->pattern, state->i);
if (m.is_none) // No match
state->i = I(state->state.stack[0].text.length + 1);
- else
- state->i = Int$plus(m.index, I(MAX(1, m.text.length)));
+ else state->i = Int$plus(m.index, I(MAX(1, m.text.length)));
return m;
}
-static Closure_t Pattern$by_match(Text_t text, Text_t pattern)
-{
+static Closure_t Pattern$by_match(Text_t text, Text_t pattern) {
return (Closure_t){
- .fn=(void*)next_match,
- .userdata=new(match_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=I_small(1), .pattern=pattern),
+ .fn = (void *)next_match,
+ .userdata = new (match_iter_state_t, .state = NEW_TEXT_ITER_STATE(text), .i = I_small(1), .pattern = pattern),
};
}
-static Text_t apply_backrefs(Text_t text, List_t recursive_replacements, Text_t replacement, Text_t backref_pat, capture_t *captures)
-{
- if (backref_pat.length == 0)
- return replacement;
+static Text_t apply_backrefs(Text_t text, List_t recursive_replacements, Text_t replacement, Text_t backref_pat,
+ capture_t *captures) {
+ if (backref_pat.length == 0) return replacement;
int32_t first_grapheme = Text$get_grapheme(backref_pat, 0);
- bool find_first = (first_grapheme != '{'
- && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+ bool find_first = (first_grapheme != '{' && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
&& !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
Text_t ret = Text("");
TextIter_t replacement_state = NEW_TEXT_ITER_STATE(replacement);
int64_t nonmatching_pos = 0;
- for (int64_t pos = 0; pos < replacement.length; ) {
+ for (int64_t pos = 0; pos < replacement.length;) {
// Optimization: quickly skip ahead to first char in the backref pattern:
if (find_first) {
while (pos < replacement.length && Text$get_grapheme_fast(&replacement_state, pos) != first_grapheme)
@@ -971,22 +880,23 @@ static Text_t apply_backrefs(Text_t text, List_t recursive_replacements, Text_t
pos += 1;
continue;
}
- if (backref < 0 || backref > 9) fail("Invalid backref index: ", backref, " (only 0-", MAX_BACKREFS-1, " are allowed)");
+ if (backref < 0 || backref > 9)
+ fail("Invalid backref index: ", backref, " (only 0-", MAX_BACKREFS - 1, " are allowed)");
backref_len = (after_backref - pos);
if (Text$get_grapheme_fast(&replacement_state, pos + backref_len) == ';')
backref_len += 1; // skip optional semicolon
- if (!captures[backref].occupied)
- fail("There is no capture number ", backref, "!");
+ if (!captures[backref].occupied) fail("There is no capture number ", backref, "!");
- Text_t backref_text = Text$slice(text, I(captures[backref].index+1), I(captures[backref].index + captures[backref].length));
+ Text_t backref_text =
+ Text$slice(text, I(captures[backref].index + 1), I(captures[backref].index + captures[backref].length));
if (captures[backref].recursive && recursive_replacements.length > 0)
backref_text = replace_list(backref_text, recursive_replacements, backref_pat, true);
if (pos > nonmatching_pos) {
- Text_t before_slice = Text$slice(replacement, I(nonmatching_pos+1), I(pos));
+ Text_t before_slice = Text$slice(replacement, I(nonmatching_pos + 1), I(pos));
ret = Text$concat(ret, before_slice, backref_text);
} else {
ret = Text$concat(ret, backref_text);
@@ -996,31 +906,29 @@ static Text_t apply_backrefs(Text_t text, List_t recursive_replacements, Text_t
nonmatching_pos = pos;
}
if (nonmatching_pos < replacement.length) {
- Text_t last_slice = Text$slice(replacement, I(nonmatching_pos+1), I(replacement.length));
+ Text_t last_slice = Text$slice(replacement, I(nonmatching_pos + 1), I(replacement.length));
ret = Text$concat(ret, last_slice);
}
return ret;
}
-static Text_t Pattern$replace(Text_t text, Text_t pattern, Text_t replacement, Text_t backref_pat, bool recursive)
-{
+static Text_t Pattern$replace(Text_t text, Text_t pattern, Text_t replacement, Text_t backref_pat, bool recursive) {
Text_t ret = EMPTY_TEXT;
int32_t first_grapheme = Text$get_grapheme(pattern, 0);
- bool find_first = (first_grapheme != '{'
- && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+ bool find_first = (first_grapheme != '{' && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
&& !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
Text_t entries[2] = {pattern, replacement};
List_t replacements = {
- .data=entries,
- .length=1,
- .stride=sizeof(entries),
+ .data = entries,
+ .length = 1,
+ .stride = sizeof(entries),
};
TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
int64_t nonmatching_pos = 0;
- for (int64_t pos = 0; pos < text.length; ) {
+ for (int64_t pos = 0; pos < text.length;) {
// Optimization: quickly skip ahead to first char in pattern:
if (find_first) {
while (pos < text.length && Text$get_grapheme_fast(&text_state, pos) != first_grapheme)
@@ -1034,13 +942,16 @@ static Text_t Pattern$replace(Text_t text, Text_t pattern, Text_t replacement, T
continue;
}
captures[0] = (capture_t){
- .index = pos, .length = match_len,
- .occupied = true, .recursive = false,
+ .index = pos,
+ .length = match_len,
+ .occupied = true,
+ .recursive = false,
};
- Text_t replacement_text = apply_backrefs(text, recursive ? replacements : (List_t){}, replacement, backref_pat, captures);
+ Text_t replacement_text =
+ apply_backrefs(text, recursive ? replacements : (List_t){}, replacement, backref_pat, captures);
if (pos > nonmatching_pos) {
- Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos));
+ Text_t before_slice = Text$slice(text, I(nonmatching_pos + 1), I(pos));
ret = Text$concat(ret, before_slice, replacement_text);
} else {
ret = Text$concat(ret, replacement_text);
@@ -1049,44 +960,39 @@ static Text_t Pattern$replace(Text_t text, Text_t pattern, Text_t replacement, T
pos += MAX(match_len, 1);
}
if (nonmatching_pos < text.length) {
- Text_t last_slice = Text$slice(text, I(nonmatching_pos+1), I(text.length));
+ Text_t last_slice = Text$slice(text, I(nonmatching_pos + 1), I(text.length));
ret = Text$concat(ret, last_slice);
}
return ret;
}
-static Text_t Pattern$trim(Text_t text, Text_t pattern, bool trim_left, bool trim_right)
-{
- int64_t first = 0, last = text.length-1;
+static Text_t Pattern$trim(Text_t text, Text_t pattern, bool trim_left, bool trim_right) {
+ int64_t first = 0, last = text.length - 1;
if (trim_left) {
int64_t match_len = match(text, 0, pattern, 0, NULL, 0);
- if (match_len > 0)
- first = match_len;
+ if (match_len > 0) first = match_len;
}
if (trim_right) {
- for (int64_t i = text.length-1; i >= first; i--) {
+ for (int64_t i = text.length - 1; i >= first; i--) {
int64_t match_len = match(text, i, pattern, 0, NULL, 0);
- if (match_len > 0 && i + match_len == text.length)
- last = i-1;
+ if (match_len > 0 && i + match_len == text.length) last = i - 1;
}
}
- return Text$slice(text, I(first+1), I(last+1));
+ return Text$slice(text, I(first + 1), I(last + 1));
}
-static Text_t Pattern$map(Text_t text, Text_t pattern, Closure_t fn, bool recursive)
-{
+static Text_t Pattern$map(Text_t text, Text_t pattern, Closure_t fn, bool recursive) {
Text_t ret = EMPTY_TEXT;
int32_t first_grapheme = Text$get_grapheme(pattern, 0);
- bool find_first = (first_grapheme != '{'
- && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+ bool find_first = (first_grapheme != '{' && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
&& !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
int64_t nonmatching_pos = 0;
- Text_t (*text_mapper)(PatternMatch, void*) = fn.fn;
+ Text_t (*text_mapper)(PatternMatch, void *) = fn.fn;
for (int64_t pos = 0; pos < text.length; pos++) {
// Optimization: quickly skip ahead to first char in pattern:
if (find_first) {
@@ -1099,20 +1005,19 @@ static Text_t Pattern$map(Text_t text, Text_t pattern, Closure_t fn, bool recurs
if (match_len < 0) continue;
PatternMatch m = {
- .text=Text$slice(text, I(pos+1), I(pos+match_len)),
- .index=I(pos+1),
- .captures={},
+ .text = Text$slice(text, I(pos + 1), I(pos + match_len)),
+ .index = I(pos + 1),
+ .captures = {},
};
for (int i = 0; captures[i].occupied; i++) {
- Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
- if (recursive)
- capture = Pattern$map(capture, pattern, fn, recursive);
+ Text_t capture = Text$slice(text, I(captures[i].index + 1), I(captures[i].index + captures[i].length));
+ if (recursive) capture = Pattern$map(capture, pattern, fn, recursive);
List$insert(&m.captures, &capture, I(0), sizeof(Text_t));
}
Text_t replacement = text_mapper(m, fn.userdata);
if (pos > nonmatching_pos) {
- Text_t before_slice = Text$slice(text, I(nonmatching_pos+1), I(pos));
+ Text_t before_slice = Text$slice(text, I(nonmatching_pos + 1), I(pos));
ret = Text$concat(ret, before_slice, replacement);
} else {
ret = Text$concat(ret, replacement);
@@ -1121,21 +1026,19 @@ static Text_t Pattern$map(Text_t text, Text_t pattern, Closure_t fn, bool recurs
pos += (match_len - 1);
}
if (nonmatching_pos < text.length) {
- Text_t last_slice = Text$slice(text, I(nonmatching_pos+1), I(text.length));
+ Text_t last_slice = Text$slice(text, I(nonmatching_pos + 1), I(text.length));
ret = Text$concat(ret, last_slice);
}
return ret;
}
-static void Pattern$each(Text_t text, Text_t pattern, Closure_t fn, bool recursive)
-{
+static void Pattern$each(Text_t text, Text_t pattern, Closure_t fn, bool recursive) {
int32_t first_grapheme = Text$get_grapheme(pattern, 0);
- bool find_first = (first_grapheme != '{'
- && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
+ bool find_first = (first_grapheme != '{' && !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_QUOTATION_MARK)
&& !uc_is_property((ucs4_t)first_grapheme, UC_PROPERTY_PAIRED_PUNCTUATION));
TextIter_t text_state = NEW_TEXT_ITER_STATE(text);
- void (*action)(PatternMatch, void*) = fn.fn;
+ void (*action)(PatternMatch, void *) = fn.fn;
for (int64_t pos = 0; pos < text.length; pos++) {
// Optimization: quickly skip ahead to first char in pattern:
if (find_first) {
@@ -1148,14 +1051,13 @@ static void Pattern$each(Text_t text, Text_t pattern, Closure_t fn, bool recursi
if (match_len < 0) continue;
PatternMatch m = {
- .text=Text$slice(text, I(pos+1), I(pos+match_len)),
- .index=I(pos+1),
- .captures={},
+ .text = Text$slice(text, I(pos + 1), I(pos + match_len)),
+ .index = I(pos + 1),
+ .captures = {},
};
for (int i = 0; captures[i].occupied; i++) {
- Text_t capture = Text$slice(text, I(captures[i].index+1), I(captures[i].index+captures[i].length));
- if (recursive)
- Pattern$each(capture, pattern, fn, recursive);
+ Text_t capture = Text$slice(text, I(captures[i].index + 1), I(captures[i].index + captures[i].length));
+ if (recursive) Pattern$each(capture, pattern, fn, recursive);
List$insert(&m.captures, &capture, I(0), sizeof(Text_t));
}
@@ -1164,17 +1066,16 @@ static void Pattern$each(Text_t text, Text_t pattern, Closure_t fn, bool recursi
}
}
-Text_t replace_list(Text_t text, List_t replacements, Text_t backref_pat, bool recursive)
-{
+Text_t replace_list(Text_t text, List_t replacements, Text_t backref_pat, bool recursive) {
if (replacements.length == 0) return text;
Text_t ret = EMPTY_TEXT;
int64_t nonmatch_pos = 0;
- for (int64_t pos = 0; pos < text.length; ) {
+ for (int64_t pos = 0; pos < text.length;) {
// Find the first matching pattern at this position:
for (int64_t i = 0; i < replacements.length; i++) {
- Text_t pattern = *(Text_t*)(replacements.data + i*replacements.stride);
+ Text_t pattern = *(Text_t *)(replacements.data + i * replacements.stride);
capture_t captures[MAX_BACKREFS] = {};
int64_t len = match(text, pos, pattern, 0, captures, 1);
if (len < 0) continue;
@@ -1183,13 +1084,14 @@ Text_t replace_list(Text_t text, List_t replacements, Text_t backref_pat, bool r
// If we skipped over some non-matching text before finding a match, insert it here:
if (pos > nonmatch_pos) {
- Text_t before_slice = Text$slice(text, I(nonmatch_pos+1), I(pos));
+ Text_t before_slice = Text$slice(text, I(nonmatch_pos + 1), I(pos));
ret = Text$concat(ret, before_slice);
}
// Concatenate the replacement:
- Text_t replacement = *(Text_t*)(replacements.data + i*replacements.stride + sizeof(Text_t));
- Text_t replacement_text = apply_backrefs(text, recursive ? replacements : (List_t){}, replacement, backref_pat, captures);
+ Text_t replacement = *(Text_t *)(replacements.data + i * replacements.stride + sizeof(Text_t));
+ Text_t replacement_text =
+ apply_backrefs(text, recursive ? replacements : (List_t){}, replacement, backref_pat, captures);
ret = Text$concat(ret, replacement_text);
pos += MAX(len, 1);
nonmatch_pos = pos;
@@ -1197,26 +1099,24 @@ Text_t replace_list(Text_t text, List_t replacements, Text_t backref_pat, bool r
}
pos += 1;
- next_pos:
+ next_pos:
continue;
}
if (nonmatch_pos <= text.length) {
- Text_t last_slice = Text$slice(text, I(nonmatch_pos+1), I(text.length));
+ Text_t last_slice = Text$slice(text, I(nonmatch_pos + 1), I(text.length));
ret = Text$concat(ret, last_slice);
}
return ret;
}
-static Text_t Pattern$replace_all(Text_t text, Table_t replacements, Text_t backref_pat, bool recursive)
-{
+static Text_t Pattern$replace_all(Text_t text, Table_t replacements, Text_t backref_pat, bool recursive) {
return replace_list(text, replacements.entries, backref_pat, recursive);
}
-static List_t Pattern$split(Text_t text, Text_t pattern)
-{
+static List_t Pattern$split(Text_t text, Text_t pattern) {
if (text.length == 0) // special case
- return (List_t){.length=0};
+ return (List_t){.length = 0};
if (pattern.length == 0) // special case
return Text$clusters(text);
@@ -1226,16 +1126,15 @@ static List_t Pattern$split(Text_t text, Text_t pattern)
int64_t i = 0;
for (;;) {
int64_t len = 0;
- int64_t found = _find(text, pattern, i, text.length-1, &len, NULL);
- if (found == i && len == 0)
- found = _find(text, pattern, i + 1, text.length-1, &len, NULL);
+ int64_t found = _find(text, pattern, i, text.length - 1, &len, NULL);
+ if (found == i && len == 0) found = _find(text, pattern, i + 1, text.length - 1, &len, NULL);
if (found < 0) break;
- Text_t chunk = Text$slice(text, I(i+1), I(found));
+ Text_t chunk = Text$slice(text, I(i + 1), I(found));
List$insert(&chunks, &chunk, I_small(0), sizeof(Text_t));
i = MAX(found + len, i + 1);
}
- Text_t last_chunk = Text$slice(text, I(i+1), I(text.length));
+ Text_t last_chunk = Text$slice(text, I(i + 1), I(text.length));
List$insert(&chunks, &last_chunk, I_small(0), sizeof(Text_t));
return chunks;
@@ -1247,8 +1146,7 @@ typedef struct {
Text_t pattern;
} split_iter_state_t;
-static OptionalText_t next_split(split_iter_state_t *state)
-{
+static OptionalText_t next_split(split_iter_state_t *state) {
Text_t text = state->state.stack[0].text;
if (state->i >= text.length) {
if (state->pattern.length > 0 && state->i == text.length) { // special case
@@ -1259,37 +1157,34 @@ static OptionalText_t next_split(split_iter_state_t *state)
}
if (state->pattern.length == 0) { // special case
- Text_t ret = Text$cluster(text, I(state->i+1));
+ Text_t ret = Text$cluster(text, I(state->i + 1));
state->i += 1;
return ret;
}
int64_t start = state->i;
int64_t len = 0;
- int64_t found = _find(text, state->pattern, start, text.length-1, &len, NULL);
+ int64_t found = _find(text, state->pattern, start, text.length - 1, &len, NULL);
- if (found == start && len == 0)
- found = _find(text, state->pattern, start + 1, text.length-1, &len, NULL);
+ if (found == start && len == 0) found = _find(text, state->pattern, start + 1, text.length - 1, &len, NULL);
if (found >= 0) {
state->i = MAX(found + len, state->i + 1);
- return Text$slice(text, I(start+1), I(found));
+ return Text$slice(text, I(start + 1), I(found));
} else {
state->i = state->state.stack[0].text.length + 1;
- return Text$slice(text, I(start+1), I(text.length));
+ return Text$slice(text, I(start + 1), I(text.length));
}
}
-static Closure_t Pattern$by_split(Text_t text, Text_t pattern)
-{
+static Closure_t Pattern$by_split(Text_t text, Text_t pattern) {
return (Closure_t){
- .fn=(void*)next_split,
- .userdata=new(split_iter_state_t, .state=NEW_TEXT_ITER_STATE(text), .i=0, .pattern=pattern),
+ .fn = (void *)next_split,
+ .userdata = new (split_iter_state_t, .state = NEW_TEXT_ITER_STATE(text), .i = 0, .pattern = pattern),
};
}
-static Text_t Pattern$escape_text(Text_t text)
-{
+static Text_t Pattern$escape_text(Text_t text) {
// TODO: optimize for spans of non-escaped text
Text_t ret = EMPTY_TEXT;
TextIter_t state = NEW_TEXT_ITER_STATE(text);
@@ -1297,23 +1192,21 @@ static Text_t Pattern$escape_text(Text_t text)
uint32_t g = Text$get_main_grapheme_fast(&state, i);
if (g == '{') {
ret = Text$concat(ret, Text("{1{}"));
- } else if (g == '?'
- || uc_is_property_quotation_mark(g)
+ } else if (g == '?' || uc_is_property_quotation_mark(g)
|| (uc_is_property_paired_punctuation(g) && uc_is_property_left_of_pair(g))) {
- ret = Text$concat(ret, Text("{1"), Text$slice(text, I(i+1), I(i+1)), Text("}"));
+ ret = Text$concat(ret, Text("{1"), Text$slice(text, I(i + 1), I(i + 1)), Text("}"));
} else {
- ret = Text$concat(ret, Text$slice(text, I(i+1), I(i+1)));
+ ret = Text$concat(ret, Text$slice(text, I(i + 1), I(i + 1)));
}
}
return ret;
}
-static Text_t Pattern$as_text(const void *obj, bool colorize, const TypeInfo_t *info)
-{
+static Text_t Pattern$as_text(const void *obj, bool colorize, const TypeInfo_t *info) {
(void)info;
if (!obj) return Text("Pattern");
- Text_t pat = *(Text_t*)obj;
+ Text_t pat = *(Text_t *)obj;
Text_t quote = Pattern$has(pat, Text("/")) && !Pattern$has(pat, Text("|")) ? Text("|") : Text("/");
return Text$concat(colorize ? Text("\x1b[1m$\033[m") : Text("$"), Text$quoted(pat, colorize, quote));
}
diff --git a/lib/random/chacha.h b/lib/random/chacha.h
index b803c24a..a98c0b87 100644
--- a/lib/random/chacha.h
+++ b/lib/random/chacha.h
@@ -11,9 +11,8 @@ Public domain.
typedef unsigned char u8;
typedef unsigned int u32;
-typedef struct
-{
- u32 input[16]; /* could be compressed */
+typedef struct {
+ u32 input[16]; /* could be compressed */
} chacha_ctx;
#define KEYSZ 32
@@ -25,172 +24,167 @@ typedef struct
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
-#define ROTL32(v, n) \
- (U32V((v) << (n)) | ((v) >> (32 - (n))))
+#define ROTL32(v, n) (U32V((v) << (n)) | ((v) >> (32 - (n))))
-#define U8TO32_LITTLE(p) \
- (((u32)((p)[0]) ) | \
- ((u32)((p)[1]) << 8) | \
- ((u32)((p)[2]) << 16) | \
- ((u32)((p)[3]) << 24))
+#define U8TO32_LITTLE(p) (((u32)((p)[0])) | ((u32)((p)[1]) << 8) | ((u32)((p)[2]) << 16) | ((u32)((p)[3]) << 24))
-#define U32TO8_LITTLE(p, v) \
- do { \
- (p)[0] = U8V((v) ); \
- (p)[1] = U8V((v) >> 8); \
- (p)[2] = U8V((v) >> 16); \
- (p)[3] = U8V((v) >> 24); \
- } while (0)
+#define U32TO8_LITTLE(p, v) \
+ do { \
+ (p)[0] = U8V((v)); \
+ (p)[1] = U8V((v) >> 8); \
+ (p)[2] = U8V((v) >> 16); \
+ (p)[3] = U8V((v) >> 24); \
+ } while (0)
#define ROTATE(v, c) (ROTL32(v, c))
#define XOR(v, w) ((v) ^ (w))
#define PLUS(v, w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v), 1))
-#define QUARTERROUND(a, b, c, d) \
- a = PLUS(a, b); d = ROTATE(XOR(d, a), 16); \
- c = PLUS(c, d); b = ROTATE(XOR(b, c), 12); \
- a = PLUS(a, b); d = ROTATE(XOR(d, a), 8); \
- c = PLUS(c, d); b = ROTATE(XOR(b, c), 7);
+#define QUARTERROUND(a, b, c, d) \
+ a = PLUS(a, b); \
+ d = ROTATE(XOR(d, a), 16); \
+ c = PLUS(c, d); \
+ b = ROTATE(XOR(b, c), 12); \
+ a = PLUS(a, b); \
+ d = ROTATE(XOR(d, a), 8); \
+ c = PLUS(c, d); \
+ b = ROTATE(XOR(b, c), 7);
static const char sigma[16] = "expand 32-byte k";
-static void
-chacha_keysetup(chacha_ctx *chacha, const u8 *k)
-{
- chacha->input[0] = U8TO32_LITTLE(sigma + 0);
- chacha->input[1] = U8TO32_LITTLE(sigma + 4);
- chacha->input[2] = U8TO32_LITTLE(sigma + 8);
- chacha->input[3] = U8TO32_LITTLE(sigma + 12);
- chacha->input[4] = U8TO32_LITTLE(k + 0);
- chacha->input[5] = U8TO32_LITTLE(k + 4);
- chacha->input[6] = U8TO32_LITTLE(k + 8);
- chacha->input[7] = U8TO32_LITTLE(k + 12);
- chacha->input[8] = U8TO32_LITTLE(k + 16);
- chacha->input[9] = U8TO32_LITTLE(k + 20);
- chacha->input[10] = U8TO32_LITTLE(k + 24);
- chacha->input[11] = U8TO32_LITTLE(k + 28);
+static void chacha_keysetup(chacha_ctx *chacha, const u8 *k) {
+ chacha->input[0] = U8TO32_LITTLE(sigma + 0);
+ chacha->input[1] = U8TO32_LITTLE(sigma + 4);
+ chacha->input[2] = U8TO32_LITTLE(sigma + 8);
+ chacha->input[3] = U8TO32_LITTLE(sigma + 12);
+ chacha->input[4] = U8TO32_LITTLE(k + 0);
+ chacha->input[5] = U8TO32_LITTLE(k + 4);
+ chacha->input[6] = U8TO32_LITTLE(k + 8);
+ chacha->input[7] = U8TO32_LITTLE(k + 12);
+ chacha->input[8] = U8TO32_LITTLE(k + 16);
+ chacha->input[9] = U8TO32_LITTLE(k + 20);
+ chacha->input[10] = U8TO32_LITTLE(k + 24);
+ chacha->input[11] = U8TO32_LITTLE(k + 28);
}
-static void
-chacha_ivsetup(chacha_ctx *chacha, const u8 *iv)
-{
- chacha->input[12] = 0;
- chacha->input[13] = 0;
- chacha->input[14] = U8TO32_LITTLE(iv + 0);
- chacha->input[15] = U8TO32_LITTLE(iv + 4);
+static void chacha_ivsetup(chacha_ctx *chacha, const u8 *iv) {
+ chacha->input[12] = 0;
+ chacha->input[13] = 0;
+ chacha->input[14] = U8TO32_LITTLE(iv + 0);
+ chacha->input[15] = U8TO32_LITTLE(iv + 4);
}
-static void
-chacha_encrypt_bytes(chacha_ctx *chacha, const u8 *m, u8 *c, u32 bytes)
-{
- u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
- u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
- u8 *ctarget = (u8*)0;
- u8 tmp[64];
- unsigned int i;
-
- if (!bytes) return;
-
- j0 = chacha->input[0];
- j1 = chacha->input[1];
- j2 = chacha->input[2];
- j3 = chacha->input[3];
- j4 = chacha->input[4];
- j5 = chacha->input[5];
- j6 = chacha->input[6];
- j7 = chacha->input[7];
- j8 = chacha->input[8];
- j9 = chacha->input[9];
- j10 = chacha->input[10];
- j11 = chacha->input[11];
- j12 = chacha->input[12];
- j13 = chacha->input[13];
- j14 = chacha->input[14];
- j15 = chacha->input[15];
-
- for (;;) {
- if (bytes < 64) {
- for (i = 0;i < bytes;++i) tmp[i] = m[i];
- m = tmp;
- ctarget = c;
- c = tmp;
+static void chacha_encrypt_bytes(chacha_ctx *chacha, const u8 *m, u8 *c, u32 bytes) {
+ u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+ u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+ u8 *ctarget = (u8 *)0;
+ u8 tmp[64];
+ unsigned int i;
+
+ if (!bytes) return;
+
+ j0 = chacha->input[0];
+ j1 = chacha->input[1];
+ j2 = chacha->input[2];
+ j3 = chacha->input[3];
+ j4 = chacha->input[4];
+ j5 = chacha->input[5];
+ j6 = chacha->input[6];
+ j7 = chacha->input[7];
+ j8 = chacha->input[8];
+ j9 = chacha->input[9];
+ j10 = chacha->input[10];
+ j11 = chacha->input[11];
+ j12 = chacha->input[12];
+ j13 = chacha->input[13];
+ j14 = chacha->input[14];
+ j15 = chacha->input[15];
+
+ for (;;) {
+ if (bytes < 64) {
+ for (i = 0; i < bytes; ++i)
+ tmp[i] = m[i];
+ m = tmp;
+ ctarget = c;
+ c = tmp;
+ }
+ x0 = j0;
+ x1 = j1;
+ x2 = j2;
+ x3 = j3;
+ x4 = j4;
+ x5 = j5;
+ x6 = j6;
+ x7 = j7;
+ x8 = j8;
+ x9 = j9;
+ x10 = j10;
+ x11 = j11;
+ x12 = j12;
+ x13 = j13;
+ x14 = j14;
+ x15 = j15;
+ for (i = 20; i > 0; i -= 2) {
+ QUARTERROUND(x0, x4, x8, x12)
+ QUARTERROUND(x1, x5, x9, x13)
+ QUARTERROUND(x2, x6, x10, x14)
+ QUARTERROUND(x3, x7, x11, x15)
+ QUARTERROUND(x0, x5, x10, x15)
+ QUARTERROUND(x1, x6, x11, x12)
+ QUARTERROUND(x2, x7, x8, x13)
+ QUARTERROUND(x3, x4, x9, x14)
+ }
+ x0 = PLUS(x0, j0);
+ x1 = PLUS(x1, j1);
+ x2 = PLUS(x2, j2);
+ x3 = PLUS(x3, j3);
+ x4 = PLUS(x4, j4);
+ x5 = PLUS(x5, j5);
+ x6 = PLUS(x6, j6);
+ x7 = PLUS(x7, j7);
+ x8 = PLUS(x8, j8);
+ x9 = PLUS(x9, j9);
+ x10 = PLUS(x10, j10);
+ x11 = PLUS(x11, j11);
+ x12 = PLUS(x12, j12);
+ x13 = PLUS(x13, j13);
+ x14 = PLUS(x14, j14);
+ x15 = PLUS(x15, j15);
+
+ j12 = PLUSONE(j12);
+ if (!j12) {
+ j13 = PLUSONE(j13);
+ /* stopping at 2^70 bytes per nonce is user's responsibility */
+ }
+
+ U32TO8_LITTLE(c + 0, x0);
+ U32TO8_LITTLE(c + 4, x1);
+ U32TO8_LITTLE(c + 8, x2);
+ U32TO8_LITTLE(c + 12, x3);
+ U32TO8_LITTLE(c + 16, x4);
+ U32TO8_LITTLE(c + 20, x5);
+ U32TO8_LITTLE(c + 24, x6);
+ U32TO8_LITTLE(c + 28, x7);
+ U32TO8_LITTLE(c + 32, x8);
+ U32TO8_LITTLE(c + 36, x9);
+ U32TO8_LITTLE(c + 40, x10);
+ U32TO8_LITTLE(c + 44, x11);
+ U32TO8_LITTLE(c + 48, x12);
+ U32TO8_LITTLE(c + 52, x13);
+ U32TO8_LITTLE(c + 56, x14);
+ U32TO8_LITTLE(c + 60, x15);
+
+ if (bytes <= 64) {
+ if (bytes < 64) {
+ for (i = 0; i < bytes; ++i)
+ ctarget[i] = c[i];
+ }
+ chacha->input[12] = j12;
+ chacha->input[13] = j13;
+ return;
+ }
+ bytes -= 64;
+ c += 64;
}
- x0 = j0;
- x1 = j1;
- x2 = j2;
- x3 = j3;
- x4 = j4;
- x5 = j5;
- x6 = j6;
- x7 = j7;
- x8 = j8;
- x9 = j9;
- x10 = j10;
- x11 = j11;
- x12 = j12;
- x13 = j13;
- x14 = j14;
- x15 = j15;
- for (i = 20;i > 0;i -= 2) {
- QUARTERROUND( x0, x4, x8, x12)
- QUARTERROUND( x1, x5, x9, x13)
- QUARTERROUND( x2, x6, x10, x14)
- QUARTERROUND( x3, x7, x11, x15)
- QUARTERROUND( x0, x5, x10, x15)
- QUARTERROUND( x1, x6, x11, x12)
- QUARTERROUND( x2, x7, x8, x13)
- QUARTERROUND( x3, x4, x9, x14)
- }
- x0 = PLUS(x0, j0);
- x1 = PLUS(x1, j1);
- x2 = PLUS(x2, j2);
- x3 = PLUS(x3, j3);
- x4 = PLUS(x4, j4);
- x5 = PLUS(x5, j5);
- x6 = PLUS(x6, j6);
- x7 = PLUS(x7, j7);
- x8 = PLUS(x8, j8);
- x9 = PLUS(x9, j9);
- x10 = PLUS(x10, j10);
- x11 = PLUS(x11, j11);
- x12 = PLUS(x12, j12);
- x13 = PLUS(x13, j13);
- x14 = PLUS(x14, j14);
- x15 = PLUS(x15, j15);
-
- j12 = PLUSONE(j12);
- if (!j12) {
- j13 = PLUSONE(j13);
- /* stopping at 2^70 bytes per nonce is user's responsibility */
- }
-
- U32TO8_LITTLE(c + 0, x0);
- U32TO8_LITTLE(c + 4, x1);
- U32TO8_LITTLE(c + 8, x2);
- U32TO8_LITTLE(c + 12, x3);
- U32TO8_LITTLE(c + 16, x4);
- U32TO8_LITTLE(c + 20, x5);
- U32TO8_LITTLE(c + 24, x6);
- U32TO8_LITTLE(c + 28, x7);
- U32TO8_LITTLE(c + 32, x8);
- U32TO8_LITTLE(c + 36, x9);
- U32TO8_LITTLE(c + 40, x10);
- U32TO8_LITTLE(c + 44, x11);
- U32TO8_LITTLE(c + 48, x12);
- U32TO8_LITTLE(c + 52, x13);
- U32TO8_LITTLE(c + 56, x14);
- U32TO8_LITTLE(c + 60, x15);
-
- if (bytes <= 64) {
- if (bytes < 64) {
- for (i = 0;i < bytes;++i) ctarget[i] = c[i];
- }
- chacha->input[12] = j12;
- chacha->input[13] = j13;
- return;
- }
- bytes -= 64;
- c += 64;
- }
}
diff --git a/lib/random/sysrandom.h b/lib/random/sysrandom.h
index d276be45..8acec5b7 100644
--- a/lib/random/sysrandom.h
+++ b/lib/random/sysrandom.h
@@ -11,5 +11,5 @@ static ssize_t getrandom(void *buf, size_t buflen, unsigned int flags) {
// Use getrandom()
#include <sys/random.h>
#else
- #error "Unsupported platform for secure random number generation"
+#error "Unsupported platform for secure random number generation"
#endif
diff --git a/lib/time/time_defs.h b/lib/time/time_defs.h
index 2ae1d9e4..214c26b9 100644
--- a/lib/time/time_defs.h
+++ b/lib/time/time_defs.h
@@ -7,25 +7,26 @@ typedef struct timeval Time;
static OptionalText_t _local_timezone = NONE_TEXT;
-static INLINE Text_t num_format(long n, const char *unit)
-{
- if (n == 0)
- return Text("now");
- return Text$from_str(String((int64_t)labs(n), " ", unit, (n == -1 || n == 1) ? "" : "s", n <= 0 ? " ago" : " later"));
+static INLINE Text_t num_format(long n, const char *unit) {
+ if (n == 0) return Text("now");
+ return Text$from_str(
+ String((int64_t)labs(n), " ", unit, (n == -1 || n == 1) ? "" : "s", n <= 0 ? " ago" : " later"));
}
-static void set_local_timezone(Text_t tz)
-{
+static void set_local_timezone(Text_t tz) {
setenv("TZ", Text$as_c_string(tz), 1);
_local_timezone = tz;
tzset();
}
-#define WITH_TIMEZONE(tz, body) ({ if (tz.length >= 0) { \
- OptionalText_t old_timezone = _local_timezone; \
- set_local_timezone(tz); \
- body; \
- set_local_timezone(old_timezone); \
- } else { \
- body; \
- }})
+#define WITH_TIMEZONE(tz, body) \
+ ({ \
+ if (tz.length >= 0) { \
+ OptionalText_t old_timezone = _local_timezone; \
+ set_local_timezone(tz); \
+ body; \
+ set_local_timezone(old_timezone); \
+ } else { \
+ body; \
+ } \
+ })