From c72b0406a32ffc3f04324f7b6c321486762fca41 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sat, 16 Aug 2025 17:21:01 -0400 Subject: Improved parsing and prefix/suffix matching using a `remainder` parameter --- src/stdlib/bools.c | 39 ++++++++++++++++++++++++++------------- src/stdlib/bools.h | 2 +- src/stdlib/bytes.c | 13 +++++++++++++ src/stdlib/bytes.h | 1 + src/stdlib/integers.c | 36 ++++++++++++++++++++++++++++++++---- src/stdlib/integers.h | 4 ++-- src/stdlib/nums.c | 23 +++++++++++++++++------ src/stdlib/nums.h | 4 ++-- src/stdlib/paths.c | 4 ++-- src/stdlib/stdlib.c | 14 +++++++------- src/stdlib/text.c | 24 ++++++++++++++++++------ src/stdlib/text.h | 4 ++-- 12 files changed, 123 insertions(+), 45 deletions(-) (limited to 'src/stdlib') diff --git a/src/stdlib/bools.c b/src/stdlib/bools.c index 66b7e209..85de0621 100644 --- a/src/stdlib/bools.c +++ b/src/stdlib/bools.c @@ -22,24 +22,37 @@ PUREFUNC public Text_t Bool$as_text(const void *b, bool colorize, const TypeInfo return *(Bool_t*)b ? 
Text("yes") : Text("no");
 }
 
-PUREFUNC public OptionalBool_t Bool$parse(Text_t text)
+// Match `target` (ignoring case) at the start of `text`, or against all of `text` when `remainder` is NULL.
+static bool try_parse(Text_t text, Text_t target, bool target_value, Text_t *remainder, bool *result)
 {
-    Text_t lang = Text("C");
-    if (Text$equal_ignoring_case(text, Text("yes"), lang)
-        || Text$equal_ignoring_case(text, Text("on"), lang)
-        || Text$equal_ignoring_case(text, Text("true"), lang)
-        || Text$equal_ignoring_case(text, Text("1"), lang)) {
-        return yes;
-    } else if (Text$equal_ignoring_case(text, Text("no"), lang)
-        || Text$equal_ignoring_case(text, Text("off"), lang)
-        || Text$equal_ignoring_case(text, Text("false"), lang)
-        || Text$equal_ignoring_case(text, Text("0"), lang)) {
-        return no;
+    static const Text_t lang = Text("C");
+    bool matched = text.length >= target.length
+        && Text$equal_ignoring_case(Text$to(text, Int$from_int64(target.length)), target, lang);
+    if (matched && (remainder || text.length == target.length)) {
+        if (remainder) *remainder = Text$from(text, Int$from_int64(target.length + 1));
+        *result = target_value;
+        return true;
     } else {
-        return NONE_BOOL;
+        return false;
     }
 }
 
+// Parse yes/true/on/1 or no/false/off/0. Failure: NONE_BOOL, `*remainder` = `text`. Not pure: writes `*remainder`.
+public OptionalBool_t Bool$parse(Text_t text, Text_t *remainder)
+{
+    bool result = false;
+    if (try_parse(text, Text("yes"), true, remainder, &result)
+        || try_parse(text, Text("true"), true, remainder, &result)
+        || try_parse(text, Text("on"), true, remainder, &result)
+        || try_parse(text, Text("1"), true, remainder, &result)
+        || try_parse(text, Text("no"), false, remainder, &result)
+        || try_parse(text, Text("false"), false, remainder, &result)
+        || try_parse(text, Text("off"), false, remainder, &result)
+        || try_parse(text, Text("0"), false, remainder, &result))
+        return result;
+    if (remainder) *remainder = text; // same failure convention as the other parsers
+    return NONE_BOOL;
+}
+
 static bool Bool$is_none(const void *b, const TypeInfo_t *info)
 {
     (void)info;
diff --git a/src/stdlib/bools.h b/src/stdlib/bools.h
index 6d0300d5..ae6c5feb 100644
--- a/src/stdlib/bools.h
+++ b/src/stdlib/bools.h
@@ -13,7 +13,7 @@
 #define no 
(Bool_t)false
 
 PUREFUNC Text_t Bool$as_text(const void *b, bool colorize, const TypeInfo_t *type);
-OptionalBool_t Bool$parse(Text_t text);
+OptionalBool_t Bool$parse(Text_t text, Text_t *remainder);
 MACROLIKE Bool_t Bool$from_int(Int_t i) { return (i.small != 0); }
 MACROLIKE Bool_t Bool$from_int64(Int64_t i) { return (i != 0); }
 MACROLIKE Bool_t Bool$from_int32(Int32_t i) { return (i != 0); }
diff --git a/src/stdlib/bytes.c b/src/stdlib/bytes.c
index 48c8b93b..130a645f 100644
--- a/src/stdlib/bytes.c
+++ b/src/stdlib/bytes.c
@@ -3,6 +3,7 @@
 #include
 
 #include "bytes.h"
+#include "integers.h"
 #include "stdlib.h"
 #include "text.h"
 #include "util.h"
@@ -29,6 +30,18 @@ public CONSTFUNC bool Byte$is_between(const Byte_t x, const Byte_t low, const By
     return low <= x && x <= high;
 }
 
+// Parse an integer in the range 0..255 as a byte; on failure return NONE_BYTE and leave `*remainder` as `text`.
+public OptionalByte_t Byte$parse(Text_t text, Text_t *remainder)
+{
+    OptionalInt_t full_int = Int$parse(text, remainder);
+    if (full_int.small != 0L && Int$compare_value(full_int, I(0)) >= 0
+        && Int$compare_value(full_int, I(255)) <= 0)
+        return (OptionalByte_t){.value=Byte$from_int(full_int, true)};
+    // Int$parse() has already advanced the remainder when the value is merely out of range
+    if (remainder) *remainder = text;
+    return NONE_BYTE;
+}
+
 public Text_t Byte$hex(Byte_t byte, bool uppercase, bool prefix) {
     struct Text_s text = {.tag=TEXT_ASCII};
     text.ascii = GC_MALLOC_ATOMIC(8);
diff --git a/src/stdlib/bytes.h b/src/stdlib/bytes.h
index e733c274..ab88b5bc 100644
--- a/src/stdlib/bytes.h
+++ b/src/stdlib/bytes.h
@@ -19,6 +19,7 @@ Byte_t Byte$from_int(Int_t i, bool truncate);
 Byte_t Byte$from_int64(int64_t i, bool truncate);
 Byte_t Byte$from_int32(int32_t i, bool truncate);
 Byte_t Byte$from_int16(int16_t i, bool truncate);
+OptionalByte_t Byte$parse(Text_t text, Text_t *remainder);
 Closure_t Byte$to(Byte_t first, Byte_t last, OptionalInt8_t step);
 MACROLIKE Byte_t Byte$from_int8(int8_t i) { return (Byte_t)i; }
 MACROLIKE Byte_t Byte$from_bool(bool b) { return (Byte_t)b; }
diff --git a/src/stdlib/integers.c b/src/stdlib/integers.c
index 018798ec..86be790d 100644
--- a/src/stdlib/integers.c
+++ b/src/stdlib/integers.c
@@ -424,8 +424,36 @@ public Int_t Int$from_str(const char *str) {
     return Int$from_mpz(i);
 }
 
-public OptionalInt_t Int$parse(Text_t text) {
-    return Int$from_str(Text$as_c_string(text));
+// Parse an integer at the start of `text`: optional `-`, optional 0x/0o/0b prefix, then digits of that base.
+// With `remainder`, the text after the digits is stored there (`text` itself on failure); without it, any
+// trailing text is a parse failure. Returns NONE_INT on failure.
+public OptionalInt_t Int$parse(Text_t text, Text_t *remainder) {
+    const char *str = Text$as_c_string(text);
+    const bool negative = (str[0] == '-');
+    const char *digits = negative ? str + 1 : str;
+    const char *valid = "0123456789";
+    int base = 10;
+    if (strncmp(digits, "0x", 2) == 0) { valid = "0123456789abcdefABCDEF"; base = 16; digits += 2; }
+    else if (strncmp(digits, "0o", 2) == 0) { valid = "01234567"; base = 8; digits += 2; }
+    else if (strncmp(digits, "0b", 2) == 0) { valid = "01"; base = 2; digits += 2; }
+    const char *end = digits + strspn(digits, valid); // strspn: length of the leading run of valid digits
+    if (end == digits || (!remainder && *end != '\0')) {
+        if (remainder) *remainder = text;
+        return NONE_INT;
+    }
+    mpz_t i;
+    if (*end == '\0') {
+        mpz_init_set_str(i, digits, base); // cannot fail: every character was validated above
+    } else {
+        mpz_init(i); // mpz_init_set_str() would reject the trailing text, so accumulate digit by digit
+        for (const char *p = digits; p < end; p++) {
+            mpz_mul_ui(i, i, (unsigned long)base);
+            mpz_add_ui(i, i, (unsigned long)(*p <= '9' ? *p - '0' : (*p | 0x20) - 'a' + 10));
+        }
+    }
+    if (negative) mpz_neg(i, i);
+    if (remainder) *remainder = Text$from_str(end);
+    return Int$from_mpz(i);
 }
 
 public bool Int$is_prime(Int_t x, Int_t reps)
@@ -670,8 +698,8 @@ public void Int32$deserialize(FILE *in, void *outval, List_t *pointers, const Ty
         range->step = step; \
         return (Closure_t){.fn=_next_##KindOfInt, .userdata=range}; \
     } \
-    public PUREFUNC Optional ## KindOfInt ## _t KindOfInt ## $parse(Text_t text) { \
-        OptionalInt_t full_int = Int$parse(text); \
+    public PUREFUNC Optional ## KindOfInt ## _t KindOfInt ## $parse(Text_t text, Text_t *remainder) { \
+        OptionalInt_t full_int = Int$parse(text, remainder); \
         if (full_int.small == 0L) return (Optional ## KindOfInt ## _t){.is_none=true}; \
         if 
(Int$compare_value(full_int, I(min_val)) < 0) { \ return (Optional ## KindOfInt ## _t){.is_none=true}; \ diff --git a/src/stdlib/integers.h b/src/stdlib/integers.h index beb26bd6..50ca485f 100644 --- a/src/stdlib/integers.h +++ b/src/stdlib/integers.h @@ -32,7 +32,7 @@ bool type_name ## $get_bit(c_type x, Int_t bit_index); \ Closure_t type_name ## $to(c_type first, c_type last, Optional ## type_name ## _t step); \ Closure_t type_name ## $onward(c_type first, c_type step); \ - PUREFUNC Optional ## type_name ## _t type_name ## $parse(Text_t text); \ + PUREFUNC Optional ## type_name ## _t type_name ## $parse(Text_t text, Text_t *remainder); \ CONSTFUNC bool type_name ## $is_between(const c_type x, const c_type low, const c_type high); \ CONSTFUNC c_type type_name ## $clamped(c_type x, c_type min, c_type max); \ MACROLIKE CONSTFUNC c_type type_name ## $from_byte(Byte_t b) { return (c_type)b; } \ @@ -101,7 +101,7 @@ Text_t Int$octal(Int_t i, Int_t digits, bool prefix); PUREFUNC Closure_t Int$to(Int_t first, Int_t last, OptionalInt_t step); PUREFUNC Closure_t Int$onward(Int_t first, Int_t step); OptionalInt_t Int$from_str(const char *str); -OptionalInt_t Int$parse(Text_t text); +OptionalInt_t Int$parse(Text_t text, Text_t *remainder); Int_t Int$abs(Int_t x); Int_t Int$power(Int_t base, Int_t exponent); Int_t Int$gcd(Int_t x, Int_t y); diff --git a/src/stdlib/nums.c b/src/stdlib/nums.c index 34fbb162..3775c8f4 100644 --- a/src/stdlib/nums.c +++ b/src/stdlib/nums.c @@ -98,14 +98,20 @@ public CONSTFUNC double Num$clamped(double x, double low, double high) { return (x <= low) ? low : (x >= high ? 
high : x);
 }
 
-public OptionalNum_t Num$parse(Text_t text) {
+// Parse a number from the start of `text` using strtod(). With `remainder`, the unparsed tail is stored
+// there (`text` itself on failure); without it, trailing text makes the parse fail. Failure returns NaN,
+// so NOTE(review): a literal "nan" accepted by strtod() looks like a failed parse to isnan() callers.
+public OptionalNum_t Num$parse(Text_t text, Text_t *remainder) {
     const char *str = Text$as_c_string(text);
     char *end = NULL;
     double d = strtod(str, &end);
-    if (end > str && end[0] == '\0')
+    if (end > str && (remainder || *end == '\0')) {
+        if (remainder) *remainder = Text$from_str(end);
         return d;
-    else
+    } else {
+        if (remainder) *remainder = text;
         return nan("none");
+    }
 }
 
 static bool Num$is_none(const void *n, const TypeInfo_t *info)
@@ -203,14 +209,19 @@ public CONSTFUNC float Num32$clamped(float x, float low, float high) {
     return (x <= low) ? low : (x >= high ? high : x);
 }
 
-public OptionalNum32_t Num32$parse(Text_t text) {
+// 32-bit variant of Num$parse() with the same `remainder` contract. The value is still parsed as a
+// double by strtod() and converted to the return type when returned.
+public OptionalNum32_t Num32$parse(Text_t text, Text_t *remainder) {
     const char *str = Text$as_c_string(text);
     char *end = NULL;
     double d = strtod(str, &end);
-    if (end > str && end[0] == '\0')
+    if (end > str && (remainder || *end == '\0')) {
+        if (remainder) *remainder = Text$from_str(end);
         return d;
-    else
+    } else {
+        if (remainder) *remainder = text;
         return nan("none");
+    }
 }
 
 static bool Num32$is_none(const void *n, const TypeInfo_t *info)
diff --git a/src/stdlib/nums.h b/src/stdlib/nums.h
index fdd9e227..fe76d1c3 100644
--- a/src/stdlib/nums.h
+++ b/src/stdlib/nums.h
@@ -30,7 +30,7 @@ CONSTFUNC bool Num$finite(double n);
 CONSTFUNC bool Num$isnan(double n);
 double Num$nan(Text_t tag);
 CONSTFUNC double Num$mix(double amount, double x, double y);
-OptionalNum_t Num$parse(Text_t text);
+OptionalNum_t Num$parse(Text_t text, Text_t *remainder);
 CONSTFUNC bool Num$is_between(const double x, const double low, const double high);
 CONSTFUNC double Num$clamped(double x, double low, double high);
 MACROLIKE CONSTFUNC double Num$from_num32(Num32_t n) { return (double)n; }
@@ -83,7 +83,7 @@ CONSTFUNC bool Num32$isinf(float n);
 CONSTFUNC bool Num32$finite(float n);
 CONSTFUNC bool Num32$isnan(float n);
 CONSTFUNC float Num32$mix(float 
amount, float x, float y); -OptionalNum32_t Num32$parse(Text_t text); +OptionalNum32_t Num32$parse(Text_t text, Text_t *remainder); float Num32$nan(Text_t tag); CONSTFUNC bool Num32$is_between(const float x, const float low, const float high); CONSTFUNC float Num32$clamped(float x, float low, float high); diff --git a/src/stdlib/paths.c b/src/stdlib/paths.c index 94baf995..58702ec7 100644 --- a/src/stdlib/paths.c +++ b/src/stdlib/paths.c @@ -619,10 +619,10 @@ public bool Path$has_extension(Path_t path, Text_t extension) if (extension.length == 0) return !Text$has(Text$from(last, I(2)), Text(".")) || Text$equal_values(last, Text("..")); - if (!Text$starts_with(extension, Text("."))) + if (!Text$starts_with(extension, Text("."), NULL)) extension = Texts(Text("."), extension); - return Text$ends_with(Text$from(last, I(2)), extension); + return Text$ends_with(Text$from(last, I(2)), extension, NULL); } public Path_t Path$child(Path_t path, Text_t name) diff --git a/src/stdlib/stdlib.c b/src/stdlib/stdlib.c index 2b4bd99c..02ccd710 100644 --- a/src/stdlib/stdlib.c +++ b/src/stdlib/stdlib.c @@ -90,37 +90,37 @@ static bool parse_single_arg(const TypeInfo_t *info, char *arg, void *dest) *(OptionalInt_t*)dest = parsed; return parsed.small != 0; } else if (info == &Int64$info) { - OptionalInt64_t parsed = Int64$parse(Text$from_str(arg)); + OptionalInt64_t parsed = Int64$parse(Text$from_str(arg), NULL); if (!parsed.is_none) *(OptionalInt64_t*)dest = parsed; return !parsed.is_none; } else if (info == &Int32$info) { - OptionalInt32_t parsed = Int32$parse(Text$from_str(arg)); + OptionalInt32_t parsed = Int32$parse(Text$from_str(arg), NULL); if (!parsed.is_none) *(OptionalInt32_t*)dest = parsed; return !parsed.is_none; } else if (info == &Int16$info) { - OptionalInt16_t parsed = Int16$parse(Text$from_str(arg)); + OptionalInt16_t parsed = Int16$parse(Text$from_str(arg), NULL); if (!parsed.is_none) *(OptionalInt16_t*)dest = parsed; return !parsed.is_none; } else if (info == 
&Int8$info) {
-        OptionalInt8_t parsed = Int8$parse(Text$from_str(arg));
+        OptionalInt8_t parsed = Int8$parse(Text$from_str(arg), NULL);
         if (!parsed.is_none)
             *(OptionalInt8_t*)dest = parsed;
         return !parsed.is_none;
     } else if (info == &Bool$info) {
-        OptionalBool_t parsed = Bool$parse(Text$from_str(arg));
+        OptionalBool_t parsed = Bool$parse(Text$from_str(arg), NULL);
         if (parsed != NONE_BOOL)
             *(OptionalBool_t*)dest = parsed;
         return parsed != NONE_BOOL;
     } else if (info == &Num$info) {
-        OptionalNum_t parsed = Num$parse(Text$from_str(arg));
+        OptionalNum_t parsed = Num$parse(Text$from_str(arg), NULL);
         if (!isnan(parsed))
             *(OptionalNum_t*)dest = parsed;
         return !isnan(parsed);
     } else if (info == &Num32$info) {
-        OptionalNum32_t parsed = Num32$parse(Text$from_str(arg));
+        OptionalNum32_t parsed = Num32$parse(Text$from_str(arg), NULL);
         if (!isnan(parsed))
             *(OptionalNum32_t*)dest = parsed;
         return !isnan(parsed);
diff --git a/src/stdlib/text.c b/src/stdlib/text.c
index 80c267ed..8ef0874e 100644
--- a/src/stdlib/text.c
+++ b/src/stdlib/text.c
@@ -1102,30 +1102,42 @@ bool _matches(TextIter_t *text_state, TextIter_t *target_state, int64_t pos)
     return true;
 }
 
-PUREFUNC public bool Text$starts_with(Text_t text, Text_t prefix)
+// Check whether `text` begins with `prefix`. A non-NULL `remainder` receives the text after the prefix, or
+// `text` itself on no match. Not marked PUREFUNC (presumably a `pure` attribute): it writes `*remainder`.
+public bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder)
 {
+    if (remainder) *remainder = text; // default result for every no-match path, including the early return
     if (text.length < prefix.length)
         return false;
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text), prefix_state = NEW_TEXT_ITER_STATE(prefix);
-    return _matches(&text_state, &prefix_state, 0);
+    if (!_matches(&text_state, &prefix_state, 0)) return false;
+    if (remainder) *remainder = Text$from(text, Int$from_int64(prefix.length + 1));
+    return true;
 }
 
-PUREFUNC public bool Text$ends_with(Text_t text, Text_t suffix)
+// Check whether `text` ends with `suffix`. A non-NULL `remainder` receives the text before the suffix, or
+// `text` itself on no match. Not marked PUREFUNC (presumably a `pure` attribute): it writes `*remainder`.
+public bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder)
 {
+    if (remainder) *remainder = text; // default result for every no-match path, including the early return
     if (text.length < suffix.length)
         return false;
     TextIter_t text_state = NEW_TEXT_ITER_STATE(text), suffix_state = NEW_TEXT_ITER_STATE(suffix);
-    return _matches(&text_state, &suffix_state, text.length - suffix.length);
+    if (!_matches(&text_state, &suffix_state, text.length - suffix.length)) return false;
+    if (remainder) *remainder = Text$to(text, Int$from_int64(text.length - suffix.length));
+    return true;
 }
 
+// Return `text` with a leading `prefix` removed, or `text` unchanged if it does not start with `prefix`.
 public Text_t Text$without_prefix(Text_t text, Text_t prefix)
 {
-    return Text$starts_with(text, prefix) ? Text$slice(text, I(prefix.length + 1), I(text.length)) : text;
+    return Text$starts_with(text, prefix, NULL) ? Text$slice(text, I(prefix.length + 1), I(text.length)) : text;
 }
 
+// Return `text` with a trailing `suffix` removed, or `text` unchanged if it does not end with `suffix`.
 public Text_t Text$without_suffix(Text_t text, Text_t suffix)
 {
-    return Text$ends_with(text, suffix) ? Text$slice(text, I(1), I(text.length - suffix.length)) : text;
+    return Text$ends_with(text, suffix, NULL) ? Text$slice(text, I(1), I(text.length - suffix.length)) : text;
 }
 
 static bool _has_grapheme(TextIter_t *text, int32_t g)
diff --git a/src/stdlib/text.h b/src/stdlib/text.h
index 642a74b6..637a3db7 100644
--- a/src/stdlib/text.h
+++ b/src/stdlib/text.h
@@ -59,8 +59,8 @@ Text_t Text$lower(Text_t text, Text_t language);
 Text_t Text$title(Text_t text, Text_t language);
 Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *info);
 Text_t Text$quoted(Text_t str, bool colorize, Text_t quotation_mark);
-PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix);
-PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix);
+bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder);
+bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder);
 Text_t Text$without_prefix(Text_t text, Text_t prefix);
 Text_t Text$without_suffix(Text_t text, Text_t suffix);
 Text_t Text$replace(Text_t text, Text_t target, Text_t replacement);
-- 
cgit v1.2.3