1 // Type info and methods for Text datatype, which uses a struct inspired by
2 // Raku's string representation and libunistr
10 #include "integers.h" // IWYU pragma: export
12 #include "nums.h" // IWYU pragma: export
// Capacity of the fixed-size `stack` array declared with this bound below.
// NOTE(review): presumably this caps how deeply nested a text can be while
// iterating with a TextIter_t -- confirm against the full struct definition.
16 #define MAX_TEXT_DEPTH 48
22 } stack[MAX_TEXT_DEPTH];
// Produces a TextIter_t compound literal whose first `stack` entry is
// initialized from `{t, 0}` and whose `stack_index` is 0. Stack entries that
// are not listed are zero-initialized, as with any partial initializer.
26 #define NEW_TEXT_ITER_STATE(t) (TextIter_t){.stack = {{t, 0}}, .stack_index = 0}
// Wraps a string literal in a Text_t tagged TEXT_ASCII without copying it.
// `length` is `sizeof(str) - 1`, i.e. the literal's size minus its NUL
// terminator. The `"" str` concatenation only compiles when `str` is itself a
// string literal, so passing a `char *` variable is rejected at compile time.
// NOTE(review): nothing here checks that the bytes are actually ASCII;
// presumably callers are expected to guarantee that -- confirm.
28 #define Text(str) ((Text_t){.length = sizeof(str) - 1, .tag = TEXT_ASCII, .ascii = "" str})
// A zero-length Text_t tagged TEXT_ASCII whose `ascii` pointer is null.
30 #define EMPTY_TEXT ((Text_t){.length = 0, .tag = TEXT_ASCII, .ascii = 0})
32 static inline Text_t Text_from_str_literal(const char *str) {
33 return (Text_t){.length = strlen(str), .tag = TEXT_ASCII, .ascii = str};
36 static inline Text_t Text_from_text(Text_t t) {
40 #define convert_to_text(x) \
42 Text_t: Text_from_text, \
43 char *: Text$from_str, \
44 const char *: Text$from_str, \
45 int8_t: Int8$value_as_text, \
46 int16_t: Int16$value_as_text, \
47 int32_t: Int32$value_as_text, \
48 int64_t: Int64$value_as_text, \
49 double: Num$value_as_text, \
50 float: Num32$value_as_text, \
51 Int_t: Int$value_as_text)(x)
// Joins the `n` texts in `items` into one Text_t (implemented outside this
// header).
53 Text_t Text$_concat(int n, Text_t items[n]);
// Variadic front end for Text$_concat. Every argument must already be a
// Text_t: the arguments are packed into a `Text_t[]` compound literal and the
// count is derived from that array's size. `__VA_ARGS__` appears twice, but
// the copy inside `sizeof` is not evaluated, so each argument runs once.
54 #define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__}) / sizeof(Text_t), (Text_t[]){__VA_ARGS__})
// Like Text$concat, but each argument is first passed through
// convert_to_text, so any type that macro dispatches on may be mixed in.
// NOTE(review): MAP_LIST is not defined in this part of the file; presumably
// it applies the given macro to every element of the list -- confirm.
55 #define Texts(...) Text$concat(MAP_LIST(convert_to_text, __VA_ARGS__))
56 // This function is defined as an extern in `src/stdlib/print.h`
57 // int Text$print(FILE *stream, Text_t t);
// Sub-range and element access. All positions are passed as Int_t.
// NOTE(review): the index base and the treatment of negative or out-of-range
// positions are not visible in this header -- confirm in the implementation.
58 Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int);
59 Text_t Text$from(Text_t text, Int_t first);
60 Text_t Text$to(Text_t text, Int_t last);
61 Text_t Text$reversed(Text_t text);
// Returns an OptionalText_t; the Text$cluster_checked macro treats a result
// tagged TEXT_NONE as an invalid index.
62 OptionalText_t Text$cluster(Text_t text, Int_t index_int);
63 #define Text$cluster_checked(text_expr, index_expr, start, end) \
65 const Text_t text = text_expr; \
66 Int_t index = index_expr; \
67 OptionalText_t cluster = Text$cluster(text, index); \
68 if (unlikely(cluster.tag == TEXT_NONE)) \
69 fail_source(__SOURCE_FILE__, start, end, \
70 Text$concat(Text("Invalid text index: "), convert_to_text(index), Text(" (text has length "), \
71 convert_to_text((int64_t)text.length), Text(")\n"))); \
// Build a text from a C string; Text$from_strn also takes an explicit `len`.
// convert_to_text routes `char *` and `const char *` arguments to
// Text$from_str.
// NOTE(review): both return OptionalText_t; presumably a none value signals
// input that could not be converted -- confirm in the implementation.
74 OptionalText_t Text$from_str(const char *str);
75 OptionalText_t Text$from_strn(const char *str, size_t len);
// Hashing, ordering and equality. The variants taking `const void *` plus a
// `const TypeInfo_t *` use the generic, type-erased signature; Text$compare,
// Text$equal and Text$is_none are the ones referenced by Text$metamethods.
// NOTE(review): the `const void *` arguments presumably point at Text_t
// values -- confirm.
76 PUREFUNC uint64_t Text$hash(const void *text, const TypeInfo_t *);
77 PUREFUNC int32_t Text$compare(const void *va, const void *vb, const TypeInfo_t *);
78 PUREFUNC bool Text$equal(const void *a, const void *b, const TypeInfo_t *);
// By-value equality, for callers that already hold Text_t values.
79 PUREFUNC bool Text$equal_values(Text_t a, Text_t b);
// NOTE(review): the meaning and accepted values of `language` are not visible
// in this header -- confirm in the implementation.
80 PUREFUNC bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t language);
81 PUREFUNC bool Text$is_none(const void *t, const TypeInfo_t *);
// Case conversions; each takes a `language` text and returns a new Text_t.
// NOTE(review): the accepted values of `language` are not visible here.
82 Text_t Text$upper(Text_t text, Text_t language);
83 Text_t Text$lower(Text_t text, Text_t language);
84 Text_t Text$title(Text_t text, Text_t language);
// Rendering helpers. Text$as_text uses the generic type-erased signature and
// is the `.as_text` entry in Text$metamethods.
85 Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *info);
86 Text_t Text$escaped(Text_t text, bool colorize, Text_t extra_escapes);
87 Text_t Text$quoted(Text_t str, bool colorize, Text_t quotation_mark);
// Prefix/suffix tests. `remainder` is a non-const out-pointer.
// NOTE(review): presumably it receives the text left over once the
// prefix/suffix is removed, and may be NULL -- confirm.
// NOTE(review): these are declared PUREFUNC while taking a writable pointer;
// if PUREFUNC expands to a `pure` attribute and the function stores through
// `remainder`, that store is an observable side effect -- confirm this is
// intended.
88 PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder);
89 PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder);
90 Text_t Text$without_prefix(Text_t text, Text_t prefix);
91 Text_t Text$without_suffix(Text_t text, Text_t suffix);
// Searching and substitution. Text$find returns an OptionalInt_t.
// NOTE(review): presumably none means "not found" -- confirm.
92 OptionalInt_t Text$find(Text_t text, Text_t target, Int_t start);
93 bool Text$matches_glob(Text_t text, Text_t glob);
94 Text_t Text$replace(Text_t text, Text_t target, Text_t replacement);
// NOTE(review): the key/value types expected in `translations` are not
// visible in this header -- confirm.
95 Text_t Text$translate(Text_t text, Table_t translations);
96 PUREFUNC bool Text$has(Text_t text, Text_t target);
// Splitting. The plain variants return a List_t; the `by_` variants return a
// Closure_t.
// NOTE(review): presumably the closure yields the pieces one at a time --
// confirm.
97 List_t Text$split(Text_t text, Text_t delimiter);
98 List_t Text$split_any(Text_t text, Text_t delimiters);
99 Closure_t Text$by_split(Text_t text, Text_t delimiter);
100 Closure_t Text$by_split_any(Text_t text, Text_t delimiters);
// `left` and `right` select which end(s) of `text` are trimmed.
101 Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right);
// Conversions out of Text_t.
// NOTE(review): ownership and lifetime of the returned `const char *` are not
// visible in this header -- confirm before storing or freeing it.
102 const char *Text$as_c_string(Text_t text);
103 List_t Text$clusters(Text_t text);
104 List_t Text$utf8(Text_t text);
105 List_t Text$utf16(Text_t text);
106 List_t Text$utf32(Text_t text);
107 List_t Text$codepoint_names(Text_t text);
// Conversions into Text_t from a List_t; each returns an OptionalText_t.
// NOTE(review): presumably a none result signals invalid input -- confirm.
108 OptionalText_t Text$from_utf8(List_t units);
109 OptionalText_t Text$from_utf16(List_t units);
110 OptionalText_t Text$from_utf32(List_t codepoints);
111 OptionalText_t Text$from_codepoint_names(List_t codepoint_names);
// Line access: Text$lines returns a List_t, Text$by_line a Closure_t.
112 List_t Text$lines(Text_t text);
113 Closure_t Text$by_line(Text_t text);
// Assembly helpers.
114 Text_t Text$join(Text_t glue, List_t pieces);
115 Text_t Text$repeat(Text_t text, Int_t count);
// Width measurement and padding; all take a `language` text.
// NOTE(review): the unit of `width` (e.g. display columns vs. clusters) is
// not visible in this header -- confirm.
116 Int_t Text$width(Text_t text, Text_t language);
117 Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
118 Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
119 Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
// NOTE(review): the metric and range of the returned distance are not visible
// in this header -- confirm.
120 double Text$distance(Text_t a, Text_t b, Text_t language);
// Indexed grapheme access through a caller-supplied TextIter_t. The inline
// Text$get_grapheme helper in this header builds a fresh state with
// NEW_TEXT_ITER_STATE and forwards to Text$get_grapheme_fast.
// NOTE(review): presumably reusing one state across nearby indices is what
// makes these "fast" -- confirm.
121 int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index);
122 uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index);
// Introspection helpers.
123 Int_t Text$memory_size(Text_t text);
124 Text_t Text$layout(Text_t text);
// Stream (de)serialization with the generic type-erased signature; these are
// the `.serialize` and `.deserialize` entries in Text$metamethods.
125 void Text$serialize(const void *obj, FILE *out, Table_t *, const TypeInfo_t *);
126 void Text$deserialize(FILE *in, void *out, List_t *, const TypeInfo_t *);
128 MACROLIKE int32_t Text$get_grapheme(Text_t text, int64_t index) {
129 TextIter_t state = NEW_TEXT_ITER_STATE(text);
130 return Text$get_grapheme_fast(&state, index);
// TypeInfo_t object for the Text type; declared here and defined in another
// translation unit.
133 extern const TypeInfo_t Text$info;
135 #define Text$metamethods \
137 .as_text = Text$as_text, \
139 .compare = Text$compare, \
140 .equal = Text$equal, \
141 .is_none = Text$is_none, \
142 .serialize = Text$serialize, \
143 .deserialize = Text$deserialize, \