text.h - tomo

(144 lines)
   1 // Type info and methods for Text datatype, which uses a struct inspired by
   2 // Raku's string representation and libunistr
   3 
   4 #pragma once
   5 
   6 #include <stdbool.h>
   7 #include <stdint.h>
   8 
   9 #include "datatypes.h"
  10 #include "integers.h" // IWYU pragma: export
  11 #include "mapmacro.h"
  12 #include "nums.h" // IWYU pragma: export
  13 #include "types.h"
  14 #include "util.h"
  15 
// Capacity of the `stack` array inside TextIter_t (number of entries it can hold).
#define MAX_TEXT_DEPTH 48
  17 
// Iteration state that is passed by pointer to Text$get_grapheme_fast() and
// Text$get_main_grapheme_fast(). It consists of a fixed-capacity stack of
// (text, offset) entries and an index into that stack.
// NOTE(review): presumably the stack records the path through nested texts
// while a grapheme is being located; confirm against the implementation.
typedef struct {
    struct {
        Text_t text;    // text stored in this stack entry
        int64_t offset; // offset paired with `text` (NEW_TEXT_ITER_STATE starts entry 0 at 0)
    } stack[MAX_TEXT_DEPTH];
    int64_t stack_index; // index into `stack` (NEW_TEXT_ITER_STATE starts it at 0)
} TextIter_t;
  25 
  26 #define NEW_TEXT_ITER_STATE(t) (TextIter_t){.stack = {{t, 0}}, .stack_index = 0}
  27 
  28 #define Text(str) ((Text_t){.length = sizeof(str) - 1, .tag = TEXT_ASCII, .ascii = "" str})
  29 
  30 #define EMPTY_TEXT ((Text_t){.length = 0, .tag = TEXT_ASCII, .ascii = 0})
  31 
  32 static inline Text_t Text_from_str_literal(const char *str) {
  33     return (Text_t){.length = strlen(str), .tag = TEXT_ASCII, .ascii = str};
  34 }
  35 
// Identity conversion: returns `t` unchanged. convert_to_text() selects this
// function for arguments whose type is already Text_t.
static inline Text_t Text_from_text(Text_t t) {
    return t;
}
  39 
  40 #define convert_to_text(x)                                                                                             \
  41     _Generic(x,                                                                                                        \
  42         Text_t: Text_from_text,                                                                                        \
  43         char *: Text$from_str,                                                                                         \
  44         const char *: Text$from_str,                                                                                   \
  45         int8_t: Int8$value_as_text,                                                                                    \
  46         int16_t: Int16$value_as_text,                                                                                  \
  47         int32_t: Int32$value_as_text,                                                                                  \
  48         int64_t: Int64$value_as_text,                                                                                  \
  49         double: Num$value_as_text,                                                                                     \
  50         float: Num32$value_as_text,                                                                                    \
  51         Int_t: Int$value_as_text)(x)
  52 
// Backend of the Text$concat() macro: receives `n` texts in `items` and returns
// a single Text_t.
// NOTE(review): presumably the items are joined in array order; the definition
// is not part of this header.
Text_t Text$_concat(int n, Text_t items[n]);
  54 #define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__}) / sizeof(Text_t), (Text_t[]){__VA_ARGS__})
  55 #define Texts(...) Text$concat(MAP_LIST(convert_to_text, __VA_ARGS__))
// Text$print() is declared `extern` in `src/stdlib/print.h` rather than here:
// int Text$print(FILE *stream, Text_t t);
// Slicing and indexing. Positions are passed as Int_t values.
// NOTE(review): the index base and the treatment of negative or out-of-range
// positions are decided by the implementation, which is not visible here.
Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int);
Text_t Text$from(Text_t text, Int_t first);
Text_t Text$to(Text_t text, Int_t last);
Text_t Text$reversed(Text_t text);
// Returns an optional text; Text$cluster_checked() treats a result tagged
// TEXT_NONE as an invalid index.
OptionalText_t Text$cluster(Text_t text, Int_t index_int);
  63 #define Text$cluster_checked(text_expr, index_expr, start, end)                                                        \
  64     ({                                                                                                                 \
  65         const Text_t text = text_expr;                                                                                 \
  66         Int_t index = index_expr;                                                                                      \
  67         OptionalText_t cluster = Text$cluster(text, index);                                                            \
  68         if (unlikely(cluster.tag == TEXT_NONE))                                                                        \
  69             fail_source(__SOURCE_FILE__, start, end,                                                                   \
  70                         Text$concat(Text("Invalid text index: "), convert_to_text(index), Text(" (text has length "),  \
  71                                     convert_to_text((int64_t)text.length), Text(")\n")));                              \
  72         cluster;                                                                                                       \
  73     })
// Build a text from C string data. Both functions return OptionalText_t, so a
// "none" result is possible.
// NOTE(review): presumably none signals input that cannot be converted; confirm
// in the implementation.
// convert_to_text() uses Text$from_str for `char *` and `const char *` arguments.
OptionalText_t Text$from_str(const char *str);
// Variant that takes an explicit length `len`.
// NOTE(review): presumably `len` is a byte count and no NUL terminator is required; confirm.
OptionalText_t Text$from_strn(const char *str, size_t len);
// Hashing, ordering and equality.
// The functions taking `const void *` operands and a trailing
// `const TypeInfo_t *` are the ones Text$metamethods installs as .hash,
// .compare, .equal and .is_none; the untyped pointers presumably address
// Text_t values.
PUREFUNC uint64_t Text$hash(const void *text, const TypeInfo_t *);
PUREFUNC int32_t Text$compare(const void *va, const void *vb, const TypeInfo_t *);
PUREFUNC bool Text$equal(const void *a, const void *b, const TypeInfo_t *);
// By-value equality helpers (no TypeInfo_t argument).
PUREFUNC bool Text$equal_values(Text_t a, Text_t b);
PUREFUNC bool Text$equal_ignoring_case(Text_t a, Text_t b, Text_t language);
PUREFUNC bool Text$is_none(const void *t, const TypeInfo_t *);
// Case conversion.
// NOTE(review): `language` is presumably a language/locale code that selects
// the case-mapping rules; confirm in the implementation.
Text_t Text$upper(Text_t text, Text_t language);
Text_t Text$lower(Text_t text, Text_t language);
Text_t Text$title(Text_t text, Text_t language);
// Installed as the .as_text slot of Text$metamethods.
Text_t Text$as_text(const void *text, bool colorize, const TypeInfo_t *info);
// Escaped and quoted renderings of a text.
// NOTE(review): `colorize` presumably toggles terminal color codes in the
// output; confirm in the implementation.
Text_t Text$escaped(Text_t text, bool colorize, Text_t extra_escapes);
Text_t Text$quoted(Text_t str, bool colorize, Text_t quotation_mark);
// Prefix/suffix tests and removal.
// NOTE(review): Text$starts_with and Text$ends_with take a `Text_t *remainder`
// out-pointer yet are marked PUREFUNC. If PUREFUNC expands to
// __attribute__((pure)) and the implementation stores through `remainder`, the
// attribute is not valid for these two functions (the compiler may drop or
// merge calls). Verify against util.h and the definitions.
PUREFUNC bool Text$starts_with(Text_t text, Text_t prefix, Text_t *remainder);
PUREFUNC bool Text$ends_with(Text_t text, Text_t suffix, Text_t *remainder);
Text_t Text$without_prefix(Text_t text, Text_t prefix);
Text_t Text$without_suffix(Text_t text, Text_t suffix);
// Searching, replacing, splitting and trimming.
// Text$find returns OptionalInt_t.
// NOTE(review): presumably none means `target` was not found at or after `start`; confirm.
OptionalInt_t Text$find(Text_t text, Text_t target, Int_t start);
bool Text$matches_glob(Text_t text, Text_t glob);
Text_t Text$replace(Text_t text, Text_t target, Text_t replacement);
Text_t Text$translate(Text_t text, Table_t translations);
PUREFUNC bool Text$has(Text_t text, Text_t target);
// The split functions return a List_t; the `by_` variants return a Closure_t instead.
// NOTE(review): the closures are presumably lazy iterators over the same pieces; confirm.
List_t Text$split(Text_t text, Text_t delimiter);
List_t Text$split_any(Text_t text, Text_t delimiters);
Closure_t Text$by_split(Text_t text, Text_t delimiter);
Closure_t Text$by_split_any(Text_t text, Text_t delimiters);
// `left` and `right` are independent flags.
// NOTE(review): presumably they select which ends of `text` are trimmed; confirm.
Text_t Text$trim(Text_t text, Text_t to_trim, bool left, bool right);
// Conversions from Text_t to other representations.
// NOTE(review): ownership/lifetime of the returned C string and the element
// types of the returned lists are not visible in this header.
const char *Text$as_c_string(Text_t text);
List_t Text$clusters(Text_t text);
List_t Text$utf8(Text_t text);
List_t Text$utf16(Text_t text);
List_t Text$utf32(Text_t text);
List_t Text$codepoint_names(Text_t text);
// Conversions back to text. Each returns OptionalText_t, so a "none" result is
// possible.
// NOTE(review): presumably none is returned for invalid input; confirm.
OptionalText_t Text$from_utf8(List_t units);
OptionalText_t Text$from_utf16(List_t units);
OptionalText_t Text$from_utf32(List_t codepoints);
OptionalText_t Text$from_codepoint_names(List_t codepoint_names);
// Line access: Text$lines returns a List_t, Text$by_line a Closure_t.
// NOTE(review): the closure is presumably a lazy per-line iterator; confirm.
List_t Text$lines(Text_t text);
Closure_t Text$by_line(Text_t text);
// Joining and repetition.
Text_t Text$join(Text_t glue, List_t pieces);
Text_t Text$repeat(Text_t text, Int_t count);
// Width measurement and padding; all of these take a `language` text.
// NOTE(review): `width` is presumably a display width in columns rather than a
// length in clusters or bytes; confirm.
Int_t Text$width(Text_t text, Text_t language);
Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
// NOTE(review): the metric and range of the returned distance are not visible here.
double Text$distance(Text_t a, Text_t b, Text_t language);
// Grapheme lookup through a caller-provided TextIter_t. Text$get_grapheme()
// calls Text$get_grapheme_fast with a freshly initialized state.
// Note the differing return types: int32_t versus uint32_t.
// NOTE(review): presumably reusing one state across calls speeds up nearby
// lookups; confirm in the implementation.
int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index);
uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index);
Int_t Text$memory_size(Text_t text);
Text_t Text$layout(Text_t text);
// Installed as the .serialize and .deserialize slots of Text$metamethods.
void Text$serialize(const void *obj, FILE *out, Table_t *, const TypeInfo_t *);
void Text$deserialize(FILE *in, void *out, List_t *, const TypeInfo_t *);
 127 
 128 MACROLIKE int32_t Text$get_grapheme(Text_t text, int64_t index) {
 129     TextIter_t state = NEW_TEXT_ITER_STATE(text);
 130     return Text$get_grapheme_fast(&state, index);
 131 }
 132 
// TypeInfo_t object for Text; only declared here, the definition lives elsewhere.
extern const TypeInfo_t Text$info;
 134 
 135 #define Text$metamethods                                                                                               \
 136     {                                                                                                                  \
 137         .as_text = Text$as_text,                                                                                       \
 138         .hash = Text$hash,                                                                                             \
 139         .compare = Text$compare,                                                                                       \
 140         .equal = Text$equal,                                                                                           \
 141         .is_none = Text$is_none,                                                                                       \
 142         .serialize = Text$serialize,                                                                                   \
 143         .deserialize = Text$deserialize,                                                                               \
 144     }