aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2025-03-17 23:37:31 -0400
committerBruce Hill <bruce@bruce-hill.com>2025-03-17 23:37:31 -0400
commit451634ae23418b65945a9d9a3b1d895023940c18 (patch)
tree7d0c2dc1aec246e671c20c85844ce1e78d6c5d89
parentf6eda4cf855fdf613cbcde0b073adfde43b354e7 (diff)
Add text width support and have that work for padding
-rw-r--r--environment.c15
-rw-r--r--stdlib/text.c50
-rw-r--r--stdlib/text.h7
-rw-r--r--test/text.tm13
4 files changed, 62 insertions, 23 deletions
diff --git a/environment.c b/environment.c
index afa9524b..f43a9ca2 100644
--- a/environment.c
+++ b/environment.c
@@ -370,7 +370,7 @@ env_t *new_compilation_unit(CORD libname)
{"by_match", "Text$by_match", "func(text:Text, pattern:Pattern -> func(->Match?))"},
{"by_split", "Text$by_split", "func(text:Text, pattern=$Pattern'' -> func(->Text?))"},
{"bytes", "Text$utf8_bytes", "func(text:Text -> [Byte])"},
- {"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language=\"C\" -> Bool)"},
+ {"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language='C' -> Bool)"},
{"codepoint_names", "Text$codepoint_names", "func(text:Text -> [Text])"},
{"ends_with", "Text$ends_with", "func(text,suffix:Text -> Bool)"},
{"each", "Text$each", "func(text:Text, pattern:Pattern, fn:func(match:Match), recursive=yes)"},
@@ -384,26 +384,27 @@ env_t *new_compilation_unit(CORD libname)
{"from_text", "Path$from_text", "func(text:Text -> Path)"},
{"has", "Text$has", "func(text:Text, pattern:Pattern -> Bool)"},
{"join", "Text$join", "func(glue:Text, pieces:[Text] -> Text)"},
- {"left_pad", "Text$left_pad", "func(text:Text, count:Int, pad=\" \" -> Text)"},
+ {"left_pad", "Text$left_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"},
{"lines", "Text$lines", "func(text:Text -> [Text])"},
- {"lower", "Text$lower", "func(text:Text, language=\"C\" -> Text)"},
+ {"lower", "Text$lower", "func(text:Text, language='C' -> Text)"},
{"map", "Text$map", "func(text:Text, pattern:Pattern, fn:func(match:Match -> Text), recursive=yes -> Text)"},
{"matches", "Text$matches", "func(text:Text, pattern:Pattern -> [Text]?)"},
- {"middle_pad", "Text$middle_pad", "func(text:Text, count:Int, pad=\" \" -> Text)"},
+ {"middle_pad", "Text$middle_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"},
{"quoted", "Text$quoted", "func(text:Text, color=no -> Text)"},
{"repeat", "Text$repeat", "func(text:Text, count:Int -> Text)"},
{"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes -> Text)"},
{"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern,Text}, backref=$/\\/, recursive=yes -> Text)"},
{"reversed", "Text$reversed", "func(text:Text -> Text)"},
- {"right_pad", "Text$right_pad", "func(text:Text, count:Int, pad=\" \" -> Text)"},
+ {"right_pad", "Text$right_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"},
{"slice", "Text$slice", "func(text:Text, from=1, to=-1 -> Text)"},
{"split", "Text$split", "func(text:Text, pattern=$Pattern'' -> [Text])"},
{"starts_with", "Text$starts_with", "func(text,prefix:Text -> Bool)"},
- {"title", "Text$title", "func(text:Text, language=\"C\" -> Text)"},
+ {"title", "Text$title", "func(text:Text, language='C' -> Text)"},
{"to", "Text$to", "func(text:Text, last:Int -> Text)"},
{"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes -> Text)"},
- {"upper", "Text$upper", "func(text:Text, language=\"C\" -> Text)"},
+ {"upper", "Text$upper", "func(text:Text, language='C' -> Text)"},
{"utf32_codepoints", "Text$utf32_codepoints", "func(text:Text -> [Int32])"},
+ {"width", "Text$width", "func(text:Text, language='C' -> Int)"},
)},
{"Thread", THREAD_TYPE, "Thread_t", "Thread", TypedArray(ns_entry_t,
{"new", "Thread$new", "func(fn:func() -> Thread)"},
diff --git a/stdlib/text.c b/stdlib/text.c
index adbac3f0..9abab57b 100644
--- a/stdlib/text.c
+++ b/stdlib/text.c
@@ -66,6 +66,7 @@
#include <unictype.h>
#include <unigbrk.h>
#include <uniname.h>
+#include <uniwidth.h>
#include "arrays.h"
#include "integers.h"
@@ -512,45 +513,68 @@ public Text_t Text$repeat(Text_t text, Int_t count)
return ret;
}
-static Text_t Text$repeat_to_length(Text_t to_repeat, int64_t length)
+// Return the display width of `text` as reported by libunistring's
+// u8_strwidth() on the text's C-string form.
+// NOTE(review): u8_strwidth() documents its second argument as an encoding
+// name (used for ambiguous-width characters), not a language -- confirm that
+// the values callers pass as `language` are meaningful to it.
+public Int_t Text$width(Text_t text, Text_t language)
{
- if (length <= 0)
+ int width = u8_strwidth((const uint8_t*)Text$as_c_string(text), Text$as_c_string(language));
+ return Int$from_int32(width);
+}
+
+// Build padding whose display width is exactly `target_width` columns:
+// 1. append whole copies of `to_repeat` while they still fit;
+// 2. fill the remainder with one-element slices of `to_repeat`;
+// 3. if the next slice would overshoot, fill what is left with spaces.
+// Returns EMPTY_TEXT when `target_width` is zero or negative.
+// Fails when `to_repeat` has no display width, since it could never fill
+// the requested columns.
+static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_t language)
+{
+ if (target_width <= 0)
return EMPTY_TEXT;
+ const char *lang_str = Text$as_c_string(language);
+ int64_t width = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(to_repeat), lang_str);
+ // A zero-width pad (e.g. one made only of control characters, which
+ // u8_strwidth() ignores) would make the loop below spin forever, because
+ // repeated_width would never advance towards target_width.
+ if (width <= 0)
+ fail("Cannot pad with zero-width text!");
Text_t repeated = EMPTY_TEXT;
- while (repeated.length + to_repeat.length <= length)
+ int64_t repeated_width = 0;
+ while (repeated_width + width <= target_width) {
repeated = concat2(repeated, to_repeat);
+ repeated_width += width;
+ }
- if (repeated.length < length)
- repeated = concat2(repeated, Text$slice(to_repeat, I_small(1), I(length - repeated.length)));
+ if (repeated_width < target_width) {
+ // Fewer columns remain than one full copy needs: add pieces one at a time.
+ for (int64_t i = 0; repeated_width < target_width && i < to_repeat.length; i++) {
+ Text_t c = Text$slice(to_repeat, I_small(i+1), I_small(i+1));
+ int64_t w = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(c), lang_str);
+ if (repeated_width + w > target_width) {
+ // The next piece is too wide for the gap; use spaces so the width stays exact.
+ repeated = concat2(repeated, Text$repeat(Text(" "), I(target_width - repeated_width)));
+ repeated_width = target_width;
+ break;
+ }
+ repeated = concat2(repeated, c);
+ repeated_width += w;
+ }
+ }
- assert(repeated.length == length);
return repeated;
}
-public Text_t Text$left_pad(Text_t text, Int_t count, Text_t padding)
+// Pad `text` on the left with repetitions of `padding` until it is `width`
+// display columns wide, measuring with Text$width() and `language`.
+// No padding is added when `text` is already at least `width` columns wide.
+// Fails when `padding` is empty.
+public Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
{
if (padding.length == 0)
fail("Cannot pad with an empty text!");
- return concat2(Text$repeat_to_length(padding, Int64$from_int(count, false) - text.length), text);
+ // Columns still missing; zero or negative makes Text$repeat_to_width() return EMPTY_TEXT.
+ int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
+ return concat2(Text$repeat_to_width(padding, needed, language), text);
}
-public Text_t Text$right_pad(Text_t text, Int_t count, Text_t padding)
+// Pad `text` on the right with repetitions of `padding` until it is `width`
+// display columns wide, measuring with Text$width() and `language`.
+// No padding is added when `text` is already at least `width` columns wide.
+// Fails when `padding` is empty.
+public Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
{
if (padding.length == 0)
fail("Cannot pad with an empty text!");
- return concat2(text, Text$repeat_to_length(padding, Int64$from_int(count, false) - text.length));
+ // Columns still missing; zero or negative makes Text$repeat_to_width() return EMPTY_TEXT.
+ int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
+ return concat2(text, Text$repeat_to_width(padding, needed, language));
}
-public Text_t Text$middle_pad(Text_t text, Int_t count, Text_t padding)
+// Pad `text` on both sides with repetitions of `padding` until it is `width`
+// display columns wide, measuring with Text$width() and `language`.
+// The left side gets needed/2 columns and the right side (needed+1)/2, so an
+// odd leftover column goes to the right.
+// Fails when `padding` is empty.
+public Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
{
if (padding.length == 0)
fail("Cannot pad with an empty text!");
- int64_t needed = Int64$from_int(count, false) - text.length;
- return Texts(Text$repeat_to_length(padding, needed/2), text, Text$repeat_to_length(padding, (needed+1)/2));
+ // Columns still missing; each side yields EMPTY_TEXT when its share is zero or negative.
+ int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
+ return Texts(Text$repeat_to_width(padding, needed/2, language), text, Text$repeat_to_width(padding, (needed+1)/2, language));
}
public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
diff --git a/stdlib/text.h b/stdlib/text.h
index 9923403c..e2b0984b 100644
--- a/stdlib/text.h
+++ b/stdlib/text.h
@@ -68,9 +68,10 @@ Array_t Text$lines(Text_t text);
Closure_t Text$by_line(Text_t text);
Text_t Text$join(Text_t glue, Array_t pieces);
Text_t Text$repeat(Text_t text, Int_t count);
-Text_t Text$left_pad(Text_t text, Int_t count, Text_t padding);
-Text_t Text$right_pad(Text_t text, Int_t count, Text_t padding);
-Text_t Text$middle_pad(Text_t text, Int_t count, Text_t padding);
+Int_t Text$width(Text_t text, Text_t language);
+Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
+Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
+Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index);
uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index);
void Text$serialize(const void *obj, FILE *out, Table_t *, const TypeInfo_t *);
diff --git a/test/text.tm b/test/text.tm
index 4e38e346..45215251 100644
--- a/test/text.tm
+++ b/test/text.tm
@@ -374,3 +374,16 @@ func main():
= "1234XYZX" : Text
>> "1234":middle_pad(9, "XYZ")
= "XY1234XYZ" : Text
+
+ >> amelie:width()
+ = 6
+ cowboy := "🤠"
+ >> cowboy:width()
+ = 2
+ >> cowboy:left_pad(4)
+ = "  🤠"
+ >> cowboy:right_pad(4)
+ = "🤠  "
+ >> cowboy:middle_pad(4)
+ = " 🤠 "
+