From 451634ae23418b65945a9d9a3b1d895023940c18 Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Mon, 17 Mar 2025 23:37:31 -0400 Subject: [PATCH] Add text width support and have that work for padding --- environment.c | 15 ++++++++------- stdlib/text.c | 50 +++++++++++++++++++++++++++++++++++++------------- stdlib/text.h | 7 ++++--- test/text.tm | 13 +++++++++++++ 4 files changed, 62 insertions(+), 23 deletions(-) diff --git a/environment.c b/environment.c index afa9524..f43a9ca 100644 --- a/environment.c +++ b/environment.c @@ -370,7 +370,7 @@ env_t *new_compilation_unit(CORD libname) {"by_match", "Text$by_match", "func(text:Text, pattern:Pattern -> func(->Match?))"}, {"by_split", "Text$by_split", "func(text:Text, pattern=$Pattern'' -> func(->Text?))"}, {"bytes", "Text$utf8_bytes", "func(text:Text -> [Byte])"}, - {"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language=\"C\" -> Bool)"}, + {"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language='C' -> Bool)"}, {"codepoint_names", "Text$codepoint_names", "func(text:Text -> [Text])"}, {"ends_with", "Text$ends_with", "func(text,suffix:Text -> Bool)"}, {"each", "Text$each", "func(text:Text, pattern:Pattern, fn:func(match:Match), recursive=yes)"}, @@ -384,26 +384,27 @@ env_t *new_compilation_unit(CORD libname) {"from_text", "Path$from_text", "func(text:Text -> Path)"}, {"has", "Text$has", "func(text:Text, pattern:Pattern -> Bool)"}, {"join", "Text$join", "func(glue:Text, pieces:[Text] -> Text)"}, - {"left_pad", "Text$left_pad", "func(text:Text, count:Int, pad=\" \" -> Text)"}, + {"left_pad", "Text$left_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"}, {"lines", "Text$lines", "func(text:Text -> [Text])"}, - {"lower", "Text$lower", "func(text:Text, language=\"C\" -> Text)"}, + {"lower", "Text$lower", "func(text:Text, language='C' -> Text)"}, {"map", "Text$map", "func(text:Text, pattern:Pattern, fn:func(match:Match -> Text), recursive=yes -> Text)"}, {"matches", 
"Text$matches", "func(text:Text, pattern:Pattern -> [Text]?)"}, - {"middle_pad", "Text$middle_pad", "func(text:Text, count:Int, pad=\" \" -> Text)"}, + {"middle_pad", "Text$middle_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"}, {"quoted", "Text$quoted", "func(text:Text, color=no -> Text)"}, {"repeat", "Text$repeat", "func(text:Text, count:Int -> Text)"}, {"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes -> Text)"}, {"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern,Text}, backref=$/\\/, recursive=yes -> Text)"}, {"reversed", "Text$reversed", "func(text:Text -> Text)"}, - {"right_pad", "Text$right_pad", "func(text:Text, count:Int, pad=\" \" -> Text)"}, + {"right_pad", "Text$right_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"}, {"slice", "Text$slice", "func(text:Text, from=1, to=-1 -> Text)"}, {"split", "Text$split", "func(text:Text, pattern=$Pattern'' -> [Text])"}, {"starts_with", "Text$starts_with", "func(text,prefix:Text -> Bool)"}, - {"title", "Text$title", "func(text:Text, language=\"C\" -> Text)"}, + {"title", "Text$title", "func(text:Text, language='C' -> Text)"}, {"to", "Text$to", "func(text:Text, last:Int -> Text)"}, {"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes -> Text)"}, - {"upper", "Text$upper", "func(text:Text, language=\"C\" -> Text)"}, + {"upper", "Text$upper", "func(text:Text, language='C' -> Text)"}, {"utf32_codepoints", "Text$utf32_codepoints", "func(text:Text -> [Int32])"}, + {"width", "Text$width", "func(text:Text, language='C' -> Int)"}, )}, {"Thread", THREAD_TYPE, "Thread_t", "Thread", TypedArray(ns_entry_t, {"new", "Thread$new", "func(fn:func() -> Thread)"}, diff --git a/stdlib/text.c b/stdlib/text.c index adbac3f..9abab57 100644 --- a/stdlib/text.c +++ b/stdlib/text.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "arrays.h" #include "integers.h" @@ -512,45 
+513,86 @@ public Text_t Text$repeat(Text_t text, Int_t count)
     return ret;
 }
 
-static Text_t Text$repeat_to_length(Text_t to_repeat, int64_t length)
+// Return how many terminal columns `text` occupies when displayed.
+// NOTE: u8_strwidth() documents its second argument as an encoding name;
+// `language` is passed through to it unchanged.
+public Int_t Text$width(Text_t text, Text_t language)
 {
-    if (length <= 0)
+    int width = u8_strwidth((const uint8_t*)Text$as_c_string(text), Text$as_c_string(language));
+    return Int$from_int32(width);
+}
+
+// Build padding exactly `target_width` columns wide from copies of `to_repeat`:
+// whole copies while they fit, then leading graphemes of one more copy, and
+// spaces for any leftover columns that the next grapheme is too wide to fill.
+// Returns empty text if `target_width` is not positive.
+static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_t language)
+{
+    if (target_width <= 0)
         return EMPTY_TEXT;
 
+    const char *lang_str = Text$as_c_string(language);
+    int64_t width = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(to_repeat), lang_str);
+    // Padding with no display width (e.g. a zero-width space) can never reach the
+    // target, so bail out instead of letting the loop below spin forever.
+    if (width <= 0)
+        fail("Cannot pad with zero-width text!");
+
     Text_t repeated = EMPTY_TEXT;
-    while (repeated.length + to_repeat.length <= length)
+    int64_t repeated_width = 0;
+    while (repeated_width + width <= target_width) {
         repeated = concat2(repeated, to_repeat);
+        repeated_width += width;
+    }
 
-    if (repeated.length < length)
-        repeated = concat2(repeated, Text$slice(to_repeat, I_small(1), I(length - repeated.length)));
+    // Fill the remaining columns one grapheme at a time.
+    if (repeated_width < target_width) {
+        for (int64_t i = 0; repeated_width < target_width && i < to_repeat.length; i++) {
+            Text_t c = Text$slice(to_repeat, I_small(i+1), I_small(i+1));
+            int64_t w = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(c), lang_str);
+            if (repeated_width + w > target_width) {
+                // A wide grapheme would overshoot: use spaces for the rest.
+                repeated = concat2(repeated, Text$repeat(Text(" "), I(target_width - repeated_width)));
+                repeated_width = target_width;
+                break;
+            }
+            repeated = concat2(repeated, c);
+            repeated_width += w;
+        }
+    }
 
-    assert(repeated.length == length);
     return repeated;
 }
 
-public Text_t Text$left_pad(Text_t text, Int_t count, Text_t padding)
+// Prepend `padding` to `text` until it is `width` columns wide (no-op if already wider).
+public Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
 {
     if (padding.length == 0)
         fail("Cannot pad with an empty text!");
 
-    return concat2(Text$repeat_to_length(padding, Int64$from_int(count, false) - text.length), text);
+    int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
+    return concat2(Text$repeat_to_width(padding, needed, language), text);
 }
 
-public Text_t Text$right_pad(Text_t text, Int_t count, Text_t padding)
+// Append `padding` to `text` until it is `width` columns wide (no-op if already wider).
+public Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
 {
     if (padding.length == 0)
         fail("Cannot pad with an empty text!");
 
-    return concat2(text, Text$repeat_to_length(padding, Int64$from_int(count, false) - text.length));
+    int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
+    return concat2(text, Text$repeat_to_width(padding, needed, language));
 }
 
-public Text_t Text$middle_pad(Text_t text, Int_t count, Text_t padding)
+// Center `text` within `width` columns: the left side gets needed/2 columns and
+// the right side gets (needed+1)/2, so an odd leftover column goes on the right.
+public Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
 {
     if (padding.length == 0)
         fail("Cannot pad with an empty text!");
 
-    int64_t needed = Int64$from_int(count, false) - text.length;
-    return Texts(Text$repeat_to_length(padding, needed/2), text, Text$repeat_to_length(padding, (needed+1)/2));
+    int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
+    return Texts(Text$repeat_to_width(padding, needed/2, language), text, Text$repeat_to_width(padding, (needed+1)/2, language));
 }
 
 public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
diff --git a/stdlib/text.h b/stdlib/text.h
index 9923403..e2b0984 100644
--- a/stdlib/text.h
+++ b/stdlib/text.h
@@ -68,9 +68,10 @@ Array_t Text$lines(Text_t text);
 Closure_t Text$by_line(Text_t text);
 Text_t Text$join(Text_t glue, Array_t pieces);
 Text_t Text$repeat(Text_t text, Int_t count);
-Text_t Text$left_pad(Text_t text, Int_t count, Text_t padding);
-Text_t Text$right_pad(Text_t text, Int_t count, Text_t padding);
-Text_t Text$middle_pad(Text_t text, Int_t count, Text_t padding);
+Int_t Text$width(Text_t text, Text_t language);
+Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
+Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
+Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language); int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index); uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index); void Text$serialize(const void *obj, FILE *out, Table_t *, const TypeInfo_t *); diff --git a/test/text.tm b/test/text.tm index 4e38e34..4521525 100644 --- a/test/text.tm +++ b/test/text.tm @@ -374,3 +374,16 @@ func main(): = "1234XYZX" : Text >> "1234":middle_pad(9, "XYZ") = "XY1234XYZ" : Text + + >> amelie:width() + = 6 + cowboy := "🤠" + >> cowboy:width() + = 2 + >> cowboy:left_pad(4) + = " 🤠" + >> cowboy:right_pad(4) + = "🤠 " + >> cowboy:middle_pad(4) + = " 🤠 " +