Add text width support and have that work for padding

This commit is contained in:
Bruce Hill 2025-03-17 23:37:31 -04:00
parent f6eda4cf85
commit 451634ae23
4 changed files with 62 additions and 23 deletions

View File

@ -370,7 +370,7 @@ env_t *new_compilation_unit(CORD libname)
{"by_match", "Text$by_match", "func(text:Text, pattern:Pattern -> func(->Match?))"},
{"by_split", "Text$by_split", "func(text:Text, pattern=$Pattern'' -> func(->Text?))"},
{"bytes", "Text$utf8_bytes", "func(text:Text -> [Byte])"},
{"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language=\"C\" -> Bool)"},
{"caseless_equals", "Text$equal_ignoring_case", "func(a,b:Text, language='C' -> Bool)"},
{"codepoint_names", "Text$codepoint_names", "func(text:Text -> [Text])"},
{"ends_with", "Text$ends_with", "func(text,suffix:Text -> Bool)"},
{"each", "Text$each", "func(text:Text, pattern:Pattern, fn:func(match:Match), recursive=yes)"},
@ -384,26 +384,27 @@ env_t *new_compilation_unit(CORD libname)
{"from_text", "Path$from_text", "func(text:Text -> Path)"},
{"has", "Text$has", "func(text:Text, pattern:Pattern -> Bool)"},
{"join", "Text$join", "func(glue:Text, pieces:[Text] -> Text)"},
{"left_pad", "Text$left_pad", "func(text:Text, count:Int, pad=\" \" -> Text)"},
{"left_pad", "Text$left_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"},
{"lines", "Text$lines", "func(text:Text -> [Text])"},
{"lower", "Text$lower", "func(text:Text, language=\"C\" -> Text)"},
{"lower", "Text$lower", "func(text:Text, language='C' -> Text)"},
{"map", "Text$map", "func(text:Text, pattern:Pattern, fn:func(match:Match -> Text), recursive=yes -> Text)"},
{"matches", "Text$matches", "func(text:Text, pattern:Pattern -> [Text]?)"},
{"middle_pad", "Text$middle_pad", "func(text:Text, count:Int, pad=\" \" -> Text)"},
{"middle_pad", "Text$middle_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"},
{"quoted", "Text$quoted", "func(text:Text, color=no -> Text)"},
{"repeat", "Text$repeat", "func(text:Text, count:Int -> Text)"},
{"replace", "Text$replace", "func(text:Text, pattern:Pattern, replacement:Text, backref=$/\\/, recursive=yes -> Text)"},
{"replace_all", "Text$replace_all", "func(text:Text, replacements:{Pattern,Text}, backref=$/\\/, recursive=yes -> Text)"},
{"reversed", "Text$reversed", "func(text:Text -> Text)"},
{"right_pad", "Text$right_pad", "func(text:Text, count:Int, pad=\" \" -> Text)"},
{"right_pad", "Text$right_pad", "func(text:Text, count:Int, pad=' ', language='C' -> Text)"},
{"slice", "Text$slice", "func(text:Text, from=1, to=-1 -> Text)"},
{"split", "Text$split", "func(text:Text, pattern=$Pattern'' -> [Text])"},
{"starts_with", "Text$starts_with", "func(text,prefix:Text -> Bool)"},
{"title", "Text$title", "func(text:Text, language=\"C\" -> Text)"},
{"title", "Text$title", "func(text:Text, language='C' -> Text)"},
{"to", "Text$to", "func(text:Text, last:Int -> Text)"},
{"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes -> Text)"},
{"upper", "Text$upper", "func(text:Text, language=\"C\" -> Text)"},
{"upper", "Text$upper", "func(text:Text, language='C' -> Text)"},
{"utf32_codepoints", "Text$utf32_codepoints", "func(text:Text -> [Int32])"},
{"width", "Text$width", "func(text:Text, language='C' -> Int)"},
)},
{"Thread", THREAD_TYPE, "Thread_t", "Thread", TypedArray(ns_entry_t,
{"new", "Thread$new", "func(fn:func() -> Thread)"},

View File

@ -66,6 +66,7 @@
#include <unictype.h>
#include <unigbrk.h>
#include <uniname.h>
#include <uniwidth.h>
#include "arrays.h"
#include "integers.h"
@ -512,45 +513,68 @@ public Text_t Text$repeat(Text_t text, Int_t count)
return ret;
}
static Text_t Text$repeat_to_length(Text_t to_repeat, int64_t length)
// NOTE(review): this view appears to interleave removed and added lines of the
// diff. The signature above and the `if (length <= 0)` line below refer to
// `length`, which is not a parameter of Text$width.
//
// Text$width: report the display width of `text` as an Int_t.
// Both `text` and `language` are converted to C strings and handed to
// u8_strwidth(); the resulting int is wrapped with Int$from_int32().
// NOTE(review): libunistring documents the second argument of u8_strwidth() as
// an encoding name rather than a language tag -- confirm that passing
// `language` here is intended.
public Int_t Text$width(Text_t text, Text_t language)
{
if (length <= 0)
int width = u8_strwidth((const uint8_t*)Text$as_c_string(text), Text$as_c_string(language));
return Int$from_int32(width);
}
// Text$repeat_to_width: build a text out of copies of `to_repeat` whose display
// width (as measured by u8_strwidth) equals `target_width`.
// - Returns EMPTY_TEXT when target_width <= 0.
// - Appends whole copies of `to_repeat` while another full copy still fits.
// - Then appends one-position slices of `to_repeat` in order; if the next slice
//   would overshoot, the remaining columns are filled with " " instead.
// NOTE(review): this view appears to interleave removed and added lines of the
// diff; lines that mention `length` (not declared in this function) look like
// leftovers from the removed Text$repeat_to_length.
static Text_t Text$repeat_to_width(Text_t to_repeat, int64_t target_width, Text_t language)
{
if (target_width <= 0)
return EMPTY_TEXT;
const char *lang_str = Text$as_c_string(language);
// Display width of one full copy of the padding text.
int64_t width = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(to_repeat), lang_str);
Text_t repeated = EMPTY_TEXT;
while (repeated.length + to_repeat.length <= length)
int64_t repeated_width = 0;
// NOTE(review): if `width` is 0 (e.g. padding made only of zero-width
// characters), `repeated_width + width <= target_width` never becomes false
// and this loop does not terminate. The pad functions only reject
// `padding.length == 0`, not zero-width padding -- consider guarding width <= 0.
while (repeated_width + width <= target_width) {
repeated = concat2(repeated, to_repeat);
repeated_width += width;
}
if (repeated.length < length)
repeated = concat2(repeated, Text$slice(to_repeat, I_small(1), I(length - repeated.length)));
// Fill the remainder that is narrower than one full copy.
if (repeated_width < target_width) {
for (int64_t i = 0; repeated_width < target_width && i < to_repeat.length; i++) {
// Slice out the single position i+1 and measure it on its own.
Text_t c = Text$slice(to_repeat, I_small(i+1), I_small(i+1));
int64_t w = (int64_t)u8_strwidth((const uint8_t*)Text$as_c_string(c), lang_str);
// This piece would overshoot the target: pad the gap with spaces and stop.
if (repeated_width + w > target_width) {
repeated = concat2(repeated, Text$repeat(Text(" "), I(target_width - repeated_width)));
repeated_width = target_width;
break;
}
repeated = concat2(repeated, c);
repeated_width += w;
}
}
// NOTE(review): `length` is not declared in Text$repeat_to_width; this assert
// looks like part of the removed implementation.
assert(repeated.length == length);
return repeated;
}
// NOTE(review): this view appears to interleave the removed (count-based) and
// added (width-based) versions of this function; both signatures and both
// return statements are shown.
public Text_t Text$left_pad(Text_t text, Int_t count, Text_t padding)
// Text$left_pad: prepend repetitions of `padding` to `text` so the result
// reaches display width `width`.
// Fails with "Cannot pad with an empty text!" when `padding.length == 0`.
// NOTE(review): padding that is non-empty but has zero display width is not
// rejected here -- verify how Text$repeat_to_width behaves in that case.
public Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
{
if (padding.length == 0)
fail("Cannot pad with an empty text!");
return concat2(Text$repeat_to_length(padding, Int64$from_int(count, false) - text.length), text);
// Columns still missing: requested width minus the text's own display width.
int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
return concat2(Text$repeat_to_width(padding, needed, language), text);
}
// NOTE(review): this view appears to interleave the removed (count-based) and
// added (width-based) versions of this function; both signatures and both
// return statements are shown.
public Text_t Text$right_pad(Text_t text, Int_t count, Text_t padding)
// Text$right_pad: append repetitions of `padding` to `text` so the result
// reaches display width `width`.
// Fails with "Cannot pad with an empty text!" when `padding.length == 0`.
// NOTE(review): padding that is non-empty but has zero display width is not
// rejected here -- verify how Text$repeat_to_width behaves in that case.
public Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
{
if (padding.length == 0)
fail("Cannot pad with an empty text!");
return concat2(text, Text$repeat_to_length(padding, Int64$from_int(count, false) - text.length));
// Columns still missing: requested width minus the text's own display width.
int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
return concat2(text, Text$repeat_to_width(padding, needed, language));
}
// NOTE(review): this view appears to interleave the removed (count-based) and
// added (width-based) versions of this function; both signatures, both
// `needed` computations and both return statements are shown.
public Text_t Text$middle_pad(Text_t text, Int_t count, Text_t padding)
// Text$middle_pad: surround `text` with repetitions of `padding` so the result
// reaches display width `width`.
// Fails with "Cannot pad with an empty text!" when `padding.length == 0`.
// NOTE(review): padding that is non-empty but has zero display width is not
// rejected here -- verify how Text$repeat_to_width behaves in that case.
public Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language)
{
if (padding.length == 0)
fail("Cannot pad with an empty text!");
int64_t needed = Int64$from_int(count, false) - text.length;
return Texts(Text$repeat_to_length(padding, needed/2), text, Text$repeat_to_length(padding, (needed+1)/2));
// Columns still missing: requested width minus the text's own display width.
int64_t needed = Int64$from_int(width, false) - Int64$from_int(Text$width(text, language), false);
// Left side gets needed/2 columns, right side gets (needed+1)/2, so when
// `needed` is odd and positive the extra column goes on the right.
return Texts(Text$repeat_to_width(padding, needed/2, language), text, Text$repeat_to_width(padding, (needed+1)/2, language));
}
public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)

View File

@ -68,9 +68,10 @@ Array_t Text$lines(Text_t text);
Closure_t Text$by_line(Text_t text);
Text_t Text$join(Text_t glue, Array_t pieces);
Text_t Text$repeat(Text_t text, Int_t count);
Text_t Text$left_pad(Text_t text, Int_t count, Text_t padding);
Text_t Text$right_pad(Text_t text, Int_t count, Text_t padding);
Text_t Text$middle_pad(Text_t text, Int_t count, Text_t padding);
Int_t Text$width(Text_t text, Text_t language);
Text_t Text$left_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
Text_t Text$right_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
Text_t Text$middle_pad(Text_t text, Int_t width, Text_t padding, Text_t language);
int32_t Text$get_grapheme_fast(TextIter_t *state, int64_t index);
uint32_t Text$get_main_grapheme_fast(TextIter_t *state, int64_t index);
void Text$serialize(const void *obj, FILE *out, Table_t *, const TypeInfo_t *);

View File

@ -374,3 +374,16 @@ func main():
= "1234XYZX" : Text
>> "1234":middle_pad(9, "XYZ")
= "XY1234XYZ" : Text
>> amelie:width()
= 6
cowboy := "🤠"
>> cowboy:width()
= 2
>> cowboy:left_pad(4)
= "  🤠"
>> cowboy:right_pad(4)
= "🤠  "
>> cowboy:middle_pad(4)
= " 🤠 "