From b0faa5adc2c5f56ae50cf21f855fa6805db926cf Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Thu, 19 Dec 2024 13:50:35 -0500 Subject: [PATCH] Add Text:from()/to() and Array:slice() for symmetry --- compile.c | 4 ++++ docs/arrays.md | 32 +++++++++++++++++++++++++ docs/text.md | 64 +++++++++++++++++++++++++++++++++++++++++++++++++ environment.c | 2 ++ stdlib/arrays.c | 63 +++++++++++++++++++++++------------------------- stdlib/arrays.h | 1 + stdlib/text.c | 10 ++++++++ stdlib/text.h | 2 ++ typecheck.c | 1 + 9 files changed, 146 insertions(+), 33 deletions(-) diff --git a/compile.c b/compile.c index 770b019..f4e1e73 100644 --- a/compile.c +++ b/compile.c @@ -2892,6 +2892,10 @@ CORD compile(env_t *env, ast_t *ast) self = compile_to_pointer_depth(env, call->self, 0, false); arg_t *arg_spec = new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng); return CORD_all("Array$shuffled(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); + } else if (streq(call->name, "slice")) { + self = compile_to_pointer_depth(env, call->self, 0, true); + arg_t *arg_spec = new(arg_t, .name="first", .type=INT_TYPE, .next=new(arg_t, .name="last", .type=INT_TYPE)); + return CORD_all("Array$slice(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ")"); } else if (streq(call->name, "sort") || streq(call->name, "sorted")) { if (streq(call->name, "sort")) EXPECT_POINTER("an", "array"); diff --git a/docs/arrays.md b/docs/arrays.md index ce1c47d..76372cb 100644 --- a/docs/arrays.md +++ b/docs/arrays.md @@ -816,6 +816,38 @@ A new array with shuffled elements. --- +### `slice` + +**Description:** +Returns a slice of the array spanning the given indices (inclusive). + +**Signature:** +```tomo +func slice(arr: [T], first: Int, last: Int -> [T]) +``` + +**Parameters:** + +- `arr`: The original array. +- `first`: The first index to include. +- `last`: The last index to include. + +**Returns:** +A new array spanning the given indices. 
Note: negative indices are counted from +the back of the array, so `-1` refers to the last element, `-2` the +second-to-last, and so on. + +**Example:** +```tomo +>> [10, 20, 30, 40, 50]:slice(2, 4) += [20, 30, 40] + +>> [10, 20, 30, 40, 50]:slice(-3, -2) += [30, 40] +``` + +--- + ### `sort` **Description:** diff --git a/docs/text.md b/docs/text.md index 463a233..ecf3224 100644 --- a/docs/text.md +++ b/docs/text.md @@ -746,6 +746,38 @@ Note: if `text` or `pattern` is empty, an empty array will be returned. --- +## `from` + +**Description:** +Get a slice of the text, starting at the given position. + +**Signature:** +```tomo +func from(text: Text, first: Int -> Text) +``` + +**Parameters:** + +- `text`: The text to be sliced. +- `first`: The index of the first grapheme cluster to include (1-indexed). + +**Returns:** +The text from the given grapheme cluster to the end of the text. Note: a +negative index counts backwards from the end of the text, so `-1` refers to the +last cluster, `-2` the second-to-last, etc. Slice ranges will be truncated to +the length of the string. + +**Example:** +```tomo +>> "hello":from(2) += "ello" + +>> "hello":from(-2) += "lo" +``` + +--- + ## `has` **Description:** @@ -1217,6 +1249,38 @@ The text in title case. --- +## `to` + +**Description:** +Get a slice of the text, ending at the given position. + +**Signature:** +```tomo +func to(text: Text, last: Int -> Text) +``` + +**Parameters:** + +- `text`: The text to be sliced. +- `last`: The index of the last grapheme cluster to include (1-indexed). + +**Returns:** +The text up to and including the given grapheme cluster. Note: a negative index +counts backwards from the end of the text, so `-1` refers to the last cluster, +`-2` the second-to-last, etc. Slice ranges will be truncated to the length of +the string. 
+ +**Example:** +```tomo +>> "goodbye":to(3) += "goo" + +>> "goodbye":to(-2) += "goodby" +``` + +--- + ## `trim` **Description:** diff --git a/environment.c b/environment.c index af4c853..1af173b 100644 --- a/environment.c +++ b/environment.c @@ -399,6 +399,7 @@ env_t *new_compilation_unit(CORD libname) {"each", "Text$each", "func(text:Text, pattern:Pattern, fn:func(match:Match))"}, {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1 -> Match?)"}, {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern -> [Match])"}, + {"from", "Text$from", "func(text:Text, first:Int -> Text)"}, {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text?)"}, {"from_c_string", "Text$from_str", "func(str:CString -> Text?)"}, {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"}, @@ -418,6 +419,7 @@ env_t *new_compilation_unit(CORD libname) {"split", "Text$split", "func(text:Text, pattern=$Pattern'' -> [Text])"}, {"starts_with", "Text$starts_with", "func(text,prefix:Text -> Bool)"}, {"title", "Text$title", "func(text:Text -> Text)"}, + {"to", "Text$to", "func(text:Text, last:Int -> Text)"}, {"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes -> Text)"}, {"upper", "Text$upper", "func(text:Text -> Text)"}, {"utf32_codepoints", "Text$utf32_codepoints", "func(text:Text -> [Int32])"}, diff --git a/stdlib/arrays.c b/stdlib/arrays.c index fd4dcf9..c8ee55b 100644 --- a/stdlib/arrays.c +++ b/stdlib/arrays.c @@ -386,43 +386,14 @@ public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, RNG_t rng return selected; } -public Array_t Array$from(Array_t array, Int_t int_first) +public Array_t Array$from(Array_t array, Int_t first) { - int64_t first = Int_to_Int64(int_first, false); - if (first < 0) - first = array.length + first + 1; - - if (first < 1 || first > array.length) - return (Array_t){.atomic=array.atomic}; - - return (Array_t){ - .atomic=array.atomic, - .data=array.data + 
array.stride*(first-1), - .length=array.length - first + 1, - .stride=array.stride, - .data_refcount=array.data_refcount, - }; + return Array$slice(array, first, I_small(-1)); } -public Array_t Array$to(Array_t array, Int_t int_last) +public Array_t Array$to(Array_t array, Int_t last) { - int64_t last = Int_to_Int64(int_last, false); - if (last < 0) - last = array.length + last + 1; - - if (last > array.length) - last = array.length; - - if (last == 0) - return (Array_t){.atomic=array.atomic}; - - return (Array_t){ - .atomic=array.atomic, - .data=array.data, - .length=last, - .stride=array.stride, - .data_refcount=array.data_refcount, - }; + return Array$slice(array, I_small(1), last); } public Array_t Array$by(Array_t array, Int_t int_stride, int64_t padded_item_size) @@ -459,6 +430,32 @@ public Array_t Array$by(Array_t array, Int_t int_stride, int64_t padded_item_siz }; } +// Return the 1-indexed, inclusive slice [first, last] sharing the original data; negative indices count from the end, `last` is clamped to the length, and an out-of-range or inverted range yields an empty array. +public Array_t Array$slice(Array_t array, Int_t int_first, Int_t int_last) +{ + int64_t first = Int_to_Int64(int_first, false); + if (first < 0) + first = array.length + first + 1; + + int64_t last = Int_to_Int64(int_last, false); + if (last < 0) + last = array.length + last + 1; + + if (last > array.length) + last = array.length; + + if (first < 1 || first > array.length || last < first) // `last < first` also covers last <= 0, so the length below is never negative + return (Array_t){.atomic=array.atomic}; + + return (Array_t){ + .atomic=array.atomic, + .data=array.data + array.stride*(first-1), + .length=last - first + 1, + .stride=array.stride, + .data_refcount=array.data_refcount, + }; +} + public Array_t Array$reversed(Array_t array, int64_t padded_item_size) { // Just in case negating the stride gives a value that doesn't fit into a diff --git a/stdlib/arrays.h b/stdlib/arrays.h index e880c64..5e0ca7e 100644 --- a/stdlib/arrays.h +++ b/stdlib/arrays.h @@ -83,6 +83,7 @@ PUREFUNC bool Array$has(Array_t array, void *item, const TypeInfo_t *type); PUREFUNC Array_t Array$from(Array_t array, Int_t first); PUREFUNC Array_t Array$to(Array_t array, Int_t last); PUREFUNC 
Array_t Array$by(Array_t array, Int_t stride, int64_t padded_item_size); +PUREFUNC Array_t Array$slice(Array_t array, Int_t int_first, Int_t int_last); PUREFUNC Array_t Array$reversed(Array_t array, int64_t padded_item_size); Array_t Array$concat(Array_t x, Array_t y, int64_t padded_item_size); PUREFUNC uint64_t Array$hash(const void *arr, const TypeInfo_t *type); diff --git a/stdlib/text.c b/stdlib/text.c index 4879ead..384bf7a 100644 --- a/stdlib/text.c +++ b/stdlib/text.c @@ -562,6 +562,16 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int) } } +public Text_t Text$from(Text_t text, Int_t first) +{ + return Text$slice(text, first, I_small(-1)); +} + +public Text_t Text$to(Text_t text, Int_t last) +{ + return Text$slice(text, I_small(1), last); +} + public Text_t Text$cluster(Text_t text, Int_t index_int) { int64_t index = Int_to_Int64(index_int, false); diff --git a/stdlib/text.h b/stdlib/text.h index b493f4a..6e2ac41 100644 --- a/stdlib/text.h +++ b/stdlib/text.h @@ -29,6 +29,8 @@ Text_t Text$_concat(int n, Text_t items[n]); #define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__})/sizeof(Text_t), (Text_t[]){__VA_ARGS__}) #define Texts(...) 
Text$concat(__VA_ARGS__) Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int); +Text_t Text$from(Text_t text, Int_t first); +Text_t Text$to(Text_t text, Int_t last); Text_t Text$cluster(Text_t text, Int_t index_int); OptionalText_t Text$from_str(const char *str); OptionalText_t Text$from_strn(const char *str, size_t len); diff --git a/typecheck.c b/typecheck.c index b87e524..ea3cd3a 100644 --- a/typecheck.c +++ b/typecheck.c @@ -833,6 +833,7 @@ type_t *get_type(env_t *env, ast_t *ast) else if (streq(call->name, "sample")) return self_value_t; else if (streq(call->name, "shuffle")) return Type(VoidType); else if (streq(call->name, "shuffled")) return self_value_t; + else if (streq(call->name, "slice")) return self_value_t; else if (streq(call->name, "sort")) return Type(VoidType); else if (streq(call->name, "sorted")) return self_value_t; else if (streq(call->name, "to")) return self_value_t;