From b0faa5adc2c5f56ae50cf21f855fa6805db926cf Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Thu, 19 Dec 2024 13:50:35 -0500
Subject: [PATCH] Add Text:from()/to() and Array:slice() for symmetry

---
 compile.c       |  4 ++++
 docs/arrays.md  | 32 +++++++++++++++++++++++++
 docs/text.md    | 64 +++++++++++++++++++++++++++++++++++++++++++++++++
 environment.c   |  2 ++
 stdlib/arrays.c | 63 +++++++++++++++++++++++-------------------------
 stdlib/arrays.h |  1 +
 stdlib/text.c   | 10 ++++++++
 stdlib/text.h   |  2 ++
 typecheck.c     |  1 +
 9 files changed, 146 insertions(+), 33 deletions(-)

diff --git a/compile.c b/compile.c
index 770b019..f4e1e73 100644
--- a/compile.c
+++ b/compile.c
@@ -2892,6 +2892,10 @@ CORD compile(env_t *env, ast_t *ast)
                 self = compile_to_pointer_depth(env, call->self, 0, false);
                 arg_t *arg_spec = new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng);
                 return CORD_all("Array$shuffled(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")");
+            } else if (streq(call->name, "slice")) {
+                self = compile_to_pointer_depth(env, call->self, 0, true);
+                arg_t *arg_spec = new(arg_t, .name="first", .type=INT_TYPE, .next=new(arg_t, .name="last", .type=INT_TYPE));
+                return CORD_all("Array$slice(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ")");
             } else if (streq(call->name, "sort") || streq(call->name, "sorted")) {
                 if (streq(call->name, "sort"))
                     EXPECT_POINTER("an", "array");
diff --git a/docs/arrays.md b/docs/arrays.md
index ce1c47d..76372cb 100644
--- a/docs/arrays.md
+++ b/docs/arrays.md
@@ -816,6 +816,38 @@ A new array with shuffled elements.
 
 ---
 
+### `slice`
+
+**Description:**  
+Returns a slice of the array spanning the given indices (inclusive).
+
+**Signature:**  
+```tomo
+func slice(arr: [T], first: Int, last: Int -> [T])
+```
+
+**Parameters:**
+
+- `arr`: The original array.
+- `first`: The first index to include.
+- `last`: The last index to include.
+
+**Returns:**  
+A new array spanning the given indices. Note: negative indices are counted from
+the back of the array, so `-1` refers to the last element, `-2` the
+second-to-last, and so on.
+
+**Example:**  
+```tomo
+>> [10, 20, 30, 40, 50]:slice(2, 4)
+= [20, 30, 40]
+
+>> [10, 20, 30, 40, 50]:slice(-3, -2)
+= [30, 40]
+```
+
+---
+
 ### `sort`
 
 **Description:**  
diff --git a/docs/text.md b/docs/text.md
index 463a233..ecf3224 100644
--- a/docs/text.md
+++ b/docs/text.md
@@ -746,6 +746,38 @@ Note: if `text` or `pattern` is empty, an empty array will be returned.
 
 ---
 
+## `from`
+
+**Description:**  
+Get a slice of the text, starting at the given position.
+
+**Signature:**  
+```tomo
+func from(text: Text, first: Int -> Text)
+```
+
+**Parameters:**
+
+- `text`: The text to be sliced.
+- `first`: The index of the first grapheme cluster to include (1-indexed).
+
+**Returns:**  
+The text from the given grapheme cluster to the end of the text. Note: a
+negative index counts backwards from the end of the text, so `-1` refers to the
+last cluster, `-2` the second-to-last, etc. Slice ranges will be truncated to
+the length of the string.
+
+**Example:**  
+```tomo
+>> "hello":from(2)
+= "ello"
+
+>> "hello":from(-2)
+= "lo"
+```
+
+---
+
 ## `has`
 
 **Description:**  
@@ -1217,6 +1249,38 @@ The text in title case.
 
 ---
 
+## `to`
+
+**Description:**  
+Get a slice of the text, ending at the given position.
+
+**Signature:**  
+```tomo
+func to(text: Text, last: Int -> Text)
+```
+
+**Parameters:**
+
+- `text`: The text to be sliced.
+- `last`: The index of the last grapheme cluster to include (1-indexed).
+
+**Returns:**  
+The text up to and including the given grapheme cluster. Note: a negative index
+counts backwards from the end of the text, so `-1` refers to the last cluster,
+`-2` the second-to-last, etc. Slice ranges will be truncated to the length of
+the string.
+
+**Example:**  
+```tomo
+>> "goodbye":to(3)
+= "goo"
+
+>> "goodbye":to(-2)
+= "goodby"
+```
+
+---
+
 ## `trim`
 
 **Description:**  
diff --git a/environment.c b/environment.c
index af4c853..1af173b 100644
--- a/environment.c
+++ b/environment.c
@@ -399,6 +399,7 @@ env_t *new_compilation_unit(CORD libname)
             {"each", "Text$each", "func(text:Text, pattern:Pattern, fn:func(match:Match))"},
             {"find", "Text$find", "func(text:Text, pattern:Pattern, start=1 -> Match?)"},
             {"find_all", "Text$find_all", "func(text:Text, pattern:Pattern -> [Match])"},
+            {"from", "Text$from", "func(text:Text, first:Int -> Text)"},
             {"from_bytes", "Text$from_bytes", "func(bytes:[Byte] -> Text?)"},
             {"from_c_string", "Text$from_str", "func(str:CString -> Text?)"},
             {"from_codepoint_names", "Text$from_codepoint_names", "func(codepoint_names:[Text] -> Text?)"},
@@ -418,6 +419,7 @@ env_t *new_compilation_unit(CORD libname)
             {"split", "Text$split", "func(text:Text, pattern=$Pattern'' -> [Text])"},
             {"starts_with", "Text$starts_with", "func(text,prefix:Text -> Bool)"},
             {"title", "Text$title", "func(text:Text -> Text)"},
+            {"to", "Text$to", "func(text:Text, last:Int -> Text)"},
             {"trim", "Text$trim", "func(text:Text, pattern=$/{whitespace}/, trim_left=yes, trim_right=yes -> Text)"},
             {"upper", "Text$upper", "func(text:Text -> Text)"},
             {"utf32_codepoints", "Text$utf32_codepoints", "func(text:Text -> [Int32])"},
diff --git a/stdlib/arrays.c b/stdlib/arrays.c
index fd4dcf9..c8ee55b 100644
--- a/stdlib/arrays.c
+++ b/stdlib/arrays.c
@@ -386,43 +386,14 @@ public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, RNG_t rng
     return selected;
 }
 
-public Array_t Array$from(Array_t array, Int_t int_first)
+public Array_t Array$from(Array_t array, Int_t first)
 {
-    int64_t first = Int_to_Int64(int_first, false);
-    if (first < 0)
-        first = array.length + first + 1;
-
-    if (first < 1 || first > array.length)
-        return (Array_t){.atomic=array.atomic};
-
-    return (Array_t){
-        .atomic=array.atomic,
-        .data=array.data + array.stride*(first-1),
-        .length=array.length - first + 1,
-        .stride=array.stride,
-        .data_refcount=array.data_refcount,
-    };
+    return Array$slice(array, first, I_small(-1));
 }
 
-public Array_t Array$to(Array_t array, Int_t int_last)
+public Array_t Array$to(Array_t array, Int_t last)
 {
-    int64_t last = Int_to_Int64(int_last, false);
-    if (last < 0)
-        last = array.length + last + 1;
-
-    if (last > array.length)
-        last = array.length;
-
-    if (last == 0)
-        return (Array_t){.atomic=array.atomic};
-
-    return (Array_t){
-        .atomic=array.atomic,
-        .data=array.data,
-        .length=last,
-        .stride=array.stride,
-        .data_refcount=array.data_refcount,
-    };
+    return Array$slice(array, I_small(1), last);
 }
 
 public Array_t Array$by(Array_t array, Int_t int_stride, int64_t padded_item_size)
@@ -459,6 +430,32 @@ public Array_t Array$by(Array_t array, Int_t int_stride, int64_t padded_item_siz
     };
 }
 
+// Inclusive, 1-indexed slice of `array`; negative indices count back from the end (-1 is the last item).
+public Array_t Array$slice(Array_t array, Int_t int_first, Int_t int_last)
+{
+    int64_t first = Int_to_Int64(int_first, false);
+    if (first < 0)
+        first = array.length + first + 1;
+
+    int64_t last = Int_to_Int64(int_last, false);
+    if (last < 0)
+        last = array.length + last + 1;
+    if (last > array.length)
+        last = array.length;
+
+    // Out-of-range start or an empty/inverted range (last < first, which includes last == 0) gives an empty array, never a negative length
+    if (first < 1 || first > array.length || last < first)
+        return (Array_t){.atomic=array.atomic};
+
+    return (Array_t){
+        .atomic=array.atomic,
+        .data=array.data + array.stride*(first-1),
+        .length=last - first + 1,
+        .stride=array.stride,
+        .data_refcount=array.data_refcount,
+    };
+}
+
 public Array_t Array$reversed(Array_t array, int64_t padded_item_size)
 {
     // Just in case negating the stride gives a value that doesn't fit into a
diff --git a/stdlib/arrays.h b/stdlib/arrays.h
index e880c64..5e0ca7e 100644
--- a/stdlib/arrays.h
+++ b/stdlib/arrays.h
@@ -83,6 +83,7 @@ PUREFUNC bool Array$has(Array_t array, void *item, const TypeInfo_t *type);
 PUREFUNC Array_t Array$from(Array_t array, Int_t first);
 PUREFUNC Array_t Array$to(Array_t array, Int_t last);
 PUREFUNC Array_t Array$by(Array_t array, Int_t stride, int64_t padded_item_size);
+PUREFUNC Array_t Array$slice(Array_t array, Int_t int_first, Int_t int_last);
 PUREFUNC Array_t Array$reversed(Array_t array, int64_t padded_item_size);
 Array_t Array$concat(Array_t x, Array_t y, int64_t padded_item_size);
 PUREFUNC uint64_t Array$hash(const void *arr, const TypeInfo_t *type);
diff --git a/stdlib/text.c b/stdlib/text.c
index 4879ead..384bf7a 100644
--- a/stdlib/text.c
+++ b/stdlib/text.c
@@ -562,6 +562,16 @@ public Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int)
     }
 }
 
+public Text_t Text$from(Text_t text, Int_t first)
+{
+    return Text$slice(text, first, I_small(-1));
+}
+
+public Text_t Text$to(Text_t text, Int_t last)
+{
+    return Text$slice(text, I_small(1), last);
+}
+
 public Text_t Text$cluster(Text_t text, Int_t index_int)
 {
     int64_t index = Int_to_Int64(index_int, false);
diff --git a/stdlib/text.h b/stdlib/text.h
index b493f4a..6e2ac41 100644
--- a/stdlib/text.h
+++ b/stdlib/text.h
@@ -29,6 +29,8 @@ Text_t Text$_concat(int n, Text_t items[n]);
 #define Text$concat(...) Text$_concat(sizeof((Text_t[]){__VA_ARGS__})/sizeof(Text_t), (Text_t[]){__VA_ARGS__})
 #define Texts(...) Text$concat(__VA_ARGS__)
 Text_t Text$slice(Text_t text, Int_t first_int, Int_t last_int);
+Text_t Text$from(Text_t text, Int_t first);
+Text_t Text$to(Text_t text, Int_t last);
 Text_t Text$cluster(Text_t text, Int_t index_int);
 OptionalText_t Text$from_str(const char *str);
 OptionalText_t Text$from_strn(const char *str, size_t len);
diff --git a/typecheck.c b/typecheck.c
index b87e524..ea3cd3a 100644
--- a/typecheck.c
+++ b/typecheck.c
@@ -833,6 +833,7 @@ type_t *get_type(env_t *env, ast_t *ast)
             else if (streq(call->name, "sample")) return self_value_t;
             else if (streq(call->name, "shuffle")) return Type(VoidType);
             else if (streq(call->name, "shuffled")) return self_value_t;
+            else if (streq(call->name, "slice")) return self_value_t;
             else if (streq(call->name, "sort")) return Type(VoidType);
             else if (streq(call->name, "sorted")) return self_value_t;
             else if (streq(call->name, "to")) return self_value_t;