From e91dceb212e251a24f50e8273807e4334dd8d708 Mon Sep 17 00:00:00 2001
From: Bruce Hill <bruce@bruce-hill.com>
Date: Tue, 1 Apr 2025 17:12:53 -0400
Subject: [PATCH] Update array random docs and fix array random methods to
 check ranges

---
 docs/arrays.md      | 33 +++++++++++++++++++++------------
 src/stdlib/arrays.c | 20 ++++++++++++++------
 2 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/docs/arrays.md b/docs/arrays.md
index 543e44e..613d793 100644
--- a/docs/arrays.md
+++ b/docs/arrays.md
@@ -246,13 +246,13 @@ variable or dereference a heap pointer, it may trigger copy-on-write behavior.
 - [`func insert(arr: @[T], item: T, at: Int = 0 -> Void)`](#insert)
 - [`func insert_all(arr: @[T], items: [T], at: Int = 0 -> Void)`](#insert_all)
 - [`func pop(arr: &[T], index: Int = -1 -> T?)`](#pop)
-- [`func random(arr: [T], rng: RNG = random -> T)`](#random)
+- [`func random(arr: [T], random: func(min,max:Int64->Int64)? = none:func(min,max:Int64->Int64) -> T)`](#random)
 - [`func remove_at(arr: @[T], at: Int = -1, count: Int = 1 -> Void)`](#remove_at)
 - [`func remove_item(arr: @[T], item: T, max_count: Int = -1 -> Void)`](#remove_item)
 - [`func reversed(arr: [T] -> [T])`](#reversed)
-- [`func sample(arr: [T], count: Int, weights: [Num]? = ![Num], rng: RNG = random -> [T])`](#sample)
-- [`func shuffle(arr: @[T], rng: RNG = random -> Void)`](#shuffle)
-- [`func shuffled(arr: [T], rng: RNG = random -> [T])`](#shuffled)
+- [`func sample(arr: [T], count: Int, weights: [Num]? = ![Num], random: func(->Num)? = none:func(->Num) -> [T])`](#sample)
+- [`func shuffle(arr: @[T], random: func(min,max:Int64->Int64)? = none:func(min,max:Int64->Int64) -> Void)`](#shuffle)
+- [`func shuffled(arr: [T], random: func(min,max:Int64->Int64)? = none:func(min,max:Int64->Int64) -> [T])`](#shuffled)
 - [`func slice(arr: [T], from: Int, to: Int -> [T])`](#slice)
 - [`func sort(arr: @[T], by=T.compare -> Void)`](#sort)
 - [`sorted(arr: [T], by=T.compare -> [T])`](#sorted)
@@ -607,11 +607,13 @@ otherwise the item at the given index.
 Selects a random element from the array.
 
 ```tomo
-func random(arr: [T], rng: RNG = random -> T)
+func random(arr: [T], random: func(min,max:Int64->Int64)? = none:func(min,max:Int64->Int64) -> T)
 ```
 
 - `arr`: The array from which to select a random element.
-- `rng`: The [random number generator](rng.md) to use.
+- `random`: If provided, this function will be used to get a random index in the array. Returned
+  values must be between `min` and `max` (inclusive); otherwise, an error is raised. (Useful for
+  deterministic pseudorandom number generation.)
 
 **Returns:**  
 A random element from the array.
@@ -705,7 +707,7 @@ Selects a sample of elements from the array, optionally with weighted
 probabilities.
 
 ```tomo
-func sample(arr: [T], count: Int, weights: [Num]? = ![Num], rng: RNG = random -> [T])
+func sample(arr: [T], count: Int, weights: [Num]? = ![Num], random: func(->Num)? = none:func(->Num) -> [T])
 ```
 
 - `arr`: The array to sample from.
@@ -714,7 +716,10 @@ func sample(arr: [T], count: Int, weights: [Num]? = ![Num], rng: RNG = random ->
   values do not need to add up to any particular number, they are relative
   weights. If no weights are given, elements will be sampled with uniform
   probability.
-- `rng`: The [random number generator](rng.md) to use.
+- `random`: If provided, this function will be used to get random values for
+  sampling the array. The provided function must return random numbers
+  between `0.0` (inclusive) and `1.0` (exclusive); otherwise, an error is
+  raised. (Useful for deterministic pseudorandom number generation.)
 
 **Errors:**
 Errors will be raised if any of the following conditions occurs:
@@ -739,11 +744,13 @@ A list of sampled elements from the array.
 Shuffles the elements of the array in place.
 
 ```tomo
-func shuffle(arr: @[T], rng: RNG = random -> Void)
+func shuffle(arr: @[T], random: func(min,max:Int64->Int64)? = none:func(min,max:Int64->Int64) -> Void)
 ```
 
 - `arr`: The mutable reference to the array to be shuffled.
-- `rng`: The [random number generator](rng.md) to use.
+- `random`: If provided, this function will be used to get a random index in the array. Returned
+  values must be between `min` and `max` (inclusive); otherwise, an error is raised. (Useful for
+  deterministic pseudorandom number generation.)
 
 **Returns:**  
 Nothing.
@@ -759,11 +766,13 @@ Nothing.
 Creates a new array with elements shuffled.
 
 ```tomo
-func shuffled(arr: [T], rng: RNG = random -> [T])
+func shuffled(arr: [T], random: func(min,max:Int64->Int64)? = none:func(min,max:Int64->Int64) -> [T])
 ```
 
 - `arr`: The array to be shuffled.
-- `rng`: The [random number generator](rng.md) to use.
+- `random`: If provided, this function will be used to get a random index in the array. Returned
+  values must be between `min` and `max` (inclusive). (Useful for deterministic pseudorandom
+  number generation.)
 
 **Returns:**  
 A new array with shuffled elements.
diff --git a/src/stdlib/arrays.c b/src/stdlib/arrays.c
index b018012..2941d2e 100644
--- a/src/stdlib/arrays.c
+++ b/src/stdlib/arrays.c
@@ -307,6 +307,8 @@ public void Array$shuffle(Array_t *arr, OptionalClosure_t random_int64, int64_t
     char tmp[padded_item_size];
     for (int64_t i = arr->length-1; i > 1; i--) {
         int64_t j = rng_fn(0, i, random_int64.userdata);
+        if unlikely (j < 0 || j > arr->length-1)
+            fail("The provided random number function returned an invalid value: ", j, " (not between 0 and ", i, ")");
         memcpy(tmp, arr->data + i*padded_item_size, (size_t)padded_item_size);
         memcpy((void*)arr->data + i*padded_item_size, arr->data + j*padded_item_size, (size_t)padded_item_size);
         memcpy((void*)arr->data + j*padded_item_size, tmp, (size_t)padded_item_size);
@@ -327,6 +329,8 @@ public void *Array$random(Array_t arr, OptionalClosure_t random_int64)
 
     int64_t (*rng_fn)(int64_t, int64_t, void*) = random_int64.fn;
     int64_t index = rng_fn(0, arr.length-1, random_int64.userdata);
+    if unlikely (index < 0 || index > arr.length-1)
+        fail("The provided random number function returned an invalid value: ", index, " (not between 0 and ", (int64_t)arr.length, ")");
     return arr.data + arr.stride*index;
 }
 
@@ -370,11 +374,6 @@ public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, OptionalC
     if (arr.length == 0)
         fail("There are no elements in this array!");
 
-    Array_t selected = {
-        .data=arr.atomic ? GC_MALLOC_ATOMIC((size_t)(n * padded_item_size)) : GC_MALLOC((size_t)(n * padded_item_size)),
-        .length=n,
-        .stride=padded_item_size, .atomic=arr.atomic};
-
     if (weights.length != arr.length)
         fail("Array has ", (int64_t)arr.length, " elements, but there are ", (int64_t)weights.length, " weights given");
 
@@ -433,9 +432,18 @@ public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, OptionalC
             aliases[i].alias = i;
 
     double (*rng_fn)(void*) = random_num.fn ? random_num.fn : _default_random_num;
+
+    Array_t selected = {
+        .data=arr.atomic ? GC_MALLOC_ATOMIC((size_t)(n * padded_item_size)) : GC_MALLOC((size_t)(n * padded_item_size)),
+        .length=n,
+        .stride=padded_item_size, .atomic=arr.atomic};
     for (int64_t i = 0; i < n; i++) {
-        double r = (double)arr.length * rng_fn(random_num.userdata);
+        double r = rng_fn(random_num.userdata);
+        if unlikely (r < 0.0 || r >= 1.0)
+            fail("The random number function returned a value not between 0.0 (inclusive) and 1.0 (exclusive): ", r);
+        r *= (double)arr.length;
         int64_t index = (int64_t)r;
+        assert(index >= 0 && index < arr.length);
         if ((r - (double)index) > aliases[index].odds)
             index = aliases[index].alias;
         memcpy(selected.data + i*selected.stride, arr.data + index*arr.stride, (size_t)padded_item_size);