From fc9a6f1416be514e9d26b301d05e7e347560560b Mon Sep 17 00:00:00 2001 From: Bruce Hill Date: Sun, 3 Nov 2024 22:37:48 -0500 Subject: [PATCH] Add RNGs to the language --- Makefile | 2 +- compile.c | 18 +-- docs/README.md | 1 + docs/arrays.md | 12 +- docs/booleans.md | 24 ---- docs/bytes.md | 25 +---- docs/functions.md | 2 +- docs/integers.md | 26 ----- docs/nums.md | 24 ---- docs/rng.md | 248 ++++++++++++++++++++++++++++++++++++++++++ environment.c | 39 ++++--- environment.h | 1 + stdlib/arrays.c | 36 ++---- stdlib/arrays.h | 8 +- stdlib/bools.c | 5 - stdlib/bools.h | 1 - stdlib/bytes.c | 11 -- stdlib/bytes.h | 1 - stdlib/chacha.h | 223 ++++++++++++++++++++++++++++++++++++++ stdlib/datatypes.h | 2 + stdlib/integers.c | 51 --------- stdlib/integers.h | 6 +- stdlib/nums.c | 8 -- stdlib/nums.h | 2 - stdlib/rng.c | 265 +++++++++++++++++++++++++++++++++++++++++++++ stdlib/rng.h | 31 ++++++ stdlib/stdlib.c | 17 +-- stdlib/threads.c | 9 +- stdlib/tomo.h | 1 + test/rng.tm | 40 +++++++ 30 files changed, 889 insertions(+), 250 deletions(-) create mode 100644 docs/rng.md create mode 100644 stdlib/chacha.h create mode 100644 stdlib/rng.c create mode 100644 stdlib/rng.h create mode 100644 test/rng.tm diff --git a/Makefile b/Makefile index 2a4737e..936ea0a 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ CFLAGS_PLACEHOLDER="$$(echo -e '\033[2m\033[m')" LDLIBS=-lgc -lcord -lm -lunistring -lgmp -ldl BUILTIN_OBJS=stdlib/siphash.o stdlib/arrays.o stdlib/bools.o stdlib/bytes.o stdlib/channels.o stdlib/nums.o stdlib/integers.o \ stdlib/pointers.o stdlib/memory.o stdlib/text.o stdlib/threads.o stdlib/c_strings.o stdlib/tables.o \ - stdlib/types.o stdlib/util.o stdlib/files.o stdlib/ranges.o stdlib/shell.o stdlib/paths.o \ + stdlib/types.o stdlib/util.o stdlib/files.o stdlib/ranges.o stdlib/shell.o stdlib/paths.o stdlib/rng.o \ stdlib/optionals.o stdlib/patterns.o stdlib/metamethods.o stdlib/functiontype.o stdlib/stdlib.o stdlib/datetime.o TESTS=$(patsubst 
%.tm,%.tm.testresult,$(wildcard test/*.tm)) diff --git a/compile.c b/compile.c index 4c7c1bd..1cd0c2d 100644 --- a/compile.c +++ b/compile.c @@ -216,6 +216,7 @@ CORD compile_type(type_t *t) { if (t == THREAD_TYPE) return "Thread_t"; else if (t == RANGE_TYPE) return "Range_t"; + else if (t == RNG_TYPE) return "RNG_t"; switch (t->tag) { case ReturnType: errx(1, "Shouldn't be compiling ReturnType to a type"); @@ -888,7 +889,7 @@ CORD compile_statement(env_t *env, ast_t *ast) CORD pop_code = CORD_EMPTY; if (fndef->cache->tag == Int && !cache_size.is_null && cache_size.i > 0) { pop_code = CORD_all("if (cache.entries.length > ", CORD_asprintf("%ld", cache_size.i), - ") Table$remove(&cache, cache.entries.data + cache.entries.stride*Int$random(I(0), I(cache.entries.length-1)), table_type);\n"); + ") Table$remove(&cache, cache.entries.data + cache.entries.stride*RNG$int64(default_rng, I(0), I(cache.entries.length-1)), table_type);\n"); } CORD arg_typedef = compile_struct_header(env, args_def); @@ -2689,10 +2690,7 @@ CORD compile(env_t *env, ast_t *ast) type_t *item_t = Match(self_value_t, ArrayType)->item_type; CORD padded_item_size = CORD_asprintf("%ld", type_size(item_t)); - type_t *rng_fn = Type(ClosureType, .fn=Type(FunctionType, .args=NULL, .ret=Type(IntType, .bits=TYPE_IBITS64))); - ast_t *default_rng = FakeAST(InlineCCode, - .code=CORD_all("((Closure_t){.fn=Int64$full_random})"), - .type=rng_fn); + ast_t *default_rng = FakeAST(InlineCCode, .code="default_rng", .type=RNG_TYPE); if (streq(call->name, "insert")) { CORD self = compile_to_pointer_depth(env, call->self, 1, false); @@ -2720,7 +2718,7 @@ CORD compile(env_t *env, ast_t *ast) compile_type_info(env, self_value_t), ")"); } else if (streq(call->name, "random")) { CORD self = compile_to_pointer_depth(env, call->self, 0, false); - arg_t *arg_spec = new(arg_t, .name="rng", .type=rng_fn, .default_val=default_rng); + arg_t *arg_spec = new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng); return 
CORD_all("Array$random_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type(item_t), ")"); } else if (streq(call->name, "has")) { CORD self = compile_to_pointer_depth(env, call->self, 0, false); @@ -2732,16 +2730,17 @@ CORD compile(env_t *env, ast_t *ast) arg_t *arg_spec = new(arg_t, .name="count", .type=INT_TYPE, .next=new(arg_t, .name="weights", .type=Type(ArrayType, .item_type=Type(NumType)), .default_val=FakeAST(Null, .type=new(type_ast_t, .tag=ArrayTypeAST, - .__data.ArrayTypeAST.item=new(type_ast_t, .tag=VarTypeAST, .__data.VarTypeAST.name="Num"))))); + .__data.ArrayTypeAST.item=new(type_ast_t, .tag=VarTypeAST, .__data.VarTypeAST.name="Num"))), + .next=new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng))); return CORD_all("Array$sample(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); } else if (streq(call->name, "shuffle")) { CORD self = compile_to_pointer_depth(env, call->self, 1, false); - arg_t *arg_spec = new(arg_t, .name="rng", .type=rng_fn, .default_val=default_rng); + arg_t *arg_spec = new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng); return CORD_all("Array$shuffle(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); } else if (streq(call->name, "shuffled")) { CORD self = compile_to_pointer_depth(env, call->self, 0, false); - arg_t *arg_spec = new(arg_t, .name="rng", .type=rng_fn, .default_val=default_rng); + arg_t *arg_spec = new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng); return CORD_all("Array$shuffled(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")"); } else if (streq(call->name, "sort") || streq(call->name, "sorted")) { CORD self = streq(call->name, "sort") @@ -3541,6 +3540,7 @@ CORD compile_type_info(env_t *env, type_t *t) { if (t == THREAD_TYPE) return "&Thread$info"; else if (t == RANGE_TYPE) return "&Range$info"; + else if 
(t == RNG_TYPE) return "&RNG$info"; switch (t->tag) { case BoolType: case ByteType: case IntType: case BigIntType: case NumType: case CStringType: case DateTimeType: diff --git a/docs/README.md b/docs/README.md index 1f636bc..6d3567d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -29,6 +29,7 @@ Information about Tomo's built-in types can be found here: - [Integers](integers.md) - [Languages](langs.md) - [Paths](paths.md) +- [Random Number Generators](rng.md) - [Sets](sets.md) - [Structs](structs.md) - [Tables](tables.md) diff --git a/docs/arrays.md b/docs/arrays.md index ee28cf3..144bcd7 100644 --- a/docs/arrays.md +++ b/docs/arrays.md @@ -614,12 +614,13 @@ Selects a random element from the array. **Signature:** ```tomo -func random(arr: [T] -> T) +func random(arr: [T], rng: RNG = random -> T) ``` **Parameters:** - `arr`: The array from which to select a random element. +- `rng`: The [random number generator](rng.md) to use. **Returns:** A random element from the array. @@ -731,7 +732,7 @@ probabilities. **Signature:** ```tomo -func sample(arr: [T], count: Int, weights: [Num]? = ![Num] -> [T]) +func sample(arr: [T], count: Int, weights: [Num]? = ![Num], rng: RNG = random -> [T]) ``` **Parameters:** @@ -742,6 +743,7 @@ func sample(arr: [T], count: Int, weights: [Num]? = ![Num] -> [T]) values do not need to add up to any particular number, they are relative weights. If no weights are given, elements will be sampled with uniform probability. +- `rng`: The [random number generator](rng.md) to use. **Errors:** Errors will be raised if any of the following conditions occurs: @@ -769,12 +771,13 @@ Shuffles the elements of the array in place. **Signature:** ```tomo -func shuffle(arr: @[T] -> Void) +func shuffle(arr: @[T], rng: RNG = random -> Void) ``` **Parameters:** - `arr`: The mutable reference to the array to be shuffled. +- `rng`: The [random number generator](rng.md) to use. **Returns:** Nothing. @@ -793,12 +796,13 @@ Creates a new array with elements shuffled. 
**Signature:** ```tomo -func shuffled(arr: [T] -> [T]) +func shuffled(arr: [T], rng: RNG = random -> [T]) ``` **Parameters:** - `arr`: The array to be shuffled. +- `rng`: The [random number generator](rng.md) to use. **Returns:** A new array with shuffled elements. diff --git a/docs/booleans.md b/docs/booleans.md index 5610d86..4f44931 100644 --- a/docs/booleans.md +++ b/docs/booleans.md @@ -35,27 +35,3 @@ func from_text(text: Text -> Bool?) >> Bool.from_text("???") = !Bool ``` - ---- - -## `random` - -**Description:** -Generates a random boolean value based on a specified probability. - -**Signature:** -```tomo -func random(p: Float = 0.5 -> Bool) -``` - -**Parameters:** - -- `p`: The probability (between `0` and `1`) of returning `yes`. Default is `0.5`. - -**Returns:** -`yes` with probability `p`, and `no` with probability `1 - p`. - -**Example:** -```tomo ->> Bool.random(70%) // yes (with 70% probability) -``` diff --git a/docs/bytes.md b/docs/bytes.md index eb73342..0b61833 100644 --- a/docs/bytes.md +++ b/docs/bytes.md @@ -9,27 +9,4 @@ integer with a `[B]` suffix, e.g. `255[B]`. # Byte Methods -## `random` - -**Description:** -Generates a random byte value in the specified range. - -**Signature:** -```tomo -func random(min: Byte = Byte.min, max: Byte = Byte.max -> Byte) -``` - -**Parameters:** - -- `min`: The minimum value to generate (inclusive). -- `max`: The maximum value to generate (inclusive). - -**Returns:** -A random byte chosen with uniform probability from within the given range -(inclusive). If `min` is greater than `max`, an error will be raised. - -**Example:** -```tomo ->> Byte.random() -= 42[B] -``` +None. 
diff --git a/docs/functions.md b/docs/functions.md index 174ad23..05d84e4 100644 --- a/docs/functions.md +++ b/docs/functions.md @@ -35,7 +35,7 @@ callsite: ``` **Note:** Default arguments are re-evaluated at the callsite for each function -call, so if your default argument is `func foo(x=Int.random(1,10) -> Int)`, then +call, so if your default argument is `func foo(x=random:int(1,10) -> Int)`, then each time you call the function without an `x` argument, it will give you a new random number. diff --git a/docs/integers.md b/docs/integers.md index 1c30aee..5b29d5a 100644 --- a/docs/integers.md +++ b/docs/integers.md @@ -112,32 +112,6 @@ The octal string representation of the integer. --- -## `random` - -**Description:** -Generates a random integer between the specified minimum and maximum values. - -**Signature:** -```tomo -func random(min: Int, max: Int -> Int) -``` - -**Parameters:** - -- `min`: The minimum value of the range. -- `max`: The maximum value of the range. - -**Returns:** -A random integer between `min` and `max` (inclusive). - -**Example:** -```tomo ->> Int.random(1, 100) -= 47 -``` - ---- - ## `from_text` **Description:** diff --git a/docs/nums.md b/docs/nums.md index 9072f86..5f9b528 100644 --- a/docs/nums.md +++ b/docs/nums.md @@ -991,30 +991,6 @@ The next representable value after `x` in the direction of `y`. --- -### `random` - -**Description:** -Generates a random floating-point number. - -**Signature:** -```tomo -func random(->Num) -``` - -**Parameters:** -None - -**Returns:** -A random floating-point number between 0 and 1. - -**Example:** -```tomo ->> Num.random() -= 0.4521 -``` - ---- - ### `rint` **Description:** diff --git a/docs/rng.md b/docs/rng.md new file mode 100644 index 0000000..9d1c94d --- /dev/null +++ b/docs/rng.md @@ -0,0 +1,248 @@ +# Random Number Generators (RNG) + +Tomo comes with an `RNG` type (Random Number Generator). 
This type represents a +self-contained piece of data that encapsulates the state of a relatively fast +and relatively secure pseudo-random number generator. The current +implementation is based on the [ChaCha20 stream +cipher,](https://en.wikipedia.org/wiki/Salsa20#ChaCha_variant) inspired by +[`arc4random` in OpenBSD.](https://man.openbsd.org/arc4random.3) + +An `RNG` object can be used for deterministic, repeatable generation of +pseudorandom numbers (for example, to be used in a video game for creating +seeded levels). The default random number generator for Tomo is called `random` +and is, by default, initialized with random data from the operating system when +a Tomo program launches. + +# RNG Functions + +This documentation provides details on RNG functions available in the API. +[Arrays](arrays.md) also have some methods which use RNG values: +`array:shuffle()`, `array:shuffled()`, `array:random()`, and `array:sample()`. + +## `bool` + +**Description:** +Generate a random boolean value with a given probability. + +**Signature:** +```tomo +func bool(rng: RNG, p: Num = 0.5 -> Bool) +``` + +**Parameters:** + +- `rng`: The random number generator to use. +- `p`: The probability of returning a `yes` value. Values less than zero and + `NaN` values are treated as equal to zero and values larger than one are + treated as equal to one. + +**Returns:** +`yes` with probability `p` and `no` with probability `1-p`. + +**Example:** +```tomo +>> random:bool() += no +>> random:bool(1.0) += yes +``` + +--- + +## `byte` + +**Description:** +Generate a random byte with uniform probability. + +**Signature:** +```tomo +func byte(rng: RNG -> Byte) +``` + +**Parameters:** + +- `rng`: The random number generator to use. + +**Returns:** +A random byte (0-255). + +**Example:** +```tomo +>> random:byte() += 103[B] +``` + +--- + +## `bytes` + +**Description:** +Generate an array of uniformly random bytes with the given length.
+ +**Signature:** +```tomo +func bytes(rng: RNG, count: Int -> [Byte]) +``` + +**Parameters:** + +- `rng`: The random number generator to use. +- `count`: The number of random bytes to return. + +**Returns:** +An array of length `count` random bytes with uniform random distribution (0-255). + +**Example:** +```tomo +>> random:bytes(4) += [135[B], 169[B], 103[B], 212[B]] +``` + +--- + +## `copy` + +**Description:** +Return a copy of a random number generator. This copy will be a parallel version of +the given RNG with its own internal state. + +**Signature:** +```tomo +func copy(rng: RNG -> RNG) +``` + +**Parameters:** + +- `rng`: The random number generator to copy. + +**Returns:** +A copy of the given RNG. + +**Example:** +```tomo +>> rng := RNG.new([:Byte]) +>> copy := rng:copy() + +>> rng:bytes(10) += [224[B], 102[B], 190[B], 59[B], 251[B], 50[B], 217[B], 170[B], 15[B], 221[B]] + +# The copy runs in parallel to the original RNG: +>> copy:bytes(10) += [224[B], 102[B], 190[B], 59[B], 251[B], 50[B], 217[B], 170[B], 15[B], 221[B]] +``` + +--- + +## `int`, `int64`, `int32`, `int16`, `int8` + +**Description:** +Generate a random integer value with the given range. + +**Signature:** +```tomo +func int(rng: RNG, min: Int, max: Int -> Int) +func int64(rng: RNG, min: Int64 = Int64.min, max: Int64 = Int64.max -> Int64) +func int32(rng: RNG, min: Int32 = Int32.min, max: Int32 = Int32.max -> Int32) +func int16(rng: RNG, min: Int16 = Int16.min, max: Int16 = Int16.max -> Int16) +func int8(rng: RNG, min: Int8 = Int8.min, max: Int8 = Int8.max -> Int8) +``` + +**Parameters:** + +- `rng`: The random number generator to use. +- `min`: The minimum value to be returned. +- `max`: The maximum value to be returned. + +**Returns:** +An integer uniformly chosen from the range `[min, max]` (inclusive). If `min` +is greater than `max`, an error will be raised. + +**Example:** +```tomo +>> random:int(1, 10) += 8 +``` + +--- + +## `new` + +**Description:** +Return a new random number generator.
+ +**Signature:** +```tomo +func new(seed: [Byte] = (/dev/urandom):read_bytes(40) -> RNG) +``` + +**Parameters:** + +- `seed`: The seed to use for the random number generator. A seed length of 40 + bytes is recommended. Seed lengths of less than 40 bytes are padded with + zeroes. + +**Returns:** +A new random number generator. + +**Example:** +```tomo +>> my_rng := RNG.new([1[B], 2[B], 3[B], 4[B]]) +>> my_rng:bool() += yes +``` + +--- + +## `num`, `num32` + +**Description:** +Generate a random floating point value with the given range. + +**Signature:** +```tomo +func num(rng: RNG, min: Num = 0.0, max: Num = 1.0 -> Num) +func num32(rng: RNG, min: Num32 = 0.0_f32, max: Num32 = 1.0_f32 -> Num32) +``` + +**Parameters:** + +- `rng`: The random number generator to use. +- `min`: The minimum value to be returned. +- `max`: The maximum value to be returned. + +**Returns:** +A floating point number uniformly chosen from the range `[min, max]` +(inclusive). If `min` is greater than `max`, an error will be raised. + +**Example:** +```tomo +>> random:num(1, 10) += 9.512830439975572 +``` + +--- + +## `set_seed` + +**Description:** +Set the seed for an RNG. + +**Signature:** +```tomo +func set_seed(rng: RNG, seed: [Byte]) +``` + +**Parameters:** + +- `rng`: The random number generator to modify. +- `seed`: A new seed to re-seed the random number generator with. A seed length + of 40 bytes is recommended. Seed lengths of less than 40 bytes are padded + with zeroes. + +**Returns:** +Nothing.
+ +**Example:** +```tomo +random:set_seed((/dev/urandom):read_bytes(40)) +``` diff --git a/environment.c b/environment.c index 5b829b1..113eff1 100644 --- a/environment.c +++ b/environment.c @@ -13,6 +13,7 @@ type_t *TEXT_TYPE = NULL; type_t *RANGE_TYPE = NULL; +type_t *RNG_TYPE = NULL; public type_t *THREAD_TYPE = NULL; env_t *new_compilation_unit(CORD libname) @@ -78,6 +79,13 @@ env_t *new_compilation_unit(CORD libname) THREAD_TYPE = Type(StructType, .name="Thread", .env=thread_env, .opaque=true); } + { + env_t *rng_env = namespace_env(env, "RNG"); + RNG_TYPE = Type( + StructType, .name="RNG", .env=rng_env, + .fields=new(arg_t, .name="state", .type=Type(PointerType, .pointed=Type(MemoryType)))); + } + struct { const char *name; type_t *type; @@ -89,12 +97,10 @@ env_t *new_compilation_unit(CORD libname) {"Memory", Type(MemoryType), "Memory_t", "Memory$info", {}}, {"Bool", Type(BoolType), "Bool_t", "Bool$info", TypedArray(ns_entry_t, {"from_text", "Bool$from_text", "func(text:Text -> Bool?)"}, - {"random", "Bool$random", "func(p=0.5 -> Bool)"}, )}, {"Byte", Type(ByteType), "Byte_t", "Byte$info", TypedArray(ns_entry_t, {"max", "Byte$max", "Byte"}, {"min", "Byte$min", "Byte"}, - {"random", "Byte$random", "func(min=Byte.min, max=Byte.max -> Byte)"}, )}, {"Int", Type(BigIntType), "Int_t", "Int$info", TypedArray(ns_entry_t, {"abs", "Int$abs", "func(x:Int -> Int)"}, @@ -118,7 +124,6 @@ env_t *new_compilation_unit(CORD libname) {"plus", "Int$plus", "func(x,y:Int -> Int)"}, {"power", "Int$power", "func(base:Int,exponent:Int -> Int)"}, {"prev_prime", "Int$prev_prime", "func(x:Int -> Int)"}, - {"random", "Int$random", "func(min,max:Int -> Int)"}, {"right_shifted", "Int$right_shifted", "func(x,y:Int -> Int)"}, {"sqrt", "Int$sqrt", "func(x:Int -> Int)"}, {"times", "Int$times", "func(x,y:Int -> Int)"}, @@ -142,8 +147,6 @@ env_t *new_compilation_unit(CORD libname) {"unsigned_right_shifted", "Int64$unsigned_right_shifted", "func(x:Int64,y:Int64 -> Int64)"}, {"wrapping_minus", 
"Int64$wrapping_minus", "func(x:Int64,y:Int64 -> Int64)"}, {"wrapping_plus", "Int64$wrapping_plus", "func(x:Int64,y:Int64 -> Int64)"}, - // Must be defined after min/max: - {"random", "Int64$random", "func(min=Int64.min, max=Int64.max -> Int64)"}, )}, {"Int32", Type(IntType, .bits=TYPE_IBITS32), "Int32_t", "Int32$info", TypedArray(ns_entry_t, {"abs", "abs", "func(i:Int32 -> Int32)"}, @@ -163,8 +166,6 @@ env_t *new_compilation_unit(CORD libname) {"unsigned_right_shifted", "Int32$unsigned_right_shifted", "func(x:Int32,y:Int32 -> Int32)"}, {"wrapping_minus", "Int32$wrapping_minus", "func(x:Int32,y:Int32 -> Int32)"}, {"wrapping_plus", "Int32$wrapping_plus", "func(x:Int32,y:Int32 -> Int32)"}, - // Must be defined after min/max: - {"random", "Int32$random", "func(min=Int32.min, max=Int32.max -> Int32)"}, )}, {"Int16", Type(IntType, .bits=TYPE_IBITS16), "Int16_t", "Int16$info", TypedArray(ns_entry_t, {"abs", "abs", "func(i:Int16 -> Int16)"}, @@ -184,8 +185,6 @@ env_t *new_compilation_unit(CORD libname) {"unsigned_right_shifted", "Int16$unsigned_right_shifted", "func(x:Int16,y:Int16 -> Int16)"}, {"wrapping_minus", "Int16$wrapping_minus", "func(x:Int16,y:Int16 -> Int16)"}, {"wrapping_plus", "Int16$wrapping_plus", "func(x:Int16,y:Int16 -> Int16)"}, - // Must be defined after min/max: - {"random", "Int16$random", "func(min=Int16.min, max=Int16.max -> Int16)"}, )}, {"Int8", Type(IntType, .bits=TYPE_IBITS8), "Int8_t", "Int8$info", TypedArray(ns_entry_t, {"abs", "abs", "func(i:Int8 -> Int8)"}, @@ -205,8 +204,6 @@ env_t *new_compilation_unit(CORD libname) {"unsigned_right_shifted", "Int8$unsigned_right_shifted", "func(x:Int8,y:Int8 -> Int8)"}, {"wrapping_minus", "Int8$wrapping_minus", "func(x:Int8,y:Int8 -> Int8)"}, {"wrapping_plus", "Int8$wrapping_plus", "func(x:Int8,y:Int8 -> Int8)"}, - // Must be defined after min/max: - {"random", "Int8$random", "func(min=Int8.min, max=Int8.max -> Int8)"}, )}, #define C(name) {#name, "M_"#name, "Num"} #define F(name) {#name, #name, 
"func(n:Num -> Num)"} @@ -224,7 +221,6 @@ env_t *new_compilation_unit(CORD libname) C(PI), C(PI_4), C(SQRT2), C(SQRT1_2), {"INF", "(Num_t)(INFINITY)", "Num"}, {"TAU", "(Num_t)(2.*M_PI)", "Num"}, - {"random", "Num$random", "func(->Num)"}, {"mix", "Num$mix", "func(amount,x,y:Num -> Num)"}, {"from_text", "Num$from_text", "func(text:Text -> Num?)"}, {"abs", "fabs", "func(n:Num -> Num)"}, @@ -253,7 +249,6 @@ env_t *new_compilation_unit(CORD libname) C(PI), C(PI_4), C(SQRT2), C(SQRT1_2), {"INF", "(Num32_t)(INFINITY)", "Num32"}, {"TAU", "(Num32_t)(2.f*M_PI)", "Num32"}, - {"random", "Num32$random", "func(->Num32)"}, {"mix", "Num32$mix", "func(amount,x,y:Num32 -> Num32)"}, {"from_text", "Num32$from_text", "func(text:Text -> Num32?)"}, {"abs", "fabsf", "func(n:Num32 -> Num32)"}, @@ -349,6 +344,22 @@ env_t *new_compilation_unit(CORD libname) {"replace_all", "Text$replace_all", "func(path:Path, replacements:{Pattern:Text}, backref=$/\\/, recursive=yes -> Path)"}, {"starts_with", "Text$starts_with", "func(path:Path, prefix:Text -> Bool)"}, )}, + // RNG must come after Path so we can read bytes from /dev/urandom + {"RNG", RNG_TYPE, "RNG_t", "RNG", TypedArray(ns_entry_t, + {"bool", "RNG$bool", "func(rng:RNG, p=0.5 -> Bool)"}, + {"byte", "RNG$byte", "func(rng:RNG -> Byte)"}, + {"bytes", "RNG$bytes", "func(rng:RNG, count:Int -> [Byte])"}, + {"copy", "RNG$copy", "func(rng:RNG -> RNG)"}, + {"int", "RNG$int", "func(rng:RNG, min,max:Int -> Int)"}, + {"int16", "RNG$int16", "func(rng:RNG, min=Int16.min, max=Int16.max -> Int16)"}, + {"int32", "RNG$int32", "func(rng:RNG, min=Int32.min, max=Int32.max -> Int32)"}, + {"int64", "RNG$int64", "func(rng:RNG, min=Int64.min, max=Int64.max -> Int64)"}, + {"int8", "RNG$int8", "func(rng:RNG, min=Int8.min, max=Int8.max -> Int8)"}, + {"new", "RNG$new", "func(seed=(/dev/urandom):read_bytes(40) -> RNG)"}, + {"num", "RNG$num", "func(rng:RNG, min=0.0, max=1.0 -> Num)"}, + {"num32", "RNG$num32", "func(rng:RNG, min=0.0_f32, max=1.0_f32 -> Num32)"}, + 
{"set_seed", "RNG$set_seed", "func(rng:RNG, seed:[Byte])"}, + )}, {"Shell", Type(TextType, .lang="Shell", .env=namespace_env(env, "Shell")), "Shell_t", "Shell$info", TypedArray(ns_entry_t, {"by_line", "Shell$by_line", "func(command:Shell -> func(->Text?)?)"}, {"escape_int", "Int$value_as_text", "func(i:Int -> Shell)"}, @@ -446,6 +457,8 @@ env_t *new_compilation_unit(CORD libname) .ret=Type(TextType, .lang="Pattern", .env=namespace_env(env, "Pattern"))), .code="(Pattern_t)")); + Table$str_set(env->globals, "random", new(binding_t, .type=RNG_TYPE, .code="default_rng")); + env_t *lib_env = fresh_scope(env); lib_env->libname = libname; return lib_env; diff --git a/environment.h b/environment.h index 2fc0771..a579dbd 100644 --- a/environment.h +++ b/environment.h @@ -81,6 +81,7 @@ binding_t *get_namespace_binding(env_t *env, ast_t *self, const char *name); #define code_err(ast, ...) compiler_err((ast)->file, (ast)->start, (ast)->end, __VA_ARGS__) extern type_t *TEXT_TYPE; extern type_t *RANGE_TYPE; +extern type_t *RNG_TYPE; extern type_t *THREAD_TYPE; // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/stdlib/arrays.c b/stdlib/arrays.c index 552fb4c..8814f31 100644 --- a/stdlib/arrays.c +++ b/stdlib/arrays.c @@ -8,6 +8,7 @@ #include "arrays.h" #include "metamethods.h" #include "optionals.h" +#include "rng.h" #include "tables.h" #include "text.h" #include "util.h" @@ -249,51 +250,34 @@ public Array_t Array$sorted(Array_t arr, Closure_t comparison, int64_t padded_it return arr; } -static uint64_t random_range(Closure_t rng, uint64_t upper_bound) -{ - if (upper_bound < 2) - return 0; - - // This approach is taken from arc4random_uniform() - uint64_t min = -upper_bound % upper_bound; - uint64_t r; - for (;;) { - r = ((uint64_t(*)(void*))rng.fn)(rng.userdata); - if (r >= min) - break; - } - - return r % upper_bound; -} - -public void Array$shuffle(Array_t *arr, Closure_t rng, int64_t padded_item_size) +public void Array$shuffle(Array_t *arr, RNG_t rng, int64_t 
padded_item_size) { if (arr->data_refcount != 0 || (int64_t)arr->stride != padded_item_size) Array$compact(arr, padded_item_size); char tmp[padded_item_size]; for (int64_t i = arr->length-1; i > 1; i--) { - int64_t j = (int64_t)random_range(rng, (uint64_t)(i+1)); + int64_t j = RNG$int64(rng, 0, i); memcpy(tmp, arr->data + i*padded_item_size, (size_t)padded_item_size); memcpy((void*)arr->data + i*padded_item_size, arr->data + j*padded_item_size, (size_t)padded_item_size); memcpy((void*)arr->data + j*padded_item_size, tmp, (size_t)padded_item_size); } } -public Array_t Array$shuffled(Array_t arr, Closure_t rng, int64_t padded_item_size) +public Array_t Array$shuffled(Array_t arr, RNG_t rng, int64_t padded_item_size) { Array$compact(&arr, padded_item_size); Array$shuffle(&arr, rng, padded_item_size); return arr; } -public void *Array$random(Array_t arr, Closure_t rng) +public void *Array$random(Array_t arr, RNG_t rng) { if (arr.length == 0) return NULL; // fail("Cannot get a random item from an empty array!"); - uint64_t index = random_range(rng, (uint64_t)arr.length); - return arr.data + arr.stride*(int64_t)index; + int64_t index = RNG$int64(rng, 0, arr.length-1); + return arr.data + arr.stride*index; } public Table_t Array$counts(Array_t arr, const TypeInfo_t *type) @@ -310,7 +294,7 @@ public Table_t Array$counts(Array_t arr, const TypeInfo_t *type) return counts; } -public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, int64_t padded_item_size) +public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, RNG_t rng, int64_t padded_item_size) { int64_t n = Int_to_Int64(int_n, false); if (n < 0) @@ -329,7 +313,7 @@ public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, int64_t p if (weights.length < 0) { for (int64_t i = 0; i < n; i++) { - int64_t index = arc4random_uniform(arr.length); + int64_t index = RNG$int64(rng, 0, arr.length-1); memcpy(selected.data + i*padded_item_size, arr.data + arr.stride*index, 
(size_t)padded_item_size); } return selected; @@ -393,7 +377,7 @@ public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, int64_t p aliases[i].alias = i; for (int64_t i = 0; i < n; i++) { - double r = drand48() * arr.length; + double r = RNG$num(rng, 0, arr.length); int64_t index = (int64_t)r; if ((r - (double)index) > aliases[index].odds) index = aliases[index].alias; diff --git a/stdlib/arrays.h b/stdlib/arrays.h index 5d452d3..251e9f9 100644 --- a/stdlib/arrays.h +++ b/stdlib/arrays.h @@ -70,11 +70,11 @@ Int_t Array$find(Array_t arr, void *item, const TypeInfo_t *type); Int_t Array$first(Array_t arr, Closure_t predicate); void Array$sort(Array_t *arr, Closure_t comparison, int64_t padded_item_size); Array_t Array$sorted(Array_t arr, Closure_t comparison, int64_t padded_item_size); -void Array$shuffle(Array_t *arr, Closure_t rng, int64_t padded_item_size); -Array_t Array$shuffled(Array_t arr, Closure_t rng, int64_t padded_item_size); -void *Array$random(Array_t arr, Closure_t rng); +void Array$shuffle(Array_t *arr, RNG_t rng, int64_t padded_item_size); +Array_t Array$shuffled(Array_t arr, RNG_t rng, int64_t padded_item_size); +void *Array$random(Array_t arr, RNG_t rng); #define Array$random_value(arr, rng, t) ({ Array_t _arr = arr; if (_arr.length == 0) fail("Cannot get a random value from an empty array!"); *(t*)Array$random(_arr, rng); }) -Array_t Array$sample(Array_t arr, Int_t n, Array_t weights, int64_t padded_item_size); +Array_t Array$sample(Array_t arr, Int_t n, Array_t weights, RNG_t rng, int64_t padded_item_size); Table_t Array$counts(Array_t arr, const TypeInfo_t *type); void Array$clear(Array_t *array); void Array$compact(Array_t *arr, int64_t padded_item_size); diff --git a/stdlib/bools.c b/stdlib/bools.c index c815c73..bc1a644 100644 --- a/stdlib/bools.c +++ b/stdlib/bools.c @@ -39,11 +39,6 @@ PUREFUNC public OptionalBool_t Bool$from_text(Text_t text) } } -public Bool_t Bool$random(double p) -{ - return (drand48() < p); -} - public 
const TypeInfo_t Bool$info = { .size=sizeof(bool), .align=__alignof__(bool), diff --git a/stdlib/bools.h b/stdlib/bools.h index 4b0b42a..fbc40f8 100644 --- a/stdlib/bools.h +++ b/stdlib/bools.h @@ -15,7 +15,6 @@ PUREFUNC Text_t Bool$as_text(const bool *b, bool colorize, const TypeInfo_t *type); OptionalBool_t Bool$from_text(Text_t text); -Bool_t Bool$random(double p); extern const TypeInfo_t Bool$info; diff --git a/stdlib/bytes.c b/stdlib/bytes.c index f7e9057..8d66579 100644 --- a/stdlib/bytes.c +++ b/stdlib/bytes.c @@ -17,17 +17,6 @@ PUREFUNC public Text_t Byte$as_text(const Byte_t *b, bool colorize, const TypeIn return Text$format(colorize ? "\x1b[35m%u[B]\x1b[m" : "%u[B]", *b); } -public Byte_t Byte$random(Byte_t min, Byte_t max) -{ - if (min > max) - fail("Random minimum value (%u) is larger than the maximum value (%u)", min, max); - if (min == max) - return min; - - uint32_t r = arc4random_uniform((uint32_t)max - (uint32_t)min + 1u); - return (Byte_t)(min + r); -} - public const TypeInfo_t Byte$info = { .size=sizeof(Byte_t), .align=__alignof__(Byte_t), diff --git a/stdlib/bytes.h b/stdlib/bytes.h index 62b266e..2a92f65 100644 --- a/stdlib/bytes.h +++ b/stdlib/bytes.h @@ -12,7 +12,6 @@ #define Byte(b) ((Byte_t)(b)) PUREFUNC Text_t Byte$as_text(const Byte_t *b, bool colorize, const TypeInfo_t *type); -Byte_t Byte$random(Byte_t min, Byte_t max); extern const Byte_t Byte$min; extern const Byte_t Byte$max; diff --git a/stdlib/chacha.h b/stdlib/chacha.h new file mode 100644 index 0000000..1bca008 --- /dev/null +++ b/stdlib/chacha.h @@ -0,0 +1,223 @@ +/* +chacha-merged.c version 20080118 +D. J. Bernstein +Public domain. 
+*/ + +/* $OpenBSD: chacha_private.h,v 1.3 2022/02/28 21:56:29 dtucker Exp $ */ +/* Tomo: chacha.h,v 1.0 2024/11/03 Bruce Hill */ + +typedef unsigned char u8; +typedef unsigned int u32; + +typedef struct +{ + u32 input[16]; /* could be compressed */ +} chacha_ctx; + +#define U8C(v) (v##U) +#define U32C(v) (v##U) + +#define U8V(v) ((u8)(v) & U8C(0xFF)) +#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) + +#define ROTL32(v, n) \ + (U32V((v) << (n)) | ((v) >> (32 - (n)))) + +#define U8TO32_LITTLE(p) \ + (((u32)((p)[0]) ) | \ + ((u32)((p)[1]) << 8) | \ + ((u32)((p)[2]) << 16) | \ + ((u32)((p)[3]) << 24)) + +#define U32TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v) ); \ + (p)[1] = U8V((v) >> 8); \ + (p)[2] = U8V((v) >> 16); \ + (p)[3] = U8V((v) >> 24); \ + } while (0) + +#define ROTATE(v,c) (ROTL32(v,c)) +#define XOR(v,w) ((v) ^ (w)) +#define PLUS(v,w) (U32V((v) + (w))) +#define PLUSONE(v) (PLUS((v),1)) + +#define QUARTERROUND(a,b,c,d) \ + a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ + a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); + +static const char sigma[16] = "expand 32-byte k"; +static const char tau[16] = "expand 16-byte k"; + +static void +chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits) +{ + const char *constants; + + x->input[4] = U8TO32_LITTLE(k + 0); + x->input[5] = U8TO32_LITTLE(k + 4); + x->input[6] = U8TO32_LITTLE(k + 8); + x->input[7] = U8TO32_LITTLE(k + 12); + if (kbits == 256) { /* recommended */ + k += 16; + constants = sigma; + } else { /* kbits == 128 */ + constants = tau; + } + x->input[8] = U8TO32_LITTLE(k + 0); + x->input[9] = U8TO32_LITTLE(k + 4); + x->input[10] = U8TO32_LITTLE(k + 8); + x->input[11] = U8TO32_LITTLE(k + 12); + x->input[0] = U8TO32_LITTLE(constants + 0); + x->input[1] = U8TO32_LITTLE(constants + 4); + x->input[2] = U8TO32_LITTLE(constants + 8); + x->input[3] = U8TO32_LITTLE(constants + 12); +} + +static void +chacha_ivsetup(chacha_ctx *x,const u8 *iv) 
+{ + x->input[12] = 0; + x->input[13] = 0; + x->input[14] = U8TO32_LITTLE(iv + 0); + x->input[15] = U8TO32_LITTLE(iv + 4); +} + +static void +chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) +{ + u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + u8 *ctarget = NULL; + u8 tmp[64]; + u_int i; + + if (!bytes) return; + + j0 = x->input[0]; + j1 = x->input[1]; + j2 = x->input[2]; + j3 = x->input[3]; + j4 = x->input[4]; + j5 = x->input[5]; + j6 = x->input[6]; + j7 = x->input[7]; + j8 = x->input[8]; + j9 = x->input[9]; + j10 = x->input[10]; + j11 = x->input[11]; + j12 = x->input[12]; + j13 = x->input[13]; + j14 = x->input[14]; + j15 = x->input[15]; + + for (;;) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) tmp[i] = m[i]; + m = tmp; + ctarget = c; + c = tmp; + } + x0 = j0; + x1 = j1; + x2 = j2; + x3 = j3; + x4 = j4; + x5 = j5; + x6 = j6; + x7 = j7; + x8 = j8; + x9 = j9; + x10 = j10; + x11 = j11; + x12 = j12; + x13 = j13; + x14 = j14; + x15 = j15; + for (i = 20;i > 0;i -= 2) { + QUARTERROUND( x0, x4, x8,x12) + QUARTERROUND( x1, x5, x9,x13) + QUARTERROUND( x2, x6,x10,x14) + QUARTERROUND( x3, x7,x11,x15) + QUARTERROUND( x0, x5,x10,x15) + QUARTERROUND( x1, x6,x11,x12) + QUARTERROUND( x2, x7, x8,x13) + QUARTERROUND( x3, x4, x9,x14) + } + x0 = PLUS(x0,j0); + x1 = PLUS(x1,j1); + x2 = PLUS(x2,j2); + x3 = PLUS(x3,j3); + x4 = PLUS(x4,j4); + x5 = PLUS(x5,j5); + x6 = PLUS(x6,j6); + x7 = PLUS(x7,j7); + x8 = PLUS(x8,j8); + x9 = PLUS(x9,j9); + x10 = PLUS(x10,j10); + x11 = PLUS(x11,j11); + x12 = PLUS(x12,j12); + x13 = PLUS(x13,j13); + x14 = PLUS(x14,j14); + x15 = PLUS(x15,j15); + +#ifndef KEYSTREAM_ONLY + x0 = XOR(x0,U8TO32_LITTLE(m + 0)); + x1 = XOR(x1,U8TO32_LITTLE(m + 4)); + x2 = XOR(x2,U8TO32_LITTLE(m + 8)); + x3 = XOR(x3,U8TO32_LITTLE(m + 12)); + x4 = XOR(x4,U8TO32_LITTLE(m + 16)); + x5 = XOR(x5,U8TO32_LITTLE(m + 20)); + x6 = XOR(x6,U8TO32_LITTLE(m + 24)); + x7 = 
XOR(x7,U8TO32_LITTLE(m + 28)); + x8 = XOR(x8,U8TO32_LITTLE(m + 32)); + x9 = XOR(x9,U8TO32_LITTLE(m + 36)); + x10 = XOR(x10,U8TO32_LITTLE(m + 40)); + x11 = XOR(x11,U8TO32_LITTLE(m + 44)); + x12 = XOR(x12,U8TO32_LITTLE(m + 48)); + x13 = XOR(x13,U8TO32_LITTLE(m + 52)); + x14 = XOR(x14,U8TO32_LITTLE(m + 56)); + x15 = XOR(x15,U8TO32_LITTLE(m + 60)); +#endif + + j12 = PLUSONE(j12); + if (!j12) { + j13 = PLUSONE(j13); + /* stopping at 2^70 bytes per nonce is user's responsibility */ + } + + U32TO8_LITTLE(c + 0,x0); + U32TO8_LITTLE(c + 4,x1); + U32TO8_LITTLE(c + 8,x2); + U32TO8_LITTLE(c + 12,x3); + U32TO8_LITTLE(c + 16,x4); + U32TO8_LITTLE(c + 20,x5); + U32TO8_LITTLE(c + 24,x6); + U32TO8_LITTLE(c + 28,x7); + U32TO8_LITTLE(c + 32,x8); + U32TO8_LITTLE(c + 36,x9); + U32TO8_LITTLE(c + 40,x10); + U32TO8_LITTLE(c + 44,x11); + U32TO8_LITTLE(c + 48,x12); + U32TO8_LITTLE(c + 52,x13); + U32TO8_LITTLE(c + 56,x14); + U32TO8_LITTLE(c + 60,x15); + + if (bytes <= 64) { + if (bytes < 64) { + for (i = 0;i < bytes;++i) ctarget[i] = c[i]; + } + x->input[12] = j12; + x->input[13] = j13; + return; + } + bytes -= 64; + c += 64; +#ifndef KEYSTREAM_ONLY + m += 64; +#endif + } +} diff --git a/stdlib/datatypes.h b/stdlib/datatypes.h index 8d342d2..f033a21 100644 --- a/stdlib/datatypes.h +++ b/stdlib/datatypes.h @@ -93,4 +93,6 @@ typedef struct Text_s { typedef struct timeval DateTime_t; #define OptionalDateTime_t DateTime_t +typedef struct RNGState_t* RNG_t; + // vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/stdlib/integers.c b/stdlib/integers.c index 6b0a9dd..f6140da 100644 --- a/stdlib/integers.c +++ b/stdlib/integers.c @@ -14,13 +14,6 @@ #include "text.h" #include "types.h" -static _Thread_local gmp_randstate_t Int_rng = {}; - -public void Int$init_random(long seed) { - gmp_randinit_default(Int_rng); - gmp_randseed_ui(Int_rng, (unsigned long)seed); -} - public Text_t Int$value_as_text(Int_t i) { if (__builtin_expect(i.small & 1, 1)) { return Text$format("%ld", (i.small)>>2); @@ -316,31 
+309,6 @@ public Int_t Int$sqrt(Int_t i) return Int$from_mpz(result); } -public Int_t Int$random(Int_t min, Int_t max) { - int32_t cmp = Int$compare_value(min, max); - if (cmp > 0) { - Text_t min_text = Int$as_text(&min, false, &Int$info), max_text = Int$as_text(&max, false, &Int$info); - fail("Random minimum value (%k) is larger than the maximum value (%k)", - &min_text, &max_text); - } - if (cmp == 0) return min; - - mpz_t range_size; - mpz_init_set_int(range_size, max); - if (min.small & 1) { - mpz_t min_mpz; - mpz_init_set_si(min_mpz, min.small >> 2); - mpz_sub(range_size, range_size, min_mpz); - } else { - mpz_sub(range_size, range_size, *min.big); - } - - mpz_t r; - mpz_init(r); - mpz_urandomm(r, Int_rng, range_size); - return Int$plus(min, Int$from_mpz(r)); -} - public PUREFUNC Range_t Int$to(Int_t from, Int_t to) { return (Range_t){from, to, Int$compare_value(to, from) >= 0 ? (Int_t){.small=(1<<2)|1} : (Int_t){.small=(-1>>2)|1}}; } @@ -440,25 +408,6 @@ public const TypeInfo_t Int$info = { } \ return bit_array; \ } \ - public c_type KindOfInt ## $full_random(void) { \ - c_type r; \ - arc4random_buf(&r, sizeof(r)); \ - return r; \ - } \ - public c_type KindOfInt ## $random(c_type min, c_type max) { \ - if (min > max) fail("Random minimum value (%ld) is larger than the maximum value (%ld)", min, max); \ - if (min == max) return min; \ - if (min == min_val && max == max_val) \ - return KindOfInt ## $full_random(); \ - uint64_t range = (uint64_t)max - (uint64_t)min + 1; \ - uint64_t min_r = -range % range; \ - uint64_t r; \ - for (;;) { \ - arc4random_buf(&r, sizeof(r)); \ - if (r >= min_r) break; \ - } \ - return (c_type)((uint64_t)min + (r % range)); \ - } \ public to_attr Range_t KindOfInt ## $to(c_type from, c_type to) { \ return (Range_t){Int64_to_Int(from), Int64_to_Int(to), to >= from ? 
(Int_t){.small=(1<<2)&1} : (Int_t){.small=(1<<2)&1}}; \ } \ diff --git a/stdlib/integers.h b/stdlib/integers.h index 0469916..b441e2b 100644 --- a/stdlib/integers.h +++ b/stdlib/integers.h @@ -34,8 +34,6 @@ Text_t type_name ## $hex(c_type i, Int_t digits, bool uppercase, bool prefix); \ Text_t type_name ## $octal(c_type i, Int_t digits, bool prefix); \ Array_t type_name ## $bits(c_type x); \ - c_type type_name ## $random(c_type min, c_type max); \ - c_type type_name ## $full_random(void); \ to_attr Range_t type_name ## $to(c_type from, c_type to); \ PUREFUNC Optional ## type_name ## _t type_name ## $from_text(Text_t text); \ MACROLIKE PUREFUNC c_type type_name ## $clamped(c_type x, c_type min, c_type max) { \ @@ -103,8 +101,6 @@ PUREFUNC bool Int$equal_value(const Int_t x, const Int_t y); Text_t Int$format(Int_t i, Int_t digits); Text_t Int$hex(Int_t i, Int_t digits, bool uppercase, bool prefix); Text_t Int$octal(Int_t i, Int_t digits, bool prefix); -void Int$init_random(long seed); -Int_t Int$random(Int_t min, Int_t max); PUREFUNC Range_t Int$to(Int_t from, Int_t to); OptionalInt_t Int$from_str(const char *str); OptionalInt_t Int$from_text(Text_t text); @@ -127,7 +123,7 @@ Int_t Int$sqrt(Int_t i); } while (0) #define I(i) ((int64_t)(i) == (int32_t)(i) ? ((Int_t){.small=(int64_t)((uint64_t)(i)<<2)|1}) : Int64_to_Int(i)) -#define I_small(i) ((Int_t){.small=((uint64_t)(i)<<2)|1}) +#define I_small(i) ((Int_t){.small=(int64_t)((uint64_t)(i)<<2)|1}) #define I_is_zero(i) ((i).small == 1) Int_t Int$slow_plus(Int_t x, Int_t y); diff --git a/stdlib/nums.c b/stdlib/nums.c index 1a8fec2..b8de553 100644 --- a/stdlib/nums.c +++ b/stdlib/nums.c @@ -57,10 +57,6 @@ public CONSTFUNC double Num$mod(double num, double modulus) { return (result < 0) != (modulus < 0) ? 
result + modulus : result; } -public double Num$random(void) { - return drand48(); -} - public CONSTFUNC double Num$mix(double amount, double x, double y) { return (1.0-amount)*x + amount*y; } @@ -138,10 +134,6 @@ public CONSTFUNC float Num32$mod(float num, float modulus) { return (result < 0) != (modulus < 0) ? result + modulus : result; } -public float Num32$random(void) { - return (float)drand48(); -} - public CONSTFUNC float Num32$mix(float amount, float x, float y) { return (1.0f-amount)*x + amount*y; } diff --git a/stdlib/nums.h b/stdlib/nums.h index fc6d804..a07a271 100644 --- a/stdlib/nums.h +++ b/stdlib/nums.h @@ -27,7 +27,6 @@ CONSTFUNC bool Num$isinf(double n); CONSTFUNC bool Num$finite(double n); CONSTFUNC bool Num$isnan(double n); double Num$nan(Text_t tag); -double Num$random(void); CONSTFUNC double Num$mix(double amount, double x, double y); OptionalNum_t Num$from_text(Text_t text); MACROLIKE CONSTFUNC double Num$clamped(double x, double low, double high) { @@ -45,7 +44,6 @@ float Num32$mod(float num, float modulus); CONSTFUNC bool Num32$isinf(float n); CONSTFUNC bool Num32$finite(float n); CONSTFUNC bool Num32$isnan(float n); -float Num32$random(void); CONSTFUNC float Num32$mix(float amount, float x, float y); OptionalNum32_t Num32$from_text(Text_t text); float Num32$nan(Text_t tag); diff --git a/stdlib/rng.c b/stdlib/rng.c new file mode 100644 index 0000000..c69a277 --- /dev/null +++ b/stdlib/rng.c @@ -0,0 +1,265 @@ +// Random Number Generator (RNG) implementation based on ChaCha + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arrays.h" +#include "datatypes.h" +#include "rng.h" +#include "text.h" +#include "util.h" + +#include "chacha.h" + +public _Thread_local RNG_t default_rng; + +struct RNGState_t { + chacha_ctx chacha; + size_t unused_bytes; + uint8_t buf[16*64]; +}; + +PUREFUNC static Text_t RNG$as_text(const RNG_t *rng, bool colorize, const TypeInfo_t *type) +{ + (void)type; + if (!rng) return 
Text("RNG"); + return Text$format(colorize ? "\x1b[34;1mRNG(%p)\x1b[m" : "RNG(%p)", *rng); +} + +#define KEYSZ 32 +#define IVSZ 8 + +public void RNG$set_seed(RNG_t rng, Array_t seed) /* (re)seed from up to KEYSZ+IVSZ bytes; missing bytes are treated as 0 */ +{ + uint8_t seed_bytes[KEYSZ + IVSZ] = {}; + for (int64_t i = 0; i < (int64_t)sizeof(seed_bytes); i++) + seed_bytes[i] = i < seed.length ? *(uint8_t*)(seed.data + i*seed.stride) : 0; + + memset(rng, 0, sizeof(*rng)); /* resets unused_bytes and clears buf: rekey() feeds buf to chacha_encrypt_bytes() as the message, so stale or uninitialized contents would leak into the stream */ + chacha_keysetup(&rng->chacha, seed_bytes, KEYSZ*8); /* kbits: only 256 makes chacha_keysetup() use all KEYSZ key bytes */ + chacha_ivsetup(&rng->chacha, seed_bytes + KEYSZ); +} + +public RNG_t RNG$copy(RNG_t rng) +{ + RNG_t copy = GC_MALLOC_ATOMIC(sizeof(struct RNGState_t)); + *copy = *rng; + return copy; +} + +public RNG_t RNG$new(Array_t seed) +{ + RNG_t rng = GC_MALLOC_ATOMIC(sizeof(struct RNGState_t)); + RNG$set_seed(rng, seed); + return rng; +} + +static void rekey(RNG_t rng) +{ + // Fill the buffer with the keystream + chacha_encrypt_bytes(&rng->chacha, rng->buf, rng->buf, sizeof(rng->buf)); + // Immediately reinitialize for backtracking resistance + chacha_keysetup(&rng->chacha, rng->buf, KEYSZ*8); /* kbits, not bytes: 256 selects the full-key path */ + chacha_ivsetup(&rng->chacha, rng->buf + KEYSZ); + memset(rng->buf, 0, KEYSZ + IVSZ); + rng->unused_bytes = sizeof(rng->buf) - KEYSZ - IVSZ; +} + +static void random_bytes(RNG_t rng, uint8_t *dest, size_t needed) +{ + while (needed > 0) { + if (rng->unused_bytes > 0) { + size_t to_get = MIN(needed, rng->unused_bytes); + uint8_t *keystream = rng->buf + sizeof(rng->buf) - rng->unused_bytes; + memcpy(dest, keystream, to_get); + memset(keystream, 0, to_get); + dest += to_get; + needed -= to_get; + rng->unused_bytes -= to_get; + } + if (rng->unused_bytes == 0) + rekey(rng); + } +} + +public Bool_t RNG$bool(RNG_t rng, Num_t p) +{ + if (p == 0.5) { + uint8_t b; + random_bytes(rng, &b, sizeof(b)); + return b & 1; + } else { + return RNG$num(rng, 0.0, 1.0) < p; + } +} + +public Int_t RNG$int(RNG_t rng, Int_t min, Int_t max) +{ + if (__builtin_expect(((min.small & max.small) & 1) != 0, 1)) { + int32_t r = RNG$int32(rng, (int32_t)(min.small >> 2),
(int32_t)(max.small >> 2)); + return I_small(r); + } + + int32_t cmp = Int$compare_value(min, max); + if (cmp > 0) { + Text_t min_text = Int$as_text(&min, false, &Int$info), max_text = Int$as_text(&max, false, &Int$info); + fail("Random minimum value (%k) is larger than the maximum value (%k)", + &min_text, &max_text); + } + if (cmp == 0) return min; + + mpz_t range_size; + mpz_init_set_int(range_size, max); + if (min.small & 1) { + mpz_t min_mpz; + mpz_init_set_si(min_mpz, min.small >> 2); + mpz_sub(range_size, range_size, min_mpz); + } else { + mpz_sub(range_size, range_size, *min.big); + } + mpz_add_ui(range_size, range_size, 1); /* mpz_urandomm() draws from [0, n-1], so n must be max-min+1 for max itself to be reachable */ + gmp_randstate_t gmp_rng; + gmp_randinit_default(gmp_rng); + gmp_randseed_ui(gmp_rng, (unsigned long)RNG$int64(rng, INT64_MIN, INT64_MAX)); + + mpz_t r; + mpz_init(r); + mpz_urandomm(r, gmp_rng, range_size); + + gmp_randclear(gmp_rng); + return Int$plus(min, Int$from_mpz(r)); +} + +public Int64_t RNG$int64(RNG_t rng, Int64_t min, Int64_t max) +{ + if (min > max) fail("Random minimum value (%ld) is larger than the maximum value (%ld)", min, max); + if (min == max) return min; + if (min == INT64_MIN && max == INT64_MAX) { + int64_t r; + random_bytes(rng, (uint8_t*)&r, sizeof(r)); + return r; + } + uint64_t range = (uint64_t)max - (uint64_t)min + 1; + uint64_t min_r = -range % range; + uint64_t r; + for (;;) { + random_bytes(rng, (uint8_t*)&r, sizeof(r)); + if (r >= min_r) break; + } + return (int64_t)((uint64_t)min + (r % range)); +} + +public Int32_t RNG$int32(RNG_t rng, Int32_t min, Int32_t max) +{ + if (min > max) fail("Random minimum value (%d) is larger than the maximum value (%d)", min, max); + if (min == max) return min; + if (min == INT32_MIN && max == INT32_MAX) { + int32_t r; + random_bytes(rng, (uint8_t*)&r, sizeof(r)); + return r; + } + uint32_t range = (uint32_t)max - (uint32_t)min + 1; + uint32_t min_r = -range % range; + uint32_t r; + for (;;) { + random_bytes(rng, (uint8_t*)&r, sizeof(r)); + if (r >= min_r) break; + } + return (int32_t)((uint32_t)min + (r % 
range)); +} + +public Int16_t RNG$int16(RNG_t rng, Int16_t min, Int16_t max) /* uniform draw from [min, max], both ends inclusive */ +{ + if (min > max) fail("Random minimum value (%d) is larger than the maximum value (%d)", min, max); + if (min == max) return min; + if (min == INT16_MIN && max == INT16_MAX) { + int16_t r; + random_bytes(rng, (uint8_t*)&r, sizeof(r)); + return r; + } + uint16_t range = (uint16_t)max - (uint16_t)min + 1; + uint16_t min_r = (uint16_t)(-range) % range; /* 2^16 mod range; without the cast, integer promotion makes -range % range == 0 and nothing is ever rejected */ + uint16_t r; + for (;;) { + random_bytes(rng, (uint8_t*)&r, sizeof(r)); + if (r >= min_r) break; + } + return (int16_t)((uint16_t)min + (r % range)); +} + +public Int8_t RNG$int8(RNG_t rng, Int8_t min, Int8_t max) /* uniform draw from [min, max], both ends inclusive */ +{ + if (min > max) fail("Random minimum value (%d) is larger than the maximum value (%d)", min, max); + if (min == max) return min; + if (min == INT8_MIN && max == INT8_MAX) { + int8_t r; + random_bytes(rng, (uint8_t*)&r, sizeof(r)); + return r; + } + uint8_t range = (uint8_t)max - (uint8_t)min + 1; + uint8_t min_r = (uint8_t)(-range) % range; /* 2^8 mod range; the cast undoes integer promotion so biased low draws are rejected */ + uint8_t r; + for (;;) { + random_bytes(rng, (uint8_t*)&r, sizeof(r)); + if (r >= min_r) break; + } + return (int8_t)((uint8_t)min + (r % range)); +} + +public Num_t RNG$num(RNG_t rng, Num_t min, Num_t max) +{ + if (min > max) fail("Random minimum value (%g) is larger than the maximum value (%g)", min, max); + if (min == max) return min; + + union { + Num_t num; + uint64_t bits; + } r, one = {.num=1.0}; + random_bytes(rng, (void*)&r, sizeof(r)); + + // Set r.num to 1. 
+ r.bits &= ~(0xFFFULL << 52); + r.bits |= (one.bits & (0xFFFULL << 52)); + + r.num -= 1.0; + + if (min == 0.0 && max == 1.0) + return r.num; + + return (1.0-r.num)*min + r.num*max; +} + +public Num32_t RNG$num32(RNG_t rng, Num32_t min, Num32_t max) +{ + return (Num32_t)RNG$num(rng, (Num_t)min, (Num_t)max); +} + +public Byte_t RNG$byte(RNG_t rng) +{ + Byte_t b; + random_bytes(rng, &b, sizeof(b)); + return b; +} + +public Array_t RNG$bytes(RNG_t rng, Int_t count) +{ + int64_t n = Int_to_Int64(count, false); + Byte_t *r = GC_MALLOC_ATOMIC(sizeof(Byte_t[n])); + random_bytes(rng, r, sizeof(Byte_t[n])); + return (Array_t){.data=r, .length=n, .stride=1, .atomic=1}; +} + +public const TypeInfo_t RNG$info = { + .size=sizeof(void*), + .align=__alignof__(void*), + .tag=CustomInfo, + .CustomInfo={.as_text=(void*)RNG$as_text}, +}; + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/stdlib/rng.h b/stdlib/rng.h new file mode 100644 index 0000000..5bc4794 --- /dev/null +++ b/stdlib/rng.h @@ -0,0 +1,31 @@ +#pragma once + +// Random Number Generator (RNG) functions/type info + +#include +#include + +#include "datatypes.h" +#include "types.h" +#include "bools.h" +#include "bytes.h" +#include "util.h" + +RNG_t RNG$new(Array_t seed); +void RNG$set_seed(RNG_t rng, Array_t seed); +RNG_t RNG$copy(RNG_t rng); +Bool_t RNG$bool(RNG_t rng, Num_t p); +Int_t RNG$int(RNG_t rng, Int_t min, Int_t max); +Int64_t RNG$int64(RNG_t rng, Int64_t min, Int64_t max); +Int32_t RNG$int32(RNG_t rng, Int32_t min, Int32_t max); +Int16_t RNG$int16(RNG_t rng, Int16_t min, Int16_t max); +Int8_t RNG$int8(RNG_t rng, Int8_t min, Int8_t max); +Byte_t RNG$byte(RNG_t rng); +Array_t RNG$bytes(RNG_t rng, Int_t count); +Num_t RNG$num(RNG_t rng, Num_t min, Num_t max); +Num32_t RNG$num32(RNG_t rng, Num32_t min, Num32_t max); + +extern const TypeInfo_t RNG$info; +extern _Thread_local RNG_t default_rng; + +// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0 diff --git a/stdlib/stdlib.c b/stdlib/stdlib.c index 
fb37fcf..f2c6897 100644 --- a/stdlib/stdlib.c +++ b/stdlib/stdlib.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -17,6 +18,7 @@ #include "metamethods.h" #include "patterns.h" #include "paths.h" +#include "rng.h" #include "siphash.h" #include "stdlib.h" #include "tables.h" @@ -30,14 +32,16 @@ public void tomo_init(void) GC_INIT(); USE_COLOR = getenv("COLOR") ? strcmp(getenv("COLOR"), "1") == 0 : isatty(STDOUT_FILENO); getrandom(TOMO_HASH_KEY, sizeof(TOMO_HASH_KEY), 0); - unsigned int seed; - getrandom(&seed, sizeof(seed), 0); - srand(seed); - srand48(seed); - long long_seed; - getrandom(&long_seed, sizeof(long_seed), 0); - Int$init_random(long_seed); + int rng_fd = open("/dev/urandom", O_RDONLY); + if (rng_fd < 0) + fail("Couldn't read from /dev/urandom"); + uint8_t *random_bytes = GC_MALLOC_ATOMIC(40); + if (read(rng_fd, (void*)random_bytes, 40) < 40) + fail("Couldn't read from /dev/urandom"); + close(rng_fd); /* only needed to read the seed; don't leak the descriptor into the running program */ + Array_t rng_seed = {.length=40, .data=random_bytes, .stride=1, .atomic=1}; + default_rng = RNG$new(rng_seed); if (register_printf_specifier('k', printf_text, printf_text_size)) errx(1, "Couldn't set printf specifier"); diff --git a/stdlib/threads.c b/stdlib/threads.c index beb6771..3ae2980 100644 --- a/stdlib/threads.c +++ b/stdlib/threads.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -13,6 +14,7 @@ #include "arrays.h" #include "datatypes.h" +#include "rng.h" #include "text.h" #include "threads.h" #include "types.h" @@ -20,9 +22,10 @@ static void *run_thread(Closure_t *closure) { - long seed; - getrandom(&seed, sizeof(seed), 0); - Int$init_random(seed); + uint8_t *random_bytes = GC_MALLOC_ATOMIC(40); + if (getrandom(random_bytes, 40, 0) < 40) fail("Couldn't get random bytes to seed the thread's RNG"); /* never seed from an unfilled buffer */ + Array_t rng_seed = {.length=40, .data=random_bytes, .stride=1, .atomic=1}; + default_rng = RNG$new(rng_seed); ((void(*)(void*))closure->fn)(closure->userdata); return NULL; } diff --git a/stdlib/tomo.h b/stdlib/tomo.h index 64a9979..8b378c0 100644 --- a/stdlib/tomo.h +++ 
b/stdlib/tomo.h @@ -24,6 +24,7 @@ #include "patterns.h" #include "pointers.h" #include "ranges.h" +#include "rng.h" #include "shell.h" #include "siphash.h" #include "tables.h" diff --git a/test/rng.tm b/test/rng.tm new file mode 100644 index 0000000..3285cd0 --- /dev/null +++ b/test/rng.tm @@ -0,0 +1,40 @@ +# Random Number Generator tests + +func main(): + !! Default RNG: + >> random:int64() + + >> original_rng := RNG.new([:Byte]) + >> copy := original_rng:copy() + + for rng in [original_rng, copy]: + !! RNG: $rng + >> rng:int(1, 1000) + = 921 + >> rng:int64(1, 1000) + = 324[64] + >> rng:int32(1, 1000) + = 586[32] + >> rng:int16(1, 1000) + = 453[16] + >> rng:int8(1, 100) + = 53[8] + >> rng:byte() + = 220[B] + >> rng:bytes(10) + = [160[B], 90[B], 16[B], 63[B], 108[B], 209[B], 53[B], 194[B], 135[B], 140[B]] + >> rng:bool(p=0.8) + = yes + >> rng:num() + = 0.03492503353647658 + >> rng:num32(1, 1000) + = 761.05908_f32 + + !! Random array methods: + >> nums := [10*i for i in 10] + >> nums:shuffled(rng=rng) + = [30, 50, 100, 20, 90, 10, 80, 40, 70, 60] + >> nums:random(rng=rng) + = 70 + >> nums:sample(10, weights=[1.0/i for i in nums.length], rng=rng) + = [10, 20, 10, 10, 30, 70, 10, 40, 60, 80]