Moved RNG out of the compiler and into a standalone library

This commit is contained in:
Bruce Hill 2025-04-01 16:55:24 -04:00
parent a32c374756
commit 6de2d68a70
21 changed files with 387 additions and 479 deletions

View File

@ -26,7 +26,6 @@ Information about Tomo's built-in types can be found here:
- [Integers](integers.md)
- [Languages](langs.md)
- [Paths](paths.md)
- [Random Number Generators](rng.md)
- [Sets](sets.md)
- [Structs](structs.md)
- [Tables](tables.md)

View File

@ -7,3 +7,4 @@ use shell
use colorful
use log
use pthreads
use random

View File

@ -1,9 +1,9 @@
# Random Number Generators (RNG)
Tomo comes with an `RNG` type (Random Number Generator). This type represents a
self-contained piece of data that encapsulates the state of a relatively fast
and relatively secure pseudo-random number generator. The current
implementation is based on the [ChaCha20 stream
This library provides an `RNG` type (Random Number Generator). This type
represents a self-contained piece of data that encapsulates the state of a
relatively fast and relatively secure pseudo-random number generator. The
current implementation is based on the [ChaCha20 stream
cipher,](https://en.wikipedia.org/wiki/Salsa20#ChaCha_variant) inspired by
[`arc4random` in OpenBSD.](https://man.openbsd.org/arc4random.3)
@ -16,7 +16,7 @@ a Tomo program launches.
## RNG Functions
This documentation provides details on RNG functions available in the API.
[Arrays](arrays.md) also have some methods which use RNG values:
Arrays also have some methods which use RNG values:
`array:shuffle()`, `array:shuffled()`, `array:random()`, and `array:sample()`.
- [`func bool(rng: RNG, p: Num = 0.5 -> Bool)`](#bool)
@ -25,8 +25,9 @@ This documentation provides details on RNG functions available in the API.
- [`func copy(rng: RNG -> RNG)`](#copy)
- [`func int(rng: RNG, min: Int, max: Int -> Int)`](#int`, `int64`, `int32`, `int16`, `int8)
- [`func new(seed: [Byte] = (/dev/urandom):read_bytes(40)! -> RNG)`](#new)
- [`func num(rng: RNG, min: Num = 0.0, max: Num = 1.0 -> Int)`](#num`, `num32)
- [`func set_seed(rng: RNG, seed: [Byte])`](#set_seed)
- [`func num(rng: RNG, min: Num = 0.0, max: Num = 1.0 -> Num)`](#num`, `num32)
-------------
### `bool`
Generate a random boolean value with a given probability.
@ -193,25 +194,3 @@ A floating point number uniformly chosen from the range `[min, max]`
>> random:num(1, 10)
= 9.512830439975572
```
---
### `set_seed`
Set the seed for an RNG.
```tomo
func set_seed(rng: RNG, seed: [Byte])
```
- `rng`: The random number generator to modify.
- `seed`: A new seed to re-seed the random number generator with. A seed length
of 40 bytes is recommended. Seed lengths of less than 40 bytes are padded
with zeroes.
**Returns:**
Nothing.
**Example:**
```tomo
random:set_seed((/dev/urandom):read_bytes(40))
```

View File

@ -1,3 +1,4 @@
#pragma once
/*
chacha-merged.c version 20080118
D. J. Bernstein
@ -15,6 +16,9 @@ typedef struct
u32 input[16]; /* could be compressed */
} chacha_ctx;
#define KEYSZ 32
#define IVSZ 8
#define U8C(v) (v##U)
#define U32C(v) (v##U)
@ -50,31 +54,22 @@ typedef struct
c = PLUS(c, d); b = ROTATE(XOR(b, c), 7);
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
static void
chacha_keysetup(chacha_ctx *chacha, const u8 *k, u32 kbits)
chacha_keysetup(chacha_ctx *chacha, const u8 *k)
{
const char *constants;
chacha->input[0] = U8TO32_LITTLE(sigma + 0);
chacha->input[1] = U8TO32_LITTLE(sigma + 4);
chacha->input[2] = U8TO32_LITTLE(sigma + 8);
chacha->input[3] = U8TO32_LITTLE(sigma + 12);
chacha->input[4] = U8TO32_LITTLE(k + 0);
chacha->input[5] = U8TO32_LITTLE(k + 4);
chacha->input[6] = U8TO32_LITTLE(k + 8);
chacha->input[7] = U8TO32_LITTLE(k + 12);
if (kbits == 256) { /* recommended */
k += 16;
constants = sigma;
} else { /* kbits == 128 */
constants = tau;
}
chacha->input[8] = U8TO32_LITTLE(k + 0);
chacha->input[9] = U8TO32_LITTLE(k + 4);
chacha->input[10] = U8TO32_LITTLE(k + 8);
chacha->input[11] = U8TO32_LITTLE(k + 12);
chacha->input[0] = U8TO32_LITTLE(constants + 0);
chacha->input[1] = U8TO32_LITTLE(constants + 4);
chacha->input[2] = U8TO32_LITTLE(constants + 8);
chacha->input[3] = U8TO32_LITTLE(constants + 12);
chacha->input[8] = U8TO32_LITTLE(k + 16);
chacha->input[9] = U8TO32_LITTLE(k + 20);
chacha->input[10] = U8TO32_LITTLE(k + 24);
chacha->input[11] = U8TO32_LITTLE(k + 28);
}
static void

240
examples/random/random.tm Normal file
View File

@ -0,0 +1,240 @@
# Random Number Generator (RNG) implementation based on ChaCha
use ./sysrandom.h
use ./chacha.h
struct chacha_ctx(j0,j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14,j15:Int32; extern, secret):
func from_seed(seed=[:Byte] -> chacha_ctx):
return inline C : chacha_ctx {
chacha_ctx ctx;
uint8_t seed_bytes[KEYSZ + IVSZ] = {};
for (int64_t i = 0; i < (int64_t)sizeof(seed_bytes); i++)
seed_bytes[i] = i < _$seed.length ? *(uint8_t*)(_$seed.data + i*_$seed.stride) : 0;
chacha_keysetup(&ctx, seed_bytes);
chacha_ivsetup(&ctx, seed_bytes + KEYSZ);
ctx;
}
default_random := RandomNumberGenerator.new()
func _os_random_bytes(count:Int64 -> [Byte]):
return inline C : [Byte] {
uint8_t *random_bytes = GC_MALLOC_ATOMIC(_$count);
getrandom(random_bytes, _$count, 0);
(Array_t){.length=_$count, .data=random_bytes, .stride=1, .atomic=1};
}
struct RandomNumberGenerator(_chacha:chacha_ctx, _random_bytes=[:Byte]; secret):
func new(seed=none:[Byte], -> @RandomNumberGenerator):
ctx := chacha_ctx.from_seed(seed or _os_random_bytes(40))
return @RandomNumberGenerator(ctx, [:Byte])
func _rekey(rng:&RandomNumberGenerator):
rng._random_bytes = inline C : [Byte] {
Byte_t new_keystream[KEYSZ + IVSZ] = {};
// Fill the buffer with the keystream
chacha_encrypt_bytes(&_$rng->_chacha, new_keystream, new_keystream, sizeof(new_keystream));
// Immediately reinitialize for backtracking resistance
chacha_keysetup(&_$rng->_chacha, new_keystream);
chacha_ivsetup(&_$rng->_chacha, new_keystream + KEYSZ);
Array_t new_bytes = (Array_t){.data=GC_MALLOC_ATOMIC(1024), .length=1024, .stride=1, .atomic=1};
memset(new_bytes.data, 0, new_bytes.length);
chacha_encrypt_bytes(&_$rng->_chacha, new_bytes.data, new_bytes.data, new_bytes.length);
new_bytes;
}
func _fill_bytes(rng:&RandomNumberGenerator, dest:&Memory, needed:Int64):
inline C {
while (_$needed > 0) {
if (_$rng->_random_bytes.length == 0)
_$random$RandomNumberGenerator$_rekey(_$rng);
assert(_$rng->_random_bytes.stride == 1);
int64_t batch_size = MIN(_$needed, _$rng->_random_bytes.length);
uint8_t *batch_src = _$rng->_random_bytes.data;
memcpy(_$dest, batch_src, batch_size);
memset(batch_src, 0, batch_size);
_$rng->_random_bytes.data += batch_size;
_$rng->_random_bytes.length -= batch_size;
_$dest += batch_size;
_$needed -= batch_size;
}
}
func bytes(rng:&RandomNumberGenerator, count:Int -> [Byte]):
return inline C : [Byte] {
int64_t count64 = Int64$from_int(_$count, false);
Array_t ret = {.data=GC_MALLOC_ATOMIC(count64), .stride=1, .atomic=1, .length=count64};
_$random$RandomNumberGenerator$_fill_bytes(_$rng, ret.data, count64);
ret;
}
func byte(rng:&RandomNumberGenerator -> Byte):
return inline C : Byte {
Byte_t b;
_$random$RandomNumberGenerator$_fill_bytes(_$rng, &b, sizeof(b));
b;
}
func bool(rng:&RandomNumberGenerator, probability=0.5 -> Bool):
if probability == 0.5:
return rng:byte() < 0x80
else:
return rng:num(0., 1.) < 0.5
func int64(rng:&RandomNumberGenerator, min=Int64.min, max=Int64.max -> Int64):
fail("Random minimum value $min is larger than the maximum value $max") if min > max
return min if min == max
if min == Int64.min and max == Int64.max:
return inline C : Int64 {
int64_t i;
_$random$RandomNumberGenerator$_fill_bytes(_$rng, &i, sizeof(i));
i;
}
return inline C : Int64 {
uint64_t range = (uint64_t)_$max - (uint64_t)_$min + 1;
uint64_t min_r = -range % range;
uint64_t r;
for (;;) {
_$random$RandomNumberGenerator$_fill_bytes(_$rng, (uint8_t*)&r, sizeof(r));
if (r >= min_r) break;
}
(int64_t)((uint64_t)_$min + (r % range));
}
func int32(rng:&RandomNumberGenerator, min=Int32.min, max=Int32.max -> Int32):
fail("Random minimum value $min is larger than the maximum value $max") if min > max
return min if min == max
if min == Int32.min and max == Int32.max:
return inline C : Int32 {
int32_t i;
_$random$RandomNumberGenerator$_fill_bytes(_$rng, &i, sizeof(i));
i;
}
return inline C : Int32 {
uint32_t range = (uint32_t)_$max - (uint32_t)_$min + 1;
uint32_t min_r = -range % range;
uint32_t r;
for (;;) {
_$random$RandomNumberGenerator$_fill_bytes(_$rng, (uint8_t*)&r, sizeof(r));
if (r >= min_r) break;
}
(int32_t)((uint32_t)_$min + (r % range));
}
func int16(rng:&RandomNumberGenerator, min=Int16.min, max=Int16.max -> Int16):
fail("Random minimum value $min is larger than the maximum value $max") if min > max
return min if min == max
if min == Int16.min and max == Int16.max:
return inline C : Int16 {
int16_t i;
_$random$RandomNumberGenerator$_fill_bytes(_$rng, &i, sizeof(i));
i;
}
return inline C : Int16 {
uint16_t range = (uint16_t)_$max - (uint16_t)_$min + 1;
uint16_t min_r = -range % range;
uint16_t r;
for (;;) {
_$random$RandomNumberGenerator$_fill_bytes(_$rng, (uint8_t*)&r, sizeof(r));
if (r >= min_r) break;
}
(int16_t)((uint16_t)_$min + (r % range));
}
func int8(rng:&RandomNumberGenerator, min=Int8.min, max=Int8.max -> Int8):
fail("Random minimum value $min is larger than the maximum value $max") if min > max
return min if min == max
if min == Int8.min and max == Int8.max:
return inline C : Int8 {
int8_t i;
_$random$RandomNumberGenerator$_fill_bytes(_$rng, &i, sizeof(i));
i;
}
return inline C : Int8 {
uint8_t range = (uint8_t)_$max - (uint8_t)_$min + 1;
uint8_t min_r = -range % range;
uint8_t r;
for (;;) {
_$random$RandomNumberGenerator$_fill_bytes(_$rng, (uint8_t*)&r, sizeof(r));
if (r >= min_r) break;
}
(int8_t)((uint8_t)_$min + (r % range));
}
func num(rng:&RandomNumberGenerator, min=0., max=1. -> Num):
return inline C : Num {
if (_$min > _$max) fail("Random minimum value (", _$min, ") is larger than the maximum value (", _$max, ")");
if (_$min == _$max) return _$min;
union {
Num_t num;
uint64_t bits;
} r = {.bits=0}, one = {.num=1.0};
_$random$RandomNumberGenerator$_fill_bytes(_$rng, (uint8_t*)&r, sizeof(r));
// Set r.num to 1.<random-bits>
r.bits &= ~(0xFFFULL << 52);
r.bits |= (one.bits & (0xFFFULL << 52));
r.num -= 1.0;
(_$min == 0.0 && _$max == 1.0) ? r.num : ((1.0-r.num)*_$min + r.num*_$max);
}
func num32(rng:&RandomNumberGenerator, min=Num32(0.), max=Num32(1.) -> Num32):
return Num32(rng:num(Num(min), Num(max)))
func int(rng:&RandomNumberGenerator, min:Int, max:Int -> Int):
return inline C : Int {
if (likely(((_$min.small & _$max.small) & 1) != 0)) {
int32_t r = _$random$RandomNumberGenerator$int32(_$rng, (int32_t)(_$min.small >> 2), (int32_t)(_$max.small >> 2));
return I_small(r);
}
int32_t cmp = Int$compare_value(_$min, _$max);
if (cmp > 0)
fail("Random minimum value (", _$min, ") is larger than the maximum value (", _$max, ")");
if (cmp == 0) return _$min;
mpz_t range_size;
mpz_init_set_int(range_size, _$max);
if (_$min.small & 1) {
mpz_t min_mpz;
mpz_init_set_si(min_mpz, _$min.small >> 2);
mpz_sub(range_size, range_size, min_mpz);
} else {
mpz_sub(range_size, range_size, *_$min.big);
}
gmp_randstate_t gmp_rng;
gmp_randinit_default(gmp_rng);
int64_t seed = _$random$RandomNumberGenerator$int64(_$rng, INT64_MIN, INT64_MAX);
gmp_randseed_ui(gmp_rng, (unsigned long)seed);
mpz_t r;
mpz_init(r);
mpz_urandomm(r, gmp_rng, range_size);
gmp_randclear(gmp_rng);
Int$plus(_$min, Int$from_mpz(r));
}
func main():
>> rng := RandomNumberGenerator.new()
>> rng:num()
>> rng:num()
>> rng:num()
>> rng:num(0, 100)
>> rng:byte()
>> rng:bytes(20)
# >> rng:int(1, 100)
# >> rng:int(1, 100)
# >> rng:int(1, 100)
# >> rng:int(1, 100)

View File

@ -0,0 +1,14 @@
#pragma once
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__APPLE__)
static ssize_t getrandom(void *buf, size_t buflen, unsigned int flags) {
(void)flags;
arc4random_buf(buf, buflen);
return buflen;
}
#elif defined(__linux__)
// Use getrandom()
# include <sys/random.h>
#else
#error "Unsupported platform for secure random number generation"
#endif

View File

@ -12,6 +12,7 @@
#include "cordhelpers.h"
#include "enums.h"
#include "environment.h"
#include "parse.h"
#include "stdlib/integers.h"
#include "stdlib/nums.h"
#include "stdlib/paths.h"
@ -495,8 +496,7 @@ PUREFUNC CORD compile_unsigned_type(type_t *t)
CORD compile_type(type_t *t)
{
if (t == RNG_TYPE) return "RNG_t";
else if (t == PATH_TYPE) return "Path_t";
if (t == PATH_TYPE) return "Path_t";
else if (t == PATH_TYPE_TYPE) return "PathType_t";
switch (t->tag) {
@ -2990,7 +2990,6 @@ CORD compile(env_t *env, ast_t *ast)
type_t *item_t = Match(self_value_t, ArrayType)->item_type;
CORD padded_item_size = CORD_all("sizeof(", compile_type(item_t), ")");
ast_t *default_rng = FakeAST(InlineCCode, .code="default_rng", .type=RNG_TYPE);
if (streq(call->name, "insert")) {
EXPECT_POINTER("an", "array");
arg_t *arg_spec = new(arg_t, .name="item", .type=item_t,
@ -3015,36 +3014,40 @@ CORD compile(env_t *env, ast_t *ast)
.next=new(arg_t, .name="max_count", .type=INT_TYPE, .default_val=FakeAST(Int, .str="-1")));
return CORD_all("Array$remove_item_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ",
compile_type_info(self_value_t), ")");
} else if (streq(call->name, "random")) {
self = compile_to_pointer_depth(env, call->self, 0, false);
arg_t *arg_spec = new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng);
return CORD_all("Array$random_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type(item_t), ")");
} else if (streq(call->name, "has")) {
self = compile_to_pointer_depth(env, call->self, 0, false);
arg_t *arg_spec = new(arg_t, .name="item", .type=item_t);
return CORD_all("Array$has_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ",
compile_type_info(self_value_t), ")");
} else if (streq(call->name, "sample")) {
type_t *random_num_type = parse_type_string(env, "func(->Num)?");
ast_t *none_rng = parse_expression("none:func(->Num)");
self = compile_to_pointer_depth(env, call->self, 0, false);
arg_t *arg_spec = new(arg_t, .name="count", .type=INT_TYPE,
.next=new(arg_t, .name="weights", .type=Type(ArrayType, .item_type=Type(NumType)),
.default_val=FakeAST(None, .type=new(type_ast_t, .tag=ArrayTypeAST,
.__data.ArrayTypeAST.item=new(type_ast_t, .tag=VarTypeAST, .__data.VarTypeAST.name="Num"))),
.next=new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng)));
.next=new(arg_t, .name="random", .type=random_num_type, .default_val=none_rng)));
return CORD_all("Array$sample(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ",
padded_item_size, ")");
} else if (streq(call->name, "shuffle")) {
type_t *random_int64_type = parse_type_string(env, "func(min,max:Int64->Int64)?");
ast_t *none_rng = parse_expression("none:func(min,max:Int64->Int64)");
EXPECT_POINTER("an", "array");
arg_t *arg_spec = new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng);
arg_t *arg_spec = new(arg_t, .name="random", .type=random_int64_type, .default_val=none_rng);
return CORD_all("Array$shuffle(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")");
} else if (streq(call->name, "shuffled")) {
type_t *random_int64_type = parse_type_string(env, "func(min,max:Int64->Int64)?");
ast_t *none_rng = parse_expression("none:func(min,max:Int64->Int64)");
self = compile_to_pointer_depth(env, call->self, 0, false);
arg_t *arg_spec = new(arg_t, .name="rng", .type=RNG_TYPE, .default_val=default_rng);
arg_t *arg_spec = new(arg_t, .name="random", .type=random_int64_type, .default_val=none_rng);
return CORD_all("Array$shuffled(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", padded_item_size, ")");
} else if (streq(call->name, "slice")) {
self = compile_to_pointer_depth(env, call->self, 0, true);
arg_t *arg_spec = new(arg_t, .name="first", .type=INT_TYPE, .next=new(arg_t, .name="last", .type=INT_TYPE));
return CORD_all("Array$slice(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ")");
} else if (streq(call->name, "random")) {
type_t *random_int64_type = parse_type_string(env, "func(min,max:Int64->Int64)?");
ast_t *none_rng = parse_expression("none:func(min,max:Int64->Int64)");
self = compile_to_pointer_depth(env, call->self, 0, false);
arg_t *arg_spec = new(arg_t, .name="random", .type=random_int64_type, .default_val=none_rng);
return CORD_all("Array$random_value(", self, ", ", compile_arguments(env, ast, arg_spec, call->args), ", ", compile_type(item_t), ")");
} else if (streq(call->name, "sort") || streq(call->name, "sorted")) {
if (streq(call->name, "sort"))
EXPECT_POINTER("an", "array");
@ -3755,8 +3758,7 @@ CORD compile(env_t *env, ast_t *ast)
CORD compile_type_info(type_t *t)
{
if (t == RNG_TYPE) return "&RNG$info";
else if (t == PATH_TYPE) return "&Path$info";
if (t == PATH_TYPE) return "&Path$info";
else if (t == PATH_TYPE_TYPE) return "&PathType$info";
switch (t->tag) {
@ -4036,8 +4038,10 @@ CORD compile_function(env_t *env, CORD name_code, ast_t *ast, CORD *staticdefs)
OptionalInt64_t cache_size = Int64$parse(Text$from_str(Match(cache, Int)->str));
CORD pop_code = CORD_EMPTY;
if (cache->tag == Int && !cache_size.is_none && cache_size.value > 0) {
// FIXME: this currently just deletes the first entry, but this should be more like a
// least-recently-used cache eviction policy or least-frequently-used
pop_code = CORD_all("if (cache.entries.length > ", CORD_asprintf("%ld", cache_size.value),
") Table$remove(&cache, cache.entries.data + cache.entries.stride*RNG$int64(default_rng, 0, cache.entries.length-1), table_type);\n");
") Table$remove(&cache, cache.entries.data + cache.entries.stride*0, table_type);\n");
}
if (!args->next) {

View File

@ -13,7 +13,6 @@
#include "typecheck.h"
type_t *TEXT_TYPE = NULL;
type_t *RNG_TYPE = NULL;
public type_t *PATH_TYPE = NULL;
public type_t *PATH_TYPE_TYPE = NULL;
@ -67,7 +66,6 @@ env_t *global_env(void)
(void)bind_type(env, "Memory", Type(MemoryType));
PATH_TYPE_TYPE = declare_type(env, "enum PathType(Relative, Absolute, Home)");
PATH_TYPE = declare_type(env, "struct Path(type:PathType, components:[Text])");
RNG_TYPE = declare_type(env, "struct RNG(state:@Memory)");
typedef struct {
const char *name, *code, *type_str;
@ -324,22 +322,6 @@ env_t *global_env(void)
{"write_unique", "Path$write_unique", "func(path:Path, text:Text -> Path)"},
{"write_unique_bytes", "Path$write_unique_bytes", "func(path:Path, bytes:[Byte] -> Path)"},
)},
// RNG must come after Path so we can read bytes from /dev/urandom
{"RNG", RNG_TYPE, "RNG_t", "RNG", TypedArray(ns_entry_t,
{"bool", "RNG$bool", "func(rng:RNG, p=0.5 -> Bool)"},
{"byte", "RNG$byte", "func(rng:RNG -> Byte)"},
{"bytes", "RNG$bytes", "func(rng:RNG, count:Int -> [Byte])"},
{"copy", "RNG$copy", "func(rng:RNG -> RNG)"},
{"int", "RNG$int", "func(rng:RNG, min,max:Int -> Int)"},
{"int16", "RNG$int16", "func(rng:RNG, min=Int16.min, max=Int16.max -> Int16)"},
{"int32", "RNG$int32", "func(rng:RNG, min=Int32.min, max=Int32.max -> Int32)"},
{"int64", "RNG$int64", "func(rng:RNG, min=Int64.min, max=Int64.max -> Int64)"},
{"int8", "RNG$int8", "func(rng:RNG, min=Int8.min, max=Int8.max -> Int8)"},
{"new", "RNG$new", "func(seed=(/dev/urandom):read_bytes(40)! -> RNG)"},
{"num", "RNG$num", "func(rng:RNG, min=0.0, max=1.0 -> Num)"},
{"num32", "RNG$num32", "func(rng:RNG, min=Num32(0.0), max=Num32(1.0) -> Num32)"},
{"set_seed", "RNG$set_seed", "func(rng:RNG, seed:[Byte])"},
)},
{"Text", TEXT_TYPE, "Text_t", "Text$info", TypedArray(ns_entry_t,
{"as_c_string", "Text$as_c_string", "func(text:Text -> CString)"},
{"at", "Text$cluster", "func(text:Text, index:Int -> Text)"},
@ -512,7 +494,6 @@ env_t *global_env(void)
{"Path$escape_path", "func(path:Path -> Path)"},
{"Int$value_as_text", "func(i:Int -> Path)"});
ADD_CONSTRUCTORS("CString", {"Text$as_c_string", "func(text:Text -> CString)"});
ADD_CONSTRUCTORS("RNG", {"RNG$new", "func(-> RNG)"});
#undef ADD_CONSTRUCTORS
set_binding(namespace_env(env, "Path"), "from_text",
@ -524,7 +505,6 @@ env_t *global_env(void)
const char *name, *code, *type_str;
} global_vars[] = {
{"USE_COLOR", "USE_COLOR", "Bool"},
{"random", "default_rng", "RNG"},
{"say", "say", "func(text:Text, newline=yes)"},
{"print", "say", "func(text:Text, newline=yes)"},
{"ask", "ask", "func(prompt:Text, bold=yes, force_tty=yes -> Text?)"},

View File

@ -89,7 +89,6 @@ void set_binding(env_t *env, const char *name, type_t *type, CORD code);
binding_t *get_namespace_binding(env_t *env, ast_t *self, const char *name);
#define code_err(ast, ...) compiler_err((ast)->file, (ast)->start, (ast)->end, __VA_ARGS__)
extern type_t *TEXT_TYPE;
extern type_t *RNG_TYPE;
extern type_t *PATH_TYPE;
extern type_t *PATH_TYPE_TYPE;

View File

@ -2442,8 +2442,9 @@ ast_t *parse_file(const char *path, jmp_buf *on_err) {
// If cache is getting too big, evict a random entry:
if (cached.entries.length > PARSE_CACHE_SIZE) {
uint32_t i = arc4random_uniform(cached.entries.length);
struct {const char *path; ast_t *ast; } *to_remove = Table$entry(cached, i+1);
// FIXME: this currently evicts the first entry, but it should be more like
// an LRU cache
struct {const char *path; ast_t *ast; } *to_remove = Table$entry(cached, 1);
Table$str_remove(&cached, to_remove->path);
}
@ -2484,7 +2485,24 @@ ast_t *parse(const char *str) {
pos = ast->end;
whitespace(&pos);
if (pos < file->text + file->len && *pos != '\0')
parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the file");
parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the string");
return ast;
}
ast_t *parse_expression(const char *str) {
file_t *file = spoof_file("<string>", str);
parse_ctx_t ctx = {
.file=file,
.on_err=NULL,
};
const char *pos = file->text;
whitespace(&pos);
ast_t *ast = parse_extended_expr(&ctx, pos);
pos = ast->end;
whitespace(&pos);
if (pos < file->text + file->len && *pos != '\0')
parser_err(&ctx, pos, pos + strlen(pos), "I couldn't parse this part of the string");
return ast;
}

View File

@ -9,5 +9,6 @@
type_ast_t *parse_type_str(const char *str);
ast_t *parse_file(const char *path, jmp_buf *on_err);
ast_t *parse(const char *str);
ast_t *parse_expression(const char *str);
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0

View File

@ -10,7 +10,6 @@
#include "math.h"
#include "metamethods.h"
#include "optionals.h"
#include "rng.h"
#include "tables.h"
#include "text.h"
#include "util.h"
@ -271,33 +270,63 @@ public Array_t Array$sorted(Array_t arr, Closure_t comparison, int64_t padded_it
return arr;
}
public void Array$shuffle(Array_t *arr, RNG_t rng, int64_t padded_item_size)
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__APPLE__)
static ssize_t getrandom(void *buf, size_t buflen, unsigned int flags) {
(void)flags;
arc4random_buf(buf, buflen);
return buflen;
}
#elif defined(__linux__)
// Use getrandom()
# include <sys/random.h>
#else
#error "Unsupported platform for secure random number generation"
#endif
static int64_t _default_random_int64(int64_t min, int64_t max, void *userdata)
{
(void)userdata;
if (min > max) fail("Random minimum value (", min, ") is larger than the maximum value (", max, ")");
if (min == max) return min;
uint64_t range = (uint64_t)max - (uint64_t)min + 1;
uint64_t min_r = -range % range;
uint64_t r;
for (;;) {
getrandom(&r, sizeof(r), 0);
if (r >= min_r) break;
}
return (int64_t)((uint64_t)min + (r % range));
}
public void Array$shuffle(Array_t *arr, OptionalClosure_t random_int64, int64_t padded_item_size)
{
if (arr->data_refcount != 0 || (int64_t)arr->stride != padded_item_size)
Array$compact(arr, padded_item_size);
int64_t (*rng_fn)(int64_t, int64_t, void*) = random_int64.fn ? random_int64.fn : _default_random_int64;
char tmp[padded_item_size];
for (int64_t i = arr->length-1; i > 1; i--) {
int64_t j = RNG$int64(rng, 0, i);
int64_t j = rng_fn(0, i, random_int64.userdata);
memcpy(tmp, arr->data + i*padded_item_size, (size_t)padded_item_size);
memcpy((void*)arr->data + i*padded_item_size, arr->data + j*padded_item_size, (size_t)padded_item_size);
memcpy((void*)arr->data + j*padded_item_size, tmp, (size_t)padded_item_size);
}
}
public Array_t Array$shuffled(Array_t arr, RNG_t rng, int64_t padded_item_size)
public Array_t Array$shuffled(Array_t arr, Closure_t random_int64, int64_t padded_item_size)
{
Array$compact(&arr, padded_item_size);
Array$shuffle(&arr, rng, padded_item_size);
Array$shuffle(&arr, random_int64, padded_item_size);
return arr;
}
public void *Array$random(Array_t arr, RNG_t rng)
public void *Array$random(Array_t arr, OptionalClosure_t random_int64)
{
if (arr.length == 0)
return NULL; // fail("Cannot get a random item from an empty array!");
int64_t index = RNG$int64(rng, 0, arr.length-1);
int64_t (*rng_fn)(int64_t, int64_t, void*) = random_int64.fn;
int64_t index = rng_fn(0, arr.length-1, random_int64.userdata);
return arr.data + arr.stride*index;
}
@ -314,7 +343,22 @@ public Table_t Array$counts(Array_t arr, const TypeInfo_t *type)
return counts;
}
public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, RNG_t rng, int64_t padded_item_size)
static double _default_random_num(void *userdata)
{
(void)userdata;
union {
Num_t num;
uint64_t bits;
} r = {.bits=0}, one = {.num=1.0};
getrandom((uint8_t*)&r, sizeof(r), 0);
// Set r.num to 1.<random-bits>
r.bits &= ~(0xFFFULL << 52);
r.bits |= (one.bits & (0xFFFULL << 52));
return r.num - 1.0;
}
public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, OptionalClosure_t random_num, int64_t padded_item_size)
{
int64_t n = Int64$from_int(int_n, false);
if (n < 0)
@ -331,14 +375,6 @@ public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, RNG_t rng
.length=n,
.stride=padded_item_size, .atomic=arr.atomic};
if (weights.length < 0) {
for (int64_t i = 0; i < n; i++) {
int64_t index = RNG$int64(rng, 0, arr.length-1);
memcpy(selected.data + i*padded_item_size, arr.data + arr.stride*index, (size_t)padded_item_size);
}
return selected;
}
if (weights.length != arr.length)
fail("Array has ", (int64_t)arr.length, " elements, but there are ", (int64_t)weights.length, " weights given");
@ -396,8 +432,9 @@ public Array_t Array$sample(Array_t arr, Int_t int_n, Array_t weights, RNG_t rng
if (aliases[i].alias == -1)
aliases[i].alias = i;
double (*rng_fn)(void*) = random_num.fn ? random_num.fn : _default_random_num;
for (int64_t i = 0; i < n; i++) {
double r = RNG$num(rng, 0, arr.length);
double r = (double)arr.length * rng_fn(random_num.userdata);
int64_t index = (int64_t)r;
if ((r - (double)index) > aliases[index].odds)
index = aliases[index].alias;

View File

@ -82,11 +82,11 @@ OptionalInt_t Array$find(Array_t arr, void *item, const TypeInfo_t *type);
OptionalInt_t Array$first(Array_t arr, Closure_t predicate);
void Array$sort(Array_t *arr, Closure_t comparison, int64_t padded_item_size);
Array_t Array$sorted(Array_t arr, Closure_t comparison, int64_t padded_item_size);
void Array$shuffle(Array_t *arr, RNG_t rng, int64_t padded_item_size);
Array_t Array$shuffled(Array_t arr, RNG_t rng, int64_t padded_item_size);
void *Array$random(Array_t arr, RNG_t rng);
#define Array$random_value(arr, rng, t) ({ Array_t _arr = arr; if (_arr.length == 0) fail("Cannot get a random value from an empty array!"); *(t*)Array$random(_arr, rng); })
Array_t Array$sample(Array_t arr, Int_t n, Array_t weights, RNG_t rng, int64_t padded_item_size);
void Array$shuffle(Array_t *arr, OptionalClosure_t random_int64, int64_t padded_item_size);
Array_t Array$shuffled(Array_t arr, OptionalClosure_t random_int64, int64_t padded_item_size);
void *Array$random(Array_t arr, OptionalClosure_t random_int64);
#define Array$random_value(arr, random_int64, t) ({ Array_t _arr = arr; if (_arr.length == 0) fail("Cannot get a random value from an empty array!"); *(t*)Array$random(_arr, random_int64); })
Array_t Array$sample(Array_t arr, Int_t n, Array_t weights, Closure_t random_num, int64_t padded_item_size);
Table_t Array$counts(Array_t arr, const TypeInfo_t *type);
void Array$clear(Array_t *array);
void Array$compact(Array_t *arr, int64_t padded_item_size);

View File

@ -105,8 +105,6 @@ typedef struct {
} Path_t;
#define OptionalPath_t Path_t
typedef struct RNGState_t* RNG_t;
#define OptionalBool_t uint8_t
#define OptionalArray_t Array_t
#define OptionalTable_t Table_t

View File

@ -1,274 +0,0 @@
// Random Number Generator (RNG) implementation based on ChaCha
#include <ctype.h>
#include <err.h>
#include <gc.h>
#include <gmp.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include "arrays.h"
#include "datatypes.h"
#include "rng.h"
#include "text.h"
#include "util.h"
#include "chacha.h"
struct RNGState_t {
chacha_ctx chacha;
size_t unused_bytes;
uint8_t random_bytes[1024];
};
#ifdef __TINYC__
// TinyCC doesn't implement _Thread_local
public RNG_t default_rng = (struct RNGState_t[1]){};
#else
public _Thread_local RNG_t default_rng = (struct RNGState_t[1]){};
#endif
PUREFUNC static Text_t RNG$as_text(const void *rng, bool colorize, const TypeInfo_t*)
{
if (!rng) return Text("RNG");
return Text$format(colorize ? "\x1b[34;1mRNG(%p)\x1b[m" : "RNG(%p)", *(RNG_t**)rng);
}
#define KEYSZ 32
#define IVSZ 8
public void RNG$set_seed(RNG_t rng, Array_t seed)
{
uint8_t seed_bytes[KEYSZ + IVSZ] = {};
for (int64_t i = 0; i < (int64_t)sizeof(seed_bytes); i++)
seed_bytes[i] = i < seed.length ? *(uint8_t*)(seed.data + i*seed.stride) : 0;
rng->unused_bytes = 0;
chacha_keysetup(&rng->chacha, seed_bytes, KEYSZ/8);
chacha_ivsetup(&rng->chacha, seed_bytes + KEYSZ);
}
public RNG_t RNG$copy(RNG_t rng)
{
RNG_t copy = GC_MALLOC_ATOMIC(sizeof(struct RNGState_t));
*copy = *rng;
return copy;
}
public RNG_t RNG$new(Array_t seed)
{
RNG_t rng = GC_MALLOC_ATOMIC(sizeof(struct RNGState_t));
RNG$set_seed(rng, seed);
return rng;
}
static void rekey(RNG_t rng)
{
// Fill the buffer with the keystream
chacha_encrypt_bytes(&rng->chacha, rng->random_bytes, rng->random_bytes, sizeof(rng->random_bytes));
// Immediately reinitialize for backtracking resistance
chacha_keysetup(&rng->chacha, rng->random_bytes, KEYSZ/8);
chacha_ivsetup(&rng->chacha, rng->random_bytes + KEYSZ);
#ifdef __APPLE__
bzero(rng->random_bytes, KEYSZ + IVSZ);
#else
explicit_bzero(rng->random_bytes, KEYSZ + IVSZ);
#endif
rng->unused_bytes = sizeof(rng->random_bytes) - KEYSZ - IVSZ;
assert(rng->unused_bytes <= sizeof(rng->random_bytes));
}
static void random_bytes(RNG_t rng, uint8_t *dest, size_t needed)
{
while (needed > 0) {
assert(rng->unused_bytes <= sizeof(rng->random_bytes));
if (rng->unused_bytes == 0)
rekey(rng);
size_t batch_size = MIN(needed, rng->unused_bytes);
uint8_t *batch_src = rng->random_bytes + sizeof(rng->random_bytes) - rng->unused_bytes;
memcpy(dest, batch_src, batch_size);
memset(batch_src, 0, batch_size);
rng->unused_bytes -= batch_size;
dest += batch_size;
needed -= batch_size;
assert(rng->unused_bytes <= sizeof(rng->random_bytes));
}
}
public Bool_t RNG$bool(RNG_t rng, Num_t p)
{
if (p == 0.5) {
uint8_t b;
random_bytes(rng, &b, sizeof(b));
return b & 1;
} else {
return RNG$num(rng, 0.0, 1.0) < p;
}
}
public Int_t RNG$int(RNG_t rng, Int_t min, Int_t max)
{
if (likely(((min.small & max.small) & 1) != 0)) {
int32_t r = RNG$int32(rng, (int32_t)(min.small >> 2), (int32_t)(max.small >> 2));
return I_small(r);
}
int32_t cmp = Int$compare_value(min, max);
if (cmp > 0)
fail("Random minimum value (", min, ") is larger than the maximum value (", max, ")");
if (cmp == 0) return min;
mpz_t range_size;
mpz_init_set_int(range_size, max);
if (min.small & 1) {
mpz_t min_mpz;
mpz_init_set_si(min_mpz, min.small >> 2);
mpz_sub(range_size, range_size, min_mpz);
} else {
mpz_sub(range_size, range_size, *min.big);
}
gmp_randstate_t gmp_rng;
gmp_randinit_default(gmp_rng);
gmp_randseed_ui(gmp_rng, (unsigned long)RNG$int64(rng, INT64_MIN, INT64_MAX));
mpz_t r;
mpz_init(r);
mpz_urandomm(r, gmp_rng, range_size);
gmp_randclear(gmp_rng);
return Int$plus(min, Int$from_mpz(r));
}
public Int64_t RNG$int64(RNG_t rng, Int64_t min, Int64_t max)
{
if (min > max) fail("Random minimum value (", min, ") is larger than the maximum value (", max, ")");
if (min == max) return min;
if (min == INT64_MIN && max == INT64_MAX) {
int64_t r;
random_bytes(rng, (uint8_t*)&r, sizeof(r));
return r;
}
uint64_t range = (uint64_t)max - (uint64_t)min + 1;
uint64_t min_r = -range % range;
uint64_t r;
for (;;) {
random_bytes(rng, (uint8_t*)&r, sizeof(r));
if (r >= min_r) break;
}
return (int64_t)((uint64_t)min + (r % range));
}
public Int32_t RNG$int32(RNG_t rng, Int32_t min, Int32_t max)
{
if (min > max) fail("Random minimum value (", min, ") is larger than the maximum value (", max, ")");
if (min == max) return min;
if (min == INT32_MIN && max == INT32_MAX) {
int32_t r;
random_bytes(rng, (uint8_t*)&r, sizeof(r));
return r;
}
uint32_t range = (uint32_t)max - (uint32_t)min + 1;
uint32_t min_r = -range % range;
uint32_t r;
for (;;) {
random_bytes(rng, (uint8_t*)&r, sizeof(r));
if (r >= min_r) break;
}
return (int32_t)((uint32_t)min + (r % range));
}
public Int16_t RNG$int16(RNG_t rng, Int16_t min, Int16_t max)
{
if (min > max) fail("Random minimum value (", min, ") is larger than the maximum value (", max, ")");
if (min == max) return min;
if (min == INT16_MIN && max == INT16_MAX) {
int16_t r;
random_bytes(rng, (uint8_t*)&r, sizeof(r));
return r;
}
uint16_t range = (uint16_t)max - (uint16_t)min + 1;
uint16_t min_r = -range % range;
uint16_t r;
for (;;) {
random_bytes(rng, (uint8_t*)&r, sizeof(r));
if (r >= min_r) break;
}
return (int16_t)((uint16_t)min + (r % range));
}
public Int8_t RNG$int8(RNG_t rng, Int8_t min, Int8_t max)
{
if (min > max) fail("Random minimum value (", min, ") is larger than the maximum value (", max, ")");
if (min == max) return min;
if (min == INT8_MIN && max == INT8_MAX) {
int8_t r;
random_bytes(rng, (uint8_t*)&r, sizeof(r));
return r;
}
uint8_t range = (uint8_t)max - (uint8_t)min + 1;
uint8_t min_r = -range % range;
uint8_t r;
for (;;) {
random_bytes(rng, (uint8_t*)&r, sizeof(r));
if (r >= min_r) break;
}
return (int8_t)((uint8_t)min + (r % range));
}
public Num_t RNG$num(RNG_t rng, Num_t min, Num_t max)
{
if (min > max) fail("Random minimum value (", min, ") is larger than the maximum value (", max, ")");
if (min == max) return min;
union {
Num_t num;
uint64_t bits;
} r = {.bits=0}, one = {.num=1.0};
random_bytes(rng, (void*)&r, sizeof(r));
// Set r.num to 1.<random-bits>
r.bits &= ~(0xFFFULL << 52);
r.bits |= (one.bits & (0xFFFULL << 52));
r.num -= 1.0;
if (min == 0.0 && max == 1.0)
return r.num;
return (1.0-r.num)*min + r.num*max;
}
public Num32_t RNG$num32(RNG_t rng, Num32_t min, Num32_t max)
{
return (Num32_t)RNG$num(rng, (Num_t)min, (Num_t)max);
}
public Byte_t RNG$byte(RNG_t rng)
{
Byte_t b;
random_bytes(rng, &b, sizeof(b));
return b;
}
public Array_t RNG$bytes(RNG_t rng, Int_t count)
{
int64_t n = Int64$from_int(count, false);
Byte_t *r = GC_MALLOC_ATOMIC(sizeof(Byte_t[n]));
random_bytes(rng, r, sizeof(Byte_t[n]));
return (Array_t){.data=r, .length=n, .stride=1, .atomic=1};
}
public const TypeInfo_t RNG$info = {
.size=sizeof(void*),
.align=__alignof__(void*),
.metamethods={
.as_text=RNG$as_text,
},
};
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0

View File

@ -1,36 +0,0 @@
#pragma once
// Random Number Generator (RNG) functions/type info
#include <stdbool.h>
#include <stdint.h>
#include "datatypes.h"
#include "types.h"
#include "bools.h"
#include "bytes.h"
#include "util.h"
RNG_t RNG$new(Array_t seed);
void RNG$set_seed(RNG_t rng, Array_t seed);
RNG_t RNG$copy(RNG_t rng);
Bool_t RNG$bool(RNG_t rng, Num_t p);
Int_t RNG$int(RNG_t rng, Int_t min, Int_t max);
Int64_t RNG$int64(RNG_t rng, Int64_t min, Int64_t max);
Int32_t RNG$int32(RNG_t rng, Int32_t min, Int32_t max);
Int16_t RNG$int16(RNG_t rng, Int16_t min, Int16_t max);
Int8_t RNG$int8(RNG_t rng, Int8_t min, Int8_t max);
Byte_t RNG$byte(RNG_t rng);
Array_t RNG$bytes(RNG_t rng, Int_t count);
Num_t RNG$num(RNG_t rng, Num_t min, Num_t max);
Num32_t RNG$num32(RNG_t rng, Num32_t min, Num32_t max);
extern const TypeInfo_t RNG$info;
// TinyCC doesn't implement _Thread_local
#ifdef __TINYC__
extern RNG_t default_rng;
#else
extern _Thread_local RNG_t default_rng;
#endif
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0

View File

@ -21,7 +21,6 @@
#include "metamethods.h"
#include "nums.h"
#include "paths.h"
#include "rng.h"
#include "siphash.h"
#include "stdlib.h"
#include "tables.h"
@ -65,11 +64,6 @@ public void tomo_init(void)
setlocale(LC_ALL, "");
getrandom(TOMO_HASH_KEY, sizeof(TOMO_HASH_KEY), 0);
uint8_t *random_bytes[40] = {};
getrandom(random_bytes, sizeof(random_bytes), 0);
Array_t rng_seed = {.length=sizeof(random_bytes), .data=random_bytes, .stride=1, .atomic=1};
RNG$set_seed(default_rng, rng_seed);
struct sigaction sigact;
sigact.sa_sigaction = signal_handler;
sigemptyset(&sigact.sa_mask);

View File

@ -22,7 +22,6 @@
#include "paths.h"
#include "pointers.h"
#include "print.h"
#include "rng.h"
#include "siphash.h"
#include "structs.h"
#include "tables.h"

View File

@ -101,7 +101,7 @@ func main():
>> ["A", "B", "C"]:sample(10, [1.0, 0.5, 0.0])
do:
>> heap := @[random:int(1, 50) for _ in 10]
>> heap := @[(i * 1337) mod 37 for i in 10]
>> heap:heapify()
>> heap
sorted := @[:Int]
@ -109,8 +109,8 @@ func main():
sorted:insert(heap:heap_pop() or stop)
>> sorted == sorted:sorted()
= yes
for _ in 10:
heap:heap_push(random:int(1, 50))
for i in 10:
heap:heap_push((i*13337) mod 37)
>> heap
sorted = @[:Int]
repeat:

View File

@ -85,12 +85,12 @@ func main():
= 10000000000000000000000
do:
for in 20:
>> n := random:int(-999999, 999999)
>> d := random:int(-999, 999)
!! n=$n, d=$d:
>> (n/d)*d + (n mod d) == n
= yes
interesting_numerators := [-999999, -100, -23, -1, 0, 1, 23, 100, 999999]
interesting_denominators := [-99, -20, -17, -1, 1, 17, 20, 99]
for n in interesting_numerators:
for d in interesting_denominators:
>> (n/d)*d + (n mod d) == n
= yes
>> 0:next_prime()
= 2

View File

@ -1,40 +0,0 @@
# Random Number Generator tests
func main():
!! Default RNG:
>> random:int64()
>> original_rng := RNG.new([:Byte])
>> copy := original_rng:copy()
for rng in [original_rng, copy]:
!! RNG: $rng
>> rng:int(1, 1000)
= 921
>> rng:int64(1, 1000)
= Int64(324)
>> rng:int32(1, 1000)
= Int32(586)
>> rng:int16(1, 1000)
= Int16(453)
>> rng:int8(1, 100)
= Int8(53)
>> rng:byte()
= Byte(0xDC)
>> rng:bytes(10)
= [:Byte, 0xA0, 0x5A, 0x10, 0x3F, 0x6C, 0xD1, 0x35, 0xC2, 0x87, 0x8C]
>> rng:bool(p=0.8)
= yes
>> rng:num()
= 0.03492503353647658
>> rng:num32(1, 1000)
= Num32(761.05908)
!! Random array methods:
>> nums := [10*i for i in 10]
>> nums:shuffled(rng=rng)
= [30, 50, 100, 20, 90, 10, 80, 40, 70, 60]
>> nums:random(rng=rng)
= 70
>> nums:sample(10, weights=[1.0/Num(i) for i in nums.length], rng=rng)
= [10, 20, 10, 10, 30, 70, 10, 40, 60, 80]