Minor cleanups and fixes to RNG, chacha, and text

This commit is contained in:
Bruce Hill 2024-12-17 14:24:37 -05:00
parent d3eb8b8339
commit aa26234471
3 changed files with 106 additions and 131 deletions

View File

@@ -38,56 +38,56 @@ typedef struct
(p)[3] = U8V((v) >> 24); \
} while (0)
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))
#define ROTATE(v, c) (ROTL32(v, c))
#define XOR(v, w) ((v) ^ (w))
#define PLUS(v, w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v), 1))
#define QUARTERROUND(a,b,c,d) \
a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
#define QUARTERROUND(a, b, c, d) \
a = PLUS(a, b); d = ROTATE(XOR(d, a), 16); \
c = PLUS(c, d); b = ROTATE(XOR(b, c), 12); \
a = PLUS(a, b); d = ROTATE(XOR(d, a), 8); \
c = PLUS(c, d); b = ROTATE(XOR(b, c), 7);
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
static void
chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
chacha_keysetup(chacha_ctx *chacha, const u8 *k, u32 kbits)
{
const char *constants;
x->input[4] = U8TO32_LITTLE(k + 0);
x->input[5] = U8TO32_LITTLE(k + 4);
x->input[6] = U8TO32_LITTLE(k + 8);
x->input[7] = U8TO32_LITTLE(k + 12);
chacha->input[4] = U8TO32_LITTLE(k + 0);
chacha->input[5] = U8TO32_LITTLE(k + 4);
chacha->input[6] = U8TO32_LITTLE(k + 8);
chacha->input[7] = U8TO32_LITTLE(k + 12);
if (kbits == 256) { /* recommended */
k += 16;
constants = sigma;
} else { /* kbits == 128 */
constants = tau;
}
x->input[8] = U8TO32_LITTLE(k + 0);
x->input[9] = U8TO32_LITTLE(k + 4);
x->input[10] = U8TO32_LITTLE(k + 8);
x->input[11] = U8TO32_LITTLE(k + 12);
x->input[0] = U8TO32_LITTLE(constants + 0);
x->input[1] = U8TO32_LITTLE(constants + 4);
x->input[2] = U8TO32_LITTLE(constants + 8);
x->input[3] = U8TO32_LITTLE(constants + 12);
chacha->input[8] = U8TO32_LITTLE(k + 0);
chacha->input[9] = U8TO32_LITTLE(k + 4);
chacha->input[10] = U8TO32_LITTLE(k + 8);
chacha->input[11] = U8TO32_LITTLE(k + 12);
chacha->input[0] = U8TO32_LITTLE(constants + 0);
chacha->input[1] = U8TO32_LITTLE(constants + 4);
chacha->input[2] = U8TO32_LITTLE(constants + 8);
chacha->input[3] = U8TO32_LITTLE(constants + 12);
}
static void
chacha_ivsetup(chacha_ctx *x,const u8 *iv)
chacha_ivsetup(chacha_ctx *chacha, const u8 *iv)
{
x->input[12] = 0;
x->input[13] = 0;
x->input[14] = U8TO32_LITTLE(iv + 0);
x->input[15] = U8TO32_LITTLE(iv + 4);
chacha->input[12] = 0;
chacha->input[13] = 0;
chacha->input[14] = U8TO32_LITTLE(iv + 0);
chacha->input[15] = U8TO32_LITTLE(iv + 4);
}
static void
chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
chacha_encrypt_bytes(chacha_ctx *chacha, const u8 *m, u8 *c, u32 bytes)
{
u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
@@ -97,22 +97,22 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
if (!bytes) return;
j0 = x->input[0];
j1 = x->input[1];
j2 = x->input[2];
j3 = x->input[3];
j4 = x->input[4];
j5 = x->input[5];
j6 = x->input[6];
j7 = x->input[7];
j8 = x->input[8];
j9 = x->input[9];
j10 = x->input[10];
j11 = x->input[11];
j12 = x->input[12];
j13 = x->input[13];
j14 = x->input[14];
j15 = x->input[15];
j0 = chacha->input[0];
j1 = chacha->input[1];
j2 = chacha->input[2];
j3 = chacha->input[3];
j4 = chacha->input[4];
j5 = chacha->input[5];
j6 = chacha->input[6];
j7 = chacha->input[7];
j8 = chacha->input[8];
j9 = chacha->input[9];
j10 = chacha->input[10];
j11 = chacha->input[11];
j12 = chacha->input[12];
j13 = chacha->input[13];
j14 = chacha->input[14];
j15 = chacha->input[15];
for (;;) {
if (bytes < 64) {
@@ -138,50 +138,31 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
x14 = j14;
x15 = j15;
for (i = 20;i > 0;i -= 2) {
QUARTERROUND( x0, x4, x8,x12)
QUARTERROUND( x1, x5, x9,x13)
QUARTERROUND( x2, x6,x10,x14)
QUARTERROUND( x3, x7,x11,x15)
QUARTERROUND( x0, x5,x10,x15)
QUARTERROUND( x1, x6,x11,x12)
QUARTERROUND( x2, x7, x8,x13)
QUARTERROUND( x3, x4, x9,x14)
QUARTERROUND( x0, x4, x8, x12)
QUARTERROUND( x1, x5, x9, x13)
QUARTERROUND( x2, x6, x10, x14)
QUARTERROUND( x3, x7, x11, x15)
QUARTERROUND( x0, x5, x10, x15)
QUARTERROUND( x1, x6, x11, x12)
QUARTERROUND( x2, x7, x8, x13)
QUARTERROUND( x3, x4, x9, x14)
}
x0 = PLUS(x0,j0);
x1 = PLUS(x1,j1);
x2 = PLUS(x2,j2);
x3 = PLUS(x3,j3);
x4 = PLUS(x4,j4);
x5 = PLUS(x5,j5);
x6 = PLUS(x6,j6);
x7 = PLUS(x7,j7);
x8 = PLUS(x8,j8);
x9 = PLUS(x9,j9);
x10 = PLUS(x10,j10);
x11 = PLUS(x11,j11);
x12 = PLUS(x12,j12);
x13 = PLUS(x13,j13);
x14 = PLUS(x14,j14);
x15 = PLUS(x15,j15);
#ifndef KEYSTREAM_ONLY
x0 = XOR(x0,U8TO32_LITTLE(m + 0));
x1 = XOR(x1,U8TO32_LITTLE(m + 4));
x2 = XOR(x2,U8TO32_LITTLE(m + 8));
x3 = XOR(x3,U8TO32_LITTLE(m + 12));
x4 = XOR(x4,U8TO32_LITTLE(m + 16));
x5 = XOR(x5,U8TO32_LITTLE(m + 20));
x6 = XOR(x6,U8TO32_LITTLE(m + 24));
x7 = XOR(x7,U8TO32_LITTLE(m + 28));
x8 = XOR(x8,U8TO32_LITTLE(m + 32));
x9 = XOR(x9,U8TO32_LITTLE(m + 36));
x10 = XOR(x10,U8TO32_LITTLE(m + 40));
x11 = XOR(x11,U8TO32_LITTLE(m + 44));
x12 = XOR(x12,U8TO32_LITTLE(m + 48));
x13 = XOR(x13,U8TO32_LITTLE(m + 52));
x14 = XOR(x14,U8TO32_LITTLE(m + 56));
x15 = XOR(x15,U8TO32_LITTLE(m + 60));
#endif
x0 = PLUS(x0, j0);
x1 = PLUS(x1, j1);
x2 = PLUS(x2, j2);
x3 = PLUS(x3, j3);
x4 = PLUS(x4, j4);
x5 = PLUS(x5, j5);
x6 = PLUS(x6, j6);
x7 = PLUS(x7, j7);
x8 = PLUS(x8, j8);
x9 = PLUS(x9, j9);
x10 = PLUS(x10, j10);
x11 = PLUS(x11, j11);
x12 = PLUS(x12, j12);
x13 = PLUS(x13, j13);
x14 = PLUS(x14, j14);
x15 = PLUS(x15, j15);
j12 = PLUSONE(j12);
if (!j12) {
@@ -189,35 +170,32 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
/* stopping at 2^70 bytes per nonce is user's responsibility */
}
U32TO8_LITTLE(c + 0,x0);
U32TO8_LITTLE(c + 4,x1);
U32TO8_LITTLE(c + 8,x2);
U32TO8_LITTLE(c + 12,x3);
U32TO8_LITTLE(c + 16,x4);
U32TO8_LITTLE(c + 20,x5);
U32TO8_LITTLE(c + 24,x6);
U32TO8_LITTLE(c + 28,x7);
U32TO8_LITTLE(c + 32,x8);
U32TO8_LITTLE(c + 36,x9);
U32TO8_LITTLE(c + 40,x10);
U32TO8_LITTLE(c + 44,x11);
U32TO8_LITTLE(c + 48,x12);
U32TO8_LITTLE(c + 52,x13);
U32TO8_LITTLE(c + 56,x14);
U32TO8_LITTLE(c + 60,x15);
U32TO8_LITTLE(c + 0, x0);
U32TO8_LITTLE(c + 4, x1);
U32TO8_LITTLE(c + 8, x2);
U32TO8_LITTLE(c + 12, x3);
U32TO8_LITTLE(c + 16, x4);
U32TO8_LITTLE(c + 20, x5);
U32TO8_LITTLE(c + 24, x6);
U32TO8_LITTLE(c + 28, x7);
U32TO8_LITTLE(c + 32, x8);
U32TO8_LITTLE(c + 36, x9);
U32TO8_LITTLE(c + 40, x10);
U32TO8_LITTLE(c + 44, x11);
U32TO8_LITTLE(c + 48, x12);
U32TO8_LITTLE(c + 52, x13);
U32TO8_LITTLE(c + 56, x14);
U32TO8_LITTLE(c + 60, x15);
if (bytes <= 64) {
if (bytes < 64) {
for (i = 0;i < bytes;++i) ctarget[i] = c[i];
}
x->input[12] = j12;
x->input[13] = j13;
chacha->input[12] = j12;
chacha->input[13] = j13;
return;
}
bytes -= 64;
c += 64;
#ifndef KEYSTREAM_ONLY
m += 64;
#endif
}
}

View File

@@ -76,19 +76,17 @@ static void random_bytes(RNG_t rng, uint8_t *dest, size_t needed)
{
while (needed > 0) {
assert(rng->unused_bytes <= sizeof(rng->random_bytes));
if (rng->unused_bytes == 0) {
if (rng->unused_bytes == 0)
rekey(rng);
} else {
size_t to_get = MIN(needed, rng->unused_bytes);
assert(to_get <= rng->unused_bytes);
uint8_t *keystream = rng->random_bytes + sizeof(rng->random_bytes) - rng->unused_bytes;
memcpy(dest, keystream, to_get);
memset(keystream, 0, to_get);
dest += to_get;
needed -= to_get;
rng->unused_bytes -= to_get;
assert(rng->unused_bytes <= sizeof(rng->random_bytes));
}
size_t batch_size = MIN(needed, rng->unused_bytes);
uint8_t *batch_src = rng->random_bytes + sizeof(rng->random_bytes) - rng->unused_bytes;
memcpy(dest, batch_src, batch_size);
memset(batch_src, 0, batch_size);
rng->unused_bytes -= batch_size;
dest += batch_size;
needed -= batch_size;
assert(rng->unused_bytes <= sizeof(rng->random_bytes));
}
}
@@ -224,7 +222,7 @@ public Num_t RNG$num(RNG_t rng, Num_t min, Num_t max)
union {
Num_t num;
uint64_t bits;
} r, one = {.num=1.0};
} r = {.bits=0}, one = {.num=1.0};
random_bytes(rng, (void*)&r, sizeof(r));
// Set r.num to 1.<random-bits>

View File

@@ -206,13 +206,13 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le
PUREFUNC static inline int64_t num_subtexts(Text_t t)
{
if (t.tag != TEXT_SUBTEXT) return 1;
int64_t len = t.length;
int64_t n = 0;
while (len > 0) {
len -= t.subtexts[n].length;
++n;
int64_t remaining = t.length;
int64_t subtexts = 0;
while (remaining > 0) {
remaining -= t.subtexts[subtexts].length;
++subtexts;
}
return n;
return subtexts;
}
int text_visualize(FILE *stream, Text_t t)
@@ -402,9 +402,8 @@ public Text_t Text$_concat(int n, Text_t items[n])
if (n == 1) return items[0];
if (n == 2) return concat2(items[0], items[1]);
int64_t len = 0, subtexts = 0;
int64_t subtexts = 0;
for (int i = 0; i < n; i++) {
len += items[i].length;
if (items[i].length > 0)
subtexts += num_subtexts(items[i]);
}
@@ -412,14 +411,14 @@ public Text_t Text$_concat(int n, Text_t items[n])
Text_t ret = {
.length=0,
.tag=TEXT_SUBTEXT,
.subtexts=GC_MALLOC(sizeof(Text_t[len])),
.subtexts=GC_MALLOC(sizeof(Text_t[subtexts])),
};
int64_t sub_i = 0;
for (int i = 0; i < n; i++) {
if (items[i].length == 0)
continue;
if (i > 0 && unlikely(!is_concat_stable(items[i-1], items[i]))) {
if (i > 0 && unlikely(!is_concat_stable(ret, items[i]))) {
// Oops, guess this wasn't stable for concatenation, let's break it
// up into subtasks:
return concat2(ret, Text$_concat(n-i, &items[i]));