diff --git a/stdlib/chacha.h b/stdlib/chacha.h index 1bca008..69d79ea 100644 --- a/stdlib/chacha.h +++ b/stdlib/chacha.h @@ -38,56 +38,56 @@ typedef struct (p)[3] = U8V((v) >> 24); \ } while (0) -#define ROTATE(v,c) (ROTL32(v,c)) -#define XOR(v,w) ((v) ^ (w)) -#define PLUS(v,w) (U32V((v) + (w))) -#define PLUSONE(v) (PLUS((v),1)) +#define ROTATE(v, c) (ROTL32(v, c)) +#define XOR(v, w) ((v) ^ (w)) +#define PLUS(v, w) (U32V((v) + (w))) +#define PLUSONE(v) (PLUS((v), 1)) -#define QUARTERROUND(a,b,c,d) \ - a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ - c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ - a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ - c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); +#define QUARTERROUND(a, b, c, d) \ + a = PLUS(a, b); d = ROTATE(XOR(d, a), 16); \ + c = PLUS(c, d); b = ROTATE(XOR(b, c), 12); \ + a = PLUS(a, b); d = ROTATE(XOR(d, a), 8); \ + c = PLUS(c, d); b = ROTATE(XOR(b, c), 7); static const char sigma[16] = "expand 32-byte k"; static const char tau[16] = "expand 16-byte k"; static void -chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits) +chacha_keysetup(chacha_ctx *chacha, const u8 *k, u32 kbits) { const char *constants; - x->input[4] = U8TO32_LITTLE(k + 0); - x->input[5] = U8TO32_LITTLE(k + 4); - x->input[6] = U8TO32_LITTLE(k + 8); - x->input[7] = U8TO32_LITTLE(k + 12); + chacha->input[4] = U8TO32_LITTLE(k + 0); + chacha->input[5] = U8TO32_LITTLE(k + 4); + chacha->input[6] = U8TO32_LITTLE(k + 8); + chacha->input[7] = U8TO32_LITTLE(k + 12); if (kbits == 256) { /* recommended */ k += 16; constants = sigma; } else { /* kbits == 128 */ constants = tau; } - x->input[8] = U8TO32_LITTLE(k + 0); - x->input[9] = U8TO32_LITTLE(k + 4); - x->input[10] = U8TO32_LITTLE(k + 8); - x->input[11] = U8TO32_LITTLE(k + 12); - x->input[0] = U8TO32_LITTLE(constants + 0); - x->input[1] = U8TO32_LITTLE(constants + 4); - x->input[2] = U8TO32_LITTLE(constants + 8); - x->input[3] = U8TO32_LITTLE(constants + 12); + chacha->input[8] = U8TO32_LITTLE(k + 0); + chacha->input[9] = U8TO32_LITTLE(k + 4); + chacha->input[10] = U8TO32_LITTLE(k + 8); + chacha->input[11] = U8TO32_LITTLE(k + 12); + chacha->input[0] = U8TO32_LITTLE(constants + 0); + chacha->input[1] = U8TO32_LITTLE(constants + 4); + chacha->input[2] = U8TO32_LITTLE(constants + 8); + chacha->input[3] = U8TO32_LITTLE(constants + 12); } static void -chacha_ivsetup(chacha_ctx *x,const u8 *iv) +chacha_ivsetup(chacha_ctx *chacha, const u8 *iv) { - x->input[12] = 0; - x->input[13] = 0; - x->input[14] = U8TO32_LITTLE(iv + 0); - x->input[15] = U8TO32_LITTLE(iv + 4); + chacha->input[12] = 0; + chacha->input[13] = 0; + chacha->input[14] = U8TO32_LITTLE(iv + 0); + chacha->input[15] = U8TO32_LITTLE(iv + 4); } static void -chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) +chacha_encrypt_bytes(chacha_ctx *chacha, const u8 *m, u8 *c, u32 bytes) { u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; @@ -97,22 +97,22 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) if (!bytes) return; - j0 = x->input[0]; - j1 = x->input[1]; - j2 = x->input[2]; - j3 = x->input[3]; - j4 = x->input[4]; - j5 = x->input[5]; - j6 = x->input[6]; - j7 = x->input[7]; - j8 = x->input[8]; - j9 = x->input[9]; - j10 = x->input[10]; - j11 = x->input[11]; - j12 = x->input[12]; - j13 = x->input[13]; - j14 = x->input[14]; - j15 = x->input[15]; + j0 = chacha->input[0]; + j1 = chacha->input[1]; + j2 = chacha->input[2]; + j3 = chacha->input[3]; + j4 = chacha->input[4]; + j5 = chacha->input[5]; + j6 = chacha->input[6]; + j7 = chacha->input[7]; + j8 = chacha->input[8]; + j9 = chacha->input[9]; + j10 = chacha->input[10]; + j11 = chacha->input[11]; + j12 = chacha->input[12]; + j13 = chacha->input[13]; + j14 = chacha->input[14]; + j15 = chacha->input[15]; for (;;) { if (bytes < 64) { @@ -138,50 +138,31 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) x14 = j14; x15 = j15; for (i = 20;i > 0;i -= 2) { - QUARTERROUND( x0, x4, x8,x12) - QUARTERROUND( x1, x5, x9,x13) - QUARTERROUND( x2, x6,x10,x14) - QUARTERROUND( x3, x7,x11,x15) - QUARTERROUND( x0, x5,x10,x15) - QUARTERROUND( x1, x6,x11,x12) - QUARTERROUND( x2, x7, x8,x13) - QUARTERROUND( x3, x4, x9,x14) + QUARTERROUND( x0, x4, x8, x12) + QUARTERROUND( x1, x5, x9, x13) + QUARTERROUND( x2, x6, x10, x14) + QUARTERROUND( x3, x7, x11, x15) + QUARTERROUND( x0, x5, x10, x15) + QUARTERROUND( x1, x6, x11, x12) + QUARTERROUND( x2, x7, x8, x13) + QUARTERROUND( x3, x4, x9, x14) } - x0 = PLUS(x0,j0); - x1 = PLUS(x1,j1); - x2 = PLUS(x2,j2); - x3 = PLUS(x3,j3); - x4 = PLUS(x4,j4); - x5 = PLUS(x5,j5); - x6 = PLUS(x6,j6); - x7 = PLUS(x7,j7); - x8 = PLUS(x8,j8); - x9 = PLUS(x9,j9); - x10 = PLUS(x10,j10); - x11 = PLUS(x11,j11); - x12 = PLUS(x12,j12); - x13 = PLUS(x13,j13); - x14 = PLUS(x14,j14); - x15 = PLUS(x15,j15); - -#ifndef KEYSTREAM_ONLY - x0 = XOR(x0,U8TO32_LITTLE(m + 0)); - x1 = XOR(x1,U8TO32_LITTLE(m + 4)); - x2 = XOR(x2,U8TO32_LITTLE(m + 8)); - x3 = XOR(x3,U8TO32_LITTLE(m + 12)); - x4 = XOR(x4,U8TO32_LITTLE(m + 16)); - x5 = XOR(x5,U8TO32_LITTLE(m + 20)); - x6 = XOR(x6,U8TO32_LITTLE(m + 24)); - x7 = XOR(x7,U8TO32_LITTLE(m + 28)); - x8 = XOR(x8,U8TO32_LITTLE(m + 32)); - x9 = XOR(x9,U8TO32_LITTLE(m + 36)); - x10 = XOR(x10,U8TO32_LITTLE(m + 40)); - x11 = XOR(x11,U8TO32_LITTLE(m + 44)); - x12 = XOR(x12,U8TO32_LITTLE(m + 48)); - x13 = XOR(x13,U8TO32_LITTLE(m + 52)); - x14 = XOR(x14,U8TO32_LITTLE(m + 56)); - x15 = XOR(x15,U8TO32_LITTLE(m + 60)); -#endif + x0 = PLUS(x0, j0); + x1 = PLUS(x1, j1); + x2 = PLUS(x2, j2); + x3 = PLUS(x3, j3); + x4 = PLUS(x4, j4); + x5 = PLUS(x5, j5); + x6 = PLUS(x6, j6); + x7 = PLUS(x7, j7); + x8 = PLUS(x8, j8); + x9 = PLUS(x9, j9); + x10 = PLUS(x10, j10); + x11 = PLUS(x11, j11); + x12 = PLUS(x12, j12); + x13 = PLUS(x13, j13); + x14 = PLUS(x14, j14); + x15 = PLUS(x15, j15); j12 = PLUSONE(j12); if (!j12) { @@ -189,35 +170,32 @@ chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) /* stopping at 2^70 bytes per nonce is user's responsibility */ } - U32TO8_LITTLE(c + 0,x0); - U32TO8_LITTLE(c + 4,x1); - U32TO8_LITTLE(c + 8,x2); - U32TO8_LITTLE(c + 12,x3); - U32TO8_LITTLE(c + 16,x4); - U32TO8_LITTLE(c + 20,x5); - U32TO8_LITTLE(c + 24,x6); - U32TO8_LITTLE(c + 28,x7); - U32TO8_LITTLE(c + 32,x8); - U32TO8_LITTLE(c + 36,x9); - U32TO8_LITTLE(c + 40,x10); - U32TO8_LITTLE(c + 44,x11); - U32TO8_LITTLE(c + 48,x12); - U32TO8_LITTLE(c + 52,x13); - U32TO8_LITTLE(c + 56,x14); - U32TO8_LITTLE(c + 60,x15); + U32TO8_LITTLE(c + 0, x0); + U32TO8_LITTLE(c + 4, x1); + U32TO8_LITTLE(c + 8, x2); + U32TO8_LITTLE(c + 12, x3); + U32TO8_LITTLE(c + 16, x4); + U32TO8_LITTLE(c + 20, x5); + U32TO8_LITTLE(c + 24, x6); + U32TO8_LITTLE(c + 28, x7); + U32TO8_LITTLE(c + 32, x8); + U32TO8_LITTLE(c + 36, x9); + U32TO8_LITTLE(c + 40, x10); + U32TO8_LITTLE(c + 44, x11); + U32TO8_LITTLE(c + 48, x12); + U32TO8_LITTLE(c + 52, x13); + U32TO8_LITTLE(c + 56, x14); + U32TO8_LITTLE(c + 60, x15); if (bytes <= 64) { if (bytes < 64) { for (i = 0;i < bytes;++i) ctarget[i] = c[i]; } - x->input[12] = j12; - x->input[13] = j13; + chacha->input[12] = j12; + chacha->input[13] = j13; return; } bytes -= 64; c += 64; -#ifndef KEYSTREAM_ONLY - m += 64; -#endif } } diff --git a/stdlib/rng.c b/stdlib/rng.c index befc535..9558f96 100644 --- a/stdlib/rng.c +++ b/stdlib/rng.c @@ -76,19 +76,17 @@ static void random_bytes(RNG_t rng, uint8_t *dest, size_t needed) { while (needed > 0) { assert(rng->unused_bytes <= sizeof(rng->random_bytes)); - if (rng->unused_bytes == 0) { + if (rng->unused_bytes == 0) rekey(rng); - } else { - size_t to_get = MIN(needed, rng->unused_bytes); - assert(to_get <= rng->unused_bytes); - uint8_t *keystream = rng->random_bytes + sizeof(rng->random_bytes) - rng->unused_bytes; - memcpy(dest, keystream, to_get); - memset(keystream, 0, to_get); - dest += to_get; - needed -= to_get; - rng->unused_bytes -= to_get; - assert(rng->unused_bytes <= sizeof(rng->random_bytes)); - } + + size_t batch_size = MIN(needed, rng->unused_bytes); + uint8_t *batch_src = rng->random_bytes + sizeof(rng->random_bytes) - rng->unused_bytes; + memcpy(dest, batch_src, batch_size); + memset(batch_src, 0, batch_size); + rng->unused_bytes -= batch_size; + dest += batch_size; + needed -= batch_size; + assert(rng->unused_bytes <= sizeof(rng->random_bytes)); } } @@ -224,7 +222,7 @@ public Num_t RNG$num(RNG_t rng, Num_t min, Num_t max) union { Num_t num; uint64_t bits; - } r, one = {.num=1.0}; + } r = {.bits=0}, one = {.num=1.0}; random_bytes(rng, (void*)&r, sizeof(r)); // Set r.num to 1. diff --git a/stdlib/text.c b/stdlib/text.c index f4789ff..4879ead 100644 --- a/stdlib/text.c +++ b/stdlib/text.c @@ -206,13 +206,13 @@ public int32_t get_synthetic_grapheme(const ucs4_t *codepoints, int64_t utf32_le PUREFUNC static inline int64_t num_subtexts(Text_t t) { if (t.tag != TEXT_SUBTEXT) return 1; - int64_t len = t.length; - int64_t n = 0; - while (len > 0) { - len -= t.subtexts[n].length; - ++n; + int64_t remaining = t.length; + int64_t subtexts = 0; + while (remaining > 0) { + remaining -= t.subtexts[subtexts].length; + ++subtexts; } - return n; + return subtexts; } int text_visualize(FILE *stream, Text_t t) @@ -402,9 +402,8 @@ public Text_t Text$_concat(int n, Text_t items[n]) if (n == 1) return items[0]; if (n == 2) return concat2(items[0], items[1]); - int64_t len = 0, subtexts = 0; + int64_t subtexts = 0; for (int i = 0; i < n; i++) { - len += items[i].length; if (items[i].length > 0) subtexts += num_subtexts(items[i]); } @@ -412,14 +411,14 @@ public Text_t Text$_concat(int n, Text_t items[n]) Text_t ret = { .length=0, .tag=TEXT_SUBTEXT, - .subtexts=GC_MALLOC(sizeof(Text_t[len])), + .subtexts=GC_MALLOC(sizeof(Text_t[subtexts])), }; int64_t sub_i = 0; for (int i = 0; i < n; i++) { if (items[i].length == 0) continue; - if (i > 0 && unlikely(!is_concat_stable(items[i-1], items[i]))) { + if (i > 0 && unlikely(!is_concat_stable(ret, items[i]))) { // Oops, guess this wasn't stable for concatenation, let's break it // up into subtasks: return concat2(ret, Text$_concat(n-i, &items[i]));