aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Hill <bruce@bruce-hill.com>2024-09-05 16:23:05 -0400
committerBruce Hill <bruce@bruce-hill.com>2024-09-05 16:23:05 -0400
commitbac14fa6c79b6abea50fd5f188799ae8e83cb20d (patch)
tree416a7b9ff39075ab33fb93ab4e86003df4d25aca
parent47e8972427c152891c4c6c4b458f78cd7ceb8aee (diff)
Fully clean up siphash code and fix some issues
-rw-r--r--Makefile11
-rw-r--r--builtins/array.c6
-rw-r--r--builtins/c_string.c9
-rw-r--r--builtins/channel.c8
-rw-r--r--builtins/functions.c7
-rw-r--r--builtins/functions.h3
-rw-r--r--builtins/halfsiphash.h22
-rw-r--r--builtins/integers.c7
-rw-r--r--builtins/memory.c1
-rw-r--r--builtins/pointer.c1
-rw-r--r--builtins/siphash-internals.h125
-rw-r--r--builtins/siphash.h9
-rw-r--r--builtins/table.c28
-rw-r--r--builtins/table.h2
-rw-r--r--builtins/text.c8
-rw-r--r--builtins/thread.c1
-rw-r--r--builtins/tomo.h5
-rw-r--r--builtins/types.h2
-rw-r--r--enums.c14
-rw-r--r--structs.c8
20 files changed, 186 insertions, 91 deletions
diff --git a/Makefile b/Makefile
index 30640174..5f0f2be8 100644
--- a/Makefile
+++ b/Makefile
@@ -24,22 +24,19 @@ G=-ggdb
O=-Og
CFLAGS=$(CCONFIG) $(EXTRA) $(CWARN) $(G) $(O) $(OSFLAGS)
LDLIBS=-lgc -lcord -lm -lunistring -lgmp -ldl
-BUILTIN_OBJS=builtins/array.o builtins/bool.o builtins/channel.o builtins/nums.o builtins/functions.o builtins/integers.o \
+BUILTIN_OBJS=builtins/siphash.o builtins/array.o builtins/bool.o builtins/channel.o builtins/nums.o builtins/functions.o builtins/integers.o \
builtins/pointer.o builtins/memory.o builtins/text.o builtins/thread.o builtins/c_string.o builtins/table.o \
builtins/types.o builtins/util.o builtins/files.o builtins/range.o
TESTS=$(patsubst %.tm,%.tm.testresult,$(wildcard test/*.tm))
all: libtomo.so tomo
-tomo: tomo.o $(BUILTIN_OBJS) SipHash/halfsiphash.o ast.o parse.o environment.o types.o typecheck.o structs.o enums.o compile.o repl.o
+tomo: tomo.o $(BUILTIN_OBJS) ast.o parse.o environment.o types.o typecheck.o structs.o enums.o compile.o repl.o
$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
-libtomo.so: $(BUILTIN_OBJS) SipHash/halfsiphash.o
+libtomo.so: $(BUILTIN_OBJS)
$(CC) $^ $(CFLAGS) $(EXTRA) $(CWARN) $(G) $(O) $(OSFLAGS) -lgc -lcord -lm -lunistring -lgmp -ldl -Wl,-soname,libtomo.so -shared -o $@
-SipHash/halfsiphash.c:
- git submodule update --init --recursive
-
tags:
ctags *.[ch] **/*.[ch]
@@ -58,7 +55,7 @@ test: $(TESTS)
@echo -e '\x1b[32;7m ALL TESTS PASSED! \x1b[m'
clean:
- rm -f tomo *.o SipHash/halfsiphash.o builtins/*.o libtomo.so test/*.tm.{c,h,o,testresult}
+ rm -f tomo *.o builtins/*.o libtomo.so test/*.tm.{c,h,o,testresult} examples/*.tm.*{c,h,o}
%.1: %.1.md
pandoc --lua-filter=.pandoc/bold-code.lua -s $< -t man -o $@
diff --git a/builtins/array.c b/builtins/array.c
index 174bcbdd..f09af917 100644
--- a/builtins/array.c
+++ b/builtins/array.c
@@ -18,7 +18,9 @@
#include "types.h"
#include "util.h"
-#include "siphash.c"
+// Use inline version of siphash code:
+#include "siphash.h"
+#include "siphash-internals.h"
static inline int64_t get_padded_item_size(const TypeInfo *info)
{
@@ -555,7 +557,7 @@ public uint64_t Array$hash(const Array_t *arr, const TypeInfo *type)
{
const TypeInfo *item = type->ArrayInfo.item;
siphash sh;
- siphashinit(&sh, sizeof(uint64_t[arr->length]), (uint64_t*)TOMO_HASH_KEY);
+ siphashinit(&sh, sizeof(uint64_t[arr->length]));
if (item->tag == PointerInfo || (item->tag == CustomInfo && item->CustomInfo.hash == NULL && item->size == sizeof(void*))) { // Raw data hash
for (int64_t i = 0; i < arr->length; i++)
siphashadd64bits(&sh, (uint64_t)(arr->data + i*arr->stride));
diff --git a/builtins/c_string.c b/builtins/c_string.c
index b0c70f0a..d916bfa9 100644
--- a/builtins/c_string.c
+++ b/builtins/c_string.c
@@ -8,8 +8,8 @@
#include <stdlib.h>
#include "functions.h"
-#include "halfsiphash.h"
#include "text.h"
+#include "siphash.h"
#include "types.h"
#include "util.h"
@@ -37,13 +37,10 @@ public bool CString$equal(const char **x, const char **y)
return CString$compare(x, y) == 0;
}
-public uint32_t CString$hash(const char **c_str)
+public uint64_t CString$hash(const char **c_str)
{
if (!*c_str) return 0;
-
- uint32_t hash;
- halfsiphash(*c_str, strlen(*c_str), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
- return hash;
+ return siphash24((void*)*c_str, strlen(*c_str));
}
public const TypeInfo CString$info = {
diff --git a/builtins/channel.c b/builtins/channel.c
index 9b1754af..e877b6b5 100644
--- a/builtins/channel.c
+++ b/builtins/channel.c
@@ -13,8 +13,8 @@
#include "array.h"
#include "functions.h"
-#include "halfsiphash.h"
#include "integers.h"
+#include "siphash.h"
#include "text.h"
#include "types.h"
#include "util.h"
@@ -100,12 +100,10 @@ public void Channel$clear(channel_t *channel)
(void)pthread_cond_signal(&channel->cond);
}
-public uint32_t Channel$hash(const channel_t **channel, const TypeInfo *type)
+public uint64_t Channel$hash(const channel_t **channel, const TypeInfo *type)
{
(void)type;
- uint32_t hash;
- halfsiphash(*channel, sizeof(channel_t*), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
- return hash;
+ return siphash24((void*)*channel, sizeof(channel_t*));
}
public int32_t Channel$compare(const channel_t **x, const channel_t **y, const TypeInfo *type)
diff --git a/builtins/functions.c b/builtins/functions.c
index 0ccc44cb..b59885bc 100644
--- a/builtins/functions.c
+++ b/builtins/functions.c
@@ -17,15 +17,14 @@
#include "functions.h"
#include "integers.h"
#include "pointer.h"
+#include "siphash.h"
#include "string.h"
#include "table.h"
#include "text.h"
#include "types.h"
#include "util.h"
-#include "siphash.c"
-
-public uint8_t TOMO_HASH_KEY[16] = {0};
+public uint64_t TOMO_HASH_KEY[2] = {23, 42}; // Randomized in tomo_init()
public void tomo_init(void)
{
@@ -115,7 +114,7 @@ public uint64_t generic_hash(const void *obj, const TypeInfo *type)
return type->CustomInfo.hash(obj, type);
default: {
hash_data:;
- return siphash24((void*)obj, type->size, (uint64_t*)TOMO_HASH_KEY);
+ return siphash24((void*)obj, type->size);
}
}
}
diff --git a/builtins/functions.h b/builtins/functions.h
index c9db6ea8..6445400a 100644
--- a/builtins/functions.h
+++ b/builtins/functions.h
@@ -9,10 +9,7 @@
#include "datatypes.h"
#include "types.h"
-extern uint8_t TOMO_HASH_KEY[16];
-
void tomo_init(void);
-
void fail(const char *fmt, ...);
void fail_source(const char *filename, int64_t start, int64_t end, const char *fmt, ...);
Text_t builtin_last_err();
diff --git a/builtins/halfsiphash.h b/builtins/halfsiphash.h
deleted file mode 100644
index a1af8cd2..00000000
--- a/builtins/halfsiphash.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- SipHash reference C implementation
-
- Copyright (c) 2012-2021 Jean-Philippe Aumasson
- <jeanphilippe.aumasson@gmail.com>
- Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
-
- To the extent possible under law, the author(s) have dedicated all copyright
- and related and neighboring rights to this software to the public domain
- worldwide. This software is distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with
- this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
- */
-
-#include <inttypes.h>
-#include <string.h>
-
-int halfsiphash(const void *in, const size_t inlen, const void *k, uint8_t *out,
- const size_t outlen);
diff --git a/builtins/integers.c b/builtins/integers.c
index aad4bf33..523ec4d1 100644
--- a/builtins/integers.c
+++ b/builtins/integers.c
@@ -10,11 +10,10 @@
#include "array.h"
#include "datatypes.h"
#include "integers.h"
+#include "siphash.h"
#include "text.h"
#include "types.h"
-#include "siphash.c"
-
static gmp_randstate_t Int_rng = {};
public void Int$init_random(long seed)
@@ -63,10 +62,10 @@ public uint64_t Int$hash(const Int_t *x, const TypeInfo *type) {
(void)type;
if (__builtin_expect(x->small & 1, 1)) {
int64_t i = (x->small>>2);
- return siphash24((void*)&i, sizeof(i), (uint64_t*)TOMO_HASH_KEY);
+ return siphash24((void*)&i, sizeof(i));
} else {
char *str = mpz_get_str(NULL, 16, *x->big);
- return siphash24((void*)str, strlen(str), (uint64_t*)TOMO_HASH_KEY);
+ return siphash24((void*)str, strlen(str));
}
}
diff --git a/builtins/memory.c b/builtins/memory.c
index 47ff081b..9d7dbc80 100644
--- a/builtins/memory.c
+++ b/builtins/memory.c
@@ -7,7 +7,6 @@
#include <sys/param.h>
#include <err.h>
-#include "halfsiphash.h"
#include "memory.h"
#include "text.h"
#include "types.h"
diff --git a/builtins/pointer.c b/builtins/pointer.c
index bd18a1e4..66cd308a 100644
--- a/builtins/pointer.c
+++ b/builtins/pointer.c
@@ -9,7 +9,6 @@
#include <sys/param.h>
#include "functions.h"
-#include "halfsiphash.h"
#include "text.h"
#include "types.h"
#include "util.h"
diff --git a/builtins/siphash-internals.h b/builtins/siphash-internals.h
new file mode 100644
index 00000000..95b00b53
--- /dev/null
+++ b/builtins/siphash-internals.h
@@ -0,0 +1,125 @@
+#pragma once
+
+// This file holds the internals for the SipHash implementation. For a few
+// cases, we want to include this for incrementally computing hashes.
+// Otherwise, it suffices to just use the siphash24() function from siphash.h
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "tomo.h"
+
+/* <MIT License>
+ Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
+ Copyright (c) 2018 Samantha McVey <samantham@posteo.net>
+ Copyright (c) 2024 Bruce Hill <bruce@bruce-hill.com>
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+ </MIT License>
+
+ Original location:
+ https://github.com/majek/csiphash/
+
+ Original solution inspired by code from:
+ Samuel Neves (supercop/crypto_auth/siphash24/little)
+ djb (supercop/crypto_auth/siphash24/little2)
+ Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
+
+ Extensive modifications for MoarVM by Samantha McVey
+
+ Further modifications for Tomo by Bruce Hill
+*/
+struct siphash {
+ uint64_t v0;
+ uint64_t v1;
+ uint64_t v2;
+ uint64_t v3;
+ uint64_t b;
+};
+typedef struct siphash siphash;
+#define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
+
+#define HALF_ROUND(a,b,c,d,s,t) \
+ a += b; c += d; \
+ b = ROTATE(b, s) ^ a; \
+ d = ROTATE(d, t) ^ c; \
+ a = ROTATE(a, 32);
+
+#define DOUBLE_ROUND(v0,v1,v2,v3) \
+ HALF_ROUND(v0,v1,v2,v3,13,16); \
+ HALF_ROUND(v2,v1,v0,v3,17,21); \
+ HALF_ROUND(v0,v1,v2,v3,13,16); \
+ HALF_ROUND(v2,v1,v0,v3,17,21);
+
+static inline void siphashinit (siphash *sh, size_t src_sz) {
+ const uint64_t k0 = TOMO_HASH_KEY[0];
+ const uint64_t k1 = TOMO_HASH_KEY[1];
+ sh->b = (uint64_t)src_sz << 56;
+ sh->v0 = k0 ^ 0x736f6d6570736575ULL;
+ sh->v1 = k1 ^ 0x646f72616e646f6dULL;
+ sh->v2 = k0 ^ 0x6c7967656e657261ULL;
+ sh->v3 = k1 ^ 0x7465646279746573ULL;
+}
+static inline void siphashadd64bits (siphash *sh, const uint64_t in) {
+ const uint64_t mi = in;
+ sh->v3 ^= mi;
+ DOUBLE_ROUND(sh->v0,sh->v1,sh->v2,sh->v3);
+ sh->v0 ^= mi;
+}
+static inline uint64_t siphashfinish_last_part (siphash *sh, uint64_t t) {
+ sh->b |= t;
+ sh->v3 ^= sh->b;
+ DOUBLE_ROUND(sh->v0,sh->v1,sh->v2,sh->v3);
+ sh->v0 ^= sh->b;
+ sh->v2 ^= 0xff;
+ DOUBLE_ROUND(sh->v0,sh->v1,sh->v2,sh->v3);
+ DOUBLE_ROUND(sh->v0,sh->v1,sh->v2,sh->v3);
+ return (sh->v0 ^ sh->v1) ^ (sh->v2 ^ sh->v3);
+}
+/* This union helps us avoid doing weird things with pointers that can cause old
+ * compilers like GCC 4 to generate bad code. In addition it is nicely more C
+ * standards compliant to keep type punning to a minimum. */
+union SipHash64_union {
+ uint64_t u64;
+ uint32_t u32;
+ uint8_t u8[8];
+};
+static inline uint64_t siphashfinish (siphash *sh, const uint8_t *src, size_t src_sz) {
+ union SipHash64_union t = { 0 };
+ switch (src_sz) {
+ /* Falls through */
+ case 7: t.u8[6] = src[6];
+ /* Falls through */
+ case 6: t.u8[5] = src[5];
+ /* Falls through */
+ case 5: t.u8[4] = src[4];
+ /* Falls through */
+ case 4: t.u8[3] = src[3];
+ /* Falls through */
+ case 3: t.u8[2] = src[2];
+ /* Falls through */
+ case 2: t.u8[1] = src[1];
+ /* Falls through */
+ case 1: t.u8[0] = src[0];
+ }
+ return siphashfinish_last_part(sh, t.u64);
+}
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/siphash.h b/builtins/siphash.h
new file mode 100644
index 00000000..a466200c
--- /dev/null
+++ b/builtins/siphash.h
@@ -0,0 +1,9 @@
+#pragma once
+
+// An implementation of the SipHash algorithm.
+
+#include <stdint.h>
+
+uint64_t siphash24(const uint8_t *src, size_t src_sz);
+
+// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/table.c b/builtins/table.c
index 3a672e0f..a5dafd48 100644
--- a/builtins/table.c
+++ b/builtins/table.c
@@ -19,8 +19,8 @@
#include "array.h"
#include "c_string.h"
#include "datatypes.h"
-#include "halfsiphash.h"
#include "memory.h"
+#include "siphash.h"
#include "table.h"
#include "text.h"
#include "types.h"
@@ -114,11 +114,11 @@ public void *Table$get_raw(Table_t t, const void *key, const TypeInfo *type)
assert(type->tag == TableInfo);
if (!key || !t.bucket_info) return NULL;
- uint32_t hash = HASH_KEY(t, key);
+ uint64_t hash = HASH_KEY(t, key);
hshow(&t);
hdebug("Getting value with initial probe at %u\n", hash);
bucket_t *buckets = t.bucket_info->buckets;
- for (uint32_t i = hash; buckets[i].occupied; i = buckets[i].next_bucket) {
+ for (uint64_t i = hash; buckets[i].occupied; i = buckets[i].next_bucket) {
hdebug("Checking against key in bucket %u\n", i);
void *entry = GET_ENTRY(t, buckets[i].index);
if (EQUAL_KEYS(entry, key)) {
@@ -147,7 +147,7 @@ static void Table$set_bucket(Table_t *t, const void *entry, int32_t index, const
hshow(t);
const void *key = entry;
bucket_t *buckets = t->bucket_info->buckets;
- uint32_t hash = HASH_KEY(*t, key);
+ uint64_t hash = HASH_KEY(*t, key);
hdebug("Hash value (mod %u) = %u\n", t->bucket_info->count, hash);
bucket_t *bucket = &buckets[hash];
if (!bucket->occupied) {
@@ -167,11 +167,11 @@ static void Table$set_bucket(Table_t *t, const void *entry, int32_t index, const
--t->bucket_info->last_free;
}
- uint32_t collided_hash = HASH_KEY(*t, GET_ENTRY(*t, bucket->index));
+ uint64_t collided_hash = HASH_KEY(*t, GET_ENTRY(*t, bucket->index));
if (collided_hash != hash) { // Collided with a mid-chain entry
hdebug("Hit a mid-chain entry at bucket %u (chain starting at %u)\n", hash, collided_hash);
// Find chain predecessor
- uint32_t predecessor = collided_hash;
+ uint64_t predecessor = collided_hash;
while (buckets[predecessor].next_bucket != hash)
predecessor = buckets[predecessor].next_bucket;
@@ -180,7 +180,7 @@ static void Table$set_bucket(Table_t *t, const void *entry, int32_t index, const
buckets[t->bucket_info->last_free] = *bucket;
} else { // Collided with the start of a chain
hdebug("Hit start of a chain\n");
- uint32_t end_of_chain = hash;
+ uint64_t end_of_chain = hash;
while (buckets[end_of_chain].next_bucket != END_OF_CHAIN)
end_of_chain = buckets[end_of_chain].next_bucket;
hdebug("Appending to chain\n");
@@ -309,10 +309,10 @@ public void Table$remove(Table_t *t, const void *key, const TypeInfo *type)
// zero out bucket
// maybe update lastfree_index1 to removed bucket's index
- uint32_t hash = HASH_KEY(*t, key);
+ uint64_t hash = HASH_KEY(*t, key);
hdebug("Removing key with hash %u\n", hash);
bucket_t *bucket, *prev = NULL;
- for (uint32_t i = hash; t->bucket_info->buckets[i].occupied; i = t->bucket_info->buckets[i].next_bucket) {
+ for (uint64_t i = hash; t->bucket_info->buckets[i].occupied; i = t->bucket_info->buckets[i].next_bucket) {
if (EQUAL_KEYS(GET_ENTRY(*t, t->bucket_info->buckets[i].index), key)) {
bucket = &t->bucket_info->buckets[i];
hdebug("Found key to delete in bucket %u\n", i);
@@ -336,7 +336,7 @@ public void Table$remove(Table_t *t, const void *key, const TypeInfo *type)
hdebug("Removing key/value from the middle of the entries array\n");
// Find the bucket that points to the last entry's index:
- uint32_t i = HASH_KEY(*t, GET_ENTRY(*t, last_entry));
+ uint64_t i = HASH_KEY(*t, GET_ENTRY(*t, last_entry));
while (t->bucket_info->buckets[i].index != last_entry)
i = t->bucket_info->buckets[i].next_bucket;
// Update the bucket to point to the last entry's new home (the space
@@ -438,21 +438,19 @@ public int32_t Table$compare(const Table_t *x, const Table_t *y, const TypeInfo
return 0;
}
-public uint32_t Table$hash(const Table_t *t, const TypeInfo *type)
+public uint64_t Table$hash(const Table_t *t, const TypeInfo *type)
{
assert(type->tag == TableInfo);
// Table hashes are computed as:
// hash(hash(t.keys), hash(t.values), hash(t.fallback), hash(t.default))
// Where fallback and default hash to zero if absent
auto table = type->TableInfo;
- uint32_t components[] = {
+ uint64_t components[] = {
Array$hash(&t->entries, Array$info(table.key)),
Array$hash(&t->entries + value_offset(type), Array$info(table.value)),
t->fallback ? Table$hash(t->fallback, type) : 0,
};
- uint32_t hash;
- halfsiphash(&components, sizeof(components), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));
- return hash;
+ return siphash24((void*)&components, sizeof(components));
}
public Text_t Table$as_text(const Table_t *t, bool colorize, const TypeInfo *type)
diff --git a/builtins/table.h b/builtins/table.h
index cd267912..12f20f29 100644
--- a/builtins/table.h
+++ b/builtins/table.h
@@ -73,7 +73,7 @@ void Table$mark_copy_on_write(Table_t *t);
#define TABLE_COPY(t) ({ TABLE_INCREF(t); t; })
int32_t Table$compare(const Table_t *x, const Table_t *y, const TypeInfo *type);
bool Table$equal(const Table_t *x, const Table_t *y, const TypeInfo *type);
-uint32_t Table$hash(const Table_t *t, const TypeInfo *type);
+uint64_t Table$hash(const Table_t *t, const TypeInfo *type);
Text_t Table$as_text(const Table_t *t, bool colorize, const TypeInfo *type);
void *Table$str_entry(Table_t t, int64_t n);
diff --git a/builtins/text.c b/builtins/text.c
index 39222ed4..3beda82f 100644
--- a/builtins/text.c
+++ b/builtins/text.c
@@ -74,7 +74,9 @@
#include "text.h"
#include "types.h"
-#include "siphash.c"
+// Use inline version of the siphash code for performance:
+#include "siphash.h"
+#include "siphash-internals.h"
typedef struct {
uint32_t main_codepoint;
@@ -114,7 +116,7 @@ static bool graphemes_equal(uint32_t **a, uint32_t **b) {
static uint64_t grapheme_hash(uint32_t **g) {
uint32_t *cluster = *g;
- return siphash24((void*)&cluster[1], sizeof(uint32_t[cluster[0]]), (uint64_t*)TOMO_HASH_KEY);
+ return siphash24((void*)&cluster[1], sizeof(uint32_t[cluster[0]]));
}
static const TypeInfo GraphemeClusterInfo = {
@@ -723,7 +725,7 @@ public uint64_t Text$hash(Text_t *text)
{
if (text->hash != 0) return text->hash;
siphash sh;
- siphashinit(&sh, sizeof(int32_t[text->length]), (uint64_t*)TOMO_HASH_KEY);
+ siphashinit(&sh, sizeof(int32_t[text->length]));
union {
int32_t chunks[2];
diff --git a/builtins/thread.c b/builtins/thread.c
index 4f422e5d..b9193787 100644
--- a/builtins/thread.c
+++ b/builtins/thread.c
@@ -12,7 +12,6 @@
#include "array.h"
#include "functions.h"
-#include "halfsiphash.h"
#include "text.h"
#include "types.h"
#include "util.h"
diff --git a/builtins/tomo.h b/builtins/tomo.h
index 637975f4..8d6661ca 100644
--- a/builtins/tomo.h
+++ b/builtins/tomo.h
@@ -16,16 +16,19 @@
#include "channel.h"
#include "datatypes.h"
#include "functions.h"
-#include "halfsiphash.h"
#include "integers.h"
#include "macros.h"
#include "memory.h"
#include "nums.h"
#include "pointer.h"
#include "range.h"
+#include "siphash.h"
#include "table.h"
#include "text.h"
#include "thread.h"
#include "types.h"
+// This value will be randomized on startup in tomo_init():
+extern uint64_t TOMO_HASH_KEY[2];
+
// vim: ts=4 sw=0 et cino=L2,l1,(0,W4,m1,\:0
diff --git a/builtins/types.h b/builtins/types.h
index 70cf89c3..0b48ab36 100644
--- a/builtins/types.h
+++ b/builtins/types.h
@@ -9,7 +9,7 @@
struct TypeInfo;
-typedef uint32_t (*hash_fn_t)(const void*, const struct TypeInfo*);
+typedef uint64_t (*hash_fn_t)(const void*, const struct TypeInfo*);
typedef int32_t (*compare_fn_t)(const void*, const void*, const struct TypeInfo*);
typedef bool (*equal_fn_t)(const void*, const void*, const struct TypeInfo*);
typedef Text_t (*text_fn_t)(const void*, bool, const struct TypeInfo*);
diff --git a/enums.c b/enums.c
index 0705f688..50b7662b 100644
--- a/enums.c
+++ b/enums.c
@@ -121,16 +121,14 @@ static CORD compile_hash_method(env_t *env, ast_t *ast)
CORD full_name = CORD_cat(namespace_prefix(env->libname, env->namespace), def->name);
if (!has_extra_data(def->tags)) {
// Hashing is simpler if there is only a tag, no tagged data:
- return CORD_all("static uint32_t ", full_name, "$hash(const ", full_name, "_t *obj, const TypeInfo *info) {\n"
+ return CORD_all("static uint64_t ", full_name, "$hash(const ", full_name, "_t *obj, const TypeInfo *info) {\n"
"(void)info;\n"
- "uint32_t hash;\n"
- "halfsiphash(&obj->tag, sizeof(obj->tag), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));\n"
- "return hash;"
+ "return siphash24((void*)&obj->tag, sizeof(obj->tag));\n"
"\n}\n");
}
- CORD hash_func = CORD_all("static uint32_t ", full_name, "$hash(const ", full_name, "_t *obj, const TypeInfo *info) {\n"
+ CORD hash_func = CORD_all("static uint64_t ", full_name, "$hash(const ", full_name, "_t *obj, const TypeInfo *info) {\n"
"(void)info;\n"
- "uint32_t hashes[2] = {(uint32_t)obj->tag, 0};\n"
+ "uint64_t hashes[2] = {(uint64_t)obj->tag, 0};\n"
"switch (obj->tag) {\n");
for (tag_ast_t *tag = def->tags; tag; tag = tag->next) {
if (tag->fields) {
@@ -143,9 +141,7 @@ static CORD compile_hash_method(env_t *env, ast_t *ast)
}
}
hash_func = CORD_all(hash_func, "}\n"
- "uint32_t hash;\n"
- "halfsiphash(&hashes, sizeof(hashes), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));\n"
- "return hash;\n}\n");
+ "return siphash24((void*)&hashes, sizeof(hashes));\n}\n");
return hash_func;
}
diff --git a/structs.c b/structs.c
index ebd4f7a3..566f39ff 100644
--- a/structs.c
+++ b/structs.c
@@ -97,9 +97,9 @@ static CORD compile_hash_method(env_t *env, ast_t *ast)
{
auto def = Match(ast, StructDef);
CORD full_name = CORD_cat(namespace_prefix(env->libname, env->namespace), def->name);
- CORD hash_func = CORD_all("static uint32_t ", full_name, "$hash(const ", full_name, "_t *obj, const TypeInfo *info) {\n"
+ CORD hash_func = CORD_all("static uint64_t ", full_name, "$hash(const ", full_name, "_t *obj, const TypeInfo *info) {\n"
"(void)info;\n"
- "uint32_t field_hashes[] = {");
+ "uint64_t field_hashes[] = {");
for (arg_ast_t *field = def->fields; field; field = field->next) {
type_t *field_type = get_arg_ast_type(env, field);
if (field_type->tag == BoolType) // Bools can be bit fields, so you can't use *obj->field there:
@@ -108,9 +108,7 @@ static CORD compile_hash_method(env_t *env, ast_t *ast)
hash_func = CORD_all(hash_func, "\ngeneric_hash(&obj->$", field->name, ", ", compile_type_info(env, field_type), "),");
}
hash_func = CORD_all(hash_func, "};\n"
- "uint32_t hash;\n"
- "halfsiphash(&field_hashes, sizeof(field_hashes), TOMO_HASH_KEY, (uint8_t*)&hash, sizeof(hash));\n"
- "return hash;\n}\n");
+ "return siphash24((void*)&field_hashes, sizeof(field_hashes));\n}\n");
return hash_func;
}