176 lines
3.6 KiB
C
176 lines
3.6 KiB
C
|
/*
 * Convenience wrappers around murmur_hash64() that always use seed 0.
 *
 * HASH_CSTR(str)      hashes a NUL-terminated string. `str` is expanded twice
 *                     (data pointer and strlen argument), so it must not have
 *                     side effects.
 * HASH_STR(str, len)  hashes the first `len` bytes starting at `str`.
 */
#define HASH_CSTR(str) murmur_hash64((str), strlen(str), 0)
#define HASH_STR(str, len) murmur_hash64((str), (len), 0)

/* Marker written into a key slot that holds no element (all 64 bits set). */
const uint64_t HASH_UNUSED = 0xFFFFFFFFFFFFFFFFU;
|
||
|
|
||
|
/*
 * NEAREST_PRIME_<n> is the smallest prime greater than the power of two <n>.
 * NOTE(review): presumably meant as table capacities; nothing in this part of
 * the file references them - confirm against callers.
 */
enum {
    NEAREST_PRIME_8 = 11,
    NEAREST_PRIME_16 = 17,
    NEAREST_PRIME_32 = 37,
    NEAREST_PRIME_64 = 67,
    NEAREST_PRIME_128 = 131,
    NEAREST_PRIME_256 = 257,
    NEAREST_PRIME_512 = 521,
    NEAREST_PRIME_1024 = 1031
};
|
||
|
|
||
|
/*
 * Fixed-capacity table that maps 64-bit keys to fixed-size elements.
 *
 * `keys` is the slot array: slot (key % capacity) holds the index into `data`
 * of the element stored for that key, or HASH_UNUSED when the slot is empty.
 * `data` holds the element bytes packed back to back in insertion order.
 */
typedef struct hash {
    int count;        /* number of elements stored so far */
    int capacity;     /* number of key slots, and of element slots in data */
    int element_size; /* size of one element, in bytes */
    uint64_t* keys;   /* per-slot element index, or HASH_UNUSED */
    void* data;       /* element storage, element_size bytes per entry */
} hash_t;
|
||
|
|
||
|
hash_t
|
||
|
hash_create(size_t capacity, size_t element_size)
|
||
|
{
|
||
|
hash_t result;
|
||
|
|
||
|
result.count = 0;
|
||
|
result.capacity = capacity;
|
||
|
result.element_size = element_size;
|
||
|
result.keys = engine_mem_alloc(capacity, sizeof(*result.keys));
|
||
|
result.data = engine_mem_alloc(capacity, element_size);
|
||
|
|
||
|
for (size_t i = 0; i < capacity; i++) {
|
||
|
result.keys[i] = HASH_UNUSED;
|
||
|
}
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
hash_add(hash_t* hash, uint64_t key, void* element)
|
||
|
{
|
||
|
int key_index = key % hash->capacity;
|
||
|
|
||
|
LOG_ASSERT(hash->keys[key_index] == HASH_UNUSED, "Hash table collision");
|
||
|
LOG_ASSERT(hash->count+1 < hash->capacity, "Hash table data array too small");
|
||
|
|
||
|
uint64_t element_index = hash->count;
|
||
|
hash->keys[key_index] = element_index;
|
||
|
hash->count++;
|
||
|
|
||
|
memcpy(&hash->data[element_index*hash->element_size], element, hash->element_size);
|
||
|
}
|
||
|
|
||
|
void*
|
||
|
hash_get(hash_t hash, uint64_t key)
|
||
|
{
|
||
|
size_t key_index = key % hash.capacity;
|
||
|
LOG_ASSERT(hash.keys[key_index] != HASH_UNUSED, "Key not found in hash table");
|
||
|
|
||
|
uint64_t element_index = hash.keys[key_index];
|
||
|
void* element = &hash.data[hash.element_size*element_index];
|
||
|
|
||
|
return element;
|
||
|
}
|
||
|
|
||
|
/*
 * 32-bit MurmurHash3 (x86_32 variant) of `len` bytes starting at `key`.
 *
 * key:  bytes to hash; may have any alignment.
 * len:  number of bytes to hash.
 * seed: initial hash state.
 *
 * Whole 4-byte words are loaded in host byte order. All bytes are treated as
 * unsigned, so input bytes >= 0x80 are not sign-extended in the tail.
 */
uint32_t
murmur_hash32(const char* key, size_t len, uint32_t seed)
{
    const uint32_t C1 = 0xcc9e2d51;
    const uint32_t C2 = 0x1b873593;
    const uint32_t N = 0xe6546b64;
    const int R1 = 15;
    const int R2 = 13;
    const uint32_t M = 5;

    const unsigned char* chunk = (const unsigned char*)key;
    const size_t chunk_size = sizeof(uint32_t);
    const size_t chunk_count = len / chunk_size;

    uint32_t hash = seed;

    for (size_t i = 0; i < chunk_count; i++)
    {
        /* memcpy instead of a pointer cast: no alignment or aliasing issues. */
        uint32_t k;
        memcpy(&k, chunk, chunk_size);

        k = k * C1;
        k = (k << R1) | (k >> (32 - R1));
        k = k * C2;

        hash = hash ^ k;
        hash = (hash << R2) | (hash >> (32 - R2));
        hash = (hash * M) + N;

        chunk += chunk_size;
    }

    /* Mix in the 1-3 trailing bytes that did not fill a whole word. */
    uint32_t k = 0;
    switch (len % chunk_size)
    {
        case 3: k |= (uint32_t)chunk[2] << 16; /* fallthrough */
        case 2: k |= (uint32_t)chunk[1] << 8;  /* fallthrough */
        case 1:
        {
            k |= (uint32_t)chunk[0];

            k = k * C1;
            k = (k << R1) | (k >> (32 - R1));
            k = k * C2;

            hash = hash ^ k;
        }
        /* fallthrough */
        default: break;
    }

    /* Finalization: fold in the length, then avalanche the bits. */
    hash = hash ^ (uint32_t)len;
    hash = hash ^ (hash >> 16);
    hash = hash * 0x85ebca6b;
    hash = hash ^ (hash >> 13);
    hash = hash * 0xc2b2ae35;
    hash = hash ^ (hash >> 16);

    return hash;
}
|
||
|
|
||
|
/*
 * 64-bit MurmurHash2 (MurmurHash64A variant) of `len` bytes starting at `key`.
 *
 * key:  bytes to hash; may have any alignment.
 * len:  number of bytes to hash.
 * seed: initial hash state.
 *
 * Whole 8-byte words are loaded in host byte order. All bytes are treated as
 * unsigned, so input bytes >= 0x80 are not sign-extended in the tail.
 */
uint64_t
murmur_hash64(const char* key, size_t len, uint64_t seed)
{
    const uint64_t M = 0xc6a4a7935bd1e995ULL;
    const int R = 47;

    const unsigned char* chunk = (const unsigned char*)key;
    const size_t chunk_size = sizeof(uint64_t);
    const size_t chunk_count = len / chunk_size;

    uint64_t hash = seed ^ ((uint64_t)len * M);

    for (size_t i = 0; i < chunk_count; i++)
    {
        /* memcpy instead of a pointer cast: no alignment or aliasing issues. */
        uint64_t k;
        memcpy(&k, chunk, chunk_size);

        k = k * M;
        k = k ^ (k >> R);
        k = k * M;

        hash = hash ^ k;
        hash = hash * M;

        chunk += chunk_size;
    }

    /* Mix in the 1-7 trailing bytes that did not fill a whole word. */
    switch (len % chunk_size)
    {
        case 7: hash ^= (uint64_t)chunk[6] << 48; /* fallthrough */
        case 6: hash ^= (uint64_t)chunk[5] << 40; /* fallthrough */
        case 5: hash ^= (uint64_t)chunk[4] << 32; /* fallthrough */
        case 4: hash ^= (uint64_t)chunk[3] << 24; /* fallthrough */
        case 3: hash ^= (uint64_t)chunk[2] << 16; /* fallthrough */
        case 2: hash ^= (uint64_t)chunk[1] << 8;  /* fallthrough */
        case 1:
        {
            hash ^= (uint64_t)chunk[0];
            hash = hash * M;
        }
        /* fallthrough */
        default: break;
    }

    /* Finalization: avalanche the remaining bits. */
    hash = hash ^ (hash >> R);
    hash = hash * M;
    hash = hash ^ (hash >> R);

    return hash;
}
|
||
|
|