/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef COMS_STDLIB_HASH_MAP_H
#define COMS_STDLIB_HASH_MAP_H
#include "Types.h"
#include "../hash/GeneralHash.h"
#include "../memory/RingMemory.h"
#include "../memory/BufferMemory.h"
#include "../memory/ChunkMemory.h"
#include "../utils/StringUtils.h"
#include "../stdlib/Simd.h"
#include "../system/Allocator.h"
// If a hash key is longer than the max key length, we use the last N characters of that key
// The key length is currently chosen to result in 32 byte size for the common case: HashEntryInt32
#define HASH_MAP_MAX_KEY_LENGTH 26
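// Example: for a key like "assets/textures/environment/stone_wall_diffuse" the full string is still hashed,
// but only its trailing HASH_MAP_MAX_KEY_LENGTH characters are stored in the entry and used for comparison,
// so long keys should stay unique within that tail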
/////////////////////////////
// string key
/////////////////////////////
// The next member below is the 1-indexed element id in the chunk memory of the next entry with the same hash (not a byte offset); 0 terminates the chain
struct HashEntryInt32 {
char key[HASH_MAP_MAX_KEY_LENGTH];
uint16 next;
int32 value;
};
// This struct is often used for hash maps whose values have a dynamic length
// value = stores the offset into the buffer array
// value2 = stores the length of the data
struct HashEntryInt32Int32 {
char key[HASH_MAP_MAX_KEY_LENGTH];
uint16 next;
int32 value;
int32 value2;
};
struct HashEntryInt64 {
char key[HASH_MAP_MAX_KEY_LENGTH];
uint16 next;
int64 value;
};
struct HashEntryUIntPtr {
char key[HASH_MAP_MAX_KEY_LENGTH];
uint16 next;
uintptr_t value;
};
struct HashEntryVoidP {
char key[HASH_MAP_MAX_KEY_LENGTH];
uint16 next;
void* value;
};
struct HashEntryFloat {
char key[HASH_MAP_MAX_KEY_LENGTH];
uint16 next;
f32 value;
};
struct HashEntryStr {
char key[HASH_MAP_MAX_KEY_LENGTH];
uint16 next;
char value[HASH_MAP_MAX_KEY_LENGTH];
};
struct HashEntry {
char key[HASH_MAP_MAX_KEY_LENGTH];
uint16 next;
byte* value;
};
/////////////////////////////
// int key
/////////////////////////////
struct HashEntryInt32KeyInt32 {
int32 key;
uint16 next;
int32 value;
};
struct HashEntryInt64KeyInt32 {
int32 key;
uint16 next;
int64 value;
};
struct HashEntryUIntPtrKeyInt32 {
int32 key;
uint16 next;
uintptr_t value;
};
struct HashEntryVoidPKeyInt32 {
int32 key;
uint16 next;
void* value;
};
struct HashEntryFloatKeyInt32 {
int32 key;
uint16 next;
f32 value;
};
struct HashEntryStrKeyInt32 {
int32 key;
uint16 next;
char value[HASH_MAP_MAX_KEY_LENGTH];
};
struct HashEntryKeyInt32 {
int32 key;
uint16 next;
byte* value;
};
// HashMaps are limited to 4GB in total size
struct HashMap {
// Contains the chunk memory index for a provided key/hash
// Values are 1-indexed/offset since 0 means not used/found
uint16* table;
// Contains the actual data of the hash map
// Careful, some hash map implementations don't store the value in here but an offset for use in another array
// @question We might want to align the ChunkMemory memory to 8 bytes, currently it's either 4 or 8 bytes depending on the length
ChunkMemory buf;
};
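// Illustrative lookup sketch (not compiled): how the 1-indexed table and the next chain are walked.
// The function name is hypothetical; the real getters below additionally shift long keys to their tail first.
#if 0
static HashEntry* hashmap_lookup_sketch(HashMap* hm, const char* key, uint64 hash)
{
    uint16 slot = hm->table[hash % hm->buf.count];
    while (slot) {
        // slot and next are 1-indexed element ids, the chunk memory itself is 0-indexed
        HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, slot - 1, false);
        if (str_compare(entry->key, key) == 0) {
            return entry;
        }

        slot = entry->next; // 0 terminates the chain
    }

    return NULL;
}
#endif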
// The ref hash map is used if the value size is dynamic per element (e.g. files, cache data etc.)
struct HashMapRef {
HashMap hm;
ChunkMemory data;
};
// @todo Change so the hashmap can grow, or maybe even better: create a static and a dynamic version
inline
void hashmap_alloc(HashMap* hm, int32 count, int32 element_size, int32 alignment = 64)
{
LOG_1("Allocate HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
byte* data = (byte *) platform_alloc(
count * (sizeof(uint16) + element_size)
+ CEIL_DIV(count, alignment) * sizeof(hm->buf.free)
);
hm->table = (uint16 *) data;
chunk_init(&hm->buf, data + sizeof(uint16) * count, count, element_size, 8);
}
inline
void hashmap_alloc(HashMapRef* hmr, int32 count, int32 data_element_size, int32 alignment = 64)
{
int32 element_size = sizeof(HashEntryInt32Int32);
LOG_1("Allocate HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
byte* data = (byte *) platform_alloc(
count * (sizeof(uint16) + element_size)
+ CEIL_DIV(count, alignment) * sizeof(hmr->hm.buf.free)
+ count * data_element_size
);
hmr->hm.table = (uint16 *) data;
chunk_init(&hmr->hm.buf, data + sizeof(uint16) * count, count, element_size, 8);
chunk_init(&hmr->data, data + sizeof(uint16) * count + hmr->hm.buf.size, count, data_element_size);
}
inline
void hashmap_free(HashMap* hm)
{
platform_free((void **) &hm->table);
hm->table = NULL;
hm->buf.size = 0;
hm->buf.memory = NULL;
}
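// Minimal heap-backed lifecycle sketch (not compiled), assuming int32 values (HashEntryInt32) and the default alignment
#if 0
HashMap hm;
hashmap_alloc(&hm, 1024, sizeof(HashEntryInt32));

hashmap_insert(&hm, "player_health", 100);

HashEntry* entry = hashmap_get_entry(&hm, "player_health");
int32 health = entry ? ((HashEntryInt32 *) entry)->value : 0;

hashmap_free(&hm);
#endif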
// WARNING: element_size = the value/payload size + the remaining HashEntry data size (key, next, ...)
inline
void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ring, int32 alignment = 64) noexcept
{
LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
byte* data = ring_get_memory(
ring,
count * (sizeof(uint16) + element_size)
+ CEIL_DIV(count, alignment) * sizeof(hm->buf.free)
);
hm->table = (uint16 *) data;
chunk_init(&hm->buf, data + sizeof(uint16) * count, count, element_size, 8);
}
// WARNING: element_size = the value/payload size + the remaining HashEntry data size (key, next, ...)
inline
void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory* buf, int32 alignment = 64) noexcept
{
LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
byte* data = buffer_get_memory(
buf,
count * (sizeof(uint16) + element_size)
+ CEIL_DIV(count, alignment) * sizeof(hm->buf.free)
);
hm->table = (uint16 *) data;
chunk_init(&hm->buf, data + sizeof(uint16) * count, count, element_size, 8);
}
// WARNING: element_size = the value/payload size + the remaining HashEntry data size (key, next, ...)
inline
void hashmap_create(HashMap* hm, int32 count, int32 element_size, byte* buf) noexcept
{
LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
hm->table = (uint16 *) buf;
chunk_init(&hm->buf, buf + sizeof(uint16) * count, count, element_size, 8);
}
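// Sketch for the element_size warning above (not compiled): for the generic HashEntry the value bytes live
// directly behind the struct, so the payload size must be included. buffer_memory is an assumed, already
// created BufferMemory.
#if 0
struct Color { f32 r, g, b, a; }; // 16 B payload keeps the element 8 byte aligned

HashMap hm;
hashmap_create(&hm, 256, sizeof(HashEntry) + sizeof(Color), &buffer_memory);

Color c = {1.0f, 0.5f, 0.25f, 1.0f};
hashmap_insert(&hm, "ui_accent", (const byte *) &c);
#endif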
inline
void hashmap_update_data_pointer(HashMap* hm, byte* data) noexcept
{
hm->table = (uint16 *) data;
hm->buf.memory = data + sizeof(uint16) * hm->buf.count;
}
// Calculates how large a hashmap will be
inline
int64 hashmap_size(int32 count, int32 element_size) noexcept
{
return count * sizeof(uint16) // table
+ count * element_size // elements
+ sizeof(uint64) * CEIL_DIV(count, 64); // free
}
inline
int64 hashmap_size(const HashMap* hm) noexcept
{
return hm->buf.count * sizeof(uint16) + hm->buf.size;
}
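// Worked example: hashmap_size(1024, sizeof(HashEntryInt32)) with 32 B entries
//   table:     1024 * sizeof(uint16)  =  2048 B
//   elements:  1024 * 32              = 32768 B
//   free bits: CEIL_DIV(1024, 64) * 8 =   128 B
//   total                             = 34944 B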
/////////////////////////////
// string key
/////////////////////////////
// @performance We could greatly improve the hashmap performance by ensuring that a uniquely hashed entry always starts at a cacheline
// If another key hashes to the same index, its entry would sit at cacheline offset + 32 (for 32 B entries)
// This would ensure that a single collision doesn't force us to read another cache line
// Of course for more than 1 collision we would still need to load multiple cachelines, but ideally that shouldn't happen too often
void hashmap_insert(HashMap* hm, const char* key, int32 value) noexcept {
uint64 index = hash_djb2(key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryInt32* entry = (HashEntryInt32 *) chunk_get_element(&hm->buf, element, true);
// Ensure key length
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
str_copy_short(entry->key, key, HASH_MAP_MAX_KEY_LENGTH);
entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
entry->value = value;
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryInt32* tmp = (HashEntryInt32*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
void hashmap_insert(HashMap* hm, const char* key, int64 value) noexcept {
uint64 index = hash_djb2(key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryInt64* entry = (HashEntryInt64 *) chunk_get_element(&hm->buf, element, true);
// Ensure key length
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
str_copy_short(entry->key, key, HASH_MAP_MAX_KEY_LENGTH);
entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
entry->value = value;
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryInt64* tmp = (HashEntryInt64*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
void hashmap_insert(HashMap* hm, const char* key, int32 value1, int32 value2) noexcept {
uint64 index = hash_djb2(key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryInt32Int32* entry = (HashEntryInt32Int32 *) chunk_get_element(&hm->buf, element, true);
// Ensure key length
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
str_copy_short(entry->key, key, HASH_MAP_MAX_KEY_LENGTH);
entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
entry->value = value1;
entry->value2 = value2;
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryInt32Int32* tmp = (HashEntryInt32Int32*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
void hashmap_insert(HashMapRef* hmr, const char* key, byte* data, int32 data_size) noexcept {
// Data chunk
int32 chunk_count = (int32) ((data_size + hmr->data.chunk_size - 1) / hmr->data.chunk_size);
int32 chunk_offset = chunk_reserve(&hmr->data, chunk_count);
if (chunk_offset < 0) {
ASSERT_SIMPLE(chunk_offset >= 0);
return;
}
// Insert Data
// NOTE: The data and the hash map entry are in two separate memory areas
byte* data_mem = chunk_get_element(&hmr->data, chunk_offset);
memcpy(data_mem, data, data_size);
// Handle hash map entry
uint64 index = hash_djb2(key) % hmr->hm.buf.count;
int32 element = chunk_reserve(&hmr->hm.buf, 1);
HashEntryInt32Int32* entry = (HashEntryInt32Int32 *) chunk_get_element(&hmr->hm.buf, element, true);
// Ensure key length
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
str_copy_short(entry->key, key, HASH_MAP_MAX_KEY_LENGTH);
entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
entry->value = chunk_offset;
entry->value2 = chunk_count;
entry->next = 0;
uint16* target = &hmr->hm.table[index];
while (*target) {
HashEntryInt32Int32* tmp = (HashEntryInt32Int32*) chunk_get_element(&hmr->hm.buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
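// Usage sketch for the ref variant (not compiled): the value bytes are copied into the separate data pool,
// the entry itself only stores the chunk offset and chunk count
#if 0
HashMapRef cache;
hashmap_alloc(&cache, 256, 4096); // 256 entries, 4 KiB data chunks

byte file_content[4096]; // assumed to be filled elsewhere
hashmap_insert(&cache, "shaders/post_fx.glsl", file_content, (int32) sizeof(file_content));

byte* cached = hashmap_get_value(&cache, "shaders/post_fx.glsl");
#endif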
void hashmap_insert(HashMap* hm, const char* key, uintptr_t value) noexcept {
uint64 index = hash_djb2(key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryUIntPtr* entry = (HashEntryUIntPtr *) chunk_get_element(&hm->buf, element, true);
// Ensure key length
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
str_copy_short(entry->key, key, HASH_MAP_MAX_KEY_LENGTH);
entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
entry->value = value;
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryUIntPtr* tmp = (HashEntryUIntPtr*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
void hashmap_insert(HashMap* hm, const char* key, void* value) noexcept {
uint64 index = hash_djb2(key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryVoidP* entry = (HashEntryVoidP *) chunk_get_element(&hm->buf, element, true);
// Ensure key length
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
str_copy_short(entry->key, key, HASH_MAP_MAX_KEY_LENGTH);
entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
entry->value = value;
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryVoidP* tmp = (HashEntryVoidP*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
void hashmap_insert(HashMap* hm, const char* key, f32 value) noexcept {
uint64 index = hash_djb2(key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryFloat* entry = (HashEntryFloat *) chunk_get_element(&hm->buf, element, true);
// Ensure key length
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
str_copy_short(entry->key, key, HASH_MAP_MAX_KEY_LENGTH);
entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
entry->value = value;
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryFloat* tmp = (HashEntryFloat*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
void hashmap_insert(HashMap* hm, const char* key, const char* value) noexcept {
uint64 index = hash_djb2(key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryStr* entry = (HashEntryStr *) chunk_get_element(&hm->buf, element, true);
// Ensure key length
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
str_copy_short(entry->key, key, HASH_MAP_MAX_KEY_LENGTH);
entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
str_copy_short(entry->value, value, HASH_MAP_MAX_KEY_LENGTH);
entry->value[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryStr* tmp = (HashEntryStr*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
HashEntry* hashmap_insert(HashMap* hm, const char* key, const byte* value) noexcept {
uint64 index = hash_djb2(key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, element, true);
entry->value = (byte *) entry + sizeof(HashEntry);
// Ensure key length
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
str_copy_short(entry->key, key, HASH_MAP_MAX_KEY_LENGTH);
entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
memcpy(entry->value, value, hm->buf.chunk_size - sizeof(HashEntry));
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntry* tmp = (HashEntry*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
return entry;
}
HashEntry* hashmap_reserve(HashMap* hm, const char* key) noexcept {
uint64 index = hash_djb2(key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, element, true);
entry->value = (byte *) entry + sizeof(HashEntry);
// Ensure key length
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
str_copy_short(entry->key, key, HASH_MAP_MAX_KEY_LENGTH);
entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntry* tmp = (HashEntry*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
return entry;
}
// Returns existing element or element to be filled
HashEntry* hashmap_get_reserve(HashMap* hm, const char* key) noexcept
{
uint64 index = hash_djb2(key) % hm->buf.count;
HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, hm->table[index] - 1, false);
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
while (entry != NULL) {
if (str_compare(entry->key, key) == 0) {
DEBUG_MEMORY_READ((uintptr_t) entry, sizeof(HashEntry));
return entry;
}
if (!entry->next) {
break;
}
entry = (HashEntry *) chunk_get_element(&hm->buf, entry->next - 1, false);
}
int32 element = chunk_reserve(&hm->buf, 1);
HashEntry* entry_new = (HashEntry *) chunk_get_element(&hm->buf, element, true);
entry_new->value = (byte *) entry_new + sizeof(HashEntry);
// Ensure key length
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
str_copy_short(entry_new->key, key, HASH_MAP_MAX_KEY_LENGTH);
entry_new->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
if (entry) {
entry->next = (uint16) (element + 1);
} else {
hm->table[index] = (uint16) (element + 1);
}
return entry_new;
}
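// Get-or-insert sketch (not compiled): an existing entry is returned as-is, a new one still has to be filled
// by the caller. hm is an existing HashMap, FrameStats a hypothetical payload; the map is assumed to be
// created with element_size = sizeof(HashEntry) + sizeof(FrameStats).
#if 0
struct FrameStats { int32 draw_calls; int32 triangles; };
FrameStats stats = {120, 45000};

HashEntry* entry = hashmap_get_reserve(&hm, "frame_stats");
memcpy(entry->value, &stats, sizeof(stats)); // upsert: fill new entry or overwrite existing one
#endif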
inline
HashEntry* hashmap_get_entry_by_element(HashMap* hm, uint32 element) noexcept
{
return (HashEntry *) chunk_get_element(&hm->buf, element - 1, false);
}
HashEntry* hashmap_get_entry(HashMap* hm, const char* key) noexcept {
uint64 index = hash_djb2(key) % hm->buf.count;
HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, hm->table[index] - 1, false);
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
while (entry != NULL) {
if (str_compare(entry->key, key) == 0) {
DEBUG_MEMORY_READ((uintptr_t) entry, sizeof(HashEntry));
return entry;
}
entry = (HashEntry *) chunk_get_element(&hm->buf, entry->next - 1, false);
}
return NULL;
}
byte* hashmap_get_value(HashMapRef* hmr, const char* key) noexcept {
uint64 index = hash_djb2(key) % hmr->hm.buf.count;
HashEntryInt32Int32* entry = (HashEntryInt32Int32 *) chunk_get_element(&hmr->hm.buf, hmr->hm.table[index] - 1, false);
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
while (entry != NULL) {
if (str_compare(entry->key, key) == 0) {
DEBUG_MEMORY_READ((uintptr_t) entry, sizeof(HashEntryInt32Int32));
return chunk_get_element(&hmr->data, entry->value);
}
entry = (HashEntryInt32Int32 *) chunk_get_element(&hmr->hm.buf, entry->next - 1, false);
}
return NULL;
}
uint32 hashmap_get_element(const HashMap* hm, const char* key) noexcept {
uint64 index = hash_djb2(key) % hm->buf.count;
const HashEntry* entry = (const HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, hm->table[index] - 1, false);
uint32 element_id = hm->table[index];
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
while (entry != NULL) {
if (str_compare(entry->key, key) == 0) {
DEBUG_MEMORY_READ((uintptr_t) entry, sizeof(HashEntry));
return element_id;
}
element_id = entry->next;
entry = (const HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, entry->next - 1, false);
}
return 0;
}
inline
uint32 hashmap_get_element_by_entry(const HashMap* hm, const HashEntry* entry) noexcept
{
return chunk_id_from_memory(&hm->buf, (byte *) entry) + 1;
}
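// Sketch (not compiled): element ids are 1-indexed handles that stay valid as long as the entry is not removed,
// so they can be cached and resolved later without re-hashing the key. hm is an existing HashMap with int32 values.
#if 0
uint32 element = hashmap_get_element(&hm, "player_health");
if (element) {
    HashEntry* entry = hashmap_get_entry_by_element(&hm, element);
    int32 health = ((HashEntryInt32 *) entry)->value;
}
#endif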
// This overload only saves one step: the call to the hash function
// The reason: in some cases the hash can be computed at compile time
HashEntry* hashmap_get_entry(HashMap* hm, const char* key, uint64 hash) noexcept {
hash %= hm->buf.count;
HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, hm->table[hash] - 1, false);
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
while (entry != NULL) {
if (str_compare(entry->key, key) == 0) {
DEBUG_MEMORY_READ((uintptr_t) entry, sizeof(HashEntry));
return entry;
}
entry = (HashEntry *) chunk_get_element(&hm->buf, entry->next - 1, false);
}
return NULL;
}
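// Sketch (not compiled): when the hash is already known, e.g. produced by a compile time hash, the overload
// with an explicit hash avoids re-hashing the key. hm is an existing HashMap.
#if 0
const uint64 hash = hash_djb2("config/window_width"); // could be a compile time constant instead
HashEntry* entry = hashmap_get_entry(&hm, "config/window_width", hash);
#endif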
// @performance If we had a doubly linked list we could delete keys much more easily
// However, that would make insertion slower
// Maybe we should create another hashmap variant that is doubly linked
void hashmap_remove(HashMap* hm, const char* key) noexcept {
uint64 index = hash_djb2(key) % hm->buf.count;
HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, hm->table[index] - 1, false);
HashEntry* prev = NULL;
uint32 element_id = hm->table[index];
str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH);
while (entry != NULL) {
if (str_compare(entry->key, key) == 0) {
if (prev == NULL) {
hm->table[index] = entry->next;
} else {
prev->next = entry->next;
}
chunk_free_elements(&hm->buf, element_id - 1);
return;
}
element_id = entry->next;
prev = entry;
entry = (HashEntry *) chunk_get_element(&hm->buf, entry->next - 1, false);
}
}
/////////////////////////////
// int key
/////////////////////////////
void hashmap_insert(HashMap* hm, int32 key, int32 value) noexcept {
uint64 index = ((uint32) key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryInt32KeyInt32* entry = (HashEntryInt32KeyInt32 *) chunk_get_element(&hm->buf, element, true);
entry->key = key;
entry->value = value;
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryInt32KeyInt32* tmp = (HashEntryInt32KeyInt32*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
void hashmap_insert(HashMap* hm, int32 key, int64 value) noexcept {
uint64 index = ((uint32) key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryInt64KeyInt32* entry = (HashEntryInt64KeyInt32 *) chunk_get_element(&hm->buf, element, true);
entry->key = key;
entry->value = value;
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryInt64KeyInt32* tmp = (HashEntryInt64KeyInt32*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
void hashmap_insert(HashMap* hm, int32 key, uintptr_t value) noexcept {
uint64 index = ((uint32) key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryUIntPtrKeyInt32* entry = (HashEntryUIntPtrKeyInt32 *) chunk_get_element(&hm->buf, element, true);
entry->key = key;
entry->value = value;
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryUIntPtrKeyInt32* tmp = (HashEntryUIntPtrKeyInt32*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
void hashmap_insert(HashMap* hm, int32 key, void* value) noexcept {
uint64 index = ((uint32) key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryVoidPKeyInt32* entry = (HashEntryVoidPKeyInt32 *) chunk_get_element(&hm->buf, element, true);
entry->key = key;
entry->value = value;
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryVoidPKeyInt32* tmp = (HashEntryVoidPKeyInt32*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
void hashmap_insert(HashMap* hm, int32 key, f32 value) noexcept {
uint64 index = ((uint32) key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryFloatKeyInt32* entry = (HashEntryFloatKeyInt32 *) chunk_get_element(&hm->buf, element, true);
entry->key = key;
entry->value = value;
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryFloatKeyInt32* tmp = (HashEntryFloatKeyInt32*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
void hashmap_insert(HashMap* hm, int32 key, const char* value) noexcept {
uint64 index = ((uint32) key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryStrKeyInt32* entry = (HashEntryStrKeyInt32 *) chunk_get_element(&hm->buf, element, true);
entry->key = key;
str_copy_short(entry->value, value, HASH_MAP_MAX_KEY_LENGTH);
entry->value[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryStrKeyInt32* tmp = (HashEntryStrKeyInt32*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
void hashmap_insert(HashMap* hm, int32 key, const byte* value) noexcept {
uint64 index = ((uint32) key) % hm->buf.count;
int32 element = chunk_reserve(&hm->buf, 1);
HashEntryKeyInt32* entry = (HashEntryKeyInt32 *) chunk_get_element(&hm->buf, element, true);
entry->key = key;
entry->value = (byte *) entry + sizeof(HashEntryKeyInt32);
memcpy(entry->value, value, hm->buf.chunk_size - sizeof(HashEntryKeyInt32));
entry->next = 0;
uint16* target = &hm->table[index];
while (*target) {
HashEntryKeyInt32* tmp = (HashEntryKeyInt32*) chunk_get_element(&hm->buf, *target - 1, false);
target = &tmp->next;
}
*target = (uint16) (element + 1);
}
HashEntryKeyInt32* hashmap_get_entry(HashMap* hm, int32 key) noexcept {
uint64 index = ((uint32) key) % hm->buf.count;
HashEntryKeyInt32* entry = (HashEntryKeyInt32 *) chunk_get_element(&hm->buf, hm->table[index] - 1, false);
while (entry != NULL) {
if (entry->key == key) {
DEBUG_MEMORY_READ((uintptr_t) entry, sizeof(HashEntryKeyInt32));
return entry;
}
entry = (HashEntryKeyInt32 *) chunk_get_element(&hm->buf, entry->next - 1, false);
}
return NULL;
}
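// Int-key sketch (not compiled): the key itself acts as the hash and the map is assumed to be created with
// one of the *KeyInt32 entry structs
#if 0
HashMap hm;
hashmap_alloc(&hm, 1024, sizeof(HashEntryInt32KeyInt32));

hashmap_insert(&hm, 1337, (int32) 42);

HashEntryKeyInt32* entry = hashmap_get_entry(&hm, 1337);
int32 value = entry ? ((HashEntryInt32KeyInt32 *) entry)->value : 0;

hashmap_free(&hm);
#endif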
// This overload only saves one step: the call to the hash function
// The reason: in some cases the hash can be computed at compile time
HashEntryKeyInt32* hashmap_get_entry(HashMap* hm, int32 key, uint64 hash) noexcept {
hash %= hm->buf.count;
HashEntryKeyInt32* entry = (HashEntryKeyInt32 *) chunk_get_element(&hm->buf, hm->table[hash] - 1, false);
while (entry != NULL) {
if (entry->key == key) {
DEBUG_MEMORY_READ((uintptr_t) entry, sizeof(HashEntryKeyInt32));
return entry;
}
entry = (HashEntryKeyInt32 *) chunk_get_element(&hm->buf, entry->next - 1, false);
}
return NULL;
}
// @performance If we had a doubly linked list we could delete keys much more easily
// However, that would make insertion slower
// Maybe we should create another hashmap variant that is doubly linked
void hashmap_remove(HashMap* hm, int32 key) noexcept {
uint64 index = ((uint32) key) % hm->buf.count;
HashEntryKeyInt32* entry = (HashEntryKeyInt32 *) chunk_get_element(&hm->buf, hm->table[index] - 1, false);
HashEntryKeyInt32* prev = NULL;
uint32 element_id = hm->table[index];
while (entry != NULL) {
if (entry->key == key) {
if (prev == NULL) {
hm->table[index] = entry->next;
} else {
prev->next = entry->next;
}
chunk_free_elements(&hm->buf, element_id - 1);
return;
}
element_id = entry->next;
prev = entry;
entry = (HashEntryKeyInt32 *) chunk_get_element(&hm->buf, entry->next - 1, false);
}
}
inline
int32 hashmap_value_size(const HashMap* hm) noexcept
{
return (int32) (
hm->buf.chunk_size
- sizeof(char) * HASH_MAP_MAX_KEY_LENGTH // key
- sizeof(uint16) // next element
);
}
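// Example: a map created with element_size = sizeof(HashEntryInt32) (32 B) reports 32 - 26 - 2 = 4 B
// Note this assumes a string key; for the int key variants the key is only 4 B (see the @bug notes below)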
// @question Shouldn't we also store the chunk size etc? Currently not done and expected to be correctly initialized.
inline
int64 hashmap_dump(const HashMap* hm, byte* data, [[maybe_unused]] int32 steps = 8)
{
LOG_1("Dump HashMap");
*((uint32 *) data) = SWAP_ENDIAN_LITTLE(hm->buf.count);
data += sizeof(hm->buf.count);
// Dump the table content
memcpy(data, hm->table, sizeof(uint16) * hm->buf.count);
SWAP_ENDIAN_LITTLE_SIMD(
(uint16 *) data,
(uint16 *) data,
sizeof(uint16) * hm->buf.count / 2, // everything is 2 bytes -> easy to swap
steps
);
data += sizeof(uint16) * hm->buf.count;
// @bug what if Int32 key?
int32 value_size = hashmap_value_size(hm);
// Dump hash map content = buffer memory
// Since we are using ChunkMemory we can be smart about it and iterate the chunk memory instead of performing pointer chasing
int32 free_index = 0;
int32 bit_index = 0;
for (uint32 i = 0; i < hm->buf.count; ++i) {
if (hm->buf.free[free_index] & (1ULL << bit_index)) {
HashEntry* entry = (HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, i);
// key
memcpy(data, entry->key, sizeof(entry->key));
data += sizeof(entry->key);
// next "pointer"
*((uint16 *) data) = SWAP_ENDIAN_LITTLE(entry->next);
data += sizeof(entry->next);
// We just assume that 4 or 8 bytes = int -> endian handling
if (value_size == 4) {
*((int32 *) data) = SWAP_ENDIAN_LITTLE(((HashEntryInt32 *) entry)->value);
} else if (value_size == 8) {
*((int64 *) data) = SWAP_ENDIAN_LITTLE(((HashEntryInt64 *) entry)->value);
} else {
memcpy(data, entry->value, value_size);
}
data += value_size;
} else {
// No entry defined -> NULL
memset(data, 0, hm->buf.chunk_size);
data += hm->buf.chunk_size;
}
++bit_index;
if (bit_index > 63) {
bit_index = 0;
++free_index;
}
}
// dump free array
memcpy(data, hm->buf.free, sizeof(uint64) * CEIL_DIV(hm->buf.count, 64));
LOG_1("Dumped HashMap: %n B", {{LOG_DATA_UINT64, (void *) &hm->buf.size}});
return sizeof(hm->buf.count) // hash map count = buffer count
+ hm->buf.count * sizeof(uint16) // table content
+ hm->buf.size; // hash map content + free array
}
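// Dump sketch (not compiled): the destination buffer must hold the count header plus hashmap_size(&hm) bytes;
// ring is an assumed, already created RingMemory and hm an already filled HashMap
#if 0
byte* out = ring_get_memory(ring, sizeof(hm.buf.count) + hashmap_size(&hm));
int64 written = hashmap_dump(&hm, out);
#endif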
// WARNING: Requires hashmap_create first
inline
int64 hashmap_load(HashMap* hm, const byte* data, [[maybe_unused]] int32 steps = 8)
{
LOG_1("Load HashMap");
uint64 count = SWAP_ENDIAN_LITTLE(*((uint32 *) data));
data += sizeof(uint32);
// Load the table content
memcpy(hm->table, data, sizeof(uint16) * count);
SWAP_ENDIAN_LITTLE_SIMD(
(uint16 *) hm->table,
(uint16 *) hm->table,
sizeof(uint16) * count / 2, // everything is 2 bytes -> easy to swap
steps
);
data += sizeof(uint16) * count;
// The following relies on the hashmap already being initialized (buf.size, chunk_size and the free bitmap must be valid)
// @question Do we maybe want to change this and not require an initialized hashmap?
memcpy(hm->buf.memory, data, hm->buf.size);
data += hm->buf.chunk_size * hm->buf.count;
// @question Don't we possibly have to endian swap the free array as well?
memcpy(hm->buf.free, data, sizeof(uint64) * CEIL_DIV(hm->buf.count, 64));
// @bug what if Int32 key?
int32 value_size = hashmap_value_size(hm);
// Switch endian AND turn offsets to pointers
uint32 chunk_id = 0;
chunk_iterate_start(&hm->buf, chunk_id) {
HashEntry* entry = (HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, chunk_id);
// key is already loaded with the memcpy
// @question Do we even want to use memcpy? We are re-checking all the values here anyways
// next "pointer"
entry->next = SWAP_ENDIAN_LITTLE(entry->next);
if (value_size == 4) {
((HashEntryInt32 *) entry)->value = SWAP_ENDIAN_LITTLE(((HashEntryInt32 *) entry)->value);
} else if (value_size == 8) {
((HashEntryInt64 *) entry)->value = SWAP_ENDIAN_LITTLE(((HashEntryInt64 *) entry)->value);
}
} chunk_iterate_end;
LOG_1("Loaded HashMap: %n B", {{LOG_DATA_UINT64, &hm->buf.size}});
// How many bytes were read from data
return sizeof(hm->buf.count) // hash map count = buffer count
+ hm->buf.count * sizeof(uint16) // table content
+ hm->buf.size;
}
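// Load sketch (not compiled): the map must already be created with the same count and element_size that were
// used before dumping; buffer_memory and dumped_data are assumed to exist
#if 0
HashMap hm;
hashmap_create(&hm, 1024, sizeof(HashEntryInt32), &buffer_memory);

int64 read = hashmap_load(&hm, dumped_data);
#endif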
#endif