From fa9fcb6394e9cd0df81286fb57f751d0650390a2 Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Wed, 18 Dec 2024 07:43:11 +0100 Subject: [PATCH] Started to do a massive thread improvement journey. AMS still outstanding also handling multiple AssetArchives is still a open todo, same goes for multiple gpuapis --- asset/Asset.h | 14 +- asset/AssetManagementSystem.h | 76 +- audio/AudioMixer.h | 5 +- hash/GeneralHash.h | 142 +++- log/Debug.cpp | 14 +- memory/BufferMemory.h | 3 - memory/ChunkMemory.h | 54 +- memory/Heap.h | 4 + memory/Queue.h | 169 +++- memory/RingMemory.h | 63 +- memory/ThreadedChunkMemory.h | 42 + memory/ThreadedQueue.h | 149 ++-- memory/ThreadedRingMemory.h | 2 +- platform/linux/Allocator.h | 3 +- platform/linux/threading/Atomic.h | 1042 +++++++++++++++++++++-- platform/linux/threading/Thread.h | 23 +- platform/win32/Allocator.h | 2 + platform/win32/threading/Atomic.h | 1325 +++++++++++++++++++++++++++-- stdlib/HashMap.h | 7 +- stdlib/Intrinsics.h | 27 +- stdlib/IntrinsicsArm.h | 22 + stdlib/PerfectHashMap.h | 363 ++++++++ thread/Thread.h | 2 +- thread/ThreadPool.h | 38 +- ui/UITheme.h | 1 + 25 files changed, 3206 insertions(+), 386 deletions(-) create mode 100644 memory/ThreadedChunkMemory.h diff --git a/asset/Asset.h b/asset/Asset.h index 5118f7c..626cd31 100644 --- a/asset/Asset.h +++ b/asset/Asset.h @@ -22,6 +22,8 @@ struct Asset { // Could be 0 if there is no official id uint64 official_id; + // @performance This is bad, this uses the same name as the hashmap + // We effectively store the asset name twice which shouldn't be the case char name[MAX_ASSET_NAME_LENGTH]; AssetType type; @@ -32,13 +34,17 @@ struct Asset { // Describes how much ram/vram the asset uses // E.g. vram_size = 0 but ram_size > 0 means that it never uses any gpu memory - uint64 ram_size; - uint64 vram_size; + uint32 ram_size; + uint32 vram_size; + uint64 last_access; // Usually 1 but in some cases an ams may hold entities of variable chunk length // For textures for example a 128x128 is of size 1 but 256x256 is of size 4 uint32 size; + // Variable used for thread safety + bool is_loaded; + // Describes if the memory is currently available in ram/vram // E.g. an asset might be uploaded to the gpu and no longer held in ram (or the other way around) bool is_ram; @@ -49,10 +55,6 @@ struct Asset { bool can_garbage_collect_ram; bool can_garbage_collect_vram; - // Describes if the asset should be removed/garbage collected during CPU/GPU down time - bool should_garbage_collect_ram; - bool should_garbage_collect_vram; - Asset* next; Asset* prev; diff --git a/asset/AssetManagementSystem.h b/asset/AssetManagementSystem.h index 3ccbdc7..025b1e7 100644 --- a/asset/AssetManagementSystem.h +++ b/asset/AssetManagementSystem.h @@ -22,6 +22,7 @@ // @question Asset component systems could be created per region -> easy to simulate a specific region // @bug This means players might not be able to transition from one area to another?! +// @performance There is a huge performance flaw. We CANNOT have an asset only in vram because it always also allocates the ram (asset_data_memory) struct AssetManagementSystem { // @question is this even necessary or could we integrate this directly into the system here? 
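    //
    // Sketch of the handshake the new Asset::is_loaded flag is meant to enable
    // (see the atomic_set_release note near ams_reserve_asset further down in this
    // patch). The atomic helpers added here have no bool overload, so this assumes
    // an int32-backed flag; the call sites are illustrative, not part of the diff:
    //
    //     // loader thread: write the asset data first, then publish
    //     atomic_set_release(&asset->is_loaded, 1);
    //
    //     // any other thread: only touch the payload once the flag is visible
    //     if (atomic_get_acquire(&asset->is_loaded)) { /* safe to read the asset */ }
    //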
HashMap hash_map; @@ -29,6 +30,7 @@ struct AssetManagementSystem { uint64 ram_size; uint64 vram_size; uint64 asset_count; + int32 overhead; bool has_changed; // The indices of asset_memory and asset_data_memory are always linked @@ -48,16 +50,15 @@ struct AssetManagementSystem { // @question do we want to create an extra threaded version? Or a combined one, like we have right now. // @question Do we want to add a mutex to assets. This way we don't have to lock the entire ams. pthread_mutex_t mutex; - - // @bug We probably also need a overhead value. - // In some cases we need more data than our normal data (see texture, it contains image + texture) }; -void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 chunk_size, int32 count) +void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 chunk_size, int32 count, int32 overhead = 0) { // setup hash_map hashmap_create(&ams->hash_map, count, sizeof(HashEntryInt64), buf); + ams->overhead = overhead; + // setup asset_memory chunk_init(&ams->asset_memory, buf, count, sizeof(Asset), 64); @@ -71,13 +72,15 @@ void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 chunk_size, } // WARNING: buf size see ams_get_buffer_size -void ams_create(AssetManagementSystem* ams, byte* buf, int32 chunk_size, int32 count) +void ams_create(AssetManagementSystem* ams, byte* buf, int32 chunk_size, int32 count, int32 overhead = 0) { ASSERT_SIMPLE(chunk_size); // setup hash_map hashmap_create(&ams->hash_map, count, sizeof(HashEntryInt64), buf); + ams->overhead = overhead; + // setup asset_memory ams->asset_memory.count = count; ams->asset_memory.chunk_size = sizeof(Asset); @@ -108,7 +111,7 @@ void ams_free(AssetManagementSystem* ams) inline int32 ams_calculate_chunks(const AssetManagementSystem* ams, int32 byte_size) { - return (int32) CEIL_DIV(byte_size, ams->asset_data_memory.chunk_size); + return (int32) CEIL_DIV(byte_size + ams->overhead, ams->asset_data_memory.chunk_size); } inline @@ -122,8 +125,6 @@ int64 ams_get_buffer_size(int32 count, int32 chunk_size) inline void ams_update_stats(AssetManagementSystem* ams) { - // @bug We should check the hash map or the memory status, we could still have old values in here - ams->vram_size = 0; ams->ram_size = 0; ams->asset_count = 0; @@ -197,10 +198,14 @@ Asset* ams_get_asset(AssetManagementSystem* ams, const char* key) { HashEntry* entry = hashmap_get_entry(&ams->hash_map, key); - // @bug entry->value seems to be an address outside of any known buffer, how? DEBUG_MEMORY_READ( (uint64) (entry ? (Asset *) entry->value : 0), - entry ? ((Asset *) entry->value)->ram_size + sizeof(Asset) : 0 + entry ? sizeof(Asset) : 0 + ); + + DEBUG_MEMORY_READ( + (uint64) (entry ? (Asset *) entry->value : 0), + entry ? ((Asset *) entry->value)->self + ((Asset *) entry->value)->ram_size : 0 ); return entry ? (Asset *) entry->value : NULL; @@ -211,10 +216,14 @@ Asset* ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 hash) { HashEntry* entry = hashmap_get_entry(&ams->hash_map, key, hash); - // @bug entry->value seems to be an address outside of any known buffer, how? DEBUG_MEMORY_READ( (uint64) (entry ? (Asset *) entry->value : 0), - entry ? ((Asset *) entry->value)->ram_size + sizeof(Asset) : 0 + entry ? sizeof(Asset) : 0 + ); + + DEBUG_MEMORY_READ( + (uint64) (entry ? (Asset *) entry->value : 0), + entry ? ((Asset *) entry->value)->self + ((Asset *) entry->value)->ram_size : 0 ); return entry ? 
(Asset *) entry->value : NULL; @@ -248,6 +257,7 @@ Asset* thrd_ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 ha // @todo implement defragment command to optimize memory layout since the memory layout will become fragmented over time // @performance This function is VERY important, check if we can optimize it +// We could probably optimize the threaded version by adding a atomic_set_release(asset->is_loaded, true); Asset* ams_reserve_asset(AssetManagementSystem* ams, const char* name, uint32 elements = 1) { int64 free_asset = chunk_reserve(&ams->asset_memory, elements, true); @@ -315,6 +325,48 @@ Asset* ams_reserve_asset(AssetManagementSystem* ams, const char* name, uint32 el return asset; } +void ams_garbage_collect(AssetManagementSystem* ams, uint64 time, uint64 dt) +{ + Asset* asset = ams->first; + + while (asset) { + // @performance We cannot just remove ram and keep vram. This is a huge flaw + if (asset->can_garbage_collect_ram && asset->can_garbage_collect_vram && time - asset->last_access <= dt) { + ams_free_asset(ams, asset); + } + + asset = asset->next; + } +} + +void ams_garbage_collect(AssetManagementSystem* ams) +{ + Asset* asset = ams->first; + + while (asset) { + // @performance We cannot just remove ram and keep vram. This is a huge flaw + if (asset->can_garbage_collect_ram && asset->can_garbage_collect_vram) { + ams_free_asset(ams, asset); + } + + asset = asset->next; + } +} + +void thrd_ams_garbage_collect(AssetManagementSystem* ams, uint64 time, uint64 dt) +{ + pthread_mutex_lock(&ams->mutex); + ams_garbage_collect(ams, time, dt); + pthread_mutex_unlock(&ams->mutex); +} + +void thrd_ams_garbage_collect(AssetManagementSystem* ams) +{ + pthread_mutex_lock(&ams->mutex); + ams_garbage_collect(ams); + pthread_mutex_unlock(&ams->mutex); +} + Asset* thrd_ams_reserve_asset(AssetManagementSystem* ams, const char* name, uint32 elements = 1) { pthread_mutex_lock(&ams->mutex); Asset* asset = ams_reserve_asset(ams, name, elements); diff --git a/audio/AudioMixer.h b/audio/AudioMixer.h index 5f5c9c7..0e8b68f 100644 --- a/audio/AudioMixer.h +++ b/audio/AudioMixer.h @@ -89,17 +89,18 @@ struct AudioMixer { // @todo add mutex for locking and create threaded functions // do we need a condition or semaphore? + // Wait, why do we even need threading? 
Isn't the threading handled by the file loading }; bool audio_mixer_is_active(AudioMixer* mixer) { if (mixer->state_new == AUDIO_MIXER_STATE_ACTIVE - && atomic_get((int32 *) &mixer->state_new) == AUDIO_MIXER_STATE_ACTIVE + && atomic_get_relaxed((int32 *) &mixer->state_new) == AUDIO_MIXER_STATE_ACTIVE ) { return true; } AudioMixerState mixer_state; - if ((mixer_state = (AudioMixerState) atomic_get((int32 *) &mixer->state_new)) != mixer->state_old) { + if ((mixer_state = (AudioMixerState) atomic_get_relaxed((int32 *) &mixer->state_new)) != mixer->state_old) { if (mixer_state != AUDIO_MIXER_STATE_UNINITIALIZED) { audio_load( mixer->window, diff --git a/hash/GeneralHash.h b/hash/GeneralHash.h index 007f26d..7538742 100644 --- a/hash/GeneralHash.h +++ b/hash/GeneralHash.h @@ -23,6 +23,7 @@ uint64 hash_djb2(const char* key) { return hash; } +inline uint64 hash_sdbm(const byte* key) { uint64 hash = 0; @@ -35,6 +36,7 @@ uint64 hash_sdbm(const byte* key) return hash; } +inline uint64 hash_lose_lose(const byte* key) { uint64 hash = 0; @@ -47,7 +49,9 @@ uint64 hash_lose_lose(const byte* key) return hash; } -uint64 hash_polynomial_rolling(const char* str) { +inline +uint64 hash_polynomial_rolling(const char* str) +{ const int32 p = 31; const int32 m = 1000000009; uint64 hash = 0; @@ -62,7 +66,9 @@ uint64 hash_polynomial_rolling(const char* str) { return hash; } -uint64 hash_fnv1a(const char* str) { +inline +uint64 hash_fnv1a(const char* str) +{ const uint64 FNV_OFFSET_BASIS = 14695981039346656037UL; const uint64 FNV_PRIME = 1099511628211UL; uint64 hash = FNV_OFFSET_BASIS; @@ -76,34 +82,144 @@ uint64 hash_fnv1a(const char* str) { return hash; } +inline uint32 hash_oat(const char* str) { - uint32 h = 0; + uint32 hash = 0; while(*str) { - h += *str++; - h += (h << 10); - h ^= (h >> 6); + hash += *str++; + hash += (hash << 10); + hash ^= (hash >> 6); } - h += (h << 3); - h ^= (h >> 11); - h += (h << 15); + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); - return h; + return hash; } +inline uint32 hash_ejb(const char* str) { const uint32 PRIME1 = 37; const uint32 PRIME2 = 1048583; - uint32 h = 0; + uint32 hash = 0; while (*str) { - h = h * PRIME1 ^ (*str++ - ' '); + hash = hash * PRIME1 ^ (*str++ - ' '); } - return h % PRIME2; + return hash % PRIME2; +} + +//////////////////////////////////// +// Seeded hash functions +//////////////////////////////////// + +inline constexpr +uint64 hash_djb2_seeded(const char* key, int32 seed) +{ + uint64 hash = 5381; + int32 c; + + while ((c = *key++)) { + hash = ((hash << 5) + hash) + c; + } + + return hash ^ (seed + (seed << 6) + (seed >> 2)); +} + +inline +uint64 hash_sdbm_seeded(const char* key, int32 seed) +{ + uint64 hash = 0; + int32 c; + + while (c = *key++) { + hash = c + (hash << 6) + (hash << 16) - hash; + } + + return hash ^ (seed + (seed << 6) + (seed >> 2)); +} + +inline +uint64 hash_lose_lose_seeded(const char* key, int32 seed) +{ + uint64 hash = 0; + int32 c; + + while (c = *key++) { + hash += c; + } + + return hash ^ (seed + (seed << 6) + (seed >> 2)); +} + +inline +uint64 hash_polynomial_rolling_seeded(const char* str, int32 seed) +{ + const int32 p = 31; + const int32 m = 1000000009; + uint64 hash = 0; + uint64 p_pow = 1; + + while (*str) { + hash = (hash + (*str - 'a' + 1) * p_pow) % m; + p_pow = (p_pow * p) % m; + str++; + } + + return hash ^ (seed + (seed << 6) + (seed >> 2)); +} + +inline +uint64 hash_fnv1a_seeded(const char* str, int32 seed) +{ + const uint64 FNV_OFFSET_BASIS = 14695981039346656037UL; + const uint64 FNV_PRIME 
= 1099511628211UL; + uint64 hash = FNV_OFFSET_BASIS; + + while (*str) { + hash ^= (byte) *str; + hash *= FNV_PRIME; + str++; + } + + return hash ^ (seed + (seed << 6) + (seed >> 2)); +} + +inline +uint64 hash_oat_seeded(const char* str, int32 seed) +{ + uint64 hash = 0; + + while(*str) { + hash += *str++; + hash += (hash << 10); + hash ^= (hash >> 6); + } + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash ^ (seed + (seed << 6) + (seed >> 2));; +} + +inline +uint64 hash_ejb_seeded(const char* str, int32 seed) +{ + const uint64 PRIME1 = 37; + const uint64 PRIME2 = 1048583; + uint64 hash = 0; + + while (*str) { + hash = hash * PRIME1 ^ (*str++ - ' '); + } + + return (hash % PRIME2) ^ (seed + (seed << 6) + (seed >> 2));; } #endif \ No newline at end of file diff --git a/log/Debug.cpp b/log/Debug.cpp index aa761ad..5d5c7c0 100644 --- a/log/Debug.cpp +++ b/log/Debug.cpp @@ -161,19 +161,19 @@ void update_timing_stat_reset(uint32 stat) inline void reset_counter(int32 id) { - atomic_set(&debug_container->counter[id], 0); + atomic_set_acquire(&debug_container->counter[id], 0); } inline void log_increment(int32 id, int64 by = 1) { - atomic_add(&debug_container->counter[id], by); + atomic_add_acquire(&debug_container->counter[id], by); } inline void log_counter(int32 id, int64 value) { - atomic_set(&debug_container->counter[id], value); + atomic_set_acquire(&debug_container->counter[id], value); } // @todo don't use a pointer to this should be in a global together with other logging data (see Log.h) @@ -234,9 +234,9 @@ void debug_memory_log(uint64 start, uint64 size, int32 type, const char* functio return; } - uint64 idx = atomic_fetch_add(&mem->action_idx, 1); + uint64 idx = atomic_fetch_add_relaxed(&mem->action_idx, 1); if (idx >= ARRAY_COUNT(mem->last_action)) { - atomic_set(&mem->action_idx, 1); + atomic_set_acquire(&mem->action_idx, 1); idx %= ARRAY_COUNT(mem->last_action); } @@ -266,9 +266,9 @@ void debug_memory_reserve(uint64 start, uint64 size, int32 type, const char* fun return; } - uint64 idx = atomic_fetch_add(&mem->reserve_action_idx, 1); + uint64 idx = atomic_fetch_add_relaxed(&mem->reserve_action_idx, 1); if (idx >= ARRAY_COUNT(mem->reserve_action)) { - atomic_set(&mem->reserve_action_idx, 1); + atomic_set_acquire(&mem->reserve_action_idx, 1); idx %= ARRAY_COUNT(mem->last_action); } diff --git a/memory/BufferMemory.h b/memory/BufferMemory.h index 23d763f..8ca0557 100644 --- a/memory/BufferMemory.h +++ b/memory/BufferMemory.h @@ -170,9 +170,6 @@ int64 buffer_load(BufferMemory* buf, const byte* data) buf->alignment = SWAP_ENDIAN_LITTLE(*((int32 *) data)); data += sizeof(buf->alignment); - buf->element_alignment = SWAP_ENDIAN_LITTLE(*((int32 *) data)); - data += sizeof(buf->element_alignment); - // End buf->end = buf->memory + SWAP_ENDIAN_LITTLE(*((uint64 *) data)); data += sizeof(uint64); diff --git a/memory/ChunkMemory.h b/memory/ChunkMemory.h index bb7b939..86ffc8a 100644 --- a/memory/ChunkMemory.h +++ b/memory/ChunkMemory.h @@ -43,23 +43,6 @@ struct ChunkMemory { uint64* free; }; -struct ThreadedChunkMemory { - byte* memory; - - uint64 count; - uint64 size; - uint64 chunk_size; - int64 last_pos; - int32 alignment; - - // length = count - // free describes which locations are used and which are free - uint64* free; - - pthread_mutex_t mutex; - pthread_cond_t cond; -}; - inline void chunk_alloc(ChunkMemory* buf, uint64 count, uint64 chunk_size, int32 alignment = 64) { @@ -69,11 +52,11 @@ void chunk_alloc(ChunkMemory* buf, uint64 count, uint64 
chunk_size, int32 alignm chunk_size = ROUND_TO_NEAREST(chunk_size, alignment); buf->memory = alignment < 2 - ? (byte *) platform_alloc(count * chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64)) - : (byte *) platform_alloc_aligned(count * chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64), alignment); + ? (byte *) platform_alloc(count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64)) + : (byte *) platform_alloc_aligned(count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64), alignment); buf->count = count; - buf->size = count * chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64); + buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); buf->chunk_size = chunk_size; buf->last_pos = -1; buf->alignment = alignment; @@ -86,17 +69,6 @@ void chunk_alloc(ChunkMemory* buf, uint64 count, uint64 chunk_size, int32 alignm DEBUG_MEMORY_INIT((uint64) buf->memory, buf->size); } -inline -void chunk_free(ChunkMemory* buf) -{ - DEBUG_MEMORY_DELETE((uint64) buf->memory, buf->size); - if (buf->alignment < 2) { - platform_free((void **) &buf->memory); - } else { - platform_aligned_free((void **) &buf->memory); - } -} - inline void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint64 chunk_size, int32 alignment = 64) { @@ -105,10 +77,10 @@ void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint64 chunk chunk_size = ROUND_TO_NEAREST(chunk_size, alignment); - buf->memory = buffer_get_memory(data, count * chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64)); + buf->memory = buffer_get_memory(data, count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64)); buf->count = count; - buf->size = count * chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64); + buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); buf->chunk_size = chunk_size; buf->last_pos = -1; buf->alignment = alignment; @@ -134,7 +106,7 @@ void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint64 chunk_size, i buf->memory = data; buf->count = count; - buf->size = count * chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64); + buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); buf->chunk_size = chunk_size; buf->last_pos = -1; buf->alignment = alignment; @@ -148,10 +120,22 @@ void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint64 chunk_size, i DEBUG_MEMORY_RESERVE((uint64) buf->memory, buf->size, 187); } +inline +void chunk_free(ChunkMemory* buf) +{ + DEBUG_MEMORY_DELETE((uint64) buf->memory, buf->size); + if (buf->alignment < 2) { + platform_free((void **) &buf->memory); + } else { + platform_aligned_free((void **) &buf->memory); + } +} + inline byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false) { byte* offset = buf->memory + element * buf->chunk_size; + ASSERT_SIMPLE(offset); if (zeroed) { memset((void *) offset, 0, buf->chunk_size); @@ -159,8 +143,6 @@ byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false) DEBUG_MEMORY_READ((uint64) offset, buf->chunk_size); - ASSERT_SIMPLE(offset); - return offset; } diff --git a/memory/Heap.h b/memory/Heap.h index 0d27541..f665b99 100644 --- a/memory/Heap.h +++ b/memory/Heap.h @@ -132,6 +132,8 @@ void heap_pop(Heap* heap, void* out) { return; } + DEBUG_MEMORY_READ((uint64) heap->elements, heap->element_size); + memcpy(out, heap->elements, heap->element_size); void* last_element = heap->elements + ((heap->size - 1) * heap->element_size); memcpy(heap->elements, last_element, heap->element_size); @@ -141,6 +143,8 @@ void heap_pop(Heap* heap, void* 
out) { inline void* heap_peek(Heap* heap) { + DEBUG_MEMORY_READ((uint64) heap->elements, heap->element_size); + return heap->elements; } diff --git a/memory/Queue.h b/memory/Queue.h index 458bc97..b3e6fcf 100644 --- a/memory/Queue.h +++ b/memory/Queue.h @@ -12,35 +12,57 @@ #include "../stdlib/Types.h" #include "RingMemory.h" -typedef RingMemory Queue; +// WARNING: Structure needs to be the same as RingMemory +struct Queue { + byte* memory; + byte* end; -// @question Consider to add the element size into the Queue struct -> we don't need to pass it after initialization as parameter + byte* head; + + // This variable is usually only used by single producer/consumer code mostly found in threads. + // One thread inserts elements -> updates head + // The other thread reads elements -> updates tail + // This code itself doesn't change this variable + byte* tail; + + uint64 size; + uint32 alignment; + + // The ring memory ends here + uint32 element_size; +}; inline -void queue_alloc(Queue* queue, uint64 element_count, uint32 element_size, int32 alignment = 64) +void queue_alloc(Queue* queue, uint64 element_count, uint32 element_size, uint32 alignment = 64) { - // @bug The alignment needs to be included in EVERY element - ring_alloc(queue, element_count * element_size, alignment); + element_size = ROUND_TO_NEAREST(element_size, alignment); + + ring_alloc((RingMemory *) queue, element_count * element_size, alignment); + queue->element_size = element_size; } inline -void queue_init(Queue* queue, BufferMemory* buf, uint64 element_count, uint32 element_size, int32 alignment = 64) +void queue_init(Queue* queue, BufferMemory* buf, uint64 element_count, uint32 element_size, uint32 alignment = 64) { - // @bug The alignment needs to be included in EVERY element - ring_init(queue, buf, element_count * element_size, alignment); + element_size = ROUND_TO_NEAREST(element_size, alignment); + + ring_init((RingMemory *) queue, buf, element_count * element_size, alignment); + queue->element_size = element_size; } inline -void queue_init(Queue* queue, byte* buf, uint64 element_count, uint32 element_size, int32 alignment = 64) +void queue_init(Queue* queue, byte* buf, uint64 element_count, uint32 element_size, uint32 alignment = 64) { - // @bug The alignment needs to be included in EVERY element - ring_init(queue, buf, element_count * element_size, alignment); + element_size = ROUND_TO_NEAREST(element_size, alignment); + + ring_init((RingMemory *) queue, buf, element_count * element_size, alignment); + queue->element_size = element_size; } inline void queue_free(Queue* queue) { - ring_free(queue); + ring_free((RingMemory *) queue); } inline @@ -54,60 +76,145 @@ bool queue_set_empty(Queue* queue) { } inline -bool queue_is_full(Queue* queue, uint64 size, byte aligned = 0) { - return !ring_commit_safe((RingMemory *) queue, size, aligned); +bool queue_is_full(Queue* queue) { + return !ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment); } -// Conditional Lock inline -byte* queue_enqueue(Queue* queue, byte* data, uint64 size, byte aligned = 0) +void queue_enqueue_unique(ThreadedQueue* queue, const byte* data) { - byte* mem = ring_get_memory_nomove(queue, size, aligned); - memcpy(mem, data, size); - ring_move_pointer(queue, &queue->head, size, aligned); + ASSERT_SIMPLE((uint64_t) data % 4 == 0); + + byte* tail = queue->tail; + while (tail != queue->tail) { + ASSERT_SIMPLE((uint64_t) tail % 4 == 0); + + // @performance we could probably make this faster since we don't need to compare the entire 
range + if (is_equal_aligned(tail, data, queue->element_size) == 0) { + return; + } + + ring_move_pointer((RingMemory *) queue, &tail, queue->element_size, queue->alignment); + } + + if (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) { + return; + } + + byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment); + memcpy(mem, data, queue->element_size); +} + +inline +byte* queue_enqueue(Queue* queue, byte* data) +{ + byte* mem = ring_get_memory_nomove((RingMemory *) queue, queue->element_size, queue->alignment); + memcpy(mem, data, queue->element_size); + ring_move_pointer((RingMemory *) queue, &queue->head, queue->element_size, queue->alignment); return mem; } inline -byte* queue_enqueue_start(Queue* queue, uint64 size, byte aligned = 0) +byte* queue_enqueue_safe(Queue* queue, byte* data) { - return ring_get_memory_nomove(queue, size, aligned); + if(queue_is_full(queue)) { + return NULL; + } + + byte* mem = ring_get_memory_nomove((RingMemory *) queue, queue->element_size, queue->alignment); + memcpy(mem, data, queue->element_size); + ring_move_pointer((RingMemory *) queue, &queue->head, queue->element_size, queue->alignment); + + return mem; +} + +// WARNING: Only useful for single producer single consumer +inline +byte* queue_enqueue_wait_atomic(Queue* queue, byte* data) +{ + while (!ring_commit_safe_atomic((RingMemory *) queue, queue->alignment)) {} + + byte* mem = ring_get_memory_nomove((RingMemory *) queue, queue->element_size, queue->alignment); + memcpy(mem, data, queue->element_size); + ring_move_pointer((RingMemory *) queue, &queue->head, queue->element_size, queue->alignment); + + return mem; +} + +// WARNING: Only useful for single producer single consumer +inline +byte* queue_enqueue_safe_atomic(Queue* queue, byte* data) +{ + if (!ring_commit_safe_atomic((RingMemory *) queue, queue->alignment)) { + return NULL; + } + + byte* mem = ring_get_memory_nomove((RingMemory *) queue, queue->element_size, queue->alignment); + memcpy(mem, data, queue->element_size); + ring_move_pointer((RingMemory *) queue, &queue->head, queue->element_size, queue->alignment); + + return mem; } inline -void queue_enqueue_end(Queue* queue, uint64 size, byte aligned = 0) +byte* queue_enqueue_start(Queue* queue) { - ring_move_pointer(queue, &queue->head, size, aligned); + return ring_get_memory_nomove((RingMemory *) queue, queue->element_size, queue->alignment); } inline -bool queue_dequeue(Queue* queue, byte* data, uint64 size, byte aligned = 0) +void queue_enqueue_end(Queue* queue) +{ + ring_move_pointer((RingMemory *) queue, &queue->head, queue->element_size, queue->alignment); +} + +inline +bool queue_dequeue(Queue* queue, byte* data) { if (queue->head == queue->tail) { return false; } - if (size == 4) { + if (queue->element_size == 4) { *((int32 *) data) = *((int32 *) queue->tail); } else { - memcpy(data, queue->tail, size); + memcpy(data, queue->tail, queue->element_size); } - ring_move_pointer(queue, &queue->tail, size, aligned); + ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); + + return true; +} + +// WARNING: Only useful for single producer single consumer +inline +bool queue_dequeue_atomic(Queue* queue, byte* data) +{ + if (atomic_get_relaxed((uint64 *) &queue->head) == (uint64) queue->tail) { + return false; + } + + if (queue->element_size == 4) { + *((int32 *) data) = *((int32 *) queue->tail); + } else { + memcpy(data, queue->tail, queue->element_size); + } + + 
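    //
    // A minimal single-producer/single-consumer sketch for the *_atomic queue
    // variants above (per the warnings they are only safe with exactly one producer
    // and one consumer). The include path, BufferMemory `buf`, and the int32 payload
    // are illustrative assumptions:
    //
    //     // #include "memory/Queue.h"
    //     Queue commands;
    //     queue_init(&commands, buf, 128, sizeof(int32));       // buf: BufferMemory set up elsewhere
    //
    //     // producer thread
    //     int32 cmd = 42;
    //     queue_enqueue_safe_atomic(&commands, (byte *) &cmd);  // returns NULL when the ring is full
    //
    //     // consumer thread
    //     int32 out;
    //     if (queue_dequeue_atomic(&commands, (byte *) &out)) { /* handle out */ }
    //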
ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); return true; } inline -byte* queue_dequeue_keep(Queue* queue, uint64 size, byte aligned = 0) +byte* queue_dequeue_keep(Queue* queue) { if (queue->head == queue->tail) { return NULL; } byte* data = queue->tail; - ring_move_pointer(queue, &queue->tail, size, aligned); + ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); return data; } @@ -119,9 +226,9 @@ byte* queue_dequeue_start(Queue* queue) } inline -void queue_dequeue_end(Queue* queue, uint64 size, byte aligned = 0) +void queue_dequeue_end(Queue* queue) { - ring_move_pointer(queue, &queue->tail, size, aligned); + ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); } #endif \ No newline at end of file diff --git a/memory/RingMemory.h b/memory/RingMemory.h index eae684f..8188728 100644 --- a/memory/RingMemory.h +++ b/memory/RingMemory.h @@ -24,12 +24,16 @@ #include "../platform/win32/Allocator.h" #include "../platform/win32/threading/ThreadDefines.h" #include "../platform/win32/threading/Semaphore.h" + #include "../platform/win32/threading/Atomic.h" #elif __linux__ #include "../platform/linux/Allocator.h" #include "../platform/linux/threading/ThreadDefines.h" #include "../platform/linux/threading/Semaphore.h" + #include "../platform/linux/threading/Atomic.h" #endif +// WARNING: Changing this structure has effects on other data structures (e.g. Queue) +// When chaning make sure you understand what you are doing struct RingMemory { byte* memory; byte* end; @@ -43,14 +47,11 @@ struct RingMemory { byte* tail; uint64 size; - int32 alignment; - int32 element_alignment; + uint32 alignment; }; -// @bug alignment should also include the end point, not just the start - inline -void ring_alloc(RingMemory* ring, uint64 size, int32 alignment = 64) +void ring_alloc(RingMemory* ring, uint64 size, uint32 alignment = 64) { ASSERT_SIMPLE(size); @@ -63,7 +64,6 @@ void ring_alloc(RingMemory* ring, uint64 size, int32 alignment = 64) ring->tail = ring->memory; ring->size = size; ring->alignment = alignment; - ring->element_alignment = 0; memset(ring->memory, 0, ring->size); @@ -71,7 +71,7 @@ void ring_alloc(RingMemory* ring, uint64 size, int32 alignment = 64) } inline -void ring_init(RingMemory* ring, BufferMemory* buf, uint64 size, int32 alignment = 64) +void ring_init(RingMemory* ring, BufferMemory* buf, uint64 size, uint32 alignment = 64) { ASSERT_SIMPLE(size); @@ -82,26 +82,23 @@ void ring_init(RingMemory* ring, BufferMemory* buf, uint64 size, int32 alignment ring->tail = ring->memory; ring->size = size; ring->alignment = alignment; - ring->element_alignment = 0; DEBUG_MEMORY_INIT((uint64) ring->memory, ring->size); DEBUG_MEMORY_RESERVE((uint64) ring->memory, ring->size, 187); } inline -void ring_init(RingMemory* ring, byte* buf, uint64 size, int32 alignment = 64) +void ring_init(RingMemory* ring, byte* buf, uint64 size, uint32 alignment = 64) { ASSERT_SIMPLE(size); - // @bug what if an alignment is defined? 
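    //
    // Sketch of what the replacement line below does, assuming ROUND_TO_NEAREST
    // rounds up to the next multiple of `alignment` (the same assumption the
    // chunk_size rounding in ChunkMemory.h relies on); the numbers are illustrative:
    //
    //     // alignment = 64, caller hands in buf = 0x1008
    //     // ROUND_TO_NEAREST(0x1008, 64) == 0x1040  -> memory starts on a cache-line boundary
    //     // the caller therefore has to over-allocate by up to `alignment - 1` bytes
    //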
- ring->memory = buf; + ring->memory = (byte *) ROUND_TO_NEAREST((uintptr_t) buf, alignment); ring->end = ring->memory + size; ring->head = ring->memory; ring->tail = ring->memory; ring->size = size; ring->alignment = alignment; - ring->element_alignment = 0; memset(ring->memory, 0, ring->size); @@ -122,10 +119,6 @@ void ring_free(RingMemory* ring) inline byte* ring_calculate_position(const RingMemory* ring, uint64 size, byte aligned = 0) { - if (aligned == 0) { - aligned = (byte) OMS_MAX(ring->element_alignment, 1); - } - byte* head = ring->head; if (aligned > 1) { @@ -158,9 +151,9 @@ void ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, byte aligned = { ASSERT_SIMPLE(size <= ring->size); - if (aligned == 0) { - aligned = (byte) OMS_MAX(ring->element_alignment, 1); - } + // Actually, we cannot be sure that this is a read, it could also be a write. + // However, we better do it once here than manually in every place that uses this function + DEBUG_MEMORY_READ((uint64) *pos, size); if (aligned > 1) { uintptr_t address = (uintptr_t) *pos; @@ -184,10 +177,6 @@ byte* ring_get_memory(RingMemory* ring, uint64 size, byte aligned = 0, bool zero { ASSERT_SIMPLE(size <= ring->size); - if (aligned == 0) { - aligned = (byte) OMS_MAX(ring->element_alignment, 1); - } - if (aligned > 1) { uintptr_t address = (uintptr_t) ring->head; ring->head += (aligned - (address& (aligned - 1))) % aligned; @@ -222,10 +211,6 @@ byte* ring_get_memory_nomove(RingMemory* ring, uint64 size, byte aligned = 0, bo { ASSERT_SIMPLE(size <= ring->size); - if (aligned == 0) { - aligned = (byte) OMS_MAX(ring->element_alignment, 1); - } - byte* pos = ring->head; if (aligned > 1) { @@ -285,6 +270,27 @@ bool ring_commit_safe(const RingMemory* ring, uint64 size, byte aligned = 0) } } +inline +bool ring_commit_safe_atomic(const RingMemory* ring, uint64 size, byte aligned = 0) +{ + // aligned * 2 since that should be the maximum overhead for an element + // @bug could this result in a case where the ring is considered empty/full (false positive/negative)? 
+ // The "correct" version would probably to use ring_move_pointer in some form + uint64 max_mem_required = size + aligned * 2; + + uint64 tail = atomic_get_relaxed((uint64 *) &ring->tail); + uint64 head = atomic_get_relaxed((uint64 *) &ring->head); + + if (tail < head) { + return ((uint64) (ring->end - head)) > max_mem_required + || ((uint64) (tail - (uint64) ring->memory)) > max_mem_required; + } else if (tail > head) { + return ((uint64) (tail - head)) > max_mem_required; + } else { + return true; + } +} + inline void ring_force_head_update(const RingMemory* ring) { @@ -314,9 +320,6 @@ int64 ring_dump(const RingMemory* ring, byte* data) *((int32 *) data) = SWAP_ENDIAN_LITTLE(ring->alignment); data += sizeof(ring->alignment); - *((int32 *) data) = SWAP_ENDIAN_LITTLE(ring->element_alignment); - data += sizeof(ring->element_alignment); - // tail/End *((uint64 *) data) = SWAP_ENDIAN_LITTLE((uint64) (ring->tail - ring->memory)); data += sizeof(ring->tail); diff --git a/memory/ThreadedChunkMemory.h b/memory/ThreadedChunkMemory.h new file mode 100644 index 0000000..9987d4b --- /dev/null +++ b/memory/ThreadedChunkMemory.h @@ -0,0 +1,42 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MEMORY_THREADED_CHUNK_MEMORY_H +#define TOS_MEMORY_THREADED_CHUNK_MEMORY_H + +#include +#include "../stdlib/Types.h" + +#if _WIN32 + #include "../platform/win32/threading/Thread.h" +#elif __linux__ + #include "../platform/linux/threading/Thread.h" +#endif + +struct ThreadedChunkMemory { + byte* memory; + + uint64 count; + uint64 size; + uint64 chunk_size; + int64 last_pos; + int32 alignment; + + // length = count + // free describes which locations are used and which are free + uint64* free; + + // Chunk implementation ends here + // The completeness indicates if the data is completely written to + uint64* completeness; + + pthread_mutex_t mutex; + pthread_cond_t cond; +}; + +#endif \ No newline at end of file diff --git a/memory/ThreadedQueue.h b/memory/ThreadedQueue.h index 44c8d4d..e21a337 100644 --- a/memory/ThreadedQueue.h +++ b/memory/ThreadedQueue.h @@ -36,8 +36,10 @@ struct ThreadedQueue { byte* tail; uint64 size; - int32 alignment; - int32 element_alignment; + uint32 alignment; + + // The ring memory ends here + uint32 element_size; // We support both conditional locking and semaphore locking // These values are not initialized and not used unless you use the queue @@ -48,26 +50,14 @@ struct ThreadedQueue { sem_t full; }; -// @question Consider to add the element size into the Queue struct -> we don't need to pass it after initialization as parameter - inline -void thrd_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint64 element_size, int32 alignment = 64) +void thrd_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint64 element_size, uint32 alignment = 64) { - // @bug The alignment needs to be included in EVERY element + element_size = ROUND_TO_NEAREST(element_size, alignment); + ring_alloc((RingMemory *) queue, element_count * element_size, alignment); - pthread_mutex_init(&queue->mutex, NULL); - pthread_cond_init(&queue->cond, NULL); - - sem_init(&queue->empty, element_count); - sem_init(&queue->full, 0); -} - -inline -void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_count, uint64 element_size, int32 alignment = 64) -{ - // @bug The alignment needs to be included in EVERY element - ring_init((RingMemory *) queue, buf, element_count * element_size, alignment); + 
queue->element_size = element_size; pthread_mutex_init(&queue->mutex, NULL); pthread_cond_init(&queue->cond, NULL); @@ -77,11 +67,30 @@ void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_cou } inline -void thrd_queue_init(ThreadedQueue* queue, byte* buf, uint32 element_count, uint64 element_size, int32 alignment = 64) +void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_count, uint64 element_size, uint32 alignment = 64) { - // @bug The alignment needs to be included in EVERY element + element_size = ROUND_TO_NEAREST(element_size, alignment); + ring_init((RingMemory *) queue, buf, element_count * element_size, alignment); + queue->element_size = element_size; + + pthread_mutex_init(&queue->mutex, NULL); + pthread_cond_init(&queue->cond, NULL); + + sem_init(&queue->empty, element_count); + sem_init(&queue->full, 0); +} + +inline +void thrd_queue_init(ThreadedQueue* queue, byte* buf, uint32 element_count, uint64 element_size, uint32 alignment = 64) +{ + element_size = ROUND_TO_NEAREST(element_size, alignment); + + ring_init((RingMemory *) queue, buf, element_count * element_size, alignment); + + queue->element_size = element_size; + pthread_mutex_init(&queue->mutex, NULL); pthread_cond_init(&queue->cond, NULL); @@ -101,7 +110,7 @@ void thrd_queue_free(ThreadedQueue* queue) // @todo Create enqueue_unique and enqueue_unique_sem inline -void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0) +void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data) { ASSERT_SIMPLE((uint64_t) data % 4 == 0); pthread_mutex_lock(&queue->mutex); @@ -111,28 +120,28 @@ void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data, uint ASSERT_SIMPLE((uint64_t) tail % 4 == 0); // @performance we could probably make this faster since we don't need to compare the entire range - if (is_equal_aligned(tail, data, size) == 0) { + if (is_equal_aligned(tail, data, queue->element_size) == 0) { pthread_mutex_unlock(&queue->mutex); return; } - ring_move_pointer((RingMemory *) queue, &tail, size, aligned); + ring_move_pointer((RingMemory *) queue, &tail, queue->element_size, queue->alignment); } - while (!ring_commit_safe((RingMemory *) queue, size, aligned)) { + while (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) { pthread_cond_wait(&queue->cond, &queue->mutex); } - byte* mem = ring_get_memory((RingMemory *) queue, size, aligned); - memcpy(mem, data, size); + byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment); + memcpy(mem, data, queue->element_size); pthread_cond_signal(&queue->cond); pthread_mutex_unlock(&queue->mutex); } inline -void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0) +void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data) { ASSERT_SIMPLE((uint64_t) data % 4 == 0); pthread_mutex_lock(&queue->mutex); @@ -142,23 +151,23 @@ void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data, uint64 si ASSERT_SIMPLE((uint64_t) tail % 4 == 0); // @performance we could probably make this faster since we don't need to compare the entire range - if (is_equal_aligned(tail, data, size) == 0) { + if (is_equal_aligned(tail, data, queue->element_size) == 0) { pthread_mutex_unlock(&queue->mutex); return; } - ring_move_pointer((RingMemory *) queue, &tail, size, aligned); + ring_move_pointer((RingMemory *) queue, &tail, queue->element_size, queue->alignment); } 
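    //
    // A minimal producer/consumer sketch for the blocking (mutex + condition) pair
    // further down in this file, using the new fixed-element-size signatures. The
    // include path, BufferMemory `buf`, Job payload, and thread setup are
    // illustrative assumptions:
    //
    //     // #include "memory/ThreadedQueue.h"
    //     struct Job { int32 type; int32 payload; };
    //
    //     ThreadedQueue jobs;
    //     thrd_queue_init(&jobs, buf, 256, sizeof(Job));     // buf: BufferMemory set up elsewhere
    //
    //     // producer thread: blocks while the ring is full
    //     Job job = { 1, 42 };
    //     thrd_queue_enqueue_wait(&jobs, (byte *) &job);
    //
    //     // worker thread: blocks until an element is available
    //     Job work;
    //     thrd_queue_dequeue_wait(&jobs, (byte *) &work);
    //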
- if (!ring_commit_safe((RingMemory *) queue, size, aligned)) { + if (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) { pthread_mutex_unlock(&queue->mutex); return; } - byte* mem = ring_get_memory((RingMemory *) queue, size, aligned); - memcpy(mem, data, size); + byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment); + memcpy(mem, data, queue->element_size); pthread_cond_signal(&queue->cond); pthread_mutex_unlock(&queue->mutex); @@ -166,49 +175,49 @@ void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data, uint64 si // Conditional Lock inline -void thrd_queue_enqueue(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0) +void thrd_queue_enqueue(ThreadedQueue* queue, const byte* data) { pthread_mutex_lock(&queue->mutex); - if (!ring_commit_safe((RingMemory *) queue, size, aligned)) { + if (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) { pthread_mutex_unlock(&queue->mutex); return; } - byte* mem = ring_get_memory((RingMemory *) queue, size, aligned); - memcpy(mem, data, size); + byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment); + memcpy(mem, data, queue->element_size); pthread_cond_signal(&queue->cond); pthread_mutex_unlock(&queue->mutex); } inline -void thrd_queue_enqueue_wait(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0) +void thrd_queue_enqueue_wait(ThreadedQueue* queue, const byte* data) { pthread_mutex_lock(&queue->mutex); - while (!ring_commit_safe((RingMemory *) queue, size, aligned)) { + while (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) { pthread_cond_wait(&queue->cond, &queue->mutex); } - byte* mem = ring_get_memory((RingMemory *) queue, size, aligned); - memcpy(mem, data, size); + byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment); + memcpy(mem, data, queue->element_size); pthread_cond_signal(&queue->cond); pthread_mutex_unlock(&queue->mutex); } inline -byte* thrd_queue_enqueue_start_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) +byte* thrd_queue_enqueue_start_wait(ThreadedQueue* queue) { pthread_mutex_lock(&queue->mutex); - while (!ring_commit_safe((RingMemory *) queue, size, aligned)) { + while (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) { pthread_cond_wait(&queue->cond, &queue->mutex); } - return ring_get_memory((RingMemory *) queue, size, aligned); + return ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment); } inline @@ -219,7 +228,7 @@ void thrd_queue_enqueue_end_wait(ThreadedQueue* queue) } inline -bool thrd_queue_dequeue(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +bool thrd_queue_dequeue(ThreadedQueue* queue, byte* data) { if (queue->head == queue->tail) { return false; @@ -233,12 +242,12 @@ bool thrd_queue_dequeue(ThreadedQueue* queue, byte* data, uint64 size, byte alig return false; } - if (size == 4) { + if (queue->element_size == 4) { *((int32 *) data) = *((int32 *) queue->tail); } else { - memcpy(data, queue->tail, size); + memcpy(data, queue->tail, queue->element_size); } - ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned); + ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); pthread_cond_signal(&queue->cond); pthread_mutex_unlock(&queue->mutex); @@ -256,9 +265,9 @@ bool thrd_queue_empty(ThreadedQueue* queue) { } inline -bool 
thrd_queue_full(ThreadedQueue* queue, uint64 size, byte aligned = 0) { +bool thrd_queue_full(ThreadedQueue* queue) { pthread_mutex_lock(&queue->mutex); - bool is_full = !ring_commit_safe((RingMemory *) queue, size, aligned); + bool is_full = !ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment); pthread_mutex_unlock(&queue->mutex); return is_full; @@ -266,7 +275,7 @@ bool thrd_queue_full(ThreadedQueue* queue, uint64 size, byte aligned = 0) { // Waits until a dequeue is available inline -void thrd_queue_dequeue_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +void thrd_queue_dequeue_wait(ThreadedQueue* queue, byte* data) { pthread_mutex_lock(&queue->mutex); @@ -274,8 +283,8 @@ void thrd_queue_dequeue_wait(ThreadedQueue* queue, byte* data, uint64 size, byte pthread_cond_wait(&queue->cond, &queue->mutex); } - memcpy(data, queue->tail, size); - ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned); + memcpy(data, queue->tail, queue->element_size); + ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); pthread_cond_signal(&queue->cond); pthread_mutex_unlock(&queue->mutex); @@ -294,9 +303,9 @@ byte* thrd_queue_dequeue_start_wait(ThreadedQueue* queue) } inline -void thrd_queue_dequeue_end_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) +void thrd_queue_dequeue_end_wait(ThreadedQueue* queue) { - ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned); + ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); pthread_cond_signal(&queue->cond); pthread_mutex_unlock(&queue->mutex); @@ -304,20 +313,20 @@ void thrd_queue_dequeue_end_wait(ThreadedQueue* queue, uint64 size, byte aligned // Semaphore Lock inline -void thrd_queue_enqueue_sem_wait(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0) +void thrd_queue_enqueue_sem_wait(ThreadedQueue* queue, const byte* data) { sem_wait(&queue->empty); pthread_mutex_lock(&queue->mutex); - byte* mem = ring_get_memory((RingMemory *) queue, size, aligned); - memcpy(mem, data, size); + byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment); + memcpy(mem, data, queue->element_size); pthread_mutex_unlock(&queue->mutex); sem_post(&queue->full); } inline -bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, uint64 size, uint64 wait, byte aligned = 0) +bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, uint64 wait) { if (sem_timedwait(&queue->empty, wait)) { return false; @@ -325,8 +334,8 @@ bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, ui pthread_mutex_lock(&queue->mutex); - byte* mem = ring_get_memory((RingMemory *) queue, size, aligned); - memcpy(mem, data, size); + byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment); + memcpy(mem, data, queue->element_size); pthread_mutex_unlock(&queue->mutex); sem_post(&queue->full); @@ -335,12 +344,12 @@ bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, ui } inline -byte* thrd_queue_enqueue_start_sem_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) +byte* thrd_queue_enqueue_start_sem_wait(ThreadedQueue* queue) { sem_wait(&queue->empty); pthread_mutex_lock(&queue->mutex); - return ring_get_memory((RingMemory *) queue, size, aligned); + return ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment); } inline @@ -351,20 +360,20 @@ void 
thrd_queue_enqueue_end_sem_wait(ThreadedQueue* queue) } inline -byte* thrd_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +byte* thrd_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data) { sem_wait(&queue->full); pthread_mutex_lock(&queue->mutex); - memcpy(data, queue->tail, size); - ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned); + memcpy(data, queue->tail, queue->element_size); + ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); pthread_mutex_unlock(&queue->mutex); sem_post(&queue->empty); } inline -bool thrd_queue_dequeue_sem_timedwait(ThreadedQueue* queue, byte* data, uint64 size, uint64 wait, byte aligned = 0) +bool thrd_queue_dequeue_sem_timedwait(ThreadedQueue* queue, byte* data, uint64 wait) { if (sem_timedwait(&queue->full, wait)) { return false; @@ -372,8 +381,8 @@ bool thrd_queue_dequeue_sem_timedwait(ThreadedQueue* queue, byte* data, uint64 s pthread_mutex_lock(&queue->mutex); - memcpy(data, queue->tail, size); - ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned); + memcpy(data, queue->tail, queue->element_size); + ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); pthread_mutex_unlock(&queue->mutex); sem_post(&queue->empty); @@ -391,9 +400,9 @@ byte* thrd_queue_dequeue_start_sem_wait(ThreadedQueue* queue) } inline -void thrd_queue_dequeue_end_sem_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) +void thrd_queue_dequeue_end_sem_wait(ThreadedQueue* queue) { - ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned); + ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); pthread_mutex_unlock(&queue->mutex); sem_post(&queue->empty); diff --git a/memory/ThreadedRingMemory.h b/memory/ThreadedRingMemory.h index b9faa6e..3dcbf83 100644 --- a/memory/ThreadedRingMemory.h +++ b/memory/ThreadedRingMemory.h @@ -33,8 +33,8 @@ struct ThreadedRingMemory { uint64 size; int32 alignment; - int32 element_alignment; + // The ring memory ends here pthread_mutex_t mutex; }; diff --git a/platform/linux/Allocator.h b/platform/linux/Allocator.h index 48968f2..430d9c7 100644 --- a/platform/linux/Allocator.h +++ b/platform/linux/Allocator.h @@ -45,7 +45,8 @@ void* platform_alloc_aligned(size_t size, int32 alignment) alignment = page_size; } - size += alignment - 1 + sizeof(void *) + sizeof(size_t); + size = ROUND_TO_NEAREST(size, alignment); + size += alignment + sizeof(void *) + sizeof(size_t); void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); ASSERT_SIMPLE(ptr != MAP_FAILED); diff --git a/platform/linux/threading/Atomic.h b/platform/linux/threading/Atomic.h index d822a0a..943472c 100644 --- a/platform/linux/threading/Atomic.h +++ b/platform/linux/threading/Atomic.h @@ -13,260 +13,1204 @@ #include "../../../stdlib/Types.h" inline -void atomic_set(void** target, void* value) { +void atomic_set_relaxed(void** target, void* value) +{ + __atomic_store_n(target, value, __ATOMIC_RELAXED); +} + +inline +void* atomic_get_relaxed(void** target) +{ + return __atomic_load_n(target, __ATOMIC_RELAXED); +} + +inline +void atomic_set_relaxed(volatile int32* value, int32 new_value) +{ + __atomic_store_n(value, new_value, __ATOMIC_RELAXED); +} + +inline +void atomic_set_relaxed(volatile int64* value, int64 new_value) +{ + __atomic_store_n(value, new_value, __ATOMIC_RELAXED); +} + +inline +int32 atomic_fetch_set_relaxed(volatile int32* value, int32 
new_value) +{ + return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); +} + +inline +int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) +{ + return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); +} + +inline +void atomic_get_relaxed(volatile byte* value, byte data[16]) +{ + __atomic_store((volatile __int128 *) value, (__int128 *) data, __ATOMIC_RELAXED); +} + +inline +int32 atomic_get_relaxed(volatile int32* value) +{ + return __atomic_load_n((int32 *) value, __ATOMIC_RELAXED); +} + +inline +int64 atomic_get_relaxed(volatile int64* value) +{ + return __atomic_load_n((int64 *) value, __ATOMIC_RELAXED); +} + +inline +void atomic_get_relaxed(volatile byte* value, byte data[16]) +{ + __atomic_load((volatile __int128 *) value, (__int128 *) data, __ATOMIC_RELAXED); +} + +inline +void atomic_increment_relaxed(volatile int32* value) +{ + __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); +} + +inline +void atomic_decrement_relaxed(volatile int32* value) +{ + __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); +} + +inline +void atomic_increment_relaxed(volatile int64* value) +{ + __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); +} + +inline +void atomic_decrement_relaxed(volatile int64* value) +{ + __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); +} + +inline +void atomic_add_relaxed(volatile int32* value, int32 increment) +{ + __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); +} + +inline +void atomic_sub_relaxed(volatile int32* value, int32 decrement) +{ + __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); +} + +inline +void atomic_add_relaxed(volatile int64* value, int64 increment) +{ + __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); +} + +inline +void atomic_sub_relaxed(volatile int64* value, int64 decrement) +{ + __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); +} + +inline +int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) +{ + __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + return *expected; +} + +inline +int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) +{ + return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); +} + +inline +int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) +{ + return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); +} + +inline +int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) +{ + return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); +} + +inline +int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) +{ + return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); +} + +inline +void atomic_set_relaxed(volatile uint32* value, uint32 new_value) +{ + __atomic_store_n(value, new_value, __ATOMIC_RELAXED); +} + +inline +void atomic_set_relaxed(volatile uint64* value, uint64 new_value) +{ + __atomic_store_n(value, new_value, __ATOMIC_RELAXED); +} + +inline +uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) +{ + return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); +} + +inline +uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) +{ + return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); +} + +inline +void atomic_get_relaxed(volatile byte* value, byte data[16]) +{ + __atomic_store((volatile __uint128 *) value, (__uint128 *) data, __ATOMIC_RELAXED); +} + +inline +uint32 atomic_get_relaxed(volatile uint32* value) +{ + return __atomic_load_n((uint32 *) value, 
__ATOMIC_RELAXED); +} + +inline +uint64 atomic_get_relaxed(volatile uint64* value) +{ + return __atomic_load_n((uint64 *) value, __ATOMIC_RELAXED); +} + +inline +void atomic_increment_relaxed(volatile uint32* value) +{ + __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); +} + +inline +void atomic_decrement_relaxed(volatile uint32* value) +{ + __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); +} + +inline +void atomic_increment_relaxed(volatile uint64* value) +{ + __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); +} + +inline +void atomic_decrement_relaxed(volatile uint64* value) +{ + __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); +} + +inline +void atomic_add_relaxed(volatile uint32* value, uint32 increment) +{ + __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); +} + +inline +void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) +{ + __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); +} + +inline +uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) +{ + __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + return *expected; +} + +inline +uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) +{ + return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); +} + +inline +uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) +{ + return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); +} + +inline +uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) +{ + return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); +} + +inline +uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) +{ + return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); +} + +inline +void atomic_and_relaxed(volatile uint32* value, uint32 mask) +{ + __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); +} + +inline +void atomic_and_relaxed(volatile int32* value, int32 mask) +{ + __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); +} + +inline +void atomic_and_relaxed(volatile uint64* value, uint64 mask) +{ + __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); +} + +inline +void atomic_and_relaxed(volatile int64* value, int64 mask) +{ + __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); +} + +inline +void atomic_or_relaxed(volatile uint32* value, uint32 mask) +{ + __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); +} + +inline +void atomic_or_relaxed(volatile int32* value, int32 mask) +{ + __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); +} + +inline +void atomic_or_relaxed(volatile uint64* value, uint64 mask) +{ + __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); +} + +inline +void atomic_or_relaxed(volatile int64* value, int64 mask) +{ + __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); +} + +inline +void atomic_set_acquire(void** target, void* value) +{ + __atomic_store_n(target, value, __ATOMIC_ACQUIRE); +} + +inline +void* atomic_get_acquire(void** target) +{ + return __atomic_load_n(target, __ATOMIC_ACQUIRE); +} + +inline +void atomic_set_acquire(volatile int32* value, int32 new_value) +{ + __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); +} + +inline +void atomic_set_acquire(volatile int64* value, int64 new_value) +{ + __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); +} + +inline +int32 atomic_fetch_set_acquire(volatile int32* value, int32 new_value) +{ + return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); +} + +inline +int64 atomic_fetch_set_acquire(volatile int64* value, int64 new_value) +{ + return 
__atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); +} + +inline +void atomic_get_acquire(volatile byte* value, byte data[16]) +{ + __atomic_store((volatile __int128 *) value, (__int128 *) data, __ATOMIC_ACQUIRE); +} + +inline +int32 atomic_get_acquire(volatile int32* value) +{ + return __atomic_load_n((int32 *) value, __ATOMIC_ACQUIRE); +} + +inline +int64 atomic_get_acquire(volatile int64* value) +{ + return __atomic_load_n((int64 *) value, __ATOMIC_ACQUIRE); +} + +inline +void atomic_get_acquire(volatile byte* value, byte data[16]) +{ + __atomic_load((volatile __int128 *) value, (__int128 *) data, __ATOMIC_ACQUIRE); +} + +inline +void atomic_increment_acquire(volatile int32* value) +{ + __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); +} + +inline +void atomic_decrement_acquire(volatile int32* value) +{ + __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); +} + +inline +void atomic_increment_acquire(volatile int64* value) +{ + __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); +} + +inline +void atomic_decrement_acquire(volatile int64* value) +{ + __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); +} + +inline +void atomic_add_acquire(volatile int32* value, int32 increment) +{ + __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); +} + +inline +void atomic_sub_acquire(volatile int32* value, int32 decrement) +{ + __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); +} + +inline +void atomic_add_acquire(volatile int64* value, int64 increment) +{ + __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); +} + +inline +void atomic_sub_acquire(volatile int64* value, int64 decrement) +{ + __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); +} + +inline +int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expected, int32 desired) +{ + __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); + return *expected; +} + +inline +int32 atomic_fetch_add_acquire(volatile int32* value, int32 operand) +{ + return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); +} + +inline +int32 atomic_fetch_sub_acquire(volatile int32* value, int32 operand) +{ + return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); +} + +inline +int64 atomic_fetch_add_acquire(volatile int64* value, int64 operand) +{ + return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); +} + +inline +int64 atomic_fetch_sub_acquire(volatile int64* value, int64 operand) +{ + return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); +} + +inline +void atomic_set_acquire(volatile uint32* value, uint32 new_value) +{ + __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); +} + +inline +void atomic_set_acquire(volatile uint64* value, uint64 new_value) +{ + __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); +} + +inline +uint32 atomic_fetch_set_acquire(volatile uint32* value, uint32 new_value) +{ + return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); +} + +inline +uint64 atomic_fetch_set_acquire(volatile uint64* value, uint64 new_value) +{ + return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); +} + +inline +void atomic_get_acquire(volatile byte* value, byte data[16]) +{ + __atomic_store((volatile __uint128 *) value, (__uint128 *) data, __ATOMIC_ACQUIRE); +} + +inline +uint32 atomic_get_acquire(volatile uint32* value) +{ + return __atomic_load_n((uint32 *) value, __ATOMIC_ACQUIRE); +} + +inline +uint64 atomic_get_acquire(volatile uint64* value) +{ + return __atomic_load_n((uint64 *) value, __ATOMIC_ACQUIRE); +} + +inline +void 
atomic_increment_acquire(volatile uint32* value) +{ + __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); +} + +inline +void atomic_decrement_acquire(volatile uint32* value) +{ + __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); +} + +inline +void atomic_increment_acquire(volatile uint64* value) +{ + __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); +} + +inline +void atomic_decrement_acquire(volatile uint64* value) +{ + __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); +} + +inline +void atomic_add_acquire(volatile uint32* value, uint32 increment) +{ + __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); +} + +inline +void atomic_sub_acquire(volatile uint32* value, uint32 decrement) +{ + __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); +} + +inline +uint32 atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expected, uint32 desired) +{ + __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); + return *expected; +} + +inline +uint32 atomic_fetch_add_acquire(volatile uint32* value, uint32 operand) +{ + return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); +} + +inline +uint32 atomic_fetch_sub_acquire(volatile uint32* value, uint32 operand) +{ + return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); +} + +inline +uint64 atomic_fetch_add_acquire(volatile uint64* value, uint64 operand) +{ + return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); +} + +inline +uint64 atomic_fetch_sub_acquire(volatile uint64* value, uint64 operand) +{ + return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); +} + +inline +void atomic_and_acquire(volatile uint32* value, uint32 mask) +{ + __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); +} + +inline +void atomic_and_acquire(volatile int32* value, int32 mask) +{ + __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); +} + +inline +void atomic_and_acquire(volatile uint64* value, uint64 mask) +{ + __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); +} + +inline +void atomic_and_acquire(volatile int64* value, int64 mask) +{ + __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); +} + +inline +void atomic_or_acquire(volatile uint32* value, uint32 mask) +{ + __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); +} + +inline +void atomic_or_acquire(volatile int32* value, int32 mask) +{ + __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); +} + +inline +void atomic_or_acquire(volatile uint64* value, uint64 mask) +{ + __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); +} + +inline +void atomic_or_acquire(volatile int64* value, int64 mask) +{ + __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); +} + +inline +void atomic_set_release(void** target, void* value) +{ + __atomic_store_n(target, value, __ATOMIC_RELEASE); +} + +inline +void* atomic_get_release(void** target) +{ + return __atomic_load_n(target, __ATOMIC_RELEASE); +} + +inline +void atomic_set_release(volatile int32* value, int32 new_value) +{ + __atomic_store_n(value, new_value, __ATOMIC_RELEASE); +} + +inline +void atomic_set_release(volatile int64* value, int64 new_value) +{ + __atomic_store_n(value, new_value, __ATOMIC_RELEASE); +} + +inline +int32 atomic_fetch_set_release(volatile int32* value, int32 new_value) +{ + return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); +} + +inline +int64 atomic_fetch_set_release(volatile int64* value, int64 new_value) +{ + return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); +} + +inline +void atomic_get_release(volatile byte* value, byte data[16]) +{ + __atomic_store((volatile __int128 *) 
value, (__int128 *) data, __ATOMIC_RELEASE);
+}
+
+inline
+int32 atomic_get_release(volatile int32* value)
+{
+    return __atomic_load_n((int32 *) value, __ATOMIC_RELEASE);
+}
+
+inline
+int64 atomic_get_release(volatile int64* value)
+{
+    return __atomic_load_n((int64 *) value, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_get_release(volatile byte* value, byte data[16])
+{
+    __atomic_load((volatile __int128 *) value, (__int128 *) data, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_increment_release(volatile int32* value)
+{
+    __atomic_add_fetch(value, 1, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_decrement_release(volatile int32* value)
+{
+    __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_increment_release(volatile int64* value)
+{
+    __atomic_add_fetch(value, 1, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_decrement_release(volatile int64* value)
+{
+    __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_add_release(volatile int32* value, int32 increment)
+{
+    __atomic_add_fetch(value, increment, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_sub_release(volatile int32* value, int32 decrement)
+{
+    __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_add_release(volatile int64* value, int64 increment)
+{
+    __atomic_add_fetch(value, increment, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_sub_release(volatile int64* value, int64 decrement)
+{
+    __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE);
+}
+
+inline
+int32 atomic_compare_exchange_weak_release(volatile int32* value, int32* expected, int32 desired)
+{
+    // The failure memory order of a compare-exchange may not be __ATOMIC_RELEASE; relaxed is used on failure
+    __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
+    return *expected;
+}
+
+inline
+int32 atomic_fetch_add_release(volatile int32* value, int32 operand)
+{
+    return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE);
+}
+
+inline
+int32 atomic_fetch_sub_release(volatile int32* value, int32 operand)
+{
+    return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE);
+}
+
+inline
+int64 atomic_fetch_add_release(volatile int64* value, int64 operand)
+{
+    return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE);
+}
+
+inline
+int64 atomic_fetch_sub_release(volatile int64* value, int64 operand)
+{
+    return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_set_release(volatile uint32* value, uint32 new_value)
+{
+    __atomic_store_n(value, new_value, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_set_release(volatile uint64* value, uint64 new_value)
+{
+    __atomic_store_n(value, new_value, __ATOMIC_RELEASE);
+}
+
+inline
+uint32 atomic_fetch_set_release(volatile uint32* value, uint32 new_value)
+{
+    return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE);
+}
+
+inline
+uint64 atomic_fetch_set_release(volatile uint64* value, uint64 new_value)
+{
+    return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE);
+}
+
+inline
+uint32 atomic_get_release(volatile uint32* value)
+{
+    return __atomic_load_n((uint32 *) value, __ATOMIC_RELEASE);
+}
+
+inline
+uint64 atomic_get_release(volatile uint64* value)
+{
+    return __atomic_load_n((uint64 *) value, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_increment_release(volatile uint32* value)
+{
+    __atomic_add_fetch(value, 1, __ATOMIC_RELEASE);
+}
+
+inline
+void atomic_decrement_release(volatile uint32* value)
+{
+    __atomic_sub_fetch(value,
1, __ATOMIC_RELEASE); +} + +inline +void atomic_increment_release(volatile uint64* value) +{ + __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); +} + +inline +void atomic_decrement_release(volatile uint64* value) +{ + __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); +} + +inline +void atomic_add_release(volatile uint32* value, uint32 increment) +{ + __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); +} + +inline +void atomic_sub_release(volatile uint32* value, uint32 decrement) +{ + __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); +} + +inline +uint32 atomic_compare_exchange_weak_release(volatile uint32* value, uint32* expected, uint32 desired) +{ + __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); + return *expected; +} + +inline +uint32 atomic_fetch_add_release(volatile uint32* value, uint32 operand) +{ + return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); +} + +inline +uint32 atomic_fetch_sub_release(volatile uint32* value, uint32 operand) +{ + return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); +} + +inline +uint64 atomic_fetch_add_release(volatile uint64* value, uint64 operand) +{ + return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); +} + +inline +uint64 atomic_fetch_sub_release(volatile uint64* value, uint64 operand) +{ + return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); +} + +inline +void atomic_and_release(volatile uint32* value, uint32 mask) +{ + __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); +} + +inline +void atomic_and_release(volatile int32* value, int32 mask) +{ + __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); +} + +inline +void atomic_and_release(volatile uint64* value, uint64 mask) +{ + __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); +} + +inline +void atomic_and_release(volatile int64* value, int64 mask) +{ + __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); +} + +inline +void atomic_or_release(volatile uint32* value, uint32 mask) +{ + __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); +} + +inline +void atomic_or_release(volatile int32* value, int32 mask) +{ + __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); +} + +inline +void atomic_or_release(volatile uint64* value, uint64 mask) +{ + __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); +} + +inline +void atomic_or_release(volatile int64* value, int64 mask) +{ + __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); +} + +inline +void atomic_set_acquire_release(void** target, void* value) +{ __atomic_store_n(target, value, __ATOMIC_SEQ_CST); } inline -void* atomic_get(void** target) { +void* atomic_get_acquire_release(void** target) +{ return __atomic_load_n(target, __ATOMIC_SEQ_CST); } inline -void atomic_set(volatile int32* value, int32 new_value) +void atomic_set_acquire_release(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } inline -void atomic_set(volatile int64* value, int64 new_value) +void atomic_set_acquire_release(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } inline -int32 atomic_set_fetch(volatile int32* value, int32 new_value) { +int32 atomic_fetch_set_acquire_release(volatile int32* value, int32 new_value) +{ return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } inline -int64 atomic_set_fetch(volatile int64* value, int64 new_value) { +int64 atomic_fetch_set_acquire_release(volatile int64* value, int64 new_value) +{ return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } inline -void 
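// ---- Usage sketch (illustrative, not part of the patch) ----------------------
// The release/acquire pairs above are meant for one-way publication: a producer
// writes its payload and then stores a flag with release; a consumer that sees
// the flag through an acquire load is guaranteed to also see the payload.
// Job and its fields are assumed names.
struct Job {
    int32 payload;
    volatile int32 ready;
};

inline void job_publish(Job* job, int32 payload)
{
    job->payload = payload;             // plain store
    atomic_set_release(&job->ready, 1); // payload becomes visible before ready == 1
}

inline bool job_try_consume(Job* job, int32* out)
{
    if (!atomic_get_acquire(&job->ready)) {
        return false;
    }

    *out = job->payload; // safe: the acquire load synchronized with the release store
    return true;
}
// -------------------------------------------------------------------------------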
atomic_get(volatile byte* value, byte data[16]) +void atomic_get_acquire_release(volatile byte* value, byte data[16]) { __atomic_store((volatile __int128 *) value, (__int128 *) data, __ATOMIC_SEQ_CST); } inline -int32 atomic_get(volatile int32* value) +int32 atomic_get_acquire_release(volatile int32* value) { return __atomic_load_n((int32 *) value, __ATOMIC_SEQ_CST); } inline -int64 atomic_get(volatile int64* value) +int64 atomic_get_acquire_release(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_SEQ_CST); } inline -void atomic_get(volatile byte* value, byte data[16]) +void atomic_get_acquire_release(volatile byte* value, byte data[16]) { __atomic_load((volatile __int128 *) value, (__int128 *) data, __ATOMIC_SEQ_CST); } inline -void atomic_increment(volatile int32* value) { +void atomic_increment_acquire_release(volatile int32* value) +{ __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } inline -void atomic_decrement(volatile int32* value) { +void atomic_decrement_acquire_release(volatile int32* value) +{ __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } inline -void atomic_increment(volatile int64* value) { +void atomic_increment_acquire_release(volatile int64* value) +{ __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } inline -void atomic_decrement(volatile int64* value) { +void atomic_decrement_acquire_release(volatile int64* value) +{ __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } inline -void atomic_add(volatile int32* value, int32 increment) { +void atomic_add_acquire_release(volatile int32* value, int32 increment) +{ __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } inline -void atomic_sub(volatile int32* value, int32 decrement) { +void atomic_sub_acquire_release(volatile int32* value, int32 decrement) +{ __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } inline -void atomic_add(volatile int64* value, int64 increment) { +void atomic_add_acquire_release(volatile int64* value, int64 increment) +{ __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } inline -void atomic_sub(volatile int64* value, int64 decrement) { +void atomic_sub_acquire_release(volatile int64* value, int64 decrement) +{ __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } inline -int32 atomic_compare_exchange_weak(volatile int32* value, int32* expected, int32 desired) { +int32 atomic_compare_exchange_weak_acquire_release(volatile int32* value, int32* expected, int32 desired) +{ __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } inline -int32 atomic_fetch_add(volatile int32* value, int32 operand) { +int32 atomic_fetch_add_acquire_release(volatile int32* value, int32 operand) +{ return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } inline -int32 atomic_fetch_sub(volatile int32* value, int32 operand) { +int32 atomic_fetch_sub_acquire_release(volatile int32* value, int32 operand) +{ return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } inline -int64 atomic_fetch_add(volatile int64* value, int64 operand) { +int64 atomic_fetch_add_acquire_release(volatile int64* value, int64 operand) +{ return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } inline -int64 atomic_fetch_sub(volatile int64* value, int64 operand) { +int64 atomic_fetch_sub_acquire_release(volatile int64* value, int64 operand) +{ return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } inline -void atomic_set(volatile uint32* value, uint32 new_value) +void atomic_set_acquire_release(volatile uint32* value, uint32 
new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } inline -void atomic_set(volatile uint64* value, uint64 new_value) +void atomic_set_acquire_release(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } inline -uint32 atomic_set_fetch(volatile uint32* value, uint32 new_value) { +uint32 atomic_fetch_set_acquire_release(volatile uint32* value, uint32 new_value) +{ return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } inline -uint64 atomic_set_fetch(volatile uint64* value, uint64 new_value) { +uint64 atomic_fetch_set_acquire_release(volatile uint64* value, uint64 new_value) +{ return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } inline -void atomic_get(volatile byte* value, byte data[16]) +void atomic_get_acquire_release(volatile byte* value, byte data[16]) { __atomic_store((volatile __uint128 *) value, (__uint128 *) data, __ATOMIC_SEQ_CST); } inline -uint32 atomic_get(volatile uint32* value) +uint32 atomic_get_acquire_release(volatile uint32* value) { return __atomic_load_n((uint32 *) value, __ATOMIC_SEQ_CST); } inline -uint64 atomic_get(volatile uint64* value) +uint64 atomic_get_acquire_release(volatile uint64* value) { return __atomic_load_n((uint64 *) value, __ATOMIC_SEQ_CST); } inline -void atomic_increment(volatile uint32* value) { +void atomic_increment_acquire_release(volatile uint32* value) +{ __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } inline -void atomic_decrement(volatile uint32* value) { +void atomic_decrement_acquire_release(volatile uint32* value) +{ __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } inline -void atomic_increment(volatile uint64* value) { +void atomic_increment_acquire_release(volatile uint64* value) +{ __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } inline -void atomic_decrement(volatile uint64* value) { +void atomic_decrement_acquire_release(volatile uint64* value) +{ __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } inline -void atomic_add(volatile uint32* value, uint32 increment) { +void atomic_add_acquire_release(volatile uint32* value, uint32 increment) +{ __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } inline -void atomic_sub(volatile uint32* value, uint32 decrement) { +void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement) +{ __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } inline -uint32 atomic_compare_exchange_weak(volatile uint32* value, uint32* expected, uint32 desired) { +uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint32* expected, uint32 desired) +{ __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } inline -uint32 atomic_fetch_add(volatile uint32* value, uint32 operand) { +uint32 atomic_fetch_add_acquire_release(volatile uint32* value, uint32 operand) +{ return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } inline -uint32 atomic_fetch_sub(volatile uint32* value, uint32 operand) { +uint32 atomic_fetch_sub_acquire_release(volatile uint32* value, uint32 operand) +{ return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } inline -uint64 atomic_fetch_add(volatile uint64* value, uint64 operand) { +uint64 atomic_fetch_add_acquire_release(volatile uint64* value, uint64 operand) +{ return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } inline -uint64 atomic_fetch_sub(volatile uint64* value, uint64 operand) { +uint64 atomic_fetch_sub_acquire_release(volatile uint64* value, uint64 operand) +{ return 
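// ---- Usage sketch (illustrative, not part of the patch) ----------------------
// The *_acquire_release variants map to __ATOMIC_SEQ_CST. Sequential consistency
// is needed when two threads each store their own flag and then load the other
// one (the store/load pattern of Dekker-style mutual exclusion); with only
// release and acquire both loads could still observe 0. Flag names are assumed.
static volatile int32 flag_a;
static volatile int32 flag_b;

inline bool thread_a_tries_to_enter()
{
    atomic_set_acquire_release(&flag_a, 1);
    // Under sequential consistency both threads cannot read 0 here at the same time
    return atomic_get_acquire_release(&flag_b) == 0;
}

inline bool thread_b_tries_to_enter()
{
    atomic_set_acquire_release(&flag_b, 1);
    return atomic_get_acquire_release(&flag_a) == 0;
}
// -------------------------------------------------------------------------------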
__atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } inline -void atomic_and(volatile uint32* value, uint32 mask) { +void atomic_and_acquire_release(volatile uint32* value, uint32 mask) +{ __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } inline -void atomic_and(volatile int32* value, int32 mask) { +void atomic_and_acquire_release(volatile int32* value, int32 mask) +{ __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } inline -void atomic_and(volatile uint64* value, uint64 mask) { +void atomic_and_acquire_release(volatile uint64* value, uint64 mask) +{ __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } inline -void atomic_and(volatile int64* value, int64 mask) { +void atomic_and_acquire_release(volatile int64* value, int64 mask) +{ __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } inline -void atomic_or(volatile uint32* value, uint32 mask) { +void atomic_or_acquire_release(volatile uint32* value, uint32 mask) +{ __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } inline -void atomic_or(volatile int32* value, int32 mask) { +void atomic_or_acquire_release(volatile int32* value, int32 mask) +{ __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } inline -void atomic_or(volatile uint64* value, uint64 mask) { +void atomic_or_acquire_release(volatile uint64* value, uint64 mask) +{ __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } inline -void atomic_or(volatile int64* value, int64 mask) { +void atomic_or_acquire_release(volatile int64* value, int64 mask) +{ __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } + + +// Check out the intrinsic functions fence_memory and fence_write +// These are much faster and could accomplish what you are doing +inline +void atomic_fence_acquire() +{ + __atomic_thread_fence(__ATOMIC_ACQUIRE); +} + +// Check out the intrinsic functions fence_memory and fence_write +// These are much faster and could accomplish what you are doing +inline +void atomic_fence_release() +{ + __atomic_thread_fence(__ATOMIC_RELEASE); +} + #endif \ No newline at end of file diff --git a/platform/linux/threading/Thread.h b/platform/linux/threading/Thread.h index 0dd9d74..14fa8dc 100644 --- a/platform/linux/threading/Thread.h +++ b/platform/linux/threading/Thread.h @@ -61,7 +61,7 @@ int32 pthread_join(pthread_t thread, void** retval) { } int32 pthread_mutex_init(pthread_mutex_t* mutex, pthread_mutexattr_t*) { - atomic_set(mutex, 0); + atomic_set_acquire(mutex, 0); return 0; } @@ -77,44 +77,43 @@ int32 pthread_mutex_lock(pthread_mutex_t* mutex) { } int32 pthread_mutex_unlock(pthread_mutex_t* mutex) { - atomic_set(mutex, 0); + atomic_set_release(mutex, 0); syscall(SYS_futex, mutex, FUTEX_WAKE, 1, NULL, NULL, 0); return 0; } int32 pthread_cond_init(pthread_cond_t* cond, pthread_condattr_t*) { - atomic_set(cond, 0); + atomic_set_release(cond, 0); return 0; } int32 pthread_cond_wait(pthread_cond_t* cond, pthread_mutex_t* mutex) { pthread_mutex_unlock(mutex); - syscall(SYS_futex, cond, FUTEX_WAIT, atomic_get(cond), NULL, NULL, 0); + syscall(SYS_futex, cond, FUTEX_WAIT, atomic_get_acquire(cond), NULL, NULL, 0); pthread_mutex_lock(mutex); return 0; } int32 pthread_cond_signal(pthread_cond_t* cond) { - atomic_fetch_add(cond, 1); + atomic_fetch_add_acquire(cond, 1); syscall(SYS_futex, cond, FUTEX_WAKE, 1, NULL, NULL, 0); return 0; } int32 pthread_rwlock_init(pthread_rwlock_t* rwlock, const pthread_rwlockattr_t*) { - atomic_set((int64 *) &rwlock->readers, 0); - //atomic_set(&rwlock->writer, 0); + atomic_set_release((int64 *) &rwlock->readers, 0); return 0; } int32 
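// ---- Usage sketch (illustrative, not part of the patch) ----------------------
// Typical use of the futex-backed pthread replacements above. The predicate
// work_available is an assumed name, and pthread_mutex_init/pthread_cond_init
// are expected to have run once at startup. As with the real pthreads API, the
// predicate is re-checked in a loop after every wake-up.
static pthread_mutex_t work_mutex;
static pthread_cond_t work_cond;
static int32 work_available = 0;

void worker_wait_for_work()
{
    pthread_mutex_lock(&work_mutex);
    while (!work_available) {
        pthread_cond_wait(&work_cond, &work_mutex);
    }
    work_available = 0;
    pthread_mutex_unlock(&work_mutex);
}

void producer_submit_work()
{
    pthread_mutex_lock(&work_mutex);
    work_available = 1;
    pthread_mutex_unlock(&work_mutex);
    pthread_cond_signal(&work_cond);
}
// -------------------------------------------------------------------------------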
pthread_rwlock_rdlock(pthread_rwlock_t* rwlock) { - while (atomic_get(&rwlock->writer)) {} + while (atomic_get_acquire_release(&rwlock->writer)) {} - atomic_fetch_add(&rwlock->readers, 1); + atomic_fetch_add_acquire(&rwlock->readers, 1); return 0; } @@ -126,10 +125,10 @@ int32 pthread_rwlock_wrlock(pthread_rwlock_t* rwlock) { } int32 pthread_rwlock_unlock(pthread_rwlock_t* rwlock) { - if (atomic_get(&rwlock->writer)) { - atomic_set(&rwlock->writer, 0); + if (atomic_get_acquire(&rwlock->writer)) { + atomic_set_release(&rwlock->writer, 0); } else { - atomic_fetch_sub(&rwlock->readers, 1); + atomic_fetch_sub_acquire(&rwlock->readers, 1); } return 0; diff --git a/platform/win32/Allocator.h b/platform/win32/Allocator.h index 7309f90..632098a 100644 --- a/platform/win32/Allocator.h +++ b/platform/win32/Allocator.h @@ -26,6 +26,8 @@ void* platform_alloc(size_t size) inline void* platform_alloc_aligned(size_t size, int32 alignment) { + size = ROUND_TO_NEAREST(size, alignment); + void* ptr = VirtualAlloc(NULL, size + alignment + sizeof(void*), MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); ASSERT_SIMPLE(ptr); diff --git a/platform/win32/threading/Atomic.h b/platform/win32/threading/Atomic.h index 25c3be6..99d304a 100644 --- a/platform/win32/threading/Atomic.h +++ b/platform/win32/threading/Atomic.h @@ -12,56 +12,58 @@ #include #include "../../../stdlib/Types.h" +// WARNING: Windows doesn't really support all the relaxed implementations, we therefore often use acquire as alternative. + inline -void atomic_set(void** target, void* new_pointer) +void atomic_set_relaxed(void** target, void* new_pointer) { - InterlockedExchangePointer(target, new_pointer); + InterlockedExchangePointerAcquire(target, new_pointer); } inline -void* atomic_get(void** target) +void* atomic_get_relaxed(void** target) { return InterlockedCompareExchangePointer(target, NULL, NULL); } inline -void atomic_set(volatile int32* value, int32 new_value) +void atomic_set_relaxed(volatile int32* value, int32 new_value) { - InterlockedExchange((long *) value, new_value); + InterlockedExchangeAcquire((long *) value, new_value); } inline -void atomic_set(volatile int64* value, int64 new_value) +void atomic_set_relaxed(volatile int64* value, int64 new_value) { - InterlockedExchange((long *) value, (long) new_value); + InterlockedExchangeAcquire((long *) value, (long) new_value); } inline -void atomic_set(volatile f32* value, f32 new_value) +void atomic_set_relaxed(volatile f32* value, f32 new_value) { - InterlockedExchange((long *) value, (long) new_value); + InterlockedExchangeAcquire((long *) value, (long) new_value); } inline -void atomic_set(volatile f64* value, f64 new_value) +void atomic_set_relaxed(volatile f64* value, f64 new_value) { - InterlockedExchange((long *) value, (long) new_value); + InterlockedExchangeAcquire((long *) value, (long) new_value); } inline -int32 atomic_set_fetch(volatile int32* value, int32 new_value) +int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) { - return (int32) InterlockedExchange((long *) value, new_value); + return (int32) InterlockedExchangeAcquire((long *) value, new_value); } inline -int64 atomic_set_fetch(volatile int64* value, int64 new_value) +int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) { - return (int64) InterlockedExchange((long *) value, (long) new_value); + return (int64) InterlockedExchangeAcquire((long *) value, (long) new_value); } inline -void atomic_set(volatile byte* value, const byte new_value[16]) +void atomic_set_relaxed(volatile byte* 
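// ---- Usage sketch (illustrative, not part of the patch) ----------------------
// For reference, the usual shape of the over-allocate-and-align pattern that
// platform_alloc_aligned in win32/Allocator.h (above) builds on. This is an
// illustrative reconstruction, not the engine's actual code, and it assumes
// alignment is a power of two.
void* example_alloc_aligned(size_t size, int32 alignment)
{
    void* raw = VirtualAlloc(NULL, size + alignment + sizeof(void*), MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);

    // Leave room for the bookkeeping pointer, then round up to the alignment
    uintptr_t base = (uintptr_t) raw + sizeof(void*);
    uintptr_t aligned = (base + alignment - 1) & ~((uintptr_t) alignment - 1);

    // Remember the raw pointer right before the aligned block so the matching
    // free can hand the original address back to VirtualFree
    ((void**) aligned)[-1] = raw;

    return (void*) aligned;
}
// -------------------------------------------------------------------------------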
value, const byte new_value[16]) { int64* value64 = (int64*) value; const int64* new_value64 = (const int64*) new_value; @@ -82,259 +84,1412 @@ void atomic_set(volatile byte* value, const byte new_value[16]) } inline -int32 atomic_get(volatile int32* value) +int32 atomic_get_relaxed(volatile int32* value) { - return (int32) InterlockedCompareExchange((long *) value, 0, 0); + return (int32) InterlockedCompareExchangeAcquire((long *) value, 0, 0); } inline -int64 atomic_get(volatile int64* value) +int64 atomic_get_relaxed(volatile int64* value) { - return (int64) InterlockedCompareExchange((long *) value, 0, 0); + return (int64) InterlockedCompareExchangeAcquire((long *) value, 0, 0); } inline -f32 atomic_get(volatile f32* value) +f32 atomic_get_relaxed(volatile f32* value) { - return (f32) InterlockedCompareExchange((long *) value, 0, 0); + return (f32) InterlockedCompareExchangeAcquire((long *) value, 0, 0); } inline -f64 atomic_get(volatile f64* value) +f64 atomic_get_relaxed(volatile f64* value) { - return (f64) InterlockedCompareExchange((long *) value, 0, 0); + return (f64) InterlockedCompareExchangeAcquire((long *) value, 0, 0); } inline -void atomic_get(volatile byte* value, byte data[16]) +void atomic_get_relaxed(volatile byte* value, byte data[16]) { InterlockedCompareExchange128((volatile long long *) value, 0, 0, (long long *) data); } inline -void atomic_increment(volatile int32* value) { - InterlockedIncrement((long *) value); +void atomic_increment_relaxed(volatile int32* value) +{ + InterlockedIncrementAcquire((long *) value); } inline -void atomic_decrement(volatile int32* value) { - InterlockedDecrement((long *) value); +void atomic_decrement_relaxed(volatile int32* value) +{ + InterlockedDecrementAcquire((long *) value); } inline -void atomic_increment(volatile int64* value) { - InterlockedIncrement((long *) value); +void atomic_increment_relaxed(volatile int64* value) +{ + InterlockedIncrementAcquire((long *) value); } inline -void atomic_decrement(volatile int64* value) { - InterlockedDecrement((long *) value); +void atomic_decrement_relaxed(volatile int64* value) +{ + InterlockedDecrementAcquire((long *) value); } inline -void atomic_add(volatile int32* value, int32 increment) { - InterlockedAdd((long *) value, increment); +void atomic_add_relaxed(volatile int32* value, int32 increment) +{ + InterlockedAddAcquire((long *) value, increment); } inline -void atomic_sub(volatile int32* value, int32 decrement) { - InterlockedAdd((long *) value, -decrement); +void atomic_sub_relaxed(volatile int32* value, int32 decrement) +{ + InterlockedAddAcquire((long *) value, -decrement); } inline -void atomic_add(volatile int64* value, int64 increment) { - InterlockedAdd((long *) value, (long) increment); +void atomic_add_relaxed(volatile int64* value, int64 increment) +{ + InterlockedAddAcquire((long *) value, (long) increment); } inline -void atomic_sub(volatile int64* value, int64 decrement) { - InterlockedAdd((long *) value, -1 * ((long) decrement)); +void atomic_sub_relaxed(volatile int64* value, int64 decrement) +{ + InterlockedAddAcquire((long *) value, -1 * ((long) decrement)); } inline -f32 atomic_compare_exchange_weak(volatile f32* value, f32* expected, f32 desired) { - return (f32) InterlockedCompareExchange((long *) value, (long) desired, (long) *expected); +f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) +{ + return (f32) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected); } inline -f64 
atomic_compare_exchange_weak(volatile f64* value, f64* expected, f64 desired) { - return (f64) InterlockedCompareExchange((long *) value, (long) desired, (long) *expected); +f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) +{ + return (f64) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected); } inline -int32 atomic_compare_exchange_weak(volatile int32* value, int32* expected, int32 desired) { - return (int32) InterlockedCompareExchange((long *) value, desired, *expected); +int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) +{ + return (int32) InterlockedCompareExchangeRelease((long *) value, desired, *expected); } inline -int64 atomic_compare_exchange_weak(volatile int64* value, int64* expected, int64 desired) { - return (int64) InterlockedCompareExchange((long *) value, (long) desired, (long) *expected); +int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) +{ + return (int64) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected); } inline -int32 atomic_fetch_add(volatile int32* value, int32 operand) { - return (int32) InterlockedExchangeAdd((long *) value, operand); +int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) +{ + return (int32) InterlockedExchangeAddRelease((long *) value, operand); } inline -int32 atomic_fetch_sub(volatile int32* value, int32 operand) { +int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) +{ return (int32) InterlockedExchangeSubtract((unsigned long *) value, operand); } inline -int64 atomic_fetch_add(volatile int64* value, int64 operand) { - return (int64) InterlockedExchangeAdd((long *) value, (long) operand); +int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) +{ + return (int64) InterlockedExchangeAddRelease((long *) value, (long) operand); } inline -int64 atomic_fetch_sub(volatile int64* value, int64 operand) { +int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) +{ return (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); } inline -void atomic_set(volatile uint32* value, uint32 new_value) +void atomic_set_relaxed(volatile uint32* value, uint32 new_value) +{ + InterlockedExchangeAcquire((long *) value, new_value); +} + +inline +void atomic_set_relaxed(volatile uint64* value, uint64 new_value) +{ + InterlockedExchangeAcquire((long *) value, (long) new_value); +} + +inline +uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) +{ + return (uint32) InterlockedExchangeAcquire((long *) value, new_value); +} + +inline +uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) +{ + return (uint64) InterlockedExchangeAcquire((long *) value, (long) new_value); +} + +inline +uint32 atomic_get_relaxed(volatile uint32* value) +{ + return (uint32) InterlockedCompareExchangeAcquire((long *) value, 0, 0); +} + +inline +uint64 atomic_get_relaxed(volatile uint64* value) +{ + return (uint64) InterlockedCompareExchangeAcquire((long *) value, 0, 0); +} + +inline +void atomic_increment_relaxed(volatile uint32* value) +{ + InterlockedIncrementRelease((long *) value); +} + +inline +void atomic_decrement_relaxed(volatile uint32* value) +{ + InterlockedDecrementRelease((long *) value); +} + +inline +void atomic_increment_relaxed(volatile uint64* value) +{ + InterlockedIncrementRelease((long *) value); +} + +inline +void atomic_decrement_relaxed(volatile 
uint64* value) +{ + InterlockedDecrementRelease((long *) value); +} + +inline +void atomic_add_relaxed(volatile uint32* value, uint32 increment) +{ + InterlockedAddRelease((long *) value, increment); +} + +inline +void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) +{ + InterlockedAddRelease((long *) value, -1 * ((int32) decrement)); +} + +inline +void atomic_add_relaxed(volatile uint64* value, uint64 increment) +{ + InterlockedAddRelease((long *) value, (long) increment); +} + +inline +void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) +{ + InterlockedAddRelease((long *) value, -1 * ((long) decrement)); +} + +inline +uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) +{ + return (uint32) InterlockedCompareExchangeAcquire((long *) value, desired, *expected); +} + +inline +uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) +{ + return (uint64) InterlockedCompareExchangeAcquire((unsigned long long *) value, (unsigned long long) desired, (unsigned long long) *expected); +} + +inline +uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) +{ + return (uint32) InterlockedExchangeAddRelease((long *) value, operand); +} + +inline +uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) +{ + return (uint32) InterlockedExchangeSubtract((unsigned long *) value, operand); +} + +inline +uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) +{ + return (uint64) InterlockedExchangeAddRelease((long *) value, (long) operand); +} + +inline +uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) +{ + return (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); +} + +inline +void atomic_and_relaxed(volatile uint32* value, uint32 mask) +{ + InterlockedAndRelease((volatile LONG *) value, mask); +} + +inline +void atomic_and_relaxed(volatile int32* value, int32 mask) +{ + InterlockedAndRelease((volatile LONG *) value, (LONG)mask); +} + +inline +void atomic_and_relaxed(volatile uint64* value, uint64 mask) +{ + InterlockedAnd64Release((volatile LONG64 *) value, mask); +} + +inline +void atomic_and_relaxed(volatile int64* value, int64 mask) +{ + InterlockedAnd64Release((volatile LONG64 *) value, mask); +} + +inline +void atomic_or_relaxed(volatile uint32* value, uint32 mask) +{ + InterlockedOrRelease((volatile LONG *) value, mask); +} + +inline +void atomic_or_relaxed(volatile int32* value, int32 mask) +{ + InterlockedOrRelease((volatile LONG *) value, (LONG)mask); +} + +inline +void atomic_or_relaxed(volatile uint64* value, uint64 mask) +{ + InterlockedOr64Release((volatile LONG64 *) value, mask); +} + +inline +void atomic_or_relaxed(volatile int64* value, int64 mask) +{ + InterlockedOr64Release((volatile LONG64 *) value, mask); +} + +inline +void atomic_set_acquire(void** target, void* new_pointer) +{ + InterlockedExchangePointerAcquire(target, new_pointer); +} + +inline +void* atomic_get_acquire(void** target) +{ + return InterlockedCompareExchangePointerAcquire(target, NULL, NULL); +} + +inline +void atomic_set_acquire(volatile int32* value, int32 new_value) +{ + InterlockedExchangeAcquire((long *) value, new_value); +} + +inline +void atomic_set_acquire(volatile int64* value, int64 new_value) +{ + InterlockedExchangeAcquire((long *) value, (long) new_value); +} + +inline +void atomic_set_acquire(volatile f32* value, f32 new_value) +{ + InterlockedExchangeAcquire((long *) value, (long) 
new_value); +} + +inline +void atomic_set_acquire(volatile f64* value, f64 new_value) +{ + InterlockedExchangeAcquire((long *) value, (long) new_value); +} + +inline +int32 atomic_fetch_set_acquire(volatile int32* value, int32 new_value) +{ + return (int32) InterlockedExchangeAcquire((long *) value, new_value); +} + +inline +int64 atomic_fetch_set_acquire(volatile int64* value, int64 new_value) +{ + return (int64) InterlockedExchangeAcquire((long *) value, (long) new_value); +} + +inline +void atomic_set_acquire(volatile byte* value, const byte new_value[16]) +{ + int64* value64 = (int64*) value; + const int64* new_value64 = (const int64*) new_value; + + int64 expected_low, expected_high; + + do { + expected_low = value64[0]; + expected_high = value64[1]; + } while ( + !InterlockedCompareExchange128( + (volatile long long *) value, + new_value64[1], + new_value64[0], + &expected_low + ) + ); +} + +inline +int32 atomic_get_acquire(volatile int32* value) +{ + return (int32) InterlockedCompareExchangeAcquire((long *) value, 0, 0); +} + +inline +int64 atomic_get_acquire(volatile int64* value) +{ + return (int64) InterlockedCompareExchangeAcquire((long *) value, 0, 0); +} + +inline +f32 atomic_get_acquire(volatile f32* value) +{ + return (f32) InterlockedCompareExchangeAcquire((long *) value, 0, 0); +} + +inline +f64 atomic_get_acquire(volatile f64* value) +{ + return (f64) InterlockedCompareExchangeAcquire((long *) value, 0, 0); +} + +inline +void atomic_get_acquire(volatile byte* value, byte data[16]) +{ + InterlockedCompareExchange128((volatile long long *) value, 0, 0, (long long *) data); +} + +inline +void atomic_increment_acquire(volatile int32* value) +{ + InterlockedIncrementAcquire((long *) value); +} + +inline +void atomic_decrement_acquire(volatile int32* value) +{ + InterlockedDecrementAcquire((long *) value); +} + +inline +void atomic_increment_acquire(volatile int64* value) +{ + InterlockedIncrementAcquire((long *) value); +} + +inline +void atomic_decrement_acquire(volatile int64* value) +{ + InterlockedDecrementAcquire((long *) value); +} + +inline +void atomic_add_acquire(volatile int32* value, int32 increment) +{ + InterlockedAddAcquire((long *) value, increment); +} + +inline +void atomic_sub_acquire(volatile int32* value, int32 decrement) +{ + InterlockedAddAcquire((long *) value, -decrement); +} + +inline +void atomic_add_acquire(volatile int64* value, int64 increment) +{ + InterlockedAddAcquire((long *) value, (long) increment); +} + +inline +void atomic_sub_acquire(volatile int64* value, int64 decrement) +{ + InterlockedAddAcquire((long *) value, -1 * ((long) decrement)); +} + +inline +f32 atomic_compare_exchange_weak_acquire(volatile f32* value, f32* expected, f32 desired) +{ + return (f32) InterlockedCompareExchangeAcquire((long *) value, (long) desired, (long) *expected); +} + +inline +f64 atomic_compare_exchange_weak_acquire(volatile f64* value, f64* expected, f64 desired) +{ + return (f64) InterlockedCompareExchangeAcquire((long *) value, (long) desired, (long) *expected); +} + +inline +int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expected, int32 desired) +{ + return (int32) InterlockedCompareExchangeAcquire((long *) value, desired, *expected); +} + +inline +int64 atomic_compare_exchange_weak_acquire(volatile int64* value, int64* expected, int64 desired) +{ + return (int64) InterlockedCompareExchangeAcquire((long *) value, (long) desired, (long) *expected); +} + +inline +int32 atomic_fetch_add_acquire(volatile int32* value, int32 operand) 
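// ---- Usage sketch (illustrative, not part of the patch) ----------------------
// InterlockedCompareExchange128 writes the destination's previous value into the
// two-element 64-bit array passed as its last argument, and the destination must
// be 16-byte aligned. A sketch of the exchange loop above using such an array
// instead of two separate locals; atomic_set_128_example is a hypothetical name.
inline void atomic_set_128_example(volatile byte* value, const byte new_value[16])
{
    const LONG64* desired = (const LONG64*) new_value;

    alignas(16) LONG64 expected[2];
    do {
        // Snapshot the current 16 bytes; the CAS below re-validates them atomically
        expected[0] = ((volatile LONG64*) value)[0];
        expected[1] = ((volatile LONG64*) value)[1];
    } while (
        !InterlockedCompareExchange128(
            (volatile LONG64*) value,
            desired[1],   // high part
            desired[0],   // low part
            expected
        )
    );
}
// -------------------------------------------------------------------------------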
+{ + return (int32) InterlockedExchangeAddAcquire((long *) value, operand); +} + +inline +int32 atomic_fetch_sub_acquire(volatile int32* value, int32 operand) +{ + return (int32) InterlockedExchangeSubtract((unsigned long *) value, operand); +} + +inline +int64 atomic_fetch_add_acquire(volatile int64* value, int64 operand) +{ + return (int64) InterlockedExchangeSubtract((unsigned long *) value, operand); +} + +inline +int64 atomic_fetch_sub_acquire(volatile int64* value, int64 operand) +{ + return (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); +} + +inline +void atomic_set_acquire(volatile uint32* value, uint32 new_value) +{ + InterlockedExchangeAcquire((long *) value, new_value); +} + +inline +void atomic_set_acquire(volatile uint64* value, uint64 new_value) +{ + InterlockedExchangeAcquire((long *) value, (long) new_value); +} + +inline +uint32 atomic_fetch_set_acquire(volatile uint32* value, uint32 new_value) +{ + return (uint32) InterlockedExchangeAcquire((long *) value, new_value); +} + +inline +uint64 atomic_fetch_set_acquire(volatile uint64* value, uint64 new_value) +{ + return (uint64) InterlockedExchangeAcquire((long *) value, (long) new_value); +} + +inline +uint32 atomic_get_acquire(volatile uint32* value) +{ + return (uint32) InterlockedCompareExchangeAcquire((long *) value, 0, 0); +} + +inline +uint64 atomic_get_acquire(volatile uint64* value) +{ + return (uint64) InterlockedCompareExchangeAcquire((long *) value, 0, 0); +} + +inline +void atomic_increment_acquire(volatile uint32* value) +{ + InterlockedIncrementAcquire((long *) value); +} + +inline +void atomic_decrement_acquire(volatile uint32* value) +{ + InterlockedDecrementAcquire((long *) value); +} + +inline +void atomic_increment_acquire(volatile uint64* value) +{ + InterlockedIncrementAcquire((long *) value); +} + +inline +void atomic_decrement_acquire(volatile uint64* value) +{ + InterlockedDecrementAcquire((long *) value); +} + +inline +void atomic_add_acquire(volatile uint32* value, uint32 increment) +{ + InterlockedAddAcquire((long *) value, increment); +} + +inline +void atomic_sub_acquire(volatile uint32* value, uint32 decrement) +{ + InterlockedAddAcquire((long *) value, -1 * ((int32) decrement)); +} + +inline +void atomic_add_acquire(volatile uint64* value, uint64 increment) +{ + InterlockedAddAcquire((long *) value, (long) increment); +} + +inline +void atomic_sub_acquire(volatile uint64* value, uint64 decrement) +{ + InterlockedAddAcquire((long *) value, -1 * ((long) decrement)); +} + +inline +uint32 atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expected, uint32 desired) +{ + return (uint32) InterlockedCompareExchangeAcquire((long *) value, desired, *expected); +} + +inline +uint64 atomic_compare_exchange_weak_acquire(volatile uint64* value, uint64* expected, uint64 desired) +{ + return (uint64) InterlockedCompareExchangeAcquire((unsigned long long *) value, (unsigned long long) desired, (unsigned long long) *expected); +} + +inline +uint32 atomic_fetch_add_acquire(volatile uint32* value, uint32 operand) +{ + return (uint32) InterlockedExchangeAddAcquire((long *) value, operand); +} + +inline +uint32 atomic_fetch_sub_acquire(volatile uint32* value, uint32 operand) +{ + return (uint32) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); +} + +inline +uint64 atomic_fetch_add_acquire(volatile uint64* value, uint64 operand) +{ + return (uint64) InterlockedExchangeAddAcquire((long *) value, (long) operand); +} + +inline +uint64 
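// ---- Usage sketch (illustrative, not part of the patch) ----------------------
// On Windows `long` is 32 bits, so the (long *) casts in the 64-bit overloads
// above only operate on the low half of the value. If full-width 64-bit atomics
// are wanted, the dedicated *64 intrinsics can be used instead. The names below
// are hypothetical and use the default full-barrier ordering.
inline int64 atomic_fetch_add_64_example(volatile int64* value, int64 operand)
{
    return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, (LONG64) operand);
}

inline int64 atomic_fetch_sub_64_example(volatile int64* value, int64 operand)
{
    // Subtraction can go through the same intrinsic by adding the negated operand
    return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand));
}

inline int64 atomic_get_64_example(volatile int64* value)
{
    // A compare-exchange with equal comparand and exchange acts as an atomic load
    return (int64) InterlockedCompareExchange64((volatile LONG64 *) value, 0, 0);
}
// -------------------------------------------------------------------------------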
atomic_fetch_sub_acquire(volatile uint64* value, uint64 operand) +{ + return (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); +} + +inline +void atomic_and_acquire(volatile uint32* value, uint32 mask) +{ + InterlockedAndAcquire((volatile LONG *) value, mask); +} + +inline +void atomic_and_acquire(volatile int32* value, int32 mask) +{ + InterlockedAndAcquire((volatile LONG *) value, (LONG)mask); +} + +inline +void atomic_and_acquire(volatile uint64* value, uint64 mask) +{ + InterlockedAnd64Acquire((volatile LONG64 *) value, mask); +} + +inline +void atomic_and_acquire(volatile int64* value, int64 mask) +{ + InterlockedAnd64Acquire((volatile LONG64 *) value, mask); +} + +inline +void atomic_or_acquire(volatile uint32* value, uint32 mask) +{ + InterlockedOrAcquire((volatile LONG *) value, mask); +} + +inline +void atomic_or_acquire(volatile int32* value, int32 mask) +{ + InterlockedOrAcquire((volatile LONG *) value, (LONG)mask); +} + +inline +void atomic_or_acquire(volatile uint64* value, uint64 mask) +{ + InterlockedOr64Acquire((volatile LONG64 *) value, mask); +} + +inline +void atomic_or_acquire(volatile int64* value, int64 mask) +{ + InterlockedOr64Acquire((volatile LONG64 *) value, mask); +} + +inline +void atomic_set_release(void** target, void* new_pointer) +{ + InterlockedExchangePointer(target, new_pointer); +} + +inline +void* atomic_get_release(void** target) +{ + return InterlockedCompareExchangePointerRelease(target, NULL, NULL); +} + +inline +void atomic_set_release(volatile int32* value, int32 new_value) { InterlockedExchange((long *) value, new_value); } inline -void atomic_set(volatile uint64* value, uint64 new_value) +void atomic_set_release(volatile int64* value, int64 new_value) { InterlockedExchange((long *) value, (long) new_value); } inline -uint32 atomic_set_fetch(volatile uint32* value, uint32 new_value) +void atomic_set_release(volatile f32* value, f32 new_value) +{ + InterlockedExchange((long *) value, (long) new_value); +} + +inline +void atomic_set_release(volatile f64* value, f64 new_value) +{ + InterlockedExchange((long *) value, (long) new_value); +} + +inline +int32 atomic_fetch_set_release(volatile int32* value, int32 new_value) +{ + return (int32) InterlockedExchange((long *) value, new_value); +} + +inline +int64 atomic_fetch_set_release(volatile int64* value, int64 new_value) +{ + return (int64) InterlockedExchange((long *) value, (long) new_value); +} + +inline +void atomic_set_release(volatile byte* value, const byte new_value[16]) +{ + int64* value64 = (int64*) value; + const int64* new_value64 = (const int64*) new_value; + + int64 expected_low, expected_high; + + do { + expected_low = value64[0]; + expected_high = value64[1]; + } while ( + !InterlockedCompareExchange128( + (volatile long long *) value, + new_value64[1], + new_value64[0], + &expected_low + ) + ); +} + +inline +int32 atomic_get_release(volatile int32* value) +{ + return (int32) InterlockedCompareExchangeRelease((long *) value, 0, 0); +} + +inline +int64 atomic_get_release(volatile int64* value) +{ + return (int64) InterlockedCompareExchangeRelease((long *) value, 0, 0); +} + +inline +f32 atomic_get_release(volatile f32* value) +{ + return (f32) InterlockedCompareExchangeRelease((long *) value, 0, 0); +} + +inline +f64 atomic_get_release(volatile f64* value) +{ + return (f64) InterlockedCompareExchangeRelease((long *) value, 0, 0); +} + +inline +void atomic_get_release(volatile byte* value, byte data[16]) +{ + InterlockedCompareExchange128((volatile long long 
*) value, 0, 0, (long long *) data); +} + +inline +void atomic_increment_release(volatile int32* value) +{ + InterlockedIncrementRelease((long *) value); +} + +inline +void atomic_decrement_release(volatile int32* value) +{ + InterlockedDecrementRelease((long *) value); +} + +inline +void atomic_increment_release(volatile int64* value) +{ + InterlockedIncrementRelease((long *) value); +} + +inline +void atomic_decrement_release(volatile int64* value) +{ + InterlockedDecrementRelease((long *) value); +} + +inline +void atomic_add_release(volatile int32* value, int32 increment) +{ + InterlockedAddRelease((long *) value, increment); +} + +inline +void atomic_sub_release(volatile int32* value, int32 decrement) +{ + InterlockedAddRelease((long *) value, -decrement); +} + +inline +void atomic_add_release(volatile int64* value, int64 increment) +{ + InterlockedAddRelease((long *) value, (long) increment); +} + +inline +void atomic_sub_release(volatile int64* value, int64 decrement) +{ + InterlockedAddRelease((long *) value, -1 * ((long) decrement)); +} + +inline +f32 atomic_compare_exchange_weak_release(volatile f32* value, f32* expected, f32 desired) +{ + return (f32) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected); +} + +inline +f64 atomic_compare_exchange_weak_release(volatile f64* value, f64* expected, f64 desired) +{ + return (f64) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected); +} + +inline +int32 atomic_compare_exchange_weak_release(volatile int32* value, int32* expected, int32 desired) +{ + return (int32) InterlockedCompareExchangeRelease((long *) value, desired, *expected); +} + +inline +int64 atomic_compare_exchange_weak_release(volatile int64* value, int64* expected, int64 desired) +{ + return (int64) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected); +} + +inline +int32 atomic_fetch_add_release(volatile int32* value, int32 operand) +{ + return (int32) InterlockedExchangeAddRelease((long *) value, operand); +} + +inline +int32 atomic_fetch_sub_release(volatile int32* value, int32 operand) +{ + return (int32) InterlockedExchangeSubtract((unsigned long *) value, operand); +} + +inline +int64 atomic_fetch_add_release(volatile int64* value, int64 operand) +{ + return (int64) InterlockedExchangeSubtract((unsigned long *) value, operand); +} + +inline +int64 atomic_fetch_sub_release(volatile int64* value, int64 operand) +{ + return (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); +} + +inline +void atomic_set_release(volatile uint32* value, uint32 new_value) +{ + InterlockedExchange((long *) value, new_value); +} + +inline +void atomic_set_release(volatile uint64* value, uint64 new_value) +{ + InterlockedExchange((long *) value, (long) new_value); +} + +inline +uint32 atomic_fetch_set_release(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchange((long *) value, new_value); } inline -uint64 atomic_set_fetch(volatile uint64* value, uint64 new_value) +uint64 atomic_fetch_set_release(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchange((long *) value, (long) new_value); } inline -uint32 atomic_get(volatile uint32* value) +uint32 atomic_get_release(volatile uint32* value) +{ + return (uint32) InterlockedCompareExchangeRelease((long *) value, 0, 0); +} + +inline +uint64 atomic_get_release(volatile uint64* value) +{ + return (uint64) InterlockedCompareExchangeRelease((long *) value, 0, 0); +} + +inline +void 
atomic_increment_release(volatile uint32* value) +{ + InterlockedIncrementRelease((long *) value); +} + +inline +void atomic_decrement_release(volatile uint32* value) +{ + InterlockedDecrementRelease((long *) value); +} + +inline +void atomic_increment_release(volatile uint64* value) +{ + InterlockedIncrementRelease((long *) value); +} + +inline +void atomic_decrement_release(volatile uint64* value) +{ + InterlockedDecrementRelease((long *) value); +} + +inline +void atomic_add_release(volatile uint32* value, uint32 increment) +{ + InterlockedAddRelease((long *) value, increment); +} + +inline +void atomic_sub_release(volatile uint32* value, uint32 decrement) +{ + InterlockedAddRelease((long *) value, -1 * ((int32) decrement)); +} + +inline +void atomic_add_release(volatile uint64* value, uint64 increment) +{ + InterlockedAddRelease((long *) value, (long) increment); +} + +inline +void atomic_sub_release(volatile uint64* value, uint64 decrement) +{ + InterlockedAddRelease((long *) value, -1 * ((long) decrement)); +} + +inline +uint32 atomic_compare_exchange_weak_release(volatile uint32* value, uint32* expected, uint32 desired) +{ + return (uint32) InterlockedCompareExchangeRelease((long *) value, desired, *expected); +} + +inline +uint64 atomic_compare_exchange_weak_release(volatile uint64* value, uint64* expected, uint64 desired) +{ + return (uint64) InterlockedCompareExchangeRelease((unsigned long long *) value, (unsigned long long) desired, (unsigned long long) *expected); +} + +inline +uint32 atomic_fetch_add_release(volatile uint32* value, uint32 operand) +{ + return (uint32) InterlockedExchangeAddRelease((long *) value, operand); +} + +inline +uint32 atomic_fetch_sub_release(volatile uint32* value, uint32 operand) +{ + return (uint32) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); +} + +inline +uint64 atomic_fetch_add_release(volatile uint64* value, uint64 operand) +{ + return (uint64) InterlockedExchangeAddRelease((long *) value, (long) operand); +} + +inline +uint64 atomic_fetch_sub_release(volatile uint64* value, uint64 operand) +{ + return (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); +} + +inline +void atomic_and_release(volatile uint32* value, uint32 mask) +{ + InterlockedAndRelease((volatile LONG *) value, mask); +} + +inline +void atomic_and_release(volatile int32* value, int32 mask) +{ + InterlockedAndRelease((volatile LONG *) value, (LONG)mask); +} + +inline +void atomic_and_release(volatile uint64* value, uint64 mask) +{ + InterlockedAnd64Release((volatile LONG64 *) value, mask); +} + +inline +void atomic_and_release(volatile int64* value, int64 mask) +{ + InterlockedAnd64Release((volatile LONG64 *) value, mask); +} + +inline +void atomic_or_release(volatile uint32* value, uint32 mask) +{ + InterlockedOrRelease((volatile LONG *) value, mask); +} + +inline +void atomic_or_release(volatile int32* value, int32 mask) +{ + InterlockedOrRelease((volatile LONG *) value, (LONG)mask); +} + +inline +void atomic_or_release(volatile uint64* value, uint64 mask) +{ + InterlockedOr64Release((volatile LONG64 *) value, mask); +} + +inline +void atomic_or_release(volatile int64* value, int64 mask) +{ + InterlockedOr64Release((volatile LONG64 *) value, mask); +} + +inline +void atomic_set_acquire_release(void** target, void* new_pointer) +{ + InterlockedExchangePointer(target, new_pointer); +} + +inline +void* atomic_get_acquire_release(void** target) +{ + return InterlockedCompareExchangePointer(target, NULL, NULL); +} + +inline +void 
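// ---- Usage sketch (illustrative, not part of the patch) ----------------------
// The Linux and Win32 headers expose the same wrapper names, so call sites can
// stay platform-agnostic. The include paths below come from this patch but may
// need adjusting relative to the including file; finished_jobs is an assumed name.
#ifdef _WIN32
    #include "platform/win32/threading/Atomic.h"
#else
    #include "platform/linux/threading/Atomic.h"
#endif

static volatile int32 finished_jobs;

inline void worker_mark_job_done()
{
    // Release: the job's results are visible to whoever observes the new count
    atomic_increment_release(&finished_jobs);
}

inline int32 scheduler_poll_finished_jobs()
{
    return atomic_get_acquire(&finished_jobs);
}
// -------------------------------------------------------------------------------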
atomic_set_acquire_release(volatile int32* value, int32 new_value) +{ + InterlockedExchange((long *) value, new_value); +} + +inline +void atomic_set_acquire_release(volatile int64* value, int64 new_value) +{ + InterlockedExchange((long *) value, (long) new_value); +} + +inline +void atomic_set_acquire_release(volatile f32* value, f32 new_value) +{ + InterlockedExchange((long *) value, (long) new_value); +} + +inline +void atomic_set_acquire_release(volatile f64* value, f64 new_value) +{ + InterlockedExchange((long *) value, (long) new_value); +} + +inline +int32 atomic_fetch_set_acquire_release(volatile int32* value, int32 new_value) +{ + return (int32) InterlockedExchange((long *) value, new_value); +} + +inline +int64 atomic_fetch_set_acquire_release(volatile int64* value, int64 new_value) +{ + return (int64) InterlockedExchange((long *) value, (long) new_value); +} + +inline +void atomic_set_acquire_release(volatile byte* value, const byte new_value[16]) +{ + int64* value64 = (int64*) value; + const int64* new_value64 = (const int64*) new_value; + + int64 expected_low, expected_high; + + do { + expected_low = value64[0]; + expected_high = value64[1]; + } while ( + !InterlockedCompareExchange128( + (volatile long long *) value, + new_value64[1], + new_value64[0], + &expected_low + ) + ); +} + +inline +int32 atomic_get_acquire_release(volatile int32* value) +{ + return (int32) InterlockedCompareExchange((long *) value, 0, 0); +} + +inline +int64 atomic_get_acquire_release(volatile int64* value) +{ + return (int64) InterlockedCompareExchange((long *) value, 0, 0); +} + +inline +f32 atomic_get_acquire_release(volatile f32* value) +{ + return (f32) InterlockedCompareExchange((long *) value, 0, 0); +} + +inline +f64 atomic_get_acquire_release(volatile f64* value) +{ + return (f64) InterlockedCompareExchange((long *) value, 0, 0); +} + +inline +void atomic_get_acquire_release(volatile byte* value, byte data[16]) +{ + InterlockedCompareExchange128((volatile long long *) value, 0, 0, (long long *) data); +} + +inline +void atomic_increment_acquire_release(volatile int32* value) +{ + InterlockedIncrement((long *) value); +} + +inline +void atomic_decrement_acquire_release(volatile int32* value) +{ + InterlockedDecrement((long *) value); +} + +inline +void atomic_increment_acquire_release(volatile int64* value) +{ + InterlockedIncrement((long *) value); +} + +inline +void atomic_decrement_acquire_release(volatile int64* value) +{ + InterlockedDecrement((long *) value); +} + +inline +void atomic_add_acquire_release(volatile int32* value, int32 increment) +{ + InterlockedAdd((long *) value, increment); +} + +inline +void atomic_sub_acquire_release(volatile int32* value, int32 decrement) +{ + InterlockedAdd((long *) value, -decrement); +} + +inline +void atomic_add_acquire_release(volatile int64* value, int64 increment) +{ + InterlockedAdd((long *) value, (long) increment); +} + +inline +void atomic_sub_acquire_release(volatile int64* value, int64 decrement) +{ + InterlockedAdd((long *) value, -1 * ((long) decrement)); +} + +inline +f32 atomic_compare_exchange_weak_acquire_release(volatile f32* value, f32* expected, f32 desired) +{ + return (f32) InterlockedCompareExchange((long *) value, (long) desired, (long) *expected); +} + +inline +f64 atomic_compare_exchange_weak_acquire_release(volatile f64* value, f64* expected, f64 desired) +{ + return (f64) InterlockedCompareExchange((long *) value, (long) desired, (long) *expected); +} + +inline +int32 atomic_compare_exchange_weak_acquire_release(volatile 
int32* value, int32* expected, int32 desired) +{ + return (int32) InterlockedCompareExchange((long *) value, desired, *expected); +} + +inline +int64 atomic_compare_exchange_weak_acquire_release(volatile int64* value, int64* expected, int64 desired) +{ + return (int64) InterlockedCompareExchange((long *) value, (long) desired, (long) *expected); +} + +inline +int32 atomic_fetch_add_acquire_release(volatile int32* value, int32 operand) +{ + return (int32) InterlockedExchangeAdd((long *) value, operand); +} + +inline +int32 atomic_fetch_sub_acquire_release(volatile int32* value, int32 operand) +{ + int32 ret = (int32) InterlockedExchangeSubtract((unsigned long *) value, operand); + + return ret; +} + +inline +int64 atomic_fetch_add_acquire_release(volatile int64* value, int64 operand) +{ + int64 ret = (int64) InterlockedExchangeSubtract((unsigned long *) value, operand); + + return ret; +} + +inline +int64 atomic_fetch_sub_acquire_release(volatile int64* value, int64 operand) +{ + int64 ret = (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); + + return ret; +} + +inline +void atomic_set_acquire_release(volatile uint32* value, uint32 new_value) +{ + InterlockedExchange((long *) value, new_value); +} + +inline +void atomic_set_acquire_release(volatile uint64* value, uint64 new_value) +{ + InterlockedExchange((long *) value, (long) new_value); +} + +inline +uint32 atomic_fetch_set_acquire_release(volatile uint32* value, uint32 new_value) +{ + return (uint32) InterlockedExchange((long *) value, new_value); +} + +inline +uint64 atomic_fetch_set_acquire_release(volatile uint64* value, uint64 new_value) +{ + return (uint64) InterlockedExchange((long *) value, (long) new_value); +} + +inline +uint32 atomic_get_acquire_release(volatile uint32* value) { return (uint32) InterlockedCompareExchange((long *) value, 0, 0); } inline -uint64 atomic_get(volatile uint64* value) +uint64 atomic_get_acquire_release(volatile uint64* value) { return (uint64) InterlockedCompareExchange((long *) value, 0, 0); } inline -void atomic_increment(volatile uint32* value) { +void atomic_increment_acquire_release(volatile uint32* value) +{ InterlockedIncrement((long *) value); } inline -void atomic_decrement(volatile uint32* value) { +void atomic_decrement_acquire_release(volatile uint32* value) +{ InterlockedDecrement((long *) value); } inline -void atomic_increment(volatile uint64* value) { +void atomic_increment_acquire_release(volatile uint64* value) +{ InterlockedIncrement((long *) value); } inline -void atomic_decrement(volatile uint64* value) { +void atomic_decrement_acquire_release(volatile uint64* value) +{ InterlockedDecrement((long *) value); } inline -void atomic_add(volatile uint32* value, uint32 increment) { +void atomic_add_acquire_release(volatile uint32* value, uint32 increment) +{ InterlockedAdd((long *) value, increment); } inline -void atomic_sub(volatile uint32* value, uint32 decrement) { +void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement) +{ InterlockedAdd((long *) value, -1 * ((int32) decrement)); } inline -void atomic_add(volatile uint64* value, uint64 increment) { +void atomic_add_acquire_release(volatile uint64* value, uint64 increment) +{ InterlockedAdd((long *) value, (long) increment); } inline -void atomic_sub(volatile uint64* value, uint64 decrement) { +void atomic_sub_acquire_release(volatile uint64* value, uint64 decrement) +{ InterlockedAdd((long *) value, -1 * ((long) decrement)); } inline -uint32 atomic_compare_exchange_weak(volatile 
 inline
-uint32 atomic_compare_exchange_weak(volatile uint32* value, uint32* expected, uint32 desired) {
+uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint32* expected, uint32 desired)
+{
     return (uint32) InterlockedCompareExchange((long *) value, desired, *expected);
 }
 
 inline
-uint64 atomic_compare_exchange_weak(volatile uint64* value, uint64* expected, uint64 desired) {
+uint64 atomic_compare_exchange_weak_acquire_release(volatile uint64* value, uint64* expected, uint64 desired)
+{
     return (uint64) InterlockedCompareExchange((unsigned long long *) value, (unsigned long long) desired, (unsigned long long) *expected);
 }
 
 inline
-uint32 atomic_fetch_add(volatile uint32* value, uint32 operand) {
+uint32 atomic_fetch_add_acquire_release(volatile uint32* value, uint32 operand)
+{
     return (uint32) InterlockedExchangeAdd((long *) value, operand);
 }
 
 inline
-uint32 atomic_fetch_sub(volatile uint32* value, uint32 operand) {
-    return (uint32) InterlockedExchangeSubtract((unsigned long *) value, operand);
+uint32 atomic_fetch_sub_acquire_release(volatile uint32* value, uint32 operand)
+{
+    uint32 ret = (uint32) InterlockedExchangeSubtract((unsigned long *) value, operand);
+
+    return ret;
 }
 
 inline
-uint64 atomic_fetch_add(volatile uint64* value, uint64 operand) {
+uint64 atomic_fetch_add_acquire_release(volatile uint64* value, uint64 operand)
+{
     return (uint64) InterlockedExchangeAdd((long *) value, (long) operand);
 }
 
 inline
-uint64 atomic_fetch_sub(volatile uint64* value, uint64 operand) {
-    return (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand);
+uint64 atomic_fetch_sub_acquire_release(volatile uint64* value, uint64 operand)
+{
+    // 64-bit add of the negated operand; there is no 64-bit ExchangeSubtract
+    uint64 ret = (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand));
+
+    return ret;
 }
 
 inline
-void atomic_and(volatile uint32* value, uint32 mask) {
+void atomic_and_acquire_release(volatile uint32* value, uint32 mask)
+{
     InterlockedAnd((volatile LONG *) value, mask);
 }
 
 inline
-void atomic_and(volatile int32* value, int32 mask) {
+void atomic_and_acquire_release(volatile int32* value, int32 mask)
+{
     InterlockedAnd((volatile LONG *) value, (LONG)mask);
 }
 
 inline
-void atomic_and(volatile uint64* value, uint64 mask) {
+void atomic_and_acquire_release(volatile uint64* value, uint64 mask)
+{
     InterlockedAnd64((volatile LONG64 *) value, mask);
 }
 
 inline
-void atomic_and(volatile int64* value, int64 mask) {
+void atomic_and_acquire_release(volatile int64* value, int64 mask)
+{
     InterlockedAnd64((volatile LONG64 *) value, mask);
 }
 
 inline
-void atomic_or(volatile uint32* value, uint32 mask) {
+void atomic_or_acquire_release(volatile uint32* value, uint32 mask)
+{
     InterlockedOr((volatile LONG *) value, mask);
 }
 
 inline
-void atomic_or(volatile int32* value, int32 mask) {
+void atomic_or_acquire_release(volatile int32* value, int32 mask)
+{
     InterlockedOr((volatile LONG *) value, (LONG)mask);
 }
 
 inline
-void atomic_or(volatile uint64* value, uint64 mask) {
+void atomic_or_acquire_release(volatile uint64* value, uint64 mask)
+{
     InterlockedOr64((volatile LONG64 *) value, mask);
 }
 
 inline
-void atomic_or(volatile int64* value, int64 mask) {
+void atomic_or_acquire_release(volatile int64* value, int64 mask)
+{
     InterlockedOr64((volatile LONG64 *) value, mask);
 }
 
+// Check out the intrinsic functions oms_fence_memory and oms_fence_write
+// These are much faster and could accomplish what you are doing
+inline
+void atomic_fence_acquire()
+{
+    MemoryBarrier();
+}
+
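For reference, the typical pairing of these fences is publish/consume: the writer fills a payload, issues the release fence, then sets a flag; the reader checks the flag, issues the acquire fence, then reads the payload. A minimal sketch against the helpers above, assuming the flag is an aligned int32 whose loads and stores are themselves atomic (variable names are illustrative):

    static int32 shared_payload;
    static volatile int32 shared_flag;

    // writer: publish the payload, then the flag
    shared_payload = 42;
    atomic_fence_release();
    shared_flag = 1;

    // reader: once the flag is observed, the payload is safe to read
    if (shared_flag) {
        atomic_fence_acquire();
        int32 local_copy = shared_payload;
    }
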
+// Check out the intrinsic functions oms_fence_memory and oms_fence_write
+// These are much faster and could accomplish what you are doing
+inline
+void atomic_fence_release()
+{
+    MemoryBarrier();
+}
+
 #endif
\ No newline at end of file
diff --git a/stdlib/HashMap.h b/stdlib/HashMap.h
index e6133bc..e7ca2aa 100644
--- a/stdlib/HashMap.h
+++ b/stdlib/HashMap.h
@@ -72,10 +72,6 @@ struct HashMap {
     ChunkMemory buf;
 };
 
-// @performance Implement more like gperf, our implementation is slow. However, keep it around since it is very general purpose
-// Alternatively, also create a version that creates perfect hashes (input requires a hash function and a seed for that hash function)
-// Both would be saved in the hash impl.
-
 // WARNING: element_size = element size + remaining HashEntry data size
 void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ring)
 {
@@ -132,6 +128,7 @@ void hashmap_insert(HashMap* hm, const char* key, int32 value) {
     HashEntryInt32* entry = (HashEntryInt32 *) chunk_get_element(&hm->buf, element, true);
 
     entry->element_id = element;
+    // @performance Do we really need strncpy? Either use memcpy or strcpy?! Same goes for all the other cases below
     strncpy(entry->key, key, HASH_MAP_MAX_KEY_LENGTH);
     entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
 
@@ -364,7 +361,7 @@ inline int64 hashmap_dump(const HashMap* hm, byte* data) {
     *((uint64 *) data) = SWAP_ENDIAN_LITTLE(hm->buf.count);
-    data += sizeof(uint64);
+    data += sizeof(hm->buf.count);
 
     // Dump the table content where the elements are relative indices/pointers
     for (int32 i = 0; i < hm->buf.count; ++i) {
diff --git a/stdlib/Intrinsics.h b/stdlib/Intrinsics.h
index add41ed..55cf99b 100644
--- a/stdlib/Intrinsics.h
+++ b/stdlib/Intrinsics.h
@@ -52,17 +52,38 @@
 inline f32 oms_floor(f32 a) { return _mm_cvtss_f32(_mm_floor_ss(_mm_setzero_ps(), _mm_set_ss(a))); }
 inline f32 oms_ceil(f32 a) { return _mm_cvtss_f32(_mm_ceil_ss(_mm_setzero_ps(), _mm_set_ss(a))); }
 
-inline uint32 hash(uint64 a, uint64 b = 0)
+inline uint32 oms_hash(uint64 a, uint64 b = 0)
 {
     uint8 seed[16] = {
         0xaa, 0x9b, 0xbd, 0xb8, 0xa1, 0x98, 0xac, 0x3f, 0x1f, 0x94, 0x07, 0xb3, 0x8c, 0x27, 0x93, 0x69,
     };
 
     __m128i hash = _mm_set_epi64x(a, b);
-    hash = _mm_aesdec_si128(hash, _mm_loadu_si128((__m128i *) seed));
-    hash = _mm_aesdec_si128(hash, _mm_loadu_si128((__m128i *) seed));
+    hash = _mm_aesdec_si128(hash, _mm_loadu_si128((__m128i *) seed));
+    hash = _mm_aesdec_si128(hash, _mm_loadu_si128((__m128i *) seed));
 
     return _mm_extract_epi32(hash, 0);
 }
 
+inline void oms_fence_memory()
+{
+    _mm_mfence();
+}
+
+inline void oms_fence_write()
+{
+    _mm_sfence();
+}
+
+inline void oms_fence_load()
+{
+    _mm_lfence();
+}
+
+inline
+void oms_invalidate_cache(void* address)
+{
+    _mm_clflush(address);
+}
+
 #endif
\ No newline at end of file
diff --git a/stdlib/IntrinsicsArm.h b/stdlib/IntrinsicsArm.h
index da6c1a7..c88d18b 100644
--- a/stdlib/IntrinsicsArm.h
+++ b/stdlib/IntrinsicsArm.h
@@ -10,6 +10,7 @@
 #define TOS_STDLIB_INTRINSICS_ARM_H
 
 #include <arm_sve.h>
+#include <arm_acle.h>
 
 inline float oms_sqrt(float a) {
     svfloat32_t input = svdup_f32(a);
@@ -67,4 +68,28 @@ inline float oms_ceil(float a) {
     return svget1_f32(result);
 }
 
+inline void oms_fence_memory()
+{
+    // DMB SY: full data memory barrier
+    __dmb(0xF);
+}
+
+inline void oms_fence_write()
+{
+    // DMB ST: orders stores only (counterpart to _mm_sfence)
+    __dmb(0xE);
+}
+
+inline void oms_fence_load()
+{
+    // DMB LD: orders loads (counterpart to _mm_lfence)
+    __dmb(0xD);
+}
+
+inline
+void oms_invalidate_cache(void* address)
+{
+    asm volatile("dc ivac, %0" : : "r"(address) : "memory");
+}
+
 #endif
\ No newline at end of file
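One observation on the hashmap_dump change in the stdlib/HashMap.h hunk above: the count is written through a uint64 pointer while the cursor now advances by sizeof(hm->buf.count). If ChunkMemory.count is not a 64-bit field, the dump writes 8 bytes but skips fewer, and the matching load would read a shifted layout. A consistent pairing would keep the written width and the advance in sync (sketch only, the field width is assumed):

    *((uint64 *) data) = SWAP_ENDIAN_LITTLE((uint64) hm->buf.count);
    data += sizeof(uint64);
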
diff --git a/stdlib/PerfectHashMap.h b/stdlib/PerfectHashMap.h
index e69de29..1d82664 100644
--- a/stdlib/PerfectHashMap.h
+++ b/stdlib/PerfectHashMap.h
@@ -0,0 +1,366 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef TOS_STDLIB_PERFECT_HASH_MAP_H
+#define TOS_STDLIB_PERFECT_HASH_MAP_H
+
+#include "Types.h"
+#include "HashMap.h"
+#include "../hash/GeneralHash.h"
+#include "../memory/RingMemory.h"
+
+#define PERFECT_HASH_MAP_MAX_KEY_LENGTH 32
+typedef uint64 (*perfect_hash_function)(const char* key, int32 seed);
+
+const perfect_hash_function PERFECT_HASH_FUNCTIONS[] = {
+    hash_djb2_seeded,
+    hash_sdbm_seeded,
+    hash_lose_lose_seeded,
+    hash_polynomial_rolling_seeded,
+    hash_fnv1a_seeded,
+    hash_oat_seeded,
+    hash_ejb_seeded
+};
+
+struct PerfectHashEntryInt32 {
+    int64 element_id;
+    char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
+    int32 value;
+};
+
+struct PerfectHashEntryInt64 {
+    int64 element_id;
+    char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
+    int64 value;
+};
+
+struct PerfectHashEntryUIntPtr {
+    int64 element_id;
+    char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
+    uintptr_t value;
+};
+
+struct PerfectHashEntryVoidP {
+    int64 element_id;
+    char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
+    void* value;
+};
+
+struct PerfectHashEntryFloat {
+    int64 element_id;
+    char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
+    f32 value;
+};
+
+struct PerfectHashEntryStr {
+    int64 element_id;
+    char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
+    char value[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
+};
+
+struct PerfectHashEntry {
+    int64 element_id;
+    char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
+    byte* value;
+};
+
+struct PerfectHashMap {
+    int32 hash_seed;
+    perfect_hash_function hash_function;
+    int32 entry_size;
+
+    int32 map_size;
+    byte* hash_entries;
+};
+
+bool set_perfect_hashmap(PerfectHashMap* hm, const char** keys, int32 key_count, perfect_hash_function hash_func, int32 seed_tries, RingMemory* ring)
+{
+    int32* indices = (int32 *) ring_get_memory(ring, hm->map_size * sizeof(int32), 4);
+    bool is_unique = false;
+
+    int32 seed = 0;
+    int32 c = 0;
+
+    while (!is_unique && c < seed_tries) {
+        is_unique = true;
+        seed = rand();
+        memset(indices, 0, hm->map_size * sizeof(int32));
+
+        for (int32 j = 0; j < key_count; ++j) {
+            int32 index = (int32) (hash_func(keys[j], seed) % hm->map_size);
+            if (indices[index]) {
+                is_unique = false;
+                break;
+            } else {
+                indices[index] = 1;
+            }
+        }
+
+        ++c;
+    }
+
+    if (is_unique) {
+        hm->hash_seed = seed;
+        hm->hash_function = hash_func;
+    }
+
+    return is_unique;
+}
+
+bool perfect_hashmap_find_perfect_hash(PerfectHashMap* hm, const char** keys, int32 key_count, int32 seed_tries, RingMemory* ring)
+{
+    int32* indices = (int32 *) ring_get_memory(ring, hm->map_size * sizeof(int32), 4);
+    bool is_unique = false;
+
+    for (int32 i = 0; i < ARRAY_COUNT(PERFECT_HASH_FUNCTIONS); ++i) {
+        int32 seed = 0;
+        int32 c = 0;
+
+        while (!is_unique && c < seed_tries) {
+            is_unique = true;
+            seed = rand();
+            memset(indices, 0, hm->map_size * sizeof(int32));
+
+            for (int32 j = 0; j < key_count; ++j) {
+                int32 index = (int32) ((PERFECT_HASH_FUNCTIONS[i])(keys[j], seed) % hm->map_size);
+                if (indices[index]) {
+                    is_unique = false;
+                    break;
+                } else {
+                    indices[index] = 1;
+                }
+            }
+
+            ++c;
+        }
+
+        if (is_unique) {
+            hm->hash_seed = seed;
+            hm->hash_function = PERFECT_HASH_FUNCTIONS[i];
+
+            // Stop at the first hash function that works, otherwise the next
+            // iteration would overwrite seed and function with stale values
+            break;
+        }
+    }
+
+    return is_unique;
+}
+
+// WARNING: element_size = element size + remaining HashEntry data size
+void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, RingMemory* ring)
+{
+    hm->map_size = count;
+    hm->entry_size = element_size;
+    hm->hash_entries = ring_get_memory(
+        ring,
+        count * element_size,
+        0, true
+    );
+}
+
+// WARNING:
element_size = element size + remaining HashEntry data size +void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, BufferMemory* buf) +{ + hm->map_size = count; + hm->entry_size = element_size; + hm->hash_entries = buffer_get_memory( + buf, + count * element_size, + 0, true + ); +} + +// WARNING: element_size = element size + remaining HashEntry data size +void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, byte* buf) +{ + hm->map_size = count; + hm->entry_size = element_size; + hm->hash_entries = buf; +} + +// Calculates how large a hashmap will be +inline +int64 perfect_hashmap_size(int count, int32 element_size) +{ + return count * element_size; +} + +inline +int64 perfect_hashmap_size(const PerfectHashMap* hm) +{ + return hm->entry_size * hm->map_size; +} + +inline +void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, int32 value) { + int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; + PerfectHashEntryInt32* entry = (PerfectHashEntryInt32 *) (hm->hash_entries + hm->entry_size * index); + entry->element_id = index; + strcpy(entry->key, key); + entry->value = value; +} + +inline +void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, int64 value) { + int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; + PerfectHashEntryInt64* entry = (PerfectHashEntryInt64 *) (hm->hash_entries + hm->entry_size * index); + entry->element_id = index; + strcpy(entry->key, key); + entry->value = value; +} + +inline +void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, uintptr_t value) { + int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; + PerfectHashEntryUIntPtr* entry = (PerfectHashEntryUIntPtr *) (hm->hash_entries + hm->entry_size * index); + entry->element_id = index; + strcpy(entry->key, key); + entry->value = value; +} + +inline +void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, void* value) { + int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; + PerfectHashEntryVoidP* entry = (PerfectHashEntryVoidP *) (hm->hash_entries + hm->entry_size * index); + entry->element_id = index; + strcpy(entry->key, key); + entry->value = value; +} + +inline +void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, f32 value) { + int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; + PerfectHashEntryFloat* entry = (PerfectHashEntryFloat *) (hm->hash_entries + hm->entry_size * index); + entry->element_id = index; + strcpy(entry->key, key); + entry->value = value; +} + +inline +void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, const char* value) { + int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; + PerfectHashEntryStr* entry = (PerfectHashEntryStr *) (hm->hash_entries + hm->entry_size * index); + entry->element_id = index; + strcpy(entry->key, key); + memcpy(entry->value, value, PERFECT_HASH_MAP_MAX_KEY_LENGTH); +} + +inline +void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, byte* value) { + int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; + PerfectHashEntryStr* entry = (PerfectHashEntryStr *) (hm->hash_entries + hm->entry_size * index); + entry->element_id = index; + strcpy(entry->key, key); + memcpy(entry->value, value, hm->entry_size - sizeof(PerfectHashEntry)); +} + +inline +PerfectHashEntry* perfect_hashmap_get_entry(const PerfectHashMap* hm, const char* key) { + int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; + PerfectHashEntry* entry = 
(PerfectHashEntry *) (hm->hash_entries + hm->entry_size * index); + + return *entry->key == '\0' ? NULL : entry; +} + +inline +void perfect_hashmap_delete_entry(PerfectHashMap* hm, const char* key) { + int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; + PerfectHashEntry* entry = (PerfectHashEntry *) (hm->hash_entries + hm->entry_size * index); + + // This depends on where we check if an element exists (if we change perfect_hashmap_get_entry this also needs changing) + *entry->key = '\0'; +} + +inline +int64 perfect_hashmap_dump(const PerfectHashMap* hm, byte* data) +{ + byte* start = data; + + *((int32 *) data) = SWAP_ENDIAN_LITTLE(hm->map_size); + data += sizeof(hm->map_size); + + *((int32 *) data) = SWAP_ENDIAN_LITTLE(hm->hash_seed); + data += sizeof(hm->hash_seed); + + for (int32 i = 0; i < ARRAY_COUNT(PERFECT_HASH_FUNCTIONS); ++i) { + if (hm->hash_function == PERFECT_HASH_FUNCTIONS[i]) { + *((int32 *) data) = SWAP_ENDIAN_LITTLE((uint64) i); + data += sizeof(i); + + break; + } + } + + *((int32 *) data) = SWAP_ENDIAN_LITTLE(hm->entry_size); + data += sizeof(hm->entry_size); + + memcpy(data, hm->hash_entries, hm->map_size * hm->entry_size); + data += hm->map_size * hm->entry_size; + + return (int64) (data - start); +} + +// WARNING: Requires perfect_hashmap_create first +inline +int64 perfect_hashmap_load(PerfectHashMap* hm, const byte* data) +{ + const byte* start = data; + + hm->map_size = SWAP_ENDIAN_LITTLE(*((int32 *) data)); + data += sizeof(hm->map_size); + + hm->hash_seed = SWAP_ENDIAN_LITTLE(*((int32 *) data)); + data += sizeof(hm->hash_seed); + + hm->hash_function = PERFECT_HASH_FUNCTIONS[*((int32 *) data)]; + data += sizeof(int32); + + hm->entry_size = SWAP_ENDIAN_LITTLE(*((int32 *) data)); + data += sizeof(hm->entry_size); + + memcpy(hm->hash_entries, data, hm->map_size * hm->entry_size); + data += hm->map_size * hm->entry_size; + + return (int64) (data - start); +} + +// WARNiNG: Requires the phm to be initialized already incl. element count and element size etc. 
+inline +bool perfect_hashmap_from_hashmap(PerfectHashMap* phm, const HashMap* hm, int32 seed_trys, RingMemory* ring) +{ + char** keys = (char **) ring_get_memory(ring, sizeof(char *) * hm->buf.count, 8); + + // Find all keys + int32 key_index = 0; + for (int32 i = 0; i < hm->buf.count; ++i) { + HashEntry* entry = (HashEntry *) hm->table[i]; + while (entry != NULL) { + keys[key_index++] = entry->key; + entry = (HashEntry *) entry->next; + } + } + + // Check if we can turn it into a perfect hash map + bool is_perfect = perfect_hashmap_find_perfect_hash(phm, (char **) keys, key_index, seed_trys, ring); + if (!is_perfect) { + return false; + } + + // Fill perfect hash map + for (int32 i = 0; i < hm->buf.count; ++i) { + HashEntry* entry = (HashEntry *) hm->table[i]; + while (entry != NULL) { + perfect_hashmap_insert(phm, entry->key, entry->value); + entry = (HashEntry *) entry->next; + } + } + + return true; +} + +#endif \ No newline at end of file diff --git a/thread/Thread.h b/thread/Thread.h index 4f548e6..fdd5227 100644 --- a/thread/Thread.h +++ b/thread/Thread.h @@ -32,7 +32,7 @@ void thread_create(Worker* worker, ThreadJobFunc routine, void* arg) void thread_stop(Worker* worker) { - atomic_set(&worker->state, 0); + atomic_set_acquire(&worker->state, 0); pthread_join(worker->thread, NULL); } diff --git a/thread/ThreadPool.h b/thread/ThreadPool.h index 7336bd2..a38b6c5 100644 --- a/thread/ThreadPool.h +++ b/thread/ThreadPool.h @@ -60,32 +60,32 @@ static THREAD_RETURN thread_pool_worker(void* arg) break; } - work = (PoolWorker *) queue_dequeue_keep(&pool->work_queue, sizeof(PoolWorker), 64); + work = (PoolWorker *) queue_dequeue_keep(&pool->work_queue); pthread_mutex_unlock(&pool->work_mutex); if (!work) { continue; } - atomic_increment(&pool->working_cnt); - atomic_set(&work->state, 2); + atomic_increment_relaxed(&pool->working_cnt); + atomic_set_release(&work->state, 2); work->func(work); - atomic_set(&work->state, 1); + atomic_set_release(&work->state, 1); // Job gets marked after completion -> can be overwritten now - if (atomic_get(&work->id) == -1) { - atomic_set(&work->id, 0); + if (atomic_get_relaxed(&work->id) == -1) { + atomic_set_release(&work->id, 0); } - atomic_decrement(&pool->working_cnt); + atomic_decrement_relaxed(&pool->working_cnt); - if (atomic_get(&pool->state) == 0 && atomic_get(&pool->working_cnt) == 0) { + if (atomic_get_relaxed(&pool->state) == 0 && atomic_get_relaxed(&pool->working_cnt) == 0) { pthread_cond_signal(&pool->working_cond); } } pthread_cond_signal(&pool->working_cond); - atomic_decrement(&pool->thread_cnt); + atomic_decrement_relaxed(&pool->thread_cnt); return NULL; } @@ -121,10 +121,10 @@ void thread_pool_wait(ThreadPool* pool) void thread_pool_destroy(ThreadPool* pool) { // This sets the queue to empty - atomic_set((void **) &pool->work_queue.tail, (void **) &pool->work_queue.head); + atomic_set_acquire((void **) &pool->work_queue.tail, (void **) &pool->work_queue.head); // This sets the state to "shutdown" - atomic_set(&pool->state, 1); + atomic_set_release(&pool->state, 1); pthread_cond_broadcast(&pool->work_cond); thread_pool_wait(pool); @@ -137,8 +137,8 @@ void thread_pool_destroy(ThreadPool* pool) PoolWorker* thread_pool_add_work(ThreadPool* pool, const PoolWorker* job) { pthread_mutex_lock(&pool->work_mutex); - PoolWorker* temp_job = (PoolWorker *) ring_get_memory_nomove(&pool->work_queue, sizeof(PoolWorker), 64); - if (atomic_get(&temp_job->id) > 0) { + PoolWorker* temp_job = (PoolWorker *) ring_get_memory_nomove((RingMemory *) 
&pool->work_queue, sizeof(PoolWorker), 64); + if (atomic_get_relaxed(&temp_job->id) > 0) { pthread_mutex_unlock(&pool->work_mutex); ASSERT_SIMPLE(temp_job->id == 0); @@ -146,10 +146,10 @@ PoolWorker* thread_pool_add_work(ThreadPool* pool, const PoolWorker* job) } memcpy(temp_job, job, sizeof(PoolWorker)); - ring_move_pointer(&pool->work_queue, &pool->work_queue.head, sizeof(PoolWorker), 64); + ring_move_pointer((RingMemory *) &pool->work_queue, &pool->work_queue.head, sizeof(PoolWorker), 64); if (temp_job->id == 0) { - temp_job->id = atomic_fetch_add(&pool->id_counter, 1); + temp_job->id = atomic_fetch_add_acquire(&pool->id_counter, 1); } pthread_cond_broadcast(&pool->work_cond); @@ -164,8 +164,8 @@ PoolWorker* thread_pool_add_work_start(ThreadPool* pool) { pthread_mutex_lock(&pool->work_mutex); - PoolWorker* temp_job = (PoolWorker *) queue_enqueue_start(&pool->work_queue, sizeof(PoolWorker), 64); - if (atomic_get(&temp_job->id) > 0) { + PoolWorker* temp_job = (PoolWorker *) queue_enqueue_start(&pool->work_queue); + if (atomic_get_relaxed(&temp_job->id) > 0) { pthread_mutex_unlock(&pool->work_mutex); ASSERT_SIMPLE(temp_job->id == 0); @@ -174,7 +174,7 @@ PoolWorker* thread_pool_add_work_start(ThreadPool* pool) if (temp_job->id == 0) { // +1 because otherwise the very first job would be id = 0 which is not a valid id - temp_job->id = atomic_fetch_add(&pool->id_counter, 1) + 1; + temp_job->id = atomic_fetch_add_acquire(&pool->id_counter, 1) + 1; } return temp_job; @@ -182,7 +182,7 @@ PoolWorker* thread_pool_add_work_start(ThreadPool* pool) void thread_pool_add_work_end(ThreadPool* pool) { - queue_enqueue_end(&pool->work_queue, sizeof(PoolWorker), 64); + queue_enqueue_end(&pool->work_queue); pthread_cond_broadcast(&pool->work_cond); pthread_mutex_unlock(&pool->work_mutex); } diff --git a/ui/UITheme.h b/ui/UITheme.h index 54a566c..2c67d15 100644 --- a/ui/UITheme.h +++ b/ui/UITheme.h @@ -31,6 +31,7 @@ struct UIThemeStyle { // A theme may have N named styles // The hashmap contains the offset where the respective style can be found + // @performance Switch to perfect hash map HashMap hash_map; };
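As a follow-up to the @performance note above, the new PerfectHashMap could be built from the existing style hashmap once a theme is loaded. A rough sketch, assuming an int64-offset entry layout, a UIThemeStyle* theme, and a RingMemory* ring for temporary storage (the key, seed-try count, and sizes are illustrative):

    PerfectHashMap phm;
    perfect_hashmap_create(&phm, (int32) theme->hash_map.buf.count, (int32) sizeof(PerfectHashEntryInt64), ring);

    // Falls back to the regular hashmap if no collision-free hash/seed pair is found
    if (perfect_hashmap_from_hashmap(&phm, &theme->hash_map, 1000, ring)) {
        PerfectHashEntry* entry = perfect_hashmap_get_entry(&phm, "default_button");
    }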