Started a massive thread improvement journey. The AMS is still outstanding; handling multiple AssetArchives is still an open todo, and the same goes for multiple gpuapis

This commit is contained in:
Dennis Eichhorn 2024-12-18 07:43:11 +01:00
parent 8c034c4a26
commit fa9fcb6394
25 changed files with 3206 additions and 386 deletions

View File

@ -22,6 +22,8 @@ struct Asset {
// Could be 0 if there is no official id
uint64 official_id;
// @performance This is bad: it stores the same name that is already used as the hashmap key
// We effectively store the asset name twice, which shouldn't be the case
char name[MAX_ASSET_NAME_LENGTH];
AssetType type;
@ -32,13 +34,17 @@ struct Asset {
// Describes how much ram/vram the asset uses
// E.g. vram_size = 0 but ram_size > 0 means that it never uses any gpu memory
uint64 ram_size;
uint64 vram_size;
uint32 ram_size;
uint32 vram_size;
uint64 last_access;
// Usually 1, but in some cases an AMS may hold entities of variable chunk length
// For textures, for example, a 128x128 texture is of size 1 but a 256x256 texture is of size 4
uint32 size;
// Variable used for thread safety
bool is_loaded;
// Describes if the memory is currently available in ram/vram
// E.g. an asset might be uploaded to the gpu and no longer held in ram (or the other way around)
bool is_ram;
@ -49,10 +55,6 @@ struct Asset {
bool can_garbage_collect_ram;
bool can_garbage_collect_vram;
// Describes if the asset should be removed/garbage collected during CPU/GPU down time
bool should_garbage_collect_ram;
bool should_garbage_collect_vram;
Asset* next;
Asset* prev;
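
To make the chunk-based size field above concrete, a small illustrative helper; the 128x128 base chunk follows the comment, while the helper itself is an assumption and not part of this commit:

// Illustration only: size counts base chunks, here assuming a 128x128 base chunk.
inline uint32 asset_texture_chunk_count(uint32 width, uint32 height) {
    const uint32 base_pixels = 128 * 128;
    return (width * height + base_pixels - 1) / base_pixels; // 128x128 -> 1, 256x256 -> 4
}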

View File

@ -22,6 +22,7 @@
// @question Asset component systems could be created per region -> easy to simulate a specific region
// @bug This means players might not be able to transition from one area to another?!
// @performance There is a huge performance flaw. We CANNOT have an asset only in vram because it always also allocates the ram (asset_data_memory)
struct AssetManagementSystem {
// @question is this even necessary or could we integrate this directly into the system here?
HashMap hash_map;
@ -29,6 +30,7 @@ struct AssetManagementSystem {
uint64 ram_size;
uint64 vram_size;
uint64 asset_count;
int32 overhead;
bool has_changed;
// The indices of asset_memory and asset_data_memory are always linked
@ -48,16 +50,15 @@ struct AssetManagementSystem {
// @question do we want to create an extra threaded version? Or a combined one, like we have right now.
// @question Do we want to add a mutex to assets? This way we don't have to lock the entire ams.
pthread_mutex_t mutex;
// @bug We probably also need a overhead value.
// In some cases we need more data than our normal data (see texture, it contains image + texture)
};
void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 chunk_size, int32 count)
void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 chunk_size, int32 count, int32 overhead = 0)
{
// setup hash_map
hashmap_create(&ams->hash_map, count, sizeof(HashEntryInt64), buf);
ams->overhead = overhead;
// setup asset_memory
chunk_init(&ams->asset_memory, buf, count, sizeof(Asset), 64);
@ -71,13 +72,15 @@ void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 chunk_size,
}
// WARNING: buf size see ams_get_buffer_size
void ams_create(AssetManagementSystem* ams, byte* buf, int32 chunk_size, int32 count)
void ams_create(AssetManagementSystem* ams, byte* buf, int32 chunk_size, int32 count, int32 overhead = 0)
{
ASSERT_SIMPLE(chunk_size);
// setup hash_map
hashmap_create(&ams->hash_map, count, sizeof(HashEntryInt64), buf);
ams->overhead = overhead;
// setup asset_memory
ams->asset_memory.count = count;
ams->asset_memory.chunk_size = sizeof(Asset);
@ -108,7 +111,7 @@ void ams_free(AssetManagementSystem* ams)
inline
int32 ams_calculate_chunks(const AssetManagementSystem* ams, int32 byte_size)
{
return (int32) CEIL_DIV(byte_size, ams->asset_data_memory.chunk_size);
return (int32) CEIL_DIV(byte_size + ams->overhead, ams->asset_data_memory.chunk_size);
}
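
A quick worked example of how the new overhead value feeds into the chunk calculation; the concrete numbers are assumptions for illustration only:

// chunk_size = 4096, overhead = 192 (e.g. a Texture header stored alongside the pixel data)
// image payload of 8100 bytes:
//   without overhead: CEIL_DIV(8100, 4096)       = 2 chunks
//   with overhead:    CEIL_DIV(8100 + 192, 4096) = 3 chunks
int32 chunks = ams_calculate_chunks(ams, 8100);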
inline
@ -122,8 +125,6 @@ int64 ams_get_buffer_size(int32 count, int32 chunk_size)
inline
void ams_update_stats(AssetManagementSystem* ams)
{
// @bug We should check the hash map or the memory status, we could still have old values in here
ams->vram_size = 0;
ams->ram_size = 0;
ams->asset_count = 0;
@ -197,10 +198,14 @@ Asset* ams_get_asset(AssetManagementSystem* ams, const char* key)
{
HashEntry* entry = hashmap_get_entry(&ams->hash_map, key);
// @bug entry->value seems to be an address outside of any known buffer, how?
DEBUG_MEMORY_READ(
(uint64) (entry ? (Asset *) entry->value : 0),
entry ? ((Asset *) entry->value)->ram_size + sizeof(Asset) : 0
entry ? sizeof(Asset) : 0
);
DEBUG_MEMORY_READ(
(uint64) (entry ? (Asset *) entry->value : 0),
entry ? ((Asset *) entry->value)->self + ((Asset *) entry->value)->ram_size : 0
);
return entry ? (Asset *) entry->value : NULL;
@ -211,10 +216,14 @@ Asset* ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 hash)
{
HashEntry* entry = hashmap_get_entry(&ams->hash_map, key, hash);
// @bug entry->value seems to be an address outside of any known buffer, how?
DEBUG_MEMORY_READ(
(uint64) (entry ? (Asset *) entry->value : 0),
entry ? ((Asset *) entry->value)->ram_size + sizeof(Asset) : 0
entry ? sizeof(Asset) : 0
);
DEBUG_MEMORY_READ(
(uint64) (entry ? (Asset *) entry->value : 0),
entry ? ((Asset *) entry->value)->self + ((Asset *) entry->value)->ram_size : 0
);
return entry ? (Asset *) entry->value : NULL;
@ -248,6 +257,7 @@ Asset* thrd_ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 ha
// @todo implement defragment command to optimize memory layout since the memory layout will become fragmented over time
// @performance This function is VERY important, check if we can optimize it
// We could probably optimize the threaded version by adding an atomic_set_release(asset->is_loaded, true);
Asset* ams_reserve_asset(AssetManagementSystem* ams, const char* name, uint32 elements = 1)
{
int64 free_asset = chunk_reserve(&ams->asset_memory, elements, true);
@ -315,6 +325,48 @@ Asset* ams_reserve_asset(AssetManagementSystem* ams, const char* name, uint32 el
return asset;
}
void ams_garbage_collect(AssetManagementSystem* ams, uint64 time, uint64 dt)
{
Asset* asset = ams->first;
while (asset) {
// @performance We cannot just remove ram and keep vram. This is a huge flaw
if (asset->can_garbage_collect_ram && asset->can_garbage_collect_vram && time - asset->last_access >= dt) {
ams_free_asset(ams, asset);
}
asset = asset->next;
}
}
void ams_garbage_collect(AssetManagementSystem* ams)
{
Asset* asset = ams->first;
while (asset) {
// @performance We cannot just remove ram and keep vram. This is a huge flaw
if (asset->can_garbage_collect_ram && asset->can_garbage_collect_vram) {
ams_free_asset(ams, asset);
}
asset = asset->next;
}
}
void thrd_ams_garbage_collect(AssetManagementSystem* ams, uint64 time, uint64 dt)
{
pthread_mutex_lock(&ams->mutex);
ams_garbage_collect(ams, time, dt);
pthread_mutex_unlock(&ams->mutex);
}
void thrd_ams_garbage_collect(AssetManagementSystem* ams)
{
pthread_mutex_lock(&ams->mutex);
ams_garbage_collect(ams);
pthread_mutex_unlock(&ams->mutex);
}
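
For context, a hedged sketch of how the threaded garbage collection above might be driven from a frame/update loop; the time source name and the threshold value are assumptions, not part of this commit:

// Hypothetical downtime hook (time unit and threshold are assumed):
uint64 now = platform_time_now();          // assumed time source, not from this diff
thrd_ams_garbage_collect(&ams, now, 5000); // dt threshold in the same unit as now
// or, ignoring access times entirely:
thrd_ams_garbage_collect(&ams);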
Asset* thrd_ams_reserve_asset(AssetManagementSystem* ams, const char* name, uint32 elements = 1) {
pthread_mutex_lock(&ams->mutex);
Asset* asset = ams_reserve_asset(ams, name, elements);

View File

@ -89,17 +89,18 @@ struct AudioMixer {
// @todo add mutex for locking and create threaded functions
// do we need a condition or semaphore?
// Wait, why do we even need threading? Isn't the threading handled by the file loading?
};
bool audio_mixer_is_active(AudioMixer* mixer) {
if (mixer->state_new == AUDIO_MIXER_STATE_ACTIVE
&& atomic_get((int32 *) &mixer->state_new) == AUDIO_MIXER_STATE_ACTIVE
&& atomic_get_relaxed((int32 *) &mixer->state_new) == AUDIO_MIXER_STATE_ACTIVE
) {
return true;
}
AudioMixerState mixer_state;
if ((mixer_state = (AudioMixerState) atomic_get((int32 *) &mixer->state_new)) != mixer->state_old) {
if ((mixer_state = (AudioMixerState) atomic_get_relaxed((int32 *) &mixer->state_new)) != mixer->state_old) {
if (mixer_state != AUDIO_MIXER_STATE_UNINITIALIZED) {
audio_load(
mixer->window,

View File

@ -23,6 +23,7 @@ uint64 hash_djb2(const char* key) {
return hash;
}
inline
uint64 hash_sdbm(const byte* key)
{
uint64 hash = 0;
@ -35,6 +36,7 @@ uint64 hash_sdbm(const byte* key)
return hash;
}
inline
uint64 hash_lose_lose(const byte* key)
{
uint64 hash = 0;
@ -47,7 +49,9 @@ uint64 hash_lose_lose(const byte* key)
return hash;
}
uint64 hash_polynomial_rolling(const char* str) {
inline
uint64 hash_polynomial_rolling(const char* str)
{
const int32 p = 31;
const int32 m = 1000000009;
uint64 hash = 0;
@ -62,7 +66,9 @@ uint64 hash_polynomial_rolling(const char* str) {
return hash;
}
uint64 hash_fnv1a(const char* str) {
inline
uint64 hash_fnv1a(const char* str)
{
const uint64 FNV_OFFSET_BASIS = 14695981039346656037UL;
const uint64 FNV_PRIME = 1099511628211UL;
uint64 hash = FNV_OFFSET_BASIS;
@ -76,34 +82,144 @@ uint64 hash_fnv1a(const char* str) {
return hash;
}
inline
uint32 hash_oat(const char* str)
{
uint32 h = 0;
uint32 hash = 0;
while(*str) {
h += *str++;
h += (h << 10);
h ^= (h >> 6);
hash += *str++;
hash += (hash << 10);
hash ^= (hash >> 6);
}
h += (h << 3);
h ^= (h >> 11);
h += (h << 15);
hash += (hash << 3);
hash ^= (hash >> 11);
hash += (hash << 15);
return h;
return hash;
}
inline
uint32 hash_ejb(const char* str)
{
const uint32 PRIME1 = 37;
const uint32 PRIME2 = 1048583;
uint32 h = 0;
uint32 hash = 0;
while (*str) {
h = h * PRIME1 ^ (*str++ - ' ');
hash = hash * PRIME1 ^ (*str++ - ' ');
}
return h % PRIME2;
return hash % PRIME2;
}
////////////////////////////////////
// Seeded hash functions
////////////////////////////////////
inline constexpr
uint64 hash_djb2_seeded(const char* key, int32 seed)
{
uint64 hash = 5381;
int32 c;
while ((c = *key++)) {
hash = ((hash << 5) + hash) + c;
}
return hash ^ (seed + (seed << 6) + (seed >> 2));
}
inline
uint64 hash_sdbm_seeded(const char* key, int32 seed)
{
uint64 hash = 0;
int32 c;
while ((c = *key++)) {
hash = c + (hash << 6) + (hash << 16) - hash;
}
return hash ^ (seed + (seed << 6) + (seed >> 2));
}
inline
uint64 hash_lose_lose_seeded(const char* key, int32 seed)
{
uint64 hash = 0;
int32 c;
while ((c = *key++)) {
hash += c;
}
return hash ^ (seed + (seed << 6) + (seed >> 2));
}
inline
uint64 hash_polynomial_rolling_seeded(const char* str, int32 seed)
{
const int32 p = 31;
const int32 m = 1000000009;
uint64 hash = 0;
uint64 p_pow = 1;
while (*str) {
hash = (hash + (*str - 'a' + 1) * p_pow) % m;
p_pow = (p_pow * p) % m;
str++;
}
return hash ^ (seed + (seed << 6) + (seed >> 2));
}
inline
uint64 hash_fnv1a_seeded(const char* str, int32 seed)
{
const uint64 FNV_OFFSET_BASIS = 14695981039346656037UL;
const uint64 FNV_PRIME = 1099511628211UL;
uint64 hash = FNV_OFFSET_BASIS;
while (*str) {
hash ^= (byte) *str;
hash *= FNV_PRIME;
str++;
}
return hash ^ (seed + (seed << 6) + (seed >> 2));
}
inline
uint64 hash_oat_seeded(const char* str, int32 seed)
{
uint64 hash = 0;
while(*str) {
hash += *str++;
hash += (hash << 10);
hash ^= (hash >> 6);
}
hash += (hash << 3);
hash ^= (hash >> 11);
hash += (hash << 15);
return hash ^ (seed + (seed << 6) + (seed >> 2));
}
inline
uint64 hash_ejb_seeded(const char* str, int32 seed)
{
const uint64 PRIME1 = 37;
const uint64 PRIME2 = 1048583;
uint64 hash = 0;
while (*str) {
hash = hash * PRIME1 ^ (*str++ - ' ');
}
return (hash % PRIME2) ^ (seed + (seed << 6) + (seed >> 2));
}
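
The seeded variants are what the perfect hash map further down uses to retry with different seeds until a fixed key set maps without collisions. A minimal usage sketch; the key and table size here are made up:

// Pick a slot for a key under a given seed; a perfect-hash search retries with new seeds on collision.
int32 slot = (int32) (hash_djb2_seeded("assets/grass_diffuse", 12345) % 64);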
#endif

View File

@ -161,19 +161,19 @@ void update_timing_stat_reset(uint32 stat)
inline
void reset_counter(int32 id)
{
atomic_set(&debug_container->counter[id], 0);
atomic_set_acquire(&debug_container->counter[id], 0);
}
inline
void log_increment(int32 id, int64 by = 1)
{
atomic_add(&debug_container->counter[id], by);
atomic_add_acquire(&debug_container->counter[id], by);
}
inline
void log_counter(int32 id, int64 value)
{
atomic_set(&debug_container->counter[id], value);
atomic_set_acquire(&debug_container->counter[id], value);
}
// @todo don't use a pointer to this should be in a global together with other logging data (see Log.h)
@ -234,9 +234,9 @@ void debug_memory_log(uint64 start, uint64 size, int32 type, const char* functio
return;
}
uint64 idx = atomic_fetch_add(&mem->action_idx, 1);
uint64 idx = atomic_fetch_add_relaxed(&mem->action_idx, 1);
if (idx >= ARRAY_COUNT(mem->last_action)) {
atomic_set(&mem->action_idx, 1);
atomic_set_acquire(&mem->action_idx, 1);
idx %= ARRAY_COUNT(mem->last_action);
}
@ -266,9 +266,9 @@ void debug_memory_reserve(uint64 start, uint64 size, int32 type, const char* fun
return;
}
uint64 idx = atomic_fetch_add(&mem->reserve_action_idx, 1);
uint64 idx = atomic_fetch_add_relaxed(&mem->reserve_action_idx, 1);
if (idx >= ARRAY_COUNT(mem->reserve_action)) {
atomic_set(&mem->reserve_action_idx, 1);
atomic_set_acquire(&mem->reserve_action_idx, 1);
idx %= ARRAY_COUNT(mem->last_action);
}

View File

@ -170,9 +170,6 @@ int64 buffer_load(BufferMemory* buf, const byte* data)
buf->alignment = SWAP_ENDIAN_LITTLE(*((int32 *) data));
data += sizeof(buf->alignment);
buf->element_alignment = SWAP_ENDIAN_LITTLE(*((int32 *) data));
data += sizeof(buf->element_alignment);
// End
buf->end = buf->memory + SWAP_ENDIAN_LITTLE(*((uint64 *) data));
data += sizeof(uint64);

View File

@ -43,23 +43,6 @@ struct ChunkMemory {
uint64* free;
};
struct ThreadedChunkMemory {
byte* memory;
uint64 count;
uint64 size;
uint64 chunk_size;
int64 last_pos;
int32 alignment;
// length = count
// free describes which locations are used and which are free
uint64* free;
pthread_mutex_t mutex;
pthread_cond_t cond;
};
inline
void chunk_alloc(ChunkMemory* buf, uint64 count, uint64 chunk_size, int32 alignment = 64)
{
@ -69,11 +52,11 @@ void chunk_alloc(ChunkMemory* buf, uint64 count, uint64 chunk_size, int32 alignm
chunk_size = ROUND_TO_NEAREST(chunk_size, alignment);
buf->memory = alignment < 2
? (byte *) platform_alloc(count * chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64))
: (byte *) platform_alloc_aligned(count * chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64), alignment);
? (byte *) platform_alloc(count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64))
: (byte *) platform_alloc_aligned(count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64), alignment);
buf->count = count;
buf->size = count * chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64);
buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64);
buf->chunk_size = chunk_size;
buf->last_pos = -1;
buf->alignment = alignment;
@ -86,17 +69,6 @@ void chunk_alloc(ChunkMemory* buf, uint64 count, uint64 chunk_size, int32 alignm
DEBUG_MEMORY_INIT((uint64) buf->memory, buf->size);
}
inline
void chunk_free(ChunkMemory* buf)
{
DEBUG_MEMORY_DELETE((uint64) buf->memory, buf->size);
if (buf->alignment < 2) {
platform_free((void **) &buf->memory);
} else {
platform_aligned_free((void **) &buf->memory);
}
}
inline
void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint64 chunk_size, int32 alignment = 64)
{
@ -105,10 +77,10 @@ void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint64 chunk
chunk_size = ROUND_TO_NEAREST(chunk_size, alignment);
buf->memory = buffer_get_memory(data, count * chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64));
buf->memory = buffer_get_memory(data, count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64));
buf->count = count;
buf->size = count * chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64);
buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64);
buf->chunk_size = chunk_size;
buf->last_pos = -1;
buf->alignment = alignment;
@ -134,7 +106,7 @@ void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint64 chunk_size, i
buf->memory = data;
buf->count = count;
buf->size = count * chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64);
buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64);
buf->chunk_size = chunk_size;
buf->last_pos = -1;
buf->alignment = alignment;
@ -148,10 +120,22 @@ void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint64 chunk_size, i
DEBUG_MEMORY_RESERVE((uint64) buf->memory, buf->size, 187);
}
inline
void chunk_free(ChunkMemory* buf)
{
DEBUG_MEMORY_DELETE((uint64) buf->memory, buf->size);
if (buf->alignment < 2) {
platform_free((void **) &buf->memory);
} else {
platform_aligned_free((void **) &buf->memory);
}
}
inline
byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false)
{
byte* offset = buf->memory + element * buf->chunk_size;
ASSERT_SIMPLE(offset);
if (zeroed) {
memset((void *) offset, 0, buf->chunk_size);
@ -159,8 +143,6 @@ byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false)
DEBUG_MEMORY_READ((uint64) offset, buf->chunk_size);
ASSERT_SIMPLE(offset);
return offset;
}

View File

@ -132,6 +132,8 @@ void heap_pop(Heap* heap, void* out) {
return;
}
DEBUG_MEMORY_READ((uint64) heap->elements, heap->element_size);
memcpy(out, heap->elements, heap->element_size);
void* last_element = heap->elements + ((heap->size - 1) * heap->element_size);
memcpy(heap->elements, last_element, heap->element_size);
@ -141,6 +143,8 @@ void heap_pop(Heap* heap, void* out) {
inline
void* heap_peek(Heap* heap) {
DEBUG_MEMORY_READ((uint64) heap->elements, heap->element_size);
return heap->elements;
}

View File

@ -12,35 +12,57 @@
#include "../stdlib/Types.h"
#include "RingMemory.h"
typedef RingMemory Queue;
// WARNING: Structure needs to be the same as RingMemory
struct Queue {
byte* memory;
byte* end;
// @question Consider adding the element size to the Queue struct -> then we don't need to pass it as a parameter after initialization
byte* head;
// This variable is usually only used by single producer/consumer code mostly found in threads.
// One thread inserts elements -> updates head
// The other thread reads elements -> updates tail
// This code itself doesn't change this variable
byte* tail;
uint64 size;
uint32 alignment;
// The ring memory ends here
uint32 element_size;
};
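
Since Queue has to mirror RingMemory field for field (see the warning above), one option is a compile-time guard; a minimal sketch, assuming offsetof from <cstddef> and static_assert are acceptable in this codebase:

// Hypothetical layout guard; fails to compile if the shared prefix ever diverges.
static_assert(offsetof(Queue, size) == offsetof(RingMemory, size), "Queue must mirror RingMemory");
static_assert(offsetof(Queue, alignment) == offsetof(RingMemory, alignment), "Queue must mirror RingMemory");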
inline
void queue_alloc(Queue* queue, uint64 element_count, uint32 element_size, int32 alignment = 64)
void queue_alloc(Queue* queue, uint64 element_count, uint32 element_size, uint32 alignment = 64)
{
// @bug The alignment needs to be included in EVERY element
ring_alloc(queue, element_count * element_size, alignment);
element_size = ROUND_TO_NEAREST(element_size, alignment);
ring_alloc((RingMemory *) queue, element_count * element_size, alignment);
queue->element_size = element_size;
}
inline
void queue_init(Queue* queue, BufferMemory* buf, uint64 element_count, uint32 element_size, int32 alignment = 64)
void queue_init(Queue* queue, BufferMemory* buf, uint64 element_count, uint32 element_size, uint32 alignment = 64)
{
// @bug The alignment needs to be included in EVERY element
ring_init(queue, buf, element_count * element_size, alignment);
element_size = ROUND_TO_NEAREST(element_size, alignment);
ring_init((RingMemory *) queue, buf, element_count * element_size, alignment);
queue->element_size = element_size;
}
inline
void queue_init(Queue* queue, byte* buf, uint64 element_count, uint32 element_size, int32 alignment = 64)
void queue_init(Queue* queue, byte* buf, uint64 element_count, uint32 element_size, uint32 alignment = 64)
{
// @bug The alignment needs to be included in EVERY element
ring_init(queue, buf, element_count * element_size, alignment);
element_size = ROUND_TO_NEAREST(element_size, alignment);
ring_init((RingMemory *) queue, buf, element_count * element_size, alignment);
queue->element_size = element_size;
}
inline
void queue_free(Queue* queue)
{
ring_free(queue);
ring_free((RingMemory *) queue);
}
inline
@ -54,60 +76,145 @@ bool queue_set_empty(Queue* queue) {
}
inline
bool queue_is_full(Queue* queue, uint64 size, byte aligned = 0) {
return !ring_commit_safe((RingMemory *) queue, size, aligned);
bool queue_is_full(Queue* queue) {
return !ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment);
}
// Conditional Lock
inline
byte* queue_enqueue(Queue* queue, byte* data, uint64 size, byte aligned = 0)
void queue_enqueue_unique(Queue* queue, const byte* data)
{
byte* mem = ring_get_memory_nomove(queue, size, aligned);
memcpy(mem, data, size);
ring_move_pointer(queue, &queue->head, size, aligned);
ASSERT_SIMPLE((uint64_t) data % 4 == 0);
byte* tail = queue->tail;
while (tail != queue->head) {
ASSERT_SIMPLE((uint64_t) tail % 4 == 0);
// @performance we could probably make this faster since we don't need to compare the entire range
if (is_equal_aligned(tail, data, queue->element_size) == 0) {
return;
}
ring_move_pointer((RingMemory *) queue, &tail, queue->element_size, queue->alignment);
}
if (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) {
return;
}
byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment);
memcpy(mem, data, queue->element_size);
}
inline
byte* queue_enqueue(Queue* queue, byte* data)
{
byte* mem = ring_get_memory_nomove((RingMemory *) queue, queue->element_size, queue->alignment);
memcpy(mem, data, queue->element_size);
ring_move_pointer((RingMemory *) queue, &queue->head, queue->element_size, queue->alignment);
return mem;
}
inline
byte* queue_enqueue_start(Queue* queue, uint64 size, byte aligned = 0)
byte* queue_enqueue_safe(Queue* queue, byte* data)
{
return ring_get_memory_nomove(queue, size, aligned);
if (queue_is_full(queue)) {
return NULL;
}
byte* mem = ring_get_memory_nomove((RingMemory *) queue, queue->element_size, queue->alignment);
memcpy(mem, data, queue->element_size);
ring_move_pointer((RingMemory *) queue, &queue->head, queue->element_size, queue->alignment);
return mem;
}
// WARNING: Only useful for single producer single consumer
inline
byte* queue_enqueue_wait_atomic(Queue* queue, byte* data)
{
while (!ring_commit_safe_atomic((RingMemory *) queue, queue->element_size, queue->alignment)) {}
byte* mem = ring_get_memory_nomove((RingMemory *) queue, queue->element_size, queue->alignment);
memcpy(mem, data, queue->element_size);
ring_move_pointer((RingMemory *) queue, &queue->head, queue->element_size, queue->alignment);
return mem;
}
// WARNING: Only useful for single producer single consumer
inline
byte* queue_enqueue_safe_atomic(Queue* queue, byte* data)
{
if (!ring_commit_safe_atomic((RingMemory *) queue, queue->element_size, queue->alignment)) {
return NULL;
}
byte* mem = ring_get_memory_nomove((RingMemory *) queue, queue->element_size, queue->alignment);
memcpy(mem, data, queue->element_size);
ring_move_pointer((RingMemory *) queue, &queue->head, queue->element_size, queue->alignment);
return mem;
}
inline
void queue_enqueue_end(Queue* queue, uint64 size, byte aligned = 0)
byte* queue_enqueue_start(Queue* queue)
{
ring_move_pointer(queue, &queue->head, size, aligned);
return ring_get_memory_nomove((RingMemory *) queue, queue->element_size, queue->alignment);
}
inline
bool queue_dequeue(Queue* queue, byte* data, uint64 size, byte aligned = 0)
void queue_enqueue_end(Queue* queue)
{
ring_move_pointer((RingMemory *) queue, &queue->head, queue->element_size, queue->alignment);
}
inline
bool queue_dequeue(Queue* queue, byte* data)
{
if (queue->head == queue->tail) {
return false;
}
if (size == 4) {
if (queue->element_size == 4) {
*((int32 *) data) = *((int32 *) queue->tail);
} else {
memcpy(data, queue->tail, size);
memcpy(data, queue->tail, queue->element_size);
}
ring_move_pointer(queue, &queue->tail, size, aligned);
ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment);
return true;
}
// WARNING: Only useful for single producer single consumer
inline
bool queue_dequeue_atomic(Queue* queue, byte* data)
{
if (atomic_get_relaxed((uint64 *) &queue->head) == (uint64) queue->tail) {
return false;
}
if (queue->element_size == 4) {
*((int32 *) data) = *((int32 *) queue->tail);
} else {
memcpy(data, queue->tail, queue->element_size);
}
ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment);
return true;
}
inline
byte* queue_dequeue_keep(Queue* queue, uint64 size, byte aligned = 0)
byte* queue_dequeue_keep(Queue* queue)
{
if (queue->head == queue->tail) {
return NULL;
}
byte* data = queue->tail;
ring_move_pointer(queue, &queue->tail, size, aligned);
ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment);
return data;
}
@ -119,9 +226,9 @@ byte* queue_dequeue_start(Queue* queue)
}
inline
void queue_dequeue_end(Queue* queue, uint64 size, byte aligned = 0)
void queue_dequeue_end(Queue* queue)
{
ring_move_pointer(queue, &queue->tail, size, aligned);
ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment);
}
#endif
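
A short usage sketch of the new fixed-element-size queue API; the element type and counts are illustrative only:

Queue commands;
queue_alloc(&commands, 128, sizeof(int32), 4); // 128 elements, 4-byte alignment

int32 cmd = 7;
queue_enqueue(&commands, (byte *) &cmd);

int32 out;
if (queue_dequeue(&commands, (byte *) &out)) {
    // handle out
}

queue_free(&commands);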

View File

@ -24,12 +24,16 @@
#include "../platform/win32/Allocator.h"
#include "../platform/win32/threading/ThreadDefines.h"
#include "../platform/win32/threading/Semaphore.h"
#include "../platform/win32/threading/Atomic.h"
#elif __linux__
#include "../platform/linux/Allocator.h"
#include "../platform/linux/threading/ThreadDefines.h"
#include "../platform/linux/threading/Semaphore.h"
#include "../platform/linux/threading/Atomic.h"
#endif
// WARNING: Changing this structure has effects on other data structures (e.g. Queue)
// When changing it make sure you understand what you are doing
struct RingMemory {
byte* memory;
byte* end;
@ -43,14 +47,11 @@ struct RingMemory {
byte* tail;
uint64 size;
int32 alignment;
int32 element_alignment;
uint32 alignment;
};
// @bug alignment should also include the end point, not just the start
inline
void ring_alloc(RingMemory* ring, uint64 size, int32 alignment = 64)
void ring_alloc(RingMemory* ring, uint64 size, uint32 alignment = 64)
{
ASSERT_SIMPLE(size);
@ -63,7 +64,6 @@ void ring_alloc(RingMemory* ring, uint64 size, int32 alignment = 64)
ring->tail = ring->memory;
ring->size = size;
ring->alignment = alignment;
ring->element_alignment = 0;
memset(ring->memory, 0, ring->size);
@ -71,7 +71,7 @@ void ring_alloc(RingMemory* ring, uint64 size, int32 alignment = 64)
}
inline
void ring_init(RingMemory* ring, BufferMemory* buf, uint64 size, int32 alignment = 64)
void ring_init(RingMemory* ring, BufferMemory* buf, uint64 size, uint32 alignment = 64)
{
ASSERT_SIMPLE(size);
@ -82,26 +82,23 @@ void ring_init(RingMemory* ring, BufferMemory* buf, uint64 size, int32 alignment
ring->tail = ring->memory;
ring->size = size;
ring->alignment = alignment;
ring->element_alignment = 0;
DEBUG_MEMORY_INIT((uint64) ring->memory, ring->size);
DEBUG_MEMORY_RESERVE((uint64) ring->memory, ring->size, 187);
}
inline
void ring_init(RingMemory* ring, byte* buf, uint64 size, int32 alignment = 64)
void ring_init(RingMemory* ring, byte* buf, uint64 size, uint32 alignment = 64)
{
ASSERT_SIMPLE(size);
// @bug what if an alignment is defined?
ring->memory = buf;
ring->memory = (byte *) ROUND_TO_NEAREST((uintptr_t) buf, alignment);
ring->end = ring->memory + size;
ring->head = ring->memory;
ring->tail = ring->memory;
ring->size = size;
ring->alignment = alignment;
ring->element_alignment = 0;
memset(ring->memory, 0, ring->size);
@ -122,10 +119,6 @@ void ring_free(RingMemory* ring)
inline
byte* ring_calculate_position(const RingMemory* ring, uint64 size, byte aligned = 0)
{
if (aligned == 0) {
aligned = (byte) OMS_MAX(ring->element_alignment, 1);
}
byte* head = ring->head;
if (aligned > 1) {
@ -158,9 +151,9 @@ void ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, byte aligned =
{
ASSERT_SIMPLE(size <= ring->size);
if (aligned == 0) {
aligned = (byte) OMS_MAX(ring->element_alignment, 1);
}
// Actually, we cannot be sure that this is a read, it could also be a write.
// However, it is better to do it once here than manually in every place that uses this function
DEBUG_MEMORY_READ((uint64) *pos, size);
if (aligned > 1) {
uintptr_t address = (uintptr_t) *pos;
@ -184,10 +177,6 @@ byte* ring_get_memory(RingMemory* ring, uint64 size, byte aligned = 0, bool zero
{
ASSERT_SIMPLE(size <= ring->size);
if (aligned == 0) {
aligned = (byte) OMS_MAX(ring->element_alignment, 1);
}
if (aligned > 1) {
uintptr_t address = (uintptr_t) ring->head;
ring->head += (aligned - (address& (aligned - 1))) % aligned;
@ -222,10 +211,6 @@ byte* ring_get_memory_nomove(RingMemory* ring, uint64 size, byte aligned = 0, bo
{
ASSERT_SIMPLE(size <= ring->size);
if (aligned == 0) {
aligned = (byte) OMS_MAX(ring->element_alignment, 1);
}
byte* pos = ring->head;
if (aligned > 1) {
@ -285,6 +270,27 @@ bool ring_commit_safe(const RingMemory* ring, uint64 size, byte aligned = 0)
}
}
inline
bool ring_commit_safe_atomic(const RingMemory* ring, uint64 size, byte aligned = 0)
{
// aligned * 2 since that should be the maximum overhead for an element
// @bug could this result in a case where the ring is considered empty/full (false positive/negative)?
// The "correct" version would probably to use ring_move_pointer in some form
uint64 max_mem_required = size + aligned * 2;
uint64 tail = atomic_get_relaxed((uint64 *) &ring->tail);
uint64 head = atomic_get_relaxed((uint64 *) &ring->head);
if (tail < head) {
return ((uint64) (ring->end - head)) > max_mem_required
|| ((uint64) (tail - (uint64) ring->memory)) > max_mem_required;
} else if (tail > head) {
return ((uint64) (tail - head)) > max_mem_required;
} else {
return true;
}
}
inline
void ring_force_head_update(const RingMemory* ring)
{
@ -314,9 +320,6 @@ int64 ring_dump(const RingMemory* ring, byte* data)
*((int32 *) data) = SWAP_ENDIAN_LITTLE(ring->alignment);
data += sizeof(ring->alignment);
*((int32 *) data) = SWAP_ENDIAN_LITTLE(ring->element_alignment);
data += sizeof(ring->element_alignment);
// tail/End
*((uint64 *) data) = SWAP_ENDIAN_LITTLE((uint64) (ring->tail - ring->memory));
data += sizeof(ring->tail);

View File

@ -0,0 +1,42 @@
/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef TOS_MEMORY_THREADED_CHUNK_MEMORY_H
#define TOS_MEMORY_THREADED_CHUNK_MEMORY_H
#include <string.h>
#include "../stdlib/Types.h"
#if _WIN32
#include "../platform/win32/threading/Thread.h"
#elif __linux__
#include "../platform/linux/threading/Thread.h"
#endif
struct ThreadedChunkMemory {
byte* memory;
uint64 count;
uint64 size;
uint64 chunk_size;
int64 last_pos;
int32 alignment;
// length = count
// free describes which locations are used and which are free
uint64* free;
// Chunk implementation ends here
// completeness indicates whether the data of a chunk has been completely written
uint64* completeness;
pthread_mutex_t mutex;
pthread_cond_t cond;
};
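
Going by the comment above, completeness appears to be a per-chunk bitmask like free; a hedged sketch of how a reader might test whether chunk i is fully written (the helper name and bit layout are assumptions):

// Assumption: one bit per chunk, packed into uint64 words like the free bitmask.
inline bool thrd_chunk_is_complete(const ThreadedChunkMemory* buf, uint64 i) {
    return (buf->completeness[i / 64] >> (i % 64)) & 1;
}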
#endif

View File

@ -36,8 +36,10 @@ struct ThreadedQueue {
byte* tail;
uint64 size;
int32 alignment;
int32 element_alignment;
uint32 alignment;
// The ring memory ends here
uint32 element_size;
// We support both conditional locking and semaphore locking
// These values are not initialized and not used unless you use the queue
@ -48,26 +50,14 @@ struct ThreadedQueue {
sem_t full;
};
// @question Consider to add the element size into the Queue struct -> we don't need to pass it after initialization as parameter
inline
void thrd_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint64 element_size, int32 alignment = 64)
void thrd_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint64 element_size, uint32 alignment = 64)
{
// @bug The alignment needs to be included in EVERY element
element_size = ROUND_TO_NEAREST(element_size, alignment);
ring_alloc((RingMemory *) queue, element_count * element_size, alignment);
pthread_mutex_init(&queue->mutex, NULL);
pthread_cond_init(&queue->cond, NULL);
sem_init(&queue->empty, element_count);
sem_init(&queue->full, 0);
}
inline
void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_count, uint64 element_size, int32 alignment = 64)
{
// @bug The alignment needs to be included in EVERY element
ring_init((RingMemory *) queue, buf, element_count * element_size, alignment);
queue->element_size = element_size;
pthread_mutex_init(&queue->mutex, NULL);
pthread_cond_init(&queue->cond, NULL);
@ -77,11 +67,30 @@ void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_cou
}
inline
void thrd_queue_init(ThreadedQueue* queue, byte* buf, uint32 element_count, uint64 element_size, int32 alignment = 64)
void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_count, uint64 element_size, uint32 alignment = 64)
{
// @bug The alignment needs to be included in EVERY element
element_size = ROUND_TO_NEAREST(element_size, alignment);
ring_init((RingMemory *) queue, buf, element_count * element_size, alignment);
queue->element_size = element_size;
pthread_mutex_init(&queue->mutex, NULL);
pthread_cond_init(&queue->cond, NULL);
sem_init(&queue->empty, element_count);
sem_init(&queue->full, 0);
}
inline
void thrd_queue_init(ThreadedQueue* queue, byte* buf, uint32 element_count, uint64 element_size, uint32 alignment = 64)
{
element_size = ROUND_TO_NEAREST(element_size, alignment);
ring_init((RingMemory *) queue, buf, element_count * element_size, alignment);
queue->element_size = element_size;
pthread_mutex_init(&queue->mutex, NULL);
pthread_cond_init(&queue->cond, NULL);
@ -101,7 +110,7 @@ void thrd_queue_free(ThreadedQueue* queue)
// @todo Create enqueue_unique and enqueue_unique_sem
inline
void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0)
void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data)
{
ASSERT_SIMPLE((uint64_t) data % 4 == 0);
pthread_mutex_lock(&queue->mutex);
@ -111,28 +120,28 @@ void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data, uint
ASSERT_SIMPLE((uint64_t) tail % 4 == 0);
// @performance we could probably make this faster since we don't need to compare the entire range
if (is_equal_aligned(tail, data, size) == 0) {
if (is_equal_aligned(tail, data, queue->element_size) == 0) {
pthread_mutex_unlock(&queue->mutex);
return;
}
ring_move_pointer((RingMemory *) queue, &tail, size, aligned);
ring_move_pointer((RingMemory *) queue, &tail, queue->element_size, queue->alignment);
}
while (!ring_commit_safe((RingMemory *) queue, size, aligned)) {
while (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) {
pthread_cond_wait(&queue->cond, &queue->mutex);
}
byte* mem = ring_get_memory((RingMemory *) queue, size, aligned);
memcpy(mem, data, size);
byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment);
memcpy(mem, data, queue->element_size);
pthread_cond_signal(&queue->cond);
pthread_mutex_unlock(&queue->mutex);
}
inline
void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0)
void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data)
{
ASSERT_SIMPLE((uint64_t) data % 4 == 0);
pthread_mutex_lock(&queue->mutex);
@ -142,23 +151,23 @@ void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data, uint64 si
ASSERT_SIMPLE((uint64_t) tail % 4 == 0);
// @performance we could probably make this faster since we don't need to compare the entire range
if (is_equal_aligned(tail, data, size) == 0) {
if (is_equal_aligned(tail, data, queue->element_size) == 0) {
pthread_mutex_unlock(&queue->mutex);
return;
}
ring_move_pointer((RingMemory *) queue, &tail, size, aligned);
ring_move_pointer((RingMemory *) queue, &tail, queue->element_size, queue->alignment);
}
if (!ring_commit_safe((RingMemory *) queue, size, aligned)) {
if (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) {
pthread_mutex_unlock(&queue->mutex);
return;
}
byte* mem = ring_get_memory((RingMemory *) queue, size, aligned);
memcpy(mem, data, size);
byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment);
memcpy(mem, data, queue->element_size);
pthread_cond_signal(&queue->cond);
pthread_mutex_unlock(&queue->mutex);
@ -166,49 +175,49 @@ void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data, uint64 si
// Conditional Lock
inline
void thrd_queue_enqueue(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0)
void thrd_queue_enqueue(ThreadedQueue* queue, const byte* data)
{
pthread_mutex_lock(&queue->mutex);
if (!ring_commit_safe((RingMemory *) queue, size, aligned)) {
if (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) {
pthread_mutex_unlock(&queue->mutex);
return;
}
byte* mem = ring_get_memory((RingMemory *) queue, size, aligned);
memcpy(mem, data, size);
byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment);
memcpy(mem, data, queue->element_size);
pthread_cond_signal(&queue->cond);
pthread_mutex_unlock(&queue->mutex);
}
inline
void thrd_queue_enqueue_wait(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0)
void thrd_queue_enqueue_wait(ThreadedQueue* queue, const byte* data)
{
pthread_mutex_lock(&queue->mutex);
while (!ring_commit_safe((RingMemory *) queue, size, aligned)) {
while (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) {
pthread_cond_wait(&queue->cond, &queue->mutex);
}
byte* mem = ring_get_memory((RingMemory *) queue, size, aligned);
memcpy(mem, data, size);
byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment);
memcpy(mem, data, queue->element_size);
pthread_cond_signal(&queue->cond);
pthread_mutex_unlock(&queue->mutex);
}
inline
byte* thrd_queue_enqueue_start_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0)
byte* thrd_queue_enqueue_start_wait(ThreadedQueue* queue)
{
pthread_mutex_lock(&queue->mutex);
while (!ring_commit_safe((RingMemory *) queue, size, aligned)) {
while (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) {
pthread_cond_wait(&queue->cond, &queue->mutex);
}
return ring_get_memory((RingMemory *) queue, size, aligned);
return ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment);
}
inline
@ -219,7 +228,7 @@ void thrd_queue_enqueue_end_wait(ThreadedQueue* queue)
}
inline
bool thrd_queue_dequeue(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0)
bool thrd_queue_dequeue(ThreadedQueue* queue, byte* data)
{
if (queue->head == queue->tail) {
return false;
@ -233,12 +242,12 @@ bool thrd_queue_dequeue(ThreadedQueue* queue, byte* data, uint64 size, byte alig
return false;
}
if (size == 4) {
if (queue->element_size == 4) {
*((int32 *) data) = *((int32 *) queue->tail);
} else {
memcpy(data, queue->tail, size);
memcpy(data, queue->tail, queue->element_size);
}
ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned);
ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment);
pthread_cond_signal(&queue->cond);
pthread_mutex_unlock(&queue->mutex);
@ -256,9 +265,9 @@ bool thrd_queue_empty(ThreadedQueue* queue) {
}
inline
bool thrd_queue_full(ThreadedQueue* queue, uint64 size, byte aligned = 0) {
bool thrd_queue_full(ThreadedQueue* queue) {
pthread_mutex_lock(&queue->mutex);
bool is_full = !ring_commit_safe((RingMemory *) queue, size, aligned);
bool is_full = !ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment);
pthread_mutex_unlock(&queue->mutex);
return is_full;
@ -266,7 +275,7 @@ bool thrd_queue_full(ThreadedQueue* queue, uint64 size, byte aligned = 0) {
// Waits until a dequeue is available
inline
void thrd_queue_dequeue_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0)
void thrd_queue_dequeue_wait(ThreadedQueue* queue, byte* data)
{
pthread_mutex_lock(&queue->mutex);
@ -274,8 +283,8 @@ void thrd_queue_dequeue_wait(ThreadedQueue* queue, byte* data, uint64 size, byte
pthread_cond_wait(&queue->cond, &queue->mutex);
}
memcpy(data, queue->tail, size);
ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned);
memcpy(data, queue->tail, queue->element_size);
ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment);
pthread_cond_signal(&queue->cond);
pthread_mutex_unlock(&queue->mutex);
@ -294,9 +303,9 @@ byte* thrd_queue_dequeue_start_wait(ThreadedQueue* queue)
}
inline
void thrd_queue_dequeue_end_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0)
void thrd_queue_dequeue_end_wait(ThreadedQueue* queue)
{
ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned);
ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment);
pthread_cond_signal(&queue->cond);
pthread_mutex_unlock(&queue->mutex);
@ -304,20 +313,20 @@ void thrd_queue_dequeue_end_wait(ThreadedQueue* queue, uint64 size, byte aligned
// Semaphore Lock
inline
void thrd_queue_enqueue_sem_wait(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0)
void thrd_queue_enqueue_sem_wait(ThreadedQueue* queue, const byte* data)
{
sem_wait(&queue->empty);
pthread_mutex_lock(&queue->mutex);
byte* mem = ring_get_memory((RingMemory *) queue, size, aligned);
memcpy(mem, data, size);
byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment);
memcpy(mem, data, queue->element_size);
pthread_mutex_unlock(&queue->mutex);
sem_post(&queue->full);
}
inline
bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, uint64 size, uint64 wait, byte aligned = 0)
bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, uint64 wait)
{
if (sem_timedwait(&queue->empty, wait)) {
return false;
@ -325,8 +334,8 @@ bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, ui
pthread_mutex_lock(&queue->mutex);
byte* mem = ring_get_memory((RingMemory *) queue, size, aligned);
memcpy(mem, data, size);
byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment);
memcpy(mem, data, queue->element_size);
pthread_mutex_unlock(&queue->mutex);
sem_post(&queue->full);
@ -335,12 +344,12 @@ bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, ui
}
inline
byte* thrd_queue_enqueue_start_sem_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0)
byte* thrd_queue_enqueue_start_sem_wait(ThreadedQueue* queue)
{
sem_wait(&queue->empty);
pthread_mutex_lock(&queue->mutex);
return ring_get_memory((RingMemory *) queue, size, aligned);
return ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment);
}
inline
@ -351,20 +360,20 @@ void thrd_queue_enqueue_end_sem_wait(ThreadedQueue* queue)
}
inline
byte* thrd_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0)
byte* thrd_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data)
{
sem_wait(&queue->full);
pthread_mutex_lock(&queue->mutex);
memcpy(data, queue->tail, size);
ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned);
memcpy(data, queue->tail, queue->element_size);
ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment);
pthread_mutex_unlock(&queue->mutex);
sem_post(&queue->empty);
return data;
}
inline
bool thrd_queue_dequeue_sem_timedwait(ThreadedQueue* queue, byte* data, uint64 size, uint64 wait, byte aligned = 0)
bool thrd_queue_dequeue_sem_timedwait(ThreadedQueue* queue, byte* data, uint64 wait)
{
if (sem_timedwait(&queue->full, wait)) {
return false;
@ -372,8 +381,8 @@ bool thrd_queue_dequeue_sem_timedwait(ThreadedQueue* queue, byte* data, uint64 s
pthread_mutex_lock(&queue->mutex);
memcpy(data, queue->tail, size);
ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned);
memcpy(data, queue->tail, queue->element_size);
ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment);
pthread_mutex_unlock(&queue->mutex);
sem_post(&queue->empty);
@ -391,9 +400,9 @@ byte* thrd_queue_dequeue_start_sem_wait(ThreadedQueue* queue)
}
inline
void thrd_queue_dequeue_end_sem_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0)
void thrd_queue_dequeue_end_sem_wait(ThreadedQueue* queue)
{
ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned);
ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment);
pthread_mutex_unlock(&queue->mutex);
sem_post(&queue->empty);
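
A hedged single-producer/single-consumer sketch using the semaphore-based API of this queue; the element type and sizes are assumptions:

ThreadedQueue jobs;
thrd_queue_alloc(&jobs, 256, sizeof(int32), 4);

// Producer thread: blocks while the queue is full.
int32 job_id = 42;
thrd_queue_enqueue_sem_wait(&jobs, (const byte *) &job_id);

// Consumer thread: blocks until an element is available.
int32 next_job;
thrd_queue_dequeue_sem_wait(&jobs, (byte *) &next_job);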

View File

@ -33,8 +33,8 @@ struct ThreadedRingMemory {
uint64 size;
int32 alignment;
int32 element_alignment;
// The ring memory ends here
pthread_mutex_t mutex;
};

View File

@ -45,7 +45,8 @@ void* platform_alloc_aligned(size_t size, int32 alignment)
alignment = page_size;
}
size += alignment - 1 + sizeof(void *) + sizeof(size_t);
size = ROUND_TO_NEAREST(size, alignment);
size += alignment + sizeof(void *) + sizeof(size_t);
void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_SIMPLE(ptr != MAP_FAILED);

File diff suppressed because it is too large

View File

@ -61,7 +61,7 @@ int32 pthread_join(pthread_t thread, void** retval) {
}
int32 pthread_mutex_init(pthread_mutex_t* mutex, pthread_mutexattr_t*) {
atomic_set(mutex, 0);
atomic_set_acquire(mutex, 0);
return 0;
}
@ -77,44 +77,43 @@ int32 pthread_mutex_lock(pthread_mutex_t* mutex) {
}
int32 pthread_mutex_unlock(pthread_mutex_t* mutex) {
atomic_set(mutex, 0);
atomic_set_release(mutex, 0);
syscall(SYS_futex, mutex, FUTEX_WAKE, 1, NULL, NULL, 0);
return 0;
}
int32 pthread_cond_init(pthread_cond_t* cond, pthread_condattr_t*) {
atomic_set(cond, 0);
atomic_set_release(cond, 0);
return 0;
}
int32 pthread_cond_wait(pthread_cond_t* cond, pthread_mutex_t* mutex) {
pthread_mutex_unlock(mutex);
syscall(SYS_futex, cond, FUTEX_WAIT, atomic_get(cond), NULL, NULL, 0);
syscall(SYS_futex, cond, FUTEX_WAIT, atomic_get_acquire(cond), NULL, NULL, 0);
pthread_mutex_lock(mutex);
return 0;
}
int32 pthread_cond_signal(pthread_cond_t* cond) {
atomic_fetch_add(cond, 1);
atomic_fetch_add_acquire(cond, 1);
syscall(SYS_futex, cond, FUTEX_WAKE, 1, NULL, NULL, 0);
return 0;
}
int32 pthread_rwlock_init(pthread_rwlock_t* rwlock, const pthread_rwlockattr_t*) {
atomic_set((int64 *) &rwlock->readers, 0);
//atomic_set(&rwlock->writer, 0);
atomic_set_release((int64 *) &rwlock->readers, 0);
return 0;
}
int32 pthread_rwlock_rdlock(pthread_rwlock_t* rwlock) {
while (atomic_get(&rwlock->writer)) {}
while (atomic_get_acquire_release(&rwlock->writer)) {}
atomic_fetch_add(&rwlock->readers, 1);
atomic_fetch_add_acquire(&rwlock->readers, 1);
return 0;
}
@ -126,10 +125,10 @@ int32 pthread_rwlock_wrlock(pthread_rwlock_t* rwlock) {
}
int32 pthread_rwlock_unlock(pthread_rwlock_t* rwlock) {
if (atomic_get(&rwlock->writer)) {
atomic_set(&rwlock->writer, 0);
if (atomic_get_acquire(&rwlock->writer)) {
atomic_set_release(&rwlock->writer, 0);
} else {
atomic_fetch_sub(&rwlock->readers, 1);
atomic_fetch_sub_acquire(&rwlock->readers, 1);
}
return 0;

View File

@ -26,6 +26,8 @@ void* platform_alloc(size_t size)
inline
void* platform_alloc_aligned(size_t size, int32 alignment)
{
size = ROUND_TO_NEAREST(size, alignment);
void* ptr = VirtualAlloc(NULL, size + alignment + sizeof(void*), MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
ASSERT_SIMPLE(ptr);

File diff suppressed because it is too large

View File

@ -72,10 +72,6 @@ struct HashMap {
ChunkMemory buf;
};
// @performance Implement more like gperf, our implementation is slow. However, keep it around since it is very general purpose
// Alternatively, also create a version that creates perfect hashes (input requires a hash function and a seed for that hash function)
// Both would be saved in the hash impl.
// WARNING: element_size = element size + remaining HashEntry data size
void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ring)
{
@ -132,6 +128,7 @@ void hashmap_insert(HashMap* hm, const char* key, int32 value) {
HashEntryInt32* entry = (HashEntryInt32 *) chunk_get_element(&hm->buf, element, true);
entry->element_id = element;
// @performance Do we really need strncpy? Either use memcpy or strcpy?! Same goes for all the other cases below
strncpy(entry->key, key, HASH_MAP_MAX_KEY_LENGTH);
entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0';
@ -364,7 +361,7 @@ inline
int64 hashmap_dump(const HashMap* hm, byte* data)
{
*((uint64 *) data) = SWAP_ENDIAN_LITTLE(hm->buf.count);
data += sizeof(uint64);
data += sizeof(hm->buf.count);
// Dump the table content where the elements are relative indices/pointers
for (int32 i = 0; i < hm->buf.count; ++i) {

View File

@ -52,17 +52,38 @@ inline f32 oms_floor(f32 a) { return _mm_cvtss_f32(_mm_floor_ss(_mm_setzero_ps()
inline f32 oms_ceil(f32 a) { return _mm_cvtss_f32(_mm_ceil_ss(_mm_setzero_ps(), _mm_set_ss(a))); }
inline uint32 hash(uint64 a, uint64 b = 0)
inline uint32 oms_hash(uint64 a, uint64 b = 0)
{
uint8 seed[16] = {
0xaa, 0x9b, 0xbd, 0xb8, 0xa1, 0x98, 0xac, 0x3f, 0x1f, 0x94, 0x07, 0xb3, 0x8c, 0x27, 0x93, 0x69,
};
__m128i hash = _mm_set_epi64x(a, b);
hash = _mm_aesdec_si128(hash, _mm_loadu_si128((__m128i *) seed));
hash = _mm_aesdec_si128(hash, _mm_loadu_si128((__m128i *) seed));
hash = _mm_aesdec_si128(hash, _mm_loadu_si128((__m128i *) seed));
hash = _mm_aesdec_si128(hash, _mm_loadu_si128((__m128i *) seed));
return _mm_extract_epi32(hash, 0);
}
inline void oms_fence_memory()
{
_mm_mfence();
}
inline void oms_fence_write()
{
_mm_sfence();
}
inline void oms_fence_load()
{
_mm_lfence();
}
inline
void oms_invalidate_cache(void* address)
{
_mm_clflush(address);
}
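
For orientation, a rough sketch of the publish/consume ordering these fences are intended for; the variable names are illustrative and this is not a complete synchronization recipe:

// Illustrative single-producer/single-consumer publish (names are made up):
static int32 g_payload;
static volatile int32 g_ready;

void producer_publish() {
    g_payload = 42;     // write the data first
    oms_fence_write();  // store fence: payload becomes visible before the flag
    g_ready = 1;
}

void consumer_poll() {
    if (g_ready) {
        oms_fence_load(); // load fence: don't read payload before the flag
        int32 value = g_payload;
        (void) value;
    }
}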
#endif

View File

@ -10,6 +10,7 @@
#define TOS_STDLIB_INTRINSICS_ARM_H
#include <arm_sve.h>
#include <arm_acle.h>
inline float oms_sqrt(float a) {
svfloat32_t input = svdup_f32(a);
@ -67,4 +68,25 @@ inline float oms_ceil(float a) {
return svget1_f32(result);
}
inline void oms_fence_memory()
{
__dmb(0xF);
}
inline void oms_fence_write()
{
__dmb(0xB);
}
inline void oms_fence_load()
{
__dmb(0x7);
}
inline
void oms_invalidate_cache(void* address)
{
asm volatile("dc ivac, %0" : : "r"(address) : "memory");
}
#endif

View File

@ -0,0 +1,363 @@
/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef TOS_STDLIB_PERFECT_HASH_MAP_H
#define TOS_STDLIB_PERFECT_HASH_MAP_H
#include "Types.h"
#include "HashMap.h"
#include "../hash/GeneralHash.h"
#include "../memory/RingMemory.h"
#define PERFECT_HASH_MAP_MAX_KEY_LENGTH 32
typedef uint64 (*perfect_hash_function)(const char* key, int32 seed);
const perfect_hash_function PERFECT_HASH_FUNCTIONS[] = {
hash_djb2_seeded,
hash_sdbm_seeded,
hash_lose_lose_seeded,
hash_polynomial_rolling_seeded,
hash_fnv1a_seeded,
hash_oat_seeded,
hash_ejb_seeded
};
struct PerfectHashEntryInt32 {
int64 element_id;
char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
int32 value;
};
struct PerfectHashEntryInt64 {
int64 element_id;
char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
int64 value;
};
struct PerfectHashEntryUIntPtr {
int64 element_id;
char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
uintptr_t value;
};
struct PerfectHashEntryVoidP {
int64 element_id;
char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
void* value;
};
struct PerfectHashEntryFloat {
int64 element_id;
char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
f32 value;
};
struct PerfectHashEntryStr {
int64 element_id;
char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
char value[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
};
struct PerfectHashEntry {
int64 element_id;
char key[PERFECT_HASH_MAP_MAX_KEY_LENGTH];
byte* value;
};
struct PerfectHashMap {
int32 hash_seed;
perfect_hash_function hash_function;
int32 entry_size;
int32 map_size;
byte* hash_entries;
};
bool set_perfect_hashmap(PerfectHashMap* hm, const char** keys, int32 key_count, perfect_hash_function hash_func, int32 seed_tries, RingMemory* ring)
{
int32* indices = (int32 *) ring_get_memory(ring, hm->map_size * sizeof(int32), 4);
bool is_unique = false;
int32 seed;
int32 c = 0;
while (!is_unique && c < seed_tries) {
is_unique = true;
seed = rand();
memset(indices, 0, hm->map_size * sizeof(int32));
for (int32 j = 0; j < key_count; ++j) {
int32 index = hash_func(keys[j], seed) % hm->map_size;
if (indices[index]) {
is_unique = false;
break;
} else {
indices[index] = 1;
}
}
++c;
}
if (is_unique) {
hm->hash_seed = seed;
hm->hash_function = hash_func;
}
return is_unique;
}
bool perfect_hashmap_find_perfect_hash(PerfectHashMap* hm, const char** keys, int32 key_count, int32 seed_trys, RingMemory* ring)
{
int32* indices = (int32 *) ring_get_memory(ring, hm->map_size * sizeof(int32), 4);
bool is_unique = false;
for (int32 i = 0; i < ARRAY_COUNT(PERFECT_HASH_FUNCTIONS); ++i) {
int32 seed;
int32 c = 0;
while (!is_unique && c < seed_trys) {
is_unique = true;
seed = rand();
memset(indices, 0, hm->map_size * sizeof(int32));
for (int32 j = 0; j < key_count; ++j) {
int32 index = (PERFECT_HASH_FUNCTIONS[i])(keys[j], seed) % hm->map_size;
if (indices[index]) {
is_unique = false;
break;
} else {
indices[index] = 1;
}
}
++c;
}
if (is_unique) {
hm->hash_seed = seed;
hm->hash_function = PERFECT_HASH_FUNCTIONS[i];
break;
}
}
return is_unique;
}
// WARNING: element_size = element size + remaining HashEntry data size
void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, RingMemory* ring)
{
hm->map_size = count;
hm->entry_size = element_size;
hm->hash_entries = ring_get_memory(
ring,
count * element_size,
0, true
);
}
// WARNING: element_size = element size + remaining HashEntry data size
void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, BufferMemory* buf)
{
hm->map_size = count;
hm->entry_size = element_size;
hm->hash_entries = buffer_get_memory(
buf,
count * element_size,
0, true
);
}
// WARNING: element_size = element size + remaining HashEntry data size
void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, byte* buf)
{
hm->map_size = count;
hm->entry_size = element_size;
hm->hash_entries = buf;
}
// Calculates how large a hashmap will be
inline
int64 perfect_hashmap_size(int count, int32 element_size)
{
return count * element_size;
}
inline
int64 perfect_hashmap_size(const PerfectHashMap* hm)
{
return hm->entry_size * hm->map_size;
}
inline
void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, int32 value) {
int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size;
PerfectHashEntryInt32* entry = (PerfectHashEntryInt32 *) (hm->hash_entries + hm->entry_size * index);
entry->element_id = index;
strcpy(entry->key, key);
entry->value = value;
}
inline
void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, int64 value) {
int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size;
PerfectHashEntryInt64* entry = (PerfectHashEntryInt64 *) (hm->hash_entries + hm->entry_size * index);
entry->element_id = index;
strcpy(entry->key, key);
entry->value = value;
}
inline
void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, uintptr_t value) {
int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size;
PerfectHashEntryUIntPtr* entry = (PerfectHashEntryUIntPtr *) (hm->hash_entries + hm->entry_size * index);
entry->element_id = index;
strcpy(entry->key, key);
entry->value = value;
}
inline
void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, void* value) {
int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size;
PerfectHashEntryVoidP* entry = (PerfectHashEntryVoidP *) (hm->hash_entries + hm->entry_size * index);
entry->element_id = index;
strcpy(entry->key, key);
entry->value = value;
}
inline
void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, f32 value) {
int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size;
PerfectHashEntryFloat* entry = (PerfectHashEntryFloat *) (hm->hash_entries + hm->entry_size * index);
entry->element_id = index;
strcpy(entry->key, key);
entry->value = value;
}
inline
void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, const char* value) {
int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size;
PerfectHashEntryStr* entry = (PerfectHashEntryStr *) (hm->hash_entries + hm->entry_size * index);
entry->element_id = index;
strcpy(entry->key, key);
memcpy(entry->value, value, PERFECT_HASH_MAP_MAX_KEY_LENGTH);
}
inline
void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, byte* value) {
int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size;
PerfectHashEntryStr* entry = (PerfectHashEntryStr *) (hm->hash_entries + hm->entry_size * index);
entry->element_id = index;
strcpy(entry->key, key);
memcpy(entry->value, value, hm->entry_size - sizeof(PerfectHashEntry));
}
inline
PerfectHashEntry* perfect_hashmap_get_entry(const PerfectHashMap* hm, const char* key) {
int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size;
PerfectHashEntry* entry = (PerfectHashEntry *) (hm->hash_entries + hm->entry_size * index);
return *entry->key == '\0' ? NULL : entry;
}
inline
void perfect_hashmap_delete_entry(PerfectHashMap* hm, const char* key) {
int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size;
PerfectHashEntry* entry = (PerfectHashEntry *) (hm->hash_entries + hm->entry_size * index);
// This depends on where we check if an element exists (if we change perfect_hashmap_get_entry this also needs changing)
*entry->key = '\0';
}
inline
int64 perfect_hashmap_dump(const PerfectHashMap* hm, byte* data)
{
byte* start = data;
*((int32 *) data) = SWAP_ENDIAN_LITTLE(hm->map_size);
data += sizeof(hm->map_size);
*((int32 *) data) = SWAP_ENDIAN_LITTLE(hm->hash_seed);
data += sizeof(hm->hash_seed);
for (int32 i = 0; i < ARRAY_COUNT(PERFECT_HASH_FUNCTIONS); ++i) {
if (hm->hash_function == PERFECT_HASH_FUNCTIONS[i]) {
*((int32 *) data) = SWAP_ENDIAN_LITTLE(i);
data += sizeof(i);
break;
}
}
*((int32 *) data) = SWAP_ENDIAN_LITTLE(hm->entry_size);
data += sizeof(hm->entry_size);
memcpy(data, hm->hash_entries, hm->map_size * hm->entry_size);
data += hm->map_size * hm->entry_size;
return (int64) (data - start);
}
// WARNING: Requires perfect_hashmap_create first
inline
int64 perfect_hashmap_load(PerfectHashMap* hm, const byte* data)
{
const byte* start = data;
hm->map_size = SWAP_ENDIAN_LITTLE(*((int32 *) data));
data += sizeof(hm->map_size);
hm->hash_seed = SWAP_ENDIAN_LITTLE(*((int32 *) data));
data += sizeof(hm->hash_seed);
hm->hash_function = PERFECT_HASH_FUNCTIONS[SWAP_ENDIAN_LITTLE(*((int32 *) data))];
data += sizeof(int32);
hm->entry_size = SWAP_ENDIAN_LITTLE(*((int32 *) data));
data += sizeof(hm->entry_size);
memcpy(hm->hash_entries, data, hm->map_size * hm->entry_size);
data += hm->map_size * hm->entry_size;
return (int64) (data - start);
}
// WARNING: Requires the phm to be initialized already, incl. element count and element size etc.
inline
bool perfect_hashmap_from_hashmap(PerfectHashMap* phm, const HashMap* hm, int32 seed_trys, RingMemory* ring)
{
char** keys = (char **) ring_get_memory(ring, sizeof(char *) * hm->buf.count, 8);
// Find all keys
int32 key_index = 0;
for (int32 i = 0; i < hm->buf.count; ++i) {
HashEntry* entry = (HashEntry *) hm->table[i];
while (entry != NULL) {
keys[key_index++] = entry->key;
entry = (HashEntry *) entry->next;
}
}
// Check if we can turn it into a perfect hash map
bool is_perfect = perfect_hashmap_find_perfect_hash(phm, (const char **) keys, key_index, seed_trys, ring);
if (!is_perfect) {
return false;
}
// Fill perfect hash map
for (int32 i = 0; i < hm->buf.count; ++i) {
HashEntry* entry = (HashEntry *) hm->table[i];
while (entry != NULL) {
perfect_hashmap_insert(phm, entry->key, entry->value);
entry = (HashEntry *) entry->next;
}
}
return true;
}
#endif
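
A hedged end-to-end sketch of the new perfect hash map: create the storage, search for a collision-free hash function and seed for a fixed key set, then insert and look up; the key set, sizes, and the ring variable are assumptions:

const char* keys[] = { "pos", "uv", "normal", "color" };

PerfectHashMap phm;
perfect_hashmap_create(&phm, 8, sizeof(PerfectHashEntryInt32), &ring); // ring: assumed RingMemory

if (perfect_hashmap_find_perfect_hash(&phm, keys, 4, 1000, &ring)) {
    perfect_hashmap_insert(&phm, "uv", (int32) 1);

    PerfectHashEntryInt32* entry = (PerfectHashEntryInt32 *) perfect_hashmap_get_entry(&phm, "uv");
    // entry->value == 1
}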

View File

@ -32,7 +32,7 @@ void thread_create(Worker* worker, ThreadJobFunc routine, void* arg)
void thread_stop(Worker* worker)
{
atomic_set(&worker->state, 0);
atomic_set_acquire(&worker->state, 0);
pthread_join(worker->thread, NULL);
}

View File

@ -60,32 +60,32 @@ static THREAD_RETURN thread_pool_worker(void* arg)
break;
}
work = (PoolWorker *) queue_dequeue_keep(&pool->work_queue, sizeof(PoolWorker), 64);
work = (PoolWorker *) queue_dequeue_keep(&pool->work_queue);
pthread_mutex_unlock(&pool->work_mutex);
if (!work) {
continue;
}
atomic_increment(&pool->working_cnt);
atomic_set(&work->state, 2);
atomic_increment_relaxed(&pool->working_cnt);
atomic_set_release(&work->state, 2);
work->func(work);
atomic_set(&work->state, 1);
atomic_set_release(&work->state, 1);
// Job gets marked after completion -> can be overwritten now
if (atomic_get(&work->id) == -1) {
atomic_set(&work->id, 0);
if (atomic_get_relaxed(&work->id) == -1) {
atomic_set_release(&work->id, 0);
}
atomic_decrement(&pool->working_cnt);
atomic_decrement_relaxed(&pool->working_cnt);
if (atomic_get(&pool->state) == 0 && atomic_get(&pool->working_cnt) == 0) {
if (atomic_get_relaxed(&pool->state) == 0 && atomic_get_relaxed(&pool->working_cnt) == 0) {
pthread_cond_signal(&pool->working_cond);
}
}
pthread_cond_signal(&pool->working_cond);
atomic_decrement(&pool->thread_cnt);
atomic_decrement_relaxed(&pool->thread_cnt);
return NULL;
}
@ -121,10 +121,10 @@ void thread_pool_wait(ThreadPool* pool)
void thread_pool_destroy(ThreadPool* pool)
{
// This sets the queue to empty
atomic_set((void **) &pool->work_queue.tail, (void **) &pool->work_queue.head);
atomic_set_acquire((void **) &pool->work_queue.tail, (void **) &pool->work_queue.head);
// This sets the state to "shutdown"
atomic_set(&pool->state, 1);
atomic_set_release(&pool->state, 1);
pthread_cond_broadcast(&pool->work_cond);
thread_pool_wait(pool);
@ -137,8 +137,8 @@ void thread_pool_destroy(ThreadPool* pool)
PoolWorker* thread_pool_add_work(ThreadPool* pool, const PoolWorker* job)
{
pthread_mutex_lock(&pool->work_mutex);
PoolWorker* temp_job = (PoolWorker *) ring_get_memory_nomove(&pool->work_queue, sizeof(PoolWorker), 64);
if (atomic_get(&temp_job->id) > 0) {
PoolWorker* temp_job = (PoolWorker *) ring_get_memory_nomove((RingMemory *) &pool->work_queue, sizeof(PoolWorker), 64);
if (atomic_get_relaxed(&temp_job->id) > 0) {
pthread_mutex_unlock(&pool->work_mutex);
ASSERT_SIMPLE(temp_job->id == 0);
@ -146,10 +146,10 @@ PoolWorker* thread_pool_add_work(ThreadPool* pool, const PoolWorker* job)
}
memcpy(temp_job, job, sizeof(PoolWorker));
ring_move_pointer(&pool->work_queue, &pool->work_queue.head, sizeof(PoolWorker), 64);
ring_move_pointer((RingMemory *) &pool->work_queue, &pool->work_queue.head, sizeof(PoolWorker), 64);
if (temp_job->id == 0) {
temp_job->id = atomic_fetch_add(&pool->id_counter, 1);
temp_job->id = atomic_fetch_add_acquire(&pool->id_counter, 1);
}
pthread_cond_broadcast(&pool->work_cond);
@ -164,8 +164,8 @@ PoolWorker* thread_pool_add_work_start(ThreadPool* pool)
{
pthread_mutex_lock(&pool->work_mutex);
PoolWorker* temp_job = (PoolWorker *) queue_enqueue_start(&pool->work_queue, sizeof(PoolWorker), 64);
if (atomic_get(&temp_job->id) > 0) {
PoolWorker* temp_job = (PoolWorker *) queue_enqueue_start(&pool->work_queue);
if (atomic_get_relaxed(&temp_job->id) > 0) {
pthread_mutex_unlock(&pool->work_mutex);
ASSERT_SIMPLE(temp_job->id == 0);
@ -174,7 +174,7 @@ PoolWorker* thread_pool_add_work_start(ThreadPool* pool)
if (temp_job->id == 0) {
// +1 because otherwise the very first job would be id = 0 which is not a valid id
temp_job->id = atomic_fetch_add(&pool->id_counter, 1) + 1;
temp_job->id = atomic_fetch_add_acquire(&pool->id_counter, 1) + 1;
}
return temp_job;
@ -182,7 +182,7 @@ PoolWorker* thread_pool_add_work_start(ThreadPool* pool)
void thread_pool_add_work_end(ThreadPool* pool)
{
queue_enqueue_end(&pool->work_queue, sizeof(PoolWorker), 64);
queue_enqueue_end(&pool->work_queue);
pthread_cond_broadcast(&pool->work_cond);
pthread_mutex_unlock(&pool->work_mutex);
}

View File

@ -31,6 +31,7 @@ struct UIThemeStyle {
// A theme may have N named styles
// The hashmap contains the offset where the respective style can be found
// @performance Switch to perfect hash map
HashMap hash_map;
};