From 4b70891c88e90f50d6c5c7cf7a3c2e0ba63a76d3 Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Sat, 30 Nov 2024 03:37:26 +0100 Subject: [PATCH] implemented threaded logging. kinda working but there is sometimes a memory issue. fps much more stable now --- asset/AssetArchive.h | 3 ++ asset/AssetManagementSystem.h | 1 + encryption/CeasarEncryption.h | 36 ++++++++++++++ encryption/XorEncryption.h | 28 +++++++++++ gpuapi/RenderUtils.h | 2 +- log/Debug.cpp | 16 ++++++ log/Debug.h | 4 ++ memory/ThreadedQueue.h | 92 ++++++++++++++++++++++++++++++----- 8 files changed, 170 insertions(+), 12 deletions(-) create mode 100644 encryption/CeasarEncryption.h create mode 100644 encryption/XorEncryption.h diff --git a/asset/AssetArchive.h b/asset/AssetArchive.h index 2094143..8183d5d 100644 --- a/asset/AssetArchive.h +++ b/asset/AssetArchive.h @@ -112,6 +112,9 @@ void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* b { // Get file handle archive->fd = file_read_async_handle(path); + if (!archive->fd) { + return; + } FileBody file; file.size = 64; diff --git a/asset/AssetManagementSystem.h b/asset/AssetManagementSystem.h index e336526..bc39f91 100644 --- a/asset/AssetManagementSystem.h +++ b/asset/AssetManagementSystem.h @@ -43,6 +43,7 @@ struct AssetManagementSystem { Asset* first; Asset* last; + // @question do we want to create an extra threaded version? Or a combined one, like we have right now. 
pthread_mutex_t mutex; }; diff --git a/encryption/CeasarEncryption.h b/encryption/CeasarEncryption.h new file mode 100644 index 0000000..0aaf920 --- /dev/null +++ b/encryption/CeasarEncryption.h @@ -0,0 +1,36 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_ENCRYPTION_CEASAR_H +#define TOS_ENCRYPTION_CEASAR_H + +#include "../stdlib/Types.h" + +constexpr inline +void encrypt_ceasar(char* input, int32 shift) { + for (int32 i = 0; input[i] != '\0'; i++) { + if (input[i] >= 'A' && input[i] <= 'Z') { + input[i] = 'A' + (input[i] - 'A' + shift) % 26; + } else if (input[i] >= 'a' && input[i] <= 'z') { + input[i] = 'a' + (input[i] - 'a' + shift) % 26; + } + } +} + +constexpr inline +void decrypt_ceasar(char* input, int32 shift) { + for (int32 i = 0; input[i] != '\0'; i++) { + if (input[i] >= 'A' && input[i] <= 'Z') { + input[i] = 'A' + (input[i] - 'A' - shift + 26) % 26; + } else if (input[i] >= 'a' && input[i] <= 'z') { + input[i] = 'a' + (input[i] - 'a' - shift + 26) % 26; + } + } +} + +#endif \ No newline at end of file diff --git a/encryption/XorEncryption.h b/encryption/XorEncryption.h new file mode 100644 index 0000000..e578d0c --- /dev/null +++ b/encryption/XorEncryption.h @@ -0,0 +1,28 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_ENCRYPTION_XOR_H +#define TOS_ENCRYPTION_XOR_H + +#include "../stdlib/Types.h" + +constexpr inline +void encrypt_xor(char* input, char key) { + for (int32 i = 0; input[i] != '\0'; i++) { + input[i] ^= key; + } +} + +constexpr inline +void decrypt_xor(char* input, char key) { + for (int32 i = 0; input[i] != '\0'; i++) { + input[i] ^= key; + } +} + +#endif \ No newline at end of file diff --git a/gpuapi/RenderUtils.h b/gpuapi/RenderUtils.h index c22d1bd..8c4a965 100644 --- a/gpuapi/RenderUtils.h +++ b/gpuapi/RenderUtils.h @@ -511,7 +511,7 @@ f32 
vertex_text_create( for (int32 i = 0; i < length; ++i) { int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i); if (character == '\n') { - y += font->line_height * scale; + y -= font->line_height * scale; offset_x = x; continue; diff --git a/log/Debug.cpp b/log/Debug.cpp index 8670e0d..2018bd3 100644 --- a/log/Debug.cpp +++ b/log/Debug.cpp @@ -86,17 +86,21 @@ void update_timing_stat(uint32 stat, const char* function) { uint64 new_tick_count = __rdtsc(); + spinlock_start(&debug_container->perf_stats_spinlock); TimingStat* timing_stat = &debug_container->perf_stats[stat]; timing_stat->function = function; timing_stat->delta_tick = new_tick_count - timing_stat->old_tick_count; timing_stat->delta_time = (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency; timing_stat->old_tick_count = new_tick_count; + spinlock_end(&debug_container->perf_stats_spinlock); } inline void update_timing_stat_start(uint32 stat, const char*) { + spinlock_start(&debug_container->perf_stats_spinlock); debug_container->perf_stats[stat].old_tick_count = __rdtsc(); + spinlock_end(&debug_container->perf_stats_spinlock); } inline @@ -104,11 +108,13 @@ void update_timing_stat_end(uint32 stat, const char* function) { uint64 new_tick_count = __rdtsc(); + spinlock_start(&debug_container->perf_stats_spinlock); TimingStat* timing_stat = &debug_container->perf_stats[stat]; timing_stat->function = function; timing_stat->delta_tick = new_tick_count - timing_stat->old_tick_count; timing_stat->delta_time = (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency; timing_stat->old_tick_count = new_tick_count; + spinlock_end(&debug_container->perf_stats_spinlock); } inline @@ -116,35 +122,45 @@ void update_timing_stat_end_continued(uint32 stat, const char* function) { uint64 new_tick_count = __rdtsc(); + spinlock_start(&debug_container->perf_stats_spinlock); TimingStat* timing_stat = &debug_container->perf_stats[stat]; 
timing_stat->function = function; timing_stat->delta_tick = timing_stat->delta_tick + new_tick_count - timing_stat->old_tick_count; timing_stat->delta_time = timing_stat->delta_time + (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency; timing_stat->old_tick_count = new_tick_count; + spinlock_end(&debug_container->perf_stats_spinlock); } inline void update_timing_stat_reset(uint32 stat) { + spinlock_start(&debug_container->perf_stats_spinlock); debug_container->perf_stats[stat].function = NULL; + spinlock_end(&debug_container->perf_stats_spinlock); } inline void reset_counter(int32 id) { + spinlock_start(&debug_container->perf_stats_spinlock); debug_container->counter[id] = 0; + spinlock_end(&debug_container->perf_stats_spinlock); } inline void log_increment(int32 id, int32 by = 1) { + spinlock_start(&debug_container->perf_stats_spinlock); debug_container->counter[id] += by; + spinlock_end(&debug_container->perf_stats_spinlock); } inline void log_counter(int32 id, int32 value) { + spinlock_start(&debug_container->perf_stats_spinlock); debug_container->counter[id] = value; + spinlock_end(&debug_container->perf_stats_spinlock); } // @todo don't use a pointer to this should be in a global together with other logging data (see Log.h) diff --git a/log/Debug.h b/log/Debug.h index dc42402..8c89c0c 100644 --- a/log/Debug.h +++ b/log/Debug.h @@ -15,6 +15,9 @@ #if _WIN32 #include + #include "../platform/win32/threading/Spinlock.h" +#elif __linux__ + #include "../platform/linux/threading/Spinlock.h" #endif struct LogMemory { @@ -33,6 +36,7 @@ struct DebugContainer { // Used for logging timings for different sections TimingStat* perf_stats; + spinlock32 perf_stats_spinlock; // Required to calculate the "fps" uint64 performance_count_frequency; diff --git a/memory/ThreadedQueue.h b/memory/ThreadedQueue.h index 3f04542..a4d63a6 100644 --- a/memory/ThreadedQueue.h +++ b/memory/ThreadedQueue.h @@ -94,7 +94,7 @@ void 
threaded_queue_free(ThreadedQueue* queue) // @todo Create enqueue_unique inline -void threaded_queue_enqueue_unique(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +void threaded_queue_enqueue_unique_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) { ASSERT_SIMPLE((uint64_t) data % 4 == 0); pthread_mutex_lock(&queue->mutex); @@ -124,12 +124,63 @@ void threaded_queue_enqueue_unique(ThreadedQueue* queue, byte* data, uint64 size pthread_mutex_unlock(&queue->mutex); } +inline +void threaded_queue_enqueue_unique(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +{ + ASSERT_SIMPLE((uint64_t) data % 4 == 0); + pthread_mutex_lock(&queue->mutex); + + byte* tail = queue->tail; + while (tail != queue->head) { + ASSERT_SIMPLE((uint64_t) tail % 4 == 0); + + // @performance we could probably make this faster since we don't need to compare the entire range + if (is_equal_aligned(tail, data, size) == 0) { + pthread_mutex_unlock(&queue->mutex); + + return; + } + + ring_move_pointer((RingMemory *) queue, &tail, size, aligned); + } + + if (!ring_commit_safe((RingMemory *) queue, size)) { + pthread_mutex_unlock(&queue->mutex); + + return; + } + + byte* mem = ring_get_memory((RingMemory *) queue, size, aligned); + memcpy(mem, data, size); + + pthread_cond_signal(&queue->cond); + pthread_mutex_unlock(&queue->mutex); +} + // Conditional Lock inline void threaded_queue_enqueue(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) { pthread_mutex_lock(&queue->mutex); + if (!ring_commit_safe((RingMemory *) queue, size)) { + pthread_mutex_unlock(&queue->mutex); + + return; + } + + byte* mem = ring_get_memory((RingMemory *) queue, size, aligned); + memcpy(mem, data, size); + + pthread_cond_signal(&queue->cond); + pthread_mutex_unlock(&queue->mutex); +} + +inline +void threaded_queue_enqueue_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +{ + pthread_mutex_lock(&queue->mutex); + while
(!ring_commit_safe((RingMemory *) queue, size)) { pthread_cond_wait(&queue->cond, &queue->mutex); } @@ -142,7 +193,7 @@ void threaded_queue_enqueue(ThreadedQueue* queue, byte* data, uint64 size, byte } inline -byte* threaded_queue_enqueue_start(ThreadedQueue* queue, uint64 size, byte aligned = 0) +byte* threaded_queue_enqueue_start_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) { pthread_mutex_lock(&queue->mutex); @@ -154,7 +205,7 @@ byte* threaded_queue_enqueue_start(ThreadedQueue* queue, uint64 size, byte align } inline -void threaded_queue_enqueue_end(ThreadedQueue* queue) +void threaded_queue_enqueue_end_wait(ThreadedQueue* queue) { pthread_cond_signal(&queue->cond); pthread_mutex_unlock(&queue->mutex); @@ -165,6 +216,25 @@ void threaded_queue_dequeue(ThreadedQueue* queue, byte* data, uint64 size, byte { pthread_mutex_lock(&queue->mutex); + if (queue->head == queue->tail) { + pthread_mutex_unlock(&queue->mutex); + + return; + } + + memcpy(data, queue->tail, size); + ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned); + + pthread_cond_signal(&queue->cond); + pthread_mutex_unlock(&queue->mutex); +} + +// Waits until a dequeue is available +inline +void threaded_queue_dequeue_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +{ + pthread_mutex_lock(&queue->mutex); + while (queue->head == queue->tail) { pthread_cond_wait(&queue->cond, &queue->mutex); } @@ -177,7 +247,7 @@ void threaded_queue_dequeue(ThreadedQueue* queue, byte* data, uint64 size, byte } inline -byte* threaded_queue_dequeue_start(ThreadedQueue* queue) +byte* threaded_queue_dequeue_start_wait(ThreadedQueue* queue) { pthread_mutex_lock(&queue->mutex); @@ -189,7 +259,7 @@ byte* threaded_queue_dequeue_start(ThreadedQueue* queue) } inline -void threaded_queue_dequeue_end(ThreadedQueue* queue, uint64 size, byte aligned = 0) +void threaded_queue_dequeue_end_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) { ring_move_pointer((RingMemory *) queue, 
&queue->tail, size, aligned); @@ -199,7 +269,7 @@ void threaded_queue_dequeue_end(ThreadedQueue* queue, uint64 size, byte aligned // Semaphore Lock inline -void threaded_queue_enqueue_sem(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +void threaded_queue_enqueue_sem_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) { sem_wait(&queue->empty); pthread_mutex_lock(&queue->mutex); @@ -212,7 +282,7 @@ void threaded_queue_enqueue_sem(ThreadedQueue* queue, byte* data, uint64 size, b } inline -byte* threaded_queue_enqueue_start_sem(ThreadedQueue* queue, uint64 size, byte aligned = 0) +byte* threaded_queue_enqueue_start_sem_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) { sem_wait(&queue->empty); pthread_mutex_lock(&queue->mutex); @@ -221,14 +291,14 @@ byte* threaded_queue_enqueue_start_sem(ThreadedQueue* queue, uint64 size, byte a } inline -void threaded_queue_enqueue_end_sem(ThreadedQueue* queue) +void threaded_queue_enqueue_end_sem_wait(ThreadedQueue* queue) { pthread_mutex_unlock(&queue->mutex); sem_post(&queue->full); } inline -byte* threaded_queue_dequeue_sem(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +byte* threaded_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) { sem_wait(&queue->full); pthread_mutex_lock(&queue->mutex); @@ -241,7 +311,7 @@ byte* threaded_queue_dequeue_sem(ThreadedQueue* queue, byte* data, uint64 size, } inline -byte* threaded_queue_dequeue_start_sem(ThreadedQueue* queue) +byte* threaded_queue_dequeue_start_sem_wait(ThreadedQueue* queue) { sem_wait(&queue->full); pthread_mutex_lock(&queue->mutex); @@ -250,7 +320,7 @@ byte* threaded_queue_dequeue_start_sem(ThreadedQueue* queue) } inline -void threaded_queue_dequeue_end_sem(ThreadedQueue* queue, uint64 size, byte aligned = 0) +void threaded_queue_dequeue_end_sem_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) { ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned);