From 4b70891c88e90f50d6c5c7cf7a3c2e0ba63a76d3 Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Sat, 30 Nov 2024 03:37:26 +0100 Subject: [PATCH] implemented threaded logging. kinda working but there is sometimes a memory issue. fps much more stable now --- asset/AssetArchive.h | 3 ++ asset/AssetManagementSystem.h | 1 + encryption/CeasarEncryption.h | 36 ++++++++++++++ encryption/XorEncryption.h | 28 +++++++++++ gpuapi/RenderUtils.h | 2 +- log/Debug.cpp | 16 ++++++ log/Debug.h | 4 ++ memory/ThreadedQueue.h | 92 ++++++++++++++++++++++++++++++----- 8 files changed, 170 insertions(+), 12 deletions(-) create mode 100644 encryption/CeasarEncryption.h create mode 100644 encryption/XorEncryption.h diff --git a/asset/AssetArchive.h b/asset/AssetArchive.h index 2094143..8183d5d 100644 --- a/asset/AssetArchive.h +++ b/asset/AssetArchive.h @@ -112,6 +112,9 @@ void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* b { // Get file handle archive->fd = file_read_async_handle(path); + if (!archive->fd) { + return; + } FileBody file; file.size = 64; diff --git a/asset/AssetManagementSystem.h b/asset/AssetManagementSystem.h index e336526..bc39f91 100644 --- a/asset/AssetManagementSystem.h +++ b/asset/AssetManagementSystem.h @@ -43,6 +43,7 @@ struct AssetManagementSystem { Asset* first; Asset* last; + // @question do we want to create an extra threaded version? Or a combined one, like we have right now. 
pthread_mutex_t mutex; }; diff --git a/encryption/CeasarEncryption.h b/encryption/CeasarEncryption.h new file mode 100644 index 0000000..0aaf920 --- /dev/null +++ b/encryption/CeasarEncryption.h @@ -0,0 +1,36 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_ENCRYPTION_CEASAR_H +#define TOS_ENCRYPTION_CEASAR_H + +#include "../stdlib/Types.h" + +constexpr inline +void encrypt_ceasar(char* input, int32 shift) { + for (int32 i = 0; input[i] != '\0'; i++) { + if (input[i] >= 'A' && input[i] <= 'Z') { + input[i] = 'A' + (input[i] - 'A' + shift) % 26; + } else if (input[i] >= 'a' && input[i] <= 'z') { + input[i] = 'a' + (input[i] - 'a' + shift) % 26; + } + } +} + +constexpr inline +void decrypt_ceasar(char* input, int32 shift) { + for (int32 i = 0; input[i] != '\0'; i++) { + if (input[i] >= 'A' && input[i] <= 'Z') { + input[i] = 'A' + (input[i] - 'A' - shift + 26) % 26; + } else if (input[i] >= 'a' && input[i] <= 'z') { + input[i] = 'a' + (input[i] - 'a' - shift + 26) % 26; + } + } +} + +#endif \ No newline at end of file diff --git a/encryption/XorEncryption.h b/encryption/XorEncryption.h new file mode 100644 index 0000000..e578d0c --- /dev/null +++ b/encryption/XorEncryption.h @@ -0,0 +1,28 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_ENCRYPTION_XOR_H +#define TOS_ENCRYPTION_XOR_H + +#include "../stdlib/Types.h" + +constexpr inline +void encrypt_xor(char* input, char key) { + for (int32 i = 0; input[i] != '\0'; i++) { + input[i] ^= key; + } +} + +constexpr inline +void decrypt_xor(char* input, char key) { + for (int32 i = 0; input[i] != '\0'; i++) { + input[i] ^= key; + } +} + +#endif \ No newline at end of file diff --git a/gpuapi/RenderUtils.h b/gpuapi/RenderUtils.h index c22d1bd..8c4a965 100644 --- a/gpuapi/RenderUtils.h +++ b/gpuapi/RenderUtils.h @@ -511,7 +511,7 @@ f32 
vertex_text_create( for (int32 i = 0; i < length; ++i) { int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i); if (character == '\n') { - y += font->line_height * scale; + y -= font->line_height * scale; offset_x = x; continue; diff --git a/log/Debug.cpp b/log/Debug.cpp index 8670e0d..2018bd3 100644 --- a/log/Debug.cpp +++ b/log/Debug.cpp @@ -86,17 +86,21 @@ void update_timing_stat(uint32 stat, const char* function) { uint64 new_tick_count = __rdtsc(); + spinlock_start(&debug_container->perf_stats_spinlock); TimingStat* timing_stat = &debug_container->perf_stats[stat]; timing_stat->function = function; timing_stat->delta_tick = new_tick_count - timing_stat->old_tick_count; timing_stat->delta_time = (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency; timing_stat->old_tick_count = new_tick_count; + spinlock_end(&debug_container->perf_stats_spinlock); } inline void update_timing_stat_start(uint32 stat, const char*) { + spinlock_start(&debug_container->perf_stats_spinlock); debug_container->perf_stats[stat].old_tick_count = __rdtsc(); + spinlock_end(&debug_container->perf_stats_spinlock); } inline @@ -104,11 +108,13 @@ void update_timing_stat_end(uint32 stat, const char* function) { uint64 new_tick_count = __rdtsc(); + spinlock_start(&debug_container->perf_stats_spinlock); TimingStat* timing_stat = &debug_container->perf_stats[stat]; timing_stat->function = function; timing_stat->delta_tick = new_tick_count - timing_stat->old_tick_count; timing_stat->delta_time = (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency; timing_stat->old_tick_count = new_tick_count; + spinlock_end(&debug_container->perf_stats_spinlock); } inline @@ -116,35 +122,45 @@ void update_timing_stat_end_continued(uint32 stat, const char* function) { uint64 new_tick_count = __rdtsc(); + spinlock_start(&debug_container->perf_stats_spinlock); TimingStat* timing_stat = &debug_container->perf_stats[stat]; 
timing_stat->function = function; timing_stat->delta_tick = timing_stat->delta_tick + new_tick_count - timing_stat->old_tick_count; timing_stat->delta_time = timing_stat->delta_time + (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency; timing_stat->old_tick_count = new_tick_count; + spinlock_end(&debug_container->perf_stats_spinlock); } inline void update_timing_stat_reset(uint32 stat) { + spinlock_start(&debug_container->perf_stats_spinlock); debug_container->perf_stats[stat].function = NULL; + spinlock_end(&debug_container->perf_stats_spinlock); } inline void reset_counter(int32 id) { + spinlock_start(&debug_container->perf_stats_spinlock); debug_container->counter[id] = 0; + spinlock_end(&debug_container->perf_stats_spinlock); } inline void log_increment(int32 id, int32 by = 1) { + spinlock_start(&debug_container->perf_stats_spinlock); debug_container->counter[id] += by; + spinlock_end(&debug_container->perf_stats_spinlock); } inline void log_counter(int32 id, int32 value) { + spinlock_start(&debug_container->perf_stats_spinlock); debug_container->counter[id] = value; + spinlock_end(&debug_container->perf_stats_spinlock); } // @todo don't use a pointer to this should be in a global together with other logging data (see Log.h) diff --git a/log/Debug.h b/log/Debug.h index dc42402..8c89c0c 100644 --- a/log/Debug.h +++ b/log/Debug.h @@ -15,6 +15,9 @@ #if _WIN32 #include + #include "../platform/win32/threading/Spinlock.h" +#elif __linux__ + #include "../platform/linux/threading/Spinlock.h" #endif struct LogMemory { @@ -33,6 +36,7 @@ struct DebugContainer { // Used for logging timings for different sections TimingStat* perf_stats; + spinlock32 perf_stats_spinlock; // Required to calculate the "fps" uint64 performance_count_frequency; diff --git a/memory/ThreadedQueue.h b/memory/ThreadedQueue.h index 3f04542..a4d63a6 100644 --- a/memory/ThreadedQueue.h +++ b/memory/ThreadedQueue.h @@ -94,7 +94,7 @@ void 
threaded_queue_free(ThreadedQueue* queue) // @todo Create enqueue_unique inline -void threaded_queue_enqueue_unique(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +void threaded_queue_enqueue_unique_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) { ASSERT_SIMPLE((uint64_t) data % 4 == 0); pthread_mutex_lock(&queue->mutex); @@ -124,12 +124,63 @@ void threaded_queue_enqueue_unique(ThreadedQueue* queue, byte* data, uint64 size pthread_mutex_unlock(&queue->mutex); } +inline +void threaded_queue_enqueue_unique(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +{ + ASSERT_SIMPLE((uint64_t) data % 4 == 0); + pthread_mutex_lock(&queue->mutex); + + byte* tail = queue->tail; + while (tail != queue->head) { + ASSERT_SIMPLE((uint64_t) tail % 4 == 0); + + // @performance we could probably make this faster since we don't need to compare the entire range + if (is_equal_aligned(tail, data, size) == 0) { + pthread_mutex_unlock(&queue->mutex); + + return; + } + + ring_move_pointer((RingMemory *) queue, &tail, size, aligned); + } + + if (!ring_commit_safe((RingMemory *) queue, size)) { + pthread_mutex_unlock(&queue->mutex); + + return; + } + + byte* mem = ring_get_memory((RingMemory *) queue, size, aligned); + memcpy(mem, data, size); + + pthread_cond_signal(&queue->cond); + pthread_mutex_unlock(&queue->mutex); +} + // Conditional Lock inline void threaded_queue_enqueue(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) { pthread_mutex_lock(&queue->mutex); + if (!ring_commit_safe((RingMemory *) queue, size)) { + pthread_mutex_unlock(&queue->mutex); + + return; + } + + byte* mem = ring_get_memory((RingMemory *) queue, size, aligned); + memcpy(mem, data, size); + + pthread_cond_signal(&queue->cond); + pthread_mutex_unlock(&queue->mutex); +} + +inline +void threaded_queue_enqueue_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +{ + pthread_mutex_lock(&queue->mutex); + while
(!ring_commit_safe((RingMemory *) queue, size)) { pthread_cond_wait(&queue->cond, &queue->mutex); } @@ -142,7 +193,7 @@ void threaded_queue_enqueue(ThreadedQueue* queue, byte* data, uint64 size, byte } inline -byte* threaded_queue_enqueue_start(ThreadedQueue* queue, uint64 size, byte aligned = 0) +byte* threaded_queue_enqueue_start_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) { pthread_mutex_lock(&queue->mutex); @@ -154,7 +205,7 @@ byte* threaded_queue_enqueue_start(ThreadedQueue* queue, uint64 size, byte align } inline -void threaded_queue_enqueue_end(ThreadedQueue* queue) +void threaded_queue_enqueue_end_wait(ThreadedQueue* queue) { pthread_cond_signal(&queue->cond); pthread_mutex_unlock(&queue->mutex); @@ -165,6 +216,25 @@ void threaded_queue_dequeue(ThreadedQueue* queue, byte* data, uint64 size, byte { pthread_mutex_lock(&queue->mutex); + if (queue->head == queue->tail) { + pthread_mutex_unlock(&queue->mutex); + + return; + } + + memcpy(data, queue->tail, size); + ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned); + + pthread_cond_signal(&queue->cond); + pthread_mutex_unlock(&queue->mutex); +} + +// Waits until a dequeue is available +inline +void threaded_queue_dequeue_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +{ + pthread_mutex_lock(&queue->mutex); + while (queue->head == queue->tail) { pthread_cond_wait(&queue->cond, &queue->mutex); } @@ -177,7 +247,7 @@ void threaded_queue_dequeue(ThreadedQueue* queue, byte* data, uint64 size, byte } inline -byte* threaded_queue_dequeue_start(ThreadedQueue* queue) +byte* threaded_queue_dequeue_start_wait(ThreadedQueue* queue) { pthread_mutex_lock(&queue->mutex); @@ -189,7 +259,7 @@ byte* threaded_queue_dequeue_start(ThreadedQueue* queue) } inline -void threaded_queue_dequeue_end(ThreadedQueue* queue, uint64 size, byte aligned = 0) +void threaded_queue_dequeue_end_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) { ring_move_pointer((RingMemory *) queue, 
&queue->tail, size, aligned); @@ -199,7 +269,7 @@ void threaded_queue_dequeue_end(ThreadedQueue* queue, uint64 size, byte aligned // Semaphore Lock inline -void threaded_queue_enqueue_sem(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +void threaded_queue_enqueue_sem_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) { sem_wait(&queue->empty); pthread_mutex_lock(&queue->mutex); @@ -212,7 +282,7 @@ void threaded_queue_enqueue_sem(ThreadedQueue* queue, byte* data, uint64 size, b } inline -byte* threaded_queue_enqueue_start_sem(ThreadedQueue* queue, uint64 size, byte aligned = 0) +byte* threaded_queue_enqueue_start_sem_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) { sem_wait(&queue->empty); pthread_mutex_lock(&queue->mutex); @@ -221,14 +291,14 @@ byte* threaded_queue_enqueue_start_sem(ThreadedQueue* queue, uint64 size, byte a } inline -void threaded_queue_enqueue_end_sem(ThreadedQueue* queue) +void threaded_queue_enqueue_end_sem_wait(ThreadedQueue* queue) { pthread_mutex_unlock(&queue->mutex); sem_post(&queue->full); } inline -byte* threaded_queue_dequeue_sem(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +byte* threaded_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) { sem_wait(&queue->full); pthread_mutex_lock(&queue->mutex); @@ -241,7 +311,7 @@ byte* threaded_queue_dequeue_sem(ThreadedQueue* queue, byte* data, uint64 size, } inline -byte* threaded_queue_dequeue_start_sem(ThreadedQueue* queue) +byte* threaded_queue_dequeue_start_sem_wait(ThreadedQueue* queue) { sem_wait(&queue->full); pthread_mutex_lock(&queue->mutex); @@ -250,7 +320,7 @@ byte* threaded_queue_dequeue_start_sem(ThreadedQueue* queue) } inline -void threaded_queue_dequeue_end_sem(ThreadedQueue* queue, uint64 size, byte aligned = 0) +void threaded_queue_dequeue_end_sem_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) { ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned);