release build fixed

2026-03-07 10:08:41 +00:00 · 2024-12-12 18:44:22 +01:00 · 2024-12-12 18:44:22 +01:00 · fd963ca891
commit fd963ca891
parent d5e8a0c936
12 changed files with 232 additions and 165 deletions
--- a/gpuapi/RenderUtils.h
+++ b/gpuapi/RenderUtils.h
@ -35,20 +35,10 @@ void vertex_degenerate_create(
    // They are alternating every loop BUT since we use references they look the same in code
    // WARNING: Before using we must make sure that the 0 index is defined
    //          The easiest way is to just define a "degenerate" starting point
-    vertices[*index].position.x = vertices[*index - 1].position.x;
-    vertices[*index].position.y = vertices[*index - 1].position.y;
-    vertices[*index].position.z = zindex;
-    vertices[*index].tex_coord.x = 0;
-    vertices[*index].tex_coord.y = 0;
-    vertices[*index].color = 0;
+    vertices[*index] = {{vertices[*index - 1].position.x, vertices[*index - 1].position.y, zindex}, {0, 0}, 0};
    ++(*index);

-    vertices[*index].position.x = x;
-    vertices[*index].position.y = y;
-    vertices[*index].position.z = zindex;
-    vertices[*index].tex_coord.x = 0;
-    vertices[*index].tex_coord.y = 0;
-    vertices[*index].color = 0;
+    vertices[*index] = {{x, y, zindex}, {0, 0}, 0};
    ++(*index);
 }

@ -76,61 +66,24 @@ void vertex_line_create(

    f32 n1 = -(y2 - y1);
    f32 n2 = x2 - x1;
-    f32 n_ = sqrtf(n2 * n2 + n1 * n1);
-    f32 norm1 = n1 / n_;
-    f32 norm2 = n2 / n_;
-
-    // @todo Currently we always use p1 and never p2
-    //      This is wrong and depends on the Alignment, no? Maybe not
-    // Calculate both parallel points to the start position
-    f32 p1_x1 = x1 + thickness * norm1;
-    f32 p1_y1 = y1 + thickness * norm2;
-
-    // f32 p2_x1 = x1 - thickness * norm1;
-    // f32 p2_y1 = y1 - thickness * norm2;
-
-    // Calculate both parallel points to the end position
-    f32 p1_x2 = x2 + thickness * norm1;
-    f32 p1_y2 = y2 + thickness * norm2;
-
-    // f32 p2_x2 = x2 - thickness * norm1;
-    // f32 p2_y2 = y2 - thickness * norm2;
+    f32 n_ = oms_rsqrt(n2 * n2 + n1 * n1);
+    f32 norm1 = n1 * n_;
+    f32 norm2 = n2 * n_;

    vertex_degenerate_create(vertices, index, zindex, x1, y1);

-    vertices[*index].position.x = x1;
-    vertices[*index].position.y = y1;
-    vertices[*index].position.z = zindex;
-    vertices[*index].tex_coord.x = tex_x1;
-    vertices[*index].tex_coord.y = tex_y1;
-    vertices[*index].color = color_index;
-    ++(*index);
+    int32 idx = *index;

-    vertices[*index].position.x = p1_x1;
-    vertices[*index].position.y = p1_y1;
-    vertices[*index].position.z = zindex;
-    vertices[*index].tex_coord.x = tex_x1;
-    vertices[*index].tex_coord.y = tex_y2;
-    vertices[*index].color = color_index;
-    ++(*index);
+    vertices[idx++] = {{x1, y1, zindex}, {tex_x1, tex_y1}, color_index};
+    vertices[idx++] = {{x1 + thickness * norm1, y1 + thickness * norm2, zindex}, {tex_x1, tex_y2}, color_index};
+    vertices[idx++] = {{x2, y2, zindex}, {tex_x2, tex_y1}, color_index};
+    vertices[idx++] = {{x2 + thickness * norm1, y2 + thickness * norm2, zindex}, {tex_x2, tex_y2}, color_index};

-    vertices[*index].position.x = x2;
-    vertices[*index].position.y = y2;
-    vertices[*index].position.z = zindex;
-    vertices[*index].tex_coord.x = tex_x2;
-    vertices[*index].tex_coord.y = tex_y1;
-    vertices[*index].color = color_index;
-    ++(*index);
-
-    vertices[*index].position.x = p1_x2;
-    vertices[*index].position.y = p1_y2;
-    vertices[*index].position.z = zindex;
-    vertices[*index].tex_coord.x = tex_x2;
-    vertices[*index].tex_coord.y = tex_y2;
-    vertices[*index].color = color_index;
-    ++(*index);
+    *index = idx;
 }

+// @performance Do we really want to create the UI as one continuous mesh?
+// Individual meshes without degenerates might be faster
 inline
 void vertex_rect_create(
    Vertex3DTextureColorIndex* __restrict vertices, uint32* __restrict index, f32 zindex,
@ -155,37 +108,14 @@ void vertex_rect_create(
    f32 x_width = x + width;

    // Rectangle
-    vertices[*index].position.x = x;
-    vertices[*index].position.y = y;
-    vertices[*index].position.z = zindex;
-    vertices[*index].tex_coord.x = tex_x1;
-    vertices[*index].tex_coord.y = tex_y1;
-    vertices[*index].color = color_index;
-    ++(*index);
+    int32 idx = *index;

-    vertices[*index].position.x = x;
-    vertices[*index].position.y = y_height;
-    vertices[*index].position.z = zindex;
-    vertices[*index].tex_coord.x = tex_x1;
-    vertices[*index].tex_coord.y = tex_y2;
-    vertices[*index].color = color_index;
-    ++(*index);
+    vertices[idx++] = {{x, y, zindex}, {tex_x1, tex_y1}, color_index};
+    vertices[idx++] = {{x, y_height, zindex}, {tex_x1, tex_y2}, color_index};
+    vertices[idx++] = {{x_width, y, zindex}, {tex_x2, tex_y1}, color_index};
+    vertices[idx++] = {{x_width, y_height, zindex}, {tex_x2, tex_y2}, color_index};

-    vertices[*index].position.x = x_width;
-    vertices[*index].position.y = y;
-    vertices[*index].position.z = zindex;
-    vertices[*index].tex_coord.x = tex_x2;
-    vertices[*index].tex_coord.y = tex_y1;
-    vertices[*index].color = color_index;
-    ++(*index);
-
-    vertices[*index].position.x = x_width;
-    vertices[*index].position.y = y_height;
-    vertices[*index].position.z = zindex;
-    vertices[*index].tex_coord.x = tex_x2;
-    vertices[*index].tex_coord.y = tex_y2;
-    vertices[*index].color = color_index;
-    ++(*index);
+    *index = idx;
 }

 inline
@ -479,7 +409,7 @@ f32 vertex_text_create(
    const Font* __restrict font, const char* __restrict text, f32 size, f32 color_index = 0
 ) {
    int32 length = utf8_strlen(text);
-    bool is_ascii = strlen(text) == length;
+    bool is_ascii = (int32) strlen(text) == length;
    f32 scale = size / font->size;

    // If we do a different alignment we need to pre-calculate the width and height
@ -507,10 +437,12 @@ f32 vertex_text_create(

    uint32 first_glyph = font->glyphs[0].codepoint;

+    int32 first_char = is_ascii ? text[0] : utf8_get_char_at(text, 0);
+
    f32 offset_x = x;
-    for (int32 i = 0; i < length; ++i) {
+    for (int32 i = (first_char == '\n' ? 1 : 0); i < length; ++i) {
        int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i);
-        if (character == '\n' && i != 0) {
+        if (character == '\n') {
            y -= font->line_height * scale;
            offset_x = x;

@ -527,7 +459,7 @@ f32 vertex_text_create(
            glyph = &font->glyphs[perfect_glyph_pos];
        } else {
            // @performance consider to do binary search
-            for (int32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) {
+            for (uint32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) {
                if (font->glyphs[j].codepoint == character) {
                    glyph = &font->glyphs[j];

@ -651,10 +583,12 @@ f32 ui_text_create(
    int32 start = *index;
    f32 offset_x = (f32) x->value_int;
    f32 offset_y = (f32) y->value_int;
-    for (int32 i = 0; i < length; ++i) {
+
+    int32 first_char = is_ascii ? text->value_str[0] : utf8_get_char_at(text->value_str, 0);
+    for (int32 i = (first_char == '\n' ? 1 : 0); i < length; ++i) {
        int32 character = is_ascii ? text->value_str[i] : utf8_get_char_at(text->value_str, i);

-        if (character == '\n' && i != 0) {
+        if (character == '\n') {
            offset_y += theme->font.line_height * scale;
            offset_x = (f32) x->value_int;

--- a/log/Debug.cpp
+++ b/log/Debug.cpp
@ -102,7 +102,9 @@ void update_timing_stat(uint32 stat, const char* function)
 inline
 void update_timing_stat_start(uint32 stat, const char*)
 {
-    atomic_set((int64 *) &debug_container->perf_stats[stat].old_tick_count, __rdtsc());
+    spinlock_start(&debug_container->perf_stats_spinlock);
+    debug_container->perf_stats[stat].old_tick_count = __rdtsc();
+    spinlock_end(&debug_container->perf_stats_spinlock);
 }

 inline
@ -220,7 +222,7 @@ void debug_memory_log(uint64 start, uint64 size, int32 type, const char* functio
        return;
    }

-    uint64 idx = atomic_add_fetch(&mem->action_idx, 1);
+    uint64 idx = atomic_fetch_add(&mem->action_idx, 1);
    if (idx >= ARRAY_COUNT(mem->last_action)) {
        atomic_set(&mem->action_idx, 1);
        idx %= ARRAY_COUNT(mem->last_action);
@ -253,7 +255,7 @@ void debug_memory_reserve(uint64 start, uint64 size, int32 type, const char* fun
        return;
    }

-    uint64 idx = atomic_add_fetch(&mem->reserve_action_idx, 1);
+    uint64 idx = atomic_fetch_add(&mem->reserve_action_idx, 1);
    if (idx >= ARRAY_COUNT(mem->reserve_action)) {
        atomic_set(&mem->reserve_action_idx, 1);
        idx %= ARRAY_COUNT(mem->last_action);
--- a/log/Log.h
+++ b/log/Log.h
@ -13,10 +13,6 @@
 #include "../stdlib/Types.h"
 #include "Debug.h"

-#ifndef LOG_LEVEL
-    #define LOG_LEVEL 0
-#endif
-
 #ifndef MAX_LOG_LENGTH
    #define MAX_LOG_LENGTH 128
 #endif
@ -39,7 +35,7 @@ void log(const char* format, LogDataType data_type, void* data, bool should_log,
 void log_increment(int32, int64);
 void log_counter(int32, int64);

-#if (LOG_LEVEL == 0)
+#if (!DEBUG && !INTERNAL)
    // Don't perform any logging at log level 0
    #define LOG(str, should_log, save) ((void) 0)
    #define LOG_FORMAT(format, data_type, data, should_log, save) ((void) 0)
@ -49,8 +45,8 @@ void log_counter(int32, int64);
    #define LOG_COUNTER(a, b) ((void) 0)
    #define RESET_COUNTER(a) ((void) 0)
 #else
-    #define LOG(str, should_log, save) log((str), (should_log), (save), __FILE__, __func__, __LINE__)
-    #define LOG_FORMAT(format, data_type, data, should_log, save) log((format), (data_type), (data), (should_log), (save), __FILE__, __func__, __LINE__)
+    #define LOG(str, should_log, save) ((void) 0)
+    #define LOG_FORMAT(format, data_type, data, should_log, save) ((void) 0)
    #define LOG_TO_FILE() log_to_file()
    #define LOG_INCREMENT(a) log_increment((a), 1)
    #define LOG_INCREMENT_BY(a, b) log_increment((a), (b))
--- a/math/matrix/MatrixFloat32.h
+++ b/math/matrix/MatrixFloat32.h
@ -30,10 +30,10 @@
 inline
 void vec2_normalize(f32* __restrict x, f32* __restrict y)
 {
-    f32 d = sqrtf((*x) * (*x) + (*y) * (*y));
+    f32 d = oms_rsqrt((*x) * (*x) + (*y) * (*y));

-    *x /= d;
-    *y /= d;
+    *x *= d;
+    *y *= d;
 }

 inline
@ -114,21 +114,21 @@ f32 vec3_length(v3_f32* vec)
 inline
 void vec3_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z)
 {
-    f32 d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z));
+    f32 d = oms_rsqrt((*x) * (*x) + (*y) * (*y) + (*z) * (*z));

-    *x /= d;
-    *y /= d;
-    *z /= d;
+    *x *= d;
+    *y *= d;
+    *z *= d;
 }

 inline
 void vec3_normalize(v3_f32* vec)
 {
-    f32 d = sqrtf(vec->x * vec->x + vec->y * vec->y + vec->z * vec->z);
+    f32 d = oms_rsqrt(vec->x * vec->x + vec->y * vec->y + vec->z * vec->z);

-    vec->x /= d;
-    vec->y /= d;
-    vec->z /= d;
+    vec->x *= d;
+    vec->y *= d;
+    vec->z *= d;
 }

 inline
@ -205,12 +205,12 @@ f32 vec3_dot(const v3_f32* a, const v3_f32* b) {

 void vec4_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z, f32* __restrict w)
 {
-    f32 d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z) + (*w) * (*w));
+    f32 d = oms_rsqrt((*x) * (*x) + (*y) * (*y) + (*z) * (*z) + (*w) * (*w));

-    *x /= d;
-    *y /= d;
-    *z /= d;
-    *w /= d;
+    *x *= d;
+    *y *= d;
+    *z *= d;
+    *w *= d;
 }

 inline
--- a/math/matrix/QuaternionFloat32.h
+++ b/math/matrix/QuaternionFloat32.h
@ -25,12 +25,12 @@
 inline
 void quaternion_unit(v4_f32* quat)
 {
-    f32 length = sqrtf(quat->w * quat->w + quat->x * quat->x + quat->y * quat->y + quat->z * quat->z);
+    f32 length = oms_rsqrt(quat->w * quat->w + quat->x * quat->x + quat->y * quat->y + quat->z * quat->z);

-    quat->w /= length;
-    quat->x /= length;
-    quat->y /= length;
-    quat->z /= length;
+    quat->w *= length;
+    quat->x *= length;
+    quat->y *= length;
+    quat->z *= length;
 }

 inline
--- a/platform/linux/threading/Atomic.h
+++ b/platform/linux/threading/Atomic.h
@ -115,22 +115,22 @@ int32 atomic_compare_exchange_weak(volatile int32* value, int32* expected, int32
 }

 inline
-int32 atomic_add_fetch(volatile int32* value, int32 operand) {
+int32 atomic_fetch_add(volatile int32* value, int32 operand) {
    return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST);
 }

 inline
-int32 atomic_sub_fetch(volatile int32* value, int32 operand) {
+int32 atomic_fetch_sub(volatile int32* value, int32 operand) {
    return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST);
 }

 inline
-int64 atomic_add_fetch(volatile int64* value, int64 operand) {
+int64 atomic_fetch_add(volatile int64* value, int64 operand) {
    return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST);
 }

 inline
-int64 atomic_sub_fetch(volatile int64* value, int64 operand) {
+int64 atomic_fetch_sub(volatile int64* value, int64 operand) {
    return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST);
 }

@ -211,22 +211,22 @@ uint32 atomic_compare_exchange_weak(volatile uint32* value, uint32* expected, ui
 }

 inline
-uint32 atomic_add_fetch(volatile uint32* value, uint32 operand) {
+uint32 atomic_fetch_add(volatile uint32* value, uint32 operand) {
    return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST);
 }

 inline
-uint32 atomic_sub_fetch(volatile uint32* value, uint32 operand) {
+uint32 atomic_fetch_sub(volatile uint32* value, uint32 operand) {
    return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST);
 }

 inline
-uint64 atomic_add_fetch(volatile uint64* value, uint64 operand) {
+uint64 atomic_fetch_add(volatile uint64* value, uint64 operand) {
    return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST);
 }

 inline
-uint64 atomic_sub_fetch(volatile uint64* value, uint64 operand) {
+uint64 atomic_fetch_sub(volatile uint64* value, uint64 operand) {
    return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST);
 }

--- a/platform/linux/threading/Thread.h
+++ b/platform/linux/threading/Thread.h
@ -98,7 +98,7 @@ int32 pthread_cond_wait(pthread_cond_t* cond, pthread_mutex_t* mutex) {
 }

 int32 pthread_cond_signal(pthread_cond_t* cond) {
-    atomic_add_fetch(cond, 1);
+    atomic_fetch_add(cond, 1);
    syscall(SYS_futex, cond, FUTEX_WAKE, 1, NULL, NULL, 0);

    return 0;
@ -114,7 +114,7 @@ int32 pthread_rwlock_init(pthread_rwlock_t* rwlock, const pthread_rwlockattr_t*)
 int32 pthread_rwlock_rdlock(pthread_rwlock_t* rwlock) {
    while (atomic_get(&rwlock->writer)) {}

-    atomic_add_fetch(&rwlock->readers, 1);
+    atomic_fetch_add(&rwlock->readers, 1);

    return 0;
 }
@ -129,7 +129,7 @@ int32 pthread_rwlock_unlock(pthread_rwlock_t* rwlock) {
    if (atomic_get(&rwlock->writer)) {
        atomic_set(&rwlock->writer, 0);
    } else {
-        atomic_sub_fetch(&rwlock->readers, 1);
+        atomic_fetch_sub(&rwlock->readers, 1);
    }

    return 0;
--- a/platform/win32/threading/Atomic.h
+++ b/platform/win32/threading/Atomic.h
@ -36,6 +36,18 @@ void atomic_set(volatile int64* value, int64 new_value)
    InterlockedExchange((long *) value, (long) new_value);
 }

+// Atomically stores new_value in *value.
+// The float is handed over as its bit pattern; casting the VALUE to long would truncate it
+// to an integer and store the bits of that integer instead.
+inline
+void atomic_set(volatile f32* value, f32 new_value)
+{
+    // long is 32 bit on win32; assumes f32 is a 4 byte float - TODO confirm in Types.h
+    // NOTE(review): union punning is relied upon as a compiler extension in C++ (MSVC, GCC, Clang)
+    union { f32 f; long l; } bits;
+    bits.f = new_value;
+
+    InterlockedExchange((long *) value, bits.l);
+}
+
+// Atomically stores new_value in *value.
+// The double is handed over as its bit pattern and written with the 64 bit exchange;
+// the 32 bit InterlockedExchange would only write half of the object.
+inline
+void atomic_set(volatile f64* value, f64 new_value)
+{
+    // assumes f64 is an 8 byte double - TODO confirm in Types.h
+    // NOTE(review): union punning is relied upon as a compiler extension in C++ (MSVC, GCC, Clang)
+    union { f64 f; long long l; } bits;
+    bits.f = new_value;
+
+    InterlockedExchange64((long long *) value, bits.l);
+}
+
 inline
 int32 atomic_set_fetch(volatile int32* value, int32 new_value)
 {
@ -81,6 +93,18 @@ int64 atomic_get(volatile int64* value)
    return (int64) InterlockedCompareExchange((long *) value, 0, 0);
 }

+// Atomically reads *value.
+// The compare-exchange with identical exchange/comparand (0, 0) never changes the stored bits
+// and hands back the current content, which is then reinterpreted (not converted) as float.
+inline
+f32 atomic_get(volatile f32* value)
+{
+    // long is 32 bit on win32; assumes f32 is a 4 byte float - TODO confirm in Types.h
+    // NOTE(review): union punning is relied upon as a compiler extension in C++ (MSVC, GCC, Clang)
+    union { f32 f; long l; } bits;
+    bits.l = InterlockedCompareExchange((long *) value, 0, 0);
+
+    return bits.f;
+}
+
+// Atomically reads *value.
+// Uses the 64 bit compare-exchange (0, 0 leaves the stored bits untouched) and reinterprets
+// the returned bit pattern as double; the 32 bit variant would only read half of the object.
+inline
+f64 atomic_get(volatile f64* value)
+{
+    // assumes f64 is an 8 byte double - TODO confirm in Types.h
+    // NOTE(review): union punning is relied upon as a compiler extension in C++ (MSVC, GCC, Clang)
+    union { f64 f; long long l; } bits;
+    bits.l = InterlockedCompareExchange64((long long *) value, 0, 0);
+
+    return bits.f;
+}
+
 inline
 void atomic_get(volatile byte* value, byte data[16])
 {
@ -133,22 +157,22 @@ int32 atomic_compare_exchange_weak(volatile int32* value, int32* expected, int32
 }

 inline
-int32 atomic_add_fetch(volatile int32* value, int32 operand) {
+int32 atomic_fetch_add(volatile int32* value, int32 operand) {
    return (int32) InterlockedExchangeAdd((long *) value, operand);
 }

 inline
-int32 atomic_sub_fetch(volatile int32* value, int32 operand) {
+int32 atomic_fetch_sub(volatile int32* value, int32 operand) {
    return (int32) InterlockedExchangeSubtract((unsigned long *) value, operand);
 }

 inline
-int64 atomic_add_fetch(volatile int64* value, int64 operand) {
+int64 atomic_fetch_add(volatile int64* value, int64 operand) {
    return (int64) InterlockedExchangeAdd((long *) value, (long) operand);
 }

 inline
-int64 atomic_sub_fetch(volatile int64* value, int64 operand) {
+int64 atomic_fetch_sub(volatile int64* value, int64 operand) {
    return (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand);
 }

@ -234,22 +258,22 @@ uint32 atomic_compare_exchange_weak(volatile uint32* value, uint32* expected, ui
 }

 inline
-uint32 atomic_add_fetch(volatile uint32* value, uint32 operand) {
+uint32 atomic_fetch_add(volatile uint32* value, uint32 operand) {
    return (uint32) InterlockedExchangeAdd((long *) value, operand);
 }

 inline
-uint32 atomic_sub_fetch(volatile uint32* value, uint32 operand) {
+uint32 atomic_fetch_sub(volatile uint32* value, uint32 operand) {
    return (uint32) InterlockedExchangeSubtract((unsigned long *) value, operand);
 }

 inline
-uint64 atomic_add_fetch(volatile uint64* value, uint64 operand) {
+uint64 atomic_fetch_add(volatile uint64* value, uint64 operand) {
    return (uint64) InterlockedExchangeAdd((long *) value, (long) operand);
 }

 inline
-uint64 atomic_sub_fetch(volatile uint64* value, uint64 operand) {
+uint64 atomic_fetch_sub(volatile uint64* value, uint64 operand) {
    return (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand);
 }

--- a/thread/Thread.h
+++ b/thread/Thread.h
@ -27,11 +27,6 @@

 void thread_create(Worker* worker, ThreadJobFunc routine, void* arg)
 {
-    for (int32 i = 0; i < worker->mutex_size; ++i) {
-        pthread_mutex_init(&worker->mutex[i], NULL);
-    }
-
-    pthread_cond_init(&worker->condition, NULL);
    pthread_create(&worker->thread, NULL, routine, arg);
 }

@ -39,11 +34,6 @@ void thread_stop(Worker* worker)
 {
    atomic_set(&worker->state, 0);
    pthread_join(worker->thread, NULL);
-    pthread_cond_destroy(&worker->condition);
-
-    for (int32 i = 0; i < worker->mutex_size; ++i) {
-        pthread_mutex_destroy(&worker->mutex[i]);
-    }
 }

 #endif
--- a/thread/ThreadJob.h
+++ b/thread/ThreadJob.h
@ -34,11 +34,7 @@ struct PoolWorker {

 struct Worker {
    volatile int32 state;
-
    pthread_t thread;
-    pthread_cond_t condition;
-    int32 mutex_size;
-    pthread_mutex_t* mutex;
 };

 #endif
--- a/thread/ThreadPool.h
+++ b/thread/ThreadPool.h
@ -149,7 +149,7 @@ PoolWorker* thread_pool_add_work(ThreadPool* pool, const PoolWorker* job)
    ring_move_pointer(&pool->work_queue, &pool->work_queue.head, sizeof(PoolWorker), 64);

    if (temp_job->id == 0) {
-        temp_job->id = atomic_add_fetch(&pool->id_counter, 1);
+        temp_job->id = atomic_fetch_add(&pool->id_counter, 1);
    }

    pthread_cond_broadcast(&pool->work_cond);
@ -174,7 +174,7 @@ PoolWorker* thread_pool_add_work_start(ThreadPool* pool)

    if (temp_job->id == 0) {
        // +1 because otherwise the very first job would be id = 0 which is not a valid id
-        temp_job->id = atomic_add_fetch(&pool->id_counter, 1) + 1;
+        temp_job->id = atomic_fetch_add(&pool->id_counter, 1) + 1;
    }

    return temp_job;
--- a/utils/StringUtils.h
+++ b/utils/StringUtils.h
@ -166,7 +166,7 @@ void wchar_to_char(const char* __restrict str, char* __restrict dest)
 }

 inline constexpr
-int32 str_to_int(const char *str)
+int32 str_to_int(const char* str)
 {
    int32 result = 0;

@ -198,9 +198,7 @@ int32 int_to_str(int64 number, char *str, const char thousands = ',') {
    }

    while (number > 0) {
-        if (thousands != '\0'
-            && (digit_count == 3 || digit_count == 6 || digit_count == 9 || digit_count == 12 || digit_count == 15)
-        ) {
+        if (digit_count && digit_count % 3 == 0) {
            str[i++] = thousands;
        }

@ -723,4 +721,131 @@ void str_pad(const char* input, char* output, char pad, size_t len) {
    }
 }

+// Writes value as decimal digits to out, no terminator is appended.
+// Returns the position just past the last digit written.
+inline
+char* sprintf_fast_write_uint(char* out, unsigned long long value)
+{
+    // 20 digits hold the largest 64 bit value
+    char temp[20];
+    int32 count = 0;
+
+    // Digits are produced least significant first ...
+    do {
+        temp[count++] = (char) ('0' + (value % 10));
+        value /= 10;
+    } while (value > 0);
+
+    // ... and are therefore emitted in reverse
+    while (count > 0) {
+        *out++ = temp[--count];
+    }
+
+    return out;
+}
+
+// Minimal printf-style formatter without any bounds checking.
+//
+// Supported specifiers:
+//      %s = zero terminated string (a NULL pointer writes nothing)
+//      %d = int32
+//      %l = int64 (only the single character 'l' is consumed, "%ld" prints the number followed by 'd')
+//      %f = f64, optionally FOLLOWED by the amount of fractional digits (e.g. %f2), default 6, truncated not rounded
+//      a backslash directly in front of '%' writes a literal '%'
+// Any other character after '%' writes a single '%' and drops that character (so %% also results in one '%').
+//
+// WARNING: buffer must be large enough for the complete output incl. the zero terminator
+void sprintf_fast(char *buffer, const char* format, ...) {
+    va_list args;
+    va_start(args, format);
+
+    const char* ptr = format;
+    char *buf_ptr = buffer;
+
+    while (*ptr) {
+        if (*ptr == '\\' && *(ptr + 1) == '%') {
+            // Escaped percent sign, must be tested BEFORE the plain character copy or it can never match
+            ++ptr;
+            *buf_ptr++ = *ptr;
+        } else if (*ptr != '%') {
+            *buf_ptr++ = *ptr;
+        } else {
+            ++ptr;
+
+            if (*ptr == '\0') {
+                // Lone '%' at the very end: write it and stop, advancing further would read past the terminator
+                *buf_ptr++ = '%';
+                break;
+            }
+
+            switch (*ptr) {
+                case 's': {
+                    const char* str = va_arg(args, const char*);
+                    while (str && *str) {
+                        *buf_ptr++ = *str++;
+                    }
+                } break;
+                case 'd': {
+                    int32 val = va_arg(args, int32);
+                    unsigned int magnitude = (unsigned int) val;
+                    if (val < 0) {
+                        *buf_ptr++ = '-';
+                        // Negate in unsigned arithmetic, -val overflows for the smallest int32
+                        magnitude = 0u - magnitude;
+                    }
+
+                    buf_ptr = sprintf_fast_write_uint(buf_ptr, magnitude);
+                } break;
+                case 'l': {
+                    int64 val = va_arg(args, int64);
+                    unsigned long long magnitude = (unsigned long long) val;
+                    if (val < 0) {
+                        *buf_ptr++ = '-';
+                        // Negate in unsigned arithmetic, -val overflows for the smallest int64
+                        magnitude = 0ull - magnitude;
+                    }
+
+                    buf_ptr = sprintf_fast_write_uint(buf_ptr, magnitude);
+                } break;
+                case 'f': {
+                    f64 val = va_arg(args, f64);
+
+                    int32 precision = 6; // Default precision
+
+                    // @question Consider to implement rounding
+                    // The optional precision directly follows the specifier
+                    const char* prec_ptr = ptr + 1;
+                    if (*prec_ptr >= '0' && *prec_ptr <= '9') {
+                        precision = 0;
+                        while (*prec_ptr >= '0' && *prec_ptr <= '9') {
+                            precision = precision * 10 + (*prec_ptr - '0');
+                            ++prec_ptr;
+                        }
+
+                        // Continue parsing behind the last precision digit
+                        ptr = prec_ptr - 1;
+                    }
+
+                    if (val < 0) {
+                        *buf_ptr++ = '-';
+                        val = -val;
+                    }
+
+                    // Integer part, 64 bit so that values beyond the int32 range don't overflow the conversion
+                    int64 int_part = (int64) val;
+                    f64 frac_part = val - (f64) int_part;
+
+                    buf_ptr = sprintf_fast_write_uint(buf_ptr, (unsigned long long) int_part);
+
+                    // Fractional part, one truncated digit at a time
+                    if (precision > 0) {
+                        *buf_ptr++ = '.';
+                        while (precision--) {
+                            frac_part *= 10;
+                            int32 digit = (int32) frac_part;
+                            *buf_ptr++ = (char) (digit + '0');
+                            frac_part -= digit;
+                        }
+                    }
+                } break;
+                default: {
+                    // Unknown format specifier: only the '%' is written
+                    *buf_ptr++ = '%';
+                } break;
+            }
+        }
+
+        ++ptr;
+    }
+
+    *buf_ptr = '\0';
+    va_end(args);
+}
+
+
+
 #endif