From 17b803a0b693f219f32e1a5425383b6968b59e7b Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Sun, 9 Mar 2025 18:15:08 +0100 Subject: [PATCH] prepare directx ui, not working yet --- asset/AssetArchive.h | 8 +- asset/AssetManagementSystem.h | 6 +- command/AppCmdBuffer.cpp | 18 +- gpuapi/direct3d/AppCmdBuffer.h | 10 +- gpuapi/direct3d/DirectXUtils.h | 525 +++++++++++++++++++++- gpuapi/direct3d/ShaderUtils.h | 247 ++++++++++- gpuapi/opengl/AppCmdBuffer.h | 4 +- gpuapi/opengl/OpenglUtils.h | 23 +- gpuapi/opengl/ShaderUtils.h | 54 ++- gpuapi/vulkan/AppCmdBuffer.h | 4 +- gpuapi/vulkan/ShaderUtils.h | 24 +- gpuapi/vulkan/VulkanUtils.h | 142 +++--- log/Log.h | 211 +++------ log/PerformanceProfiler.h | 6 +- memory/BufferMemory.h | 3 +- memory/ChunkMemory.h | 7 +- memory/RingMemory.h | 5 +- platform/win32/ExceptionHandler.h | 8 +- platform/win32/threading/Semaphore.h | 6 + platform/win32/threading/Thread.h | 1 + stdlib/HashMap.h | 12 +- stdlib/PerfectHashMap.h | 4 +- tests/MainTest.cpp | 14 +- tests/TestFramework.h | 182 +++++--- tests/math/EvaluatorTest.cpp | 6 +- tests/memory/ChunkMemoryTest.cpp | 19 +- tests/memory/RingMemoryTest.cpp | 14 +- tests/stdlib/HashMapTest.cpp | 6 +- tests/ui/UILayoutTest.cpp | 6 +- tests/ui/UIThemeTest.cpp | 4 +- tests/utils/BitUtilsTest.cpp | 50 +-- tests/utils/EndianUtilsTest.cpp | 24 +- tests/utils/MathUtilsTest.cpp | 624 +++++++++++++++++++++++++++ tests/utils/StringUtilsTest.cpp | 70 +-- tests/utils/UtilsTest.cpp | 44 +- thread/ThreadPool.h | 2 +- ui/UILayout.h | 8 +- utils/MathUtils.h | 319 ++++++++++++++ 38 files changed, 2222 insertions(+), 498 deletions(-) create mode 100644 tests/utils/MathUtilsTest.cpp create mode 100644 utils/MathUtils.h diff --git a/asset/AssetArchive.h b/asset/AssetArchive.h index d77b295..4c792ae 100644 --- a/asset/AssetArchive.h +++ b/asset/AssetArchive.h @@ -133,7 +133,7 @@ void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* b { PROFILE(PROFILE_ASSET_ARCHIVE_LOAD, path, false, 
true); - LOG_FORMAT_1( + LOG_1( "Load AssetArchive %s", {{LOG_DATA_CHAR_STR, (void *) path}} ); @@ -174,7 +174,7 @@ void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* b file_read(archive->fd, &file, 0, file.size); asset_archive_header_load(&archive->header, file.content, steps); - LOG_FORMAT_1( + LOG_1( "Loaded AssetArchive %s with %d assets", {{LOG_DATA_CHAR_STR, (void *) path}, {LOG_DATA_UINT32, (void *) &archive->header.asset_count}} ); @@ -204,7 +204,7 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana byte component_id = archive->asset_type_map[element->type]; //AssetComponent* ac = &ams->asset_components[component_id]; - LOG_FORMAT_2( + LOG_2( "Load asset %d from archive %d for AMS %d with %n B compressed and %n B uncompressed", {{LOG_DATA_UINT64, &id}, {LOG_DATA_UINT32, &element->type}, {LOG_DATA_BYTE, &component_id}, {LOG_DATA_UINT32, &element->length}, {LOG_DATA_UINT32, &element->uncompressed}} ); @@ -314,7 +314,7 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana // the main program should still be able to do some work if possible thrd_ams_set_loaded(asset); - LOG_FORMAT_2( + LOG_2( "Loaded asset %d from archive %d for AMS %d with %n B compressed and %n B uncompressed", {{LOG_DATA_UINT64, &id}, {LOG_DATA_UINT32, &element->type}, {LOG_DATA_BYTE, &component_id}, {LOG_DATA_UINT32, &element->length}, {LOG_DATA_UINT32, &element->uncompressed}} ); diff --git a/asset/AssetManagementSystem.h b/asset/AssetManagementSystem.h index df159d4..3fabeca 100644 --- a/asset/AssetManagementSystem.h +++ b/asset/AssetManagementSystem.h @@ -42,7 +42,7 @@ struct AssetManagementSystem { inline void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 asset_component_count, int32 count) { - LOG_FORMAT_1("Create AMS for %n assets", {{LOG_DATA_INT32, &count}}); + LOG_1("Create AMS for %n assets", {{LOG_DATA_INT32, &count}}); hashmap_create(&ams->hash_map, count, 
sizeof(HashEntry) + sizeof(Asset), buf); ams->asset_component_count = asset_component_count; ams->asset_components = (AssetComponent *) buffer_get_memory(buf, asset_component_count * sizeof(AssetComponent), 64, true); @@ -52,7 +52,7 @@ inline void ams_component_create(AssetComponent* ac, BufferMemory* buf, int32 chunk_size, int32 count) { ASSERT_SIMPLE(chunk_size); - LOG_FORMAT_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}}); + LOG_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}}); chunk_init(&ac->asset_memory, buf, count, chunk_size, 64); pthread_mutex_init(&ac->mutex, NULL); @@ -62,7 +62,7 @@ inline void ams_component_create(AssetComponent* ac, byte* buf, int32 chunk_size, int32 count) { ASSERT_SIMPLE(chunk_size); - LOG_FORMAT_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}}); + LOG_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}}); ac->asset_memory.count = count; ac->asset_memory.chunk_size = chunk_size; diff --git a/command/AppCmdBuffer.cpp b/command/AppCmdBuffer.cpp index 43d2424..9e932f8 100644 --- a/command/AppCmdBuffer.cpp +++ b/command/AppCmdBuffer.cpp @@ -40,7 +40,7 @@ void cmd_buffer_create(AppCmdBuffer* cb, BufferMemory* buf, int32 commands_count chunk_init(&cb->commands, buf, commands_count, sizeof(Command), 64); pthread_mutex_init(&cb->mutex, NULL); - LOG_FORMAT_1("Created AppCmdBuffer: %n B", {{LOG_DATA_UINT64, &cb->commands.size}}); + LOG_1("Created AppCmdBuffer: %n B", {{LOG_DATA_UINT64, &cb->commands.size}}); } // This doesn't load the asset directly but tells (most likely) a worker thread to load an asset @@ -356,7 +356,7 @@ inline void* cmd_func_run(AppCmdBuffer*, CommandFunction func) { } inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, int32 asset_id) { - LOG_FORMAT_1("Load texture %d", {{LOG_DATA_INT32, 
&asset_id}}); + LOG_1("Load texture %d", {{LOG_DATA_INT32, &asset_id}}); // Check if asset already loaded char id_str[9]; @@ -385,7 +385,7 @@ inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, int32 asset_id) { } inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, const char* name) { - LOG_FORMAT_1("Load texture %d", {{LOG_DATA_CHAR_STR, (void *) name}}); + LOG_1("Load texture %d", {{LOG_DATA_CHAR_STR, (void *) name}}); PROFILE(PROFILE_CMD_ASSET_LOAD_SYNC, name, false, true); // Check if asset already loaded @@ -413,7 +413,7 @@ inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, const char* name) { inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, int32 asset_id) { - LOG_FORMAT_1("Load font %d", {{LOG_DATA_INT32, &asset_id}}); + LOG_1("Load font %d", {{LOG_DATA_INT32, &asset_id}}); // Check if asset already loaded char id_str[9]; @@ -442,7 +442,7 @@ inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, int32 asset_id) inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, const char* name) { - LOG_FORMAT_1("Load font %s", {{LOG_DATA_CHAR_STR, (void *) name}}); + LOG_1("Load font %s", {{LOG_DATA_CHAR_STR, (void *) name}}); PROFILE(PROFILE_CMD_FONT_LOAD_SYNC, name, false, true); // Check if asset already loaded @@ -472,13 +472,13 @@ UILayout* cmd_layout_load_sync( UILayout* __restrict layout, const char* __restrict layout_path ) { PROFILE(PROFILE_CMD_LAYOUT_LOAD_SYNC, layout_path, false, true); - LOG_FORMAT_1("Load layout %s", {{LOG_DATA_CHAR_STR, (void *) layout_path}}); + LOG_1("Load layout %s", {{LOG_DATA_CHAR_STR, (void *) layout_path}}); FileBody layout_file = {}; file_read(layout_path, &layout_file, cb->mem_vol); if (!layout_file.content) { - LOG_FORMAT_1("Failed loading layout \"%s\"", {{LOG_DATA_CHAR_STR, (void *) layout_path}}); + LOG_1("Failed loading layout \"%s\"", {{LOG_DATA_CHAR_STR, (void *) layout_path}}); return NULL; } @@ -493,7 +493,7 @@ UIThemeStyle* cmd_theme_load_sync( UIThemeStyle* __restrict theme, const char* __restrict theme_path ) { 
PROFILE(PROFILE_CMD_THEME_LOAD_SYNC, theme_path, false, true); - LOG_FORMAT_1("Load theme %s", {{LOG_DATA_CHAR_STR, (void *) theme_path}}); + LOG_1("Load theme %s", {{LOG_DATA_CHAR_STR, (void *) theme_path}}); FileBody theme_file = {}; file_read(theme_path, &theme_file, cb->mem_vol); @@ -519,7 +519,7 @@ UILayout* cmd_ui_load_sync( const Camera* __restrict camera ) { PROFILE(PROFILE_CMD_UI_LOAD_SYNC, layout_path, false, true); - LOG_FORMAT_1("Load ui with layout %s and theme %s", {{LOG_DATA_CHAR_STR, (void *) layout_path}, {LOG_DATA_CHAR_STR, (void *) theme_path}}); + LOG_1("Load ui with layout %s and theme %s", {{LOG_DATA_CHAR_STR, (void *) layout_path}, {LOG_DATA_CHAR_STR, (void *) theme_path}}); if (!cmd_layout_load_sync(cb, layout, layout_path)) { // We have to make sure that at least the font is set diff --git a/gpuapi/direct3d/AppCmdBuffer.h b/gpuapi/direct3d/AppCmdBuffer.h index 7adb5f2..ada822a 100644 --- a/gpuapi/direct3d/AppCmdBuffer.h +++ b/gpuapi/direct3d/AppCmdBuffer.h @@ -27,7 +27,8 @@ void* cmd_shader_load(AppCmdBuffer*, Command*) { void* cmd_shader_load_sync( AppCmdBuffer* __restrict cb, Shader* __restrict shader, const int32* __restrict shader_ids, - ID3D12Device* __restrict device, ID3D12PipelineState** __restrict pipeline, ID3D12RootSignature* __restrict pipeline_layout + ID3D12Device* __restrict device, ID3D12PipelineState** __restrict pipeline, ID3D12RootSignature* __restrict pipeline_layout, + D3D12_INPUT_ELEMENT_DESC* __restrict descriptor_set_layouts, int32 layout_count ) { PROFILE(PROFILE_CMD_SHADER_LOAD_SYNC, NULL, false, true); char asset_id[9]; @@ -53,7 +54,7 @@ void* cmd_shader_load_sync( } // Make sub shader - shader_assets[i] = shader_make( + shader_assets[i] = gpuapi_shader_make( shader_type_index((ShaderType) (i + 1)), (char *) shader_asset->self, shader_asset->ram_size @@ -64,11 +65,14 @@ void* cmd_shader_load_sync( } // Make shader/program - shader->id = pipeline_make( + shader->id = gpuapi_pipeline_make( device, pipeline, 
pipeline_layout, + descriptor_set_layouts, layout_count, shader_assets[0], shader_assets[1], shader_assets[2] ); + // @question do I release shader_assets[..]? + return NULL; } diff --git a/gpuapi/direct3d/DirectXUtils.h b/gpuapi/direct3d/DirectXUtils.h index be7c65e..6717aa4 100644 --- a/gpuapi/direct3d/DirectXUtils.h +++ b/gpuapi/direct3d/DirectXUtils.h @@ -15,11 +15,15 @@ #include #include #include "../../../GameEngine/log/Log.h" +#include "../../../GameEngine/memory/RingMemory.h" +#include "../../../GameEngine/object/Texture.h" +#include "../../../GameEngine/image/Image.cpp" +#include "../../compiler/CompilerUtils.h" // #include "../../../EngineDependencies/directx/d3d12.h" // #include "../../../EngineDependencies/directx/d3dx12.h" #include "FramesInFlightContainer.h" -// A more (compile-time) efficient version of the windows macro IID_PPV_ARGS +// Replacement for the windows macro IID_PPVOID #define IID_PPVOID(pointer) __uuidof(**(pointer)), (void **) (pointer) bool is_directx_supported(D3D_FEATURE_LEVEL version) @@ -89,6 +93,22 @@ int32 max_directx_version() return 0; } +inline +void change_viewport( + int32 width, int32 height, + ID3D12GraphicsCommandList* command_buffer, D3D12_VIEWPORT* viewport, D3D12_RECT* scissor_rect +) +{ + viewport->Width = (f32) width; + viewport->Height = (f32) height; + + scissor_rect->right = width; + scissor_rect->bottom = height; + + command_buffer->RSSetViewports(1, viewport); + command_buffer->RSSetScissorRects(1, scissor_rect); +} + // Returns frame index int32 wait_for_previous_frame( FramesInFlightContainer* frames_in_flight, @@ -100,11 +120,13 @@ int32 wait_for_previous_frame( // sample illustrates how to use fences for efficient resource usage and to // maximize GPU utilization. - UINT64 fence_value_temp = frames_in_flight->fence_value; + uint64 fence_value_temp = frames_in_flight->fence_value; + + HRESULT hr; // Signal and increment the fence value. 
- if(FAILED(graphics_queue->Signal(frames_in_flight->fence, fence_value_temp))) { - LOG_1("DirectX12 Signal"); + if(FAILED(hr = graphics_queue->Signal(frames_in_flight->fence, fence_value_temp))) { + LOG_1("DirectX12 Signal: %d", {{LOG_DATA_INT32, &hr}}); ASSERT_SIMPLE(false); } @@ -112,8 +134,8 @@ int32 wait_for_previous_frame( // Wait until the previous frame is finished. if (frames_in_flight->fence->GetCompletedValue() < fence_value_temp) { - if (FAILED(frames_in_flight->fence->SetEventOnCompletion(fence_value_temp, frames_in_flight->fence_event))) { - LOG_1("DirectX12 SetEventOnCompletion"); + if (FAILED(hr = frames_in_flight->fence->SetEventOnCompletion(fence_value_temp, frames_in_flight->fence_event))) { + LOG_1("DirectX12 SetEventOnCompletion: %d", {{LOG_DATA_INT32, &hr}}); ASSERT_SIMPLE(false); } @@ -170,11 +192,496 @@ void gpuapi_debug_messenger_setup(ID3D12Device* device) } inline -void gpuapi_create_logical_device(ID3D12Device** device) { - if (FAILED(D3D12CreateDevice(NULL, D3D_FEATURE_LEVEL_11_0, IID_PPVOID(device)))) { - LOG_1("DirectX12 D3D12CreateDevice"); +void gpuapi_pick_physical_device(IDXGIFactory6* instance, IDXGIAdapter1** physical_device, bool requestHighPerformanceAdapter = true) +{ + IDXGIAdapter1* adapter = NULL; + IDXGIFactory6* factory6 = NULL; + + if (SUCCEEDED(instance->QueryInterface(IID_PPVOID(&factory6)))) { + for (uint32 adapterIndex = 0; + SUCCEEDED(factory6->EnumAdapterByGpuPreference( + adapterIndex, + requestHighPerformanceAdapter == true ? DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE : DXGI_GPU_PREFERENCE_UNSPECIFIED, + IID_PPVOID(&adapter)) + ); + ++adapterIndex + ) { + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + // Don't select the Basic Render Driver adapter. + // If you want a software adapter, pass in "/warp" on the command line. + continue; + } + + // Check to see whether the adapter supports Direct3D 12, but don't create the actual device yet. 
+ if (SUCCEEDED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), NULL))) { + break; + } + } + } + + if(!adapter) { + for (uint32 adapterIndex = 0; SUCCEEDED(instance->EnumAdapters1(adapterIndex, &adapter)); ++adapterIndex) { + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + // Don't select the Basic Render Driver adapter. + continue; + } + + // Check to see whether the adapter supports Direct3D 12, but don't create the actual device yet. + if (SUCCEEDED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), NULL))) { + break; + } + } + } + + *physical_device = adapter; + if (factory6) { + factory6->Release(); + } +} + +inline +void gpuapi_create_logical_device(IDXGIAdapter1* physical_device, ID3D12Device** device) +{ + HRESULT hr; + if (FAILED(hr = D3D12CreateDevice(physical_device, D3D_FEATURE_LEVEL_11_0, IID_PPVOID(device)))) { + LOG_1("DirectX12 D3D12CreateDevice: %d", {{LOG_DATA_INT32, &hr}}); ASSERT_SIMPLE(false); } } +inline +void gpuapi_command_buffer_create( + ID3D12Device* device, + ID3D12CommandAllocator* command_pool, + ID3D12PipelineState* pipeline, + ID3D12GraphicsCommandList** command_buffer +) +{ + HRESULT hr; + if (FAILED(hr = device->CreateCommandList( + 0, D3D12_COMMAND_LIST_TYPE_DIRECT, + command_pool, pipeline, + IID_PPVOID(command_buffer))) + ) { + LOG_1("DirectX12 CreateCommandList: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + }; +} + +static +DXGI_FORMAT gpuapi_texture_format(byte settings) +{ + if ((settings & IMAGE_SETTING_CHANNEL_4_SIZE)) { + switch (settings & IMAGE_SETTING_CHANNEL_COUNT) { + case 1: + return DXGI_FORMAT_R32_FLOAT; + case 2: + return DXGI_FORMAT_R32G32_FLOAT; + case 3: + return DXGI_FORMAT_R32G32B32_FLOAT; + case 4: + return DXGI_FORMAT_R32G32B32A32_FLOAT; + default: + UNREACHABLE(); + } + } else { + switch (settings & IMAGE_SETTING_CHANNEL_COUNT) { + case 1: + return DXGI_FORMAT_R8_UNORM; + case 2: + return 
DXGI_FORMAT_R8G8_UNORM; + case 3: + // RGB is not supported (probably due to the alignment + return DXGI_FORMAT_R8G8B8A8_UNORM; + case 4: + return DXGI_FORMAT_R8G8B8A8_UNORM; + default: + UNREACHABLE(); + } + } +} + +// @performance Sometimes we want to upload multiple textures in one go (more performant). Allow that or don't use this function in that case. +D3D12_CPU_DESCRIPTOR_HANDLE load_texture_to_gpu( + ID3D12Device* device, + ID3D12GraphicsCommandList* command_buffer, + ID3D12Resource** texture_resource, + int32 descriptorOffset, + ID3D12DescriptorHeap* srv_heap, + const Texture* texture, + RingMemory* ring +) { + DXGI_FORMAT textureFormat = gpuapi_texture_format(texture->image.image_settings); + + D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.MipLevels = 1; + textureDesc.Format = textureFormat; + textureDesc.Width = texture->image.width; + textureDesc.Height = texture->image.height; + textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + textureDesc.DepthOrArraySize = 1; + textureDesc.SampleDesc.Count = 1; + textureDesc.SampleDesc.Quality = 0; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + + D3D12_HEAP_PROPERTIES texture_heap_property = { + .Type = D3D12_HEAP_TYPE_DEFAULT, + .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN, + .CreationNodeMask = 1, + .VisibleNodeMask = 1 + }; + + HRESULT hr; + if (FAILED(hr = device->CreateCommittedResource( + &texture_heap_property, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + NULL, + IID_PPVOID(texture_resource))) + ) { + LOG_1("DirectX12 CreateCommittedResource: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + + return {0}; + } + + const D3D12_RESOURCE_DESC DestinationDesc = (*texture_resource)->GetDesc(); + uint64 uploadBufferSize = 0; + ID3D12Device* pDevice = NULL; + (*texture_resource)->GetDevice(IID_PPVOID(&pDevice)); + pDevice->GetCopyableFootprints(&DestinationDesc, 0, 1, 0, NULL, NULL, NULL, 
&uploadBufferSize); + + D3D12_RESOURCE_DESC texture_upload_buffer = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = 0, + .Width = uploadBufferSize, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = { + .Count = 1, + .Quality = 0, + }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = D3D12_RESOURCE_FLAG_NONE + }; + + D3D12_HEAP_PROPERTIES texture_upload_heap_property = { + .Type = D3D12_HEAP_TYPE_UPLOAD, + .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN, + .CreationNodeMask = 1, + .VisibleNodeMask = 1 + }; + + ID3D12Resource* texture_upload_heap; + if (FAILED(hr = device->CreateCommittedResource( + &texture_heap_property, + D3D12_HEAP_FLAG_NONE, + &texture_upload_buffer, + D3D12_RESOURCE_STATE_GENERIC_READ, + NULL, + IID_PPVOID(&texture_upload_heap))) + ) { + if (pDevice) { + pDevice->Release(); + } + + LOG_1("DirectX12 CreateCommittedResource: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + + return {0}; + } + + int32 pixel_size = image_pixel_size_from_type(texture->image.image_settings); + D3D12_SUBRESOURCE_DATA textureData[] = { + { + .pData = texture->image.pixels, + .RowPitch = texture->image.width * pixel_size, + .SlicePitch = (texture->image.width * pixel_size) * texture->image.height, + } + }; + + uint32 number_of_resources = ARRAY_COUNT(textureData); + uint32 FirstSubresource = 0; + uint64 IntermediateOffset = 0; + uint64 RequiredSize = 0; + uint64 MemToAlloc = (uint64) (sizeof(D3D12_PLACED_SUBRESOURCE_FOOTPRINT) + sizeof(uint32) + sizeof(uint64)) * number_of_resources; + + D3D12_PLACED_SUBRESOURCE_FOOTPRINT* pLayouts = (D3D12_PLACED_SUBRESOURCE_FOOTPRINT *) ring_get_memory(ring, MemToAlloc, 64); + uint64* pRowSizesInBytes = (uint64 *) (pLayouts + number_of_resources); + uint32* pNumRows = (uint32 *) (pRowSizesInBytes + number_of_resources); + + pDevice->GetCopyableFootprints(&DestinationDesc, FirstSubresource, 
number_of_resources, IntermediateOffset, pLayouts, pNumRows, pRowSizesInBytes, &RequiredSize); + pDevice->Release(); + + const D3D12_RESOURCE_DESC IntermediateDesc = texture_upload_heap->GetDesc(); + if (IntermediateDesc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER + || IntermediateDesc.Width < RequiredSize + pLayouts[0].Offset + || RequiredSize > ((size_t) -1) + || (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && + (FirstSubresource != 0 || number_of_resources != 1) + ) + ) { + if (texture_upload_heap) { + texture_upload_heap->Release(); + } + + LOG_1("DirectX12 texture resource setup"); + ASSERT_SIMPLE(false); + + return {0}; + } + + byte* pData; + if (FAILED(hr = texture_upload_heap->Map(0, NULL, (void **) &pData))) { + if (texture_upload_heap) { + texture_upload_heap->Release(); + } + + LOG_1("DirectX12 Map: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + + return {0}; + } + + for (uint32 i = 0; i < number_of_resources; ++i) { + ASSERT_SIMPLE(pRowSizesInBytes[i] <= ((size_t) -1)); + + D3D12_MEMCPY_DEST DestData = { pData + pLayouts[i].Offset, pLayouts[i].Footprint.RowPitch, ((size_t) pLayouts[i].Footprint.RowPitch) * ((size_t) pNumRows[i]) }; + for (uint32 z = 0; z < pLayouts[i].Footprint.Depth; ++z) { + byte* pDestSlice = ((byte *) DestData.pData) + DestData.SlicePitch * z; + byte* pSrcSlice = ((byte *) textureData[i].pData) + textureData[i].SlicePitch * ((intptr_t) z); + for (uint32 y = 0; y < pNumRows[i]; ++y) { + memcpy( + pDestSlice + DestData.RowPitch * y, + pSrcSlice + textureData[i].RowPitch * ((intptr_t) y), + (size_t) pRowSizesInBytes[i] + ); + } + } + } + texture_upload_heap->Unmap(0, NULL); + + if (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { + command_buffer->CopyBufferRegion( + *texture_resource, 0, texture_upload_heap, pLayouts[0].Offset, pLayouts[0].Footprint.Width + ); + } else { + for (uint32 i = 0; i < number_of_resources; ++i) { + D3D12_TEXTURE_COPY_LOCATION Dst = { + .pResource = 
*texture_resource, + .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + .SubresourceIndex = i + FirstSubresource, + }; + + D3D12_TEXTURE_COPY_LOCATION Src = { + .pResource = texture_upload_heap, + .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + .PlacedFootprint = pLayouts[i], + }; + + command_buffer->CopyTextureRegion(&Dst, 0, 0, 0, &Src, NULL); + } + } + + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = *texture_resource, + .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST, + .StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, + } + }; + command_buffer->ResourceBarrier(1, &barrier); + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = textureDesc.Format; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + + D3D12_CPU_DESCRIPTOR_HANDLE srv_handle = srv_heap->GetCPUDescriptorHandleForHeapStart(); + device->CreateShaderResourceView(*texture_resource, &srvDesc, srv_handle); + + if (texture_upload_heap) { + texture_upload_heap->Release(); + } + + srv_handle.ptr += descriptorOffset * device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + return srv_handle; +} + +void gpuapi_vertex_buffer_create( + ID3D12Device* device, + D3D12_VERTEX_BUFFER_VIEW* vertex_buffer_view, + ID3D12Resource** vertex_buffer, + const void* __restrict vertices, uint32 vertex_size, uint32 vertex_count +) +{ + D3D12_RESOURCE_DESC resource_info = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = 0, + .Width = vertex_size * vertex_count, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = { + .Count = 1, + .Quality = 0 + }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = 
D3D12_RESOURCE_FLAG_NONE + }; + + // Note: using upload heaps to transfer static data like vert buffers is not + // recommended. Every time the GPU needs it, the upload heap will be marshalled + // over. Please read up on Default Heap usage. An upload heap is used here for + // code simplicity and because there are very few verts to actually transfer. + D3D12_HEAP_PROPERTIES heap_property = { + .Type = D3D12_HEAP_TYPE_UPLOAD, + .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN, + .CreationNodeMask = 1, + .VisibleNodeMask = 1 + }; + + HRESULT hr; + if (FAILED(hr = device->CreateCommittedResource( + &heap_property, + D3D12_HEAP_FLAG_NONE, + &resource_info, + D3D12_RESOURCE_STATE_GENERIC_READ, + NULL, + IID_PPVOID(vertex_buffer))) + ) { + LOG_1("DirectX12 CreateCommittedResource: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + + return; + } + + // Copy the triangle data to the vertex buffer + uint8* pVertexDataBegin; + // We do not intend to read from this resource on the CPU + D3D12_RANGE readRange = {}; + if (FAILED(hr = (*vertex_buffer)->Map(0, &readRange, (void **) &pVertexDataBegin))) { + LOG_1("DirectX12 Map: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + } + + memcpy(pVertexDataBegin, vertices, vertex_size * vertex_count); + (*vertex_buffer)->Unmap(0, NULL); + + // Initialize the vertex buffer view + vertex_buffer_view->BufferLocation = (*vertex_buffer)->GetGPUVirtualAddress(); + vertex_buffer_view->StrideInBytes = vertex_size; + vertex_buffer_view->SizeInBytes = vertex_size * vertex_count; +} + +void gpuapi_vertex_buffer_update( + ID3D12Resource* vertex_buffer, + const void* __restrict vertices, + uint32 vertex_size, + uint32 vertex_count, + uint32 offset = 0 +) +{ + uint64 size = vertex_count * vertex_size; + + uint8* pVertexDataBegin; + D3D12_RANGE readRange = {}; + D3D12_RANGE writeRange = { offset, offset + size }; + + HRESULT hr; + if (FAILED(hr = vertex_buffer->Map(0, &readRange, 
(void**)&pVertexDataBegin))) { + LOG_1("DirectX12 Map: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + return; + } + + memcpy(pVertexDataBegin + offset, vertices, size); + + vertex_buffer->Unmap(0, &writeRange); +} + +// In directx this is actually called a constant buffer +void gpuapi_uniform_buffers_create( + ID3D12Device* device, + ID3D12Resource** uniform_buffer, + const void* __restrict data, uint32 buffer_size +) +{ + D3D12_RESOURCE_DESC resource_info = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = 0, + .Width = buffer_size, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = { + .Count = 1, + .Quality = 0 + }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = D3D12_RESOURCE_FLAG_NONE + }; + + // Note: using upload heaps to transfer static data like vert buffers is not + // recommended. Every time the GPU needs it, the upload heap will be marshalled + // over. Please read up on Default Heap usage. An upload heap is used here for + // code simplicity and because there are very few verts to actually transfer. 
+ D3D12_HEAP_PROPERTIES heap_property = { + .Type = D3D12_HEAP_TYPE_UPLOAD, + .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN, + .CreationNodeMask = 1, + .VisibleNodeMask = 1 + }; + + device->CreateCommittedResource( + &heap_property, + D3D12_HEAP_FLAG_NONE, + &resource_info, + D3D12_RESOURCE_STATE_GENERIC_READ, + NULL, + IID_PPV_ARGS(uniform_buffer)); + + D3D12_RANGE readRange = {}; + + uint8* pCBDataBegin; + (*uniform_buffer)->Map(0, &readRange, (void **) &pCBDataBegin); + memcpy(pCBDataBegin, &data, buffer_size); + (*uniform_buffer)->Unmap(0, NULL); +} + +void gpuapi_uniform_buffer_update( + ID3D12Resource* uniform_buffer, + const void* __restrict data, + uint32 buffer_size +) +{ + D3D12_RANGE readRange = {}; + uint8* pCBDataBegin = nullptr; + uniform_buffer->Map(0, &readRange, (void **) &pCBDataBegin); + + memcpy(pCBDataBegin, data, buffer_size); + + uniform_buffer->Unmap(0, nullptr); +} + #endif \ No newline at end of file diff --git a/gpuapi/direct3d/ShaderUtils.h b/gpuapi/direct3d/ShaderUtils.h index 83755fc..5a83471 100644 --- a/gpuapi/direct3d/ShaderUtils.h +++ b/gpuapi/direct3d/ShaderUtils.h @@ -17,7 +17,13 @@ #include "../../stdlib/Types.h" #include "../../memory/RingMemory.h" #include "../../log/Log.h" +#include "../../log/Stats.h" +#include "../../log/PerformanceProfiler.h" +#include "../../object/Vertex.h" +#include "../../utils/StringUtils.h" +#include "../../log/Log.h" #include "../ShaderType.h" +#include "../GpuAttributeType.h" #pragma comment(lib, "d3dcompiler.lib") @@ -33,7 +39,7 @@ const char* shader_type_index(ShaderType type) } } -ID3DBlob* shader_make(const char* type, const char* source, int32 source_size) +ID3DBlob* gpuapi_shader_make(const char* type, const char* source, int32 source_size) { LOG_1("Create shader"); #if DEBUG || INTERNAL @@ -44,8 +50,10 @@ ID3DBlob* shader_make(const char* type, const char* source, int32 source_size) ID3DBlob* blob; ID3DBlob* errMsgs; - if 
(FAILED(D3DCompile2(source, source_size, NULL, NULL, NULL, "main", type, compileFlags, 0, 0, NULL, 0, &blob, &errMsgs))) { - LOG_1("DirectX12 D3DCompile2"); + HRESULT hr; + + if (FAILED(hr = D3DCompile2(source, source_size, NULL, NULL, NULL, "main", type, compileFlags, 0, 0, NULL, 0, &blob, &errMsgs))) { + LOG_1("DirectX12 D3DCompile2: %d, %s", {{LOG_DATA_INT32, &hr}, {LOG_DATA_CHAR_STR, errMsgs->GetBufferPointer()}}); ASSERT_SIMPLE(false); } @@ -58,24 +66,21 @@ ID3DBlob* shader_make(const char* type, const char* source, int32 source_size) return blob; } -ID3D12PipelineState* pipeline_make( + +ID3D12PipelineState* gpuapi_pipeline_make( ID3D12Device* device, ID3D12PipelineState** pipeline, ID3D12RootSignature* pipeline_layout, + D3D12_INPUT_ELEMENT_DESC* descriptor_set_layouts, uint32 layout_count, ID3DBlob* vertex_shader, ID3DBlob* fragment_shader, ID3DBlob* ) { PROFILE(PROFILE_PIPELINE_MAKE, NULL, false, true); LOG_1("Create pipeline"); - // @todo We need to find a way to do this somewhere else: - D3D12_INPUT_ELEMENT_DESC input_element_info[] = { - { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, - { "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 } - }; D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_info = {}; - pipeline_state_info.InputLayout = { input_element_info, _countof(input_element_info) }; + pipeline_state_info.InputLayout = { descriptor_set_layouts, layout_count }; pipeline_state_info.pRootSignature = pipeline_layout; pipeline_state_info.VS = { .pShaderBytecode = vertex_shader->GetBufferPointer(), @@ -122,8 +127,9 @@ ID3D12PipelineState* pipeline_make( pipeline_state_info.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; pipeline_state_info.SampleDesc.Count = 1; - if (FAILED(device->CreateGraphicsPipelineState(&pipeline_state_info, IID_PPV_ARGS(pipeline)))) { - LOG_1("DirectX12 CreateGraphicsPipelineState"); + HRESULT hr; + if (FAILED(hr = 
device->CreateGraphicsPipelineState(&pipeline_state_info, IID_PPV_ARGS(pipeline)))) { + LOG_1("DirectX12 CreateGraphicsPipelineState: %d", {{LOG_DATA_INT32, &hr}}); ASSERT_SIMPLE(false); } @@ -133,10 +139,223 @@ ID3D12PipelineState* pipeline_make( return *pipeline; } -inline -void pipeline_use(ID3D12GraphicsCommandList* command_buffer, ID3D12PipelineState* pipelineState) +FORCE_INLINE +void gpuapi_pipeline_use(ID3D12GraphicsCommandList* command_buffer, ID3D12PipelineState* pipelineState) { command_buffer->SetPipelineState(pipelineState); } +// In DirectX Attribute info and descriptor set layout are combined into one +constexpr +void gpuapi_attribute_info_create(GpuAttributeType type, D3D12_INPUT_ELEMENT_DESC* attr) +{ + switch (type) { + case GPU_ATTRIBUTE_TYPE_VERTEX_3D: { + attr[0] = { + .SemanticIndex = 0, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3D, position), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[1] = { + .SemanticIndex = 1, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3D, normal), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[2] = { + .SemanticIndex = 2, + .Format = DXGI_FORMAT_R32G32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3D, tex_coord), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[3] = { + .SemanticIndex = 3, + .Format = DXGI_FORMAT_R32G32B32A32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3D, color), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_NORMAL: { + attr[0] = { + .SemanticIndex = 0, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DNormal, position), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + 
attr[1] = { + .SemanticIndex = 1, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DNormal, normal), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_COLOR: { + attr[0] = { + .SemanticIndex = 0, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DColor, position), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[1] = { + .SemanticIndex = 1, + .Format = DXGI_FORMAT_R32G32B32A32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DColor, color), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_TEXTURE_COLOR: { + attr[0] = { + .SemanticIndex = 0, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DTextureColor, position), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[1] = { + .SemanticIndex = 1, + .Format = DXGI_FORMAT_R32G32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DTextureColor, texture_color), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_SAMPLER_TEXTURE_COLOR: { + attr[0] = { + .SemanticIndex = 0, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DSamplerTextureColor, position), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[1] = { + .SemanticIndex = 1, + .Format = DXGI_FORMAT_R32_SINT, + .AlignedByteOffset = offsetof(Vertex3DSamplerTextureColor, sampler), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[2] = { + .SemanticIndex = 2, + .Format = DXGI_FORMAT_R32G32_FLOAT, + .AlignedByteOffset = 
offsetof(Vertex3DSamplerTextureColor, texture_color), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_2D_TEXTURE: { + attr[0] = { + .SemanticIndex = 0, + .Format = DXGI_FORMAT_R32G32_FLOAT, + .AlignedByteOffset = offsetof(Vertex2DTexture, position), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[1] = { + .SemanticIndex = 1, + .Format = DXGI_FORMAT_R32G32_FLOAT, + .AlignedByteOffset = offsetof(Vertex2DTexture, tex_coord), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + } return; + default: + UNREACHABLE(); + }; +} + +int32 directx_program_optimize(const char* input, char* output) +{ + const char* read_ptr = input; + char* write_ptr = output; + bool in_string = false; + + while (*read_ptr) { + str_skip_empty(&read_ptr); + + if (write_ptr != output + && *(write_ptr - 1) != '\n' && *(write_ptr - 1) != ';' && *(write_ptr - 1) != '{' + && *(write_ptr - 1) != '(' + && *(write_ptr - 1) != ',' + ) { + *write_ptr++ = '\n'; + } + + // Handle single-line comments (//) + if (*read_ptr == '/' && *(read_ptr + 1) == '/' && !in_string) { + str_move_to(&read_ptr, '\n'); + + continue; + } + + // Handle multi-line comments (/* */) + if (*read_ptr == '/' && *(read_ptr + 1) == '*' && !in_string) { + // Go to end of comment + while (*read_ptr && (*read_ptr != '*' || *(read_ptr + 1) != '/')) { + ++read_ptr; + } + + if (*read_ptr == '*' && *(read_ptr + 1) == '/') { + read_ptr += 2; + } + + continue; + } + + // Handle strings to avoid removing content within them + if (*read_ptr == '"') { + in_string = !in_string; + } + + // Copy valid characters to write_ptr + while (*read_ptr && !is_eol(read_ptr) && *read_ptr != '"' + && !(*read_ptr == '/' && (*(read_ptr + 1) == '/' || *(read_ptr + 1) == '*')) + ) { + if (!in_string + && (*read_ptr == '*' || *read_ptr == '/' || *read_ptr == '=' || 
*read_ptr == '+' || *read_ptr == '-' || *read_ptr == '%' + || *read_ptr == '(' || *read_ptr == ')' + || *read_ptr == '{' || *read_ptr == '}' + || *read_ptr == ',' || *read_ptr == '?' || *read_ptr == ':' || *read_ptr == ';' + || *read_ptr == '&' || *read_ptr == '|' + || *read_ptr == '>' || *read_ptr == '<' + ) + ) { + if (is_whitespace(*(write_ptr - 1)) || *(write_ptr - 1) == '\n') { + --write_ptr; + } + + *write_ptr++ = *read_ptr++; + + if (*read_ptr && is_whitespace(*read_ptr)) { + ++read_ptr; + } + } else { + *write_ptr++ = *read_ptr++; + } + } + } + + *write_ptr = '\0'; + + // -1 to remove \0 from length, same as strlen + return (int32) (write_ptr - output); +} + #endif \ No newline at end of file diff --git a/gpuapi/opengl/AppCmdBuffer.h b/gpuapi/opengl/AppCmdBuffer.h index 731995b..4f6da61 100644 --- a/gpuapi/opengl/AppCmdBuffer.h +++ b/gpuapi/opengl/AppCmdBuffer.h @@ -47,7 +47,7 @@ void* cmd_shader_load_sync(AppCmdBuffer* __restrict cb, Shader* __restrict shade } // Make sub shader - shader_assets[i] = shader_make( + shader_assets[i] = gpuapi_shader_make( shader_type_index((ShaderType) (i + 1)), (char *) shader_asset->self ); @@ -57,7 +57,7 @@ void* cmd_shader_load_sync(AppCmdBuffer* __restrict cb, Shader* __restrict shade } // Make shader/program - shader->id = pipeline_make( + shader->id = gpuapi_pipeline_make( shader_assets[0], shader_assets[1], shader_assets[2] ); diff --git a/gpuapi/opengl/OpenglUtils.h b/gpuapi/opengl/OpenglUtils.h index 1a6e076..34fb62c 100644 --- a/gpuapi/opengl/OpenglUtils.h +++ b/gpuapi/opengl/OpenglUtils.h @@ -33,7 +33,7 @@ { GLenum err; while ((err = glGetError()) != GL_NO_ERROR) { - LOG_FORMAT_1("Opengl error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); + LOG_1("Opengl error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); ASSERT_SIMPLE(err == GL_NO_ERROR); } } @@ -108,6 +108,7 @@ void opengl_info(OpenglInfo* info) } } +// @todo rename to gpuapi_* inline uint32 get_texture_data_type(uint32 texture_data_type) { @@ -145,6 +146,7 @@ uint32 
get_texture_data_type(uint32 texture_data_type) // 4. load_texture_to_gpu // 5. texture_use +// @todo this should have a gpuapi_ name inline void prepare_texture(Texture* texture) { @@ -155,9 +157,11 @@ void prepare_texture(Texture* texture) glBindTexture(texture_data_type, (GLuint) texture->id); } +// @todo this should have a gpuapi_ name inline void load_texture_to_gpu(const Texture* texture, int32 mipmap_level = 0) { + // @todo also handle different texture formats (R, RG, RGB, 1 byte vs 4 byte per pixel) uint32 texture_data_type = get_texture_data_type(texture->texture_data_type); glTexImage2D( texture_data_type, mipmap_level, GL_RGBA, @@ -173,6 +177,7 @@ void load_texture_to_gpu(const Texture* texture, int32 mipmap_level = 0) LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, texture->image.pixel_count * image_pixel_size_from_type(texture->image.image_settings)); } +// @todo this should have a gpuapi_ name inline void texture_use(const Texture* texture) { @@ -182,6 +187,7 @@ void texture_use(const Texture* texture) glBindTexture(texture_data_type, (GLuint) texture->id); } +// @todo this should have a gpuapi_ name inline void texture_delete(Texture* texture) { glDeleteTextures(1, &texture->id); @@ -392,14 +398,23 @@ void gpuapi_buffer_update_dynamic(uint32 vbo, int32 size, const void* data) LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, size); } +// @todo change name. vulkan and directx have different functions for vertex buffer updates inline -void gpuapi_buffer_update_sub(uint32 vbo, int32 offset, int32 size, const void* data) +void gpuapi_vertex_buffer_update( + uint32 vbo, + const void* data, int32 vertex_size, int32 vertex_count, int32 offset = 0 +) { glBindBuffer(GL_ARRAY_BUFFER, vbo); - glBufferSubData(GL_ARRAY_BUFFER, offset, size, data); + // @performance Does this if even make sense or is glBufferSubData always the better choice? 
+ if (offset) { + glBufferSubData(GL_ARRAY_BUFFER, offset, vertex_size * vertex_count - offset, ((byte *) data) + offset); + } else { + glBufferData(GL_ARRAY_BUFFER, vertex_size * vertex_count, data, GL_DYNAMIC_DRAW); + } ASSERT_GPU_API(); - LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, size); + LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, vertex_size * vertex_count - offset); } inline diff --git a/gpuapi/opengl/ShaderUtils.h b/gpuapi/opengl/ShaderUtils.h index 1d5cc53..4e1be6a 100644 --- a/gpuapi/opengl/ShaderUtils.h +++ b/gpuapi/opengl/ShaderUtils.h @@ -15,6 +15,7 @@ #include "../../log/Stats.h" #include "../../log/PerformanceProfiler.h" #include "../../object/Vertex.h" +#include "../../utils/StringUtils.h" #include "Shader.h" #include "Opengl.h" #include "../ShaderType.h" @@ -44,79 +45,79 @@ int32 shader_type_index(ShaderType type) // @todo change naming to gpuapi_uniform_buffer_update (same as vulkan) // @todo change from upload to uniform upload since it is a special form of upload FORCE_INLINE -void shader_set_value(uint32 location, bool value) +void gpuapi_uniform_buffer_update_value(uint32 location, bool value) { glUniform1i(location, (int32) value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(value)); } FORCE_INLINE -void shader_set_value(uint32 location, int32 value) +void gpuapi_uniform_buffer_update_value(uint32 location, int32 value) { glUniform1i(location, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(value)); } FORCE_INLINE -void shader_set_value(uint32 location, f32 value) +void gpuapi_uniform_buffer_update_value(uint32 location, f32 value) { glUniform1f(location, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(value)); } FORCE_INLINE -void shader_set_v2(uint32 location, const f32* value) +void gpuapi_uniform_buffer_update_v2(uint32 location, const f32* value) { glUniform2fv(location, 1, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 2); } FORCE_INLINE -void 
shader_set_v3(uint32 location, const f32* value) +void gpuapi_uniform_buffer_update_v3(uint32 location, const f32* value) { glUniform3fv(location, 1, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 3); } FORCE_INLINE -void shader_set_v4(uint32 location, const f32* value) +void gpuapi_uniform_buffer_update_v4(uint32 location, const f32* value) { glUniform4fv(location, 1, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 4); } FORCE_INLINE -void shader_set_m2(uint32 location, const f32* value) +void gpuapi_uniform_buffer_update_m2(uint32 location, const f32* value) { glUniformMatrix2fv(location, 1, GL_FALSE, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 4); } FORCE_INLINE -void shader_set_m3(uint32 location, const f32* value) +void gpuapi_uniform_buffer_update_m3(uint32 location, const f32* value) { glUniformMatrix3fv(location, 1, GL_FALSE, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 9); } FORCE_INLINE -void shader_set_m4(uint32 location, const f32* value) +void gpuapi_uniform_buffer_update_m4(uint32 location, const f32* value) { glUniformMatrix4fv(location, 1, GL_FALSE, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 16); } FORCE_INLINE -uint32 shader_get_attrib_location(uint32 id, const char* name) +uint32 opengl_get_attrib_location(uint32 id, const char* name) { - // By using this you can retreive the shader variable name at a point where and when you know it + // By using this you can retrieve the shader variable name at a point where and when you know it // BUT set values later on in generalized functions without knowing the shader variable name // Basically like pointers return glGetAttribLocation(id, name); } inline -void shader_check_link_errors(uint32 id, char* log) +void opengl_check_link_errors(uint32 id, char* log) { GLint success; glGetProgramiv(id, GL_LINK_STATUS, &success); @@ -126,7 +127,7 @@ void 
shader_check_link_errors(uint32 id, char* log) } inline -void shader_check_compile_errors(uint32 id, char* log) +void opengl_check_compile_errors(uint32 id, char* log) { GLint success; glGetShaderiv(id, GL_COMPILE_STATUS, &success); @@ -135,17 +136,14 @@ void shader_check_compile_errors(uint32 id, char* log) } } -int32 shader_program_optimize(const char* input, char* output) +int32 opengl_program_optimize(const char* __restrict input, char* __restrict output) { const char* read_ptr = input; char* write_ptr = output; bool in_string = false; while (*read_ptr) { - // Remove leading whitespace - while (*read_ptr == ' ' || *read_ptr == '\t' || is_eol(read_ptr)) { - ++read_ptr; - } + str_skip_empty(&read_ptr); if (write_ptr != output && *(write_ptr - 1) != '\n' && *(write_ptr - 1) != ';' && *(write_ptr - 1) != '{' @@ -157,10 +155,7 @@ int32 shader_program_optimize(const char* input, char* output) // Handle single-line comments (//) if (*read_ptr == '/' && *(read_ptr + 1) == '/' && !in_string) { - // Go to end of line - while (*read_ptr && *read_ptr != '\n') { - ++read_ptr; - } + str_move_to(&read_ptr, '\n'); continue; } @@ -218,7 +213,7 @@ int32 shader_program_optimize(const char* input, char* output) return (int32) (write_ptr - output); } -GLuint shader_make(GLenum type, const char* source) +GLuint gpuapi_shader_make(GLenum type, const char* source) { LOG_1("Create shader"); GLuint shader = glCreateShader(type); @@ -249,7 +244,7 @@ GLuint shader_make(GLenum type, const char* source) } inline -int32 program_get_size(uint32 program) +int32 opengl_program_get_size(uint32 program) { int32 size; glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &size); @@ -259,7 +254,7 @@ int32 program_get_size(uint32 program) // @todo Instead of passing the shaders one by one, pass one array called ShaderStage* shader_stages // This way we can handle this more dynamic -GLuint pipeline_make( +GLuint gpuapi_pipeline_make( GLuint vertex_shader, GLuint fragment_shader, GLint geometry_shader @@ 
-316,9 +311,8 @@ GLuint pipeline_make( return program; } -// @question Depending on how the different gpu apis work we may want to pass Shader* to have a uniform structure FORCE_INLINE -void pipeline_use(uint32 id) +void gpuapi_pipeline_use(uint32 id) { glUseProgram(id); } @@ -347,7 +341,7 @@ void gpuapi_attribute_info_create(GpuAttributeType type, OpenglVertexInputAttrib .count = 3, .format = GL_FLOAT, .stride = sizeof(Vertex3D), - .offset = (void *) offsetof(Vertex3DTextureColor, position) + .offset = (void *) offsetof(Vertex3D, position) }; attr[1] = { @@ -402,7 +396,7 @@ void gpuapi_attribute_info_create(GpuAttributeType type, OpenglVertexInputAttrib attr[1] = { .location = 1, - .count = 2, + .count = 4, .format = GL_FLOAT, .stride = sizeof(Vertex3DColor), .offset = (void *) offsetof(Vertex3DColor, color) @@ -472,7 +466,7 @@ void gpuapi_attribute_info_create(GpuAttributeType type, OpenglVertexInputAttrib }; } -void gpuapi_descriptor_set_layout_create(Shader* shader, const OpenglDescriptorSetLayoutBinding* bindings, int32 binding_length) { +void gpuapi_descriptor_set_layout_create(Shader* __restrict shader, const OpenglDescriptorSetLayoutBinding* __restrict bindings, int32 binding_length) { for (int32 i = 0; i < binding_length; ++i) { shader->descriptor_set_layout[i].binding = glGetUniformLocation(shader->id, bindings[i].name); shader->descriptor_set_layout[i].name = bindings[i].name; diff --git a/gpuapi/vulkan/AppCmdBuffer.h b/gpuapi/vulkan/AppCmdBuffer.h index 6bdba3c..f01dc78 100644 --- a/gpuapi/vulkan/AppCmdBuffer.h +++ b/gpuapi/vulkan/AppCmdBuffer.h @@ -50,7 +50,7 @@ void* cmd_shader_load_sync( } // Make sub shader - shader_assets[i] = shader_make( + shader_assets[i] = gpuapi_shader_make( device, (char *) shader_asset->self, shader_asset->ram_size @@ -61,7 +61,7 @@ void* cmd_shader_load_sync( } // Make shader/program - shader->id = pipeline_make( + shader->id = gpuapi_pipeline_make( device, render_pass, pipeline_layout, pipeline, descriptor_set_layouts, 
shader_assets[0], shader_assets[1], shader_assets[2] diff --git a/gpuapi/vulkan/ShaderUtils.h b/gpuapi/vulkan/ShaderUtils.h index 8abfcad..9f12a3d 100644 --- a/gpuapi/vulkan/ShaderUtils.h +++ b/gpuapi/vulkan/ShaderUtils.h @@ -34,7 +34,7 @@ uint32_t shader_get_uniform_location( } inline -void shader_set_value(VkDevice device, VkDescriptorSet descriptorSet, uint32_t binding, VkDescriptorType descriptorType, int32_t value) +void gpuapi_uniform_buffer_update_value(VkDevice device, VkDescriptorSet descriptorSet, uint32_t binding, VkDescriptorType descriptorType, int32_t value) { VkDescriptorBufferInfo bufferInfo = {}; bufferInfo.buffer = {}; // You should have a buffer holding the value @@ -54,7 +54,7 @@ void shader_set_value(VkDevice device, VkDescriptorSet descriptorSet, uint32_t b } inline -VkShaderModule shader_make(VkDevice device, const char* source, int32 source_size) +VkShaderModule gpuapi_shader_make(VkDevice device, const char* source, int32 source_size) { LOG_1("Create shader"); // Create shader module create info @@ -68,7 +68,7 @@ VkShaderModule shader_make(VkDevice device, const char* source, int32 source_siz VkResult result = vkCreateShaderModule(device, &create_info, NULL, &shader_module); if (result != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateShaderModule: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateShaderModule: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return VK_NULL_HANDLE; @@ -144,7 +144,7 @@ void gpuapi_attribute_info_create(GpuAttributeType type, VkVertexInputAttributeD attr[1] = { .location = 1, .binding = 0, - .format = VK_FORMAT_R32_UINT, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, .offset = offsetof(Vertex3DColor, color) }; } return; @@ -190,15 +190,15 @@ void gpuapi_attribute_info_create(GpuAttributeType type, VkVertexInputAttributeD }; } -inline -void pipeline_use(VkCommandBuffer command_buffer, VkPipeline pipeline) +FORCE_INLINE +void gpuapi_pipeline_use(VkCommandBuffer command_buffer, 
VkPipeline pipeline) { vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } // @todo Instead of passing the shaders one by one, pass one array called ShaderStage* shader_stages // This way we can handle this more dynamic -VkPipeline pipeline_make( +VkPipeline gpuapi_pipeline_make( VkDevice device, VkRenderPass render_pass, VkPipelineLayout* __restrict pipeline_layout, VkPipeline* __restrict pipeline, VkDescriptorSetLayout* descriptor_set_layouts, VkShaderModule vertex_shader, VkShaderModule fragment_shader, @@ -301,7 +301,7 @@ VkPipeline pipeline_make( VkResult result; if ((result = vkCreatePipelineLayout(device, &pipeline_info_layout, NULL, pipeline_layout)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreatePipelineLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreatePipelineLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return NULL; @@ -324,7 +324,7 @@ VkPipeline pipeline_make( pipeline_info.basePipelineHandle = VK_NULL_HANDLE; if ((result = vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipeline_info, NULL, pipeline)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateGraphicsPipelines: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateGraphicsPipelines: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return NULL; @@ -358,7 +358,7 @@ void gpuapi_descriptor_set_layout_create( VkResult result; if ((result = vkCreateDescriptorSetLayout(device, &layout_info, NULL, descriptor_set_layout)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateDescriptorSetLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateDescriptorSetLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -390,7 +390,7 @@ void vulkan_descriptor_pool_create( VkResult result; if ((result = vkCreateDescriptorPool(device, &poolInfo, NULL, descriptor_pool)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateDescriptorPool: %d", {{LOG_DATA_INT32, 
(int32 *) &result}}); + LOG_1("Vulkan vkCreateDescriptorPool: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -417,7 +417,7 @@ void vulkan_descriptor_sets_create( VkResult result; if ((result = vkAllocateDescriptorSets(device, &alloc_info, descriptor_sets)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkAllocateDescriptorSets: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkAllocateDescriptorSets: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return; diff --git a/gpuapi/vulkan/VulkanUtils.h b/gpuapi/vulkan/VulkanUtils.h index ebd034a..2a3bba1 100644 --- a/gpuapi/vulkan/VulkanUtils.h +++ b/gpuapi/vulkan/VulkanUtils.h @@ -31,17 +31,18 @@ #include "../../log/Stats.h" #include "../../log/PerformanceProfiler.h" #include "../../memory/RingMemory.h" +#include "../../compiler/CompilerUtils.h" #include "ShaderUtils.h" #include "FramesInFlightContainer.h" #if DEBUG - #define ASSERT_GPU_API(x) \ - do { \ - VkResult err = (x); \ - if (err) { \ - LOG_FORMAT_1("Vulkan error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); \ - ASSERT_SIMPLE(false); \ - } \ + #define ASSERT_GPU_API(x) \ + do { \ + VkResult err = (x); \ + if (err) { \ + LOG_1("Vulkan error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); \ + ASSERT_SIMPLE(false); \ + } \ } while (0) #else #define ASSERT_GPU_API(x) (x) @@ -68,7 +69,11 @@ struct VulkanSwapChainSupportDetails { }; inline -void change_viewport(int32 width, int32 height, VkCommandBuffer command_buffer, VkExtent2D swapchain_extent, int32 offset_x = 0, int32 offset_y = 0) +void change_viewport( + int32 width, int32 height, + VkCommandBuffer command_buffer, VkExtent2D swapchain_extent, + int32 offset_x = 0, int32 offset_y = 0 +) { VkViewport viewport = {}; viewport.x = (f32) offset_x; @@ -178,7 +183,7 @@ void vulkan_instance_create( if (validation_layer_count && (err = vulkan_check_validation_layer_support(validation_layers, validation_layer_count, ring)) ) { - LOG_FORMAT_1("Vulkan validation_layer missing: %d", 
{{LOG_DATA_CHAR_STR, (void *) validation_layers[-err - 1]}}); + LOG_1("Vulkan validation_layer missing: %d", {{LOG_DATA_CHAR_STR, (void *) validation_layers[-err - 1]}}); ASSERT_SIMPLE(false); return; @@ -187,7 +192,7 @@ void vulkan_instance_create( if (extension_count && (err = vulkan_check_extension_support(extensions, extension_count, ring)) ) { - LOG_FORMAT_1("Vulkan extension missing: %d", {{LOG_DATA_CHAR_STR, (void *) extensions[-err - 1]}}); + LOG_1("Vulkan extension missing: %d", {{LOG_DATA_CHAR_STR, (void *) extensions[-err - 1]}}); ASSERT_SIMPLE(false); return; @@ -224,7 +229,7 @@ void vulkan_instance_create( VkResult result; if ((result = vkCreateInstance(&create_info, NULL, instance)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateInstance: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateInstance: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -240,7 +245,7 @@ void vulkan_surface_create(VkInstance instance, VkSurfaceKHR* surface, Window* w VkResult result; if ((result = vkCreateWin32SurfaceKHR(instance, &surface_create_info, NULL, surface)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateWin32SurfaceKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateWin32SurfaceKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); return; } #elif __linux__ @@ -310,7 +315,7 @@ VulkanQueueFamilyIndices vulkan_find_queue_families(VkPhysicalDevice physical_de VkResult result; if ((result = vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, i, surface, &present_support)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkGetPhysicalDeviceSurfaceSupportKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkGetPhysicalDeviceSurfaceSupportKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return indices; @@ -437,7 +442,7 @@ void gpuapi_create_logical_device( VkResult result; if ((result = vkCreateDevice(physical_device, &create_info, NULL, device)) != VK_SUCCESS) { - 
LOG_FORMAT_1("Vulkan vkCreateDevice: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateDevice: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } @@ -523,7 +528,7 @@ void gpuapi_swapchain_create( VkResult result; if ((result = vkCreateSwapchainKHR(device, &create_info, NULL, swapchain)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateSwapchainKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateSwapchainKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return; @@ -585,7 +590,7 @@ void vulkan_image_views_create( create_info.subresourceRange.layerCount = 1; if ((result = vkCreateImageView(device, &create_info, NULL, &swapchain_image_views[i])) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateImageView: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateImageView: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -632,7 +637,7 @@ void vulkan_render_pass_create( VkResult result; if ((result = vkCreateRenderPass(device, &render_pass_info, NULL, render_pass)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateRenderPass: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateRenderPass: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -660,7 +665,7 @@ void vulkan_framebuffer_create( framebufferInfo.layers = 1; if ((result = vkCreateFramebuffer(device, &framebufferInfo, NULL, &framebuffers[i])) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateFramebuffer: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateFramebuffer: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -679,12 +684,12 @@ void vulkan_command_pool_create( VkResult result; if ((result = vkCreateCommandPool(device, &pool_info, NULL, command_pool)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateCommandPool: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateCommandPool: %d", {{LOG_DATA_INT32, (int32 
*) &result}}); ASSERT_SIMPLE(false); } } -void vulkan_command_buffers_create(VkDevice device, VkCommandPool command_pool, VkCommandBuffer* command_buffers, uint32 command_buffer_count) +void gpuapi_command_buffer_create(VkDevice device, VkCommandPool command_pool, VkCommandBuffer* command_buffers, uint32 command_buffer_count) { VkCommandBufferAllocateInfo alloc_info = {}; alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; @@ -694,7 +699,7 @@ void vulkan_command_buffers_create(VkDevice device, VkCommandPool command_pool, VkResult result; if ((result = vkAllocateCommandBuffers(device, &alloc_info, command_buffers)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkAllocateCommandBuffers: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkAllocateCommandBuffers: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -716,7 +721,7 @@ void vulkan_sync_objects_create( || (result = vkCreateSemaphore(device, &semaphore_info, NULL, &frames_in_flight->render_finished_semaphores[i])) != VK_SUCCESS || (result = vkCreateFence(device, &fence_info, NULL, &frames_in_flight->fences[i])) != VK_SUCCESS ) { - LOG_FORMAT_1("Vulkan vulkan_sync_objects_create: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vulkan_sync_objects_create: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -842,18 +847,52 @@ void vulkan_transition_image_layout(VkCommandBuffer command_buffer, VkImage imag ); } -// @todo replace references with pointers +static +VkFormat gpuapi_texture_format(byte settings) +{ + if ((settings & IMAGE_SETTING_CHANNEL_4_SIZE)) { + switch (settings & IMAGE_SETTING_CHANNEL_COUNT) { + case 1: + return VK_FORMAT_R32_SFLOAT; + case 2: + return VK_FORMAT_R32G32_SFLOAT; + case 3: + return VK_FORMAT_R32G32B32_SFLOAT; + case 4: + return VK_FORMAT_R32G32B32A32_SFLOAT; + default: + UNREACHABLE(); + } + } else { + switch (settings & IMAGE_SETTING_CHANNEL_COUNT) { + case 1: + return VK_FORMAT_R8_SRGB; + case 2: + 
return VK_FORMAT_R8G8_SRGB; + case 3: + return VK_FORMAT_R8G8B8_SRGB; + case 4: + return VK_FORMAT_R8G8B8A8_SRGB; + default: + UNREACHABLE(); + } + } +} + +// @performance Sometimes we want to upload multiple textures in one go (more performant). Allow that or don't use this function in that case. void load_texture_to_gpu( VkDevice device, VkPhysicalDevice physical_device, VkCommandPool command_pool, VkQueue queue, - VkImage& texture_image, VkDeviceMemory& texture_image_memory, VkImageView& texture_image_view, VkSampler& texture_sampler, + VkImage* texture_image, VkDeviceMemory* texture_image_memory, VkImageView* texture_image_view, VkSampler* texture_sampler, const Texture* texture) { + VkFormat textureFormat = gpuapi_texture_format(texture->image.image_settings); + // Create the Vulkan image VkImageCreateInfo image_info = {}; image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = VK_FORMAT_R8G8B8A8_SRGB; + image_info.format = textureFormat; image_info.extent.width = texture->image.width; image_info.extent.height = texture->image.height; image_info.extent.depth = 1; @@ -865,19 +904,19 @@ void load_texture_to_gpu( image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - ASSERT_GPU_API(vkCreateImage(device, &image_info, NULL, &texture_image)); + ASSERT_GPU_API(vkCreateImage(device, &image_info, NULL, texture_image)); // Allocate memory for the image VkMemoryRequirements memRequirements; - vkGetImageMemoryRequirements(device, texture_image, &memRequirements); + vkGetImageMemoryRequirements(device, *texture_image, &memRequirements); VkMemoryAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; allocInfo.allocationSize = memRequirements.size; allocInfo.memoryTypeIndex = vulkan_find_memory_type(physical_device, memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - ASSERT_GPU_API(vkAllocateMemory(device, 
&allocInfo, NULL, &texture_image_memory)); - ASSERT_GPU_API(vkBindImageMemory(device, texture_image, texture_image_memory, 0)); + ASSERT_GPU_API(vkAllocateMemory(device, &allocInfo, NULL, texture_image_memory)); + ASSERT_GPU_API(vkBindImageMemory(device, *texture_image, *texture_image_memory, 0)); int32 image_size = image_pixel_size_from_type(texture->image.image_settings) * texture->image.width * texture->image.height; @@ -894,10 +933,10 @@ void load_texture_to_gpu( // Transition the image layout VkCommandBuffer command_buffer; - vulkan_command_buffers_create(device, command_pool, &command_buffer, 1); + gpuapi_command_buffer_create(device, command_pool, &command_buffer, 1); vulkan_single_commands_begin(command_buffer); - vulkan_transition_image_layout(command_buffer, texture_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vulkan_transition_image_layout(command_buffer, *texture_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); vulkan_single_commands_end(queue, command_buffer); // Copy data from the staging buffer to the image @@ -910,13 +949,13 @@ void load_texture_to_gpu( region.imageSubresource.layerCount = 1; region.imageExtent = {texture->image.width, texture->image.height, 1}; - vkCmdCopyBufferToImage(command_buffer, staging_buffer, texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); + vkCmdCopyBufferToImage(command_buffer, staging_buffer, *texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); vulkan_single_commands_end(queue, command_buffer); // Transition the image layout for shader access vulkan_command_buffer_reset(command_buffer); vulkan_single_commands_begin(command_buffer); - vulkan_transition_image_layout(command_buffer, texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + vulkan_transition_image_layout(command_buffer, *texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); 
vulkan_single_commands_end(queue, command_buffer); vulkan_single_commands_free(device, command_pool, command_buffer); @@ -928,16 +967,16 @@ void load_texture_to_gpu( // Create an image view VkImageViewCreateInfo view_info = {}; view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - view_info.image = texture_image; + view_info.image = *texture_image; view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - view_info.format = VK_FORMAT_R8G8B8A8_SRGB; + view_info.format = textureFormat; view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; view_info.subresourceRange.baseMipLevel = 0; view_info.subresourceRange.levelCount = 1; view_info.subresourceRange.baseArrayLayer = 0; view_info.subresourceRange.layerCount = 1; - ASSERT_GPU_API(vkCreateImageView(device, &view_info, NULL, &texture_image_view)); + ASSERT_GPU_API(vkCreateImageView(device, &view_info, NULL, texture_image_view)); // Create a sampler VkPhysicalDeviceProperties properties = {}; @@ -958,14 +997,14 @@ void load_texture_to_gpu( sampler_info.compareOp = VK_COMPARE_OP_ALWAYS; sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - ASSERT_GPU_API(vkCreateSampler(device, &sampler_info, NULL, &texture_sampler)); + ASSERT_GPU_API(vkCreateSampler(device, &sampler_info, NULL, texture_sampler)); } // @todo Rename to same name as opengl (or rename opengl obviously) -void vulkan_vertex_buffer_update( +void gpuapi_vertex_buffer_update( VkDevice device, VkPhysicalDevice physical_device, VkCommandPool command_pool, VkQueue queue, - VkBuffer* vertexBuffer, - const void* __restrict vertices, int32 vertex_size, int32 vertex_count + VkBuffer* vertex_buffer, + const void* __restrict vertices, int32 vertex_size, int32 vertex_count, int32 offset = 0 ) { VkDeviceSize bufferSize = vertex_size * vertex_count; @@ -986,12 +1025,14 @@ void vulkan_vertex_buffer_update( vkUnmapMemory(device, stagingBufferMemory); VkCommandBuffer commandBuffer; - vulkan_command_buffers_create(device, command_pool, &commandBuffer, 1); + 
gpuapi_command_buffer_create(device, command_pool, &commandBuffer, 1); vulkan_single_commands_begin(commandBuffer); VkBufferCopy copyRegion = {}; + copyRegion.srcOffset = offset; + copyRegion.dstOffset = offset; copyRegion.size = bufferSize; - vkCmdCopyBuffer(commandBuffer, stagingBuffer, *vertexBuffer, 1, &copyRegion); + vkCmdCopyBuffer(commandBuffer, stagingBuffer, *vertex_buffer, 1, &copyRegion); vulkan_single_commands_end(queue, commandBuffer); vulkan_single_commands_free(device, command_pool, commandBuffer); @@ -999,12 +1040,12 @@ void vulkan_vertex_buffer_update( vkDestroyBuffer(device, stagingBuffer, NULL); vkFreeMemory(device, stagingBufferMemory, NULL); - LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, vertex_size * vertex_count); + LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, bufferSize - offset); } -void vulkan_vertex_buffer_create( +void gpuapi_vertex_buffer_create( VkDevice device, VkPhysicalDevice physical_device, VkCommandPool command_pool, VkQueue queue, - VkBuffer* vertexBuffer, VkDeviceMemory vertexBufferMemory, + VkBuffer* vertex_buffer, VkDeviceMemory vertex_bufferMemory, const void* __restrict vertices, int32 vertex_size, int32 vertex_count ) { @@ -1031,18 +1072,18 @@ void vulkan_vertex_buffer_update( bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - *vertexBuffer, vertexBufferMemory + *vertex_buffer, vertex_bufferMemory ); // Copy buffer // @performance Would it make sense to use a "global" temp buffer for that? 
If yes, we only need to reset VkCommandBuffer commandBuffer; - vulkan_command_buffers_create(device, command_pool, &commandBuffer, 1); + gpuapi_command_buffer_create(device, command_pool, &commandBuffer, 1); vulkan_single_commands_begin(commandBuffer); VkBufferCopy copyRegion = {}; copyRegion.size = bufferSize; - vkCmdCopyBuffer(commandBuffer, stagingBuffer, *vertexBuffer, 1, &copyRegion); + vkCmdCopyBuffer(commandBuffer, stagingBuffer, *vertex_buffer, 1, &copyRegion); vulkan_single_commands_end(queue, commandBuffer); // @todo if we change behaviour according to the comment above we don't need this @@ -1084,7 +1125,7 @@ void vulkan_index_buffer_create( // Copy buffer VkCommandBuffer commandBuffer; - vulkan_command_buffers_create(device, command_pool, &commandBuffer, 1); + gpuapi_command_buffer_create(device, command_pool, &commandBuffer, 1); vulkan_single_commands_begin(commandBuffer); VkBufferCopy copyRegion = {}; @@ -1101,7 +1142,7 @@ void vulkan_index_buffer_create( // @todo We also need a free function (unmap buffer) -void vulkan_uniform_buffers_create( +void gpuapi_uniform_buffers_create( VkDevice device, VkPhysicalDevice physical_device, VkBuffer* __restrict uniform_buffers, VkDeviceMemory* __restrict uniform_buffers_memory, void** __restrict uniform_buffers_mapped, size_t uniform_buffer_object_size, @@ -1109,6 +1150,7 @@ void vulkan_uniform_buffers_create( ) { // e.g. uniform_buffer_object_size = sizeof(struct {model; view; proj};) + // @question Do I really need one uniform_buffer per frames_in_flight? 
This seems VERY inefficient VkDeviceSize bufferSize = uniform_buffer_object_size; for (uint32 i = 0; i < frames_in_flight; ++i) { vulkan_buffer_create( diff --git a/log/Log.h b/log/Log.h index 3ece18d..a961c4e 100644 --- a/log/Log.h +++ b/log/Log.h @@ -182,17 +182,17 @@ void log(const char* str, const char* file, const char* function, int32 line) void log(const char* format, LogDataArray data, const char* file, const char* function, int32 line) { - ASSERT_SIMPLE(str_length(format) + str_length(file) + str_length(function) + 50 < MAX_LOG_LENGTH); - if (!_log_memory) { return; } - if (data.data[0].type == LOG_DATA_VOID) { + if (data.data[0].type == LOG_DATA_VOID || data.data[0].type == LOG_DATA_NONE) { log(format, file, function, line); return; } + ASSERT_SIMPLE(str_length(format) + str_length(file) + str_length(function) + 50 < MAX_LOG_LENGTH); + LogMessage* msg = (LogMessage *) log_get_memory(); msg->file = file; msg->function = function; @@ -205,7 +205,7 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu str_copy_short(msg->message, format); for (int32 i = 0; i < LOG_DATA_ARRAY; ++i) { - if (data.data[i].type == LOG_DATA_VOID) { + if (data.data[i].type == LOG_DATA_VOID || data.data[i].type == LOG_DATA_NONE) { break; } @@ -262,36 +262,20 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu #define LOG_TO_FILE() log_to_file() #if LOG_LEVEL == 4 - // Complete logging - #define LOG_1(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_2(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_3(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_4(str) log((str), __FILE__, __func__, __LINE__) + #define LOG_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_3(format, ...) 
log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_4(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_3(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_4(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_4(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_2(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_3(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_4(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - - #define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_2(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_3(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_4(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - - #define LOG_FORMAT_TRUE_1(should_log, format, ...) 
if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_4(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - - #define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_4(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_4(should_log, format, ...) 
if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) #define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } #define LOG_IF_2(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } @@ -301,7 +285,7 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu #define LOG_CYCLE_START(var_name) uint64 var_name##_start_time = intrin_timestamp_counter() #define LOG_CYCLE_END(var_name, format) \ uint64 var_name##_duration = (uint64) (intrin_timestamp_counter() - var_name##_start_time); \ - LOG_FORMAT_1((format), {{LOG_DATA_UINT64, &var_name##_duration}}) + LOG_1((format), {{LOG_DATA_UINT64, &var_name##_duration}}) // Only intended for manual debugging // Of course a developer could always use printf but by providing this option, @@ -315,35 +299,20 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu compiler_debug_print((debug_str)); \ }) #elif LOG_LEVEL == 3 - #define LOG_1(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_2(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_3(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_4(str) ((void) 0) + #define LOG_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_3(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_4(format, ...) ((void) 0) - #define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_3(format, ...) 
log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_4(format, ...) ((void) 0) + #define LOG_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_4(should_log, format, ...) ((void) 0) - #define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_2(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_3(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_4(should_log, str) ((void) 0) - - #define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_2(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_3(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_4(should_log, str) ((void) 0) - - #define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0) - - #define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_2(should_log, format, ...) 
if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0) + #define LOG_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_4(should_log, format, ...) ((void) 0) #define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } #define LOG_IF_2(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } @@ -354,40 +323,25 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu #define LOG_CYCLE_START(var_name) uint64 var_name##_start_time = intrin_timestamp_counter() #define LOG_CYCLE_END(var_name, format) \ uint64 var_name##_duration = (uint64) (intrin_timestamp_counter() - var_name##_start_time); \ - LOG_FORMAT_1((format), {{LOG_DATA_UINT64, &var_name##_duration}}) + LOG_1((format), {{LOG_DATA_UINT64, &var_name##_duration}}) #define DEBUG_VERBOSE(str) ((void) 0) #define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0) #elif LOG_LEVEL == 2 - #define LOG_1(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_2(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_3(str) ((void) 0) - #define LOG_4(str) ((void) 0) + #define LOG_1(format, ...) 
log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_3(format, ...) ((void) 0) + #define LOG_4(format, ...) ((void) 0) - #define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_3(format, ...) ((void) 0) - #define LOG_FORMAT_4(format, ...) ((void) 0) + #define LOG_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_3(should_log, format, ...) ((void) 0) + #define LOG_TRUE_4(should_log, format, ...) ((void) 0) - #define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_2(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_3(should_log, str) ((void) 0) - #define LOG_TRUE_4(should_log, str) ((void) 0) - - #define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_2(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_3(should_log, str) ((void) 0) - #define LOG_FALSE_4(should_log, str) ((void) 0) - - #define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_3(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_TRUE_4(should_log, format, ...) 
((void) 0) - - #define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_3(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0) + #define LOG_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_3(should_log, format, ...) ((void) 0) + #define LOG_FALSE_4(should_log, format, ...) ((void) 0) #define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } #define LOG_IF_2(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } @@ -398,40 +352,25 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu #define LOG_CYCLE_START(var_name) uint64 var_name##_start_time = intrin_timestamp_counter() #define LOG_CYCLE_END(var_name, format) \ uint64 var_name##_duration = (uint64) (intrin_timestamp_counter() - var_name##_start_time); \ - LOG_FORMAT_1((format), {{LOG_DATA_UINT64, &var_name##_duration}}) + LOG_1((format), {{LOG_DATA_UINT64, &var_name##_duration}}) #define DEBUG_VERBOSE(str) ((void) 0) #define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0) #elif LOG_LEVEL == 1 - #define LOG_1(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_2(str) ((void) 0) - #define LOG_3(str) ((void) 0) - #define LOG_4(str) ((void) 0) + #define LOG_1(format, ...) 
log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_2(format, ...) ((void) 0) + #define LOG_3(format, ...) ((void) 0) + #define LOG_4(format, ...) ((void) 0) - #define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_2(format, ...) ((void) 0) - #define LOG_FORMAT_3(format, ...) ((void) 0) - #define LOG_FORMAT_4(format, ...) ((void) 0) + #define LOG_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_2(should_log, format, ...) ((void) 0) + #define LOG_TRUE_3(should_log, format, ...) ((void) 0) + #define LOG_TRUE_4(should_log, format, ...) ((void) 0) - #define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_2(should_log, str) ((void) 0) - #define LOG_TRUE_3(should_log, str) ((void) 0) - #define LOG_TRUE_4(should_log, str) ((void) 0) - - #define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_2(should_log, str) ((void) 0) - #define LOG_FALSE_3(should_log, str) ((void) 0) - #define LOG_FALSE_4(should_log, str) ((void) 0) - - #define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_2(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_TRUE_3(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0) - - #define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_2(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_FALSE_3(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0) + #define LOG_FALSE_1(should_log, format, ...) 
if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_2(should_log, format, ...) ((void) 0) + #define LOG_FALSE_3(should_log, format, ...) ((void) 0) + #define LOG_FALSE_4(should_log, format, ...) ((void) 0) #define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } // Only logs on failure @@ -445,36 +384,20 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu #define DEBUG_VERBOSE(str) ((void) 0) #define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0) #elif LOG_LEVEL == 0 - // No logging whatsoever - #define LOG_1(str) ((void) 0) - #define LOG_2(str) ((void) 0) - #define LOG_3(str) ((void) 0) - #define LOG_4(str) ((void) 0) + #define LOG_1(format, ...) ((void) 0) + #define LOG_2(format, ...) ((void) 0) + #define LOG_3(format, ...) ((void) 0) + #define LOG_4(format, ...) ((void) 0) - #define LOG_FORMAT_1(format, ...) ((void) 0) - #define LOG_FORMAT_2(format, ...) ((void) 0) - #define LOG_FORMAT_3(format, ...) ((void) 0) - #define LOG_FORMAT_4(format, ...) ((void) 0) + #define LOG_TRUE_1(should_log, format, ...) ((void) 0) + #define LOG_TRUE_2(should_log, format, ...) ((void) 0) + #define LOG_TRUE_3(should_log, format, ...) ((void) 0) + #define LOG_TRUE_4(should_log, format, ...) ((void) 0) - #define LOG_TRUE_1(should_log, str) ((void) 0) - #define LOG_TRUE_2(should_log, str) ((void) 0) - #define LOG_TRUE_3(should_log, str) ((void) 0) - #define LOG_TRUE_4(should_log, str) ((void) 0) - - #define LOG_FALSE_1(should_log, str) ((void) 0) - #define LOG_FALSE_2(should_log, str) ((void) 0) - #define LOG_FALSE_3(should_log, str) ((void) 0) - #define LOG_FALSE_4(should_log, str) ((void) 0) - - #define LOG_FORMAT_TRUE_1(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_TRUE_2(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_TRUE_3(should_log, format, ...) 
((void) 0) - #define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0) - - #define LOG_FORMAT_FALSE_1(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_FALSE_2(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_FALSE_3(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0) + #define LOG_FALSE_1(should_log, format, ...) ((void) 0) + #define LOG_FALSE_2(should_log, format, ...) ((void) 0) + #define LOG_FALSE_3(should_log, format, ...) ((void) 0) + #define LOG_FALSE_4(should_log, format, ...) ((void) 0) #define LOG_IF_1(expr, str_succeeded, str_failed) ((void) 0) #define LOG_IF_2(expr, str_succeeded, str_failed) ((void) 0) diff --git a/log/PerformanceProfiler.h b/log/PerformanceProfiler.h index 914b13f..2b4748f 100644 --- a/log/PerformanceProfiler.h +++ b/log/PerformanceProfiler.h @@ -152,7 +152,7 @@ struct PerformanceProfiler { if (this->auto_log) { if (this->info_msg && this->info_msg[0]) { - LOG_FORMAT_2( + LOG_2( "-PERF %s (%s): %l cycles", { {LOG_DATA_CHAR_STR, (void *) perf->name}, @@ -161,7 +161,7 @@ struct PerformanceProfiler { } ); } else { - LOG_FORMAT_2( + LOG_2( "-PERF %s: %l cycles", { {LOG_DATA_CHAR_STR, (void *) perf->name}, @@ -210,7 +210,7 @@ void performance_profiler_end(int32 id) noexcept #define PROFILE_SCOPE(id, name) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), (name)) #define PROFILE_RESET(id) if(_perf_active && *_perf_active) performance_profiler_reset((id)) #else - #define PROFILE(id) ((void) 0) + #define PROFILE(id, ...) 
((void) 0) #define PROFILE_START(id, name) ((void) 0) #define PROFILE_END(id) ((void) 0) diff --git a/memory/BufferMemory.h b/memory/BufferMemory.h index 6882be0..92b4fe2 100644 --- a/memory/BufferMemory.h +++ b/memory/BufferMemory.h @@ -15,6 +15,7 @@ #include "../utils/TestUtils.h" #include "../log/Log.h" #include "../log/Stats.h" +#include "../log/PerformanceProfiler.h" #include "../log/DebugMemory.h" #include "../system/Allocator.h" @@ -35,7 +36,7 @@ void buffer_alloc(BufferMemory* buf, uint64 size, int32 alignment = 64) { ASSERT_SIMPLE(size); PROFILE(PROFILE_BUFFER_ALLOC, NULL, false, true); - LOG_FORMAT_1("Allocating BufferMemory: %n B", {{LOG_DATA_UINT64, &size}}); + LOG_1("Allocating BufferMemory: %n B", {{LOG_DATA_UINT64, &size}}); buf->memory = alignment < 2 ? (byte *) platform_alloc(size) diff --git a/memory/ChunkMemory.h b/memory/ChunkMemory.h index 6f8441b..6d0f611 100644 --- a/memory/ChunkMemory.h +++ b/memory/ChunkMemory.h @@ -17,6 +17,7 @@ #include "../compiler/CompilerUtils.h" #include "../log/Log.h" #include "../log/Stats.h" +#include "../log/PerformanceProfiler.h" #include "../log/DebugMemory.h" #include "BufferMemory.h" #include "../system/Allocator.h" @@ -63,7 +64,7 @@ void chunk_alloc(ChunkMemory* buf, uint32 count, uint32 chunk_size, int32 alignm memset(buf->memory, 0, buf->size); - LOG_FORMAT_1("Allocated ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}}); + LOG_1("Allocated ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}}); } inline @@ -327,7 +328,7 @@ int64 chunk_dump(const ChunkMemory* buf, byte* data) memcpy(data, buf->memory, buf->size); data += buf->size; - LOG_FORMAT_1("Dumped ChunkMemory: %n B", {{LOG_DATA_UINT64, (void *) &buf->size}}); + LOG_1("Dumped ChunkMemory: %n B", {{LOG_DATA_UINT64, (void *) &buf->size}}); return data - start; } @@ -362,7 +363,7 @@ int64 chunk_load(ChunkMemory* buf, const byte* data) buf->free = (uint64 *) (buf->memory + buf->count * buf->chunk_size); - LOG_FORMAT_1("Loaded ChunkMemory: %n B", 
{{LOG_DATA_UINT64, &buf->size}}); + LOG_1("Loaded ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}}); return buf->size; } diff --git a/memory/RingMemory.h b/memory/RingMemory.h index 997ff67..2377b87 100644 --- a/memory/RingMemory.h +++ b/memory/RingMemory.h @@ -19,6 +19,7 @@ #include "BufferMemory.h" #include "../log/Log.h" #include "../log/Stats.h" +#include "../log/PerformanceProfiler.h" #include "../log/DebugMemory.h" #include "../thread/Atomic.h" #include "../thread/Semaphore.h" @@ -48,7 +49,7 @@ void ring_alloc(RingMemory* ring, uint64 size, uint32 alignment = 64) { ASSERT_SIMPLE(size); PROFILE(PROFILE_RING_ALLOC, NULL, false, true); - LOG_FORMAT_1("Allocating RingMemory: %n B", {{LOG_DATA_UINT64, &size}}); + LOG_1("Allocating RingMemory: %n B", {{LOG_DATA_UINT64, &size}}); ring->memory = alignment < 2 ? (byte *) platform_alloc(size) @@ -62,7 +63,7 @@ void ring_alloc(RingMemory* ring, uint64 size, uint32 alignment = 64) memset(ring->memory, 0, ring->size); - LOG_FORMAT_1("Allocated RingMemory: %n B", {{LOG_DATA_UINT64, &ring->size}}); + LOG_1("Allocated RingMemory: %n B", {{LOG_DATA_UINT64, &ring->size}}); } inline diff --git a/platform/win32/ExceptionHandler.h b/platform/win32/ExceptionHandler.h index 1edab80..7c7a23c 100644 --- a/platform/win32/ExceptionHandler.h +++ b/platform/win32/ExceptionHandler.h @@ -88,9 +88,9 @@ void log_stack_trace(CONTEXT *context) { symbol->MaxNameLen = MAX_SYM_NAME; if (SymFromAddr(process, address, NULL, symbol)) { - LOG_FORMAT_1("Function: %s - Address: %l", {{LOG_DATA_CHAR_STR, symbol->Name}, {LOG_DATA_INT64, &symbol->Address}}); + LOG_1("Function: %s - Address: %l", {{LOG_DATA_CHAR_STR, symbol->Name}, {LOG_DATA_INT64, &symbol->Address}}); } else { - LOG_FORMAT_1("Function: (unknown) - Address: %l", {{LOG_DATA_INT64, &address}}); + LOG_1("Function: (unknown) - Address: %l", {{LOG_DATA_INT64, &address}}); } // Resolve file and line number @@ -99,7 +99,7 @@ void log_stack_trace(CONTEXT *context) { line.SizeOfStruct = 
sizeof(IMAGEHLP_LINE64); if (SymGetLineFromAddr64(process, address, &displacement, &line)) { - LOG_FORMAT_1(" File: %s, Line: %l", {{LOG_DATA_CHAR_STR, line.FileName}, {LOG_DATA_INT64, &line.LineNumber}}); + LOG_1(" File: %s, Line: %l", {{LOG_DATA_CHAR_STR, line.FileName}, {LOG_DATA_INT64, &line.LineNumber}}); } else { LOG_1(" File: (unknown), Line: (unknown)"); } @@ -108,7 +108,7 @@ void log_stack_trace(CONTEXT *context) { IMAGEHLP_MODULE64 module_info; module_info.SizeOfStruct = sizeof(IMAGEHLP_MODULE64); if (SymGetModuleInfo64(process, address, &module_info)) { - LOG_FORMAT_1(" Module: %s", {{LOG_DATA_CHAR_STR, module_info.ModuleName}}); + LOG_1(" Module: %s", {{LOG_DATA_CHAR_STR, module_info.ModuleName}}); } else { LOG_1(" Module: (unknown)"); } diff --git a/platform/win32/threading/Semaphore.h b/platform/win32/threading/Semaphore.h index 4846a38..0ee0fd1 100644 --- a/platform/win32/threading/Semaphore.h +++ b/platform/win32/threading/Semaphore.h @@ -14,30 +14,36 @@ typedef HANDLE sem_t; +inline void sem_init(sem_t* semaphore, int32 value) { *semaphore = CreateSemaphore(NULL, value, MAX_UINT32, NULL); } +inline void sem_destroy(sem_t* semaphore) { CloseHandle(*semaphore); } // decrement if != 0, if = 0 wait +inline void sem_wait(sem_t* semaphore) { WaitForSingleObject(*semaphore, INFINITE); } +inline int32 sem_timedwait(sem_t* semaphore, uint64 ms) { return (int32) WaitForSingleObject(*semaphore, (DWORD) ms); } +inline int32 sem_trywait(sem_t* semaphore) { return (int32) WaitForSingleObject(*semaphore, 0); } // increment +inline void sem_post(sem_t* semaphore) { ReleaseSemaphore(*semaphore, 1, NULL); } diff --git a/platform/win32/threading/Thread.h b/platform/win32/threading/Thread.h index 93a0071..feba623 100644 --- a/platform/win32/threading/Thread.h +++ b/platform/win32/threading/Thread.h @@ -94,6 +94,7 @@ int32 pthread_mutex_unlock(pthread_mutex_t* mutex) return 0; } +// WARNING: We don't support windows events since they are much slower than conditional 
variables/mutexes inline int32 pthread_cond_init(pthread_cond_t* cond, pthread_condattr_t*) { diff --git a/stdlib/HashMap.h b/stdlib/HashMap.h index 6c9212b..778abf7 100644 --- a/stdlib/HashMap.h +++ b/stdlib/HashMap.h @@ -124,7 +124,7 @@ struct HashMap { inline void hashmap_alloc(HashMap* hm, int32 count, int32 element_size) { - LOG_FORMAT_1("Allocate HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); + LOG_1("Allocate HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); byte* data = (byte *) platform_alloc( count * (sizeof(uint16) + element_size) + CEIL_DIV(count, 64) * sizeof(hm->buf.free) @@ -148,7 +148,7 @@ void hashmap_free(HashMap* hm) inline void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ring) noexcept { - LOG_FORMAT_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); + LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); byte* data = ring_get_memory( ring, count * (sizeof(uint16) + element_size) @@ -163,7 +163,7 @@ void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ri inline void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory* buf) noexcept { - LOG_FORMAT_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); + LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); byte* data = buffer_get_memory( buf, count * (sizeof(uint16) + element_size) @@ -178,7 +178,7 @@ void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory* inline void hashmap_create(HashMap* hm, int32 count, int32 element_size, byte* buf) noexcept { - LOG_FORMAT_1("Create HashMap for %n elements with %n B per 
element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); + LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); hm->table = (uint16 *) buf; chunk_init(&hm->buf, buf + sizeof(uint16) * count, count, element_size, 8); } @@ -797,7 +797,7 @@ int64 hashmap_dump(const HashMap* hm, byte* data, [[maybe_unused]] int32 steps = // dump free array memcpy(data, hm->buf.free, sizeof(uint64) * CEIL_DIV(hm->buf.count, 64)); - LOG_FORMAT_1("Dumped HashMap: %n B", {{LOG_DATA_UINT64, (void *) &hm->buf.size}}); + LOG_1("Dumped HashMap: %n B", {{LOG_DATA_UINT64, (void *) &hm->buf.size}}); return sizeof(hm->buf.count) // hash map count = buffer count + hm->buf.count * sizeof(uint16) // table content @@ -851,7 +851,7 @@ int64 hashmap_load(HashMap* hm, const byte* data, [[maybe_unused]] int32 steps = } } chunk_iterate_end; - LOG_FORMAT_1("Loaded HashMap: %n B", {{LOG_DATA_UINT64, &hm->buf.size}}); + LOG_1("Loaded HashMap: %n B", {{LOG_DATA_UINT64, &hm->buf.size}}); // How many bytes was read from data return sizeof(hm->buf.count) // hash map count = buffer count diff --git a/stdlib/PerfectHashMap.h b/stdlib/PerfectHashMap.h index 5276315..56bba62 100644 --- a/stdlib/PerfectHashMap.h +++ b/stdlib/PerfectHashMap.h @@ -111,7 +111,7 @@ PerfectHashMap* perfect_hashmap_prepare(PerfectHashMap* hm, const char** keys, i // WARNING: element_size = element size + remaining HashEntry data size void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, BufferMemory* buf) { - LOG_FORMAT_1("Create PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); + LOG_1("Create PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); hm->map_size = count; hm->entry_size = element_size; hm->hash_entries = buffer_get_memory( @@ -124,7 +124,7 @@ void perfect_hashmap_create(PerfectHashMap* 
hm, int32 count, int32 element_size, // WARNING: element_size = element size + remaining HashEntry data size void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, byte* buf) { - LOG_FORMAT_1("Create PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); + LOG_1("Create PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); hm->map_size = count; hm->entry_size = element_size; hm->hash_entries = buf; diff --git a/tests/MainTest.cpp b/tests/MainTest.cpp index 2e3c31e..5037932 100644 --- a/tests/MainTest.cpp +++ b/tests/MainTest.cpp @@ -9,6 +9,7 @@ #include "utils/BitUtilsTest.cpp" #include "utils/EndianUtilsTest.cpp" #include "utils/StringUtilsTest.cpp" +#include "utils/MathUtilsTest.cpp" #include "utils/UtilsTest.cpp" #ifdef UBER_TEST @@ -18,8 +19,7 @@ #endif int main() { - printf("\nStat Tests Asserts Details\n"); - printf("========================================================================================================================\n"); + TEST_HEADER(); MathEvaluatorTest(); MemoryChunkMemoryTest(); @@ -29,16 +29,10 @@ int main() { UIUIThemeTest(); UtilsBitUtilsTest(); UtilsStringUtilsTest(); + UtilsMathUtilsTest(); UtilsUtilsTest(); - printf("========================================================================================================================\n"); - printf( - "%s %5d (%5d/%5d)\n\n", - _test_global_assert_count ? "[NG]" : "[OK]", - _test_global_count, - _test_global_assert_count - _test_global_assert_error_count, - _test_global_assert_count - ); + TEST_FOOTER(); return _test_global_assert_error_count ? 
1 : 0; } \ No newline at end of file diff --git a/tests/TestFramework.h b/tests/TestFramework.h index eb045f8..7c42451 100644 --- a/tests/TestFramework.h +++ b/tests/TestFramework.h @@ -22,6 +22,33 @@ static int32_t _test_global_assert_count = 0; static int32_t _test_global_assert_error_count = 0; static int32_t _test_global_count = 0; +static int64_t _test_start; + +#define TEST_PROFILING_LOOPS 1000 + +#define TEST_HEADER() \ + int64_t _test_total_start = test_start_time(); \ + printf("\nStat Tests Assert(OK/NG) Time(ms) Details\n"); \ + printf("========================================================================================================================\n") + +#define TEST_FOOTER() \ + printf("========================================================================================================================\n"); \ + printf( \ + "%s %5d (%5d/%5d) %8.0f\n\n", \ + _test_global_assert_count ? "[NG]" : "[OK]", \ + _test_global_count, \ + _test_global_assert_count - _test_global_assert_error_count, \ + _test_global_assert_count, \ + test_duration_time(_test_total_start) / 1000000) + +#ifdef UBER_TEST +#define TEST_INIT_HEADER() (void)0 +#define TEST_FINALIZE_FOOTER() (void)0 +#else +#define TEST_INIT_HEADER() TEST_HEADER() +#define TEST_FINALIZE_FOOTER() TEST_FOOTER() +#endif + #if _WIN32 #include "../platform/win32/ExceptionHandler.h" #include @@ -33,12 +60,32 @@ LONG WINAPI test_exception_handler(EXCEPTION_POINTERS *exception_info) return EXCEPTION_EXECUTE_HANDLER; } -double test_measure_func_time_ns(void (*func)(void *), void *para) +int64_t test_start_time() +{ + LARGE_INTEGER start; + QueryPerformanceCounter(&start); + + return start.QuadPart; +} + +double test_duration_time(int64_t start) +{ + LARGE_INTEGER frequency, end; + QueryPerformanceFrequency(&frequency); + QueryPerformanceCounter(&end); + + return (double)(end.QuadPart - start) * 1e9 / frequency.QuadPart; +} + +double test_measure_func_time_ns(void (*func)(volatile void *), volatile void 
*para) { LARGE_INTEGER frequency, start, end; QueryPerformanceFrequency(&frequency); QueryPerformanceCounter(&start); - func(para); + for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) + { + func(para); + } QueryPerformanceCounter(&end); return (double)(end.QuadPart - start.QuadPart) * 1e9 / frequency.QuadPart; } @@ -46,8 +93,10 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) #define TEST_INIT(test_count) \ do \ { \ + TEST_INIT_HEADER(); \ setvbuf(stdout, NULL, _IONBF, 0); \ SetUnhandledExceptionFilter(test_exception_handler); \ + _test_start = test_start_time(); \ _test_assert_error_count = 0; \ _test_count = 0; \ _test_assert_count = 0; \ @@ -69,12 +118,32 @@ void test_exception_handler(int signum) exit(1); } -#include -double test_measure_func_time_ns(void (*func)(void *), void *para) +int64_t test_start_time() { struct timespec start, end; clock_gettime(CLOCK_MONOTONIC, &start); - func(para); + + return start.tv_sec * 1e9 + start.tv_nsec; +} + +double test_duration_time(int64_t start) +{ + LARGE_INTEGER frequency, end; + QueryPerformanceFrequency(&frequency); + QueryPerformanceCounter(&end); + + return (double)(end.tv_sec * 1e9 + end.tv_nsec - start); +} + +#include +double test_measure_func_time_ns(void (*func)(volatile void *), volatile void *para) +{ + struct timespec start, end; + clock_gettime(CLOCK_MONOTONIC, &start); + for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) + { + func(para); + } clock_gettime(CLOCK_MONOTONIC, &end); return (double)(end.tv_sec * 1e9 + end.tv_nsec) - (double)(start.tv_sec * 1e9 + start.tv_nsec); } @@ -82,9 +151,11 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) #define TEST_INIT(test_count) \ do \ { \ + TEST_INIT_HEADER(); \ setvbuf(stdout, NULL, _IONBF, 0); \ signal(SIGSEGV, test_exception_handler); \ signal(SIGABRT, test_exception_handler); \ + _test_start = test_start_time(); \ _test_assert_error_count = 0; \ _test_count = 0; \ _test_assert_count = 0; \ @@ -99,35 +170,36 @@ double 
test_measure_func_time_ns(void (*func)(void *), void *para) } while (0) #endif -#define TEST_FINALIZE() \ - do \ - { \ - if (_test_assert_error_count) \ - { \ - printf( \ - "[NG] %5d (%5d/%5d) %s\n", \ - _test_count, _test_assert_count - _test_assert_error_count, _test_assert_count, __FILE__); \ - for (int i = 0; i < _test_assert_error_count; ++i) \ - { \ - printf(" %s\n", _test_log[i]); \ - fflush(stdout); \ - } \ - } \ - else \ - { \ - printf( \ - "[OK] %5d (%5d/%5d) %s\n", \ - _test_count, _test_assert_count - _test_assert_error_count, _test_assert_count, __FILE__); \ - } \ - fflush(stdout); \ - free(_test_log); \ - _test_log = NULL; \ - _test_assert_error_count = 0; \ - _test_count = 0; \ - _test_assert_count = 0; \ +#define TEST_FINALIZE() \ + do \ + { \ + if (_test_assert_error_count) \ + { \ + printf( \ + "[NG] %5d (%5d/%5d) %8.0f %s\n", \ + _test_count, _test_assert_count - _test_assert_error_count, _test_assert_count, test_duration_time(_test_start) / 1000000, __FILE__); \ + for (int i = 0; i < _test_assert_error_count; ++i) \ + { \ + printf(" %s\n", _test_log[i]); \ + fflush(stdout); \ + } \ + } \ + else \ + { \ + printf( \ + "[OK] %5d (%5d/%5d) %8.0f %s\n", \ + _test_count, _test_assert_count - _test_assert_error_count, _test_assert_count, test_duration_time(_test_start) / 1000000, __FILE__); \ + } \ + fflush(stdout); \ + free(_test_log); \ + _test_log = NULL; \ + _test_assert_error_count = 0; \ + _test_count = 0; \ + _test_assert_count = 0; \ + TEST_FINALIZE_FOOTER(); \ } while (0) -#define RUN_TEST(func) \ +#define TEST_RUN(func) \ ++_test_count; \ ++_test_global_count; \ func() @@ -274,13 +346,19 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) \ /* Measure func1 */ \ start = intrin_timestamp_counter(); \ - func1((void *)&a); \ + for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) \ + { \ + func1((volatile void *)&a); \ + } \ end = intrin_timestamp_counter(); \ cycles_func1 = end - start; \ \ /* Measure func2 */ \ start = 
intrin_timestamp_counter(); \ - func2((void *)&b); \ + for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) \ + { \ + func2((volatile void *)&b); \ + } \ end = intrin_timestamp_counter(); \ cycles_func2 = end - start; \ \ @@ -296,7 +374,7 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) "%4i: %.2f%% (%s: %llu cycles, %s: %llu cycles)", \ __LINE__, percent_diff + 100.0f, #func1, (uint64_t)cycles_func1, #func2, (uint64_t)cycles_func2); \ } \ - ASSERT_EQUALS(a, b); \ + ASSERT_TRUE((a && b) || a == b); \ } while (0) #define ASSERT_FUNCTION_TEST_CYCLE(func, cycles) \ @@ -310,7 +388,10 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) \ /* Measure func */ \ start = intrin_timestamp_counter(); \ - func((void *)¶); \ + for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) \ + { \ + func((volatile void *)¶); \ + } \ end = intrin_timestamp_counter(); \ cycles_func = end - start; \ \ @@ -333,10 +414,10 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) int64_t a = 0, b = 0; \ \ /* Measure func1 */ \ - time_func1 = test_measure_func_time_ns(func1, (void *)&a); \ + time_func1 = test_measure_func_time_ns(func1, (volatile void *)&a); \ \ /* Measure func2 */ \ - time_func2 = test_measure_func_time_ns(func2, (void *)&b); \ + time_func2 = test_measure_func_time_ns(func2, (volatile void *)&b); \ \ /* Calculate percentage difference */ \ double percent_diff = 100.0 * (time_func1 - time_func2) / time_func2; \ @@ -347,31 +428,10 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) ++_test_global_assert_error_count; \ snprintf( \ _test_log[_test_assert_error_count++], 1024, \ - "%4i: %.2f%% (%s: %.2f ns, %s: %.2f ns)", \ + "%4i: %.2f%% (%s: %.2f us, %s: %.2f us)", \ __LINE__, percent_diff + 100.0f, #func1, time_func1, #func2, time_func2); \ } \ - ASSERT_EQUALS(a, b); \ - } while (0) - -#define ASSERT_FUNCTION_TEST_TIME(func, duration) \ - do \ - { \ - ++_test_assert_count; \ - ++_test_global_assert_count; \ - double 
time_func; \ - int64_t para = 0; \ - \ - /* Measure func */ \ - time_func = test_measure_func_time_ns(func, (void *)¶); \ - \ - if (time_func >= duration) \ - { \ - ++_test_global_assert_error_count; \ - snprintf( \ - _test_log[_test_assert_error_count++], 1024, \ - "%4i: %.2f%% (%s: %.2f ns, %s: %.2f ns)", \ - __LINE__, percent_diff + 100.0f, #func, time_func); \ - } \ + ASSERT_TRUE((a && b) || a == b); \ } while (0) #endif \ No newline at end of file diff --git a/tests/math/EvaluatorTest.cpp b/tests/math/EvaluatorTest.cpp index 0712b6d..d0181c8 100644 --- a/tests/math/EvaluatorTest.cpp +++ b/tests/math/EvaluatorTest.cpp @@ -47,9 +47,9 @@ static void test_evaluator_evaluate_function() { int main() { TEST_INIT(10); - RUN_TEST(test_evaluator_evaluate); - RUN_TEST(test_evaluator_evaluate_variables); - RUN_TEST(test_evaluator_evaluate_function); + TEST_RUN(test_evaluator_evaluate); + TEST_RUN(test_evaluator_evaluate_variables); + TEST_RUN(test_evaluator_evaluate_function); TEST_FINALIZE(); diff --git a/tests/memory/ChunkMemoryTest.cpp b/tests/memory/ChunkMemoryTest.cpp index 9e548c7..a6b1f78 100644 --- a/tests/memory/ChunkMemoryTest.cpp +++ b/tests/memory/ChunkMemoryTest.cpp @@ -105,6 +105,7 @@ static void test_chunk_reserve_last_element() { static void test_chunk_reserve_full() { ChunkMemory mem = {}; chunk_alloc(&mem, 10, 10); + mem.free[0] = 0xFFFFFFFFFFFFFFFF; ASSERT_EQUALS(chunk_reserve(&mem, 1), -1); } @@ -129,17 +130,17 @@ static void test_chunk_reserve_last_element() { int main() { TEST_INIT(25); - RUN_TEST(test_chunk_alloc); - RUN_TEST(test_chunk_id_from_memory); - RUN_TEST(test_chunk_get_element); - RUN_TEST(test_chunk_reserve); - RUN_TEST(test_chunk_free_elements); - RUN_TEST(test_chunk_reserve_wrapping); - RUN_TEST(test_chunk_reserve_last_element); + TEST_RUN(test_chunk_alloc); + TEST_RUN(test_chunk_id_from_memory); + TEST_RUN(test_chunk_get_element); + TEST_RUN(test_chunk_reserve); + TEST_RUN(test_chunk_free_elements); + 
TEST_RUN(test_chunk_reserve_wrapping); + TEST_RUN(test_chunk_reserve_last_element); #if !DEBUG - RUN_TEST(test_chunk_reserve_full); - RUN_TEST(test_chunk_reserve_invalid_size); + TEST_RUN(test_chunk_reserve_full); + TEST_RUN(test_chunk_reserve_invalid_size); #endif TEST_FINALIZE(); diff --git a/tests/memory/RingMemoryTest.cpp b/tests/memory/RingMemoryTest.cpp index beb9f21..58bd266 100644 --- a/tests/memory/RingMemoryTest.cpp +++ b/tests/memory/RingMemoryTest.cpp @@ -94,13 +94,13 @@ static void test_ring_commit_safe() { int main() { TEST_INIT(25); - RUN_TEST(test_ring_alloc); - RUN_TEST(test_ring_get_memory); - RUN_TEST(test_ring_calculate_position); - RUN_TEST(test_ring_reset); - RUN_TEST(test_ring_get_memory_nomove); - RUN_TEST(test_ring_move_pointer); - RUN_TEST(test_ring_commit_safe); + TEST_RUN(test_ring_alloc); + TEST_RUN(test_ring_get_memory); + TEST_RUN(test_ring_calculate_position); + TEST_RUN(test_ring_reset); + TEST_RUN(test_ring_get_memory_nomove); + TEST_RUN(test_ring_move_pointer); + TEST_RUN(test_ring_commit_safe); TEST_FINALIZE(); diff --git a/tests/stdlib/HashMapTest.cpp b/tests/stdlib/HashMapTest.cpp index 26d9eba..d189134 100644 --- a/tests/stdlib/HashMapTest.cpp +++ b/tests/stdlib/HashMapTest.cpp @@ -78,9 +78,9 @@ static void test_hashmap_dump_load() { int main() { TEST_INIT(25); - RUN_TEST(test_hashmap_alloc); - RUN_TEST(test_hashmap_insert_int32); - RUN_TEST(test_hashmap_dump_load); + TEST_RUN(test_hashmap_alloc); + TEST_RUN(test_hashmap_insert_int32); + TEST_RUN(test_hashmap_dump_load); TEST_FINALIZE(); diff --git a/tests/ui/UILayoutTest.cpp b/tests/ui/UILayoutTest.cpp index 825f363..af613f3 100644 --- a/tests/ui/UILayoutTest.cpp +++ b/tests/ui/UILayoutTest.cpp @@ -87,9 +87,9 @@ static void test_layout_from_theme() { int main() { TEST_INIT(100); - RUN_TEST(test_layout_from_file_txt); - RUN_TEST(test_layout_to_from_data); - RUN_TEST(test_layout_from_theme); + TEST_RUN(test_layout_from_file_txt); + TEST_RUN(test_layout_to_from_data); + 
TEST_RUN(test_layout_from_theme); TEST_FINALIZE(); diff --git a/tests/ui/UIThemeTest.cpp b/tests/ui/UIThemeTest.cpp index 57bb531..0096bd8 100644 --- a/tests/ui/UIThemeTest.cpp +++ b/tests/ui/UIThemeTest.cpp @@ -65,8 +65,8 @@ static void test_theme_to_from_data() { int main() { TEST_INIT(100); - RUN_TEST(test_theme_from_file_txt); - RUN_TEST(test_theme_to_from_data); + TEST_RUN(test_theme_from_file_txt); + TEST_RUN(test_theme_to_from_data); TEST_FINALIZE(); diff --git a/tests/utils/BitUtilsTest.cpp b/tests/utils/BitUtilsTest.cpp index 5cbca1a..20f7903 100644 --- a/tests/utils/BitUtilsTest.cpp +++ b/tests/utils/BitUtilsTest.cpp @@ -165,32 +165,32 @@ static void test_bytes_merge_8_r2l() { int main() { TEST_INIT(75); - RUN_TEST(test_is_bit_set_l2r); - RUN_TEST(test_bit_set_l2r); - RUN_TEST(test_bit_unset_l2r); - RUN_TEST(test_bit_flip_l2r); - RUN_TEST(test_bit_set_to_l2r); - RUN_TEST(test_bits_get_8_l2r); - RUN_TEST(test_bits_get_16_l2r); - RUN_TEST(test_bits_get_32_l2r); - RUN_TEST(test_bits_get_64_l2r); - RUN_TEST(test_bytes_merge_2_l2r); - RUN_TEST(test_bytes_merge_4_l2r); - RUN_TEST(test_bytes_merge_8_l2r); + TEST_RUN(test_is_bit_set_l2r); + TEST_RUN(test_bit_set_l2r); + TEST_RUN(test_bit_unset_l2r); + TEST_RUN(test_bit_flip_l2r); + TEST_RUN(test_bit_set_to_l2r); + TEST_RUN(test_bits_get_8_l2r); + TEST_RUN(test_bits_get_16_l2r); + TEST_RUN(test_bits_get_32_l2r); + TEST_RUN(test_bits_get_64_l2r); + TEST_RUN(test_bytes_merge_2_l2r); + TEST_RUN(test_bytes_merge_4_l2r); + TEST_RUN(test_bytes_merge_8_l2r); - RUN_TEST(test_is_bit_set_r2l); - RUN_TEST(test_is_bit_set_64_r2l); - RUN_TEST(test_bit_set_r2l); - RUN_TEST(test_bit_unset_r2l); - RUN_TEST(test_bit_flip_r2l); - RUN_TEST(test_bit_set_to_r2l); - RUN_TEST(test_bits_get_8_r2l); - RUN_TEST(test_bits_get_16_r2l); - RUN_TEST(test_bits_get_32_r2l); - RUN_TEST(test_bits_get_64_r2l); - RUN_TEST(test_bytes_merge_2_r2l); - RUN_TEST(test_bytes_merge_4_r2l); - RUN_TEST(test_bytes_merge_8_r2l); + TEST_RUN(test_is_bit_set_r2l); 
+ TEST_RUN(test_is_bit_set_64_r2l); + TEST_RUN(test_bit_set_r2l); + TEST_RUN(test_bit_unset_r2l); + TEST_RUN(test_bit_flip_r2l); + TEST_RUN(test_bit_set_to_r2l); + TEST_RUN(test_bits_get_8_r2l); + TEST_RUN(test_bits_get_16_r2l); + TEST_RUN(test_bits_get_32_r2l); + TEST_RUN(test_bits_get_64_r2l); + TEST_RUN(test_bytes_merge_2_r2l); + TEST_RUN(test_bytes_merge_4_r2l); + TEST_RUN(test_bytes_merge_8_r2l); TEST_FINALIZE(); diff --git a/tests/utils/EndianUtilsTest.cpp b/tests/utils/EndianUtilsTest.cpp index e84158f..323e1ed 100644 --- a/tests/utils/EndianUtilsTest.cpp +++ b/tests/utils/EndianUtilsTest.cpp @@ -118,19 +118,19 @@ static void test_endian_swap_double() { int main() { TEST_INIT(50); - RUN_TEST(test_swap_endian_16); - RUN_TEST(test_swap_endian_32); - RUN_TEST(test_swap_endian_64); + TEST_RUN(test_swap_endian_16); + TEST_RUN(test_swap_endian_32); + TEST_RUN(test_swap_endian_64); - RUN_TEST(test_is_little_endian); - RUN_TEST(test_endian_swap_uint16); - RUN_TEST(test_endian_swap_int16); - RUN_TEST(test_endian_swap_uint32); - RUN_TEST(test_endian_swap_int32); - RUN_TEST(test_endian_swap_uint64); - RUN_TEST(test_endian_swap_int64); - RUN_TEST(test_endian_swap_float); - RUN_TEST(test_endian_swap_double); + TEST_RUN(test_is_little_endian); + TEST_RUN(test_endian_swap_uint16); + TEST_RUN(test_endian_swap_int16); + TEST_RUN(test_endian_swap_uint32); + TEST_RUN(test_endian_swap_int32); + TEST_RUN(test_endian_swap_uint64); + TEST_RUN(test_endian_swap_int64); + TEST_RUN(test_endian_swap_float); + TEST_RUN(test_endian_swap_double); TEST_FINALIZE(); diff --git a/tests/utils/MathUtilsTest.cpp b/tests/utils/MathUtilsTest.cpp new file mode 100644 index 0000000..e3b2e00 --- /dev/null +++ b/tests/utils/MathUtilsTest.cpp @@ -0,0 +1,624 @@ +#include "../TestFramework.h" +#include "../../utils/MathUtils.h" +#include + +// Correctness tests for f32 (float) approximate functions +static void test_sin_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(sin_approx(0.0f), sinf(0.0f), 0.001f); + 
ASSERT_EQUALS_WITH_DELTA(sin_approx(1.0f), sinf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(sin_approx(3.14f), sinf(3.14f), 0.001f); +} + +static void test_cos_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(cos_approx(0.0f), cosf(0.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(cos_approx(1.0f), cosf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(cos_approx(3.14f), cosf(3.14f), 0.001f); +} + +static void test_tan_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(tan_approx(0.0f), tanf(0.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(tan_approx(1.0f), tanf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(tan_approx(0.5f), tanf(0.5f), 0.001f); +} + +static void test_sqrt_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(sqrt_approx(1.0f), sqrtf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(sqrt_approx(2.0f), sqrtf(2.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(sqrt_approx(100.0f), sqrtf(100.0f), 0.001f); +} + +static void test_asin_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(asin_approx(0.0f), asinf(0.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(asin_approx(0.5f), asinf(0.5f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(asin_approx(-0.5f), asinf(-0.5f), 0.001f); +} + +static void test_acos_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(acos_approx(0.0f), acosf(0.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(acos_approx(0.5f), acosf(0.5f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(acos_approx(-0.5f), acosf(-0.5f), 0.001f); +} + +static void test_atan_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(atan_approx(0.0f), atanf(0.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(atan_approx(1.0f), atanf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(atan_approx(-1.0f), atanf(-1.0f), 0.001f); +} + +static void test_rsqrt_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(1.0f), 1.0f / sqrtf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(2.0f), 1.0f / sqrtf(2.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(100.0f), 1.0f / sqrtf(100.0f), 0.001f); +} + +static void test_exp_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(exp_approx(0.0f), expf(0.0f), 0.001f); + 
ASSERT_EQUALS_WITH_DELTA(exp_approx(1.0f), expf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(exp_approx(-1.0f), expf(-1.0f), 0.001f); +} + +static void test_log_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(log_approx(1.0f), logf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(log_approx(2.0f), logf(2.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(log_approx(10.0f), logf(10.0f), 0.001f); +} + +static void test_pow_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(pow_approx(2.0f, 3.0f), powf(2.0f, 3.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(pow_approx(3.0f, 2.0f), powf(3.0f, 2.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(pow_approx(10.0f, 0.5f), powf(10.0f, 0.5f), 0.001f); +} + +// Correctness tests for f64 (double) approximate functions +static void test_sin_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(sin_approx(0.0), sin(0.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(sin_approx(1.0), sin(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(sin_approx(3.14), sin(3.14), 0.001); +} + +static void test_cos_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(cos_approx(0.0), cos(0.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(cos_approx(1.0), cos(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(cos_approx(3.14), cos(3.14), 0.001); +} + +static void test_tan_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(tan_approx(0.0), tan(0.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(tan_approx(1.0), tan(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(tan_approx(0.5), tan(0.5), 0.001); +} + +static void test_sqrt_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(sqrt_approx(1.0), sqrt(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(sqrt_approx(2.0), sqrt(2.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(sqrt_approx(100.0), sqrt(100.0), 0.001); +} + +static void test_asin_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(asin_approx(0.0), asin(0.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(asin_approx(0.5), asin(0.5), 0.001); + ASSERT_EQUALS_WITH_DELTA(asin_approx(-0.5), asin(-0.5), 0.001); +} + +static void test_acos_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(acos_approx(0.0), acos(0.0), 0.001); + 
ASSERT_EQUALS_WITH_DELTA(acos_approx(0.5), acos(0.5), 0.001); + ASSERT_EQUALS_WITH_DELTA(acos_approx(-0.5), acos(-0.5), 0.001); +} + +static void test_atan_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(atan_approx(0.0), atan(0.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(atan_approx(1.0), atan(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(atan_approx(-1.0), atan(-1.0), 0.001); +} + +static void test_rsqrt_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(1.0), 1.0 / sqrt(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(2.0), 1.0 / sqrt(2.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(100.0), 1.0 / sqrt(100.0), 0.001); +} + +static void test_exp_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(exp_approx(0.0), exp(0.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(exp_approx(1.0), exp(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(exp_approx(-1.0), exp(-1.0), 0.001); +} + +static void test_log_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(log_approx(1.0), log(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(log_approx(2.0), log(2.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(log_approx(10.0), log(10.0), 0.001); +} + +static void test_pow_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(pow_approx(2.0, 3.0), pow(2.0, 3.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(pow_approx(3.0, 2.0), pow(3.0, 2.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(pow_approx(10.0, 0.5), pow(10.0, 0.5), 0.001); +} + +// Performance tests for f32 (float) approximate functions +static void _sin_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += sin_approx((f32)rand() / RAND_MAX); +} + +static void _sin_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += sinf((f32)rand() / RAND_MAX); +} + +static void test_sin_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_sin_approx_f32, _sin_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_sin_approx_f32, _sin_f32, 5.0); +} + +static void _cos_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += 
cos_approx((f32)rand() / RAND_MAX); +} + +static void _cos_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += cosf((f32)rand() / RAND_MAX); +} + +static void test_cos_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_cos_approx_f32, _cos_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_cos_approx_f32, _cos_f32, 5.0); +} + +static void _tan_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += tan_approx((f32)rand() / RAND_MAX); +} + +static void _tan_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += tanf((f32)rand() / RAND_MAX); +} + +static void test_tan_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_tan_approx_f32, _tan_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_tan_approx_f32, _tan_f32, 5.0); +} + +static void _sqrt_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += sqrt_approx((f32)rand() / RAND_MAX + 0.1f); // Avoid sqrt(0) +} + +static void _sqrt_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += sqrtf((f32)rand() / RAND_MAX + 0.1f); // Avoid sqrt(0) +} + +static void test_sqrt_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_sqrt_approx_f32, _sqrt_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_sqrt_approx_f32, _sqrt_f32, 5.0); +} + +static void _asin_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += asin_approx((f32)rand() / RAND_MAX); +} + +static void _asin_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += asinf((f32)rand() / RAND_MAX); +} + +static void test_asin_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_asin_approx_f32, _asin_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_asin_approx_f32, _asin_f32, 5.0); +} + +static void _acos_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += acos_approx((f32)rand() / RAND_MAX); +} + +static void 
_acos_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += acosf((f32)rand() / RAND_MAX); +} + +static void test_acos_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_acos_approx_f32, _acos_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_acos_approx_f32, _acos_f32, 5.0); +} + +static void _atan_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += atan_approx((f32)rand() / RAND_MAX); +} + +static void _atan_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += atanf((f32)rand() / RAND_MAX); +} + +static void test_atan_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_atan_approx_f32, _atan_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_atan_approx_f32, _atan_f32, 5.0); +} + +static void _rsqrt_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += rsqrt_approx((f32)rand() / RAND_MAX + 0.1f); // Avoid division by zero +} + +static void _rsqrt_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += 1.0f / sqrtf((f32)rand() / RAND_MAX + 0.1f); // Avoid division by zero +} + +static void test_rsqrt_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_rsqrt_approx_f32, _rsqrt_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_rsqrt_approx_f32, _rsqrt_f32, 5.0); +} + +static void _exp_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += exp_approx((f32)rand() / RAND_MAX); +} + +static void _exp_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += expf((f32)rand() / RAND_MAX); +} + +static void test_exp_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_exp_approx_f32, _exp_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_exp_approx_f32, _exp_f32, 5.0); +} + +static void _log_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += log_approx((f32)rand() / RAND_MAX + 0.1f); // Avoid log(0) +} + +static 
void _log_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += logf((f32)rand() / RAND_MAX + 0.1f); // Avoid log(0) +} + +static void test_log_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_log_approx_f32, _log_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_log_approx_f32, _log_f32, 5.0); +} + +static void _pow_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += pow_approx((f32)rand() / RAND_MAX, (f32)rand() / RAND_MAX); +} + +static void _pow_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += powf((f32)rand() / RAND_MAX, (f32)rand() / RAND_MAX); +} + +static void test_pow_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_pow_approx_f32, _pow_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_pow_approx_f32, _pow_f32, 5.0); +} + +// Performance tests for f64 (double) approximate functions +static void _sin_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += sin_approx((f64)rand() / RAND_MAX); +} + +static void _sin_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += sin((f64)rand() / RAND_MAX); +} + +static void test_sin_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_sin_approx_f64, _sin_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_sin_approx_f64, _sin_f64, 5.0); +} + +static void _cos_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += cos_approx((f64)rand() / RAND_MAX); +} + +static void _cos_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += cos((f64)rand() / RAND_MAX); +} + +static void test_cos_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_cos_approx_f64, _cos_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_cos_approx_f64, _cos_f64, 5.0); +} + +static void _tan_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += tan_approx((f64)rand() / RAND_MAX); +} + 
+static void _tan_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += tan((f64)rand() / RAND_MAX); +} + +static void test_tan_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_tan_approx_f64, _tan_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_tan_approx_f64, _tan_f64, 5.0); +} + +static void _sqrt_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += sqrt_approx((f64)rand() / RAND_MAX + 0.1); // Avoid sqrt(0) +} + +static void _sqrt_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += sqrt((f64)rand() / RAND_MAX + 0.1); // Avoid sqrt(0) +} + +static void test_sqrt_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_sqrt_approx_f64, _sqrt_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_sqrt_approx_f64, _sqrt_f64, 5.0); +} + +static void _asin_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += asin_approx((f64)rand() / RAND_MAX); +} + +static void _asin_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += asin((f64)rand() / RAND_MAX); +} + +static void test_asin_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_asin_approx_f64, _asin_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_asin_approx_f64, _asin_f64, 5.0); +} + +static void _acos_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += acos_approx((f64)rand() / RAND_MAX); +} + +static void _acos_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += acos((f64)rand() / RAND_MAX); +} + +static void test_acos_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_acos_approx_f64, _acos_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_acos_approx_f64, _acos_f64, 5.0); +} + +static void _atan_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += atan_approx((f64)rand() / RAND_MAX); +} + +static void _atan_f64(volatile void* val) { + f64* res 
= (f64*)val; + srand((int32) *res); + + *res += atan((f64)rand() / RAND_MAX); +} + +static void test_atan_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_atan_approx_f64, _atan_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_atan_approx_f64, _atan_f64, 5.0); +} + +static void _rsqrt_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += rsqrt_approx((f64)rand() / RAND_MAX + 0.1); // Avoid division by zero +} + +static void _rsqrt_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += 1.0 / sqrt((f64)rand() / RAND_MAX + 0.1); // Avoid division by zero +} + +static void test_rsqrt_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_rsqrt_approx_f64, _rsqrt_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_rsqrt_approx_f64, _rsqrt_f64, 5.0); +} + +static void _exp_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += exp_approx((f64)rand() / RAND_MAX); +} + +static void _exp_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += exp((f64)rand() / RAND_MAX); +} + +static void test_exp_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_exp_approx_f64, _exp_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_exp_approx_f64, _exp_f64, 5.0); +} + +static void _log_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += log_approx((f64)rand() / RAND_MAX + 0.1); // Avoid log(0) +} + +static void _log_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += log((f64)rand() / RAND_MAX + 0.1); // Avoid log(0) +} + +static void test_log_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_log_approx_f64, _log_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_log_approx_f64, _log_f64, 5.0); +} + +static void _pow_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += pow_approx((f64)rand() / RAND_MAX, (f64)rand() / RAND_MAX); +} + +static void 
_pow_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += pow((f64)rand() / RAND_MAX, (f64)rand() / RAND_MAX); +} + +static void test_pow_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_pow_approx_f64, _pow_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_pow_approx_f64, _pow_f64, 5.0); +} + +#ifdef UBER_TEST + #ifdef main + #undef main + #endif + #define main UtilsMathUtilsTest +#endif + +int main() { + TEST_INIT(200); + + // Run correctness tests for f32 functions + TEST_RUN(test_sin_approx_f32); + TEST_RUN(test_cos_approx_f32); + TEST_RUN(test_tan_approx_f32); + TEST_RUN(test_sqrt_approx_f32); + TEST_RUN(test_asin_approx_f32); + TEST_RUN(test_acos_approx_f32); + TEST_RUN(test_atan_approx_f32); + TEST_RUN(test_rsqrt_approx_f32); + TEST_RUN(test_exp_approx_f32); + TEST_RUN(test_log_approx_f32); + TEST_RUN(test_pow_approx_f32); + + // Run correctness tests for f64 functions + TEST_RUN(test_sin_approx_f64); + TEST_RUN(test_cos_approx_f64); + TEST_RUN(test_tan_approx_f64); + TEST_RUN(test_sqrt_approx_f64); + TEST_RUN(test_asin_approx_f64); + TEST_RUN(test_acos_approx_f64); + TEST_RUN(test_atan_approx_f64); + TEST_RUN(test_rsqrt_approx_f64); + TEST_RUN(test_exp_approx_f64); + TEST_RUN(test_log_approx_f64); + TEST_RUN(test_pow_approx_f64); + + // Run performance tests for f32 functions + TEST_RUN(test_sin_approx_performance_f32); + TEST_RUN(test_cos_approx_performance_f32); + TEST_RUN(test_tan_approx_performance_f32); + TEST_RUN(test_sqrt_approx_performance_f32); + TEST_RUN(test_asin_approx_performance_f32); + TEST_RUN(test_acos_approx_performance_f32); + TEST_RUN(test_atan_approx_performance_f32); + TEST_RUN(test_rsqrt_approx_performance_f32); + TEST_RUN(test_exp_approx_performance_f32); + TEST_RUN(test_log_approx_performance_f32); + TEST_RUN(test_pow_approx_performance_f32); + + // Run performance tests for f64 functions + TEST_RUN(test_sin_approx_performance_f64); + TEST_RUN(test_cos_approx_performance_f64); + 
TEST_RUN(test_tan_approx_performance_f64); + TEST_RUN(test_sqrt_approx_performance_f64); + TEST_RUN(test_asin_approx_performance_f64); + TEST_RUN(test_acos_approx_performance_f64); + TEST_RUN(test_atan_approx_performance_f64); + TEST_RUN(test_rsqrt_approx_performance_f64); + TEST_RUN(test_exp_approx_performance_f64); + TEST_RUN(test_log_approx_performance_f64); + TEST_RUN(test_pow_approx_performance_f64); + + TEST_FINALIZE(); + + return 0; +} \ No newline at end of file diff --git a/tests/utils/StringUtilsTest.cpp b/tests/utils/StringUtilsTest.cpp index f39242b..3692436 100644 --- a/tests/utils/StringUtilsTest.cpp +++ b/tests/utils/StringUtilsTest.cpp @@ -83,16 +83,24 @@ static void test_str_length() ASSERT_EQUALS(str_length("2asdf dw"), 8); } -static void _str_length(void* val) { - int64* res = (int64 *) val; +static void _str_length(volatile void* val) { + volatile int64* res = (volatile int64 *) val; - *res = (int64) str_length("This %d is a %s with %f values"); + char buffer[32]; + memcpy(buffer, "This %d is a %s with %f values", sizeof("This %d is a %s with %f values")); + buffer[30] = (byte) *res; + + *res += (int64) str_length(buffer); } -static void _strlen(void* val) { - int64* res = (int64 *) val; +static void _strlen(volatile void* val) { + volatile int64* res = (volatile int64 *) val; - *res = (int64) strlen("This %d is a %s with %f values"); + char buffer[32]; + memcpy(buffer, "This %d is a %s with %f values", sizeof("This %d is a %s with %f values")); + buffer[30] = (byte) *res; + + *res += (int64) strlen(buffer); } static void test_str_length_performance() { @@ -100,7 +108,7 @@ static void test_str_length_performance() { COMPARE_FUNCTION_TEST_CYCLE(_str_length, _strlen, 5.0); } -static void _str_is_alphanum(void* val) { +static void _str_is_alphanum(volatile void* val) { bool* res = (bool *) val; srand(0); @@ -109,10 +117,10 @@ static void _str_is_alphanum(void* val) { a += str_is_alphanum((byte) rand()); } - *res = (bool) a; + *res |= (bool) a; } 
-static void _isalnum(void* val) { +static void _isalnum(volatile void* val) { bool* res = (bool *) val; srand(0); @@ -121,7 +129,7 @@ static void _isalnum(void* val) { a += isalnum((byte) rand()); } - *res = (bool) a; + *res |= (bool) a; } static void test_str_is_alphanum_performance() { @@ -136,20 +144,20 @@ static void test_sprintf_fast() ASSERT_TRUE(strcmp(buffer, "This 1337 is a test with 3.00000 values") == 0); } -static void _sprintf_fast(void* val) { - bool* res = (bool *) val; +static void _sprintf_fast(volatile void* val) { + volatile bool* res = (volatile bool *) val; char buffer[256]; sprintf_fast(buffer, "This %d is a %s with %f values", 1337, "test", 3.0); - *res = (bool) (strcmp(buffer, "This 1337 is a test with 3.00000 values") == 0); + *res |= (bool) (strcmp(buffer, "This 1337 is a test with 3.00000 values") == 0); } -static void _sprintf(void* val) { - bool* res = (bool *) val; +static void _sprintf(volatile void* val) { + volatile bool* res = (volatile bool *) val; char buffer[256]; sprintf(buffer, "This %d is a %s with %f values", 1337, "test", 3.0); - *res = (bool) (strcmp(buffer, "This 1337 is a test with 3.000000 values") == 0); + *res |= (bool) (strcmp(buffer, "This 1337 is a test with 3.000000 values") == 0); } static void test_sprintf_fast_performance() { @@ -171,24 +179,26 @@ static void test_str_to_float() #define main UtilsStringUtilsTest #endif +#include <string.h> + int main() { TEST_INIT(100); - RUN_TEST(test_utf8_encode); - RUN_TEST(test_utf8_decode); - RUN_TEST(test_utf8_str_length); - RUN_TEST(test_str_is_float); - RUN_TEST(test_str_is_integer); - RUN_TEST(test_sprintf_fast); - RUN_TEST(test_str_is_alpha); - RUN_TEST(test_str_is_num); - RUN_TEST(test_str_is_alphanum); - RUN_TEST(test_str_length); - RUN_TEST(test_str_to_float); + TEST_RUN(test_utf8_encode); + TEST_RUN(test_utf8_decode); + TEST_RUN(test_utf8_str_length); + TEST_RUN(test_str_is_float); + TEST_RUN(test_str_is_integer); + TEST_RUN(test_sprintf_fast); + 
TEST_RUN(test_str_is_alpha); + TEST_RUN(test_str_is_num); + TEST_RUN(test_str_is_alphanum); + TEST_RUN(test_str_length); + TEST_RUN(test_str_to_float); - RUN_TEST(test_str_length_performance); - RUN_TEST(test_str_is_alphanum_performance); - RUN_TEST(test_sprintf_fast_performance); + TEST_RUN(test_str_length_performance); + TEST_RUN(test_str_is_alphanum_performance); + TEST_RUN(test_sprintf_fast_performance); TEST_FINALIZE(); diff --git a/tests/utils/UtilsTest.cpp b/tests/utils/UtilsTest.cpp index 8e85e8c..7b8f04f 100644 --- a/tests/utils/UtilsTest.cpp +++ b/tests/utils/UtilsTest.cpp @@ -54,26 +54,26 @@ static void test_is_empty() { ASSERT_TRUE(is_empty(region1, 0)); } -static void _is_equal(void* val) { - bool* res = (bool *) val; +static void _is_equal(volatile void* val) { + volatile bool* res = (volatile bool *) val; uint8_t region1[64]; uint8_t region2[64]; memset(region1, 0xAA, sizeof(region1)); memset(region2, 0xAA, sizeof(region2)); - *res = is_equal(region1, region2, sizeof(region1)); + *res |= is_equal(region1, region2, sizeof(region1)); } -static void _memcmp(void* val) { - bool* res = (bool *) val; +static void _memcmp(volatile void* val) { + volatile bool* res = (volatile bool *) val; uint8_t region1[64]; uint8_t region2[64]; memset(region1, 0xAA, sizeof(region1)); memset(region2, 0xAA, sizeof(region2)); - *res = (bool) (memcmp(region1, region2, sizeof(region1)) == 0); + *res |= (bool) (memcmp(region1, region2, sizeof(region1)) == 0); } static void test_is_equal_performance() { @@ -81,40 +81,40 @@ static void test_is_equal_performance() { COMPARE_FUNCTION_TEST_CYCLE(_is_equal, _memcmp, 10.0); } -static void _is_empty(void* val) { - bool* res = (bool *) val; +static void _is_empty(volatile void* val) { + volatile bool* res = (volatile bool *) val; alignas(64) uint8_t region1[64]; memset(region1, 0xAA, sizeof(region1)); - *res = is_empty(region1, sizeof(region1)); + *res |= is_empty(region1, sizeof(region1)); } -static void _memcmp_empty(void* val) { - 
bool* res = (bool *) val; +static void _memcmp_empty(volatile void* val) { + volatile bool* res = (volatile bool *) val; alignas(64) uint8_t region1[64]; memset(region1, 0xAA, sizeof(region1)); - *res = *region1 == 0 && memcmp(region1, region1 + 1, sizeof(region1) - 1) == 0; + *res |= *region1 == 0 && memcmp(region1, region1 + 1, sizeof(region1) - 1) == 0; } -static void _is_empty2(void* val) { - bool* res = (bool *) val; +static void _is_empty2(volatile void* val) { + volatile bool* res = (volatile bool *) val; alignas(64) uint8_t region1[64]; memset(region1, 0, sizeof(region1)); - *res = is_empty(region1, sizeof(region1)); + *res |= is_empty(region1, sizeof(region1)); } -static void _memcmp_empty2(void* val) { - bool* res = (bool *) val; +static void _memcmp_empty2(volatile void* val) { + volatile bool* res = (volatile bool *) val; alignas(64) uint8_t region1[64]; memset(region1, 0, sizeof(region1)); - *res = *region1 == 0 && memcmp(region1, region1 + 1, sizeof(region1) - 1) == 0; + *res |= *region1 == 0 && memcmp(region1, region1 + 1, sizeof(region1) - 1) == 0; } static void test_is_empty_performance() { @@ -135,11 +135,11 @@ static void test_is_empty_performance() { int main() { TEST_INIT(10); - RUN_TEST(test_is_equal); - RUN_TEST(test_is_empty); + TEST_RUN(test_is_equal); + TEST_RUN(test_is_empty); - RUN_TEST(test_is_equal_performance); - RUN_TEST(test_is_empty_performance); + TEST_RUN(test_is_equal_performance); + TEST_RUN(test_is_empty_performance); TEST_FINALIZE(); diff --git a/thread/ThreadPool.h b/thread/ThreadPool.h index a901a22..bab9229 100644 --- a/thread/ThreadPool.h +++ b/thread/ThreadPool.h @@ -65,7 +65,7 @@ static THREAD_RETURN thread_pool_worker(void* arg) LOG_2("ThreadPool worker ended"); // At the end of a thread the ring memory automatically is considered freed DEBUG_MEMORY_FREE((uintptr_t) work->ring.memory); - LOG_FORMAT_2("Freed thread RingMemory: %n B", {{LOG_DATA_UINT64, &work->ring.size}}); + LOG_2("Freed thread RingMemory: %n B", 
{{LOG_DATA_UINT64, &work->ring.size}}); atomic_set_release(&work->state, 1); // Job gets marked after completion -> can be overwritten now diff --git a/ui/UILayout.h b/ui/UILayout.h index b342285..02bf26e 100644 --- a/ui/UILayout.h +++ b/ui/UILayout.h @@ -83,12 +83,14 @@ struct UILayout { // 2. Once we are ready to switch the scene we copy the temporary memory into this data pointer byte* data; // Owner of the actual data + // @todo replace bools with bit field + // Changes on a as needed basis - uint32 vertex_size_static; + uint32 vertex_count_static; bool static_content_changed; // Changes every frame - uint32 vertex_size_dynamic; + uint32 vertex_count_dynamic; bool dynamic_content_changed; // Contains both static and dynamic content @@ -105,7 +107,7 @@ struct UILayout { // This is very similar to the currently rendered UI output but may have some empty space between elements // The reason for this is that some elements may need different vertex counts for different states (e.g. input field) // WARNING: This memory is shared between different layouts - uint32 active_vertex_size; + uint32 active_vertex_count; Vertex3DSamplerTextureColor* vertices_active; // Not the data owner (see data above) // Used during the initialization so that every element knows where we currently are during the setup process diff --git a/utils/MathUtils.h b/utils/MathUtils.h new file mode 100644 index 0000000..ab7aaf3 --- /dev/null +++ b/utils/MathUtils.h @@ -0,0 +1,319 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_UTILS_MATH_UTILS_H +#define TOS_UTILS_MATH_UTILS_H + +#include "../stdlib/Types.h" +#include "../utils/TestUtils.h" + +// WARNING: Don't use any of these functions yet. 
They are too imprecise and too slow + +inline +f64 factorial(int32 n) { + f64 result = 1.0; + for (int32 i = 1; i <= n; ++i) { + result *= i; + } + + return result; +} + +inline +f32 sin_approx(f32 x) { + // Normalize x to the range [-π, π] for better accuracy + while (x > OMS_PI) { + x -= OMS_TWO_PI; + } + + while (x < -OMS_PI) { + x += OMS_TWO_PI; + } + + f32 x2 = x * x; + return x * (1.0f + x2 * (-1.0f / 6.0f + x2 * (1.0f / 120.0f + x2 * (-1.0f / 5040.0f + x2 * (1.0f / 362880.0f))))); +} + +inline +f32 cos_approx(f32 x) { + return sin_approx(OMS_PI_OVER_TWO - x); +} + +inline +f32 tan_approx(f32 x) { + return sin_approx(x) / cos_approx(x); +} + +inline +f32 asin_approx(f32 x) { + // Undefined for |x| > 1 + ASSERT_SIMPLE(x >= -1.0f && x <= 1.0f); + + f32 result = x; + f32 term = x; + for (int32 i = 1; i <= 6; ++i) { + term *= x * x * (2 * i - 1) * (2 * i - 1) / ((2 * i) * (2 * i + 1)); + result += term; + } + + return result; +} + +inline +f32 acos_approx(f32 x) { + // π/2 - asin_approx(x) + return OMS_PI_OVER_TWO - asin_approx(x); +} + +inline +f32 atan_approx(f32 x) { + if (x > 1.0f) { + // π/2 - atan_approx(1/x) + return OMS_PI_OVER_TWO - atan_approx(1.0f / x); + } else if (x < -1.0f) { + // -π/2 - atan_approx(1/x) + return -OMS_PI_OVER_TWO - atan_approx(1.0f / x); + } + + f32 result = x; + f32 term = x; + for (int32 i = 1; i <= 6; ++i) { + term *= -x * x; + result += term / (2.0f * i + 1); + } + + return result; +} + +inline +f32 sqrt_approx(f32 a) { + ASSERT_SIMPLE(a >= 0); + + int32_t i = *(int32_t*)&a; + // Magic number for initial guess + i = 0x1FBD1DF5 + (i >> 1); + float x = *(float*)&i; + + // Newton-Raphson iterations + x = 0.5f * (x + a / x); + x = 0.5f * (x + a / x); + x = 0.5f * (x + a / x); + + return x; +} + +inline +f32 rsqrt_approx(f32 a) { + ASSERT_SIMPLE(a >= 0); + + // Initial guess using magic number (Quake III hack) + f32 x = a; + uint32 i = *(uint32 *)&x; + i = 0x5F3759DF - (i >> 1); // Magic number for initial guess + x = *(f32 *) &i; + 
+ // Newton-Raphson iterations + x = x * (1.5f - 0.5f * a * x * x); + x = x * (1.5f - 0.5f * a * x * x); + x = x * (1.5f - 0.5f * a * x * x); + + return x; +} + +inline +f32 exp_approx(f32 x) { + // Range reduction: e^x = e^(x / n)^n + const int32 n = 8; + x /= n; + + // Taylor series approximation for e^x + f32 result = 1.0f; + f32 term = 1.0f; + for (int32 i = 1; i <= 10; ++i) { + term *= x / i; + result += term; + } + + // Raise to the nth power + f32 final_result = result; + for (int32 i = 1; i < n; ++i) { + final_result *= result; + } + + return final_result; +} + +inline +f32 log_approx(f32 x) { + ASSERT_SIMPLE(x > 0); + + // Polynomial approximation + f32 y = (x - 1) / (x + 1); + f32 y2 = y * y; + f32 result = y * (1.0f + y2 * (1.0f / 3.0f + y2 * (1.0f / 5.0f + y2 * (1.0f / 7.0f)))); + + return 2.0f * result; +} + +inline +f32 pow_approx(f32 a, f32 b) { + if (a == 0.0f) { + return 0.0f; + } + + return exp_approx(b * log_approx(a)); +} + +//////////////////////////////////////////////////////////////// + +inline +f64 sin_approx(f64 x) { + // Normalize x to the range [-π, π] for better accuracy + while (x > OMS_PI) { + x -= OMS_TWO_PI; + } + + while (x < -OMS_PI) { + x += OMS_TWO_PI; + } + + f64 x2 = x * x; + return x * (1.0 + x2 * (-1.0 / 6.0 + x2 * (1.0 / 120.0 + x2 * (-1.0 / 5040.0 + x2 * (1.0 / 362880.0))))); +} + +inline +f64 cos_approx(f64 x) { + return sin_approx(OMS_PI_OVER_TWO - x); +} + +inline +f64 tan_approx(f64 x) { + return sin_approx(x) / cos_approx(x); +} + +inline +f64 asin_approx(f64 x) { + // Undefined for |x| > 1 + ASSERT_SIMPLE(x >= -1.0 && x <= 1.0); + + f64 result = x; + f64 term = x; + for (int32 i = 1; i <= 6; ++i) { + term *= x * x * (2 * i - 1) * (2 * i - 1) / ((2 * i) * (2 * i + 1)); + result += term; + } + + return result; +} + +inline +f64 acos_approx(f64 x) { + // π/2 - asin_approx(x) + return OMS_PI_OVER_TWO - asin_approx(x); +} + +inline +f64 atan_approx(f64 x) { + if (x > 1.0) { + // π/2 - atan_approx(1/x) + return 
OMS_PI_OVER_TWO - atan_approx(1.0 / x); + } else if (x < -1.0) { + // -π/2 - atan_approx(1/x) + return -OMS_PI_OVER_TWO - atan_approx(1.0 / x); + } + + f64 result = x; + f64 term = x; + for (int32 i = 1; i <= 6; ++i) { + term *= -x * x; + result += term / (2 * i + 1); + } + + return result; +} + +inline +f64 sqrt_approx(f64 a) { + ASSERT_SIMPLE(a >= 0); + + int64_t i = *(int64_t*)&a; + // Magic number for initial guess + i = 0x1FF7A3BEA91D9B1B + (i >> 1); + f64 x = *(f64*)&i; + + // Newton-Raphson iterations + x = 0.5 * (x + a / x); + x = 0.5 * (x + a / x); + x = 0.5 * (x + a / x); + + return x; +} + +inline +f64 rsqrt_approx(f64 a) { + ASSERT_SIMPLE(a >= 0); + + // Initial guess using magic number (Quake III hack) + f64 x = a; + uint64 i = *(uint64 *)&x; + i = 0x5fe6eb50c7b537a9 - (i >> 1); // Magic number for initial guess + x = *(f64 *) &i; + + // Newton-Raphson iterations + x = x * (1.5 - 0.5 * a * x * x); + x = x * (1.5 - 0.5 * a * x * x); + x = x * (1.5 - 0.5 * a * x * x); + + return x; +} + +inline +f64 exp_approx(f64 x) { + // Range reduction: e^x = e^(x / n)^n + const int32 n = 8; + x /= n; + + // Taylor series approximation for e^x + f64 result = 1.0; + f64 term = 1.0; + for (int32 i = 1; i <= 10; ++i) { + term *= x / i; + result += term; + } + + // Raise to the nth power + f64 final_result = 1.0; + for (int32 i = 0; i < n; ++i) { + final_result *= result; + } + + return final_result; +} + +inline +f64 log_approx(f64 x) { + ASSERT_SIMPLE(x > 0); + + // Polynomial approximation + f64 y = (x - 1) / (x + 1); + f64 y2 = y * y; + f64 result = y * (1.0 + y2 * (1.0 / 3.0 + y2 * (1.0 / 5.0 + y2 * (1.0 / 7.0)))); + + return 2.0 * result; +} + +inline +f64 pow_approx(f64 a, f64 b) { + if (a == 0.0) { + return 0.0; + } + + return exp_approx(b * log_approx(a)); +} + +#endif