From 17b803a0b693f219f32e1a5425383b6968b59e7b Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Sun, 9 Mar 2025 18:15:08 +0100 Subject: [PATCH] prepare directx ui, not working yet --- asset/AssetArchive.h | 8 +- asset/AssetManagementSystem.h | 6 +- command/AppCmdBuffer.cpp | 18 +- gpuapi/direct3d/AppCmdBuffer.h | 10 +- gpuapi/direct3d/DirectXUtils.h | 525 +++++++++++++++++++++- gpuapi/direct3d/ShaderUtils.h | 247 ++++++++++- gpuapi/opengl/AppCmdBuffer.h | 4 +- gpuapi/opengl/OpenglUtils.h | 23 +- gpuapi/opengl/ShaderUtils.h | 54 ++- gpuapi/vulkan/AppCmdBuffer.h | 4 +- gpuapi/vulkan/ShaderUtils.h | 24 +- gpuapi/vulkan/VulkanUtils.h | 142 +++--- log/Log.h | 211 +++------ log/PerformanceProfiler.h | 6 +- memory/BufferMemory.h | 3 +- memory/ChunkMemory.h | 7 +- memory/RingMemory.h | 5 +- platform/win32/ExceptionHandler.h | 8 +- platform/win32/threading/Semaphore.h | 6 + platform/win32/threading/Thread.h | 1 + stdlib/HashMap.h | 12 +- stdlib/PerfectHashMap.h | 4 +- tests/MainTest.cpp | 14 +- tests/TestFramework.h | 182 +++++--- tests/math/EvaluatorTest.cpp | 6 +- tests/memory/ChunkMemoryTest.cpp | 19 +- tests/memory/RingMemoryTest.cpp | 14 +- tests/stdlib/HashMapTest.cpp | 6 +- tests/ui/UILayoutTest.cpp | 6 +- tests/ui/UIThemeTest.cpp | 4 +- tests/utils/BitUtilsTest.cpp | 50 +-- tests/utils/EndianUtilsTest.cpp | 24 +- tests/utils/MathUtilsTest.cpp | 624 +++++++++++++++++++++++++++ tests/utils/StringUtilsTest.cpp | 70 +-- tests/utils/UtilsTest.cpp | 44 +- thread/ThreadPool.h | 2 +- ui/UILayout.h | 8 +- utils/MathUtils.h | 319 ++++++++++++++ 38 files changed, 2222 insertions(+), 498 deletions(-) create mode 100644 tests/utils/MathUtilsTest.cpp create mode 100644 utils/MathUtils.h diff --git a/asset/AssetArchive.h b/asset/AssetArchive.h index d77b295..4c792ae 100644 --- a/asset/AssetArchive.h +++ b/asset/AssetArchive.h @@ -133,7 +133,7 @@ void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* b { PROFILE(PROFILE_ASSET_ARCHIVE_LOAD, path, false, 
true); - LOG_FORMAT_1( + LOG_1( "Load AssetArchive %s", {{LOG_DATA_CHAR_STR, (void *) path}} ); @@ -174,7 +174,7 @@ void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* b file_read(archive->fd, &file, 0, file.size); asset_archive_header_load(&archive->header, file.content, steps); - LOG_FORMAT_1( + LOG_1( "Loaded AssetArchive %s with %d assets", {{LOG_DATA_CHAR_STR, (void *) path}, {LOG_DATA_UINT32, (void *) &archive->header.asset_count}} ); @@ -204,7 +204,7 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana byte component_id = archive->asset_type_map[element->type]; //AssetComponent* ac = &ams->asset_components[component_id]; - LOG_FORMAT_2( + LOG_2( "Load asset %d from archive %d for AMS %d with %n B compressed and %n B uncompressed", {{LOG_DATA_UINT64, &id}, {LOG_DATA_UINT32, &element->type}, {LOG_DATA_BYTE, &component_id}, {LOG_DATA_UINT32, &element->length}, {LOG_DATA_UINT32, &element->uncompressed}} ); @@ -314,7 +314,7 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana // the main program should still be able to do some work if possible thrd_ams_set_loaded(asset); - LOG_FORMAT_2( + LOG_2( "Loaded asset %d from archive %d for AMS %d with %n B compressed and %n B uncompressed", {{LOG_DATA_UINT64, &id}, {LOG_DATA_UINT32, &element->type}, {LOG_DATA_BYTE, &component_id}, {LOG_DATA_UINT32, &element->length}, {LOG_DATA_UINT32, &element->uncompressed}} ); diff --git a/asset/AssetManagementSystem.h b/asset/AssetManagementSystem.h index df159d4..3fabeca 100644 --- a/asset/AssetManagementSystem.h +++ b/asset/AssetManagementSystem.h @@ -42,7 +42,7 @@ struct AssetManagementSystem { inline void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 asset_component_count, int32 count) { - LOG_FORMAT_1("Create AMS for %n assets", {{LOG_DATA_INT32, &count}}); + LOG_1("Create AMS for %n assets", {{LOG_DATA_INT32, &count}}); hashmap_create(&ams->hash_map, count, 
sizeof(HashEntry) + sizeof(Asset), buf); ams->asset_component_count = asset_component_count; ams->asset_components = (AssetComponent *) buffer_get_memory(buf, asset_component_count * sizeof(AssetComponent), 64, true); @@ -52,7 +52,7 @@ inline void ams_component_create(AssetComponent* ac, BufferMemory* buf, int32 chunk_size, int32 count) { ASSERT_SIMPLE(chunk_size); - LOG_FORMAT_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}}); + LOG_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}}); chunk_init(&ac->asset_memory, buf, count, chunk_size, 64); pthread_mutex_init(&ac->mutex, NULL); @@ -62,7 +62,7 @@ inline void ams_component_create(AssetComponent* ac, byte* buf, int32 chunk_size, int32 count) { ASSERT_SIMPLE(chunk_size); - LOG_FORMAT_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}}); + LOG_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}}); ac->asset_memory.count = count; ac->asset_memory.chunk_size = chunk_size; diff --git a/command/AppCmdBuffer.cpp b/command/AppCmdBuffer.cpp index 43d2424..9e932f8 100644 --- a/command/AppCmdBuffer.cpp +++ b/command/AppCmdBuffer.cpp @@ -40,7 +40,7 @@ void cmd_buffer_create(AppCmdBuffer* cb, BufferMemory* buf, int32 commands_count chunk_init(&cb->commands, buf, commands_count, sizeof(Command), 64); pthread_mutex_init(&cb->mutex, NULL); - LOG_FORMAT_1("Created AppCmdBuffer: %n B", {{LOG_DATA_UINT64, &cb->commands.size}}); + LOG_1("Created AppCmdBuffer: %n B", {{LOG_DATA_UINT64, &cb->commands.size}}); } // This doesn't load the asset directly but tells (most likely) a worker thread to load an asset @@ -356,7 +356,7 @@ inline void* cmd_func_run(AppCmdBuffer*, CommandFunction func) { } inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, int32 asset_id) { - LOG_FORMAT_1("Load texture %d", {{LOG_DATA_INT32, 
&asset_id}}); + LOG_1("Load texture %d", {{LOG_DATA_INT32, &asset_id}}); // Check if asset already loaded char id_str[9]; @@ -385,7 +385,7 @@ inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, int32 asset_id) { } inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, const char* name) { - LOG_FORMAT_1("Load texture %d", {{LOG_DATA_CHAR_STR, (void *) name}}); + LOG_1("Load texture %d", {{LOG_DATA_CHAR_STR, (void *) name}}); PROFILE(PROFILE_CMD_ASSET_LOAD_SYNC, name, false, true); // Check if asset already loaded @@ -413,7 +413,7 @@ inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, const char* name) { inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, int32 asset_id) { - LOG_FORMAT_1("Load font %d", {{LOG_DATA_INT32, &asset_id}}); + LOG_1("Load font %d", {{LOG_DATA_INT32, &asset_id}}); // Check if asset already loaded char id_str[9]; @@ -442,7 +442,7 @@ inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, int32 asset_id) inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, const char* name) { - LOG_FORMAT_1("Load font %s", {{LOG_DATA_CHAR_STR, (void *) name}}); + LOG_1("Load font %s", {{LOG_DATA_CHAR_STR, (void *) name}}); PROFILE(PROFILE_CMD_FONT_LOAD_SYNC, name, false, true); // Check if asset already loaded @@ -472,13 +472,13 @@ UILayout* cmd_layout_load_sync( UILayout* __restrict layout, const char* __restrict layout_path ) { PROFILE(PROFILE_CMD_LAYOUT_LOAD_SYNC, layout_path, false, true); - LOG_FORMAT_1("Load layout %s", {{LOG_DATA_CHAR_STR, (void *) layout_path}}); + LOG_1("Load layout %s", {{LOG_DATA_CHAR_STR, (void *) layout_path}}); FileBody layout_file = {}; file_read(layout_path, &layout_file, cb->mem_vol); if (!layout_file.content) { - LOG_FORMAT_1("Failed loading layout \"%s\"", {{LOG_DATA_CHAR_STR, (void *) layout_path}}); + LOG_1("Failed loading layout \"%s\"", {{LOG_DATA_CHAR_STR, (void *) layout_path}}); return NULL; } @@ -493,7 +493,7 @@ UIThemeStyle* cmd_theme_load_sync( UIThemeStyle* __restrict theme, const char* __restrict theme_path ) { 
PROFILE(PROFILE_CMD_THEME_LOAD_SYNC, theme_path, false, true); - LOG_FORMAT_1("Load theme %s", {{LOG_DATA_CHAR_STR, (void *) theme_path}}); + LOG_1("Load theme %s", {{LOG_DATA_CHAR_STR, (void *) theme_path}}); FileBody theme_file = {}; file_read(theme_path, &theme_file, cb->mem_vol); @@ -519,7 +519,7 @@ UILayout* cmd_ui_load_sync( const Camera* __restrict camera ) { PROFILE(PROFILE_CMD_UI_LOAD_SYNC, layout_path, false, true); - LOG_FORMAT_1("Load ui with layout %s and theme %s", {{LOG_DATA_CHAR_STR, (void *) layout_path}, {LOG_DATA_CHAR_STR, (void *) theme_path}}); + LOG_1("Load ui with layout %s and theme %s", {{LOG_DATA_CHAR_STR, (void *) layout_path}, {LOG_DATA_CHAR_STR, (void *) theme_path}}); if (!cmd_layout_load_sync(cb, layout, layout_path)) { // We have to make sure that at least the font is set diff --git a/gpuapi/direct3d/AppCmdBuffer.h b/gpuapi/direct3d/AppCmdBuffer.h index 7adb5f2..ada822a 100644 --- a/gpuapi/direct3d/AppCmdBuffer.h +++ b/gpuapi/direct3d/AppCmdBuffer.h @@ -27,7 +27,8 @@ void* cmd_shader_load(AppCmdBuffer*, Command*) { void* cmd_shader_load_sync( AppCmdBuffer* __restrict cb, Shader* __restrict shader, const int32* __restrict shader_ids, - ID3D12Device* __restrict device, ID3D12PipelineState** __restrict pipeline, ID3D12RootSignature* __restrict pipeline_layout + ID3D12Device* __restrict device, ID3D12PipelineState** __restrict pipeline, ID3D12RootSignature* __restrict pipeline_layout, + D3D12_INPUT_ELEMENT_DESC* __restrict descriptor_set_layouts, int32 layout_count ) { PROFILE(PROFILE_CMD_SHADER_LOAD_SYNC, NULL, false, true); char asset_id[9]; @@ -53,7 +54,7 @@ void* cmd_shader_load_sync( } // Make sub shader - shader_assets[i] = shader_make( + shader_assets[i] = gpuapi_shader_make( shader_type_index((ShaderType) (i + 1)), (char *) shader_asset->self, shader_asset->ram_size @@ -64,11 +65,14 @@ void* cmd_shader_load_sync( } // Make shader/program - shader->id = pipeline_make( + shader->id = gpuapi_pipeline_make( device, pipeline, 
pipeline_layout, + descriptor_set_layouts, layout_count, shader_assets[0], shader_assets[1], shader_assets[2] ); + // @question do I release shader_assets[..]? + return NULL; } diff --git a/gpuapi/direct3d/DirectXUtils.h b/gpuapi/direct3d/DirectXUtils.h index be7c65e..6717aa4 100644 --- a/gpuapi/direct3d/DirectXUtils.h +++ b/gpuapi/direct3d/DirectXUtils.h @@ -15,11 +15,15 @@ #include #include #include "../../../GameEngine/log/Log.h" +#include "../../../GameEngine/memory/RingMemory.h" +#include "../../../GameEngine/object/Texture.h" +#include "../../../GameEngine/image/Image.cpp" +#include "../../compiler/CompilerUtils.h" // #include "../../../EngineDependencies/directx/d3d12.h" // #include "../../../EngineDependencies/directx/d3dx12.h" #include "FramesInFlightContainer.h" -// A more (compile-time) efficient version of the windows macro IID_PPV_ARGS +// Replacement for the windows macro IID_PPVOID #define IID_PPVOID(pointer) __uuidof(**(pointer)), (void **) (pointer) bool is_directx_supported(D3D_FEATURE_LEVEL version) @@ -89,6 +93,22 @@ int32 max_directx_version() return 0; } +inline +void change_viewport( + int32 width, int32 height, + ID3D12GraphicsCommandList* command_buffer, D3D12_VIEWPORT* viewport, D3D12_RECT* scissor_rect +) +{ + viewport->Width = (f32) width; + viewport->Height = (f32) height; + + scissor_rect->right = width; + scissor_rect->bottom = height; + + command_buffer->RSSetViewports(1, viewport); + command_buffer->RSSetScissorRects(1, scissor_rect); +} + // Returns frame index int32 wait_for_previous_frame( FramesInFlightContainer* frames_in_flight, @@ -100,11 +120,13 @@ int32 wait_for_previous_frame( // sample illustrates how to use fences for efficient resource usage and to // maximize GPU utilization. - UINT64 fence_value_temp = frames_in_flight->fence_value; + uint64 fence_value_temp = frames_in_flight->fence_value; + + HRESULT hr; // Signal and increment the fence value. 
- if(FAILED(graphics_queue->Signal(frames_in_flight->fence, fence_value_temp))) { - LOG_1("DirectX12 Signal"); + if(FAILED(hr = graphics_queue->Signal(frames_in_flight->fence, fence_value_temp))) { + LOG_1("DirectX12 Signal: %d", {{LOG_DATA_INT32, &hr}}); ASSERT_SIMPLE(false); } @@ -112,8 +134,8 @@ int32 wait_for_previous_frame( // Wait until the previous frame is finished. if (frames_in_flight->fence->GetCompletedValue() < fence_value_temp) { - if (FAILED(frames_in_flight->fence->SetEventOnCompletion(fence_value_temp, frames_in_flight->fence_event))) { - LOG_1("DirectX12 SetEventOnCompletion"); + if (FAILED(hr = frames_in_flight->fence->SetEventOnCompletion(fence_value_temp, frames_in_flight->fence_event))) { + LOG_1("DirectX12 SetEventOnCompletion: %d", {{LOG_DATA_INT32, &hr}}); ASSERT_SIMPLE(false); } @@ -170,11 +192,496 @@ void gpuapi_debug_messenger_setup(ID3D12Device* device) } inline -void gpuapi_create_logical_device(ID3D12Device** device) { - if (FAILED(D3D12CreateDevice(NULL, D3D_FEATURE_LEVEL_11_0, IID_PPVOID(device)))) { - LOG_1("DirectX12 D3D12CreateDevice"); +void gpuapi_pick_physical_device(IDXGIFactory6* instance, IDXGIAdapter1** physical_device, bool requestHighPerformanceAdapter = true) +{ + IDXGIAdapter1* adapter = NULL; + IDXGIFactory6* factory6 = NULL; + + if (SUCCEEDED(instance->QueryInterface(IID_PPVOID(&factory6)))) { + for (uint32 adapterIndex = 0; + SUCCEEDED(factory6->EnumAdapterByGpuPreference( + adapterIndex, + requestHighPerformanceAdapter == true ? DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE : DXGI_GPU_PREFERENCE_UNSPECIFIED, + IID_PPVOID(&adapter)) + ); + ++adapterIndex + ) { + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + // Don't select the Basic Render Driver adapter. + // If you want a software adapter, pass in "/warp" on the command line. + continue; + } + + // Check to see whether the adapter supports Direct3D 12, but don't create the actual device yet. 
+ if (SUCCEEDED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), NULL))) { + break; + } + } + } + + if(!adapter) { + for (uint32 adapterIndex = 0; SUCCEEDED(instance->EnumAdapters1(adapterIndex, &adapter)); ++adapterIndex) { + DXGI_ADAPTER_DESC1 desc; + adapter->GetDesc1(&desc); + + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + // Don't select the Basic Render Driver adapter. + continue; + } + + // Check to see whether the adapter supports Direct3D 12, but don't create the actual device yet. + if (SUCCEEDED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), NULL))) { + break; + } + } + } + + *physical_device = adapter; + if (factory6) { + factory6->Release(); + } +} + +inline +void gpuapi_create_logical_device(IDXGIAdapter1* physical_device, ID3D12Device** device) +{ + HRESULT hr; + if (FAILED(hr = D3D12CreateDevice(physical_device, D3D_FEATURE_LEVEL_11_0, IID_PPVOID(device)))) { + LOG_1("DirectX12 D3D12CreateDevice: %d", {{LOG_DATA_INT32, &hr}}); ASSERT_SIMPLE(false); } } +inline +void gpuapi_command_buffer_create( + ID3D12Device* device, + ID3D12CommandAllocator* command_pool, + ID3D12PipelineState* pipeline, + ID3D12GraphicsCommandList** command_buffer +) +{ + HRESULT hr; + if (FAILED(hr = device->CreateCommandList( + 0, D3D12_COMMAND_LIST_TYPE_DIRECT, + command_pool, pipeline, + IID_PPVOID(command_buffer))) + ) { + LOG_1("DirectX12 CreateCommandList: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + }; +} + +static +DXGI_FORMAT gpuapi_texture_format(byte settings) +{ + if ((settings & IMAGE_SETTING_CHANNEL_4_SIZE)) { + switch (settings & IMAGE_SETTING_CHANNEL_COUNT) { + case 1: + return DXGI_FORMAT_R32_FLOAT; + case 2: + return DXGI_FORMAT_R32G32_FLOAT; + case 3: + return DXGI_FORMAT_R32G32B32_FLOAT; + case 4: + return DXGI_FORMAT_R32G32B32A32_FLOAT; + default: + UNREACHABLE(); + } + } else { + switch (settings & IMAGE_SETTING_CHANNEL_COUNT) { + case 1: + return DXGI_FORMAT_R8_UNORM; + case 2: + return 
DXGI_FORMAT_R8G8_UNORM; + case 3: + // RGB is not supported (probably due to the alignment + return DXGI_FORMAT_R8G8B8A8_UNORM; + case 4: + return DXGI_FORMAT_R8G8B8A8_UNORM; + default: + UNREACHABLE(); + } + } +} + +// @performance Sometimes we want to upload multiple textures in one go (more performant). Allow that or don't use this function in that case. +D3D12_CPU_DESCRIPTOR_HANDLE load_texture_to_gpu( + ID3D12Device* device, + ID3D12GraphicsCommandList* command_buffer, + ID3D12Resource** texture_resource, + int32 descriptorOffset, + ID3D12DescriptorHeap* srv_heap, + const Texture* texture, + RingMemory* ring +) { + DXGI_FORMAT textureFormat = gpuapi_texture_format(texture->image.image_settings); + + D3D12_RESOURCE_DESC textureDesc = {}; + textureDesc.MipLevels = 1; + textureDesc.Format = textureFormat; + textureDesc.Width = texture->image.width; + textureDesc.Height = texture->image.height; + textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + textureDesc.DepthOrArraySize = 1; + textureDesc.SampleDesc.Count = 1; + textureDesc.SampleDesc.Quality = 0; + textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + + D3D12_HEAP_PROPERTIES texture_heap_property = { + .Type = D3D12_HEAP_TYPE_DEFAULT, + .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN, + .CreationNodeMask = 1, + .VisibleNodeMask = 1 + }; + + HRESULT hr; + if (FAILED(hr = device->CreateCommittedResource( + &texture_heap_property, + D3D12_HEAP_FLAG_NONE, + &textureDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + NULL, + IID_PPVOID(texture_resource))) + ) { + LOG_1("DirectX12 CreateCommittedResource: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + + return {0}; + } + + const D3D12_RESOURCE_DESC DestinationDesc = (*texture_resource)->GetDesc(); + uint64 uploadBufferSize = 0; + ID3D12Device* pDevice = NULL; + (*texture_resource)->GetDevice(IID_PPVOID(&pDevice)); + pDevice->GetCopyableFootprints(&DestinationDesc, 0, 1, 0, NULL, NULL, NULL, 
&uploadBufferSize); + + D3D12_RESOURCE_DESC texture_upload_buffer = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = 0, + .Width = uploadBufferSize, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = { + .Count = 1, + .Quality = 0, + }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = D3D12_RESOURCE_FLAG_NONE + }; + + D3D12_HEAP_PROPERTIES texture_upload_heap_property = { + .Type = D3D12_HEAP_TYPE_UPLOAD, + .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN, + .CreationNodeMask = 1, + .VisibleNodeMask = 1 + }; + + ID3D12Resource* texture_upload_heap; + if (FAILED(hr = device->CreateCommittedResource( + &texture_heap_property, + D3D12_HEAP_FLAG_NONE, + &texture_upload_buffer, + D3D12_RESOURCE_STATE_GENERIC_READ, + NULL, + IID_PPVOID(&texture_upload_heap))) + ) { + if (pDevice) { + pDevice->Release(); + } + + LOG_1("DirectX12 CreateCommittedResource: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + + return {0}; + } + + int32 pixel_size = image_pixel_size_from_type(texture->image.image_settings); + D3D12_SUBRESOURCE_DATA textureData[] = { + { + .pData = texture->image.pixels, + .RowPitch = texture->image.width * pixel_size, + .SlicePitch = (texture->image.width * pixel_size) * texture->image.height, + } + }; + + uint32 number_of_resources = ARRAY_COUNT(textureData); + uint32 FirstSubresource = 0; + uint64 IntermediateOffset = 0; + uint64 RequiredSize = 0; + uint64 MemToAlloc = (uint64) (sizeof(D3D12_PLACED_SUBRESOURCE_FOOTPRINT) + sizeof(uint32) + sizeof(uint64)) * number_of_resources; + + D3D12_PLACED_SUBRESOURCE_FOOTPRINT* pLayouts = (D3D12_PLACED_SUBRESOURCE_FOOTPRINT *) ring_get_memory(ring, MemToAlloc, 64); + uint64* pRowSizesInBytes = (uint64 *) (pLayouts + number_of_resources); + uint32* pNumRows = (uint32 *) (pRowSizesInBytes + number_of_resources); + + pDevice->GetCopyableFootprints(&DestinationDesc, FirstSubresource, 
number_of_resources, IntermediateOffset, pLayouts, pNumRows, pRowSizesInBytes, &RequiredSize); + pDevice->Release(); + + const D3D12_RESOURCE_DESC IntermediateDesc = texture_upload_heap->GetDesc(); + if (IntermediateDesc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER + || IntermediateDesc.Width < RequiredSize + pLayouts[0].Offset + || RequiredSize > ((size_t) -1) + || (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && + (FirstSubresource != 0 || number_of_resources != 1) + ) + ) { + if (texture_upload_heap) { + texture_upload_heap->Release(); + } + + LOG_1("DirectX12 texture resource setup"); + ASSERT_SIMPLE(false); + + return {0}; + } + + byte* pData; + if (FAILED(hr = texture_upload_heap->Map(0, NULL, (void **) &pData))) { + if (texture_upload_heap) { + texture_upload_heap->Release(); + } + + LOG_1("DirectX12 Map: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + + return {0}; + } + + for (uint32 i = 0; i < number_of_resources; ++i) { + ASSERT_SIMPLE(pRowSizesInBytes[i] <= ((size_t) -1)); + + D3D12_MEMCPY_DEST DestData = { pData + pLayouts[i].Offset, pLayouts[i].Footprint.RowPitch, ((size_t) pLayouts[i].Footprint.RowPitch) * ((size_t) pNumRows[i]) }; + for (uint32 z = 0; z < pLayouts[i].Footprint.Depth; ++z) { + byte* pDestSlice = ((byte *) DestData.pData) + DestData.SlicePitch * z; + byte* pSrcSlice = ((byte *) textureData[i].pData) + textureData[i].SlicePitch * ((intptr_t) z); + for (uint32 y = 0; y < pNumRows[i]; ++y) { + memcpy( + pDestSlice + DestData.RowPitch * y, + pSrcSlice + textureData[i].RowPitch * ((intptr_t) y), + (size_t) pRowSizesInBytes[i] + ); + } + } + } + texture_upload_heap->Unmap(0, NULL); + + if (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { + command_buffer->CopyBufferRegion( + *texture_resource, 0, texture_upload_heap, pLayouts[0].Offset, pLayouts[0].Footprint.Width + ); + } else { + for (uint32 i = 0; i < number_of_resources; ++i) { + D3D12_TEXTURE_COPY_LOCATION Dst = { + .pResource = 
*texture_resource, + .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, + .SubresourceIndex = i + FirstSubresource, + }; + + D3D12_TEXTURE_COPY_LOCATION Src = { + .pResource = texture_upload_heap, + .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, + .PlacedFootprint = pLayouts[i], + }; + + command_buffer->CopyTextureRegion(&Dst, 0, 0, 0, &Src, NULL); + } + } + + D3D12_RESOURCE_BARRIER barrier = { + .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, + .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, + .Transition = { + .pResource = *texture_resource, + .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, + .StateBefore = D3D12_RESOURCE_STATE_COPY_DEST, + .StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, + } + }; + command_buffer->ResourceBarrier(1, &barrier); + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Format = textureDesc.Format; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MipLevels = 1; + + D3D12_CPU_DESCRIPTOR_HANDLE srv_handle = srv_heap->GetCPUDescriptorHandleForHeapStart(); + device->CreateShaderResourceView(*texture_resource, &srvDesc, srv_handle); + + if (texture_upload_heap) { + texture_upload_heap->Release(); + } + + srv_handle.ptr += descriptorOffset * device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + return srv_handle; +} + +void gpuapi_vertex_buffer_create( + ID3D12Device* device, + D3D12_VERTEX_BUFFER_VIEW* vertex_buffer_view, + ID3D12Resource** vertex_buffer, + const void* __restrict vertices, uint32 vertex_size, uint32 vertex_count +) +{ + D3D12_RESOURCE_DESC resource_info = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = 0, + .Width = vertex_size * vertex_count, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = { + .Count = 1, + .Quality = 0 + }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = 
D3D12_RESOURCE_FLAG_NONE + }; + + // Note: using upload heaps to transfer static data like vert buffers is not + // recommended. Every time the GPU needs it, the upload heap will be marshalled + // over. Please read up on Default Heap usage. An upload heap is used here for + // code simplicity and because there are very few verts to actually transfer. + D3D12_HEAP_PROPERTIES heap_property = { + .Type = D3D12_HEAP_TYPE_UPLOAD, + .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN, + .CreationNodeMask = 1, + .VisibleNodeMask = 1 + }; + + HRESULT hr; + if (FAILED(hr = device->CreateCommittedResource( + &heap_property, + D3D12_HEAP_FLAG_NONE, + &resource_info, + D3D12_RESOURCE_STATE_GENERIC_READ, + NULL, + IID_PPVOID(vertex_buffer))) + ) { + LOG_1("DirectX12 CreateCommittedResource: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + + return; + } + + // Copy the triangle data to the vertex buffer + uint8* pVertexDataBegin; + // We do not intend to read from this resource on the CPU + D3D12_RANGE readRange = {}; + if (FAILED(hr = (*vertex_buffer)->Map(0, &readRange, (void **) &pVertexDataBegin))) { + LOG_1("DirectX12 Map: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + } + + memcpy(pVertexDataBegin, vertices, vertex_size * vertex_count); + (*vertex_buffer)->Unmap(0, NULL); + + // Initialize the vertex buffer view + vertex_buffer_view->BufferLocation = (*vertex_buffer)->GetGPUVirtualAddress(); + vertex_buffer_view->StrideInBytes = vertex_size; + vertex_buffer_view->SizeInBytes = vertex_size * vertex_count; +} + +void gpuapi_vertex_buffer_update( + ID3D12Resource* vertex_buffer, + const void* __restrict vertices, + uint32 vertex_size, + uint32 vertex_count, + uint32 offset = 0 +) +{ + uint64 size = vertex_count * vertex_size; + + uint8* pVertexDataBegin; + D3D12_RANGE readRange = {}; + D3D12_RANGE writeRange = { offset, offset + size }; + + HRESULT hr; + if (FAILED(hr = vertex_buffer->Map(0, &readRange, 
(void**)&pVertexDataBegin))) { + LOG_1("DirectX12 Map: %d", {{LOG_DATA_INT32, &hr}}); + ASSERT_SIMPLE(false); + return; + } + + memcpy(pVertexDataBegin + offset, vertices, size); + + vertex_buffer->Unmap(0, &writeRange); +} + +// In directx this is actually called a constant buffer +void gpuapi_uniform_buffers_create( + ID3D12Device* device, + ID3D12Resource** uniform_buffer, + const void* __restrict data, uint32 buffer_size +) +{ + D3D12_RESOURCE_DESC resource_info = { + .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, + .Alignment = 0, + .Width = buffer_size, + .Height = 1, + .DepthOrArraySize = 1, + .MipLevels = 1, + .Format = DXGI_FORMAT_UNKNOWN, + .SampleDesc = { + .Count = 1, + .Quality = 0 + }, + .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + .Flags = D3D12_RESOURCE_FLAG_NONE + }; + + // Note: using upload heaps to transfer static data like vert buffers is not + // recommended. Every time the GPU needs it, the upload heap will be marshalled + // over. Please read up on Default Heap usage. An upload heap is used here for + // code simplicity and because there are very few verts to actually transfer. 
+ D3D12_HEAP_PROPERTIES heap_property = { + .Type = D3D12_HEAP_TYPE_UPLOAD, + .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN, + .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN, + .CreationNodeMask = 1, + .VisibleNodeMask = 1 + }; + + device->CreateCommittedResource( + &heap_property, + D3D12_HEAP_FLAG_NONE, + &resource_info, + D3D12_RESOURCE_STATE_GENERIC_READ, + NULL, + IID_PPV_ARGS(uniform_buffer)); + + D3D12_RANGE readRange = {}; + + uint8* pCBDataBegin; + (*uniform_buffer)->Map(0, &readRange, (void **) &pCBDataBegin); + memcpy(pCBDataBegin, &data, buffer_size); + (*uniform_buffer)->Unmap(0, NULL); +} + +void gpuapi_uniform_buffer_update( + ID3D12Resource* uniform_buffer, + const void* __restrict data, + uint32 buffer_size +) +{ + D3D12_RANGE readRange = {}; + uint8* pCBDataBegin = nullptr; + uniform_buffer->Map(0, &readRange, (void **) &pCBDataBegin); + + memcpy(pCBDataBegin, data, buffer_size); + + uniform_buffer->Unmap(0, nullptr); +} + #endif \ No newline at end of file diff --git a/gpuapi/direct3d/ShaderUtils.h b/gpuapi/direct3d/ShaderUtils.h index 83755fc..5a83471 100644 --- a/gpuapi/direct3d/ShaderUtils.h +++ b/gpuapi/direct3d/ShaderUtils.h @@ -17,7 +17,13 @@ #include "../../stdlib/Types.h" #include "../../memory/RingMemory.h" #include "../../log/Log.h" +#include "../../log/Stats.h" +#include "../../log/PerformanceProfiler.h" +#include "../../object/Vertex.h" +#include "../../utils/StringUtils.h" +#include "../../log/Log.h" #include "../ShaderType.h" +#include "../GpuAttributeType.h" #pragma comment(lib, "d3dcompiler.lib") @@ -33,7 +39,7 @@ const char* shader_type_index(ShaderType type) } } -ID3DBlob* shader_make(const char* type, const char* source, int32 source_size) +ID3DBlob* gpuapi_shader_make(const char* type, const char* source, int32 source_size) { LOG_1("Create shader"); #if DEBUG || INTERNAL @@ -44,8 +50,10 @@ ID3DBlob* shader_make(const char* type, const char* source, int32 source_size) ID3DBlob* blob; ID3DBlob* errMsgs; - if 
(FAILED(D3DCompile2(source, source_size, NULL, NULL, NULL, "main", type, compileFlags, 0, 0, NULL, 0, &blob, &errMsgs))) { - LOG_1("DirectX12 D3DCompile2"); + HRESULT hr; + + if (FAILED(hr = D3DCompile2(source, source_size, NULL, NULL, NULL, "main", type, compileFlags, 0, 0, NULL, 0, &blob, &errMsgs))) { + LOG_1("DirectX12 D3DCompile2: %d, %s", {{LOG_DATA_INT32, &hr}, {LOG_DATA_CHAR_STR, errMsgs->GetBufferPointer()}}); ASSERT_SIMPLE(false); } @@ -58,24 +66,21 @@ ID3DBlob* shader_make(const char* type, const char* source, int32 source_size) return blob; } -ID3D12PipelineState* pipeline_make( + +ID3D12PipelineState* gpuapi_pipeline_make( ID3D12Device* device, ID3D12PipelineState** pipeline, ID3D12RootSignature* pipeline_layout, + D3D12_INPUT_ELEMENT_DESC* descriptor_set_layouts, uint32 layout_count, ID3DBlob* vertex_shader, ID3DBlob* fragment_shader, ID3DBlob* ) { PROFILE(PROFILE_PIPELINE_MAKE, NULL, false, true); LOG_1("Create pipeline"); - // @todo We need to find a way to do this somewhere else: - D3D12_INPUT_ELEMENT_DESC input_element_info[] = { - { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, - { "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 } - }; D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_info = {}; - pipeline_state_info.InputLayout = { input_element_info, _countof(input_element_info) }; + pipeline_state_info.InputLayout = { descriptor_set_layouts, layout_count }; pipeline_state_info.pRootSignature = pipeline_layout; pipeline_state_info.VS = { .pShaderBytecode = vertex_shader->GetBufferPointer(), @@ -122,8 +127,9 @@ ID3D12PipelineState* pipeline_make( pipeline_state_info.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; pipeline_state_info.SampleDesc.Count = 1; - if (FAILED(device->CreateGraphicsPipelineState(&pipeline_state_info, IID_PPV_ARGS(pipeline)))) { - LOG_1("DirectX12 CreateGraphicsPipelineState"); + HRESULT hr; + if (FAILED(hr = 
device->CreateGraphicsPipelineState(&pipeline_state_info, IID_PPV_ARGS(pipeline)))) { + LOG_1("DirectX12 CreateGraphicsPipelineState: %d", {{LOG_DATA_INT32, &hr}}); ASSERT_SIMPLE(false); } @@ -133,10 +139,223 @@ ID3D12PipelineState* pipeline_make( return *pipeline; } -inline -void pipeline_use(ID3D12GraphicsCommandList* command_buffer, ID3D12PipelineState* pipelineState) +FORCE_INLINE +void gpuapi_pipeline_use(ID3D12GraphicsCommandList* command_buffer, ID3D12PipelineState* pipelineState) { command_buffer->SetPipelineState(pipelineState); } +// In DirectX Attribute info and descriptor set layout are combined into one +constexpr +void gpuapi_attribute_info_create(GpuAttributeType type, D3D12_INPUT_ELEMENT_DESC* attr) +{ + switch (type) { + case GPU_ATTRIBUTE_TYPE_VERTEX_3D: { + attr[0] = { + .SemanticIndex = 0, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3D, position), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[1] = { + .SemanticIndex = 1, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3D, normal), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[2] = { + .SemanticIndex = 2, + .Format = DXGI_FORMAT_R32G32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3D, tex_coord), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[3] = { + .SemanticIndex = 3, + .Format = DXGI_FORMAT_R32G32B32A32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3D, color), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_NORMAL: { + attr[0] = { + .SemanticIndex = 0, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DNormal, position), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + 
attr[1] = { + .SemanticIndex = 1, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DNormal, normal), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_COLOR: { + attr[0] = { + .SemanticIndex = 0, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DColor, position), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[1] = { + .SemanticIndex = 1, + .Format = DXGI_FORMAT_R32G32B32A32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DColor, color), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_TEXTURE_COLOR: { + attr[0] = { + .SemanticIndex = 0, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DTextureColor, position), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[1] = { + .SemanticIndex = 1, + .Format = DXGI_FORMAT_R32G32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DTextureColor, texture_color), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_SAMPLER_TEXTURE_COLOR: { + attr[0] = { + .SemanticIndex = 0, + .Format = DXGI_FORMAT_R32G32B32_FLOAT, + .AlignedByteOffset = offsetof(Vertex3DSamplerTextureColor, position), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[1] = { + .SemanticIndex = 1, + .Format = DXGI_FORMAT_R32_SINT, + .AlignedByteOffset = offsetof(Vertex3DSamplerTextureColor, sampler), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[2] = { + .SemanticIndex = 2, + .Format = DXGI_FORMAT_R32G32_FLOAT, + .AlignedByteOffset = 
offsetof(Vertex3DSamplerTextureColor, texture_color), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_2D_TEXTURE: { + attr[0] = { + .SemanticIndex = 0, + .Format = DXGI_FORMAT_R32G32_FLOAT, + .AlignedByteOffset = offsetof(Vertex2DTexture, position), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + + attr[1] = { + .SemanticIndex = 1, + .Format = DXGI_FORMAT_R32G32_FLOAT, + .AlignedByteOffset = offsetof(Vertex2DTexture, tex_coord), + .InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, + .InstanceDataStepRate = 0, + }; + } return; + default: + UNREACHABLE(); + }; +} + +int32 directx_program_optimize(const char* input, char* output) +{ + const char* read_ptr = input; + char* write_ptr = output; + bool in_string = false; + + while (*read_ptr) { + str_skip_empty(&read_ptr); + + if (write_ptr != output + && *(write_ptr - 1) != '\n' && *(write_ptr - 1) != ';' && *(write_ptr - 1) != '{' + && *(write_ptr - 1) != '(' + && *(write_ptr - 1) != ',' + ) { + *write_ptr++ = '\n'; + } + + // Handle single-line comments (//) + if (*read_ptr == '/' && *(read_ptr + 1) == '/' && !in_string) { + str_move_to(&read_ptr, '\n'); + + continue; + } + + // Handle multi-line comments (/* */) + if (*read_ptr == '/' && *(read_ptr + 1) == '*' && !in_string) { + // Go to end of comment + while (*read_ptr && (*read_ptr != '*' || *(read_ptr + 1) != '/')) { + ++read_ptr; + } + + if (*read_ptr == '*' && *(read_ptr + 1) == '/') { + read_ptr += 2; + } + + continue; + } + + // Handle strings to avoid removing content within them + if (*read_ptr == '"') { + in_string = !in_string; + } + + // Copy valid characters to write_ptr + while (*read_ptr && !is_eol(read_ptr) && *read_ptr != '"' + && !(*read_ptr == '/' && (*(read_ptr + 1) == '/' || *(read_ptr + 1) == '*')) + ) { + if (!in_string + && (*read_ptr == '*' || *read_ptr == '/' || *read_ptr == '=' || 
*read_ptr == '+' || *read_ptr == '-' || *read_ptr == '%' + || *read_ptr == '(' || *read_ptr == ')' + || *read_ptr == '{' || *read_ptr == '}' + || *read_ptr == ',' || *read_ptr == '?' || *read_ptr == ':' || *read_ptr == ';' + || *read_ptr == '&' || *read_ptr == '|' + || *read_ptr == '>' || *read_ptr == '<' + ) + ) { + if (is_whitespace(*(write_ptr - 1)) || *(write_ptr - 1) == '\n') { + --write_ptr; + } + + *write_ptr++ = *read_ptr++; + + if (*read_ptr && is_whitespace(*read_ptr)) { + ++read_ptr; + } + } else { + *write_ptr++ = *read_ptr++; + } + } + } + + *write_ptr = '\0'; + + // -1 to remove \0 from length, same as strlen + return (int32) (write_ptr - output); +} + #endif \ No newline at end of file diff --git a/gpuapi/opengl/AppCmdBuffer.h b/gpuapi/opengl/AppCmdBuffer.h index 731995b..4f6da61 100644 --- a/gpuapi/opengl/AppCmdBuffer.h +++ b/gpuapi/opengl/AppCmdBuffer.h @@ -47,7 +47,7 @@ void* cmd_shader_load_sync(AppCmdBuffer* __restrict cb, Shader* __restrict shade } // Make sub shader - shader_assets[i] = shader_make( + shader_assets[i] = gpuapi_shader_make( shader_type_index((ShaderType) (i + 1)), (char *) shader_asset->self ); @@ -57,7 +57,7 @@ void* cmd_shader_load_sync(AppCmdBuffer* __restrict cb, Shader* __restrict shade } // Make shader/program - shader->id = pipeline_make( + shader->id = gpuapi_pipeline_make( shader_assets[0], shader_assets[1], shader_assets[2] ); diff --git a/gpuapi/opengl/OpenglUtils.h b/gpuapi/opengl/OpenglUtils.h index 1a6e076..34fb62c 100644 --- a/gpuapi/opengl/OpenglUtils.h +++ b/gpuapi/opengl/OpenglUtils.h @@ -33,7 +33,7 @@ { GLenum err; while ((err = glGetError()) != GL_NO_ERROR) { - LOG_FORMAT_1("Opengl error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); + LOG_1("Opengl error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); ASSERT_SIMPLE(err == GL_NO_ERROR); } } @@ -108,6 +108,7 @@ void opengl_info(OpenglInfo* info) } } +// @todo rename to gpuapi_* inline uint32 get_texture_data_type(uint32 texture_data_type) { @@ -145,6 +146,7 @@ uint32 
get_texture_data_type(uint32 texture_data_type) // 4. load_texture_to_gpu // 5. texture_use +// @todo this should have a gpuapi_ name inline void prepare_texture(Texture* texture) { @@ -155,9 +157,11 @@ void prepare_texture(Texture* texture) glBindTexture(texture_data_type, (GLuint) texture->id); } +// @todo this should have a gpuapi_ name inline void load_texture_to_gpu(const Texture* texture, int32 mipmap_level = 0) { + // @todo also handle different texture formats (R, RG, RGB, 1 byte vs 4 byte per pixel) uint32 texture_data_type = get_texture_data_type(texture->texture_data_type); glTexImage2D( texture_data_type, mipmap_level, GL_RGBA, @@ -173,6 +177,7 @@ void load_texture_to_gpu(const Texture* texture, int32 mipmap_level = 0) LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, texture->image.pixel_count * image_pixel_size_from_type(texture->image.image_settings)); } +// @todo this should have a gpuapi_ name inline void texture_use(const Texture* texture) { @@ -182,6 +187,7 @@ void texture_use(const Texture* texture) glBindTexture(texture_data_type, (GLuint) texture->id); } +// @todo this should have a gpuapi_ name inline void texture_delete(Texture* texture) { glDeleteTextures(1, &texture->id); @@ -392,14 +398,23 @@ void gpuapi_buffer_update_dynamic(uint32 vbo, int32 size, const void* data) LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, size); } +// @todo change name. vulkan and directx have different functions for vertex buffer updates inline -void gpuapi_buffer_update_sub(uint32 vbo, int32 offset, int32 size, const void* data) +void gpuapi_vertex_buffer_update( + uint32 vbo, + const void* data, int32 vertex_size, int32 vertex_count, int32 offset = 0 +) { glBindBuffer(GL_ARRAY_BUFFER, vbo); - glBufferSubData(GL_ARRAY_BUFFER, offset, size, data); + // @performance Does this if even make sense or is glBufferSubData always the better choice? 
+ if (offset) { + glBufferSubData(GL_ARRAY_BUFFER, offset, vertex_size * vertex_count - offset, ((byte *) data) + offset); + } else { + glBufferData(GL_ARRAY_BUFFER, vertex_size * vertex_count, data, GL_DYNAMIC_DRAW); + } ASSERT_GPU_API(); - LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, size); + LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, vertex_size * vertex_count - offset); } inline diff --git a/gpuapi/opengl/ShaderUtils.h b/gpuapi/opengl/ShaderUtils.h index 1d5cc53..4e1be6a 100644 --- a/gpuapi/opengl/ShaderUtils.h +++ b/gpuapi/opengl/ShaderUtils.h @@ -15,6 +15,7 @@ #include "../../log/Stats.h" #include "../../log/PerformanceProfiler.h" #include "../../object/Vertex.h" +#include "../../utils/StringUtils.h" #include "Shader.h" #include "Opengl.h" #include "../ShaderType.h" @@ -44,79 +45,79 @@ int32 shader_type_index(ShaderType type) // @todo change naming to gpuapi_uniform_buffer_update (same as vulkan) // @todo change from upload to uniform upload since it is a special form of upload FORCE_INLINE -void shader_set_value(uint32 location, bool value) +void gpuapi_uniform_buffer_update_value(uint32 location, bool value) { glUniform1i(location, (int32) value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(value)); } FORCE_INLINE -void shader_set_value(uint32 location, int32 value) +void gpuapi_uniform_buffer_update_value(uint32 location, int32 value) { glUniform1i(location, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(value)); } FORCE_INLINE -void shader_set_value(uint32 location, f32 value) +void gpuapi_uniform_buffer_update_value(uint32 location, f32 value) { glUniform1f(location, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(value)); } FORCE_INLINE -void shader_set_v2(uint32 location, const f32* value) +void gpuapi_uniform_buffer_update_v2(uint32 location, const f32* value) { glUniform2fv(location, 1, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 2); } FORCE_INLINE -void 
shader_set_v3(uint32 location, const f32* value) +void gpuapi_uniform_buffer_update_v3(uint32 location, const f32* value) { glUniform3fv(location, 1, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 3); } FORCE_INLINE -void shader_set_v4(uint32 location, const f32* value) +void gpuapi_uniform_buffer_update_v4(uint32 location, const f32* value) { glUniform4fv(location, 1, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 4); } FORCE_INLINE -void shader_set_m2(uint32 location, const f32* value) +void gpuapi_uniform_buffer_update_m2(uint32 location, const f32* value) { glUniformMatrix2fv(location, 1, GL_FALSE, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 4); } FORCE_INLINE -void shader_set_m3(uint32 location, const f32* value) +void gpuapi_uniform_buffer_update_m3(uint32 location, const f32* value) { glUniformMatrix3fv(location, 1, GL_FALSE, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 9); } FORCE_INLINE -void shader_set_m4(uint32 location, const f32* value) +void gpuapi_uniform_buffer_update_m4(uint32 location, const f32* value) { glUniformMatrix4fv(location, 1, GL_FALSE, value); LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 16); } FORCE_INLINE -uint32 shader_get_attrib_location(uint32 id, const char* name) +uint32 opengl_get_attrib_location(uint32 id, const char* name) { - // By using this you can retreive the shader variable name at a point where and when you know it + // By using this you can retrieve the shader variable name at a point where and when you know it // BUT set values later on in generalized functions without knowing the shader variable name // Basically like pointers return glGetAttribLocation(id, name); } inline -void shader_check_link_errors(uint32 id, char* log) +void opengl_check_link_errors(uint32 id, char* log) { GLint success; glGetProgramiv(id, GL_LINK_STATUS, &success); @@ -126,7 +127,7 @@ void 
shader_check_link_errors(uint32 id, char* log) } inline -void shader_check_compile_errors(uint32 id, char* log) +void opengl_check_compile_errors(uint32 id, char* log) { GLint success; glGetShaderiv(id, GL_COMPILE_STATUS, &success); @@ -135,17 +136,14 @@ void shader_check_compile_errors(uint32 id, char* log) } } -int32 shader_program_optimize(const char* input, char* output) +int32 opengl_program_optimize(const char* __restrict input, char* __restrict output) { const char* read_ptr = input; char* write_ptr = output; bool in_string = false; while (*read_ptr) { - // Remove leading whitespace - while (*read_ptr == ' ' || *read_ptr == '\t' || is_eol(read_ptr)) { - ++read_ptr; - } + str_skip_empty(&read_ptr); if (write_ptr != output && *(write_ptr - 1) != '\n' && *(write_ptr - 1) != ';' && *(write_ptr - 1) != '{' @@ -157,10 +155,7 @@ int32 shader_program_optimize(const char* input, char* output) // Handle single-line comments (//) if (*read_ptr == '/' && *(read_ptr + 1) == '/' && !in_string) { - // Go to end of line - while (*read_ptr && *read_ptr != '\n') { - ++read_ptr; - } + str_move_to(&read_ptr, '\n'); continue; } @@ -218,7 +213,7 @@ int32 shader_program_optimize(const char* input, char* output) return (int32) (write_ptr - output); } -GLuint shader_make(GLenum type, const char* source) +GLuint gpuapi_shader_make(GLenum type, const char* source) { LOG_1("Create shader"); GLuint shader = glCreateShader(type); @@ -249,7 +244,7 @@ GLuint shader_make(GLenum type, const char* source) } inline -int32 program_get_size(uint32 program) +int32 opengl_program_get_size(uint32 program) { int32 size; glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &size); @@ -259,7 +254,7 @@ int32 program_get_size(uint32 program) // @todo Instead of passing the shaders one by one, pass one array called ShaderStage* shader_stages // This way we can handle this more dynamic -GLuint pipeline_make( +GLuint gpuapi_pipeline_make( GLuint vertex_shader, GLuint fragment_shader, GLint geometry_shader @@ 
-316,9 +311,8 @@ GLuint pipeline_make( return program; } -// @question Depending on how the different gpu apis work we may want to pass Shader* to have a uniform structure FORCE_INLINE -void pipeline_use(uint32 id) +void gpuapi_pipeline_use(uint32 id) { glUseProgram(id); } @@ -347,7 +341,7 @@ void gpuapi_attribute_info_create(GpuAttributeType type, OpenglVertexInputAttrib .count = 3, .format = GL_FLOAT, .stride = sizeof(Vertex3D), - .offset = (void *) offsetof(Vertex3DTextureColor, position) + .offset = (void *) offsetof(Vertex3D, position) }; attr[1] = { @@ -402,7 +396,7 @@ void gpuapi_attribute_info_create(GpuAttributeType type, OpenglVertexInputAttrib attr[1] = { .location = 1, - .count = 2, + .count = 4, .format = GL_FLOAT, .stride = sizeof(Vertex3DColor), .offset = (void *) offsetof(Vertex3DColor, color) @@ -472,7 +466,7 @@ void gpuapi_attribute_info_create(GpuAttributeType type, OpenglVertexInputAttrib }; } -void gpuapi_descriptor_set_layout_create(Shader* shader, const OpenglDescriptorSetLayoutBinding* bindings, int32 binding_length) { +void gpuapi_descriptor_set_layout_create(Shader* __restrict shader, const OpenglDescriptorSetLayoutBinding* __restrict bindings, int32 binding_length) { for (int32 i = 0; i < binding_length; ++i) { shader->descriptor_set_layout[i].binding = glGetUniformLocation(shader->id, bindings[i].name); shader->descriptor_set_layout[i].name = bindings[i].name; diff --git a/gpuapi/vulkan/AppCmdBuffer.h b/gpuapi/vulkan/AppCmdBuffer.h index 6bdba3c..f01dc78 100644 --- a/gpuapi/vulkan/AppCmdBuffer.h +++ b/gpuapi/vulkan/AppCmdBuffer.h @@ -50,7 +50,7 @@ void* cmd_shader_load_sync( } // Make sub shader - shader_assets[i] = shader_make( + shader_assets[i] = gpuapi_shader_make( device, (char *) shader_asset->self, shader_asset->ram_size @@ -61,7 +61,7 @@ void* cmd_shader_load_sync( } // Make shader/program - shader->id = pipeline_make( + shader->id = gpuapi_pipeline_make( device, render_pass, pipeline_layout, pipeline, descriptor_set_layouts, 
shader_assets[0], shader_assets[1], shader_assets[2] diff --git a/gpuapi/vulkan/ShaderUtils.h b/gpuapi/vulkan/ShaderUtils.h index 8abfcad..9f12a3d 100644 --- a/gpuapi/vulkan/ShaderUtils.h +++ b/gpuapi/vulkan/ShaderUtils.h @@ -34,7 +34,7 @@ uint32_t shader_get_uniform_location( } inline -void shader_set_value(VkDevice device, VkDescriptorSet descriptorSet, uint32_t binding, VkDescriptorType descriptorType, int32_t value) +void gpuapi_uniform_buffer_update_value(VkDevice device, VkDescriptorSet descriptorSet, uint32_t binding, VkDescriptorType descriptorType, int32_t value) { VkDescriptorBufferInfo bufferInfo = {}; bufferInfo.buffer = {}; // You should have a buffer holding the value @@ -54,7 +54,7 @@ void shader_set_value(VkDevice device, VkDescriptorSet descriptorSet, uint32_t b } inline -VkShaderModule shader_make(VkDevice device, const char* source, int32 source_size) +VkShaderModule gpuapi_shader_make(VkDevice device, const char* source, int32 source_size) { LOG_1("Create shader"); // Create shader module create info @@ -68,7 +68,7 @@ VkShaderModule shader_make(VkDevice device, const char* source, int32 source_siz VkResult result = vkCreateShaderModule(device, &create_info, NULL, &shader_module); if (result != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateShaderModule: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateShaderModule: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return VK_NULL_HANDLE; @@ -144,7 +144,7 @@ void gpuapi_attribute_info_create(GpuAttributeType type, VkVertexInputAttributeD attr[1] = { .location = 1, .binding = 0, - .format = VK_FORMAT_R32_UINT, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, .offset = offsetof(Vertex3DColor, color) }; } return; @@ -190,15 +190,15 @@ void gpuapi_attribute_info_create(GpuAttributeType type, VkVertexInputAttributeD }; } -inline -void pipeline_use(VkCommandBuffer command_buffer, VkPipeline pipeline) +FORCE_INLINE +void gpuapi_pipeline_use(VkCommandBuffer command_buffer, 
VkPipeline pipeline) { vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } // @todo Instead of passing the shaders one by one, pass one array called ShaderStage* shader_stages // This way we can handle this more dynamic -VkPipeline pipeline_make( +VkPipeline gpuapi_pipeline_make( VkDevice device, VkRenderPass render_pass, VkPipelineLayout* __restrict pipeline_layout, VkPipeline* __restrict pipeline, VkDescriptorSetLayout* descriptor_set_layouts, VkShaderModule vertex_shader, VkShaderModule fragment_shader, @@ -301,7 +301,7 @@ VkPipeline pipeline_make( VkResult result; if ((result = vkCreatePipelineLayout(device, &pipeline_info_layout, NULL, pipeline_layout)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreatePipelineLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreatePipelineLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return NULL; @@ -324,7 +324,7 @@ VkPipeline pipeline_make( pipeline_info.basePipelineHandle = VK_NULL_HANDLE; if ((result = vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipeline_info, NULL, pipeline)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateGraphicsPipelines: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateGraphicsPipelines: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return NULL; @@ -358,7 +358,7 @@ void gpuapi_descriptor_set_layout_create( VkResult result; if ((result = vkCreateDescriptorSetLayout(device, &layout_info, NULL, descriptor_set_layout)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateDescriptorSetLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateDescriptorSetLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -390,7 +390,7 @@ void vulkan_descriptor_pool_create( VkResult result; if ((result = vkCreateDescriptorPool(device, &poolInfo, NULL, descriptor_pool)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateDescriptorPool: %d", {{LOG_DATA_INT32, 
(int32 *) &result}}); + LOG_1("Vulkan vkCreateDescriptorPool: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -417,7 +417,7 @@ void vulkan_descriptor_sets_create( VkResult result; if ((result = vkAllocateDescriptorSets(device, &alloc_info, descriptor_sets)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkAllocateDescriptorSets: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkAllocateDescriptorSets: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return; diff --git a/gpuapi/vulkan/VulkanUtils.h b/gpuapi/vulkan/VulkanUtils.h index ebd034a..2a3bba1 100644 --- a/gpuapi/vulkan/VulkanUtils.h +++ b/gpuapi/vulkan/VulkanUtils.h @@ -31,17 +31,18 @@ #include "../../log/Stats.h" #include "../../log/PerformanceProfiler.h" #include "../../memory/RingMemory.h" +#include "../../compiler/CompilerUtils.h" #include "ShaderUtils.h" #include "FramesInFlightContainer.h" #if DEBUG - #define ASSERT_GPU_API(x) \ - do { \ - VkResult err = (x); \ - if (err) { \ - LOG_FORMAT_1("Vulkan error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); \ - ASSERT_SIMPLE(false); \ - } \ + #define ASSERT_GPU_API(x) \ + do { \ + VkResult err = (x); \ + if (err) { \ + LOG_1("Vulkan error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); \ + ASSERT_SIMPLE(false); \ + } \ } while (0) #else #define ASSERT_GPU_API(x) (x) @@ -68,7 +69,11 @@ struct VulkanSwapChainSupportDetails { }; inline -void change_viewport(int32 width, int32 height, VkCommandBuffer command_buffer, VkExtent2D swapchain_extent, int32 offset_x = 0, int32 offset_y = 0) +void change_viewport( + int32 width, int32 height, + VkCommandBuffer command_buffer, VkExtent2D swapchain_extent, + int32 offset_x = 0, int32 offset_y = 0 +) { VkViewport viewport = {}; viewport.x = (f32) offset_x; @@ -178,7 +183,7 @@ void vulkan_instance_create( if (validation_layer_count && (err = vulkan_check_validation_layer_support(validation_layers, validation_layer_count, ring)) ) { - LOG_FORMAT_1("Vulkan validation_layer missing: %d", 
{{LOG_DATA_CHAR_STR, (void *) validation_layers[-err - 1]}}); + LOG_1("Vulkan validation_layer missing: %d", {{LOG_DATA_CHAR_STR, (void *) validation_layers[-err - 1]}}); ASSERT_SIMPLE(false); return; @@ -187,7 +192,7 @@ void vulkan_instance_create( if (extension_count && (err = vulkan_check_extension_support(extensions, extension_count, ring)) ) { - LOG_FORMAT_1("Vulkan extension missing: %d", {{LOG_DATA_CHAR_STR, (void *) extensions[-err - 1]}}); + LOG_1("Vulkan extension missing: %d", {{LOG_DATA_CHAR_STR, (void *) extensions[-err - 1]}}); ASSERT_SIMPLE(false); return; @@ -224,7 +229,7 @@ void vulkan_instance_create( VkResult result; if ((result = vkCreateInstance(&create_info, NULL, instance)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateInstance: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateInstance: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -240,7 +245,7 @@ void vulkan_surface_create(VkInstance instance, VkSurfaceKHR* surface, Window* w VkResult result; if ((result = vkCreateWin32SurfaceKHR(instance, &surface_create_info, NULL, surface)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateWin32SurfaceKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateWin32SurfaceKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); return; } #elif __linux__ @@ -310,7 +315,7 @@ VulkanQueueFamilyIndices vulkan_find_queue_families(VkPhysicalDevice physical_de VkResult result; if ((result = vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, i, surface, &present_support)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkGetPhysicalDeviceSurfaceSupportKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkGetPhysicalDeviceSurfaceSupportKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return indices; @@ -437,7 +442,7 @@ void gpuapi_create_logical_device( VkResult result; if ((result = vkCreateDevice(physical_device, &create_info, NULL, device)) != VK_SUCCESS) { - 
LOG_FORMAT_1("Vulkan vkCreateDevice: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateDevice: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } @@ -523,7 +528,7 @@ void gpuapi_swapchain_create( VkResult result; if ((result = vkCreateSwapchainKHR(device, &create_info, NULL, swapchain)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateSwapchainKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateSwapchainKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return; @@ -585,7 +590,7 @@ void vulkan_image_views_create( create_info.subresourceRange.layerCount = 1; if ((result = vkCreateImageView(device, &create_info, NULL, &swapchain_image_views[i])) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateImageView: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateImageView: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -632,7 +637,7 @@ void vulkan_render_pass_create( VkResult result; if ((result = vkCreateRenderPass(device, &render_pass_info, NULL, render_pass)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateRenderPass: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateRenderPass: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -660,7 +665,7 @@ void vulkan_framebuffer_create( framebufferInfo.layers = 1; if ((result = vkCreateFramebuffer(device, &framebufferInfo, NULL, &framebuffers[i])) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateFramebuffer: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateFramebuffer: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -679,12 +684,12 @@ void vulkan_command_pool_create( VkResult result; if ((result = vkCreateCommandPool(device, &pool_info, NULL, command_pool)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkCreateCommandPool: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkCreateCommandPool: %d", {{LOG_DATA_INT32, (int32 
*) &result}}); ASSERT_SIMPLE(false); } } -void vulkan_command_buffers_create(VkDevice device, VkCommandPool command_pool, VkCommandBuffer* command_buffers, uint32 command_buffer_count) +void gpuapi_command_buffer_create(VkDevice device, VkCommandPool command_pool, VkCommandBuffer* command_buffers, uint32 command_buffer_count) { VkCommandBufferAllocateInfo alloc_info = {}; alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; @@ -694,7 +699,7 @@ void vulkan_command_buffers_create(VkDevice device, VkCommandPool command_pool, VkResult result; if ((result = vkAllocateCommandBuffers(device, &alloc_info, command_buffers)) != VK_SUCCESS) { - LOG_FORMAT_1("Vulkan vkAllocateCommandBuffers: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vkAllocateCommandBuffers: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -716,7 +721,7 @@ void vulkan_sync_objects_create( || (result = vkCreateSemaphore(device, &semaphore_info, NULL, &frames_in_flight->render_finished_semaphores[i])) != VK_SUCCESS || (result = vkCreateFence(device, &fence_info, NULL, &frames_in_flight->fences[i])) != VK_SUCCESS ) { - LOG_FORMAT_1("Vulkan vulkan_sync_objects_create: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + LOG_1("Vulkan vulkan_sync_objects_create: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -842,18 +847,52 @@ void vulkan_transition_image_layout(VkCommandBuffer command_buffer, VkImage imag ); } -// @todo replace references with pointers +static +VkFormat gpuapi_texture_format(byte settings) +{ + if ((settings & IMAGE_SETTING_CHANNEL_4_SIZE)) { + switch (settings & IMAGE_SETTING_CHANNEL_COUNT) { + case 1: + return VK_FORMAT_R32_SFLOAT; + case 2: + return VK_FORMAT_R32G32_SFLOAT; + case 3: + return VK_FORMAT_R32G32B32_SFLOAT; + case 4: + return VK_FORMAT_R32G32B32A32_SFLOAT; + default: + UNREACHABLE(); + } + } else { + switch (settings & IMAGE_SETTING_CHANNEL_COUNT) { + case 1: + return VK_FORMAT_R8_SRGB; + case 2: + 
return VK_FORMAT_R8G8_SRGB; + case 3: + return VK_FORMAT_R8G8B8_SRGB; + case 4: + return VK_FORMAT_R8G8B8A8_SRGB; + default: + UNREACHABLE(); + } + } +} + +// @performance Sometimes we want to upload multiple textures in one go (more performant). Allow that or don't use this function in that case. void load_texture_to_gpu( VkDevice device, VkPhysicalDevice physical_device, VkCommandPool command_pool, VkQueue queue, - VkImage& texture_image, VkDeviceMemory& texture_image_memory, VkImageView& texture_image_view, VkSampler& texture_sampler, + VkImage* texture_image, VkDeviceMemory* texture_image_memory, VkImageView* texture_image_view, VkSampler* texture_sampler, const Texture* texture) { + VkFormat textureFormat = gpuapi_texture_format(texture->image.image_settings); + // Create the Vulkan image VkImageCreateInfo image_info = {}; image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; image_info.imageType = VK_IMAGE_TYPE_2D; - image_info.format = VK_FORMAT_R8G8B8A8_SRGB; + image_info.format = textureFormat; image_info.extent.width = texture->image.width; image_info.extent.height = texture->image.height; image_info.extent.depth = 1; @@ -865,19 +904,19 @@ void load_texture_to_gpu( image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - ASSERT_GPU_API(vkCreateImage(device, &image_info, NULL, &texture_image)); + ASSERT_GPU_API(vkCreateImage(device, &image_info, NULL, texture_image)); // Allocate memory for the image VkMemoryRequirements memRequirements; - vkGetImageMemoryRequirements(device, texture_image, &memRequirements); + vkGetImageMemoryRequirements(device, *texture_image, &memRequirements); VkMemoryAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; allocInfo.allocationSize = memRequirements.size; allocInfo.memoryTypeIndex = vulkan_find_memory_type(physical_device, memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - ASSERT_GPU_API(vkAllocateMemory(device, 
&allocInfo, NULL, &texture_image_memory)); - ASSERT_GPU_API(vkBindImageMemory(device, texture_image, texture_image_memory, 0)); + ASSERT_GPU_API(vkAllocateMemory(device, &allocInfo, NULL, texture_image_memory)); + ASSERT_GPU_API(vkBindImageMemory(device, *texture_image, *texture_image_memory, 0)); int32 image_size = image_pixel_size_from_type(texture->image.image_settings) * texture->image.width * texture->image.height; @@ -894,10 +933,10 @@ void load_texture_to_gpu( // Transition the image layout VkCommandBuffer command_buffer; - vulkan_command_buffers_create(device, command_pool, &command_buffer, 1); + gpuapi_command_buffer_create(device, command_pool, &command_buffer, 1); vulkan_single_commands_begin(command_buffer); - vulkan_transition_image_layout(command_buffer, texture_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vulkan_transition_image_layout(command_buffer, *texture_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); vulkan_single_commands_end(queue, command_buffer); // Copy data from the staging buffer to the image @@ -910,13 +949,13 @@ void load_texture_to_gpu( region.imageSubresource.layerCount = 1; region.imageExtent = {texture->image.width, texture->image.height, 1}; - vkCmdCopyBufferToImage(command_buffer, staging_buffer, texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); + vkCmdCopyBufferToImage(command_buffer, staging_buffer, *texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); vulkan_single_commands_end(queue, command_buffer); // Transition the image layout for shader access vulkan_command_buffer_reset(command_buffer); vulkan_single_commands_begin(command_buffer); - vulkan_transition_image_layout(command_buffer, texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + vulkan_transition_image_layout(command_buffer, *texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); 
vulkan_single_commands_end(queue, command_buffer); vulkan_single_commands_free(device, command_pool, command_buffer); @@ -928,16 +967,16 @@ void load_texture_to_gpu( // Create an image view VkImageViewCreateInfo view_info = {}; view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - view_info.image = texture_image; + view_info.image = *texture_image; view_info.viewType = VK_IMAGE_VIEW_TYPE_2D; - view_info.format = VK_FORMAT_R8G8B8A8_SRGB; + view_info.format = textureFormat; view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; view_info.subresourceRange.baseMipLevel = 0; view_info.subresourceRange.levelCount = 1; view_info.subresourceRange.baseArrayLayer = 0; view_info.subresourceRange.layerCount = 1; - ASSERT_GPU_API(vkCreateImageView(device, &view_info, NULL, &texture_image_view)); + ASSERT_GPU_API(vkCreateImageView(device, &view_info, NULL, texture_image_view)); // Create a sampler VkPhysicalDeviceProperties properties = {}; @@ -958,14 +997,14 @@ void load_texture_to_gpu( sampler_info.compareOp = VK_COMPARE_OP_ALWAYS; sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - ASSERT_GPU_API(vkCreateSampler(device, &sampler_info, NULL, &texture_sampler)); + ASSERT_GPU_API(vkCreateSampler(device, &sampler_info, NULL, texture_sampler)); } // @todo Rename to same name as opengl (or rename opengl obviously) -void vulkan_vertex_buffer_update( +void gpuapi_vertex_buffer_update( VkDevice device, VkPhysicalDevice physical_device, VkCommandPool command_pool, VkQueue queue, - VkBuffer* vertexBuffer, - const void* __restrict vertices, int32 vertex_size, int32 vertex_count + VkBuffer* vertex_buffer, + const void* __restrict vertices, int32 vertex_size, int32 vertex_count, int32 offset = 0 ) { VkDeviceSize bufferSize = vertex_size * vertex_count; @@ -986,12 +1025,14 @@ void vulkan_vertex_buffer_update( vkUnmapMemory(device, stagingBufferMemory); VkCommandBuffer commandBuffer; - vulkan_command_buffers_create(device, command_pool, &commandBuffer, 1); + 
gpuapi_command_buffer_create(device, command_pool, &commandBuffer, 1); vulkan_single_commands_begin(commandBuffer); VkBufferCopy copyRegion = {}; + copyRegion.srcOffset = offset; + copyRegion.dstOffset = offset; copyRegion.size = bufferSize; - vkCmdCopyBuffer(commandBuffer, stagingBuffer, *vertexBuffer, 1, &copyRegion); + vkCmdCopyBuffer(commandBuffer, stagingBuffer, *vertex_buffer, 1, &copyRegion); vulkan_single_commands_end(queue, commandBuffer); vulkan_single_commands_free(device, command_pool, commandBuffer); @@ -999,12 +1040,12 @@ void vulkan_vertex_buffer_update( vkDestroyBuffer(device, stagingBuffer, NULL); vkFreeMemory(device, stagingBufferMemory, NULL); - LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, vertex_size * vertex_count); + LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, bufferSize - offset); } -void vulkan_vertex_buffer_create( +void gpuapi_vertex_buffer_create( VkDevice device, VkPhysicalDevice physical_device, VkCommandPool command_pool, VkQueue queue, - VkBuffer* vertexBuffer, VkDeviceMemory vertexBufferMemory, + VkBuffer* vertex_buffer, VkDeviceMemory vertex_bufferMemory, const void* __restrict vertices, int32 vertex_size, int32 vertex_count ) { @@ -1031,18 +1072,18 @@ void vulkan_vertex_buffer_update( bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - *vertexBuffer, vertexBufferMemory + *vertex_buffer, vertex_bufferMemory ); // Copy buffer // @performance Would it make sense to use a "global" temp buffer for that? 
If yes, we only need to reset VkCommandBuffer commandBuffer; - vulkan_command_buffers_create(device, command_pool, &commandBuffer, 1); + gpuapi_command_buffer_create(device, command_pool, &commandBuffer, 1); vulkan_single_commands_begin(commandBuffer); VkBufferCopy copyRegion = {}; copyRegion.size = bufferSize; - vkCmdCopyBuffer(commandBuffer, stagingBuffer, *vertexBuffer, 1, &copyRegion); + vkCmdCopyBuffer(commandBuffer, stagingBuffer, *vertex_buffer, 1, &copyRegion); vulkan_single_commands_end(queue, commandBuffer); // @todo if we change behaviour according to the comment above we don't need this @@ -1084,7 +1125,7 @@ void vulkan_index_buffer_create( // Copy buffer VkCommandBuffer commandBuffer; - vulkan_command_buffers_create(device, command_pool, &commandBuffer, 1); + gpuapi_command_buffer_create(device, command_pool, &commandBuffer, 1); vulkan_single_commands_begin(commandBuffer); VkBufferCopy copyRegion = {}; @@ -1101,7 +1142,7 @@ void vulkan_index_buffer_create( // @todo We also need a free function (unmap buffer) -void vulkan_uniform_buffers_create( +void gpuapi_uniform_buffers_create( VkDevice device, VkPhysicalDevice physical_device, VkBuffer* __restrict uniform_buffers, VkDeviceMemory* __restrict uniform_buffers_memory, void** __restrict uniform_buffers_mapped, size_t uniform_buffer_object_size, @@ -1109,6 +1150,7 @@ void vulkan_uniform_buffers_create( ) { // e.g. uniform_buffer_object_size = sizeof(struct {model; view; proj};) + // @question Do I really need one uniform_buffer per frames_in_flight? 
This seems VERY inefficient VkDeviceSize bufferSize = uniform_buffer_object_size; for (uint32 i = 0; i < frames_in_flight; ++i) { vulkan_buffer_create( diff --git a/log/Log.h b/log/Log.h index 3ece18d..a961c4e 100644 --- a/log/Log.h +++ b/log/Log.h @@ -182,17 +182,17 @@ void log(const char* str, const char* file, const char* function, int32 line) void log(const char* format, LogDataArray data, const char* file, const char* function, int32 line) { - ASSERT_SIMPLE(str_length(format) + str_length(file) + str_length(function) + 50 < MAX_LOG_LENGTH); - if (!_log_memory) { return; } - if (data.data[0].type == LOG_DATA_VOID) { + if (data.data[0].type == LOG_DATA_VOID || data.data[0].type == LOG_DATA_NONE) { log(format, file, function, line); return; } + ASSERT_SIMPLE(str_length(format) + str_length(file) + str_length(function) + 50 < MAX_LOG_LENGTH); + LogMessage* msg = (LogMessage *) log_get_memory(); msg->file = file; msg->function = function; @@ -205,7 +205,7 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu str_copy_short(msg->message, format); for (int32 i = 0; i < LOG_DATA_ARRAY; ++i) { - if (data.data[i].type == LOG_DATA_VOID) { + if (data.data[i].type == LOG_DATA_VOID || data.data[i].type == LOG_DATA_NONE) { break; } @@ -262,36 +262,20 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu #define LOG_TO_FILE() log_to_file() #if LOG_LEVEL == 4 - // Complete logging - #define LOG_1(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_2(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_3(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_4(str) log((str), __FILE__, __func__, __LINE__) + #define LOG_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_3(format, ...) 
log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_4(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_3(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_4(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_4(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_2(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_3(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_4(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - - #define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_2(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_3(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_4(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - - #define LOG_FORMAT_TRUE_1(should_log, format, ...) 
if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_4(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - - #define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_4(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_4(should_log, format, ...) 
if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) #define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } #define LOG_IF_2(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } @@ -301,7 +285,7 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu #define LOG_CYCLE_START(var_name) uint64 var_name##_start_time = intrin_timestamp_counter() #define LOG_CYCLE_END(var_name, format) \ uint64 var_name##_duration = (uint64) (intrin_timestamp_counter() - var_name##_start_time); \ - LOG_FORMAT_1((format), {{LOG_DATA_UINT64, &var_name##_duration}}) + LOG_1((format), {{LOG_DATA_UINT64, &var_name##_duration}}) // Only intended for manual debugging // Of course a developer could always use printf but by providing this option, @@ -315,35 +299,20 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu compiler_debug_print((debug_str)); \ }) #elif LOG_LEVEL == 3 - #define LOG_1(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_2(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_3(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_4(str) ((void) 0) + #define LOG_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_3(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_4(format, ...) ((void) 0) - #define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_3(format, ...) 
log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_4(format, ...) ((void) 0) + #define LOG_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_4(should_log, format, ...) ((void) 0) - #define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_2(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_3(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_4(should_log, str) ((void) 0) - - #define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_2(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_3(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_4(should_log, str) ((void) 0) - - #define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0) - - #define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_2(should_log, format, ...) 
if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0) + #define LOG_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_4(should_log, format, ...) ((void) 0) #define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } #define LOG_IF_2(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } @@ -354,40 +323,25 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu #define LOG_CYCLE_START(var_name) uint64 var_name##_start_time = intrin_timestamp_counter() #define LOG_CYCLE_END(var_name, format) \ uint64 var_name##_duration = (uint64) (intrin_timestamp_counter() - var_name##_start_time); \ - LOG_FORMAT_1((format), {{LOG_DATA_UINT64, &var_name##_duration}}) + LOG_1((format), {{LOG_DATA_UINT64, &var_name##_duration}}) #define DEBUG_VERBOSE(str) ((void) 0) #define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0) #elif LOG_LEVEL == 2 - #define LOG_1(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_2(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_3(str) ((void) 0) - #define LOG_4(str) ((void) 0) + #define LOG_1(format, ...) 
log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_3(format, ...) ((void) 0) + #define LOG_4(format, ...) ((void) 0) - #define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_3(format, ...) ((void) 0) - #define LOG_FORMAT_4(format, ...) ((void) 0) + #define LOG_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_3(should_log, format, ...) ((void) 0) + #define LOG_TRUE_4(should_log, format, ...) ((void) 0) - #define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_2(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_3(should_log, str) ((void) 0) - #define LOG_TRUE_4(should_log, str) ((void) 0) - - #define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_2(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_3(should_log, str) ((void) 0) - #define LOG_FALSE_4(should_log, str) ((void) 0) - - #define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_3(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_TRUE_4(should_log, format, ...) 
((void) 0) - - #define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_3(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0) + #define LOG_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_3(should_log, format, ...) ((void) 0) + #define LOG_FALSE_4(should_log, format, ...) ((void) 0) #define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } #define LOG_IF_2(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } @@ -398,40 +352,25 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu #define LOG_CYCLE_START(var_name) uint64 var_name##_start_time = intrin_timestamp_counter() #define LOG_CYCLE_END(var_name, format) \ uint64 var_name##_duration = (uint64) (intrin_timestamp_counter() - var_name##_start_time); \ - LOG_FORMAT_1((format), {{LOG_DATA_UINT64, &var_name##_duration}}) + LOG_1((format), {{LOG_DATA_UINT64, &var_name##_duration}}) #define DEBUG_VERBOSE(str) ((void) 0) #define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0) #elif LOG_LEVEL == 1 - #define LOG_1(str) log((str), __FILE__, __func__, __LINE__) - #define LOG_2(str) ((void) 0) - #define LOG_3(str) ((void) 0) - #define LOG_4(str) ((void) 0) + #define LOG_1(format, ...) 
log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_2(format, ...) ((void) 0) + #define LOG_3(format, ...) ((void) 0) + #define LOG_4(format, ...) ((void) 0) - #define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_2(format, ...) ((void) 0) - #define LOG_FORMAT_3(format, ...) ((void) 0) - #define LOG_FORMAT_4(format, ...) ((void) 0) + #define LOG_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_TRUE_2(should_log, format, ...) ((void) 0) + #define LOG_TRUE_3(should_log, format, ...) ((void) 0) + #define LOG_TRUE_4(should_log, format, ...) ((void) 0) - #define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_TRUE_2(should_log, str) ((void) 0) - #define LOG_TRUE_3(should_log, str) ((void) 0) - #define LOG_TRUE_4(should_log, str) ((void) 0) - - #define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__) - #define LOG_FALSE_2(should_log, str) ((void) 0) - #define LOG_FALSE_3(should_log, str) ((void) 0) - #define LOG_FALSE_4(should_log, str) ((void) 0) - - #define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_TRUE_2(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_TRUE_3(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0) - - #define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) - #define LOG_FORMAT_FALSE_2(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_FALSE_3(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0) + #define LOG_FALSE_1(should_log, format, ...) 
if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__) + #define LOG_FALSE_2(should_log, format, ...) ((void) 0) + #define LOG_FALSE_3(should_log, format, ...) ((void) 0) + #define LOG_FALSE_4(should_log, format, ...) ((void) 0) #define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_succeeded), __FILE__, __func__, __LINE__); } // Only logs on failure @@ -445,36 +384,20 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu #define DEBUG_VERBOSE(str) ((void) 0) #define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0) #elif LOG_LEVEL == 0 - // No logging whatsoever - #define LOG_1(str) ((void) 0) - #define LOG_2(str) ((void) 0) - #define LOG_3(str) ((void) 0) - #define LOG_4(str) ((void) 0) + #define LOG_1(format, ...) ((void) 0) + #define LOG_2(format, ...) ((void) 0) + #define LOG_3(format, ...) ((void) 0) + #define LOG_4(format, ...) ((void) 0) - #define LOG_FORMAT_1(format, ...) ((void) 0) - #define LOG_FORMAT_2(format, ...) ((void) 0) - #define LOG_FORMAT_3(format, ...) ((void) 0) - #define LOG_FORMAT_4(format, ...) ((void) 0) + #define LOG_TRUE_1(should_log, format, ...) ((void) 0) + #define LOG_TRUE_2(should_log, format, ...) ((void) 0) + #define LOG_TRUE_3(should_log, format, ...) ((void) 0) + #define LOG_TRUE_4(should_log, format, ...) ((void) 0) - #define LOG_TRUE_1(should_log, str) ((void) 0) - #define LOG_TRUE_2(should_log, str) ((void) 0) - #define LOG_TRUE_3(should_log, str) ((void) 0) - #define LOG_TRUE_4(should_log, str) ((void) 0) - - #define LOG_FALSE_1(should_log, str) ((void) 0) - #define LOG_FALSE_2(should_log, str) ((void) 0) - #define LOG_FALSE_3(should_log, str) ((void) 0) - #define LOG_FALSE_4(should_log, str) ((void) 0) - - #define LOG_FORMAT_TRUE_1(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_TRUE_2(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_TRUE_3(should_log, format, ...) 
((void) 0) - #define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0) - - #define LOG_FORMAT_FALSE_1(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_FALSE_2(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_FALSE_3(should_log, format, ...) ((void) 0) - #define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0) + #define LOG_FALSE_1(should_log, format, ...) ((void) 0) + #define LOG_FALSE_2(should_log, format, ...) ((void) 0) + #define LOG_FALSE_3(should_log, format, ...) ((void) 0) + #define LOG_FALSE_4(should_log, format, ...) ((void) 0) #define LOG_IF_1(expr, str_succeeded, str_failed) ((void) 0) #define LOG_IF_2(expr, str_succeeded, str_failed) ((void) 0) diff --git a/log/PerformanceProfiler.h b/log/PerformanceProfiler.h index 914b13f..2b4748f 100644 --- a/log/PerformanceProfiler.h +++ b/log/PerformanceProfiler.h @@ -152,7 +152,7 @@ struct PerformanceProfiler { if (this->auto_log) { if (this->info_msg && this->info_msg[0]) { - LOG_FORMAT_2( + LOG_2( "-PERF %s (%s): %l cycles", { {LOG_DATA_CHAR_STR, (void *) perf->name}, @@ -161,7 +161,7 @@ struct PerformanceProfiler { } ); } else { - LOG_FORMAT_2( + LOG_2( "-PERF %s: %l cycles", { {LOG_DATA_CHAR_STR, (void *) perf->name}, @@ -210,7 +210,7 @@ void performance_profiler_end(int32 id) noexcept #define PROFILE_SCOPE(id, name) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), (name)) #define PROFILE_RESET(id) if(_perf_active && *_perf_active) performance_profiler_reset((id)) #else - #define PROFILE(id) ((void) 0) + #define PROFILE(id, ...) 
((void) 0) #define PROFILE_START(id, name) ((void) 0) #define PROFILE_END(id) ((void) 0) diff --git a/memory/BufferMemory.h b/memory/BufferMemory.h index 6882be0..92b4fe2 100644 --- a/memory/BufferMemory.h +++ b/memory/BufferMemory.h @@ -15,6 +15,7 @@ #include "../utils/TestUtils.h" #include "../log/Log.h" #include "../log/Stats.h" +#include "../log/PerformanceProfiler.h" #include "../log/DebugMemory.h" #include "../system/Allocator.h" @@ -35,7 +36,7 @@ void buffer_alloc(BufferMemory* buf, uint64 size, int32 alignment = 64) { ASSERT_SIMPLE(size); PROFILE(PROFILE_BUFFER_ALLOC, NULL, false, true); - LOG_FORMAT_1("Allocating BufferMemory: %n B", {{LOG_DATA_UINT64, &size}}); + LOG_1("Allocating BufferMemory: %n B", {{LOG_DATA_UINT64, &size}}); buf->memory = alignment < 2 ? (byte *) platform_alloc(size) diff --git a/memory/ChunkMemory.h b/memory/ChunkMemory.h index 6f8441b..6d0f611 100644 --- a/memory/ChunkMemory.h +++ b/memory/ChunkMemory.h @@ -17,6 +17,7 @@ #include "../compiler/CompilerUtils.h" #include "../log/Log.h" #include "../log/Stats.h" +#include "../log/PerformanceProfiler.h" #include "../log/DebugMemory.h" #include "BufferMemory.h" #include "../system/Allocator.h" @@ -63,7 +64,7 @@ void chunk_alloc(ChunkMemory* buf, uint32 count, uint32 chunk_size, int32 alignm memset(buf->memory, 0, buf->size); - LOG_FORMAT_1("Allocated ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}}); + LOG_1("Allocated ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}}); } inline @@ -327,7 +328,7 @@ int64 chunk_dump(const ChunkMemory* buf, byte* data) memcpy(data, buf->memory, buf->size); data += buf->size; - LOG_FORMAT_1("Dumped ChunkMemory: %n B", {{LOG_DATA_UINT64, (void *) &buf->size}}); + LOG_1("Dumped ChunkMemory: %n B", {{LOG_DATA_UINT64, (void *) &buf->size}}); return data - start; } @@ -362,7 +363,7 @@ int64 chunk_load(ChunkMemory* buf, const byte* data) buf->free = (uint64 *) (buf->memory + buf->count * buf->chunk_size); - LOG_FORMAT_1("Loaded ChunkMemory: %n B", 
{{LOG_DATA_UINT64, &buf->size}}); + LOG_1("Loaded ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}}); return buf->size; } diff --git a/memory/RingMemory.h b/memory/RingMemory.h index 997ff67..2377b87 100644 --- a/memory/RingMemory.h +++ b/memory/RingMemory.h @@ -19,6 +19,7 @@ #include "BufferMemory.h" #include "../log/Log.h" #include "../log/Stats.h" +#include "../log/PerformanceProfiler.h" #include "../log/DebugMemory.h" #include "../thread/Atomic.h" #include "../thread/Semaphore.h" @@ -48,7 +49,7 @@ void ring_alloc(RingMemory* ring, uint64 size, uint32 alignment = 64) { ASSERT_SIMPLE(size); PROFILE(PROFILE_RING_ALLOC, NULL, false, true); - LOG_FORMAT_1("Allocating RingMemory: %n B", {{LOG_DATA_UINT64, &size}}); + LOG_1("Allocating RingMemory: %n B", {{LOG_DATA_UINT64, &size}}); ring->memory = alignment < 2 ? (byte *) platform_alloc(size) @@ -62,7 +63,7 @@ void ring_alloc(RingMemory* ring, uint64 size, uint32 alignment = 64) memset(ring->memory, 0, ring->size); - LOG_FORMAT_1("Allocated RingMemory: %n B", {{LOG_DATA_UINT64, &ring->size}}); + LOG_1("Allocated RingMemory: %n B", {{LOG_DATA_UINT64, &ring->size}}); } inline diff --git a/platform/win32/ExceptionHandler.h b/platform/win32/ExceptionHandler.h index 1edab80..7c7a23c 100644 --- a/platform/win32/ExceptionHandler.h +++ b/platform/win32/ExceptionHandler.h @@ -88,9 +88,9 @@ void log_stack_trace(CONTEXT *context) { symbol->MaxNameLen = MAX_SYM_NAME; if (SymFromAddr(process, address, NULL, symbol)) { - LOG_FORMAT_1("Function: %s - Address: %l", {{LOG_DATA_CHAR_STR, symbol->Name}, {LOG_DATA_INT64, &symbol->Address}}); + LOG_1("Function: %s - Address: %l", {{LOG_DATA_CHAR_STR, symbol->Name}, {LOG_DATA_INT64, &symbol->Address}}); } else { - LOG_FORMAT_1("Function: (unknown) - Address: %l", {{LOG_DATA_INT64, &address}}); + LOG_1("Function: (unknown) - Address: %l", {{LOG_DATA_INT64, &address}}); } // Resolve file and line number @@ -99,7 +99,7 @@ void log_stack_trace(CONTEXT *context) { line.SizeOfStruct = 
sizeof(IMAGEHLP_LINE64); if (SymGetLineFromAddr64(process, address, &displacement, &line)) { - LOG_FORMAT_1(" File: %s, Line: %l", {{LOG_DATA_CHAR_STR, line.FileName}, {LOG_DATA_INT64, &line.LineNumber}}); + LOG_1(" File: %s, Line: %l", {{LOG_DATA_CHAR_STR, line.FileName}, {LOG_DATA_INT64, &line.LineNumber}}); } else { LOG_1(" File: (unknown), Line: (unknown)"); } @@ -108,7 +108,7 @@ void log_stack_trace(CONTEXT *context) { IMAGEHLP_MODULE64 module_info; module_info.SizeOfStruct = sizeof(IMAGEHLP_MODULE64); if (SymGetModuleInfo64(process, address, &module_info)) { - LOG_FORMAT_1(" Module: %s", {{LOG_DATA_CHAR_STR, module_info.ModuleName}}); + LOG_1(" Module: %s", {{LOG_DATA_CHAR_STR, module_info.ModuleName}}); } else { LOG_1(" Module: (unknown)"); } diff --git a/platform/win32/threading/Semaphore.h b/platform/win32/threading/Semaphore.h index 4846a38..0ee0fd1 100644 --- a/platform/win32/threading/Semaphore.h +++ b/platform/win32/threading/Semaphore.h @@ -14,30 +14,36 @@ typedef HANDLE sem_t; +inline void sem_init(sem_t* semaphore, int32 value) { *semaphore = CreateSemaphore(NULL, value, MAX_UINT32, NULL); } +inline void sem_destroy(sem_t* semaphore) { CloseHandle(*semaphore); } // decrement if != 0, if = 0 wait +inline void sem_wait(sem_t* semaphore) { WaitForSingleObject(*semaphore, INFINITE); } +inline int32 sem_timedwait(sem_t* semaphore, uint64 ms) { return (int32) WaitForSingleObject(*semaphore, (DWORD) ms); } +inline int32 sem_trywait(sem_t* semaphore) { return (int32) WaitForSingleObject(*semaphore, 0); } // increment +inline void sem_post(sem_t* semaphore) { ReleaseSemaphore(*semaphore, 1, NULL); } diff --git a/platform/win32/threading/Thread.h b/platform/win32/threading/Thread.h index 93a0071..feba623 100644 --- a/platform/win32/threading/Thread.h +++ b/platform/win32/threading/Thread.h @@ -94,6 +94,7 @@ int32 pthread_mutex_unlock(pthread_mutex_t* mutex) return 0; } +// WARNING: We don't support windows events since they are much slower than conditional 
variables/mutexes inline int32 pthread_cond_init(pthread_cond_t* cond, pthread_condattr_t*) { diff --git a/stdlib/HashMap.h b/stdlib/HashMap.h index 6c9212b..778abf7 100644 --- a/stdlib/HashMap.h +++ b/stdlib/HashMap.h @@ -124,7 +124,7 @@ struct HashMap { inline void hashmap_alloc(HashMap* hm, int32 count, int32 element_size) { - LOG_FORMAT_1("Allocate HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); + LOG_1("Allocate HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); byte* data = (byte *) platform_alloc( count * (sizeof(uint16) + element_size) + CEIL_DIV(count, 64) * sizeof(hm->buf.free) @@ -148,7 +148,7 @@ void hashmap_free(HashMap* hm) inline void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ring) noexcept { - LOG_FORMAT_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); + LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); byte* data = ring_get_memory( ring, count * (sizeof(uint16) + element_size) @@ -163,7 +163,7 @@ void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ri inline void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory* buf) noexcept { - LOG_FORMAT_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); + LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); byte* data = buffer_get_memory( buf, count * (sizeof(uint16) + element_size) @@ -178,7 +178,7 @@ void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory* inline void hashmap_create(HashMap* hm, int32 count, int32 element_size, byte* buf) noexcept { - LOG_FORMAT_1("Create HashMap for %n elements with %n B per 
element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); + LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); hm->table = (uint16 *) buf; chunk_init(&hm->buf, buf + sizeof(uint16) * count, count, element_size, 8); } @@ -797,7 +797,7 @@ int64 hashmap_dump(const HashMap* hm, byte* data, [[maybe_unused]] int32 steps = // dump free array memcpy(data, hm->buf.free, sizeof(uint64) * CEIL_DIV(hm->buf.count, 64)); - LOG_FORMAT_1("Dumped HashMap: %n B", {{LOG_DATA_UINT64, (void *) &hm->buf.size}}); + LOG_1("Dumped HashMap: %n B", {{LOG_DATA_UINT64, (void *) &hm->buf.size}}); return sizeof(hm->buf.count) // hash map count = buffer count + hm->buf.count * sizeof(uint16) // table content @@ -851,7 +851,7 @@ int64 hashmap_load(HashMap* hm, const byte* data, [[maybe_unused]] int32 steps = } } chunk_iterate_end; - LOG_FORMAT_1("Loaded HashMap: %n B", {{LOG_DATA_UINT64, &hm->buf.size}}); + LOG_1("Loaded HashMap: %n B", {{LOG_DATA_UINT64, &hm->buf.size}}); // How many bytes was read from data return sizeof(hm->buf.count) // hash map count = buffer count diff --git a/stdlib/PerfectHashMap.h b/stdlib/PerfectHashMap.h index 5276315..56bba62 100644 --- a/stdlib/PerfectHashMap.h +++ b/stdlib/PerfectHashMap.h @@ -111,7 +111,7 @@ PerfectHashMap* perfect_hashmap_prepare(PerfectHashMap* hm, const char** keys, i // WARNING: element_size = element size + remaining HashEntry data size void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, BufferMemory* buf) { - LOG_FORMAT_1("Create PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); + LOG_1("Create PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); hm->map_size = count; hm->entry_size = element_size; hm->hash_entries = buffer_get_memory( @@ -124,7 +124,7 @@ void perfect_hashmap_create(PerfectHashMap* 
hm, int32 count, int32 element_size, // WARNING: element_size = element size + remaining HashEntry data size void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, byte* buf) { - LOG_FORMAT_1("Create PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); + LOG_1("Create PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); hm->map_size = count; hm->entry_size = element_size; hm->hash_entries = buf; diff --git a/tests/MainTest.cpp b/tests/MainTest.cpp index 2e3c31e..5037932 100644 --- a/tests/MainTest.cpp +++ b/tests/MainTest.cpp @@ -9,6 +9,7 @@ #include "utils/BitUtilsTest.cpp" #include "utils/EndianUtilsTest.cpp" #include "utils/StringUtilsTest.cpp" +#include "utils/MathUtilsTest.cpp" #include "utils/UtilsTest.cpp" #ifdef UBER_TEST @@ -18,8 +19,7 @@ #endif int main() { - printf("\nStat Tests Asserts Details\n"); - printf("========================================================================================================================\n"); + TEST_HEADER(); MathEvaluatorTest(); MemoryChunkMemoryTest(); @@ -29,16 +29,10 @@ int main() { UIUIThemeTest(); UtilsBitUtilsTest(); UtilsStringUtilsTest(); + UtilsMathUtilsTest(); UtilsUtilsTest(); - printf("========================================================================================================================\n"); - printf( - "%s %5d (%5d/%5d)\n\n", - _test_global_assert_count ? "[NG]" : "[OK]", - _test_global_count, - _test_global_assert_count - _test_global_assert_error_count, - _test_global_assert_count - ); + TEST_FOOTER(); return _test_global_assert_error_count ? 
1 : 0; } \ No newline at end of file diff --git a/tests/TestFramework.h b/tests/TestFramework.h index eb045f8..7c42451 100644 --- a/tests/TestFramework.h +++ b/tests/TestFramework.h @@ -22,6 +22,33 @@ static int32_t _test_global_assert_count = 0; static int32_t _test_global_assert_error_count = 0; static int32_t _test_global_count = 0; +static int64_t _test_start; + +#define TEST_PROFILING_LOOPS 1000 + +#define TEST_HEADER() \ + int64_t _test_total_start = test_start_time(); \ + printf("\nStat Tests Assert(OK/NG) Time(ms) Details\n"); \ + printf("========================================================================================================================\n") + +#define TEST_FOOTER() \ + printf("========================================================================================================================\n"); \ + printf( \ + "%s %5d (%5d/%5d) %8.0f\n\n", \ + _test_global_assert_count ? "[NG]" : "[OK]", \ + _test_global_count, \ + _test_global_assert_count - _test_global_assert_error_count, \ + _test_global_assert_count, \ + test_duration_time(_test_total_start) / 1000000) + +#ifdef UBER_TEST +#define TEST_INIT_HEADER() (void)0 +#define TEST_FINALIZE_FOOTER() (void)0 +#else +#define TEST_INIT_HEADER() TEST_HEADER() +#define TEST_FINALIZE_FOOTER() TEST_FOOTER() +#endif + #if _WIN32 #include "../platform/win32/ExceptionHandler.h" #include @@ -33,12 +60,32 @@ LONG WINAPI test_exception_handler(EXCEPTION_POINTERS *exception_info) return EXCEPTION_EXECUTE_HANDLER; } -double test_measure_func_time_ns(void (*func)(void *), void *para) +int64_t test_start_time() +{ + LARGE_INTEGER start; + QueryPerformanceCounter(&start); + + return start.QuadPart; +} + +double test_duration_time(int64_t start) +{ + LARGE_INTEGER frequency, end; + QueryPerformanceFrequency(&frequency); + QueryPerformanceCounter(&end); + + return (double)(end.QuadPart - start) * 1e9 / frequency.QuadPart; +} + +double test_measure_func_time_ns(void (*func)(volatile void *), volatile void 
*para) { LARGE_INTEGER frequency, start, end; QueryPerformanceFrequency(&frequency); QueryPerformanceCounter(&start); - func(para); + for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) + { + func(para); + } QueryPerformanceCounter(&end); return (double)(end.QuadPart - start.QuadPart) * 1e9 / frequency.QuadPart; } @@ -46,8 +93,10 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) #define TEST_INIT(test_count) \ do \ { \ + TEST_INIT_HEADER(); \ setvbuf(stdout, NULL, _IONBF, 0); \ SetUnhandledExceptionFilter(test_exception_handler); \ + _test_start = test_start_time(); \ _test_assert_error_count = 0; \ _test_count = 0; \ _test_assert_count = 0; \ @@ -69,12 +118,32 @@ void test_exception_handler(int signum) exit(1); } -#include -double test_measure_func_time_ns(void (*func)(void *), void *para) +int64_t test_start_time() { struct timespec start, end; clock_gettime(CLOCK_MONOTONIC, &start); - func(para); + + return start.tv_sec * 1e9 + start.tv_nsec; +} + +double test_duration_time(int64_t start) +{ + LARGE_INTEGER frequency, end; + QueryPerformanceFrequency(&frequency); + QueryPerformanceCounter(&end); + + return (double)(end.tv_sec * 1e9 + end.tv_nsec - start); +} + +#include +double test_measure_func_time_ns(void (*func)(volatile void *), volatile void *para) +{ + struct timespec start, end; + clock_gettime(CLOCK_MONOTONIC, &start); + for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) + { + func(para); + } clock_gettime(CLOCK_MONOTONIC, &end); return (double)(end.tv_sec * 1e9 + end.tv_nsec) - (double)(start.tv_sec * 1e9 + start.tv_nsec); } @@ -82,9 +151,11 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) #define TEST_INIT(test_count) \ do \ { \ + TEST_INIT_HEADER(); \ setvbuf(stdout, NULL, _IONBF, 0); \ signal(SIGSEGV, test_exception_handler); \ signal(SIGABRT, test_exception_handler); \ + _test_start = test_start_time(); \ _test_assert_error_count = 0; \ _test_count = 0; \ _test_assert_count = 0; \ @@ -99,35 +170,36 @@ double 
test_measure_func_time_ns(void (*func)(void *), void *para) } while (0) #endif -#define TEST_FINALIZE() \ - do \ - { \ - if (_test_assert_error_count) \ - { \ - printf( \ - "[NG] %5d (%5d/%5d) %s\n", \ - _test_count, _test_assert_count - _test_assert_error_count, _test_assert_count, __FILE__); \ - for (int i = 0; i < _test_assert_error_count; ++i) \ - { \ - printf(" %s\n", _test_log[i]); \ - fflush(stdout); \ - } \ - } \ - else \ - { \ - printf( \ - "[OK] %5d (%5d/%5d) %s\n", \ - _test_count, _test_assert_count - _test_assert_error_count, _test_assert_count, __FILE__); \ - } \ - fflush(stdout); \ - free(_test_log); \ - _test_log = NULL; \ - _test_assert_error_count = 0; \ - _test_count = 0; \ - _test_assert_count = 0; \ +#define TEST_FINALIZE() \ + do \ + { \ + if (_test_assert_error_count) \ + { \ + printf( \ + "[NG] %5d (%5d/%5d) %8.0f %s\n", \ + _test_count, _test_assert_count - _test_assert_error_count, _test_assert_count, test_duration_time(_test_start) / 1000000, __FILE__); \ + for (int i = 0; i < _test_assert_error_count; ++i) \ + { \ + printf(" %s\n", _test_log[i]); \ + fflush(stdout); \ + } \ + } \ + else \ + { \ + printf( \ + "[OK] %5d (%5d/%5d) %8.0f %s\n", \ + _test_count, _test_assert_count - _test_assert_error_count, _test_assert_count, test_duration_time(_test_start) / 1000000, __FILE__); \ + } \ + fflush(stdout); \ + free(_test_log); \ + _test_log = NULL; \ + _test_assert_error_count = 0; \ + _test_count = 0; \ + _test_assert_count = 0; \ + TEST_FINALIZE_FOOTER(); \ } while (0) -#define RUN_TEST(func) \ +#define TEST_RUN(func) \ ++_test_count; \ ++_test_global_count; \ func() @@ -274,13 +346,19 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) \ /* Measure func1 */ \ start = intrin_timestamp_counter(); \ - func1((void *)&a); \ + for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) \ + { \ + func1((volatile void *)&a); \ + } \ end = intrin_timestamp_counter(); \ cycles_func1 = end - start; \ \ /* Measure func2 */ \ start = 
intrin_timestamp_counter(); \ - func2((void *)&b); \ + for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) \ + { \ + func2((volatile void *)&b); \ + } \ end = intrin_timestamp_counter(); \ cycles_func2 = end - start; \ \ @@ -296,7 +374,7 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) "%4i: %.2f%% (%s: %llu cycles, %s: %llu cycles)", \ __LINE__, percent_diff + 100.0f, #func1, (uint64_t)cycles_func1, #func2, (uint64_t)cycles_func2); \ } \ - ASSERT_EQUALS(a, b); \ + ASSERT_TRUE((a && b) || a == b); \ } while (0) #define ASSERT_FUNCTION_TEST_CYCLE(func, cycles) \ @@ -310,7 +388,10 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) \ /* Measure func */ \ start = intrin_timestamp_counter(); \ - func((void *)¶); \ + for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) \ + { \ + func((volatile void *)¶); \ + } \ end = intrin_timestamp_counter(); \ cycles_func = end - start; \ \ @@ -333,10 +414,10 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) int64_t a = 0, b = 0; \ \ /* Measure func1 */ \ - time_func1 = test_measure_func_time_ns(func1, (void *)&a); \ + time_func1 = test_measure_func_time_ns(func1, (volatile void *)&a); \ \ /* Measure func2 */ \ - time_func2 = test_measure_func_time_ns(func2, (void *)&b); \ + time_func2 = test_measure_func_time_ns(func2, (volatile void *)&b); \ \ /* Calculate percentage difference */ \ double percent_diff = 100.0 * (time_func1 - time_func2) / time_func2; \ @@ -347,31 +428,10 @@ double test_measure_func_time_ns(void (*func)(void *), void *para) ++_test_global_assert_error_count; \ snprintf( \ _test_log[_test_assert_error_count++], 1024, \ - "%4i: %.2f%% (%s: %.2f ns, %s: %.2f ns)", \ + "%4i: %.2f%% (%s: %.2f us, %s: %.2f us)", \ __LINE__, percent_diff + 100.0f, #func1, time_func1, #func2, time_func2); \ } \ - ASSERT_EQUALS(a, b); \ - } while (0) - -#define ASSERT_FUNCTION_TEST_TIME(func, duration) \ - do \ - { \ - ++_test_assert_count; \ - ++_test_global_assert_count; \ - double 
time_func; \ - int64_t para = 0; \ - \ - /* Measure func */ \ - time_func = test_measure_func_time_ns(func, (void *)¶); \ - \ - if (time_func >= duration) \ - { \ - ++_test_global_assert_error_count; \ - snprintf( \ - _test_log[_test_assert_error_count++], 1024, \ - "%4i: %.2f%% (%s: %.2f ns, %s: %.2f ns)", \ - __LINE__, percent_diff + 100.0f, #func, time_func); \ - } \ + ASSERT_TRUE((a && b) || a == b); \ } while (0) #endif \ No newline at end of file diff --git a/tests/math/EvaluatorTest.cpp b/tests/math/EvaluatorTest.cpp index 0712b6d..d0181c8 100644 --- a/tests/math/EvaluatorTest.cpp +++ b/tests/math/EvaluatorTest.cpp @@ -47,9 +47,9 @@ static void test_evaluator_evaluate_function() { int main() { TEST_INIT(10); - RUN_TEST(test_evaluator_evaluate); - RUN_TEST(test_evaluator_evaluate_variables); - RUN_TEST(test_evaluator_evaluate_function); + TEST_RUN(test_evaluator_evaluate); + TEST_RUN(test_evaluator_evaluate_variables); + TEST_RUN(test_evaluator_evaluate_function); TEST_FINALIZE(); diff --git a/tests/memory/ChunkMemoryTest.cpp b/tests/memory/ChunkMemoryTest.cpp index 9e548c7..a6b1f78 100644 --- a/tests/memory/ChunkMemoryTest.cpp +++ b/tests/memory/ChunkMemoryTest.cpp @@ -105,6 +105,7 @@ static void test_chunk_reserve_last_element() { static void test_chunk_reserve_full() { ChunkMemory mem = {}; chunk_alloc(&mem, 10, 10); + mem.free[0] = 0xFFFFFFFFFFFFFFFF; ASSERT_EQUALS(chunk_reserve(&mem, 1), -1); } @@ -129,17 +130,17 @@ static void test_chunk_reserve_last_element() { int main() { TEST_INIT(25); - RUN_TEST(test_chunk_alloc); - RUN_TEST(test_chunk_id_from_memory); - RUN_TEST(test_chunk_get_element); - RUN_TEST(test_chunk_reserve); - RUN_TEST(test_chunk_free_elements); - RUN_TEST(test_chunk_reserve_wrapping); - RUN_TEST(test_chunk_reserve_last_element); + TEST_RUN(test_chunk_alloc); + TEST_RUN(test_chunk_id_from_memory); + TEST_RUN(test_chunk_get_element); + TEST_RUN(test_chunk_reserve); + TEST_RUN(test_chunk_free_elements); + 
TEST_RUN(test_chunk_reserve_wrapping); + TEST_RUN(test_chunk_reserve_last_element); #if !DEBUG - RUN_TEST(test_chunk_reserve_full); - RUN_TEST(test_chunk_reserve_invalid_size); + TEST_RUN(test_chunk_reserve_full); + TEST_RUN(test_chunk_reserve_invalid_size); #endif TEST_FINALIZE(); diff --git a/tests/memory/RingMemoryTest.cpp b/tests/memory/RingMemoryTest.cpp index beb9f21..58bd266 100644 --- a/tests/memory/RingMemoryTest.cpp +++ b/tests/memory/RingMemoryTest.cpp @@ -94,13 +94,13 @@ static void test_ring_commit_safe() { int main() { TEST_INIT(25); - RUN_TEST(test_ring_alloc); - RUN_TEST(test_ring_get_memory); - RUN_TEST(test_ring_calculate_position); - RUN_TEST(test_ring_reset); - RUN_TEST(test_ring_get_memory_nomove); - RUN_TEST(test_ring_move_pointer); - RUN_TEST(test_ring_commit_safe); + TEST_RUN(test_ring_alloc); + TEST_RUN(test_ring_get_memory); + TEST_RUN(test_ring_calculate_position); + TEST_RUN(test_ring_reset); + TEST_RUN(test_ring_get_memory_nomove); + TEST_RUN(test_ring_move_pointer); + TEST_RUN(test_ring_commit_safe); TEST_FINALIZE(); diff --git a/tests/stdlib/HashMapTest.cpp b/tests/stdlib/HashMapTest.cpp index 26d9eba..d189134 100644 --- a/tests/stdlib/HashMapTest.cpp +++ b/tests/stdlib/HashMapTest.cpp @@ -78,9 +78,9 @@ static void test_hashmap_dump_load() { int main() { TEST_INIT(25); - RUN_TEST(test_hashmap_alloc); - RUN_TEST(test_hashmap_insert_int32); - RUN_TEST(test_hashmap_dump_load); + TEST_RUN(test_hashmap_alloc); + TEST_RUN(test_hashmap_insert_int32); + TEST_RUN(test_hashmap_dump_load); TEST_FINALIZE(); diff --git a/tests/ui/UILayoutTest.cpp b/tests/ui/UILayoutTest.cpp index 825f363..af613f3 100644 --- a/tests/ui/UILayoutTest.cpp +++ b/tests/ui/UILayoutTest.cpp @@ -87,9 +87,9 @@ static void test_layout_from_theme() { int main() { TEST_INIT(100); - RUN_TEST(test_layout_from_file_txt); - RUN_TEST(test_layout_to_from_data); - RUN_TEST(test_layout_from_theme); + TEST_RUN(test_layout_from_file_txt); + TEST_RUN(test_layout_to_from_data); + 
TEST_RUN(test_layout_from_theme); TEST_FINALIZE(); diff --git a/tests/ui/UIThemeTest.cpp b/tests/ui/UIThemeTest.cpp index 57bb531..0096bd8 100644 --- a/tests/ui/UIThemeTest.cpp +++ b/tests/ui/UIThemeTest.cpp @@ -65,8 +65,8 @@ static void test_theme_to_from_data() { int main() { TEST_INIT(100); - RUN_TEST(test_theme_from_file_txt); - RUN_TEST(test_theme_to_from_data); + TEST_RUN(test_theme_from_file_txt); + TEST_RUN(test_theme_to_from_data); TEST_FINALIZE(); diff --git a/tests/utils/BitUtilsTest.cpp b/tests/utils/BitUtilsTest.cpp index 5cbca1a..20f7903 100644 --- a/tests/utils/BitUtilsTest.cpp +++ b/tests/utils/BitUtilsTest.cpp @@ -165,32 +165,32 @@ static void test_bytes_merge_8_r2l() { int main() { TEST_INIT(75); - RUN_TEST(test_is_bit_set_l2r); - RUN_TEST(test_bit_set_l2r); - RUN_TEST(test_bit_unset_l2r); - RUN_TEST(test_bit_flip_l2r); - RUN_TEST(test_bit_set_to_l2r); - RUN_TEST(test_bits_get_8_l2r); - RUN_TEST(test_bits_get_16_l2r); - RUN_TEST(test_bits_get_32_l2r); - RUN_TEST(test_bits_get_64_l2r); - RUN_TEST(test_bytes_merge_2_l2r); - RUN_TEST(test_bytes_merge_4_l2r); - RUN_TEST(test_bytes_merge_8_l2r); + TEST_RUN(test_is_bit_set_l2r); + TEST_RUN(test_bit_set_l2r); + TEST_RUN(test_bit_unset_l2r); + TEST_RUN(test_bit_flip_l2r); + TEST_RUN(test_bit_set_to_l2r); + TEST_RUN(test_bits_get_8_l2r); + TEST_RUN(test_bits_get_16_l2r); + TEST_RUN(test_bits_get_32_l2r); + TEST_RUN(test_bits_get_64_l2r); + TEST_RUN(test_bytes_merge_2_l2r); + TEST_RUN(test_bytes_merge_4_l2r); + TEST_RUN(test_bytes_merge_8_l2r); - RUN_TEST(test_is_bit_set_r2l); - RUN_TEST(test_is_bit_set_64_r2l); - RUN_TEST(test_bit_set_r2l); - RUN_TEST(test_bit_unset_r2l); - RUN_TEST(test_bit_flip_r2l); - RUN_TEST(test_bit_set_to_r2l); - RUN_TEST(test_bits_get_8_r2l); - RUN_TEST(test_bits_get_16_r2l); - RUN_TEST(test_bits_get_32_r2l); - RUN_TEST(test_bits_get_64_r2l); - RUN_TEST(test_bytes_merge_2_r2l); - RUN_TEST(test_bytes_merge_4_r2l); - RUN_TEST(test_bytes_merge_8_r2l); + TEST_RUN(test_is_bit_set_r2l); 
+ TEST_RUN(test_is_bit_set_64_r2l); + TEST_RUN(test_bit_set_r2l); + TEST_RUN(test_bit_unset_r2l); + TEST_RUN(test_bit_flip_r2l); + TEST_RUN(test_bit_set_to_r2l); + TEST_RUN(test_bits_get_8_r2l); + TEST_RUN(test_bits_get_16_r2l); + TEST_RUN(test_bits_get_32_r2l); + TEST_RUN(test_bits_get_64_r2l); + TEST_RUN(test_bytes_merge_2_r2l); + TEST_RUN(test_bytes_merge_4_r2l); + TEST_RUN(test_bytes_merge_8_r2l); TEST_FINALIZE(); diff --git a/tests/utils/EndianUtilsTest.cpp b/tests/utils/EndianUtilsTest.cpp index e84158f..323e1ed 100644 --- a/tests/utils/EndianUtilsTest.cpp +++ b/tests/utils/EndianUtilsTest.cpp @@ -118,19 +118,19 @@ static void test_endian_swap_double() { int main() { TEST_INIT(50); - RUN_TEST(test_swap_endian_16); - RUN_TEST(test_swap_endian_32); - RUN_TEST(test_swap_endian_64); + TEST_RUN(test_swap_endian_16); + TEST_RUN(test_swap_endian_32); + TEST_RUN(test_swap_endian_64); - RUN_TEST(test_is_little_endian); - RUN_TEST(test_endian_swap_uint16); - RUN_TEST(test_endian_swap_int16); - RUN_TEST(test_endian_swap_uint32); - RUN_TEST(test_endian_swap_int32); - RUN_TEST(test_endian_swap_uint64); - RUN_TEST(test_endian_swap_int64); - RUN_TEST(test_endian_swap_float); - RUN_TEST(test_endian_swap_double); + TEST_RUN(test_is_little_endian); + TEST_RUN(test_endian_swap_uint16); + TEST_RUN(test_endian_swap_int16); + TEST_RUN(test_endian_swap_uint32); + TEST_RUN(test_endian_swap_int32); + TEST_RUN(test_endian_swap_uint64); + TEST_RUN(test_endian_swap_int64); + TEST_RUN(test_endian_swap_float); + TEST_RUN(test_endian_swap_double); TEST_FINALIZE(); diff --git a/tests/utils/MathUtilsTest.cpp b/tests/utils/MathUtilsTest.cpp new file mode 100644 index 0000000..e3b2e00 --- /dev/null +++ b/tests/utils/MathUtilsTest.cpp @@ -0,0 +1,624 @@ +#include "../TestFramework.h" +#include "../../utils/MathUtils.h" +#include + +// Correctness tests for f32 (float) approximate functions +static void test_sin_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(sin_approx(0.0f), sinf(0.0f), 0.001f); + 
ASSERT_EQUALS_WITH_DELTA(sin_approx(1.0f), sinf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(sin_approx(3.14f), sinf(3.14f), 0.001f); +} + +static void test_cos_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(cos_approx(0.0f), cosf(0.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(cos_approx(1.0f), cosf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(cos_approx(3.14f), cosf(3.14f), 0.001f); +} + +static void test_tan_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(tan_approx(0.0f), tanf(0.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(tan_approx(1.0f), tanf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(tan_approx(0.5f), tanf(0.5f), 0.001f); +} + +static void test_sqrt_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(sqrt_approx(1.0f), sqrtf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(sqrt_approx(2.0f), sqrtf(2.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(sqrt_approx(100.0f), sqrtf(100.0f), 0.001f); +} + +static void test_asin_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(asin_approx(0.0f), asinf(0.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(asin_approx(0.5f), asinf(0.5f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(asin_approx(-0.5f), asinf(-0.5f), 0.001f); +} + +static void test_acos_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(acos_approx(0.0f), acosf(0.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(acos_approx(0.5f), acosf(0.5f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(acos_approx(-0.5f), acosf(-0.5f), 0.001f); +} + +static void test_atan_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(atan_approx(0.0f), atanf(0.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(atan_approx(1.0f), atanf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(atan_approx(-1.0f), atanf(-1.0f), 0.001f); +} + +static void test_rsqrt_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(1.0f), 1.0f / sqrtf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(2.0f), 1.0f / sqrtf(2.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(100.0f), 1.0f / sqrtf(100.0f), 0.001f); +} + +static void test_exp_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(exp_approx(0.0f), expf(0.0f), 0.001f); + 
ASSERT_EQUALS_WITH_DELTA(exp_approx(1.0f), expf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(exp_approx(-1.0f), expf(-1.0f), 0.001f); +} + +static void test_log_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(log_approx(1.0f), logf(1.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(log_approx(2.0f), logf(2.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(log_approx(10.0f), logf(10.0f), 0.001f); +} + +static void test_pow_approx_f32() { + ASSERT_EQUALS_WITH_DELTA(pow_approx(2.0f, 3.0f), powf(2.0f, 3.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(pow_approx(3.0f, 2.0f), powf(3.0f, 2.0f), 0.001f); + ASSERT_EQUALS_WITH_DELTA(pow_approx(10.0f, 0.5f), powf(10.0f, 0.5f), 0.001f); +} + +// Correctness tests for f64 (double) approximate functions +static void test_sin_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(sin_approx(0.0), sin(0.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(sin_approx(1.0), sin(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(sin_approx(3.14), sin(3.14), 0.001); +} + +static void test_cos_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(cos_approx(0.0), cos(0.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(cos_approx(1.0), cos(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(cos_approx(3.14), cos(3.14), 0.001); +} + +static void test_tan_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(tan_approx(0.0), tan(0.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(tan_approx(1.0), tan(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(tan_approx(0.5), tan(0.5), 0.001); +} + +static void test_sqrt_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(sqrt_approx(1.0), sqrt(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(sqrt_approx(2.0), sqrt(2.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(sqrt_approx(100.0), sqrt(100.0), 0.001); +} + +static void test_asin_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(asin_approx(0.0), asin(0.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(asin_approx(0.5), asin(0.5), 0.001); + ASSERT_EQUALS_WITH_DELTA(asin_approx(-0.5), asin(-0.5), 0.001); +} + +static void test_acos_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(acos_approx(0.0), acos(0.0), 0.001); + 
ASSERT_EQUALS_WITH_DELTA(acos_approx(0.5), acos(0.5), 0.001); + ASSERT_EQUALS_WITH_DELTA(acos_approx(-0.5), acos(-0.5), 0.001); +} + +static void test_atan_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(atan_approx(0.0), atan(0.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(atan_approx(1.0), atan(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(atan_approx(-1.0), atan(-1.0), 0.001); +} + +static void test_rsqrt_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(1.0), 1.0 / sqrt(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(2.0), 1.0 / sqrt(2.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(100.0), 1.0 / sqrt(100.0), 0.001); +} + +static void test_exp_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(exp_approx(0.0), exp(0.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(exp_approx(1.0), exp(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(exp_approx(-1.0), exp(-1.0), 0.001); +} + +static void test_log_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(log_approx(1.0), log(1.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(log_approx(2.0), log(2.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(log_approx(10.0), log(10.0), 0.001); +} + +static void test_pow_approx_f64() { + ASSERT_EQUALS_WITH_DELTA(pow_approx(2.0, 3.0), pow(2.0, 3.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(pow_approx(3.0, 2.0), pow(3.0, 2.0), 0.001); + ASSERT_EQUALS_WITH_DELTA(pow_approx(10.0, 0.5), pow(10.0, 0.5), 0.001); +} + +// Performance tests for f32 (float) approximate functions +static void _sin_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += sin_approx((f32)rand() / RAND_MAX); +} + +static void _sin_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += sinf((f32)rand() / RAND_MAX); +} + +static void test_sin_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_sin_approx_f32, _sin_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_sin_approx_f32, _sin_f32, 5.0); +} + +static void _cos_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += 
cos_approx((f32)rand() / RAND_MAX); +} + +static void _cos_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += cosf((f32)rand() / RAND_MAX); +} + +static void test_cos_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_cos_approx_f32, _cos_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_cos_approx_f32, _cos_f32, 5.0); +} + +static void _tan_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += tan_approx((f32)rand() / RAND_MAX); +} + +static void _tan_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += tanf((f32)rand() / RAND_MAX); +} + +static void test_tan_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_tan_approx_f32, _tan_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_tan_approx_f32, _tan_f32, 5.0); +} + +static void _sqrt_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += sqrt_approx((f32)rand() / RAND_MAX + 0.1f); // Avoid sqrt(0) +} + +static void _sqrt_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += sqrtf((f32)rand() / RAND_MAX + 0.1f); // Avoid sqrt(0) +} + +static void test_sqrt_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_sqrt_approx_f32, _sqrt_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_sqrt_approx_f32, _sqrt_f32, 5.0); +} + +static void _asin_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += asin_approx((f32)rand() / RAND_MAX); +} + +static void _asin_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += asinf((f32)rand() / RAND_MAX); +} + +static void test_asin_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_asin_approx_f32, _asin_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_asin_approx_f32, _asin_f32, 5.0); +} + +static void _acos_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += acos_approx((f32)rand() / RAND_MAX); +} + +static void 
_acos_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += acosf((f32)rand() / RAND_MAX); +} + +static void test_acos_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_acos_approx_f32, _acos_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_acos_approx_f32, _acos_f32, 5.0); +} + +static void _atan_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += atan_approx((f32)rand() / RAND_MAX); +} + +static void _atan_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += atanf((f32)rand() / RAND_MAX); +} + +static void test_atan_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_atan_approx_f32, _atan_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_atan_approx_f32, _atan_f32, 5.0); +} + +static void _rsqrt_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += rsqrt_approx((f32)rand() / RAND_MAX + 0.1f); // Avoid division by zero +} + +static void _rsqrt_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += 1.0f / sqrtf((f32)rand() / RAND_MAX + 0.1f); // Avoid division by zero +} + +static void test_rsqrt_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_rsqrt_approx_f32, _rsqrt_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_rsqrt_approx_f32, _rsqrt_f32, 5.0); +} + +static void _exp_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += exp_approx((f32)rand() / RAND_MAX); +} + +static void _exp_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += expf((f32)rand() / RAND_MAX); +} + +static void test_exp_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_exp_approx_f32, _exp_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_exp_approx_f32, _exp_f32, 5.0); +} + +static void _log_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += log_approx((f32)rand() / RAND_MAX + 0.1f); // Avoid log(0) +} + +static 
void _log_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += logf((f32)rand() / RAND_MAX + 0.1f); // Avoid log(0) +} + +static void test_log_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_log_approx_f32, _log_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_log_approx_f32, _log_f32, 5.0); +} + +static void _pow_approx_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += pow_approx((f32)rand() / RAND_MAX, (f32)rand() / RAND_MAX); +} + +static void _pow_f32(volatile void* val) { + f32* res = (f32*)val; + srand((int32) *res); + + *res += powf((f32)rand() / RAND_MAX, (f32)rand() / RAND_MAX); +} + +static void test_pow_approx_performance_f32() { + COMPARE_FUNCTION_TEST_TIME(_pow_approx_f32, _pow_f32, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_pow_approx_f32, _pow_f32, 5.0); +} + +// Performance tests for f64 (double) approximate functions +static void _sin_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += sin_approx((f64)rand() / RAND_MAX); +} + +static void _sin_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += sin((f64)rand() / RAND_MAX); +} + +static void test_sin_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_sin_approx_f64, _sin_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_sin_approx_f64, _sin_f64, 5.0); +} + +static void _cos_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += cos_approx((f64)rand() / RAND_MAX); +} + +static void _cos_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += cos((f64)rand() / RAND_MAX); +} + +static void test_cos_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_cos_approx_f64, _cos_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_cos_approx_f64, _cos_f64, 5.0); +} + +static void _tan_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += tan_approx((f64)rand() / RAND_MAX); +} + 
+static void _tan_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += tan((f64)rand() / RAND_MAX); +} + +static void test_tan_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_tan_approx_f64, _tan_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_tan_approx_f64, _tan_f64, 5.0); +} + +static void _sqrt_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += sqrt_approx((f64)rand() / RAND_MAX + 0.1); // Avoid sqrt(0) +} + +static void _sqrt_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += sqrt((f64)rand() / RAND_MAX + 0.1); // Avoid sqrt(0) +} + +static void test_sqrt_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_sqrt_approx_f64, _sqrt_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_sqrt_approx_f64, _sqrt_f64, 5.0); +} + +static void _asin_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += asin_approx((f64)rand() / RAND_MAX); +} + +static void _asin_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += asin((f64)rand() / RAND_MAX); +} + +static void test_asin_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_asin_approx_f64, _asin_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_asin_approx_f64, _asin_f64, 5.0); +} + +static void _acos_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += acos_approx((f64)rand() / RAND_MAX); +} + +static void _acos_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += acos((f64)rand() / RAND_MAX); +} + +static void test_acos_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_acos_approx_f64, _acos_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_acos_approx_f64, _acos_f64, 5.0); +} + +static void _atan_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += atan_approx((f64)rand() / RAND_MAX); +} + +static void _atan_f64(volatile void* val) { + f64* res 
= (f64*)val; + srand((int32) *res); + + *res += atan((f64)rand() / RAND_MAX); +} + +static void test_atan_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_atan_approx_f64, _atan_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_atan_approx_f64, _atan_f64, 5.0); +} + +static void _rsqrt_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += rsqrt_approx((f64)rand() / RAND_MAX + 0.1); // Avoid division by zero +} + +static void _rsqrt_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += 1.0 / sqrt((f64)rand() / RAND_MAX + 0.1); // Avoid division by zero +} + +static void test_rsqrt_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_rsqrt_approx_f64, _rsqrt_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_rsqrt_approx_f64, _rsqrt_f64, 5.0); +} + +static void _exp_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += exp_approx((f64)rand() / RAND_MAX); +} + +static void _exp_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += exp((f64)rand() / RAND_MAX); +} + +static void test_exp_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_exp_approx_f64, _exp_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_exp_approx_f64, _exp_f64, 5.0); +} + +static void _log_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += log_approx((f64)rand() / RAND_MAX + 0.1); // Avoid log(0) +} + +static void _log_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += log((f64)rand() / RAND_MAX + 0.1); // Avoid log(0) +} + +static void test_log_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_log_approx_f64, _log_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_log_approx_f64, _log_f64, 5.0); +} + +static void _pow_approx_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += pow_approx((f64)rand() / RAND_MAX, (f64)rand() / RAND_MAX); +} + +static void 
_pow_f64(volatile void* val) { + f64* res = (f64*)val; + srand((int32) *res); + + *res += pow((f64)rand() / RAND_MAX, (f64)rand() / RAND_MAX); +} + +static void test_pow_approx_performance_f64() { + COMPARE_FUNCTION_TEST_TIME(_pow_approx_f64, _pow_f64, 5.0); + COMPARE_FUNCTION_TEST_CYCLE(_pow_approx_f64, _pow_f64, 5.0); +} + +#ifdef UBER_TEST + #ifdef main + #undef main + #endif + #define main UtilsMathUtilsTest +#endif + +int main() { + TEST_INIT(200); + + // Run correctness tests for f32 functions + TEST_RUN(test_sin_approx_f32); + TEST_RUN(test_cos_approx_f32); + TEST_RUN(test_tan_approx_f32); + TEST_RUN(test_sqrt_approx_f32); + TEST_RUN(test_asin_approx_f32); + TEST_RUN(test_acos_approx_f32); + TEST_RUN(test_atan_approx_f32); + TEST_RUN(test_rsqrt_approx_f32); + TEST_RUN(test_exp_approx_f32); + TEST_RUN(test_log_approx_f32); + TEST_RUN(test_pow_approx_f32); + + // Run correctness tests for f64 functions + TEST_RUN(test_sin_approx_f64); + TEST_RUN(test_cos_approx_f64); + TEST_RUN(test_tan_approx_f64); + TEST_RUN(test_sqrt_approx_f64); + TEST_RUN(test_asin_approx_f64); + TEST_RUN(test_acos_approx_f64); + TEST_RUN(test_atan_approx_f64); + TEST_RUN(test_rsqrt_approx_f64); + TEST_RUN(test_exp_approx_f64); + TEST_RUN(test_log_approx_f64); + TEST_RUN(test_pow_approx_f64); + + // Run performance tests for f32 functions + TEST_RUN(test_sin_approx_performance_f32); + TEST_RUN(test_cos_approx_performance_f32); + TEST_RUN(test_tan_approx_performance_f32); + TEST_RUN(test_sqrt_approx_performance_f32); + TEST_RUN(test_asin_approx_performance_f32); + TEST_RUN(test_acos_approx_performance_f32); + TEST_RUN(test_atan_approx_performance_f32); + TEST_RUN(test_rsqrt_approx_performance_f32); + TEST_RUN(test_exp_approx_performance_f32); + TEST_RUN(test_log_approx_performance_f32); + TEST_RUN(test_pow_approx_performance_f32); + + // Run performance tests for f64 functions + TEST_RUN(test_sin_approx_performance_f64); + TEST_RUN(test_cos_approx_performance_f64); + 
TEST_RUN(test_tan_approx_performance_f64); + TEST_RUN(test_sqrt_approx_performance_f64); + TEST_RUN(test_asin_approx_performance_f64); + TEST_RUN(test_acos_approx_performance_f64); + TEST_RUN(test_atan_approx_performance_f64); + TEST_RUN(test_rsqrt_approx_performance_f64); + TEST_RUN(test_exp_approx_performance_f64); + TEST_RUN(test_log_approx_performance_f64); + TEST_RUN(test_pow_approx_performance_f64); + + TEST_FINALIZE(); + + return 0; +} \ No newline at end of file diff --git a/tests/utils/StringUtilsTest.cpp b/tests/utils/StringUtilsTest.cpp index f39242b..3692436 100644 --- a/tests/utils/StringUtilsTest.cpp +++ b/tests/utils/StringUtilsTest.cpp @@ -83,16 +83,24 @@ static void test_str_length() ASSERT_EQUALS(str_length("2asdf dw"), 8); } -static void _str_length(void* val) { - int64* res = (int64 *) val; +static void _str_length(volatile void* val) { + volatile int64* res = (volatile int64 *) val; - *res = (int64) str_length("This %d is a %s with %f values"); + char buffer[32]; + memcpy(buffer, "This %d is a %s with %f values", sizeof("This %d is a %s with %f values")); + buffer[30] = (byte) *res; + + *res += (int64) str_length(buffer); } -static void _strlen(void* val) { - int64* res = (int64 *) val; +static void _strlen(volatile void* val) { + volatile int64* res = (volatile int64 *) val; - *res = (int64) strlen("This %d is a %s with %f values"); + char buffer[32]; + memcpy(buffer, "This %d is a %s with %f values", sizeof("This %d is a %s with %f values")); + buffer[30] = (byte) *res; + + *res += (int64) strlen(buffer); } static void test_str_length_performance() { @@ -100,7 +108,7 @@ static void test_str_length_performance() { COMPARE_FUNCTION_TEST_CYCLE(_str_length, _strlen, 5.0); } -static void _str_is_alphanum(void* val) { +static void _str_is_alphanum(volatile void* val) { bool* res = (bool *) val; srand(0); @@ -109,10 +117,10 @@ static void _str_is_alphanum(void* val) { a += str_is_alphanum((byte) rand()); } - *res = (bool) a; + *res |= (bool) a; } 
-static void _isalnum(void* val) { +static void _isalnum(volatile void* val) { bool* res = (bool *) val; srand(0); @@ -121,7 +129,7 @@ static void _isalnum(void* val) { a += isalnum((byte) rand()); } - *res = (bool) a; + *res |= (bool) a; } static void test_str_is_alphanum_performance() { @@ -136,20 +144,20 @@ static void test_sprintf_fast() ASSERT_TRUE(strcmp(buffer, "This 1337 is a test with 3.00000 values") == 0); } -static void _sprintf_fast(void* val) { - bool* res = (bool *) val; +static void _sprintf_fast(volatile void* val) { + volatile bool* res = (volatile bool *) val; char buffer[256]; sprintf_fast(buffer, "This %d is a %s with %f values", 1337, "test", 3.0); - *res = (bool) (strcmp(buffer, "This 1337 is a test with 3.00000 values") == 0); + *res |= (bool) (strcmp(buffer, "This 1337 is a test with 3.00000 values") == 0); } -static void _sprintf(void* val) { - bool* res = (bool *) val; +static void _sprintf(volatile void* val) { + volatile bool* res = (volatile bool *) val; char buffer[256]; sprintf(buffer, "This %d is a %s with %f values", 1337, "test", 3.0); - *res = (bool) (strcmp(buffer, "This 1337 is a test with 3.000000 values") == 0); + *res |= (bool) (strcmp(buffer, "This 1337 is a test with 3.000000 values") == 0); } static void test_sprintf_fast_performance() { @@ -171,24 +179,26 @@ static void test_str_to_float() #define main UtilsStringUtilsTest #endif +#include <string.h> + int main() { TEST_INIT(100); - RUN_TEST(test_utf8_encode); - RUN_TEST(test_utf8_decode); - RUN_TEST(test_utf8_str_length); - RUN_TEST(test_str_is_float); - RUN_TEST(test_str_is_integer); - RUN_TEST(test_sprintf_fast); - RUN_TEST(test_str_is_alpha); - RUN_TEST(test_str_is_num); - RUN_TEST(test_str_is_alphanum); - RUN_TEST(test_str_length); - RUN_TEST(test_str_to_float); + TEST_RUN(test_utf8_encode); + TEST_RUN(test_utf8_decode); + TEST_RUN(test_utf8_str_length); + TEST_RUN(test_str_is_float); + TEST_RUN(test_str_is_integer); + TEST_RUN(test_sprintf_fast); + 
TEST_RUN(test_str_is_alpha); + TEST_RUN(test_str_is_num); + TEST_RUN(test_str_is_alphanum); + TEST_RUN(test_str_length); + TEST_RUN(test_str_to_float); - RUN_TEST(test_str_length_performance); - RUN_TEST(test_str_is_alphanum_performance); - RUN_TEST(test_sprintf_fast_performance); + TEST_RUN(test_str_length_performance); + TEST_RUN(test_str_is_alphanum_performance); + TEST_RUN(test_sprintf_fast_performance); TEST_FINALIZE(); diff --git a/tests/utils/UtilsTest.cpp b/tests/utils/UtilsTest.cpp index 8e85e8c..7b8f04f 100644 --- a/tests/utils/UtilsTest.cpp +++ b/tests/utils/UtilsTest.cpp @@ -54,26 +54,26 @@ static void test_is_empty() { ASSERT_TRUE(is_empty(region1, 0)); } -static void _is_equal(void* val) { - bool* res = (bool *) val; +static void _is_equal(volatile void* val) { + volatile bool* res = (volatile bool *) val; uint8_t region1[64]; uint8_t region2[64]; memset(region1, 0xAA, sizeof(region1)); memset(region2, 0xAA, sizeof(region2)); - *res = is_equal(region1, region2, sizeof(region1)); + *res |= is_equal(region1, region2, sizeof(region1)); } -static void _memcmp(void* val) { - bool* res = (bool *) val; +static void _memcmp(volatile void* val) { + volatile bool* res = (volatile bool *) val; uint8_t region1[64]; uint8_t region2[64]; memset(region1, 0xAA, sizeof(region1)); memset(region2, 0xAA, sizeof(region2)); - *res = (bool) (memcmp(region1, region2, sizeof(region1)) == 0); + *res |= (bool) (memcmp(region1, region2, sizeof(region1)) == 0); } static void test_is_equal_performance() { @@ -81,40 +81,40 @@ static void test_is_equal_performance() { COMPARE_FUNCTION_TEST_CYCLE(_is_equal, _memcmp, 10.0); } -static void _is_empty(void* val) { - bool* res = (bool *) val; +static void _is_empty(volatile void* val) { + volatile bool* res = (volatile bool *) val; alignas(64) uint8_t region1[64]; memset(region1, 0xAA, sizeof(region1)); - *res = is_empty(region1, sizeof(region1)); + *res |= is_empty(region1, sizeof(region1)); } -static void _memcmp_empty(void* val) { - 
bool* res = (bool *) val; +static void _memcmp_empty(volatile void* val) { + volatile bool* res = (volatile bool *) val; alignas(64) uint8_t region1[64]; memset(region1, 0xAA, sizeof(region1)); - *res = *region1 == 0 && memcmp(region1, region1 + 1, sizeof(region1) - 1) == 0; + *res |= *region1 == 0 && memcmp(region1, region1 + 1, sizeof(region1) - 1) == 0; } -static void _is_empty2(void* val) { - bool* res = (bool *) val; +static void _is_empty2(volatile void* val) { + volatile bool* res = (volatile bool *) val; alignas(64) uint8_t region1[64]; memset(region1, 0, sizeof(region1)); - *res = is_empty(region1, sizeof(region1)); + *res |= is_empty(region1, sizeof(region1)); } -static void _memcmp_empty2(void* val) { - bool* res = (bool *) val; +static void _memcmp_empty2(volatile void* val) { + volatile bool* res = (volatile bool *) val; alignas(64) uint8_t region1[64]; memset(region1, 0, sizeof(region1)); - *res = *region1 == 0 && memcmp(region1, region1 + 1, sizeof(region1) - 1) == 0; + *res |= *region1 == 0 && memcmp(region1, region1 + 1, sizeof(region1) - 1) == 0; } static void test_is_empty_performance() { @@ -135,11 +135,11 @@ static void test_is_empty_performance() { int main() { TEST_INIT(10); - RUN_TEST(test_is_equal); - RUN_TEST(test_is_empty); + TEST_RUN(test_is_equal); + TEST_RUN(test_is_empty); - RUN_TEST(test_is_equal_performance); - RUN_TEST(test_is_empty_performance); + TEST_RUN(test_is_equal_performance); + TEST_RUN(test_is_empty_performance); TEST_FINALIZE(); diff --git a/thread/ThreadPool.h b/thread/ThreadPool.h index a901a22..bab9229 100644 --- a/thread/ThreadPool.h +++ b/thread/ThreadPool.h @@ -65,7 +65,7 @@ static THREAD_RETURN thread_pool_worker(void* arg) LOG_2("ThreadPool worker ended"); // At the end of a thread the ring memory automatically is considered freed DEBUG_MEMORY_FREE((uintptr_t) work->ring.memory); - LOG_FORMAT_2("Freed thread RingMemory: %n B", {{LOG_DATA_UINT64, &work->ring.size}}); + LOG_2("Freed thread RingMemory: %n B", 
{{LOG_DATA_UINT64, &work->ring.size}}); atomic_set_release(&work->state, 1); // Job gets marked after completion -> can be overwritten now diff --git a/ui/UILayout.h b/ui/UILayout.h index b342285..02bf26e 100644 --- a/ui/UILayout.h +++ b/ui/UILayout.h @@ -83,12 +83,14 @@ struct UILayout { // 2. Once we are ready to switch the scene we copy the temporary memory into this data pointer byte* data; // Owner of the actual data + // @todo replace bools with bit field + // Changes on a as needed basis - uint32 vertex_size_static; + uint32 vertex_count_static; bool static_content_changed; // Changes every frame - uint32 vertex_size_dynamic; + uint32 vertex_count_dynamic; bool dynamic_content_changed; // Contains both static and dynamic content @@ -105,7 +107,7 @@ struct UILayout { // This is very similar to the currently rendered UI output but may have some empty space between elements // The reason for this is that some elements may need different vertex counts for different states (e.g. input field) // WARNING: This memory is shared between different layouts - uint32 active_vertex_size; + uint32 active_vertex_count; Vertex3DSamplerTextureColor* vertices_active; // Not the data owner (see data above) // Used during the initialization so that every element knows where we currently are during the setup process diff --git a/utils/MathUtils.h b/utils/MathUtils.h new file mode 100644 index 0000000..ab7aaf3 --- /dev/null +++ b/utils/MathUtils.h @@ -0,0 +1,319 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_UTILS_MATH_UTILS_H +#define TOS_UTILS_MATH_UTILS_H + +#include "../stdlib/Types.h" +#include "../utils/TestUtils.h" + +// WARNING: Don't use any of these functions yet. 
They are too imprecise and too slow + +inline +f64 factorial(int32 n) { + f64 result = 1.0; + for (int32 i = 1; i <= n; ++i) { + result *= i; + } + + return result; +} + +inline +f32 sin_approx(f32 x) { + // Normalize x to the range [-π, π] for better accuracy + while (x > OMS_PI) { + x -= OMS_TWO_PI; + } + + while (x < -OMS_PI) { + x += OMS_TWO_PI; + } + + f32 x2 = x * x; + return x * (1.0f + x2 * (-1.0f / 6.0f + x2 * (1.0f / 120.0f + x2 * (-1.0f / 5040.0f + x2 * (1.0f / 362880.0f))))); +} + +inline +f32 cos_approx(f32 x) { + return sin_approx(OMS_PI_OVER_TWO - x); +} + +inline +f32 tan_approx(f32 x) { + return sin_approx(x) / cos_approx(x); +} + +inline +f32 asin_approx(f32 x) { + // Undefined for |x| > 1 + ASSERT_SIMPLE(x >= -1.0f && x <= 1.0f); + + f32 result = x; + f32 term = x; + for (int32 i = 1; i <= 6; ++i) { + term *= x * x * (2 * i - 1) * (2 * i - 1) / ((2 * i) * (2 * i + 1)); + result += term; + } + + return result; +} + +inline +f32 acos_approx(f32 x) { + // π/2 - asin_approx(x) + return OMS_PI_OVER_TWO - asin_approx(x); +} + +inline +f32 atan_approx(f32 x) { + if (x > 1.0f) { + // π/2 - atan_approx(1/x) + return OMS_PI_OVER_TWO - atan_approx(1.0f / x); + } else if (x < -1.0f) { + // -π/2 - atan_approx(1/x) + return -OMS_PI_OVER_TWO - atan_approx(1.0f / x); + } + + f32 result = x; + f32 term = x; + for (int32 i = 1; i <= 6; ++i) { + term *= -x * x; + result += term / (2.0f * i + 1); + } + + return result; +} + +inline +f32 sqrt_approx(f32 a) { + ASSERT_SIMPLE(a >= 0); + + int32_t i = *(int32_t*)&a; + // Magic number for initial guess + i = 0x1FBD1DF5 + (i >> 1); + float x = *(float*)&i; + + // Newton-Raphson iterations + x = 0.5f * (x + a / x); + x = 0.5f * (x + a / x); + x = 0.5f * (x + a / x); + + return x; +} + +inline +f32 rsqrt_approx(f32 a) { + ASSERT_SIMPLE(a >= 0); + + // Initial guess using magic number (Quake III hack) + f32 x = a; + uint32 i = *(uint32 *)&x; + i = 0x5F3759DF - (i >> 1); // Magic number for initial guess + x = *(f32 *) &i; + 
+ // Newton-Raphson iterations + x = x * (1.5f - 0.5f * a * x * x); + x = x * (1.5f - 0.5f * a * x * x); + x = x * (1.5f - 0.5f * a * x * x); + + return x; +} + +inline +f32 exp_approx(f32 x) { + // Range reduction: e^x = e^(x / n)^n + const int32 n = 8; + x /= n; + + // Taylor series approximation for e^x + f32 result = 1.0f; + f32 term = 1.0f; + for (int32 i = 1; i <= 10; ++i) { + term *= x / i; + result += term; + } + + // Raise to the nth power + f32 final_result = result; + for (int32 i = 1; i < n; ++i) { + final_result *= result; + } + + return final_result; +} + +inline +f32 log_approx(f32 x) { + ASSERT_SIMPLE(x > 0); + + // Polynomial approximation + f32 y = (x - 1) / (x + 1); + f32 y2 = y * y; + f32 result = y * (1.0f + y2 * (1.0f / 3.0f + y2 * (1.0f / 5.0f + y2 * (1.0f / 7.0f)))); + + return 2.0f * result; +} + +inline +f32 pow_approx(f32 a, f32 b) { + if (a == 0.0f) { + return 0.0f; + } + + return exp_approx(b * log_approx(a)); +} + +//////////////////////////////////////////////////////////////// + +inline +f64 sin_approx(f64 x) { + // Normalize x to the range [-π, π] for better accuracy + while (x > OMS_PI) { + x -= OMS_TWO_PI; + } + + while (x < -OMS_PI) { + x += OMS_TWO_PI; + } + + f64 x2 = x * x; + return x * (1.0 + x2 * (-1.0 / 6.0 + x2 * (1.0 / 120.0 + x2 * (-1.0 / 5040.0 + x2 * (1.0 / 362880.0))))); +} + +inline +f64 cos_approx(f64 x) { + return sin_approx(OMS_PI_OVER_TWO - x); +} + +inline +f64 tan_approx(f64 x) { + return sin_approx(x) / cos_approx(x); +} + +inline +f64 asin_approx(f64 x) { + // Undefined for |x| > 1 + ASSERT_SIMPLE(x >= -1.0 && x <= 1.0); + + f64 result = x; + f64 term = x; + for (int32 i = 1; i <= 6; ++i) { + term *= x * x * (2 * i - 1) * (2 * i - 1) / ((2 * i) * (2 * i + 1)); + result += term; + } + + return result; +} + +inline +f64 acos_approx(f64 x) { + // π/2 - asin_approx(x) + return OMS_PI_OVER_TWO - asin_approx(x); +} + +inline +f64 atan_approx(f64 x) { + if (x > 1.0) { + // π/2 - atan_approx(1/x) + return 
OMS_PI_OVER_TWO - atan_approx(1.0 / x); + } else if (x < -1.0) { + // -π/2 - atan_approx(1/x) + return -OMS_PI_OVER_TWO - atan_approx(1.0 / x); + } + + f64 result = x; + f64 term = x; + for (int32 i = 1; i <= 6; ++i) { + term *= -x * x; + result += term / (2 * i + 1); + } + + return result; +} + +inline +f64 sqrt_approx(f64 a) { + ASSERT_SIMPLE(a >= 0); + + int64_t i = *(int64_t*)&a; + // Magic number for initial guess + i = 0x1FF7A3BEA91D9B1B + (i >> 1); + f64 x = *(f64*)&i; + + // Newton-Raphson iterations + x = 0.5 * (x + a / x); + x = 0.5 * (x + a / x); + x = 0.5 * (x + a / x); + + return x; +} + +inline +f64 rsqrt_approx(f64 a) { + ASSERT_SIMPLE(a >= 0); + + // Initial guess using magic number (Quake III hack) + f64 x = a; + uint64 i = *(uint64 *)&x; + i = 0x5fe6eb50c7b537a9 - (i >> 1); // Magic number for initial guess + x = *(f64 *) &i; + + // Newton-Raphson iterations + x = x * (1.5 - 0.5 * a * x * x); + x = x * (1.5 - 0.5 * a * x * x); + x = x * (1.5 - 0.5 * a * x * x); + + return x; +} + +inline +f64 exp_approx(f64 x) { + // Range reduction: e^x = e^(x / n)^n + const int32 n = 8; + x /= n; + + // Taylor series approximation for e^x + f64 result = 1.0; + f64 term = 1.0; + for (int32 i = 1; i <= 10; ++i) { + term *= x / i; + result += term; + } + + // Raise to the nth power + f64 final_result = 1.0; + for (int32 i = 0; i < n; ++i) { + final_result *= result; + } + + return final_result; +} + +inline +f64 log_approx(f64 x) { + ASSERT_SIMPLE(x > 0); + + // Polynomial approximation + f64 y = (x - 1) / (x + 1); + f64 y2 = y * y; + f64 result = y * (1.0 + y2 * (1.0 / 3.0 + y2 * (1.0 / 5.0 + y2 * (1.0 / 7.0)))); + + return 2.0 * result; +} + +inline +f64 pow_approx(f64 a, f64 b) { + if (a == 0.0) { + return 0.0; + } + + return exp_approx(b * log_approx(a)); +} + +#endif