prepare directx ui, not working yet

Dennis Eichhorn 2025-03-09 18:15:08 +01:00
parent faf95f3e1b
commit 17b803a0b6
38 changed files with 2222 additions and 498 deletions

View File

@ -133,7 +133,7 @@ void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* b
{
PROFILE(PROFILE_ASSET_ARCHIVE_LOAD, path, false, true);
LOG_FORMAT_1(
LOG_1(
"Load AssetArchive %s",
{{LOG_DATA_CHAR_STR, (void *) path}}
);
@ -174,7 +174,7 @@ void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* b
file_read(archive->fd, &file, 0, file.size);
asset_archive_header_load(&archive->header, file.content, steps);
LOG_FORMAT_1(
LOG_1(
"Loaded AssetArchive %s with %d assets",
{{LOG_DATA_CHAR_STR, (void *) path}, {LOG_DATA_UINT32, (void *) &archive->header.asset_count}}
);
@ -204,7 +204,7 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana
byte component_id = archive->asset_type_map[element->type];
//AssetComponent* ac = &ams->asset_components[component_id];
LOG_FORMAT_2(
LOG_2(
"Load asset %d from archive %d for AMS %d with %n B compressed and %n B uncompressed",
{{LOG_DATA_UINT64, &id}, {LOG_DATA_UINT32, &element->type}, {LOG_DATA_BYTE, &component_id}, {LOG_DATA_UINT32, &element->length}, {LOG_DATA_UINT32, &element->uncompressed}}
);
@ -314,7 +314,7 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana
// the main program should still be able to do some work if possible
thrd_ams_set_loaded(asset);
LOG_FORMAT_2(
LOG_2(
"Loaded asset %d from archive %d for AMS %d with %n B compressed and %n B uncompressed",
{{LOG_DATA_UINT64, &id}, {LOG_DATA_UINT32, &element->type}, {LOG_DATA_BYTE, &component_id}, {LOG_DATA_UINT32, &element->length}, {LOG_DATA_UINT32, &element->uncompressed}}
);

View File

@ -42,7 +42,7 @@ struct AssetManagementSystem {
inline
void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 asset_component_count, int32 count)
{
LOG_FORMAT_1("Create AMS for %n assets", {{LOG_DATA_INT32, &count}});
LOG_1("Create AMS for %n assets", {{LOG_DATA_INT32, &count}});
hashmap_create(&ams->hash_map, count, sizeof(HashEntry) + sizeof(Asset), buf);
ams->asset_component_count = asset_component_count;
ams->asset_components = (AssetComponent *) buffer_get_memory(buf, asset_component_count * sizeof(AssetComponent), 64, true);
@ -52,7 +52,7 @@ inline
void ams_component_create(AssetComponent* ac, BufferMemory* buf, int32 chunk_size, int32 count)
{
ASSERT_SIMPLE(chunk_size);
LOG_FORMAT_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}});
LOG_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}});
chunk_init(&ac->asset_memory, buf, count, chunk_size, 64);
pthread_mutex_init(&ac->mutex, NULL);
@ -62,7 +62,7 @@ inline
void ams_component_create(AssetComponent* ac, byte* buf, int32 chunk_size, int32 count)
{
ASSERT_SIMPLE(chunk_size);
LOG_FORMAT_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}});
LOG_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}});
ac->asset_memory.count = count;
ac->asset_memory.chunk_size = chunk_size;

View File

@ -40,7 +40,7 @@ void cmd_buffer_create(AppCmdBuffer* cb, BufferMemory* buf, int32 commands_count
chunk_init(&cb->commands, buf, commands_count, sizeof(Command), 64);
pthread_mutex_init(&cb->mutex, NULL);
LOG_FORMAT_1("Created AppCmdBuffer: %n B", {{LOG_DATA_UINT64, &cb->commands.size}});
LOG_1("Created AppCmdBuffer: %n B", {{LOG_DATA_UINT64, &cb->commands.size}});
}
// This doesn't load the asset directly but tells (most likely) a worker thread to load an asset
@ -356,7 +356,7 @@ inline void* cmd_func_run(AppCmdBuffer*, CommandFunction func) {
}
inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, int32 asset_id) {
LOG_FORMAT_1("Load texture %d", {{LOG_DATA_INT32, &asset_id}});
LOG_1("Load texture %d", {{LOG_DATA_INT32, &asset_id}});
// Check if asset already loaded
char id_str[9];
@ -385,7 +385,7 @@ inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, int32 asset_id) {
}
inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, const char* name) {
LOG_FORMAT_1("Load texture %d", {{LOG_DATA_CHAR_STR, (void *) name}});
LOG_1("Load texture %d", {{LOG_DATA_CHAR_STR, (void *) name}});
PROFILE(PROFILE_CMD_ASSET_LOAD_SYNC, name, false, true);
// Check if asset already loaded
@ -413,7 +413,7 @@ inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, const char* name) {
inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, int32 asset_id)
{
LOG_FORMAT_1("Load font %d", {{LOG_DATA_INT32, &asset_id}});
LOG_1("Load font %d", {{LOG_DATA_INT32, &asset_id}});
// Check if asset already loaded
char id_str[9];
@ -442,7 +442,7 @@ inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, int32 asset_id)
inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, const char* name)
{
LOG_FORMAT_1("Load font %s", {{LOG_DATA_CHAR_STR, (void *) name}});
LOG_1("Load font %s", {{LOG_DATA_CHAR_STR, (void *) name}});
PROFILE(PROFILE_CMD_FONT_LOAD_SYNC, name, false, true);
// Check if asset already loaded
@ -472,13 +472,13 @@ UILayout* cmd_layout_load_sync(
UILayout* __restrict layout, const char* __restrict layout_path
) {
PROFILE(PROFILE_CMD_LAYOUT_LOAD_SYNC, layout_path, false, true);
LOG_FORMAT_1("Load layout %s", {{LOG_DATA_CHAR_STR, (void *) layout_path}});
LOG_1("Load layout %s", {{LOG_DATA_CHAR_STR, (void *) layout_path}});
FileBody layout_file = {};
file_read(layout_path, &layout_file, cb->mem_vol);
if (!layout_file.content) {
LOG_FORMAT_1("Failed loading layout \"%s\"", {{LOG_DATA_CHAR_STR, (void *) layout_path}});
LOG_1("Failed loading layout \"%s\"", {{LOG_DATA_CHAR_STR, (void *) layout_path}});
return NULL;
}
@ -493,7 +493,7 @@ UIThemeStyle* cmd_theme_load_sync(
UIThemeStyle* __restrict theme, const char* __restrict theme_path
) {
PROFILE(PROFILE_CMD_THEME_LOAD_SYNC, theme_path, false, true);
LOG_FORMAT_1("Load theme %s", {{LOG_DATA_CHAR_STR, (void *) theme_path}});
LOG_1("Load theme %s", {{LOG_DATA_CHAR_STR, (void *) theme_path}});
FileBody theme_file = {};
file_read(theme_path, &theme_file, cb->mem_vol);
@ -519,7 +519,7 @@ UILayout* cmd_ui_load_sync(
const Camera* __restrict camera
) {
PROFILE(PROFILE_CMD_UI_LOAD_SYNC, layout_path, false, true);
LOG_FORMAT_1("Load ui with layout %s and theme %s", {{LOG_DATA_CHAR_STR, (void *) layout_path}, {LOG_DATA_CHAR_STR, (void *) theme_path}});
LOG_1("Load ui with layout %s and theme %s", {{LOG_DATA_CHAR_STR, (void *) layout_path}, {LOG_DATA_CHAR_STR, (void *) theme_path}});
if (!cmd_layout_load_sync(cb, layout, layout_path)) {
// We have to make sure that at least the font is set

View File

@ -27,7 +27,8 @@ void* cmd_shader_load(AppCmdBuffer*, Command*) {
void* cmd_shader_load_sync(
AppCmdBuffer* __restrict cb, Shader* __restrict shader, const int32* __restrict shader_ids,
ID3D12Device* __restrict device, ID3D12PipelineState** __restrict pipeline, ID3D12RootSignature* __restrict pipeline_layout
ID3D12Device* __restrict device, ID3D12PipelineState** __restrict pipeline, ID3D12RootSignature* __restrict pipeline_layout,
D3D12_INPUT_ELEMENT_DESC* __restrict descriptor_set_layouts, int32 layout_count
) {
PROFILE(PROFILE_CMD_SHADER_LOAD_SYNC, NULL, false, true);
char asset_id[9];
@ -53,7 +54,7 @@ void* cmd_shader_load_sync(
}
// Make sub shader
shader_assets[i] = shader_make(
shader_assets[i] = gpuapi_shader_make(
shader_type_index((ShaderType) (i + 1)),
(char *) shader_asset->self,
shader_asset->ram_size
@ -64,11 +65,14 @@ void* cmd_shader_load_sync(
}
// Make shader/program
shader->id = pipeline_make(
shader->id = gpuapi_pipeline_make(
device, pipeline, pipeline_layout,
descriptor_set_layouts, layout_count,
shader_assets[0], shader_assets[1], shader_assets[2]
);
// @question do I release shader_assets[..]?
return NULL;
}

View File

@ -15,11 +15,15 @@
#include <dxgi1_6.h>
#include <d3dcommon.h>
#include "../../../GameEngine/log/Log.h"
#include "../../../GameEngine/memory/RingMemory.h"
#include "../../../GameEngine/object/Texture.h"
#include "../../../GameEngine/image/Image.cpp"
#include "../../compiler/CompilerUtils.h"
// #include "../../../EngineDependencies/directx/d3d12.h"
// #include "../../../EngineDependencies/directx/d3dx12.h"
#include "FramesInFlightContainer.h"
// A more (compile-time) efficient version of the windows macro IID_PPV_ARGS
// Replacement for the windows macro IID_PPV_ARGS
#define IID_PPVOID(pointer) __uuidof(**(pointer)), (void **) (pointer)
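A hedged sketch of what a call site looks like after expansion (adapter is an assumption for illustration):
// IID_PPVOID(&device) expands to: __uuidof(**(&device)), (void **) (&device)
// i.e. the IID of ID3D12Device plus the out pointer, without instantiating
// the IID_PPV_ARGS_Helper template that IID_PPV_ARGS routes through
ID3D12Device* device = NULL;
D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0, IID_PPVOID(&device));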
bool is_directx_supported(D3D_FEATURE_LEVEL version)
@ -89,6 +93,22 @@ int32 max_directx_version()
return 0;
}
inline
void change_viewport(
int32 width, int32 height,
ID3D12GraphicsCommandList* command_buffer, D3D12_VIEWPORT* viewport, D3D12_RECT* scissor_rect
)
{
viewport->Width = (f32) width;
viewport->Height = (f32) height;
scissor_rect->right = width;
scissor_rect->bottom = height;
command_buffer->RSSetViewports(1, viewport);
command_buffer->RSSetScissorRects(1, scissor_rect);
}
// Returns frame index
int32 wait_for_previous_frame(
FramesInFlightContainer* frames_in_flight,
@ -100,11 +120,13 @@ int32 wait_for_previous_frame(
// sample illustrates how to use fences for efficient resource usage and to
// maximize GPU utilization.
UINT64 fence_value_temp = frames_in_flight->fence_value;
uint64 fence_value_temp = frames_in_flight->fence_value;
HRESULT hr;
// Signal and increment the fence value.
if(FAILED(graphics_queue->Signal(frames_in_flight->fence, fence_value_temp))) {
LOG_1("DirectX12 Signal");
if(FAILED(hr = graphics_queue->Signal(frames_in_flight->fence, fence_value_temp))) {
LOG_1("DirectX12 Signal: %d", {{LOG_DATA_INT32, &hr}});
ASSERT_SIMPLE(false);
}
@ -112,8 +134,8 @@ int32 wait_for_previous_frame(
// Wait until the previous frame is finished.
if (frames_in_flight->fence->GetCompletedValue() < fence_value_temp) {
if (FAILED(frames_in_flight->fence->SetEventOnCompletion(fence_value_temp, frames_in_flight->fence_event))) {
LOG_1("DirectX12 SetEventOnCompletion");
if (FAILED(hr = frames_in_flight->fence->SetEventOnCompletion(fence_value_temp, frames_in_flight->fence_event))) {
LOG_1("DirectX12 SetEventOnCompletion: %d", {{LOG_DATA_INT32, &hr}});
ASSERT_SIMPLE(false);
}
@ -170,11 +192,496 @@ void gpuapi_debug_messenger_setup(ID3D12Device* device)
}
inline
void gpuapi_create_logical_device(ID3D12Device** device) {
if (FAILED(D3D12CreateDevice(NULL, D3D_FEATURE_LEVEL_11_0, IID_PPVOID(device)))) {
LOG_1("DirectX12 D3D12CreateDevice");
void gpuapi_pick_physical_device(IDXGIFactory6* instance, IDXGIAdapter1** physical_device, bool requestHighPerformanceAdapter = true)
{
IDXGIAdapter1* adapter = NULL;
IDXGIFactory6* factory6 = NULL;
if (SUCCEEDED(instance->QueryInterface(IID_PPVOID(&factory6)))) {
for (uint32 adapterIndex = 0;
SUCCEEDED(factory6->EnumAdapterByGpuPreference(
adapterIndex,
requestHighPerformanceAdapter == true ? DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE : DXGI_GPU_PREFERENCE_UNSPECIFIED,
IID_PPVOID(&adapter))
);
++adapterIndex
) {
DXGI_ADAPTER_DESC1 desc;
adapter->GetDesc1(&desc);
if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) {
// Don't select the Basic Render Driver adapter.
// If you want a software adapter, pass in "/warp" on the command line.
continue;
}
// Check to see whether the adapter supports Direct3D 12, but don't create the actual device yet.
if (SUCCEEDED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), NULL))) {
break;
}
}
}
if(!adapter) {
for (uint32 adapterIndex = 0; SUCCEEDED(instance->EnumAdapters1(adapterIndex, &adapter)); ++adapterIndex) {
DXGI_ADAPTER_DESC1 desc;
adapter->GetDesc1(&desc);
if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) {
// Don't select the Basic Render Driver adapter.
continue;
}
// Check to see whether the adapter supports Direct3D 12, but don't create the actual device yet.
if (SUCCEEDED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), NULL))) {
break;
}
}
}
*physical_device = adapter;
if (factory6) {
factory6->Release();
}
}
inline
void gpuapi_create_logical_device(IDXGIAdapter1* physical_device, ID3D12Device** device)
{
HRESULT hr;
if (FAILED(hr = D3D12CreateDevice(physical_device, D3D_FEATURE_LEVEL_11_0, IID_PPVOID(device)))) {
LOG_1("DirectX12 D3D12CreateDevice: %d", {{LOG_DATA_INT32, &hr}});
ASSERT_SIMPLE(false);
}
}
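A minimal sketch wiring the two functions above together; CreateDXGIFactory2 with zero flags is an assumption about the surrounding setup code:
IDXGIFactory6* factory = NULL;
IDXGIAdapter1* physical_device = NULL;
ID3D12Device* device = NULL;
CreateDXGIFactory2(0, IID_PPVOID(&factory));
gpuapi_pick_physical_device(factory, &physical_device); // prefers the high-performance GPU
gpuapi_create_logical_device(physical_device, &device);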
inline
void gpuapi_command_buffer_create(
ID3D12Device* device,
ID3D12CommandAllocator* command_pool,
ID3D12PipelineState* pipeline,
ID3D12GraphicsCommandList** command_buffer
)
{
HRESULT hr;
if (FAILED(hr = device->CreateCommandList(
0, D3D12_COMMAND_LIST_TYPE_DIRECT,
command_pool, pipeline,
IID_PPVOID(command_buffer)))
) {
LOG_1("DirectX12 CreateCommandList: %d", {{LOG_DATA_INT32, &hr}});
ASSERT_SIMPLE(false);
};
}
static
DXGI_FORMAT gpuapi_texture_format(byte settings)
{
if ((settings & IMAGE_SETTING_CHANNEL_4_SIZE)) {
switch (settings & IMAGE_SETTING_CHANNEL_COUNT) {
case 1:
return DXGI_FORMAT_R32_FLOAT;
case 2:
return DXGI_FORMAT_R32G32_FLOAT;
case 3:
return DXGI_FORMAT_R32G32B32_FLOAT;
case 4:
return DXGI_FORMAT_R32G32B32A32_FLOAT;
default:
UNREACHABLE();
}
} else {
switch (settings & IMAGE_SETTING_CHANNEL_COUNT) {
case 1:
return DXGI_FORMAT_R8_UNORM;
case 2:
return DXGI_FORMAT_R8G8_UNORM;
case 3:
// RGB is not supported (probably due to the alignment)
return DXGI_FORMAT_R8G8B8A8_UNORM;
case 4:
return DXGI_FORMAT_R8G8B8A8_UNORM;
default:
UNREACHABLE();
}
}
}
// @performance Sometimes we want to upload multiple textures in one go (more performant). Allow that or don't use this function in that case.
D3D12_CPU_DESCRIPTOR_HANDLE load_texture_to_gpu(
ID3D12Device* device,
ID3D12GraphicsCommandList* command_buffer,
ID3D12Resource** texture_resource,
int32 descriptorOffset,
ID3D12DescriptorHeap* srv_heap,
const Texture* texture,
RingMemory* ring
) {
DXGI_FORMAT textureFormat = gpuapi_texture_format(texture->image.image_settings);
D3D12_RESOURCE_DESC textureDesc = {};
textureDesc.MipLevels = 1;
textureDesc.Format = textureFormat;
textureDesc.Width = texture->image.width;
textureDesc.Height = texture->image.height;
textureDesc.Flags = D3D12_RESOURCE_FLAG_NONE;
textureDesc.DepthOrArraySize = 1;
textureDesc.SampleDesc.Count = 1;
textureDesc.SampleDesc.Quality = 0;
textureDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
D3D12_HEAP_PROPERTIES texture_heap_property = {
.Type = D3D12_HEAP_TYPE_DEFAULT,
.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
.CreationNodeMask = 1,
.VisibleNodeMask = 1
};
HRESULT hr;
if (FAILED(hr = device->CreateCommittedResource(
&texture_heap_property,
D3D12_HEAP_FLAG_NONE,
&textureDesc,
D3D12_RESOURCE_STATE_COPY_DEST,
NULL,
IID_PPVOID(texture_resource)))
) {
LOG_1("DirectX12 CreateCommittedResource: %d", {{LOG_DATA_INT32, &hr}});
ASSERT_SIMPLE(false);
return {0};
}
const D3D12_RESOURCE_DESC DestinationDesc = (*texture_resource)->GetDesc();
uint64 uploadBufferSize = 0;
ID3D12Device* pDevice = NULL;
(*texture_resource)->GetDevice(IID_PPVOID(&pDevice));
pDevice->GetCopyableFootprints(&DestinationDesc, 0, 1, 0, NULL, NULL, NULL, &uploadBufferSize);
D3D12_RESOURCE_DESC texture_upload_buffer = {
.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
.Alignment = 0,
.Width = uploadBufferSize,
.Height = 1,
.DepthOrArraySize = 1,
.MipLevels = 1,
.Format = DXGI_FORMAT_UNKNOWN,
.SampleDesc = {
.Count = 1,
.Quality = 0,
},
.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
.Flags = D3D12_RESOURCE_FLAG_NONE
};
D3D12_HEAP_PROPERTIES texture_upload_heap_property = {
.Type = D3D12_HEAP_TYPE_UPLOAD,
.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
.CreationNodeMask = 1,
.VisibleNodeMask = 1
};
ID3D12Resource* texture_upload_heap;
if (FAILED(hr = device->CreateCommittedResource(
&texture_heap_property,
D3D12_HEAP_FLAG_NONE,
&texture_upload_buffer,
D3D12_RESOURCE_STATE_GENERIC_READ,
NULL,
IID_PPVOID(&texture_upload_heap)))
) {
if (pDevice) {
pDevice->Release();
}
LOG_1("DirectX12 CreateCommittedResource: %d", {{LOG_DATA_INT32, &hr}});
ASSERT_SIMPLE(false);
return {0};
}
int32 pixel_size = image_pixel_size_from_type(texture->image.image_settings);
D3D12_SUBRESOURCE_DATA textureData[] = {
{
.pData = texture->image.pixels,
.RowPitch = texture->image.width * pixel_size,
.SlicePitch = (texture->image.width * pixel_size) * texture->image.height,
}
};
uint32 number_of_resources = ARRAY_COUNT(textureData);
uint32 FirstSubresource = 0;
uint64 IntermediateOffset = 0;
uint64 RequiredSize = 0;
uint64 MemToAlloc = (uint64) (sizeof(D3D12_PLACED_SUBRESOURCE_FOOTPRINT) + sizeof(uint32) + sizeof(uint64)) * number_of_resources;
D3D12_PLACED_SUBRESOURCE_FOOTPRINT* pLayouts = (D3D12_PLACED_SUBRESOURCE_FOOTPRINT *) ring_get_memory(ring, MemToAlloc, 64);
uint64* pRowSizesInBytes = (uint64 *) (pLayouts + number_of_resources);
uint32* pNumRows = (uint32 *) (pRowSizesInBytes + number_of_resources);
pDevice->GetCopyableFootprints(&DestinationDesc, FirstSubresource, number_of_resources, IntermediateOffset, pLayouts, pNumRows, pRowSizesInBytes, &RequiredSize);
pDevice->Release();
const D3D12_RESOURCE_DESC IntermediateDesc = texture_upload_heap->GetDesc();
if (IntermediateDesc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER
|| IntermediateDesc.Width < RequiredSize + pLayouts[0].Offset
|| RequiredSize > ((size_t) -1)
|| (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER &&
(FirstSubresource != 0 || number_of_resources != 1)
)
) {
if (texture_upload_heap) {
texture_upload_heap->Release();
}
LOG_1("DirectX12 texture resource setup");
ASSERT_SIMPLE(false);
return {0};
}
byte* pData;
if (FAILED(hr = texture_upload_heap->Map(0, NULL, (void **) &pData))) {
if (texture_upload_heap) {
texture_upload_heap->Release();
}
LOG_1("DirectX12 Map: %d", {{LOG_DATA_INT32, &hr}});
ASSERT_SIMPLE(false);
return {0};
}
for (uint32 i = 0; i < number_of_resources; ++i) {
ASSERT_SIMPLE(pRowSizesInBytes[i] <= ((size_t) -1));
D3D12_MEMCPY_DEST DestData = { pData + pLayouts[i].Offset, pLayouts[i].Footprint.RowPitch, ((size_t) pLayouts[i].Footprint.RowPitch) * ((size_t) pNumRows[i]) };
for (uint32 z = 0; z < pLayouts[i].Footprint.Depth; ++z) {
byte* pDestSlice = ((byte *) DestData.pData) + DestData.SlicePitch * z;
byte* pSrcSlice = ((byte *) textureData[i].pData) + textureData[i].SlicePitch * ((intptr_t) z);
for (uint32 y = 0; y < pNumRows[i]; ++y) {
memcpy(
pDestSlice + DestData.RowPitch * y,
pSrcSlice + textureData[i].RowPitch * ((intptr_t) y),
(size_t) pRowSizesInBytes[i]
);
}
}
}
texture_upload_heap->Unmap(0, NULL);
if (DestinationDesc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) {
command_buffer->CopyBufferRegion(
*texture_resource, 0, texture_upload_heap, pLayouts[0].Offset, pLayouts[0].Footprint.Width
);
} else {
for (uint32 i = 0; i < number_of_resources; ++i) {
D3D12_TEXTURE_COPY_LOCATION Dst = {
.pResource = *texture_resource,
.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
.SubresourceIndex = i + FirstSubresource,
};
D3D12_TEXTURE_COPY_LOCATION Src = {
.pResource = texture_upload_heap,
.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
.PlacedFootprint = pLayouts[i],
};
command_buffer->CopyTextureRegion(&Dst, 0, 0, 0, &Src, NULL);
}
}
D3D12_RESOURCE_BARRIER barrier = {
.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
.Transition = {
.pResource = *texture_resource,
.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST,
.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
}
};
command_buffer->ResourceBarrier(1, &barrier);
D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {};
srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
srvDesc.Format = textureDesc.Format;
srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
srvDesc.Texture2D.MipLevels = 1;
D3D12_CPU_DESCRIPTOR_HANDLE srv_handle = srv_heap->GetCPUDescriptorHandleForHeapStart();
device->CreateShaderResourceView(*texture_resource, &srvDesc, srv_handle);
if (texture_upload_heap) {
texture_upload_heap->Release();
}
srv_handle.ptr += descriptorOffset * device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
return srv_handle;
}
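A rough usage sketch (names illustrative); note the upload is only recorded into command_buffer here and still has to be executed and fenced before the first draw that samples the texture:
ID3D12Resource* texture_resource = NULL;
D3D12_CPU_DESCRIPTOR_HANDLE srv = load_texture_to_gpu(
device, command_buffer, &texture_resource,
0, srv_heap, texture, ring
);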
void gpuapi_vertex_buffer_create(
ID3D12Device* device,
D3D12_VERTEX_BUFFER_VIEW* vertex_buffer_view,
ID3D12Resource** vertex_buffer,
const void* __restrict vertices, uint32 vertex_size, uint32 vertex_count
)
{
D3D12_RESOURCE_DESC resource_info = {
.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
.Alignment = 0,
.Width = vertex_size * vertex_count,
.Height = 1,
.DepthOrArraySize = 1,
.MipLevels = 1,
.Format = DXGI_FORMAT_UNKNOWN,
.SampleDesc = {
.Count = 1,
.Quality = 0
},
.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
.Flags = D3D12_RESOURCE_FLAG_NONE
};
// Note: using upload heaps to transfer static data like vert buffers is not
// recommended. Every time the GPU needs it, the upload heap will be marshalled
// over. Please read up on Default Heap usage. An upload heap is used here for
// code simplicity and because there are very few verts to actually transfer.
D3D12_HEAP_PROPERTIES heap_property = {
.Type = D3D12_HEAP_TYPE_UPLOAD,
.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
.CreationNodeMask = 1,
.VisibleNodeMask = 1
};
HRESULT hr;
if (FAILED(hr = device->CreateCommittedResource(
&heap_property,
D3D12_HEAP_FLAG_NONE,
&resource_info,
D3D12_RESOURCE_STATE_GENERIC_READ,
NULL,
IID_PPVOID(vertex_buffer)))
) {
LOG_1("DirectX12 CreateCommittedResource: %d", {{LOG_DATA_INT32, &hr}});
ASSERT_SIMPLE(false);
return;
}
// Copy the triangle data to the vertex buffer
uint8* pVertexDataBegin;
// We do not intend to read from this resource on the CPU
D3D12_RANGE readRange = {};
if (FAILED(hr = (*vertex_buffer)->Map(0, &readRange, (void **) &pVertexDataBegin))) {
LOG_1("DirectX12 Map: %d", {{LOG_DATA_INT32, &hr}});
ASSERT_SIMPLE(false);
}
memcpy(pVertexDataBegin, vertices, vertex_size * vertex_count);
(*vertex_buffer)->Unmap(0, NULL);
// Initialize the vertex buffer view
vertex_buffer_view->BufferLocation = (*vertex_buffer)->GetGPUVirtualAddress();
vertex_buffer_view->StrideInBytes = vertex_size;
vertex_buffer_view->SizeInBytes = vertex_size * vertex_count;
}
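A hedged usage sketch for a textured 2D quad; vertices and the draw calls are assumptions, only the IA binding is dictated by the view:
D3D12_VERTEX_BUFFER_VIEW vb_view = {};
ID3D12Resource* vb = NULL;
gpuapi_vertex_buffer_create(device, &vb_view, &vb, vertices, sizeof(Vertex2DTexture), 6);
// at draw time:
// command_buffer->IASetVertexBuffers(0, 1, &vb_view);
// command_buffer->DrawInstanced(6, 1, 0, 0);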
void gpuapi_vertex_buffer_update(
ID3D12Resource* vertex_buffer,
const void* __restrict vertices,
uint32 vertex_size,
uint32 vertex_count,
uint32 offset = 0
)
{
uint64 size = vertex_count * vertex_size;
uint8* pVertexDataBegin;
D3D12_RANGE readRange = {};
D3D12_RANGE writeRange = { offset, offset + size };
HRESULT hr;
if (FAILED(hr = vertex_buffer->Map(0, &readRange, (void**)&pVertexDataBegin))) {
LOG_1("DirectX12 Map: %d", {{LOG_DATA_INT32, &hr}});
ASSERT_SIMPLE(false);
return;
}
memcpy(pVertexDataBegin + offset, vertices, size);
vertex_buffer->Unmap(0, &writeRange);
}
// In directx this is actually called a constant buffer
void gpuapi_uniform_buffers_create(
ID3D12Device* device,
ID3D12Resource** uniform_buffer,
const void* __restrict data, uint32 buffer_size
)
{
D3D12_RESOURCE_DESC resource_info = {
.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER,
.Alignment = 0,
.Width = buffer_size,
.Height = 1,
.DepthOrArraySize = 1,
.MipLevels = 1,
.Format = DXGI_FORMAT_UNKNOWN,
.SampleDesc = {
.Count = 1,
.Quality = 0
},
.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
.Flags = D3D12_RESOURCE_FLAG_NONE
};
// Note: using upload heaps to transfer static data like vert buffers is not
// recommended. Every time the GPU needs it, the upload heap will be marshalled
// over. Please read up on Default Heap usage. An upload heap is used here for
// code simplicity and because there are very few verts to actually transfer.
D3D12_HEAP_PROPERTIES heap_property = {
.Type = D3D12_HEAP_TYPE_UPLOAD,
.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
.CreationNodeMask = 1,
.VisibleNodeMask = 1
};
device->CreateCommittedResource(
&heap_property,
D3D12_HEAP_FLAG_NONE,
&resource_info,
D3D12_RESOURCE_STATE_GENERIC_READ,
NULL,
IID_PPV_ARGS(uniform_buffer));
D3D12_RANGE readRange = {};
uint8* pCBDataBegin;
(*uniform_buffer)->Map(0, &readRange, (void **) &pCBDataBegin);
memcpy(pCBDataBegin, &data, buffer_size);
(*uniform_buffer)->Unmap(0, NULL);
}
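One constraint worth keeping in mind here: if this buffer is later exposed through a constant buffer view, D3D12 requires the view size to be a multiple of 256 B (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT), so callers typically round up:
uint32 aligned_size = (buffer_size + 255) & ~255u;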
void gpuapi_uniform_buffer_update(
ID3D12Resource* uniform_buffer,
const void* __restrict data,
uint32 buffer_size
)
{
D3D12_RANGE readRange = {};
uint8* pCBDataBegin = nullptr;
uniform_buffer->Map(0, &readRange, (void **) &pCBDataBegin);
memcpy(pCBDataBegin, data, buffer_size);
uniform_buffer->Unmap(0, nullptr);
}
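A per-frame usage sketch, assuming the buffer was created above and is bound as a root CBV (root parameter index 0 and the mvp stand-in are assumptions):
float mvp[16]; // stand-in for whatever constants the shader expects
gpuapi_uniform_buffer_update(uniform_buffer, mvp, sizeof(mvp));
// command_buffer->SetGraphicsRootConstantBufferView(0, uniform_buffer->GetGPUVirtualAddress());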
#endif

View File

@ -17,7 +17,13 @@
#include "../../stdlib/Types.h"
#include "../../memory/RingMemory.h"
#include "../../log/Log.h"
#include "../../log/Stats.h"
#include "../../log/PerformanceProfiler.h"
#include "../../object/Vertex.h"
#include "../../utils/StringUtils.h"
#include "../../log/Log.h"
#include "../ShaderType.h"
#include "../GpuAttributeType.h"
#pragma comment(lib, "d3dcompiler.lib")
@ -33,7 +39,7 @@ const char* shader_type_index(ShaderType type)
}
}
ID3DBlob* shader_make(const char* type, const char* source, int32 source_size)
ID3DBlob* gpuapi_shader_make(const char* type, const char* source, int32 source_size)
{
LOG_1("Create shader");
#if DEBUG || INTERNAL
@ -44,8 +50,10 @@ ID3DBlob* shader_make(const char* type, const char* source, int32 source_size)
ID3DBlob* blob;
ID3DBlob* errMsgs;
if (FAILED(D3DCompile2(source, source_size, NULL, NULL, NULL, "main", type, compileFlags, 0, 0, NULL, 0, &blob, &errMsgs))) {
LOG_1("DirectX12 D3DCompile2");
HRESULT hr;
if (FAILED(hr = D3DCompile2(source, source_size, NULL, NULL, NULL, "main", type, compileFlags, 0, 0, NULL, 0, &blob, &errMsgs))) {
LOG_1("DirectX12 D3DCompile2: %d, %s", {{LOG_DATA_INT32, &hr}, {LOG_DATA_CHAR_STR, errMsgs->GetBufferPointer()}});
ASSERT_SIMPLE(false);
}
@ -58,24 +66,21 @@ ID3DBlob* shader_make(const char* type, const char* source, int32 source_size)
return blob;
}
ID3D12PipelineState* pipeline_make(
ID3D12PipelineState* gpuapi_pipeline_make(
ID3D12Device* device,
ID3D12PipelineState** pipeline,
ID3D12RootSignature* pipeline_layout,
D3D12_INPUT_ELEMENT_DESC* descriptor_set_layouts, uint32 layout_count,
ID3DBlob* vertex_shader,
ID3DBlob* fragment_shader,
ID3DBlob*
) {
PROFILE(PROFILE_PIPELINE_MAKE, NULL, false, true);
LOG_1("Create pipeline");
// @todo We need to find a way to do this somewhere else:
D3D12_INPUT_ELEMENT_DESC input_element_info[] = {
{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 },
{ "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }
};
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_info = {};
pipeline_state_info.InputLayout = { input_element_info, _countof(input_element_info) };
pipeline_state_info.InputLayout = { descriptor_set_layouts, layout_count };
pipeline_state_info.pRootSignature = pipeline_layout;
pipeline_state_info.VS = {
.pShaderBytecode = vertex_shader->GetBufferPointer(),
@ -122,8 +127,9 @@ ID3D12PipelineState* pipeline_make(
pipeline_state_info.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM;
pipeline_state_info.SampleDesc.Count = 1;
if (FAILED(device->CreateGraphicsPipelineState(&pipeline_state_info, IID_PPV_ARGS(pipeline)))) {
LOG_1("DirectX12 CreateGraphicsPipelineState");
HRESULT hr;
if (FAILED(hr = device->CreateGraphicsPipelineState(&pipeline_state_info, IID_PPV_ARGS(pipeline)))) {
LOG_1("DirectX12 CreateGraphicsPipelineState: %d", {{LOG_DATA_INT32, &hr}});
ASSERT_SIMPLE(false);
}
@ -133,10 +139,223 @@ ID3D12PipelineState* pipeline_make(
return *pipeline;
}
inline
void pipeline_use(ID3D12GraphicsCommandList* command_buffer, ID3D12PipelineState* pipelineState)
FORCE_INLINE
void gpuapi_pipeline_use(ID3D12GraphicsCommandList* command_buffer, ID3D12PipelineState* pipelineState)
{
command_buffer->SetPipelineState(pipelineState);
}
// In DirectX Attribute info and descriptor set layout are combined into one
constexpr
void gpuapi_attribute_info_create(GpuAttributeType type, D3D12_INPUT_ELEMENT_DESC* attr)
{
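// @todo SemanticName is never set in the initializers below, so it stays NULL;
// CreateGraphicsPipelineState will reject the input layout until each element
// names its HLSL semantic (e.g. "POSITION", "NORMAL", "TEXCOORD", "COLOR")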
switch (type) {
case GPU_ATTRIBUTE_TYPE_VERTEX_3D: {
attr[0] = {
.SemanticIndex = 0,
.Format = DXGI_FORMAT_R32G32B32_FLOAT,
.AlignedByteOffset = offsetof(Vertex3D, position),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
attr[1] = {
.SemanticIndex = 1,
.Format = DXGI_FORMAT_R32G32B32_FLOAT,
.AlignedByteOffset = offsetof(Vertex3D, normal),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
attr[2] = {
.SemanticIndex = 2,
.Format = DXGI_FORMAT_R32G32_FLOAT,
.AlignedByteOffset = offsetof(Vertex3D, tex_coord),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
attr[3] = {
.SemanticIndex = 3,
.Format = DXGI_FORMAT_R32G32B32A32_FLOAT,
.AlignedByteOffset = offsetof(Vertex3D, color),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
} return;
case GPU_ATTRIBUTE_TYPE_VERTEX_3D_NORMAL: {
attr[0] = {
.SemanticIndex = 0,
.Format = DXGI_FORMAT_R32G32B32_FLOAT,
.AlignedByteOffset = offsetof(Vertex3DNormal, position),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
attr[1] = {
.SemanticIndex = 1,
.Format = DXGI_FORMAT_R32G32B32_FLOAT,
.AlignedByteOffset = offsetof(Vertex3DNormal, normal),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
} return;
case GPU_ATTRIBUTE_TYPE_VERTEX_3D_COLOR: {
attr[0] = {
.SemanticIndex = 0,
.Format = DXGI_FORMAT_R32G32B32_FLOAT,
.AlignedByteOffset = offsetof(Vertex3DColor, position),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
attr[1] = {
.SemanticIndex = 1,
.Format = DXGI_FORMAT_R32G32B32A32_FLOAT,
.AlignedByteOffset = offsetof(Vertex3DColor, color),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
} return;
case GPU_ATTRIBUTE_TYPE_VERTEX_3D_TEXTURE_COLOR: {
attr[0] = {
.SemanticIndex = 0,
.Format = DXGI_FORMAT_R32G32B32_FLOAT,
.AlignedByteOffset = offsetof(Vertex3DTextureColor, position),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
attr[1] = {
.SemanticIndex = 1,
.Format = DXGI_FORMAT_R32G32_FLOAT,
.AlignedByteOffset = offsetof(Vertex3DTextureColor, texture_color),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
} return;
case GPU_ATTRIBUTE_TYPE_VERTEX_3D_SAMPLER_TEXTURE_COLOR: {
attr[0] = {
.SemanticIndex = 0,
.Format = DXGI_FORMAT_R32G32B32_FLOAT,
.AlignedByteOffset = offsetof(Vertex3DSamplerTextureColor, position),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
attr[1] = {
.SemanticIndex = 1,
.Format = DXGI_FORMAT_R32_SINT,
.AlignedByteOffset = offsetof(Vertex3DSamplerTextureColor, sampler),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
attr[2] = {
.SemanticIndex = 2,
.Format = DXGI_FORMAT_R32G32_FLOAT,
.AlignedByteOffset = offsetof(Vertex3DSamplerTextureColor, texture_color),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
} return;
case GPU_ATTRIBUTE_TYPE_VERTEX_2D_TEXTURE: {
attr[0] = {
.SemanticIndex = 0,
.Format = DXGI_FORMAT_R32G32_FLOAT,
.AlignedByteOffset = offsetof(Vertex2DTexture, position),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
attr[1] = {
.SemanticIndex = 1,
.Format = DXGI_FORMAT_R32G32_FLOAT,
.AlignedByteOffset = offsetof(Vertex2DTexture, tex_coord),
.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA,
.InstanceDataStepRate = 0,
};
} return;
default:
UNREACHABLE();
};
}
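A short sketch of how this feeds the pipeline; the two-element layout matches the GPU_ATTRIBUTE_TYPE_VERTEX_2D_TEXTURE case above:
D3D12_INPUT_ELEMENT_DESC attrs[2];
gpuapi_attribute_info_create(GPU_ATTRIBUTE_TYPE_VERTEX_2D_TEXTURE, attrs);
// attrs/2 become the descriptor_set_layouts/layout_count pair that
// cmd_shader_load_sync forwards to gpuapi_pipeline_make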
int32 directx_program_optimize(const char* input, char* output)
{
const char* read_ptr = input;
char* write_ptr = output;
bool in_string = false;
while (*read_ptr) {
str_skip_empty(&read_ptr);
if (write_ptr != output
&& *(write_ptr - 1) != '\n' && *(write_ptr - 1) != ';' && *(write_ptr - 1) != '{'
&& *(write_ptr - 1) != '('
&& *(write_ptr - 1) != ','
) {
*write_ptr++ = '\n';
}
// Handle single-line comments (//)
if (*read_ptr == '/' && *(read_ptr + 1) == '/' && !in_string) {
str_move_to(&read_ptr, '\n');
continue;
}
// Handle multi-line comments (/* */)
if (*read_ptr == '/' && *(read_ptr + 1) == '*' && !in_string) {
// Go to end of comment
while (*read_ptr && (*read_ptr != '*' || *(read_ptr + 1) != '/')) {
++read_ptr;
}
if (*read_ptr == '*' && *(read_ptr + 1) == '/') {
read_ptr += 2;
}
continue;
}
// Handle strings to avoid removing content within them
// (consume the quote here, otherwise the outer loop would never advance past it)
if (*read_ptr == '"') {
in_string = !in_string;
*write_ptr++ = *read_ptr++;
}
// Copy valid characters to write_ptr
while (*read_ptr && !is_eol(read_ptr) && *read_ptr != '"'
&& !(*read_ptr == '/' && (*(read_ptr + 1) == '/' || *(read_ptr + 1) == '*'))
) {
if (!in_string
&& (*read_ptr == '*' || *read_ptr == '/' || *read_ptr == '=' || *read_ptr == '+' || *read_ptr == '-' || *read_ptr == '%'
|| *read_ptr == '(' || *read_ptr == ')'
|| *read_ptr == '{' || *read_ptr == '}'
|| *read_ptr == ',' || *read_ptr == '?' || *read_ptr == ':' || *read_ptr == ';'
|| *read_ptr == '&' || *read_ptr == '|'
|| *read_ptr == '>' || *read_ptr == '<'
)
) {
if (write_ptr != output && (is_whitespace(*(write_ptr - 1)) || *(write_ptr - 1) == '\n')) {
--write_ptr;
}
*write_ptr++ = *read_ptr++;
if (*read_ptr && is_whitespace(*read_ptr)) {
++read_ptr;
}
} else {
*write_ptr++ = *read_ptr++;
}
}
}
*write_ptr = '\0';
// write_ptr points at the terminating \0, so the difference excludes it, same as strlen
return (int32) (write_ptr - output);
}
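Roughly, the pass strips comments and collapses whitespace around operators into one token-dense stream; an illustrative input/output pair (output approximate):
// input:  float4 main( float4 pos : POSITION ) : SV_POSITION { return pos; /* passthrough */ }
// output: float4 main(float4 pos:POSITION):SV_POSITION{return pos;}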
#endif

View File

@ -47,7 +47,7 @@ void* cmd_shader_load_sync(AppCmdBuffer* __restrict cb, Shader* __restrict shade
}
// Make sub shader
shader_assets[i] = shader_make(
shader_assets[i] = gpuapi_shader_make(
shader_type_index((ShaderType) (i + 1)),
(char *) shader_asset->self
);
@ -57,7 +57,7 @@ void* cmd_shader_load_sync(AppCmdBuffer* __restrict cb, Shader* __restrict shade
}
// Make shader/program
shader->id = pipeline_make(
shader->id = gpuapi_pipeline_make(
shader_assets[0], shader_assets[1], shader_assets[2]
);

View File

@ -33,7 +33,7 @@
{
GLenum err;
while ((err = glGetError()) != GL_NO_ERROR) {
LOG_FORMAT_1("Opengl error: %d", {{LOG_DATA_INT32, (int32 *) &err}});
LOG_1("Opengl error: %d", {{LOG_DATA_INT32, (int32 *) &err}});
ASSERT_SIMPLE(err == GL_NO_ERROR);
}
}
@ -108,6 +108,7 @@ void opengl_info(OpenglInfo* info)
}
}
// @todo rename to gpuapi_*
inline
uint32 get_texture_data_type(uint32 texture_data_type)
{
@ -145,6 +146,7 @@ uint32 get_texture_data_type(uint32 texture_data_type)
// 4. load_texture_to_gpu
// 5. texture_use
// @todo this should have a gpuapi_ name
inline
void prepare_texture(Texture* texture)
{
@ -155,9 +157,11 @@ void prepare_texture(Texture* texture)
glBindTexture(texture_data_type, (GLuint) texture->id);
}
// @todo this should have a gpuapi_ name
inline
void load_texture_to_gpu(const Texture* texture, int32 mipmap_level = 0)
{
// @todo also handle different texture formats (R, RG, RGB, 1 byte vs 4 byte per pixel)
uint32 texture_data_type = get_texture_data_type(texture->texture_data_type);
glTexImage2D(
texture_data_type, mipmap_level, GL_RGBA,
@ -173,6 +177,7 @@ void load_texture_to_gpu(const Texture* texture, int32 mipmap_level = 0)
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, texture->image.pixel_count * image_pixel_size_from_type(texture->image.image_settings));
}
// @todo this should have a gpuapi_ name
inline
void texture_use(const Texture* texture)
{
@ -182,6 +187,7 @@ void texture_use(const Texture* texture)
glBindTexture(texture_data_type, (GLuint) texture->id);
}
// @todo this should have a gpuapi_ name
inline
void texture_delete(Texture* texture) {
glDeleteTextures(1, &texture->id);
@ -392,14 +398,23 @@ void gpuapi_buffer_update_dynamic(uint32 vbo, int32 size, const void* data)
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, size);
}
// @todo change name. vulkan and directx have different functions for vertex buffer updates
inline
void gpuapi_buffer_update_sub(uint32 vbo, int32 offset, int32 size, const void* data)
void gpuapi_vertex_buffer_update(
uint32 vbo,
const void* data, int32 vertex_size, int32 vertex_count, int32 offset = 0
)
{
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferSubData(GL_ARRAY_BUFFER, offset, size, data);
// @performance Does this if even make sense or is glBufferSubData always the better choice?
if (offset) {
glBufferSubData(GL_ARRAY_BUFFER, offset, vertex_size * vertex_count - offset, ((byte *) data) + offset);
} else {
glBufferData(GL_ARRAY_BUFFER, vertex_size * vertex_count, data, GL_DYNAMIC_DRAW);
}
ASSERT_GPU_API();
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, size);
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, vertex_size * vertex_count - offset);
}
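Hedged usage sketch (vbo, vertices and vertex_count assumed): with offset == 0 the call re-specifies the whole store via glBufferData, which also orphans the previous buffer; a non-zero offset patches in place with glBufferSubData:
gpuapi_vertex_buffer_update(vbo, vertices, sizeof(Vertex2DTexture), vertex_count);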
inline

View File

@ -15,6 +15,7 @@
#include "../../log/Stats.h"
#include "../../log/PerformanceProfiler.h"
#include "../../object/Vertex.h"
#include "../../utils/StringUtils.h"
#include "Shader.h"
#include "Opengl.h"
#include "../ShaderType.h"
@ -44,79 +45,79 @@ int32 shader_type_index(ShaderType type)
// @todo change naming to gpuapi_uniform_buffer_update (same as vulkan)
// @todo change from upload to uniform upload since it is a special form of upload
FORCE_INLINE
void shader_set_value(uint32 location, bool value)
void gpuapi_uniform_buffer_update_value(uint32 location, bool value)
{
glUniform1i(location, (int32) value);
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(value));
}
FORCE_INLINE
void shader_set_value(uint32 location, int32 value)
void gpuapi_uniform_buffer_update_value(uint32 location, int32 value)
{
glUniform1i(location, value);
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(value));
}
FORCE_INLINE
void shader_set_value(uint32 location, f32 value)
void gpuapi_uniform_buffer_update_value(uint32 location, f32 value)
{
glUniform1f(location, value);
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(value));
}
FORCE_INLINE
void shader_set_v2(uint32 location, const f32* value)
void gpuapi_uniform_buffer_update_v2(uint32 location, const f32* value)
{
glUniform2fv(location, 1, value);
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 2);
}
FORCE_INLINE
void shader_set_v3(uint32 location, const f32* value)
void gpuapi_uniform_buffer_update_v3(uint32 location, const f32* value)
{
glUniform3fv(location, 1, value);
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 3);
}
FORCE_INLINE
void shader_set_v4(uint32 location, const f32* value)
void gpuapi_uniform_buffer_update_v4(uint32 location, const f32* value)
{
glUniform4fv(location, 1, value);
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 4);
}
FORCE_INLINE
void shader_set_m2(uint32 location, const f32* value)
void gpuapi_uniform_buffer_update_m2(uint32 location, const f32* value)
{
glUniformMatrix2fv(location, 1, GL_FALSE, value);
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 4);
}
FORCE_INLINE
void shader_set_m3(uint32 location, const f32* value)
void gpuapi_uniform_buffer_update_m3(uint32 location, const f32* value)
{
glUniformMatrix3fv(location, 1, GL_FALSE, value);
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 9);
}
FORCE_INLINE
void shader_set_m4(uint32 location, const f32* value)
void gpuapi_uniform_buffer_update_m4(uint32 location, const f32* value)
{
glUniformMatrix4fv(location, 1, GL_FALSE, value);
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UNIFORM_UPLOAD, sizeof(*value) * 16);
}
FORCE_INLINE
uint32 shader_get_attrib_location(uint32 id, const char* name)
uint32 opengl_get_attrib_location(uint32 id, const char* name)
{
// By using this you can retreive the shader variable name at a point where and when you know it
// By using this you can retrieve the shader variable name at a point where and when you know it
// BUT set values later on in generalized functions without knowing the shader variable name
// Basically like pointers
return glGetAttribLocation(id, name);
}
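A small sketch of the pointer-like pattern the comment describes (the shader id and attribute name are assumptions):
uint32 pos_loc = opengl_get_attrib_location(shader->id, "position");
// later, in generic code that never sees the name:
glEnableVertexAttribArray(pos_loc);
glVertexAttribPointer(pos_loc, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex3D), (void *) offsetof(Vertex3D, position));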
inline
void shader_check_link_errors(uint32 id, char* log)
void opengl_check_link_errors(uint32 id, char* log)
{
GLint success;
glGetProgramiv(id, GL_LINK_STATUS, &success);
@ -126,7 +127,7 @@ void shader_check_link_errors(uint32 id, char* log)
}
inline
void shader_check_compile_errors(uint32 id, char* log)
void opengl_check_compile_errors(uint32 id, char* log)
{
GLint success;
glGetShaderiv(id, GL_COMPILE_STATUS, &success);
@ -135,17 +136,14 @@ void shader_check_compile_errors(uint32 id, char* log)
}
}
int32 shader_program_optimize(const char* input, char* output)
int32 opengl_program_optimize(const char* __restrict input, char* __restrict output)
{
const char* read_ptr = input;
char* write_ptr = output;
bool in_string = false;
while (*read_ptr) {
// Remove leading whitespace
while (*read_ptr == ' ' || *read_ptr == '\t' || is_eol(read_ptr)) {
++read_ptr;
}
str_skip_empty(&read_ptr);
if (write_ptr != output
&& *(write_ptr - 1) != '\n' && *(write_ptr - 1) != ';' && *(write_ptr - 1) != '{'
@ -157,10 +155,7 @@ int32 shader_program_optimize(const char* input, char* output)
// Handle single-line comments (//)
if (*read_ptr == '/' && *(read_ptr + 1) == '/' && !in_string) {
// Go to end of line
while (*read_ptr && *read_ptr != '\n') {
++read_ptr;
}
str_move_to(&read_ptr, '\n');
continue;
}
@ -218,7 +213,7 @@ int32 shader_program_optimize(const char* input, char* output)
return (int32) (write_ptr - output);
}
GLuint shader_make(GLenum type, const char* source)
GLuint gpuapi_shader_make(GLenum type, const char* source)
{
LOG_1("Create shader");
GLuint shader = glCreateShader(type);
@ -249,7 +244,7 @@ GLuint shader_make(GLenum type, const char* source)
}
inline
int32 program_get_size(uint32 program)
int32 opengl_program_get_size(uint32 program)
{
int32 size;
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &size);
@ -259,7 +254,7 @@ int32 program_get_size(uint32 program)
// @todo Instead of passing the shaders one by one, pass one array called ShaderStage* shader_stages
// This way we can handle this more dynamic
GLuint pipeline_make(
GLuint gpuapi_pipeline_make(
GLuint vertex_shader,
GLuint fragment_shader,
GLint geometry_shader
@ -316,9 +311,8 @@ GLuint pipeline_make(
return program;
}
// @question Depending on how the different gpu apis work we may want to pass Shader* to have a uniform structure
FORCE_INLINE
void pipeline_use(uint32 id)
void gpuapi_pipeline_use(uint32 id)
{
glUseProgram(id);
}
@ -347,7 +341,7 @@ void gpuapi_attribute_info_create(GpuAttributeType type, OpenglVertexInputAttrib
.count = 3,
.format = GL_FLOAT,
.stride = sizeof(Vertex3D),
.offset = (void *) offsetof(Vertex3DTextureColor, position)
.offset = (void *) offsetof(Vertex3D, position)
};
attr[1] = {
@ -402,7 +396,7 @@ void gpuapi_attribute_info_create(GpuAttributeType type, OpenglVertexInputAttrib
attr[1] = {
.location = 1,
.count = 2,
.count = 4,
.format = GL_FLOAT,
.stride = sizeof(Vertex3DColor),
.offset = (void *) offsetof(Vertex3DColor, color)
@ -472,7 +466,7 @@ void gpuapi_attribute_info_create(GpuAttributeType type, OpenglVertexInputAttrib
};
}
void gpuapi_descriptor_set_layout_create(Shader* shader, const OpenglDescriptorSetLayoutBinding* bindings, int32 binding_length) {
void gpuapi_descriptor_set_layout_create(Shader* __restrict shader, const OpenglDescriptorSetLayoutBinding* __restrict bindings, int32 binding_length) {
for (int32 i = 0; i < binding_length; ++i) {
shader->descriptor_set_layout[i].binding = glGetUniformLocation(shader->id, bindings[i].name);
shader->descriptor_set_layout[i].name = bindings[i].name;

View File

@ -50,7 +50,7 @@ void* cmd_shader_load_sync(
}
// Make sub shader
shader_assets[i] = shader_make(
shader_assets[i] = gpuapi_shader_make(
device,
(char *) shader_asset->self,
shader_asset->ram_size
@ -61,7 +61,7 @@ void* cmd_shader_load_sync(
}
// Make shader/program
shader->id = pipeline_make(
shader->id = gpuapi_pipeline_make(
device, render_pass, pipeline_layout, pipeline,
descriptor_set_layouts,
shader_assets[0], shader_assets[1], shader_assets[2]

View File

@ -34,7 +34,7 @@ uint32_t shader_get_uniform_location(
}
inline
void shader_set_value(VkDevice device, VkDescriptorSet descriptorSet, uint32_t binding, VkDescriptorType descriptorType, int32_t value)
void gpuapi_uniform_buffer_update_value(VkDevice device, VkDescriptorSet descriptorSet, uint32_t binding, VkDescriptorType descriptorType, int32_t value)
{
VkDescriptorBufferInfo bufferInfo = {};
bufferInfo.buffer = {}; // You should have a buffer holding the value
@ -54,7 +54,7 @@ void shader_set_value(VkDevice device, VkDescriptorSet descriptorSet, uint32_t b
}
inline
VkShaderModule shader_make(VkDevice device, const char* source, int32 source_size)
VkShaderModule gpuapi_shader_make(VkDevice device, const char* source, int32 source_size)
{
LOG_1("Create shader");
// Create shader module create info
@ -68,7 +68,7 @@ VkShaderModule shader_make(VkDevice device, const char* source, int32 source_siz
VkResult result = vkCreateShaderModule(device, &create_info, NULL, &shader_module);
if (result != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreateShaderModule: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreateShaderModule: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
return VK_NULL_HANDLE;
@ -144,7 +144,7 @@ void gpuapi_attribute_info_create(GpuAttributeType type, VkVertexInputAttributeD
attr[1] = {
.location = 1,
.binding = 0,
.format = VK_FORMAT_R32_UINT,
.format = VK_FORMAT_R32G32B32A32_SFLOAT,
.offset = offsetof(Vertex3DColor, color)
};
} return;
@ -190,15 +190,15 @@ void gpuapi_attribute_info_create(GpuAttributeType type, VkVertexInputAttributeD
};
}
inline
void pipeline_use(VkCommandBuffer command_buffer, VkPipeline pipeline)
FORCE_INLINE
void gpuapi_pipeline_use(VkCommandBuffer command_buffer, VkPipeline pipeline)
{
vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
}
// @todo Instead of passing the shaders one by one, pass one array called ShaderStage* shader_stages
// This way we can handle this more dynamic
VkPipeline pipeline_make(
VkPipeline gpuapi_pipeline_make(
VkDevice device, VkRenderPass render_pass, VkPipelineLayout* __restrict pipeline_layout, VkPipeline* __restrict pipeline,
VkDescriptorSetLayout* descriptor_set_layouts,
VkShaderModule vertex_shader, VkShaderModule fragment_shader,
@ -301,7 +301,7 @@ VkPipeline pipeline_make(
VkResult result;
if ((result = vkCreatePipelineLayout(device, &pipeline_info_layout, NULL, pipeline_layout)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreatePipelineLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreatePipelineLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
return NULL;
@ -324,7 +324,7 @@ VkPipeline pipeline_make(
pipeline_info.basePipelineHandle = VK_NULL_HANDLE;
if ((result = vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipeline_info, NULL, pipeline)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreateGraphicsPipelines: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreateGraphicsPipelines: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
return NULL;
@ -358,7 +358,7 @@ void gpuapi_descriptor_set_layout_create(
VkResult result;
if ((result = vkCreateDescriptorSetLayout(device, &layout_info, NULL, descriptor_set_layout)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreateDescriptorSetLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreateDescriptorSetLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
}
}
@ -390,7 +390,7 @@ void vulkan_descriptor_pool_create(
VkResult result;
if ((result = vkCreateDescriptorPool(device, &poolInfo, NULL, descriptor_pool)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreateDescriptorPool: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreateDescriptorPool: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
}
}
@ -417,7 +417,7 @@ void vulkan_descriptor_sets_create(
VkResult result;
if ((result = vkAllocateDescriptorSets(device, &alloc_info, descriptor_sets)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkAllocateDescriptorSets: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkAllocateDescriptorSets: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
return;

View File

@ -31,17 +31,18 @@
#include "../../log/Stats.h"
#include "../../log/PerformanceProfiler.h"
#include "../../memory/RingMemory.h"
#include "../../compiler/CompilerUtils.h"
#include "ShaderUtils.h"
#include "FramesInFlightContainer.h"
#if DEBUG
#define ASSERT_GPU_API(x) \
do { \
VkResult err = (x); \
if (err) { \
LOG_FORMAT_1("Vulkan error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); \
ASSERT_SIMPLE(false); \
} \
#define ASSERT_GPU_API(x) \
do { \
VkResult err = (x); \
if (err) { \
LOG_1("Vulkan error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); \
ASSERT_SIMPLE(false); \
} \
} while (0)
#else
#define ASSERT_GPU_API(x) (x)
@ -68,7 +69,11 @@ struct VulkanSwapChainSupportDetails {
};
inline
void change_viewport(int32 width, int32 height, VkCommandBuffer command_buffer, VkExtent2D swapchain_extent, int32 offset_x = 0, int32 offset_y = 0)
void change_viewport(
int32 width, int32 height,
VkCommandBuffer command_buffer, VkExtent2D swapchain_extent,
int32 offset_x = 0, int32 offset_y = 0
)
{
VkViewport viewport = {};
viewport.x = (f32) offset_x;
@ -178,7 +183,7 @@ void vulkan_instance_create(
if (validation_layer_count
&& (err = vulkan_check_validation_layer_support(validation_layers, validation_layer_count, ring))
) {
LOG_FORMAT_1("Vulkan validation_layer missing: %d", {{LOG_DATA_CHAR_STR, (void *) validation_layers[-err - 1]}});
LOG_1("Vulkan validation_layer missing: %d", {{LOG_DATA_CHAR_STR, (void *) validation_layers[-err - 1]}});
ASSERT_SIMPLE(false);
return;
@ -187,7 +192,7 @@ void vulkan_instance_create(
if (extension_count
&& (err = vulkan_check_extension_support(extensions, extension_count, ring))
) {
LOG_FORMAT_1("Vulkan extension missing: %d", {{LOG_DATA_CHAR_STR, (void *) extensions[-err - 1]}});
LOG_1("Vulkan extension missing: %d", {{LOG_DATA_CHAR_STR, (void *) extensions[-err - 1]}});
ASSERT_SIMPLE(false);
return;
@ -224,7 +229,7 @@ void vulkan_instance_create(
VkResult result;
if ((result = vkCreateInstance(&create_info, NULL, instance)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreateInstance: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreateInstance: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
}
}
@ -240,7 +245,7 @@ void vulkan_surface_create(VkInstance instance, VkSurfaceKHR* surface, Window* w
VkResult result;
if ((result = vkCreateWin32SurfaceKHR(instance, &surface_create_info, NULL, surface)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreateWin32SurfaceKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreateWin32SurfaceKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}});
return;
}
#elif __linux__
@ -310,7 +315,7 @@ VulkanQueueFamilyIndices vulkan_find_queue_families(VkPhysicalDevice physical_de
VkResult result;
if ((result = vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, i, surface, &present_support)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkGetPhysicalDeviceSurfaceSupportKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkGetPhysicalDeviceSurfaceSupportKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
return indices;
@ -437,7 +442,7 @@ void gpuapi_create_logical_device(
VkResult result;
if ((result = vkCreateDevice(physical_device, &create_info, NULL, device)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreateDevice: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreateDevice: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
}
@ -523,7 +528,7 @@ void gpuapi_swapchain_create(
VkResult result;
if ((result = vkCreateSwapchainKHR(device, &create_info, NULL, swapchain)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreateSwapchainKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreateSwapchainKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
return;
@ -585,7 +590,7 @@ void vulkan_image_views_create(
create_info.subresourceRange.layerCount = 1;
if ((result = vkCreateImageView(device, &create_info, NULL, &swapchain_image_views[i])) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreateImageView: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreateImageView: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
}
}
@ -632,7 +637,7 @@ void vulkan_render_pass_create(
VkResult result;
if ((result = vkCreateRenderPass(device, &render_pass_info, NULL, render_pass)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreateRenderPass: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreateRenderPass: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
}
}
@ -660,7 +665,7 @@ void vulkan_framebuffer_create(
framebufferInfo.layers = 1;
if ((result = vkCreateFramebuffer(device, &framebufferInfo, NULL, &framebuffers[i])) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreateFramebuffer: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreateFramebuffer: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
}
}
@ -679,12 +684,12 @@ void vulkan_command_pool_create(
VkResult result;
if ((result = vkCreateCommandPool(device, &pool_info, NULL, command_pool)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkCreateCommandPool: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkCreateCommandPool: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
}
}
void vulkan_command_buffers_create(VkDevice device, VkCommandPool command_pool, VkCommandBuffer* command_buffers, uint32 command_buffer_count)
void gpuapi_command_buffer_create(VkDevice device, VkCommandPool command_pool, VkCommandBuffer* command_buffers, uint32 command_buffer_count)
{
VkCommandBufferAllocateInfo alloc_info = {};
alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
@ -694,7 +699,7 @@ void vulkan_command_buffers_create(VkDevice device, VkCommandPool command_pool,
VkResult result;
if ((result = vkAllocateCommandBuffers(device, &alloc_info, command_buffers)) != VK_SUCCESS) {
LOG_FORMAT_1("Vulkan vkAllocateCommandBuffers: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vkAllocateCommandBuffers: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
}
}
@ -716,7 +721,7 @@ void vulkan_sync_objects_create(
|| (result = vkCreateSemaphore(device, &semaphore_info, NULL, &frames_in_flight->render_finished_semaphores[i])) != VK_SUCCESS
|| (result = vkCreateFence(device, &fence_info, NULL, &frames_in_flight->fences[i])) != VK_SUCCESS
) {
LOG_FORMAT_1("Vulkan vulkan_sync_objects_create: %d", {{LOG_DATA_INT32, (int32 *) &result}});
LOG_1("Vulkan vulkan_sync_objects_create: %d", {{LOG_DATA_INT32, (int32 *) &result}});
ASSERT_SIMPLE(false);
}
}
@ -842,18 +847,52 @@ void vulkan_transition_image_layout(VkCommandBuffer command_buffer, VkImage imag
);
}
// @todo replace references with pointers
static
VkFormat gpuapi_texture_format(byte settings)
{
if ((settings & IMAGE_SETTING_CHANNEL_4_SIZE)) {
switch (settings & IMAGE_SETTING_CHANNEL_COUNT) {
case 1:
return VK_FORMAT_R32_SFLOAT;
case 2:
return VK_FORMAT_R32G32_SFLOAT;
case 3:
return VK_FORMAT_R32G32B32_SFLOAT;
case 4:
return VK_FORMAT_R32G32B32A32_SFLOAT;
default:
UNREACHABLE();
}
} else {
switch (settings & IMAGE_SETTING_CHANNEL_COUNT) {
case 1:
return VK_FORMAT_R8_SRGB;
case 2:
return VK_FORMAT_R8G8_SRGB;
case 3:
return VK_FORMAT_R8G8B8_SRGB;
case 4:
return VK_FORMAT_R8G8B8A8_SRGB;
default:
UNREACHABLE();
}
}
}
// @performance Sometimes we want to upload multiple textures in one go (more performant). Allow that or don't use this function in that case.
void load_texture_to_gpu(
VkDevice device, VkPhysicalDevice physical_device,
VkCommandPool command_pool, VkQueue queue,
VkImage& texture_image, VkDeviceMemory& texture_image_memory, VkImageView& texture_image_view, VkSampler& texture_sampler,
VkImage* texture_image, VkDeviceMemory* texture_image_memory, VkImageView* texture_image_view, VkSampler* texture_sampler,
const Texture* texture)
{
VkFormat textureFormat = gpuapi_texture_format(texture->image.image_settings);
// Create the Vulkan image
VkImageCreateInfo image_info = {};
image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
image_info.imageType = VK_IMAGE_TYPE_2D;
image_info.format = VK_FORMAT_R8G8B8A8_SRGB;
image_info.format = textureFormat;
image_info.extent.width = texture->image.width;
image_info.extent.height = texture->image.height;
image_info.extent.depth = 1;
@ -865,19 +904,19 @@ void load_texture_to_gpu(
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
ASSERT_GPU_API(vkCreateImage(device, &image_info, NULL, &texture_image));
ASSERT_GPU_API(vkCreateImage(device, &image_info, NULL, texture_image));
// Allocate memory for the image
VkMemoryRequirements memRequirements;
vkGetImageMemoryRequirements(device, texture_image, &memRequirements);
vkGetImageMemoryRequirements(device, *texture_image, &memRequirements);
VkMemoryAllocateInfo allocInfo = {};
allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
allocInfo.allocationSize = memRequirements.size;
allocInfo.memoryTypeIndex = vulkan_find_memory_type(physical_device, memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
ASSERT_GPU_API(vkAllocateMemory(device, &allocInfo, NULL, &texture_image_memory));
ASSERT_GPU_API(vkBindImageMemory(device, texture_image, texture_image_memory, 0));
ASSERT_GPU_API(vkAllocateMemory(device, &allocInfo, NULL, texture_image_memory));
ASSERT_GPU_API(vkBindImageMemory(device, *texture_image, *texture_image_memory, 0));
int32 image_size = image_pixel_size_from_type(texture->image.image_settings) * texture->image.width * texture->image.height;
@ -894,10 +933,10 @@ void load_texture_to_gpu(
// Transition the image layout
VkCommandBuffer command_buffer;
vulkan_command_buffers_create(device, command_pool, &command_buffer, 1);
gpuapi_command_buffer_create(device, command_pool, &command_buffer, 1);
vulkan_single_commands_begin(command_buffer);
vulkan_transition_image_layout(command_buffer, texture_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
vulkan_transition_image_layout(command_buffer, *texture_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
vulkan_single_commands_end(queue, command_buffer);
// Copy data from the staging buffer to the image
@ -910,13 +949,13 @@ void load_texture_to_gpu(
region.imageSubresource.layerCount = 1;
region.imageExtent = {texture->image.width, texture->image.height, 1};
vkCmdCopyBufferToImage(command_buffer, staging_buffer, texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
vkCmdCopyBufferToImage(command_buffer, staging_buffer, *texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
vulkan_single_commands_end(queue, command_buffer);
// Transition the image layout for shader access
vulkan_command_buffer_reset(command_buffer);
vulkan_single_commands_begin(command_buffer);
vulkan_transition_image_layout(command_buffer, texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
vulkan_transition_image_layout(command_buffer, *texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
vulkan_single_commands_end(queue, command_buffer);
vulkan_single_commands_free(device, command_pool, command_buffer);
@ -928,16 +967,16 @@ void load_texture_to_gpu(
// Create an image view
VkImageViewCreateInfo view_info = {};
view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
view_info.image = texture_image;
view_info.image = *texture_image;
view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
view_info.format = VK_FORMAT_R8G8B8A8_SRGB;
view_info.format = textureFormat;
view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
view_info.subresourceRange.baseMipLevel = 0;
view_info.subresourceRange.levelCount = 1;
view_info.subresourceRange.baseArrayLayer = 0;
view_info.subresourceRange.layerCount = 1;
ASSERT_GPU_API(vkCreateImageView(device, &view_info, NULL, &texture_image_view));
ASSERT_GPU_API(vkCreateImageView(device, &view_info, NULL, texture_image_view));
// Create a sampler
VkPhysicalDeviceProperties properties = {};
@ -958,14 +997,14 @@ void load_texture_to_gpu(
sampler_info.compareOp = VK_COMPARE_OP_ALWAYS;
sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
ASSERT_GPU_API(vkCreateSampler(device, &sampler_info, NULL, &texture_sampler));
ASSERT_GPU_API(vkCreateSampler(device, &sampler_info, NULL, texture_sampler));
}
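// Call sketch (hedged; device, queue and texture objects are assumed to exist elsewhere):
//     VkImage image; VkDeviceMemory memory; VkImageView view; VkSampler sampler;
//     load_texture_to_gpu(device, physical_device, command_pool, queue,
//         &image, &memory, &view, &sampler, texture);  // out-params now passed by pointer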
// @todo Rename to same name as opengl (or rename opengl obviously)
void vulkan_vertex_buffer_update(
void gpuapi_vertex_buffer_update(
VkDevice device, VkPhysicalDevice physical_device, VkCommandPool command_pool, VkQueue queue,
VkBuffer* vertexBuffer,
const void* __restrict vertices, int32 vertex_size, int32 vertex_count
VkBuffer* vertex_buffer,
const void* __restrict vertices, int32 vertex_size, int32 vertex_count, int32 offset = 0
)
{
VkDeviceSize bufferSize = vertex_size * vertex_count;
@ -986,12 +1025,14 @@ void vulkan_vertex_buffer_update(
vkUnmapMemory(device, stagingBufferMemory);
VkCommandBuffer commandBuffer;
vulkan_command_buffers_create(device, command_pool, &commandBuffer, 1);
gpuapi_command_buffer_create(device, command_pool, &commandBuffer, 1);
vulkan_single_commands_begin(commandBuffer);
VkBufferCopy copyRegion = {};
copyRegion.srcOffset = offset;
copyRegion.dstOffset = offset;
copyRegion.size = bufferSize - offset; // copy only the range past offset; consistent with LOG_INCREMENT_BY below
vkCmdCopyBuffer(commandBuffer, stagingBuffer, *vertexBuffer, 1, &copyRegion);
vkCmdCopyBuffer(commandBuffer, stagingBuffer, *vertex_buffer, 1, &copyRegion);
vulkan_single_commands_end(queue, commandBuffer);
vulkan_single_commands_free(device, command_pool, commandBuffer);
@ -999,12 +1040,12 @@ void vulkan_vertex_buffer_update(
vkDestroyBuffer(device, stagingBuffer, NULL);
vkFreeMemory(device, stagingBufferMemory, NULL);
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, vertex_size * vertex_count);
LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_VERTEX_UPLOAD, bufferSize - offset);
}
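// Usage sketch (hedged): offset is a byte offset into a staging copy of the full buffer,
// so a tail update re-uploads everything from offset onward:
//     gpuapi_vertex_buffer_update(device, physical_device, command_pool, queue,
//         &vertex_buffer, vertices, sizeof(Vertex3D), vertex_count); // full upload, offset = 0
// Vertex3D stands in for whatever vertex struct the caller uses.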
void vulkan_vertex_buffer_create(
void gpuapi_vertex_buffer_create(
VkDevice device, VkPhysicalDevice physical_device, VkCommandPool command_pool, VkQueue queue,
VkBuffer* vertexBuffer, VkDeviceMemory vertexBufferMemory,
VkBuffer* vertex_buffer, VkDeviceMemory vertex_buffer_memory,
const void* __restrict vertices, int32 vertex_size, int32 vertex_count
)
{
@ -1031,18 +1072,18 @@ void vulkan_vertex_buffer_create(
bufferSize,
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
*vertexBuffer, vertexBufferMemory
*vertex_buffer, vertex_buffer_memory
);
// Copy buffer
// @performance Would it make sense to use a "global" temp buffer for that? If yes, we only need to reset
VkCommandBuffer commandBuffer;
vulkan_command_buffers_create(device, command_pool, &commandBuffer, 1);
gpuapi_command_buffer_create(device, command_pool, &commandBuffer, 1);
vulkan_single_commands_begin(commandBuffer);
VkBufferCopy copyRegion = {};
copyRegion.size = bufferSize;
vkCmdCopyBuffer(commandBuffer, stagingBuffer, *vertexBuffer, 1, &copyRegion);
vkCmdCopyBuffer(commandBuffer, stagingBuffer, *vertex_buffer, 1, &copyRegion);
vulkan_single_commands_end(queue, commandBuffer);
// @todo if we change behaviour according to the comment above we don't need this
@ -1084,7 +1125,7 @@ void vulkan_index_buffer_create(
// Copy buffer
VkCommandBuffer commandBuffer;
vulkan_command_buffers_create(device, command_pool, &commandBuffer, 1);
gpuapi_command_buffer_create(device, command_pool, &commandBuffer, 1);
vulkan_single_commands_begin(commandBuffer);
VkBufferCopy copyRegion = {};
@ -1101,7 +1142,7 @@ void vulkan_index_buffer_create(
// @todo We also need a free function (unmap buffer)
void vulkan_uniform_buffers_create(
void gpuapi_uniform_buffers_create(
VkDevice device, VkPhysicalDevice physical_device,
VkBuffer* __restrict uniform_buffers, VkDeviceMemory* __restrict uniform_buffers_memory, void** __restrict uniform_buffers_mapped,
size_t uniform_buffer_object_size,
@ -1109,6 +1150,7 @@ void vulkan_uniform_buffers_create(
)
{
// e.g. uniform_buffer_object_size = sizeof a UBO struct such as { model; view; proj; }
// @question Do I really need one uniform_buffer per frames_in_flight? This seems VERY inefficient
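// Note: per-frame uniform buffers are the standard pattern so the CPU never overwrites a
// buffer the GPU is still reading from; the usual alternative is one larger buffer with
// per-frame dynamic offsets, which saves allocations at the cost of offset bookkeeping.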
VkDeviceSize bufferSize = uniform_buffer_object_size;
for (uint32 i = 0; i < frames_in_flight; ++i) {
vulkan_buffer_create(
211
log/Log.h
View File
@ -182,17 +182,17 @@ void log(const char* str, const char* file, const char* function, int32 line)
void log(const char* format, LogDataArray data, const char* file, const char* function, int32 line)
{
ASSERT_SIMPLE(str_length(format) + str_length(file) + str_length(function) + 50 < MAX_LOG_LENGTH);
if (!_log_memory) {
return;
}
if (data.data[0].type == LOG_DATA_VOID) {
if (data.data[0].type == LOG_DATA_VOID || data.data[0].type == LOG_DATA_NONE) {
log(format, file, function, line);
return;
}
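// The LOG_DATA_NONE check presumably covers the zero-initialized LogDataArray{} that the
// reworked LOG_* macros below produce when invoked without data arguments.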
ASSERT_SIMPLE(str_length(format) + str_length(file) + str_length(function) + 50 < MAX_LOG_LENGTH);
LogMessage* msg = (LogMessage *) log_get_memory();
msg->file = file;
msg->function = function;
@ -205,7 +205,7 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu
str_copy_short(msg->message, format);
for (int32 i = 0; i < LOG_DATA_ARRAY; ++i) {
if (data.data[i].type == LOG_DATA_VOID) {
if (data.data[i].type == LOG_DATA_VOID || data.data[i].type == LOG_DATA_NONE) {
break;
}
@ -262,36 +262,20 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu
#define LOG_TO_FILE() log_to_file()
#if LOG_LEVEL == 4
// Complete logging
#define LOG_1(str) log((str), __FILE__, __func__, __LINE__)
#define LOG_2(str) log((str), __FILE__, __func__, __LINE__)
#define LOG_3(str) log((str), __FILE__, __func__, __LINE__)
#define LOG_4(str) log((str), __FILE__, __func__, __LINE__)
#define LOG_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_3(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_4(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_3(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_4(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_TRUE_4(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_TRUE_2(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_TRUE_3(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_TRUE_4(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_FALSE_2(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_FALSE_3(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_FALSE_4(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_TRUE_4(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_FALSE_4(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FALSE_4(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
#define LOG_IF_2(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
@ -301,7 +285,7 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu
#define LOG_CYCLE_START(var_name) uint64 var_name##_start_time = intrin_timestamp_counter()
#define LOG_CYCLE_END(var_name, format) \
uint64 var_name##_duration = (uint64) (intrin_timestamp_counter() - var_name##_start_time); \
LOG_FORMAT_1((format), {{LOG_DATA_UINT64, &var_name##_duration}})
LOG_1((format), {{LOG_DATA_UINT64, &var_name##_duration}})
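// Usage sketch for the cycle macros above (parse is just a hypothetical label):
//     LOG_CYCLE_START(parse);
//     /* ... work to measure ... */
//     LOG_CYCLE_END(parse, "parse took %l cycles");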
// Only intended for manual debugging
// Of course a developer could always use printf but by providing this option,
@ -315,35 +299,20 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu
compiler_debug_print((debug_str)); \
})
#elif LOG_LEVEL == 3
#define LOG_1(str) log((str), __FILE__, __func__, __LINE__)
#define LOG_2(str) log((str), __FILE__, __func__, __LINE__)
#define LOG_3(str) log((str), __FILE__, __func__, __LINE__)
#define LOG_4(str) ((void) 0)
#define LOG_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_3(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_4(format, ...) ((void) 0)
#define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_3(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_4(format, ...) ((void) 0)
#define LOG_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_TRUE_4(should_log, format, ...) ((void) 0)
#define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_TRUE_2(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_TRUE_3(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_TRUE_4(should_log, str) ((void) 0)
#define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_FALSE_2(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_FALSE_3(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_FALSE_4(should_log, str) ((void) 0)
#define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0)
#define LOG_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FALSE_4(should_log, format, ...) ((void) 0)
#define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
#define LOG_IF_2(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
@ -354,40 +323,25 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu
#define LOG_CYCLE_START(var_name) uint64 var_name##_start_time = intrin_timestamp_counter()
#define LOG_CYCLE_END(var_name, format) \
uint64 var_name##_duration = (uint64) (intrin_timestamp_counter() - var_name##_start_time); \
LOG_FORMAT_1((format), {{LOG_DATA_UINT64, &var_name##_duration}})
LOG_1((format), {{LOG_DATA_UINT64, &var_name##_duration}})
#define DEBUG_VERBOSE(str) ((void) 0)
#define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0)
#elif LOG_LEVEL == 2
#define LOG_1(str) log((str), __FILE__, __func__, __LINE__)
#define LOG_2(str) log((str), __FILE__, __func__, __LINE__)
#define LOG_3(str) ((void) 0)
#define LOG_4(str) ((void) 0)
#define LOG_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_3(format, ...) ((void) 0)
#define LOG_4(format, ...) ((void) 0)
#define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_3(format, ...) ((void) 0)
#define LOG_FORMAT_4(format, ...) ((void) 0)
#define LOG_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_TRUE_3(should_log, format, ...) ((void) 0)
#define LOG_TRUE_4(should_log, format, ...) ((void) 0)
#define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_TRUE_2(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_TRUE_3(should_log, str) ((void) 0)
#define LOG_TRUE_4(should_log, str) ((void) 0)
#define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_FALSE_2(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_FALSE_3(should_log, str) ((void) 0)
#define LOG_FALSE_4(should_log, str) ((void) 0)
#define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_TRUE_3(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_FALSE_3(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0)
#define LOG_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FALSE_3(should_log, format, ...) ((void) 0)
#define LOG_FALSE_4(should_log, format, ...) ((void) 0)
#define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
#define LOG_IF_2(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
@ -398,40 +352,25 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu
#define LOG_CYCLE_START(var_name) uint64 var_name##_start_time = intrin_timestamp_counter()
#define LOG_CYCLE_END(var_name, format) \
uint64 var_name##_duration = (uint64) (intrin_timestamp_counter() - var_name##_start_time); \
LOG_FORMAT_1((format), {{LOG_DATA_UINT64, &var_name##_duration}})
LOG_1((format), {{LOG_DATA_UINT64, &var_name##_duration}})
#define DEBUG_VERBOSE(str) ((void) 0)
#define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0)
#elif LOG_LEVEL == 1
#define LOG_1(str) log((str), __FILE__, __func__, __LINE__)
#define LOG_2(str) ((void) 0)
#define LOG_3(str) ((void) 0)
#define LOG_4(str) ((void) 0)
#define LOG_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_2(format, ...) ((void) 0)
#define LOG_3(format, ...) ((void) 0)
#define LOG_4(format, ...) ((void) 0)
#define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_2(format, ...) ((void) 0)
#define LOG_FORMAT_3(format, ...) ((void) 0)
#define LOG_FORMAT_4(format, ...) ((void) 0)
#define LOG_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_TRUE_2(should_log, format, ...) ((void) 0)
#define LOG_TRUE_3(should_log, format, ...) ((void) 0)
#define LOG_TRUE_4(should_log, format, ...) ((void) 0)
#define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_TRUE_2(should_log, str) ((void) 0)
#define LOG_TRUE_3(should_log, str) ((void) 0)
#define LOG_TRUE_4(should_log, str) ((void) 0)
#define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
#define LOG_FALSE_2(should_log, str) ((void) 0)
#define LOG_FALSE_3(should_log, str) ((void) 0)
#define LOG_FALSE_4(should_log, str) ((void) 0)
#define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_TRUE_2(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_TRUE_3(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FORMAT_FALSE_2(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_FALSE_3(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0)
#define LOG_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
#define LOG_FALSE_2(should_log, format, ...) ((void) 0)
#define LOG_FALSE_3(should_log, format, ...) ((void) 0)
#define LOG_FALSE_4(should_log, format, ...) ((void) 0)
#define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
// Only logs on failure
@ -445,36 +384,20 @@ void log(const char* format, LogDataArray data, const char* file, const char* fu
#define DEBUG_VERBOSE(str) ((void) 0)
#define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0)
#elif LOG_LEVEL == 0
// No logging whatsoever
#define LOG_1(str) ((void) 0)
#define LOG_2(str) ((void) 0)
#define LOG_3(str) ((void) 0)
#define LOG_4(str) ((void) 0)
#define LOG_1(format, ...) ((void) 0)
#define LOG_2(format, ...) ((void) 0)
#define LOG_3(format, ...) ((void) 0)
#define LOG_4(format, ...) ((void) 0)
#define LOG_FORMAT_1(format, ...) ((void) 0)
#define LOG_FORMAT_2(format, ...) ((void) 0)
#define LOG_FORMAT_3(format, ...) ((void) 0)
#define LOG_FORMAT_4(format, ...) ((void) 0)
#define LOG_TRUE_1(should_log, format, ...) ((void) 0)
#define LOG_TRUE_2(should_log, format, ...) ((void) 0)
#define LOG_TRUE_3(should_log, format, ...) ((void) 0)
#define LOG_TRUE_4(should_log, format, ...) ((void) 0)
#define LOG_TRUE_1(should_log, str) ((void) 0)
#define LOG_TRUE_2(should_log, str) ((void) 0)
#define LOG_TRUE_3(should_log, str) ((void) 0)
#define LOG_TRUE_4(should_log, str) ((void) 0)
#define LOG_FALSE_1(should_log, str) ((void) 0)
#define LOG_FALSE_2(should_log, str) ((void) 0)
#define LOG_FALSE_3(should_log, str) ((void) 0)
#define LOG_FALSE_4(should_log, str) ((void) 0)
#define LOG_FORMAT_TRUE_1(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_TRUE_2(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_TRUE_3(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_FALSE_1(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_FALSE_2(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_FALSE_3(should_log, format, ...) ((void) 0)
#define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0)
#define LOG_FALSE_1(should_log, format, ...) ((void) 0)
#define LOG_FALSE_2(should_log, format, ...) ((void) 0)
#define LOG_FALSE_3(should_log, format, ...) ((void) 0)
#define LOG_FALSE_4(should_log, format, ...) ((void) 0)
#define LOG_IF_1(expr, str_succeeded, str_failed) ((void) 0)
#define LOG_IF_2(expr, str_succeeded, str_failed) ((void) 0)
View File
@ -152,7 +152,7 @@ struct PerformanceProfiler {
if (this->auto_log) {
if (this->info_msg && this->info_msg[0]) {
LOG_FORMAT_2(
LOG_2(
"-PERF %s (%s): %l cycles",
{
{LOG_DATA_CHAR_STR, (void *) perf->name},
@ -161,7 +161,7 @@ struct PerformanceProfiler {
}
);
} else {
LOG_FORMAT_2(
LOG_2(
"-PERF %s: %l cycles",
{
{LOG_DATA_CHAR_STR, (void *) perf->name},
@ -210,7 +210,7 @@ void performance_profiler_end(int32 id) noexcept
#define PROFILE_SCOPE(id, name) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), (name))
#define PROFILE_RESET(id) if(_perf_active && *_perf_active) performance_profiler_reset((id))
#else
#define PROFILE(id) ((void) 0)
#define PROFILE(id, ...) ((void) 0)
#define PROFILE_START(id, name) ((void) 0)
#define PROFILE_END(id) ((void) 0)
View File
@ -15,6 +15,7 @@
#include "../utils/TestUtils.h"
#include "../log/Log.h"
#include "../log/Stats.h"
#include "../log/PerformanceProfiler.h"
#include "../log/DebugMemory.h"
#include "../system/Allocator.h"
@ -35,7 +36,7 @@ void buffer_alloc(BufferMemory* buf, uint64 size, int32 alignment = 64)
{
ASSERT_SIMPLE(size);
PROFILE(PROFILE_BUFFER_ALLOC, NULL, false, true);
LOG_FORMAT_1("Allocating BufferMemory: %n B", {{LOG_DATA_UINT64, &size}});
LOG_1("Allocating BufferMemory: %n B", {{LOG_DATA_UINT64, &size}});
buf->memory = alignment < 2
? (byte *) platform_alloc(size)
View File
@ -17,6 +17,7 @@
#include "../compiler/CompilerUtils.h"
#include "../log/Log.h"
#include "../log/Stats.h"
#include "../log/PerformanceProfiler.h"
#include "../log/DebugMemory.h"
#include "BufferMemory.h"
#include "../system/Allocator.h"
@ -63,7 +64,7 @@ void chunk_alloc(ChunkMemory* buf, uint32 count, uint32 chunk_size, int32 alignm
memset(buf->memory, 0, buf->size);
LOG_FORMAT_1("Allocated ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}});
LOG_1("Allocated ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}});
}
inline
@ -327,7 +328,7 @@ int64 chunk_dump(const ChunkMemory* buf, byte* data)
memcpy(data, buf->memory, buf->size);
data += buf->size;
LOG_FORMAT_1("Dumped ChunkMemory: %n B", {{LOG_DATA_UINT64, (void *) &buf->size}});
LOG_1("Dumped ChunkMemory: %n B", {{LOG_DATA_UINT64, (void *) &buf->size}});
return data - start;
}
@ -362,7 +363,7 @@ int64 chunk_load(ChunkMemory* buf, const byte* data)
buf->free = (uint64 *) (buf->memory + buf->count * buf->chunk_size);
LOG_FORMAT_1("Loaded ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}});
LOG_1("Loaded ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}});
return buf->size;
}
View File
@ -19,6 +19,7 @@
#include "BufferMemory.h"
#include "../log/Log.h"
#include "../log/Stats.h"
#include "../log/PerformanceProfiler.h"
#include "../log/DebugMemory.h"
#include "../thread/Atomic.h"
#include "../thread/Semaphore.h"
@ -48,7 +49,7 @@ void ring_alloc(RingMemory* ring, uint64 size, uint32 alignment = 64)
{
ASSERT_SIMPLE(size);
PROFILE(PROFILE_RING_ALLOC, NULL, false, true);
LOG_FORMAT_1("Allocating RingMemory: %n B", {{LOG_DATA_UINT64, &size}});
LOG_1("Allocating RingMemory: %n B", {{LOG_DATA_UINT64, &size}});
ring->memory = alignment < 2
? (byte *) platform_alloc(size)
@ -62,7 +63,7 @@ void ring_alloc(RingMemory* ring, uint64 size, uint32 alignment = 64)
memset(ring->memory, 0, ring->size);
LOG_FORMAT_1("Allocated RingMemory: %n B", {{LOG_DATA_UINT64, &ring->size}});
LOG_1("Allocated RingMemory: %n B", {{LOG_DATA_UINT64, &ring->size}});
}
inline
View File
@ -88,9 +88,9 @@ void log_stack_trace(CONTEXT *context) {
symbol->MaxNameLen = MAX_SYM_NAME;
if (SymFromAddr(process, address, NULL, symbol)) {
LOG_FORMAT_1("Function: %s - Address: %l", {{LOG_DATA_CHAR_STR, symbol->Name}, {LOG_DATA_INT64, &symbol->Address}});
LOG_1("Function: %s - Address: %l", {{LOG_DATA_CHAR_STR, symbol->Name}, {LOG_DATA_INT64, &symbol->Address}});
} else {
LOG_FORMAT_1("Function: (unknown) - Address: %l", {{LOG_DATA_INT64, &address}});
LOG_1("Function: (unknown) - Address: %l", {{LOG_DATA_INT64, &address}});
}
// Resolve file and line number
@ -99,7 +99,7 @@ void log_stack_trace(CONTEXT *context) {
line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
if (SymGetLineFromAddr64(process, address, &displacement, &line)) {
LOG_FORMAT_1(" File: %s, Line: %l", {{LOG_DATA_CHAR_STR, line.FileName}, {LOG_DATA_INT64, &line.LineNumber}});
LOG_1(" File: %s, Line: %l", {{LOG_DATA_CHAR_STR, line.FileName}, {LOG_DATA_INT64, &line.LineNumber}});
} else {
LOG_1(" File: (unknown), Line: (unknown)");
}
@ -108,7 +108,7 @@ void log_stack_trace(CONTEXT *context) {
IMAGEHLP_MODULE64 module_info;
module_info.SizeOfStruct = sizeof(IMAGEHLP_MODULE64);
if (SymGetModuleInfo64(process, address, &module_info)) {
LOG_FORMAT_1(" Module: %s", {{LOG_DATA_CHAR_STR, module_info.ModuleName}});
LOG_1(" Module: %s", {{LOG_DATA_CHAR_STR, module_info.ModuleName}});
} else {
LOG_1(" Module: (unknown)");
}
View File
@ -14,30 +14,36 @@
typedef HANDLE sem_t;
inline
void sem_init(sem_t* semaphore, int32 value)
{
*semaphore = CreateSemaphore(NULL, value, MAXLONG, NULL); // max count must be a positive LONG; MAX_UINT32 would wrap to -1
}
inline
void sem_destroy(sem_t* semaphore)
{
CloseHandle(*semaphore);
}
// Decrement if > 0; if 0, block until the semaphore is posted
inline
void sem_wait(sem_t* semaphore) {
WaitForSingleObject(*semaphore, INFINITE);
}
inline
int32 sem_timedwait(sem_t* semaphore, uint64 ms) {
return (int32) WaitForSingleObject(*semaphore, (DWORD) ms);
}
inline
int32 sem_trywait(sem_t* semaphore) {
return (int32) WaitForSingleObject(*semaphore, 0);
}
// increment
inline
void sem_post(sem_t* semaphore) {
ReleaseSemaphore(*semaphore, 1, NULL);
}
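// Usage sketch (hedged) mirroring the POSIX semantics these wrappers emulate:
//     sem_t sem;
//     sem_init(&sem, 0);              // starts unavailable
//     /* producer */ sem_post(&sem);  // +1
//     /* consumer */ sem_wait(&sem);  // blocks until > 0, then -1
//     sem_destroy(&sem);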
View File
@ -94,6 +94,7 @@ int32 pthread_mutex_unlock(pthread_mutex_t* mutex)
return 0;
}
// WARNING: We don't support Windows events since they are much slower than condition variables/mutexes
inline
int32 pthread_cond_init(pthread_cond_t* cond, pthread_condattr_t*)
{
View File
@ -124,7 +124,7 @@ struct HashMap {
inline
void hashmap_alloc(HashMap* hm, int32 count, int32 element_size)
{
LOG_FORMAT_1("Allocate HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
LOG_1("Allocate HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
byte* data = (byte *) platform_alloc(
count * (sizeof(uint16) + element_size)
+ CEIL_DIV(count, 64) * sizeof(hm->buf.free)
@ -148,7 +148,7 @@ void hashmap_free(HashMap* hm)
inline
void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ring) noexcept
{
LOG_FORMAT_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
byte* data = ring_get_memory(
ring,
count * (sizeof(uint16) + element_size)
@ -163,7 +163,7 @@ void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ri
inline
void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory* buf) noexcept
{
LOG_FORMAT_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
byte* data = buffer_get_memory(
buf,
count * (sizeof(uint16) + element_size)
@ -178,7 +178,7 @@ void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory*
inline
void hashmap_create(HashMap* hm, int32 count, int32 element_size, byte* buf) noexcept
{
LOG_FORMAT_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
hm->table = (uint16 *) buf;
chunk_init(&hm->buf, buf + sizeof(uint16) * count, count, element_size, 8);
}
@ -797,7 +797,7 @@ int64 hashmap_dump(const HashMap* hm, byte* data, [[maybe_unused]] int32 steps =
// dump free array
memcpy(data, hm->buf.free, sizeof(uint64) * CEIL_DIV(hm->buf.count, 64));
LOG_FORMAT_1("Dumped HashMap: %n B", {{LOG_DATA_UINT64, (void *) &hm->buf.size}});
LOG_1("Dumped HashMap: %n B", {{LOG_DATA_UINT64, (void *) &hm->buf.size}});
return sizeof(hm->buf.count) // hash map count = buffer count
+ hm->buf.count * sizeof(uint16) // table content
@ -851,7 +851,7 @@ int64 hashmap_load(HashMap* hm, const byte* data, [[maybe_unused]] int32 steps =
}
} chunk_iterate_end;
LOG_FORMAT_1("Loaded HashMap: %n B", {{LOG_DATA_UINT64, &hm->buf.size}});
LOG_1("Loaded HashMap: %n B", {{LOG_DATA_UINT64, &hm->buf.size}});
// How many bytes was read from data
return sizeof(hm->buf.count) // hash map count = buffer count
View File
@ -111,7 +111,7 @@ PerfectHashMap* perfect_hashmap_prepare(PerfectHashMap* hm, const char** keys, i
// WARNING: element_size = element size + remaining HashEntry data size
void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, BufferMemory* buf)
{
LOG_FORMAT_1("Create PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
LOG_1("Create PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
hm->map_size = count;
hm->entry_size = element_size;
hm->hash_entries = buffer_get_memory(
@ -124,7 +124,7 @@ void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size,
// WARNING: element_size = element size + remaining HashEntry data size
void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, byte* buf)
{
LOG_FORMAT_1("Create PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
LOG_1("Create PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
hm->map_size = count;
hm->entry_size = element_size;
hm->hash_entries = buf;
View File
@ -9,6 +9,7 @@
#include "utils/BitUtilsTest.cpp"
#include "utils/EndianUtilsTest.cpp"
#include "utils/StringUtilsTest.cpp"
#include "utils/MathUtilsTest.cpp"
#include "utils/UtilsTest.cpp"
#ifdef UBER_TEST
@ -18,8 +19,7 @@
#endif
int main() {
printf("\nStat Tests Asserts Details\n");
printf("========================================================================================================================\n");
TEST_HEADER();
MathEvaluatorTest();
MemoryChunkMemoryTest();
@ -29,16 +29,10 @@ int main() {
UIUIThemeTest();
UtilsBitUtilsTest();
UtilsStringUtilsTest();
UtilsMathUtilsTest();
UtilsUtilsTest();
printf("========================================================================================================================\n");
printf(
"%s %5d (%5d/%5d)\n\n",
_test_global_assert_count ? "[NG]" : "[OK]",
_test_global_count,
_test_global_assert_count - _test_global_assert_error_count,
_test_global_assert_count
);
TEST_FOOTER();
return _test_global_assert_error_count ? 1 : 0;
}
View File
@ -22,6 +22,33 @@ static int32_t _test_global_assert_count = 0;
static int32_t _test_global_assert_error_count = 0;
static int32_t _test_global_count = 0;
static int64_t _test_start;
#define TEST_PROFILING_LOOPS 1000
#define TEST_HEADER() \
int64_t _test_total_start = test_start_time(); \
printf("\nStat Tests Assert(OK/NG) Time(ms) Details\n"); \
printf("========================================================================================================================\n")
#define TEST_FOOTER() \
printf("========================================================================================================================\n"); \
printf( \
"%s %5d (%5d/%5d) %8.0f\n\n", \
_test_global_assert_count ? "[NG]" : "[OK]", \
_test_global_count, \
_test_global_assert_count - _test_global_assert_error_count, \
_test_global_assert_count, \
test_duration_time(_test_total_start) / 1000000)
#ifdef UBER_TEST
#define TEST_INIT_HEADER() (void)0
#define TEST_FINALIZE_FOOTER() (void)0
#else
#define TEST_INIT_HEADER() TEST_HEADER()
#define TEST_FINALIZE_FOOTER() TEST_FOOTER()
#endif
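// In an UBER_TEST build the per-file header/footer collapse to no-ops and the uber runner
// (see MainTest.cpp above) prints a single TEST_HEADER()/TEST_FOOTER() around all suites.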
#if _WIN32
#include "../platform/win32/ExceptionHandler.h"
#include <windows.h>
@ -33,12 +60,32 @@ LONG WINAPI test_exception_handler(EXCEPTION_POINTERS *exception_info)
return EXCEPTION_EXECUTE_HANDLER;
}
double test_measure_func_time_ns(void (*func)(void *), void *para)
int64_t test_start_time()
{
LARGE_INTEGER start;
QueryPerformanceCounter(&start);
return start.QuadPart;
}
double test_duration_time(int64_t start)
{
LARGE_INTEGER frequency, end;
QueryPerformanceFrequency(&frequency);
QueryPerformanceCounter(&end);
return (double)(end.QuadPart - start) * 1e9 / frequency.QuadPart;
}
double test_measure_func_time_ns(void (*func)(volatile void *), volatile void *para)
{
LARGE_INTEGER frequency, start, end;
QueryPerformanceFrequency(&frequency);
QueryPerformanceCounter(&start);
func(para);
for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i)
{
func(para);
}
QueryPerformanceCounter(&end);
return (double)(end.QuadPart - start.QuadPart) * 1e9 / frequency.QuadPart;
}
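// Note: looping TEST_PROFILING_LOOPS times amortizes QueryPerformanceCounter overhead; the
// return value is the total in ns, so the per-call average is result / TEST_PROFILING_LOOPS.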
@ -46,8 +93,10 @@ double test_measure_func_time_ns(void (*func)(void *), void *para)
#define TEST_INIT(test_count) \
do \
{ \
TEST_INIT_HEADER(); \
setvbuf(stdout, NULL, _IONBF, 0); \
SetUnhandledExceptionFilter(test_exception_handler); \
_test_start = test_start_time(); \
_test_assert_error_count = 0; \
_test_count = 0; \
_test_assert_count = 0; \
@ -69,12 +118,32 @@ void test_exception_handler(int signum)
exit(1);
}
#include <time.h>
double test_measure_func_time_ns(void (*func)(void *), void *para)
int64_t test_start_time()
{
struct timespec start, end;
clock_gettime(CLOCK_MONOTONIC, &start);
func(para);
return (int64_t)start.tv_sec * 1000000000LL + start.tv_nsec;
}
double test_duration_time(int64_t start)
{
struct timespec end;
clock_gettime(CLOCK_MONOTONIC, &end);
return (double)((int64_t)end.tv_sec * 1000000000LL + end.tv_nsec - start);
}
double test_measure_func_time_ns(void (*func)(volatile void *), volatile void *para)
{
struct timespec start, end;
clock_gettime(CLOCK_MONOTONIC, &start);
for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i)
{
func(para);
}
clock_gettime(CLOCK_MONOTONIC, &end);
return (double)(((int64_t)end.tv_sec - start.tv_sec) * 1000000000LL + (end.tv_nsec - start.tv_nsec));
}
@ -82,9 +151,11 @@ double test_measure_func_time_ns(void (*func)(void *), void *para)
#define TEST_INIT(test_count) \
do \
{ \
TEST_INIT_HEADER(); \
setvbuf(stdout, NULL, _IONBF, 0); \
signal(SIGSEGV, test_exception_handler); \
signal(SIGABRT, test_exception_handler); \
_test_start = test_start_time(); \
_test_assert_error_count = 0; \
_test_count = 0; \
_test_assert_count = 0; \
@ -99,35 +170,36 @@ double test_measure_func_time_ns(void (*func)(void *), void *para)
} while (0)
#endif
#define TEST_FINALIZE() \
do \
{ \
if (_test_assert_error_count) \
{ \
printf( \
"[NG] %5d (%5d/%5d) %s\n", \
_test_count, _test_assert_count - _test_assert_error_count, _test_assert_count, __FILE__); \
for (int i = 0; i < _test_assert_error_count; ++i) \
{ \
printf(" %s\n", _test_log[i]); \
fflush(stdout); \
} \
} \
else \
{ \
printf( \
"[OK] %5d (%5d/%5d) %s\n", \
_test_count, _test_assert_count - _test_assert_error_count, _test_assert_count, __FILE__); \
} \
fflush(stdout); \
free(_test_log); \
_test_log = NULL; \
_test_assert_error_count = 0; \
_test_count = 0; \
_test_assert_count = 0; \
#define TEST_FINALIZE() \
do \
{ \
if (_test_assert_error_count) \
{ \
printf( \
"[NG] %5d (%5d/%5d) %8.0f %s\n", \
_test_count, _test_assert_count - _test_assert_error_count, _test_assert_count, test_duration_time(_test_start) / 1000000, __FILE__); \
for (int i = 0; i < _test_assert_error_count; ++i) \
{ \
printf(" %s\n", _test_log[i]); \
fflush(stdout); \
} \
} \
else \
{ \
printf( \
"[OK] %5d (%5d/%5d) %8.0f %s\n", \
_test_count, _test_assert_count - _test_assert_error_count, _test_assert_count, test_duration_time(_test_start) / 1000000, __FILE__); \
} \
fflush(stdout); \
free(_test_log); \
_test_log = NULL; \
_test_assert_error_count = 0; \
_test_count = 0; \
_test_assert_count = 0; \
TEST_FINALIZE_FOOTER(); \
} while (0)
#define RUN_TEST(func) \
#define TEST_RUN(func) \
++_test_count; \
++_test_global_count; \
func()
@ -274,13 +346,19 @@ double test_measure_func_time_ns(void (*func)(void *), void *para)
\
/* Measure func1 */ \
start = intrin_timestamp_counter(); \
func1((void *)&a); \
for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) \
{ \
func1((volatile void *)&a); \
} \
end = intrin_timestamp_counter(); \
cycles_func1 = end - start; \
\
/* Measure func2 */ \
start = intrin_timestamp_counter(); \
func2((void *)&b); \
for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) \
{ \
func2((volatile void *)&b); \
} \
end = intrin_timestamp_counter(); \
cycles_func2 = end - start; \
\
@ -296,7 +374,7 @@ double test_measure_func_time_ns(void (*func)(void *), void *para)
"%4i: %.2f%% (%s: %llu cycles, %s: %llu cycles)", \
__LINE__, percent_diff + 100.0f, #func1, (uint64_t)cycles_func1, #func2, (uint64_t)cycles_func2); \
} \
ASSERT_EQUALS(a, b); \
ASSERT_TRUE((a && b) || a == b); \
} while (0)
#define ASSERT_FUNCTION_TEST_CYCLE(func, cycles) \
@ -310,7 +388,10 @@ double test_measure_func_time_ns(void (*func)(void *), void *para)
\
/* Measure func */ \
start = intrin_timestamp_counter(); \
func((void *)&para); \
for (int32_t i = 0; i < TEST_PROFILING_LOOPS; ++i) \
{ \
func((volatile void *)&para); \
} \
end = intrin_timestamp_counter(); \
cycles_func = end - start; \
\
@ -333,10 +414,10 @@ double test_measure_func_time_ns(void (*func)(void *), void *para)
int64_t a = 0, b = 0; \
\
/* Measure func1 */ \
time_func1 = test_measure_func_time_ns(func1, (void *)&a); \
time_func1 = test_measure_func_time_ns(func1, (volatile void *)&a); \
\
/* Measure func2 */ \
time_func2 = test_measure_func_time_ns(func2, (void *)&b); \
time_func2 = test_measure_func_time_ns(func2, (volatile void *)&b); \
\
/* Calculate percentage difference */ \
double percent_diff = 100.0 * (time_func1 - time_func2) / time_func2; \
@ -347,31 +428,10 @@ double test_measure_func_time_ns(void (*func)(void *), void *para)
++_test_global_assert_error_count; \
snprintf( \
_test_log[_test_assert_error_count++], 1024, \
"%4i: %.2f%% (%s: %.2f ns, %s: %.2f ns)", \
"%4i: %.2f%% (%s: %.2f us, %s: %.2f us)", \
__LINE__, percent_diff + 100.0f, #func1, time_func1, #func2, time_func2); \
} \
ASSERT_EQUALS(a, b); \
} while (0)
#define ASSERT_FUNCTION_TEST_TIME(func, duration) \
do \
{ \
++_test_assert_count; \
++_test_global_assert_count; \
double time_func; \
int64_t para = 0; \
\
/* Measure func */ \
time_func = test_measure_func_time_ns(func, (void *)&para); \
\
if (time_func >= duration) \
{ \
++_test_global_assert_error_count; \
snprintf( \
_test_log[_test_assert_error_count++], 1024, \
"%4i: %.2f%% (%s: %.2f ns, %s: %.2f ns)", \
__LINE__, percent_diff + 100.0f, #func, time_func); \
} \
ASSERT_TRUE((a && b) || a == b); \
} while (0)
#endif
View File
@ -47,9 +47,9 @@ static void test_evaluator_evaluate_function() {
int main() {
TEST_INIT(10);
RUN_TEST(test_evaluator_evaluate);
RUN_TEST(test_evaluator_evaluate_variables);
RUN_TEST(test_evaluator_evaluate_function);
TEST_RUN(test_evaluator_evaluate);
TEST_RUN(test_evaluator_evaluate_variables);
TEST_RUN(test_evaluator_evaluate_function);
TEST_FINALIZE();
View File
@ -105,6 +105,7 @@ static void test_chunk_reserve_last_element() {
static void test_chunk_reserve_full() {
ChunkMemory mem = {};
chunk_alloc(&mem, 10, 10);
mem.free[0] = 0xFFFFFFFFFFFFFFFF;
ASSERT_EQUALS(chunk_reserve(&mem, 1), -1);
}
@ -129,17 +130,17 @@ static void test_chunk_reserve_last_element() {
int main() {
TEST_INIT(25);
RUN_TEST(test_chunk_alloc);
RUN_TEST(test_chunk_id_from_memory);
RUN_TEST(test_chunk_get_element);
RUN_TEST(test_chunk_reserve);
RUN_TEST(test_chunk_free_elements);
RUN_TEST(test_chunk_reserve_wrapping);
RUN_TEST(test_chunk_reserve_last_element);
TEST_RUN(test_chunk_alloc);
TEST_RUN(test_chunk_id_from_memory);
TEST_RUN(test_chunk_get_element);
TEST_RUN(test_chunk_reserve);
TEST_RUN(test_chunk_free_elements);
TEST_RUN(test_chunk_reserve_wrapping);
TEST_RUN(test_chunk_reserve_last_element);
#if !DEBUG
RUN_TEST(test_chunk_reserve_full);
RUN_TEST(test_chunk_reserve_invalid_size);
TEST_RUN(test_chunk_reserve_full);
TEST_RUN(test_chunk_reserve_invalid_size);
#endif
TEST_FINALIZE();
View File
@ -94,13 +94,13 @@ static void test_ring_commit_safe() {
int main() {
TEST_INIT(25);
RUN_TEST(test_ring_alloc);
RUN_TEST(test_ring_get_memory);
RUN_TEST(test_ring_calculate_position);
RUN_TEST(test_ring_reset);
RUN_TEST(test_ring_get_memory_nomove);
RUN_TEST(test_ring_move_pointer);
RUN_TEST(test_ring_commit_safe);
TEST_RUN(test_ring_alloc);
TEST_RUN(test_ring_get_memory);
TEST_RUN(test_ring_calculate_position);
TEST_RUN(test_ring_reset);
TEST_RUN(test_ring_get_memory_nomove);
TEST_RUN(test_ring_move_pointer);
TEST_RUN(test_ring_commit_safe);
TEST_FINALIZE();
View File
@ -78,9 +78,9 @@ static void test_hashmap_dump_load() {
int main() {
TEST_INIT(25);
RUN_TEST(test_hashmap_alloc);
RUN_TEST(test_hashmap_insert_int32);
RUN_TEST(test_hashmap_dump_load);
TEST_RUN(test_hashmap_alloc);
TEST_RUN(test_hashmap_insert_int32);
TEST_RUN(test_hashmap_dump_load);
TEST_FINALIZE();
View File
@ -87,9 +87,9 @@ static void test_layout_from_theme() {
int main() {
TEST_INIT(100);
RUN_TEST(test_layout_from_file_txt);
RUN_TEST(test_layout_to_from_data);
RUN_TEST(test_layout_from_theme);
TEST_RUN(test_layout_from_file_txt);
TEST_RUN(test_layout_to_from_data);
TEST_RUN(test_layout_from_theme);
TEST_FINALIZE();
View File
@ -65,8 +65,8 @@ static void test_theme_to_from_data() {
int main() {
TEST_INIT(100);
RUN_TEST(test_theme_from_file_txt);
RUN_TEST(test_theme_to_from_data);
TEST_RUN(test_theme_from_file_txt);
TEST_RUN(test_theme_to_from_data);
TEST_FINALIZE();
View File
@ -165,32 +165,32 @@ static void test_bytes_merge_8_r2l() {
int main() {
TEST_INIT(75);
RUN_TEST(test_is_bit_set_l2r);
RUN_TEST(test_bit_set_l2r);
RUN_TEST(test_bit_unset_l2r);
RUN_TEST(test_bit_flip_l2r);
RUN_TEST(test_bit_set_to_l2r);
RUN_TEST(test_bits_get_8_l2r);
RUN_TEST(test_bits_get_16_l2r);
RUN_TEST(test_bits_get_32_l2r);
RUN_TEST(test_bits_get_64_l2r);
RUN_TEST(test_bytes_merge_2_l2r);
RUN_TEST(test_bytes_merge_4_l2r);
RUN_TEST(test_bytes_merge_8_l2r);
TEST_RUN(test_is_bit_set_l2r);
TEST_RUN(test_bit_set_l2r);
TEST_RUN(test_bit_unset_l2r);
TEST_RUN(test_bit_flip_l2r);
TEST_RUN(test_bit_set_to_l2r);
TEST_RUN(test_bits_get_8_l2r);
TEST_RUN(test_bits_get_16_l2r);
TEST_RUN(test_bits_get_32_l2r);
TEST_RUN(test_bits_get_64_l2r);
TEST_RUN(test_bytes_merge_2_l2r);
TEST_RUN(test_bytes_merge_4_l2r);
TEST_RUN(test_bytes_merge_8_l2r);
RUN_TEST(test_is_bit_set_r2l);
RUN_TEST(test_is_bit_set_64_r2l);
RUN_TEST(test_bit_set_r2l);
RUN_TEST(test_bit_unset_r2l);
RUN_TEST(test_bit_flip_r2l);
RUN_TEST(test_bit_set_to_r2l);
RUN_TEST(test_bits_get_8_r2l);
RUN_TEST(test_bits_get_16_r2l);
RUN_TEST(test_bits_get_32_r2l);
RUN_TEST(test_bits_get_64_r2l);
RUN_TEST(test_bytes_merge_2_r2l);
RUN_TEST(test_bytes_merge_4_r2l);
RUN_TEST(test_bytes_merge_8_r2l);
TEST_RUN(test_is_bit_set_r2l);
TEST_RUN(test_is_bit_set_64_r2l);
TEST_RUN(test_bit_set_r2l);
TEST_RUN(test_bit_unset_r2l);
TEST_RUN(test_bit_flip_r2l);
TEST_RUN(test_bit_set_to_r2l);
TEST_RUN(test_bits_get_8_r2l);
TEST_RUN(test_bits_get_16_r2l);
TEST_RUN(test_bits_get_32_r2l);
TEST_RUN(test_bits_get_64_r2l);
TEST_RUN(test_bytes_merge_2_r2l);
TEST_RUN(test_bytes_merge_4_r2l);
TEST_RUN(test_bytes_merge_8_r2l);
TEST_FINALIZE();
View File
@ -118,19 +118,19 @@ static void test_endian_swap_double() {
int main() {
TEST_INIT(50);
RUN_TEST(test_swap_endian_16);
RUN_TEST(test_swap_endian_32);
RUN_TEST(test_swap_endian_64);
TEST_RUN(test_swap_endian_16);
TEST_RUN(test_swap_endian_32);
TEST_RUN(test_swap_endian_64);
RUN_TEST(test_is_little_endian);
RUN_TEST(test_endian_swap_uint16);
RUN_TEST(test_endian_swap_int16);
RUN_TEST(test_endian_swap_uint32);
RUN_TEST(test_endian_swap_int32);
RUN_TEST(test_endian_swap_uint64);
RUN_TEST(test_endian_swap_int64);
RUN_TEST(test_endian_swap_float);
RUN_TEST(test_endian_swap_double);
TEST_RUN(test_is_little_endian);
TEST_RUN(test_endian_swap_uint16);
TEST_RUN(test_endian_swap_int16);
TEST_RUN(test_endian_swap_uint32);
TEST_RUN(test_endian_swap_int32);
TEST_RUN(test_endian_swap_uint64);
TEST_RUN(test_endian_swap_int64);
TEST_RUN(test_endian_swap_float);
TEST_RUN(test_endian_swap_double);
TEST_FINALIZE();
View File
@ -0,0 +1,624 @@
#include "../TestFramework.h"
#include "../../utils/MathUtils.h"
#include <math.h>
// Correctness tests for f32 (float) approximate functions
static void test_sin_approx_f32() {
ASSERT_EQUALS_WITH_DELTA(sin_approx(0.0f), sinf(0.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(sin_approx(1.0f), sinf(1.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(sin_approx(3.14f), sinf(3.14f), 0.001f);
}
static void test_cos_approx_f32() {
ASSERT_EQUALS_WITH_DELTA(cos_approx(0.0f), cosf(0.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(cos_approx(1.0f), cosf(1.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(cos_approx(3.14f), cosf(3.14f), 0.001f);
}
static void test_tan_approx_f32() {
ASSERT_EQUALS_WITH_DELTA(tan_approx(0.0f), tanf(0.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(tan_approx(1.0f), tanf(1.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(tan_approx(0.5f), tanf(0.5f), 0.001f);
}
static void test_sqrt_approx_f32() {
ASSERT_EQUALS_WITH_DELTA(sqrt_approx(1.0f), sqrtf(1.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(sqrt_approx(2.0f), sqrtf(2.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(sqrt_approx(100.0f), sqrtf(100.0f), 0.001f);
}
static void test_asin_approx_f32() {
ASSERT_EQUALS_WITH_DELTA(asin_approx(0.0f), asinf(0.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(asin_approx(0.5f), asinf(0.5f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(asin_approx(-0.5f), asinf(-0.5f), 0.001f);
}
static void test_acos_approx_f32() {
ASSERT_EQUALS_WITH_DELTA(acos_approx(0.0f), acosf(0.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(acos_approx(0.5f), acosf(0.5f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(acos_approx(-0.5f), acosf(-0.5f), 0.001f);
}
static void test_atan_approx_f32() {
ASSERT_EQUALS_WITH_DELTA(atan_approx(0.0f), atanf(0.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(atan_approx(1.0f), atanf(1.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(atan_approx(-1.0f), atanf(-1.0f), 0.001f);
}
static void test_rsqrt_approx_f32() {
ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(1.0f), 1.0f / sqrtf(1.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(2.0f), 1.0f / sqrtf(2.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(100.0f), 1.0f / sqrtf(100.0f), 0.001f);
}
static void test_exp_approx_f32() {
ASSERT_EQUALS_WITH_DELTA(exp_approx(0.0f), expf(0.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(exp_approx(1.0f), expf(1.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(exp_approx(-1.0f), expf(-1.0f), 0.001f);
}
static void test_log_approx_f32() {
ASSERT_EQUALS_WITH_DELTA(log_approx(1.0f), logf(1.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(log_approx(2.0f), logf(2.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(log_approx(10.0f), logf(10.0f), 0.001f);
}
static void test_pow_approx_f32() {
ASSERT_EQUALS_WITH_DELTA(pow_approx(2.0f, 3.0f), powf(2.0f, 3.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(pow_approx(3.0f, 2.0f), powf(3.0f, 2.0f), 0.001f);
ASSERT_EQUALS_WITH_DELTA(pow_approx(10.0f, 0.5f), powf(10.0f, 0.5f), 0.001f);
}
// Correctness tests for f64 (double) approximate functions
static void test_sin_approx_f64() {
ASSERT_EQUALS_WITH_DELTA(sin_approx(0.0), sin(0.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(sin_approx(1.0), sin(1.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(sin_approx(3.14), sin(3.14), 0.001);
}
static void test_cos_approx_f64() {
ASSERT_EQUALS_WITH_DELTA(cos_approx(0.0), cos(0.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(cos_approx(1.0), cos(1.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(cos_approx(3.14), cos(3.14), 0.001);
}
static void test_tan_approx_f64() {
ASSERT_EQUALS_WITH_DELTA(tan_approx(0.0), tan(0.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(tan_approx(1.0), tan(1.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(tan_approx(0.5), tan(0.5), 0.001);
}
static void test_sqrt_approx_f64() {
ASSERT_EQUALS_WITH_DELTA(sqrt_approx(1.0), sqrt(1.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(sqrt_approx(2.0), sqrt(2.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(sqrt_approx(100.0), sqrt(100.0), 0.001);
}
static void test_asin_approx_f64() {
ASSERT_EQUALS_WITH_DELTA(asin_approx(0.0), asin(0.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(asin_approx(0.5), asin(0.5), 0.001);
ASSERT_EQUALS_WITH_DELTA(asin_approx(-0.5), asin(-0.5), 0.001);
}
static void test_acos_approx_f64() {
ASSERT_EQUALS_WITH_DELTA(acos_approx(0.0), acos(0.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(acos_approx(0.5), acos(0.5), 0.001);
ASSERT_EQUALS_WITH_DELTA(acos_approx(-0.5), acos(-0.5), 0.001);
}
static void test_atan_approx_f64() {
ASSERT_EQUALS_WITH_DELTA(atan_approx(0.0), atan(0.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(atan_approx(1.0), atan(1.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(atan_approx(-1.0), atan(-1.0), 0.001);
}
static void test_rsqrt_approx_f64() {
ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(1.0), 1.0 / sqrt(1.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(2.0), 1.0 / sqrt(2.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(rsqrt_approx(100.0), 1.0 / sqrt(100.0), 0.001);
}
static void test_exp_approx_f64() {
ASSERT_EQUALS_WITH_DELTA(exp_approx(0.0), exp(0.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(exp_approx(1.0), exp(1.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(exp_approx(-1.0), exp(-1.0), 0.001);
}
static void test_log_approx_f64() {
ASSERT_EQUALS_WITH_DELTA(log_approx(1.0), log(1.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(log_approx(2.0), log(2.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(log_approx(10.0), log(10.0), 0.001);
}
static void test_pow_approx_f64() {
ASSERT_EQUALS_WITH_DELTA(pow_approx(2.0, 3.0), pow(2.0, 3.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(pow_approx(3.0, 2.0), pow(3.0, 2.0), 0.001);
ASSERT_EQUALS_WITH_DELTA(pow_approx(10.0, 0.5), pow(10.0, 0.5), 0.001);
}
// Performance tests for f32 (float) approximate functions
static void _sin_approx_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += sin_approx((f32)rand() / RAND_MAX);
}
static void _sin_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += sinf((f32)rand() / RAND_MAX);
}
static void test_sin_approx_performance_f32() {
COMPARE_FUNCTION_TEST_TIME(_sin_approx_f32, _sin_f32, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_sin_approx_f32, _sin_f32, 5.0);
}
static void _cos_approx_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += cos_approx((f32)rand() / RAND_MAX);
}
static void _cos_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += cosf((f32)rand() / RAND_MAX);
}
static void test_cos_approx_performance_f32() {
COMPARE_FUNCTION_TEST_TIME(_cos_approx_f32, _cos_f32, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_cos_approx_f32, _cos_f32, 5.0);
}
static void _tan_approx_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += tan_approx((f32)rand() / RAND_MAX);
}
static void _tan_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += tanf((f32)rand() / RAND_MAX);
}
static void test_tan_approx_performance_f32() {
COMPARE_FUNCTION_TEST_TIME(_tan_approx_f32, _tan_f32, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_tan_approx_f32, _tan_f32, 5.0);
}
static void _sqrt_approx_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += sqrt_approx((f32)rand() / RAND_MAX + 0.1f); // Avoid sqrt(0)
}
static void _sqrt_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += sqrtf((f32)rand() / RAND_MAX + 0.1f); // Avoid sqrt(0)
}
static void test_sqrt_approx_performance_f32() {
COMPARE_FUNCTION_TEST_TIME(_sqrt_approx_f32, _sqrt_f32, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_sqrt_approx_f32, _sqrt_f32, 5.0);
}
static void _asin_approx_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += asin_approx((f32)rand() / RAND_MAX);
}
static void _asin_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += asinf((f32)rand() / RAND_MAX);
}
static void test_asin_approx_performance_f32() {
COMPARE_FUNCTION_TEST_TIME(_asin_approx_f32, _asin_f32, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_asin_approx_f32, _asin_f32, 5.0);
}
static void _acos_approx_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += acos_approx((f32)rand() / RAND_MAX);
}
static void _acos_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += acosf((f32)rand() / RAND_MAX);
}
static void test_acos_approx_performance_f32() {
COMPARE_FUNCTION_TEST_TIME(_acos_approx_f32, _acos_f32, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_acos_approx_f32, _acos_f32, 5.0);
}
static void _atan_approx_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += atan_approx((f32)rand() / RAND_MAX);
}
static void _atan_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += atanf((f32)rand() / RAND_MAX);
}
static void test_atan_approx_performance_f32() {
COMPARE_FUNCTION_TEST_TIME(_atan_approx_f32, _atan_f32, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_atan_approx_f32, _atan_f32, 5.0);
}
static void _rsqrt_approx_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += rsqrt_approx((f32)rand() / RAND_MAX + 0.1f); // Avoid division by zero
}
static void _rsqrt_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += 1.0f / sqrtf((f32)rand() / RAND_MAX + 0.1f); // Avoid division by zero
}
static void test_rsqrt_approx_performance_f32() {
COMPARE_FUNCTION_TEST_TIME(_rsqrt_approx_f32, _rsqrt_f32, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_rsqrt_approx_f32, _rsqrt_f32, 5.0);
}
static void _exp_approx_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += exp_approx((f32)rand() / RAND_MAX);
}
static void _exp_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += expf((f32)rand() / RAND_MAX);
}
static void test_exp_approx_performance_f32() {
COMPARE_FUNCTION_TEST_TIME(_exp_approx_f32, _exp_f32, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_exp_approx_f32, _exp_f32, 5.0);
}
static void _log_approx_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += log_approx((f32)rand() / RAND_MAX + 0.1f); // Avoid log(0)
}
static void _log_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += logf((f32)rand() / RAND_MAX + 0.1f); // Avoid log(0)
}
static void test_log_approx_performance_f32() {
COMPARE_FUNCTION_TEST_TIME(_log_approx_f32, _log_f32, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_log_approx_f32, _log_f32, 5.0);
}
static void _pow_approx_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += pow_approx((f32)rand() / RAND_MAX, (f32)rand() / RAND_MAX);
}
static void _pow_f32(volatile void* val) {
f32* res = (f32*)val;
srand((int32) *res);
*res += powf((f32)rand() / RAND_MAX, (f32)rand() / RAND_MAX);
}
static void test_pow_approx_performance_f32() {
COMPARE_FUNCTION_TEST_TIME(_pow_approx_f32, _pow_f32, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_pow_approx_f32, _pow_f32, 5.0);
}
// Performance tests for f64 (double) approximate functions
static void _sin_approx_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += sin_approx((f64)rand() / RAND_MAX);
}
static void _sin_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += sin((f64)rand() / RAND_MAX);
}
static void test_sin_approx_performance_f64() {
COMPARE_FUNCTION_TEST_TIME(_sin_approx_f64, _sin_f64, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_sin_approx_f64, _sin_f64, 5.0);
}
static void _cos_approx_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += cos_approx((f64)rand() / RAND_MAX);
}
static void _cos_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += cos((f64)rand() / RAND_MAX);
}
static void test_cos_approx_performance_f64() {
COMPARE_FUNCTION_TEST_TIME(_cos_approx_f64, _cos_f64, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_cos_approx_f64, _cos_f64, 5.0);
}
static void _tan_approx_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += tan_approx((f64)rand() / RAND_MAX);
}
static void _tan_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += tan((f64)rand() / RAND_MAX);
}
static void test_tan_approx_performance_f64() {
COMPARE_FUNCTION_TEST_TIME(_tan_approx_f64, _tan_f64, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_tan_approx_f64, _tan_f64, 5.0);
}
static void _sqrt_approx_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += sqrt_approx((f64)rand() / RAND_MAX + 0.1); // Avoid sqrt(0)
}
static void _sqrt_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += sqrt((f64)rand() / RAND_MAX + 0.1); // Avoid sqrt(0)
}
static void test_sqrt_approx_performance_f64() {
COMPARE_FUNCTION_TEST_TIME(_sqrt_approx_f64, _sqrt_f64, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_sqrt_approx_f64, _sqrt_f64, 5.0);
}
static void _asin_approx_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += asin_approx((f64)rand() / RAND_MAX);
}
static void _asin_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += asin((f64)rand() / RAND_MAX);
}
static void test_asin_approx_performance_f64() {
COMPARE_FUNCTION_TEST_TIME(_asin_approx_f64, _asin_f64, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_asin_approx_f64, _asin_f64, 5.0);
}
static void _acos_approx_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += acos_approx((f64)rand() / RAND_MAX);
}
static void _acos_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += acos((f64)rand() / RAND_MAX);
}
static void test_acos_approx_performance_f64() {
COMPARE_FUNCTION_TEST_TIME(_acos_approx_f64, _acos_f64, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_acos_approx_f64, _acos_f64, 5.0);
}
static void _atan_approx_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += atan_approx((f64)rand() / RAND_MAX);
}
static void _atan_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += atan((f64)rand() / RAND_MAX);
}
static void test_atan_approx_performance_f64() {
COMPARE_FUNCTION_TEST_TIME(_atan_approx_f64, _atan_f64, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_atan_approx_f64, _atan_f64, 5.0);
}
static void _rsqrt_approx_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += rsqrt_approx((f64)rand() / RAND_MAX + 0.1); // Avoid division by zero
}
static void _rsqrt_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += 1.0 / sqrt((f64)rand() / RAND_MAX + 0.1); // Avoid division by zero
}
static void test_rsqrt_approx_performance_f64() {
COMPARE_FUNCTION_TEST_TIME(_rsqrt_approx_f64, _rsqrt_f64, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_rsqrt_approx_f64, _rsqrt_f64, 5.0);
}
static void _exp_approx_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += exp_approx((f64)rand() / RAND_MAX);
}
static void _exp_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += exp((f64)rand() / RAND_MAX);
}
static void test_exp_approx_performance_f64() {
COMPARE_FUNCTION_TEST_TIME(_exp_approx_f64, _exp_f64, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_exp_approx_f64, _exp_f64, 5.0);
}
static void _log_approx_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += log_approx((f64)rand() / RAND_MAX + 0.1); // Avoid log(0)
}
static void _log_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += log((f64)rand() / RAND_MAX + 0.1); // Avoid log(0)
}
static void test_log_approx_performance_f64() {
COMPARE_FUNCTION_TEST_TIME(_log_approx_f64, _log_f64, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_log_approx_f64, _log_f64, 5.0);
}
static void _pow_approx_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += pow_approx((f64)rand() / RAND_MAX, (f64)rand() / RAND_MAX);
}
static void _pow_f64(volatile void* val) {
f64* res = (f64*)val;
srand((int32) *res);
*res += pow((f64)rand() / RAND_MAX, (f64)rand() / RAND_MAX);
}
static void test_pow_approx_performance_f64() {
COMPARE_FUNCTION_TEST_TIME(_pow_approx_f64, _pow_f64, 5.0);
COMPARE_FUNCTION_TEST_CYCLE(_pow_approx_f64, _pow_f64, 5.0);
}
#ifdef UBER_TEST
#ifdef main
#undef main
#endif
#define main UtilsMathUtilsTest
#endif
int main() {
TEST_INIT(200);
// Run correctness tests for f32 functions
TEST_RUN(test_sin_approx_f32);
TEST_RUN(test_cos_approx_f32);
TEST_RUN(test_tan_approx_f32);
TEST_RUN(test_sqrt_approx_f32);
TEST_RUN(test_asin_approx_f32);
TEST_RUN(test_acos_approx_f32);
TEST_RUN(test_atan_approx_f32);
TEST_RUN(test_rsqrt_approx_f32);
TEST_RUN(test_exp_approx_f32);
TEST_RUN(test_log_approx_f32);
TEST_RUN(test_pow_approx_f32);
// Run correctness tests for f64 functions
TEST_RUN(test_sin_approx_f64);
TEST_RUN(test_cos_approx_f64);
TEST_RUN(test_tan_approx_f64);
TEST_RUN(test_sqrt_approx_f64);
TEST_RUN(test_asin_approx_f64);
TEST_RUN(test_acos_approx_f64);
TEST_RUN(test_atan_approx_f64);
TEST_RUN(test_rsqrt_approx_f64);
TEST_RUN(test_exp_approx_f64);
TEST_RUN(test_log_approx_f64);
TEST_RUN(test_pow_approx_f64);
// Run performance tests for f32 functions
TEST_RUN(test_sin_approx_performance_f32);
TEST_RUN(test_cos_approx_performance_f32);
TEST_RUN(test_tan_approx_performance_f32);
TEST_RUN(test_sqrt_approx_performance_f32);
TEST_RUN(test_asin_approx_performance_f32);
TEST_RUN(test_acos_approx_performance_f32);
TEST_RUN(test_atan_approx_performance_f32);
TEST_RUN(test_rsqrt_approx_performance_f32);
TEST_RUN(test_exp_approx_performance_f32);
TEST_RUN(test_log_approx_performance_f32);
TEST_RUN(test_pow_approx_performance_f32);
// Run performance tests for f64 functions
TEST_RUN(test_sin_approx_performance_f64);
TEST_RUN(test_cos_approx_performance_f64);
TEST_RUN(test_tan_approx_performance_f64);
TEST_RUN(test_sqrt_approx_performance_f64);
TEST_RUN(test_asin_approx_performance_f64);
TEST_RUN(test_acos_approx_performance_f64);
TEST_RUN(test_atan_approx_performance_f64);
TEST_RUN(test_rsqrt_approx_performance_f64);
TEST_RUN(test_exp_approx_performance_f64);
TEST_RUN(test_log_approx_performance_f64);
TEST_RUN(test_pow_approx_performance_f64);
TEST_FINALIZE();
return 0;
}

View File

@@ -83,16 +83,24 @@ static void test_str_length()
ASSERT_EQUALS(str_length("2asdf dw"), 8);
}
static void _str_length(void* val) {
int64* res = (int64 *) val;
static void _str_length(volatile void* val) {
volatile int64* res = (volatile int64 *) val;
*res = (int64) str_length("This %d is a %s with %f values");
char buffer[32];
memcpy(buffer, "This %d is a %s with %f values", sizeof("This %d is a %s with %f values"));
buffer[30] = (byte) *res;
*res += (int64) str_length(buffer);
}
static void _strlen(void* val) {
int64* res = (int64 *) val;
static void _strlen(volatile void* val) {
volatile int64* res = (volatile int64 *) val;
*res = (int64) strlen("This %d is a %s with %f values");
char buffer[32];
memcpy(buffer, "This %d is a %s with %f values", sizeof("This %d is a %s with %f values"));
buffer[30] = (byte) *res;
*res += (int64) strlen(buffer);
}
static void test_str_length_performance() {
@@ -100,7 +108,7 @@ static void test_str_length_performance() {
COMPARE_FUNCTION_TEST_CYCLE(_str_length, _strlen, 5.0);
}
static void _str_is_alphanum(void* val) {
static void _str_is_alphanum(volatile void* val) {
bool* res = (bool *) val;
srand(0);
@@ -109,10 +117,10 @@ static void _str_is_alphanum(void* val) {
a += str_is_alphanum((byte) rand());
}
*res = (bool) a;
*res |= (bool) a;
}
static void _isalnum(void* val) {
static void _isalnum(volatile void* val) {
bool* res = (bool *) val;
srand(0);
@@ -121,7 +129,7 @@ static void _isalnum(void* val) {
a += isalnum((byte) rand());
}
*res = (bool) a;
*res |= (bool) a;
}
static void test_str_is_alphanum_performance() {
@@ -136,20 +144,20 @@ static void test_sprintf_fast()
ASSERT_TRUE(strcmp(buffer, "This 1337 is a test with 3.00000 values") == 0);
}
static void _sprintf_fast(void* val) {
bool* res = (bool *) val;
static void _sprintf_fast(volatile void* val) {
volatile bool* res = (volatile bool *) val;
char buffer[256];
sprintf_fast(buffer, "This %d is a %s with %f values", 1337, "test", 3.0);
*res = (bool) (strcmp(buffer, "This 1337 is a test with 3.00000 values") == 0);
*res |= (bool) (strcmp(buffer, "This 1337 is a test with 3.00000 values") == 0);
}
static void _sprintf(void* val) {
bool* res = (bool *) val;
static void _sprintf(volatile void* val) {
volatile bool* res = (volatile bool *) val;
char buffer[256];
sprintf(buffer, "This %d is a %s with %f values", 1337, "test", 3.0);
*res = (bool) (strcmp(buffer, "This 1337 is a test with 3.000000 values") == 0);
*res |= (bool) (strcmp(buffer, "This 1337 is a test with 3.000000 values") == 0);
}
static void test_sprintf_fast_performance() {
@@ -171,24 +179,26 @@ static void test_str_to_float()
#define main UtilsStringUtilsTest
#endif
#include <windows.h>
int main() {
TEST_INIT(100);
RUN_TEST(test_utf8_encode);
RUN_TEST(test_utf8_decode);
RUN_TEST(test_utf8_str_length);
RUN_TEST(test_str_is_float);
RUN_TEST(test_str_is_integer);
RUN_TEST(test_sprintf_fast);
RUN_TEST(test_str_is_alpha);
RUN_TEST(test_str_is_num);
RUN_TEST(test_str_is_alphanum);
RUN_TEST(test_str_length);
RUN_TEST(test_str_to_float);
TEST_RUN(test_utf8_encode);
TEST_RUN(test_utf8_decode);
TEST_RUN(test_utf8_str_length);
TEST_RUN(test_str_is_float);
TEST_RUN(test_str_is_integer);
TEST_RUN(test_sprintf_fast);
TEST_RUN(test_str_is_alpha);
TEST_RUN(test_str_is_num);
TEST_RUN(test_str_is_alphanum);
TEST_RUN(test_str_length);
TEST_RUN(test_str_to_float);
RUN_TEST(test_str_length_performance);
RUN_TEST(test_str_is_alphanum_performance);
RUN_TEST(test_sprintf_fast_performance);
TEST_RUN(test_str_length_performance);
TEST_RUN(test_str_is_alphanum_performance);
TEST_RUN(test_sprintf_fast_performance);
TEST_FINALIZE();

View File

@@ -54,26 +54,26 @@ static void test_is_empty() {
ASSERT_TRUE(is_empty(region1, 0));
}
static void _is_equal(void* val) {
bool* res = (bool *) val;
static void _is_equal(volatile void* val) {
volatile bool* res = (volatile bool *) val;
uint8_t region1[64];
uint8_t region2[64];
memset(region1, 0xAA, sizeof(region1));
memset(region2, 0xAA, sizeof(region2));
*res = is_equal(region1, region2, sizeof(region1));
*res |= is_equal(region1, region2, sizeof(region1));
}
static void _memcmp(void* val) {
bool* res = (bool *) val;
static void _memcmp(volatile void* val) {
volatile bool* res = (volatile bool *) val;
uint8_t region1[64];
uint8_t region2[64];
memset(region1, 0xAA, sizeof(region1));
memset(region2, 0xAA, sizeof(region2));
*res = (bool) (memcmp(region1, region2, sizeof(region1)) == 0);
*res |= (bool) (memcmp(region1, region2, sizeof(region1)) == 0);
}
static void test_is_equal_performance() {
@@ -81,40 +81,40 @@ static void test_is_equal_performance() {
COMPARE_FUNCTION_TEST_CYCLE(_is_equal, _memcmp, 10.0);
}
static void _is_empty(void* val) {
bool* res = (bool *) val;
static void _is_empty(volatile void* val) {
volatile bool* res = (volatile bool *) val;
alignas(64) uint8_t region1[64];
memset(region1, 0xAA, sizeof(region1));
*res = is_empty(region1, sizeof(region1));
*res |= is_empty(region1, sizeof(region1));
}
static void _memcmp_empty(void* val) {
bool* res = (bool *) val;
static void _memcmp_empty(volatile void* val) {
volatile bool* res = (volatile bool *) val;
alignas(64) uint8_t region1[64];
memset(region1, 0xAA, sizeof(region1));
*res = *region1 == 0 && memcmp(region1, region1 + 1, sizeof(region1) - 1) == 0;
*res |= *region1 == 0 && memcmp(region1, region1 + 1, sizeof(region1) - 1) == 0;
}
static void _is_empty2(void* val) {
bool* res = (bool *) val;
static void _is_empty2(volatile void* val) {
volatile bool* res = (volatile bool *) val;
alignas(64) uint8_t region1[64];
memset(region1, 0, sizeof(region1));
*res = is_empty(region1, sizeof(region1));
*res |= is_empty(region1, sizeof(region1));
}
static void _memcmp_empty2(void* val) {
bool* res = (bool *) val;
static void _memcmp_empty2(volatile void* val) {
volatile bool* res = (volatile bool *) val;
alignas(64) uint8_t region1[64];
memset(region1, 0, sizeof(region1));
*res = *region1 == 0 && memcmp(region1, region1 + 1, sizeof(region1) - 1) == 0;
*res |= *region1 == 0 && memcmp(region1, region1 + 1, sizeof(region1) - 1) == 0;
}
static void test_is_empty_performance() {
@@ -135,11 +135,11 @@ static void test_is_empty_performance() {
int main() {
TEST_INIT(10);
RUN_TEST(test_is_equal);
RUN_TEST(test_is_empty);
TEST_RUN(test_is_equal);
TEST_RUN(test_is_empty);
RUN_TEST(test_is_equal_performance);
RUN_TEST(test_is_empty_performance);
TEST_RUN(test_is_equal_performance);
TEST_RUN(test_is_empty_performance);
TEST_FINALIZE();

View File

@@ -65,7 +65,7 @@ static THREAD_RETURN thread_pool_worker(void* arg)
LOG_2("ThreadPool worker ended");
// At the end of a thread the ring memory is automatically considered freed
DEBUG_MEMORY_FREE((uintptr_t) work->ring.memory);
LOG_FORMAT_2("Freed thread RingMemory: %n B", {{LOG_DATA_UINT64, &work->ring.size}});
LOG_2("Freed thread RingMemory: %n B", {{LOG_DATA_UINT64, &work->ring.size}});
atomic_set_release(&work->state, 1);
// Job gets marked after completion -> can be overwritten now

View File

@@ -83,12 +83,14 @@ struct UILayout {
// 2. Once we are ready to switch the scene we copy the temporary memory into this data pointer
byte* data; // Owner of the actual data
// @todo replace bools with bit field
// Changes on an as-needed basis
uint32 vertex_size_static;
uint32 vertex_count_static;
bool static_content_changed;
// Changes every frame
uint32 vertex_size_dynamic;
uint32 vertex_count_dynamic;
bool dynamic_content_changed;
// Contains both static and dynamic content
@@ -105,7 +107,7 @@ struct UILayout {
// This is very similar to the currently rendered UI output but may have some empty space between elements
// The reason for this is that some elements may need different vertex counts for different states (e.g. input field)
// WARNING: This memory is shared between different layouts
uint32 active_vertex_size;
uint32 active_vertex_count;
Vertex3DSamplerTextureColor* vertices_active; // Not the data owner (see data above)
// Used during the initialization so that every element knows where we currently are during the setup process

utils/MathUtils.h Normal file (+319)
View File

@@ -0,0 +1,319 @@
/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef TOS_UTILS_MATH_UTILS_H
#define TOS_UTILS_MATH_UTILS_H
#include "../stdlib/Types.h"
#include "../utils/TestUtils.h"
// WARNING: Don't use any of these functions yet. They are too imprecise and too slow
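// The correctness tests pin each *_approx to within 1e-3 of its math.h
// counterpart on a few sample points only; treat that as the current
// accuracy budget, not a guarantee over the full input range.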
inline
f64 factorial(int32 n) {
f64 result = 1.0;
for (int32 i = 1; i <= n; ++i) {
result *= i;
}
return result;
}
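// Note: factorial() is currently unused by the series below, which inline
// the reciprocal factorials as constants instead.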
inline
f32 sin_approx(f32 x) {
// Normalize x to the range [-π, π] for better accuracy
while (x > OMS_PI) {
x -= OMS_TWO_PI;
}
while (x < -OMS_PI) {
x += OMS_TWO_PI;
}
f32 x2 = x * x;
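// Degree-9 Taylor polynomial in Horner form:
// sin(x) ≈ x * (1 - x^2/3! + x^4/5! - x^6/7! + x^8/9!)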
return x * (1.0f + x2 * (-1.0f / 6.0f + x2 * (1.0f / 120.0f + x2 * (-1.0f / 5040.0f + x2 * (1.0f / 362880.0f)))));
}
inline
f32 cos_approx(f32 x) {
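// cos(x) = sin(π/2 - x)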
return sin_approx(OMS_PI_OVER_TWO - x);
}
inline
f32 tan_approx(f32 x) {
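// Ratio of the two series; the error grows sharply near odd multiples of
// π/2, where cos_approx approaches zero.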
return sin_approx(x) / cos_approx(x);
}
inline
f32 asin_approx(f32 x) {
// Undefined for |x| > 1
ASSERT_SIMPLE(x >= -1.0f && x <= 1.0f);
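// Taylor series asin(x) = x + x^3/6 + 3x^5/40 + ...; each term is the
// previous one scaled by x^2 * (2i - 1)^2 / ((2i) * (2i + 1)).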
f32 result = x;
f32 term = x;
for (int32 i = 1; i <= 6; ++i) {
term *= x * x * (2 * i - 1) * (2 * i - 1) / ((2 * i) * (2 * i + 1));
result += term;
}
return result;
}
inline
f32 acos_approx(f32 x) {
// π/2 - asin_approx(x)
return OMS_PI_OVER_TWO - asin_approx(x);
}
inline
f32 atan_approx(f32 x) {
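// Argument reduction via atan(x) + atan(1/x) = ±π/2 keeps |x| <= 1, where
// the alternating series x - x^3/3 + x^5/5 - ... converges.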
if (x > 1.0f) {
// π/2 - atan_approx(1/x)
return OMS_PI_OVER_TWO - atan_approx(1.0f / x);
} else if (x < -1.0f) {
// -π/2 - atan_approx(1/x)
return -OMS_PI_OVER_TWO - atan_approx(1.0f / x);
}
f32 result = x;
f32 term = x;
for (int32 i = 1; i <= 6; ++i) {
term *= -x * x;
result += term / (2.0f * i + 1);
}
return result;
}
inline
f32 sqrt_approx(f32 a) {
ASSERT_SIMPLE(a >= 0);
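// Bit hack: shifting the float's raw bits right by one roughly halves the
// exponent (i.e. takes a square root); the magic constant corrects the
// exponent bias and mantissa error before Newton-Raphson refines the guess.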
int32_t i = *(int32_t*)&a;
// Magic number for initial guess
i = 0x1FBD1DF5 + (i >> 1);
float x = *(float*)&i;
// Newton-Raphson iterations
x = 0.5f * (x + a / x);
x = 0.5f * (x + a / x);
x = 0.5f * (x + a / x);
return x;
}
inline
f32 rsqrt_approx(f32 a) {
ASSERT_SIMPLE(a >= 0);
// Initial guess using magic number (Quake III hack)
f32 x = a;
uint32 i = *(uint32 *)&x;
i = 0x5F3759DF - (i >> 1); // Magic number for initial guess
x = *(f32 *) &i;
// Newton-Raphson iterations
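// Update step derived from f(x) = 1/x^2 - a:
// x' = x - f(x) / f'(x) = x * (1.5 - 0.5 * a * x * x)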
x = x * (1.5f - 0.5f * a * x * x);
x = x * (1.5f - 0.5f * a * x * x);
x = x * (1.5f - 0.5f * a * x * x);
return x;
}
inline
f32 exp_approx(f32 x) {
// Range reduction: e^x = e^(x / n)^n
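// Evaluating the series at x/8 keeps the argument small so ten Taylor terms
// converge quickly; the loop below the series multiplies the partial result
// back up to the nth power.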
const int32 n = 8;
x /= n;
// Taylor series approximation for e^x
f32 result = 1.0f;
f32 term = 1.0f;
for (int32 i = 1; i <= 10; ++i) {
term *= x / i;
result += term;
}
// Raise back to the nth power (n - 1 multiplies take result^1 to result^n)
f32 final_result = result;
for (int32 i = 1; i < n; ++i) {
final_result *= result;
}
return final_result;
}
inline
f32 log_approx(f32 x) {
ASSERT_SIMPLE(x > 0);
// Polynomial approximation
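// Uses the artanh identity ln(x) = 2 * artanh((x - 1) / (x + 1)), expanded
// as 2 * (y + y^3/3 + y^5/5 + y^7/7) with y = (x - 1) / (x + 1).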
f32 y = (x - 1) / (x + 1);
f32 y2 = y * y;
f32 result = y * (1.0f + y2 * (1.0f / 3.0f + y2 * (1.0f / 5.0f + y2 * (1.0f / 7.0f))));
return 2.0f * result;
}
inline
f32 pow_approx(f32 a, f32 b) {
if (a == 0.0f) {
return 0.0f;
}
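// a^b = e^(b * ln(a)); only valid for a > 0, so negative bases (which would
// need integer-exponent handling) are not supported here.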
return exp_approx(b * log_approx(a));
}
////////////////////////////////////////////////////////////////
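// The f64 overloads below mirror the f32 versions one-to-one, swapping in
// 64-bit magic constants for the sqrt/rsqrt initial guesses.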
inline
f64 sin_approx(f64 x) {
// Normalize x to the range [-π, π] for better accuracy
while (x > OMS_PI) {
x -= OMS_TWO_PI;
}
while (x < -OMS_PI) {
x += OMS_TWO_PI;
}
f64 x2 = x * x;
return x * (1.0 + x2 * (-1.0 / 6.0 + x2 * (1.0 / 120.0 + x2 * (-1.0 / 5040.0 + x2 * (1.0 / 362880.0)))));
}
inline
f64 cos_approx(f64 x) {
return sin_approx(OMS_PI_OVER_TWO - x);
}
inline
f64 tan_approx(f64 x) {
return sin_approx(x) / cos_approx(x);
}
inline
f64 asin_approx(f64 x) {
// Undefined for |x| > 1
ASSERT_SIMPLE(x >= -1.0 && x <= 1.0);
f64 result = x;
f64 term = x;
for (int32 i = 1; i <= 6; ++i) {
term *= x * x * (2 * i - 1) * (2 * i - 1) / ((2 * i) * (2 * i + 1));
result += term;
}
return result;
}
inline
f64 acos_approx(f64 x) {
// π/2 - asin_approx(x)
return OMS_PI_OVER_TWO - asin_approx(x);
}
inline
f64 atan_approx(f64 x) {
if (x > 1.0) {
// π/2 - atan_approx(1/x)
return OMS_PI_OVER_TWO - atan_approx(1.0 / x);
} else if (x < -1.0) {
// -π/2 - atan_approx(1/x)
return -OMS_PI_OVER_TWO - atan_approx(1.0 / x);
}
f64 result = x;
f64 term = x;
for (int32 i = 1; i <= 6; ++i) {
term *= -x * x;
result += term / (2 * i + 1);
}
return result;
}
inline
f64 sqrt_approx(f64 a) {
ASSERT_SIMPLE(a >= 0);
int64_t i = *(int64_t*)&a;
// Magic number for initial guess
i = 0x1FF7A3BEA91D9B1B + (i >> 1);
f64 x = *(f64*)&i;
// Newton-Raphson iterations
x = 0.5 * (x + a / x);
x = 0.5 * (x + a / x);
x = 0.5 * (x + a / x);
return x;
}
inline
f64 rsqrt_approx(f64 a) {
ASSERT_SIMPLE(a >= 0);
// Initial guess using magic number (Quake III hack)
f64 x = a;
uint64 i = *(uint64 *)&x;
i = 0x5fe6eb50c7b537a9 - (i >> 1); // Magic number for initial guess
x = *(f64 *) &i;
// Newton-Raphson iterations
x = x * (1.5 - 0.5 * a * x * x);
x = x * (1.5 - 0.5 * a * x * x);
x = x * (1.5 - 0.5 * a * x * x);
return x;
}
inline
f64 exp_approx(f64 x) {
// Range reduction: e^x = e^(x / n)^n
const int32 n = 8;
x /= n;
// Taylor series approximation for e^x
f64 result = 1.0;
f64 term = 1.0;
for (int32 i = 1; i <= 10; ++i) {
term *= x / i;
result += term;
}
// Raise back to the nth power (n multiplies of result, starting from 1.0)
f64 final_result = 1.0;
for (int32 i = 0; i < n; ++i) {
final_result *= result;
}
return final_result;
}
inline
f64 log_approx(f64 x) {
ASSERT_SIMPLE(x > 0);
// Polynomial approximation
f64 y = (x - 1) / (x + 1);
f64 y2 = y * y;
f64 result = y * (1.0 + y2 * (1.0 / 3.0 + y2 * (1.0 / 5.0 + y2 * (1.0 / 7.0))));
return 2.0 * result;
}
inline
f64 pow_approx(f64 a, f64 b) {
if (a == 0.0) {
return 0.0;
}
return exp_approx(b * log_approx(a));
}
#endif