diff --git a/.gitignore b/.gitignore index ef5bbb3..7b12ee2 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ bin/* *.res *.exe *.map -*.pdb \ No newline at end of file +*.pdb +*.o \ No newline at end of file diff --git a/architecture/x86/CpuInfo.cpp b/architecture/x86/CpuInfo.cpp index a8f86df..69e12f1 100644 --- a/architecture/x86/CpuInfo.cpp +++ b/architecture/x86/CpuInfo.cpp @@ -13,45 +13,15 @@ #include #include "../../stdlib/Types.h" #include "../CpuInfo.h" +#include "../../compiler/CompilerUtils.h" -#ifdef _MSC_VER - #include <intrin.h> - - static inline - void cpuid(int32 cpuInfo[4], int32 function_id) { - __cpuidex(cpuInfo, function_id, 0); - } -#else - /* - #include <cpuid.h> - - static inline - void cpuid(int32 cpuInfo[4], int32 function_id) { - __cpuid(function_id, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]); - } - */ - - static inline - void cpuid(int32 cpuInfo[4], int32 function_id) { - asm volatile( - "cpuid" - : "=a" (cpuInfo[0]), "=b" (cpuInfo[1]), "=c" (cpuInfo[2]), "=d" (cpuInfo[3]) - : "a" (function_id) - ); - } -#endif - -inline -int32 svcntw() { - return 0; -} +#define svcntw() 0 uint64 cpu_info_features() { uint64 feature_bitfield = 0; int32 cpuInfo[4] = {0}; - // Query function 0x00000001 - cpuid(cpuInfo, 0x00000001); + compiler_cpuid(cpuInfo, 0x00000001); uint32 ecx = (uint32) cpuInfo[2]; uint32 edx = (uint32) cpuInfo[3]; @@ -84,8 +54,7 @@ uint64 cpu_info_features() { if (edx & (1 << 25)) feature_bitfield |= CPU_FEATURE_SSE; if (edx & (1 << 26)) feature_bitfield |= CPU_FEATURE_SSE2; - // Query function 0x00000007 - cpuid(cpuInfo, 0x00000007); + compiler_cpuid(cpuInfo, 0x00000007); uint32 ebx = (uint32) cpuInfo[1]; uint32 ecx7 = (uint32) cpuInfo[2]; @@ -107,8 +76,7 @@ uint64 cpu_info_features() { // Map ECX features if (ecx7 & (1 << 0)) feature_bitfield |= CPU_FEATURE_PREFETCHWT1; - // Query extended function 0x80000001 - cpuid(cpuInfo, 0x80000001); + compiler_cpuid(cpuInfo, 0x80000001); uint32 ecx81 = (uint32) cpuInfo[2]; uint32 edx81 = (uint32) cpuInfo[3]; @@ -141,7 +109,7 @@ void cpu_info_cache(byte level, CpuCacheInfo* cache) { cache->line_size = 0; int32 regs[4]; - cpuid(regs, (0x04 << 8) | level); + compiler_cpuid(regs, (0x04 << 8) | level); eax = regs[0]; ebx = regs[1]; ecx = regs[2]; diff --git a/architecture/x86/Intrinsics.h b/architecture/x86/Intrinsics.h index 5d8bc01..b763cc9 100644 --- a/architecture/x86/Intrinsics.h +++ b/architecture/x86/Intrinsics.h @@ -54,9 +54,9 @@ #define intrin_bits_count_32(data) _mm_popcnt_u32((data)) #define intrin_bits_count_64(data) _mm_popcnt_u64((data)) -#define intrin_prefetch_l1(mem) _mm_prefetch((mem), _MM_HINT_T0) -#define intrin_prefetch_l2(mem) _mm_prefetch((mem), _MM_HINT_T1) -#define intrin_prefetch_l3(mem) _mm_prefetch((mem), _MM_HINT_T2) +#define intrin_prefetch_l1(mem) _mm_prefetch((const char *) (mem), _MM_HINT_T0) +#define intrin_prefetch_l2(mem) _mm_prefetch((const char *) (mem), _MM_HINT_T1) +#define intrin_prefetch_l3(mem) _mm_prefetch((const char *) (mem), _MM_HINT_T2) inline uint64 intrin_timestamp_counter() { diff --git a/asset/Asset.h b/asset/Asset.h index 9d8fc3b..10af81d 100644 --- a/asset/Asset.h +++ b/asset/Asset.h @@ -23,9 +23,6 @@ struct Asset { // Could be 0 if there is no official id uint32 official_id; - // @performance Maybe if we would set the IS_LOADED_STATE in the enum as the highest bit we could use the state variable and check it with >= - atomic_32 int32 is_loaded; - // Describes how much ram/vram the asset uses // E.g. 
vram_size = 0 but ram_size > 0 means that it never uses any gpu memory uint32 ram_size; @@ -35,25 +32,29 @@ // Usually 1 but in some cases an ams may hold entities of variable chunk length // For textures for example a 128x128 is of size 1 but 256x256 is of size 4 + // Needs to be uint16 since we need more than 2^8 for very large textures (4K/8K textures) uint16 size; + // @performance If we set IS_LOADED_STATE as the highest bit in the enum, we could reuse the state variable and check it with >= + atomic_8 int8 is_loaded; + // Which asset component is used byte component_id; byte state; - // Actual memory address and specific asset data - byte* self; - // Counts the references to this asset // e.g. textures or entity schemas (NOT entities themselves) uint16 reference_count; + // Actual memory address and specific asset data + byte* self; + // An asset can reference up to N other assets // This allows us to quickly update the other assets // Uses official_id // @performance This could potentially be bad because many assets will have 0 or only 1-4 references - uint32 references[12]; + uint32 references[8]; }; #endif \ No newline at end of file diff --git a/asset/AssetArchive.h b/asset/AssetArchive.h index 13832ed..c67251f 100644 --- a/asset/AssetArchive.h +++ b/asset/AssetArchive.h @@ -214,6 +214,7 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana if (element->type == 0) { asset = thrd_ams_reserve_asset(ams, (byte) component_id, id_str, element->uncompressed); asset->official_id = id; + asset->ram_size = element->uncompressed; FileBody file = {}; file.content = asset->self; diff --git a/camera/Camera.h b/camera/Camera.h index d80b694..8cda618 100644 --- a/camera/Camera.h +++ b/camera/Camera.h @@ -39,8 +39,6 @@ struct Camera { f32 sensitivity; f32 zoom; - // @question Consider to make these f32 values. 
- // Yes, this uses obviously more space BUT we use these values very often in vertex calculations and always have to cast it uint16 viewport_width; uint16 viewport_height; @@ -49,9 +47,9 @@ struct Camera { f32 zfar; f32 aspect; - f32 view[16]; - f32 projection[16]; - f32 orth[16]; + alignas(64) f32 view[16]; + alignas(64) f32 projection[16]; + alignas(64) f32 orth[16]; }; void @@ -228,11 +226,12 @@ void camera_movement(Camera* camera, CameraMovement* movement, f32 dt, bool rela inline void camera_orth_matrix_lh(Camera* __restrict camera) { - mat4_identity(camera->orth); + //mat4_identity(camera->orth); + camera->orth[15] = 1.0f; mat4_ortho_sparse_lh( camera->orth, - 0, camera->viewport_width, - 0, camera->viewport_height, + 0.0f, (f32) camera->viewport_width, + 0.0f, (f32) camera->viewport_height, camera->znear, camera->zfar ); @@ -241,11 +240,12 @@ void camera_orth_matrix_lh(Camera* __restrict camera) inline void camera_orth_matrix_rh(Camera* __restrict camera) { - mat4_identity(camera->orth); + //mat4_identity(camera->orth); + camera->orth[15] = 1.0f; mat4_ortho_sparse_rh( camera->orth, - 0, camera->viewport_width, - 0, camera->viewport_height, + 0.0f, (f32) camera->viewport_width, + 0.0f, (f32) camera->viewport_height, camera->znear, camera->zfar ); @@ -254,7 +254,8 @@ void camera_orth_matrix_rh(Camera* __restrict camera) inline void camera_projection_matrix_lh(Camera* __restrict camera) { - mat4_identity(camera->projection); + //mat4_identity(camera->projection); + camera->projection[15] = 1.0f; mat4_perspective_sparse_lh( camera->projection, camera->fov, @@ -267,7 +268,8 @@ void camera_projection_matrix_lh(Camera* __restrict camera) inline void camera_projection_matrix_rh(Camera* __restrict camera) { - mat4_identity(camera->projection); + //mat4_identity(camera->projection); + camera->projection[15] = 1.0f; mat4_perspective_sparse_rh( camera->projection, camera->fov, diff --git a/command/AppCmdBuffer.cpp b/command/AppCmdBuffer.cpp index 793a95e..d1762d6 100644 --- a/command/AppCmdBuffer.cpp +++ b/command/AppCmdBuffer.cpp @@ -126,7 +126,7 @@ Asset* cmd_texture_create(AppCmdBuffer* __restrict cb, Command* __restrict cmd) } Texture* texture = (Texture *) asset->self; - if (cb->gpu_api == GPU_API_TYPE_OPENGL + if (cb->gpu_api_type == GPU_API_TYPE_OPENGL && !(texture->image.image_settings & IMAGE_SETTING_BOTTOM_TO_TOP) ) { image_flip_vertical(cb->thrd_mem_vol, &texture->image); @@ -157,7 +157,7 @@ Asset* cmd_font_create(AppCmdBuffer* __restrict cb, Command* __restrict cmd) } Font* font = (Font *) asset->self; - if (cb->gpu_api == GPU_API_TYPE_OPENGL) { + if (cb->gpu_api_type == GPU_API_TYPE_OPENGL) { font_invert_coordinates(font); } @@ -369,7 +369,7 @@ inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, int32 asset_id) { // Setup basic texture Texture* texture = (Texture *) asset->self; - if (cb->gpu_api == GPU_API_TYPE_OPENGL + if (cb->gpu_api_type == GPU_API_TYPE_OPENGL && !(texture->image.image_settings & IMAGE_SETTING_BOTTOM_TO_TOP) ) { image_flip_vertical(cb->mem_vol, &texture->image); @@ -393,7 +393,7 @@ inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, const char* name) { // Setup basic texture Texture* texture = (Texture *) asset->self; - if (cb->gpu_api == GPU_API_TYPE_OPENGL + if (cb->gpu_api_type == GPU_API_TYPE_OPENGL && !(texture->image.image_settings & IMAGE_SETTING_BOTTOM_TO_TOP) ) { image_flip_vertical(cb->mem_vol, &texture->image); @@ -419,7 +419,7 @@ inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, int32 asset_id) { // Setup font Font* font = (Font *) 
asset->self; - if (cb->gpu_api == GPU_API_TYPE_OPENGL) { + if (cb->gpu_api_type == GPU_API_TYPE_OPENGL) { font_invert_coordinates(font); } @@ -441,7 +441,7 @@ inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, const char* name) { // Setup font Font* font = (Font *) asset->self; - if (cb->gpu_api == GPU_API_TYPE_OPENGL) { + if (cb->gpu_api_type == GPU_API_TYPE_OPENGL) { font_invert_coordinates(font); } @@ -483,10 +483,9 @@ UIThemeStyle* cmd_theme_load_sync( inline void cmd_layout_populate_sync( AppCmdBuffer*, - UILayout* layout, const UIThemeStyle* theme, - const Camera* camera + UILayout* layout, const UIThemeStyle* theme ) { - layout_from_theme(layout, theme, camera); + layout_from_theme(layout, theme); } inline @@ -504,9 +503,16 @@ UILayout* cmd_ui_load_sync( return NULL; } - cmd_layout_populate_sync(cb, layout, general_theme, camera); + cmd_layout_populate_sync(cb, layout, general_theme); cmd_theme_load_sync(cb, theme, theme_path); - cmd_layout_populate_sync(cb, layout, theme, camera); + cmd_layout_populate_sync(cb, layout, theme); + + UIElement* root = layout_get_element(layout, "root"); + UIWindow* default_style = (UIWindow *) layout_get_element_style(layout, root, UI_STYLE_TYPE_DEFAULT); + if (default_style) { + default_style->dimension.dimension.width = camera->viewport_width; + default_style->dimension.dimension.height = camera->viewport_height; + } return layout; } diff --git a/command/AppCmdBuffer.h b/command/AppCmdBuffer.h index 733a98c..fc38d49 100644 --- a/command/AppCmdBuffer.h +++ b/command/AppCmdBuffer.h @@ -43,17 +43,16 @@ struct AppCmdBuffer { Queue* assets_to_load; Queue* files_to_load; AudioMixer* mixer; - GpuApiType gpu_api; + GpuApiType gpu_api_type; + void* gpu_api; }; #if OPENGL #include "../gpuapi/opengl/AppCmdBuffer.h" #elif VULKAN - inline void* cmd_shader_load(AppCmdBuffer*, Command*) { return NULL; } - inline void* cmd_shader_load_sync(AppCmdBuffer*, void*, int32*) { return NULL; } + #include "../gpuapi/vulkan/AppCmdBuffer.h" #elif DIRECTX - inline void* cmd_shader_load(AppCmdBuffer*, Command*) { return NULL; } - inline void* cmd_shader_load_sync(AppCmdBuffer*, void*, int32*) { return NULL; } + #include "../gpuapi/direct3d/AppCmdBuffer.h" #else inline void* cmd_shader_load(AppCmdBuffer*, Command*) { return NULL; } inline void* cmd_shader_load_sync(AppCmdBuffer*, void*, int32*) { return NULL; } diff --git a/compiler/gcc/Atomic.h b/compiler/gcc/Atomic.h index b1df890..2121d40 100644 --- a/compiler/gcc/Atomic.h +++ b/compiler/gcc/Atomic.h @@ -10,195 +10,396 @@ #define TOS_COMPILER_GCC_ATOMIC_H #include "../../stdlib/Types.h" +#include "CompilerUtils.h" #include -inline void atomic_set_relaxed(void** target, void* value) { __atomic_store_n(target, value, __ATOMIC_RELAXED); } -inline void* atomic_get_relaxed(void** target) { return __atomic_load_n(target, __ATOMIC_RELAXED); } -inline void atomic_set_relaxed(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } -inline void atomic_set_relaxed(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } -inline int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } -inline int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } -inline int32 atomic_get_relaxed(volatile int32* value) { return __atomic_load_n((int32 *) value, __ATOMIC_RELAXED); } -inline int64 
atomic_get_relaxed(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_RELAXED); } -inline void atomic_increment_relaxed(volatile int32* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } -inline void atomic_decrement_relaxed(volatile int32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } -inline void atomic_increment_relaxed(volatile int64* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } -inline void atomic_decrement_relaxed(volatile int64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } -inline void atomic_add_relaxed(volatile int32* value, int32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } -inline void atomic_sub_relaxed(volatile int32* value, int32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -inline void atomic_add_relaxed(volatile int64* value, int64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } -inline void atomic_sub_relaxed(volatile int64* value, int64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -inline int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } -inline int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } -inline int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } -inline int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } -inline int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } -inline void atomic_set_relaxed(volatile uint32* value, uint32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } -inline void atomic_set_relaxed(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } -inline uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } -inline uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } -inline uint32 atomic_get_relaxed(volatile uint32* value) { return __atomic_load_n((uint32 *) value, __ATOMIC_RELAXED); } -inline uint64 atomic_get_relaxed(volatile uint64* value) { return __atomic_load_n((uint64 *) value, __ATOMIC_RELAXED); } -inline void atomic_increment_relaxed(volatile uint32* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } -inline void atomic_decrement_relaxed(volatile uint32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } -inline void atomic_increment_relaxed(volatile uint64* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } -inline void atomic_decrement_relaxed(volatile uint64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } -inline void atomic_add_relaxed(volatile uint32* value, uint32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } -inline void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -inline uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) { __atomic_compare_exchange_n(value, 
expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } -inline uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } -inline uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } -inline uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } -inline uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } -inline void atomic_and_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } -inline void atomic_and_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } -inline void atomic_and_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } -inline void atomic_and_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } -inline void atomic_or_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } -inline void atomic_or_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } -inline void atomic_or_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } -inline void atomic_or_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(void** target, void* value) { __atomic_store_n(target, value, __ATOMIC_RELAXED); } +FORCE_INLINE void* atomic_get_relaxed(void** target) { return __atomic_load_n(target, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile int8* value, int8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile int16* value, int16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE int8 atomic_fetch_set_relaxed(volatile int8* value, int8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE int16 atomic_fetch_set_relaxed(volatile int16* value, int16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE int8 atomic_get_relaxed(volatile int8* value) { return __atomic_load_n((int8 *) value, __ATOMIC_RELAXED); } +FORCE_INLINE int16 atomic_get_relaxed(volatile int16* value) { return __atomic_load_n((int16 *) value, __ATOMIC_RELAXED); } +FORCE_INLINE int32 atomic_get_relaxed(volatile int32* value) { return __atomic_load_n((int32 *) value, __ATOMIC_RELAXED); } +FORCE_INLINE int64 atomic_get_relaxed(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_RELAXED); } +FORCE_INLINE void 
atomic_increment_relaxed(volatile int8* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_decrement_relaxed(volatile int8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_increment_relaxed(volatile int16* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_decrement_relaxed(volatile int16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_increment_relaxed(volatile int32* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_decrement_relaxed(volatile int32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_increment_relaxed(volatile int64* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_decrement_relaxed(volatile int64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile int8* value, int8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile int8* value, int8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile int16* value, int16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile int16* value, int16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile int32* value, int32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile int32* value, int32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile int64* value, int64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile int64* value, int64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE int8 atomic_fetch_add_relaxed(volatile int8* value, int8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int8 atomic_fetch_sub_relaxed(volatile int8* value, int8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int16 atomic_fetch_add_relaxed(volatile int16* value, int16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int16 atomic_fetch_sub_relaxed(volatile int16* value, int16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int32 
atomic_fetch_add_relaxed(volatile int32* value, int32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile uint8* value, uint8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile uint16* value, uint16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile uint32* value, uint32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE uint8 atomic_fetch_set_relaxed(volatile uint8* value, uint8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE uint16 atomic_fetch_set_relaxed(volatile uint16* value, uint16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE uint8 atomic_get_relaxed(volatile uint8* value) { return __atomic_load_n(value, __ATOMIC_RELAXED); } +FORCE_INLINE uint16 atomic_get_relaxed(volatile uint16* value) { return __atomic_load_n(value, __ATOMIC_RELAXED); } +FORCE_INLINE uint32 atomic_get_relaxed(volatile uint32* value) { return __atomic_load_n(value, __ATOMIC_RELAXED); } +FORCE_INLINE uint64 atomic_get_relaxed(volatile uint64* value) { return __atomic_load_n(value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_increment_relaxed(volatile uint8* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_decrement_relaxed(volatile uint8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_increment_relaxed(volatile uint16* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_decrement_relaxed(volatile uint16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_increment_relaxed(volatile uint32* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_decrement_relaxed(volatile uint32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_increment_relaxed(volatile uint64* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_decrement_relaxed(volatile uint64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile uint8* value, uint8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile uint8* value, uint8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile uint16* value, uint16 increment) { __atomic_add_fetch(value, 
increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile uint16* value, uint16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile uint32* value, uint32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile uint64* value, uint64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE uint8 atomic_fetch_add_relaxed(volatile uint8* value, uint8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint8 atomic_fetch_sub_relaxed(volatile uint8* value, uint8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint16 atomic_fetch_add_relaxed(volatile uint16* value, uint16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint16 atomic_fetch_sub_relaxed(volatile uint16* value, uint16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile uint8* value, uint8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile int8* value, int8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile uint16* value, uint16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile int16* value, int16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile uint8* value, uint8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } 
+FORCE_INLINE void atomic_or_relaxed(volatile int8* value, int8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile uint16* value, uint16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile int16* value, int16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } -inline void atomic_set_acquire(void** target, void* value) { __atomic_store_n(target, value, __ATOMIC_ACQUIRE); } -inline void* atomic_get_acquire(void** target) { return __atomic_load_n(target, __ATOMIC_ACQUIRE); } -inline void atomic_set_acquire(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } -inline void atomic_set_acquire(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } -inline int32 atomic_fetch_set_acquire(volatile int32* value, int32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } -inline int64 atomic_fetch_set_acquire(volatile int64* value, int64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } -inline int32 atomic_get_acquire(volatile int32* value) { return __atomic_load_n((int32 *) value, __ATOMIC_ACQUIRE); } -inline int64 atomic_get_acquire(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_ACQUIRE); } -inline void atomic_increment_acquire(volatile int32* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } -inline void atomic_decrement_acquire(volatile int32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } -inline void atomic_increment_acquire(volatile int64* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } -inline void atomic_decrement_acquire(volatile int64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } -inline void atomic_add_acquire(volatile int32* value, int32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } -inline void atomic_sub_acquire(volatile int32* value, int32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -inline void atomic_add_acquire(volatile int64* value, int64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } -inline void atomic_sub_acquire(volatile int64* value, int64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -inline int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expected, int32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } -inline int32 atomic_fetch_add_acquire(volatile int32* value, int32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } -inline int32 atomic_fetch_sub_acquire(volatile int32* value, int32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } -inline int64 atomic_fetch_add_acquire(volatile int64* value, int64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } -inline int64 
atomic_fetch_sub_acquire(volatile int64* value, int64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } -inline void atomic_set_acquire(volatile uint32* value, uint32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } -inline void atomic_set_acquire(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } -inline uint32 atomic_fetch_set_acquire(volatile uint32* value, uint32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } -inline uint64 atomic_fetch_set_acquire(volatile uint64* value, uint64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } -inline uint32 atomic_get_acquire(volatile uint32* value) { return __atomic_load_n((uint32 *) value, __ATOMIC_ACQUIRE); } -inline uint64 atomic_get_acquire(volatile uint64* value) { return __atomic_load_n((uint64 *) value, __ATOMIC_ACQUIRE); } -inline void atomic_increment_acquire(volatile uint32* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } -inline void atomic_decrement_acquire(volatile uint32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } -inline void atomic_increment_acquire(volatile uint64* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } -inline void atomic_decrement_acquire(volatile uint64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } -inline void atomic_add_acquire(volatile uint32* value, uint32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } -inline void atomic_sub_acquire(volatile uint32* value, uint32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -inline uint32 atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expected, uint32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } -inline uint32 atomic_fetch_add_acquire(volatile uint32* value, uint32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } -inline uint32 atomic_fetch_sub_acquire(volatile uint32* value, uint32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } -inline uint64 atomic_fetch_add_acquire(volatile uint64* value, uint64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } -inline uint64 atomic_fetch_sub_acquire(volatile uint64* value, uint64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } -inline void atomic_and_acquire(volatile uint32* value, uint32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } -inline void atomic_and_acquire(volatile int32* value, int32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } -inline void atomic_and_acquire(volatile uint64* value, uint64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } -inline void atomic_and_acquire(volatile int64* value, int64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } -inline void atomic_or_acquire(volatile uint32* value, uint32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } -inline void atomic_or_acquire(volatile int32* value, int32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } -inline void atomic_or_acquire(volatile uint64* value, uint64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } -inline void atomic_or_acquire(volatile int64* value, int64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(void** target, void* value) { __atomic_store_n(target, value, 
__ATOMIC_ACQUIRE); } +FORCE_INLINE void* atomic_get_acquire(void** target) { return __atomic_load_n(target, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile int8* value, int8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile int16* value, int16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int8 atomic_fetch_set_acquire(volatile int8* value, int8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int16 atomic_fetch_set_acquire(volatile int16* value, int16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int32 atomic_fetch_set_acquire(volatile int32* value, int32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int64 atomic_fetch_set_acquire(volatile int64* value, int64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int8 atomic_get_acquire(volatile int8* value) { return __atomic_load_n((int8 *) value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int16 atomic_get_acquire(volatile int16* value) { return __atomic_load_n((int16 *) value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int32 atomic_get_acquire(volatile int32* value) { return __atomic_load_n((int32 *) value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int64 atomic_get_acquire(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_increment_acquire(volatile int8* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_decrement_acquire(volatile int8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_increment_acquire(volatile int16* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_decrement_acquire(volatile int16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_increment_acquire(volatile int32* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_decrement_acquire(volatile int32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_increment_acquire(volatile int64* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_decrement_acquire(volatile int64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile int8* value, int8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile int8* value, int8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile int16* value, int16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile int16* value, int16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile int32* value, int32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile int32* value, int32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } 
+FORCE_INLINE void atomic_add_acquire(volatile int64* value, int64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile int64* value, int64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE f32 atomic_compare_exchange_weak_acquire(volatile f32* value, f32* expected, f32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE f64 atomic_compare_exchange_weak_acquire(volatile f64* value, f64* expected, f64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expected, int32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE int64 atomic_compare_exchange_weak_acquire(volatile int64* value, int64* expected, int64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE int8 atomic_fetch_add_acquire(volatile int8* value, int8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int8 atomic_fetch_sub_acquire(volatile int8* value, int8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int16 atomic_fetch_add_acquire(volatile int16* value, int16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int16 atomic_fetch_sub_acquire(volatile int16* value, int16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int32 atomic_fetch_add_acquire(volatile int32* value, int32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int32 atomic_fetch_sub_acquire(volatile int32* value, int32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int64 atomic_fetch_add_acquire(volatile int64* value, int64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int64 atomic_fetch_sub_acquire(volatile int64* value, int64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile uint8* value, uint8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile uint16* value, uint16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile uint32* value, uint32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint8 atomic_fetch_set_acquire(volatile uint8* value, uint8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint16 atomic_fetch_set_acquire(volatile uint16* value, uint16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint32 atomic_fetch_set_acquire(volatile uint32* value, uint32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint64 atomic_fetch_set_acquire(volatile uint64* value, uint64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint8 
atomic_get_acquire(volatile uint8* value) { return __atomic_load_n(value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint16 atomic_get_acquire(volatile uint16* value) { return __atomic_load_n(value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint32 atomic_get_acquire(volatile uint32* value) { return __atomic_load_n(value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint64 atomic_get_acquire(volatile uint64* value) { return __atomic_load_n(value, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_increment_acquire(volatile uint8* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_decrement_acquire(volatile uint8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_increment_acquire(volatile uint16* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_decrement_acquire(volatile uint16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_increment_acquire(volatile uint32* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_decrement_acquire(volatile uint32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_increment_acquire(volatile uint64* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_decrement_acquire(volatile uint64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile uint8* value, uint8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile uint8* value, uint8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile uint16* value, uint16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile uint16* value, uint16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile uint32* value, uint32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile uint32* value, uint32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile uint64* value, uint64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile uint64* value, uint64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expected, uint32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire(volatile uint64* value, uint64* expected, uint64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE uint8 atomic_fetch_add_acquire(volatile uint8* value, uint8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint8 atomic_fetch_sub_acquire(volatile uint8* value, uint8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint16 atomic_fetch_add_acquire(volatile uint16* value, uint16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint16 atomic_fetch_sub_acquire(volatile uint16* value, uint16 operand) { return __atomic_sub_fetch(value, 
operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint32 atomic_fetch_add_acquire(volatile uint32* value, uint32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint32 atomic_fetch_sub_acquire(volatile uint32* value, uint32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint64 atomic_fetch_add_acquire(volatile uint64* value, uint64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint64 atomic_fetch_sub_acquire(volatile uint64* value, uint64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile uint8* value, uint8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile int8* value, int8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile uint16* value, uint16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile int16* value, int16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile uint32* value, uint32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile int32* value, int32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile uint64* value, uint64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile int64* value, int64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile uint8* value, uint8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile int8* value, int8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile uint16* value, uint16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile int16* value, int16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile uint32* value, uint32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile int32* value, int32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile uint64* value, uint64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile int64* value, int64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } -inline void atomic_set_release(void** target, void* value) { __atomic_store_n(target, value, __ATOMIC_RELEASE); } -inline void* atomic_get_release(void** target) { return __atomic_load_n(target, __ATOMIC_RELEASE); } -inline void atomic_set_release(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } -inline void atomic_set_release(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } -inline int32 atomic_fetch_set_release(volatile int32* value, int32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } -inline int64 atomic_fetch_set_release(volatile int64* value, int64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } -inline int32 atomic_get_release(volatile int32* value) { return __atomic_load_n((int32 *) value, 
__ATOMIC_RELEASE); } -inline int64 atomic_get_release(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_RELEASE); } -inline void atomic_increment_release(volatile int32* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); } -inline void atomic_decrement_release(volatile int32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); } -inline void atomic_increment_release(volatile int64* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); } -inline void atomic_decrement_release(volatile int64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); } -inline void atomic_add_release(volatile int32* value, int32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } -inline void atomic_sub_release(volatile int32* value, int32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -inline void atomic_add_release(volatile int64* value, int64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } -inline void atomic_sub_release(volatile int64* value, int64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -inline int32 atomic_compare_exchange_weak_release(volatile int32* value, int32* expected, int32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } -inline int32 atomic_fetch_add_release(volatile int32* value, int32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } -inline int32 atomic_fetch_sub_release(volatile int32* value, int32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } -inline int64 atomic_fetch_add_release(volatile int64* value, int64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } -inline int64 atomic_fetch_sub_release(volatile int64* value, int64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } -inline void atomic_set_release(volatile uint32* value, uint32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } -inline void atomic_set_release(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } -inline uint32 atomic_fetch_set_release(volatile uint32* value, uint32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } -inline uint64 atomic_fetch_set_release(volatile uint64* value, uint64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } -inline uint32 atomic_get_release(volatile uint32* value) { return __atomic_load_n((uint32 *) value, __ATOMIC_RELEASE); } -inline uint64 atomic_get_release(volatile uint64* value) { return __atomic_load_n((uint64 *) value, __ATOMIC_RELEASE); } -inline void atomic_increment_release(volatile uint32* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); } -inline void atomic_decrement_release(volatile uint32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); } -inline void atomic_increment_release(volatile uint64* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); } -inline void atomic_decrement_release(volatile uint64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); } -inline void atomic_add_release(volatile uint32* value, uint32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } -inline void atomic_sub_release(volatile uint32* value, uint32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -inline uint32 atomic_compare_exchange_weak_release(volatile uint32* value, uint32* expected, uint32 desired) { 
__atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } -inline uint32 atomic_fetch_add_release(volatile uint32* value, uint32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } -inline uint32 atomic_fetch_sub_release(volatile uint32* value, uint32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } -inline uint64 atomic_fetch_add_release(volatile uint64* value, uint64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } -inline uint64 atomic_fetch_sub_release(volatile uint64* value, uint64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } -inline void atomic_and_release(volatile uint32* value, uint32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } -inline void atomic_and_release(volatile int32* value, int32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } -inline void atomic_and_release(volatile uint64* value, uint64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } -inline void atomic_and_release(volatile int64* value, int64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } -inline void atomic_or_release(volatile uint32* value, uint32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } -inline void atomic_or_release(volatile int32* value, int32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } -inline void atomic_or_release(volatile uint64* value, uint64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } -inline void atomic_or_release(volatile int64* value, int64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_set_relaxed(void** target, void* value) { __atomic_store_n(target, value, __ATOMIC_RELEASE); } +FORCE_INLINE void* atomic_get_relaxed(void** target) { return __atomic_load_n(target, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_set_relaxed(volatile int8* value, int8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_set_relaxed(volatile int16* value, int16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_set_relaxed(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_set_relaxed(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE int8 atomic_fetch_set_relaxed(volatile int8* value, int8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE int16 atomic_fetch_set_relaxed(volatile int16* value, int16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE int8 atomic_get_relaxed(volatile int8* value) { return __atomic_load_n((int8 *) value, __ATOMIC_RELEASE); } +FORCE_INLINE int16 atomic_get_relaxed(volatile int16* value) { return __atomic_load_n((int16 *) value, __ATOMIC_RELEASE); } +FORCE_INLINE int32 atomic_get_relaxed(volatile int32* value) { return __atomic_load_n((int32 *) value, __ATOMIC_RELEASE); } +FORCE_INLINE int64 atomic_get_relaxed(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_RELEASE); } 
+FORCE_INLINE void atomic_increment_relaxed(volatile int8* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile int8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_increment_relaxed(volatile int16* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile int16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_increment_relaxed(volatile int32* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile int32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_increment_relaxed(volatile int64* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile int64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_add_relaxed(volatile int8* value, int8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_sub_relaxed(volatile int8* value, int8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_add_relaxed(volatile int16* value, int16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_sub_relaxed(volatile int16* value, int16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_add_relaxed(volatile int32* value, int32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_sub_relaxed(volatile int32* value, int32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_add_relaxed(volatile int64* value, int64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_sub_relaxed(volatile int64* value, int64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); }
+FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) { __atomic_compare_exchange(value, expected, &desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; }
+FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) { __atomic_compare_exchange(value, expected, &desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; }
+FORCE_INLINE int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; }
+FORCE_INLINE int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; }
+FORCE_INLINE int8 atomic_fetch_add_relaxed(volatile int8* value, int8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE int8 atomic_fetch_sub_relaxed(volatile int8* value, int8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE int16 atomic_fetch_add_relaxed(volatile int16* value, int16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE int16 atomic_fetch_sub_relaxed(volatile int16* value, int16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_set_relaxed(volatile uint8* value, uint8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_set_relaxed(volatile uint16* value, uint16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_set_relaxed(volatile uint32* value, uint32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_set_relaxed(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); }
+FORCE_INLINE uint8 atomic_fetch_set_relaxed(volatile uint8* value, uint8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); }
+FORCE_INLINE uint16 atomic_fetch_set_relaxed(volatile uint16* value, uint16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); }
+FORCE_INLINE uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); }
+FORCE_INLINE uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); }
+FORCE_INLINE uint8 atomic_get_relaxed(volatile uint8* value) { return __atomic_load_n(value, __ATOMIC_RELAXED); }
+FORCE_INLINE uint16 atomic_get_relaxed(volatile uint16* value) { return __atomic_load_n(value, __ATOMIC_RELAXED); }
+FORCE_INLINE uint32 atomic_get_relaxed(volatile uint32* value) { return __atomic_load_n(value, __ATOMIC_RELAXED); }
+FORCE_INLINE uint64 atomic_get_relaxed(volatile uint64* value) { return __atomic_load_n(value, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_increment_relaxed(volatile uint8* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile uint8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_increment_relaxed(volatile uint16* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile uint16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_increment_relaxed(volatile uint32* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile uint32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_increment_relaxed(volatile uint64* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile uint64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_add_relaxed(volatile uint8* value, uint8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_sub_relaxed(volatile uint8* value, uint8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_add_relaxed(volatile uint16* value, uint16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_sub_relaxed(volatile uint16* value, uint16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_add_relaxed(volatile uint32* value, uint32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_add_relaxed(volatile uint64* value, uint64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); }
+FORCE_INLINE uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; }
+FORCE_INLINE uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; }
+FORCE_INLINE uint8 atomic_fetch_add_relaxed(volatile uint8* value, uint8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE uint8 atomic_fetch_sub_relaxed(volatile uint8* value, uint8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE uint16 atomic_fetch_add_relaxed(volatile uint16* value, uint16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE uint16 atomic_fetch_sub_relaxed(volatile uint16* value, uint16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_and_relaxed(volatile uint8* value, uint8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_and_relaxed(volatile int8* value, int8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_and_relaxed(volatile uint16* value, uint16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_and_relaxed(volatile int16* value, int16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_and_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_and_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_and_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_and_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_or_relaxed(volatile uint8* value, uint8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); }
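The add/get pairs above compose into cheap cross-thread statistics counters. A hedged sketch (the counter and both helpers are hypothetical, assuming the value only feeds diagnostics and therefore needs no ordering guarantees):

// Hypothetical example: worker threads report streamed bytes, the UI samples them.
volatile uint64 bytes_streamed = 0;

FORCE_INLINE void on_chunk_loaded(uint64 chunk_size) { atomic_add_relaxed(&bytes_streamed, chunk_size); }

FORCE_INLINE uint64 bytes_streamed_snapshot() { return atomic_get_relaxed(&bytes_streamed); }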
+FORCE_INLINE void atomic_or_relaxed(volatile int8* value, int8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_or_relaxed(volatile uint16* value, uint16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_or_relaxed(volatile int16* value, int16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_or_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_or_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_or_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); }
+FORCE_INLINE void atomic_or_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); }
-inline void atomic_set_acquire_release(void** target, void* value) { __atomic_store_n(target, value, __ATOMIC_SEQ_CST); }
-inline void* atomic_get_acquire_release(void** target) { return __atomic_load_n(target, __ATOMIC_SEQ_CST); }
-inline void atomic_set_acquire_release(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); }
-inline void atomic_set_acquire_release(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); }
-inline int32 atomic_fetch_set_acquire_release(volatile int32* value, int32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); }
-inline int64 atomic_fetch_set_acquire_release(volatile int64* value, int64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); }
-inline int32 atomic_get_acquire_release(volatile int32* value) { return __atomic_load_n((int32 *) value, __ATOMIC_SEQ_CST); }
-inline int64 atomic_get_acquire_release(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_SEQ_CST); }
-inline void atomic_increment_acquire_release(volatile int32* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); }
-inline void atomic_decrement_acquire_release(volatile int32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); }
-inline void atomic_increment_acquire_release(volatile int64* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); }
-inline void atomic_decrement_acquire_release(volatile int64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); }
-inline void atomic_add_acquire_release(volatile int32* value, int32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); }
-inline void atomic_sub_acquire_release(volatile int32* value, int32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); }
-inline void atomic_add_acquire_release(volatile int64* value, int64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); }
-inline void atomic_sub_acquire_release(volatile int64* value, int64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); }
-inline int32 atomic_compare_exchange_weak_acquire_release(volatile int32* value, int32* expected, int32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; }
-inline int32 atomic_fetch_add_acquire_release(volatile int32* value, int32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); }
-inline int32 atomic_fetch_sub_acquire_release(volatile int32* value, int32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); }
-inline int64
atomic_fetch_add_acquire_release(volatile int64* value, int64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } -inline int64 atomic_fetch_sub_acquire_release(volatile int64* value, int64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } -inline void atomic_set_acquire_release(volatile uint32* value, uint32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } -inline void atomic_set_acquire_release(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } -inline uint32 atomic_fetch_set_acquire_release(volatile uint32* value, uint32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } -inline uint64 atomic_fetch_set_acquire_release(volatile uint64* value, uint64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } -inline uint32 atomic_get_acquire_release(volatile uint32* value) { return __atomic_load_n((uint32 *) value, __ATOMIC_SEQ_CST); } -inline uint64 atomic_get_acquire_release(volatile uint64* value) { return __atomic_load_n((uint64 *) value, __ATOMIC_SEQ_CST); } -inline void atomic_increment_acquire_release(volatile uint32* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } -inline void atomic_decrement_acquire_release(volatile uint32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } -inline void atomic_increment_acquire_release(volatile uint64* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } -inline void atomic_decrement_acquire_release(volatile uint64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } -inline void atomic_add_acquire_release(volatile uint32* value, uint32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } -inline void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } -inline uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint32* expected, uint32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } -inline uint32 atomic_fetch_add_acquire_release(volatile uint32* value, uint32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } -inline uint32 atomic_fetch_sub_acquire_release(volatile uint32* value, uint32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } -inline uint64 atomic_fetch_add_acquire_release(volatile uint64* value, uint64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } -inline uint64 atomic_fetch_sub_acquire_release(volatile uint64* value, uint64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } -inline void atomic_and_acquire_release(volatile uint32* value, uint32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } -inline void atomic_and_acquire_release(volatile int32* value, int32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } -inline void atomic_and_acquire_release(volatile uint64* value, uint64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } -inline void atomic_and_acquire_release(volatile int64* value, int64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } -inline void atomic_or_acquire_release(volatile uint32* value, uint32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } -inline void atomic_or_acquire_release(volatile int32* value, int32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } -inline void 
atomic_or_acquire_release(volatile uint64* value, uint64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); }
-inline void atomic_or_acquire_release(volatile int64* value, int64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_set_acquire_release(void** target, void* value) { __atomic_store_n(target, value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void* atomic_get_acquire_release(void** target) { return __atomic_load_n(target, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_set_acquire_release(volatile int8* value, int8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_set_acquire_release(volatile int16* value, int16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_set_acquire_release(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_set_acquire_release(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int8 atomic_fetch_set_acquire_release(volatile int8* value, int8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int16 atomic_fetch_set_acquire_release(volatile int16* value, int16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int32 atomic_fetch_set_acquire_release(volatile int32* value, int32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int64 atomic_fetch_set_acquire_release(volatile int64* value, int64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int8 atomic_get_acquire_release(volatile int8* value) { return __atomic_load_n((int8 *) value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int16 atomic_get_acquire_release(volatile int16* value) { return __atomic_load_n((int16 *) value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int32 atomic_get_acquire_release(volatile int32* value) { return __atomic_load_n((int32 *) value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int64 atomic_get_acquire_release(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile int8* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile int8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile int16* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile int16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile int32* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile int32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile int64* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile int64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_add_acquire_release(volatile int8* value, int8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile int8* value, int8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_add_acquire_release(volatile int16* value, int16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile int16* value, int16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_add_acquire_release(volatile int32* value, int32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile int32* value, int32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_add_acquire_release(volatile int64* value, int64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile int64* value, int64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); }
+FORCE_INLINE f32 atomic_compare_exchange_weak_acquire_release(volatile f32* value, f32* expected, f32 desired) { __atomic_compare_exchange(value, expected, &desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; }
+FORCE_INLINE f64 atomic_compare_exchange_weak_acquire_release(volatile f64* value, f64* expected, f64 desired) { __atomic_compare_exchange(value, expected, &desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; }
+FORCE_INLINE int32 atomic_compare_exchange_weak_acquire_release(volatile int32* value, int32* expected, int32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; }
+FORCE_INLINE int64 atomic_compare_exchange_weak_acquire_release(volatile int64* value, int64* expected, int64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; }
+FORCE_INLINE int8 atomic_fetch_add_acquire_release(volatile int8* value, int8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int8 atomic_fetch_sub_acquire_release(volatile int8* value, int8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int16 atomic_fetch_add_acquire_release(volatile int16* value, int16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int16 atomic_fetch_sub_acquire_release(volatile int16* value, int16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int32 atomic_fetch_add_acquire_release(volatile int32* value, int32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int32 atomic_fetch_sub_acquire_release(volatile int32* value, int32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int64 atomic_fetch_add_acquire_release(volatile int64* value, int64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int64 atomic_fetch_sub_acquire_release(volatile int64* value, int64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_set_acquire_release(volatile uint8* value, uint8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_set_acquire_release(volatile uint16* value, uint16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_set_acquire_release(volatile uint32* value, uint32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_set_acquire_release(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint8 atomic_fetch_set_acquire_release(volatile uint8* value, uint8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint16 atomic_fetch_set_acquire_release(volatile uint16* value, uint16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint32 atomic_fetch_set_acquire_release(volatile uint32* value, uint32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint64 atomic_fetch_set_acquire_release(volatile uint64* value, uint64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint8 atomic_get_acquire_release(volatile uint8* value) { return __atomic_load_n(value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint16 atomic_get_acquire_release(volatile uint16* value) { return __atomic_load_n(value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint32 atomic_get_acquire_release(volatile uint32* value) { return __atomic_load_n(value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint64 atomic_get_acquire_release(volatile uint64* value) { return __atomic_load_n(value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile uint8* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile uint8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile uint16* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile uint16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile uint32* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile uint32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile uint64* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile uint64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_add_acquire_release(volatile uint8* value, uint8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile uint8* value, uint8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_add_acquire_release(volatile uint16* value, uint16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile uint16* value, uint16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_add_acquire_release(volatile uint32* value, uint32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_add_acquire_release(volatile uint64* value, uint64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile uint64* value, uint64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint32* expected, uint32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; }
+FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire_release(volatile uint64* value, uint64* expected, uint64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; }
+FORCE_INLINE uint8 atomic_fetch_add_acquire_release(volatile uint8* value, uint8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint8 atomic_fetch_sub_acquire_release(volatile uint8* value, uint8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint16 atomic_fetch_add_acquire_release(volatile uint16* value, uint16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint16 atomic_fetch_sub_acquire_release(volatile uint16* value, uint16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint32 atomic_fetch_add_acquire_release(volatile uint32* value, uint32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint32 atomic_fetch_sub_acquire_release(volatile uint32* value, uint32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint64 atomic_fetch_add_acquire_release(volatile uint64* value, uint64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint64 atomic_fetch_sub_acquire_release(volatile uint64* value, uint64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_and_acquire_release(volatile uint8* value, uint8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_and_acquire_release(volatile int8* value, int8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_and_acquire_release(volatile uint16* value, uint16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_and_acquire_release(volatile int16* value, int16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_and_acquire_release(volatile uint32* value, uint32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_and_acquire_release(volatile int32* value, int32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_and_acquire_release(volatile uint64* value, uint64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_and_acquire_release(volatile int64* value, int64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_or_acquire_release(volatile uint8* value, uint8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_or_acquire_release(volatile int8* value, int8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_or_acquire_release(volatile uint16* value, uint16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_or_acquire_release(volatile int16* value, int16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_or_acquire_release(volatile uint32* value, uint32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_or_acquire_release(volatile int32* value, int32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_or_acquire_release(volatile uint64* value, uint64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); }
+FORCE_INLINE void atomic_or_acquire_release(volatile int64* value, int64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); }
 // Check out the intrinsic functions fence_memory and fence_write
 // These are much faster and could accomplish what you are doing
diff --git a/compiler/gcc/CompilerUtils.h b/compiler/gcc/CompilerUtils.h
index 1ada586..d1b0e90 100644
--- a/compiler/gcc/CompilerUtils.h
+++ b/compiler/gcc/CompilerUtils.h
@@ -81,4 +81,22 @@ int32 compiler_find_first_bit_l2r(uint32 mask) {
 #endif
 }
 
+/*
+#include <cpuid.h>
+
+static inline
+void cpuid(int32 cpuInfo[4], int32 function_id) {
+    __cpuid(function_id, cpuInfo[0], cpuInfo[1], cpuInfo[2], cpuInfo[3]);
+}
+*/
+
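As a hedged caller-side sketch (not part of the patch; assumes memcpy from string.h), the compiler_cpuid wrapper defined next can be used to read the CPU vendor string from leaf 0, where the 12 bytes arrive in EBX, EDX, ECX order:

int32 regs[4];
compiler_cpuid(regs, 0); // Leaf 0: EAX = highest supported leaf, EBX/EDX/ECX = vendor id
char vendor[13];
memcpy(vendor + 0, &regs[1], 4); // EBX
memcpy(vendor + 4, &regs[3], 4); // EDX
memcpy(vendor + 8, &regs[2], 4); // ECX
vendor[12] = '\0';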
+inline +void compiler_cpuid(int32 cpuInfo[4], int32 function_id) { + asm volatile( + "cpuid" + : "=a" (cpuInfo[0]), "=b" (cpuInfo[1]), "=c" (cpuInfo[2]), "=d" (cpuInfo[3]) + : "a" (function_id) + ); +} + #endif \ No newline at end of file diff --git a/compiler/msvc/CompilerUtils.h index 4ddd1e4..b7f8901 100644 --- a/compiler/msvc/CompilerUtils.h +++ b/compiler/msvc/CompilerUtils.h @@ -11,6 +11,7 @@ #include "../../utils/TestUtils.h" #include +#include <intrin.h> #define PACKED_STRUCT __pragma(pack(push, 1)) #define UNPACKED_STRUCT __pragma(pack(pop)) @@ -75,4 +76,9 @@ int32 compiler_find_first_bit_l2r(uint32 mask) { return _BitScanReverse(&index, mask) ? index : -1; } +inline +void compiler_cpuid(int32 cpuInfo[4], int32 function_id) { + __cpuidex(cpuInfo, function_id, 0); +} + #endif \ No newline at end of file diff --git a/gpuapi/direct3d/AppCmdBuffer.h new file mode 100644 index 0000000..13171f7 --- /dev/null +++ b/gpuapi/direct3d/AppCmdBuffer.h @@ -0,0 +1,71 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_GPUAPI_DIRECTX_APP_CMD_BUFFER_H +#define TOS_GPUAPI_DIRECTX_APP_CMD_BUFFER_H + +#include "../../stdlib/Types.h" +#include "Shader.h" +#include "ShaderUtils.h" +#include "../ShaderType.h" +#include "../../asset/Asset.h" +#include "../../command/AppCmdBuffer.h" +#include "GpuApiContainer.h" + +#include +#include +#include +#include + +void* cmd_shader_load(AppCmdBuffer*, Command*) { + return NULL; +} + +void* cmd_shader_load_sync(AppCmdBuffer* cb, Shader* shader, int32* shader_ids) { + char asset_id[9]; + + GpuApiContainer* gpu_api = (GpuApiContainer *) cb->gpu_api; + + Microsoft::WRL::ComPtr<ID3DBlob> shader_assets[SHADER_TYPE_SIZE]; + + for (int32 i = 0; i < SHADER_TYPE_SIZE; ++i) { + if (!shader_ids[i]) { + continue; + } else if (shader_ids[i] < 0) { + break; + } + + // Load sub asset + int_to_hex(shader_ids[i], asset_id); + Asset* shader_asset = thrd_ams_get_asset_wait(cb->ams, asset_id); + if (!shader_asset) { + int32 archive_id = (shader_ids[i] >> 24) & 0xFF; + shader_asset = asset_archive_asset_load(&cb->asset_archives[archive_id], shader_ids[i], cb->ams, cb->mem_vol); + } + + // Make sub shader + shader_assets[i] = shader_make( + shader_type_index((ShaderType) (i + 1)), + (char *) shader_asset->self, + shader_asset->ram_size + ); + + shader_asset->state |= ASSET_STATE_RAM_GC; + shader_asset->state |= ASSET_STATE_VRAM_GC; + } + + // Make shader/program + shader->id = program_make( + gpu_api->device.Get(), gpu_api->pipeline_state, gpu_api->root_signature.Get(), + shader_assets[0].Get(), shader_assets[1].Get(), shader_assets[2].Get() + ); + + return NULL; +} + +#endif \ No newline at end of file diff --git a/gpuapi/direct3d/DirectXUtils.h index 3ccf3a1..cdff6ad 100644 --- a/gpuapi/direct3d/DirectXUtils.h +++ b/gpuapi/direct3d/DirectXUtils.h @@ -11,290 +11,159 @@ #include #include - #include +#include "../../../GameEngine/log/Log.h" #include "../../../EngineDependencies/directx/d3d12.h" #include "../../../EngineDependencies/directx/d3dx12.h" #include "../../stdlib/Types.h" -#define FRAME_COUNT 2 +// A more (compile-time) efficient version of the Windows macro IID_PPV_ARGS +#define IID_PPVOID(pointer) __uuidof(**(&pointer)), reinterpret_cast<void**>(&pointer) -struct Window { - bool is_fullscreen; - int32 width; - int32 height; - char name[32]; - - int32 x; - int32 y; - - HWND hwnd; - - // @todo move this out of here to a
separate gpuapi struct (same with opengl) - Microsoft::WRL::ComPtr m_swapChain; - - Microsoft::WRL::ComPtr device; - Microsoft::WRL::ComPtr m_renderTargets[FRAME_COUNT]; - Microsoft::WRL::ComPtr m_commandAllocator; - Microsoft::WRL::ComPtr m_commandQueue; - Microsoft::WRL::ComPtr m_rtvHeap; - Microsoft::WRL::ComPtr m_pipelineState; - Microsoft::WRL::ComPtr m_commandList; - Microsoft::WRL::ComPtr m_fence; - - UINT m_rtvDescriptorSize; - - UINT m_frameIndex; - HANDLE m_fenceEvent; - UINT64 m_fenceValue; -}; - -void window_create(Window* window, void* proc) +bool is_directx_supported(D3D_FEATURE_LEVEL version) { - WNDPROC wndproc = (WNDPROC) proc; - WNDCLASSEX wc = {}; - HINSTANCE hinstance = GetModuleHandle(0); - - wc.cbSize = sizeof(WNDCLASSEX); - wc.style = CS_OWNDC; - wc.lpfnWndProc = wndproc; - wc.hInstance = hinstance; - wc.lpszClassName = (LPCSTR) window->name; - - RegisterClassEx(&wc); - - if (window->is_fullscreen) { - window->width = GetSystemMetrics(SM_CXSCREEN); - window->height = GetSystemMetrics(SM_CYSCREEN); - - DEVMODE screen_settings; - - memset(&screen_settings, 0, sizeof(screen_settings)); - screen_settings.dmSize = sizeof(screen_settings); - screen_settings.dmPelsWidth = (unsigned long) window->width; - screen_settings.dmPelsHeight = (unsigned long) window->height; - screen_settings.dmBitsPerPel = 32; - screen_settings.dmFields = DM_BITSPERPEL | DM_PELSWIDTH | DM_PELSHEIGHT; - - ChangeDisplaySettings(&screen_settings, CDS_FULLSCREEN); - - window->x = 0; - window->y = 0; - } - - window->hwnd = CreateWindowEx((DWORD) NULL, - wc.lpszClassName, NULL, - WS_OVERLAPPEDWINDOW, - window->x, window->y, - window->width, - window->height, - NULL, NULL, hinstance, window - ); - - //SetWindowLongA(window->hwnd, GWL_STYLE, 0); -} - -void window_open(const Window* window) -{ - ShowWindow(window->hwnd, SW_SHOW); - SetForegroundWindow(window->hwnd); - SetFocus(window->hwnd); - ShowCursor(false); - UpdateWindow(window->hwnd); -} - -void window_close(Window* window) -{ - CloseWindow(window->hwnd); -} - -bool is_directx_12_supported() -{ - Microsoft::WRL::ComPtr factory; - HRESULT hr = CreateDXGIFactory1(IID_PPV_ARGS(&factory)); - if (FAILED(hr)) { + IDXGIFactory6* factory = NULL; + if (FAILED(CreateDXGIFactory1(IID_PPVOID(factory)))) { return false; } - Microsoft::WRL::ComPtr adapter; - for (UINT adapterIndex = 0; - DXGI_ERROR_NOT_FOUND != factory->EnumAdapters1(adapterIndex, &adapter); - ++adapterIndex) - { - DXGI_ADAPTER_DESC1 desc; - adapter->GetDesc1(&desc); + bool is_dx12_supported = false; - // Skip software adapters - if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + IDXGIAdapter1* adapter = NULL; + for (uint32 i = 0; DXGI_ERROR_NOT_FOUND != factory->EnumAdapters1(i, &adapter); ++i) { + DXGI_ADAPTER_DESC1 desc; + if (FAILED(adapter->GetDesc1(&desc))) { + adapter->Release(); continue; } - try { - if (SUCCEEDED(D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), nullptr))) { - return true; - } - } catch (...) 
{ - return false; + // Skip software adapters + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + adapter->Release(); + continue; + } + + // Check for DirectX 12 support + if (SUCCEEDED(D3D12CreateDevice(adapter, version, _uuidof(ID3D12Device), NULL))) { + is_dx12_supported = true; + adapter->Release(); + break; + } + + adapter->Release(); } - return false; + factory->Release(); + + return is_dx12_supported; } -void find_hardware_adapter( - IDXGIFactory1* factory, - IDXGIAdapter1** adapter1, - bool use_high_performance_adapter = true +int32 max_directx_version() +{ + if (is_directx_supported(D3D_FEATURE_LEVEL_12_2)) { + return 122; + } else if (is_directx_supported(D3D_FEATURE_LEVEL_12_1)) { + return 121; + } else if (is_directx_supported(D3D_FEATURE_LEVEL_12_0)) { + return 120; + } else if (is_directx_supported(D3D_FEATURE_LEVEL_11_1)) { + return 111; + } else if (is_directx_supported(D3D_FEATURE_LEVEL_11_0)) { + return 110; + } else if (is_directx_supported(D3D_FEATURE_LEVEL_10_1)) { + return 101; + } else if (is_directx_supported(D3D_FEATURE_LEVEL_10_0)) { + return 100; + } else if (is_directx_supported(D3D_FEATURE_LEVEL_9_3)) { + return 93; + } else if (is_directx_supported(D3D_FEATURE_LEVEL_9_2)) { + return 92; + } else if (is_directx_supported(D3D_FEATURE_LEVEL_9_1)) { + return 91; + } else if (is_directx_supported(D3D_FEATURE_LEVEL_1_0_CORE)) { + return 90; + } + + return 0; +} + +// Returns frame index +int32 wait_for_previous_frame( + ID3D12Fence* fence, HANDLE fence_event, UINT64* fence_value, + ID3D12CommandQueue* command_queue, IDXGISwapChain3* swapchain +) +{ + // WAITING FOR THE FRAME TO COMPLETE BEFORE CONTINUING IS NOT BEST PRACTICE. + // This is code implemented as such for simplicity. The D3D12HelloFrameBuffering + // sample illustrates how to use fences for efficient resource usage and to + // maximize GPU utilization. + + UINT64 fence_value_temp = *fence_value; + + // Signal and increment the fence value. + if(FAILED(command_queue->Signal(fence, fence_value_temp))) { + LOG(true, "DirectX12 Signal"); + ASSERT_SIMPLE(false); + } + + ++(*fence_value); + + // Wait until the previous frame is finished. + if (fence->GetCompletedValue() < fence_value_temp) { + if (FAILED(fence->SetEventOnCompletion(fence_value_temp, fence_event))) { + LOG(true, "DirectX12 SetEventOnCompletion"); + ASSERT_SIMPLE(false); + } + + WaitForSingleObject(fence_event, INFINITE); + } + + return swapchain->GetCurrentBackBufferIndex(); +} + +static +void directx_debug_callback( + D3D12_MESSAGE_CATEGORY category, + D3D12_MESSAGE_SEVERITY severity, + D3D12_MESSAGE_ID id, + LPCSTR description, + void* context ) { - *adapter1 = nullptr; + // @todo handle severity + (void) category; + (void) severity; + (void) id; + (void) context; + /* + if ((severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT) || (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) ) { - Microsoft::WRL::ComPtr adapter; - Microsoft::WRL::ComPtr factory6; - if (SUCCEEDED(factory->QueryInterface(IID_PPV_ARGS(&factory6)))) { - for ( - UINT adapterIndex = 0; - SUCCEEDED(factory6->EnumAdapterByGpuPreference( - adapterIndex, - use_high_performance_adapter == true ? DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE : DXGI_GPU_PREFERENCE_UNSPECIFIED, - IID_PPV_ARGS(&adapter))); - ++adapterIndex) - { - DXGI_ADAPTER_DESC1 desc; - adapter->GetDesc1(&desc); - if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { - // Don't select the Basic Render Driver adapter.
- continue; - } - - // Check to see whether the adapter supports Direct3D 12, but don't create the - // actual device yet. - if (SUCCEEDED(D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), nullptr))) { - break; - } - } } + */ - if(adapter.Get() == nullptr) { - for (UINT adapterIndex = 0; SUCCEEDED(factory->EnumAdapters1(adapterIndex, &adapter)); ++adapterIndex) { - DXGI_ADAPTER_DESC1 desc; - adapter->GetDesc1(&desc); - - if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { - // Don't select the Basic Render Driver adapter. - continue; - } - - // Check to see whether the adapter supports Direct3D 12, but don't create the - // actual device yet. - if (SUCCEEDED(D3D12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_11_0, _uuidof(ID3D12Device), nullptr))) { - break; - } - } - } - - *adapter1 = adapter.Detach(); + LOG(true, description); + ASSERT_SIMPLE(false); } -void load_pipeline(Window* window) +void gpuapi_debug_messenger_setup(Microsoft::WRL::ComPtr& device) { - uint32 factory_flags = 0; + Microsoft::WRL::ComPtr info_queue; + if (FAILED(device.As(&info_queue))) { + return; + } - Microsoft::WRL::ComPtr factory; - CreateDXGIFactory2(factory_flags, IID_PPV_ARGS(&factory)); - - Microsoft::WRL::ComPtr hardware_adapter; - find_hardware_adapter(factory.Get(), &hardware_adapter); - - D3D12CreateDevice( - hardware_adapter.Get(), - D3D_FEATURE_LEVEL_11_0, - IID_PPV_ARGS(&window->device) + // Register the custom debug callback + info_queue->RegisterMessageCallback( + directx_debug_callback, + D3D12_MESSAGE_CALLBACK_FLAG_NONE, + NULL, // Context (can be used to pass additional data) + NULL // Callback cookie (unused) ); - // Describe and create the command queue. - D3D12_COMMAND_QUEUE_DESC queueDesc = {}; - queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; - queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; - - window->device->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(&window->m_commandQueue)); - - // Describe and create the swap chain. - DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {}; - swapChainDesc.BufferCount = FRAME_COUNT; - swapChainDesc.Width = window->width; - swapChainDesc.Height = window->height; - swapChainDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; - swapChainDesc.SampleDesc.Count = 1; - - Microsoft::WRL::ComPtr swapChain; - factory->CreateSwapChainForHwnd( - window->m_commandQueue.Get(), // Swap chain needs the queue so that it can force a flush on it. - window->hwnd, - &swapChainDesc, - nullptr, - nullptr, - &swapChain - ); - - // This sample does not support fullscreen transitions. - factory->MakeWindowAssociation(window->hwnd, DXGI_MWA_NO_ALT_ENTER); - - swapChain.As(&window->m_swapChain); - window->m_frameIndex = window->m_swapChain->GetCurrentBackBufferIndex(); - - // Create descriptor heaps. - { - // Describe and create a render target view (RTV) descriptor heap. - D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {}; - rtvHeapDesc.NumDescriptors = FRAME_COUNT; - rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; - rtvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; - window->device->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(&window->m_rtvHeap)); - - window->m_rtvDescriptorSize = window->device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); - } - - // Create frame resources. - { - CD3DX12_CPU_DESCRIPTOR_HANDLE rtvHandle(window->m_rtvHeap->GetCPUDescriptorHandleForHeapStart()); - - // Create a RTV for each frame. 
- for (UINT n = 0; n < FRAME_COUNT; n++) - { - window->m_swapChain->GetBuffer(n, IID_PPV_ARGS(&window->m_renderTargets[n])); - window->device->CreateRenderTargetView(window->m_renderTargets[n].Get(), nullptr, rtvHandle); - rtvHandle.Offset(1, window->m_rtvDescriptorSize); - } - } - - window->device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&window->m_commandAllocator)); -} - -void load_assets(Window* window) -{ - // Create the command list. - window->device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, window->m_commandAllocator.Get(), nullptr, IID_PPV_ARGS(&window->m_commandList)); - - // Command lists are created in the recording state, but there is nothing - // to record yet. The main loop expects it to be closed, so close it now. - window->m_commandList->Close(); - - // Create synchronization objects. - { - window->device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&window->m_fence)); - window->m_fenceValue = 1; - - // Create an event handle to use for frame synchronization. - window->m_fenceEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); - if (window->m_fenceEvent == nullptr) { - HRESULT_FROM_WIN32(GetLastError()); - } - } + // Set the message count limit to unlimited + info_queue->SetMessageCountLimit(0); } #endif \ No newline at end of file diff --git a/gpuapi/direct3d/GpuApiContainer.h b/gpuapi/direct3d/GpuApiContainer.h index 9f54960..e67be3f 100644 --- a/gpuapi/direct3d/GpuApiContainer.h +++ b/gpuapi/direct3d/GpuApiContainer.h @@ -9,6 +9,9 @@ #ifndef TOS_GPUAPI_DIRECTX_GPU_API_CONTAINER_H #define TOS_GPUAPI_DIRECTX_GPU_API_CONTAINER_H +#include "../../stdlib/Types.h" +#include "../../../EngineDependencies/directx/d3d12.h" +#include "../../../EngineDependencies/directx/d3dx12.h" #include #include #include @@ -16,18 +19,32 @@ #include struct GpuApiContainer { - Microsoft::WRL::ComPtr device; - Microsoft::WRL::ComPtr swapChain; + uint32 frames_in_flight; + uint32 framebuffer_idx; - Microsoft::WRL::ComPtr commandQueue; - Microsoft::WRL::ComPtr rtvHeap; - Microsoft::WRL::ComPtr renderTargets[2]; - Microsoft::WRL::ComPtr commandAllocator; - Microsoft::WRL::ComPtr commandList; - Microsoft::WRL::ComPtr pipelineState; - Microsoft::WRL::ComPtr rootSignature; + Microsoft::WRL::ComPtr device; + Microsoft::WRL::ComPtr swapchain; + + Microsoft::WRL::ComPtr command_queue; + Microsoft::WRL::ComPtr rtv_heap; + uint32 rtv_info_size; + // @todo should be dynamic size based on frames_in_flight, no? + Microsoft::WRL::ComPtr render_targets[2]; + Microsoft::WRL::ComPtr command_allocator; + Microsoft::WRL::ComPtr command_list; + Microsoft::WRL::ComPtr pipeline_state; + Microsoft::WRL::ComPtr root_signature; Microsoft::WRL::ComPtr fence; - UINT64 fenceValue = 0; + UINT64 fence_value = 0; + HANDLE fence_event; + + // ???? 
+ CD3DX12_VIEWPORT m_viewport; + CD3DX12_RECT m_scissorRect; + + // @todo This definitely doesn't belong here + Microsoft::WRL::ComPtr<ID3D12Resource> m_vertexBuffer; + D3D12_VERTEX_BUFFER_VIEW m_vertexBufferView; }; #endif \ No newline at end of file diff --git a/gpuapi/direct3d/Shader.h index e607073..e983802 100644 --- a/gpuapi/direct3d/Shader.h +++ b/gpuapi/direct3d/Shader.h @@ -10,9 +10,11 @@ #define TOS_GPUAPI_DIRECT3D_SHADER_H #include "../../stdlib/Types.h" +#include +#include struct Shader { - uint32 id; + Microsoft::WRL::ComPtr<ID3D12PipelineState> id; uint32 locations[7]; byte data[16]; }; diff --git a/gpuapi/direct3d/ShaderUtils.h index 1e216b3..6adb404 100644 --- a/gpuapi/direct3d/ShaderUtils.h +++ b/gpuapi/direct3d/ShaderUtils.h @@ -9,43 +9,87 @@ #ifndef TOS_GPUAPI_DIRECTX_SHADER_UTILS_H #define TOS_GPUAPI_DIRECTX_SHADER_UTILS_H +#include #include +#include +#include #include +#include "../../../EngineDependencies/directx/d3d12.h" +#include "../../../EngineDependencies/directx/d3dx12.h" #include "../../stdlib/Types.h" #include "../../memory/RingMemory.h" #include "../../log/Log.h" +#include "../ShaderType.h" -D3D12_SHADER_BYTECODE shader_make(ID3D12Device* device, const char* source, int32 source_size, RingMemory* ring) +#pragma comment(lib, "d3dcompiler.lib") + +const char* shader_type_index(ShaderType type) { - // Create the shader object (bytecode) - D3D12_SHADER_BYTECODE shaderBytecodeDesc = {}; - shaderBytecodeDesc.pShaderBytecode = source; - shaderBytecodeDesc.BytecodeLength = source_size; + switch (type) { + case SHADER_TYPE_VERTEX: + return "vs_5_0"; + case SHADER_TYPE_FRAGMENT: + return "ps_5_0"; + default: + UNREACHABLE(); + } } - return shaderBytecodeDesc; +Microsoft::WRL::ComPtr<ID3DBlob> shader_make(const char* type, const char* source, int32 source_size) +{ + #if DEBUG || INTERNAL + uint32 compileFlags = D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; + #else + uint32 compileFlags = 0; + #endif + + Microsoft::WRL::ComPtr<ID3DBlob> blob; + Microsoft::WRL::ComPtr<ID3DBlob> errMsgs; + if (FAILED(D3DCompile2(source, source_size, NULL, NULL, NULL, "main", type, compileFlags, 0, 0, NULL, 0, blob.GetAddressOf(), errMsgs.GetAddressOf()))) { + LOG(true, "DirectX12 D3DCompile2"); + ASSERT_SIMPLE(false); + } + + return blob; } ID3D12PipelineState* program_make( ID3D12Device* device, - D3D12_SHADER_BYTECODE vertex_shader, - D3D12_SHADER_BYTECODE fragment_shader, - D3D12_SHADER_BYTECODE geometry_shader + Microsoft::WRL::ComPtr<ID3D12PipelineState>& pipeline_state, + ID3D12RootSignature* root_signature, + ID3DBlob* vertex_shader, + ID3DBlob* fragment_shader, + ID3DBlob* ) { - D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; - psoDesc.VS = vertex_shader; - psoDesc.PS = fragment_shader; - psoDesc.GS = geometry_shader; + // @todo We need to find a way to do this somewhere else: + D3D12_INPUT_ELEMENT_DESC input_element_info[] = + { + { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 }, + { "COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 } + }; - ID3D12PipelineState* pipelineState = NULL; - HRESULT hr = device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(&pipelineState)); + D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_state_info = {}; + pipeline_state_info.InputLayout = { input_element_info, _countof(input_element_info) }; + pipeline_state_info.pRootSignature = root_signature; + pipeline_state_info.VS = CD3DX12_SHADER_BYTECODE(vertex_shader); + pipeline_state_info.PS =
CD3DX12_SHADER_BYTECODE(fragment_shader); + pipeline_state_info.RasterizerState = CD3DX12_RASTERIZER_DESC(D3D12_DEFAULT); + pipeline_state_info.BlendState = CD3DX12_BLEND_DESC(D3D12_DEFAULT); + pipeline_state_info.DepthStencilState.DepthEnable = FALSE; + pipeline_state_info.DepthStencilState.StencilEnable = FALSE; + pipeline_state_info.SampleMask = UINT_MAX; + pipeline_state_info.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + pipeline_state_info.NumRenderTargets = 1; + pipeline_state_info.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + pipeline_state_info.SampleDesc.Count = 1; - if (FAILED(hr)) { - LOG(true, "Failed to create program"); - return NULL; + if (FAILED(device->CreateGraphicsPipelineState(&pipeline_state_info, IID_PPV_ARGS(&pipeline_state)))) { + LOG(true, "DirectX12 CreateGraphicsPipelineState"); + ASSERT_SIMPLE(false); } - return pipelineState; + return pipeline_state.Get(); } inline diff --git a/gpuapi/opengl/AppCmdBuffer.h b/gpuapi/opengl/AppCmdBuffer.h index 463e7f2..c8980f5 100644 --- a/gpuapi/opengl/AppCmdBuffer.h +++ b/gpuapi/opengl/AppCmdBuffer.h @@ -15,6 +15,7 @@ #include "ShaderUtils.h" #include "../ShaderType.h" #include "../../asset/Asset.h" +#include "../../command/AppCmdBuffer.h" void* cmd_shader_load(AppCmdBuffer*, Command*) { return NULL; diff --git a/gpuapi/opengl/OpenglUtils.h b/gpuapi/opengl/OpenglUtils.h index 41f8696..aa74fc5 100644 --- a/gpuapi/opengl/OpenglUtils.h +++ b/gpuapi/opengl/OpenglUtils.h @@ -64,7 +64,7 @@ void opengl_debug_callback(GLenum, GLenum, GLuint, GLenum severity, GLsizei, con } inline -void change_viewport(int16 width, int16 height, int32 offset_x = 0, int32 offset_y = 0) +void change_viewport(int32 width, int32 height, int32 offset_x = 0, int32 offset_y = 0) { glViewport(offset_x, offset_y, width, height); } diff --git a/gpuapi/opengl/ShaderUtils.h b/gpuapi/opengl/ShaderUtils.h index 9759822..80cc122 100644 --- a/gpuapi/opengl/ShaderUtils.h +++ b/gpuapi/opengl/ShaderUtils.h @@ -23,7 +23,7 @@ int32 shader_type_index(ShaderType type) case SHADER_TYPE_FRAGMENT: return GL_FRAGMENT_SHADER; default: - return 0; + UNREACHABLE(); } } diff --git a/gpuapi/vulkan/AppCmdBuffer.h b/gpuapi/vulkan/AppCmdBuffer.h new file mode 100644 index 0000000..b18e9ad --- /dev/null +++ b/gpuapi/vulkan/AppCmdBuffer.h @@ -0,0 +1,69 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_GPUAPI_VULKAN_APP_CMD_BUFFER_H +#define TOS_GPUAPI_VULKAN_APP_CMD_BUFFER_H + +#include "../../stdlib/Types.h" +#include "Shader.h" +#include "ShaderUtils.h" +#include "../ShaderType.h" +#include "../../asset/Asset.h" +#include "../../command/AppCmdBuffer.h" +#include "GpuApiContainer.h" + +void* cmd_shader_load(AppCmdBuffer*, Command*) { + return NULL; +} + +void* cmd_shader_load_sync(AppCmdBuffer* cb, Shader* shader, int32* shader_ids) { + char asset_id[9]; + + GpuApiContainer* gpu_api = (GpuApiContainer *) cb->gpu_api; + + VkShaderModule shader_assets[SHADER_TYPE_SIZE]; + for (int32 i = 0; i < SHADER_TYPE_SIZE; ++i) { + shader_assets[i] = NULL; + } + + for (int32 i = 0; i < SHADER_TYPE_SIZE; ++i) { + if (!shader_ids[i]) { + continue; + } else if (shader_ids[i] < 0) { + break; + } + + // Load sub asset + int_to_hex(shader_ids[i], asset_id); + Asset* shader_asset = thrd_ams_get_asset_wait(cb->ams, asset_id); + if (!shader_asset) { + int32 archive_id = (shader_ids[i] >> 24) & 0xFF; + shader_asset = asset_archive_asset_load(&cb->asset_archives[archive_id], shader_ids[i], cb->ams, 
cb->mem_vol); + } + + // Make sub shader + shader_assets[i] = shader_make( + ((GpuApiContainer *) cb->gpu_api)->device, + (char *) shader_asset->self, + shader_asset->ram_size + ); + + shader_asset->state |= ASSET_STATE_RAM_GC; + shader_asset->state |= ASSET_STATE_VRAM_GC; + } + + // Make shader/program + shader->id = program_make( + gpu_api->device, gpu_api->render_pass, &gpu_api->pipeline_layout, &gpu_api->pipeline, + shader_assets[0], shader_assets[1], shader_assets[2] + ); + + return NULL; +} + +#endif \ No newline at end of file diff --git a/gpuapi/vulkan/GpuApiContainer.h b/gpuapi/vulkan/GpuApiContainer.h index c370a1d..3557567 100644 --- a/gpuapi/vulkan/GpuApiContainer.h +++ b/gpuapi/vulkan/GpuApiContainer.h @@ -12,6 +12,8 @@ #include "../../stdlib/Types.h" #include +#define FRAME_LAG 2 + struct GpuApiContainer { VkInstance instance; VkSurfaceKHR surface; @@ -19,11 +21,11 @@ struct GpuApiContainer { VkSwapchainKHR swapchain; uint32 swapchain_image_count; VkFormat swapchain_image_format; - VkImage* swapchain_images; // swapchain_image_count - VkImageView* swapchain_image_views; // swapchain_image_count - VkFramebuffer* swapchain_framebuffers; // swapchain_image_count + VkImage* swapchain_images; // length = swapchain_image_count + VkImageView* swapchain_image_views; // length = swapchain_image_count + VkFramebuffer* swapchain_framebuffers; // length = swapchain_image_count VkExtent2D swapchain_extent; - VkPipelineLayout pipelineLayout; + VkPipelineLayout pipeline_layout; VkQueue graphics_queue; VkQueue present_queue; VkRenderPass render_pass; @@ -33,6 +35,10 @@ struct GpuApiContainer { VkSemaphore image_available_semaphore; VkSemaphore render_finished_semaphore; VkFence in_flight_fence; + + #if DEBUG || INTERNAL + VkDebugUtilsMessengerEXT debug_messenger; + #endif }; #endif \ No newline at end of file diff --git a/gpuapi/vulkan/Shader.h b/gpuapi/vulkan/Shader.h index e18e2ce..330ef73 100644 --- a/gpuapi/vulkan/Shader.h +++ b/gpuapi/vulkan/Shader.h @@ -10,9 +10,12 @@ #define TOS_GPUAPI_VULKAN_SHADER_H #include "../../stdlib/Types.h" +#include struct Shader { - uint32 id; + // @todo Consider to rename to pipeline (also in opengl, directx) if this makes sense. + // @question Does this have to be a pointer to the gpuapi->pipeline var? 
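+    // Note: VkPipeline is a non-dispatchable Vulkan handle (an opaque 64-bit value), so storing it
+    // by value only copies the handle, not the pipeline; it does not need to point at gpuapi->pipeline.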
+ VkPipeline id; uint32 locations[7]; byte data[16]; }; diff --git a/gpuapi/vulkan/ShaderUtils.h b/gpuapi/vulkan/ShaderUtils.h index 42c53a1..25f9e55 100644 --- a/gpuapi/vulkan/ShaderUtils.h +++ b/gpuapi/vulkan/ShaderUtils.h @@ -59,7 +59,7 @@ VkShaderModule shader_make(VkDevice device, const char* source, int32 source_siz VkResult result = vkCreateShaderModule(device, &create_info, NULL, &shader_module); if (result != VK_SUCCESS) { - LOG(true, "Failed to create shader module"); + LOG_FORMAT(true, "Vulkan vkCreateShaderModule: %d", LOG_DATA_INT32, (int32 *) &result); ASSERT_SIMPLE(false); return VK_NULL_HANDLE; @@ -74,4 +74,123 @@ void pipeline_use(VkCommandBuffer command_list, VkPipeline pipeline) vkCmdBindPipeline(command_list, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } +VkPipeline program_make( + VkDevice device, VkRenderPass render_pass, VkPipelineLayout* pipeline_layout, VkPipeline* pipeline, + VkShaderModule vertex_shader, + VkShaderModule fragment_shader, + VkShaderModule +) { + VkPipelineShaderStageCreateInfo vertex_shader_stage_info = {}; + vertex_shader_stage_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + vertex_shader_stage_info.stage = VK_SHADER_STAGE_VERTEX_BIT; + vertex_shader_stage_info.module = vertex_shader; + vertex_shader_stage_info.pName = "main"; + + VkPipelineShaderStageCreateInfo fragment_shader_stage_info = {}; + fragment_shader_stage_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + fragment_shader_stage_info.stage = VK_SHADER_STAGE_FRAGMENT_BIT; + fragment_shader_stage_info.module = fragment_shader; + fragment_shader_stage_info.pName = "main"; + + VkPipelineShaderStageCreateInfo shader_stages[] = {vertex_shader_stage_info, fragment_shader_stage_info}; + + VkPipelineVertexInputStateCreateInfo vertex_input_info = {}; + vertex_input_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertex_input_info.vertexBindingDescriptionCount = 0; + vertex_input_info.vertexAttributeDescriptionCount = 0; + + VkPipelineInputAssemblyStateCreateInfo input_assembly = {}; + input_assembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + input_assembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + input_assembly.primitiveRestartEnable = VK_FALSE; + + VkPipelineViewportStateCreateInfo viewport_state = {}; + viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewport_state.viewportCount = 1; + viewport_state.scissorCount = 1; + + VkPipelineRasterizationStateCreateInfo rasterizer = {}; + rasterizer.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterizer.depthClampEnable = VK_FALSE; + rasterizer.rasterizerDiscardEnable = VK_FALSE; + rasterizer.polygonMode = VK_POLYGON_MODE_FILL; + rasterizer.lineWidth = 1.0f; + rasterizer.cullMode = VK_CULL_MODE_BACK_BIT; + rasterizer.frontFace = VK_FRONT_FACE_CLOCKWISE; + rasterizer.depthBiasEnable = VK_FALSE; + + VkPipelineMultisampleStateCreateInfo multisampling = {}; + multisampling.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisampling.sampleShadingEnable = VK_FALSE; + multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + VkPipelineColorBlendAttachmentState color_blend_attachment = {}; + color_blend_attachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + color_blend_attachment.blendEnable = VK_FALSE; + + VkPipelineColorBlendStateCreateInfo color_blending = {}; + color_blending.sType = 
VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + color_blending.logicOpEnable = VK_FALSE; + color_blending.logicOp = VK_LOGIC_OP_COPY; + color_blending.attachmentCount = 1; + color_blending.pAttachments = &color_blend_attachment; + color_blending.blendConstants[0] = 0.0f; + color_blending.blendConstants[1] = 0.0f; + color_blending.blendConstants[2] = 0.0f; + color_blending.blendConstants[3] = 0.0f; + + VkDynamicState dynamic_states[] = { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR + }; + + VkPipelineDynamicStateCreateInfo dynamic_state = {}; + dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state.dynamicStateCount = ARRAY_COUNT(dynamic_states); + dynamic_state.pDynamicStates = dynamic_states; + + VkPipelineLayoutCreateInfo pipeline_info_layout = {}; + pipeline_info_layout.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipeline_info_layout.setLayoutCount = 0; + pipeline_info_layout.pushConstantRangeCount = 0; + + VkResult result; + if ((result = vkCreatePipelineLayout(device, &pipeline_info_layout, NULL, pipeline_layout)) != VK_SUCCESS) { + LOG_FORMAT(true, "Vulkan vkCreatePipelineLayout: %d", LOG_DATA_INT32, (int32 *) &result); + ASSERT_SIMPLE(false); + + return NULL; + } + + VkGraphicsPipelineCreateInfo pipeline_info = {}; + pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_info.stageCount = 2; + pipeline_info.pStages = shader_stages; + pipeline_info.pVertexInputState = &vertex_input_info; + pipeline_info.pInputAssemblyState = &input_assembly; + pipeline_info.pViewportState = &viewport_state; + pipeline_info.pRasterizationState = &rasterizer; + pipeline_info.pMultisampleState = &multisampling; + pipeline_info.pColorBlendState = &color_blending; + pipeline_info.pDynamicState = &dynamic_state; + pipeline_info.layout = *pipeline_layout; + pipeline_info.renderPass = render_pass; + pipeline_info.subpass = 0; + pipeline_info.basePipelineHandle = VK_NULL_HANDLE; + + if ((result = vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipeline_info, NULL, pipeline)) != VK_SUCCESS) { + LOG_FORMAT(true, "Vulkan vkCreateGraphicsPipelines: %d", LOG_DATA_INT32, (int32 *) &result); + ASSERT_SIMPLE(false); + + return NULL; + } + + vkDestroyShaderModule(device, fragment_shader, NULL); + vkDestroyShaderModule(device, vertex_shader, NULL); + + // @question Do we want to return the value or the pointer? + // I think the value is already a pointer? + return *pipeline; +} + #endif \ No newline at end of file diff --git a/gpuapi/vulkan/VulkanUtils.h b/gpuapi/vulkan/VulkanUtils.h index f473c47..36653d2 100644 --- a/gpuapi/vulkan/VulkanUtils.h +++ b/gpuapi/vulkan/VulkanUtils.h @@ -34,8 +34,8 @@ PACKED_STRUCT; // The reason for the packing is that sometimes we want to use it as an array // I am only packing it on the off chance there is some funky behaviour. 
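+// NOTE (editor sketch): the switch to int32 below exists so that -1 can serve as a
+// "not found" sentinel; the old code stored i + 1 and treated 0 as "not found".
+// Callers now test the indices like this:
+//     if (indices.graphics_family >= 0) { /* found a usable queue family index */ }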
 struct VulkanQueueFamilyIndices {
-    uint32 graphics_family;
-    uint32 present_family;
+    int32 graphics_family;
+    int32 present_family;
 };
 UNPACKED_STRUCT;
 
@@ -116,39 +116,28 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL vulkan_debug_callback(
         || (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
     ) {
         LOG(true, debug_callback_data->pMessage);
+        ASSERT_SIMPLE(false);
     }
 
     return VK_FALSE;
 }
 
-void vulkan_populate_debug_messenger_create_info(VkDebugUtilsMessengerCreateInfoEXT* create_info)
+void gpuapi_debug_messenger_setup(VkInstance instance, VkDebugUtilsMessengerEXT* debug_messenger)
 {
-    create_info->sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
-    create_info->messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
-    create_info->messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
-    create_info->pfnUserCallback = vulkan_debug_callback;
-}
+    // @question Why do I need this twice (see other definition)
+    // NOTE (editor): the copy in vulkan_instance_create is chained into pNext so validation also
+    // covers vkCreateInstance/vkDestroyInstance themselves; this messenger covers the rest of the
+    // instance lifetime.
+    VkDebugUtilsMessengerCreateInfoEXT create_info = {};
+    create_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
+    create_info.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
+    create_info.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
+    create_info.pfnUserCallback = vulkan_debug_callback;
 
-VkResult vulkan_debug_utils_messenger_create(
-    VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT* create_info,
-    const VkAllocationCallbacks* allocator, VkDebugUtilsMessengerEXT* debug_messenger
-) {
     PFN_vkCreateDebugUtilsMessengerEXT func = (PFN_vkCreateDebugUtilsMessengerEXT) vkGetInstanceProcAddr(instance, "vkCreateDebugUtilsMessengerEXT");
-
     if (!func) {
         ASSERT_SIMPLE(func);
-        return VK_ERROR_EXTENSION_NOT_PRESENT;
+        return;
     }
 
-    return func(instance, create_info, allocator, debug_messenger);
-}
-
-void vulkan_debug_messenger_setup(VkInstance instance, VkDebugUtilsMessengerEXT* debug_messenger)
-{
-    VkDebugUtilsMessengerCreateInfoEXT create_info = {};
-    vulkan_populate_debug_messenger_create_info(&create_info);
-
-    if (vulkan_debug_utils_messenger_create(instance, &create_info, NULL, debug_messenger) != VK_SUCCESS) {
+    if (func(instance, &create_info, NULL, debug_messenger) != VK_SUCCESS) {
         ASSERT_SIMPLE(false);
     }
 }
 
@@ -164,6 +153,8 @@ void vulkan_instance_create(
     ) {
         LOG_FORMAT(true, "Vulkan validation_layer missing: %s", {{LOG_DATA_CHAR_STR, (void *) validation_layers[-err - 1]}});
         ASSERT_SIMPLE(false);
+
+        return;
     }
 
     if (extension_count
@@ -171,6 +162,8 @@
     ) {
         LOG_FORMAT(true, "Vulkan extension missing: %s", {{LOG_DATA_CHAR_STR, (void *) extensions[-err - 1]}});
         ASSERT_SIMPLE(false);
+
+        return;
     }
 
     VkApplicationInfo app_info = {};
@@ -188,12 +181,17 @@ void vulkan_instance_create(
     create_info.enabledExtensionCount = extension_count;
     create_info.ppEnabledExtensionNames = extensions;
 
     VkDebugUtilsMessengerCreateInfoEXT debug_create_info = {};
     if (validation_layer_count) {
         create_info.enabledLayerCount = validation_layer_count;
         create_info.ppEnabledLayerNames = validation_layers;
 
-        vulkan_populate_debug_messenger_create_info(&debug_create_info);
+        // @question Why do I need this twice (see other definition)
+        // NOTE (editor): debug_create_info must stay declared OUTSIDE this block; vkCreateInstance
+        // reads it through create_info.pNext after the block has closed, so a block-local copy
+        // would leave pNext dangling.
+        debug_create_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT;
+        debug_create_info.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT;
+        debug_create_info.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT;
+        debug_create_info.pfnUserCallback = vulkan_debug_callback;
+
         create_info.pNext = (VkDebugUtilsMessengerCreateInfoEXT *) &debug_create_info;
     }
 
@@ -218,6 +216,7 @@ void vulkan_surface_create(VkInstance instance, VkSurfaceKHR* surface, Window* w
         return;
     }
 #elif __linux__
+    // @todo implement
 #endif
 }
 
@@ -265,7 +264,7 @@ void vulkan_available_extensions(RingMemory* ring) {
 VulkanQueueFamilyIndices vulkan_find_queue_families(VkPhysicalDevice physical_device, VkSurfaceKHR surface, RingMemory* ring)
 {
-    VulkanQueueFamilyIndices indices = {};
+    VulkanQueueFamilyIndices indices = { -1, -1 };
 
     uint32 queue_family_count = 0;
     vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &queue_family_count, NULL);
@@ -274,17 +273,24 @@
     for (uint32 i = 0; i < queue_family_count; ++i) {
         if (queue_families[i].queueFlags & VK_QUEUE_GRAPHICS_BIT) {
-            indices.graphics_family = i + 1;
+            indices.graphics_family = i;
         }
 
         VkBool32 present_support = false;
-        vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, i, surface, &present_support);
-        if (present_support) {
-            indices.present_family = i + 1;
+        VkResult result;
+        if ((result = vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, i, surface, &present_support)) != VK_SUCCESS) {
+            LOG_FORMAT(true, "Vulkan vkGetPhysicalDeviceSurfaceSupportKHR: %d", LOG_DATA_INT32, (int32 *) &result);
+            ASSERT_SIMPLE(false);
+
+            return indices;
         }
 
-        if (indices.graphics_family && indices.present_family) {
+        if (present_support) {
+            indices.present_family = i;
+        }
+
+        if (indices.graphics_family >= 0 && indices.present_family >= 0) {
             break;
         }
     }
@@ -326,7 +332,7 @@ bool vulkan_is_device_suitable(VkPhysicalDevice physical_device, VkSurfaceKHR su
         swap_chain_adequate = swap_chain_support.format_size && swap_chain_support.present_modes;
     }
 
-    return indices.graphics_family && indices.present_family
+    return indices.graphics_family >= 0 && indices.present_family >= 0
         && extensions_supported && swap_chain_adequate;
 }
 
@@ -404,8 +410,6 @@ void gpuapi_create_logical_device(
     vkGetDeviceQueue(*device, indices.present_family, 0, present_queue);
 }
 
-// WARNING: swapchain_images needs to already have reserved enough memory
-// @todo How can we ensure swapchain_images has enough but not too much space?
 // @question Do we need to handle old swapchains?
 void vulkan_swap_chain_create(
     VkDevice device, VkPhysicalDevice physical_device, VkSurfaceKHR surface,
@@ -486,11 +490,15 @@
     if ((result = vkCreateSwapchainKHR(device, &create_info, NULL, swapchain)) != VK_SUCCESS) {
         LOG_FORMAT(true, "Vulkan vkCreateSwapchainKHR: %d", LOG_DATA_INT32, (int32 *) &result);
         ASSERT_SIMPLE(false);
+
+        return;
     }
 
     memcpy(swapchain_image_format, &surface_format->format, sizeof(VkFormat));
 }
 
+// WARNING: swapchain_images needs to already have reserved enough memory
+// @todo How can we ensure swapchain_images has enough but not too much space?
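+// One possible answer (editor sketch): query the count first with a NULL image array and
+// reserve exactly that much before the second call:
+//     uint32 count;
+//     vkGetSwapchainImagesKHR(device, swapchain, &count, NULL);   // query the count only
+//     VkImage* images = (VkImage *) ring_get_memory(ring, count * sizeof(VkImage));
+//     vkGetSwapchainImagesKHR(device, swapchain, &count, images); // fill the array
+// (assumes a RingMemory* ring is reachable here, like in the other helpers in this file)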
void vulkan_swap_chain_images_create( VkDevice device, VkSwapchainKHR swapchain, VkImage** swapchain_images, uint32* swapchain_image_count, @@ -531,27 +539,27 @@ void vulkan_image_views_create( } } -void create_render_pass( +void vulkan_render_pass_create( VkDevice device, VkRenderPass* render_pass, VkFormat swapchain_image_format ) { - VkAttachmentDescription colorAttachment = {}; - colorAttachment.format = swapchain_image_format; - colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT; - colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; - colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; - colorAttachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - colorAttachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + VkAttachmentDescription color_attachment = {}; + color_attachment.format = swapchain_image_format; + color_attachment.samples = VK_SAMPLE_COUNT_1_BIT; + color_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + color_attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - VkAttachmentReference colorAttachmentRef = {}; - colorAttachmentRef.attachment = 0; - colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkAttachmentReference color_attachment_ref = {}; + color_attachment_ref.attachment = 0; + color_attachment_ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkSubpassDescription subpass = {}; subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; subpass.colorAttachmentCount = 1; - subpass.pColorAttachments = &colorAttachmentRef; + subpass.pColorAttachments = &color_attachment_ref; VkSubpassDependency dependency = {}; dependency.srcSubpass = VK_SUBPASS_EXTERNAL; @@ -561,141 +569,22 @@ void create_render_pass( dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - VkRenderPassCreateInfo renderPassInfo = {}; - renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; - renderPassInfo.attachmentCount = 1; - renderPassInfo.pAttachments = &colorAttachment; - renderPassInfo.subpassCount = 1; - renderPassInfo.pSubpasses = &subpass; - renderPassInfo.dependencyCount = 1; - renderPassInfo.pDependencies = &dependency; + VkRenderPassCreateInfo render_pass_info = {}; + render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + render_pass_info.attachmentCount = 1; + render_pass_info.pAttachments = &color_attachment; + render_pass_info.subpassCount = 1; + render_pass_info.pSubpasses = &subpass; + render_pass_info.dependencyCount = 1; + render_pass_info.pDependencies = &dependency; VkResult result; - if ((result = vkCreateRenderPass(device, &renderPassInfo, NULL, render_pass)) != VK_SUCCESS) { + if ((result = vkCreateRenderPass(device, &render_pass_info, NULL, render_pass)) != VK_SUCCESS) { LOG_FORMAT(true, "Vulkan vkCreateRenderPass: %d", LOG_DATA_INT32, (int32 *) &result); ASSERT_SIMPLE(false); } } -// @todo This is very similar to program_make in opengl. 
Consider to rename opengl -void vulkan_pipeline_create( - VkDevice device, - VkShaderModule vertex_shader, - VkShaderModule fragment_shader, - [[maybe_unused]] VkShaderModule geometry_shader, - VkPipeline* pipeline, - VkPipelineLayout* pipeline_layout, - VkRenderPass render_pass -) -{ - uint32 stage_count = 0; - VkPipelineShaderStageCreateInfo shaderStages[3]; - - VkPipelineShaderStageCreateInfo vs_stage_create = {}; - vs_stage_create.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - vs_stage_create.stage = VK_SHADER_STAGE_VERTEX_BIT; - vs_stage_create.module = vertex_shader; - vs_stage_create.pName = "main"; - ++stage_count; - - VkPipelineShaderStageCreateInfo fs_stage_create = {}; - fs_stage_create.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - fs_stage_create.stage = VK_SHADER_STAGE_FRAGMENT_BIT; - fs_stage_create.module = fragment_shader; - fs_stage_create.pName = "main"; - ++stage_count; - - VkPipelineVertexInputStateCreateInfo vertexInputInfo = {}; - vertexInputInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - vertexInputInfo.vertexBindingDescriptionCount = 0; - vertexInputInfo.vertexAttributeDescriptionCount = 0; - - VkPipelineInputAssemblyStateCreateInfo inputAssembly = {}; - inputAssembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - inputAssembly.primitiveRestartEnable = VK_FALSE; - - VkPipelineViewportStateCreateInfo viewportState = {}; - viewportState.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - viewportState.viewportCount = 1; - viewportState.scissorCount = 1; - - VkPipelineRasterizationStateCreateInfo rasterizer = {}; - rasterizer.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - rasterizer.depthClampEnable = VK_FALSE; - rasterizer.rasterizerDiscardEnable = VK_FALSE; - rasterizer.polygonMode = VK_POLYGON_MODE_FILL; - rasterizer.lineWidth = 1.0f; - rasterizer.cullMode = VK_CULL_MODE_BACK_BIT; - rasterizer.frontFace = VK_FRONT_FACE_CLOCKWISE; - rasterizer.depthBiasEnable = VK_FALSE; - - VkPipelineMultisampleStateCreateInfo multisampling = {}; - multisampling.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisampling.sampleShadingEnable = VK_FALSE; - multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - - VkPipelineColorBlendAttachmentState colorBlendAttachment = {}; - colorBlendAttachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; - colorBlendAttachment.blendEnable = VK_FALSE; - - VkPipelineColorBlendStateCreateInfo colorBlending = {}; - colorBlending.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - colorBlending.logicOpEnable = VK_FALSE; - colorBlending.logicOp = VK_LOGIC_OP_COPY; - colorBlending.attachmentCount = 1; - colorBlending.pAttachments = &colorBlendAttachment; - colorBlending.blendConstants[0] = 0.0f; - colorBlending.blendConstants[1] = 0.0f; - colorBlending.blendConstants[2] = 0.0f; - colorBlending.blendConstants[3] = 0.0f; - - VkDynamicState dynamicStates[] = { - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR - }; - - VkPipelineDynamicStateCreateInfo dynamicState = {}; - dynamicState.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamicState.dynamicStateCount = ARRAY_COUNT(dynamicStates); - dynamicState.pDynamicStates = dynamicStates; - - VkPipelineLayoutCreateInfo pipelineLayoutInfo = {}; - pipelineLayoutInfo.sType = 
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipelineLayoutInfo.setLayoutCount = 0; - pipelineLayoutInfo.pushConstantRangeCount = 0; - - VkResult result; - if ((result = vkCreatePipelineLayout(device, &pipelineLayoutInfo, NULL, pipeline_layout)) != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkCreatePipelineLayout: %d", LOG_DATA_INT32, (int32 *) &result); - ASSERT_SIMPLE(false); - } - - VkGraphicsPipelineCreateInfo pipelineInfo = {}; - pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - pipelineInfo.stageCount = stage_count; - pipelineInfo.pStages = shaderStages; - pipelineInfo.pVertexInputState = &vertexInputInfo; - pipelineInfo.pInputAssemblyState = &inputAssembly; - pipelineInfo.pViewportState = &viewportState; - pipelineInfo.pRasterizationState = &rasterizer; - pipelineInfo.pMultisampleState = &multisampling; - pipelineInfo.pColorBlendState = &colorBlending; - pipelineInfo.pDynamicState = &dynamicState; - pipelineInfo.layout = *pipeline_layout; - pipelineInfo.renderPass = render_pass; - pipelineInfo.subpass = 0; - pipelineInfo.basePipelineHandle = VK_NULL_HANDLE; - - if ((result = vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, NULL, pipeline)) != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkCreateGraphicsPipelines: %d", LOG_DATA_INT32, (int32 *) &result); - ASSERT_SIMPLE(false); - } - - vkDestroyShaderModule(device, vertex_shader, NULL); - vkDestroyShaderModule(device, fragment_shader, NULL); -} - // @todo consider to rename to same name as opengl // WARNING: swapchain_framebuffers needs to be initialized void vulkan_framebuffer_create( @@ -731,13 +620,13 @@ void vulkan_command_pool_create( ) { VulkanQueueFamilyIndices queue_family_indices = vulkan_find_queue_families(physical_device, surface, ring); - VkCommandPoolCreateInfo poolInfo = {}; - poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; - poolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; - poolInfo.queueFamilyIndex = queue_family_indices.graphics_family; + VkCommandPoolCreateInfo pool_info = {}; + pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + pool_info.queueFamilyIndex = queue_family_indices.graphics_family; VkResult result; - if ((result = vkCreateCommandPool(device, &poolInfo, NULL, command_pool)) != VK_SUCCESS) { + if ((result = vkCreateCommandPool(device, &pool_info, NULL, command_pool)) != VK_SUCCESS) { LOG_FORMAT(true, "Vulkan vkCreateCommandPool: %d", LOG_DATA_INT32, (int32 *) &result); ASSERT_SIMPLE(false); } @@ -745,14 +634,14 @@ void vulkan_command_pool_create( void vulkan_command_buffer_create(VkDevice device, VkCommandBuffer* command_buffer, VkCommandPool command_pool) { - VkCommandBufferAllocateInfo allocInfo = {}; - allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; - allocInfo.commandPool = command_pool; - allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - allocInfo.commandBufferCount = 1; + VkCommandBufferAllocateInfo alloc_info = {}; + alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + alloc_info.commandPool = command_pool; + alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + alloc_info.commandBufferCount = 1; VkResult result; - if ((result = vkAllocateCommandBuffers(device, &allocInfo, command_buffer)) != VK_SUCCESS) { + if ((result = vkAllocateCommandBuffers(device, &alloc_info, command_buffer)) != VK_SUCCESS) { LOG_FORMAT(true, "Vulkan vkAllocateCommandBuffers: %d", LOG_DATA_INT32, (int32 *) &result); ASSERT_SIMPLE(false); 
} @@ -760,70 +649,22 @@ void vulkan_command_buffer_create(VkDevice device, VkCommandBuffer* command_buff void vulkan_sync_objects_create(VkDevice device, VkSemaphore* image_available_semaphore, VkSemaphore* render_finished_semaphore, VkFence* in_flight_fence) { - VkSemaphoreCreateInfo semaphoreInfo = {}; - semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + VkSemaphoreCreateInfo semaphore_info = {}; + semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; - VkFenceCreateInfo fenceInfo{}; - fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; + VkFenceCreateInfo fence_info = {}; + fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; VkResult result; - if ((result = vkCreateSemaphore(device, &semaphoreInfo, NULL, image_available_semaphore)) != VK_SUCCESS - || (result = vkCreateSemaphore(device, &semaphoreInfo, NULL, render_finished_semaphore)) != VK_SUCCESS - || (result = vkCreateFence(device, &fenceInfo, NULL, in_flight_fence)) != VK_SUCCESS + if ((result = vkCreateSemaphore(device, &semaphore_info, NULL, image_available_semaphore)) != VK_SUCCESS + || (result = vkCreateSemaphore(device, &semaphore_info, NULL, render_finished_semaphore)) != VK_SUCCESS + || (result = vkCreateFence(device, &fence_info, NULL, in_flight_fence)) != VK_SUCCESS ) { LOG_FORMAT(true, "Vulkan vulkan_sync_objects_create: %d", LOG_DATA_INT32, (int32 *) &result); ASSERT_SIMPLE(false); } } -/* -void vulkan_command_buffer_record( - VkCommandBuffer command_buffer -) { - VkCommandBufferBeginInfo beginInfo{}; - beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - if (vkBeginCommandBuffer(command_buffer, &beginInfo) != VK_SUCCESS) { - ASSERT_SIMPLE(false); - } - - VkRenderPassBeginInfo renderPassInfo{}; - renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - renderPassInfo.renderPass = renderPass; - renderPassInfo.framebuffer = swapChainFramebuffers[imageIndex]; - renderPassInfo.renderArea.offset = {0, 0}; - renderPassInfo.renderArea.extent = swapChainExtent; - - VkClearValue clearColor = {{{0.0f, 0.0f, 0.0f, 1.0f}}}; - renderPassInfo.clearValueCount = 1; - renderPassInfo.pClearValues = &clearColor; - - vkCmdBeginRenderPass(commandBuffer, &renderPassInfo, VK_SUBPASS_CONTENTS_INLINE); - - vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, graphicsPipeline); - - VkViewport viewport{}; - viewport.x = 0.0f; - viewport.y = 0.0f; - viewport.width = static_cast(swapChainExtent.width); - viewport.height = static_cast(swapChainExtent.height); - viewport.minDepth = 0.0f; - viewport.maxDepth = 1.0f; - vkCmdSetViewport(commandBuffer, 0, 1, &viewport); - - VkRect2D scissor{}; - scissor.offset = {0, 0}; - scissor.extent = swapChainExtent; - vkCmdSetScissor(commandBuffer, 0, 1, &scissor); - - vkCmdDraw(commandBuffer, 3, 1, 0, 0); - - vkCmdEndRenderPass(commandBuffer); - - if (vkEndCommandBuffer(commandBuffer) != VK_SUCCESS) { - ASSERT_SIMPLE(false); - } -} -*/ #endif \ No newline at end of file diff --git a/math/matrix/MatrixFloat32.h b/math/matrix/MatrixFloat32.h index 858c096..d98a663 100644 --- a/math/matrix/MatrixFloat32.h +++ b/math/matrix/MatrixFloat32.h @@ -416,40 +416,6 @@ void mat3vec3_mult(const f32* __restrict matrix, const f32* __restrict vector, f result[2] = matrix[6] * vector[0] + matrix[7] * vector[1] + matrix[8] * vector[2]; } -// @question could simple mul add sse be faster? 
-void mat3vec3_mult_sse(const f32* __restrict matrix, const f32* __restrict vector, f32* __restrict result) -{ - __m128 vec = _mm_load_ps(vector); - vec = _mm_insert_ps(vec, _mm_setzero_ps(), 0x30); // vec[3] = 0 - - for (int32 i = 0; i < 3; ++i) { - __m128 row = _mm_load_ps(&matrix[i * 3]); - row = _mm_insert_ps(row, _mm_setzero_ps(), 0x30); // row[3] = 0 - - __m128 dot = _mm_dp_ps(row, vec, 0xF1); - - result[i] = _mm_cvtss_f32(dot); - } -} - -// @question could simple mul add sse be faster? -void mat3vec3_mult_sse(const __m128* __restrict matrix, const __m128* __restrict vector, f32* __restrict result) -{ - for (int32 i = 0; i < 3; ++i) { - __m128 dot = _mm_dp_ps(matrix[i], *vector, 0xF1); - - result[i] = _mm_cvtss_f32(dot); - } -} - -// @question could simple mul add sse be faster? -void mat3vec3_mult_sse(const __m128* __restrict matrix, const __m128* __restrict vector, __m128* __restrict result) -{ - for (int32 i = 0; i < 4; ++i) { - result[i] = _mm_dp_ps(matrix[i], *vector, 0xF1); - } -} - inline void mat4vec4_mult(const f32* __restrict matrix, const f32* __restrict vector, f32* __restrict result) { @@ -514,146 +480,32 @@ void mat4mat4_mult(const f32* __restrict a, const f32* __restrict b, f32* __rest result[15] = a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15]; } -void mat4mat4_mult(const f32* __restrict a, const f32* __restrict b, f32* __restrict result, int32 steps) -{ - if (steps > 1) { - // @todo check http://fhtr.blogspot.com/2010/02/4x4-f32-matrix-multiplication-using.html - // @question could simple mul add sse be faster? - // Load rows of matrix a - __m128 a_1 = _mm_load_ps(a); - __m128 a_2 = _mm_load_ps(&a[4]); - __m128 a_3 = _mm_load_ps(&a[8]); - __m128 a_4 = _mm_load_ps(&a[12]); - - // Load columns of matrix b - __m128 b_1 = _mm_load_ps(b); - __m128 b_2 = _mm_load_ps(&b[4]); - __m128 b_3 = _mm_load_ps(&b[8]); - __m128 b_4 = _mm_load_ps(&b[12]); - - _mm_storeu_ps(&result[0], - _mm_add_ps( - _mm_add_ps( - _mm_mul_ps(_mm_shuffle_ps(a_1, a_1, _MM_SHUFFLE(0, 0, 0, 0)), b_1), - _mm_mul_ps(_mm_shuffle_ps(a_1, a_1, _MM_SHUFFLE(1, 1, 1, 1)), b_2) - ), - _mm_add_ps( - _mm_mul_ps(_mm_shuffle_ps(a_1, a_1, _MM_SHUFFLE(2, 2, 2, 2)), b_3), - _mm_mul_ps(_mm_shuffle_ps(a_1, a_1, _MM_SHUFFLE(3, 3, 3, 3)), b_4) - ) - ) - ); - - _mm_storeu_ps(&result[4], - _mm_add_ps( - _mm_add_ps( - _mm_mul_ps(_mm_shuffle_ps(a_2, a_2, _MM_SHUFFLE(0, 0, 0, 0)), b_1), - _mm_mul_ps(_mm_shuffle_ps(a_2, a_2, _MM_SHUFFLE(1, 1, 1, 1)), b_2) - ), - _mm_add_ps( - _mm_mul_ps(_mm_shuffle_ps(a_2, a_2, _MM_SHUFFLE(2, 2, 2, 2)), b_3), - _mm_mul_ps(_mm_shuffle_ps(a_2, a_2, _MM_SHUFFLE(3, 3, 3, 3)), b_4) - ) - ) - ); - - _mm_storeu_ps(&result[8], - _mm_add_ps( - _mm_add_ps( - _mm_mul_ps(_mm_shuffle_ps(a_3, a_3, _MM_SHUFFLE(0, 0, 0, 0)), b_1), - _mm_mul_ps(_mm_shuffle_ps(a_3, a_3, _MM_SHUFFLE(1, 1, 1, 1)), b_2) - ), - _mm_add_ps( - _mm_mul_ps(_mm_shuffle_ps(a_3, a_3, _MM_SHUFFLE(2, 2, 2, 2)), b_3), - _mm_mul_ps(_mm_shuffle_ps(a_3, a_3, _MM_SHUFFLE(3, 3, 3, 3)), b_4) - ) - ) - ); - - _mm_storeu_ps(&result[12], - _mm_add_ps( - _mm_add_ps( - _mm_mul_ps(_mm_shuffle_ps(a_4, a_4, _MM_SHUFFLE(0, 0, 0, 0)), b_1), - _mm_mul_ps(_mm_shuffle_ps(a_4, a_4, _MM_SHUFFLE(1, 1, 1, 1)), b_2) - ), - _mm_add_ps( - _mm_mul_ps(_mm_shuffle_ps(a_4, a_4, _MM_SHUFFLE(2, 2, 2, 2)), b_3), - _mm_mul_ps(_mm_shuffle_ps(a_4, a_4, _MM_SHUFFLE(3, 3, 3, 3)), b_4) - ) - ) - ); - } else { - mat4mat4_mult(a, b, result); - } -} - -void mat4mat4_mult_sse(const __m128* __restrict a, const __m128* __restrict b_transposed, f32* __restrict result) -{ - 
__m128 dot; - - // @question could simple mul add sse be faster? - // b1 - dot = _mm_dp_ps(a[0], b_transposed[0], 0xF1); - result[0] = _mm_cvtss_f32(dot); - - dot = _mm_dp_ps(a[1], b_transposed[0], 0xF1); - result[1] = _mm_cvtss_f32(dot); - - dot = _mm_dp_ps(a[2], b_transposed[0], 0xF1); - result[2] = _mm_cvtss_f32(dot); - - dot = _mm_dp_ps(a[3], b_transposed[0], 0xF1); - result[3] = _mm_cvtss_f32(dot); - - // b2 - dot = _mm_dp_ps(a[0], b_transposed[1], 0xF1); - result[4] = _mm_cvtss_f32(dot); - - dot = _mm_dp_ps(a[1], b_transposed[1], 0xF1); - result[5] = _mm_cvtss_f32(dot); - - dot = _mm_dp_ps(a[2], b_transposed[1], 0xF1); - result[6] = _mm_cvtss_f32(dot); - - dot = _mm_dp_ps(a[3], b_transposed[1], 0xF1); - result[7] = _mm_cvtss_f32(dot); - - // b3 - dot = _mm_dp_ps(a[0], b_transposed[2], 0xF1); - result[8] = _mm_cvtss_f32(dot); - - dot = _mm_dp_ps(a[1], b_transposed[2], 0xF1); - result[9] = _mm_cvtss_f32(dot); - - dot = _mm_dp_ps(a[2], b_transposed[2], 0xF1); - result[10] = _mm_cvtss_f32(dot); - - dot = _mm_dp_ps(a[3], b_transposed[2], 0xF1); - result[11] = _mm_cvtss_f32(dot); - - // b4 - dot = _mm_dp_ps(a[0], b_transposed[3], 0xF1); - result[12] = _mm_cvtss_f32(dot); - - dot = _mm_dp_ps(a[1], b_transposed[3], 0xF1); - result[13] = _mm_cvtss_f32(dot); - - dot = _mm_dp_ps(a[2], b_transposed[3], 0xF1); - result[14] = _mm_cvtss_f32(dot); - - dot = _mm_dp_ps(a[3], b_transposed[3], 0xF1); - result[15] = _mm_cvtss_f32(dot); -} - inline -void mat4mat4_mult_sse(const __m128* __restrict a, const __m128* __restrict b_transpose, __m128* __restrict result) +void mat4mat4_mult_simd(const f32* __restrict a, const f32* __restrict b, f32* __restrict result) { - for (int32 i = 0; i < 4; ++i) { - result[i] = _mm_mul_ps(a[0], b_transpose[i]); + __m128 row1 = _mm_loadu_ps(&b[0]); + __m128 row2 = _mm_loadu_ps(&b[4]); + __m128 row3 = _mm_loadu_ps(&b[8]); + __m128 row4 = _mm_loadu_ps(&b[12]); - for (int32 j = 1; j < 4; ++j) { - result[i] = _mm_add_ps(_mm_mul_ps(a[j], b_transpose[4 * j + i]), result[i]); - } + for (int32 i = 3; i >= 0; --i) { + __m128 vW = _mm_loadu_ps(&a[i * 4]); + + __m128 vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); + __m128 vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); + __m128 vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); + vW = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); + + vX = _mm_mul_ps(vX, row1); + vY = _mm_mul_ps(vY, row2); + vZ = _mm_mul_ps(vZ, row3); + vW = _mm_mul_ps(vW, row4); + + vX = _mm_add_ps(vX, vZ); + vY = _mm_add_ps(vY, vW); + vX = _mm_add_ps(vX, vY); + + _mm_storeu_ps(&result[i * 4], vX); } } @@ -863,7 +715,7 @@ void mat4_translate(f32* matrix, f32 dx, f32 dy, f32 dz) mat4mat4_mult(temp, translation_matrix, matrix); } -void mat4_translate(f32* matrix, f32 dx, f32 dy, f32 dz, int32 steps) +void mat4_translate_simd(f32* matrix, f32 dx, f32 dy, f32 dz) { alignas(64) f32 temp[16]; memcpy(temp, matrix, sizeof(f32) * 16); @@ -874,7 +726,7 @@ void mat4_translate(f32* matrix, f32 dx, f32 dy, f32 dz, int32 steps) translation_matrix[8] = 0.0f; translation_matrix[9] = 0.0f; translation_matrix[10] = 1.0f; translation_matrix[11] = dz; translation_matrix[12] = 0.0f; translation_matrix[13] = 0.0f; translation_matrix[14] = 0.0f; translation_matrix[15] = 1.0f; - mat4mat4_mult(temp, translation_matrix, matrix, steps); + mat4mat4_mult_simd(temp, translation_matrix, matrix); } inline diff --git a/object/Model.h b/object/Model.h new file mode 100644 index 0000000..5ccf160 --- /dev/null +++ b/object/Model.h @@ -0,0 +1,47 @@ + +// Memory layout TBD, we can 
probably optimize it for matrix operations + +/////////////////////////////// +// Model +// ============================ +// Mesh +// ============================ +// Current Transform/Mesh +// ============================ +// Joint 1 +// ============================ +// Child Joint Id 1 +// Child Joint Id 2 +// ... +// ============================ +// Joint 2 +// .... +// ============================ +// Animation 1 +// ============================ +// Keyframe 1 +// ============================ +// Joint 1 Transform +// Joint 2 Transform +// .... +// ============================ +// Keyframe 2 +// ============================ +// .... +// ============================ +// Animation 2 +// ============================ +// .... +// ============================ + +struct Model { + Mesh* mesh; +}; + +struct ModelJoint { + byte children_count; +}; + +struct ModelAnimation { + // joint positions +}; \ No newline at end of file diff --git a/object/Vertex.h b/object/Vertex.h index 73b8484..80b5b5c 100644 --- a/object/Vertex.h +++ b/object/Vertex.h @@ -35,6 +35,11 @@ struct Vertex3DColorIndex { f32 color; }; +struct Vertex3DColor { + v3_f32 position; + v4_f32 color; +}; + struct Vertex2D { v2_f32 position; v2_f32 tex_coord; diff --git a/platform/linux/FileUtils.cpp b/platform/linux/FileUtils.cpp index c4af4be..b50517b 100644 --- a/platform/linux/FileUtils.cpp +++ b/platform/linux/FileUtils.cpp @@ -242,14 +242,14 @@ void file_read(const char* path, FileBody* file, RingMemory* ring) { return; } - file->size = file_stat.st_size + 1; + file->size = file_stat.st_size; } if (ring != NULL) { - file->content = ring_get_memory(ring, file->size); + file->content = ring_get_memory(ring, file->size + 1); } - ssize_t bytes_read = read(fp, file->content, file->size - 1); + ssize_t bytes_read = read(fp, file->content, file->size); if (bytes_read != file->size) { close(fp); file->content = NULL; diff --git a/platform/win32/FileUtils.cpp b/platform/win32/FileUtils.cpp index a87f621..c71d32a 100644 --- a/platform/win32/FileUtils.cpp +++ b/platform/win32/FileUtils.cpp @@ -185,15 +185,15 @@ file_read(const char* path, FileBody* file, RingMemory* ring = NULL) return; } - file->size = size.QuadPart + 1; + file->size = size.QuadPart; } if (ring != NULL) { - file->content = ring_get_memory(ring, file->size); + file->content = ring_get_memory(ring, file->size + 1); } DWORD bytes_read; - if (!ReadFile(fp, file->content, (uint32) file->size - 1, &bytes_read, NULL)) { + if (!ReadFile(fp, file->content, (uint32) file->size, &bytes_read, NULL)) { CloseHandle(fp); file->content = NULL; @@ -203,7 +203,7 @@ file_read(const char* path, FileBody* file, RingMemory* ring = NULL) CloseHandle(fp); file->content[bytes_read] = '\0'; - file->size = bytes_read + 1; + file->size = bytes_read; LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_READ, bytes_read); } diff --git a/platform/win32/UtilsWindows.h b/platform/win32/UtilsWindows.h index b486035..90ac1a2 100644 --- a/platform/win32/UtilsWindows.h +++ b/platform/win32/UtilsWindows.h @@ -159,12 +159,12 @@ HBITMAP CreateBitmapFromRGBA(HDC hdc, const byte* rgba, int32 width, int32 heigh bmi.bmiHeader.biCompression = BI_RGB; void* pbits; - HBITMAP hBitmap = CreateDIBSection(hdc, &bmi, DIB_RGB_COLORS, &pbits, NULL, 0); - if (hBitmap) { + HBITMAP hbitmap = CreateDIBSection(hdc, &bmi, DIB_RGB_COLORS, &pbits, NULL, 0); + if (hbitmap) { memcpy(pbits, rgba, width * height * 4); } - return hBitmap; + return hbitmap; } #endif \ No newline at end of file diff --git a/platform/win32/input/RawInput.h 
b/platform/win32/input/RawInput.h index c5c7956..4e2b5d9 100644 --- a/platform/win32/input/RawInput.h +++ b/platform/win32/input/RawInput.h @@ -206,9 +206,9 @@ void input_mouse_position(HWND hwnd, v2_int32* pos) } } -int32 input_raw_handle(RAWINPUT* __restrict raw, Input* __restrict states, int32 state_count, uint64 time) +int16 input_raw_handle(RAWINPUT* __restrict raw, Input* __restrict states, int32 state_count, uint64 time) { - int32 input_count = 0; + int16 input_count = 0; int32 i = 0; if (raw->header.dwType == RIM_TYPEMOUSE) { @@ -398,7 +398,7 @@ void input_handle(LPARAM lParam, Input* __restrict states, int state_count, Ring input_raw_handle((RAWINPUT *) lpb, states, state_count, time); } -int32 input_handle_buffered(int32 buffer_size, Input* __restrict states, int state_count, RingMemory* ring, uint64 time) +int16 input_handle_buffered(int32 buffer_size, Input* __restrict states, int32 state_count, RingMemory* ring, uint64 time) { uint32 cb_size; GetRawInputBuffer(NULL, &cb_size, sizeof(RAWINPUTHEADER)); @@ -411,7 +411,7 @@ int32 input_handle_buffered(int32 buffer_size, Input* __restrict states, int sta PRAWINPUT raw_input = (PRAWINPUT) ring_get_memory(ring, cb_size, 4); - int32 input_count = 0; + int16 input_count = 0; uint32 input; while (true) { diff --git a/platform/win32/threading/Atomic.h b/platform/win32/threading/Atomic.h index 6b02eda..44d91a5 100644 --- a/platform/win32/threading/Atomic.h +++ b/platform/win32/threading/Atomic.h @@ -10,243 +10,424 @@ #define TOS_PLATFORM_WIN32_THREADING_ATOMIC_H #include "../../../stdlib/Types.h" +#include "../../../compiler/CompilerUtils.h" #include -// WARNING: Windows doesn't really have relaxed, release, acquire function on x86_64. -// You can see that by checking out how they are defined - // We need the following helper types to "cast" between float and long. // We can't just perform a "normal" cast since that re-interprets the bits. We need to maintain the bits +// @question Don't we also have to do this for signed vs unsigned? 
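+// NOTE (editor): for signed vs unsigned this should not be needed. A cast between integer
+// types of the same width already preserves the bit pattern on two's complement targets:
+//     uint32 u = 0xFFFFFFFFu;
+//     int32  s = (int32) u; // same bits, only the interpretation changes
+// Floats are the problem case: a value cast like (LONG) 1.5f converts to 1 instead of
+// keeping the IEEE-754 bits, hence these unions.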
typedef union { f32 f; LONG l; } _atomic_32; typedef union { f64 f; LONG64 l; } _atomic_64; -inline void atomic_set_relaxed(void** target, void* new_pointer) { InterlockedExchangePointerNoFence(target, new_pointer); } -inline void* atomic_get_relaxed(void** target) { return InterlockedCompareExchangePointerNoFence(target, NULL, NULL); } -inline void atomic_set_relaxed(volatile int32* value, int32 new_value) { InterlockedExchangeNoFence((long *) value, new_value); } -inline void atomic_set_relaxed(volatile int64* value, int64 new_value) { InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value); } -inline void atomic_set_relaxed(volatile f32* value, f32 new_value) { _atomic_32 temp = {.f = new_value}; InterlockedExchangeNoFence((long *) value, (long) temp.l); } -inline void atomic_set_relaxed(volatile f64* value, f64 new_value) { _atomic_64 temp = {.f = new_value}; InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) temp.l); } -inline int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) { return (int32) InterlockedExchangeNoFence((long *) value, new_value); } -inline int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) { return (int64) InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value); } -inline int32 atomic_get_relaxed(volatile int32* value) { return (int32) InterlockedCompareExchangeNoFence((long *) value, 0, 0); } -inline int64 atomic_get_relaxed(volatile int64* value) { return (int64) InterlockedCompareExchangeNoFence64((LONG64 *) value, 0, 0); } -inline f32 atomic_get_relaxed(volatile f32* value) { _atomic_32 temp = {.l = InterlockedCompareExchangeNoFence((long *) value, 0, 0)}; return temp.f; } -inline f64 atomic_get_relaxed(volatile f64* value) { _atomic_64 temp = {.l = InterlockedCompareExchangeNoFence64((LONG64 *) value, 0, 0)}; return temp.f; } -inline void atomic_increment_relaxed(volatile int32* value) { InterlockedIncrementNoFence((long *) value); } -inline void atomic_decrement_relaxed(volatile int32* value) { InterlockedDecrementNoFence((long *) value); } -inline void atomic_increment_relaxed(volatile int64* value) { InterlockedIncrementNoFence64((LONG64 *) value); } -inline void atomic_decrement_relaxed(volatile int64* value) { InterlockedDecrementNoFence64((LONG64 *) value); } -inline void atomic_add_relaxed(volatile int32* value, int32 increment) { InterlockedAddNoFence((long *) value, increment); } -inline void atomic_sub_relaxed(volatile int32* value, int32 decrement) { InterlockedAddNoFence((long *) value, -decrement); } -inline void atomic_add_relaxed(volatile int64* value, int64 increment) { InterlockedAddNoFence64((LONG64 *) value, (LONG64) increment); } -inline void atomic_sub_relaxed(volatile int64* value, int64 decrement) { InterlockedAddNoFence64((LONG64 *) value, -((LONG64) decrement)); } -inline f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) { _atomic_32 temp = {.l = InterlockedCompareExchangeNoFence((long *) value, (long) desired, (long) *expected) }; return temp.f; } -inline f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) { _atomic_64 temp = {.l = InterlockedCompareExchangeNoFence64((LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; } -inline int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchangeNoFence((long *) value, desired, *expected); } -inline int64 atomic_compare_exchange_weak_relaxed(volatile 
int64* value, int64* expected, int64 desired) { return (int64) InterlockedCompareExchangeNoFence64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } -inline int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddNoFence((long *) value, operand); } -inline int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddNoFence((unsigned long *) value, -((long) operand)); } -inline int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAddNoFence64((LONG64 *) value, (LONG64) operand); } -inline int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); } -inline void atomic_set_relaxed(volatile uint32* value, uint32 new_value) { InterlockedExchangeNoFence((long *) value, new_value); } -inline void atomic_set_relaxed(volatile uint64* value, uint64 new_value) { InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value); } -inline uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchangeNoFence((long *) value, new_value); } -inline uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value); } -inline uint32 atomic_get_relaxed(volatile uint32* value) { return (uint32) InterlockedCompareExchangeNoFence((long *) value, 0, 0); } -inline uint64 atomic_get_relaxed(volatile uint64* value) { return (uint64) InterlockedCompareExchangeNoFence64((LONG64 *) value, 0, 0); } -inline void atomic_increment_relaxed(volatile uint32* value) { InterlockedIncrementNoFence((long *) value); } -inline void atomic_decrement_relaxed(volatile uint32* value) { InterlockedDecrementNoFence((long *) value); } -inline void atomic_increment_relaxed(volatile uint64* value) { InterlockedIncrementNoFence64((LONG64 *) value); } -inline void atomic_decrement_relaxed(volatile uint64* value) { InterlockedDecrementNoFence64((LONG64 *) value); } -inline void atomic_add_relaxed(volatile uint32* value, uint32 increment) { InterlockedAddNoFence((long *) value, increment); } -inline void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) { InterlockedAddNoFence((long *) value, -1 * ((int32) decrement)); } -inline void atomic_add_relaxed(volatile uint64* value, uint64 increment) { InterlockedAddNoFence64((LONG64 *) value, (LONG64) increment); } -inline void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) { InterlockedAddNoFence64((LONG64 *) value, -((LONG64) decrement)); } -inline uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) { return (uint32) InterlockedCompareExchangeNoFence((long *) value, desired, *expected); } -inline uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) { return (uint64) InterlockedCompareExchangeNoFence64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } -inline uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddNoFence((long *) value, operand); } -inline uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddNoFence((unsigned long *) value, -((long) operand)); } -inline uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) { return (uint64) 
InterlockedExchangeAddNoFence64((LONG64 *) value, (LONG64) operand); } -inline uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); } -inline void atomic_and_relaxed(volatile uint32* value, uint32 mask) { InterlockedAndNoFence((volatile LONG *) value, mask); } -inline void atomic_and_relaxed(volatile int32* value, int32 mask) { InterlockedAndNoFence((volatile LONG *) value, (LONG)mask); } -inline void atomic_and_relaxed(volatile uint64* value, uint64 mask) { InterlockedAnd64NoFence((volatile LONG64 *) value, mask); } -inline void atomic_and_relaxed(volatile int64* value, int64 mask) { InterlockedAnd64NoFence((volatile LONG64 *) value, mask); } -inline void atomic_or_relaxed(volatile uint32* value, uint32 mask) { InterlockedOrNoFence((volatile LONG *) value, mask); } -inline void atomic_or_relaxed(volatile int32* value, int32 mask) { InterlockedOrNoFence((volatile LONG *) value, (LONG)mask); } -inline void atomic_or_relaxed(volatile uint64* value, uint64 mask) { InterlockedOr64NoFence((volatile LONG64 *) value, mask); } -inline void atomic_or_relaxed(volatile int64* value, int64 mask) { InterlockedOr64NoFence((volatile LONG64 *) value, mask); } +// WARNING: Windows doesn't really have relaxed, release, acquire function on x86_64. +// You can see that by checking out how they are defined +// @bug As a result we are not always using the correct fenced/unfenced version on ARM +// (e.g. see _InterlockedCompareExchange8, it should be _InterlockedCompareExchange8_nf/rel/acq) +// To solve this we would probably have to make some of these functions Architecture specific in addition to platform specific -inline void atomic_set_acquire(void** target, void* new_pointer) { InterlockedExchangePointerAcquire(target, new_pointer); } -inline void* atomic_get_acquire(void** target) { return InterlockedCompareExchangePointerAcquire(target, NULL, NULL); } -inline void atomic_set_acquire(volatile int32* value, int32 new_value) { InterlockedExchangeAcquire((long *) value, new_value); } -inline void atomic_set_acquire(volatile int64* value, int64 new_value) { InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value); } -inline void atomic_set_acquire(volatile f32* value, f32 new_value) { _atomic_32 temp = {.f = new_value}; InterlockedExchangeAcquire((long *) value, (long) temp.l); } -inline void atomic_set_acquire(volatile f64* value, f64 new_value) { _atomic_64 temp = {.f = new_value}; InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) temp.l); } -inline int32 atomic_fetch_set_acquire(volatile int32* value, int32 new_value) { return (int32) InterlockedExchangeAcquire((long *) value, new_value); } -inline int64 atomic_fetch_set_acquire(volatile int64* value, int64 new_value) { return (int64) InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value); } -inline int32 atomic_get_acquire(volatile int32* value) { return (int32) InterlockedCompareExchangeAcquire((long *) value, 0, 0); } -inline int64 atomic_get_acquire(volatile int64* value) { return (int64) InterlockedCompareExchangeAcquire64((LONG64 *) value, 0, 0); } -inline f32 atomic_get_acquire(volatile f32* value) { _atomic_32 temp = {.l = InterlockedCompareExchangeAcquire((long *) value, 0, 0)}; return temp.f; } -inline f64 atomic_get_acquire(volatile f64* value) { _atomic_64 temp = {.l = InterlockedCompareExchangeAcquire64((LONG64 *) value, 0, 0)}; return temp.f; } -inline void atomic_increment_acquire(volatile int32* value) { 
InterlockedIncrementAcquire((long *) value); }
-inline void atomic_decrement_acquire(volatile int32* value) { InterlockedDecrementAcquire((long *) value); }
-inline void atomic_increment_acquire(volatile int64* value) { InterlockedIncrementAcquire64((LONG64 *) value); }
-inline void atomic_decrement_acquire(volatile int64* value) { InterlockedDecrementAcquire64((LONG64 *) value); }
-inline void atomic_add_acquire(volatile int32* value, int32 increment) { InterlockedAddAcquire((long *) value, increment); }
-inline void atomic_sub_acquire(volatile int32* value, int32 decrement) { InterlockedAddAcquire((long *) value, -decrement); }
-inline void atomic_add_acquire(volatile int64* value, int64 increment) { InterlockedAddAcquire64((LONG64 *) value, (LONG64) increment); }
-inline void atomic_sub_acquire(volatile int64* value, int64 decrement) { InterlockedAddAcquire64((LONG64 *) value, -((LONG64) decrement)); }
-inline f32 atomic_compare_exchange_weak_acquire(volatile f32* value, f32* expected, f32 desired) { _atomic_32 temp = {.l = InterlockedCompareExchangeAcquire((long *) value, (long) desired, (long) *expected) }; return temp.f; }
-inline f64 atomic_compare_exchange_weak_acquire(volatile f64* value, f64* expected, f64 desired) { _atomic_64 temp = {.l = InterlockedCompareExchangeAcquire64((LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; }
-inline int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchangeAcquire((long *) value, desired, *expected); }
-inline int64 atomic_compare_exchange_weak_acquire(volatile int64* value, int64* expected, int64 desired) { return (int64) InterlockedCompareExchangeAcquire64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-inline int32 atomic_fetch_add_acquire(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddAcquire((long *) value, operand); }
-inline int32 atomic_fetch_sub_acquire(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddAcquire((unsigned long *) value, -((long) operand)); }
-inline int64 atomic_fetch_add_acquire(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAddAcquire64((LONG64 *) value, (LONG64) operand); }
-inline int64 atomic_fetch_sub_acquire(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); }
-inline void atomic_set_acquire(volatile uint32* value, uint32 new_value) { InterlockedExchangeAcquire((long *) value, new_value); }
-inline void atomic_set_acquire(volatile uint64* value, uint64 new_value) { InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value); }
-inline uint32 atomic_fetch_set_acquire(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchangeAcquire((long *) value, new_value); }
-inline uint64 atomic_fetch_set_acquire(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value); }
-inline uint32 atomic_get_acquire(volatile uint32* value) { return (uint32) InterlockedCompareExchangeAcquire((long *) value, 0, 0); }
-inline uint64 atomic_get_acquire(volatile uint64* value) { return (uint64) InterlockedCompareExchangeAcquire64((LONG64 *) value, 0, 0); }
-inline void atomic_increment_acquire(volatile uint32* value) { InterlockedIncrementAcquire((long *) value); }
-inline void atomic_decrement_acquire(volatile uint32* value) { InterlockedDecrementAcquire((long *) value); }
-inline void atomic_increment_acquire(volatile uint64* value) { InterlockedIncrementAcquire64((LONG64 *) value); }
-inline void atomic_decrement_acquire(volatile uint64* value) { InterlockedDecrementAcquire64((LONG64 *) value); }
-inline void atomic_add_acquire(volatile uint32* value, uint32 increment) { InterlockedAddAcquire((long *) value, increment); }
-inline void atomic_sub_acquire(volatile uint32* value, uint32 decrement) { InterlockedAddAcquire((long *) value, -1 * ((int32) decrement)); }
-inline void atomic_add_acquire(volatile uint64* value, uint64 increment) { InterlockedAddAcquire64((LONG64 *) value, (LONG64) increment); }
-inline void atomic_sub_acquire(volatile uint64* value, uint64 decrement) { InterlockedAddAcquire64((LONG64 *) value, -((LONG64) decrement)); }
-inline uint32 atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expected, uint32 desired) { return (uint32) InterlockedCompareExchangeAcquire((long *) value, desired, *expected); }
-inline uint64 atomic_compare_exchange_weak_acquire(volatile uint64* value, uint64* expected, uint64 desired) { return (uint64) InterlockedCompareExchangeAcquire64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-inline uint32 atomic_fetch_add_acquire(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddAcquire((long *) value, operand); }
-inline uint32 atomic_fetch_sub_acquire(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddAcquire((unsigned long *) value, -((long) operand)); }
-inline uint64 atomic_fetch_add_acquire(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAddAcquire64((LONG64 *) value, (LONG64) operand); }
-inline uint64 atomic_fetch_sub_acquire(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); }
-inline void atomic_and_acquire(volatile uint32* value, uint32 mask) { InterlockedAndAcquire((volatile LONG *) value, mask); }
-inline void atomic_and_acquire(volatile int32* value, int32 mask) { InterlockedAndAcquire((volatile LONG *) value, (LONG)mask); }
-inline void atomic_and_acquire(volatile uint64* value, uint64 mask) { InterlockedAnd64Acquire((volatile LONG64 *) value, mask); }
-inline void atomic_and_acquire(volatile int64* value, int64 mask) { InterlockedAnd64Acquire((volatile LONG64 *) value, mask); }
-inline void atomic_or_acquire(volatile uint32* value, uint32 mask) { InterlockedOrAcquire((volatile LONG *) value, mask); }
-inline void atomic_or_acquire(volatile int32* value, int32 mask) { InterlockedOrAcquire((volatile LONG *) value, (LONG)mask); }
-inline void atomic_or_acquire(volatile uint64* value, uint64 mask) { InterlockedOr64Acquire((volatile LONG64 *) value, mask); }
-inline void atomic_or_acquire(volatile int64* value, int64 mask) { InterlockedOr64Acquire((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_set_relaxed(void** target, void* new_pointer) { InterlockedExchangePointerNoFence(target, new_pointer); }
+FORCE_INLINE void* atomic_get_relaxed(void** target) { return InterlockedCompareExchangePointerNoFence(target, NULL, NULL); }
+FORCE_INLINE void atomic_set_relaxed(volatile int8* value, int8 new_value) { InterlockedExchangeNoFence8((volatile char *) value, new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile int16* value, int16 new_value) { InterlockedExchangeNoFence16((volatile short *) value, new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile int32* value, int32 new_value) { InterlockedExchangeNoFence((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile int64* value, int64 new_value) { InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile f32* value, f32 new_value) { _atomic_32 temp = {.f = new_value}; InterlockedExchangeNoFence((volatile long *) value, (long) temp.l); }
+FORCE_INLINE void atomic_set_relaxed(volatile f64* value, f64 new_value) { _atomic_64 temp = {.f = new_value}; InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) temp.l); }
+FORCE_INLINE int8 atomic_fetch_set_relaxed(volatile int8* value, int8 new_value) { return (int8) InterlockedExchangeNoFence8((volatile char *) value, (char) new_value); }
+FORCE_INLINE int16 atomic_fetch_set_relaxed(volatile int16* value, int16 new_value) { return (int16) InterlockedExchangeNoFence16((volatile short *) value, (short) new_value); }
+FORCE_INLINE int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) { return (int32) InterlockedExchangeNoFence((volatile long *) value, new_value); }
+FORCE_INLINE int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) { return (int64) InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE int8 atomic_get_relaxed(volatile int8* value) { return (int8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE int16 atomic_get_relaxed(volatile int16* value) { return (int16) InterlockedCompareExchangeNoFence16((volatile short *) value, 0, 0); }
+FORCE_INLINE int32 atomic_get_relaxed(volatile int32* value) { return (int32) InterlockedCompareExchangeNoFence((volatile long *) value, 0, 0); }
+FORCE_INLINE int64 atomic_get_relaxed(volatile int64* value) { return (int64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE f32 atomic_get_relaxed(volatile f32* value) { _atomic_32 temp = {.l = InterlockedCompareExchangeNoFence((volatile long *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE f64 atomic_get_relaxed(volatile f64* value) { _atomic_64 temp = {.l = InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE void atomic_increment_relaxed(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
+FORCE_INLINE void atomic_increment_relaxed(volatile int16* value) { InterlockedIncrementNoFence16((volatile short *) value); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile int16* value) { InterlockedDecrementNoFence16((volatile short *) value); }
+FORCE_INLINE void atomic_increment_relaxed(volatile int32* value) { InterlockedIncrementNoFence((volatile long *) value); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile int32* value) { InterlockedDecrementNoFence((volatile long *) value); }
+FORCE_INLINE void atomic_increment_relaxed(volatile int64* value) { InterlockedIncrementNoFence64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile int64* value) { InterlockedDecrementNoFence64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_relaxed(volatile int8* value, int8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile int8* value, int8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_relaxed(volatile int16* value, int16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile int16* value, int16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_relaxed(volatile int32* value, int32 increment) { InterlockedAddNoFence((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile int32* value, int32 decrement) { InterlockedAddNoFence((volatile long *) value, -decrement); }
+FORCE_INLINE void atomic_add_relaxed(volatile int64* value, int64 increment) { InterlockedAddNoFence64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile int64* value, int64 decrement) { InterlockedAddNoFence64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) { _atomic_32 temp = {.l = InterlockedCompareExchangeNoFence((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; }
+FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) { _atomic_64 temp = {.l = InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; }
+FORCE_INLINE int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchangeNoFence((volatile long *) value, desired, *expected); }
+FORCE_INLINE int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) { return (int64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE int8 atomic_fetch_add_relaxed(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE int8 atomic_fetch_sub_relaxed(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE int16 atomic_fetch_add_relaxed(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE int16 atomic_fetch_sub_relaxed(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddNoFence((volatile long *) value, operand); }
+FORCE_INLINE int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddNoFence((volatile long *) value, -((long) operand)); }
+FORCE_INLINE int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAddNoFence64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAddNoFence64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_set_relaxed(volatile uint8* value, uint8 new_value) { InterlockedExchangeNoFence8((volatile char *) value, (char) new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile uint16* value, uint16 new_value) { InterlockedExchangeNoFence16((volatile short *) value, (short) new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile uint32* value, uint32 new_value) { InterlockedExchangeNoFence((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile uint64* value, uint64 new_value) { InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_fetch_set_relaxed(volatile uint8* value, uint8 new_value) { return (uint8) InterlockedExchangeNoFence8((volatile char *) value, (char) new_value); }
+FORCE_INLINE uint16 atomic_fetch_set_relaxed(volatile uint16* value, uint16 new_value) { return (uint16) InterlockedExchangeNoFence16((volatile short *) value, (short) new_value); }
+FORCE_INLINE uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchangeNoFence((volatile long *) value, new_value); }
+FORCE_INLINE uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_get_relaxed(volatile uint8* value) { return (uint8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE uint16 atomic_get_relaxed(volatile uint16* value) { return (uint16) InterlockedCompareExchangeNoFence16((volatile short *) value, 0, 0); }
+FORCE_INLINE uint32 atomic_get_relaxed(volatile uint32* value) { return (uint32) InterlockedCompareExchangeNoFence((volatile long *) value, 0, 0); }
+FORCE_INLINE uint64 atomic_get_relaxed(volatile uint64* value) { return (uint64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE void atomic_increment_relaxed(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
+FORCE_INLINE void atomic_increment_relaxed(volatile uint16* value) { InterlockedIncrementNoFence16((volatile short *) value); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile uint16* value) { InterlockedDecrementNoFence16((volatile short *) value); }
+FORCE_INLINE void atomic_increment_relaxed(volatile uint32* value) { InterlockedIncrementNoFence((volatile long *) value); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile uint32* value) { InterlockedDecrementNoFence((volatile long *) value); }
+FORCE_INLINE void atomic_increment_relaxed(volatile uint64* value) { InterlockedIncrementNoFence64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_decrement_relaxed(volatile uint64* value) { InterlockedDecrementNoFence64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_relaxed(volatile uint8* value, uint8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile uint8* value, uint8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_relaxed(volatile uint16* value, uint16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile uint16* value, uint16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_relaxed(volatile uint32* value, uint32 increment) { InterlockedAddNoFence((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) { InterlockedAddNoFence((volatile long *) value, -1 * ((int32) decrement)); }
+FORCE_INLINE void atomic_add_relaxed(volatile uint64* value, uint64 increment) { InterlockedAddNoFence64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) { InterlockedAddNoFence64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) { return (uint32) InterlockedCompareExchangeNoFence((volatile long *) value, desired, *expected); }
+FORCE_INLINE uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) { return (uint64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE uint8 atomic_fetch_add_relaxed(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE uint8 atomic_fetch_sub_relaxed(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE uint16 atomic_fetch_add_relaxed(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE uint16 atomic_fetch_sub_relaxed(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddNoFence((volatile long *) value, operand); }
+FORCE_INLINE uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddNoFence((volatile long *) value, -((long) operand)); }
+FORCE_INLINE uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAddNoFence64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAddNoFence64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_and_relaxed(volatile uint8* value, uint8 mask) { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile int8* value, int8 mask) { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile uint16* value, uint16 mask) { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile int16* value, int16 mask) { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile uint32* value, uint32 mask) { InterlockedAndNoFence((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile int32* value, int32 mask) { InterlockedAndNoFence((volatile LONG *) value, (LONG)mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile uint64* value, uint64 mask) { InterlockedAnd64NoFence((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile int64* value, int64 mask) { InterlockedAnd64NoFence((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile uint8* value, uint8 mask) { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile int8* value, int8 mask) { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile uint16* value, uint16 mask) { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile int16* value, int16 mask) { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile uint32* value, uint32 mask) { InterlockedOrNoFence((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile int32* value, int32 mask) { InterlockedOrNoFence((volatile LONG *) value, (LONG)mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile uint64* value, uint64 mask) { InterlockedOr64NoFence((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile int64* value, int64 mask) { InterlockedOr64NoFence((volatile LONG64 *) value, mask); }
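Usage note on the relaxed overloads above: they map onto the NoFence Interlocked variants, so each operation is still atomic (lost updates are impossible) but no ordering with surrounding loads and stores is implied. That makes them the right tool for plain statistics counters where only the final tally matters. A minimal sketch, assuming this header and the engine's fixed-width types are included; the function and variable names below are illustrative, not part of this patch:

    volatile int32 frames_rendered = 0;

    // Called concurrently from any number of worker threads; the increment
    // itself cannot be lost, and no extra fence is requested.
    void on_frame_done() {
        atomic_increment_relaxed(&frames_rendered);
    }

    // An approximate snapshot is fine for a stats overlay.
    int32 stats_snapshot() {
        return atomic_get_relaxed(&frames_rendered);
    }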
-inline void atomic_set_release(void** target, void* new_pointer) { InterlockedExchangePointer(target, new_pointer); }
-inline void* atomic_get_release(void** target) { return InterlockedCompareExchangePointerRelease(target, NULL, NULL); }
-inline void atomic_set_release(volatile int32* value, int32 new_value) { InterlockedExchange((long *) value, new_value); }
-inline void atomic_set_release(volatile int64* value, int64 new_value) { InterlockedExchange64((LONG64 *) value, (LONG64) new_value); }
-inline void atomic_set_release(volatile f32* value, f32 new_value) { _atomic_32 temp = {.f = new_value}; InterlockedExchange((long *) value, (long) temp.l); }
-inline void atomic_set_release(volatile f64* value, f64 new_value) { _atomic_64 temp = {.f = new_value}; InterlockedExchange64((LONG64 *) value, (LONG64) temp.l); }
-inline int32 atomic_fetch_set_release(volatile int32* value, int32 new_value) { return (int32) InterlockedExchange((long *) value, new_value); }
-inline int64 atomic_fetch_set_release(volatile int64* value, int64 new_value) { return (int64) InterlockedExchange64((LONG64 *) value, (LONG64) new_value); }
-inline int32 atomic_get_release(volatile int32* value) { return (int32) InterlockedCompareExchangeRelease((long *) value, 0, 0); }
-inline int64 atomic_get_release(volatile int64* value) { return (int64) InterlockedCompareExchangeRelease64((LONG64 *) value, 0, 0); }
-inline f32 atomic_get_release(volatile f32* value) { _atomic_32 temp = {.l = InterlockedCompareExchangeRelease((long *) value, 0, 0)}; return temp.f; }
-inline f64 atomic_get_release(volatile f64* value) { _atomic_64 temp = {.l = InterlockedCompareExchangeRelease64((LONG64 *) value, 0, 0)}; return temp.f; }
-inline void atomic_increment_release(volatile int32* value) { InterlockedIncrementRelease((long *) value); }
-inline void atomic_decrement_release(volatile int32* value) { InterlockedDecrementRelease((long *) value); }
-inline void atomic_increment_release(volatile int64* value) { InterlockedIncrementRelease64((LONG64 *) value); }
-inline void atomic_decrement_release(volatile int64* value) { InterlockedDecrementRelease64((LONG64 *) value); }
-inline void atomic_add_release(volatile int32* value, int32 increment) { InterlockedAddRelease((long *) value, increment); }
-inline void atomic_sub_release(volatile int32* value, int32 decrement) { InterlockedAddRelease((long *) value, -decrement); }
-inline void atomic_add_release(volatile int64* value, int64 increment) { InterlockedAddRelease64((LONG64 *) value, (LONG64) increment); }
-inline void atomic_sub_release(volatile int64* value, int64 decrement) { InterlockedAddRelease64((LONG64 *) value, -((LONG64) decrement)); }
-inline f32 atomic_compare_exchange_weak_release(volatile f32* value, f32* expected, f32 desired) { _atomic_32 temp = {.l = InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected) }; return temp.f; }
-inline f64 atomic_compare_exchange_weak_release(volatile f64* value, f64* expected, f64 desired) { _atomic_64 temp = {.l = InterlockedCompareExchangeRelease64((LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; }
-inline int32 atomic_compare_exchange_weak_release(volatile int32* value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchangeRelease((long *) value, desired, *expected); }
-inline int64 atomic_compare_exchange_weak_release(volatile int64* value, int64* expected, int64 desired) { return (int64) InterlockedCompareExchangeRelease64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-inline int32 atomic_fetch_add_release(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddRelease((long *) value, operand); }
-inline int32 atomic_fetch_sub_release(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddRelease((unsigned long *) value, -((long) operand)); }
-inline int64 atomic_fetch_add_release(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAddRelease64((LONG64 *) value, (LONG64) operand); }
-inline int64 atomic_fetch_sub_release(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); }
-inline void atomic_set_release(volatile uint32* value, uint32 new_value) { InterlockedExchange((long *) value, new_value); }
-inline void atomic_set_release(volatile uint64* value, uint64 new_value) { InterlockedExchange64((LONG64 *) value, (LONG64) new_value); }
-inline uint32 atomic_fetch_set_release(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchange((long *) value, new_value); }
-inline uint64 atomic_fetch_set_release(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchange64((LONG64 *) value, (LONG64) new_value); }
-inline uint32 atomic_get_release(volatile uint32* value) { return (uint32) InterlockedCompareExchangeRelease((long *) value, 0, 0); }
-inline uint64 atomic_get_release(volatile uint64* value) { return (uint64) InterlockedCompareExchangeRelease64((LONG64 *) value, 0, 0); }
-inline void atomic_increment_release(volatile uint32* value) { InterlockedIncrementRelease((long *) value); }
-inline void atomic_decrement_release(volatile uint32* value) { InterlockedDecrementRelease((long *) value); }
-inline void atomic_increment_release(volatile uint64* value) { InterlockedIncrementRelease64((LONG64 *) value); }
-inline void atomic_decrement_release(volatile uint64* value) { InterlockedDecrementRelease64((LONG64 *) value); }
-inline void atomic_add_release(volatile uint32* value, uint32 increment) { InterlockedAddRelease((long *) value, increment); }
-inline void atomic_sub_release(volatile uint32* value, uint32 decrement) { InterlockedAddRelease((long *) value, -1 * ((int32) decrement)); }
-inline void atomic_add_release(volatile uint64* value, uint64 increment) { InterlockedAddRelease64((LONG64 *) value, (LONG64) increment); }
-inline void atomic_sub_release(volatile uint64* value, uint64 decrement) { InterlockedAddRelease64((LONG64 *) value, -((LONG64) decrement)); }
-inline uint32 atomic_compare_exchange_weak_release(volatile uint32* value, uint32* expected, uint32 desired) { return (uint32) InterlockedCompareExchangeRelease((long *) value, desired, *expected); }
-inline uint64 atomic_compare_exchange_weak_release(volatile uint64* value, uint64* expected, uint64 desired) { return (uint64) InterlockedCompareExchangeRelease64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-inline uint32 atomic_fetch_add_release(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddRelease((long *) value, operand); }
-inline uint32 atomic_fetch_sub_release(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddRelease((unsigned long *) value, -((long) operand)); }
-inline uint64 atomic_fetch_add_release(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAddRelease64((LONG64 *) value, (LONG64) operand); }
-inline uint64 atomic_fetch_sub_release(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); }
-inline void atomic_and_release(volatile uint32* value, uint32 mask) { InterlockedAndRelease((volatile LONG *) value, mask); }
-inline void atomic_and_release(volatile int32* value, int32 mask) { InterlockedAndRelease((volatile LONG *) value, (LONG)mask); }
-inline void atomic_and_release(volatile uint64* value, uint64 mask) { InterlockedAnd64Release((volatile LONG64 *) value, mask); }
-inline void atomic_and_release(volatile int64* value, int64 mask) { InterlockedAnd64Release((volatile LONG64 *) value, mask); }
-inline void atomic_or_release(volatile uint32* value, uint32 mask) { InterlockedOrRelease((volatile LONG *) value, mask); }
-inline void atomic_or_release(volatile int32* value, int32 mask) { InterlockedOrRelease((volatile LONG *) value, (LONG)mask); }
-inline void atomic_or_release(volatile uint64* value, uint64 mask) { InterlockedOr64Release((volatile LONG64 *) value, mask); }
-inline void atomic_or_release(volatile int64* value, int64 mask) { InterlockedOr64Release((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_set_acquire(void** target, void* new_pointer) { InterlockedExchangePointerAcquire(target, new_pointer); }
+FORCE_INLINE void* atomic_get_acquire(void** target) { return InterlockedCompareExchangePointerAcquire(target, NULL, NULL); }
+FORCE_INLINE void atomic_set_acquire(volatile int8* value, int8 new_value) { InterlockedExchangeAcquire8((volatile char *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile int16* value, int16 new_value) { InterlockedExchangeAcquire16((volatile short *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile int32* value, int32 new_value) { InterlockedExchangeAcquire((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile int64* value, int64 new_value) { InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile f32* value, f32 new_value) { _atomic_32 temp = {.f = new_value}; InterlockedExchangeAcquire((volatile long *) value, (long) temp.l); }
+FORCE_INLINE void atomic_set_acquire(volatile f64* value, f64 new_value) { _atomic_64 temp = {.f = new_value}; InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) temp.l); }
+FORCE_INLINE int8 atomic_fetch_set_acquire(volatile int8* value, int8 new_value) { return (int8) InterlockedExchangeAcquire8((volatile char *) value, (char) new_value); }
+FORCE_INLINE int16 atomic_fetch_set_acquire(volatile int16* value, int16 new_value) { return (int16) InterlockedExchangeAcquire16((volatile short *) value, (short) new_value); }
+FORCE_INLINE int32 atomic_fetch_set_acquire(volatile int32* value, int32 new_value) { return (int32) InterlockedExchangeAcquire((volatile long *) value, new_value); }
+FORCE_INLINE int64 atomic_fetch_set_acquire(volatile int64* value, int64 new_value) { return (int64) InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE int8 atomic_get_acquire(volatile int8* value) { return (int8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE int16 atomic_get_acquire(volatile int16* value) { return (int16) InterlockedCompareExchangeAcquire16((volatile short *) value, 0, 0); }
+FORCE_INLINE int32 atomic_get_acquire(volatile int32* value) { return (int32) InterlockedCompareExchangeAcquire((volatile long *) value, 0, 0); }
+FORCE_INLINE int64 atomic_get_acquire(volatile int64* value) { return (int64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE f32 atomic_get_acquire(volatile f32* value) { _atomic_32 temp = {.l = InterlockedCompareExchangeAcquire((volatile long *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE f64 atomic_get_acquire(volatile f64* value) { _atomic_64 temp = {.l = InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE void atomic_increment_acquire(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
+FORCE_INLINE void atomic_decrement_acquire(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
+FORCE_INLINE void atomic_increment_acquire(volatile int16* value) { InterlockedIncrementAcquire16((volatile short *) value); }
+FORCE_INLINE void atomic_decrement_acquire(volatile int16* value) { InterlockedDecrementAcquire16((volatile short *) value); }
+FORCE_INLINE void atomic_increment_acquire(volatile int32* value) { InterlockedIncrementAcquire((volatile long *) value); }
+FORCE_INLINE void atomic_decrement_acquire(volatile int32* value) { InterlockedDecrementAcquire((volatile long *) value); }
+FORCE_INLINE void atomic_increment_acquire(volatile int64* value) { InterlockedIncrementAcquire64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_decrement_acquire(volatile int64* value) { InterlockedDecrementAcquire64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_acquire(volatile int8* value, int8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile int8* value, int8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_acquire(volatile int16* value, int16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile int16* value, int16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_acquire(volatile int32* value, int32 increment) { InterlockedAddAcquire((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile int32* value, int32 decrement) { InterlockedAddAcquire((volatile long *) value, -decrement); }
+FORCE_INLINE void atomic_add_acquire(volatile int64* value, int64 increment) { InterlockedAddAcquire64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile int64* value, int64 decrement) { InterlockedAddAcquire64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE f32 atomic_compare_exchange_weak_acquire(volatile f32* value, f32* expected, f32 desired) { _atomic_32 temp = {.l = InterlockedCompareExchangeAcquire((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; }
+FORCE_INLINE f64 atomic_compare_exchange_weak_acquire(volatile f64* value, f64* expected, f64 desired) { _atomic_64 temp = {.l = InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; }
+FORCE_INLINE int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchangeAcquire((volatile long *) value, desired, *expected); }
+FORCE_INLINE int64 atomic_compare_exchange_weak_acquire(volatile int64* value, int64* expected, int64 desired) { return (int64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE int8 atomic_fetch_add_acquire(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE int8 atomic_fetch_sub_acquire(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE int16 atomic_fetch_add_acquire(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE int16 atomic_fetch_sub_acquire(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE int32 atomic_fetch_add_acquire(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddAcquire((volatile long *) value, operand); }
+FORCE_INLINE int32 atomic_fetch_sub_acquire(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddAcquire((volatile long *) value, -((long) operand)); }
+FORCE_INLINE int64 atomic_fetch_add_acquire(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAddAcquire64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE int64 atomic_fetch_sub_acquire(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAddAcquire64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_set_acquire(volatile uint8* value, uint8 new_value) { InterlockedExchangeAcquire8((volatile char *) value, (char) new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile uint16* value, uint16 new_value) { InterlockedExchangeAcquire16((volatile short *) value, (short) new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile uint32* value, uint32 new_value) { InterlockedExchangeAcquire((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile uint64* value, uint64 new_value) { InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_fetch_set_acquire(volatile uint8* value, uint8 new_value) { return (uint8) InterlockedExchangeAcquire8((volatile char *) value, (char) new_value); }
+FORCE_INLINE uint16 atomic_fetch_set_acquire(volatile uint16* value, uint16 new_value) { return (uint16) InterlockedExchangeAcquire16((volatile short *) value, (short) new_value); }
+FORCE_INLINE uint32 atomic_fetch_set_acquire(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchangeAcquire((volatile long *) value, new_value); }
+FORCE_INLINE uint64 atomic_fetch_set_acquire(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_get_acquire(volatile uint8* value) { return (uint8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE uint16 atomic_get_acquire(volatile uint16* value) { return (uint16) InterlockedCompareExchangeAcquire16((volatile short *) value, 0, 0); }
+FORCE_INLINE uint32 atomic_get_acquire(volatile uint32* value) { return (uint32) InterlockedCompareExchangeAcquire((volatile long *) value, 0, 0); }
+FORCE_INLINE uint64 atomic_get_acquire(volatile uint64* value) { return (uint64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE void atomic_increment_acquire(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
+FORCE_INLINE void atomic_decrement_acquire(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
+FORCE_INLINE void atomic_increment_acquire(volatile uint16* value) { InterlockedIncrementAcquire16((volatile short *) value); }
+FORCE_INLINE void atomic_decrement_acquire(volatile uint16* value) { InterlockedDecrementAcquire16((volatile short *) value); }
+FORCE_INLINE void atomic_increment_acquire(volatile uint32* value) { InterlockedIncrementAcquire((volatile long *) value); }
+FORCE_INLINE void atomic_decrement_acquire(volatile uint32* value) { InterlockedDecrementAcquire((volatile long *) value); }
+FORCE_INLINE void atomic_increment_acquire(volatile uint64* value) { InterlockedIncrementAcquire64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_decrement_acquire(volatile uint64* value) { InterlockedDecrementAcquire64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_acquire(volatile uint8* value, uint8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile uint8* value, uint8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_acquire(volatile uint16* value, uint16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile uint16* value, uint16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_acquire(volatile uint32* value, uint32 increment) { InterlockedAddAcquire((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile uint32* value, uint32 decrement) { InterlockedAddAcquire((volatile long *) value, -1 * ((int32) decrement)); }
+FORCE_INLINE void atomic_add_acquire(volatile uint64* value, uint64 increment) { InterlockedAddAcquire64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile uint64* value, uint64 decrement) { InterlockedAddAcquire64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expected, uint32 desired) { return (uint32) InterlockedCompareExchangeAcquire((volatile long *) value, desired, *expected); }
+FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire(volatile uint64* value, uint64* expected, uint64 desired) { return (uint64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE uint8 atomic_fetch_add_acquire(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE uint8 atomic_fetch_sub_acquire(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE uint16 atomic_fetch_add_acquire(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE uint16 atomic_fetch_sub_acquire(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE uint32 atomic_fetch_add_acquire(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddAcquire((volatile long *) value, operand); }
+FORCE_INLINE uint32 atomic_fetch_sub_acquire(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddAcquire((volatile long *) value, -((long) operand)); }
+FORCE_INLINE uint64 atomic_fetch_add_acquire(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAddAcquire64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE uint64 atomic_fetch_sub_acquire(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAddAcquire64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_and_acquire(volatile uint8* value, uint8 mask) { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_acquire(volatile int8* value, int8 mask) { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_acquire(volatile uint16* value, uint16 mask) { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_acquire(volatile int16* value, int16 mask) { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_acquire(volatile uint32* value, uint32 mask) { InterlockedAndAcquire((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_and_acquire(volatile int32* value, int32 mask) { InterlockedAndAcquire((volatile LONG *) value, (LONG)mask); }
+FORCE_INLINE void atomic_and_acquire(volatile uint64* value, uint64 mask) { InterlockedAnd64Acquire((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_and_acquire(volatile int64* value, int64 mask) { InterlockedAnd64Acquire((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile uint8* value, uint8 mask) { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile int8* value, int8 mask) { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile uint16* value, uint16 mask) { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile int16* value, int16 mask) { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile uint32* value, uint32 mask) { InterlockedOrAcquire((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile int32* value, int32 mask) { InterlockedOrAcquire((volatile LONG *) value, (LONG)mask); }
+FORCE_INLINE void atomic_or_acquire(volatile uint64* value, uint64 mask) { InterlockedOr64Acquire((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile int64* value, int64 mask) { InterlockedOr64Acquire((volatile LONG64 *) value, mask); }
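Usage note on the acquire overloads above: they are meant to pair with the *_release overloads this patch adds further down. The canonical pattern is one-way publication: the writer fills in data and then release-stores a flag; a reader that acquire-loads the flag as set is guaranteed to also see the data. A minimal sketch under that assumption, with illustrative names that are not part of this patch:

    static int32 payload = 0;          // plain data, guarded by the flag
    static volatile int32 ready = 0;   // publication flag

    void producer() {
        payload = 42;                  // 1. write the data
        atomic_set_release(&ready, 1); // 2. publish: the flag store cannot move above the write
    }

    void consumer() {
        while (atomic_get_acquire(&ready) == 0) {} // 3. wait for the flag
        // 4. payload now reads as 42; the acquire load keeps later reads
        //    from being hoisted above the flag check.
    }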
-inline void atomic_set_acquire_release(void** target, void* new_pointer) { InterlockedExchangePointer(target, new_pointer); }
-inline void* atomic_get_acquire_release(void** target) { return InterlockedCompareExchangePointer(target, NULL, NULL); }
-inline void atomic_set_acquire_release(volatile int32* value, int32 new_value) { InterlockedExchange((long *) value, new_value); }
-inline void atomic_set_acquire_release(volatile int64* value, int64 new_value) { InterlockedExchange64((LONG64 *) value, (LONG64) new_value); }
-inline void atomic_set_acquire_release(volatile f32* value, f32 new_value) { _atomic_32 temp = {.f = new_value}; InterlockedExchange((long *) value, (long) temp.l); }
-inline void atomic_set_acquire_release(volatile f64* value, f64 new_value) { _atomic_64 temp = {.f = new_value}; InterlockedExchange64((LONG64 *) value, (LONG64) temp.l); }
-inline int32 atomic_fetch_set_acquire_release(volatile int32* value, int32 new_value) { return (int32) InterlockedExchange((long *) value, new_value); }
-inline int64 atomic_fetch_set_acquire_release(volatile int64* value, int64 new_value) { return (int64) InterlockedExchange64((LONG64 *) value, (LONG64) new_value); }
-inline int32 atomic_get_acquire_release(volatile int32* value) { return (int32) InterlockedCompareExchange((long *) value, 0, 0); }
-inline int64 atomic_get_acquire_release(volatile int64* value) { return (int64) InterlockedCompareExchange64((LONG64 *) value, 0, 0); }
-inline f32 atomic_get_acquire_release(volatile f32* value) { _atomic_32 temp = {.l = InterlockedCompareExchange((long *) value, 0, 0)}; return temp.f; }
-inline f64 atomic_get_acquire_release(volatile f64* value) { _atomic_64 temp = {.l = InterlockedCompareExchange64((LONG64 *) value, 0, 0)}; return temp.f; }
-inline void atomic_increment_acquire_release(volatile int32* value) { InterlockedIncrement((long *) value); }
-inline void atomic_decrement_acquire_release(volatile int32* value) { InterlockedDecrement((long *) value); }
-inline void atomic_increment_acquire_release(volatile int64* value) { InterlockedIncrement64((LONG64 *) value); }
-inline void atomic_decrement_acquire_release(volatile int64* value) { InterlockedDecrement64((LONG64 *) value); }
-inline void atomic_add_acquire_release(volatile int32* value, int32 increment) { InterlockedAdd((long *) value, increment); }
-inline void atomic_sub_acquire_release(volatile int32* value, int32 decrement) { InterlockedAdd((long *) value, -decrement); }
-inline void atomic_add_acquire_release(volatile int64* value, int64 increment) { InterlockedAdd64((LONG64 *) value, (LONG64) increment); }
-inline void atomic_sub_acquire_release(volatile int64* value, int64 decrement) { InterlockedAdd64((LONG64 *) value, -((LONG64) decrement)); }
-inline f32 atomic_compare_exchange_weak_acquire_release(volatile f32* value, f32* expected, f32 desired) { _atomic_32 temp = {.l = InterlockedCompareExchange((long *) value, (long) desired, (long) *expected) }; return temp.f; }
-inline f64 atomic_compare_exchange_weak_acquire_release(volatile f64* value, f64* expected, f64 desired) { _atomic_64 temp = {.l = InterlockedCompareExchange64((LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; }
-inline int32 atomic_compare_exchange_weak_acquire_release(volatile int32* value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchange((long *) value, desired, *expected); }
-inline int64 atomic_compare_exchange_weak_acquire_release(volatile int64* value, int64* expected, int64 desired) { return (int64) InterlockedCompareExchange64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-inline int32 atomic_fetch_add_acquire_release(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAdd((long *) value, operand); }
-inline int32 atomic_fetch_sub_acquire_release(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAdd((unsigned long *) value, -((long) operand)); }
-inline int64 atomic_fetch_add_acquire_release(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((LONG64 *) value, (LONG64) operand); }
-inline int64 atomic_fetch_sub_acquire_release(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); }
-inline void atomic_set_acquire_release(volatile uint32* value, uint32 new_value) { InterlockedExchange((long *) value, new_value); }
-inline void atomic_set_acquire_release(volatile uint64* value, uint64 new_value) { InterlockedExchange64((LONG64 *) value, (LONG64) new_value); }
-inline uint32 atomic_fetch_set_acquire_release(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchange((long *) value, new_value); }
-inline uint64 atomic_fetch_set_acquire_release(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchange64((LONG64 *) value, (LONG64) new_value); }
-inline uint32 atomic_get_acquire_release(volatile uint32* value) { return (uint32) InterlockedCompareExchange((long *) value, 0, 0); }
-inline uint64 atomic_get_acquire_release(volatile uint64* value) { return (uint64) InterlockedCompareExchange64((LONG64 *) value, 0, 0); }
-inline void atomic_increment_acquire_release(volatile uint32* value) { InterlockedIncrement((long *) value); }
-inline void atomic_decrement_acquire_release(volatile uint32* value) { InterlockedDecrement((long *) value); }
-inline void atomic_increment_acquire_release(volatile uint64* value) { InterlockedIncrement64((LONG64 *) value); }
-inline void atomic_decrement_acquire_release(volatile uint64* value) { InterlockedDecrement64((LONG64 *) value); }
-inline void atomic_add_acquire_release(volatile uint32* value, uint32 increment) { InterlockedAdd((long *) value, increment); }
-inline void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement) { InterlockedAdd((long *) value, -1 * ((int32) decrement)); }
-inline void atomic_add_acquire_release(volatile uint64* value, uint64 increment) { InterlockedAdd64((LONG64 *) value, (LONG64) increment); }
-inline void atomic_sub_acquire_release(volatile uint64* value, uint64 decrement) { InterlockedAdd64((LONG64 *) value, -((LONG64) decrement)); }
-inline uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint32* expected, uint32 desired) { return (uint32) InterlockedCompareExchange((long *) value, desired, *expected); }
-inline uint64 atomic_compare_exchange_weak_acquire_release(volatile uint64* value, uint64* expected, uint64 desired) { return (uint64) InterlockedCompareExchange64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-inline uint32 atomic_fetch_add_acquire_release(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAdd((long *) value, operand); }
-inline uint32 atomic_fetch_sub_acquire_release(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAdd((unsigned long *) value, -((long) operand)); }
-inline uint64 atomic_fetch_add_acquire_release(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((LONG64 *) value, (LONG64) operand); }
-inline uint64 atomic_fetch_sub_acquire_release(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); }
-inline void atomic_and_acquire_release(volatile uint32* value, uint32 mask) { InterlockedAnd((volatile LONG *) value, mask); }
-inline void atomic_and_acquire_release(volatile int32* value, int32 mask) { InterlockedAnd((volatile LONG *) value, (LONG)mask); }
-inline void atomic_and_acquire_release(volatile uint64* value, uint64 mask) { InterlockedAnd64((volatile LONG64 *) value, mask); }
-inline void atomic_and_acquire_release(volatile int64* value, int64 mask) { InterlockedAnd64((volatile LONG64 *) value, mask); }
-inline void atomic_or_acquire_release(volatile uint32* value, uint32 mask) { InterlockedOr((volatile LONG *) value, mask); }
-inline void atomic_or_acquire_release(volatile int32* value, int32 mask) { InterlockedOr((volatile LONG *) value, (LONG)mask); }
-inline void atomic_or_acquire_release(volatile uint64* value, uint64 mask) { InterlockedOr64((volatile LONG64 *) value, mask); }
-inline void atomic_or_acquire_release(volatile int64* value, int64 mask) { InterlockedOr64((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_set_release(void** target, void* new_pointer) { InterlockedExchangePointer(target, new_pointer); }
+FORCE_INLINE void* atomic_get_release(void** target) { return InterlockedCompareExchangePointerRelease(target, NULL, NULL); }
+FORCE_INLINE void atomic_set_release(volatile int8* value, int8 new_value) { InterlockedExchange8((volatile char *) value, new_value); }
+FORCE_INLINE void atomic_set_release(volatile int16* value, int16 new_value) { InterlockedExchange16((volatile short *) value, new_value); }
+FORCE_INLINE void atomic_set_release(volatile int32* value, int32 new_value) { InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_release(volatile int64* value, int64 new_value) { InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE void atomic_set_release(volatile f32* value, f32 new_value) { _atomic_32 temp = {.f = new_value}; InterlockedExchange((volatile long *) value, (long) temp.l); }
+FORCE_INLINE void atomic_set_release(volatile f64* value, f64 new_value) { _atomic_64 temp = {.f = new_value}; InterlockedExchange64((volatile LONG64 *) value, (LONG64) temp.l); }
+FORCE_INLINE int8 atomic_fetch_set_release(volatile int8* value, int8 new_value) { return (int8) InterlockedExchange8((volatile char *) value, (char) new_value); }
+FORCE_INLINE int16 atomic_fetch_set_release(volatile int16* value, int16 new_value) { return (int16) InterlockedExchange16((volatile short *) value, (short) new_value); }
+FORCE_INLINE int32 atomic_fetch_set_release(volatile int32* value, int32 new_value) { return (int32) InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE int64 atomic_fetch_set_release(volatile int64* value, int64 new_value) { return (int64) InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE int8 atomic_get_release(volatile int8* value) { return (int8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE int16 atomic_get_release(volatile int16* value) { return (int16) InterlockedCompareExchangeRelease16((volatile short *) value, 0, 0); }
+FORCE_INLINE int32 atomic_get_release(volatile int32* value) { return (int32) InterlockedCompareExchangeRelease((volatile long *) value, 0, 0); }
+FORCE_INLINE int64 atomic_get_release(volatile int64* value) { return (int64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE f32 atomic_get_release(volatile f32* value) { _atomic_32 temp = {.l = InterlockedCompareExchangeRelease((volatile long *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE f64 atomic_get_release(volatile f64* value) { _atomic_64 temp = {.l = InterlockedCompareExchangeRelease64((volatile LONG64 *) value, 0, 0)}; return temp.f; }
InterlockedCompareExchangeRelease64((volatile LONG64 *) value, 0, 0)}; return temp.f; } +FORCE_INLINE void atomic_increment_release(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); } +FORCE_INLINE void atomic_decrement_release(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); } +FORCE_INLINE void atomic_increment_release(volatile int16* value) { InterlockedIncrementRelease16((volatile short *) value); } +FORCE_INLINE void atomic_decrement_release(volatile int16* value) { InterlockedDecrementRelease16((volatile short *) value); } +FORCE_INLINE void atomic_increment_release(volatile int32* value) { InterlockedIncrementRelease((volatile long *) value); } +FORCE_INLINE void atomic_decrement_release(volatile int32* value) { InterlockedDecrementRelease((volatile long *) value); } +FORCE_INLINE void atomic_increment_release(volatile int64* value) { InterlockedIncrementRelease64((volatile LONG64 *) value); } +FORCE_INLINE void atomic_decrement_release(volatile int64* value) { InterlockedDecrementRelease64((volatile LONG64 *) value); } +FORCE_INLINE void atomic_add_release(volatile int8* value, int8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); } +FORCE_INLINE void atomic_sub_release(volatile int8* value, int8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); } +FORCE_INLINE void atomic_add_release(volatile int16* value, int16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); } +FORCE_INLINE void atomic_sub_release(volatile int16* value, int16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); } +FORCE_INLINE void atomic_add_release(volatile int32* value, int32 increment) { InterlockedAddRelease((volatile long *) value, increment); } +FORCE_INLINE void atomic_sub_release(volatile int32* value, int32 decrement) { InterlockedAddRelease((volatile long *) value, -decrement); } +FORCE_INLINE void atomic_add_release(volatile int64* value, int64 increment) { InterlockedAddRelease64((volatile LONG64 *) value, (LONG64) increment); } +FORCE_INLINE void atomic_sub_release(volatile int64* value, int64 decrement) { InterlockedAddRelease64((volatile LONG64 *) value, -((LONG64) decrement)); } +FORCE_INLINE f32 atomic_compare_exchange_weak_release(volatile f32* value, f32* expected, f32 desired) { _atomic_32 temp = {.l = InterlockedCompareExchangeRelease((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; } +FORCE_INLINE f64 atomic_compare_exchange_weak_release(volatile f64* value, f64* expected, f64 desired) { _atomic_64 temp = {.l = InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; } +FORCE_INLINE int32 atomic_compare_exchange_weak_release(volatile int32* value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchangeRelease((volatile long *) value, desired, *expected); } +FORCE_INLINE int64 atomic_compare_exchange_weak_release(volatile int64* value, int64* expected, int64 desired) { return (int64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } +FORCE_INLINE int8 atomic_fetch_add_release(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); } +FORCE_INLINE int8 atomic_fetch_sub_release(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, 
-((char) operand)); } +FORCE_INLINE int16 atomic_fetch_add_release(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); } +FORCE_INLINE int16 atomic_fetch_sub_release(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); } +FORCE_INLINE int32 atomic_fetch_add_release(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddRelease((volatile long *) value, operand); } +FORCE_INLINE int32 atomic_fetch_sub_release(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddRelease((volatile unsigned long *) value, -((long) operand)); } +FORCE_INLINE int64 atomic_fetch_add_release(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAddRelease64((volatile LONG64 *) value, (LONG64) operand); } +FORCE_INLINE int64 atomic_fetch_sub_release(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); } +FORCE_INLINE void atomic_set_release(volatile uint8* value, uint8 new_value) { InterlockedExchange8((volatile char *) value, (char) new_value); } +FORCE_INLINE void atomic_set_release(volatile uint16* value, uint16 new_value) { InterlockedExchange16((volatile short *) value, (short) new_value); } +FORCE_INLINE void atomic_set_release(volatile uint32* value, uint32 new_value) { InterlockedExchange((volatile long *) value, new_value); } +FORCE_INLINE void atomic_set_release(volatile uint64* value, uint64 new_value) { InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); } +FORCE_INLINE uint8 atomic_fetch_set_release(volatile uint8* value, uint8 new_value) { return (uint8) InterlockedExchange8((volatile char *) value, (char) new_value); } +FORCE_INLINE uint16 atomic_fetch_set_release(volatile uint16* value, uint16 new_value) { return (uint16) InterlockedExchange16((volatile short *) value, (short) new_value); } +FORCE_INLINE uint32 atomic_fetch_set_release(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchange((volatile long *) value, new_value); } +FORCE_INLINE uint64 atomic_fetch_set_release(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); } +FORCE_INLINE uint8 atomic_get_release(volatile uint8* value) { return (uint8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); } +FORCE_INLINE uint16 atomic_get_release(volatile uint16* value) { return (uint16) InterlockedCompareExchangeRelease16((volatile short *) value, 0, 0); } +FORCE_INLINE uint32 atomic_get_release(volatile uint32* value) { return (uint32) InterlockedCompareExchangeRelease((volatile long *) value, 0, 0); } +FORCE_INLINE uint64 atomic_get_release(volatile uint64* value) { return (uint64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, 0, 0); } +FORCE_INLINE void atomic_increment_release(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); } +FORCE_INLINE void atomic_decrement_release(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); } +FORCE_INLINE void atomic_increment_release(volatile uint16* value) { InterlockedIncrementRelease16((volatile short *) value); } +FORCE_INLINE void atomic_decrement_release(volatile uint16* value) { InterlockedDecrementRelease16((volatile short *) value); } +FORCE_INLINE void atomic_increment_release(volatile uint32* value) { 
InterlockedIncrementRelease((volatile long *) value); } +FORCE_INLINE void atomic_decrement_release(volatile uint32* value) { InterlockedDecrementRelease((volatile long *) value); } +FORCE_INLINE void atomic_increment_release(volatile uint64* value) { InterlockedIncrementRelease64((volatile LONG64 *) value); } +FORCE_INLINE void atomic_decrement_release(volatile uint64* value) { InterlockedDecrementRelease64((volatile LONG64 *) value); } +FORCE_INLINE void atomic_add_release(volatile uint8* value, uint8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); } +FORCE_INLINE void atomic_sub_release(volatile uint8* value, uint8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); } +FORCE_INLINE void atomic_add_release(volatile uint16* value, uint16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); } +FORCE_INLINE void atomic_sub_release(volatile uint16* value, uint16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); } +FORCE_INLINE void atomic_add_release(volatile uint32* value, uint32 increment) { InterlockedAddRelease((volatile long *) value, increment); } +FORCE_INLINE void atomic_sub_release(volatile uint32* value, uint32 decrement) { InterlockedAddRelease((volatile long *) value, -1 * ((int32) decrement)); } +FORCE_INLINE void atomic_add_release(volatile uint64* value, uint64 increment) { InterlockedAddRelease64((volatile LONG64 *) value, (LONG64) increment); } +FORCE_INLINE void atomic_sub_release(volatile uint64* value, uint64 decrement) { InterlockedAddRelease64((volatile LONG64 *) value, -((LONG64) decrement)); } +FORCE_INLINE uint32 atomic_compare_exchange_weak_release(volatile uint32* value, uint32* expected, uint32 desired) { return (uint32) InterlockedCompareExchangeRelease((volatile long *) value, desired, *expected); } +FORCE_INLINE uint64 atomic_compare_exchange_weak_release(volatile uint64* value, uint64* expected, uint64 desired) { return (uint64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } +FORCE_INLINE uint8 atomic_fetch_add_release(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); } +FORCE_INLINE uint8 atomic_fetch_sub_release(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); } +FORCE_INLINE uint16 atomic_fetch_add_release(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); } +FORCE_INLINE uint16 atomic_fetch_sub_release(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); } +FORCE_INLINE uint32 atomic_fetch_add_release(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddRelease((volatile long *) value, operand); } +FORCE_INLINE uint32 atomic_fetch_sub_release(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddRelease((volatile unsigned long *) value, -((long) operand)); } +FORCE_INLINE uint64 atomic_fetch_add_release(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAddRelease64((volatile LONG64 *) value, (LONG64) operand); } +FORCE_INLINE uint64 atomic_fetch_sub_release(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); } 
+FORCE_INLINE void atomic_and_release(volatile uint8* value, uint8 mask) { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_release(volatile int8* value, int8 mask) { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_release(volatile uint16* value, uint16 mask) { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_release(volatile int16* value, int16 mask) { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_release(volatile uint32* value, uint32 mask) { InterlockedAndRelease((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_and_release(volatile int32* value, int32 mask) { InterlockedAndRelease((volatile LONG *) value, (LONG) mask); }
+FORCE_INLINE void atomic_and_release(volatile uint64* value, uint64 mask) { InterlockedAnd64Release((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_and_release(volatile int64* value, int64 mask) { InterlockedAnd64Release((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile uint8* value, uint8 mask) { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile int8* value, int8 mask) { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile uint16* value, uint16 mask) { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile int16* value, int16 mask) { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile uint32* value, uint32 mask) { InterlockedOrRelease((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile int32* value, int32 mask) { InterlockedOrRelease((volatile LONG *) value, (LONG) mask); }
+FORCE_INLINE void atomic_or_release(volatile uint64* value, uint64 mask) { InterlockedOr64Release((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile int64* value, int64 mask) { InterlockedOr64Release((volatile LONG64 *) value, mask); }
+
+FORCE_INLINE void atomic_set_acquire_release(void** target, void* new_pointer) { InterlockedExchangePointer(target, new_pointer); }
+FORCE_INLINE void* atomic_get_acquire_release(void** target) { return InterlockedCompareExchangePointer(target, NULL, NULL); }
+FORCE_INLINE void atomic_set_acquire_release(volatile int8* value, int8 new_value) { InterlockedExchange8((volatile char *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile int16* value, int16 new_value) { InterlockedExchange16((volatile short *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile int32* value, int32 new_value) { InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile int64* value, int64 new_value) { InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile f32* value, f32 new_value) { _atomic_32 temp = {.f = new_value}; InterlockedExchange((volatile long *) value, (long) temp.l); }
+FORCE_INLINE void atomic_set_acquire_release(volatile f64* value, f64 new_value) { _atomic_64 temp = {.f = new_value}; InterlockedExchange64((volatile LONG64 *) value, (LONG64) temp.l); }
+FORCE_INLINE int8 atomic_fetch_set_acquire_release(volatile int8* value, int8 new_value) { return (int8) InterlockedExchange8((volatile char *) value, (char) new_value); }
+FORCE_INLINE int16 atomic_fetch_set_acquire_release(volatile int16* value, int16 new_value) { return (int16) InterlockedExchange16((volatile short *) value, (short) new_value); }
+FORCE_INLINE int32 atomic_fetch_set_acquire_release(volatile int32* value, int32 new_value) { return (int32) InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE int64 atomic_fetch_set_acquire_release(volatile int64* value, int64 new_value) { return (int64) InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE int8 atomic_get_acquire_release(volatile int8* value) { return (int8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE int16 atomic_get_acquire_release(volatile int16* value) { return (int16) InterlockedCompareExchange16((volatile short *) value, 0, 0); }
+FORCE_INLINE int32 atomic_get_acquire_release(volatile int32* value) { return (int32) InterlockedCompareExchange((volatile long *) value, 0, 0); }
+FORCE_INLINE int64 atomic_get_acquire_release(volatile int64* value) { return (int64) InterlockedCompareExchange64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE f32 atomic_get_acquire_release(volatile f32* value) { _atomic_32 temp = {.l = InterlockedCompareExchange((volatile long *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE f64 atomic_get_acquire_release(volatile f64* value) { _atomic_64 temp = {.l = InterlockedCompareExchange64((volatile LONG64 *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE void atomic_increment_acquire_release(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile int16* value) { InterlockedIncrement16((volatile short *) value); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile int16* value) { InterlockedDecrement16((volatile short *) value); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile int32* value) { InterlockedIncrement((volatile long *) value); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile int32* value) { InterlockedDecrement((volatile long *) value); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile int64* value) { InterlockedIncrement64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile int64* value) { InterlockedDecrement64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_acquire_release(volatile int8* value, int8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile int8* value, int8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_acquire_release(volatile int16* value, int16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile int16* value, int16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_acquire_release(volatile int32* value, int32 increment) { InterlockedAdd((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile int32* value, int32 decrement) { InterlockedAdd((volatile long *) value, -decrement); }
+FORCE_INLINE void atomic_add_acquire_release(volatile int64* value, int64 increment) { InterlockedAdd64((volatile LONG64 *) value, (LONG64) increment); }
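The exchange-based setters above are enough for a minimal test-and-set lock. A sketch assuming nothing beyond the overloads in this file; the `spinlock_*` names are hypothetical:

    // Spin until this thread is the one that flips the flag from 0 to 1.
    // The acquire side of fetch_set orders the critical section after the
    // acquisition; a plain release store is sufficient to unlock.
    void spinlock_lock(volatile uint32* lock) {
        while (atomic_fetch_set_acquire_release(lock, 1)) {}
    }

    void spinlock_unlock(volatile uint32* lock) {
        atomic_set_release(lock, 0);
    }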
+FORCE_INLINE void atomic_sub_acquire_release(volatile int64* value, int64 decrement) { InterlockedAdd64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE f32 atomic_compare_exchange_weak_acquire_release(volatile f32* value, f32* expected, f32 desired) { _atomic_32 d = {.f = desired}; _atomic_32 e = {.f = *expected}; _atomic_32 temp = {.l = InterlockedCompareExchange((volatile long *) value, (long) d.l, (long) e.l)}; return temp.f; }
+FORCE_INLINE f64 atomic_compare_exchange_weak_acquire_release(volatile f64* value, f64* expected, f64 desired) { _atomic_64 d = {.f = desired}; _atomic_64 e = {.f = *expected}; _atomic_64 temp = {.l = InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) d.l, (LONG64) e.l)}; return temp.f; }
+FORCE_INLINE int32 atomic_compare_exchange_weak_acquire_release(volatile int32* value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchange((volatile long *) value, desired, *expected); }
+FORCE_INLINE int64 atomic_compare_exchange_weak_acquire_release(volatile int64* value, int64* expected, int64 desired) { return (int64) InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE int8 atomic_fetch_add_acquire_release(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE int8 atomic_fetch_sub_acquire_release(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE int16 atomic_fetch_add_acquire_release(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE int16 atomic_fetch_sub_acquire_release(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE int32 atomic_fetch_add_acquire_release(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAdd((volatile long *) value, operand); }
+FORCE_INLINE int32 atomic_fetch_sub_acquire_release(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAdd((volatile long *) value, -((long) operand)); }
+FORCE_INLINE int64 atomic_fetch_add_acquire_release(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE int64 atomic_fetch_sub_acquire_release(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_set_acquire_release(volatile uint8* value, uint8 new_value) { InterlockedExchange8((volatile char *) value, (char) new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile uint16* value, uint16 new_value) { InterlockedExchange16((volatile short *) value, (short) new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile uint32* value, uint32 new_value) { InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile uint64* value, uint64 new_value) { InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_fetch_set_acquire_release(volatile uint8* value, uint8 new_value) { return (uint8) InterlockedExchange8((volatile char *) value, (char) new_value); }
+FORCE_INLINE uint16 atomic_fetch_set_acquire_release(volatile uint16* value, uint16 new_value) { return (uint16) InterlockedExchange16((volatile short *) value, (short) new_value); }
+FORCE_INLINE uint32 atomic_fetch_set_acquire_release(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE uint64 atomic_fetch_set_acquire_release(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_get_acquire_release(volatile uint8* value) { return (uint8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE uint16 atomic_get_acquire_release(volatile uint16* value) { return (uint16) InterlockedCompareExchange16((volatile short *) value, 0, 0); }
+FORCE_INLINE uint32 atomic_get_acquire_release(volatile uint32* value) { return (uint32) InterlockedCompareExchange((volatile long *) value, 0, 0); }
+FORCE_INLINE uint64 atomic_get_acquire_release(volatile uint64* value) { return (uint64) InterlockedCompareExchange64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile uint16* value) { InterlockedIncrement16((volatile short *) value); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile uint16* value) { InterlockedDecrement16((volatile short *) value); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile uint32* value) { InterlockedIncrement((volatile long *) value); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile uint32* value) { InterlockedDecrement((volatile long *) value); }
+FORCE_INLINE void atomic_increment_acquire_release(volatile uint64* value) { InterlockedIncrement64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_decrement_acquire_release(volatile uint64* value) { InterlockedDecrement64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_acquire_release(volatile uint8* value, uint8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile uint8* value, uint8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_acquire_release(volatile uint16* value, uint16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile uint16* value, uint16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_acquire_release(volatile uint32* value, uint32 increment) { InterlockedAdd((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement) { InterlockedAdd((volatile long *) value, -1 * ((int32) decrement)); }
+FORCE_INLINE void atomic_add_acquire_release(volatile uint64* value, uint64 increment) { InterlockedAdd64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile uint64* value, uint64 decrement) { InterlockedAdd64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint32* expected, uint32 desired) { return (uint32) InterlockedCompareExchange((volatile long *) value, desired, *expected); }
+FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire_release(volatile uint64* value, uint64* expected, uint64 desired) { return (uint64) InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE uint8 atomic_fetch_add_acquire_release(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE uint8 atomic_fetch_sub_acquire_release(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE uint16 atomic_fetch_add_acquire_release(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE uint16 atomic_fetch_sub_acquire_release(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE uint32 atomic_fetch_add_acquire_release(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAdd((volatile long *) value, operand); }
+FORCE_INLINE uint32 atomic_fetch_sub_acquire_release(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAdd((volatile long *) value, -((long) operand)); }
+FORCE_INLINE uint64 atomic_fetch_add_acquire_release(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE uint64 atomic_fetch_sub_acquire_release(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_and_acquire_release(volatile uint8* value, uint8 mask) { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile int8* value, int8 mask) { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile uint16* value, uint16 mask) { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile int16* value, int16 mask) { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile uint32* value, uint32 mask) { InterlockedAnd((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile int32* value, int32 mask) { InterlockedAnd((volatile LONG *) value, (LONG) mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile uint64* value, uint64 mask) { InterlockedAnd64((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile int64* value, int64 mask) { InterlockedAnd64((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile uint8* value, uint8 mask) { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile int8* value, int8 mask) { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile uint16* value, uint16 mask) { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile int16* value, int16 mask) { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile uint32* value, uint32 mask) { InterlockedOr((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile int32* value, int32 mask) { InterlockedOr((volatile LONG *) value, (LONG) mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile uint64* value, uint64 mask) { InterlockedOr64((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile int64* value, int64 mask) { InterlockedOr64((volatile LONG64 *) value, mask); }
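The compare-exchange overloads above return the previously stored value rather than a success flag, so a retry loop compares the return value against the expected one. A sketch; `atomic_max` is a hypothetical helper, only the `atomic_*` calls come from this file:

    // Lock-free maximum: raise *value to at least new_value.
    void atomic_max(volatile uint32* value, uint32 new_value) {
        uint32 expected = atomic_get_acquire_release(value);
        while (expected < new_value) {
            uint32 old = atomic_compare_exchange_weak_acquire_release(value, &expected, new_value);
            if (old == expected) {
                break; // The swap happened.
            }
            expected = old; // Lost the race; retry against the freshly observed value.
        }
    }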
 
 // Check out the intrinsic functions fence_memory and fence_write
 // These are much faster and could accomplish what you are doing
diff --git a/stdlib/Types.h b/stdlib/Types.h
index dee30b1..28af015 100644
--- a/stdlib/Types.h
+++ b/stdlib/Types.h
@@ -42,6 +42,8 @@ typedef uintptr_t umm;
 typedef intptr_t smm;
 
 // @question Consider implementing atomic_16 depending on intrinsic support
+#define atomic_8 volatile
+#define atomic_16 alignas(2) volatile
 #define atomic_32 alignas(4) volatile
 #define atomic_64 alignas(8) volatile
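The new atomic_8/atomic_16 qualifiers follow the pattern of the existing 32/64-bit ones: volatile plus the natural alignment the matching Interlocked intrinsics expect. A sketch of the intended declaration style; `JobFlags` is a hypothetical struct:

    struct JobFlags {
        // 1 byte: natural alignment is always sufficient, so atomic_8 is just volatile
        atomic_8 int8 is_ready;

        // alignas(2) guarantees the alignment the *16 intrinsics require
        atomic_16 int16 pending;

        atomic_32 int32 generation;
    };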
diff --git a/tests/ui/UILayoutTest.cpp b/tests/ui/UILayoutTest.cpp
index 1fdd6de..825f363 100644
--- a/tests/ui/UILayoutTest.cpp
+++ b/tests/ui/UILayoutTest.cpp
@@ -60,11 +60,8 @@ static void test_layout_from_theme() {
     theme2.data = (byte *) platform_alloc(2 * MEGABYTE);
     theme_from_file_txt(&theme2, "./../../GameEditor/assets/themes/default/scene1.themetxt", &ring);
 
-    Camera camera = {};
-    camera.viewport_width = 1024;
-    camera.viewport_height = 768;
-    layout_from_theme(&layout, &theme1, &camera);
-    layout_from_theme(&layout, &theme2, &camera);
+    layout_from_theme(&layout, &theme1);
+    layout_from_theme(&layout, &theme2);
 
     UIElement* element = layout_get_element(&layout, "cmd_window");
     ASSERT_NOT_EQUALS(element, NULL);
diff --git a/ui/UIElement.h b/ui/UIElement.h
index 5f82cd9..47ae418 100644
--- a/ui/UIElement.h
+++ b/ui/UIElement.h
@@ -52,15 +52,17 @@ enum UIElementState : byte {
 struct UIElement {
     // @see UIElementState
     byte state_flag;
+
+    // Used for grouping ui elements (e.g. const ui elements, fairly static elements, dynamic elements)
+    // Children are still checked even if a parent doesn't match the category (e.g. a static window may still have dynamic content)
+    byte category;
+
     UIElementType type;
 
     // Used to keep track of the current state (= _old) and the next state or state we are transitioning into
     UIStyleType style_old;
     UIStyleType style_new;
 
-    // Used for grouping ui elements (e.g. const ui elements, fairly static elements, dynamic elements)
-    byte category;
-
     f32 zindex;
 
     // Some elements need information from their parent element
@@ -111,15 +113,15 @@ struct UIElement {
     // Cache
     //////////////////////////////////////
 
-    // We cache the last UI element rendering for re-use in the next frame
-    // @question There might be custom UI elements which need more than 2^16 vertices
-    uint16 vertex_count;
-
     // The max vertex count is defined in the theme file
     uint16 vertex_count_max;
 
+    // We cache the last UI element rendering for re-use in the next frame
+    // @question There might be custom UI elements which need more than 2^16 vertices
+    uint16 vertex_count_active;
+
     // Offset into the vertex array (NOT in bytes but in vertices)
-    uint32 vertices_active;
+    uint32 vertices_active_offset;
 };
 
 #endif
\ No newline at end of file
diff --git a/ui/UIInput.h b/ui/UIInput.h
index 1418557..6d39661 100644
--- a/ui/UIInput.h
+++ b/ui/UIInput.h
@@ -145,35 +145,37 @@ void ui_input_element_unserialize(UIInput* __restrict details, const byte** __re
 void ui_input_element_populate(
     UILayout* layout,
+    UIElement* element,
     const UIAttributeGroup* __restrict group,
-    UIInput* __restrict input,
-    UIElement* parent,
-    EvaluatorVariable* __restrict variables
+    UIInput* __restrict input
 ) {
-    if (parent) {
+    v4_f32 parent_dimension = {};
+    if (element->parent) {
+        UIElement* parent = (UIElement *) (layout->data + element->parent);
         // @bug How to ensure that the parent is initialized before the child element
         // Currently the order of the initialization depends on the theme file, NOT the layout file
         // We could fix it by loading the style based on the layout order but this would result in many misses when looking up styles
         // The reason for these misses is that often only 1-2 style_types exist per element
-
-        v4_f32* parent_dimension;
         switch (parent->type) {
             case UI_ELEMENT_TYPE_VIEW_WINDOW: {
                 UIWindow* parent_window = (UIWindow *) (layout->data + parent->style_types[UI_STYLE_TYPE_ACTIVE]);
-                parent_dimension = &parent_window->dimension.dimension;
+                parent_dimension = parent_window->dimension.dimension;
             } break;
             case UI_ELEMENT_TYPE_VIEW_PANEL: {
                 UIPanel* parent_window = (UIPanel *) (layout->data + parent->style_types[UI_STYLE_TYPE_ACTIVE]);
-                parent_dimension = &parent_window->dimension.dimension;
+                parent_dimension = parent_window->dimension.dimension;
             } break;
             default:
                 UNREACHABLE();
        }
+    }
 
-        variables[2].value = parent_dimension->x;
-        variables[3].value = parent_dimension->y;
-        variables[4].value = parent_dimension->width;
-        variables[5].value = parent_dimension->height;
+    if (!element->vertices_active_offset && !element->vertex_count_max) {
+        element->vertices_active_offset = layout->active_vertex_offset;
+        UIAttribute* vertex_attr = ui_attribute_from_group(group, UI_ATTRIBUTE_TYPE_VERTEX_COUNT);
+        element->vertex_count_max = (uint16) (vertex_attr ? vertex_attr->value_int : 8);
+
+        layout->active_vertex_offset += element->vertex_count_max;
     }
 
     UIAttribute* attributes = (UIAttribute *) (group + 1);
@@ -185,7 +187,7 @@ void ui_input_element_populate(
         case UI_ATTRIBUTE_TYPE_DIMENSION_WIDTH:
         case UI_ATTRIBUTE_TYPE_POSITION_Y:
         case UI_ATTRIBUTE_TYPE_DIMENSION_HEIGHT: {
-            ui_theme_assign_dimension(&input->dimension, &attributes[i], 6, variables);
+            ui_theme_assign_dimension(&input->dimension, &attributes[i]);
         } break;
     }
 }
@@ -204,7 +206,7 @@ int32 ui_input_element_update(UILayout* layout, UIElement* element)
     // Border
     if (input->border.thickness) {
         idx += vertex_rect_create(
-            layout->vertices_active + element->vertices_active, zindex,
+            layout->vertices_active + element->vertices_active_offset, zindex,
             dimension, input->dimension.alignment,
             input->border.color
         );
@@ -222,7 +224,7 @@ int32 ui_input_element_update(UILayout* layout, UIElement* element)
     // Background
     if (input->background.background_color) {
         idx += vertex_rect_create(
-            layout->vertices_active + element->vertices_active + idx, zindex,
+            layout->vertices_active + element->vertices_active_offset + idx, zindex,
             dimension, input->dimension.alignment,
             input->background.background_color
        );
@@ -238,13 +240,13 @@ int32 ui_input_element_update(UILayout* layout, UIElement* element)
         idx += ui_cursor_element_update(layout, cursor_element);
 
         memcpy(
-            layout->vertices_active + element->vertices_active + idx,
-            layout->vertices_active + cursor_element->vertices_active,
-            cursor_element->vertex_count * sizeof(*(layout->vertices_active))
+            layout->vertices_active + element->vertices_active_offset + idx,
+            layout->vertices_active + cursor_element->vertices_active_offset,
+            cursor_element->vertex_count_active * sizeof(*(layout->vertices_active))
         );
     }
 
-    element->vertex_count = (uint16) idx;
+    element->vertex_count_active = (uint16) idx;
 
     return idx;
 }
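With the reservation scheme above, each element owns the slice [vertices_active_offset, vertices_active_offset + vertex_count_max) of the layout's shared vertex buffer, and vertex_count_active must never exceed the reservation. A guard one might add at the end of the update functions (a sketch, not part of the patch):

    // idx = number of vertices this update actually wrote into the element's slice
    ASSERT_SIMPLE(idx <= element->vertex_count_max);
    element->vertex_count_active = (uint16) idx;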
diff --git a/ui/UILabel.h b/ui/UILabel.h
index ba16b2c..dc1143c 100644
--- a/ui/UILabel.h
+++ b/ui/UILabel.h
@@ -52,43 +52,51 @@ void ui_label_element_unserialize(UILabel* __restrict details, const byte** __re
 void ui_label_element_populate(
     UILayout* layout,
+    UIElement* element,
     const UIAttributeGroup* __restrict group,
-    UILabel* __restrict label,
-    UIElement* parent,
-    EvaluatorVariable* __restrict variables
+    UILabel* __restrict label
 ) {
-    if (parent) {
+    v4_f32 parent_dimension = {};
+    if (element->parent) {
+        UIElement* parent = (UIElement *) (layout->data + element->parent);
         // @bug How to ensure that the parent is initialized before the child element
         // Currently the order of the initialization depends on the theme file, NOT the layout file
         // We could fix it by loading the style based on the layout order but this would result in many misses when looking up styles
         // The reason for these misses is that often only 1-2 style_types exist per element
-
-        v4_f32* parent_dimension;
         switch (parent->type) {
             case UI_ELEMENT_TYPE_VIEW_WINDOW: {
                 UIWindow* parent_window = (UIWindow *) (layout->data + parent->style_types[UI_STYLE_TYPE_ACTIVE]);
-                parent_dimension = &parent_window->dimension.dimension;
+                parent_dimension = parent_window->dimension.dimension;
             } break;
             case UI_ELEMENT_TYPE_VIEW_PANEL: {
                 UIPanel* parent_window = (UIPanel *) (layout->data + parent->style_types[UI_STYLE_TYPE_ACTIVE]);
-                parent_dimension = &parent_window->dimension.dimension;
+                parent_dimension = parent_window->dimension.dimension;
             } break;
             case UI_ELEMENT_TYPE_BUTTON: {
                 UIButton* parent_button = (UIButton *) (layout->data + parent->style_types[UI_STYLE_TYPE_ACTIVE]);
-                parent_dimension = &parent_button->dimension.dimension;
+                parent_dimension = parent_button->dimension.dimension;
             } break;
             case UI_ELEMENT_TYPE_INPUT: {
                 UIInput* parent_input = (UIInput *) (layout->data + parent->style_types[UI_STYLE_TYPE_ACTIVE]);
-                parent_dimension = &parent_input->dimension.dimension;
+                parent_dimension = parent_input->dimension.dimension;
             } break;
             default:
                 UNREACHABLE();
         }
+    }
 
-        variables[2].value = parent_dimension->x;
-        variables[3].value = parent_dimension->y;
-        variables[4].value = parent_dimension->width;
-        variables[5].value = parent_dimension->height;
+    if (!element->vertices_active_offset && !element->vertex_count_max) {
+        element->vertices_active_offset = layout->active_vertex_offset;
+        UIAttribute* vertex_attr = ui_attribute_from_group(group, UI_ATTRIBUTE_TYPE_VERTEX_COUNT);
+
+        if (vertex_attr) {
+            element->vertex_count_max = (uint16) vertex_attr->value_int;
+        } else {
+            // @todo Use state to calculate vertex_count_max (2 * max string length)
+            element->vertex_count_max = 128;
+        }
+
+        layout->active_vertex_offset += element->vertex_count_max;
     }
 
     UIAttribute* attributes = (UIAttribute *) (group + 1);
@@ -100,7 +108,7 @@ void ui_label_element_populate(
         case UI_ATTRIBUTE_TYPE_DIMENSION_WIDTH:
         case UI_ATTRIBUTE_TYPE_POSITION_Y:
         case UI_ATTRIBUTE_TYPE_DIMENSION_HEIGHT: {
-            ui_theme_assign_dimension(&label->dimension, &attributes[i], 6, variables);
+            ui_theme_assign_dimension(&label->dimension, &attributes[i]);
         } break;
         case UI_ATTRIBUTE_TYPE_FONT_NAME:
         case UI_ATTRIBUTE_TYPE_FONT_COLOR:
@@ -119,7 +127,7 @@ int32 ui_label_element_update(UILayout* layout, UIElement* element)
     UILabelState* state = (UILabelState *) (layout->data + element->state);
 
     return vertex_text_create(
-        layout->vertices_active + element->vertices_active, element->zindex,
+        layout->vertices_active + element->vertices_active_offset, element->zindex,
         label->dimension.dimension, label->font.alignment,
         layout->font, state->content,
         label->font.size, label->font.color
diff --git a/ui/UILayout.cpp b/ui/UILayout.cpp
index d7e3b83..5aefe4e 100644
--- a/ui/UILayout.cpp
+++ b/ui/UILayout.cpp
@@ -18,6 +18,7 @@
 #include "UIWindow.h"
 
 // @todo We should add some asserts that ensure that the respective structs at least start at a 4byte memory alignment
+// @performance We are prefetching but we are not yet ensuring the data is cache line aligned. We should align each UIElement's element-specific data
 
 // Doesn't change the position of pos outside of the function, since lookahead
 static
@@ -339,11 +340,14 @@ void ui_layout_serialize_element(
     *((uint32 *) *out) = SWAP_ENDIAN_LITTLE(element->animations);
     *out += sizeof(element->animations);
 
-    *((uint16 *) *out) = SWAP_ENDIAN_LITTLE(element->vertex_count);
-    *out += sizeof(element->vertex_count);
+    *((uint16 *) *out) = SWAP_ENDIAN_LITTLE(element->vertex_count_max);
+    *out += sizeof(element->vertex_count_max);
 
-    *((uint32 *) *out) = SWAP_ENDIAN_LITTLE(element->vertices_active);
-    *out += sizeof(element->vertices_active);
+    *((uint16 *) *out) = SWAP_ENDIAN_LITTLE(element->vertex_count_active);
+    *out += sizeof(element->vertex_count_active);
+
+    *((uint32 *) *out) = SWAP_ENDIAN_LITTLE(element->vertices_active_offset);
+    *out += sizeof(element->vertices_active_offset);
 
     // Output dynamic length content directly after UIElement
     //
@@ -491,11 +495,14 @@ void ui_layout_parse_element(HashEntryInt32* entry, byte* data, const byte** in)
     element->animations = SWAP_ENDIAN_LITTLE(*((uint32 *) *in));
     *in += sizeof(element->animations);
 
-    element->vertex_count = SWAP_ENDIAN_LITTLE(*((uint16 *) *in));
-    *in += sizeof(element->vertex_count);
+    element->vertex_count_max = SWAP_ENDIAN_LITTLE(*((uint16 *) *in));
+    *in += sizeof(element->vertex_count_max);
 
-    element->vertices_active = SWAP_ENDIAN_LITTLE(*((uint32 *) *in));
-    *in += sizeof(element->vertices_active);
+    element->vertex_count_active = SWAP_ENDIAN_LITTLE(*((uint16 *) *in));
+    *in += sizeof(element->vertex_count_active);
+
+    element->vertices_active_offset = SWAP_ENDIAN_LITTLE(*((uint32 *) *in));
+    *in += sizeof(element->vertices_active_offset);
 
     // Load dynamic length content
     // Some of the content belongs directly after the element but some of it belongs at very specific offsets
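Writer and reader above must advance through the serialized cache fields in lockstep: uint16 vertex_count_max, uint16 vertex_count_active, uint32 vertices_active_offset (8 bytes, versus 6 in the old vertex_count/vertices_active format). A compile-time note one could attach; whether UI_LAYOUT_VERSION actually needs a bump depends on whether old layout files are still in circulation, which this patch does not show:

    static_assert(
        sizeof(((UIElement *) 0)->vertex_count_max)
        + sizeof(((UIElement *) 0)->vertex_count_active)
        + sizeof(((UIElement *) 0)->vertices_active_offset) == 8,
        "serialized cache fields changed; check UI_LAYOUT_VERSION"
    );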
@@ -589,18 +596,8 @@ int32 layout_from_data(
 void layout_from_theme(
     UILayout* __restrict layout,
-    const UIThemeStyle* __restrict theme,
-    const Camera* __restrict camera
+    const UIThemeStyle* __restrict theme
 ) {
-    EvaluatorVariable variables[] = {
-        { "vw", (f32) camera->viewport_width },
-        { "vh", (f32) camera->viewport_height },
-        { "px", 0.0 }, // Placeholder for parent values
-        { "py", 0.0 }, // Placeholder for parent values
-        { "pw", 0.0 }, // Placeholder for parent values
-        { "ph", 0.0 }, // Placeholder for parent values
-    };
-
     // @todo Handle animations
     // @todo Handle vertices_active offset
     if (theme->font) {
@@ -632,7 +629,6 @@ void layout_from_theme(
         // Populate default element
         UIElement* element = (UIElement *) (layout->data + entry->value);
         UIAttributeGroup* group = (UIAttributeGroup *) (theme->data + style_entry->value);
-        UIElement* parent = element->parent ? (UIElement *) (layout->data + element->parent) : NULL;
 
         // @todo Continue implementation
         switch (element->type) {
@@ -640,30 +636,27 @@
                 ui_label_state_populate(group, (UILabelState *) (layout->data + element->state));
                 ui_label_element_populate(
                     layout,
+                    element,
                     group,
-                    (UILabel *) (layout->data + element->style_types[UI_STYLE_TYPE_DEFAULT]),
-                    parent,
-                    variables
+                    (UILabel *) (layout->data + element->style_types[UI_STYLE_TYPE_DEFAULT])
                 );
             } break;
             case UI_ELEMENT_TYPE_INPUT: {
                 ui_input_state_populate(group, (UIInputState *) (layout->data + element->state));
                 ui_input_element_populate(
                     layout,
+                    element,
                     group,
-                    (UIInput *) (layout->data + element->style_types[UI_STYLE_TYPE_DEFAULT]),
-                    parent,
-                    variables
+                    (UIInput *) (layout->data + element->style_types[UI_STYLE_TYPE_DEFAULT])
                 );
             } break;
             case UI_ELEMENT_TYPE_VIEW_WINDOW: {
                 ui_window_state_populate(group, (UIWindowState *) (layout->data + element->state));
                 ui_window_element_populate(
                     layout,
+                    element,
                     group,
-                    (UIWindow *) (layout->data + element->style_types[UI_STYLE_TYPE_DEFAULT]),
-                    parent,
-                    variables
+                    (UIWindow *) (layout->data + element->style_types[UI_STYLE_TYPE_DEFAULT])
                 );
             } break;
         }
@@ -714,35 +707,31 @@ void layout_from_theme(
         // Populate element style_types
         UIAttributeGroup* group = (UIAttributeGroup *) (theme->data + style_entry->value);
-        UIElement* parent = element->parent ? (UIElement *) (layout->data + element->parent) : NULL;
 
         // @todo Continue implementation
         switch (element->type) {
             case UI_ELEMENT_TYPE_LABEL: {
                 ui_label_element_populate(
                     layout,
+                    element,
                     group,
-                    (UILabel *) (layout->data + element->style_types[style_type]),
-                    parent,
-                    variables
+                    (UILabel *) (layout->data + element->style_types[style_type])
                 );
             } break;
             case UI_ELEMENT_TYPE_INPUT: {
                 ui_input_element_populate(
                     layout,
+                    element,
                     group,
-                    (UIInput *) (layout->data + element->style_types[style_type]),
-                    parent,
-                    variables
+                    (UIInput *) (layout->data + element->style_types[style_type])
                 );
             } break;
             case UI_ELEMENT_TYPE_VIEW_WINDOW: {
                 ui_window_element_populate(
                     layout,
+                    element,
                     group,
-                    (UIWindow *) (layout->data + element->style_types[style_type]),
-                    parent,
-                    variables
+                    (UIWindow *) (layout->data + element->style_types[style_type])
                );
             } break;
         }
@@ -807,12 +796,17 @@ void ui_layout_update(UILayout* layout, UIElement* element) {
 // This increases our RAM requirements (every vertex is in cache AND in the asset AND in VRAM)
 // However, this also has the benefit of allowing us to ONLY re-render individual elements
 
+// @performance Profile our prefetching, not sure if it is actually helpful or harmful
+
 // @performance In our immediate mode solution we decided the update/render based on a bitfield
 // That is very efficient; the code below isn't doing that. Maybe there is a way to implement that here as well?
 // I don't think so, but it would be nice
 
 // This function caches the vertices
 void ui_layout_update_dfs(UILayout* layout, UIElement* element, byte category = 0) {
-    if (element->type == UI_ELEMENT_TYPE_MANUAL) {
+    if (element->type == UI_ELEMENT_TYPE_MANUAL
+        || !(element->state_flag & UI_ELEMENT_STATE_VISIBLE)
+        || !(element->state_flag & UI_ELEMENT_STATE_CHANGED)
+    ) {
         return;
     }
 
@@ -820,9 +814,14 @@ void ui_layout_update_dfs(UILayout* layout, UIElement* element, byte category =
         ui_layout_update(layout, element);
     }
 
-    uint32* children = (uint32 *) (element + 1);
-    for (int32 i = 0; i < element->children_count; ++i) {
-        ui_layout_update(layout, (UIElement *) (layout->data + children[i]));
+    if (element->children_count) {
+        uint32* children = (uint32 *) (element + 1);
+        for (int32 i = 0; i < element->children_count - 1; ++i) {
+            intrin_prefetch_l2(layout->data + children[i + 1]);
+            ui_layout_update(layout, (UIElement *) (layout->data + children[i]));
+        }
+
+        ui_layout_update(layout, (UIElement *) (layout->data + children[element->children_count - 1]));
     }
 }
 
@@ -831,21 +830,31 @@ uint32 ui_layout_render_dfs(
     UIElement* element, Vertex3DTextureColor* __restrict vertices, byte category = 0
 ) {
-    if (element->type == UI_ELEMENT_TYPE_MANUAL) {
+    if (element->type == UI_ELEMENT_TYPE_MANUAL
+        || !(element->state_flag & UI_ELEMENT_STATE_VISIBLE)
+    ) {
         return 0;
     }
 
     uint32 vertex_count = 0;
-    if (element->vertex_count && element->category == category) {
-        memcpy(vertices, layout->vertices_active + element->vertices_active, sizeof(*vertices) * element->vertex_count);
-        vertices += element->vertex_count;
-        vertex_count += element->vertex_count;
+    if (element->vertex_count_active && element->category == category) {
+        memcpy(vertices, layout->vertices_active + element->vertices_active_offset, sizeof(*vertices) * element->vertex_count_active);
+        vertices += element->vertex_count_active;
+        vertex_count += element->vertex_count_active;
     }
 
-    uint32* children = (uint32 *) (element + 1);
-    for (int32 i = 0; i < element->children_count; ++i) {
-        uint32 child_vertex_count = ui_layout_render_dfs(layout, (UIElement *) (layout->data + children[i]), vertices, category);
+    if (element->children_count) {
+        uint32* children = (uint32 *) (element + 1);
+        for (int32 i = 0; i < element->children_count - 1; ++i) {
+            intrin_prefetch_l2(layout->data + children[i + 1]);
+
+            uint32 child_vertex_count = ui_layout_render_dfs(layout, (UIElement *) (layout->data + children[i]), vertices, category);
+            vertices += child_vertex_count;
+            vertex_count += child_vertex_count;
+        }
+
+        uint32 child_vertex_count = ui_layout_render_dfs(layout, (UIElement *) (layout->data + children[element->children_count - 1]), vertices, category);
         vertices += child_vertex_count;
         vertex_count += child_vertex_count;
     }
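The traversal loops above all use the same software-pipelining idiom: prefetch child i + 1 into L2 while working on child i, and peel the last child out of the loop so we never prefetch past the array. The pattern in isolation (`process` is a stand-in for the recursive call):

    for (int32 i = 0; i < element->children_count - 1; ++i) {
        intrin_prefetch_l2(layout->data + children[i + 1]); // hide the latency of the next child
        process((UIElement *) (layout->data + children[i]));
    }
    process((UIElement *) (layout->data + children[element->children_count - 1]));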
@@ -858,7 +867,9 @@ uint32 ui_layout_update_render_dfs(
     UIElement* __restrict element, Vertex3DTextureColor* __restrict vertices, byte category = 0
 ) {
-    if (element->type == UI_ELEMENT_TYPE_MANUAL) {
+    if (element->type == UI_ELEMENT_TYPE_MANUAL
+        || !(element->state_flag & UI_ELEMENT_STATE_VISIBLE)
+    ) {
         return 0;
     }
 
@@ -867,14 +878,22 @@ uint32 ui_layout_update_render_dfs(
     if (element->category == category) {
         ui_layout_update(layout, element);
 
-        memcpy(vertices, layout->vertices_active + element->vertices_active, sizeof(*vertices) * element->vertex_count);
-        vertices += element->vertex_count;
-        vertex_count += element->vertex_count;
+        memcpy(vertices, layout->vertices_active + element->vertices_active_offset, sizeof(*vertices) * element->vertex_count_active);
+        vertices += element->vertex_count_active;
+        vertex_count += element->vertex_count_active;
     }
 
-    uint32* children = (uint32 *) (element + 1);
-    for (int32 i = 0; i < element->children_count; ++i) {
-        uint32 child_vertex_count = ui_layout_update_render_dfs(layout, (UIElement *) (layout->data + children[i]), vertices, category);
+    if (element->children_count) {
+        uint32* children = (uint32 *) (element + 1);
+        for (int32 i = 0; i < element->children_count - 1; ++i) {
+            intrin_prefetch_l2(layout->data + children[i + 1]);
+
+            uint32 child_vertex_count = ui_layout_update_render_dfs(layout, (UIElement *) (layout->data + children[i]), vertices, category);
+            vertices += child_vertex_count;
+            vertex_count += child_vertex_count;
+        }
+
+        uint32 child_vertex_count = ui_layout_update_render_dfs(layout, (UIElement *) (layout->data + children[element->children_count - 1]), vertices, category);
         vertices += child_vertex_count;
         vertex_count += child_vertex_count;
     }
@@ -908,18 +927,30 @@ void* layout_get_element_state(const UILayout* layout, UIElement* element)
 inline
 void* layout_get_element_style(const UILayout* layout, UIElement* element, UIStyleType style_type)
 {
+    if (!element) {
+        return NULL;
+    }
+
     return layout->data + element->style_types[style_type];
 }
 
 inline
 UIElement* layout_get_element_parent(const UILayout* layout, UIElement* element)
 {
+    if (!element) {
+        return NULL;
+    }
+
     return (UIElement *) (layout->data + element->parent);
 }
 
 inline
 UIElement* layout_get_element_child(const UILayout* layout, UIElement* element, uint16 child)
 {
+    if (!element) {
+        return NULL;
+    }
+
     uint32* children = (uint32 *) (element + 1);
     return (UIElement *) (layout->data + children[child]);
 }
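The new NULL guards let lookups chain without checking every intermediate step. Assuming layout_get_element returns NULL for unknown ids, as the test earlier in this patch suggests, a NULL simply propagates through the chain:

    UIElement* element = layout_get_element(&layout, "cmd_window"); // may be NULL
    UIElement* parent = layout_get_element_parent(&layout, element); // NULL-safe
    void* style = layout_get_element_style(&layout, parent, UI_STYLE_TYPE_ACTIVE); // NULL-safe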
diff --git a/ui/UILayout.h b/ui/UILayout.h
index 500099d..fe65289 100644
--- a/ui/UILayout.h
+++ b/ui/UILayout.h
@@ -10,6 +10,36 @@
 #define UI_LAYOUT_VERSION 1
 
+///////////////////////////////
+// UIElement
+// ============================
+// child_offset 1
+// child_offset 2
+// ...
+// ============================
+// UIElementState
+// ============================
+// UIElementStyle Active
+// UIElementStyle Default
+// ...
+// ============================
+
+// ...
+// Somewhere else in the buffer
+// ...
+
+// UIAnimation 1 - Info
+// ============================
+// UIAnimation 1 - Keyframe 1
+// UIAnimation 1 - Keyframe 2
+// ...
+// ============================
+// UIAnimation 2
+// ============================
+// ...
+// ============================
+
+
 // Modified for every scene
 struct UILayout {
     // This array has the size of the game window and represents in color codes where interactible ui elements are
@@ -67,7 +97,7 @@ struct UILayout {
     Asset* ui_asset;
 
     // Total count of the ui_asset vertices
-    uint32 vertex_size;
+    uint32 vertex_count_max;
 
     // @question Should we maybe also hold the font atlas asset here?
 
@@ -75,11 +105,11 @@ struct UILayout {
     // This is very similar to the currently rendered UI output but may have some empty space between elements
     // The reason for this is that some elements may need different vertex counts for different states (e.g. input field)
     // WARNING: This memory is shared between different layouts
-    // @performance Maybe we could use this also for rendering by setting free vertices and elements currently hidden to 0
-    //              This would allow us to effectively remove the ui_asset
-    // @bug We currently use ui_mesh (Asset) to also keep track of gpu memory
     uint32 active_vertex_size;
     Vertex3DTextureColor* vertices_active; // Not the data owner (see data above)
+
+    // Used during the initialization so that every element knows where we currently are during the setup process
+    uint32 active_vertex_offset;
 };
 
 #endif
\ No newline at end of file
diff --git a/ui/UIWindow.h b/ui/UIWindow.h
index 1209c4a..4959db9 100644
--- a/ui/UIWindow.h
+++ b/ui/UIWindow.h
@@ -62,31 +62,36 @@ void ui_window_element_unserialize(UIWindow* __restrict details, const byte** __re
 void ui_window_element_populate(
     UILayout* layout,
+    UIElement* element,
     const UIAttributeGroup* __restrict group,
-    UIWindow* __restrict window,
-    UIElement* parent,
-    EvaluatorVariable* __restrict variables
+    UIWindow* __restrict window
 ) {
-    if (parent) {
+    v4_f32 parent_dimension = {};
+    if (element->parent) {
+        UIElement* parent = (UIElement *) (layout->data + element->parent);
         // @bug How to ensure that the parent is initialized before the child element
         // Currently the order of the initialization depends on the theme file, NOT the layout file
         // We could fix it by loading the style based on the layout order but this would result in many misses when looking up styles
         // The reason for these misses is that often only 1-2 style_types exist per element
 
-        v4_f32* parent_dimension;
         switch (parent->type) {
             case UI_ELEMENT_TYPE_VIEW_PANEL: {
                 UIPanel* parent_window = (UIPanel *) (layout->data + parent->style_types[UI_STYLE_TYPE_ACTIVE]);
-                parent_dimension = &parent_window->dimension.dimension;
+                parent_dimension = parent_window->dimension.dimension;
             } break;
             default:
                 UNREACHABLE();
         }
+    }
 
-        variables[2].value = parent_dimension->x;
-        variables[3].value = parent_dimension->y;
-        variables[4].value = parent_dimension->width;
-        variables[5].value = parent_dimension->height;
+    if (!element->vertices_active_offset && !element->vertex_count_max) {
+        element->vertices_active_offset = layout->active_vertex_offset;
+        UIAttribute* vertex_attr = ui_attribute_from_group(group, UI_ATTRIBUTE_TYPE_VERTEX_COUNT);
+
+        // @todo Strongly depends on the window components (e.g. title bar, close button, ...)
+        element->vertex_count_max = (uint16) (vertex_attr ? vertex_attr->value_int : 8);
+
+        layout->active_vertex_offset += element->vertex_count_max;
     }
 
     UIAttribute* attributes = (UIAttribute *) (group + 1);
@@ -98,7 +103,7 @@ void ui_window_element_populate(
         case UI_ATTRIBUTE_TYPE_DIMENSION_WIDTH:
         case UI_ATTRIBUTE_TYPE_POSITION_Y:
         case UI_ATTRIBUTE_TYPE_DIMENSION_HEIGHT: {
-            ui_theme_assign_dimension(&window->dimension, &attributes[i], 6, variables);
+            ui_theme_assign_dimension(&window->dimension, &attributes[i]);
         } break;
     }
 }
diff --git a/ui/attribute/UIAttribute.h b/ui/attribute/UIAttribute.h
index a28c25f..a15cf99 100644
--- a/ui/attribute/UIAttribute.h
+++ b/ui/attribute/UIAttribute.h
@@ -39,7 +39,7 @@ struct UIAttributeGroup {
     //UIAttribute* attributes;
 };
 
-UIAttribute* ui_attribute_from_group(UIAttributeGroup* group, UIAttributeType type)
+UIAttribute* ui_attribute_from_group(const UIAttributeGroup* group, UIAttributeType type)
 {
     if (!group->attribute_count) {
         return NULL;
@@ -194,6 +194,8 @@ int32 ui_attribute_type_to_id(const char* attribute_name)
         return UI_ATTRIBUTE_TYPE_CACHE_SIZE;
     } else if (str_compare(attribute_name, "anim") == 0) {
         return UI_ATTRIBUTE_TYPE_ANIMATION;
+    } else if (str_compare(attribute_name, "vertex_count") == 0) {
+        return UI_ATTRIBUTE_TYPE_VERTEX_COUNT;
     }
 
     ASSERT_SIMPLE(false);
@@ -226,7 +228,7 @@ void ui_attribute_parse_value(UIAttribute* attr, const char* attribute_name, con
 }
 
 inline
-void ui_theme_assign_f32(f32* a, const UIAttribute* attr, int32 variable_count = 0, const EvaluatorVariable* variables = NULL)
+void ui_theme_assign_f32(f32* a, const UIAttribute* attr)
 {
     if (attr->datatype == UI_ATTRIBUTE_DATA_TYPE_INT) {
         *a = (f32) attr->value_int;
@@ -237,25 +239,25 @@ void ui_theme_assign_f32(f32* a, const UIAttribute* attr, int32 variable_count =
         char value[32];
         memcpy(value, attr->value_str, ARRAY_COUNT(attr->value_str));
 
-        *a = (f32) evaluator_evaluate(value, variable_count, variables);
+        *a = (f32) evaluator_evaluate(value);
     }
 }
 
 inline
-void ui_theme_assign_dimension(UIAttributeDimension* dimension, const UIAttribute* attr, int32 variable_count, const EvaluatorVariable* variables)
+void ui_theme_assign_dimension(UIAttributeDimension* dimension, const UIAttribute* attr)
 {
     switch (attr->attribute_id) {
         case UI_ATTRIBUTE_TYPE_POSITION_X: {
-            ui_theme_assign_f32(&dimension->dimension.x, attr, variable_count, variables);
+            ui_theme_assign_f32(&dimension->dimension.x, attr);
         } break;
         case UI_ATTRIBUTE_TYPE_DIMENSION_WIDTH: {
-            ui_theme_assign_f32(&dimension->dimension.width, attr, variable_count, variables);
+            ui_theme_assign_f32(&dimension->dimension.width, attr);
         } break;
        case UI_ATTRIBUTE_TYPE_POSITION_Y: {
-            ui_theme_assign_f32(&dimension->dimension.y, attr, variable_count, variables);
+            ui_theme_assign_f32(&dimension->dimension.y, attr);
         } break;
         case UI_ATTRIBUTE_TYPE_DIMENSION_HEIGHT: {
-            ui_theme_assign_f32(&dimension->dimension.height, attr, variable_count, variables);
+            ui_theme_assign_f32(&dimension->dimension.height, attr);
         } break;
         default: {
             UNREACHABLE();
diff --git a/ui/attribute/UIAttributeType.h b/ui/attribute/UIAttributeType.h
index 94e2f9e..f376b90 100644
--- a/ui/attribute/UIAttributeType.h
+++ b/ui/attribute/UIAttributeType.h
@@ -114,6 +114,10 @@ enum UIAttributeType : uint16 {
 
     UI_ATTRIBUTE_TYPE_ANIMATION,
 
+    // The maximum number of vertices an element may use
+    // This is used to reserve memory in our vertex cache
+    UI_ATTRIBUTE_TYPE_VERTEX_COUNT,
+
     UI_ATTRIBUTE_TYPE_SIZE,
 };