diff --git a/architecture/x86/Intrinsics.h b/architecture/x86/Intrinsics.h index b763cc9..f351d1f 100644 --- a/architecture/x86/Intrinsics.h +++ b/architecture/x86/Intrinsics.h @@ -59,7 +59,7 @@ #define intrin_prefetch_l3(mem) _mm_prefetch((const char *) (mem), _MM_HINT_T2) inline -uint64 intrin_timestamp_counter() { +uint64 intrin_timestamp_counter() noexcept { _mm_mfence(); uint64 res = __rdtsc(); _mm_mfence(); diff --git a/asset/AssetArchive.h b/asset/AssetArchive.h index c67251f..9909f42 100644 --- a/asset/AssetArchive.h +++ b/asset/AssetArchive.h @@ -131,6 +131,8 @@ AssetArchiveElement* asset_archive_element_find(const AssetArchive* archive, int void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* buf, RingMemory* ring, int32 steps = 8) { + PROFILE_VERBOSE(PROFILE_ASSET_ARCHIVE_LOAD, path); + archive->fd = file_read_handle(path); if (!archive->fd) { return; @@ -167,7 +169,7 @@ void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* b file_read(archive->fd, &file, 0, file.size); asset_archive_header_load(&archive->header, file.content, steps); - LOG_LEVEL_2( + LOG_FORMAT_2( "Loaded AssetArchive %s with %d assets", {{LOG_DATA_CHAR_STR, (void *) path}, {LOG_DATA_UINT32, (void *) &archive->header.asset_count}} ); @@ -180,14 +182,6 @@ void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* b // The only problem is that we need to pass the pointer to this int in the thrd_queue since we queue the files to load there Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetManagementSystem* ams, RingMemory* ring) { - // @todo add calculation from element->type to ams index. Probably requires an app specific conversion function - - // We have to mask 0x00FFFFFF since the highest bits define the archive id, not the element id - AssetArchiveElement* element = &archive->header.asset_element[id & 0x00FFFFFF]; - - byte component_id = archive->asset_type_map[element->type]; - //AssetComponent* ac = &ams->asset_components[component_id]; - // Create a string representation from the asset id // We can't just use the asset id, since an int can have a \0 between high byte and low byte // @question We maybe can switch the AMS to work with ints as keys. @@ -196,6 +190,15 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana char id_str[9]; int_to_hex(id, id_str); + PROFILE_VERBOSE(PROFILE_ASSET_ARCHIVE_ASSET_LOAD, id_str); + // @todo add calculation from element->type to ams index. 
Probably requires an app specific conversion function + + // We have to mask 0x00FFFFFF since the highest bits define the archive id, not the element id + AssetArchiveElement* element = &archive->header.asset_element[id & 0x00FFFFFF]; + + byte component_id = archive->asset_type_map[element->type]; + //AssetComponent* ac = &ams->asset_components[component_id]; + Asset* asset = thrd_ams_get_asset_wait(ams, id_str); if (asset) { @@ -301,7 +304,7 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana // the main program should still be able to do some work if possible thrd_ams_set_loaded(asset); - LOG_LEVEL_2( + LOG_FORMAT_2( "Asset %d loaded from archive %d for AMS %d with %n B compressed and %n B uncompressed", {{LOG_DATA_UINT64, &id}, {LOG_DATA_UINT32, &element->type}, {LOG_DATA_BYTE, &component_id}, {LOG_DATA_UINT32, &element->length}, {LOG_DATA_UINT32, &element->uncompressed}} ); diff --git a/asset/AssetManagementSystem.h b/asset/AssetManagementSystem.h index 3fbb8ba..0cfc4bf 100644 --- a/asset/AssetManagementSystem.h +++ b/asset/AssetManagementSystem.h @@ -46,7 +46,7 @@ void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 asset_compo ams->asset_component_count = asset_component_count; ams->asset_components = (AssetComponent *) buffer_get_memory(buf, asset_component_count * sizeof(AssetComponent), 64, true); - LOG_LEVEL_2("Created AMS for %n assets", {{LOG_DATA_INT32, &count}}); + LOG_FORMAT_2("Created AMS for %n assets", {{LOG_DATA_INT32, &count}}); } inline @@ -57,7 +57,7 @@ void ams_component_create(AssetComponent* ac, BufferMemory* buf, int32 chunk_siz chunk_init(&ac->asset_memory, buf, count, chunk_size, 64); pthread_mutex_init(&ac->mutex, NULL); - LOG_LEVEL_2("Created AMS Component for %n assets and %n B = %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}, {LOG_DATA_UINT64, &ac->asset_memory.size}}); + LOG_FORMAT_2("Created AMS Component for %n assets and %n B = %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}, {LOG_DATA_UINT64, &ac->asset_memory.size}}); } inline @@ -74,7 +74,7 @@ void ams_component_create(AssetComponent* ac, byte* buf, int32 chunk_size, int32 pthread_mutex_init(&ac->mutex, NULL); - LOG_LEVEL_2("Created AMS Component for %n assets and %n B = %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}, {LOG_DATA_UINT64, &ac->asset_memory.size}}); + LOG_FORMAT_2("Created AMS Component for %n assets and %n B = %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}, {LOG_DATA_UINT64, &ac->asset_memory.size}}); } inline diff --git a/audio/AudioMixer.h b/audio/AudioMixer.h index 549d3d3..6df8d6c 100644 --- a/audio/AudioMixer.h +++ b/audio/AudioMixer.h @@ -444,6 +444,7 @@ int32 mixer_effects_stereo() } void audio_mixer_mix(AudioMixer* mixer, uint32 size) { + PROFILE(PROFILE_AUDIO_MIXER_MIX); memset(mixer->settings.buffer, 0, size); mixer->settings.sample_buffer_size = 0; diff --git a/camera/Camera.h b/camera/Camera.h index 8cda618..a26ed72 100644 --- a/camera/Camera.h +++ b/camera/Camera.h @@ -52,8 +52,8 @@ struct Camera { alignas(64) f32 orth[16]; }; -void -camera_update_vectors(Camera* camera) +static inline +void camera_update_vectors(Camera* camera) noexcept { f32 cos_ori_x = cosf(OMS_DEG2RAD(camera->orientation.x)); camera->front.x = cos_ori_x * cosf(OMS_DEG2RAD(camera->orientation.y)); @@ -69,32 +69,34 @@ camera_update_vectors(Camera* camera) vec3_normalize(&camera->up); } -void camera_rotate(Camera* camera, int32 dx, int32 dy) +void camera_rotate(Camera* camera, int32 dx, 
int32 dy) noexcept { camera->state_changes |= CAMERA_STATE_CHANGE_NORMAL; camera->orientation.x += dy * camera->sensitivity; camera->orientation.y -= dx * camera->sensitivity; - if (true) { - if (camera->orientation.x > 89.0f) { - camera->orientation.x = 89.0f; - } else if (camera->orientation.x < -89.0f) { - camera->orientation.x = -89.0f; - } + if (camera->orientation.x > 89.0f) { + camera->orientation.x = 89.0f; + } else if (camera->orientation.x < -89.0f) { + camera->orientation.x = -89.0f; + } - if (camera->orientation.y > 360.0f) { - camera->orientation.y -= 360.0f; - } else if (camera->orientation.y < -360.0f) { - camera->orientation.y += 360.0f; - } + if (camera->orientation.y > 360.0f) { + camera->orientation.y -= 360.0f; + } else if (camera->orientation.y < -360.0f) { + camera->orientation.y += 360.0f; } camera_update_vectors(camera); } // you can have up to 4 camera movement inputs at the same time -void camera_movement(Camera* camera, CameraMovement* movement, f32 dt, bool relative_to_world = true) -{ +void camera_movement( + Camera* __restrict camera, + CameraMovement* __restrict movement, + f32 dt, + bool relative_to_world = true +) noexcept { camera->state_changes |= CAMERA_STATE_CHANGE_NORMAL; f32 velocity = camera->speed * dt; @@ -224,7 +226,7 @@ void camera_movement(Camera* camera, CameraMovement* movement, f32 dt, bool rela } inline -void camera_orth_matrix_lh(Camera* __restrict camera) +void camera_orth_matrix_lh(Camera* __restrict camera) noexcept { //mat4_identity(camera->orth); camera->orth[15] = 1.0f; @@ -238,7 +240,7 @@ void camera_orth_matrix_lh(Camera* __restrict camera) } inline -void camera_orth_matrix_rh(Camera* __restrict camera) +void camera_orth_matrix_rh(Camera* __restrict camera) noexcept { //mat4_identity(camera->orth); camera->orth[15] = 1.0f; @@ -252,7 +254,7 @@ void camera_orth_matrix_rh(Camera* __restrict camera) } inline -void camera_projection_matrix_lh(Camera* __restrict camera) +void camera_projection_matrix_lh(Camera* __restrict camera) noexcept { //mat4_identity(camera->projection); camera->projection[15] = 1.0f; @@ -266,7 +268,7 @@ void camera_projection_matrix_lh(Camera* __restrict camera) } inline -void camera_projection_matrix_rh(Camera* __restrict camera) +void camera_projection_matrix_rh(Camera* __restrict camera) noexcept { //mat4_identity(camera->projection); camera->projection[15] = 1.0f; @@ -282,7 +284,7 @@ void camera_projection_matrix_rh(Camera* __restrict camera) // This is usually not used, since it is included in the view matrix // expects the identity matrix inline -void camera_translation_matrix_sparse_rh(const Camera* __restrict camera, f32* translation) +void camera_translation_matrix_sparse_rh(const Camera* __restrict camera, f32* translation) noexcept { translation[12] = camera->location.x; translation[13] = camera->location.y; @@ -290,7 +292,7 @@ void camera_translation_matrix_sparse_rh(const Camera* __restrict camera, f32* t } inline -void camera_translation_matrix_sparse_lh(const Camera* __restrict camera, f32* translation) +void camera_translation_matrix_sparse_lh(const Camera* __restrict camera, f32* translation) noexcept { translation[3] = camera->location.x; translation[7] = camera->location.y; @@ -298,7 +300,7 @@ void camera_translation_matrix_sparse_lh(const Camera* __restrict camera, f32* t } void -camera_view_matrix_lh(Camera* __restrict camera) +camera_view_matrix_lh(Camera* __restrict camera) noexcept { v3_f32 zaxis = { camera->front.x, camera->front.y, camera->front.z }; @@ -330,7 +332,7 @@ 
camera_view_matrix_lh(Camera* __restrict camera) } void -camera_view_matrix_rh(Camera* __restrict camera) +camera_view_matrix_rh(Camera* __restrict camera) noexcept { v3_f32 zaxis = { -camera->front.x, -camera->front.y, -camera->front.z }; diff --git a/command/AppCmdBuffer.cpp b/command/AppCmdBuffer.cpp index d1762d6..3a33568 100644 --- a/command/AppCmdBuffer.cpp +++ b/command/AppCmdBuffer.cpp @@ -27,6 +27,7 @@ #include "../ui/UILayout.h" #include "../ui/UILayout.cpp" #include "../ui/UITheme.h" +#include "../log/Log.h" #include "../scene/SceneInfo.h" #include "../system/FileUtils.cpp" #include "../compiler/CompilerUtils.h" @@ -39,7 +40,7 @@ void cmd_buffer_create(AppCmdBuffer* cb, BufferMemory* buf, int32 commands_count chunk_init(&cb->commands, buf, commands_count, sizeof(Command), 64); pthread_mutex_init(&cb->mutex, NULL); - LOG_LEVEL_2("Created AppCmdBuffer: %n B", {{LOG_DATA_UINT64, &cb->commands.size}}); + LOG_FORMAT_2("Created AppCmdBuffer: %n B", {{LOG_DATA_UINT64, &cb->commands.size}}); } // This doesn't load the asset directly but tells (most likely) a worker thread to load an asset @@ -404,11 +405,14 @@ inline Asset* cmd_texture_load_sync(AppCmdBuffer* cb, const char* name) { return asset; } -inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, int32 asset_id) { +inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, int32 asset_id) +{ // Check if asset already loaded char id_str[9]; int_to_hex(asset_id, id_str); + PROFILE_VERBOSE(PROFILE_CMD_FONT_LOAD_SYNC, id_str); + Asset* asset = thrd_ams_get_asset_wait(cb->ams, id_str); // Load asset if not loaded @@ -428,7 +432,10 @@ inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, int32 asset_id) { return asset; } -inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, const char* name) { +inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, const char* name) +{ + PROFILE_VERBOSE(PROFILE_CMD_FONT_LOAD_SYNC, name); + // Check if asset already loaded Asset* asset = thrd_ams_get_asset_wait(cb->ams, name); @@ -452,14 +459,15 @@ inline Asset* cmd_font_load_sync(AppCmdBuffer* cb, const char* name) { inline UILayout* cmd_layout_load_sync( - AppCmdBuffer* cb, - UILayout* layout, const char* layout_path + AppCmdBuffer* __restrict cb, + UILayout* __restrict layout, const char* __restrict layout_path ) { + PROFILE_VERBOSE(PROFILE_CMD_LAYOUT_LOAD_SYNC, layout_path); FileBody layout_file = {}; file_read(layout_path, &layout_file, cb->mem_vol); if (!layout_file.content) { - LOG_FORMAT(layout_file.content == NULL, "Failed loading layout \"%s\"\n", {{LOG_DATA_CHAR_STR, &layout_path}}); + LOG_FORMAT_1("Failed loading layout \"%s\"\n", {{LOG_DATA_CHAR_STR, &layout_path}}); return NULL; } @@ -470,9 +478,11 @@ UILayout* cmd_layout_load_sync( inline UIThemeStyle* cmd_theme_load_sync( - AppCmdBuffer* cb, - UIThemeStyle* theme, const char* theme_path + AppCmdBuffer* __restrict cb, + UIThemeStyle* __restrict theme, const char* __restrict theme_path ) { + PROFILE_VERBOSE(PROFILE_CMD_THEME_LOAD_SYNC, theme_path); + FileBody theme_file = {}; file_read(theme_path, &theme_file, cb->mem_vol); theme_from_data(theme_file.content, theme); @@ -496,6 +506,7 @@ UILayout* cmd_ui_load_sync( UIThemeStyle* __restrict theme, const char* __restrict theme_path, const Camera* __restrict camera ) { + PROFILE_VERBOSE(PROFILE_CMD_UI_LOAD_SYNC, layout_path); if (!cmd_layout_load_sync(cb, layout, layout_path)) { // We have to make sure that at least the font is set layout->font = general_theme->font; @@ -534,7 +545,7 @@ UILayout* cmd_ui_load(AppCmdBuffer* __restrict cb, const Command* __restrict 
cmd char* theme_path = (char *) pos; str_move_to((const char **) &pos, '\0'); ++pos; - Camera* camera = (Camera *) *((uintptr_t *) pos); + const Camera* camera = (Camera *) *((uintptr_t *) pos); return cmd_ui_load_sync( cb, @@ -592,6 +603,7 @@ void thrd_cmd_ui_load( // If we do it right then and DON'T defer it, this would also solve the first question void cmd_iterate(AppCmdBuffer* cb) { + PROFILE(PROFILE_CMD_ITERATE); int32 last_element = 0; uint32 chunk_id = 0; chunk_iterate_start(&cb->commands, chunk_id) diff --git a/command/AppCmdBuffer.h b/command/AppCmdBuffer.h index fc38d49..0571015 100644 --- a/command/AppCmdBuffer.h +++ b/command/AppCmdBuffer.h @@ -44,7 +44,6 @@ struct AppCmdBuffer { Queue* files_to_load; AudioMixer* mixer; GpuApiType gpu_api_type; - void* gpu_api; }; #if OPENGL @@ -55,7 +54,7 @@ struct AppCmdBuffer { #include "../gpuapi/direct3d/AppCmdBuffer.h" #else inline void* cmd_shader_load(AppCmdBuffer*, Command*) { return NULL; } - inline void* cmd_shader_load_sync(AppCmdBuffer*, void*, int32*) { return NULL; } + inline void* cmd_shader_load_sync(AppCmdBuffer*, void*, const int32*, ...) { return NULL; } #endif #endif \ No newline at end of file diff --git a/compiler/gcc/Atomic.h b/compiler/gcc/Atomic.h index 2121d40..b18e3a8 100644 --- a/compiler/gcc/Atomic.h +++ b/compiler/gcc/Atomic.h @@ -13,393 +13,393 @@ #include "CompilerUtils.h" #include -FORCE_INLINE void atomic_set_relaxed(void** target, void* value) { __atomic_store_n(target, value, __ATOMIC_RELAXED); } -FORCE_INLINE void* atomic_get_relaxed(void** target) { return __atomic_load_n(target, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_set_relaxed(volatile int8* value, int8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_set_relaxed(volatile int16* value, int16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_set_relaxed(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_set_relaxed(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE int8 atomic_fetch_set_relaxed(volatile int8* value, int8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE int16 atomic_fetch_set_relaxed(volatile int16* value, int16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE int8 atomic_get_relaxed(volatile int8* value) { return __atomic_load_n((int8 *) value, __ATOMIC_RELAXED); } -FORCE_INLINE int16 atomic_get_relaxed(volatile int16* value) { return __atomic_load_n((int16 *) value, __ATOMIC_RELAXED); } -FORCE_INLINE int32 atomic_get_relaxed(volatile int32* value) { return __atomic_load_n((int32 *) value, __ATOMIC_RELAXED); } -FORCE_INLINE int64 atomic_get_relaxed(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_increment_relaxed(volatile int8* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_increment_relaxed(volatile 
int16* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_increment_relaxed(volatile int32* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_increment_relaxed(volatile int64* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_add_relaxed(volatile int8* value, int8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_sub_relaxed(volatile int8* value, int8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_add_relaxed(volatile int16* value, int16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_sub_relaxed(volatile int16* value, int16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_add_relaxed(volatile int32* value, int32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_sub_relaxed(volatile int32* value, int32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_add_relaxed(volatile int64* value, int64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_sub_relaxed(volatile int64* value, int64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } -FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } -FORCE_INLINE int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } -FORCE_INLINE int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } -FORCE_INLINE int8 atomic_fetch_add_relaxed(volatile int8* value, int8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE int8 atomic_fetch_sub_relaxed(volatile int8* value, int8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE int16 atomic_fetch_add_relaxed(volatile int16* value, int16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE int16 atomic_fetch_sub_relaxed(volatile int16* value, int16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE 
int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_set_relaxed(volatile uint8* value, uint8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_set_relaxed(volatile uint16* value, uint16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_set_relaxed(volatile uint32* value, uint32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_set_relaxed(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE uint8 atomic_fetch_set_relaxed(volatile uint8* value, uint8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE uint16 atomic_fetch_set_relaxed(volatile uint16* value, uint16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } -FORCE_INLINE uint8 atomic_get_relaxed(volatile uint8* value) { return __atomic_load_n(value, __ATOMIC_RELAXED); } -FORCE_INLINE uint16 atomic_get_relaxed(volatile uint16* value) { return __atomic_load_n(value, __ATOMIC_RELAXED); } -FORCE_INLINE uint32 atomic_get_relaxed(volatile uint32* value) { return __atomic_load_n(value, __ATOMIC_RELAXED); } -FORCE_INLINE uint64 atomic_get_relaxed(volatile uint64* value) { return __atomic_load_n(value, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint8* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint16* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint32* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint64* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_add_relaxed(volatile uint8* value, uint8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint8* value, uint8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_add_relaxed(volatile uint16* value, uint16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint16* value, uint16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_add_relaxed(volatile uint32* value, uint32 increment) { __atomic_add_fetch(value, increment, 
__ATOMIC_RELAXED); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_add_relaxed(volatile uint64* value, uint64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -FORCE_INLINE uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } -FORCE_INLINE uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } -FORCE_INLINE uint8 atomic_fetch_add_relaxed(volatile uint8* value, uint8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE uint8 atomic_fetch_sub_relaxed(volatile uint8* value, uint8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE uint16 atomic_fetch_add_relaxed(volatile uint16* value, uint16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE uint16 atomic_fetch_sub_relaxed(volatile uint16* value, uint16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_and_relaxed(volatile uint8* value, uint8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_and_relaxed(volatile int8* value, int8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_and_relaxed(volatile uint16* value, uint16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_and_relaxed(volatile int16* value, int16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_and_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_and_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_and_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_and_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_or_relaxed(volatile uint8* value, uint8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_or_relaxed(volatile int8* value, int8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_or_relaxed(volatile uint16* value, uint16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void 
atomic_or_relaxed(volatile int16* value, int16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_or_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_or_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_or_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_or_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(void** target, void* value) noexcept { __atomic_store_n(target, value, __ATOMIC_RELAXED); } +FORCE_INLINE void* atomic_get_relaxed(void** target) noexcept { return __atomic_load_n(target, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile int8* value, int8 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile int16* value, int16 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile int32* value, int32 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile int64* value, int64 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE int8 atomic_fetch_set_relaxed(volatile int8* value, int8 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE int16 atomic_fetch_set_relaxed(volatile int16* value, int16 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE int8 atomic_get_relaxed(volatile int8* value) noexcept { return __atomic_load_n((int8 *) value, __ATOMIC_RELAXED); } +FORCE_INLINE int16 atomic_get_relaxed(volatile int16* value) noexcept { return __atomic_load_n((int16 *) value, __ATOMIC_RELAXED); } +FORCE_INLINE int32 atomic_get_relaxed(volatile int32* value) noexcept { return __atomic_load_n((int32 *) value, __ATOMIC_RELAXED); } +FORCE_INLINE int64 atomic_get_relaxed(volatile int64* value) noexcept { return __atomic_load_n((int64 *) value, __ATOMIC_RELAXED); } +FORCE_INLINE int8 atomic_increment_relaxed(volatile int8* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE int8 atomic_decrement_relaxed(volatile int8* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE int16 atomic_increment_relaxed(volatile int16* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE int16 atomic_decrement_relaxed(volatile int16* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE int32 atomic_increment_relaxed(volatile int32* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE int32 atomic_decrement_relaxed(volatile int32* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE int64 atomic_increment_relaxed(volatile int64* value) noexcept { return __atomic_add_fetch(value, 1, 
__ATOMIC_RELAXED); } +FORCE_INLINE int64 atomic_decrement_relaxed(volatile int64* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile int8* value, int8 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile int8* value, int8 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile int16* value, int16 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile int16* value, int16 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile int32* value, int32 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile int32* value, int32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile int64* value, int64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile int64* value, int64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE int8 atomic_fetch_add_relaxed(volatile int8* value, int8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int8 atomic_fetch_sub_relaxed(volatile int8* value, int8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int16 atomic_fetch_add_relaxed(volatile int16* value, int16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int16 atomic_fetch_sub_relaxed(volatile int16* value, int16 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE void 
atomic_set_relaxed(volatile uint8* value, uint8 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile uint16* value, uint16 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile uint32* value, uint32 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_set_relaxed(volatile uint64* value, uint64 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE uint8 atomic_fetch_set_relaxed(volatile uint8* value, uint8 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE uint16 atomic_fetch_set_relaxed(volatile uint16* value, uint16 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELAXED); } +FORCE_INLINE uint8 atomic_get_relaxed(volatile uint8* value) noexcept { return __atomic_load_n(value, __ATOMIC_RELAXED); } +FORCE_INLINE uint16 atomic_get_relaxed(volatile uint16* value) noexcept { return __atomic_load_n(value, __ATOMIC_RELAXED); } +FORCE_INLINE uint32 atomic_get_relaxed(volatile uint32* value) noexcept { return __atomic_load_n(value, __ATOMIC_RELAXED); } +FORCE_INLINE uint64 atomic_get_relaxed(volatile uint64* value) noexcept { return __atomic_load_n(value, __ATOMIC_RELAXED); } +FORCE_INLINE uint8 atomic_increment_relaxed(volatile uint8* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE uint8 atomic_decrement_relaxed(volatile uint8* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE uint16 atomic_increment_relaxed(volatile uint16* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE uint16 atomic_decrement_relaxed(volatile uint16* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE uint32 atomic_increment_relaxed(volatile uint32* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE uint32 atomic_decrement_relaxed(volatile uint32* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE uint64 atomic_increment_relaxed(volatile uint64* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE uint64 atomic_decrement_relaxed(volatile uint64* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile uint8* value, uint8 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile uint8* value, uint8 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile uint16* value, uint16 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile uint16* value, uint16 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile uint32* value, uint32 increment) noexcept { __atomic_add_fetch(value, increment, 
__ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_add_relaxed(volatile uint64* value, uint64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } +FORCE_INLINE uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE uint8 atomic_fetch_add_relaxed(volatile uint8* value, uint8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint8 atomic_fetch_sub_relaxed(volatile uint8* value, uint8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint16 atomic_fetch_add_relaxed(volatile uint16* value, uint16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint16 atomic_fetch_sub_relaxed(volatile uint16* value, uint16 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile uint8* value, uint8 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile int8* value, int8 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile uint16* value, uint16 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile int16* value, int16 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile uint32* value, uint32 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile int32* value, int32 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile uint64* value, uint64 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_and_relaxed(volatile int64* value, int64 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile uint8* value, uint8 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile int8* value, int8 mask) noexcept { 
__atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile uint16* value, uint16 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile int16* value, int16 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile uint32* value, uint32 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile int32* value, int32 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile uint64* value, uint64 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } +FORCE_INLINE void atomic_or_relaxed(volatile int64* value, int64 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELAXED); } -FORCE_INLINE void atomic_set_relaxed(void** target, void* value) { __atomic_store_n(target, value, __ATOMIC_ACQUIRE); } -FORCE_INLINE void* atomic_get_relaxed(void** target) { return __atomic_load_n(target, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_set_relaxed(volatile int8* value, int8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_set_relaxed(volatile int16* value, int16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_set_relaxed(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_set_relaxed(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE int8 atomic_fetch_set_relaxed(volatile int8* value, int8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE int16 atomic_fetch_set_relaxed(volatile int16* value, int16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE int8 atomic_get_relaxed(volatile int8* value) { return __atomic_load_n((int8 *) value, __ATOMIC_ACQUIRE); } -FORCE_INLINE int16 atomic_get_relaxed(volatile int16* value) { return __atomic_load_n((int16 *) value, __ATOMIC_ACQUIRE); } -FORCE_INLINE int32 atomic_get_relaxed(volatile int32* value) { return __atomic_load_n((int32 *) value, __ATOMIC_ACQUIRE); } -FORCE_INLINE int64 atomic_get_relaxed(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_increment_relaxed(volatile int8* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_increment_relaxed(volatile int16* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_increment_relaxed(volatile int32* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_increment_relaxed(volatile int64* value) { __atomic_add_fetch(value, 1, 
__ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_add_relaxed(volatile int8* value, int8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_sub_relaxed(volatile int8* value, int8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_add_relaxed(volatile int16* value, int16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_sub_relaxed(volatile int16* value, int16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_add_relaxed(volatile int32* value, int32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_sub_relaxed(volatile int32* value, int32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_add_relaxed(volatile int64* value, int64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_sub_relaxed(volatile int64* value, int64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } -FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } -FORCE_INLINE int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } -FORCE_INLINE int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } -FORCE_INLINE int8 atomic_fetch_add_relaxed(volatile int8* value, int8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE int8 atomic_fetch_sub_relaxed(volatile int8* value, int8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE int16 atomic_fetch_add_relaxed(volatile int16* value, int16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE int16 atomic_fetch_sub_relaxed(volatile int16* value, int16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_set_relaxed(volatile uint8* value, uint8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_set_relaxed(volatile uint16* value, uint16 new_value) { 
__atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_set_relaxed(volatile uint32* value, uint32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_set_relaxed(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint8 atomic_fetch_set_relaxed(volatile uint8* value, uint8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint16 atomic_fetch_set_relaxed(volatile uint16* value, uint16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint8 atomic_get_relaxed(volatile uint8* value) { return __atomic_load_n(value, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint16 atomic_get_relaxed(volatile uint16* value) { return __atomic_load_n(value, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint32 atomic_get_relaxed(volatile uint32* value) { return __atomic_load_n(value, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint64 atomic_get_relaxed(volatile uint64* value) { return __atomic_load_n(value, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint8* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint16* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint32* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint64* value) { __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_add_relaxed(volatile uint8* value, uint8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint8* value, uint8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_add_relaxed(volatile uint16* value, uint16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint16* value, uint16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_add_relaxed(volatile uint32* value, uint32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_add_relaxed(volatile uint64* value, uint64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint32 
atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } -FORCE_INLINE uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } -FORCE_INLINE uint8 atomic_fetch_add_relaxed(volatile uint8* value, uint8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint8 atomic_fetch_sub_relaxed(volatile uint8* value, uint8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint16 atomic_fetch_add_relaxed(volatile uint16* value, uint16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint16 atomic_fetch_sub_relaxed(volatile uint16* value, uint16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_and_relaxed(volatile uint8* value, uint8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_and_relaxed(volatile int8* value, int8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_and_relaxed(volatile uint16* value, uint16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_and_relaxed(volatile int16* value, int16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_and_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_and_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_and_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_and_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_or_relaxed(volatile uint8* value, uint8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_or_relaxed(volatile int8* value, int8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_or_relaxed(volatile uint16* value, uint16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_or_relaxed(volatile int16* value, int16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_or_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_or_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_or_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_or(value, mask, 
__ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_or_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(void** target, void* value) noexcept { __atomic_store_n(target, value, __ATOMIC_RELEASE); } +FORCE_INLINE void* atomic_get_acquire(void** target) noexcept { return __atomic_load_n(target, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile int8* value, int8 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_set_acquire(volatile int16* value, int16 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_set_acquire(volatile int32* value, int32 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_set_acquire(volatile int64* value, int64 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE int8 atomic_fetch_set_acquire(volatile int8* value, int8 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int16 atomic_fetch_set_acquire(volatile int16* value, int16 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int32 atomic_fetch_set_acquire(volatile int32* value, int32 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int64 atomic_fetch_set_acquire(volatile int64* value, int64 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int8 atomic_get_acquire(volatile int8* value) noexcept { return __atomic_load_n((int8 *) value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int16 atomic_get_acquire(volatile int16* value) noexcept { return __atomic_load_n((int16 *) value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int32 atomic_get_acquire(volatile int32* value) noexcept { return __atomic_load_n((int32 *) value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int64 atomic_get_acquire(volatile int64* value) noexcept { return __atomic_load_n((int64 *) value, __ATOMIC_ACQUIRE); } +FORCE_INLINE int8 atomic_increment_acquire(volatile int8* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE int8 atomic_decrement_acquire(volatile int8* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE int16 atomic_increment_acquire(volatile int16* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE int16 atomic_decrement_acquire(volatile int16* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE int32 atomic_increment_acquire(volatile int32* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE int32 atomic_decrement_acquire(volatile int32* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE int64 atomic_increment_acquire(volatile int64* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE int64 atomic_decrement_acquire(volatile int64* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile int8* value, int8 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile int8* value, int8 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE void 
atomic_add_acquire(volatile int16* value, int16 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile int16* value, int16 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile int32* value, int32 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile int32* value, int32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile int64* value, int64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile int64* value, int64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE f32 atomic_compare_exchange_weak_acquire(volatile f32* value, f32* expected, f32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE f64 atomic_compare_exchange_weak_acquire(volatile f64* value, f64* expected, f64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expected, int32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE int64 atomic_compare_exchange_weak_acquire(volatile int64* value, int64* expected, int64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE int8 atomic_fetch_add_acquire(volatile int8* value, int8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int8 atomic_fetch_sub_acquire(volatile int8* value, int8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int16 atomic_fetch_add_acquire(volatile int16* value, int16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int16 atomic_fetch_sub_acquire(volatile int16* value, int16 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int32 atomic_fetch_add_acquire(volatile int32* value, int32 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int32 atomic_fetch_sub_acquire(volatile int32* value, int32 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int64 atomic_fetch_add_acquire(volatile int64* value, int64 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE int64 atomic_fetch_sub_acquire(volatile int64* value, int64 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile uint8* value, uint8 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile uint16* value, uint16 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile uint32* value, uint32 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_set_acquire(volatile uint64* value, uint64 
new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint8 atomic_fetch_set_acquire(volatile uint8* value, uint8 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint16 atomic_fetch_set_acquire(volatile uint16* value, uint16 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint32 atomic_fetch_set_acquire(volatile uint32* value, uint32 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint64 atomic_fetch_set_acquire(volatile uint64* value, uint64 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint8 atomic_get_acquire(volatile uint8* value) noexcept { return __atomic_load_n(value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint16 atomic_get_acquire(volatile uint16* value) noexcept { return __atomic_load_n(value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint32 atomic_get_acquire(volatile uint32* value) noexcept { return __atomic_load_n(value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint64 atomic_get_acquire(volatile uint64* value) noexcept { return __atomic_load_n(value, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint8 atomic_increment_acquire(volatile uint8* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint8 atomic_decrement_acquire(volatile uint8* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint16 atomic_increment_acquire(volatile uint16* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint16 atomic_decrement_acquire(volatile uint16* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint32 atomic_increment_acquire(volatile uint32* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint32 atomic_decrement_acquire(volatile uint32* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint64 atomic_increment_acquire(volatile uint64* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint64 atomic_decrement_acquire(volatile uint64* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile uint8* value, uint8 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile uint8* value, uint8 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile uint16* value, uint16 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile uint16* value, uint16 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile uint32* value, uint32 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile uint32* value, uint32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_add_acquire(volatile uint64* value, uint64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_sub_acquire(volatile uint64* value, uint64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint32 
atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expected, uint32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire(volatile uint64* value, uint64* expected, uint64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE uint8 atomic_fetch_add_acquire(volatile uint8* value, uint8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint8 atomic_fetch_sub_acquire(volatile uint8* value, uint8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint16 atomic_fetch_add_acquire(volatile uint16* value, uint16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint16 atomic_fetch_sub_acquire(volatile uint16* value, uint16 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint32 atomic_fetch_add_acquire(volatile uint32* value, uint32 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint32 atomic_fetch_sub_acquire(volatile uint32* value, uint32 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint64 atomic_fetch_add_acquire(volatile uint64* value, uint64 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE uint64 atomic_fetch_sub_acquire(volatile uint64* value, uint64 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile uint8* value, uint8 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile int8* value, int8 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile uint16* value, uint16 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile int16* value, int16 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile uint32* value, uint32 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile int32* value, int32 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile uint64* value, uint64 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_and_acquire(volatile int64* value, int64 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile uint8* value, uint8 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile int8* value, int8 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile uint16* value, uint16 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile int16* value, int16 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile uint32* value, uint32 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void 
atomic_or_acquire(volatile int32* value, int32 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile uint64* value, uint64 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } +FORCE_INLINE void atomic_or_acquire(volatile int64* value, int64 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_ACQUIRE); } -FORCE_INLINE void atomic_set_relaxed(void** target, void* value) { __atomic_store_n(target, value, __ATOMIC_RELEASE); } -FORCE_INLINE void* atomic_get_relaxed(void** target) { return __atomic_load_n(target, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_set_relaxed(volatile int8* value, int8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_set_relaxed(volatile int16* value, int16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_set_relaxed(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_set_relaxed(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE int8 atomic_fetch_set_relaxed(volatile int8* value, int8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE int16 atomic_fetch_set_relaxed(volatile int16* value, int16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE int8 atomic_get_relaxed(volatile int8* value) { return __atomic_load_n((int8 *) value, __ATOMIC_RELEASE); } -FORCE_INLINE int16 atomic_get_relaxed(volatile int16* value) { return __atomic_load_n((int16 *) value, __ATOMIC_RELEASE); } -FORCE_INLINE int32 atomic_get_relaxed(volatile int32* value) { return __atomic_load_n((int32 *) value, __ATOMIC_RELEASE); } -FORCE_INLINE int64 atomic_get_relaxed(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_increment_relaxed(volatile int8* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_increment_relaxed(volatile int16* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_increment_relaxed(volatile int32* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_increment_relaxed(volatile int64* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_add_relaxed(volatile int8* value, int8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_sub_relaxed(volatile int8* value, int8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_add_relaxed(volatile int16* value, int16 
increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_sub_relaxed(volatile int16* value, int16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_add_relaxed(volatile int32* value, int32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_sub_relaxed(volatile int32* value, int32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_add_relaxed(volatile int64* value, int64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_sub_relaxed(volatile int64* value, int64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } -FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } -FORCE_INLINE int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } -FORCE_INLINE int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } -FORCE_INLINE int8 atomic_fetch_add_relaxed(volatile int8* value, int8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE int8 atomic_fetch_sub_relaxed(volatile int8* value, int8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE int16 atomic_fetch_add_relaxed(volatile int16* value, int16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE int16 atomic_fetch_sub_relaxed(volatile int16* value, int16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_set_relaxed(volatile uint8* value, uint8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_set_relaxed(volatile uint16* value, uint16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_set_relaxed(volatile uint32* value, uint32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_set_relaxed(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE uint8 atomic_fetch_set_relaxed(volatile uint8* value, uint8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE 
uint16 atomic_fetch_set_relaxed(volatile uint16* value, uint16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); } -FORCE_INLINE uint8 atomic_get_relaxed(volatile uint8* value) { return __atomic_load_n(value, __ATOMIC_RELEASE); } -FORCE_INLINE uint16 atomic_get_relaxed(volatile uint16* value) { return __atomic_load_n(value, __ATOMIC_RELEASE); } -FORCE_INLINE uint32 atomic_get_relaxed(volatile uint32* value) { return __atomic_load_n(value, __ATOMIC_RELEASE); } -FORCE_INLINE uint64 atomic_get_relaxed(volatile uint64* value) { return __atomic_load_n(value, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint8* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint16* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint32* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint64* value) { __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_add_relaxed(volatile uint8* value, uint8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint8* value, uint8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_add_relaxed(volatile uint16* value, uint16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint16* value, uint16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_add_relaxed(volatile uint32* value, uint32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_add_relaxed(volatile uint64* value, uint64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -FORCE_INLINE uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } -FORCE_INLINE uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } -FORCE_INLINE uint8 atomic_fetch_add_relaxed(volatile uint8* value, uint8 operand) { return 
__atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE uint8 atomic_fetch_sub_relaxed(volatile uint8* value, uint8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE uint16 atomic_fetch_add_relaxed(volatile uint16* value, uint16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE uint16 atomic_fetch_sub_relaxed(volatile uint16* value, uint16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_and_relaxed(volatile uint8* value, uint8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_and_relaxed(volatile int8* value, int8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_and_relaxed(volatile uint16* value, uint16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_and_relaxed(volatile int16* value, int16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_and_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_and_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_and_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_and_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_or_relaxed(volatile uint8* value, uint8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_or_relaxed(volatile int8* value, int8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_or_relaxed(volatile uint16* value, uint16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_or_relaxed(volatile int16* value, int16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_or_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_or_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_or_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_or_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_set_release(void** target, void* value) noexcept { __atomic_store_n(target, value, __ATOMIC_RELEASE); } +FORCE_INLINE void* atomic_get_release(void** target) noexcept { return __atomic_load_n(target, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_set_release(volatile int8* value, int8 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); } +FORCE_INLINE 
void atomic_set_release(volatile int16* value, int16 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_set_release(volatile int32* value, int32 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_set_release(volatile int64* value, int64 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE int8 atomic_fetch_set_release(volatile int8* value, int8 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE int16 atomic_fetch_set_release(volatile int16* value, int16 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE int32 atomic_fetch_set_release(volatile int32* value, int32 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE int64 atomic_fetch_set_release(volatile int64* value, int64 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE int8 atomic_get_release(volatile int8* value) noexcept { return __atomic_load_n((int8 *) value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int16 atomic_get_release(volatile int16* value) noexcept { return __atomic_load_n((int16 *) value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int32 atomic_get_release(volatile int32* value) noexcept { return __atomic_load_n((int32 *) value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int64 atomic_get_release(volatile int64* value) noexcept { return __atomic_load_n((int64 *) value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE int8 atomic_increment_release(volatile int8* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE int8 atomic_decrement_release(volatile int8* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE int16 atomic_increment_release(volatile int16* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE int16 atomic_decrement_release(volatile int16* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE int32 atomic_increment_release(volatile int32* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE int32 atomic_decrement_release(volatile int32* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE int64 atomic_increment_release(volatile int64* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE int64 atomic_decrement_release(volatile int64* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_add_release(volatile int8* value, int8 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_sub_release(volatile int8* value, int8 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_add_release(volatile int16* value, int16 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_sub_release(volatile int16* value, int16 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_add_release(volatile int32* value, int32 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_sub_release(volatile int32* value, int32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_add_release(volatile int64* value, int64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_sub_release(volatile int64* value, int64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); }
+FORCE_INLINE f32 atomic_compare_exchange_weak_release(volatile f32* value, f32* expected, f32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); return *expected; }
+FORCE_INLINE f64 atomic_compare_exchange_weak_release(volatile f64* value, f64* expected, f64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); return *expected; }
+FORCE_INLINE int32 atomic_compare_exchange_weak_release(volatile int32* value, int32* expected, int32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); return *expected; }
+FORCE_INLINE int64 atomic_compare_exchange_weak_release(volatile int64* value, int64* expected, int64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); return *expected; }
+FORCE_INLINE int8 atomic_fetch_add_release(volatile int8* value, int8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); }
+FORCE_INLINE int8 atomic_fetch_sub_release(volatile int8* value, int8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); }
+FORCE_INLINE int16 atomic_fetch_add_release(volatile int16* value, int16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); }
+FORCE_INLINE int16 atomic_fetch_sub_release(volatile int16* value, int16 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); }
+FORCE_INLINE int32 atomic_fetch_add_release(volatile int32* value, int32 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); }
+FORCE_INLINE int32 atomic_fetch_sub_release(volatile int32* value, int32 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); }
+FORCE_INLINE int64 atomic_fetch_add_release(volatile int64* value, int64 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); }
+FORCE_INLINE int64 atomic_fetch_sub_release(volatile int64* value, int64 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_set_release(volatile uint8* value, uint8 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_set_release(volatile uint16* value, uint16 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_set_release(volatile uint32* value, uint32 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_set_release(volatile uint64* value, uint64 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE uint8 atomic_fetch_set_release(volatile uint8* value, uint8 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE uint16 atomic_fetch_set_release(volatile uint16* value, uint16 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE uint32 atomic_fetch_set_release(volatile uint32* value, uint32 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE uint64 atomic_fetch_set_release(volatile uint64* value, uint64 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_RELEASE); }
+FORCE_INLINE uint8 atomic_get_release(volatile uint8* value) noexcept { return __atomic_load_n(value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint16 atomic_get_release(volatile uint16* value) noexcept { return __atomic_load_n(value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint32 atomic_get_release(volatile uint32* value) noexcept { return __atomic_load_n(value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint64 atomic_get_release(volatile uint64* value) noexcept { return __atomic_load_n(value, __ATOMIC_SEQ_CST); }
+FORCE_INLINE uint8 atomic_increment_release(volatile uint8* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE uint8 atomic_decrement_release(volatile uint8* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE uint16 atomic_increment_release(volatile uint16* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE uint16 atomic_decrement_release(volatile uint16* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE uint32 atomic_increment_release(volatile uint32* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE uint32 atomic_decrement_release(volatile uint32* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE uint64 atomic_increment_release(volatile uint64* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE uint64 atomic_decrement_release(volatile uint64* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_add_release(volatile uint8* value, uint8 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_sub_release(volatile uint8* value, uint8 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_add_release(volatile uint16* value, uint16 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_sub_release(volatile uint16* value, uint16 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_add_release(volatile uint32* value, uint32 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_sub_release(volatile uint32* value, uint32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_add_release(volatile uint64* value, uint64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); }
+FORCE_INLINE void atomic_sub_release(volatile uint64* value, uint64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); }
+FORCE_INLINE uint32 atomic_compare_exchange_weak_release(volatile uint32* value, uint32* expected, uint32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); return *expected; }
+FORCE_INLINE uint64 atomic_compare_exchange_weak_release(volatile uint64* value, uint64* expected, uint64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); return *expected; }
+FORCE_INLINE uint8 atomic_fetch_add_release(volatile uint8* value, uint8 operand) noexcept {
return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } +FORCE_INLINE uint8 atomic_fetch_sub_release(volatile uint8* value, uint8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } +FORCE_INLINE uint16 atomic_fetch_add_release(volatile uint16* value, uint16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } +FORCE_INLINE uint16 atomic_fetch_sub_release(volatile uint16* value, uint16 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } +FORCE_INLINE uint32 atomic_fetch_add_release(volatile uint32* value, uint32 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } +FORCE_INLINE uint32 atomic_fetch_sub_release(volatile uint32* value, uint32 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } +FORCE_INLINE uint64 atomic_fetch_add_release(volatile uint64* value, uint64 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } +FORCE_INLINE uint64 atomic_fetch_sub_release(volatile uint64* value, uint64 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_and_release(volatile uint8* value, uint8 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_and_release(volatile int8* value, int8 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_and_release(volatile uint16* value, uint16 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_and_release(volatile int16* value, int16 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_and_release(volatile uint32* value, uint32 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_and_release(volatile int32* value, int32 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_and_release(volatile uint64* value, uint64 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_and_release(volatile int64* value, int64 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_or_release(volatile uint8* value, uint8 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_or_release(volatile int8* value, int8 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_or_release(volatile uint16* value, uint16 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_or_release(volatile int16* value, int16 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_or_release(volatile uint32* value, uint32 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_or_release(volatile int32* value, int32 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_or_release(volatile uint64* value, uint64 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } +FORCE_INLINE void atomic_or_release(volatile int64* value, int64 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_RELEASE); } -FORCE_INLINE void atomic_set_relaxed(void** target, void* value) { __atomic_store_n(target, value, __ATOMIC_SEQ_CST); } -FORCE_INLINE void* atomic_get_relaxed(void** target) { return 
__atomic_load_n(target, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_set_relaxed(volatile int8* value, int8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_set_relaxed(volatile int16* value, int16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_set_relaxed(volatile int32* value, int32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_set_relaxed(volatile int64* value, int64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE int8 atomic_fetch_set_relaxed(volatile int8* value, int8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE int16 atomic_fetch_set_relaxed(volatile int16* value, int16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE int8 atomic_get_relaxed(volatile int8* value) { return __atomic_load_n((int8 *) value, __ATOMIC_SEQ_CST); } -FORCE_INLINE int16 atomic_get_relaxed(volatile int16* value) { return __atomic_load_n((int16 *) value, __ATOMIC_SEQ_CST); } -FORCE_INLINE int32 atomic_get_relaxed(volatile int32* value) { return __atomic_load_n((int32 *) value, __ATOMIC_SEQ_CST); } -FORCE_INLINE int64 atomic_get_relaxed(volatile int64* value) { return __atomic_load_n((int64 *) value, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_increment_relaxed(volatile int8* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_increment_relaxed(volatile int16* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_increment_relaxed(volatile int32* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_increment_relaxed(volatile int64* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_add_relaxed(volatile int8* value, int8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_sub_relaxed(volatile int8* value, int8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_add_relaxed(volatile int16* value, int16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_sub_relaxed(volatile int16* value, int16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_add_relaxed(volatile int32* value, int32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_sub_relaxed(volatile int32* value, int32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_add_relaxed(volatile int64* value, int64 increment) { 
__atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_sub_relaxed(volatile int64* value, int64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } -FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } -FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } -FORCE_INLINE int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } -FORCE_INLINE int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } -FORCE_INLINE int8 atomic_fetch_add_relaxed(volatile int8* value, int8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE int8 atomic_fetch_sub_relaxed(volatile int8* value, int8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE int16 atomic_fetch_add_relaxed(volatile int16* value, int16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE int16 atomic_fetch_sub_relaxed(volatile int16* value, int16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_set_relaxed(volatile uint8* value, uint8 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_set_relaxed(volatile uint16* value, uint16 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_set_relaxed(volatile uint32* value, uint32 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_set_relaxed(volatile uint64* value, uint64 new_value) { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint8 atomic_fetch_set_relaxed(volatile uint8* value, uint8 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint16 atomic_fetch_set_relaxed(volatile uint16* value, uint16 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint8 atomic_get_relaxed(volatile uint8* value) { return __atomic_load_n(value, __ATOMIC_SEQ_CST); 
} -FORCE_INLINE uint16 atomic_get_relaxed(volatile uint16* value) { return __atomic_load_n(value, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint32 atomic_get_relaxed(volatile uint32* value) { return __atomic_load_n(value, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint64 atomic_get_relaxed(volatile uint64* value) { return __atomic_load_n(value, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint8* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint8* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint16* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint16* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint32* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint32* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_increment_relaxed(volatile uint64* value) { __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_decrement_relaxed(volatile uint64* value) { __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_add_relaxed(volatile uint8* value, uint8 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint8* value, uint8 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_add_relaxed(volatile uint16* value, uint16 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint16* value, uint16 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_add_relaxed(volatile uint32* value, uint32 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_add_relaxed(volatile uint64* value, uint64 increment) { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } -FORCE_INLINE uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } -FORCE_INLINE uint8 atomic_fetch_add_relaxed(volatile uint8* value, uint8 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint8 atomic_fetch_sub_relaxed(volatile uint8* value, uint8 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint16 atomic_fetch_add_relaxed(volatile uint16* value, uint16 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint16 atomic_fetch_sub_relaxed(volatile uint16* value, uint16 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint32 atomic_fetch_add_relaxed(volatile uint32* 
value, uint32 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_and_relaxed(volatile uint8* value, uint8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_and_relaxed(volatile int8* value, int8 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_and_relaxed(volatile uint16* value, uint16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_and_relaxed(volatile int16* value, int16 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_and_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_and_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_and_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_and_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_or_relaxed(volatile uint8* value, uint8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_or_relaxed(volatile int8* value, int8 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_or_relaxed(volatile uint16* value, uint16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_or_relaxed(volatile int16* value, int16 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_or_relaxed(volatile uint32* value, uint32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_or_relaxed(volatile int32* value, int32 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_or_relaxed(volatile uint64* value, uint64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } -FORCE_INLINE void atomic_or_relaxed(volatile int64* value, int64 mask) { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_set_acquire_release(void** target, void* value) noexcept { __atomic_store_n(target, value, __ATOMIC_SEQ_CST); } +FORCE_INLINE void* atomic_get_acquire_release(void** target) noexcept { return __atomic_load_n(target, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_set_acquire_release(volatile int8* value, int8 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_set_acquire_release(volatile int16* value, int16 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_set_acquire_release(volatile int32* value, int32 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_set_acquire_release(volatile int64* value, int64 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE int8 atomic_fetch_set_acquire_release(volatile int8* value, int8 new_value) noexcept { 
return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE int16 atomic_fetch_set_acquire_release(volatile int16* value, int16 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE int32 atomic_fetch_set_acquire_release(volatile int32* value, int32 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE int64 atomic_fetch_set_acquire_release(volatile int64* value, int64 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE int8 atomic_get_acquire_release(volatile int8* value) noexcept { return __atomic_load_n((int8 *) value, __ATOMIC_SEQ_CST); } +FORCE_INLINE int16 atomic_get_acquire_release(volatile int16* value) noexcept { return __atomic_load_n((int16 *) value, __ATOMIC_SEQ_CST); } +FORCE_INLINE int32 atomic_get_acquire_release(volatile int32* value) noexcept { return __atomic_load_n((int32 *) value, __ATOMIC_SEQ_CST); } +FORCE_INLINE int64 atomic_get_acquire_release(volatile int64* value) noexcept { return __atomic_load_n((int64 *) value, __ATOMIC_SEQ_CST); } +FORCE_INLINE int8 atomic_increment_acquire_release(volatile int8* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE int8 atomic_decrement_acquire_release(volatile int8* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE int16 atomic_increment_acquire_release(volatile int16* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE int16 atomic_decrement_acquire_release(volatile int16* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE int32 atomic_increment_acquire_release(volatile int32* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE int32 atomic_decrement_acquire_release(volatile int32* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE int64 atomic_increment_acquire_release(volatile int64* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE int64 atomic_decrement_acquire_release(volatile int64* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_add_acquire_release(volatile int8* value, int8 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_sub_acquire_release(volatile int8* value, int8 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_add_acquire_release(volatile int16* value, int16 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_sub_acquire_release(volatile int16* value, int16 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_add_acquire_release(volatile int32* value, int32 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_sub_acquire_release(volatile int32* value, int32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_add_acquire_release(volatile int64* value, int64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_sub_acquire_release(volatile int64* value, int64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } +FORCE_INLINE f32 
atomic_compare_exchange_weak_acquire_release(volatile f32* value, f32* expected, f32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } +FORCE_INLINE f64 atomic_compare_exchange_weak_acquire_release(volatile f64* value, f64* expected, f64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } +FORCE_INLINE int32 atomic_compare_exchange_weak_acquire_release(volatile int32* value, int32* expected, int32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } +FORCE_INLINE int64 atomic_compare_exchange_weak_acquire_release(volatile int64* value, int64* expected, int64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } +FORCE_INLINE int8 atomic_fetch_add_acquire_release(volatile int8* value, int8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE int8 atomic_fetch_sub_acquire_release(volatile int8* value, int8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE int16 atomic_fetch_add_acquire_release(volatile int16* value, int16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE int16 atomic_fetch_sub_acquire_release(volatile int16* value, int16 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE int32 atomic_fetch_add_acquire_release(volatile int32* value, int32 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE int32 atomic_fetch_sub_acquire_release(volatile int32* value, int32 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE int64 atomic_fetch_add_acquire_release(volatile int64* value, int64 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE int64 atomic_fetch_sub_acquire_release(volatile int64* value, int64 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_set_acquire_release(volatile uint8* value, uint8 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_set_acquire_release(volatile uint16* value, uint16 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_set_acquire_release(volatile uint32* value, uint32 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_set_acquire_release(volatile uint64* value, uint64 new_value) noexcept { __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint8 atomic_fetch_set_acquire_release(volatile uint8* value, uint8 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint16 atomic_fetch_set_acquire_release(volatile uint16* value, uint16 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint32 atomic_fetch_set_acquire_release(volatile uint32* value, uint32 new_value) noexcept { return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint64 atomic_fetch_set_acquire_release(volatile uint64* value, uint64 new_value) noexcept { return __atomic_exchange_n(value, new_value, 
__ATOMIC_SEQ_CST); } +FORCE_INLINE uint8 atomic_get_acquire_release(volatile uint8* value) noexcept { return __atomic_load_n(value, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint16 atomic_get_acquire_release(volatile uint16* value) noexcept { return __atomic_load_n(value, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint32 atomic_get_acquire_release(volatile uint32* value) noexcept { return __atomic_load_n(value, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint64 atomic_get_acquire_release(volatile uint64* value) noexcept { return __atomic_load_n(value, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint8 atomic_increment_acquire_release(volatile uint8* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint8 atomic_decrement_acquire_release(volatile uint8* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint16 atomic_increment_acquire_release(volatile uint16* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint16 atomic_decrement_acquire_release(volatile uint16* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint32 atomic_increment_acquire_release(volatile uint32* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint32 atomic_decrement_acquire_release(volatile uint32* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint64 atomic_increment_acquire_release(volatile uint64* value) noexcept { return __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint64 atomic_decrement_acquire_release(volatile uint64* value) noexcept { return __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_add_acquire_release(volatile uint8* value, uint8 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_sub_acquire_release(volatile uint8* value, uint8 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_add_acquire_release(volatile uint16* value, uint16 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_sub_acquire_release(volatile uint16* value, uint16 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_add_acquire_release(volatile uint32* value, uint32 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_add_acquire_release(volatile uint64* value, uint64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_sub_acquire_release(volatile uint64* value, uint64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint32* expected, uint32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } +FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire_release(volatile uint64* value, uint64* expected, uint64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } +FORCE_INLINE uint8 atomic_fetch_add_acquire_release(volatile uint8* value, uint8 
operand) noexcept { return __atomic_fetch_add(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint8 atomic_fetch_sub_acquire_release(volatile uint8* value, uint8 operand) noexcept { return __atomic_fetch_sub(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint16 atomic_fetch_add_acquire_release(volatile uint16* value, uint16 operand) noexcept { return __atomic_fetch_add(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint16 atomic_fetch_sub_acquire_release(volatile uint16* value, uint16 operand) noexcept { return __atomic_fetch_sub(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint32 atomic_fetch_add_acquire_release(volatile uint32* value, uint32 operand) noexcept { return __atomic_fetch_add(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint32 atomic_fetch_sub_acquire_release(volatile uint32* value, uint32 operand) noexcept { return __atomic_fetch_sub(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint64 atomic_fetch_add_acquire_release(volatile uint64* value, uint64 operand) noexcept { return __atomic_fetch_add(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE uint64 atomic_fetch_sub_acquire_release(volatile uint64* value, uint64 operand) noexcept { return __atomic_fetch_sub(value, operand, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_and_acquire_release(volatile uint8* value, uint8 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_and_acquire_release(volatile int8* value, int8 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_and_acquire_release(volatile uint16* value, uint16 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_and_acquire_release(volatile int16* value, int16 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_and_acquire_release(volatile uint32* value, uint32 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_and_acquire_release(volatile int32* value, int32 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_and_acquire_release(volatile uint64* value, uint64 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_and_acquire_release(volatile int64* value, int64 mask) noexcept { __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_or_acquire_release(volatile uint8* value, uint8 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_or_acquire_release(volatile int8* value, int8 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_or_acquire_release(volatile uint16* value, uint16 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_or_acquire_release(volatile int16* value, int16 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_or_acquire_release(volatile uint32* value, uint32 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_or_acquire_release(volatile int32* value, int32 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_or_acquire_release(volatile uint64* value, uint64 mask) noexcept { __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); } +FORCE_INLINE void atomic_or_acquire_release(volatile int64* value, int64 mask) noexcept { __atomic_fetch_or(value, mask,
__ATOMIC_SEQ_CST); } // Check out the intrinsic functions fence_memory and fence_write // These are much faster and could accomplish what you are doing diff --git a/compiler/gcc/CompilerUtils.h b/compiler/gcc/CompilerUtils.h index d1b0e90..fa2805f 100644 --- a/compiler/gcc/CompilerUtils.h +++ b/compiler/gcc/CompilerUtils.h @@ -24,6 +24,9 @@ #define FORCE_INLINE __attribute__((always_inline)) +#include <unistd.h> +#define compiler_debug_print(message) do { const char* cdp_ = (message); while (*cdp_) { write(STDOUT_FILENO, cdp_, 1); ++cdp_; } } while (0) + #define compiler_popcount_32(data) __builtin_popcount((data)) #define compiler_popcount_64(data) __builtin_popcountl((data)) #define __restrict __restrict__ @@ -33,7 +36,7 @@ #define compiler_prefetch_l2(mem) __builtin_prefetch((mem), 0, 2) #define compiler_prefetch_l3(mem) __builtin_prefetch((mem), 0, 1) -int32 compiler_find_first_bit_r2l(uint64 mask) { +int32 compiler_find_first_bit_r2l(uint64 mask) noexcept { if (!mask) { return -1; } @@ -45,7 +48,7 @@ int32 compiler_find_first_bit_r2l(uint64 mask) { #endif } -int32 compiler_find_first_bit_r2l(uint32 mask) { +int32 compiler_find_first_bit_r2l(uint32 mask) noexcept { if (!mask) { return -1; } @@ -57,7 +60,7 @@ int32 compiler_find_first_bit_r2l(uint32 mask) { #endif } -int32 compiler_find_first_bit_l2r(uint64 mask) { +int32 compiler_find_first_bit_l2r(uint64 mask) noexcept { if (!mask) { return -1; } @@ -69,7 +72,7 @@ int32 compiler_find_first_bit_l2r(uint64 mask) { #endif } -int32 compiler_find_first_bit_l2r(uint32 mask) { +int32 compiler_find_first_bit_l2r(uint32 mask) noexcept { if (!mask) { return -1; } @@ -91,7 +94,7 @@ void cpuid(int32 cpuInfo[4], int32 function_id) { */ inline -void compiler_cpuid(int32 cpuInfo[4], int32 function_id) { +void compiler_cpuid(int32 cpuInfo[4], int32 function_id) noexcept { asm volatile( "cpuid" : "=a" (cpuInfo[0]), "=b" (cpuInfo[1]), "=c" (cpuInfo[2]), "=d" (cpuInfo[3]) diff --git a/compiler/msvc/CompilerUtils.h b/compiler/msvc/CompilerUtils.h index b7f8901..1cc5fe6 100644 --- a/compiler/msvc/CompilerUtils.h +++ b/compiler/msvc/CompilerUtils.h @@ -28,6 +28,8 @@ typedef SSIZE_T ssize_t; #define FORCE_INLINE __forceinline +#define compiler_debug_print(message) OutputDebugStringA((message)) + #define compiler_popcount_32(data) __popcnt((data)) #define compiler_popcount_64(data) __popcnt64((data)) @@ -37,7 +39,7 @@ typedef SSIZE_T ssize_t; #define compiler_prefetch_l3(mem) __prefetch((mem)) inline -int32 compiler_find_first_bit_r2l(uint64 mask) { +int32 compiler_find_first_bit_r2l(uint64 mask) noexcept { if (!mask) { return -1; } @@ -47,7 +49,7 @@ int32 compiler_find_first_bit_r2l(uint64 mask) { } inline -int32 compiler_find_first_bit_r2l(uint32 mask) { +int32 compiler_find_first_bit_r2l(uint32 mask) noexcept { if (!mask) { return -1; } @@ -57,7 +59,7 @@ int32 compiler_find_first_bit_r2l(uint32 mask) { } inline -int32 compiler_find_first_bit_l2r(uint64 mask) { +int32 compiler_find_first_bit_l2r(uint64 mask) noexcept { if (!mask) { return -1; } @@ -67,7 +69,7 @@ int32 compiler_find_first_bit_l2r(uint64 mask) { } inline -int32 compiler_find_first_bit_l2r(uint32 mask) { +int32 compiler_find_first_bit_l2r(uint32 mask) noexcept { if (!mask) { return -1; } @@ -77,7 +79,7 @@ int32 compiler_find_first_bit_l2r(uint32 mask) { } inline -void compiler_cpuid(int32 cpuInfo[4], int32 function_id) { +void compiler_cpuid(int32 cpuInfo[4], int32 function_id) noexcept { __cpuidex(cpuInfo, function_id, 0); } diff --git a/compression/CRC.h b/compression/CRC.h deleted file mode 100644 index f4e1e61..0000000 --- a/compression/CRC.h
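Editorial note: the table-driven CRC-32 removed below is the standard reflected variant (polynomial 0xEDB88320, initial value and final XOR of 0xFFFFFFFF). Should it ever need to be reintroduced without the 1 KiB lookup table, a minimal bitwise sketch that produces the same checksum:

uint32 crc32_checksum_bitwise(const uint8* p, uint32 length) noexcept
{
    uint32 crc = 0xFFFFFFFF;
    while (length-- != 0) {
        crc ^= *p++;
        for (int32 i = 0; i < 8; ++i) {
            // Same reflected polynomial the deleted crc32_table_fill used
            crc = (crc & 1) ? (crc >> 1) ^ 0xEDB88320 : (crc >> 1);
        }
    }

    return crc ^ 0xFFFFFFFF;
}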
+++ /dev/null @@ -1,73 +0,0 @@ -/** - * Jingga - * - * @copyright Jingga - * @license OMS License 2.0 - * @version 1.0.0 - * @link https://jingga.app - */ -#ifndef TOS_COMPRESSION_CRC_H -#define TOS_COMPRESSION_CRC_H - -#include "../stdlib/Types.h" - -uint32 crc_table[256] = -{ - 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, - 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, - 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, - 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, - 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, - 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, - 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, - 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, - 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, - 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, - 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, - 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, - 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, - 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, - 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, - 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, - 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, - 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, - 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, - 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, - 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, - 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, - 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, - 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, - 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, - 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, - 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, - 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, - 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, - 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, - 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, - 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D, -}; - -uint32 crc32_checksum_calculate(uint8 *p, uint32 length) -{ - uint32 crc = 0xFFFFFFFF; - while (length-- != 0) { - crc = 
crc_table[((uint8) crc ^ *(p++))] ^ (crc >> 8); - } - - // return (~crc); also works - return (crc ^ 0xFFFFFFFF); -} - -void crc32_table_fill(uint32 *table){ - uint8 index = 0,z; - do { - table[index] = index; - for(z = 8; z; z--) { - table[index] = (table[index] & 1) - ? (table[index] >> 1) ^ 0xEDB88320 - : table[index] >> 1; - } - } while(++index); -} - -#endif \ No newline at end of file diff --git a/compression/Huffman.h b/compression/Huffman.h index 801ee04..0de0473 100644 --- a/compression/Huffman.h +++ b/compression/Huffman.h @@ -36,8 +36,8 @@ struct Huffman { // We could combine this function with the one below but this would introduce a if != 0 check for the frequency // I would assume the current version is faster since we avoid a branch -inline -HuffmanNode* huffman_node_create(Huffman* hf, int32 frequency, byte character, HuffmanNode* left, HuffmanNode* right) +static inline +HuffmanNode* huffman_node_create(Huffman* hf, int32 frequency, byte character, HuffmanNode* left, HuffmanNode* right) noexcept { HuffmanNode* node = hf->pool + hf->node_count++; node->character = character; @@ -47,8 +47,8 @@ HuffmanNode* huffman_node_create(Huffman* hf, int32 frequency, byte character, H } // Same as other function but frequency = 0 -inline -HuffmanNode* huffman_node_create(Huffman* hf, byte character, HuffmanNode* left, HuffmanNode* right) +static inline +HuffmanNode* huffman_node_create(Huffman* hf, byte character, HuffmanNode* left, HuffmanNode* right) noexcept { HuffmanNode* node = hf->pool + hf->node_count++; node->left = left; @@ -58,8 +58,8 @@ HuffmanNode* huffman_node_create(Huffman* hf, byte character, HuffmanNode* left, return node; } -inline -void huffman_node_insert(Huffman* hf, HuffmanNode* node) +static inline +void huffman_node_insert(Huffman* hf, HuffmanNode* node) noexcept { int32 child_id; int32 parent_id = hf->pq_end++; @@ -72,7 +72,8 @@ void huffman_node_insert(Huffman* hf, HuffmanNode* node) hf->pq[parent_id] = node; } -HuffmanNode* huffman_node_remove(Huffman* hf) +static +HuffmanNode* huffman_node_remove(Huffman* hf) noexcept { int32 parent_id = 1; int32 left_child_id; @@ -99,7 +100,8 @@ HuffmanNode* huffman_node_remove(Huffman* hf) return min_node; } -int64 huffman_code_build(Huffman* hf, HuffmanNode* root, char* code, int32 length, char* code_buffer, int32* buffer_position) +static +int64 huffman_code_build(Huffman* hf, HuffmanNode* root, char* code, int32 length, char* code_buffer, int32* buffer_position) noexcept { if (root->character) { code[length] = 0; @@ -114,7 +116,7 @@ int64 huffman_code_build(Huffman* hf, HuffmanNode* root, char* code, int32 lengt code[length] = '1'; huffman_code_build(hf, root->right, code, length + 1, code_buffer, buffer_position); } -void huffman_init(Huffman* hf, const byte* in) +void huffman_init(Huffman* hf, const byte* in) noexcept { int32 frequency[256] = {0}; int32 buffer_position = 0; @@ -174,7 +176,7 @@ void huffman_load(Huffman* hf, const byte* in) } inline -int64 huffman_encode(Huffman* hf, const byte* in, byte* out) +int64 huffman_encode(Huffman* hf, const byte* in, byte* out) noexcept { uint64 bit_length = 0; int32 pos_bit = 0; @@ -202,7 +204,7 @@ int64 huffman_encode(Huffman* hf, const byte* in, byte* out) } inline -int64 huffman_decode(Huffman* hf, const byte* in, byte* out, uint64 bit_length) +int64 huffman_decode(Huffman* hf, const byte* in, byte* out, uint64 bit_length) noexcept { HuffmanNode* current = hf->pq[1]; int32 pos_bit = 0; diff --git a/compression/LZP.h b/compression/LZP.h index bbef1ac..19b13cb 100644 --- 
a/compression/LZP.h +++ b/compression/LZP.h @@ -14,7 +14,7 @@ #include "../stdlib/Types.h" -uint32 lzp_encode(const byte* in, size_t length, byte* out) +uint32 lzp_encode(const byte* in, size_t length, byte* out) noexcept { byte buf[9]; byte table[1 << 16] = {0}; @@ -58,7 +58,7 @@ uint32 lzp_encode(const byte* in, size_t length, byte* out) return out_pos; } -uint32 lzp_decode(const byte* in, size_t length, byte* out) +uint32 lzp_decode(const byte* in, size_t length, byte* out) noexcept { byte buf[8]; byte table[1 << 16] = {0}; @@ -100,7 +100,8 @@ uint32 lzp_decode(const byte* in, size_t length, byte* out) return out_pos; } -int32 find_longest_match(char *window, int32 window_start, char *buffer, int32 buffer_size, int32 *match_position) { +int32 find_longest_match(char *window, int32 window_start, char *buffer, int32 buffer_size, int32 *match_position) noexcept +{ int32 best_length = 0; int32 best_offset = 0; @@ -125,7 +126,8 @@ int32 find_longest_match(char *window, int32 window_start, char *buffer, int32 b return best_length; } -uint32 lzp3_encode(const byte* in, size_t length, byte* out) { +uint32 lzp3_encode(const byte* in, size_t length, byte* out) noexcept +{ char window[4096] = {0}; int32 window_start = 0; @@ -161,7 +163,8 @@ uint32 lzp3_encode(const byte* in, size_t length, byte* out) { return out_size; } -uint32 lzp3_decode(const byte* in, size_t length, byte* out) { +uint32 lzp3_decode(const byte* in, size_t length, byte* out) noexcept +{ char window[4096] = {0}; int32 window_start = 0; diff --git a/compression/RLE.h b/compression/RLE.h index b261d68..ceacd5f 100644 --- a/compression/RLE.h +++ b/compression/RLE.h @@ -16,7 +16,7 @@ #include "../utils/StringUtils.h" // max out length = length * 2 + 1 -uint64 rle_encode(const char* in, size_t length, char* out) +uint64 rle_encode(const char* in, size_t length, char* out) noexcept { uint64 count; uint64 j = 0; @@ -38,7 +38,7 @@ uint64 rle_encode(const char* in, size_t length, char* out) return j; } -uint64 rle_decode(const char* in, size_t length, char* out) +uint64 rle_decode(const char* in, size_t length, char* out) noexcept { uint64 j = 0; diff --git a/font/Font.h b/font/Font.h index d1326e5..aa03dc5 100644 --- a/font/Font.h +++ b/font/Font.h @@ -30,7 +30,7 @@ struct Glyph { struct Font { uint32 glyph_count; - char texture_name[32]; + char texture_name[32]; // @question Do we even need this f32 size; // Default font size at which the font renders best f32 line_height; // How tall is a single line (mostly important for multiple lines) @@ -46,8 +46,9 @@ void font_init(Font* font, byte* data, int count) font->glyph_count = count; } +// @performance replace with Eytzinger (obviously we would also have to change the order in the font file itself) inline -Glyph* font_glyph_find(const Font* font, uint32 codepoint) +Glyph* font_glyph_find(const Font* font, uint32 codepoint) noexcept { uint32 perfect_glyph_pos = codepoint - font->glyphs[0].codepoint; uint32 limit = OMS_MIN(perfect_glyph_pos, font->glyph_count - 1); @@ -250,8 +251,8 @@ int32 font_to_data( return size; } -inline -f32 font_line_height(Font* font, f32 size) +FORCE_INLINE +f32 font_line_height(Font* font, f32 size) noexcept { return font->line_height * size / font->size; } diff --git a/gpuapi/AntiAliasing.h b/gpuapi/AntiAliasing.h index 7ee749d..e12ece8 100644 --- a/gpuapi/AntiAliasing.h +++ b/gpuapi/AntiAliasing.h @@ -9,10 +9,13 @@ #ifndef TOS_GPUAPI_ANTI_ALIASING_H #define TOS_GPUAPI_ANTI_ALIASING_H -enum AntiAliasingType { +enum AntiAliasingType : byte {
ANTI_ALIASING_TYPE_NONE, ANTI_ALIASING_TYPE_MSAA, ANTI_ALIASING_TYPE_SSAA, + ANTI_ALIASING_TYPE_SMAA, + ANTI_ALIASING_TYPE_FXAA, + ANTI_ALIASING_TYPE_TAA, }; #endif \ No newline at end of file diff --git a/gpuapi/GpuAttributeType.h b/gpuapi/GpuAttributeType.h new file mode 100644 index 0000000..d8f36cd --- /dev/null +++ b/gpuapi/GpuAttributeType.h @@ -0,0 +1,51 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_GPUAPI_ATTRIBUTE_TYPE_H +#define TOS_GPUAPI_ATTRIBUTE_TYPE_H + +#include "../stdlib/Types.h" +#include "../compiler/CompilerUtils.h" + +enum GpuAttributeType { + GPU_ATTRIBUTE_TYPE_VERTEX_3D, + GPU_ATTRIBUTE_TYPE_VERTEX_3D_NORMAL, + GPU_ATTRIBUTE_TYPE_VERTEX_3D_COLOR, + GPU_ATTRIBUTE_TYPE_VERTEX_3D_TEXTURE_COLOR, + GPU_ATTRIBUTE_TYPE_VERTEX_3D_SAMPLER_TEXTURE_COLOR, + GPU_ATTRIBUTE_TYPE_VERTEX_2D_TEXTURE +}; + +inline constexpr +int32 gpuapi_attribute_count(GpuAttributeType type) +{ + switch (type) { + case GPU_ATTRIBUTE_TYPE_VERTEX_3D: { + return 4; + }; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_NORMAL: { + return 2; + }; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_COLOR: { + return 2; + }; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_TEXTURE_COLOR: { + return 2; + }; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_SAMPLER_TEXTURE_COLOR: { + return 3; + }; + case GPU_ATTRIBUTE_TYPE_VERTEX_2D_TEXTURE: { + return 2; + }; + default: + UNREACHABLE(); + }; +} + +#endif \ No newline at end of file diff --git a/gpuapi/RenderUtils.h b/gpuapi/RenderUtils.h index 69b803b..51f8962 100644 --- a/gpuapi/RenderUtils.h +++ b/gpuapi/RenderUtils.h @@ -18,17 +18,17 @@ #include "../ui/UIAlignment.h" #include "../architecture/Intrinsics.h" -inline +FORCE_INLINE int32 vertex_degenerate_create( - Vertex3DTextureColor* __restrict vertices, f32 zindex, + Vertex3DSamplerTextureColor* __restrict vertices, f32 zindex, f32 x, f32 y -) { +) noexcept { // Degenerate triangles // They are alternating every loop BUT since we use references they look the same in code // WARNING: Before using we must make sure that the 0 index is defined // The easiest way is to just define a "degenerate" starting point - vertices[0] = {{vertices[-1].position.x, vertices[-1].position.y, zindex}, {}}; - vertices[1] = {{x, y, zindex}, {}}; + vertices[0] = {{vertices[-1].position.x, vertices[-1].position.y, zindex}, -1, {}}; + vertices[1] = {{x, y, zindex}, -1, {}}; return 2; } @@ -38,7 +38,7 @@ void adjust_aligned_position( f32* __restrict x, f32* __restrict y, f32 width, f32 height, byte alignment -) +) noexcept { if (alignment & UI_ALIGN_H_RIGHT) { *x -= width; @@ -57,7 +57,7 @@ static inline void adjust_aligned_position( v4_f32* vec, byte alignment -) +) noexcept { if (alignment & UI_ALIGN_H_RIGHT) { vec->x -= vec->width; @@ -74,10 +74,10 @@ void adjust_aligned_position( inline int32 vertex_line_create( - Vertex3DTextureColor* __restrict vertices, f32 zindex, + Vertex3DSamplerTextureColor* __restrict vertices, f32 zindex, v2_f32 start, v2_f32 end, f32 thickness, byte alignment, uint32 rgba = 0 -) { +) noexcept { if (alignment & UI_ALIGN_H_RIGHT) { start.x -= thickness; end.x -= thickness; @@ -102,13 +102,13 @@ int32 vertex_line_create( int32 idx = 0; - vertices[idx++] = {{start.x, start.y, zindex}, {-1.0f, BITCAST(rgba, f32)}}; - vertices[idx++] = {{start.x + thickness * norm1, start.y + thickness * norm2, zindex}, {-1.0f, BITCAST(rgba, f32)}}; - vertices[idx++] = {{end.x, end.y, zindex}, {-1.0f, BITCAST(rgba, f32)}}; + vertices[idx++] = {{start.x, start.y, zindex}, -1, {-1.0f, 
BITCAST(rgba, f32)}}; + vertices[idx++] = {{start.x + thickness * norm1, start.y + thickness * norm2, zindex}, -1, {-1.0f, BITCAST(rgba, f32)}}; + vertices[idx++] = {{end.x, end.y, zindex}, -1, {-1.0f, BITCAST(rgba, f32)}}; - vertices[idx++] = {{end.x, end.y, zindex}, {-1.0f, BITCAST(rgba, f32)}}; - vertices[idx++] = {{end.x + thickness * norm1, end.y + thickness * norm2, zindex}, {-1.0f, BITCAST(rgba, f32)}}; - vertices[idx++] = {{start.x + thickness * norm1, start.y + thickness * norm2, zindex}, {-1.0f, BITCAST(rgba, f32)}}; + vertices[idx++] = {{end.x, end.y, zindex}, -1, {-1.0f, BITCAST(rgba, f32)}}; + vertices[idx++] = {{end.x + thickness * norm1, end.y + thickness * norm2, zindex}, -1, {-1.0f, BITCAST(rgba, f32)}}; + vertices[idx++] = {{start.x + thickness * norm1, start.y + thickness * norm2, zindex}, -1, {-1.0f, BITCAST(rgba, f32)}}; return idx; } @@ -117,10 +117,11 @@ int32 vertex_line_create( // Individual meshes without degenerates might be faster inline int32 vertex_rect_create( - Vertex3DTextureColor* __restrict vertices, f32 zindex, + Vertex3DSamplerTextureColor* __restrict vertices, f32 zindex, int32 sampler, v4_f32 dimension, byte alignment, uint32 rgba = 0, v2_f32 tex1 = {}, v2_f32 tex2 = {} -) { +) noexcept { + PROFILE(PROFILE_VERTEX_RECT_CREATE); if (alignment) { adjust_aligned_position(&dimension, alignment); } @@ -137,13 +138,13 @@ int32 vertex_rect_create( f32 x_width = dimension.x + dimension.width; int32 idx = 0; - vertices[idx++] = {{dimension.x, dimension.y, zindex}, tex1}; - vertices[idx++] = {{dimension.x, y_height, zindex}, {tex1.x, tex2.y}}; - vertices[idx++] = {{x_width, dimension.y, zindex}, {tex2.x, tex1.y}}; + vertices[idx++] = {{dimension.x, dimension.y, zindex}, sampler, tex1}; + vertices[idx++] = {{dimension.x, y_height, zindex}, sampler, {tex1.x, tex2.y}}; + vertices[idx++] = {{x_width, dimension.y, zindex}, sampler, {tex2.x, tex1.y}}; - vertices[idx++] = {{x_width, dimension.y, zindex}, {tex2.x, tex1.y}}; - vertices[idx++] = {{dimension.x, y_height, zindex}, {tex1.x, tex2.y}}; - vertices[idx++] = {{x_width, y_height, zindex}, tex2}; + vertices[idx++] = {{x_width, dimension.y, zindex}, sampler, {tex2.x, tex1.y}}; + vertices[idx++] = {{dimension.x, y_height, zindex}, sampler, {tex1.x, tex2.y}}; + vertices[idx++] = {{x_width, y_height, zindex}, sampler, tex2}; return idx; } @@ -151,7 +152,7 @@ int32 vertex_rect_create( static inline f32 text_calculate_dimensions_height( const Font* __restrict font, const char* __restrict text, f32 scale, int32 length -) { +) noexcept { f32 line_height = font->line_height * scale; f32 y = line_height; @@ -169,7 +170,7 @@ f32 text_calculate_dimensions_height( static inline f32 text_calculate_dimensions_width( const Font* __restrict font, const char* __restrict text, bool is_ascii, f32 scale, int32 length -) { +) noexcept { f32 x = 0; f32 offset_x = 0; @@ -200,7 +201,7 @@ static inline void text_calculate_dimensions( f32* __restrict width, f32* __restrict height, const Font* __restrict font, const char* __restrict text, bool is_ascii, f32 scale, int32 length -) { +) noexcept { f32 line_height = font->line_height * scale; f32 x = 0; f32 y = line_height; @@ -238,11 +239,12 @@ void text_calculate_dimensions( // @todo We should be able to cut off text at an arbitrary position, not just at a line_height incremental // we could probably get the MIN of the glyph height and the remaining window height v3_int32 vertex_text_create( - Vertex3DTextureColor* __restrict vertices, f32 zindex, + Vertex3DSamplerTextureColor* __restrict 
vertices, f32 zindex, int32 sampler, v4_f32 dimension, byte alignment, const Font* __restrict font, const char* __restrict text, f32 size, uint32 rgba = 0 -) { +) noexcept { + PROFILE(PROFILE_VERTEX_TEXT_CREATE); int32 length = utf8_str_length(text); if (length < 1) { return {}; @@ -300,7 +302,7 @@ v3_int32 vertex_text_create( if (character != ' ' && character != '\t') { // @todo We should probably inline the code here, we might be able to even optimize it then idx += vertex_rect_create( - vertices + idx, zindex, + vertices + idx, zindex, sampler, {offset_x, offset_y, glyph->metrics.width * scale, glyph->metrics.height * scale}, 0, 0, glyph->coords.start, glyph->coords.end ); diff --git a/gpuapi/direct3d/AppCmdBuffer.h b/gpuapi/direct3d/AppCmdBuffer.h index 949b78d..643c79d 100644 --- a/gpuapi/direct3d/AppCmdBuffer.h +++ b/gpuapi/direct3d/AppCmdBuffer.h @@ -10,12 +10,12 @@ #define TOS_GPUAPI_DIRECTX_APP_CMD_BUFFER_H #include "../../stdlib/Types.h" +#include "../../log/PerformanceProfiler.h" #include "Shader.h" #include "ShaderUtils.h" #include "../ShaderType.h" #include "../../asset/Asset.h" #include "../../command/AppCmdBuffer.h" -#include "GpuApiContainer.h" #include #include @@ -25,11 +25,13 @@ void* cmd_shader_load(AppCmdBuffer*, Command*) { return NULL; } -void* cmd_shader_load_sync(AppCmdBuffer* cb, Shader* shader, int32* shader_ids) { +void* cmd_shader_load_sync( + AppCmdBuffer* __restrict cb, Shader* __restrict shader, const int32* __restrict shader_ids, + ID3D12Device* __restrict device, ID3D12PipelineState** __restrict pipeline, ID3D12RootSignature* __restrict pipeline_layout +) { + PROFILE_VERBOSE(PROFILE_CMD_SHADER_LOAD_SYNC, ""); char asset_id[9]; - GpuApiContainer* gpu_api = (GpuApiContainer *) cb->gpu_api; - ID3DBlob* shader_assets[SHADER_TYPE_SIZE]; for (int32 i = 0; i < SHADER_TYPE_SIZE; ++i) { shader_assets[i] = NULL; @@ -62,8 +64,8 @@ void* cmd_shader_load_sync(AppCmdBuffer* cb, Shader* shader, int32* shader_ids) } // Make shader/program - shader->id = program_make( - gpu_api->device, &gpu_api->pipeline, gpu_api->pipeline_layout, + shader->id = pipeline_make( + device, pipeline, pipeline_layout, shader_assets[0], shader_assets[1], shader_assets[2] ); diff --git a/gpuapi/direct3d/DirectXUtils.h b/gpuapi/direct3d/DirectXUtils.h index 89478f5..be7c65e 100644 --- a/gpuapi/direct3d/DirectXUtils.h +++ b/gpuapi/direct3d/DirectXUtils.h @@ -9,6 +9,7 @@ #ifndef TOS_GPUAPI_DIRECTX_UTILS_H #define TOS_GPUAPI_DIRECTX_UTILS_H +#include "../../stdlib/Types.h" #include #include #include @@ -16,8 +17,7 @@ #include "../../../GameEngine/log/Log.h" // #include "../../../EngineDependencies/directx/d3d12.h" // #include "../../../EngineDependencies/directx/d3dx12.h" - -#include "../../stdlib/Types.h" +#include "FramesInFlightContainer.h" // A more (compile-time) efficient version of the windows macro IID_PPV_ARGS #define IID_PPVOID(pointer) __uuidof(**(pointer)), (void **) (pointer) @@ -91,33 +91,33 @@ int32 max_directx_version() // Returns frame index int32 wait_for_previous_frame( - ID3D12Fence* fence, HANDLE fence_event, UINT64* fence_value, + FramesInFlightContainer* frames_in_flight, ID3D12CommandQueue* graphics_queue, IDXGISwapChain3* swapchain ) { - // WAITING FOR THE FRAME TO COMPLETE BEFORE CONTINUING IS NOT BEST PRACTICE. + // @todo WAITING FOR THE FRAME TO COMPLETE BEFORE CONTINUING IS NOT BEST PRACTICE. // This is code implemented as such for simplicity. 
The D3D12HelloFrameBuffering // sample illustrates how to use fences for efficient resource usage and to // maximize GPU utilization. - UINT64 fence_value_temp = *fence_value; + UINT64 fence_value_temp = frames_in_flight->fence_value; // Signal and increment the fence value. - if(FAILED(graphics_queue->Signal(fence, fence_value_temp))) { - LOG(true, "DirectX12 Signal"); + if(FAILED(graphics_queue->Signal(frames_in_flight->fence, fence_value_temp))) { + LOG_1("DirectX12 Signal"); ASSERT_SIMPLE(false); } - ++(*fence_value); + ++frames_in_flight->fence_value; // Wait until the previous frame is finished. - if (fence->GetCompletedValue() < fence_value_temp) { - if (FAILED(fence->SetEventOnCompletion(fence_value_temp, fence_event))) { - LOG(true, "DirectX12 SetEventOnCompletion"); + if (frames_in_flight->fence->GetCompletedValue() < fence_value_temp) { + if (FAILED(frames_in_flight->fence->SetEventOnCompletion(fence_value_temp, frames_in_flight->fence_event))) { + LOG_1("DirectX12 SetEventOnCompletion"); ASSERT_SIMPLE(false); } - WaitForSingleObject(fence_event, INFINITE); + WaitForSingleObject(frames_in_flight->fence_event, INFINITE); } return swapchain->GetCurrentBackBufferIndex(); @@ -144,7 +144,7 @@ void directx_debug_callback( } */ - LOG(true, description); + LOG_1(description); ASSERT_SIMPLE(false); } @@ -172,7 +172,7 @@ void gpuapi_debug_messenger_setup(ID3D12Device* device) inline void gpuapi_create_logical_device(ID3D12Device** device) { if (FAILED(D3D12CreateDevice(NULL, D3D_FEATURE_LEVEL_11_0, IID_PPVOID(device)))) { - LOG(true, "DirectX12 D3D12CreateDevice"); + LOG_1("DirectX12 D3D12CreateDevice"); ASSERT_SIMPLE(false); } } diff --git a/gpuapi/direct3d/FramesInFlightContainer.h b/gpuapi/direct3d/FramesInFlightContainer.h new file mode 100644 index 0000000..a80454f --- /dev/null +++ b/gpuapi/direct3d/FramesInFlightContainer.h @@ -0,0 +1,24 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_GPUAPI_DIRECT3D_FRAMES_IN_FLIGHT_CONTAINER_H +#define TOS_GPUAPI_DIRECT3D_FRAMES_IN_FLIGHT_CONTAINER_H + +#include "../../stdlib/Types.h" +#include <d3d12.h> + +struct FramesInFlightContainer { + uint32 count; + uint32 index; + ID3D12Fence* fence; + UINT64 fence_value; + HANDLE fence_event; + ID3D12Resource** framebuffers; +}; + +#endif \ No newline at end of file diff --git a/gpuapi/direct3d/GpuApiContainer.h b/gpuapi/direct3d/GpuApiContainer.h deleted file mode 100644 index 46f1410..0000000 --- a/gpuapi/direct3d/GpuApiContainer.h +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Jingga - * - * @copyright Jingga - * @license OMS License 2.0 - * @version 1.0.0 - * @link https://jingga.app - */ -#ifndef TOS_GPUAPI_DIRECTX_GPU_API_CONTAINER_H -#define TOS_GPUAPI_DIRECTX_GPU_API_CONTAINER_H - -#include "../../stdlib/Types.h" -// #include "../../../EngineDependencies/directx/d3d12.h" -// #include "../../../EngineDependencies/directx/d3dx12.h" -#include -#include -#include -#include - -struct GpuApiContainer { - uint32 frames_in_flight; - uint32 framebuffer_idx; - - ID3D12Device* device; - IDXGISwapChain4* swapchain; - - ID3D12CommandQueue* graphics_queue; - ID3D12DescriptorHeap* rtv_heap; // basically = swapchain_image_views - uint32 rtv_info_size; - // @todo should be dynamic size based on frames_in_flight, no?
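Editorial note: the @todo above is exactly what the new FramesInFlightContainer resolves, since its framebuffers member is sized at runtime instead of being a fixed [2] array. A minimal setup sketch (illustrative only; the count of 3 is an assumed triple-buffering configuration, and the fence/event calls are the standard D3D12 and Win32 APIs):

FramesInFlightContainer frames = {};
frames.count = 3; // assumed; must match the swapchain buffer count
frames.index = 0;
frames.fence_value = 0;

// Storage for frames.count back buffer pointers; any engine allocator works here
ID3D12Resource* framebuffer_storage[3] = {};
frames.framebuffers = framebuffer_storage;

device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPVOID(&frames.fence));
frames.fence_event = CreateEvent(NULL, FALSE, FALSE, NULL);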
- ID3D12Resource* framebuffer[2]; - ID3D12CommandAllocator* command_pool; - ID3D12GraphicsCommandList* command_buffer; - ID3D12PipelineState* pipeline; - ID3D12RootSignature* pipeline_layout; - ID3D12Fence* in_flight_fence; - UINT64 fence_value = 0; - HANDLE fence_event; - - // ???? - D3D12_VIEWPORT m_viewport; - D3D12_RECT m_scissorRect; - - // @todo This definitely doesn't belong here - ID3D12Resource* m_vertexBuffer; - D3D12_VERTEX_BUFFER_VIEW m_vertexBufferView; -}; - -#endif \ No newline at end of file diff --git a/gpuapi/direct3d/ShaderUtils.h b/gpuapi/direct3d/ShaderUtils.h index a2c683e..39d4886 100644 --- a/gpuapi/direct3d/ShaderUtils.h +++ b/gpuapi/direct3d/ShaderUtils.h @@ -44,7 +44,7 @@ ID3DBlob* shader_make(const char* type, const char* source, int32 source_size) ID3DBlob* blob; ID3DBlob* errMsgs; if (FAILED(D3DCompile2(source, source_size, NULL, NULL, NULL, "main", type, compileFlags, 0, 0, NULL, 0, &blob, &errMsgs))) { - LOG(true, "DirectX12 D3DCompile2"); + LOG_1("DirectX12 D3DCompile2"); ASSERT_SIMPLE(false); } @@ -55,7 +55,7 @@ ID3DBlob* shader_make(const char* type, const char* source, int32 source_size) return blob; } -ID3D12PipelineState* program_make( +ID3D12PipelineState* pipeline_make( ID3D12Device* device, ID3D12PipelineState** pipeline, ID3D12RootSignature* pipeline_layout, @@ -118,7 +118,7 @@ ID3D12PipelineState* program_make( pipeline_state_info.SampleDesc.Count = 1; if (FAILED(device->CreateGraphicsPipelineState(&pipeline_state_info, IID_PPV_ARGS(pipeline)))) { - LOG(true, "DirectX12 CreateGraphicsPipelineState"); + LOG_1("DirectX12 CreateGraphicsPipelineState"); ASSERT_SIMPLE(false); } diff --git a/gpuapi/opengl/AppCmdBuffer.h b/gpuapi/opengl/AppCmdBuffer.h index c8980f5..e05f0d8 100644 --- a/gpuapi/opengl/AppCmdBuffer.h +++ b/gpuapi/opengl/AppCmdBuffer.h @@ -10,6 +10,7 @@ #define TOS_GPUAPI_OPENGL_APP_CMD_BUFFER_H #include "../../stdlib/Types.h" +#include "../../log/PerformanceProfiler.h" #include "OpenglUtils.h" #include "Shader.h" #include "ShaderUtils.h" @@ -21,7 +22,8 @@ void* cmd_shader_load(AppCmdBuffer*, Command*) { return NULL; } -void* cmd_shader_load_sync(AppCmdBuffer* cb, Shader* shader, int32* shader_ids) { +void* cmd_shader_load_sync(AppCmdBuffer* __restrict cb, Shader* __restrict shader, const int32* __restrict shader_ids) { + PROFILE_VERBOSE(PROFILE_CMD_SHADER_LOAD_SYNC, ""); char asset_id[9]; int32 shader_assets[SHADER_TYPE_SIZE]; @@ -56,7 +58,7 @@ void* cmd_shader_load_sync(AppCmdBuffer* cb, Shader* shader, int32* shader_ids) } // Make shader/program - shader->id = program_make( + shader->id = pipeline_make( shader_assets[0], shader_assets[1], shader_assets[2], cb->mem_vol ); diff --git a/gpuapi/opengl/GpuApiContainer.h b/gpuapi/opengl/FramesInFlightContainer.h similarity index 57% rename from gpuapi/opengl/GpuApiContainer.h rename to gpuapi/opengl/FramesInFlightContainer.h index 9e4e722..c9c9d38 100644 --- a/gpuapi/opengl/GpuApiContainer.h +++ b/gpuapi/opengl/FramesInFlightContainer.h @@ -6,15 +6,15 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef TOS_GPUAPI_OPENGL_GPU_API_CONTAINER_H -#define TOS_GPUAPI_OPENGL_GPU_API_CONTAINER_H +#ifndef TOS_GPUAPI_OPENGL_FRAMES_IN_FLIGHT_CONTAINER_H +#define TOS_GPUAPI_OPENGL_FRAMES_IN_FLIGHT_CONTAINER_H #include "../../stdlib/Types.h" #include "OpenglUtils.h" -struct GpuApiContainer { - uint32 frames_in_flight; - uint32 framebuffer_idx; +struct FramesInFlightContainer { + uint32 count; + uint32 index; OpenglFrameData* framebuffers; GLsync framebuffer_sync; }; diff --git 
a/gpuapi/opengl/OpenglDescriptorSetLayoutBinding.h b/gpuapi/opengl/OpenglDescriptorSetLayoutBinding.h new file mode 100644 index 0000000..9a4e9e5 --- /dev/null +++ b/gpuapi/opengl/OpenglDescriptorSetLayoutBinding.h @@ -0,0 +1,19 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_GPUAPI_OPENGL_DESCRIPTOR_SET_LAYOUT_BINDING_H +#define TOS_GPUAPI_OPENGL_DESCRIPTOR_SET_LAYOUT_BINDING_H + +#include "../../stdlib/Types.h" + +struct OpenglDescriptorSetLayoutBinding { + int32 binding; + const char* name; +}; + +#endif \ No newline at end of file diff --git a/gpuapi/opengl/OpenglUtils.h b/gpuapi/opengl/OpenglUtils.h index aa74fc5..d535b87 100644 --- a/gpuapi/opengl/OpenglUtils.h +++ b/gpuapi/opengl/OpenglUtils.h @@ -16,6 +16,7 @@ #include "../../image/Image.cpp" #include "../../utils/StringUtils.h" #include "../../log/Log.h" +#include "../../log/Stats.h" #include "../../system/FileUtils.cpp" #include "../RenderUtils.h" #include "Opengl.h" @@ -31,7 +32,7 @@ { GLenum err; while ((err = glGetError()) != GL_NO_ERROR) { - LOG_FORMAT(true, "Opengl error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); + LOG_FORMAT_1("Opengl error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); ASSERT_SIMPLE(err == GL_NO_ERROR); } } @@ -59,7 +60,7 @@ void opengl_debug_callback(GLenum, GLenum, GLuint, GLenum severity, GLsizei, con return; } - LOG(true, message); + LOG_1(message); ASSERT_SIMPLE(false); } diff --git a/gpuapi/opengl/OpenglWin32.h b/gpuapi/opengl/OpenglWin32.h index 148f73e..90a967b 100644 --- a/gpuapi/opengl/OpenglWin32.h +++ b/gpuapi/opengl/OpenglWin32.h @@ -852,7 +852,7 @@ void opengl_destroy(Window* window) ReleaseDC(window->hwnd, window->hdc); } -void opengl_init(Window* window, int32 multisample = 0) +void opengl_instance_create(Window* window, int32 multisample = 0) { gl_extensions_load(); diff --git a/gpuapi/opengl/Shader.h b/gpuapi/opengl/Shader.h index 264c72e..3dfc102 100644 --- a/gpuapi/opengl/Shader.h +++ b/gpuapi/opengl/Shader.h @@ -10,10 +10,13 @@ #define TOS_GPUAPI_OPENGL_SHADER_H #include "../../stdlib/Types.h" +#include "OpenglDescriptorSetLayoutBinding.h" struct Shader { uint32 id; - uint32 locations[7]; + + OpenglDescriptorSetLayoutBinding descriptor_set_layout[7]; + byte data[16]; }; diff --git a/gpuapi/opengl/ShaderUtils.h b/gpuapi/opengl/ShaderUtils.h index 80cc122..33cf7ac 100644 --- a/gpuapi/opengl/ShaderUtils.h +++ b/gpuapi/opengl/ShaderUtils.h @@ -12,8 +12,19 @@ #include "../../stdlib/Types.h" #include "../../memory/RingMemory.h" #include "../../log/Log.h" +#include "../../object/Vertex.h" +#include "Shader.h" #include "Opengl.h" #include "../ShaderType.h" +#include "../GpuAttributeType.h" + +struct OpenglVertexInputAttributeDescription { + uint32 location; + uint32 count; + int32 format; + int32 stride; + void* offset; +}; int32 shader_type_index(ShaderType type) { @@ -27,61 +38,6 @@ int32 shader_type_index(ShaderType type) } } -// Set value based on shader uniform name -inline -void shader_set_value(uint32 id, const char* name, bool value) -{ - glUniform1i(glGetUniformLocation(id, name), (int32) value); -} - -inline -void shader_set_value(uint32 id, const char* name, int32 value) -{ - glUniform1i(glGetUniformLocation(id, name), value); -} - -inline -void shader_set_value(uint32 id, const char* name, f32 value) -{ - glUniform1f(glGetUniformLocation(id, name), value); -} - -inline -void shader_set_v2(uint32 id, const char* name, const f32* value) -{ - glUniform2fv(glGetUniformLocation(id, name), 1, value); 
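Editorial note on the removal in progress here: the name-based setters are dropped in favor of resolving a uniform's location once and reusing it, which avoids a glGetUniformLocation string lookup on every set. A usage sketch of the retained location-based API together with the new gpuapi_descriptor_set_layout_create (the uniform name u_sampler0 is hypothetical, and the int overload of shader_set_value kept by this patch is assumed):

// At shader load time: resolve names to locations once
OpenglDescriptorSetLayoutBinding bindings[] = {
    { 0, "u_sampler0" } // binding is overwritten by the lookup below
};
gpuapi_descriptor_set_layout_create(&shader, bindings, 1);

// Per frame: set by cached location, no string lookup
shader_set_value((uint32) shader.descriptor_set_layout[0].binding, (int32) 0);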
-} - -inline -void shader_set_v3(uint32 id, const char* name, const f32* value) -{ - glUniform3fv(glGetUniformLocation(id, name), 1, value); -} - -inline -void shader_set_v4(uint32 id, const char* name, const f32* value) -{ - glUniform4fv(glGetUniformLocation(id, name), 1, value); -} - -inline -void shader_set_m2(uint32 id, const char* name, const f32* value) -{ - glUniformMatrix2fv(glGetUniformLocation(id, name), 1, GL_FALSE, value); -} - -inline -void shader_set_m3(uint32 id, const char* name, const f32* value) -{ - glUniformMatrix3fv(glGetUniformLocation(id, name), 1, GL_FALSE, value); -} - -inline -void shader_set_m4(uint32 id, const char* name, const f32* value) -{ - glUniformMatrix4fv(glGetUniformLocation(id, name), 1, GL_FALSE, value); -} - // Set value based on uniform location inline void shader_set_value(uint32 location, bool value) @@ -146,15 +102,6 @@ uint32 shader_get_attrib_location(uint32 id, const char* name) return glGetAttribLocation(id, name); } -inline -uint32 shader_get_uniform_location(uint32 id, const char* name) -{ - // By using this you can retreive the shader variable name at a point where and when you know it - // BUT set values later on in generalized functions without knowing the shader variable name - // Basically like pointers - return glGetUniformLocation(id, name); -} - inline void shader_check_link_errors(uint32 id, char* log) { @@ -272,10 +219,10 @@ GLuint shader_make(GLenum type, const char* source, RingMemory* ring) GLint length; glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &length); - GLchar *info = (GLchar *) ring_get_memory(ring, length * sizeof(GLchar)); + GLchar* info = (GLchar *) ring_get_memory(ring, length * sizeof(GLchar)); glGetShaderInfoLog(shader, length, NULL, info); - LOG(true, info); + LOG_1(info); ASSERT_SIMPLE(false); } @@ -293,7 +240,7 @@ int32 program_get_size(uint32 program) return size; } -GLuint program_make( +GLuint pipeline_make( GLuint vertex_shader, GLuint fragment_shader, GLint geometry_shader, @@ -321,7 +268,7 @@ GLuint program_make( GLchar *info = (GLchar *) ring_get_memory(ring, length * sizeof(GLchar)); glGetProgramInfoLog(program, length, NULL, info); - LOG(true, info); + LOG_1(info); ASSERT_SIMPLE(false); } @@ -353,4 +300,160 @@ void pipeline_use(uint32 id) glUseProgram(id); } +inline +void gpuapi_attribute_setup(GpuAttributeType type, const OpenglVertexInputAttributeDescription* attr) +{ + int32 length = gpuapi_attribute_count(type); + for (int32 i = 0; i < length; ++i) { + if (attr[i].format == GL_INT) { + glVertexAttribIPointer(attr[i].location, attr[i].count, attr[i].format, attr[i].stride, attr[i].offset); + } else { + glVertexAttribPointer(attr[i].location, attr[i].count, attr[i].format, false, attr[i].stride, attr[i].offset); + } + glEnableVertexAttribArray(attr[i].location); + } +} + +constexpr +void gpuapi_attribute_info_create(GpuAttributeType type, OpenglVertexInputAttributeDescription* attr) +{ + switch (type) { + case GPU_ATTRIBUTE_TYPE_VERTEX_3D: { + attr[0] = { + .location = 0, + .count = 3, + .format = GL_FLOAT, + .stride = sizeof(Vertex3D), + .offset = (void *) offsetof(Vertex3D, position) + }; + + attr[1] = { + .location = 1, + .count = 3, + .format = GL_FLOAT, + .stride = sizeof(Vertex3D), + .offset = (void *) offsetof(Vertex3D, normal) + }; + + attr[2] = { + .location = 2, + .count = 2, + .format = GL_FLOAT, + .stride = sizeof(Vertex3D), + .offset = (void *) offsetof(Vertex3D, tex_coord) + }; + + attr[3] = { + .location = 3, + .count = 4, + .format = GL_FLOAT, + .stride = sizeof(Vertex3D),
.offset = (void *) offsetof(Vertex3D, color) + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_NORMAL: { + attr[0] = { + .location = 0, + .count = 3, + .format = GL_FLOAT, + .stride = sizeof(Vertex3DNormal), + .offset = (void *) offsetof(Vertex3DNormal, position) + }; + + attr[1] = { + .location = 1, + .count = 3, + .format = GL_FLOAT, + .stride = sizeof(Vertex3DNormal), + .offset = (void *) offsetof(Vertex3DNormal, normal) + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_COLOR: { + attr[0] = { + .location = 0, + .count = 3, + .format = GL_FLOAT, + .stride = sizeof(Vertex3DColor), + .offset = (void *) offsetof(Vertex3DColor, position) + }; + + attr[1] = { + .location = 1, + .count = 2, + .format = GL_FLOAT, + .stride = sizeof(Vertex3DColor), + .offset = (void *) offsetof(Vertex3DColor, color) + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_TEXTURE_COLOR: { + attr[0] = { + .location = 0, + .count = 3, + .format = GL_FLOAT, + .stride = sizeof(Vertex3DTextureColor), + .offset = (void *) offsetof(Vertex3DTextureColor, position) + }; + + attr[1] = { + .location = 1, + .count = 2, + .format = GL_FLOAT, + .stride = sizeof(Vertex3DTextureColor), + .offset = (void *) offsetof(Vertex3DTextureColor, texture_color) + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_SAMPLER_TEXTURE_COLOR: { + attr[0] = { + .location = 0, + .count = 3, + .format = GL_FLOAT, + .stride = sizeof(Vertex3DSamplerTextureColor), + .offset = (void *) offsetof(Vertex3DSamplerTextureColor, position) + }; + + attr[1] = { + .location = 1, + .count = 1, + .format = GL_INT, + .stride = sizeof(Vertex3DSamplerTextureColor), + .offset = (void *) offsetof(Vertex3DSamplerTextureColor, sampler) + }; + + attr[2] = { + .location = 2, + .count = 2, + .format = GL_FLOAT, + .stride = sizeof(Vertex3DSamplerTextureColor), + .offset = (void *) offsetof(Vertex3DSamplerTextureColor, texture_color) + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_2D_TEXTURE: { + attr[0] = { + .location = 0, + .count = 2, + .format = GL_FLOAT, + .stride = sizeof(Vertex2DTexture), + .offset = (void *) offsetof(Vertex2DTexture, position) + }; + + attr[1] = { + .location = 1, + .count = 2, + .format = GL_FLOAT, + .stride = sizeof(Vertex2DTexture), + .offset = (void *) offsetof(Vertex2DTexture, tex_coord) + }; + } return; + default: + UNREACHABLE(); + }; +} + +void gpuapi_descriptor_set_layout_create(Shader* shader, const OpenglDescriptorSetLayoutBinding* bindings, int32 binding_length) { + for (int32 i = 0; i < binding_length; ++i) { + shader->descriptor_set_layout[i].binding = glGetUniformLocation(shader->id, bindings[i].name); + shader->descriptor_set_layout[i].name = bindings[i].name; + } +} + #endif \ No newline at end of file diff --git a/gpuapi/vulkan/AppCmdBuffer.h b/gpuapi/vulkan/AppCmdBuffer.h index b18e9ad..56b8688 100644 --- a/gpuapi/vulkan/AppCmdBuffer.h +++ b/gpuapi/vulkan/AppCmdBuffer.h @@ -10,22 +10,25 @@ #define TOS_GPUAPI_VULKAN_APP_CMD_BUFFER_H #include "../../stdlib/Types.h" +#include "../../log/PerformanceProfiler.h" #include "Shader.h" #include "ShaderUtils.h" #include "../ShaderType.h" #include "../../asset/Asset.h" #include "../../command/AppCmdBuffer.h" -#include "GpuApiContainer.h" void* cmd_shader_load(AppCmdBuffer*, Command*) { return NULL; } -void* cmd_shader_load_sync(AppCmdBuffer* cb, Shader* shader, int32* shader_ids) { +void* cmd_shader_load_sync( + AppCmdBuffer* __restrict cb, Shader* __restrict shader, const int32* __restrict shader_ids, + VkDevice device, VkRenderPass render_pass, VkPipelineLayout* __restrict 
pipeline_layout, VkPipeline* __restrict pipeline, + VkDescriptorSetLayout* __restrict descriptor_set_layouts +) { + PROFILE_VERBOSE(PROFILE_CMD_SHADER_LOAD_SYNC, ""); char asset_id[9]; - GpuApiContainer* gpu_api = (GpuApiContainer *) cb->gpu_api; - VkShaderModule shader_assets[SHADER_TYPE_SIZE]; for (int32 i = 0; i < SHADER_TYPE_SIZE; ++i) { shader_assets[i] = NULL; @@ -48,7 +51,7 @@ void* cmd_shader_load_sync(AppCmdBuffer* cb, Shader* shader, int32* shader_ids) // Make sub shader shader_assets[i] = shader_make( - ((GpuApiContainer *) cb->gpu_api)->device, + device, (char *) shader_asset->self, shader_asset->ram_size ); @@ -58,8 +61,9 @@ void* cmd_shader_load_sync(AppCmdBuffer* cb, Shader* shader, int32* shader_ids) } // Make shader/program - shader->id = program_make( - gpu_api->device, gpu_api->render_pass, &gpu_api->pipeline_layout, &gpu_api->pipeline, + shader->id = pipeline_make( + device, render_pass, pipeline_layout, pipeline, + descriptor_set_layouts, shader_assets[0], shader_assets[1], shader_assets[2] ); diff --git a/gpuapi/vulkan/FramesInFlightContainer.h b/gpuapi/vulkan/FramesInFlightContainer.h new file mode 100644 index 0000000..045a785 --- /dev/null +++ b/gpuapi/vulkan/FramesInFlightContainer.h @@ -0,0 +1,23 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_GPUAPI_VULKAN_FRAMES_IN_FLIGHT_CONTAINER_H +#define TOS_GPUAPI_VULKAN_FRAMES_IN_FLIGHT_CONTAINER_H + +#include "../../stdlib/Types.h" +#include <vulkan/vulkan.h> + +struct FramesInFlightContainer { + uint32 count; + uint32 index; + VkSemaphore* image_available_semaphores; + VkSemaphore* render_finished_semaphores; + VkFence* fences; +}; + +#endif \ No newline at end of file diff --git a/gpuapi/vulkan/GpuApiContainer.h b/gpuapi/vulkan/GpuApiContainer.h deleted file mode 100644 index c8fe93f..0000000 --- a/gpuapi/vulkan/GpuApiContainer.h +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Jingga - * - * @copyright Jingga - * @license OMS License 2.0 - * @version 1.0.0 - * @link https://jingga.app - */ -#ifndef TOS_GPUAPI_VULKAN_GPU_API_CONTAINER_H -#define TOS_GPUAPI_VULKAN_GPU_API_CONTAINER_H - -#include "../../stdlib/Types.h" -#include <vulkan/vulkan.h> - -#define FRAME_LAG 2 - -struct GpuApiContainer { - VkInstance instance; - VkSurfaceKHR surface; - VkDevice device; - VkSwapchainKHR swapchain; - uint32 swapchain_image_count; - VkFormat swapchain_image_format; - VkImage* swapchain_images; // length = swapchain_image_count - VkImageView* swapchain_image_views; // length = swapchain_image_count - VkFramebuffer* framebuffers; // length = swapchain_image_count - VkExtent2D swapchain_extent; - VkPipelineLayout pipeline_layout; - VkQueue graphics_queue; - VkQueue present_queue; - VkRenderPass render_pass; - VkPipeline pipeline; - VkCommandPool command_pool; - VkCommandBuffer command_buffer; - VkSemaphore image_available_semaphore; - VkSemaphore render_finished_semaphore; - VkFence in_flight_fence; - - #if DEBUG || INTERNAL - VkDebugUtilsMessengerEXT debug_messenger; - #endif -}; - -#endif \ No newline at end of file diff --git a/gpuapi/vulkan/ShaderUtils.h b/gpuapi/vulkan/ShaderUtils.h index 0e3ac5f..e5f22b9 100644 --- a/gpuapi/vulkan/ShaderUtils.h +++ b/gpuapi/vulkan/ShaderUtils.h @@ -13,9 +13,12 @@ #include "../../stdlib/Types.h" #include "../../memory/RingMemory.h" +#include "../GpuAttributeType.h" +#include "../../object/Vertex.h" #include "../../log/Log.h" -inline uint32_t shader_get_uniform_location( +inline +uint32_t shader_get_uniform_location( VkWriteDescriptorSet* descriptor,
VkDescriptorSet descriptorSet, uint32_t binding, VkDescriptorType descriptorType ) { @@ -27,7 +30,8 @@ inline uint32_t shader_get_uniform_location( descriptor->descriptorCount = 1; } -inline void shader_set_value(VkDevice device, VkDescriptorSet descriptorSet, uint32_t binding, VkDescriptorType descriptorType, int32_t value) +inline +void shader_set_value(VkDevice device, VkDescriptorSet descriptorSet, uint32_t binding, VkDescriptorType descriptorType, int32_t value) { VkDescriptorBufferInfo bufferInfo = {}; bufferInfo.buffer = {}; // You should have a buffer holding the value @@ -46,6 +50,7 @@ inline void shader_set_value(VkDevice device, VkDescriptorSet descriptorSet, uin vkUpdateDescriptorSets(device, 1, &descriptorWrite, 0, NULL); } +inline VkShaderModule shader_make(VkDevice device, const char* source, int32 source_size) { // Create shader module create info @@ -59,7 +64,7 @@ VkShaderModule shader_make(VkDevice device, const char* source, int32 source_siz VkResult result = vkCreateShaderModule(device, &create_info, NULL, &shader_module); if (result != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkCreateShaderModule: %d", LOG_DATA_INT32, (int32 *) &result); + LOG_FORMAT_1("Vulkan vkCreateShaderModule: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return VK_NULL_HANDLE; @@ -68,16 +73,127 @@ VkShaderModule shader_make(VkDevice device, const char* source, int32 source_siz return shader_module; } +inline +void vulkan_vertex_binding_description(uint32 size, VkVertexInputBindingDescription* binding) { + binding->binding = 0; + binding->stride = size; + binding->inputRate = VK_VERTEX_INPUT_RATE_VERTEX; +} + +void gpuapi_attribute_info_create(GpuAttributeType type, VkVertexInputAttributeDescription* attr) +{ + switch (type) { + case GPU_ATTRIBUTE_TYPE_VERTEX_3D: { + attr[0] = { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = offsetof(Vertex3D, position) + }; + + attr[1] = { + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = offsetof(Vertex3D, normal) + }; + + attr[2] = { + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(Vertex3D, tex_coord) + }; + + attr[3] = { + .location = 3, + .binding = 0, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + .offset = offsetof(Vertex3D, color) + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_NORMAL: { + attr[0] = { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = offsetof(Vertex3DNormal, position) + }; + + attr[1] = { + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = offsetof(Vertex3DNormal, normal) + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_COLOR: { + attr[0] = { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = offsetof(Vertex3DColor, position) + }; + + attr[1] = { + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = offsetof(Vertex3DColor, color) + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_TEXTURE_COLOR: { + attr[0] = { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset = offsetof(Vertex3DTextureColor, position) + }; + + attr[1] = { + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(Vertex3DTextureColor, texture_color) + }; + } return; + case GPU_ATTRIBUTE_TYPE_VERTEX_3D_SAMPLER_TEXTURE_COLOR: { + attr[0] = { + .location = 0, + .binding = 0, + .format = VK_FORMAT_R32G32B32_SFLOAT, + .offset =
offsetof(Vertex3DSamplerTextureColor, position) + }; + + attr[1] = { + .location = 1, + .binding = 0, + .format = VK_FORMAT_R32_SINT, + .offset = offsetof(Vertex3DSamplerTextureColor, sampler) + }; + + attr[2] = { + .location = 2, + .binding = 0, + .format = VK_FORMAT_R32G32_SFLOAT, + .offset = offsetof(Vertex3DSamplerTextureColor, texture_color) + }; + } return; + default: + UNREACHABLE(); + }; +} + inline void pipeline_use(VkCommandBuffer command_buffer, VkPipeline pipeline) { vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); } -VkPipeline program_make( - VkDevice device, VkRenderPass render_pass, VkPipelineLayout* pipeline_layout, VkPipeline* pipeline, - VkShaderModule vertex_shader, - VkShaderModule fragment_shader, +VkPipeline pipeline_make( + VkDevice device, VkRenderPass render_pass, VkPipelineLayout* __restrict pipeline_layout, VkPipeline* __restrict pipeline, + VkDescriptorSetLayout* descriptor_set_layouts, + VkShaderModule vertex_shader, VkShaderModule fragment_shader, VkShaderModule ) { VkPipelineShaderStageCreateInfo vertex_shader_stage_info = {}; @@ -94,10 +210,18 @@ VkPipeline program_make( VkPipelineShaderStageCreateInfo shader_stages[] = {vertex_shader_stage_info, fragment_shader_stage_info}; + VkVertexInputBindingDescription binding_description; + vulkan_vertex_binding_description(sizeof(Vertex3DTextureColor), &binding_description); + + VkVertexInputAttributeDescription input_attribute_description[2]; + gpuapi_attribute_info_create(GPU_ATTRIBUTE_TYPE_VERTEX_3D_TEXTURE_COLOR, input_attribute_description); + VkPipelineVertexInputStateCreateInfo vertex_input_info = {}; vertex_input_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - vertex_input_info.vertexBindingDescriptionCount = 0; - vertex_input_info.vertexAttributeDescriptionCount = 0; + vertex_input_info.vertexBindingDescriptionCount = 1; + vertex_input_info.pVertexBindingDescriptions = &binding_description; + vertex_input_info.vertexAttributeDescriptionCount = ARRAY_COUNT(input_attribute_description); + vertex_input_info.pVertexAttributeDescriptions = input_attribute_description; VkPipelineInputAssemblyStateCreateInfo input_assembly = {}; input_assembly.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; @@ -139,7 +263,7 @@ VkPipeline program_make( color_blending.blendConstants[2] = 0.0f; color_blending.blendConstants[3] = 0.0f; - VkDynamicState dynamic_states[] = { + const VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR }; @@ -151,12 +275,13 @@ VkPipeline program_make( VkPipelineLayoutCreateInfo pipeline_info_layout = {}; pipeline_info_layout.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_info_layout.setLayoutCount = 0; + pipeline_info_layout.setLayoutCount = 1; + pipeline_info_layout.pSetLayouts = descriptor_set_layouts; pipeline_info_layout.pushConstantRangeCount = 0; VkResult result; if ((result = vkCreatePipelineLayout(device, &pipeline_info_layout, NULL, pipeline_layout)) != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkCreatePipelineLayout: %d", LOG_DATA_INT32, (int32 *) &result); + LOG_FORMAT_1("Vulkan vkCreatePipelineLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return NULL; @@ -179,7 +304,7 @@ VkPipeline program_make( pipeline_info.basePipelineHandle = VK_NULL_HANDLE; if ((result = vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipeline_info, NULL, pipeline)) != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkCreateGraphicsPipelines: %d", LOG_DATA_INT32,
+inline
+void pipeline_cleanup(VkDevice device, VkPipeline pipeline, VkPipelineLayout pipeline_layout) {
+    vkDestroyPipeline(device, pipeline, NULL);
+    vkDestroyPipelineLayout(device, pipeline_layout, NULL);
+}
+
+inline
+void gpuapi_descriptor_set_layout_create(
+    VkDevice device,
+    VkDescriptorSetLayout* descriptor_set_layout, VkDescriptorSetLayoutBinding* bindings, int32 binding_length
+) {
+    VkDescriptorSetLayoutCreateInfo layout_info = {};
+    layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+    layout_info.bindingCount = binding_length;
+    layout_info.pBindings = bindings;
+
+    VkResult result;
+    if ((result = vkCreateDescriptorSetLayout(device, &layout_info, NULL, descriptor_set_layout)) != VK_SUCCESS) {
+        LOG_FORMAT_1("Vulkan vkCreateDescriptorSetLayout: %d", {{LOG_DATA_INT32, (int32 *) &result}});
+        ASSERT_SIMPLE(false);
+    }
+}
+
+inline
+void vulkan_descriptor_pool_create(
+    VkDevice device, VkDescriptorPool* descriptor_pool,
+    uint32 frames_in_flight
+)
+{
+    // Two pool sizes because this pool backs one uniform buffer descriptor and one
+    // combined image sampler descriptor per frame in flight (see vulkan_descriptor_sets_create below)
+    // @todo Isn't this shader specific?
+    VkDescriptorPoolSize poolSizes[2] = {
+        {
+            .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+            .descriptorCount = frames_in_flight,
+        },
+        {
+            .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .descriptorCount = frames_in_flight,
+        }
+    };
+
+    VkDescriptorPoolCreateInfo poolInfo{};
+    poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+    poolInfo.poolSizeCount = ARRAY_COUNT(poolSizes);
+    poolInfo.pPoolSizes = poolSizes;
+    poolInfo.maxSets = frames_in_flight;
+
+    VkResult result;
+    if ((result = vkCreateDescriptorPool(device, &poolInfo, NULL, descriptor_pool)) != VK_SUCCESS) {
+        LOG_FORMAT_1("Vulkan vkCreateDescriptorPool: %d", {{LOG_DATA_INT32, (int32 *) &result}});
+        ASSERT_SIMPLE(false);
+    }
+}
+
+void vulkan_descriptor_sets_create(
+    VkDevice device, VkDescriptorPool descriptor_pool,
+    VkDescriptorSetLayout descriptor_set_layout, VkDescriptorSet* descriptor_sets,
+    VkImageView texture_image_view, VkSampler& texture_sampler,
+    VkBuffer* __restrict uniform_buffers,
+    size_t uniform_buffer_object_size,
+    uint32 frames_in_flight, RingMemory* ring
+)
+{
+    // One layout handle per frame in flight; allocating only sizeof(VkDescriptorSetLayout)
+    // would overflow in the fill loop below
+    VkDescriptorSetLayout* layouts = (VkDescriptorSetLayout *) ring_get_memory(ring, frames_in_flight * sizeof(VkDescriptorSetLayout), 64);
+    for (uint32 i = 0; i < frames_in_flight; ++i) {
+        layouts[i] = descriptor_set_layout;
+    }
+
+    VkDescriptorSetAllocateInfo alloc_info = {};
+    alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
+    alloc_info.descriptorPool = descriptor_pool;
+    alloc_info.descriptorSetCount = frames_in_flight;
+    alloc_info.pSetLayouts = layouts;
+
+    VkResult result;
+    if ((result = vkAllocateDescriptorSets(device, &alloc_info, descriptor_sets)) != VK_SUCCESS) {
+        LOG_FORMAT_1("Vulkan vkAllocateDescriptorSets: %d", {{LOG_DATA_INT32, (int32 *) &result}});
+        ASSERT_SIMPLE(false);
+
+        return;
+    }
+
+    // @todo this is shader specific, it shouldn't be here
+    for (uint32 i = 0; i < frames_in_flight; ++i) {
+        VkDescriptorBufferInfo buffer_info = {};
+        buffer_info.buffer = uniform_buffers[i];
+        buffer_info.offset = 0;
+        buffer_info.range = uniform_buffer_object_size;
+
+        VkDescriptorImageInfo image_info = {};
+        image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        image_info.imageView = texture_image_view;
+        image_info.sampler = 
texture_sampler; + + VkWriteDescriptorSet descriptor_writes[2] = { + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = descriptor_sets[i], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .pBufferInfo = &buffer_info, + }, + { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = descriptor_sets[i], + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + } + }; + + vkUpdateDescriptorSets(device, ARRAY_COUNT(descriptor_writes), descriptor_writes, 0, NULL); + } +} + #endif \ No newline at end of file diff --git a/gpuapi/vulkan/VulkanUtils.h b/gpuapi/vulkan/VulkanUtils.h index 55a455f..3cfe5e8 100644 --- a/gpuapi/vulkan/VulkanUtils.h +++ b/gpuapi/vulkan/VulkanUtils.h @@ -25,14 +25,30 @@ #include "../../stdlib/Types.h" #include "../../utils/StringUtils.h" #include "../../utils/TestUtils.h" +#include "../../object/Texture.h" +#include "../../image/Image.cpp" #include "../../log/Log.h" #include "../../memory/RingMemory.h" #include "ShaderUtils.h" +#include "FramesInFlightContainer.h" + +#if DEBUG + #define ASSERT_GPU_API(x) \ + do { \ + VkResult err = (x); \ + if (err) { \ + LOG_FORMAT_1("Vulkan error: %d", {{LOG_DATA_INT32, (int32 *) &err}}); \ + ASSERT_SIMPLE(false); \ + } \ + } while (0) +#else + #define ASSERT_GPU_API(x) (x) +#endif PACKED_STRUCT; -// WARNING: indices values start at one (are offset by +1) because 0 means no value in our implementation -// The reason for the packing is that sometimes we want to use it as an array +// WARNING: The reason for the packing is that sometimes we want to use it as an array // I am only packing it on the off chance there is some funky behaviour. +// @question Is this really required though? Isn't it basically guaranteed on our platforms to be packed? 
struct VulkanQueueFamilyIndices {
     int32 graphics_family;
     int32 present_family;
@@ -50,12 +66,21 @@ struct VulkanSwapChainSupportDetails {
 };
 
 inline
-void change_viewport(Window* w, int32 offset_x = 0, int32 offset_y = 0)
+void change_viewport(int32 width, int32 height, VkCommandBuffer command_buffer, VkExtent2D swapchain_extent, int32 offset_x = 0, int32 offset_y = 0)
 {
-    (void *) w;
-    (void) offset_x;
-    (void) offset_y;
-    // @todo implement
+    VkViewport viewport = {};
+    viewport.x = (f32) offset_x;
+    viewport.y = (f32) offset_y;
+    viewport.width = (f32) width;
+    viewport.height = (f32) height;
+    viewport.minDepth = 0.0f;
+    viewport.maxDepth = 1.0f;
+    vkCmdSetViewport(command_buffer, 0, 1, &viewport);
+
+    VkRect2D scissor = {};
+    scissor.offset = {offset_x, offset_y};
+    scissor.extent = swapchain_extent;
+    vkCmdSetScissor(command_buffer, 0, 1, &scissor);
 }
 
 int32 vulkan_check_validation_layer_support(const char** validation_layers, uint32 validation_layer_count, RingMemory* ring) {
@@ -115,7 +140,7 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL vulkan_debug_callback(
     if ((severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT)
         || (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT)
     ) {
-        LOG(true, debug_callback_data->pMessage);
+        LOG_1(debug_callback_data->pMessage);
         ASSERT_SIMPLE(false);
     }
 
@@ -151,7 +176,7 @@ void vulkan_instance_create(
     if (validation_layer_count
         && (err = vulkan_check_validation_layer_support(validation_layers, validation_layer_count, ring))
     ) {
-        LOG_FORMAT(true, "Vulkan validation_layer missing: %d", {{LOG_DATA_CHAR_STR, (void *) validation_layers[-err - 1]}});
+        LOG_FORMAT_1("Vulkan validation_layer missing: %s", {{LOG_DATA_CHAR_STR, (void *) validation_layers[-err - 1]}});
         ASSERT_SIMPLE(false);
 
         return;
@@ -160,7 +185,7 @@ void vulkan_instance_create(
     if (extension_count
         && (err = vulkan_check_extension_support(extensions, extension_count, ring))
     ) {
-        LOG_FORMAT(true, "Vulkan extension missing: %d", {{LOG_DATA_CHAR_STR, (void *) extensions[-err - 1]}});
+        LOG_FORMAT_1("Vulkan extension missing: %s", {{LOG_DATA_CHAR_STR, (void *) extensions[-err - 1]}});
         ASSERT_SIMPLE(false);
 
         return;
@@ -197,11 +222,12 @@ void vulkan_instance_create(
     VkResult result;
     if ((result = vkCreateInstance(&create_info, NULL, instance)) != VK_SUCCESS) {
-        LOG_FORMAT(true, "Vulkan vkCreateInstance: %d", LOG_DATA_INT32, (int32 *) &result);
+        LOG_FORMAT_1("Vulkan vkCreateInstance: %d", {{LOG_DATA_INT32, (int32 *) &result}});
         ASSERT_SIMPLE(false);
     }
 }
 
+inline
 void vulkan_surface_create(VkInstance instance, VkSurfaceKHR* surface, Window* window)
 {
     #if _WIN32
@@ -212,7 +238,7 @@ void vulkan_surface_create(VkInstance instance, VkSurfaceKHR* surface, Window* w
         VkResult result;
         if ((result = vkCreateWin32SurfaceKHR(instance, &surface_create_info, NULL, surface)) != VK_SUCCESS) {
-            LOG_FORMAT(true, "Vulkan vkCreateWin32SurfaceKHR: %d", LOG_DATA_INT32, (int32 *) &result);
+            LOG_FORMAT_1("Vulkan vkCreateWin32SurfaceKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}});
 
             return;
         }
     #elif __linux__
@@ -245,6 +271,7 @@ bool vulkan_device_supports_extensions(VkPhysicalDevice device, const char** dev
 }
 
 // @todo Allow to fill array
+inline
 void vulkan_available_layers(RingMemory* ring) {
     uint32 layer_count;
     vkEnumerateInstanceLayerProperties(&layer_count, NULL);
@@ -254,6 +281,7 @@
 }
 
 // @todo Allow to fill array
+inline
 void vulkan_available_extensions(RingMemory* ring) {
     uint32 extension_count;
     vkEnumerateInstanceExtensionProperties(NULL, &extension_count, NULL);
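// ---- editor's sketch (not part of the original patch) ----
// One possible shape for the "@todo Allow to fill array" above, following the
// ring_get_memory() pattern used elsewhere in this patch (helper name is an assumption):
VkExtensionProperties* vulkan_available_extensions_fill(RingMemory* ring, uint32* extension_count)
{
    // First call: query only the count
    vkEnumerateInstanceExtensionProperties(NULL, extension_count, NULL);

    VkExtensionProperties* extensions = (VkExtensionProperties *) ring_get_memory(
        ring, *extension_count * sizeof(VkExtensionProperties), 64
    );

    // Second call: fill the caller-visible array
    vkEnumerateInstanceExtensionProperties(NULL, extension_count, extensions);

    return extensions;
}
// ---- end editor's sketch ----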
@@ -280,7 +308,7 @@ VulkanQueueFamilyIndices vulkan_find_queue_families(VkPhysicalDevice physical_de VkResult result; if ((result = vkGetPhysicalDeviceSurfaceSupportKHR(physical_device, i, surface, &present_support)) != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkGetPhysicalDeviceSurfaceSupportKHR: %d", LOG_DATA_INT32, (int32 *) &result); + LOG_FORMAT_1("Vulkan vkGetPhysicalDeviceSurfaceSupportKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return indices; @@ -321,6 +349,7 @@ VulkanSwapChainSupportDetails vulkan_query_swap_chain_support(VkPhysicalDevice p return details; } +inline bool vulkan_is_device_suitable(VkPhysicalDevice physical_device, VkSurfaceKHR surface, const char** device_extensions, uint32 device_extension_count, RingMemory* ring) { VulkanQueueFamilyIndices indices = vulkan_find_queue_families(physical_device, surface, ring); @@ -337,6 +366,7 @@ bool vulkan_is_device_suitable(VkPhysicalDevice physical_device, VkSurfaceKHR su } // @todo Do we want to implement something similar in opengl that does something vaguely different despite not really necessary? (see wglGetGPUIDs, wglCreateAssociatedContextAMD) +inline void gpuapi_pick_physical_device( VkInstance instance, VkSurfaceKHR surface, VkPhysicalDevice* physical_device, const char** device_extensions, uint32 device_extension_count, RingMemory* ring @@ -355,7 +385,7 @@ void gpuapi_pick_physical_device( } } - LOG(true, "Vulkan failed to find physical device"); + LOG_1("Vulkan failed to find physical device"); ASSERT_SIMPLE(false); } @@ -384,7 +414,10 @@ void gpuapi_create_logical_device( ++queue_create_info_count; } + // @todo how to make device specific? VkPhysicalDeviceFeatures device_features = {}; + device_features.samplerAnisotropy = VK_TRUE; + VkDeviceCreateInfo create_info = {}; create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; create_info.queueCreateInfoCount = queue_create_info_count; @@ -402,7 +435,7 @@ void gpuapi_create_logical_device( VkResult result; if ((result = vkCreateDevice(physical_device, &create_info, NULL, device)) != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkCreateDevice: %d", LOG_DATA_INT32, (int32 *) &result); + LOG_FORMAT_1("Vulkan vkCreateDevice: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } @@ -488,17 +521,19 @@ void gpuapi_swapchain_create( VkResult result; if ((result = vkCreateSwapchainKHR(device, &create_info, NULL, swapchain)) != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkCreateSwapchainKHR: %d", LOG_DATA_INT32, (int32 *) &result); + LOG_FORMAT_1("Vulkan vkCreateSwapchainKHR: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); return; } - memcpy(swapchain_image_format, &surface_format->format, sizeof(VkFormat)); + *swapchain_image_format = surface_format->format; + //memcpy(swapchain_image_format, &surface_format->format, sizeof(VkFormat)); } // WARNING: swapchain_images needs to already have reserved enough memory // @todo How can we ensure swapchain_images has enough but not too much space? 
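// ---- editor's sketch (not part of the original patch) ----
// The usual answer to the @todo above: query the image count first, then reserve exactly
// that much before the second call fills the array. A possible wrapper, assuming the
// RingMemory allocator used throughout this patch:
//
//     VkImage* vulkan_swapchain_images_get(VkDevice device, VkSwapchainKHR swapchain, uint32* count, RingMemory* ring)
//     {
//         vkGetSwapchainImagesKHR(device, swapchain, count, NULL);    // first call: count only
//         VkImage* images = (VkImage *) ring_get_memory(ring, *count * sizeof(VkImage), 64);
//         vkGetSwapchainImagesKHR(device, swapchain, count, images);  // second call: fill
//         return images;
//     }
// ---- end editor's sketch ----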
+inline void vulkan_swapchain_images_create( VkDevice device, VkSwapchainKHR swapchain, VkImage** swapchain_images, uint32* swapchain_image_count, @@ -511,6 +546,21 @@ void vulkan_swapchain_images_create( vkGetSwapchainImagesKHR(device, swapchain, swapchain_image_count, *swapchain_images); } +inline +void vulkan_swapchain_cleanup( + VkDevice device, VkFramebuffer* framebuffers, + VkSwapchainKHR swapchain, VkImageView* swapchain_image_views, uint32 swapchain_count +) +{ + for (uint32 i = 0; i < swapchain_count; ++i) { + vkDestroyFramebuffer(device, framebuffers[i], NULL); + vkDestroyImageView(device, swapchain_image_views[i], NULL); + } + + vkDestroySwapchainKHR(device, swapchain, NULL); +} + +inline void vulkan_image_views_create( VkDevice device, VkImageView* swapchain_image_views, VkImage* swapchain_images, uint32 swapchain_image_count, VkFormat swapchain_image_format @@ -533,7 +583,7 @@ void vulkan_image_views_create( create_info.subresourceRange.layerCount = 1; if ((result = vkCreateImageView(device, &create_info, NULL, &swapchain_image_views[i])) != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkCreateImageView: %d", LOG_DATA_INT32, (int32 *) &result); + LOG_FORMAT_1("Vulkan vkCreateImageView: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -580,7 +630,7 @@ void vulkan_render_pass_create( VkResult result; if ((result = vkCreateRenderPass(device, &render_pass_info, NULL, render_pass)) != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkCreateRenderPass: %d", LOG_DATA_INT32, (int32 *) &result); + LOG_FORMAT_1("Vulkan vkCreateRenderPass: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -593,7 +643,7 @@ void vulkan_framebuffer_create( VkRenderPass render_pass ) { VkResult result; - for (uint32 i = 0; i < swapchain_image_count; i++) { + for (uint32 i = 0; i < swapchain_image_count; ++i) { VkImageView attachments[] = { swapchain_image_views[i] }; @@ -608,7 +658,7 @@ void vulkan_framebuffer_create( framebufferInfo.layers = 1; if ((result = vkCreateFramebuffer(device, &framebufferInfo, NULL, &framebuffers[i])) != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkCreateFramebuffer: %d", LOG_DATA_INT32, (int32 *) &result); + LOG_FORMAT_1("Vulkan vkCreateFramebuffer: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } @@ -627,27 +677,29 @@ void vulkan_command_pool_create( VkResult result; if ((result = vkCreateCommandPool(device, &pool_info, NULL, command_pool)) != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkCreateCommandPool: %d", LOG_DATA_INT32, (int32 *) &result); + LOG_FORMAT_1("Vulkan vkCreateCommandPool: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } -void vulkan_command_buffer_create(VkDevice device, VkCommandBuffer* command_buffer, VkCommandPool command_pool) +void vulkan_command_buffers_create(VkDevice device, VkCommandPool command_pool, VkCommandBuffer* command_buffers, uint32 command_buffer_count) { VkCommandBufferAllocateInfo alloc_info = {}; alloc_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; alloc_info.commandPool = command_pool; alloc_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; - alloc_info.commandBufferCount = 1; + alloc_info.commandBufferCount = command_buffer_count; VkResult result; - if ((result = vkAllocateCommandBuffers(device, &alloc_info, command_buffer)) != VK_SUCCESS) { - LOG_FORMAT(true, "Vulkan vkAllocateCommandBuffers: %d", LOG_DATA_INT32, (int32 *) &result); + if ((result = vkAllocateCommandBuffers(device, &alloc_info, command_buffers)) != VK_SUCCESS) { + LOG_FORMAT_1("Vulkan 
vkAllocateCommandBuffers: %d", {{LOG_DATA_INT32, (int32 *) &result}}); ASSERT_SIMPLE(false); } } -void vulkan_sync_objects_create(VkDevice device, VkSemaphore* image_available_semaphore, VkSemaphore* render_finished_semaphore, VkFence* in_flight_fence) +void vulkan_sync_objects_create( + VkDevice device, FramesInFlightContainer* frames_in_flight +) { VkSemaphoreCreateInfo semaphore_info = {}; semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; @@ -657,13 +709,410 @@ void vulkan_sync_objects_create(VkDevice device, VkSemaphore* image_available_se fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT; VkResult result; - if ((result = vkCreateSemaphore(device, &semaphore_info, NULL, image_available_semaphore)) != VK_SUCCESS - || (result = vkCreateSemaphore(device, &semaphore_info, NULL, render_finished_semaphore)) != VK_SUCCESS - || (result = vkCreateFence(device, &fence_info, NULL, in_flight_fence)) != VK_SUCCESS - ) { - LOG_FORMAT(true, "Vulkan vulkan_sync_objects_create: %d", LOG_DATA_INT32, (int32 *) &result); + for (uint32 i = 0; i < frames_in_flight->count; ++i) { + if ((result = vkCreateSemaphore(device, &semaphore_info, NULL, &frames_in_flight->image_available_semaphores[i])) != VK_SUCCESS + || (result = vkCreateSemaphore(device, &semaphore_info, NULL, &frames_in_flight->render_finished_semaphores[i])) != VK_SUCCESS + || (result = vkCreateFence(device, &fence_info, NULL, &frames_in_flight->fences[i])) != VK_SUCCESS + ) { + LOG_FORMAT_1("Vulkan vulkan_sync_objects_create: %d", {{LOG_DATA_INT32, (int32 *) &result}}); + ASSERT_SIMPLE(false); + } + } +} - ASSERT_SIMPLE(false); +inline +int32 vulkan_find_memory_type(VkPhysicalDevice physical_device, uint32 type, VkMemoryPropertyFlags properties) { + VkPhysicalDeviceMemoryProperties mem_properties; + vkGetPhysicalDeviceMemoryProperties(physical_device, &mem_properties); + + for (uint32 i = 0; i < mem_properties.memoryTypeCount; ++i) { + if ((type & (1 << i)) && (mem_properties.memoryTypes[i].propertyFlags & properties) == properties) { + return i; + } + } + + return -1; +} + +inline +void vulkan_buffer_create( + VkDevice device, VkPhysicalDevice physical_device, + VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags properties, VkBuffer& buffer, VkDeviceMemory& buffer_memory +) { + ASSERT_SIMPLE(size > 0); + + VkBufferCreateInfo buffer_info = {}; + buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + buffer_info.size = size; + buffer_info.usage = usage; + buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + ASSERT_GPU_API(vkCreateBuffer(device, &buffer_info, NULL, &buffer)); + + // Allocate memory for the buffer + VkMemoryRequirements mem_requirements; + vkGetBufferMemoryRequirements(device, buffer, &mem_requirements); + + VkMemoryAllocateInfo alloc_info = {}; + alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + alloc_info.allocationSize = mem_requirements.size; + alloc_info.memoryTypeIndex = vulkan_find_memory_type(physical_device, mem_requirements.memoryTypeBits, properties); + + ASSERT_GPU_API(vkAllocateMemory(device, &alloc_info, NULL, &buffer_memory)); + ASSERT_GPU_API(vkBindBufferMemory(device, buffer, buffer_memory, 0)); +} + +FORCE_INLINE +void vulkan_command_buffer_reset(VkCommandBuffer command_buffer) { + ASSERT_GPU_API(vkResetCommandBuffer(command_buffer, 0)); +} + +inline +void vulkan_single_commands_begin(VkCommandBuffer command_buffer) +{ + VkCommandBufferBeginInfo begin_info = {}; + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.flags = 
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + ASSERT_GPU_API(vkBeginCommandBuffer(command_buffer, &begin_info)); +} + +inline +void vulkan_single_commands_end(VkQueue queue, VkCommandBuffer command_buffer) +{ + ASSERT_GPU_API(vkEndCommandBuffer(command_buffer)); + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &command_buffer; + + ASSERT_GPU_API(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE)); + ASSERT_GPU_API(vkQueueWaitIdle(queue)); +} + +inline +void vulkan_single_commands_free(VkDevice device, VkCommandPool command_pool, VkCommandBuffer command_buffer) +{ + vkFreeCommandBuffers(device, command_pool, 1, &command_buffer); +} + +void vulkan_transition_image_layout(VkCommandBuffer command_buffer, VkImage image, VkImageLayout oldLayout, VkImageLayout newLayout) { + VkImageMemoryBarrier barrier = {}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.oldLayout = oldLayout; + barrier.newLayout = newLayout; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + + VkPipelineStageFlags source_stage; + VkPipelineStageFlags destination_stage; + + if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + + source_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destination_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + source_stage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destination_stage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } else { + UNREACHABLE(); + } + + vkCmdPipelineBarrier( + command_buffer, + source_stage, destination_stage, + 0, + 0, NULL, + 0, NULL, + 1, &barrier + ); +} + +// @todo replace references with pointers +void load_texture_to_gpu( + VkDevice device, VkPhysicalDevice physical_device, + VkCommandPool command_pool, VkQueue queue, + VkImage& texture_image, VkDeviceMemory& texture_image_memory, VkImageView& texture_image_view, VkSampler& texture_sampler, + const Texture* texture) +{ + // Create the Vulkan image + VkImageCreateInfo image_info = {}; + image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + image_info.imageType = VK_IMAGE_TYPE_2D; + image_info.format = VK_FORMAT_R8G8B8A8_SRGB; + image_info.extent.width = texture->image.width; + image_info.extent.height = texture->image.height; + image_info.extent.depth = 1; + image_info.mipLevels = 1; + image_info.arrayLayers = 1; + image_info.samples = VK_SAMPLE_COUNT_1_BIT; + image_info.tiling = VK_IMAGE_TILING_OPTIMAL; + image_info.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + ASSERT_GPU_API(vkCreateImage(device, &image_info, NULL, &texture_image)); + + // Allocate memory for the image + VkMemoryRequirements memRequirements; + vkGetImageMemoryRequirements(device, texture_image, 
&memRequirements);
+
+    VkMemoryAllocateInfo allocInfo = {};
+    allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+    allocInfo.allocationSize = memRequirements.size;
+    allocInfo.memoryTypeIndex = vulkan_find_memory_type(physical_device, memRequirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+
+    ASSERT_GPU_API(vkAllocateMemory(device, &allocInfo, NULL, &texture_image_memory));
+    ASSERT_GPU_API(vkBindImageMemory(device, texture_image, texture_image_memory, 0));
+
+    int32 image_size = image_pixel_size_from_type(texture->image.image_settings) * texture->image.width * texture->image.height;
+
+    // Create a staging buffer
+    VkBuffer staging_buffer;
+    VkDeviceMemory staging_buffer_memory;
+    vulkan_buffer_create(device, physical_device, image_size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, staging_buffer, staging_buffer_memory);
+
+    // Copy texture data to the staging buffer
+    void* data;
+    vkMapMemory(device, staging_buffer_memory, 0, image_size, 0, &data);
+    memcpy(data, texture->image.pixels, image_size);
+    vkUnmapMemory(device, staging_buffer_memory);
+
+    // Transition the image layout
+    VkCommandBuffer command_buffer;
+    vulkan_command_buffers_create(device, command_pool, &command_buffer, 1);
+    vulkan_single_commands_begin(command_buffer);
+
+    vulkan_transition_image_layout(command_buffer, texture_image, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+    vulkan_single_commands_end(queue, command_buffer);
+
+    // Copy data from the staging buffer to the image
+    vulkan_command_buffer_reset(command_buffer);
+    vulkan_single_commands_begin(command_buffer);
+    VkBufferImageCopy region = {};
+    region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    region.imageSubresource.mipLevel = 0;
+    region.imageSubresource.baseArrayLayer = 0;
+    region.imageSubresource.layerCount = 1;
+    region.imageExtent = {texture->image.width, texture->image.height, 1};
+
+    vkCmdCopyBufferToImage(command_buffer, staging_buffer, texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
+    vulkan_single_commands_end(queue, command_buffer);
+
+    // Transition the image layout for shader access
+    vulkan_command_buffer_reset(command_buffer);
+    vulkan_single_commands_begin(command_buffer);
+    vulkan_transition_image_layout(command_buffer, texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+    vulkan_single_commands_end(queue, command_buffer);
+
+    vulkan_single_commands_free(device, command_pool, command_buffer);
+
+    // Clean up the staging buffer
+    vkDestroyBuffer(device, staging_buffer, NULL);
+    vkFreeMemory(device, staging_buffer_memory, NULL);
+
+    // Create an image view
+    VkImageViewCreateInfo view_info = {};
+    view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+    view_info.image = texture_image;
+    view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
+    view_info.format = VK_FORMAT_R8G8B8A8_SRGB;
+    view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+    view_info.subresourceRange.baseMipLevel = 0;
+    view_info.subresourceRange.levelCount = 1;
+    view_info.subresourceRange.baseArrayLayer = 0;
+    view_info.subresourceRange.layerCount = 1;
+
+    ASSERT_GPU_API(vkCreateImageView(device, &view_info, NULL, &texture_image_view));
+
+    // Create a sampler
+    VkPhysicalDeviceProperties properties = {};
+    vkGetPhysicalDeviceProperties(physical_device, &properties);
+
+    VkSamplerCreateInfo sampler_info = {};
+    sampler_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
+    sampler_info.magFilter = VK_FILTER_LINEAR;
+    sampler_info.minFilter = VK_FILTER_LINEAR;
+    sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT;
+    sampler_info.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT;
+    sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT;
+    sampler_info.anisotropyEnable = VK_TRUE;
+    sampler_info.maxAnisotropy = properties.limits.maxSamplerAnisotropy;
+    sampler_info.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK;
+    sampler_info.unnormalizedCoordinates = VK_FALSE;
+    sampler_info.compareEnable = VK_FALSE;
+    sampler_info.compareOp = VK_COMPARE_OP_ALWAYS;
+    sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
+
+    ASSERT_GPU_API(vkCreateSampler(device, &sampler_info, NULL, &texture_sampler));
+}
+
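// ---- editor's sketch (not part of the original patch) ----
// The three submits in load_texture_to_gpu() (transition, copy, transition) each wait on
// vkQueueWaitIdle() inside vulkan_single_commands_end(). Barriers already order commands
// within a single command buffer, so the whole upload can be recorded and submitted once:
//
//     vulkan_single_commands_begin(command_buffer);
//     vulkan_transition_image_layout(command_buffer, texture_image,
//         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
//     vkCmdCopyBufferToImage(command_buffer, staging_buffer, texture_image,
//         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
//     vulkan_transition_image_layout(command_buffer, texture_image,
//         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
//     vulkan_single_commands_end(queue, command_buffer); // one submit, one wait
// ---- end editor's sketch ----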
+void vulkan_vertex_buffer_update(
+    VkDevice device, VkPhysicalDevice physical_device, VkCommandPool command_pool, VkQueue queue,
+    VkBuffer vertexBuffer,
+    const void* __restrict vertices, int32 vertex_size, int32 vertex_count
+)
+{
+    VkDeviceSize bufferSize = vertex_size * vertex_count;
+
+    VkBuffer stagingBuffer;
+    VkDeviceMemory stagingBufferMemory;
+    vulkan_buffer_create(
+        device, physical_device,
+        bufferSize,
+        VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+        stagingBuffer, stagingBufferMemory
+    );
+
+    void* data;
+    vkMapMemory(device, stagingBufferMemory, 0, bufferSize, 0, &data);
+    memcpy(data, vertices, (size_t) bufferSize);
+    vkUnmapMemory(device, stagingBufferMemory);
+
+    VkCommandBuffer commandBuffer;
+    vulkan_command_buffers_create(device, command_pool, &commandBuffer, 1);
+    vulkan_single_commands_begin(commandBuffer);
+
+    VkBufferCopy copyRegion = {};
+    copyRegion.size = bufferSize;
+    vkCmdCopyBuffer(commandBuffer, stagingBuffer, vertexBuffer, 1, &copyRegion);
+    vulkan_single_commands_end(queue, commandBuffer);
+
+    vulkan_single_commands_free(device, command_pool, commandBuffer);
+
+    vkDestroyBuffer(device, stagingBuffer, NULL);
+    vkFreeMemory(device, stagingBufferMemory, NULL);
+}
+
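// ---- editor's sketch (not part of the original patch) ----
// vulkan_vertex_buffer_update() above and the two *_buffer_create() functions below share
// the same staging sequence. A helper could factor it out; this is a sketch built only
// from functions defined in this file, not a definitive refactor:
void vulkan_buffer_upload(
    VkDevice device, VkPhysicalDevice physical_device, VkCommandPool command_pool, VkQueue queue,
    VkBuffer dst, const void* __restrict src, VkDeviceSize size
)
{
    // Host-visible staging buffer
    VkBuffer staging;
    VkDeviceMemory staging_memory;
    vulkan_buffer_create(
        device, physical_device, size,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
        staging, staging_memory
    );

    // Copy the source data into the staging buffer
    void* data;
    vkMapMemory(device, staging_memory, 0, size, 0, &data);
    memcpy(data, src, (size_t) size);
    vkUnmapMemory(device, staging_memory);

    // One-shot copy into the destination buffer
    VkCommandBuffer cmd;
    vulkan_command_buffers_create(device, command_pool, &cmd, 1);
    vulkan_single_commands_begin(cmd);

    VkBufferCopy copy_region = {};
    copy_region.size = size;
    vkCmdCopyBuffer(cmd, staging, dst, 1, &copy_region);
    vulkan_single_commands_end(queue, cmd);

    vulkan_single_commands_free(device, command_pool, cmd);
    vkDestroyBuffer(device, staging, NULL);
    vkFreeMemory(device, staging_memory, NULL);
}
// ---- end editor's sketch ----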
+void vulkan_vertex_buffer_create(
+    VkDevice device, VkPhysicalDevice physical_device, VkCommandPool command_pool, VkQueue queue,
+    VkBuffer& vertexBuffer, VkDeviceMemory& vertexBufferMemory, // references, so the caller receives the created handles
+    const void* __restrict vertices, int32 vertex_size, int32 vertex_count
+)
+{
+    VkDeviceSize bufferSize = vertex_size * vertex_count;
+
+    VkBuffer stagingBuffer;
+    VkDeviceMemory stagingBufferMemory;
+    vulkan_buffer_create(
+        device, physical_device,
+        bufferSize,
+        VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+        stagingBuffer, stagingBufferMemory
+    );
+
+    void* data;
+    vkMapMemory(device, stagingBufferMemory, 0, bufferSize, 0, &data);
+    memcpy(data, vertices, (size_t) bufferSize);
+    vkUnmapMemory(device, stagingBufferMemory);
+
+    // @question do I need to delete the vertex buffer (memory) on scene switch?
+    vulkan_buffer_create(
+        device, physical_device,
+        bufferSize,
+        VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+        vertexBuffer, vertexBufferMemory
+    );
+
+    // Copy buffer
+    // @performance Would it make sense to use a "global" temp buffer for that? If yes, we only need to reset
+    VkCommandBuffer commandBuffer;
+    vulkan_command_buffers_create(device, command_pool, &commandBuffer, 1);
+    vulkan_single_commands_begin(commandBuffer);
+
+    VkBufferCopy copyRegion = {};
+    copyRegion.size = bufferSize;
+    vkCmdCopyBuffer(commandBuffer, stagingBuffer, vertexBuffer, 1, &copyRegion);
+    vulkan_single_commands_end(queue, commandBuffer);
+
+    // @todo if we change behaviour according to the comment above we don't need this
+    vulkan_single_commands_free(device, command_pool, commandBuffer);
+
+    vkDestroyBuffer(device, stagingBuffer, NULL);
+    vkFreeMemory(device, stagingBufferMemory, NULL);
+}
+
+void vulkan_index_buffer_create(
+    VkDevice device, VkPhysicalDevice physical_device, VkCommandPool command_pool, VkQueue queue,
+    VkBuffer& indexBuffer, VkDeviceMemory& indexBufferMemory, // references, so the caller receives the created handles
+    const uint16* __restrict indices, int32 index_count
+) {
+    VkDeviceSize bufferSize = sizeof(uint16) * index_count;
+
+    VkBuffer stagingBuffer;
+    VkDeviceMemory stagingBufferMemory;
+    vulkan_buffer_create(
+        device, physical_device,
+        bufferSize,
+        VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+        stagingBuffer, stagingBufferMemory
+    );
+
+    void* data;
+    vkMapMemory(device, stagingBufferMemory, 0, bufferSize, 0, &data);
+    memcpy(data, indices, (size_t) bufferSize);
+    vkUnmapMemory(device, stagingBufferMemory);
+
+    vulkan_buffer_create(
+        device, physical_device,
+        bufferSize,
+        VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+        indexBuffer, indexBufferMemory
+    );
+
+    // Copy buffer
+    VkCommandBuffer commandBuffer;
+    vulkan_command_buffers_create(device, command_pool, &commandBuffer, 1);
+    vulkan_single_commands_begin(commandBuffer);
+
+    VkBufferCopy copyRegion = {};
+    copyRegion.size = bufferSize;
+    vkCmdCopyBuffer(commandBuffer, stagingBuffer, indexBuffer, 1, &copyRegion);
+    vulkan_single_commands_end(queue, commandBuffer);
+
+    // @todo if we change behaviour according to the comment above we don't need this
+    vulkan_single_commands_free(device, command_pool, commandBuffer);
+
+    vkDestroyBuffer(device, stagingBuffer, NULL);
+    vkFreeMemory(device, stagingBufferMemory, NULL);
+}
+
+void vulkan_uniform_buffers_create(
+    VkDevice device, VkPhysicalDevice physical_device,
+    VkBuffer* __restrict uniform_buffers, VkDeviceMemory* __restrict uniform_buffers_memory, void** __restrict uniform_buffers_mapped,
+    size_t uniform_buffer_object_size,
+    uint32 frames_in_flight
+)
+{
+    // e.g.
uniform_buffer_object_size = sizeof(struct {model; view; proj};) + VkDeviceSize bufferSize = uniform_buffer_object_size; + for (uint32 i = 0; i < frames_in_flight; ++i) { + vulkan_buffer_create( + device, physical_device, + bufferSize, + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + uniform_buffers[i], uniform_buffers_memory[i] + ); + + vkMapMemory(device, uniform_buffers_memory[i], 0, bufferSize, 0, &uniform_buffers_mapped[i]); } } diff --git a/hash/Crc.h b/hash/Crc.h index 8beaa5e..80e503e 100644 --- a/hash/Crc.h +++ b/hash/Crc.h @@ -12,8 +12,74 @@ #include "../stdlib/Types.h" #include "../architecture/Intrinsics.h" +static const uint32 crc_table[256] = +{ + 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 
0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D, +}; + inline -uint32 crc32_software_u8(const byte* data, size_t length) { +void crc32_table_fill(uint32* table) noexcept +{ + uint8 index = 0; + uint8 z; + + do { + table[index] = index; + for(z = 8; z; z--) { + table[index] = (table[index] & 1) + ? (table[index] >> 1) ^ 0xEDB88320 + : table[index] >> 1; + } + } while(++index); +} + +inline +uint32 crc32_software_u8_table(const uint8* data, uint32 length) noexcept +{ + uint32 crc = 0xFFFFFFFF; + while (length-- != 0) { + crc = crc_table[((uint8) crc ^ *data)] ^ (crc >> 8); + ++data; + } + + // return (~crc); also works + return (crc ^ 0xFFFFFFFF); +} + +inline +uint32 crc32_software_u8(const byte* data, size_t length) noexcept +{ uint32 crc = 0xFFFFFFFF; // Standard CRC-32 polynomial @@ -34,7 +100,8 @@ uint32 crc32_software_u8(const byte* data, size_t length) { } inline -uint32 crc32_intrin_u8(const byte* data, size_t length) { +uint32 crc32_intrin_u8(const byte* data, size_t length) noexcept +{ uint32 crc = 0xFFFFFFFF; for (size_t i = 0; i < length; ++i) { crc = intrin_crc32_u8(crc, data[i]); @@ -44,7 +111,8 @@ uint32 crc32_intrin_u8(const byte* data, size_t length) { } inline -uint32 crc32_intrin_u16(const uint16* data, size_t length) { +uint32 crc32_intrin_u16(const uint16* data, size_t length) noexcept +{ uint32 crc = 0xFFFFFFFF; for (size_t i = 0; i < length; ++i) { crc = intrin_crc32_u16(crc, data[i]); @@ -54,7 +122,8 @@ uint32 crc32_intrin_u16(const uint16* data, size_t length) { } inline -uint32 crc32_intrin_u32(const uint32* data, size_t length) { +uint32 crc32_intrin_u32(const uint32* data, size_t length) noexcept +{ uint32 crc = 0xFFFFFFFF; for (size_t i = 0; i < length; ++i) { crc = intrin_crc32_u32(crc, data[i]); @@ -64,7 +133,8 @@ uint32 crc32_intrin_u32(const uint32* data, size_t length) { } inline -uint32 crc32_intrin_u64(const uint64* data, size_t length) { +uint32 crc32_intrin_u64(const uint64* data, size_t length) noexcept +{ uint64 crc = 0xFFFFFFFF; for (size_t i = 0; i < length; ++i) { crc = intrin_crc32_u64(crc, data[i]); diff --git a/hash/GeneralHash.h b/hash/GeneralHash.h index 1856bb9..87969db 100644 --- a/hash/GeneralHash.h +++ b/hash/GeneralHash.h @@ -12,7 +12,8 @@ #include "../stdlib/Types.h" inline constexpr -uint64 hash_djb2(const char* key) { +uint64 hash_djb2(const char* key) noexcept +{ uint64 hash = 5381; int32 c; @@ -24,7 +25,7 @@ uint64 hash_djb2(const char* key) { } inline -uint64 hash_sdbm(const byte* key) +uint64 hash_sdbm(const byte* key) noexcept { uint64 hash = 0; int32 c; @@ -37,7 +38,7 @@ uint64 hash_sdbm(const byte* key) } inline -uint64 hash_lose_lose(const byte* key) +uint64 hash_lose_lose(const byte* key) noexcept { uint64 hash = 0; int32 c; @@ -50,7 +51,7 @@ uint64 hash_lose_lose(const byte* key) } inline -uint64 hash_polynomial_rolling(const char* str) +uint64 hash_polynomial_rolling(const char* str) noexcept { const int32 p = 31; const int32 m = 1000000009; @@ -67,7 +68,7 @@ uint64 hash_polynomial_rolling(const char* str) } inline -uint64 hash_fnv1a(const char* str) +uint64 hash_fnv1a(const char* str) noexcept { const uint64 FNV_OFFSET_BASIS = 14695981039346656037UL; const uint64 FNV_PRIME = 1099511628211UL; @@ -83,7 +84,7 @@ uint64 hash_fnv1a(const char* str) } inline -uint32 
hash_oat(const char* str) +uint32 hash_oat(const char* str) noexcept { uint32 hash = 0; @@ -101,7 +102,7 @@ uint32 hash_oat(const char* str) } inline -uint32 hash_ejb(const char* str) +uint32 hash_ejb(const char* str) noexcept { const uint32 PRIME1 = 37; const uint32 PRIME2 = 1048583; @@ -116,7 +117,8 @@ uint32 hash_ejb(const char* str) #define ROTL32(x, r) ((x) << (r)) | ((x) >> (32 - (r))) inline constexpr -uint32 hash_murmur3_32(const byte* key, size_t len, uint32 seed = 0) { +uint32 hash_murmur3_32(const byte* key, size_t len, uint32 seed = 0) noexcept +{ uint32 h = seed; uint32 k; @@ -164,7 +166,8 @@ uint32 hash_murmur3_32(const byte* key, size_t len, uint32 seed = 0) { } #define ROTL64(x, r) ((x) << (r)) | ((x) >> (64 - (r))) -uint64 hash_murmur3_64(const void* key, size_t len, uint64 seed = 0) { +uint64 hash_murmur3_64(const void* key, size_t len, uint64 seed = 0) noexcept +{ const uint64 c1 = 0x87c37b91114253d5ULL; const uint64 c2 = 0x4cf5ad432745937fULL; @@ -288,7 +291,7 @@ uint64 hash_murmur3_64(const void* key, size_t len, uint64 seed = 0) { //////////////////////////////////// inline constexpr -uint64 hash_djb2_seeded(const char* key, int32 seed) +uint64 hash_djb2_seeded(const char* key, int32 seed) noexcept { uint64 hash = 5381; int32 c; @@ -301,7 +304,7 @@ uint64 hash_djb2_seeded(const char* key, int32 seed) } inline -uint64 hash_sdbm_seeded(const char* key, int32 seed) +uint64 hash_sdbm_seeded(const char* key, int32 seed) noexcept { uint64 hash = 0; int32 c; @@ -314,7 +317,7 @@ uint64 hash_sdbm_seeded(const char* key, int32 seed) } inline -uint64 hash_lose_lose_seeded(const char* key, int32 seed) +uint64 hash_lose_lose_seeded(const char* key, int32 seed) noexcept { uint64 hash = 0; int32 c; @@ -327,7 +330,7 @@ uint64 hash_lose_lose_seeded(const char* key, int32 seed) } inline -uint64 hash_polynomial_rolling_seeded(const char* str, int32 seed) +uint64 hash_polynomial_rolling_seeded(const char* str, int32 seed) noexcept { const int32 p = 31; const int32 m = 1000000009; @@ -344,7 +347,7 @@ uint64 hash_polynomial_rolling_seeded(const char* str, int32 seed) } inline -uint64 hash_fnv1a_seeded(const char* str, int32 seed) +uint64 hash_fnv1a_seeded(const char* str, int32 seed) noexcept { const uint64 FNV_OFFSET_BASIS = 14695981039346656037UL; const uint64 FNV_PRIME = 1099511628211UL; @@ -360,7 +363,7 @@ uint64 hash_fnv1a_seeded(const char* str, int32 seed) } inline -uint64 hash_oat_seeded(const char* str, int32 seed) +uint64 hash_oat_seeded(const char* str, int32 seed) noexcept { uint64 hash = 0; @@ -378,7 +381,7 @@ uint64 hash_oat_seeded(const char* str, int32 seed) } inline -uint64 hash_ejb_seeded(const char* str, int32 seed) +uint64 hash_ejb_seeded(const char* str, int32 seed) noexcept { const uint64 PRIME1 = 37; const uint64 PRIME2 = 1048583; @@ -392,7 +395,7 @@ uint64 hash_ejb_seeded(const char* str, int32 seed) } inline -uint32 intrin_hash(uint64 a, uint64 b = 0) +uint32 intrin_hash(uint64 a, uint64 b = 0) noexcept { uint8 seed[16] = { 0xaa, 0x9b, 0xbd, 0xb8, 0xa1, 0x98, 0xac, 0x3f, 0x1f, 0x94, 0x07, 0xb3, 0x8c, 0x27, 0x93, 0x69, diff --git a/image/Bitmap.h b/image/Bitmap.h index 1d31d35..773f4c4 100644 --- a/image/Bitmap.h +++ b/image/Bitmap.h @@ -193,7 +193,7 @@ struct Bitmap { byte* data; // WARNING: This is not the owner of the data. 
The owner is the FileBody }; -void generate_default_bitmap_references(const FileBody* file, Bitmap* bitmap) +void generate_default_bitmap_references(const FileBody* file, Bitmap* bitmap) noexcept { bitmap->size = (uint32) file->size; bitmap->data = file->content; @@ -267,7 +267,7 @@ void generate_default_bitmap_references(const FileBody* file, Bitmap* bitmap) bitmap->pixels = (byte *) (file->content + bitmap->header.offset); } -void image_bmp_generate(const FileBody* src_data, Image* image) +void image_bmp_generate(const FileBody* src_data, Image* image) noexcept { // @performance We are generating the struct and then filling the data. // There is some assignment/copy overhead diff --git a/image/Image.cpp b/image/Image.cpp index 118c48e..f9160c0 100644 --- a/image/Image.cpp +++ b/image/Image.cpp @@ -32,7 +32,7 @@ void image_from_file(Image* __restrict image, const char* __restrict path, RingM } } -void image_flip_vertical(RingMemory* __restrict ring, Image* __restrict image) +void image_flip_vertical(RingMemory* __restrict ring, Image* __restrict image) noexcept { uint32 stride = image->width * sizeof(uint32); byte* temp = ring_get_memory(ring, image->pixel_count * sizeof(uint32)); @@ -49,7 +49,7 @@ void image_flip_vertical(RingMemory* __restrict ring, Image* __restrict image) } inline -int32 image_pixel_size_from_type(byte type) +int32 image_pixel_size_from_type(byte type) noexcept { int32 channel_size = type & IMAGE_SETTING_CHANNEL_4_SIZE ? 4 : 1; int32 channel_count = type & IMAGE_SETTING_CHANNEL_COUNT; @@ -58,7 +58,7 @@ int32 image_pixel_size_from_type(byte type) } inline -int32 image_data_size(const Image* image) +int32 image_data_size(const Image* image) noexcept { return image->pixel_count * image_pixel_size_from_type(image->image_settings) + sizeof(image->width) + sizeof(image->height) @@ -66,7 +66,7 @@ int32 image_data_size(const Image* image) } inline -uint32 image_header_from_data(const byte* __restrict data, Image* __restrict image) +uint32 image_header_from_data(const byte* __restrict data, Image* __restrict image) noexcept { const byte* start = data; @@ -84,7 +84,7 @@ uint32 image_header_from_data(const byte* __restrict data, Image* __restrict ima return (int32) (data - start); } -uint32 image_from_data(const byte* __restrict data, Image* __restrict image) +uint32 image_from_data(const byte* __restrict data, Image* __restrict image) noexcept { const byte* pos = data; pos += image_header_from_data(data, image); @@ -97,7 +97,7 @@ uint32 image_from_data(const byte* __restrict data, Image* __restrict image) } inline -uint32 image_header_to_data(const Image* __restrict image, byte* __restrict data) +uint32 image_header_to_data(const Image* __restrict image, byte* __restrict data) noexcept { byte* start = data; @@ -113,7 +113,7 @@ uint32 image_header_to_data(const Image* __restrict image, byte* __restrict data return (int32) (data - start); } -uint32 image_to_data(const Image* __restrict image, byte* __restrict data) +uint32 image_to_data(const Image* __restrict image, byte* __restrict data) noexcept { byte* pos = data; pos += image_header_to_data(image, data); diff --git a/image/Qoi.h b/image/Qoi.h index 8b379fb..7764a3c 100644 --- a/image/Qoi.h +++ b/image/Qoi.h @@ -33,14 +33,14 @@ // @performance I feel like there is some more optimization possible by handling fully transparent pixels in a special way // @todo We need to implement monochrome handling, which is very important for game assets that often use monochrome assets for all kinds of things (e.g. 
translucency) -const byte optable[128] = { +static const byte optable[128] = { 0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3 }; -int32 qoi_encode(const Image* image, byte* data) +int32 qoi_encode(const Image* image, byte* data) noexcept { byte* start = data; data += image_header_to_data(image, data); @@ -191,7 +191,8 @@ int32 qoi_encode(const Image* image, byte* data) return (int32) (data - start); } -int32 qoi_decode_4(const byte* data, Image* image) +static +int32 qoi_decode_4(const byte* data, Image* image) noexcept { uint32 px_len = image->width * image->height * 4; v4_byte px = {0, 0, 0, 255}; @@ -241,7 +242,8 @@ int32 qoi_decode_4(const byte* data, Image* image) return px_len; } -int32 qoi_decode_3(const byte* data, Image* image) +static +int32 qoi_decode_3(const byte* data, Image* image) noexcept { uint32 px_len = image->width * image->height * 3; v3_byte px = {0, 0, 0}; @@ -288,7 +290,7 @@ int32 qoi_decode_3(const byte* data, Image* image) return px_len; } -int32 qoi_decode(const byte* data, Image* image) +int32 qoi_decode(const byte* data, Image* image) noexcept { int32 header_length = image_header_from_data(data, image); diff --git a/image/Tga.h b/image/Tga.h index f57e355..bc19854 100644 --- a/image/Tga.h +++ b/image/Tga.h @@ -48,7 +48,7 @@ struct Tga { byte* data; // WARNING: This is not the owner of the data. The owner is the FileBody }; -void generate_default_tga_references(const FileBody* file, Tga* tga) +void generate_default_tga_references(const FileBody* file, Tga* tga) noexcept { tga->size = (uint32) file->size; tga->data = file->content; @@ -76,7 +76,7 @@ void generate_default_tga_references(const FileBody* file, Tga* tga) + tga->header.color_map_length * (tga->header.color_map_bits / 8); // can be 0 } -void image_tga_generate(const FileBody* src_data, Image* image) +void image_tga_generate(const FileBody* src_data, Image* image) noexcept { // @performance We are generating the struct and then filling the data. 
// There is some assignment/copy overhead diff --git a/input/Input.h b/input/Input.h index d4482c1..ccbda0a 100644 --- a/input/Input.h +++ b/input/Input.h @@ -174,7 +174,7 @@ struct Input { }; inline -void input_init(Input* input, uint8 size, void* callback_data, BufferMemory* buf) +void input_init(Input* input, uint8 size, void* callback_data, BufferMemory* buf) noexcept { // Init input input->callback_data = callback_data; @@ -197,7 +197,7 @@ void input_init(Input* input, uint8 size, void* callback_data, BufferMemory* buf } inline -void input_clean_state(InputKey* active_keys, KeyPressType press_status = KEY_PRESS_TYPE_RELEASED) +void input_clean_state(InputKey* active_keys, KeyPressType press_status = KEY_PRESS_TYPE_RELEASED) noexcept { if (press_status) { for (int32 i = 0; i < MAX_KEY_PRESS_TYPES; ++i) { @@ -211,7 +211,7 @@ void input_clean_state(InputKey* active_keys, KeyPressType press_status = KEY_PR } inline -bool input_action_exists(const InputKey* active_keys, int16 key, KeyPressType press_type = KEY_PRESS_TYPE_PRESSED) +bool input_action_exists(const InputKey* active_keys, int16 key, KeyPressType press_type = KEY_PRESS_TYPE_PRESSED) noexcept { return (active_keys[0].scan_code == key && active_keys[0].key_state == press_type) || (active_keys[1].scan_code == key && active_keys[1].key_state == press_type) @@ -227,7 +227,7 @@ bool input_action_exists(const InputKey* active_keys, int16 key, KeyPressType pr } inline -bool input_is_down(const InputKey* active_keys, int16 key) +bool input_is_down(const InputKey* active_keys, int16 key) noexcept { return (active_keys[0].scan_code == key && active_keys[0].key_state != KEY_PRESS_TYPE_RELEASED) || (active_keys[1].scan_code == key && active_keys[1].key_state != KEY_PRESS_TYPE_RELEASED) @@ -242,7 +242,7 @@ bool input_is_down(const InputKey* active_keys, int16 key) } inline -bool input_is_pressed(const InputKey* active_keys, int16 key) +bool input_is_pressed(const InputKey* active_keys, int16 key) noexcept { return (active_keys[0].scan_code == key && active_keys[0].key_state == KEY_PRESS_TYPE_PRESSED) || (active_keys[1].scan_code == key && active_keys[1].key_state == KEY_PRESS_TYPE_PRESSED) @@ -257,7 +257,7 @@ bool input_is_pressed(const InputKey* active_keys, int16 key) } inline -bool input_is_held(const InputKey* active_keys, int16 key) +bool input_is_held(const InputKey* active_keys, int16 key) noexcept { return (active_keys[0].scan_code == key && active_keys[0].key_state == KEY_PRESS_TYPE_HELD) || (active_keys[1].scan_code == key && active_keys[1].key_state == KEY_PRESS_TYPE_HELD) @@ -273,7 +273,7 @@ bool input_is_held(const InputKey* active_keys, int16 key) } inline -bool input_is_released(const InputKey* active_keys, int16 key) +bool input_is_released(const InputKey* active_keys, int16 key) noexcept { return (active_keys[0].scan_code == key && active_keys[0].key_state == KEY_PRESS_TYPE_RELEASED) || (active_keys[1].scan_code == key && active_keys[1].key_state == KEY_PRESS_TYPE_RELEASED) @@ -289,7 +289,7 @@ bool input_is_released(const InputKey* active_keys, int16 key) } inline -bool input_was_down(const InputKey* active_keys, int16 key) +bool input_was_down(const InputKey* active_keys, int16 key) noexcept { return (active_keys[0].scan_code == key && active_keys[0].key_state == KEY_PRESS_TYPE_RELEASED) || (active_keys[1].scan_code == key && active_keys[1].key_state == KEY_PRESS_TYPE_RELEASED) @@ -308,7 +308,7 @@ inline bool inputs_are_down( const InputKey* active_keys, int16 key0, int16 key1 = 0, int16 key2 = 0, int16 key3 = 0, int16 key4 
= 0 -) { +) noexcept { return (key0 != 0 && input_is_down(active_keys, key0)) && (key1 == 0 || input_is_down(active_keys, key1)) && (key2 == 0 || input_is_down(active_keys, key2)) @@ -316,7 +316,7 @@ bool inputs_are_down( && (key4 == 0 || input_is_down(active_keys, key4)); } -void input_add_callback(InputMapping* mapping, uint8 hotkey, InputCallback callback) +void input_add_callback(InputMapping* mapping, uint8 hotkey, InputCallback callback) noexcept { mapping->hotkeys[hotkey].callback = callback; } @@ -329,7 +329,7 @@ input_add_hotkey( InputMapping* mapping, uint8 hotkey, int16 key0, int16 key1 = 0, int16 key2 = 0, KeyPressType press_type = KEY_PRESS_TYPE_PRESSED -) +) noexcept { int32 count = 0; @@ -356,7 +356,7 @@ input_add_hotkey( } inline -bool hotkey_is_active(const uint16* active_hotkeys, uint16 hotkey) +bool hotkey_is_active(const uint16* active_hotkeys, uint16 hotkey) noexcept { return active_hotkeys[0] == hotkey || active_hotkeys[1] == hotkey @@ -368,7 +368,7 @@ bool hotkey_is_active(const uint16* active_hotkeys, uint16 hotkey) // similar to hotkey_is_active but instead of just performing a lookup in the input_hotkey_state created results // this is actively checking the current input state (not the hotkey state) inline -bool hotkey_keys_are_active(const InputKey* active_keys, const InputMapping* mapping, uint16 hotkey) +bool hotkey_keys_are_active(const InputKey* active_keys, const InputMapping* mapping, uint16 hotkey) noexcept { int16 key0 = mapping->hotkeys[(hotkey - 1)].scan_codes[0]; int16 key1 = mapping->hotkeys[(hotkey - 1)].scan_codes[1]; @@ -398,7 +398,7 @@ bool hotkey_keys_are_active(const InputKey* active_keys, const InputMapping* map } inline -void input_set_state(InputKey* __restrict active_keys, const InputKey* __restrict new_key) +void input_set_state(InputKey* __restrict active_keys, const InputKey* __restrict new_key) noexcept { InputKey* free_state = NULL; @@ -432,7 +432,7 @@ void input_set_state(InputKey* __restrict active_keys, const InputKey* __restric // We need to poll them and then check the old state against this new state (annoying but necessary) // Mice are fully supported by RawInput and are fairly generalized in terms of their buttons -> no special function needed inline -void input_set_controller_state(Input* input, ControllerInput* controller, uint64 time) +void input_set_controller_state(Input* input, ControllerInput* controller, uint64 time) noexcept { // Check active keys that might need to be set to inactive for (int32 i = 0; i < MAX_KEY_PRESSES; ++i) { @@ -506,7 +506,7 @@ void input_set_controller_state(Input* input, ControllerInput* controller, uint6 input->general_states |= INPUT_STATE_GENERAL_BUTTON_CHANGE; } -void input_hotkey_state(Input* input) +void input_hotkey_state(Input* input) noexcept { InputState* state = &input->state; memset(state->active_hotkeys, 0, sizeof(uint16) * MAX_KEY_PRESSES); @@ -630,7 +630,7 @@ void input_hotkey_state(Input* input) } // @todo We probably need a way to unset a specific key and hotkey after processing it -bool input_key_is_longpress(const InputState* state, int16 key, uint64 time, f32 dt = 0.0f) { +bool input_key_is_longpress(const InputState* state, int16 key, uint64 time, f32 dt = 0.0f) noexcept { for (int32 i = 0; i < MAX_KEY_PRESS_TYPES; ++i) { if (state->active_keys[i].scan_code == key) { return (f32) (time - state->active_keys[i].time) / 1000.0f >= (dt == 0.0f ? 
INPUT_LONG_PRESS_DURATION : dt); @@ -641,7 +641,7 @@ bool input_key_is_longpress(const InputState* state, int16 key, uint64 time, f32 } // @todo I wrote this code at 9am after staying awake for the whole night and that is how that code looks like... fix it! -bool input_hotkey_is_longpress(const Input* input, uint8 hotkey, uint64 time, f32 dt = 0.0f) { +bool input_hotkey_is_longpress(const Input* input, uint8 hotkey, uint64 time, f32 dt = 0.0f) noexcept { bool is_longpress = false; for (int32 i = 0; i < MAX_KEY_PRESSES; ++i) { if (input->state.active_hotkeys[i] != hotkey) { @@ -688,7 +688,7 @@ bool input_hotkey_is_longpress(const Input* input, uint8 hotkey, uint64 time, f3 return is_longpress; } -uint32 input_get_typed_character(InputState* state, uint64 time, uint64 dt) +uint32 input_get_typed_character(InputState* state, uint64 time, uint64 dt) noexcept { byte keyboard_state[256] = {}; for (int32 key_state = 0; key_state < MAX_KEY_PRESS_TYPES; ++key_state) { diff --git a/log/DebugContainer.h b/log/DebugContainer.h index 81f954d..0b3dfb5 100644 --- a/log/DebugContainer.h +++ b/log/DebugContainer.h @@ -24,11 +24,11 @@ struct DebugContainer { #endif LogMemory* log_memory; - PerformanceProfileResult* perf_stats; - atomic_64 int64* stats_counter; + PerformanceProfileResult* perf_stats; PerformanceProfiler** perf_current_scope; + int32* perf_active; }; #endif \ No newline at end of file diff --git a/log/DebugMemory.h b/log/DebugMemory.h index 47ebe04..3f91f7a 100644 --- a/log/DebugMemory.h +++ b/log/DebugMemory.h @@ -57,7 +57,7 @@ enum MemoryDebugType { }; inline -DebugMemory* debug_memory_find(uintptr_t start) +DebugMemory* debug_memory_find(uintptr_t start) noexcept { for (uint64 i = 0; i < _dmc->memory_size; ++i) { if (_dmc->memory_stats[i].start <= start @@ -70,7 +70,7 @@ DebugMemory* debug_memory_find(uintptr_t start) return NULL; } -void debug_memory_init(uintptr_t start, uint64 size) +void debug_memory_init(uintptr_t start, uint64 size) noexcept { if (!start || !_dmc) { return; @@ -101,7 +101,7 @@ void debug_memory_init(uintptr_t start, uint64 size) ++_dmc->memory_element_idx; } -void debug_memory_log(uintptr_t start, uint64 size, int32 type, const char* function) +void debug_memory_log(uintptr_t start, uint64 size, int32 type, const char* function) noexcept { if (!start || !_dmc) { return; @@ -133,7 +133,7 @@ void debug_memory_log(uintptr_t start, uint64 size, int32 type, const char* func } } -void debug_memory_reserve(uintptr_t start, uint64 size, int32 type, const char* function) +void debug_memory_reserve(uintptr_t start, uint64 size, int32 type, const char* function) noexcept { if (!start || !_dmc) { return; @@ -160,7 +160,7 @@ void debug_memory_reserve(uintptr_t start, uint64 size, int32 type, const char* } // undo reserve -void debug_memory_free(uintptr_t start) +void debug_memory_free(uintptr_t start) noexcept { if (!start || !_dmc) { return; @@ -182,7 +182,7 @@ void debug_memory_free(uintptr_t start) // @bug This probably requires thread safety inline -void debug_memory_reset() +void debug_memory_reset() noexcept { if (!_dmc) { return; diff --git a/log/Log.h b/log/Log.h index 58fdab0..3ece18d 100644 --- a/log/Log.h +++ b/log/Log.h @@ -11,13 +11,32 @@ #include "../stdlib/Types.h" #include "../compiler/CompilerUtils.h" +#include "../architecture/Intrinsics.h" #include "../utils/StringUtils.h" #include "../platform/win32/TimeUtils.h" +/** + * The logging is both using file logging and in-memory logging. 
+ * Debug builds also log to the debug console, or alternatively to standard output if no dedicated debug console is available
+ */
+
 #define LOG_DATA_ARRAY 5
 
 #ifndef LOG_LEVEL
-    #define LOG_LEVEL 0
+    // 0 = no logging at all
+    // 1 = release logging
+    // 2 = internal logging
+    // 3 = debug logging
+    // 4 = most verbose (probably has significant performance impacts)
+    #if DEBUG
+        #define LOG_LEVEL 3
+    #elif INTERNAL
+        #define LOG_LEVEL 2
+    #elif RELEASE
+        #define LOG_LEVEL 1
+    #else
+        #define LOG_LEVEL 0
+    #endif
 #endif
 
 #ifndef MAX_LOG_LENGTH
@@ -62,6 +81,11 @@ struct LogMessage {
     int32 line;
     uint64 time;
     char* message;
+
+    // We use this element to force a new line when saving the log to the file
+    // This is MUCH faster compared to iteratively exporting every log message with a new line
+    // The new line makes it much easier to manually read the log file (especially during development)
+    char newline;
 };
 
 struct LogData {
@@ -74,7 +98,7 @@ struct LogDataArray{
 };
 
 // @bug This probably requires thread safety
-byte* log_get_memory()
+byte* log_get_memory() noexcept
 {
     if (_log_memory->pos + MAX_LOG_LENGTH > _log_memory->size) {
         _log_memory->pos = 0;
@@ -88,6 +112,7 @@ byte* log_get_memory()
     return offset;
 }
 
+// @performance This should only be called async to avoid blocking (e.g. render loop)
 void log_to_file()
 {
     // we don't log an empty log pool
@@ -117,9 +142,9 @@ void log_to_file()
     #endif
 }
 
-void log(const char* str, bool should_log, const char* file, const char* function, int32 line)
+void log(const char* str, const char* file, const char* function, int32 line)
 {
-    if (!should_log || !_log_memory) {
+    if (!_log_memory) {
         return;
     }
 
@@ -127,12 +152,13 @@ void log(const char* str, bool should_log, const char* file, const char* functio
     while (len > 0) {
         LogMessage* msg = (LogMessage *) log_get_memory();
 
-        // Fill file
+        // Fill in the log message metadata
         msg->file = file;
         msg->function = function;
        msg->line = line;
         msg->message = (char *) (msg + 1);
         msg->time = system_time();
+        msg->newline = '\n';
 
         int32 message_length = (int32) OMS_MIN(MAX_LOG_LENGTH - sizeof(LogMessage) - 1, len);
 
@@ -141,6 +167,12 @@ void log(const char* str, bool should_log, const char* file, const char* functio
         str += message_length;
         len -= MAX_LOG_LENGTH - sizeof(LogMessage);
 
+        #if DEBUG
+        // In debug mode we always output the log message to the debug console
+        compiler_debug_print(msg->message);
+        compiler_debug_print("\n");
+        #endif
+
         if (_log_memory->size - _log_memory->pos < MAX_LOG_LENGTH) {
             log_to_file();
             _log_memory->pos = 0;
         }
     }
 }
 
-void log(const char* format, LogDataArray data, bool should_log, const char* file, const char* function, int32 line)
+void log(const char* format, LogDataArray data, const char* file, const char* function, int32 line)
 {
     ASSERT_SIMPLE(str_length(format) + str_length(file) + str_length(function) + 50 < MAX_LOG_LENGTH);
 
-    if (!should_log || !_log_memory) {
+    if (!_log_memory) {
         return;
     }
 
     if (data.data[0].type == LOG_DATA_VOID) {
-        log(format, should_log, file, function, line);
+        log(format, file, function, line);
 
         return;
     }
 
@@ -167,6 +199,7 @@ void log(const char* format, LogDataArray data, bool should_log, const char* fil
     msg->line = line;
     msg->message = (char *) (msg + 1);
     msg->time = system_time();
+    msg->newline = '\n';
 
     char temp_format[MAX_LOG_LENGTH];
     str_copy_short(msg->message, format);
@@ -214,25 +247,245 @@ void log(const char* format, LogDataArray data, bool should_log, const char* fil
         }
     }
 
+    #if DEBUG
+    // In debug mode we always output the log message to the debug console
+    compiler_debug_print(msg->message);
+    compiler_debug_print("\n");
+    #endif
+
     if (_log_memory->size - _log_memory->pos < MAX_LOG_LENGTH) {
         log_to_file();
         _log_memory->pos = 0;
     }
 }
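A rough sketch of the slot arithmetic used by log() above (assuming MAX_LOG_LENGTH and LogMessage as defined in this file; log_slots_needed is a hypothetical helper, not part of this patch). Messages live in fixed-size ring slots, so a long string is simply split across several of them:

    constexpr uint64 log_slots_needed(uint64 len) noexcept
    {
        // Each slot reserves sizeof(LogMessage) bytes of metadata plus a terminator
        const uint64 payload = MAX_LOG_LENGTH - sizeof(LogMessage) - 1;

        // Round up: a partially filled last slot still consumes a whole slot
        return (len + payload - 1) / payload;
    }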
-#define LOG(should_log, str) log((str), (should_log), __FILE__, __func__, __LINE__)
-#define LOG_FORMAT(should_log, format, ...) log((format), LogDataArray{__VA_ARGS__}, (should_log), __FILE__, __func__, __LINE__)
 #define LOG_TO_FILE() log_to_file()
 
-#if LOG_LEVEL == 2
-    #define LOG_LEVEL_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, true, __FILE__, __func__, __LINE__)
-    #define LOG_LEVEL_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, true, __FILE__, __func__, __LINE__)
+#if LOG_LEVEL == 4
+    // Complete logging
+    #define LOG_1(str) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_2(str) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_3(str) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_4(str) log((str), __FILE__, __func__, __LINE__)
+
+    #define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_3(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_4(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+
+    #define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_TRUE_2(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_TRUE_3(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_TRUE_4(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
+
+    #define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_FALSE_2(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_FALSE_3(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_FALSE_4(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
+
+    #define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_TRUE_4(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+
+    #define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_FALSE_4(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+
+    #define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
+    #define LOG_IF_2(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
+    #define LOG_IF_3(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
+    #define LOG_IF_4(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
+
+    #define LOG_CYCLE_START(var_name) uint64 var_name##_start_time = intrin_timestamp_counter()
+    #define LOG_CYCLE_END(var_name, format) \
+        uint64 var_name##_duration = (uint64) (intrin_timestamp_counter() - var_name##_start_time); \
+        LOG_FORMAT_1((format), {{LOG_DATA_UINT64, &var_name##_duration}})
+
+    // Only intended for manual debugging
+    // Of course a developer could always use printf but by providing this option,
+    // we hope to avoid the situation where someone forgets to remove the printf
+    // By using this macro we at least ensure it gets removed from the release build
+    #define DEBUG_VERBOSE(str) compiler_debug_print((str))
+    #define DEBUG_FORMAT_VERBOSE(format, ...) \
+        ({ \
+            char debug_str[1024]; \
+            sprintf_fast(debug_str, 1024, format, __VA_ARGS__); \
+            compiler_debug_print((debug_str)); \
+        })
+#elif LOG_LEVEL == 3
+    #define LOG_1(str) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_2(str) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_3(str) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_4(str) ((void) 0)
+
+    #define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_3(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_4(format, ...) ((void) 0)
+
+    #define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_TRUE_2(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_TRUE_3(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_TRUE_4(should_log, str) ((void) 0)
+
+    #define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_FALSE_2(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_FALSE_3(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_FALSE_4(should_log, str) ((void) 0)
+
+    #define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_TRUE_3(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0)
+
+    #define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_FALSE_3(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0)
+
+    #define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
+    #define LOG_IF_2(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
+    #define LOG_IF_3(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
+    // Only logs on failure
+    #define LOG_IF_4(expr, str_succeeded, str_failed) if (!(expr)) log((str_failed), __FILE__, __func__, __LINE__)
+
+    #define LOG_CYCLE_START(var_name) uint64 var_name##_start_time = intrin_timestamp_counter()
+    #define LOG_CYCLE_END(var_name, format) \
+        uint64 var_name##_duration = (uint64) (intrin_timestamp_counter() - var_name##_start_time); \
+        LOG_FORMAT_1((format), {{LOG_DATA_UINT64, &var_name##_duration}})
+
+    #define DEBUG_VERBOSE(str) ((void) 0)
+    #define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0)
+#elif LOG_LEVEL == 2
+    #define LOG_1(str) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_2(str) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_3(str) ((void) 0)
+    #define LOG_4(str) ((void) 0)
+
+    #define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_2(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_3(format, ...) ((void) 0)
+    #define LOG_FORMAT_4(format, ...) ((void) 0)
+
+    #define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_TRUE_2(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_TRUE_3(should_log, str) ((void) 0)
+    #define LOG_TRUE_4(should_log, str) ((void) 0)
+
+    #define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_FALSE_2(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_FALSE_3(should_log, str) ((void) 0)
+    #define LOG_FALSE_4(should_log, str) ((void) 0)
+
+    #define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_TRUE_2(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_TRUE_3(should_log, format, ...) ((void) 0)
+    #define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0)
+
+    #define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_FALSE_2(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_FALSE_3(should_log, format, ...) ((void) 0)
+    #define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0)
+
+    #define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
+    #define LOG_IF_2(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
+    // Only logs on failure
+    #define LOG_IF_3(expr, str_succeeded, str_failed) if (!(expr)) log((str_failed), __FILE__, __func__, __LINE__)
+    #define LOG_IF_4(expr, str_succeeded, str_failed) ((void) 0)
+
+    #define LOG_CYCLE_START(var_name) uint64 var_name##_start_time = intrin_timestamp_counter()
+    #define LOG_CYCLE_END(var_name, format) \
+        uint64 var_name##_duration = (uint64) (intrin_timestamp_counter() - var_name##_start_time); \
+        LOG_FORMAT_1((format), {{LOG_DATA_UINT64, &var_name##_duration}})
+
+    #define DEBUG_VERBOSE(str) ((void) 0)
+    #define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0)
 #elif LOG_LEVEL == 1
-    #define LOG_LEVEL_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, true, __FILE__, __func__, __LINE__)
-    #define LOG_LEVEL_2(format, ...) ((void) 0)
+    #define LOG_1(str) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_2(str) ((void) 0)
+    #define LOG_3(str) ((void) 0)
+    #define LOG_4(str) ((void) 0)
+
+    #define LOG_FORMAT_1(format, ...) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_2(format, ...) ((void) 0)
+    #define LOG_FORMAT_3(format, ...) ((void) 0)
+    #define LOG_FORMAT_4(format, ...) ((void) 0)
+
+    #define LOG_TRUE_1(should_log, str) if ((should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_TRUE_2(should_log, str) ((void) 0)
+    #define LOG_TRUE_3(should_log, str) ((void) 0)
+    #define LOG_TRUE_4(should_log, str) ((void) 0)
+
+    #define LOG_FALSE_1(should_log, str) if (!(should_log)) log((str), __FILE__, __func__, __LINE__)
+    #define LOG_FALSE_2(should_log, str) ((void) 0)
+    #define LOG_FALSE_3(should_log, str) ((void) 0)
+    #define LOG_FALSE_4(should_log, str) ((void) 0)
+
+    #define LOG_FORMAT_TRUE_1(should_log, format, ...) if ((should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_TRUE_2(should_log, format, ...) ((void) 0)
+    #define LOG_FORMAT_TRUE_3(should_log, format, ...) ((void) 0)
+    #define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0)
+
+    #define LOG_FORMAT_FALSE_1(should_log, format, ...) if (!(should_log)) log((format), LogDataArray{__VA_ARGS__}, __FILE__, __func__, __LINE__)
+    #define LOG_FORMAT_FALSE_2(should_log, format, ...) ((void) 0)
+    #define LOG_FORMAT_FALSE_3(should_log, format, ...) ((void) 0)
+    #define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0)
+
+    #define LOG_IF_1(expr, str_succeeded, str_failed) if ((expr)) { log((str_succeeded), __FILE__, __func__, __LINE__); } else { log((str_failed), __FILE__, __func__, __LINE__); }
+    // Only logs on failure
+    #define LOG_IF_2(expr, str_succeeded, str_failed) if (!(expr)) log((str_failed), __FILE__, __func__, __LINE__)
+    #define LOG_IF_3(expr, str_succeeded, str_failed) ((void) 0)
+    #define LOG_IF_4(expr, str_succeeded, str_failed) ((void) 0)
+
+    #define LOG_CYCLE_START(var_name) ((void) 0)
+    #define LOG_CYCLE_END(var_name, format) ((void) 0)
+
+    #define DEBUG_VERBOSE(str) ((void) 0)
+    #define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0)
 #elif LOG_LEVEL == 0
-    #define LOG_LEVEL_1(format, ...) ((void) 0)
-    #define LOG_LEVEL_2(format, ...) ((void) 0)
+    // No logging whatsoever
+    #define LOG_1(str) ((void) 0)
+    #define LOG_2(str) ((void) 0)
+    #define LOG_3(str) ((void) 0)
+    #define LOG_4(str) ((void) 0)
+
+    #define LOG_FORMAT_1(format, ...) ((void) 0)
+    #define LOG_FORMAT_2(format, ...) ((void) 0)
+    #define LOG_FORMAT_3(format, ...) ((void) 0)
+    #define LOG_FORMAT_4(format, ...) ((void) 0)
+
+    #define LOG_TRUE_1(should_log, str) ((void) 0)
+    #define LOG_TRUE_2(should_log, str) ((void) 0)
+    #define LOG_TRUE_3(should_log, str) ((void) 0)
+    #define LOG_TRUE_4(should_log, str) ((void) 0)
+
+    #define LOG_FALSE_1(should_log, str) ((void) 0)
+    #define LOG_FALSE_2(should_log, str) ((void) 0)
+    #define LOG_FALSE_3(should_log, str) ((void) 0)
+    #define LOG_FALSE_4(should_log, str) ((void) 0)
+
+    #define LOG_FORMAT_TRUE_1(should_log, format, ...) ((void) 0)
+    #define LOG_FORMAT_TRUE_2(should_log, format, ...) ((void) 0)
+    #define LOG_FORMAT_TRUE_3(should_log, format, ...) ((void) 0)
+    #define LOG_FORMAT_TRUE_4(should_log, format, ...) ((void) 0)
+
+    #define LOG_FORMAT_FALSE_1(should_log, format, ...) ((void) 0)
+    #define LOG_FORMAT_FALSE_2(should_log, format, ...) ((void) 0)
+    #define LOG_FORMAT_FALSE_3(should_log, format, ...) ((void) 0)
+    #define LOG_FORMAT_FALSE_4(should_log, format, ...) ((void) 0)
+
+    #define LOG_IF_1(expr, str_succeeded, str_failed) ((void) 0)
+    #define LOG_IF_2(expr, str_succeeded, str_failed) ((void) 0)
+    #define LOG_IF_3(expr, str_succeeded, str_failed) ((void) 0)
+    #define LOG_IF_4(expr, str_succeeded, str_failed) ((void) 0)
+
+    #define LOG_CYCLE_START(var_name) ((void) 0)
+    #define LOG_CYCLE_END(var_name, format) ((void) 0)
+
+    #define DEBUG_VERBOSE(str) ((void) 0)
+    #define DEBUG_FORMAT_VERBOSE(str, ...) ((void) 0)
 #endif
 
 #endif
\ No newline at end of file
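A short usage sketch for the leveled macros above (illustrative values; assumes the default DEBUG mapping to LOG_LEVEL 3):

    void log_macro_usage(uint32 asset_count, bool loaded)
    {
        LOG_1("engine start");                  // kept in every build that logs at all
        LOG_FORMAT_2("AMS holds %n assets", {{LOG_DATA_UINT32, &asset_count}});
        LOG_TRUE_3(loaded, "assets resident");  // only logs when loaded is true
        LOG_IF_2(loaded, "asset load ok", "asset load FAILED");

        LOG_CYCLE_START(frame);
        // ... measured work ...
        LOG_CYCLE_END(frame, "frame took %l cycles");

        LOG_4("verbose per-frame spam");        // compiles to ((void) 0) at LOG_LEVEL 3
    }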
diff --git a/log/PerformanceProfiler.h b/log/PerformanceProfiler.h
index 3ad21e3..290d677 100644
--- a/log/PerformanceProfiler.h
+++ b/log/PerformanceProfiler.h
@@ -11,34 +11,65 @@
 #include "../stdlib/Types.h"
 #include "../platform/win32/TimeUtils.h"
+#include "../thread/Spinlock.cpp"
+#include "../thread/Atomic.h"
+#include "../system/Allocator.h"
+#include "../hash/GeneralHash.h"
 #include "../architecture/Intrinsics.h"
 #include "../compiler/CompilerUtils.h"
+#include "Log.h"
+
+#ifndef PERFORMANCE_PROFILE_STATS
+    #define PERFORMANCE_PROFILE_STATS 1
+    enum TimingStats {
+        PROFILE_TEMP, // used for quick test debugging, not for permanent use
+
+        PROFILE_FILE_UTILS,
+        PROFILE_BUFFER_ALLOC,
+        PROFILE_CHUNK_ALLOC,
+        PROFILE_RING_ALLOC,
+        PROFILE_CMD_ITERATE,
+        PROFILE_CMD_FONT_LOAD_SYNC,
+        PROFILE_CMD_SHADER_LOAD_SYNC,
+        PROFILE_CMD_LAYOUT_LOAD_SYNC,
+        PROFILE_CMD_THEME_LOAD_SYNC,
+        PROFILE_CMD_UI_LOAD_SYNC,
+        PROFILE_LAYOUT_FROM_DATA,
+        PROFILE_LAYOUT_FROM_THEME,
+        PROFILE_THEME_FROM_THEME,
+        PROFILE_AUDIO_BUFFER_FILLABLE,
+        PROFILE_AUDIO_PLAY_BUFFER,
+        PROFILE_AUDIO_MIXER_MIX,
+        PROFILE_ASSET_ARCHIVE_LOAD,
+        PROFILE_ASSET_ARCHIVE_ASSET_LOAD,
+        PROFILE_VERTEX_RECT_CREATE,
+        PROFILE_VERTEX_TEXT_CREATE,
+
+        PROFILE_SIZE,
+    };
+#endif
 
 struct PerformanceProfileResult {
-    const char* name;
-    atomic_64 int64 total_time;
+    atomic_64 const char* name;
+    atomic_64 int64 total_cycle;
 
-    atomic_64 int64 self_time;
     atomic_64 int64 self_cycle;
 
-    // Required for manual start/stop otherwise we would have to use one of the existing values above,
-    // which corrupts them for rendering
-    atomic_64 int64 tmp_time;
-    atomic_64 int64 tmp_cycle;
-
-    PerformanceProfileResult* parent;
+    atomic_32 uint32 counter;
+    uint32 parent;
 };
 
 static PerformanceProfileResult* _perf_stats = NULL;
+static int32* _perf_active = NULL;
 
 struct PerformanceProfiler;
-static PerformanceProfiler** _perf_current_scope = NULL; // Used when sharing profiler across dlls and threads (threads unlikely)
-static PerformanceProfiler* _perf_current_scope_internal; // Used when in dll or thread and no shared pointer found
+static thread_local PerformanceProfiler** _perf_current_scope = NULL; // Used when sharing profiler across dlls and threads (threads unlikely)
+static thread_local PerformanceProfiler* _perf_current_scope_internal; // Used when in dll or thread and no shared pointer found
 
 struct PerformanceProfiler {
-    const char* name;
-    int32 id;
+    bool is_active;
 
-    int64 start_time;
-    int64 total_time;
-    int64 self_time;
+    const char* name;
+    const char* info_msg;
+    int32 id;
 
     int64 start_cycle;
     int64 total_cycle;
@@ -46,105 +77,178 @@ struct PerformanceProfiler {
 
     PerformanceProfiler* parent;
 
-    PerformanceProfiler(int32 id, const char* scope_name) : id(id) {
-        name = scope_name;
+    bool auto_log;
+    bool is_stateless;
 
-        start_time = time_mu();
-        start_cycle = intrin_timestamp_counter();
+    // @question Do we want to make the self cost represent calls * "self_time/cycle"
+    // Stateless allows to ONLY output to log instead of storing the performance data in an array
+    PerformanceProfiler(
+        int32 id, const char* scope_name, const char* info = NULL,
+        bool stateless = false, bool should_log = false
+    ) {
+        if (!_perf_active || !*_perf_active) {
+            this->is_active = false;
 
-        total_time = 0;
-        total_cycle = 0;
+            return;
+        }
 
-        self_time = 0;
-        self_cycle = 0;
+        this->is_active = true;
+        this->id = id;
+        ++_perf_stats[id].counter;
 
-        if (_perf_current_scope) {
-            parent = *_perf_current_scope;
-            *_perf_current_scope = this;
+        this->name = scope_name;
+        this->info_msg = info;
+        this->is_stateless = stateless;
+        this->auto_log = stateless || should_log;
+
+        this->start_cycle = intrin_timestamp_counter();
+        this->total_cycle = 0;
+        this->self_cycle = 0;
+
+        if (this->is_stateless) {
+            this->parent = NULL;
         } else {
-            parent = _perf_current_scope_internal;
-            _perf_current_scope_internal = this;
+            if (_perf_current_scope) {
+                this->parent = *_perf_current_scope;
+                *_perf_current_scope = this;
+            } else {
+                this->parent = _perf_current_scope_internal;
+                _perf_current_scope_internal = this;
+            }
         }
     }
 
     ~PerformanceProfiler() {
-        uint64 end_time = time_mu();
-        uint64 end_cycle = intrin_timestamp_counter();
-
-        total_time = OMS_MAX(end_time - start_time, 0);
-        total_cycle = OMS_MAX(end_cycle - start_cycle, 0);
-
-        self_time += total_time;
-        self_cycle += total_cycle;
-
-        if (parent) {
-            parent->self_time -= total_time;
-            parent->self_cycle -= total_cycle;
+        if (!this->is_active) {
+            return;
         }
 
-        // Store result
-        PerformanceProfileResult* perf = &_perf_stats[id];
-        perf->name = name;
-        perf->total_time = total_time;
-        perf->total_cycle = total_cycle;
-        perf->self_time = self_time;
-        perf->self_cycle = self_cycle;
-        // @todo create reference to parent result
+        uint64 end_cycle = intrin_timestamp_counter();
+        this->total_cycle = OMS_MAX(end_cycle - start_cycle, 0);
+        this->self_cycle += total_cycle;
 
-        if (_perf_current_scope) {
-            *_perf_current_scope = parent;
-        } else {
-            _perf_current_scope_internal = parent;
+        // Store result
+        PerformanceProfileResult temp_perf = {};
+        PerformanceProfileResult* perf = this->is_stateless ? &temp_perf : &_perf_stats[this->id];
+
+        perf->name = this->name;
+        perf->total_cycle = this->total_cycle;
+        perf->self_cycle = this->self_cycle;
+
+        if (!this->is_stateless) {
+            if (this->parent) {
+                this->parent->self_cycle -= this->total_cycle;
+                perf->parent = this->parent->id;
+            }
+
+            if (_perf_current_scope) {
+                *_perf_current_scope = this->parent;
+            } else {
+                _perf_current_scope_internal = this->parent;
+            }
+        }
+
+        if (this->auto_log) {
+            if (this->info_msg && this->info_msg[0]) {
+                LOG_FORMAT_2(
+                    "%s (%s): %l cycles",
+                    {
+                        {LOG_DATA_CHAR_STR, (void *) perf->name},
+                        {LOG_DATA_CHAR_STR, (void *) this->info_msg},
+                        {LOG_DATA_INT64, (void *) &perf->total_cycle},
+                    }
+                );
+            } else {
+                LOG_FORMAT_2(
+                    "%s: %l cycles",
+                    {
+                        {LOG_DATA_CHAR_STR, (void *) perf->name},
+                        {LOG_DATA_INT64, (void *) &perf->total_cycle},
+                    }
+                );
+            }
+        }
     }
 };
 
-void performance_profiler_reset(uint32 id)
+inline
+void performance_profiler_reset(int32 id) noexcept
 {
     PerformanceProfileResult* perf = &_perf_stats[id];
-    perf->total_time = 0;
     perf->total_cycle = 0;
-    perf->self_time = 0;
     perf->self_cycle = 0;
+    perf->parent = 0; // parent is an index into _perf_stats now, not a pointer
 }
 
-void performance_profiler_start(uint32 id, const char* name)
+inline
+void performance_profiler_start(int32 id, const char* name) noexcept
 {
     PerformanceProfileResult* perf = &_perf_stats[id];
     perf->name = name;
-
-    perf->tmp_time = time_mu();
-    perf->tmp_cycle = intrin_timestamp_counter();
+    // Store the negated start timestamp; performance_profiler_end() adds the end timestamp to recover the delta
+    perf->self_cycle = -((int64) intrin_timestamp_counter());
 }
 
-void performance_profiler_end(uint32 id)
+inline
+void performance_profiler_end(int32 id) noexcept
 {
     PerformanceProfileResult* perf = &_perf_stats[id];
-    perf->total_time = time_mu() - perf->tmp_time;
-    perf->total_cycle = intrin_timestamp_counter() - perf->tmp_cycle;
-    perf->self_time = perf->total_time;
-    perf->self_cycle = perf->self_cycle;
+    // self_cycle still holds the negated start timestamp, so this addition yields the elapsed cycles
+    perf->total_cycle = intrin_timestamp_counter() + perf->self_cycle;
+    perf->self_cycle = perf->total_cycle;
 }
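A usage sketch for the manual start/end pair above (PROFILE_TEMP comes from the TimingStats enum; initialization of _perf_stats and _perf_active is assumed to have happened elsewhere):

    void profile_manual_usage()
    {
        performance_profiler_start(PROFILE_TEMP, "upload");
        // ... measured work ...
        performance_profiler_end(PROFILE_TEMP);

        // total_cycle now holds the elapsed cycles recovered from the negated start timestamp
        int64 cycles = _perf_stats[PROFILE_TEMP].total_cycle;
        LOG_FORMAT_3("upload took %l cycles", {{LOG_DATA_INT64, &cycles}});
    }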
 
-#if (!DEBUG && !INTERNAL) || RELEASE
-    #define PROFILE_SCOPE(id, name) ((void) 0)
+// @question Do we want different levels of PROFILE_VERBOSE and PROFILE_STATELESS, the same as in Log.h?
+// This would allow us to go ham in a lot of functions (e.g. file reading)
+
+#if LOG_LEVEL == 4
+    // Only these functions can properly handle the self-time calculation
+    // Use these whenever you want to profile an entire function
+    #define PROFILE(id) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), __func__)
+    #define PROFILE_VERBOSE(id, info) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), __func__, (info), false, true)
+    #define PROFILE_STATELESS(id, info) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), __func__, (info), true, true)
+
+    #define PROFILE_START(id, name) if(_perf_active && *_perf_active) performance_profiler_start((id), (name))
+    #define PROFILE_END(id) if(_perf_active && *_perf_active) performance_profiler_end((id))
+    #define PROFILE_SCOPE(id, name) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), (name))
+    #define PROFILE_RESET(id) if(_perf_active && *_perf_active) performance_profiler_reset((id))
+#elif LOG_LEVEL == 3
+    // Only these functions can properly handle the self-time calculation
+    // Use these whenever you want to profile an entire function
+    #define PROFILE(id) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), __func__)
+    #define PROFILE_VERBOSE(id, info) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), __func__, (info), false, true)
+    #define PROFILE_STATELESS(id, info) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), __func__, (info), true, true)
+
+    #define PROFILE_START(id, name) if(_perf_active && *_perf_active) performance_profiler_start((id), (name))
+    #define PROFILE_END(id) if(_perf_active && *_perf_active) performance_profiler_end((id))
+    #define PROFILE_SCOPE(id, name) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), (name))
+    #define PROFILE_RESET(id) if(_perf_active && *_perf_active) performance_profiler_reset((id))
+#elif LOG_LEVEL == 2
+    // Only these functions can properly handle the self-time calculation
+    // Use these whenever you want to profile an entire function
+    #define PROFILE(id) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), __func__)
+    #define PROFILE_VERBOSE(id, info) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), __func__, (info), false, true)
+    #define PROFILE_STATELESS(id, info) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), __func__, (info), true, true)
+
+    #define PROFILE_START(id, name) if(_perf_active && *_perf_active) performance_profiler_start((id), (name))
+    #define PROFILE_END(id) if(_perf_active && *_perf_active) performance_profiler_end((id))
+    #define PROFILE_SCOPE(id, name) PerformanceProfiler __profile_scope_##__func__##_##__LINE__((id), (name))
+    #define PROFILE_RESET(id) if(_perf_active && *_perf_active) performance_profiler_reset((id))
+#elif LOG_LEVEL == 1
     #define PROFILE(id) ((void) 0)
+    #define PROFILE_VERBOSE(id, info) ((void) 0)
+    #define PROFILE_STATELESS(id, info) ((void) 0)
     #define PROFILE_START(id, name) ((void) 0)
     #define PROFILE_END(id) ((void) 0)
-
+    #define PROFILE_SCOPE(id, name) ((void) 0)
     #define PROFILE_RESET(id) ((void) 0)
-#else
-    #define PROFILE_SCOPE(id, name) PerformanceProfiler __profile_scope_##id(id, name)
+#elif LOG_LEVEL == 0
+    #define PROFILE(id) ((void) 0)
+    #define PROFILE_VERBOSE(id, info) ((void) 0)
+    #define PROFILE_STATELESS(id, info) ((void) 0)
 
-    // Only this function can properly handle self-time calculation
-    // Use this whenever you want to profile an entire function
-    #define PROFILE(id) PROFILE_SCOPE(id, __func__)
-
-    #define PROFILE_START(id, name) 
performance_profiler_start(id, name) - #define PROFILE_END(id) performance_profiler_end(id) - - #define PROFILE_RESET(id) performance_profiler_reset((id)) + #define PROFILE_START(id, name) ((void) 0) + #define PROFILE_END(id) ((void) 0) + #define PROFILE_SCOPE(id, name) ((void) 0) + #define PROFILE_RESET(id) ((void) 0) #endif #endif \ No newline at end of file diff --git a/log/Stats.h b/log/Stats.h index 0e9a74d..8e6195e 100644 --- a/log/Stats.h +++ b/log/Stats.h @@ -5,6 +5,9 @@ #include "../stdlib/Types.h" #include "../thread/Atomic.h" +// @question See PerformanceProfiler (hashmap) and implement same here +// The problem with that is, the hash map is much slower +// and we probably want to maybe use this (at least partially) in release mode? #ifndef DEBUG_COUNTER #define DEBUG_COUNTER 1 enum DebugCounter { @@ -13,6 +16,8 @@ DEBUG_COUNTER_DRIVE_READ, DEBUG_COUNTER_DRIVE_WRITE, + DEBUG_COUNTER_GPU_UPLOAD, + DEBUG_COUNTER_SIZE }; #endif @@ -20,7 +25,7 @@ static atomic_64 int64* _stats_counter = NULL; inline -void reset_counter(int32 id) +void reset_counter(int32 id) noexcept { if (!_stats_counter) { return; @@ -30,7 +35,7 @@ void reset_counter(int32 id) } inline -void log_increment(int32 id, int64 by = 1) +void log_increment(int32 id, int64 by = 1) noexcept { if (!_stats_counter) { return; @@ -40,7 +45,7 @@ void log_increment(int32 id, int64 by = 1) } inline -void log_counter(int32 id, int64 value) +void log_counter(int32 id, int64 value) noexcept { if (!_stats_counter) { return; diff --git a/math/matrix/MatrixFloat32.h b/math/matrix/MatrixFloat32.h index d98a663..ded9149 100644 --- a/math/matrix/MatrixFloat32.h +++ b/math/matrix/MatrixFloat32.h @@ -14,14 +14,16 @@ #include "../../utils/TestUtils.h" #include "../../architecture/Intrinsics.h" -// @todo Implement intrinsic versions! - -// INFO: I thought we could remove some of the functions. Sometimes we have a function that modifies the original value and then we also have the same function that fills a new result value. -// On gcc the optimized code creates the same assembly if we would just choose to return the new value vs. modifying a value by pointer. -// However, on MSVC this is not the case and the pointer version has more and slower assembly code for the pass-by-value function +// INFO: I thought we could remove some of the functions. +// Sometimes we have a function that modifies the original value +// and then we also have the same function that fills a new result value. +// On gcc the optimized code creates the same assembly if we would just choose to return the new value +// vs. modifying a value by pointer. +// However, on MSVC this is not the case and the pointer version has more and slower assembly code vs. 
the +// pass-by-value function inline -void vec2_normalize(f32* __restrict x, f32* __restrict y) +void vec2_normalize(f32* __restrict x, f32* __restrict y) noexcept { f32 d = intrin_rsqrt_f32((*x) * (*x) + (*y) * (*y)); @@ -30,82 +32,93 @@ void vec2_normalize(f32* __restrict x, f32* __restrict y) } inline -void vec2_add(v2_f32* __restrict vec, const v2_f32* a, const v2_f32* b) { +void vec2_add(v2_f32* __restrict vec, const v2_f32* a, const v2_f32* b) noexcept +{ vec->x = a->x + b->x; vec->y = a->y + b->y; } inline -void vec2_add(v2_f32* __restrict vec, const v2_f32* b) { +void vec2_add(v2_f32* __restrict vec, const v2_f32* b) noexcept +{ vec->x += b->x; vec->y += b->y; } inline -void vec2_sub(v2_f32* __restrict vec, const v2_f32* __restrict a, const v2_f32* __restrict b) { +void vec2_sub(v2_f32* __restrict vec, const v2_f32* __restrict a, const v2_f32* __restrict b) noexcept +{ vec->x = a->x - b->x; vec->y = a->y - b->y; } inline -void vec2_sub(v2_f32* __restrict vec, const v2_f32* __restrict b) { +void vec2_sub(v2_f32* __restrict vec, const v2_f32* __restrict b) noexcept +{ vec->x -= b->x; vec->y -= b->y; } inline -void vec2_mul(v2_f32* __restrict vec, const v2_f32* __restrict a, f32 s) { +void vec2_mul(v2_f32* __restrict vec, const v2_f32* __restrict a, f32 s) noexcept +{ vec->x = a->x * s; vec->y = a->y * s; } inline -void vec2_mul(v2_f32* vec, f32 s) { +void vec2_mul(v2_f32* vec, f32 s) noexcept +{ vec->x *= s; vec->y *= s; } inline -f32 vec2_mul(const v2_f32* a, const v2_f32* b) { +f32 vec2_mul(const v2_f32* a, const v2_f32* b) noexcept +{ return a->x * b->x + a->y * b->y; } inline -void vec2_mul(v2_f32* __restrict vec, const v2_f32* a, const v2_f32* b) { +void vec2_mul(v2_f32* __restrict vec, const v2_f32* a, const v2_f32* b) noexcept +{ vec->x = a->x * b->x; vec->y = a->y * b->y; } inline -void vec2_mul(v2_f32* vec, const v2_f32* b) { +void vec2_mul(v2_f32* vec, const v2_f32* b) noexcept +{ vec->x *= b->x; vec->y *= b->y; } inline -f32 vec2_cross(const v2_f32* a, const v2_f32* b) { +f32 vec2_cross(const v2_f32* a, const v2_f32* b) noexcept +{ return a->x * b->y - a->y * b->x; } inline -f32 vec2_dot(const v2_f32* a, const v2_f32* b) { +f32 vec2_dot(const v2_f32* a, const v2_f32* b) noexcept +{ return a->x * b->x + a->y * b->y; } inline -f32 vec3_length(f32 x, f32 y, f32 z) +f32 vec3_length(f32 x, f32 y, f32 z) noexcept { return sqrtf(x * x + y * y + z * z); } inline -f32 vec3_length(v3_f32* vec) +f32 vec3_length(v3_f32* vec) noexcept { return sqrtf(vec->x * vec->x + vec->y * vec->y + vec->z * vec->z); } inline -void vec3_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z) +void vec3_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z) noexcept { f32 d = intrin_rsqrt_f32((*x) * (*x) + (*y) * (*y) + (*z) * (*z)); @@ -115,7 +128,7 @@ void vec3_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z) } inline -void vec3_normalize(v3_f32* vec) +void vec3_normalize(v3_f32* vec) noexcept { f32 d = intrin_rsqrt_f32(vec->x * vec->x + vec->y * vec->y + vec->z * vec->z); @@ -125,78 +138,89 @@ void vec3_normalize(v3_f32* vec) } inline -void vec3_add(v3_f32* __restrict vec, const v3_f32* a, const v3_f32* b) { +void vec3_add(v3_f32* __restrict vec, const v3_f32* a, const v3_f32* b) noexcept +{ vec->x = a->x + b->x; vec->y = a->y + b->y; vec->z = a->z + b->z; } inline -void vec3_add(v3_f32* __restrict vec, const v3_f32* b) { +void vec3_add(v3_f32* __restrict vec, const v3_f32* b) noexcept +{ vec->x += b->x; vec->y += b->y; vec->z += b->z; } inline -void 
vec3_sub(v3_f32* __restrict vec, const v3_f32* __restrict a, const v3_f32* __restrict b) { +void vec3_sub(v3_f32* __restrict vec, const v3_f32* __restrict a, const v3_f32* __restrict b) noexcept +{ vec->x = a->x - b->x; vec->y = a->y - b->y; vec->z = a->z - b->z; } inline -void vec3_sub(v3_f32* __restrict vec, const v3_f32* __restrict b) { +void vec3_sub(v3_f32* __restrict vec, const v3_f32* __restrict b) noexcept +{ vec->x -= b->x; vec->y -= b->y; vec->z -= b->z; } inline -void vec3_mul(v3_f32* __restrict vec, const v3_f32* __restrict a, f32 s) { +void vec3_mul(v3_f32* __restrict vec, const v3_f32* __restrict a, f32 s) noexcept +{ vec->x = a->x * s; vec->y = a->y * s; vec->z = a->z * s; } inline -void vec3_mul(v3_f32* vec, f32 s) { +void vec3_mul(v3_f32* vec, f32 s) noexcept +{ vec->x *= s; vec->y *= s; vec->z *= s; } inline -f32 vec3_mul(const v3_f32* a, const v3_f32* b) { +f32 vec3_mul(const v3_f32* a, const v3_f32* b) noexcept +{ return a->x * b->x + a->y * b->y + a->z * b->z; } inline -void vec3_mul(v3_f32* __restrict vec, const v3_f32* a, const v3_f32* b) { +void vec3_mul(v3_f32* __restrict vec, const v3_f32* a, const v3_f32* b) noexcept +{ vec->x = a->x * b->x; vec->y = a->y * b->y; vec->z = a->z * b->z; } inline -void vec3_mul(v3_f32* vec, const v3_f32* b) { +void vec3_mul(v3_f32* vec, const v3_f32* b) noexcept +{ vec->x *= b->x; vec->y *= b->y; vec->z *= b->z; } -void vec3_cross(v3_f32* __restrict vec, const v3_f32* a, const v3_f32* b) { +void vec3_cross(v3_f32* __restrict vec, const v3_f32* a, const v3_f32* b) noexcept +{ vec->x = a->y * b->z - a->z * b->y; vec->y = a->z * b->x - a->x * b->z; vec->z = a->x * b->y - a->y * b->x; } inline -f32 vec3_dot(const v3_f32* a, const v3_f32* b) { +f32 vec3_dot(const v3_f32* a, const v3_f32* b) noexcept +{ return a->x * b->x + a->y * b->y + a->z * b->z; } -void vec4_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z, f32* __restrict w) +void vec4_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z, f32* __restrict w) noexcept { f32 d = intrin_rsqrt_f32((*x) * (*x) + (*y) * (*y) + (*z) * (*z) + (*w) * (*w)); @@ -207,7 +231,8 @@ void vec4_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z, f32 } inline -void vec4_add(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b) { +void vec4_add(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b) noexcept +{ vec->x = a->x + b->x; vec->y = a->y + b->y; vec->z = a->z + b->z; @@ -215,7 +240,8 @@ void vec4_add(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b) { } inline -void vec4_add(v4_f32* __restrict vec, const v4_f32* b) { +void vec4_add(v4_f32* __restrict vec, const v4_f32* b) noexcept +{ vec->x += b->x; vec->y += b->y; vec->z += b->z; @@ -223,7 +249,8 @@ void vec4_add(v4_f32* __restrict vec, const v4_f32* b) { } inline -void vec4_sub(v4_f32* __restrict vec, const v4_f32* __restrict a, const v4_f32* __restrict b) { +void vec4_sub(v4_f32* __restrict vec, const v4_f32* __restrict a, const v4_f32* __restrict b) noexcept +{ vec->x = a->x - b->x; vec->y = a->y - b->y; vec->z = a->z - b->z; @@ -231,7 +258,8 @@ void vec4_sub(v4_f32* __restrict vec, const v4_f32* __restrict a, const v4_f32* } inline -void vec4_sub(v4_f32* __restrict vec, const v4_f32* __restrict b) { +void vec4_sub(v4_f32* __restrict vec, const v4_f32* __restrict b) noexcept +{ vec->x -= b->x; vec->y -= b->y; vec->z -= b->z; @@ -239,7 +267,8 @@ void vec4_sub(v4_f32* __restrict vec, const v4_f32* __restrict b) { } inline -void vec4_mul(v4_f32* __restrict vec, const v4_f32* 
__restrict a, f32 s) { +void vec4_mul(v4_f32* __restrict vec, const v4_f32* __restrict a, f32 s) noexcept +{ vec->x = a->x * s; vec->y = a->y * s; vec->z = a->z * s; @@ -247,7 +276,8 @@ void vec4_mul(v4_f32* __restrict vec, const v4_f32* __restrict a, f32 s) { } inline -void vec4_mul(v4_f32* vec, f32 s) { +void vec4_mul(v4_f32* vec, f32 s) noexcept +{ vec->x *= s; vec->y *= s; vec->z *= s; @@ -255,12 +285,14 @@ void vec4_mul(v4_f32* vec, f32 s) { } inline -f32 vec4_mul(const v4_f32* a, const v4_f32* b) { +f32 vec4_mul(const v4_f32* a, const v4_f32* b) noexcept +{ return a->x * b->x + a->y * b->y + a->z * b->z + a->w * b->w; } inline -void vec4_mul(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b) { +void vec4_mul(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b) noexcept +{ vec->x = a->x * b->x; vec->y = a->y * b->y; vec->z = a->z * b->z; @@ -268,7 +300,8 @@ void vec4_mul(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b) { } inline -void vec4_mul(v4_f32* vec, const v4_f32* b) { +void vec4_mul(v4_f32* vec, const v4_f32* b) noexcept +{ vec->x *= b->x; vec->y *= b->y; vec->z *= b->z; @@ -276,12 +309,14 @@ void vec4_mul(v4_f32* vec, const v4_f32* b) { } inline -f32 vec4_dot(const v4_f32* a, const v4_f32* b) { +f32 vec4_dot(const v4_f32* a, const v4_f32* b) noexcept +{ return a->x * b->x + a->y * b->y + a->z * b->z + a->w * b->w; } inline -void vec4_cross(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b, const v4_f32* c) { +void vec4_cross(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b, const v4_f32* c) noexcept +{ vec->x = a->y * (b->z * c->w - b->w * c->z) - a->z * (b->y * c->w - b->w * c->y) + a->w * (b->y * c->z - b->z * c->y); vec->y = -(a->x * (b->z * c->w - b->w * c->z) - a->z * (b->x * c->w - b->w * c->x) + a->w * (b->x * c->z - b->z * c->x)); vec->z = a->x * (b->y * c->w - b->w * c->y) - a->y * (b->x * c->w - b->w * c->x) + a->w * (b->x * c->y - b->y * c->x); @@ -289,7 +324,7 @@ void vec4_cross(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b, const } inline -void mat3_identity(f32* matrix) +void mat3_identity(f32* matrix) noexcept { matrix[0] = 1.0f; matrix[1] = 0.0f; matrix[2] = 0.0f; matrix[3] = 0.0f; matrix[4] = 1.0f; matrix[5] = 0.0f; @@ -297,13 +332,13 @@ void mat3_identity(f32* matrix) } inline -void mat3_identity_sparse(f32* matrix) +void mat3_identity_sparse(f32* matrix) noexcept { matrix[0] = 1.0f; matrix[4] = 1.0f; matrix[8] = 1.0f; } inline -void mat3_identity(__m128* matrix) +void mat3_identity(__m128* matrix) noexcept { matrix[0] = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); matrix[1] = _mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f); @@ -311,7 +346,7 @@ void mat3_identity(__m128* matrix) } inline -void mat4_identity(f32* matrix) +void mat4_identity(f32* matrix) noexcept { matrix[0] = 1.0f; matrix[1] = 0.0f; matrix[2] = 0.0f; matrix[3] = 0.0f; matrix[4] = 0.0f; matrix[5] = 1.0f; matrix[6] = 0.0f; matrix[7] = 0.0f; @@ -320,13 +355,13 @@ void mat4_identity(f32* matrix) } inline -void mat4_identity_sparse(f32* matrix) +void mat4_identity_sparse(f32* matrix) noexcept { matrix[0] = 1.0f; matrix[5] = 1.0f; matrix[10] = 1.0f; matrix[15] = 1.0f; } inline -void mat4_identity(__m128* matrix) +void mat4_identity(__m128* matrix) noexcept { matrix[0] = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); matrix[1] = _mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f); @@ -336,7 +371,7 @@ void mat4_identity(__m128* matrix) // x, y, z need to be normalized // https://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula -void mat4_rotation(f32* matrix, f32 x, f32 y, f32 z, f32 angle) +void 
mat4_rotation(f32* matrix, f32 x, f32 y, f32 z, f32 angle) noexcept { ASSERT_SIMPLE(OMS_ABS_F32(x * x + y * y + z * z - 1.0f) < 0.01); @@ -378,7 +413,7 @@ void mat4_rotation(f32* matrix, f32 x, f32 y, f32 z, f32 angle) matrix[15] = 1.0f; } -void mat4_rotation(f32* matrix, f32 pitch, f32 yaw, f32 roll) +void mat4_rotation(f32* matrix, f32 pitch, f32 yaw, f32 roll) noexcept { f32 cos_pitch = cosf(pitch); f32 sin_pitch = sinf(pitch); @@ -409,7 +444,7 @@ void mat4_rotation(f32* matrix, f32 pitch, f32 yaw, f32 roll) } inline -void mat3vec3_mult(const f32* __restrict matrix, const f32* __restrict vector, f32* __restrict result) +void mat3vec3_mult(const f32* __restrict matrix, const f32* __restrict vector, f32* __restrict result) noexcept { result[0] = matrix[0] * vector[0] + matrix[1] * vector[1] + matrix[2] * vector[2]; result[1] = matrix[3] * vector[0] + matrix[4] * vector[1] + matrix[5] * vector[2]; @@ -417,7 +452,7 @@ void mat3vec3_mult(const f32* __restrict matrix, const f32* __restrict vector, f } inline -void mat4vec4_mult(const f32* __restrict matrix, const f32* __restrict vector, f32* __restrict result) +void mat4vec4_mult(const f32* __restrict matrix, const f32* __restrict vector, f32* __restrict result) noexcept { result[0] = matrix[0] * vector[0] + matrix[1] * vector[1] + matrix[2] * vector[2] + matrix[3] * vector[3]; result[1] = matrix[4] * vector[0] + matrix[5] * vector[1] + matrix[6] * vector[2] + matrix[7] * vector[3]; @@ -426,7 +461,7 @@ void mat4vec4_mult(const f32* __restrict matrix, const f32* __restrict vector, f } // @question could simple mul add sse be faster? -void mat4vec4_mult_sse(const f32* __restrict matrix, const f32* __restrict vector, f32* __restrict result) +void mat4vec4_mult_sse(const f32* __restrict matrix, const f32* __restrict vector, f32* __restrict result) noexcept { __m128 vec = _mm_load_ps(vector); @@ -439,7 +474,7 @@ void mat4vec4_mult_sse(const f32* __restrict matrix, const f32* __restrict vecto } // @question could simple mul add sse be faster? -void mat4vec4_mult_sse(const __m128* __restrict matrix, const __m128* __restrict vector, f32* __restrict result) +void mat4vec4_mult_sse(const __m128* __restrict matrix, const __m128* __restrict vector, f32* __restrict result) noexcept { for (int32 i = 0; i < 4; ++i) { __m128 dot = _mm_dp_ps(matrix[i], *vector, 0xF1); @@ -449,7 +484,7 @@ void mat4vec4_mult_sse(const __m128* __restrict matrix, const __m128* __restrict } // @question could simple mul add sse be faster? 
-void mat4vec4_mult_sse(const __m128* __restrict matrix, const __m128* __restrict vector, __m128* __restrict result) +void mat4vec4_mult_sse(const __m128* __restrict matrix, const __m128* __restrict vector, __m128* __restrict result) noexcept { for (int32 i = 0; i < 4; ++i) { result[i] = _mm_dp_ps(matrix[i], *vector, 0xF1); @@ -457,7 +492,7 @@ void mat4vec4_mult_sse(const __m128* __restrict matrix, const __m128* __restrict } inline -void mat4mat4_mult(const f32* __restrict a, const f32* __restrict b, f32* __restrict result) +void mat4mat4_mult(const f32* __restrict a, const f32* __restrict b, f32* __restrict result) noexcept { result[0] = a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12]; result[1] = a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13]; @@ -481,7 +516,7 @@ void mat4mat4_mult(const f32* __restrict a, const f32* __restrict b, f32* __rest } inline -void mat4mat4_mult_simd(const f32* __restrict a, const f32* __restrict b, f32* __restrict result) +void mat4mat4_mult_simd(const f32* __restrict a, const f32* __restrict b, f32* __restrict result) noexcept { __m128 row1 = _mm_loadu_ps(&b[0]); __m128 row2 = _mm_loadu_ps(&b[4]); @@ -510,7 +545,8 @@ void mat4mat4_mult_simd(const f32* __restrict a, const f32* __restrict b, f32* _ } // @performance Consider to replace with 1d array -void mat4_frustum_planes(f32 planes[6][4], f32 radius, f32 *matrix) { +void mat4_frustum_planes(f32 planes[6][4], f32 radius, f32 *matrix) noexcept +{ // @todo make this a setting // @bug fix to row-major system // @todo don't use 2d arrays @@ -554,7 +590,7 @@ void mat4_frustum_sparse_rh( f32 *matrix, f32 left, f32 right, f32 bottom, f32 top, f32 znear, f32 zfar - ) { + ) noexcept { f32 temp = 2.0f * znear; f32 rl_delta = right - left; f32 tb_delta = top - bottom; @@ -585,7 +621,7 @@ void mat4_frustum_sparse_lh( f32 *matrix, f32 left, f32 right, f32 bottom, f32 top, f32 znear, f32 zfar - ) { + ) noexcept { f32 temp = 2.0f * znear; f32 rl_delta = right - left; f32 tb_delta = top - bottom; @@ -616,8 +652,8 @@ void mat4_frustum_sparse_lh( inline void mat4_perspective_sparse_lh( f32 *matrix, f32 fov, f32 aspect, - f32 znear, f32 zfar) -{ + f32 znear, f32 zfar +) noexcept { ASSERT_SIMPLE(znear > 0.0f); f32 ymax, xmax; @@ -630,8 +666,8 @@ void mat4_perspective_sparse_lh( inline void mat4_perspective_sparse_rh( f32 *matrix, f32 fov, f32 aspect, - f32 znear, f32 zfar) -{ + f32 znear, f32 zfar +) noexcept { ASSERT_SIMPLE(znear > 0.0f); f32 ymax, xmax; @@ -645,7 +681,7 @@ void mat4_ortho_sparse_lh( f32 *matrix, f32 left, f32 right, f32 bottom, f32 top, f32 znear, f32 zfar -) { +) noexcept { f32 rl_delta = right - left; f32 tb_delta = top - bottom; f32 fn_delta = zfar - znear; @@ -675,7 +711,7 @@ void mat4_ortho_sparse_rh( f32 *matrix, f32 left, f32 right, f32 bottom, f32 top, f32 znear, f32 zfar -) { +) noexcept { f32 rl_delta = right - left; f32 tb_delta = top - bottom; f32 fn_delta = zfar - znear; @@ -701,7 +737,7 @@ void mat4_ortho_sparse_rh( matrix[15] = 1.0f; } -void mat4_translate(f32* matrix, f32 dx, f32 dy, f32 dz) +void mat4_translate(f32* matrix, f32 dx, f32 dy, f32 dz) noexcept { f32 temp[16]; memcpy(temp, matrix, sizeof(f32) * 16); @@ -715,7 +751,7 @@ void mat4_translate(f32* matrix, f32 dx, f32 dy, f32 dz) mat4mat4_mult(temp, translation_matrix, matrix); } -void mat4_translate_simd(f32* matrix, f32 dx, f32 dy, f32 dz) +void mat4_translate_simd(f32* matrix, f32 dx, f32 dy, f32 dz) noexcept { alignas(64) f32 temp[16]; memcpy(temp, matrix, sizeof(f32) * 16); @@ -730,7 +766,7 @@ void 
mat4_translate_simd(f32* matrix, f32 dx, f32 dy, f32 dz)
 }
 
 inline
-void mat4_translation(f32* matrix, f32 dx, f32 dy, f32 dz)
+void mat4_translation(f32* matrix, f32 dx, f32 dy, f32 dz) noexcept
 {
     matrix[0] = 1.0f; matrix[1] = 0.0f; matrix[2] = 0.0f; matrix[3] = dx;
     matrix[4] = 0.0f; matrix[5] = 1.0f; matrix[6] = 0.0f; matrix[7] = dy;
@@ -739,7 +775,7 @@ void mat4_translation(f32* matrix, f32 dx, f32 dy, f32 dz)
 }
 
 inline
-void mat4_translation_sparse(f32* matrix, f32 dx, f32 dy, f32 dz)
+void mat4_translation_sparse(f32* matrix, f32 dx, f32 dy, f32 dz) noexcept
 {
     matrix[3] = dx;
     matrix[7] = dy;
@@ -747,7 +783,7 @@ void mat4_translation_sparse(f32* matrix, f32 dx, f32 dy, f32 dz)
 }
 
 inline
-void mat4_scale(f32* matrix, f32 dx, f32 dy, f32 dz)
+void mat4_scale(f32* matrix, f32 dx, f32 dy, f32 dz) noexcept
 {
     matrix[0] = dx;   matrix[1] = 0.0f; matrix[2] = 0.0f; matrix[3] = 0.0f;
     matrix[4] = 0.0f; matrix[5] = dy;   matrix[6] = 0.0f; matrix[7] = 0.0f;
@@ -756,7 +792,7 @@ void mat4_scale(f32* matrix, f32 dx, f32 dy, f32 dz)
 }
 
 inline
-void mat4_scale_sparse(f32* matrix, f32 dx, f32 dy, f32 dz)
+void mat4_scale_sparse(f32* matrix, f32 dx, f32 dy, f32 dz) noexcept
 {
     matrix[0] = dx;
     matrix[5] = dy;
@@ -764,7 +800,7 @@ void mat4_scale_sparse(f32* matrix, f32 dx, f32 dy, f32 dz)
 }
 
 inline
-void mat4_transpose(const f32* __restrict matrix, f32* __restrict transposed)
+void mat4_transpose(const f32* __restrict matrix, f32* __restrict transposed) noexcept
 {
     transposed[1] = matrix[4];
     transposed[2] = matrix[8];
@@ -781,7 +817,7 @@ void mat4_transpose(const f32* __restrict matrix, f32* __restrict transposed)
 }
 
 inline
-void mat4_transpose(f32* matrix)
+void mat4_transpose(f32* matrix) noexcept
 {
     f32 temp;
 
@@ -811,7 +847,7 @@ void mat4_transpose(f32* matrix)
 }
 
 inline
-void mat3_transpose(const f32* __restrict matrix, f32* __restrict transposed)
+void mat3_transpose(const f32* __restrict matrix, f32* __restrict transposed) noexcept
 {
     transposed[1] = matrix[3];
     transposed[2] = matrix[6];
@@ -822,7 +858,7 @@ void mat3_transpose(const f32* __restrict matrix, f32* __restrict transposed)
 }
 
 inline
-void mat3_transpose(f32* matrix)
+void mat3_transpose(f32* matrix) noexcept
 {
     f32 temp;
 
@@ -840,18 +876,49 @@ void mat3_transpose(f32* matrix)
 }
 
 inline
-void mat2_transpose(const f32* __restrict matrix, f32* __restrict transposed)
+void mat2_transpose(const f32* __restrict matrix, f32* __restrict transposed) noexcept
 {
     transposed[1] = matrix[2];
     transposed[2] = matrix[1];
 }
 
 inline
-void mat2_transpose(f32* matrix)
+void mat2_transpose(f32* matrix) noexcept
 {
     f32 temp = matrix[1];
     matrix[1] = matrix[2];
     matrix[2] = temp;
 }
 
+inline
+void vec3_normal(
+    v3_f32* __restrict normal,
+    const v3_f32* __restrict a, const v3_f32* __restrict b, const v3_f32* __restrict c
+) noexcept {
+    v3_f32 edge1;
+    v3_f32 edge2;
+
+    // Calculate two edges of the triangle
+    edge1.x = b->x - a->x;
+    edge1.y = b->y - a->y;
+    edge1.z = b->z - a->z;
+
+    edge2.x = c->x - a->x;
+    edge2.y = c->y - a->y;
+    edge2.z = c->z - a->z;
+
+    vec3_cross(normal, &edge1, &edge2);
+    vec3_normalize(normal);
+}
+
+inline
+void vec3_barycenter(
+    v3_f32* __restrict barycenter,
+    const v3_f32* __restrict a, const v3_f32* __restrict b, const v3_f32* __restrict c
+) noexcept {
+    barycenter->x = (a->x + b->x + c->x) / 3.0f;
+    barycenter->y = (a->y + b->y + c->y) / 3.0f;
+    barycenter->z = (a->z + b->z + c->z) / 3.0f;
+}
+
 #endif
\ No newline at end of file
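A small usage sketch for the two helpers added above (vertex values are illustrative; v3_f32 is assumed to be a plain x/y/z struct as used throughout this file):

    void triangle_usage()
    {
        v3_f32 a = {0.0f, 0.0f, 0.0f};
        v3_f32 b = {1.0f, 0.0f, 0.0f};
        v3_f32 c = {0.0f, 1.0f, 0.0f};

        v3_f32 normal;
        vec3_normal(&normal, &a, &b, &c);      // (0, 0, 1) for this winding

        v3_f32 center;
        vec3_barycenter(&center, &a, &b, &c);  // component-wise average: (1/3, 1/3, 0)
    }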
diff --git a/memory/BufferMemory.h b/memory/BufferMemory.h
index 2d6cfdc..d813603 100644
--- a/memory/BufferMemory.h
+++ b/memory/BufferMemory.h
@@ -34,6 +34,7 @@ inline
 void buffer_alloc(BufferMemory* buf, uint64 size, int32 alignment = 64)
 {
     ASSERT_SIMPLE(size);
+    PROFILE_VERBOSE(PROFILE_BUFFER_ALLOC, "");
 
     buf->memory = alignment < 2
         ? (byte *) platform_alloc(size)
@@ -49,7 +50,7 @@ void buffer_alloc(BufferMemory* buf, uint64 size, int32 alignment = 64)
 
     DEBUG_MEMORY_INIT((uintptr_t) buf->memory, buf->size);
     LOG_INCREMENT_BY(DEBUG_COUNTER_MEM_ALLOC, buf->size);
-    LOG_LEVEL_2("Allocated BufferMemory: %n B", {{LOG_DATA_UINT64, &buf->size}});
+    LOG_FORMAT_2("Allocated BufferMemory: %n B", {{LOG_DATA_UINT64, &buf->size}});
 }
 
 inline
@@ -82,7 +83,7 @@ void buffer_init(BufferMemory* buf, byte* data, uint64 size, int32 alignment = 6
 }
 
 inline
-void buffer_reset(BufferMemory* buf)
+void buffer_reset(BufferMemory* buf) noexcept
 {
     // @bug aren't we wasting element 0 (see get_memory, we are not using 0 only next element)
     DEBUG_MEMORY_DELETE((uintptr_t) buf->memory, buf->head - buf->memory);
@@ -90,7 +91,7 @@ void buffer_reset(BufferMemory* buf)
 }
 
 inline
-byte* buffer_get_memory(BufferMemory* buf, uint64 size, int32 aligned = 4, bool zeroed = false)
+byte* buffer_get_memory(BufferMemory* buf, uint64 size, int32 aligned = 4, bool zeroed = false) noexcept
 {
     ASSERT_SIMPLE(size <= buf->size);
diff --git a/memory/ChunkMemory.h b/memory/ChunkMemory.h
index 2b69e8b..1b3f57d 100644
--- a/memory/ChunkMemory.h
+++ b/memory/ChunkMemory.h
@@ -43,6 +43,7 @@ void chunk_alloc(ChunkMemory* buf, uint32 count, uint32 chunk_size, int32 alignm
 {
     ASSERT_SIMPLE(chunk_size);
     ASSERT_SIMPLE(count);
+    PROFILE_VERBOSE(PROFILE_CHUNK_ALLOC, "");
 
     chunk_size = ROUND_TO_NEAREST(chunk_size, alignment);
 
@@ -63,7 +64,7 @@ void chunk_alloc(ChunkMemory* buf, uint32 count, uint32 chunk_size, int32 alignm
 
     DEBUG_MEMORY_INIT((uintptr_t) buf->memory, buf->size);
     LOG_INCREMENT_BY(DEBUG_COUNTER_MEM_ALLOC, buf->size);
-    LOG_LEVEL_2("Allocated ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}});
+    LOG_FORMAT_2("Allocated ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}});
 }
 
 inline
@@ -133,12 +134,12 @@ void chunk_free(ChunkMemory* buf)
 }
 
 inline
-uint32 chunk_id_from_memory(const ChunkMemory* buf, const byte* pos) {
+uint32 chunk_id_from_memory(const ChunkMemory* buf, const byte* pos) noexcept {
     return (uint32) ((uintptr_t) pos - (uintptr_t) buf->memory) / buf->chunk_size;
 }
 
 inline
-byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false)
+byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false) noexcept
 {
     if (element >= buf->count) {
         return NULL;
@@ -156,7 +157,7 @@ byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false)
     return offset;
 }
 
-int32 chunk_reserve(ChunkMemory* buf, uint32 elements = 1)
+int32 chunk_reserve(ChunkMemory* buf, uint32 elements = 1) noexcept
 {
     if ((uint32) (buf->last_pos + 1) >= buf->count) {
         buf->last_pos = -1;
@@ -237,7 +238,7 @@ int32 chunk_reserve(ChunkMemory* buf, uint32 elements = 1)
         uint32 bits_in_current_block = OMS_MIN(64 - current_bit_index, elements_temp);
 
         // Create a mask to set the bits
-        uint64 mask = ((1ULL << bits_in_current_block) - 1) << current_bit_index;
+        uint64 mask = ((1ULL << (bits_in_current_block & 63)) - 1) << current_bit_index | ((bits_in_current_block >> 6) * ((uint64_t)-1)); // (x & 63) avoids shift-by-64 UB; the second term yields an all-ones mask when the whole 64-bit block is taken
         buf->free[current_free_index] |= mask;
 
         // Update the counters and indices
@@ -264,14 +265,14 @@ }
 
 inline
-void 
chunk_free_element(ChunkMemory* buf, uint64 free_index, int32 bit_index) noexcept { DEBUG_MEMORY_DELETE((uintptr_t) (buf->memory + (free_index * 64 + bit_index) * buf->chunk_size), buf->chunk_size); buf->free[free_index] &= ~(1ULL << bit_index); } inline -void chunk_free_elements(ChunkMemory* buf, uint64 element, uint32 element_count = 1) +void chunk_free_elements(ChunkMemory* buf, uint64 element, uint32 element_count = 1) noexcept { DEBUG_MEMORY_DELETE((uintptr_t) (buf->memory + element * buf->chunk_size), buf->chunk_size); @@ -359,7 +360,7 @@ int64 chunk_load(ChunkMemory* buf, const byte* data) buf->free = (uint64 *) (buf->memory + buf->count * buf->chunk_size); - LOG_LEVEL_2("Loaded ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}}); + LOG_FORMAT_2("Loaded ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}}); return buf->size; } diff --git a/memory/Queue.h b/memory/Queue.h index 046ed9a..7dee67c 100644 --- a/memory/Queue.h +++ b/memory/Queue.h @@ -61,28 +61,28 @@ void queue_init(Queue* queue, byte* buf, uint64 element_count, uint32 element_si } inline -void queue_free(Queue* queue) +void queue_free(Queue* queue) noexcept { ring_free((RingMemory *) queue); } inline -bool queue_is_empty(Queue* queue) { +bool queue_is_empty(Queue* queue) noexcept { return queue->head == queue->tail; } inline -bool queue_set_empty(Queue* queue) { +bool queue_set_empty(Queue* queue) noexcept { return queue->head = queue->tail; } inline -bool queue_is_full(Queue* queue) { +bool queue_is_full(Queue* queue) noexcept { return !ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment); } inline -void queue_enqueue_unique(Queue* queue, const byte* data) +void queue_enqueue_unique(Queue* queue, const byte* data) noexcept { ASSERT_SIMPLE((uint64_t) data % 4 == 0); @@ -107,7 +107,7 @@ void queue_enqueue_unique(Queue* queue, const byte* data) } inline -byte* queue_enqueue(Queue* queue, byte* data) +byte* queue_enqueue(Queue* queue, byte* data) noexcept { byte* mem = ring_get_memory_nomove((RingMemory *) queue, queue->element_size, queue->alignment); memcpy(mem, data, queue->element_size); @@ -117,7 +117,7 @@ byte* queue_enqueue(Queue* queue, byte* data) } inline -byte* queue_enqueue_safe(Queue* queue, byte* data) +byte* queue_enqueue_safe(Queue* queue, byte* data) noexcept { if(queue_is_full(queue)) { return NULL; @@ -132,7 +132,7 @@ byte* queue_enqueue_safe(Queue* queue, byte* data) // WARNING: Only useful for single producer single consumer inline -byte* queue_enqueue_wait_atomic(Queue* queue, byte* data) +byte* queue_enqueue_wait_atomic(Queue* queue, byte* data) noexcept { while (!ring_commit_safe_atomic((RingMemory *) queue, queue->alignment)) {} @@ -145,7 +145,7 @@ byte* queue_enqueue_wait_atomic(Queue* queue, byte* data) // WARNING: Only useful for single producer single consumer inline -byte* queue_enqueue_safe_atomic(Queue* queue, byte* data) +byte* queue_enqueue_safe_atomic(Queue* queue, byte* data) noexcept { if (!ring_commit_safe_atomic((RingMemory *) queue, queue->alignment)) { return NULL; @@ -159,19 +159,19 @@ byte* queue_enqueue_safe_atomic(Queue* queue, byte* data) } inline -byte* queue_enqueue_start(Queue* queue) +byte* queue_enqueue_start(Queue* queue) noexcept { return ring_get_memory_nomove((RingMemory *) queue, queue->element_size, queue->alignment); } inline -void queue_enqueue_end(Queue* queue) +void queue_enqueue_end(Queue* queue) noexcept { ring_move_pointer((RingMemory *) queue, &queue->head, queue->element_size, queue->alignment); } inline -bool queue_dequeue(Queue* 
queue, byte* data) +bool queue_dequeue(Queue* queue, byte* data) noexcept { if (queue->head == queue->tail) { return false; @@ -190,7 +190,7 @@ bool queue_dequeue(Queue* queue, byte* data) // WARNING: Only useful for single producer single consumer inline -bool queue_dequeue_atomic(Queue* queue, byte* data) +bool queue_dequeue_atomic(Queue* queue, byte* data) noexcept { if ((uint64) atomic_get_acquire_release((void **) &queue->head) == (uint64) queue->tail) { return false; @@ -208,7 +208,7 @@ bool queue_dequeue_atomic(Queue* queue, byte* data) } inline -byte* queue_dequeue_keep(Queue* queue) +byte* queue_dequeue_keep(Queue* queue) noexcept { if (queue->head == queue->tail) { return NULL; @@ -221,13 +221,13 @@ byte* queue_dequeue_keep(Queue* queue) } inline -byte* queue_dequeue_start(Queue* queue) +byte* queue_dequeue_start(Queue* queue) noexcept { return queue->tail; } inline -void queue_dequeue_end(Queue* queue) +void queue_dequeue_end(Queue* queue) noexcept { ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); } diff --git a/memory/RingMemory.h b/memory/RingMemory.h index b8a2d7c..9d4933d 100644 --- a/memory/RingMemory.h +++ b/memory/RingMemory.h @@ -47,6 +47,7 @@ inline void ring_alloc(RingMemory* ring, uint64 size, uint32 alignment = 64) { ASSERT_SIMPLE(size); + PROFILE_VERBOSE(PROFILE_RING_ALLOC, ""); ring->memory = alignment < 2 ? (byte *) platform_alloc(size) @@ -62,7 +63,7 @@ void ring_alloc(RingMemory* ring, uint64 size, uint32 alignment = 64) DEBUG_MEMORY_INIT((uintptr_t) ring->memory, ring->size); LOG_INCREMENT_BY(DEBUG_COUNTER_MEM_ALLOC, ring->size); - LOG_LEVEL_2("Allocated RingMemory: %n B", {{LOG_DATA_UINT64, &ring->size}}); + LOG_FORMAT_2("Allocated RingMemory: %n B", {{LOG_DATA_UINT64, &ring->size}}); } inline @@ -117,7 +118,7 @@ void ring_free(RingMemory* ring) } inline -byte* ring_calculate_position(const RingMemory* ring, uint64 size, uint32 aligned = 4) +byte* ring_calculate_position(const RingMemory* ring, uint64 size, uint32 aligned = 4) noexcept { byte* head = ring->head; @@ -140,7 +141,7 @@ byte* ring_calculate_position(const RingMemory* ring, uint64 size, uint32 aligne } inline -void ring_reset(RingMemory* ring) +void ring_reset(RingMemory* ring) noexcept { DEBUG_MEMORY_DELETE((uintptr_t) ring->memory, ring->size); ring->head = ring->memory; @@ -148,7 +149,7 @@ void ring_reset(RingMemory* ring) // Moves a pointer based on the size you want to consume (new position = after consuming size) // Usually used to move head or tail pointer (= pos) -void ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, uint32 aligned = 4) +void ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, uint32 aligned = 4) noexcept { ASSERT_SIMPLE(size <= ring->size); @@ -174,7 +175,7 @@ void ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, uint32 aligned *pos += size; } -byte* ring_get_memory(RingMemory* ring, uint64 size, uint32 aligned = 4, bool zeroed = false) +byte* ring_get_memory(RingMemory* ring, uint64 size, uint32 aligned = 4, bool zeroed = false) noexcept { ASSERT_SIMPLE(size <= ring->size); @@ -208,7 +209,7 @@ byte* ring_get_memory(RingMemory* ring, uint64 size, uint32 aligned = 4, bool ze } // Same as ring_get_memory but DOESN'T move the head -byte* ring_get_memory_nomove(RingMemory* ring, uint64 size, uint32 aligned = 4, bool zeroed = false) +byte* ring_get_memory_nomove(RingMemory* ring, uint64 size, uint32 aligned = 4, bool zeroed = false) noexcept { ASSERT_SIMPLE(size <= ring->size); @@ -241,7 +242,7 @@ 
byte* ring_get_memory_nomove(RingMemory* ring, uint64 size, uint32 aligned = 4, // Used if the ring only contains elements of a certain size // This way you can get a certain element inline -byte* ring_get_element(const RingMemory* ring, uint64 element, uint64 size) +byte* ring_get_element(const RingMemory* ring, uint64 element, uint64 size) noexcept { DEBUG_MEMORY_READ((uintptr_t) (ring->memory + element * size), 1); @@ -252,7 +253,7 @@ byte* ring_get_element(const RingMemory* ring, uint64 element, uint64 size) /** * Checks if one additional element can be inserted without overwriting the tail index */ inline -bool ring_commit_safe(const RingMemory* ring, uint64 size, uint32 aligned = 4) +bool ring_commit_safe(const RingMemory* ring, uint64 size, uint32 aligned = 4) noexcept { // aligned * 2 since that should be the maximum overhead for an element // -1 since that is the worst case, we can't be missing a complete alignment because then it would already be aligned @@ -270,7 +271,7 @@ bool ring_commit_safe(const RingMemory* ring, uint64 size, uint32 aligned = 4) } inline -bool ring_commit_safe_atomic(const RingMemory* ring, uint64 size, uint32 aligned = 4) +bool ring_commit_safe_atomic(const RingMemory* ring, uint64 size, uint32 aligned = 4) noexcept { // aligned * 2 since that should be the maximum overhead for an element // -1 since that is the worst case, we can't be missing a complete alignment because then it would already be aligned
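// Worked example for the commit-safe bound described in the comments above (illustrative
// numbers, not taken from the code): two alignment adjustments can cost at most
// aligned * 2 - 1 bytes in total, so with size = 24 and aligned = 16 the check requires
// at least 24 + 31 = 55 free bytes before a commit is considered safe.
// A hedged sketch of the predicate with hypothetical names (free_bytes is assumed to be
// the remaining distance to the tail):
//
//     inline bool fits_worst_case(uint64 free_bytes, uint64 size, uint32 aligned) noexcept {
//         // aligned * 2 - 1 is the maximum possible alignment overhead per element
//         return free_bytes >= size + (uint64) aligned * 2 - 1;
//     }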
diff --git a/memory/ThreadedQueue.h b/memory/ThreadedQueue.h index 916b969..b9c5952 100644 --- a/memory/ThreadedQueue.h +++ b/memory/ThreadedQueue.h @@ -104,7 +104,7 @@ void thrd_queue_free(ThreadedQueue* queue) // @todo Create enqueue_unique and enqueue_unique_sem inline -void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data) +void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data) noexcept { ASSERT_SIMPLE((uint64_t) data % 4 == 0); pthread_mutex_lock(&queue->mutex); @@ -135,7 +135,7 @@ void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data) } inline -void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data) +void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data) noexcept { ASSERT_SIMPLE((uint64_t) data % 4 == 0); pthread_mutex_lock(&queue->mutex); @@ -169,7 +169,7 @@ void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data) // Conditional Lock inline -void thrd_queue_enqueue(ThreadedQueue* queue, const byte* data) +void thrd_queue_enqueue(ThreadedQueue* queue, const byte* data) noexcept { pthread_mutex_lock(&queue->mutex); @@ -187,7 +187,7 @@ void thrd_queue_enqueue(ThreadedQueue* queue, const byte* data) } inline -void thrd_queue_enqueue_wait(ThreadedQueue* queue, const byte* data) +void thrd_queue_enqueue_wait(ThreadedQueue* queue, const byte* data) noexcept { pthread_mutex_lock(&queue->mutex); @@ -203,7 +203,7 @@ void thrd_queue_enqueue_wait(ThreadedQueue* queue, const byte* data) } inline -byte* thrd_queue_enqueue_start_wait(ThreadedQueue* queue) +byte* thrd_queue_enqueue_start_wait(ThreadedQueue* queue) noexcept { pthread_mutex_lock(&queue->mutex); @@ -215,14 +215,14 @@ byte* thrd_queue_enqueue_start_wait(ThreadedQueue* queue) } inline -void thrd_queue_enqueue_end_wait(ThreadedQueue* queue) +void thrd_queue_enqueue_end_wait(ThreadedQueue* queue) noexcept { pthread_cond_signal(&queue->cond); pthread_mutex_unlock(&queue->mutex); } inline -bool thrd_queue_dequeue(ThreadedQueue* queue, byte* data) +bool thrd_queue_dequeue(ThreadedQueue* queue, byte* data) noexcept { if (queue->head == queue->tail) { return false; @@ -250,7 +250,7 @@ bool thrd_queue_dequeue(ThreadedQueue* queue, byte* data) } inline -bool thrd_queue_empty(ThreadedQueue* queue) { +bool thrd_queue_empty(ThreadedQueue* queue) noexcept { pthread_mutex_lock(&queue->mutex); bool is_empty = queue->head == queue->tail; pthread_mutex_unlock(&queue->mutex); @@ -259,7 +259,7 @@ bool thrd_queue_empty(ThreadedQueue* queue) { } inline -bool thrd_queue_full(ThreadedQueue* queue) { +bool thrd_queue_full(ThreadedQueue* queue) noexcept { pthread_mutex_lock(&queue->mutex); bool is_full = !ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment); pthread_mutex_unlock(&queue->mutex); @@ -269,7 +269,7 @@ bool thrd_queue_full(ThreadedQueue* queue) { // Waits until a dequeue is available inline -void thrd_queue_dequeue_wait(ThreadedQueue* queue, byte* data) +void thrd_queue_dequeue_wait(ThreadedQueue* queue, byte* data) noexcept { pthread_mutex_lock(&queue->mutex); @@ -285,7 +285,7 @@ void thrd_queue_dequeue_wait(ThreadedQueue* queue, byte* data) } inline -byte* thrd_queue_dequeue_start_wait(ThreadedQueue* queue) +byte* thrd_queue_dequeue_start_wait(ThreadedQueue* queue) noexcept { pthread_mutex_lock(&queue->mutex); @@ -297,7 +297,7 @@ byte* thrd_queue_dequeue_start_wait(ThreadedQueue* queue) } inline -void thrd_queue_dequeue_end_wait(ThreadedQueue* queue) +void thrd_queue_dequeue_end_wait(ThreadedQueue* queue) noexcept { ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); @@ -307,7 +307,7 @@ void thrd_queue_dequeue_end_wait(ThreadedQueue* queue) // Semaphore Lock inline -void thrd_queue_enqueue_sem_wait(ThreadedQueue* queue, const byte* data) +void thrd_queue_enqueue_sem_wait(ThreadedQueue* queue, const byte* data) noexcept { sem_wait(&queue->empty); pthread_mutex_lock(&queue->mutex); @@ -320,7 +320,7 @@ void thrd_queue_enqueue_sem_wait(ThreadedQueue* queue, const byte* data) } inline -bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, uint64 wait) +bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, uint64 wait) noexcept { if (sem_timedwait(&queue->empty, wait)) { return false; @@ -338,7 +338,7 @@ bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, ui } inline -byte* thrd_queue_enqueue_start_sem_wait(ThreadedQueue* queue) +byte* thrd_queue_enqueue_start_sem_wait(ThreadedQueue* queue) noexcept { sem_wait(&queue->empty); pthread_mutex_lock(&queue->mutex); @@ -347,14 +347,14 @@ byte* thrd_queue_enqueue_start_sem_wait(ThreadedQueue* queue) } inline -void thrd_queue_enqueue_end_sem_wait(ThreadedQueue* queue) +void thrd_queue_enqueue_end_sem_wait(ThreadedQueue* queue) noexcept { pthread_mutex_unlock(&queue->mutex); sem_post(&queue->full); } inline -byte* thrd_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data) +byte* thrd_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data) noexcept { sem_wait(&queue->full); pthread_mutex_lock(&queue->mutex); @@ -367,7 +367,7 @@ byte* thrd_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data) } inline -bool thrd_queue_dequeue_sem_timedwait(ThreadedQueue* queue, byte* data, uint64 wait) +bool thrd_queue_dequeue_sem_timedwait(ThreadedQueue* queue, byte* data, uint64 wait) noexcept { if (sem_timedwait(&queue->full, wait)) { return false; @@ -385,7 +385,7 @@ bool thrd_queue_dequeue_sem_timedwait(ThreadedQueue* queue, byte* data, uint64 w } inline -byte* thrd_queue_dequeue_start_sem_wait(ThreadedQueue* queue) +byte* 
thrd_queue_dequeue_start_sem_wait(ThreadedQueue* queue) noexcept { sem_wait(&queue->full); pthread_mutex_lock(&queue->mutex); @@ -394,7 +394,7 @@ byte* thrd_queue_dequeue_start_sem_wait(ThreadedQueue* queue) } inline -void thrd_queue_dequeue_end_sem_wait(ThreadedQueue* queue) +void thrd_queue_dequeue_end_sem_wait(ThreadedQueue* queue) noexcept { ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); diff --git a/memory/ThreadedRingMemory.h b/memory/ThreadedRingMemory.h index 85bc3da..862c4ce 100644 --- a/memory/ThreadedRingMemory.h +++ b/memory/ThreadedRingMemory.h @@ -64,7 +64,7 @@ void thrd_ring_free(ThreadedRingMemory* ring) } inline -byte* thrd_ring_calculate_position(ThreadedRingMemory* ring, uint64 size, byte aligned = 4) +byte* thrd_ring_calculate_position(ThreadedRingMemory* ring, uint64 size, byte aligned = 4) noexcept { pthread_mutex_lock(&ring->mutex); byte* result = ring_calculate_position((RingMemory *) ring, size, aligned); @@ -74,7 +74,7 @@ byte* thrd_ring_calculate_position(ThreadedRingMemory* ring, uint64 size, byte a } inline -void thrd_ring_reset(ThreadedRingMemory* ring) +void thrd_ring_reset(ThreadedRingMemory* ring) noexcept { pthread_mutex_lock(&ring->mutex); ring_reset((RingMemory *) ring); @@ -82,14 +82,14 @@ void thrd_ring_reset(ThreadedRingMemory* ring) } // Moves a pointer based on the size you want to consume (new position = after consuming size) -void thrd_ring_move_pointer(ThreadedRingMemory* ring, byte** pos, uint64 size, byte aligned = 4) +void thrd_ring_move_pointer(ThreadedRingMemory* ring, byte** pos, uint64 size, byte aligned = 4) noexcept { pthread_mutex_lock(&ring->mutex); ring_move_pointer((RingMemory *) ring, pos, size, aligned); pthread_mutex_unlock(&ring->mutex); } -byte* thrd_ring_get_memory(ThreadedRingMemory* ring, uint64 size, byte aligned = 4, bool zeroed = false) +byte* thrd_ring_get_memory(ThreadedRingMemory* ring, uint64 size, byte aligned = 4, bool zeroed = false) noexcept { pthread_mutex_lock(&ring->mutex); byte* result = ring_get_memory((RingMemory *) ring, size, aligned, zeroed); @@ -99,7 +99,7 @@ byte* thrd_ring_get_memory(ThreadedRingMemory* ring, uint64 size, byte aligned = } // Same as ring_get_memory but DOESN'T move the head -byte* thrd_ring_get_memory_nomove(ThreadedRingMemory* ring, uint64 size, byte aligned = 4, bool zeroed = false) +byte* thrd_ring_get_memory_nomove(ThreadedRingMemory* ring, uint64 size, byte aligned = 4, bool zeroed = false) noexcept { pthread_mutex_lock(&ring->mutex); byte* result = ring_get_memory_nomove((RingMemory *) ring, size, aligned, zeroed); @@ -111,7 +111,7 @@ byte* thrd_ring_get_memory_nomove(ThreadedRingMemory* ring, uint64 size, byte al // Used if the ring only contains elements of a certain size // This way you can get a certain element inline -byte* thrd_ring_get_element(ThreadedRingMemory* ring, uint64 element, uint64 size) +byte* thrd_ring_get_element(ThreadedRingMemory* ring, uint64 element, uint64 size) noexcept { pthread_mutex_lock(&ring->mutex); byte* result = ring_get_element((RingMemory *) ring, element, size); @@ -124,7 +124,7 @@ byte* thrd_ring_get_element(ThreadedRingMemory* ring, uint64 element, uint64 siz * Checks if one additional element can be inserted without overwriting the tail index */ inline -bool thrd_ring_commit_safe(ThreadedRingMemory* ring, uint64 size, byte aligned = 4) +bool thrd_ring_commit_safe(ThreadedRingMemory* ring, uint64 size, byte aligned = 4) noexcept { pthread_mutex_lock(&ring->mutex); bool result = 
ring_commit_safe((RingMemory *) ring, size, aligned); @@ -134,19 +134,19 @@ bool thrd_ring_commit_safe(ThreadedRingMemory* ring, uint64 size, byte aligned = } inline -void thrd_ring_force_head_update(const ThreadedRingMemory* ring) +void thrd_ring_force_head_update(const ThreadedRingMemory* ring) noexcept { _mm_clflush(ring->head); } inline -void thrd_ring_force_tail_update(const ThreadedRingMemory* ring) +void thrd_ring_force_tail_update(const ThreadedRingMemory* ring) noexcept { _mm_clflush(ring->tail); } inline -int64 thrd_ring_dump(ThreadedRingMemory* ring, byte* data) +int64 thrd_ring_dump(ThreadedRingMemory* ring, byte* data) noexcept { pthread_mutex_lock(&ring->mutex); int64 result = ring_dump((RingMemory *) ring, data); diff --git a/object/Vertex.h b/object/Vertex.h index 80b5b5c..592eba0 100644 --- a/object/Vertex.h +++ b/object/Vertex.h @@ -23,6 +23,14 @@ struct Vertex3DNormal { v3_f32 normal; }; +struct Vertex3DSamplerTextureColor { + v3_f32 position; + int32 sampler; + + // If negative = color, positive = texture + v2_f32 texture_color; +}; + struct Vertex3DTextureColor { v3_f32 position; diff --git a/platform/linux/FileUtils.cpp b/platform/linux/FileUtils.cpp index b50517b..236ef78 100644 --- a/platform/linux/FileUtils.cpp +++ b/platform/linux/FileUtils.cpp @@ -25,6 +25,7 @@ #include "../../utils/Utils.h" #include "../../utils/TestUtils.h" #include "../../memory/RingMemory.h" +#include "../../log/PerformanceProfiler.h" #ifndef MAX_PATH #define MAX_PATH PATH_MAX @@ -80,7 +81,7 @@ void file_mmf_close(MMFHandle fh) { } inline -void relative_to_absolute(const char* rel, char* path) +void relative_to_absolute(const char* __restrict rel, char* __restrict path) { char self_path[MAX_PATH]; int32 self_path_length = readlink("/proc/self/exe", self_path, MAX_PATH - 1); @@ -142,6 +143,8 @@ FileHandle file_append_handle(const char* path) { inline bool file_exists(const char* path) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, path); + struct stat buffer; const char* full_path = path; char abs_path[MAX_PATH]; @@ -155,7 +158,9 @@ bool file_exists(const char* path) { } inline -bool file_copy(const char* src, const char* dst) { +bool file_copy(const char* __restrict src, const char* __restrict dst) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, src); + char src_full_path[MAX_PATH]; char dst_full_path[MAX_PATH]; @@ -207,7 +212,9 @@ bool file_copy(const char* src, const char* dst) { } inline -void file_read(const char* path, FileBody* file, RingMemory* ring) { +void file_read(const char* __restrict path, FileBody* __restrict file, RingMemory* __restrict ring = NULL) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, path); + char full_path[MAX_PATH]; const char* abs_path = path; @@ -271,8 +278,8 @@ void file_read(const char* path, FileBody* file, RingMemory* ring) { // This function is meant to be called consecutively, which is why it is structured this way. 
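// Usage sketch for file_read_line below (hypothetical caller, not part of the patch).
// The caller owns all carry-over state, so repeated calls keep draining the same handle;
// *internal_pos == NULL marks the first call:
//
//     FileHandle fp = file_read_handle("./settings.txt"); // hypothetical path
//     char line[512];
//     char internal_buffer[512];
//     ssize_t internal_buffer_size = 0;
//     char* internal_pos = NULL;
//
//     while (file_read_line(fp, line, sizeof(line), internal_buffer, &internal_buffer_size, &internal_pos)) {
//         // one line per iteration; unconsumed bytes remain in internal_buffer
//     }
//
//     file_close_handle(fp);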
bool file_read_line( FileHandle fp, - char* line_buffer, size_t buffer_size, - char internal_buffer[512], ssize_t* internal_buffer_size, char** internal_pos + char* __restrict line_buffer, size_t buffer_size, + char internal_buffer[512], ssize_t* __restrict internal_buffer_size, char** internal_pos ) { if (!(*internal_pos)) { *internal_pos = internal_buffer; @@ -320,7 +327,9 @@ bool file_read_line( } inline -bool file_write(const char* path, const FileBody* file) { +bool file_write(const char* __restrict path, const FileBody* __restrict file) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, path); + int32 fd; char full_path[PATH_MAX]; @@ -360,7 +369,7 @@ void file_close_handle(FileHandle fp) inline void self_path(char* path) { - size_t len = readlink("/proc/self/exe", path, PATH_MAX); - if (len > 0) { + // readlink returns ssize_t and does not null-terminate; reserve one byte for the terminator + ssize_t len = readlink("/proc/self/exe", path, PATH_MAX - 1); + if (len > 0) [[likely]] { path[len] = '\0'; } else { path[0] = '\0'; diff --git a/platform/linux/SystemInfo.cpp b/platform/linux/SystemInfo.cpp index aec5049..cbf9adf 100644 --- a/platform/linux/SystemInfo.cpp +++ b/platform/linux/SystemInfo.cpp @@ -20,6 +20,10 @@ #include #include +#include <X11/Xlib.h> +#include <X11/extensions/Xrandr.h> + +// -lX11 -lXrandr // @todo Implement own line by line file reading @@ -96,7 +100,7 @@ int32 network_info_get(NetworkInfo* info) { FileBody file = {}; - for (i = 0; i < 4; i++) { + for (i = 0; i < 4; ++i) { sprintf_fast(path, "/sys/class/net/eth%d", i); if (stat(path, &st) == 0) { @@ -172,6 +176,38 @@ void ram_info_get(RamInfo* info) { info->memory = total_memory / 1024; } +RamChannelType ram_channel_info() { + FILE* fp; + char buffer[128]; + int32 ram_module_count = 0; + int32 dual_channel_capable = 0; + + fp = popen("dmidecode -t memory | grep 'Channel'", "r"); + if (fp == NULL) { + return RAM_CHANNEL_TYPE_FAILED; + } + + while (fgets(buffer, sizeof(buffer), fp) != NULL) { + if (strstr(buffer, "ChannelA") || strstr(buffer, "ChannelB")) { + ++ram_module_count; + dual_channel_capable = 1; + } else if (strstr(buffer, "Channel")) { + ++ram_module_count; + } + } + pclose(fp); + + if (ram_module_count == 1) { + return RAM_CHANNEL_TYPE_SINGLE_CHANNEL; + } else if (ram_module_count == 2 && dual_channel_capable) { + return RAM_CHANNEL_TYPE_DUAL_CHANNEL; + } else if (ram_module_count == 2 && !dual_channel_capable) { + return RAM_CHANNEL_TYPE_CAN_UPGRADE; + } else { + return RAM_CHANNEL_TYPE_FAILED; + } +} + uint32 gpu_info_get(GpuInfo* info) { FILE* fp = popen("lspci | grep VGA", "r"); if (fp == NULL) { @@ -193,7 +229,7 @@ uint32 gpu_info_get(GpuInfo* info) { // @todo this is Wrong info[count].vram = 2048; - count++; + ++count; } - fclose(fp); + pclose(fp); // fp was opened with popen, so it must be closed with pclose @@ -221,7 +257,7 @@ uint32 display_info_get(DisplayInfo* info) { info[count].height = height; info[count].hz = hz; info[count].is_primary = str_find(line, "primary"); - count++; + ++count; } } } @@ -231,4 +267,44 @@ uint32 display_info_get(DisplayInfo* info) { return count; } +bool is_dedicated_gpu_connected() { + Display* display = XOpenDisplay(NULL); + if (!display) { + return false; + } + + Window root = DefaultRootWindow(display); + XRRScreenResources* screenResources = XRRGetScreenResources(display, root); + if (!screenResources) { + XCloseDisplay(display); + return false; + } + + for (int i = 0; i < screenResources->noutput; i++) { + XRROutputInfo* outputInfo = XRRGetOutputInfo(display, screenResources, screenResources->outputs[i]); + if (outputInfo && outputInfo->connection == RR_Connected) { + XRRProviderInfo* providerInfo = XRRGetProviderInfo(display, screenResources, outputInfo->provider); + if (providerInfo && providerInfo->name) { + if (strstr(providerInfo->name, 
"NVIDIA") + || strstr(providerInfo->name, "AMD") + || strstr(providerInfo->name, "Intel") + ) { + XRRFreeOutputInfo(outputInfo); + XRRFreeProviderInfo(providerInfo); + XRRFreeScreenResources(screenResources); + XCloseDisplay(display); + return true; + } + } + XRRFreeProviderInfo(providerInfo); + } + XRRFreeOutputInfo(outputInfo); + } + + XRRFreeScreenResources(screenResources); + XCloseDisplay(display); + + return false; +} + #endif \ No newline at end of file diff --git a/platform/win32/ExceptionHandler.h b/platform/win32/ExceptionHandler.h index 43b93e7..1edab80 100644 --- a/platform/win32/ExceptionHandler.h +++ b/platform/win32/ExceptionHandler.h @@ -68,7 +68,7 @@ void log_stack_trace(CONTEXT *context) { stack_frame.AddrStack.Offset = context->Rsp; stack_frame.AddrStack.Mode = AddrModeFlat; - LOG(true, "Stack trace:"); + LOG_1("Stack trace:"); // Walk the stack while (StackWalk64(machine_type, process, thread, &stack_frame, context, NULL, @@ -88,9 +88,9 @@ void log_stack_trace(CONTEXT *context) { symbol->MaxNameLen = MAX_SYM_NAME; if (SymFromAddr(process, address, NULL, symbol)) { - LOG_FORMAT(true, "Function: %s - Address: %l", {{LOG_DATA_CHAR_STR, symbol->Name}, {LOG_DATA_INT64, &symbol->Address}}); + LOG_FORMAT_1("Function: %s - Address: %l", {{LOG_DATA_CHAR_STR, symbol->Name}, {LOG_DATA_INT64, &symbol->Address}}); } else { - LOG_FORMAT(true, "Function: (unknown) - Address: %l", {{LOG_DATA_INT64, &address}}); + LOG_FORMAT_1("Function: (unknown) - Address: %l", {{LOG_DATA_INT64, &address}}); } // Resolve file and line number @@ -99,18 +99,18 @@ void log_stack_trace(CONTEXT *context) { line.SizeOfStruct = sizeof(IMAGEHLP_LINE64); if (SymGetLineFromAddr64(process, address, &displacement, &line)) { - LOG_FORMAT(true, " File: %s, Line: %l", {{LOG_DATA_CHAR_STR, line.FileName}, {LOG_DATA_INT64, &line.LineNumber}}); + LOG_FORMAT_1(" File: %s, Line: %l", {{LOG_DATA_CHAR_STR, line.FileName}, {LOG_DATA_INT64, &line.LineNumber}}); } else { - LOG(true, " File: (unknown), Line: (unknown)"); + LOG_1(" File: (unknown), Line: (unknown)"); } // Print module name IMAGEHLP_MODULE64 module_info; module_info.SizeOfStruct = sizeof(IMAGEHLP_MODULE64); if (SymGetModuleInfo64(process, address, &module_info)) { - LOG_FORMAT(true, " Module: %s", {{LOG_DATA_CHAR_STR, module_info.ModuleName}}); + LOG_FORMAT_1(" Module: %s", {{LOG_DATA_CHAR_STR, module_info.ModuleName}}); } else { - LOG(true, " Module: (unknown)"); + LOG_1(" Module: (unknown)"); } } diff --git a/platform/win32/FileUtils.cpp b/platform/win32/FileUtils.cpp index c71d32a..ab45a05 100644 --- a/platform/win32/FileUtils.cpp +++ b/platform/win32/FileUtils.cpp @@ -22,6 +22,7 @@ #include "../../utils/TestUtils.h" #include "../../memory/RingMemory.h" #include "../../log/Stats.h" +#include "../../log/PerformanceProfiler.h" typedef HANDLE FileHandle; typedef HANDLE MMFHandle; @@ -60,7 +61,7 @@ void file_mmf_close(MMFHandle fh) { } inline -void relative_to_absolute(const char* rel, char* path) +void relative_to_absolute(const char* __restrict rel, char* __restrict path) { char self_path[MAX_PATH]; int32 self_path_length = GetModuleFileNameA(NULL, self_path, MAX_PATH); @@ -88,6 +89,8 @@ void relative_to_absolute(const char* rel, char* path) inline uint64 file_size(const char* path) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, path); + // @performance Profile against fseek strategy FileHandle fp; if (*path == '.') { @@ -130,6 +133,8 @@ file_size(const char* path) inline bool file_exists(const char* path) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, path); + DWORD 
file_attr; if (*path == '.') { @@ -145,8 +150,10 @@ bool file_exists(const char* path) } inline void -file_read(const char* path, FileBody* file, RingMemory* ring = NULL) +file_read(const char* __restrict path, FileBody* __restrict file, RingMemory* __restrict ring = NULL) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, path); + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; @@ -210,8 +217,10 @@ file_read(const char* path, FileBody* file, RingMemory* ring = NULL) // @question Do we really need length? we have file.size we could use as we do in a function above inline -void file_read(const char* path, FileBody* file, uint64 offset, uint64 length = MAX_UINT64, RingMemory* ring = NULL) +void file_read(const char* __restrict path, FileBody* __restrict file, uint64 offset, uint64 length = MAX_UINT64, RingMemory* __restrict ring = NULL) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, path); + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; @@ -294,7 +303,7 @@ void file_read(const char* path, FileBody* file, uint64 offset, uint64 length = } inline -void file_read(FileHandle fp, FileBody* file, uint64 offset = 0, uint64 length = MAX_UINT64, RingMemory* ring = NULL) +void file_read(FileHandle fp, FileBody* __restrict file, uint64 offset = 0, uint64 length = MAX_UINT64, RingMemory* __restrict ring = NULL) { LARGE_INTEGER size; if (!GetFileSizeEx(fp, &size)) { @@ -348,8 +357,8 @@ void file_read(FileHandle fp, FileBody* file, uint64 offset = 0, uint64 length = inline bool file_read_line( FileHandle fp, - char* line_buffer, size_t buffer_size, - char internal_buffer[512], ssize_t* internal_buffer_size, char** internal_pos + char* __restrict line_buffer, size_t buffer_size, + char internal_buffer[512], ssize_t* __restrict internal_buffer_size, char** internal_pos ) { if (!(*internal_pos)) { *internal_pos = internal_buffer; @@ -397,62 +406,11 @@ bool file_read_line( return true; } -inline uint64 -file_read_struct(const char* path, void* file, uint32 size) -{ - FileHandle fp; - if (*path == '.') { - char full_path[MAX_PATH]; - relative_to_absolute(path, full_path); - - fp = CreateFileA((LPCSTR) full_path, - GENERIC_READ, - FILE_SHARE_READ, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - NULL - ); - } else { - fp = CreateFileA((LPCSTR) path, - GENERIC_READ, - FILE_SHARE_READ, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - NULL - ); - } - - if (fp == INVALID_HANDLE_VALUE) { - return 0; - } - - LARGE_INTEGER fsize; - if (!GetFileSizeEx(fp, &fsize)) { - CloseHandle(fp); - - return 0; - } - - DWORD read; - ASSERT_SIMPLE(fsize.QuadPart > size); - if (!ReadFile(fp, file, (uint32) size, &read, NULL)) { - CloseHandle(fp); - - return 0; - } - - CloseHandle(fp); - - LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_READ, read); - - return read; -} - inline bool -file_write(const char* path, const FileBody* file) +file_write(const char* __restrict path, const FileBody* __restrict file) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, path); + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; @@ -495,50 +453,11 @@ file_write(const char* path, const FileBody* file) return true; } -inline bool -file_write_struct(const char* path, const void* file, uint32 size) -{ - FileHandle fp; - if (*path == '.') { - char full_path[MAX_PATH]; - relative_to_absolute(path, full_path); - - fp = CreateFileA((LPCSTR) full_path, - GENERIC_WRITE, - 0, - NULL, - CREATE_ALWAYS, - FILE_ATTRIBUTE_NORMAL, - NULL - ); - } else { - fp = CreateFileA((LPCSTR) path, - GENERIC_WRITE, - 0, - NULL, - CREATE_ALWAYS, - FILE_ATTRIBUTE_NORMAL, - 
NULL - ); - } - - DWORD written; - ASSERT_SIMPLE(size < MAX_UINT32); - if (!WriteFile(fp, file, size, &written, NULL)) { - CloseHandle(fp); - return false; - } - - CloseHandle(fp); - - LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_WRITE, written); - - return true; -} - inline void -file_copy(const char* src, const char* dst) +file_copy(const char* __restrict src, const char* __restrict dst) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, src); + if (*src == '.') { char src_full_path[MAX_PATH]; relative_to_absolute(src, src_full_path); @@ -604,10 +523,10 @@ HANDLE file_append_handle(const char* path) inline bool file_read_async( FileHandle fp, - FileBodyAsync* file, + FileBodyAsync* __restrict file, uint64_t offset = 0, uint64_t length = MAX_UINT64, - RingMemory* ring = NULL + RingMemory* __restrict ring = NULL ) { LARGE_INTEGER size; if (!GetFileSizeEx(fp, &size)) { @@ -741,8 +660,10 @@ FileHandle file_read_async_handle(const char* path) return fp; } -bool file_append(const char* path, const char* file) +bool file_append(const char* __restrict path, const char* __restrict file) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, path); + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; @@ -788,6 +709,8 @@ bool file_append(const char* path, const char* file) inline bool file_append(FileHandle fp, const char* file) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, file); + if (fp == INVALID_HANDLE_VALUE) { ASSERT_SIMPLE(false); return false; @@ -808,6 +731,8 @@ file_append(FileHandle fp, const char* file) inline bool file_append(FileHandle fp, const char* file, size_t length) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, file); + if (fp == INVALID_HANDLE_VALUE) { ASSERT_SIMPLE(false); return false; @@ -825,8 +750,10 @@ file_append(FileHandle fp, const char* file, size_t length) } inline bool -file_append(const char* path, const FileBody* file) +file_append(const char* __restrict path, const FileBody* __restrict file) { + PROFILE_VERBOSE(PROFILE_FILE_UTILS, path); + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; diff --git a/platform/win32/SystemInfo.cpp b/platform/win32/SystemInfo.cpp index 9d4a9a0..c381339 100644 --- a/platform/win32/SystemInfo.cpp +++ b/platform/win32/SystemInfo.cpp @@ -28,6 +28,8 @@ #include #include #include +#include <wbemidl.h> +#include <comdef.h> // @performance Do we really need all these libs, can't we simplify that?! 
// At least we should dynamically load them, this way the application won't crash if the lib doesn't exist @@ -38,6 +40,9 @@ #pragma comment(lib, "d3d12.lib") #pragma comment(lib, "dxgi.lib") #pragma comment(lib, "Ws2_32.lib") +#pragma comment(lib, "setupapi.lib") +#pragma comment(lib, "cfgmgr32.lib") +#pragma comment(lib, "comsuppw.lib") uint64 system_private_memory_usage() { @@ -359,6 +364,93 @@ void ram_info_get(RamInfo* info) { info->memory = (uint32) (statex.ullTotalPhys / (1024 * 1024)); } +RamChannelType ram_channel_info() { + HRESULT hres; + hres = CoInitializeEx(0, COINIT_MULTITHREADED); + if (FAILED(hres)) { + return RAM_CHANNEL_TYPE_FAILED; + } + + hres = CoInitializeSecurity(NULL, -1, NULL, NULL, RPC_C_AUTHN_LEVEL_DEFAULT, RPC_C_IMP_LEVEL_IMPERSONATE, NULL, EOAC_NONE, NULL); + if (FAILED(hres)) { + CoUninitialize(); + + return RAM_CHANNEL_TYPE_FAILED; + } + + IWbemLocator *pLoc = NULL; + hres = CoCreateInstance(CLSID_WbemLocator, 0, CLSCTX_INPROC_SERVER, IID_IWbemLocator, (LPVOID *)&pLoc); + if (FAILED(hres)) { + CoUninitialize(); + + return RAM_CHANNEL_TYPE_FAILED; + } + + IWbemServices *pSvc = NULL; + hres = pLoc->ConnectServer(_bstr_t(L"ROOT\\CIMV2"), NULL, NULL, 0, NULL, 0, 0, &pSvc); + if (FAILED(hres)) { + pLoc->Release(); + CoUninitialize(); + + return RAM_CHANNEL_TYPE_FAILED; + } + + hres = CoSetProxyBlanket(pSvc, RPC_C_AUTHN_WINNT, RPC_C_AUTHZ_NONE, NULL, RPC_C_AUTHN_LEVEL_CALL, RPC_C_IMP_LEVEL_IMPERSONATE, NULL, EOAC_NONE); + if (FAILED(hres)) { + pSvc->Release(); + pLoc->Release(); + CoUninitialize(); + + return RAM_CHANNEL_TYPE_FAILED; + } + + IEnumWbemClassObject* pEnumerator = NULL; + hres = pSvc->ExecQuery(bstr_t("WQL"), bstr_t("SELECT * FROM Win32_PhysicalMemory"), WBEM_FLAG_FORWARD_ONLY | WBEM_FLAG_RETURN_IMMEDIATELY, NULL, &pEnumerator); + if (FAILED(hres)) { + pSvc->Release(); + pLoc->Release(); + CoUninitialize(); + + return RAM_CHANNEL_TYPE_FAILED; + } + + IWbemClassObject *pclsObj = NULL; + ULONG uReturn = 0; + int32 ram_module_count = 0; + int32 dual_channel_capable = 0; + + while (pEnumerator) { + hres = pEnumerator->Next(WBEM_INFINITE, 1, &pclsObj, &uReturn); + if (uReturn == 0) break; + + VARIANT vtProp; + hres = pclsObj->Get(L"BankLabel", 0, &vtProp, 0, 0); + if (SUCCEEDED(hres) && vtProp.vt == VT_BSTR && vtProp.bstrVal) { // guard against NULL/non-string BankLabel before wcscmp + ++ram_module_count; + if (wcscmp(vtProp.bstrVal, L"BANK 0") == 0 || wcscmp(vtProp.bstrVal, L"BANK 1") == 0) { + dual_channel_capable = 1; + } + + VariantClear(&vtProp); + } + pclsObj->Release(); + } + + pSvc->Release(); + pLoc->Release(); + CoUninitialize(); + + if (ram_module_count == 1) { + return RAM_CHANNEL_TYPE_SINGLE_CHANNEL; + } else if (ram_module_count == 2 && dual_channel_capable) { + return RAM_CHANNEL_TYPE_DUAL_CHANNEL; + } else if (ram_module_count == 2 && !dual_channel_capable) { + return RAM_CHANNEL_TYPE_CAN_UPGRADE; + } else { + return RAM_CHANNEL_TYPE_FAILED; + } +} +
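// Usage sketch for ram_channel_info(), defined above for both platforms (hypothetical
// caller, not part of the patch); the RamChannelType values are the ones returned by both
// implementations:
//
//     RamChannelType channel_type = ram_channel_info();
//     switch (channel_type) {
//         case RAM_CHANNEL_TYPE_SINGLE_CHANNEL: // one module detected
//         case RAM_CHANNEL_TYPE_CAN_UPGRADE:    // two modules, but not running dual channel
//             LOG_1("RAM is not running in dual channel mode\n");
//             break;
//         case RAM_CHANNEL_TYPE_DUAL_CHANNEL:
//             break;
//         case RAM_CHANNEL_TYPE_FAILED:
//         default:
//             break;
//     }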
 uint32 gpu_info_get(GpuInfo* info) { IDXGIFactory *pFactory = NULL; IDXGIAdapter *pAdapter = NULL; @@ -382,7 +474,7 @@ uint32 gpu_info_get(GpuInfo* info) { info[i].vram = (uint32) (adapterDesc.DedicatedVideoMemory / (1024 * 1024)); pAdapter->Release(); - i++; + ++i; } pFactory->Release(); @@ -415,4 +507,27 @@ uint32 display_info_get(DisplayInfo* info) { return i; } +bool is_dedicated_gpu_connected() { + DISPLAY_DEVICEA displayDevice; + displayDevice.cb = sizeof(DISPLAY_DEVICEA); + for (int32 i = 0; EnumDisplayDevicesA(NULL, i, &displayDevice, 0); ++i) { + if (displayDevice.StateFlags & DISPLAY_DEVICE_ATTACHED_TO_DESKTOP) { + DISPLAY_DEVICEA gpuDevice; + gpuDevice.cb = sizeof(DISPLAY_DEVICEA); + if (EnumDisplayDevicesA(displayDevice.DeviceName, 0, &gpuDevice, 0)) { + if (gpuDevice.DeviceID + && (str_contains(gpuDevice.DeviceID, "PCI\\VEN_10DE") // Nvidia + || str_contains(gpuDevice.DeviceID, "PCI\\VEN_1002") // AMD + || str_contains(gpuDevice.DeviceID, "PCI\\VEN_8086") // Intel + ) + ) { + return true; + } + } + } + } + + return false; +} + #endif \ No newline at end of file diff --git a/platform/win32/TimeUtils.h b/platform/win32/TimeUtils.h index 7915e0e..18dd7fa 100644 --- a/platform/win32/TimeUtils.h +++ b/platform/win32/TimeUtils.h @@ -16,16 +16,10 @@ void usleep(uint64 microseconds) { - if ((microseconds % 1000) == 0) { - Sleep((DWORD) (microseconds / 1000)); - return; - } - - LARGE_INTEGER frequency; + LARGE_INTEGER frequency, start, end; QueryPerformanceFrequency(&frequency); - - LARGE_INTEGER start, end; QueryPerformanceCounter(&start); + long long target = start.QuadPart + (microseconds * frequency.QuadPart) / 1000000; do { diff --git a/platform/win32/UtilsWin32.h b/platform/win32/UtilsWin32.h index d04a4a8..a0e729a 100644 --- a/platform/win32/UtilsWin32.h +++ b/platform/win32/UtilsWin32.h @@ -15,7 +15,7 @@ #define strtok_r strtok_s -uint32 key_to_unicode(byte scan_code, byte vkey, byte keyboard_state[256]) +uint32 key_to_unicode(byte scan_code, byte vkey, byte keyboard_state[256]) noexcept { WCHAR char_buffer[5] = {}; int32 result = ToUnicode(vkey, scan_code, keyboard_state, char_buffer, 5, 0); diff --git a/platform/win32/Window.h b/platform/win32/Window.h index 71fefe9..3c0429b 100644 --- a/platform/win32/Window.h +++ b/platform/win32/Window.h @@ -46,7 +46,7 @@ struct Window { }; inline -void window_backup_state(Window* __restrict w) +void window_backup_state(Window* __restrict w) noexcept { w->state_old.style = GetWindowLongPtr(w->hwnd, GWL_STYLE); w->state_old.width = w->width; @@ -56,7 +56,7 @@ void window_backup_state(Window* __restrict w) } inline -void window_restore_state(Window* __restrict w) +void window_restore_state(Window* __restrict w) noexcept { w->width = w->state_old.width; w->height = w->state_old.height; diff --git a/platform/win32/audio/DirectSound.h b/platform/win32/audio/DirectSound.h index f3eb32b..2c17162 100644 --- a/platform/win32/audio/DirectSound.h +++ b/platform/win32/audio/DirectSound.h @@ -34,7 +34,7 @@ HRESULT WINAPI DirectSoundCreate8Stub(LPCGUID, LPDIRECTSOUND8*, LPUNKNOWN) { void audio_load(HWND hwnd, AudioSetting* setting, DirectSoundSetting* api_setting) { HMODULE lib = LoadLibraryExA((LPCSTR) "dsound.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); if (!lib) { - LOG(true, "DirectSound: Couldn't load dsound.dll\n"); + LOG_1("DirectSound: Couldn't load dsound.dll\n"); return; } @@ -42,13 +42,13 @@ void audio_load(HWND hwnd, AudioSetting* setting, DirectSoundSetting* api_settin DirectSoundCreate8_t* DirectSoundCreate8 = (DirectSoundCreate8_t *) GetProcAddress(lib, "DirectSoundCreate8"); if (!DirectSoundCreate8 || !SUCCEEDED(DirectSoundCreate8(0, &api_setting->audio_handle, 0))) { - LOG(true, "DirectSound: DirectSoundCreate8 failed\n"); + LOG_1("DirectSound: DirectSoundCreate8 failed\n"); return; } if(!SUCCEEDED(api_setting->audio_handle->SetCooperativeLevel(hwnd, DSSCL_PRIORITY))) { - LOG(true, "DirectSound: SetCooperativeLevel failed.\n"); + LOG_1("DirectSound: SetCooperativeLevel failed.\n"); return; } @@ -70,13 +70,13 @@ void audio_load(HWND hwnd, AudioSetting* setting, DirectSoundSetting* api_settin buffer_desc.dwFlags = DSBCAPS_PRIMARYBUFFER; 
if(!SUCCEEDED(api_setting->audio_handle->CreateSoundBuffer(&buffer_desc, &api_setting->primary_buffer, 0))) { - LOG(true, "DirectSound: CreateSoundBuffer1 failed.\n"); + LOG_1("DirectSound: CreateSoundBuffer1 failed.\n"); return; } if (!SUCCEEDED(api_setting->primary_buffer->SetFormat(&wf))) { - LOG(true, "DirectSound: SetFormat failed.\n"); + LOG_1("DirectSound: SetFormat failed.\n"); return; } @@ -92,7 +92,7 @@ void audio_load(HWND hwnd, AudioSetting* setting, DirectSoundSetting* api_settin buffer_desc2.lpwfxFormat = &wf; if(!SUCCEEDED(api_setting->audio_handle->CreateSoundBuffer(&buffer_desc2, &api_setting->secondary_buffer, 0))) { - LOG(true, "DirectSound: CreateSoundBuffer2 failed.\n"); + LOG_1("DirectSound: CreateSoundBuffer2 failed.\n"); return; } @@ -139,10 +139,12 @@ void audio_free(AudioSetting*, DirectSoundSetting* api_setting) inline uint32 audio_buffer_fillable(const AudioSetting* setting, const DirectSoundSetting* api_setting) { + PROFILE(PROFILE_AUDIO_BUFFER_FILLABLE); + DWORD player_cursor; DWORD write_cursor; if (!SUCCEEDED(api_setting->secondary_buffer->GetCurrentPosition(&player_cursor, &write_cursor))) { - LOG(true, "DirectSound: GetCurrentPosition failed.\n"); + LOG_1("DirectSound: GetCurrentPosition failed.\n"); return 0; } @@ -169,6 +171,7 @@ uint32 audio_buffer_fillable(const AudioSetting* setting, const DirectSoundSetti inline void audio_play_buffer(AudioSetting* setting, DirectSoundSetting* api_setting) { + PROFILE(PROFILE_AUDIO_PLAY_BUFFER); if (setting->sample_buffer_size == 0) { return; } diff --git a/platform/win32/audio/Wasapi.h b/platform/win32/audio/Wasapi.h index 305623c..adaf3ea 100644 --- a/platform/win32/audio/Wasapi.h +++ b/platform/win32/audio/Wasapi.h @@ -43,7 +43,7 @@ typedef HRESULT WINAPI IAudioClient_GetService_t(IAudioClient*, REFIID, void**); void audio_load(HWND hwnd, AudioSetting* setting, WasapiSetting* api_setting) { HMODULE ole32 = LoadLibraryExA((LPCSTR) "ole32.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); if (!ole32) { - LOG(true, "Wasapi: Couldn't load ole32.dll\n"); + LOG_1("Wasapi: Couldn't load ole32.dll\n"); return; } @@ -52,14 +52,14 @@ void audio_load(HWND hwnd, AudioSetting* setting, WasapiSetting* api_setting) { CoCreateInstance_t* co_create_instance = (CoCreateInstance_t *) GetProcAddress(ole32, "CoCreateInstance"); if (!co_initialize_ex || !co_create_instance) { - LOG(true, "Wasapi: ole32 function binding failed\n"); + LOG_1("Wasapi: ole32 function binding failed\n"); return; } HMODULE mmdevapi = LoadLibraryExA((LPCSTR) "mmdevapi.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); if (!mmdevapi) { - LOG(true, "Wasapi: Couldn't load mmdevapi.dll\n"); + LOG_1("Wasapi: Couldn't load mmdevapi.dll\n"); return; } @@ -68,14 +68,14 @@ void audio_load(HWND hwnd, AudioSetting* setting, WasapiSetting* api_setting) { IMMDevice_Activate_t* IMMDevice_Activate = (IMMDevice_Activate_t *) GetProcAddress(mmdevapi, "IMMDevice_Activate"); if (!IMMDeviceEnumerator_GetDefaultAudioEndpoint || !IMMDevice_Activate) { - LOG(true, "Wasapi: mmdevapi function binding failed\n"); + LOG_1("Wasapi: mmdevapi function binding failed\n"); return; } HMODULE audioclient = LoadLibraryExA((LPCSTR) "audioclient.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); if (!audioclient) { - LOG(true, "Wasapi: Couldn't load audioclient.dll\n"); + LOG_1("Wasapi: Couldn't load audioclient.dll\n"); return; } @@ -87,14 +87,14 @@ void audio_load(HWND hwnd, AudioSetting* setting, WasapiSetting* api_setting) { IAudioClient_GetService_t* pIAudioClient_GetService = (IAudioClient_GetService_t *) 
GetProcAddress(audioclient, "IAudioClient_GetService"); if (!pIAudioClient_GetMixFormat || !pIAudioClient_Initialize || !pIAudioClient_Start || !pIAudioClient_Stop || !pIAudioClient_GetService) { - LOG(true, "Wasapi: audioclient function binding failed\n"); + LOG_1("Wasapi: audioclient function binding failed\n"); return; } HRESULT hr = co_initialize_ex(NULL, COINIT_MULTITHREADED); if (FAILED(hr)) { - LOG(true, "Wasapi: Wasapi initialize failed\n"); + LOG_1("Wasapi: Wasapi initialize failed\n"); return; } @@ -104,14 +104,14 @@ void audio_load(HWND hwnd, AudioSetting* setting, WasapiSetting* api_setting) { hr = co_create_instance(CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, IID_IMMDeviceEnumerator, (void **) &enumerator); if (FAILED(hr)) { - LOG(true, "Wasapi: Wasapi CreateInstance failed\n"); + LOG_1("Wasapi: Wasapi CreateInstance failed\n"); return; } hr = IMMDeviceEnumerator_GetDefaultAudioEndpoint(enumerator, eRender, eConsole, &device); if (FAILED(hr)) { - LOG(true, "Wasapi: Wasapi DefaultAudioEndpoint failed\n"); + LOG_1("Wasapi: Wasapi DefaultAudioEndpoint failed\n"); enumerator->Release(); @@ -120,7 +120,7 @@ void audio_load(HWND hwnd, AudioSetting* setting, WasapiSetting* api_setting) { hr = IMMDevice_Activate(device, IID_IAudioClient, CLSCTX_ALL, NULL, (void **) &api_setting->audio_handle); if (FAILED(hr)) { - LOG(true, "Wasapi: Wasapi DeviceActivate failed\n"); + LOG_1("Wasapi: Wasapi DeviceActivate failed\n"); device->Release(); enumerator->Release(); @@ -171,6 +171,7 @@ void audio_free(AudioSetting* setting, WasapiSetting* api_setting) inline uint32 audio_buffer_fillable(const AudioSetting* setting, const WasapiSetting* api_setting) { + PROFILE(PROFILE_AUDIO_BUFFER_FILLABLE); if (!api_setting->audio_handle) { return 0; } @@ -186,6 +187,7 @@ uint32 audio_buffer_fillable(const AudioSetting* setting, const WasapiSetting* a inline void audio_play_buffer(AudioSetting* setting, WasapiSetting* api_setting) { + PROFILE(PROFILE_AUDIO_PLAY_BUFFER); if (!api_setting->audio_handle || setting->sample_buffer_size == 0) { return; } diff --git a/platform/win32/audio/XAudio2.h b/platform/win32/audio/XAudio2.h index dbd1b20..610f18b 100644 --- a/platform/win32/audio/XAudio2.h +++ b/platform/win32/audio/XAudio2.h @@ -37,20 +37,20 @@ void audio_load(HWND hwnd, AudioSetting* setting, XAudio2Setting* api_setting) { CoInitialize(NULL); HMODULE lib = LoadLibraryExA((LPCSTR) "xaudio2_9.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); if (!lib) { - LOG(true, "Xaudio2: Couldn't load xaudio2_9.dll\n"); + LOG_1("Xaudio2: Couldn't load xaudio2_9.dll\n"); lib = LoadLibraryExA((LPCSTR) "xaudio2_8.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); } if (!lib) { - LOG(true, "Xaudio2: Couldn't load xaudio2_8.dll\n"); + LOG_1("Xaudio2: Couldn't load xaudio2_8.dll\n"); return; } XAudio2Create_t* XAudio2Create = (XAudio2Create_t *) GetProcAddress(lib, "XAudio2Create"); if (!XAudio2Create || !SUCCEEDED(XAudio2Create(&api_setting->audio_handle, 0, XAUDIO2_DEFAULT_PROCESSOR))) { - LOG(true, "Xaudio2: XAudio2Create failed\n"); + LOG_1("Xaudio2: XAudio2Create failed\n"); return; } @@ -63,7 +63,7 @@ void audio_load(HWND hwnd, AudioSetting* setting, XAudio2Setting* api_setting) { 0, NULL)) ) { - LOG(true, "Xaudio2: CreateMasteringVoice failed\n"); + LOG_1("Xaudio2: CreateMasteringVoice failed\n"); return; } @@ -78,7 +78,7 @@ void audio_load(HWND hwnd, AudioSetting* setting, XAudio2Setting* api_setting) { wf.cbSize = 0; if (!SUCCEEDED(api_setting->audio_handle->CreateSourceVoice(&api_setting->source_voice, &wf))) { - LOG(true, "Xaudio2: 
CreateSourceVoice failed\n"); + LOG_1("Xaudio2: CreateSourceVoice failed\n"); return; } @@ -159,6 +159,7 @@ void audio_free(AudioSetting* setting, XAudio2Setting* api_setting) inline uint32 audio_buffer_fillable(const AudioSetting* setting, const XAudio2Setting* api_setting) { + PROFILE(PROFILE_AUDIO_BUFFER_FILLABLE); if (!api_setting->source_voice) { return 0; } @@ -174,6 +175,8 @@ uint32 audio_buffer_fillable(const AudioSetting* setting, const XAudio2Setting* inline void audio_play_buffer(AudioSetting* setting, XAudio2Setting* api_setting) { + PROFILE(PROFILE_AUDIO_PLAY_BUFFER); + if (!api_setting->source_voice || setting->sample_buffer_size == 0) { return; } @@ -187,7 +190,7 @@ void audio_play_buffer(AudioSetting* setting, XAudio2Setting* api_setting) { ); if (!SUCCEEDED(api_setting->source_voice->SubmitSourceBuffer(&api_setting->internal_buffer[idx]))) { - LOG(true, "Xaudio2: SubmitSourceBuffer failed\n"); + LOG_1("Xaudio2: SubmitSourceBuffer failed\n"); return; } diff --git a/platform/win32/threading/Atomic.h b/platform/win32/threading/Atomic.h index 44d91a5..b33f5f2 100644 --- a/platform/win32/threading/Atomic.h +++ b/platform/win32/threading/Atomic.h @@ -25,409 +25,409 @@ typedef union { f64 f; LONG64 l; } _atomic_64; // (e.g. see _InterlockedCompareExchange8, it should be _InterlockedCompareExchange8_nf/rel/acq) // To solve this we would probably have to make some of these functions Architecture specific in addition to platform specific -FORCE_INLINE void atomic_set_relaxed(void** target, void* new_pointer) { InterlockedExchangePointerNoFence(target, new_pointer); } -FORCE_INLINE void* atomic_get_relaxed(void** target) { return InterlockedCompareExchangePointerNoFence(target, NULL, NULL); } -FORCE_INLINE void atomic_set_relaxed(volatile int8* value, int8 new_value) { InterlockedExchangeNoFence8((volatile char *) value, new_value); } -FORCE_INLINE void atomic_set_relaxed(volatile int16* value, int16 new_value) { InterlockedExchangeNoFence16((volatile short *) value, new_value); } -FORCE_INLINE void atomic_set_relaxed(volatile int32* value, int32 new_value) { InterlockedExchangeNoFence((volatile long *) value, new_value); } -FORCE_INLINE void atomic_set_relaxed(volatile int64* value, int64 new_value) { InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) new_value); } -FORCE_INLINE void atomic_set_relaxed(volatile f32* value, f32 new_value) { _atomic_32 temp = {.f = new_value}; InterlockedExchangeNoFence((volatile long *) value, (long) temp.l); } -FORCE_INLINE void atomic_set_relaxed(volatile f64* value, f64 new_value) { _atomic_64 temp = {.f = new_value}; InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) temp.l); } -FORCE_INLINE int8 atomic_fetch_set_relaxed(volatile int8* value, int8 new_value) { return (int8) InterlockedExchangeNoFence8((volatile char *) value, (char) new_value); } -FORCE_INLINE int16 atomic_fetch_set_relaxed(volatile int16* value, int16 new_value) { return (int16) InterlockedExchangeNoFence16((volatile short *) value, (short) new_value); } -FORCE_INLINE int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) { return (int32) InterlockedExchangeNoFence((volatile long *) value, new_value); } -FORCE_INLINE int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) { return (int64) InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) new_value); } -FORCE_INLINE int8 atomic_get_relaxed(volatile int8* value) { return (int8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); } -FORCE_INLINE 
int16 atomic_get_relaxed(volatile int16* value) { return (int16) InterlockedCompareExchangeNoFence16((volatile short *) value, 0, 0); } -FORCE_INLINE int32 atomic_get_relaxed(volatile int32* value) { return (int32) InterlockedCompareExchangeNoFence((volatile long *) value, 0, 0); } -FORCE_INLINE int64 atomic_get_relaxed(volatile int64* value) { return (int64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, 0, 0); } -FORCE_INLINE f32 atomic_get_relaxed(volatile f32* value) { _atomic_32 temp = {.l = InterlockedCompareExchangeNoFence((volatile long *) value, 0, 0)}; return temp.f; } -FORCE_INLINE f64 atomic_get_relaxed(volatile f64* value) { _atomic_64 temp = {.l = InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, 0, 0)}; return temp.f; } -FORCE_INLINE void atomic_increment_relaxed(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); } -FORCE_INLINE void atomic_increment_relaxed(volatile int16* value) { InterlockedIncrementNoFence16((volatile short *) value); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int16* value) { InterlockedDecrementNoFence16((volatile short *) value); } -FORCE_INLINE void atomic_increment_relaxed(volatile int32* value) { InterlockedIncrementNoFence((volatile long *) value); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int32* value) { InterlockedDecrementNoFence((volatile long *) value); } -FORCE_INLINE void atomic_increment_relaxed(volatile int64* value) { InterlockedIncrementNoFence64((volatile LONG64 *) value); } -FORCE_INLINE void atomic_decrement_relaxed(volatile int64* value) { InterlockedDecrementNoFence64((volatile LONG64 *) value); } -FORCE_INLINE void atomic_add_relaxed(volatile int8* value, int8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); } -FORCE_INLINE void atomic_sub_relaxed(volatile int8* value, int8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); } -FORCE_INLINE void atomic_add_relaxed(volatile int16* value, int16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); } -FORCE_INLINE void atomic_sub_relaxed(volatile int16* value, int16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); } -FORCE_INLINE void atomic_add_relaxed(volatile int32* value, int32 increment) { InterlockedAddNoFence((volatile long *) value, increment); } -FORCE_INLINE void atomic_sub_relaxed(volatile int32* value, int32 decrement) { InterlockedAddNoFence((volatile long *) value, -decrement); } -FORCE_INLINE void atomic_add_relaxed(volatile int64* value, int64 increment) { InterlockedAddNoFence64((volatile LONG64 *) value, (LONG64) increment); } -FORCE_INLINE void atomic_sub_relaxed(volatile int64* value, int64 decrement) { InterlockedAddNoFence64((volatile LONG64 *) value, -((LONG64) decrement)); } -FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) { _atomic_32 temp = {.l = InterlockedCompareExchangeNoFence((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; } -FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) { _atomic_64 temp = {.l = InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; } -FORCE_INLINE int32 atomic_compare_exchange_weak_relaxed(volatile int32* 
value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchangeNoFence((volatile long *) value, desired, *expected); }
-FORCE_INLINE int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) { return (int64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-FORCE_INLINE int8 atomic_fetch_add_relaxed(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
-FORCE_INLINE int8 atomic_fetch_sub_relaxed(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
-FORCE_INLINE int16 atomic_fetch_add_relaxed(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
-FORCE_INLINE int16 atomic_fetch_sub_relaxed(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
-FORCE_INLINE int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddNoFence((volatile long *) value, operand); }
-FORCE_INLINE int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddNoFence((volatile unsigned long *) value, -((long) operand)); }
-FORCE_INLINE int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAddNoFence64((volatile LONG64 *) value, (LONG64) operand); }
-FORCE_INLINE int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
-FORCE_INLINE void atomic_set_relaxed(volatile uint8* value, uint8 new_value) { InterlockedExchangeNoFence8((volatile char *) value, (char) new_value); }
-FORCE_INLINE void atomic_set_relaxed(volatile uint16* value, uint16 new_value) { InterlockedExchangeNoFence16((volatile short *) value, (short) new_value); }
-FORCE_INLINE void atomic_set_relaxed(volatile uint32* value, uint32 new_value) { InterlockedExchangeNoFence((volatile long *) value, new_value); }
-FORCE_INLINE void atomic_set_relaxed(volatile uint64* value, uint64 new_value) { InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE uint8 atomic_fetch_set_relaxed(volatile uint8* value, uint8 new_value) { return (uint8) InterlockedExchangeNoFence8((volatile char *) value, (char) new_value); }
-FORCE_INLINE uint16 atomic_fetch_set_relaxed(volatile uint16* value, uint16 new_value) { return (uint16) InterlockedExchangeNoFence16((volatile short *) value, (short) new_value); }
-FORCE_INLINE uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchangeNoFence((volatile long *) value, new_value); }
-FORCE_INLINE uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE uint8 atomic_get_relaxed(volatile uint8* value) { return (uint8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
-FORCE_INLINE uint16 atomic_get_relaxed(volatile uint16* value) { return (uint16) InterlockedCompareExchangeNoFence16((volatile short *) value, 0, 0); }
-FORCE_INLINE uint32 atomic_get_relaxed(volatile uint32* value) { return (uint32) InterlockedCompareExchangeNoFence((volatile long *) value, 0, 0); }
-FORCE_INLINE uint64 atomic_get_relaxed(volatile uint64* value) { return (uint64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, 0, 0); }
-FORCE_INLINE void atomic_increment_relaxed(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
-FORCE_INLINE void atomic_decrement_relaxed(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
-FORCE_INLINE void atomic_increment_relaxed(volatile uint16* value) { InterlockedIncrementNoFence16((volatile short *) value); }
-FORCE_INLINE void atomic_decrement_relaxed(volatile uint16* value) { InterlockedDecrementNoFence16((volatile short *) value); }
-FORCE_INLINE void atomic_increment_relaxed(volatile uint32* value) { InterlockedIncrementNoFence((volatile long *) value); }
-FORCE_INLINE void atomic_decrement_relaxed(volatile uint32* value) { InterlockedDecrementNoFence((volatile long *) value); }
-FORCE_INLINE void atomic_increment_relaxed(volatile uint64* value) { InterlockedIncrementNoFence64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_decrement_relaxed(volatile uint64* value) { InterlockedDecrementNoFence64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_add_relaxed(volatile uint8* value, uint8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
-FORCE_INLINE void atomic_sub_relaxed(volatile uint8* value, uint8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
-FORCE_INLINE void atomic_add_relaxed(volatile uint16* value, uint16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
-FORCE_INLINE void atomic_sub_relaxed(volatile uint16* value, uint16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
-FORCE_INLINE void atomic_add_relaxed(volatile uint32* value, uint32 increment) { InterlockedAddNoFence((volatile long *) value, increment); }
-FORCE_INLINE void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) { InterlockedAddNoFence((volatile long *) value, -1 * ((int32) decrement)); }
-FORCE_INLINE void atomic_add_relaxed(volatile uint64* value, uint64 increment) { InterlockedAddNoFence64((volatile LONG64 *) value, (LONG64) increment); }
-FORCE_INLINE void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) { InterlockedAddNoFence64((volatile LONG64 *) value, -((LONG64) decrement)); }
-FORCE_INLINE uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) { return (uint32) InterlockedCompareExchangeNoFence((volatile long *) value, desired, *expected); }
-FORCE_INLINE uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) { return (uint64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-FORCE_INLINE uint8 atomic_fetch_add_relaxed(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
-FORCE_INLINE uint8 atomic_fetch_sub_relaxed(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
-FORCE_INLINE uint16 atomic_fetch_add_relaxed(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
-FORCE_INLINE uint16 atomic_fetch_sub_relaxed(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
-FORCE_INLINE uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddNoFence((volatile long *) value, operand); }
-FORCE_INLINE uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddNoFence((volatile unsigned long *) value, -((long) operand)); }
-FORCE_INLINE uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAddNoFence64((volatile LONG64 *) value, (LONG64) operand); }
-FORCE_INLINE uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
-FORCE_INLINE void atomic_and_relaxed(volatile uint8* value, uint8 mask) { InterlockedAnd8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_and_relaxed(volatile int8* value, int8 mask) { InterlockedAnd8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_and_relaxed(volatile uint16* value, uint16 mask) { InterlockedAnd16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_and_relaxed(volatile int16* value, int16 mask) { InterlockedAnd16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_and_relaxed(volatile uint32* value, uint32 mask) { InterlockedAndNoFence((volatile LONG *) value, mask); }
-FORCE_INLINE void atomic_and_relaxed(volatile int32* value, int32 mask) { InterlockedAndNoFence((volatile LONG *) value, (LONG)mask); }
-FORCE_INLINE void atomic_and_relaxed(volatile uint64* value, uint64 mask) { InterlockedAnd64NoFence((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_and_relaxed(volatile int64* value, int64 mask) { InterlockedAnd64NoFence((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_or_relaxed(volatile uint8* value, uint8 mask) { InterlockedOr8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_or_relaxed(volatile int8* value, int8 mask) { InterlockedOr8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_or_relaxed(volatile uint16* value, uint16 mask) { InterlockedOr16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_or_relaxed(volatile int16* value, int16 mask) { InterlockedOr16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_or_relaxed(volatile uint32* value, uint32 mask) { InterlockedOrNoFence((volatile LONG *) value, mask); }
-FORCE_INLINE void atomic_or_relaxed(volatile int32* value, int32 mask) { InterlockedOrNoFence((volatile LONG *) value, (LONG)mask); }
-FORCE_INLINE void atomic_or_relaxed(volatile uint64* value, uint64 mask) { InterlockedOr64NoFence((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_or_relaxed(volatile int64* value, int64 mask) { InterlockedOr64NoFence((volatile LONG64 *) value, mask); }
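+// Relaxed group: atomicity only, no ordering guarantees. These wrap the
+// Win32 Interlocked*NoFence intrinsics, falling back to the plain
+// full-fence intrinsic where no NoFence variant exists for that width.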
+FORCE_INLINE void atomic_set_relaxed(void** target, void* new_pointer) noexcept { InterlockedExchangePointerNoFence(target, new_pointer); }
+FORCE_INLINE void* atomic_get_relaxed(void** target) noexcept { return InterlockedCompareExchangePointerNoFence(target, NULL, NULL); }
+FORCE_INLINE void atomic_set_relaxed(volatile int8* value, int8 new_value) noexcept { InterlockedExchangeNoFence8((volatile char *) value, new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile int16* value, int16 new_value) noexcept { InterlockedExchangeNoFence16((volatile short *) value, new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile int32* value, int32 new_value) noexcept { InterlockedExchangeNoFence((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile int64* value, int64 new_value) noexcept { InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile f32* value, f32 new_value) noexcept { _atomic_32 temp = {.f = new_value}; InterlockedExchangeNoFence((volatile long *) value, (long) temp.l); }
+FORCE_INLINE void atomic_set_relaxed(volatile f64* value, f64 new_value) noexcept { _atomic_64 temp = {.f = new_value}; InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) temp.l); }
+FORCE_INLINE int8 atomic_fetch_set_relaxed(volatile int8* value, int8 new_value) noexcept { return (int8) InterlockedExchangeNoFence8((volatile char *) value, (char) new_value); }
+FORCE_INLINE int16 atomic_fetch_set_relaxed(volatile int16* value, int16 new_value) noexcept { return (int16) InterlockedExchangeNoFence16((volatile short *) value, (short) new_value); }
+FORCE_INLINE int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) noexcept { return (int32) InterlockedExchangeNoFence((volatile long *) value, new_value); }
+FORCE_INLINE int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) noexcept { return (int64) InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE int8 atomic_get_relaxed(volatile int8* value) noexcept { return (int8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE int16 atomic_get_relaxed(volatile int16* value) noexcept { return (int16) InterlockedCompareExchangeNoFence16((volatile short *) value, 0, 0); }
+FORCE_INLINE int32 atomic_get_relaxed(volatile int32* value) noexcept { return (int32) InterlockedCompareExchangeNoFence((volatile long *) value, 0, 0); }
+FORCE_INLINE int64 atomic_get_relaxed(volatile int64* value) noexcept { return (int64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE f32 atomic_get_relaxed(volatile f32* value) noexcept { _atomic_32 temp = {.l = InterlockedCompareExchangeNoFence((volatile long *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE f64 atomic_get_relaxed(volatile f64* value) noexcept { _atomic_64 temp = {.l = InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE int8 atomic_increment_relaxed(volatile int8* value) noexcept { return (int8) (InterlockedExchangeAdd8((volatile char *) value, 1) + 1); }
+FORCE_INLINE int8 atomic_decrement_relaxed(volatile int8* value) noexcept { return (int8) (InterlockedExchangeAdd8((volatile char *) value, -1) - 1); }
+FORCE_INLINE int16 atomic_increment_relaxed(volatile int16* value) noexcept { return InterlockedIncrementNoFence16((volatile short *) value); }
+FORCE_INLINE int16 atomic_decrement_relaxed(volatile int16* value) noexcept { return InterlockedDecrementNoFence16((volatile short *) value); }
+FORCE_INLINE int32 atomic_increment_relaxed(volatile int32* value) noexcept { return InterlockedIncrementNoFence((volatile long *) value); }
+FORCE_INLINE int32 atomic_decrement_relaxed(volatile int32* value) noexcept { return InterlockedDecrementNoFence((volatile long *) value); }
+FORCE_INLINE int64 atomic_increment_relaxed(volatile int64* value) noexcept { return InterlockedIncrementNoFence64((volatile LONG64 *) value); }
+FORCE_INLINE int64 atomic_decrement_relaxed(volatile int64* value) noexcept { return InterlockedDecrementNoFence64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_relaxed(volatile int8* value, int8 increment) noexcept { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile int8* value, int8 decrement) noexcept { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_relaxed(volatile int16* value, int16 increment) noexcept { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile int16* value, int16 decrement) noexcept { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_relaxed(volatile int32* value, int32 increment) noexcept { InterlockedAddNoFence((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile int32* value, int32 decrement) noexcept { InterlockedAddNoFence((volatile long *) value, -decrement); }
+FORCE_INLINE void atomic_add_relaxed(volatile int64* value, int64 increment) noexcept { InterlockedAddNoFence64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile int64* value, int64 decrement) noexcept { InterlockedAddNoFence64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) noexcept { _atomic_32 des = {.f = desired}; _atomic_32 expt = {.f = *expected}; _atomic_32 temp = {.l = InterlockedCompareExchangeNoFence((volatile long *) value, (long) des.l, (long) expt.l)}; return temp.f; }
+FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) noexcept { _atomic_64 des = {.f = desired}; _atomic_64 expt = {.f = *expected}; _atomic_64 temp = {.l = InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) des.l, (LONG64) expt.l)}; return temp.f; }
+FORCE_INLINE int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) noexcept { return (int32) InterlockedCompareExchangeNoFence((volatile long *) value, desired, *expected); }
+FORCE_INLINE int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) noexcept { return (int64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE int8 atomic_fetch_add_relaxed(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE int8 atomic_fetch_sub_relaxed(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE int16 atomic_fetch_add_relaxed(volatile int16* value, int16 operand) noexcept { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE int16 atomic_fetch_sub_relaxed(volatile int16* value, int16 operand) noexcept { return (int16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) noexcept { return (int32) InterlockedExchangeAddNoFence((volatile long *) value, operand); }
+FORCE_INLINE int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) noexcept { return (int32) InterlockedExchangeAddNoFence((volatile long *) value, -((long) operand)); }
+FORCE_INLINE int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) noexcept { return (int64) InterlockedExchangeAddNoFence64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) noexcept { return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_set_relaxed(volatile uint8* value, uint8 new_value) noexcept { InterlockedExchangeNoFence8((volatile char *) value, (char) new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile uint16* value, uint16 new_value) noexcept { InterlockedExchangeNoFence16((volatile short *) value, (short) new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile uint32* value, uint32 new_value) noexcept { InterlockedExchangeNoFence((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_relaxed(volatile uint64* value, uint64 new_value) noexcept { InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_fetch_set_relaxed(volatile uint8* value, uint8 new_value) noexcept { return (uint8) InterlockedExchangeNoFence8((volatile char *) value, (char) new_value); }
+FORCE_INLINE uint16 atomic_fetch_set_relaxed(volatile uint16* value, uint16 new_value) noexcept { return (uint16) InterlockedExchangeNoFence16((volatile short *) value, (short) new_value); }
+FORCE_INLINE uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) noexcept { return (uint32) InterlockedExchangeNoFence((volatile long *) value, new_value); }
+FORCE_INLINE uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) noexcept { return (uint64) InterlockedExchangeNoFence64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_get_relaxed(volatile uint8* value) noexcept { return (uint8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE uint16 atomic_get_relaxed(volatile uint16* value) noexcept { return (uint16) InterlockedCompareExchangeNoFence16((volatile short *) value, 0, 0); }
+FORCE_INLINE uint32 atomic_get_relaxed(volatile uint32* value) noexcept { return (uint32) InterlockedCompareExchangeNoFence((volatile long *) value, 0, 0); }
+FORCE_INLINE uint64 atomic_get_relaxed(volatile uint64* value) noexcept { return (uint64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE uint8 atomic_increment_relaxed(volatile uint8* value) noexcept { return (uint8) (InterlockedExchangeAdd8((volatile char *) value, 1) + 1); }
+FORCE_INLINE uint8 atomic_decrement_relaxed(volatile uint8* value) noexcept { return (uint8) (InterlockedExchangeAdd8((volatile char *) value, -1) - 1); }
+FORCE_INLINE uint16 atomic_increment_relaxed(volatile uint16* value) noexcept { return InterlockedIncrementNoFence16((volatile short *) value); }
+FORCE_INLINE uint16 atomic_decrement_relaxed(volatile uint16* value) noexcept { return InterlockedDecrementNoFence16((volatile short *) value); }
+FORCE_INLINE uint32 atomic_increment_relaxed(volatile uint32* value) noexcept { return InterlockedIncrementNoFence((volatile long *) value); }
+FORCE_INLINE uint32 atomic_decrement_relaxed(volatile uint32* value) noexcept { return InterlockedDecrementNoFence((volatile long *) value); }
+FORCE_INLINE uint64 atomic_increment_relaxed(volatile uint64* value) noexcept { return InterlockedIncrementNoFence64((volatile LONG64 *) value); }
+FORCE_INLINE uint64 atomic_decrement_relaxed(volatile uint64* value) noexcept { return InterlockedDecrementNoFence64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_relaxed(volatile uint8* value, uint8 increment) noexcept { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile uint8* value, uint8 decrement) noexcept { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_relaxed(volatile uint16* value, uint16 increment) noexcept { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile uint16* value, uint16 decrement) noexcept { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_relaxed(volatile uint32* value, uint32 increment) noexcept { InterlockedAddNoFence((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) noexcept { InterlockedAddNoFence((volatile long *) value, -1 * ((int32) decrement)); }
+FORCE_INLINE void atomic_add_relaxed(volatile uint64* value, uint64 increment) noexcept { InterlockedAddNoFence64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) noexcept { InterlockedAddNoFence64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) noexcept { return (uint32) InterlockedCompareExchangeNoFence((volatile long *) value, desired, *expected); }
+FORCE_INLINE uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) noexcept { return (uint64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE uint8 atomic_fetch_add_relaxed(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE uint8 atomic_fetch_sub_relaxed(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE uint16 atomic_fetch_add_relaxed(volatile uint16* value, uint16 operand) noexcept { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE uint16 atomic_fetch_sub_relaxed(volatile uint16* value, uint16 operand) noexcept { return (uint16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) noexcept { return (uint32) InterlockedExchangeAddNoFence((volatile long *) value, operand); }
+FORCE_INLINE uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) noexcept { return (uint32) InterlockedExchangeAddNoFence((volatile long *) value, -((long) operand)); }
+FORCE_INLINE uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) noexcept { return (uint64) InterlockedExchangeAddNoFence64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) noexcept { return (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_and_relaxed(volatile uint8* value, uint8 mask) noexcept { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile int8* value, int8 mask) noexcept { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile uint16* value, uint16 mask) noexcept { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile int16* value, int16 mask) noexcept { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile uint32* value, uint32 mask) noexcept { InterlockedAndNoFence((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile int32* value, int32 mask) noexcept { InterlockedAndNoFence((volatile LONG *) value, (LONG)mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile uint64* value, uint64 mask) noexcept { InterlockedAnd64NoFence((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_and_relaxed(volatile int64* value, int64 mask) noexcept { InterlockedAnd64NoFence((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile uint8* value, uint8 mask) noexcept { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile int8* value, int8 mask) noexcept { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile uint16* value, uint16 mask) noexcept { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile int16* value, int16 mask) noexcept { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile uint32* value, uint32 mask) noexcept { InterlockedOrNoFence((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile int32* value, int32 mask) noexcept { InterlockedOrNoFence((volatile LONG *) value, (LONG)mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile uint64* value, uint64 mask) noexcept { InterlockedOr64NoFence((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_relaxed(volatile int64* value, int64 mask) noexcept { InterlockedOr64NoFence((volatile LONG64 *) value, mask); }
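+// Usage sketch (illustrative only, not part of this header): a relaxed
+// reference counter. `RefCounted`, `retain` and `release_ref` are
+// hypothetical caller-side names.
+//
+//   struct RefCounted { volatile int32 refs; };
+//
+//   void retain(RefCounted* rc) noexcept { atomic_increment_relaxed(&rc->refs); }
+//   // The increment/decrement wrappers return the resulting value, so a
+//   // return of 0 means this call dropped the last reference.
+//   bool release_ref(RefCounted* rc) noexcept { return atomic_decrement_relaxed(&rc->refs) == 0; }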
-FORCE_INLINE void atomic_set_acquire(void** target, void* new_pointer) { InterlockedExchangePointerAcquire(target, new_pointer); }
-FORCE_INLINE void* atomic_get_acquire(void** target) { return InterlockedCompareExchangePointerAcquire(target, NULL, NULL); }
-FORCE_INLINE void atomic_set_acquire(volatile int8* value, int8 new_value) { InterlockedExchangeAcquire8((volatile char *) value, new_value); }
-FORCE_INLINE void atomic_set_acquire(volatile int16* value, int16 new_value) { InterlockedExchangeAcquire16((volatile short *) value, new_value); }
-FORCE_INLINE void atomic_set_acquire(volatile int32* value, int32 new_value) { InterlockedExchangeAcquire((volatile long *) value, new_value); }
-FORCE_INLINE void atomic_set_acquire(volatile int64* value, int64 new_value) { InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE void atomic_set_acquire(volatile f32* value, f32 new_value) { _atomic_32 temp = {.f = new_value}; InterlockedExchangeAcquire((volatile long *) value, (long) temp.l); }
-FORCE_INLINE void atomic_set_acquire(volatile f64* value, f64 new_value) { _atomic_64 temp = {.f = new_value}; InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) temp.l); }
-FORCE_INLINE int8 atomic_fetch_set_acquire(volatile int8* value, int8 new_value) { return (int8) InterlockedExchangeAcquire8((volatile char *) value, (char) new_value); }
-FORCE_INLINE int16 atomic_fetch_set_acquire(volatile int16* value, int16 new_value) { return (int16) InterlockedExchangeAcquire16((volatile short *) value, (short) new_value); }
-FORCE_INLINE int32 atomic_fetch_set_acquire(volatile int32* value, int32 new_value) { return (int32) InterlockedExchangeAcquire((volatile long *) value, new_value); }
-FORCE_INLINE int64 atomic_fetch_set_acquire(volatile int64* value, int64 new_value) { return (int64) InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE int8 atomic_get_acquire(volatile int8* value) { return (int8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
-FORCE_INLINE int16 atomic_get_acquire(volatile int16* value) { return (int16) InterlockedCompareExchangeAcquire16((volatile short *) value, 0, 0); }
-FORCE_INLINE int32 atomic_get_acquire(volatile int32* value) { return (int32) InterlockedCompareExchangeAcquire((volatile long *) value, 0, 0); }
-FORCE_INLINE int64 atomic_get_acquire(volatile int64* value) { return (int64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, 0, 0); }
-FORCE_INLINE f32 atomic_get_acquire(volatile f32* value) { _atomic_32 temp = {.l = InterlockedCompareExchangeAcquire((volatile long *) value, 0, 0)}; return temp.f; }
-FORCE_INLINE f64 atomic_get_acquire(volatile f64* value) { _atomic_64 temp = {.l = InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, 0, 0)}; return temp.f; }
-FORCE_INLINE void atomic_increment_acquire(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
-FORCE_INLINE void atomic_decrement_acquire(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
-FORCE_INLINE void atomic_increment_acquire(volatile int16* value) { InterlockedIncrementAcquire16((volatile short *) value); }
-FORCE_INLINE void atomic_decrement_acquire(volatile int16* value) { InterlockedDecrementAcquire16((volatile short *) value); }
-FORCE_INLINE void atomic_increment_acquire(volatile int32* value) { InterlockedIncrementAcquire((volatile long *) value); }
-FORCE_INLINE void atomic_decrement_acquire(volatile int32* value) { InterlockedDecrementAcquire((volatile long *) value); }
-FORCE_INLINE void atomic_increment_acquire(volatile int64* value) { InterlockedIncrementAcquire64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_decrement_acquire(volatile int64* value) { InterlockedDecrementAcquire64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_add_acquire(volatile int8* value, int8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
-FORCE_INLINE void atomic_sub_acquire(volatile int8* value, int8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
-FORCE_INLINE void atomic_add_acquire(volatile int16* value, int16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
-FORCE_INLINE void atomic_sub_acquire(volatile int16* value, int16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
-FORCE_INLINE void atomic_add_acquire(volatile int32* value, int32 increment) { InterlockedAddAcquire((volatile long *) value, increment); }
-FORCE_INLINE void atomic_sub_acquire(volatile int32* value, int32 decrement) { InterlockedAddAcquire((volatile long *) value, -decrement); }
-FORCE_INLINE void atomic_add_acquire(volatile int64* value, int64 increment) { InterlockedAddAcquire64((volatile LONG64 *) value, (LONG64) increment); }
-FORCE_INLINE void atomic_sub_acquire(volatile int64* value, int64 decrement) { InterlockedAddAcquire64((volatile LONG64 *) value, -((LONG64) decrement)); }
-FORCE_INLINE f32 atomic_compare_exchange_weak_acquire(volatile f32* value, f32* expected, f32 desired) { _atomic_32 temp = {.l = InterlockedCompareExchangeAcquire((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; }
-FORCE_INLINE f64 atomic_compare_exchange_weak_acquire(volatile f64* value, f64* expected, f64 desired) { _atomic_64 temp = {.l = InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; }
-FORCE_INLINE int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchangeAcquire((volatile long *) value, desired, *expected); }
-FORCE_INLINE int64 atomic_compare_exchange_weak_acquire(volatile int64* value, int64* expected, int64 desired) { return (int64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-FORCE_INLINE int8 atomic_fetch_add_acquire(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
-FORCE_INLINE int8 atomic_fetch_sub_acquire(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
-FORCE_INLINE int16 atomic_fetch_add_acquire(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
-FORCE_INLINE int16 atomic_fetch_sub_acquire(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
-FORCE_INLINE int32 atomic_fetch_add_acquire(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddAcquire((volatile long *) value, operand); }
-FORCE_INLINE int32 atomic_fetch_sub_acquire(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddAcquire((volatile unsigned long *) value, -((long) operand)); }
-FORCE_INLINE int64 atomic_fetch_add_acquire(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAddAcquire64((volatile LONG64 *) value, (LONG64) operand); }
-FORCE_INLINE int64 atomic_fetch_sub_acquire(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
-FORCE_INLINE void atomic_set_acquire(volatile uint8* value, uint8 new_value) { InterlockedExchangeAcquire8((volatile char *) value, (char) new_value); }
-FORCE_INLINE void atomic_set_acquire(volatile uint16* value, uint16 new_value) { InterlockedExchangeAcquire16((volatile short *) value, (short) new_value); }
-FORCE_INLINE void atomic_set_acquire(volatile uint32* value, uint32 new_value) { InterlockedExchangeAcquire((volatile long *) value, new_value); }
-FORCE_INLINE void atomic_set_acquire(volatile uint64* value, uint64 new_value) { InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE uint8 atomic_fetch_set_acquire(volatile uint8* value, uint8 new_value) { return (uint8) InterlockedExchangeAcquire8((volatile char *) value, (char) new_value); }
-FORCE_INLINE uint16 atomic_fetch_set_acquire(volatile uint16* value, uint16 new_value) { return (uint16) InterlockedExchangeAcquire16((volatile short *) value, (short) new_value); }
-FORCE_INLINE uint32 atomic_fetch_set_acquire(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchangeAcquire((volatile long *) value, new_value); }
-FORCE_INLINE uint64 atomic_fetch_set_acquire(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE uint8 atomic_get_acquire(volatile uint8* value) { return (uint8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
-FORCE_INLINE uint16 atomic_get_acquire(volatile uint16* value) { return (uint16) InterlockedCompareExchangeAcquire16((volatile short *) value, 0, 0); }
-FORCE_INLINE uint32 atomic_get_acquire(volatile uint32* value) { return (uint32) InterlockedCompareExchangeAcquire((volatile long *) value, 0, 0); }
-FORCE_INLINE uint64 atomic_get_acquire(volatile uint64* value) { return (uint64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, 0, 0); }
-FORCE_INLINE void atomic_increment_acquire(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
-FORCE_INLINE void atomic_decrement_acquire(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
-FORCE_INLINE void atomic_increment_acquire(volatile uint16* value) { InterlockedIncrementAcquire16((volatile short *) value); }
-FORCE_INLINE void atomic_decrement_acquire(volatile uint16* value) { InterlockedDecrementAcquire16((volatile short *) value); }
-FORCE_INLINE void atomic_increment_acquire(volatile uint32* value) { InterlockedIncrementAcquire((volatile long *) value); }
-FORCE_INLINE void atomic_decrement_acquire(volatile uint32* value) { InterlockedDecrementAcquire((volatile long *) value); }
-FORCE_INLINE void atomic_increment_acquire(volatile uint64* value) { InterlockedIncrementAcquire64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_decrement_acquire(volatile uint64* value) { InterlockedDecrementAcquire64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_add_acquire(volatile uint8* value, uint8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
-FORCE_INLINE void atomic_sub_acquire(volatile uint8* value, uint8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
-FORCE_INLINE void atomic_add_acquire(volatile uint16* value, uint16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
-FORCE_INLINE void atomic_sub_acquire(volatile uint16* value, uint16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
-FORCE_INLINE void atomic_add_acquire(volatile uint32* value, uint32 increment) { InterlockedAddAcquire((volatile long *) value, increment); }
-FORCE_INLINE void atomic_sub_acquire(volatile uint32* value, uint32 decrement) { InterlockedAddAcquire((volatile long *) value, -1 * ((int32) decrement)); }
-FORCE_INLINE void atomic_add_acquire(volatile uint64* value, uint64 increment) { InterlockedAddAcquire64((volatile LONG64 *) value, (LONG64) increment); }
-FORCE_INLINE void atomic_sub_acquire(volatile uint64* value, uint64 decrement) { InterlockedAddAcquire64((volatile LONG64 *) value, -((LONG64) decrement)); }
-FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expected, uint32 desired) { return (uint32) InterlockedCompareExchangeAcquire((volatile long *) value, desired, *expected); }
-FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire(volatile uint64* value, uint64* expected, uint64 desired) { return (uint64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-FORCE_INLINE uint8 atomic_fetch_add_acquire(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
-FORCE_INLINE uint8 atomic_fetch_sub_acquire(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
-FORCE_INLINE uint16 atomic_fetch_add_acquire(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
-FORCE_INLINE uint16 atomic_fetch_sub_acquire(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
-FORCE_INLINE uint32 atomic_fetch_add_acquire(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddAcquire((volatile long *) value, operand); }
-FORCE_INLINE uint32 atomic_fetch_sub_acquire(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddAcquire((volatile unsigned long *) value, -((long) operand)); }
-FORCE_INLINE uint64 atomic_fetch_add_acquire(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAddAcquire64((volatile LONG64 *) value, (LONG64) operand); }
-FORCE_INLINE uint64 atomic_fetch_sub_acquire(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
-FORCE_INLINE void atomic_and_acquire(volatile uint8* value, uint8 mask) { InterlockedAnd8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_and_acquire(volatile int8* value, int8 mask) { InterlockedAnd8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_and_acquire(volatile uint16* value, uint16 mask) { InterlockedAnd16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_and_acquire(volatile int16* value, int16 mask) { InterlockedAnd16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_and_acquire(volatile uint32* value, uint32 mask) { InterlockedAndAcquire((volatile LONG *) value, mask); }
-FORCE_INLINE void atomic_and_acquire(volatile int32* value, int32 mask) { InterlockedAndAcquire((volatile LONG *) value, (LONG)mask); }
-FORCE_INLINE void atomic_and_acquire(volatile uint64* value, uint64 mask) { InterlockedAnd64Acquire((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_and_acquire(volatile int64* value, int64 mask) { InterlockedAnd64Acquire((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_or_acquire(volatile uint8* value, uint8 mask) { InterlockedOr8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_or_acquire(volatile int8* value, int8 mask) { InterlockedOr8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_or_acquire(volatile uint16* value, uint16 mask) { InterlockedOr16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_or_acquire(volatile int16* value, int16 mask) { InterlockedOr16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_or_acquire(volatile uint32* value, uint32 mask) { InterlockedOrAcquire((volatile LONG *) value, mask); }
-FORCE_INLINE void atomic_or_acquire(volatile int32* value, int32 mask) { InterlockedOrAcquire((volatile LONG *) value, (LONG)mask); }
-FORCE_INLINE void atomic_or_acquire(volatile uint64* value, uint64 mask) { InterlockedOr64Acquire((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_or_acquire(volatile int64* value, int64 mask) { InterlockedOr64Acquire((volatile LONG64 *) value, mask); }
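+// Acquire group: the same operation set with acquire ordering, mapped to
+// the Interlocked*Acquire intrinsics where the width provides one (8-bit
+// operations fall back to the full-fence intrinsics).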
+FORCE_INLINE void atomic_set_acquire(void** target, void* new_pointer) noexcept { InterlockedExchangePointerAcquire(target, new_pointer); }
+FORCE_INLINE void* atomic_get_acquire(void** target) noexcept { return InterlockedCompareExchangePointerAcquire(target, NULL, NULL); }
+FORCE_INLINE void atomic_set_acquire(volatile int8* value, int8 new_value) noexcept { InterlockedExchangeAcquire8((volatile char *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile int16* value, int16 new_value) noexcept { InterlockedExchangeAcquire16((volatile short *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile int32* value, int32 new_value) noexcept { InterlockedExchangeAcquire((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile int64* value, int64 new_value) noexcept { InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile f32* value, f32 new_value) noexcept { _atomic_32 temp = {.f = new_value}; InterlockedExchangeAcquire((volatile long *) value, (long) temp.l); }
+FORCE_INLINE void atomic_set_acquire(volatile f64* value, f64 new_value) noexcept { _atomic_64 temp = {.f = new_value}; InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) temp.l); }
+FORCE_INLINE int8 atomic_fetch_set_acquire(volatile int8* value, int8 new_value) noexcept { return (int8) InterlockedExchangeAcquire8((volatile char *) value, (char) new_value); }
+FORCE_INLINE int16 atomic_fetch_set_acquire(volatile int16* value, int16 new_value) noexcept { return (int16) InterlockedExchangeAcquire16((volatile short *) value, (short) new_value); }
+FORCE_INLINE int32 atomic_fetch_set_acquire(volatile int32* value, int32 new_value) noexcept { return (int32) InterlockedExchangeAcquire((volatile long *) value, new_value); }
+FORCE_INLINE int64 atomic_fetch_set_acquire(volatile int64* value, int64 new_value) noexcept { return (int64) InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE int8 atomic_get_acquire(volatile int8* value) noexcept { return (int8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE int16 atomic_get_acquire(volatile int16* value) noexcept { return (int16) InterlockedCompareExchangeAcquire16((volatile short *) value, 0, 0); }
+FORCE_INLINE int32 atomic_get_acquire(volatile int32* value) noexcept { return (int32) InterlockedCompareExchangeAcquire((volatile long *) value, 0, 0); }
+FORCE_INLINE int64 atomic_get_acquire(volatile int64* value) noexcept { return (int64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE f32 atomic_get_acquire(volatile f32* value) noexcept { _atomic_32 temp = {.l = InterlockedCompareExchangeAcquire((volatile long *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE f64 atomic_get_acquire(volatile f64* value) noexcept { _atomic_64 temp = {.l = InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE int8 atomic_increment_acquire(volatile int8* value) noexcept { return (int8) (InterlockedExchangeAdd8((volatile char *) value, 1) + 1); }
+FORCE_INLINE int8 atomic_decrement_acquire(volatile int8* value) noexcept { return (int8) (InterlockedExchangeAdd8((volatile char *) value, -1) - 1); }
+FORCE_INLINE int16 atomic_increment_acquire(volatile int16* value) noexcept { return InterlockedIncrementAcquire16((volatile short *) value); }
+FORCE_INLINE int16 atomic_decrement_acquire(volatile int16* value) noexcept { return InterlockedDecrementAcquire16((volatile short *) value); }
+FORCE_INLINE int32 atomic_increment_acquire(volatile int32* value) noexcept { return InterlockedIncrementAcquire((volatile long *) value); }
+FORCE_INLINE int32 atomic_decrement_acquire(volatile int32* value) noexcept { return InterlockedDecrementAcquire((volatile long *) value); }
+FORCE_INLINE int64 atomic_increment_acquire(volatile int64* value) noexcept { return InterlockedIncrementAcquire64((volatile LONG64 *) value); }
+FORCE_INLINE int64 atomic_decrement_acquire(volatile int64* value) noexcept { return InterlockedDecrementAcquire64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_acquire(volatile int8* value, int8 increment) noexcept { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile int8* value, int8 decrement) noexcept { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_acquire(volatile int16* value, int16 increment) noexcept { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile int16* value, int16 decrement) noexcept { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_acquire(volatile int32* value, int32 increment) noexcept { InterlockedAddAcquire((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile int32* value, int32 decrement) noexcept { InterlockedAddAcquire((volatile long *) value, -decrement); }
+FORCE_INLINE void atomic_add_acquire(volatile int64* value, int64 increment) noexcept { InterlockedAddAcquire64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile int64* value, int64 decrement) noexcept { InterlockedAddAcquire64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE f32 atomic_compare_exchange_weak_acquire(volatile f32* value, f32* expected, f32 desired) noexcept { _atomic_32 des = {.f = desired}; _atomic_32 expt = {.f = *expected}; _atomic_32 temp = {.l = InterlockedCompareExchangeAcquire((volatile long *) value, (long) des.l, (long) expt.l)}; return temp.f; }
+FORCE_INLINE f64 atomic_compare_exchange_weak_acquire(volatile f64* value, f64* expected, f64 desired) noexcept { _atomic_64 des = {.f = desired}; _atomic_64 expt = {.f = *expected}; _atomic_64 temp = {.l = InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) des.l, (LONG64) expt.l)}; return temp.f; }
+FORCE_INLINE int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expected, int32 desired) noexcept { return (int32) InterlockedCompareExchangeAcquire((volatile long *) value, desired, *expected); }
+FORCE_INLINE int64 atomic_compare_exchange_weak_acquire(volatile int64* value, int64* expected, int64 desired) noexcept { return (int64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE int8 atomic_fetch_add_acquire(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE int8 atomic_fetch_sub_acquire(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE int16 atomic_fetch_add_acquire(volatile int16* value, int16 operand) noexcept { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE int16 atomic_fetch_sub_acquire(volatile int16* value, int16 operand) noexcept { return (int16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE int32 atomic_fetch_add_acquire(volatile int32* value, int32 operand) noexcept { return (int32) InterlockedExchangeAddAcquire((volatile long *) value, operand); }
+FORCE_INLINE int32 atomic_fetch_sub_acquire(volatile int32* value, int32 operand) noexcept { return (int32) InterlockedExchangeAddAcquire((volatile long *) value, -((long) operand)); }
+FORCE_INLINE int64 atomic_fetch_add_acquire(volatile int64* value, int64 operand) noexcept { return (int64) InterlockedExchangeAddAcquire64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE int64 atomic_fetch_sub_acquire(volatile int64* value, int64 operand) noexcept { return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_set_acquire(volatile uint8* value, uint8 new_value) noexcept { InterlockedExchangeAcquire8((volatile char *) value, (char) new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile uint16* value, uint16 new_value) noexcept { InterlockedExchangeAcquire16((volatile short *) value, (short) new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile uint32* value, uint32 new_value) noexcept { InterlockedExchangeAcquire((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire(volatile uint64* value, uint64 new_value) noexcept { InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_fetch_set_acquire(volatile uint8* value, uint8 new_value) noexcept { return (uint8) InterlockedExchangeAcquire8((volatile char *) value, (char) new_value); }
+FORCE_INLINE uint16 atomic_fetch_set_acquire(volatile uint16* value, uint16 new_value) noexcept { return (uint16) InterlockedExchangeAcquire16((volatile short *) value, (short) new_value); }
+FORCE_INLINE uint32 atomic_fetch_set_acquire(volatile uint32* value, uint32 new_value) noexcept { return (uint32) InterlockedExchangeAcquire((volatile long *) value, new_value); }
+FORCE_INLINE uint64 atomic_fetch_set_acquire(volatile uint64* value, uint64 new_value) noexcept { return (uint64) InterlockedExchangeAcquire64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_get_acquire(volatile uint8* value) noexcept { return (uint8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE uint16 atomic_get_acquire(volatile uint16* value) noexcept { return (uint16) InterlockedCompareExchangeAcquire16((volatile short *) value, 0, 0); }
+FORCE_INLINE uint32 atomic_get_acquire(volatile uint32* value) noexcept { return (uint32) InterlockedCompareExchangeAcquire((volatile long *) value, 0, 0); }
+FORCE_INLINE uint64 atomic_get_acquire(volatile uint64* value) noexcept { return (uint64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE uint8 atomic_increment_acquire(volatile uint8* value) noexcept { return (uint8) (InterlockedExchangeAdd8((volatile char *) value, 1) + 1); }
+FORCE_INLINE uint8 atomic_decrement_acquire(volatile uint8* value) noexcept { return (uint8) (InterlockedExchangeAdd8((volatile char *) value, -1) - 1); }
+FORCE_INLINE uint16 atomic_increment_acquire(volatile uint16* value) noexcept { return InterlockedIncrementAcquire16((volatile short *) value); }
+FORCE_INLINE uint16 atomic_decrement_acquire(volatile uint16* value) noexcept { return InterlockedDecrementAcquire16((volatile short *) value); }
+FORCE_INLINE uint32 atomic_increment_acquire(volatile uint32* value) noexcept { return InterlockedIncrementAcquire((volatile long *) value); }
+FORCE_INLINE uint32 atomic_decrement_acquire(volatile uint32* value) noexcept { return InterlockedDecrementAcquire((volatile long *) value); }
+FORCE_INLINE uint64 atomic_increment_acquire(volatile uint64* value) noexcept { return InterlockedIncrementAcquire64((volatile LONG64 *) value); }
+FORCE_INLINE uint64 atomic_decrement_acquire(volatile uint64* value) noexcept { return InterlockedDecrementAcquire64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_acquire(volatile uint8* value, uint8 increment) noexcept { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile uint8* value, uint8 decrement) noexcept { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_acquire(volatile uint16* value, uint16 increment) noexcept { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile uint16* value, uint16 decrement) noexcept { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_acquire(volatile uint32* value, uint32 increment) noexcept { InterlockedAddAcquire((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile uint32* value, uint32 decrement) noexcept { InterlockedAddAcquire((volatile long *) value, -1 * ((int32) decrement)); }
+FORCE_INLINE void atomic_add_acquire(volatile uint64* value, uint64 increment) noexcept { InterlockedAddAcquire64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_acquire(volatile uint64* value, uint64 decrement) noexcept { InterlockedAddAcquire64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expected, uint32 desired) noexcept { return (uint32) InterlockedCompareExchangeAcquire((volatile long *) value, desired, *expected); }
+FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire(volatile uint64* value, uint64* expected, uint64 desired) noexcept { return (uint64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE uint8 atomic_fetch_add_acquire(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE uint8 atomic_fetch_sub_acquire(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE uint16 atomic_fetch_add_acquire(volatile uint16* value, uint16 operand) noexcept { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE uint16 atomic_fetch_sub_acquire(volatile uint16* value, uint16 operand) noexcept { return (uint16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE uint32 atomic_fetch_add_acquire(volatile uint32* value, uint32 operand) noexcept { return (uint32) InterlockedExchangeAddAcquire((volatile long *) value, operand); }
+FORCE_INLINE uint32 atomic_fetch_sub_acquire(volatile uint32* value, uint32 operand) noexcept { return (uint32) InterlockedExchangeAddAcquire((volatile long *) value, -((long) operand)); }
+FORCE_INLINE uint64 atomic_fetch_add_acquire(volatile uint64* value, uint64 operand) noexcept { return (uint64) InterlockedExchangeAddAcquire64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE uint64 atomic_fetch_sub_acquire(volatile uint64* value, uint64 operand) noexcept { return (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_and_acquire(volatile uint8* value, uint8 mask) noexcept { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_acquire(volatile int8* value, int8 mask) noexcept { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_acquire(volatile uint16* value, uint16 mask) noexcept { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_acquire(volatile int16* value, int16 mask) noexcept { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_acquire(volatile uint32* value, uint32 mask) noexcept { InterlockedAndAcquire((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_and_acquire(volatile int32* value, int32 mask) noexcept { InterlockedAndAcquire((volatile LONG *) value, (LONG)mask); }
+FORCE_INLINE void atomic_and_acquire(volatile uint64* value, uint64 mask) noexcept { InterlockedAnd64Acquire((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_and_acquire(volatile int64* value, int64 mask) noexcept { InterlockedAnd64Acquire((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile uint8* value, uint8 mask) noexcept { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile int8* value, int8 mask) noexcept { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile uint16* value, uint16 mask) noexcept { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile int16* value, int16 mask) noexcept { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile uint32* value, uint32 mask) noexcept { InterlockedOrAcquire((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile int32* value, int32 mask) noexcept { InterlockedOrAcquire((volatile LONG *) value, (LONG)mask); }
+FORCE_INLINE void atomic_or_acquire(volatile uint64* value, uint64 mask) noexcept { InterlockedOr64Acquire((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_acquire(volatile int64* value, int64 mask) noexcept { InterlockedOr64Acquire((volatile LONG64 *) value, mask); }
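+// Usage sketch (illustrative only): pairing an acquire RMW with a release
+// store to build a minimal test-and-set spinlock. `g_lock`, `spin_lock` and
+// `spin_unlock` are hypothetical caller-side names; atomic_set_release is
+// declared with the release group below.
+//
+//   volatile int32 g_lock = 0;
+//   void spin_lock() noexcept { while (atomic_fetch_set_acquire(&g_lock, 1) != 0) { } }
+//   void spin_unlock() noexcept { atomic_set_release(&g_lock, 0); }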
-FORCE_INLINE void atomic_set_release(void** target, void* new_pointer) { InterlockedExchangePointer(target, new_pointer); }
-FORCE_INLINE void* atomic_get_release(void** target) { return InterlockedCompareExchangePointerRelease(target, NULL, NULL); }
-FORCE_INLINE void atomic_set_release(volatile int8* value, int8 new_value) { InterlockedExchange8((volatile char *) value, new_value); }
-FORCE_INLINE void atomic_set_release(volatile int16* value, int16 new_value) { InterlockedExchange16((volatile short *) value, new_value); }
-FORCE_INLINE void atomic_set_release(volatile int32* value, int32 new_value) { InterlockedExchange((volatile long *) value, new_value); }
-FORCE_INLINE void atomic_set_release(volatile int64* value, int64 new_value) { InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE void atomic_set_release(volatile f32* value, f32 new_value) { _atomic_32 temp = {.f = new_value}; InterlockedExchange((volatile long *) value, (long) temp.l); }
-FORCE_INLINE void atomic_set_release(volatile f64* value, f64 new_value) { _atomic_64 temp = {.f = new_value}; InterlockedExchange64((volatile LONG64 *) value, (LONG64) temp.l); }
-FORCE_INLINE int8 atomic_fetch_set_release(volatile int8* value, int8 new_value) { return (int8) InterlockedExchange8((volatile char *) value, (char) new_value); }
-FORCE_INLINE int16 atomic_fetch_set_release(volatile int16* value, int16 new_value) { return (int16) InterlockedExchange16((volatile short *) value, (short) new_value); }
-FORCE_INLINE int32 atomic_fetch_set_release(volatile int32* value, int32 new_value) { return (int32) InterlockedExchange((volatile long *) value, new_value); }
-FORCE_INLINE int64 atomic_fetch_set_release(volatile int64* value, int64 new_value) { return (int64) InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE int8 atomic_get_release(volatile int8* value) { return (int8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
-FORCE_INLINE int16 atomic_get_release(volatile int16* value) { return (int16) InterlockedCompareExchangeRelease16((volatile short *) value, 0, 0); }
-FORCE_INLINE int32 atomic_get_release(volatile int32* value) { return (int32) InterlockedCompareExchangeRelease((volatile long *) value, 0, 0); }
-FORCE_INLINE int64 atomic_get_release(volatile int64* value) { return (int64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, 0, 0); }
-FORCE_INLINE f32 atomic_get_release(volatile f32* value) { _atomic_32 temp = {.l = InterlockedCompareExchangeRelease((volatile long *) value, 0, 0)}; return temp.f; }
-FORCE_INLINE f64 atomic_get_release(volatile f64* value) { _atomic_64 temp = {.l = InterlockedCompareExchangeRelease64((volatile LONG64 *) value, 0, 0)}; return temp.f; }
-FORCE_INLINE void atomic_increment_release(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
-FORCE_INLINE void atomic_decrement_release(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
-FORCE_INLINE void atomic_increment_release(volatile int16* value) { InterlockedIncrementRelease16((volatile short *) value); }
-FORCE_INLINE void atomic_decrement_release(volatile int16* value) { InterlockedDecrementRelease16((volatile short *) value); }
-FORCE_INLINE void atomic_increment_release(volatile int32* value) { InterlockedIncrementRelease((volatile long *) value); }
-FORCE_INLINE void atomic_decrement_release(volatile int32* value) { InterlockedDecrementRelease((volatile long *) value); }
-FORCE_INLINE void atomic_increment_release(volatile int64* value) { InterlockedIncrementRelease64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_decrement_release(volatile int64* value) { InterlockedDecrementRelease64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_add_release(volatile int8* value, int8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
-FORCE_INLINE void atomic_sub_release(volatile int8* value, int8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
-FORCE_INLINE void atomic_add_release(volatile int16* value, int16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
-FORCE_INLINE void atomic_sub_release(volatile int16* value, int16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
-FORCE_INLINE void atomic_add_release(volatile int32* value, int32 increment) { InterlockedAddRelease((volatile long *) value, increment); }
-FORCE_INLINE void atomic_sub_release(volatile int32* value, int32 decrement) { InterlockedAddRelease((volatile long *) value, -decrement); }
-FORCE_INLINE void atomic_add_release(volatile int64* value, int64 increment) { InterlockedAddRelease64((volatile LONG64 *) value, (LONG64) increment); }
-FORCE_INLINE void atomic_sub_release(volatile int64* value, int64 decrement) { InterlockedAddRelease64((volatile LONG64 *) value, -((LONG64) decrement)); }
-FORCE_INLINE f32 atomic_compare_exchange_weak_release(volatile f32* value, f32* expected, f32 desired) { _atomic_32 temp = {.l = InterlockedCompareExchangeRelease((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; }
-FORCE_INLINE f64 atomic_compare_exchange_weak_release(volatile f64* value, f64* expected, f64 desired) { _atomic_64 temp = {.l = InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; }
-FORCE_INLINE int32 atomic_compare_exchange_weak_release(volatile int32* value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchangeRelease((volatile long *) value, desired, *expected); }
-FORCE_INLINE int64 atomic_compare_exchange_weak_release(volatile int64* value, int64* expected, int64 desired) { return (int64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-FORCE_INLINE int8 atomic_fetch_add_release(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
-FORCE_INLINE int8 atomic_fetch_sub_release(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
-FORCE_INLINE int16 atomic_fetch_add_release(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
-FORCE_INLINE int16 atomic_fetch_sub_release(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
-FORCE_INLINE int32 atomic_fetch_add_release(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddRelease((volatile long *) value, operand); }
-FORCE_INLINE int32 atomic_fetch_sub_release(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAddRelease((volatile unsigned long *) value, -((long) operand)); }
-FORCE_INLINE int64 atomic_fetch_add_release(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAddRelease64((volatile LONG64 *) value, (LONG64) operand); }
-FORCE_INLINE int64 atomic_fetch_sub_release(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
-FORCE_INLINE void atomic_set_release(volatile uint8* value, uint8 new_value) { InterlockedExchange8((volatile char *) value, (char) new_value); }
-FORCE_INLINE void atomic_set_release(volatile uint16* value, uint16 new_value) { InterlockedExchange16((volatile short *) value, (short) new_value); }
-FORCE_INLINE void atomic_set_release(volatile uint32* value, uint32 new_value) { InterlockedExchange((volatile long *) value, new_value); }
-FORCE_INLINE void atomic_set_release(volatile uint64* value, uint64 new_value) { InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE uint8 atomic_fetch_set_release(volatile uint8* value, uint8 new_value) { return (uint8) InterlockedExchange8((volatile char *) value, (char) new_value); }
-FORCE_INLINE uint16 atomic_fetch_set_release(volatile uint16* value, uint16 new_value) { return (uint16) InterlockedExchange16((volatile short *) value, (short) new_value); }
-FORCE_INLINE uint32 atomic_fetch_set_release(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchange((volatile long *) value, new_value); }
-FORCE_INLINE uint64 atomic_fetch_set_release(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE uint8 atomic_get_release(volatile uint8* value) { return (uint8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
-FORCE_INLINE uint16 atomic_get_release(volatile uint16* value) { return (uint16) InterlockedCompareExchangeRelease16((volatile short *) value, 0, 0); }
-FORCE_INLINE uint32 atomic_get_release(volatile uint32* value) { return (uint32) InterlockedCompareExchangeRelease((volatile long *) value, 0, 0); }
-FORCE_INLINE uint64 atomic_get_release(volatile uint64* value) { return (uint64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, 0, 0); }
-FORCE_INLINE void atomic_increment_release(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
-FORCE_INLINE void atomic_decrement_release(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
-FORCE_INLINE void atomic_increment_release(volatile uint16* value) { InterlockedIncrementRelease16((volatile short *) value); }
-FORCE_INLINE void atomic_decrement_release(volatile uint16* value) { InterlockedDecrementRelease16((volatile short *) value); }
-FORCE_INLINE void atomic_increment_release(volatile uint32* value) { InterlockedIncrementRelease((volatile long *) value); }
-FORCE_INLINE void atomic_decrement_release(volatile uint32* value) { InterlockedDecrementRelease((volatile long *) value); }
-FORCE_INLINE void atomic_increment_release(volatile uint64* value) { InterlockedIncrementRelease64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_decrement_release(volatile uint64* value) { InterlockedDecrementRelease64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_add_release(volatile uint8* value, uint8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
-FORCE_INLINE void atomic_sub_release(volatile uint8* value, uint8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
-FORCE_INLINE void atomic_add_release(volatile uint16* value, uint16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
-FORCE_INLINE void atomic_sub_release(volatile uint16* value, uint16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
-FORCE_INLINE void atomic_add_release(volatile uint32* value, uint32 increment) { InterlockedAddRelease((volatile long *) value, increment); }
-FORCE_INLINE void atomic_sub_release(volatile uint32* value, uint32 decrement) { InterlockedAddRelease((volatile long *) value, -1 * ((int32) decrement)); }
-FORCE_INLINE void atomic_add_release(volatile uint64* value, uint64 increment) { InterlockedAddRelease64((volatile LONG64 *) value, (LONG64) increment); }
-FORCE_INLINE void atomic_sub_release(volatile uint64* value, uint64 decrement) { InterlockedAddRelease64((volatile LONG64 *) value, -((LONG64) decrement)); }
-FORCE_INLINE uint32 atomic_compare_exchange_weak_release(volatile uint32* value, uint32* expected, uint32 desired) { return (uint32) InterlockedCompareExchangeRelease((volatile long *) value, desired, *expected); }
-FORCE_INLINE uint64 atomic_compare_exchange_weak_release(volatile uint64* value, uint64* expected, uint64 desired) { return (uint64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-FORCE_INLINE uint8 atomic_fetch_add_release(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
-FORCE_INLINE uint8 atomic_fetch_sub_release(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
-FORCE_INLINE uint16 atomic_fetch_add_release(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
-FORCE_INLINE uint16 atomic_fetch_sub_release(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
-FORCE_INLINE uint32 atomic_fetch_add_release(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddRelease((volatile long *) value, operand); }
-FORCE_INLINE uint32 atomic_fetch_sub_release(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAddRelease((volatile unsigned long *) value, -((long) operand)); }
-FORCE_INLINE uint64 atomic_fetch_add_release(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAddRelease64((volatile LONG64 *) value, (LONG64) operand); }
-FORCE_INLINE uint64 atomic_fetch_sub_release(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
-FORCE_INLINE void atomic_and_release(volatile uint8* value, uint8 mask) { InterlockedAnd8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_and_release(volatile int8* value, int8 mask) { InterlockedAnd8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_and_release(volatile uint16* value, uint16 mask) { InterlockedAnd16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_and_release(volatile int16* value, int16 mask) { InterlockedAnd16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_and_release(volatile uint32* value, uint32 mask) { InterlockedAndRelease((volatile LONG *) value, mask); }
-FORCE_INLINE void atomic_and_release(volatile int32* value, int32 mask) { InterlockedAndRelease((volatile LONG *) value, (LONG)mask); }
-FORCE_INLINE void atomic_and_release(volatile uint64* value, uint64 mask) { InterlockedAnd64Release((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_and_release(volatile int64* value, int64 mask) { InterlockedAnd64Release((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_or_release(volatile uint8* value, uint8 mask) { InterlockedOr8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_or_release(volatile int8* value, int8 mask) { InterlockedOr8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_or_release(volatile uint16* value, uint16 mask) { InterlockedOr16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_or_release(volatile int16* value, int16 mask) { InterlockedOr16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_or_release(volatile uint32* value, uint32 mask) { InterlockedOrRelease((volatile LONG *) value, mask); }
-FORCE_INLINE void atomic_or_release(volatile int32* value, int32 mask) { InterlockedOrRelease((volatile LONG *) value, (LONG)mask); }
-FORCE_INLINE void atomic_or_release(volatile uint64* value, uint64 mask) { InterlockedOr64Release((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_or_release(volatile int64* value, int64 mask) { InterlockedOr64Release((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_set_release(void** target, void* new_pointer) noexcept { InterlockedExchangePointer(target, new_pointer); }
+FORCE_INLINE void* atomic_get_release(void** target) noexcept { return InterlockedCompareExchangePointerRelease(target, NULL, NULL); }
+FORCE_INLINE void atomic_set_release(volatile int8* value, int8 new_value) noexcept { InterlockedExchange8((volatile char *) value, new_value); }
+FORCE_INLINE void atomic_set_release(volatile int16* value, int16 new_value) noexcept { InterlockedExchange16((volatile short *) value, new_value); }
+FORCE_INLINE void
+FORCE_INLINE void atomic_set_release(void** target, void* new_pointer) noexcept { InterlockedExchangePointer(target, new_pointer); }
+FORCE_INLINE void* atomic_get_release(void** target) noexcept { return InterlockedCompareExchangePointerRelease(target, NULL, NULL); }
+FORCE_INLINE void atomic_set_release(volatile int8* value, int8 new_value) noexcept { InterlockedExchange8((volatile char *) value, new_value); }
+FORCE_INLINE void atomic_set_release(volatile int16* value, int16 new_value) noexcept { InterlockedExchange16((volatile short *) value, new_value); }
+FORCE_INLINE void atomic_set_release(volatile int32* value, int32 new_value) noexcept { InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_release(volatile int64* value, int64 new_value) noexcept { InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE void atomic_set_release(volatile f32* value, f32 new_value) noexcept { _atomic_32 temp = {.f = new_value}; InterlockedExchange((volatile long *) value, (long) temp.l); }
+FORCE_INLINE void atomic_set_release(volatile f64* value, f64 new_value) noexcept { _atomic_64 temp = {.f = new_value}; InterlockedExchange64((volatile LONG64 *) value, (LONG64) temp.l); }
+FORCE_INLINE int8 atomic_fetch_set_release(volatile int8* value, int8 new_value) noexcept { return (int8) InterlockedExchange8((volatile char *) value, (char) new_value); }
+FORCE_INLINE int16 atomic_fetch_set_release(volatile int16* value, int16 new_value) noexcept { return (int16) InterlockedExchange16((volatile short *) value, (short) new_value); }
+FORCE_INLINE int32 atomic_fetch_set_release(volatile int32* value, int32 new_value) noexcept { return (int32) InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE int64 atomic_fetch_set_release(volatile int64* value, int64 new_value) noexcept { return (int64) InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE int8 atomic_get_release(volatile int8* value) noexcept { return (int8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE int16 atomic_get_release(volatile int16* value) noexcept { return (int16) InterlockedCompareExchangeRelease16((volatile short *) value, 0, 0); }
+FORCE_INLINE int32 atomic_get_release(volatile int32* value) noexcept { return (int32) InterlockedCompareExchangeRelease((volatile long *) value, 0, 0); }
+FORCE_INLINE int64 atomic_get_release(volatile int64* value) noexcept { return (int64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE f32 atomic_get_release(volatile f32* value) noexcept { _atomic_32 temp = {.l = InterlockedCompareExchangeRelease((volatile long *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE f64 atomic_get_release(volatile f64* value) noexcept { _atomic_64 temp = {.l = InterlockedCompareExchangeRelease64((volatile LONG64 *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE int8 atomic_increment_release(volatile int8* value) noexcept { return (int8) (InterlockedExchangeAdd8((volatile char *) value, 1) + 1); }
+FORCE_INLINE int8 atomic_decrement_release(volatile int8* value) noexcept { return (int8) (InterlockedExchangeAdd8((volatile char *) value, -1) - 1); }
+FORCE_INLINE int16 atomic_increment_release(volatile int16* value) noexcept { return InterlockedIncrementRelease16((volatile short *) value); }
+FORCE_INLINE int16 atomic_decrement_release(volatile int16* value) noexcept { return InterlockedDecrementRelease16((volatile short *) value); }
+FORCE_INLINE int32 atomic_increment_release(volatile int32* value) noexcept { return InterlockedIncrementRelease((volatile long *) value); }
+FORCE_INLINE int32 atomic_decrement_release(volatile int32* value) noexcept { return InterlockedDecrementRelease((volatile long *) value); }
+FORCE_INLINE int64 atomic_increment_release(volatile int64* value) noexcept { return InterlockedIncrementRelease64((volatile LONG64 *) value); }
+FORCE_INLINE int64 atomic_decrement_release(volatile int64* value) noexcept { return InterlockedDecrementRelease64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_release(volatile int8* value, int8 increment) noexcept { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_release(volatile int8* value, int8 decrement) noexcept { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_release(volatile int16* value, int16 increment) noexcept { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_release(volatile int16* value, int16 decrement) noexcept { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_release(volatile int32* value, int32 increment) noexcept { InterlockedAddRelease((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_release(volatile int32* value, int32 decrement) noexcept { InterlockedAddRelease((volatile long *) value, -decrement); }
+FORCE_INLINE void atomic_add_release(volatile int64* value, int64 increment) noexcept { InterlockedAddRelease64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_release(volatile int64* value, int64 decrement) noexcept { InterlockedAddRelease64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE f32 atomic_compare_exchange_weak_release(volatile f32* value, f32* expected, f32 desired) noexcept { _atomic_32 des = {.f = desired}; _atomic_32 exp = {.f = *expected}; _atomic_32 temp = {.l = InterlockedCompareExchangeRelease((volatile long *) value, des.l, exp.l) }; return temp.f; }
+FORCE_INLINE f64 atomic_compare_exchange_weak_release(volatile f64* value, f64* expected, f64 desired) noexcept { _atomic_64 des = {.f = desired}; _atomic_64 exp = {.f = *expected}; _atomic_64 temp = {.l = InterlockedCompareExchangeRelease64((volatile LONG64 *) value, des.l, exp.l) }; return temp.f; }
+FORCE_INLINE int32 atomic_compare_exchange_weak_release(volatile int32* value, int32* expected, int32 desired) noexcept { return (int32) InterlockedCompareExchangeRelease((volatile long *) value, desired, *expected); }
+FORCE_INLINE int64 atomic_compare_exchange_weak_release(volatile int64* value, int64* expected, int64 desired) noexcept { return (int64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE int8 atomic_fetch_add_release(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE int8 atomic_fetch_sub_release(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE int16 atomic_fetch_add_release(volatile int16* value, int16 operand) noexcept { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE int16 atomic_fetch_sub_release(volatile int16* value, int16 operand) noexcept { return (int16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE int32 atomic_fetch_add_release(volatile int32* value, int32 operand) noexcept { return (int32) InterlockedExchangeAddRelease((volatile long *) value, operand); }
+FORCE_INLINE int32 atomic_fetch_sub_release(volatile int32* value, int32 operand) noexcept { return (int32) InterlockedExchangeAddRelease((volatile long *) value, -((long) operand)); }
+FORCE_INLINE int64 atomic_fetch_add_release(volatile int64* value, int64 operand) noexcept { return (int64) InterlockedExchangeAddRelease64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE int64 atomic_fetch_sub_release(volatile int64* value, int64 operand) noexcept { return (int64) InterlockedExchangeAddRelease64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_set_release(volatile uint8* value, uint8 new_value) noexcept { InterlockedExchange8((volatile char *) value, (char) new_value); }
+FORCE_INLINE void atomic_set_release(volatile uint16* value, uint16 new_value) noexcept { InterlockedExchange16((volatile short *) value, (short) new_value); }
+FORCE_INLINE void atomic_set_release(volatile uint32* value, uint32 new_value) noexcept { InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_release(volatile uint64* value, uint64 new_value) noexcept { InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_fetch_set_release(volatile uint8* value, uint8 new_value) noexcept { return (uint8) InterlockedExchange8((volatile char *) value, (char) new_value); }
+FORCE_INLINE uint16 atomic_fetch_set_release(volatile uint16* value, uint16 new_value) noexcept { return (uint16) InterlockedExchange16((volatile short *) value, (short) new_value); }
+FORCE_INLINE uint32 atomic_fetch_set_release(volatile uint32* value, uint32 new_value) noexcept { return (uint32) InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE uint64 atomic_fetch_set_release(volatile uint64* value, uint64 new_value) noexcept { return (uint64) InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_get_release(volatile uint8* value) noexcept { return (uint8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE uint16 atomic_get_release(volatile uint16* value) noexcept { return (uint16) InterlockedCompareExchangeRelease16((volatile short *) value, 0, 0); }
+FORCE_INLINE uint32 atomic_get_release(volatile uint32* value) noexcept { return (uint32) InterlockedCompareExchangeRelease((volatile long *) value, 0, 0); }
+FORCE_INLINE uint64 atomic_get_release(volatile uint64* value) noexcept { return (uint64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE uint8 atomic_increment_release(volatile uint8* value) noexcept { return (uint8) (InterlockedExchangeAdd8((volatile char *) value, 1) + 1); }
+FORCE_INLINE uint8 atomic_decrement_release(volatile uint8* value) noexcept { return (uint8) (InterlockedExchangeAdd8((volatile char *) value, -1) - 1); }
+FORCE_INLINE uint16 atomic_increment_release(volatile uint16* value) noexcept { return InterlockedIncrementRelease16((volatile short *) value); }
+FORCE_INLINE uint16 atomic_decrement_release(volatile uint16* value) noexcept { return InterlockedDecrementRelease16((volatile short *) value); }
+FORCE_INLINE uint32 atomic_increment_release(volatile uint32* value) noexcept { return InterlockedIncrementRelease((volatile long *) value); }
+FORCE_INLINE uint32 atomic_decrement_release(volatile uint32* value) noexcept { return InterlockedDecrementRelease((volatile long *) value); }
+FORCE_INLINE uint64 atomic_increment_release(volatile uint64* value) noexcept { return InterlockedIncrementRelease64((volatile LONG64 *) value); }
+FORCE_INLINE uint64 atomic_decrement_release(volatile uint64* value) noexcept { return InterlockedDecrementRelease64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_release(volatile uint8* value, uint8 increment) noexcept { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_release(volatile uint8* value, uint8 decrement) noexcept { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_release(volatile uint16* value, uint16 increment) noexcept { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_release(volatile uint16* value, uint16 decrement) noexcept { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_release(volatile uint32* value, uint32 increment) noexcept { InterlockedAddRelease((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_release(volatile uint32* value, uint32 decrement) noexcept { InterlockedAddRelease((volatile long *) value, -1 * ((int32) decrement)); }
+FORCE_INLINE void atomic_add_release(volatile uint64* value, uint64 increment) noexcept { InterlockedAddRelease64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_release(volatile uint64* value, uint64 decrement) noexcept { InterlockedAddRelease64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE uint32 atomic_compare_exchange_weak_release(volatile uint32* value, uint32* expected, uint32 desired) noexcept { return (uint32) InterlockedCompareExchangeRelease((volatile long *) value, desired, *expected); }
+FORCE_INLINE uint64 atomic_compare_exchange_weak_release(volatile uint64* value, uint64* expected, uint64 desired) noexcept { return (uint64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE uint8 atomic_fetch_add_release(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE uint8 atomic_fetch_sub_release(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE uint16 atomic_fetch_add_release(volatile uint16* value, uint16 operand) noexcept { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE uint16 atomic_fetch_sub_release(volatile uint16* value, uint16 operand) noexcept { return (uint16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE uint32 atomic_fetch_add_release(volatile uint32* value, uint32 operand) noexcept { return (uint32) InterlockedExchangeAddRelease((volatile long *) value, operand); }
+FORCE_INLINE uint32 atomic_fetch_sub_release(volatile uint32* value, uint32 operand) noexcept { return (uint32) InterlockedExchangeAddRelease((volatile long *) value, -((long) operand)); }
+FORCE_INLINE uint64 atomic_fetch_add_release(volatile uint64* value, uint64 operand) noexcept { return (uint64) InterlockedExchangeAddRelease64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE uint64 atomic_fetch_sub_release(volatile uint64* value, uint64 operand) noexcept { return (uint64) InterlockedExchangeAddRelease64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_and_release(volatile uint8* value, uint8 mask) noexcept { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_release(volatile int8* value, int8 mask) noexcept { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_release(volatile uint16* value, uint16 mask) noexcept { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_release(volatile int16* value, int16 mask) noexcept { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_release(volatile uint32* value, uint32 mask) noexcept { InterlockedAndRelease((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_and_release(volatile int32* value, int32 mask) noexcept { InterlockedAndRelease((volatile LONG *) value, (LONG)mask); }
+FORCE_INLINE void atomic_and_release(volatile uint64* value, uint64 mask) noexcept { InterlockedAnd64Release((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_and_release(volatile int64* value, int64 mask) noexcept { InterlockedAnd64Release((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile uint8* value, uint8 mask) noexcept { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile int8* value, int8 mask) noexcept { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile uint16* value, uint16 mask) noexcept { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile int16* value, int16 mask) noexcept { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile uint32* value, uint32 mask) noexcept { InterlockedOrRelease((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile int32* value, int32 mask) noexcept { InterlockedOrRelease((volatile LONG *) value, (LONG)mask); }
+FORCE_INLINE void atomic_or_release(volatile uint64* value, uint64 mask) noexcept { InterlockedOr64Release((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_release(volatile int64* value, int64 mask) noexcept { InterlockedOr64Release((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_set_acquire_release(void** target, void* new_pointer) { InterlockedExchangePointer(target, new_pointer); }
-FORCE_INLINE void* atomic_get_acquire_release(void** target) { return InterlockedCompareExchangePointer(target, NULL, NULL); }
-FORCE_INLINE void atomic_set_acquire_release(volatile int8* value, int8 new_value) { InterlockedExchange8((volatile char *) value, new_value); }
-FORCE_INLINE void atomic_set_acquire_release(volatile int16* value, int16 new_value) { InterlockedExchange16((volatile short *) value, new_value); }
-FORCE_INLINE void atomic_set_acquire_release(volatile int32* value, int32 new_value) { InterlockedExchange((volatile long *) value, new_value); }
-FORCE_INLINE void atomic_set_acquire_release(volatile int64* value, int64 new_value) { InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE void atomic_set_acquire_release(volatile f32* value, f32 new_value) { _atomic_32 temp = {.f = new_value}; InterlockedExchange((volatile long *) value, (long) temp.l); }
-FORCE_INLINE void atomic_set_acquire_release(volatile f64* value, f64 new_value) { _atomic_64 temp = {.f = new_value}; InterlockedExchange64((volatile LONG64 *) value, (LONG64) temp.l); }
-FORCE_INLINE int8 atomic_fetch_set_acquire_release(volatile int8* value, int8 new_value) { return (int8) InterlockedExchange8((volatile char *) value, (char) new_value); }
-FORCE_INLINE int16 atomic_fetch_set_acquire_release(volatile int16* value, int16 new_value) { return (int16) InterlockedExchange16((volatile short *) value, (short) new_value); }
-FORCE_INLINE int32 atomic_fetch_set_acquire_release(volatile int32* value, int32 new_value) { return (int32) InterlockedExchange((volatile long *) value, new_value); }
-FORCE_INLINE int64 atomic_fetch_set_acquire_release(volatile int64* value, int64 new_value) { return (int64) InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE int8 atomic_get_acquire_release(volatile int8* value) { return (int8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
-FORCE_INLINE int16 atomic_get_acquire_release(volatile int16* value) { return (int16) InterlockedCompareExchange16((volatile short *) value, 0, 0); }
-FORCE_INLINE int32 atomic_get_acquire_release(volatile int32* value) { return (int32) InterlockedCompareExchange((volatile long *) value, 0, 0); }
-FORCE_INLINE int64 atomic_get_acquire_release(volatile int64* value) { return (int64) InterlockedCompareExchange64((volatile LONG64 *) value, 0, 0); }
-FORCE_INLINE f32 atomic_get_acquire_release(volatile f32* value) { _atomic_32 temp = {.l = InterlockedCompareExchange((volatile long *) value, 0, 0)}; return temp.f; }
-FORCE_INLINE f64 atomic_get_acquire_release(volatile f64* value) { _atomic_64 temp = {.l = InterlockedCompareExchange64((volatile LONG64 *) value, 0, 0)}; return temp.f; }
-FORCE_INLINE void atomic_increment_acquire_release(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
-FORCE_INLINE void atomic_decrement_acquire_release(volatile int8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
-FORCE_INLINE void atomic_increment_acquire_release(volatile int16* value) { InterlockedIncrement16((volatile short *) value); }
-FORCE_INLINE void atomic_decrement_acquire_release(volatile int16* value) { InterlockedDecrement16((volatile short *) value); }
-FORCE_INLINE void atomic_increment_acquire_release(volatile int32* value) { InterlockedIncrement((volatile long *) value); }
-FORCE_INLINE void atomic_decrement_acquire_release(volatile int32* value) { InterlockedDecrement((volatile long *) value); }
-FORCE_INLINE void atomic_increment_acquire_release(volatile int64* value) { InterlockedIncrement64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_decrement_acquire_release(volatile int64* value) { InterlockedDecrement64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_add_acquire_release(volatile int8* value, int8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
-FORCE_INLINE void atomic_sub_acquire_release(volatile int8* value, int8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
-FORCE_INLINE void atomic_add_acquire_release(volatile int16* value, int16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
-FORCE_INLINE void atomic_sub_acquire_release(volatile int16* value, int16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
-FORCE_INLINE void atomic_add_acquire_release(volatile int32* value, int32 increment) { InterlockedAdd((volatile long *) value, increment); }
-FORCE_INLINE void atomic_sub_acquire_release(volatile int32* value, int32 decrement) { InterlockedAdd((volatile long *) value, -decrement); }
-FORCE_INLINE void atomic_add_acquire_release(volatile int64* value, int64 increment) { InterlockedAdd64((volatile LONG64 *) value, (LONG64) increment); }
-FORCE_INLINE void atomic_sub_acquire_release(volatile int64* value, int64 decrement) { InterlockedAdd64((volatile LONG64 *) value, -((LONG64) decrement)); }
-FORCE_INLINE f32 atomic_compare_exchange_weak_acquire_release(volatile f32* value, f32* expected, f32 desired) { _atomic_32 temp = {.l = InterlockedCompareExchange((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; }
-FORCE_INLINE f64 atomic_compare_exchange_weak_acquire_release(volatile f64* value, f64* expected, f64 desired) { _atomic_64 temp = {.l = InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; }
-FORCE_INLINE int32 atomic_compare_exchange_weak_acquire_release(volatile int32* value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchange((volatile long *) value, desired, *expected); }
-FORCE_INLINE int64 atomic_compare_exchange_weak_acquire_release(volatile int64* value, int64* expected, int64 desired) { return (int64) InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-FORCE_INLINE int8 atomic_fetch_add_acquire_release(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
-FORCE_INLINE int8 atomic_fetch_sub_acquire_release(volatile int8* value, int8 operand) { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
-FORCE_INLINE int16 atomic_fetch_add_acquire_release(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
-FORCE_INLINE int16 atomic_fetch_sub_acquire_release(volatile int16* value, int16 operand) { return (int16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
-FORCE_INLINE int32 atomic_fetch_add_acquire_release(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAdd((volatile long *) value, operand); }
-FORCE_INLINE int32 atomic_fetch_sub_acquire_release(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAdd((volatile unsigned long *) value, -((long) operand)); }
-FORCE_INLINE int64 atomic_fetch_add_acquire_release(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, (LONG64) operand); }
-FORCE_INLINE int64 atomic_fetch_sub_acquire_release(volatile int64* value, int64 operand) { return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
-FORCE_INLINE void atomic_set_acquire_release(volatile uint8* value, uint8 new_value) { InterlockedExchange8((volatile char *) value, (char) new_value); }
-FORCE_INLINE void atomic_set_acquire_release(volatile uint16* value, uint16 new_value) { InterlockedExchange16((volatile short *) value, (short) new_value); }
-FORCE_INLINE void atomic_set_acquire_release(volatile uint32* value, uint32 new_value) { InterlockedExchange((volatile long *) value, new_value); }
-FORCE_INLINE void atomic_set_acquire_release(volatile uint64* value, uint64 new_value) { InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE uint8 atomic_fetch_set_acquire_release(volatile uint8* value, uint8 new_value) { return (uint8) InterlockedExchange8((volatile char *) value, (char) new_value); }
-FORCE_INLINE uint16 atomic_fetch_set_acquire_release(volatile uint16* value, uint16 new_value) { return (uint16) InterlockedExchange16((volatile short *) value, (short) new_value); }
-FORCE_INLINE uint32 atomic_fetch_set_acquire_release(volatile uint32* value, uint32 new_value) { return (uint32) InterlockedExchange((volatile long *) value, new_value); }
-FORCE_INLINE uint64 atomic_fetch_set_acquire_release(volatile uint64* value, uint64 new_value) { return (uint64) InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
-FORCE_INLINE uint8 atomic_get_acquire_release(volatile uint8* value) { return (uint8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
-FORCE_INLINE uint16 atomic_get_acquire_release(volatile uint16* value) { return (uint16) InterlockedCompareExchange16((volatile short *) value, 0, 0); }
-FORCE_INLINE uint32 atomic_get_acquire_release(volatile uint32* value) { return (uint32) InterlockedCompareExchange((volatile long *) value, 0, 0); }
-FORCE_INLINE uint64 atomic_get_acquire_release(volatile uint64* value) { return (uint64) InterlockedCompareExchange64((volatile LONG64 *) value, 0, 0); }
-FORCE_INLINE void atomic_increment_acquire_release(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, 1); }
-FORCE_INLINE void atomic_decrement_acquire_release(volatile uint8* value) { InterlockedExchangeAdd8((volatile char *) value, -1); }
-FORCE_INLINE void atomic_increment_acquire_release(volatile uint16* value) { InterlockedIncrement16((volatile short *) value); }
-FORCE_INLINE void atomic_decrement_acquire_release(volatile uint16* value) { InterlockedDecrement16((volatile short *) value); }
-FORCE_INLINE void atomic_increment_acquire_release(volatile uint32* value) { InterlockedIncrement((volatile long *) value); }
-FORCE_INLINE void atomic_decrement_acquire_release(volatile uint32* value) { InterlockedDecrement((volatile long *) value); }
-FORCE_INLINE void atomic_increment_acquire_release(volatile uint64* value) { InterlockedIncrement64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_decrement_acquire_release(volatile uint64* value) { InterlockedDecrement64((volatile LONG64 *) value); }
-FORCE_INLINE void atomic_add_acquire_release(volatile uint8* value, uint8 increment) { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
-FORCE_INLINE void atomic_sub_acquire_release(volatile uint8* value, uint8 decrement) { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
-FORCE_INLINE void atomic_add_acquire_release(volatile uint16* value, uint16 increment) { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
-FORCE_INLINE void atomic_sub_acquire_release(volatile uint16* value, uint16 decrement) { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
-FORCE_INLINE void atomic_add_acquire_release(volatile uint32* value, uint32 increment) { InterlockedAdd((volatile long *) value, increment); }
-FORCE_INLINE void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement) { InterlockedAdd((volatile long *) value, -1 * ((int32) decrement)); }
-FORCE_INLINE void atomic_add_acquire_release(volatile uint64* value, uint64 increment) { InterlockedAdd64((volatile LONG64 *) value, (LONG64) increment); }
-FORCE_INLINE void atomic_sub_acquire_release(volatile uint64* value, uint64 decrement) { InterlockedAdd64((volatile LONG64 *) value, -((LONG64) decrement)); }
-FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint32* expected, uint32 desired) { return (uint32) InterlockedCompareExchange((volatile long *) value, desired, *expected); }
-FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire_release(volatile uint64* value, uint64* expected, uint64 desired) { return (uint64) InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
-FORCE_INLINE uint8 atomic_fetch_add_acquire_release(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
-FORCE_INLINE uint8 atomic_fetch_sub_acquire_release(volatile uint8* value, uint8 operand) { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
-FORCE_INLINE uint16 atomic_fetch_add_acquire_release(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
-FORCE_INLINE uint16 atomic_fetch_sub_acquire_release(volatile uint16* value, uint16 operand) { return (uint16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
-FORCE_INLINE uint32 atomic_fetch_add_acquire_release(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAdd((volatile long *) value, operand); }
-FORCE_INLINE uint32 atomic_fetch_sub_acquire_release(volatile uint32* value, uint32 operand) { return (uint32) InterlockedExchangeAdd((volatile unsigned long *) value, -((long) operand)); }
-FORCE_INLINE uint64 atomic_fetch_add_acquire_release(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, (LONG64) operand); }
-FORCE_INLINE uint64 atomic_fetch_sub_acquire_release(volatile uint64* value, uint64 operand) { return (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
-FORCE_INLINE void atomic_and_acquire_release(volatile uint8* value, uint8 mask) { InterlockedAnd8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_and_acquire_release(volatile int8* value, int8 mask) { InterlockedAnd8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_and_acquire_release(volatile uint16* value, uint16 mask) { InterlockedAnd16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_and_acquire_release(volatile int16* value, int16 mask) { InterlockedAnd16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_and_acquire_release(volatile uint32* value, uint32 mask) { InterlockedAnd((volatile LONG *) value, mask); }
-FORCE_INLINE void atomic_and_acquire_release(volatile int32* value, int32 mask) { InterlockedAnd((volatile LONG *) value, (LONG)mask); }
-FORCE_INLINE void atomic_and_acquire_release(volatile uint64* value, uint64 mask) { InterlockedAnd64((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_and_acquire_release(volatile int64* value, int64 mask) { InterlockedAnd64((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_or_acquire_release(volatile uint8* value, uint8 mask) { InterlockedOr8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_or_acquire_release(volatile int8* value, int8 mask) { InterlockedOr8((volatile char *) value, mask); }
-FORCE_INLINE void atomic_or_acquire_release(volatile uint16* value, uint16 mask) { InterlockedOr16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_or_acquire_release(volatile int16* value, int16 mask) { InterlockedOr16((volatile short *) value, mask); }
-FORCE_INLINE void atomic_or_acquire_release(volatile uint32* value, uint32 mask) { InterlockedOr((volatile LONG *) value, mask); }
-FORCE_INLINE void atomic_or_acquire_release(volatile int32* value, int32 mask) { InterlockedOr((volatile LONG *) value, (LONG)mask); }
-FORCE_INLINE void atomic_or_acquire_release(volatile uint64* value, uint64 mask) { InterlockedOr64((volatile LONG64 *) value, mask); }
-FORCE_INLINE void atomic_or_acquire_release(volatile int64* value, int64 mask) { InterlockedOr64((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_set_acquire_release(void** target, void* new_pointer) noexcept { InterlockedExchangePointer(target, new_pointer); }
+FORCE_INLINE void* atomic_get_acquire_release(void** target) noexcept { return InterlockedCompareExchangePointer(target, NULL, NULL); }
+FORCE_INLINE void atomic_set_acquire_release(volatile int8* value, int8 new_value) noexcept { InterlockedExchange8((volatile char *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile int16* value, int16 new_value) noexcept { InterlockedExchange16((volatile short *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile int32* value, int32 new_value) noexcept { InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile int64* value, int64 new_value) noexcept { InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile f32* value, f32 new_value) noexcept { _atomic_32 temp = {.f = new_value}; InterlockedExchange((volatile long *) value, (long) temp.l); }
+FORCE_INLINE void atomic_set_acquire_release(volatile f64* value, f64 new_value) noexcept { _atomic_64 temp = {.f = new_value}; InterlockedExchange64((volatile LONG64 *) value, (LONG64) temp.l); }
+FORCE_INLINE int8 atomic_fetch_set_acquire_release(volatile int8* value, int8 new_value) noexcept { return (int8) InterlockedExchange8((volatile char *) value, (char) new_value); }
+FORCE_INLINE int16 atomic_fetch_set_acquire_release(volatile int16* value, int16 new_value) noexcept { return (int16) InterlockedExchange16((volatile short *) value, (short) new_value); }
+FORCE_INLINE int32 atomic_fetch_set_acquire_release(volatile int32* value, int32 new_value) noexcept { return (int32) InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE int64 atomic_fetch_set_acquire_release(volatile int64* value, int64 new_value) noexcept { return (int64) InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE int8 atomic_get_acquire_release(volatile int8* value) noexcept { return (int8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE int16 atomic_get_acquire_release(volatile int16* value) noexcept { return (int16) InterlockedCompareExchange16((volatile short *) value, 0, 0); }
+FORCE_INLINE int32 atomic_get_acquire_release(volatile int32* value) noexcept { return (int32) InterlockedCompareExchange((volatile long *) value, 0, 0); }
+FORCE_INLINE int64 atomic_get_acquire_release(volatile int64* value) noexcept { return (int64) InterlockedCompareExchange64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE f32 atomic_get_acquire_release(volatile f32* value) noexcept { _atomic_32 temp = {.l = InterlockedCompareExchange((volatile long *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE f64 atomic_get_acquire_release(volatile f64* value) noexcept { _atomic_64 temp = {.l = InterlockedCompareExchange64((volatile LONG64 *) value, 0, 0)}; return temp.f; }
+FORCE_INLINE int8 atomic_increment_acquire_release(volatile int8* value) noexcept { return (int8) (InterlockedExchangeAdd8((volatile char *) value, 1) + 1); }
+FORCE_INLINE int8 atomic_decrement_acquire_release(volatile int8* value) noexcept { return (int8) (InterlockedExchangeAdd8((volatile char *) value, -1) - 1); }
+FORCE_INLINE int16 atomic_increment_acquire_release(volatile int16* value) noexcept { return InterlockedIncrement16((volatile short *) value); }
+FORCE_INLINE int16 atomic_decrement_acquire_release(volatile int16* value) noexcept { return InterlockedDecrement16((volatile short *) value); }
+FORCE_INLINE int32 atomic_increment_acquire_release(volatile int32* value) noexcept { return InterlockedIncrement((volatile long *) value); }
+FORCE_INLINE int32 atomic_decrement_acquire_release(volatile int32* value) noexcept { return InterlockedDecrement((volatile long *) value); }
+FORCE_INLINE int64 atomic_increment_acquire_release(volatile int64* value) noexcept { return InterlockedIncrement64((volatile LONG64 *) value); }
+FORCE_INLINE int64 atomic_decrement_acquire_release(volatile int64* value) noexcept { return InterlockedDecrement64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_acquire_release(volatile int8* value, int8 increment) noexcept { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile int8* value, int8 decrement) noexcept { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_acquire_release(volatile int16* value, int16 increment) noexcept { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile int16* value, int16 decrement) noexcept { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_acquire_release(volatile int32* value, int32 increment) noexcept { InterlockedAdd((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile int32* value, int32 decrement) noexcept { InterlockedAdd((volatile long *) value, -decrement); }
+FORCE_INLINE void atomic_add_acquire_release(volatile int64* value, int64 increment) noexcept { InterlockedAdd64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile int64* value, int64 decrement) noexcept { InterlockedAdd64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE f32 atomic_compare_exchange_weak_acquire_release(volatile f32* value, f32* expected, f32 desired) noexcept { _atomic_32 des = {.f = desired}; _atomic_32 exp = {.f = *expected}; _atomic_32 temp = {.l = InterlockedCompareExchange((volatile long *) value, des.l, exp.l) }; return temp.f; }
+FORCE_INLINE f64 atomic_compare_exchange_weak_acquire_release(volatile f64* value, f64* expected, f64 desired) noexcept { _atomic_64 des = {.f = desired}; _atomic_64 exp = {.f = *expected}; _atomic_64 temp = {.l = InterlockedCompareExchange64((volatile LONG64 *) value, des.l, exp.l) }; return temp.f; }
+FORCE_INLINE int32 atomic_compare_exchange_weak_acquire_release(volatile int32* value, int32* expected, int32 desired) noexcept { return (int32) InterlockedCompareExchange((volatile long *) value, desired, *expected); }
+FORCE_INLINE int64 atomic_compare_exchange_weak_acquire_release(volatile int64* value, int64* expected, int64 desired) noexcept { return (int64) InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE int8 atomic_fetch_add_acquire_release(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE int8 atomic_fetch_sub_acquire_release(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE int16 atomic_fetch_add_acquire_release(volatile int16* value, int16 operand) noexcept { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE int16 atomic_fetch_sub_acquire_release(volatile int16* value, int16 operand) noexcept { return (int16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE int32 atomic_fetch_add_acquire_release(volatile int32* value, int32 operand) noexcept { return (int32) InterlockedExchangeAdd((volatile long *) value, operand); }
+FORCE_INLINE int32 atomic_fetch_sub_acquire_release(volatile int32* value, int32 operand) noexcept { return (int32) InterlockedExchangeAdd((volatile long *) value, -((long) operand)); }
+FORCE_INLINE int64 atomic_fetch_add_acquire_release(volatile int64* value, int64 operand) noexcept { return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE int64 atomic_fetch_sub_acquire_release(volatile int64* value, int64 operand) noexcept { return (int64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_set_acquire_release(volatile uint8* value, uint8 new_value) noexcept { InterlockedExchange8((volatile char *) value, (char) new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile uint16* value, uint16 new_value) noexcept { InterlockedExchange16((volatile short *) value, (short) new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile uint32* value, uint32 new_value) noexcept { InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE void atomic_set_acquire_release(volatile uint64* value, uint64 new_value) noexcept { InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_fetch_set_acquire_release(volatile uint8* value, uint8 new_value) noexcept { return (uint8) InterlockedExchange8((volatile char *) value, (char) new_value); }
+FORCE_INLINE uint16 atomic_fetch_set_acquire_release(volatile uint16* value, uint16 new_value) noexcept { return (uint16) InterlockedExchange16((volatile short *) value, (short) new_value); }
+FORCE_INLINE uint32 atomic_fetch_set_acquire_release(volatile uint32* value, uint32 new_value) noexcept { return (uint32) InterlockedExchange((volatile long *) value, new_value); }
+FORCE_INLINE uint64 atomic_fetch_set_acquire_release(volatile uint64* value, uint64 new_value) noexcept { return (uint64) InterlockedExchange64((volatile LONG64 *) value, (LONG64) new_value); }
+FORCE_INLINE uint8 atomic_get_acquire_release(volatile uint8* value) noexcept { return (uint8) _InterlockedCompareExchange8((volatile char *) value, 0, 0); }
+FORCE_INLINE uint16 atomic_get_acquire_release(volatile uint16* value) noexcept { return (uint16) InterlockedCompareExchange16((volatile short *) value, 0, 0); }
+FORCE_INLINE uint32 atomic_get_acquire_release(volatile uint32* value) noexcept { return (uint32) InterlockedCompareExchange((volatile long *) value, 0, 0); }
+FORCE_INLINE uint64 atomic_get_acquire_release(volatile uint64* value) noexcept { return (uint64) InterlockedCompareExchange64((volatile LONG64 *) value, 0, 0); }
+FORCE_INLINE uint8 atomic_increment_acquire_release(volatile uint8* value) noexcept { return (uint8) (InterlockedExchangeAdd8((volatile char *) value, 1) + 1); }
+FORCE_INLINE uint8 atomic_decrement_acquire_release(volatile uint8* value) noexcept { return (uint8) (InterlockedExchangeAdd8((volatile char *) value, -1) - 1); }
+FORCE_INLINE uint16 atomic_increment_acquire_release(volatile uint16* value) noexcept { return InterlockedIncrement16((volatile short *) value); }
+FORCE_INLINE uint16 atomic_decrement_acquire_release(volatile uint16* value) noexcept { return InterlockedDecrement16((volatile short *) value); }
+FORCE_INLINE uint32 atomic_increment_acquire_release(volatile uint32* value) noexcept { return InterlockedIncrement((volatile long *) value); }
+FORCE_INLINE uint32 atomic_decrement_acquire_release(volatile uint32* value) noexcept { return InterlockedDecrement((volatile long *) value); }
+FORCE_INLINE uint64 atomic_increment_acquire_release(volatile uint64* value) noexcept { return InterlockedIncrement64((volatile LONG64 *) value); }
+FORCE_INLINE uint64 atomic_decrement_acquire_release(volatile uint64* value) noexcept { return InterlockedDecrement64((volatile LONG64 *) value); }
+FORCE_INLINE void atomic_add_acquire_release(volatile uint8* value, uint8 increment) noexcept { InterlockedExchangeAdd8((volatile char *) value, (char) increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile uint8* value, uint8 decrement) noexcept { InterlockedExchangeAdd8((volatile char *) value, -((char) decrement)); }
+FORCE_INLINE void atomic_add_acquire_release(volatile uint16* value, uint16 increment) noexcept { InterlockedExchangeAdd16((volatile short *) value, (short) increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile uint16* value, uint16 decrement) noexcept { InterlockedExchangeAdd16((volatile short *) value, -((short) decrement)); }
+FORCE_INLINE void atomic_add_acquire_release(volatile uint32* value, uint32 increment) noexcept { InterlockedAdd((volatile long *) value, increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement) noexcept { InterlockedAdd((volatile long *) value, -1 * ((int32) decrement)); }
+FORCE_INLINE void atomic_add_acquire_release(volatile uint64* value, uint64 increment) noexcept { InterlockedAdd64((volatile LONG64 *) value, (LONG64) increment); }
+FORCE_INLINE void atomic_sub_acquire_release(volatile uint64* value, uint64 decrement) noexcept { InterlockedAdd64((volatile LONG64 *) value, -((LONG64) decrement)); }
+FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint32* expected, uint32 desired) noexcept { return (uint32) InterlockedCompareExchange((volatile long *) value, desired, *expected); }
+FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire_release(volatile uint64* value, uint64* expected, uint64 desired) noexcept { return (uint64) InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); }
+FORCE_INLINE uint8 atomic_fetch_add_acquire_release(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); }
+FORCE_INLINE uint8 atomic_fetch_sub_acquire_release(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); }
+FORCE_INLINE uint16 atomic_fetch_add_acquire_release(volatile uint16* value, uint16 operand) noexcept { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); }
+FORCE_INLINE uint16 atomic_fetch_sub_acquire_release(volatile uint16* value, uint16 operand) noexcept { return (uint16) InterlockedExchangeAdd16((volatile short *) value, -((short) operand)); }
+FORCE_INLINE uint32 atomic_fetch_add_acquire_release(volatile uint32* value, uint32 operand) noexcept { return (uint32) InterlockedExchangeAdd((volatile long *) value, operand); }
+FORCE_INLINE uint32 atomic_fetch_sub_acquire_release(volatile uint32* value, uint32 operand) noexcept { return (uint32) InterlockedExchangeAdd((volatile long *) value, -((long) operand)); }
+FORCE_INLINE uint64 atomic_fetch_add_acquire_release(volatile uint64* value, uint64 operand) noexcept { return (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, (LONG64) operand); }
+FORCE_INLINE uint64 atomic_fetch_sub_acquire_release(volatile uint64* value, uint64 operand) noexcept { return (uint64) InterlockedExchangeAdd64((volatile LONG64 *) value, -((LONG64) operand)); }
+FORCE_INLINE void atomic_and_acquire_release(volatile uint8* value, uint8 mask) noexcept { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile int8* value, int8 mask) noexcept { InterlockedAnd8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile uint16* value, uint16 mask) noexcept { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile int16* value, int16 mask) noexcept { InterlockedAnd16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile uint32* value, uint32 mask) noexcept { InterlockedAnd((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile int32* value, int32 mask) noexcept { InterlockedAnd((volatile LONG *) value, (LONG)mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile uint64* value, uint64 mask) noexcept { InterlockedAnd64((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_and_acquire_release(volatile int64* value, int64 mask) noexcept { InterlockedAnd64((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile uint8* value, uint8 mask) noexcept { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile int8* value, int8 mask) noexcept { InterlockedOr8((volatile char *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile uint16* value, uint16 mask) noexcept { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile int16* value, int16 mask) noexcept { InterlockedOr16((volatile short *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile uint32* value, uint32 mask) noexcept { InterlockedOr((volatile LONG *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile int32* value, int32 mask) noexcept { InterlockedOr((volatile LONG *) value, (LONG)mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile uint64* value, uint64 mask) noexcept { InterlockedOr64((volatile LONG64 *) value, mask); }
+FORCE_INLINE void atomic_or_acquire_release(volatile int64* value, int64 mask) noexcept { InterlockedOr64((volatile LONG64 *) value, mask); }
 
 // Check out the intrinsic functions fence_memory and fence_write
 // These are much faster and could accomplish what you are doing
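A minimal usage sketch for the wrappers above — a one-producer flag handoff. The variable names are hypothetical and not part of this patch; it assumes only the overloads defined in this hunk:

    // Hypothetical illustration, not part of this patch.
    static int32 job_payload;          // written before the flag is raised
    static volatile int32 job_ready;   // 0 = empty, 1 = ready

    void producer() noexcept {
        job_payload = 42;                   // plain store of the payload
        atomic_set_release(&job_ready, 1);  // release: payload is visible before the flag
    }

    void consumer() noexcept {
        // the acquire_release getter maps to a full-barrier CAS, so the payload
        // read below cannot be hoisted above the flag read
        while (atomic_get_acquire_release(&job_ready) == 0) { }
        int32 v = job_payload;  // observes 42
        (void) v;
    }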
diff --git a/platform/win32/threading/Spinlock.cpp b/platform/win32/threading/Spinlock.cpp
index 01997ed..0b05388 100644
--- a/platform/win32/threading/Spinlock.cpp
+++ b/platform/win32/threading/Spinlock.cpp
@@ -14,6 +14,11 @@
 #include "../TimeUtils.h"
 #include "Spinlock.h"
 
+inline
+void spinlock_init(spinlock32* lock) {
+    *lock = 0;
+}
+
 inline
 void spinlock_start(spinlock32* lock, int32 delay = 10) {
     while (InterlockedExchange(lock, 1) != 0) {
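For reference, the intended call pattern for the new init routine. Note that spinlock_init must write through the pointer (*lock = 0), since assigning to the parameter itself would only change the local copy. spinlock_end is an assumed counterpart that is not shown in this hunk:

    // Sketch only; spinlock_end is assumed, not part of this hunk.
    static spinlock32 lock;

    void setup() {
        spinlock_init(&lock);   // zero the lock word once, before any contention
    }

    void worker() {
        spinlock_start(&lock);  // spins on InterlockedExchange until the lock is owned
        // ... critical section ...
        spinlock_end(&lock);    // assumed: stores 0 with release semantics
    }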
diff --git a/sort/EytzingerSearch.h b/sort/EytzingerSearch.h
index 752faad..0d1bc4e 100644
--- a/sort/EytzingerSearch.h
+++ b/sort/EytzingerSearch.h
@@ -7,7 +7,7 @@
 // @performance We could optimize eytzinger by using 1 based index
 // Consider this https://en.algorithmica.org/hpc/data-structures/binary-search/
-void eytzinger_rearrange(byte* arr, byte* temp, size_t start, size_t* index, size_t num, size_t size) {
+void eytzinger_rearrange(byte* arr, byte* temp, size_t start, size_t* index, size_t num, size_t size) noexcept {
     if (start >= num) {
         return;
     }
diff --git a/sort/HeapSort.h b/sort/HeapSort.h
index 0c838b7..fbb32c9 100644
--- a/sort/HeapSort.h
+++ b/sort/HeapSort.h
@@ -4,7 +4,7 @@
 #include "../stdlib/Types.h"
 #include "../utils/Utils.h"
-void heapsort(void* arr, size_t num, size_t size, int32 (*compare)(const void*, const void*)) {
+void heapsort(void* arr, size_t num, size_t size, int32 (*compare)(const void* __restrict, const void* __restrict)) noexcept {
     char* base = (char*)arr;
 
     // Build a max heap
diff --git a/sort/InsertionSort.h b/sort/InsertionSort.h
index 79d0aae..4dab7a0 100644
--- a/sort/InsertionSort.h
+++ b/sort/InsertionSort.h
@@ -4,7 +4,7 @@
 #include "../stdlib/Types.h"
 #include "../utils/Utils.h"
-void insertionsort(void* arr, size_t num, size_t size, int32 (*compare)(const void*, const void*)) {
+void insertionsort(void* arr, size_t num, size_t size, int32 (*compare)(const void* __restrict, const void* __restrict)) noexcept {
     char* base = (char*) arr;
     for (size_t i = 1; i < num; ++i) {
         for (size_t j = i; j > 0 && compare(base + j * size, base + (j - 1) * size) < 0; --j) {
diff --git a/sort/IntroSort.h b/sort/IntroSort.h
index 3019ae2..3373121 100644
--- a/sort/IntroSort.h
+++ b/sort/IntroSort.h
@@ -6,7 +6,7 @@
 #include "HeapSort.h"
 #include "QuickSort.h"
-void introsort(void* arr, size_t num, size_t size, int32 (*compare)(const void*, const void*), size_t depth_limit) {
+void introsort(void* arr, size_t num, size_t size, int32 (*compare)(const void* __restrict, const void* __restrict), size_t depth_limit) noexcept {
     byte* base = (byte*) arr;
 
     // Use InsertionSort for small subarrays
diff --git a/sort/QuickSort.h b/sort/QuickSort.h
index 1a43464..56d7325 100644
--- a/sort/QuickSort.h
+++ b/sort/QuickSort.h
@@ -4,7 +4,7 @@
 #include "../stdlib/Types.h"
 #include "../utils/Utils.h"
-size_t quicksort_partition(void* arr, size_t size, size_t low, size_t high, int32 (*compare)(const void*, const void*)) {
+size_t quicksort_partition(void* arr, size_t size, size_t low, size_t high, int32 (*compare)(const void* __restrict, const void* __restrict)) noexcept {
     char* base = (char*) arr;
     void* pivot = base + high * size;
     size_t i = low;
@@ -20,7 +20,7 @@ size_t quicksort_partition(void* arr, size_t size, size_t low, size_t high, int3
     return i;
 }
-void quicksort(void* arr, size_t size, size_t low, size_t high, int32 (*compare)(const void*, const void*)) {
+void quicksort(void* arr, size_t size, size_t low, size_t high, int32 (*compare)(const void* __restrict, const void* __restrict)) noexcept {
     if (low < high) {
         size_t pi = quicksort_partition(arr, size, low, high, compare);
diff --git a/sort/Sort.h b/sort/Sort.h
index 08c842c..94dd9c1 100644
--- a/sort/Sort.h
+++ b/sort/Sort.h
@@ -8,7 +8,7 @@
 #include "InsertionSort.h"
 
 inline
-void sort_introsort(void* arr, size_t num, size_t size, int32 (*compare)(const void*, const void*)) {
+void sort_introsort(void* arr, size_t num, size_t size, int32 (*compare)(const void* __restrict, const void* __restrict) noexcept) noexcept {
     size_t depth_limit = 0;
     for (size_t n = num; n > 0; n >>= 1) {
         ++depth_limit;
@@ -20,14 +20,14 @@ void sort_introsort(void* arr, size_t num, size_t size, int32 (*compare)(const v
 }
 
 inline
-void sort_quicksort(void* arr, size_t num, size_t size, int32 (*compare)(const void*, const void*)) {
+void sort_quicksort(void* arr, size_t num, size_t size, int32 (*compare)(const void* __restrict, const void* __restrict) noexcept) noexcept {
     quicksort(arr, size, 0, num - 1, compare);
 }
 
 #define sort_heapsort heapsort
 #define sort_insertionsort insertionsort
 
-int32 sort_compare_int32(const void* a, const void* b) {
+int32 sort_compare_int32(const void* __restrict a, const void* __restrict b) noexcept {
     return (*(int32 *) a) - (*(int32 *) b);
 }
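After this change the sort entry points expect a noexcept comparator with __restrict parameters; a minimal call sketch using only functions from the hunks above (the example data is hypothetical):

    // Hypothetical example data.
    int32 values[4] = {5, 3, 9, 1};

    void sort_example() {
        // sort_compare_int32 matches the noexcept comparator type
        // required by sort_introsort after this change.
        sort_introsort(values, 4, sizeof(int32), sort_compare_int32);
    }

Note that sort_compare_int32 still compares by subtraction, which can wrap for operands of large opposite magnitude; that limitation is unchanged by this patch.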
diff --git a/sort/HeapSort.h b/sort/HeapSort.h
index 0c838b7..fbb32c9 100644
--- a/sort/HeapSort.h
+++ b/sort/HeapSort.h
@@ -4,7 +4,7 @@
 #include "../stdlib/Types.h"
 #include "../utils/Utils.h"
 
-void heapsort(void* arr, size_t num, size_t size, int32 (*compare)(const void*, const void*)) {
+void heapsort(void* arr, size_t num, size_t size, int32 (*compare)(const void* __restrict, const void* __restrict)) noexcept {
     char* base = (char*)arr;
 
     // Build a max heap
diff --git a/sort/InsertionSort.h b/sort/InsertionSort.h
index 79d0aae..4dab7a0 100644
--- a/sort/InsertionSort.h
+++ b/sort/InsertionSort.h
@@ -4,7 +4,7 @@
 #include "../stdlib/Types.h"
 #include "../utils/Utils.h"
 
-void insertionsort(void* arr, size_t num, size_t size, int32 (*compare)(const void*, const void*)) {
+void insertionsort(void* arr, size_t num, size_t size, int32 (*compare)(const void* __restrict, const void* __restrict)) noexcept {
     char* base = (char*) arr;
     for (size_t i = 1; i < num; ++i) {
         for (size_t j = i; j > 0 && compare(base + j * size, base + (j - 1) * size) < 0; --j) {
diff --git a/sort/IntroSort.h b/sort/IntroSort.h
index 3019ae2..3373121 100644
--- a/sort/IntroSort.h
+++ b/sort/IntroSort.h
@@ -6,7 +6,7 @@
 #include "HeapSort.h"
 #include "QuickSort.h"
 
-void introsort(void* arr, size_t num, size_t size, int32 (*compare)(const void*, const void*), size_t depth_limit) {
+void introsort(void* arr, size_t num, size_t size, int32 (*compare)(const void* __restrict, const void* __restrict), size_t depth_limit) noexcept {
     byte* base = (byte*) arr;
 
     // Use InsertionSort for small subarrays
diff --git a/sort/QuickSort.h b/sort/QuickSort.h
index 1a43464..56d7325 100644
--- a/sort/QuickSort.h
+++ b/sort/QuickSort.h
@@ -4,7 +4,7 @@
 #include "../stdlib/Types.h"
 #include "../utils/Utils.h"
 
-size_t quicksort_partition(void* arr, size_t size, size_t low, size_t high, int32 (*compare)(const void*, const void*)) {
+size_t quicksort_partition(void* arr, size_t size, size_t low, size_t high, int32 (*compare)(const void* __restrict, const void* __restrict)) noexcept {
     char* base = (char*) arr;
     void* pivot = base + high * size;
     size_t i = low;
@@ -20,7 +20,7 @@ size_t quicksort_partition(void* arr, size_t size, size_t low, size_t high, int3
     return i;
 }
 
-void quicksort(void* arr, size_t size, size_t low, size_t high, int32 (*compare)(const void*, const void*)) {
+void quicksort(void* arr, size_t size, size_t low, size_t high, int32 (*compare)(const void* __restrict, const void* __restrict)) noexcept {
     if (low < high) {
         size_t pi = quicksort_partition(arr, size, low, high, compare);
 
diff --git a/sort/Sort.h b/sort/Sort.h
index 08c842c..94dd9c1 100644
--- a/sort/Sort.h
+++ b/sort/Sort.h
@@ -8,7 +8,7 @@
 #include "InsertionSort.h"
 
 inline
-void sort_introsort(void* arr, size_t num, size_t size, int32 (*compare)(const void*, const void*)) {
+void sort_introsort(void* arr, size_t num, size_t size, int32 (*compare)(const void* __restrict, const void* __restrict) noexcept) noexcept {
     size_t depth_limit = 0;
     for (size_t n = num; n > 0; n >>= 1) {
         ++depth_limit;
@@ -20,14 +20,14 @@ void sort_introsort(void* arr, size_t num, size_t size, int32 (*compare)(const v
 }
 
 inline
-void sort_quicksort(void* arr, size_t num, size_t size, int32 (*compare)(const void*, const void*)) {
+void sort_quicksort(void* arr, size_t num, size_t size, int32 (*compare)(const void* __restrict, const void* __restrict) noexcept) noexcept {
     quicksort(arr, size, 0, num - 1, compare);
 }
 
 #define sort_heapsort heapsort
 #define sort_insertionsort insertionsort
 
-int32 sort_compare_int32(const void* a, const void* b) {
+int32 sort_compare_int32(const void* __restrict a, const void* __restrict b) noexcept {
     return (*(int32 *) a) - (*(int32 *) b);
 }
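Since the comparator type now carries __restrict and noexcept, custom comparators at the call sites must match it to convert. A usage sketch with hypothetical data; the (a > b) - (a < b) form shown here avoids the signed-subtraction overflow that plain subtraction risks for large magnitudes:

    // Descending comparator matching the new function-pointer type.
    int32 compare_desc(const void* __restrict a, const void* __restrict b) noexcept {
        const int32 lhs = *(const int32 *) a;
        const int32 rhs = *(const int32 *) b;
        return (rhs > lhs) - (rhs < lhs); // overflow-safe three-way compare
    }

    void sort_example() {
        int32 values[] = { 42, -7, 13, 0, 99 };
        sort_introsort(values, 5, sizeof(int32), sort_compare_int32); // ascending
        sort_introsort(values, 5, sizeof(int32), compare_desc);       // descending
    }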
diff --git a/stdlib/HashMap.h b/stdlib/HashMap.h
index 228289d..fd03da2 100644
--- a/stdlib/HashMap.h
+++ b/stdlib/HashMap.h
@@ -117,10 +117,11 @@ struct HashMap {
     // Values are 1-indexed/offset since 0 means not used/found
     uint16* table;
 
-    // @todo We might want to align the ChunkMemory memory to 8byte, currently it's either 4 or 8 byte depending on the length
+    // @question We might want to align the ChunkMemory memory to 8byte, currently it's either 4 or 8 byte depending on the length
     ChunkMemory buf;
 };
 
+inline
 void hashmap_alloc(HashMap* hm, int32 count, int32 element_size)
 {
     byte* data = (byte *) platform_alloc(
@@ -133,7 +134,7 @@ void hashmap_alloc(HashMap* hm, int32 count, int32 element_size)
     DEBUG_MEMORY_INIT((uintptr_t) hm->buf.memory, hm->buf.size);
     LOG_INCREMENT_BY(DEBUG_COUNTER_MEM_ALLOC, hm->buf.size);
 
-    LOG_LEVEL_2("Allocated HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
+    LOG_FORMAT_2("Allocated HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
 }
 
 inline
@@ -149,7 +150,8 @@ void hashmap_free(HashMap* hm)
 }
 
 // WARNING: element_size = element size + remaining HashEntry data size
-void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ring)
+inline
+void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ring) noexcept
 {
     byte* data = ring_get_memory(
         ring,
@@ -160,11 +162,12 @@ void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ri
     hm->table = (uint16 *) data;
     chunk_init(&hm->buf, data + sizeof(uint16) * count, count, element_size, 8);
 
-    LOG_LEVEL_2("Created HashMap for %n elements with %n B per element = %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}, {LOG_DATA_UINT64, &hm->buf.size}});
+    LOG_FORMAT_2("Created HashMap for %n elements with %n B per element = %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}, {LOG_DATA_UINT64, &hm->buf.size}});
 }
 
 // WARNING: element_size = element size + remaining HashEntry data size
-void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory* buf)
+inline
+void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory* buf) noexcept
 {
     byte* data = buffer_get_memory(
         buf,
@@ -175,19 +178,21 @@ void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory*
     hm->table = (uint16 *) data;
     chunk_init(&hm->buf, data + sizeof(uint16) * count, count, element_size, 8);
 
-    LOG_LEVEL_2("Created HashMap for %n elements with %n B per element = %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}, {LOG_DATA_UINT64, &hm->buf.size}});
+    LOG_FORMAT_2("Created HashMap for %n elements with %n B per element = %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}, {LOG_DATA_UINT64, &hm->buf.size}});
 }
 
 // WARNING: element_size = element size + remaining HashEntry data size
-void hashmap_create(HashMap* hm, int32 count, int32 element_size, byte* buf)
+inline
+void hashmap_create(HashMap* hm, int32 count, int32 element_size, byte* buf) noexcept
 {
     hm->table = (uint16 *) buf;
     chunk_init(&hm->buf, buf + sizeof(uint16) * count, count, element_size, 8);
 
-    LOG_LEVEL_2("Created HashMap for %n elements with %n B per element = %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}, {LOG_DATA_UINT64, &hm->buf.size}});
+    LOG_FORMAT_2("Created HashMap for %n elements with %n B per element = %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}, {LOG_DATA_UINT64, &hm->buf.size}});
 }
 
-void hashmap_update_data_pointer(HashMap* hm, byte* data)
+inline
+void hashmap_update_data_pointer(HashMap* hm, byte* data) noexcept
 {
     hm->table = (uint16 *) data;
     hm->buf.memory = data + sizeof(uint16) * hm->buf.count;
@@ -195,7 +200,7 @@ void hashmap_update_data_pointer(HashMap* hm, byte* data)
 
 // Calculates how large a hashmap will be
 inline
-int64 hashmap_size(int count, int32 element_size)
+int64 hashmap_size(int32 count, int32 element_size) noexcept
 {
     return count * sizeof(element_size) // table
         + count * element_size // elements
@@ -203,7 +208,7 @@ int64 hashmap_size(int count, int32 element_size)
 }
 
 inline
-int64 hashmap_size(const HashMap* hm)
+int64 hashmap_size(const HashMap* hm) noexcept
 {
     return hm->buf.count * sizeof(uint16) + hm->buf.size;
 }
@@ -211,7 +216,7 @@ int64 hashmap_size(const HashMap* hm)
 /////////////////////////////
 // string key
 /////////////////////////////
-void hashmap_insert(HashMap* hm, const char* key, int32 value) {
+void hashmap_insert(HashMap* hm, const char* key, int32 value) noexcept {
     uint64 index = hash_djb2(key) % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -233,7 +238,7 @@ void hashmap_insert(HashMap* hm, const char* key, int32 value) {
     *target = (uint16) (element + 1);
 }
 
-void hashmap_insert(HashMap* hm, const char* key, int64 value) {
+void hashmap_insert(HashMap* hm, const char* key, int64 value) noexcept {
     uint64 index = hash_djb2(key) % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -255,7 +260,7 @@ void hashmap_insert(HashMap* hm, const char* key, int64 value) {
     *target = (uint16) (element + 1);
 }
 
-void hashmap_insert(HashMap* hm, const char* key, uintptr_t value) {
+void hashmap_insert(HashMap* hm, const char* key, uintptr_t value) noexcept {
     uint64 index = hash_djb2(key) % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -277,7 +282,7 @@ void hashmap_insert(HashMap* hm, const char* key, uintptr_t value) {
     *target = (uint16) (element + 1);
 }
 
-void hashmap_insert(HashMap* hm, const char* key, void* value) {
+void hashmap_insert(HashMap* hm, const char* key, void* value) noexcept {
     uint64 index = hash_djb2(key) % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -299,7 +304,7 @@ void hashmap_insert(HashMap* hm, const char* key, void* value) {
     *target = (uint16) (element + 1);
 }
 
-void hashmap_insert(HashMap* hm, const char* key, f32 value) {
+void hashmap_insert(HashMap* hm, const char* key, f32 value) noexcept {
     uint64 index = hash_djb2(key) % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -321,7 +326,7 @@ void hashmap_insert(HashMap* hm, const char* key, f32 value) {
     *target = (uint16) (element + 1);
 }
 
-void hashmap_insert(HashMap* hm, const char* key, const char* value) {
+void hashmap_insert(HashMap* hm, const char* key, const char* value) noexcept {
     uint64 index = hash_djb2(key) % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -345,7 +350,7 @@ void hashmap_insert(HashMap* hm, const char* key, const char* value) {
     *target = (uint16) (element + 1);
 }
 
-HashEntry* hashmap_insert(HashMap* hm, const char* key, byte* value) {
+HashEntry* hashmap_insert(HashMap* hm, const char* key, byte* value) noexcept {
     uint64 index = hash_djb2(key) % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -372,7 +377,7 @@ HashEntry* hashmap_insert(HashMap* hm, const char* key, byte* value) {
     return entry;
 }
 
-HashEntry* hashmap_reserve(HashMap* hm, const char* key) {
+HashEntry* hashmap_reserve(HashMap* hm, const char* key) noexcept {
     uint64 index = hash_djb2(key) % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -398,7 +403,7 @@ HashEntry* hashmap_reserve(HashMap* hm, const char* key) {
 }
 
 // Returns existing element or element to be filled
-HashEntry* hashmap_get_reserve(HashMap* hm, const char* key)
+HashEntry* hashmap_get_reserve(HashMap* hm, const char* key) noexcept
 {
     uint64 index = hash_djb2(key) % hm->buf.count;
     HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, hm->table[index] - 1, false);
@@ -437,12 +442,12 @@ HashEntry* hashmap_get_reserve(HashMap* hm, const char* key)
 
 // @performance Some places use this in order to iterate the hashmap that is horrible!!! Use the actual iterate function!
 inline
-HashEntry* hashmap_get_entry_by_element(HashMap* hm, uint32 element)
+HashEntry* hashmap_get_entry_by_element(HashMap* hm, uint32 element) noexcept
 {
     return (HashEntry *) chunk_get_element(&hm->buf, element - 1, false);
 }
 
-HashEntry* hashmap_get_entry(HashMap* hm, const char* key) {
+HashEntry* hashmap_get_entry(HashMap* hm, const char* key) noexcept {
     uint64 index = hash_djb2(key) % hm->buf.count;
     HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, hm->table[index] - 1, false);
 
@@ -458,7 +463,7 @@ HashEntry* hashmap_get_entry(HashMap* hm, const char* key) {
     return NULL;
 }
 
-uint32 hashmap_get_element(const HashMap* hm, const char* key) {
+uint32 hashmap_get_element(const HashMap* hm, const char* key) noexcept {
     uint64 index = hash_djb2(key) % hm->buf.count;
     const HashEntry* entry = (const HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, hm->table[index] - 1, false);
 
@@ -478,14 +483,14 @@ uint32 hashmap_get_element(const HashMap* hm, const char* key) {
 }
 
 inline
-uint32 hashmap_get_element_by_entry(const HashMap* hm, const HashEntry* entry)
+uint32 hashmap_get_element_by_entry(const HashMap* hm, const HashEntry* entry) noexcept
 {
     return chunk_id_from_memory(&hm->buf, (byte *) entry) + 1;
 }
 
 // This function only saves one step (omission of the hash function)
 // The reason for this is in some cases we can use compile time hashing
-HashEntry* hashmap_get_entry(HashMap* hm, const char* key, uint64 hash) {
+HashEntry* hashmap_get_entry(HashMap* hm, const char* key, uint64 hash) noexcept {
     hash %= hm->buf.count;
     HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, hm->table[hash] - 1, false);
 
@@ -504,7 +509,7 @@ HashEntry* hashmap_get_entry(HashMap* hm, const char* key, uint64 hash) {
 // @performance If we had a doubly linked list we could delete keys much easier
 // However that would make insertion slower
 // Maybe we create a nother hashmap that is doubly linked
-void hashmap_remove(HashMap* hm, const char* key) {
+void hashmap_remove(HashMap* hm, const char* key) noexcept {
     uint64 index = hash_djb2(key) % hm->buf.count;
     HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, hm->table[index] - 1, false);
     HashEntry* prev = NULL;
@@ -533,7 +538,7 @@ void hashmap_remove(HashMap* hm, const char* key) {
 /////////////////////////////
 // int key
 /////////////////////////////
-void hashmap_insert(HashMap* hm, int32 key, int32 value) {
+void hashmap_insert(HashMap* hm, int32 key, int32 value) noexcept {
     uint64 index = key % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -551,7 +556,7 @@ void hashmap_insert(HashMap* hm, int32 key, int32 value) {
     *target = (uint16) (element + 1);
 }
 
-void hashmap_insert(HashMap* hm, int32 key, int64 value) {
+void hashmap_insert(HashMap* hm, int32 key, int64 value) noexcept {
     uint64 index = key % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -569,7 +574,7 @@ void hashmap_insert(HashMap* hm, int32 key, int64 value) {
     *target = (uint16) (element + 1);
 }
 
-void hashmap_insert(HashMap* hm, int32 key, uintptr_t value) {
+void hashmap_insert(HashMap* hm, int32 key, uintptr_t value) noexcept {
     uint64 index = key % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -587,7 +592,7 @@ void hashmap_insert(HashMap* hm, int32 key, uintptr_t value) {
     *target = (uint16) (element + 1);
 }
 
-void hashmap_insert(HashMap* hm, int32 key, void* value) {
+void hashmap_insert(HashMap* hm, int32 key, void* value) noexcept {
     uint64 index = key % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -605,7 +610,7 @@ void hashmap_insert(HashMap* hm, int32 key, void* value) {
     *target = (uint16) (element + 1);
 }
 
-void hashmap_insert(HashMap* hm, int32 key, f32 value) {
+void hashmap_insert(HashMap* hm, int32 key, f32 value) noexcept {
     uint64 index = key % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -623,7 +628,7 @@ void hashmap_insert(HashMap* hm, int32 key, f32 value) {
     *target = (uint16) (element + 1);
 }
 
-void hashmap_insert(HashMap* hm, int32 key, const char* value) {
+void hashmap_insert(HashMap* hm, int32 key, const char* value) noexcept {
     uint64 index = key % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -644,7 +649,7 @@ void hashmap_insert(HashMap* hm, int32 key, const char* value) {
     *target = (uint16) (element + 1);
 }
 
-void hashmap_insert(HashMap* hm, int32 key, byte* value) {
+void hashmap_insert(HashMap* hm, int32 key, byte* value) noexcept {
     uint64 index = key % hm->buf.count;
     int32 element = chunk_reserve(&hm->buf, 1);
 
@@ -665,7 +670,7 @@ void hashmap_insert(HashMap* hm, int32 key, byte* value) {
     *target = (uint16) (element + 1);
 }
 
-HashEntryKeyInt32* hashmap_get_entry(HashMap* hm, int32 key) {
+HashEntryKeyInt32* hashmap_get_entry(HashMap* hm, int32 key) noexcept {
     uint64 index = key % hm->buf.count;
     HashEntryKeyInt32* entry = (HashEntryKeyInt32 *) chunk_get_element(&hm->buf, hm->table[index] - 1, false);
 
@@ -683,7 +688,7 @@ HashEntryKeyInt32* hashmap_get_entry(HashMap* hm, int32 key) {
 
 // This function only saves one step (omission of the hash function)
 // The reason for this is in some cases we can use compile time hashing
-HashEntryKeyInt32* hashmap_get_entry(HashMap* hm, int32 key, uint64 hash) {
+HashEntryKeyInt32* hashmap_get_entry(HashMap* hm, int32 key, uint64 hash) noexcept {
     hash %= hm->buf.count;
     HashEntryKeyInt32* entry = (HashEntryKeyInt32 *) chunk_get_element(&hm->buf, hm->table[hash] - 1, false);
 
@@ -702,7 +707,7 @@ HashEntryKeyInt32* hashmap_get_entry(HashMap* hm, int32 key, uint64 hash) {
 // @performance If we had a doubly linked list we could delete keys much easier
 // However that would make insertion slower
 // Maybe we create a nother hashmap that is doubly linked
-void hashmap_remove(HashMap* hm, int32 key) {
+void hashmap_remove(HashMap* hm, int32 key) noexcept {
     uint64 index = key % hm->buf.count;
     HashEntryKeyInt32* entry = (HashEntryKeyInt32 *) chunk_get_element(&hm->buf, hm->table[index] - 1, false);
     HashEntryKeyInt32* prev = NULL;
@@ -729,7 +734,7 @@ void hashmap_remove(HashMap* hm, int32 key) {
 }
 
 inline
-int32 hashmap_value_size(const HashMap* hm)
+int32 hashmap_value_size(const HashMap* hm) noexcept
 {
     return (uint32) (
         hm->buf.chunk_size
@@ -850,7 +855,7 @@ int64 hashmap_load(HashMap* hm, const byte* data, [[maybe_unused]] int32 steps =
     }
     chunk_iterate_end;
 
-    LOG_LEVEL_2("Loaded HashMap: %n B", {{LOG_DATA_UINT64, &hm->buf.size}});
+    LOG_FORMAT_2("Loaded HashMap: %n B", {{LOG_DATA_UINT64, &hm->buf.size}});
 
     // How many bytes was read from data
     return sizeof(hm->buf.count) // hash map count = buffer count
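A usage sketch for the string-keyed API. The sizes are hypothetical, and per the WARNING comments the element size must cover the HashEntry bookkeeping plus the payload; 64 B is an arbitrary stand-in here since HashEntry's layout is not part of these hunks:

    void hashmap_example(BufferMemory* buf) {
        HashMap hm;
        hashmap_create(&hm, 128, 64, buf); // 128 slots, 64 B per element (assumed)

        hashmap_insert(&hm, "player_speed", 6.5f);        // f32 overload
        hashmap_insert(&hm, "player_id", (int32) 1337);   // int32 overload

        HashEntry* entry = hashmap_get_entry(&hm, "player_speed");
        if (entry) {
            // read the value out of the entry (layout not shown in this diff)
        }

        hashmap_remove(&hm, "player_id");
    }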
diff --git a/stdlib/PerfectHashMap.h b/stdlib/PerfectHashMap.h
index 0620dd6..7b75ee5 100644
--- a/stdlib/PerfectHashMap.h
+++ b/stdlib/PerfectHashMap.h
@@ -119,7 +119,7 @@ void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size,
         0, true
     );
 
-    LOG_LEVEL_2("Created PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
+    LOG_FORMAT_2("Created PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
 }
 
 // WARNING: element_size = element size + remaining HashEntry data size
@@ -129,7 +129,7 @@ void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size,
     hm->entry_size = element_size;
     hm->hash_entries = buf;
 
-    LOG_LEVEL_2("Created PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
+    LOG_FORMAT_2("Created PerfectHashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}});
 }
 
 // Calculates how large a hashmap will be
diff --git a/system/SystemInfo.h b/system/SystemInfo.h
index 283d8a3..8318357 100644
--- a/system/SystemInfo.h
+++ b/system/SystemInfo.h
@@ -67,4 +67,18 @@ struct SystemInfo {
     int32 language;
 };
 
+enum RamChannelType {
+    RAM_CHANNEL_TYPE_FAILED,
+    RAM_CHANNEL_TYPE_SINGLE_CHANNEL,
+    RAM_CHANNEL_TYPE_CAN_UPGRADE,
+    RAM_CHANNEL_TYPE_DUAL_CHANNEL,
+};
+
+enum DriveType {
+    DRIVE_TYPE_UNKNOWN,
+    DRIVE_TYPE_NVME,
+    DRIVE_TYPE_SSD,
+    DRIVE_TYPE_HDD,
+};
+
 #endif
\ No newline at end of file
diff --git a/thread/Thread.h b/thread/Thread.h
index 154c479..0d45645 100644
--- a/thread/Thread.h
+++ b/thread/Thread.h
@@ -11,6 +11,7 @@
 #include
 #include "../stdlib/Types.h"
+#include "../log/Log.h"
 #include "Atomic.h"
 
 #if _WIN32
@@ -24,8 +25,7 @@
 void thread_create(Worker* worker, ThreadJobFunc routine, void* arg)
 {
-    // @todo test to remove {}
-    LOG_LEVEL_2("Thread started", {});
+    LOG_2("Thread started");
     pthread_create(&worker->thread, NULL, routine, arg);
 }
 
@@ -33,7 +33,7 @@ void thread_stop(Worker* worker)
 {
     atomic_set_acquire(&worker->state, 0);
     pthread_join(worker->thread, NULL);
-    LOG_LEVEL_2("Thread ended", {});
+    LOG_2("Thread ended");
 }
 
 #endif
\ No newline at end of file
diff --git a/thread/ThreadPool.h b/thread/ThreadPool.h
index aea3533..2ec2d42 100644
--- a/thread/ThreadPool.h
+++ b/thread/ThreadPool.h
@@ -60,12 +60,12 @@ static THREAD_RETURN thread_pool_worker(void* arg)
     atomic_increment_relaxed(&pool->working_cnt);
     atomic_set_release(&work->state, 2);
 
-    LOG_LEVEL_2("ThreadPool worker started", {});
+    LOG_2("ThreadPool worker started");
     work->func(work);
-    LOG_LEVEL_2("ThreadPool worker ended", {});
+    LOG_2("ThreadPool worker ended");
 
     // At the end of a thread the ring memory automatically is considered freed
     DEBUG_MEMORY_FREE((uintptr_t) work->ring.memory, work->ring.size);
-    LOG_LEVEL_2("Freed thread RingMemory: %n B", {{LOG_DATA_UINT64, &work->ring.size}});
+    LOG_FORMAT_2("Freed thread RingMemory: %n B", {{LOG_DATA_UINT64, &work->ring.size}});
 
     atomic_set_release(&work->state, 1); // Job gets marked after completion -> can be overwritten now
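The Thread.h hunk establishes LOG_2 for message-only logs, which the ThreadPool hunk follows as well. The lifecycle these helpers implement, as a sketch: ThreadJobFunc is assumed here to be the pthread-style void* (*)(void*) signature that pthread_create expects, and the job function is hypothetical.

    void* upload_job(void* arg) {
        // ... do the work, optionally polling Worker::state so that
        // thread_stop can request a shutdown ...
        return NULL;
    }

    void thread_example() {
        Worker worker = {};
        thread_create(&worker, upload_job, &worker); // logs "Thread started"
        // ... main thread continues ...
        thread_stop(&worker); // zeroes state, joins, logs "Thread ended"
    }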
diff --git a/ui/UIInput.h b/ui/UIInput.h
index 6d39661..de8cda5 100644
--- a/ui/UIInput.h
+++ b/ui/UIInput.h
@@ -206,7 +206,7 @@ int32 ui_input_element_update(UILayout* layout, UIElement* element)
     // Border
     if (input->border.thickness) {
         idx += vertex_rect_create(
-            layout->vertices_active + element->vertices_active_offset, zindex,
+            layout->vertices_active + element->vertices_active_offset, zindex, -1,
             dimension, input->dimension.alignment,
             input->border.color
         );
@@ -224,7 +224,7 @@ int32 ui_input_element_update(UILayout* layout, UIElement* element)
     // Background
     if (input->background.background_color) {
         idx += vertex_rect_create(
-            layout->vertices_active + element->vertices_active_offset + idx, zindex,
+            layout->vertices_active + element->vertices_active_offset + idx, zindex, -1,
             dimension, input->dimension.alignment,
             input->background.background_color
         );
diff --git a/ui/UILabel.h b/ui/UILabel.h
index dc1143c..b096806 100644
--- a/ui/UILabel.h
+++ b/ui/UILabel.h
@@ -127,7 +127,7 @@ int32 ui_label_element_update(UILayout* layout, UIElement* element)
     UILabelState* state = (UILabelState *) (layout->data + element->state);
 
     return vertex_text_create(
-        layout->vertices_active + element->vertices_active_offset, element->zindex,
+        layout->vertices_active + element->vertices_active_offset, element->zindex, -1,
         label->dimension.dimension,
         label->font.alignment, layout->font, state->content,
         label->font.size, label->font.color
diff --git a/ui/UILayout.cpp b/ui/UILayout.cpp
index 5aefe4e..7e12b66 100644
--- a/ui/UILayout.cpp
+++ b/ui/UILayout.cpp
@@ -567,6 +567,8 @@ int32 layout_from_data(
     const byte* __restrict data,
     UILayout* __restrict layout
 ) {
+    PROFILE_VERBOSE(PROFILE_LAYOUT_FROM_DATA, "");
+
     const byte* in = data;
 
     int32 version = SWAP_ENDIAN_LITTLE(*((int32 *) in));
@@ -598,6 +600,8 @@ void layout_from_theme(
     UILayout* __restrict layout,
     const UIThemeStyle* __restrict theme
 ) {
+    PROFILE_VERBOSE(PROFILE_LAYOUT_FROM_THEME, "");
+
     // @todo Handle animations
     // @todo Handle vertices_active offset
     if (theme->font) {
@@ -827,7 +831,7 @@ void ui_layout_update_dfs(UILayout* layout, UIElement* element, byte category =
 uint32 ui_layout_render_dfs(
     UILayout* layout,
-    UIElement* element, Vertex3DTextureColor* __restrict vertices,
+    UIElement* element, Vertex3DSamplerTextureColor* __restrict vertices,
     byte category = 0
 ) {
     if (element->type == UI_ELEMENT_TYPE_MANUAL
@@ -864,7 +868,7 @@ uint32 ui_layout_render_dfs(
 uint32 ui_layout_update_render_dfs(
     UILayout* layout,
-    UIElement* __restrict element, Vertex3DTextureColor* __restrict vertices,
+    UIElement* __restrict element, Vertex3DSamplerTextureColor* __restrict vertices,
     byte category = 0
 ) {
     if (element->type == UI_ELEMENT_TYPE_MANUAL
@@ -902,13 +906,13 @@ uint32 ui_layout_update_render_dfs(
 }
 
 inline
-uint32 layout_element_from_location(UILayout* layout, uint16 x, uint16 y)
+uint32 layout_element_from_location(UILayout* layout, uint16 x, uint16 y) noexcept
 {
     return layout->ui_chroma_codes[layout->width * y / 4 + x / 4];
 }
 
 inline
-UIElement* layout_get_element(const UILayout* __restrict layout, const char* __restrict element)
+UIElement* layout_get_element(const UILayout* __restrict layout, const char* __restrict element) noexcept
 {
     HashEntryInt32* entry = (HashEntryInt32 *) hashmap_get_entry((HashMap *) &layout->hash_map, element);
     if (!entry) {
@@ -919,13 +923,13 @@ UIElement* layout_get_element(const UILayout* __restrict layout, const char* __r
 }
 
 inline
-void* layout_get_element_state(const UILayout* layout, UIElement* element)
+void* layout_get_element_state(const UILayout* layout, UIElement* element) noexcept
 {
     return layout->data + element->state;
 }
 
 inline
-void* layout_get_element_style(const UILayout* layout, UIElement* element, UIStyleType style_type)
+void* layout_get_element_style(const UILayout* layout, UIElement* element, UIStyleType style_type) noexcept
 {
     if (!element) {
         return NULL;
@@ -935,7 +939,7 @@ void* layout_get_element_style(const UILayout* layout, UIElement* element, UISty
 }
 
 inline
-UIElement* layout_get_element_parent(const UILayout* layout, UIElement* element)
+UIElement* layout_get_element_parent(const UILayout* layout, UIElement* element) noexcept
 {
     if (!element) {
         return NULL;
@@ -945,7 +949,7 @@ UIElement* layout_get_element_parent(const UILayout* layout, UIElement* element)
 }
 
 inline
-UIElement* layout_get_element_child(const UILayout* layout, UIElement* element, uint16 child)
+UIElement* layout_get_element_child(const UILayout* layout, UIElement* element, uint16 child) noexcept
 {
     if (!element) {
         return NULL;
diff --git a/ui/UILayout.h b/ui/UILayout.h
index fe65289..b342285 100644
--- a/ui/UILayout.h
+++ b/ui/UILayout.h
@@ -106,7 +106,7 @@ struct UILayout {
     // The reason for this is that some elements may need different vertex counts for different states (e.g. input field)
     // WARNING: This memory is shared between different layouts
     uint32 active_vertex_size;
-    Vertex3DTextureColor* vertices_active; // Not the data owner (see data above)
+    Vertex3DSamplerTextureColor* vertices_active; // Not the data owner (see data above)
 
     // Used during the initialization so that every element knows where we currently are during the setup process
     uint32 active_vertex_offset;
diff --git a/ui/UITheme.h b/ui/UITheme.h
index de81859..d1f803b 100644
--- a/ui/UITheme.h
+++ b/ui/UITheme.h
@@ -49,7 +49,7 @@ UIAttributeGroup* theme_style_group(UIThemeStyle* theme, const char* group_name)
 }
 
 static inline
-int compare_by_attribute_id(const void* a, const void* b) {
+int compare_by_attribute_id(const void* __restrict a, const void* __restrict b) noexcept {
     UIAttribute* attr_a = (UIAttribute *) a;
     UIAttribute* attr_b = (UIAttribute *) b;
 
@@ -267,6 +267,7 @@ int32 theme_from_data(
     const byte* __restrict data,
     UIThemeStyle* __restrict theme
 ) {
+    PROFILE_VERBOSE(PROFILE_THEME_FROM_THEME, "");
     const byte* in = data;
 
     int32 version = SWAP_ENDIAN_LITTLE(*((int32 *) in));
diff --git a/utils/BitUtils.h b/utils/BitUtils.h
index 504493b..d5d646f 100644
--- a/utils/BitUtils.h
+++ b/utils/BitUtils.h
@@ -60,7 +60,7 @@ struct BitWalk {
 };
 
 inline
-void bits_walk(BitWalk* stream, uint32 bits_to_walk)
+void bits_walk(BitWalk* stream, uint32 bits_to_walk) noexcept
 {
     stream->bit_pos += bits_to_walk;
     stream->pos += stream->bit_pos / 8;
@@ -68,7 +68,7 @@ void bits_walk(BitWalk* stream, uint32 bits_to_walk)
 }
 
 inline
-void bits_flush(BitWalk* stream)
+void bits_flush(BitWalk* stream) noexcept
 {
     if (stream->bit_pos > 0) {
         stream->bit_pos = 0;
@@ -286,7 +286,7 @@ void bits_flush(BitWalk* stream)
 // }
 
 static
-inline int32 find_first_set_bit(int32 value) {
+inline int32 find_first_set_bit(int32 value) noexcept {
     if (value == 0) {
         return 0;
     }
@@ -294,27 +294,27 @@ inline int32 find_first_set_bit(int32 value) {
 #if __GNUC__ || __clang__
     return __builtin_ffs(value);
 #elif _MSC_VER
-    unsigned long index; // For _BitScanForward, an unsigned long is expected
+    unsigned long index;
     if (_BitScanForward(&index, value)) {
-        return (int32) index + 1; // Convert to 1-based index
+        return (int32) index + 1;
     } else {
-        return 0; // No set bit found
+        return 0;
     }
 #else
-    int32 index = 1; // Start at 1 for 1-based index
+    int32 index = 1;
     while (value) {
         if (value & 1) {
             return index;
         }
-        value >>= 1; // Shift right to check the next bit
+        value >>= 1;
         index++;
     }
-    return 0; // No set bit found
+    return 0;
 #endif
 }
 
 inline
-uint32 bits_reverse(uint32 data, uint32 count)
+uint32 bits_reverse(uint32 data, uint32 count) noexcept
 {
     uint32 reversed = 0;
     for (uint32 i = 0; i <= (count / 2); ++i) {
@@ -345,7 +345,7 @@ static const int32 BIT_COUNT_LOOKUP_TABLE[256] = {
     4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
 };
 
-int32 bits_count(uint64 data, bool use_abm = false) {
+int32 bits_count(uint64 data, bool use_abm = false) noexcept {
     if (use_abm) {
         return (int32) intrin_bits_count_64(data);
     } else {
@@ -360,7 +360,7 @@ int32 bits_count(uint64 data, bool use_abm = false) {
     }
 }
 
-int32 bits_count(uint32 data, bool use_abm = false) {
+int32 bits_count(uint32 data, bool use_abm = false) noexcept {
     if (use_abm) {
         return intrin_bits_count_32(data);
     } else {
@@ -371,12 +371,12 @@ int32 bits_count(uint32 data, bool use_abm = false) {
     }
 }
 
-int32 bits_count(uint16 data) {
+int32 bits_count(uint16 data) noexcept {
     return BIT_COUNT_LOOKUP_TABLE[data & 0xFF]
         + BIT_COUNT_LOOKUP_TABLE[(data >> 8) & 0xFF];
}
 
-int32 bits_count(uint8 data) {
+int32 bits_count(uint8 data) noexcept {
     return BIT_COUNT_LOOKUP_TABLE[data];
 }
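Both bits_count paths are shown above: with use_abm the popcnt intrinsic runs, otherwise the value is folded through the 256-entry lookup table one byte at a time. Expected results for a couple of hypothetical inputs:

    void bits_example() {
        // 0xF0F0F0F0F0F0F0F0 has four set bits per byte -> 32 total,
        // regardless of which path runs.
        int32 table_path = bits_count((uint64) 0xF0F0F0F0F0F0F0F0ULL);
        int32 abm_path   = bits_count((uint64) 0xF0F0F0F0F0F0F0F0ULL, true);

        // find_first_set_bit is 1-based; 0 means "no bit set".
        int32 first = find_first_set_bit(8); // bit 3 is set -> returns 4
        int32 none  = find_first_set_bit(0); // returns 0
    }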
diff --git a/utils/EndianUtils.h b/utils/EndianUtils.h
index 5ee1509..d01a94a 100644
--- a/utils/EndianUtils.h
+++ b/utils/EndianUtils.h
@@ -26,26 +26,26 @@
 #endif
 
 inline
-bool is_little_endian()
+bool is_little_endian() noexcept
 {
     uint32 num = 1;
     return ((int32) (*(char *) & num)) == 1;
 }
 
 inline
-uint16 endian_swap(uint16 val)
+uint16 endian_swap(uint16 val) noexcept
 {
     return ((val << 8) | (val >> 8));
 }
 
 inline
-int16 endian_swap(int16 val)
+int16 endian_swap(int16 val) noexcept
 {
     return (int16) ((val << 8) | (val >> 8));
 }
 
 inline
-uint32 endian_swap(uint32 val)
+uint32 endian_swap(uint32 val) noexcept
 {
     return ((val << 24)
         | ((val & 0xFF00) << 8)
@@ -54,7 +54,7 @@ uint32 endian_swap(uint32 val)
 }
 
 inline
-int32 endian_swap(int32 val)
+int32 endian_swap(int32 val) noexcept
 {
     return (int32) ((val << 24)
         | ((val & 0xFF00) << 8)
@@ -63,7 +63,7 @@ int32 endian_swap(int32 val)
 }
 
 inline
-uint64 endian_swap(uint64 val)
+uint64 endian_swap(uint64 val) noexcept
 {
     return ((val << 56)
         | ((val & 0x000000000000FF00ULL) << 40)
@@ -76,7 +76,7 @@ uint64 endian_swap(uint64 val)
 }
 
 inline
-int64 endian_swap(int64 val)
+int64 endian_swap(int64 val) noexcept
 {
     return (int64) ((val << 56)
        | ((val & 0x000000000000FF00ULL) << 40)
@@ -89,13 +89,13 @@ int64 endian_swap(int64 val)
 }
 
 inline
-f32 endian_swap(f32 val)
+f32 endian_swap(f32 val) noexcept
 {
     return (f32) endian_swap(val);
 }
 
 inline
-f64 endian_swap(f64 val)
+f64 endian_swap(f64 val) noexcept
 {
     return (f64) endian_swap(val);
 }
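A worked example of the 32-bit swap, which is a byte-wise reversal:

    void endian_example() {
        uint32 v = 0x11223344;
        uint32 swapped = endian_swap(v); // 0x44332211

        // On a little-endian host is_little_endian() returns true, so a
        // wrapper like SWAP_ENDIAN_LITTLE (used by layout_from_data and
        // theme_from_data above) can presumably skip the swap entirely.
    }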
diff --git a/utils/StringUtils.h b/utils/StringUtils.h
index 647d2f4..b955e7b 100644
--- a/utils/StringUtils.h
+++ b/utils/StringUtils.h
@@ -17,7 +17,7 @@
 #define HAS_CHAR(x, c) (HAS_ZERO((x) ^ (((size_t)-1 / 0xFF) * (c))))
 
 inline constexpr
-size_t str_length(const char* str)
+size_t str_length(const char* str) noexcept
 {
     const char* ptr = str;
 
@@ -50,7 +50,7 @@ size_t str_length(const char* str)
     }
 }
 
-const char* str_find(const char* str, const char* needle) {
+const char* str_find(const char* str, const char* needle) noexcept {
     size_t needle_len = str_length(needle);
     size_t str_len = str_length(str);
     size_t limit = str_len - needle_len + 1;
@@ -64,7 +64,7 @@ const char* str_find(const char* str, const char* needle) {
     return NULL;
 }
 
-const char* str_find(const char* str, char needle) {
+const char* str_find(const char* str, char needle) noexcept {
     byte target = (byte) needle;
 
     // Process byte-by-byte until alignment is achieved
@@ -108,7 +108,7 @@ const char* str_find(const char* str, char needle) {
 }
 
 inline
-int32 utf8_encode(uint32 codepoint, char* out)
+int32 utf8_encode(uint32 codepoint, char* out) noexcept
 {
     if (codepoint <= 0x7F) {
         // 1-byte sequence: 0xxxxxxx
@@ -142,7 +142,7 @@ int32 utf8_encode(uint32 codepoint, char* out)
 }
 
 inline
-int32 utf8_decode(const char* __restrict in, uint32* __restrict codepoint) {
+int32 utf8_decode(const char* __restrict in, uint32* __restrict codepoint) noexcept {
     byte ch = (byte) *in;
 
     if (ch <= 0x7F) {
@@ -171,7 +171,7 @@ int32 utf8_decode(const char* __restrict in, uint32* __restrict codepoint) {
 }
 
 inline
-int32 utf8_decode(const uint32 codepoint, char* __restrict out) {
+int32 utf8_decode(const uint32 codepoint, char* __restrict out) noexcept {
     if (codepoint <= 0x7F) {
         // 1-byte sequence (ASCII)
         out[0] = (char) codepoint;
@@ -204,7 +204,7 @@ int32 utf8_decode(const uint32 codepoint, char* __restrict out) {
 }
 
 inline
-int32 utf8_str_length(const char* in) {
+int32 utf8_str_length(const char* in) noexcept {
     int32 length = 0;
     int32 bytes;
     uint32 codepoint;
@@ -223,7 +223,7 @@ int32 utf8_str_length(const char* in) {
 }
 
 inline
-void string_to_utf8(const uint32* in, char* out) {
+void string_to_utf8(const uint32* in, char* out) noexcept {
     char buffer[5] = {0};
     while (*in) {
         int32 len = utf8_encode(*in, buffer);
@@ -236,7 +236,7 @@ void string_to_utf8(const uint32* in, char* out) {
 }
 
 inline
-int32 utf8_get_char_at(const char* in, int32 index) {
+int32 utf8_get_char_at(const char* in, int32 index) noexcept {
     int32 i = 0;
     int32 bytes_consumed;
     uint32 codepoint;
@@ -259,7 +259,7 @@ int32 utf8_get_char_at(const char* in, int32 index) {
 }
 
 inline
-void wchar_to_char(wchar_t* str)
+void wchar_to_char(wchar_t* str) noexcept
 {
     char* src = (char*) str;
     char* dest = src;
@@ -276,7 +276,7 @@ void wchar_to_char(wchar_t* str)
 }
 
 inline
-void wchar_to_char(const char* __restrict str, char* __restrict dest)
+void wchar_to_char(const char* __restrict str, char* __restrict dest) noexcept
 {
     while (*str != '\0' && str[1] != '\0') {
         if (*str != '\0') {
@@ -325,12 +325,12 @@ static const bool STR_IS_ALPHA_LOOKUP_TABLE[] = {
 };
 
 inline constexpr
-bool str_is_alpha(char str) {
+bool str_is_alpha(char str) noexcept {
     return STR_IS_ALPHA_LOOKUP_TABLE[(byte) str];
 }
 
 inline constexpr
-bool str_is_alpha(const char* str) {
+bool str_is_alpha(const char* str) noexcept {
     while (*str != '\0') {
         if (!str_is_alpha(*str++)) {
             return false;
@@ -376,7 +376,7 @@ static const bool STR_IS_NUM_LOOKUP_TABLE[] = {
 };
 
 inline constexpr
-bool str_is_num(char str) {
+bool str_is_num(char str) noexcept {
     return STR_IS_NUM_LOOKUP_TABLE[(byte) str];
 }
 
@@ -416,12 +416,12 @@ static const bool STR_IS_ALPHANUM_LOOKUP_TABLE[] = {
 };
 
 inline constexpr
-bool str_is_alphanum(char str) {
+bool str_is_alphanum(char str) noexcept {
     return STR_IS_ALPHANUM_LOOKUP_TABLE[(byte) str];
 }
 
 inline
-bool str_is_alphanum(const char* str) {
+bool str_is_alphanum(const char* str) noexcept {
     while (*str != '\0') {
         if (!str_is_alphanum(*str++)) {
             return false;
@@ -432,7 +432,7 @@ bool str_is_alphanum(const char* str) {
 }
 
 inline
-bool str_is_float(const char* str) {
+bool str_is_float(const char* str) noexcept {
     bool has_dot = false;
 
     if (*str == '-' || *str == '+') {
@@ -457,7 +457,7 @@ bool str_is_float(const char* str) {
 }
 
 inline
-bool str_is_integer(const char* str) {
+bool str_is_integer(const char* str) noexcept {
     if (*str == '-' || *str == '+') { [[unlikely]]
         str++;
     }
@@ -476,7 +476,7 @@ bool str_is_integer(const char* str) {
 }
 
 inline constexpr
-int64 str_to_int(const char* str, const char** pos = NULL)
+int64 str_to_int(const char* str, const char** pos = NULL) noexcept
 {
     int64 sign = 1;
     if (*str == '-') {
@@ -500,7 +500,7 @@ int64 str_to_int(const char* str, const char** pos = NULL)
 }
 
 inline
-int32 int_to_str(int64 number, char str[15], const char thousands)
+int32 int_to_str(int64 number, char str[15], const char thousands) noexcept
 {
     if (number == 0) {
         *str++ = '0';
@@ -543,7 +543,7 @@ int32 int_to_str(int64 number, char str[15], const char thousands)
 }
 
 inline constexpr
-int32 int_to_str(int64 number, char str[12]) {
+int32 int_to_str(int64 number, char str[12]) noexcept {
     int32 i = -1;
     int64 sign = number;
 
@@ -572,7 +572,7 @@ int32 int_to_str(int64 number, char str[12]) {
 }
 
 inline constexpr
-int32 uint_to_str(uint64 number, char str[12]) {
+int32 uint_to_str(uint64 number, char str[12]) noexcept {
     int32 i = -1;
 
     do {
@@ -632,7 +632,7 @@ static const bool HEX_LOOKUP_TABLE[256] = {
 };
 
 inline
-bool str_is_hex_color(const char* str)
+bool str_is_hex_color(const char* str) noexcept
 {
     if (str[0] != '#') {
         return false;
@@ -652,7 +652,7 @@ bool str_is_hex_color(const char* str)
 }
 
 inline constexpr
-int32 int_to_hex(int64 number, char str[9]) {
+int32 int_to_hex(int64 number, char str[9]) noexcept {
     int32 i = -1;
     uint64 n = (uint64) number;
 
@@ -674,7 +674,7 @@ int32 int_to_hex(int64 number, char str[9]) {
 }
 
 inline constexpr
-int64 hex_to_int(const char* hex)
+int64 hex_to_int(const char* hex) noexcept
 {
     int64 result = 0;
     while (HEX_LOOKUP_TABLE[(byte) *hex]) {
@@ -694,7 +694,7 @@ int64 hex_to_int(const char* hex)
 }
 
 inline
-size_t str_count(const char* __restrict str, const char* __restrict substr)
+size_t str_count(const char* __restrict str, const char* __restrict substr) noexcept
 {
     size_t l1 = str_length(str);
     size_t l2 = str_length(substr);
@@ -712,7 +712,7 @@ size_t str_count(const char* __restrict str, const char* __restrict substr)
 }
 
 inline constexpr
-int32 is_eol(const char* str)
+int32 is_eol(const char* str) noexcept
 {
     if (*str == '\n') { [[unlikely]]
         return 1;
@@ -724,7 +724,7 @@ int32 is_eol(const char* str)
 }
 
 inline
-int32 str_copy_until(char* __restrict dest, const char* __restrict src, char delim)
+int32 str_copy_until(char* __restrict dest, const char* __restrict src, char delim) noexcept
 {
     int32 len = 0;
     while (*src != delim && *src != '\0') {
@@ -739,7 +739,7 @@ int32 str_copy_until(char* __restrict dest, const char* __restrict src, char del
 }
 
 // @todo Inconsistent parameter order of dest and src with other functions
 inline
-void str_copy_until(const char* __restrict src, char* __restrict dest, const char* __restrict delim)
+void str_copy_until(const char* __restrict src, char* __restrict dest, const char* __restrict delim) noexcept
 {
     size_t len = str_length(delim);
 
@@ -758,7 +758,7 @@ void str_copy_until(const char* __restrict src, char* __restrict dest, const cha
 }
 
 inline
-void str_copy_short(char* __restrict dest, const char* __restrict src, int32 length)
+void str_copy_short(char* __restrict dest, const char* __restrict src, int32 length) noexcept
 {
     int32 i = -1;
     while (*src != '\0' && ++i < length) {
@@ -769,7 +769,7 @@ void str_copy_short(char* __restrict dest, const char* __restrict src, int32 len
 }
 
 inline
-void str_copy_short(char* __restrict dest, const char* __restrict src)
+void str_copy_short(char* __restrict dest, const char* __restrict src) noexcept
 {
     while (*src != '\0') {
         *dest++ = *src++;
@@ -779,7 +779,7 @@ void str_copy_short(char* __restrict dest, const char* __restrict src)
 }
 
 inline
-void str_copy_long(char* __restrict dest, const char* __restrict src)
+void str_copy_long(char* __restrict dest, const char* __restrict src) noexcept
 {
     char* d = dest;
     const char *s = src;
@@ -809,7 +809,7 @@ void str_copy_long(char* __restrict dest, const char* __restrict src)
 }
 
 inline
-void str_copy_move_until(const char** __restrict src, char* __restrict dest, char delim)
+void str_copy_move_until(const char** __restrict src, char* __restrict dest, char delim) noexcept
 {
     while (**src != delim && **src != '\0') {
         *dest++ = **src;
@@ -820,7 +820,7 @@ void str_copy_move_until(const char** __restrict src, char* __restrict dest, cha
 }
 
 inline
-void str_copy_move_until(const char** __restrict src, char* __restrict dest, const char* __restrict delim)
+void str_copy_move_until(const char** __restrict src, char* __restrict dest, const char* __restrict delim) noexcept
 {
     size_t len = str_length(delim);
 
@@ -840,7 +840,7 @@ void str_copy_move_until(const char** __restrict src, char* __restrict dest, con
 }
 
 inline
-int32 strcpy_to_eol(const char* src, char* dst)
+int32 strcpy_to_eol(const char* src, char* dst) noexcept
 {
     int32 offset = 0;
     while (!is_eol(src) && *src != '\0') {
@@ -854,7 +854,7 @@ int32 strcpy_to_eol(const char* src, char* dst)
 }
 
 inline
-char* strsep(const char** sp, const char* sep)
+char* strsep(const char** sp, const char* sep) noexcept
 {
     char* p, *s;
 
@@ -879,7 +879,7 @@ str_concat_new(
     char* dst,
     const char* src1,
     const char* src2
-) {
+) noexcept {
     while (*src1) { *dst++ = *src1++; }
     while (*src2) { *dst++ = *src2++; }
 
@@ -887,7 +887,7 @@ str_concat_new(
 }
 
 inline void
-str_concat_append(char* dst, const char* src)
+str_concat_append(char* dst, const char* src) noexcept
 {
     while (*dst) {
         ++dst;
@@ -897,7 +897,7 @@ str_concat_append(char* dst, const char* src)
 }
 
 inline void
-str_concat_new(char* dst, const char* src1, const char* src2, const char* src3)
+str_concat_new(char* dst, const char* src1, const char* src2, const char* src3) noexcept
 {
     while (*src1) { *dst++ = *src1++; }
     while (*src2) { *dst++ = *src2++; }
@@ -907,7 +907,7 @@ str_concat_new(char* dst, const char* src1, const char* src2, const char* src3)
 }
 
 inline int64
-str_concat_append(char* dst, size_t dst_length, const char* src, size_t src_length)
+str_concat_append(char* dst, size_t dst_length, const char* src, size_t src_length) noexcept
 {
     memcpy(&dst[dst_length], src, src_length);
     dst[dst_length + src_length] = '\0';
@@ -916,7 +916,7 @@ str_concat_append(char* dst, size_t dst_length, const char* src, size_t src_leng
 }
 
 inline void
-str_concat_append(char* dst, size_t dst_length, const char* src)
+str_concat_append(char* dst, size_t dst_length, const char* src) noexcept
 {
     str_copy_short(&dst[dst_length], src);
 }
@@ -926,7 +926,7 @@ str_concat_new(
     char* dst,
     const char* src1, size_t src1_length,
     const char* src2, size_t src2_length
-) {
+) noexcept {
     memcpy(dst, src1, src1_length);
     dst += src1_length;
 
@@ -943,7 +943,7 @@ void str_concat_new(
     char* dst,
     const char* src, size_t src_length,
     int64 data
-) {
+) noexcept {
     memcpy(dst, src, src_length);
 
     int32 len = int_to_str(data, dst + src_length);
@@ -954,13 +954,13 @@ inline
 void str_concat_append(
     char* dst,
     int64 data
-) {
+) noexcept {
     size_t dst_len = str_length(dst);
     int_to_str(data, dst + dst_len);
 }
 
 inline void
-str_concat_new(char* dst, const char* src, int64 data)
+str_concat_new(char* dst, const char* src, int64 data) noexcept
 {
     size_t src_len = str_length(src);
     memcpy(dst, src, src_len);
@@ -969,7 +969,7 @@ str_concat_new(char* dst, const char* src, int64 data)
 }
 
 inline
-void str_insert(char* __restrict dst, size_t insert_pos, const char* __restrict src) {
+void str_insert(char* __restrict dst, size_t insert_pos, const char* __restrict src) noexcept {
     size_t src_length = str_length(src);
     size_t dst_length = str_length(dst);
     memcpy(dst + insert_pos + src_length, dst + insert_pos, dst_length - insert_pos + 1);
@@ -977,13 +977,13 @@ void str_insert(char* __restrict dst, size_t insert_pos, const char* __restrict
 }
 
 inline
-void str_remove(char* __restrict dst, size_t remove_pos, size_t remove_length) {
+void str_remove(char* __restrict dst, size_t remove_pos, size_t remove_length) noexcept {
     size_t src_length = str_length(dst);
     memmove(dst + remove_pos, dst + remove_pos + remove_length, src_length - (remove_pos + remove_length) + 1);
 }
 
 inline
-char* strtok(char* str, const char* __restrict delim, char* *key) {
+char* strtok(char* str, const char* __restrict delim, char** key) noexcept {
     char* result;
     if (str == NULL) {
         str = *key;
@@ -1007,13 +1007,13 @@ char* strtok(char* str, const char* __restrict delim, char* *key) {
 }
 
 inline constexpr
-char toupper_ascii(char c)
+char toupper_ascii(char c) noexcept
 {
     return c - 32 * (c >= 'a' && c <= 'z');
 }
 
 inline
-void toupper_ascii(char* str)
+void toupper_ascii(char* str) noexcept
 {
     while (*str != '\0') {
         *str -= 32 * (*str >= 'a' && *str <= 'z');
@@ -1022,13 +1022,13 @@ void toupper_ascii(char* str)
 }
 
 inline constexpr
-char tolower_ascii(char c)
+char tolower_ascii(char c) noexcept
 {
     return c + 32 * (c >= 'A' && c <= 'Z');
 }
 
 inline
-void tolower_ascii(char* str)
+void tolower_ascii(char* str) noexcept
 {
     while (*str != '\0') {
         *str += 32 * (*str >= 'A' && *str <= 'Z');
@@ -1036,31 +1036,32 @@ void tolower_ascii(char* str)
     }
 }
 
-inline constexpr
-void create_const_name(const byte* name, char* modified_name)
+constexpr inline
+bool str_contains(const char* haystack, const char* needle) noexcept
 {
-    size_t i = 0;
-    while (*name != '\0') {
-        modified_name[i] = *name == ' ' ? '_' : toupper_ascii(*name);
-        ++name;
-        ++i;
+    // @performance would it make sense to only check until haystack - strlen(needle)?
+    // I'm not sure the strlen overhead is worth it
+    while (*haystack != '\0') {
+        const char* p1 = haystack;
+        const char* p2 = needle;
+
+        while (*p1 != '\0' && *p2 != '\0' && *p1 == *p2) {
+            ++p1;
+            ++p2;
+        }
+
+        if (*p2 == '\0') {
+            return true;
+        }
+
+        ++haystack;
     }
-    modified_name[i] = '\0';
-}
-
-inline
-void create_const_name(byte* name)
-{
-    while (*name != '\0') {
-        *name = *name == ' ' ? '_' : toupper_ascii(*name);
-    }
-
-    *name = '\0';
+    return false;
 }
 
 constexpr inline
-int32 str_compare(const char* str1, const char* str2)
+int32 str_compare(const char* str1, const char* str2) noexcept
 {
     byte c1, c2;
 
@@ -1072,7 +1073,7 @@ int32 str_compare(const char* str1, const char* str2)
     return c1 - c2;
 }
 
-int32 str_compare(const char* str1, const char* str2, size_t n)
+int32 str_compare(const char* str1, const char* str2, size_t n) noexcept
 {
     byte c1 = '\0';
     byte c2 = '\0';
@@ -1128,7 +1129,7 @@ int32 str_compare(const char* str1, const char* str2, size_t n)
 }
 
 inline constexpr
-bool str_ends_with(const char* str, const char* suffix) {
+bool str_ends_with(const char* str, const char* suffix) noexcept {
     if (!str || !suffix) {
         return false;
     }
@@ -1144,7 +1145,7 @@ bool str_ends_with(const char* str, const char* suffix) {
 }
 
 // WARNING: result needs to have the correct length
-void str_replace(const char* str, const char* __restrict search, const char* __restrict replace, char* result) {
+void str_replace(const char* str, const char* __restrict search, const char* __restrict replace, char* result) noexcept {
     if (str == NULL || search == NULL || replace == NULL || result == NULL) {
         return;
     }
@@ -1197,13 +1198,13 @@ void print_bytes(const void* ptr, size_t size)
 */
 
 inline constexpr
-bool is_whitespace(char str)
+bool is_whitespace(char str) noexcept
 {
     return str == ' ' || str == '\t';
 }
 
 inline
-int32 str_to_eol(const char* str)
+int32 str_to_eol(const char* str) noexcept
 {
     int32 offset = 0;
     while (!is_eol(str) && *str++ != '\0') {
@@ -1214,7 +1215,7 @@ int32 str_to_eol(const char* str)
 }
 
 inline
-int32 str_to(const char* str, char delim)
+int32 str_to(const char* str, char delim) noexcept
 {
     int32 offset = 0;
     while (*str != delim && *str++ != '\0') {
@@ -1225,7 +1226,7 @@ int32 str_to(const char* str, char delim)
 }
 
 inline
-void str_move_to(const char** str, char delim)
+void str_move_to(const char** str, char delim) noexcept
 {
     while (**str != delim && **str != '\0') {
         ++(*str);
@@ -1234,17 +1235,15 @@ void str_move_to(const char** str, char delim)
 }
 
 // Negative pos counts backwards
 inline
-void str_move_to_pos(const char** str, int32 pos)
+void str_move_to_pos(const char** str, int32 pos) noexcept
 {
-    if (pos >= 0) {
-        *str += pos;
-    } else {
-        (*str) += OMS_MAX(((int32) str_length(*str) + pos), 0);
-    }
+    *str += pos >= 0
+        ? pos
+        : OMS_MAX(((int32) str_length(*str) + pos), 0);
 }
 
 inline
-void str_move_past(const char** str, char delim)
+void str_move_past(const char** str, char delim) noexcept
 {
     while (**str != delim && **str != '\0') {
         ++(*str);
@@ -1256,7 +1255,7 @@ void str_move_past(const char** str, char delim)
 }
 
 inline
-void str_move_past_alpha_num(const char** str)
+void str_move_past_alpha_num(const char** str) noexcept
 {
     while (str_is_alphanum(**str)
         || **str == 45 || **str == 95
@@ -1266,13 +1265,13 @@ void str_move_past_alpha_num(const char** str)
 }
 
 inline
-bool str_is_comment(const char* str)
+bool str_is_comment(const char* str) noexcept
 {
     return (*str == '/' && str[1] == '/') || (*str == '/' && str[1] == '*');
 }
 
 inline
-void str_skip(const char** str, char delim)
+void str_skip(const char** str, char delim) noexcept
 {
     while (**str && **str == delim) {
         ++(*str);
@@ -1280,7 +1279,7 @@ void str_skip(const char** str, char delim)
 }
 
 inline
-void str_skip_whitespace(const char** str)
+void str_skip_whitespace(const char** str) noexcept
 {
     while (**str && (**str == ' ' || **str == '\t')) {
         ++(*str);
@@ -1288,7 +1287,7 @@ void str_skip_whitespace(const char** str)
 }
 
 inline
-void str_skip_empty(const char** str)
+void str_skip_empty(const char** str) noexcept
 {
     while (**str == ' ' || **str == '\t' || **str == '\n' || **str == '\r') {
         ++(*str);
@@ -1296,7 +1295,7 @@ void str_skip_empty(const char** str)
 }
 
 inline
-void str_skip_non_empty(const char** str)
+void str_skip_non_empty(const char** str) noexcept
 {
     while (**str != ' ' && **str != '\t' && **str != '\n' && **str != '\0') {
         ++(*str);
@@ -1304,7 +1303,7 @@ void str_skip_non_empty(const char** str)
 }
 
 inline
-void str_skip_list(const char** __restrict str, const char* __restrict delim, int32 len)
+void str_skip_list(const char** __restrict str, const char* __restrict delim, int32 len) noexcept
 {
     bool run = true;
     while (run && **str != '\0') {
@@ -1322,7 +1321,7 @@ void str_skip_list(const char** __restrict str, const char* __restrict delim, in
 }
 
 inline
-void str_skip_until_list(const char** __restrict str, const char* __restrict delim)
+void str_skip_until_list(const char** __restrict str, const char* __restrict delim) noexcept
 {
     while (**str != '\0') {
         const char* delim_temp = delim;
@@ -1339,7 +1338,7 @@ void str_skip_until_list(const char** __restrict str, const char* __restrict del
 }
 
 inline
-void hexstr_to_rgba(v4_f32* rgba, const char* hex)
+void hexstr_to_rgba(v4_f32* rgba, const char* hex) noexcept
 {
     if (*hex == '#') {
         ++hex;
@@ -1353,7 +1352,7 @@ void hexstr_to_rgba(v4_f32* rgba, const char* hex)
 }
 
 inline constexpr
-void str_pad_right(const char* input, char* output, char pad, size_t len) {
+void str_pad_right(const char* input, char* output, char pad, size_t len) noexcept {
     size_t i = 0;
     for (; i < len && input[i] != '\0'; ++i) {
         output[i] = input[i];
@@ -1365,7 +1364,7 @@ void str_pad_right(const char* input, char* output, char pad, size_t len) {
 }
 
 inline constexpr
-void str_pad_left(const char* input, char* output, char pad, size_t len) {
+void str_pad_left(const char* input, char* output, char pad, size_t len) noexcept {
     size_t input_len = str_length(input);
     size_t i = 0;
 
@@ -1379,7 +1378,7 @@ void str_pad_left(const char* input, char* output, char pad, size_t len) {
 }
 
 inline
-f32 str_to_float(const char* str, const char** pos = NULL)
+f32 str_to_float(const char* str, const char** pos = NULL) noexcept
 {
     const char *p = str;
     f32 result = 0.0f;
@@ -1430,7 +1429,7 @@ f32 str_to_float(const char* str, const char** pos = NULL)
 }
 
 inline
-int32 float_to_str(f64 value, char* buffer, int32 precision = 5)
+int32 float_to_str(f64 value, char* buffer, int32 precision = 5) noexcept
 {
     ASSERT_SIMPLE(precision < 6);
 
@@ -1481,7 +1480,7 @@ int32 float_to_str(f64 value, char* buffer, int32 precision = 5)
 }
 
 inline
-void format_time_hh_mm_ss(char time_str[9], int32 hours, int32 minutes, int32 secs) {
+void format_time_hh_mm_ss(char time_str[9], int32 hours, int32 minutes, int32 secs) noexcept {
     time_str[0] = (char) ('0' + (hours / 10));
     time_str[1] = (char) ('0' + (hours % 10));
     time_str[2] = ':';
@@ -1494,7 +1493,7 @@ void format_time_hh_mm_ss(char time_str[9], int32 hours, int32 minutes, int32 se
 }
 
 inline
-void format_time_hh_mm_ss(char time_str[9], uint64 time) {
+void format_time_hh_mm_ss(char time_str[9], uint64 time) noexcept {
     int32 hours = (time / 3600) % 24;
     int32 minutes = (time / 60) % 60;
     int32 secs = time % 60;
@@ -1503,7 +1502,7 @@ void format_time_hh_mm_ss(char time_str[9], uint64 time) {
 }
 
 inline
-void format_time_hh_mm(char time_str[6], int32 hours, int32 minutes) {
+void format_time_hh_mm(char time_str[6], int32 hours, int32 minutes) noexcept {
     time_str[0] = (char) ('0' + (hours / 10));
     time_str[1] = (char) ('0' + (hours % 10));
     time_str[2] = ':';
@@ -1513,14 +1512,14 @@ void format_time_hh_mm(char time_str[6], int32 hours, int32 minutes) {
 }
 
 inline
-void format_time_hh_mm(char time_str[6], uint64 time) {
+void format_time_hh_mm(char time_str[6], uint64 time) noexcept {
     int32 hours = (time / 3600) % 24;
     int32 minutes = (time / 60) % 60;
 
     format_time_hh_mm(time_str, hours, minutes);
 }
 
-void sprintf_fast(char* __restrict buffer, const char* __restrict format, ...) {
+void sprintf_fast(char* __restrict buffer, const char* __restrict format, ...) noexcept {
     va_list args;
     va_start(args, format);
 
@@ -1594,7 +1593,7 @@ void sprintf_fast(char* __restrict buffer, const char* __restrict format, ...) {
 
     va_end(args);
 }
 
-void sprintf_fast(char* __restrict buffer, int32 buffer_length, const char* __restrict format, ...) {
+void sprintf_fast(char* __restrict buffer, int32 buffer_length, const char* __restrict format, ...) noexcept {
     va_list args;
     va_start(args, format);
 
@@ -1677,7 +1676,7 @@ void sprintf_fast(char* __restrict buffer, int32 buffer_length, const char* __re
 }
 
 // There are situations where you only want to replace a certain amount of %
-void sprintf_fast_iter(char* buffer, const char* format, ...) {
+void sprintf_fast_iter(char* buffer, const char* format, ...) noexcept {
     va_list args;
     va_start(args, format);
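A usage sketch for the new str_contains, which replaces the removed create_const_name helpers. Since the implementation above is constexpr, it also works in compile-time contexts:

    void contains_example() {
        bool has_vsync = str_contains("vsync=1;fullscreen=0", "vsync"); // true
        bool has_hdr   = str_contains("vsync=1;fullscreen=0", "hdr");   // false

        // Matches can be proven at compile time as well:
        static_assert(str_contains("hello world", "lo w"), "substring expected");
    }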
diff --git a/utils/TestUtils.h b/utils/TestUtils.h
index 5727175..ea7c313 100644
--- a/utils/TestUtils.h
+++ b/utils/TestUtils.h
@@ -9,35 +9,12 @@
 #ifndef TOS_UTILS_TEST_UTILS_H
 #define TOS_UTILS_TEST_UTILS_H
 
-#include "../architecture/Intrinsics.h"
-
 #if DEBUG
-    #define ASSERT_SIMPLE(a) \
-        if (!(a)) \
-        { \
-            *(volatile int *)0 = 0; \
-        }
-
-    #define ASSERT_SIMPLE_CONST(a) \
-        if constexpr (!(a)) \
-        { \
-            *(volatile int *)0 = 0; \
-        }
-
-    #define ASSERT_PERFORMANCE_START(time_start) \
-        ({ \
-            time_start = intrin_timestamp_counter(); \
-        })
-
-    #define ASSERT_PERFORMANCE_END(time_start, max_duration) \
-        ({ \
-            ASSERT_SIMPLE(intrin_timestamp_counter() - (time_start) <= (max_duration)); \
-        })
+    #define ASSERT_SIMPLE(a) if (!(a)) { *(volatile int *)0 = 0; }
+    #define ASSERT_SIMPLE_CONST(a) if constexpr (!(a)) { *(volatile int *)0 = 0; }
 #else
     #define ASSERT_SIMPLE(a) ((void)0)
     #define ASSERT_SIMPLE_CONST(a) ((void)0)
-    #define ASSERT_PERFORMANCE_START(time_start) ((void)0)
-    #define ASSERT_PERFORMANCE_END(time_start, max_duration) ((void)0)
 #endif
 
 #endif
diff --git a/utils/Utils.h b/utils/Utils.h
index efee627..f082c03 100644
--- a/utils/Utils.h
+++ b/utils/Utils.h
@@ -36,7 +36,7 @@ struct FileBody {
 };
 
 FORCE_INLINE
-bool is_equal(const byte* __restrict region1, const byte* __restrict region2, uint64 size)
+bool is_equal(const byte* __restrict region1, const byte* __restrict region2, uint64 size) noexcept
 {
     return memcmp(region1, region2, size) == 0;
 }
@@ -57,7 +57,7 @@ void str_output(const char* __restrict str, ...) {
 }
 
 inline
-void swap_memory(void* __restrict a, void* __restrict b, size_t size) {
+void swap_memory(void* __restrict a, void* __restrict b, size_t size) noexcept {
    byte* p = (byte*) a;
    byte* q = (byte*) b;