From 2ecb47117b5f34d387d2aba2f48946ce8888f174 Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Tue, 7 Jan 2025 20:47:16 +0100 Subject: [PATCH] huge re-write of ams. impl. of command buffer, ... --- animation/Animation.h | 457 +++++++------- asset/Asset.h | 64 +- asset/AssetArchive.h | 68 ++- asset/AssetManagementSystem.h | 708 +++++++++++++--------- asset/AssetType.h | 2 +- audio/Audio.cpp | 7 +- audio/AudioMixer.h | 65 +- audio/QoaSimd.h | 2 +- command/AppCmdBuffer.cpp | 491 +++++++++++++++ command/AppCmdBuffer.h | 60 ++ command/Command.h | 35 ++ entity/AnimationEntity.h | 78 --- entity/AnimationEntityComponent.h | 69 +++ entity/CursorEntity.h | 20 + entity/Entity.h | 34 +- entity/EntityComponentSystem.h | 120 +++- entity/EntitySize.h | 28 + entity/EntityType.h | 21 - font/Font.h | 23 +- gpuapi/GpuApiType.h | 19 + gpuapi/ShaderType.h | 25 + gpuapi/direct3d/GpuApiContainer.h | 4 +- gpuapi/direct3d/Shader.h | 20 + gpuapi/opengl/AppCmdBuffer.h | 66 ++ gpuapi/opengl/GpuApiContainer.h | 4 +- gpuapi/opengl/OpenglUtils.h | 9 +- gpuapi/opengl/Shader.h | 6 +- gpuapi/opengl/ShaderUtils.h | 14 + gpuapi/vulkan/GpuApiContainer.h | 4 +- gpuapi/vulkan/Shader.h | 20 + gpuapi/vulkan/ShaderUtils.h | 2 +- gpuapi/vulkan/VulkanUtils.h | 2 +- image/Image.cpp | 18 +- localization/Language.h | 7 +- log/Debug.cpp | 57 +- log/Debug.h | 6 +- log/DebugMemory.h | 13 +- log/Log.h | 2 +- log/TimingStat.h | 4 +- math/matrix/VectorFloat32.h | 7 +- math/matrix/VectorFloat64.h | 7 +- math/matrix/VectorInt32.h | 7 +- math/matrix/VectorInt64.h | 7 +- memory/BufferMemory.h | 7 +- memory/ChunkMemory.h | 300 ++++----- memory/Heap.h | 7 +- memory/RingMemory.h | 16 +- memory/ThreadedChunkMemory.h | 11 +- memory/ThreadedQueue.h | 10 +- memory/ThreadedRingMemory.h | 7 +- models/mob/MobStats.cpp | 2 +- models/mob/PrimaryStatsPoints.cpp | 2 +- models/mob/SecondaryStatsPoints.cpp | 2 +- module/Module.h | 5 +- module/ModuleManager.h | 3 +- network/Socket.h | 18 + object/Animation.h | 7 +- object/Hitbox.h | 
7 +- object/Material.h | 7 +- object/Mesh.h | 32 +- platform/linux/Allocator.h | 1 - platform/linux/FileUtils.cpp | 2 +- platform/linux/{Library.h => Library.cpp} | 18 +- platform/linux/SystemInfo.cpp | 4 +- platform/win32/Allocator.h | 5 +- platform/win32/FileUtils.cpp | 2 +- platform/win32/{Library.h => Library.cpp} | 16 +- platform/win32/SystemInfo.cpp | 6 +- platform/win32/threading/Spinlock.cpp | 1 + stdlib/HashMap.h | 475 +++++++++++++-- stdlib/PerfectHashMap.h | 14 +- stdlib/{simd => }/SIMD_Helper.h | 4 +- stdlib/Simd.h | 24 + stdlib/ThreadedHashMap.h | 16 +- stdlib/Types.h | 4 +- system/Allocator.h | 18 + system/FileUtils.cpp | 18 + system/Library.cpp | 18 + system/SystemInfo.cpp | 18 + thread/Atomic.h | 18 + thread/Semaphore.h | 18 + thread/Spinlock.cpp | 18 + thread/Spinlock.h | 18 + thread/Thread.h | 3 +- thread/ThreadDefines.h | 18 + thread/ThreadJob.h | 7 +- thread/ThreadPool.h | 14 +- ui/UIAttribute.h | 2 +- ui/UIElementType.h | 2 +- ui/UILayout.h | 2 + ui/UITheme.h | 102 ++-- utils/BitUtils.h | 1 + utils/MathUtils.h | 1 + utils/StringUtils.h | 548 ++++++++++++----- 94 files changed, 3089 insertions(+), 1472 deletions(-) create mode 100644 command/AppCmdBuffer.cpp create mode 100644 command/AppCmdBuffer.h create mode 100644 command/Command.h delete mode 100644 entity/AnimationEntity.h create mode 100644 entity/AnimationEntityComponent.h create mode 100644 entity/CursorEntity.h create mode 100644 entity/EntitySize.h delete mode 100644 entity/EntityType.h create mode 100644 gpuapi/GpuApiType.h create mode 100644 gpuapi/ShaderType.h create mode 100644 gpuapi/direct3d/Shader.h create mode 100644 gpuapi/opengl/AppCmdBuffer.h create mode 100644 gpuapi/vulkan/Shader.h create mode 100644 network/Socket.h rename platform/linux/{Library.h => Library.cpp} (78%) rename platform/win32/{Library.h => Library.cpp} (80%) rename stdlib/{simd => }/SIMD_Helper.h (99%) create mode 100644 stdlib/Simd.h create mode 100644 system/Allocator.h create mode 100644 
system/FileUtils.cpp create mode 100644 system/Library.cpp create mode 100644 system/SystemInfo.cpp create mode 100644 thread/Atomic.h create mode 100644 thread/Semaphore.h create mode 100644 thread/Spinlock.cpp create mode 100644 thread/Spinlock.h create mode 100644 thread/ThreadDefines.h diff --git a/animation/Animation.h b/animation/Animation.h index 668a1f0..9582c04 100644 --- a/animation/Animation.h +++ b/animation/Animation.h @@ -24,6 +24,234 @@ f32 lerp(f32 a, f32 b, f32 t) f32 smoothstep(f32 t) { return t * t * (3 - 2 * t); } +inline +f32 anim_discrete(f32 t) { + return t >= 1.0f ? 1.0f : 0.0f; +} + +inline +f32 anim_ease_linear(f32 t) { + return t; +} + +inline +f32 anim_ease_in_sine(f32 t) { + return 1.0f - cosf((t * OMS_PI) / 2.0f); +} + +inline +f32 anim_ease_out_sine(f32 t) { + return sinf((t * OMS_PI) / 2.0f); +} + +inline +f32 anim_ease_in_out_sine(f32 t) { + return -(cosf(OMS_PI * t) - 1) / 2.0f; +} + +inline +f32 anim_ease_in_quad(f32 t) { + return t * t; +} + +inline +f32 anim_ease_out_quad(f32 t) { + return 1.0f - (1.0f - t) * (1.0f - t); +} + +inline +f32 anim_ease_in_out_quad(f32 t) { + return t < 0.5f + ? 2 * t * t + : 1.0f - powf(-2 * t + 2, 2) / 2.0f; +} + +inline +f32 anim_ease_in_cubic(f32 t) { + return t * t * t; +} + +inline +f32 anim_ease_out_cubic(f32 t) { + return 1.0f - powf(1.0f - t, 3); +} + +inline +f32 anim_ease_in_out_cubic(f32 t) { + return t < 0.5f + ? 4 * t * t * t + : 1.0f - powf(-2 * t + 2, 3) / 2.0f; +} + +inline +f32 anim_ease_in_quart(f32 t) { + return t * t * t * t; +} + +inline +f32 anim_ease_out_quart(f32 t) { + return 1.0f - powf(1.0f - t, 4); +} + +inline +f32 anim_ease_in_perlin(f32 t) { + return t * t * t * (t * (t * 6 - 15) + 10); +} + +inline +f32 anim_ease_in_out_quart(f32 t) { + return t < 0.5f + ? 
8 * t * t * t * t + : 1.0f - powf(-2 * t + 2, 4) / 2.0f; +} + +inline +f32 anim_ease_in_quint(f32 t) { + return t * t * t * t * t; +} + +inline +f32 anim_ease_out_quint(f32 t) { + return 1.0f - powf(1.0f - t, 5); +} + +inline +f32 anim_ease_in_out_quint(f32 t) { + return t < 0.5f + ? 16 * t * t * t * t * t + : 1.0f - powf(-2 * t + 2, 5) / 2.0f; +} + +inline +f32 anim_ease_in_expo(f32 t) { + return t == 0.0f + ? 0.0f + : powf(2, 10 * t - 10); +} + +inline +f32 anim_ease_out_expo(f32 t) { + return t == 1.0f + ? 1.0f + : 1.0f - powf(2, -10 * t); +} + +inline +f32 anim_ease_in_out_expo(f32 t) { + if (t == 0.0f || t == 1.0f) { + return t; + } + + return t < 0.5f + ? powf(2, 20 * t - 10) / 2.0f + : (2 - powf(2, -20 * t + 10)) / 2.0f; +} + +inline +f32 anim_ease_in_circ(f32 t) { + return 1.0f - sqrtf(1.0f - powf(t, 2)); +} + +inline +f32 anim_ease_out_circ(f32 t) { + return sqrtf(1.0f - powf(t - 1, 2)); +} + +inline +f32 anim_ease_in_out_circ(f32 t) { + return t < 0.5f + ? (1.0f - sqrtf(1.0f - powf(2 * t, 2))) / 2.0f + : (sqrtf(1.0f - powf(-2 * t + 2, 2)) + 1) / 2.0f; +} + +inline +f32 anim_ease_in_back(f32 t) { + const f32 c1 = 1.70158f; + const f32 c3 = c1 + 1.0f; + + return c3 * t * t * t - c1 * t * t; +} + +inline +f32 anim_ease_out_back(f32 t) { + const f32 c1 = 1.70158f; + const f32 c3 = c1 + 1.0f; + + return 1 + c3 * powf(t - 1, 3) + c1 * powf(t - 1, 2); +} + +inline +f32 anim_ease_in_out_back(f32 t) { + const f32 c1 = 1.70158f; + const f32 c2 = c1 * 1.525f; + + return t < 0.5f + ? 
(powf(2 * t, 2) * ((c2 + 1) * 2 * t - c2)) / 2.0f + : (powf(2 * t - 2, 2) * ((c2 + 1) * (t * 2 - 2) + c2) + 2) / 2.0f; +} + +inline +f32 anim_ease_in_elastic(f32 t) { + const f32 c4 = OMS_TWO_PI / 3; + + if (t == 0.0f || t == 1.0f) { + return t; + } + + return -powf(2, 10 * t - 10) * sinf((t * 10 - 10.75f) * c4); +} + +inline +f32 anim_ease_out_elastic(f32 t) { + const f32 c4 = OMS_TWO_PI / 3; + + if (t == 0.0f || t == 1.0f) { + return t; + } + + return powf(2, -10 * t) * sinf((t * 10 - 0.75f) * c4) + 1; +} + +inline +f32 anim_ease_in_out_elastic(f32 t) { + const f32 c5 = OMS_TWO_PI / 4.5f; + + if (t == 0.0f || t == 1.0f) { + return t; + } else if (t < 0.5f) { + return -(powf(2, 20 * t - 10) * sinf((20 * t - 11.125f) * c5)) / 2.0f; + } + + return (powf(2, -20 * t + 10) * sinf((20 * t - 11.125f) * c5)) / 2.0f + 1.0f; +} + +inline +f32 anim_ease_out_bounce(f32 t) { + const f32 n1 = 7.5625f; + const f32 d1 = 2.75f; + + if (t < 1.0f / d1) { + return n1 * t * t; + } else if (t < 2.0f / d1) { + return n1 * (t -= 1.5f / d1) * t + 0.75f; + } else if (t < 2.5f / d1) { + return n1 * (t -= 2.25f / d1) * t + 0.9375f; + } + + return n1 * (t -= 2.625f / d1) * t + 0.984375f; +} + +inline +f32 anim_ease_in_bounce(f32 t) { + return 1.0f - anim_ease_out_bounce(1.0f - t); +} + +inline +f32 anim_ease_in_out_bounce(f32 t) { + return t < 0.5f + ? (1.0f - anim_ease_out_bounce(1.0f - 2.0f * t)) / 2.0f + : (1.0f + anim_ease_out_bounce(2.0f * t - 1.0f)) / 2.0f; +} f32 anim_ease(f32 t, AnimationEaseType type) { switch(type) { @@ -125,233 +353,4 @@ f32 anim_ease(f32 t, AnimationEaseType type) { } } -inline -f32 anim_discrete(f32 t) { - return t >= 1.0f ? 
1.0f : 0.0f; -} - -inline -f32 anim_ease_linear(f32 t) { - return t; -} - -inline -f32 anim_ease_in_sine(f32 t) { - return 1 - cosf((t * OMS_PI) / 2); -} - -inline -f32 anim_ease_out_sine(f32 t) { - return sinf((t * OMS_PI) / 2); -} - -inline -f32 anim_ease_in_out_sine(f32 t) { - return -(cosf(OMS_PI * t) - 1) / 2; -} - -inline -f32 anim_ease_in_quad(f32 t) { - return t * t; -} - -inline -f32 anim_ease_out_quad(f32 t) { - return 1 - (1 - t) * (1 - t); -} - -inline -f32 anim_ease_in_out_quad(f32 t) { - return t < 0.5 - ? 2 * t * t - : 1 - pow(-2 * t + 2, 2) / 2; -} - -inline -f32 anim_ease_in_cubic(f32 t) { - return t * t * t; -} - -inline -f32 anim_ease_out_cubic(f32 t) { - return 1 - pow(1 - t, 3); -} - -inline -f32 anim_ease_in_out_cubic(f32 t) { - return t < 0.5 - ? 4 * t * t * t - : 1 - pow(-2 * t + 2, 3) / 2; -} - -inline -f32 anim_ease_in_quart(f32 t) { - return t * t * t * t; -} - -inline -f32 anim_ease_out_quart(f32 t) { - return 1 - pow(1 - t, 4); -} - -inline -f32 anim_ease_in_perlin(f32 t) { - return t * t * t * (t * (t * 6 - 15) + 10); -} - -inline -f32 anim_ease_in_out_quart(f32 t) { - return t < 0.5 - ? 8 * t * t * t * t - : 1 - pow(-2 * t + 2, 4) / 2; -} - -inline -f32 anim_ease_in_quint(f32 t) { - return t * t * t * t * t; -} - -inline -f32 anim_ease_out_quint(f32 t) { - return 1 - pow(1 - t, 5); -} - -inline -f32 anim_ease_in_out_quint(f32 t) { - return t < 0.5 - ? 16 * t * t * t * t * t - : 1 - pow(-2 * t + 2, 5) / 2; -} - -inline -f32 anim_ease_in_expo(f32 t) { - return t == 0 - ? 0 - : pow(2, 10 * t - 10); -} - -inline -f32 anim_ease_out_expo(f32 t) { - return t == 1 - ? 1 - : 1 - pow(2, -10 * t); -} - -inline -f32 anim_ease_in_out_expo(f32 t) { - if (t == 0 || t == 1) { - return t; - } - - return t < 0.5 - ? 
pow(2, 20 * t - 10) / 2 - : (2 - pow(2, -20 * t + 10)) / 2; -} - -inline -f32 anim_ease_in_circ(f32 t) { - return 1 - sqrtf(1 - pow(t, 2)); -} - -inline -f32 anim_ease_out_circ(f32 t) { - return sqrtf(1 - pow(t - 1, 2)); -} - -inline -f32 anim_ease_in_out_circ(f32 t) { - return t < 0.5 - ? (1 - sqrtf(1 - pow(2 * t, 2))) / 2 - : (sqrtf(1 - pow(-2 * t + 2, 2)) + 1) / 2; -} - -inline -f32 anim_ease_in_back(f32 t) { - const f32 c1 = 1.70158; - const f32 c3 = c1 + 1; - - return c3 * t * t * t - c1 * t * t; -} - -inline -f32 anim_ease_out_back(f32 t) { - const f32 c1 = 1.70158; - const f32 c3 = c1 + 1; - - return 1 + c3 * pow(t - 1, 3) + c1 * pow(t - 1, 2); -} - -inline -f32 anim_ease_in_out_back(f32 t) { - const f32 c1 = 1.70158; - const f32 c2 = c1 * 1.525; - - return t < 0.5 - ? (pow(2 * t, 2) * ((c2 + 1) * 2 * t - c2)) / 2 - : (pow(2 * t - 2, 2) * ((c2 + 1) * (t * 2 - 2) + c2) + 2) / 2; -} - -inline -f32 anim_ease_in_elastic(f32 t) { - const f32 c4 = (2 * OMS_PI) / 3; - - if (t == 0 || t == 1) { - return t; - } - - return -pow(2, 10 * t - 10) * sinf((t * 10 - 10.75) * c4); -} - -inline -f32 anim_ease_out_elastic(f32 t) { - const f32 c4 = (2 * OMS_PI) / 3; - - if (t == 0.0 || t == 1.0) { - return t; - } - - return pow(2, -10 * t) * sinf((t * 10 - 0.75) * c4) + 1; -} - -inline -f32 anim_ease_in_out_elastic(f32 t) { - const f32 c5 = (2 * OMS_PI) / 4.5; - - if (t == 0.0 || t == 1.0) { - return t; - } else if (t < 0.5) { - return -(pow(2, 20 * t - 10) * sinf((20 * t - 11.125) * c5)) / 2; - } - - return (pow(2, -20 * t + 10) * sinf((20 * t - 11.125) * c5)) / 2 + 1; -} - -inline -f32 anim_ease_in_bounce(f32 t) { - return 1 - anim_ease_out_bounce(1 - t); -} - -inline -f32 anim_ease_out_bounce(f32 t) { - const f32 n1 = 7.5625; - const f32 d1 = 2.75; - - if (t < 1 / d1) { - return n1 * t * t; - } else if (t < 2 / d1) { - return n1 * (t -= 1.5 / d1) * t + 0.75; - } else if (t < 2.5 / d1) { - return n1 * (t -= 2.25 / d1) * t + 0.9375; - } - - return n1 * (t -= 2.625 / d1) * t + 
0.984375; -} - -inline -f32 anim_ease_in_out_bounce(f32 t) { - return t < 0.5 - ? (1 - anim_ease_out_bounce(1 - 2 * t)) / 2 - : (1 + anim_ease_out_bounce(2 * t - 1)) / 2; -} - #endif \ No newline at end of file diff --git a/asset/Asset.h b/asset/Asset.h index 626cd31..e748901 100644 --- a/asset/Asset.h +++ b/asset/Asset.h @@ -12,61 +12,49 @@ #include "../stdlib/Types.h" #include "AssetType.h" -#define MAX_ASSET_NAME_LENGTH 32 +enum AssetState : byte { + ASSET_STATE_IN_RAM = 1 << 0, + ASSET_STATE_IN_VRAM = 1 << 1, + ASSET_STATE_RAM_GC = 1 << 2, + ASSET_STATE_VRAM_GC = 1 << 3, +}; struct Asset { - // The id is the same as its location in memory/in the ams array - // This is is only an internal id and NOT the same as a db id (e.g. player id) - uint64 internal_id; - // Could be 0 if there is no official id - uint64 official_id; + uint32 official_id; - // @performance This is bad, this uses the same name as the hashmap - // We effectively store the asset name twice which shouldn't be the case - char name[MAX_ASSET_NAME_LENGTH]; - - AssetType type; - - // Counts the references to this asset - // e.g. textures - int32 reference_count; + // @performance We would like to use a bool but windows only supports 32bit atomic values as smallest value + // Maybe if we would set the IS_LOADED_STATE in the enum as the highest bit we could use the state variable and check it with >= + int32 is_loaded; // Describes how much ram/vram the asset uses // E.g. vram_size = 0 but ram_size > 0 means that it never uses any gpu memory uint32 ram_size; uint32 vram_size; - uint64 last_access; + + uint32 last_access; // Usually 1 but in some cases an ams may hold entities of variable chunk length // For textures for example a 128x128 is of size 1 but 256x256 is of size 4 - uint32 size; + uint16 size; - // Variable used for thread safety - bool is_loaded; + // Which asset component is used + byte component_id; - // Describes if the memory is currently available in ram/vram - // E.g. 
an asset might be uploaded to the gpu and no longer held in ram (or the other way around) - bool is_ram; - bool is_vram; - - // Describes if the asset can be removed/garbage collected IF necessary - // This however only happens if space is needed - bool can_garbage_collect_ram; - bool can_garbage_collect_vram; - - Asset* next; - Asset* prev; - - // An asset can reference up to N other entities - // This allows us to quickly update the other entities - // Example: A player pulls N mobs - // @bug This means there are hard limits on how many mobs can be pulled by a player - Asset* references[50]; - uint64 free_references; // bits show which is free + byte state; // Actual memory address and specific asset data byte* self; + + // Counts the references to this asset + // e.g. textures or entity schemas (NOT entities themselves) + uint16 reference_count; + + // An asset can reference up to N other assets + // This allows us to quickly update the other assets + // Uses official_id + // @performance This could potentially be bad because many assets will have 0 or only 1-4 references + uint32 references[12]; }; #endif \ No newline at end of file diff --git a/asset/AssetArchive.h b/asset/AssetArchive.h index b9a1782..98e4227 100644 --- a/asset/AssetArchive.h +++ b/asset/AssetArchive.h @@ -25,19 +25,8 @@ #include "../localization/Language.h" #include "../ui/UITheme.h" #include "AssetManagementSystem.h" - -#if __aarch64__ - #include "../stdlib/sve/SVE_I32.h" -#else - #include "../stdlib/simd/SIMD_I32.h" -#endif - -#if _WIN32 - #include - #include "../platform/win32/FileUtils.cpp" -#elif __linux__ - #include "../platform/win32/FileUtils.cpp" -#endif +#include "../system/FileUtils.cpp" +#include "../stdlib/Simd.h" #define ASSET_ARCHIVE_VERSION 1 @@ -78,7 +67,7 @@ struct AssetArchive { // This is used to tell the asset archive in which AssetManagementSystem (AMS) which asset type is located. // Remember, many AMS only contain one asset type (e.g. image, audio, ...) 
- int32 asset_type_map[ASSET_TYPE_SIZE]; + byte asset_type_map[ASSET_TYPE_SIZE]; }; // Calculates how large the header memory has to be to hold all its information @@ -183,37 +172,47 @@ void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* b // Maybe we could just accept a int value which we set atomically as a flag that the asset is complete? // this way we can check much faster if we can work with this data from the caller?! // The only problem is that we need to pass the pointer to this int in the thrd_queue since we queue the files to load there -Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetManagementSystem* ams_array, RingMemory* ring) +Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetManagementSystem* ams, RingMemory* ring) { - // @todo add calculation from element->type to ams index + // @todo add calculation from element->type to ams index. Probably requires an app specific conversion function // We have to mask 0x00FFFFFF since the highest bits define the archive id, not the element id AssetArchiveElement* element = &archive->header.asset_element[id & 0x00FFFFFF]; - AssetManagementSystem* ams = &ams_array[archive->asset_type_map[element->type]]; - // @todo This is a little bit stupid, reconsider - char id_str[32]; - _itoa(id, id_str, 16); + byte component_id = archive->asset_type_map[element->type]; + AssetComponent* ac = &ams->asset_components[component_id]; - Asset* asset; + // Create a string representation from the asset id + // We can't just use the asset id, since an int can have a \0 between high byte and low byte + // @question We maybe can switch the AMS to work with ints as keys. + // We would then have to also create an application specific enum for general assets, + // that are not stored in the asset archive (e.g. color palette, which is generated at runtime). 
+ char id_str[9]; + int_to_hex(id, id_str); + + Asset* asset = thrd_ams_get_asset_wait(ams, id_str); - // @performance I think we could optimize the ams_reserver_asset in a way so we don't have to lock it the entire time - pthread_mutex_lock(&ams->mutex); - asset = ams_get_asset(ams, id_str); if (asset) { - // Asset already loaded - pthread_mutex_unlock(&ams->mutex); + // Prevent garbage collection + asset->state &= ~ASSET_STATE_RAM_GC; + asset->state &= ~ASSET_STATE_VRAM_GC; return asset; } + // @bug Couldn't the asset become available from thrd_ams_get_asset_wait to here? + // This would mean we are overwriting it + // A solution could be a function called thrd_ams_get_reserve_wait() that reserves, if not available + // However, that function would have to lock the ams during that entire time + if (element->type == 0) { - asset = ams_reserve_asset(ams, id_str, ams_calculate_chunks(ams, element->uncompressed)); + asset = thrd_ams_reserve_asset(ams, (byte) component_id, id_str, element->uncompressed); + asset->official_id = id; FileBody file = {}; file.content = asset->self; - // @performance Consider to implement gzip here + // @performance Consider to implement general purpose fast compression algorithm // We are directly reading into the correct destination file_read(archive->fd, &file, element->start, element->length); @@ -230,8 +229,10 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana // This happens while the file system loads the data // The important part is to reserve the uncompressed file size, not the compressed one - asset = ams_reserve_asset(ams, id_str, ams_calculate_chunks(ams, element->uncompressed)); - asset->is_ram = true; + asset = thrd_ams_reserve_asset(ams, (byte) component_id, id_str, element->uncompressed); + asset->official_id = id; + + asset->state |= ASSET_STATE_IN_RAM; file_async_wait(archive->fd_async, &file.ov, true); switch (element->type) { @@ -288,10 +289,13 @@ Asset* asset_archive_asset_load(const 
AssetArchive* archive, int32 id, AssetMana } } } - pthread_mutex_unlock(&ams->mutex); + + // Even though dependencies are still being loaded + // the main program should still be able to do some work if possible + thrd_ams_set_loaded(asset); // @performance maybe do in worker threads? This just feels very slow - // @question dependencies might be stored in different archives? + // @bug dependencies might be stored in different archives? for (uint32 i = 0; i < element->dependency_count; ++i) { asset_archive_asset_load(archive, id, ams, ring); } diff --git a/asset/AssetManagementSystem.h b/asset/AssetManagementSystem.h index fb4c1b8..0edd87d 100644 --- a/asset/AssetManagementSystem.h +++ b/asset/AssetManagementSystem.h @@ -14,191 +14,100 @@ #include "Asset.h" #include "../memory/ChunkMemory.h" #include "../utils/TestUtils.h" +#include "../utils/BitUtils.h" #include "../stdlib/HashMap.h" #include "../log/DebugMemory.h" +#include "../thread/Atomic.h" // The major asset types should have their own asset component system // All other entities are grouped together in one asset component system -// @question Asset component systems could be created per region -> easy to simulate a specific region -// @bug This means players might not be able to transition from one area to another?! - -// @performance There is a huge performance flaw. We CANNOT have an asset only in vram because it always also allocates the ram (asset_data_memory) -struct AssetManagementSystem { - // @question is this even necessary or could we integrate this directly into the system here? - HashMap hash_map; +struct AssetComponent { + ChunkMemory asset_memory; uint64 ram_size; uint64 vram_size; uint64 asset_count; - int32 overhead; - bool has_changed; - // The indices of asset_memory and asset_data_memory are always linked - - // @question Wouldn't it make much more sense to have a general AMS for this data - // In that case we would only need one AMS which holds the Asset information. 
All others would only need the data_memory - // We could probably dramatically simplify the AMS that holds the actual data. We might only need the ChunkMemory? - - // @question Even further, why would we want to split stats and DATA at all? we are talking about assets which most likely don't fit into a single L1 cache line - // BUT they may fit in L2 or L3 and therefore require less pointer chasing - // Sure collecting data is faster with split memory (ram/vram usage) - - // General asset memory - // Fixed chunk size of sizeof(Asset) - ChunkMemory asset_memory; - - // Actual asset data - // Chunk size defined during initialization - ChunkMemory asset_data_memory; - - // @performance Do we really need the linked list, the ChunkMemory should allow us to do some smart stuff - Asset* first; - Asset* last; - - // @question do we want to create an extra threaded version? Or a combined one, like we have right now. // @question Do we want to add a mutex to assets. This way we don't have to lock the entire ams. 
pthread_mutex_t mutex; }; -void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 chunk_size, int32 count, int32 overhead = 0) +struct AssetManagementSystem { + HashMap hash_map; + + int32 asset_component_count; + AssetComponent* asset_components; +}; + +inline +void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 asset_component_count, int32 count) { - // setup hash_map - hashmap_create(&ams->hash_map, count, sizeof(HashEntryInt64), buf); - - ams->overhead = overhead; - - // setup asset_memory - chunk_init(&ams->asset_memory, buf, count, sizeof(Asset), 64); - - // setup asset_data_memory - chunk_init(&ams->asset_data_memory, buf, count, chunk_size, 64); - - ams->first = NULL; - ams->last = NULL; - - pthread_mutex_init(&ams->mutex, NULL); + hashmap_create(&ams->hash_map, count, sizeof(HashEntry) + sizeof(Asset), buf); + ams->asset_component_count = asset_component_count; + ams->asset_components = (AssetComponent *) buffer_get_memory(buf, asset_component_count * sizeof(AssetComponent), 64, true); } -// WARNING: buf size see ams_get_buffer_size -void ams_create(AssetManagementSystem* ams, byte* buf, int32 chunk_size, int32 count, int32 overhead = 0) +inline +void ams_component_create(AssetComponent* ac, BufferMemory* buf, int32 chunk_size, int32 count) { ASSERT_SIMPLE(chunk_size); - // setup hash_map - hashmap_create(&ams->hash_map, count, sizeof(HashEntryInt64), buf); - - ams->overhead = overhead; - - // setup asset_memory - ams->asset_memory.count = count; - ams->asset_memory.chunk_size = sizeof(Asset); - ams->asset_memory.last_pos = 0; - ams->asset_memory.alignment = 64; - ams->asset_memory.memory = buf; - ams->asset_memory.free = (uint64 *) (ams->asset_memory.memory + ams->asset_memory.chunk_size * count); - - // setup asset_data_memory - ams->asset_data_memory.count = count; - ams->asset_data_memory.chunk_size = chunk_size; - ams->asset_data_memory.last_pos = 0; - ams->asset_data_memory.alignment = 64; - 
ams->asset_data_memory.memory = (byte *) (ams->asset_memory.free + CEIL_DIV(count, 64)); - ams->asset_data_memory.free = (uint64 *) (ams->asset_data_memory.memory + ams->asset_data_memory.chunk_size * count); - - ams->first = NULL; - ams->last = NULL; - - pthread_mutex_init(&ams->mutex, NULL); + chunk_init(&ac->asset_memory, buf, count, chunk_size, 64); + pthread_mutex_init(&ac->mutex, NULL); } +inline +void ams_component_create(AssetComponent* ac, byte* buf, int32 chunk_size, int32 count) +{ + ASSERT_SIMPLE(chunk_size); + + ac->asset_memory.count = count; + ac->asset_memory.chunk_size = chunk_size; + ac->asset_memory.last_pos = 0; + ac->asset_memory.alignment = 64; + ac->asset_memory.memory = buf; + ac->asset_memory.free = (uint64 *) (ac->asset_memory.memory + ac->asset_memory.chunk_size * count); + + pthread_mutex_init(&ac->mutex, NULL); +} + +inline +void ams_component_free(AssetComponent* ac) +{ + pthread_mutex_destroy(&ac->mutex); +} + +inline void ams_free(AssetManagementSystem* ams) { - pthread_mutex_destroy(&ams->mutex); -} - -inline -int32 ams_calculate_chunks(const AssetManagementSystem* ams, int32 byte_size) -{ - return (int32) CEIL_DIV(byte_size + ams->overhead, ams->asset_data_memory.chunk_size); -} - -inline -int64 ams_get_buffer_size(int32 count, int32 chunk_size) -{ - return hashmap_size(count, sizeof(HashEntryInt64)) // hash map - + sizeof(Asset) * count + CEIL_DIV(count, 64) * sizeof(uint64) // asset_memory - + chunk_size * count + CEIL_DIV(count, 64) * sizeof(uint64); // asset_data_memory -} - -inline -void ams_update_stats(AssetManagementSystem* ams) -{ - ams->vram_size = 0; - ams->ram_size = 0; - ams->asset_count = 0; - - Asset* temp_asset = ams->first; - - while (temp_asset) { - ams->vram_size += temp_asset->vram_size; - ams->ram_size += temp_asset->ram_size; - ++ams->asset_count; - - temp_asset = temp_asset->next; + for (int32 i = 0; i < ams->asset_component_count; ++i) { + ams_component_free(&ams->asset_components[i]); } - - ams->has_changed 
= false; } inline -uint64 ams_get_asset_count(AssetManagementSystem* ams) +uint16 ams_calculate_chunks(const AssetComponent* ac, int32 byte_size, int32 overhead) { - if (ams->has_changed) { - ams_update_stats(ams); - } - - return ams->asset_count; + return (uint16) CEIL_DIV(byte_size + overhead, ac->asset_memory.chunk_size); } inline -uint64 ams_get_vram_usage(AssetManagementSystem* ams) +void thrd_ams_set_loaded(Asset* asset) { - if (ams->has_changed) { - ams_update_stats(ams); - } - - return ams->vram_size; + atomic_set_release(&asset->is_loaded, 1); } inline -uint64 ams_get_ram_usage(AssetManagementSystem* ams) +bool thrd_ams_is_loaded(Asset* asset) { - if (ams->has_changed) { - ams_update_stats(ams); - } - - return ams->ram_size; -} - -void ams_free_asset(AssetManagementSystem* ams, Asset* asset) -{ - asset->prev->next = asset->next; - asset->next->prev = asset->prev; - - hashmap_delete_entry(&ams->hash_map, asset->name); - - for (uint32 i = 0; i < asset->size; ++i) { - chunk_free_element(&ams->asset_memory, asset->internal_id + i); - chunk_free_element(&ams->asset_data_memory, asset->internal_id + i); - } - - ams->has_changed = true; + return asset && atomic_get_acquire(&asset->is_loaded) > 0; } inline -Asset* ams_get_asset(AssetManagementSystem* ams, uint64 element) +bool thrd_ams_is_in_vram(Asset* asset) { - return (Asset *) chunk_get_element(&ams->asset_memory, element, false); + return asset && atomic_get_acquire(&asset->is_loaded) + && (asset->state & ASSET_STATE_IN_VRAM); } inline @@ -206,29 +115,6 @@ Asset* ams_get_asset(AssetManagementSystem* ams, const char* key) { HashEntry* entry = hashmap_get_entry(&ams->hash_map, key); - DEBUG_MEMORY_READ( - (uint64) (entry ? (Asset *) entry->value : 0), - entry ? sizeof(Asset) : 0 - ); - - DEBUG_MEMORY_READ( - (uint64) (entry ? ((Asset *) entry->value)->self : 0), - entry ? ((Asset *) entry->value)->ram_size : 0 - ); - - return entry ? 
(Asset *) entry->value : NULL; -} - -inline -Asset* ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 hash) -{ - HashEntry* entry = hashmap_get_entry(&ams->hash_map, key, hash); - - DEBUG_MEMORY_READ( - (uint64) (entry ? (Asset *) entry->value : 0), - entry ? sizeof(Asset) : 0 - ); - DEBUG_MEMORY_READ( (uint64) (entry ? ((Asset *) entry->value)->self : 0), entry ? ((Asset *) entry->value)->ram_size : 0 @@ -238,159 +124,417 @@ Asset* ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 hash) } // @performance We could probably avoid locking by adding a atomic flag to indicate if the value is valid -Asset* thrd_ams_get_asset(AssetManagementSystem* ams, uint64 element) { - pthread_mutex_lock(&ams->mutex); - Asset* asset = ams_get_asset(ams, element); - pthread_mutex_unlock(&ams->mutex); - - return asset; -} - +inline Asset* thrd_ams_get_asset(AssetManagementSystem* ams, const char* key) { - pthread_mutex_lock(&ams->mutex); - Asset* asset = ams_get_asset(ams, key); - pthread_mutex_unlock(&ams->mutex); + HashEntry* entry = hashmap_get_entry(&ams->hash_map, key); + + if (!entry || atomic_get_acquire(&((Asset *) entry->value)->is_loaded) <= 0) { + return NULL; + } + + DEBUG_MEMORY_READ( + (uint64) (entry ? ((Asset *) entry->value)->self : 0), + entry ? ((Asset *) entry->value)->ram_size : 0 + ); + + return (Asset *) entry->value; +} + +inline +Asset* thrd_ams_get_asset_wait(AssetManagementSystem* ams, const char* key) { + HashEntry* entry = hashmap_get_entry(&ams->hash_map, key); + + if (!entry) { + return NULL; + } + + int32 state = 0; + while (!(state = atomic_get_acquire(&((Asset *) entry->value)->is_loaded))) {} + if (state < 0) { + // Marked for removal + return NULL; + } + + DEBUG_MEMORY_READ( + (uint64) (entry ? ((Asset *) entry->value)->self : 0), + entry ? 
((Asset *) entry->value)->ram_size : 0 + ); + + return (Asset *) entry->value; +} + +inline +Asset* thrd_ams_get_asset_wait(AssetManagementSystem* ams, const char* key, uint64 hash) { + HashEntry* entry = hashmap_get_entry(&ams->hash_map, key, hash); + + if (!entry) { + return NULL; + } + + int32 state = 0; + while (!(state = atomic_get_acquire(&((Asset *) entry->value)->is_loaded))) {} + if (state < 0) { + // Marked for removal + return NULL; + } + + DEBUG_MEMORY_READ( + (uint64) (entry ? ((Asset *) entry->value)->self : 0), + entry ? ((Asset *) entry->value)->ram_size : 0 + ); + + return (Asset *) entry->value; +} + +inline +Asset* thrd_ams_get_reserve_asset_wait(AssetManagementSystem* ams, byte type, const char* name, uint32 size, uint32 overhead = 0) +{ + // @bug Isn't hashmap_get_reserve unsafe for threading? + HashEntry* entry = hashmap_get_reserve(&ams->hash_map, name); + Asset* asset = (Asset *) entry->value; + + if (asset->self) { + int32 state = 0; + while (!(state = atomic_get_acquire(&((Asset *) entry->value)->is_loaded))) {} + if (state > 0) { + return asset; + } + } + + AssetComponent* ac = &ams->asset_components[type]; + uint16 elements = ams_calculate_chunks(ac, size, overhead); + int32 free_data = chunk_reserve(&ac->asset_memory, elements); + + byte* data = chunk_get_element(&ac->asset_memory, free_data, true); + + asset->component_id = type; + asset->self = data; + asset->size = elements; // Crucial for freeing + asset->ram_size = ac->asset_memory.chunk_size * elements; + + ac->vram_size += asset->vram_size; + ac->ram_size += asset->ram_size; + ++ac->asset_count; + + DEBUG_MEMORY_RESERVE((uint64) asset, asset->ram_size, 180); return asset; } -Asset* thrd_ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 hash) { - pthread_mutex_lock(&ams->mutex); - Asset* asset = ams_get_asset(ams, key, hash); - pthread_mutex_unlock(&ams->mutex); +inline +void ams_remove_asset(AssetManagementSystem* ams, AssetComponent* ac, Asset* asset, const char* 
name) +{ + // @todo remove from vram - return asset; + asset->is_loaded = 0; + ac->vram_size -= asset->vram_size; + ac->ram_size -= asset->ram_size; + --ac->asset_count; + + hashmap_remove(&ams->hash_map, name); + chunk_free_elements( + &ac->asset_memory, + chunk_id_from_memory(&ac->asset_memory, asset->self), + asset->size + ); +} + +inline +void ams_remove_asset_ram(AssetManagementSystem* ams, AssetComponent* ac, Asset* asset) +{ + ac->ram_size -= asset->ram_size; + chunk_free_elements( + &ac->asset_memory, + chunk_id_from_memory(&ac->asset_memory, asset->self), + asset->size + ); +} + +// @todo It would be nice if we could remove the asset by passing it as a parameter instead of the name +// The problem is there is no correlation between asset data (e.g. internal_id) and global hashmap (e.g. element_id) +// This means we would have to iterate all hashmap entries and remove it this way, which is very slow +inline +void ams_remove_asset(AssetManagementSystem* ams, const char* name) +{ + // @todo remove from vram + + Asset* asset = ams_get_asset(ams, name); + AssetComponent* ac = &ams->asset_components[asset->component_id]; + + asset->is_loaded = 0; + ac->vram_size -= asset->vram_size; + ac->ram_size -= asset->ram_size; + --ac->asset_count; + + hashmap_remove(&ams->hash_map, name); + chunk_free_elements( + &ac->asset_memory, + chunk_id_from_memory(&ac->asset_memory, asset->self), + asset->size + ); +} + +inline +void ams_remove_asset(AssetManagementSystem* ams, Asset* asset, const char* name) +{ + // @todo remove from vram + + AssetComponent* ac = &ams->asset_components[asset->component_id]; + + asset->is_loaded = 0; + ac->vram_size -= asset->vram_size; + ac->ram_size -= asset->ram_size; + --ac->asset_count; + + hashmap_remove(&ams->hash_map, name); + chunk_free_elements( + &ac->asset_memory, + chunk_id_from_memory(&ac->asset_memory, asset->self), + asset->size + ); +} + +inline +void ams_remove_asset_ram(AssetManagementSystem* ams, Asset* asset) +{ + 
AssetComponent* ac = &ams->asset_components[asset->component_id]; + ac->ram_size -= asset->ram_size; + + chunk_free_elements( + &ac->asset_memory, + chunk_id_from_memory(&ac->asset_memory, asset->self), + asset->size + ); +} + +inline +void thrd_ams_remove_asset(AssetManagementSystem* ams, AssetComponent* ac, Asset* asset, const char* name) +{ + // @todo remove from vram + + asset->is_loaded = 0; + ac->vram_size -= asset->vram_size; + ac->ram_size -= asset->ram_size; + --ac->asset_count; + + atomic_set_release(&asset->is_loaded, 0); + hashmap_remove(&ams->hash_map, name); + chunk_free_elements( + &ac->asset_memory, + chunk_id_from_memory(&ac->asset_memory, asset->self), + asset->size + ); +} + +void thrd_ams_remove_asset(AssetManagementSystem* ams, const char* name) +{ + HashEntry* entry = hashmap_get_entry(&ams->hash_map, name); + Asset* asset = (Asset *) entry->value; + atomic_set_release(&asset->is_loaded, -1); + hashmap_remove(&ams->hash_map, name); + + AssetComponent* ac = &ams->asset_components[asset->component_id]; + chunk_free_elements( + &ac->asset_memory, + chunk_id_from_memory(&ac->asset_memory, asset->self), + asset->size + ); + + ac->vram_size -= asset->vram_size; + ac->ram_size -= asset->ram_size; + --ac->asset_count; +} + +void thrd_ams_remove_asset(AssetManagementSystem* ams, const char* name, Asset* asset) +{ + atomic_set_release(&asset->is_loaded, -1); + hashmap_remove(&ams->hash_map, name); + + AssetComponent* ac = &ams->asset_components[asset->component_id]; + chunk_free_elements( + &ac->asset_memory, + chunk_id_from_memory(&ac->asset_memory, asset->self), + asset->size + ); + + ac->vram_size -= asset->vram_size; + ac->ram_size -= asset->ram_size; + --ac->asset_count; } // @todo implement defragment command to optimize memory layout since the memory layout will become fragmented over time -// @performance This function is VERY important, check if we can optimize it -// We could probably optimize the threaded version by adding a 
atomic_set_release(asset->is_loaded, true); -Asset* ams_reserve_asset(AssetManagementSystem* ams, const char* name, uint32 elements = 1) +Asset* ams_reserve_asset(AssetManagementSystem* ams, byte type, const char* name, uint32 size, uint32 overhead = 0) { - int64 free_asset = chunk_reserve(&ams->asset_memory, elements, true); - if (free_asset < 0) { - ASSERT_SIMPLE(free_asset >= 0); + ASSERT_SIMPLE(strlen(name) < HASH_MAP_MAX_KEY_LENGTH - 1); + + AssetComponent* ac = &ams->asset_components[type]; + uint16 elements = ams_calculate_chunks(ac, size, overhead); + + int32 free_data = chunk_reserve(&ac->asset_memory, elements); + if (free_data < 0) { + ASSERT_SIMPLE(free_data >= 0); return NULL; } - size_t name_length = strlen(name); - ASSERT_SIMPLE(name_length < MAX_ASSET_NAME_LENGTH - 1); + byte* asset_data = chunk_get_element(&ac->asset_memory, free_data, true); + Asset* asset = (Asset *) hashmap_reserve(&ams->hash_map, name)->value; - Asset* asset = (Asset *) chunk_get_element(&ams->asset_memory, free_asset); - asset->internal_id = free_asset; - - strncpy(asset->name, name, name_length); - asset->name[name_length] = '\0'; - - hashmap_insert(&ams->hash_map, name, (uintptr_t) asset); - - chunk_reserve_index(&ams->asset_data_memory, free_asset, elements, true); - asset->self = chunk_get_element(&ams->asset_data_memory, free_asset); + asset->component_id = type; + asset->self = asset_data; asset->size = elements; // Crucial for freeing - asset->ram_size = (ams->asset_memory.chunk_size + ams->asset_data_memory.chunk_size) * elements; + asset->ram_size = ac->asset_memory.chunk_size * elements; - DEBUG_MEMORY_RESERVE((uint64) asset->self, elements * ams->asset_data_memory.chunk_size, 180); + ac->vram_size += asset->vram_size; + ac->ram_size += asset->ram_size; + ++ac->asset_count; - // @performance Do we really want a double linked list. Are we really using this feature or is the free_index enough? 
- if (free_asset > 0 && free_asset < ams->asset_memory.count - 1) { - Asset* next = ams->first; - while (next->next != NULL - && next->next->internal_id < asset->internal_id - && next->internal_id < ams->asset_memory.count - ) { - next = next->next; - } - - asset->prev = next; - asset->next = asset->prev->next; - - if (asset->next) { - asset->next->prev = asset; - } else { - ams->last = asset; - } - - asset->prev->next = asset; - } else if (free_asset == 0) { - asset->next = ams->first; - - if (ams->first) { - ams->first->prev = asset; - } - - ams->first = asset; - } else if (free_asset == ams->asset_memory.count - 1) { - asset->prev = ams->last; - - // WARNING: no if here because we assume there is no ECS with just a size of 1 - ams->last->next = asset; - ams->last = asset; - } - - ams->has_changed = true; + DEBUG_MEMORY_RESERVE((uint64) asset, asset->ram_size, 180); return asset; } -void ams_garbage_collect(AssetManagementSystem* ams, uint64 time, uint64 dt) -{ - Asset* asset = ams->first; +inline +Asset* thrd_ams_reserve_asset(AssetManagementSystem* ams, byte type, const char* name, uint32 size, uint32 overhead = 0) { + AssetComponent* ac = &ams->asset_components[type]; + uint16 elements = ams_calculate_chunks(ac, size, overhead); - while (asset) { - // @performance We cannot just remove ram and keep vram. 
This is a huge flaw - if (asset->can_garbage_collect_ram && asset->can_garbage_collect_vram && time - asset->last_access <= dt) { - ams_free_asset(ams, asset); + pthread_mutex_lock(&ams->asset_components[type].mutex); + int32 free_data = chunk_reserve(&ac->asset_memory, elements); + if (free_data < 0) { + pthread_mutex_unlock(&ams->asset_components[type].mutex); + ASSERT_SIMPLE(free_data >= 0); + + return NULL; + } + pthread_mutex_unlock(&ams->asset_components[type].mutex); + + byte* asset_data = chunk_get_element(&ac->asset_memory, free_data, true); + + Asset asset = {}; + + asset.component_id = type; + asset.self = asset_data; + asset.size = elements; // Crucial for freeing + asset.ram_size = ac->asset_memory.chunk_size * elements; + + ac->vram_size += asset.vram_size; + ac->ram_size += asset.ram_size; + ++ac->asset_count; + + DEBUG_MEMORY_RESERVE((uint64) asset_data, asset.ram_size, 180); + + ASSERT_SIMPLE(strlen(name) < HASH_MAP_MAX_KEY_LENGTH - 1); + + return (Asset *) hashmap_insert(&ams->hash_map, name, (byte *) &asset)->value; +} + +// @todo Find a way to handle manual ram/vram changes +// Either implement a ams_update(AssetManagementSystem* ams, Asset* asset) function +// Or set .has_changed = true (even if garbage collection gets set) and call this func somewhere (maybe thread?) 
+// Perform general ams update (stats and garbage collection) +// We perform multiple things in one iteration to reduce the iteration costs +// @todo don't use uint64 for time, use uint32 and use relative time to start of program +void thrd_ams_update(AssetManagementSystem* ams, uint64 time, uint64 dt) +{ + for (int32 i = 0; i < ams->asset_component_count; ++i) { + ams->asset_components[i].vram_size = 0; + ams->asset_components[i].ram_size = 0; + ams->asset_components[i].asset_count = 0; + } + + // Iterate the hash map to find all assets + int32 chunk_id = 0; + chunk_iterate_start(&ams->hash_map.buf, chunk_id) + HashEntry* entry = (HashEntry *) chunk_get_element(&ams->hash_map.buf, chunk_id); + Asset* asset = (Asset *) entry->value; + + if (!thrd_ams_is_loaded(asset)) { + continue; } - asset = asset->next; - } -} + ams->asset_components[asset->component_id].vram_size += asset->vram_size; + ams->asset_components[asset->component_id].ram_size += asset->ram_size; + ++ams->asset_components[asset->component_id].asset_count; -void ams_garbage_collect(AssetManagementSystem* ams) -{ - Asset* asset = ams->first; - - while (asset) { - // @performance We cannot just remove ram and keep vram. 
This is a huge flaw - if (asset->can_garbage_collect_ram && asset->can_garbage_collect_vram) { - ams_free_asset(ams, asset); + if ((asset->state & ASSET_STATE_RAM_GC) || (asset->state & ASSET_STATE_VRAM_GC)) { + if ((asset->state & ASSET_STATE_RAM_GC) + && (asset->state & ASSET_STATE_VRAM_GC) + && time - asset->last_access <= dt + ) { + // @performance Ideally we would like to pass the entry to delete + // The problem is the hashmap_delete function can't work with entries directly since it is not a doubly linked list + thrd_ams_remove_asset(ams, &ams->asset_components[asset->component_id], asset, entry->key); + } else if ((asset->state & ASSET_STATE_RAM_GC) + && time - asset->last_access <= dt + ) { + ams_remove_asset_ram(ams, &ams->asset_components[asset->component_id], asset); + } else if ((asset->state & ASSET_STATE_VRAM_GC) + && time - asset->last_access <= dt + ) { + ams->asset_components[asset->component_id].vram_size -= asset->vram_size; + } } + chunk_iterate_end; +} - asset = asset->next; +Asset* ams_insert_asset(AssetManagementSystem* ams, Asset* asset_temp, const char* name) +{ + AssetComponent* ac = &ams->asset_components[asset_temp->component_id]; + + int32 free_data = chunk_reserve(&ac->asset_memory, asset_temp->size); + if (free_data < 0) { + ASSERT_SIMPLE(free_data >= 0); + return NULL; } -} -void thrd_ams_garbage_collect(AssetManagementSystem* ams, uint64 time, uint64 dt) -{ - pthread_mutex_lock(&ams->mutex); - ams_garbage_collect(ams, time, dt); - pthread_mutex_unlock(&ams->mutex); -} + byte* asset_data = chunk_get_element(&ac->asset_memory, free_data); -void thrd_ams_garbage_collect(AssetManagementSystem* ams) -{ - pthread_mutex_lock(&ams->mutex); - ams_garbage_collect(ams); - pthread_mutex_unlock(&ams->mutex); -} + asset_temp->self = asset_data; + asset_temp->size = asset_temp->size; // Crucial for freeing + asset_temp->ram_size = ac->asset_memory.chunk_size * asset_temp->size; -Asset* thrd_ams_reserve_asset(AssetManagementSystem* ams, const 
char* name, uint32 elements = 1) { - pthread_mutex_lock(&ams->mutex); - Asset* asset = ams_reserve_asset(ams, name, elements); - pthread_mutex_unlock(&ams->mutex); + ac->vram_size += asset_temp->vram_size; + ac->ram_size += asset_temp->ram_size; + ++ac->asset_count; + + Asset* asset = (Asset *) hashmap_insert(&ams->hash_map, name, (byte *) asset_temp)->value; + DEBUG_MEMORY_RESERVE((uint64) asset->self, asset->ram_size, 180); return asset; } -Asset* thrd_ams_reserve_asset_start(AssetManagementSystem* ams, const char* name, uint32 elements = 1) { - pthread_mutex_lock(&ams->mutex); +inline +Asset* thrd_ams_insert_asset(AssetManagementSystem* ams, Asset* asset_temp, const char* name) +{ + AssetComponent* ac = &ams->asset_components[asset_temp->component_id]; - return ams_reserve_asset(ams, name, elements); -} + pthread_mutex_lock(&ams->asset_components[asset_temp->component_id].mutex); + int32 free_data = chunk_reserve(&ac->asset_memory, asset_temp->size); + if (free_data < 0) { + pthread_mutex_unlock(&ams->asset_components[asset_temp->component_id].mutex); + ASSERT_SIMPLE(free_data >= 0); -void thrd_ams_reserve_asset_end(AssetManagementSystem* ams) { - pthread_mutex_unlock(&ams->mutex); + return NULL; + } + pthread_mutex_unlock(&ams->asset_components[asset_temp->component_id].mutex); + + byte* asset_data = chunk_get_element(&ac->asset_memory, free_data); + memcpy(asset_data, asset_temp->self, sizeof(Asset)); + + asset_temp->self = asset_data; + asset_temp->ram_size = ac->asset_memory.chunk_size * asset_temp->size; + + ac->vram_size += asset_temp->vram_size; + ac->ram_size += asset_temp->ram_size; + ++ac->asset_count; + + Asset* asset = (Asset *) hashmap_insert(&ams->hash_map, name, (byte *) asset_temp)->value; + DEBUG_MEMORY_RESERVE((uint64) asset->self, asset->ram_size, 180); + + atomic_set_release(&asset->is_loaded, 1); + + return asset; } #endif \ No newline at end of file diff --git a/asset/AssetType.h b/asset/AssetType.h index 40f8beb..5377362 100644 --- 
a/asset/AssetType.h +++ b/asset/AssetType.h @@ -9,7 +9,7 @@ #ifndef TOS_ASSET_TYPE_H #define TOS_ASSET_TYPE_H -enum AssetType { +enum AssetType : byte { ASSET_TYPE_GENERAL, ASSET_TYPE_OBJ, ASSET_TYPE_AUDIO, diff --git a/audio/Audio.cpp b/audio/Audio.cpp index 32ad25c..b99f113 100644 --- a/audio/Audio.cpp +++ b/audio/Audio.cpp @@ -11,12 +11,7 @@ #include "../utils/StringUtils.h" #include "../memory/RingMemory.h" - -#if _WIN32 - #include "../platform/win32/FileUtils.cpp" -#else - #include "../platform/linux/FileUtils.cpp" -#endif +#include "../system/FileUtils.cpp" #include "Audio.h" #include "AudioSetting.h" diff --git a/audio/AudioMixer.h b/audio/AudioMixer.h index 9a87480..677e384 100644 --- a/audio/AudioMixer.h +++ b/audio/AudioMixer.h @@ -16,12 +16,7 @@ #include "../utils/MathUtils.h" #include "../memory/ChunkMemory.h" #include "../math/matrix/MatrixFloat32.h" - -#if _WIN32 - #include "../platform/win32/threading/Atomic.h" -#elif __linux__ - #include "../platform/linux/threading/Atomic.h" -#endif +#include "../thread/Atomic.h" #if DIRECT_SOUND #include "../platform/win32/audio/DirectSound.h" @@ -50,10 +45,11 @@ enum AudioEffect { AUDIO_EFFECT_EASE_IN = 1 << 14, AUDIO_EFFECT_EASE_OUT = 1 << 15, AUDIO_EFFECT_SPEED = 1 << 16, + AUDIO_EFFECT_REPEAT = 1 << 17, }; struct AudioInstance { - int64 id; + int32 id; AudioLocationSetting origin; uint32 audio_size; @@ -62,7 +58,6 @@ struct AudioInstance { uint64 effect; uint32 sample_index; byte channels; - bool repeat; // @todo How to implement audio that is only supposed to be played after a certain other sound file is finished // e.g. queueing soundtracks/ambient noise @@ -130,28 +125,37 @@ bool audio_mixer_is_active(AudioMixer* mixer) { return (mixer->state_old = mixer_state) == AUDIO_MIXER_STATE_ACTIVE; } -// @todo expand AudioLocationSetting so that it also includes audio effects, repeat etc. 
-void audio_mixer_add(AudioMixer* mixer, int64 id, Audio* audio, AudioLocationSetting* origin) +void audio_mixer_play(AudioMixer* mixer, int32 id, Audio* audio, AudioInstance* settings = NULL) { - int64 index = chunk_reserve(&mixer->audio_instances, 1); + int32 index = chunk_reserve(&mixer->audio_instances, 1); if (index < 0) { return; } - // @question Do I really want to use audio instance? wouldn't Audio* be sufficient? - // Well AudioInstance is a little bit smaller but is this really worth it, probably yes?! AudioInstance* instance = (AudioInstance *) chunk_get_element(&mixer->audio_instances, index); instance->id = id; instance->audio_size = audio->size; instance->audio_data = audio->data; instance->channels = audio->channels; - if (origin) { - memcpy(&instance->origin, origin, sizeof(AudioLocationSetting)); + if (settings) { + memcpy(&instance->origin, &settings->origin, sizeof(AudioLocationSetting)); + instance->effect = settings->effect; } } -void audio_mixer_add_unique(AudioMixer* mixer, int64 id, Audio* audio, AudioLocationSetting* origin) +void audio_mixer_play(AudioMixer* mixer, AudioInstance* settings) +{ + int32 index = chunk_reserve(&mixer->audio_instances, 1); + if (index < 0) { + return; + } + + AudioInstance* instance = (AudioInstance *) chunk_get_element(&mixer->audio_instances, index); + memcpy(instance, settings, sizeof(AudioInstance)); +} + +void audio_mixer_play_unique(AudioMixer* mixer, int32 id, Audio* audio, AudioInstance* settings = NULL) { for (uint32 i = 0; i < mixer->audio_instances.count; ++i) { // @performance We are not really utilizing chunk memory. 
@@ -163,16 +167,31 @@ void audio_mixer_add_unique(AudioMixer* mixer, int64 id, Audio* audio, AudioLoca } } - audio_mixer_add(mixer, id, audio, origin); + audio_mixer_play(mixer, id, audio, settings); } -void audio_mixer_remove(AudioMixer* mixer, int64 id) +void audio_mixer_play_unique(AudioMixer* mixer, AudioInstance* settings) +{ + for (uint32 i = 0; i < mixer->audio_instances.count; ++i) { + // @performance We are not really utilizing chunk memory. + // Maybe a simple array would be better + // Or we need to use more chunk functions / maybe even create a chunk_iterate() function? + AudioInstance* instance = (AudioInstance *) chunk_get_element(&mixer->audio_instances, i); + if (instance->id == settings->id) { + return; + } + } + + audio_mixer_play(mixer, settings); +} + +void audio_mixer_remove(AudioMixer* mixer, int32 id) { for (uint32 i = 0; i < mixer->audio_instances.count; ++i) { AudioInstance* instance = (AudioInstance *) chunk_get_element(&mixer->audio_instances, i); if (instance->id == id) { instance->id = 0; - chunk_free_element(&mixer->audio_instances, i); + chunk_free_elements(&mixer->audio_instances, i); // No return, since we want to remove all instances } @@ -475,7 +494,7 @@ void audio_mixer_mix(AudioMixer* mixer, uint32 size) { // We make it stereo for (int32 j = 0; j < limit; ++j) { if (sound_sample_index >= sound_sample_count) { - if (!sound->repeat) { + if (!(sound->effect & AUDIO_EFFECT_REPEAT)) { limit = j; break; } @@ -494,7 +513,7 @@ void audio_mixer_mix(AudioMixer* mixer, uint32 size) { } // Apply effects based on sound's effect type - if (sound->effect) { + if (sound->effect && sound->effect != AUDIO_EFFECT_REPEAT) { int32 sample_adjustment = mixer_effects_mono(mixer, sound->effect, sound_sample_index); sound_sample_index += sample_adjustment; limit += sample_adjustment; @@ -502,7 +521,7 @@ void audio_mixer_mix(AudioMixer* mixer, uint32 size) { } else { for (int32 j = 0; j < limit; ++j) { if (sound_sample_index >= sound_sample_count) { - if 
(!sound->repeat) { + if (!(sound->effect & AUDIO_EFFECT_REPEAT)) { limit = j; break; } @@ -520,7 +539,7 @@ void audio_mixer_mix(AudioMixer* mixer, uint32 size) { } // Apply effects based on sound's effect type - if (sound->effect) { + if (sound->effect && sound->effect != AUDIO_EFFECT_REPEAT) { int32 sample_adjustment = mixer_effects_stereo() / 2;; sound_sample_index += sample_adjustment; limit += sample_adjustment; diff --git a/audio/QoaSimd.h b/audio/QoaSimd.h index 11d408c..00668df 100644 --- a/audio/QoaSimd.h +++ b/audio/QoaSimd.h @@ -13,7 +13,7 @@ #include "../stdlib/Types.h" #include "../utils/EndianUtils.h" #include "../audio/Audio.cpp" -#include "../stdlib/simd/SIMD_I32.h" +#include "../stdlib/Simd.h" #define QOA_SLICE_LEN 20 #define QOA_SLICES_PER_FRAME 256 diff --git a/command/AppCmdBuffer.cpp b/command/AppCmdBuffer.cpp new file mode 100644 index 0000000..21f811e --- /dev/null +++ b/command/AppCmdBuffer.cpp @@ -0,0 +1,491 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_APP_COMMAND_BUFFER_C +#define TOS_APP_COMMAND_BUFFER_C + +/** + * The AppCmdBuffer by itself doesn't do much, it simply takes in commands and executes them. + * The actual execution depends on the implementation of the underlying systems like: + * ECS, AMS, AudioMixer, ... + * The AppCmdBuffer simplifies the interaction with those systems since the caller has to care less + * about the information flow, function structure etc. + * On the other hand the caller loses some control: + * No control over the execution order, unless additional overhead like priority gets introduced + * No control over what type of command are executed, unless additional overhead like command type checks get introduced + * ... 
+ * In many cases you don't need this type of control, but when you need it you should probably look at how
+ * this AppCmdBuffer interacts with the individual systems and manually call those
+ */
+#include "AppCmdBuffer.h"
+
+// Initializes the command storage (commands_count slots of sizeof(Command)) and the mutex guarding it
+inline
+void cmd_buffer_create(AppCmdBuffer* cb, BufferMemory* buf, int32 commands_count)
+{
+    chunk_init(&cb->commands, buf, commands_count, sizeof(Command), 64);
+    pthread_mutex_init(&cb->mutex, NULL);
+}
+
+// This doesn't load the asset directly but tells (most likely) a worker thread to load an asset
+static inline
+void cmd_asset_load_enqueue(AppCmdBuffer* cb, Command* cmd)
+{
+    queue_enqueue_wait_atomic(cb->assets_to_load, (byte *) cmd->data);
+}
+
+// Runs the function pointer stored in cmd->data and passes the command itself as argument
+static inline
+void* cmd_func_run(AppCmdBuffer* cb, Command* cmd)
+{
+    // data follows the type enum inside Command, so it is not guaranteed to be aligned for a function pointer.
+    // Copy the pointer out instead of dereferencing a casted byte pointer.
+    CommandFunc func;
+    memcpy(&func, cmd->data, sizeof(CommandFunc));
+
+    return func(cmd);
+}
+
+// Loads the asset whose id is stored as text in cmd->data; bits 24-31 of the id select the archive
+static inline
+Asset* cmd_asset_load(AppCmdBuffer* cb, Command* cmd)
+{
+    int32 asset_id = (int32) str_to_int((char *) cmd->data);
+    int32 archive_id = (asset_id >> 24) & 0xFF;
+    return asset_archive_asset_load(&cb->asset_archives[archive_id], asset_id, cb->ams, cb->thrd_mem_vol);
+}
+
+// Plays the audio asset named in cmd->data if the AMS lookup returns it.
+// Returns the asset, or NULL if the lookup failed (nothing is played in that case)
+static inline
+Asset* cmd_audio_play_enqueue(AppCmdBuffer* cb, Command* cmd)
+{
+    Asset* asset = thrd_ams_get_asset_wait(cb->ams, (char *) cmd->data);
+    if (!asset) {
+        return asset;
+    }
+
+    // @todo How to handle settings = AudioInstance
+    // Command::data is only 28 bytes, a mixer index cannot be read from data + 32 (out of bounds read).
+    // Until the command carries a mixer id we use the first mixer, same as cmd_audio_play()
+    audio_mixer_play(
+        &cb->mixer[0], // @bug how to handle multiple mixers
+        asset->official_id + 1, // @bug + 1 necessary since it starts at 0, I think. we are still in the design phase :)
+        (Audio *) asset->self
+    );
+
+    return asset;
+}
+
+// Plays the audio asset if it is available, otherwise requests loading it.
+// Returns NULL if the asset was not available yet
+static inline
+Asset* cmd_audio_play_async(AppCmdBuffer* cb, Command* cmd)
+{
+    Asset* asset = thrd_ams_get_asset_wait(cb->ams, (char *) cmd->data);
+    if (!asset) {
+        cmd_asset_load_enqueue(cb, cmd);
+    } else {
+        cmd_audio_play_enqueue(cb, cmd);
+    }
+
+    return asset;
+}
+
+// Prepares an available texture asset for the active gpu api (OpenGL needs bottom-to-top image data).
+// Returns NULL if the asset is not available
+static inline
+Asset* cmd_texture_create(AppCmdBuffer* cb, Command* cmd)
+{
+    Asset* asset = thrd_ams_get_asset_wait(cb->ams, (char *) cmd->data);
+    if (!asset) {
+        return asset;
+    }
+
+    Texture* texture = (Texture *) asset->self;
+    if (cb->gpu_api == GPU_API_TYPE_OPENGL
+        && !(texture->image.image_settings & IMAGE_SETTING_BOTTOM_TO_TOP)
+    ) {
+        image_flip_vertical(cb->thrd_mem_vol, &texture->image);
+    }
+
+    return asset;
+}
+
+// Prepares the texture if it is available, otherwise requests loading it.
+// Returns NULL if the asset was not available yet
+static inline
+Asset* cmd_texture_load_async(AppCmdBuffer* cb, Command* cmd)
+{
+    Asset* asset = thrd_ams_get_asset_wait(cb->ams, (char *) cmd->data);
+    if (!asset) {
+        cmd_asset_load_enqueue(cb, cmd);
+    } else {
+        cmd_texture_create(cb, cmd);
+    }
+
+    return asset;
+}
+
+// Prepares an available font asset for the active gpu api (coordinates get inverted for OpenGL).
+// Returns NULL if the asset is not available
+static inline
+Asset* cmd_font_create(AppCmdBuffer* cb, Command* cmd)
+{
+    Asset* asset = thrd_ams_get_asset_wait(cb->ams, (char *) cmd->data);
+    if (!asset) {
+        return asset;
+    }
+
+    Font* font = (Font *) asset->self;
+    if (cb->gpu_api == GPU_API_TYPE_OPENGL) {
+        font_invert_coordinates(font);
+    }
+
+    return asset;
+}
+
+// Prepares the font if it is available, otherwise requests loading it.
+// Returns NULL if the asset was not available yet
+static inline
+Asset* cmd_font_load_async(AppCmdBuffer* cb, Command* cmd)
+{
+    Asset* asset = thrd_ams_get_asset_wait(cb->ams, (char *) cmd->data);
+    if (!asset) {
+        cmd_asset_load_enqueue(cb, cmd);
+    } else {
+        cmd_font_create(cb, cmd);
+    }
+
+    return asset;
+}
+
+// Copies cmd_temp into a free slot of the command buffer while holding cb->mutex.
+// If no slot is free the command is dropped (assert in debug builds)
+inline
+void thrd_cmd_insert(AppCmdBuffer* cb, Command* cmd_temp)
+{
+    pthread_mutex_lock(&cb->mutex);
+    int32 index = chunk_reserve(&cb->commands, 1);
+    if (index < 0) {
+        pthread_mutex_unlock(&cb->mutex);
+        ASSERT_SIMPLE(false);
+
+        return;
+    }
+
+    if (index > cb->last_element) {
+        cb->last_element = index;
+    }
+
+    Command* cmd = (Command *) chunk_get_element(&cb->commands, index);
+    memcpy(cmd, cmd_temp, sizeof(Command));
+    pthread_mutex_unlock(&cb->mutex);
+}
+
+// Inserts a command with an int32 payload.
+// NOTE(review): the async handlers above read cmd->data as a char* key, confirm an int32 payload is intended for them
+inline
+void thrd_cmd_insert(AppCmdBuffer* cb, CommandType type, int32 data)
+{
+    // Zero-init, the whole struct gets copied into the shared buffer
+    Command cmd = {};
+    cmd.type = type;
+    *((int32 *) cmd.data) = data;
+
+    thrd_cmd_insert(cb, &cmd);
+}
+
+// Inserts a command with a string payload (e.g. an asset name)
+inline
+void thrd_cmd_insert(AppCmdBuffer* cb, CommandType type, const char* data)
+{
+    Command cmd = {};
+    cmd.type = type;
+    str_copy_short((char *) cmd.data, data);
+
+    thrd_cmd_insert(cb, &cmd);
+}
+
+// Inserts a function command. The type parameter is currently unused, the command is always stored as CMD_FUNC_RUN
+inline void thrd_cmd_func_insert(AppCmdBuffer* cb, CommandType type, CommandFunc* func) {
+    Command cmd = {};
+    cmd.type = CMD_FUNC_RUN;
+    memcpy(cmd.data, func, sizeof(CommandFunc));
+
+    thrd_cmd_insert(cb, &cmd);
+}
+
+// Inserts a CMD_AUDIO_PLAY command with an int32 payload
+inline void thrd_cmd_audio_play(AppCmdBuffer* cb, int32 data) {
+    Command cmd = {};
+    cmd.type = CMD_AUDIO_PLAY;
+    *((int32 *) cmd.data) = data;
+
+    thrd_cmd_insert(cb, &cmd);
+}
+
+// Inserts a CMD_AUDIO_PLAY command with a string payload
+inline void thrd_cmd_audio_play(AppCmdBuffer* cb, const char* data) {
+    Command cmd = {};
+    cmd.type = CMD_AUDIO_PLAY;
+    str_copy_short((char *) cmd.data, data);
+
+    thrd_cmd_insert(cb, &cmd);
+}
+
+// Inserts a CMD_FUNC_RUN command which stores the function pointer in the command data
+inline void thrd_cmd_func_run(AppCmdBuffer* cb, CommandFunc* func) {
+    Command cmd = {};
+    cmd.type = CMD_FUNC_RUN;
+    memcpy(cmd.data, func, sizeof(CommandFunc));
+
+    thrd_cmd_insert(cb, &cmd);
+}
+
+// Inserts a CMD_TEXTURE_LOAD command with an int32 payload
+inline void thrd_cmd_texture_load(AppCmdBuffer* cb, int32 data) {
+    Command cmd = {};
+    cmd.type = CMD_TEXTURE_LOAD;
+    *((int32 *) cmd.data) = data;
+
+    thrd_cmd_insert(cb, &cmd);
+}
+
+// Inserts a CMD_TEXTURE_LOAD command with a string payload
+inline void thrd_cmd_texture_load(AppCmdBuffer* cb, const char* data) {
+    Command cmd = {};
+    cmd.type = CMD_TEXTURE_LOAD;
+    str_copy_short((char *) cmd.data, data);
+
+    thrd_cmd_insert(cb, &cmd);
+}
+
+// Inserts a CMD_FONT_LOAD command with an int32 payload
+inline void thrd_cmd_font_load(AppCmdBuffer* cb, int32 data) {
+    Command cmd = {};
+    cmd.type = CMD_FONT_LOAD;
+    *((int32 *) cmd.data) = data;
+
+    thrd_cmd_insert(cb, &cmd);
+}
+
+// Inserts a CMD_FONT_LOAD command with a string payload
+inline void thrd_cmd_font_load(AppCmdBuffer* cb, const char* data) {
+    Command cmd = {};
+    cmd.type = CMD_FONT_LOAD;
+    str_copy_short((char *) cmd.data, data);
+
+    thrd_cmd_insert(cb, &cmd);
+}
+
+// Directly loads an asset by id (main loop memory); bits 24-31 of the id select the archive
+inline Asset* cmd_asset_load(AppCmdBuffer* cb, int32 asset_id)
+{
+    int32 archive_id = (asset_id >> 24) & 0xFF;
+    return asset_archive_asset_load(&cb->asset_archives[archive_id], asset_id, cb->ams, cb->mem_vol);
+}
+
+// Same as above but the id is passed as text
+inline Asset* cmd_asset_load(AppCmdBuffer* cb, const char* asset_id_str)
+{
+    int32 asset_id = (int32) str_to_int(asset_id_str);
+    int32 archive_id = (asset_id >> 24) & 0xFF;
+    return asset_archive_asset_load(&cb->asset_archives[archive_id], asset_id, cb->ams, cb->mem_vol);
+}
+
+// Plays an audio asset by id and loads it first if necessary.
+// Returns NULL if the asset could not be loaded
+inline Asset* cmd_audio_play(AppCmdBuffer* cb, int32 asset_id)
+{
+    // Check if asset already loaded
+    char id_str[9];
+    int_to_hex(asset_id, id_str);
+
+    Asset* asset = thrd_ams_get_asset_wait(cb->ams, id_str);
+
+    // Load asset if not loaded
+    if (!asset) {
+        int32 archive_id = (asset_id >> 24) & 0xFF;
+        asset = asset_archive_asset_load(&cb->asset_archives[archive_id], asset_id, cb->ams, cb->mem_vol);
+    }
+
+    // Don't dereference a failed load
+    if (!asset) {
+        return NULL;
+    }
+
+    // @todo How to handle settings = AudioInstance
+    audio_mixer_play(
+        &cb->mixer[0], // @bug how to handle multiple mixers
+        asset->official_id + 1, // @bug + 1 necessary since it starts at 0, I think. we are still in the design phase :)
+        (Audio *) asset->self
+    );
+
+    return asset;
+}
+
+// Plays an audio asset by name (hex id) and loads it first if necessary.
+// Returns NULL if the asset could not be loaded
+inline Asset* cmd_audio_play(AppCmdBuffer* cb, const char* name) {
+    // Check if asset already loaded
+    Asset* asset = thrd_ams_get_asset_wait(cb->ams, name);
+
+    // Load asset if not loaded
+    if (!asset) {
+        int32 asset_id = (int32) hex_to_int(name);
+        int32 archive_id = (asset_id >> 24) & 0xFF;
+        asset = asset_archive_asset_load(&cb->asset_archives[archive_id], asset_id, cb->ams, cb->mem_vol);
+    }
+
+    // Don't dereference a failed load
+    if (!asset) {
+        return NULL;
+    }
+
+    // @todo How to handle settings = AudioInstance
+    audio_mixer_play(
+        &cb->mixer[0], // @bug how to handle multiple mixers
+        asset->official_id + 1, // @bug + 1 necessary since it starts at 0, I think. we are still in the design phase :)
+        (Audio *) asset->self
+    );
+
+    return asset;
+}
+
+// Directly runs func, no command is passed to it
+inline void* cmd_func_run(AppCmdBuffer* cb, CommandFunc func) {
+    return func(NULL);
+}
+
+// Loads (if necessary) and prepares a texture by id.
+// Returns NULL if the asset could not be loaded
+inline Asset* cmd_texture_load(AppCmdBuffer* cb, int32 asset_id) {
+    // Check if asset already loaded
+    char id_str[9];
+    int_to_hex(asset_id, id_str);
+
+    Asset* asset = thrd_ams_get_asset_wait(cb->ams, id_str);
+
+    // Load asset if not loaded
+    if (!asset) {
+        int32 archive_id = (asset_id >> 24) & 0xFF;
+        asset = asset_archive_asset_load(&cb->asset_archives[archive_id], asset_id, cb->ams, cb->mem_vol);
+    }
+
+    // Don't dereference a failed load
+    if (!asset) {
+        return NULL;
+    }
+
+    // Setup basic texture
+    Texture* texture = (Texture *) asset->self;
+    if (cb->gpu_api == GPU_API_TYPE_OPENGL
+        && !(texture->image.image_settings & IMAGE_SETTING_BOTTOM_TO_TOP)
+    ) {
+        image_flip_vertical(cb->mem_vol, &texture->image);
+    }
+
+    // @question What about texture upload?
+
+    return asset;
+}
+
+// Loads (if necessary) and prepares a texture by name (hex id).
+// Returns NULL if the asset could not be loaded
+inline Asset* cmd_texture_load(AppCmdBuffer* cb, const char* name) {
+    // Check if asset already loaded
+    Asset* asset = thrd_ams_get_asset_wait(cb->ams, name);
+
+    // Load asset if not loaded
+    if (!asset) {
+        int32 asset_id = (int32) hex_to_int(name);
+        int32 archive_id = (asset_id >> 24) & 0xFF;
+        asset = asset_archive_asset_load(&cb->asset_archives[archive_id], asset_id, cb->ams, cb->mem_vol);
+    }
+
+    // Don't dereference a failed load
+    if (!asset) {
+        return NULL;
+    }
+
+    // Setup basic texture
+    Texture* texture = (Texture *) asset->self;
+    if (cb->gpu_api == GPU_API_TYPE_OPENGL
+        && !(texture->image.image_settings & IMAGE_SETTING_BOTTOM_TO_TOP)
+    ) {
+        image_flip_vertical(cb->mem_vol, &texture->image);
+    }
+
+    // @question What about texture upload?
+
+    return asset;
+}
+
+// Loads (if necessary) and prepares a font by id.
+// Returns NULL if the asset could not be loaded
+inline Asset* cmd_font_load(AppCmdBuffer* cb, int32 asset_id) {
+    // Check if asset already loaded
+    char id_str[9];
+    int_to_hex(asset_id, id_str);
+
+    Asset* asset = thrd_ams_get_asset_wait(cb->ams, id_str);
+
+    // Load asset if not loaded
+    if (!asset) {
+        int32 archive_id = (asset_id >> 24) & 0xFF;
+        asset = asset_archive_asset_load(&cb->asset_archives[archive_id], asset_id, cb->ams, cb->mem_vol);
+    }
+
+    // Don't dereference a failed load
+    if (!asset) {
+        return NULL;
+    }
+
+    // Setup font
+    Font* font = (Font *) asset->self;
+    if (cb->gpu_api == GPU_API_TYPE_OPENGL) {
+        font_invert_coordinates(font);
+    }
+
+    // @question What about also loading the font atlas
+
+    return asset;
+}
+
+// Loads (if necessary) and prepares a font by name (hex id).
+// Returns NULL if the asset could not be loaded
+inline Asset* cmd_font_load(AppCmdBuffer* cb, const char* name) {
+    // Check if asset already loaded
+    Asset* asset = thrd_ams_get_asset_wait(cb->ams, name);
+
+    // Load asset if not loaded
+    if (!asset) {
+        int32 asset_id = (int32) hex_to_int(name);
+        int32 archive_id = (asset_id >> 24) & 0xFF;
+        asset = asset_archive_asset_load(&cb->asset_archives[archive_id], asset_id, cb->ams, cb->mem_vol);
+    }
+
+    // Don't dereference a failed load
+    if (!asset) {
+        return NULL;
+    }
+
+    // Setup font
+    Font* font = (Font *) asset->self;
+    if (cb->gpu_api == GPU_API_TYPE_OPENGL) {
+        font_invert_coordinates(font);
+    }
+
+    // @question What about also loading the font atlas
+
+    return asset;
+}
+
+// @question In some cases we don't remove an element if it couldn't get completed
+// Would it make more sense to remove it and add a new follow up command automatically in such cases?
+// e.g. 
couldn't play audio since it isn't loaded -> queue for asset load -> queue for internal play
+// I guess this only makes sense if we would switch to a queue
+//
+// Executes the commands stored in the buffer once.
+// A command is freed after its handler ran, unless the handler reports that it couldn't be completed yet
+// (NULL result). Such commands stay in the buffer and are tried again on the next call.
+// Not thread safe by itself, see thrd_cmd_iterate
+void cmd_iterate(AppCmdBuffer* cb)
+{
+    int32 last_element = 0;
+    int32 chunk_id = 0;
+    chunk_iterate_start(&cb->commands, chunk_id)
+        Command* cmd = (Command *) chunk_get_element(&cb->commands, chunk_id);
+        bool remove = true;
+
+        switch (cmd->type) {
+            case CMD_FUNC_RUN: {
+                    cmd_func_run(cb, cmd);
+                } break;
+            case CMD_ASSET_ENQUEUE: {
+                    cmd_asset_load_enqueue(cb, cmd);
+                } break;
+            case CMD_ASSET_LOAD: {
+                    cmd_asset_load(cb, cmd);
+                } break;
+            case CMD_FILE_LOAD: {} break;
+            case CMD_TEXTURE_LOAD: {
+                    remove = cmd_texture_load_async(cb, cmd) != NULL;
+                } break;
+            case CMD_TEXTURE_CREATE: {
+                    // Internal only
+                    cmd_texture_create(cb, cmd);
+                } break;
+            case CMD_FONT_LOAD: {
+                    remove = cmd_font_load_async(cb, cmd) != NULL;
+                } break;
+            case CMD_FONT_CREATE: {
+                    // Internal only
+                    cmd_font_create(cb, cmd);
+                } break;
+            case CMD_AUDIO_PLAY: {
+                    // Keep the command if the audio wasn't available yet (load got requested),
+                    // otherwise the play request would be lost. Same handling as texture/font load
+                    remove = cmd_audio_play_async(cb, cmd) != NULL;
+                } break;
+            case CMD_AUDIO_ENQUEUE: {
+                    // Internal only
+                    remove = cmd_audio_play_enqueue(cb, cmd) != NULL;
+                } break;
+            case CMD_SHADER_LOAD: {
+                    remove = cmd_shader_load(cb, cmd) != NULL;
+                } break;
+            default: {
+                UNREACHABLE();
+            }
+        }
+
+        if (!remove) {
+            // Remember the highest index that is still in use
+            last_element = chunk_id;
+            continue;
+        }
+
+        chunk_free_element(&cb->commands, free_index, bit_index);
+
+        // @performance This adds some unnecessary overhead.
+        //  It would be much better, if we could define cb->last_element as the limit in the for loop
+        if (chunk_id == cb->last_element) {
+            break;
+        }
+    chunk_iterate_end;
+
+    cb->last_element = last_element;
+}
+
+// @performance Locking the entire thing during the iteration is horribly slow, fix.
+// Solution 1: Use Queue +// Solution 2: create a mask for the chunk->free which will be set (and only then locked) after everything is done +// This has the risk that if it takes a long time we may run out of free indices for insert +// This shouldn't happen since the command buffer shouldn't fill up in just 1-3 frames +void thrd_cmd_iterate(AppCmdBuffer* cb) +{ + pthread_mutex_lock(&cb->mutex); + cmd_iterate(cb); + pthread_mutex_unlock(&cb->mutex); +} + +#endif \ No newline at end of file diff --git a/command/AppCmdBuffer.h b/command/AppCmdBuffer.h new file mode 100644 index 0000000..41d4236 --- /dev/null +++ b/command/AppCmdBuffer.h @@ -0,0 +1,60 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_APP_COMMAND_BUFFER_H +#define TOS_APP_COMMAND_BUFFER_H + +#include "../stdlib/Types.h" +#include "../memory/ChunkMemory.h" +#include "../memory/RingMemory.h" +#include "../audio/AudioMixer.h" +#include "../audio/Audio.h" +#include "../asset/AssetArchive.h" +#include "../gpuapi/GpuApiType.h" +#include "../asset/Asset.h" +#include "../asset/AssetManagementSystem.h" +#include "../object/Texture.h" +#include "../memory/Queue.h" +#include "Command.h" + +struct AppCmdBuffer { + // @performance A queue would be much faster than ChunkMemory. + // We only use Chunk memory since we might want to run only certain commands instead of all of them + ChunkMemory commands; + int32 last_element; + + pthread_mutex_t mutex; + + // Application data for cmd access + // The list below depends on what kind of systems our command buffer needs access to + // Memory for when a buffer function (e.g. load_asset) is run in a thread context + RingMemory* thrd_mem_vol; + + // Memory for when a buffer function (e.g. 
load_asset) is run in the main loop + RingMemory* mem_vol; + AssetManagementSystem* ams; + AssetArchive* asset_archives; + Queue* assets_to_load; + AudioMixer* mixer; + GpuApiType gpu_api; +}; + +#if OPENGL + #include "../gpuapi/opengl/AppCmdBuffer.h" +#elif VULKAN + inline void* cmd_shader_load(AppCmdBuffer* cb, Command* cmd) { return NULL; } + inline void* cmd_shader_load(AppCmdBuffer* cb, void* shader, int32* shader_ids) { return NULL; } +#elif DIRECTX + inline void* cmd_shader_load(AppCmdBuffer* cb, Command* cmd) { return NULL; } + inline void* cmd_shader_load(AppCmdBuffer* cb, void* shader, int32* shader_ids) { return NULL; } +#else + inline void* cmd_shader_load(AppCmdBuffer* cb, Command* cmd) { return NULL; } + inline void* cmd_shader_load(AppCmdBuffer* cb, void* shader, int32* shader_ids) { return NULL; } +#endif + +#endif \ No newline at end of file diff --git a/command/Command.h b/command/Command.h new file mode 100644 index 0000000..3a6baf6 --- /dev/null +++ b/command/Command.h @@ -0,0 +1,35 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_COMMAND_H +#define TOS_COMMAND_H + +#include "../stdlib/Types.h" + +enum CommandType { + CMD_FUNC_RUN, + CMD_ASSET_ENQUEUE, + CMD_ASSET_LOAD, + CMD_FILE_LOAD, + CMD_FONT_LOAD, + CMD_FONT_CREATE, + CMD_TEXTURE_LOAD, + CMD_TEXTURE_CREATE, // Only for internal use + CMD_AUDIO_PLAY, + CMD_AUDIO_ENQUEUE, // Only for internal use + CMD_SHADER_LOAD, +}; + +struct Command { + CommandType type; + byte data[28]; // @todo to be adjusted +}; + +typedef void* (*CommandFunc)(Command*); + +#endif \ No newline at end of file diff --git a/entity/AnimationEntity.h b/entity/AnimationEntity.h deleted file mode 100644 index 13bdc03..0000000 --- a/entity/AnimationEntity.h +++ /dev/null @@ -1,78 +0,0 @@ -/** - * Jingga - * - * @copyright Jingga - * @license OMS License 2.0 - * @version 1.0.0 - * @link https://jingga.app - */ -#ifndef 
TOS_ANIMATION_ENTITY_H -#define TOS_ANIMATION_ENTITY_H - -#include "../stdlib/Types.h" -#include "../animation/AnimationEaseType.h" -#include "../animation/Animation.h" -#include "../utils/BitUtils.h" -#include "EntityComponentSystem.h" - -struct AnimationEntity { - AnimationEaseType type; - uint32 start_time; - uint32 last_time; - f32 interval; - f32 progress; - byte state_last; - byte state; - - // @question Do we want another flag that indicates if the entity got handled by the main loop? - // this way we could do the animation process in a thread and only overwrite the state_last whenever the flag is true - // However, we would have to implement locking or atomics which might be really bad depending on how we use this data -}; - -void update_animation_entity(AnimationEntity* anim, uint32 time, uint32 delay) -{ - anim->state_last = anim->state; - - switch (anim->type) { - case ANIMATION_EASE_DISCRETE: { - anim->progress = anim_discrete((f32) (time - anim->start_time + delay) / (f32) anim->interval); - anim->state = (int32) ((f32) anim->state - anim->progress); - } break; - default: {} - } -} - -void update_animation_entities(EntityComponentSystem* ecs, uint32 time, uint32 delay) -{ - int32 chunk_bytes = (ecs->entity_data_memory.size + 63) / 64; - - // @performance It might make sense to iterate by int16 or even int32 instead of byte. Needs profiling - for (int32 i = 0; i < chunk_bytes; ++i) { - // @question Do we want this to be the first case. 
It probably depends on how often a byte is realistically empty - if (!ecs->entity_data_memory.free[i]) { - continue; - } else if (ecs->entity_data_memory.free[i] == 256) { - // @performance If we go larger than 8bit in the outer loop we also have to adjust it here - // AND maybe we would want to do sub checks then for 8bit again - for (int32 j = 0; j < 8; ++j) { - AnimationEntity* anim = (AnimationEntity *) chunk_get_element(&ecs->entity_data_memory, i * 8 + j); - update_animation_entity(anim, time, delay); - } - - continue; - } - - // @performance If we go larger than 8bit in the outer loop we also have to adjust it here - // AND maybe we would want to do sub checks then for 8bit again - for (int32 j = 0; j < 8; ++j) { - if (!IS_BIT_SET_L2R(ecs->entity_data_memory.free[i], j, 1)) { - continue; - } - - AnimationEntity* anim = (AnimationEntity *) chunk_get_element(&ecs->entity_data_memory, i * 8 + j); - update_animation_entity(anim, time, delay); - } - } -} - -#endif \ No newline at end of file diff --git a/entity/AnimationEntityComponent.h b/entity/AnimationEntityComponent.h new file mode 100644 index 0000000..8286aac --- /dev/null +++ b/entity/AnimationEntityComponent.h @@ -0,0 +1,69 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_ANIMATION_ENTITY_H +#define TOS_ANIMATION_ENTITY_H + +#include "../stdlib/Types.h" +#include "../animation/AnimationEaseType.h" +#include "../animation/Animation.h" +#include "../utils/BitUtils.h" +#include "EntityComponentSystem.h" +#include "Entity.h" + +enum AnimationSetting { + ANIMATION_SETTING_PAUSE = 1 << 0, + ANIMATION_SETTING_REPEAT = 1 << 1, +}; + +struct AnimationEntityComponent { + Entity* entity; + AnimationEaseType type; + uint32 start_time; + uint32 last_time; + f32 interval; + f32 progress; + byte state_last; + byte state; + + // Contains repeat, pause etc + byte setting; + + // @question Do we want another flag that indicates if 
the entity got handled by the main loop? + // this way we could do the animation process in a thread and only overwrite the state_last whenever the flag is true + // However, we would have to implement locking or atomics which might be really bad depending on how we use this data +}; + +static inline +void update_animation_entity(AnimationEntityComponent* anim, uint32 time, uint32 delay) +{ + anim->state_last = anim->state; + + switch (anim->type) { + case ANIMATION_EASE_DISCRETE: { + anim->progress = anim_discrete((f32) (time - anim->start_time + delay) / (f32) anim->interval); + anim->state = (byte) ((f32) anim->state - anim->progress + FLOAT_CAST_EPS); + } break; + default: {} + } +} + +void update_animation_entities(ChunkMemory* anim_ec, uint32 time, uint32 delay) +{ + int32 chunk_id = 0; + chunk_iterate_start(anim_ec, chunk_id) + AnimationEntityComponent* anim = (AnimationEntityComponent *) chunk_get_element(anim_ec, chunk_id); + if (anim->setting & ANIMATION_SETTING_PAUSE) { + continue; + } + + update_animation_entity(anim, time, delay); + chunk_iterate_end; +} + +#endif \ No newline at end of file diff --git a/entity/CursorEntity.h b/entity/CursorEntity.h new file mode 100644 index 0000000..e00a48a --- /dev/null +++ b/entity/CursorEntity.h @@ -0,0 +1,20 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_ENTITY_CURSOR_H +#define TOS_ENTITY_CURSOR_H + +#include "Entity.h" +#include "AnimationEntityComponent.h" + +struct EntityCursor { + Entity* general; + AnimationEntityComponent* anim; +}; + +#endif \ No newline at end of file diff --git a/entity/Entity.h b/entity/Entity.h index 8fac196..6e7ed66 100644 --- a/entity/Entity.h +++ b/entity/Entity.h @@ -11,41 +11,23 @@ #include "../stdlib/Types.h" #include "../stdlib/HashMap.h" -#include "EntityType.h" #define MAX_ENTITY_NAME_LENGTH 32 struct Entity { // The id is the same as its location in memory/in the ecs array // This is 
is only an internal id and NOT the same as a db id (e.g. player id) - uint64 internal_id; + uint32 internal_id; - EntityType type; + uint32 last_access; - uint64 last_access; + // Which entity is used + byte type; - // Variable used for thread safety - bool is_loaded; + byte state; - // Describes if the asset can be removed/garbage collected IF necessary - // This however only happens if space is needed - bool can_garbage_collect_ram; - bool can_garbage_collect_vram; - - // Counts the references to this entity - // e.g. textures - int16 reference_count; - - // A entity can reference up to N other entities - // This allows us to quickly update the other entities - // Example: A player pulls N mobs - // @bug This means there are hard limits on how many mobs can be pulled by a player - // @question should this be an entity id? - Entity* references[50]; - uint64 free_references; // bits show which is free - - // @question should this be an entity id? - Entity* schema; // This entity represents the schema for this entity (most likely stored in a separate ecs) + // This entity represents the schema for this entity (most likely stored in a separate ecs) + uint32 schema; // Actual memory address and specific entity data byte* self; @@ -59,7 +41,7 @@ struct EntitySchema { // Could be 0 if there is no official id uint64 official_id; - EntityType type; + byte type; // Counts the references to this entity // e.g. 
textures diff --git a/entity/EntityComponentSystem.h b/entity/EntityComponentSystem.h index 16be6d7..cc97493 100644 --- a/entity/EntityComponentSystem.h +++ b/entity/EntityComponentSystem.h @@ -13,55 +13,109 @@ #include "../stdlib/Types.h" #include "../memory/ChunkMemory.h" #include "../utils/TestUtils.h" +#include "../utils/BitUtils.h" #include "../stdlib/HashMap.h" +#include "../log/DebugMemory.h" #include "Entity.h" +// Entities can be directly accessed by their id +// highest byte = entity type, lower bytes = id in respective ecs struct EntityComponentSystem { - // @question is this even necessary or could we integrate this directly into the system here? - HashMap hash_map; + int32 entity_type_count; + int32 component_type_count; + + ChunkMemory* entities; + ChunkMemory* components; uint64 ram_size; uint64 vram_size; uint64 entity_count; - int32 overhead; + uint64 component_count; - // @question Do we want this, I would assume this should be almost always true in the final game - bool has_changed; - - // The indices of entity_memory and entity_data_memory are always linked - - // @question Consider to reset entity_memory->last_pos to 0 before adding a new element - // This allows us to make the chunk memory more continuous which is better for iteration later on - // However, adding elements would now be slower. Needs profiling - - // General entity memory - ChunkMemory entity_memory; - - // Actual entity data - ChunkMemory entity_data_memory; + // @question Do we want to add a mutex to assets. This way we don't have to lock the entire ams. + pthread_mutex_t* entity_mutex; + pthread_mutex_t* component_mutex; }; -struct EntitySchemaSystem { - // @question is this even necessary or could we integrate this directly into the system here? 
- HashMap hash_map; +inline +void ecs_create(EntityComponentSystem* ecs, BufferMemory* buf, int32 entity_count, int32 component_count) +{ + ecs->entity_type_count = entity_count; + ecs->entities = (ChunkMemory *) buffer_get_memory(buf, sizeof(ChunkMemory) * entity_count, 64); - uint64 ram_size; - uint64 vram_size; - uint64 entity_count; - int32 overhead; - bool has_changed; + ecs->component_type_count = component_count; + ecs->components = (ChunkMemory *) buffer_get_memory(buf, sizeof(ChunkMemory) * component_count, 64); +} - // The indices of entity_memory and entity_data_memory are always linked +inline +void ecs_entity_type_create(ChunkMemory* ec, BufferMemory* buf, int32 chunk_size, int32 count) +{ + ASSERT_SIMPLE(chunk_size); - // General entity memory - ChunkMemory entity_memory; + chunk_init(ec, buf, count, chunk_size, 64); + //pthread_mutex_init(&ec->mutex, NULL); +} - // Actual entity data - ChunkMemory entity_data_memory; +inline +void ecs_component_type_create(ChunkMemory* ec, BufferMemory* buf, int32 chunk_size, int32 count) +{ + ASSERT_SIMPLE(chunk_size); - EntitySchema* first; - EntitySchema* last; -}; + chunk_init(ec, buf, count, chunk_size, 64); + //pthread_mutex_init(&ec->mutex, NULL); +} + +Entity* ecs_get_entity(EntityComponentSystem* ecs, int32 entity_id) +{ + int32 ecs_type = (entity_id >> 24) & 0xFF; + int32 raw_id = entity_id & 0x00FFFFFF; + + int32 byte_index = raw_id / 64; + int32 bit_index = raw_id & 63; + + return IS_BIT_SET_64_R2L(ecs->entities[ecs_type].free[byte_index], bit_index) ? 
+ (Entity *) chunk_get_element(&ecs->entities[ecs_type], raw_id) + : NULL; +} + +Entity* ecs_reserve_entity(EntityComponentSystem* ecs, uint32 entity_type) +{ + ChunkMemory* mem = &ecs->entities[entity_type]; + int32 free_entity = chunk_reserve(mem, 1); + if (free_entity < 0) { + ASSERT_SIMPLE(free_entity >= 0); + return NULL; + } + + Entity* entity = (Entity *) chunk_get_element(mem, free_entity); + + // @todo log entity stats (count, ram, vram) + + return entity; +} + +Entity* ecs_insert_entity(EntityComponentSystem* ecs, Entity* entity_temp, int32 entity_type) +{ + ChunkMemory* mem = &ecs->entities[entity_type]; + int32 free_entity = chunk_reserve(mem, 1); + if (free_entity < 0) { + ASSERT_SIMPLE(free_entity >= 0); + return NULL; + } + + Entity* entity = (Entity *) chunk_get_element(mem, free_entity); + memcpy(entity, entity_temp, mem->chunk_size); + + // @todo log entity stats (count, ram, vram) + //DEBUG_MEMORY_RESERVE((uint64) entity, entity->ram_size, 180); + + return entity; +} + +void ecs_insert_component() +{ + +} #endif \ No newline at end of file diff --git a/entity/EntitySize.h b/entity/EntitySize.h new file mode 100644 index 0000000..dfe4819 --- /dev/null +++ b/entity/EntitySize.h @@ -0,0 +1,28 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_ENTITY_SIZE_H +#define TOS_ENTITY_SIZE_H + +enum EntitySize { + ENTITY_SIZE_32, + ENTITY_SIZE_64, + ENTITY_SIZE_128, + ENTITY_SIZE_256, + ENTITY_SIZE_512, + ENTITY_SIZE_1024, + ENTITY_SIZE_2048, + ENTITY_SIZE_4096, + ENTITY_SIZE_8192, + ENTITY_SIZE_16384, + ENTITY_SIZE_32768, + ENTITY_SIZE_65536, + ENTITY_SIZE_SIZE +}; + +#endif \ No newline at end of file diff --git a/entity/EntityType.h b/entity/EntityType.h deleted file mode 100644 index 58bf1ad..0000000 --- a/entity/EntityType.h +++ /dev/null @@ -1,21 +0,0 @@ -/** - * Jingga - * - * @copyright Jingga - * @license OMS License 2.0 - * @version 1.0.0 - * @link 
https://jingga.app - */ -#ifndef TOS_ENTITY_TYPE_H -#define TOS_ENTITY_TYPE_H - -enum EntityType { - ENTITY_TYPE_MONSTER, - ENTITY_TYPE_NPC, - ENTITY_TYPE_PLAYER, - ENTITY_TYPE_ITEM, - ENTITY_TYPE_OBJ, - ENTITY_TYPE_SIZE -}; - -#endif \ No newline at end of file diff --git a/font/Font.h b/font/Font.h index 4416382..9d9be80 100644 --- a/font/Font.h +++ b/font/Font.h @@ -5,18 +5,8 @@ #include "../memory/BufferMemory.h" #include "../utils/EndianUtils.h" #include "../utils/Utils.h" - -#if __aarch64__ - #include "../stdlib/sve/SVE_I32.h" -#else - #include "../stdlib/simd/SIMD_I32.h" -#endif - -#if _WIN32 - #include "../platform/win32/FileUtils.cpp" -#else - #include "../platform/linux/FileUtils.cpp" -#endif +#include "../stdlib/Simd.h" +#include "../system/FileUtils.cpp" struct GlyphMetrics { f32 width; // Width of the glyph @@ -212,15 +202,6 @@ int32 font_from_data( memcpy(font->glyphs, pos, font->glyph_count * sizeof(Glyph)); - #if OPENGL - // @todo Implement y-offset correction - for (uint32 i = 0; i < font->glyph_count; ++i) { - float temp = font->glyphs[i].coords.y1; - font->glyphs[i].coords.y1 = 1.0f - font->glyphs[i].coords.y2; - font->glyphs[i].coords.y2 = 1.0f - temp; - } - #endif - SWAP_ENDIAN_LITTLE_SIMD( (int32 *) font->glyphs, (int32 *) font->glyphs, diff --git a/gpuapi/GpuApiType.h b/gpuapi/GpuApiType.h new file mode 100644 index 0000000..8188b0d --- /dev/null +++ b/gpuapi/GpuApiType.h @@ -0,0 +1,19 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_GPUAPI_TYPE_H +#define TOS_GPUAPI_TYPE_H + +enum GpuApiType { + GPU_API_TYPE_NONE, + GPU_API_TYPE_OPENGL, + GPU_API_TYPE_VULKAN, + GPU_API_TYPE_DIRECTX +}; + +#endif \ No newline at end of file diff --git a/gpuapi/ShaderType.h b/gpuapi/ShaderType.h new file mode 100644 index 0000000..9b191ec --- /dev/null +++ b/gpuapi/ShaderType.h @@ -0,0 +1,25 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * 
@version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_GPUAPI_SHADER_TYPE_H +#define TOS_GPUAPI_SHADER_TYPE_H + +enum ShaderType { + SHADER_TYPE_NONE, + SHADER_TYPE_VERTEX, + SHADER_TYPE_FRAGMENT, + SHADER_TYPE_GEOMETRY, + SHADER_TYPE_TESSELATION, + SHADER_TYPE_PIXEL, + SHADER_TYPE_MESH, + SHADER_TYPE_RAYTRACING, + SHADER_TYPE_TENSOR, + SHADER_TYPE_SIZE +}; + +#endif \ No newline at end of file diff --git a/gpuapi/direct3d/GpuApiContainer.h b/gpuapi/direct3d/GpuApiContainer.h index 4d9da9d..9f54960 100644 --- a/gpuapi/direct3d/GpuApiContainer.h +++ b/gpuapi/direct3d/GpuApiContainer.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef TOS_GPUAPI_DIRECTX_GPU_API_CONTAINER -#define TOS_GPUAPI_DIRECTX_GPU_API_CONTAINER +#ifndef TOS_GPUAPI_DIRECTX_GPU_API_CONTAINER_H +#define TOS_GPUAPI_DIRECTX_GPU_API_CONTAINER_H #include #include diff --git a/gpuapi/direct3d/Shader.h b/gpuapi/direct3d/Shader.h new file mode 100644 index 0000000..e607073 --- /dev/null +++ b/gpuapi/direct3d/Shader.h @@ -0,0 +1,20 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_GPUAPI_DIRECT3D_SHADER_H +#define TOS_GPUAPI_DIRECT3D_SHADER_H + +#include "../../stdlib/Types.h" + +struct Shader { + uint32 id; + uint32 locations[7]; + byte data[16]; +}; + +#endif \ No newline at end of file diff --git a/gpuapi/opengl/AppCmdBuffer.h b/gpuapi/opengl/AppCmdBuffer.h new file mode 100644 index 0000000..6e560f6 --- /dev/null +++ b/gpuapi/opengl/AppCmdBuffer.h @@ -0,0 +1,66 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_GPUAPI_OPENGL_APP_CMD_BUFFER_H +#define TOS_GPUAPI_OPENGL_APP_CMD_BUFFER_H + +#include "../../stdlib/Types.h" +#include "OpenglUtils.h" +#include "Shader.h" +#include "ShaderUtils.h" +#include "../ShaderType.h" +#include "../../asset/Asset.h" + +void* cmd_shader_load(AppCmdBuffer* cb, 
Command* cmd) { + return NULL; +} + +void* cmd_shader_load(AppCmdBuffer* cb, Shader* shader, int32* shader_ids) { + char asset_id[9]; + + int32 shader_assets[SHADER_TYPE_SIZE]; + for (int32 i = 0; i < SHADER_TYPE_SIZE; ++i) { + shader_assets[i] = -1; + } + + for (int32 i = 0; i < SHADER_TYPE_SIZE; ++i) { + if (!shader_ids[i]) { + continue; + } else if (shader_ids[i] < 0) { + break; + } + + // Load sub asset + int_to_hex(shader_ids[i], asset_id); + Asset* shader_asset = thrd_ams_get_asset_wait(cb->ams, asset_id); + if (!shader_asset) { + int32 archive_id = (shader_ids[i] >> 24) & 0xFF; + shader_asset = asset_archive_asset_load(&cb->asset_archives[archive_id], shader_ids[i], cb->ams, cb->mem_vol); + } + + // Make sub shader + shader_assets[i] = shader_make( + shader_type_index((ShaderType) (i + 1)), + (char *) shader_asset->self, + cb->mem_vol + ); + + shader_asset->state |= ASSET_STATE_RAM_GC; + shader_asset->state |= ASSET_STATE_VRAM_GC; + } + + // Make shader/program + shader->id = program_make( + shader_assets[0], shader_assets[1], shader_assets[2], + cb->mem_vol + ); + + return NULL; +} + +#endif \ No newline at end of file diff --git a/gpuapi/opengl/GpuApiContainer.h b/gpuapi/opengl/GpuApiContainer.h index 686269c..9e4e722 100644 --- a/gpuapi/opengl/GpuApiContainer.h +++ b/gpuapi/opengl/GpuApiContainer.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef TOS_GPUAPI_OPENGL_GPU_API_CONTAINER -#define TOS_GPUAPI_OPENGL_GPU_API_CONTAINER +#ifndef TOS_GPUAPI_OPENGL_GPU_API_CONTAINER_H +#define TOS_GPUAPI_OPENGL_GPU_API_CONTAINER_H #include "../../stdlib/Types.h" #include "OpenglUtils.h" diff --git a/gpuapi/opengl/OpenglUtils.h b/gpuapi/opengl/OpenglUtils.h index 444472e..dd5d0b0 100644 --- a/gpuapi/opengl/OpenglUtils.h +++ b/gpuapi/opengl/OpenglUtils.h @@ -16,16 +16,13 @@ #include "../../image/Image.cpp" #include "../../utils/StringUtils.h" #include "../../log/Log.h" - +#include "../../system/FileUtils.cpp" #include "../RenderUtils.h" #include 
"Opengl.h" #if _WIN32 - #include - #include "../../platform/win32/FileUtils.cpp" #include "../../platform/win32/Window.h" #elif __linux__ - #include "../../platform/linux/FileUtils.cpp" #include "../../platform/linux/Window.h" #endif @@ -85,10 +82,10 @@ void opengl_info(OpenglInfo* info) for (char *at = version; *at; ++at) { if (*at == '.') { - info->major = str_to_int(version); + info->major = (int32) str_to_int(version); ++at; - info->minor = str_to_int(at); + info->minor = (int32) str_to_int(at); break; } } diff --git a/gpuapi/opengl/Shader.h b/gpuapi/opengl/Shader.h index aa46ab0..264c72e 100644 --- a/gpuapi/opengl/Shader.h +++ b/gpuapi/opengl/Shader.h @@ -12,9 +12,9 @@ #include "../../stdlib/Types.h" struct Shader { - uint32 shader_id; - uint32 shader_locations[7]; - byte shader_data[16]; + uint32 id; + uint32 locations[7]; + byte data[16]; }; #endif \ No newline at end of file diff --git a/gpuapi/opengl/ShaderUtils.h b/gpuapi/opengl/ShaderUtils.h index 972c4d3..fb38b9e 100644 --- a/gpuapi/opengl/ShaderUtils.h +++ b/gpuapi/opengl/ShaderUtils.h @@ -13,6 +13,19 @@ #include "../../memory/RingMemory.h" #include "../../log/Log.h" #include "Opengl.h" +#include "../ShaderType.h" + +int32 shader_type_index(ShaderType type) +{ + switch (type) { + case SHADER_TYPE_VERTEX: + return GL_VERTEX_SHADER; + case SHADER_TYPE_FRAGMENT: + return GL_FRAGMENT_SHADER; + default: + return 0; + } +} // Set value based on shader uniform name inline @@ -333,6 +346,7 @@ GLuint program_make( return program; } +// @question Depending on how the different gpu apis work we may want to pass Shader* to have a uniform structure inline void pipeline_use(uint32 id) { diff --git a/gpuapi/vulkan/GpuApiContainer.h b/gpuapi/vulkan/GpuApiContainer.h index 1528093..c370a1d 100644 --- a/gpuapi/vulkan/GpuApiContainer.h +++ b/gpuapi/vulkan/GpuApiContainer.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef TOS_GPUAPI_VULKAN_GPU_API_CONTAINER -#define 
TOS_GPUAPI_VULKAN_GPU_API_CONTAINER +#ifndef TOS_GPUAPI_VULKAN_GPU_API_CONTAINER_H +#define TOS_GPUAPI_VULKAN_GPU_API_CONTAINER_H #include "../../stdlib/Types.h" #include diff --git a/gpuapi/vulkan/Shader.h b/gpuapi/vulkan/Shader.h new file mode 100644 index 0000000..e18e2ce --- /dev/null +++ b/gpuapi/vulkan/Shader.h @@ -0,0 +1,20 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_GPUAPI_VULKAN_SHADER_H +#define TOS_GPUAPI_VULKAN_SHADER_H + +#include "../../stdlib/Types.h" + +struct Shader { + uint32 id; + uint32 locations[7]; + byte data[16]; +}; + +#endif \ No newline at end of file diff --git a/gpuapi/vulkan/ShaderUtils.h b/gpuapi/vulkan/ShaderUtils.h index 1fc2842..404fb9d 100644 --- a/gpuapi/vulkan/ShaderUtils.h +++ b/gpuapi/vulkan/ShaderUtils.h @@ -42,7 +42,7 @@ inline void shader_set_value(VkDevice device, VkCommandBuffer commandBuffer, VkD descriptorWrite.descriptorCount = 1; descriptorWrite.pBufferInfo = &bufferInfo; - vkUpdateDescriptorSets(device, 1, &descriptorWrite, 0, nullptr); + vkUpdateDescriptorSets(device, 1, &descriptorWrite, 0, NULL); } VkShaderModule shader_make(VkDevice device, const char* source, int32 source_size) diff --git a/gpuapi/vulkan/VulkanUtils.h b/gpuapi/vulkan/VulkanUtils.h index d45f95e..fd15578 100644 --- a/gpuapi/vulkan/VulkanUtils.h +++ b/gpuapi/vulkan/VulkanUtils.h @@ -743,7 +743,7 @@ void vulkan_command_pool_create( void vulkan_command_buffer_create(VkDevice device, VkCommandBuffer* command_buffer, VkCommandPool command_pool) { - VkCommandBufferAllocateInfo allocInfo{}; + VkCommandBufferAllocateInfo allocInfo = {}; allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; allocInfo.commandPool = command_pool; allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; diff --git a/image/Image.cpp b/image/Image.cpp index b8505ae..068922a 100644 --- a/image/Image.cpp +++ b/image/Image.cpp @@ -11,12 +11,7 @@ #include "../utils/StringUtils.h" 
#include "../memory/RingMemory.h" - -#if _WIN32 - #include "../platform/win32/FileUtils.cpp" -#else - #include "../platform/linux/FileUtils.cpp" -#endif +#include "../system/FileUtils.cpp" #include "Image.h" #include "Tga.h" @@ -50,17 +45,6 @@ void image_flip_vertical(RingMemory* ring, Image* image) memcpy(image->pixels + y * stride, end - y * stride, stride); } - /* Flipping with small temp row - byte* temp_row = ring_get_memory(ring, stride); - - for (int y = 0; y < image->height / 2; ++y) { - memcpy(temp_row, image->pixels + y * stride, stride); - - memcpy(image->pixels + y * stride, image->pixels - y * stride, stride); - memcpy(image->pixels - y * stride, temp_row, stride); - } - */ - image->image_settings ^= IMAGE_SETTING_BOTTOM_TO_TOP; } diff --git a/localization/Language.h b/localization/Language.h index 200b4fa..94ed864 100644 --- a/localization/Language.h +++ b/localization/Language.h @@ -3,12 +3,7 @@ #include "../stdlib/Types.h" #include "../memory/RingMemory.h" - -#if _WIN32 - #include "../platform/win32/FileUtils.cpp" -#else - #include "../platform/linux/FileUtils.cpp" -#endif +#include "../system/FileUtils.cpp" #define LANGUAGE_VERSION 1 diff --git a/log/Debug.cpp b/log/Debug.cpp index 4116c96..ba66bdb 100644 --- a/log/Debug.cpp +++ b/log/Debug.cpp @@ -9,13 +9,23 @@ #include "../utils/StringUtils.h" #include "../utils/TestUtils.h" #include "../utils/MathUtils.h" +#include "../thread/Atomic.h" + +// Required for rdtsc(); +#if _WIN32 + #include +#else + #include +#endif global_persist DebugContainer* debug_container = NULL; +// WARNING: Spinlock uses TimeUtils which uses performance counter, which is part of DebugContainer +// @todo The explanation above is insane. 
We did this so we only have to set the performance counter once but it is biting us now +#include "../thread/Spinlock.cpp" + #if _WIN32 #include - #include "../platform/win32/threading/Atomic.h" - #include "../platform/win32/threading/Spinlock.cpp" void setup_performance_count() { if (!debug_container) { return; @@ -26,8 +36,6 @@ global_persist DebugContainer* debug_container = NULL; debug_container->performance_count_frequency = perf_counter.QuadPart; } #elif __linux__ -#include "../platform/linux/threading/Atomic.h" -#include "../platform/linux/threading/Spinlock.cpp" void setup_performance_count() { if (!debug_container) { return; @@ -102,8 +110,8 @@ void update_timing_stat(uint32 stat, const char* function) spinlock_start(&debug_container->perf_stats_spinlock); timing_stat->function = function; - timing_stat->delta_tick = new_tick_count - timing_stat->old_tick_count; - timing_stat->delta_time = (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency; + timing_stat->delta_tick = (uint32) (new_tick_count - timing_stat->old_tick_count); + timing_stat->delta_time = (f64) timing_stat->delta_tick / (f64) debug_container->performance_count_frequency; timing_stat->old_tick_count = new_tick_count; spinlock_end(&debug_container->perf_stats_spinlock); } @@ -125,8 +133,8 @@ void update_timing_stat_end(uint32 stat, const char* function) spinlock_start(&debug_container->perf_stats_spinlock); timing_stat->function = function; - timing_stat->delta_tick = new_tick_count - timing_stat->old_tick_count; - timing_stat->delta_time = (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency; + timing_stat->delta_tick = (uint32) (new_tick_count - timing_stat->old_tick_count); + timing_stat->delta_time = (f64) timing_stat->delta_tick / (f64) debug_container->performance_count_frequency; timing_stat->old_tick_count = new_tick_count; spinlock_end(&debug_container->perf_stats_spinlock); } @@ -140,8 +148,8 @@ void 
update_timing_stat_end_continued(uint32 stat, const char* function) spinlock_start(&debug_container->perf_stats_spinlock); timing_stat->function = function; - timing_stat->delta_tick = timing_stat->delta_tick + new_tick_count - timing_stat->old_tick_count; - timing_stat->delta_time = timing_stat->delta_time + (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency; + timing_stat->delta_tick = (uint32) ((uint32) (new_tick_count - timing_stat->old_tick_count) + timing_stat->delta_tick); + timing_stat->delta_time = timing_stat->delta_time + (f64) timing_stat->delta_tick / (f64) debug_container->performance_count_frequency; timing_stat->old_tick_count = new_tick_count; spinlock_end(&debug_container->perf_stats_spinlock); } @@ -269,7 +277,7 @@ void debug_memory_reserve(uint64 start, uint64 size, int32 type, const char* fun uint64 idx = atomic_fetch_add_relaxed(&mem->reserve_action_idx, 1); if (idx >= ARRAY_COUNT(mem->reserve_action)) { atomic_set_acquire(&mem->reserve_action_idx, 1); - idx %= ARRAY_COUNT(mem->last_action); + idx %= ARRAY_COUNT(mem->reserve_action); } DebugMemoryRange* dmr = &mem->reserve_action[idx]; @@ -281,6 +289,27 @@ void debug_memory_reserve(uint64 start, uint64 size, int32 type, const char* fun dmr->function_name = function; } +// undo reserve +void debug_memory_free(uint64 start, uint64 size) +{ + if (!start || !debug_container) { + return; + } + + DebugMemory* mem = debug_memory_find(start); + if (!mem) { + return; + } + + for (int32 i = 0; i < ARRAY_COUNT(mem->reserve_action); ++i) { + DebugMemoryRange* dmr = &mem->reserve_action[i]; + if (dmr->start == start - mem->start) { + dmr->size = 0; + return; + } + } +} + // @bug This probably requires thread safety inline void debug_memory_reset() @@ -302,7 +331,7 @@ void debug_memory_reset() } // @bug This probably requires thread safety -byte* log_get_memory(uint64 size, byte aligned = 1, bool zeroed = false) +byte* log_get_memory(uint64 size, byte aligned = 4, bool 
zeroed = false) { if (!debug_container) { return 0; @@ -347,8 +376,8 @@ void log(const char* str, bool should_log, bool save, const char* file, const ch size_t file_len = strlen(file); size_t function_len = strlen(function); - char line_str[10]; - int_to_str(line, line_str, '\0'); + char line_str[14]; + uint_to_str(line, line_str); size_t line_len = strlen(line_str); diff --git a/log/Debug.h b/log/Debug.h index 566a23c..0cb1aee 100644 --- a/log/Debug.h +++ b/log/Debug.h @@ -12,12 +12,10 @@ #include "../stdlib/Types.h" #include "DebugMemory.h" #include "TimingStat.h" +#include "../thread/Spinlock.h" #if _WIN32 #include - #include "../platform/win32/threading/Spinlock.h" -#elif __linux__ - #include "../platform/linux/threading/Spinlock.h" #endif struct LogMemory { @@ -26,7 +24,7 @@ struct LogMemory { uint32 id; uint64 size; uint64 pos; - int32 alignment; + uint32 alignment; uint64 start; uint64 end; }; diff --git a/log/DebugMemory.h b/log/DebugMemory.h index 8fba803..1e53b16 100644 --- a/log/DebugMemory.h +++ b/log/DebugMemory.h @@ -9,18 +9,8 @@ #ifndef TOS_LOG_DEBUG_MEMORY_H #define TOS_LOG_DEBUG_MEMORY_H -#include -#include - #include "../stdlib/Types.h" -// required for __rdtsc -#if _WIN32 - #include -#else - #include -#endif - #define DEBUG_MEMORY_RANGE_MAX 500 #define DEBUG_MEMORY_RANGE_RES_MAX 100 @@ -55,6 +45,7 @@ struct DebugMemoryContainer { void debug_memory_init(uint64, uint64); void debug_memory_log(uint64, uint64, int32, const char*); void debug_memory_reserve(uint64, uint64, int32, const char*); + void debug_memory_free(uint64, uint64); void debug_memory_reset(); #define DEBUG_MEMORY_INIT(start, size) debug_memory_init((start), (size)) @@ -62,6 +53,7 @@ struct DebugMemoryContainer { #define DEBUG_MEMORY_WRITE(start, size) debug_memory_log((start), (size), 1, __func__) #define DEBUG_MEMORY_DELETE(start, size) debug_memory_log((start), (size), -1, __func__) #define DEBUG_MEMORY_RESERVE(start, size, type) debug_memory_reserve((start), (size), (type), 
__func__) + #define DEBUG_MEMORY_FREE(start, size) debug_memory_free((start), (size)) #define DEBUG_MEMORY_RESET() debug_memory_reset() #else #define DEBUG_MEMORY_INIT(start, size) ((void) 0) @@ -69,6 +61,7 @@ struct DebugMemoryContainer { #define DEBUG_MEMORY_WRITE(start, size) ((void) 0) #define DEBUG_MEMORY_DELETE(start, size) ((void) 0) #define DEBUG_MEMORY_RESERVE(start, size, type) ((void) 0) + #define DEBUG_MEMORY_FREE(start, size) ((void) 0) #define DEBUG_MEMORY_RESET() ((void) 0) #endif diff --git a/log/Log.h b/log/Log.h index 0f300ae..d19e3d9 100644 --- a/log/Log.h +++ b/log/Log.h @@ -45,7 +45,7 @@ void log_counter(int32, int64); printf("%ld\n", __rdtsc() - (time_start)); \ }) -#if (!DEBUG && !INTERNAL) +#if (!DEBUG && !INTERNAL) || RELEASE // Don't perform any logging at log level 0 #define LOG(str, should_log, save) log((str), (should_log), (save), __FILE__, __func__, __LINE__) #define LOG_FORMAT(format, data_type, data, should_log, save) log((format), (data_type), (data), (should_log), (save), __FILE__, __func__, __LINE__) diff --git a/log/TimingStat.h b/log/TimingStat.h index 10d1686..7650224 100644 --- a/log/TimingStat.h +++ b/log/TimingStat.h @@ -22,8 +22,8 @@ struct TimingStat { const char* function; uint64 old_tick_count; - uint64 delta_tick; - double delta_time; + f64 delta_time; + uint32 delta_tick; }; // Sometimes we want to only do logging in debug mode. 
diff --git a/math/matrix/VectorFloat32.h b/math/matrix/VectorFloat32.h index de15cf4..ee0b541 100644 --- a/math/matrix/VectorFloat32.h +++ b/math/matrix/VectorFloat32.h @@ -10,12 +10,7 @@ #define TOS_MATH_MATRIX_VECTOR_FLOAT32_H #include "../../utils/MathUtils.h" - -#if __aarch64__ - #include "../../../GameEngine/stdlib/sve/SVE_F32.h" -#else - #include "../../../GameEngine/stdlib/simd/SIMD_F32.h" -#endif +#include "../../stdlib/Simd.h" struct v3_f32_4 { union { diff --git a/math/matrix/VectorFloat64.h b/math/matrix/VectorFloat64.h index 83bea00..47bfab1 100644 --- a/math/matrix/VectorFloat64.h +++ b/math/matrix/VectorFloat64.h @@ -10,11 +10,6 @@ #define TOS_MATH_MATRIX_VECTOR_FLOAT64_H #include "../../utils/MathUtils.h" - -#if __aarch64__ - #include "../../../GameEngine/stdlib/sve/SVE_F64.h" -#else - #include "../../../GameEngine/stdlib/simd/SIMD_F64.h" -#endif +#include "../../stdlib/Simd.h" #endif \ No newline at end of file diff --git a/math/matrix/VectorInt32.h b/math/matrix/VectorInt32.h index 02d73a6..1ed4da2 100644 --- a/math/matrix/VectorInt32.h +++ b/math/matrix/VectorInt32.h @@ -13,12 +13,7 @@ #include #include "../../utils/MathUtils.h" - -#if __aarch64__ - #include "../../../GameEngine/stdlib/sve/SVE_I32.h" -#else - #include "../../../GameEngine/stdlib/simd/SIMD_I32.h" -#endif +#include "../../stdlib/Simd.h" struct v3_int32_4 { union { diff --git a/math/matrix/VectorInt64.h b/math/matrix/VectorInt64.h index 0e25bb4..acaff72 100644 --- a/math/matrix/VectorInt64.h +++ b/math/matrix/VectorInt64.h @@ -13,12 +13,7 @@ #include #include "../../utils/MathUtils.h" - -#if __aarch64__ - #include "../../../GameEngine/stdlib/sve/SVE_I64.h" -#else - #include "../../../GameEngine/stdlib/simd/SIMD_I64.h" -#endif +#include "../../stdlib/Simd.h" struct v3_int64_2 { union { diff --git a/memory/BufferMemory.h b/memory/BufferMemory.h index ac9d3c3..6af17ff 100644 --- a/memory/BufferMemory.h +++ b/memory/BufferMemory.h @@ -15,12 +15,7 @@ #include "../utils/EndianUtils.h" 
#include "../utils/TestUtils.h" #include "../log/DebugMemory.h" - -#if _WIN32 - #include "../platform/win32/Allocator.h" -#elif __linux__ - #include "../platform/linux/Allocator.h" -#endif +#include "../system/Allocator.h" // @question Consider to use element_alignment to automatically align/pad elements diff --git a/memory/ChunkMemory.h b/memory/ChunkMemory.h index 0ee9b52..45bf43e 100644 --- a/memory/ChunkMemory.h +++ b/memory/ChunkMemory.h @@ -14,27 +14,19 @@ #include "../utils/MathUtils.h" #include "../utils/TestUtils.h" #include "../utils/EndianUtils.h" +#include "../utils/BitUtils.h" #include "../log/DebugMemory.h" #include "BufferMemory.h" - -#if _WIN32 - #include "../platform/win32/Allocator.h" -#elif __linux__ - #include "../platform/linux/Allocator.h" -#endif - -#if _WIN32 - #include "../platform/win32/threading/Thread.h" -#elif __linux__ - #include "../platform/linux/threading/Thread.h" -#endif +#include "../system/Allocator.h" +#include "../thread/Thread.h" struct ChunkMemory { byte* memory; - uint64 count; + // @question Why are we making the count 64 bit? is this really realistically possible? uint64 size; - uint64 last_pos; + int32 last_pos; + uint32 count; uint32 chunk_size; uint32 alignment; @@ -44,7 +36,7 @@ struct ChunkMemory { }; inline -void chunk_alloc(ChunkMemory* buf, uint64 count, uint32 chunk_size, int32 alignment = 64) +void chunk_alloc(ChunkMemory* buf, uint32 count, uint32 chunk_size, int32 alignment = 64) { ASSERT_SIMPLE(chunk_size); ASSERT_SIMPLE(count); @@ -58,7 +50,7 @@ void chunk_alloc(ChunkMemory* buf, uint64 count, uint32 chunk_size, int32 alignm buf->count = count; buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); buf->chunk_size = chunk_size; - buf->last_pos = 0; + buf->last_pos = -1; buf->alignment = alignment; // @question Could it be beneficial to have this before the element data? 
@@ -70,7 +62,7 @@ void chunk_alloc(ChunkMemory* buf, uint64 count, uint32 chunk_size, int32 alignm } inline -void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint32 chunk_size, int32 alignment = 64) +void chunk_init(ChunkMemory* buf, BufferMemory* data, uint32 count, uint32 chunk_size, int32 alignment = 64) { ASSERT_SIMPLE(chunk_size); ASSERT_SIMPLE(count); @@ -82,7 +74,7 @@ void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint32 chunk buf->count = count; buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); buf->chunk_size = chunk_size; - buf->last_pos = 0; + buf->last_pos = -1; buf->alignment = alignment; // @question Could it be beneficial to have this before the element data? @@ -95,7 +87,7 @@ void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint32 chunk } inline -void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint32 chunk_size, int32 alignment = 64) +void chunk_init(ChunkMemory* buf, byte* data, uint32 count, uint32 chunk_size, int32 alignment = 64) { ASSERT_SIMPLE(chunk_size); ASSERT_SIMPLE(count); @@ -108,7 +100,7 @@ void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint32 chunk_size, i buf->count = count; buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); buf->chunk_size = chunk_size; - buf->last_pos = 0; + buf->last_pos = -1; buf->alignment = alignment; // @question Could it be beneficial to have this before the element data? 
@@ -131,6 +123,11 @@ void chunk_free(ChunkMemory* buf) } } +inline +uint32 chunk_id_from_memory(ChunkMemory* buf, byte* pos) { + return (uint32) ((uintptr_t) pos - (uintptr_t) buf->memory) / buf->chunk_size; +} + inline byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false) { @@ -146,93 +143,102 @@ byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false) return offset; } -/** - * In some cases we know exactly which index is free - */ -void chunk_reserve_index(ChunkMemory* buf, int64 index, int64 elements = 1, bool zeroed = false) +// @performance This is a very important function, revisit in the future for optimization (e.g. ABM) +int32 chunk_reserve(ChunkMemory* buf, uint32 elements = 1) { - int64 byte_index = index / 64; - int32 bit_index = index % 64; + int32 free_index = (buf->last_pos + 1) / 64; + int32 bit_index = (buf->last_pos + 1) & 63; + int32 free_element = -1; - // Mark the bits as reserved - for (int32 j = 0; j < elements; ++j) { - int64 current_byte_index = byte_index + (bit_index + j) / 64; - int32 current_bit_index = (bit_index + j) % 64; - buf->free[current_byte_index] |= (1LL << current_bit_index); - } + int32 i = -1; + int32 consecutive_free_bits = 0; - if (zeroed) { - memset(buf->memory + index * buf->chunk_size, 0, elements * buf->chunk_size); - } - - DEBUG_MEMORY_WRITE((uint64) (buf->memory + index * buf->chunk_size), elements * buf->chunk_size); - - buf->last_pos = index; -} - -int64 chunk_reserve(ChunkMemory* buf, uint64 elements = 1, bool zeroed = false) -{ - int64 free_index = (buf->last_pos + 1) / 64; - int32 bit_index = buf->last_pos - free_index * 64; - int64 free_element = -1; - - int32 i = 0; - int64 max_bytes = (buf->count + 7) / 64; - - while (free_element < 0 && i < buf->count) { - ++i; - - if (free_index >= max_bytes) { + while (free_element < 0 && ++i < buf->count) { + // Skip fully filled ranges + if (free_index * 64 + bit_index + elements - consecutive_free_bits >= buf->count) { 
free_index = 0; - } - - if (buf->free[free_index] == 0xFF) { + bit_index = 0; + i += buf->count - (free_index * 64 + bit_index); + consecutive_free_bits = 0; + } else if (buf->free[free_index] == 0xFFFFFFFFFFFFFFFF) { ++free_index; + bit_index = 0; + i += 63; + consecutive_free_bits = 0; continue; } - // @performance There is some redundancy happening down below, we should ++free_index in certain conditions? - for (; bit_index < 64; ++bit_index) { - int32 consecutive_free_bits = 0; + // Find first free element + while (IS_BIT_SET_64_R2L(buf->free[free_index], bit_index)) { + consecutive_free_bits = 0; + ++bit_index; + ++i; - // Check if there are 'elements' consecutive free bits - for (int32 j = 0; j < elements; ++j) { - // Check if there is enough space until the end of the buffer. - // Remember, the last free index may only allow only 1 bit if the size is 65 - if (free_index * 64 + (bit_index + j) >= buf->count) { - break; - } - - uint64 current_free_index = free_index + (bit_index + j) / 64; - int32 current_bit_index = (bit_index + j) % 64; - - int64 mask = 1LL << current_bit_index; - if ((buf->free[current_free_index] & mask) == 0) { - ++consecutive_free_bits; - } else { - break; - } - } - - if (consecutive_free_bits == elements) { - free_element = free_index * 64 + bit_index; - - // Mark the bits as reserved - for (int32 j = 0; j < elements; ++j) { - int64 current_free_index = free_index + (bit_index + j) / 64; - int32 current_bit_index = (bit_index + j) % 64; - buf->free[current_free_index] |= (1LL << current_bit_index); - } + // We still need to check for overflow since our initial bit_index is based on buf->last_pos + if (bit_index > 63) { + bit_index = 0; + ++free_index; break; } } - bit_index = 0; + // The previous while may exit with an "overflow", that's why this check is required + if (IS_BIT_SET_64_R2L(buf->free[free_index], bit_index)) { + consecutive_free_bits = 0; - ++i; - ++free_index; + continue; + } + + // We found our first free element, let's 
check if we have enough free space + while (!IS_BIT_SET_64_R2L(buf->free[free_index], bit_index) + && consecutive_free_bits != elements + && free_index * 64 + bit_index + elements - consecutive_free_bits < buf->count + ) { + ++i; + ++consecutive_free_bits; + ++bit_index; + + if (bit_index > 63) { + bit_index = 0; + ++free_index; + + break; + } + } + + // Do we have enough free bits? + if (consecutive_free_bits == elements) { + free_element = free_index * 64 + bit_index - elements; + int32 possible_free_index = free_element / 64; + int32 possible_bit_index = free_element & 63; + + // Mark as used + if (elements == 1) { + buf->free[possible_free_index] |= (1LL << possible_bit_index); + } else { + uint32 elements_temp = elements; + int64 current_free_index = possible_free_index; + int32 current_bit_index = possible_bit_index; + + while (elements_temp > 0) { + // Calculate the number of bits we can set in the current 64-bit block + int32 bits_in_current_block = OMS_MIN(64 - current_bit_index, elements_temp); + + // Create a mask to set the bits + uint64 mask = ((1ULL << bits_in_current_block) - 1) << current_bit_index; + buf->free[current_free_index] |= mask; + + // Update the counters and indices + elements_temp -= bits_in_current_block; + ++current_free_index; + current_bit_index = 0; + } + } + + break; + } } if (free_element < 0) { @@ -240,70 +246,46 @@ int64 chunk_reserve(ChunkMemory* buf, uint64 elements = 1, bool zeroed = false) return -1; } - if (zeroed) { - memset(buf->memory + free_element * buf->chunk_size, 0, elements * buf->chunk_size); - } - DEBUG_MEMORY_WRITE((uint64) (buf->memory + free_element * buf->chunk_size), elements * buf->chunk_size); buf->last_pos = free_element; - return free_element; -} - -byte* chunk_find_free(ChunkMemory* buf) -{ - int64 free_index = (buf->last_pos + 1) / 64; - int32 bit_index; - - int64 free_element = -1; - int64 mask; - - int32 i = 0; - int64 max_bytes = (buf->count + 7) / 64; - - while (free_element < 0 && i < buf->count) { - if
(free_index >= max_bytes) { - free_index = 0; - } - - if (buf->free[free_index] == 0xFF) { - ++i; - ++free_index; - - continue; - } - - // This always breaks! - // @performance on the first iteration through the buffer we could optimize this by starting at a different bit_index - // because we know that the bit_index is based on last_pos - for (bit_index = 0; bit_index < 64; ++bit_index) { - mask = 1LL << bit_index; - if ((buf->free[free_index] & mask) == 0) { - free_element = free_index * 64 + bit_index; - buf->free[free_index] |= (1LL << bit_index); - - break; - } - } - } - - if (free_element < 0) { - return NULL; - } - - return buf->memory + free_element * buf->chunk_size; + return (int32) free_element; } inline -void chunk_free_element(ChunkMemory* buf, uint64 element) +void chunk_free_element(ChunkMemory* buf, uint64 free_index, int32 bit_index) +{ + DEBUG_MEMORY_DELETE((uint64) (buf->memory + (free_index * 64 + bit_index) * buf->chunk_size), buf->chunk_size); + buf->free[free_index] &= ~(1LL << bit_index); +} + +inline +void chunk_free_elements(ChunkMemory* buf, uint64 element, uint32 element_count = 1) { DEBUG_MEMORY_DELETE((uint64) (buf->memory + element * buf->chunk_size), buf->chunk_size); int64 free_index = element / 64; - int32 bit_index = element % 64; + int32 bit_index = element & 63; - buf->free[free_index] &= ~(1LL << bit_index); + if (element_count == 1) { + chunk_free_element(buf, free_index, bit_index); + return; + } + + while (element_count > 0) { + // Calculate the number of bits we can clear in the current 64-bit block + uint32 bits_in_current_block = OMS_MIN(64 - bit_index, element_count); + + // Create a mask to clear the bits + uint64 mask = ((1ULL << bits_in_current_block) - 1) << bit_index; + buf->free[free_index] &= ~mask; + + // Update the counters and indices + element_count -= bits_in_current_block; + ++free_index; + bit_index = 0; + } } inline @@ -312,7 +294,7 @@ int64 chunk_dump(const ChunkMemory* buf, byte* data) byte* start = data; //
Count - *((uint64 *) data) = SWAP_ENDIAN_LITTLE(buf->count); + *((uint32 *) data) = SWAP_ENDIAN_LITTLE(buf->count); data += sizeof(buf->count); // Size @@ -324,7 +306,7 @@ int64 chunk_dump(const ChunkMemory* buf, byte* data) data += sizeof(buf->chunk_size); // Last pos - *((uint64 *) data) = SWAP_ENDIAN_LITTLE(buf->last_pos); + *((int32 *) data) = SWAP_ENDIAN_LITTLE(buf->last_pos); data += sizeof(buf->last_pos); // Alignment @@ -343,7 +325,7 @@ inline int64 chunk_load(ChunkMemory* buf, const byte* data) { // Count - buf->count = SWAP_ENDIAN_LITTLE(*((uint64 *) data)); + buf->count = SWAP_ENDIAN_LITTLE(*((uint32 *) data)); data += sizeof(buf->count); // Size @@ -355,7 +337,7 @@ int64 chunk_load(ChunkMemory* buf, const byte* data) data += sizeof(buf->chunk_size); // Last pos - buf->last_pos = SWAP_ENDIAN_LITTLE(*((uint64 *) data)); + buf->last_pos = SWAP_ENDIAN_LITTLE(*((int32 *) data)); data += sizeof(buf->last_pos); // Alignment @@ -370,4 +352,28 @@ int64 chunk_load(ChunkMemory* buf, const byte* data) return buf->size; } +#define chunk_iterate_start(buf, chunk_id) \ + int32 free_index = 0; \ + int32 bit_index = 0; \ + \ + /* Iterate the chunk memory */ \ + for (; chunk_id < (buf)->count; ++chunk_id) { \ + /* Check if asset is defined */ \ + if (!(buf)->free[free_index]) { \ + /* Skip various elements */ \ + /* @performance Consider to only check 1 byte instead of 8 */ \ + /* There are probably even better ways by using compiler intrinsics if available */ \ + bit_index += 63; /* +64 - 1 since the loop also increases by 1 */ \ + } else if ((buf)->free[free_index] & (1ULL << bit_index)) { + +#define chunk_iterate_end \ + } \ + \ + ++bit_index; \ + if (bit_index > 63) { \ + bit_index = 0; \ + ++free_index; \ + } \ + } + #endif \ No newline at end of file diff --git a/memory/Heap.h b/memory/Heap.h index f665b99..7d37c95 100644 --- a/memory/Heap.h +++ b/memory/Heap.h @@ -14,12 +14,7 @@ #include "../stdlib/Types.h" #include "../log/DebugMemory.h" #include "BufferMemory.h" 
- -#if _WIN32 - #include "../platform/win32/Allocator.h" -#elif __linux__ - #include "../platform/linux/Allocator.h" -#endif +#include "../system/Allocator.h" struct Heap { byte* elements; diff --git a/memory/RingMemory.h b/memory/RingMemory.h index 583f502..15527a7 100644 --- a/memory/RingMemory.h +++ b/memory/RingMemory.h @@ -19,18 +19,10 @@ #include "BufferMemory.h" #include "../log/DebugMemory.h" - -#if _WIN32 - #include "../platform/win32/Allocator.h" - #include "../platform/win32/threading/ThreadDefines.h" - #include "../platform/win32/threading/Semaphore.h" - #include "../platform/win32/threading/Atomic.h" -#elif __linux__ - #include "../platform/linux/Allocator.h" - #include "../platform/linux/threading/ThreadDefines.h" - #include "../platform/linux/threading/Semaphore.h" - #include "../platform/linux/threading/Atomic.h" -#endif +#include "../thread/Atomic.h" +#include "../thread/Semaphore.h" +#include "../thread/ThreadDefines.h" +#include "../system/Allocator.h" // WARNING: Changing this structure has effects on other data structures (e.g. 
Queue) // When chaning make sure you understand what you are doing diff --git a/memory/ThreadedChunkMemory.h b/memory/ThreadedChunkMemory.h index d1cb426..10341ef 100644 --- a/memory/ThreadedChunkMemory.h +++ b/memory/ThreadedChunkMemory.h @@ -11,19 +11,14 @@ #include #include "../stdlib/Types.h" - -#if _WIN32 - #include "../platform/win32/threading/Thread.h" -#elif __linux__ - #include "../platform/linux/threading/Thread.h" -#endif +#include "../thread/Thread.h" struct ThreadedChunkMemory { byte* memory; - uint64 count; uint64 size; - int64 last_pos; + uint32 last_pos; + uint32 count; uint32 chunk_size; int32 alignment; diff --git a/memory/ThreadedQueue.h b/memory/ThreadedQueue.h index 6e7dfd3..1a7dc50 100644 --- a/memory/ThreadedQueue.h +++ b/memory/ThreadedQueue.h @@ -14,14 +14,8 @@ #include "../stdlib/Types.h" #include "../utils/Utils.h" #include "RingMemory.h" - -#if _WIN32 - #include "../platform/win32/threading/Thread.h" - #include "../platform/win32/threading/Semaphore.h" -#elif __linux__ - #include "../platform/linux/threading/Thread.h" - #include "../platform/linux/threading/Semaphore.h" -#endif +#include "../thread/Thread.h" +#include "../thread/Semaphore.h" struct ThreadedQueue { byte* memory; diff --git a/memory/ThreadedRingMemory.h b/memory/ThreadedRingMemory.h index 284b98f..245145a 100644 --- a/memory/ThreadedRingMemory.h +++ b/memory/ThreadedRingMemory.h @@ -10,12 +10,7 @@ #define TOS_MEMORY_THREADED_RING_MEMORY_H #include "RingMemory.h" - -#if _WIN32 - #include "../platform/win32/threading/Thread.h" -#elif __linux__ - #include "../platform/linux/threading/Thread.h" -#endif +#include "../thread/Thread.h" // @todo This is a horrible implementation. 
Please implement a lock free solution diff --git a/models/mob/MobStats.cpp b/models/mob/MobStats.cpp index 56b636e..1ae8ef5 100644 --- a/models/mob/MobStats.cpp +++ b/models/mob/MobStats.cpp @@ -10,7 +10,7 @@ #define TOS_MODELS_MOB_STATS_C #include "MobStats.h" -#include "../../stdlib/simd/SIMD_I32.h" +#include "../../stdlib/Simd.h" // Calculate whenever character points or items change // 1. combine primary Item points with character points diff --git a/models/mob/PrimaryStatsPoints.cpp b/models/mob/PrimaryStatsPoints.cpp index 653db0a..ca8beff 100644 --- a/models/mob/PrimaryStatsPoints.cpp +++ b/models/mob/PrimaryStatsPoints.cpp @@ -9,7 +9,7 @@ #ifndef TOS_MODELS_MOB_PRIMARY_STATS_POINTS_C #define TOS_MODELS_MOB_PRIMARY_STATS_POINTS_C -#include "../../stdlib/simd/SIMD_I8.h" +#include "../../stdlib/Simd.h" #include "PrimaryStatsPoints.h" void calculate_primary_values(const PrimaryStatsPoints* points, PrimaryStatsValues* values, int step = 8) diff --git a/models/mob/SecondaryStatsPoints.cpp b/models/mob/SecondaryStatsPoints.cpp index 5b2b9d0..0057e34 100644 --- a/models/mob/SecondaryStatsPoints.cpp +++ b/models/mob/SecondaryStatsPoints.cpp @@ -9,7 +9,7 @@ #ifndef TOS_MODELS_MOB_SECONDARY_STATS_POINTS_C #define TOS_MODELS_MOB_SECONDARY_STATS_POINTS_C -#include "../../stdlib/simd/SIMD_I8.h" +#include "../../stdlib/Simd.h" #include "SecondaryStatsPoints.h" void calculate_primary_values(const SecondaryStatsPoints* points, SecondaryStatsValues* values, int step = 8) diff --git a/module/Module.h b/module/Module.h index 3f6f906..7fb606f 100644 --- a/module/Module.h +++ b/module/Module.h @@ -2,10 +2,7 @@ #define TOS_MODULE_H #include "../stdlib/Types.h" - -#ifdef _WIN32 - #include "../../GameEngine/platform/win32/Library.h" -#endif +#include "../../GameEngine/system/Library.h" enum ModuleType { MODULE_TYPE_HUD, diff --git a/module/ModuleManager.h b/module/ModuleManager.h index a86d42d..d255ded 100644 --- a/module/ModuleManager.h +++ b/module/ModuleManager.h @@ -3,12 +3,11 
@@ #include "Module.h" #include "../memory/RingMemory.h" +#include "../system/FileUtils.cpp" #if _WIN32 - #include "../platform/win32/FileUtils.cpp" #include "../platform/win32/UtilsWin32.h" #elif __linux__ - #include "../platform/linux/FileUtils.cpp" #endif struct ModuleManager { diff --git a/network/Socket.h b/network/Socket.h new file mode 100644 index 0000000..6524492 --- /dev/null +++ b/network/Socket.h @@ -0,0 +1,18 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_NETWORK_SOCKET_H +#define TOS_NETWORK_SOCKET_H + +#if _WIN32 + #include "../platform/win32/network/Socket.h" +#elif __linux__ + #include "../platform/linux/network/Socket.h" +#endif + +#endif \ No newline at end of file diff --git a/object/Animation.h b/object/Animation.h index 5fa4db6..34d0091 100644 --- a/object/Animation.h +++ b/object/Animation.h @@ -11,12 +11,7 @@ #include "../stdlib/Types.h" #include "../memory/RingMemory.h" - -#if _WIN32 - #include "../platform/win32/FileUtils.cpp" -#else - #include "../platform/linux/FileUtils.cpp" -#endif +#include "../system/FileUtils.cpp" struct Skeleton { diff --git a/object/Hitbox.h b/object/Hitbox.h index 592adc7..7dbdd3d 100644 --- a/object/Hitbox.h +++ b/object/Hitbox.h @@ -11,12 +11,7 @@ #include "../stdlib/Types.h" #include "../memory/RingMemory.h" - -#if _WIN32 - #include "../platform/win32/FileUtils.cpp" -#else - #include "../platform/linux/FileUtils.cpp" -#endif +#include "../system/FileUtils.cpp" struct Hitbox { diff --git a/object/Material.h b/object/Material.h index f966fc0..9e0a57b 100644 --- a/object/Material.h +++ b/object/Material.h @@ -11,12 +11,7 @@ #include "../stdlib/Types.h" #include "../memory/RingMemory.h" - -#if _WIN32 - #include "../platform/win32/FileUtils.cpp" -#else - #include "../platform/linux/FileUtils.cpp" -#endif +#include "../system/FileUtils.cpp" struct Material { diff --git a/object/Mesh.h b/object/Mesh.h index e7ce776..36edfd3 
100644 --- a/object/Mesh.h +++ b/object/Mesh.h @@ -11,22 +11,11 @@ #include "Vertex.h" #include "../stdlib/Types.h" - -#if _WIN32 - #include "../platform/win32/FileUtils.cpp" -#else - #include "../platform/linux/FileUtils.cpp" -#endif - +#include "../system/FileUtils.cpp" #include "../memory/RingMemory.h" #include "../utils/EndianUtils.h" #include "../utils/StringUtils.h" - -#if __aarch64__ - #include "../stdlib/sve/SVE_I32.h" -#else - #include "../stdlib/simd/SIMD_I32.h" -#endif +#include "../stdlib/Simd.h" #define MESH_VERSION 1 @@ -36,8 +25,6 @@ struct Mesh { byte* data; // memory owner that subdivides into the pointers below - // @todo Implement the version into the file, currently not implemented - int32 version; uint32 object; uint32 group_count; @@ -90,7 +77,8 @@ void mesh_from_file_txt( // move past the version string pos += 8; - mesh->version = strtol(pos, &pos, 10); ++pos; + // @todo us version for different handling + int32 version = strtol(pos, &pos, 10); ++pos; int32 object_index = 0; int32 group_index = 0; @@ -480,9 +468,9 @@ int32 mesh_from_data( { const byte* pos = data; - // Read version - mesh->version = *((int32 *) pos); - pos += sizeof(mesh->version); + // Read version, use to handle different versions differently + int32 version = *((int32 *) pos); + pos += sizeof(version); // Read base data mesh->vertex_type = *((int32 *) pos); @@ -549,7 +537,7 @@ int32 mesh_from_data( // We would have to check the vertex format to calculate the actual size int32 mesh_data_size(const Mesh* mesh) { - return sizeof(mesh->version) + return sizeof(int32) + sizeof(mesh->vertex_type) + sizeof(mesh->vertex_count) + 12 * sizeof(f32) * mesh->vertex_count; // 12 is the maximum value @@ -565,8 +553,8 @@ int32 mesh_to_data( byte* pos = data; // version - memcpy(pos, &mesh->version, sizeof(mesh->version)); - pos += sizeof(mesh->version); + *((int32 *) pos) = MESH_VERSION; + pos += sizeof(int32); // vertices if (vertex_save_format == VERTEX_TYPE_ALL) { diff --git 
a/platform/linux/Allocator.h b/platform/linux/Allocator.h index 430d9c7..e383c84 100644 --- a/platform/linux/Allocator.h +++ b/platform/linux/Allocator.h @@ -17,7 +17,6 @@ #include "../../utils/TestUtils.h" // @todo Currently alignment only effects the starting position, but it should also effect the ending/size -// @todo Consider to rename file to Allocator.h // @question Since we store at least the size of the memory in the beginning, // does this have a negative impact on caching? diff --git a/platform/linux/FileUtils.cpp b/platform/linux/FileUtils.cpp index 7a5fcfe..5e312e2 100644 --- a/platform/linux/FileUtils.cpp +++ b/platform/linux/FileUtils.cpp @@ -102,7 +102,7 @@ void relative_to_absolute(const char* rel, char* path) ++self_path_length; memcpy(path, self_path, self_path_length); - strcpy(path + self_path_length, temp); + str_copy_short(path + self_path_length, temp); } // @todo implement relative path support, similar to UtilsWin32 diff --git a/platform/linux/Library.h b/platform/linux/Library.cpp similarity index 78% rename from platform/linux/Library.h rename to platform/linux/Library.cpp index 46e1286..1f25039 100644 --- a/platform/linux/Library.h +++ b/platform/linux/Library.cpp @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef TOS_PLATFORM_LINUX_LIBRARY_H -#define TOS_PLATFORM_LINUX_LIBRARY_H +#ifndef TOS_PLATFORM_LINUX_LIBRARY_C +#define TOS_PLATFORM_LINUX_LIBRARY_C #include #include @@ -18,30 +18,22 @@ #include "../../stdlib/Types.h" #include "../../utils/StringUtils.h" #include "UtilsLinux.h" -#include "../Library.h" +#include "../../system/Library.h" // @todo Rename file to Library.cpp inline bool library_load(Library* lib) { - size_t path_length = strlen(lib->dir); - char dst[PATH_MAX]; - str_concat( - lib->dir, path_length, - lib->dst, strlen(lib->dst), - dst - ); + str_concat_new(dst, lib->dir, lib->dst); #if DEBUG char src[PATH_MAX]; size_t dst_len = strlen(dst); memcpy(src, dst, dst_len + 1); - - memcpy(dst + dst_len - 
(sizeof(".so") - 1), "_temp", sizeof("_temp") - 1); - memcpy(dst + dst_len - (sizeof(".so") - 1) + (sizeof("_temp") - 1), ".so", sizeof(".so")); + str_insert(dst, dst_len - (sizeof(".so") - 1), "_temp"); lib->last_load = file_last_modified(src); file_copy(src, dst); diff --git a/platform/linux/SystemInfo.cpp b/platform/linux/SystemInfo.cpp index 55b0c2a..dc704c5 100644 --- a/platform/linux/SystemInfo.cpp +++ b/platform/linux/SystemInfo.cpp @@ -18,7 +18,7 @@ #include #if __aarch64__ - #include "../../stdlib/simd/SIMD_Helper.h" + #include "../../stdlib/SIMD_Helper.h" #else #include "../../stdlib/sve/SVE_Helper.h" #endif @@ -358,7 +358,7 @@ uint32 display_info_get(DisplayInfo* info) { mode.dmSize = sizeof(mode); if (EnumDisplaySettingsA(device.DeviceName, ENUM_CURRENT_SETTINGS, &mode)) { - strcpy(info[i].name, device.DeviceName); + str_copy_short(info[i].name, device.DeviceName); info[i].width = mode.dmPelsWidth; info[i].height = mode.dmPelsHeight; info[i].hz = mode.dmDisplayFrequency; diff --git a/platform/win32/Allocator.h b/platform/win32/Allocator.h index 632098a..ab4a5f6 100644 --- a/platform/win32/Allocator.h +++ b/platform/win32/Allocator.h @@ -15,7 +15,6 @@ #include "../../utils/TestUtils.h" // @todo Currently alignment only effects the starting position, but it should also effect the ending/size -// @todo Consider to rename file to Allocator.h inline void* platform_alloc(size_t size) @@ -23,6 +22,10 @@ void* platform_alloc(size_t size) return VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); } +// @question Since we store at least the size of the memory in the beginning, +// does this have a negative impact on caching? 
+// Our Memory doesn't start at the cache line beginning but at least offset by sizeof(size_t) + inline void* platform_alloc_aligned(size_t size, int32 alignment) { diff --git a/platform/win32/FileUtils.cpp b/platform/win32/FileUtils.cpp index 7100af9..07da482 100644 --- a/platform/win32/FileUtils.cpp +++ b/platform/win32/FileUtils.cpp @@ -82,7 +82,7 @@ void relative_to_absolute(const char* rel, char* path) ++self_path_length; memcpy(path, self_path, self_path_length); - strcpy(path + self_path_length, temp); + str_copy_short(path + self_path_length, temp); } inline uint64 diff --git a/platform/win32/Library.h b/platform/win32/Library.cpp similarity index 80% rename from platform/win32/Library.h rename to platform/win32/Library.cpp index a9777a0..06953e8 100644 --- a/platform/win32/Library.h +++ b/platform/win32/Library.cpp @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef TOS_PLATFORM_WIN32_LIBRARY_H -#define TOS_PLATFORM_WIN32_LIBRARY_H +#ifndef TOS_PLATFORM_WIN32_LIBRARY_C +#define TOS_PLATFORM_WIN32_LIBRARY_C #include #include @@ -23,23 +23,15 @@ inline bool library_load(Library* lib) { - size_t path_length = strlen(lib->dir); - char dst[MAX_PATH]; - str_concat( - lib->dir, path_length, - lib->dst, strlen(lib->dst), - dst - ); + str_concat_new(dst, lib->dir, lib->dst); #if DEBUG char src[MAX_PATH]; size_t dst_len = strlen(dst); memcpy(src, dst, dst_len + 1); - - memcpy(dst + dst_len - (sizeof(".dll") - 1), "_temp", sizeof(".temp") - 1); - memcpy(dst + dst_len - (sizeof(".dll") - 1) + (sizeof(".temp") - 1), ".dll", sizeof(".dll")); + str_insert(dst, dst_len - (sizeof(".dll") - 1), "_temp"); lib->last_load = file_last_modified(src); file_copy(src, dst); diff --git a/platform/win32/SystemInfo.cpp b/platform/win32/SystemInfo.cpp index 55e4223..8b6df42 100644 --- a/platform/win32/SystemInfo.cpp +++ b/platform/win32/SystemInfo.cpp @@ -30,7 +30,7 @@ #if __aarch64__ #include "../../stdlib/sve/SVE_Helper.h" #else - #include 
"../../stdlib/simd/SIMD_Helper.h" + #include "../../stdlib/SIMD_Helper.h" #endif // @performance Do we really need all these libs, can't we simplify that?! @@ -451,7 +451,7 @@ void display_info_get_primary(DisplayInfo* info) { mode.dmSize = sizeof(mode); if (EnumDisplaySettingsA(device.DeviceName, ENUM_CURRENT_SETTINGS, &mode)) { - strcpy(info->name, device.DeviceName); + str_copy_short(info->name, device.DeviceName); info->width = mode.dmPelsWidth; info->height = mode.dmPelsHeight; info->hz = mode.dmDisplayFrequency; @@ -473,7 +473,7 @@ uint32 display_info_get(DisplayInfo* info) { mode.dmSize = sizeof(mode); if (EnumDisplaySettingsA(device.DeviceName, ENUM_CURRENT_SETTINGS, &mode)) { - strcpy(info[i].name, device.DeviceName); + str_copy_short(info[i].name, device.DeviceName); info[i].width = mode.dmPelsWidth; info[i].height = mode.dmPelsHeight; info[i].hz = mode.dmDisplayFrequency; diff --git a/platform/win32/threading/Spinlock.cpp b/platform/win32/threading/Spinlock.cpp index ef82733..01997ed 100644 --- a/platform/win32/threading/Spinlock.cpp +++ b/platform/win32/threading/Spinlock.cpp @@ -10,6 +10,7 @@ #define TOS_PLATFORM_WIN32_THREADING_SPINLOCK_C #include +#include "../../../stdlib/Types.h" #include "../TimeUtils.h" #include "Spinlock.h" diff --git a/stdlib/HashMap.h b/stdlib/HashMap.h index b64d599..a70d165 100644 --- a/stdlib/HashMap.h +++ b/stdlib/HashMap.h @@ -16,57 +16,113 @@ #include "../memory/ChunkMemory.h" #include "../utils/StringUtils.h" -#define HASH_MAP_MAX_KEY_LENGTH 32 +// WARNING Length of 28 used to ensure perfect padding with element_id and key +#define HASH_MAP_MAX_KEY_LENGTH 28 +///////////////////////////// +// string key +///////////////////////////// struct HashEntryInt32 { - int64 element_id; + uint32 element_id; char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntryInt32* next; int32 value; }; struct HashEntryInt64 { - int64 element_id; + uint32 element_id; char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntryInt64* next; int64 value; }; struct 
HashEntryUIntPtr { - int64 element_id; + uint32 element_id; char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntryUIntPtr* next; uintptr_t value; }; struct HashEntryVoidP { - int64 element_id; + uint32 element_id; char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntryVoidP* next; void* value; }; struct HashEntryFloat { - int64 element_id; + uint32 element_id; char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntryFloat* next; f32 value; }; struct HashEntryStr { - int64 element_id; + uint32 element_id; char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntryStr* next; char value[HASH_MAP_MAX_KEY_LENGTH]; }; struct HashEntry { - int64 element_id; + uint32 element_id; char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntry* next; byte* value; }; +///////////////////////////// +// int key +///////////////////////////// +struct HashEntryInt32KeyInt32 { + uint32 element_id; + int32 key; + HashEntryInt32KeyInt32* next; + int32 value; +}; + +struct HashEntryInt64KeyInt32 { + uint32 element_id; + int32 key; + HashEntryInt64KeyInt32* next; + int64 value; +}; + +struct HashEntryUIntPtrKeyInt32 { + uint32 element_id; + int32 key; + HashEntryUIntPtrKeyInt32* next; + uintptr_t value; +}; + +struct HashEntryVoidPKeyInt32 { + uint32 element_id; + int32 key; + HashEntryVoidPKeyInt32* next; + void* value; +}; + +struct HashEntryFloatKeyInt32 { + uint32 element_id; + int32 key; + HashEntryFloatKeyInt32* next; + f32 value; +}; + +struct HashEntryStrKeyInt32 { + uint32 element_id; + int32 key; + HashEntryStrKeyInt32* next; + char value[HASH_MAP_MAX_KEY_LENGTH]; +}; + +struct HashEntryKeyInt32 { + uint32 element_id; + int32 key; + HashEntryKeyInt32* next; + byte* value; +}; + struct HashMap { void** table; ChunkMemory buf; @@ -83,7 +139,7 @@ void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ri ); hm->table = (void **) data; - chunk_init(&hm->buf, data + sizeof(void *) * count, count, element_size, 1); + chunk_init(&hm->buf, data + sizeof(void *) * count, count, element_size, 8); } // WARNING: element_size = 
element size + remaining HashEntry data size @@ -96,14 +152,14 @@ void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory* ); hm->table = (void **) data; - chunk_init(&hm->buf, data + sizeof(void *) * count, count, element_size, 1); + chunk_init(&hm->buf, data + sizeof(void *) * count, count, element_size, 8); } // WARNING: element_size = element size + remaining HashEntry data size void hashmap_create(HashMap* hm, int32 count, int32 element_size, byte* buf) { hm->table = (void **) buf; - chunk_init(&hm->buf, buf + sizeof(void *) * count, count, element_size, 1); + chunk_init(&hm->buf, buf + sizeof(void *) * count, count, element_size, 8); } // Calculates how large a hashmap will be @@ -121,10 +177,13 @@ int64 hashmap_size(const HashMap* hm) return hm->buf.count * sizeof(hm->table) + hm->buf.size; } +///////////////////////////// +// string key +///////////////////////////// void hashmap_insert(HashMap* hm, const char* key, int32 value) { uint64 index = hash_djb2(key) % hm->buf.count; - int64 element = chunk_reserve(&hm->buf, 1); + int32 element = chunk_reserve(&hm->buf, 1); HashEntryInt32* entry = (HashEntryInt32 *) chunk_get_element(&hm->buf, element, true); entry->element_id = element; @@ -150,7 +209,7 @@ void hashmap_insert(HashMap* hm, const char* key, int32 value) { void hashmap_insert(HashMap* hm, const char* key, int64 value) { uint64 index = hash_djb2(key) % hm->buf.count; - int64 element = chunk_reserve(&hm->buf, 1); + int32 element = chunk_reserve(&hm->buf, 1); HashEntryInt64* entry = (HashEntryInt64 *) chunk_get_element(&hm->buf, element, true); entry->element_id = element; @@ -175,7 +234,7 @@ void hashmap_insert(HashMap* hm, const char* key, int64 value) { void hashmap_insert(HashMap* hm, const char* key, uintptr_t value) { uint64 index = hash_djb2(key) % hm->buf.count; - int64 element = chunk_reserve(&hm->buf, 1); + int32 element = chunk_reserve(&hm->buf, 1); HashEntryUIntPtr* entry = (HashEntryUIntPtr *) 
chunk_get_element(&hm->buf, element, true); entry->element_id = element; @@ -200,7 +259,7 @@ void hashmap_insert(HashMap* hm, const char* key, uintptr_t value) { void hashmap_insert(HashMap* hm, const char* key, void* value) { uint64 index = hash_djb2(key) % hm->buf.count; - int64 element = chunk_reserve(&hm->buf, 1); + int32 element = chunk_reserve(&hm->buf, 1); HashEntryVoidP* entry = (HashEntryVoidP *) chunk_get_element(&hm->buf, element, true); entry->element_id = element; @@ -225,7 +284,7 @@ void hashmap_insert(HashMap* hm, const char* key, void* value) { void hashmap_insert(HashMap* hm, const char* key, f32 value) { uint64 index = hash_djb2(key) % hm->buf.count; - int64 element = chunk_reserve(&hm->buf, 1); + int32 element = chunk_reserve(&hm->buf, 1); HashEntryFloat* entry = (HashEntryFloat *) chunk_get_element(&hm->buf, element, true); entry->element_id = element; @@ -250,7 +309,7 @@ void hashmap_insert(HashMap* hm, const char* key, f32 value) { void hashmap_insert(HashMap* hm, const char* key, const char* value) { uint64 index = hash_djb2(key) % hm->buf.count; - int64 element = chunk_reserve(&hm->buf, 1); + int32 element = chunk_reserve(&hm->buf, 1); HashEntryStr* entry = (HashEntryStr *) chunk_get_element(&hm->buf, element, true); entry->element_id = element; @@ -274,10 +333,10 @@ void hashmap_insert(HashMap* hm, const char* key, const char* value) { } } -void hashmap_insert(HashMap* hm, const char* key, byte* value) { +HashEntry* hashmap_insert(HashMap* hm, const char* key, byte* value) { uint64 index = hash_djb2(key) % hm->buf.count; - int64 element = chunk_reserve(&hm->buf, 1); + int32 element = chunk_reserve(&hm->buf, 1); HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, element, true); entry->element_id = element; @@ -300,6 +359,73 @@ void hashmap_insert(HashMap* hm, const char* key, byte* value) { } else { hm->table[index] = entry; } + + return entry; +} + +HashEntry* hashmap_reserve(HashMap* hm, const char* key) { + uint64 index = 
hash_djb2(key) % hm->buf.count; + + int32 element = chunk_reserve(&hm->buf, 1); + HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, element, true); + entry->element_id = element; + + entry->value = (byte *) entry + sizeof(HashEntry); + + strncpy(entry->key, key, HASH_MAP_MAX_KEY_LENGTH); + entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; + + entry->next = NULL; + + if (hm->table[index]) { + HashEntry* tmp = (HashEntry *) hm->table[index]; + while(tmp->next) { + tmp = tmp->next; + } + + tmp->next = entry; + } else { + hm->table[index] = entry; + } + + return entry; +} + +// Returns existing element or element to be filled +HashEntry* hashmap_get_reserve(HashMap* hm, const char* key) +{ + uint64 index = hash_djb2(key) % hm->buf.count; + HashEntry* entry = (HashEntry *) hm->table[index]; + + while (entry != NULL) { + if (str_compare(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { + DEBUG_MEMORY_READ((uint64) entry, sizeof(HashEntry)); + return entry; + } + + if (((HashEntry *) entry->next) == NULL) { + break; + } + + entry = (HashEntry *) entry->next; + } + + int32 element = chunk_reserve(&hm->buf, 1); + HashEntry* entry_new = (HashEntry *) chunk_get_element(&hm->buf, element, true); + entry_new->element_id = element; + + entry_new->value = (byte *) entry_new + sizeof(HashEntry); + + strncpy(entry_new->key, key, HASH_MAP_MAX_KEY_LENGTH); + entry_new->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; + + if (entry) { + entry->next = entry_new; + } else { + hm->table[index] = entry_new; + } + + return entry_new; } HashEntry* hashmap_get_entry(const HashMap* hm, const char* key) { @@ -307,7 +433,8 @@ HashEntry* hashmap_get_entry(const HashMap* hm, const char* key) { HashEntry* entry = (HashEntry *) hm->table[index]; while (entry != NULL) { - if (strncmp(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { + if (str_compare(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { + DEBUG_MEMORY_READ((uint64) entry, sizeof(HashEntry)); return entry; } @@ -324,7 +451,8 @@ 
HashEntry* hashmap_get_entry(const HashMap* hm, const char* key, uint64 hash) { HashEntry* entry = (HashEntry *) hm->table[hash]; while (entry != NULL) { - if (strncmp(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { + if (str_compare(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { + DEBUG_MEMORY_READ((uint64) entry, sizeof(HashEntry)); return entry; } @@ -334,20 +462,253 @@ HashEntry* hashmap_get_entry(const HashMap* hm, const char* key, uint64 hash) { return NULL; } -void hashmap_delete_entry(HashMap* hm, const char* key) { +// @performance If we had a doubly linked list we could delete keys much easier +// However that would make insertion slower +// Maybe we create a nother hashmap that is doubly linked +void hashmap_remove(HashMap* hm, const char* key) { uint64 index = hash_djb2(key) % hm->buf.count; HashEntry* entry = (HashEntry *) hm->table[index]; HashEntry* prev = NULL; while (entry != NULL) { - if (strncmp(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { + if (str_compare(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { if (prev == NULL) { hm->table[index] = entry->next; } else { prev->next = entry->next; } - chunk_free_element(&hm->buf, entry->element_id); + chunk_free_elements(&hm->buf, entry->element_id); + + return; + } + + prev = entry; + entry = entry->next; + } +} + +///////////////////////////// +// int key +///////////////////////////// +void hashmap_insert(HashMap* hm, int32 key, int32 value) { + uint64 index = key % hm->buf.count; + + int32 element = chunk_reserve(&hm->buf, 1); + HashEntryInt32KeyInt32* entry = (HashEntryInt32KeyInt32 *) chunk_get_element(&hm->buf, element, true); + entry->element_id = element; + + entry->key = key; + entry->value = value; + entry->next = NULL; + + if (hm->table[index]) { + HashEntryInt32KeyInt32* tmp = (HashEntryInt32KeyInt32 *) hm->table[index]; + while(tmp->next) { + tmp = tmp->next; + } + + tmp->next = entry; + } else { + hm->table[index] = entry; + } +} + +void hashmap_insert(HashMap* hm, int32 key, 
int64 value) { + uint64 index = key % hm->buf.count; + + int32 element = chunk_reserve(&hm->buf, 1); + HashEntryInt64KeyInt32* entry = (HashEntryInt64KeyInt32 *) chunk_get_element(&hm->buf, element, true); + entry->element_id = element; + + entry->key = key; + entry->value = value; + entry->next = NULL; + + if (hm->table[index]) { + HashEntryInt64KeyInt32* tmp = (HashEntryInt64KeyInt32 *) hm->table[index]; + while(tmp->next) { + tmp = tmp->next; + } + + tmp->next = entry; + } else { + hm->table[index] = entry; + } +} + +void hashmap_insert(HashMap* hm, int32 key, uintptr_t value) { + uint64 index = key % hm->buf.count; + + int32 element = chunk_reserve(&hm->buf, 1); + HashEntryUIntPtrKeyInt32* entry = (HashEntryUIntPtrKeyInt32 *) chunk_get_element(&hm->buf, element, true); + entry->element_id = element; + + entry->key = key; + entry->value = value; + entry->next = NULL; + + if (hm->table[index]) { + HashEntryUIntPtrKeyInt32* tmp = (HashEntryUIntPtrKeyInt32 *) hm->table[index]; + while(tmp->next) { + tmp = tmp->next; + } + + tmp->next = entry; + } else { + hm->table[index] = entry; + } +} + +void hashmap_insert(HashMap* hm, int32 key, void* value) { + uint64 index = key % hm->buf.count; + + int32 element = chunk_reserve(&hm->buf, 1); + HashEntryVoidPKeyInt32* entry = (HashEntryVoidPKeyInt32 *) chunk_get_element(&hm->buf, element, true); + entry->element_id = element; + + entry->key = key; + entry->value = value; + entry->next = NULL; + + if (hm->table[index]) { + HashEntryVoidPKeyInt32* tmp = (HashEntryVoidPKeyInt32 *) hm->table[index]; + while(tmp->next) { + tmp = tmp->next; + } + + tmp->next = entry; + } else { + hm->table[index] = entry; + } +} + +void hashmap_insert(HashMap* hm, int32 key, f32 value) { + uint64 index = key % hm->buf.count; + + int32 element = chunk_reserve(&hm->buf, 1); + HashEntryFloatKeyInt32* entry = (HashEntryFloatKeyInt32 *) chunk_get_element(&hm->buf, element, true); + entry->element_id = element; + + entry->key = key; + entry->value = 
value; + entry->next = NULL; + + if (hm->table[index]) { + HashEntryFloatKeyInt32* tmp = (HashEntryFloatKeyInt32 *) hm->table[index]; + while(tmp->next) { + tmp = tmp->next; + } + + tmp->next = entry; + } else { + hm->table[index] = entry; + } +} + +void hashmap_insert(HashMap* hm, int32 key, const char* value) { + uint64 index = key % hm->buf.count; + + int32 element = chunk_reserve(&hm->buf, 1); + HashEntryStrKeyInt32* entry = (HashEntryStrKeyInt32 *) chunk_get_element(&hm->buf, element, true); + entry->element_id = element; + + entry->key = key; + + strncpy(entry->value, value, HASH_MAP_MAX_KEY_LENGTH); + entry->value[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; + + entry->next = NULL; + + if (hm->table[index]) { + HashEntryStrKeyInt32* tmp = (HashEntryStrKeyInt32 *) hm->table[index]; + while(tmp->next) { + tmp = tmp->next; + } + + tmp->next = entry; + } else { + hm->table[index] = entry; + } +} + +void hashmap_insert(HashMap* hm, int32 key, byte* value) { + uint64 index = key % hm->buf.count; + + int32 element = chunk_reserve(&hm->buf, 1); + HashEntryKeyInt32* entry = (HashEntryKeyInt32 *) chunk_get_element(&hm->buf, element, true); + entry->element_id = element; + + entry->key = key; + entry->value = (byte *) entry + sizeof(HashEntryKeyInt32); + + memcpy(entry->value, value, hm->buf.chunk_size - sizeof(HashEntryKeyInt32)); + + entry->next = NULL; + + if (hm->table[index]) { + HashEntryKeyInt32* tmp = (HashEntryKeyInt32 *) hm->table[index]; + while(tmp->next) { + tmp = tmp->next; + } + + tmp->next = entry; + } else { + hm->table[index] = entry; + } +} + +HashEntryKeyInt32* hashmap_get_entry(const HashMap* hm, int32 key) { + uint64 index = key % hm->buf.count; + HashEntryKeyInt32* entry = (HashEntryKeyInt32 *) hm->table[index]; + + while (entry != NULL) { + if (entry->key == key) { + DEBUG_MEMORY_READ((uint64) entry, sizeof(HashEntryKeyInt32)); + return entry; + } + + entry = (HashEntryKeyInt32 *) entry->next; + } + + return NULL; +} + +// This function only saves one 
step (omission of the hash function) +// The reason for this is in some cases we can use compile time hashing +HashEntryKeyInt32* hashmap_get_entry(const HashMap* hm, int32 key, uint64 hash) { + hash %= hm->buf.count; + HashEntryKeyInt32* entry = (HashEntryKeyInt32 *) hm->table[hash]; + + while (entry != NULL) { + if (entry->key == key) { + DEBUG_MEMORY_READ((uint64) entry, sizeof(HashEntryKeyInt32)); + return entry; + } + + entry = (HashEntryKeyInt32 *) entry->next; + } + + return NULL; +} + +// @performance If we had a doubly linked list we could delete keys much easier +// However that would make insertion slower +// Maybe we create a nother hashmap that is doubly linked +void hashmap_remove(HashMap* hm, int32 key) { + uint64 index = key % hm->buf.count; + HashEntryKeyInt32* entry = (HashEntryKeyInt32 *) hm->table[index]; + HashEntryKeyInt32* prev = NULL; + + while (entry != NULL) { + if (entry->key == key) { + if (prev == NULL) { + hm->table[index] = entry->next; + } else { + prev->next = entry->next; + } + + chunk_free_elements(&hm->buf, entry->element_id); return; } @@ -357,10 +718,22 @@ void hashmap_delete_entry(HashMap* hm, const char* key) { } } +inline +int32 hashmap_value_size(const HashMap* hm) +{ + return (uint32) ( + hm->buf.chunk_size + - sizeof(uint32) // element id + - sizeof(char) * HASH_MAP_MAX_KEY_LENGTH // key + - sizeof(uintptr_t) // next pointer + ); +} + +// @question Shouldn't we also store the hashmap count, chunk size etc? Currently not done and expected to be correctly initialized. 
inline int64 hashmap_dump(const HashMap* hm, byte* data) { - *((uint64 *) data) = SWAP_ENDIAN_LITTLE(hm->buf.count); + *((uint32 *) data) = SWAP_ENDIAN_LITTLE(hm->buf.count); data += sizeof(hm->buf.count); // Dump the table content where the elements are relative indices/pointers @@ -371,17 +744,19 @@ int64 hashmap_dump(const HashMap* hm, byte* data) } data += sizeof(uint64) * hm->buf.count; - int64 value_size = hm->buf.chunk_size - sizeof(uint64) - sizeof(char) * HASH_MAP_MAX_KEY_LENGTH - sizeof(uint64); + // @bug what if Int32 key? + int32 value_size = hashmap_value_size(hm); // Dumb hash map content = buffer memory + // Since we are using ChunkMemory we can be smart about it and iterate the chunk memory instead of performing pointer chasing int32 free_index = 0; int32 bit_index = 0; for (uint32 i = 0; i < hm->buf.count; ++i) { - if ((hm->buf.free[free_index] & (1ULL << bit_index)) > 0) { + if (hm->buf.free[free_index] & (1ULL << bit_index)) { HashEntry* entry = (HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, i); // element_id - *((uint64 *) data) = SWAP_ENDIAN_LITTLE(entry->element_id); + *((uint32 *) data) = SWAP_ENDIAN_LITTLE(entry->element_id); data += sizeof(entry->element_id); // key @@ -430,8 +805,8 @@ int64 hashmap_dump(const HashMap* hm, byte* data) inline int64 hashmap_load(HashMap* hm, const byte* data) { - uint64 count = SWAP_ENDIAN_LITTLE(*((uint64 *) data)); - data += sizeof(uint64); + uint64 count = SWAP_ENDIAN_LITTLE(*((uint32 *) data)); + data += sizeof(uint32); // Load the table content for (uint32 i = 0; i < count; ++i) { @@ -450,33 +825,31 @@ int64 hashmap_load(HashMap* hm, const byte* data) // @question don't we have to possibly endian swap check the free array as well? memcpy(hm->buf.free, data, sizeof(uint64) * CEIL_DIV(hm->buf.count, 64)); - int64 value_size = hm->buf.chunk_size - sizeof(uint64) - sizeof(char) * HASH_MAP_MAX_KEY_LENGTH - sizeof(uint64); + // @bug what if Int32 key? 
+ int32 value_size = hashmap_value_size(hm); // Switch endian AND turn offsets to pointers - int32 free_index = 0; - int32 bit_index = 0; - for (uint32 i = 0; i < hm->buf.count; ++i) { - if ((hm->buf.free[free_index] & (1ULL << bit_index)) > 0) { - HashEntry* entry = (HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, i); + int32 chunk_id = 0; + chunk_iterate_start(&hm->buf, chunk_id) + HashEntry* entry = (HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, chunk_id); - // element id - entry->element_id = SWAP_ENDIAN_LITTLE(entry->element_id); + // element id + entry->element_id = SWAP_ENDIAN_LITTLE(entry->element_id); - // key is already loaded with the memcpy - // @question Do we even want to use memcpy? We are re-checking all the values here anyways + // key is already loaded with the memcpy + // @question Do we even want to use memcpy? We are re-checking all the values here anyways - // next pointer - if (entry->next) { - entry->next = (HashEntry *) (hm->buf.memory + SWAP_ENDIAN_LITTLE((uint64) entry->next)); - } - - if (value_size == 4) { - ((HashEntryInt32 *) entry)->value = SWAP_ENDIAN_LITTLE(((HashEntryInt32 *) entry)->value); - } else if (value_size == 8) { - ((HashEntryInt64 *) entry)->value = SWAP_ENDIAN_LITTLE(((HashEntryInt64 *) entry)->value); - } + // next pointer + if (entry->next) { + entry->next = (HashEntry *) (hm->buf.memory + SWAP_ENDIAN_LITTLE((uint64) entry->next)); } - } + + if (value_size == 4) { + ((HashEntryInt32 *) entry)->value = SWAP_ENDIAN_LITTLE(((HashEntryInt32 *) entry)->value); + } else if (value_size == 8) { + ((HashEntryInt64 *) entry)->value = SWAP_ENDIAN_LITTLE(((HashEntryInt64 *) entry)->value); + } + chunk_iterate_end; // How many bytes was read from data return sizeof(hm->buf.count) // hash map count = buffer count diff --git a/stdlib/PerfectHashMap.h b/stdlib/PerfectHashMap.h index 1d82664..af3840c 100644 --- a/stdlib/PerfectHashMap.h +++ b/stdlib/PerfectHashMap.h @@ -198,7 +198,7 @@ void 
perfect_hashmap_insert(PerfectHashMap* hm, const char* key, int32 value) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; PerfectHashEntryInt32* entry = (PerfectHashEntryInt32 *) (hm->hash_entries + hm->entry_size * index); entry->element_id = index; - strcpy(entry->key, key); + str_copy_short(entry->key, key); entry->value = value; } @@ -207,7 +207,7 @@ void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, int64 value) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; PerfectHashEntryInt64* entry = (PerfectHashEntryInt64 *) (hm->hash_entries + hm->entry_size * index); entry->element_id = index; - strcpy(entry->key, key); + str_copy_short(entry->key, key); entry->value = value; } @@ -216,7 +216,7 @@ void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, uintptr_t value int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; PerfectHashEntryUIntPtr* entry = (PerfectHashEntryUIntPtr *) (hm->hash_entries + hm->entry_size * index); entry->element_id = index; - strcpy(entry->key, key); + str_copy_short(entry->key, key); entry->value = value; } @@ -225,7 +225,7 @@ void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, void* value) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; PerfectHashEntryVoidP* entry = (PerfectHashEntryVoidP *) (hm->hash_entries + hm->entry_size * index); entry->element_id = index; - strcpy(entry->key, key); + str_copy_short(entry->key, key); entry->value = value; } @@ -234,7 +234,7 @@ void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, f32 value) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; PerfectHashEntryFloat* entry = (PerfectHashEntryFloat *) (hm->hash_entries + hm->entry_size * index); entry->element_id = index; - strcpy(entry->key, key); + str_copy_short(entry->key, key); entry->value = value; } @@ -243,7 +243,7 @@ void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, const char* val int32 
index = hm->hash_function(key, hm->hash_seed) % hm->map_size; PerfectHashEntryStr* entry = (PerfectHashEntryStr *) (hm->hash_entries + hm->entry_size * index); entry->element_id = index; - strcpy(entry->key, key); + str_copy_short(entry->key, key); memcpy(entry->value, value, PERFECT_HASH_MAP_MAX_KEY_LENGTH); } @@ -252,7 +252,7 @@ void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, byte* value) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_size; PerfectHashEntryStr* entry = (PerfectHashEntryStr *) (hm->hash_entries + hm->entry_size * index); entry->element_id = index; - strcpy(entry->key, key); + str_copy_short(entry->key, key); memcpy(entry->value, value, hm->entry_size - sizeof(PerfectHashEntry)); } diff --git a/stdlib/simd/SIMD_Helper.h b/stdlib/SIMD_Helper.h similarity index 99% rename from stdlib/simd/SIMD_Helper.h rename to stdlib/SIMD_Helper.h index 4ca8d93..cc070bd 100644 --- a/stdlib/simd/SIMD_Helper.h +++ b/stdlib/SIMD_Helper.h @@ -12,14 +12,12 @@ #include #include #include -#include "../Types.h" +#include "Types.h" // @todo split into platform code for windows and linux #if _WIN32 #include - #include - #ifdef _MSC_VER #include #endif diff --git a/stdlib/Simd.h b/stdlib/Simd.h new file mode 100644 index 0000000..7335348 --- /dev/null +++ b/stdlib/Simd.h @@ -0,0 +1,24 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_STDLIB_SIMD_H +#define TOS_STDLIB_SIMD_H + +#if __aarch64__ + +#else + #include "simd/SIMD_F32.h" + #include "simd/SIMD_F64.h" + #include "simd/SIMD_I8.h" + #include "simd/SIMD_I16.h" + #include "simd/SIMD_I32.h" + #include "simd/SIMD_I64.h" + #include "simd/SIMD_SVML.h" +#endif + +#endif \ No newline at end of file diff --git a/stdlib/ThreadedHashMap.h b/stdlib/ThreadedHashMap.h index 60208c3..4b4de93 100644 --- a/stdlib/ThreadedHashMap.h +++ b/stdlib/ThreadedHashMap.h @@ -12,15 +12,9 @@ #include "../stdlib/Types.h" 
#include "HashMap.h" -#if _WIN32 - #include "../platform/win32/threading/Thread.h" - #include "../platform/win32/threading/Semaphore.h" - #include "../platform/win32/threading/Atomic.h" -#elif __linux__ - #include "../platform/linux/threading/Thread.h" - #include "../platform/linux/threading/Semaphore.h" - #include "../platform/linux/threading/Atomic.h" -#endif +#include "../thread/Atomic.h" +#include "../thread/Semaphore.h" +#include "../thread/Thread.h" struct ThreadedHashMap { void** table; @@ -125,9 +119,9 @@ void thrd_hashmap_get_entry(ThreadedHashMap* hm, HashEntry* entry, const char* k } inline -void thrd_hashmap_delete_entry(ThreadedHashMap* hm, const char* key) { +void thrd_hashmap_remove(ThreadedHashMap* hm, const char* key) { pthread_mutex_lock(&hm->mutex); - hashmap_delete_entry((HashMap *) hm, key); + hashmap_remove((HashMap *) hm, key); pthread_mutex_unlock(&hm->mutex); } diff --git a/stdlib/Types.h b/stdlib/Types.h index 5c0d164..ae27c29 100644 --- a/stdlib/Types.h +++ b/stdlib/Types.h @@ -73,9 +73,11 @@ typedef intptr_t smm; #define MIN_INT32 0x80000000 #define MIN_INT64 0x8000000000000000 +#define MIN_MILLI 60000 #define SEC_MILLI 1000 -#define MILLI_MICRO 1000 +#define MIN_MICRO 60000000 #define SEC_MICRO 1000000 +#define MILLI_MICRO 1000 #define MHZ 1000000 #define GHZ 1000000000 diff --git a/system/Allocator.h b/system/Allocator.h new file mode 100644 index 0000000..833cbc0 --- /dev/null +++ b/system/Allocator.h @@ -0,0 +1,18 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_SYSTEM_ALLOCATOR_H +#define TOS_SYSTEM_ALLOCATOR_H + +#if _WIN32 + #include "../platform/win32/Allocator.h" +#elif __linux__ + #include "../platform/linux/Allocator.h" +#endif + +#endif \ No newline at end of file diff --git a/system/FileUtils.cpp b/system/FileUtils.cpp new file mode 100644 index 0000000..402281d --- /dev/null +++ b/system/FileUtils.cpp @@ -0,0 +1,18 @@ +/** + * Jingga 
+ * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_SYSTEM_FILE_UTILS_C +#define TOS_SYSTEM_FILE_UTILS_C + +#if _WIN32 + #include "../platform/win32/FileUtils.cpp" +#elif __linux__ + #include "../platform/linux/FileUtils.cpp" +#endif + +#endif \ No newline at end of file diff --git a/system/Library.cpp b/system/Library.cpp new file mode 100644 index 0000000..db62a91 --- /dev/null +++ b/system/Library.cpp @@ -0,0 +1,18 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_SYSTEM_LIBRARY_C +#define TOS_SYSTEM_LIBRARY_C + +#if _WIN32 + #include "../platform/win32/Library.cpp" +#elif __linux__ + #include "../platform/linux/Library.cpp" +#endif + +#endif \ No newline at end of file diff --git a/system/SystemInfo.cpp b/system/SystemInfo.cpp new file mode 100644 index 0000000..720af75 --- /dev/null +++ b/system/SystemInfo.cpp @@ -0,0 +1,18 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_SYSTEM_INFO_C +#define TOS_SYSTEM_INFO_C + +#if _WIN32 + #include "../platform/win32/SystemInfo.cpp" +#elif __linux__ + #include "../platform/linux/SystemInfo.cpp" +#endif + +#endif \ No newline at end of file diff --git a/thread/Atomic.h b/thread/Atomic.h new file mode 100644 index 0000000..5d3dc12 --- /dev/null +++ b/thread/Atomic.h @@ -0,0 +1,18 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_THREADS_ATOMIC_H +#define TOS_THREADS_ATOMIC_H + +#if _WIN32 + #include "../platform/win32/threading/Atomic.h" +#elif __linux__ + #include "../platform/linux/threading/Atomic.h" +#endif + +#endif \ No newline at end of file diff --git a/thread/Semaphore.h b/thread/Semaphore.h new file mode 100644 index 0000000..d0233e2 --- /dev/null +++ b/thread/Semaphore.h @@ -0,0 
+1,18 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_THREADS_SEMAPHORE_H +#define TOS_THREADS_SEMAPHORE_H + +#if _WIN32 + #include "../platform/win32/threading/Semaphore.h" +#elif __linux__ + #include "../platform/linux/threading/Semaphore.h" +#endif + +#endif \ No newline at end of file diff --git a/thread/Spinlock.cpp b/thread/Spinlock.cpp new file mode 100644 index 0000000..1740134 --- /dev/null +++ b/thread/Spinlock.cpp @@ -0,0 +1,18 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_THREADS_SPINLOCK_C +#define TOS_THREADS_SPINLOCK_C + +#if _WIN32 + #include "../platform/win32/threading/Spinlock.cpp" +#elif __linux__ + #include "../platform/linux/threading/Spinlock.cpp" +#endif + +#endif \ No newline at end of file diff --git a/thread/Spinlock.h b/thread/Spinlock.h new file mode 100644 index 0000000..6d6444b --- /dev/null +++ b/thread/Spinlock.h @@ -0,0 +1,18 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_THREADS_SPINLOCK_H +#define TOS_THREADS_SPINLOCK_H + +#if _WIN32 + #include "../platform/win32/threading/Spinlock.h" +#elif __linux__ + #include "../platform/linux/threading/Spinlock.h" +#endif + +#endif \ No newline at end of file diff --git a/thread/Thread.h b/thread/Thread.h index fdd5227..672ba9a 100644 --- a/thread/Thread.h +++ b/thread/Thread.h @@ -13,13 +13,12 @@ #include #include "../stdlib/Types.h" +#include "Atomic.h" #if _WIN32 #include "../platform/win32/threading/Thread.h" - #include "../platform/win32/threading/Atomic.h" #elif __linux__ #include "../platform/linux/threading/Thread.h" - #include "../platform/linux/threading/Atomic.h" #endif #include "ThreadJob.h" diff --git a/thread/ThreadDefines.h b/thread/ThreadDefines.h new file mode 100644 index 0000000..e57594a --- 
/dev/null +++ b/thread/ThreadDefines.h @@ -0,0 +1,18 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_THREADS_THREAD_DEFINES_H +#define TOS_THREADS_THREAD_DEFINES_H + +#if _WIN32 + #include "../platform/win32/threading/ThreadDefines.h" +#elif __linux__ + #include "../platform/linux/threading/ThreadDefines.h" +#endif + +#endif \ No newline at end of file diff --git a/thread/ThreadJob.h b/thread/ThreadJob.h index 1513e9a..8b24773 100644 --- a/thread/ThreadJob.h +++ b/thread/ThreadJob.h @@ -14,12 +14,7 @@ #include "../stdlib/Types.h" #include "../memory/ThreadedRingMemory.h" - -#if _WIN32 - #include "../platform/win32/threading/ThreadDefines.h" -#elif __linux__ - #include "../platform/linux/threading/ThreadDefines.h" -#endif +#include "../thread/ThreadDefines.h" typedef void (*ThreadPoolJobFunc)(void*); diff --git a/thread/ThreadPool.h b/thread/ThreadPool.h index a38b6c5..5933644 100644 --- a/thread/ThreadPool.h +++ b/thread/ThreadPool.h @@ -15,15 +15,9 @@ #include "../stdlib/Types.h" #include "../memory/Queue.h" #include "../memory/BufferMemory.h" - -#ifdef _WIN32 - #include "../platform/win32/threading/Thread.h" - #include "../platform/win32/threading/Atomic.h" -#elif __linux__ - #include "../platform/linux/threading/Thread.h" - #include "../platform/linux/threading/Atomic.h" -#endif - +#include "../log/DebugMemory.h" +#include "Thread.h" +#include "Atomic.h" #include "ThreadJob.h" struct ThreadPool { @@ -70,6 +64,8 @@ static THREAD_RETURN thread_pool_worker(void* arg) atomic_increment_relaxed(&pool->working_cnt); atomic_set_release(&work->state, 2); work->func(work); + // At the end of a thread the ring memory automatically is considered freed + DEBUG_MEMORY_FREE((uint64) work->ring.memory, work->ring.size); atomic_set_release(&work->state, 1); // Job gets marked after completion -> can be overwritten now diff --git a/ui/UIAttribute.h b/ui/UIAttribute.h index e241e24..7fcf274 
100644 --- a/ui/UIAttribute.h +++ b/ui/UIAttribute.h @@ -119,7 +119,7 @@ UIAttribute* ui_attribute_from_group(UIAttributeGroup* group, UIAttributeType ty return NULL; } -constexpr const char* ui_attribute_type_to_string_const(UIAttributeType e) +constexpr const char* ui_attribute_type_to_string(UIAttributeType e) { switch (e) { case UI_ATTRIBUTE_TYPE_TYPE: diff --git a/ui/UIElementType.h b/ui/UIElementType.h index df81443..7ec82be 100644 --- a/ui/UIElementType.h +++ b/ui/UIElementType.h @@ -21,7 +21,7 @@ enum UIElementType { UI_ELEMENT_TYPE_SIZE, }; -constexpr const char* ui_element_type_to_string_const(UIElementType e) +constexpr const char* ui_element_type_to_string(UIElementType e) { switch (e) { case UI_ELEMENT_TYPE_BUTTON: diff --git a/ui/UILayout.h b/ui/UILayout.h index b357300..e5c54e6 100644 --- a/ui/UILayout.h +++ b/ui/UILayout.h @@ -28,6 +28,8 @@ struct UILayout { int32 vertex_size; Asset* ui_asset; + // @question Should we maybe also hold the font atlas asset here AND the color palette? + // Defines the length of the static vertex array int32 vertex_size_static; }; diff --git a/ui/UITheme.h b/ui/UITheme.h index 2c67d15..1f8329d 100644 --- a/ui/UITheme.h +++ b/ui/UITheme.h @@ -7,16 +7,11 @@ #include "../utils/StringUtils.h" #include "../stdlib/HashMap.h" #include "../font/Font.h" +#include "../system/FileUtils.cpp" #include "UIAttribute.h" #include "UIElementType.h" -#if _WIN32 - #include "../platform/win32/FileUtils.cpp" -#else - #include "../platform/linux/FileUtils.cpp" -#endif - #define UI_THEME_VERSION 1 // @question Currently there is some data duplication in here and in the UIElement. 
@@ -27,8 +22,6 @@ struct UIThemeStyle { byte* data; - int32 version; - // A theme may have N named styles // The hashmap contains the offset where the respective style can be found // @performance Switch to perfect hash map @@ -121,7 +114,8 @@ void theme_from_file_txt( // move past the version string pos += 8; - theme->version = strtol(pos, &pos, 10); ++pos; + // Use version for different handling + int32 version = strtol(pos, &pos, 10); ++pos; bool block_open = false; char block_name[32]; @@ -157,7 +151,9 @@ void theme_from_file_txt( UIAttributeGroup* temp_group = NULL; pos = (char *) file.content; - pos += 8; // move past version + + // move past version string + str_move_past(&pos, '\n'); while (*pos != '\0') { str_skip_whitespace(&pos); @@ -213,14 +209,14 @@ void theme_from_file_txt( // Handle different attribute types UIAttribute attribute = {}; - if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_TYPE), attribute_name) == 0) { + if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_TYPE), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_TYPE; char str[32]; str_copy_move_until(&pos, str, '\n'); for (int32 j = 0; j < UI_ELEMENT_TYPE_SIZE; ++j) { - if (strcmp(str, ui_element_type_to_string_const((UIElementType) j)) == 0) { + if (strcmp(str, ui_element_type_to_string((UIElementType) j)) == 0) { attribute.value_int = j; break; @@ -228,135 +224,135 @@ void theme_from_file_txt( } ++pos; - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_STYLE), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_STYLE), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_STYLE; str_copy_move_until(&pos, attribute.value_str, '\n'); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_FONT_COLOR), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_FONT_COLOR), attribute_name) == 0) { ++pos; // Skip '#' 
attribute.attribute_id = UI_ATTRIBUTE_TYPE_FONT_COLOR; hexstr_to_rgba(&attribute.value_v4_f32, pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_FONT_SIZE), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_FONT_SIZE), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_FONT_SIZE; attribute.value_float = strtof(pos, &pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_FONT_WEIGHT), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_FONT_WEIGHT), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_FONT_WEIGHT; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_FONT_LINE_HEIGHT), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_FONT_LINE_HEIGHT), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_FONT_LINE_HEIGHT; attribute.value_float = strtof(pos, &pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_ALIGN_H), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_ALIGN_H), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_ALIGN_H; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_ALIGN_V), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_ALIGN_V), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_ALIGN_V; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_ZINDEX), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_ZINDEX), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_ZINDEX; attribute.value_float = SWAP_ENDIAN_LITTLE(strtof(pos, &pos)); - } else if 
(strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BACKGROUND_COLOR), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BACKGROUND_COLOR), attribute_name) == 0) { ++pos; // Skip '#' attribute.attribute_id = UI_ATTRIBUTE_TYPE_BACKGROUND_COLOR; hexstr_to_rgba(&attribute.value_v4_f32, pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_BACKGROUND_IMG; str_copy_move_until(&pos, attribute.value_str, '\n'); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_OPACITY), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_OPACITY), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_OPACITY; attribute.value_float = SWAP_ENDIAN_LITTLE(strtof(pos, &pos)); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_POSITION_V), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_POSITION_V), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_POSITION_V; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_POSITION_H), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_POSITION_H), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_POSITION_H; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_STYLE), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_STYLE), attribute_name) == 0) { 
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_STYLE; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_COLOR), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BORDER_COLOR), attribute_name) == 0) { ++pos; // Skip '#' attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_COLOR; hexstr_to_rgba(&attribute.value_v4_f32, pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_WIDTH), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BORDER_WIDTH), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_WIDTH; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_TOP_COLOR), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BORDER_TOP_COLOR), attribute_name) == 0) { ++pos; // Skip '#' attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_TOP_COLOR; hexstr_to_rgba(&attribute.value_v4_f32, pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_TOP_WIDTH), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BORDER_TOP_WIDTH), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_TOP_WIDTH; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_RIGHT_COLOR), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BORDER_RIGHT_COLOR), attribute_name) == 0) { ++pos; // Skip '#' attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_RIGHT_COLOR; hexstr_to_rgba(&attribute.value_v4_f32, pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_RIGHT_WIDTH), attribute_name) == 0) { + } else if 
(strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BORDER_RIGHT_WIDTH), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_RIGHT_WIDTH; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_BOTTOM_COLOR), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BORDER_BOTTOM_COLOR), attribute_name) == 0) { ++pos; // Skip '#' attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_BOTTOM_COLOR; hexstr_to_rgba(&attribute.value_v4_f32, pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_BOTTOM_WIDTH), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BORDER_BOTTOM_WIDTH), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_BOTTOM_WIDTH; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_LEFT_COLOR), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BORDER_LEFT_COLOR), attribute_name) == 0) { ++pos; // Skip '#' attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_LEFT_COLOR; hexstr_to_rgba(&attribute.value_v4_f32, pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_LEFT_WIDTH), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_BORDER_LEFT_WIDTH), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_LEFT_WIDTH; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_PADDING), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_PADDING), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_PADDING; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_PADDING_TOP), attribute_name) 
== 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_PADDING_TOP), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_PADDING_TOP; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_PADDING_RIGHT), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_PADDING_RIGHT), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_PADDING_RIGHT; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_PADDING_BOTTOM), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_PADDING_BOTTOM), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_PADDING_BOTTOM; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_PADDING_LEFT), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_PADDING_LEFT), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_PADDING_LEFT; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_SHADOW_INNER_COLOR), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_SHADOW_INNER_COLOR), attribute_name) == 0) { ++pos; // Skip '#' attribute.attribute_id = UI_ATTRIBUTE_TYPE_SHADOW_INNER_COLOR; hexstr_to_rgba(&attribute.value_v4_f32, pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_SHADOW_INNER_ANGLE), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_SHADOW_INNER_ANGLE), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_SHADOW_INNER_ANGLE; attribute.value_float = strtof(pos, &pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_SHADOW_INNER_DISTANCE), attribute_name) == 0) { + } else 
if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_SHADOW_INNER_DISTANCE), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_SHADOW_INNER_DISTANCE; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_SHADOW_OUTER_COLOR), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_SHADOW_OUTER_COLOR), attribute_name) == 0) { ++pos; // Skip '#' attribute.attribute_id = UI_ATTRIBUTE_TYPE_SHADOW_OUTER_COLOR; hexstr_to_rgba(&attribute.value_v4_f32, pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_SHADOW_OUTER_ANGLE), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_SHADOW_OUTER_ANGLE), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_SHADOW_OUTER_ANGLE; attribute.value_float = strtof(pos, &pos); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_SHADOW_OUTER_DISTANCE), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_SHADOW_OUTER_DISTANCE), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_SHADOW_OUTER_DISTANCE; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_TRANSITION_ANIMATION), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_TRANSITION_ANIMATION), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_TRANSITION_ANIMATION; attribute.value_int = strtoul(pos, &pos, 10); - } else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_TRANSITION_DURATION), attribute_name) == 0) { + } else if (strcmp(ui_attribute_type_to_string(UI_ATTRIBUTE_TYPE_TRANSITION_DURATION), attribute_name) == 0) { attribute.attribute_id = UI_ATTRIBUTE_TYPE_TRANSITION_DURATION; attribute.value_float = strtof(pos, &pos); } else { @@ -406,8 +402,8 @@ int32 theme_from_data( ) { const 
byte* pos = data; - theme->version = *((int32 *) pos); - pos += sizeof(theme->version); + int32 version = *((int32 *) pos); + pos += sizeof(version); // Prepare hashmap (incl. reserve memory) by initializing it the same way we originally did // Of course we still need to populate the data using hashmap_load() @@ -487,8 +483,8 @@ int32 theme_to_data( byte* pos = data; // version - *((int32 *) pos) = SWAP_ENDIAN_LITTLE(theme->version); - pos += sizeof(theme->version); + *((int32 *) pos) = SWAP_ENDIAN_LITTLE(UI_THEME_VERSION); + pos += sizeof(int32); // hashmap byte* start = pos; diff --git a/utils/BitUtils.h b/utils/BitUtils.h index ac0c50d..b8cd28d 100644 --- a/utils/BitUtils.h +++ b/utils/BitUtils.h @@ -35,6 +35,7 @@ // Right to left (little endian) #define IS_BIT_SET_R2L(num, pos) ((bool) ((num) & (1 << (pos)))) +#define IS_BIT_SET_64_R2L(num, pos) ((bool) ((num) & (1LL << (pos)))) #define BIT_SET_R2L(num, pos) ((num) | ((uint32) 1 << (pos))) #define BIT_UNSET_R2L(num, pos) ((num) & ~((uint32) 1 << (pos))) #define BIT_FLIP_R2L(num, pos) ((num) ^ ((uint32) 1 << (pos))) diff --git a/utils/MathUtils.h b/utils/MathUtils.h index 41e103f..c4838a5 100644 --- a/utils/MathUtils.h +++ b/utils/MathUtils.h @@ -28,6 +28,7 @@ #define OMS_CEIL(x) ((x) == (int)(x) ? (int)(x) : ((x) > 0 ? (int)(x) + 1 : (int)(x))) #define OMS_ROUND(x) (((x) >= 0) ? 
((int)((x) + 0.5f)) : ((int)((x) - 0.5f))) #define OMS_ROUND_POSITIVE(x) ((int)((x) + 0.5f)) +#define FLOAT_CAST_EPS 0.001953125 // Modulo function when b is a power of 2 #define MODULO_2(a, b) ((a) & (b - 1)) diff --git a/utils/StringUtils.h b/utils/StringUtils.h index 88eb7b5..06ff849 100644 --- a/utils/StringUtils.h +++ b/utils/StringUtils.h @@ -167,13 +167,14 @@ void wchar_to_char(const char* __restrict str, char* __restrict dest) } inline constexpr -int32 str_to_int(const char* str) +int64 str_to_int(const char* str) { - int32 result = 0; + int64 result = 0; - int32 sign = 1; - if (*str++ == '-') { + int64 sign = 1; + if (*str == '-') { sign = -1; + ++str; } while (*str >= '0' && *str <= '9') { @@ -186,15 +187,21 @@ int32 str_to_int(const char* str) return result * sign; } -inline constexpr -int32 int_to_str(int64 number, char *str, const char thousands = ',') { +inline +int32 int_to_str(int64 number, char str[15], const char thousands) +{ + if (number == 0) { + *str++ = '0'; + *str = '\0'; + + return 1; + } + int32 i = 0; int32 digit_count = 0; int64 sign = number; - if (number == 0) { - str[i++] = '0'; - } else if (number < 0) { + if (number < 0) { number = -number; } @@ -212,8 +219,84 @@ int32 int_to_str(int64 number, char *str, const char thousands = ',') { str[i++] = '-'; } + for (int32 j = 0, k = i - 1; j < k; ++j, --k) { + char temp = str[j]; + str[j] = str[k]; + str[k] = temp; + } + str[i] = '\0'; + return i; +} + +inline constexpr +int32 int_to_str(int64 number, char str[12]) { + int32 i = -1; + int64 sign = number; + + if (number < 0) { + number = -number; + } + + do { + str[++i] = number % 10 + '0'; + number /= 10; + } while (number > 0); + + if (sign < 0) { + str[++i] = '-'; + } + + for (int32 j = 0, k = i; j < k; ++j, --k) { + char temp = str[j]; + str[j] = str[k]; + str[k] = temp; + } + + str[++i] = '\0'; + + return i; +} + +inline constexpr +int32 uint_to_str(uint64 number, char str[12]) { + int32 i = -1; + + do { + str[++i] = number % 10 + '0'; 
+ number /= 10; + } while (number > 0); + + for (int32 j = 0, k = i; j < k; ++j, --k) { + char temp = str[j]; + str[j] = str[k]; + str[k] = temp; + } + + str[++i] = '\0'; + + return i; +} + +static const char HEX_TABLE[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F' +}; + +inline constexpr +int32 int_to_hex(int64 number, char str[9]) { + int32 i = -1; + uint64 n = (uint64) number; + + do { + byte digit = n % 16; + str[++i] = HEX_TABLE[digit]; + n /= 16; + } while (n > 0); + + str[++i] = '\0'; + for (int32 j = 0, k = i - 1; j < k; ++j, --k) { char temp = str[j]; str[j] = str[k]; @@ -223,6 +306,29 @@ int32 int_to_str(int64 number, char *str, const char thousands = ',') { return i; } +inline constexpr +int64 hex_to_int(const char* hex) +{ + int64 result = 0; + while ((*hex >= '0' && *hex <= '9') + || (*hex >= 'A' && *hex <= 'F') + || (*hex >= 'a' && *hex <= 'f') + ) { + byte value = *hex++; + if (value >= '0' && value <= '9') { + value = value - '0'; + } else if (value >= 'A' && value <='F') { + value = value - 'A' + 10; + } else if (value >= 'a' && value <='f') { + value = value - 'a' + 10; + } + + result = (result << 4) | (value & 0xF); + } + + return result; +} + inline size_t str_count(const char* __restrict str, const char* __restrict substr) { @@ -241,6 +347,142 @@ size_t str_count(const char* __restrict str, const char* __restrict substr) return count; } +inline constexpr +int32 is_eol(const char* str) +{ + if (*str == '\n') { + return 1; + } else if (*str == '\r' && str[1] == '\n') { + return 2; + } + + return 0; +} + +inline +void str_copy_until(const char* __restrict src, char* __restrict dest, char delim) +{ + while (*src != delim && *src != '\0') { + *dest++ = *src++; + } + + *dest = '\0'; +} + +inline +void str_copy_until(const char* __restrict src, char* __restrict dest, const char* __restrict delim, int32 len) +{ + while (*src != '\0') { + for (int32 i = 0; i < len; ++i) { + if (*src == delim[i]) { + *dest = 
'\0'; + return; + } + } + + *dest++ = *src++; + } + + *dest = '\0'; +} + +inline +int32 str_copy_until(char* __restrict dest, const char* __restrict src, char delim) +{ + int32 len = 0; + while (*src != delim && *src != '\0') { + *dest++ = *src++; + ++len; + } + + *dest = '\0'; + + return len; +} + +inline +void str_copy_short(char* __restrict dest, const char* __restrict src, char delim = '\0') +{ + while (*src != delim) { + *dest++ = *src++; + } + + *dest = '\0'; +} + +inline +void str_copy_long(char* __restrict dest, const char* __restrict src, char delim = '\0') +{ + char* d = dest; + const char *s = src; + + // Align destination to its natural alignment + while (((uintptr_t) d & (sizeof(uintptr_t) - 1)) != 0 && *s != '\0') { + *d++ = *s++; + } + + // Copy using larger chunks (size of uintptr_t) + uintptr_t* aligned_dest = (uintptr_t *) d; + const uintptr_t* aligned_src = (const uintptr_t *) s; + + while (*aligned_src != 0) { + *aligned_dest++ = *aligned_src++; + } + + d = (char *) aligned_dest; + s = (const char *) aligned_src; + + // Copy remaining bytes + while (*s != '\0') { + *d++ = *s++; + } + + *d = '\0'; +} + +inline +void str_copy_move_until(char** __restrict src, char* __restrict dest, char delim) +{ + while (**src != delim && **src != '\0') { + *dest++ = **src; + ++(*src); + } + + *dest = '\0'; +} + +inline +void str_copy_move_until(char** __restrict src, char* __restrict dest, const char* __restrict delim, int32 len) +{ + while (**src != '\0') { + for (int32 i = 0; i < len; ++i) { + if (**src == delim[i]) { + *dest = '\0'; + return; + } + } + + *dest++ = **src; + ++(*src); + } + + *dest = '\0'; +} + +inline +int32 strcpy_to_eol(const char* src, char* dst) +{ + int32 offset = 0; + while (!is_eol(src) && *src != '\0') { + *dst++ = *src++; + ++offset; + } + + *dst = '\0'; + + return offset; +} + inline char* strsep(const char** sp, const char* sep) { @@ -262,69 +504,58 @@ char* strsep(const char** sp, const char* sep) return s; } -inline int64 
-str_concat( +inline void +str_concat_new( + char* dst, const char* src1, - const char* src2, - char* dst + const char* src2 ) { - int64 len = strlen(src1); - int64 len_total = len; - - memcpy(dst, src1, len); - dst += len; - - len = strlen(src2); - memcpy(dst, src2, len); - dst += len; + while (*src1) { *dst++ = *src1++; } + while (*src2) { *dst++ = *src2++; } *dst = '\0'; - - return len_total + len; -} - -// @question Why is this called str_add instead of str_concat like the other functions? -inline void -str_add(char* base, const char* src) -{ - while (*base) { - ++base; - } - - strcpy(base, src); } inline void -str_add(char* base, const char* src, size_t src_length) +str_concat_append(char* dst, const char* src) { - while (*base) { - ++base; + while (*dst) { + ++dst; } - memcpy(base, src, src_length); - base[src_length] = '\0'; + str_copy_short(dst, src); +} + +inline void +str_concat_new(char* dst, const char* src1, const char* src2, const char* src3) +{ + while (*src1) { *dst++ = *src1++; } + while (*src2) { *dst++ = *src2++; } + while (*src3) { *dst++ = *src3++; } + + *dst = '\0'; } inline int64 -str_add(char* base, size_t base_length, const char* src, size_t src_length) +str_concat_append(char* dst, size_t dst_length, const char* src, size_t src_length) { - memcpy(&base[base_length], src, src_length); - base[base_length + src_length] = '\0'; + memcpy(&dst[dst_length], src, src_length); + dst[dst_length + src_length] = '\0'; - return base_length + src_length; + return dst_length + src_length; } inline void -str_add(char* base, size_t base_length, const char* src) +str_concat_append(char* dst, size_t dst_length, const char* src) { - strcpy(&base[base_length], src); + str_copy_short(&dst[dst_length], src); } inline int64 -str_concat( +str_concat_new( + char* dst, const char* src1, size_t src1_length, - const char* src2, size_t src2_length, - char* dst + const char* src2, size_t src2_length ) { memcpy(dst, src1, src1_length); dst += src1_length; @@ -338,10 
+569,10 @@ str_concat( } inline -void str_concat( +void str_concat_new( + char* dst, const char* src, size_t src_length, - int64 data, - char* dst + int64 data ) { memcpy(dst, src, src_length); int32 len = int_to_str(data, dst + src_length); @@ -349,6 +580,32 @@ void str_concat( dst[src_length + len] = '\0'; } +inline +void str_concat_append( + char* dst, + int64 data +) { + size_t dst_len = strlen(dst); + int_to_str(data, dst + dst_len); +} + +inline void +str_concat_new(char* dst, const char* src, int64 data) +{ + size_t src_len = strlen(src); + memcpy(dst, src, src_len); + + int_to_str(data, dst + src_len); +} + +inline +void str_insert(char* __restrict dst, size_t insert_pos, const char* __restrict src) { + size_t src_length = strlen(src); + size_t dst_length = strlen(dst); + memcpy(dst + insert_pos + src_length, dst + insert_pos, dst_length - insert_pos + 1); + memcpy(dst + insert_pos, src, src_length); +} + inline char* strtok(char* str, const char* __restrict delim, char* *key) { char* result; @@ -426,6 +683,77 @@ void create_const_name(unsigned char* name) *name = '\0'; } +int32 str_compare(const char* str1, const char* str2) +{ + byte c1, c2; + + do { + c1 = (byte) *str1++; + c2 = (byte) *str2++; + + if (c1 == '\0') { + return c1 - c2; + } + } while (c1 == c2); + + return c1 - c2; +} + +int32 str_compare(const char* str1, const char* str2, size_t n) +{ + byte c1 = '\0'; + byte c2 = '\0'; + + if (n >= 4) { + size_t n4 = n >> 2; + + do { + c1 = (byte) *str1++; + c2 = (byte) *str2++; + + if (c1 == '\0' || c1 != c2) { + return c1 - c2; + } + + c1 = (byte) *str1++; + c2 = (byte) *str2++; + + if (c1 == '\0' || c1 != c2) { + return c1 - c2; + } + + c1 = (byte) *str1++; + c2 = (byte) *str2++; + + if (c1 == '\0' || c1 != c2) { + return c1 - c2; + } + + c1 = (byte) *str1++; + c2 = (byte) *str2++; + + if (c1 == '\0' || c1 != c2) { + return c1 - c2; + } + } while (--n4 > 0); + + n &= 3; + } + + while (n > 0) { + c1 = (byte) *str1++; + c2 = (byte) *str2++; + + if (c1 
== '\0' || c1 != c2) { + return c1 - c2; + } + + --n; + } + + return c1 - c2; +} + inline constexpr bool str_ends_with(const char* str, const char* suffix) { if (!str || !suffix) { @@ -439,7 +767,7 @@ bool str_ends_with(const char* str, const char* suffix) { return false; } - return strncmp(str + str_len - suffix_len, suffix, suffix_len) == 0; + return str_compare(str + str_len - suffix_len, suffix, suffix_len) == 0; } // WARNING: result needs to have the correct length @@ -452,7 +780,7 @@ void str_replace(const char* str, const char* __restrict search, const char* __r size_t replace_len = strlen(replace); if (search_len == 0) { - strcpy(result, str); + str_copy_short(result, str); return; } @@ -471,7 +799,7 @@ void str_replace(const char* str, const char* __restrict search, const char* __r str = current; } - strcpy(result_ptr, str); + str_copy_short(result_ptr, str); } void print_bytes(const void* ptr, size_t size) @@ -493,18 +821,6 @@ void print_bytes(const void* ptr, size_t size) } } -inline constexpr -int32 is_eol(const char* str) -{ - if (*str == '\n') { - return 1; - } else if (*str == '\r' && str[1] == '\n') { - return 2; - } - - return 0; -} - inline constexpr bool is_whitespace(char str) { @@ -639,104 +955,6 @@ void str_skip_until_list(char** __restrict str, const char* __restrict delim) } } -inline -void str_copy_until(const char* __restrict src, char* __restrict dest, char delim) -{ - while (*src != delim && *src != '\0') { - *dest++ = *src++; - } - - *dest = '\0'; -} - -inline -void str_copy_until(const char* __restrict src, char* __restrict dest, const char* __restrict delim, int32 len) -{ - while (*src != '\0') { - for (int32 i = 0; i < len; ++i) { - if (*src == delim[i]) { - *dest = '\0'; - return; - } - } - - *dest++ = *src++; - } - - *dest = '\0'; -} - -inline -int32 str_copy_until(char* __restrict dest, const char* __restrict src, char delim) -{ - int32 len = 0; - while (*src != delim && *src != '\0') { - *dest++ = *src++; - ++len; - } - - *dest = 
'\0'; - - return len; -} - -inline -int32 str_copy(char* __restrict dest, const char* __restrict src, char delim) -{ - int32 len = 0; - while (*src != delim) { - *dest++ = *src++; - ++len; - } - - *dest = '\0'; - - return len; -} - -inline -void str_copy_move_until(char** __restrict src, char* __restrict dest, char delim) -{ - while (**src != delim && **src != '\0') { - *dest++ = **src; - ++(*src); - } - - *dest = '\0'; -} - -inline -void str_copy_move_until(char** __restrict src, char* __restrict dest, const char* __restrict delim, int32 len) -{ - while (**src != '\0') { - for (int32 i = 0; i < len; ++i) { - if (**src == delim[i]) { - *dest = '\0'; - return; - } - } - - *dest++ = **src; - ++(*src); - } - - *dest = '\0'; -} - -inline -int32 strcpy_to_eol(const char* src, char* dst) -{ - int32 offset = 0; - while (!is_eol(src) && *src != '\0') { - *dst++ = *src++; - ++offset; - } - - *dst = '\0'; - - return offset; -} - inline void hexstr_to_rgba(v4_f32* rgba, const char* hex) {