From dc9f37b7265c4de2518fbc39ec3cb86a3f8e515f Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Sun, 6 Apr 2025 10:34:47 +0000 Subject: [PATCH] update --- README.md | 2 +- account/Permission.h | 45 +++ account/PermissionType.h | 23 ++ architecture/CpuInfo.h | 2 +- architecture/arm/neon/utils/Utils.h | 8 +- architecture/arm/sve/utils/Utils.h | 10 +- architecture/x86/CpuInfo.cpp | 4 +- architecture/x86/simd/SIMD_F32.h | 31 +- architecture/x86/simd/SIMD_F64.h | 6 +- architecture/x86/simd/SIMD_I16.h | 18 +- architecture/x86/simd/SIMD_I16_AVX2.h | 4 +- architecture/x86/simd/SIMD_I16_AVX512.h | 4 +- architecture/x86/simd/SIMD_I16_SSE.h | 4 +- architecture/x86/simd/SIMD_I32.h | 98 +++--- architecture/x86/simd/SIMD_I32_AVX2.h | 4 +- architecture/x86/simd/SIMD_I32_AVX512.h | 4 +- architecture/x86/simd/SIMD_I32_SSE.h | 4 +- architecture/x86/simd/SIMD_I64.h | 10 +- architecture/x86/simd/SIMD_I64_AVX2.h | 4 +- architecture/x86/simd/SIMD_I64_AVX512.h | 4 +- architecture/x86/simd/SIMD_I64_SSE.h | 4 +- architecture/x86/simd/SIMD_I8.h | 16 +- architecture/x86/simd/SIMD_I8_AVX2.h | 4 +- architecture/x86/simd/SIMD_I8_AVX512.h | 4 +- architecture/x86/simd/SIMD_I8_SSE.h | 4 +- architecture/x86/simd/SIMD_SVML.h | 6 +- architecture/x86/simd/utils/Utils.h | 6 +- asset/AssetManagementSystem.h | 20 +- command/AppCmdBuffer.cpp | 12 +- command/AppCmdBuffer.h | 2 +- compiler/gcc/Atomic.h | 56 ++-- database/Database.h | 35 +- database/DatabaseConnection.h | 7 +- database/DatabaseType.h | 2 +- database/sqlite/SqliteDatabase.h | 34 ++ encoding/Base64.h | 110 ++++++ encoding/Base64Definitions.h | 46 +++ encoding/Base64SimdArm.h | 271 +++++++++++++++ encoding/Base64SimdX86.h | 405 +++++++++++++++++++++++ entity/EntityComponentSystem.h | 8 +- gpuapi/direct3d/DirectXUtils.h | 8 +- gpuapi/vulkan/VulkanUtils.h | 8 +- hash/Sha1.h | 171 ++++++++++ hash/Sha1Definitions.h | 31 ++ hash/Sha1SimdArm.h | 156 +++++++++ hash/Sha1SimdX86.h | 125 +++++++ html/template/HtmlTemplateCache.h | 39 ++- http/HttpDispatcher.h | 97 ++++++ http/HttpHeader.h | 22 ++ http/HttpHeaderKey.h | 153 +++++++++ http/HttpMethod.h | 25 ++ http/HttpProtocol.h | 21 ++ http/HttpRequest.h | 260 +++++++++++++++ http/HttpResponse.h | 27 ++ http/HttpRoute.h | 51 +++ http/HttpRouter.h | 208 ++++++++++++ http/HttpSessionManager.h | 275 +++++++++++++++ http/HttpStatusCode.h | 194 +++++++++++ http/HttpUri.h | 31 ++ log/Log.h | 4 +- log/PerformanceProfiler.h | 5 +- memory/ChunkMemory.h | 76 ++++- memory/DataPool.h | 164 +++++++++ memory/ThreadedChunkMemory.h | 272 ++++++++++++++- memory/ThreadedDataPool.h | 93 ++++++ memory/ThreadedQueue.h | 149 +++++---- memory/ThreadedRingMemory.h | 42 +-- module/Module.h | 2 +- module/WebModule.h | 9 + platform/linux/FileUtils.cpp | 21 +- platform/linux/GuiUtils.h | 61 ++++ platform/linux/Library.cpp | 8 +- platform/linux/SystemInfo.cpp | 120 +++---- platform/linux/TimeUtils.h | 9 + platform/linux/UtilsLinux.h | 61 ---- platform/linux/network/Server.h | 79 ++++- platform/linux/threading/Semaphore.h | 18 + platform/linux/threading/Spinlock.cpp | 2 +- platform/linux/threading/Thread.h | 153 +++++---- platform/linux/threading/ThreadDefines.h | 20 +- platform/win32/GuiUtils.h | 170 ++++++++++ platform/win32/Library.cpp | 2 +- platform/win32/SystemInfo.cpp | 2 +- platform/win32/TimeUtils.h | 9 + platform/win32/UtilsWindows.h | 170 ---------- platform/win32/threading/Atomic.h | 48 +-- platform/win32/threading/Semaphore.h | 27 +- platform/win32/threading/Spinlock.cpp | 7 +- platform/win32/threading/Thread.h | 22 +- 
platform/win32/threading/ThreadDefines.h | 6 +- sort/BinarySearch.h | 11 + sort/EytzingerSearch.h | 2 + stdlib/HashMap.h | 33 +- stdlib/PerfectHashMap.h | 57 +++- stdlib/Simd.h | 5 - stdlib/ThreadedHashMap.h | 50 +-- stdlib/Types.h | 4 +- system/SystemInfo.cpp | 8 +- tests.bat | 2 +- tests/.vscode/settings.json | 2 +- tests_iter.bat | 2 +- thread/Thread.h | 2 +- thread/ThreadJob.h | 1 + thread/ThreadPool.h | 77 +++-- utils/RandomUtils.h | 62 +++- utils/RegexSimplified.h | 385 +++++++++++++++++++++ utils/StringUtils.h | 219 ++++++++++-- 107 files changed, 5073 insertions(+), 923 deletions(-) create mode 100644 account/Permission.h create mode 100644 account/PermissionType.h create mode 100644 database/sqlite/SqliteDatabase.h create mode 100644 encoding/Base64.h create mode 100644 encoding/Base64Definitions.h create mode 100644 encoding/Base64SimdArm.h create mode 100644 encoding/Base64SimdX86.h create mode 100644 hash/Sha1.h create mode 100644 hash/Sha1Definitions.h create mode 100644 hash/Sha1SimdArm.h create mode 100644 hash/Sha1SimdX86.h create mode 100644 http/HttpDispatcher.h create mode 100644 http/HttpHeader.h create mode 100644 http/HttpHeaderKey.h create mode 100644 http/HttpMethod.h create mode 100644 http/HttpProtocol.h create mode 100644 http/HttpRequest.h create mode 100644 http/HttpResponse.h create mode 100644 http/HttpRoute.h create mode 100644 http/HttpRouter.h create mode 100644 http/HttpSessionManager.h create mode 100644 http/HttpStatusCode.h create mode 100644 http/HttpUri.h create mode 100644 memory/DataPool.h create mode 100644 memory/ThreadedDataPool.h create mode 100644 module/WebModule.h create mode 100644 platform/linux/GuiUtils.h create mode 100644 platform/win32/GuiUtils.h create mode 100644 utils/RegexSimplified.h diff --git a/README.md b/README.md index 6cd6a6f..0ee8c94 100755 --- a/README.md +++ b/README.md @@ -1 +1 @@ -# GameEngine +# cOMS diff --git a/account/Permission.h b/account/Permission.h new file mode 100644 index 0000000..d2ce0b1 --- /dev/null +++ b/account/Permission.h @@ -0,0 +1,45 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_ACCOUNT_PERMISSION_H +#define COMS_ACCOUNT_PERMISSION_H + +#include "../stdlib/Types.h" + +struct Permission { + uint32 id; + uint32 uint; + uint32 app; + uint16 module; + + // Providing module + uint16 from; + + // Used by the module to internally handle permssions for different areas + // e.g. In the news module one category could be news to specify the permissions for news, or news category, ... + uint16 category; + + // Specific element + uint32 element; + + // Component of an element (e.g. only allowed to change content but not title of news) + uint32 component; + + /** + * 0x00000001 = read permission + * 0x00000010 = update permission + * 0x00000100 = create permission + * 0x00001000 = delete permission + * 0x00010000 = can change permission + */ + byte permission_flag; + + // @question Is this defaultCPermissions used? 
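The permission_flag byte above is presumably meant to be tested against the PermissionType bit values this patch adds in account/PermissionType.h (shown just below). A minimal sketch of such a check, under that assumption; the helper name is hypothetical and not part of the patch:

    // Hypothetical helper: assumes permission_flag stores a combination of
    // the PermissionType bits (PERMISSION_TYPE_READ, _UPDATE, ...).
    inline bool permission_can_update(const Permission* p) {
        return (p->permission_flag & PERMISSION_TYPE_UPDATE) != 0;
    }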
+}; + +#endif \ No newline at end of file diff --git a/account/PermissionType.h b/account/PermissionType.h new file mode 100644 index 0000000..d5acce6 --- /dev/null +++ b/account/PermissionType.h @@ -0,0 +1,23 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_ACCOUNT_PERMISSION_TYPE_H +#define COMS_ACCOUNT_PERMISSION_TYPE_H + +#include "../stdlib/Types.h" + +enum PermissionType { + PERMISSION_TYPE_NONE = 0, + PERMISSION_TYPE_READ = 1 << 0, + PERMISSION_TYPE_UPDATE = 1 << 1, + PERMISSION_TYPE_CREATE = 1 << 2, + PERMISSION_TYPE_DELETE = 1 << 3, + PERMISSION_TYPE_PERMISSION = 1 << 4, +}; + +#endif \ No newline at end of file diff --git a/architecture/CpuInfo.h b/architecture/CpuInfo.h index 1ef33ac..0fe0e85 100755 --- a/architecture/CpuInfo.h +++ b/architecture/CpuInfo.h @@ -91,7 +91,7 @@ struct CpuInfo { char brand[49]; byte model; byte family; - byte thread_count; + int16 core_count; int32 mhz; uint32 page_size; CpuCacheInfo cache[4]; diff --git a/architecture/arm/neon/utils/Utils.h b/architecture/arm/neon/utils/Utils.h index 88c3d59..c765752 100755 --- a/architecture/arm/neon/utils/Utils.h +++ b/architecture/arm/neon/utils/Utils.h @@ -17,12 +17,12 @@ // @question When do we want to use neon and when do we want to use sve? // Only allowed for data >= 64 bits -bool is_empty(const uint8_t* region, uint64_t size, int32_t steps = 8) { - if (*((uint64_t *) region) != 0) { +bool is_empty(const uint8* region, uint64 size, int32 steps = 8) { + if (*((uint64 *) region) != 0) { return false; } - const uint8_t* end = region + size; + const uint8* end = region + size; steps = intrin_validate_steps(region, steps); switch (steps) { @@ -85,7 +85,7 @@ bool is_empty(const uint8_t* region, uint64_t size, int32_t steps = 8) { } case 1: { while (region + 4 <= end) { - if (*((const uint32_t *) region) != 0) { + if (*((const uint32 *) region) != 0) { return false; } diff --git a/architecture/arm/sve/utils/Utils.h b/architecture/arm/sve/utils/Utils.h index 2fd759e..4bf98d6 100755 --- a/architecture/arm/sve/utils/Utils.h +++ b/architecture/arm/sve/utils/Utils.h @@ -16,15 +16,15 @@ #include // Only allowed for data >= 64 bits -bool is_empty(const uint8_t* region, uint64_t size, int32_t steps = 8) { - if (*((uint64_t *) region) != 0) { +bool is_empty(const uint8* region, uint64 size, int32 steps = 8) { + if (*((uint64 *) region) != 0) { return false; } - const uint8_t* end = region + size; + const uint8* end = region + size; steps = intrin_validate_steps(region, steps); - uint64_t sve_vector_bytes = svcntb(); + uint64 sve_vector_bytes = svcntb(); switch (steps) { case 16: { @@ -86,7 +86,7 @@ bool is_empty(const uint8_t* region, uint64_t size, int32_t steps = 8) { } case 1: { while (region + 4 <= end) { - if (*((const uint32_t *) region) != 0) { + if (*((const uint32 *) region) != 0) { return false; } diff --git a/architecture/x86/CpuInfo.cpp b/architecture/x86/CpuInfo.cpp index edea828..37ce4f3 100755 --- a/architecture/x86/CpuInfo.cpp +++ b/architecture/x86/CpuInfo.cpp @@ -98,7 +98,7 @@ uint64 cpu_info_features() { } void cpu_info_cache(byte level, CpuCacheInfo* cache) { - uint32 eax, ebx, ecx, edx; + uint32 eax, ebx, ecx; //, edx; int32 type; cache->level = level; @@ -113,7 +113,7 @@ void cpu_info_cache(byte level, CpuCacheInfo* cache) { eax = regs[0]; ebx = regs[1]; ecx = regs[2]; - edx = regs[3]; + // edx = regs[3]; type = (eax & 0x1F); diff --git a/architecture/x86/simd/SIMD_F32.h b/architecture/x86/simd/SIMD_F32.h index 
2051b8c..942c3f6 100755 --- a/architecture/x86/simd/SIMD_F32.h +++ b/architecture/x86/simd/SIMD_F32.h @@ -14,30 +14,29 @@ #include "../../../stdlib/Types.h" -#ifdef MACRO_CPU_FEATURE_SSE42 +#ifdef __SSE4_2__ #include "SIMD_F32_SSE.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX2 +#ifdef __AVX2__ #include "SIMD_F32_AVX2.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX512 +#ifdef __AVX512F__ #include "SIMD_F32_AVX512.h" #endif // @todo from down here we can optimize some of the code by NOT using the wrappers // the code is self contained and we could use te intrinsic functions directly -inline -void simd_mult(const f32* a, const f32* b, f32* result, int32 size, int32 steps) +void simd_mult(const f32* a, const f32* b, f32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) b, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; __m512 a_16; @@ -59,7 +58,7 @@ void simd_mult(const f32* a, const f32* b, f32* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; __m256 a_8; @@ -81,7 +80,7 @@ void simd_mult(const f32* a, const f32* b, f32* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; __m128 a_4; @@ -111,13 +110,13 @@ void simd_mult(const f32* a, const f32* b, f32* result, int32 size, int32 steps) } inline -void simd_mult(const f32* a, f32 b, f32* result, int32 size, int32 steps) +void simd_mult(const f32* a, f32 b, f32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; __m512 a_16; @@ -135,7 +134,7 @@ void simd_mult(const f32* a, f32 b, f32* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; __m256 a_8; @@ -153,7 +152,7 @@ void simd_mult(const f32* a, f32 b, f32* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; __m128 a_4; @@ -180,13 +179,13 @@ void simd_mult(const f32* a, f32 b, f32* result, int32 size, int32 steps) } inline -void simd_div(const f32* a, f32 b, f32* result, int32 size, int32 steps) +void simd_div(const f32* a, f32 b, f32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; __m512 a_16; @@ -204,7 +203,7 @@ void simd_div(const f32* a, f32 b, f32* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; __m256 a_8; @@ -222,7 +221,7 @@ void simd_div(const f32* a, f32 b, f32* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; __m128 a_4; diff --git a/architecture/x86/simd/SIMD_F64.h b/architecture/x86/simd/SIMD_F64.h index 57101fc..e1819e7 100755 --- a/architecture/x86/simd/SIMD_F64.h +++ b/architecture/x86/simd/SIMD_F64.h @@ -14,15 +14,15 @@ #include "../../../stdlib/Types.h" -#ifdef MACRO_CPU_FEATURE_SSE42 +#ifdef __SSE4_2__ #include "SIMD_F64_SSE.h" #endif -#ifdef 
MACRO_CPU_FEATURE_AVX2 +#ifdef __AVX2__ #include "SIMD_F64_AVX2.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX512 +#ifdef __AVX512F__ #include "SIMD_F64_AVX512.h" #endif diff --git a/architecture/x86/simd/SIMD_I16.h b/architecture/x86/simd/SIMD_I16.h index 09ff02e..ebb4b23 100755 --- a/architecture/x86/simd/SIMD_I16.h +++ b/architecture/x86/simd/SIMD_I16.h @@ -6,23 +6,23 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I16_H -#define COMS_TOS_STDLIB_SIMD_I16_H +#ifndef COMS_STDLIB_SIMD_I16_H +#define COMS_STDLIB_SIMD_I16_H #include #include #include "../../../stdlib/Types.h" -#ifdef MACRO_CPU_FEATURE_SSE42 +#ifdef __SSE4_2__ #include "SIMD_I16_SSE.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX2 +#ifdef __AVX2__ #include "SIMD_I16_AVX2.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX512 +#ifdef __AVX512F__ #include "SIMD_I16_AVX512.h" #endif @@ -30,13 +30,13 @@ // the code is self contained and we could use te intrinsic functions directly inline -void simd_mult(const int16* a, f32 b, int16* result, int32 size, int32 steps) +void simd_mult(const int16* a, f32 b, int16* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; __m512i a_16; @@ -65,7 +65,7 @@ void simd_mult(const int16* a, f32 b, int16* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; __m256i a_8; @@ -94,7 +94,7 @@ void simd_mult(const int16* a, f32 b, int16* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; __m128i a_4; diff --git a/architecture/x86/simd/SIMD_I16_AVX2.h b/architecture/x86/simd/SIMD_I16_AVX2.h index 90b2cd7..4e6e2c4 100644 --- a/architecture/x86/simd/SIMD_I16_AVX2.h +++ b/architecture/x86/simd/SIMD_I16_AVX2.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I16_AVX2_H -#define COMS_TOS_STDLIB_SIMD_I16_AVX2_H +#ifndef COMS_STDLIB_SIMD_I16_AVX2_H +#define COMS_STDLIB_SIMD_I16_AVX2_H #include #include diff --git a/architecture/x86/simd/SIMD_I16_AVX512.h b/architecture/x86/simd/SIMD_I16_AVX512.h index 26c7a9d..d253b8a 100644 --- a/architecture/x86/simd/SIMD_I16_AVX512.h +++ b/architecture/x86/simd/SIMD_I16_AVX512.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I16_AVX512_H -#define COMS_TOS_STDLIB_SIMD_I16_AVX512_H +#ifndef COMS_STDLIB_SIMD_I16_AVX512_H +#define COMS_STDLIB_SIMD_I16_AVX512_H #include #include diff --git a/architecture/x86/simd/SIMD_I16_SSE.h b/architecture/x86/simd/SIMD_I16_SSE.h index ea5d295..7f21732 100644 --- a/architecture/x86/simd/SIMD_I16_SSE.h +++ b/architecture/x86/simd/SIMD_I16_SSE.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I16_SSE_H -#define COMS_TOS_STDLIB_SIMD_I16_SSE_H +#ifndef COMS_STDLIB_SIMD_I16_SSE_H +#define COMS_STDLIB_SIMD_I16_SSE_H #include #include diff --git a/architecture/x86/simd/SIMD_I32.h b/architecture/x86/simd/SIMD_I32.h index 86f23f1..5ecc4ab 100755 --- a/architecture/x86/simd/SIMD_I32.h +++ b/architecture/x86/simd/SIMD_I32.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I32_H -#define COMS_TOS_STDLIB_SIMD_I32_H +#ifndef COMS_STDLIB_SIMD_I32_H +#define COMS_STDLIB_SIMD_I32_H #include #include @@ -16,27 +16,27 @@ #include 
"../../../stdlib/Types.h" #include "../../../utils/BitUtils.h" -#ifdef MACRO_CPU_FEATURE_SSE42 +#ifdef __SSE4_2__ #include "SIMD_I32_SSE.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX2 +#ifdef __AVX2__ #include "SIMD_I32_AVX2.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX512 +#ifdef __AVX512F__ #include "SIMD_I32_AVX512.h" #endif inline -void simd_mult(const int32* a, const int32* b, int32* result, int32 size, int32 steps) +void simd_mult(const int32* a, const int32* b, int32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) b, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; __m512i a_16; @@ -58,7 +58,7 @@ void simd_mult(const int32* a, const int32* b, int32* result, int32 size, int32 } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; __m256i a_8; @@ -80,7 +80,7 @@ void simd_mult(const int32* a, const int32* b, int32* result, int32 size, int32 } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; __m128i a_4; @@ -110,14 +110,14 @@ void simd_mult(const int32* a, const int32* b, int32* result, int32 size, int32 } inline -void simd_mult(const int32* a, const f32* b, f32* result, int32 size, int32 steps) +void simd_mult(const int32* a, const f32* b, f32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) b, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; __m512i a_16; @@ -141,7 +141,7 @@ void simd_mult(const int32* a, const f32* b, f32* result, int32 size, int32 step } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; __m256i a_8; @@ -165,7 +165,7 @@ void simd_mult(const int32* a, const f32* b, f32* result, int32 size, int32 step } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; __m128i a_4; @@ -197,14 +197,14 @@ void simd_mult(const int32* a, const f32* b, f32* result, int32 size, int32 step } inline -void simd_mult(const int32* a, const f32* b, int32* result, int32 size, int32 steps) +void simd_mult(const int32* a, const f32* b, int32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) b, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; __m512i a_16; @@ -230,7 +230,7 @@ void simd_mult(const int32* a, const f32* b, int32* result, int32 size, int32 st } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; __m256i a_8; @@ -256,7 +256,7 @@ void simd_mult(const int32* a, const f32* b, int32* result, int32 size, int32 st } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; __m128i a_4; @@ -290,13 +290,13 @@ void simd_mult(const int32* a, const f32* b, int32* result, int32 size, int32 st } inline -void simd_mult(const int32* a, f32 b, int32* result, int32 size, int32 steps) +void simd_mult(const int32* a, f32 b, int32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) result, steps); 
- #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; __m512i a_16; @@ -320,7 +320,7 @@ void simd_mult(const int32* a, f32 b, int32* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; __m256i a_8; @@ -344,7 +344,7 @@ void simd_mult(const int32* a, f32 b, int32* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; __m128i a_4; @@ -375,13 +375,13 @@ void simd_mult(const int32* a, f32 b, int32* result, int32 size, int32 steps) } inline -void simd_div(const int32* a, f32 b, f32* result, int32 size, int32 steps) +void simd_div(const int32* a, f32 b, f32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; __m512i a_16; @@ -403,7 +403,7 @@ void simd_div(const int32* a, f32 b, f32* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; __m256i a_8; @@ -425,7 +425,7 @@ void simd_div(const int32* a, f32 b, f32* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; __m128i a_4; @@ -454,14 +454,14 @@ void simd_div(const int32* a, f32 b, f32* result, int32 size, int32 steps) } inline -void simd_add(const int32* a, const int32* b, int32* result, int32 size, int32 steps) +void simd_add(const int32* a, const int32* b, int32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) b, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; __m512i a_16; @@ -483,7 +483,7 @@ void simd_add(const int32* a, const int32* b, int32* result, int32 size, int32 s } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; __m256i a_8; @@ -505,7 +505,7 @@ void simd_add(const int32* a, const int32* b, int32* result, int32 size, int32 s } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; __m128i a_4; @@ -535,14 +535,14 @@ void simd_add(const int32* a, const int32* b, int32* result, int32 size, int32 s } inline -void simd_add(const int32* a, const f32* b, f32* result, int32 size, int32 steps) +void simd_add(const int32* a, const f32* b, f32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) b, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; __m512i a_16; @@ -566,7 +566,7 @@ void simd_add(const int32* a, const f32* b, f32* result, int32 size, int32 steps } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; __m256i a_8; @@ -590,7 +590,7 @@ void simd_add(const int32* a, const f32* b, f32* result, int32 size, int32 steps } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; __m128i a_4; @@ -622,14 +622,14 @@ void simd_add(const int32* a, const f32* b, f32* result, int32 size, int32 steps } inline -void simd_add(const int32* a, const f32* b, int32* result, int32 size, int32 steps) +void 
simd_add(const int32* a, const f32* b, int32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) b, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; __m512i a_16; @@ -655,7 +655,7 @@ void simd_add(const int32* a, const f32* b, int32* result, int32 size, int32 ste } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; __m256i a_8; @@ -681,7 +681,7 @@ void simd_add(const int32* a, const f32* b, int32* result, int32 size, int32 ste } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; __m128i a_4; @@ -715,13 +715,13 @@ void simd_add(const int32* a, const f32* b, int32* result, int32 size, int32 ste } void -endian_swap(const int32* val, int32* result, int32 size, int32 steps) +endian_swap(const int32* val, int32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) val, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; const __m256i mask_256 = _mm256_setr_epi8( @@ -742,7 +742,7 @@ endian_swap(const int32* val, int32* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; const __m128i mask_128 = _mm_setr_epi8( @@ -771,13 +771,13 @@ endian_swap(const int32* val, int32* result, int32 size, int32 steps) } void -endian_swap(const uint32* val, uint32* result, int32 size, int32 steps) +endian_swap(const uint32* val, uint32* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) val, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; const __m256i mask_256 = _mm256_setr_epi8( @@ -798,7 +798,7 @@ endian_swap(const uint32* val, uint32* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; const __m128i mask_128 = _mm_setr_epi8( @@ -826,13 +826,13 @@ endian_swap(const uint32* val, uint32* result, int32 size, int32 steps) } } -void endian_swap(const int16* val, int16* result, int32 size, int32 steps) +void endian_swap(const int16* val, int16* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) val, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; const __m256i mask_256 = _mm256_setr_epi8( @@ -851,7 +851,7 @@ void endian_swap(const int16* val, int16* result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; const __m128i mask_128 = _mm_setr_epi8( @@ -873,13 +873,13 @@ void endian_swap(const int16* val, int16* result, int32 size, int32 steps) } } -void endian_swap(const uint16* val, uint16* result, int32 size, int32 steps) +void endian_swap(const uint16* val, uint16* result, int32 size, int32 steps = 16) { int32 i = 0; steps = intrin_validate_steps((const byte*) val, steps); steps = intrin_validate_steps((const byte*) result, steps); - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; const __m256i mask_256 = _mm256_setr_epi8( @@ -898,7 +898,7 @@ void endian_swap(const uint16* val, uint16* 
result, int32 size, int32 steps) } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; const __m128i mask_128 = _mm_setr_epi8( diff --git a/architecture/x86/simd/SIMD_I32_AVX2.h b/architecture/x86/simd/SIMD_I32_AVX2.h index 6cb7b0e..99430c9 100644 --- a/architecture/x86/simd/SIMD_I32_AVX2.h +++ b/architecture/x86/simd/SIMD_I32_AVX2.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I32_H -#define COMS_TOS_STDLIB_SIMD_I32_H +#ifndef COMS_STDLIB_SIMD_I32_H +#define COMS_STDLIB_SIMD_I32_H #include #include diff --git a/architecture/x86/simd/SIMD_I32_AVX512.h b/architecture/x86/simd/SIMD_I32_AVX512.h index cd56539..0f2d6d0 100644 --- a/architecture/x86/simd/SIMD_I32_AVX512.h +++ b/architecture/x86/simd/SIMD_I32_AVX512.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I32_H -#define COMS_TOS_STDLIB_SIMD_I32_H +#ifndef COMS_STDLIB_SIMD_I32_H +#define COMS_STDLIB_SIMD_I32_H #include #include diff --git a/architecture/x86/simd/SIMD_I32_SSE.h b/architecture/x86/simd/SIMD_I32_SSE.h index 399c49f..4acca99 100644 --- a/architecture/x86/simd/SIMD_I32_SSE.h +++ b/architecture/x86/simd/SIMD_I32_SSE.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I32_H -#define COMS_TOS_STDLIB_SIMD_I32_H +#ifndef COMS_STDLIB_SIMD_I32_H +#define COMS_STDLIB_SIMD_I32_H #include #include diff --git a/architecture/x86/simd/SIMD_I64.h b/architecture/x86/simd/SIMD_I64.h index 151c222..81bce03 100755 --- a/architecture/x86/simd/SIMD_I64.h +++ b/architecture/x86/simd/SIMD_I64.h @@ -6,23 +6,23 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I64_H -#define COMS_TOS_STDLIB_SIMD_I64_H +#ifndef COMS_STDLIB_SIMD_I64_H +#define COMS_STDLIB_SIMD_I64_H #include #include #include "../../../stdlib/Types.h" -#ifdef MACRO_CPU_FEATURE_SSE42 +#ifdef __SSE4_2__ #include "SIMD_I64_SSE.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX2 +#ifdef __AVX2__ #include "SIMD_I64_AVX2.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX512 +#ifdef __AVX512F__ #include "SIMD_I64_AVX512.h" #endif diff --git a/architecture/x86/simd/SIMD_I64_AVX2.h b/architecture/x86/simd/SIMD_I64_AVX2.h index aa61750..13ba386 100644 --- a/architecture/x86/simd/SIMD_I64_AVX2.h +++ b/architecture/x86/simd/SIMD_I64_AVX2.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I64_AVX2_H -#define COMS_TOS_STDLIB_SIMD_I64_AVX2_H +#ifndef COMS_STDLIB_SIMD_I64_AVX2_H +#define COMS_STDLIB_SIMD_I64_AVX2_H #include #include diff --git a/architecture/x86/simd/SIMD_I64_AVX512.h b/architecture/x86/simd/SIMD_I64_AVX512.h index 8c49d05..aff6ab4 100644 --- a/architecture/x86/simd/SIMD_I64_AVX512.h +++ b/architecture/x86/simd/SIMD_I64_AVX512.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I64_AVX512_H -#define COMS_TOS_STDLIB_SIMD_I64_AVX512_H +#ifndef COMS_STDLIB_SIMD_I64_AVX512_H +#define COMS_STDLIB_SIMD_I64_AVX512_H #include #include diff --git a/architecture/x86/simd/SIMD_I64_SSE.h b/architecture/x86/simd/SIMD_I64_SSE.h index 82ea226..879278c 100644 --- a/architecture/x86/simd/SIMD_I64_SSE.h +++ b/architecture/x86/simd/SIMD_I64_SSE.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I64_SSE_H -#define COMS_TOS_STDLIB_SIMD_I64_SSE_H +#ifndef COMS_STDLIB_SIMD_I64_SSE_H +#define COMS_STDLIB_SIMD_I64_SSE_H #include #include diff --git a/architecture/x86/simd/SIMD_I8.h 
b/architecture/x86/simd/SIMD_I8.h index 4cb5205..b3577dc 100755 --- a/architecture/x86/simd/SIMD_I8.h +++ b/architecture/x86/simd/SIMD_I8.h @@ -6,23 +6,23 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I8_H -#define COMS_TOS_STDLIB_SIMD_I8_H +#ifndef COMS_STDLIB_SIMD_I8_H +#define COMS_STDLIB_SIMD_I8_H #include #include #include "../../../stdlib/Types.h" -#ifdef MACRO_CPU_FEATURE_SSE42 +#ifdef __SSE4_2__ #include "SIMD_I8_SSE.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX2 +#ifdef __AVX2__ #include "SIMD_I8_AVX2.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX512 +#ifdef __AVX512F__ #include "SIMD_I8_AVX512.h" #endif @@ -31,7 +31,7 @@ int simd_equal(const byte* a, const byte* b, uint32 size, uint32 steps = 8) { steps = intrin_validate_steps((const byte*) a, steps); steps = intrin_validate_steps((const byte*) b, steps); - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ if (steps >= 16) { steps = 16; if (size >= 128) { @@ -62,7 +62,7 @@ int simd_equal(const byte* a, const byte* b, uint32 size, uint32 steps = 8) { } #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ if (steps >= 8) { steps = 8; if (size >= 64) { @@ -91,7 +91,7 @@ int simd_equal(const byte* a, const byte* b, uint32 size, uint32 steps = 8) { } #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ if (steps >= 4) { steps = 4; if (size >= 16) { diff --git a/architecture/x86/simd/SIMD_I8_AVX2.h b/architecture/x86/simd/SIMD_I8_AVX2.h index 462beaa..5c9a4e2 100644 --- a/architecture/x86/simd/SIMD_I8_AVX2.h +++ b/architecture/x86/simd/SIMD_I8_AVX2.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I8_H -#define COMS_TOS_STDLIB_SIMD_I8_H +#ifndef COMS_STDLIB_SIMD_I8_H +#define COMS_STDLIB_SIMD_I8_H #include #include diff --git a/architecture/x86/simd/SIMD_I8_AVX512.h b/architecture/x86/simd/SIMD_I8_AVX512.h index a14047d..9f01796 100644 --- a/architecture/x86/simd/SIMD_I8_AVX512.h +++ b/architecture/x86/simd/SIMD_I8_AVX512.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I8_H -#define COMS_TOS_STDLIB_SIMD_I8_H +#ifndef COMS_STDLIB_SIMD_I8_H +#define COMS_STDLIB_SIMD_I8_H #include #include diff --git a/architecture/x86/simd/SIMD_I8_SSE.h b/architecture/x86/simd/SIMD_I8_SSE.h index e676bc6..37ea149 100644 --- a/architecture/x86/simd/SIMD_I8_SSE.h +++ b/architecture/x86/simd/SIMD_I8_SSE.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef COMS_TOS_STDLIB_SIMD_I8_H -#define COMS_TOS_STDLIB_SIMD_I8_H +#ifndef COMS_STDLIB_SIMD_I8_H +#define COMS_STDLIB_SIMD_I8_H #include #include diff --git a/architecture/x86/simd/SIMD_SVML.h b/architecture/x86/simd/SIMD_SVML.h index 83a5c10..f54a06f 100644 --- a/architecture/x86/simd/SIMD_SVML.h +++ b/architecture/x86/simd/SIMD_SVML.h @@ -9,15 +9,15 @@ #ifndef COMS_STDLIB_SIMD_SVML_H #define COMS_STDLIB_SIMD_SVML_H -#ifdef MACRO_CPU_FEATURE_SSE42 +#ifdef __SSE4_2__ #include "SIMD_SVML_SSE.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX2 +#ifdef __AVX2__ #include "SIMD_SVML_AVX2.h" #endif -#ifdef MACRO_CPU_FEATURE_AVX512 +#ifdef __AVX512F__ #include "SIMD_SVML_AVX512.h" #endif diff --git a/architecture/x86/simd/utils/Utils.h b/architecture/x86/simd/utils/Utils.h index b5633c6..a62b3e0 100755 --- a/architecture/x86/simd/utils/Utils.h +++ b/architecture/x86/simd/utils/Utils.h @@ -26,7 +26,7 @@ bool is_empty(const byte* region, uint64 size, int32 steps = 8) steps = intrin_validate_steps(region, steps); switch (steps) { - #ifdef MACRO_CPU_FEATURE_AVX512 + #ifdef __AVX512F__ 
case 16: { while (region + 64 <= end) { __m512i chunk = _mm512_load_si512((const __m512i *) region); @@ -42,7 +42,7 @@ bool is_empty(const byte* region, uint64 size, int32 steps = 8) #else case 16: [[fallthrough]]; #endif - #ifdef MACRO_CPU_FEATURE_AVX2 + #ifdef __AVX2__ case 8: { while (region + 32 <= end) { __m256i chunk = _mm256_load_si256((const __m256i *) region); @@ -57,7 +57,7 @@ bool is_empty(const byte* region, uint64 size, int32 steps = 8) #else case 8: [[fallthrough]]; #endif - #ifdef MACRO_CPU_FEATURE_SSE42 + #ifdef __SSE4_2__ case 4: { while (region + 16 <= end) { __m128i chunk = _mm_load_si128((const __m128i *) region); diff --git a/asset/AssetManagementSystem.h b/asset/AssetManagementSystem.h index 512a499..c4bc505 100755 --- a/asset/AssetManagementSystem.h +++ b/asset/AssetManagementSystem.h @@ -29,7 +29,7 @@ struct AssetComponent { uint64 asset_count; // @question Do we want to add a mutex to assets. This way we don't have to lock the entire ams. - coms_pthread_mutex_t mutex; + mutex mutex; }; struct AssetManagementSystem { @@ -55,7 +55,7 @@ void ams_component_create(AssetComponent* ac, BufferMemory* buf, int32 chunk_siz LOG_1("Create AMS Component for %n assets and %n B", {{LOG_DATA_INT32, &count}, {LOG_DATA_UINT32, &chunk_size}}); chunk_init(&ac->asset_memory, buf, count, chunk_size, 64); - coms_pthread_mutex_init(&ac->mutex, NULL); + mutex_init(&ac->mutex, NULL); } inline @@ -71,13 +71,13 @@ void ams_component_create(AssetComponent* ac, byte* buf, int32 chunk_size, int32 ac->asset_memory.memory = buf; ac->asset_memory.free = (uint64 *) (ac->asset_memory.memory + ac->asset_memory.chunk_size * count); - coms_pthread_mutex_init(&ac->mutex, NULL); + mutex_init(&ac->mutex, NULL); } inline void ams_component_free(AssetComponent* ac) { - coms_pthread_mutex_destroy(&ac->mutex); + mutex_destroy(&ac->mutex); } inline @@ -400,15 +400,15 @@ Asset* thrd_ams_reserve_asset(AssetManagementSystem* ams, byte type, const char* AssetComponent* ac = &ams->asset_components[type]; uint16 elements = ams_calculate_chunks(ac, size, overhead); - coms_pthread_mutex_lock(&ams->asset_components[type].mutex); + mutex_lock(&ams->asset_components[type].mutex); int32 free_data = chunk_reserve(&ac->asset_memory, elements); if (free_data < 0) { - coms_pthread_mutex_unlock(&ams->asset_components[type].mutex); + mutex_unlock(&ams->asset_components[type].mutex); ASSERT_SIMPLE(free_data >= 0); return NULL; } - coms_pthread_mutex_unlock(&ams->asset_components[type].mutex); + mutex_unlock(&ams->asset_components[type].mutex); byte* asset_data = chunk_get_element(&ac->asset_memory, free_data, true); @@ -514,15 +514,15 @@ Asset* thrd_ams_insert_asset(AssetManagementSystem* ams, Asset* asset_temp, cons { AssetComponent* ac = &ams->asset_components[asset_temp->component_id]; - coms_pthread_mutex_lock(&ams->asset_components[asset_temp->component_id].mutex); + mutex_lock(&ams->asset_components[asset_temp->component_id].mutex); int32 free_data = chunk_reserve(&ac->asset_memory, asset_temp->size); if (free_data < 0) { - coms_pthread_mutex_unlock(&ams->asset_components[asset_temp->component_id].mutex); + mutex_unlock(&ams->asset_components[asset_temp->component_id].mutex); ASSERT_SIMPLE(free_data >= 0); return NULL; } - coms_pthread_mutex_unlock(&ams->asset_components[asset_temp->component_id].mutex); + mutex_unlock(&ams->asset_components[asset_temp->component_id].mutex); byte* asset_data = chunk_get_element(&ac->asset_memory, free_data); memcpy(asset_data, asset_temp->self, sizeof(Asset)); diff --git 
a/command/AppCmdBuffer.cpp b/command/AppCmdBuffer.cpp index 3a22033..390b66f 100755 --- a/command/AppCmdBuffer.cpp +++ b/command/AppCmdBuffer.cpp @@ -38,7 +38,7 @@ inline void cmd_buffer_create(AppCmdBuffer* cb, BufferMemory* buf, int32 commands_count) { chunk_init(&cb->commands, buf, commands_count, sizeof(Command), 64); - coms_pthread_mutex_init(&cb->mutex, NULL); + mutex_init(&cb->mutex, NULL); LOG_1("Created AppCmdBuffer: %n B", {{LOG_DATA_UINT64, &cb->commands.size}}); } @@ -181,10 +181,10 @@ Asset* cmd_font_load_async(AppCmdBuffer* __restrict cb, Command* __restrict cmd) inline void thrd_cmd_insert(AppCmdBuffer* __restrict cb, Command* __restrict cmd_temp) { - coms_pthread_mutex_lock(&cb->mutex); + mutex_lock(&cb->mutex); int32 index = chunk_reserve(&cb->commands, 1); if (index < 0) { - coms_pthread_mutex_unlock(&cb->mutex); + mutex_unlock(&cb->mutex); ASSERT_SIMPLE(false); return; @@ -196,7 +196,7 @@ void thrd_cmd_insert(AppCmdBuffer* __restrict cb, Command* __restrict cmd_temp) Command* cmd = (Command *) chunk_get_element(&cb->commands, index); memcpy(cmd, cmd_temp, sizeof(Command)); - coms_pthread_mutex_unlock(&cb->mutex); + mutex_unlock(&cb->mutex); } inline @@ -697,9 +697,9 @@ void cmd_iterate(AppCmdBuffer* cb) // This shouldn't happen since the command buffer shouldn't fill up in just 1-3 frames void thrd_cmd_iterate(AppCmdBuffer* cb) { - coms_pthread_mutex_lock(&cb->mutex); + mutex_lock(&cb->mutex); cmd_iterate(cb); - coms_pthread_mutex_unlock(&cb->mutex); + mutex_unlock(&cb->mutex); } #endif \ No newline at end of file diff --git a/command/AppCmdBuffer.h b/command/AppCmdBuffer.h index 79da41f..4f082e8 100755 --- a/command/AppCmdBuffer.h +++ b/command/AppCmdBuffer.h @@ -29,7 +29,7 @@ struct AppCmdBuffer { ChunkMemory commands; int32 last_element; - coms_pthread_mutex_t mutex; + mutex mutex; // Application data for cmd access // The list below depends on what kind of systems our command buffer needs access to diff --git a/compiler/gcc/Atomic.h b/compiler/gcc/Atomic.h index 61a2705..6df5a9f 100755 --- a/compiler/gcc/Atomic.h +++ b/compiler/gcc/Atomic.h @@ -45,7 +45,7 @@ FORCE_INLINE void atomic_add_relaxed(volatile int32* value, int32 increment) noe FORCE_INLINE void atomic_sub_relaxed(volatile int32* value, int32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } FORCE_INLINE void atomic_add_relaxed(volatile int64* value, int64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } FORCE_INLINE void atomic_sub_relaxed(volatile int64* value, int64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) noexcept { +FORCE_INLINE f32 atomic_compare_exchange_strong_relaxed(volatile f32* value, f32* expected, f32 desired) noexcept { volatile _atomic_32* value_as_union = (volatile _atomic_32*)value; _atomic_32* expected_as_union = (_atomic_32*)expected; _atomic_32 desired_as_union; @@ -58,7 +58,7 @@ FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* return expected_as_union->f; } -FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) noexcept { +FORCE_INLINE f64 atomic_compare_exchange_strong_relaxed(volatile f64* value, f64* expected, f64 desired) noexcept { volatile _atomic_64* value_as_union = (volatile _atomic_64*)value; _atomic_64* expected_as_union = (_atomic_64*)expected; _atomic_64 desired_as_union; @@ -71,8 +71,8 @@ 
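The CAS helpers renamed here from _weak_ to _strong_ keep the GCC builtin's contract: they return *expected after the call, and on failure __atomic_compare_exchange_n has already written the currently stored value into *expected, so the caller can retry with it. An illustrative retry loop under that contract (not part of the patch):

    // Success is detected by the return value matching the snapshot we passed in;
    // on failure 'expected' now holds the value currently stored in 'counter'.
    volatile int32 counter = 0; // shared counter (assumed for the example)
    int32 expected = 0;
    for (;;) {
        int32 snapshot = expected;
        int32 observed = atomic_compare_exchange_strong_relaxed(&counter, &expected, snapshot + 1);
        if (observed == snapshot) {
            break; // CAS succeeded: counter went from snapshot to snapshot + 1
        }
        // CAS failed: retry with the freshly observed value in 'expected'.
    }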
FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* return expected_as_union->f; } -FORCE_INLINE int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } -FORCE_INLINE int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE int32 atomic_compare_exchange_strong_relaxed(volatile int32* value, int32* expected, int32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE int64 atomic_compare_exchange_strong_relaxed(volatile int64* value, int64* expected, int64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } FORCE_INLINE int8 atomic_fetch_add_relaxed(volatile int8* value, int8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } FORCE_INLINE int8 atomic_fetch_sub_relaxed(volatile int8* value, int8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } FORCE_INLINE int16 atomic_fetch_add_relaxed(volatile int16* value, int16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } @@ -109,8 +109,8 @@ FORCE_INLINE void atomic_add_relaxed(volatile uint32* value, uint32 increment) n FORCE_INLINE void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } FORCE_INLINE void atomic_add_relaxed(volatile uint64* value, uint64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELAXED); } FORCE_INLINE void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELAXED); } -FORCE_INLINE uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } -FORCE_INLINE uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE uint32 atomic_compare_exchange_strong_relaxed(volatile uint32* value, uint32* expected, uint32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE uint64 atomic_compare_exchange_strong_relaxed(volatile uint64* value, uint64* expected, uint64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return *expected; } FORCE_INLINE uint8 atomic_fetch_add_relaxed(volatile uint8* value, uint8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } FORCE_INLINE uint8 atomic_fetch_sub_relaxed(volatile uint8* value, uint8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELAXED); } FORCE_INLINE uint16 atomic_fetch_add_relaxed(volatile uint16* value, uint16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELAXED); } @@ -161,7 +161,7 @@ FORCE_INLINE void 
atomic_add_acquire(volatile int32* value, int32 increment) noe FORCE_INLINE void atomic_sub_acquire(volatile int32* value, int32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } FORCE_INLINE void atomic_add_acquire(volatile int64* value, int64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } FORCE_INLINE void atomic_sub_acquire(volatile int64* value, int64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -FORCE_INLINE f32 atomic_compare_exchange_weak_acquire(volatile f32* value, f32* expected, f32 desired) noexcept { +FORCE_INLINE f32 atomic_compare_exchange_strong_acquire(volatile f32* value, f32* expected, f32 desired) noexcept { volatile _atomic_32* value_as_union = (volatile _atomic_32*)value; _atomic_32* expected_as_union = (_atomic_32*)expected; _atomic_32 desired_as_union; @@ -169,12 +169,12 @@ FORCE_INLINE f32 atomic_compare_exchange_weak_acquire(volatile f32* value, f32* __atomic_compare_exchange_n( &value_as_union->l, &expected_as_union->l, desired_as_union.l, 0, - __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED ); return expected_as_union->f; } -FORCE_INLINE f64 atomic_compare_exchange_weak_acquire(volatile f64* value, f64* expected, f64 desired) noexcept { +FORCE_INLINE f64 atomic_compare_exchange_strong_acquire(volatile f64* value, f64* expected, f64 desired) noexcept { volatile _atomic_64* value_as_union = (volatile _atomic_64*)value; _atomic_64* expected_as_union = (_atomic_64*)expected; _atomic_64 desired_as_union; @@ -182,13 +182,13 @@ FORCE_INLINE f64 atomic_compare_exchange_weak_acquire(volatile f64* value, f64* __atomic_compare_exchange_n( &value_as_union->l, &expected_as_union->l, desired_as_union.l, 0, - __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED ); return expected_as_union->f; } -FORCE_INLINE int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expected, int32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } -FORCE_INLINE int64 atomic_compare_exchange_weak_acquire(volatile int64* value, int64* expected, int64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE int32 atomic_compare_exchange_strong_acquire(volatile int32* value, int32* expected, int32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE int64 atomic_compare_exchange_strong_acquire(volatile int64* value, int64* expected, int64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } FORCE_INLINE int8 atomic_fetch_add_acquire(volatile int8* value, int8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } FORCE_INLINE int8 atomic_fetch_sub_acquire(volatile int8* value, int8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } FORCE_INLINE int16 atomic_fetch_add_acquire(volatile int16* value, int16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } @@ -225,8 +225,8 @@ FORCE_INLINE void atomic_add_acquire(volatile uint32* value, uint32 increment) n FORCE_INLINE void atomic_sub_acquire(volatile uint32* value, uint32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } FORCE_INLINE 
void atomic_add_acquire(volatile uint64* value, uint64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_ACQUIRE); } FORCE_INLINE void atomic_sub_acquire(volatile uint64* value, uint64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_ACQUIRE); } -FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expected, uint32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } -FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire(volatile uint64* value, uint64* expected, uint64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE uint32 atomic_compare_exchange_strong_acquire(volatile uint32* value, uint32* expected, uint32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } +FORCE_INLINE uint64 atomic_compare_exchange_strong_acquire(volatile uint64* value, uint64* expected, uint64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE); return *expected; } FORCE_INLINE uint8 atomic_fetch_add_acquire(volatile uint8* value, uint8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } FORCE_INLINE uint8 atomic_fetch_sub_acquire(volatile uint8* value, uint8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_ACQUIRE); } FORCE_INLINE uint16 atomic_fetch_add_acquire(volatile uint16* value, uint16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_ACQUIRE); } @@ -282,7 +282,7 @@ FORCE_INLINE void atomic_add_release(volatile int32* value, int32 increment) noe FORCE_INLINE void atomic_sub_release(volatile int32* value, int32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } FORCE_INLINE void atomic_add_release(volatile int64* value, int64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } FORCE_INLINE void atomic_sub_release(volatile int64* value, int64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -FORCE_INLINE f32 atomic_compare_exchange_weak_release(volatile f32* value, f32* expected, f32 desired) noexcept { +FORCE_INLINE f32 atomic_compare_exchange_strong_release(volatile f32* value, f32* expected, f32 desired) noexcept { volatile _atomic_32* value_as_union = (volatile _atomic_32*)value; _atomic_32* expected_as_union = (_atomic_32*)expected; _atomic_32 desired_as_union; @@ -290,12 +290,12 @@ FORCE_INLINE f32 atomic_compare_exchange_weak_release(volatile f32* value, f32* __atomic_compare_exchange_n( &value_as_union->l, &expected_as_union->l, desired_as_union.l, 0, - __ATOMIC_RELEASE, __ATOMIC_RELEASE + __ATOMIC_RELEASE, __ATOMIC_RELAXED ); return expected_as_union->f; } -FORCE_INLINE f64 atomic_compare_exchange_weak_release(volatile f64* value, f64* expected, f64 desired) noexcept { +FORCE_INLINE f64 atomic_compare_exchange_strong_release(volatile f64* value, f64* expected, f64 desired) noexcept { volatile _atomic_64* value_as_union = (volatile _atomic_64*)value; _atomic_64* expected_as_union = (_atomic_64*)expected; _atomic_64 desired_as_union; @@ -303,13 +303,13 @@ FORCE_INLINE f64 atomic_compare_exchange_weak_release(volatile f64* value, f64* __atomic_compare_exchange_n( &value_as_union->l, &expected_as_union->l, desired_as_union.l, 0, - __ATOMIC_RELEASE, 
__ATOMIC_RELEASE + __ATOMIC_RELEASE, __ATOMIC_RELAXED ); return expected_as_union->f; } -FORCE_INLINE int32 atomic_compare_exchange_weak_release(volatile int32* value, int32* expected, int32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } -FORCE_INLINE int64 atomic_compare_exchange_weak_release(volatile int64* value, int64* expected, int64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } +FORCE_INLINE int32 atomic_compare_exchange_strong_release(volatile int32* value, int32* expected, int32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } +FORCE_INLINE int64 atomic_compare_exchange_strong_release(volatile int64* value, int64* expected, int64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } FORCE_INLINE int8 atomic_fetch_add_release(volatile int8* value, int8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } FORCE_INLINE int8 atomic_fetch_sub_release(volatile int8* value, int8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } FORCE_INLINE int16 atomic_fetch_add_release(volatile int16* value, int16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } @@ -346,8 +346,8 @@ FORCE_INLINE void atomic_add_release(volatile uint32* value, uint32 increment) n FORCE_INLINE void atomic_sub_release(volatile uint32* value, uint32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } FORCE_INLINE void atomic_add_release(volatile uint64* value, uint64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_RELEASE); } FORCE_INLINE void atomic_sub_release(volatile uint64* value, uint64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_RELEASE); } -FORCE_INLINE uint32 atomic_compare_exchange_weak_release(volatile uint32* value, uint32* expected, uint32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } -FORCE_INLINE uint64 atomic_compare_exchange_weak_release(volatile uint64* value, uint64* expected, uint64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELEASE); return *expected; } +FORCE_INLINE uint32 atomic_compare_exchange_strong_release(volatile uint32* value, uint32* expected, uint32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE uint64 atomic_compare_exchange_strong_release(volatile uint64* value, uint64* expected, uint64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED); return *expected; } FORCE_INLINE uint8 atomic_fetch_add_release(volatile uint8* value, uint8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } FORCE_INLINE uint8 atomic_fetch_sub_release(volatile uint8* value, uint8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_RELEASE); } FORCE_INLINE uint16 atomic_fetch_add_release(volatile uint16* value, uint16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_RELEASE); } @@ -403,7 +403,7 @@ FORCE_INLINE void 
atomic_add_acquire_release(volatile int32* value, int32 increm FORCE_INLINE void atomic_sub_acquire_release(volatile int32* value, int32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } FORCE_INLINE void atomic_add_acquire_release(volatile int64* value, int64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } FORCE_INLINE void atomic_sub_acquire_release(volatile int64* value, int64 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } -FORCE_INLINE f32 atomic_compare_exchange_weak_acquire_release(volatile f32* value, f32* expected, f32 desired) noexcept { +FORCE_INLINE f32 atomic_compare_exchange_strong_acquire_release(volatile f32* value, f32* expected, f32 desired) noexcept { volatile _atomic_32* value_as_union = (volatile _atomic_32*)value; _atomic_32* expected_as_union = (_atomic_32*)expected; _atomic_32 desired_as_union; @@ -416,7 +416,7 @@ FORCE_INLINE f32 atomic_compare_exchange_weak_acquire_release(volatile f32* valu return expected_as_union->f; } -FORCE_INLINE f64 atomic_compare_exchange_weak_acquire_release(volatile f64* value, f64* expected, f64 desired) noexcept { +FORCE_INLINE f64 atomic_compare_exchange_strong_acquire_release(volatile f64* value, f64* expected, f64 desired) noexcept { volatile _atomic_64* value_as_union = (volatile _atomic_64*)value; _atomic_64* expected_as_union = (_atomic_64*)expected; _atomic_64 desired_as_union; @@ -429,8 +429,8 @@ FORCE_INLINE f64 atomic_compare_exchange_weak_acquire_release(volatile f64* valu return expected_as_union->f; } -FORCE_INLINE int32 atomic_compare_exchange_weak_acquire_release(volatile int32* value, int32* expected, int32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } -FORCE_INLINE int64 atomic_compare_exchange_weak_acquire_release(volatile int64* value, int64* expected, int64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } +FORCE_INLINE int32 atomic_compare_exchange_strong_acquire_release(volatile int32* value, int32* expected, int32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } +FORCE_INLINE int64 atomic_compare_exchange_strong_acquire_release(volatile int64* value, int64* expected, int64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } FORCE_INLINE int8 atomic_fetch_add_acquire_release(volatile int8* value, int8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } FORCE_INLINE int8 atomic_fetch_sub_acquire_release(volatile int8* value, int8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } FORCE_INLINE int16 atomic_fetch_add_acquire_release(volatile int16* value, int16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } @@ -467,8 +467,8 @@ FORCE_INLINE void atomic_add_acquire_release(volatile uint32* value, uint32 incr FORCE_INLINE void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } FORCE_INLINE void atomic_add_acquire_release(volatile uint64* value, uint64 increment) noexcept { __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } FORCE_INLINE void atomic_sub_acquire_release(volatile uint64* value, uint64 
decrement) noexcept { __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } -FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint32* expected, uint32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } -FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire_release(volatile uint64* value, uint64* expected, uint64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); return *expected; } +FORCE_INLINE uint32 atomic_compare_exchange_strong_acquire_release(volatile uint32* value, uint32* expected, uint32 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); return *expected; } +FORCE_INLINE uint64 atomic_compare_exchange_strong_acquire_release(volatile uint64* value, uint64* expected, uint64 desired) noexcept { __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); return *expected; } FORCE_INLINE uint8 atomic_fetch_add_acquire_release(volatile uint8* value, uint8 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } FORCE_INLINE uint8 atomic_fetch_sub_acquire_release(volatile uint8* value, uint8 operand) noexcept { return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } FORCE_INLINE uint16 atomic_fetch_add_acquire_release(volatile uint16* value, uint16 operand) noexcept { return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } diff --git a/database/Database.h b/database/Database.h index 3506570..4dc00ed 100755 --- a/database/Database.h +++ b/database/Database.h @@ -10,26 +10,31 @@ #define COMS_DATABASE_H #include "../stdlib/Types.h" -#include "../../EngineDependencies/sqlite/src/sqlite3.h" #include "DatabaseType.h" #include "DatabaseConnection.h" -inline -int db_open_sqlite(DatabaseConnection* con) -{ - int rc; - rc = sqlite3_open(con->host, &con->db_sqlite); +#if DB_MYSQL || DB_MARIA +#else + int32 db_open_maria(void*) { return 0; }; + void db_close_maria(void*) {}; +#endif - if (rc) { - return rc; - } +#if DB_PSQL +#else + int32 db_open_psql(void*) { return 0; }; + void db_close_psql(void*) {}; +#endif - return 0; -} +#if DB_SQLITE + #include "sqlite/SqliteDatabase.h" +#else + int32 db_open_sqlite(void*) { return 0; }; + void db_close_sqlite(void*) {}; +#endif inline -int db_open(DatabaseConnection* con) +int32 db_open(DatabaseConnection* con) { switch (con->type) { case DB_TYPE_SQLITE: { @@ -49,12 +54,6 @@ int db_open(DatabaseConnection* con) return 0; } -inline -void db_close_sqlite(DatabaseConnection* con) -{ - sqlite3_close(con->db_sqlite); -} - inline void db_close(DatabaseConnection* con) { diff --git a/database/DatabaseConnection.h b/database/DatabaseConnection.h index 92ffdf0..1ebb272 100755 --- a/database/DatabaseConnection.h +++ b/database/DatabaseConnection.h @@ -10,15 +10,10 @@ #define COMS_DATABASE_CONNECTION_H #include "../stdlib/Types.h" -#include "../../EngineDependencies/sqlite/src/sqlite3.h" - #include "DatabaseType.h" struct DatabaseConnection { - union { - sqlite3* db_sqlite; - sqlite3* db_pgsql; - }; + void* con; DatabaseType type; uint16 port; diff --git a/database/DatabaseType.h b/database/DatabaseType.h index 839e5b5..6083f99 100755 --- a/database/DatabaseType.h +++ b/database/DatabaseType.h @@ -9,7 +9,7 @@ #ifndef COMS_DATABASE_TYPE_H #define COMS_DATABASE_TYPE_H -enum DatabaseType { +enum DatabaseType : byte { DB_TYPE_SQLITE, DB_TYPE_MARIA, 
DB_TYPE_PSQL, diff --git a/database/sqlite/SqliteDatabase.h b/database/sqlite/SqliteDatabase.h new file mode 100644 index 0000000..c375a73 --- /dev/null +++ b/database/sqlite/SqliteDatabase.h @@ -0,0 +1,34 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_DATABASE_SQLITE_H +#define COMS_DATABASE_SQLITE_H + +#include "../../stdlib/Types.h" +#include "../../EngineDependencies/sqlite/src/sqlite3.h" + +inline +int32 db_open_sqlite(DatabaseConnection* con) +{ + int32 rc; + rc = sqlite3_open(con->host, &con->db_sqlite); + + if (rc) { + return rc; + } + + return 0; +} + +inline +void db_close_sqlite(DatabaseConnection* con) +{ + sqlite3_close(con->db_sqlite); +} + +#endif \ No newline at end of file diff --git a/encoding/Base64.h b/encoding/Base64.h new file mode 100644 index 0000000..e4b133f --- /dev/null +++ b/encoding/Base64.h @@ -0,0 +1,110 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_ENCODING_BASE64_H +#define COMS_ENCODING_BASE64_H + +#include "../stdlib/Types.h" +#include "../utils/StringUtils.h" +#include "Base64Definitions.h" + +void base64_encode(const byte* data, char* encoded_data, size_t data_length = 0) { + size_t output_length = 4 * ((data_length + 2) / 3); + + if (!data_length) { + // WARNING: This should only happen if the data is a char string + // Binary data is not allowed since it often has '\0' characters + data_length = str_length((const char *) data); + } + + size_t i = 0; + size_t j = 0; + while (i + 3 <= data_length) { + uint32 triple = ((uint32) data[i] << 16) | ((uint32) data[i + 1] << 8) | data[i + 2]; + + encoded_data[j++] = BASE64_CHARS[(triple >> 3 * 6) & 0x3F]; + encoded_data[j++] = BASE64_CHARS[(triple >> 2 * 6) & 0x3F]; + encoded_data[j++] = BASE64_CHARS[(triple >> 1 * 6) & 0x3F]; + encoded_data[j++] = BASE64_CHARS[(triple >> 0 * 6) & 0x3F]; + + i += 3; + } + + if (i < data_length) { + uint32 triple = ((uint32) data[i] << 16); + if (i + 1 < data_length) { + triple |= ((uint32) data[i + 1] << 8); + } + + encoded_data[j++] = BASE64_CHARS[(triple >> 18) & 0x3F]; + encoded_data[j++] = BASE64_CHARS[(triple >> 12) & 0x3F]; + encoded_data[j] = (i + 1 < data_length) ? 
BASE64_CHARS[(triple >> 6) & 0x3F] : '='; + encoded_data[j + 1] = '='; + } + + encoded_data[output_length] = '\0'; +} + +size_t base64_decode(const char* encoded_data, byte* data, size_t encoded_length = 0) { + if (!encoded_length) { + encoded_length = str_length(encoded_data); + } + + size_t output_length = encoded_length / 4 * 3; + int32 padding = 0; + + if (data[encoded_length - 1] == '=') { + --output_length; + ++padding; + + if (data[encoded_length - 2] == '=') { + --output_length; + ++padding; + } + } + + size_t complete_blocks = (encoded_length - padding) / 4; + size_t i, j; + + for (i = 0, j = 0; i < complete_blocks * 4; i += 4, j += 3) { + uint32 sextet_a = BASE64_LOOKUP[(byte) encoded_data[i]]; + uint32 sextet_b = BASE64_LOOKUP[(byte) encoded_data[i + 1]]; + uint32 sextet_c = BASE64_LOOKUP[(byte) encoded_data[i + 2]]; + uint32 sextet_d = BASE64_LOOKUP[(byte) encoded_data[i + 3]]; + + uint32 triple = (sextet_a << 18) | (sextet_b << 12) | (sextet_c << 6) | sextet_d; + + data[j] = (triple >> 16) & 0xFF; + data[j + 1] = (triple >> 8) & 0xFF; + data[j + 2] = triple & 0xFF; + } + + if (padding > 0) { + uint32 sextet_a = BASE64_LOOKUP[(byte) encoded_data[i]]; + uint32 sextet_b = BASE64_LOOKUP[(byte) encoded_data[i + 1]]; + uint32 sextet_c = (padding > 1) ? 0 : BASE64_LOOKUP[(byte) encoded_data[i + 2]]; + uint32 sextet_d = 0; + + uint32 triple = (sextet_a << 18) | (sextet_b << 12) | (sextet_c << 6) | sextet_d; + + data[j + 1] = (triple >> 16) & 0xFF; + if (padding == 1) { + data[j + 2] = (triple >> 8) & 0xFF; + } + } + + return output_length; +} + +#if __aarch64__ + #include "Base64SimdArm.h" +#else + #include "Base64SimdX86.h" +#endif + +#endif \ No newline at end of file diff --git a/encoding/Base64Definitions.h b/encoding/Base64Definitions.h new file mode 100644 index 0000000..dfcd811 --- /dev/null +++ b/encoding/Base64Definitions.h @@ -0,0 +1,46 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_ENCODING_BASE64_DEFINITIONS_H +#define COMS_ENCODING_BASE64_DEFINITIONS_H + +#include "../stdlib/Types.h" + +static const char BASE64_CHARS[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +static const int8 BASE64_LOOKUP[256] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, + 52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, + 15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, + -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, + 41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 +}; + +inline +size_t base64_encoded_length(size_t data_length) { + return 4 * ((data_length + 2) / 3); +} + +inline +size_t base64_encoded_length(size_t encoded_length) { + return encoded_length / 4 * 3; +} + +#endif \ No newline at end of file diff --git a/encoding/Base64SimdArm.h b/encoding/Base64SimdArm.h new file mode 100644 index 0000000..ed2ff21 --- /dev/null +++ b/encoding/Base64SimdArm.h @@ -0,0 +1,271 @@ +/** + * Jingga + * + * @copyright 
Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_ENCODING_BASE64_SIMD_ARM_H +#define COMS_ENCODING_BASE64_SIMD_ARM_H + +#include "../stdlib/Types.h" +#include "../stdlib/Simd.h" +#include "../utils/StringUtils.h" +#include "Base64Definitions.h" + +#ifdef __ARM_FEATURE_SVE + #include +#endif + +#ifdef __ARM_NEON + #include +#endif + +void base64_encode_simd(const byte* data, char* encoded_data, size_t data_length = 0, int32 steps = 16) { + if (!data_length) { + // WARNING: This should only happen if the data is a char string + // Binary data is not allowed since it often has '\0' characters + data_length = str_length((const char *) data); + } + + steps = intrin_validate_steps(data, steps); + steps = intrin_validate_steps((const byte*) encoded_data, steps); + + size_t i = 0; + + #ifdef __ARM_FEATURE_SVE + if (steps >= 8) { + const size_t sve_vec_bytes = steps; + const size_t triple_per_vec = sve_vec_bytes / 3; + + for (; i + (triple_per_vec * 3 - 1) < data_length; i += triple_per_vec * 3) { + svuint8_t in = svld1_u8(svptrue_b8(), data + i); + + svuint32_t triple0 = svreinterpret_u32(svld1_u8(svptrue_b8(), data + i)); + svuint32_t triple1 = svreinterpret_u32(svld1_u8(svptrue_b8(), data + i + 4)); + svuint32_t triple2 = svreinterpret_u32(svld1_u8(svptrue_b8(), data + i + 8)); + + svuint32_t combined = svorr_u32_x(svptrue_b32(), + svlsl_n_u32_x(svptrue_b32(), triple0, 16), + svorr_u32_x(svptrue_b32(), + svlsl_n_u32_x(svptrue_b32(), triple1, 8), + triple2)); + + svuint32_t idx0 = svand_u32_x(svptrue_b32(), svlsr_n_u32_x(svptrue_b32(), combined, 18), 0x3F); + svuint32_t idx1 = svand_u32_x(svptrue_b32(), svlsr_n_u32_x(svptrue_b32(), combined, 12), 0x3F); + svuint32_t idx2 = svand_u32_x(svptrue_b32(), svlsr_n_u32_x(svptrue_b32(), combined, 6), 0x3F); + svuint32_t idx3 = svand_u32_x(svptrue_b32(), combined, 0x3F); + + svuint8_t chars0 = svld1_u8(svptrue_b8(), (const byte*)BASE64_CHARS); + svuint8_t enc0 = svtbl_u8(chars0, svreinterpret_u8_u32(idx0)); + svuint8_t enc1 = svtbl_u8(chars0, svreinterpret_u8_u32(idx1)); + svuint8_t enc2 = svtbl_u8(chars0, svreinterpret_u8_u32(idx2)); + svuint8_t enc3 = svtbl_u8(chars0, svreinterpret_u8_u32(idx3)); + + svuint8_t encoded = svzip1_u8(svzip1_u8(enc0, enc1), svzip1_u8(enc2, enc3)); + + svst1_u8(svptrue_b8(), (uint8_t*)(encoded_data + (i/3*4)), encoded); + } + + steps = 1; + } + #endif + + #ifdef __ARM_NEON + if (steps >= 4) { + steps = 4; + + const uint8x16_t base64_table = vld1q_u8((const byte*)BASE64_CHARS); + const uint8x16_t mask_3F = vdupq_n_u8(0x3F); + + for (; i + 11 < data_length; i += 12) { + uint8x16_t in = vld1q_u8(data + i); + + uint32x4_t triple0 = vreinterpretq_u32_u8(vshrq_n_u8(in, 2)); + uint32x4_t triple1 = vreinterpretq_u32_u8(vshrq_n_u8(vextq_u8(in, in, 1), 4)); + uint32x4_t triple2 = vreinterpretq_u32_u8(vshrq_n_u8(vextq_u8(in, in, 2), 6)); + + uint32x4_t combined = vorrq_u32( + vshlq_n_u32(triple0, 16), + vorrq_u32( + vshlq_n_u32(triple1, 8), + triple2)); + + uint32x4_t idx0 = vandq_u32(vshrq_n_u32(combined, 18), 0x3F); + uint32x4_t idx1 = vandq_u32(vshrq_n_u32(combined, 12), 0x3F); + uint32x4_t idx2 = vandq_u32(vshrq_n_u32(combined, 6), 0x3F); + uint32x4_t idx3 = vandq_u32(combined, 0x3F); + + uint8x16_t enc0 = vqtbl1q_u8(base64_table, vreinterpretq_u8_u32(idx0)); + uint8x16_t enc1 = vqtbl1q_u8(base64_table, vreinterpretq_u8_u32(idx1)); + uint8x16_t enc2 = vqtbl1q_u8(base64_table, vreinterpretq_u8_u32(idx2)); + uint8x16_t enc3 = vqtbl1q_u8(base64_table, vreinterpretq_u8_u32(idx3)); + + 
uint8x16x2_t zip01 = vzipq_u8(enc0, enc1); + uint8x16x2_t zip23 = vzipq_u8(enc2, enc3); + uint8x16_t encoded = vcombine_u8(vget_low_u8(zip01.val[0]), vget_low_u8(zip23.val[0])); + + vst1q_u8((uint8_t*)(encoded_data + (i/3*4)), encoded); + } + } + #endif + + for (; i + 3 <= data_length; i += 3) { + uint32 triple = ((uint32)data[i] << 16) | ((uint32)data[i + 1] << 8) | data[i + 2]; + + encoded_data[i/3*4 + 0] = BASE64_CHARS[(triple >> 18) & 0x3F]; + encoded_data[i/3*4 + 1] = BASE64_CHARS[(triple >> 12) & 0x3F]; + encoded_data[i/3*4 + 2] = BASE64_CHARS[(triple >> 6) & 0x3F]; + encoded_data[i/3*4 + 3] = BASE64_CHARS[triple & 0x3F]; + } + + if (i < data_length) { + uint32 triple = ((uint32)data[i] << 16); + if (i + 1 < data_length) { + triple |= ((uint32)data[i + 1] << 8); + } + + size_t j = i/3*4; + encoded_data[j++] = BASE64_CHARS[(triple >> 18) & 0x3F]; + encoded_data[j++] = BASE64_CHARS[(triple >> 12) & 0x3F]; + encoded_data[j++] = (i + 1 < data_length) ? BASE64_CHARS[(triple >> 6) & 0x3F] : '='; + encoded_data[j] = '='; + } + + encoded_data[base64_encoded_length(data_length)] = '\0'; +} + +size_t base64_decode_simd(const char* encoded_data, byte* data, size_t encoded_length = 0, int32 steps = 16) { + if (!encoded_length) { + encoded_length = str_length(encoded_data); + } + + size_t padding = 0; + if (encoded_data[encoded_length - 1] == '=') { + ++padding; + if (encoded_data[encoded_length - 2] == '=') { + ++padding; + } + } + + size_t output_length = (encoded_length / 4) * 3 - padding; + size_t complete_blocks = (encoded_length - padding) / 4; + + steps = intrin_validate_steps((const byte*) encoded_data, steps); + steps = intrin_validate_steps(data, steps); + + size_t i = 0; + size_t j = 0; + + #ifdef __ARM_FEATURE_SVE + if (steps >= 8) { + const size_t sve_vec_bytes = steps; + const size_t quad_per_vec = sve_vec_bytes / 4; + + for (; i + (quad_per_vec * 4 - 1) < complete_blocks * 4; i += quad_per_vec * 4, j += quad_per_vec * 3) { + + svuint8_t in = svld1_u8(svptrue_b8(), (const byte*)(encoded_data + i)); + + byte chars[sve_vec_bytes]; + svst1_u8(svptrue_b8(), chars, in); + + uint32 sextets[quad_per_vec]; + for (size_t k = 0; k < quad_per_vec; ++k) { + sextets[k] = + (BASE64_LOOKUP[chars[k*4 + 0]] << 18) | + (BASE64_LOOKUP[chars[k*4 + 1]] << 12) | + (BASE64_LOOKUP[chars[k*4 + 2]] << 6) | + (BASE64_LOOKUP[chars[k*4 + 3]]); + } + + byte output_bytes[quad_per_vec * 3]; + for (size_t k = 0; k < quad_per_vec; ++k) { + output_bytes[k*3 + 0] = (sextets[k] >> 16) & 0xFF; + output_bytes[k*3 + 1] = (sextets[k] >> 8) & 0xFF; + output_bytes[k*3 + 2] = sextets[k] & 0xFF; + } + + + svst1_u8(svptrue_b8(), data + j, svld1_u8(svptrue_b8(), output_bytes)); + } + + steps = 1; + } + #endif + + #ifdef __ARM_NEON + if (steps >= 4) { + steps = 4; + + const uint8x16_t mask_3F = vdupq_n_u8(0x3F); + const uint8x16_t shuffle_mask = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2 + }; + + for (; i + 15 < complete_blocks * 4; i += 16, j += 12) { + + uint8x16_t in = vld1q_u8((const byte*)(encoded_data + i)); + + byte chars[16]; + vst1q_u8(chars, in); + + uint32 sextets[4]; + for (int k = 0; k < 4; ++k) { + sextets[k] = + (BASE64_LOOKUP[chars[k*4 + 0]] << 18) | + (BASE64_LOOKUP[chars[k*4 + 1]] << 12) | + (BASE64_LOOKUP[chars[k*4 + 2]] << 6) | + (BASE64_LOOKUP[chars[k*4 + 3]]); + } + + uint8x16_t bytes0 = vreinterpretq_u8_u32(vshrq_n_u32(vld1q_u32(sextets), 16)); + uint8x16_t bytes1 = vreinterpretq_u8_u32(vshrq_n_u32(vld1q_u32(sextets), 8)); + uint8x16_t bytes2 = 
vreinterpretq_u8_u32(vld1q_u32(sextets)); + + bytes0 = vandq_u8(bytes0, vdupq_n_u8(0xFF)); + bytes1 = vandq_u8(bytes1, vdupq_n_u8(0xFF)); + bytes2 = vandq_u8(bytes2, vdupq_n_u8(0xFF)); + + + uint8x16_t packed = vqtbl1q_u8(vcombine_u8( + vget_low_u8(bytes0), vget_low_u8(bytes1)), shuffle_mask); + + vst1q_u8(data + j, packed); + + vst1_u8(data + j + 8, vget_low_u8(bytes2)); + } + } + #endif + + for (; i < complete_blocks * 4; i += 4, j += 3) { + uint32 sextet_a = BASE64_LOOKUP[(byte) encoded_data[i]]; + uint32 sextet_b = BASE64_LOOKUP[(byte) encoded_data[i + 1]]; + uint32 sextet_c = BASE64_LOOKUP[(byte) encoded_data[i + 2]]; + uint32 sextet_d = BASE64_LOOKUP[(byte) encoded_data[i + 3]]; + + uint32 triple = (sextet_a << 18) | (sextet_b << 12) | (sextet_c << 6) | sextet_d; + + data[j] = (triple >> 16) & 0xFF; + data[j + 1] = (triple >> 8) & 0xFF; + data[j + 2] = triple & 0xFF; + } + + if (padding > 0) { + uint32 sextet_a = BASE64_LOOKUP[(byte) encoded_data[i]]; + uint32 sextet_b = BASE64_LOOKUP[(byte) encoded_data[i + 1]]; + uint32 sextet_c = (padding > 1) ? 0 : BASE64_LOOKUP[(byte) encoded_data[i + 2]]; + uint32 sextet_d = 0; + + uint32 triple = (sextet_a << 18) | (sextet_b << 12) | (sextet_c << 6) | sextet_d; + + data[j] = (triple >> 16) & 0xFF; + if (padding == 1) { + data[j + 1] = (triple >> 8) & 0xFF; + } + } + + return output_length; +} + +#endif \ No newline at end of file diff --git a/encoding/Base64SimdX86.h b/encoding/Base64SimdX86.h new file mode 100644 index 0000000..d558f37 --- /dev/null +++ b/encoding/Base64SimdX86.h @@ -0,0 +1,405 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_ENCODING_BASE64_SIMD_X86_H +#define COMS_ENCODING_BASE64_SIMD_X86_H + +#include +#include "../stdlib/Types.h" +#include "../stdlib/Simd.h" +#include "../utils/StringUtils.h" +#include "Base64Definitions.h" + +void base64_encode_simd(const byte* data, char* encoded_data, size_t data_length = 0, int32 steps = 16) { + if (!data_length) { + // WARNING: This should only happen if the data is a char string + // Binary data is not allowed since it often has '\0' characters + data_length = str_length((const char *) data); + } + + steps = intrin_validate_steps(data, steps); + steps = intrin_validate_steps((const byte*) encoded_data, steps); + + size_t i = 0; + + #ifdef __AVX512F__ + if (steps >= 16) { + const __m512i mask0 = _mm512_set1_epi32(0x00FC0000); + const __m512i mask1 = _mm512_set1_epi32(0x0003F000); + const __m512i mask2 = _mm512_set1_epi32(0x00000FC0); + const __m512i mask3 = _mm512_set1_epi32(0x0000003F); + + const __m512i shift0 = _mm512_set1_epi32(18); + const __m512i shift1 = _mm512_set1_epi32(12); + const __m512i shift2 = _mm512_set1_epi32(6); + const __m512i shift3 = _mm512_set1_epi32(0); + + const __m512i shuffle_mask = _mm512_set_epi8( + 5,4,6,5,7,6,8,7,9,8,10,9,11,10,12,11, + 1,0,2,1,3,2,4,3,5,4,6,5,7,6,8,7, + 5,4,6,5,7,6,8,7,9,8,10,9,11,10,12,11, + 1,0,2,1,3,2,4,3,5,4,6,5,7,6,8,7 + ); + + const __m512i permute_mask = _mm512_set_epi32(15,13,11,9,7,5,3,1,14,12,10,8,6,4,2,0); + + for (; i + 47 < data_length; i += 48) { + __m512i in = _mm512_loadu_si512((const __m512i*)(data + i)); + + __m512i shuffled = _mm512_shuffle_epi8(in, shuffle_mask); + + __m512i permuted = _mm512_permutexvar_epi32(permute_mask, shuffled); + + __m512i indices0 = _mm512_srlv_epi32(permuted, shift0); + __m512i indices1 = _mm512_srlv_epi32(permuted, shift1); + __m512i indices2 = _mm512_srlv_epi32(permuted, shift2); + __m512i indices3 
= _mm512_srlv_epi32(permuted, shift3); + + indices0 = _mm512_and_si512(indices0, mask0); + indices1 = _mm512_and_si512(indices1, mask1); + indices2 = _mm512_and_si512(indices2, mask2); + indices3 = _mm512_and_si512(indices3, mask3); + + __m512i indices = _mm512_or_si512( + _mm512_or_si512(indices0, indices1), + _mm512_or_si512(indices2, indices3) + ); + + alignas(64) uint32 idx[16]; + _mm512_store_si512((__m512i*)idx, indices); + + for (int32 k = 0; k < 16; ++k) { + encoded_data[i/3*4 + k*4 + 0] = BASE64_CHARS[(idx[k] >> 18) & 0x3F]; + encoded_data[i/3*4 + k*4 + 1] = BASE64_CHARS[(idx[k] >> 12) & 0x3F]; + encoded_data[i/3*4 + k*4 + 2] = BASE64_CHARS[(idx[k] >> 6) & 0x3F]; + encoded_data[i/3*4 + k*4 + 3] = BASE64_CHARS[(idx[k] >> 0) & 0x3F]; + } + } + + steps = 1; + } else + #endif + + #ifdef __AVX2__ + if (steps >= 8) { + const __m256i mask0 = _mm256_set1_epi32(0x00FC0000); + const __m256i mask1 = _mm256_set1_epi32(0x0003F000); + const __m256i mask2 = _mm256_set1_epi32(0x00000FC0); + const __m256i mask3 = _mm256_set1_epi32(0x0000003F); + + const __m256i shift0 = _mm256_set1_epi32(18); + const __m256i shift1 = _mm256_set1_epi32(12); + const __m256i shift2 = _mm256_set1_epi32(6); + const __m256i shift3 = _mm256_set1_epi32(0); + + const __m256i shuffle_mask = _mm256_set_epi8( + 5,4,6,5,7,6,8,7,9,8,10,9,11,10,12,11, + 1,0,2,1,3,2,4,3,5,4,6,5,7,6,8,7 + ); + + const __m256i permute_mask = _mm256_set_epi32(7,5,3,1,6,4,2,0); + + for (; i + 23 < data_length; i += 24) { + __m256i in = _mm256_loadu_si256((const __m256i*)(data + i)); + + __m256i shuffled = _mm256_shuffle_epi8(in, shuffle_mask); + + __m256i permuted = _mm256_permutevar8x32_epi32(shuffled, permute_mask); + + __m256i indices0 = _mm256_srlv_epi32(permuted, shift0); + __m256i indices1 = _mm256_srlv_epi32(permuted, shift1); + __m256i indices2 = _mm256_srlv_epi32(permuted, shift2); + __m256i indices3 = _mm256_srlv_epi32(permuted, shift3); + + indices0 = _mm256_and_si256(indices0, mask0); + indices1 = _mm256_and_si256(indices1, mask1); + indices2 = _mm256_and_si256(indices2, mask2); + indices3 = _mm256_and_si256(indices3, mask3); + + __m256i indices = _mm256_or_si256( + _mm256_or_si256(indices0, indices1), + _mm256_or_si256(indices2, indices3) + ); + + alignas(32) uint32 idx[8]; + _mm256_store_si256((__m256i*)idx, indices); + + for (int32 k = 0; k < 8; ++k) { + encoded_data[i/3*4 + k*4 + 0] = BASE64_CHARS[(idx[k] >> 18) & 0x3F]; + encoded_data[i/3*4 + k*4 + 1] = BASE64_CHARS[(idx[k] >> 12) & 0x3F]; + encoded_data[i/3*4 + k*4 + 2] = BASE64_CHARS[(idx[k] >> 6) & 0x3F]; + encoded_data[i/3*4 + k*4 + 3] = BASE64_CHARS[(idx[k] >> 0) & 0x3F]; + } + } + + steps = 1; + } else + #endif + + #ifdef __SSE4_2__ + if (steps >= 4) { + const __m128i mask0 = _mm_set1_epi32(0x00FC0000); + const __m128i mask1 = _mm_set1_epi32(0x0003F000); + const __m128i mask2 = _mm_set1_epi32(0x00000FC0); + const __m128i mask3 = _mm_set1_epi32(0x0000003F); + + const __m128i shift0 = _mm_set1_epi32(18); + const __m128i shift1 = _mm_set1_epi32(12); + const __m128i shift2 = _mm_set1_epi32(6); + const __m128i shift3 = _mm_set1_epi32(0); + + const __m128i shuffle_mask = _mm_set_epi8( + 5,4,6,5,7,6,8,7,9,8,10,9,11,10,12,11 + ); + + for (; i + 11 < data_length; i += 12) { + __m128i in = _mm_loadu_si128((const __m128i*)(data + i)); + + __m128i shuffled = _mm_shuffle_epi8(in, shuffle_mask); + + __m128i indices0 = _mm_srlv_epi32(shuffled, shift0); + __m128i indices1 = _mm_srlv_epi32(shuffled, shift1); + __m128i indices2 = _mm_srlv_epi32(shuffled, shift2); + __m128i indices3 = 
_mm_srlv_epi32(shuffled, shift3); + + indices0 = _mm_and_si128(indices0, mask0); + indices1 = _mm_and_si128(indices1, mask1); + indices2 = _mm_and_si128(indices2, mask2); + indices3 = _mm_and_si128(indices3, mask3); + + __m128i indices = _mm_or_si128(_mm_or_si128(indices0, indices1), _mm_or_si128(indices2, indices3)); + + alignas(16) uint32 idx[4]; + _mm_store_si128((__m128i*)idx, indices); + + for (int32 k = 0; k < 4; ++k) { + encoded_data[i/3*4 + k*4 + 0] = BASE64_CHARS[(idx[k] >> 18) & 0x3F]; + encoded_data[i/3*4 + k*4 + 1] = BASE64_CHARS[(idx[k] >> 12) & 0x3F]; + encoded_data[i/3*4 + k*4 + 2] = BASE64_CHARS[(idx[k] >> 6) & 0x3F]; + encoded_data[i/3*4 + k*4 + 3] = BASE64_CHARS[(idx[k] >> 0) & 0x3F]; + } + } + } + #endif + + for (; i + 3 <= data_length; i += 3) { + uint32 triple = ((uint32)data[i] << 16) | ((uint32)data[i + 1] << 8) | data[i + 2]; + + encoded_data[i/3*4 + 0] = BASE64_CHARS[(triple >> 18) & 0x3F]; + encoded_data[i/3*4 + 1] = BASE64_CHARS[(triple >> 12) & 0x3F]; + encoded_data[i/3*4 + 2] = BASE64_CHARS[(triple >> 6) & 0x3F]; + encoded_data[i/3*4 + 3] = BASE64_CHARS[triple & 0x3F]; + } + + if (i < data_length) { + uint32 triple = ((uint32)data[i] << 16); + if (i + 1 < data_length) { + triple |= ((uint32)data[i + 1] << 8); + } + + size_t j = i/3*4; + encoded_data[j++] = BASE64_CHARS[(triple >> 18) & 0x3F]; + encoded_data[j++] = BASE64_CHARS[(triple >> 12) & 0x3F]; + encoded_data[j++] = (i + 1 < data_length) ? BASE64_CHARS[(triple >> 6) & 0x3F] : '='; + encoded_data[j] = '='; + } + + encoded_data[base64_encoded_length(data_length)] = '\0'; +} + +size_t base64_decode_simd(const char* encoded_data, byte* data, size_t encoded_length, int32 steps = 16) { + if (!encoded_length) { + encoded_length = str_length(encoded_data); + } + + size_t padding = 0; + if (encoded_data[encoded_length - 1] == '=') { + ++padding; + + if (encoded_data[encoded_length - 2] == '=') { + ++padding; + } + } + + size_t output_length = (encoded_length / 4) * 3 - padding; + size_t complete_blocks = (encoded_length - padding) / 4; + + steps = intrin_validate_steps((const byte*)encoded_data, steps); + steps = intrin_validate_steps(data, steps); + + size_t i = 0; + size_t j = 0; + + #ifdef __AVX512F__ + if (steps >= 16) { + steps = 16; + + const __m512i mask6bits = _mm512_set1_epi32(0x3F); + const __m512i shuffle_mask = _mm512_set_epi8( + -1,-1,-1,-1,-1,-1,11,10,9,8,7,6,5,4,3,2, + -1,-1,-1,-1,-1,-1,11,10,9,8,7,6,5,4,3,2, + -1,-1,-1,-1,-1,-1,11,10,9,8,7,6,5,4,3,2, + -1,-1,-1,-1,-1,-1,11,10,9,8,7,6,5,4,3,2 + ); + + const __m512i permute_mask = _mm512_set_epi32(0,0,0,0,0,0,0,0,14,12,10,8,6,4,2,0); + + for (; i + 63 < complete_blocks * 4; i += 64, j += 48) { + __m512i in = _mm512_loadu_si512((const __m512i*)(encoded_data + i)); + + alignas(64) byte chars[64]; + _mm512_store_si512((__m512i*)chars, in); + + alignas(64) uint32 sextets[16]; + for (int32 k = 0; k < 16; ++k) { + sextets[k] = + (BASE64_LOOKUP[(byte)chars[k*4 + 0]] << 18) | + (BASE64_LOOKUP[(byte)chars[k*4 + 1]] << 12) | + (BASE64_LOOKUP[(byte)chars[k*4 + 2]] << 6) | + (BASE64_LOOKUP[(byte)chars[k*4 + 3]]); + } + + __m512i sextet_vec = _mm512_load_si512((const __m512i*)sextets); + + __m512i bytes0 = _mm512_srli_epi32(sextet_vec, 16); + __m512i bytes1 = _mm512_srli_epi32(sextet_vec, 8); + __m512i bytes2 = sextet_vec; + + bytes0 = _mm512_and_si512(bytes0, _mm512_set1_epi32(0xFF)); + bytes1 = _mm512_and_si512(bytes1, _mm512_set1_epi32(0xFF)); + bytes2 = _mm512_and_si512(bytes2, _mm512_set1_epi32(0xFF)); + + __m512i packed = 
_mm512_shuffle_epi8(_mm512_packus_epi32(bytes0, bytes1), shuffle_mask); + packed = _mm512_permutexvar_epi32(permute_mask, packed); + + _mm512_storeu_si512((__m512i*)(data + j), packed); + } + + steps = 1; + } + #endif + + #ifdef __AVX2__ + if (steps >= 8) { + steps = 8; + + const __m256i mask6bits = _mm256_set1_epi32(0x3F); + const __m256i shuffle_mask = _mm256_set_epi8( + -1,-1,-1,-1,-1,-1,11,10,9,8,7,6,5,4,3,2, + -1,-1,-1,-1,-1,-1,11,10,9,8,7,6,5,4,3,2 + ); + + const __m256i permute_mask = _mm256_set_epi32(0,0,0,0,6,4,2,0); + + for (; i + 31 < complete_blocks * 4; i += 32, j += 24) { + __m256i in = _mm256_loadu_si256((const __m256i*)(encoded_data + i)); + + alignas(32) byte chars[32]; + _mm256_store_si256((__m256i*)chars, in); + + alignas(32) uint32 sextets[8]; + for (int32 k = 0; k < 8; ++k) { + sextets[k] = + (BASE64_LOOKUP[(byte)chars[k*4 + 0]] << 18) | + (BASE64_LOOKUP[(byte)chars[k*4 + 1]] << 12) | + (BASE64_LOOKUP[(byte)chars[k*4 + 2]] << 6) | + (BASE64_LOOKUP[(byte)chars[k*4 + 3]]); + } + + __m256i sextet_vec = _mm256_load_si256((const __m256i*)sextets); + + __m256i bytes0 = _mm256_srli_epi32(sextet_vec, 16); + __m256i bytes1 = _mm256_srli_epi32(sextet_vec, 8); + __m256i bytes2 = sextet_vec; + + bytes0 = _mm256_and_si256(bytes0, _mm256_set1_epi32(0xFF)); + bytes1 = _mm256_and_si256(bytes1, _mm256_set1_epi32(0xFF)); + bytes2 = _mm256_and_si256(bytes2, _mm256_set1_epi32(0xFF)); + + __m256i packed = _mm256_shuffle_epi8(_mm256_packus_epi32(bytes0, bytes1), shuffle_mask); + packed = _mm256_permutevar8x32_epi32(packed, permute_mask); + + _mm256_storeu_si256((__m256i*)(data + j), packed); + } + + steps = 1; + } + #endif + + #ifdef __SSE4_2__ + if (steps >= 4) { + steps = 4; + + const __m128i mask6bits = _mm_set1_epi32(0x3F); + const __m128i shuffle_mask = _mm_set_epi8( + -1,-1,-1,-1,-1,-1,11,10,9,8,7,6,5,4,3,2 + ); + + for (; i + 15 < complete_blocks * 4; i += 16, j += 12) { + __m128i in = _mm_loadu_si128((const __m128i*)(encoded_data + i)); + + alignas(16) byte chars[16]; + _mm_store_si128((__m128i*)chars, in); + + alignas(16) uint32 sextets[4]; + for (int32 k = 0; k < 4; ++k) { + sextets[k] = + (BASE64_LOOKUP[(byte)chars[k*4 + 0]] << 18) | + (BASE64_LOOKUP[(byte)chars[k*4 + 1]] << 12) | + (BASE64_LOOKUP[(byte)chars[k*4 + 2]] << 6) | + (BASE64_LOOKUP[(byte)chars[k*4 + 3]]); + } + + __m128i sextet_vec = _mm_load_si128((const __m128i*)sextets); + + __m128i bytes0 = _mm_srli_epi32(sextet_vec, 16); + __m128i bytes1 = _mm_srli_epi32(sextet_vec, 8); + __m128i bytes2 = sextet_vec; + + bytes0 = _mm_and_si128(bytes0, _mm_set1_epi32(0xFF)); + bytes1 = _mm_and_si128(bytes1, _mm_set1_epi32(0xFF)); + bytes2 = _mm_and_si128(bytes2, _mm_set1_epi32(0xFF)); + + __m128i packed = _mm_shuffle_epi8(_mm_packus_epi32(bytes0, bytes1), shuffle_mask); + + _mm_storeu_si128((__m128i*)(data + j), packed); + } + } + #endif + + for (; i < complete_blocks * 4; i += 4, j += 3) { + uint32 sextet_a = BASE64_LOOKUP[(byte) encoded_data[i]]; + uint32 sextet_b = BASE64_LOOKUP[(byte) encoded_data[i + 1]]; + uint32 sextet_c = BASE64_LOOKUP[(byte) encoded_data[i + 2]]; + uint32 sextet_d = BASE64_LOOKUP[(byte) encoded_data[i + 3]]; + + uint32 triple = (sextet_a << 18) | (sextet_b << 12) | (sextet_c << 6) | sextet_d; + + data[j] = (triple >> 16) & 0xFF; + data[j + 1] = (triple >> 8) & 0xFF; + data[j + 2] = triple & 0xFF; + } + + if (padding > 0) { + uint32 sextet_a = BASE64_LOOKUP[(byte) encoded_data[i]]; + uint32 sextet_b = BASE64_LOOKUP[(byte) encoded_data[i + 1]]; + uint32 sextet_c = (padding > 1) ? 
0 : BASE64_LOOKUP[(byte) encoded_data[i + 2]]; + uint32 sextet_d = 0; + + uint32 triple = (sextet_a << 18) | (sextet_b << 12) | (sextet_c << 6) | sextet_d; + + data[j] = (triple >> 16) & 0xFF; + if (padding == 1) { + data[j + 1] = (triple >> 8) & 0xFF; + } + } + + return output_length; +} + +#endif \ No newline at end of file diff --git a/entity/EntityComponentSystem.h b/entity/EntityComponentSystem.h index 152c203..d25f0b3 100755 --- a/entity/EntityComponentSystem.h +++ b/entity/EntityComponentSystem.h @@ -34,8 +34,8 @@ struct EntityComponentSystem { uint64 component_count; // @question Do we want to add a mutex to assets. This way we don't have to lock the entire ams. - coms_pthread_mutex_t* entity_mutex; - coms_pthread_mutex_t* component_mutex; + mutex* entity_mutex; + mutex* component_mutex; }; inline @@ -54,7 +54,7 @@ void ecs_entity_type_create(ChunkMemory* ec, BufferMemory* buf, int32 chunk_size ASSERT_SIMPLE(chunk_size); chunk_init(ec, buf, count, chunk_size, 64); - //coms_pthread_mutex_init(&ec->mutex, NULL); + //mutex_init(&ec->mutex, NULL); } inline @@ -63,7 +63,7 @@ void ecs_component_type_create(ChunkMemory* ec, BufferMemory* buf, int32 chunk_s ASSERT_SIMPLE(chunk_size); chunk_init(ec, buf, count, chunk_size, 64); - //coms_pthread_mutex_init(&ec->mutex, NULL); + //mutex_init(&ec->mutex, NULL); } Entity* ecs_get_entity(EntityComponentSystem* ecs, int32 entity_id) diff --git a/gpuapi/direct3d/DirectXUtils.h b/gpuapi/direct3d/DirectXUtils.h index 4d8eeff..b82f342 100755 --- a/gpuapi/direct3d/DirectXUtils.h +++ b/gpuapi/direct3d/DirectXUtils.h @@ -14,10 +14,10 @@ #include #include #include -#include "../../../GameEngine/log/Log.h" -#include "../../../GameEngine/memory/RingMemory.h" -#include "../../../GameEngine/object/Texture.h" -#include "../../../GameEngine/image/Image.cpp" +#include "../../../cOMS/log/Log.h" +#include "../../../cOMS/memory/RingMemory.h" +#include "../../../cOMS/object/Texture.h" +#include "../../../cOMS/image/Image.cpp" #include "../../compiler/CompilerUtils.h" // #include "../../../EngineDependencies/directx/d3d12.h" // #include "../../../EngineDependencies/directx/d3dx12.h" diff --git a/gpuapi/vulkan/VulkanUtils.h b/gpuapi/vulkan/VulkanUtils.h index 78d5145..21bfd4d 100755 --- a/gpuapi/vulkan/VulkanUtils.h +++ b/gpuapi/vulkan/VulkanUtils.h @@ -482,14 +482,14 @@ void gpuapi_swapchain_create( } else { swapchain_extent->width = OMS_CLAMP( window->width, - swap_chain_support.capabilities.maxImageExtent.width, - swap_chain_support.capabilities.minImageExtent.width + swap_chain_support.capabilities.minImageExtent.width, + swap_chain_support.capabilities.maxImageExtent.width ); swapchain_extent->height = OMS_CLAMP( window->height, - swap_chain_support.capabilities.maxImageExtent.height, - swap_chain_support.capabilities.minImageExtent.height + swap_chain_support.capabilities.minImageExtent.height, + swap_chain_support.capabilities.maxImageExtent.height ); } diff --git a/hash/Sha1.h b/hash/Sha1.h new file mode 100644 index 0000000..31ed3ad --- /dev/null +++ b/hash/Sha1.h @@ -0,0 +1,171 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_HASH_SHA1_H +#define COMS_HASH_SHA1_H + +#include +#include +#include + +#include "../stdlib/Types.h" +#include "../stdlib/Simd.h" +#include "Sha1Definitions.h" + +#ifdef __SSE4_2__ + #include "Sha1SimdX86.h" +#elif defined(__ARM_FEATURE_SVE) || defined(__ARM_NEON) + #include "Sha1SimdArm.h" +#else + static + void sha1_transform(SHA1_CTX *ctx, 
const byte data[64], [[mayb_unused]] int32 steps) { + uint32 a, b, c, d, e, temp; + uint32 w[80]; + + for (int i = 0; i < 16; i++) { + w[i] = ((uint32)data[i * 4 + 0] << 24) | + ((uint32)data[i * 4 + 1] << 16) | + ((uint32)data[i * 4 + 2] << 8) | + ((uint32)data[i * 4 + 3]); + } + + for (int i = 16; i < 80; i++) { + temp = w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]; + w[i] = (temp << 1) | (temp >> 31); + } + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + + // Main compression loop - unrolled for better performance + // Round 1 (0-19) + for (int i = 0; i < 20; i++) { + temp = SHA1_ROTL32(a, 5) + SHA1_Ch(b, c, d) + e + K1 + w[i]; + e = d; + d = c; + c = SHA1_ROTL32(b, 30); + b = a; + a = temp; + } + + // Round 2 (20-39) + for (int i = 20; i < 40; i++) { + temp = SHA1_ROTL32(a, 5) + SHA1_Parity(b, c, d) + e + K2 + w[i]; + e = d; + d = c; + c = SHA1_ROTL32(b, 30); + b = a; + a = temp; + } + + // Round 3 (40-59) + for (int i = 40; i < 60; i++) { + temp = SHA1_ROTL32(a, 5) + SHA1_Maj(b, c, d) + e + K3 + w[i]; + e = d; + d = c; + c = SHA1_ROTL32(b, 30); + b = a; + a = temp; + } + + // Round 4 (60-79) + for (int i = 60; i < 80; i++) { + temp = SHA1_ROTL32(a, 5) + SHA1_Parity(b, c, d) + e + K4 + w[i]; + e = d; + d = c; + c = SHA1_ROTL32(b, 30); + b = a; + a = temp; + } + + ctx->state[0] += a; + ctx->state[1] += b; + ctx->state[2] += c; + ctx->state[3] += d; + ctx->state[4] += e; + } +#endif + +void sha1_init(SHA1_CTX* ctx) { + ctx->state[0] = 0x67452301; + ctx->state[1] = 0xEFCDAB89; + ctx->state[2] = 0x98BADCFE; + ctx->state[3] = 0x10325476; + ctx->state[4] = 0xC3D2E1F0; + ctx->count = 0; +} + +static +void sha1_update(SHA1_CTX* ctx, const byte* data, size_t len, int32 steps) { + size_t i, index, part_len; + + index = (size_t)((ctx->count >> 3) & 0x3F); + ctx->count += len << 3; + part_len = 64 - index; + + if (len >= part_len) { + memcpy(&ctx->buffer[index], data, part_len); + sha1_transform(ctx, ctx->buffer, steps); + + for (i = part_len; i + 63 < len; i += 64) { + sha1_transform(ctx, &data[i], steps); + } + + index = 0; + } else { + i = 0; + } + + memcpy(&ctx->buffer[index], &data[i], len - i); +} + +static +void sha1_final(SHA1_CTX* ctx, byte digest[20], int32 steps) { + byte bits[8]; + uint32 index, pad_len; + + for (int32 i = 0; i < 8; i++) { + bits[i] = (byte) ((ctx->count >> ((7 - i) * 8)) & 0xFF); + } + + index = (uint32)((ctx->count >> 3) & 0x3F); + pad_len = (index < 56) ? 
(56 - index) : (120 - index); + sha1_update(ctx, (const byte*) "\x80", 1, steps); + while (pad_len-- > 1) { + sha1_update(ctx, (const byte*) "\0", 1, steps); + } + + sha1_update(ctx, bits, 8, steps); + + for (int32 i = 0; i < 5; i++) { + digest[i*4+0] = (byte) ((ctx->state[i] >> 24) & 0xFF); + digest[i*4+1] = (byte) ((ctx->state[i] >> 16) & 0xFF); + digest[i*4+2] = (byte) ((ctx->state[i] >> 8) & 0xFF); + digest[i*4+3] = (byte) ( ctx->state[i] & 0xFF); + } + + // WARNING: We are not doing this since no sensitive data should use sha1 anyways + // Normally this would be done to reduce side channel attacks + // memset(ctx, 0, sizeof(*ctx)); +} + +inline +void sha1_hash(const byte* data, size_t len, byte digest[20], int32 steps = 16) { + SHA1_CTX ctx; + sha1_init(&ctx); + + steps = intrin_validate_steps(data, steps); + + sha1_update(&ctx, data, len, steps); + sha1_final(&ctx, digest, steps); +} + +#endif \ No newline at end of file diff --git a/hash/Sha1Definitions.h b/hash/Sha1Definitions.h new file mode 100644 index 0000000..99de66d --- /dev/null +++ b/hash/Sha1Definitions.h @@ -0,0 +1,31 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_HASH_SHA1_DEFINITIONS_H +#define COMS_HASH_SHA1_DEFINITIONS_H + +#include "../stdlib/Types.h" + +#define SHA1_ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + +#define SHA1_Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z))) +#define SHA1_Parity(x, y, z) ((x) ^ (y) ^ (z)) +#define SHA1_Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +#define K1 0x5A827999 +#define K2 0x6ED9EBA1 +#define K3 0x8F1BBCDC +#define K4 0xCA62C1D6 + +struct SHA1_CTX { + alignas(64) byte buffer[64]; + uint64 count; + uint32 state[5]; +}; + +#endif \ No newline at end of file diff --git a/hash/Sha1SimdArm.h b/hash/Sha1SimdArm.h new file mode 100644 index 0000000..d126cf9 --- /dev/null +++ b/hash/Sha1SimdArm.h @@ -0,0 +1,156 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_HASH_SHA1_SIMD_ARM_H +#define COMS_HASH_SHA1_SIMD_ARM_H + +#include +#include "../stdlib/Types.h" +#include "Sha1Definitions.h" + +#ifdef __ARM_FEATURE_SVE + #include + + static + void sha1_transform(SHA1_CTX* ctx, const byte data[64], int32 steps) { + uint32 a, b, c, d, e, temp; + alignas(64) uint32 w[80]; + + // @question Does it make sense to also do SIMD here? 
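        // A minimal sketch, assuming the SVE ACLE byte-reverse intrinsic svrevb_u32_x
        // and svcntw are available: the big-endian word load below could itself be
        // vectorized in predicated chunks, e.g.
        //     for (int32 k = 0; k < 16; k += svcntw()) {
        //         svbool_t pg_w = svwhilelt_b32(k, 16);
        //         svuint32_t words = svld1_u32(pg_w, (const uint32*) (data + k * 4));
        //         svst1_u32(pg_w, &w[k], svrevb_u32_x(pg_w, words)); // byte-swap each 32-bit word
        //     }
        // The scalar loop is kept since it is a small fixed cost per 64-byte block.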
+ for (int32 i = 0; i < 16; ++i) { + w[i] = ((uint32) data[i * 4 + 0] << 24) + | ((uint32) data[i * 4 + 1] << 16) + | ((uint32) data[i * 4 + 2] << 8) + | ((uint32) data[i * 4 + 3]); + } + + svuint32_t sv_k3 = svdup_n_u32(3); + svuint32_t sv_k8 = svdup_n_u32(8); + svuint32_t sv_k14 = svdup_n_u32(14); + svuint32_t sv_k16 = svdup_n_u32(16); + + for (int32 i = 16; i < 80; i += steps) { + svbool_t pg = svwhilelt_b32(i, 80); + + // Calculate indices + svuint32_t idx_3 = svsub_n_u32_z(pg, svindex_u32(i, 1), 3); + svuint32_t idx_8 = svsub_n_u32_z(pg, svindex_u32(i, 1), 8); + svuint32_t idx_14 = svsub_n_u32_z(pg, svindex_u32(i, 1), 14); + svuint32_t idx_16 = svsub_n_u32_z(pg, svindex_u32(i, 1), 16); + + // Gather values + svuint32_t v_3 = svld1_gather_index_u32(pg, w, idx_3); + svuint32_t v_8 = svld1_gather_index_u32(pg, w, idx_8); + svuint32_t v_14 = svld1_gather_index_u32(pg, w, idx_14); + svuint32_t v_16 = svld1_gather_index_u32(pg, w, idx_16); + + // Compute w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) <<< 1 + svuint32_t v = sveor_u32_z(pg, v_3, v_8); + v = sveor_u32_z(pg, v, v_14); + v = sveor_u32_z(pg, v, v_16); + v = svorr_u32_z(pg, svlsl_n_u32_z(pg, v, 1), svlsr_n_u32_z(pg, v, 31)); + + svst1_u32(pg, &w[i], v); + } + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + + for (int32 i = 0; i < 80; ++i) { + if (i < 20) { + temp = SHA1_ROTL32(a, 5) + SHA1_Ch(b, c, d) + e + K1 + w[i]; + } else if (i < 40) { + temp = SHA1_ROTL32(a, 5) + SHA1_Parity(b, c, d) + e + K2 + w[i]; + } else if (i < 60) { + temp = SHA1_ROTL32(a, 5) + SHA1_Maj(b, c, d) + e + K3 + w[i]; + } else { + temp = SHA1_ROTL32(a, 5) + SHA1_Parity(b, c, d) + e + K4 + w[i]; + } + + e = d; + d = c; + c = SHA1_ROTL32(b, 30); + b = a; + a = temp; + } + + ctx->state[0] += a; + ctx->state[1] += b; + ctx->state[2] += c; + ctx->state[3] += d; + ctx->state[4] += e; + } +#elif defined (__ARM_NEON) + #include + + static + void sha1_transform(SHA1_CTX* ctx, const byte data[64], int32 steps) { + uint32 a, b, c, d, e, temp; + alignas(64) uint32 w[80]; + + // @question Does it make sense to also do SIMD here? 
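        // A minimal sketch, assuming plain NEON (vld1q_u8 / vrev32q_u8): the big-endian
        // word load below can also be done four words at a time, e.g.
        //     for (int32 k = 0; k < 16; k += 4) {
        //         uint8x16_t raw = vld1q_u8(data + k * 4);                     // 16 message bytes
        //         vst1q_u32(&w[k], vreinterpretq_u32_u8(vrev32q_u8(raw)));     // byte-swap each 32-bit word
        //     }
        // The scalar loop is kept below since it is a small fixed cost per 64-byte block.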
+ for (int32 i = 0; i < 16; ++i) { + w[i] = ((uint32) data[i * 4 + 0] << 24) + | ((uint32) data[i * 4 + 1] << 16) + | ((uint32) data[i * 4 + 2] << 8) + | ((uint32) data[i * 4 + 3]); + } + + for (int32 i = 16; i < 80; i += 4) { + uint32x4_t v = veorq_u32( + vld1q_u32(&w[i-3]), + vld1q_u32(&w[i-8]) + ); + v = veorq_u32(v, vld1q_u32(&w[i-14])); + v = veorq_u32(v, vld1q_u32(&w[i-16])); + + // Rotate left by 1 + uint32x4_t v_rot = vorrq_u32( + vshlq_n_u32(v, 1), + vshrq_n_u32(v, 31) + ); + + vst1q_u32(&w[i], v_rot); + } + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + + for (int32 i = 0; i < 80; ++i) { + if (i < 20) { + temp = SHA1_ROTL32(a, 5) + SHA1_Ch(b, c, d) + e + K1 + w[i]; + } else if (i < 40) { + temp = SHA1_ROTL32(a, 5) + SHA1_Parity(b, c, d) + e + K2 + w[i]; + } else if (i < 60) { + temp = SHA1_ROTL32(a, 5) + SHA1_Maj(b, c, d) + e + K3 + w[i]; + } else { + temp = SHA1_ROTL32(a, 5) + SHA1_Parity(b, c, d) + e + K4 + w[i]; + } + + e = d; + d = c; + c = SHA1_ROTL32(b, 30); + b = a; + a = temp; + } + + ctx->state[0] += a; + ctx->state[1] += b; + ctx->state[2] += c; + ctx->state[3] += d; + ctx->state[4] += e; + } +#endif + +#endif \ No newline at end of file diff --git a/hash/Sha1SimdX86.h b/hash/Sha1SimdX86.h new file mode 100644 index 0000000..ce2bbc3 --- /dev/null +++ b/hash/Sha1SimdX86.h @@ -0,0 +1,125 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_HASH_SHA1_SIMD_X86_H +#define COMS_HASH_SHA1_SIMD_X86_H + +#include +#include "../stdlib/Types.h" +#include "Sha1Definitions.h" + +static +void sha1_transform(SHA1_CTX* ctx, const byte data[64], int32 steps) { + uint32 a, b, c, d, e, temp; + alignas(64) uint32 w[80]; + + // @question Does it make sense to also do SIMD here? 
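    // A minimal sketch, assuming SSSE3 (_mm_shuffle_epi8, implied by the __SSE4_2__
    // guard in Sha1.h): the big-endian word load below can be vectorized with a
    // byte-swap shuffle, e.g.
    //     const __m128i bswap32 = _mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3);
    //     for (int32 k = 0; k < 16; k += 4) {
    //         __m128i raw = _mm_loadu_si128((const __m128i*) (data + k * 4));      // 16 message bytes
    //         _mm_store_si128((__m128i*) &w[k], _mm_shuffle_epi8(raw, bswap32));   // w[] is 64-byte aligned
    //     }
    // It is left scalar here since it is a fixed cost of 16 words per block.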
+ for (int32 i = 0; i < 16; ++i) { + w[i] = ((uint32) data[i * 4 + 0] << 24) + | ((uint32) data[i * 4 + 1] << 16) + | ((uint32) data[i * 4 + 2] << 8) + | ((uint32) data[i * 4 + 3]); + } + + #ifdef __AVX512F__ + if (steps >= 16) { + for (int i = 16; i < 80; i += 16) { + __m512i v3 = _mm512_loadu_si512((__m512i*)&w[i-3]); + __m512i v8 = _mm512_load_si512((__m512i*)&w[i-8]); + __m512i v14 = _mm512_loadu_si512((__m512i*)&w[i-14]); + __m512i v16 = _mm512_load_si512((__m512i*)&w[i-16]); + + __m512i v = _mm512_xor_si512(v3, v8); + v = _mm512_xor_si512(v, v14); + v = _mm512_xor_si512(v, v16); + + __m512i v_rot = _mm512_or_si512( + _mm512_slli_epi32(v, 1), + _mm512_srli_epi32(v, 31) + ); + + _mm512_store_si512((__m512i*)&w[i], v_rot); + } + + steps = 1 + } else + #endif + + #ifdef __AVX2__ + if (steps >= 8) { + for (int i = 16; i < 80; i += 8) { + __m256i v3 = _mm256_loadu_si256((__m256i*)&w[i-3]); + __m256i v8 = _mm256_load_si256((__m256i*)&w[i-8]); + __m256i v14 = _mm256_loadu_si256((__m256i*)&w[i-14]); + __m256i v16 = _mm256_load_si256((__m256i*)&w[i-16]); + + __m256i v = _mm256_xor_si256(v3, v8); + v = _mm256_xor_si256(v, v14); + v = _mm256_xor_si256(v, v16); + + __m256i v_rot = _mm256_or_si256( + _mm256_slli_epi32(v, 1), + _mm256_srli_epi32(v, 31) + ); + + _mm256_store_si256((__m256i*)&w[i], v_rot); + } + + steps = 1; + } else + #endif + + #ifdef __SSE4_2__ + if (steps >= 4) { + for (int32 i = 16; i < 80; i += 4) { + __m128i v = _mm_xor_si128( + _mm_loadu_si128((__m128i*) &w[i-3]), + _mm_load_si128((__m128i*) &w[i-8]) + ); + v = _mm_xor_si128(v, _mm_loadu_si128((__m128i*) &w[i-14])); + v = _mm_xor_si128(v, _mm_load_si128((__m128i*) &w[i-16])); + + v = _mm_or_si128(_mm_slli_epi32(v, 1), _mm_srli_epi32(v, 31)); + + _mm_store_si128((__m128i*) &w[i], v); + } + } + #endif + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + + for (int32 i = 0; i < 80; ++i) { + if (i < 20) { + temp = SHA1_ROTL32(a, 5) + SHA1_Ch(b, c, d) + e + K1 + w[i]; + } else if (i < 40) { + temp = SHA1_ROTL32(a, 5) + SHA1_Parity(b, c, d) + e + K2 + w[i]; + } else if (i < 60) { + temp = SHA1_ROTL32(a, 5) + SHA1_Maj(b, c, d) + e + K3 + w[i]; + } else { + temp = SHA1_ROTL32(a, 5) + SHA1_Parity(b, c, d) + e + K4 + w[i]; + } + + e = d; + d = c; + c = SHA1_ROTL32(b, 30); + b = a; + a = temp; + } + + ctx->state[0] += a; + ctx->state[1] += b; + ctx->state[2] += c; + ctx->state[3] += d; + ctx->state[4] += e; +} + +#endif \ No newline at end of file diff --git a/html/template/HtmlTemplateCache.h b/html/template/HtmlTemplateCache.h index 6681e22..f65898e 100755 --- a/html/template/HtmlTemplateCache.h +++ b/html/template/HtmlTemplateCache.h @@ -27,6 +27,7 @@ struct HtmlTemplateCache { byte* cache; // Total cache size + // It has to contain the templates and the AST of the template uint32 cache_size; // Current position @@ -55,20 +56,32 @@ void html_template_find(const char* path, va_list args) { ++(*path_count); } -void html_template_cache_init(HtmlTemplateCache* cache, const char* basedir, BufferMemory* buf, RingMemory* ring) { +void html_template_cache_alloc(HtmlTemplateCache* cache, const char* basedir, RingMemory* ring, int32 alignment = 64) { + // @todo limit the maximum cache size in the dynamic resize + uint32 max_path_count = 1000; uint32 path_count = 0; char* paths = (char *) ring_get_memory(ring, max_path_count * 256 * sizeof(char), 8, true); uint32 total_file_size = 0; iterate_directory(basedir, ".tpl.html", html_template_find, &paths, &path_count, &max_path_count, 
&total_file_size, ring); - cache->cache_size = (uint64) (total_file_size * 1.2); - cache->cache = (byte *) buffer_get_memory(buf, cache->cache_size, 64, true); + cache->cache_size = OMS_MAX((uint64) (total_file_size * 1.2f), (uint64) (total_file_size + 1 * KILOBYTE)); + uint32 buffer_size = ROUND_TO_NEAREST(cache->cache_size + perfect_hashmap_size(path_count, sizeof(PerfectHashEntryInt32)), 4096); + byte* buf = (byte *) platform_alloc_aligned(buffer_size, alignment); perfect_hashmap_create(&cache->hm, path_count, sizeof(PerfectHashEntryInt32), buf); + + cache->cache = (byte *) ROUND_TO_NEAREST((uintptr_t) (buf + perfect_hashmap_size(path_count, sizeof(PerfectHashEntryInt32))), alignment); perfect_hashmap_prepare(&cache->hm, (const char*) paths, path_count, 256, 10000, ring); - LOG_1("Created HtmlTemplateCache with %n B for %n templates with %n B in uncompressed file size", {{LOG_DATA_INT64, &cache->cache_size}, {LOG_DATA_INT32, &path_count}, {LOG_DATA_INT32, &total_file_size}}); + LOG_1( + "Created HtmlTemplateCache with %n B for %n templates with %n B in uncompressed file size", + { + {LOG_DATA_INT64, &cache->cache_size}, + {LOG_DATA_INT32, &path_count}, + {LOG_DATA_INT32, &total_file_size} + } + ); } bool html_template_in_control_structure(const char* str, const char** controls, int32 control_length) { @@ -81,8 +94,8 @@ bool html_template_in_control_structure(const char* str, const char** controls, return false; } -void html_template_cache_load(HtmlTemplateCache* cache, const char* key, const char* str) { - char* minified = (char *) ROUND_TO_NEAREST((uintptr_t) cache->cache + (uintptr_t) cache->cache_pos, 64); +void html_template_cache_load(HtmlTemplateCache* cache, const char* key, const char* str, int32 alignment = 64) { + char* minified = (char *) ROUND_TO_NEAREST((uintptr_t) cache->cache + (uintptr_t) cache->cache_pos, alignment); char* minified_start = minified; static const char* CONTROL_STRUCTURE_START[] = { @@ -99,14 +112,23 @@ void html_template_cache_load(HtmlTemplateCache* cache, const char* key, const c // All-in-all let's consider this a pre-pass that we might want to move to the lexer in the future but I don't think so int32 in_control_structure = 0; while (*str) { + // @performance What about optional tags such as ,
, , , , ... + // @performance Remove comments /* */ and // if (!in_control_structure && str_is_eol(*str)) { str_skip_eol(&str); + //continue; // @question Why does this fail? } else if (!in_control_structure && str_is_empty(*str)) { - // @performance This keeps whitespaces, which we don't want and could optimize away str_skip_empty(&str); + // @performance This keeps whitespaces, which we don't want and could optimize away + // We would have to check the previous char and the next char to be != > and != < + // the problem however is that we would have to specially handle the first char and last char in str --str; } + if (!(*str)) { + break; + } + if (!in_control_structure && html_template_in_control_structure(str, CONTROL_STRUCTURE_START, ARRAY_COUNT(CONTROL_STRUCTURE_START)) ) { @@ -144,7 +166,8 @@ void html_template_cache_load(HtmlTemplateCache* cache, const char* key, const c cache->cache_pos += ((uintptr_t) memory - (uintptr_t) memory_start); - ASSERT_SIMPLE(((uintptr_t) ast) % 64 == 0); + ASSERT_SIMPLE(ast); + ASSERT_SIMPLE(((uintptr_t) ast) % alignment == 0); perfect_hashmap_insert(&cache->hm, key, (int32) ((uintptr_t) ast - (uintptr_t) cache->cache)); } diff --git a/http/HttpDispatcher.h b/http/HttpDispatcher.h new file mode 100644 index 0000000..f24486c --- /dev/null +++ b/http/HttpDispatcher.h @@ -0,0 +1,97 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_HTML_DISPATCHER_H +#define COMS_HTML_DISPATCHER_H + +#include "../stdlib/Types.h" +#include "../memory/BufferMemory.h" +#include "HttpRequest.h" +#include "HttpResponse.h" +#include "../module/WebModule.h" + +typedef void* (*ControllerFunction)(WebModule* module, HttpRequest* request, HttpResponse* response, void* data); + +struct DispatchData { + WebModule* module; + HttpRequest* request; + HttpResponse* response; + void* data; +}; + +struct Dispatcher { + void* app; + + // This is an array of arrays to make looking up functions faster + // The first index defines the module id, the second level defines the function + // functions[module_id][function_id] + // The 0 index is used for custom functions + // The other indeces are filled by the WebModuleManager + ControllerFunction** functions; + int16 category_count; + int16 base_func_count; +}; + +void dispatcher_alloc(Dispatcher* dispatcher, void* app, int32 category_count, int32 func_count, BufferMemory* buf, int32 alignment = 64) { + dispatcher->app = app; + dispatcher->base_func_count = func_count; + dispatcher->functions = (ControllerFunction **) buffer_get_memory(buf, sizeof(ControllerFunction *) * category_count, alignment, true); + dispatcher->functions[0] = (ControllerFunction *) buffer_get_memory(buf, sizeof(ControllerFunction) * func_count, alignment, true); +} + +void dispatcher_set_func(Dispatcher* dispatcher, uint32 id, ControllerFunction func) { + /** + * The 16 high bits of function_id define the module id + * and the lower 16 bits define the relative function id in that module. 
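 *
 * Worked example (illustrative values only): id = 0x0003000A
 *     category_id = id >> 16     -> 0x0003 = 3  (module / category 3)
 *     function_id = id & 0xFFFF  -> 0x000A = 10 (function slot 10 within that category)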
+ */ + uint16 category_id = id >> 16; + if (category_id >= dispatcher->category_count) { + return; + } + + uint16 function_id = id & 0x0000FFFF; + if (category_id == 0 && function_id >= dispatcher->base_func_count) { + return; + } + + dispatcher->functions[category_id][function_id] = func; +} + +void dispatcher_set_category(Dispatcher* dispatcher, uint16 category, ControllerFunction* functions) { + if (category >= dispatcher->category_count) { + return; + } + + dispatcher->functions[category] = functions; +} + +ControllerFunction dispatcher_get_function(Dispatcher* dispatcher, uint32 id) { + uint16 category_id = id >> 16; + if (category_id >= dispatcher->category_count) { + return NULL; + } + + uint16 function_id = id & 0x0000FFFF; + if (category_id == 0 && function_id >= dispatcher->base_func_count) { + return NULL; + } + + return dispatcher->functions[category_id][function_id]; +} + +void* dispatcher_dispatch(Dispatcher* dispatcher, uint32 id, DispatchData* dispatch_data) { + ControllerFunction func = dispatcher_get_function(dispatcher, id); + + if (func) { + return func(dispatch_data->module, dispatch_data->request, dispatch_data->response, dispatch_data->data); + } + + return NULL; +} + +#endif \ No newline at end of file diff --git a/http/HttpHeader.h b/http/HttpHeader.h new file mode 100644 index 0000000..11d7de1 --- /dev/null +++ b/http/HttpHeader.h @@ -0,0 +1,22 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_JINGGA_HTTP_HEADER_H +#define COMS_JINGGA_HTTP_HEADER_H + +#include "../stdlib/Types.h" +#include "HttpHeaderKey.h" + +struct HttpHeader { + HttpHeaderKey key; + byte value_length; + + uint16 value_offset; +}; + +#endif \ No newline at end of file diff --git a/http/HttpHeaderKey.h b/http/HttpHeaderKey.h new file mode 100644 index 0000000..efce599 --- /dev/null +++ b/http/HttpHeaderKey.h @@ -0,0 +1,153 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_JINGGA_HTTP_HEADER_KEY_H +#define COMS_JINGGA_HTTP_HEADER_KEY_H + +#include "../stdlib/Types.h" + +enum HttpHeaderKey : byte { + // Standard HTTP/1.1 & HTTP/2 Headers (RFC 9110, 9113, etc.) 
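    // Illustrative example of how these key values are used together with the
    // HttpHeader struct above (values are made up): a parsed "Host: example.com"
    // line would be stored roughly as
    //     { key = HTTP_HEADER_KEY_HOST, value_length = 11, value_offset = offset of "example.com" in the raw request }
    // i.e. only a one-byte key plus an offset/length into the request buffer is kept.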
+ HTTP_HEADER_KEY_HOST, + HTTP_HEADER_KEY_USER_AGENT, + HTTP_HEADER_KEY_ACCEPT, + HTTP_HEADER_KEY_ACCEPT_CHARSET, + HTTP_HEADER_KEY_ACCEPT_ENCODING, + HTTP_HEADER_KEY_ACCEPT_LANGUAGE, + HTTP_HEADER_KEY_ACCEPT_DATETIME, + HTTP_HEADER_KEY_ACCEPT_PATCH, + HTTP_HEADER_KEY_ACCEPT_RANGES, + HTTP_HEADER_KEY_AGE, + HTTP_HEADER_KEY_ALLOW, + HTTP_HEADER_KEY_AUTHORIZATION, + HTTP_HEADER_KEY_CACHE_CONTROL, + HTTP_HEADER_KEY_CONNECTION, + HTTP_HEADER_KEY_CONTENT_DISPOSITION, + HTTP_HEADER_KEY_CONTENT_ENCODING, + HTTP_HEADER_KEY_CONTENT_LANGUAGE, + HTTP_HEADER_KEY_CONTENT_LENGTH, + HTTP_HEADER_KEY_CONTENT_LOCATION, + HTTP_HEADER_KEY_CONTENT_MD5, + HTTP_HEADER_KEY_CONTENT_RANGE, + HTTP_HEADER_KEY_CONTENT_TYPE, + HTTP_HEADER_KEY_COOKIE, + HTTP_HEADER_KEY_DATE, + HTTP_HEADER_KEY_ETAG, + HTTP_HEADER_KEY_EXPECT, + HTTP_HEADER_KEY_EXPIRES, + HTTP_HEADER_KEY_FROM, + HTTP_HEADER_KEY_IF_MATCH, + HTTP_HEADER_KEY_IF_MODIFIED_SINCE, + HTTP_HEADER_KEY_IF_NONE_MATCH, + HTTP_HEADER_KEY_IF_RANGE, + HTTP_HEADER_KEY_IF_UNMODIFIED_SINCE, + HTTP_HEADER_KEY_LAST_MODIFIED, + HTTP_HEADER_KEY_LINK, + HTTP_HEADER_KEY_LOCATION, + HTTP_HEADER_KEY_MAX_FORWARDS, + HTTP_HEADER_KEY_ORIGIN, + HTTP_HEADER_KEY_PRAGMA, + HTTP_HEADER_KEY_PROXY_AUTHENTICATE, + HTTP_HEADER_KEY_PROXY_AUTHORIZATION, + HTTP_HEADER_KEY_RANGE, + HTTP_HEADER_KEY_REFERER, + HTTP_HEADER_KEY_RETRY_AFTER, + HTTP_HEADER_KEY_SERVER, + HTTP_HEADER_KEY_SET_COOKIE, + HTTP_HEADER_KEY_STRICT_TRANSPORT_SECURITY, + HTTP_HEADER_KEY_TE, + HTTP_HEADER_KEY_TRAILER, + HTTP_HEADER_KEY_TRANSFER_ENCODING, + HTTP_HEADER_KEY_UPGRADE, + HTTP_HEADER_KEY_VARY, + HTTP_HEADER_KEY_VIA, + HTTP_HEADER_KEY_WARNING, + HTTP_HEADER_KEY_WWW_AUTHENTICATE, + + // Common Non-Standard (X-*) and Extension Headers + HTTP_HEADER_KEY_X_FORWARDED_FOR, + HTTP_HEADER_KEY_X_FORWARDED_HOST, + HTTP_HEADER_KEY_X_FORWARDED_PROTO, + HTTP_HEADER_KEY_X_REQUESTED_WITH, + HTTP_HEADER_KEY_X_CSRF_TOKEN, + HTTP_HEADER_KEY_X_XSS_PROTECTION, + HTTP_HEADER_KEY_X_CONTENT_TYPE_OPTIONS, + HTTP_HEADER_KEY_X_FRAME_OPTIONS, + HTTP_HEADER_KEY_X_POWERED_BY, + HTTP_HEADER_KEY_X_UPLOAD_ID, + HTTP_HEADER_KEY_X_RATE_LIMIT_LIMIT, + HTTP_HEADER_KEY_X_RATE_LIMIT_REMAINING, + HTTP_HEADER_KEY_X_RATE_LIMIT_RESET, + HTTP_HEADER_KEY_X_UA_COMPATIBLE, + HTTP_HEADER_KEY_X_DNS_PREFETCH_CONTROL, + HTTP_HEADER_KEY_X_DOWNLOAD_OPTIONS, + HTTP_HEADER_KEY_X_PERMITTED_CROSS_DOMAIN_POLICIES, + + // CORS Headers + HTTP_HEADER_KEY_ACCESS_CONTROL_ALLOW_ORIGIN, + HTTP_HEADER_KEY_ACCESS_CONTROL_ALLOW_CREDENTIALS, + HTTP_HEADER_KEY_ACCESS_CONTROL_ALLOW_HEADERS, + HTTP_HEADER_KEY_ACCESS_CONTROL_ALLOW_METHODS, + HTTP_HEADER_KEY_ACCESS_CONTROL_EXPOSE_HEADERS, + HTTP_HEADER_KEY_ACCESS_CONTROL_MAX_AGE, + HTTP_HEADER_KEY_ACCESS_CONTROL_REQUEST_HEADERS, + HTTP_HEADER_KEY_ACCESS_CONTROL_REQUEST_METHOD, + + // Security Headers + HTTP_HEADER_KEY_CONTENT_SECURITY_POLICY, + HTTP_HEADER_KEY_PERMISSIONS_POLICY, + HTTP_HEADER_KEY_REFERRER_POLICY, + HTTP_HEADER_KEY_EXPECT_CT, + HTTP_HEADER_KEY_FEATURE_POLICY, + HTTP_HEADER_KEY_CROSS_ORIGIN_EMBEDDER_POLICY, + HTTP_HEADER_KEY_CROSS_ORIGIN_OPENER_POLICY, + HTTP_HEADER_KEY_CROSS_ORIGIN_RESOURCE_POLICY, + + // WebSocket Headers + HTTP_HEADER_KEY_SEC_WEBSOCKET_KEY, + HTTP_HEADER_KEY_SEC_WEBSOCKET_ACCEPT, + HTTP_HEADER_KEY_SEC_WEBSOCKET_VERSION, + HTTP_HEADER_KEY_SEC_WEBSOCKET_PROTOCOL, + HTTP_HEADER_KEY_SEC_WEBSOCKET_EXTENSIONS, + + // HTTP/3 and QUIC Headers + HTTP_HEADER_KEY_ALT_SVC, + HTTP_HEADER_KEY_EARLY_DATA, + + // Cloud & CDN Headers + HTTP_HEADER_KEY_CF_CONNECTING_IP, + HTTP_HEADER_KEY_CF_IPCOUNTRY, + HTTP_HEADER_KEY_CF_RAY, 
+ HTTP_HEADER_KEY_TRUE_CLIENT_IP, + HTTP_HEADER_KEY_X_AMZ_CF_ID, + HTTP_HEADER_KEY_X_AMZN_TRACE_ID, + + // Custom/Experimental Headers + HTTP_HEADER_KEY_DNT, // Do Not Track + HTTP_HEADER_KEY_SAVE_DATA, + HTTP_HEADER_KEY_DOWNLINK, + HTTP_HEADER_KEY_ECT, // Effective Connection Type + HTTP_HEADER_KEY_RTT, + HTTP_HEADER_KEY_PURPOSE, + HTTP_HEADER_KEY_SEC_FETCH_SITE, + HTTP_HEADER_KEY_SEC_FETCH_MODE, + HTTP_HEADER_KEY_SEC_FETCH_USER, + HTTP_HEADER_KEY_SEC_FETCH_DEST, + HTTP_HEADER_KEY_SERVICE_WORKER_NAVIGATION_PRELOAD, + HTTP_HEADER_KEY_LAST_EVENT_ID, + HTTP_HEADER_KEY_REPORT_TO, + HTTP_HEADER_KEY_PRIORITY, + HTTP_HEADER_KEY_SIGNATURE, + HTTP_HEADER_KEY_SIGNATURE_KEY, + HTTP_HEADER_KEY_FORWARDED, + HTTP_HEADER_KEY_ORIGINAL_METHOD, + HTTP_HEADER_KEY_ORIGINAL_URL, + HTTP_HEADER_KEY_ORIGINAL_HOST, +}; + +#endif \ No newline at end of file diff --git a/http/HttpMethod.h b/http/HttpMethod.h new file mode 100644 index 0000000..253510b --- /dev/null +++ b/http/HttpMethod.h @@ -0,0 +1,25 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_JINGGA_HTTP_METHOD_H +#define COMS_JINGGA_HTTP_METHOD_H + +#include "../stdlib/Types.h" + +enum HttpMethod : byte { + HTTP_METHOD_UNKNOWN = 0, + HTTP_METHOD_GET = 1 << 0, + HTTP_METHOD_POST = 1 << 1, + HTTP_METHOD_PUT = 1 << 2, + HTTP_METHOD_DELETE = 1 << 3, + HTTP_METHOD_ANY = (1 << 4) - 1, +}; + +typedef HttpMethod HttpVerb; + +#endif \ No newline at end of file diff --git a/http/HttpProtocol.h b/http/HttpProtocol.h new file mode 100644 index 0000000..47fc962 --- /dev/null +++ b/http/HttpProtocol.h @@ -0,0 +1,21 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_JINGGA_HTTP_PROTOCOL_H +#define COMS_JINGGA_HTTP_PROTOCOL_H + +#include "../stdlib/Types.h" + +enum HttpProtocol : byte { + HTTP_PROTOCOL_UNKNOWN, + HTTP_PROTOCOL_1_1, + HTTP_PROTOCOL_2, + HTTP_PROTOCOL_3, +}; + +#endif \ No newline at end of file diff --git a/http/HttpRequest.h b/http/HttpRequest.h new file mode 100644 index 0000000..0f377eb --- /dev/null +++ b/http/HttpRequest.h @@ -0,0 +1,260 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_JINGGA_HTTP_REQUEST_H +#define COMS_JINGGA_HTTP_REQUEST_H + +#include +#include +#include + +#include "../stdlib/Types.h" +#include "../utils/StringUtils.h" +#include "../log/Log.h" + +#include "HttpMethod.h" +#include "HttpProtocol.h" +#include "HttpHeaderKey.h" +#include "HttpUri.h" +#include "HttpHeader.h" + +/** + * Data layout + * HttpRequest + * ... + * Request body (excl. 
file data if available) + * HttpHeader[] + * + */ + +// The order of the members and their types is carefully chosen to make them fit into 1 cache line +// The request content must come directly after the request +struct HttpRequest { + HttpUri uri; + HttpMethod method; + HttpProtocol protocol; + + uint16 header_count; + uint16 body_offset; + uint32 body_length; + uint32 request_length; + uint32 headers_offset; +}; + +inline +bool http_request_header_is_complete(const char* request, size_t length) { + return str_contains(request, "\r\n\r\n", length); +} + +// Binary search for the key +inline +const HttpHeader* http_request_header_get(const HttpRequest* request, HttpHeaderKey key) { + const HttpHeader* base = (HttpHeader *) ((uintptr_t) request + sizeof(HttpRequest) + request->request_length); + int32 header_count = OMS_MIN(request->header_count, (uint16) key); + + while (header_count > 1) { + int32 half = header_count / 2; + header_count -= half; + + base += (base[half - 1].key < key) * half; + } + + return base->key == key ? base : NULL; +} + +inline +const char* http_request_header_value_get(const HttpRequest* request, const HttpHeader* header) { + const char* request_data = (const char *) ((uintptr_t) request + sizeof(HttpRequest)); + + return request_data + header->value_offset; +} + +bool http_request_has_file_upload(const HttpRequest* request) { + const HttpHeader* header = http_request_header_get(request, HTTP_HEADER_KEY_CONTENT_TYPE); + if (!header) { + return false; + } + + const char* header_value = http_request_header_value_get(request, header); + if ((str_compare_caseless(header_value, "application/", OMS_MIN(header->value_length, sizeof("application/") - 1)) == 0 + && str_compare_caseless(header_value, "application/json", OMS_MIN(header->value_length, sizeof("application/json") - 1)) != 0) + || str_compare_caseless(header_value, "image/", OMS_MIN(header->value_length, sizeof("image/") - 1)) == 0 + || str_compare_caseless(header_value, "audio/", OMS_MIN(header->value_length, sizeof("audio/") - 1)) == 0 + || str_compare_caseless(header_value, "video/", OMS_MIN(header->value_length, sizeof("video/") - 1)) == 0 + || str_compare_caseless(header_value, "text/csv", OMS_MIN(header->value_length, sizeof("text/csv") - 1)) == 0 + ) { + return true; + } + + if (str_compare_caseless(header_value, "multipart/form-data", OMS_MIN(header->value_length, sizeof("multipart/form-data") - 1)) != 0) { + return false; + } + + // @todo not every form-data is a file upload but it is at least possible + + return false; +} + +void http_request_header_parse(HttpRequest* http_request, const char* request) { + const char* request_start = request; + + ////////////////////////////////////////////////// + // Parsing HTTP request line + ////////////////////////////////////////////////// + + str_skip_empty(&request); + + // Parse request type + if (str_compare(request, "GET") == 0) { + http_request->method = HTTP_METHOD_GET; + } else if (str_compare(request, "POST") == 0) { + http_request->method = HTTP_METHOD_POST; + } else if (str_compare(request, "PUT") == 0) { + http_request->method = HTTP_METHOD_PUT; + } else if (str_compare(request, "DELETE") == 0) { + http_request->method = HTTP_METHOD_DELETE; + } else { + // Additional request types are possible BUT we don't support them in our internal framework + // If this would be a public framework we would've to support additional request types + http_request->method = HTTP_METHOD_UNKNOWN; + } + + // Parse reuqest path + str_move_past(&request, ' '); + 
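// --- Illustrative sketch (not part of the patch) ---------------------------------------
// Example of how the header lookup defined above could be used once a request has been
// parsed. Assumes `req` points at a fully parsed HttpRequest whose HttpHeader array is
// sorted by key, which is what the binary search in http_request_header_get relies on.
// The function name and the chosen header are made up purely for this example.
static bool example_request_is_keep_alive(const HttpRequest* req) {
    const HttpHeader* header = http_request_header_get(req, HTTP_HEADER_KEY_CONNECTION);
    if (!header) {
        return false;
    }

    // Header values are offsets into the request buffer, bounded by value_length
    const char* value = http_request_header_value_get(req, header);

    return str_compare_caseless(value, "keep-alive", OMS_MIN(header->value_length, sizeof("keep-alive") - 1)) == 0;
}
// ----------------------------------------------------------------------------------------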
http_request->uri.path_offset = request - request_start; + + str_skip_until_list(&request, ":?# "); + http_request->uri.path_length = (request - request_start) - http_request->uri.path_offset; + + // Parse port + if (*request == ':') { + http_request->uri.port = (uint16) str_to_int(request, &request); + } + + // Parse query parameters + if (*request == '?') { + http_request->uri.parameter_offset = request - request_start; + str_skip_until_list(&request, "# "); + http_request->uri.path_length = (request - request_start) - http_request->uri.parameter_offset; + } + + // Parse fragment + if (*request == '#') { + http_request->uri.fragment_offset = request - request_start; + str_move_to(&request, ' '); + http_request->uri.fragment_length = (request - request_start) - http_request->uri.fragment_offset; + } + + // Parse protocol + str_move_past(&request, ' '); + if (str_compare(request, "HTTP/", sizeof("HTTP/") - 1) != 0) { + LOG_1("Invalid HTTP header, no protocol defined"); + ASSERT_SIMPLE(false); + + return; + } + + request += sizeof("HTTP/") - 1; + if (*request == '1') { + http_request->protocol = HTTP_PROTOCOL_1_1; + } else if (*request == '2') { + http_request->protocol = HTTP_PROTOCOL_2; + } else if (*request == '3') { + http_request->protocol = HTTP_PROTOCOL_3; + } else { + http_request->protocol = HTTP_PROTOCOL_UNKNOWN; + } + + ////////////////////////////////////////////////// + // Parsing HTTP headers + ////////////////////////////////////////////////// + // The HTTP headers end with \r\n\r\n (= one empty line/element) + while (request[0] != '\r' && request[1] != '\n' && request[2] != '\r' && request[3] != '\n') { + str_move_past(&request, '\n'); + + // @todo parse headers + } + + ////////////////////////////////////////////////// + // Parsing HTTP body + ////////////////////////////////////////////////// + request += 4; + http_request->body_offset = request - request_start; + http_request->body_length = http_request->request_length - http_request->body_offset; +} + +void parse_multipart_data(const char *body, const char *boundary) { + char *buffer = strdup(body); + if (!buffer) { + perror("Failed to allocate memory for buffer"); + exit(EXIT_FAILURE); + } + + char *part = strtok(buffer, boundary); + while (part) { + // Skip leading and trailing newlines + while (*part == '\r' || *part == '\n') part++; + + // Parse part headers and content + char *headers_end = strstr(part, "\r\n\r\n"); + if (headers_end) { + *headers_end = '\0'; // Terminate headers + char *content = headers_end + 4; // Skip "\r\n\r\n" + + printf("Part Headers:\n%s\n", part); + printf("Part Content:\n%s\n", content); + } + + part = strtok(NULL, boundary); + } + + free(buffer); +} + +/* +int main() { + const char *http_request = + "POST /submit HTTP/1.1\n" + "Host: www.example.com\n" + "Content-Type: multipart/form-data; boundary=----WebKitFormBoundary7MA4YWxkTrZu0gW\n" + "Content-Length: 348\n" + "\n" + "------WebKitFormBoundary7MA4YWxkTrZu0gW\n" + "Content-Disposition: form-data; name=\"username\"\n" + "\n" + "john_doe\n" + "------WebKitFormBoundary7MA4YWxkTrZu0gW\n" + "Content-Disposition: form-data; name=\"file\"; filename=\"example.txt\"\n" + "Content-Type: text/plain\n" + "\n" + "This is a sample file.\n" + "------WebKitFormBoundary7MA4YWxkTrZu0gW--\n"; + + HttpRequest request; + http_request_parse(http_request, &request); + print_http_request(&request); + + // Parse multipart data if Content-Type is multipart/form-data + for (int i = 0; i < request.header_count; i++) { + if (strstr(request.headers[i], 
"Content-Type: multipart/form-data")) { + const char *boundary_start = strstr(request.headers[i], "boundary="); + if (boundary_start) { + char boundary[128]; + sscanf(boundary_start, "boundary=%127s", boundary); + printf("\nParsing multipart data with boundary: %s\n", boundary); + parse_multipart_data(request.body, boundary); + } + break; + } + } + + return 0; +} + */ + +#endif \ No newline at end of file diff --git a/http/HttpResponse.h b/http/HttpResponse.h new file mode 100644 index 0000000..4030dab --- /dev/null +++ b/http/HttpResponse.h @@ -0,0 +1,27 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_JINGGA_HTTP_RESPONSE_H +#define COMS_JINGGA_HTTP_RESPONSE_H + +#include "../stdlib/Types.h" + +#include "HttpMethod.h" +#include "HttpProtocol.h" +#include "HttpStatusCode.h" + +struct HttpResponse { + HttpMethod method; + HttpProtocol protocol; + HttpStatusCode status_code; +}; + +// @performance Create a cached header line for 200 responses +// @performance Create a cached header for most common response (incl. CSP, referrer, x-*, ...) + +#endif \ No newline at end of file diff --git a/http/HttpRoute.h b/http/HttpRoute.h new file mode 100644 index 0000000..9ff58cf --- /dev/null +++ b/http/HttpRoute.h @@ -0,0 +1,51 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_JINGGA_HTTP_ROUTE_H +#define COMS_JINGGA_HTTP_ROUTE_H + +#include "../stdlib/Types.h" +#include "HttpMethod.h" +#include "../account/PermissionType.h" + +#define MAX_HTTP_ROUTE_LENGTH 127 + +enum HttpRouteFlags { + HTTP_ROUTE_FLAG_ACTUVE = 1 << 0, + HTTP_ROUTE_FLAG_CSRF_REQUIRED = 1 << 1, +}; + +struct HttpRoutePermission { + uint16 unit; + uint16 app; + uint16 module; + PermissionType type; + uint16 category; +}; + +// A route can have different end points depending on the method +struct HttpRouteDetails { + uint32 func_id; + + // bit field for HttpMethod + byte method; + + // bit field for HttpRouteFlags + byte flags; + + HttpRoutePermission permission; +}; + +struct HttpRoute { + char route[MAX_HTTP_ROUTE_LENGTH]; + + byte details_count; + HttpRouteDetails* details; +}; + +#endif \ No newline at end of file diff --git a/http/HttpRouter.h b/http/HttpRouter.h new file mode 100644 index 0000000..a7c1391 --- /dev/null +++ b/http/HttpRouter.h @@ -0,0 +1,208 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_JINGGA_HTTP_ROUTER_H +#define COMS_JINGGA_HTTP_ROUTER_H + +#include "../stdlib/Types.h" +#include "../memory/BufferMemory.h" +#include "../utils/RegexSimplified.h" +#include "HttpRoute.h" +#include "HttpMethod.h" + +#define HTTP_ROUTE_SEGMENT_LENGTH 32 + +struct HttpRouteNode { + char segment[HTTP_ROUTE_SEGMENT_LENGTH]; + + // Route information + // This is empty relatively often since only the last node(s) in a path usually have an endpoint defined + // However, replacing this with another uint16 route_id for example only saves us 4 bytes, + // BUT costs us another indirection once we arrive at a matched route/endpoint + // The current implemenation allows us to directly jump into the detail definitions and iterate them + uint32 detail_offset; + byte detail_count; + + // Do this node require regex matching? 
+ bool is_regex; + + // How many child nodes does this node have + uint16 children_count; + + // Defines the offset into the nodes array where the children can be found + uint32 children_offset; +}; + +struct HttpRouter { + HttpRouteNode* nodes; + HttpRouteDetails* route_details; + + uint16 node_count; + uint16 node_capacity; + + uint32 route_detail_count; + uint32 route_detail_capacity; +}; + +void http_router_init(HttpRouter* router, uint32 route_count, BufferMemory* buf, int32 alignment = 64) { + // We expect 3 path components per route + // If more are required, we will increase the memory later + router->nodes = (HttpRouteNode *) buffer_get_memory(buf, route_count * 3 * sizeof(HttpRouteNode), alignment, true); + router->node_capacity = route_count * 3; + router->node_count = 0; + + // We expect at least one route detail per route + // On average it is probably more like 1.x but if we need more we will increase as required later + router->route_details = (HttpRouteDetails *) buffer_get_memory(buf, route_count * sizeof(HttpRouteDetails), alignment, true); + router->route_detail_capacity = route_count; + router->route_detail_count = 0; +} + +/** + * Optimizes the memory layout of the router by making all nodes with the same level consecutive in memory + * This improves the caching since an element doesn't match we need to compare our current search with the other elements of the same level + * If these elements are consecutive, there is a bigger chance that they are already loaded into L1 or L2 cache + */ +void http_router_optimize() { + +} + +// Add a new route +void http_router_add( + HttpRouter* router, + const HttpRoute* route +) { + +} + +void http_router_find_iter( + const HttpRouter* router, + const char* uri_segments, + int32 uri_segment_index, + int32 uri_segment_count, + HttpRouteDetails** matches, + int32* match_count, + HttpRouteNode* node = NULL +) { + for (uint32 i = 0; i < node->children_count; ++i) { + HttpRouteNode* test_node = &router->nodes[node->children_offset + i]; + if ((!test_node->is_regex && str_compare(test_node->segment, uri_segments) == 0) + || (test_node->is_regex && regex_simplified_validate(test_node->segment, uri_segments)) + ) { + if (uri_segment_index < uri_segment_count && test_node->children_count) { + // We have more in our uri path AND more child nodes + // -> We need to continue pattern matching + http_router_find_iter( + router, + uri_segments + str_length(uri_segments) + 1, + uri_segment_index + 1, + uri_segment_count, + matches, + match_count, + test_node + ); + } else if (uri_segment_index == uri_segment_count && !test_node->children_count) { + // We reached the end of the uri path and the end of the node chain + // -> We found a possible match + matches[(*match_count)++] = &router->route_details[test_node->detail_offset + i]; + } else if (uri_segment_index >= uri_segment_count && test_node->children_count) { + // We reached the end of the uri path BUT still have child nodes + // -> This can only be a match if any of the child chains from here on are optional/wildcard matches + http_router_find_iter( + router, + "", + uri_segment_index + 1, + uri_segment_count, + matches, + match_count, + test_node + ); + } else if (uri_segment_index < uri_segment_count && !test_node->children_count) { + // We have more in our uri path BUT no more child nodes + // -> This can only be a match if the test_node is a regex node that also matches all other path segments + if (test_node->is_regex) { + bool is_valid = true; + for (int32 j = uri_segment_index + 1; j < 
uri_segment_count; ++j) { + if (!regex_simplified_validate(test_node->segment, uri_segments)) { + is_valid = false; + break; + } + } + + if (is_valid) { + matches[(*match_count)++] = &router->route_details[test_node->detail_offset + i]; + } + } + } + } + } +} + +void http_router_route( + const HttpRouter* router, + const char* uri, + bool has_csrf, + HttpMethod method, + HttpRouteDetails** matches, + int32* match_count +) { + char uri_segments[MAX_HTTP_ROUTE_LENGTH]; + char* segments_temp = uri_segments; + + int32 uri_segment_count = 0; + int32 i = 0; + + while (*uri != '\0' && i < MAX_HTTP_ROUTE_LENGTH) { + if (*uri == '/') { + *segments_temp++ = '\0'; + ++uri; + ++uri_segment_count; + } else { + *segments_temp++ = *uri++; + } + + ++i; + } + + *segments_temp = '\0'; + + // Find potential matches based on the route + int32 temp_match_count = 0; + http_router_find_iter( + router, + uri_segments, + 0, + uri_segment_count, + matches, + &temp_match_count, + router->nodes + ); + + // Remove matches that don't fit the additional criteria + // The reason why we don't do this in the route iteration is that we don't want to pass this information in every step + // We need to remember that often only the last 1/2 path entries have actually a route attached + *match_count = 0; + for (i = 0; i < temp_match_count; ++i) { + if ((matches[i]->method & method) // matches method/verb + && (matches[i]->flags & HTTP_ROUTE_FLAG_ACTUVE) // route is active + && (!(matches[i]->flags & HTTP_ROUTE_FLAG_CSRF_REQUIRED) // doesn't require csrf + || ((matches[i]->flags & HTTP_ROUTE_FLAG_CSRF_REQUIRED) && has_csrf) // requires csrf & person has csrf + ) + ) { + // We only have to re-assign if the temp result has different elements than the final result + // aka if a route has additional conditions like method, activity, ... 
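// --- Illustrative sketch (not part of the patch) ---------------------------------------
// A possible caller of http_router_route. The fixed-size match buffer and the already
// populated `router` are assumptions made purely for this example.
static void example_dispatch(const HttpRouter* router, HttpMethod method, const char* path, bool has_csrf) {
    HttpRouteDetails* matches[8];
    int32 match_count = 0;

    http_router_route(router, path, has_csrf, method, matches, &match_count);

    if (match_count > 0) {
        // matches[0]->func_id would then be mapped to the actual endpoint handler
    }
}
// ----------------------------------------------------------------------------------------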
+ if (*match_count != i) { + matches[*match_count] = matches[i]; + } + + ++(*match_count); + } + } +} + +#endif \ No newline at end of file diff --git a/http/HttpSessionManager.h b/http/HttpSessionManager.h new file mode 100644 index 0000000..35ded36 --- /dev/null +++ b/http/HttpSessionManager.h @@ -0,0 +1,275 @@ +#include "../stdlib/Types.h" +#include "../memory/BufferMemory.h" +#include "../system/Allocator.h" +#include "../stdlib/HashMap.h" +#include "../utils/StringUtils.h" +#include "../utils/RandomUtils.h" + +#define MAX_SESSION_ID_LENGTH 32 + +struct Session { + uint64 last_used; + + // Hash map that contains the offsets into the data memory + // The hash map starts at the + HashMap hm; + + // offset into the data memory + uint32 offset; + uint32 data_size; +}; + +struct SessionManager { + // Hash map used to find sessions by ID + // The hash map contains the offsets into the sessions array + // @todo make random_string() for the session_id + HashMap hm; + + Session *sessions; + + // Data shared accross sessions + byte* session_data; + + size_t count; + size_t capacity; + const char *storage_path; + uint64 last_cleanup; +}; + +SessionManager* session_manager_alloc( + SessionManager* manager, + const char* storage_path, + size_t initial_capacity, + int32 alignment = 64 +) { + size_t internal_buffer_size = ROUND_TO_NEAREST(sizeof(Session) * initial_capacity, 4096); + byte* internal_buffer = (byte *) platform_alloc_aligned(internal_buffer_size, alignment); + + // distribute internal_buffer to: + // session_key_data + // session_data + // hm per session + + manager->sessions = (Session *) internal_buffer; + + manager->count = 0; + manager->capacity = initial_capacity; + manager->storage_path = strdup(storage_path); + manager->last_cleanup = time(NULL); + + ensure_storage_directory_exists(storage_path); + + return manager; +} + +Session* session_manager_create(SessionManager *manager) { + if (manager->count >= manager->capacity) { + // Try to cleanup first + session_manager_cleanup(manager); + + // If still full, move oldest session to disk + if (manager->count >= manager->capacity) { + // Find oldest session + time_t oldest_time = time(NULL); + size_t oldest_index = 0; + + for (size_t i = 0; i < manager->count; i++) { + if (manager->sessions[i].last_used < oldest_time) { + oldest_time = manager->sessions[i].last_used; + oldest_index = i; + } + } + + // Save to disk + save_session_to_disk(manager, &manager->sessions[oldest_index]); + + // Remove from memory + if (oldest_index != manager->count - 1) { + memmove(&manager->sessions[oldest_index], + &manager->sessions[oldest_index + 1], + (manager->count - oldest_index - 1) * sizeof(Session)); + } + manager->count--; + } + } + + // Create new session + Session *session = &manager->sessions[manager->count++]; + session_id_generate(session->id); + session->last_used = time(NULL); + session->data_size = 0; + session->data[0] = '\0'; + + return session; +} + +Session* session_manager_get(SessionManager *manager, const char *session_id) { + // First check memory + for (size_t i = 0; i < manager->count; i++) { + if (strcmp(manager->sessions[i].id, session_id) == 0) { + manager->sessions[i].last_used = time(NULL); + return &manager->sessions[i]; + } + } + + // Not in memory, try disk + Session temp_session; + if (load_session_from_disk(manager, &temp_session, session_id)) { + // Make space if needed + if (manager->count >= manager->capacity) { + session_manager_cleanup(manager); + + if (manager->count >= manager->capacity) { + // Still full, need 
to move one to disk + time_t oldest_time = time(NULL); + size_t oldest_index = 0; + + for (size_t i = 0; i < manager->count; i++) { + if (manager->sessions[i].last_used < oldest_time) { + oldest_time = manager->sessions[i].last_used; + oldest_index = i; + } + } + + save_session_to_disk(manager, &manager->sessions[oldest_index]); + + if (oldest_index != manager->count - 1) { + memmove(&manager->sessions[oldest_index], + &manager->sessions[oldest_index + 1], + (manager->count - oldest_index - 1) * sizeof(Session)); + } + manager->count--; + } + } + + // Add to memory + Session *session = &manager->sessions[manager->count++]; + memcpy(session, &temp_session, sizeof(Session)); + session->last_used = time(NULL); // Update last used time + + // Remove from disk (it's now in memory) + delete_session_from_disk(manager, session_id); + + return session; + } + + return NULL; // Not found +} + +void session_manager_delete(SessionManager *manager, const char *session_id) { + // Delete from memory + for (size_t i = 0; i < manager->count; i++) { + if (strcmp(manager->sessions[i].id, session_id) == 0) { + if (i != manager->count - 1) { + memmove(&manager->sessions[i], + &manager->sessions[i + 1], + (manager->count - i - 1) * sizeof(Session)); + } + manager->count--; + break; + } + } + + // Delete from disk + delete_session_from_disk(manager, session_id); +} + +void session_manager_cleanup(SessionManager *manager) { + time_t now = time(NULL); + + // Clean memory + size_t i = 0; + while (i < manager->count) { + if (now - manager->sessions[i].last_used > SESSION_EXPIRY_SECONDS) { + // Move to disk before deleting (if we want to keep expired sessions on disk) + // Or just delete completely: + + if (i != manager->count - 1) { + memmove(&manager->sessions[i], + &manager->sessions[i + 1], + (manager->count - i - 1) * sizeof(Session)); + } + manager->count--; + } else { + i++; + } + } + + // Clean disk storage (do this less frequently) + if (now - manager->last_cleanup > (SESSION_EXPIRY_SECONDS / 2)) { + DIR *dir = opendir(manager->storage_path); + if (dir) { + struct dirent *entry; + while ((entry = readdir(dir)) != NULL) { + if (strlen(entry->d_name) == SESSION_ID_LENGTH) { + char path[PATH_MAX]; + snprintf(path, sizeof(path), "%s/%s", manager->storage_path, entry->d_name); + + struct stat st; + if (stat(path, &st) == 0) { + if (now - st.st_mtime > SESSION_EXPIRY_SECONDS) { + unlink(path); + } + } + } + } + closedir(dir); + } + manager->last_cleanup = now; + } +} + +void session_id_generate(char* id) { + uint64 x = time_index(); + int32 id_length = (rand_fast(&x) % 6) + HASH_MAP_MAX_KEY_LENGTH - 5; + + random_string( + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz@!", + sizeof("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz@!") - 1, + id, + id_length - 1 + ); +} + +static void save_session_to_disk(SessionManager *manager, const Session *session) { + char path[PATH_MAX]; + snprintf(path, sizeof(path), "%s/%s", manager->storage_path, session->id); + + FILE *file = fopen(path, "wb"); + if (!file) return; + + fwrite(session, sizeof(Session), 1, file); + fclose(file); + + // Update file modification time to match last_used + struct utimbuf times; + times.actime = session->last_used; + times.modtime = session->last_used; + utime(path, ×); +} + +static bool load_session_from_disk(SessionManager *manager, Session *session, const char *session_id) { + char path[PATH_MAX]; + snprintf(path, sizeof(path), "%s/%s", manager->storage_path, session_id); + + FILE *file = fopen(path, "rb"); + if 
(!file) return false; + + bool success = fread(session, sizeof(Session), 1, file) == 1; + fclose(file); + + return success; +} + +static void delete_session_from_disk(SessionManager *manager, const char *session_id) { + char path[PATH_MAX]; + snprintf(path, sizeof(path), "%s/%s", manager->storage_path, session_id); + unlink(path); +} + +static void ensure_storage_directory_exists(const char *path) { + struct stat st = {0}; + if (stat(path, &st) == -1) { + mkdir(path, 0700); + } +} \ No newline at end of file diff --git a/http/HttpStatusCode.h b/http/HttpStatusCode.h new file mode 100644 index 0000000..e7d0c81 --- /dev/null +++ b/http/HttpStatusCode.h @@ -0,0 +1,194 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_JINGGA_HTTP_STATUS_CODE_H +#define COMS_JINGGA_HTTP_STATUS_CODE_H + +#include "../stdlib/Types.h" + +enum HttpStatusCode : uint16 { + HTTP_STATUS_CODE_100 = 100, + HTTP_STATUS_CODE_101 = 101, + HTTP_STATUS_CODE_102 = 102, + HTTP_STATUS_CODE_200 = 200, + HTTP_STATUS_CODE_201 = 201, + HTTP_STATUS_CODE_202 = 202, + HTTP_STATUS_CODE_203 = 203, + HTTP_STATUS_CODE_204 = 204, + HTTP_STATUS_CODE_205 = 205, + HTTP_STATUS_CODE_206 = 206, + HTTP_STATUS_CODE_207 = 207, + HTTP_STATUS_CODE_300 = 300, + HTTP_STATUS_CODE_301 = 301, + HTTP_STATUS_CODE_302 = 302, + HTTP_STATUS_CODE_303 = 303, + HTTP_STATUS_CODE_304 = 304, + HTTP_STATUS_CODE_305 = 305, + HTTP_STATUS_CODE_306 = 306, + HTTP_STATUS_CODE_307 = 307, + HTTP_STATUS_CODE_308 = 308, + HTTP_STATUS_CODE_400 = 400, + HTTP_STATUS_CODE_401 = 401, + HTTP_STATUS_CODE_402 = 402, + HTTP_STATUS_CODE_403 = 403, + HTTP_STATUS_CODE_404 = 404, + HTTP_STATUS_CODE_405 = 405, + HTTP_STATUS_CODE_406 = 406, + HTTP_STATUS_CODE_407 = 407, + HTTP_STATUS_CODE_408 = 408, + HTTP_STATUS_CODE_409 = 409, + HTTP_STATUS_CODE_410 = 410, + HTTP_STATUS_CODE_411 = 411, + HTTP_STATUS_CODE_412 = 412, + HTTP_STATUS_CODE_413 = 413, + HTTP_STATUS_CODE_414 = 414, + HTTP_STATUS_CODE_415 = 415, + HTTP_STATUS_CODE_416 = 416, + HTTP_STATUS_CODE_417 = 417, + HTTP_STATUS_CODE_421 = 421, + HTTP_STATUS_CODE_422 = 422, + HTTP_STATUS_CODE_423 = 423, + HTTP_STATUS_CODE_424 = 424, + HTTP_STATUS_CODE_425 = 425, + HTTP_STATUS_CODE_426 = 426, + HTTP_STATUS_CODE_428 = 428, + HTTP_STATUS_CODE_429 = 429, + HTTP_STATUS_CODE_431 = 431, + HTTP_STATUS_CODE_451 = 451, + HTTP_STATUS_CODE_500 = 500, + HTTP_STATUS_CODE_501 = 501, + HTTP_STATUS_CODE_502 = 502, + HTTP_STATUS_CODE_503 = 503, + HTTP_STATUS_CODE_504 = 504, + HTTP_STATUS_CODE_505 = 505, + HTTP_STATUS_CODE_506 = 506, + HTTP_STATUS_CODE_507 = 507, + HTTP_STATUS_CODE_508 = 508, + HTTP_STATUS_CODE_509 = 509, + HTTP_STATUS_CODE_510 = 510, + HTTP_STATUS_CODE_511 = 511, +}; + +const char* http_status_text(HttpStatusCode code) { + switch (code) { + case HTTP_STATUS_CODE_100: + return "Continue"; + case HTTP_STATUS_CODE_101: + return "Switching Protocols"; + case HTTP_STATUS_CODE_102: + return "Processing"; + case HTTP_STATUS_CODE_200: + return "OK"; + case HTTP_STATUS_CODE_201: + return "Created"; + case HTTP_STATUS_CODE_202: + return "Accepted"; + case HTTP_STATUS_CODE_203: + return "Non-Authoritative Information"; + case HTTP_STATUS_CODE_204: + return "No Content"; + case HTTP_STATUS_CODE_205: + return "Reset Content"; + case HTTP_STATUS_CODE_206: + return "Partial Content"; + case HTTP_STATUS_CODE_207: + return "Multi-Status"; + case HTTP_STATUS_CODE_300: + return "Multiple Choices"; + case HTTP_STATUS_CODE_301: + return "Moved Permanently"; + case 
HTTP_STATUS_CODE_302: + return "Found"; + case HTTP_STATUS_CODE_303: + return "See Other"; + case HTTP_STATUS_CODE_304: + return "Not Modified"; + case HTTP_STATUS_CODE_305: + return "Use Proxy"; + case HTTP_STATUS_CODE_306: + return "Switch Proxy"; + case HTTP_STATUS_CODE_307: + return "Temporary Redirect"; + case HTTP_STATUS_CODE_400: + return "Bad Request"; + case HTTP_STATUS_CODE_401: + return "Unauthorized"; + case HTTP_STATUS_CODE_402: + return "Payment Required"; + case HTTP_STATUS_CODE_403: + return "Forbidden"; + case HTTP_STATUS_CODE_404: + return "Not Found"; + case HTTP_STATUS_CODE_405: + return "Method Not Allowed"; + case HTTP_STATUS_CODE_406: + return "Not Acceptable"; + case HTTP_STATUS_CODE_407: + return "Proxy Authentication Required"; + case HTTP_STATUS_CODE_408: + return "Request Timeout"; + case HTTP_STATUS_CODE_409: + return "Conflict"; + case HTTP_STATUS_CODE_410: + return "Gone"; + case HTTP_STATUS_CODE_411: + return "Length Required"; + case HTTP_STATUS_CODE_412: + return "Precondition Failed"; + case HTTP_STATUS_CODE_413: + return "Request Entity Too Large"; + case HTTP_STATUS_CODE_414: + return "Request-URI Too Long"; + case HTTP_STATUS_CODE_415: + return "Unsupported Media Type"; + case HTTP_STATUS_CODE_416: + return "Requested Range Not Satisfiable"; + case HTTP_STATUS_CODE_417: + return "Expectation Failed"; + case HTTP_STATUS_CODE_421: + return "Misdirected Request"; + case HTTP_STATUS_CODE_422: + return "Unprocessable Entity"; + case HTTP_STATUS_CODE_423: + return "Locked"; + case HTTP_STATUS_CODE_424: + return "Failed Dependency"; + case HTTP_STATUS_CODE_425: + return "Unordered Collection"; + case HTTP_STATUS_CODE_426: + return "Upgrade Required"; + case HTTP_STATUS_CODE_431: + return "Request Header Fields Too Large"; + case HTTP_STATUS_CODE_500: + return "Internal Server Error"; + case HTTP_STATUS_CODE_501: + return "Not Implemented"; + case HTTP_STATUS_CODE_502: + return "Bad Gateway"; + case HTTP_STATUS_CODE_503: + return "Service Unavailable"; + case HTTP_STATUS_CODE_504: + return "Gateway Timeout"; + case HTTP_STATUS_CODE_505: + return "HTTP Version Not Supported"; + case HTTP_STATUS_CODE_506: + return "Variant Also Negotiates"; + case HTTP_STATUS_CODE_507: + return "Insufficient Storage"; + case HTTP_STATUS_CODE_509: + return "Bandwidth Limit Exceeded"; + case HTTP_STATUS_CODE_510: + return "Not Extended"; + case HTTP_STATUS_CODE_511: + return "Network Authentication Required"; + default: + UNREACHABLE(); + } +} + +#endif \ No newline at end of file diff --git a/http/HttpUri.h b/http/HttpUri.h new file mode 100644 index 0000000..8aad91d --- /dev/null +++ b/http/HttpUri.h @@ -0,0 +1,31 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_JINGGA_HTTP_URI_H +#define COMS_JINGGA_HTTP_URI_H + +#include "../stdlib/Types.h" + +// WARNING: Be careful when changing order, members and types +// The current configuration is carefully chosen (see below) +struct HttpUri { + byte path_offset; + byte path_length; + + uint16 parameter_offset; + uint16 parementers_length; + + // A parameter consists of 2 values: n-th value = length; n+1-th value = offset + uint16 parementer_array_offset; + byte parameter_array_count; + byte fragment_length; + uint16 fragment_offset; + + uint16 port; +}; +#endif \ No newline at end of file diff --git a/log/Log.h b/log/Log.h index 0d6a085..bdce232 100755 --- a/log/Log.h +++ b/log/Log.h @@ -159,7 +159,7 @@ void log(const char* str, const char* file, 
const char* function, int32 line) return; } - size_t len = str_length(str); + int32 len = str_length(str); while (len > 0) { LogMessage* msg = (LogMessage *) log_get_memory(); @@ -171,7 +171,7 @@ void log(const char* str, const char* file, const char* function, int32 line) msg->time = system_time(); msg->newline = '\n'; - int32 message_length = (int32) OMS_MIN(MAX_LOG_LENGTH - sizeof(LogMessage) - 1, len); + int32 message_length = (int32) OMS_MIN((int32) (MAX_LOG_LENGTH - sizeof(LogMessage) - 1), len); memcpy(msg->message, str, message_length); msg->message[message_length] = '\0'; diff --git a/log/PerformanceProfiler.h b/log/PerformanceProfiler.h index 41ba6a0..d312b3a 100755 --- a/log/PerformanceProfiler.h +++ b/log/PerformanceProfiler.h @@ -28,6 +28,7 @@ PROFILE_BUFFER_ALLOC, PROFILE_CHUNK_ALLOC, PROFILE_RING_ALLOC, + PROFILE_THREAD_POOL_ALLOC, PROFILE_CMD_ITERATE, PROFILE_CMD_FONT_LOAD_SYNC, PROFILE_CMD_SHADER_LOAD_SYNC, @@ -154,7 +155,7 @@ struct PerformanceProfiler { if (this->auto_log) { if (this->info_msg && this->info_msg[0]) { LOG_2( - "-PERF %s (%s): %l cycles", + "-PERF %s (%s): %n cycles", { {LOG_DATA_CHAR_STR, (void *) perf->name}, {LOG_DATA_CHAR_STR, (void *) this->info_msg}, @@ -163,7 +164,7 @@ struct PerformanceProfiler { ); } else { LOG_2( - "-PERF %s: %l cycles", + "-PERF %s: %n cycles", { {LOG_DATA_CHAR_STR, (void *) perf->name}, {LOG_DATA_INT64, (void *) &perf->total_cycle}, diff --git a/memory/ChunkMemory.h b/memory/ChunkMemory.h index 22e3f90..1c22398 100755 --- a/memory/ChunkMemory.h +++ b/memory/ChunkMemory.h @@ -26,7 +26,6 @@ struct ChunkMemory { byte* memory; - // @question Why are we making the count 64 bit? is this really realistically possible? uint64 size; int32 last_pos; uint32 count; @@ -35,7 +34,7 @@ struct ChunkMemory { // length = count // free describes which locations are used and which are free - uint64* free; + alignas(8) uint64* free; }; // INFO: A chunk count of 2^n is recommended for maximum performance @@ -49,18 +48,22 @@ void chunk_alloc(ChunkMemory* buf, uint32 count, uint32 chunk_size, int32 alignm chunk_size = ROUND_TO_NEAREST(chunk_size, alignment); + uint64 size = count * chunk_size + + sizeof(uint64) * CEIL_DIV(count, alignment) // free + + alignment * 2; // overhead for alignment + buf->memory = alignment < 2 - ? (byte *) platform_alloc(count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64)) - : (byte *) platform_alloc_aligned(count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64), alignment); + ? (byte *) platform_alloc(size) + : (byte *) platform_alloc_aligned(size, alignment); buf->count = count; - buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); + buf->size = size; buf->chunk_size = chunk_size; buf->last_pos = -1; buf->alignment = alignment; // @question Could it be beneficial to have this before the element data? 
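// --- Illustrative sketch (not part of the patch) ---------------------------------------
// Typical ChunkMemory usage with the API shown in this file: reserve a slot in the free
// bit field, fetch its memory, then return it. Count and chunk size are example values.
static void example_chunk_usage() {
    ChunkMemory chunks = {};
    chunk_alloc(&chunks, 256, 64, 64);              // 256 chunks of 64 bytes, 64-byte aligned

    int32 id = chunk_reserve(&chunks, 1);           // claim one free chunk
    if (id >= 0) {
        byte* element = chunk_get_element(&chunks, (uint32) id, true);
        // ... use element ...
        (void) element;
        chunk_free_elements(&chunks, (uint64) id, 1);
    }

    chunk_free(&chunks);
}
// ----------------------------------------------------------------------------------------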
- buf->free = (uint64 *) (buf->memory + count * chunk_size); + buf->free = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->memory + count * chunk_size), alignment); memset(buf->memory, 0, buf->size); @@ -75,10 +78,14 @@ void chunk_init(ChunkMemory* buf, BufferMemory* data, uint32 count, uint32 chunk chunk_size = ROUND_TO_NEAREST(chunk_size, alignment); - buf->memory = buffer_get_memory(data, count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64)); + uint64 size = count * chunk_size + + sizeof(uint64) * CEIL_DIV(count, alignment) // free + + alignment * 2; // overhead for alignment + + buf->memory = buffer_get_memory(data, size); buf->count = count; - buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); + buf->size = size; buf->chunk_size = chunk_size; buf->last_pos = -1; buf->alignment = alignment; @@ -86,7 +93,7 @@ void chunk_init(ChunkMemory* buf, BufferMemory* data, uint32 count, uint32 chunk // @question Could it be beneficial to have this before the element data? // On the other hand the way we do it right now we never have to move past the free array since it is at the end // On another hand we could by accident overwrite the values in free if we are not careful - buf->free = (uint64 *) (buf->memory + count * chunk_size); + buf->free = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->memory + count * chunk_size), 64); DEBUG_MEMORY_SUBREGION((uintptr_t) buf->memory, buf->size); } @@ -99,11 +106,15 @@ void chunk_init(ChunkMemory* buf, byte* data, uint32 count, uint32 chunk_size, i chunk_size = ROUND_TO_NEAREST(chunk_size, alignment); + uint64 size = count * chunk_size + + sizeof(uint64) * CEIL_DIV(count, alignment) // free + + alignment * 2; // overhead for alignment + // @bug what if an alignment is defined? buf->memory = data; buf->count = count; - buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); + buf->size = size; buf->chunk_size = chunk_size; buf->last_pos = -1; buf->alignment = alignment; @@ -111,7 +122,7 @@ void chunk_init(ChunkMemory* buf, byte* data, uint32 count, uint32 chunk_size, i // @question Could it be beneficial to have this before the element data? 
// On the other hand the way we do it right now we never have to move past the free array since it is at the end // On another hand we could by accident overwrite the values in free if we are not careful - buf->free = (uint64 *) (buf->memory + count * chunk_size); + buf->free = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->memory + count * chunk_size), alignment); DEBUG_MEMORY_SUBREGION((uintptr_t) buf->memory, buf->size); } @@ -131,13 +142,13 @@ void chunk_free(ChunkMemory* buf) buf->memory = NULL; } -inline +FORCE_INLINE uint32 chunk_id_from_memory(const ChunkMemory* buf, const byte* pos) noexcept { return (uint32) ((uintptr_t) pos - (uintptr_t) buf->memory) / buf->chunk_size; } inline -byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false) noexcept +byte* chunk_get_element(ChunkMemory* buf, uint32 element, bool zeroed = false) noexcept { if (element >= buf->count) { return NULL; @@ -155,6 +166,39 @@ byte* chunk_get_element(ChunkMemory* buf, uint64 element, bool zeroed = false) n return offset; } +int32 chunk_get_unset(uint64* state, uint32 state_count, int32 start_index = 0) { + if ((uint32) start_index >= state_count) { + start_index = 0; + } + + uint32 free_index = start_index / 64; + uint32 bit_index = start_index & 63; + + // Check standard simple solution + if (!IS_BIT_SET_64_R2L(state[free_index], bit_index)) { + state[free_index] |= (1ULL << bit_index); + + return free_index * 64 + bit_index; + } + + for (uint32 i = 0; i < state_count; ++i) { + if (state[free_index] != 0xFFFFFFFFFFFFFFFF) { + bit_index = compiler_find_first_bit_r2l(~state[free_index]); + state[free_index] |= (1ULL << bit_index); + + return free_index * 64 + bit_index; + } + + ++free_index; + if (free_index * 64 >= state_count) { + free_index = 0; + } + } + + return -1; +} + +inline int32 chunk_reserve(ChunkMemory* buf, uint32 elements = 1) noexcept { if ((uint32) (buf->last_pos + 1) >= buf->count) { @@ -265,15 +309,13 @@ int32 chunk_reserve(ChunkMemory* buf, uint32 elements = 1) noexcept inline void chunk_free_element(ChunkMemory* buf, uint64 free_index, int32 bit_index) noexcept { - DEBUG_MEMORY_DELETE((uintptr_t) (buf->memory + (free_index * 64 + bit_index) * buf->chunk_size), buf->chunk_size); buf->free[free_index] &= ~(1ULL << bit_index); + DEBUG_MEMORY_DELETE((uintptr_t) (buf->memory + (free_index * 64 + bit_index) * buf->chunk_size), buf->chunk_size); } inline void chunk_free_elements(ChunkMemory* buf, uint64 element, uint32 element_count = 1) noexcept { - DEBUG_MEMORY_DELETE((uintptr_t) (buf->memory + element * buf->chunk_size), buf->chunk_size); - uint64 free_index = element / 64; uint32 bit_index = element & 63; @@ -295,6 +337,8 @@ void chunk_free_elements(ChunkMemory* buf, uint64 element, uint32 element_count ++free_index; bit_index = 0; } + + DEBUG_MEMORY_DELETE((uintptr_t) (buf->memory + element * buf->chunk_size), buf->chunk_size); } inline diff --git a/memory/DataPool.h b/memory/DataPool.h new file mode 100644 index 0000000..5521f1e --- /dev/null +++ b/memory/DataPool.h @@ -0,0 +1,164 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_MEMORY_DATA_POOL_H +#define COMS_MEMORY_DATA_POOL_H + +#include "../stdlib/Types.h" +#include "ChunkMemory.h" + +// WARNING: Structure needs to be the same as RingMemory +struct DataPool { + byte* memory; + + uint64 size; + uint32 last_pos; + uint32 count; + uint32 chunk_size; + int32 alignment; + + // length = count + // free describes which locations are 
used and which are free
+    alignas(8) uint64* free;
+
+    // Chunk implementation ends here
+    // This is a bit field that specifies which elements in the data pool are currently in use
+    alignas(8) uint64* used;
+};
+
+// INFO: A chunk count of 2^n is recommended for maximum performance
+inline
+void pool_alloc(DataPool* buf, uint32 count, uint32 chunk_size, int32 alignment = 64)
+{
+    ASSERT_SIMPLE(chunk_size);
+    ASSERT_SIMPLE(count);
+    PROFILE(PROFILE_CHUNK_ALLOC, NULL, false, true);
+    LOG_1("Allocating DataPool");
+
+    chunk_size = ROUND_TO_NEAREST(chunk_size, alignment);
+
+    uint64 size = count * chunk_size
+        + sizeof(uint64) * CEIL_DIV(count, alignment) // free
+        + sizeof(uint64) * CEIL_DIV(count, alignment) // used
+        + alignment * 3; // overhead for alignment
+
+    buf->memory = alignment < 2
+        ? (byte *) platform_alloc(size)
+        : (byte *) platform_alloc_aligned(size, alignment);
+
+    buf->count = count;
+    buf->size = size;
+    buf->chunk_size = chunk_size;
+    buf->last_pos = -1;
+    buf->alignment = alignment;
+
+    // @question Could it be beneficial to have this before the element data?
+    buf->free = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->memory + count * chunk_size), alignment);
+    buf->used = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->free + count), alignment);
+
+    memset(buf->memory, 0, buf->size);
+
+    LOG_1("Allocated DataPool: %n B", {{LOG_DATA_UINT64, &buf->size}});
+}
+
+inline
+void pool_init(DataPool* buf, BufferMemory* data, uint32 count, uint32 chunk_size, int32 alignment = 64)
+{
+    ASSERT_SIMPLE(chunk_size);
+    ASSERT_SIMPLE(count);
+
+    chunk_size = ROUND_TO_NEAREST(chunk_size, alignment);
+
+    uint64 size = count * chunk_size
+        + sizeof(uint64) * CEIL_DIV(count, alignment) // free
+        + sizeof(uint64) * CEIL_DIV(count, alignment) // used
+        + alignment * 3; // overhead for alignment
+
+    buf->memory = buffer_get_memory(data, size);
+
+    buf->count = count;
+    buf->size = size;
+    buf->chunk_size = chunk_size;
+    buf->last_pos = -1;
+    buf->alignment = alignment;
+
+    // @question Could it be beneficial to have this before the element data?
+    // On the other hand the way we do it right now we never have to move past the free array since it is at the end
+    // On another hand we could by accident overwrite the values in free if we are not careful
+    buf->free = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->memory + count * chunk_size), alignment);
+    buf->used = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->free + count), alignment);
+
+    DEBUG_MEMORY_SUBREGION((uintptr_t) buf->memory, buf->size);
+}
+
+inline
+void pool_init(DataPool* buf, byte* data, uint32 count, uint32 chunk_size, int32 alignment = 64)
+{
+    ASSERT_SIMPLE(chunk_size);
+    ASSERT_SIMPLE(count);
+
+    chunk_size = ROUND_TO_NEAREST(chunk_size, alignment);
+
+    uint64 size = count * chunk_size
+        + sizeof(uint64) * CEIL_DIV(count, alignment) // free
+        + sizeof(uint64) * CEIL_DIV(count, alignment) // used
+        + alignment * 3; // overhead for alignment
+
+    // @bug what if an alignment is defined?
+    buf->memory = data;
+
+    buf->count = count;
+    buf->size = size;
+    buf->chunk_size = chunk_size;
+    buf->last_pos = -1;
+    buf->alignment = alignment;
+
+    // @question Could it be beneficial to have this before the element data?
+ // On the other hand the way we do it right now we never have to move past the free array since it is at the end + // On another hand we could by accident overwrite the values in free if we are not careful + buf->free = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->memory + count * chunk_size), alignment); + buf->used = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->free + count), alignment); + + DEBUG_MEMORY_SUBREGION((uintptr_t) buf->memory, buf->size); +} + +FORCE_INLINE +void pool_free(DataPool* buf) noexcept +{ + chunk_free((ChunkMemory *) buf); +} + +FORCE_INLINE +int32 pool_reserve(DataPool* buf, uint32 elements = 1) noexcept +{ + return chunk_reserve((ChunkMemory *) buf, elements); +} + +FORCE_INLINE +byte* pool_get_element(DataPool* buf, uint64 element, bool zeroed = false) noexcept +{ + return chunk_get_element((ChunkMemory *) buf, element, zeroed); +} + +// Find a unused/unlocked element in the data pool +FORCE_INLINE +int32 pool_get_unused(DataPool* buf, int32 start_index = 0) noexcept +{ + return chunk_get_unset(buf->used, buf->count, start_index); +} + +// Release an element to be used by someone else +inline +void pool_release(DataPool* buf, int32 element) noexcept +{ + uint32 free_index = element / 64; + uint32 bit_index = element & 63; + buf->used[free_index] |= (1ULL << bit_index); +} + +#endif \ No newline at end of file diff --git a/memory/ThreadedChunkMemory.h b/memory/ThreadedChunkMemory.h index b02c6df..b02e55f 100755 --- a/memory/ThreadedChunkMemory.h +++ b/memory/ThreadedChunkMemory.h @@ -9,9 +9,9 @@ #ifndef COMS_MEMORY_THREADED_CHUNK_MEMORY_H #define COMS_MEMORY_THREADED_CHUNK_MEMORY_H -#include #include "../stdlib/Types.h" #include "../thread/Thread.h" +#include "ChunkMemory.h" struct ThreadedChunkMemory { byte* memory; @@ -24,14 +24,276 @@ struct ThreadedChunkMemory { // length = count // free describes which locations are used and which are free - uint64* free; + alignas(8) atomic_64 uint64* free; // Chunk implementation ends here // The completeness indicates if the data is completely written to - uint64* completeness; + alignas(8) atomic_64 uint64* completeness; - coms_pthread_mutex_t mutex; - coms_pthread_cond_t cond; + mutex lock; }; +// INFO: A chunk count of 2^n is recommended for maximum performance +inline +void thrd_chunk_alloc(ThreadedChunkMemory* buf, uint32 count, uint32 chunk_size, int32 alignment = 64) +{ + ASSERT_SIMPLE(chunk_size); + ASSERT_SIMPLE(count); + PROFILE(PROFILE_CHUNK_ALLOC, NULL, false, true); + LOG_1("Allocating ChunkMemory"); + + chunk_size = ROUND_TO_NEAREST(chunk_size, alignment); + + uint64 size = count * chunk_size + + sizeof(uint64) * CEIL_DIV(count, alignment) // free + + sizeof(uint64) * CEIL_DIV(count, alignment) // completeness + + alignment * 3; // overhead for alignment + + buf->memory = alignment < 2 + ? (byte *) platform_alloc(size) + : (byte *) platform_alloc_aligned(size, alignment); + + buf->count = count; + buf->size = size; + buf->chunk_size = chunk_size; + buf->last_pos = -1; + buf->alignment = alignment; + + // @question Could it be beneficial to have this before the element data? 
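// --- Illustrative sketch (not part of the patch) ---------------------------------------
// DataPool layers a second bit field (`used`) on top of ChunkMemory so that pre-filled
// elements can be checked out and later handed back. Minimal usage, assuming the pool was
// set up and filled during initialization:
static void example_pool_usage(DataPool* pool) {
    int32 element = pool_get_unused(pool);          // find and claim an element nobody else uses
    if (element < 0) {
        return;                                     // everything is currently in use
    }

    byte* data = pool_get_element(pool, (uint64) element);
    // ... work with data ...
    (void) data;

    pool_release(pool, element);                    // give the element back to the pool
}
// ----------------------------------------------------------------------------------------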
+ buf->free = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->memory + count * chunk_size), alignment); + buf->completeness = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->free + count), alignment); + + memset(buf->memory, 0, buf->size); + mutex_init(&buf->lock, NULL); + + LOG_1("Allocated ChunkMemory: %n B", {{LOG_DATA_UINT64, &buf->size}}); +} + +inline +void thrd_chunk_init(ThreadedChunkMemory* buf, BufferMemory* data, uint32 count, uint32 chunk_size, int32 alignment = 64) +{ + ASSERT_SIMPLE(chunk_size); + ASSERT_SIMPLE(count); + + chunk_size = ROUND_TO_NEAREST(chunk_size, alignment); + + uint64 size = count * chunk_size + + sizeof(uint64) * CEIL_DIV(count, alignment) // free + + sizeof(uint64) * CEIL_DIV(count, alignment) // completeness + + alignment * 3; // overhead for alignment + + buf->memory = buffer_get_memory(data, size); + + buf->count = count; + buf->size = size; + buf->chunk_size = chunk_size; + buf->last_pos = -1; + buf->alignment = alignment; + + // @question Could it be beneficial to have this before the element data? + // On the other hand the way we do it right now we never have to move past the free array since it is at the end + // On another hand we could by accident overwrite the values in free if we are not careful + buf->free = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->memory + count * chunk_size), alignment); + buf->completeness = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->free + count), alignment); + + mutex_init(&buf->lock, NULL); + + DEBUG_MEMORY_SUBREGION((uintptr_t) buf->memory, buf->size); +} + +inline +void thrd_chunk_init(ThreadedChunkMemory* buf, byte* data, uint32 count, uint32 chunk_size, int32 alignment = 64) +{ + ASSERT_SIMPLE(chunk_size); + ASSERT_SIMPLE(count); + + chunk_size = ROUND_TO_NEAREST(chunk_size, alignment); + + uint64 size = count * chunk_size + + sizeof(uint64) * CEIL_DIV(count, alignment) // free + + sizeof(uint64) * CEIL_DIV(count, alignment) // completeness + + alignment * 3; // overhead for alignment + + // @bug what if an alignment is defined? + buf->memory = data; + + buf->count = count; + buf->size = size; + buf->chunk_size = chunk_size; + buf->last_pos = -1; + buf->alignment = alignment; + + // @question Could it be beneficial to have this before the element data? 
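// --- Illustrative sketch (not part of the patch) ---------------------------------------
// Intended usage of ThreadedChunkMemory from a worker thread: reservation is guarded by
// the mutex, while releasing a chunk only touches the atomic `free` bit field.
static void example_thrd_chunk_worker(ThreadedChunkMemory* chunks) {
    int32 id = thrd_chunk_reserve(chunks, 1);
    if (id < 0) {
        return;                                     // no free chunk available
    }

    byte* element = thrd_chunk_get_element(chunks, (uint32) id, true);
    // ... fill element ...
    (void) element;

    thrd_chunk_free_elements(chunks, (uint64) id, 1);
}
// ----------------------------------------------------------------------------------------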
+ // On the other hand the way we do it right now we never have to move past the free array since it is at the end + // On another hand we could by accident overwrite the values in free if we are not careful + buf->free = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->memory + count * chunk_size), alignment); + buf->completeness = (uint64 *) ROUND_TO_NEAREST((uintptr_t) (buf->free + count), alignment); + + mutex_init(&buf->lock, NULL); + + DEBUG_MEMORY_SUBREGION((uintptr_t) buf->memory, buf->size); +} + +FORCE_INLINE +void thrd_chunk_free(ThreadedChunkMemory* buf) noexcept +{ + chunk_free((ChunkMemory *) buf); + mutex_destroy(&buf->lock); +} + +FORCE_INLINE +uint32 thrd_chunk_id_from_memory(const ThreadedChunkMemory* buf, const byte* pos) noexcept +{ + return chunk_id_from_memory((ChunkMemory *) buf, pos); +} + +FORCE_INLINE +byte* thrd_chunk_get_element(ThreadedChunkMemory* buf, uint32 element, bool zeroed = false) noexcept +{ + return chunk_get_element((ChunkMemory *) buf, element, zeroed); +} + +void thrd_chunk_set_unset(uint32 element, atomic_64 uint64* state) { + uint32 free_index = element / 64; + uint32 bit_index = element & 63; + + alignas(8) atomic_64 uint64* target = &state[free_index]; + uint64 old_value, new_value; + + do { + old_value = atomic_get_relaxed(target); + new_value = old_value | (1ULL << bit_index); + + if (old_value == new_value) { + return; + } + } while (!atomic_compare_exchange_strong_release(target, &old_value, new_value)); +} + +int32 thrd_chunk_get_unset(ThreadedChunkMemory* buf, atomic_64 uint64* state, int32 start_index = 0) { + if ((uint32) start_index >= buf->count) { + start_index = 0; + } + + uint32 free_index = start_index / 64; + uint32 bit_index = start_index & 63; + + if (!IS_BIT_SET_64_R2L(state[free_index], bit_index)) { + uint64 expected = atomic_get_relaxed(&state[free_index]); + expected &= ~(1ULL << bit_index); + uint64 desired = expected | (1ULL << bit_index); + + if (atomic_compare_exchange_strong_release(&state[free_index], &expected, desired)) { + return free_index * 64 + bit_index; + } + } + + for (uint32 i = 0; i < buf->count; ++i) { + if (state[free_index] != 0xFFFFFFFFFFFFFFFF) { + // We will try 3 times, usually this would be a while but since compiler_find_... 
doesn't use atomics + // we might get the same index over and over again + for (uint32 j = 0; j < 3; ++j) { + bit_index = compiler_find_first_bit_r2l(~state[free_index]); + + uint64 expected = atomic_get_relaxed(&state[free_index]); + expected &= ~(1ULL << bit_index); + uint64 desired = expected | (1ULL << bit_index); + + if (atomic_compare_exchange_strong_release(&state[free_index], &expected, desired)) { + return free_index * 64 + bit_index; + } + } + } + + ++free_index; + if (free_index * 64 >= buf->count) { + free_index = 0; + } + } + + return -1; +} + +inline +int32 thrd_chunk_reserve(ThreadedChunkMemory* buf, uint32 elements = 1) noexcept +{ + mutex_lock(&buf->lock); + int32 free_element = chunk_reserve((ChunkMemory *) buf, elements); + mutex_unlock(&buf->lock); + + return free_element; +} + +inline +void thrd_chunk_free_element(ThreadedChunkMemory* buf, uint64 free_index, int32 bit_index) noexcept +{ + alignas(8) atomic_64 uint64* target = &buf->free[free_index]; + uint64 old_value, new_value; + + do { + old_value = atomic_get_relaxed(target); + new_value = old_value | (1ULL << bit_index); + + if (old_value == new_value) { + return; + } + } while (!atomic_compare_exchange_strong_release(target, &old_value, new_value)); + + DEBUG_MEMORY_DELETE((uintptr_t) (buf->memory + (free_index * 64 + bit_index) * buf->chunk_size), buf->chunk_size); +} + +inline +void thrd_chunk_free_elements(ThreadedChunkMemory* buf, uint64 element, uint32 element_count = 1) noexcept +{ + uint64 free_index = element / 64; + uint32 bit_index = element & 63; + + if (element == 1) { + thrd_chunk_free_element(buf, free_index, bit_index); + return; + } + + alignas(8) atomic_64 uint64* target; + uint64 old_value, new_value; + + while (element_count > 0) { + // Calculate the number of bits we can clear in the current 64-bit block + uint32 bits_in_current_block = OMS_MIN(64 - bit_index, element_count); + + // Create a mask to clear the bits + uint64 mask = ((1ULL << bits_in_current_block) - 1) << bit_index; + + target = &buf->free[free_index]; + + do { + old_value = atomic_get_relaxed(target); + new_value = old_value & ~mask; + + if (old_value == new_value) { + break; + } + } while (!atomic_compare_exchange_strong_release(target, &old_value, new_value)); + + // Update the counters and indices + element_count -= bits_in_current_block; + ++free_index; + bit_index = 0; + } + + DEBUG_MEMORY_DELETE((uintptr_t) (buf->memory + element * buf->chunk_size), buf->chunk_size); +} + +inline +int32 thrd_chunk_resize(ThreadedChunkMemory* buf, int32 element_id, uint32 elements_old, uint32 elements_new) noexcept +{ + byte* data = thrd_chunk_get_element(buf, element_id); + + int32 chunk_id = thrd_chunk_reserve(buf, elements_new); + byte* data_new = thrd_chunk_get_element(buf, chunk_id); + + memcpy(data_new, data, buf->chunk_size * elements_old); + + return chunk_id; +} + #endif \ No newline at end of file diff --git a/memory/ThreadedDataPool.h b/memory/ThreadedDataPool.h new file mode 100644 index 0000000..55f3955 --- /dev/null +++ b/memory/ThreadedDataPool.h @@ -0,0 +1,93 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_MEMORY_THREADED_DATA_POOL_H +#define COMS_MEMORY_THREADED_DATA_POOL_H + +#include "../stdlib/Types.h" +#include "DataPool.h" +#include "ThreadedChunkMemory.h" + +/** + * WARNING: This implementation assumes the initial setup (insertion of elements) is synchronous + * Only the retrieval of unused elements and the release are thread 
protected + */ + +// WARNING: Structure needs to be the same as RingMemory +struct ThreadedDataPool { + byte* memory; + + uint64 size; + uint32 last_pos; + uint32 count; + uint32 chunk_size; + int32 alignment; + + // length = count + // free describes which locations are used and which are free + alignas(8) atomic_64 uint64* free; + + // Chunk implementation ends here + // This is a bit field that specifies which elements in the data pool are currently in use + alignas(8) atomic_64 uint64* used; + + mutex mutex; +}; + +// INFO: A chunk count of 2^n is recommended for maximum performance +FORCE_INLINE +void thrd_pool_alloc(ThreadedDataPool* buf, uint32 count, uint32 chunk_size, int32 alignment = 64) +{ + pool_alloc((DataPool *) buf, count, chunk_size, alignment); +} + +FORCE_INLINE +void thrd_pool_init(ThreadedDataPool* buf, BufferMemory* data, uint32 count, uint32 chunk_size, int32 alignment = 64) +{ + pool_init((DataPool *) buf, data, count, chunk_size, alignment); +} + +FORCE_INLINE +void thrd_pool_init(ThreadedDataPool* buf, byte* data, uint32 count, uint32 chunk_size, int32 alignment = 64) +{ + pool_init((DataPool *) buf, data, count, chunk_size, alignment); +} + +FORCE_INLINE +void thrd_pool_free(ThreadedDataPool* buf) noexcept +{ + chunk_free((ChunkMemory *) buf); +} + +FORCE_INLINE +int32 thrd_pool_reserve(ThreadedDataPool* buf, uint32 elements = 1) noexcept +{ + return chunk_reserve((ChunkMemory *) buf, elements); +} + +FORCE_INLINE +byte* thrd_pool_get_element(ThreadedDataPool* buf, uint64 element, bool zeroed = false) noexcept +{ + return chunk_get_element((ChunkMemory *) buf, element, zeroed); +} + +// Find a unused/unlocked element in the data pool +FORCE_INLINE +int32 thrd_pool_get_unused(ThreadedDataPool* buf, int32 start_index = 0) noexcept +{ + return thrd_chunk_get_unset((ThreadedChunkMemory *) buf, buf->used, start_index); +} + +// Release an element to be used by someone else +FORCE_INLINE +void thrd_pool_release(ThreadedDataPool* buf, int32 element) noexcept +{ + thrd_chunk_set_unset(element, buf->used); +} + +#endif \ No newline at end of file diff --git a/memory/ThreadedQueue.h b/memory/ThreadedQueue.h index a8599d0..f8f9733 100755 --- a/memory/ThreadedQueue.h +++ b/memory/ThreadedQueue.h @@ -10,6 +10,9 @@ #define COMS_MEMORY_THREADED_QUEUE_H // @todo This is a horrible implementation. 
Please implement a lock free solution +/** + * WARNING: This implementation is a single-producer, single-consumer (SPSC) implementation + */ #include "../stdlib/Types.h" #include "../utils/Utils.h" @@ -37,11 +40,11 @@ struct ThreadedQueue { // We support both conditional locking and semaphore locking // These values are not initialized and not used unless you use the queue - coms_pthread_mutex_t mutex; - coms_pthread_cond_t cond; + mutex mutex; + mutex_cond cond; - sem_t empty; - sem_t full; + sem empty; + sem full; }; inline @@ -53,11 +56,11 @@ void thrd_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint32 element queue->element_size = element_size; - coms_pthread_mutex_init(&queue->mutex, NULL); + mutex_init(&queue->mutex, NULL); coms_pthread_cond_init(&queue->cond, NULL); - sem_init(&queue->empty, element_count); - sem_init(&queue->full, 0); + coms_sem_init(&queue->empty, element_count); + coms_sem_init(&queue->full, 0); } inline @@ -69,11 +72,11 @@ void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_cou queue->element_size = element_size; - coms_pthread_mutex_init(&queue->mutex, NULL); + mutex_init(&queue->mutex, NULL); coms_pthread_cond_init(&queue->cond, NULL); - sem_init(&queue->empty, element_count); - sem_init(&queue->full, 0); + coms_sem_init(&queue->empty, element_count); + coms_sem_init(&queue->full, 0); } inline @@ -85,20 +88,20 @@ void thrd_queue_init(ThreadedQueue* queue, byte* buf, uint32 element_count, uint queue->element_size = element_size; - coms_pthread_mutex_init(&queue->mutex, NULL); + mutex_init(&queue->mutex, NULL); coms_pthread_cond_init(&queue->cond, NULL); - sem_init(&queue->empty, element_count); - sem_init(&queue->full, 0); + coms_sem_init(&queue->empty, element_count); + coms_sem_init(&queue->full, 0); } inline void thrd_queue_free(ThreadedQueue* queue) { ring_free((RingMemory *) queue); - sem_destroy(&queue->empty); - sem_destroy(&queue->full); - coms_pthread_mutex_destroy(&queue->mutex); + coms_sem_destroy(&queue->empty); + coms_sem_destroy(&queue->full); + mutex_destroy(&queue->mutex); coms_pthread_cond_destroy(&queue->cond); } @@ -107,7 +110,7 @@ inline void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data) noexcept { ASSERT_SIMPLE((uint64_t) data % 4 == 0); - coms_pthread_mutex_lock(&queue->mutex); + mutex_lock(&queue->mutex); byte* tail = queue->tail; while (tail != queue->tail) { @@ -115,7 +118,7 @@ void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data) noex // @performance we could probably make this faster since we don't need to compare the entire range if (is_equal(tail, data, queue->element_size) == 0) { - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); return; } @@ -131,14 +134,14 @@ void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data) noex memcpy(mem, data, queue->element_size); coms_pthread_cond_signal(&queue->cond); - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); } inline void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data) noexcept { ASSERT_SIMPLE((uint64_t) data % 4 == 0); - coms_pthread_mutex_lock(&queue->mutex); + mutex_lock(&queue->mutex); byte* tail = queue->tail; while (tail != queue->tail) { @@ -146,7 +149,7 @@ void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data) noexcept // @performance we could probably make this faster since we don't need to compare the entire range if (is_equal(tail, data, queue->element_size) == 0) { - 
coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); return; } @@ -155,7 +158,7 @@ void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data) noexcept } if (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) { - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); return; } @@ -164,17 +167,17 @@ void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data) noexcept memcpy(mem, data, queue->element_size); coms_pthread_cond_signal(&queue->cond); - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); } // Conditional Lock inline void thrd_queue_enqueue(ThreadedQueue* queue, const byte* data) noexcept { - coms_pthread_mutex_lock(&queue->mutex); + mutex_lock(&queue->mutex); if (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) { - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); return; } @@ -183,13 +186,13 @@ void thrd_queue_enqueue(ThreadedQueue* queue, const byte* data) noexcept memcpy(mem, data, queue->element_size); coms_pthread_cond_signal(&queue->cond); - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); } inline void thrd_queue_enqueue_wait(ThreadedQueue* queue, const byte* data) noexcept { - coms_pthread_mutex_lock(&queue->mutex); + mutex_lock(&queue->mutex); while (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) { coms_pthread_cond_wait(&queue->cond, &queue->mutex); @@ -199,13 +202,13 @@ void thrd_queue_enqueue_wait(ThreadedQueue* queue, const byte* data) noexcept memcpy(mem, data, queue->element_size); coms_pthread_cond_signal(&queue->cond); - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); } inline byte* thrd_queue_enqueue_start_wait(ThreadedQueue* queue) noexcept { - coms_pthread_mutex_lock(&queue->mutex); + mutex_lock(&queue->mutex); while (!ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment)) { coms_pthread_cond_wait(&queue->cond, &queue->mutex); @@ -218,7 +221,7 @@ inline void thrd_queue_enqueue_end_wait(ThreadedQueue* queue) noexcept { coms_pthread_cond_signal(&queue->cond); - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); } inline @@ -229,9 +232,9 @@ bool thrd_queue_dequeue(ThreadedQueue* queue, byte* data) noexcept } // we do this twice because the first one is very fast but may return a false positive - coms_pthread_mutex_lock(&queue->mutex); + mutex_lock(&queue->mutex); if (queue->head == queue->tail) { - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); return false; } @@ -244,25 +247,25 @@ bool thrd_queue_dequeue(ThreadedQueue* queue, byte* data) noexcept ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); coms_pthread_cond_signal(&queue->cond); - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); return true; } inline bool thrd_queue_empty(ThreadedQueue* queue) noexcept { - coms_pthread_mutex_lock(&queue->mutex); + mutex_lock(&queue->mutex); bool is_empty = queue->head == queue->tail; - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); return is_empty; } inline bool thrd_queue_full(ThreadedQueue* queue) noexcept { - coms_pthread_mutex_lock(&queue->mutex); + mutex_lock(&queue->mutex); bool is_full = !ring_commit_safe((RingMemory *) queue, queue->element_size, queue->alignment); - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); return is_full; } @@ 
-271,7 +274,7 @@ bool thrd_queue_full(ThreadedQueue* queue) noexcept { inline void thrd_queue_dequeue_wait(ThreadedQueue* queue, byte* data) noexcept { - coms_pthread_mutex_lock(&queue->mutex); + mutex_lock(&queue->mutex); while (queue->head == queue->tail) { coms_pthread_cond_wait(&queue->cond, &queue->mutex); @@ -281,13 +284,13 @@ void thrd_queue_dequeue_wait(ThreadedQueue* queue, byte* data) noexcept ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); coms_pthread_cond_signal(&queue->cond); - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); } inline byte* thrd_queue_dequeue_start_wait(ThreadedQueue* queue) noexcept { - coms_pthread_mutex_lock(&queue->mutex); + mutex_lock(&queue->mutex); while (queue->head == queue->tail) { coms_pthread_cond_wait(&queue->cond, &queue->mutex); @@ -302,104 +305,104 @@ void thrd_queue_dequeue_end_wait(ThreadedQueue* queue) noexcept ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); coms_pthread_cond_signal(&queue->cond); - coms_pthread_mutex_unlock(&queue->mutex); + mutex_unlock(&queue->mutex); } // Semaphore Lock inline -void thrd_queue_enqueue_sem_wait(ThreadedQueue* queue, const byte* data) noexcept +void thrd_queue_enqueue_coms_sem_wait(ThreadedQueue* queue, const byte* data) noexcept { - sem_wait(&queue->empty); - coms_pthread_mutex_lock(&queue->mutex); + coms_sem_wait(&queue->empty); + mutex_lock(&queue->mutex); byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment); memcpy(mem, data, queue->element_size); - coms_pthread_mutex_unlock(&queue->mutex); - sem_post(&queue->full); + mutex_unlock(&queue->mutex); + coms_sem_post(&queue->full); } inline -bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, uint64 wait) noexcept +bool thrd_queue_enqueue_semimedwait(ThreadedQueue* queue, const byte* data, uint64 wait) noexcept { - if (sem_timedwait(&queue->empty, wait)) { + if (semimedwait(&queue->empty, wait)) { return false; } - coms_pthread_mutex_lock(&queue->mutex); + mutex_lock(&queue->mutex); byte* mem = ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment); memcpy(mem, data, queue->element_size); - coms_pthread_mutex_unlock(&queue->mutex); - sem_post(&queue->full); + mutex_unlock(&queue->mutex); + coms_sem_post(&queue->full); return true; } inline -byte* thrd_queue_enqueue_start_sem_wait(ThreadedQueue* queue) noexcept +byte* thrd_queue_enqueue_start_coms_sem_wait(ThreadedQueue* queue) noexcept { - sem_wait(&queue->empty); - coms_pthread_mutex_lock(&queue->mutex); + coms_sem_wait(&queue->empty); + mutex_lock(&queue->mutex); return ring_get_memory((RingMemory *) queue, queue->element_size, queue->alignment); } inline -void thrd_queue_enqueue_end_sem_wait(ThreadedQueue* queue) noexcept +void thrd_queue_enqueue_end_coms_sem_wait(ThreadedQueue* queue) noexcept { - coms_pthread_mutex_unlock(&queue->mutex); - sem_post(&queue->full); + mutex_unlock(&queue->mutex); + coms_sem_post(&queue->full); } inline -byte* thrd_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data) noexcept +byte* thrd_queue_dequeue_coms_sem_wait(ThreadedQueue* queue, byte* data) noexcept { - sem_wait(&queue->full); - coms_pthread_mutex_lock(&queue->mutex); + coms_sem_wait(&queue->full); + mutex_lock(&queue->mutex); memcpy(data, queue->tail, queue->element_size); ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); - coms_pthread_mutex_unlock(&queue->mutex); - 
sem_post(&queue->empty); + mutex_unlock(&queue->mutex); + coms_sem_post(&queue->empty); } inline -bool thrd_queue_dequeue_sem_timedwait(ThreadedQueue* queue, byte* data, uint64 wait) noexcept +bool thrd_queue_dequeue_semimedwait(ThreadedQueue* queue, byte* data, uint64 wait) noexcept { - if (sem_timedwait(&queue->full, wait)) { + if (semimedwait(&queue->full, wait)) { return false; } - coms_pthread_mutex_lock(&queue->mutex); + mutex_lock(&queue->mutex); memcpy(data, queue->tail, queue->element_size); ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); - coms_pthread_mutex_unlock(&queue->mutex); - sem_post(&queue->empty); + mutex_unlock(&queue->mutex); + coms_sem_post(&queue->empty); return true; } inline -byte* thrd_queue_dequeue_start_sem_wait(ThreadedQueue* queue) noexcept +byte* thrd_queue_dequeue_start_coms_sem_wait(ThreadedQueue* queue) noexcept { - sem_wait(&queue->full); - coms_pthread_mutex_lock(&queue->mutex); + coms_sem_wait(&queue->full); + mutex_lock(&queue->mutex); return queue->tail; } inline -void thrd_queue_dequeue_end_sem_wait(ThreadedQueue* queue) noexcept +void thrd_queue_dequeue_end_coms_sem_wait(ThreadedQueue* queue) noexcept { ring_move_pointer((RingMemory *) queue, &queue->tail, queue->element_size, queue->alignment); - coms_pthread_mutex_unlock(&queue->mutex); - sem_post(&queue->empty); + mutex_unlock(&queue->mutex); + coms_sem_post(&queue->empty); } #endif \ No newline at end of file diff --git a/memory/ThreadedRingMemory.h b/memory/ThreadedRingMemory.h index 36a911b..6584773 100755 --- a/memory/ThreadedRingMemory.h +++ b/memory/ThreadedRingMemory.h @@ -30,7 +30,7 @@ struct ThreadedRingMemory { int32 alignment; // The ring memory ends here - coms_pthread_mutex_t mutex; + mutex lock; }; // @bug alignment should also include the end point, not just the start @@ -39,36 +39,36 @@ inline void thrd_ring_alloc(ThreadedRingMemory* ring, uint64 size, int32 alignment = 64) { ring_alloc((RingMemory *) ring, size, alignment); - coms_pthread_mutex_init(&ring->mutex, NULL); + mutex_init(&ring->lock, NULL); } inline void thrd_ring_init(ThreadedRingMemory* ring, BufferMemory* buf, uint64 size, int32 alignment = 64) { ring_init((RingMemory *) ring, buf, size, alignment); - coms_pthread_mutex_init(&ring->mutex, NULL); + mutex_init(&ring->lock, NULL); } inline void thrd_ring_init(ThreadedRingMemory* ring, byte* buf, uint64 size, int32 alignment = 64) { ring_init((RingMemory *) ring, buf, size, alignment); - coms_pthread_mutex_init(&ring->mutex, NULL); + mutex_init(&ring->lock, NULL); } inline void thrd_ring_free(ThreadedRingMemory* ring) { ring_free((RingMemory *) ring); - coms_pthread_mutex_destroy(&ring->mutex); + mutex_destroy(&ring->lock); } inline byte* thrd_ring_calculate_position(ThreadedRingMemory* ring, uint64 size, byte aligned = 4) noexcept { - coms_pthread_mutex_lock(&ring->mutex); + mutex_lock(&ring->lock); byte* result = ring_calculate_position((RingMemory *) ring, size, aligned); - coms_pthread_mutex_unlock(&ring->mutex); + mutex_unlock(&ring->lock); return result; } @@ -76,24 +76,24 @@ byte* thrd_ring_calculate_position(ThreadedRingMemory* ring, uint64 size, byte a inline void thrd_ring_reset(ThreadedRingMemory* ring) noexcept { - coms_pthread_mutex_lock(&ring->mutex); + mutex_lock(&ring->lock); ring_reset((RingMemory *) ring); - coms_pthread_mutex_unlock(&ring->mutex); + mutex_unlock(&ring->lock); } // Moves a pointer based on the size you want to consume (new position = after consuming size) void 
thrd_ring_move_pointer(ThreadedRingMemory* ring, byte** pos, uint64 size, byte aligned = 4) noexcept { - coms_pthread_mutex_lock(&ring->mutex); + mutex_lock(&ring->lock); ring_move_pointer((RingMemory *) ring, pos, size, aligned); - coms_pthread_mutex_unlock(&ring->mutex); + mutex_unlock(&ring->lock); } byte* thrd_ring_get_memory(ThreadedRingMemory* ring, uint64 size, byte aligned = 4, bool zeroed = false) noexcept { - coms_pthread_mutex_lock(&ring->mutex); + mutex_lock(&ring->lock); byte* result = ring_get_memory((RingMemory *) ring, size, aligned, zeroed); - coms_pthread_mutex_unlock(&ring->mutex); + mutex_unlock(&ring->lock); return result; } @@ -101,9 +101,9 @@ byte* thrd_ring_get_memory(ThreadedRingMemory* ring, uint64 size, byte aligned = // Same as ring_get_memory but DOESN'T move the head byte* thrd_ring_get_memory_nomove(ThreadedRingMemory* ring, uint64 size, byte aligned = 4, bool zeroed = false) noexcept { - coms_pthread_mutex_lock(&ring->mutex); + mutex_lock(&ring->lock); byte* result = ring_get_memory_nomove((RingMemory *) ring, size, aligned, zeroed); - coms_pthread_mutex_unlock(&ring->mutex); + mutex_unlock(&ring->lock); return result; } @@ -113,9 +113,9 @@ byte* thrd_ring_get_memory_nomove(ThreadedRingMemory* ring, uint64 size, byte al inline byte* thrd_ring_get_element(ThreadedRingMemory* ring, uint64 element, uint64 size) noexcept { - coms_pthread_mutex_lock(&ring->mutex); + mutex_lock(&ring->lock); byte* result = ring_get_element((RingMemory *) ring, element, size); - coms_pthread_mutex_unlock(&ring->mutex); + mutex_unlock(&ring->lock); return result; } @@ -126,9 +126,9 @@ byte* thrd_ring_get_element(ThreadedRingMemory* ring, uint64 element, uint64 siz inline bool thrd_ring_commit_safe(ThreadedRingMemory* ring, uint64 size, byte aligned = 4) noexcept { - coms_pthread_mutex_lock(&ring->mutex); + mutex_lock(&ring->lock); bool result = ring_commit_safe((RingMemory *) ring, size, aligned); - coms_pthread_mutex_unlock(&ring->mutex); + mutex_unlock(&ring->lock); return result; } @@ -148,9 +148,9 @@ void thrd_ring_force_tail_update(const ThreadedRingMemory* ring) noexcept inline int64 thrd_ring_dump(ThreadedRingMemory* ring, byte* data) noexcept { - coms_pthread_mutex_lock(&ring->mutex); + mutex_lock(&ring->lock); int64 result = ring_dump((RingMemory *) ring, data); - coms_pthread_mutex_unlock(&ring->mutex); + mutex_unlock(&ring->lock); return result; } diff --git a/module/Module.h b/module/Module.h index f696a94..6d59811 100755 --- a/module/Module.h +++ b/module/Module.h @@ -2,7 +2,7 @@ #define COMS_MODULE_H #include "../stdlib/Types.h" -#include "../../GameEngine/system/Library.h" +#include "../../cOMS/system/Library.h" enum ModuleType { MODULE_TYPE_HUD, diff --git a/module/WebModule.h b/module/WebModule.h new file mode 100644 index 0000000..e93b211 --- /dev/null +++ b/module/WebModule.h @@ -0,0 +1,9 @@ +#ifndef COMS_MODULE_WEB_H +#define COMS_MODULE_WEB_H + +#include "../stdlib/Types.h" + +struct WebModule { +}; + +#endif \ No newline at end of file diff --git a/platform/linux/FileUtils.cpp b/platform/linux/FileUtils.cpp index 5f630a2..a0da035 100755 --- a/platform/linux/FileUtils.cpp +++ b/platform/linux/FileUtils.cpp @@ -275,7 +275,7 @@ void file_read(const char* __restrict path, FileBody* __restrict file, RingMemor } ssize_t bytes_read = read(fp, file->content, file->size); - if (bytes_read <= 0 || (size_t) bytes_read != file->size) { + if (bytes_read <= 0) { close(fp); file->content = NULL; file->size = 0; @@ -378,6 +378,25 @@ bool file_write(const char* __restrict path, 
const FileBody* __restrict file) { return true; } +FileHandle file_read_handle(const char* path) { + FileHandle fd; + char full_path[MAX_PATH]; + + if (*path == '.') { + relative_to_absolute(path, full_path); + fd = open(full_path, O_RDONLY); + } else { + fd = open(path, O_RDONLY); + } + + if (fd == -1) { + perror("open"); + return -1; + } + + return fd; +} + inline void file_close_handle(FileHandle fp) { diff --git a/platform/linux/GuiUtils.h b/platform/linux/GuiUtils.h new file mode 100644 index 0000000..580a8ed --- /dev/null +++ b/platform/linux/GuiUtils.h @@ -0,0 +1,61 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_PLATFORM_LINUX_GUI_UTILS_H +#define COMS_PLATFORM_LINUX_GUI_UTILS_H + +#include "../../stdlib/Types.h" +#include +#include +#include +#include +#include + +void clipboard_get(char* text, int32 max_length) +{ + *text = '\0'; + + Display *display = XOpenDisplay(NULL); + if (display == NULL) { + return; + } + + Atom clipboard = XInternAtom(display, "CLIPBOARD", false); + Atom utf8_string = XInternAtom(display, "UTF8_STRING", false); + Atom xa_string = XInternAtom(display, "STRING", false); + Window window = XDefaultRootWindow(display); + + XConvertSelection(display, clipboard, utf8_string, xa_string, window, CurrentTime); + XEvent event; + XNextEvent(display, &event); + + if (event.type == SelectionNotify) { + if (event.xselection.property) { + Atom type; + int32 format; + unsigned long nitems, bytes_after; + byte* data = NULL; + + XGetWindowProperty( + display, event.xselection.requestor, + event.xselection.property, 0, (~0L), false, + AnyPropertyType, &type, &format, &nitems, + &bytes_after, &data + ); + + if (data) { + str_copy_short(text, clipboard_text, max_length); + XFree(data); + } + } + } + + XCloseDisplay(display); +} + +#endif \ No newline at end of file diff --git a/platform/linux/Library.cpp b/platform/linux/Library.cpp index 789527b..6ac6a0c 100755 --- a/platform/linux/Library.cpp +++ b/platform/linux/Library.cpp @@ -18,7 +18,7 @@ #include "../../stdlib/Types.h" #include "../../utils/StringUtils.h" #include "../../system/Library.h" -#include "UtilsLinux.h" +#include "../../system/FileUtils.cpp" inline bool library_load(Library* lib) @@ -48,8 +48,11 @@ bool library_load(Library* lib) // @question we might want RTLD_NOW? 
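// Regarding the @question above: RTLD_LAZY defers symbol resolution until the first call
// through each symbol, so a missing export only surfaces later at lookup time; RTLD_NOW
// resolves everything inside dlopen() itself and reports the failure immediately via dlerror().
// Illustration-only sketch (not part of this patch) of what an eager-binding load looks like,
// using only the standard POSIX dlfcn API; load_library_eager is a hypothetical helper name:
#if 0
#include <dlfcn.h>
#include <cstdio>

void* load_library_eager(const char* path) {
    // Resolve all undefined symbols now; fail fast here instead of at the first call.
    void* handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
    if (!handle) {
        fprintf(stderr, "dlopen failed: %s\n", dlerror());
    }
    return handle;
}
#endif
// The trade-off is a slower initial load for large libraries versus detecting broken
// exports before any function table (like lib->functions below) is populated.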
lib->handle = dlopen(dst, RTLD_LAZY); if (!lib->handle) { + const char* error = dlerror(); + LOG_1(error); + lib->is_valid = false; - return lib->is_valid; + return false; } lib->is_valid = true; @@ -59,6 +62,7 @@ bool library_load(Library* lib) lib->functions[c] = function; } else { lib->is_valid = false; + return false; } } diff --git a/platform/linux/SystemInfo.cpp b/platform/linux/SystemInfo.cpp index 60f51b6..635db6d 100755 --- a/platform/linux/SystemInfo.cpp +++ b/platform/linux/SystemInfo.cpp @@ -20,8 +20,6 @@ #include #include -#include -#include // -lX11 -lXrandr @@ -76,46 +74,54 @@ uint16 system_country_code() } void mainboard_info_get(MainboardInfo* info) { - FileBody file = {}; + FileBody file; - file.content = info->name; - file.size = sizeof(info->name); + file.content = (byte *) info->name; + file.size = sizeof(info->name) - 1; file_read("/sys/class/dmi/id/board_name", &file); - - file.content = info->serial_number; - file.size = sizeof(info->serial_number); - file_read("/sys/class/dmi/id/board_serial", &file); - info->name[sizeof(info->name) - 1] = '\0'; - info->serial_number[sizeof(info->serial_number) - 1] = '\0'; - info->name[strcspn(info->name, "\n")] = '\0'; - info->serial_number[strcspn(info->serial_number, "\n")] = '\0'; + file.content = (byte *) info->serial_number; + file.size = sizeof(info->serial_number) - 1; + file_read("/sys/class/dmi/id/board_serial", &file); + info->name[sizeof(info->serial_number) - 1] = '\0'; } int32 network_info_get(NetworkInfo* info) { - char path[256]; + char path[64]; + memset(path, 0, sizeof(path)); + struct stat st; int32 i = 0; FileBody file = {}; + memcpy(path, "/sys/class/net/eth", sizeof("/sys/class/net/eth")); for (i = 0; i < 4; ++i) { - sprintf_fast(path, "/sys/class/net/eth%d", i); + int_to_str(i, path + sizeof("/sys/class/net/eth")); - if (stat(path, &st) == 0) { - // Read MAC address - sprintf_fast(path, "/sys/class/net/eth%d/address", i); - file.content = info[i].mac; - file.size = sizeof(info[i].mac); - file_read(path, &file); - - // Read interface name - sprintf_fast(path, "/sys/class/net/eth%d/ifindex", i); - file.content = info[i].slot; - file.size = sizeof(info[i].slot); - file_read(path, &file); + if (stat(path, &st) != 0) { + break; } + + char path2[64]; + memcpy(path2, path, sizeof("/sys/class/net/eth")); + + // Read MAC address + path2[sizeof("/sys/class/net/eth") + 1] = '\0'; + str_concat_append(path2, "/address"); + + file.content = info[i].mac; + file.size = sizeof(info[i].mac) - 1; + file_read(path2, &file); + + // Read interface name + path2[sizeof("/sys/class/net/eth") + 1] = '\0'; + str_concat_append(path2, "/ifindex"); + + file.content = (byte *) info[i].slot; + file.size = sizeof(info[i].slot) - 1; + file_read(path, &file); } return i; @@ -129,15 +135,15 @@ void cpu_info_get(CpuInfo* info) { char* internal_pos = NULL; while (file_read_line(fp, line, sizeof(line), internal_buffer, &internal_buffer_size, &internal_pos)) { - if (str_compare(line, "vendor_id", 9) == 0) { + if (str_compare(line, "vendor_id", sizeof("vendor_id") - 1) == 0) { sscanf(line, "vendor_id : %s", info->vendor); - } else if (str_compare(line, "model", 5) == 0) { - sscanf(line, "model : %d", &info->model); - } else if (str_compare(line, "cpu MHz", 7) == 0) { + } else if (str_compare(line, "model", sizeof("model") - 1) == 0) { + sscanf(line, "model : %hhd", &info->model); + } else if (str_compare(line, "cpu MHz", sizeof("cpu MHz") - 1) == 0) { sscanf(line, "cpu MHz : %d", &info->mhz); - } else if (str_compare(line, "cpu cores", 10) == 0) { 
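// Context for the %hhd/%hd changes in this hunk: scanning with a plain "%d" writes a full
// int through the destination pointer, so targets narrower than 32 bits (the byte-sized
// model field, the 16-bit core count) would have neighbouring struct members overwritten.
// Illustration-only sketch (not part of this patch); CpuFields is a stand-in type, not the
// project's CpuInfo:
#if 0
#include <cstdio>
#include <cstdint>

struct CpuFields {
    uint8_t  model;       // "%hhu" writes exactly 1 byte
    uint16_t core_count;  // "%hu"  writes exactly 2 bytes
};

void parse_cpuinfo_line(const char* line, CpuFields* out) {
    // Width modifiers make the conversion match the field size instead of int.
    if (sscanf(line, "model : %hhu", &out->model) == 1) {
        return;
    }
    sscanf(line, "cpu cores : %hu", &out->core_count);
}
#endif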
- sscanf(line, "cpu cores : %d", &info->thread_coun); - } else if (str_compare(line, "model name", 10) == 0) { + } else if (str_compare(line, "cpu cores", sizeof("cpu cores") - 1) == 0) { + sscanf(line, "cpu cores : %hd", &info->core_count); + } else if (str_compare(line, "model name", sizeof("model name") - 1) == 0) { sscanf(line, "model name : %63[^\n]", info->brand); } } @@ -232,13 +238,13 @@ uint32 gpu_info_get(GpuInfo* info) { ++count; } - fclose(fp); + pclose(fp); return count; } uint32 display_info_get(DisplayInfo* info) { - FILE* fp = popen("xrandr --current", "r"); + FILE* fp = popen("xrandr --current 2>/dev/null", "r"); if (fp == NULL) { return 0; } @@ -262,49 +268,9 @@ uint32 display_info_get(DisplayInfo* info) { } } - fclose(fp); + pclose(fp); return count; } -bool is_dedicated_gpu_connected() { - Display* display = XOpenDisplay(NULL); - if (!display) { - return 0; - } - - Window root = DefaultRootWindow(display); - XRRScreenResources* screenResources = XRRGetScreenResources(display, root); - if (!screenResources) { - XCloseDisplay(display); - return 0; - } - - for (int i = 0; i < screenResources->noutput; i++) { - XRROutputInfo* outputInfo = XRRGetOutputInfo(display, screenResources, screenResources->outputs[i]); - if (outputInfo && outputInfo->connection == RR_Connected) { - XRRProviderInfo* providerInfo = XRRGetProviderInfo(display, screenResources, outputInfo->provider); - if (providerInfo && providerInfo->name) { - if (strstr(providerInfo->name, "NVIDIA") - || strstr(providerInfo->name, "AMD") - || strstr(providerInfo->name, "Intel") - ) { - XRRFreeOutputInfo(outputInfo); - XRRFreeProviderInfo(providerInfo); - XRRFreeScreenResources(screenResources); - XCloseDisplay(display); - return true; - } - } - XRRFreeProviderInfo(providerInfo); - } - XRRFreeOutputInfo(outputInfo); - } - - XRRFreeScreenResources(screenResources); - XCloseDisplay(display); - - return false; -} - #endif \ No newline at end of file diff --git a/platform/linux/TimeUtils.h b/platform/linux/TimeUtils.h index 0ea6c0a..80de3ce 100755 --- a/platform/linux/TimeUtils.h +++ b/platform/linux/TimeUtils.h @@ -20,6 +20,15 @@ uint64 system_time() { return (uint64_t) ts.tv_sec * 1000000ULL + (uint64_t) ts.tv_nsec / 1000ULL; } +// Used as initializer for 64bit random number generators instead of time() +inline +uint64 time_index() { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + + return (uint64) ts.tv_sec * 1000000ULL + (uint64) (ts.tv_nsec / 1000); +} + uint64 time_mu() { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); diff --git a/platform/linux/UtilsLinux.h b/platform/linux/UtilsLinux.h index d5189de..e69de29 100755 --- a/platform/linux/UtilsLinux.h +++ b/platform/linux/UtilsLinux.h @@ -1,61 +0,0 @@ -/** - * Jingga - * - * @copyright Jingga - * @license OMS License 2.0 - * @version 1.0.0 - * @link https://jingga.app - */ -#ifndef COMS_UTILS_LINUX_H -#define COMS_UTILS_LINUX_H - -#include "../../stdlib/Types.h" -#include -#include -#include -#include -#include - -void clipboard_get(char* text, int32 max_length) -{ - *text = '\0'; - - Display *display = XOpenDisplay(NULL); - if (display == NULL) { - return; - } - - Atom clipboard = XInternAtom(display, "CLIPBOARD", false); - Atom utf8_string = XInternAtom(display, "UTF8_STRING", false); - Atom xa_string = XInternAtom(display, "STRING", false); - Window window = XDefaultRootWindow(display); - - XConvertSelection(display, clipboard, utf8_string, xa_string, window, CurrentTime); - XEvent event; - XNextEvent(display, &event); - - if (event.type 
== SelectionNotify) { - if (event.xselection.property) { - Atom type; - int32 format; - unsigned long nitems, bytes_after; - byte* data = NULL; - - XGetWindowProperty( - display, event.xselection.requestor, - event.xselection.property, 0, (~0L), false, - AnyPropertyType, &type, &format, &nitems, - &bytes_after, &data - ); - - if (data) { - str_copy_short(text, clipboard_text, max_length); - XFree(data); - } - } - } - - XCloseDisplay(display); -} - -#endif \ No newline at end of file diff --git a/platform/linux/network/Server.h b/platform/linux/network/Server.h index f0e9c8f..891ebcd 100755 --- a/platform/linux/network/Server.h +++ b/platform/linux/network/Server.h @@ -25,21 +25,27 @@ #include "../../../network/SocketConnection.h" #include "../../../utils/EndianUtils.h" +void socket_non_blocking(SocketConnection* con) +{ + int flags = fcntl(con->sd, F_GETFL, 0); + fcntl(con->sd, F_SETFL, flags | O_NONBLOCK); +} + // WARNING: requires `sudo setcap cap_net_raw=eip /path/to/your_program` -void socket_server_raw_create(SocketConnection* con) { +bool socket_server_raw_create(SocketConnection* con) { con->sd = socket(AF_INET6, SOCK_RAW, 255); int32 flags; if ((flags = fcntl(con->sd, F_GETFL, 0)) < 0) { close(con->sd); con->sd = 0; - return; + return false; } if (fcntl(con->sd, F_SETFL, flags | O_NONBLOCK) < 0) { close(con->sd); con->sd = 0; - return; + return false; } memset(&con->addr, 0, sizeof(con->addr)); @@ -50,25 +56,27 @@ void socket_server_raw_create(SocketConnection* con) { if (bind(con->sd, (sockaddr *) &con->addr, sizeof(con->addr)) < 0) { close(con->sd); con->sd = 0; - return; + return false; } + + return true; } // WARNING: requires `sudo setcap cap_net_raw=eip /path/to/your_program` -void socket_server_udp_raw_create(SocketConnection* con) { +bool socket_server_udp_raw_create(SocketConnection* con) { con->sd = socket(AF_INET6, SOCK_RAW, IPPROTO_UDP); int32 flags; if ((flags = fcntl(con->sd, F_GETFL, 0)) < 0) { close(con->sd); con->sd = 0; - return; + return false; } if (fcntl(con->sd, F_SETFL, flags | O_NONBLOCK) < 0) { close(con->sd); con->sd = 0; - return; + return false; } memset(&con->addr, 0, sizeof(con->addr)); @@ -79,24 +87,26 @@ void socket_server_udp_raw_create(SocketConnection* con) { if (bind(con->sd, (sockaddr *) &con->addr, sizeof(con->addr)) < 0) { close(con->sd); con->sd = 0; - return; + return false; } + + return true; } -void socket_server_udp_create(SocketConnection* con) { +bool socket_server_udp_create(SocketConnection* con) { con->sd = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP); int32 flags; if ((flags = fcntl(con->sd, F_GETFL, 0)) < 0) { close(con->sd); con->sd = 0; - return; + return false; } if (fcntl(con->sd, F_SETFL, flags | O_NONBLOCK) < 0) { close(con->sd); con->sd = 0; - return; + return false; } memset(&con->addr, 0, sizeof(con->addr)); @@ -107,33 +117,72 @@ void socket_server_udp_create(SocketConnection* con) { if (bind(con->sd, (sockaddr *) &con->addr, sizeof(con->addr)) < 0) { close(con->sd); con->sd = 0; - return; + return false; } + + return true; } bool socket_server_http_create(SocketConnection* con) { - // Create socket con->sd = socket(AF_INET6, SOCK_STREAM, 0); if (con->sd < 0) { con->sd = 0; return false; } - // Bind socket + int32 opt = 1; + setsockopt(con->sd, SOL_SOCKET, SO_REUSEADDR, (const char*) &opt, sizeof(opt)); + memset(&con->addr, 0, sizeof(con->addr)); con->addr.sin6_family = AF_INET6; con->addr.sin6_addr = in6addr_any; con->addr.sin6_port = htons(con->port); if (bind(con->sd, (struct sockaddr *) &con->addr, sizeof(con->addr)) < 0) 
{ + close(con->sd); + return false; } - // Listen for incoming connections if (listen(con->sd, 5) < 0) { close(con->sd); con->sd = 0; + + return false; + } + + return true; +} + +bool socket_server_websocket_create(SocketConnection* con) { + con->sd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP); + + int32 flags; + if ((flags = fcntl(con->sd, F_GETFL, 0)) < 0 || + fcntl(con->sd, F_SETFL, flags | O_NONBLOCK) < 0) { + close(con->sd); + con->sd = 0; + return false; + } + + int opt = 1; + setsockopt(con->sd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + + memset(&con->addr, 0, sizeof(con->addr)); + con->addr.sin6_family = AF_INET6; + con->addr.sin6_addr = in6addr_any; + con->addr.sin6_port = htons(con->port); + + if (bind(con->sd, (sockaddr*)&con->addr, sizeof(con->addr)) < 0) { + close(con->sd); + con->sd = 0; + return false; + } + + if (listen(con->sd, SOMAXCONN) < 0) { + close(con->sd); + con->sd = 0; return false; } diff --git a/platform/linux/threading/Semaphore.h b/platform/linux/threading/Semaphore.h index c92ade9..329153b 100755 --- a/platform/linux/threading/Semaphore.h +++ b/platform/linux/threading/Semaphore.h @@ -10,5 +10,23 @@ #define COMS_PLATFORM_LINUX_THREADING_SEMAPHORE_H #include +#include +#include "../../../stdlib/Types.h" +#include "../../../compiler/CompilerUtils.h" + +typedef sem_t sem; + +#define coms_sem_init(semaphore, value) sem_init((semaphore), 0, (value)) +#define coms_sem_destroy(semaphore) sem_destroy((semaphore)) +#define coms_sem_wait(semaphore) sem_wait((semaphore)) +#define coms_sem_post(semaphore) sem_post((semaphore)) + +int32 semimedwait(sem* semaphore, int32 wait) { + timespec ts; + clock_gettime(CLOCK_REALTIME, &ts); + ts.tv_sec += wait; + + return sem_timedwait(semaphore, &ts); +} #endif \ No newline at end of file diff --git a/platform/linux/threading/Spinlock.cpp b/platform/linux/threading/Spinlock.cpp index f961659..07c9449 100755 --- a/platform/linux/threading/Spinlock.cpp +++ b/platform/linux/threading/Spinlock.cpp @@ -20,7 +20,7 @@ void spinlock_start(spinlock32* lock, int32 delay = 10) { } } -inline +FORCE_INLINE void spinlock_end(spinlock32* lock) { __atomic_store_n(lock, 0, __ATOMIC_RELEASE); } diff --git a/platform/linux/threading/Thread.h b/platform/linux/threading/Thread.h index 897a5cb..aa3cfb4 100755 --- a/platform/linux/threading/Thread.h +++ b/platform/linux/threading/Thread.h @@ -18,24 +18,29 @@ #include #include "../../../stdlib/Types.h" +#include "../../../compiler/CompilerUtils.h" #include "../Allocator.h" #include "ThreadDefines.h" +#include "Atomic.h" -inline int32 futex_wait(int32 *futex, int32 val) { +FORCE_INLINE +int32 futex_wait(volatile int32* futex, int32 val) { return syscall(SYS_futex, futex, FUTEX_WAIT, val, NULL, NULL, 0); } -inline int32 futex_wake(int32 *futex, int32 n) { +FORCE_INLINE +int32 futex_wake(volatile int32* futex, int32 n) { return syscall(SYS_futex, futex, FUTEX_WAKE, n, NULL, NULL, 0); } -inline int32 coms_pthread_create(coms_pthread_t* thread, void*, ThreadJobFunc start_routine, void* arg) { +inline +int32 coms_pthread_create(coms_pthread_t* thread, void*, ThreadJobFunc start_routine, void* arg) { if (thread == NULL || start_routine == NULL) { return 1; } int32 flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM; - *thread = clone((int32 (*)(void*))start_routine, (void*)((char*)malloc(4096) + 4096), flags, arg); + *thread = clone((int32 (*)(void*))start_routine, NULL, flags, arg); if (*thread == -1) { return 1; } @@ -43,186 +48,220 @@ inline int32 
coms_pthread_create(coms_pthread_t* thread, void*, ThreadJobFunc st return 0; } -inline int32 coms_pthread_join(coms_pthread_t thread, void** retval) { - if (syscall(SYS_waitid, P_PID, thread, retval, WEXITED, NULL) == -1) { - return 1; - } - return 0; +FORCE_INLINE +int32 coms_pthread_join(coms_pthread_t thread, void** retval) { + return syscall(SYS_waitid, P_PID, thread, retval, WEXITED, NULL) == -1 + ? 1 + : 0; } -inline int32 coms_pthread_detach(coms_pthread_t) { +FORCE_INLINE +int32 coms_pthread_detach(coms_pthread_t) { // In Linux, threads are automatically detached when they exit. return 0; } -inline int32 coms_pthread_mutex_init(coms_pthread_mutex_t* mutex, coms_pthread_mutexattr_t*) { - if (mutex == NULL) { - return 1; - } - mutex->futex = 0; - return 0; +FORCE_INLINE +int32 mutex_init(mutex* mutex, mutexattr_t*) { + return mutex == NULL ? 1 : 0; } -inline int32 coms_pthread_mutex_destroy(coms_pthread_mutex_t* mutex) { - if (mutex == NULL) { - return 1; - } - return 0; +FORCE_INLINE +int32 mutex_destroy(mutex* mutex) { + return mutex == NULL ? 1 : 0; } -inline int32 coms_pthread_mutex_lock(coms_pthread_mutex_t* mutex) { +inline +int32 mutex_lock(mutex* mutex) { if (mutex == NULL) { return 1; } - while (__atomic_exchange_n(&mutex->futex, 1, __ATOMIC_ACQUIRE) != 0) { + + while (atomic_fetch_set_acquire(&mutex->futex, 1) != 0) { futex_wait(&mutex->futex, 1); } + return 0; } -inline int32 coms_pthread_mutex_unlock(coms_pthread_mutex_t* mutex) { +inline +int32 mutex_unlock(mutex* mutex) { if (mutex == NULL) { return 1; } - __atomic_store_n(&mutex->futex, 0, __ATOMIC_RELEASE); + + atomic_set_release(&mutex->futex, 0); futex_wake(&mutex->futex, 1); + return 0; } -inline int32 coms_pthread_cond_init(coms_pthread_cond_t* cond, coms_pthread_condattr_t*) { +inline +int32 coms_pthread_cond_init(mutex_cond* cond, coms_pthread_condattr_t*) { if (cond == NULL) { return 1; } + cond->futex = 0; + return 0; } -inline int32 coms_pthread_cond_destroy(coms_pthread_cond_t* cond) { - if (cond == NULL) { - return 1; - } - return 0; +FORCE_INLINE +int32 coms_pthread_cond_destroy(mutex_cond* cond) { + return cond == NULL ? 
1 : 0; } -inline int32 coms_pthread_cond_timedwait(coms_pthread_cond_t* cond, coms_pthread_mutex_t* mutex, const struct timespec*) { +inline +int32 mutex_condimedwait(mutex_cond* cond, mutex* mutex, const struct timespec*) { if (cond == NULL || mutex == NULL) { return 1; } - int32 oldval = __atomic_load_n(&cond->futex, __ATOMIC_ACQUIRE); - coms_pthread_mutex_unlock(mutex); + + int32 oldval = atomic_get_acquire(&cond->futex); + mutex_unlock(mutex); futex_wait(&cond->futex, oldval); - coms_pthread_mutex_lock(mutex); + mutex_lock(mutex); + return 0; } -inline int32 coms_pthread_cond_wait(coms_pthread_cond_t* cond, coms_pthread_mutex_t* mutex) { - return coms_pthread_cond_timedwait(cond, mutex, NULL); +inline +int32 coms_pthread_cond_wait(mutex_cond* cond, mutex* mutex) { + return mutex_condimedwait(cond, mutex, NULL); } -inline int32 coms_pthread_cond_signal(coms_pthread_cond_t* cond) { +inline +int32 coms_pthread_cond_signal(mutex_cond* cond) { if (cond == NULL) { return 1; } - __atomic_add_fetch(&cond->futex, 1, __ATOMIC_RELEASE); + + atomic_increment_release(&cond->futex); futex_wake(&cond->futex, 1); + return 0; } -inline int32 coms_pthread_cond_broadcast(coms_pthread_cond_t* cond) { +inline +int32 coms_pthread_cond_broadcast(mutex_cond* cond) { if (cond == NULL) { return 1; } - __atomic_add_fetch(&cond->futex, 1, __ATOMIC_RELEASE); + + atomic_increment_release(&cond->futex); futex_wake(&cond->futex, INT32_MAX); + return 0; } -inline int32 coms_pthread_rwlock_init(coms_pthread_rwlock_t* rwlock, const coms_pthread_rwlockattr_t*) { +inline +int32 coms_pthread_rwlock_init(coms_pthread_rwlock_t* rwlock, const coms_pthread_rwlockattr_t*) { if (rwlock == NULL) { return 1; } + rwlock->futex = 0; rwlock->exclusive = false; + return 0; } -inline int32 coms_pthread_rwlock_destroy(coms_pthread_rwlock_t* rwlock) { +inline +int32 coms_pthread_rwlock_destroy(coms_pthread_rwlock_t* rwlock) { if (rwlock == NULL) { return 1; } + return 0; } -inline int32 coms_pthread_rwlock_rdlock(coms_pthread_rwlock_t* rwlock) { +inline +int32 coms_pthread_rwlock_rdlock(coms_pthread_rwlock_t* rwlock) { if (rwlock == NULL) { return 1; } - while (1) { - int32 val = __atomic_load_n(&rwlock->futex, __ATOMIC_ACQUIRE); + + while (true) { + int32 val = atomic_get_acquire(&rwlock->futex); if (val >= 0 && __atomic_compare_exchange_n(&rwlock->futex, &val, val + 1, false, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) { break; } futex_wait(&rwlock->futex, val); } + return 0; } -inline int32 coms_pthread_rwlock_tryrdlock(coms_pthread_rwlock_t* rwlock) { +inline +int32 coms_pthread_rwlock_tryrdlock(coms_pthread_rwlock_t* rwlock) { if (rwlock == NULL) { return 1; } - int32 val = __atomic_load_n(&rwlock->futex, __ATOMIC_ACQUIRE); + + int32 val = atomic_get_acquire(&rwlock->futex); if (val >= 0 && __atomic_compare_exchange_n(&rwlock->futex, &val, val + 1, false, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) { return 0; } + return 1; } -inline int32 coms_pthread_rwlock_wrlock(coms_pthread_rwlock_t* rwlock) { +inline +int32 coms_pthread_rwlock_wrlock(coms_pthread_rwlock_t* rwlock) { if (rwlock == NULL) { return 1; } - while (1) { - int32 val = __atomic_load_n(&rwlock->futex, __ATOMIC_ACQUIRE); + + while (true) { + int32 val = atomic_get_acquire(&rwlock->futex); if (val == 0 && __atomic_compare_exchange_n(&rwlock->futex, &val, -1, false, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) { rwlock->exclusive = true; break; } futex_wait(&rwlock->futex, val); } + return 0; } -inline int32 coms_pthread_rwlock_trywrlock(coms_pthread_rwlock_t* rwlock) { +inline +int32 
coms_pthread_rwlock_trywrlock(coms_pthread_rwlock_t* rwlock) { if (rwlock == NULL) { return 1; } - int32 val = __atomic_load_n(&rwlock->futex, __ATOMIC_ACQUIRE); + + int32 val = atomic_get_acquire(&rwlock->futex); if (val == 0 && __atomic_compare_exchange_n(&rwlock->futex, &val, -1, false, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) { rwlock->exclusive = true; return 0; } + return 1; } -inline int32 coms_pthread_rwlock_unlock(coms_pthread_rwlock_t* rwlock) { +inline +int32 coms_pthread_rwlock_unlock(coms_pthread_rwlock_t* rwlock) { if (rwlock == NULL) { return 1; } + if (rwlock->exclusive) { rwlock->exclusive = false; - __atomic_store_n(&rwlock->futex, 0, __ATOMIC_RELEASE); + atomic_set_release(&rwlock->futex, 0); futex_wake(&rwlock->futex, 1); } else { - int32 val = __atomic_sub_fetch(&rwlock->futex, 1, __ATOMIC_RELEASE); + int32 val = atomic_decrement_release(&rwlock->futex); if (val == 0) { futex_wake(&rwlock->futex, 1); } } + return 0; } -inline uint32 pcthread_get_num_procs() { +FORCE_INLINE +uint32 pcthread_get_num_procs() { return sysconf(_SC_NPROCESSORS_ONLN); } diff --git a/platform/linux/threading/ThreadDefines.h b/platform/linux/threading/ThreadDefines.h index cb982a1..0edae2e 100755 --- a/platform/linux/threading/ThreadDefines.h +++ b/platform/linux/threading/ThreadDefines.h @@ -17,22 +17,22 @@ #define THREAD_RETURN int32 typedef THREAD_RETURN (*ThreadJobFunc)(void*); -typedef struct { - int32 futex; -} coms_pthread_mutex_t; +struct mutex { + alignas(4) atomic_32 int32 futex; +}; -typedef void coms_pthread_mutexattr_t; +typedef void mutexattr_t; typedef void coms_pthread_condattr_t; typedef void coms_pthread_rwlockattr_t; -typedef struct { - int32 futex; -} coms_pthread_cond_t; +struct mutex_cond { + alignas(4) atomic_32 int32 futex; +} ; -typedef struct { - int32 futex; +struct coms_pthread_rwlock_t { + alignas(4) atomic_32 int32 futex; bool exclusive; -} coms_pthread_rwlock_t; +}; typedef int coms_pthread_t; diff --git a/platform/win32/GuiUtils.h b/platform/win32/GuiUtils.h new file mode 100644 index 0000000..a29b26e --- /dev/null +++ b/platform/win32/GuiUtils.h @@ -0,0 +1,170 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef COMS_PLATFORM_WIN32_GUI_UTILS_H +#define COMS_PLATFORM_WIN32_GUI_UTILS_H + +#include +#include "Window.h" +#include "../../stdlib/Types.h" +#include "../../utils/TestUtils.h" + +// @question Shouldn't this function and the next one accept a parameter of what to add/remove? 
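// One possible answer to the @question above: fold both helpers into a single function that
// takes the style bits to toggle, so callers are not tied to WS_OVERLAPPEDWINDOW.
// Illustration-only sketch (not part of this patch); it assumes the same Window struct with
// an hwnd member that the helpers below use, and window_set_style_bits is a hypothetical name:
#if 0
inline
void window_set_style_bits(Window* w, LONG_PTR bits, bool enable)
{
    LONG_PTR style = GetWindowLongPtrA(w->hwnd, GWL_STYLE);
    style = enable ? (style | bits) : (style & ~bits);
    SetWindowLongPtrA(w->hwnd, GWL_STYLE, style);

    // Style changes only take effect after a frame-changed repositioning.
    SetWindowPos(w->hwnd, NULL, 0, 0, 0, 0,
        SWP_NOMOVE | SWP_NOSIZE | SWP_NOZORDER | SWP_FRAMECHANGED);
}

// Usage: window_set_style_bits(w, WS_OVERLAPPEDWINDOW, false);  // borderless
//        window_set_style_bits(w, WS_OVERLAPPEDWINDOW, true);   // restore decorations
#endif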
+inline +void window_remove_style(Window* w) +{ + LONG_PTR style = GetWindowLongPtrA(w->hwnd, GWL_STYLE); + style &= ~WS_OVERLAPPEDWINDOW; + SetWindowLongPtr(w->hwnd, GWL_STYLE, style); +} + +inline +void window_add_style(Window* w) +{ + LONG_PTR style = GetWindowLongPtrA(w->hwnd, GWL_STYLE); + style |= WS_OVERLAPPEDWINDOW; + SetWindowLongPtr(w->hwnd, GWL_STYLE, style); +} + +inline +void monitor_resolution(const Window* __restrict w, v2_int32* __restrict resolution) +{ + resolution->width = GetDeviceCaps(w->hdc, HORZRES); + resolution->height = GetDeviceCaps(w->hdc, VERTRES); +} + +inline +void monitor_resolution(Window* w) +{ + w->width = (uint16) GetDeviceCaps(w->hdc, HORZRES); + w->height = (uint16) GetDeviceCaps(w->hdc, VERTRES); +} + +inline +void window_resolution(Window* w) +{ + RECT rect; + GetClientRect(w->hwnd, &rect); + + w->width = (uint16) (rect.right - rect.left); + w->height = (uint16) (rect.bottom - rect.top); +} + +inline +void window_fullscreen(Window* w) +{ + monitor_resolution(w); + w->x = 0; + w->y = 0; + + window_remove_style(w); + SetWindowPos(w->hwnd, HWND_TOP, 0, 0, w->width, w->height, SWP_NOACTIVATE | SWP_NOZORDER); +} + +inline +void window_restore(Window* w) +{ + window_restore_state(w); + + SetWindowLongPtr(w->hwnd, GWL_STYLE, w->state_old.style); + SetWindowPos( + w->hwnd, HWND_TOP, + w->state_old.x, w->state_old.y, + w->state_old.width, w->state_old.height, + SWP_NOACTIVATE | SWP_NOZORDER + ); +} + +void window_create(Window* __restrict window, void* proc) +{ + ASSERT_SIMPLE(proc); + + WNDPROC wndproc = (WNDPROC) proc; + WNDCLASSEXA wc = {}; + + if (!window->hInstance) { + window->hInstance = GetModuleHandle(0); + } + + wc.cbSize = sizeof(WNDCLASSEXA); + wc.style = CS_OWNDC; + wc.lpfnWndProc = wndproc; + wc.hInstance = window->hInstance; + wc.hCursor = LoadCursor(NULL, IDC_ARROW); + wc.lpszClassName = (LPCSTR) window->name; + + if (!RegisterClassExA(&wc)) { + return; + } + + if (window->is_fullscreen) { + window->width = (uint16) GetSystemMetrics(SM_CXSCREEN); + window->height = (uint16) GetSystemMetrics(SM_CYSCREEN); + + DEVMODE screen_settings; + + memset(&screen_settings, 0, sizeof(screen_settings)); + screen_settings.dmSize = sizeof(screen_settings); + screen_settings.dmPelsWidth = (unsigned long) window->width; + screen_settings.dmPelsHeight = (unsigned long) window->height; + screen_settings.dmBitsPerPel = 32; + screen_settings.dmFields = DM_BITSPERPEL | DM_PELSWIDTH | DM_PELSHEIGHT; + + ChangeDisplaySettings(&screen_settings, CDS_FULLSCREEN); + + window->x = 0; + window->y = 0; + } + + window->hwnd = CreateWindowExA((DWORD) NULL, + wc.lpszClassName, NULL, + WS_OVERLAPPEDWINDOW, + window->x, window->y, + window->width, + window->height, + NULL, NULL, window->hInstance, window + ); + + ASSERT_SIMPLE(window->hwnd); +} + +void window_open(Window* window) +{ + ShowWindow(window->hwnd, SW_SHOW); + SetForegroundWindow(window->hwnd); + SetFocus(window->hwnd); + UpdateWindow(window->hwnd); + + window->state_changes |= WINDOW_STATE_CHANGE_FOCUS; +} + +void window_close(Window* window) +{ + CloseWindow(window->hwnd); + DestroyWindow(window->hwnd); +} + +HBITMAP CreateBitmapFromRGBA(HDC hdc, const byte* rgba, int32 width, int32 height) { + BITMAPINFO bmi = {}; + bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); + bmi.bmiHeader.biWidth = width; + bmi.bmiHeader.biHeight = height; + bmi.bmiHeader.biPlanes = 1; + bmi.bmiHeader.biBitCount = 32; + bmi.bmiHeader.biCompression = BI_RGB; + + void* pbits; + HBITMAP hbitmap = CreateDIBSection(hdc, &bmi, DIB_RGB_COLORS, 
&pbits, NULL, 0); + if (hbitmap) { + memcpy(pbits, rgba, width * height * 4); + } + + return hbitmap; +} + +#endif \ No newline at end of file diff --git a/platform/win32/Library.cpp b/platform/win32/Library.cpp index 436fc67..a3cd039 100755 --- a/platform/win32/Library.cpp +++ b/platform/win32/Library.cpp @@ -14,9 +14,9 @@ #include #include "../../stdlib/Types.h" -#include "FileUtils.cpp" #include "../../utils/StringUtils.h" #include "../../system/Library.h" +#include "../../system/FileUtils.cpp" inline bool library_load(Library* lib) diff --git a/platform/win32/SystemInfo.cpp b/platform/win32/SystemInfo.cpp index 4a3b1d1..fdfce4a 100755 --- a/platform/win32/SystemInfo.cpp +++ b/platform/win32/SystemInfo.cpp @@ -300,7 +300,7 @@ void cpu_info_get(CpuInfo* info) { SYSTEM_INFO sys_info; GetSystemInfo(&sys_info); - info->thread_count = (byte) sys_info.dwNumberOfProcessors; + info->core_count = (uint16) sys_info.dwNumberOfProcessors; info->page_size = (uint16) sys_info.dwPageSize; int32 cpuInfo[4] = { 0 }; diff --git a/platform/win32/TimeUtils.h b/platform/win32/TimeUtils.h index d1db47d..9fbac63 100755 --- a/platform/win32/TimeUtils.h +++ b/platform/win32/TimeUtils.h @@ -44,6 +44,15 @@ uint64 system_time() return ((uint64) (largeInt.QuadPart / 10000000ULL)) - ((uint64) 11644473600ULL); } +// Used as initializer for 64bit random number generators instead of time() +inline +uint64 time_index() { + LARGE_INTEGER counter; + QueryPerformanceCounter(&counter); + + return counter.QuadPart; +} + // doesn't return clock time, only to return time since program start // -> can be used for profiling inline diff --git a/platform/win32/UtilsWindows.h b/platform/win32/UtilsWindows.h index 6dabad8..e69de29 100755 --- a/platform/win32/UtilsWindows.h +++ b/platform/win32/UtilsWindows.h @@ -1,170 +0,0 @@ -/** - * Jingga - * - * @copyright Jingga - * @license OMS License 2.0 - * @version 1.0.0 - * @link https://jingga.app - */ -#ifndef COMS_PLATFORM_WIN32_UTILS_WINDOWS_H -#define COMS_PLATFORM_WIN32_UTILS_WINDOWS_H - -#include -#include "Window.h" -#include "../../stdlib/Types.h" -#include "../../utils/TestUtils.h" - -// @question Shouldn't this function and the next one accept a parameter of what to add/remove? 
-inline -void window_remove_style(Window* w) -{ - LONG_PTR style = GetWindowLongPtrA(w->hwnd, GWL_STYLE); - style &= ~WS_OVERLAPPEDWINDOW; - SetWindowLongPtr(w->hwnd, GWL_STYLE, style); -} - -inline -void window_add_style(Window* w) -{ - LONG_PTR style = GetWindowLongPtrA(w->hwnd, GWL_STYLE); - style |= WS_OVERLAPPEDWINDOW; - SetWindowLongPtr(w->hwnd, GWL_STYLE, style); -} - -inline -void monitor_resolution(const Window* __restrict w, v2_int32* __restrict resolution) -{ - resolution->width = GetDeviceCaps(w->hdc, HORZRES); - resolution->height = GetDeviceCaps(w->hdc, VERTRES); -} - -inline -void monitor_resolution(Window* w) -{ - w->width = (uint16) GetDeviceCaps(w->hdc, HORZRES); - w->height = (uint16) GetDeviceCaps(w->hdc, VERTRES); -} - -inline -void window_resolution(Window* w) -{ - RECT rect; - GetClientRect(w->hwnd, &rect); - - w->width = (uint16) (rect.right - rect.left); - w->height = (uint16) (rect.bottom - rect.top); -} - -inline -void window_fullscreen(Window* w) -{ - monitor_resolution(w); - w->x = 0; - w->y = 0; - - window_remove_style(w); - SetWindowPos(w->hwnd, HWND_TOP, 0, 0, w->width, w->height, SWP_NOACTIVATE | SWP_NOZORDER); -} - -inline -void window_restore(Window* w) -{ - window_restore_state(w); - - SetWindowLongPtr(w->hwnd, GWL_STYLE, w->state_old.style); - SetWindowPos( - w->hwnd, HWND_TOP, - w->state_old.x, w->state_old.y, - w->state_old.width, w->state_old.height, - SWP_NOACTIVATE | SWP_NOZORDER - ); -} - -void window_create(Window* __restrict window, void* proc) -{ - ASSERT_SIMPLE(proc); - - WNDPROC wndproc = (WNDPROC) proc; - WNDCLASSEXA wc = {}; - - if (!window->hInstance) { - window->hInstance = GetModuleHandle(0); - } - - wc.cbSize = sizeof(WNDCLASSEXA); - wc.style = CS_OWNDC; - wc.lpfnWndProc = wndproc; - wc.hInstance = window->hInstance; - wc.hCursor = LoadCursor(NULL, IDC_ARROW); - wc.lpszClassName = (LPCSTR) window->name; - - if (!RegisterClassExA(&wc)) { - return; - } - - if (window->is_fullscreen) { - window->width = (uint16) GetSystemMetrics(SM_CXSCREEN); - window->height = (uint16) GetSystemMetrics(SM_CYSCREEN); - - DEVMODE screen_settings; - - memset(&screen_settings, 0, sizeof(screen_settings)); - screen_settings.dmSize = sizeof(screen_settings); - screen_settings.dmPelsWidth = (unsigned long) window->width; - screen_settings.dmPelsHeight = (unsigned long) window->height; - screen_settings.dmBitsPerPel = 32; - screen_settings.dmFields = DM_BITSPERPEL | DM_PELSWIDTH | DM_PELSHEIGHT; - - ChangeDisplaySettings(&screen_settings, CDS_FULLSCREEN); - - window->x = 0; - window->y = 0; - } - - window->hwnd = CreateWindowExA((DWORD) NULL, - wc.lpszClassName, NULL, - WS_OVERLAPPEDWINDOW, - window->x, window->y, - window->width, - window->height, - NULL, NULL, window->hInstance, window - ); - - ASSERT_SIMPLE(window->hwnd); -} - -void window_open(Window* window) -{ - ShowWindow(window->hwnd, SW_SHOW); - SetForegroundWindow(window->hwnd); - SetFocus(window->hwnd); - UpdateWindow(window->hwnd); - - window->state_changes |= WINDOW_STATE_CHANGE_FOCUS; -} - -void window_close(Window* window) -{ - CloseWindow(window->hwnd); - DestroyWindow(window->hwnd); -} - -HBITMAP CreateBitmapFromRGBA(HDC hdc, const byte* rgba, int32 width, int32 height) { - BITMAPINFO bmi = {}; - bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); - bmi.bmiHeader.biWidth = width; - bmi.bmiHeader.biHeight = height; - bmi.bmiHeader.biPlanes = 1; - bmi.bmiHeader.biBitCount = 32; - bmi.bmiHeader.biCompression = BI_RGB; - - void* pbits; - HBITMAP hbitmap = CreateDIBSection(hdc, &bmi, DIB_RGB_COLORS, 
&pbits, NULL, 0); - if (hbitmap) { - memcpy(pbits, rgba, width * height * 4); - } - - return hbitmap; -} - -#endif \ No newline at end of file diff --git a/platform/win32/threading/Atomic.h b/platform/win32/threading/Atomic.h index 3126afa..a0ab7c0 100755 --- a/platform/win32/threading/Atomic.h +++ b/platform/win32/threading/Atomic.h @@ -59,10 +59,10 @@ FORCE_INLINE void atomic_add_relaxed(volatile int32* value, int32 increment) noe FORCE_INLINE void atomic_sub_relaxed(volatile int32* value, int32 decrement) noexcept { InterlockedAddNoFence((volatile long *) value, -decrement); } FORCE_INLINE void atomic_add_relaxed(volatile int64* value, int64 increment) noexcept { InterlockedAddNoFence64((volatile LONG64 *) value, (LONG64) increment); } FORCE_INLINE void atomic_sub_relaxed(volatile int64* value, int64 decrement) noexcept { InterlockedAddNoFence64((volatile LONG64 *) value, -((LONG64) decrement)); } -FORCE_INLINE f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) noexcept { _atomic_32 temp = {.l = InterlockedCompareExchangeNoFence((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; } -FORCE_INLINE f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) noexcept { _atomic_64 temp = {.l = InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; } -FORCE_INLINE int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) noexcept { return (int32) InterlockedCompareExchangeNoFence((volatile long *) value, desired, *expected); } -FORCE_INLINE int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) noexcept { return (int64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } +FORCE_INLINE f32 atomic_compare_exchange_strong_relaxed(volatile f32* value, f32* expected, f32 desired) noexcept { _atomic_32 temp = {.l = InterlockedCompareExchangeNoFence((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; } +FORCE_INLINE f64 atomic_compare_exchange_strong_relaxed(volatile f64* value, f64* expected, f64 desired) noexcept { _atomic_64 temp = {.l = InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; } +FORCE_INLINE int32 atomic_compare_exchange_strong_relaxed(volatile int32* value, int32* expected, int32 desired) noexcept { return (int32) InterlockedCompareExchangeNoFence((volatile long *) value, desired, *expected); } +FORCE_INLINE int64 atomic_compare_exchange_strong_relaxed(volatile int64* value, int64* expected, int64 desired) noexcept { return (int64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } FORCE_INLINE int8 atomic_fetch_add_relaxed(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); } FORCE_INLINE int8 atomic_fetch_sub_relaxed(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); } FORCE_INLINE int16 atomic_fetch_add_relaxed(volatile int16* value, int16 operand) noexcept { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); } @@ -99,8 +99,8 @@ FORCE_INLINE void atomic_add_relaxed(volatile uint32* value, uint32 increment) n FORCE_INLINE void atomic_sub_relaxed(volatile uint32* 
value, uint32 decrement) noexcept { InterlockedAddNoFence((volatile long *) value, -1 * ((int32) decrement)); } FORCE_INLINE void atomic_add_relaxed(volatile uint64* value, uint64 increment) noexcept { InterlockedAddNoFence64((volatile LONG64 *) value, (LONG64) increment); } FORCE_INLINE void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) noexcept { InterlockedAddNoFence64((volatile LONG64 *) value, -((LONG64) decrement)); } -FORCE_INLINE uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) noexcept { return (uint32) InterlockedCompareExchangeNoFence((volatile long *) value, desired, *expected); } -FORCE_INLINE uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) noexcept { return (uint64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } +FORCE_INLINE uint32 atomic_compare_exchange_strong_relaxed(volatile uint32* value, uint32* expected, uint32 desired) noexcept { return (uint32) InterlockedCompareExchangeNoFence((volatile long *) value, desired, *expected); } +FORCE_INLINE uint64 atomic_compare_exchange_strong_relaxed(volatile uint64* value, uint64* expected, uint64 desired) noexcept { return (uint64) InterlockedCompareExchangeNoFence64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } FORCE_INLINE uint8 atomic_fetch_add_relaxed(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); } FORCE_INLINE uint8 atomic_fetch_sub_relaxed(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); } FORCE_INLINE uint16 atomic_fetch_add_relaxed(volatile uint16* value, uint16 operand) noexcept { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); } @@ -153,10 +153,10 @@ FORCE_INLINE void atomic_add_acquire(volatile int32* value, int32 increment) noe FORCE_INLINE void atomic_sub_acquire(volatile int32* value, int32 decrement) noexcept { InterlockedAddAcquire((volatile long *) value, -decrement); } FORCE_INLINE void atomic_add_acquire(volatile int64* value, int64 increment) noexcept { InterlockedAddAcquire64((volatile LONG64 *) value, (LONG64) increment); } FORCE_INLINE void atomic_sub_acquire(volatile int64* value, int64 decrement) noexcept { InterlockedAddAcquire64((volatile LONG64 *) value, -((LONG64) decrement)); } -FORCE_INLINE f32 atomic_compare_exchange_weak_acquire(volatile f32* value, f32* expected, f32 desired) noexcept { _atomic_32 temp = {.l = InterlockedCompareExchangeAcquire((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; } -FORCE_INLINE f64 atomic_compare_exchange_weak_acquire(volatile f64* value, f64* expected, f64 desired) noexcept { _atomic_64 temp = {.l = InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; } -FORCE_INLINE int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expected, int32 desired) noexcept { return (int32) InterlockedCompareExchangeAcquire((volatile long *) value, desired, *expected); } -FORCE_INLINE int64 atomic_compare_exchange_weak_acquire(volatile int64* value, int64* expected, int64 desired) noexcept { return (int64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } +FORCE_INLINE f32 atomic_compare_exchange_strong_acquire(volatile f32* 
value, f32* expected, f32 desired) noexcept { _atomic_32 temp = {.l = InterlockedCompareExchangeAcquire((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; } +FORCE_INLINE f64 atomic_compare_exchange_strong_acquire(volatile f64* value, f64* expected, f64 desired) noexcept { _atomic_64 temp = {.l = InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; } +FORCE_INLINE int32 atomic_compare_exchange_strong_acquire(volatile int32* value, int32* expected, int32 desired) noexcept { return (int32) InterlockedCompareExchangeAcquire((volatile long *) value, desired, *expected); } +FORCE_INLINE int64 atomic_compare_exchange_strong_acquire(volatile int64* value, int64* expected, int64 desired) noexcept { return (int64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } FORCE_INLINE int8 atomic_fetch_add_acquire(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); } FORCE_INLINE int8 atomic_fetch_sub_acquire(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); } FORCE_INLINE int16 atomic_fetch_add_acquire(volatile int16* value, int16 operand) noexcept { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); } @@ -193,8 +193,8 @@ FORCE_INLINE void atomic_add_acquire(volatile uint32* value, uint32 increment) n FORCE_INLINE void atomic_sub_acquire(volatile uint32* value, uint32 decrement) noexcept { InterlockedAddAcquire((volatile long *) value, -1 * ((int32) decrement)); } FORCE_INLINE void atomic_add_acquire(volatile uint64* value, uint64 increment) noexcept { InterlockedAddAcquire64((volatile LONG64 *) value, (LONG64) increment); } FORCE_INLINE void atomic_sub_acquire(volatile uint64* value, uint64 decrement) noexcept { InterlockedAddAcquire64((volatile LONG64 *) value, -((LONG64) decrement)); } -FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expected, uint32 desired) noexcept { return (uint32) InterlockedCompareExchangeAcquire((volatile long *) value, desired, *expected); } -FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire(volatile uint64* value, uint64* expected, uint64 desired) noexcept { return (uint64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } +FORCE_INLINE uint32 atomic_compare_exchange_strong_acquire(volatile uint32* value, uint32* expected, uint32 desired) noexcept { return (uint32) InterlockedCompareExchangeAcquire((volatile long *) value, desired, *expected); } +FORCE_INLINE uint64 atomic_compare_exchange_strong_acquire(volatile uint64* value, uint64* expected, uint64 desired) noexcept { return (uint64) InterlockedCompareExchangeAcquire64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } FORCE_INLINE uint8 atomic_fetch_add_acquire(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); } FORCE_INLINE uint8 atomic_fetch_sub_acquire(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); } FORCE_INLINE uint16 atomic_fetch_add_acquire(volatile uint16* value, uint16 operand) noexcept { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); } @@ -254,10 +254,10 @@ 
FORCE_INLINE void atomic_add_release(volatile int32* value, int32 increment) noe FORCE_INLINE void atomic_sub_release(volatile int32* value, int32 decrement) noexcept { InterlockedAddRelease((volatile long *) value, -decrement); } FORCE_INLINE void atomic_add_release(volatile int64* value, int64 increment) noexcept { InterlockedAddRelease64((volatile LONG64 *) value, (LONG64) increment); } FORCE_INLINE void atomic_sub_release(volatile int64* value, int64 decrement) noexcept { InterlockedAddRelease64((volatile LONG64 *) value, -((LONG64) decrement)); } -FORCE_INLINE f32 atomic_compare_exchange_weak_release(volatile f32* value, f32* expected, f32 desired) noexcept { _atomic_32 temp = {.l = InterlockedCompareExchangeRelease((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; } -FORCE_INLINE f64 atomic_compare_exchange_weak_release(volatile f64* value, f64* expected, f64 desired) noexcept { _atomic_64 temp = {.l = InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; } -FORCE_INLINE int32 atomic_compare_exchange_weak_release(volatile int32* value, int32* expected, int32 desired) noexcept { return (int32) InterlockedCompareExchangeRelease((volatile long *) value, desired, *expected); } -FORCE_INLINE int64 atomic_compare_exchange_weak_release(volatile int64* value, int64* expected, int64 desired) noexcept { return (int64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } +FORCE_INLINE f32 atomic_compare_exchange_strong_release(volatile f32* value, f32* expected, f32 desired) noexcept { _atomic_32 temp = {.l = InterlockedCompareExchangeRelease((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; } +FORCE_INLINE f64 atomic_compare_exchange_strong_release(volatile f64* value, f64* expected, f64 desired) noexcept { _atomic_64 temp = {.l = InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; } +FORCE_INLINE int32 atomic_compare_exchange_strong_release(volatile int32* value, int32* expected, int32 desired) noexcept { return (int32) InterlockedCompareExchangeRelease((volatile long *) value, desired, *expected); } +FORCE_INLINE int64 atomic_compare_exchange_strong_release(volatile int64* value, int64* expected, int64 desired) noexcept { return (int64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } FORCE_INLINE int8 atomic_fetch_add_release(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); } FORCE_INLINE int8 atomic_fetch_sub_release(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); } FORCE_INLINE int16 atomic_fetch_add_release(volatile int16* value, int16 operand) noexcept { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); } @@ -294,8 +294,8 @@ FORCE_INLINE void atomic_add_release(volatile uint32* value, uint32 increment) n FORCE_INLINE void atomic_sub_release(volatile uint32* value, uint32 decrement) noexcept { InterlockedAddRelease((volatile long *) value, -1 * ((int32) decrement)); } FORCE_INLINE void atomic_add_release(volatile uint64* value, uint64 increment) noexcept { InterlockedAddRelease64((volatile LONG64 *) value, (LONG64) increment); } FORCE_INLINE void atomic_sub_release(volatile uint64* value, uint64 decrement) 
noexcept { InterlockedAddRelease64((volatile LONG64 *) value, -((LONG64) decrement)); } -FORCE_INLINE uint32 atomic_compare_exchange_weak_release(volatile uint32* value, uint32* expected, uint32 desired) noexcept { return (uint32) InterlockedCompareExchangeRelease((volatile long *) value, desired, *expected); } -FORCE_INLINE uint64 atomic_compare_exchange_weak_release(volatile uint64* value, uint64* expected, uint64 desired) noexcept { return (uint64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } +FORCE_INLINE uint32 atomic_compare_exchange_strong_release(volatile uint32* value, uint32* expected, uint32 desired) noexcept { return (uint32) InterlockedCompareExchangeRelease((volatile long *) value, desired, *expected); } +FORCE_INLINE uint64 atomic_compare_exchange_strong_release(volatile uint64* value, uint64* expected, uint64 desired) noexcept { return (uint64) InterlockedCompareExchangeRelease64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } FORCE_INLINE uint8 atomic_fetch_add_release(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); } FORCE_INLINE uint8 atomic_fetch_sub_release(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); } FORCE_INLINE uint16 atomic_fetch_add_release(volatile uint16* value, uint16 operand) noexcept { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); } @@ -355,10 +355,10 @@ FORCE_INLINE void atomic_add_acquire_release(volatile int32* value, int32 increm FORCE_INLINE void atomic_sub_acquire_release(volatile int32* value, int32 decrement) noexcept { InterlockedAdd((volatile long *) value, -decrement); } FORCE_INLINE void atomic_add_acquire_release(volatile int64* value, int64 increment) noexcept { InterlockedAdd64((volatile LONG64 *) value, (LONG64) increment); } FORCE_INLINE void atomic_sub_acquire_release(volatile int64* value, int64 decrement) noexcept { InterlockedAdd64((volatile LONG64 *) value, -((LONG64) decrement)); } -FORCE_INLINE f32 atomic_compare_exchange_weak_acquire_release(volatile f32* value, f32* expected, f32 desired) noexcept { _atomic_32 temp = {.l = InterlockedCompareExchange((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; } -FORCE_INLINE f64 atomic_compare_exchange_weak_acquire_release(volatile f64* value, f64* expected, f64 desired) noexcept { _atomic_64 temp = {.l = InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; } -FORCE_INLINE int32 atomic_compare_exchange_weak_acquire_release(volatile int32* value, int32* expected, int32 desired) noexcept { return (int32) InterlockedCompareExchange((volatile long *) value, desired, *expected); } -FORCE_INLINE int64 atomic_compare_exchange_weak_acquire_release(volatile int64* value, int64* expected, int64 desired) noexcept { return (int64) InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } +FORCE_INLINE f32 atomic_compare_exchange_strong_acquire_release(volatile f32* value, f32* expected, f32 desired) noexcept { _atomic_32 temp = {.l = InterlockedCompareExchange((volatile long *) value, (long) desired, (long) *expected) }; return temp.f; } +FORCE_INLINE f64 atomic_compare_exchange_strong_acquire_release(volatile f64* value, f64* expected, f64 desired) noexcept { _atomic_64 temp = {.l = 
InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected) }; return temp.f; } +FORCE_INLINE int32 atomic_compare_exchange_strong_acquire_release(volatile int32* value, int32* expected, int32 desired) noexcept { return (int32) InterlockedCompareExchange((volatile long *) value, desired, *expected); } +FORCE_INLINE int64 atomic_compare_exchange_strong_acquire_release(volatile int64* value, int64* expected, int64 desired) noexcept { return (int64) InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } FORCE_INLINE int8 atomic_fetch_add_acquire_release(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); } FORCE_INLINE int8 atomic_fetch_sub_acquire_release(volatile int8* value, int8 operand) noexcept { return (int8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); } FORCE_INLINE int16 atomic_fetch_add_acquire_release(volatile int16* value, int16 operand) noexcept { return (int16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); } @@ -395,8 +395,8 @@ FORCE_INLINE void atomic_add_acquire_release(volatile uint32* value, uint32 incr FORCE_INLINE void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement) noexcept { InterlockedAdd((volatile long *) value, -1 * ((int32) decrement)); } FORCE_INLINE void atomic_add_acquire_release(volatile uint64* value, uint64 increment) noexcept { InterlockedAdd64((volatile LONG64 *) value, (LONG64) increment); } FORCE_INLINE void atomic_sub_acquire_release(volatile uint64* value, uint64 decrement) noexcept { InterlockedAdd64((volatile LONG64 *) value, -((LONG64) decrement)); } -FORCE_INLINE uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint32* expected, uint32 desired) noexcept { return (uint32) InterlockedCompareExchange((volatile long *) value, desired, *expected); } -FORCE_INLINE uint64 atomic_compare_exchange_weak_acquire_release(volatile uint64* value, uint64* expected, uint64 desired) noexcept { return (uint64) InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } +FORCE_INLINE uint32 atomic_compare_exchange_strong_acquire_release(volatile uint32* value, uint32* expected, uint32 desired) noexcept { return (uint32) InterlockedCompareExchange((volatile long *) value, desired, *expected); } +FORCE_INLINE uint64 atomic_compare_exchange_strong_acquire_release(volatile uint64* value, uint64* expected, uint64 desired) noexcept { return (uint64) InterlockedCompareExchange64((volatile LONG64 *) value, (LONG64) desired, (LONG64) *expected); } FORCE_INLINE uint8 atomic_fetch_add_acquire_release(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, (char) operand); } FORCE_INLINE uint8 atomic_fetch_sub_acquire_release(volatile uint8* value, uint8 operand) noexcept { return (uint8) InterlockedExchangeAdd8((volatile char *) value, -((char) operand)); } FORCE_INLINE uint16 atomic_fetch_add_acquire_release(volatile uint16* value, uint16 operand) noexcept { return (uint16) InterlockedExchangeAdd16((volatile short *) value, (short) operand); } diff --git a/platform/win32/threading/Semaphore.h b/platform/win32/threading/Semaphore.h index 5d177d6..5ea2e22 100755 --- a/platform/win32/threading/Semaphore.h +++ b/platform/win32/threading/Semaphore.h @@ -11,40 +11,41 @@ #include #include "../../../stdlib/Types.h" +#include 
"../../../compiler/CompilerUtils.h" -typedef HANDLE sem_t; +typedef HANDLE sem; -inline -void sem_init(sem_t* semaphore, int32 value) +FORCE_INLINE +void coms_sem_init(sem* semaphore, int32 value) { *semaphore = CreateSemaphore(NULL, value, MAX_UINT32, NULL); } -inline -void sem_destroy(sem_t* semaphore) +FORCE_INLINE +void coms_sem_destroy(sem* semaphore) { CloseHandle(*semaphore); } // decrement if != 0, if = 0 wait -inline -void sem_wait(sem_t* semaphore) { +FORCE_INLINE +void coms_sem_wait(sem* semaphore) { WaitForSingleObject(*semaphore, INFINITE); } -inline -int32 sem_timedwait(sem_t* semaphore, uint64 ms) { +FORCE_INLINE +int32 semimedwait(sem* semaphore, uint64 ms) { return (int32) WaitForSingleObject(*semaphore, (DWORD) ms); } -inline -int32 sem_trywait(sem_t* semaphore) { +FORCE_INLINE +int32 semrywait(sem* semaphore) { return (int32) WaitForSingleObject(*semaphore, 0); } // increment -inline -void sem_post(sem_t* semaphore) { +FORCE_INLINE +void coms_sem_post(sem* semaphore) { ReleaseSemaphore(*semaphore, 1, NULL); } diff --git a/platform/win32/threading/Spinlock.cpp b/platform/win32/threading/Spinlock.cpp index 069855d..1caeaeb 100755 --- a/platform/win32/threading/Spinlock.cpp +++ b/platform/win32/threading/Spinlock.cpp @@ -13,20 +13,21 @@ #include "../../../stdlib/Types.h" #include "../TimeUtils.h" #include "Spinlock.h" +#include "../../../compiler/CompilerUtils.h" -inline +FORCE_INLINE void spinlock_init(spinlock32* lock) { lock = 0; } -inline +FORCE_INLINE void spinlock_start(spinlock32* lock, int32 delay = 10) { while (InterlockedExchange(lock, 1) != 0) { usleep(delay); } } -inline +FORCE_INLINE void spinlock_end(spinlock32* lock) { InterlockedExchange(lock, 0); } diff --git a/platform/win32/threading/Thread.h b/platform/win32/threading/Thread.h index 4204138..766a23f 100755 --- a/platform/win32/threading/Thread.h +++ b/platform/win32/threading/Thread.h @@ -47,7 +47,7 @@ int32 coms_pthread_detach(coms_pthread_t thread) } inline -int32 coms_pthread_mutex_init(coms_pthread_mutex_t* mutex, coms_pthread_mutexattr_t*) +int32 mutex_init(mutex* mutex, mutexattr_t*) { if (mutex == NULL) { return 1; @@ -59,7 +59,7 @@ int32 coms_pthread_mutex_init(coms_pthread_mutex_t* mutex, coms_pthread_mutexatt } inline -int32 coms_pthread_mutex_destroy(coms_pthread_mutex_t* mutex) +int32 mutex_destroy(mutex* mutex) { if (mutex == NULL) { return 1; @@ -71,7 +71,7 @@ int32 coms_pthread_mutex_destroy(coms_pthread_mutex_t* mutex) } inline -int32 coms_pthread_mutex_lock(coms_pthread_mutex_t* mutex) +int32 mutex_lock(mutex* mutex) { if (mutex == NULL) { return 1; @@ -83,7 +83,7 @@ int32 coms_pthread_mutex_lock(coms_pthread_mutex_t* mutex) } inline -int32 coms_pthread_mutex_unlock(coms_pthread_mutex_t* mutex) +int32 mutex_unlock(mutex* mutex) { if (mutex == NULL) { return 1; @@ -96,7 +96,7 @@ int32 coms_pthread_mutex_unlock(coms_pthread_mutex_t* mutex) // WARNING: We don't support windows events since they are much slower than conditional variables/mutexes inline -int32 coms_pthread_cond_init(coms_pthread_cond_t* cond, coms_pthread_condattr_t*) +int32 coms_pthread_cond_init(mutex_cond* cond, coms_pthread_condattr_t*) { if (cond == NULL) { return 1; @@ -108,7 +108,7 @@ int32 coms_pthread_cond_init(coms_pthread_cond_t* cond, coms_pthread_condattr_t* } inline -int32 coms_pthread_cond_destroy(coms_pthread_cond_t*) +int32 coms_pthread_cond_destroy(mutex_cond*) { /* Windows does not have a destroy for conditionals */ return 0; @@ -116,7 +116,7 @@ int32 coms_pthread_cond_destroy(coms_pthread_cond_t*) // 
@question Can't we turn timespec in a typedef of uint64? I would like to avoid the time.h class inline -int32 coms_pthread_cond_timedwait(coms_pthread_cond_t* cond, coms_pthread_mutex_t* mutex, const timespec* abstime) +int32 mutex_condimedwait(mutex_cond* cond, mutex* mutex, const timespec* abstime) { if (cond == NULL || mutex == NULL) { return 1; @@ -130,17 +130,17 @@ int32 coms_pthread_cond_timedwait(coms_pthread_cond_t* cond, coms_pthread_mutex_ } inline -int32 coms_pthread_cond_wait(coms_pthread_cond_t* cond, coms_pthread_mutex_t* mutex) +int32 coms_pthread_cond_wait(mutex_cond* cond, mutex* mutex) { if (cond == NULL || mutex == NULL) { return 1; } - return coms_pthread_cond_timedwait(cond, mutex, NULL); + return mutex_condimedwait(cond, mutex, NULL); } inline -int32 coms_pthread_cond_signal(coms_pthread_cond_t* cond) +int32 coms_pthread_cond_signal(mutex_cond* cond) { if (cond == NULL) { return 1; @@ -152,7 +152,7 @@ int32 coms_pthread_cond_signal(coms_pthread_cond_t* cond) } inline -int32 coms_pthread_cond_broadcast(coms_pthread_cond_t* cond) +int32 coms_pthread_cond_broadcast(mutex_cond* cond) { if (cond == NULL) { return 1; diff --git a/platform/win32/threading/ThreadDefines.h b/platform/win32/threading/ThreadDefines.h index aaa7795..61aeb45 100755 --- a/platform/win32/threading/ThreadDefines.h +++ b/platform/win32/threading/ThreadDefines.h @@ -14,12 +14,12 @@ #define THREAD_RETURN DWORD WINAPI typedef DWORD (WINAPI *ThreadJobFunc)(void*); -typedef CRITICAL_SECTION coms_pthread_mutex_t; -typedef void coms_pthread_mutexattr_t; +typedef CRITICAL_SECTION mutex; +typedef void mutexattr_t; typedef void coms_pthread_condattr_t; typedef void coms_pthread_rwlockattr_t; typedef HANDLE coms_pthread_t; -typedef CONDITION_VARIABLE coms_pthread_cond_t; +typedef CONDITION_VARIABLE mutex_cond; // Thread local variable Already exists in c++11 // #define thread_local __declspec(thread) diff --git a/sort/BinarySearch.h b/sort/BinarySearch.h index 2636c14..c803320 100755 --- a/sort/BinarySearch.h +++ b/sort/BinarySearch.h @@ -20,4 +20,15 @@ int32 lower_bound(int32* t, size_t len, int32 x) { } */ +/* +const char** string_lower_bound(const char** base, size_t len, const char* target) { + while (len > 1) { + size_t half = len / 2; + len -= half; + base += (strcmp(base[half - 1], target) < 0) * half; + } + return base; +} +*/ + #endif \ No newline at end of file diff --git a/sort/EytzingerSearch.h b/sort/EytzingerSearch.h index 422ea75..0282e80 100755 --- a/sort/EytzingerSearch.h +++ b/sort/EytzingerSearch.h @@ -7,6 +7,7 @@ // @performance We could optimize eytzinger by using 1 based index // Consider this https://en.algorithmica.org/hpc/data-structures/binary-search/ +static void eytzinger_rearrange(byte* arr, byte* temp, size_t start, size_t* index, size_t num, size_t size) noexcept { if (start >= num) { return; @@ -23,6 +24,7 @@ void eytzinger_rearrange(byte* arr, byte* temp, size_t start, size_t* index, siz eytzinger_rearrange(arr, temp, 2 * start + 2, index, num, size); } +// @performance Instead of expecting a sorted array maybe we can improve it by immediately create the eytzinger array and thus avoid one "sort" // arr MUST be sorted by a sorting algorithm of your choice void eytzinger_create(byte* arr, size_t num, size_t size, RingMemory* ring) { byte* temp = ring_get_memory(ring, size * num); diff --git a/stdlib/HashMap.h b/stdlib/HashMap.h index 8a519ed..0e36747 100755 --- a/stdlib/HashMap.h +++ b/stdlib/HashMap.h @@ -124,12 +124,12 @@ struct HashMap { // @todo Change so the hashmap can grow 
or maybe even better create a static and dynamic version inline -void hashmap_alloc(HashMap* hm, int32 count, int32 element_size) +void hashmap_alloc(HashMap* hm, int32 count, int32 element_size, int32 alignment = 64) { LOG_1("Allocate HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); byte* data = (byte *) platform_alloc( count * (sizeof(uint16) + element_size) - + CEIL_DIV(count, 64) * sizeof(hm->buf.free) + + CEIL_DIV(count, alignment) * sizeof(hm->buf.free) ); hm->table = (uint16 *) data; @@ -148,13 +148,13 @@ void hashmap_free(HashMap* hm) // WARNING: element_size = element size + remaining HashEntry data size inline -void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ring) noexcept +void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ring, int32 alignment = 64) noexcept { LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); byte* data = ring_get_memory( ring, count * (sizeof(uint16) + element_size) - + CEIL_DIV(count, 64) * sizeof(hm->buf.free) + + CEIL_DIV(count, alignment) * sizeof(hm->buf.free) ); hm->table = (uint16 *) data; @@ -163,13 +163,13 @@ void hashmap_create(HashMap* hm, int32 count, int32 element_size, RingMemory* ri // WARNING: element_size = element size + remaining HashEntry data size inline -void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory* buf) noexcept +void hashmap_create(HashMap* hm, int32 count, int32 element_size, BufferMemory* buf, int32 alignment = 64) noexcept { LOG_1("Create HashMap for %n elements with %n B per element", {{LOG_DATA_INT32, &count}, {LOG_DATA_INT32, &element_size}}); byte* data = buffer_get_memory( buf, count * (sizeof(uint16) + element_size) - + CEIL_DIV(count, 64) * sizeof(hm->buf.free) + + CEIL_DIV(count, alignment) * sizeof(hm->buf.free) ); hm->table = (uint16 *) data; @@ -406,8 +406,10 @@ HashEntry* hashmap_get_reserve(HashMap* hm, const char* key) noexcept uint64 index = hash_djb2(key) % hm->buf.count; HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, hm->table[index] - 1, false); + str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH); + while (entry != NULL) { - if (str_compare(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { + if (str_compare(entry->key, key) == 0) { DEBUG_MEMORY_READ((uintptr_t) entry, sizeof(HashEntry)); return entry; } @@ -438,7 +440,6 @@ HashEntry* hashmap_get_reserve(HashMap* hm, const char* key) noexcept return entry_new; } -// @performance Some places use this in order to iterate the hashmap that is horrible!!! Use the actual iterate function! 
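For orientation, a minimal usage sketch of the hashmap API after this change; it is illustrative only, the count, element size, key and value are made up, and the new alignment argument is simply left at its default of 64:

    HashMap hm;
    hashmap_alloc(&hm, 128, 32);                 // alignment parameter defaults to 64
    hashmap_insert(&hm, "player_1", (int32) 42);

    HashEntry* entry = hashmap_get_entry(&hm, "player_1");
    if (entry != NULL) {
        // entry->key holds the stored key; the value layout depends on the insert overload used
    }

    hashmap_remove(&hm, "player_1");
    hashmap_free(&hm);
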
inline HashEntry* hashmap_get_entry_by_element(HashMap* hm, uint32 element) noexcept { @@ -449,8 +450,10 @@ HashEntry* hashmap_get_entry(HashMap* hm, const char* key) noexcept { uint64 index = hash_djb2(key) % hm->buf.count; HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, hm->table[index] - 1, false); + str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH); + while (entry != NULL) { - if (str_compare(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { + if (str_compare(entry->key, key) == 0) { DEBUG_MEMORY_READ((uintptr_t) entry, sizeof(HashEntry)); return entry; } @@ -467,8 +470,10 @@ uint32 hashmap_get_element(const HashMap* hm, const char* key) noexcept { uint32 element_id = hm->table[index]; + str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH); + while (entry != NULL) { - if (str_compare(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { + if (str_compare(entry->key, key) == 0) { DEBUG_MEMORY_READ((uintptr_t) entry, sizeof(HashEntry)); return element_id; } @@ -492,8 +497,10 @@ HashEntry* hashmap_get_entry(HashMap* hm, const char* key, uint64 hash) noexcept hash %= hm->buf.count; HashEntry* entry = (HashEntry *) chunk_get_element(&hm->buf, hm->table[hash] - 1, false); + str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH); + while (entry != NULL) { - if (str_compare(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { + if (str_compare(entry->key, key) == 0) { DEBUG_MEMORY_READ((uintptr_t) entry, sizeof(HashEntry)); return entry; } @@ -514,8 +521,10 @@ void hashmap_remove(HashMap* hm, const char* key) noexcept { uint32 element_id = hm->table[index]; + str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH); + while (entry != NULL) { - if (str_compare(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { + if (str_compare(entry->key, key) == 0) { if (prev == NULL) { hm->table[index] = entry->next; } else { diff --git a/stdlib/PerfectHashMap.h b/stdlib/PerfectHashMap.h index 1417757..c907d24 100755 --- a/stdlib/PerfectHashMap.h +++ b/stdlib/PerfectHashMap.h @@ -179,13 +179,13 @@ void perfect_hashmap_create(PerfectHashMap* hm, int32 count, int32 element_size, // Calculates how large a hashmap will be inline -int64 perfect_hashmap_count(int count, int32 element_size) +int64 perfect_hashmap_size(int32 count, int32 element_size) { return count * element_size; } inline -int64 perfect_hashmap_count(const PerfectHashMap* hm) +int64 perfect_hashmap_size(const PerfectHashMap* hm) { return hm->entry_size * hm->map_count; } @@ -195,7 +195,12 @@ inline void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, int32 value) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_count; PerfectHashEntryInt32* entry = (PerfectHashEntryInt32 *) (hm->hash_entries + hm->entry_size * index); - str_copy_short(entry->key, key); + + // Ensure key length + str_move_to_pos(&key, -PERFECT_HASH_MAP_MAX_KEY_LENGTH); + str_copy_short(entry->key, key, PERFECT_HASH_MAP_MAX_KEY_LENGTH); + entry->key[PERFECT_HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; + entry->value = value; } @@ -203,7 +208,12 @@ inline void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, int64 value) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_count; PerfectHashEntryInt64* entry = (PerfectHashEntryInt64 *) (hm->hash_entries + hm->entry_size * index); - str_copy_short(entry->key, key); + + // Ensure key length + str_move_to_pos(&key, -PERFECT_HASH_MAP_MAX_KEY_LENGTH); + str_copy_short(entry->key, key, PERFECT_HASH_MAP_MAX_KEY_LENGTH); + entry->key[PERFECT_HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; + entry->value = value; } @@ -211,7 
+221,12 @@ inline void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, uintptr_t value) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_count; PerfectHashEntryUIntPtr* entry = (PerfectHashEntryUIntPtr *) (hm->hash_entries + hm->entry_size * index); - str_copy_short(entry->key, key); + + // Ensure key length + str_move_to_pos(&key, -PERFECT_HASH_MAP_MAX_KEY_LENGTH); + str_copy_short(entry->key, key, PERFECT_HASH_MAP_MAX_KEY_LENGTH); + entry->key[PERFECT_HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; + entry->value = value; } @@ -219,7 +234,12 @@ inline void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, void* value) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_count; PerfectHashEntryVoidP* entry = (PerfectHashEntryVoidP *) (hm->hash_entries + hm->entry_size * index); - str_copy_short(entry->key, key); + + // Ensure key length + str_move_to_pos(&key, -PERFECT_HASH_MAP_MAX_KEY_LENGTH); + str_copy_short(entry->key, key, PERFECT_HASH_MAP_MAX_KEY_LENGTH); + entry->key[PERFECT_HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; + entry->value = value; } @@ -227,7 +247,12 @@ inline void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, f32 value) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_count; PerfectHashEntryFloat* entry = (PerfectHashEntryFloat *) (hm->hash_entries + hm->entry_size * index); - str_copy_short(entry->key, key); + + // Ensure key length + str_move_to_pos(&key, -PERFECT_HASH_MAP_MAX_KEY_LENGTH); + str_copy_short(entry->key, key, PERFECT_HASH_MAP_MAX_KEY_LENGTH); + entry->key[PERFECT_HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; + entry->value = value; } @@ -235,7 +260,12 @@ inline void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, const char* value) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_count; PerfectHashEntryStr* entry = (PerfectHashEntryStr *) (hm->hash_entries + hm->entry_size * index); - str_copy_short(entry->key, key); + + // Ensure key length + str_move_to_pos(&key, -PERFECT_HASH_MAP_MAX_KEY_LENGTH); + str_copy_short(entry->key, key, PERFECT_HASH_MAP_MAX_KEY_LENGTH); + entry->key[PERFECT_HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; + str_copy_short(entry->value, value, PERFECT_HASH_MAP_MAX_KEY_LENGTH); } @@ -243,7 +273,12 @@ inline void perfect_hashmap_insert(PerfectHashMap* hm, const char* key, byte* value) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_count; PerfectHashEntryStr* entry = (PerfectHashEntryStr *) (hm->hash_entries + hm->entry_size * index); - str_copy_short(entry->key, key); + + // Ensure key length + str_move_to_pos(&key, -PERFECT_HASH_MAP_MAX_KEY_LENGTH); + str_copy_short(entry->key, key, PERFECT_HASH_MAP_MAX_KEY_LENGTH); + entry->key[PERFECT_HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; + memcpy(entry->value, value, hm->entry_size - sizeof(PerfectHashEntry)); } @@ -252,6 +287,8 @@ PerfectHashEntry* perfect_hashmap_get_entry(const PerfectHashMap* hm, const char int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_count; PerfectHashEntry* entry = (PerfectHashEntry *) (hm->hash_entries + hm->entry_size * index); + str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH); + return str_compare(entry->key, key) == 0 ? 
entry : NULL; } @@ -260,6 +297,8 @@ void perfect_hashmap_delete_entry(PerfectHashMap* hm, const char* key) { int32 index = hm->hash_function(key, hm->hash_seed) % hm->map_count; PerfectHashEntry* entry = (PerfectHashEntry *) (hm->hash_entries + hm->entry_size * index); + str_move_to_pos(&key, -HASH_MAP_MAX_KEY_LENGTH); + if (str_compare(entry->key, key) != 0) { return; } diff --git a/stdlib/Simd.h b/stdlib/Simd.h index 1e8e85e..29be750 100755 --- a/stdlib/Simd.h +++ b/stdlib/Simd.h @@ -14,11 +14,6 @@ // Adjusts the step size based on the memory alignment inline int32 intrin_validate_steps(const byte* mem, int32 steps) { - // During development we want to spot invalid alignment - ASSERT_SIMPLE(steps < 16 || (steps >= 16 && ((uintptr_t) mem & 63) == 0)); - ASSERT_SIMPLE(steps < 8 || (steps >= 8 && ((uintptr_t) mem & 31) == 0)); - ASSERT_SIMPLE(steps < 4 || (steps >= 4 && ((uintptr_t) mem & 15) == 0)); - if (steps >= 16 && ((uintptr_t) mem & 63) == 0) { return 16; } else if (steps >= 8 && ((uintptr_t) mem & 31) == 0) { diff --git a/stdlib/ThreadedHashMap.h b/stdlib/ThreadedHashMap.h index c64ada8..cdec124 100755 --- a/stdlib/ThreadedHashMap.h +++ b/stdlib/ThreadedHashMap.h @@ -20,7 +20,7 @@ struct ThreadedHashMap { void** table; ChunkMemory buf; - coms_pthread_mutex_t mutex; + mutex mutex; }; // WARNING: element_size = element size + remaining HashEntry data size @@ -28,7 +28,7 @@ inline void thrd_hashmap_create(ThreadedHashMap* hm, int32 count, int32 element_size, RingMemory* ring) { hashmap_create((HashMap *) hm, count, element_size, ring); - coms_pthread_mutex_init(&hm->mutex, NULL); + mutex_init(&hm->mutex, NULL); } // WARNING: element_size = element size + remaining HashEntry data size @@ -36,7 +36,7 @@ inline void thrd_hashmap_create(ThreadedHashMap* hm, int32 count, int32 element_size, BufferMemory* buf) { hashmap_create((HashMap *) hm, count, element_size, buf); - coms_pthread_mutex_init(&hm->mutex, NULL); + mutex_init(&hm->mutex, NULL); } // WARNING: element_size = element size + remaining HashEntry data size @@ -44,85 +44,85 @@ inline void thrd_hashmap_create(ThreadedHashMap* hm, int32 count, int32 element_size, byte* buf) { hashmap_create((HashMap *) hm, count, element_size, buf); - coms_pthread_mutex_init(&hm->mutex, NULL); + mutex_init(&hm->mutex, NULL); } inline void thrd_hashmap_free(ThreadedHashMap* hm) { - coms_pthread_mutex_destroy(&hm->mutex); + mutex_destroy(&hm->mutex); } inline void thrd_hashmap_insert(ThreadedHashMap* hm, const char* key, int32 value) { - coms_pthread_mutex_lock(&hm->mutex); + mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); - coms_pthread_mutex_unlock(&hm->mutex); + mutex_unlock(&hm->mutex); } inline void thrd_hashmap_insert(ThreadedHashMap* hm, const char* key, int64 value) { - coms_pthread_mutex_lock(&hm->mutex); + mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); - coms_pthread_mutex_unlock(&hm->mutex); + mutex_unlock(&hm->mutex); } inline void thrd_hashmap_insert(ThreadedHashMap* hm, const char* key, uintptr_t value) { - coms_pthread_mutex_lock(&hm->mutex); + mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); - coms_pthread_mutex_unlock(&hm->mutex); + mutex_unlock(&hm->mutex); } inline void thrd_hashmap_insert(ThreadedHashMap* hm, const char* key, void* value) { - coms_pthread_mutex_lock(&hm->mutex); + mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); - coms_pthread_mutex_unlock(&hm->mutex); + mutex_unlock(&hm->mutex); } inline void thrd_hashmap_insert(ThreadedHashMap* hm, 
const char* key, f32 value) { - coms_pthread_mutex_lock(&hm->mutex); + mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); - coms_pthread_mutex_unlock(&hm->mutex); + mutex_unlock(&hm->mutex); } inline void thrd_hashmap_insert(ThreadedHashMap* hm, const char* key, const char* value) { - coms_pthread_mutex_lock(&hm->mutex); + mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); - coms_pthread_mutex_unlock(&hm->mutex); + mutex_unlock(&hm->mutex); } inline void thrd_hashmap_insert(ThreadedHashMap* hm, const char* key, byte* value) { - coms_pthread_mutex_lock(&hm->mutex); + mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); - coms_pthread_mutex_unlock(&hm->mutex); + mutex_unlock(&hm->mutex); } inline void thrd_hashmap_get_entry(ThreadedHashMap* hm, HashEntry* entry, const char* key) { - coms_pthread_mutex_lock(&hm->mutex); + mutex_lock(&hm->mutex); HashEntry* temp = hashmap_get_entry((HashMap *) hm, key); memcpy(entry, temp, hm->buf.chunk_size); - coms_pthread_mutex_unlock(&hm->mutex); + mutex_unlock(&hm->mutex); } inline void thrd_hashmap_get_entry(ThreadedHashMap* hm, HashEntry* entry, const char* key, uint64 index) { - coms_pthread_mutex_lock(&hm->mutex); + mutex_lock(&hm->mutex); HashEntry* temp = hashmap_get_entry((HashMap *) hm, key, index); memcpy(entry, temp, hm->buf.chunk_size); - coms_pthread_mutex_unlock(&hm->mutex); + mutex_unlock(&hm->mutex); } inline void thrd_hashmap_remove(ThreadedHashMap* hm, const char* key) { - coms_pthread_mutex_lock(&hm->mutex); + mutex_lock(&hm->mutex); hashmap_remove((HashMap *) hm, key); - coms_pthread_mutex_unlock(&hm->mutex); + mutex_unlock(&hm->mutex); } #endif \ No newline at end of file diff --git a/stdlib/Types.h b/stdlib/Types.h index d340665..365544a 100755 --- a/stdlib/Types.h +++ b/stdlib/Types.h @@ -43,7 +43,6 @@ typedef char sbyte; typedef uintptr_t umm; typedef intptr_t smm; -// @question consider to implement atomic_16 depending on intrinsic support #define atomic_8 volatile #define atomic_16 volatile #define atomic_32 volatile @@ -57,8 +56,7 @@ typedef intptr_t smm; #define OMS_MAX(a, b) ((a) > (b) ? (a) : (b)) #define OMS_MIN(a, b) ((a) > (b) ? (b) : (a)) -// @todo Switch the order of high and low -#define OMS_CLAMP(val, high, low) ((val) < (low) ? (low) : ((val) > (high) ? (high) : (val))) +#define OMS_CLAMP(val, low, high) ((val) < (low) ? (low) : ((val) > (high) ? (high) : (val))) #define OMS_ABS(a) ((a) > 0 ? (a) : -(a)) #define OMS_ABS_INT8(a) ((uint8) ((a) & 0x7F)) diff --git a/system/SystemInfo.cpp b/system/SystemInfo.cpp index 948edb0..bc452ee 100755 --- a/system/SystemInfo.cpp +++ b/system/SystemInfo.cpp @@ -16,7 +16,7 @@ #endif void system_info_render(char* buf, const SystemInfo* info) { - sprintf_s( + snprintf( buf, 4096, "OS:\n" @@ -39,7 +39,7 @@ void system_info_render(char* buf, const SystemInfo* info) { "\n" "CPU:\n" "==============\n" - "Hardware\n" "Vendor: %s\n" "Brand: %s\n" "Model: %d\n" "Family: %d\n" "Mhz: %d\n" "Thread Count: %d\n" "Page Size: %d\n" + "Hardware\n" "Vendor: %s\n" "Brand: %s\n" "Model: %d\n" "Family: %d\n" "Mhz: %d\n" "Core Count: %d\n" "Page Size: %d\n" "\n" "Cache:\n" "L1: Size %d Line %d\n" @@ -47,7 +47,7 @@ void system_info_render(char* buf, const SystemInfo* info) { "L3: Size %d Line %d\n" "L4: Size %d Line %d\n" "\n" - "Features: %lld\n" + "Features: %ld\n" "\n" "GPU:\n" "==============\n" @@ -73,7 +73,7 @@ void system_info_render(char* buf, const SystemInfo* info) { info->network_count < 2 ? "" : info->network[1].slot, info->network_count < 2 ? 
0 : info->network[1].mac[0], info->network_count < 2 ? 0 : info->network[1].mac[1], info->network_count < 2 ? 0 : info->network[1].mac[2], info->network_count < 2 ? 0 : info->network[1].mac[3], info->network_count < 2 ? 0 : info->network[1].mac[4], info->network_count < 2 ? 0 : info->network[1].mac[5], info->network_count < 2 ? 0 : info->network[1].mac[6], info->network_count < 2 ? 0 : info->network[1].mac[7], info->network_count < 3 ? "" : info->network[2].slot, info->network_count < 3 ? 0 : info->network[2].mac[0], info->network_count < 3 ? 0 : info->network[2].mac[1], info->network_count < 3 ? 0 : info->network[2].mac[2], info->network_count < 3 ? 0 : info->network[2].mac[3], info->network_count < 3 ? 0 : info->network[2].mac[4], info->network_count < 3 ? 0 : info->network[2].mac[5], info->network_count < 3 ? 0 : info->network[2].mac[6], info->network_count < 3 ? 0 : info->network[2].mac[7], info->network_count < 4 ? "" : info->network[3].slot, info->network_count < 4 ? 0 : info->network[3].mac[0], info->network_count < 4 ? 0 : info->network[3].mac[1], info->network_count < 4 ? 0 : info->network[3].mac[2], info->network_count < 4 ? 0 : info->network[3].mac[3], info->network_count < 4 ? 0 : info->network[3].mac[4], info->network_count < 4 ? 0 : info->network[3].mac[5], info->network_count < 4 ? 0 : info->network[3].mac[6], info->network_count < 4 ? 0 : info->network[3].mac[7], - info->cpu.vendor, info->cpu.brand, info->cpu.model, info->cpu.family, info->cpu.mhz, info->cpu.thread_count, info->cpu.page_size, + info->cpu.vendor, info->cpu.brand, info->cpu.model, info->cpu.family, info->cpu.mhz, info->cpu.core_count, info->cpu.page_size, info->cpu.cache[0].size, info->cpu.cache[0].line_size, info->cpu.cache[1].size, info->cpu.cache[1].line_size, info->cpu.cache[2].size, info->cpu.cache[2].line_size, diff --git a/tests.bat b/tests.bat index 961413f..393e3d9 100755 --- a/tests.bat +++ b/tests.bat @@ -18,7 +18,7 @@ if "%Platform%" neq "x64" ( cd "%DESTINATION_DIR%" del *.pdb > NUL 2> NUL del *.idb > NUL 2> NUL -cd ..\..\GameEngine +cd ..\..\cOMS REM Use /showIncludes for include debugging diff --git a/tests/.vscode/settings.json b/tests/.vscode/settings.json index 3ce2dc5..98b5c34 100755 --- a/tests/.vscode/settings.json +++ b/tests/.vscode/settings.json @@ -207,7 +207,7 @@ "uiwindow.h": "c", "uiattributefont.h": "c", "renderutils.h": "c", - "application.h": "c", + "ApplicationData.h": "c", "shaderutils.h": "c", "uiattributeshadow.h": "c", "matrixfloat32.h": "c", diff --git a/tests_iter.bat b/tests_iter.bat index 3e32bb9..6a504d6 100755 --- a/tests_iter.bat +++ b/tests_iter.bat @@ -17,7 +17,7 @@ if "%Platform%" neq "x64" ( cd "%DESTINATION_DIR%" del *.pdb > NUL 2> NUL del *.idb > NUL 2> NUL -cd ..\..\GameEngine +cd ..\..\cOMS REM Use /showIncludes for include debugging diff --git a/thread/Thread.h b/thread/Thread.h index 3ca2bda..39c4745 100755 --- a/thread/Thread.h +++ b/thread/Thread.h @@ -25,7 +25,7 @@ void thread_create(Worker* worker, ThreadJobFunc routine, void* arg) { - LOG_1("Thread started"); + LOG_1("Thread starting"); coms_pthread_create(&worker->thread, NULL, routine, arg); } diff --git a/thread/ThreadJob.h b/thread/ThreadJob.h index 1bbfd4e..c7490e2 100755 --- a/thread/ThreadJob.h +++ b/thread/ThreadJob.h @@ -31,6 +31,7 @@ struct PoolWorker { struct Worker { alignas(4) atomic_32 int32 state; coms_pthread_t thread; + void* arg; }; #endif \ No newline at end of file diff --git a/thread/ThreadPool.h b/thread/ThreadPool.h index 2ff2f33..569ffd4 100755 --- a/thread/ThreadPool.h +++ 
b/thread/ThreadPool.h @@ -21,9 +21,9 @@ struct ThreadPool { // This is not a threaded queue since we want to handle the mutex in here, not in the queue for finer control Queue work_queue; - coms_pthread_mutex_t work_mutex; - coms_pthread_cond_t work_cond; - coms_pthread_cond_t working_cond; + mutex work_mutex; + mutex_cond work_cond; + mutex_cond working_cond; alignas(4) atomic_32 int32 working_cnt; alignas(4) atomic_32 int32 thread_cnt; @@ -40,19 +40,19 @@ static THREAD_RETURN thread_pool_worker(void* arg) PoolWorker* work; while (true) { - coms_pthread_mutex_lock(&pool->work_mutex); + mutex_lock(&pool->work_mutex); while (queue_is_empty(&pool->work_queue) && !pool->state) { coms_pthread_cond_wait(&pool->work_cond, &pool->work_mutex); } if (pool->state == 1) { - coms_pthread_mutex_unlock(&pool->work_mutex); + mutex_unlock(&pool->work_mutex); break; } work = (PoolWorker *) queue_dequeue_keep(&pool->work_queue); - coms_pthread_mutex_unlock(&pool->work_mutex); + mutex_unlock(&pool->work_mutex); if (!work) { continue; @@ -86,15 +86,52 @@ static THREAD_RETURN thread_pool_worker(void* arg) return (THREAD_RETURN) NULL; } -void thread_pool_create(ThreadPool* pool, BufferMemory* buf, int32 thread_count) +void thread_pool_alloc(ThreadPool* pool, int32 thread_count, int32 worker_count, int32 alignment = 64) { - queue_init(&pool->work_queue, buf, 64, sizeof(PoolWorker), 64); + PROFILE(PROFILE_THREAD_POOL_ALLOC); + LOG_1( + "Allocating thread pool with %d threads and %d queue length", + { + {LOG_DATA_INT32, &thread_count}, + {LOG_DATA_INT32, &worker_count} + } + ); + + queue_alloc(&pool->work_queue, worker_count, sizeof(PoolWorker), alignment); pool->thread_cnt = thread_count; // @todo switch from pool mutex and pool cond to threadjob mutex/cond // thread_pool_wait etc. should just iterate over all mutexes - coms_pthread_mutex_init(&pool->work_mutex, NULL); + mutex_init(&pool->work_mutex, NULL); + coms_pthread_cond_init(&pool->work_cond, NULL); + coms_pthread_cond_init(&pool->working_cond, NULL); + + coms_pthread_t thread; + for (pool->size = 0; pool->size < thread_count; ++pool->size) { + coms_pthread_create(&thread, NULL, thread_pool_worker, pool); + coms_pthread_detach(thread); + } +} + +void thread_pool_create(ThreadPool* pool, BufferMemory* buf, int32 thread_count, int32 worker_count, int32 alignment = 64) +{ + PROFILE(PROFILE_THREAD_POOL_ALLOC); + LOG_1( + "Creating thread pool with %d threads and %d queue length", + { + {LOG_DATA_INT32, &thread_count}, + {LOG_DATA_INT32, &worker_count} + } + ); + + queue_init(&pool->work_queue, buf, worker_count, sizeof(PoolWorker), alignment); + + pool->thread_cnt = thread_count; + + // @todo switch from pool mutex and pool cond to threadjob mutex/cond + // thread_pool_wait etc. 
should just iterate over all mutexes + mutex_init(&pool->work_mutex, NULL); coms_pthread_cond_init(&pool->work_cond, NULL); coms_pthread_cond_init(&pool->working_cond, NULL); @@ -107,11 +144,11 @@ void thread_pool_create(ThreadPool* pool, BufferMemory* buf, int32 thread_count) void thread_pool_wait(ThreadPool* pool) { - coms_pthread_mutex_lock(&pool->work_mutex); + mutex_lock(&pool->work_mutex); while ((!pool->state && pool->working_cnt != 0) || (pool->state && pool->thread_cnt != 0)) { coms_pthread_cond_wait(&pool->working_cond, &pool->work_mutex); } - coms_pthread_mutex_unlock(&pool->work_mutex); + mutex_unlock(&pool->work_mutex); } void thread_pool_destroy(ThreadPool* pool) @@ -125,31 +162,31 @@ void thread_pool_destroy(ThreadPool* pool) coms_pthread_cond_broadcast(&pool->work_cond); thread_pool_wait(pool); - coms_pthread_mutex_destroy(&pool->work_mutex); + mutex_destroy(&pool->work_mutex); coms_pthread_cond_destroy(&pool->work_cond); coms_pthread_cond_destroy(&pool->working_cond); } PoolWorker* thread_pool_add_work(ThreadPool* pool, const PoolWorker* job) { - coms_pthread_mutex_lock(&pool->work_mutex); - PoolWorker* temp_job = (PoolWorker *) ring_get_memory_nomove((RingMemory *) &pool->work_queue, sizeof(PoolWorker), 64); + mutex_lock(&pool->work_mutex); + PoolWorker* temp_job = (PoolWorker *) ring_get_memory_nomove((RingMemory *) &pool->work_queue, sizeof(PoolWorker), 8); if (atomic_get_relaxed(&temp_job->id) > 0) { - coms_pthread_mutex_unlock(&pool->work_mutex); + mutex_unlock(&pool->work_mutex); ASSERT_SIMPLE(temp_job->id == 0); return NULL; } memcpy(temp_job, job, sizeof(PoolWorker)); - ring_move_pointer((RingMemory *) &pool->work_queue, &pool->work_queue.head, sizeof(PoolWorker), 64); + ring_move_pointer((RingMemory *) &pool->work_queue, &pool->work_queue.head, sizeof(PoolWorker), 8); if (temp_job->id == 0) { temp_job->id = atomic_fetch_add_acquire(&pool->id_counter, 1); } coms_pthread_cond_broadcast(&pool->work_cond); - coms_pthread_mutex_unlock(&pool->work_mutex); + mutex_unlock(&pool->work_mutex); return temp_job; } @@ -158,11 +195,11 @@ PoolWorker* thread_pool_add_work(ThreadPool* pool, const PoolWorker* job) // This makes it faster, since we can avoid a memcpy PoolWorker* thread_pool_add_work_start(ThreadPool* pool) { - coms_pthread_mutex_lock(&pool->work_mutex); + mutex_lock(&pool->work_mutex); PoolWorker* temp_job = (PoolWorker *) queue_enqueue_start(&pool->work_queue); if (atomic_get_relaxed(&temp_job->id) > 0) { - coms_pthread_mutex_unlock(&pool->work_mutex); + mutex_unlock(&pool->work_mutex); ASSERT_SIMPLE(temp_job->id == 0); return NULL; @@ -180,7 +217,7 @@ void thread_pool_add_work_end(ThreadPool* pool) { queue_enqueue_end(&pool->work_queue); coms_pthread_cond_broadcast(&pool->work_cond); - coms_pthread_mutex_unlock(&pool->work_mutex); + mutex_unlock(&pool->work_mutex); } diff --git a/utils/RandomUtils.h b/utils/RandomUtils.h index de58190..d9512f3 100755 --- a/utils/RandomUtils.h +++ b/utils/RandomUtils.h @@ -11,32 +11,37 @@ #include #include "../stdlib/Types.h" +#include "../utils/TestUtils.h" +#include "../utils/TimeUtils.h" -global_persist uint32 fast_seed; -#define FAST_RAND_MAX 32767 - -inline -uint32 fast_rand1(void) { - fast_seed = (214013 * fast_seed + 2531011); - - return (fast_seed >> 16) & 0x7FFF; -} - -uint32 fast_rand2(uint32* state) { +uint32 rand_fast(uint32* state) { + static const uint32 z = 0x9E3779B9; uint32 x = *state; - x ^= x << 13; - x ^= x >> 17; - x ^= x << 5; + x ^= ((x << 13) | (x >> 19)) ^ ((x << 5) | (x >> 27)); + x *= z; + x ^= x >> 16; + 
x *= z;
+    x ^= x >> 15;
     *state = x;
 
     return x;
 }
 
-inline
-f32 fast_rand_percentage(void) {
-    return (f32) fast_rand1() / (f32) FAST_RAND_MAX;
+uint64 rand_fast(uint64* state) {
+    static const uint64 z = 0x9FB21C651E98DF25;
+    uint64 x = *state;
+
+    x ^= ((x << 49) | (x >> 15)) ^ ((x << 24) | (x >> 40));
+    x *= z;
+    x ^= x >> 35;
+    x *= z;
+    x ^= x >> 28;
+
+    *state = x;
+
+    return x;
 }
 
 /**
@@ -78,4 +83,25 @@ int32 random_weighted_index(const int32* arr, int32 array_count)
     return item_rarity;
 }
 
-#endif
\ No newline at end of file
+// WARNING: The allowed_chars string length needs to be a power of 2 for performance reasons
+// Supporting any allowed_chars length is trivial but usually we prefer the performance improvement
+void random_string(const char* allowed_chars, uint32 allowed_length, char* out, int32 out_length) {
+    ASSERT_SIMPLE(allowed_length != 0 && (allowed_length & (allowed_length - 1)) == 0);
+
+    const uint32 mask = allowed_length - 1;
+
+    uint64 x = time_index();
+
+    size_t i = 0;
+    while (i < out_length) {
+        uint64 rand_val = rand_fast(&x);
+
+        for (int32 j = 0; j < 8 && i < out_length; ++j, ++i) {
+            out[i] = allowed_chars[((rand_val >> (8 * j)) & 0xFF) & mask];
+        }
+    }
+
+    out[out_length] = '\0';
+}
+
+#endif
diff --git a/utils/RegexSimplified.h b/utils/RegexSimplified.h
new file mode 100644
index 0000000..7c917f2
--- /dev/null
+++ b/utils/RegexSimplified.h
@@ -0,0 +1,385 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef COMS_UTILS_REGEX_SIMPLIFIED_H
+#define COMS_UTILS_REGEX_SIMPLIFIED_H
+
+#include "../stdlib/Types.h"
+#include "StringUtils.h"
+
+struct SimplifiedRegexParser {
+    const char *pattern;
+    int32 pos;
+};
+
+struct MatchResult {
+    bool matched;
+    int32 length;
+};
+
+static
+void regex_skip_whitespace(SimplifiedRegexParser *parser) {
+    while (parser->pattern[parser->pos] == ' ') {
+        parser->pos++;
+    }
+}
+
+MatchResult regex_match_char(SimplifiedRegexParser *parser, const char *text) {
+    regex_skip_whitespace(parser);
+    MatchResult result = {false, 0};
+
+    if (parser->pattern[parser->pos] == '\0' || *text == '\0') {
+        return result;
+    }
+
+    if (parser->pattern[parser->pos] == '\\') {
+        // Handle escape sequences
+        parser->pos++;
+        if (parser->pattern[parser->pos] == 'd') {
+            if (str_is_num(*text)) {
+                result.matched = true;
+                result.length = 1;
+                parser->pos++;
+            }
+        } else if (parser->pattern[parser->pos] == *text) {
+            result.matched = true;
+            result.length = 1;
+            parser->pos++;
+        }
+    } else if (parser->pattern[parser->pos] == '.') {
+        // Match any character
+        result.matched = true;
+        result.length = 1;
+        parser->pos++;
+    } else if (parser->pattern[parser->pos] == *text) {
+        // Match literal character
+        result.matched = true;
+        result.length = 1;
+        parser->pos++;
+    } else if (parser->pattern[parser->pos] == 'a' &&
+               parser->pattern[parser->pos+1] == '-' &&
+               parser->pattern[parser->pos+2] == 'z') {
+        // Match a-z range
+        if (*text >= 'a' && *text <= 'z') {
+            result.matched = true;
+            result.length = 1;
+            parser->pos += 3;
+        }
+    } else if (parser->pattern[parser->pos] == 'A' &&
+               parser->pattern[parser->pos+1] == '-' &&
+               parser->pattern[parser->pos+2] == 'Z') {
+        // Match A-Z range
+        if (*text >= 'A' && *text <= 'Z') {
+            result.matched = true;
+            result.length = 1;
+            parser->pos += 3;
+        }
+    } else if (parser->pattern[parser->pos] == '0' &&
+               parser->pattern[parser->pos+1] == '-' &&
+               parser->pattern[parser->pos+2] == '9') {
+        // Match 0-9 range
+        if (str_is_num(*text)) {
+            result.matched = true;
+            result.length = 1;
+ parser->pos += 3; + } + } + + return result; +} + +int regex_parse_number(SimplifiedRegexParser *parser) { + int32 num = 0; + while (str_is_num(parser->pattern[parser->pos])) { + num = num * 10 + (parser->pattern[parser->pos] - '0'); + parser->pos++; + } + return num; +} + +MatchResult regex_match_pattern(SimplifiedRegexParser *parser, const char *text); +MatchResult regex_match_atom(SimplifiedRegexParser *parser, const char *text) { + regex_skip_whitespace(parser); + MatchResult result = {false, 0}; + + if (parser->pattern[parser->pos] == '(') { + // Handle group + int32 saved_pos = parser->pos; + parser->pos++; + result = regex_match_pattern(parser, text); + if (parser->pattern[parser->pos] == ')') { + parser->pos++; + } else { + // Group not properly closed, backtrack + parser->pos = saved_pos; + result = regex_match_char(parser, text); + } + } else { + // Handle single character + result = regex_match_char(parser, text); + } + + return result; +} + +MatchResult regex_match_repetition(SimplifiedRegexParser *parser, const char *text, MatchResult atom_result) { + MatchResult result = {false, 0}; + + parser->pos++; // Skip '{' + regex_skip_whitespace(parser); + + int32 min = regex_parse_number(parser); + regex_skip_whitespace(parser); + + int32 max = min; + if (parser->pattern[parser->pos] == ',') { + parser->pos++; + regex_skip_whitespace(parser); + if (parser->pattern[parser->pos] == '}') { + // {x,} means x or more (no max) + max = -1; + } else { + max = regex_parse_number(parser); + } + } + + regex_skip_whitespace(parser); + if (parser->pattern[parser->pos] != '}') { + // Invalid repetition syntax + return result; + } + parser->pos++; // Skip '}' + + if (min < 0 || (max != -1 && max < min)) { + // Invalid range + return result; + } + + // Try to match exactly min times first + int32 count = 0; + int32 total_length = 0; + const char *current_text = text; + + while (true) { + if (max != -1 && count >= max) break; + + MatchResult next_result = regex_match_atom(parser, current_text); + if (!next_result.matched) break; + + count++; + total_length += next_result.length; + current_text += next_result.length; + } + + if (count >= min && (max == -1 || count <= max)) { + result.matched = true; + result.length = total_length; + } + + return result; +} + +MatchResult regex_match_element(SimplifiedRegexParser *parser, const char *text) { + MatchResult atom_result = regex_match_atom(parser, text); + + if (!atom_result.matched) { + return atom_result; + } + + regex_skip_whitespace(parser); + char quantifier = parser->pattern[parser->pos]; + + if (quantifier == '*') { + // Zero or more + parser->pos++; + int32 consumed = atom_result.length; + const char *remaining_text = text + consumed; + MatchResult star_result = {true, consumed}; + + while (true) { + MatchResult next_result = regex_match_atom(parser, remaining_text); + if (!next_result.matched) break; + consumed += next_result.length; + remaining_text += next_result.length; + star_result.length = consumed; + } + + return star_result; + } else if (quantifier == '+') { + // One or more + parser->pos++; + int32 consumed = atom_result.length; + const char *remaining_text = text + consumed; + MatchResult plus_result = {true, consumed}; + + while (true) { + MatchResult next_result = regex_match_atom(parser, remaining_text); + if (!next_result.matched) break; + consumed += next_result.length; + remaining_text += next_result.length; + plus_result.length = consumed; + } + + return plus_result; + } else if (quantifier == '?') { + // Zero or one + 
parser->pos++; + return atom_result; + } else if (quantifier == '{') { + // Min/max repetition {x,y} + return regex_match_repetition(parser, text, atom_result); + } else { + // No quantifier + return atom_result; + } +} + +MatchResult regex_match_pattern(SimplifiedRegexParser *parser, const char *text) { + MatchResult result = regex_match_element(parser, text); + + regex_skip_whitespace(parser); + if (parser->pattern[parser->pos] == '|') { + parser->pos++; + MatchResult alternative = regex_match_pattern(parser, text); + if (alternative.matched) { + return alternative; + } + } + + return result; +} + +bool regex_simplified_validate(const char* pattern, const char* text) { + SimplifiedRegexParser parser = {pattern, 0}; + bool starts_with = false; + bool ends_with = false; + + // Check for ^ and $ anchors + if (parser.pattern[parser.pos] == '^') { + starts_with = true; + parser.pos++; + } + + MatchResult result = regex_match_pattern(&parser, text); + + if (parser.pattern[parser.pos] == '$') { + ends_with = true; + parser.pos++; + } + + // Check if we consumed the entire pattern + if (parser.pattern[parser.pos] != '\0') { + return false; + } + + // Check anchors + if (starts_with && ends_with) { + return result.matched && (result.length == str_length(text)); + } else if (starts_with) { + return result.matched && (result.length > 0); + } else if (ends_with) { + return result.matched && (text[result.length] == '\0'); + } else { + return result.matched; + } +} + +/* +// Test function +void test_regex(const char *pattern, const char *text, bool expected) { + bool result = regex_simplified_validate(pattern, text); + printf("Pattern: '%-10s'\tText: '%-6s'\tExpected: %-5s\tActual: %-5s\t%s\n", + pattern, text, expected ? "true" : "false", result ? "true" : "false", + (result == expected) ? 
"✓" : "✗"); +} + +int main() { + // Test cases + printf("Enhanced Regex Validator Tests\n"); + printf("=============================\n"); + + // Basic tests + test_regex("abc", "abc", true); + test_regex("^abc$", "abc", true); + test_regex("^abc$", "abcd", false); + + // Character classes + test_regex("a-z", "a", true); + test_regex("a-z", "z", true); + test_regex("a-z", "A", false); + test_regex("A-Z", "Z", true); + test_regex("A-Z", "a", false); + test_regex("0-9", "5", true); + test_regex("0-9", "a", false); + test_regex("\\d", "5", true); + test_regex("\\d", "a", false); + + // Quantifiers + test_regex("a*", "", true); + test_regex("a*", "a", true); + test_regex("a*", "aaa", true); + test_regex("a+", "", false); + test_regex("a+", "a", true); + test_regex("a+", "aaa", true); + test_regex("a?b", "b", true); + test_regex("a?b", "ab", true); + test_regex("a?b", "aab", false); + + // Groups and alternation + test_regex("(a|b)c", "ac", true); + test_regex("(a|b)c", "bc", true); + test_regex("(a|b)c", "cc", false); + test_regex("(a-z)+", "abc", true); + test_regex("(A-Z)+", "ABC", true); + test_regex("(0-9)+", "123", true); + + // Escape sequences + test_regex("\\.", ".", true); + test_regex("\\.", "a", false); + test_regex("a\\db", "a0b", true); + test_regex("a\\db", "a9b", true); + test_regex("a\\db", "aab", false); + + // Any character + test_regex("a.b", "a b", true); + test_regex("a.b", "a0b", true); + test_regex("a.b", "a\nb", true); + test_regex("a.b", "ab", false); + + // Repetition tests + test_regex("a{2}", "aa", true); + test_regex("a{2}", "a", false); + test_regex("a{2}", "aaa", true); // More than min is allowed + test_regex("a{2,4}", "aa", true); + test_regex("a{2,4}", "aaa", true); + test_regex("a{2,4}", "aaaa", true); + test_regex("a{2,4}", "a", false); + test_regex("a{2,4}", "aaaaa", false); + test_regex("a{2,}", "aa", true); + test_regex("a{2,}", "aaaaa", true); + test_regex("a{2,}", "a", false); + test_regex("(a-z){3}", "abc", true); + test_regex("(a-z){3}", "ab", false); + test_regex("(a-z){2,4}", "ab", true); + test_regex("(a-z){2,4}", "abcd", true); + test_regex("(a-z){2,4}", "abcde", false); + test_regex("\\d{3}-\\d{2}", "123-45", true); + test_regex("\\d{3}-\\d{2}", "12-345", false); + + // Combined tests + test_regex("^a{2}b{1,3}c$", "aabbc", true); + test_regex("^a{2}b{1,3}c$", "aabbbc", true); + test_regex("^a{2}b{1,3}c$", "aabc", true); + test_regex("^a{2}b{1,3}c$", "aabbbbc", false); + test_regex("^a{2}b{1,3}c$", "abbc", false); + + return 0; +} +*/ + +#endif \ No newline at end of file diff --git a/utils/StringUtils.h b/utils/StringUtils.h index 5de0068..917202e 100755 --- a/utils/StringUtils.h +++ b/utils/StringUtils.h @@ -57,6 +57,107 @@ const char* str_find(const char* str, const char* needle) noexcept { return NULL; } +static const unsigned char TO_LOWER_TABLE[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + 0x60, 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 
'x', 'y', 'z', 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, + 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, + 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF +}; + +static const unsigned char TO_UPPER_TABLE[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x40, 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + 0x60, 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, + 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, + 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF +}; + + +inline constexpr +char toupper_ascii(char c) noexcept +{ + return c - 32 * (c >= 'a' && c <= 'z'); +} + +inline +void toupper_ascii(char* str) noexcept +{ + while (*str != '\0') { + *str -= 32 * (*str >= 'a' && *str <= 'z'); + ++str; + } +} + +inline constexpr +char tolower_ascii(char c) noexcept +{ + return c + 32 * (c >= 'A' && c <= 'Z'); +} + +inline +void tolower_ascii(char* str) noexcept +{ + while (*str != '\0') { + *str += 32 * (*str >= 'A' && *str <= 'Z'); + ++str; + } +} + const char* str_find(const char* str, char needle) noexcept { byte target = (byte) needle; @@ -999,36 +1100,6 @@ char* strtok(char* str, const char* __restrict delim, char* *key) noexcept { return result; } -inline constexpr -char toupper_ascii(char c) noexcept -{ - return c - 32 * (c >= 'a' && c <= 'z'); -} - -inline -void toupper_ascii(char* str) noexcept -{ - while (*str != '\0') { - *str -= 32 * (*str >= 'a' && *str <= 'z'); - ++str; - } -} - -inline constexpr -char tolower_ascii(char c) noexcept -{ - return c + 32 * (c >= 'A' && c <= 'Z'); -} - -inline -void tolower_ascii(char* str) noexcept -{ - 
while (*str != '\0') {
-        *str += 32 * (*str >= 'A' && *str <= 'Z');
-        ++str;
-    }
-}
-
 inline constexpr
 bool str_contains(const char* haystack, const char* needle) noexcept
 {
@@ -1053,6 +1124,30 @@ bool str_contains(const char* haystack, const char* needle) noexcept
     return false;
 }
 
+inline constexpr
+bool str_contains(const char* haystack, const char* needle, size_t length) noexcept
+{
+    while (*haystack != '\0' && length > 0) {
+        const char* p1 = haystack;
+        const char* p2 = needle;
+        size_t remaining = length;
+
+        while (remaining > 0 && *p2 != '\0' && *p1 == *p2) {
+            ++p1;
+            ++p2;
+            --remaining;
+        }
+
+        if (*p2 == '\0') {
+            return true;
+        }
+
+        ++haystack;
+        --length;
+    }
+
+    return false;
+}
+
 inline
 int32 str_compare(const char* str1, const char* str2) noexcept
 {
@@ -1121,6 +1216,70 @@ int32 str_compare(const char* str1, const char* str2, size_t n) noexcept
     return c1 - c2;
 }
 
+inline
+int32 str_compare_caseless(const char* str1, const char* str2) noexcept
+{
+    byte c1, c2;
+
+    do {
+        c1 = TO_LOWER_TABLE[(byte) *str1++];
+        c2 = TO_LOWER_TABLE[(byte) *str2++];
+    } while (c1 == c2 && c1 != '\0');
+
+    return c1 - c2;
+}
+
+int32 str_compare_caseless(const char* str1, const char* str2, size_t n) noexcept
+{
+    byte c1 = '\0';
+    byte c2 = '\0';
+
+    if (n >= 4) {
+        size_t n4 = n >> 2;
+
+        do {
+            c1 = TO_LOWER_TABLE[(byte) *str1++];
+            c2 = TO_LOWER_TABLE[(byte) *str2++];
+            if (c1 == '\0' || c1 != c2) {
+                return c1 - c2;
+            }
+
+            c1 = TO_LOWER_TABLE[(byte) *str1++];
+            c2 = TO_LOWER_TABLE[(byte) *str2++];
+            if (c1 == '\0' || c1 != c2) {
+                return c1 - c2;
+            }
+
+            c1 = TO_LOWER_TABLE[(byte) *str1++];
+            c2 = TO_LOWER_TABLE[(byte) *str2++];
+            if (c1 == '\0' || c1 != c2) {
+                return c1 - c2;
+            }
+
+            c1 = TO_LOWER_TABLE[(byte) *str1++];
+            c2 = TO_LOWER_TABLE[(byte) *str2++];
+            if (c1 == '\0' || c1 != c2) {
+                return c1 - c2;
+            }
+        } while (--n4 > 0);
+
+        n &= 3;
+    }
+
+    while (n > 0) {
+        c1 = TO_LOWER_TABLE[(byte) *str1++];
+        c2 = TO_LOWER_TABLE[(byte) *str2++];
+
+        if (c1 == '\0' || c1 != c2) {
+            return c1 - c2;
+        }
+
+        --n;
+    }
+
+    return c1 - c2;
+}
+
 inline constexpr
 bool str_ends_with(const char* str, const char* suffix) noexcept {
     if (!str || !suffix) {
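The snippet below is an illustrative sketch only, not part of the patch: it exercises the case-insensitive helpers and the length-bounded str_contains introduced in this hunk. It assumes utils/StringUtils.h compiles standalone (it relies on stdlib/Types.h for byte/int32) and that the bounded str_contains does not let a match run past the given length; the header strings and the expected outputs in the comments are hypothetical.

#include <cstdio>
#include "utils/StringUtils.h"

int main() {
    // Case-insensitive comparison backed by TO_LOWER_TABLE
    printf("%d\n", str_compare_caseless("Content-Type", "content-type") == 0); // expect 1
    printf("%d\n", str_compare_caseless("HTTP/1.1", "http/1.0", 6) == 0);      // expect 1: only the first 6 chars are compared

    // Length-bounded substring search
    printf("%d\n", str_contains("Accept-Encoding: gzip", "gzip", 21)); // expect 1
    printf("%d\n", str_contains("Accept-Encoding: gzip", "gzip", 10)); // expect 0: the bound ends before "gzip"

    // In-place ASCII case conversion
    char header[] = "X-Request-Id";
    tolower_ascii(header);
    printf("%s\n", header); // x-request-id

    return 0;
}

Note the design choice: str_compare_caseless indexes TO_LOWER_TABLE directly instead of calling tolower_ascii, so the per-character lowering is a table lookup with no branching.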