diff --git a/asset/Asset.h b/asset/Asset.h
new file mode 100644
index 0000000..4949b55
--- /dev/null
+++ b/asset/Asset.h
@@ -0,0 +1,60 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef TOS_ASSET_H
+#define TOS_ASSET_H
+
+#include "../stdlib/Types.h"
+
+struct Asset {
+    // An id of 0 means the entity is no longer alive
+    // The id is the same as its location in memory/in the ecs array
+    // This is only an internal id and NOT the same as a db id (e.g. player id)
+    uint32 internal_id;
+    uint32 type;
+
+    // Could be 0 if there is no official id
+    uint32 official_id;
+
+    uint32 vao; // vertex array object (vertex input layout)
+    uint32 vbo; // vertex buffer object
+    uint32 ebo; // element buffer object (index buffer)
+
+    // Counts the references to this entity
+    // e.g. textures
+    int reference_count;
+
+    // Describes how much ram/vram the asset uses
+    // E.g. vram_size = 0 but ram_size > 0 means that it never uses any gpu memory
+    uint32 ram_size;
+    uint32 vram_size;
+
+    // Usually 1 but in some cases an ECS may hold entities of variable chunk length
+    // For textures for example a 128x128 is of size 1 but 256x256 is of size 4
+    uint32 size;
+
+    // Describes if the memory is currently available in ram/vram
+    // E.g. an entity might be uploaded to the gpu and no longer held in ram (or the other way around)
+    bool is_ram;
+    bool is_vram;
+
+    Asset* next;
+    Asset* prev;
+
+    // An entity can reference up to N other entities
+    // This allows us to quickly update the other entities
+    // Example: A player pulls N mobs
+    // @bug This means there are hard limits on how many mobs can be pulled by a player
+    Asset* entity_references[50];
+    bool free_entity_references[50];
+
+    // Actual memory address
+    byte* self;
+};
+
+#endif
\ No newline at end of file
diff --git a/asset/AssetManagementSystem.h b/asset/AssetManagementSystem.h
new file mode 100644
index 0000000..b0b2f4b
--- /dev/null
+++ b/asset/AssetManagementSystem.h
@@ -0,0 +1,139 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef TOS_ASSET_MANAGEMENT_SYSTEM_H
+#define TOS_ASSET_MANAGEMENT_SYSTEM_H
+
+#include
+#include "../stdlib/Types.h"
+#include "Asset.h"
+#include "AssetType.h"
+#include "../memory/ChunkMemory.h"
+#include "../utils/TestUtils.h"
+
+// The major asset types should have their own asset component system
+// All other entities are grouped together in one asset component system
+// @question Asset component systems could be created per region -> easy to simulate a specific region
+// @bug This means players might not be able to transition from one area to another?!
+
+struct AssetManagementSystem {
+    // The indices of asset_memory and asset_data_memory are always linked
+
+    // General asset memory
+    ChunkMemory asset_memory;
+
+    // Actual asset data
+    ChunkMemory asset_data_memory;
+
+    // Head and tail of the doubly linked asset list (maintained by asset_reserve/asset_delete)
+    Asset* first;
+    Asset* last;
+};
+
+// Sums vram_size over all asset_memory.count slots of asset_memory.
+// NOTE(review): the total is accumulated in 64 bit but returned as int, so large totals get truncated
+int ams_get_vram_usage(AssetManagementSystem* ams)
+{
+    uint64 size = 0;
+    for (int i = 0; i < ams->asset_memory.count; ++i) {
+        size += ((Asset *) (ams->asset_memory.memory))[i].vram_size;
+    }
+
+    return (int) size;
+}
+
+// Unlinks the asset from the asset list and frees its asset->size chunks in both memories.
+void asset_delete(AssetManagementSystem* ams, Asset* asset)
+{
+    // The head has no prev and the tail has no next -> only patch neighbours that exist
+    if (asset->prev) {
+        asset->prev->next = asset->next;
+    }
+
+    if (asset->next) {
+        asset->next->prev = asset->prev;
+    }
+
+    // Keep the list ends valid when the head or the tail is removed
+    if (ams->first == asset) {
+        ams->first = asset->next;
+    }
+
+    if (ams->last == asset) {
+        ams->last = asset->prev;
+    }
+
+    // Read the range first: the Asset lives in the chunk that is freed below and may be cleared by it
+    const uint32 first_chunk = asset->internal_id;
+    const uint32 chunk_count = asset->size;
+
+    for (uint32 i = 0; i < chunk_count; ++i) {
+        chunk_element_free(&ams->asset_memory, first_chunk + i);
+        chunk_element_free(&ams->asset_data_memory, first_chunk + i);
+    }
+}
+
+// @todo implement defragment command to optimize memory layout since the memory layout will become fragmented over time
+
+// Reserves `elements` consecutive chunks in both memories and links the new asset into the
+// asset list, which is kept ordered by internal_id.
+// Returns NULL if chunk_reserve reports a negative index.
+Asset* asset_reserve(AssetManagementSystem* ams, uint64 elements = 1)
+{
+    int64 free_asset = chunk_reserve(&ams->asset_memory, elements, true);
+    ASSERT_SIMPLE(free_asset >= 0);
+    if (free_asset < 0) {
+        // In case ASSERT_SIMPLE is a no-op in this build: never index with a failed reservation
+        return NULL;
+    }
+
+    chunk_reserve_index(&ams->asset_data_memory, free_asset, elements, true);
+
+    Asset* asset = (Asset *) chunk_get_memory(&ams->asset_memory, free_asset);
+    asset->internal_id = (uint32) free_asset;
+    asset->self = chunk_get_memory(&ams->asset_data_memory, free_asset);
+    asset->ram_size = ams->asset_memory.chunk_size * elements;
+
+    // asset_delete frees asset->size chunks, so the reserved length has to be recorded here
+    asset->size = (uint32) elements;
+
+    // @performance Do we really want a double linked list. Are we really using this feature or is the free_index enough?
+    // Find the neighbours of the new asset; the tail is checked first so appends do not walk the list
+    Asset* prev = NULL;
+    Asset* next = ams->first;
+
+    if (ams->last && ams->last->internal_id < asset->internal_id) {
+        prev = ams->last;
+        next = NULL;
+    } else {
+        while (next && next->internal_id < asset->internal_id) {
+            prev = next;
+            next = next->next;
+        }
+    }
+
+    // Always set both links, the reused chunk may still hold old pointers
+    asset->prev = prev;
+    asset->next = next;
+
+    if (prev) {
+        prev->next = asset;
+    } else {
+        ams->first = asset;
+    }
+
+    if (next) {
+        next->prev = asset;
+    } else {
+        ams->last = asset;
+    }
+
+    return asset;
+}
+
+#endif
\ No newline at end of file
diff --git a/asset/AssetType.h b/asset/AssetType.h
new file mode 100644
index 0000000..74a7ca5
--- /dev/null
+++ b/asset/AssetType.h
@@ -0,0 +1,20 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef TOS_ASSET_TYPES_H
+#define TOS_ASSET_TYPES_H
+
+enum AssetType {
+    ASSET_TYPE_OBJ,
+    ASSET_TYPE_TEXTURE,
+    ASSET_TYPE_AUDIO,
+    ASSET_TYPE_ANIM,
+    ASSET_TYPE_SIZE
+};
+
+#endif
\ No newline at end of file
diff --git a/audio/Audio.h b/audio/Audio.h
new file mode 100644
index 0000000..4772c4e
--- /dev/null
+++ b/audio/Audio.h
@@ -0,0 +1,25 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef TOS_AUDIO_H
+#define TOS_AUDIO_H
+
+#include "../stdlib/Types.h"
+
+struct Audio {
+    uint32 sample_rate; // bits_per_sample
+    uint32 sample_size; // byte_per_bloc
+    uint32 frequency;
+    uint32 channels;
+    uint32 bloc_size;
+    uint32 byte_per_sec;
+    uint32 size;
+    byte* data; // owner of data
+};
+
+#endif
diff --git a/audio/Wav.h b/audio/Wav.h
new file mode 100644
index 0000000..d26889d
--- /dev/null
+++ b/audio/Wav.h
@@ -0,0 +1,163 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef TOS_AUDIO_WAV_H
+#define TOS_AUDIO_WAV_H
+
+#include <string.h>
+#include "../stdlib/Types.h"
+#include "../utils/Utils.h"
+#include "../utils/EndianUtils.h"
+#include "Audio.h"
+
+// See: https://en.wikipedia.org/wiki/WAV
+// IMPORTANT: Remember that we are not using packing for the headers
+// Because of that the struct size is different from the actual header size in the file
+// This means we have to manually assign the data to the headers
+
+// Packed header size
+#define WAV_HEADER_SIZE 44
+struct WavHeader {
+    // RIFF header
+    byte file_type_bloc_id[4];
+    uint32 size;
+    byte file_format_id[4];
+
+    // Data format header
+    byte format_bloc_id[4];
+    uint32 bloc_size;
+    uint16 audio_format;
+    uint16 nbr_channels;
+    uint32 frequency;
+    uint32 byte_per_sec;
+    uint16 byte_per_bloc;
+    uint16 bits_per_sample;
+
+    // Sample data header
+    byte data_bloc_id[4];
+    uint32 data_size;
+};
+
+struct Wav {
+    WavHeader header;
+
+    byte* sample_data; // WARNING: This is not the owner of the data. The owner is the FileBody
+
+    uint32 size;
+    byte* data; // WARNING: This is not the owner of the data. The owner is the FileBody
+};
+
+// Points wav at the file buffer and parses the WAV_HEADER_SIZE byte header into wav->header.
+// Nothing is copied except the header fields; sample_data references the bytes after the header.
+// If the file is shorter than the header only size/data are set and the function returns early.
+void generate_default_wav_references(const FileBody* file, Wav* wav)
+{
+    wav->size = (uint32) file->size;
+    wav->data = file->content;
+
+    if (wav->size < WAV_HEADER_SIZE) {
+        // This shouldn't happen
+        return;
+    }
+
+    // Check if we can copy memory directly
+    // The struct layout and header size should match on x86, but we still check it
+    if (sizeof(WavHeader) == WAV_HEADER_SIZE) {
+        memcpy(&wav->header, file->content, WAV_HEADER_SIZE);
+
+        // swap endian if we are on big endian system
+        // @question Maybe this needs to be a runtime check?
+        #if !_WIN32 && !__LITTLE_ENDIAN
+            wav->header.size = SWAP_ENDIAN_LITTLE(wav->header.size);
+            wav->header.bloc_size = SWAP_ENDIAN_LITTLE(wav->header.bloc_size);
+            wav->header.audio_format = SWAP_ENDIAN_LITTLE(wav->header.audio_format);
+            wav->header.nbr_channels = SWAP_ENDIAN_LITTLE(wav->header.nbr_channels);
+            wav->header.frequency = SWAP_ENDIAN_LITTLE(wav->header.frequency);
+            wav->header.byte_per_sec = SWAP_ENDIAN_LITTLE(wav->header.byte_per_sec);
+            wav->header.byte_per_bloc = SWAP_ENDIAN_LITTLE(wav->header.byte_per_bloc);
+            wav->header.bits_per_sample = SWAP_ENDIAN_LITTLE(wav->header.bits_per_sample);
+            wav->header.data_size = SWAP_ENDIAN_LITTLE(wav->header.data_size);
+        #endif
+    } else {
+        // RIFF header
+        wav->header.file_type_bloc_id[0] = *(wav->data + 0);
+        wav->header.file_type_bloc_id[1] = *(wav->data + 1);
+        wav->header.file_type_bloc_id[2] = *(wav->data + 2);
+        wav->header.file_type_bloc_id[3] = *(wav->data + 3);
+        // should be (0x52, 0x49, 0x46, 0x46)
+
+        wav->header.size = SWAP_ENDIAN_LITTLE(*((uint32 *) (wav->data + 4)));
+        // should be file size - 8 bytes
+
+        wav->header.file_format_id[0] = *(wav->data + 8);
+        wav->header.file_format_id[1] = *(wav->data + 9);
+        wav->header.file_format_id[2] = *(wav->data + 10);
+        wav->header.file_format_id[3] = *(wav->data + 11);
+        // should be (0x57, 0x41, 0x56, 0x45)
+
+        // Data format header
+        wav->header.format_bloc_id[0] = *(wav->data + 12);
+        wav->header.format_bloc_id[1] = *(wav->data + 13);
+        wav->header.format_bloc_id[2] = *(wav->data + 14);
+        wav->header.format_bloc_id[3] = *(wav->data + 15);
+        // should be (0x66, 0x6D, 0x74, 0x20)
+
+        wav->header.bloc_size = SWAP_ENDIAN_LITTLE(*((uint32 *) (wav->data + 16)));
+        // should be 16
+
+        wav->header.audio_format = SWAP_ENDIAN_LITTLE(*((uint16 *) (wav->data + 20)));
+        wav->header.nbr_channels = SWAP_ENDIAN_LITTLE(*((uint16 *) (wav->data + 22)));
+        wav->header.frequency = SWAP_ENDIAN_LITTLE(*((uint32 *) (wav->data + 24)));
+
+        wav->header.byte_per_sec = SWAP_ENDIAN_LITTLE(*((uint32 *) (wav->data + 28)));
+        // should be frequency * byte_per_bloc
+
+        wav->header.byte_per_bloc = SWAP_ENDIAN_LITTLE(*((uint16 *) (wav->data + 32)));
+        // should be nbr channels * bits_per_sample / 8
+
+        wav->header.bits_per_sample = SWAP_ENDIAN_LITTLE(*((uint16 *) (wav->data + 34)));
+
+        // Sample data header
+        wav->header.data_bloc_id[0] = *(wav->data + 36);
+        wav->header.data_bloc_id[1] = *(wav->data + 37);
+        wav->header.data_bloc_id[2] = *(wav->data + 38);
+        wav->header.data_bloc_id[3] = *(wav->data + 39);
+
+        wav->header.data_size = SWAP_ENDIAN_LITTLE(*((uint32 *) (wav->data + 40)));
+    }
+
+    wav->sample_data = wav->data + WAV_HEADER_SIZE;
+}
+
+// Parses src_data as WAV and copies the samples (everything after the header) into audio->data.
+// audio->data must already point to a buffer of at least src_data->size - WAV_HEADER_SIZE bytes.
+void generate_wav_image(const FileBody* src_data, Audio* audio)
+{
+    // @performance We are generating the struct and then filling the data.
+    // There is some assignment/copy overhead
+    Wav src = {};
+    generate_default_wav_references(src_data, &src);
+
+    // Truncated file: the parser returned early, the size below would underflow and sample_data is unset
+    if (src.size < WAV_HEADER_SIZE || !src.sample_data) {
+        audio->size = 0;
+        return;
+    }
+
+    audio->sample_rate = src.header.bits_per_sample;
+    audio->sample_size = src.header.byte_per_bloc;
+    audio->frequency = src.header.frequency;
+    audio->channels = src.header.nbr_channels;
+    audio->byte_per_sec = src.header.byte_per_sec;
+    audio->bloc_size = src.header.bloc_size;
+    audio->size = src.size - WAV_HEADER_SIZE;
+
+    memcpy((void *) audio->data, src.sample_data, audio->size);
+}
+
+#endif
\ No newline at end of file
diff --git a/gpuapi/RenderUtils.h b/gpuapi/RenderUtils.h
index f55fc3d..059a3d2 100644
--- a/gpuapi/RenderUtils.h
+++ b/gpuapi/RenderUtils.h
@@ -156,11 +156,13 @@ void entity_clip_space_from_local_sse(float* clip_space, const float* local_spac
     mat4vec4_mult_sse(mat, local_space, clip_space);
 }
 
+/*
 inline
 void entity_screen_space(float* screen_space, const float* clip_space, const float* viewport_mat)
 {
     // @todo implement
 }
+*/
 
 inline
 void entity_world_space_sse(float* world_space, const float* local_space, const float* model_mat)
@@ -180,11 +182,13 @@ void entity_clip_space_sse(float* clip_space, const float* view_space,
const flo
     mat4vec4_mult_sse(projection_mat, view_space, clip_space);
 }
 
+/*
 inline
 void entity_screen_space_sse(float* screen_space, const float* clip_space, const float* viewport_mat)
 {
     // @todo implement
 }
+*/
 
 inline
 void entity_world_space_sse(__m128* world_space, const __m128* local_space, const __m128* model_mat)
@@ -204,10 +208,12 @@ void entity_clip_space_sse(__m128* clip_space, const __m128* view_space, const _
     mat4vec4_mult_sse(projection_mat, view_space, clip_space);
 }
 
+/*
 inline
 void entity_screen_space_sse(__m128* screen_space, const __m128* clip_space, const __m128* viewport_mat)
 {
     // @todo implement
 }
+*/
 
 #endif
\ No newline at end of file
diff --git a/gpuapi/opengl/ShaterUtils.h b/gpuapi/opengl/ShaderUtils.h
similarity index 100%
rename from gpuapi/opengl/ShaterUtils.h
rename to gpuapi/opengl/ShaderUtils.h
diff --git a/gpuapi/opengl/UtilsOpengl.h b/gpuapi/opengl/UtilsOpengl.h
index 991ace7..175ecc6 100644
--- a/gpuapi/opengl/UtilsOpengl.h
+++ b/gpuapi/opengl/UtilsOpengl.h
@@ -10,7 +10,8 @@
 #define TOS_GPUAPI_OPENGL_UTILS_H
 
 #include "../../stdlib/Types.h"
-#include "../../utils/RingMemory.h"
+#include "../../memory/RingMemory.h"
+#include "../../utils/TestUtils.h"
 #include "../../models/Attrib.h"
 #include "../../models/Texture.h"
 
@@ -55,6 +56,8 @@ void window_create(Window* window, void*)
         NULL
     );
 
+    ASSERT_SIMPLE(window->hwnd_lib);
+
     //glfwSetInputMode(window->hwnd_lib, GLFW_CURSOR, GLFW_CURSOR_DISABLED);
     glfwMakeContextCurrent(window->hwnd_lib);
 
@@ -126,24 +129,26 @@ void prepare_texture(TextureFile* texture, uint32 texture_unit)
 }
 
 inline
-void load_texture_to_gpu(const TextureFile* texture)
+void load_texture_to_gpu(const TextureFile* texture, int mipmap_level = 0)
 {
     uint32 texture_data_type = get_texture_data_type(texture->texture_data_type);
     glTexImage2D(
-        texture_data_type, 0, GL_RGBA,
+        texture_data_type, mipmap_level, GL_RGBA,
         texture->image.width, texture->image.height,
         0, GL_RGBA, GL_UNSIGNED_BYTE,
         texture->image.pixels
     );
 
-    // @question use mipmap?
+    if (mipmap_level > -1) {
+        glGenerateMipmap(GL_TEXTURE_2D);
+    }
 }
 
 inline
 void texture_use(const TextureFile* texture, uint32 texture_unit)
 {
     glActiveTexture(GL_TEXTURE0 + texture_unit);
-    glBindTexture(GL_TEXTURE_2D, texture->id);
+    glBindTexture(GL_TEXTURE_2D, (GLuint) texture->id);
 }
 
 GLuint make_shader(GLenum type, const char *source, RingMemory* ring)
@@ -173,7 +178,7 @@ GLuint load_shader(GLenum type, const char *path, RingMemory* ring)
 {
     uint64 temp = ring->pos;
     // @bug potential bug for shaders > 4 mb
-    file_body file;
+    FileBody file;
     file.content = ring_get_memory(ring, MEGABYTE * 4);
 
     // @todo consider to accept file as parameter and load file before
@@ -357,6 +362,21 @@ void gpuapi_buffer_delete(GLuint buffer)
     glDeleteBuffers(1, &buffer);
 }
 
+int get_gpu_free_memory()
+{
+    GLint available = 0;
+    glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &available);
+
+    if (available != 0) {
+        return available;
+    }
+
+    glGetIntegerv(GL_TEXTURE_FREE_MEMORY_ATI, &available);
+
+    return available;
+}
+
+/*
 void render_9_patch(GLuint texture,
     int imgWidth, int imgHeight,
     int img_x1, int img_x2,
@@ -367,5 +387,6 @@ void render_9_patch(GLuint texture,
 {
 
 }
+*/
 
 #endif
\ No newline at end of file
diff --git a/image/Bitmap.h b/image/Bitmap.h
index af0bb76..389a71a 100644
--- a/image/Bitmap.h
+++ b/image/Bitmap.h
@@ -169,17 +169,22 @@ struct Bitmap {
     // 2. rows are padded in multiples of 4 bytes
     // 3. rows start from the bottom (unless the height is negative)
     // 4. pixel data is stored in ABGR (graphics libraries usually need BGRA or RGBA)
-    byte* pixels;
+    byte* pixels; // WARNING: This is not the owner of the data. The owner is the FileBody
 
     uint32 size;
-    byte* data;
+    byte* data; // WARNING: This is not the owner of the data. The owner is the FileBody
 };
 
-void generate_default_bitmap_references(const file_body* file, Bitmap* bitmap)
+void generate_default_bitmap_references(const FileBody* file, Bitmap* bitmap)
 {
-    bitmap->size = file->size;
+    bitmap->size = (uint32) file->size;
     bitmap->data = file->content;
 
+    if (bitmap->size < BITMAP_HEADER_SIZE) {
+        // This shouldn't happen
+        return;
+    }
+
     // Fill header
     bitmap->header.identifier[0] = *(file->content + 0);
     bitmap->header.identifier[1] = *(file->content + 1);
@@ -241,8 +246,10 @@ void generate_default_bitmap_references(const file_body* file, Bitmap* bitmap)
     bitmap->pixels = (byte *) (file->content + bitmap->header.offset);
 }
 
-void generate_bmp_image(const file_body* src_data, Image* image)
+void image_bmp_generate(const FileBody* src_data, Image* image)
 {
+    // @performance We are generating the struct and then filling the data.
+    // There is some assignment/copy overhead
     Bitmap src = {};
     generate_default_bitmap_references(src_data, &src);
 
@@ -254,7 +261,7 @@ void generate_bmp_image(const file_body* src_data, Image* image)
     uint32 width = ROUND_TO_NEAREST(src.dib_header.width, 4);
     uint32 pixel_bytes = src.dib_header.bits_per_pixel / 8;
 
-    if (image->order_pixels = IMAGE_PIXEL_ORDER_BGRA) {
+    if (image->order_pixels == IMAGE_PIXEL_ORDER_BGRA) {
         memcpy((void *) image->pixels, src.pixels, image->length * pixel_bytes);
 
         return;
diff --git a/image/Image.cpp b/image/Image.cpp
new file mode 100644
index 0000000..6fc4ad2
--- /dev/null
+++ b/image/Image.cpp
@@ -0,0 +1,53 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef TOS_IMAGE_C
+#define TOS_IMAGE_C
+
+#include "../utils/StringUtils.h"
+#include "Image.h"
+#include "Tga.h"
+#include "Bitmap.h"
+#include "Png.h"
+#include "../memory/RingMemory.h"
+
+#if _WIN32
+    #include "../platform/win32/UtilsWin32.h"
+#else
+    #include "../platform/linux/UtilsLinux.h"
+#endif
+
+// Reads the file at path via file_read and decodes it into image based on the file extension.
+// Handled extensions are .png, .tga and .bmp; any other extension leaves image untouched.
+void image_from_file(RingMemory* ring, const char* path, Image* image)
+{
+    char full_path[MAX_PATH];
+    if (*path == '.') {
+        relative_to_absolute(path, full_path);
+    } else {
+        // Not a relative path: copy it as given so file_read never sees an uninitialized buffer
+        int i = 0;
+        for (; i < MAX_PATH - 1 && path[i] != '\0'; ++i) {
+            full_path[i] = path[i];
+        }
+        full_path[i] = '\0';
+    }
+
+    FileBody file;
+    file_read(full_path, &file, ring);
+
+    if (str_ends_with(path, ".png")) {
+        image_png_generate(&file, image);
+    } else if (str_ends_with(path, ".tga")) {
+        image_tga_generate(&file, image);
+    } else if (str_ends_with(path, ".bmp")) {
+        image_bmp_generate(&file, image);
+    }
+}
+
+#endif
\ No newline at end of file
diff --git a/image/Image.h b/image/Image.h
index a6719c9..7e4bfd8 100644
--- a/image/Image.h
+++ b/image/Image.h
@@ -26,7 +26,7 @@ struct Image {
     byte order_pixels; // RGBA vs BGRA
     byte order_rows; // top-to-bottom vs bottom-to-top
 
-    uint32* pixels;
+    uint32* pixels; // owner of data
 };
 
 #endif
\ No newline at end of file
diff --git a/image/Png.h b/image/Png.h
new file mode 100644
index 0000000..d70fa66
--- /dev/null
+++ b/image/Png.h
@@ -0,0 +1,485 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ *
+ * png: https://www.w3.org/TR/2003/REC-PNG-20031110/
+ * zlib: https://www.ietf.org/rfc/rfc1950.txt
+ * deflate: https://www.ietf.org/rfc/rfc1951.txt
+ */
+#ifndef TOS_IMAGE_PNG_H
+#define TOS_IMAGE_PNG_H
+
+#include <string.h>
+#include "../stdlib/Types.h"
+#include "../utils/Utils.h"
+#include "../utils/EndianUtils.h"
+#include "Image.h"
+
+// Packed header size
+#define PNG_HEADER_SIZE 8
+
+struct PngHeader {
+    byte signature[8];
+};
+
+struct PngChunk {
+    uint32 length;
+    uint32 type;
+    uint32 crc;
+};
+
+struct PngIHDR {
+    uint32 length;
+    uint32 type;
+    uint32 width;
+    uint32 height;
+    byte bit_depth;
+    byte colory_type;
+    byte compression;
+    byte filter;
+    byte interlace;
+    uint32 crc;
+};
+
+struct PngIDATHeader {
+    byte zlib_method_flag;
+    byte add_flag;
+};
+
+struct Png {
+    PngHeader header;
+    PngIHDR ihdr;
+
+    // Encoded pixel data
+    byte* pixels; // WARNING: This is not the owner of the data. The owner is the FileBody
+
+    uint32 size;
+    byte* data; // WARNING: This is not the owner of the data. The owner is the FileBody
+};
+
+struct PngHuffmanEntry {
+    uint16 symbol;
+    uint16 bits_used;
+};
+
+struct PngHuffman {
+    uint32 max_code_length; // in bits
+    uint32 count;
+    PngHuffmanEntry entries[32768]; // 2^15
+};
+
+static const byte PNG_SIGNATURE[] = {0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A};
+static const uint32 HUFFMAN_BIT_COUNTS[][2] = {{143, 8}, {255, 9}, {279, 7}, {287, 8}, {319, 5}};
+static const uint32 HUFFMAN_CODE_LENGTH_ALPHA[] = {
+    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+};
+static const PngHuffmanEntry PNG_LENGTH_EXTRA[] = {
+    {3, 0}, {4, 0}, {5, 0}, {6, 0}, {7, 0}, {8, 0}, {9, 0}, {10, 0}, {11, 1},
+    {13, 1}, {15, 1}, {17, 1}, {19, 2}, {23, 2}, {27, 2}, {31, 2}, {35, 3},
+    {43, 3}, {51, 3}, {59, 3}, {67, 4}, {83, 4}, {99, 4}, {115, 4}, {131, 5},
+    {163, 5}, {195, 5}, {227, 5}, {258, 0}
+};
+
+static const PngHuffmanEntry PNG_DIST_EXTRA[] = {
+    {1, 0}, {2, 0}, {3, 0}, {4, 0}, {5, 1}, {7, 1}, {9, 2}, {13, 2}, {17, 3},
+    {25, 3}, {33, 4}, {49, 4}, {65, 5}, {97, 5}, {129, 6}, {193, 6}, {257, 7},
+    {385, 7}, {513, 8}, {769, 8}, {1025, 9}, {1537, 9}, {2049, 10}, {3073, 10},
+    {4097, 11}, {6145, 11}, {8193, 12}, {12289, 12}, {16385, 13}, {24577, 13}
+};
+
+void huffman_png_compute(uint32 symbol_count, uint32* symbol_code_length, PngHuffman* huff)
+{
+    uint32 code_length_hist[16] = {};
+    for (uint32 i = 0; i < symbol_count; ++i) {
+        ++code_length_hist[symbol_code_length[i]];
+    }
+
+    uint32 next_unused_code[16];
+    next_unused_code[0] = 0;
+    code_length_hist[0] = 0;
+
+    for (uint32 i = 1; i < 16; ++i) {
+        next_unused_code[i] = (next_unused_code[i - 1] + code_length_hist[i - 1]) << 1;
+    }
+
+    for (uint32 i = 0; i < symbol_count; ++i) {
+        uint32 code_length = symbol_code_length[i];
+        if (!code_length) {
+            continue;
+        }
+
+        uint32 code = next_unused_code[code_length]++;
+        uint32 bits = huff->max_code_length - code_length;
+        uint32 entries = 1 << bits;
+
+        for (uint32 j = 0; j < entries; ++j) {
+            uint32 base_index = (code << bits) | j;
+            uint32 index = reverse_bits(base_index, huff->max_code_length);
+
+            PngHuffmanEntry* entry = huff->entries + index;
+
+            entry->bits_used = (uint16) code_length;
+            entry->symbol = (uint16) i;
+        }
+    }
+}
+
+PngHuffmanEntry huffman_png_decode(PngHuffman* huff, const byte* data, int pos)
+{
+    uint32 index = get_bits(data, huff->max_code_length, pos);
+    return huff->entries[index];
+}
+
+void png_filter_reconstruct(uint32 width, uint32 height, const byte* decompressed, byte* finalized, int steps)
+{
+    uint32 zero = 0;
+    byte* prev_row = NULL;
+    byte prev_row_advance = 0;
+
+    for (uint32 y = 0; y < height; ++y) {
+        byte filter = *decompressed;
+        byte* current_row = ; // NOTE(review): initializer is missing in the source, this does not compile yet
+
+        switch (filter) {
+            case 0: {
+                memcpy(finalized + y * width, decompressed + y * width, width);
+            } break;
+            case 1: {
+                // no simd possible, well 4 + 4 probably not worth it
+
+            } break;
+            case 2: {
+                // requires manual simd impl. since prev_row_advance can be 0 or 4
+            } break;
+            case 3: {
+                // no simd possible, well 4 + 4 probably not worth it
+            } break;
+            case 4: {
+                // no simd possible, well 4 + 4 probably not worth it
+            } break;
+            default: {
+
+            }
+        }
+
+        prev_row = current_row;
+        prev_row_advance = 4;
+    }
+}
+
+void generate_default_png_references(const FileBody* file, Png* png)
+{
+    png->size = (uint32) file->size;
+    png->data = file->content;
+
+    if (png->size < 33) {
+        // This shouldn't happen
+        return;
+    }
+
+    // The first chunk MUST be IHDR -> we handle it here
+    memcpy(png, file->content, 29);
+    png->ihdr.crc = SWAP_ENDIAN_BIG((uint32 *) (file->content + 29)); // 8 signature + 4 length + 4 type + 13 data
+
+    png->ihdr.length = SWAP_ENDIAN_BIG(&png->ihdr.length);
+    png->ihdr.type = SWAP_ENDIAN_BIG(&png->ihdr.type);
+    png->ihdr.width = SWAP_ENDIAN_BIG(&png->ihdr.width);
+    png->ihdr.height = SWAP_ENDIAN_BIG(&png->ihdr.height);
+}
+
+// Decodes a PNG file (only bit depth 8, color type 6, no interlace) into image. Work in progress.
+// Returns false for unsupported PNG/zlib variants or an invalid back-reference, true otherwise.
+bool image_png_generate(const FileBody* src_data, Image* image, int steps = 8)
+{
+    // @performance We are generating the struct and then filling the data.
+    // There is some assignment/copy overhead
+    Png src = {};
+    generate_default_png_references(src_data, &src);
+
+    // @todo We probably need the following buffers
+    // 1. file buffer (already here)
+    // 2. block buffer
+    // 3. temp pixel buffer (larger)
+    // 4. final pixel buffer (already here)
+
+    if (src.ihdr.bit_depth != 8
+        || src.ihdr.colory_type != 6
+        || src.ihdr.compression != 0
+        || src.ihdr.filter != 0
+        || src.ihdr.interlace != 0
+    ) {
+        // We don't support this type of png
+        return false;
+    }
+
+    PngChunk chunk;
+    PngIDATHeader idat_header;
+
+    bool is_first_idat = true;
+
+    uint32 out_pos = 0;
+
+    // @question the following is a lot of data, should this be moved to heap?
+    uint32 literal_length_dist_table[512];
+
+    PngHuffman literal_length_huffman;
+    literal_length_huffman.max_code_length = 15;
+    literal_length_huffman.count = 1 << literal_length_huffman.max_code_length;
+
+    PngHuffman distance_huffman;
+    distance_huffman.max_code_length = 15;
+    distance_huffman.count = 1 << distance_huffman.max_code_length;
+
+    PngHuffman dictionary_huffman;
+    dictionary_huffman.max_code_length = 7;
+    dictionary_huffman.count = 1 << dictionary_huffman.max_code_length;
+
+    // i is the current byte to read
+    int i = 33;
+
+    // r is the re-shift value in case we need to go back
+    int r = 0;
+
+    // b is the current bit to read
+    int b = 0;
+
+    while(i < src.size) {
+        chunk.length = SWAP_ENDIAN_BIG((uint32 *) (src_data->content + i));
+        chunk.type = SWAP_ENDIAN_BIG((uint32 *) (src_data->content + i + 4));
+
+        // For our png reader, we only care about IDAT
+        // @question consider PLTE, tRNS, gAMA, iCCP
+        if (chunk.type == 'IEND') {
+            break;
+        } else if (chunk.type != 'IDAT') {
+            // IDAT chunks are continuous and we don't care for anything else
+            if (!is_first_idat) {
+                break;
+            }
+
+            i += chunk.length + 12;
+            continue;
+        }
+
+        if (is_first_idat) {
+            idat_header.zlib_method_flag = *(src_data->content + i + 8);
+            idat_header.add_flag = *(src_data->content + i + 9);
+
+            byte CM = idat_header.zlib_method_flag & 0xF;
+            byte FDICT = (idat_header.add_flag >> 5) & 0x1;
+
+            is_first_idat = false;
+
+            if (CM != 8 || FDICT != 0) {
+                return false;
+            }
+
+            i += 10;
+        }
+
+        // @bug The algorithm below works on "blocks".
+        // Could it be possible that a block is spread accross 2 IDAT chunks?
+        // If so this would be bad and break the code below
+        // We could solve this by just having another counting variable and jump to the next block
+
+        // start: src_data->content + i + 8
+        // end: src_data->content + i + 8 + length - 1
+
+        // DEFLATE Algorithm
+        // @bug the following 3 lines are wrong, they don't have to start at a bit 0/1
+        // A block doesn't have to start at an byte boundary
+        byte BFINAL = get_bits(src_data->content + i, 1, b);
+        i += (b > 7 - 1);
+        b = (b + 1) & 7;
+
+        byte BTYPE = get_bits(src_data->content + i, 2, b);
+        i += (b > 7 - 2);
+        b = (b + 2) & 7;
+
+        if (BTYPE == 0) {
+            // starts at byte boundary -> position = +1 of previous byte
+            if (b == 0) {
+                i -= 1;
+            }
+
+            uint16 len = *((uint16 *) (src_data->content + i + 1));
+            uint16 nlen = *((uint16 *) (src_data->content + i + 3));
+
+            memcpy(image->pixels + out_pos, src_data->content + i + 5, len);
+            out_pos += len;
+
+            i += 5 + len;
+            b = 0;
+        } else {
+            // @question is this even required or are we overwriting anyways?
+            memset(&literal_length_dist_table, 0, 512 * 4);
+            memset(&literal_length_huffman.entries, 0, sizeof(PngHuffmanEntry) * 15);
+            memset(&distance_huffman.entries, 0, sizeof(PngHuffmanEntry) * 15);
+            memset(&dictionary_huffman.entries, 0, sizeof(PngHuffmanEntry) * 7);
+
+            uint32 huffman_literal = 0;
+            uint32 huffman_dist = 0;
+
+            if (BTYPE == 2) {
+                // Compressed with dynamic Huffman code
+                huffman_literal = get_bits(src_data->content + i, 5, b);
+                i += (b > 7 - 5);
+                b = (b + 5) & 7;
+
+                huffman_dist = get_bits(src_data->content + i, 5, b);
+                i += (b > 7 - 5);
+                b = (b + 5) & 7;
+
+                uint32 huffman_code_length = get_bits(src_data->content + i, 4, b);
+                i += (b > 7 - 4);
+                b = (b + 4) & 7;
+
+                huffman_literal += 257;
+                huffman_dist += 1;
+                huffman_code_length += 4;
+
+                uint32 huffman_code_length_table[19] = {};
+
+                for (uint32 j = 0; j < huffman_code_length; ++j) {
+                    huffman_code_length_table[HUFFMAN_CODE_LENGTH_ALPHA[j]] = get_bits(src_data->content + i, 3, b);
+                    i += (b > 7 - 3);
+                    b = (b + 3) & 7;
+                }
+
+                huffman_png_compute(19, huffman_code_length_table, &dictionary_huffman);
+
+                uint32 literal_length_count = 0;
+                uint32 length_count = huffman_literal + huffman_dist;
+
+                while (literal_length_count < length_count) {
+                    // @todo implement
+                    uint32 rep_count = 1;
+                    uint32 rep_val = 0;
+
+                    PngHuffmanEntry dict = huffman_png_decode(&dictionary_huffman, src_data->content + i, b);
+                    i += (b + dict.bits_used) / 8;
+                    b = (b + dict.bits_used) & 7;
+
+                    uint32 encoded_length = dict.symbol; // the decoded value, bits_used is only the code width
+
+                    if (encoded_length <= 15) {
+                        rep_val = encoded_length;
+                    } else if (encoded_length == 16) {
+                        rep_count = 3 + get_bits(src_data->content + i, 2, b);
+                        i += (b > 7 - 2);
+                        b = (b + 2) & 7;
+
+                        rep_val = literal_length_dist_table[literal_length_count - 1];
+                    } else if (encoded_length == 17) {
+                        rep_count = 3 + get_bits(src_data->content + i, 3, b);
+                        i += (b > 7 - 3);
+                        b = (b + 3) & 7;
+                    } else if (encoded_length == 18) {
+                        rep_count = 11 + get_bits(src_data->content + i, 7, b);
+                        i += (b > 7 - 7);
+                        b = (b + 7) & 7;
+                    }
+
+                    // Store rep_count copies of rep_val and advance the write position.
+                    // (memset would write single bytes into the uint32 table and never advanced the counter)
+                    for (uint32 k = 0; k < rep_count && literal_length_count < length_count; ++k) {
+                        literal_length_dist_table[literal_length_count++] = rep_val;
+                    }
+                }
+            } else if (BTYPE == 1) {
+                // Compressed with fixed Huffman code
+                huffman_literal = 288;
+                huffman_dist = 32;
+
+                uint32 bit_index = 0;
+                for(uint32 range_index = 0; range_index < 5; ++range_index) {
+                    uint32 bit_count = HUFFMAN_BIT_COUNTS[range_index][1];
+                    uint32 last = HUFFMAN_BIT_COUNTS[range_index][0];
+
+                    while(bit_index <= last) {
+                        literal_length_dist_table[bit_index++] = bit_count;
+                    }
+                }
+            }
+
+            huffman_png_compute(huffman_literal, literal_length_dist_table, &literal_length_huffman);
+            huffman_png_compute(huffman_dist, literal_length_dist_table + huffman_literal, &distance_huffman);
+
+            while (true) {
+                PngHuffmanEntry literal = huffman_png_decode(&literal_length_huffman, src_data->content + i, b);
+                i += (b + literal.bits_used) / 8;
+                b = (b + literal.bits_used) & 7;
+
+                uint32 literal_length = literal.symbol; // the decoded value; 256 marks the end of the block
+
+                if (literal_length == 256) {
+                    break;
+                }
+
+                if (literal_length <= 255) {
+                    *(image->pixels + out_pos) = (byte) (literal_length & 0xFF);
+                    ++out_pos;
+                } else {
+                    uint32 length_tab_index = literal_length - 257;
+                    PngHuffmanEntry length_tab = PNG_LENGTH_EXTRA[length_tab_index];
+                    uint32 length = length_tab.symbol;
+
+                    if (length_tab.bits_used) {
+                        uint32 extra_bits = get_bits(src_data->content + i, length_tab.bits_used, b);
+                        i += (b + length_tab.bits_used) / 8;
+                        b = (b + length_tab.bits_used) & 7;
+
+                        length += extra_bits;
+                    }
+
+                    PngHuffmanEntry tab = huffman_png_decode(&distance_huffman, src_data->content + i, b);
+                    i += (b + tab.bits_used) / 8;
+                    b = (b + tab.bits_used) & 7;
+
+                    uint32 dist_tab_index = tab.symbol; // the decoded distance code, not the code width
+
+                    PngHuffmanEntry dist_tab = PNG_DIST_EXTRA[dist_tab_index];
+                    uint32 dist = dist_tab.symbol;
+
+                    if (dist_tab.bits_used) {
+                        uint32 extra_bits = get_bits(src_data->content + i, dist_tab.bits_used, b);
+                        i += (b + dist_tab.bits_used) / 8;
+                        b = (b + dist_tab.bits_used) & 7;
+
+                        dist += extra_bits;
+                    }
+
+                    if (dist > out_pos) {
+                        // Back-reference before the start of the output -> corrupt stream
+                        return false;
+                    }
+
+                    // Source and destination may overlap (dist < length), so copy front to back
+                    // one element at a time and advance the output position
+                    for (uint32 k = 0; k < length; ++k) {
+                        *(image->pixels + out_pos) = *(image->pixels + out_pos - dist);
+                        ++out_pos;
+                    }
+                }
+            }
+        }
+
+        if (BFINAL != 0) { // BFINAL is set on the last block of the deflate stream
+            break;
+        }
+    }
+
+    image->width = src.ihdr.width;
+    image->height = src.ihdr.height;
+
+    // @todo fix pixels parameter
+    png_filter_reconstruct(image->width, image->height, image->pixels, image->pixels, steps);
+
+    return true;
+}
+
+#endif
\ No newline at end of file
diff --git a/image/Tga.h b/image/Tga.h
index 9f39def..f4b38b9 100644
--- a/image/Tga.h
+++ b/image/Tga.h
@@ -42,14 +42,22 @@ struct TgaHeader {
 struct Tga {
     TgaHeader header;
 
-    byte* pixels;
+    byte* pixels; // WARNING: This is not the owner of the data. The owner is the FileBody
 
     uint32 size;
-    byte* data;
+    byte* data; // WARNING: This is not the owner of the data. The owner is the FileBody
 };
 
-void generate_default_tga_references(const file_body* file, Tga* tga)
+void generate_default_tga_references(const FileBody* file, Tga* tga)
 {
+    tga->size = (uint32) file->size;
+    tga->data = file->content;
+
+    if (tga->size < TGA_HEADER_SIZE) {
+        // This shouldn't happen
+        return;
+    }
+
     tga->header.id_length = file->content[0];
     tga->header.color_map_type = file->content[1];
     tga->header.image_type = file->content[2];
@@ -68,8 +76,10 @@ void generate_default_tga_references(const file_body* file, Tga* tga)
         + tga->header.color_map_length * (tga->header.color_map_bits / 8); // can be 0
 }
 
-void generate_tga_image(const file_body* src_data, Image* image)
+void image_tga_generate(const FileBody* src_data, Image* image)
 {
+    // @performance We are generating the struct and then filling the data.
+    // There is some assignment/copy overhead
     Tga src = {};
     generate_default_tga_references(src_data, &src);
 
diff --git a/image/default_colors.h b/image/default_colors.h
index 2b72213..9c336a6 100644
--- a/image/default_colors.h
+++ b/image/default_colors.h
@@ -31,6 +31,6 @@ const int default_colors_256[256] = {
     0xE1D4FF, 0xD8ACFF, 0xCD9BFF, 0xC88DFA, 0xBD8AF9, 0xB160FF, 0xAA52FE, 0x9841FD, 0x8726FF, 0x8700F5, 0x7200F4, 0x5C00B7, 0x460489, 0x350077, 0x28004F, 0x1c0037,
     0xFFC7FF, 0xFFB2FF, 0xFF9AFF, 0xF181F1, 0xFB6FFD, 0xF850FB, 0xFB46FF, 0xF91FFF, 0xF900FF, 0xDD00E6, 0xBF00C7, 0x9B0199, 0xB70090, 0x670362, 0x4F0153, 0x330035,
     0xFDD2E6, 0xF9B5DA, 0xF7A4D4, 0xF198CB, 0xF682BD, 0xFF5FAE, 0xFF4CA9, 0xFF3CA4, 0xFF1A94, 0xF90979, 0xE80071, 0xC40061, 0x96004A, 0x670132, 0x4F0024, 0x310016
-}
+};
 
 #endif
\ No newline at end of file
diff --git a/input/Input.h b/input/Input.h
index 4b84bc9..b2b41fa 100644
--- a/input/Input.h
+++ b/input/Input.h
@@ -43,7 +43,10 @@ struct InputState {
 
     // We only consider up to 4 pressed keys
     // Depending on the keyboard you may only be able to detect a limited amount of key presses anyway
+    int up_index;
     uint16 keys_down_old[MAX_KEY_PRESSES];
+
+    int down_index;
     uint16 keys_down[MAX_KEY_PRESSES];
 
     // Mouse
diff --git a/math/matrix/MatrixFloat32.h b/math/matrix/MatrixFloat32.h
index 3e91dc0..97de20e 100644
--- a/math/matrix/MatrixFloat32.h
+++ b/math/matrix/MatrixFloat32.h
@@ -13,21 +13,21 @@
 #include "../../stdlib/Mathtypes.h"
 #include "../../utils/MathUtils.h"
 
-void mat3_identity_f32(float* matrix)
+void mat3_identity(float* matrix)
 {
     matrix[0] = 1.0f; matrix[1] = 0.0f; matrix[2] = 0.0f;
     matrix[3] = 0.0f; matrix[4] = 1.0f; matrix[5] = 0.0f;
     matrix[6] = 0.0f; matrix[7] = 0.0f; matrix[8] = 1.0f;
 }
 
-void mat3_identity_f32(__m128* matrix)
+void mat3_identity(__m128* matrix)
 {
     matrix[0] = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
     matrix[1] = _mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f);
     matrix[2] = _mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f);
 }
 
-void mat4_identity_f32(float* matrix)
+void mat4_identity(float* matrix)
 {
     matrix[0] = 1.0f; matrix[1] = 0.0f; matrix[2] = 0.0f; matrix[3] = 0.0f;
     matrix[4] = 0.0f; matrix[5] = 1.0f; matrix[6] = 0.0f; matrix[7] = 0.0f;
@@ -35,7 +35,7 @@ void mat4_identity_f32(float* matrix)
     matrix[12] = 0.0f; matrix[13] = 0.0f; matrix[14] = 0.0f; matrix[15] = 1.0f;
 }
 
-void mat4_identity_f32(__m128* matrix)
+void mat4_identity(__m128* matrix)
 {
     matrix[0] = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
     matrix[1] = _mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f);
@@ -43,7 +43,7 @@ void mat4_identity_f32(__m128* matrix)
     matrix[3] = _mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f);
 }
 
-void mat_translate_f32(float* matrix, float dx, float dy, float dz)
+void mat4_translate(float* matrix, float dx, float dy, float dz)
 {
     matrix[0] = 1; matrix[1] = 0; matrix[2] = 0; matrix[3] = 0;
     matrix[4] = 0; matrix[5] = 1; matrix[6] = 0; matrix[7] = 0;
@@ -52,7 +52,7 @@ void mat_translate_f32(float* matrix, float dx, float dy, float dz)
 }
 
 // x, y, z need to be normalized
-void mat3_rotate(float* matrix, float x, float y, float z, float angle)
+void mat4_rotate(float* matrix, float x, float y, float z, float angle)
 {
     float s = sinf_approx(angle);
     float c = cosf_approx(angle);
@@ -118,7 +118,7 @@ void mat3vec3_mult_sse(const float* matrix, const float* vector, float* result)
 
         __m128 dot = _mm_dp_ps(row, vec, 0xF1);
 
-        result[i] = _mm_cvtss_f32(dot);
+        result[i] = _mm_cvtss_f32(dot); // SSE intrinsic name, the _f32 suffix must stay
     }
 }
 
@@ -128,7 +128,7 @@ void mat3vec3_mult_sse(const __m128* matrix, const __m128* vector, float* result
     for (int i = 0; i < 3; ++i) {
         __m128 dot = _mm_dp_ps(matrix[i], *vector, 0xF1);
 
-        result[i] = _mm_cvtss_f32(dot);
+        result[i] = _mm_cvtss_f32(dot); // SSE intrinsic name, the _f32 suffix must stay
     }
 }
 
@@ -157,7 +157,7 @@ void mat4vec4_mult_sse(const float* matrix, const float* vector, float* result)
         __m128 row = _mm_loadu_ps(&matrix[i * 4]);
         __m128 dot = _mm_dp_ps(row, vec, 0xF1);
 
-        result[i] = _mm_cvtss_f32(dot);
+        result[i] = _mm_cvtss_f32(dot); // SSE intrinsic name, the _f32 suffix must stay
     }
 }
 
@@ -167,7 +167,7 @@ void mat4vec4_mult_sse(const __m128* matrix, const __m128* vector, float* result
for (int i = 0; i < 4; ++i) { __m128 dot = _mm_dp_ps(matrix[i], *vector, 0xF1); - result[i] = _mm_cvtss_f32(dot); + result[i] = _mm_cvtss(dot); } } @@ -225,55 +225,55 @@ void mat4mat4_mult_sse(const float* a, const float* b, float* result) // b1 dot = _mm_dp_ps(a_1, b_1, 0xF1); - result[0] = _mm_cvtss_f32(dot); + result[0] = _mm_cvtss(dot); dot = _mm_dp_ps(a_2, b_1, 0xF1); - result[1] = _mm_cvtss_f32(dot); + result[1] = _mm_cvtss(dot); dot = _mm_dp_ps(a_3, b_1, 0xF1); - result[2] = _mm_cvtss_f32(dot); + result[2] = _mm_cvtss(dot); dot = _mm_dp_ps(a_4, b_1, 0xF1); - result[3] = _mm_cvtss_f32(dot); + result[3] = _mm_cvtss(dot); // b2 dot = _mm_dp_ps(a_1, b_2, 0xF1); - result[4] = _mm_cvtss_f32(dot); + result[4] = _mm_cvtss(dot); dot = _mm_dp_ps(a_2, b_2, 0xF1); - result[5] = _mm_cvtss_f32(dot); + result[5] = _mm_cvtss(dot); dot = _mm_dp_ps(a_3, b_2, 0xF1); - result[6] = _mm_cvtss_f32(dot); + result[6] = _mm_cvtss(dot); dot = _mm_dp_ps(a_4, b_2, 0xF1); - result[7] = _mm_cvtss_f32(dot); + result[7] = _mm_cvtss(dot); // b3 dot = _mm_dp_ps(a_1, b_3, 0xF1); - result[8] = _mm_cvtss_f32(dot); + result[8] = _mm_cvtss(dot); dot = _mm_dp_ps(a_2, b_3, 0xF1); - result[9] = _mm_cvtss_f32(dot); + result[9] = _mm_cvtss(dot); dot = _mm_dp_ps(a_3, b_3, 0xF1); - result[10] = _mm_cvtss_f32(dot); + result[10] = _mm_cvtss(dot); dot = _mm_dp_ps(a_4, b_3, 0xF1); - result[11] = _mm_cvtss_f32(dot); + result[11] = _mm_cvtss(dot); // b4 dot = _mm_dp_ps(a_1, b_4, 0xF1); - result[12] = _mm_cvtss_f32(dot); + result[12] = _mm_cvtss(dot); dot = _mm_dp_ps(a_2, b_4, 0xF1); - result[13] = _mm_cvtss_f32(dot); + result[13] = _mm_cvtss(dot); dot = _mm_dp_ps(a_3, b_4, 0xF1); - result[14] = _mm_cvtss_f32(dot); + result[14] = _mm_cvtss(dot); dot = _mm_dp_ps(a_4, b_4, 0xF1); - result[15] = _mm_cvtss_f32(dot); + result[15] = _mm_cvtss(dot); } void mat4mat4_mult_sse(const __m128* a, const __m128* b_transposed, float* result) @@ -283,55 +283,55 @@ void mat4mat4_mult_sse(const __m128* a, const __m128* 
b_transposed, float* resul // @question could simple mul add sse be faster? // b1 dot = _mm_dp_ps(a[0], b_transposed[0], 0xF1); - result[0] = _mm_cvtss_f32(dot); + result[0] = _mm_cvtss(dot); dot = _mm_dp_ps(a[1], b_transposed[0], 0xF1); - result[1] = _mm_cvtss_f32(dot); + result[1] = _mm_cvtss(dot); dot = _mm_dp_ps(a[2], b_transposed[0], 0xF1); - result[2] = _mm_cvtss_f32(dot); + result[2] = _mm_cvtss(dot); dot = _mm_dp_ps(a[3], b_transposed[0], 0xF1); - result[3] = _mm_cvtss_f32(dot); + result[3] = _mm_cvtss(dot); // b2 dot = _mm_dp_ps(a[0], b_transposed[1], 0xF1); - result[4] = _mm_cvtss_f32(dot); + result[4] = _mm_cvtss(dot); dot = _mm_dp_ps(a[1], b_transposed[1], 0xF1); - result[5] = _mm_cvtss_f32(dot); + result[5] = _mm_cvtss(dot); dot = _mm_dp_ps(a[2], b_transposed[1], 0xF1); - result[6] = _mm_cvtss_f32(dot); + result[6] = _mm_cvtss(dot); dot = _mm_dp_ps(a[3], b_transposed[1], 0xF1); - result[7] = _mm_cvtss_f32(dot); + result[7] = _mm_cvtss(dot); // b3 dot = _mm_dp_ps(a[0], b_transposed[2], 0xF1); - result[8] = _mm_cvtss_f32(dot); + result[8] = _mm_cvtss(dot); dot = _mm_dp_ps(a[1], b_transposed[2], 0xF1); - result[9] = _mm_cvtss_f32(dot); + result[9] = _mm_cvtss(dot); dot = _mm_dp_ps(a[2], b_transposed[2], 0xF1); - result[10] = _mm_cvtss_f32(dot); + result[10] = _mm_cvtss(dot); dot = _mm_dp_ps(a[3], b_transposed[2], 0xF1); - result[11] = _mm_cvtss_f32(dot); + result[11] = _mm_cvtss(dot); // b4 dot = _mm_dp_ps(a[0], b_transposed[3], 0xF1); - result[12] = _mm_cvtss_f32(dot); + result[12] = _mm_cvtss(dot); dot = _mm_dp_ps(a[1], b_transposed[3], 0xF1); - result[13] = _mm_cvtss_f32(dot); + result[13] = _mm_cvtss(dot); dot = _mm_dp_ps(a[2], b_transposed[3], 0xF1); - result[14] = _mm_cvtss_f32(dot); + result[14] = _mm_cvtss(dot); dot = _mm_dp_ps(a[3], b_transposed[3], 0xF1); - result[15] = _mm_cvtss_f32(dot); + result[15] = _mm_cvtss(dot); } void mat4mat4_mult_sse(const __m128* a, const __m128* b_transpose, __m128* result) @@ -345,8 +345,8 @@ void 
mat4mat4_mult_sse(const __m128* a, const __m128* b_transpose, __m128* resul } } -// @question Consider to replace with 1d array -void frustum_planes(float planes[6][4], int radius, float *matrix) { +// @performance Consider to replace with 1d array +void mat4_frustum_planes(float planes[6][4], float radius, float *matrix) { // @todo make this a setting float znear = 0.125; float zfar = radius * 32 + 64; @@ -384,12 +384,12 @@ void frustum_planes(float planes[6][4], int radius, float *matrix) { planes[5][3] = zfar * m[15] - m[14]; } -void mat_frustum( +void mat4_frustum( float *matrix, float left, float right, float bottom, float top, float znear, float zfar) { float temp, temp2, temp3, temp4; - temp = 2.0 * znear; + temp = 2.0f * znear; temp2 = right - left; temp3 = top - bottom; temp4 = zfar - znear; @@ -415,24 +415,24 @@ void mat_frustum( matrix[15] = 0.0; } -void mat_perspective( +void mat4_perspective( float *matrix, float fov, float aspect, float znear, float zfar) { float ymax, xmax; - ymax = znear * tanf_approx(fov * OMS_PI / 360.0); + ymax = znear * tanf_approx(fov * OMS_PI / 360.0f); xmax = ymax * aspect; - mat_frustum(matrix, -xmax, xmax, -ymax, ymax, znear, zfar); + mat4_frustum(matrix, -xmax, xmax, -ymax, ymax, znear, zfar); } -void mat_ortho( +void mat4_ortho( float *matrix, - float left, float right, float bottom, float top, float near, float far) + float left, float right, float bottom, float top, float near_dist, float far_dist) { float rl_delta = right - left; float tb_delta = top - bottom; - float fn_delta = far - near; + float fn_delta = far_dist - near_dist; matrix[0] = 2 / rl_delta; matrix[1] = 0; @@ -451,7 +451,7 @@ void mat_ortho( matrix[12] = -(right + left) / rl_delta; matrix[13] = -(top + bottom) / tb_delta; - matrix[14] = -(far + near) / fn_delta; + matrix[14] = -(far_dist + near_dist) / fn_delta; matrix[15] = 1; } diff --git a/memory/BufferMemory.h b/memory/BufferMemory.h new file mode 100644 index 0000000..054ccec --- /dev/null +++ 
b/memory/BufferMemory.h
@@ -0,0 +1,60 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef TOS_MEMORY_BUFFER_MEMORY_H
+#define TOS_MEMORY_BUFFER_MEMORY_H
+
+#include <string.h>
+#include "../stdlib/Types.h"
+#include "MathUtils.h"
+#include "TestUtils.h"
+
+// Sequential buffer: buffer_get_memory() hands out memory starting at pos and moves pos forward
+struct BufferMemory {
+    byte* memory;
+
+    // Capacity of memory in bytes
+    uint64 size;
+
+    // Offset of the next byte which gets handed out
+    uint64 pos;
+};
+
+/**
+ * Hands out the next free range of the buffer and advances pos past it
+ *
+ * @param buf     Buffer to take the memory from
+ * @param size    Requested size in bytes (gets rounded with ROUND_TO_NEAREST(size, aligned))
+ * @param aligned Alignment passed to ROUND_TO_NEAREST, pos is only rounded if aligned > 1
+ * @param zeroed  If true the returned range is cleared with memset
+ *
+ * @return Start of the reserved range
+ */
+inline
+byte* buffer_get_memory(BufferMemory* buf, uint64 size, byte aligned = 1, bool zeroed = false)
+{
+    ASSERT_SIMPLE(size <= buf->size);
+
+    if (aligned > 1 && buf->pos > 0) {
+        buf->pos = ROUND_TO_NEAREST(buf->pos, aligned);
+    }
+
+    size = ROUND_TO_NEAREST(size, aligned);
+    ASSERT_SIMPLE(buf->pos + size <= buf->size);
+
+    byte* offset = (byte *) (buf->memory + buf->pos);
+    if (zeroed) {
+        memset((void *) offset, 0, size);
+    }
+
+    buf->pos += size;
+
+    return offset;
+}
+
+#endif
\ No newline at end of file
diff --git a/memory/ChunkMemory.h b/memory/ChunkMemory.h
new file mode 100644
index 0000000..6d69bf4
--- /dev/null
+++ b/memory/ChunkMemory.h
@@ -0,0 +1,155 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef TOS_MEMORY_ELEMENT_MEMORY_H
+#define TOS_MEMORY_ELEMENT_MEMORY_H
+
+#include <string.h>
+#include "../stdlib/Types.h"
+#include "MathUtils.h"
+
+// Buffer which is split into count chunks of chunk_size bytes each
+struct ChunkMemory {
+    byte* memory;
+
+    uint64 count;
+    uint64 chunk_size;
+    uint64 last_pos = -1;
+
+    // free describes which locations are used and which are free
+    // One bit per chunk (1 = used, 0 = free), 64 chunks per uint64
+    // -> free needs at least (count + 63) / 64 entries
+    uint64* free;
+};
+
+/**
+ * Returns the address of the chunk with the index element (no bounds or usage check)
+ */
+inline
+byte* chunk_get_memory(ChunkMemory* buf, uint64 element)
+{
+    return buf->memory + element * buf->chunk_size;
+}
+
+/**
+ * In some cases we know exactly which index is free
+ *
+ * Marks elements consecutive chunks starting at index as used, their current state is not checked
+ *
+ * @param buf      Chunk memory
+ * @param index    Index of the first chunk to reserve
+ * @param elements Number of consecutive chunks to reserve
+ * @param zeroed   If true the reserved chunks are cleared with memset
+ */
+void chunk_reserve_index(ChunkMemory* buf, int64 index, int elements = 1, bool zeroed = false)
+{
+    // Mark the bits as reserved
+    for (int j = 0; j < elements; ++j) {
+        uint64 element = (uint64) index + j;
+
+        // The shifted 1 must be 64 bit wide, a plain int 1 cannot reach the bits 31 - 63
+        buf->free[element / 64] |= ((uint64) 1) << (element % 64);
+    }
+
+    if (zeroed) {
+        memset(buf->memory + index * buf->chunk_size, 0, elements * buf->chunk_size);
+    }
+}
+
+/**
+ * Reserves the first run of elements consecutive free chunks
+ *
+ * The search starts at the block of 64 chunks which contains last_pos + 1 and wraps around once.
+ * last_pos itself is not changed by this function.
+ *
+ * @param buf      Chunk memory
+ * @param elements Number of consecutive chunks to reserve
+ * @param zeroed   If true the reserved chunks are cleared with memset
+ *
+ * @return Index of the first reserved chunk, -1 if no fitting run exists
+ */
+int64 chunk_reserve(ChunkMemory* buf, int elements = 1, bool zeroed = false)
+{
+    if (elements < 1 || (uint64) elements > buf->count) {
+        return -1;
+    }
+
+    // last_pos == -1 wraps to 0 here, anything behind the buffer restarts at the first chunk
+    uint64 start = ((buf->last_pos + 1) / 64) * 64;
+    if (start >= buf->count) {
+        start = 0;
+    }
+
+    int64 free_element = -1;
+
+    for (uint64 offset = 0; offset < buf->count && free_element < 0; ++offset) {
+        uint64 candidate = (start + offset) % buf->count;
+
+        // A completely used block of 64 chunks cannot contain the start of a free run
+        if (candidate % 64 == 0 && candidate + 64 <= buf->count && buf->free[candidate / 64] == ~((uint64) 0)) {
+            offset += 63;
+
+            continue;
+        }
+
+        // The run must not reach past the last chunk
+        if (candidate + elements > buf->count) {
+            continue;
+        }
+
+        int free_run = 0;
+        while (free_run < elements) {
+            uint64 element = candidate + free_run;
+            if ((buf->free[element / 64] & (((uint64) 1) << (element % 64))) != 0) {
+                break;
+            }
+
+            ++free_run;
+        }
+
+        if (free_run == elements) {
+            free_element = (int64) candidate;
+        }
+    }
+
+    if (free_element < 0) {
+        return -1;
+    }
+
+    // Marks the run as used and clears it if requested
+    chunk_reserve_index(buf, free_element, elements, zeroed);
+
+    return free_element;
+}
+
+/**
+ * Reserves a single free chunk, see chunk_reserve() for the search order
+ *
+ * @return Address of the reserved chunk, NULL if every chunk is in use
+ */
+byte* chunk_find_free(ChunkMemory* buf)
+{
+    int64 free_element = chunk_reserve(buf, 1, false);
+    if (free_element < 0) {
+        return NULL;
+    }
+
+    return buf->memory + free_element * buf->chunk_size;
+}
+
+/**
+ * Marks the chunk with the index element as free again (the chunk content is not cleared)
+ */
+inline
+void chunk_element_free(ChunkMemory* buf, uint64 element)
+{
+    // 64 bit wide mask, ~(1 << bit_index) on an int would also clear the bits 31 - 63
+    buf->free[element / 64] &= ~(((uint64) 1) << (element % 64));
+}
+
+#endif
\ No newline at end of file
diff --git a/utils/RingMemory.h b/memory/RingMemory.h
similarity index 96%
rename from utils/RingMemory.h
rename to memory/RingMemory.h
index f904dbe..7aa8f7a 100644
--- a/utils/RingMemory.h
+++ b/memory/RingMemory.h
@@ -6,9 +6,10 @@
  * @version 1.0.0
  * @link https://jingga.app
  */
-#ifndef TOS_UTILS_RING_MEMORY_H
-#define TOS_UTILS_RING_MEMORY_H
+#ifndef TOS_MEMORY_RING_MEMORY_H
+#define TOS_MEMORY_RING_MEMORY_H
 
+#include
 #include "../stdlib/Types.h"
 #include "MathUtils.h"
 #include "TestUtils.h"
diff --git a/models/Texture.h b/models/Texture.h
index cb1c75e..8a53c67 100644
--- a/models/Texture.h
+++ b/models/Texture.h
@@ -39,6 +39,8 @@ struct TextureFile {
     uint64 id;
 
+    // @question Should the texture hold the texture unit? 
If yes remember to update prepare_texture() + byte texture_data_type; byte texture_wrap_type_s; diff --git a/models/event/event_file_format.txt b/models/event/event_file_format.txt deleted file mode 100644 index afe1277..0000000 --- a/models/event/event_file_format.txt +++ /dev/null @@ -1,42 +0,0 @@ -#COND0 -This is some text. -This is another text. - -TEXT_OPTIONS{3} = can select up to 3 options -// @todo how to add/hide options based on other info -1. My text ->COND1 -2. My text ->COND2,->COND2=12 -3. My text ->COND3 - -REWARDS{1,2} = pick one and then 2 -// @todo how to add/hide options based on other info -CONDA: 1. 213 564 55 ->COND2 -CONDA: 2. 12 32 ->COND2 -CONDA&CODB:3. 87 3325 11 ->COND2 -CODB: 3. 87 3325 11 ->COND2 - -#COND1 - -#COND2 - -#COND3 - -#COND1+#COND2 - -#COND1+#COND3 - -#COND2+#COND3 - -#COND1+#COND2+#COND3 - -#COND - is_true // defined through ->COND - int_value // defined through ->COND=12 - float_value - char_level - proficiencies_above[] - proficiencies_above_level[] - char_trait_above[] - char_trait_above_level[] - char_trait_below[] - char_trait_below_level[] \ No newline at end of file diff --git a/models/item/Consumable.h b/models/item/Consumable.h new file mode 100644 index 0000000..b610000 --- /dev/null +++ b/models/item/Consumable.h @@ -0,0 +1,36 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_CONSUMABLE_H +#define TOS_MODELS_CONSUMABLE_H + +#include "../../stdlib/Types.h" + +#include "../mob/PrimaryStatsPoints.h" +#include "../mob/SecondaryStatsPoints.h" + +struct Consumable { + byte target; + f32 range; + + // Character + PrimaryStatsPoints primary_char_add; + SecondaryStatsPoints secondary_char_add; + + PrimaryStatsPoints primary_char_mul; + SecondaryStatsPoints secondary_char_mul; + + // Skill + PrimaryStatsPoints primary_skill_add; + SecondaryStatsPoints secondary_skill_add; + + PrimaryStatsPoints primary_skill_mul; + 
SecondaryStatsPoints secondary_skill_mul;
+};
+
+#endif
\ No newline at end of file
diff --git a/models/item/ConsumableType.h b/models/item/ConsumableType.h
new file mode 100644
index 0000000..dee9e49
--- /dev/null
+++ b/models/item/ConsumableType.h
@@ -0,0 +1,36 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef TOS_MODELS_CONSUMABLE_TYPE_H
+#define TOS_MODELS_CONSUMABLE_TYPE_H
+
+#include "../../stdlib/Types.h"
+
+#include "../mob/PrimaryStatsPoints.h"
+#include "../mob/SecondaryStatsPoints.h"
+
+struct ConsumableType {
+    byte target;
+    f32 range;
+
+    // Character
+    PrimaryStatsPoints primary_char_add;
+    SecondaryStatsPoints secondary_char_add;
+
+    PrimaryStatsPoints primary_char_mul;
+    SecondaryStatsPoints secondary_char_mul;
+
+    // Skill
+    PrimaryStatsPoints primary_skill_add;
+    SecondaryStatsPoints secondary_skill_add;
+
+    PrimaryStatsPoints primary_skill_mul;
+    SecondaryStatsPoints secondary_skill_mul;
+};
+
+#endif
\ No newline at end of file
diff --git a/models/item/Equipment.cpp b/models/item/Equipment.cpp
new file mode 100644
index 0000000..ce80276
--- /dev/null
+++ b/models/item/Equipment.cpp
@@ -0,0 +1,197 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef TOS_MODELS_EQUIPMENT_C
+#define TOS_MODELS_EQUIPMENT_C
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../stdlib/Types.h"
+#include "../mob/monster/LootTable.h"
+
+#include "Equipment.h"
+#include "EquipmentType.h"
+#include "ItemRarityDefinition.h"
+#include "MobLevelStats.h"
+#include "_equipment_types.h"
+#include "_equipment_slots.h"
+#include "_item_rarity.h"
+
+/**
+ * Rolls how many stats get assigned: a random amount in [count_min, count_max], never more than limit
+ *
+ * limit is the size of the shuffled index array, a larger amount would read past its end.
+ */
+static inline
+int equipment_random_stat_count(int count_min, int count_max, int limit)
+{
+    // rand() % 0 is undefined, a range with count_max < count_min falls back to count_min
+    int count = count_max >= count_min
+        ? count_min + rand() % (count_max - count_min + 1)
+        : count_min;
+
+    return count > limit ? limit : count;
+}
+
+/**
+ * Fills equipment with randomly chosen stats of a random equipment type
+ *
+ * @param equipments     Equipment type definitions (EQUIPMENT_TYPE_SIZE entries are read)
+ * @param rarities       Rarity definitions passed to get_random_item_rarity()
+ * @param mob_levels     Level definitions passed to get_random_item_level()
+ * @param equipment      Equipment which receives the generated stats
+ * @param mob_level      Level of the mob which drops the equipment
+ * @param cclass         Only use types where is_bit_set(char_class, cclass) holds, 0 = any class
+ * @param equipment_slot Only use types of this slot, 0 = any slot
+ *
+ * @return Slot of the chosen equipment type, -1 if no equipment type matches the filters
+ */
+int generate_random_equipment(
+    const EquipmentType* equipments, const RarityDefinition* rarities, const MobLevelStats* mob_levels,
+    SEquipmentStatsPoints* equipment, int mob_level, byte cclass = 0, int equipment_slot = 0
+)
+{
+    // find random equipment type
+    int valid_indices[EQUIPMENT_TYPE_SIZE];
+    int valid_count = 0;
+
+    for (int i = 0; i < EQUIPMENT_TYPE_SIZE; ++i) {
+        bool class_matches = cclass == 0 || is_bit_set(equipments[i].char_class, cclass);
+        bool slot_matches = equipment_slot == 0 || equipments[i].slot == equipment_slot;
+
+        if (class_matches && slot_matches) {
+            valid_indices[valid_count++] = i;
+        }
+    }
+
+    // Nothing can be generated for this class/slot combination (rand() % 0 would be undefined)
+    if (valid_count == 0) {
+        return -1;
+    }
+
+    const EquipmentType* equipment_type = equipments + valid_indices[rand() % valid_count];
+    const ItemStatsDistribution* distribution = &equipment_type->stats_distribution;
+
+    // find random item rarity
+    // @todo item_rarity and item_level are not used for the stats yet
+    int item_rarity = get_random_item_rarity(rarities, RARITY_TYPE_SIZE);
+
+    // find random item drop level
+    int item_level = get_random_item_level(mob_levels, mob_level);
+
+    // generate stats
+    // requirements
+    equipment->requirements.stat_str = equipment_type->primary_item_req_min.stat_str + rand() % (equipment_type->primary_item_req_max.stat_str - equipment_type->primary_item_req_min.stat_str + 1);
+    equipment->requirements.stat_agi = equipment_type->primary_item_req_min.stat_agi + rand() % (equipment_type->primary_item_req_max.stat_agi - equipment_type->primary_item_req_min.stat_agi + 1);
+    equipment->requirements.stat_int = equipment_type->primary_item_req_min.stat_int + rand() % (equipment_type->primary_item_req_max.stat_int - equipment_type->primary_item_req_min.stat_int + 1);
+    equipment->requirements.stat_dex = equipment_type->primary_item_req_min.stat_dex + rand() % (equipment_type->primary_item_req_max.stat_dex - equipment_type->primary_item_req_min.stat_dex + 1);
+    equipment->requirements.stat_acc = equipment_type->primary_item_req_min.stat_acc + rand() % (equipment_type->primary_item_req_max.stat_acc - equipment_type->primary_item_req_min.stat_acc + 1);
+    equipment->requirements.stat_sta = equipment_type->primary_item_req_min.stat_sta + rand() % (equipment_type->primary_item_req_max.stat_sta - equipment_type->primary_item_req_min.stat_sta + 1);
+    equipment->requirements.stat_def = equipment_type->primary_item_req_min.stat_def + rand() % (equipment_type->primary_item_req_max.stat_def - equipment_type->primary_item_req_min.stat_def + 1);
+
+    int primary_indices_random[PRIMARY_STAT_SIZE];
+    int secondary_indices_random[SECONDARY_STAT_SIZE];
+
+    int stat_iter;
+
+    // @todo in the area below we only handle the broad definitions, not the details
+
+    // item stats
+    // @todo handle item details here now only then apply the remaining free stats
+
+    memcpy(secondary_indices_random, SECONDARY_STAT_INDICES, SECONDARY_STAT_SIZE * sizeof(int));
+    random_unique(secondary_indices_random, SECONDARY_STAT_SIZE);
+
+    stat_iter = equipment_random_stat_count(distribution->item_secondary_count_min, distribution->item_secondary_count_max, SECONDARY_STAT_SIZE);
+    for (int i = 0; i < stat_iter; ++i) {
+        int stat = secondary_indices_random[i];
+        *(equipment->secondary_item.dmg + stat) = *(equipment_type->secondary_item_min.dmg + stat);
+    }
+
+    // char stats
+    memcpy(primary_indices_random, PRIMARY_STAT_INDICES, PRIMARY_STAT_SIZE * sizeof(int));
+    random_unique(primary_indices_random, PRIMARY_STAT_SIZE);
+
+    stat_iter = equipment_random_stat_count(distribution->char_primary_count_min, distribution->char_primary_count_max, PRIMARY_STAT_SIZE);
+    for (int i = 0; i < stat_iter; ++i) {
+        int stat = primary_indices_random[i];
+
+        // add and mul are equally distributed
+        if (fast_rand1() < FAST_RAND_MAX / 2) {
+            *(&equipment->primary_char_add.stat_str + stat) = *(&equipment_type->primary_char_add_min.stat_str + stat);
+        } else {
+            *(&equipment->primary_char_mul.stat_str + stat) = *(&equipment_type->primary_char_mul_min.stat_str + stat);
+        }
+    }
+
+    // @todo handle char_secondary_distribution skill_count_min/max here now
+
+    memcpy(secondary_indices_random, SECONDARY_STAT_INDICES, SECONDARY_STAT_SIZE * sizeof(int));
+    random_unique(secondary_indices_random, SECONDARY_STAT_SIZE);
+
+    stat_iter = equipment_random_stat_count(distribution->char_secondary_count_min, distribution->char_secondary_count_max, SECONDARY_STAT_SIZE);
+    for (int i = 0; i < stat_iter; ++i) {
+        int stat = secondary_indices_random[i];
+
+        // add and mul are equally distributed
+        if (fast_rand1() < FAST_RAND_MAX / 2) {
+            *(equipment->secondary_char_add.dmg + stat) = *(equipment_type->secondary_char_add_min.dmg + stat);
+        } else {
+            *(equipment->secondary_char_mul.dmg + stat) = *(equipment_type->secondary_char_mul_min.dmg + stat);
+        }
+    }
+
+    // @todo handle remaining char_count_min/max here now
+
+    // skill
+    // @question is primary for skill necessary?
+    memcpy(primary_indices_random, PRIMARY_STAT_INDICES, PRIMARY_STAT_SIZE * sizeof(int));
+    random_unique(primary_indices_random, PRIMARY_STAT_SIZE);
+
+    stat_iter = equipment_random_stat_count(distribution->skill_primary_count_min, distribution->skill_primary_count_max, PRIMARY_STAT_SIZE);
+    for (int i = 0; i < stat_iter; ++i) {
+        int stat = primary_indices_random[i];
+
+        // add and mul are equally distributed
+        if (fast_rand1() < FAST_RAND_MAX / 2) {
+            *(&equipment->primary_skill_add.stat_str + stat) = *(&equipment_type->primary_skill_add_min.stat_str + stat);
+        } else {
+            *(&equipment->primary_skill_mul.stat_str + stat) = *(&equipment_type->primary_skill_mul_min.stat_str + stat);
+        }
+    }
+
+    // @todo handle skill_secondary_distribution skill_count_min/max here now
+
+    memcpy(secondary_indices_random, SECONDARY_STAT_INDICES, SECONDARY_STAT_SIZE * sizeof(int));
+    random_unique(secondary_indices_random, SECONDARY_STAT_SIZE);
+
+    stat_iter = equipment_random_stat_count(distribution->skill_secondary_count_min, distribution->skill_secondary_count_max, SECONDARY_STAT_SIZE);
+    for (int i = 0; i < stat_iter; ++i) {
+        int stat = secondary_indices_random[i];
+
+        // add and mul are equally distributed
+        if (fast_rand1() < FAST_RAND_MAX / 2) {
+            *(equipment->secondary_skill_add.dmg + stat) = *(equipment_type->secondary_skill_add_min.dmg + stat);
+        } else {
+            *(equipment->secondary_skill_mul.dmg + stat) = *(equipment_type->secondary_skill_mul_min.dmg + stat);
+        }
+    }
+
+    // @todo handle remaining skill_count_min/max here now
+
+    return equipment_type->slot;
+}
+
+// @todo Not implemented yet, always reports -1
+int generate_random_equipment(SEquipmentStatsPoints* equipment, int mob_level, byte cclass, const LootTable* table)
+{
+    return -1;
+}
+
+#endif
\ No newline at end of file
diff --git 
a/models/item/Equipment.h b/models/item/Equipment.h index eccb149..e854f27 100644 --- a/models/item/Equipment.h +++ b/models/item/Equipment.h @@ -33,15 +33,19 @@ struct SEquipmentStatsPoints { // @todo A character cannot do for example fire damage (only items and skills can do that) // This means these stats are unused and just use up memory PrimaryStatsPoints primary_char_add; - PrimaryStatsRelPoints primary_char_mul; + PrimaryStatsPoints primary_char_mul; SecondaryStatsPoints secondary_char_add; - SecondaryStatsRelPoints secondary_char_mul; + SecondaryStatsPoints secondary_char_mul; // Modifies the skills // only modifies skills that have these stats != 0 + // @question is primary for skill necessary? + PrimaryStatsPoints primary_skill_add; + PrimaryStatsPoints primary_skill_mul; + SecondaryStatsPoints secondary_skill_add; - SecondaryStatsRelPoints secondary_skill_mul; + SecondaryStatsPoints secondary_skill_mul; }; #endif \ No newline at end of file diff --git a/models/item/EquipmentType.h b/models/item/EquipmentType.h index cb5ffaa..7140424 100644 --- a/models/item/EquipmentType.h +++ b/models/item/EquipmentType.h @@ -11,16 +11,71 @@ #include "../../stdlib/Types.h" +#include "../mob/MobStats.h" +#include "ItemStatsDistribution.h" + struct EquipmentType { - byte id; byte slot; - bool dual; - bool throwing; - bool projectile; - bool damage; - bool armor; - bool supporting; - bool beam; + uint32 char_class; + bool is_dual; + bool is_throwing; + bool is_projectile; + bool is_damage; + bool is_armor; + bool is_supporting; + bool is_beam; + bool is_ranged; + + byte potential_min; + byte potential_max; + + // @question Do we want a equipment specific potential? currently only rarity dependent! + + // This defines how many stats can be asigned to an item based on the item type + // @todo for the correct algorithm we however also need to consider rarity of the item defining how many + // total stats/affixes/enchantments can actually be assigned. 
+ // we chust need to define that static struct/array (we already have a draft in the excel file) + // of course this should be probably automatically generated from the database at compile time as a pre_compile program + ItemStatsDistribution stats_distribution; + + // The min/max point range is calculated by checking the rarity values + item level + // The values stored in the structs below are the "average" value which then gets randomely shifted by the rarity+item level + PrimaryStatsPoints primary_item_req_min; + PrimaryStatsPoints primary_item_req_max; + + SecondaryStatsPoints secondary_item_min; + SecondaryStatsPoints secondary_item_max; + + // Character + // add + PrimaryStatsPoints primary_char_add_min; + PrimaryStatsPoints primary_char_add_max; + + SecondaryStatsPoints secondary_char_add_min; + SecondaryStatsPoints secondary_char_add_max; + + // mul + PrimaryStatsPoints primary_char_mul_min; + PrimaryStatsPoints primary_char_mul_max; + + SecondaryStatsPoints secondary_char_mul_min; + SecondaryStatsPoints secondary_char_mul_max; + + // Skill + // add + // @question is primary for skill necessary? 
+ PrimaryStatsPoints primary_skill_add_min; + PrimaryStatsPoints primary_skill_add_max; + + SecondaryStatsPoints secondary_skill_add_min; + SecondaryStatsPoints secondary_skill_add_max; + + // mul + PrimaryStatsPoints primary_skill_mul_min; + PrimaryStatsPoints primary_skill_mul_max; + + SecondaryStatsPoints secondary_skill_mul_min; + SecondaryStatsPoints secondary_skill_mul_max; }; #endif \ No newline at end of file diff --git a/models/item/ItemAffixDistribution.h b/models/item/ItemAffixDistribution.h new file mode 100644 index 0000000..7607632 --- /dev/null +++ b/models/item/ItemAffixDistribution.h @@ -0,0 +1,63 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_ITEM_AFFIX_DISTRIBUTION_H +#define TOS_MODELS_ITEM_AFFIX_DISTRIBUTION_H + +// WARNING: The sum of all mins must be smaller than max count of the whole category +// This allows us to define how many phys dmg stats an item should have etc. 
+// the sum of all min is most likely lower than the parent min/max -> we can randomely assign additional +// stat categories to the item as long as their min/max value is not 0 = which means not allowed +struct ItemAffixDistribution { + // damage + int dmg_count_min; + int dmg_count_max; + + int phys_dmg_count_min; + int phys_dmg_count_max; + + int elemental_dmg_count_min; + int elemental_dmg_count_max; + + int magic_dmg_count_min; + int magic_dmg_count_max; + + // defense + int def_count_min; + int def_count_max; + + int phys_def_count_min; + int phys_def_count_max; + + int elemental_def_count_min; + int elemental_def_count_max; + + int magic_def_count_min; + int magic_def_count_max; + + // other + int other_count_min; + int other_count_max; + + int health_count_min; + int health_count_max; + + int resource_count_min; + int resource_count_max; + + int movement_count_min; + int movement_count_max; + + int modifier_count_min; + int modifier_count_max; + + int special_count_min; + int special_count_max; +}; + +#endif \ No newline at end of file diff --git a/models/item/ItemLevelStats.h b/models/item/ItemLevelStats.h new file mode 100644 index 0000000..0712fd9 --- /dev/null +++ b/models/item/ItemLevelStats.h @@ -0,0 +1,20 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_ITEM_LEVEL_STATS_H +#define TOS_MODELS_ITEM_LEVEL_STATS_H + +#include "ItemRarityStats.h" +#include "MobLevelStats.h" +#include "_item_rarity.h" + +struct ItemLevelStats { + ItemRarityStats rarity_stats[RARITY_TYPE_SIZE]; +}; + +#endif \ No newline at end of file diff --git a/models/item/ItemRarityDefinition.h b/models/item/ItemRarityDefinition.h new file mode 100644 index 0000000..85f720b --- /dev/null +++ b/models/item/ItemRarityDefinition.h @@ -0,0 +1,40 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef 
TOS_MODELS_ITEM_RARITY_DEFINITION_H +#define TOS_MODELS_ITEM_RARITY_DEFINITION_H + +#include <stdlib.h> +#include "ItemStatsDistribution.h" + +#define ITEM_RARITY_100 10000000000 + +// This puts a hard limit on StatsDistribution +struct RarityDefinition { + int drop_chance; + + int potential_min; + int potential_max; + + ItemStatsDistribution stats_distribution; +}; + +/** + * Picks a rarity index by rolling once against the cumulative drop chances. + * + * Only the first rarity_count - 1 entries are tested; the last entry is the fallback for every roll the others did not claim. + * The roll is in [0, ITEM_RARITY_100), so a drop_chance of N claims N out of ITEM_RARITY_100 rolls (drop chances must not be negative). + * + * @param rarities Rarity definitions, at least rarity_count elements + * @param rarity_count Amount of rarities in rarities + * + * @return Index of the rolled rarity, rarity_count - 1 if no earlier rarity was hit + */ +inline +int get_random_item_rarity(const RarityDefinition* rarities, int rarity_count) +{ + // ITEM_RARITY_100 needs more than 32 bits and can be larger than RAND_MAX, + // so the roll is built from three rand() calls in 64 bit instead of a single truncated 32 bit value + long long random_rarity = (((long long) rand() << 30) ^ ((long long) rand() << 15) ^ (long long) rand()) % ITEM_RARITY_100; + long long current_rarity = 0; + for (int i = 0; i < rarity_count - 1; ++i) { + current_rarity += rarities[i].drop_chance; + + // Hit when the roll lies below the cumulative chance of rarity i + if (random_rarity < current_rarity) { + return i; + } + } + + return rarity_count - 1; +} +#endif \ No newline at end of file diff --git a/models/item/ItemRarityStats.h b/models/item/ItemRarityStats.h new file mode 100644 index 0000000..7d430a5 --- /dev/null +++ b/models/item/ItemRarityStats.h @@ -0,0 +1,24 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_ITEM_RARITY_STATS_H +#define TOS_MODELS_ITEM_RARITY_STATS_H + +/** + * This defines the basic stat range and average for all item rarities. + */ +struct ItemRarityStats { + int stats_avg; + int stats_min; + int stats_max; + + float shift_min; + float shift_max; +}; + +#endif \ No newline at end of file diff --git a/models/item/ItemStatsDistribution.h b/models/item/ItemStatsDistribution.h new file mode 100644 index 0000000..b405441 --- /dev/null +++ b/models/item/ItemStatsDistribution.h @@ -0,0 +1,53 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_ITEM_STATS_DISTRIBUTION_H +#define TOS_MODELS_ITEM_STATS_DISTRIBUTION_H + +#include "ItemAffixDistribution.h" + +// This is only used in EquipmentType +// @question Should we also use it in Rarity and replace the current impl. in Rarity? 
+// Implementing it also in rarity would basically give us more detailed control in rarities as well +struct ItemStatsDistribution { + // item + int item_secondary_count_min; + int item_secondary_count_max; + + int item_flags_dmg_count_min; + int item_flags_dmg_count_max; + + int item_flags_def_count_min; + int item_flags_def_count_max; + + // character + // can be add and mul + int char_count_min; + int char_count_max; + + int char_primary_count_min; + int char_primary_count_max; + + int char_secondary_count_min; + int char_secondary_count_max; + ItemAffixDistribution char_secondary_distribution; + + // skill + // can be add and mul + int skill_count_min; + int skill_count_max; + + int skill_primary_count_min; + int skill_primary_count_max; + + int skill_secondary_count_min; + int skill_secondary_count_max; + ItemAffixDistribution skill_secondary_distribution; +}; + +#endif \ No newline at end of file diff --git a/models/item/MobLevelStats.h b/models/item/MobLevelStats.h new file mode 100644 index 0000000..da16cb4 --- /dev/null +++ b/models/item/MobLevelStats.h @@ -0,0 +1,34 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_ITEM_MOB_LEVEL_STATS_H +#define TOS_MODELS_ITEM_MOB_LEVEL_STATS_H + +/** + * This defines the bounds for what items can drop based on the mob level. + * Additional LootTables may further restrict or modify this. 
+ * This also functions as a guard for typos in mob definitions to avoid ludicrous gold and xp drops + */ +struct MobLevelStats { + int xp; + + int gold_min; + int gold_max; + + int item_level_min; + int item_level_max; +}; + +#include <stdlib.h> + +/** + * Rolls an item level between item_level_min and item_level_max (both inclusive) of the given mob level. + * + * @param level_data Table indexed by mob_level - 1, so mob_level must be at least 1 and covered by the table + * @param mob_level 1-based mob level + * + * @return Random item level, item_level_min if the configured range is empty or inverted + */ +inline +int get_random_item_level(const MobLevelStats* level_data, int mob_level) +{ + const MobLevelStats* mob_stats = level_data + mob_level - 1; + const int level_range = mob_stats->item_level_max - mob_stats->item_level_min + 1; + + // An empty or inverted range would turn the modulo into a division by zero or yield levels outside of the bounds + if (level_range < 1) { + return mob_stats->item_level_min; + } + + return mob_stats->item_level_min + rand() % level_range; +} + +#endif \ No newline at end of file diff --git a/models/item/_equipment_slots.h b/models/item/_equipment_slots.h new file mode 100644 index 0000000..7336167 --- /dev/null +++ b/models/item/_equipment_slots.h @@ -0,0 +1,6 @@ +#ifndef TOS_MODELS_ITEM_EQUIPMENT_SLOTS_H +#define TOS_MODELS_ITEM_EQUIPMENT_SLOTS_H + +#define EQUIPMENT_SLOT_SIZE 14 + +#endif \ No newline at end of file diff --git a/models/item/_equipment_types.h b/models/item/_equipment_types.h new file mode 100644 index 0000000..73c329e --- /dev/null +++ b/models/item/_equipment_types.h @@ -0,0 +1,6 @@ +#ifndef TOS_MODELS_ITEM_EQUIPMENT_TYPES_H +#define TOS_MODELS_ITEM_EQUIPMENT_TYPES_H + +#define EQUIPMENT_TYPE_SIZE 54 + +#endif \ No newline at end of file diff --git a/models/item/_item_rarity.h b/models/item/_item_rarity.h new file mode 100644 index 0000000..1868eb1 --- /dev/null +++ b/models/item/_item_rarity.h @@ -0,0 +1,6 @@ +#ifndef TOS_MODELS_ITEM_RARITY_TYPES_H +#define TOS_MODELS_ITEM_RARITY_TYPES_H + +#define RARITY_TYPE_SIZE 7 + +#endif \ No newline at end of file diff --git a/models/item/equipment_slots.h b/models/item/equipment_slots.h deleted file mode 100644 index 4ddf072..0000000 --- a/models/item/equipment_slots.h +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Jingga - * - * @copyright Jingga - * @license OMS License 2.0 - * @version 1.0.0 - * @link https://jingga.app - */ -#ifndef TOS_MODELS_ITEM_EQUIPMENT_SLOTS_H -#define TOS_MODELS_ITEM_EQUIPMENT_SLOTS_H - -#define EQUIPMENT_SLOT_HEAD 0x01 -#define 
EQUIPMENT_SLOT_NECK 0x02 -#define EQUIPMENT_SLOT_BODY 0x03 -#define EQUIPMENT_SLOT_BELT 0x04 -#define EQUIPMENT_SLOT_PANTS 0x05 -#define EQUIPMENT_SLOT_BOOTS 0x06 -#define EQUIPMENT_SLOT_RING 0x07 -#define EQUIPMENT_SLOT_MAIN_HAND 0x08 -#define EQUIPMENT_SLOT_OFF_HAND 0x09 -#define EQUIPMENT_SLOT_ARMS 0x0A -#define EQUIPMENT_SLOT_BELT_ATTACHMENT 0x0B -#define EQUIPMENT_SLOT_SHOULDER 0x0C -#define EQUIPMENT_SLOT_BACK 0x0D -#define EQUIPMENT_SLOT_HANDS 0x0E - -#endif \ No newline at end of file diff --git a/models/item/equipment_types.h b/models/item/equipment_types.h deleted file mode 100644 index 2528080..0000000 --- a/models/item/equipment_types.h +++ /dev/null @@ -1,115 +0,0 @@ -#ifndef TOS_MODELS_ITEM_EQUIPMENT_TYPES_H -#define TOS_MODELS_ITEM_EQUIPMENT_TYPES_H - -#include "equipment_slots.h" -#include "EquipmentType.h" - -#define EQUIPMENT_TYPE_ONE_HANDED_SWORD 0x01 -#define EQUIPMENT_TYPE_TWO_HANDED_SWORD 0x02 -#define EQUIPMENT_TYPE_HELMET 0x03 -#define EQUIPMENT_TYPE_EARING 0x04 -#define EQUIPMENT_TYPE_NECKLACE 0x05 -#define EQUIPMENT_TYPE_BOOTS 0x06 -#define EQUIPMENT_TYPE_STAFF 0x07 -#define EQUIPMENT_TYPE_WAND 0x08 -#define EQUIPMENT_TYPE_DOLL 0x09 -#define EQUIPMENT_TYPE_POLEAXE 0x0A -#define EQUIPMENT_TYPE_SABRE 0x0B -#define EQUIPMENT_TYPE_DAGGER 0x0C -#define EQUIPMENT_TYPE_JAVELIN 0x0D -#define EQUIPMENT_TYPE_QUARTERSTAFF 0x0E -#define EQUIPMENT_TYPE_SPEAR 0x0F -#define EQUIPMENT_TYPE_CLAYMORE 0x10 -#define EQUIPMENT_TYPE_DAO 0x11 -#define EQUIPMENT_TYPE_CLEAVER 0x12 -#define EQUIPMENT_TYPE_BROADSWORD 0x13 -#define EQUIPMENT_TYPE_LONGSWORD 0x14 -#define EQUIPMENT_TYPE_SCIMITAR 0x15 -#define EQUIPMENT_TYPE_RAPIER 0x16 -#define EQUIPMENT_TYPE_SICKLE 0x17 -#define EQUIPMENT_TYPE_SCYTHE 0x18 -#define EQUIPMENT_TYPE_PUNCHING_DAGGER 0x19 -#define EQUIPMENT_TYPE_LIGHT_WARHAMMER 0x1A -#define EQUIPMENT_TYPE_LIGHT_MACE 0x1B -#define EQUIPMENT_TYPE_HEAVY_MACE 0x1C -#define EQUIPMENT_TYPE_HEAVY_WARHAMMER 0x1D -#define EQUIPMENT_TYPE_LIGHT_FLAIL 0x1E -#define 
EQUIPMENT_TYPE_HEAVY_FLAIL 0x1F -#define EQUIPMENT_TYPE_SHURIKAN 0x20 -#define EQUIPMENT_TYPE_GLAIVE 0x21 -#define EQUIPMENT_TYPE_HALBERD 0x22 -#define EQUIPMENT_TYPE_PARTIZAN 0x23 -#define EQUIPMENT_TYPE_LONGBOW 0x24 -#define EQUIPMENT_TYPE_DOUBLE_BOW 0x25 -#define EQUIPMENT_TYPE_BOW 0x27 -#define EQUIPMENT_TYPE_RECURVE_BOW 0x28 -#define EQUIPMENT_TYPE_CROSSBOW 0x29 -#define EQUIPMENT_TYPE_HEAVY_CROSSBOW 0x2A -#define EQUIPMENT_TYPE_WHIP 0x2B -#define EQUIPMENT_TYPE_THROWING_AXE 0x2C -#define EQUIPMENT_TYPE_BLOWGUN 0x2D -#define EQUIPMENT_TYPE_CLUB 0x2E -#define EQUIPMENT_TYPE_GREATCLUB 0x2F -#define EQUIPMENT_TYPE_SLING 0x30 -#define EQUIPMENT_TYPE_CHAKRAM 0x31 -#define EQUIPMENT_TYPE_TRIDENT 0x32 -#define EQUIPMENT_TYPE_THROWING_SPEAR 0x33 -#define EQUIPMENT_TYPE_THROWING_KNIVES 0x34 -#define EQUIPMENT_TYPE_GRANADE 0x35 -#define EQUIPMENT_TYPE_SCRIPTURE 0x36 -#define EQUIPMENT_TYPE_BONES 0x37 -#define EQUIPMENT_TYPE_MAGIC_CRYSTAL 0x38 -#define EQUIPMENT_TYPE_SHIELD 0x39 -#define EQUIPMENT_TYPE_QUIVER 0x3D -#define EQUIPMENT_TYPE_PISTOL 0x3E -#define EQUIPMENT_TYPE_SHOTGUN 0x3F -#define EQUIPMENT_TYPE_RIFLE 0x40 -#define EQUIPMENT_TYPE_FLASK 0x41 -#define EQUIPMENT_TYPE_LIGHT_AXE 0x42 -#define EQUIPMENT_TYPE_QUILL 0x43 -#define EQUIPMENT_TYPE_PANTS 0x44 -#define EQUIPMENT_TYPE_BELT 0x45 -#define EQUIPMENT_TYPE_RING 0x46 -#define EQUIPMENT_TYPE_ARMS 0x47 -#define EQUIPMENT_TYPE_BELT_ATTACHMENT 0x48 -#define EQUIPMENT_TYPE_BODY 0x49 -#define EQUIPMENT_TYPE_CIRCLET 0x4A -#define EQUIPMENT_TYPE_BRACELET 0x4B -#define EQUIPMENT_TYPE_GADGET 0x4C -#define EQUIPMENT_TYPE_LANTERN 0x4D -#define EQUIPMENT_TYPE_GLASSES 0x4E -#define EQUIPMENT_TYPE_CAPE 0x4F -#define EQUIPMENT_TYPE_POLEARM 0x50 -#define EQUIPMENT_TYPE_HEAVY_AXE 0x51 -#define EQUIPMENT_TYPE_SCALES 0x52 -#define EQUIPMENT_TYPE_PRAYING_BEADS 0x53 -#define EQUIPMENT_TYPE_TONFA 0x54 -#define EQUIPMENT_TYPE_TETSUBO 0x55 -#define EQUIPMENT_TYPE_KAMA 0x56 -#define EQUIPMENT_TYPE_SAMURAI_SWORD 0x57 -#define 
EQUIPMENT_TYPE_BOOMERANG 0x58 -#define EQUIPMENT_TYPE_SLINGSHOT 0x59 -#define EQUIPMENT_TYPE_HARPOON 0x5A -#define EQUIPMENT_TYPE_ORB 0x5B -#define EQUIPMENT_TYPE_RUNESTONE 0x5C -#define EQUIPMENT_TYPE_TALISMAN 0x5D -#define EQUIPMENT_TYPE_GRIMOIRE 0x5E -#define EQUIPMENT_TYPE_SHURIKEN 0x5F -#define EQUIPMENT_TYPE_THROWING_DARTS 0x60 -#define EQUIPMENT_TYPE_COCKTAIL 0x61 -#define EQUIPMENT_TYPE_FLUTE 0x62 -#define EQUIPMENT_TYPE_FAN 0x63 -#define EQUIPMENT_TYPE_SCEPTER 0x64 -#define EQUIPMENT_TYPE_TAMBOURINE 0x65 -#define EQUIPMENT_TYPE_BAGPIPE 0x66 -#define EQUIPMENT_TYPE_HARP 0x67 -#define EQUIPMENT_TYPE_TROMPET 0x68 -#define EQUIPMENT_TYPE_LUTE 0x69 -#define EQUIPMENT_TYPE_HORN 0x6A -#define EQUIPMENT_TYPE_BELL 0x6B -#define EQUIPMENT_TYPE_VIOLIN 0x6C -#define EQUIPMENT_TYPE_VIOLIN 0x6D - -#define SIZE_EQUIPMENT_TYPE 0x69 - -#endif \ No newline at end of file diff --git a/models/mob/FixedStats.h b/models/mob/FixedStats.h new file mode 100644 index 0000000..4b7f3e4 --- /dev/null +++ b/models/mob/FixedStats.h @@ -0,0 +1,26 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_MOB_FIXED_STATS_H +#define TOS_MODELS_MOB_FIXED_STATS_H + +#include "../../stdlib/Types.h" + +// Groups the f32 speed values (walk, swim, fly, jump, dodge, turn) +struct FixedStats { + // Movement + // Additional speeds may be defined for Mobs + f32 speed_walk1; + f32 speed_swim1; + f32 speed_fly1; + + f32 speed_jump; + f32 speed_dodge; + f32 speed_turn; +}; + +#endif diff --git a/models/mob/MobStats.h b/models/mob/MobStats.h index 775ab06..a9ba944 100644 --- a/models/mob/MobStats.h +++ b/models/mob/MobStats.h @@ -10,554 +10,33 @@ #define TOS_MODELS_MOB_STATS_H #include "../../stdlib/Types.h" +#include "PrimaryStatsPoints.h" +#include "SecondaryStatsPoints.h" /** * @todo optimize order of struct members to ensure optimal struct size */ - -// Character stats modifiable through leveling (simple +/- buttons) -struct PrimaryStatsPoints { - byte stat_str; 
// strength : effects health + base damage - byte stat_int; // inteligence : effects resource + base demage - byte stat_acc; // accuracy : effects critical chance + base damage + miss chance - byte stat_agi; // agility : effects resource + base damage + dodge chance - byte stat_def; // defense : effects resource + base defense + dodge chance - byte stat_sta; // stamina : effects health regen + resource regen -}; - -struct PrimaryStatsRel { - f32 stat_str; - f32 stat_int; - f32 stat_acc; - f32 stat_agi; - f32 stat_def; - f32 stat_sta; -}; - -struct PrimaryStatsRelPoints { - byte stat_str; - byte stat_int; - byte stat_acc; - byte stat_agi; - byte stat_def; - byte stat_sta; -}; - -// Character stats modifiable thorugh skill tree? -struct SecondaryStatsPoints { - // Damage types - byte dmg_pircing; - byte dmg_slashing; - byte dmg_bludgeoning; - byte dmg_stabbing; - byte dmg_fire; - byte dmg_water; - byte dmg_wind; - byte dmg_earth; - byte dmg_poison; - byte dmg_lightning; - byte dmg_ice; - byte dmg_arcane; - byte dmg_corrupted; - byte dmg_holy; - byte dmg_reflection; - byte dmg_reflection_chance; - - byte dmg_crit; - byte dmg_crit_chance; - - // Health & Resource - byte health; - byte health_on_dmg_dealt; - byte health_on_dmg_taken; - - byte health_regen; - byte health_regen_rel; - byte health_regen_on_dmg_dealt; - byte health_regen_on_dmg_taken; - - byte resource; - byte resource_on_dmg_dealt; - byte resource_on_dmg_taken; - - byte resource_regen; - byte resource_regen_rel; - byte resource_regen_on_dmg_dealt; - byte resource_regen_on_dmg_taken; - - byte resource_loss; - byte resource_loss_on_dmg_dealt; - byte resource_loss_on_dmg_taken; - - // Defense types - // think about it as armor and/or resistence if it helps - byte defense_pircing; - byte defense_slashing; - byte defense_bludgeoning; - byte defense_stabbing; - byte defense_fire; - byte defense_water; - byte defense_ice; - byte defense_earth; - byte defense_wind; - byte defense_poison; - byte defense_lightning; 
- byte defense_holy; - byte defense_arcane; - byte defense_corrupted; - - // Accuracy - byte dodge_chance; - byte cc_protection; - byte miss_chance; - - // Movement - // Additional speeds may be defined for Mobs - byte speed_walk1; - byte speed_swim1; - byte speed_fly1; - - // Fighting speed - byte speed_cast; - byte speed_attack; - - byte pickup_range; - - byte shield; - - byte aoe_scale; - byte resource_cost; - byte health_cost; - byte attack_range; - byte melee_range; - byte projectile_speed; - byte projectile_count; - byte shatter_probability; - byte shatter_range; - byte shatter_dmg; - byte shatter_count; - byte passthrough_damage; - byte passthrough_count; - byte dot_duration; - byte dot_count; - byte bleeding_dot; - byte poison_dot; - byte burn_dot; - byte ice_dot; - byte resource_drain; - byte shatter_dot; - byte minon_duration; - byte minion_count; - byte effect_spreading_probability; - byte effect_spreading_radius; - byte effect_spreading_max_count; - byte effect_duration; - byte aura_range; - byte cast_duration; - - byte agro_range; -}; - - -// @todo change order for simd calculations so that all valus match up -struct SecondaryStatsValues { - // Damage types - int32 dmg_pircing; - int32 dmg_slashing; - int32 dmg_bludgeoning; - int32 dmg_stabbing; - int32 dmg_fire; - int32 dmg_water; - int32 dmg_wind; - int32 dmg_earth; - int32 dmg_poison; - int32 dmg_lightning; - int32 dmg_ice; - int32 dmg_arcane; - int32 dmg_corrupted; - int32 dmg_holy; - int32 dmg_reflection; - int32 dmg_reflection_chance; - - int32 dmg_crit; - f32 dmg_crit_chance; - - // Health & Resource - int32 health; - f32 health_on_dmg_dealt; - f32 health_on_dmg_taken; - - int32 health_regen; - f32 health_regen_rel; - f32 health_regen_on_dmg_dealt; - f32 health_regen_on_dmg_taken; - - int32 resource; - f32 resource_on_dmg_dealt; - f32 resource_on_dmg_taken; - - int32 resource_regen; - f32 resource_regen_rel; - f32 resource_regen_on_dmg_dealt; - f32 resource_regen_on_dmg_taken; - - int32 
resource_loss; - f32 resource_loss_on_dmg_dealt; - f32 resource_loss_on_dmg_taken; - - // Defense types - // think about it as armor and/or resistence if it helps - int32 defense_pircing; - int32 defense_slashing; - int32 defense_bludgeoning; - int32 defense_stabbing; - int32 defense_fire; - int32 defense_water; - int32 defense_ice; - int32 defense_earth; - int32 defense_wind; - int32 defense_poison; - int32 defense_lightning; - int32 defense_holy; - int32 defense_arcane; - int32 defense_corrupted; - - // Accuracy - f32 dodge_chance; - f32 cc_protection; - f32 miss_chance; - - // Movement - // Additional speeds may be defined for Mobs - f32 speed_walk1; - f32 speed_swim1; - f32 speed_fly1; - - // Fighting speed - f32 speed_cast; - f32 speed_attack; - - f32 pickup_range; - - int32 shield; - - f32 aoe_scale; - f32 resource_cost; - f32 health_cost; - f32 attack_range; - f32 melee_range; - f32 projectile_speed; - int32 projectile_count; - f32 shatter_probability; - f32 shatter_range; - int32 shatter_dmg; - int32 shatter_count; - f32 passthrough_damage; - int32 passthrough_count; - f32 dot_duration; - int32 dot_count; - int32 bleeding_dot; - int32 poison_dot; - int32 burn_dot; - int32 ice_dot; - int32 resource_drain; - int32 shatter_dot; - f32 minon_duration; - int32 minion_count; - f32 effect_spreading_probability; - f32 effect_spreading_radius; - int32 effect_spreading_max_count; - f32 effect_duration; - f32 aura_range; - f32 cast_duration; - - f32 agro_range; -}; - -struct SecondaryStatsRel { - // Damage types - f32 dmg_pircing; - f32 dmg_slashing; - f32 dmg_bludgeoning; - f32 dmg_stabbing; - f32 dmg_fire; - f32 dmg_water; - f32 dmg_wind; - f32 dmg_earth; - f32 dmg_poison; - f32 dmg_lightning; - f32 dmg_ice; - f32 dmg_arcane; - f32 dmg_corrupted; - f32 dmg_holy; - f32 dmg_reflection; - f32 dmg_reflection_chance; - - f32 dmg_crit; - f32 dmg_crit_chance; - - // Health & Resource - f32 health; - f32 health_on_dmg_dealt; - f32 health_on_dmg_taken; - - f32 health_regen; - 
f32 health_regen_on_dmg_dealt; - f32 health_regen_on_dmg_taken; - - f32 resource; - f32 resource_on_dmg_dealt; - f32 resource_on_dmg_taken; - - f32 resource_regen; - f32 resource_regen_on_dmg_dealt; - f32 resource_regen_on_dmg_taken; - - f32 resource_loss; - f32 resource_loss_on_dmg_dealt; - f32 resource_loss_on_dmg_taken; - - // Defense types - // think about it as armor and/or resistence if it helps - f32 defense_pircing; - f32 defense_slashing; - f32 defense_bludgeoning; - f32 defense_stabbing; - f32 defense_fire; - f32 defense_water; - f32 defense_ice; - f32 defense_earth; - f32 defense_wind; - f32 defense_poison; - f32 defense_lightning; - f32 defense_holy; - f32 defense_arcane; - f32 defense_corrupted; - - // Accuracy - f32 dodge_chance; - f32 cc_protection; - f32 miss_chance; - - // Movement - // Additional speeds may be defined for Mobs - f32 speed_walk1; - f32 speed_swim1; - f32 speed_fly1; - - // Fighting speed - f32 speed_cast; - f32 speed_attack; - - f32 pickup_range; - - f32 shield; - - f32 aoe_scale; - f32 resource_cost; - f32 health_cost; - f32 attack_range; - f32 melee_range; - f32 projectile_speed; - f32 projectile_count; - f32 shatter_probability; - f32 shatter_range; - f32 shatter_dmg; - f32 shatter_count; - f32 passthrough_damage; - f32 passthrough_count; - f32 dot_duration; - f32 dot_count; - f32 bleeding_dot; - f32 poison_dot; - f32 burn_dot; - f32 ice_dot; - f32 resource_drain; - f32 shatter_dot; - f32 minon_duration; - f32 minion_count; - f32 effect_spreading_probability; - f32 effect_spreading_radius; - f32 effect_spreading_max_count; - f32 effect_duration; - f32 aura_range; - f32 cast_duration; - - f32 agro_range; -}; - -struct SecondaryStatsRelPoints { - // Damage types - byte dmg_pircing; - byte dmg_slashing; - byte dmg_bludgeoning; - byte dmg_stabbing; - byte dmg_fire; - byte dmg_water; - byte dmg_wind; - byte dmg_earth; - byte dmg_poison; - byte dmg_lightning; - byte dmg_ice; - byte dmg_arcane; - byte dmg_corrupted; - byte dmg_holy; - 
byte dmg_reflection; - byte dmg_reflection_chance; - - byte dmg_crit; - byte dmg_crit_chance; - - // Health & Resource - byte health; - byte health_on_dmg_dealt; - byte health_on_dmg_taken; - - byte health_regen; - byte health_regen_on_dmg_dealt; - byte health_regen_on_dmg_taken; - - byte resource; - byte resource_on_dmg_dealt; - byte resource_on_dmg_taken; - - byte resource_regen; - byte resource_regen_on_dmg_dealt; - byte resource_regen_on_dmg_taken; - - byte resource_loss; - byte resource_loss_on_dmg_dealt; - byte resource_loss_on_dmg_taken; - - // Defense types - // think about it as armor and/or resistence if it helps - byte defense_pircing; - byte defense_slashing; - byte defense_bludgeoning; - byte defense_stabbing; - byte defense_fire; - byte defense_water; - byte defense_ice; - byte defense_earth; - byte defense_wind; - byte defense_poison; - byte defense_lightning; - byte defense_holy; - byte defense_arcane; - byte defense_corrupted; - - // Accuracy - byte dodge_chance; - byte cc_protection; - byte miss_chance; - - // Movement - // Additional speeds may be defined for Mobs - byte speed_walk1; - byte speed_swim1; - byte speed_fly1; - - // Fighting speed - byte speed_cast; - byte speed_attack; - - byte pickup_range; - - byte shield; - - byte aoe_scale; - byte resource_cost; - byte health_cost; - byte attack_range; - byte melee_range; - byte projectile_speed; - byte projectile_count; - byte shatter_probability; - byte shatter_range; - byte shatter_dmg; - byte shatter_count; - byte passthrough_damage; - byte passthrough_count; - byte dot_duration; - byte dot_count; - byte bleeding_dot; - byte poison_dot; - byte burn_dot; - byte ice_dot; - byte resource_drain; - byte shatter_dot; - byte minon_duration; - byte minion_count; - byte effect_spreading_probability; - byte effect_spreading_radius; - byte effect_spreading_max_count; - byte effect_duration; - byte aura_range; - byte cast_duration; - - byte agro_range; -}; - -struct FixedStats { - // Movement - // 
Additional speeds may be defined for Mobs - float speed_walk1; - float speed_swim1; - float speed_fly1; - - f32 speed_jump; - f32 speed_dodge; - f32 speed_turn; -}; - -// @question Do we even want this? -struct PlayerStats { - f32 pickup_range; -}; - -struct SMobStatsTotal { - PrimaryStatsPoints primary_total; - SecondaryStatsValues secondary_total; - - FixedStats fixed_total; - - uint32 shield_type; - uint32 shield; - bool shield_dispellable; -}; - -struct SMobStatsTotalCached { - PrimaryStatsPoints primary_total; - PrimaryStatsPoints primary_char; // Only recalculated when char stats change - PrimaryStatsPoints primary_skill; // Only recalculated when skill effect runs out - PrimaryStatsPoints primary_item; // Only recalculated when item changes - PrimaryStatsPoints primary_effect; // External e.g. from mob or ally - - SecondaryStatsValues secondary_total; - SecondaryStatsValues secondary_char; // Only recalculated when char stats change - SecondaryStatsValues secondary_skill; // Only recalculated when skill effect runs out - SecondaryStatsValues secondary_item; // Only recalculated when item changes - SecondaryStatsValues secondary_effect; // External e.g. from mob or ally -}; - struct SMobStatsPoints { + // @todo Add min max for dmg + // Every attack should have a damage range (maybe 5%?) + // Self stats PrimaryStatsPoints primary_stats; SecondaryStatsPoints secondary_stats; // @todo this is bad, a char doesn't have fire dmg but might have crit chance ... needs to split? 
// Item modifiers PrimaryStatsPoints item_primary_add; - PrimaryStatsRelPoints item_primary_mul; + PrimaryStatsPoints item_primary_mul; SecondaryStatsPoints item_secondary_add; - SecondaryStatsRelPoints item_secondary_mul; + SecondaryStatsPoints item_secondary_mul; // Skill modifiers PrimaryStatsPoints skill_primary_add; - PrimaryStatsRelPoints skill_primary_mul; + PrimaryStatsPoints skill_primary_mul; SecondaryStatsPoints skill_secondary_add; - SecondaryStatsRelPoints skill_secondary_mul; + SecondaryStatsPoints skill_secondary_mul; }; #endif \ No newline at end of file diff --git a/models/mob/MobStatsType.h b/models/mob/MobStatsType.h new file mode 100644 index 0000000..18fbbf6 --- /dev/null +++ b/models/mob/MobStatsType.h @@ -0,0 +1,37 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_MOB_STATS_TYPE_H +#define TOS_MODELS_MOB_STATS_TYPE_H + +// physical +#define MOB_STATS_TYPE_SLASHING 1 +#define MOB_STATS_TYPE_BLUDGEONING 2 +#define MOB_STATS_TYPE_STABBING 3 + +// elemental +#define MOB_STATS_TYPE_FIRE 4 +#define MOB_STATS_TYPE_WATER 5 +#define MOB_STATS_TYPE_WIND 6 +#define MOB_STATS_TYPE_EARTH 7 +#define MOB_STATS_TYPE_POISON 8 +#define MOB_STATS_TYPE_LIGHTNING 9 +#define MOB_STATS_TYPE_ICE 10 + +// magic +#define MOB_STATS_TYPE_ARCANE 11 +#define MOB_STATS_TYPE_CORRUPTED 12 +#define MOB_STATS_TYPE_HOLY 13 + +#define MOB_STATS_TYPE_SIZE 13 + +#define MOB_STATS_TYPE_PHYSICAL 14 +#define MOB_STATS_TYPE_MAGICAL 15 +#define MOB_STATS_TYPE_ELEMENTAL 16 + +#endif \ No newline at end of file diff --git a/models/mob/PrimaryStatsPoints.cpp b/models/mob/PrimaryStatsPoints.cpp new file mode 100644 index 0000000..653db0a --- /dev/null +++ b/models/mob/PrimaryStatsPoints.cpp @@ -0,0 +1,25 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_MOB_PRIMARY_STATS_POINTS_C +#define 
TOS_MODELS_MOB_PRIMARY_STATS_POINTS_C + +#include "../../stdlib/simd/SIMD_I8.h" +#include "PrimaryStatsPoints.h" + +// NOTE(review): sizeof(PrimaryStatsPoints) is a byte count and PrimaryStatsValues has no members yet - confirm both against simd_mult before use +void calculate_primary_values(const PrimaryStatsPoints* points, PrimaryStatsValues* values, int step = 8) +{ + simd_mult((int16 *) points, 1.3f, (int32 *) values, sizeof(PrimaryStatsPoints), step); +} + +void calculate_primary_relatives(const PrimaryStatsPoints* points, PrimaryStatsRelValues* values, int step = 8) +{ + simd_mult((int16 *) points, 0.01f, (int32 *) values, sizeof(PrimaryStatsPoints), step); +} + +#endif \ No newline at end of file diff --git a/models/mob/PrimaryStatsPoints.h b/models/mob/PrimaryStatsPoints.h new file mode 100644 index 0000000..f4371f1 --- /dev/null +++ b/models/mob/PrimaryStatsPoints.h @@ -0,0 +1,36 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_MOB_PRIMARY_STATS_POINTS_H +#define TOS_MODELS_MOB_PRIMARY_STATS_POINTS_H + +#include "../../stdlib/Types.h" + +#define PRIMARY_STAT_SIZE 7 +// One index per member of PrimaryStatsPoints (PRIMARY_STAT_SIZE entries) +static const int PRIMARY_STAT_INDICES[PRIMARY_STAT_SIZE] = {0, 1, 2, 3, 4, 5, 6}; + +// Character stats modifiable through leveling (simple +/- buttons) +struct PrimaryStatsPoints { + uint16 stat_str; // strength : effects health + base damage + uint16 stat_int; // intelligence : effects resource + base damage + uint16 stat_acc; // accuracy : effects critical chance + base damage + miss chance + uint16 stat_agi; // agility : effects resource + base damage + dodge chance + // @todo not implemented in database + uint16 stat_def; // defense : effects resource + base defense + dodge chance + uint16 stat_sta; // stamina : effects health regen + resource regen + uint16 stat_dex; // dexterity : effects health regen + resource regen + // @question do we need dex and acc or only one? 
+}; + +struct PrimaryStatsValues { +}; + +struct PrimaryStatsRelValues { +}; + +#endif \ No newline at end of file diff --git a/models/mob/SecondaryStatsPoints.cpp b/models/mob/SecondaryStatsPoints.cpp new file mode 100644 index 0000000..5b2b9d0 --- /dev/null +++ b/models/mob/SecondaryStatsPoints.cpp @@ -0,0 +1,25 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_MOB_SECONDARY_STATS_POINTS_C +#define TOS_MODELS_MOB_SECONDARY_STATS_POINTS_C + +#include "../../stdlib/simd/SIMD_I8.h" +#include "SecondaryStatsPoints.h" + +void calculate_primary_values(const SecondaryStatsPoints* points, SecondaryStatsValues* values, int step = 8) +{ + simd_mult((int16 *) points, 1.3f, (int32 *) values, sizeof(SecondaryStatsPoints), step); +} + +// Takes SecondaryStatsPoints because that is the type the *_mul modifiers in SMobStatsPoints use +void calculate_primary_relatives(const SecondaryStatsPoints* points, SecondaryStatsRelValues* values, int step = 8) +{ + simd_mult((int16 *) points, 0.01f, (int32 *) values, sizeof(SecondaryStatsPoints), step); +} + +#endif diff --git a/models/mob/SecondaryStatsPoints.h b/models/mob/SecondaryStatsPoints.h new file mode 100644 index 0000000..29c22cf --- /dev/null +++ b/models/mob/SecondaryStatsPoints.h @@ -0,0 +1,348 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_MOB_SECONDARY_STATS_POINTS_H +#define TOS_MODELS_MOB_SECONDARY_STATS_POINTS_H + +#include "../../stdlib/Types.h" +#include "MobStatsType.h" + +#define SECONDARY_STAT_SIZE 90 +static const int SECONDARY_STAT_INDICES[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, +}; + 
+/** + * @todo optimize order of struct members to ensure optimal struct size + */ + +// Character stats modifiable through skill tree? +struct SecondaryStatsPoints { + /* + @todo + Composite damage types would allow us to combine skills of different players (e.g. arrow flies through fire -> adds fire damage flag) + Obviously this doesn't increase the damage directly but can have a positive impact if the enemy has low fire resistance for example + + @question what happens if a skill has two flags (fire&slashing) and the enemy has high resistance vs slashing. + Does this mean the damage is reduced, does it reduce by "50%" or does it only reduce the min of fire&slashing resistance. + -> if you have no slashing resistance you still take full damage + */ + + // Damage types + // This allows us to create skills with multiple additive damage types AND composite damage that has multiple types at the same time + uint16 dmg[MOB_STATS_TYPE_SIZE]; + + uint16 dmg_reflection; + uint16 dmg_reflection_chance; + + // @question is this a damage number or is this a % number of the total damage? 
+ uint16 dmg_crit; + uint16 dmg_crit_chance; + + // @question is this similar to the different damage categories, is this a % of the total damage or should this just be a flag + uint16 dmg_pircing; + + // Health & Resource + uint16 health; + uint16 health_on_dmg_dealt; + uint16 health_on_dmg_taken; + + uint16 health_regen; + uint16 health_regen_rel; + uint16 health_regen_on_dmg_dealt; + uint16 health_regen_on_dmg_taken; + + uint16 resource; + uint16 resource_on_dmg_dealt; + uint16 resource_on_dmg_taken; + + uint16 resource_regen; + uint16 resource_regen_rel; + uint16 resource_regen_on_dmg_dealt; + uint16 resource_regen_on_dmg_taken; + + uint16 resource_loss; + uint16 resource_loss_on_dmg_dealt; + uint16 resource_loss_on_dmg_taken; + + // Defense types (resistances, armor, or whatever you want to call it) + uint16 defense[MOB_STATS_TYPE_SIZE]; + + // Accuracy + uint16 block_chance; + uint16 block_amount; + + uint16 dodge_chance; + uint16 cc_protection; + uint16 miss_chance; + + // Movement + // Additional speeds may be defined for Mobs + uint16 speed_walk1; + uint16 speed_swim1; + uint16 speed_fly1; + + // Fighting speed + uint16 speed_cast; + uint16 speed_attack; + + uint16 pickup_range; + + uint16 shield; + + // modifier + uint16 aoe_scale; + uint16 resource_cost; + uint16 health_cost; + uint16 attack_range; + uint16 melee_range; + uint16 projectile_speed; + uint16 projectile_count; + uint16 shatter_probability; + uint16 shatter_range; + uint16 shatter_dmg; + uint16 shatter_count; + uint16 passthrough_damage; + uint16 passthrough_count; + uint16 dot_duration; + uint16 dot_count; + uint16 bleeding_dot; + uint16 poison_dot; + uint16 burn_dot; + uint16 ice_dot; + uint16 resource_drain; + uint16 shatter_dot; + uint16 minion_duration; + uint16 minion_count; + uint16 effect_spreading_probability; + uint16 effect_spreading_radius; + uint16 effect_spreading_max_count; + uint16 effect_duration; + uint16 aura_range; + uint16 cast_duration; + + // special + uint16 
aggro_range; +}; + +struct SecondaryStatsPoints2 { + /* + @todo + Composite damage types would allow us to combine skills of different players (e.g. arrow flies through fire -> adds fire damage flag) + Obviously this doesn't increase the damage directly but can have a positive impact if the enemy has low fire resistance for example + + @question what happens if a skill has two flags (fire&slashing) and the enemy has high resistance vs slashing. + Does this mean the damage is reduced, does it reduce by "50%" or does it only reduce the min of fire&slashing resistance. + -> if you have no slashing resistance you still take full damage + */ + + // Damage types + // This allows us to create skills with multiple additive damage types AND composite damage that has multiple types at the same time + byte damage[3]; + byte damage_flag[3 * 5]; // 3 * 5 = 15, every damage component can have up to 3 damage types and a limited amount from others + + byte dmg_reflection; + byte dmg_reflection_chance; + + // @question is this a damage number or is this a % number of the total damage? 
+ byte dmg_crit; + byte dmg_crit_chance; + + // @question is this similar to the different damage categories, is this a % of the total damage or should this just be a flag + byte dmg_pircing; + + // Health & Resource + byte health; + byte health_on_dmg_dealt; + byte health_on_dmg_taken; + + byte health_regen; + byte health_regen_rel; + byte health_regen_on_dmg_dealt; + byte health_regen_on_dmg_taken; + + byte resource; + byte resource_on_dmg_dealt; + byte resource_on_dmg_taken; + + byte resource_regen; + byte resource_regen_rel; + byte resource_regen_on_dmg_dealt; + byte resource_regen_on_dmg_taken; + + byte resource_loss; + byte resource_loss_on_dmg_dealt; + byte resource_loss_on_dmg_taken; + + // Defense types (resistances, armor, or whatever you want to call it) + byte defense[MOB_STATS_TYPE_SIZE]; + + // Accuracy + byte block_chance; + byte block_amount; + + byte dodge_chance; + byte cc_protection; + byte miss_chance; + + // Movement + // Additional speeds may be defined for Mobs + byte speed_walk1; + byte speed_swim1; + byte speed_fly1; + + // Fighting speed + byte speed_cast; + byte speed_attack; + + byte pickup_range; + + byte shield; + + // modifier + byte aoe_scale; + byte resource_cost; + byte health_cost; + byte attack_range; + byte melee_range; + byte projectile_speed; + byte projectile_count; + byte shatter_probability; + byte shatter_range; + byte shatter_dmg; + byte shatter_count; + byte passthrough_damage; + byte passthrough_count; + byte dot_duration; + byte dot_count; + byte bleeding_dot; + byte poison_dot; + byte burn_dot; + byte ice_dot; + byte resource_drain; + byte shatter_dot; + byte minion_duration; + byte minion_count; + byte effect_spreading_probability; + byte effect_spreading_radius; + byte effect_spreading_max_count; + byte effect_duration; + byte aura_range; + byte cast_duration; + + // special + byte aggro_range; +}; + +struct SecondaryStatsRelPoints2 { + // Damage types + byte damage[3]; + + byte dmg_reflection; + byte 
dmg_reflection_chance; + + byte dmg_crit; + byte dmg_crit_chance; + + byte dmg_pircing; + + // Health & Resource + byte health; + byte health_on_dmg_dealt; + byte health_on_dmg_taken; + + byte health_regen; + byte health_regen_on_dmg_dealt; + byte health_regen_on_dmg_taken; + + byte resource; + byte resource_on_dmg_dealt; + byte resource_on_dmg_taken; + + byte resource_regen; + byte resource_regen_on_dmg_dealt; + byte resource_regen_on_dmg_taken; + + byte resource_loss; + byte resource_loss_on_dmg_dealt; + byte resource_loss_on_dmg_taken; + + // Defense types + // think about it as armor and/or resistence if it helps + byte defense[MOB_STATS_TYPE_SIZE]; + + // Accuracy + byte block_chance; + byte block_amount; + + byte dodge_chance; + byte cc_protection; + byte miss_chance; + + // Movement + // Additional speeds may be defined for Mobs + byte speed_walk1; + byte speed_swim1; + byte speed_fly1; + + // Fighting speed + byte speed_cast; + byte speed_attack; + + byte pickup_range; + + byte shield; + + byte aoe_scale; + byte resource_cost; + byte health_cost; + byte attack_range; + byte melee_range; + byte projectile_speed; + byte projectile_count; + byte shatter_probability; + byte shatter_range; + byte shatter_dmg; + byte shatter_count; + byte passthrough_damage; + byte passthrough_count; + byte dot_duration; + byte dot_count; + byte bleeding_dot; + byte poison_dot; + byte burn_dot; + byte ice_dot; + byte resource_drain; + byte shatter_dot; + byte minion_duration; + byte minion_count; + byte effect_spreading_probability; + byte effect_spreading_radius; + byte effect_spreading_max_count; + byte effect_duration; + byte aura_range; + byte cast_duration; + + byte aggro_range; +}; + +struct SecondaryStatsValues { +}; + +struct SecondaryStatsRelValues { +}; + +#endif \ No newline at end of file diff --git a/models/mob/mob_category.h b/models/mob/_mob_category.h similarity index 100% rename from models/mob/mob_category.h rename to models/mob/_mob_category.h diff --git 
a/models/mob/mob_list.h b/models/mob/_mob_list.h similarity index 100% rename from models/mob/mob_list.h rename to models/mob/_mob_list.h diff --git a/models/mob/player/Player.h b/models/mob/player/Player.h index b1798d9..b8a3766 100644 --- a/models/mob/player/Player.h +++ b/models/mob/player/Player.h @@ -29,7 +29,7 @@ #if SERVER struct SPlayer { Mob mob; - SMobStats player_stats; + SMobStatsPoints player_stats; char name[MAX_CHAR_NAME_LENGTH]; char title[MAX_CHAR_TITLE_LENGTH]; @@ -92,7 +92,7 @@ struct CPlayer { Mob mob; - CMobStats player_stats; + SMobStatsPoints player_stats; char name[MAX_CHAR_NAME_LENGTH]; char title[MAX_CHAR_TITLE_LENGTH]; diff --git a/models/mob/player/PlayerStats.h b/models/mob/player/PlayerStats.h new file mode 100644 index 0000000..a4ab004 --- /dev/null +++ b/models/mob/player/PlayerStats.h @@ -0,0 +1,19 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_MOB_PLAYER_STATS_H +#define TOS_MODELS_MOB_PLAYER_STATS_H + +#include "../../../stdlib/Types.h" + +// @question Do we even want this? 
+struct PlayerStats { + f32 pickup_range; +}; + +#endif \ No newline at end of file diff --git a/models/mob/player/PlayerXPRequirement.h b/models/mob/player/PlayerXPRequirement.h new file mode 100644 index 0000000..f986fb5 --- /dev/null +++ b/models/mob/player/PlayerXPRequirement.h @@ -0,0 +1,18 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MODELS_MOB_PLAYER_XP_REQUIREMENT_H +#define TOS_MODELS_MOB_PLAYER_XP_REQUIREMENT_H + +#include "../../../stdlib/Types.h" + +struct PlayerXPRequirement { + int xp; +}; + +#endif \ No newline at end of file diff --git a/models/mob/player/_player_class.h b/models/mob/player/_player_class.h new file mode 100644 index 0000000..33a934d --- /dev/null +++ b/models/mob/player/_player_class.h @@ -0,0 +1,8 @@ +#ifndef TOS_MODELS_MOB_PLAYER_CLASS_H +#define TOS_MODELS_MOB_PLAYER_CLASS_H + +#define PLAYER_CLASS_MAGE 1 + +#define PLAYER_CLASS_SIZE 24 + +#endif \ No newline at end of file diff --git a/models/mob/skill/Skill.h b/models/mob/skill/Skill.h index 194a2e5..3d88952 100644 --- a/models/mob/skill/Skill.h +++ b/models/mob/skill/Skill.h @@ -27,6 +27,7 @@ struct Skill // @todo animations void* animation_casting; void* animation_channeling; + void* icon; // @todo e.g. attack command, movement command, etc. for totems and minions void* commands; @@ -63,34 +64,34 @@ struct Skill // You can have 2 stats for 2 target types (e.g. 
you could create a buff and debuff in one skill) // 1 PrimaryStatsPoints stats1_primary_add; - PrimaryStatsRelPoints stats1_primary_mul; + PrimaryStatsPoints stats1_primary_mul; SecondaryStatsPoints stats1_secondary_add; - SecondaryStatsRelPoints stats1_secondary_mul; + SecondaryStatsPoints stats1_secondary_mul; StatsTarget stats1_target; // 2 PrimaryStatsPoints stats2_primary_add; - PrimaryStatsRelPoints stats2_primary_mul; + PrimaryStatsPoints stats2_primary_mul; SecondaryStatsPoints stats2_secondary_add; - SecondaryStatsRelPoints stats2_secondary_mul; + SecondaryStatsPoints stats2_secondary_mul; StatsTarget stats2_target; // Modifiers // Char PrimaryStatsPoints primary_char_add; - PrimaryStatsRelPoints primary_char_mul; + PrimaryStatsPoints primary_char_mul; SecondaryStatsPoints secondary_char_add; - SecondaryStatsRelPoints secondary_char_mul; + SecondaryStatsPoints secondary_char_mul; // Item PrimaryStatsPoints primary_item_add; - PrimaryStatsRelPoints primary_item_mul; + PrimaryStatsPoints primary_item_mul; SecondaryStatsPoints secondary_item_add; - SecondaryStatsRelPoints secondary_item_mul; + SecondaryStatsPoints secondary_item_mul; int skill_movement; // none, follows target, random moevement, random movement in aoe // @todo how to make specific custom movement pattern for boss fights @@ -109,6 +110,7 @@ struct Skill bool is_range; void* attack_anim; + int movement_pattern; // the skill moves in a specific pattern (e.g. straight line, random, circular motion, left/right wave, ...) 
bool is_melee; diff --git a/models/object/object_list.h b/models/object/_object_list.h similarity index 100% rename from models/object/object_list.h rename to models/object/_object_list.h diff --git a/models/object/object_types.h b/models/object/_object_types.h similarity index 100% rename from models/object/object_types.h rename to models/object/_object_types.h diff --git a/models/settings/Settings.h b/models/settings/Settings.h index 2c2dc81..8fb2ebc 100644 --- a/models/settings/Settings.h +++ b/models/settings/Settings.h @@ -70,6 +70,13 @@ struct SSettings { uint32 message_cache = 1024; uint32 interpolation_buffer; + + bool is_auction_house_enabled = true; + bool is_direct_trading_enabled = true; + + // @todo add more server settings for tournaments, tournament modes + // @todo add more server settings for raids and dungeons + // @todo add more settings for pvp }; // Player settings that the server needs to know about @@ -94,6 +101,7 @@ struct CSettings { byte gpu_api = SETTING_TYPE_GPU_API_NONE; byte gpu_type = SETTING_TYPE_GPU_MEDIUM; byte gpu_fps = SETTING_TYPE_UNLIMITED; + byte gpu_memory = 4; byte gpu_aspect_ratio; byte gpu_resolution; diff --git a/models/settings/client_high.cfg b/models/settings/client_high.cfg index e69de29..ee48d10 100644 --- a/models/settings/client_high.cfg +++ b/models/settings/client_high.cfg @@ -0,0 +1,7 @@ +texutre_count_8192x8192 +texutre_count_4096x4096 +texutre_count_2048x2048 +texutre_count_1024x1024 +texutre_count_512x512 +texutre_count_256x256 +texutre_count_128x128 \ No newline at end of file diff --git a/network/Client.h b/network/Client.h index 1322d22..a658fcc 100644 --- a/network/Client.h +++ b/network/Client.h @@ -15,7 +15,7 @@ #include "SocketConnection.h" #include "../stdlib/Types.h" -#include "../utils/RingMemory.h" +#include "../memory/RingMemory.h" #if _WIN32 #include diff --git a/network/packet/PacketCache.h b/network/packet/PacketCache.h index 1aafc0c..2fc391c 100644 --- a/network/packet/PacketCache.h +++ 
b/network/packet/PacketCache.h @@ -9,7 +9,7 @@ #ifndef TOS_NETWORK_PACKET_CACHE_H #define TOS_NETWORK_PACKET_CACHE_H -#include "../../utils/RingMemory.h" +#include "../../memory/RingMemory.h" #include "../../utils/BufferMemory.h" #if _WIN32 diff --git a/platform/linux/UtilsLinux.h b/platform/linux/UtilsLinux.h index 179a10a..3080bf8 100644 --- a/platform/linux/UtilsLinux.h +++ b/platform/linux/UtilsLinux.h @@ -72,7 +72,7 @@ uint64 last_modified(const char* filename) } inline -void file_read(const char* filename, file_body* file) +void file_read(const char* filename, FileBody* file, RingMemory* ring = NULL) { FILE *fp = fopen(filename, "rb"); fseek(fp, 0, SEEK_END); @@ -80,6 +80,10 @@ void file_read(const char* filename, file_body* file) file->size = ftell(fp); rewind(fp); + if (ring != NULL) { + file->content = ring_get_memory(ring, file->size); + } + fread(file->content, 1, file->size, fp); fclose(fp); @@ -92,11 +96,6 @@ uint64_t file_read_struct(const char* filename, void* file, uint32 size) { return 0; } - fseek(fp, 0, SEEK_END); - long fsize = ftell(fp); - fseek(fp, 0, SEEK_SET); - - ASSERT_SIMPLE(fsize > size); size_t read_bytes = fread(file, 1, size, fp); fclose(fp); @@ -104,7 +103,7 @@ uint64_t file_read_struct(const char* filename, void* file, uint32 size) { } inline -bool file_write(const char* filename, const file_body* file) { +bool file_write(const char* filename, const FileBody* file) { FILE *fp = fopen(filename, "wb"); if (!fp) { return false; @@ -186,7 +185,7 @@ inline bool file_append(FILE* fp, const char* file) { return written == length; } -inline bool file_append(const char* filename, const file_body* file) { +inline bool file_append(const char* filename, const FileBody* file) { FILE *fp = get_append_handle(filename); if (!fp) { return false; @@ -212,6 +211,11 @@ void self_path(char* path) { inline void relative_to_absolute(const char* rel, char* path) { + const char* temp = rel; + if (temp[0] == '.' 
&& temp[1] == '/') { + temp += 2; + } + char self_path[MAX_PATH]; ssize_t count = readlink("/proc/self/exe", self_path, MAX_PATH - 1); if (count == -1) { @@ -224,7 +228,7 @@ inline void relative_to_absolute(const char* rel, char* path) *(last + 1) = '\0'; } - snprintf(path, MAX_PATH, "%s%s", self_path, rel); + snprintf(path, MAX_PATH, "%s%s", self_path, temp); } inline diff --git a/platform/win32/UtilsWin32.h b/platform/win32/UtilsWin32.h index 71aacc0..7dbd801 100644 --- a/platform/win32/UtilsWin32.h +++ b/platform/win32/UtilsWin32.h @@ -25,6 +25,7 @@ inline uint64 file_size(const char* filename) { + // @performance Profile against fseek strategy HANDLE fp = CreateFileA((LPCSTR) filename, GENERIC_READ, FILE_SHARE_READ, @@ -48,7 +49,7 @@ file_size(const char* filename) } inline void -file_read(const char* filename, file_body* file) +file_read(const char* filename, FileBody* file, RingMemory* ring = NULL) { HANDLE fp = CreateFileA((LPCSTR) filename, GENERIC_READ, @@ -71,6 +72,10 @@ file_read(const char* filename, file_body* file) return; } + if (ring != NULL) { + file->content = ring_get_memory(ring, size.QuadPart); + } + DWORD bytes; ASSERT_SIMPLE(size.QuadPart < MAX_INT32); if (!ReadFile(fp, file->content, (uint32) size.QuadPart, &bytes, NULL)) { @@ -122,7 +127,7 @@ file_read_struct(const char* filename, void* file, uint32 size) } inline bool -file_write(const char* filename, const file_body* file) +file_write(const char* filename, const FileBody* file) { HANDLE fp = CreateFileA((LPCSTR) filename, GENERIC_WRITE, @@ -247,7 +252,7 @@ file_append(HANDLE fp, const char* file) } inline bool -file_append(const char* filename, const file_body* file) +file_append(const char* filename, const FileBody* file) { HANDLE fp = CreateFileA((LPCSTR) filename, FILE_APPEND_DATA, @@ -305,12 +310,17 @@ inline void relative_to_absolute(const char* rel, char* path) return; } + const char* temp = rel; + if (temp[0] == '.' 
&& temp[1] == '/') { + temp += 2; + } + char* last = strrchr(self_path, '\\'); if (last != NULL) { *(last + 1) = '\0'; } - snprintf(path, MAX_PATH, "%s%s", self_path, rel); + snprintf(path, MAX_PATH, "%s%s", self_path, temp); } void log_to_file(LogPool* logs, HANDLE fp) diff --git a/platform/win32/input/RawInput.h b/platform/win32/input/RawInput.h index cb0e123..59c881a 100644 --- a/platform/win32/input/RawInput.h +++ b/platform/win32/input/RawInput.h @@ -161,24 +161,14 @@ void handle_input(LPARAM lParam, InputState* states) // https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-rawkeyboard - RAWKEYBOARD rawKB = raw->data.keyboard; + RAWKEYBOARD rawKB = raw->data.keyboard; - states[i].key = raw->data.keyboard.MakeCode; - states[i].key_up = raw->data.keyboard.Flags & RI_KEY_BREAK; - states[i].key_down = raw->data.keyboard.Flags & RI_KEY_MAKE; + if (rawKB.Flags & RI_KEY_BREAK) { + states[i].keys_down_old[states[i].up_index++] = rawKB.MakeCode; + } - if (states[i].key_down) { - for (int j = 0; j < MAX_KEY_PRESSES; ++j) { - if (states[i].keys_down[j] == NULL) { - states[i].keys_down[j] = states[i].key; - } - } - } else if (states[i].key_up) { - for (int j = 0; j < MAX_KEY_PRESSES; ++j) { - if (states[i].keys_down[j] == states[i].key) { - states[i].keys_down[j] = NULL; - } - } + if (rawKB.Flags & RI_KEY_MAKE) { + states[i].keys_down[states[i].down_index++] = rawKB.MakeCode; } states[i].state_change_keyboard = true; diff --git a/platform/win32/input/XInput.h b/platform/win32/input/XInput.h index f865bcd..ee3bf62 100644 --- a/platform/win32/input/XInput.h +++ b/platform/win32/input/XInput.h @@ -102,19 +102,19 @@ void handle_controller_input(ControllerState* states) states[controller_index].down = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_DPAD_DOWN; states[controller_index].left = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_DPAD_LEFT; states[controller_index].right = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_DPAD_RIGHT; - 
states[controller_index].start = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_START; - states[controller_index].back = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_BACK; + states[controller_index].button[6] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_START; + states[controller_index].button[7] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_BACK; - states[controller_index].shoulder_left = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_LEFT_SHOULDER; - states[controller_index].shoulder_right = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_RIGHT_SHOULDER; + states[controller_index].button[4] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_LEFT_SHOULDER; + states[controller_index].button[5] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_RIGHT_SHOULDER; - states[controller_index].trigger_left = controller_state.Gamepad.bLeftTrigger; - states[controller_index].trigger_right = controller_state.Gamepad.bRightTrigger; + states[controller_index].trigger[0] = controller_state.Gamepad.bLeftTrigger; + states[controller_index].trigger[1] = controller_state.Gamepad.bRightTrigger; - states[controller_index].button_a = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_A; - states[controller_index].button_b = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_B; - states[controller_index].button_x = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_X; - states[controller_index].button_y = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_Y; + states[controller_index].button[0] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_A; + states[controller_index].button[1] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_B; + states[controller_index].button[2] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_X; + states[controller_index].button[3] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_Y; states[controller_index].stickl_x = controller_state.Gamepad.sThumbLX; states[controller_index].stickl_y = 
controller_state.Gamepad.sThumbLY; diff --git a/stdlib/simd/SIMD_F32.h b/stdlib/simd/SIMD_F32.h index 16ccd14..7a7e172 100644 --- a/stdlib/simd/SIMD_F32.h +++ b/stdlib/simd/SIMD_F32.h @@ -13,6 +13,7 @@ #include #include "../Types.h" +#include "SIMD_SVML.h" struct f32_4 { union { @@ -990,144 +991,214 @@ void simd_mult(const f32* a, const f32* b, f32* result, int size, int steps) int i = 0; if (steps == 16) { - f32_16 a_16; - f32_16 b_16; - f32_16 result_16; + __m512 a_16; + __m512 b_16; + __m512 result_16; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_16 = _mm512_loadu_ps(a); + b_16 = _mm512_loadu_ps(b); + result_16 = _mm512_mul_ps(a_16, b_16); + _mm512_store_ps(result, result_16); - a_16 = load_f32_16(a); - b_16 = load_f32_16(b); - result_16 = a_16 * b_16; - unload_f32_16(result_16, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 8) { - f32_8 a_8; - f32_8 b_8; - f32_8 result_8; + __m256 a_8; + __m256 b_8; + __m256 result_8; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_8 = _mm256_loadu_ps(a); + b_8 = _mm256_loadu_ps(b); + result_8 = _mm256_mul_ps(a_8, b_8); + _mm256_store_ps(result, result_8); - a_8 = load_f32_8(a); - b_8 = load_f32_8(b); - result_8 = a_8 * b_8; - unload_f32_8(result_8, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 4) { - f32_4 a_4; - f32_4 b_4; - f32_4 result_4; + __m128 a_4; + __m128 b_4; + __m128 result_4; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_4 = _mm_loadu_ps(a); + b_4 = _mm_loadu_ps(b); + result_4 = _mm_mul_ps(a_4, b_4); + _mm_store_ps(result, result_4); - a_4 = load_f32_4(a); - b_4 = load_f32_4(b); - result_4 = a_4 * b_4; - unload_f32_4(result_4, result); + a += steps; + b += steps; + result += steps; } } for (; i < size; ++i) { + *result = *a * *b; + ++a; 
++b; ++result; - - *result = *a * *b; } } inline -void f32_4_mult(const f32* a, const f32* b, f32* result) -{ - f32_4 a_4 = load_f32_4(a); - f32_4 b_4 = load_f32_4(b); - f32_4 result_4 = a_4 * b_4; - - unload_f32_4(result_4, result); -} - -inline -void simd_mult(const f32* a, const f32* b, f32* result, int size, int steps) +void simd_mult(const f32* a, f32 b, f32* result, int size, int steps) { int i = 0; if (steps == 16) { - f32_16 a_16; - f32_16 b_16; - f32_16 result_16; + __m512 a_16; + __m512 b_16 = _mm512_set1_ps(b); + __m512 result_16; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_16 = _mm512_loadu_ps(a); + result_16 = _mm512_mul_ps(a_16, b_16); + _mm512_store_ps(result, result_16); - a_16 = load_f32_16(a); - b_16 = load_f32_16(b); - result_16 = a_16 + b_16; - unload_f32_16(result_16, result); + a += steps; + result += steps; } } else if (steps == 8) { - f32_8 a_8; - f32_8 b_8; - f32_8 result_8; + __m256 a_8; + __m256 b_8 = _mm256_set1_ps(b); + __m256 result_8; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_8 = _mm256_loadu_ps(a); + result_8 = _mm256_mul_ps(a_8, b_8); + _mm256_store_ps(result, result_8); - a_8 = load_f32_8(a); - b_8 = load_f32_8(b); - result_8 = a_8 + b_8; - unload_f32_8(result_8, result); + a += steps; + result += steps; } } else if (steps == 4) { - f32_4 a_4; - f32_4 b_4; - f32_4 result_4; + __m128 a_4; + __m128 b_4 = _mm_set1_ps(b); + __m128 result_4; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_4 = _mm_loadu_ps(a); + result_4 = _mm_mul_ps(a_4, b_4); + _mm_store_ps(result, result_4); - a_4 = load_f32_4(a); - b_4 = load_f32_4(b); - result_4 = a_4 + b_4; - unload_f32_4(result_4, result); + a += steps; + result += steps; } } for (; i < size; ++i) { - ++a; - ++b; - ++result; + *result = *a * b; - *result = *a + *b; + ++a; + 
++result; } } inline -void f32_4_add(const f32* a, const f32* b, f32* result) +void simd_div(const f32* a, f32 b, f32* result, int size, int steps) { - f32_4 a_4 = load_f32_4(a); - f32_4 b_4 = load_f32_4(b); - f32_4 result_4 = a_4 + b_4; + int i = 0; - unload_f32_4(result_4, result); + if (steps == 16) { + __m512 a_16; + __m512 b_16 = _mm512_set1_ps(b); + __m512 result_16; + + for (; i <= size - steps; i += steps) { + a_16 = _mm512_loadu_ps(a); + result_16 = _mm512_div_ps(a_16, b_16); + _mm512_store_ps(result, result_16); + + a += steps; + result += steps; + } + } else if (steps == 8) { + __m256 a_8; + __m256 b_8 = _mm256_set1_ps(b); + __m256 result_8; + + for (; i <= size - steps; i += steps) { + a_8 = _mm256_loadu_ps(a); + result_8 = _mm256_div_ps(a_8, b_8); + _mm256_store_ps(result, result_8); + + a += steps; + result += steps; + } + } else if (steps == 4) { + __m128 a_4; + __m128 b_4 = _mm_set1_ps(b); + __m128 result_4; + + for (; i <= size - steps; i += steps) { + a_4 = _mm_loadu_ps(a); + result_4 = _mm_div_ps(a_4, b_4); + _mm_store_ps(result, result_4); + + a += steps; + result += steps; + } + } + + for (; i < size; ++i) { + *result = *a / b; + + ++a; + ++result; + } } -// @todo add more operations like the one above "f32_4_mult()" +inline +void simd_div(const f32* a, f32 b, __m256* result, int size) +{ + int i = 0; + int j = 0; + // @todo this his how all the functions should be implemented that take in baseic types and output basic types + __m256 a_8; + __m256 b_8 = _mm256_set1_ps(b); + __m256 result_8; + + for (; i <= size - 8; i += 8) { + a_8 = _mm256_loadu_ps(a); + result_8 = _mm256_div_ps(a_8, b_8); + result[j] = result_8; + + a += 8; + ++j; + } + + int diff = size - i; + alignas(32) float temp[8]; + + for (int k = 0; k < diff; k++) { + temp[k] = a[i + k] / b; + } + + result[j] = _mm256_loadu_ps(temp); +} + +inline +void simd_cmp_le(const __m256* a, f32 b, bool* result, int size) +{ + __m256 b_8 = _mm256_set1_ps(b); + + for (int i = 0; i < size; ++i) { 
+ int mask = _mm256_movemask_ps(_mm256_cmp_ps(a[i], b_8, _CMP_LE_OQ)); + + for (int j = 0; j < 8; ++j) { + result[i * 8 + j] = (mask & (1 << j)) != 0; + } + } +} + +// @todo But a guard or warning on the trigonometric functions since they are only implemented for msvc/intel compiler inline f32_4 simd_sin(f32_4 a) { diff --git a/stdlib/simd/SIMD_I16.h b/stdlib/simd/SIMD_I16.h index 330f176..9ffb372 100644 --- a/stdlib/simd/SIMD_I16.h +++ b/stdlib/simd/SIMD_I16.h @@ -789,188 +789,4 @@ inline bool all_false(int16_32 a) // @todo from down here we can optimize some of the code by NOT using the wrappers // the code is self contained and we could use te intrinsic functions directly -inline -void simd_mult(const int16* a, const int16* b, int16* result, int size, int steps) -{ - int i = 0; - - if (steps == 16) { - int16_32 a_16; - int16_32 b_16; - int16_32 result_16; - - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; - - a_16 = load_int16_32(a); - b_16 = load_int16_32(b); - result_16 = a_16 * b_16; - unload_int16_32(result_16, result); - } - } else if (steps == 8) { - int16_16 a_8; - int16_16 b_8; - int16_16 result_8; - - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; - - a_8 = load_int16_16(a); - b_8 = load_int16_16(b); - result_8 = a_8 * b_8; - unload_int16_16(result_8, result); - } - } else if (steps == 4) { - int16_8 a_4; - int16_8 b_4; - int16_8 result_4; - - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; - - a_4 = load_int16_8(a); - b_4 = load_int16_8(b); - result_4 = a_4 * b_4; - unload_int16_8(result_4, result); - } - } - - for (; i < size; ++i) { - ++a; - ++b; - ++result; - - *result = *a * *b; - } -} - -inline -void simd_mult(const int16* a, const int16* b, int16* result) -{ - int16_8 a_4 = load_int16_8(a); - int16_8 b_4 = load_int16_8(b); - int16_8 result_4 = a_4 * b_4; - - unload_int16_8(result_4, result); -} - -inline -void int16_16_mult(const int16* a, const int16* b, int16* result) -{ - 
int16_16 a_8 = load_int16_16(a); - int16_16 b_8 = load_int16_16(b); - int16_16 result_8 = a_8 * b_8; - - unload_int16_16(result_8, result); -} - -inline -void int16_32_mult(const int16* a, const int16* b, int16* result) -{ - int16_32 a_16 = load_int16_32(a); - int16_32 b_16 = load_int16_32(b); - int16_32 result_16 = a_16 * b_16; - - unload_int16_32(result_16, result); -} - -inline -void simd_add(const int16* a, const int16* b, int16* result, int size, int steps) -{ - int i = 0; - - if (steps == 16) { - int16_32 a_16; - int16_32 b_16; - int16_32 result_16; - - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; - - a_16 = load_int16_32(a); - b_16 = load_int16_32(b); - result_16 = a_16 + b_16; - unload_int16_32(result_16, result); - } - } else if (steps == 8) { - int16_16 a_8; - int16_16 b_8; - int16_16 result_8; - - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; - - a_8 = load_int16_16(a); - b_8 = load_int16_16(b); - result_8 = a_8 + b_8; - unload_int16_16(result_8, result); - } - } else if (steps == 4) { - int16_8 a_4; - int16_8 b_4; - int16_8 result_4; - - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; - - a_4 = load_int16_8(a); - b_4 = load_int16_8(b); - result_4 = a_4 + b_4; - unload_int16_8(result_4, result); - } - } - - for (; i < size; ++i) { - ++a; - ++b; - ++result; - - *result = *a + *b; - } -} - -inline -void int16_8_add(const int16* a, const int16* b, int16* result) -{ - int16_8 a_4 = load_int16_8(a); - int16_8 b_4 = load_int16_8(b); - int16_8 result_4 = a_4 + b_4; - - unload_int16_8(result_4, result); -} - -inline -void int16_16_add(const int16* a, const int16* b, int16* result) -{ - int16_16 a_8 = load_int16_16(a); - int16_16 b_8 = load_int16_16(b); - int16_16 result_8 = a_8 + b_8; - - unload_int16_16(result_8, result); -} - -inline -void int16_32_add(const int16* a, const int16* b, int16* result) -{ - int16_32 a_16 = load_int16_32(a); - int16_32 b_16 = load_int16_32(b); - int16_32 
result_16 = a_16 + b_16; - - unload_int16_32(result_16, result); -} - -// @todo add more operations like the one above "int16_8_mult()" - #endif \ No newline at end of file diff --git a/stdlib/simd/SIMD_I32.h b/stdlib/simd/SIMD_I32.h index ab45604..0c3d266 100644 --- a/stdlib/simd/SIMD_I32.h +++ b/stdlib/simd/SIMD_I32.h @@ -18,6 +18,8 @@ // @todo a lot of sse functions require high level (e.g. sse4.1) this needs to be changed to be more general // or better create alternative functions for the available sse version. +// @question why are we passing structs by value? + struct int32_4 { union { __m128i s; @@ -86,8 +88,9 @@ inline int32_16 load_int32_16(const int32* mem) inline int32_16 init_int32_16(const int32* mem) { int32_16 simd; - simd.s = _mm512_set_epi32(mem[0], mem[1], mem[2], mem[3], mem[4], mem[5], mem[6], mem[7], mem[8], mem[9], - mem[10], mem[11], mem[12], mem[13], mem[14], mem[15]); + simd.s = _mm512_set_epi32( + mem[0], mem[1], mem[2], mem[3], mem[4], mem[5], mem[6], mem[7], + mem[8], mem[9], mem[10], mem[11], mem[12], mem[13], mem[14], mem[15]); return simd; } @@ -654,7 +657,7 @@ inline int32_16 operator!=(int32_16 a, int32_16 b) inline int32_4 operator&(int32_4 a, int32_4 b) { int32_4 simd; - simd.s = _mm_and_epi32(a.s, b.s); + simd.s = _mm_and_si128(a.s, b.s); return simd; } @@ -662,7 +665,7 @@ inline int32_4 operator&(int32_4 a, int32_4 b) inline int32_8 operator&(int32_8 a, int32_8 b) { int32_8 simd; - simd.s = _mm256_and_epi32(a.s, b.s); + simd.s = _mm256_and_si256(a.s, b.s); return simd; } @@ -670,7 +673,7 @@ inline int32_8 operator&(int32_8 a, int32_8 b) inline int32_16 operator&(int32_16 a, int32_16 b) { int32_16 simd; - simd.s = _mm512_and_epi32(a.s, b.s); + simd.s = _mm512_and_si512(a.s, b.s); return simd; } @@ -816,7 +819,7 @@ inline int32_16 simd_max(int32_16 a, int32_16 b) inline int32_4 sign(int32_4 a) { __m128i mask = _mm_set1_epi32(0x80000000); - __m128i signBit = _mm_and_epi32(a.s, mask); + __m128i signBit = _mm_and_si128(a.s, mask); 
__m128i b = _mm_set1_epi32(1); int32_4 simd; @@ -828,7 +831,7 @@ inline int32_4 sign(int32_4 a) inline int32_8 sign(int32_8 a) { __m256i mask = _mm256_set1_epi32(0x80000000); - __m256i signBit = _mm256_and_epi32(a.s, mask); + __m256i signBit = _mm256_and_si256(a.s, mask); __m256i b = _mm256_set1_epi32(1); int32_8 simd; @@ -840,7 +843,7 @@ inline int32_8 sign(int32_8 a) inline int32_16 sign(int32_16 a) { __m512i mask = _mm512_set1_epi32(0x80000000); - __m512i signBit = _mm512_and_epi32(a.s, mask); + __m512i signBit = _mm512_and_si512(a.s, mask); __m512i b = _mm512_set1_epi32(1); int32_16 simd; @@ -1030,49 +1033,49 @@ void simd_mult(const int32* a, const int32* b, int32* result, int size, int step int i = 0; if (steps == 16) { - int32_16 a_16; - int32_16 b_16; - int32_16 result_16; + __m512i a_16; + __m512i b_16; + __m512i result_16; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_16 = _mm512_loadu_epi32(a); + b_16 = _mm512_loadu_epi32(b); + result_16 = _mm512_mul_epi32(a_16, b_16); + _mm512_store_epi32(result, result_16); - a_16 = load_int32_16(a); - b_16 = load_int32_16(b); - result_16 = a_16 * b_16; - unload_int32_16(result_16, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 8) { - int32_8 a_8; - int32_8 b_8; - int32_8 result_8; + __m256i a_8; + __m256i b_8; + __m256i result_8; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_8 = _mm256_loadu_epi32(a); + b_8 = _mm256_loadu_epi32(b); + result_8 = _mm256_mul_epi32(a_8, b_8); + _mm256_store_si256((__m256i *) result, result_8); - a_8 = load_int32_8(a); - b_8 = load_int32_8(b); - result_8 = a_8 * b_8; - unload_int32_8(result_8, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 4) { - int32_4 a_4; - int32_4 b_4; - int32_4 result_4; + __m128i a_4; + __m128i b_4; + __m128i result_4; - for (i = 0; i <= size - steps; i += steps) 
{ - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_4 = _mm_loadu_epi32(a); + b_4 = _mm_loadu_epi32(b); + result_4 = _mm_mul_epi32(a_4, b_4); + _mm_store_si128((__m128i *) result, result_4); - a_4 = load_int32_4(a); - b_4 = load_int32_4(b); - result_4 = a_4 * b_4; - unload_int32_4(result_4, result); + a += steps; + b += steps; + result += steps; } } @@ -1091,64 +1094,64 @@ void simd_mult(const int32* a, const f32* b, f32* result, int size, int steps) int i = 0; if (steps == 16) { - int32_16 a_16; - f32_16 af_16; - f32_16 b_16; - f32_16 result_16; + __m512i a_16; + __m512 af_16; + __m512 b_16; + __m512 result_16; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_16 = _mm512_loadu_epi32(a); + af_16 = _mm512_cvtepi32_ps(a_16); + b_16 = _mm512_loadu_ps(b); + result_16 = _mm512_mul_ps(af_16, b_16); + _mm512_store_ps(result, result_16); - a_16 = load_int32_16(a); - af_16 = int32_16_to_f32_16(a_16); - b_16 = load_f32_16(b); - result_16 = af_16 * b_16; - unload_f32_16(result_16, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 8) { - int32_8 a_8; - f32_8 af_8; - f32_8 b_8; - f32_8 result_8; + __m256i a_8; + __m256 af_8; + __m256 b_8; + __m256 result_8; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_8 = _mm256_loadu_epi32(a); + af_8 = _mm256_cvtepi32_ps(a_8); + b_8 = _mm256_loadu_ps(b); + result_8 = _mm256_mul_ps(af_8, b_8); + _mm256_store_ps(result, result_8); - a_8 = load_int32_8(a); - af_8 = int32_8_to_f32_8(a_8); - b_8 = load_f32_8(b); - result_8 = af_8 * b_8; - unload_f32_8(result_8, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 4) { - int32_4 a_4; - f32_4 af_4; - f32_4 b_4; - f32_4 result_4; + __m128i a_4; + __m128 af_4; + __m128 b_4; + __m128 result_4; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - 
steps; i += steps) { + a_4 = _mm_loadu_epi32(a); + af_4 = _mm_cvtepi32_ps(a_4); + b_4 = _mm_loadu_ps(b); + result_4 = _mm_mul_ps(af_4, b_4); + _mm_store_ps(result, result_4); - a_4 = load_int32_4(a); - af_4 = int32_4_to_f32_4(a_4); - b_4 = load_f32_4(b); - result_4 = af_4 * b_4; - unload_f32_4(result_4, result); + a += steps; + b += steps; + result += steps; } } for (; i < size; ++i) { + *result = *a * *b; + ++a; ++b; ++result; - - *result = *a * *b; } } @@ -1158,134 +1161,198 @@ void simd_mult(const int32* a, const f32* b, int32* result, int size, int steps) int i = 0; if (steps == 16) { - int32_16 a_16; - f32_16 af_16; - f32_16 b_16; - f32_16 result_16; - int32_16 resulti_16; + __m512i a_16; + __m512 af_16; + __m512 b_16; + __m512 result_16; + __m512i resulti_16; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_16 = _mm512_loadu_epi32(a); + af_16 = _mm512_cvtepi32_ps(a_16); + b_16 = _mm512_loadu_ps(b); + result_16 = _mm512_mul_ps(af_16, b_16); + resulti_16 = _mm512_cvtps_epi32(result_16); + _mm512_store_epi32(result, resulti_16); - a_16 = load_int32_16(a); - af_16 = int32_16_to_f32_16(a_16); - b_16 = load_f32_16(b); - result_16 = af_16 * b_16; - resulti_16 = f32_16_to_int32_16(result_16); - unload_int32_16(resulti_16, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 8) { - int32_8 a_8; - f32_8 af_8; - f32_8 b_8; - f32_8 result_8; - int32_8 resulti_8; + __m256i a_8; + __m256 af_8; + __m256 b_8; + __m256 result_8; + __m256i resulti_8; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_8 = _mm256_loadu_epi32(a); + af_8 = _mm256_cvtepi32_ps(a_8); + b_8 = _mm256_loadu_ps(b); + result_8 = _mm256_mul_ps(af_8, b_8); + resulti_8 = _mm256_cvtps_epi32(result_8); + _mm256_store_si256((__m256i *) result, resulti_8); - a_8 = load_int32_8(a); - af_8 = int32_8_to_f32_8(a_8); - b_8 = load_f32_8(b); - result_8 = af_8 
* b_8; - resulti_8 = f32_8_to_int32_8(result_8); - unload_int32_8(resulti_8, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 4) { - int32_4 a_4; - f32_4 af_4; - f32_4 b_4; - f32_4 result_4; - int32_4 resulti_4; + __m128i a_4; + __m128 af_4; + __m128 b_4; + __m128 result_4; + __m128i resulti_4; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_4 = _mm_loadu_epi32(a); + af_4 = _mm_cvtepi32_ps(a_4); + b_4 = _mm_loadu_ps(b); + result_4 = _mm_mul_ps(af_4, b_4); + resulti_4 = _mm_cvtps_epi32(result_4); + _mm_store_si128((__m128i *) result, resulti_4); - a_4 = load_int32_4(a); - af_4 = int32_4_to_f32_4(a_4); - b_4 = load_f32_4(b); - result_4 = af_4 * b_4; - resulti_4 = f32_4_to_int32_4(result_4); - unload_int32_4(resulti_4, result); + a += steps; + b += steps; + result += steps; } } for (; i < size; ++i) { + *result = (int) (*a * *b); + ++a; ++b; ++result; - - *result = *a * *b; } } inline -void int32_4_mult(const int32* a, const int32* b, int32* result) +void simd_mult(const int32* a, f32 b, int32* result, int size, int steps) { - int32_4 a_4 = load_int32_4(a); - int32_4 b_4 = load_int32_4(b); - int32_4 result_4 = a_4 * b_4; + int i = 0; - unload_int32_4(result_4, result); + if (steps == 16) { + __m512i a_16; + __m512 af_16; + __m512 b_16 = _mm512_set1_ps(b); + __m512 result_16; + __m512i resulti_16; + + for (; i <= size - steps; i += steps) { + a_16 = _mm512_loadu_epi32(a); + af_16 = _mm512_cvtepi32_ps(a_16); + result_16 = _mm512_mul_ps(af_16, b_16); + resulti_16 = _mm512_cvtps_epi32(result_16); + _mm512_store_epi32(result, resulti_16); + + a += steps; + result += steps; + } + } else if (steps == 8) { + __m256i a_8; + __m256 af_8; + __m256 b_8 = _mm256_set1_ps(b); + __m256 result_8; + __m256i resulti_8; + + for (; i <= size - steps; i += steps) { + a_8 = _mm256_loadu_epi32(a); + af_8 = _mm256_cvtepi32_ps(a_8); + result_8 = _mm256_mul_ps(af_8, b_8); + resulti_8 = 
_mm256_cvtps_epi32(result_8); + _mm256_store_si256((__m256i *) result, resulti_8); + + a += steps; + result += steps; + } + } else if (steps == 4) { + __m128i a_4; + __m128 af_4; + __m128 b_4 = _mm_set1_ps(b); + __m128 result_4; + __m128i resulti_4; + + for (; i <= size - steps; i += steps) { + a_4 = _mm_loadu_epi32(a); + af_4 = _mm_cvtepi32_ps(a_4); + result_4 = _mm_mul_ps(af_4, b_4); + resulti_4 = _mm_cvtps_epi32(result_4); + _mm_store_si128((__m128i *) result, resulti_4); + + a += steps; + result += steps; + } + } + + for (; i < size; ++i) { + *result = (int32) (*a * b); + + ++a; + ++result; + } } inline -void int32_8_mult(const int32* a, const int32* b, int32* result) +void simd_div(const int32* a, f32 b, f32* result, int size, int steps) { - int32_8 a_8 = load_int32_8(a); - int32_8 b_8 = load_int32_8(b); - int32_8 result_8 = a_8 * b_8; + int i = 0; - unload_int32_8(result_8, result); -} + if (steps == 16) { + __m512i a_16; + __m512 af_16; + __m512 b_16 = _mm512_set1_ps(b); + __m512 result_16; -inline -void int32_16_mult(const int32* a, const int32* b, int32* result) -{ - int32_16 a_16 = load_int32_16(a); - int32_16 b_16 = load_int32_16(b); - int32_16 result_16 = a_16 * b_16; + for (; i <= size - steps; i += steps) { + a_16 = _mm512_loadu_epi32(a); + af_16 = _mm512_cvtepi32_ps(a_16); + result_16 = _mm512_div_ps(af_16, b_16); + _mm512_store_ps(result, result_16); - unload_int32_16(result_16, result); -} + a += steps; + result += steps; + } + } else if (steps == 8) { + // @todo this his how all the functions should be implemented that take in baseic types and output basic types + __m256i a_8; + __m256 af_8; + __m256 b_8 = _mm256_set1_ps(b); + __m256 result_8; -inline -void int32_4_mult(const int32* a, const f32* b, f32* result) -{ - int32_4 a_4 = load_int32_4(a); - f32_4 af_4 = int32_4_to_f32_4(a_4); - f32_4 b_4 = load_f32_4(b); - f32_4 result_4 = af_4 * b_4; + for (; i <= size - steps; i += steps) { + a_8 = _mm256_loadu_epi32(a); + af_8 = 
_mm256_cvtepi32_ps(a_8); + result_8 = _mm256_div_ps(af_8, b_8); + _mm256_store_ps(result, result_8); - unload_f32_4(result_4, result); -} + a += steps; + result += steps; + } + } else if (steps == 4) { + __m128i a_4; + __m128 af_4; + __m128 b_4 = _mm_set1_ps(b); + __m128 result_4; -inline -void int32_8_mult(const int32* a, const f32* b, f32* result) -{ - int32_8 a_8 = load_int32_8(a); - f32_8 af_8 = int32_8_to_f32_8(a_8); - f32_8 b_8 = load_f32_8(b); - f32_8 result_8 = af_8 * b_8; + for (; i <= size - steps; i += steps) { + a_4 = _mm_loadu_epi32(a); + af_4 = _mm_cvtepi32_ps(a_4); + result_4 = _mm_div_ps(af_4, b_4); + _mm_store_ps(result, result_4); - unload_f32_8(result_8, result); -} + a += steps; + result += steps; + } + } -inline -void int32_16_mult(const int32* a, const f32* b, f32* result) -{ - int32_16 a_16 = load_int32_16(a); - f32_16 af_16 = int32_16_to_f32_16(a_16); - f32_16 b_16 = load_f32_16(b); - f32_16 result_16 = af_16 * b_16; + for (; i < size; ++i) { + *result = *a / b; - unload_f32_16(result_16, result); + ++a; + ++result; + } } inline @@ -1294,58 +1361,58 @@ void simd_add(const int32* a, const int32* b, int32* result, int size, int steps int i = 0; if (steps == 16) { - int32_16 a_16; - int32_16 b_16; - int32_16 result_16; + __m512i a_16; + __m512i b_16; + __m512i result_16; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_16 = _mm512_loadu_epi32(a); + b_16 = _mm512_loadu_epi32(b); + result_16 = _mm512_add_epi32(a_16, b_16); + _mm512_store_epi32(result, result_16); - a_16 = load_int32_16(a); - b_16 = load_int32_16(b); - result_16 = a_16 + b_16; - unload_int32_16(result_16, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 8) { - int32_8 a_8; - int32_8 b_8; - int32_8 result_8; + __m256i a_8; + __m256i b_8; + __m256i result_8; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_8 = 
_mm256_loadu_epi32(a); + b_8 = _mm256_loadu_epi32(b); + result_8 = _mm256_add_epi32(a_8, b_8); + _mm256_store_si256((__m256i *) result, result_8); - a_8 = load_int32_8(a); - b_8 = load_int32_8(b); - result_8 = a_8 + b_8; - unload_int32_8(result_8, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 4) { - int32_4 a_4; - int32_4 b_4; - int32_4 result_4; + __m128i a_4; + __m128i b_4; + __m128i result_4; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_4 = _mm_loadu_epi32(a); + b_4 = _mm_loadu_epi32(b); + result_4 = _mm_add_epi32(a_4, b_4); + _mm_store_si128((__m128i *) result, result_4); - a_4 = load_int32_4(a); - b_4 = load_int32_4(b); - result_4 = a_4 + b_4; - unload_int32_4(result_4, result); + a += steps; + b += steps; + result += steps; } } for (; i < size; ++i) { + *result = *a + *b; + ++a; ++b; ++result; - - *result = *a + *b; } } @@ -1355,64 +1422,64 @@ void simd_add(const int32* a, const f32* b, f32* result, int size, int steps) int i = 0; if (steps == 16) { - int32_16 a_16; - f32_16 af_16; - f32_16 b_16; - f32_16 result_16; + __m512i a_16; + __m512 af_16; + __m512 b_16; + __m512 result_16; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_16 = _mm512_loadu_epi32(a); + af_16 = _mm512_cvtepi32_ps(a_16); + b_16 = _mm512_loadu_ps(b); + result_16 = _mm512_add_ps(af_16, b_16); + _mm512_store_ps(result, result_16); - a_16 = load_int32_16(a); - af_16 = int32_16_to_f32_16(a_16); - b_16 = load_f32_16(b); - result_16 = af_16 + b_16; - unload_f32_16(result_16, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 8) { - int32_8 a_8; - f32_8 af_8; - f32_8 b_8; - f32_8 result_8; + __m256i a_8; + __m256 af_8; + __m256 b_8; + __m256 result_8; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_8 = _mm256_loadu_epi32(a); + 
af_8 = _mm256_cvtepi32_ps(a_8); + b_8 = _mm256_loadu_ps(b); + result_8 = _mm256_add_ps(af_8, b_8); + _mm256_store_ps(result, result_8); - a_8 = load_int32_8(a); - af_8 = int32_8_to_f32_8(a_8); - b_8 = load_f32_8(b); - result_8 = af_8 + b_8; - unload_f32_8(result_8, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 4) { - int32_4 a_4; - f32_4 af_4; - f32_4 b_4; - f32_4 result_4; + __m128i a_4; + __m128 af_4; + __m128 b_4; + __m128 result_4; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_4 = _mm_loadu_epi32(a); + af_4 = _mm_cvtepi32_ps(a_4); + b_4 = _mm_loadu_ps(b); + result_4 = _mm_add_ps(af_4, b_4); + _mm_store_ps(result, result_4); - a_4 = load_int32_4(a); - af_4 = int32_4_to_f32_4(a_4); - b_4 = load_f32_4(b); - result_4 = af_4 + b_4; - unload_f32_4(result_4, result); + a += steps; + b += steps; + result += steps; } } for (; i < size; ++i) { + *result = *a + *b; + ++a; ++b; ++result; - - *result = *a + *b; } } @@ -1422,136 +1489,73 @@ void simd_add(const int32* a, const f32* b, int32* result, int size, int steps) int i = 0; if (steps == 16) { - int32_16 a_16; - f32_16 af_16; - f32_16 b_16; - f32_16 result_16; - int32_16 resulti_16; + __m512i a_16; + __m512 af_16; + __m512 b_16; + __m512 result_16; + __m512i resulti_16; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_16 = _mm512_loadu_epi32(a); + af_16 = _mm512_cvtepi32_ps(a_16); + b_16 = _mm512_loadu_ps(b); + result_16 = _mm512_add_ps(af_16, b_16); + resulti_16 = _mm512_cvtps_epi32(result_16); + _mm512_store_epi32(result, resulti_16); - a_16 = load_int32_16(a); - af_16 = int32_16_to_f32_16(a_16); - b_16 = load_f32_16(b); - result_16 = af_16 + b_16; - resulti_16 = f32_16_to_int32_16(result_16); - unload_int32_16(resulti_16, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 8) { - int32_8 a_8; - f32_8 af_8; - f32_8 b_8; 
- f32_8 result_8; - int32_8 resulti_8; + __m256i a_8; + __m256 af_8; + __m256 b_8; + __m256 result_8; + __m256i resulti_8; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_8 = _mm256_loadu_epi32(a); + af_8 = _mm256_cvtepi32_ps(a_8); + b_8 = _mm256_loadu_ps(b); + result_8 = _mm256_add_ps(af_8, b_8); + resulti_8 = _mm256_cvtps_epi32(result_8); + _mm256_store_si256((__m256i *) result, resulti_8); - a_8 = load_int32_8(a); - af_8 = int32_8_to_f32_8(a_8); - b_8 = load_f32_8(b); - result_8 = af_8 + b_8; - resulti_8 = f32_8_to_int32_8(result_8); - unload_int32_8(resulti_8, result); + a += steps; + b += steps; + result += steps; } } else if (steps == 4) { - int32_4 a_4; - f32_4 af_4; - f32_4 b_4; - f32_4 result_4; - int32_4 resulti_4; + __m128i a_4; + __m128 af_4; + __m128 b_4; + __m128 result_4; + __m128i resulti_4; - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + for (; i <= size - steps; i += steps) { + a_4 = _mm_loadu_epi32(a); + af_4 = _mm_cvtepi32_ps(a_4); + b_4 = _mm_loadu_ps(b); + result_4 = _mm_add_ps(af_4, b_4); + resulti_4 = _mm_cvtps_epi32(result_4); + _mm_store_si128((__m128i *) result, resulti_4); - a_4 = load_int32_4(a); - af_4 = int32_4_to_f32_4(a_4); - b_4 = load_f32_4(b); - result_4 = af_4 + b_4; - resulti_4 = f32_4_to_int32_4(result_4); - unload_int32_4(resulti_4, result); + a += steps; + b += steps; + result += steps; } } for (; i < size; ++i) { + *result = (int32) (*a + *b); + ++a; ++b; ++result; - - *result = *a + *b; } } -inline -void int32_4_add(const int32* a, const int32* b, int32* result) -{ - int32_4 a_4 = load_int32_4(a); - int32_4 b_4 = load_int32_4(b); - int32_4 result_4 = a_4 + b_4; - - unload_int32_4(result_4, result); -} - -inline -void int32_8_add(const int32* a, const int32* b, int32* result) -{ - int32_8 a_8 = load_int32_8(a); - int32_8 b_8 = load_int32_8(b); - int32_8 result_8 = a_8 + b_8; - - unload_int32_8(result_8, result); -} - -inline 
-void int32_16_add(const int32* a, const int32* b, int32* result) -{ - int32_16 a_16 = load_int32_16(a); - int32_16 b_16 = load_int32_16(b); - int32_16 result_16 = a_16 + b_16; - - unload_int32_16(result_16, result); -} - -inline -void int32_4_add(const int32* a, const f32* b, f32* result) -{ - int32_4 a_4 = load_int32_4(a); - f32_4 af_4 = int32_4_to_f32_4(a_4); - f32_4 b_4 = load_f32_4(b); - f32_4 result_4 = af_4 + b_4; - - unload_f32_4(result_4, result); -} - -inline -void int32_8_add(const int32* a, const f32* b, f32* result) -{ - int32_8 a_8 = load_int32_8(a); - f32_8 af_8 = int32_8_to_f32_8(a_8); - f32_8 b_8 = load_f32_8(b); - f32_8 result_8 = af_8 + b_8; - - unload_f32_8(result_8, result); -} - -inline -void int32_16_add(const int32* a, const f32* b, f32* result) -{ - int32_16 a_16 = load_int32_16(a); - f32_16 af_16 = int32_16_to_f32_16(a_16); - f32_16 b_16 = load_f32_16(b); - f32_16 result_16 = af_16 + b_16; - - unload_f32_16(result_16, result); -} - // WARNING: only works with SSE4.2 // WARNING: incl. 
\0 both strings must be <= 16 bool simd_str_compare(const char* str1, const char* str2) { @@ -1561,6 +1565,4 @@ bool simd_str_compare(const char* str1, const char* str2) { return _mm_cmpistrc(s1, s2, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH) == 0; } -// @todo add more operations like the one above "int32_4_mult()" - #endif diff --git a/stdlib/simd/SIMD_I8.h b/stdlib/simd/SIMD_I8.h index a0ab1c0..cf38071 100644 --- a/stdlib/simd/SIMD_I8.h +++ b/stdlib/simd/SIMD_I8.h @@ -13,6 +13,8 @@ #include #include "../Types.h" +#include "SIMD_F32.h" +#include "SIMD_I32.h" struct int8_16 { union { @@ -156,6 +158,33 @@ inline int8_64 init_value_int8_64(int8 value) return simd; } +inline +f32_4 int8_16_to_f32_4(int8_16 a) +{ + f32_4 result; + result.s = _mm_cvtepi32_ps(a.s); + + return result; +} + +inline +f32_8 int8_16_to_f32_8(int8_16 a) +{ + f32_8 result; + result.s = _mm256_cvtepi32_ps(_mm256_cvtepu8_epi32(a.s)); + + return result; +} + +inline +f32_16 int8_16_to_f32_16(int8_16 a) +{ + f32_16 result; + result.s = _mm512_cvtepi32_ps(_mm512_cvtepu8_epi32(a.s)); + + return result; +} + inline int8_16 operator+(int8_16 a, int8_16 b) { int8_16 simd; @@ -796,188 +825,32 @@ inline bool all_false(int8_64 a) // @todo from down here we can optimize some of the code by NOT using the wrappers // the code is self contained and we could use te intrinsic functions directly +/* inline -void simd_mult(const int8* a, const int8* b, int8* result, int size, int steps) +f32 simd_mult(const int8* a, f32 b, int size, int steps) { - int i = 0; - if (steps == 16) { - int8_64 a_16; - int8_64 b_16; - int8_64 result_16; + __m512i a_16 = _mm512_loadu_epi8(a); + __m512 af_16 = _mm512_cvtepi32_ps(a_16); + __m512 b_16 = _mm512_set1_ps(b); - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; - - a_16 = load_int8_64(a); - b_16 = load_int8_64(b); - result_16 = a_16 * b_16; - unload_int8_64(result_16, result); - } + __m512 result = _mm512_mul_ps(af_16, b_16); } else if (steps == 8) { - int8_32 
a_8; - int8_32 b_8; - int8_32 result_8; + __m256i a_8 = _mm256_loadu_epi8(a); + __m256 af_8 = _mm256_cvtepi32_ps(a_8); + __m256 b_8 = _mm256_set1_ps(b); - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; - - a_8 = load_int8_32(a); - b_8 = load_int8_32(b); - result_8 = a_8 * b_8; - unload_int8_32(result_8, result); - } + __m256 result = _mm256_mul_ps(af_8, b_8); } else if (steps == 4) { - int8_16 a_4; - int8_16 b_4; - int8_16 result_4; + __m128i a_4 = _mm_loadu_epi8(a); + __m128 af_4 = _mm_cvtepi32_ps(a_4); + __m128 b_4 = _mm_set1_ps(b); - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; + __m128 result = _mm_mul_ps(af_4, b_4); + } else { - a_4 = load_int8_16(a); - b_4 = load_int8_16(b); - result_4 = a_4 * b_4; - unload_int8_16(result_4, result); - } - } - - for (; i < size; ++i) { - ++a; - ++b; - ++result; - - *result = *a * *b; } } - -inline -void int8_16_mult(const int8* a, const int8* b, int8* result) -{ - int8_16 a_4 = load_int8_16(a); - int8_16 b_4 = load_int8_16(b); - int8_16 result_4 = a_4 * b_4; - - unload_int8_16(result_4, result); -} - -inline -void int8_32_mult(const int8* a, const int8* b, int8* result) -{ - int8_32 a_8 = load_int8_32(a); - int8_32 b_8 = load_int8_32(b); - int8_32 result_8 = a_8 * b_8; - - unload_int8_32(result_8, result); -} - -inline -void int8_64_mult(const int8* a, const int8* b, int8* result) -{ - int8_64 a_16 = load_int8_64(a); - int8_64 b_16 = load_int8_64(b); - int8_64 result_16 = a_16 * b_16; - - unload_int8_64(result_16, result); -} - -inline -void simd_add(const int8* a, const int8* b, int8* result, int size, int steps) -{ - int i = 0; - - if (steps == 16) { - int8_64 a_16; - int8_64 b_16; - int8_64 result_16; - - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; - - a_16 = load_int8_64(a); - b_16 = load_int8_64(b); - result_16 = a_16 + b_16; - unload_int8_64(result_16, result); - } - } else if (steps == 8) { - int8_32 a_8; - int8_32 b_8; - int8_32 result_8; - - 
for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; - - a_8 = load_int8_32(a); - b_8 = load_int8_32(b); - result_8 = a_8 + b_8; - unload_int8_32(result_8, result); - } - } else if (steps == 4) { - int8_16 a_4; - int8_16 b_4; - int8_16 result_4; - - for (i = 0; i <= size - steps; i += steps) { - ++a; - ++b; - ++result; - - a_4 = load_int8_16(a); - b_4 = load_int8_16(b); - result_4 = a_4 + b_4; - unload_int8_16(result_4, result); - } - } - - for (; i < size; ++i) { - ++a; - ++b; - ++result; - - *result = *a + *b; - } -} - -inline -void int8_16_add(const int8* a, const int8* b, int8* result) -{ - int8_16 a_4 = load_int8_16(a); - int8_16 b_4 = load_int8_16(b); - int8_16 result_4 = a_4 + b_4; - - unload_int8_16(result_4, result); -} - -inline -void int8_32_add(const int8* a, const int8* b, int8* result) -{ - int8_32 a_8 = load_int8_32(a); - int8_32 b_8 = load_int8_32(b); - int8_32 result_8 = a_8 + b_8; - - unload_int8_32(result_8, result); -} - -inline -void int8_64_add(const int8* a, const int8* b, int8* result) -{ - int8_64 a_16 = load_int8_64(a); - int8_64 b_16 = load_int8_64(b); - int8_64 result_16 = a_16 + b_16; - - unload_int8_64(result_16, result); -} - -// @todo add more operations like the one above "int8_16_mult()" +*/ #endif \ No newline at end of file diff --git a/stdlib/simd/SIMD_SVML.h b/stdlib/simd/SIMD_SVML.h new file mode 100644 index 0000000..62f5435 --- /dev/null +++ b/stdlib/simd/SIMD_SVML.h @@ -0,0 +1,166 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_STDLIB_SIMD_SVML_H +#define TOS_STDLIB_SIMD_SVML_H + +#include +#include + +#if __linux__ + #include "math.h" + + inline __m128i _mm_div_epi32(__m128i a, __m128i b) { + alignas(16) int32_t a_array[4], b_array[4], result[4]; + + _mm_storeu_si128((__m128i*)a_array, a); + _mm_storeu_si128((__m128i*)b_array, b); + + for (int i = 0; i < 4; ++i) { + result[i] = a_array[i] / b_array[i]; + } + + 
return _mm_loadu_si128((__m128i*)result); + } + + inline __m256i _mm256_div_epi32(__m256i a, __m256i b) { + alignas(32) int32_t a_array[8], b_array[8], result[8]; + + _mm256_storeu_si256((__m256i*)a_array, a); + _mm256_storeu_si256((__m256i*)b_array, b); + + for (int i = 0; i < 8; ++i) { + result[i] = a_array[i] / b_array[i]; + } + + return _mm256_loadu_si256((__m256i*)result); + } + + inline __m512i _mm512_div_epi32(__m512i a, __m512i b) { + alignas(64) int32_t a_array[16], b_array[16], result[16]; + + _mm512_storeu_si512((__m512i*)a_array, a); + _mm512_storeu_si512((__m512i*)b_array, b); + + for (int i = 0; i < 16; ++i) { + result[i] = a_array[i] / b_array[i]; + } + + return _mm512_loadu_si512((__m512i*)result); + } + + inline __m128 _mm_sin_ps(__m128 a) { + alignas(16) float a_array[4], result[4]; + _mm_storeu_ps(a_array, a); + for (int i = 0; i < 4; ++i) { + result[i] = sinf(a_array[i]); + } + return _mm_loadu_ps(result); + } + + inline __m128 _mm_cos_ps(__m128 a) { + alignas(16) float a_array[4], result[4]; + _mm_storeu_ps(a_array, a); + for (int i = 0; i < 4; ++i) { + result[i] = cosf(a_array[i]); + } + return _mm_loadu_ps(result); + } + + inline __m128 _mm_asin_ps(__m128 a) { + alignas(16) float a_array[4], result[4]; + _mm_storeu_ps(a_array, a); + for (int i = 0; i < 4; ++i) { + result[i] = asinf(a_array[i]); + } + return _mm_loadu_ps(result); + } + + inline __m128 _mm_acos_ps(__m128 a) { + alignas(16) float a_array[4], result[4]; + _mm_storeu_ps(a_array, a); + for (int i = 0; i < 4; ++i) { + result[i] = acosf(a_array[i]); + } + return _mm_loadu_ps(result); + } + + inline __m256 _mm256_sin_ps(__m256 a) { + alignas(32) float a_array[8], result[8]; + _mm256_storeu_ps(a_array, a); + for (int i = 0; i < 8; ++i) { + result[i] = sinf(a_array[i]); + } + return _mm256_loadu_ps(result); + } + + inline __m256 _mm256_cos_ps(__m256 a) { + alignas(32) float a_array[8], result[8]; + _mm256_storeu_ps(a_array, a); + for (int i = 0; i < 8; ++i) { + result[i] = 
cosf(a_array[i]); + } + return _mm256_loadu_ps(result); + } + + inline __m256 _mm256_asin_ps(__m256 a) { + alignas(32) float a_array[8], result[8]; + _mm256_storeu_ps(a_array, a); + for (int i = 0; i < 8; ++i) { + result[i] = asinf(a_array[i]); + } + return _mm256_loadu_ps(result); + } + + inline __m256 _mm256_acos_ps(__m256 a) { + alignas(32) float a_array[8], result[8]; + _mm256_storeu_ps(a_array, a); + for (int i = 0; i < 8; ++i) { /* a __m256 holds 8 floats; iterating to 16 overran both arrays */ + result[i] = acosf(a_array[i]); + } + return _mm256_loadu_ps(result); + } + + inline __m512 _mm512_sin_ps(__m512 a) { + alignas(64) float a_array[16], result[16]; /* a __m512 holds 16 floats; the store and loop below touch all 16 */ + _mm512_storeu_ps(a_array, a); + for (int i = 0; i < 16; ++i) { + result[i] = sinf(a_array[i]); + } + return _mm512_loadu_ps(result); + } + + inline __m512 _mm512_cos_ps(__m512 a) { + alignas(64) float a_array[16], result[16]; /* a __m512 holds 16 floats; the store and loop below touch all 16 */ + _mm512_storeu_ps(a_array, a); + for (int i = 0; i < 16; ++i) { + result[i] = cosf(a_array[i]); + } + return _mm512_loadu_ps(result); + } + + inline __m512 _mm512_asin_ps(__m512 a) { + alignas(64) float a_array[16], result[16]; /* a __m512 holds 16 floats; the store and loop below touch all 16 */ + _mm512_storeu_ps(a_array, a); + for (int i = 0; i < 16; ++i) { + result[i] = asinf(a_array[i]); + } + return _mm512_loadu_ps(result); + } + + inline __m512 _mm512_acos_ps(__m512 a) { + alignas(64) float a_array[16], result[16]; + _mm512_storeu_ps(a_array, a); + for (int i = 0; i < 16; ++i) { + result[i] = acosf(a_array[i]); + } + return _mm512_loadu_ps(result); + } +#endif + +#endif \ No newline at end of file diff --git a/utils/BufferMemory.h b/utils/BufferMemory.h deleted file mode 100644 index e506968..0000000 --- a/utils/BufferMemory.h +++ /dev/null @@ -1,125 +0,0 @@ -/** - * Jingga - * - * @copyright Jingga - * @license OMS License 2.0 - * @version 1.0.0 - * @link https://jingga.app - */ -#ifndef TOS_UTILS_BUFFER_MEMORY_H -#define TOS_UTILS_BUFFER_MEMORY_H - -#include "../stdlib/Types.h" -#include "MathUtils.h" - -struct BufferMemory { - byte* memory; - - uint64 count; - uint64 element_size; - uint64 last_pos = -1; - - // 
length = count - // free describes which locations are used and which are free - // @performance using uint32 or even uint64 might be faster - // since we can check for free elements faster if the memory is almost filled - // at the moment we can only check 8 elements at a time - byte* free; -}; - -inline -byte* buffer_element_get(BufferMemory* buf, uint64 element) -{ - return buf->memory + element * buf->element_size; -} - -int64 buffer_reserve(BufferMemory* buf) -{ - int byte_index = (buf->last_pos + 1) / 8; - int bit_index; - - int64 free_element = -1; - byte mask; - - int i = 0; - int max_loop = buf->count * buf->element_size; - - while (free_element < 0 && i < max_loop) { - if (buf->free[byte_index] == 0xFF) { - ++i; - ++byte_index; - - continue; - } - - // This always breaks! - // @performance on the first iteration through the buffer we could optimize this by starting at a different bit_index - // because we know that the bit_index is based on last_pos - for (bit_index = 0; bit_index < 8; ++bit_index) { - mask = 1 << bit_index; - if ((buf->free[byte_index] & mask) == 0) { - free_element = byte_index * 8 + bit_index; - break; - } - } - } - - if (free_element < 0) { - return -1; - } - - buf->free[byte_index] |= (1 << bit_index); - - return byte_index * 8 + bit_index; -} - -byte* buffer_find_free(BufferMemory* buf, bool zeroed = false) -{ - int byte_index = (buf->last_pos + 1) / 8; - int bit_index; - - int64 free_element = -1; - byte mask; - - int i = 0; - int max_loop = buf->count * buf->element_size; - - while (free_element < 0 && i < max_loop) { - if (buf->free[byte_index] == 0xFF) { - ++i; - ++byte_index; - - continue; - } - - // This always breaks! 
- // @performance on the first iteration through the buffer we could optimize this by starting at a different bit_index - // because we know that the bit_index is based on last_pos - for (bit_index = 0; bit_index < 8; ++bit_index) { - mask = 1 << bit_index; - if ((buf->free[byte_index] & mask) == 0) { - free_element = byte_index * 8 + bit_index; - break; - } - } - } - - if (free_element < 0) { - return NULL; - } - - buf->free[byte_index] |= (1 << bit_index); - - return buf->memory + free_element * buf->element_size; -} - -inline -void buffer_element_free(BufferMemory* buf, uint64 element) -{ - int byte_index = element / 8; - int bit_index = element % 8; - - buf->free[byte_index] &= ~(1 << bit_index); -} - -#endif \ No newline at end of file diff --git a/utils/EndianUtils.h b/utils/EndianUtils.h index 52b6bff..a27c9b7 100644 --- a/utils/EndianUtils.h +++ b/utils/EndianUtils.h @@ -25,49 +25,48 @@ inline bool is_little_endian() { uint32 num = 1; - return ((int32) (*(char *) & num)) == 1; } inline -void endian_swap(uint16 *val) +uint16 endian_swap(const uint16* val) { uint16 v = *val; - *val = ((v << 8) | (v >> 8)); + return ((v << 8) | (v >> 8)); } inline -void endian_swap(int16 *val) +int16 endian_swap(const int16* val) { uint16 v = (uint16) (*val); - *val = (int16) ((v << 8) | (v >> 8)); + return (int16) ((v << 8) | (v >> 8)); } inline -void endian_swap(uint32 *val) +uint32 endian_swap(const uint32* val) { uint32 v = *val; - *val = ((v << 24) + return ((v << 24) | ((v & 0xFF00) << 8) | ((v >> 8) & 0xFF00) | (v >> 24)); } inline -void endian_swap(int32 *val) +int32 endian_swap(const int32* val) { uint32 v = (uint32) (*val); - *val = (int32) ((v << 24) + return (int32) ((v << 24) | ((v & 0xFF00) << 8) | ((v >> 8) & 0xFF00) | (v >> 24)); } inline -void endian_swap(uint64 *val) +uint64 endian_swap(const uint64* val) { uint64 v = *val; - *val = ((v << 56) + return ((v << 56) | ((v & 0x000000000000FF00ULL) << 40) | ((v & 0x0000000000FF0000ULL) << 24) | ((v & 
0x00000000FF000000ULL) << 8) @@ -78,10 +77,10 @@ void endian_swap(uint64 *val) } inline -void endian_swap(int64 *val) +int64 endian_swap(const int64* val) { uint64 v = (uint64) (*val); - *val = (int64) ((v << 56) + return (int64) ((v << 56) | ((v & 0x000000000000FF00ULL) << 40) | ((v & 0x0000000000FF0000ULL) << 24) | ((v & 0x00000000FF000000ULL) << 8) @@ -91,4 +90,22 @@ void endian_swap(int64 *val) | (v >> 56)); } +inline +float endian_swap(const float* val) +{ + union { float f; uint32 i; } bits; /* swap the raw bytes of the float; a value cast would convert the swapped integer numerically */ + bits.f = *val; + bits.i = endian_swap(&bits.i); + return bits.f; +} + +inline +double endian_swap(const double* val) +{ + union { double f; uint64 i; } bits; /* same bit-level reinterpretation for the 64-bit case */ + bits.f = *val; + bits.i = endian_swap(&bits.i); + return bits.f; +} + #endif \ No newline at end of file diff --git a/utils/MathUtils.h b/utils/MathUtils.h index eb8c85b..98706e2 100644 --- a/utils/MathUtils.h +++ b/utils/MathUtils.h @@ -25,10 +25,6 @@ #define OMS_RAD2DEG(angle) ((angle) * 180.0f / OMS_PI) #define ROUND_TO_NEAREST(a, b) (((a) + ((b) - 1)) & ~((b) - 1)) -#ifndef FLT_MIN - #define FLT_MIN 1.175494e-038 -#endif - // @question Consider to implement table based sine wave + approximation if necessary // [-PI/2, PI/2] inline @@ -80,7 +76,7 @@ float atanf_approx(float x) inline float atan2f_approx(float y, float x) { - float abs_y = OMS_ABS(y) + FLT_MIN; // prevent division by zero + float abs_y = (float) (OMS_ABS(y) + 1.175494e-038); // prevent division by zero float angle; if (x >= 0.0f) { diff --git a/utils/StringUtils.h b/utils/StringUtils.h index 6af49aa..383c6f8 100644 --- a/utils/StringUtils.h +++ b/utils/StringUtils.h @@ -18,7 +18,7 @@ inline void wchar_to_char(const wchar_t* src, char* dest, int length = 0) { - char* temp = (char *) src; + char* temp = (char* ) src; size_t len = wcslen(src) * sizeof(wchar_t); if (length > 0 && length < len) { @@ -37,7 +37,7 @@ void wchar_to_char(const wchar_t* src, char* dest, int length = 0) *dest = '\0'; } -inline size_t str_count(const char *str, const char *substr) +inline size_t str_count(const char* str, const char* substr) 
{ size_t l1 = strlen(str); size_t l2 = strlen(substr); @@ -54,15 +54,15 @@ inline size_t str_count(const char *str, const char *substr) return count; } -inline char *strsep(const char **sp, const char *sep) +inline char* strsep(const char* *sp, const char* sep) { - char *p, *s; + char* p, *s; if (sp == NULL || *sp == NULL || **sp == '\0') { return (NULL); } - s = (char *) *sp; + s = (char* ) *sp; p = s + strcspn(s, sep); if (*p != '\0') { @@ -89,7 +89,7 @@ str_concat( *dst = '\0'; } -char *strtok(char *str, const char *delim, char **saveptr) +char* strtok(char* str, const char* delim, char* *saveptr) { if (str == NULL) { str = *saveptr; @@ -99,8 +99,8 @@ char *strtok(char *str, const char *delim, char **saveptr) return NULL; } - char *token_start = str; - char *token_end = strpbrk(token_start, delim); + char* token_start = str; + char* token_end = strpbrk(token_start, delim); if (token_end == NULL) { *saveptr = NULL; @@ -139,7 +139,7 @@ char* format_number(size_t number, char* buffer, const char thousands = ',') return buffer; } -char * format_number(int number, char* buffer, const char thousands = ',') +char* format_number(int number, char* buffer, const char thousands = ',') { int length = snprintf(buffer, 32, "%i", number); format_number_render(length, buffer, thousands); @@ -147,14 +147,14 @@ char * format_number(int number, char* buffer, const char thousands = ',') return buffer; } -void create_const_name(const unsigned char *name, unsigned char* modified_name) +void create_const_name(const unsigned char* name, unsigned char* modified_name) { // Print block if (name == NULL) { modified_name = NULL; } else { size_t i; - const size_t length = strlen((const char *) name); + const size_t length = strlen((const char* ) name); for (i = 0; i < length; ++i) { modified_name[i] = name[i] == ' ' ? 
'_' : (unsigned char) toupper(name[i]); } @@ -166,8 +166,8 @@ void create_const_name(const unsigned char *name, unsigned char* modified_name) /** * Custom implementation of strtok_r/strtok_s */ -char* strtok_(char *str, const char *delim, char **key) { - char *result; +char* strtok_(char* str, const char* delim, char* *key) { + char* result; if (str == NULL) { str = *key; } @@ -189,4 +189,17 @@ char* strtok_(char *str, const char *delim, char **key) { return result; } +bool str_ends_with(const char* str, const char* suffix) { + if (!str || !suffix) + return false; + + size_t str_len = strlen(str); + size_t suffix_len = strlen(suffix); + + if (suffix_len > str_len) + return false; + + return strncmp(str + str_len - suffix_len, suffix, suffix_len) == 0; +} + #endif \ No newline at end of file diff --git a/utils/SystemInfo.h b/utils/SystemInfo.h index 4902afd..4032f1e 100644 --- a/utils/SystemInfo.h +++ b/utils/SystemInfo.h @@ -36,6 +36,7 @@ // @todo implement for arm? // @todo implement for linux? 
+// @todo move to platform specifc files struct CpuCacheInfo { int level; diff --git a/utils/TestUtils.h b/utils/TestUtils.h index 407d070..ed49fdd 100644 --- a/utils/TestUtils.h +++ b/utils/TestUtils.h @@ -133,7 +133,7 @@ void profile_function(const char* func_name, void (*func)(void*), void* data, in #if DEBUG #define ASSERT_SIMPLE(a) \ - if ((a) == false) { \ + if (!(a)) { \ *(volatile int *)0 = 0; \ } #else diff --git a/utils/Utils.h b/utils/Utils.h index a9b67b3..81959b8 100644 --- a/utils/Utils.h +++ b/utils/Utils.h @@ -11,7 +11,9 @@ #include "../stdlib/Types.h" -struct file_body { +#define sizeof_array(a) (sizeof(a) / sizeof((a)[0])) + +struct FileBody { uint64 size = 0; // doesn't include null termination (same as strlen) byte* content; }; @@ -20,21 +22,123 @@ global_persist uint32 fast_seed; #define FAST_RAND_MAX 32767 inline -uint32 fast_rand(void) { +uint32 fast_rand1(void) { fast_seed = (214013 * fast_seed + 2531011); return (fast_seed >> 16) & 0x7FFF; } -inline -f32 fast_rand_percentage(void) { - return (f32) fast_rand() / (f32) FAST_RAND_MAX; +uint32 fast_rand2(uint32* state) { + uint32 x = *state; + + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + + *state = x; + + return x; } inline -bool is_bit_set(byte data, byte bit) +f32 fast_rand_percentage(void) { + return (f32) fast_rand1() / (f32) FAST_RAND_MAX; +} + +inline +bool is_bit_set(byte data, int bit) { - return (data & (1 << bit)) == 0; + return data & (1 << bit); +} + +inline +bool is_bit_set(int data, int bit) +{ + return data & (1 << bit); +} + +inline +bool is_bit_set(uint32 data, int bit) +{ + return data & (1 << bit); +} + +inline +byte get_bits(byte data, int bits_to_read, int start_pos) +{ + byte mask = (1 << bits_to_read) - 1; + return (data >> (8 - start_pos - bits_to_read)) & mask; +} + +inline +uint32 get_bits(const byte* data, int bits_to_read, int start_pos) +{ + int byte_index = start_pos / 8; + int bit_offset = start_pos % 8; + + uint32_t mask = (1 << bits_to_read) - 1; + + 
uint32_t result = (data[byte_index] >> bit_offset); + + if (bit_offset + bits_to_read > 8) { + result |= (data[byte_index + 1] << (8 - bit_offset)); + } + + result &= mask; + + return result; +} + +inline +uint32 reverse_bits(uint32 data, uint32 count) +{ + uint32 reversed = 0; + for (uint32 i = 0; i <= (count / 2); ++i) { + uint32 inv = count - i - 1; + reversed |= ((data >> i) & 0x1) << inv; + reversed |= ((data >> inv) & 0x1) << i; + } + + return reversed; +} + +/** + * Picks n random elements from end and stores them in begin. + */ +inline +void random_unique(int* array, int size) { + for (int i = size - 1; i > 0; --i) { + int j = rand() % (i + 1); + + int temp = array[i]; + array[i] = array[j]; + array[j] = temp; + } +} + +/** + * Gets random index based value probability + */ +int random_weighted_index(int* arr, int array_count) +{ + uint32 prob_sum = 0; + for (int i = 0; i < array_count; ++i) { + prob_sum += arr[i]; + } + + uint32 random_prob = rand() % (prob_sum + 1); + uint32 current_rarity = 0; + int item_rarity = array_count - 1; + for (int i = 0; i < array_count - 1; ++i) { + current_rarity += arr[i]; + + if (current_rarity < random_prob) { + item_rarity = i; + break; + } + } + + return item_rarity; } #endif \ No newline at end of file