update

2026-03-07 10:08:41 +00:00 · 2024-08-16 04:07:09 +02:00 · 2024-08-16 04:07:09 +02:00 · 7f1a35d61a
commit 7f1a35d61a
parent c4f203ac11
73 changed files with 3306 additions and 1820 deletions
--- a/asset/Asset.h
+++ b/asset/Asset.h
@ -0,0 +1,60 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_ASSET_H
+#define TOS_ASSET_H
+
+#include "../stdlib/Types.h"
+
+// Bookkeeping record for a single asset/entity slot.
+// Instances live inside the chunk memory of an AssetManagementSystem and are chained via next/prev.
+struct Asset {
+    // An id of 0 means the entity is no longer alive
+    // NOTE(review): asset_reserve() also hands out index 0 as a regular internal_id - confirm which is intended
+    // The id is the same as its location in memory/in the ecs array
+    // This is only an internal id and NOT the same as a db id (e.g. player id)
+    uint32 internal_id;
+    uint32 type;
+
+    // Could be 0 if there is no official id
+    uint32 official_id;
+
+    // GPU object handles
+    // NOTE(review): described by the usual OpenGL acronyms; the previous comments looked shifted by one - confirm
+    uint32 vao; // vertex array object (input layout)
+    uint32 vbo; // vertex buffer object
+    uint32 ebo; // element buffer object (index buffer)
+
+    // Counts the references to this entity
+    // e.g. textures
+    int reference_count;
+
+    // Describes how much ram/vram the asset uses
+    // E.g. vram_size = 0 but ram_size > 0 means that it never uses any gpu memory
+    uint32 ram_size;
+    uint32 vram_size;
+
+    // Usually 1 but in some cases an ECS may hold entities of variable chunk length
+    // For textures for example a 128x128 is of size 1 but 256x256 is of size 4
+    uint32 size;
+
+    // Describes if the memory is currently available in ram/vram
+    // E.g. an entity might be uploaded to the gpu and no longer held in ram (or the other way around)
+    bool is_ram;
+    bool is_vram;
+
+    // Neighbours in the doubly linked list maintained by the AssetManagementSystem
+    Asset* next;
+    Asset* prev;
+
+    // An entity can reference up to N other entities
+    // This allows us to quickly update the other entities
+    // Example: A player pulls N mobs
+    // @bug This means there are hard limits on how many mobs can be pulled by a player
+    Asset* entity_references[50];
+    bool free_entity_references[50];
+
+    // Actual memory address
+    byte* self;
+};
+
+#endif
--- a/asset/AssetManagementSystem.h
+++ b/asset/AssetManagementSystem.h
@ -0,0 +1,108 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_ASSET_MANAGEMENT_SYSTEM_H
+#define TOS_ASSET_MANAGEMENT_SYSTEM_H
+
+#include <string.h>
+#include "../stdlib/Types.h"
+#include "Asset.h"
+#include "AssetType.h"
+#include "../memory/ChunkMemory.h"
+#include "../utils/TestUtils.h"
+
+// The major asset types should have their own asset component system
+// All other entities are grouped together in one asset component system
+// @question Asset component systems could be created per region -> easy to simulate a specific region
+// @bug This means players might not be able to transition from one area to another?!
+
+// Owns two parallel chunk memories (asset records + asset payload)
+// and the head/tail of the linked list that chains the reserved Asset records.
+struct AssetManagementSystem {
+    // The indices of asset_memory and asset_data_memory are always linked
+
+    // General asset memory
+    ChunkMemory asset_memory;
+
+    // Actual asset data
+    ChunkMemory asset_data_memory;
+
+    // Head and tail of the doubly linked asset list
+    Asset* first;
+    Asset* last;
+};
+
+// Sums up the vram_size of every slot in the asset memory.
+//
+// The sum is accumulated in 64 bit and saturated at 0x7FFFFFFF before it is returned,
+// because a plain narrowing to int turns totals of 2 GiB or more into negative values.
+//
+// NOTE(review): the memory is indexed as an Asset array, this assumes chunk_size == sizeof(Asset) - confirm
+//
+// @param ams Asset management system to inspect
+//
+// @return Accumulated vram_size of all slots (saturated)
+int ams_get_vram_usage(AssetManagementSystem* ams)
+{
+    const uint64 max_result = 0x7FFFFFFF;
+    const Asset* assets = (const Asset *) (ams->asset_memory.memory);
+
+    uint64 size = 0;
+    for (uint64 i = 0; i < ams->asset_memory.count; ++i) {
+        size += assets[i].vram_size;
+    }
+
+    return (int) (size > max_result ? max_result : size);
+}
+
+// Unlinks an asset from the linked list and releases all of its chunks in both memories.
+//
+// Handles the head and tail of the list (prev/next == NULL) and keeps ams->first/ams->last valid.
+//
+// @param ams   Asset management system owning the asset
+// @param asset Asset to delete, must have been created by asset_reserve
+void asset_delete(AssetManagementSystem* ams, Asset* asset)
+{
+    if (asset->prev) {
+        asset->prev->next = asset->next;
+    } else if (ams->first == asset) {
+        ams->first = asset->next;
+    }
+
+    if (asset->next) {
+        asset->next->prev = asset->prev;
+    } else if (ams->last == asset) {
+        ams->last = asset->prev;
+    }
+
+    // The asset itself lives in asset_memory, so its fields must not be read after the first free
+    const uint32 first_chunk = asset->internal_id;
+    const uint32 chunk_count = asset->size;
+
+    asset->next = NULL;
+    asset->prev = NULL;
+
+    for (uint32 i = 0; i < chunk_count; ++i) {
+        chunk_element_free(&ams->asset_memory, first_chunk + i);
+        chunk_element_free(&ams->asset_data_memory, first_chunk + i);
+    }
+}
+
+// @todo implement defragment command to optimize memory layout since the memory layout will become fragmented over time
+
+// Reserves `elements` consecutive chunks in both memories and links the new asset
+// into the list, which is kept sorted by internal_id.
+//
+// @param ams      Asset management system to reserve from
+// @param elements Number of consecutive chunks the asset occupies
+//
+// @return The reserved asset or NULL if no free range could be found
+Asset* asset_reserve(AssetManagementSystem* ams, uint64 elements = 1)
+{
+    int64 free_asset = chunk_reserve(&ams->asset_memory, elements, true);
+    ASSERT_SIMPLE(free_asset >= 0);
+
+    // The assert may be compiled out, never index with a negative value
+    if (free_asset < 0) {
+        return NULL;
+    }
+
+    chunk_reserve_index(&ams->asset_data_memory, free_asset, elements, true);
+
+    Asset* asset = (Asset *) chunk_get_memory(&ams->asset_memory, free_asset);
+    asset->internal_id = (uint32) free_asset;
+    asset->self = chunk_get_memory(&ams->asset_data_memory, free_asset);
+    asset->ram_size = (uint32) (ams->asset_memory.chunk_size * elements);
+
+    // asset_delete frees `size` chunks, so the chunk count has to be remembered here
+    asset->size = (uint32) elements;
+
+    asset->prev = NULL;
+    asset->next = NULL;
+
+    // @performance Do we really want a double linked list. Are we really using this feature or is the free_index enough?
+    if (!ams->first) {
+        // Empty list
+        ams->first = asset;
+        ams->last = asset;
+    } else if (asset->internal_id < ams->first->internal_id) {
+        // New head
+        asset->next = ams->first;
+        ams->first->prev = asset;
+        ams->first = asset;
+    } else if (ams->last && ams->last->internal_id < asset->internal_id) {
+        // New tail (fast path, no list walk required)
+        asset->prev = ams->last;
+        ams->last->next = asset;
+        ams->last = asset;
+    } else {
+        // Somewhere in between: find the last element with a smaller id
+        Asset* cursor = ams->first;
+        while (cursor->next && cursor->next->internal_id < asset->internal_id) {
+            cursor = cursor->next;
+        }
+
+        asset->prev = cursor;
+        asset->next = cursor->next;
+
+        if (asset->next) {
+            asset->next->prev = asset;
+        } else {
+            ams->last = asset;
+        }
+
+        cursor->next = asset;
+    }
+
+    return asset;
+}
+
+#endif
--- a/asset/AssetType.h
+++ b/asset/AssetType.h
@ -0,0 +1,20 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_ASSET_TYPES_H
+#define TOS_ASSET_TYPES_H
+
+// Kinds of assets; ASSET_TYPE_SIZE is the last enumerator and therefore equals the number of types
+enum AssetType {
+    ASSET_TYPE_OBJ,
+    ASSET_TYPE_TEXTURE,
+    ASSET_TYPE_AUDIO,
+    ASSET_TYPE_ANIM,
+    ASSET_TYPE_SIZE
+};
+
+#endif
--- a/audio/Audio.h
+++ b/audio/Audio.h
@ -0,0 +1,25 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_AUDIO_H
+#define TOS_AUDIO_H
+
+#include "../stdlib/Types.h"
+
+// Decoded audio clip; the trailing comments name the wav header field each member is filled from
+struct Audio {
+    uint32 sample_rate; // bits_per_sample
+    uint32 sample_size; // byte_per_bloc
+    uint32 frequency;
+    uint32 channels;
+    uint32 bloc_size;
+    uint32 byte_per_sec;
+
+    // Size of data in bytes
+    uint32 size;
+    byte* data; // owner of data
+};
+
+#endif
--- a/audio/Wav.h
+++ b/audio/Wav.h
@ -0,0 +1,153 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_AUDIO_WAV_H
+#define TOS_AUDIO_WAV_H
+
+#include <string.h>
+#include "../stdlib/Types.h"
+#include "../utils/Utils.h"
+#include "../utils/EndianUtils.h"
+#include "Audio.h"
+
+// See: https://en.wikipedia.org/wiki/WAV
+// IMPORTANT: Remember that we are not using packing for the headers
+//      Because of that the struct size is different from the actual header size in the file
+//      This means we have to manually assign the data to the headers
+
+// Packed header size
+#define WAV_HEADER_SIZE 44
+// In-memory representation of the canonical wav header
+// The members add up to 44 bytes (WAV_HEADER_SIZE) if the compiler inserts no padding
+struct WavHeader {
+    // RIFF header
+    byte file_type_bloc_id[4];
+    uint32 size;
+    byte file_format_id[4];
+
+    // Data format header
+    byte format_bloc_id[4];
+    uint32 bloc_size;
+    uint16 audio_format;
+    uint16 nbr_channels;
+    uint32 frequency;
+    uint32 byte_per_sec;
+    uint16 byte_per_bloc;
+    uint16 bits_per_sample;
+
+    // Sample data header
+    byte data_bloc_id[4];
+    uint32 data_size;
+};
+
+// Non-owning view on a wav file that was loaded into a FileBody
+struct Wav {
+    WavHeader header;
+
+    // Points behind the header into data
+    byte* sample_data; // WARNING: This is not the owner of the data. The owner is the FileBody
+
+    // Size of the whole file in bytes
+    uint32 size;
+    byte* data; // WARNING: This is not the owner of the data. The owner is the FileBody
+};
+
+// Fills a Wav view from a loaded file: parses the 44 byte header and sets the data pointers.
+//
+// If the file is smaller than the header only size and data are set,
+// header and sample_data are left untouched.
+//
+// NOTE(review): the field-wise branch reads multi byte values through casted pointers,
+//      this assumes the platform tolerates unaligned reads - confirm
+//
+// @param file Loaded wav file (remains the owner of the memory)
+// @param wav  Output view
+void generate_default_wav_references(const FileBody* file, Wav* wav)
+{
+    wav->size = (uint32) file->size;
+    wav->data = file->content;
+
+    if (wav->size < WAV_HEADER_SIZE) {
+        // This shouldn't happen
+        return;
+    }
+
+    // Check if we can copy memory directly
+    // The struct layout and header size should match on x86, but we still check it
+    if (sizeof(WavHeader) == WAV_HEADER_SIZE) {
+        memcpy(&wav->header, file->content, WAV_HEADER_SIZE);
+
+        // swap endian if we are on big endian system
+        // @question Maybe this needs to be a runtime check?
+        #if !_WIN32 && !__LITTLE_ENDIAN
+            wav->header.size = SWAP_ENDIAN_LITTLE(wav->header.size);
+            wav->header.bloc_size = SWAP_ENDIAN_LITTLE(wav->header.bloc_size);
+            wav->header.audio_format = SWAP_ENDIAN_LITTLE(wav->header.audio_format);
+            wav->header.nbr_channels = SWAP_ENDIAN_LITTLE(wav->header.nbr_channels);
+            wav->header.frequency = SWAP_ENDIAN_LITTLE(wav->header.frequency);
+            wav->header.byte_per_sec = SWAP_ENDIAN_LITTLE(wav->header.byte_per_sec);
+            wav->header.byte_per_bloc = SWAP_ENDIAN_LITTLE(wav->header.byte_per_bloc);
+            wav->header.bits_per_sample = SWAP_ENDIAN_LITTLE(wav->header.bits_per_sample);
+            wav->header.data_size = SWAP_ENDIAN_LITTLE(wav->header.data_size);
+        #endif
+    } else {
+        // RIFF header
+        memcpy(wav->header.file_type_bloc_id, wav->data + 0, 4);
+        // should be (0x52, 0x49, 0x46, 0x46)
+
+        // Read all 4 bytes (not just the first one) and keep the converted value
+        wav->header.size = SWAP_ENDIAN_LITTLE(*((uint32 *) (wav->data + 4)));
+        // should be file size - 8 bytes
+
+        memcpy(wav->header.file_format_id, wav->data + 8, 4);
+        // should be (0x57, 0x41, 0x56, 0x45)
+
+        // Data format header
+        memcpy(wav->header.format_bloc_id, wav->data + 12, 4);
+        // should be (0x66, 0x6D, 0x74, 0x20)
+
+        wav->header.bloc_size = SWAP_ENDIAN_LITTLE(*((uint32 *) (wav->data + 16)));
+        // should be 16
+
+        wav->header.audio_format = SWAP_ENDIAN_LITTLE(*((uint16 *) (wav->data + 20)));
+        wav->header.nbr_channels = SWAP_ENDIAN_LITTLE(*((uint16 *) (wav->data + 22)));
+        wav->header.frequency = SWAP_ENDIAN_LITTLE(*((uint32 *) (wav->data + 24)));
+
+        wav->header.byte_per_sec = SWAP_ENDIAN_LITTLE(*((uint32 *) (wav->data + 28)));
+        // should be frequency * byte_per_bloc
+
+        wav->header.byte_per_bloc = SWAP_ENDIAN_LITTLE(*((uint16 *) (wav->data + 32)));
+        // should be nbr channels * bits_per_sample / 8
+
+        wav->header.bits_per_sample = SWAP_ENDIAN_LITTLE(*((uint16 *) (wav->data + 34)));
+
+        // Sample data header
+        memcpy(wav->header.data_bloc_id, wav->data + 36, 4);
+
+        // Dereference the address of byte 40, not the value stored in byte 40
+        wav->header.data_size = SWAP_ENDIAN_LITTLE(*((uint32 *) (wav->data + 40)));
+    }
+
+    wav->sample_data = wav->data + WAV_HEADER_SIZE;
+}
+
+// Converts a loaded wav file into an Audio clip.
+//
+// The header values are copied into the audio struct and the sample bytes into audio->data.
+// audio->data must already point to a buffer that can hold file size - WAV_HEADER_SIZE bytes.
+//
+// If the file is too small to contain a header, audio->size is set to 0 and nothing is copied.
+//
+// @param src_data Loaded wav file
+// @param audio    Output audio, owner of its data buffer
+void generate_wav_image(const FileBody* src_data, Audio* audio)
+{
+    // @performance We are generating the struct and then filling the data.
+    //      There is some assignment/copy overhead
+    Wav src = {};
+    generate_default_wav_references(src_data, &src);
+
+    // Without this check the unsigned subtraction below wraps around
+    // and memcpy would read ~4 GB from a NULL sample pointer
+    if (src.size < WAV_HEADER_SIZE || !src.sample_data) {
+        audio->size = 0;
+
+        return;
+    }
+
+    audio->sample_rate = src.header.bits_per_sample;
+    audio->sample_size = src.header.byte_per_bloc;
+    audio->frequency = src.header.frequency;
+    audio->channels = src.header.nbr_channels;
+    audio->byte_per_sec = src.header.byte_per_sec;
+    audio->bloc_size = src.header.bloc_size;
+    audio->size = src.size - WAV_HEADER_SIZE;
+
+    memcpy((void *) audio->data, src.sample_data, audio->size);
+}
+
+#endif
--- a/gpuapi/RenderUtils.h
+++ b/gpuapi/RenderUtils.h
@ -156,11 +156,13 @@ void entity_clip_space_from_local_sse(float* clip_space, const float* local_spac
    mat4vec4_mult_sse(mat, local_space, clip_space);
 }

+/*
 inline
 void entity_screen_space(float* screen_space, const float* clip_space, const float* viewport_mat)
 {
    // @todo implement
 }
+*/

 inline
 void entity_world_space_sse(float* world_space, const float* local_space, const float* model_mat)
@ -180,11 +182,13 @@ void entity_clip_space_sse(float* clip_space, const float* view_space, const flo
    mat4vec4_mult_sse(projection_mat, view_space, clip_space);
 }

+/*
 inline
 void entity_screen_space_sse(float* screen_space, const float* clip_space, const float* viewport_mat)
 {
    // @todo implement
 }
+*/

 inline
 void entity_world_space_sse(__m128* world_space, const __m128* local_space, const __m128* model_mat)
@ -204,10 +208,12 @@ void entity_clip_space_sse(__m128* clip_space, const __m128* view_space, const _
    mat4vec4_mult_sse(projection_mat, view_space, clip_space);
 }

+/*
 inline
 void entity_screen_space_sse(__m128* screen_space, const __m128* clip_space, const __m128* viewport_mat)
 {
    // @todo implement
 }
+*/

 #endif
--- a/gpuapi/opengl/ShaderUtils.h
+++ b/gpuapi/opengl/ShaderUtils.h
--- a/gpuapi/opengl/UtilsOpengl.h
+++ b/gpuapi/opengl/UtilsOpengl.h
@ -10,7 +10,8 @@
 #define TOS_GPUAPI_OPENGL_UTILS_H

 #include "../../stdlib/Types.h"
-#include "../../utils/RingMemory.h"
+#include "../../memory/RingMemory.h"
+#include "../../utils/TestUtils.h"
 #include "../../models/Attrib.h"
 #include "../../models/Texture.h"

@ -55,6 +56,8 @@ void window_create(Window* window, void*)
        NULL
    );

+    ASSERT_SIMPLE(window->hwnd_lib);
+
    //glfwSetInputMode(window->hwnd_lib, GLFW_CURSOR, GLFW_CURSOR_DISABLED);

    glfwMakeContextCurrent(window->hwnd_lib);
@ -126,24 +129,26 @@ void prepare_texture(TextureFile* texture, uint32 texture_unit)
 }

 inline
-void load_texture_to_gpu(const TextureFile* texture)
+void load_texture_to_gpu(const TextureFile* texture, int mipmap_level = 0)
 {
    uint32 texture_data_type = get_texture_data_type(texture->texture_data_type);
    glTexImage2D(
-        texture_data_type, 0, GL_RGBA,
+        texture_data_type, mipmap_level, GL_RGBA,
        texture->image.width, texture->image.height,
        0, GL_RGBA, GL_UNSIGNED_BYTE,
        texture->image.pixels
    );

-    // @question use mipmap?
+    if (mipmap_level > -1) {
+        glGenerateMipmap(GL_TEXTURE_2D);
+    }
 }

 inline
 void texture_use(const TextureFile* texture, uint32 texture_unit)
 {
    glActiveTexture(GL_TEXTURE0 + texture_unit);
-    glBindTexture(GL_TEXTURE_2D, texture->id);
+    glBindTexture(GL_TEXTURE_2D, (GLuint) texture->id);
 }

 GLuint make_shader(GLenum type, const char *source, RingMemory* ring)
@ -173,7 +178,7 @@ GLuint load_shader(GLenum type, const char *path, RingMemory* ring) {
    uint64 temp = ring->pos;

    // @bug potential bug for shaders > 4 mb
-    file_body file;
+    FileBody file;
    file.content = ring_get_memory(ring, MEGABYTE * 4);

    // @todo consider to accept file as parameter and load file before
@ -357,6 +362,21 @@ void gpuapi_buffer_delete(GLuint buffer)
    glDeleteBuffers(1, &buffer);
 }

+// Queries the currently free video memory through the vendor extensions.
+//
+// Tries GL_NVX_gpu_memory_info first and falls back to GL_ATI_meminfo.
+// Both extensions report the value in kilobytes.
+//
+// @return Free video memory or 0 if neither query delivered a value
+int get_gpu_free_memory()
+{
+    GLint available = 0;
+    glGetIntegerv(GL_GPU_MEMORY_INFO_CURRENT_AVAILABLE_VIDMEM_NVX, &available);
+
+    if (available != 0) {
+        return available;
+    }
+
+    // GL_TEXTURE_FREE_MEMORY_ATI writes 4 values:
+    //      total free, largest free block, total auxiliary free, largest auxiliary free block
+    // A single GLint as target would be overrun
+    GLint ati_info[4] = {0, 0, 0, 0};
+    glGetIntegerv(GL_TEXTURE_FREE_MEMORY_ATI, ati_info);
+
+    return ati_info[0];
+}
+
+/*
 void render_9_patch(GLuint texture,
    int imgWidth, int imgHeight,
    int img_x1, int img_x2,
@ -367,5 +387,6 @@ void render_9_patch(GLuint texture,
 {

 }
+*/

 #endif
--- a/image/Bitmap.h
+++ b/image/Bitmap.h
@ -169,17 +169,22 @@ struct Bitmap {
    //      2. rows are padded in multiples of 4 bytes
    //      3. rows start from the bottom (unless the height is negative)
    //      4. pixel data is stored in ABGR (graphics libraries usually need BGRA or RGBA)
-    byte* pixels;
+    byte* pixels; // WARNING: This is not the owner of the data. The owner is the FileBody

    uint32 size;
-    byte* data;
+    byte* data; // WARNING: This is not the owner of the data. The owner is the FileBody
 };

-void generate_default_bitmap_references(const file_body* file, Bitmap* bitmap)
+void generate_default_bitmap_references(const FileBody* file, Bitmap* bitmap)
 {
-    bitmap->size = file->size;
+    bitmap->size = (uint32) file->size;
    bitmap->data = file->content;

+    if (bitmap->size < BITMAP_HEADER_SIZE) {
+        // This shouldn't happen
+        return;
+    }
+
    // Fill header
    bitmap->header.identifier[0] = *(file->content + 0);
    bitmap->header.identifier[1] = *(file->content + 1);
@ -241,8 +246,10 @@ void generate_default_bitmap_references(const file_body* file, Bitmap* bitmap)
    bitmap->pixels      = (byte *) (file->content + bitmap->header.offset);
 }

-void generate_bmp_image(const file_body* src_data, Image* image)
+void image_bmp_generate(const FileBody* src_data, Image* image)
 {
+    // @performance We are generating the struct and then filling the data.
+    //      There is some assignment/copy overhead
    Bitmap src = {};
    generate_default_bitmap_references(src_data, &src);

@ -254,7 +261,7 @@ void generate_bmp_image(const file_body* src_data, Image* image)
    uint32 width = ROUND_TO_NEAREST(src.dib_header.width, 4);

    uint32 pixel_bytes = src.dib_header.bits_per_pixel / 8;
-    if (image->order_pixels = IMAGE_PIXEL_ORDER_BGRA) {
+    if (image->order_pixels == IMAGE_PIXEL_ORDER_BGRA) {
        memcpy((void *) image->pixels, src.pixels, image->length * pixel_bytes);

        return;
--- a/image/Image.cpp
+++ b/image/Image.cpp
@ -0,0 +1,44 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_IMAGE_C
+#define TOS_IMAGE_C
+
+#include "../utils/StringUtils.h"
+#include "Image.h"
+#include "Tga.h"
+#include "Bitmap.h"
+#include "Png.h"
+#include "../memory/RingMemory.h"
+
+#if _WIN32
+    #include "../platform/win32/UtilsWin32.h"
+#else
+    #include "../platform/linux/UtilsLinux.h"
+#endif
+
+// Loads an image file and decodes it based on its file extension (.png, .tga, .bmp).
+//
+// Paths starting with '.' are resolved through relative_to_absolute,
+// all other paths are used as they are.
+// Files with an unknown extension are read but not decoded.
+//
+// @param ring  Ring memory used for the file content
+// @param path  Path of the image file
+// @param image Output image
+void image_from_file(RingMemory* ring, const char* path, Image* image)
+{
+    char full_path[MAX_PATH];
+    if (*path == '.') {
+        relative_to_absolute(path, full_path);
+    } else {
+        // Previously full_path stayed uninitialized for paths not starting with '.'
+        int length = 0;
+        while (length < MAX_PATH - 1 && path[length] != '\0') {
+            full_path[length] = path[length];
+            ++length;
+        }
+
+        full_path[length] = '\0';
+    }
+
+    FileBody file = {};
+    file_read(full_path, &file, ring);
+
+    if (str_ends_with(path, ".png")) {
+        image_png_generate(&file, image);
+    } else if (str_ends_with(path, ".tga")) {
+        image_tga_generate(&file, image);
+    } else if (str_ends_with(path, ".bmp")) {
+        image_bmp_generate(&file, image);
+    }
+}
+
+#endif
--- a/image/Image.h
+++ b/image/Image.h
@ -26,7 +26,7 @@ struct Image {
    byte order_pixels; // RGBA vs BGRA
    byte order_rows; // top-to-bottom vs bottom-to-top

-    uint32* pixels;
+    uint32* pixels; // owner of data
 };

 #endif
--- a/image/Png.h
+++ b/image/Png.h
@ -0,0 +1,469 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ *
+ * png: https://www.w3.org/TR/2003/REC-PNG-20031110/
+ * zlib: https://www.ietf.org/rfc/rfc1950.txt
+ * deflate: https://www.ietf.org/rfc/rfc1951.txt
+ */
+#ifndef TOS_IMAGE_PNG_H
+#define TOS_IMAGE_PNG_H
+
+#include <string.h>
+#include "../stdlib/Types.h"
+#include "../utils/Utils.h"
+#include "../utils/EndianUtils.h"
+#include "Image.h"
+
+// Packed header size
+#define PNG_HEADER_SIZE 8
+
+// The 8 signature bytes at the start of every png file (PNG_HEADER_SIZE)
+struct PngHeader {
+    byte signature[8];
+};
+
+// Meta data of a png chunk (the chunk payload itself is not part of this struct)
+struct PngChunk {
+    uint32 length;
+    uint32 type;
+    uint32 crc;
+};
+
+// IHDR chunk including its chunk meta data
+// generate_default_png_references copies the file bytes directly over this struct (up to and including interlace),
+// therefore the member order must not be changed
+struct PngIHDR {
+    uint32 length;
+    uint32 type;
+    uint32 width;
+    uint32 height;
+    byte bit_depth;
+    byte colory_type;
+    byte compression;
+    byte filter;
+    byte interlace;
+    uint32 crc;
+};
+
+// The two zlib header bytes at the start of the first IDAT chunk
+struct PngIDATHeader {
+    byte zlib_method_flag;
+    byte add_flag;
+};
+
+// Non-owning view on a png file that was loaded into a FileBody
+struct Png {
+    PngHeader header;
+    PngIHDR ihdr;
+
+    // Encoded pixel data
+    byte* pixels; // WARNING: This is not the owner of the data. The owner is the FileBody
+
+    // Size of the whole file in bytes
+    uint32 size;
+    byte* data; // WARNING: This is not the owner of the data. The owner is the FileBody
+};
+
+// One slot of a huffman lookup table: the decoded symbol and the length of its code in bits
+// Also reused by PNG_LENGTH_EXTRA/PNG_DIST_EXTRA as {base value, extra bit count}
+struct PngHuffmanEntry {
+    uint16 symbol;
+    uint16 bits_used;
+};
+
+// Huffman lookup table which is indexed with the next max_code_length bits of the stream
+// WARNING: With 32768 entries a single table is large, be careful when placing it on the stack
+struct PngHuffman {
+    uint32 max_code_length; // in bits
+    uint32 count;
+    PngHuffmanEntry entries[32768]; // 2^15
+};
+
+// Expected content of PngHeader::signature
+static const byte PNG_SIGNATURE[] = {0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A};
+
+// Code lengths of the fixed huffman codes as {last symbol index of the range, bit count}
+// The last range (288 - 319) is used for the distance codes, which are stored behind the literal/length codes
+static const uint32 HUFFMAN_BIT_COUNTS[][2] = {{143, 8}, {255, 9}, {279, 7}, {287, 8}, {319, 5}};
+
+// Order in which the code lengths of the code length alphabet are stored in a dynamic huffman block
+static const uint32 HUFFMAN_CODE_LENGTH_ALPHA[] = {
+    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+};
+
+// {base length, extra bit count}, indexed by length symbol - 257
+static const PngHuffmanEntry PNG_LENGTH_EXTRA[] = {
+    {3, 0}, {4, 0}, {5, 0}, {6, 0}, {7, 0}, {8, 0}, {9, 0}, {10, 0}, {11, 1},
+    {13, 1}, {15, 1}, {17, 1}, {19, 2}, {23, 2}, {27, 2}, {31, 2}, {35, 3},
+    {43, 3}, {51, 3}, {59, 3}, {67, 4}, {83, 4}, {99, 4}, {115, 4}, {131, 5},
+    {163, 5}, {195, 5}, {227, 5}, {258, 0}
+};
+
+// {base distance, extra bit count}, indexed by distance symbol
+static const PngHuffmanEntry PNG_DIST_EXTRA[] = {
+    {1, 0}, {2, 0}, {3, 0}, {4, 0}, {5, 1}, {7, 1}, {9, 2}, {13, 2}, {17, 3},
+    {25, 3}, {33, 4}, {49, 4}, {65, 5}, {97, 5}, {129, 6}, {193, 6}, {257, 7},
+    {385, 7}, {513, 8}, {769, 8}, {1025, 9}, {1537, 9}, {2049, 10}, {3073, 10},
+    {4097, 11}, {6145, 11}, {8193, 12}, {12289, 12}, {16385, 13}, {24577, 13}
+};
+
+// Builds the lookup table of a canonical huffman code.
+//
+// Every symbol with a code length != 0 gets the next unused code of its length.
+// Since the table is indexed with max_code_length bits, codes that are shorter are
+// written to every slot that starts with the (bit reversed) code.
+//
+// @param symbol_count       Number of elements in symbol_code_length
+// @param symbol_code_length Code length in bits per symbol (0 = symbol is not used)
+// @param huff               Table to fill, max_code_length must already be set
+void huffman_png_compute(uint32 symbol_count, uint32* symbol_code_length, PngHuffman* huff)
+{
+    // How many symbols use which code length
+    uint32 length_histogram[16] = {};
+    for (uint32 s = 0; s < symbol_count; ++s) {
+        length_histogram[symbol_code_length[s]] += 1;
+    }
+
+    // Length 0 means "unused" and must not influence the code assignment
+    length_histogram[0] = 0;
+
+    // First code of every length
+    uint32 free_code[16];
+    free_code[0] = 0;
+
+    for (uint32 len = 1; len < 16; ++len) {
+        free_code[len] = (free_code[len - 1] + length_histogram[len - 1]) << 1;
+    }
+
+    for (uint32 s = 0; s < symbol_count; ++s) {
+        const uint32 len = symbol_code_length[s];
+        if (len != 0) {
+            const uint32 code = free_code[len];
+            free_code[len] += 1;
+
+            // Bits of the index which don't belong to the code -> fill all combinations
+            const uint32 pad_bits = huff->max_code_length - len;
+            const uint32 slot_count = 1 << pad_bits;
+
+            for (uint32 fill = 0; fill < slot_count; ++fill) {
+                const uint32 slot = reverse_bits((code << pad_bits) | fill, huff->max_code_length);
+
+                huff->entries[slot].bits_used = (uint16) len;
+                huff->entries[slot].symbol = (uint16) s;
+            }
+        }
+    }
+}
+
+// Looks up the table entry for the next max_code_length bits of the stream.
+//
+// @param huff Huffman lookup table
+// @param data Current byte position in the stream
+// @param pos  Bit position which is passed on to get_bits
+//
+// @return Copy of the matching table entry
+PngHuffmanEntry huffman_png_decode(PngHuffman* huff, const byte* data, int pos)
+{
+    const uint32 slot = get_bits(data, huff->max_code_length, pos);
+    const PngHuffmanEntry* hit = huff->entries + slot;
+
+    return *hit;
+}
+
+// Reverts the png scanline filters (none, sub, up, average, paeth).
+//
+// Input layout:  per row 1 filter byte followed by width * 4 filtered bytes
+// Output layout: per row width * 4 reconstructed bytes
+//
+// Only 8 bit RGBA (4 bytes per pixel) is handled, which is the only format image_png_generate accepts.
+// decompressed and finalized may point to the same buffer since the write position
+// never overtakes the read position.
+//
+// @param width        Image width in pixels
+// @param height       Image height in pixels
+// @param decompressed Inflated scanlines including the filter bytes
+// @param finalized    Output pixel bytes
+// @param steps        Reserved for a simd implementation, currently unused
+void png_filter_reconstruct(uint32 width, uint32 height, const byte* decompressed, byte* finalized, int steps)
+{
+    (void) steps;
+
+    const uint32 bytes_per_pixel = 4;
+    const uint32 row_bytes = width * bytes_per_pixel;
+
+    const byte* src = decompressed;
+    const byte* prev_row = NULL;
+    byte* current_row = finalized;
+
+    for (uint32 y = 0; y < height; ++y) {
+        // Every scanline starts with its own filter type
+        byte filter = *src;
+        ++src;
+
+        for (uint32 x = 0; x < row_bytes; ++x) {
+            // Bytes outside of the image are treated as 0
+            int left = x >= bytes_per_pixel ? current_row[x - bytes_per_pixel] : 0;
+            int up = prev_row ? prev_row[x] : 0;
+            int up_left = (prev_row && x >= bytes_per_pixel) ? prev_row[x - bytes_per_pixel] : 0;
+
+            int predictor = 0;
+
+            switch (filter) {
+                case 1: {
+                        predictor = left;
+                    } break;
+                case 2: {
+                        predictor = up;
+                    } break;
+                case 3: {
+                        predictor = (left + up) / 2;
+                    } break;
+                case 4: {
+                        // Paeth: use the neighbor which is closest to left + up - up_left
+                        int estimate = left + up - up_left;
+                        int dist_left = estimate > left ? estimate - left : left - estimate;
+                        int dist_up = estimate > up ? estimate - up : up - estimate;
+                        int dist_up_left = estimate > up_left ? estimate - up_left : up_left - estimate;
+
+                        if (dist_left <= dist_up && dist_left <= dist_up_left) {
+                            predictor = left;
+                        } else if (dist_up <= dist_up_left) {
+                            predictor = up;
+                        } else {
+                            predictor = up_left;
+                        }
+                    } break;
+                default: {
+                        // Filter 0 (none); unknown filter types are copied as they are
+                        predictor = 0;
+                    }
+            }
+
+            current_row[x] = (byte) ((src[x] + predictor) & 0xFF);
+        }
+
+        prev_row = current_row;
+        src += row_bytes;
+        current_row += row_bytes;
+    }
+}
+
+// Fills a Png view from a loaded file: signature + IHDR chunk and the data pointers.
+//
+// File layout of the first 33 bytes:
+//      0 -  7 signature
+//      8 - 11 IHDR length
+//     12 - 15 IHDR type
+//     16 - 28 IHDR data (width, height, bit depth, color type, compression, filter, interlace)
+//     29 - 32 IHDR crc
+//
+// If the file is smaller than that only size and data are set.
+//
+// NOTE(review): SWAP_ENDIAN_BIG receives pointers here while SWAP_ENDIAN_LITTLE in Wav.h receives values
+//      - confirm the expected macro arguments
+//
+// @param file Loaded png file (remains the owner of the memory)
+// @param png  Output view
+void generate_default_png_references(const FileBody* file, Png* png)
+{
+    png->size = (uint32) file->size;
+    png->data = file->content;
+
+    if (png->size < 33) {
+        // This shouldn't happen
+        return;
+    }
+
+    // The first chunk MUST be IHDR -> we handle it here
+    // Signature + IHDR up to and including interlace have the same layout in the file and in the struct
+    memcpy(png, file->content, 29);
+
+    // The crc directly follows the interlace byte (offset 28) -> offset 29
+    png->ihdr.crc = SWAP_ENDIAN_BIG((uint32 *) (file->content + 29));
+
+    png->ihdr.length = SWAP_ENDIAN_BIG(&png->ihdr.length);
+    png->ihdr.type = SWAP_ENDIAN_BIG(&png->ihdr.type);
+    png->ihdr.width = SWAP_ENDIAN_BIG(&png->ihdr.width);
+    png->ihdr.height = SWAP_ENDIAN_BIG(&png->ihdr.height);
+}
+
+// Decodes a png file into an image (work in progress).
+//
+// Only 8 bit RGBA, non-interlaced png files with the default compression/filter method are accepted.
+// The function walks through the chunks, inflates the IDAT data (stored, fixed and dynamic huffman blocks)
+// into image->pixels and finally calls png_filter_reconstruct.
+//
+// NOTE(review): image->pixels must be allocated by the caller; nothing here checks its capacity
+//
+// @param src_data Loaded png file
+// @param image    Output image, owner of the pixel buffer
+// @param steps    Passed on to png_filter_reconstruct
+//
+// @return false if the png type is not supported, otherwise true
+bool image_png_generate(const FileBody* src_data, Image* image, int steps = 8)
+{
+    // @performance We are generating the struct and then filling the data.
+    //      There is some assignment/copy overhead
+    Png src = {};
+    generate_default_png_references(src_data, &src);
+
+    // @todo We probably need the following buffers
+    //  1. file buffer (already here)
+    //  2. block buffer
+    //  3. temp pixel buffer (larger)
+    //  4. final pixel buffer (already here)
+
+    if (src.ihdr.bit_depth != 8
+        || src.ihdr.colory_type != 6
+        || src.ihdr.compression != 0
+        || src.ihdr.filter != 0
+        || src.ihdr.interlace != 0
+    ) {
+        // We don't support this type of png
+        return false;
+    }
+
+    PngChunk chunk;
+    PngIDATHeader idat_header;
+
+    bool is_first_idat = true;
+
+    uint32 out_pos = 0;
+
+    // @question the following is a lot of data, should this be moved to heap?
+    // Code lengths of the literal/length symbols followed by the code lengths of the distance symbols
+    uint32 literal_length_dist_table[512];
+
+    PngHuffman literal_length_huffman;
+    literal_length_huffman.max_code_length = 15;
+    literal_length_huffman.count = 1 << literal_length_huffman.max_code_length;
+
+    PngHuffman distance_huffman;
+    distance_huffman.max_code_length = 15;
+    distance_huffman.count = 1 << distance_huffman.max_code_length;
+
+    PngHuffman dictionary_huffman;
+    dictionary_huffman.max_code_length = 7;
+    dictionary_huffman.count = 1 << dictionary_huffman.max_code_length;
+
+    // i is the current byte to read
+    // 33 = signature (8) + IHDR chunk (25)
+    int i = 33;
+
+    // r is the re-shift value in case we need to go back
+    int r = 0;
+
+    // b is the current bit to read
+    int b = 0;
+
+    while(i < src.size) {
+        chunk.length = SWAP_ENDIAN_BIG((uint32 *) (src_data->content + i));
+        chunk.type = SWAP_ENDIAN_BIG((uint32 *) (src_data->content + i + 4));
+
+        // For our png reader, we only care about IDAT
+        //  @question consider PLTE, tRNS, gAMA, iCCP
+        // NOTE(review): multi-character constants ('IEND', 'IDAT') have implementation-defined values - confirm for all compilers
+        if (chunk.type == 'IEND') {
+            break;
+        } else if (chunk.type != 'IDAT') {
+            // IDAT chunks are continuous and we don't care for anything else
+            if (!is_first_idat) {
+                break;
+            }
+
+            // Skip the chunk: length (4) + type (4) + data + crc (4)
+            i += chunk.length + 12;
+            continue;
+        }
+
+        if (is_first_idat) {
+            idat_header.zlib_method_flag = *(src_data->content + i + 8);
+            idat_header.add_flag = *(src_data->content + i + 9);
+
+            byte CM = idat_header.zlib_method_flag & 0xF;
+            byte FDICT = (idat_header.add_flag >> 5) & 0x1;
+
+            is_first_idat = false;
+
+            if (CM != 8 || FDICT != 0) {
+                return false;
+            }
+
+            // Skip chunk length (4) + chunk type (4) + zlib header (2)
+            // NOTE(review): for all later IDAT chunks the 8 chunk header bytes don't seem to be skipped - confirm
+            i += 10;
+        }
+
+        // @bug The algorithm below works on "blocks".
+        //      Could it be possible that a block is spread across 2 IDAT chunks?
+        //      If so this would be bad and break the code below
+        //      We could solve this by just having another counting variable and jump to the next block
+
+        // start: src_data->content + i + 8
+        // end: src_data->content + i + 8 + length - 1
+
+        // DEFLATE Algorithm
+        // @bug the following 3 lines are wrong, they don't have to start at a bit 0/1
+        //      A block doesn't have to start at a byte boundary
+        byte BFINAL = get_bits(src_data->content + i, 1, b);
+        i += (b > 7 - 1);
+        b = (b + 1) & 7;
+
+        byte BTYPE = get_bits(src_data->content + i, 2, b);
+        i += (b > 7 - 2);
+        b = (b + 2) & 7;
+
+        if (BTYPE == 0) {
+            // Stored block (no compression)
+            // starts at byte boundary -> position = +1 of previous byte
+            if (b == 0) {
+                i -= 1;
+            }
+
+            uint16 len = *((uint16 *) (src_data->content + i + 1));
+            uint16 nlen = *((uint16 *) (src_data->content + i + 3));
+
+            // NOTE(review): image->pixels is a uint32 pointer, so out_pos is applied in 4 byte steps here
+            //      although it counts bytes - confirm
+            memcpy(image->pixels + out_pos, src_data->content + i + 5, len);
+            out_pos += len;
+
+            i += 5 + len;
+            b = 0;
+        } else {
+            // @question is this even required or are we overwriting anyways?
+            // NOTE(review): only the first 15/15/7 entries of the tables are cleared, not the whole tables - confirm
+            memset(&literal_length_dist_table, 0, 512 * 4);
+            memset(&literal_length_huffman.entries, 0, sizeof(PngHuffmanEntry) * 15);
+            memset(&distance_huffman.entries, 0, sizeof(PngHuffmanEntry) * 15);
+            memset(&dictionary_huffman.entries, 0, sizeof(PngHuffmanEntry) * 7);
+
+            uint32 huffman_literal = 0;
+            uint32 huffman_dist = 0;
+
+            if (BTYPE == 2) {
+                // Compressed with dynamic Huffman code
+                huffman_literal = get_bits(src_data->content + i, 5, b);
+                i += (b > 7 - 5);
+                b = (b + 5) & 7;
+
+                huffman_dist = get_bits(src_data->content + i, 5, b);
+                i += (b > 7 - 5);
+                b = (b + 5) & 7;
+
+                uint32 huffman_code_length = get_bits(src_data->content + i, 4, b);
+                i += (b > 7 - 4);
+                b = (b + 4) & 7;
+
+                huffman_literal += 257;
+                huffman_dist += 1;
+                huffman_code_length += 4;
+
+                uint32 huffman_code_length_table[19] = {};
+
+                for (uint32 j = 0; j < huffman_code_length; ++j) {
+                    huffman_code_length_table[HUFFMAN_CODE_LENGTH_ALPHA[j]] = get_bits(src_data->content + i, 3, b);
+                    i += (b > 7 - 3);
+                    b = (b + 3) & 7;
+                }
+
+                huffman_png_compute(19, huffman_code_length_table, &dictionary_huffman);
+
+                uint32 literal_length_count = 0;
+                uint32 length_count = huffman_literal + huffman_dist;
+
+                // NOTE(review): literal_length_count is never increased inside of this loop,
+                //      as written the loop can't terminate - confirm
+                while (literal_length_count < length_count) {
+                    // @todo implement
+                    uint32 rep_count = 1;
+                    uint32 rep_val = 0;
+
+                    PngHuffmanEntry dict = huffman_png_decode(&dictionary_huffman, src_data->content + i, b);
+                    i += (b + dict.bits_used) / 8;
+                    b = (b + dict.bits_used) & 7;
+
+                    // NOTE(review): this uses the code length (bits_used) as decoded value,
+                    //      presumably dict.symbol is intended (bits_used can never reach 16 - 18) - confirm
+                    uint32 encoded_length = dict.bits_used;
+
+                    if (encoded_length <= 15) {
+                        rep_val = encoded_length;
+                    } else if (encoded_length == 16) {
+                        rep_count = 3 + get_bits(src_data->content + i, 2, b);
+                        i += (b > 7 - 2);
+                        b = (b + 2) & 7;
+
+                        rep_val = literal_length_dist_table[literal_length_count - 1];
+                    } else if (encoded_length == 17) {
+                        rep_count = 3 + get_bits(src_data->content + i, 3, b);
+                        i += (b > 7 - 3);
+                        b = (b + 3) & 7;
+                    } else if (encoded_length == 18) {
+                        rep_count = 11 + get_bits(src_data->content + i, 7, b);
+                        i += (b > 7 - 7);
+                        b = (b + 7) & 7;
+                    }
+
+                    // NOTE(review): memset writes rep_count bytes, but the table elements are uint32 - confirm
+                    memset(literal_length_dist_table + literal_length_count, rep_val, rep_count);
+                }
+            } else if (BTYPE == 1) {
+                // Compressed with fixed Huffman code
+                huffman_literal = 288;
+                huffman_dist = 32;
+
+                uint32 bit_index = 0;
+                for(uint32 range_index = 0; range_index < 5; ++range_index) {
+                    uint32 bit_count = HUFFMAN_BIT_COUNTS[range_index][1];
+                    uint32 last = HUFFMAN_BIT_COUNTS[range_index][0];
+
+                    while(bit_index <= last) {
+                        literal_length_dist_table[bit_index++] = bit_count;
+                    }
+                }
+            }
+
+            // The distance code lengths are stored directly behind the literal/length code lengths
+            huffman_png_compute(huffman_literal, literal_length_dist_table, &literal_length_huffman);
+            huffman_png_compute(huffman_dist, literal_length_dist_table + huffman_literal, &distance_huffman);
+
+            while (true) {
+                PngHuffmanEntry literal = huffman_png_decode(&literal_length_huffman, src_data->content + i, b);
+                i += (b + literal.bits_used) / 8;
+                b = (b + literal.bits_used) & 7;
+
+                // NOTE(review): presumably literal.symbol is intended; bits_used can never be 256 (end of block),
+                //      so the loop can't terminate as written - confirm
+                uint32 literal_length = literal.bits_used;
+
+                if (literal_length == 256) {
+                    break;
+                }
+
+                if (literal_length <= 255) {
+                    *(image->pixels + out_pos) = (byte) (literal_length & 0xFF);
+                    ++out_pos;
+                } else {
+                    uint32 length_tab_index = literal_length - 257;
+                    PngHuffmanEntry length_tab = PNG_LENGTH_EXTRA[length_tab_index];
+                    uint32 length = length_tab.symbol;
+
+                    if (length_tab.bits_used) {
+                        uint32 extra_bits = get_bits(src_data->content + i, length_tab.bits_used, b);
+                        i += (b + length_tab.bits_used) / 8;
+                        b = (b + length_tab.bits_used) & 7;
+
+                        length += extra_bits;
+                    }
+
+                    PngHuffmanEntry tab = huffman_png_decode(&distance_huffman, src_data->content + i, b);
+                    i += (b + tab.bits_used) / 8;
+                    b = (b + tab.bits_used) & 7;
+
+                    // NOTE(review): presumably tab.symbol is intended - confirm
+                    uint32 dist_tab_index = tab.bits_used;
+
+                    PngHuffmanEntry dist_tab = PNG_DIST_EXTRA[dist_tab_index];
+                    uint32 dist = dist_tab.symbol;
+
+                    if (dist_tab.bits_used) {
+                        uint32 extra_bits = get_bits(src_data->content + i, dist_tab.bits_used, b);
+                        i += (b + dist_tab.bits_used) / 8;
+                        b = (b + dist_tab.bits_used) & 7;
+
+                        dist += extra_bits;
+                    }
+
+                    // NOTE(review): source and destination may overlap (length > dist) which memcpy doesn't allow,
+                    //      and out_pos is not advanced by length afterwards - confirm
+                    memcpy(image->pixels + out_pos, image->pixels + out_pos - dist, length);
+                }
+            }
+        }
+
+        // NOTE(review): in DEFLATE BFINAL is set on the last block, this condition looks inverted - confirm
+        if (BFINAL == 0) {
+            break;
+        }
+    }
+
+    image->width = src.ihdr.width;
+    image->height = src.ihdr.height;
+
+    // @todo fix pixels parameter
+    png_filter_reconstruct(image->width, image->height, image->pixels, image->pixels, steps);
+
+    return true;
+}
+
+#endif
--- a/image/Tga.h
+++ b/image/Tga.h
@ -42,14 +42,22 @@ struct TgaHeader {
 struct Tga {
    TgaHeader header;

-    byte* pixels;
+    byte* pixels; // WARNING: This is not the owner of the data. The owner is the FileBody

    uint32 size;
-    byte* data;
+    byte* data; // WARNING: This is not the owner of the data. The owner is the FileBody
 };

-void generate_default_tga_references(const file_body* file, Tga* tga)
+void generate_default_tga_references(const FileBody* file, Tga* tga)
 {
+    tga->size = (uint32) file->size;
+    tga->data = file->content;
+
+    if (tga->size < TGA_HEADER_SIZE) {
+        // This shouldn't happen
+        return;
+    }
+
    tga->header.id_length = file->content[0];
    tga->header.color_map_type = file->content[1];
    tga->header.image_type = file->content[2];
@ -68,8 +76,10 @@ void generate_default_tga_references(const file_body* file, Tga* tga)
        + tga->header.color_map_length * (tga->header.color_map_bits / 8); // can be 0
 }

-void generate_tga_image(const file_body* src_data, Image* image)
+void image_tga_generate(const FileBody* src_data, Image* image)
 {
+    // @performance We are generating the struct and then filling the data.
+    //      There is some assignment/copy overhead
    Tga src = {};
    generate_default_tga_references(src_data, &src);

--- a/image/default_colors.h
+++ b/image/default_colors.h
@ -31,6 +31,6 @@ const int default_colors_256[256] = {
    0xE1D4FF, 0xD8ACFF, 0xCD9BFF, 0xC88DFA, 0xBD8AF9, 0xB160FF, 0xAA52FE, 0x9841FD, 0x8726FF, 0x8700F5, 0x7200F4, 0x5C00B7, 0x460489, 0x350077, 0x28004F, 0x1c0037,
    0xFFC7FF, 0xFFB2FF, 0xFF9AFF, 0xF181F1, 0xFB6FFD, 0xF850FB, 0xFB46FF, 0xF91FFF, 0xF900FF, 0xDD00E6, 0xBF00C7, 0x9B0199, 0xB70090, 0x670362, 0x4F0153, 0x330035,
    0xFDD2E6, 0xF9B5DA, 0xF7A4D4, 0xF198CB, 0xF682BD, 0xFF5FAE, 0xFF4CA9, 0xFF3CA4, 0xFF1A94, 0xF90979, 0xE80071, 0xC40061, 0x96004A, 0x670132, 0x4F0024, 0x310016
-}
+};

 #endif
--- a/input/Input.h
+++ b/input/Input.h
@ -43,7 +43,10 @@ struct InputState {

    // We only consider up to 4 pressed keys
    // Depending on the keyboard you may only be able to detect a limited amount of key presses anyway
+    int up_index;
    uint16 keys_down_old[MAX_KEY_PRESSES];
+
+    int down_index;
    uint16 keys_down[MAX_KEY_PRESSES];

    // Mouse
--- a/math/matrix/MatrixFloat32.h
+++ b/math/matrix/MatrixFloat32.h
@ -13,21 +13,21 @@
 #include "../../stdlib/Mathtypes.h"
 #include "../../utils/MathUtils.h"

-void mat3_identity_f32(float* matrix)
+void mat3_identity(float* matrix)
 {
    matrix[0] = 1.0f; matrix[1] = 0.0f; matrix[2] = 0.0f;
    matrix[3] = 0.0f; matrix[4] = 1.0f; matrix[5] = 0.0f;
    matrix[6] = 0.0f; matrix[7] = 0.0f; matrix[8] = 1.0f;
 }

-void mat3_identity_f32(__m128* matrix)
+void mat3_identity(__m128* matrix)
 {
    matrix[0] = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
    matrix[1] = _mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f);
    matrix[2] = _mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f);
 }

-void mat4_identity_f32(float* matrix)
+void mat4_identity(float* matrix)
 {
    matrix[0] = 1.0f;  matrix[1] = 0.0f;  matrix[2] = 0.0f;  matrix[3] = 0.0f;
    matrix[4] = 0.0f;  matrix[5] = 1.0f;  matrix[6] = 0.0f;  matrix[7] = 0.0f;
@ -35,7 +35,7 @@ void mat4_identity_f32(float* matrix)
    matrix[12] = 0.0f; matrix[13] = 0.0f; matrix[14] = 0.0f; matrix[15] = 1.0f;
 }

-void mat4_identity_f32(__m128* matrix)
+void mat4_identity(__m128* matrix)
 {
    matrix[0] = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
    matrix[1] = _mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f);
@ -43,7 +43,7 @@ void mat4_identity_f32(__m128* matrix)
    matrix[3] = _mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f);
 }

-void mat_translate_f32(float* matrix, float dx, float dy, float dz)
+void mat4_translate(float* matrix, float dx, float dy, float dz)
 {
    matrix[0] = 1;   matrix[1] = 0;   matrix[2] = 0;   matrix[3] = 0;
    matrix[4] = 0;   matrix[5] = 1;   matrix[6] = 0;   matrix[7] = 0;
@ -52,7 +52,7 @@ void mat_translate_f32(float* matrix, float dx, float dy, float dz)
 }

 // x, y, z need to be normalized
-void mat3_rotate(float* matrix, float x, float y, float z, float angle)
+void mat4_rotate(float* matrix, float x, float y, float z, float angle)
 {
    float s = sinf_approx(angle);
    float c = cosf_approx(angle);
@ -118,7 +118,7 @@ void mat3vec3_mult_sse(const float* matrix, const float* vector, float* result)

        __m128 dot = _mm_dp_ps(row, vec, 0xF1);

-        result[i] = _mm_cvtss_f32(dot);
+        result[i] = _mm_cvtss(dot);
    }
 }

@ -128,7 +128,7 @@ void mat3vec3_mult_sse(const __m128* matrix, const __m128* vector, float* result
    for (int i = 0; i < 3; ++i) {
        __m128 dot = _mm_dp_ps(matrix[i], *vector, 0xF1);

-        result[i] = _mm_cvtss_f32(dot);
+        result[i] = _mm_cvtss(dot);
    }
 }

@ -157,7 +157,7 @@ void mat4vec4_mult_sse(const float* matrix, const float* vector, float* result)
        __m128 row = _mm_loadu_ps(&matrix[i * 4]);
        __m128 dot = _mm_dp_ps(row, vec, 0xF1);

-        result[i] = _mm_cvtss_f32(dot);
+        result[i] = _mm_cvtss(dot);
    }
 }

@ -167,7 +167,7 @@ void mat4vec4_mult_sse(const __m128* matrix, const __m128* vector, float* result
    for (int i = 0; i < 4; ++i) {
        __m128 dot = _mm_dp_ps(matrix[i], *vector, 0xF1);

-        result[i] = _mm_cvtss_f32(dot);
+        result[i] = _mm_cvtss(dot);
    }
 }

@ -225,55 +225,55 @@ void mat4mat4_mult_sse(const float* a, const float* b, float* result)

    // b1
    dot = _mm_dp_ps(a_1, b_1, 0xF1);
-    result[0] = _mm_cvtss_f32(dot);
+    result[0] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a_2, b_1, 0xF1);
-    result[1] = _mm_cvtss_f32(dot);
+    result[1] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a_3, b_1, 0xF1);
-    result[2] = _mm_cvtss_f32(dot);
+    result[2] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a_4, b_1, 0xF1);
-    result[3] = _mm_cvtss_f32(dot);
+    result[3] = _mm_cvtss(dot);

    // b2
    dot = _mm_dp_ps(a_1, b_2, 0xF1);
-    result[4] = _mm_cvtss_f32(dot);
+    result[4] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a_2, b_2, 0xF1);
-    result[5] = _mm_cvtss_f32(dot);
+    result[5] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a_3, b_2, 0xF1);
-    result[6] = _mm_cvtss_f32(dot);
+    result[6] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a_4, b_2, 0xF1);
-    result[7] = _mm_cvtss_f32(dot);
+    result[7] = _mm_cvtss(dot);

    // b3
    dot = _mm_dp_ps(a_1, b_3, 0xF1);
-    result[8] = _mm_cvtss_f32(dot);
+    result[8] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a_2, b_3, 0xF1);
-    result[9] = _mm_cvtss_f32(dot);
+    result[9] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a_3, b_3, 0xF1);
-    result[10] = _mm_cvtss_f32(dot);
+    result[10] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a_4, b_3, 0xF1);
-    result[11] = _mm_cvtss_f32(dot);
+    result[11] = _mm_cvtss(dot);

    // b4
    dot = _mm_dp_ps(a_1, b_4, 0xF1);
-    result[12] = _mm_cvtss_f32(dot);
+    result[12] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a_2, b_4, 0xF1);
-    result[13] = _mm_cvtss_f32(dot);
+    result[13] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a_3, b_4, 0xF1);
-    result[14] = _mm_cvtss_f32(dot);
+    result[14] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a_4, b_4, 0xF1);
-    result[15] = _mm_cvtss_f32(dot);
+    result[15] = _mm_cvtss(dot);
 }

 void mat4mat4_mult_sse(const __m128* a, const __m128* b_transposed, float* result)
@ -283,55 +283,55 @@ void mat4mat4_mult_sse(const __m128* a, const __m128* b_transposed, float* resul
    // @question could simple mul add sse be faster?
    // b1
    dot = _mm_dp_ps(a[0], b_transposed[0], 0xF1);
-    result[0] = _mm_cvtss_f32(dot);
+    result[0] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a[1], b_transposed[0], 0xF1);
-    result[1] = _mm_cvtss_f32(dot);
+    result[1] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a[2], b_transposed[0], 0xF1);
-    result[2] = _mm_cvtss_f32(dot);
+    result[2] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a[3], b_transposed[0], 0xF1);
-    result[3] = _mm_cvtss_f32(dot);
+    result[3] = _mm_cvtss(dot);

    // b2
    dot = _mm_dp_ps(a[0], b_transposed[1], 0xF1);
-    result[4] = _mm_cvtss_f32(dot);
+    result[4] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a[1], b_transposed[1], 0xF1);
-    result[5] = _mm_cvtss_f32(dot);
+    result[5] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a[2], b_transposed[1], 0xF1);
-    result[6] = _mm_cvtss_f32(dot);
+    result[6] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a[3], b_transposed[1], 0xF1);
-    result[7] = _mm_cvtss_f32(dot);
+    result[7] = _mm_cvtss(dot);

    // b3
    dot = _mm_dp_ps(a[0], b_transposed[2], 0xF1);
-    result[8] = _mm_cvtss_f32(dot);
+    result[8] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a[1], b_transposed[2], 0xF1);
-    result[9] = _mm_cvtss_f32(dot);
+    result[9] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a[2], b_transposed[2], 0xF1);
-    result[10] = _mm_cvtss_f32(dot);
+    result[10] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a[3], b_transposed[2], 0xF1);
-    result[11] = _mm_cvtss_f32(dot);
+    result[11] = _mm_cvtss(dot);

    // b4
    dot = _mm_dp_ps(a[0], b_transposed[3], 0xF1);
-    result[12] = _mm_cvtss_f32(dot);
+    result[12] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a[1], b_transposed[3], 0xF1);
-    result[13] = _mm_cvtss_f32(dot);
+    result[13] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a[2], b_transposed[3], 0xF1);
-    result[14] = _mm_cvtss_f32(dot);
+    result[14] = _mm_cvtss(dot);

    dot = _mm_dp_ps(a[3], b_transposed[3], 0xF1);
-    result[15] = _mm_cvtss_f32(dot);
+    result[15] = _mm_cvtss(dot);
 }

 void mat4mat4_mult_sse(const __m128* a, const __m128* b_transpose, __m128* result)
@ -345,8 +345,8 @@ void mat4mat4_mult_sse(const __m128* a, const __m128* b_transpose, __m128* resul
    }
 }

-// @question Consider to replace with 1d array
-void frustum_planes(float planes[6][4], int radius, float *matrix) {
+// @performance Consider to replace with 1d array
+void mat4_frustum_planes(float planes[6][4], float radius, float *matrix) {
    // @todo make this a setting
    float znear = 0.125;
    float zfar = radius * 32 + 64;
@ -384,12 +384,12 @@ void frustum_planes(float planes[6][4], int radius, float *matrix) {
    planes[5][3] = zfar * m[15] - m[14];
 }

-void mat_frustum(
+void mat4_frustum(
    float *matrix, float left, float right, float bottom,
    float top, float znear, float zfar)
 {
    float temp, temp2, temp3, temp4;
-    temp = 2.0 * znear;
+    temp = 2.0f * znear;
    temp2 = right - left;
    temp3 = top - bottom;
    temp4 = zfar - znear;
@ -415,24 +415,24 @@ void mat_frustum(
    matrix[15] = 0.0;
 }

-void mat_perspective(
+void mat4_perspective(
    float *matrix, float fov, float aspect,
    float znear, float zfar)
 {
    float ymax, xmax;
-    ymax = znear * tanf_approx(fov * OMS_PI / 360.0);
+    ymax = znear * tanf_approx(fov * OMS_PI / 360.0f);
    xmax = ymax * aspect;

-    mat_frustum(matrix, -xmax, xmax, -ymax, ymax, znear, zfar);
+    mat4_frustum(matrix, -xmax, xmax, -ymax, ymax, znear, zfar);
 }

-void mat_ortho(
+void mat4_ortho(
    float *matrix,
-    float left, float right, float bottom, float top, float near, float far)
+    float left, float right, float bottom, float top, float near_dist, float far_dist)
 {
    float rl_delta = right - left;
    float tb_delta = top - bottom;
-    float fn_delta = far - near;
+    float fn_delta = far_dist - near_dist;

    matrix[0] = 2 / rl_delta;
    matrix[1] = 0;
@ -451,7 +451,7 @@ void mat_ortho(

    matrix[12] = -(right + left) / rl_delta;
    matrix[13] = -(top + bottom) / tb_delta;
-    matrix[14] = -(far + near) / fn_delta;
+    matrix[14] = -(far_dist + near_dist) / fn_delta;
    matrix[15] = 1;
 }

--- a/memory/BufferMemory.h
+++ b/memory/BufferMemory.h
@ -0,0 +1,46 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MEMORY_BUFFER_MEMORY_H
+#define TOS_MEMORY_BUFFER_MEMORY_H
+
+#include <string.h>
+#include "../stdlib/Types.h"
+#include "MathUtils.h"
+#include "TestUtils.h"
+
+struct BufferMemory { // State of a linear buffer that buffer_get_memory() hands out front to back
+    byte* memory; // Start of the backing memory, returned blocks are memory + pos
+
+    uint64 size; // Capacity, buffer_get_memory() asserts pos + requested size <= size
+    uint64 pos; // Offset of the next block, advanced by every buffer_get_memory() call
+};
+
+/**
+ * Hands out the next block of the buffer and moves the buffer position behind it.
+ *
+ * @param buf     Buffer to take the memory from
+ * @param size    Requested size, it is passed through ROUND_TO_NEAREST(size, aligned) before use
+ * @param aligned Alignment value forwarded to ROUND_TO_NEAREST
+ *                NOTE(review): the position is rounded relative to buf->memory, not as an absolute address
+ * @param zeroed  If true, the returned block is set to 0
+ *
+ * @return Pointer to the start of the block inside buf->memory
+ */
+inline
+byte* buffer_get_memory(BufferMemory* buf, uint64 size, byte aligned = 1, bool zeroed = false)
+{
+    ASSERT_SIMPLE(size <= buf->size);
+
+    // A position of 0 is never touched, otherwise it is rounded if an alignment > 1 is requested
+    const bool realign_position = buf->pos > 0 && aligned > 1;
+    if (realign_position) {
+        buf->pos = ROUND_TO_NEAREST(buf->pos, aligned);
+    }
+
+    // The block length goes through the same rounding macro as the position
+    const uint64 reserved = ROUND_TO_NEAREST(size, aligned);
+    ASSERT_SIMPLE(buf->pos + reserved <= buf->size);
+
+    byte* const block = buf->memory + buf->pos;
+    buf->pos += reserved;
+
+    if (zeroed) {
+        memset(block, 0, reserved);
+    }
+
+    return block;
+}
+
+#endif
--- a/memory/ChunkMemory.h
+++ b/memory/ChunkMemory.h
@ -0,0 +1,174 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MEMORY_ELEMENT_MEMORY_H
+#define TOS_MEMORY_ELEMENT_MEMORY_H
+
+#include <string.h>
+#include "../stdlib/Types.h"
+#include "MathUtils.h"
+
+struct ChunkMemory { // Fixed size chunk storage plus a bitmap that tracks which chunks are in use
+    byte* memory; // Chunk storage, chunk i starts at memory + i * chunk_size
+
+    uint64 count; // Number of chunks
+    uint64 chunk_size; // Size of one chunk in bytes
+    uint64 last_pos = -1; // Unsigned, so -1 wraps to the max value and (last_pos + 1) / 64 starts searches at word 0
+
+    // Usage bitmap with one bit per chunk: a set bit marks a used chunk, a cleared bit a free chunk
+    // Each uint64 covers 64 chunks, the functions in this file index it with element / 64 and element % 64
+    // NOTE(review): this comment used to say that only 8 elements can be checked at a time,
+    //      which reads like a leftover from a byte based bitmap
+    //      presumably the array holds at least (count + 63) / 64 words - confirm where it is allocated
+    uint64* free;
+};
+
+/**
+ * Returns the address of a chunk: element * chunk_size bytes behind the start of the chunk storage.
+ * No bounds check is performed and the usage bitmap is not consulted.
+ */
+inline
+byte* chunk_get_memory(ChunkMemory* buf, uint64 element)
+{
+    const uint64 byte_offset = element * buf->chunk_size;
+
+    return buf->memory + byte_offset;
+}
+
+/**
+ * Marks `elements` consecutive chunks starting at `index` as used.
+ * In some cases we know exactly which index is free
+ *
+ * Nothing is validated here: the caller has to make sure the range is free and inside the buffer.
+ *
+ * @param buf      Chunk memory whose usage bitmap is updated
+ * @param index    First chunk to reserve
+ * @param elements Number of consecutive chunks to reserve
+ * @param zeroed   If true, the memory of the reserved chunks is set to 0
+ */
+void chunk_reserve_index(ChunkMemory* buf, int64 index, int elements = 1, bool zeroed = false)
+{
+    // One uint64 of buf->free tracks 64 chunks
+    for (int j = 0; j < elements; ++j) {
+        const int64 element = index + j;
+
+        // The mask must be 64 bit wide: a plain `1 << n` is an int shift and is wrong for n >= 31
+        buf->free[element / 64] |= ((uint64) 1) << (element % 64);
+    }
+
+    if (zeroed) {
+        memset(buf->memory + index * buf->chunk_size, 0, elements * buf->chunk_size);
+    }
+}
+
+/**
+ * Searches the usage bitmap for `elements` consecutive free chunks and marks them as used.
+ *
+ * The search starts in the bitmap word that contains last_pos + 1 and wraps around once,
+ * so every word is visited exactly one time. A run may cross word boundaries
+ * but it never wraps from the end of the buffer to its beginning.
+ *
+ * NOTE(review): last_pos is only read here, it is not updated by this function.
+ *
+ * @param buf      Chunk memory to reserve from
+ * @param elements Number of consecutive chunks required (values < 1 are rejected)
+ * @param zeroed   If true, the memory of the reserved chunks is set to 0
+ *
+ * @return Index of the first reserved chunk or -1 if no sufficiently large free run exists
+ */
+int64 chunk_reserve(ChunkMemory* buf, int elements = 1, bool zeroed = false)
+{
+    if (elements < 1 || (uint64) elements > buf->count) {
+        return -1;
+    }
+
+    // One uint64 of buf->free tracks 64 chunks
+    const uint64 word_count = (buf->count + 63) / 64;
+
+    uint64 word_index = (buf->last_pos + 1) / 64;
+    if (word_index >= word_count) {
+        word_index = 0;
+    }
+
+    int64 free_element = -1;
+
+    for (uint64 visited = 0; visited < word_count && free_element < 0; ++visited) {
+        // Words with all 64 bits set have no free chunk to start a run in
+        if (buf->free[word_index] != ~((uint64) 0)) {
+            // @performance Every start bit re-checks the whole run, we could jump behind the blocking bit instead
+            for (int bit_index = 0; bit_index < 64; ++bit_index) {
+                const uint64 first = word_index * 64 + bit_index;
+                if (first + elements > buf->count) {
+                    // The run would reach past the last chunk
+                    break;
+                }
+
+                // Count how many chunks starting at first are free
+                int free_run = 0;
+                while (free_run < elements) {
+                    const uint64 element = first + free_run;
+                    if (buf->free[element / 64] & (((uint64) 1) << (element % 64))) {
+                        break;
+                    }
+
+                    ++free_run;
+                }
+
+                if (free_run == elements) {
+                    // Mark the bits as reserved
+                    for (int j = 0; j < elements; ++j) {
+                        const uint64 element = first + j;
+                        buf->free[element / 64] |= ((uint64) 1) << (element % 64);
+                    }
+
+                    free_element = (int64) first;
+
+                    break;
+                }
+            }
+        }
+
+        word_index = word_index + 1 < word_count ? word_index + 1 : 0;
+    }
+
+    if (free_element < 0) {
+        return -1;
+    }
+
+    if (zeroed) {
+        memset(buf->memory + free_element * buf->chunk_size, 0, elements * buf->chunk_size);
+    }
+
+    return free_element;
+}
+
+/**
+ * Finds a single free chunk, marks it as used and returns its address.
+ *
+ * The search starts in the bitmap word that contains last_pos + 1 and wraps around once,
+ * so every word is visited exactly one time.
+ *
+ * NOTE(review): last_pos is only read here, it is not updated by this function.
+ *
+ * @param buf Chunk memory to search
+ *
+ * @return Address of the reserved chunk or NULL if every chunk is in use
+ */
+byte* chunk_find_free(ChunkMemory* buf)
+{
+    // One uint64 of buf->free tracks 64 chunks
+    const uint64 word_count = (buf->count + 63) / 64;
+    if (word_count == 0) {
+        return NULL;
+    }
+
+    uint64 word_index = (buf->last_pos + 1) / 64;
+    if (word_index >= word_count) {
+        word_index = 0;
+    }
+
+    for (uint64 visited = 0; visited < word_count; ++visited) {
+        const uint64 used = buf->free[word_index];
+
+        // Words with all 64 bits set have no free chunk
+        if (used != ~((uint64) 0)) {
+            // @performance on the first iteration through the buffer we could optimize this by starting at a different bit_index
+            // because we know that the bit_index is based on last_pos
+            for (int bit_index = 0; bit_index < 64; ++bit_index) {
+                const uint64 element = word_index * 64 + bit_index;
+                if (element >= buf->count) {
+                    // The remaining bits of the last word don't belong to any chunk
+                    break;
+                }
+
+                // The mask must be 64 bit wide, a byte or int mask can't address bits 8..63
+                const uint64 mask = ((uint64) 1) << bit_index;
+                if ((used & mask) == 0) {
+                    buf->free[word_index] |= mask;
+
+                    return buf->memory + element * buf->chunk_size;
+                }
+            }
+        }
+
+        word_index = word_index + 1 < word_count ? word_index + 1 : 0;
+    }
+
+    return NULL;
+}
+
+/**
+ * Marks a chunk as free by clearing its bit in the usage bitmap.
+ * The chunk memory itself is left untouched.
+ *
+ * @param buf     Chunk memory whose usage bitmap is updated
+ * @param element Index of the chunk to release
+ */
+inline
+void chunk_element_free(ChunkMemory* buf, uint64 element)
+{
+    // The mask must be 64 bit wide: `~(1 << n)` is an int and clears unrelated upper bits for n == 31,
+    // for n >= 32 the shift itself is undefined
+    const uint64 mask = ((uint64) 1) << (element % 64);
+
+    buf->free[element / 64] &= ~mask;
+}
+
+#endif
--- a/memory/RingMemory.h
+++ b/memory/RingMemory.h
@ -6,9 +6,10 @@
 * @version   1.0.0
 * @link      https://jingga.app
 */
-#ifndef TOS_UTILS_RING_MEMORY_H
-#define TOS_UTILS_RING_MEMORY_H
+#ifndef TOS_MEMORY_RING_MEMORY_H
+#define TOS_MEMORY_RING_MEMORY_H

+#include <string.h>
 #include "../stdlib/Types.h"
 #include "MathUtils.h"
 #include "TestUtils.h"
--- a/models/Texture.h
+++ b/models/Texture.h
@ -39,6 +39,8 @@
 struct TextureFile {
    uint64 id;

+    // @question Should the texture hold the texture unit? If yes remember to update prepare_texture()
+
    byte texture_data_type;

    byte texture_wrap_type_s;
--- a/models/event/event_file_format.txt
+++ b/models/event/event_file_format.txt
@ -1,42 +0,0 @@
-#COND0
-This is some text.
-This is another text.
-
-TEXT_OPTIONS{3} = can select up to 3 options
-// @todo how to add/hide options based on other info
-1. My text ->COND1
-2. My text ->COND2,->COND2=12
-3. My text ->COND3
-
-REWARDS{1,2} = pick one and then 2
-// @todo how to add/hide options based on other info
-CONDA: 1. 213 564 55 ->COND2
-CONDA: 2. 12 32 ->COND2
-CONDA&CODB:3. 87 3325 11 ->COND2
-CODB: 3. 87 3325 11 ->COND2
-
-#COND1
-
-#COND2
-
-#COND3
-
-#COND1+#COND2
-
-#COND1+#COND3
-
-#COND2+#COND3
-
-#COND1+#COND2+#COND3
-
-#COND
-	is_true // defined through ->COND
-	int_value // defined through ->COND=12
-	float_value
-	char_level
-	proficiencies_above[]
-	proficiencies_above_level[]
-	char_trait_above[]
-	char_trait_above_level[]
-	char_trait_below[]
-	char_trait_below_level[]
--- a/models/item/Consumable.h
+++ b/models/item/Consumable.h
@ -0,0 +1,36 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_CONSUMABLE_H
+#define TOS_MODELS_CONSUMABLE_H
+
+#include "../../stdlib/Types.h"
+
+#include "../mob/PrimaryStatsPoints.h"
+#include "../mob/SecondaryStatsPoints.h"
+
+struct Consumable { // Stat modifier sets of a consumable, one add and one mul set each for character and skill
+    byte target; // NOTE(review): value meaning is not visible here, presumably a target type id - confirm
+    f32 range; // NOTE(review): unit is not visible here - confirm
+
+    // Character
+    PrimaryStatsPoints primary_char_add; // presumably additive modifiers (name suffix _add) - confirm
+    SecondaryStatsPoints secondary_char_add;
+
+    PrimaryStatsPoints primary_char_mul; // presumably multiplicative modifiers (name suffix _mul) - confirm
+    SecondaryStatsPoints secondary_char_mul;
+
+    // Skill
+    PrimaryStatsPoints primary_skill_add;
+    SecondaryStatsPoints secondary_skill_add;
+
+    PrimaryStatsPoints primary_skill_mul;
+    SecondaryStatsPoints secondary_skill_mul;
+};
+
+#endif
--- a/models/item/ConsumableType.h
+++ b/models/item/ConsumableType.h
@ -0,0 +1,36 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_CONSUMABLE_TYPE_H
+#define TOS_MODELS_CONSUMABLE_TYPE_H
+
+#include "../../stdlib/Types.h"
+
+#include "../mob/PrimaryStatsPoints.h"
+#include "../mob/SecondaryStatsPoints.h"
+
+struct ConsumableType { // NOTE(review): field for field identical to struct Consumable in Consumable.h - confirm both are needed
+    byte target; // NOTE(review): value meaning is not visible here, presumably a target type id - confirm
+    f32 range; // NOTE(review): unit is not visible here - confirm
+
+    // Character
+    PrimaryStatsPoints primary_char_add; // presumably additive modifiers (name suffix _add) - confirm
+    SecondaryStatsPoints secondary_char_add;
+
+    PrimaryStatsPoints primary_char_mul; // presumably multiplicative modifiers (name suffix _mul) - confirm
+    SecondaryStatsPoints secondary_char_mul;
+
+    // Skill
+    PrimaryStatsPoints primary_skill_add;
+    SecondaryStatsPoints secondary_skill_add;
+
+    PrimaryStatsPoints primary_skill_mul;
+    SecondaryStatsPoints secondary_skill_mul;
+};
+
+#endif
--- a/models/item/Equipment.cpp
+++ b/models/item/Equipment.cpp
@ -0,0 +1,166 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_EQUIPMENT_C
+#define TOS_MODELS_EQUIPMENT_C
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../stdlib/Types.h"
+#include "../mob/monster/LootTable.h"
+
+#include "Equipment.h"
+#include "EquipmentType.h"
+#include "ItemRarityDefinition.h"
+#include "MobLevelStats.h"
+#include "_equipment_types.h"
+#include "_equipment_slots.h"
+#include "_item_rarity.h"
+
+/**
+ * Picks a random equipment type that passes the optional class/slot filters and fills
+ * `equipment` with requirements and a random selection of stats of that type.
+ *
+ * At the moment only the *_min values of the equipment type are copied into the selected stats,
+ * the rolled rarity and item level are not applied yet (see the @todo markers).
+ *
+ * @param equipments     Array of EQUIPMENT_TYPE_SIZE equipment types to choose from
+ * @param rarities       Rarity definitions, forwarded to get_random_item_rarity()
+ * @param mob_levels     Mob level stats, forwarded to get_random_item_level()
+ * @param equipment      Output, the generated stats are written into this struct
+ * @param mob_level      Level of the mob, forwarded to get_random_item_level()
+ * @param cclass         Character class filter checked with is_bit_set(), 0 = no class filter
+ * @param equipment_slot Slot filter, 0 = no slot filter
+ *
+ * @return Slot of the chosen equipment type or -1 if no equipment type passes the filters
+ */
+int generate_random_equipment(
+    const EquipmentType* equipments, const RarityDefinition* rarities, const MobLevelStats* mob_levels,
+    SEquipmentStatsPoints* equipment, int mob_level, byte cclass = 0, int equipment_slot = 0
+)
+{
+    // find random equipment type
+    // A filter value of 0 disables that filter, without any filter every index ends up in the list
+    int valid_indices[EQUIPMENT_TYPE_SIZE];
+    int valid_count = 0;
+
+    for (int i = 0; i < EQUIPMENT_TYPE_SIZE; ++i) {
+        if (cclass != 0 && !is_bit_set(equipments[i].char_class, cclass)) {
+            continue;
+        }
+
+        if (equipment_slot != 0 && equipments[i].slot != equipment_slot) {
+            continue;
+        }
+
+        valid_indices[valid_count++] = i;
+    }
+
+    if (valid_count == 0) {
+        // No equipment type matches, rand() % valid_count would be a division by zero
+        return -1;
+    }
+
+    const EquipmentType* equipment_type = equipments + valid_indices[rand() % valid_count];
+
+    // find random item rarity
+    // @todo not applied to the generated stats yet
+    int item_rarity = get_random_item_rarity(rarities, RARITY_TYPE_SIZE);
+
+    // find random item drop level
+    // @todo not applied to the generated stats yet
+    int item_level = get_random_item_level(mob_levels, mob_level);
+
+    // generate stats
+    // requirements: every requirement is a random value in [req_min, req_max]
+    equipment->requirements.stat_str = equipment_type->primary_item_req_min.stat_str + rand() % (equipment_type->primary_item_req_max.stat_str - equipment_type->primary_item_req_min.stat_str + 1);
+    equipment->requirements.stat_agi = equipment_type->primary_item_req_min.stat_agi + rand() % (equipment_type->primary_item_req_max.stat_agi - equipment_type->primary_item_req_min.stat_agi + 1);
+    equipment->requirements.stat_int = equipment_type->primary_item_req_min.stat_int + rand() % (equipment_type->primary_item_req_max.stat_int - equipment_type->primary_item_req_min.stat_int + 1);
+    equipment->requirements.stat_dex = equipment_type->primary_item_req_min.stat_dex + rand() % (equipment_type->primary_item_req_max.stat_dex - equipment_type->primary_item_req_min.stat_dex + 1);
+    equipment->requirements.stat_acc = equipment_type->primary_item_req_min.stat_acc + rand() % (equipment_type->primary_item_req_max.stat_acc - equipment_type->primary_item_req_min.stat_acc + 1);
+    equipment->requirements.stat_sta = equipment_type->primary_item_req_min.stat_sta + rand() % (equipment_type->primary_item_req_max.stat_sta - equipment_type->primary_item_req_min.stat_sta + 1);
+    equipment->requirements.stat_def = equipment_type->primary_item_req_min.stat_def + rand() % (equipment_type->primary_item_req_max.stat_def - equipment_type->primary_item_req_min.stat_def + 1);
+
+    // Shuffled stat indices, the first stat_iter entries decide which stats get a value
+    int primary_indices_random[PRIMARY_STAT_SIZE];
+    int secondary_indices_random[SECONDARY_STAT_SIZE];
+
+    int stat_iter;
+
+    // @todo in the area below we only handle the broad definitions, not the details
+
+    // item stats
+    // @todo handle item details here now only then apply the remaining free stats
+
+    memcpy(secondary_indices_random, SECONDARY_STAT_INDICES, SECONDARY_STAT_SIZE * sizeof(int));
+    random_unique(secondary_indices_random, SECONDARY_STAT_SIZE);
+
+    stat_iter = equipment_type->stats_distribution.item_secondary_count_min + rand() % (equipment_type->stats_distribution.item_secondary_count_max - equipment_type->stats_distribution.item_secondary_count_min + 1);
+    for (int i = 0; i < stat_iter; ++i) {
+        *(equipment->secondary_item.dmg + secondary_indices_random[i]) = *(equipment_type->secondary_item_min.dmg + secondary_indices_random[i]);
+    }
+
+    // char stats
+    memcpy(primary_indices_random, PRIMARY_STAT_INDICES, PRIMARY_STAT_SIZE * sizeof(int));
+    random_unique(primary_indices_random, PRIMARY_STAT_SIZE);
+
+    stat_iter = equipment_type->stats_distribution.char_primary_count_min + rand() % (equipment_type->stats_distribution.char_primary_count_max - equipment_type->stats_distribution.char_primary_count_min + 1);
+    for (int i = 0; i < stat_iter; ++i) {
+        // add and mul are equally distributed
+        if (fast_rand1() < FAST_RAND_MAX / 2) {
+            *(&equipment->primary_char_add.stat_str + primary_indices_random[i]) = *(&equipment_type->primary_char_add_min.stat_str + primary_indices_random[i]);
+        } else {
+            *(&equipment->primary_char_mul.stat_str + primary_indices_random[i]) = *(&equipment_type->primary_char_mul_min.stat_str + primary_indices_random[i]);
+        }
+    }
+
+    // @todo handle char_secondary_distribution skill_count_min/max here now
+
+    memcpy(secondary_indices_random, SECONDARY_STAT_INDICES, SECONDARY_STAT_SIZE * sizeof(int));
+    random_unique(secondary_indices_random, SECONDARY_STAT_SIZE);
+
+    stat_iter = equipment_type->stats_distribution.char_secondary_count_min + rand() % (equipment_type->stats_distribution.char_secondary_count_max - equipment_type->stats_distribution.char_secondary_count_min + 1);
+    for (int i = 0; i < stat_iter; ++i) {
+        // add and mul are equally distributed
+        if (fast_rand1() < FAST_RAND_MAX / 2) {
+            *(equipment->secondary_char_add.dmg + secondary_indices_random[i]) = *(equipment_type->secondary_char_add_min.dmg + secondary_indices_random[i]);
+        } else {
+            *(equipment->secondary_char_mul.dmg + secondary_indices_random[i]) = *(equipment_type->secondary_char_mul_min.dmg + secondary_indices_random[i]);
+        }
+    }
+
+    // @todo handle remaining char_count_min/max here now
+
+    // skill
+    // @question is primary for skill necessary?
+    memcpy(primary_indices_random, PRIMARY_STAT_INDICES, PRIMARY_STAT_SIZE * sizeof(int));
+    random_unique(primary_indices_random, PRIMARY_STAT_SIZE);
+
+    stat_iter = equipment_type->stats_distribution.skill_primary_count_min + rand() % (equipment_type->stats_distribution.skill_primary_count_max - equipment_type->stats_distribution.skill_primary_count_min + 1);
+    for (int i = 0; i < stat_iter; ++i) {
+        // add and mul are equally distributed
+        if (fast_rand1() < FAST_RAND_MAX / 2) {
+            *(&equipment->primary_skill_add.stat_str + primary_indices_random[i]) = *(&equipment_type->primary_skill_add_min.stat_str + primary_indices_random[i]);
+        } else {
+            *(&equipment->primary_skill_mul.stat_str + primary_indices_random[i]) = *(&equipment_type->primary_skill_mul_min.stat_str + primary_indices_random[i]);
+        }
+    }
+
+    // @todo handle skill_secondary_distribution skill_count_min/max here now
+
+    memcpy(secondary_indices_random, SECONDARY_STAT_INDICES, SECONDARY_STAT_SIZE * sizeof(int));
+    random_unique(secondary_indices_random, SECONDARY_STAT_SIZE);
+
+    stat_iter = equipment_type->stats_distribution.skill_secondary_count_min + rand() % (equipment_type->stats_distribution.skill_secondary_count_max - equipment_type->stats_distribution.skill_secondary_count_min + 1);
+    for (int i = 0; i < stat_iter; ++i) {
+        // add and mul are equally distributed
+        if (fast_rand1() < FAST_RAND_MAX / 2) {
+            *(equipment->secondary_skill_add.dmg + secondary_indices_random[i]) = *(equipment_type->secondary_skill_add_min.dmg + secondary_indices_random[i]);
+        } else {
+            *(equipment->secondary_skill_mul.dmg + secondary_indices_random[i]) = *(equipment_type->secondary_skill_mul_min.dmg + secondary_indices_random[i]);
+        }
+    }
+
+    // @todo handle remaining skill_count_min/max here now
+
+    return equipment_type->slot;
+}
+
+int generate_random_equipment(SEquipmentStatsPoints* equipment, int mob_level, byte cclass, const LootTable* table) // Loot table based overload
+{
+    return -1; // Stub: no parameter is used and nothing is generated, the function always returns -1
+}
+
+#endif
--- a/models/item/Equipment.h
+++ b/models/item/Equipment.h
@ -33,15 +33,19 @@ struct SEquipmentStatsPoints {
    // @todo A character cannot do for example fire damage (only items and skills can do that)
    //  This means these stats are unused and just use up memory
    PrimaryStatsPoints primary_char_add;
-    PrimaryStatsRelPoints primary_char_mul;
+    PrimaryStatsPoints primary_char_mul;

    SecondaryStatsPoints secondary_char_add;
-    SecondaryStatsRelPoints secondary_char_mul;
+    SecondaryStatsPoints secondary_char_mul;

    // Modifies the skills
    // only modifies skills that have these stats != 0
+    // @question is primary for skill necessary?
+    PrimaryStatsPoints primary_skill_add;
+    PrimaryStatsPoints primary_skill_mul;
+
    SecondaryStatsPoints secondary_skill_add;
-    SecondaryStatsRelPoints secondary_skill_mul;
+    SecondaryStatsPoints secondary_skill_mul;
 };

 #endif
--- a/models/item/EquipmentType.h
+++ b/models/item/EquipmentType.h
@ -11,16 +11,71 @@

 #include "../../stdlib/Types.h"

+#include "../mob/MobStats.h"
+#include "ItemStatsDistribution.h"
+
 struct EquipmentType {
-    byte id;
    byte slot;
-    bool dual;
-    bool throwing;
-    bool projectile;
-    bool damage;
-    bool armor;
-    bool supporting;
-    bool beam;
+    uint32 char_class;
+    bool is_dual;
+    bool is_throwing;
+    bool is_projectile;
+    bool is_damage;
+    bool is_armor;
+    bool is_supporting;
+    bool is_beam;
+    bool is_ranged;
+
+    byte potential_min;
+    byte potential_max;
+
+    // @question Do we want a equipment specific potential? currently only rarity dependent!
+
+    // This defines how many stats can be assigned to an item based on the item type
+    // @todo for the correct algorithm we however also need to consider the rarity of the item, which defines how many
+    //      total stats/affixes/enchantments can actually be assigned.
+    //      we just need to define that static struct/array (we already have a draft in the excel file)
+    //      of course this should be probably automatically generated from the database at compile time as a pre_compile program
+    ItemStatsDistribution stats_distribution;
+
+    // The min/max point range is calculated by checking the rarity values + item level
+    // The values stored in the structs below are the "average" value which then gets randomly shifted by the rarity+item level
+    PrimaryStatsPoints primary_item_req_min;
+    PrimaryStatsPoints primary_item_req_max;
+
+    SecondaryStatsPoints secondary_item_min;
+    SecondaryStatsPoints secondary_item_max;
+
+    // Character
+    // add
+    PrimaryStatsPoints primary_char_add_min;
+    PrimaryStatsPoints primary_char_add_max;
+
+    SecondaryStatsPoints secondary_char_add_min;
+    SecondaryStatsPoints secondary_char_add_max;
+
+    // mul
+    PrimaryStatsPoints primary_char_mul_min;
+    PrimaryStatsPoints primary_char_mul_max;
+
+    SecondaryStatsPoints secondary_char_mul_min;
+    SecondaryStatsPoints secondary_char_mul_max;
+
+    // Skill
+    // add
+    // @question is primary for skill necessary?
+    PrimaryStatsPoints primary_skill_add_min;
+    PrimaryStatsPoints primary_skill_add_max;
+
+    SecondaryStatsPoints secondary_skill_add_min;
+    SecondaryStatsPoints secondary_skill_add_max;
+
+    // mul
+    PrimaryStatsPoints primary_skill_mul_min;
+    PrimaryStatsPoints primary_skill_mul_max;
+
+    SecondaryStatsPoints secondary_skill_mul_min;
+    SecondaryStatsPoints secondary_skill_mul_max;
 };

 #endif
--- a/models/item/ItemAffixDistribution.h
+++ b/models/item/ItemAffixDistribution.h
@ -0,0 +1,63 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_ITEM_AFFIX_DISTRIBUTION_H
+#define TOS_MODELS_ITEM_AFFIX_DISTRIBUTION_H
+
+// Affix count limits per stat category, stored as *_count_min/*_count_max pairs.
+// ItemStatsDistribution embeds this struct for its char and skill secondary stats.
+//
+// WARNING: The sum of all mins must be smaller than the max count of the whole category
+// This allows us to define how many phys dmg stats an item should have etc.
+// The sum of all mins is most likely lower than the parent min/max -> we can randomly assign additional
+// stat categories to the item as long as their min/max value is not 0 (0 means not allowed)
+struct ItemAffixDistribution {
+    // damage
+    // NOTE(review): presumably the parent limit of the phys/elemental/magic damage counts below - confirm
+    int dmg_count_min;
+    int dmg_count_max;
+
+    int phys_dmg_count_min;
+    int phys_dmg_count_max;
+
+    int elemental_dmg_count_min;
+    int elemental_dmg_count_max;
+
+    int magic_dmg_count_min;
+    int magic_dmg_count_max;
+
+    // defense
+    // NOTE(review): presumably the parent limit of the phys/elemental/magic defense counts below - confirm
+    int def_count_min;
+    int def_count_max;
+
+    int phys_def_count_min;
+    int phys_def_count_max;
+
+    int elemental_def_count_min;
+    int elemental_def_count_max;
+
+    int magic_def_count_min;
+    int magic_def_count_max;
+
+    // other
+    // NOTE(review): presumably the parent limit of the remaining categories below - confirm
+    int other_count_min;
+    int other_count_max;
+
+    int health_count_min;
+    int health_count_max;
+
+    int resource_count_min;
+    int resource_count_max;
+
+    int movement_count_min;
+    int movement_count_max;
+
+    int modifier_count_min;
+    int modifier_count_max;
+
+    int special_count_min;
+    int special_count_max;
+};
+
+#endif
--- a/models/item/ItemLevelStats.h
+++ b/models/item/ItemLevelStats.h
@ -0,0 +1,20 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_ITEM_LEVEL_STATS_H
+#define TOS_MODELS_ITEM_LEVEL_STATS_H
+
+#include "ItemRarityStats.h"
+#include "MobLevelStats.h"
+#include "_item_rarity.h"
+
+// Holds one ItemRarityStats entry per rarity type (RARITY_TYPE_SIZE entries)
+// NOTE(review): presumably one ItemLevelStats exists per item level and the array is indexed by the rarity id - confirm
+struct ItemLevelStats {
+    ItemRarityStats rarity_stats[RARITY_TYPE_SIZE];
+};
+
+#endif
--- a/models/item/ItemRarityDefinition.h
+++ b/models/item/ItemRarityDefinition.h
@ -0,0 +1,40 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_ITEM_RARITY_DEFINITION_H
+#define TOS_MODELS_ITEM_RARITY_DEFINITION_H
+
+#include "ItemStatsDistribution.h"
+
+#define ITEM_RARITY_100 10000000000
+
+// This puts a hard limit on StatsDistribution
+struct RarityDefinition {
+    // Weight of this rarity; get_random_item_rarity() accumulates it over the rarities and compares the sum with a random roll
+    // NOTE(review): presumably expressed in units where ITEM_RARITY_100 equals 100% - confirm
+    int drop_chance;
+
+    // NOTE(review): presumably the range from which the potential of an item is rolled - confirm
+    int potential_min;
+    int potential_max;
+
+    // Stat count limits of this rarity
+    ItemStatsDistribution stats_distribution;
+};
+
+/**
+ * Rolls a random rarity index based on the drop chances of the rarities.
+ *
+ * The drop chances of rarities[0 .. rarity_count - 2] are accumulated and the first rarity whose
+ * cumulative drop chance exceeds the roll wins. The last rarity acts as fallback and receives the
+ * remaining probability, its own drop_chance is not evaluated.
+ *
+ * @param rarities     Rarity definitions, drop_chance is interpreted on the scale 0 .. ITEM_RARITY_100 (= 100%)
+ * @param rarity_count Number of elements in rarities
+ *
+ * @return Index of the rolled rarity (rarity_count - 1 if no earlier rarity was hit)
+ */
+inline
+int get_random_item_rarity(const RarityDefinition* rarities, int rarity_count)
+{
+    // ITEM_RARITY_100 doesn't fit into 32 bit and rand() only guarantees 15 random bits (RAND_MAX >= 32767).
+    // We therefore combine 4 calls to a 60 bit value before reducing it to [0, ITEM_RARITY_100).
+    unsigned long long roll = 0;
+    for (int i = 0; i < 4; ++i) {
+        roll = (roll << 15) | ((unsigned long long) rand() & 0x7FFF);
+    }
+    roll %= ITEM_RARITY_100;
+
+    unsigned long long cumulative_chance = 0;
+    for (int i = 0; i < rarity_count - 1; ++i) {
+        // Negative drop chances are ignored, otherwise they would wrap the unsigned sum
+        if (rarities[i].drop_chance > 0) {
+            cumulative_chance += (unsigned long long) rarities[i].drop_chance;
+        }
+
+        // The rarity is hit if the roll falls into its slice of the cumulative range
+        if (roll < cumulative_chance) {
+            return i;
+        }
+    }
+
+    return rarity_count - 1;
+}
+#endif
--- a/models/item/ItemRarityStats.h
+++ b/models/item/ItemRarityStats.h
@ -0,0 +1,24 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_ITEM_RARITY_STATS_H
+#define TOS_MODELS_ITEM_RARITY_STATS_H
+
+/**
+ * This defines the basic stat range and average for all item rarities.
+ */
+struct ItemRarityStats {
+    // Average stat value and its lower/upper bound
+    int stats_avg;
+    int stats_min;
+    int stats_max;
+
+    // NOTE(review): presumably the bounds of the random shift applied to the average - confirm
+    float shift_min;
+    float shift_max;
+};
+
+#endif
--- a/models/item/ItemStatsDistribution.h
+++ b/models/item/ItemStatsDistribution.h
@ -0,0 +1,53 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_ITEM_STATS_DISTRIBUTION_H
+#define TOS_MODELS_ITEM_STATS_DISTRIBUTION_H
+
+#include "ItemAffixDistribution.h"
+
+// Defines how many stats of which group can be assigned to an item, stored as *_count_min/*_count_max pairs
+//
+// This is only used in EquipmentType
+// NOTE(review): RarityDefinition also embeds this struct, so the remark above looks outdated - confirm
+// @question Should we also use it in Rarity and replace the current impl. in Rarity?
+//  Implementing it also in rarity would basically give us more detailed control in rarities as well
+struct ItemStatsDistribution {
+    // item
+    int item_secondary_count_min;
+    int item_secondary_count_max;
+
+    int item_flags_dmg_count_min;
+    int item_flags_dmg_count_max;
+
+    int item_flags_def_count_min;
+    int item_flags_def_count_max;
+
+    // character
+    // can be add and mul
+    int char_count_min;
+    int char_count_max;
+
+    int char_primary_count_min;
+    int char_primary_count_max;
+
+    int char_secondary_count_min;
+    int char_secondary_count_max;
+    // Per-category limits of the character secondary stats
+    ItemAffixDistribution char_secondary_distribution;
+
+    // skill
+    // can be add and mul
+    int skill_count_min;
+    int skill_count_max;
+
+    int skill_primary_count_min;
+    int skill_primary_count_max;
+
+    int skill_secondary_count_min;
+    int skill_secondary_count_max;
+    // Per-category limits of the skill secondary stats
+    ItemAffixDistribution skill_secondary_distribution;
+};
+
+#endif
--- a/models/item/MobLevelStats.h
+++ b/models/item/MobLevelStats.h
@ -0,0 +1,34 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_ITEM_MOB_LEVEL_STATS_H
+#define TOS_MODELS_ITEM_MOB_LEVEL_STATS_H
+
+/**
+ * This defines the bounds for what items can drop based on the mob level.
+ * Additional LootTables may further restrict or modify this.
+ * This also functions as a guard for typos in mob definitions to avoid ludicrous gold and xp drops
+ */
+struct MobLevelStats {
+    int xp;
+
+    int gold_min;
+    int gold_max;
+
+    // Inclusive item level range, get_random_item_level() rolls a value within it
+    int item_level_min;
+    int item_level_max;
+};
+
+/**
+ * Rolls a random item level for a drop of a mob with the given level.
+ *
+ * @param level_data Table of per-level stats, the entry of mob level N is stored at index N - 1
+ * @param mob_level  1-based mob level, values below 1 are treated as level 1.
+ *                   The table length is not known here, the caller must ensure the level doesn't exceed the table
+ *
+ * @return Item level in the inclusive range [item_level_min, item_level_max] of the table entry,
+ *         item_level_min if the range is empty or inverted
+ */
+inline
+int get_random_item_level(const MobLevelStats* level_data, int mob_level)
+{
+    // Levels are 1-based, guard against reading in front of the table
+    const MobLevelStats* mob_stats = level_data + (mob_level > 1 ? mob_level - 1 : 0);
+
+    // An inverted range would otherwise result in a modulo by zero or in values outside of the range
+    const int level_span = mob_stats->item_level_max - mob_stats->item_level_min;
+    if (level_span <= 0) {
+        return mob_stats->item_level_min;
+    }
+
+    return mob_stats->item_level_min + rand() % (level_span + 1);
+}
+
+#endif
--- a/models/item/_equipment_slots.h
+++ b/models/item/_equipment_slots.h
@ -0,0 +1,6 @@
+#ifndef TOS_MODELS_ITEM_EQUIPMENT_SLOTS_H
+#define TOS_MODELS_ITEM_EQUIPMENT_SLOTS_H
+
+#define EQUIPMENT_SLOT_SIZE 14
+
+#endif
--- a/models/item/_equipment_types.h
+++ b/models/item/_equipment_types.h
@ -0,0 +1,6 @@
+#ifndef TOS_MODELS_ITEM_EQUIPMENT_TYPES_H
+#define TOS_MODELS_ITEM_EQUIPMENT_TYPES_H
+
+#define EQUIPMENT_TYPE_SIZE 54
+
+#endif
--- a/models/item/_item_rarity.h
+++ b/models/item/_item_rarity.h
@ -0,0 +1,6 @@
+#ifndef TOS_MODELS_ITEM_RARITY_TYPES_H
+#define TOS_MODELS_ITEM_RARITY_TYPES_H
+
+#define RARITY_TYPE_SIZE 7
+
+#endif
--- a/models/item/equipment_slots.h
+++ b/models/item/equipment_slots.h
@ -1,27 +0,0 @@
-/**
- * Jingga
- *
- * @copyright Jingga
- * @license   OMS License 2.0
- * @version   1.0.0
- * @link      https://jingga.app
- */
-#ifndef TOS_MODELS_ITEM_EQUIPMENT_SLOTS_H
-#define TOS_MODELS_ITEM_EQUIPMENT_SLOTS_H
-
-#define EQUIPMENT_SLOT_HEAD 0x01
-#define EQUIPMENT_SLOT_NECK 0x02
-#define EQUIPMENT_SLOT_BODY 0x03
-#define EQUIPMENT_SLOT_BELT 0x04
-#define EQUIPMENT_SLOT_PANTS 0x05
-#define EQUIPMENT_SLOT_BOOTS 0x06
-#define EQUIPMENT_SLOT_RING 0x07
-#define EQUIPMENT_SLOT_MAIN_HAND 0x08
-#define EQUIPMENT_SLOT_OFF_HAND 0x09
-#define EQUIPMENT_SLOT_ARMS 0x0A
-#define EQUIPMENT_SLOT_BELT_ATTACHMENT 0x0B
-#define EQUIPMENT_SLOT_SHOULDER 0x0C
-#define EQUIPMENT_SLOT_BACK 0x0D
-#define EQUIPMENT_SLOT_HANDS 0x0E
-
-#endif
--- a/models/item/equipment_types.h
+++ b/models/item/equipment_types.h
@ -1,115 +0,0 @@
-#ifndef TOS_MODELS_ITEM_EQUIPMENT_TYPES_H
-#define TOS_MODELS_ITEM_EQUIPMENT_TYPES_H
-
-#include "equipment_slots.h"
-#include "EquipmentType.h"
-
-#define EQUIPMENT_TYPE_ONE_HANDED_SWORD 0x01
-#define EQUIPMENT_TYPE_TWO_HANDED_SWORD 0x02
-#define EQUIPMENT_TYPE_HELMET 0x03
-#define EQUIPMENT_TYPE_EARING 0x04
-#define EQUIPMENT_TYPE_NECKLACE 0x05
-#define EQUIPMENT_TYPE_BOOTS 0x06
-#define EQUIPMENT_TYPE_STAFF 0x07
-#define EQUIPMENT_TYPE_WAND 0x08
-#define EQUIPMENT_TYPE_DOLL 0x09
-#define EQUIPMENT_TYPE_POLEAXE 0x0A
-#define EQUIPMENT_TYPE_SABRE 0x0B
-#define EQUIPMENT_TYPE_DAGGER 0x0C
-#define EQUIPMENT_TYPE_JAVELIN 0x0D
-#define EQUIPMENT_TYPE_QUARTERSTAFF 0x0E
-#define EQUIPMENT_TYPE_SPEAR 0x0F
-#define EQUIPMENT_TYPE_CLAYMORE 0x10
-#define EQUIPMENT_TYPE_DAO 0x11
-#define EQUIPMENT_TYPE_CLEAVER 0x12
-#define EQUIPMENT_TYPE_BROADSWORD 0x13
-#define EQUIPMENT_TYPE_LONGSWORD 0x14
-#define EQUIPMENT_TYPE_SCIMITAR 0x15
-#define EQUIPMENT_TYPE_RAPIER 0x16
-#define EQUIPMENT_TYPE_SICKLE 0x17
-#define EQUIPMENT_TYPE_SCYTHE 0x18
-#define EQUIPMENT_TYPE_PUNCHING_DAGGER 0x19
-#define EQUIPMENT_TYPE_LIGHT_WARHAMMER 0x1A
-#define EQUIPMENT_TYPE_LIGHT_MACE 0x1B
-#define EQUIPMENT_TYPE_HEAVY_MACE 0x1C
-#define EQUIPMENT_TYPE_HEAVY_WARHAMMER 0x1D
-#define EQUIPMENT_TYPE_LIGHT_FLAIL 0x1E
-#define EQUIPMENT_TYPE_HEAVY_FLAIL 0x1F
-#define EQUIPMENT_TYPE_SHURIKAN 0x20
-#define EQUIPMENT_TYPE_GLAIVE 0x21
-#define EQUIPMENT_TYPE_HALBERD 0x22
-#define EQUIPMENT_TYPE_PARTIZAN 0x23
-#define EQUIPMENT_TYPE_LONGBOW 0x24
-#define EQUIPMENT_TYPE_DOUBLE_BOW 0x25
-#define EQUIPMENT_TYPE_BOW 0x27
-#define EQUIPMENT_TYPE_RECURVE_BOW 0x28
-#define EQUIPMENT_TYPE_CROSSBOW 0x29
-#define EQUIPMENT_TYPE_HEAVY_CROSSBOW 0x2A
-#define EQUIPMENT_TYPE_WHIP 0x2B
-#define EQUIPMENT_TYPE_THROWING_AXE 0x2C
-#define EQUIPMENT_TYPE_BLOWGUN 0x2D
-#define EQUIPMENT_TYPE_CLUB 0x2E
-#define EQUIPMENT_TYPE_GREATCLUB 0x2F
-#define EQUIPMENT_TYPE_SLING 0x30
-#define EQUIPMENT_TYPE_CHAKRAM 0x31
-#define EQUIPMENT_TYPE_TRIDENT 0x32
-#define EQUIPMENT_TYPE_THROWING_SPEAR 0x33
-#define EQUIPMENT_TYPE_THROWING_KNIVES 0x34
-#define EQUIPMENT_TYPE_GRANADE 0x35
-#define EQUIPMENT_TYPE_SCRIPTURE 0x36
-#define EQUIPMENT_TYPE_BONES 0x37
-#define EQUIPMENT_TYPE_MAGIC_CRYSTAL 0x38
-#define EQUIPMENT_TYPE_SHIELD 0x39
-#define EQUIPMENT_TYPE_QUIVER 0x3D
-#define EQUIPMENT_TYPE_PISTOL 0x3E
-#define EQUIPMENT_TYPE_SHOTGUN 0x3F
-#define EQUIPMENT_TYPE_RIFLE 0x40
-#define EQUIPMENT_TYPE_FLASK 0x41
-#define EQUIPMENT_TYPE_LIGHT_AXE 0x42
-#define EQUIPMENT_TYPE_QUILL 0x43
-#define EQUIPMENT_TYPE_PANTS 0x44
-#define EQUIPMENT_TYPE_BELT 0x45
-#define EQUIPMENT_TYPE_RING 0x46
-#define EQUIPMENT_TYPE_ARMS 0x47
-#define EQUIPMENT_TYPE_BELT_ATTACHMENT 0x48
-#define EQUIPMENT_TYPE_BODY 0x49
-#define EQUIPMENT_TYPE_CIRCLET 0x4A
-#define EQUIPMENT_TYPE_BRACELET 0x4B
-#define EQUIPMENT_TYPE_GADGET 0x4C
-#define EQUIPMENT_TYPE_LANTERN 0x4D
-#define EQUIPMENT_TYPE_GLASSES 0x4E
-#define EQUIPMENT_TYPE_CAPE 0x4F
-#define EQUIPMENT_TYPE_POLEARM 0x50
-#define EQUIPMENT_TYPE_HEAVY_AXE 0x51
-#define EQUIPMENT_TYPE_SCALES 0x52
-#define EQUIPMENT_TYPE_PRAYING_BEADS 0x53
-#define EQUIPMENT_TYPE_TONFA 0x54
-#define EQUIPMENT_TYPE_TETSUBO 0x55
-#define EQUIPMENT_TYPE_KAMA 0x56
-#define EQUIPMENT_TYPE_SAMURAI_SWORD 0x57
-#define EQUIPMENT_TYPE_BOOMERANG 0x58
-#define EQUIPMENT_TYPE_SLINGSHOT 0x59
-#define EQUIPMENT_TYPE_HARPOON 0x5A
-#define EQUIPMENT_TYPE_ORB 0x5B
-#define EQUIPMENT_TYPE_RUNESTONE 0x5C
-#define EQUIPMENT_TYPE_TALISMAN 0x5D
-#define EQUIPMENT_TYPE_GRIMOIRE 0x5E
-#define EQUIPMENT_TYPE_SHURIKEN 0x5F
-#define EQUIPMENT_TYPE_THROWING_DARTS 0x60
-#define EQUIPMENT_TYPE_COCKTAIL 0x61
-#define EQUIPMENT_TYPE_FLUTE 0x62
-#define EQUIPMENT_TYPE_FAN 0x63
-#define EQUIPMENT_TYPE_SCEPTER 0x64
-#define EQUIPMENT_TYPE_TAMBOURINE 0x65
-#define EQUIPMENT_TYPE_BAGPIPE 0x66
-#define EQUIPMENT_TYPE_HARP 0x67
-#define EQUIPMENT_TYPE_TROMPET 0x68
-#define EQUIPMENT_TYPE_LUTE 0x69
-#define EQUIPMENT_TYPE_HORN 0x6A
-#define EQUIPMENT_TYPE_BELL 0x6B
-#define EQUIPMENT_TYPE_VIOLIN 0x6C
-#define EQUIPMENT_TYPE_VIOLIN 0x6D
-
-#define SIZE_EQUIPMENT_TYPE 0x69
-
-#endif
--- a/models/mob/FixedStats.h
+++ b/models/mob/FixedStats.h
@ -0,0 +1,26 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_MOB_FIXED_STATS_H
+#define TOS_MODELS_MOB_FIXED_STATS_H
+
+// This header needs its own include guard: SecondaryStatsPoints.h already uses TOS_MODELS_MOB_SECONDARY_STATS_POINTS_H,
+// sharing it would silently drop whichever of the two headers gets included second.
+// The file lives in models/mob/, so the stdlib directory is two levels up (same as in the sibling headers)
+#include "../../stdlib/Types.h"
+
+// Movement speeds stored as plain f32 values
+struct FixedStats {
+    // Movement
+    // Additional speeds may be defined for Mobs
+    f32 speed_walk1;
+    f32 speed_swim1;
+    f32 speed_fly1;
+
+    f32 speed_jump;
+    f32 speed_dodge;
+    f32 speed_turn;
+};
+
+#endif
--- a/models/mob/MobStats.h
+++ b/models/mob/MobStats.h
@ -10,554 +10,33 @@
 #define TOS_MODELS_MOB_STATS_H

 #include "../../stdlib/Types.h"
+#include "PrimaryStatsPoints.h"
+#include "SecondaryStatsPoints.h"

 /**
 * @todo optimize order of struct members to ensure optimal struct size
 */
-
-// Character stats modifiable through leveling (simple +/- buttons)
-struct PrimaryStatsPoints {
-    byte stat_str; // strength      : effects health + base damage
-    byte stat_int; // inteligence   : effects resource + base demage
-    byte stat_acc; // accuracy      : effects critical chance + base damage + miss chance
-    byte stat_agi; // agility       : effects resource + base damage + dodge chance
-    byte stat_def; // defense       : effects resource + base defense + dodge chance
-    byte stat_sta; // stamina       : effects health regen + resource regen
-};
-
-struct PrimaryStatsRel {
-    f32 stat_str;
-    f32 stat_int;
-    f32 stat_acc;
-    f32 stat_agi;
-    f32 stat_def;
-    f32 stat_sta;
-};
-
-struct PrimaryStatsRelPoints {
-    byte stat_str;
-    byte stat_int;
-    byte stat_acc;
-    byte stat_agi;
-    byte stat_def;
-    byte stat_sta;
-};
-
-// Character stats modifiable thorugh skill tree?
-struct SecondaryStatsPoints {
-    // Damage types
-    byte dmg_pircing;
-    byte dmg_slashing;
-    byte dmg_bludgeoning;
-    byte dmg_stabbing;
-    byte dmg_fire;
-    byte dmg_water;
-    byte dmg_wind;
-    byte dmg_earth;
-    byte dmg_poison;
-    byte dmg_lightning;
-    byte dmg_ice;
-    byte dmg_arcane;
-    byte dmg_corrupted;
-    byte dmg_holy;
-    byte dmg_reflection;
-    byte dmg_reflection_chance;
-
-    byte dmg_crit;
-    byte dmg_crit_chance;
-
-    // Health & Resource
-    byte health;
-    byte health_on_dmg_dealt;
-    byte health_on_dmg_taken;
-
-    byte health_regen;
-    byte health_regen_rel;
-    byte health_regen_on_dmg_dealt;
-    byte health_regen_on_dmg_taken;
-
-    byte resource;
-    byte resource_on_dmg_dealt;
-    byte resource_on_dmg_taken;
-
-    byte resource_regen;
-    byte resource_regen_rel;
-    byte resource_regen_on_dmg_dealt;
-    byte resource_regen_on_dmg_taken;
-
-    byte resource_loss;
-    byte resource_loss_on_dmg_dealt;
-    byte resource_loss_on_dmg_taken;
-
-    // Defense types
-    //      think about it as armor and/or resistence if it helps
-    byte defense_pircing;
-    byte defense_slashing;
-    byte defense_bludgeoning;
-    byte defense_stabbing;
-    byte defense_fire;
-    byte defense_water;
-    byte defense_ice;
-    byte defense_earth;
-    byte defense_wind;
-    byte defense_poison;
-    byte defense_lightning;
-    byte defense_holy;
-    byte defense_arcane;
-    byte defense_corrupted;
-
-    // Accuracy
-    byte dodge_chance;
-    byte cc_protection;
-    byte miss_chance;
-
-    // Movement
-    // Additional speeds may be defined for Mobs
-    byte speed_walk1;
-    byte speed_swim1;
-    byte speed_fly1;
-
-    // Fighting speed
-    byte speed_cast;
-    byte speed_attack;
-
-    byte pickup_range;
-
-    byte shield;
-
-    byte aoe_scale;
-    byte resource_cost;
-    byte health_cost;
-    byte attack_range;
-    byte melee_range;
-    byte projectile_speed;
-    byte projectile_count;
-    byte shatter_probability;
-    byte shatter_range;
-    byte shatter_dmg;
-    byte shatter_count;
-    byte passthrough_damage;
-    byte passthrough_count;
-    byte dot_duration;
-    byte dot_count;
-    byte bleeding_dot;
-    byte poison_dot;
-    byte burn_dot;
-    byte ice_dot;
-    byte resource_drain;
-    byte shatter_dot;
-    byte minon_duration;
-    byte minion_count;
-    byte effect_spreading_probability;
-    byte effect_spreading_radius;
-    byte effect_spreading_max_count;
-    byte effect_duration;
-    byte aura_range;
-    byte cast_duration;
-
-    byte agro_range;
-};
-
-
-// @todo change order for simd calculations so that all valus match up
-struct SecondaryStatsValues {
-    // Damage types
-    int32 dmg_pircing;
-    int32 dmg_slashing;
-    int32 dmg_bludgeoning;
-    int32 dmg_stabbing;
-    int32 dmg_fire;
-    int32 dmg_water;
-    int32 dmg_wind;
-    int32 dmg_earth;
-    int32 dmg_poison;
-    int32 dmg_lightning;
-    int32 dmg_ice;
-    int32 dmg_arcane;
-    int32 dmg_corrupted;
-    int32 dmg_holy;
-    int32 dmg_reflection;
-    int32 dmg_reflection_chance;
-
-    int32 dmg_crit;
-    f32 dmg_crit_chance;
-
-    // Health & Resource
-    int32 health;
-    f32 health_on_dmg_dealt;
-    f32 health_on_dmg_taken;
-
-    int32 health_regen;
-    f32 health_regen_rel;
-    f32 health_regen_on_dmg_dealt;
-    f32 health_regen_on_dmg_taken;
-
-    int32 resource;
-    f32 resource_on_dmg_dealt;
-    f32 resource_on_dmg_taken;
-
-    int32 resource_regen;
-    f32 resource_regen_rel;
-    f32 resource_regen_on_dmg_dealt;
-    f32 resource_regen_on_dmg_taken;
-
-    int32 resource_loss;
-    f32 resource_loss_on_dmg_dealt;
-    f32 resource_loss_on_dmg_taken;
-
-    // Defense types
-    //      think about it as armor and/or resistence if it helps
-    int32 defense_pircing;
-    int32 defense_slashing;
-    int32 defense_bludgeoning;
-    int32 defense_stabbing;
-    int32 defense_fire;
-    int32 defense_water;
-    int32 defense_ice;
-    int32 defense_earth;
-    int32 defense_wind;
-    int32 defense_poison;
-    int32 defense_lightning;
-    int32 defense_holy;
-    int32 defense_arcane;
-    int32 defense_corrupted;
-
-    // Accuracy
-    f32 dodge_chance;
-    f32 cc_protection;
-    f32 miss_chance;
-
-    // Movement
-    // Additional speeds may be defined for Mobs
-    f32 speed_walk1;
-    f32 speed_swim1;
-    f32 speed_fly1;
-
-    // Fighting speed
-    f32 speed_cast;
-    f32 speed_attack;
-
-    f32 pickup_range;
-
-    int32 shield;
-
-    f32 aoe_scale;
-    f32 resource_cost;
-    f32 health_cost;
-    f32 attack_range;
-    f32 melee_range;
-    f32 projectile_speed;
-    int32 projectile_count;
-    f32 shatter_probability;
-    f32 shatter_range;
-    int32 shatter_dmg;
-    int32 shatter_count;
-    f32 passthrough_damage;
-    int32 passthrough_count;
-    f32 dot_duration;
-    int32 dot_count;
-    int32 bleeding_dot;
-    int32 poison_dot;
-    int32 burn_dot;
-    int32 ice_dot;
-    int32 resource_drain;
-    int32 shatter_dot;
-    f32 minon_duration;
-    int32 minion_count;
-    f32 effect_spreading_probability;
-    f32 effect_spreading_radius;
-    int32 effect_spreading_max_count;
-    f32 effect_duration;
-    f32 aura_range;
-    f32 cast_duration;
-
-    f32 agro_range;
-};
-
-struct SecondaryStatsRel {
-    // Damage types
-    f32 dmg_pircing;
-    f32 dmg_slashing;
-    f32 dmg_bludgeoning;
-    f32 dmg_stabbing;
-    f32 dmg_fire;
-    f32 dmg_water;
-    f32 dmg_wind;
-    f32 dmg_earth;
-    f32 dmg_poison;
-    f32 dmg_lightning;
-    f32 dmg_ice;
-    f32 dmg_arcane;
-    f32 dmg_corrupted;
-    f32 dmg_holy;
-    f32 dmg_reflection;
-    f32 dmg_reflection_chance;
-
-    f32 dmg_crit;
-    f32 dmg_crit_chance;
-
-    // Health & Resource
-    f32 health;
-    f32 health_on_dmg_dealt;
-    f32 health_on_dmg_taken;
-
-    f32 health_regen;
-    f32 health_regen_on_dmg_dealt;
-    f32 health_regen_on_dmg_taken;
-
-    f32 resource;
-    f32 resource_on_dmg_dealt;
-    f32 resource_on_dmg_taken;
-
-    f32 resource_regen;
-    f32 resource_regen_on_dmg_dealt;
-    f32 resource_regen_on_dmg_taken;
-
-    f32 resource_loss;
-    f32 resource_loss_on_dmg_dealt;
-    f32 resource_loss_on_dmg_taken;
-
-    // Defense types
-    //      think about it as armor and/or resistence if it helps
-    f32 defense_pircing;
-    f32 defense_slashing;
-    f32 defense_bludgeoning;
-    f32 defense_stabbing;
-    f32 defense_fire;
-    f32 defense_water;
-    f32 defense_ice;
-    f32 defense_earth;
-    f32 defense_wind;
-    f32 defense_poison;
-    f32 defense_lightning;
-    f32 defense_holy;
-    f32 defense_arcane;
-    f32 defense_corrupted;
-
-    // Accuracy
-    f32 dodge_chance;
-    f32 cc_protection;
-    f32 miss_chance;
-
-    // Movement
-    // Additional speeds may be defined for Mobs
-    f32 speed_walk1;
-    f32 speed_swim1;
-    f32 speed_fly1;
-
-    // Fighting speed
-    f32 speed_cast;
-    f32 speed_attack;
-
-    f32 pickup_range;
-
-    f32 shield;
-
-    f32 aoe_scale;
-    f32 resource_cost;
-    f32 health_cost;
-    f32 attack_range;
-    f32 melee_range;
-    f32 projectile_speed;
-    f32 projectile_count;
-    f32 shatter_probability;
-    f32 shatter_range;
-    f32 shatter_dmg;
-    f32 shatter_count;
-    f32 passthrough_damage;
-    f32 passthrough_count;
-    f32 dot_duration;
-    f32 dot_count;
-    f32 bleeding_dot;
-    f32 poison_dot;
-    f32 burn_dot;
-    f32 ice_dot;
-    f32 resource_drain;
-    f32 shatter_dot;
-    f32 minon_duration;
-    f32 minion_count;
-    f32 effect_spreading_probability;
-    f32 effect_spreading_radius;
-    f32 effect_spreading_max_count;
-    f32 effect_duration;
-    f32 aura_range;
-    f32 cast_duration;
-
-    f32 agro_range;
-};
-
-struct SecondaryStatsRelPoints {
-    // Damage types
-    byte dmg_pircing;
-    byte dmg_slashing;
-    byte dmg_bludgeoning;
-    byte dmg_stabbing;
-    byte dmg_fire;
-    byte dmg_water;
-    byte dmg_wind;
-    byte dmg_earth;
-    byte dmg_poison;
-    byte dmg_lightning;
-    byte dmg_ice;
-    byte dmg_arcane;
-    byte dmg_corrupted;
-    byte dmg_holy;
-    byte dmg_reflection;
-    byte dmg_reflection_chance;
-
-    byte dmg_crit;
-    byte dmg_crit_chance;
-
-    // Health & Resource
-    byte health;
-    byte health_on_dmg_dealt;
-    byte health_on_dmg_taken;
-
-    byte health_regen;
-    byte health_regen_on_dmg_dealt;
-    byte health_regen_on_dmg_taken;
-
-    byte resource;
-    byte resource_on_dmg_dealt;
-    byte resource_on_dmg_taken;
-
-    byte resource_regen;
-    byte resource_regen_on_dmg_dealt;
-    byte resource_regen_on_dmg_taken;
-
-    byte resource_loss;
-    byte resource_loss_on_dmg_dealt;
-    byte resource_loss_on_dmg_taken;
-
-    // Defense types
-    //      think about it as armor and/or resistence if it helps
-    byte defense_pircing;
-    byte defense_slashing;
-    byte defense_bludgeoning;
-    byte defense_stabbing;
-    byte defense_fire;
-    byte defense_water;
-    byte defense_ice;
-    byte defense_earth;
-    byte defense_wind;
-    byte defense_poison;
-    byte defense_lightning;
-    byte defense_holy;
-    byte defense_arcane;
-    byte defense_corrupted;
-
-    // Accuracy
-    byte dodge_chance;
-    byte cc_protection;
-    byte miss_chance;
-
-    // Movement
-    // Additional speeds may be defined for Mobs
-    byte speed_walk1;
-    byte speed_swim1;
-    byte speed_fly1;
-
-    // Fighting speed
-    byte speed_cast;
-    byte speed_attack;
-
-    byte pickup_range;
-
-    byte shield;
-
-    byte aoe_scale;
-    byte resource_cost;
-    byte health_cost;
-    byte attack_range;
-    byte melee_range;
-    byte projectile_speed;
-    byte projectile_count;
-    byte shatter_probability;
-    byte shatter_range;
-    byte shatter_dmg;
-    byte shatter_count;
-    byte passthrough_damage;
-    byte passthrough_count;
-    byte dot_duration;
-    byte dot_count;
-    byte bleeding_dot;
-    byte poison_dot;
-    byte burn_dot;
-    byte ice_dot;
-    byte resource_drain;
-    byte shatter_dot;
-    byte minon_duration;
-    byte minion_count;
-    byte effect_spreading_probability;
-    byte effect_spreading_radius;
-    byte effect_spreading_max_count;
-    byte effect_duration;
-    byte aura_range;
-    byte cast_duration;
-
-    byte agro_range;
-};
-
-struct FixedStats {
-    // Movement
-    // Additional speeds may be defined for Mobs
-    float speed_walk1;
-    float speed_swim1;
-    float speed_fly1;
-
-    f32 speed_jump;
-    f32 speed_dodge;
-    f32 speed_turn;
-};
-
-// @question Do we even want this?
-struct PlayerStats {
-    f32 pickup_range;
-};
-
-struct SMobStatsTotal {
-    PrimaryStatsPoints primary_total;
-    SecondaryStatsValues secondary_total;
-
-    FixedStats fixed_total;
-
-    uint32 shield_type;
-    uint32 shield;
-    bool shield_dispellable;
-};
-
-struct SMobStatsTotalCached {
-    PrimaryStatsPoints primary_total;
-    PrimaryStatsPoints primary_char; // Only recalculated when char stats change
-    PrimaryStatsPoints primary_skill; // Only recalculated when skill effect runs out
-    PrimaryStatsPoints primary_item; // Only recalculated when item changes
-    PrimaryStatsPoints primary_effect; // External e.g. from mob or ally
-
-    SecondaryStatsValues secondary_total;
-    SecondaryStatsValues secondary_char; // Only recalculated when char stats change
-    SecondaryStatsValues secondary_skill; // Only recalculated when skill effect runs out
-    SecondaryStatsValues secondary_item; // Only recalculated when item changes
-    SecondaryStatsValues secondary_effect; // External e.g. from mob or ally
-};
-
 struct SMobStatsPoints {
+    // @todo Add min max for dmg
+    //      Every attack should have a damage range (maybe 5%?)
+
    // Self stats
    PrimaryStatsPoints primary_stats;
    SecondaryStatsPoints secondary_stats; // @todo this is bad, a char doesn't have fire dmg but might have crit chance ... needs to split?

    // Item modifiers
    PrimaryStatsPoints item_primary_add;
-    PrimaryStatsRelPoints item_primary_mul;
+    PrimaryStatsPoints item_primary_mul;

    SecondaryStatsPoints item_secondary_add;
-    SecondaryStatsRelPoints item_secondary_mul;
+    SecondaryStatsPoints item_secondary_mul;

    // Skill modifiers
    PrimaryStatsPoints skill_primary_add;
-    PrimaryStatsRelPoints skill_primary_mul;
+    PrimaryStatsPoints skill_primary_mul;

    SecondaryStatsPoints skill_secondary_add;
-    SecondaryStatsRelPoints skill_secondary_mul;
+    SecondaryStatsPoints skill_secondary_mul;
 };

 #endif
--- a/models/mob/MobStatsType.h
+++ b/models/mob/MobStatsType.h
@ -0,0 +1,37 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_MOB_STATS_TYPE_H
+#define TOS_MODELS_MOB_STATS_TYPE_H
+
+// physical
+#define MOB_STATS_TYPE_SLASHING 1
+#define MOB_STATS_TYPE_BLUDGEONING 2
+#define MOB_STATS_TYPE_STABBING 3
+
+// elemental
+#define MOB_STATS_TYPE_FIRE 4
+#define MOB_STATS_TYPE_WATER 5
+#define MOB_STATS_TYPE_WIND 6
+#define MOB_STATS_TYPE_EARTH 7
+#define MOB_STATS_TYPE_POISON 8
+#define MOB_STATS_TYPE_LIGHTNING 9
+#define MOB_STATS_TYPE_ICE 10
+
+// magic
+#define MOB_STATS_TYPE_ARCANE 11
+#define MOB_STATS_TYPE_CORRUPTED 12
+#define MOB_STATS_TYPE_HOLY 13
+
+// Number of the individual types above
+// NOTE(review): the ids start at 1 while SecondaryStatsPoints::dmg has MOB_STATS_TYPE_SIZE elements (valid indices 0..12).
+//      Presumably the ids get mapped via id - 1, using an id directly as index would put MOB_STATS_TYPE_HOLY (13) out of bounds - confirm
+#define MOB_STATS_TYPE_SIZE 13
+
+// NOTE(review): presumably ids of the three categories above; they are larger than MOB_STATS_TYPE_SIZE
+//      and therefore can't be used as index of an array with MOB_STATS_TYPE_SIZE elements - confirm
+#define MOB_STATS_TYPE_PHYSICAL 14
+#define MOB_STATS_TYPE_MAGICAL 15
+#define MOB_STATS_TYPE_ELEMENTAL 16
+
+#endif
--- a/models/mob/PrimaryStatsPoints.cpp
+++ b/models/mob/PrimaryStatsPoints.cpp
@ -0,0 +1,25 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_MOB_PRIMARY_STATS_POINTS_C
+#define TOS_MODELS_MOB_PRIMARY_STATS_POINTS_C
+
+#include "../../stdlib/simd/SIMD_I8.h"
+#include "PrimaryStatsPoints.h"
+
+// Passes the primary stat points together with the factor 1.3f to simd_mult, values is handed over as output
+// NOTE(review): PrimaryStatsPoints holds uint16 members but is passed as int16*, the cast also removes the const qualifier
+// NOTE(review): sizeof(PrimaryStatsPoints) is a byte count, not the member count (PRIMARY_STAT_SIZE) - confirm what simd_mult expects
+// NOTE(review): PrimaryStatsValues is declared as an empty struct, writing int32 results to it would be out of bounds - confirm
+void calculate_primary_values(const PrimaryStatsPoints* points, PrimaryStatsValues* values, int step = 8)
+{
+    simd_mult((int16 *) points, 1.3f, (int32 *) values, sizeof(PrimaryStatsPoints), step);
+}
+
+// Passes the primary stat points together with the factor 0.01f to simd_mult, values is handed over as output
+// NOTE(review): the output is passed as int32*, a factor of 0.01f presumably requires a floating point output - confirm
+// NOTE(review): PrimaryStatsRelValues is declared as an empty struct, writing results to it would be out of bounds - confirm
+void calculate_primary_relatives(const PrimaryStatsPoints* points, PrimaryStatsRelValues* values, int step = 8)
+{
+    simd_mult((int16 *) points, 0.01f, (int32 *) values, sizeof(PrimaryStatsPoints), step);
+}
+
+#endif
--- a/models/mob/PrimaryStatsPoints.h
+++ b/models/mob/PrimaryStatsPoints.h
@ -0,0 +1,36 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_MOB_PRIMARY_STATS_POINTS_H
+#define TOS_MODELS_MOB_PRIMARY_STATS_POINTS_H
+
+#include "../../stdlib/Types.h"
+
+// Number of members in PrimaryStatsPoints
+#define PRIMARY_STAT_SIZE 7
+// Index table with one entry per primary stat.
+// Sized with PRIMARY_STAT_SIZE so that the table can't contain an index behind the last stat
+static const int PRIMARY_STAT_INDICES[PRIMARY_STAT_SIZE] = {0, 1, 2, 3, 4, 5, 6};
+
+// Character stats modifiable through leveling (simple +/- buttons)
+// All members are uint16, the struct is handed to simd_mult as a plain array in PrimaryStatsPoints.cpp
+struct PrimaryStatsPoints {
+    uint16 stat_str; // strength      : affects health + base damage
+    uint16 stat_int; // intelligence  : affects resource + base damage
+    uint16 stat_acc; // accuracy      : affects critical chance + base damage + miss chance
+    uint16 stat_agi; // agility       : affects resource + base damage + dodge chance
+    // @todo not implemented in database
+    uint16 stat_def; // defense       : affects resource + base defense + dodge chance
+    uint16 stat_sta; // stamina       : affects health regen + resource regen
+    // NOTE(review): the description of dexterity is identical to the one of stamina, presumably copied - confirm
+    uint16 stat_dex; // dexterity     : affects health regen + resource regen
+    // @question do we need dex and acc or only one?
+};
+
+// NOTE(review): still empty; calculate_primary_values() passes a pointer to this struct as int32* output to simd_mult,
+//      members presumably have to be added before that function can be used safely - confirm
+struct PrimaryStatsValues {
+};
+
+// NOTE(review): still empty; calculate_primary_relatives() passes a pointer to this struct as int32* output to simd_mult,
+//      members presumably have to be added before that function can be used safely - confirm
+struct PrimaryStatsRelValues {
+};
+
+#endif
--- a/models/mob/SecondaryStatsPoints.cpp
+++ b/models/mob/SecondaryStatsPoints.cpp
@ -0,0 +1,25 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_MOB_SECONDARY_STATS_POINTS_C
+#define TOS_MODELS_MOB_SECONDARY_STATS_POINTS_C
+
+#include "../../stdlib/simd/SIMD_I8.h"
+#include "SecondaryStatsPoints.h"
+
+// Overload for the secondary stats: passes the points together with the factor 1.3f to simd_mult, values is handed over as output
+// NOTE(review): the name says "primary" although the function handles secondary stats, presumably copied from PrimaryStatsPoints.cpp
+// NOTE(review): the cast removes the const qualifier and sizeof(SecondaryStatsPoints) is a byte count,
+//      not the member count (SECONDARY_STAT_SIZE) - confirm what simd_mult expects
+void calculate_primary_values(const SecondaryStatsPoints* points, SecondaryStatsValues* values, int step = 8)
+{
+    simd_mult((int16 *) points, 1.3f, (int32 *) values, sizeof(SecondaryStatsPoints), step);
+}
+
+// Overload for the secondary stats: passes the points together with the factor 0.01f to simd_mult, values is handed over as output
+// NOTE(review): the name says "primary" although the function handles secondary stats, presumably copied from PrimaryStatsPoints.cpp
+// NOTE(review): the parameter type is SecondaryStatsRelPoints while the size is taken from SecondaryStatsPoints,
+//      this only works if both structs have the same size - confirm
+void calculate_primary_relatives(const SecondaryStatsRelPoints* points, SecondaryStatsRelValues* values, int step = 8)
+{
+    simd_mult((int16 *) points, 0.01f, (int32 *) values, sizeof(SecondaryStatsPoints), step);
+}
+
+#endif
--- a/models/mob/SecondaryStatsPoints.h
+++ b/models/mob/SecondaryStatsPoints.h
@ -0,0 +1,348 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_MOB_SECONDARY_STATS_POINTS_H
+#define TOS_MODELS_MOB_SECONDARY_STATS_POINTS_H
+
+#include "../../stdlib/Types.h"
+#include "MobStatsType.h"
+
+#define SECONDARY_STAT_SIZE 90
+static const int SECONDARY_STAT_INDICES[] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+    10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+    20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+    30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+    40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
+    50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
+    60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
+    70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+    80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
+};
+
+/**
+ * @todo optimize order of struct members to ensure optimal struct size
+ */
+
+// Character stats modifiable through skill tree?
+struct SecondaryStatsPoints {
+    // Every member is a uint16: calculate_primary_values() in SecondaryStatsPoints.cpp casts a pointer to
+    // this struct to a flat int16 array, so a member of any other type would break that reinterpretation.
+
+    /*
+    @todo
+    Composite damage types would allow us to combine skills of different players (e.g. arrow flies through fire -> adds fire damage flag)
+    Obviously this doesn't increase the damage directly but can have a positive impact if the enemy has low fire resistance for example
+
+    @question what happens if a skill has two flags (fire&slashing) and the enemy has high resistance vs slashing.
+        Does this mean the damage is reduced, does it reduce by "50%" or does it only reduce the min of fire&slashing resistance.
+            -> if you have no slashing resistance you still take full damage
+    */
+
+    // Damage types
+    // This allows us to create skills with multiple additive damage types AND composite damage that has multiple types at the same time
+    uint16 dmg[MOB_STATS_TYPE_SIZE];
+
+    uint16 dmg_reflection;
+    uint16 dmg_reflection_chance;
+
+    // @question is this a damage number or is this a % number of the total damage?
+    uint16 dmg_crit;
+    uint16 dmg_crit_chance;
+
+    // @question is this similar to the different damage categories, is this a % of the total damage or should this just be a flag
+    // NOTE(review): "pircing" is presumably a typo for "piercing"; renaming would touch every user of the field
+    uint16 dmg_pircing;
+
+    // Health & Resource
+    uint16 health;
+    uint16 health_on_dmg_dealt;
+    uint16 health_on_dmg_taken;
+
+    uint16 health_regen;
+    uint16 health_regen_rel;
+    uint16 health_regen_on_dmg_dealt;
+    uint16 health_regen_on_dmg_taken;
+
+    uint16 resource;
+    uint16 resource_on_dmg_dealt;
+    uint16 resource_on_dmg_taken;
+
+    uint16 resource_regen;
+    uint16 resource_regen_rel;
+    uint16 resource_regen_on_dmg_dealt;
+    uint16 resource_regen_on_dmg_taken;
+
+    uint16 resource_loss;
+    uint16 resource_loss_on_dmg_dealt;
+    uint16 resource_loss_on_dmg_taken;
+
+    // Defense types (resistances, armor, or whatever you want to call it)
+    uint16 defense[MOB_STATS_TYPE_SIZE];
+
+    // Accuracy
+    uint16 block_chance;
+    uint16 block_amount;
+
+    uint16 dodge_chance;
+    uint16 cc_protection;
+    uint16 miss_chance;
+
+    // Movement
+    // Additional speeds may be defined for Mobs
+    uint16 speed_walk1;
+    uint16 speed_swim1;
+    uint16 speed_fly1;
+
+    // Fighting speed
+    uint16 speed_cast;
+    uint16 speed_attack;
+
+    uint16 pickup_range;
+
+    uint16 shield;
+
+    // modifier
+    uint16 aoe_scale;
+    uint16 resource_cost;
+    uint16 health_cost;
+    uint16 attack_range;
+    uint16 melee_range;
+    uint16 projectile_speed;
+    uint16 projectile_count;
+    uint16 shatter_probability;
+    uint16 shatter_range;
+    uint16 shatter_dmg;
+    uint16 shatter_count;
+    uint16 passthrough_damage;
+    uint16 passthrough_count;
+    uint16 dot_duration;
+    uint16 dot_count;
+    uint16 bleeding_dot;
+    uint16 poison_dot;
+    uint16 burn_dot;
+    uint16 ice_dot;
+    uint16 resource_drain;
+    uint16 shatter_dot;
+    uint16 minion_duration;
+    uint16 minion_count;
+    uint16 effect_spreading_probability;
+    uint16 effect_spreading_radius;
+    uint16 effect_spreading_max_count;
+    uint16 effect_duration;
+    uint16 aura_range;
+    uint16 cast_duration;
+
+    // special
+    uint16 aggro_range;
+};
+
+// Variant of SecondaryStatsPoints whose members are all byte sized.
+// Damage is stored as 3 components plus a flag array instead of one value per MOB_STATS_TYPE.
+// NOTE(review): presumably an experimental, more compact layout - confirm whether it is meant to replace SecondaryStatsPoints
+struct SecondaryStatsPoints2 {
+    /*
+    @todo
+    Composite damage types would allow us to combine skills of different players (e.g. arrow flies through fire -> adds fire damage flag)
+    Obviously this doesn't increase the damage directly but can have a positive impact if the enemy has low fire resistance for example
+
+    @question what happens if a skill has two flags (fire&slashing) and the enemy has high resistance vs slashing.
+        Does this mean the damage is reduced, does it reduce by "50%" or does it only reduce the min of fire&slashing resistance.
+            -> if you have no slashing resistance you still take full damage
+    */
+
+    // Damage types
+    // This allows us to create skills with multiple additive damage types AND composite damage that has multiple types at the same time
+    byte damage[3];
+    byte damage_flag[3 * 5]; // 3 * 5 = 15, every damage component can have up to 3 damage types and a limited amount from others
+
+    byte dmg_reflection;
+    byte dmg_reflection_chance;
+
+    // @question is this a damage number or is this a % number of the total damage?
+    byte dmg_crit;
+    byte dmg_crit_chance;
+
+    // @question is this similar to the different damage categories, is this a % of the total damage or should this just be a flag
+    byte dmg_pircing;
+
+    // Health & Resource
+    byte health;
+    byte health_on_dmg_dealt;
+    byte health_on_dmg_taken;
+
+    byte health_regen;
+    byte health_regen_rel;
+    byte health_regen_on_dmg_dealt;
+    byte health_regen_on_dmg_taken;
+
+    byte resource;
+    byte resource_on_dmg_dealt;
+    byte resource_on_dmg_taken;
+
+    byte resource_regen;
+    byte resource_regen_rel;
+    byte resource_regen_on_dmg_dealt;
+    byte resource_regen_on_dmg_taken;
+
+    byte resource_loss;
+    byte resource_loss_on_dmg_dealt;
+    byte resource_loss_on_dmg_taken;
+
+    // Defense types (resistances, armor, or whatever you want to call it)
+    byte defense[MOB_STATS_TYPE_SIZE];
+
+    // Accuracy
+    byte block_chance;
+    byte block_amount;
+
+    byte dodge_chance;
+    byte cc_protection;
+    byte miss_chance;
+
+    // Movement
+    // Additional speeds may be defined for Mobs
+    byte speed_walk1;
+    byte speed_swim1;
+    byte speed_fly1;
+
+    // Fighting speed
+    byte speed_cast;
+    byte speed_attack;
+
+    byte pickup_range;
+
+    byte shield;
+
+    // modifier
+    byte aoe_scale;
+    byte resource_cost;
+    byte health_cost;
+    byte attack_range;
+    byte melee_range;
+    byte projectile_speed;
+    byte projectile_count;
+    byte shatter_probability;
+    byte shatter_range;
+    byte shatter_dmg;
+    byte shatter_count;
+    byte passthrough_damage;
+    byte passthrough_count;
+    byte dot_duration;
+    byte dot_count;
+    byte bleeding_dot;
+    byte poison_dot;
+    byte burn_dot;
+    byte ice_dot;
+    byte resource_drain;
+    byte shatter_dot;
+    byte minion_duration;
+    byte minion_count;
+    byte effect_spreading_probability;
+    byte effect_spreading_radius;
+    byte effect_spreading_max_count;
+    byte effect_duration;
+    byte aura_range;
+    byte cast_duration;
+
+    // special
+    byte aggro_range;
+};
+
+// Byte-sized stat set that mirrors SecondaryStatsPoints2 without the damage flags
+// and without the health_regen_rel / resource_regen_rel members.
+// NOTE(review): presumably holds relative (percentage) modifiers - confirm against the code that consumes it
+struct SecondaryStatsRelPoints2 {
+    // Damage types
+    byte damage[3];
+
+    byte dmg_reflection;
+    byte dmg_reflection_chance;
+
+    byte dmg_crit;
+    byte dmg_crit_chance;
+
+    byte dmg_pircing;
+
+    // Health & Resource
+    byte health;
+    byte health_on_dmg_dealt;
+    byte health_on_dmg_taken;
+
+    byte health_regen;
+    byte health_regen_on_dmg_dealt;
+    byte health_regen_on_dmg_taken;
+
+    byte resource;
+    byte resource_on_dmg_dealt;
+    byte resource_on_dmg_taken;
+
+    byte resource_regen;
+    byte resource_regen_on_dmg_dealt;
+    byte resource_regen_on_dmg_taken;
+
+    byte resource_loss;
+    byte resource_loss_on_dmg_dealt;
+    byte resource_loss_on_dmg_taken;
+
+    // Defense types
+    //      think about it as armor and/or resistance if it helps
+    byte defense[MOB_STATS_TYPE_SIZE];
+
+    // Accuracy
+    byte block_chance;
+    byte block_amount;
+
+    byte dodge_chance;
+    byte cc_protection;
+    byte miss_chance;
+
+    // Movement
+    // Additional speeds may be defined for Mobs
+    byte speed_walk1;
+    byte speed_swim1;
+    byte speed_fly1;
+
+    // Fighting speed
+    byte speed_cast;
+    byte speed_attack;
+
+    byte pickup_range;
+
+    byte shield;
+
+    byte aoe_scale;
+    byte resource_cost;
+    byte health_cost;
+    byte attack_range;
+    byte melee_range;
+    byte projectile_speed;
+    byte projectile_count;
+    byte shatter_probability;
+    byte shatter_range;
+    byte shatter_dmg;
+    byte shatter_count;
+    byte passthrough_damage;
+    byte passthrough_count;
+    byte dot_duration;
+    byte dot_count;
+    byte bleeding_dot;
+    byte poison_dot;
+    byte burn_dot;
+    byte ice_dot;
+    byte resource_drain;
+    byte shatter_dot;
+    byte minion_duration;
+    byte minion_count;
+    byte effect_spreading_probability;
+    byte effect_spreading_radius;
+    byte effect_spreading_max_count;
+    byte effect_duration;
+    byte aura_range;
+    byte cast_duration;
+
+    byte aggro_range;
+};
+
+struct SecondaryStatsValues {
+};
+
+struct SecondaryStatsRelValues {
+};
+
+#endif
--- a/models/mob/_mob_category.h
+++ b/models/mob/_mob_category.h
--- a/models/mob/_mob_list.h
+++ b/models/mob/_mob_list.h
--- a/models/mob/player/Player.h
+++ b/models/mob/player/Player.h
@ -29,7 +29,7 @@
 #if SERVER
    struct SPlayer {
        Mob mob;
-        SMobStats player_stats;
+        SMobStatsPoints player_stats;

        char name[MAX_CHAR_NAME_LENGTH];
        char title[MAX_CHAR_TITLE_LENGTH];
@ -92,7 +92,7 @@

 struct CPlayer {
    Mob mob;
-    CMobStats player_stats;
+    SMobStatsPoints player_stats;

    char name[MAX_CHAR_NAME_LENGTH];
    char title[MAX_CHAR_TITLE_LENGTH];
--- a/models/mob/player/PlayerStats.h
+++ b/models/mob/player/PlayerStats.h
@ -0,0 +1,19 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_MOB_PLAYER_STATS_H
+#define TOS_MODELS_MOB_PLAYER_STATS_H
+
+#include "../../../stdlib/Types.h"
+
+// @question Do we even want this?
+struct PlayerStats {
+    f32 pickup_range;
+};
+
+#endif
--- a/models/mob/player/PlayerXPRequirement.h
+++ b/models/mob/player/PlayerXPRequirement.h
@ -0,0 +1,18 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_MODELS_MOB_PLAYER_XP_REQUIREMENT_H
+#define TOS_MODELS_MOB_PLAYER_XP_REQUIREMENT_H
+
+#include "../../../stdlib/Types.h"
+
+struct PlayerXPRequirement {
+    int xp;
+};
+
+#endif
--- a/models/mob/player/_player_class.h
+++ b/models/mob/player/_player_class.h
@ -0,0 +1,8 @@
+#ifndef TOS_MODELS_MOB_PLAYER_CLASS_H
+#define TOS_MODELS_MOB_PLAYER_CLASS_H
+
+#define PLAYER_CLASS_MAGE 1
+
+#define PLAYER_CLASS_SIZE 24
+
+#endif
--- a/models/mob/skill/Skill.h
+++ b/models/mob/skill/Skill.h
@ -27,6 +27,7 @@ struct Skill
    // @todo animations
    void* animation_casting;
    void* animation_channeling;
+    void* icon;

    // @todo e.g. attack command, movement command, etc. for totems and minions
    void* commands;
@ -63,34 +64,34 @@ struct Skill
    // You can have 2 stats for 2 target types (e.g. you could create a buff and debuff in one skill)
    // 1
    PrimaryStatsPoints stats1_primary_add;
-    PrimaryStatsRelPoints stats1_primary_mul;
+    PrimaryStatsPoints stats1_primary_mul;

    SecondaryStatsPoints stats1_secondary_add;
-    SecondaryStatsRelPoints stats1_secondary_mul;
+    SecondaryStatsPoints stats1_secondary_mul;
    StatsTarget stats1_target;

    // 2
    PrimaryStatsPoints stats2_primary_add;
-    PrimaryStatsRelPoints stats2_primary_mul;
+    PrimaryStatsPoints stats2_primary_mul;

    SecondaryStatsPoints stats2_secondary_add;
-    SecondaryStatsRelPoints stats2_secondary_mul;
+    SecondaryStatsPoints stats2_secondary_mul;
    StatsTarget stats2_target;

    // Modifiers
    // Char
    PrimaryStatsPoints primary_char_add;
-    PrimaryStatsRelPoints primary_char_mul;
+    PrimaryStatsPoints primary_char_mul;

    SecondaryStatsPoints secondary_char_add;
-    SecondaryStatsRelPoints secondary_char_mul;
+    SecondaryStatsPoints secondary_char_mul;

    // Item
    PrimaryStatsPoints primary_item_add;
-    PrimaryStatsRelPoints primary_item_mul;
+    PrimaryStatsPoints primary_item_mul;

    SecondaryStatsPoints secondary_item_add;
-    SecondaryStatsRelPoints secondary_item_mul;
+    SecondaryStatsPoints secondary_item_mul;

    int skill_movement; // none, follows target, random moevement, random movement in aoe
    // @todo how to make specific custom movement pattern for boss fights
@ -109,6 +110,7 @@ struct Skill

    bool is_range;
    void* attack_anim;
+    int movement_pattern; // the skill moves in a specific pattern (e.g. straight line, random, circular motion, left/right wave, ...)

    bool is_melee;

--- a/models/object/_object_list.h
+++ b/models/object/_object_list.h
--- a/models/object/_object_types.h
+++ b/models/object/_object_types.h
--- a/models/settings/Settings.h
+++ b/models/settings/Settings.h
@ -70,6 +70,13 @@ struct SSettings {
    uint32 message_cache = 1024;

    uint32 interpolation_buffer;
+
+    bool is_auction_house_enabled = true;
+    bool is_direct_trading_enabled = true;
+
+    // @todo add more server settings for tournaments, tournament modes
+    // @todo add more server settings for raids and dungeons
+    // @todo add more settings for pvp
 };

 // Player settings that the server needs to know about
@ -94,6 +101,7 @@ struct CSettings {
    byte gpu_api = SETTING_TYPE_GPU_API_NONE;
    byte gpu_type = SETTING_TYPE_GPU_MEDIUM;
    byte gpu_fps = SETTING_TYPE_UNLIMITED;
+    byte gpu_memory = 4;

    byte gpu_aspect_ratio;
    byte gpu_resolution;
--- a/models/settings/client_high.cfg
+++ b/models/settings/client_high.cfg
@ -0,0 +1,7 @@
+texutre_count_8192x8192
+texutre_count_4096x4096
+texutre_count_2048x2048
+texutre_count_1024x1024
+texutre_count_512x512
+texutre_count_256x256
+texutre_count_128x128
--- a/network/Client.h
+++ b/network/Client.h
@ -15,7 +15,7 @@

 #include "SocketConnection.h"
 #include "../stdlib/Types.h"
-#include "../utils/RingMemory.h"
+#include "../memory/RingMemory.h"

 #if _WIN32
    #include <winsock2.h>
--- a/network/packet/PacketCache.h
+++ b/network/packet/PacketCache.h
@ -9,7 +9,7 @@
 #ifndef TOS_NETWORK_PACKET_CACHE_H
 #define TOS_NETWORK_PACKET_CACHE_H

-#include "../../utils/RingMemory.h"
+#include "../../memory/RingMemory.h"
 #include "../../utils/BufferMemory.h"

 #if _WIN32
--- a/platform/linux/UtilsLinux.h
+++ b/platform/linux/UtilsLinux.h
@ -72,7 +72,7 @@ uint64 last_modified(const char* filename)
 }

 inline
-void file_read(const char* filename, file_body* file)
+void file_read(const char* filename, FileBody* file, RingMemory* ring = NULL)
 {
    FILE *fp = fopen(filename, "rb");
    fseek(fp, 0, SEEK_END);
@ -80,6 +80,10 @@ void file_read(const char* filename, file_body* file)
    file->size = ftell(fp);
    rewind(fp);

+    if (ring != NULL) {
+        file->content = ring_get_memory(ring, file->size);
+    }
+
    fread(file->content, 1, file->size, fp);

    fclose(fp);
@ -92,11 +96,6 @@ uint64_t file_read_struct(const char* filename, void* file, uint32 size) {
        return 0;
    }

-    fseek(fp, 0, SEEK_END);
-    long fsize = ftell(fp);
-    fseek(fp, 0, SEEK_SET);
-
-    ASSERT_SIMPLE(fsize > size);
    size_t read_bytes = fread(file, 1, size, fp);
    fclose(fp);

@ -104,7 +103,7 @@ uint64_t file_read_struct(const char* filename, void* file, uint32 size) {
 }

 inline
-bool file_write(const char* filename, const file_body* file) {
+bool file_write(const char* filename, const FileBody* file) {
    FILE *fp = fopen(filename, "wb");
    if (!fp) {
        return false;
@ -186,7 +185,7 @@ inline bool file_append(FILE* fp, const char* file) {
    return written == length;
 }

-inline bool file_append(const char* filename, const file_body* file) {
+inline bool file_append(const char* filename, const FileBody* file) {
    FILE *fp = get_append_handle(filename);
    if (!fp) {
        return false;
@ -212,6 +211,11 @@ void self_path(char* path) {

 inline void relative_to_absolute(const char* rel, char* path)
 {
+    const char* temp = rel;
+    if (temp[0] == '.' && temp[1] == '/') {
+        temp += 2;
+    }
+
    char self_path[MAX_PATH];
    ssize_t count = readlink("/proc/self/exe", self_path, MAX_PATH - 1);
    if (count == -1) {
@ -224,7 +228,7 @@ inline void relative_to_absolute(const char* rel, char* path)
        *(last + 1) = '\0';
    }

-    snprintf(path, MAX_PATH, "%s%s", self_path, rel);
+    snprintf(path, MAX_PATH, "%s%s", self_path, temp);
 }

 inline
--- a/platform/win32/UtilsWin32.h
+++ b/platform/win32/UtilsWin32.h
@ -25,6 +25,7 @@
 inline uint64
 file_size(const char* filename)
 {
+    // @performance Profile against fseek strategy
    HANDLE fp = CreateFileA((LPCSTR) filename,
        GENERIC_READ,
        FILE_SHARE_READ,
@ -48,7 +49,7 @@ file_size(const char* filename)
 }

 inline void
-file_read(const char* filename, file_body* file)
+file_read(const char* filename, FileBody* file, RingMemory* ring = NULL)
 {
    HANDLE fp = CreateFileA((LPCSTR) filename,
        GENERIC_READ,
@ -71,6 +72,10 @@ file_read(const char* filename, file_body* file)
        return;
    }

+    if (ring != NULL) {
+        file->content = ring_get_memory(ring, size.QuadPart);
+    }
+
    DWORD bytes;
    ASSERT_SIMPLE(size.QuadPart < MAX_INT32);
    if (!ReadFile(fp, file->content, (uint32) size.QuadPart, &bytes, NULL)) {
@ -122,7 +127,7 @@ file_read_struct(const char* filename, void* file, uint32 size)
 }

 inline bool
-file_write(const char* filename, const file_body* file)
+file_write(const char* filename, const FileBody* file)
 {
    HANDLE fp = CreateFileA((LPCSTR) filename,
        GENERIC_WRITE,
@ -247,7 +252,7 @@ file_append(HANDLE fp, const char* file)
 }

 inline bool
-file_append(const char* filename, const file_body* file)
+file_append(const char* filename, const FileBody* file)
 {
    HANDLE fp = CreateFileA((LPCSTR) filename,
        FILE_APPEND_DATA,
@ -305,12 +310,17 @@ inline void relative_to_absolute(const char* rel, char* path)
        return;
    }

+    const char* temp = rel;
+    if (temp[0] == '.' && temp[1] == '/') {
+        temp += 2;
+    }
+
    char* last = strrchr(self_path, '\\');
    if (last != NULL) {
        *(last + 1) = '\0';
    }

-    snprintf(path, MAX_PATH, "%s%s", self_path, rel);
+    snprintf(path, MAX_PATH, "%s%s", self_path, temp);
 }

 void log_to_file(LogPool* logs, HANDLE fp)
--- a/platform/win32/input/RawInput.h
+++ b/platform/win32/input/RawInput.h
@ -161,24 +161,14 @@ void handle_input(LPARAM lParam, InputState* states)

        // https://learn.microsoft.com/en-us/windows/win32/api/winuser/ns-winuser-rawkeyboard

-        RAWKEYBOARD rawKB = raw->data.keyboard;
+        RAWKEYBOARD rawKB =  raw->data.keyboard;

-        states[i].key = raw->data.keyboard.MakeCode;
-        states[i].key_up = raw->data.keyboard.Flags & RI_KEY_BREAK;
-        states[i].key_down = raw->data.keyboard.Flags & RI_KEY_MAKE;
+        if (rawKB.Flags & RI_KEY_BREAK) {
+            states[i].keys_down_old[states[i].up_index++] = rawKB.MakeCode;
+        }

-        if (states[i].key_down) {
-            for (int j = 0; j < MAX_KEY_PRESSES; ++j) {
-                if (states[i].keys_down[j] == NULL) {
-                    states[i].keys_down[j] = states[i].key;
-                }
-            }
-        } else if (states[i].key_up) {
-            for (int j = 0; j < MAX_KEY_PRESSES; ++j) {
-                if (states[i].keys_down[j] == states[i].key) {
-                    states[i].keys_down[j] = NULL;
-                }
-            }
+        if (rawKB.Flags & RI_KEY_MAKE) {
+            states[i].keys_down[states[i].down_index++] = rawKB.MakeCode;
        }

        states[i].state_change_keyboard = true;
--- a/platform/win32/input/XInput.h
+++ b/platform/win32/input/XInput.h
@ -102,19 +102,19 @@ void handle_controller_input(ControllerState* states)
        states[controller_index].down = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_DPAD_DOWN;
        states[controller_index].left = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_DPAD_LEFT;
        states[controller_index].right = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_DPAD_RIGHT;
-        states[controller_index].start = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_START;
-        states[controller_index].back = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_BACK;
+        states[controller_index].button[6] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_START;
+        states[controller_index].button[7] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_BACK;

-        states[controller_index].shoulder_left = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_LEFT_SHOULDER;
-        states[controller_index].shoulder_right = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_RIGHT_SHOULDER;
+        states[controller_index].button[4] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_LEFT_SHOULDER;
+        states[controller_index].button[5] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_RIGHT_SHOULDER;

-        states[controller_index].trigger_left = controller_state.Gamepad.bLeftTrigger;
-        states[controller_index].trigger_right = controller_state.Gamepad.bRightTrigger;
+        states[controller_index].trigger[0] = controller_state.Gamepad.bLeftTrigger;
+        states[controller_index].trigger[1] = controller_state.Gamepad.bRightTrigger;

-        states[controller_index].button_a = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_A;
-        states[controller_index].button_b = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_B;
-        states[controller_index].button_x = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_X;
-        states[controller_index].button_y = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_Y;
+        states[controller_index].button[0] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_A;
+        states[controller_index].button[1] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_B;
+        states[controller_index].button[2] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_X;
+        states[controller_index].button[3] = controller_state.Gamepad.wButtons & XINPUT_GAMEPAD_Y;

        states[controller_index].stickl_x = controller_state.Gamepad.sThumbLX;
        states[controller_index].stickl_y = controller_state.Gamepad.sThumbLY;
--- a/stdlib/simd/SIMD_F32.h
+++ b/stdlib/simd/SIMD_F32.h
@ -13,6 +13,7 @@
 #include <xmmintrin.h>

 #include "../Types.h"
+#include "SIMD_SVML.h"

 struct f32_4 {
    union {
@ -990,144 +991,214 @@ void simd_mult(const f32* a, const f32* b, f32* result, int size, int steps)
    int i = 0;

    if (steps == 16) {
-        f32_16 a_16;
-        f32_16 b_16;
-        f32_16 result_16;
+        __m512 a_16;
+        __m512 b_16;
+        __m512 result_16;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_16 = _mm512_loadu_ps(a);
+            b_16 = _mm512_loadu_ps(b);
+            result_16 = _mm512_mul_ps(a_16, b_16);
+            _mm512_store_ps(result, result_16);

-            a_16 = load_f32_16(a);
-            b_16 = load_f32_16(b);
-            result_16 = a_16 * b_16;
-            unload_f32_16(result_16, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 8) {
-        f32_8 a_8;
-        f32_8 b_8;
-        f32_8 result_8;
+        __m256 a_8;
+        __m256 b_8;
+        __m256 result_8;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_8 = _mm256_loadu_ps(a);
+            b_8 = _mm256_loadu_ps(b);
+            result_8 = _mm256_mul_ps(a_8, b_8);
+            _mm256_store_ps(result, result_8);

-            a_8 = load_f32_8(a);
-            b_8 = load_f32_8(b);
-            result_8 = a_8 * b_8;
-            unload_f32_8(result_8, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 4) {
-        f32_4 a_4;
-        f32_4 b_4;
-        f32_4 result_4;
+        __m128 a_4;
+        __m128 b_4;
+        __m128 result_4;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_4 = _mm_loadu_ps(a);
+            b_4 = _mm_loadu_ps(b);
+            result_4 = _mm_mul_ps(a_4, b_4);
+            _mm_store_ps(result, result_4);

-            a_4 = load_f32_4(a);
-            b_4 = load_f32_4(b);
-            result_4 = a_4 * b_4;
-            unload_f32_4(result_4, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    }

    for (; i < size; ++i) {
+        *result = *a * *b;
+
        ++a;
        ++b;
        ++result;
-
-        *result = *a * *b;
    }
 }

 inline
-void f32_4_mult(const f32* a, const f32* b, f32* result)
-{
-    f32_4 a_4 = load_f32_4(a);
-    f32_4 b_4 = load_f32_4(b);
-    f32_4 result_4 = a_4 * b_4;
-
-    unload_f32_4(result_4, result);
-}
-
-inline
-void simd_mult(const f32* a, const f32* b, f32* result, int size, int steps)
+void simd_mult(const f32* a, f32 b, f32* result, int size, int steps)
 {
    int i = 0;

    if (steps == 16) {
-        f32_16 a_16;
-        f32_16 b_16;
-        f32_16 result_16;
+        __m512 a_16;
+        __m512 b_16 = _mm512_set1_ps(b);
+        __m512 result_16;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_16 = _mm512_loadu_ps(a);
+            result_16 = _mm512_mul_ps(a_16, b_16);
+            _mm512_store_ps(result, result_16);

-            a_16 = load_f32_16(a);
-            b_16 = load_f32_16(b);
-            result_16 = a_16 + b_16;
-            unload_f32_16(result_16, result);
+            a += steps;
+            result += steps;
       }
    } else if (steps == 8) {
-        f32_8 a_8;
-        f32_8 b_8;
-        f32_8 result_8;
+        __m256 a_8;
+        __m256 b_8 = _mm256_set1_ps(b);
+        __m256 result_8;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_8 = _mm256_loadu_ps(a);
+            result_8 = _mm256_mul_ps(a_8, b_8);
+            _mm256_store_ps(result, result_8);

-            a_8 = load_f32_8(a);
-            b_8 = load_f32_8(b);
-            result_8 = a_8 + b_8;
-            unload_f32_8(result_8, result);
+            a += steps;
+            result += steps;
       }
    } else if (steps == 4) {
-        f32_4 a_4;
-        f32_4 b_4;
-        f32_4 result_4;
+        __m128 a_4;
+        __m128 b_4 = _mm_set1_ps(b);
+        __m128 result_4;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_4 = _mm_loadu_ps(a);
+            result_4 = _mm_mul_ps(a_4, b_4);
+            _mm_store_ps(result, result_4);

-            a_4 = load_f32_4(a);
-            b_4 = load_f32_4(b);
-            result_4 = a_4 + b_4;
-            unload_f32_4(result_4, result);
+            a += steps;
+            result += steps;
       }
    }

    for (; i < size; ++i) {
-        ++a;
-        ++b;
-        ++result;
+        *result = *a * b;

-        *result = *a + *b;
+        ++a;
+        ++result;
    }
 }

 inline
-void f32_4_add(const f32* a, const f32* b, f32* result)
+void simd_div(const f32* a, f32 b, f32* result, int size, int steps)
 {
-    f32_4 a_4 = load_f32_4(a);
-    f32_4 b_4 = load_f32_4(b);
-    f32_4 result_4 = a_4 + b_4;
+    int i = 0;

-    unload_f32_4(result_4, result);
+    if (steps == 16) {
+        __m512 a_16;
+        __m512 b_16 = _mm512_set1_ps(b);
+        __m512 result_16;
+
+        for (; i <= size - steps; i += steps) {
+            a_16 = _mm512_loadu_ps(a);
+            result_16 = _mm512_div_ps(a_16, b_16);
+            _mm512_store_ps(result, result_16);
+
+            a += steps;
+            result += steps;
+       }
+    } else if (steps == 8) {
+        __m256 a_8;
+        __m256 b_8 = _mm256_set1_ps(b);
+        __m256 result_8;
+
+        for (; i <= size - steps; i += steps) {
+            a_8 = _mm256_loadu_ps(a);
+            result_8 = _mm256_div_ps(a_8, b_8);
+            _mm256_store_ps(result, result_8);
+
+            a += steps;
+            result += steps;
+       }
+    } else if (steps == 4) {
+        __m128 a_4;
+        __m128 b_4 = _mm_set1_ps(b);
+        __m128 result_4;
+
+        for (; i <= size - steps; i += steps) {
+            a_4 = _mm_loadu_ps(a);
+            result_4 = _mm_div_ps(a_4, b_4);
+            _mm_store_ps(result, result_4);
+
+            a += steps;
+            result += steps;
+       }
+    }
+
+    for (; i < size; ++i) {
+        *result = *a / b;
+
+        ++a;
+        ++result;
+    }
 }

-// @todo add more operations like the one above "f32_4_mult()"
+// Divides size floats from a by the scalar b and packs the quotients 8 per __m256 into result.
+// result must provide one vector per started group of 8 floats, i.e. (size + 7) / 8 vectors.
+// When size is not a multiple of 8 the unused lanes of the last vector are set to 0.
+inline
+void simd_div(const f32* a, f32 b, __m256* result, int size)
+{
+    int i = 0;
+    int j = 0;
 
+    // @todo this is how all the functions should be implemented that take in basic types and output basic types
+    __m256 a_8;
+    __m256 b_8 = _mm256_set1_ps(b);
+    __m256 result_8;
+
+    for (; i <= size - 8; i += 8) {
+        a_8 = _mm256_loadu_ps(a);
+        result_8 = _mm256_div_ps(a_8, b_8);
+        result[j] = result_8;
+
+        a += 8;
+        ++j;
+    }
+
+    // Remainder that does not fill a whole vector; nothing is written when size is a multiple of 8
+    int diff = size - i;
+    if (diff > 0) {
+        alignas(32) float temp[8] = {0.0f};
+
+        // a was advanced inside the loop and already points at the first unprocessed element,
+        // so the tail is indexed from 0 and not from i
+        for (int k = 0; k < diff; k++) {
+            temp[k] = a[k] / b;
+        }
+
+        result[j] = _mm256_loadu_ps(temp);
+    }
+}
+
+// Compares every lane of the size vectors in a against the scalar b (lane <= b, ordered, non-signaling)
+// and writes one bool per lane to result, so result receives size * 8 entries.
+inline
+void simd_cmp_le(const __m256* a, f32 b, bool* result, int size)
+{
+    const __m256 limit = _mm256_set1_ps(b);
+    bool* out = result;
+
+    for (int vec = 0; vec < size; ++vec) {
+        // Bit n of lanes is set when lane n of a[vec] is <= b
+        const int lanes = _mm256_movemask_ps(_mm256_cmp_ps(a[vec], limit, _CMP_LE_OQ));
+
+        for (int bit = 0; bit < 8; ++bit) {
+            *out = ((lanes >> bit) & 1) != 0;
+            ++out;
+        }
+    }
+}
+
+// @todo Put a guard or warning around the trigonometric functions since they are only implemented for the msvc/intel compilers
 inline
 f32_4 simd_sin(f32_4 a)
 {
--- a/stdlib/simd/SIMD_I16.h
+++ b/stdlib/simd/SIMD_I16.h
@ -789,188 +789,4 @@ inline bool all_false(int16_32 a)
 // @todo from down here we can optimize some of the code by NOT using the wrappers
 //      the code is self contained and we could use te intrinsic functions directly

-inline
-void simd_mult(const int16* a, const int16* b, int16* result, int size, int steps)
-{
-    int i = 0;
-
-    if (steps == 16) {
-        int16_32 a_16;
-        int16_32 b_16;
-        int16_32 result_16;
-
-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
-
-            a_16 = load_int16_32(a);
-            b_16 = load_int16_32(b);
-            result_16 = a_16 * b_16;
-            unload_int16_32(result_16, result);
-       }
-    } else if (steps == 8) {
-        int16_16 a_8;
-        int16_16 b_8;
-        int16_16 result_8;
-
-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
-
-            a_8 = load_int16_16(a);
-            b_8 = load_int16_16(b);
-            result_8 = a_8 * b_8;
-            unload_int16_16(result_8, result);
-       }
-    } else if (steps == 4) {
-        int16_8 a_4;
-        int16_8 b_4;
-        int16_8 result_4;
-
-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
-
-            a_4 = load_int16_8(a);
-            b_4 = load_int16_8(b);
-            result_4 = a_4 * b_4;
-            unload_int16_8(result_4, result);
-       }
-    }
-
-    for (; i < size; ++i) {
-        ++a;
-        ++b;
-        ++result;
-
-        *result = *a * *b;
-    }
-}
-
-inline
-void simd_mult(const int16* a, const int16* b, int16* result)
-{
-    int16_8 a_4 = load_int16_8(a);
-    int16_8 b_4 = load_int16_8(b);
-    int16_8 result_4 = a_4 * b_4;
-
-    unload_int16_8(result_4, result);
-}
-
-inline
-void int16_16_mult(const int16* a, const int16* b, int16* result)
-{
-    int16_16 a_8 = load_int16_16(a);
-    int16_16 b_8 = load_int16_16(b);
-    int16_16 result_8 = a_8 * b_8;
-
-    unload_int16_16(result_8, result);
-}
-
-inline
-void int16_32_mult(const int16* a, const int16* b, int16* result)
-{
-    int16_32 a_16 = load_int16_32(a);
-    int16_32 b_16 = load_int16_32(b);
-    int16_32 result_16 = a_16 * b_16;
-
-    unload_int16_32(result_16, result);
-}
-
-inline
-void simd_add(const int16* a, const int16* b, int16* result, int size, int steps)
-{
-    int i = 0;
-
-    if (steps == 16) {
-        int16_32 a_16;
-        int16_32 b_16;
-        int16_32 result_16;
-
-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
-
-            a_16 = load_int16_32(a);
-            b_16 = load_int16_32(b);
-            result_16 = a_16 + b_16;
-            unload_int16_32(result_16, result);
-       }
-    } else if (steps == 8) {
-        int16_16 a_8;
-        int16_16 b_8;
-        int16_16 result_8;
-
-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
-
-            a_8 = load_int16_16(a);
-            b_8 = load_int16_16(b);
-            result_8 = a_8 + b_8;
-            unload_int16_16(result_8, result);
-       }
-    } else if (steps == 4) {
-        int16_8 a_4;
-        int16_8 b_4;
-        int16_8 result_4;
-
-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
-
-            a_4 = load_int16_8(a);
-            b_4 = load_int16_8(b);
-            result_4 = a_4 + b_4;
-            unload_int16_8(result_4, result);
-       }
-    }
-
-    for (; i < size; ++i) {
-        ++a;
-        ++b;
-        ++result;
-
-        *result = *a + *b;
-    }
-}
-
-inline
-void int16_8_add(const int16* a, const int16* b, int16* result)
-{
-    int16_8 a_4 = load_int16_8(a);
-    int16_8 b_4 = load_int16_8(b);
-    int16_8 result_4 = a_4 + b_4;
-
-    unload_int16_8(result_4, result);
-}
-
-inline
-void int16_16_add(const int16* a, const int16* b, int16* result)
-{
-    int16_16 a_8 = load_int16_16(a);
-    int16_16 b_8 = load_int16_16(b);
-    int16_16 result_8 = a_8 + b_8;
-
-    unload_int16_16(result_8, result);
-}
-
-inline
-void int16_32_add(const int16* a, const int16* b, int16* result)
-{
-    int16_32 a_16 = load_int16_32(a);
-    int16_32 b_16 = load_int16_32(b);
-    int16_32 result_16 = a_16 + b_16;
-
-    unload_int16_32(result_16, result);
-}
-
-// @todo add more operations like the one above "int16_8_mult()"
-
 #endif
--- a/stdlib/simd/SIMD_I32.h
+++ b/stdlib/simd/SIMD_I32.h
@ -18,6 +18,8 @@
 // @todo a lot of sse functions require high level (e.g. sse4.1) this needs to be changed to be more general
 //      or better create alternative functions for the available sse version.

+// @question why are we passing structs by value?
+
 struct int32_4 {
    union {
        __m128i s;
@ -86,8 +88,9 @@ inline int32_16 load_int32_16(const int32* mem)
 inline int32_16 init_int32_16(const int32* mem)
 {
    int32_16 simd;
-    simd.s = _mm512_set_epi32(mem[0], mem[1], mem[2], mem[3], mem[4], mem[5], mem[6], mem[7], mem[8], mem[9],
-                                mem[10], mem[11], mem[12], mem[13], mem[14], mem[15]);
+    simd.s = _mm512_set_epi32(
+        mem[0], mem[1], mem[2], mem[3], mem[4], mem[5], mem[6], mem[7],
+        mem[8], mem[9], mem[10], mem[11], mem[12], mem[13], mem[14], mem[15]);

    return simd;
 }
@ -654,7 +657,7 @@ inline int32_16 operator!=(int32_16 a, int32_16 b)
 inline int32_4 operator&(int32_4 a, int32_4 b)
 {
    int32_4 simd;
-    simd.s = _mm_and_epi32(a.s, b.s);
+    simd.s = _mm_and_si128(a.s, b.s);

    return simd;
 }
@ -662,7 +665,7 @@ inline int32_4 operator&(int32_4 a, int32_4 b)
 inline int32_8 operator&(int32_8 a, int32_8 b)
 {
    int32_8 simd;
-    simd.s = _mm256_and_epi32(a.s, b.s);
+    simd.s = _mm256_and_si256(a.s, b.s);

    return simd;
 }
@ -670,7 +673,7 @@ inline int32_8 operator&(int32_8 a, int32_8 b)
 inline int32_16 operator&(int32_16 a, int32_16 b)
 {
    int32_16 simd;
-    simd.s = _mm512_and_epi32(a.s, b.s);
+    simd.s = _mm512_and_si512(a.s, b.s);

    return simd;
 }
@ -816,7 +819,7 @@ inline int32_16 simd_max(int32_16 a, int32_16 b)
 inline int32_4 sign(int32_4 a)
 {
    __m128i mask = _mm_set1_epi32(0x80000000);
-    __m128i signBit = _mm_and_epi32(a.s, mask);
+    __m128i signBit = _mm_and_si128(a.s, mask);
    __m128i b = _mm_set1_epi32(1);

    int32_4 simd;
@ -828,7 +831,7 @@ inline int32_4 sign(int32_4 a)
 inline int32_8 sign(int32_8 a)
 {
    __m256i mask = _mm256_set1_epi32(0x80000000);
-    __m256i signBit = _mm256_and_epi32(a.s, mask);
+    __m256i signBit = _mm256_and_si256(a.s, mask);
    __m256i b = _mm256_set1_epi32(1);

    int32_8 simd;
@ -840,7 +843,7 @@ inline int32_8 sign(int32_8 a)
 inline int32_16 sign(int32_16 a)
 {
    __m512i mask = _mm512_set1_epi32(0x80000000);
-    __m512i signBit = _mm512_and_epi32(a.s, mask);
+    __m512i signBit = _mm512_and_si512(a.s, mask);
    __m512i b = _mm512_set1_epi32(1);
    int32_16 simd;

@ -1030,49 +1033,49 @@ void simd_mult(const int32* a, const int32* b, int32* result, int size, int step
    int i = 0;

    if (steps == 16) {
-        int32_16 a_16;
-        int32_16 b_16;
-        int32_16 result_16;
+        __m512i a_16;
+        __m512i b_16;
+        __m512i result_16;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_16 = _mm512_loadu_epi32(a);
+            b_16 = _mm512_loadu_epi32(b);
+            result_16 = _mm512_mul_epi32(a_16, b_16);
+            _mm512_store_epi32(result, result_16);

-            a_16 = load_int32_16(a);
-            b_16 = load_int32_16(b);
-            result_16 = a_16 * b_16;
-            unload_int32_16(result_16, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 8) {
-        int32_8 a_8;
-        int32_8 b_8;
-        int32_8 result_8;
+        __m256i a_8;
+        __m256i b_8;
+        __m256i result_8;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_8 = _mm256_loadu_epi32(a);
+            b_8 = _mm256_loadu_epi32(b);
+            result_8 = _mm256_mul_epi32(a_8, b_8);
+            _mm256_store_si256((__m256i *) result, result_8);

-            a_8 = load_int32_8(a);
-            b_8 = load_int32_8(b);
-            result_8 = a_8 * b_8;
-            unload_int32_8(result_8, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 4) {
-        int32_4 a_4;
-        int32_4 b_4;
-        int32_4 result_4;
+        __m128i a_4;
+        __m128i b_4;
+        __m128i result_4;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_4 = _mm_loadu_epi32(a);
+            b_4 = _mm_loadu_epi32(b);
+            result_4 = _mm_mul_epi32(a_4, b_4);
+            _mm_store_si128((__m128i *) result, result_4);

-            a_4 = load_int32_4(a);
-            b_4 = load_int32_4(b);
-            result_4 = a_4 * b_4;
-            unload_int32_4(result_4, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    }

@ -1091,64 +1094,64 @@ void simd_mult(const int32* a, const f32* b, f32* result, int size, int steps)
    int i = 0;

    if (steps == 16) {
-        int32_16 a_16;
-        f32_16 af_16;
-        f32_16 b_16;
-        f32_16 result_16;
+        __m512i a_16;
+        __m512 af_16;
+        __m512 b_16;
+        __m512 result_16;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_16 = _mm512_loadu_epi32(a);
+            af_16 = _mm512_cvtepi32_ps(a_16);
+            b_16 = _mm512_loadu_ps(b);
+            result_16 = _mm512_mul_ps(af_16, b_16);
+            _mm512_store_ps(result, result_16);

-            a_16 = load_int32_16(a);
-            af_16 = int32_16_to_f32_16(a_16);
-            b_16 = load_f32_16(b);
-            result_16 = af_16 * b_16;
-            unload_f32_16(result_16, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 8) {
-        int32_8 a_8;
-        f32_8 af_8;
-        f32_8 b_8;
-        f32_8 result_8;
+        __m256i a_8;
+        __m256 af_8;
+        __m256 b_8;
+        __m256 result_8;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_8 = _mm256_loadu_epi32(a);
+            af_8 = _mm256_cvtepi32_ps(a_8);
+            b_8 = _mm256_loadu_ps(b);
+            result_8 = _mm256_mul_ps(af_8, b_8);
+            _mm256_store_ps(result, result_8);

-            a_8 = load_int32_8(a);
-            af_8 = int32_8_to_f32_8(a_8);
-            b_8 = load_f32_8(b);
-            result_8 = af_8 * b_8;
-            unload_f32_8(result_8, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 4) {
-        int32_4 a_4;
-        f32_4 af_4;
-        f32_4 b_4;
-        f32_4 result_4;
+        __m128i a_4;
+        __m128 af_4;
+        __m128 b_4;
+        __m128 result_4;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_4 = _mm_loadu_epi32(a);
+            af_4 = _mm_cvtepi32_ps(a_4);
+            b_4 = _mm_loadu_ps(b);
+            result_4 = _mm_mul_ps(af_4, b_4);
+            _mm_store_ps(result, result_4);

-            a_4 = load_int32_4(a);
-            af_4 = int32_4_to_f32_4(a_4);
-            b_4 = load_f32_4(b);
-            result_4 = af_4 * b_4;
-            unload_f32_4(result_4, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    }

    for (; i < size; ++i) {
+        *result = *a * *b;
+
        ++a;
        ++b;
        ++result;
-
-        *result = *a * *b;
    }
 }

@ -1158,134 +1161,198 @@ void simd_mult(const int32* a, const f32* b, int32* result, int size, int steps)
    int i = 0;

    if (steps == 16) {
-        int32_16 a_16;
-        f32_16 af_16;
-        f32_16 b_16;
-        f32_16 result_16;
-        int32_16 resulti_16;
+        __m512i a_16;
+        __m512 af_16;
+        __m512 b_16;
+        __m512 result_16;
+        __m512i resulti_16;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_16 = _mm512_loadu_epi32(a);
+            af_16 = _mm512_cvtepi32_ps(a_16);
+            b_16 = _mm512_loadu_ps(b);
+            result_16 = _mm512_mul_ps(af_16, b_16);
+            resulti_16 = _mm512_cvtps_epi32(result_16);
+            _mm512_store_epi32(result, resulti_16);

-            a_16 = load_int32_16(a);
-            af_16 = int32_16_to_f32_16(a_16);
-            b_16 = load_f32_16(b);
-            result_16 = af_16 * b_16;
-            resulti_16 = f32_16_to_int32_16(result_16);
-            unload_int32_16(resulti_16, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 8) {
-        int32_8 a_8;
-        f32_8 af_8;
-        f32_8 b_8;
-        f32_8 result_8;
-        int32_8 resulti_8;
+        __m256i a_8;
+        __m256 af_8;
+        __m256 b_8;
+        __m256 result_8;
+        __m256i resulti_8;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_8 = _mm256_loadu_epi32(a);
+            af_8 = _mm256_cvtepi32_ps(a_8);
+            b_8 = _mm256_loadu_ps(b);
+            result_8 = _mm256_mul_ps(af_8, b_8);
+            resulti_8 = _mm256_cvtps_epi32(result_8);
+            _mm256_store_si256((__m256i *) result, resulti_8);

-            a_8 = load_int32_8(a);
-            af_8 = int32_8_to_f32_8(a_8);
-            b_8 = load_f32_8(b);
-            result_8 = af_8 * b_8;
-            resulti_8 = f32_8_to_int32_8(result_8);
-            unload_int32_8(resulti_8, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 4) {
-        int32_4 a_4;
-        f32_4 af_4;
-        f32_4 b_4;
-        f32_4 result_4;
-        int32_4 resulti_4;
+        __m128i a_4;
+        __m128 af_4;
+        __m128 b_4;
+        __m128 result_4;
+        __m128i resulti_4;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_4 = _mm_loadu_epi32(a);
+            af_4 = _mm_cvtepi32_ps(a_4);
+            b_4 = _mm_loadu_ps(b);
+            result_4 = _mm_mul_ps(af_4, b_4);
+            resulti_4 = _mm_cvtps_epi32(result_4);
+            _mm_store_si128((__m128i *) result, resulti_4);

-            a_4 = load_int32_4(a);
-            af_4 = int32_4_to_f32_4(a_4);
-            b_4 = load_f32_4(b);
-            result_4 = af_4 * b_4;
-            resulti_4 = f32_4_to_int32_4(result_4);
-            unload_int32_4(resulti_4, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    }

    for (; i < size; ++i) {
+        *result = (int) (*a * *b);
+
        ++a;
        ++b;
        ++result;
-
-        *result = *a * *b;
    }
 }

 inline
-void int32_4_mult(const int32* a, const int32* b, int32* result)
+void simd_mult(const int32* a, f32 b, int32* result, int size, int steps)
 {
-    int32_4 a_4 = load_int32_4(a);
-    int32_4 b_4 = load_int32_4(b);
-    int32_4 result_4 = a_4 * b_4;
+    int i = 0;

-    unload_int32_4(result_4, result);
+    if (steps == 16) {
+        __m512i a_16;
+        __m512 af_16;
+        __m512 b_16 = _mm512_set1_ps(b);
+        __m512 result_16;
+        __m512i resulti_16;
+
+        for (; i <= size - steps; i += steps) {
+            a_16 = _mm512_loadu_epi32(a);
+            af_16 = _mm512_cvtepi32_ps(a_16);
+            result_16 = _mm512_mul_ps(af_16, b_16);
+            resulti_16 = _mm512_cvtps_epi32(result_16);
+            _mm512_store_epi32(result, resulti_16);
+
+            a += steps;
+            result += steps;
+       }
+    } else if (steps == 8) {
+        __m256i a_8;
+        __m256 af_8;
+        __m256 b_8 = _mm256_set1_ps(b);
+        __m256 result_8;
+        __m256i resulti_8;
+
+        for (; i <= size - steps; i += steps) {
+            a_8 = _mm256_loadu_epi32(a);
+            af_8 = _mm256_cvtepi32_ps(a_8);
+            result_8 = _mm256_mul_ps(af_8, b_8);
+            resulti_8 = _mm256_cvtps_epi32(result_8);
+            _mm256_store_si256((__m256i *) result, resulti_8);
+
+            a += steps;
+            result += steps;
+       }
+    } else if (steps == 4) {
+        __m128i a_4;
+        __m128 af_4;
+        __m128 b_4 = _mm_set1_ps(b);
+        __m128 result_4;
+        __m128i resulti_4;
+
+        for (; i <= size - steps; i += steps) {
+            a_4 = _mm_loadu_epi32(a);
+            af_4 = _mm_cvtepi32_ps(a_4);
+            result_4 = _mm_mul_ps(af_4, b_4);
+            resulti_4 = _mm_cvtps_epi32(result_4);
+            _mm_store_si128((__m128i *) result, resulti_4);
+
+            a += steps;
+            result += steps;
+       }
+    }
+
+    for (; i < size; ++i) {
+        *result = (int32) (*a * b);
+
+        ++a;
+        ++result;
+    }
 }

 inline
-void int32_8_mult(const int32* a, const int32* b, int32* result)
+void simd_div(const int32* a, f32 b, f32* result, int size, int steps)
 {
-    int32_8 a_8 = load_int32_8(a);
-    int32_8 b_8 = load_int32_8(b);
-    int32_8 result_8 = a_8 * b_8;
+    int i = 0;

-    unload_int32_8(result_8, result);
-}
+    if (steps == 16) {
+        __m512i a_16;
+        __m512 af_16;
+        __m512 b_16 = _mm512_set1_ps(b);
+        __m512 result_16;

-inline
-void int32_16_mult(const int32* a, const int32* b, int32* result)
-{
-    int32_16 a_16 = load_int32_16(a);
-    int32_16 b_16 = load_int32_16(b);
-    int32_16 result_16 = a_16 * b_16;
+        for (; i <= size - steps; i += steps) {
+            a_16 = _mm512_loadu_epi32(a);
+            af_16 = _mm512_cvtepi32_ps(a_16);
+            result_16 = _mm512_div_ps(af_16, b_16);
+            _mm512_store_ps(result, result_16);

-    unload_int32_16(result_16, result);
-}
+            a += steps;
+            result += steps;
+       }
+    } else if (steps == 8) {
+        // @todo this is how all the functions should be implemented that take in basic types and output basic types
+        __m256i a_8;
+        __m256 af_8;
+        __m256 b_8 = _mm256_set1_ps(b);
+        __m256 result_8;

-inline
-void int32_4_mult(const int32* a, const f32* b, f32* result)
-{
-    int32_4 a_4 = load_int32_4(a);
-    f32_4 af_4 = int32_4_to_f32_4(a_4);
-    f32_4 b_4 = load_f32_4(b);
-    f32_4 result_4 = af_4 * b_4;
+        for (; i <= size - steps; i += steps) {
+            a_8 = _mm256_loadu_epi32(a);
+            af_8 = _mm256_cvtepi32_ps(a_8);
+            result_8 = _mm256_div_ps(af_8, b_8);
+            _mm256_store_ps(result, result_8);

-    unload_f32_4(result_4, result);
-}
+            a += steps;
+            result += steps;
+       }
+    } else if (steps == 4) {
+        __m128i a_4;
+        __m128 af_4;
+        __m128 b_4 = _mm_set1_ps(b);
+        __m128 result_4;

-inline
-void int32_8_mult(const int32* a, const f32* b, f32* result)
-{
-    int32_8 a_8 = load_int32_8(a);
-    f32_8 af_8 = int32_8_to_f32_8(a_8);
-    f32_8 b_8 = load_f32_8(b);
-    f32_8 result_8 = af_8 * b_8;
+        for (; i <= size - steps; i += steps) {
+            a_4 = _mm_loadu_epi32(a);
+            af_4 = _mm_cvtepi32_ps(a_4);
+            result_4 = _mm_div_ps(af_4, b_4);
+            _mm_store_ps(result, result_4);

-    unload_f32_8(result_8, result);
-}
+            a += steps;
+            result += steps;
+       }
+    }

-inline
-void int32_16_mult(const int32* a, const f32* b, f32* result)
-{
-    int32_16 a_16 = load_int32_16(a);
-    f32_16 af_16 = int32_16_to_f32_16(a_16);
-    f32_16 b_16 = load_f32_16(b);
-    f32_16 result_16 = af_16 * b_16;
+    for (; i < size; ++i) {
+        *result = *a / b;

-    unload_f32_16(result_16, result);
+        ++a;
+        ++result;
+    }
 }

 inline
@ -1294,58 +1361,58 @@ void simd_add(const int32* a, const int32* b, int32* result, int size, int steps
    int i = 0;

    if (steps == 16) {
-        int32_16 a_16;
-        int32_16 b_16;
-        int32_16 result_16;
+        __m512i a_16;
+        __m512i b_16;
+        __m512i result_16;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_16 = _mm512_loadu_epi32(a);
+            b_16 = _mm512_loadu_epi32(b);
+            result_16 = _mm512_add_epi32(a_16, b_16);
+            _mm512_store_epi32(result, result_16);

-            a_16 = load_int32_16(a);
-            b_16 = load_int32_16(b);
-            result_16 = a_16 + b_16;
-            unload_int32_16(result_16, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 8) {
-        int32_8 a_8;
-        int32_8 b_8;
-        int32_8 result_8;
+        __m256i a_8;
+        __m256i b_8;
+        __m256i result_8;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_8 = _mm256_loadu_epi32(a);
+            b_8 = _mm256_loadu_epi32(b);
+            result_8 = _mm256_add_epi32(a_8, b_8);
+            _mm256_store_si256((__m256i *) result, result_8);

-            a_8 = load_int32_8(a);
-            b_8 = load_int32_8(b);
-            result_8 = a_8 + b_8;
-            unload_int32_8(result_8, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 4) {
-        int32_4 a_4;
-        int32_4 b_4;
-        int32_4 result_4;
+        __m128i a_4;
+        __m128i b_4;
+        __m128i result_4;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_4 = _mm_loadu_epi32(a);
+            b_4 = _mm_loadu_epi32(b);
+            result_4 = _mm_add_epi32(a_4, b_4);
+            _mm_store_si128((__m128i *) result, result_4);

-            a_4 = load_int32_4(a);
-            b_4 = load_int32_4(b);
-            result_4 = a_4 + b_4;
-            unload_int32_4(result_4, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    }

    for (; i < size; ++i) {
+        *result = *a + *b;
+
        ++a;
        ++b;
        ++result;
-
-        *result = *a + *b;
    }
 }

@ -1355,64 +1422,64 @@ void simd_add(const int32* a, const f32* b, f32* result, int size, int steps)
    int i = 0;

    if (steps == 16) {
-        int32_16 a_16;
-        f32_16 af_16;
-        f32_16 b_16;
-        f32_16 result_16;
+        __m512i a_16;
+        __m512 af_16;
+        __m512 b_16;
+        __m512 result_16;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_16 = _mm512_loadu_epi32(a);
+            af_16 = _mm512_cvtepi32_ps(a_16);
+            b_16 = _mm512_loadu_ps(b);
+            result_16 = _mm512_add_ps(af_16, b_16);
+            _mm512_store_ps(result, result_16);

-            a_16 = load_int32_16(a);
-            af_16 = int32_16_to_f32_16(a_16);
-            b_16 = load_f32_16(b);
-            result_16 = af_16 + b_16;
-            unload_f32_16(result_16, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 8) {
-        int32_8 a_8;
-        f32_8 af_8;
-        f32_8 b_8;
-        f32_8 result_8;
+        __m256i a_8;
+        __m256 af_8;
+        __m256 b_8;
+        __m256 result_8;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_8 = _mm256_loadu_epi32(a);
+            af_8 = _mm256_cvtepi32_ps(a_8);
+            b_8 = _mm256_loadu_ps(b);
+            result_8 = _mm256_add_ps(af_8, b_8);
+            _mm256_store_ps(result, result_8);

-            a_8 = load_int32_8(a);
-            af_8 = int32_8_to_f32_8(a_8);
-            b_8 = load_f32_8(b);
-            result_8 = af_8 + b_8;
-            unload_f32_8(result_8, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 4) {
-        int32_4 a_4;
-        f32_4 af_4;
-        f32_4 b_4;
-        f32_4 result_4;
+        __m128i a_4;
+        __m128 af_4;
+        __m128 b_4;
+        __m128 result_4;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_4 = _mm_loadu_epi32(a);
+            af_4 = _mm_cvtepi32_ps(a_4);
+            b_4 = _mm_loadu_ps(b);
+            result_4 = _mm_add_ps(af_4, b_4);
+            _mm_store_ps(result, result_4);

-            a_4 = load_int32_4(a);
-            af_4 = int32_4_to_f32_4(a_4);
-            b_4 = load_f32_4(b);
-            result_4 = af_4 + b_4;
-            unload_f32_4(result_4, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    }

    for (; i < size; ++i) {
+        *result = *a + *b;
+
        ++a;
        ++b;
        ++result;
-
-        *result = *a + *b;
    }
 }

@ -1422,136 +1489,73 @@ void simd_add(const int32* a, const f32* b, int32* result, int size, int steps)
    int i = 0;

    if (steps == 16) {
-        int32_16 a_16;
-        f32_16 af_16;
-        f32_16 b_16;
-        f32_16 result_16;
-        int32_16 resulti_16;
+        __m512i a_16;
+        __m512 af_16;
+        __m512 b_16;
+        __m512 result_16;
+        __m512i resulti_16;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_16 = _mm512_loadu_epi32(a);
+            af_16 = _mm512_cvtepi32_ps(a_16);
+            b_16 = _mm512_loadu_ps(b);
+            result_16 = _mm512_add_ps(af_16, b_16);
+            resulti_16 = _mm512_cvtps_epi32(result_16);
+            _mm512_store_epi32(result, resulti_16);

-            a_16 = load_int32_16(a);
-            af_16 = int32_16_to_f32_16(a_16);
-            b_16 = load_f32_16(b);
-            result_16 = af_16 + b_16;
-            resulti_16 = f32_16_to_int32_16(result_16);
-            unload_int32_16(resulti_16, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 8) {
-        int32_8 a_8;
-        f32_8 af_8;
-        f32_8 b_8;
-        f32_8 result_8;
-        int32_8 resulti_8;
+        __m256i a_8;
+        __m256 af_8;
+        __m256 b_8;
+        __m256 result_8;
+        __m256i resulti_8;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_8 = _mm256_loadu_epi32(a);
+            af_8 = _mm256_cvtepi32_ps(a_8);
+            b_8 = _mm256_loadu_ps(b);
+            result_8 = _mm256_add_ps(af_8, b_8);
+            resulti_8 = _mm256_cvtps_epi32(result_8);
+            _mm256_store_si256((__m256i *) result, resulti_8);

-            a_8 = load_int32_8(a);
-            af_8 = int32_8_to_f32_8(a_8);
-            b_8 = load_f32_8(b);
-            result_8 = af_8 + b_8;
-            resulti_8 = f32_8_to_int32_8(result_8);
-            unload_int32_8(resulti_8, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    } else if (steps == 4) {
-        int32_4 a_4;
-        f32_4 af_4;
-        f32_4 b_4;
-        f32_4 result_4;
-        int32_4 resulti_4;
+        __m128i a_4;
+        __m128 af_4;
+        __m128 b_4;
+        __m128 result_4;
+        __m128i resulti_4;

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        for (; i <= size - steps; i += steps) {
+            a_4 = _mm_loadu_epi32(a);
+            af_4 = _mm_cvtepi32_ps(a_4);
+            b_4 = _mm_loadu_ps(b);
+            result_4 = _mm_add_ps(af_4, b_4);
+            resulti_4 = _mm_cvtps_epi32(result_4);
+            _mm_store_si128((__m128i *) result, resulti_4);

-            a_4 = load_int32_4(a);
-            af_4 = int32_4_to_f32_4(a_4);
-            b_4 = load_f32_4(b);
-            result_4 = af_4 + b_4;
-            resulti_4 = f32_4_to_int32_4(result_4);
-            unload_int32_4(resulti_4, result);
+            a += steps;
+            b += steps;
+            result += steps;
       }
    }

    for (; i < size; ++i) {
+        *result = (int32) (*a + *b);
+
        ++a;
        ++b;
        ++result;
-
-        *result = *a + *b;
    }
 }

-inline
-void int32_4_add(const int32* a, const int32* b, int32* result)
-{
-    int32_4 a_4 = load_int32_4(a);
-    int32_4 b_4 = load_int32_4(b);
-    int32_4 result_4 = a_4 + b_4;
-
-    unload_int32_4(result_4, result);
-}
-
-inline
-void int32_8_add(const int32* a, const int32* b, int32* result)
-{
-    int32_8 a_8 = load_int32_8(a);
-    int32_8 b_8 = load_int32_8(b);
-    int32_8 result_8 = a_8 + b_8;
-
-    unload_int32_8(result_8, result);
-}
-
-inline
-void int32_16_add(const int32* a, const int32* b, int32* result)
-{
-    int32_16 a_16 = load_int32_16(a);
-    int32_16 b_16 = load_int32_16(b);
-    int32_16 result_16 = a_16 + b_16;
-
-    unload_int32_16(result_16, result);
-}
-
-inline
-void int32_4_add(const int32* a, const f32* b, f32* result)
-{
-    int32_4 a_4 = load_int32_4(a);
-    f32_4 af_4 = int32_4_to_f32_4(a_4);
-    f32_4 b_4 = load_f32_4(b);
-    f32_4 result_4 = af_4 + b_4;
-
-    unload_f32_4(result_4, result);
-}
-
-inline
-void int32_8_add(const int32* a, const f32* b, f32* result)
-{
-    int32_8 a_8 = load_int32_8(a);
-    f32_8 af_8 = int32_8_to_f32_8(a_8);
-    f32_8 b_8 = load_f32_8(b);
-    f32_8 result_8 = af_8 + b_8;
-
-    unload_f32_8(result_8, result);
-}
-
-inline
-void int32_16_add(const int32* a, const f32* b, f32* result)
-{
-    int32_16 a_16 = load_int32_16(a);
-    f32_16 af_16 = int32_16_to_f32_16(a_16);
-    f32_16 b_16 = load_f32_16(b);
-    f32_16 result_16 = af_16 + b_16;
-
-    unload_f32_16(result_16, result);
-}
-
 // WARNING: only works with SSE4.2
 // WARNING: incl. \0 both strings must be <= 16
 bool simd_str_compare(const char* str1, const char* str2) {
@ -1561,6 +1565,4 @@ bool simd_str_compare(const char* str1, const char* str2) {
    return _mm_cmpistrc(s1, s2, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH) == 0;
 }

-// @todo add more operations like the one above "int32_4_mult()"
-
 #endif
--- a/stdlib/simd/SIMD_I8.h
+++ b/stdlib/simd/SIMD_I8.h
@ -13,6 +13,8 @@
 #include <xmmintrin.h>

 #include "../Types.h"
+#include "SIMD_F32.h"
+#include "SIMD_I32.h"

 struct int8_16 {
    union {
@ -156,6 +158,33 @@ inline int8_64 init_value_int8_64(int8 value)
    return simd;
 }

+// Converts the 4 lowest int8 lanes of a to f32.
+// The bytes have to be widened to 32 bit first: passing a.s straight to
+// _mm_cvtepi32_ps would reinterpret every group of 4 packed int8 values as one int32.
+// _mm_cvtepi8_epi32 (SSE4.1) sign extends, so negative int8 values stay negative.
+inline
+f32_4 int8_16_to_f32_4(int8_16 a)
+{
+    f32_4 result;
+    result.s = _mm_cvtepi32_ps(_mm_cvtepi8_epi32(a.s));
+
+    return result;
+}
+
+// Converts the 8 lowest int8 lanes of a to f32.
+// The source lanes are signed, so they are widened with the sign extending
+// _mm256_cvtepi8_epi32 (AVX2); the zero extending cvtepu8 variant would turn
+// negative values into 128..255.
+inline
+f32_8 int8_16_to_f32_8(int8_16 a)
+{
+    f32_8 result;
+    result.s = _mm256_cvtepi32_ps(_mm256_cvtepi8_epi32(a.s));
+
+    return result;
+}
+
+// Converts all 16 int8 lanes of a to f32.
+// The source lanes are signed, so they are widened with the sign extending
+// _mm512_cvtepi8_epi32 (AVX-512F); the zero extending cvtepu8 variant would turn
+// negative values into 128..255.
+inline
+f32_16 int8_16_to_f32_16(int8_16 a)
+{
+    f32_16 result;
+    result.s = _mm512_cvtepi32_ps(_mm512_cvtepi8_epi32(a.s));
+
+    return result;
+}
+
 inline int8_16 operator+(int8_16 a, int8_16 b)
 {
    int8_16 simd;
@ -796,188 +825,32 @@ inline bool all_false(int8_64 a)
 // @todo from down here we can optimize some of the code by NOT using the wrappers
 //      the code is self contained and we could use te intrinsic functions directly

+/*
 inline
-void simd_mult(const int8* a, const int8* b, int8* result, int size, int steps)
+f32 simd_mult(const int8* a, f32 b, int size, int steps)
 {
-    int i = 0;
-
    if (steps == 16) {
-        int8_64 a_16;
-        int8_64 b_16;
-        int8_64 result_16;
+        __m512i a_16 = _mm512_loadu_epi8(a);
+        __m512 af_16 = _mm512_cvtepi32_ps(a_16);
+        __m512 b_16 = _mm512_set1_ps(b);

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
-
-            a_16 = load_int8_64(a);
-            b_16 = load_int8_64(b);
-            result_16 = a_16 * b_16;
-            unload_int8_64(result_16, result);
-       }
+        __m512 result = _mm512_mul_ps(af_16, b_16);
    } else if (steps == 8) {
-        int8_32 a_8;
-        int8_32 b_8;
-        int8_32 result_8;
+        __m256i a_8 = _mm256_loadu_epi8(a);
+        __m256 af_8 = _mm256_cvtepi32_ps(a_8);
+        __m256 b_8 = _mm256_set1_ps(b);

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
-
-            a_8 = load_int8_32(a);
-            b_8 = load_int8_32(b);
-            result_8 = a_8 * b_8;
-            unload_int8_32(result_8, result);
-       }
+        __m256 result = _mm256_mul_ps(af_8, b_8);
    } else if (steps == 4) {
-        int8_16 a_4;
-        int8_16 b_4;
-        int8_16 result_4;
+        __m128i a_4 = _mm_loadu_epi8(a);
+        __m128 af_4 = _mm_cvtepi32_ps(a_4);
+        __m128 b_4 = _mm_set1_ps(b);

-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
+        __m128 result = _mm_mul_ps(af_4, b_4);
+    } else {

-            a_4 = load_int8_16(a);
-            b_4 = load_int8_16(b);
-            result_4 = a_4 * b_4;
-            unload_int8_16(result_4, result);
-       }
-    }
-
-    for (; i < size; ++i) {
-        ++a;
-        ++b;
-        ++result;
-
-        *result = *a * *b;
    }
 }
-
-inline
-void int8_16_mult(const int8* a, const int8* b, int8* result)
-{
-    int8_16 a_4 = load_int8_16(a);
-    int8_16 b_4 = load_int8_16(b);
-    int8_16 result_4 = a_4 * b_4;
-
-    unload_int8_16(result_4, result);
-}
-
-inline
-void int8_32_mult(const int8* a, const int8* b, int8* result)
-{
-    int8_32 a_8 = load_int8_32(a);
-    int8_32 b_8 = load_int8_32(b);
-    int8_32 result_8 = a_8 * b_8;
-
-    unload_int8_32(result_8, result);
-}
-
-inline
-void int8_64_mult(const int8* a, const int8* b, int8* result)
-{
-    int8_64 a_16 = load_int8_64(a);
-    int8_64 b_16 = load_int8_64(b);
-    int8_64 result_16 = a_16 * b_16;
-
-    unload_int8_64(result_16, result);
-}
-
-inline
-void simd_add(const int8* a, const int8* b, int8* result, int size, int steps)
-{
-    int i = 0;
-
-    if (steps == 16) {
-        int8_64 a_16;
-        int8_64 b_16;
-        int8_64 result_16;
-
-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
-
-            a_16 = load_int8_64(a);
-            b_16 = load_int8_64(b);
-            result_16 = a_16 + b_16;
-            unload_int8_64(result_16, result);
-       }
-    } else if (steps == 8) {
-        int8_32 a_8;
-        int8_32 b_8;
-        int8_32 result_8;
-
-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
-
-            a_8 = load_int8_32(a);
-            b_8 = load_int8_32(b);
-            result_8 = a_8 + b_8;
-            unload_int8_32(result_8, result);
-       }
-    } else if (steps == 4) {
-        int8_16 a_4;
-        int8_16 b_4;
-        int8_16 result_4;
-
-        for (i = 0; i <= size - steps; i += steps) {
-            ++a;
-            ++b;
-            ++result;
-
-            a_4 = load_int8_16(a);
-            b_4 = load_int8_16(b);
-            result_4 = a_4 + b_4;
-            unload_int8_16(result_4, result);
-       }
-    }
-
-    for (; i < size; ++i) {
-        ++a;
-        ++b;
-        ++result;
-
-        *result = *a + *b;
-    }
-}
-
-inline
-void int8_16_add(const int8* a, const int8* b, int8* result)
-{
-    int8_16 a_4 = load_int8_16(a);
-    int8_16 b_4 = load_int8_16(b);
-    int8_16 result_4 = a_4 + b_4;
-
-    unload_int8_16(result_4, result);
-}
-
-inline
-void int8_32_add(const int8* a, const int8* b, int8* result)
-{
-    int8_32 a_8 = load_int8_32(a);
-    int8_32 b_8 = load_int8_32(b);
-    int8_32 result_8 = a_8 + b_8;
-
-    unload_int8_32(result_8, result);
-}
-
-inline
-void int8_64_add(const int8* a, const int8* b, int8* result)
-{
-    int8_64 a_16 = load_int8_64(a);
-    int8_64 b_16 = load_int8_64(b);
-    int8_64 result_16 = a_16 + b_16;
-
-    unload_int8_64(result_16, result);
-}
-
-// @todo add more operations like the one above "int8_16_mult()"
+*/

 #endif
--- a/stdlib/simd/SIMD_SVML.h
+++ b/stdlib/simd/SIMD_SVML.h
@ -0,0 +1,166 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license   OMS License 2.0
+ * @version   1.0.0
+ * @link      https://jingga.app
+ */
+#ifndef TOS_STDLIB_SIMD_SVML_H
+#define TOS_STDLIB_SIMD_SVML_H
+
+#include <immintrin.h>
+#include <xmmintrin.h>
+
+#if __linux__
+    #include "math.h"
+
+    // Lane-wise signed 32 bit division (a / b) for 4 lanes, computed with scalar code.
+    // A zero lane in b is a scalar division by zero.
+    inline __m128i _mm_div_epi32(__m128i a, __m128i b) {
+        alignas(16) int32_t numerators[4];
+        alignas(16) int32_t denominators[4];
+
+        _mm_storeu_si128((__m128i*) numerators, a);
+        _mm_storeu_si128((__m128i*) denominators, b);
+
+        // The quotients overwrite the numerators lane by lane
+        for (int lane = 0; lane < 4; ++lane) {
+            numerators[lane] /= denominators[lane];
+        }
+
+        return _mm_loadu_si128((__m128i*) numerators);
+    }
+
+    // Lane-wise signed 32 bit division (a / b) for 8 lanes, computed with scalar code.
+    // A zero lane in b is a scalar division by zero.
+    inline __m256i _mm256_div_epi32(__m256i a, __m256i b) {
+        alignas(32) int32_t numerators[8];
+        alignas(32) int32_t denominators[8];
+
+        _mm256_storeu_si256((__m256i*) numerators, a);
+        _mm256_storeu_si256((__m256i*) denominators, b);
+
+        // The quotients overwrite the numerators lane by lane
+        for (int lane = 0; lane < 8; ++lane) {
+            numerators[lane] /= denominators[lane];
+        }
+
+        return _mm256_loadu_si256((__m256i*) numerators);
+    }
+
+    // Lane-wise signed 32 bit division (a / b) for 16 lanes, computed with scalar code.
+    // A zero lane in b is a scalar division by zero.
+    inline __m512i _mm512_div_epi32(__m512i a, __m512i b) {
+        alignas(64) int32_t numerators[16];
+        alignas(64) int32_t denominators[16];
+
+        _mm512_storeu_si512((__m512i*) numerators, a);
+        _mm512_storeu_si512((__m512i*) denominators, b);
+
+        // The quotients overwrite the numerators lane by lane
+        for (int lane = 0; lane < 16; ++lane) {
+            numerators[lane] /= denominators[lane];
+        }
+
+        return _mm512_loadu_si512((__m512i*) numerators);
+    }
+
+    // Scalar stand-in for _mm_sin_ps: applies sinf() to each of the 4 lanes.
+    inline __m128 _mm_sin_ps(__m128 a) {
+        alignas(16) float lanes[4];
+        _mm_storeu_ps(lanes, a);
+
+        // Transform in place, one lane at a time
+        for (float* lane = lanes; lane != lanes + 4; ++lane) {
+            *lane = sinf(*lane);
+        }
+
+        return _mm_loadu_ps(lanes);
+    }
+
+    // Scalar stand-in for _mm_cos_ps: applies cosf() to each of the 4 lanes.
+    inline __m128 _mm_cos_ps(__m128 a) {
+        alignas(16) float lanes[4];
+        _mm_storeu_ps(lanes, a);
+
+        // Transform in place, one lane at a time
+        for (float* lane = lanes; lane != lanes + 4; ++lane) {
+            *lane = cosf(*lane);
+        }
+
+        return _mm_loadu_ps(lanes);
+    }
+
+    // Scalar stand-in for _mm_asin_ps: applies asinf() to each of the 4 lanes.
+    inline __m128 _mm_asin_ps(__m128 a) {
+        alignas(16) float lanes[4];
+        _mm_storeu_ps(lanes, a);
+
+        // Transform in place, one lane at a time
+        for (float* lane = lanes; lane != lanes + 4; ++lane) {
+            *lane = asinf(*lane);
+        }
+
+        return _mm_loadu_ps(lanes);
+    }
+
+    // Scalar stand-in for _mm_acos_ps: applies acosf() to each of the 4 lanes.
+    inline __m128 _mm_acos_ps(__m128 a) {
+        alignas(16) float lanes[4];
+        _mm_storeu_ps(lanes, a);
+
+        // Transform in place, one lane at a time
+        for (float* lane = lanes; lane != lanes + 4; ++lane) {
+            *lane = acosf(*lane);
+        }
+
+        return _mm_loadu_ps(lanes);
+    }
+
+    // Scalar stand-in for _mm256_sin_ps: applies sinf() to each of the 8 lanes.
+    inline __m256 _mm256_sin_ps(__m256 a) {
+        alignas(32) float lanes[8];
+        _mm256_storeu_ps(lanes, a);
+
+        // Transform in place, one lane at a time
+        for (float* lane = lanes; lane != lanes + 8; ++lane) {
+            *lane = sinf(*lane);
+        }
+
+        return _mm256_loadu_ps(lanes);
+    }
+
+    // Scalar stand-in for _mm256_cos_ps: applies cosf() to each of the 8 lanes.
+    inline __m256 _mm256_cos_ps(__m256 a) {
+        alignas(32) float lanes[8];
+        _mm256_storeu_ps(lanes, a);
+
+        // Transform in place, one lane at a time
+        for (float* lane = lanes; lane != lanes + 8; ++lane) {
+            *lane = cosf(*lane);
+        }
+
+        return _mm256_loadu_ps(lanes);
+    }
+
+    // Scalar stand-in for _mm256_asin_ps: applies asinf() to each of the 8 lanes.
+    inline __m256 _mm256_asin_ps(__m256 a) {
+        alignas(32) float lanes[8];
+        _mm256_storeu_ps(lanes, a);
+
+        // Transform in place, one lane at a time
+        for (float* lane = lanes; lane != lanes + 8; ++lane) {
+            *lane = asinf(*lane);
+        }
+
+        return _mm256_loadu_ps(lanes);
+    }
+
+    // Scalar stand-in for _mm256_acos_ps: applies acosf() to each of the 8 lanes.
+    inline __m256 _mm256_acos_ps(__m256 a) {
+        alignas(32) float a_array[8], result[8];
+        _mm256_storeu_ps(a_array, a);
+        // A __m256 holds 8 floats; iterating further would run past both arrays
+        for (int i = 0; i < 8; ++i) {
+            result[i] = acosf(a_array[i]);
+        }
+        return _mm256_loadu_ps(result);
+    }
+
+    // Scalar stand-in for _mm512_sin_ps: applies sinf() to each of the 16 lanes.
+    inline __m512 _mm512_sin_ps(__m512 a) {
+        // A __m512 holds 16 floats, so both buffers need 16 elements
+        alignas(64) float a_array[16], result[16];
+        _mm512_storeu_ps(a_array, a);
+        for (int i = 0; i < 16; ++i) {
+            result[i] = sinf(a_array[i]);
+        }
+        return _mm512_loadu_ps(result);
+    }
+
+    // Scalar stand-in for _mm512_cos_ps: applies cosf() to each of the 16 lanes.
+    inline __m512 _mm512_cos_ps(__m512 a) {
+        // A __m512 holds 16 floats, so both buffers need 16 elements
+        alignas(64) float a_array[16], result[16];
+        _mm512_storeu_ps(a_array, a);
+        for (int i = 0; i < 16; ++i) {
+            result[i] = cosf(a_array[i]);
+        }
+        return _mm512_loadu_ps(result);
+    }
+
+    // Scalar stand-in for _mm512_asin_ps: applies asinf() to each of the 16 lanes.
+    inline __m512 _mm512_asin_ps(__m512 a) {
+        // A __m512 holds 16 floats, so both buffers need 16 elements
+        alignas(64) float a_array[16], result[16];
+        _mm512_storeu_ps(a_array, a);
+        for (int i = 0; i < 16; ++i) {
+            result[i] = asinf(a_array[i]);
+        }
+        return _mm512_loadu_ps(result);
+    }
+
+    // Scalar stand-in for _mm512_acos_ps: applies acosf() to each of the 16 lanes.
+    inline __m512 _mm512_acos_ps(__m512 a) {
+        alignas(64) float lanes[16];
+        _mm512_storeu_ps(lanes, a);
+
+        // Transform in place, one lane at a time
+        for (float* lane = lanes; lane != lanes + 16; ++lane) {
+            *lane = acosf(*lane);
+        }
+
+        return _mm512_loadu_ps(lanes);
+    }
+#endif
+
+#endif
--- a/utils/BufferMemory.h
+++ b/utils/BufferMemory.h
@ -1,125 +0,0 @@
-/**
- * Jingga
- *
- * @copyright Jingga
- * @license   OMS License 2.0
- * @version   1.0.0
- * @link      https://jingga.app
- */
-#ifndef TOS_UTILS_BUFFER_MEMORY_H
-#define TOS_UTILS_BUFFER_MEMORY_H
-
-#include "../stdlib/Types.h"
-#include "MathUtils.h"
-
-struct BufferMemory {
-    byte* memory;
-
-    uint64 count;
-    uint64 element_size;
-    uint64 last_pos = -1;
-
-    // length = count
-    // free describes which locations are used and which are free
-    // @performance using uint32 or even uint64 might be faster
-    //      since we can check for free elements faster if the memory is almost filled
-    //      at the moment we can only check 8 elements at a time
-    byte* free;
-};
-
-inline
-byte* buffer_element_get(BufferMemory* buf, uint64 element)
-{
-    return buf->memory + element * buf->element_size;
-}
-
-int64 buffer_reserve(BufferMemory* buf)
-{
-    int byte_index = (buf->last_pos + 1) / 8;
-    int bit_index;
-
-    int64 free_element = -1;
-    byte mask;
-
-    int i = 0;
-    int max_loop = buf->count * buf->element_size;
-
-    while (free_element < 0 && i < max_loop) {
-        if (buf->free[byte_index] == 0xFF) {
-            ++i;
-            ++byte_index;
-
-            continue;
-        }
-
-        // This always breaks!
-        // @performance on the first iteration through the buffer we could optimize this by starting at a different bit_index
-        // because we know that the bit_index is based on last_pos
-        for (bit_index = 0; bit_index < 8; ++bit_index) {
-            mask = 1 << bit_index;
-            if ((buf->free[byte_index] & mask) == 0) {
-                free_element = byte_index * 8 + bit_index;
-                break;
-            }
-        }
-    }
-
-    if (free_element < 0) {
-        return -1;
-    }
-
-    buf->free[byte_index] |= (1 << bit_index);
-
-    return byte_index * 8 + bit_index;
-}
-
-byte* buffer_find_free(BufferMemory* buf, bool zeroed = false)
-{
-    int byte_index = (buf->last_pos + 1) / 8;
-    int bit_index;
-
-    int64 free_element = -1;
-    byte mask;
-
-    int i = 0;
-    int max_loop = buf->count * buf->element_size;
-
-    while (free_element < 0 && i < max_loop) {
-        if (buf->free[byte_index] == 0xFF) {
-            ++i;
-            ++byte_index;
-
-            continue;
-        }
-
-        // This always breaks!
-        // @performance on the first iteration through the buffer we could optimize this by starting at a different bit_index
-        // because we know that the bit_index is based on last_pos
-        for (bit_index = 0; bit_index < 8; ++bit_index) {
-            mask = 1 << bit_index;
-            if ((buf->free[byte_index] & mask) == 0) {
-                free_element = byte_index * 8 + bit_index;
-                break;
-            }
-        }
-    }
-
-    if (free_element < 0) {
-        return NULL;
-    }
-
-    buf->free[byte_index] |= (1 << bit_index);
-
-    return buf->memory + free_element * buf->element_size;
-}
-
-inline
-void buffer_element_free(BufferMemory* buf, uint64 element)
-{
-    int byte_index = element / 8;
-    int bit_index = element % 8;
-
-    buf->free[byte_index] &= ~(1 << bit_index);
-}
-
-#endif
--- a/utils/EndianUtils.h
+++ b/utils/EndianUtils.h
@ -25,49 +25,48 @@ inline
 bool is_little_endian()
 {
    uint32 num = 1;
-
    return ((int32) (*(char *) & num)) == 1;
 }

 inline
-void endian_swap(uint16 *val)
+uint16 endian_swap(const uint16* val)
 {
    uint16 v = *val;
-    *val = ((v << 8) | (v >> 8));
+    return ((v << 8) | (v >> 8));
 }

 inline
-void endian_swap(int16 *val)
+int16 endian_swap(const int16* val)
 {
    uint16 v = (uint16) (*val);
-    *val = (int16) ((v << 8) | (v >> 8));
+    return (int16) ((v << 8) | (v >> 8));
 }

 inline
-void endian_swap(uint32 *val)
+uint32 endian_swap(const uint32* val)
 {
    uint32 v = *val;
-    *val = ((v << 24)
+    return ((v << 24)
        | ((v & 0xFF00) << 8)
        | ((v >> 8) & 0xFF00)
        | (v >> 24));
 }

 inline
-void endian_swap(int32 *val)
+int32 endian_swap(const int32* val)
 {
    uint32 v = (uint32) (*val);
-    *val = (int32) ((v << 24)
+    return (int32) ((v << 24)
        | ((v & 0xFF00) << 8)
        | ((v >> 8) & 0xFF00)
        | (v >> 24));
 }

 inline
-void endian_swap(uint64 *val)
+uint64 endian_swap(const uint64* val)
 {
    uint64 v = *val;
-    *val = ((v << 56)
+    return ((v << 56)
        | ((v & 0x000000000000FF00ULL) << 40)
        | ((v & 0x0000000000FF0000ULL) << 24)
        | ((v & 0x00000000FF000000ULL) << 8)
@ -78,10 +77,10 @@ void endian_swap(uint64 *val)
 }

 inline
-void endian_swap(int64 *val)
+int64 endian_swap(const int64* val)
 {
    uint64 v = (uint64) (*val);
-    *val = (int64) ((v << 56)
+    return (int64) ((v << 56)
        | ((v & 0x000000000000FF00ULL) << 40)
        | ((v & 0x0000000000FF0000ULL) << 24)
        | ((v & 0x00000000FF000000ULL) << 8)
@ -91,4 +90,18 @@ void endian_swap(int64 *val)
        | (v >> 56));
 }

+// Returns *val with its 4 bytes in reversed order.
+// The swap has to happen on the raw bit pattern: casting the swapped integer
+// with (float) would convert its numeric value instead of reinterpreting the bits.
+// A union is used for the reinterpretation so no float is read through a uint32 pointer.
+inline
+float endian_swap(const float* val)
+{
+    union {
+        float f;
+        uint32 i;
+    } bits;
+
+    bits.f = *val;
+    bits.i = endian_swap(&bits.i);
+
+    return bits.f;
+}
+
+// Returns *val with its 8 bytes in reversed order.
+// The swap has to happen on the raw bit pattern: casting the swapped integer
+// with (double) would convert its numeric value instead of reinterpreting the bits.
+// A union is used for the reinterpretation so no double is read through a uint64 pointer.
+inline
+double endian_swap(const double* val)
+{
+    union {
+        double f;
+        uint64 i;
+    } bits;
+
+    bits.f = *val;
+    bits.i = endian_swap(&bits.i);
+
+    return bits.f;
+}
+
 #endif
--- a/utils/MathUtils.h
+++ b/utils/MathUtils.h
@ -25,10 +25,6 @@
 #define OMS_RAD2DEG(angle) ((angle) * 180.0f / OMS_PI)
 #define ROUND_TO_NEAREST(a, b) (((a) + ((b) - 1)) & ~((b) - 1))

-#ifndef FLT_MIN
-    #define FLT_MIN 1.175494e-038
-#endif
-
 // @question Consider to implement table based sine wave + approximation if necessary
 // [-PI/2, PI/2]
 inline
@ -80,7 +76,7 @@ float atanf_approx(float x)
 inline
 float atan2f_approx(float y, float x)
 {
-    float abs_y = OMS_ABS(y) + FLT_MIN; // prevent division by zero
+    float abs_y = (float) (OMS_ABS(y) + 1.175494e-038); // prevent division by zero
    float angle;

    if (x >= 0.0f) {
--- a/utils/StringUtils.h
+++ b/utils/StringUtils.h
@ -18,7 +18,7 @@
 inline
 void wchar_to_char(const wchar_t* src, char* dest, int length = 0)
 {
-    char* temp = (char *) src;
+    char* temp = (char* ) src;
    size_t len = wcslen(src) * sizeof(wchar_t);

    if (length > 0 && length < len) {
@ -37,7 +37,7 @@ void wchar_to_char(const wchar_t* src, char* dest, int length = 0)
    *dest = '\0';
 }

-inline size_t str_count(const char *str, const char *substr)
+inline size_t str_count(const char* str, const char* substr)
 {
    size_t l1 = strlen(str);
    size_t l2 = strlen(substr);
@ -54,15 +54,15 @@ inline size_t str_count(const char *str, const char *substr)
    return count;
 }

-inline char *strsep(const char **sp, const char *sep)
+inline char* strsep(const char* *sp, const char* sep)
 {
-    char *p, *s;
+    char* p, *s;

    if (sp == NULL || *sp == NULL || **sp == '\0') {
        return (NULL);
    }

-    s = (char *) *sp;
+    s = (char* ) *sp;
    p = s + strcspn(s, sep);

    if (*p != '\0') {
@ -89,7 +89,7 @@ str_concat(
    *dst = '\0';
 }

-char *strtok(char *str, const char *delim, char **saveptr)
+char* strtok(char* str, const char* delim, char* *saveptr)
 {
    if (str == NULL) {
        str = *saveptr;
@ -99,8 +99,8 @@ char *strtok(char *str, const char *delim, char **saveptr)
        return NULL;
    }

-    char *token_start = str;
-    char *token_end   = strpbrk(token_start, delim);
+    char* token_start = str;
+    char* token_end   = strpbrk(token_start, delim);

    if (token_end == NULL) {
        *saveptr = NULL;
@ -139,7 +139,7 @@ char* format_number(size_t number, char* buffer, const char thousands = ',')
    return buffer;
 }

-char * format_number(int number, char* buffer, const char thousands = ',')
+char*  format_number(int number, char* buffer, const char thousands = ',')
 {
    int length = snprintf(buffer, 32, "%i", number);
    format_number_render(length, buffer, thousands);
@ -147,14 +147,14 @@ char * format_number(int number, char* buffer, const char thousands = ',')
    return buffer;
 }

-void create_const_name(const unsigned char *name, unsigned char* modified_name)
+void create_const_name(const unsigned char* name, unsigned char* modified_name)
 {
    // Print block
    if (name == NULL) {
        modified_name = NULL;
    } else {
        size_t i;
-        const size_t length = strlen((const char *) name);
+        const size_t length = strlen((const char* ) name);
        for (i = 0; i < length; ++i) {
            modified_name[i] = name[i] == ' ' ? '_' : (unsigned char) toupper(name[i]);
        }
@ -166,8 +166,8 @@ void create_const_name(const unsigned char *name, unsigned char* modified_name)
 /**
 * Custom implementation of strtok_r/strtok_s
 */
-char* strtok_(char *str, const char *delim, char **key) {
-    char *result;
+char* strtok_(char* str, const char* delim, char* *key) {
+    char* result;
    if (str == NULL) {
        str = *key;
    }
@ -189,4 +189,17 @@ char* strtok_(char *str, const char *delim, char **key) {
    return result;
 }

+/**
+ * Checks if str ends with suffix.
+ * NULL arguments never match, an empty suffix always matches.
+ */
+bool str_ends_with(const char* str, const char* suffix) {
+    if (str == NULL || suffix == NULL) {
+        return false;
+    }
+
+    const size_t total = strlen(str);
+    const size_t tail = strlen(suffix);
+
+    // The last `tail` characters of str must be identical to suffix
+    return tail <= total
+        && memcmp(str + (total - tail), suffix, tail) == 0;
+}
+
 #endif
--- a/utils/SystemInfo.h
+++ b/utils/SystemInfo.h
@ -36,6 +36,7 @@

 // @todo implement for arm?
 // @todo implement for linux?
+// @todo move to platform specific files

 struct CpuCacheInfo {
    int level;
--- a/utils/TestUtils.h
+++ b/utils/TestUtils.h
@ -133,7 +133,7 @@ void profile_function(const char* func_name, void (*func)(void*), void* data, in

 #if DEBUG
    #define ASSERT_SIMPLE(a)                             \
-        if ((a) == false) {                              \
+        if (!(a)) {                                      \
            *(volatile int *)0 = 0;                      \
        }
 #else
--- a/utils/Utils.h
+++ b/utils/Utils.h
@ -11,7 +11,9 @@

 #include "../stdlib/Types.h"

-struct file_body {
+#define sizeof_array(a) (sizeof(a) / sizeof((a)[0]))
+
+struct FileBody {
    uint64 size = 0; // doesn't include null termination (same as strlen)
    byte* content;
 };
@ -20,21 +22,123 @@ global_persist uint32 fast_seed;
 #define FAST_RAND_MAX 32767

 inline
-uint32 fast_rand(void) {
+uint32 fast_rand1(void) {
    fast_seed = (214013 * fast_seed + 2531011);

    return (fast_seed >> 16) & 0x7FFF;
 }

-inline
-f32 fast_rand_percentage(void) {
-    return (f32) fast_rand() / (f32) FAST_RAND_MAX;
+uint32 fast_rand2(uint32* state) {
+    uint32 x = *state;
+
+    x ^= x << 13;
+    x ^= x >> 17;
+    x ^= x << 5;
+
+    *state = x;
+
+    return x;
 }

 inline
-bool is_bit_set(byte data, byte bit)
+f32 fast_rand_percentage(void) {
+    return (f32) fast_rand1() / (f32) FAST_RAND_MAX;
+}
+
+inline
+bool is_bit_set(byte data, int bit)
 {
-    return (data & (1 << bit)) == 0;
+    return data & (1 << bit);
+}
+
+// Checks if the bit at position bit (0 = least significant) is set in data.
+// The test is done on the unsigned representation because `1 << 31`
+// overflows a signed 32 bit int.
+inline
+bool is_bit_set(int data, int bit)
+{
+    return (((unsigned int) data >> bit) & 1u) != 0;
+}
+
+// Checks if the bit at position bit (0 = least significant) is set in data.
+// The value is shifted instead of the mask because `1 << 31` overflows a signed 32 bit int.
+inline
+bool is_bit_set(uint32 data, int bit)
+{
+    return ((data >> bit) & 1u) != 0;
+}
+
+// Extracts bits_to_read bits from data and returns them right aligned.
+// start_pos is counted from the most significant bit (start_pos 0 = bit 7).
+inline
+byte get_bits(byte data, int bits_to_read, int start_pos)
+{
+    // Number of bits to the right of the requested range
+    int shift = 8 - start_pos - bits_to_read;
+    byte mask = (1 << bits_to_read) - 1;
+
+    return (data >> shift) & mask;
+}
+
+// Reads bits_to_read bits (at most 32) from a byte stream, starting at bit start_pos.
+// Bits are numbered least significant first within each byte, and earlier bytes
+// end up in the lower bits of the result.
+// The range may span any number of bytes; only bytes that contain requested bits are read.
+inline
+uint32 get_bits(const byte* data, int bits_to_read, int start_pos)
+{
+    if (bits_to_read <= 0) {
+        return 0;
+    }
+
+    // The result can't hold more than 32 bits
+    if (bits_to_read > 32) {
+        bits_to_read = 32;
+    }
+
+    int byte_index = start_pos / 8;
+    int bit_offset = start_pos % 8;
+
+    uint32 result = 0;
+    int bits_read = 0;
+
+    while (bits_read < bits_to_read) {
+        int available = 8 - bit_offset;
+        int remaining = bits_to_read - bits_read;
+        int take = remaining < available ? remaining : available;
+
+        // take is in [1, 8], so the mask never overflows
+        uint32 chunk = ((uint32) data[byte_index] >> bit_offset) & ((1u << take) - 1u);
+        result |= chunk << bits_read;
+
+        bits_read += take;
+        bit_offset = 0;
+        ++byte_index;
+    }
+
+    return result;
+}
+
+// Reverses the order of the lowest count bits of data (bit 0 becomes bit count - 1 and so on).
+// Bits above count are not part of the result.
+// count = 0 returns 0, count is capped at 32.
+inline
+uint32 reverse_bits(uint32 data, uint32 count)
+{
+    if (count > 32) {
+        count = 32;
+    }
+
+    uint32 reversed = 0;
+    for (uint32 i = 0; i < count; ++i) {
+        // Bit i moves to its mirrored position
+        reversed |= ((data >> i) & 0x1u) << (count - 1 - i);
+    }
+
+    return reversed;
+}
+
+/**
+ * Shuffles the first size elements of array in place (Fisher-Yates).
+ * Uses rand() as the source of randomness.
+ */
+inline
+void random_unique(int* array, int size) {
+    for (int i = size - 1; i > 0; --i) {
+        // Pick a partner from the not yet fixed range [0, i]
+        int j = rand() % (i + 1);
+
+        int temp = array[i];
+        array[i] = array[j];
+        array[j] = temp;
+    }
+}
+
+/**
+ * Picks a random index where the chance of index i is arr[i] / sum(arr).
+ *
+ * The weights in arr are expected to be non-negative.
+ * If all weights are 0 the last index is returned (array_count - 1).
+ * Uses rand() as the source of randomness.
+ */
+int random_weighted_index(int* arr, int array_count)
+{
+    uint32 prob_sum = 0;
+    for (int i = 0; i < array_count; ++i) {
+        prob_sum += arr[i];
+    }
+
+    if (prob_sum == 0) {
+        return array_count - 1;
+    }
+
+    // random_prob is in [0, prob_sum), so every weight unit is equally likely
+    uint32 random_prob = rand() % prob_sum;
+    uint32 cumulative = 0;
+    for (int i = 0; i < array_count - 1; ++i) {
+        cumulative += arr[i];
+
+        // The first index whose cumulative weight exceeds the roll owns it
+        if (random_prob < cumulative) {
+            return i;
+        }
+    }
+
+    return array_count - 1;
+}

 #endif