From fe054ebb13cad5054ebc5b6411c691e803100b5b Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Wed, 11 Dec 2024 21:03:47 +0100 Subject: [PATCH] implemented asset archives and more threading. kinda working --- asset/AssetArchive.h | 162 +++++++++++++++++----- asset/AssetManagementSystem.h | 36 ++++- asset/AssetType.h | 6 +- audio/Audio.cpp | 70 +++++++++- audio/Audio.h | 8 +- audio/AudioMixer.h | 132 ++++++++++++------ audio/AudioSetting.h | 13 +- audio/Wav.h | 43 +++--- camera/Camera.h | 38 +++-- compression/Huffman.h | 75 +++++----- compression/LZP.h | 26 ++-- compression/RLE.h | 6 +- font/Font.h | 63 ++++----- gpuapi/opengl/OpenglUtils.h | 45 +++--- gpuapi/opengl/ShaderUtils.h | 83 +++++++++++ image/Bitmap.h | 12 +- image/Image.cpp | 90 +++++++++++- image/Image.h | 14 +- image/Png.h | 4 +- image/Tga.h | 10 +- localization/Language.h | 80 +++++++---- log/Debug.cpp | 36 +++-- log/Debug.h | 2 +- log/Log.h | 4 +- math/matrix/MatrixFloat32.h | 4 + memory/Queue.h | 47 ++++++- memory/RingMemory.h | 14 +- memory/ThreadedQueue.h | 126 +++++++++++++---- memory/ThreadedRingMemory.h | 163 ++++++++++++++++++++++ object/Mesh.h | 60 ++++---- platform/linux/FileUtils.cpp | 64 ++++++++- platform/linux/{ => network}/Server.h | 10 +- platform/linux/{ => network}/Socket.h | 4 +- platform/linux/threading/Atomic.h | 191 ++++++++++++++++++++++++-- platform/linux/threading/Thread.h | 6 +- platform/win32/FileUtils.cpp | 123 +++++++++++------ platform/win32/Library.h | 1 + platform/win32/SystemInfo.cpp | 1 + platform/win32/Window.h | 12 +- platform/win32/audio/DirectSound.h | 8 +- platform/win32/audio/XAudio2.h | 2 +- platform/win32/input/HidInput.h | 2 +- platform/win32/{ => network}/Client.h | 10 +- platform/win32/{ => network}/Server.h | 8 +- platform/win32/{ => network}/Socket.h | 4 +- platform/win32/threading/Atomic.h | 187 ++++++++++++++++++++++++- platform/win32/threading/Semaphore.h | 8 ++ scene/SceneState.h | 23 ++++ stdlib/HashMap.h | 66 ++++----- stdlib/ThreadedHashMap.h | 
28 ++-- stdlib/Types.h | 61 ++++++-- stdlib/simd/SIMD_I32.h | 1 - stdlib/simd/SIMD_SVML.h | 48 +++---- thread/ThreadJob.h | 17 ++- thread/ThreadPool.h | 185 ++++++++++++------------- ui/UITheme.h | 47 +++---- utils/MathUtils.h | 3 + utils/RandomUtils.h | 81 +++++++++++ utils/StringUtils.h | 16 ++- utils/Utils.h | 93 ++++--------- 60 files changed, 2052 insertions(+), 730 deletions(-) create mode 100644 memory/ThreadedRingMemory.h rename platform/linux/{ => network}/Server.h (91%) rename platform/linux/{ => network}/Socket.h (62%) rename platform/win32/{ => network}/Client.h (84%) rename platform/win32/{ => network}/Server.h (86%) rename platform/win32/{ => network}/Socket.h (62%) create mode 100644 scene/SceneState.h create mode 100644 utils/RandomUtils.h diff --git a/asset/AssetArchive.h b/asset/AssetArchive.h index 8183d5d..ddef886 100644 --- a/asset/AssetArchive.h +++ b/asset/AssetArchive.h @@ -16,6 +16,13 @@ #include "../stdlib/simd/SIMD_I32.h" #include "../memory/RingMemory.h" #include "../memory/BufferMemory.h" +#include "../image/Image.cpp" +#include "../object/Mesh.h" +#include "../object/Texture.h" +#include "../audio/Audio.cpp" +#include "../font/Font.h" +#include "../localization/Language.h" +#include "../ui/UITheme.h" #include "AssetManagementSystem.h" #if _WIN32 @@ -25,16 +32,21 @@ #include "../platform/win32/FileUtils.cpp" #endif +#define ASSET_ARCHIVE_VERSION 1 + struct AssetArchiveElement { - int32 type; + uint32 type; - int32 start; - int32 length; + uint32 start; + uint32 length; - int32 dependency_start; // actual index for asset_dependencies - int32 dependency_count; + uint32 dependency_start; // actual index for asset_dependencies + uint32 dependency_count; }; +// It is important to understand that for performance reasons the assets addresses are stored in an array +// This makes it very fast to access because there is only one indirection. +// On the other hand we can only find assets by their ID/location and not by name. 
struct AssetArchiveHeader { int32 version; @@ -49,7 +61,14 @@ struct AssetArchive { AssetArchiveHeader header; byte* data; // owner of the data - FileHandler fd; + FileHandle fd; + FileHandle fd_async; + + // @performance We still need to implement the loading with this and then profile it to see if it is faster. + // If not remove + MMFHandle mmf; + + int32 asset_type_map[ASSET_TYPE_SIZE]; }; // Calculates how large the header memory has to be to hold all its information @@ -91,7 +110,9 @@ void asset_archive_header_load(AssetArchiveHeader* header, byte* data, int32 ste steps ); - header->asset_dependencies = (int32 *) ((byte *) header->asset_element + header->asset_count * sizeof(AssetArchiveElement)); + if (header->asset_dependency_count) { + header->asset_dependencies = (int32 *) ((byte *) header->asset_element + header->asset_count * sizeof(AssetArchiveElement)); + } memcpy(header->asset_dependencies, data, header->asset_dependency_count * sizeof(int32)); SWAP_ENDIAN_LITTLE_SIMD( @@ -110,17 +131,22 @@ AssetArchiveElement* asset_archive_element_find(const AssetArchive* archive, int void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* buf, RingMemory* ring, int32 steps = 8) { - // Get file handle - archive->fd = file_read_async_handle(path); + archive->fd = file_read_handle(path); if (!archive->fd) { return; } + archive->fd_async = file_read_async_handle(path); + if (!archive->fd_async) { + return; + } + archive->mmf = file_mmf_handle(archive->fd_async); + FileBody file; file.size = 64; // Find header size - file.content = ring_get_memory(ring, file.size); + file.content = ring_get_memory(ring, file.size, 4); file_read(archive->fd, &file, 0, file.size); file.size = asset_archive_header_size(archive, file.content); @@ -134,33 +160,50 @@ void asset_archive_load(AssetArchive* archive, const char* path, BufferMemory* b 4 ); + archive->header.asset_element = (AssetArchiveElement *) archive->data; + // Read entire header file.content = 
ring_get_memory(ring, file.size); file_read(archive->fd, &file, 0, file.size); asset_archive_header_load(&archive->header, file.content, steps); } -// @performance This can probably be done much faster by handling the loading of dependencies faster -void asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetManagementSystem* ams_array, RingMemory* ring) +// @question Do we want to allow a callback function? +// Very often we want to do something with the data (e.g. upload it to the gpu) +// Maybe we could just accept a int value which we set atomically as a flag that the asset is complete? +// this way we can check much faster if we can work with this data from the caller?! +// The only problem is that we need to pass the pointer to this int in the thrd_queue since we queue the files to load there +Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetManagementSystem* ams_array, RingMemory* ring) { - AssetArchiveElement* element = &archive->header.asset_element[id]; - AssetManagementSystem* ams = element->type > 0 - ? &ams_array[element->type] - : &ams_array[0]; + // @todo add calculation from element->type to ams index - uint64 hash = hash_djb2((const char *) &id); + AssetArchiveElement* element = &archive->header.asset_element[id]; + AssetManagementSystem* ams = &ams_array[archive->asset_type_map[element->type]]; + + // @todo This is a little bit stupid, reconsider + char id_str[5]; + id_str[4] = '\0'; + *((int32 *) id_str) = id; + + uint64 hash = hash_djb2(id_str); + + Asset* asset; // @performance I think we could optimize the ams_reserver_asset in a way so we don't have to lock it the entire time pthread_mutex_lock(&ams->mutex); - // @bug this is not how this function works - if (hashmap_get_entry(&ams->hash_map, (const char *) &id, hash)) { + // @bug If we have multiple archive files the ids also repeat, which is not possible for the hash map + // Possible solution: also store a string name for every asset. 
This would add HASH_MAP_MAX_KEY_LENGTH bytes of data to every asset though (see hash map key size = 32) + + asset = ams_get_asset(ams, id_str, hash); + if (asset) { + // Asset already loaded pthread_mutex_unlock(&ams->mutex); + + return asset; } if (element->type == 0) { - // @bug We can't just do this, this won't work. Check if we might want to change the asset management directly to hash indices or at least int values - Asset* asset = ams_reserve_asset(ams, (const char *) &id, ams_calculate_chunks(ams, element->length)); - asset->self = (byte *) (asset + 1); + asset = ams_reserve_asset(ams, id_str, ams_calculate_chunks(ams, element->length)); FileBody file = {}; file.content = asset->self; @@ -168,34 +211,83 @@ void asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetManage // We are directly reading into the correct destination file_read(archive->fd, &file, element->start, element->length); } else { + // @performance In this case we may want to check if memory mapped regions are better. + // 1. I don't think they work together with async loading + // 2. Profile which one is faster + // 3. The big benefit of mmf would be that we can avoid one memcpy and directly load the data into the object + // 4. 
Of course the disadvantage would be to no longer have async loading + // We are reading into temp memory since we have to perform transformations on the data FileBodyAsync file = {}; - file_read_async(archive->fd, &file, element->start, element->length, ring); + file_read_async(archive->fd_async, &file, element->start, element->length, ring); // This happens while the file system loads the data - Asset* asset = ams_reserve_asset(ams, (const char *) &id, ams_calculate_chunks(ams, element->length)); - asset->self = (byte *) (asset + 1); + asset = ams_reserve_asset(ams, id_str, ams_calculate_chunks(ams, element->length)); + asset->is_ram = true; - byte* data = ring_get_memory(ring, element->length, 64); - size_t data_size = 0; - - // @todo create platform wrapper - GetOverlappedResult(archive->fd, &file.ov, NULL, true); + file_async_wait(archive->fd_async, &file.ov, true); switch (element->type) { - case 1: { + case ASSET_TYPE_IMAGE: { + // @todo Do we really want to store textures in the asset management system or only images? 
+ // If it is only images then we need to somehow also manage textures + Texture* texture = (Texture *) asset->self; + texture->image.pixels = (byte *) (texture + 1); + + image_from_data(file.content, &texture->image); + + asset->vram_size = texture->image.pixel_count * image_pixel_size_from_type(texture->image.pixel_type); + asset->ram_size = asset->vram_size + sizeof(Texture); + + #if OPENGL + // @bug I think order_rows has the wrong value + if (texture->image.order_rows == IMAGE_ROW_ORDER_TOP_TO_BOTTOM) { + image_flip_vertical(ring, &texture->image); + texture->image.order_rows = IMAGE_ROW_ORDER_BOTTOM_TO_TOP; + } + #endif + } break; + case ASSET_TYPE_AUDIO: { + Audio* audio = (Audio *) asset->self; + audio->data = (byte *) (audio + 1); + + audio_from_data(file.content, audio); + } break; + case ASSET_TYPE_OBJ: { + Mesh* mesh = (Mesh *) asset->self; + mesh->data = (byte *) (mesh + 1); + + mesh_from_data(file.content, mesh); + } break; + case ASSET_TYPE_LANGUAGE: { + Language* language = (Language *) asset->self; + language->data = (byte *) (language + 1); + + language_from_data(file.content, language); + } break; + case ASSET_TYPE_FONT: { + Font* font = (Font *) asset->self; + font->glyphs = (Glyph *) (font + 1); + + font_from_data(file.content, font); + } break; + case ASSET_TYPE_THEME: { + UIThemeStyle* theme = (UIThemeStyle *) asset->self; + theme->data = (byte *) (theme + 1); + + theme_from_data(file.content, theme); } break; default: { } } - - memcpy(asset->self, data, data_size); } pthread_mutex_unlock(&ams->mutex); - // @performance maybe do in worker threads? - for (int32 i = 0; i < element->dependency_count; ++i) { + // @performance maybe do in worker threads? 
This just feels very slow + for (uint32 i = 0; i < element->dependency_count; ++i) { asset_archive_asset_load(archive, id, ams, ring); } + + return asset; } #endif \ No newline at end of file diff --git a/asset/AssetManagementSystem.h b/asset/AssetManagementSystem.h index bc39f91..f251feb 100644 --- a/asset/AssetManagementSystem.h +++ b/asset/AssetManagementSystem.h @@ -34,9 +34,11 @@ struct AssetManagementSystem { // The indices of asset_memory and asset_data_memory are always linked // General asset memory + // Fixed chunk size of sizeof(Asset) ChunkMemory asset_memory; // Actual asset data + // Chunk size defined during initialization ChunkMemory asset_data_memory; // @performance Do we really need the linked list, the ChunkMemory should allow us to do some smart stuff @@ -44,7 +46,11 @@ struct AssetManagementSystem { Asset* last; // @question do we want to create an extra threaded version? Or a combined one, like we have right now. + // @question Do we want to add a mutex to assets. This way we don't have to lock the entire ams. pthread_mutex_t mutex; + + // @bug We probably also need a overhead value. + // In some cases we need more data than our normal data (see texture, it contains image + texture) }; void ams_create(AssetManagementSystem* ams, BufferMemory* buf, int32 chunk_size, int32 count) @@ -201,9 +207,9 @@ Asset* ams_get_asset(AssetManagementSystem* ams, const char* key) } inline -Asset* ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 index) +Asset* ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 hash) { - HashEntry* entry = hashmap_get_entry(&ams->hash_map, key, index); + HashEntry* entry = hashmap_get_entry(&ams->hash_map, key, hash); // @bug entry->value seems to be an address outside of any known buffer, how? 
DEBUG_MEMORY_READ( @@ -215,7 +221,7 @@ Asset* ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 index) } // @performance We could probably avoid locking by adding a atomic flag to indicate if the value is valid -Asset* threaded_ams_get_asset(AssetManagementSystem* ams, uint64 element) { +Asset* thrd_ams_get_asset(AssetManagementSystem* ams, uint64 element) { pthread_mutex_lock(&ams->mutex); Asset* asset = ams_get_asset(ams, element); pthread_mutex_unlock(&ams->mutex); @@ -223,7 +229,7 @@ Asset* threaded_ams_get_asset(AssetManagementSystem* ams, uint64 element) { return asset; } -Asset* threaded_ams_get_asset(AssetManagementSystem* ams, const char* key) { +Asset* thrd_ams_get_asset(AssetManagementSystem* ams, const char* key) { pthread_mutex_lock(&ams->mutex); Asset* asset = ams_get_asset(ams, key); pthread_mutex_unlock(&ams->mutex); @@ -231,9 +237,9 @@ Asset* threaded_ams_get_asset(AssetManagementSystem* ams, const char* key) { return asset; } -Asset* threaded_ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 index) { +Asset* thrd_ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 hash) { pthread_mutex_lock(&ams->mutex); - Asset* asset = ams_get_asset(ams, key, index); + Asset* asset = ams_get_asset(ams, key, hash); pthread_mutex_unlock(&ams->mutex); return asset; @@ -309,4 +315,22 @@ Asset* ams_reserve_asset(AssetManagementSystem* ams, const char* name, uint32 el return asset; } +Asset* thrd_ams_reserve_asset(AssetManagementSystem* ams, const char* name, uint32 elements = 1) { + pthread_mutex_lock(&ams->mutex); + Asset* asset = ams_reserve_asset(ams, name, elements); + pthread_mutex_unlock(&ams->mutex); + + return asset; +} + +Asset* thrd_ams_reserve_asset_start(AssetManagementSystem* ams, const char* name, uint32 elements = 1) { + pthread_mutex_lock(&ams->mutex); + + return ams_reserve_asset(ams, name, elements); +} + +void thrd_ams_reserve_asset_end(AssetManagementSystem* ams) { + pthread_mutex_unlock(&ams->mutex); 
+} + #endif \ No newline at end of file diff --git a/asset/AssetType.h b/asset/AssetType.h index 163d0e3..40f8beb 100644 --- a/asset/AssetType.h +++ b/asset/AssetType.h @@ -12,9 +12,11 @@ enum AssetType { ASSET_TYPE_GENERAL, ASSET_TYPE_OBJ, - ASSET_TYPE_TEXTURE, ASSET_TYPE_AUDIO, - ASSET_TYPE_ANIM, + ASSET_TYPE_LANGUAGE, + ASSET_TYPE_FONT, + ASSET_TYPE_THEME, + ASSET_TYPE_IMAGE, ASSET_TYPE_SIZE }; diff --git a/audio/Audio.cpp b/audio/Audio.cpp index 22f00b3..682cea1 100644 --- a/audio/Audio.cpp +++ b/audio/Audio.cpp @@ -22,14 +22,80 @@ #include "AudioSetting.h" #include "Wav.h" -void audio_from_file(RingMemory* ring, const char* path, Audio* audio) +void audio_from_file(Audio* audio, const char* path, RingMemory* ring) { FileBody file; file_read(path, &file, ring); + ASSERT_SIMPLE(file.size); + if (str_ends_with(path, ".wav")) { - wav_audio_generate(&file, audio); + wav_from_data(file.content, (uint32) file.size, audio, ring); } } +int32 audio_data_size(const Audio* audio) +{ + return (int32) (audio->size + + sizeof(audio->sample_rate) + + sizeof(audio->sample_size) + + sizeof(audio->channels) + + sizeof(audio->bloc_size) + + sizeof(audio->byte_per_sec) + + sizeof(audio->size) + ); +} + +int32 audio_from_data(const byte* data, Audio* audio) +{ + audio->sample_rate = SWAP_ENDIAN_LITTLE(*((uint16 *) data)); + data += sizeof(audio->sample_rate); + + audio->sample_size = *data; + data += sizeof(audio->sample_size); + + audio->channels = *data; + data += sizeof(audio->channels); + + audio->bloc_size = *data; + data += sizeof(audio->bloc_size); + + audio->byte_per_sec = SWAP_ENDIAN_LITTLE(*((uint32 *) data)); + data += sizeof(audio->byte_per_sec); + + audio->size = SWAP_ENDIAN_LITTLE(*((uint32 *) data)); + data += sizeof(audio->size); + + memcpy(audio->data, data, audio->size); + data += audio->size; + + return audio_data_size(audio); +} + +int32 audio_to_data(const Audio* audio, byte* data) +{ + *((uint16 *) data) = SWAP_ENDIAN_LITTLE(audio->sample_rate); + data += 
sizeof(audio->sample_rate); + + *data = audio->sample_size; + data += sizeof(audio->sample_size); + + *data = audio->channels; + data += sizeof(audio->channels); + + *data = audio->bloc_size; + data += sizeof(audio->bloc_size); + + *((uint32 *) data) = SWAP_ENDIAN_LITTLE(audio->byte_per_sec); + data += sizeof(audio->byte_per_sec); + + *((uint32 *) data) = SWAP_ENDIAN_LITTLE(audio->size); + data += sizeof(audio->size); + + memcpy(data, audio->data, audio->size); + data += audio->size; + + return audio_data_size(audio); +} + #endif diff --git a/audio/Audio.h b/audio/Audio.h index 1ac446f..8bc764a 100644 --- a/audio/Audio.h +++ b/audio/Audio.h @@ -15,19 +15,19 @@ struct Audio { // bits per sample // usually 48000 or 44100 - uint32 sample_rate; + uint16 sample_rate; // bytes per bloc // channel count * bit // usually 2 * 16 = 4 - uint32 sample_size; + byte sample_size; // audio channels // usually 2 - uint32 channels; + byte channels; // usually 16 = 2 - uint32 bloc_size; + byte bloc_size; // sample_rate * sample_size uint32 byte_per_sec; diff --git a/audio/AudioMixer.h b/audio/AudioMixer.h index 9e8677e..f3e2d4e 100644 --- a/audio/AudioMixer.h +++ b/audio/AudioMixer.h @@ -48,6 +48,8 @@ struct AudioInstance { uint32 audio_size; byte* audio_data; + + uint32 sample_index; }; struct AudioMixer { @@ -71,6 +73,7 @@ struct AudioMixer { // do we need a condition or semaphore? }; +// @todo expand AudioLocationSetting so that it also includes audio effects, repeat etc. 
void audio_mixer_add(AudioMixer* mixer, int64 id, Audio* audio, AudioLocationSetting* origin) { int64 index = chunk_reserve(&mixer->audio_instances, 1); @@ -90,7 +93,7 @@ void audio_mixer_add(AudioMixer* mixer, int64 id, Audio* audio, AudioLocationSet void audio_mixer_add_unique(AudioMixer* mixer, int64 id, Audio* audio, AudioLocationSetting* origin) { - for (int32 i = 0; i < mixer->audio_instances.count; ++i) { + for (uint32 i = 0; i < mixer->audio_instances.count; ++i) { // @performance We are not really utilizing chunk memory. // Maybe a simple array would be better // Or we need to use more chunk functions / maybe even create a chunk_iterate() function? @@ -105,7 +108,7 @@ void audio_mixer_add_unique(AudioMixer* mixer, int64 id, Audio* audio, AudioLoca void audio_mixer_remove(AudioMixer* mixer, int64 id) { - for (int32 i = 0; i < mixer->audio_instances.count; ++i) { + for (uint32 i = 0; i < mixer->audio_instances.count; ++i) { AudioInstance* instance = (AudioInstance *) chunk_get_element(&mixer->audio_instances, i); if (instance->id == id) { instance->id = 0; @@ -116,38 +119,38 @@ void audio_mixer_remove(AudioMixer* mixer, int64 id) } } -void apply_echo(int16* buffer, uint16 buffer_size, f32 delay, f32 feedback, int32 sample_rate) { +void apply_echo(int16* buffer, uint32 buffer_size, f32 delay, f32 feedback, int32 sample_rate) { int32 delay_samples = (int32) (delay * sample_rate); - for (int32 i = delay_samples; i < buffer_size; ++i) { + for (uint32 i = delay_samples; i < buffer_size; ++i) { buffer[i] += (int16) (buffer[i - delay_samples] * feedback); } } -void apply_reverb(int16* buffer, uint16 buffer_size, f32 intensity) { +void apply_reverb(int16* buffer, uint32 buffer_size, f32 intensity) { intensity *= 0.5f; - for (int32 i = 1; i < buffer_size; ++i) { + for (uint32 i = 1; i < buffer_size; ++i) { buffer[i] += (int16) (buffer[i - 1] * intensity); // Simple reverb with decay } } -void apply_cave(int16* buffer, uint16 buffer_size, int32 sample_rate) { +void 
apply_cave(int16* buffer, uint32 buffer_size, int32 sample_rate) { f32 echo_delay = 0.1f; // Echo delay in seconds f32 feedback = 0.3f; // Echo feedback level apply_echo(buffer, buffer_size, echo_delay, feedback, sample_rate); apply_reverb(buffer, buffer_size, 0.4f); // Add mild reverb } -void apply_underwater(int16* buffer, uint16 buffer_size) { - for (int32 i = 0; i < buffer_size; ++i) { +void apply_underwater(int16* buffer, uint32 buffer_size) { + for (uint32 i = 0; i < buffer_size; ++i) { buffer[i] = (int16) sinf(buffer[i] * 0.5f); // Dampen + distortion } } -void apply_flanger(int16* buffer, uint16 buffer_size, f32 rate, f32 depth, int32 sample_rate) { - int32 delay_samples = (int32) (depth * sample_rate); +void apply_flanger(int16* buffer, uint32 buffer_size, f32 rate, f32 depth, int32 sample_rate) { + f32 delay_samples = depth * sample_rate; f32 temp = OMS_TWO_PI * rate / sample_rate; - for (int32 i = 0; i < buffer_size; ++i) { + for (uint32 i = 0; i < buffer_size; ++i) { int32 delay = (int32) (delay_samples * (0.5f + 0.5f * sinf(i * temp))); if (i >= delay) { buffer[i] += (int16) (buffer[i - delay] * 0.5f); @@ -155,27 +158,27 @@ void apply_flanger(int16* buffer, uint16 buffer_size, f32 rate, f32 depth, int32 } } -void apply_tremolo(int16* buffer, uint16 buffer_size, f32 rate, f32 depth, int32 sample_rate) { +void apply_tremolo(int16* buffer, uint32 buffer_size, f32 rate, f32 depth, int32 sample_rate) { f32 temp = OMS_TWO_PI * rate / sample_rate; f32 temp2 = (1.0f - depth) + depth; - for (int32 i = 0; i < buffer_size; ++i) { + for (uint32 i = 0; i < buffer_size; ++i) { f32 mod = temp2 * (0.5f + 0.5f * sinf(i * temp)); buffer[i] = (int16) (buffer[i] * mod); } } -void apply_distortion(int16* buffer, uint16 buffer_size, f32 gain) { - for (int32 i = 0; i < buffer_size; ++i) { +void apply_distortion(int16* buffer, uint32 buffer_size, f32 gain) { + for (uint32 i = 0; i < buffer_size; ++i) { buffer[i] = (int16) tanh(buffer[i] * gain); } } -void apply_chorus(int16* 
buffer, uint16 buffer_size, f32 rate, f32 depth, int32 sample_rate) { +void apply_chorus(int16* buffer, uint32 buffer_size, f32 rate, f32 depth, int32 sample_rate) { f32 temp = OMS_TWO_PI * rate / sample_rate; int32 max_delay = (int32) (depth * sample_rate); - for (int32 i = 0; i < buffer_size; ++i) { + for (uint32 i = 0; i < buffer_size; ++i) { int32 delay = (int32) (max_delay * (0.5f + 0.5f * sinf(i * temp))); if (i >= delay) { buffer[i] += (int16) (buffer[i - delay] * 0.5f); @@ -183,26 +186,26 @@ void apply_chorus(int16* buffer, uint16 buffer_size, f32 rate, f32 depth, int32 } } -void apply_pitch_shift(int16* buffer, uint16 buffer_size, f32 pitch_factor) { - for (int32 i = 0; i < buffer_size; ++i) { +void apply_pitch_shift(int16* buffer, uint32 buffer_size, f32 pitch_factor) { + for (uint32 i = 0; i < buffer_size; ++i) { buffer[i] = (int16) (buffer[i] * pitch_factor); } } -void apply_granular_delay(int16* buffer, uint16 buffer_size, f32 delay, f32 granularity, int32 sample_rate) { +void apply_granular_delay(int16* buffer, uint32 buffer_size, f32 delay, f32 granularity, int32 sample_rate) { int32 delay_samples = (int32) (delay * sample_rate); int32 limit = (int32) (granularity * sample_rate); - for (int32 i = 0; i < buffer_size; ++i) { + for (uint32 i = 0; i < buffer_size; ++i) { if (i % limit == 0 && i >= delay_samples) { buffer[i] += (int16) (buffer[i - delay_samples] * 0.6f); } } } -void apply_frequency_modulation(int16* buffer, uint16 buffer_size, f32 mod_freq, f32 mod_depth, int32 sample_rate) { +void apply_frequency_modulation(int16* buffer, uint32 buffer_size, f32 mod_freq, f32 mod_depth, int32 sample_rate) { f32 temp = OMS_TWO_PI * mod_freq / sample_rate; - for (int32 i = 0; i < buffer_size; ++i) { + for (uint32 i = 0; i < buffer_size; ++i) { buffer[i] = (int16) (buffer[i] * sinf(i * temp) * mod_depth); } } @@ -211,20 +214,20 @@ void apply_stereo_panning(int16* buffer, int32 buffer_size, f32 pan) { f32 left_gain = 1.0f - pan; f32 right_gain = pan; - for 
(int32 i = 0; i < buffer_size; ++i) { + for (uint32 i = 0; i < buffer_size; ++i) { buffer[i] = (int16) (buffer[i] * left_gain); buffer[i + 1] = (int16) (buffer[i + 1] * right_gain); } } -void apply_highpass(int16* buffer, uint16 buffer_size, f32 cutoff, int32 sample_rate) { +void apply_highpass(int16* buffer, uint32 buffer_size, f32 cutoff, int32 sample_rate) { f32 rc = 1.0f / (OMS_TWO_PI * cutoff); f32 dt = 1.0f / sample_rate; f32 alpha = rc / (rc + dt); f32 previous = buffer[0]; f32 previous_output = buffer[0]; - for (int32 i = 1; i < buffer_size; ++i) { + for (uint32 i = 1; i < buffer_size; ++i) { f32 current = buffer[i]; buffer[i] = (int16) (alpha * (previous_output + current - previous)); previous = current; @@ -232,53 +235,89 @@ void apply_highpass(int16* buffer, uint16 buffer_size, f32 cutoff, int32 sample_ } } - -void apply_lowpass(int16* buffer, uint16 buffer_size, f32 cutoff, int32 sample_rate) { +void apply_lowpass(int16* buffer, uint32 buffer_size, f32 cutoff, int32 sample_rate) { f32 rc = 1.0f / (OMS_TWO_PI * cutoff); f32 dt = 1.0f / sample_rate; f32 alpha = dt / (rc + dt); f32 previous = buffer[0]; - for (int32 i = 1; i < buffer_size; ++i) { + for (uint32 i = 1; i < buffer_size; ++i) { buffer[i] = (int16) (previous + alpha * (buffer[i] - previous)); previous = buffer[i]; } } -void audio_mixer_mix(AudioMixer *mixer) { - uint16 limit = (uint16) (mixer->settings.sample_buffer_size / mixer->settings.sample_size); +void audio_mixer_mix(AudioMixer* mixer) { + uint32 limit = OMS_MIN( + mixer->settings.sample_buffer_size / mixer->settings.sample_size, + mixer->settings.buffer_size / mixer->settings.sample_size + ); - for (int32 i = 0; i < mixer->audio_instances.count; ++i) { + bool has_location = !is_empty((byte *) &mixer->camera.audio_location, sizeof(mixer->camera.audio_location)); + + f32 volume_scale = mixer->settings.master_volume * mixer->settings.master_volume; + + for (uint32 i = 0; i < mixer->audio_instances.count; ++i) { AudioInstance* sound = 
(AudioInstance *) chunk_get_element(&mixer->audio_instances, i); if (sound->id == 0) { continue; } // Compute the vector from the player to the sound's origin - v3_f32 to_sound; - vec3_sub(&to_sound, &sound->origin.audio_location, &mixer->camera.audio_location); - f32 distance = vec3_length(&to_sound); - f32 distance_attenuation = OMS_MAX(0.0f, 1.0f - (distance / 50.0f)); - vec3_normalize(&to_sound); - f32 alignment = vec3_dot(&mixer->camera.audio_lookat, &to_sound); - f32 directional_attenuation = OMS_MAX(0.0f, alignment); - f32 total_attenuation = distance_attenuation * directional_attenuation; + v3_f32 to_sound = {}; + f32 total_attenuation = 1.0f; + bool has_origin = !is_empty((byte *) &sound->origin.audio_location, sizeof(sound->origin.audio_location)); + + if (has_location && has_origin) { + vec3_sub(&to_sound, &sound->origin.audio_location, &mixer->camera.audio_location); + + f32 distance = vec3_length(&to_sound); + if (distance) { + f32 distance_attenuation = OMS_MAX(0.0f, 1.0f - (distance / 50.0f)); + + vec3_normalize(&to_sound); + f32 alignment = vec3_dot(&mixer->camera.audio_lookat, &to_sound); + f32 directional_attenuation = OMS_MAX(0.0f, alignment); + + total_attenuation = distance_attenuation * directional_attenuation; + } + } + + uint32 sound_sample_count = sound->audio_size / mixer->settings.sample_size; + uint32 sound_sample_index = sound->sample_index; + int16* audio_data = (int16 *) sound->audio_data; // Temporary buffer for effects processing // @performance If there are situations where only one file exists in the mixer that should be played we could directly write to // the output buffer improving the performance. Some of those mixers are: music, cinematic, ui // Careful, NOT voice since we will probably manually layer them according to their position? 
for (int32 j = 0; j < limit; ++j) { - // @todo if repeat handle here + if (sound_sample_index >= sound_sample_count) { + // @todo if repeat we need to handle part of it here, else quit - mixer->buffer_temp[j] = (int16) (sound->audio_data[j * 2] * mixer->settings.master_volume * total_attenuation); - mixer->buffer_temp[j + 1] = (int16) (sound->audio_data[j * 2 + 2] * mixer->settings.master_volume * total_attenuation); + sound_sample_index = 0; + + // @question why are we doing this? + mixer->settings.sample_index = 0; + } + + mixer->buffer_temp[j * 2] = (int16) (audio_data[sound_sample_index * 2] * volume_scale * total_attenuation); + mixer->buffer_temp[j * 2 + 1] = (int16) (audio_data[sound_sample_index * 2 + 1] * volume_scale * total_attenuation); + + ++sound_sample_index; // @performance Some adjustments could be made right here the question is if this is faster. // Probably depends on how likely the adjustment is to happen. + + // @todo if end of file and no repeat -> remove from list } + // @question We also have to set setting->sample_index = sound_sample_index. + // But that currently happens in the sound api. 
Do we want to keep it there or move it here + // Apply effects based on sound's effect type + // @performance Depending on how we implement effects we could even pull them out of this loop + // What I mean is effects could either be sound file dependent (current location correct) or mixer dependent if (mixer->effect) { if (mixer->effect & AUDIO_EFFECT_ECHO) { apply_echo(mixer->buffer_temp, limit, 0.2f, 0.4f, mixer->settings.sample_rate); @@ -337,8 +376,11 @@ void audio_mixer_mix(AudioMixer *mixer) { } } + // @bug the actual output "limit" could be smaller if sound files end earlier and no repeat is defined + // In that case we would also have to adjust mixer->settings.sample_buffer_size + // Add the processed sound to the output buffer - for (int32 j = 0; j < limit; j++) { + for (uint32 j = 0; j < limit; j++) { mixer->settings.buffer[j] += mixer->buffer_temp[j]; } } diff --git a/audio/AudioSetting.h b/audio/AudioSetting.h index ba32693..d1031f6 100644 --- a/audio/AudioSetting.h +++ b/audio/AudioSetting.h @@ -20,23 +20,19 @@ struct AudioSetting { // WARNING: not the byte position, but the index based on the sample size uint32 sample_index; - // @todo add more settings e.g. repeat etc - - uint32 latency; - f32 master_volume; // bits per sample // usually 48000 or 44100 - uint32 sample_rate; + uint16 sample_rate; // bytes per bloc // channel count * bit // usually 2 * 16 = 4 - uint32 sample_size; + byte sample_size; // how often has the audio_play been called (required for xaudio) - uint32 sample_output; + byte sample_output; // max buffer content/size uint32 buffer_size; @@ -47,6 +43,9 @@ struct AudioSetting { int16* buffer; byte type = SOUND_API_DIRECT_SOUND; + byte latency; + + // @todo add more settings e.g. 
repeat etc }; struct AudioLocationSetting { diff --git a/audio/Wav.h b/audio/Wav.h index f48fe19..fd67c62 100644 --- a/audio/Wav.h +++ b/audio/Wav.h @@ -46,29 +46,23 @@ struct WavHeader { struct Wav { WavHeader header; - byte* sample_data; // WARNING: This is not the owner of the data. The owner is the FileBody + byte* sample_data; // WARNING: This is not the owner of the data. uint32 size; - byte* data; // WARNING: This is not the owner of the data. The owner is the FileBody + byte* data; // Data owner }; -void generate_default_wav_references(const FileBody* file, Wav* wav) +void generate_default_wav_references(const byte* data, uint32 size, Wav* wav) { - wav->size = (uint32) file->size; - wav->data = file->content; - - if (wav->size < WAV_HEADER_SIZE) { - // This shouldn't happen - return; - } + wav->size = size; + ASSERT_SIMPLE(size >= WAV_HEADER_SIZE); // Check if we can copy memory directly // The struct layout and header size should match on x86, but we still check it if constexpr (sizeof(WavHeader) == WAV_HEADER_SIZE) { - memcpy(&wav->header, file->content, WAV_HEADER_SIZE); + memcpy(&wav->header, data, WAV_HEADER_SIZE); // swap endian if we are on big endian system - // @question Maybe this needs to be a runtime check? 
#if !_WIN32 && !__LITTLE_ENDIAN wav->header.size = SWAP_ENDIAN_LITTLE(wav->header.size); wav->header.bloc_size = SWAP_ENDIAN_LITTLE(wav->header.bloc_size); @@ -121,33 +115,32 @@ void generate_default_wav_references(const FileBody* file, Wav* wav) wav->header.bits_per_sample = SWAP_ENDIAN_LITTLE(*((uint16 *) (wav->data + 34))); // Sample data header - wav->header.data_bloc_id[0] = *(wav->data + 36); - wav->header.data_bloc_id[1] = *(wav->data + 37); - wav->header.data_bloc_id[2] = *(wav->data + 38); - wav->header.data_bloc_id[3] = *(wav->data + 39); + memcpy(wav->header.data_bloc_id, wav->data + 36, 4); - wav->header.data_size = SWAP_ENDIAN_LITTLE(*((uint32 *) *(wav->data + 40))); + wav->header.data_size = SWAP_ENDIAN_LITTLE(*((uint32 *) *(wav->data + WAV_HEADER_SIZE - sizeof(wav->header.data_bloc_id)))); } wav->sample_data = wav->data + WAV_HEADER_SIZE; + memcpy(wav->sample_data, data + WAV_HEADER_SIZE, wav->header.data_size); } -void wav_audio_generate(const FileBody* src_data, Audio* audio) +void wav_from_data(const byte* data, uint32 size, Audio* audio, RingMemory* ring) { // @performance We are generating the struct and then filling the data. 
- // There is some asignment/copy overhead + // There is some assignment/copy overhead Wav src = {}; - generate_default_wav_references(src_data, &src); + src.data = ring_get_memory(ring, size, 4); + generate_default_wav_references(data, size, &src); if (!src.size) { return; } - audio->sample_rate = src.header.frequency; - audio->sample_size = (src.header.bits_per_sample / 8) * src.header.nbr_channels; - audio->channels = src.header.nbr_channels; - audio->byte_per_sec = src.header.byte_per_sec; - audio->bloc_size = src.header.bloc_size; + audio->sample_rate = (uint16) src.header.frequency; + audio->sample_size = (byte) ((src.header.bits_per_sample / 8) * src.header.nbr_channels); + audio->channels = (byte) src.header.nbr_channels; + audio->byte_per_sec = (uint32) src.header.byte_per_sec; + audio->bloc_size = (byte) src.header.bloc_size; audio->size = src.header.data_size; memcpy((void *) audio->data, src.sample_data, audio->size); diff --git a/camera/Camera.h b/camera/Camera.h index e102f34..e311e97 100644 --- a/camera/Camera.h +++ b/camera/Camera.h @@ -19,8 +19,14 @@ // @todo Please check out if we can switch to quaternions. We tried but failed. 
+enum CameraStateChanges : byte { + CAMERA_STATE_CHANGE_NONE = 0, + CAMERA_STATE_CHANGE_NORMAL = 1, + CAMERA_STATE_CHANGE_WINDOW = 2, +}; + struct Camera { - bool is_changed; + byte state_changes; v3_f32 location; v4_f32 orientation; @@ -43,6 +49,8 @@ struct Camera { f32 aspect; f32 view[16]; + f32 projection[16]; + f32 orth[16]; }; void @@ -64,7 +72,7 @@ camera_update_vectors(Camera* camera) void camera_rotate(Camera* camera, int32 dx, int32 dy, f32 dt) { - camera->is_changed = true; + camera->state_changes |= CAMERA_STATE_CHANGE_NORMAL; camera->orientation.x += dy * camera->sensitivity; camera->orientation.y -= dx * camera->sensitivity; @@ -88,7 +96,7 @@ void camera_rotate(Camera* camera, int32 dx, int32 dy, f32 dt) // you can have up to 4 camera movement inputs at the same time void camera_movement(Camera* camera, CameraMovement* movement, f32 dt, bool relative_to_world = true) { - camera->is_changed = true; + camera->state_changes |= CAMERA_STATE_CHANGE_NORMAL; f32 velocity = camera->speed * dt; if (relative_to_world) { @@ -214,11 +222,11 @@ void camera_movement(Camera* camera, CameraMovement* movement, f32 dt, bool rela } inline -void camera_orth_matrix_lh(const Camera* __restrict camera, f32* __restrict orth) +void camera_orth_matrix_lh(Camera* __restrict camera) { - mat4_identity_sparse(orth); + mat4_identity(camera->orth); mat4_ortho_sparse_lh( - orth, + camera->orth, 0, camera->viewport_width, 0, camera->viewport_height, camera->znear, @@ -227,11 +235,11 @@ void camera_orth_matrix_lh(const Camera* __restrict camera, f32* __restrict orth } inline -void camera_orth_matrix_rh(const Camera* __restrict camera, f32* __restrict orth) +void camera_orth_matrix_rh(Camera* __restrict camera) { - mat4_identity_sparse(orth); + mat4_identity(camera->orth); mat4_ortho_sparse_rh( - orth, + camera->orth, 0, camera->viewport_width, 0, camera->viewport_height, camera->znear, @@ -240,11 +248,11 @@ void camera_orth_matrix_rh(const Camera* __restrict camera, f32* __restrict 
orth } inline -void camera_projection_matrix_lh(const Camera* __restrict camera, f32* __restrict projection) +void camera_projection_matrix_lh(Camera* __restrict camera) { - mat4_identity_sparse(projection); + mat4_identity(camera->projection); mat4_perspective_sparse_lh( - projection, + camera->projection, camera->fov, camera->aspect, camera->znear, @@ -253,11 +261,11 @@ void camera_projection_matrix_lh(const Camera* __restrict camera, f32* __restric } inline -void camera_projection_matrix_rh(const Camera* __restrict camera, f32* __restrict projection) +void camera_projection_matrix_rh(Camera* __restrict camera) { - mat4_identity_sparse(projection); + mat4_identity(camera->projection); mat4_perspective_sparse_rh( - projection, + camera->projection, camera->fov, camera->aspect, camera->znear, diff --git a/compression/Huffman.h b/compression/Huffman.h index 6cfd080..1cd764d 100644 --- a/compression/Huffman.h +++ b/compression/Huffman.h @@ -14,6 +14,7 @@ #include "../stdlib/Types.h" #include "../utils/BitUtils.h" +#include "../utils/MathUtils.h" #include "../utils/EndianUtils.h" struct HuffmanNode { @@ -34,31 +35,37 @@ struct Huffman { char* code[256]; // Contains a pointer per ASCII character to the huffman code sequence }; +// We could combine this function with the one below but this would introduce a if != 0 check for the frequency +// I would assume the current version is faster since we avoid a branch +inline HuffmanNode* huffman_node_create(Huffman* hf, int32 frequency, byte character, HuffmanNode* left, HuffmanNode* right) { HuffmanNode* node = hf->pool + hf->node_count++; - if (frequency) { - node->character = character; - node->frequency = frequency; - } else { - node->left = left; - node->right = right; - node->frequency = left->frequency + right->frequency; - } + node->character = character; + node->frequency = frequency; return node; } +// Same as other function but frequency = 0 +inline +HuffmanNode* huffman_node_create(Huffman* hf, byte character, 
HuffmanNode* left, HuffmanNode* right) +{ + HuffmanNode* node = hf->pool + hf->node_count++; + node->left = left; + node->right = right; + node->frequency = left->frequency + right->frequency; + + return node; +} + +inline void huffman_node_insert(Huffman* hf, HuffmanNode* node) { int32 child_id; int32 parent_id = hf->pq_end++; - while ((child_id = parent_id / 2)) { - if (hf->pq[child_id]->frequency <= node->frequency) { - break; - } - + while ((child_id = parent_id / 2) && hf->pq[child_id]->frequency <= node->frequency) { hf->pq[parent_id] = hf->pq[child_id]; parent_id = child_id; } @@ -111,13 +118,15 @@ int64 huffman_code_build(Huffman* hf, HuffmanNode* root, char* code, int32 lengt void huffman_init(Huffman* hf, const byte* in) { int32 frequency[256] = {0}; - char temp_code[16]; int32 buffer_position = 0; + char temp_code[16]; // We artificially force the root element (usually the 0 element) to have the index 1. hf->pq = (HuffmanNode **) (hf->priority_queue - 1); - while (*in) frequency[(byte) *in++]++; + while (*in) { + ++frequency[(byte) *in++]; + } for (int32 i = 0; i < 256; ++i) { if (frequency[i]) { @@ -126,21 +135,20 @@ void huffman_init(Huffman* hf, const byte* in) } while (hf->pq_end > 2) { - huffman_node_insert(hf, huffman_node_create(hf, 0, 0, huffman_node_remove(hf), huffman_node_remove(hf))); + huffman_node_insert(hf, huffman_node_create(hf, 0, huffman_node_remove(hf), huffman_node_remove(hf))); } huffman_code_build(hf, hf->pq[1], temp_code, 0, hf->buffer, &buffer_position); } +inline void huffman_dump(const Huffman* hf, byte* out) { - // dump the char -> code relations as relative indeces + // dump the char -> code relations as relative indices for (int32 i = 0; i < ARRAY_COUNT(hf->code); ++i) { - if (hf->code[i]) { - *((int64 *) out) = SWAP_ENDIAN_LITTLE(hf->code[i] - hf->buffer); - } else { - *((int64 *) out) = SWAP_ENDIAN_LITTLE(-1); - } + *((int64 *) out) = hf->code[i] + ? 
SWAP_ENDIAN_LITTLE(hf->code[i] - hf->buffer) + : SWAP_ENDIAN_LITTLE(-1); out += sizeof(int64); } @@ -149,6 +157,7 @@ void huffman_dump(const Huffman* hf, byte* out) memcpy(out, hf->buffer, sizeof(char) * ARRAY_COUNT(hf->buffer)); } +inline void huffman_load(Huffman* hf, const byte* in) { // load the char -> code relations and convert relative indices to pointers @@ -165,6 +174,7 @@ void huffman_load(Huffman* hf, const byte* in) memcpy(hf->buffer, in, sizeof(char) * ARRAY_COUNT(hf->buffer)); } +inline int64 huffman_encode(Huffman* hf, const byte* in, byte* out) { uint64 bit_length = 0; @@ -180,11 +190,11 @@ int64 huffman_encode(Huffman* hf, const byte* in, byte* out) ++code; ++bit_length; - ++pos_bit; - if (pos_bit > 7) { + // Make sure it wraps around to 0 for pos_bit > 7 + pos_bit = MODULO_2(++pos_bit, 8); + if (pos_bit == 0) { ++out; - pos_bit = 0; } } } @@ -192,29 +202,26 @@ int64 huffman_encode(Huffman* hf, const byte* in, byte* out) return bit_length; } +inline int64 huffman_decode(Huffman* hf, const byte* in, byte* out, uint64 bit_length) { HuffmanNode* current = hf->pq[1]; int32 pos_bit = 0; - int64 out_length = 0; - byte* start = out; while (pos_bit < bit_length) { - if (BITS_GET_8_L2R(*in, pos_bit++, 1)) { - current = current->right; - } else { - current = current->left; - } + // Branchless version of checking if bit is set and then updating current + int32 bit = BITS_GET_8_L2R(*in, pos_bit, 1); + current = (HuffmanNode *) (((uintptr_t) current->left & ~bit) | ((uintptr_t) current->right & bit)); if (current->character) { *out++ = current->character; current = hf->pq[1]; } - if (pos_bit > 7) { + pos_bit = MODULO_2(++pos_bit, 8); + if (pos_bit == 0) { ++in; - pos_bit = 0; } } diff --git a/compression/LZP.h b/compression/LZP.h index 8dc7d4a..bbef1ac 100644 --- a/compression/LZP.h +++ b/compression/LZP.h @@ -92,10 +92,8 @@ uint32 lzp_decode(const byte* in, size_t length, byte* out) hash = (hash << 4) ^ c; } - if (j > 0) { - for (i = 0; i < j; ++i) { - 
out[out_pos++] = buf[i]; - } + for (i = 0; i < j; ++i) { + out[out_pos++] = buf[i]; } } @@ -106,13 +104,14 @@ int32 find_longest_match(char *window, int32 window_start, char *buffer, int32 b int32 best_length = 0; int32 best_offset = 0; - for (int32 i = window_start; i < 4096 && i < buffer_size; ++i) { + for (int32 i = window_start; i < 4096 && i < buffer_size; ++i) { int32 length = 0; - while (length < 18 && - i + length < 4096 && - buffer[length] == window[i + length]) { - length++; + while (length < 18 + && i + length < 4096 + && buffer[length] == window[i + length] + ) { + ++length; } if (length > best_length) { @@ -135,7 +134,12 @@ uint32 lzp3_encode(const byte* in, size_t length, byte* out) { size_t i = 0; while (i < length) { int32 match_position = 0; - int32 match_length = find_longest_match(window, window_start, (char *)&in[i], (int32) (length - i), &match_position); + int32 match_length = find_longest_match( + window, + window_start, + (char *) &in[i], (int32) (length - i), + &match_position + ); if (match_length > 2) { out[out_size++] = 0xFF; @@ -170,7 +174,7 @@ uint32 lzp3_decode(const byte* in, size_t length, byte* out) { int32 match_length = in[i + 2]; for (int32 j = 0; j < match_length; j++) { - out[out_size++] = window[(match_position + j) % 4096]; + out[out_size++] = window[MODULO_2(match_position + j, 4096)]; } memmove(window, window + match_length, 4096 - match_length); diff --git a/compression/RLE.h b/compression/RLE.h index 1e6b3bc..b261d68 100644 --- a/compression/RLE.h +++ b/compression/RLE.h @@ -21,7 +21,7 @@ uint64 rle_encode(const char* in, size_t length, char* out) uint64 count; uint64 j = 0; - for (uint64 i = 0; i < length; i++) { + for (uint64 i = 0; i < length; ++i) { count = 1; while (i + 1 < length && in[i] == in[i + 1]) { ++count; @@ -42,7 +42,7 @@ uint64 rle_decode(const char* in, size_t length, char* out) { uint64 j = 0; - for (int64 i = 0; i < length; i++) { + for (int64 i = 0; i < length; ++i) { char current_char = in[i]; ++i; 
@@ -53,7 +53,7 @@ uint64 rle_decode(const char* in, size_t length, char* out) } --i; - for (int32 k = 0; k < count; k++) { + for (int32 k = 0; k < count; ++k) { out[j++] = current_char; } } diff --git a/font/Font.h b/font/Font.h index 40d4d3f..bc7e8a6 100644 --- a/font/Font.h +++ b/font/Font.h @@ -28,6 +28,7 @@ struct GlyphTextureCoords { f32 y2; }; +#define GLYPH_SIZE 40 struct Glyph { uint32 codepoint; GlyphMetrics metrics; @@ -55,7 +56,7 @@ void font_init(Font* font, byte* data, int count) inline Glyph* font_glyph_find(Font* font, uint32 codepoint) { - for (int i = 0; i < font->glyph_count; ++i) { + for (uint32 i = 0; i < font->glyph_count; ++i) { if (font->glyphs[i].codepoint == codepoint) { return &font->glyphs[i]; } @@ -66,10 +67,15 @@ Glyph* font_glyph_find(Font* font, uint32 codepoint) void font_from_file_txt( Font* font, - byte* data + const char* path, + RingMemory* ring ) { - char* pos = (char *) data; + FileBody file; + file_read(path, &file, ring); + ASSERT_SIMPLE(file.size); + + char* pos = (char *) file.content; bool start = true; char block_name[32]; @@ -147,25 +153,21 @@ void font_from_file_txt( } } -// Calculates the required size for representing a font definition in memory inline -uint64 font_size_from_file(const byte* data) +int32 font_data_size(const Font* font) { - return SWAP_ENDIAN_LITTLE(*((uint32 *) data)) * sizeof(Glyph); + ASSERT_SIMPLE_CONST(sizeof(Glyph) == GLYPH_SIZE); + return font->glyph_count * sizeof(Glyph) + + sizeof(font->glyph_count) + + sizeof(font->texture_name) + + sizeof(font->size) + + sizeof(font->line_height); } -inline -uint64 font_size(const Font* font) -{ - // We have to remove the size of the pointer which will not be stored - return sizeof(font) - sizeof(Glyph*) - + font->glyph_count * sizeof(Glyph); -} - -void font_from_file( - Font* font, +int32 font_from_data( const byte* data, - int32 size = 8 + Font* font, + int32 steps = 8 ) { const byte* pos = data; @@ -190,7 +192,7 @@ void font_from_file( #if OPENGL // 
@todo Implement y-offset correction - for (int32 i = 0; i < font->glyph_count; ++i) { + for (uint32 i = 0; i < font->glyph_count; ++i) { float temp = font->glyphs[i].coords.y1; font->glyphs[i].coords.y1 = 1.0f - font->glyphs[i].coords.y2; font->glyphs[i].coords.y2 = 1.0f - temp; @@ -203,26 +205,17 @@ void font_from_file( font->glyph_count * sizeof(Glyph) / 4, // everything in here is 4 bytes -> super easy to swap steps ); + + return font_data_size(font); } -inline -int64 font_size_from_font(Font* font) -{ - return font->glyph_count * sizeof(Glyph) + sizeof(Font); -} - -void font_to_file( - RingMemory* ring, - const char* path, +int32 font_to_data( const Font* font, + byte* data, int32 steps = 8 ) { - FileBody file; - file.size = font->glyph_count * sizeof(Glyph) + sizeof(Font); - file.content = ring_get_memory(ring, file.size, 64); - - byte* pos = file.content; + byte* pos = data; // Glyph count *((uint32 *) pos) = font->glyph_count; @@ -244,16 +237,16 @@ void font_to_file( memcpy(pos, font->glyphs, font->glyph_count * sizeof(Glyph)); pos += font->glyph_count * sizeof(Glyph); - file.size = pos - file.content; + int32 size = (int32) (pos - data); SWAP_ENDIAN_LITTLE_SIMD( (int32 *) file.content, (int32 *) file.content, - file.size / 4, // everything in here is 4 bytes -> super easy to swap + size / 4, // everything in here is 4 bytes -> super easy to swap steps ); - file_write(path, &file); + return font_data_size(font); } #endif \ No newline at end of file diff --git a/gpuapi/opengl/OpenglUtils.h b/gpuapi/opengl/OpenglUtils.h index c4571b6..416e0e4 100644 --- a/gpuapi/opengl/OpenglUtils.h +++ b/gpuapi/opengl/OpenglUtils.h @@ -13,6 +13,7 @@ #include "../../memory/RingMemory.h" #include "../../utils/TestUtils.h" #include "../../object/Texture.h" +#include "../../image/Image.cpp" #include "../../utils/StringUtils.h" #include "../../log/Log.h" @@ -136,6 +137,8 @@ void load_texture_to_gpu(const Texture* texture, int32 mipmap_level = 0) if (mipmap_level > -1) { 
glGenerateMipmap(GL_TEXTURE_2D); } + + LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UPLOAD, texture->image.pixel_count * image_pixel_size_from_type(texture->image.pixel_type)); } inline @@ -162,17 +165,19 @@ GLuint shader_make(GLenum type, const char *source, RingMemory* ring) GLint status; glGetShaderiv(shader, GL_COMPILE_STATUS, &status); - if (status == GL_FALSE) { - GLint length; - glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &length); + #if DEBUG || INTERNAL + if (status == GL_FALSE) { + GLint length; + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &length); - GLchar *info = (GLchar *) ring_get_memory(ring, length * sizeof(GLchar)); + GLchar *info = (GLchar *) ring_get_memory(ring, length * sizeof(GLchar)); - glGetShaderInfoLog(shader, length, NULL, info); - LOG(info, true, true); + glGetShaderInfoLog(shader, length, NULL, info); + LOG(info, true, true); - ASSERT_SIMPLE(false); - } + ASSERT_SIMPLE(false); + } + #endif return shader; } @@ -222,17 +227,19 @@ GLuint program_make( GLint status; glGetProgramiv(program, GL_LINK_STATUS, &status); - if (status == GL_FALSE) { - GLint length; - glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length); + #if DEBUG || INTERNAL + if (status == GL_FALSE) { + GLint length; + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length); - GLchar *info = (GLchar *) ring_get_memory(ring, length * sizeof(GLchar)); + GLchar *info = (GLchar *) ring_get_memory(ring, length * sizeof(GLchar)); - glGetProgramInfoLog(program, length, NULL, info); - LOG(info, true, true); + glGetProgramInfoLog(program, length, NULL, info); + LOG(info, true, true); - ASSERT_SIMPLE(false); - } + ASSERT_SIMPLE(false); + } + #endif // @question really? 
if (geometry_shader > -1) { @@ -442,6 +449,8 @@ uint32 gpuapi_buffer_generate(int32 size, const void* data) glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, size, data, GL_STATIC_DRAW); + LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UPLOAD, size); + return vbo; } @@ -454,6 +463,8 @@ uint32 gpuapi_buffer_generate_dynamic(int32 size, const void* data) glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, size, data, GL_DYNAMIC_DRAW); + LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UPLOAD, size); + return vbo; } @@ -473,6 +484,8 @@ void gpuapi_buffer_update_dynamic(uint32 vbo, int32 size, const void* data) { glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, size, data, GL_DYNAMIC_DRAW); + + LOG_INCREMENT_BY(DEBUG_COUNTER_GPU_UPLOAD, size); } inline diff --git a/gpuapi/opengl/ShaderUtils.h b/gpuapi/opengl/ShaderUtils.h index 642ab81..ca858ef 100644 --- a/gpuapi/opengl/ShaderUtils.h +++ b/gpuapi/opengl/ShaderUtils.h @@ -161,4 +161,87 @@ void shader_check_compile_errors(uint32 id, char* log) } } +int32 shader_program_optimize(const char* input, char* output) +{ + const char* read_ptr = input; + char* write_ptr = output; + bool in_string = false; + + while (*read_ptr) { + // Remove leading whitespace + while (*read_ptr == ' ' || *read_ptr == '\t' || is_eol(read_ptr)) { + ++read_ptr; + } + + if (write_ptr != output + && *(write_ptr - 1) != '\n' && *(write_ptr - 1) != ';' && *(write_ptr - 1) != '{' + && *(write_ptr - 1) != '(' + && *(write_ptr - 1) != ',' + ) { + *write_ptr++ = '\n'; + } + + // Handle single-line comments (//) + if (*read_ptr == '/' && *(read_ptr + 1) == '/' && !in_string) { + // Go to end of line + while (*read_ptr && *read_ptr != '\n') { + ++read_ptr; + } + + continue; + } + + // Handle multi-line comments (/* */) + if (*read_ptr == '/' && *(read_ptr + 1) == '*' && !in_string) { + // Go to end of comment + while (*read_ptr && (*read_ptr != '*' || *(read_ptr + 1) != '/')) { + ++read_ptr; + } + + if (*read_ptr == '*' && *(read_ptr + 
1) == '/') { + read_ptr += 2; + } + + continue; + } + + // Handle strings to avoid removing content within them + if (*read_ptr == '"') { + in_string = !in_string; + } + + // Copy valid characters to write_ptr + while (*read_ptr && !is_eol(read_ptr) && *read_ptr != '"' + && !(*read_ptr == '/' && (*(read_ptr + 1) == '/' || *(read_ptr + 1) == '*')) + ) { + if (!in_string + && (*read_ptr == '*' || *read_ptr == '/' || *read_ptr == '=' || *read_ptr == '+' || *read_ptr == '-' || *read_ptr == '%' + || *read_ptr == '(' || *read_ptr == ')' + || *read_ptr == '{' || *read_ptr == '}' + || *read_ptr == ',' || *read_ptr == '?' || *read_ptr == ':' || *read_ptr == ';' + || *read_ptr == '&' || *read_ptr == '|' + || *read_ptr == '>' || *read_ptr == '<' + ) + ) { + if (is_whitespace(*(write_ptr - 1)) || *(write_ptr - 1) == '\n') { + --write_ptr; + } + + *write_ptr++ = *read_ptr++; + + if (*read_ptr && is_whitespace(*read_ptr)) { + ++read_ptr; + } + } else { + *write_ptr++ = *read_ptr++; + } + } + } + + *write_ptr = '\0'; + + // -1 to remove \0 from length, same as strlen + return (int32) (write_ptr - output); +} + #endif \ No newline at end of file diff --git a/image/Bitmap.h b/image/Bitmap.h index b2d7194..1f985b1 100644 --- a/image/Bitmap.h +++ b/image/Bitmap.h @@ -271,7 +271,7 @@ void generate_default_bitmap_references(const FileBody* file, Bitmap* bitmap) void image_bmp_generate(const FileBody* src_data, Image* image) { // @performance We are generating the struct and then filling the data. 
- // There is some asignment/copy overhead + // There is some assignment/copy overhead Bitmap src = {}; generate_default_bitmap_references(src_data, &src); @@ -285,7 +285,13 @@ void image_bmp_generate(const FileBody* src_data, Image* image) uint32 pixel_bytes = src.dib_header.bits_per_pixel / 8; byte alpha_offset = pixel_bytes > 3; - image->has_alpha |= (bool) alpha_offset; + if (pixel_bytes == 4) { + image->pixel_type = (byte) PIXEL_TYPE_RGBA; + } else if (pixel_bytes == 3) { + image->pixel_type = (byte) PIXEL_TYPE_RGB; + } else { + ASSERT_SIMPLE(false); + } if (image->order_pixels == IMAGE_PIXEL_ORDER_BGRA && image->order_rows == IMAGE_ROW_ORDER_BOTTOM_TO_TOP @@ -331,7 +337,7 @@ void image_bmp_generate(const FileBody* src_data, Image* image) // Add alpha channel at end of every RGB value if (alpha_offset > 0) { image->pixels[row_pos1 + x * pixel_bytes + 3] = src.pixels[row_pos2 + x * pixel_bytes + pixel_bytes + 3]; - } else if (image->has_alpha) { + } else if (image->pixel_type == PIXEL_TYPE_RGBA) { image->pixels[row_pos1 + x * pixel_bytes + 3] = 0xFF; } } diff --git a/image/Image.cpp b/image/Image.cpp index 3614359..d0109b6 100644 --- a/image/Image.cpp +++ b/image/Image.cpp @@ -23,7 +23,7 @@ #include "Bitmap.h" #include "Png.h" -void image_from_file(RingMemory* ring, const char* path, Image* image) +void image_from_file(Image* image, const char* path, RingMemory* ring) { FileBody file; file_read(path, &file, ring); @@ -46,7 +46,7 @@ void image_flip_vertical(RingMemory* ring, Image* image) // Last row const byte* end = temp + image->pixel_count * sizeof(uint32) - image->width * sizeof(uint32); - for (int y = 0; y < image->height; ++y) { + for (uint32 y = 0; y < image->height; ++y) { memcpy(image->pixels + y * stride, end - y * stride, stride); } @@ -64,4 +64,90 @@ void image_flip_vertical(RingMemory* ring, Image* image) image->order_rows = (byte) (!((bool) image->order_rows)); } +inline +int32 image_pixel_size_from_type(byte type) +{ + switch (type) { + case 
PIXEL_TYPE_RGBA: { + return 4; + } break; + case PIXEL_TYPE_RGB: { + return 3; + } break; + case PIXEL_TYPE_MONO: { + return 1; + } break; + case PIXEL_TYPE_RGBA_F: { + return 16; + } break; + case PIXEL_TYPE_RGB_F: { + return 12; + } break; + case PIXEL_TYPE_MONO_F: { + return 4; + } break; + default: { + return 0; + } + } +} + +int32 image_from_data(const byte* data, Image* image) +{ + const byte* pos = data; + + image->width = SWAP_ENDIAN_LITTLE(*((uint32 *) pos)); + pos += sizeof(image->width); + + image->height = SWAP_ENDIAN_LITTLE(*((uint32 *) pos)); + pos += sizeof(image->height); + + image->pixel_count = SWAP_ENDIAN_LITTLE(*((uint32 *) pos)); + pos += sizeof(image->pixel_count); + + image->order_pixels = *pos; + pos += sizeof(image->order_pixels); + + image->order_rows = *pos; + pos += sizeof(image->order_rows); + + image->pixel_type = *pos; + pos += sizeof(image->pixel_type); + + int32 image_size; + memcpy(image->pixels, pos, image_size = (image_pixel_size_from_type(image->pixel_type) * image->pixel_count)); + pos += image_size; + + return (int32) (pos - data); +} + +int32 image_to_data(const Image* image, byte* data) +{ + byte* pos = data; + + *((uint32 *) pos) = SWAP_ENDIAN_LITTLE(image->width); + pos += sizeof(image->width); + + *((uint32 *) pos) = SWAP_ENDIAN_LITTLE(image->height); + pos += sizeof(image->height); + + *((uint32 *) pos) = SWAP_ENDIAN_LITTLE(image->pixel_count); + pos += sizeof(image->pixel_count); + + *pos = image->order_pixels; + pos += sizeof(image->order_pixels); + + *pos = image->order_rows; + pos += sizeof(image->order_rows); + + *pos = image->pixel_type; + pos += sizeof(image->pixel_type); + + int32 image_size; + memcpy(pos, image->pixels, image_size = (image_pixel_size_from_type(image->pixel_type) * image->pixel_count)); + pos += image_size; + + return (int32) (pos - data); +} + #endif \ No newline at end of file diff --git a/image/Image.h b/image/Image.h index e5ffa0b..f17ce7a 100644 --- a/image/Image.h +++ b/image/Image.h @@ 
-17,6 +17,16 @@ #define IMAGE_ROW_ORDER_TOP_TO_BOTTOM 0 #define IMAGE_ROW_ORDER_BOTTOM_TO_TOP 1 +enum PixelType +{ + PIXEL_TYPE_RGBA, // 4 bytes + PIXEL_TYPE_RGB, // 3 bytes + PIXEL_TYPE_MONO, // 1 byte + PIXEL_TYPE_RGBA_F, // 16 bytes + PIXEL_TYPE_RGB_F, // 12 bytes + PIXEL_TYPE_MONO_F, // 4 bytes +}; + // This struct also functions as a setting on how to load the image data // has_alpha is defined it forces an alpha channel even for bitmaps // order_pixels defines how the pixels should be ordered @@ -27,11 +37,11 @@ struct Image { uint32 pixel_count; // @question Do we even need this? // Image settings - bool has_alpha; byte order_pixels; // RGBA vs BGRA byte order_rows; // top-to-bottom vs bottom-to-top + byte pixel_type; // Usually 4 or 3 bytes unless monochrome data - uint32* pixels; // owner of data + byte* pixels; // owner of data }; #endif \ No newline at end of file diff --git a/image/Png.h b/image/Png.h index 8736bac..94bbb3c 100644 --- a/image/Png.h +++ b/image/Png.h @@ -623,7 +623,7 @@ bool image_png_generate(const FileBody* src_data, Image* image, RingMemory* ring // essentially overwriting the **current** chunk header data, which doesn't matter since we already parsed it // then we reset the pos pointer backwards to where we want to start... gg - // https://www.ietf.org/rfc/rfc1951.txt - defalte + // https://www.ietf.org/rfc/rfc1951.txt - deflate // This data might be stored in the prvious IDAT chunk?! BFINAL = (uint8) BITS_GET_8_R2L(*stream.pos, stream.bit_pos, 1); bits_walk(&stream, 1); @@ -783,7 +783,7 @@ bool image_png_generate(const FileBody* src_data, Image* image, RingMemory* ring image->width = src.ihdr.width; image->height = src.ihdr.height; image->pixel_count = image->width * image->height; - image->has_alpha = (src.ihdr.color_type == 6); + image->pixel_type = (byte) (src.ihdr.color_type == 6 ? 
PIXEL_TYPE_RGBA : PIXEL_TYPE_RGB); image->order_pixels = IMAGE_PIXEL_ORDER_RGBA; image->order_rows = IMAGE_ROW_ORDER_TOP_TO_BOTTOM; diff --git a/image/Tga.h b/image/Tga.h index adddb58..735be6a 100644 --- a/image/Tga.h +++ b/image/Tga.h @@ -90,7 +90,13 @@ void image_tga_generate(const FileBody* src_data, Image* image) uint32 pixel_bytes = src.header.bits_per_pixel / 8; byte alpha_offset = pixel_bytes > 3; - image->has_alpha |= (bool) alpha_offset; + if (pixel_bytes == 4) { + image->pixel_type = (byte) PIXEL_TYPE_RGBA; + } else if (pixel_bytes == 3) { + image->pixel_type = (byte) PIXEL_TYPE_RGB; + } else { + ASSERT_SIMPLE(false); + } // We can check same settings through equality since we use the same values if (image->order_rows == src.header.vertical_ordering @@ -131,7 +137,7 @@ void image_tga_generate(const FileBody* src_data, Image* image) // Add alpha channel at end of every RGB value if (alpha_offset > 0) { image->pixels[row_pos1 + x * pixel_bytes + 3] = src.pixels[row_pos2 + x * pixel_bytes + pixel_bytes + 3]; - } else if (image->has_alpha) { + } else if (image->pixel_type == PIXEL_TYPE_RGBA) { image->pixels[row_pos1 + x * pixel_bytes + 3] = 0xFF; } } diff --git a/localization/Language.h b/localization/Language.h index 9208e08..200b4fa 100644 --- a/localization/Language.h +++ b/localization/Language.h @@ -10,22 +10,32 @@ #include "../platform/linux/FileUtils.cpp" #endif +#define LANGUAGE_VERSION 1 + struct Language { // WARNING: the actual start of data is data -= sizeof(count); see file loading below byte* data; int32 count; + int64 size; char** lang; }; void language_from_file_txt( Language* language, - byte* data + const char* path, + RingMemory* ring ) { + FileBody file; + file_read(path, &file, ring); + ASSERT_SIMPLE(file.size); + // count elements language->count = 1; int64 len = 0; + byte* data = file.content; + while (data[len] != '\0') { if (data[len] == '\n' && data[len + 1] == '\n') { ++language->count; @@ -36,6 +46,7 @@ void 
language_from_file_txt( ++len; } + language->size = len; language->lang = (char **) language->data; memcpy(language->data + language->count * sizeof(char *), data, len); @@ -54,22 +65,35 @@ void language_from_file_txt( } } +int32 language_data_size(const Language* language) +{ + return (int32) (language->size + + sizeof(language->count) + + sizeof(language->size) + + language->count * sizeof(uint64) + ); +} + // File layout - binary // offsets for start of strings // actual string data -void language_from_file( +int32 language_from_data( + const byte* data, Language* language ) { - byte* pos = language->data; + const byte* pos = data; // Count language->count = SWAP_ENDIAN_LITTLE(*((int32 *) pos)); pos += sizeof(language->count); - language->lang = (char **) pos; + language->size = SWAP_ENDIAN_LITTLE(*((int32 *) pos)); + pos += sizeof(language->size); + + language->lang = (char **) language->data; char** pos_lang = language->lang; - byte* start = pos; + byte* start = language->data; // Load pointers/offsets for (int32 i = 0; i < language->count; ++i) { @@ -77,28 +101,29 @@ void language_from_file( pos += sizeof(uint64); } - // We don't have to load the actual strings, they are already in ->data due to the file reading + memcpy( + language->data + language->count * sizeof(uint64), + pos, + language->size + ); + + return language_data_size(language); } -void language_to_file( - RingMemory* ring, - const char* path, - Language* language +int32 language_to_data( + const Language* language, + byte* data ) { - FileBody file; - - // Temporary file size for buffer - // @todo This is a bad placeholder, The problem is we don't know how much we actually need without stepping through the elements - // I also don't want to add a size variable to the theme as it is useless in all other cases - file.size = MEGABYTE * 32; - - file.content = ring_get_memory(ring, file.size, 64); - byte* pos = file.content; + byte* pos = data; // Count *((int32 *) pos) = 
SWAP_ENDIAN_LITTLE(language->count); pos += sizeof(language->count); + // Count + *((int32 *) pos) = SWAP_ENDIAN_LITTLE((int32) language->size); + pos += sizeof(language->size); + byte* start = pos; // Save pointers @@ -107,19 +132,14 @@ void language_to_file( pos += sizeof(uint64); } - int64 len_total = 0; - // Save actual strings - int64 len; - for (int32 i = 0; i < language->count; ++i) { - len = strlen(language->lang[i]); - len_total += len; - memcpy((char *) pos, language->lang[i], len + 1); - pos += len; - } + memcpy( + pos, + language->data + language->count * sizeof(uint64), + language->size + ); - file.size = pos - file.content; - file_write(path, &file); + return language_data_size(language); } #endif \ No newline at end of file diff --git a/log/Debug.cpp b/log/Debug.cpp index 9742417..4aa3ca9 100644 --- a/log/Debug.cpp +++ b/log/Debug.cpp @@ -136,7 +136,12 @@ void update_timing_stat_end_continued(uint32 stat, const char* function) inline void update_timing_stat_reset(uint32 stat) { - atomic_set((int32 *) debug_container->perf_stats[stat].function, 0); + spinlock_start(&debug_container->perf_stats_spinlock); + TimingStat* timing_stat = &debug_container->perf_stats[stat]; + timing_stat->function = NULL; + timing_stat->delta_tick = 0; + timing_stat->delta_time = 0; + spinlock_end(&debug_container->perf_stats_spinlock); } inline @@ -146,13 +151,13 @@ void reset_counter(int32 id) } inline -void log_increment(int32 id, int32 by = 1) +void log_increment(int32 id, int64 by = 1) { atomic_add(&debug_container->counter[id], by); } inline -void log_counter(int32 id, int32 value) +void log_counter(int32 id, int64 value) { atomic_set(&debug_container->counter[id], value); } @@ -215,11 +220,13 @@ void debug_memory_log(uint64 start, uint64 size, int32 type, const char* functio return; } - if (mem->action_idx == DEBUG_MEMORY_RANGE_MAX) { - mem->action_idx = 0; + uint64 idx = atomic_add_fetch(&mem->action_idx, 1); + if (idx >= ARRAY_COUNT(mem->last_action)) { + 
atomic_set(&mem->action_idx, 1); + idx %= ARRAY_COUNT(mem->last_action); } - DebugMemoryRange* dmr = &mem->last_action[mem->action_idx]; + DebugMemoryRange* dmr = &mem->last_action[idx]; dmr->type = type; dmr->start = start - mem->start; dmr->size = size; @@ -228,8 +235,6 @@ void debug_memory_log(uint64 start, uint64 size, int32 type, const char* functio dmr->time = __rdtsc(); dmr->function_name = function; - ++mem->action_idx; - if (type < 0 && mem->usage < size * -type) { mem->usage = 0; } else { @@ -248,11 +253,13 @@ void debug_memory_reserve(uint64 start, uint64 size, int32 type, const char* fun return; } - if (mem->reserve_action_idx == DEBUG_MEMORY_RANGE_MAX) { - mem->reserve_action_idx = 0; + uint64 idx = atomic_add_fetch(&mem->reserve_action_idx, 1); + if (idx >= ARRAY_COUNT(mem->reserve_action)) { + atomic_set(&mem->reserve_action_idx, 1); + idx %= ARRAY_COUNT(mem->last_action); } - DebugMemoryRange* dmr = &mem->reserve_action[mem->reserve_action_idx]; + DebugMemoryRange* dmr = &mem->reserve_action[idx]; dmr->type = type; dmr->start = start - mem->start; dmr->size = size; @@ -260,10 +267,9 @@ void debug_memory_reserve(uint64 start, uint64 size, int32 type, const char* fun // We are using rdtsc since it is faster -> less debugging overhead than using time() dmr->time = __rdtsc(); dmr->function_name = function; - - ++mem->reserve_action_idx; } +// @bug This probably requires thread safety inline void debug_memory_reset() { @@ -271,7 +277,8 @@ void debug_memory_reset() return; } - uint64 time = __rdtsc() - 1000000000; + // We remove debug information that are "older" than 1GHz + uint64 time = __rdtsc() - 1 * GHZ; for (uint64 i = 0; i < debug_container->dmc.memory_element_idx; ++i) { for (int32 j = 0; j < DEBUG_MEMORY_RANGE_MAX; ++j) { @@ -282,6 +289,7 @@ void debug_memory_reset() } } +// @bug This probably requires thread safety byte* log_get_memory(uint64 size, byte aligned = 1, bool zeroed = false) { if (!debug_container) { diff --git a/log/Debug.h 
b/log/Debug.h index 8c89c0c..566a23c 100644 --- a/log/Debug.h +++ b/log/Debug.h @@ -45,7 +45,7 @@ struct DebugContainer { LogMemory log_memory; // Used to log general int values (e.g. counter for draw calls etc.) - int32* counter; + int64* counter; #if _WIN32 HANDLE log_fp; diff --git a/log/Log.h b/log/Log.h index d0a997f..7ca55eb 100644 --- a/log/Log.h +++ b/log/Log.h @@ -36,8 +36,8 @@ enum LogDataType { void log_to_file(); void log(const char* str, bool should_log, bool save, const char* file, const char* function, int32 line); void log(const char* format, LogDataType data_type, void* data, bool should_log, bool save, const char* file, const char* function, int32 line); -void log_increment(int32, int32); -void log_counter(int32, int32); +void log_increment(int32, int64); +void log_counter(int32, int64); #if (LOG_LEVEL == 0) // Don't perform any logging at log level 0 diff --git a/math/matrix/MatrixFloat32.h b/math/matrix/MatrixFloat32.h index cb14c56..4ceff1a 100644 --- a/math/matrix/MatrixFloat32.h +++ b/math/matrix/MatrixFloat32.h @@ -23,6 +23,10 @@ // @todo Implement intrinsic versions! +// INFO: I thought we could remove some of the functions. Sometimes we have a function that modifies the original value and then we also have the same function that fills a new result value. +// On gcc the optimized code creates the same assembly if we would just choose to return the new value vs. modifying a value by pointer. 
+// However, on MSVC this is not the case and the pointer version has more and slower assembly code for the pass-by-value function + inline void vec2_normalize(f32* __restrict x, f32* __restrict y) { diff --git a/memory/Queue.h b/memory/Queue.h index 531426a..8115c6e 100644 --- a/memory/Queue.h +++ b/memory/Queue.h @@ -38,13 +38,30 @@ void queue_free(Queue* queue) ring_free(queue); } +inline +bool queue_is_empty(Queue* queue) { + return queue->head == queue->tail; +} + +inline +bool queue_set_empty(Queue* queue) { + return queue->head = queue->tail; +} + +inline +bool queue_is_full(Queue* queue, uint64 size, byte aligned = 0) { + return !ring_commit_safe((RingMemory *) queue, size, aligned); +} + // Conditional Lock inline -void queue_enqueue(Queue* queue, byte* data, uint64 size, byte aligned = 0) +byte* queue_enqueue(Queue* queue, byte* data, uint64 size, byte aligned = 0) { byte* mem = ring_get_memory_nomove(queue, size, aligned); memcpy(mem, data, size); ring_move_pointer(queue, &queue->head, size, aligned); + + return mem; } inline @@ -60,10 +77,34 @@ void queue_enqueue_end(Queue* queue, uint64 size, byte aligned = 0) } inline -byte* queue_dequeue(Queue* queue, byte* data, uint64 size, byte aligned = 0) +bool queue_dequeue(Queue* queue, byte* data, uint64 size, byte aligned = 0) { - memcpy(data, queue->tail, size); + if (queue->head == queue->tail) { + return false; + } + + if (size == 4) { + *((int32 *) data) = *((int32 *) queue->tail); + } else { + memcpy(data, queue->tail, size); + } + ring_move_pointer(queue, &queue->tail, size, aligned); + + return true; +} + +inline +byte* queue_dequeue_keep(Queue* queue, uint64 size, byte aligned = 0) +{ + if (queue->head == queue->tail) { + return NULL; + } + + byte* data = queue->tail; + ring_move_pointer(queue, &queue->tail, size, aligned); + + return data; } inline diff --git a/memory/RingMemory.h b/memory/RingMemory.h index 5a47056..eae684f 100644 --- a/memory/RingMemory.h +++ b/memory/RingMemory.h @@ -58,7 +58,7 
@@ void ring_alloc(RingMemory* ring, uint64 size, int32 alignment = 64) ? (byte *) platform_alloc(size) : (byte *) platform_alloc_aligned(size, alignment); - ring->end = ring->memory + size;; + ring->end = ring->memory + size; ring->head = ring->memory; ring->tail = ring->memory; ring->size = size; @@ -77,7 +77,7 @@ void ring_init(RingMemory* ring, BufferMemory* buf, uint64 size, int32 alignment ring->memory = buffer_get_memory(buf, size, alignment, true); - ring->end = ring->memory + size;; + ring->end = ring->memory + size; ring->head = ring->memory; ring->tail = ring->memory; ring->size = size; @@ -96,7 +96,7 @@ void ring_init(RingMemory* ring, byte* buf, uint64 size, int32 alignment = 64) // @bug what if an alignment is defined? ring->memory = buf; - ring->end = ring->memory + size;; + ring->end = ring->memory + size; ring->head = ring->memory; ring->tail = ring->memory; ring->size = size; @@ -110,12 +110,12 @@ void ring_init(RingMemory* ring, byte* buf, uint64 size, int32 alignment = 64) } inline -void ring_free(RingMemory* buf) +void ring_free(RingMemory* ring) { - if (buf->alignment < 2) { - platform_free((void **) &buf->memory); + if (ring->alignment < 2) { + platform_free((void **) &ring->memory); } else { - platform_aligned_free((void **) &buf->memory); + platform_aligned_free((void **) &ring->memory); } } diff --git a/memory/ThreadedQueue.h b/memory/ThreadedQueue.h index a4d63a6..b9f38f8 100644 --- a/memory/ThreadedQueue.h +++ b/memory/ThreadedQueue.h @@ -6,8 +6,10 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef TOS_MEMORY_QUEUE_H -#define TOS_MEMORY_QUEUE_H +#ifndef TOS_MEMORY_THREADED_QUEUE_H +#define TOS_MEMORY_THREADED_QUEUE_H + +// @todo This is a horrible implementation. 
Please implement a lock free solution #include "../stdlib/Types.h" #include "../utils/Utils.h" @@ -47,7 +49,7 @@ struct ThreadedQueue { }; inline -void threaded_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint64 element_size, int32 alignment = 64) +void thrd_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint64 element_size, int32 alignment = 64) { ring_alloc((RingMemory *) queue, element_count * element_size, alignment); @@ -59,7 +61,7 @@ void threaded_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint64 ele } inline -void threaded_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_count, uint64 element_size, int32 alignment = 64) +void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_count, uint64 element_size, int32 alignment = 64) { ring_init((RingMemory *) queue, buf, element_count * element_size, alignment); @@ -71,7 +73,7 @@ void threaded_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element } inline -void threaded_queue_init(ThreadedQueue* queue, byte* buf, uint32 element_count, uint64 element_size, int32 alignment = 64) +void thrd_queue_init(ThreadedQueue* queue, byte* buf, uint32 element_count, uint64 element_size, int32 alignment = 64) { ring_init((RingMemory *) queue, buf, element_count * element_size, alignment); @@ -83,7 +85,7 @@ void threaded_queue_init(ThreadedQueue* queue, byte* buf, uint32 element_count, } inline -void threaded_queue_free(ThreadedQueue* queue) +void thrd_queue_free(ThreadedQueue* queue) { ring_free((RingMemory *) queue); sem_destroy(&queue->empty); @@ -92,9 +94,9 @@ void threaded_queue_free(ThreadedQueue* queue) pthread_cond_destroy(&queue->cond); } -// @todo Create enqueue_unique +// @todo Create enqueue_unique and enqueue_unique_sem inline -void threaded_queue_enqueue_unique_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +void thrd_queue_enqueue_unique_wait(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 
0) { ASSERT_SIMPLE((uint64_t) data % 4 == 0); pthread_mutex_lock(&queue->mutex); @@ -113,7 +115,7 @@ void threaded_queue_enqueue_unique_wait(ThreadedQueue* queue, byte* data, uint64 ring_move_pointer((RingMemory *) queue, &tail, size, aligned); } - while (!ring_commit_safe((RingMemory *) queue, size)) { + while (!ring_commit_safe((RingMemory *) queue, size, aligned)) { pthread_cond_wait(&queue->cond, &queue->mutex); } @@ -125,7 +127,7 @@ void threaded_queue_enqueue_unique_wait(ThreadedQueue* queue, byte* data, uint64 } inline -void threaded_queue_enqueue_unique(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +void thrd_queue_enqueue_unique(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0) { ASSERT_SIMPLE((uint64_t) data % 4 == 0); pthread_mutex_lock(&queue->mutex); @@ -144,7 +146,7 @@ void threaded_queue_enqueue_unique(ThreadedQueue* queue, byte* data, uint64 size ring_move_pointer((RingMemory *) queue, &tail, size, aligned); } - if (!ring_commit_safe((RingMemory *) queue, size)) { + if (!ring_commit_safe((RingMemory *) queue, size, aligned)) { pthread_mutex_unlock(&queue->mutex); return; @@ -159,11 +161,11 @@ void threaded_queue_enqueue_unique(ThreadedQueue* queue, byte* data, uint64 size // Conditional Lock inline -void threaded_queue_enqueue(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +void thrd_queue_enqueue(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0) { pthread_mutex_lock(&queue->mutex); - if (!ring_commit_safe((RingMemory *) queue, size)) { + if (!ring_commit_safe((RingMemory *) queue, size, aligned)) { pthread_mutex_unlock(&queue->mutex); return; @@ -177,11 +179,11 @@ void threaded_queue_enqueue(ThreadedQueue* queue, byte* data, uint64 size, byte } inline -void threaded_queue_enqueue_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +void thrd_queue_enqueue_wait(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0) { 
pthread_mutex_lock(&queue->mutex); - while (!ring_commit_safe((RingMemory *) queue, size)) { + while (!ring_commit_safe((RingMemory *) queue, size, aligned)) { pthread_cond_wait(&queue->cond, &queue->mutex); } @@ -193,7 +195,7 @@ void threaded_queue_enqueue_wait(ThreadedQueue* queue, byte* data, uint64 size, } inline -byte* threaded_queue_enqueue_start_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) +byte* thrd_queue_enqueue_start_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) { pthread_mutex_lock(&queue->mutex); @@ -205,33 +207,61 @@ byte* threaded_queue_enqueue_start_wait(ThreadedQueue* queue, uint64 size, byte } inline -void threaded_queue_enqueue_end_wait(ThreadedQueue* queue) +void thrd_queue_enqueue_end_wait(ThreadedQueue* queue) { pthread_cond_signal(&queue->cond); pthread_mutex_unlock(&queue->mutex); } inline -void threaded_queue_dequeue(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +bool thrd_queue_dequeue(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) { - pthread_mutex_lock(&queue->mutex); + if (queue->head == queue->tail) { + return false; + } + // we do this twice because the first one is very fast but may return a false positive + pthread_mutex_lock(&queue->mutex); if (queue->head == queue->tail) { pthread_mutex_unlock(&queue->mutex); - return; + return false; } - memcpy(data, queue->tail, size); + if (size == 4) { + *((int32 *) data) = *((int32 *) queue->tail); + } else { + memcpy(data, queue->tail, size); + } ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned); pthread_cond_signal(&queue->cond); pthread_mutex_unlock(&queue->mutex); + + return true; +} + +inline +bool thrd_queue_empty(ThreadedQueue* queue) { + pthread_mutex_lock(&queue->mutex); + bool is_empty = queue->head == queue->tail; + pthread_mutex_unlock(&queue->mutex); + + return is_empty; +} + +inline +bool thrd_queue_full(ThreadedQueue* queue, uint64 size, byte aligned = 0) { + pthread_mutex_lock(&queue->mutex); + 
bool is_full = !ring_commit_safe((RingMemory *) queue, size, aligned); + pthread_mutex_unlock(&queue->mutex); + + return is_full; } // Waits until a dequeue is available inline -void threaded_queue_dequeue_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +void thrd_queue_dequeue_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) { pthread_mutex_lock(&queue->mutex); @@ -247,7 +277,7 @@ void threaded_queue_dequeue_wait(ThreadedQueue* queue, byte* data, uint64 size, } inline -byte* threaded_queue_dequeue_start_wait(ThreadedQueue* queue) +byte* thrd_queue_dequeue_start_wait(ThreadedQueue* queue) { pthread_mutex_lock(&queue->mutex); @@ -259,7 +289,7 @@ byte* threaded_queue_dequeue_start_wait(ThreadedQueue* queue) } inline -void threaded_queue_dequeue_end_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) +void thrd_queue_dequeue_end_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) { ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned); @@ -269,7 +299,7 @@ void threaded_queue_dequeue_end_wait(ThreadedQueue* queue, uint64 size, byte ali // Semaphore Lock inline -void threaded_queue_enqueue_sem_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +void thrd_queue_enqueue_sem_wait(ThreadedQueue* queue, const byte* data, uint64 size, byte aligned = 0) { sem_wait(&queue->empty); pthread_mutex_lock(&queue->mutex); @@ -282,7 +312,25 @@ void threaded_queue_enqueue_sem_wait(ThreadedQueue* queue, byte* data, uint64 si } inline -byte* threaded_queue_enqueue_start_sem_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) +bool thrd_queue_enqueue_sem_timedwait(ThreadedQueue* queue, const byte* data, uint64 size, uint64 wait, byte aligned = 0) +{ + if (sem_timedwait(&queue->empty, wait)) { + return false; + } + + pthread_mutex_lock(&queue->mutex); + + byte* mem = ring_get_memory((RingMemory *) queue, size, aligned); + memcpy(mem, data, size); + + pthread_mutex_unlock(&queue->mutex); + 
sem_post(&queue->full); + + return true; +} + +inline +byte* thrd_queue_enqueue_start_sem_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) { sem_wait(&queue->empty); pthread_mutex_lock(&queue->mutex); @@ -291,14 +339,14 @@ byte* threaded_queue_enqueue_start_sem_wait(ThreadedQueue* queue, uint64 size, b } inline -void threaded_queue_enqueue_end_sem_wait(ThreadedQueue* queue) +void thrd_queue_enqueue_end_sem_wait(ThreadedQueue* queue) { pthread_mutex_unlock(&queue->mutex); sem_post(&queue->full); } inline -byte* threaded_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) +byte* thrd_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data, uint64 size, byte aligned = 0) { sem_wait(&queue->full); pthread_mutex_lock(&queue->mutex); @@ -311,7 +359,25 @@ byte* threaded_queue_dequeue_sem_wait(ThreadedQueue* queue, byte* data, uint64 s } inline -byte* threaded_queue_dequeue_start_sem_wait(ThreadedQueue* queue) +bool thrd_queue_dequeue_sem_timedwait(ThreadedQueue* queue, byte* data, uint64 size, uint64 wait, byte aligned = 0) +{ + if (sem_timedwait(&queue->full, wait)) { + return false; + } + + pthread_mutex_lock(&queue->mutex); + + memcpy(data, queue->tail, size); + ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned); + + pthread_mutex_unlock(&queue->mutex); + sem_post(&queue->empty); + + return true; +} + +inline +byte* thrd_queue_dequeue_start_sem_wait(ThreadedQueue* queue) { sem_wait(&queue->full); pthread_mutex_lock(&queue->mutex); @@ -320,7 +386,7 @@ byte* threaded_queue_dequeue_start_sem_wait(ThreadedQueue* queue) } inline -void threaded_queue_dequeue_end_sem_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) +void thrd_queue_dequeue_end_sem_wait(ThreadedQueue* queue, uint64 size, byte aligned = 0) { ring_move_pointer((RingMemory *) queue, &queue->tail, size, aligned); diff --git a/memory/ThreadedRingMemory.h b/memory/ThreadedRingMemory.h new file mode 100644 index 0000000..b9faa6e --- /dev/null +++ 
b/memory/ThreadedRingMemory.h @@ -0,0 +1,163 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_MEMORY_THREADED_RING_MEMORY_H +#define TOS_MEMORY_THREADED_RING_MEMORY_H + +#include "RingMemory.h" + +#if _WIN32 + #include "../platform/win32/threading/Thread.h" +#elif __linux__ + #include "../platform/linux/threading/Thread.h" +#endif + +// @todo This is a horrible implementation. Please implement a lock free solution + +struct ThreadedRingMemory { + byte* memory; + byte* end; + + byte* head; + + // This variable is usually only used by single producer/consumer code mostly found in threads. + // One thread inserts elements -> updates head + // The other thread reads elements -> updates tail + // This code itself doesn't change this variable + byte* tail; + + uint64 size; + int32 alignment; + int32 element_alignment; + + pthread_mutex_t mutex; +}; + +// @bug alignment should also include the end point, not just the start + +inline +void thrd_ring_alloc(ThreadedRingMemory* ring, uint64 size, int32 alignment = 64) +{ + ring_alloc((RingMemory *) ring, size, alignment); + pthread_mutex_init(&ring->mutex, NULL); +} + +inline +void thrd_ring_init(ThreadedRingMemory* ring, BufferMemory* buf, uint64 size, int32 alignment = 64) +{ + ring_init((RingMemory *) ring, buf, size, alignment); + pthread_mutex_init(&ring->mutex, NULL); +} + +inline +void thrd_ring_init(ThreadedRingMemory* ring, byte* buf, uint64 size, int32 alignment = 64) +{ + ring_init((RingMemory *) ring, buf, size, alignment); + pthread_mutex_init(&ring->mutex, NULL); +} + +inline +void thrd_ring_free(ThreadedRingMemory* ring) +{ + ring_free((RingMemory *) ring); + pthread_mutex_destroy(&ring->mutex); +} + +inline +byte* thrd_ring_calculate_position(ThreadedRingMemory* ring, uint64 size, byte aligned = 0) +{ + pthread_mutex_lock(&ring->mutex); + byte* result = ring_calculate_position((RingMemory *) ring, size, aligned); + 
pthread_mutex_unlock(&ring->mutex); + + return result; +} + +inline +void thrd_ring_reset(ThreadedRingMemory* ring) +{ + pthread_mutex_lock(&ring->mutex); + ring_reset((RingMemory *) ring); + pthread_mutex_unlock(&ring->mutex); +} + +// Moves a pointer based on the size you want to consume (new position = after consuming size) +void thrd_ring_move_pointer(ThreadedRingMemory* ring, byte** pos, uint64 size, byte aligned = 0) +{ + pthread_mutex_lock(&ring->mutex); + ring_move_pointer((RingMemory *) ring, pos, size, aligned); + pthread_mutex_unlock(&ring->mutex); +} + +byte* thrd_ring_get_memory(ThreadedRingMemory* ring, uint64 size, byte aligned = 0, bool zeroed = false) +{ + pthread_mutex_lock(&ring->mutex); + byte* result = ring_get_memory((RingMemory *) ring, size, aligned, zeroed); + pthread_mutex_unlock(&ring->mutex); + + return result; +} + +// Same as ring_get_memory but DOESN'T move the head +byte* thrd_ring_get_memory_nomove(ThreadedRingMemory* ring, uint64 size, byte aligned = 0, bool zeroed = false) +{ + pthread_mutex_lock(&ring->mutex); + byte* result = ring_get_memory_nomove((RingMemory *) ring, size, aligned, zeroed); + pthread_mutex_unlock(&ring->mutex); + + return result; +} + +// Used if the ring only contains elements of a certain size +// This way you can get a certain element +inline +byte* thrd_ring_get_element(ThreadedRingMemory* ring, uint64 element_count, uint64 element, uint64 size) +{ + pthread_mutex_lock(&ring->mutex); + byte* result = ring_get_element((RingMemory *) ring, element_count, element, size); + pthread_mutex_unlock(&ring->mutex); + + return result; +} + +/** + * Checks if one additional element can be inserted without overwriting the tail index + */ +inline +bool thrd_ring_commit_safe(ThreadedRingMemory* ring, uint64 size, byte aligned = 0) +{ + pthread_mutex_lock(&ring->mutex); + bool result = ring_commit_safe((RingMemory *) ring, size, aligned); + pthread_mutex_unlock(&ring->mutex); + + return result; +} + +inline +void 
thrd_ring_force_head_update(const ThreadedRingMemory* ring) +{ + _mm_clflush(ring->head); +} + +inline +void thrd_ring_force_tail_update(const ThreadedRingMemory* ring) +{ + _mm_clflush(ring->tail); +} + +inline +int64 thrd_ring_dump(ThreadedRingMemory* ring, byte* data) +{ + pthread_mutex_lock(&ring->mutex); + int64 result = ring_dump((RingMemory *) ring, data); + pthread_mutex_unlock(&ring->mutex); + + return result; +} + +#endif \ No newline at end of file diff --git a/object/Mesh.h b/object/Mesh.h index e7a62a5..490a2b9 100644 --- a/object/Mesh.h +++ b/object/Mesh.h @@ -29,7 +29,7 @@ // maybe make a mesh hold other meshes? // @todo handle vertices arrays where for example no texture coordinates are defined/used struct Mesh { - byte* data; // memory owner that subdevides into the pointers below + byte* data; // memory owner that subdivides into the pointers below // @todo Implement the version into the file, currently not implemented int32 version; @@ -70,13 +70,17 @@ struct Mesh { }; // @todo also handle textures etc. -// WARNING: mesh needs to have memory already reserved and asigned to data +// WARNING: mesh needs to have memory already reserved and assigned to data void mesh_from_file_txt( Mesh* mesh, - byte* data, + const char* path, RingMemory* ring ) { - char* pos = (char *) data; + FileBody file; + file_read(path, &file, ring); + ASSERT_SIMPLE(file.size); + + char* pos = (char *) file.content; // move past the version string pos += 8; @@ -458,19 +462,15 @@ enum MeshLoadingRestriction { // @todo sometimes we don't care about some data, we should have an option which defines which data should be loaded // this can improve performance for algorithms on this. e.g.: // on the server side we only care about the vertex positions for collision (no normals, no color, ...) 
-int32 mesh_from_file( - RingMemory* ring, - const char* path, +int32 mesh_from_data( + const byte* data, Mesh* mesh, const char* group = NULL, int32 load_format = MESH_LOADING_RESTRICTION_EVERYTHING, int32 steps = 8 ) { - FileBody file; - file_read(path, &file, ring); - - byte* pos = file.content; + const byte* pos = data; // Read version mesh->version = *((int32 *) pos); @@ -537,24 +537,24 @@ int32 mesh_from_file( return offset; } -void mesh_to_file( - RingMemory* ring, - const char* path, +// @bug this is wrong, since it is the max size +// We would have to check the vertex format to calculate the actual size +int32 mesh_data_size(const Mesh* mesh) +{ + return sizeof(mesh->version) + + sizeof(mesh->vertex_type) + + sizeof(mesh->vertex_count) + + 12 * sizeof(f32) * mesh->vertex_count; // 12 is the maximum value +} + +int32 mesh_to_data( const Mesh* mesh, + byte* data, int32 vertex_save_format = VERTEX_TYPE_ALL, int32 steps = 8 ) { - FileBody file; - - // Temporary file size for buffer - // @todo check the actual size, we are currently more or less guessing - file.size = sizeof(mesh) - + sizeof(Vertex3D) * mesh->vertex_count - + 4096; - - file.content = ring_get_memory(ring, file.size, 64); - byte* pos = file.content; + byte* pos = data; // version memcpy(pos, &mesh->version, sizeof(mesh->version)); @@ -571,7 +571,7 @@ void mesh_to_file( memcpy(pos, &mesh->vertex_count, sizeof(mesh->vertex_count)); pos += sizeof(mesh->vertex_count); - // verticies + // vertices int32 vertex_size = 0; if (mesh->vertex_type & VERTEX_TYPE_POSITION) { vertex_size += 3; @@ -614,16 +614,16 @@ void mesh_to_file( pos += vertex_size * sizeof(f32) * mesh->vertex_count; } - file.size = pos - file.content; + int32 size = (int32) (pos - data); SWAP_ENDIAN_LITTLE_SIMD( - (int32 *) file.content, - (int32 *) file.content, - file.size / 4, // everything in here is 4 bytes -> super easy to swap + (int32 *) data, + (int32 *) data, + size / 4, // everything in here is 4 bytes -> super easy to swap 
steps ); - file_write(path, &file); + return size; } #endif \ No newline at end of file diff --git a/platform/linux/FileUtils.cpp b/platform/linux/FileUtils.cpp index d8f8594..7a5fcfe 100644 --- a/platform/linux/FileUtils.cpp +++ b/platform/linux/FileUtils.cpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include #include @@ -28,7 +30,54 @@ #define MAX_PATH PATH_MAX #endif -typedef int32 FileHandler; +typedef int32 FileHandle; +typedef int MMFHandle; + +inline +MMFHandle file_mmf_handle(FileHandle fp) { + return fp; +} + +inline +void* mmf_region_init(MMFHandle fh, size_t offset, size_t length = 0) { + if (length == 0) { + struct stat st; + if (fstat(fh, &st) != 0) { + return null; + } + + length = st.st_size - offset; + } + + size_t page_size = sysconf(_SC_PAGESIZE); + + // Offset (must be page-aligned) + size_t aligned_offset = offset & ~(page_size - 1); + size_t offset_diff = offset - aligned_offset; + size_t map_length = length + offset_diff; + + void *mapped_region = mmap(nullptr, map_length, PROT_READ, MAP_PRIVATE, fh, aligned_offset); + + if (mapped_region == MAP_FAILED) { + return null; + } + + return (char *) mapped_region + offset_diff; +} + +inline +void mmf_region_release(void* region, size_t length = 0) { + size_t page_size = sysconf(_SC_PAGESIZE); + + void *aligned_region = (void *) ((uintptr_t)region & ~(page_size - 1)); + + munmap(aligned_region, length); +} + +inline +void file_mmf_close(MMFHandle fh) { + close(fh); +} inline void relative_to_absolute(const char* rel, char* path) @@ -77,8 +126,8 @@ uint64 file_last_modified(const char* filename) } inline -FileHandler file_append_handle(const char* path) { - FileHandler fp; +FileHandle file_append_handle(const char* path) { + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); @@ -151,6 +200,9 @@ bool file_copy(const char* src, const char* dst) { close(src_fd); close(dst_fd); + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_READ, 
bytes_read); + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_WRITE, bytes_written); + return success; } @@ -205,6 +257,8 @@ void file_read(const char* path, FileBody* file, RingMemory* ring) { file->content[bytes_read] = '\0'; file->size = bytes_read; + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_READ, bytes_read); + close(fp); } @@ -235,11 +289,13 @@ bool file_write(const char* path, const FileBody* file) { return false; } + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_WRITE, written); + return true; } inline -void close_handle(FileHandler fp) +void file_close_handle(FileHandle fp) { close(fp); } diff --git a/platform/linux/Server.h b/platform/linux/network/Server.h similarity index 91% rename from platform/linux/Server.h rename to platform/linux/network/Server.h index ebc175b..f25908a 100644 --- a/platform/linux/Server.h +++ b/platform/linux/network/Server.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef TOS_PLATFORM_LINUX_SERVER_H -#define TOS_PLATFORM_LINUX_SERVER_H +#ifndef TOS_PLATFORM_LINUX_NETWORK_SERVER_H +#define TOS_PLATFORM_LINUX_NETWORK_SERVER_H #include #include @@ -21,9 +21,9 @@ #include #include -#include "../../stdlib/Types.h" -#include "../../network/SocketConnection.h" -#include "../../utils/EndianUtils.h" +#include "../../../stdlib/Types.h" +#include "../../../network/SocketConnection.h" +#include "../../../utils/EndianUtils.h" // WARNING: requires `sudo setcap cap_net_raw=eip /path/to/your_program` void socket_server_raw_create(const char* hostname, SocketConnection* con) { diff --git a/platform/linux/Socket.h b/platform/linux/network/Socket.h similarity index 62% rename from platform/linux/Socket.h rename to platform/linux/network/Socket.h index afa619f..f4fa6a3 100644 --- a/platform/linux/Socket.h +++ b/platform/linux/network/Socket.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef TOS_PLATFORM_LINUX_SOCKET_H -#define TOS_PLATFORM_LINUX_SOCKET_H +#ifndef TOS_PLATFORM_LINUX_NETWORK_SOCKET_H +#define 
TOS_PLATFORM_LINUX_NETWORK_SOCKET_H #define socket_close close diff --git a/platform/linux/threading/Atomic.h b/platform/linux/threading/Atomic.h index ff9cd24..a5ffda0 100644 --- a/platform/linux/threading/Atomic.h +++ b/platform/linux/threading/Atomic.h @@ -12,6 +12,16 @@ #include #include "../../../stdlib/Types.h" +inline +void atomic_set(void** target, void* value) { + __atomic_store_n(target, value, __ATOMIC_SEQ_CST); +} + +inline +void* atomic_get(void** target) { + return __atomic_load_n(target, __ATOMIC_SEQ_CST); +} + inline void atomic_set(volatile int32* value, int32 new_value) { @@ -60,22 +70,42 @@ void atomic_get(volatile byte* value, byte data[16]) inline void atomic_increment(volatile int32* value) { - __atomic_fetch_add(value, 1, __ATOMIC_SEQ_CST); + __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); } inline void atomic_decrement(volatile int32* value) { - __atomic_fetch_sub(value, 1, __ATOMIC_SEQ_CST); + __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); +} + +inline +void atomic_increment(volatile int64* value) { + __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); +} + +inline +void atomic_decrement(volatile int64* value) { + __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); } inline void atomic_add(volatile int32* value, int32 increment) { - __atomic_fetch_add(value, increment, __ATOMIC_SEQ_CST); + __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); } inline void atomic_sub(volatile int32* value, int32 decrement) { - __atomic_fetch_sub(value, decrement, __ATOMIC_SEQ_CST); + __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); +} + +inline +void atomic_add(volatile int64* value, int64 increment) { + __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); +} + +inline +void atomic_sub(volatile int64* value, int64 decrement) { + __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); } inline @@ -85,13 +115,158 @@ int32 atomic_compare_exchange_weak(volatile int32* value, int32* expected, int32 } inline -int32 atomic_fetch_add(volatile int32* value, 
int32 operand) { - return __atomic_fetch_add(value, operand, __ATOMIC_SEQ_CST); +int32 atomic_add_fetch(volatile int32* value, int32 operand) { + return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); } inline -int32 atomic_fetch_sub(volatile int32* value, int32 operand) { - return __atomic_fetch_sub(value, operand, __ATOMIC_SEQ_CST); +int32 atomic_sub_fetch(volatile int32* value, int32 operand) { + return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); } +inline +int64 atomic_add_fetch(volatile int64* value, int64 operand) { + return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); +} + +inline +int64 atomic_sub_fetch(volatile int64* value, int64 operand) { + return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); +} + +inline +void atomic_set(volatile uint32* value, uint32 new_value) +{ + __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); +} + +inline +void atomic_set(volatile uint64* value, uint64 new_value) +{ + __atomic_store_n(value, new_value, __ATOMIC_SEQ_CST); +} + +inline +uint32 atomic_set_fetch(volatile uint32* value, uint32 new_value) { + return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); +} + +inline +uint64 atomic_set_fetch(volatile uint64* value, uint64 new_value) { + return __atomic_exchange_n(value, new_value, __ATOMIC_SEQ_CST); +} + +inline +void atomic_get(volatile byte* value, byte data[16]) +{ + __atomic_store((volatile __uint128 *) value, (__uint128 *) data, __ATOMIC_SEQ_CST); +} + +inline +uint32 atomic_get(volatile uint32* value) +{ + return __atomic_load_n((uint32 *) value, __ATOMIC_SEQ_CST); +} + +inline +uint64 atomic_get(volatile uint64* value) +{ + return __atomic_load_n((uint64 *) value, __ATOMIC_SEQ_CST); +} + +inline +void atomic_increment(volatile uint32* value) { + __atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); +} + +inline +void atomic_decrement(volatile uint32* value) { + __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); +} + +inline +void atomic_increment(volatile uint64* value) { + 
__atomic_add_fetch(value, 1, __ATOMIC_SEQ_CST); +} + +inline +void atomic_decrement(volatile uint64* value) { + __atomic_sub_fetch(value, 1, __ATOMIC_SEQ_CST); +} + +inline +void atomic_add(volatile uint32* value, uint32 increment) { + __atomic_add_fetch(value, increment, __ATOMIC_SEQ_CST); +} + +inline +void atomic_sub(volatile uint32* value, uint32 decrement) { + __atomic_sub_fetch(value, decrement, __ATOMIC_SEQ_CST); +} + +inline +uint32 atomic_compare_exchange_weak(volatile uint32* value, uint32* expected, uint32 desired) { + __atomic_compare_exchange_n(value, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return *expected; +} + +inline +uint32 atomic_add_fetch(volatile uint32* value, uint32 operand) { + return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); +} + +inline +uint32 atomic_sub_fetch(volatile uint32* value, uint32 operand) { + return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); +} + +inline +uint64 atomic_add_fetch(volatile uint64* value, uint64 operand) { + return __atomic_add_fetch(value, operand, __ATOMIC_SEQ_CST); +} + +inline +uint64 atomic_sub_fetch(volatile uint64* value, uint64 operand) { + return __atomic_sub_fetch(value, operand, __ATOMIC_SEQ_CST); +} + +inline +void atomic_and(volatile uint32* value, uint32 mask) { + __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); +} + +inline +void atomic_and(volatile int32* value, int32 mask) { + __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); +} + +inline +void atomic_and(volatile uint64* value, uint64 mask) { + __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); +} + +inline +void atomic_and(volatile int64* value, int64 mask) { + __atomic_fetch_and(value, mask, __ATOMIC_SEQ_CST); +} + +inline +void atomic_or(volatile uint32* value, uint32 mask) { + __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); +} + +inline +void atomic_or(volatile int32* value, int32 mask) { + __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); +} + +inline +void atomic_or(volatile uint64* value, 
uint64 mask) { + __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); +} + +inline +void atomic_or(volatile int64* value, int64 mask) { + __atomic_fetch_or(value, mask, __ATOMIC_SEQ_CST); +} #endif \ No newline at end of file diff --git a/platform/linux/threading/Thread.h b/platform/linux/threading/Thread.h index 0dd9d74..6b36041 100644 --- a/platform/linux/threading/Thread.h +++ b/platform/linux/threading/Thread.h @@ -98,7 +98,7 @@ int32 pthread_cond_wait(pthread_cond_t* cond, pthread_mutex_t* mutex) { } int32 pthread_cond_signal(pthread_cond_t* cond) { - atomic_fetch_add(cond, 1); + atomic_add_fetch(cond, 1); syscall(SYS_futex, cond, FUTEX_WAKE, 1, NULL, NULL, 0); return 0; @@ -114,7 +114,7 @@ int32 pthread_rwlock_init(pthread_rwlock_t* rwlock, const pthread_rwlockattr_t*) int32 pthread_rwlock_rdlock(pthread_rwlock_t* rwlock) { while (atomic_get(&rwlock->writer)) {} - atomic_fetch_add(&rwlock->readers, 1); + atomic_add_fetch(&rwlock->readers, 1); return 0; } @@ -129,7 +129,7 @@ int32 pthread_rwlock_unlock(pthread_rwlock_t* rwlock) { if (atomic_get(&rwlock->writer)) { atomic_set(&rwlock->writer, 0); } else { - atomic_fetch_sub(&rwlock->readers, 1); + atomic_sub_fetch(&rwlock->readers, 1); } return 0; diff --git a/platform/win32/FileUtils.cpp b/platform/win32/FileUtils.cpp index 25feead..26c3bc0 100644 --- a/platform/win32/FileUtils.cpp +++ b/platform/win32/FileUtils.cpp @@ -22,7 +22,9 @@ #include "../../utils/TestUtils.h" #include "../../memory/RingMemory.h" -typedef HANDLE FileHandler; +typedef HANDLE FileHandle; +typedef HANDLE MMFHandle; +typedef OVERLAPPED file_overlapped; struct FileBodyAsync { // doesn't include null termination (same as strlen) @@ -31,7 +33,30 @@ struct FileBodyAsync { OVERLAPPED ov; }; -// @todo Consider to implement directly mapped files (CreateFileMapping) for certain files (e.g. map data or texture data, ...) 
+inline +MMFHandle file_mmf_handle(FileHandle fp) +{ + return CreateFileMappingA(fp, NULL, PAGE_READONLY, 0, 0, NULL); +} + +inline +void* mmf_region_init(MMFHandle fh, size_t offset, size_t length = 0) +{ + DWORD high = (DWORD) ((offset >> 32) & 0xFFFFFFFF); + DWORD low = (DWORD) (offset & 0xFFFFFFFF); + + return MapViewOfFile(fh, FILE_MAP_READ, high, low, length); +} + +inline +void mmf_region_release(void* fh) { + UnmapViewOfFile(fh); +} + +inline +void file_mmf_close(MMFHandle fh) { + CloseHandle(fh); +} inline void relative_to_absolute(const char* rel, char* path) @@ -63,7 +88,7 @@ inline uint64 file_size(const char* path) { // @performance Profile against fseek strategy - FileHandler fp; + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); @@ -121,7 +146,7 @@ bool file_exists(const char* path) inline void file_read(const char* path, FileBody* file, RingMemory* ring = NULL) { - FileHandler fp; + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); @@ -159,11 +184,10 @@ file_read(const char* path, FileBody* file, RingMemory* ring = NULL) } if (ring != NULL) { - file->content = ring_get_memory(ring, size.QuadPart); + file->content = ring_get_memory(ring, size.QuadPart + 1); } DWORD bytes; - ASSERT_SIMPLE(size.QuadPart < MAX_UINT32); if (!ReadFile(fp, file->content, (uint32) size.QuadPart, &bytes, NULL)) { CloseHandle(fp); file->content = NULL; @@ -175,12 +199,14 @@ file_read(const char* path, FileBody* file, RingMemory* ring = NULL) file->content[bytes] = '\0'; file->size = size.QuadPart; + + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_READ, bytes); } inline void file_read(const char* path, FileBody* file, uint64 offset, uint64 length = MAX_UINT64, RingMemory* ring = NULL) { - FileHandler fp; + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); @@ -232,7 +258,7 @@ void file_read(const char* path, FileBody* file, uint64 offset, 
uint64 length = uint64 read_length = OMS_MIN(length, file_size - offset); if (ring != NULL) { - file->content = ring_get_memory(ring, read_length); + file->content = ring_get_memory(ring, read_length + 1); } // Move the file pointer to the offset position @@ -246,7 +272,6 @@ void file_read(const char* path, FileBody* file, uint64 offset, uint64 length = } DWORD bytes; - ASSERT_SIMPLE(read_length < MAX_UINT32); if (!ReadFile(fp, file->content, (uint32) read_length, &bytes, NULL)) { CloseHandle(fp); file->content = NULL; @@ -258,10 +283,12 @@ void file_read(const char* path, FileBody* file, uint64 offset, uint64 length = file->content[bytes] = '\0'; file->size = bytes; + + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_READ, bytes); } inline -void file_read(FileHandler fp, FileBody* file, uint64 offset = 0, uint64 length = MAX_UINT64, RingMemory* ring = NULL) +void file_read(FileHandle fp, FileBody* file, uint64 offset = 0, uint64 length = MAX_UINT64, RingMemory* ring = NULL) { LARGE_INTEGER size; if (!GetFileSizeEx(fp, &size)) { @@ -285,7 +312,7 @@ void file_read(FileHandler fp, FileBody* file, uint64 offset = 0, uint64 length uint64 read_length = OMS_MIN(length, file_size - offset); if (ring != NULL) { - file->content = ring_get_memory(ring, read_length); + file->content = ring_get_memory(ring, read_length + 1); } // Move the file pointer to the offset position @@ -299,7 +326,6 @@ void file_read(FileHandler fp, FileBody* file, uint64 offset = 0, uint64 length } DWORD bytes; - ASSERT_SIMPLE(read_length < MAX_UINT32); if (!ReadFile(fp, file->content, (uint32) read_length, &bytes, NULL)) { CloseHandle(fp); file->content = NULL; @@ -307,16 +333,16 @@ void file_read(FileHandler fp, FileBody* file, uint64 offset = 0, uint64 length return; } - CloseHandle(fp); - file->content[bytes] = '\0'; file->size = bytes; + + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_READ, bytes); } inline uint64 file_read_struct(const char* path, void* file, uint32 size) { - FileHandler fp; + FileHandle fp; if 
(*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); @@ -361,13 +387,15 @@ file_read_struct(const char* path, void* file, uint32 size) CloseHandle(fp); + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_READ, read); + return read; } inline bool file_write(const char* path, const FileBody* file) { - FileHandler fp; + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); @@ -397,7 +425,6 @@ file_write(const char* path, const FileBody* file) DWORD written; DWORD length = (DWORD) file->size; - ASSERT_SIMPLE(file->size < MAX_UINT32); if (!WriteFile(fp, file->content, length, &written, NULL)) { CloseHandle(fp); return false; @@ -405,13 +432,15 @@ file_write(const char* path, const FileBody* file) CloseHandle(fp); + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_WRITE, length); + return true; } inline bool file_write_struct(const char* path, const void* file, uint32 size) { - FileHandler fp; + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); @@ -444,6 +473,8 @@ file_write_struct(const char* path, const void* file, uint32 size) CloseHandle(fp); + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_WRITE, written); + return true; } @@ -473,7 +504,7 @@ file_copy(const char* src, const char* dst) } inline -void close_handle(FileHandler fp) +void file_close_handle(FileHandle fp) { CloseHandle(fp); } @@ -481,7 +512,7 @@ void close_handle(FileHandler fp) inline HANDLE file_append_handle(const char* path) { - FileHandler fp; + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); @@ -514,10 +545,10 @@ HANDLE file_append_handle(const char* path) inline bool file_read_async( - FileHandler fp, + FileHandle fp, FileBodyAsync* file, uint64_t offset = 0, - uint64_t length = MAXUINT64, + uint64_t length = MAX_UINT64, RingMemory* ring = NULL ) { LARGE_INTEGER size; @@ -559,7 +590,6 @@ bool file_read_async( file->ov.hEvent = CreateEvent(NULL, TRUE, FALSE, 
NULL); DWORD bytes_read = 0; - ASSERT_SIMPLE(read_length < MAXDWORD); if (!ReadFile(fp, file->content, (DWORD) read_length, &bytes_read, &file->ov)) { DWORD error = GetLastError(); if (error != ERROR_IO_PENDING) { @@ -573,13 +603,23 @@ bool file_read_async( } file->size = read_length; + + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_READ, read_length); + return true; } inline -FileHandler file_read_handle(const char* path) +void file_async_wait(FileHandle fp, file_overlapped* overlapped, bool wait) { - FileHandler fp; + DWORD bytesTransferred; + GetOverlappedResult(fp, overlapped, &bytesTransferred, wait); +} + +inline +FileHandle file_read_handle(const char* path) +{ + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); @@ -611,9 +651,9 @@ FileHandler file_read_handle(const char* path) } inline -FileHandler file_read_async_handle(const char* path) +FileHandle file_read_async_handle(const char* path) { - FileHandler fp; + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); @@ -646,7 +686,7 @@ FileHandler file_read_async_handle(const char* path) bool file_append(const char* path, const char* file) { - FileHandler fp; + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); @@ -675,39 +715,40 @@ bool file_append(const char* path, const char* file) } DWORD written; - DWORD length = (DWORD) strlen(file); // @question WHY is WriteFile not supporting larger data? 
- ASSERT_SIMPLE(length < MAX_UINT32); + DWORD length = (DWORD) strlen(file); if (!WriteFile(fp, file, length, &written, NULL)) { CloseHandle(fp); return false; } CloseHandle(fp); + + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_WRITE, written); + return true; } inline bool -file_append(FileHandler fp, const char* file) +file_append(FileHandle fp, const char* file) { if (fp == INVALID_HANDLE_VALUE) { return false; } DWORD written; - DWORD length = (DWORD) strlen(file); // @question WHY is WriteFile not supporting larger data? - ASSERT_SIMPLE(length < MAX_UINT32); - + DWORD length = (DWORD) strlen(file); if (!WriteFile(fp, file, length, &written, NULL)) { CloseHandle(fp); return false; } - CloseHandle(fp); + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_WRITE, written); + return true; } inline bool -file_append(FileHandler fp, const char* file, size_t length) +file_append(FileHandle fp, const char* file, size_t length) { if (fp == INVALID_HANDLE_VALUE) { return false; @@ -719,13 +760,15 @@ file_append(FileHandler fp, const char* file, size_t length) return false; } + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_WRITE, written); + return true; } inline bool file_append(const char* path, const FileBody* file) { - FileHandler fp; + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); @@ -755,13 +798,15 @@ file_append(const char* path, const FileBody* file) DWORD bytes; DWORD length = (DWORD) file->size; - ASSERT_SIMPLE(file->size < MAX_UINT32); if (!WriteFile(fp, file->content, length, &bytes, NULL)) { CloseHandle(fp); return false; } CloseHandle(fp); + + LOG_INCREMENT_BY(DEBUG_COUNTER_DRIVE_WRITE, bytes); + return true; } @@ -770,7 +815,7 @@ uint64 file_last_modified(const char* path) { WIN32_FIND_DATA find_data; - FileHandler fp; + FileHandle fp; if (*path == '.') { char full_path[MAX_PATH]; relative_to_absolute(path, full_path); diff --git a/platform/win32/Library.h b/platform/win32/Library.h index 86e3e41..de5162e 100644 --- 
a/platform/win32/Library.h +++ b/platform/win32/Library.h @@ -72,6 +72,7 @@ bool library_load(Library* lib) if (function) { lib->functions[c] = function; } else { + ASSERT_SIMPLE(false); lib->is_valid = false; } } diff --git a/platform/win32/SystemInfo.cpp b/platform/win32/SystemInfo.cpp index 5f46011..11e8e14 100644 --- a/platform/win32/SystemInfo.cpp +++ b/platform/win32/SystemInfo.cpp @@ -25,6 +25,7 @@ #include #include #include +#include // @performance Do we really need all these libs, can't we simplify that?! #include diff --git a/platform/win32/Window.h b/platform/win32/Window.h index 49605ec..ac72443 100644 --- a/platform/win32/Window.h +++ b/platform/win32/Window.h @@ -21,10 +21,14 @@ struct WindowState { uint64 style; }; -#define WINDOW_STATE_CHANGE_SIZE 1 -#define WINDOW_STATE_CHANGE_POS 2 -#define WINDOW_STATE_CHANGE_FOCUS 4 -#define WINDOW_STATE_CHANGE_FULLSCREEN 8 +enum WindowStateChanges : byte { + WINDOW_STATE_CHANGE_NONE = 0, + WINDOW_STATE_CHANGE_SIZE = 1, + WINDOW_STATE_CHANGE_POS = 2, + WINDOW_STATE_CHANGE_FOCUS = 4, + WINDOW_STATE_CHANGE_FULLSCREEN = 8, + WINDOW_STATE_CHANGE_ALL = 16, +}; struct Window { uint16 width; diff --git a/platform/win32/audio/DirectSound.h b/platform/win32/audio/DirectSound.h index aec1ae4..8635902 100644 --- a/platform/win32/audio/DirectSound.h +++ b/platform/win32/audio/DirectSound.h @@ -9,8 +9,9 @@ #ifndef TOS_SOUND_DIRECT_SOUND_H #define TOS_SOUND_DIRECT_SOUND_H -#include #include +#include +#include #include "../../../stdlib/Types.h" #include "../../../audio/AudioSetting.h" @@ -172,10 +173,10 @@ void audio_play_buffer(AudioSetting* setting, DirectSoundSetting* api_setting) return; } - void *region1; + void* region1; DWORD region1_size; - void *region2; + void* region2; DWORD region2_size; DWORD bytes_to_lock = (setting->sample_index * setting->sample_size) % setting->buffer_size; @@ -203,6 +204,7 @@ void audio_play_buffer(AudioSetting* setting, DirectSoundSetting* api_setting) 
api_setting->secondary_buffer->Unlock(region1, region1_size, region2, region2_size); + // @question Do we want to keep this here or move it to the audio mixer? setting->sample_index += setting->sample_buffer_size / setting->sample_size; setting->sample_buffer_size = 0; } diff --git a/platform/win32/audio/XAudio2.h b/platform/win32/audio/XAudio2.h index 8f56f60..7fa16ec 100644 --- a/platform/win32/audio/XAudio2.h +++ b/platform/win32/audio/XAudio2.h @@ -9,9 +9,9 @@ #ifndef TOS_SOUND_XAUDIO2_H #define TOS_SOUND_XAUDIO2_H -#include #include #include +#include #include "../../../stdlib/Types.h" #include "../../../audio/AudioSetting.h" diff --git a/platform/win32/input/HidInput.h b/platform/win32/input/HidInput.h index 5729d9a..4a1f2e1 100644 --- a/platform/win32/input/HidInput.h +++ b/platform/win32/input/HidInput.h @@ -137,7 +137,7 @@ void hid_init_controllers(Input* __restrict states, int32 state_count, RingMemor SetupDiDestroyDeviceInfoList(device_info_set); } -uint32 hid_divice_poll(Input* state, uint64 time) { +uint32 hid_device_poll(Input* state, uint64 time) { UCHAR buffer[128]; DWORD bytes_read; diff --git a/platform/win32/Client.h b/platform/win32/network/Client.h similarity index 84% rename from platform/win32/Client.h rename to platform/win32/network/Client.h index 37e52c3..3b25575 100644 --- a/platform/win32/Client.h +++ b/platform/win32/network/Client.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef TOS_PLATFORM_WIN32_SERVER_H -#define TOS_PLATFORM_WIN32_SERVER_H +#ifndef TOS_PLATFORM_WIN32_NETWORK_SERVER_H +#define TOS_PLATFORM_WIN32_NETWORK_SERVER_H #include #include @@ -16,9 +16,9 @@ #include #include -#include "../../stdlib/Types.h" -#include "../../network/SocketConnection.h" -#include "../../utils/EndianUtils.h" +#include "../../../stdlib/Types.h" +#include "../../../network/SocketConnection.h" +#include "../../../utils/EndianUtils.h" #pragma comment(lib, "Ws2_32.lib") diff --git a/platform/win32/Server.h 
b/platform/win32/network/Server.h similarity index 86% rename from platform/win32/Server.h rename to platform/win32/network/Server.h index 7ca5c76..3c7a36c 100644 --- a/platform/win32/Server.h +++ b/platform/win32/network/Server.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef TOS_PLATFORM_WIN32_SERVER_H -#define TOS_PLATFORM_WIN32_SERVER_H +#ifndef TOS_PLATFORM_WIN32_NETWORK_SERVER_H +#define TOS_PLATFORM_WIN32_NETWORK_SERVER_H #include #include @@ -16,8 +16,8 @@ #include #include -#include "../../network/SocketConnection.h" -#include "../../utils/EndianUtils.h" +#include "../../../network/SocketConnection.h" +#include "../../../utils/EndianUtils.h" #pragma comment(lib, "Ws2_32.lib") diff --git a/platform/win32/Socket.h b/platform/win32/network/Socket.h similarity index 62% rename from platform/win32/Socket.h rename to platform/win32/network/Socket.h index 472adde..709bb7e 100644 --- a/platform/win32/Socket.h +++ b/platform/win32/network/Socket.h @@ -6,8 +6,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef TOS_PLATFORM_WIN32_SOCKET_H -#define TOS_PLATFORM_WIN32_SOCKET_H +#ifndef TOS_PLATFORM_WIN32_NETWORK_SOCKET_H +#define TOS_PLATFORM_WIN32_NETWORK_SOCKET_H #define socket_close closesocket diff --git a/platform/win32/threading/Atomic.h b/platform/win32/threading/Atomic.h index 32ab430..93ed91f 100644 --- a/platform/win32/threading/Atomic.h +++ b/platform/win32/threading/Atomic.h @@ -12,6 +12,18 @@ #include #include "../../../stdlib/Types.h" +inline +void atomic_set(void** target, void* new_pointer) +{ + InterlockedExchangePointer(target, new_pointer); +} + +inline +void* atomic_get(void** target) +{ + return InterlockedCompareExchangePointer(target, NULL, NULL); +} + inline void atomic_set(volatile int32* value, int32 new_value) { @@ -85,6 +97,16 @@ void atomic_decrement(volatile int32* value) { InterlockedDecrement((long *) value); } +inline +void atomic_increment(volatile int64* value) { + InterlockedIncrement((long *) 
value); +} + +inline +void atomic_decrement(volatile int64* value) { + InterlockedDecrement((long *) value); +} + inline void atomic_add(volatile int32* value, int32 increment) { InterlockedAdd((long *) value, increment); @@ -95,19 +117,180 @@ void atomic_sub(volatile int32* value, int32 decrement) { InterlockedAdd((long *) value, -decrement); } +inline +void atomic_add(volatile int64* value, int64 increment) { + InterlockedAdd((long *) value, (long) increment); +} + +inline +void atomic_sub(volatile int64* value, int64 decrement) { + InterlockedAdd((long *) value, -1 * ((long) decrement)); +} + inline int32 atomic_compare_exchange_weak(volatile int32* value, int32* expected, int32 desired) { return (int32) InterlockedCompareExchange((long *) value, desired, *expected); } inline -int32 atomic_fetch_add(volatile int32* value, int32 operand) { +int32 atomic_add_fetch(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeAdd((long *) value, operand); } inline -int32 atomic_fetch_sub(volatile int32* value, int32 operand) { +int32 atomic_sub_fetch(volatile int32* value, int32 operand) { return (int32) InterlockedExchangeSubtract((unsigned long *) value, operand); } +inline +int64 atomic_add_fetch(volatile int64* value, int64 operand) { + return (int64) InterlockedExchangeAdd((long *) value, (long) operand); +} + +inline +int64 atomic_sub_fetch(volatile int64* value, int64 operand) { + return (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); +} + +inline +void atomic_set(volatile uint32* value, uint32 new_value) +{ + InterlockedExchange((long *) value, new_value); +} + +inline +void atomic_set(volatile uint64* value, uint64 new_value) +{ + InterlockedExchange((long *) value, (long) new_value); +} + +inline +uint32 atomic_set_fetch(volatile uint32* value, uint32 new_value) +{ + return (uint32) InterlockedExchange((long *) value, new_value); +} + +inline +uint64 atomic_set_fetch(volatile uint64* value, uint64 new_value) +{ + 
return (uint64) InterlockedExchange((long *) value, (long) new_value); +} + +inline +uint32 atomic_get(volatile uint32* value) +{ + return (uint32) InterlockedCompareExchange((long *) value, 0, 0); +} + +inline +uint64 atomic_get(volatile uint64* value) +{ + return (uint64) InterlockedCompareExchange((long *) value, 0, 0); +} + +inline +void atomic_increment(volatile uint32* value) { + InterlockedIncrement((long *) value); +} + +inline +void atomic_decrement(volatile uint32* value) { + InterlockedDecrement((long *) value); +} + +inline +void atomic_increment(volatile uint64* value) { + InterlockedIncrement((long *) value); +} + +inline +void atomic_decrement(volatile uint64* value) { + InterlockedDecrement((long *) value); +} + +inline +void atomic_add(volatile uint32* value, uint32 increment) { + InterlockedAdd((long *) value, increment); +} + +inline +void atomic_sub(volatile uint32* value, uint32 decrement) { + InterlockedAdd((long *) value, -1 * ((int32) decrement)); +} + +inline +void atomic_add(volatile uint64* value, uint64 increment) { + InterlockedAdd((long *) value, (long) increment); +} + +inline +void atomic_sub(volatile uint64* value, uint64 decrement) { + InterlockedAdd((long *) value, -1 * ((long) decrement)); +} + +inline +uint32 atomic_compare_exchange_weak(volatile uint32* value, uint32* expected, uint32 desired) { + return (uint32) InterlockedCompareExchange((long *) value, desired, *expected); +} + +inline +uint32 atomic_add_fetch(volatile uint32* value, uint32 operand) { + return (uint32) InterlockedExchangeAdd((long *) value, operand); +} + +inline +uint32 atomic_sub_fetch(volatile uint32* value, uint32 operand) { + return (uint32) InterlockedExchangeSubtract((unsigned long *) value, operand); +} + +inline +uint64 atomic_add_fetch(volatile uint64* value, uint64 operand) { + return (uint64) InterlockedExchangeAdd((long *) value, (long) operand); +} + +inline +uint64 atomic_sub_fetch(volatile uint64* value, uint64 operand) { + return (uint64) 
InterlockedExchangeSubtract((unsigned long *) value, (long) operand); +} + +inline +void atomic_and(volatile uint32* value, uint32 mask) { + InterlockedAnd((volatile LONG *) value, mask); +} + +inline +void atomic_and(volatile int32* value, int32 mask) { + InterlockedAnd((volatile LONG *) value, (LONG)mask); +} + +inline +void atomic_and(volatile uint64* value, uint64 mask) { + InterlockedAnd64((volatile LONG64 *) value, mask); +} + +inline +void atomic_and(volatile int64* value, int64 mask) { + InterlockedAnd64((volatile LONG64 *) value, mask); +} + +inline +void atomic_or(volatile uint32* value, uint32 mask) { + InterlockedOr((volatile LONG *) value, mask); +} + +inline +void atomic_or(volatile int32* value, int32 mask) { + InterlockedOr((volatile LONG *) value, (LONG)mask); +} + +inline +void atomic_or(volatile uint64* value, uint64 mask) { + InterlockedOr64((volatile LONG64 *) value, mask); +} + +inline +void atomic_or(volatile int64* value, int64 mask) { + InterlockedOr64((volatile LONG64 *) value, mask); +} + #endif \ No newline at end of file diff --git a/platform/win32/threading/Semaphore.h b/platform/win32/threading/Semaphore.h index 9fb718f..4846a38 100644 --- a/platform/win32/threading/Semaphore.h +++ b/platform/win32/threading/Semaphore.h @@ -29,6 +29,14 @@ void sem_wait(sem_t* semaphore) { WaitForSingleObject(*semaphore, INFINITE); } +int32 sem_timedwait(sem_t* semaphore, uint64 ms) { + return (int32) WaitForSingleObject(*semaphore, (DWORD) ms); +} + +int32 sem_trywait(sem_t* semaphore) { + return (int32) WaitForSingleObject(*semaphore, 0); +} + // increment void sem_post(sem_t* semaphore) { ReleaseSemaphore(*semaphore, 1, NULL); diff --git a/scene/SceneState.h b/scene/SceneState.h new file mode 100644 index 0000000..b048979 --- /dev/null +++ b/scene/SceneState.h @@ -0,0 +1,23 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_SCENE_STATE_H +#define 
TOS_SCENE_STATE_H + +#include "../stdlib/Types.h" + +enum SceneState : byte { + SCENE_STATE_DEFAULT = 0, + SCENE_STATE_WINDOW_CHANGED = 1, + SCENE_STATE_SHOULD_SWITCH = 2, + SCENE_STATE_STARTED_SETUP = 4, + SCENE_STATE_WAITING_SETUP = 8, + SCENE_STATE_READY = 16, +}; + +#endif \ No newline at end of file diff --git a/stdlib/HashMap.h b/stdlib/HashMap.h index 18e2dac..9abcc33 100644 --- a/stdlib/HashMap.h +++ b/stdlib/HashMap.h @@ -16,53 +16,53 @@ #include "../memory/ChunkMemory.h" #include "../utils/StringUtils.h" -#define MAX_KEY_LENGTH 32 +#define HASH_MAP_MAX_KEY_LENGTH 32 struct HashEntryInt32 { int64 element_id; - char key[MAX_KEY_LENGTH]; + char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntryInt32* next; int32 value; }; struct HashEntryInt64 { int64 element_id; - char key[MAX_KEY_LENGTH]; + char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntryInt64* next; int64 value; }; struct HashEntryUIntPtr { int64 element_id; - char key[MAX_KEY_LENGTH]; + char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntryUIntPtr* next; uintptr_t value; }; struct HashEntryVoidP { int64 element_id; - char key[MAX_KEY_LENGTH]; + char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntryVoidP* next; void* value; }; struct HashEntryFloat { int64 element_id; - char key[MAX_KEY_LENGTH]; + char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntryFloat* next; f32 value; }; struct HashEntryStr { int64 element_id; - char key[MAX_KEY_LENGTH]; + char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntryStr* next; - char value[MAX_KEY_LENGTH]; + char value[HASH_MAP_MAX_KEY_LENGTH]; }; struct HashEntry { int64 element_id; - char key[MAX_KEY_LENGTH]; + char key[HASH_MAP_MAX_KEY_LENGTH]; HashEntry* next; byte* value; }; @@ -128,8 +128,8 @@ void hashmap_insert(HashMap* hm, const char* key, int32 value) { HashEntryInt32* entry = (HashEntryInt32 *) chunk_get_element(&hm->buf, element, true); entry->element_id = element; - strncpy(entry->key, key, MAX_KEY_LENGTH); - entry->key[MAX_KEY_LENGTH - 1] = '\0'; + strncpy(entry->key, key, HASH_MAP_MAX_KEY_LENGTH); + 
entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; entry->value = value; entry->next = NULL; @@ -153,8 +153,8 @@ void hashmap_insert(HashMap* hm, const char* key, int64 value) { HashEntryInt64* entry = (HashEntryInt64 *) chunk_get_element(&hm->buf, element, true); entry->element_id = element; - strncpy(entry->key, key, MAX_KEY_LENGTH); - entry->key[MAX_KEY_LENGTH - 1] = '\0'; + strncpy(entry->key, key, HASH_MAP_MAX_KEY_LENGTH); + entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; entry->value = value; entry->next = NULL; @@ -178,8 +178,8 @@ void hashmap_insert(HashMap* hm, const char* key, uintptr_t value) { HashEntryUIntPtr* entry = (HashEntryUIntPtr *) chunk_get_element(&hm->buf, element, true); entry->element_id = element; - strncpy(entry->key, key, MAX_KEY_LENGTH); - entry->key[MAX_KEY_LENGTH - 1] = '\0'; + strncpy(entry->key, key, HASH_MAP_MAX_KEY_LENGTH); + entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; entry->value = value; entry->next = NULL; @@ -203,8 +203,8 @@ void hashmap_insert(HashMap* hm, const char* key, void* value) { HashEntryVoidP* entry = (HashEntryVoidP *) chunk_get_element(&hm->buf, element, true); entry->element_id = element; - strncpy(entry->key, key, MAX_KEY_LENGTH); - entry->key[MAX_KEY_LENGTH - 1] = '\0'; + strncpy(entry->key, key, HASH_MAP_MAX_KEY_LENGTH); + entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; entry->value = value; entry->next = NULL; @@ -228,8 +228,8 @@ void hashmap_insert(HashMap* hm, const char* key, f32 value) { HashEntryFloat* entry = (HashEntryFloat *) chunk_get_element(&hm->buf, element, true); entry->element_id = element; - strncpy(entry->key, key, MAX_KEY_LENGTH); - entry->key[MAX_KEY_LENGTH - 1] = '\0'; + strncpy(entry->key, key, HASH_MAP_MAX_KEY_LENGTH); + entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; entry->value = value; entry->next = NULL; @@ -253,11 +253,11 @@ void hashmap_insert(HashMap* hm, const char* key, const char* value) { HashEntryStr* entry = (HashEntryStr *) chunk_get_element(&hm->buf, element, true); 
entry->element_id = element; - strncpy(entry->key, key, MAX_KEY_LENGTH); - entry->key[MAX_KEY_LENGTH - 1] = '\0'; + strncpy(entry->key, key, HASH_MAP_MAX_KEY_LENGTH); + entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; - strncpy(entry->value, value, MAX_KEY_LENGTH); - entry->value[MAX_KEY_LENGTH - 1] = '\0'; + strncpy(entry->value, value, HASH_MAP_MAX_KEY_LENGTH); + entry->value[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; entry->next = NULL; @@ -282,8 +282,8 @@ void hashmap_insert(HashMap* hm, const char* key, byte* value) { entry->value = (byte *) entry + sizeof(HashEntry); - strncpy(entry->key, key, MAX_KEY_LENGTH); - entry->key[MAX_KEY_LENGTH - 1] = '\0'; + strncpy(entry->key, key, HASH_MAP_MAX_KEY_LENGTH); + entry->key[HASH_MAP_MAX_KEY_LENGTH - 1] = '\0'; memcpy(entry->value, value, hm->buf.chunk_size - sizeof(HashEntry)); @@ -306,7 +306,7 @@ HashEntry* hashmap_get_entry(const HashMap* hm, const char* key) { HashEntry* entry = (HashEntry *) hm->table[index]; while (entry != NULL) { - if (strncmp(entry->key, key, MAX_KEY_LENGTH) == 0) { + if (strncmp(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { return entry; } @@ -318,12 +318,12 @@ HashEntry* hashmap_get_entry(const HashMap* hm, const char* key) { // This function only saves one step (omission of the hash function) // The reason for this is in some cases we can use compile time hashing -HashEntry* hashmap_get_entry(const HashMap* hm, const char* key, uint64 index) { - index %= hm->buf.count; - HashEntry* entry = (HashEntry *) hm->table[index]; +HashEntry* hashmap_get_entry(const HashMap* hm, const char* key, uint64 hash) { + hash %= hm->buf.count; + HashEntry* entry = (HashEntry *) hm->table[hash]; while (entry != NULL) { - if (strncmp(entry->key, key, MAX_KEY_LENGTH) == 0) { + if (strncmp(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { return entry; } @@ -339,7 +339,7 @@ void hashmap_delete_entry(HashMap* hm, const char* key) { HashEntry* prev = NULL; while (entry != NULL) { - if (strncmp(entry->key, key, 
MAX_KEY_LENGTH) == 0) { + if (strncmp(entry->key, key, HASH_MAP_MAX_KEY_LENGTH) == 0) { if (prev == NULL) { hm->table[index] = entry->next; } else { @@ -370,7 +370,7 @@ int64 hashmap_dump(const HashMap* hm, byte* data) } data += sizeof(uint64) * hm->buf.count; - int64 value_size = hm->buf.chunk_size - sizeof(uint64) - sizeof(char) * MAX_KEY_LENGTH - sizeof(uint64); + int64 value_size = hm->buf.chunk_size - sizeof(uint64) - sizeof(char) * HASH_MAP_MAX_KEY_LENGTH - sizeof(uint64); // Dumb hash map content = buffer memory int32 free_index = 0; @@ -449,7 +449,7 @@ int64 hashmap_load(HashMap* hm, const byte* data) // @question don't we have to possibly endian swap check the free array as well? memcpy(hm->buf.free, data, sizeof(uint64) * CEIL_DIV(hm->buf.count, 64)); - int64 value_size = hm->buf.chunk_size - sizeof(uint64) - sizeof(char) * MAX_KEY_LENGTH - sizeof(uint64); + int64 value_size = hm->buf.chunk_size - sizeof(uint64) - sizeof(char) * HASH_MAP_MAX_KEY_LENGTH - sizeof(uint64); // Switch endian AND turn offsets to pointers int32 free_index = 0; diff --git a/stdlib/ThreadedHashMap.h b/stdlib/ThreadedHashMap.h index 9eaf54a..60208c3 100644 --- a/stdlib/ThreadedHashMap.h +++ b/stdlib/ThreadedHashMap.h @@ -31,7 +31,7 @@ struct ThreadedHashMap { // WARNING: element_size = element size + remaining HashEntry data size inline -void threaded_hashmap_create(ThreadedHashMap* hm, int32 count, int32 element_size, RingMemory* ring) +void thrd_hashmap_create(ThreadedHashMap* hm, int32 count, int32 element_size, RingMemory* ring) { hashmap_create((HashMap *) hm, count, element_size, ring); pthread_mutex_init(&hm->mutex, NULL); @@ -39,7 +39,7 @@ void threaded_hashmap_create(ThreadedHashMap* hm, int32 count, int32 element_siz // WARNING: element_size = element size + remaining HashEntry data size inline -void threaded_hashmap_create(ThreadedHashMap* hm, int32 count, int32 element_size, BufferMemory* buf) +void thrd_hashmap_create(ThreadedHashMap* hm, int32 count, int32 
element_size, BufferMemory* buf) { hashmap_create((HashMap *) hm, count, element_size, buf); pthread_mutex_init(&hm->mutex, NULL); @@ -47,69 +47,69 @@ void threaded_hashmap_create(ThreadedHashMap* hm, int32 count, int32 element_siz // WARNING: element_size = element size + remaining HashEntry data size inline -void threaded_hashmap_create(ThreadedHashMap* hm, int32 count, int32 element_size, byte* buf) +void thrd_hashmap_create(ThreadedHashMap* hm, int32 count, int32 element_size, byte* buf) { hashmap_create((HashMap *) hm, count, element_size, buf); pthread_mutex_init(&hm->mutex, NULL); } inline -void threaded_hashmap_free(ThreadedHashMap* hm) +void thrd_hashmap_free(ThreadedHashMap* hm) { pthread_mutex_destroy(&hm->mutex); } inline -void threaded_hashmap_insert(ThreadedHashMap* hm, const char* key, int32 value) { +void thrd_hashmap_insert(ThreadedHashMap* hm, const char* key, int32 value) { pthread_mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); pthread_mutex_unlock(&hm->mutex); } inline -void threaded_hashmap_insert(ThreadedHashMap* hm, const char* key, int64 value) { +void thrd_hashmap_insert(ThreadedHashMap* hm, const char* key, int64 value) { pthread_mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); pthread_mutex_unlock(&hm->mutex); } inline -void threaded_hashmap_insert(ThreadedHashMap* hm, const char* key, uintptr_t value) { +void thrd_hashmap_insert(ThreadedHashMap* hm, const char* key, uintptr_t value) { pthread_mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); pthread_mutex_unlock(&hm->mutex); } inline -void threaded_hashmap_insert(ThreadedHashMap* hm, const char* key, void* value) { +void thrd_hashmap_insert(ThreadedHashMap* hm, const char* key, void* value) { pthread_mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); pthread_mutex_unlock(&hm->mutex); } inline -void threaded_hashmap_insert(ThreadedHashMap* hm, const char* key, f32 value) { +void thrd_hashmap_insert(ThreadedHashMap* 
hm, const char* key, f32 value) { pthread_mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); pthread_mutex_unlock(&hm->mutex); } inline -void threaded_hashmap_insert(ThreadedHashMap* hm, const char* key, const char* value) { +void thrd_hashmap_insert(ThreadedHashMap* hm, const char* key, const char* value) { pthread_mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); pthread_mutex_unlock(&hm->mutex); } inline -void threaded_hashmap_insert(ThreadedHashMap* hm, const char* key, byte* value) { +void thrd_hashmap_insert(ThreadedHashMap* hm, const char* key, byte* value) { pthread_mutex_lock(&hm->mutex); hashmap_insert((HashMap *) hm, key, value); pthread_mutex_unlock(&hm->mutex); } inline -void threaded_hashmap_get_entry(ThreadedHashMap* hm, HashEntry* entry, const char* key) { +void thrd_hashmap_get_entry(ThreadedHashMap* hm, HashEntry* entry, const char* key) { pthread_mutex_lock(&hm->mutex); HashEntry* temp = hashmap_get_entry((HashMap *) hm, key); memcpy(entry, temp, hm->buf.chunk_size); @@ -117,7 +117,7 @@ void threaded_hashmap_get_entry(ThreadedHashMap* hm, HashEntry* entry, const cha } inline -void threaded_hashmap_get_entry(ThreadedHashMap* hm, HashEntry* entry, const char* key, uint64 index) { +void thrd_hashmap_get_entry(ThreadedHashMap* hm, HashEntry* entry, const char* key, uint64 index) { pthread_mutex_lock(&hm->mutex); HashEntry* temp = hashmap_get_entry((HashMap *) hm, key, index); memcpy(entry, temp, hm->buf.chunk_size); @@ -125,7 +125,7 @@ void threaded_hashmap_get_entry(ThreadedHashMap* hm, HashEntry* entry, const cha } inline -void threaded_hashmap_delete_entry(ThreadedHashMap* hm, const char* key) { +void thrd_hashmap_delete_entry(ThreadedHashMap* hm, const char* key) { pthread_mutex_lock(&hm->mutex); hashmap_delete_entry((HashMap *) hm, key); pthread_mutex_unlock(&hm->mutex); diff --git a/stdlib/Types.h b/stdlib/Types.h index 65d16ab..430ce55 100644 --- a/stdlib/Types.h +++ b/stdlib/Types.h @@ -12,12 +12,14 @@ 
#include #ifdef _MSC_VER + #include + #define PACKED_STRUCT __pragma(pack(push, 1)) #define UNPACKED_STRUCT __pragma(pack(pop)) typedef SSIZE_T ssize_t; #else #define PACKED_STRUCT __attribute__((__packed__)) - #define UNPACKED_STRUCT + #define UNPACKED_STRUCT ((void) 0) #endif #define ARRAY_COUNT(a) (sizeof(a) / sizeof((a)[0])) @@ -61,12 +63,49 @@ typedef intptr_t smm; #define MIN_INT32 0x80000000 #define MIN_INT64 0x8000000000000000 +#define SEC_MILLI 1000 #define MILLI_MICRO 1000 +#define SEC_MICRO 1000000 + +#define MHZ 1000000 +#define GHZ 1000000000 #define internal static // only allows local "file" access #define local_persist static #define global_persist static +struct v3_byte { + union { + struct { + byte x, y, z; + }; + + struct { + byte r, g, b; + }; + + byte v[3]; + }; +}; + +struct v4_byte { + union { + struct { + byte x, y, z, w; + }; + + struct { + byte r, g, b, a; + }; + + union { + byte v[4]; + uint32 val; + }; + }; +}; + + struct v2_int32 { union { struct { @@ -265,19 +304,19 @@ struct m_f64 { size_t m, n; }; -#define HALF_FLOAT_SIGN_MASK 0x8000 -#define HALF_FLOAT_EXP_MASK 0x7C00 -#define HALF_FLOAT_FRAC_MASK 0x03FF +#define HALF_FLOAT_SIGN_MASK 0x8000 +#define HALF_FLOAT_EXP_MASK 0x7C00 +#define HALF_FLOAT_FRAC_MASK 0x03FF -#define HALF_FLOAT_EXP_SHIFT 10 -#define HALF_FLOAT_EXP_BIAS 15 +#define HALF_FLOAT_EXP_SHIFT 10 +#define HALF_FLOAT_EXP_BIAS 15 -#define FLOAT32_SIGN_MASK 0x80000000 -#define FLOAT32_EXP_MASK 0x7F800000 -#define FLOAT32_FRAC_MASK 0x007FFFFF +#define FLOAT32_SIGN_MASK 0x80000000 +#define FLOAT32_EXP_MASK 0x7F800000 +#define FLOAT32_FRAC_MASK 0x007FFFFF -#define FLOAT32_EXP_SHIFT 23 -#define FLOAT32_EXP_BIAS 127 +#define FLOAT32_EXP_SHIFT 23 +#define FLOAT32_EXP_BIAS 127 uint16 float_to_f16(float f) { uint32_t f_bits = *((uint32_t*)&f); diff --git a/stdlib/simd/SIMD_I32.h b/stdlib/simd/SIMD_I32.h index 95c9eea..b43ce9c 100644 --- a/stdlib/simd/SIMD_I32.h +++ b/stdlib/simd/SIMD_I32.h @@ -1332,7 +1332,6 @@ void simd_div(const 
int32* a, f32 b, f32* result, int32 size, int32 steps) result += steps; } } else if (steps == 8) { - // @todo this his how all the functions should be implemented that take in baseic types and output basic types __m256i a_8; __m256 af_8; __m256 b_8 = _mm256_set1_ps(b); diff --git a/stdlib/simd/SIMD_SVML.h b/stdlib/simd/SIMD_SVML.h index e863957..0308ada 100644 --- a/stdlib/simd/SIMD_SVML.h +++ b/stdlib/simd/SIMD_SVML.h @@ -18,46 +18,46 @@ inline __m128i _mm_div_epi32(__m128i a, __m128i b) { alignas(16) int32_t a_array[4], b_array[4], result[4]; - _mm_storeu_si128((__m128i*)a_array, a); - _mm_storeu_si128((__m128i*)b_array, b); + _mm_storeu_si128((__m128i*) a_array, a); + _mm_storeu_si128((__m128i*) b_array, b); - for (int i = 0; i < 4; ++i) { + for (int32 i = 0; i < 4; ++i) { result[i] = a_array[i] / b_array[i]; } - return _mm_load_si128((__m128i*)result); + return _mm_load_si128((__m128i*) result); } inline __m256i _mm256_div_epi32(__m256i a, __m256i b) { alignas(32) int32_t a_array[8], b_array[8], result[8]; - _mm256_storeu_si256((__m256i*)a_array, a); - _mm256_storeu_si256((__m256i*)b_array, b); + _mm256_storeu_si256((__m256i*) a_array, a); + _mm256_storeu_si256((__m256i*) b_array, b); - for (int i = 0; i < 8; ++i) { + for (int32 i = 0; i < 8; ++i) { result[i] = a_array[i] / b_array[i]; } - return _mm256_load_si256((__m256i*)result); + return _mm256_load_si256((__m256i*) result); } inline __m512i _mm512_div_epi32(__m512i a, __m512i b) { alignas(64) int32_t a_array[16], b_array[16], result[16]; - _mm512_storeu_si512((__m512i*)a_array, a); - _mm512_storeu_si512((__m512i*)b_array, b); + _mm512_storeu_si512((__m512i*) a_array, a); + _mm512_storeu_si512((__m512i*) b_array, b); - for (int i = 0; i < 16; ++i) { + for (int32 i = 0; i < 16; ++i) { result[i] = a_array[i] / b_array[i]; } - return _mm512_load_si512((__m512i*)result); + return _mm512_load_si512((__m512i*) result); } inline __m128 _mm_sin_ps(__m128 a) { alignas(16) f32 a_array[4], result[4]; 
_mm_storeu_ps(a_array, a);
 
-    for (int i = 0; i < 4; ++i) {
+    for (int32 i = 0; i < 4; ++i) {
         result[i] = sinf(a_array[i]);
     }
     return _mm_load_ps(result);
@@ -66,7 +66,7 @@ inline __m128 _mm_cos_ps(__m128 a) {
     alignas(16) f32 a_array[4], result[4];
     _mm_storeu_ps(a_array, a);
 
-    for (int i = 0; i < 4; ++i) {
+    for (int32 i = 0; i < 4; ++i) {
         result[i] = cosf(a_array[i]);
     }
     return _mm_load_ps(result);
@@ -75,7 +75,7 @@ inline __m128 _mm_asin_ps(__m128 a) {
     alignas(16) f32 a_array[4], result[4];
     _mm_storeu_ps(a_array, a);
 
-    for (int i = 0; i < 4; ++i) {
+    for (int32 i = 0; i < 4; ++i) {
         result[i] = asinf(a_array[i]);
     }
     return _mm_load_ps(result);
@@ -84,7 +84,7 @@ inline __m128 _mm_acos_ps(__m128 a) {
     alignas(16) f32 a_array[4], result[4];
     _mm_storeu_ps(a_array, a);
 
-    for (int i = 0; i < 4; ++i) {
+    for (int32 i = 0; i < 4; ++i) {
         result[i] = acosf(a_array[i]);
     }
     return _mm_load_ps(result);
@@ -93,7 +93,7 @@ inline __m256 _mm256_sin_ps(__m256 a) {
     alignas(32) f32 a_array[8], result[8];
     _mm256_storeu_ps(a_array, a);
 
-    for (int i = 0; i < 8; ++i) {
+    for (int32 i = 0; i < 8; ++i) {
         result[i] = sinf(a_array[i]);
     }
     return _mm256_load_ps(result);
@@ -102,7 +102,7 @@ inline __m256 _mm256_cos_ps(__m256 a) {
     alignas(32) f32 a_array[8], result[8];
     _mm256_storeu_ps(a_array, a);
 
-    for (int i = 0; i < 8; ++i) {
+    for (int32 i = 0; i < 8; ++i) {
         result[i] = cosf(a_array[i]);
     }
     return _mm256_load_ps(result);
@@ -111,7 +111,7 @@ inline __m256 _mm256_asin_ps(__m256 a) {
     alignas(32) f32 a_array[8], result[8];
     _mm256_storeu_ps(a_array, a);
 
-    for (int i = 0; i < 8; ++i) {
+    for (int32 i = 0; i < 8; ++i) {
         result[i] = asinf(a_array[i]);
     }
     return _mm256_load_ps(result);
@@ -120,7 +120,7 @@ inline __m256 _mm256_acos_ps(__m256 a) {
     alignas(32) f32 a_array[8], result[8];
     _mm256_storeu_ps(a_array, a);
 
-    for (int i = 0; i < 16; ++i) {
+    for (int32 i = 0; i < 8; ++i) { // a_array and result only hold 8 elements
         result[i] = acosf(a_array[i]);
     }
     return _mm256_load_ps(result);
@@ -129,7 +129,7 @@ inline __m512 _mm512_sin_ps(__m512 a) {
-    alignas(64) f32 a_array[8], result[8];
+    alignas(64) f32 a_array[16], result[16]; // _mm512_storeu_ps stores 16 floats
     _mm512_storeu_ps(a_array, a);
 
-    for (int i = 0; i < 16; ++i) {
+    for (int32 i = 0; i < 16; ++i) {
         result[i] = sinf(a_array[i]);
     }
     return _mm512_load_ps(result);
@@ -138,7 +138,7 @@ inline __m512 _mm512_cos_ps(__m512 a) {
-    alignas(64) f32 a_array[8], result[8];
+    alignas(64) f32 a_array[16], result[16]; // _mm512_storeu_ps stores 16 floats
     _mm512_storeu_ps(a_array, a);
 
-    for (int i = 0; i < 16; ++i) {
+    for (int32 i = 0; i < 16; ++i) {
         result[i] = cosf(a_array[i]);
     }
     return _mm512_load_ps(result);
@@ -147,7 +147,7 @@ inline __m512 _mm512_asin_ps(__m512 a) {
-    alignas(64) f32 a_array[8], result[8];
+    alignas(64) f32 a_array[16], result[16]; // _mm512_storeu_ps stores 16 floats
     _mm512_storeu_ps(a_array, a);
 
-    for (int i = 0; i < 16; ++i) {
+    for (int32 i = 0; i < 16; ++i) {
         result[i] = asinf(a_array[i]);
     }
     return _mm512_load_ps(result);
@@ -156,7 +156,7 @@ inline __m512 _mm512_acos_ps(__m512 a) {
     alignas(64) f32 a_array[16], result[16];
     _mm512_storeu_ps(a_array, a);
 
-    for (int i = 0; i < 16; ++i) {
+    for (int32 i = 0; i < 16; ++i) {
         result[i] = acosf(a_array[i]);
     }
     return _mm512_load_ps(result);
diff --git a/thread/ThreadJob.h b/thread/ThreadJob.h
index 7b8c7f2..e8b17ca 100644
--- a/thread/ThreadJob.h
+++ b/thread/ThreadJob.h
@@ -13,6 +13,7 @@
 
 #include
 #include "../stdlib/Types.h"
+#include "../memory/ThreadedRingMemory.h"
 
 #if _WIN32
     #include "../platform/win32/threading/ThreadDefines.h"
@@ -20,14 +21,16 @@
     #include "../platform/linux/threading/ThreadDefines.h"
 #endif
 
-struct PoolWorker {
-    ThreadJobFunc func;
-    void *arg;
-    volatile int32 state;
-    PoolWorker *next;
-};
+typedef void (*ThreadPoolJobFunc)(void*);
 
-typedef PoolWorker ThreadJob;
+struct PoolWorker {
+    int32 id;
+    volatile int32 state;
+    void* arg;
+    void* result;
+    RingMemory ring;
+    ThreadPoolJobFunc func;
+};
 
 struct Worker {
     volatile int32 state;
diff --git a/thread/ThreadPool.h b/thread/ThreadPool.h
index 5468284..f47a4fa 100644
--- a/thread/ThreadPool.h
+++ b/thread/ThreadPool.h
@@ -13,158 +13,120 @@
 
 #include
 #include "../stdlib/Types.h"
+#include "../memory/Queue.h"
+#include "../memory/BufferMemory.h"
 
 #ifdef _WIN32
#include "../platform/win32/threading/Thread.h" + #include "../platform/win32/threading/Atomic.h" #elif __linux__ #include "../platform/linux/threading/Thread.h" + #include "../platform/linux/threading/Atomic.h" #endif #include "ThreadJob.h" struct ThreadPool { - ThreadJob *work_first; - ThreadJob *work_last; + // This is not a threaded queue since we want to handle the mutex in here, not in the queue for finer control + Queue work_queue; pthread_mutex_t work_mutex; pthread_cond_t work_cond; pthread_cond_t working_cond; - size_t working_cnt; - size_t thread_cnt; + int32 working_cnt; + int32 thread_cnt; int32 size; - bool stop; + int32 state; + + uint32 id_counter; }; -ThreadJob *thread_pool_work_poll(ThreadPool *pool) -{ - if (pool == NULL) { - return NULL; - } - - ThreadJob *work = pool->work_first; - if (work == NULL) { - return NULL; - } - - if (work->next == NULL) { - pool->work_first = NULL; - pool->work_last = NULL; - } else { - pool->work_first = work->next; - } - - return work; -} - static THREAD_RETURN thread_pool_worker(void* arg) { - ThreadPool *pool = (ThreadPool *) arg; - ThreadJob *work; + ThreadPool* pool = (ThreadPool *) arg; + PoolWorker* work; while (true) { pthread_mutex_lock(&pool->work_mutex); - - while (pool->work_first == NULL && !pool->stop) { + while (queue_is_empty(&pool->work_queue) && !pool->state) { pthread_cond_wait(&pool->work_cond, &pool->work_mutex); } - if (pool->stop) { + if (pool->state == 1) { + pthread_mutex_unlock(&pool->work_mutex); + break; } - work = thread_pool_work_poll(pool); - ++(pool->working_cnt); + work = (PoolWorker *) queue_dequeue_keep(&pool->work_queue, sizeof(PoolWorker), 64); pthread_mutex_unlock(&pool->work_mutex); - if (work != NULL) { - work->func(work); + if (!work) { + continue; } - pthread_mutex_lock(&pool->work_mutex); - --(pool->working_cnt); + atomic_increment(&pool->working_cnt); + atomic_set(&work->state, 2); + work->func(work); + atomic_set(&work->state, 1); - if (!pool->stop && pool->working_cnt == 
0 && pool->work_first == NULL) { + // Job gets marked after completion -> can be overwritten now + if (atomic_get(&work->id) == -1) { + atomic_set(&work->id, 0); + } + + atomic_decrement(&pool->working_cnt); + + if (atomic_get(&pool->state) == 0 && atomic_get(&pool->working_cnt) == 0) { pthread_cond_signal(&pool->working_cond); } - - pthread_mutex_unlock(&pool->work_mutex); } - --(pool->thread_cnt); pthread_cond_signal(&pool->working_cond); - pthread_mutex_unlock(&pool->work_mutex); + atomic_decrement(&pool->thread_cnt); return NULL; } -ThreadPool *thread_pool_create(size_t num, ThreadPool* pool) +void thread_pool_create(ThreadPool* pool, BufferMemory* buf, int32 thread_count) { - pthread_t thread; - size_t i; + queue_init(&pool->work_queue, buf, 64, sizeof(PoolWorker), 64); - if (num == 0) { - num = 2; - } - - pool->thread_cnt = num; + pool->thread_cnt = thread_count; // @todo switch from pool mutex and pool cond to threadjob mutex/cond - // thread_pool_wait etc. should just itereate over all mutexes + // thread_pool_wait etc. 
should just iterate over all mutexes pthread_mutex_init(&pool->work_mutex, NULL); pthread_cond_init(&pool->work_cond, NULL); pthread_cond_init(&pool->working_cond, NULL); - pool->work_first = NULL; - pool->work_last = NULL; - - for (i = 0; i < num; ++i) { + pthread_t thread; + for (pool->size = 0; pool->size < thread_count; ++pool->size) { pthread_create(&thread, NULL, thread_pool_worker, pool); - ++(pool->size); - pthread_detach(thread); } - - return pool; } -void thread_pool_wait(ThreadPool *pool) +void thread_pool_wait(ThreadPool* pool) { - if (pool == NULL) { - return; - } - pthread_mutex_lock(&pool->work_mutex); - - while (true) { - if ((!pool->stop && pool->working_cnt != 0) || (pool->stop && pool->thread_cnt != 0)) { - pthread_cond_wait(&pool->working_cond, &pool->work_mutex); - } else { - break; - } + while ((!pool->state && pool->working_cnt != 0) || (pool->state && pool->thread_cnt != 0)) { + pthread_cond_wait(&pool->working_cond, &pool->work_mutex); } - pthread_mutex_unlock(&pool->work_mutex); } -void thread_pool_destroy(ThreadPool *pool) +void thread_pool_destroy(ThreadPool* pool) { - if (pool == NULL) { - return; - } + // This sets the queue to empty + atomic_set((void **) &pool->work_queue.tail, (void **) &pool->work_queue.head); - pthread_mutex_lock(&pool->work_mutex); - ThreadJob *work = pool->work_first; + // This sets the state to "shutdown" + atomic_set(&pool->state, 1); - while (work != NULL) { - work = work->next; - } - - pool->stop = true; pthread_cond_broadcast(&pool->work_cond); - pthread_mutex_unlock(&pool->work_mutex); - thread_pool_wait(pool); pthread_mutex_destroy(&pool->work_mutex); @@ -172,25 +134,58 @@ void thread_pool_destroy(ThreadPool *pool) pthread_cond_destroy(&pool->working_cond); } -ThreadJob* thread_pool_add_work(ThreadPool *pool, ThreadJob* job) +PoolWorker* thread_pool_add_work(ThreadPool* pool, const PoolWorker* job) { - if (pool == NULL || job == NULL) { + pthread_mutex_lock(&pool->work_mutex); + PoolWorker* temp_job = 
(PoolWorker *) ring_get_memory_nomove(&pool->work_queue, sizeof(PoolWorker), 64); + if (atomic_get(&temp_job->id) > 0) { + pthread_mutex_unlock(&pool->work_mutex); + ASSERT_SIMPLE(temp_job->id == 0); + return NULL; } - pthread_mutex_lock(&pool->work_mutex); - if (pool->work_first == NULL) { - pool->work_first = job; - pool->work_last = pool->work_first; - } else { - pool->work_last->next = job; - pool->work_last = job; + memcpy(temp_job, job, sizeof(PoolWorker)); + ring_move_pointer(&pool->work_queue, &pool->work_queue.head, sizeof(PoolWorker), 64); + + if (temp_job->id == 0) { + temp_job->id = atomic_add_fetch(&pool->id_counter, 1); } pthread_cond_broadcast(&pool->work_cond); pthread_mutex_unlock(&pool->work_mutex); - return job; + return temp_job; } +// This is basically the same as thread_pool_add_work but allows us to directly write into the memory in the caller +// This makes it faster, since we can avoid a memcpy +PoolWorker* thread_pool_add_work_start(ThreadPool* pool) +{ + pthread_mutex_lock(&pool->work_mutex); + + PoolWorker* temp_job = (PoolWorker *) queue_enqueue_start(&pool->work_queue, sizeof(PoolWorker), 64); + if (atomic_get(&temp_job->id) > 0) { + pthread_mutex_unlock(&pool->work_mutex); + ASSERT_SIMPLE(temp_job->id == 0); + + return NULL; + } + + if (temp_job->id == 0) { + // +1 because otherwise the very first job would be id = 0 which is not a valid id + temp_job->id = atomic_add_fetch(&pool->id_counter, 1) + 1; + } + + return temp_job; +} + +void thread_pool_add_work_end(ThreadPool* pool) +{ + queue_enqueue_end(&pool->work_queue, sizeof(PoolWorker), 64); + pthread_cond_broadcast(&pool->work_cond); + pthread_mutex_unlock(&pool->work_mutex); +} + + #endif \ No newline at end of file diff --git a/ui/UITheme.h b/ui/UITheme.h index 9e48dc1..54a566c 100644 --- a/ui/UITheme.h +++ b/ui/UITheme.h @@ -108,9 +108,14 @@ int compare_by_attribute_id(const void* a, const void* b) { // WARNING: theme needs to have memory already reserved and assigned to data 
void theme_from_file_txt( UIThemeStyle* theme, - byte* data + const char* path, + RingMemory* ring ) { - char* pos = (char *) data; + FileBody file; + file_read(path, &file, ring); + ASSERT_SIMPLE(file.size); + + char* pos = (char *) file.content; // move past the version string pos += 8; @@ -150,11 +155,11 @@ void theme_from_file_txt( UIAttributeGroup* temp_group = NULL; - pos = (char *) data; + pos = (char *) file.content; pos += 8; // move past version while (*pos != '\0') { - str_skip_empty(&pos); + str_skip_whitespace(&pos); if (*pos == '\n') { ++pos; @@ -200,7 +205,7 @@ void theme_from_file_txt( str_copy_move_until(&pos, attribute_name, " :\n", sizeof(" :\n") - 1); - // Skip any white spaces or other delimeters + // Skip any white spaces or other delimeter str_skip_list(&pos, " \t:", sizeof(" \t:") - 1); ASSERT_SIMPLE((*pos != '\0' && *pos != '\n')); @@ -394,9 +399,9 @@ void theme_from_file_txt( // The size of theme->data should be the file size. // Yes, this means we have a little too much data but not by a lot -void theme_from_file( - UIThemeStyle* theme, - const byte* data +int32 theme_from_data( + const byte* data, + UIThemeStyle* theme ) { const byte* pos = data; @@ -445,13 +450,15 @@ void theme_from_file( entry = entry->next; } } + + return (int32) (pos - data); } // Calculates the maximum theme size // Not every group has all the attributes (most likely only a small subset) // However, an accurate calculation is probably too slow and not needed most of the time inline -int64 theme_size(const UIThemeStyle* theme) +int64 theme_data_size(const UIThemeStyle* theme) { return hashmap_size(&theme->hash_map) + theme->hash_map.buf.count * UI_ATTRIBUTE_TYPE_SIZE * sizeof(UIAttribute); @@ -472,20 +479,11 @@ int64 theme_size(const UIThemeStyle* theme) // attributes ... // attributes ... 
-void theme_to_file( - RingMemory* ring, - const char* path, - const UIThemeStyle* theme +int32 theme_to_data( + const UIThemeStyle* theme, + byte* data ) { - FileBody file; - - // Temporary file size for buffer - // @todo This is a bad placeholder, The problem is we don't know how much we actually need without stepping through the elements - // I also don't want to add a size variable to the theme as it is useless in all other cases - file.size = theme_size(theme); - - file.content = ring_get_memory(ring, file.size, 64, true); - byte* pos = file.content; + byte* pos = data; // version *((int32 *) pos) = SWAP_ENDIAN_LITTLE(theme->version); @@ -497,7 +495,7 @@ void theme_to_file( // theme data // Layout: first save the size of the group, then save the individual attributes - for (int32 i = 0; i < theme->hash_map.buf.count; ++i) { + for (uint32 i = 0; i < theme->hash_map.buf.count; ++i) { if (!theme->hash_map.table[i]) { continue; } @@ -530,8 +528,7 @@ void theme_to_file( } } - file.size = pos - file.content; - file_write(path, &file); + return (int32) (pos - data); } #endif \ No newline at end of file diff --git a/utils/MathUtils.h b/utils/MathUtils.h index 926998b..9e2e0ca 100644 --- a/utils/MathUtils.h +++ b/utils/MathUtils.h @@ -27,6 +27,9 @@ #define CEIL_DIV(a, b) (((a) + (b) - 1) / (b)) #define OMS_CEIL(x) ((x) == (int)(x) ? (int)(x) : ((x) > 0 ? 
(int)(x) + 1 : (int)(x)))
 
+// Modulo function when b is a power of 2
+#define MODULO_2(a, b) ((a) & ((b) - 1))
+
 #define SQRT_2 1.4142135623730950488016887242097f
 
 #endif
diff --git a/utils/RandomUtils.h b/utils/RandomUtils.h
new file mode 100644
index 0000000..808ce16
--- /dev/null
+++ b/utils/RandomUtils.h
@@ -0,0 +1,112 @@
+/**
+ * Jingga
+ *
+ * @copyright Jingga
+ * @license OMS License 2.0
+ * @version 1.0.0
+ * @link https://jingga.app
+ */
+#ifndef TOS_UTILS_RANDOM_H
+#define TOS_UTILS_RANDOM_H
+
+#include <stdlib.h>
+#include "../stdlib/Types.h"
+
+global_persist uint32 fast_seed;
+#define FAST_RAND_MAX 32767
+
+/**
+ * Linear congruential generator which advances the global fast_seed.
+ *
+ * @return Pseudo random number in [0, FAST_RAND_MAX]
+ */
+inline
+uint32 fast_rand1(void) {
+    fast_seed = (214013 * fast_seed + 2531011);
+
+    return (fast_seed >> 16) & 0x7FFF;
+}
+
+/**
+ * Xorshift generator (shifts 13, 17, 5) working on a caller owned state.
+ * A state of 0 stays 0, the caller has to seed it with a non-zero value.
+ *
+ * @param state Generator state, gets replaced by the returned value
+ *
+ * @return Next pseudo random number
+ */
+uint32 fast_rand2(uint32* state) {
+    uint32 x = *state;
+
+    x ^= x << 13;
+    x ^= x >> 17;
+    x ^= x << 5;
+
+    *state = x;
+
+    return x;
+}
+
+/**
+ * @return Pseudo random number in [0.0, 1.0] based on fast_rand1()
+ */
+inline
+f32 fast_rand_percentage(void) {
+    return (f32) fast_rand1() / (f32) FAST_RAND_MAX;
+}
+
+/**
+ * Shuffles the array in place (Fisher-Yates shuffle based on rand()).
+ *
+ * @param array Elements to shuffle
+ * @param size  Number of elements in array
+ */
+inline
+void random_unique(int32* array, int32 size) {
+    for (int32 i = size - 1; i > 0; --i) {
+        int32 j = rand() % (i + 1);
+
+        int32 temp = array[i];
+        array[i] = array[j];
+        array[j] = temp;
+    }
+}
+
+/**
+ * Gets a random index, the chance of an index is proportional to its weight.
+ *
+ * @param arr         Weight per index (assumed to be >= 0)
+ * @param array_count Number of weights in arr
+ *
+ * @return Picked index, array_count - 1 if all weights are 0
+ */
+int32 random_weighted_index(const int32* arr, int32 array_count)
+{
+    uint32 prob_sum = 0;
+    for (int32 i = 0; i < array_count; ++i) {
+        prob_sum += arr[i];
+    }
+
+    int32 item_rarity = array_count - 1;
+    if (prob_sum == 0) {
+        // Nothing to weigh (also avoids the modulo by zero below)
+        return item_rarity;
+    }
+
+    // random_prob is in [0, prob_sum - 1]
+    uint32 random_prob = rand() % prob_sum;
+    uint32 current_rarity = 0;
+    for (int32 i = 0; i < array_count - 1; ++i) {
+        current_rarity += arr[i];
+
+        // The first running total above random_prob marks the picked index
+        if (random_prob < current_rarity) {
+            item_rarity = i;
+            break;
+        }
+    }
+
+    return item_rarity;
+}
+
+#endif
\ No newline at end of file
diff --git a/utils/StringUtils.h b/utils/StringUtils.h
index 29d5564..b8d8c34 100644
--- a/utils/StringUtils.h
+++ b/utils/StringUtils.h
@@ -10,6 +10,7 @@
 #define TOS_UTILS_STRING_UTILS_H
 
 #include
+#include
 #include
 #include
 
@@ -430,7 +431,8 @@ void str_replace(const char* str, const char* __restrict search, const char* __r
         memcpy(result_ptr, replace, replace_len);
         result_ptr += replace_len;
 
-        str = current + search_len;
+        current += search_len;
+        str = current;
     }
 
     strcpy(result_ptr, str);
@@ -709,4 +711,20 @@ void hexstr_to_rgba(v4_f32* rgba, const char* hex)
     rgba->a = (f32) (value & 0xFF) / 255.0f;
 }
 
+/**
+ * Copies input to output and fills the remaining bytes up to len with pad.
+ * WARNING: output is not null terminated, input longer than len gets cut off
+ */
+inline constexpr
+void str_pad(const char* input, char* output, char pad, size_t len) {
+    size_t i = 0;
+    for (; i < len && input[i] != '\0'; ++i) {
+        output[i] = input[i];
+    }
+
+    for (; i < len; ++i) {
+        output[i] = pad;
+    }
+}
+
 #endif
\ No newline at end of file
diff --git a/utils/Utils.h b/utils/Utils.h
index 0457c17..a032bd4 100644
--- a/utils/Utils.h
+++ b/utils/Utils.h
@@ -10,7 +10,6 @@
 #define TOS_UTILS_H
 
 #include
-
 #include "../stdlib/Types.h"
 
 struct FileBody {
@@ -18,76 +17,11 @@ struct FileBody {
     byte* content;
 };
 
-global_persist uint32 fast_seed;
-#define FAST_RAND_MAX 32767
-
-inline
-uint32
fast_rand1(void) {
-    fast_seed = (214013 * fast_seed + 2531011);
-
-    return (fast_seed >> 16) & 0x7FFF;
-}
-
-uint32 fast_rand2(uint32* state) {
-    uint32 x = *state;
-
-    x ^= x << 13;
-    x ^= x >> 17;
-    x ^= x << 5;
-
-    *state = x;
-
-    return x;
-}
-
-inline
-f32 fast_rand_percentage(void) {
-    return (f32) fast_rand1() / (f32) FAST_RAND_MAX;
-}
-
-/**
- * Picks n random elements from end and stores them in begin.
- */
-inline
-void random_unique(int32* array, int32 size) {
-    for (int32 i = size - 1; i > 0; --i) {
-        int32 j = rand() % (i + 1);
-
-        int32 temp = array[i];
-        array[i] = array[j];
-        array[j] = temp;
-    }
-}
-
-/**
- * Gets random index based value probability
- */
-int random_weighted_index(const int32* arr, int32 array_count)
-{
-    uint32 prob_sum = 0;
-    for (int32 i = 0; i < array_count; ++i) {
-        prob_sum += arr[i];
-    }
-
-    uint32 random_prob = rand() % (prob_sum + 1);
-    uint32 current_rarity = 0;
-    int32 item_rarity = array_count - 1;
-    for (int32 i = 0; i < array_count - 1; ++i) {
-        current_rarity += arr[i];
-
-        if (current_rarity < random_prob) {
-            item_rarity = i;
-            break;
-        }
-    }
-
-    return item_rarity;
-}
-
+// @question Do we want to make the size comparison a step variable?
 bool is_equal_aligned(const byte* region1, const byte* region2, uint64 size)
 {
     while (size > 4) {
-        if (*(const int32_t*) region1 != *(const int32_t*) region2) {
+        if (*(const int32 *) region1 != *(const int32 *) region2) {
             return false;
         }
 
@@ -108,4 +42,36 @@ bool is_equal_aligned(const byte* region1, const byte* region2, uint64 size)
     return true;
 }
 
+// @question Do we want to make the size comparison a step variable?
+/**
+ * Checks if all bytes of a memory region are 0.
+ *
+ * @param region Start of the memory region
+ * @param size   Size of the region in bytes
+ *
+ * @return true if every byte is 0 (also if size is 0), otherwise false
+ */
+bool is_empty(const byte* region, uint64 size)
+{
+    while (size > 4) {
+        if (*(const int32 *) region != 0) {
+            return false;
+        }
+
+        region += 4;
+        size -= 4;
+    }
+
+    // Check the remaining bytes one by one (the byte itself, not the pointer)
+    for (; size > 0; --size) {
+        if (*region != 0) {
+            return false;
+        }
+
+        ++region;
+    }
+
+    return true;
+}
+
 #endif
\ No newline at end of file