diff --git a/asset/AssetArchive.h b/asset/AssetArchive.h index 4bcd24e..9c5fe4a 100644 --- a/asset/AssetArchive.h +++ b/asset/AssetArchive.h @@ -73,6 +73,8 @@ struct AssetArchive { // If not remove MMFHandle mmf; + // Maps each asset type to the index of the AssetManagementSystem (AMS) that holds that type. + // Remember, many AMS only contain one asset type (e.g. image, audio, ...) int32 asset_type_map[ASSET_TYPE_SIZE]; }; @@ -182,24 +184,19 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana { // @todo add calculation from element->type to ams index - AssetArchiveElement* element = &archive->header.asset_element[id]; + // We have to mask 0x00FFFFFF since the highest bits define the archive id, not the element id + AssetArchiveElement* element = &archive->header.asset_element[id & 0x00FFFFFF]; AssetManagementSystem* ams = &ams_array[archive->asset_type_map[element->type]]; // @todo This is a little bit stupid, reconsider - char id_str[5]; - id_str[4] = '\0'; - *((int32 *) id_str) = id; - - uint64 hash = hash_djb2(id_str); + char id_str[32]; + _itoa(id, id_str, 16); Asset* asset; // @performance I think we could optimize the ams_reserve_asset in a way so we don't have to lock it the entire time pthread_mutex_lock(&ams->mutex); - // @bug If we have multiple archive files the ids also repeat, which is not possible for the hash map - // Possible solution: also store a string name for every asset. This would add HASH_MAP_MAX_KEY_LENGTH bytes of data to every asset though (see hash map key size = 32) - - asset = ams_get_asset(ams, id_str, hash); + asset = ams_get_asset(ams, id_str); if (asset) { // Asset already loaded pthread_mutex_unlock(&ams->mutex); @@ -238,17 +235,15 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana Texture* texture = (Texture *) asset->self; texture->image.pixels = (byte *) (texture + 1); + // @todo implement qoi encoding image_from_data(file.content, &texture->image); asset->vram_size = texture->image.pixel_count * image_pixel_size_from_type(texture->image.pixel_type); asset->ram_size = asset->vram_size + sizeof(Texture); #if OPENGL - // @bug I think order_rows has the wrong value - if (texture->image.order_rows == IMAGE_ROW_ORDER_TOP_TO_BOTTOM) { - image_flip_vertical(ring, &texture->image); - texture->image.order_rows = IMAGE_ROW_ORDER_BOTTOM_TO_TOP; - } + image_flip_vertical(ring, &texture->image); + texture->image.order_rows = IMAGE_ROW_ORDER_BOTTOM_TO_TOP; #endif } break; case ASSET_TYPE_AUDIO: { @@ -289,6 +284,7 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana pthread_mutex_unlock(&ams->mutex); // @performance maybe do in worker threads? This just feels very slow + // @question dependencies might be stored in different archives?
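+ // @bug The call below passes the parent id again, so every iteration just re-finds the already
+ // loaded parent asset in the AMS and the actual dependencies are never loaded; the dependency ids
+ // have to come from the element here once they are stored on AssetArchiveElement.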
for (uint32 i = 0; i < element->dependency_count; ++i) { asset_archive_asset_load(archive, id, ams, ring); } diff --git a/asset/AssetManagementSystem.h b/asset/AssetManagementSystem.h index 025b1e7..e99b746 100644 --- a/asset/AssetManagementSystem.h +++ b/asset/AssetManagementSystem.h @@ -84,7 +84,7 @@ void ams_create(AssetManagementSystem* ams, byte* buf, int32 chunk_size, int32 c // setup asset_memory ams->asset_memory.count = count; ams->asset_memory.chunk_size = sizeof(Asset); - ams->asset_memory.last_pos = -1; + ams->asset_memory.last_pos = 0; ams->asset_memory.alignment = 64; ams->asset_memory.memory = buf; ams->asset_memory.free = (uint64 *) (ams->asset_memory.memory + ams->asset_memory.chunk_size * count); @@ -92,7 +92,7 @@ void ams_create(AssetManagementSystem* ams, byte* buf, int32 chunk_size, int32 c // setup asset_data_memory ams->asset_data_memory.count = count; ams->asset_data_memory.chunk_size = chunk_size; - ams->asset_data_memory.last_pos = -1; + ams->asset_data_memory.last_pos = 0; ams->asset_data_memory.alignment = 64; ams->asset_data_memory.memory = (byte *) (ams->asset_memory.free + CEIL_DIV(count, 64)); ams->asset_data_memory.free = (uint64 *) (ams->asset_data_memory.memory + ams->asset_data_memory.chunk_size * count); @@ -204,8 +204,8 @@ Asset* ams_get_asset(AssetManagementSystem* ams, const char* key) ); DEBUG_MEMORY_READ( - (uint64) (entry ? (Asset *) entry->value : 0), - entry ? ((Asset *) entry->value)->self + ((Asset *) entry->value)->ram_size : 0 + (uint64) (entry ? ((Asset *) entry->value)->self : 0), + entry ? ((Asset *) entry->value)->ram_size : 0 ); return entry ? (Asset *) entry->value : NULL; @@ -222,8 +222,8 @@ Asset* ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 hash) ); DEBUG_MEMORY_READ( - (uint64) (entry ? (Asset *) entry->value : 0), - entry ? ((Asset *) entry->value)->self + ((Asset *) entry->value)->ram_size : 0 + (uint64) (entry ? ((Asset *) entry->value)->self : 0), + entry ? ((Asset *) entry->value)->ram_size : 0 ); return entry ? 
(Asset *) entry->value : NULL; diff --git a/audio/AudioMixer.h b/audio/AudioMixer.h index 0e8b68f..89f79d5 100644 --- a/audio/AudioMixer.h +++ b/audio/AudioMixer.h @@ -31,22 +31,23 @@ enum AudioEffect { AUDIO_EFFECT_NONE, - AUDIO_EFFECT_ECHO = 1, - AUDIO_EFFECT_REVERB = 2, - AUDIO_EFFECT_UNDERWATER = 4, - AUDIO_EFFECT_CAVE = 8, - AUDIO_EFFECT_LOWPASS = 16, - AUDIO_EFFECT_HIGHPASS = 32, - AUDIO_EFFECT_FLANGER = 64, - AUDIO_EFFECT_TREMOLO = 128, - AUDIO_EFFECT_DISTORTION = 256, - AUDIO_EFFECT_CHORUS = 512, - AUDIO_EFFECT_PITCH_SHIFT = 1024, - AUDIO_EFFECT_GRANULAR_DELAY = 2048, - AUDIO_EFFECT_FM = 4096, - AUDIO_EFFECT_STEREO_PANNING = 8192, - AUDIO_EFFECT_EASE_IN = 16384, - AUDIO_EFFECT_EASE_OUT = 32768, + AUDIO_EFFECT_ECHO = 1 << 0, + AUDIO_EFFECT_REVERB = 1 << 1, + AUDIO_EFFECT_UNDERWATER = 1 << 2, + AUDIO_EFFECT_CAVE = 1 << 3, + AUDIO_EFFECT_LOWPASS = 1 << 4, + AUDIO_EFFECT_HIGHPASS = 1 << 5, + AUDIO_EFFECT_FLANGER = 1 << 6, + AUDIO_EFFECT_TREMOLO = 1 << 7, + AUDIO_EFFECT_DISTORTION = 1 << 8, + AUDIO_EFFECT_CHORUS = 1 << 9, + AUDIO_EFFECT_PITCH_SHIFT = 1 << 10, + AUDIO_EFFECT_GRANULAR_DELAY = 1 << 11, + AUDIO_EFFECT_FM = 1 << 12, + AUDIO_EFFECT_STEREO_PANNING = 1 << 13, + AUDIO_EFFECT_EASE_IN = 1 << 14, + AUDIO_EFFECT_EASE_OUT = 1 << 15, + AUDIO_EFFECT_SPEED = 1 << 16, }; struct AudioInstance { @@ -56,7 +57,13 @@ struct AudioInstance { uint32 audio_size; byte* audio_data; + uint64 effect; uint32 sample_index; + byte channels; + bool repeat; + + // @todo How to implement audio that is only supposed to be played after a certain other sound file is finished + // e.g. queueing soundtracks/ambient noise }; enum AudioMixerState { @@ -128,10 +135,13 @@ void audio_mixer_add(AudioMixer* mixer, int64 id, Audio* audio, AudioLocationSet return; } + // @question Do I really want to use AudioInstance? Wouldn't Audio* be sufficient? + // Well AudioInstance is a little bit smaller but is this really worth it, probably yes?! AudioInstance* instance = (AudioInstance *) chunk_get_element(&mixer->audio_instances, index); instance->id = id; instance->audio_size = audio->size; instance->audio_data = audio->data; + instance->channels = audio->channels; if (origin) { memcpy(&instance->origin, origin, sizeof(AudioLocationSetting)); @@ -166,6 +176,41 @@ void audio_mixer_remove(AudioMixer* mixer, int64 id) } } +int32 apply_speed(int16* buffer, uint32 buffer_size, f32 speed) { + if (speed == 1.0f) { + return 0; + } + + // Has to be multiple of 2 to ensure stereo is implemented correctly + uint32 new_size = ROUND_TO_NEAREST((uint32) (buffer_size / speed), 2); + + // Speed up + if (speed > 1.0f) { + for (uint32 i = 0; i < new_size; ++i) { + // @bug What if 2 consecutive values fall onto the same int index for stereo. This would break it. + // The problem is, even by doing this as stereo calculation we would still have the same issue just not on the current value but the next loop + uint32 src_index = (uint32) (i * speed); + buffer[i] = buffer[src_index]; + } + + // A speed up reduces the sample_index -> we reduce the data in the buffer + // Cast before subtracting, otherwise the uint32 difference underflows + return (int32) new_size - (int32) buffer_size; + } + + // Slow down + for (int32 i = buffer_size - 1; i > 0; --i) { + uint32 src_index = (uint32) (i * speed); + buffer[i] = buffer[src_index]; + } + + return 0; +} + +// @performance Whenever we handle left and right the same we could halve the buffer_size +// This allows us to re-use existing helper variables without re-calculating them for the next loop (e.g.
delay below) + // Or, if the multiplier is an int we can even perform the multiplication on int32 through casting instead of 2 operations on int16 + // We might have to adjust some of the values to ensure correct multiplication if possible (e.g. feedback, intensity, ...) + // @todo We probably want to handle left and right channel differently to add some depth (see the apply_echo_stereo sketch further below) void apply_echo(int16* buffer, uint32 buffer_size, f32 delay, f32 feedback, int32 sample_rate) { int32 delay_samples = (int32) (delay * sample_rate); for (uint32 i = delay_samples; i < buffer_size; ++i) { @@ -173,6 +218,7 @@ void apply_echo(int16* buffer, uint32 buffer_size, f32 delay, f32 feedback, int3 } } +// @todo We probably want to handle left and right channel differently to add some depth void apply_reverb(int16* buffer, uint32 buffer_size, f32 intensity) { intensity *= 0.5f; for (uint32 i = 1; i < buffer_size; ++i) { @@ -294,11 +340,93 @@ void apply_lowpass(int16* buffer, uint32 buffer_size, f32 cutoff, int32 sample_r } } -void audio_mixer_mix(AudioMixer* mixer) { - uint32 limit = OMS_MIN( - mixer->settings.sample_buffer_size / mixer->settings.sample_size, - mixer->settings.buffer_size / mixer->settings.sample_size - ); +int32 mixer_effects_mono(AudioMixer* mixer, uint64 effect, int32 samples) +{ + int32 sound_sample_index = 0; + + if (effect & AUDIO_EFFECT_ECHO) { + apply_echo(mixer->buffer_temp, samples * 2, 0.2f, 0.4f, mixer->settings.sample_rate); + } + + if (effect & AUDIO_EFFECT_REVERB) { + apply_reverb(mixer->buffer_temp, samples * 2, 0.3f); + } + + if (effect & AUDIO_EFFECT_UNDERWATER) { + apply_underwater(mixer->buffer_temp, samples * 2); + } + + if (effect & AUDIO_EFFECT_CAVE) { + apply_cave(mixer->buffer_temp, samples * 2, mixer->settings.sample_rate); + } + + if (effect & AUDIO_EFFECT_LOWPASS) { + apply_lowpass(mixer->buffer_temp, samples * 2, 500.0f, mixer->settings.sample_rate); // Cutoff frequency 500 + } + + if (effect & AUDIO_EFFECT_HIGHPASS) { + apply_highpass(mixer->buffer_temp, samples * 2, 2000.0f, mixer->settings.sample_rate); // Cutoff frequency 2 kHz + } + + if (effect & AUDIO_EFFECT_FLANGER) { + apply_flanger(mixer->buffer_temp, samples * 2, 0.25f, 0.005f, mixer->settings.sample_rate); + } + + if (effect & AUDIO_EFFECT_TREMOLO) { + apply_tremolo(mixer->buffer_temp, samples * 2, 5.0f, 0.8f, mixer->settings.sample_rate); + } + + if (effect & AUDIO_EFFECT_DISTORTION) { + apply_distortion(mixer->buffer_temp, samples * 2, 10.0f); + } + + if (effect & AUDIO_EFFECT_CHORUS) { + apply_chorus(mixer->buffer_temp, samples * 2, 0.25f, 0.005f, mixer->settings.sample_rate); + } + + if (effect & AUDIO_EFFECT_PITCH_SHIFT) { + apply_pitch_shift(mixer->buffer_temp, samples * 2, 1.2f); // Slight pitch increase + } + + if (effect & AUDIO_EFFECT_GRANULAR_DELAY) { + apply_granular_delay(mixer->buffer_temp, samples * 2, 0.1f, 0.2f, mixer->settings.sample_rate); + } + + if (effect & AUDIO_EFFECT_FM) { + apply_frequency_modulation(mixer->buffer_temp, samples * 2, 2.0f, 0.5f, mixer->settings.sample_rate); + } + + if (effect & AUDIO_EFFECT_STEREO_PANNING) { + apply_stereo_panning(mixer->buffer_temp, samples * 2, 0.5f); + } + + /* + if (effect & AUDIO_EFFECT_EASE_IN) { + apply_ease_in(mixer->buffer_temp, samples * 2, 0.5f); + } + + if (effect & AUDIO_EFFECT_EASE_OUT) { + apply_ease_out(mixer->buffer_temp, samples * 2, 0.5f); + } + */ + + if (effect & AUDIO_EFFECT_SPEED) { + sound_sample_index += apply_speed(mixer->buffer_temp, samples * 2, 1.0f); + } + + return sound_sample_index; +} + +int32 mixer_effects_stereo() +{ + return 0; +}
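+ // A minimal sketch of the @todo above, not part of the actual patch: apply slightly different
+ // delays per channel to add depth. delay_l/delay_r are illustrative, untuned values; the buffer
+ // is assumed to be interleaved stereo (left = even, right = odd int16) with `frames` frame pairs.
+ inline
+ void apply_echo_stereo(int16* buffer, uint32 frames, f32 delay_l, f32 delay_r, f32 feedback, int32 sample_rate) {
+     uint32 dl = (uint32) (delay_l * sample_rate); // delay in frames for the left channel
+     uint32 dr = (uint32) (delay_r * sample_rate); // delay in frames for the right channel
+
+     for (uint32 f = dl; f < frames; ++f) {
+         buffer[f * 2] = (int16) (buffer[f * 2] + buffer[(f - dl) * 2] * feedback);
+     }
+
+     for (uint32 f = dr; f < frames; ++f) {
+         buffer[f * 2 + 1] = (int16) (buffer[f * 2 + 1] + buffer[(f - dr) * 2 + 1] * feedback);
+     }
+ }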
+ +void audio_mixer_mix(AudioMixer* mixer, uint32 size) { + memset(mixer->settings.buffer, 0, size); + + mixer->settings.sample_buffer_size = 0; + uint32 limit_max = size / mixer->settings.sample_size; bool has_location = !is_empty((byte *) &mixer->camera.audio_location, sizeof(mixer->camera.audio_location)); @@ -310,6 +438,8 @@ void audio_mixer_mix(AudioMixer* mixer) { continue; } + uint32 limit = limit_max; + // Compute the vector from the player to the sound's origin v3_f32 to_sound = {}; f32 total_attenuation = 1.0f; @@ -331,105 +461,93 @@ void audio_mixer_mix(AudioMixer* mixer) { } uint32 sound_sample_count = sound->audio_size / mixer->settings.sample_size; - uint32 sound_sample_index = sound->sample_index; + int32 sound_sample_index = sound->sample_index; int16* audio_data = (int16 *) sound->audio_data; // Temporary buffer for effects processing // @performance If there are situations where only one file exists in the mixer that should be played we could directly write to // the output buffer improving the performance. Some of those mixers are: music, cinematic, ui // Careful, NOT voice since we will probably manually layer them according to their position? - for (int32 j = 0; j < limit; ++j) { - if (sound_sample_index >= sound_sample_count) { - // @todo if repeat we need to handle part of it here, else quit + if (sound->channels == 1) { + // We make it stereo + for (uint32 j = 0; j < limit; ++j) { + if (sound_sample_index >= sound_sample_count) { + if (!sound->repeat) { + limit = j; + break; + } - sound_sample_index = 0; + sound_sample_index = 0; + } - // @question why are we doing this? - mixer->settings.sample_index = 0; + // We could make the temp buffer stereo here but we later on have to touch the array anyways. + // This way we can easily perform mixer effects on a mono output. + mixer->buffer_temp[j] = (int16) (audio_data[sound_sample_index] * volume_scale * total_attenuation); + + ++sound_sample_index; + + // @performance Some adjustments could be made right here; the question is whether this is faster. + // Probably depends on how likely the adjustment is to happen. Orientation effects are probably very likely. } - mixer->buffer_temp[j * 2] = (int16) (audio_data[sound_sample_index * 2] * volume_scale * total_attenuation); - mixer->buffer_temp[j * 2 + 1] = (int16) (audio_data[sound_sample_index * 2 + 1] * volume_scale * total_attenuation); + // Apply effects based on sound's effect type + if (sound->effect) { + // The effects operate on the limit samples just written to buffer_temp, not on the absolute source index + int32 sample_adjustment = mixer_effects_mono(mixer, sound->effect, limit); + sound_sample_index += sample_adjustment; + limit += sample_adjustment; + } + } else { + for (uint32 j = 0; j < limit; ++j) { + if (sound_sample_index >= sound_sample_count) { + if (!sound->repeat) { + limit = j; + break; + } - ++sound_sample_index; + sound_sample_index = 0; + } - // @performance Some adjustments could be made right here the question is if this is faster. - // Probably depends on how likely the adjustment is to happen. + mixer->buffer_temp[j * 2] = (int16) (audio_data[sound_sample_index * 2] * volume_scale * total_attenuation); + mixer->buffer_temp[j * 2 + 1] = (int16) (audio_data[sound_sample_index * 2 + 1] * volume_scale * total_attenuation); - // @todo if end of file and no repeat -> remove from list - } + ++sound_sample_index; - // @question We also have to set setting->sample_index = sound_sample_index. - // But that currently happens in the sound api.
Do we want to keep it there or move it here - - // Apply effects based on sound's effect type - // @performance Depending on how we implement effects we could even pull them out of this loop - // What I mean is effects could either be sound file dependent (current location correct) or mixer dependent - if (mixer->effect) { - if (mixer->effect & AUDIO_EFFECT_ECHO) { - apply_echo(mixer->buffer_temp, limit, 0.2f, 0.4f, mixer->settings.sample_rate); + // @performance Some adjustments could be made right here; the question is whether this is faster. + // Probably depends on how likely the adjustment is to happen. Orientation effects are probably very likely. } - if (mixer->effect & AUDIO_EFFECT_REVERB) { - apply_reverb(mixer->buffer_temp, limit, 0.3f); - } - - if (mixer->effect & AUDIO_EFFECT_UNDERWATER) { - apply_underwater(mixer->buffer_temp, limit); - } - - if (mixer->effect & AUDIO_EFFECT_CAVE) { - apply_cave(mixer->buffer_temp, limit, mixer->settings.sample_rate); - } - - if (mixer->effect & AUDIO_EFFECT_LOWPASS) { - apply_lowpass(mixer->buffer_temp, limit, 500.0f, mixer->settings.sample_rate); // Cutoff frequency 500 - } - - if (mixer->effect & AUDIO_EFFECT_HIGHPASS) { - apply_highpass(mixer->buffer_temp, limit, 2000.0f, mixer->settings.sample_rate); // Cutoff frequency 2 kHz - } - - if (mixer->effect & AUDIO_EFFECT_FLANGER) { - apply_flanger(mixer->buffer_temp, limit, 0.25f, 0.005f, mixer->settings.sample_rate); - } - - if (mixer->effect & AUDIO_EFFECT_TREMOLO) { - apply_tremolo(mixer->buffer_temp, limit, 5.0f, 0.8f, mixer->settings.sample_rate); - } - - if (mixer->effect & AUDIO_EFFECT_DISTORTION) { - apply_distortion(mixer->buffer_temp, limit, 10.0f); - } - - if (mixer->effect & AUDIO_EFFECT_CHORUS) { - apply_chorus(mixer->buffer_temp, limit, 0.25f, 0.005f, mixer->settings.sample_rate); - } - - if (mixer->effect & AUDIO_EFFECT_PITCH_SHIFT) { - apply_pitch_shift(mixer->buffer_temp, limit, 1.2f); // Slight pitch increase - } - - if (mixer->effect & AUDIO_EFFECT_GRANULAR_DELAY) { - apply_granular_delay(mixer->buffer_temp, limit, 0.1f, 0.2f, mixer->settings.sample_rate); - } - - if (mixer->effect & AUDIO_EFFECT_FM) { - apply_frequency_modulation(mixer->buffer_temp, limit, 2.0f, 0.5f, mixer->settings.sample_rate); - } - - if (mixer->effect & AUDIO_EFFECT_STEREO_PANNING) { - apply_stereo_panning(mixer->buffer_temp, limit, 0.5f); + // Apply effects based on sound's effect type + if (sound->effect) { + int32 sample_adjustment = mixer_effects_stereo() / 2; + sound_sample_index += sample_adjustment; + limit += sample_adjustment; + } } } - // @bug the actual output "limit" could be smaller if sound files end earlier and no repeat is defined - // In that case we would also have to adjust mixer->settings.sample_buffer_size + // @bug if we use speed up effect, this value could be negative. Fix.
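+ // A possible fix as an untested sketch: clamp before the assignment below, e.g.
+ // sound_sample_index = OMS_MAX(sound_sample_index, 0);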
+ sound->sample_index = sound_sample_index; // Add the processed sound to the output buffer - for (uint32 j = 0; j < limit; j++) { - mixer->settings.buffer[j] += mixer->buffer_temp[j]; + if (sound->channels == 1) { + // We turn it stereo here + for (uint32 j = 0; j < limit; ++j) { + mixer->settings.buffer[j * 2] += mixer->buffer_temp[j]; + mixer->settings.buffer[j * 2 + 1] += mixer->buffer_temp[j]; + } + } else { + for (uint32 j = 0; j < limit * 2; ++j) { + mixer->settings.buffer[j] += mixer->buffer_temp[j]; + } } + + mixer->settings.sample_buffer_size = OMS_MAX( + mixer->settings.sample_buffer_size, + limit * mixer->settings.sample_size + ); + } + + if (mixer->effect) { + mixer_effects_stereo(); } } diff --git a/audio/AudioSetting.h b/audio/AudioSetting.h index d1031f6..f1a3985 100644 --- a/audio/AudioSetting.h +++ b/audio/AudioSetting.h @@ -16,10 +16,6 @@ #define SOUND_API_XAUDIO2 1 struct AudioSetting { - // position in the audio data - // WARNING: not the byte position, but the index based on the sample size - uint32 sample_index; - f32 master_volume; // bits per sample diff --git a/font/Font.h b/font/Font.h index 2fe27c1..4416382 100644 --- a/font/Font.h +++ b/font/Font.h @@ -59,11 +59,28 @@ void font_init(Font* font, byte* data, int count) } inline -Glyph* font_glyph_find(Font* font, uint32 codepoint) +Glyph* font_glyph_find(const Font* font, uint32 codepoint) { - for (uint32 i = 0; i < font->glyph_count; ++i) { - if (font->glyphs[i].codepoint == codepoint) { - return &font->glyphs[i]; + int32 perfect_glyph_pos = codepoint - font->glyphs[0].codepoint; + + // Codepoints below the first glyph cannot be in the table (a negative index would read out of bounds) + if (perfect_glyph_pos < 0) { + return NULL; + } + + int32 limit = OMS_MIN(perfect_glyph_pos, font->glyph_count - 1); + + // We try to jump to the correct glyph based on the glyph codepoint + if (font->glyphs[limit].codepoint == codepoint) { + return &font->glyphs[limit]; + } + + // If that doesn't work we binary search the glyph list BUT only up to the last possible position. + // Glyphs must be sorted ascending.
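+ // Since codepoints are unique and sorted, a match can never sit past its "perfect" slot,
+ // so [0, limit] is a sufficient range for the search below.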
+ int32 low = 0; + int32 high = limit; + while (low <= high) { + int32 mid = low + (high - low) / 2; + if (font->glyphs[mid].codepoint == codepoint) { + return &font->glyphs[mid]; + } else if (font->glyphs[mid].codepoint < codepoint) { + low = mid + 1; + } else { + high = mid - 1; } } @@ -254,9 +271,21 @@ int32 font_to_data( return size; } +inline f32 font_line_height(Font* font, f32 size) { return font->line_height * size / font->size; } +inline +void font_invert_coordinates(Font* font) +{ + // @todo Implement y-offset correction + for (uint32 i = 0; i < font->glyph_count; ++i) { + float temp = font->glyphs[i].coords.y1; + font->glyphs[i].coords.y1 = 1.0f - font->glyphs[i].coords.y2; + font->glyphs[i].coords.y2 = 1.0f - temp; + } +} + #endif \ No newline at end of file diff --git a/gpuapi/RenderUtils.h b/gpuapi/RenderUtils.h index 22002b5..fda7006 100644 --- a/gpuapi/RenderUtils.h +++ b/gpuapi/RenderUtils.h @@ -299,8 +299,6 @@ f32 text_calculate_dimensions_width( f32 x = 0; f32 offset_x = 0; - uint32 first_glyph = font->glyphs[0].codepoint; - // @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value for (int32 i = 0; i < length; ++i) { @@ -313,25 +311,7 @@ f32 text_calculate_dimensions_width( continue; } - Glyph* glyph = NULL; - // We try to jump to the correct glyph based on the glyph codepoint - // If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending) - int32 perfect_glyph_pos = character - first_glyph; - if (font->glyph_count > perfect_glyph_pos - && font->glyphs[perfect_glyph_pos].codepoint == character - ) { - glyph = &font->glyphs[perfect_glyph_pos]; - } else { - // @performance consider to do binary search - for (int32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) { - if (font->glyphs[j].codepoint == character) { - glyph = &font->glyphs[j]; - - break; - } - } - } - + Glyph* glyph = font_glyph_find(font, character); if (!glyph) { continue; } @@ -353,8 +333,6 @@ void text_calculate_dimensions( f32 offset_x = 0; - uint32 first_glyph = font->glyphs[0].codepoint; - // @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value for (int32 i = 0; i < length; ++i) { @@ -369,25 +347,7 @@ void text_calculate_dimensions( continue; } - Glyph* glyph = NULL; - // We try to jump to the correct glyph based on the glyph codepoint - // If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending) - int32 perfect_glyph_pos = character - first_glyph; - if (font->glyph_count > perfect_glyph_pos - && font->glyphs[perfect_glyph_pos].codepoint == character - ) { - glyph = &font->glyphs[perfect_glyph_pos]; - } else { - // @performance consider to do binary search - for (int32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) { - if (font->glyphs[j].codepoint == character) { - glyph = &font->glyphs[j]; - - break; - } - } - } - + Glyph* glyph = font_glyph_find(font, character); if (!glyph) { continue; } @@ -433,10 +393,6 @@ v2_f32 vertex_text_create( } } - uint32 first_glyph = font->glyphs[0].codepoint; - - int32 first_char = is_ascii ? text[0] : utf8_get_char_at(text, 0); - f32 offset_x = x; for (int32 i = 0; i < length; ++i) { int32 character = is_ascii ? 
text[i] : utf8_get_char_at(text, i); @@ -447,25 +403,7 @@ v2_f32 vertex_text_create( continue; } - Glyph* glyph = NULL; - // We try to jump to the correct glyph based on the glyph codepoint - // If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending) - int32 perfect_glyph_pos = character - first_glyph; - if (font->glyph_count > perfect_glyph_pos - && font->glyphs[perfect_glyph_pos].codepoint == character - ) { - glyph = &font->glyphs[perfect_glyph_pos]; - } else { - // @performance consider to do binary search - for (uint32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) { - if (font->glyphs[j].codepoint == character) { - glyph = &font->glyphs[j]; - - break; - } - } - } - + Glyph* glyph = font_glyph_find(font, character); if (!glyph) { continue; } @@ -577,8 +515,6 @@ f32 ui_text_create( } } - uint32 first_glyph = theme->font.glyphs[0].codepoint; - int32 start = *index; f32 offset_x = (f32) x->value_int; f32 offset_y = (f32) y->value_int; @@ -594,25 +530,7 @@ f32 ui_text_create( continue; } - Glyph* glyph = NULL; - // We try to jump to the correct glyph based on the glyph codepoint - // If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending) - int32 perfect_glyph_pos = character - first_glyph; - if (theme->font.glyph_count > perfect_glyph_pos - && theme->font.glyphs[perfect_glyph_pos].codepoint == character - ) { - glyph = &theme->font.glyphs[perfect_glyph_pos]; - } else { - // @performance consider to do binary search - for (int32 j = 0; j <= perfect_glyph_pos && j < theme->font.glyph_count; ++j) { - if (theme->font.glyphs[j].codepoint == character) { - glyph = &theme->font.glyphs[j]; - - break; - } - } - } - + Glyph* glyph = font_glyph_find(&theme->font, character); if (!glyph) { continue; } @@ -721,7 +639,7 @@ void ui_button_create( vertex_text_create( vertices, index, zindex, - x->value_float, y->value_float, width->value_float, height->value_float, align_h->value_float, align_v->value_float, + x->value_float, y->value_float, width->value_float, height->value_float, align_h->value_int, align_v->value_int, &theme->font, text->value_str, size->value_float, color_index->value_float ); diff --git a/gpuapi/opengl/OpenglUtils.h b/gpuapi/opengl/OpenglUtils.h index 53977b9..8f8f904 100644 --- a/gpuapi/opengl/OpenglUtils.h +++ b/gpuapi/opengl/OpenglUtils.h @@ -156,7 +156,7 @@ void texture_use_1D(const Texture* texture, uint32 texture_unit) glBindTexture(GL_TEXTURE_1D, (GLuint) texture->id); } -GLuint shader_make(GLenum type, const char *source, RingMemory* ring) +GLuint shader_make(GLenum type, const char* source, RingMemory* ring) { GLuint shader = glCreateShader(type); glShaderSource(shader, 1, (GLchar **) &source, NULL); diff --git a/image/Image.h b/image/Image.h index f17ce7a..80f2395 100644 --- a/image/Image.h +++ b/image/Image.h @@ -31,6 +31,7 @@ enum PixelType // has_alpha is defined it forces an alpha channel even for bitmaps // order_pixels defines how the pixels should be ordered // order_rows defines how the rows should be ordered +// @question Do we really ever need int32 for width/height? 
struct Image { uint32 width; uint32 height; diff --git a/image/Qoi.h b/image/Qoi.h new file mode 100644 index 0000000..dc9e65a --- /dev/null +++ b/image/Qoi.h @@ -0,0 +1,230 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_IMAGE_QOI_H +#define TOS_IMAGE_QOI_H + +#include "../stdlib/Types.h" +#include "../memory/RingMemory.h" + +#define QOI_OP_INDEX 0b00000000 +#define QOI_OP_DIFF 0b01000000 +#define QOI_OP_LUMA 0b10000000 +#define QOI_OP_RUN 0b11000000 // @todo There is a HUGE step from here to QOI_OP_RGB this leaves room for more cases or using this data +#define QOI_OP_RGB 0b11111110 +#define QOI_OP_RGBA 0b11111111 +#define QOI_MASK_2 0b11000000 + +#define QOI_COLOR_HASH(color) (color.r * 3 + color.g * 5 + color.b * 7 + color.a * 11) +#define QOI_COLOR_HASH_2(color) ((((uint32)(color)) * 0x9E3779B1U) >> 26) +#define QOI_HEADER_SIZE 9 + +// @question Do we really ever need int32 for width/height? +struct QoiDescription { + uint32 width; + uint32 height; + byte channels; + byte colorspace; +}; + +uint32 qoi_encode_size(QoiDescription* desc) +{ + return desc->width * desc->height * (desc->channels + 1) + QOI_HEADER_SIZE; +} + +int32 qoi_encode(const byte* data, byte* output, const QoiDescription* desc) { + if (desc->width == 0 || desc->height == 0 || + desc->channels < 3 || desc->channels > 4 || + desc->colorspace > 1 + ) { + return -1; + } + + int32 p = 0; + *((uint32 *) &output[p]) = SWAP_ENDIAN_LITTLE(desc->width); p += 4; + *((uint32 *) &output[p]) = SWAP_ENDIAN_LITTLE(desc->height); p += 4; + + // Channel count 1-4 requires 3 bits, colorspace requires 1 bit + output[p++] = ((desc->channels - 1) << 1) | (desc->colorspace & 0x01); + + v4_byte index[64]; + memset(index, 0, sizeof(index)); + + v4_byte px_prev = {0, 0, 0, 255}; + v4_byte px = px_prev; + + int32 px_len = desc->width * desc->height * desc->channels; + int32 px_end = px_len - desc->channels; + int32 channels = desc->channels; + + int32 run = 0; + for (int32 px_pos = 0; px_pos < px_len; px_pos += channels) { + memcpy(&px, &data[px_pos], channels * sizeof(byte)); + + if (px.val == px_prev.val) { + ++run; + if (run == 62 || px_pos == px_end) { + output[p++] = QOI_OP_RUN | (run - 1); + run = 0; + } + } else { + if (run) { + output[p++] = QOI_OP_RUN | (run - 1); + run = 0; + } + + int32 index_pos = QOI_COLOR_HASH(px) % 64; + //int32 index_pos = QOI_COLOR_HASH_2(px); + + if (index[index_pos].val == px.val) { + output[p++] = QOI_OP_INDEX | index_pos; + } else { + index[index_pos] = px; + + if (px.a == px_prev.a) { + signed char vr = px.r - px_prev.r; + signed char vg = px.g - px_prev.g; + signed char vb = px.b - px_prev.b; + + signed char vg_r = vr - vg; + signed char vg_b = vb - vg; + + if (vr > -3 && vr < 2 + && vg > -3 && vg < 2 + && vb > -3 && vb < 2 + ) { + output[p++] = QOI_OP_DIFF | (vr + 2) << 4 | (vg + 2) << 2 | (vb + 2); + } else if (vg_r > -9 && vg_r < 8 + && vg > -33 && vg < 32 + && vg_b > -9 && vg_b < 8 + ) { + output[p++] = QOI_OP_LUMA | (vg + 32); + output[p++] = (vg_r + 8) << 4 | (vg_b + 8); + } else { + output[p++] = QOI_OP_RGB; + output[p++] = px.r; + output[p++] = px.g; + output[p++] = px.b; + } + } else { + output[p++] = QOI_OP_RGBA; + *((uint32 *) &output[p]) = SWAP_ENDIAN_LITTLE(px.val); + p += 4; + } + } + } + + px_prev = px; + } + + return p; +} + +uint32 qoi_decode_size(QoiDescription* desc, int32 channels) +{ + return desc->width * desc->height * channels; +} + +void qoi_decode(const byte* data, byte* output, int32 steps = 8) +{ + 
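+    // Header layout, mirroring qoi_encode above: 4 bytes width (little endian),
+    // 4 bytes height (little endian), 1 byte packing ((channels - 1) << 1) | colorspace.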
int32 p = 0; + uint32 width = SWAP_ENDIAN_LITTLE(*((uint32 *) &data[p])); p += 4; + uint32 height = SWAP_ENDIAN_LITTLE(*((uint32 *) &data[p])); p += 4; + + // Channel count 1-4 requires 3 bits, colorspace requires 1 bit + int32 colorspace = data[p] & 0x01; + uint32 channels = ((data[p] >> 1) & 0x07) + 1; + ++p; + + uint32 px_len = width * height * channels; + + v4_byte px = {0, 0, 0, 255}; + + v4_byte index[64]; + memset(index, 0, sizeof(index)); + + int32 run = 0; + + for (uint32 px_pos = 0; px_pos < px_len; px_pos += channels) { + int32 b1 = data[p++]; + + if (b1 == QOI_OP_RGB) { + px.r = data[p++]; + px.g = data[p++]; + px.b = data[p++]; + } else if (b1 == QOI_OP_RGBA) { + px.val = SWAP_ENDIAN_LITTLE(*((uint32 *) &data[p])); + p += 4; + } else if ((b1 & QOI_MASK_2) == QOI_OP_INDEX) { + px = index[b1]; + } else if ((b1 & QOI_MASK_2) == QOI_OP_DIFF) { + px.r += ((b1 >> 4) & 0x03) - 2; + px.g += ((b1 >> 2) & 0x03) - 2; + px.b += ( b1 & 0x03) - 2; + } else if ((b1 & QOI_MASK_2) == QOI_OP_LUMA) { + int32 b2 = data[p++]; + int32 vg = (b1 & 0x3f) - 32; + px.r += vg - 8 + ((b2 >> 4) & 0x0f); + px.g += vg; + px.b += vg - 8 + (b2 & 0x0f); + } else if ((b1 & QOI_MASK_2) == QOI_OP_RUN) { + run = (b1 & 0x3f) + 1; // the encoder stores run - 1 + + if (channels == 4) { + uint32 px_little_endian = SWAP_ENDIAN_LITTLE(px.val); + int32 pixel_step_size = steps * 4; + int32 i = 0; + + if (steps == 16) { + __m512i simd_value = _mm512_set1_epi32(px_little_endian); + for(; i <= run - steps; i += steps, px_pos += pixel_step_size) { + _mm512_storeu_si512((__m512i *) &output[px_pos], simd_value); + } + } else if (steps >= 8) { + __m256i simd_value = _mm256_set1_epi32(px_little_endian); + for (; i <= run - steps; i += steps, px_pos += pixel_step_size) { + _mm256_storeu_si256((__m256i *) &output[px_pos], simd_value); + } + } else if (steps >= 4) { + __m128i simd_value = _mm_set1_epi32(px_little_endian); + for(; i <= run - steps; i += steps, px_pos += pixel_step_size) { + _mm_storeu_si128((__m128i *) &output[px_pos], simd_value); + } + } + + for (; i < run; ++i) { + *((uint32 *) &output[px_pos]) = px_little_endian; + px_pos += channels; + } + } else if (channels == 3) { + for (int32 i = 0; i < run; ++i) { + output[px_pos++] = px.r; + output[px_pos++] = px.g; + output[px_pos++] = px.b; + } + } else if (channels == 1) { + memset(&output[px_pos], px.r, run * sizeof(byte)); + px_pos += run; + } + + // Correction, since the loop increments by channels count as well + px_pos -= channels; + + index[QOI_COLOR_HASH(px) % 64] = px; + //index[QOI_COLOR_HASH_2(px)] = px; + + continue; + } + + index[QOI_COLOR_HASH(px) % 64] = px; + //index[QOI_COLOR_HASH_2(px)] = px; + + memcpy(&output[px_pos], &px, channels * sizeof(byte)); + } +} + +#endif \ No newline at end of file diff --git a/memory/BufferMemory.h b/memory/BufferMemory.h index 8ca0557..ac9d3c3 100644 --- a/memory/BufferMemory.h +++ b/memory/BufferMemory.h @@ -92,7 +92,7 @@ void buffer_reset(BufferMemory* buf) } inline -byte* buffer_get_memory(BufferMemory* buf, uint64 size, int32 aligned = 0, bool zeroed = false) +byte* buffer_get_memory(BufferMemory* buf, uint64 size, int32 aligned = 4, bool zeroed = false) { ASSERT_SIMPLE(size <= buf->size); diff --git a/memory/ChunkMemory.h b/memory/ChunkMemory.h index 86ffc8a..3ab6597 100644 --- a/memory/ChunkMemory.h +++ b/memory/ChunkMemory.h @@ -34,9 +34,9 @@ struct ChunkMemory { uint64 count; uint64 size; - uint64 chunk_size; - int64 last_pos; - int32 alignment; + uint64 last_pos; + uint32 chunk_size; + uint32 alignment; // length = count // free describes which locations are used and
which are free @@ -44,7 +44,7 @@ struct ChunkMemory { }; inline -void chunk_alloc(ChunkMemory* buf, uint64 count, uint64 chunk_size, int32 alignment = 64) +void chunk_alloc(ChunkMemory* buf, uint64 count, uint32 chunk_size, int32 alignment = 64) { ASSERT_SIMPLE(chunk_size); ASSERT_SIMPLE(count); @@ -58,7 +58,7 @@ void chunk_alloc(ChunkMemory* buf, uint64 count, uint64 chunk_size, int32 alignm buf->count = count; buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); buf->chunk_size = chunk_size; - buf->last_pos = -1; + buf->last_pos = 0; buf->alignment = alignment; // @question Could it be beneficial to have this before the element data? @@ -70,7 +70,7 @@ void chunk_alloc(ChunkMemory* buf, uint64 count, uint64 chunk_size, int32 alignm } inline -void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint64 chunk_size, int32 alignment = 64) +void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint32 chunk_size, int32 alignment = 64) { ASSERT_SIMPLE(chunk_size); ASSERT_SIMPLE(count); @@ -82,7 +82,7 @@ void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint64 chunk buf->count = count; buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); buf->chunk_size = chunk_size; - buf->last_pos = -1; + buf->last_pos = 0; buf->alignment = alignment; // @question Could it be beneficial to have this before the element data? @@ -95,7 +95,7 @@ void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint64 chunk } inline -void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint64 chunk_size, int32 alignment = 64) +void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint32 chunk_size, int32 alignment = 64) { ASSERT_SIMPLE(chunk_size); ASSERT_SIMPLE(count); @@ -108,7 +108,7 @@ void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint64 chunk_size, i buf->count = count; buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64); buf->chunk_size = chunk_size; - buf->last_pos = -1; + buf->last_pos = 0; buf->alignment = alignment; // @question Could it be beneficial to have this before the element data? 
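+// A minimal sketch of how the free bitmap is meant to be addressed (assumption: one bit per chunk
+// and a set bit marks a used chunk; the actual reserve/release functions are outside this diff):
+inline
+bool chunk_is_used(const ChunkMemory* buf, uint64 index) {
+    return (buf->free[index / 64] >> (index & 63)) & 1ULL;
+}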
@@ -320,15 +320,15 @@ int64 chunk_dump(const ChunkMemory* buf, byte* data) data += sizeof(buf->size); // Chunk Size - *((uint64 *) data) = SWAP_ENDIAN_LITTLE(buf->chunk_size); + *((uint32 *) data) = SWAP_ENDIAN_LITTLE(buf->chunk_size); data += sizeof(buf->chunk_size); // Last pos - *((int64 *) data) = SWAP_ENDIAN_LITTLE(buf->last_pos); + *((uint64 *) data) = SWAP_ENDIAN_LITTLE(buf->last_pos); data += sizeof(buf->last_pos); // Alignment - *((int32 *) data) = SWAP_ENDIAN_LITTLE(buf->alignment); + *((uint32 *) data) = SWAP_ENDIAN_LITTLE(buf->alignment); data += sizeof(buf->alignment); // All memory is handled in the buffer -> simply copy the buffer @@ -351,15 +351,15 @@ int64 chunk_load(ChunkMemory* buf, const byte* data) data += sizeof(buf->size); // Chunk Size - buf->chunk_size = SWAP_ENDIAN_LITTLE(*((uint64 *) data)); + buf->chunk_size = SWAP_ENDIAN_LITTLE(*((uint32 *) data)); data += sizeof(buf->chunk_size); // Last pos - buf->last_pos = SWAP_ENDIAN_LITTLE(*((int64 *) data)); + buf->last_pos = SWAP_ENDIAN_LITTLE(*((uint64 *) data)); data += sizeof(buf->last_pos); // Alignment - buf->alignment = SWAP_ENDIAN_LITTLE(*((int32 *) data)); + buf->alignment = SWAP_ENDIAN_LITTLE(*((uint32 *) data)); data += sizeof(buf->alignment); memcpy(buf->memory, data, buf->size); diff --git a/memory/Queue.h b/memory/Queue.h index b3e6fcf..e93bcdd 100644 --- a/memory/Queue.h +++ b/memory/Queue.h @@ -10,6 +10,7 @@ #define TOS_MEMORY_QUEUE_H #include "../stdlib/Types.h" +#include "../utils/Utils.h" #include "RingMemory.h" // WARNING: Structure needs to be the same as RingMemory @@ -81,7 +82,7 @@ bool queue_is_full(Queue* queue) { } inline -void queue_enqueue_unique(ThreadedQueue* queue, const byte* data) +void queue_enqueue_unique(Queue* queue, const byte* data) { ASSERT_SIMPLE((uint64_t) data % 4 == 0); @@ -191,7 +192,7 @@ bool queue_dequeue(Queue* queue, byte* data) inline bool queue_dequeue_atomic(Queue* queue, byte* data) { - if (atomic_get_relaxed((uint64 *) &queue->head) == (uint64) queue->tail) { + if (atomic_get_acquire_release((volatile uint64 *) &queue->head) == (uint64) queue->tail) { return false; } diff --git a/memory/RingMemory.h b/memory/RingMemory.h index 8188728..583f502 100644 --- a/memory/RingMemory.h +++ b/memory/RingMemory.h @@ -92,7 +92,7 @@ void ring_init(RingMemory* ring, byte* buf, uint64 size, uint32 alignment = 64) { ASSERT_SIMPLE(size); - ring->memory = (byte *) ROUND_TO_NEAREST((uintptr_t) buf, alignment); + ring->memory = (byte *) ROUND_TO_NEAREST((uintptr_t) buf, (uint64) alignment); ring->end = ring->memory + size; ring->head = ring->memory; @@ -117,7 +117,7 @@ void ring_free(RingMemory* ring) } inline -byte* ring_calculate_position(const RingMemory* ring, uint64 size, byte aligned = 0) +byte* ring_calculate_position(const RingMemory* ring, uint64 size, uint32 aligned = 4) { byte* head = ring->head; @@ -126,7 +126,7 @@ byte* ring_calculate_position(const RingMemory* ring, uint64 size, byte aligned head += (aligned - (address & (aligned - 1))) % aligned; } - size = ROUND_TO_NEAREST(size, aligned); + size = ROUND_TO_NEAREST(size, (uint64) aligned); if (head + size > ring->end) { head = ring->memory; @@ -147,7 +147,7 @@ void ring_reset(RingMemory* ring) } // Moves a pointer based on the size you want to consume (new position = after consuming size) -void ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, byte aligned = 0) +void ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, uint32 aligned = 4) { ASSERT_SIMPLE(size <= ring->size); @@ -160,7 +160,7 @@ void 
ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, byte aligned = *pos += (aligned - (address& (aligned - 1))) % aligned; } - size = ROUND_TO_NEAREST(size, aligned); + size = ROUND_TO_NEAREST(size, (uint64) aligned); if (*pos + size > ring->end) { *pos = ring->memory; @@ -173,7 +173,7 @@ void ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, byte aligned = *pos += size; } -byte* ring_get_memory(RingMemory* ring, uint64 size, byte aligned = 0, bool zeroed = false) +byte* ring_get_memory(RingMemory* ring, uint64 size, uint32 aligned = 4, bool zeroed = false) { ASSERT_SIMPLE(size <= ring->size); @@ -182,7 +182,7 @@ byte* ring_get_memory(RingMemory* ring, uint64 size, byte aligned = 0, bool zero ring->head += (aligned - (address& (aligned - 1))) % aligned; } - size = ROUND_TO_NEAREST(size, aligned); + size = ROUND_TO_NEAREST(size, (uint64) aligned); if (ring->head + size > ring->end) { ring_reset(ring); @@ -207,7 +207,7 @@ byte* ring_get_memory(RingMemory* ring, uint64 size, byte aligned = 0, bool zero } // Same as ring_get_memory but DOESN'T move the head -byte* ring_get_memory_nomove(RingMemory* ring, uint64 size, byte aligned = 0, bool zeroed = false) +byte* ring_get_memory_nomove(RingMemory* ring, uint64 size, uint32 aligned = 4, bool zeroed = false) { ASSERT_SIMPLE(size <= ring->size); @@ -218,7 +218,7 @@ byte* ring_get_memory_nomove(RingMemory* ring, uint64 size, byte aligned = 0, bo pos += (aligned - (address& (aligned - 1))) % aligned; } - size = ROUND_TO_NEAREST(size, aligned); + size = ROUND_TO_NEAREST(size, (uint64) aligned); if (pos + size > ring->end) { ring_reset(ring); @@ -253,11 +253,10 @@ byte* ring_get_element(const RingMemory* ring, uint64 element_count, uint64 elem * Checks if one additional element can be inserted without overwriting the tail index */ inline -bool ring_commit_safe(const RingMemory* ring, uint64 size, byte aligned = 0) +bool ring_commit_safe(const RingMemory* ring, uint64 size, uint32 aligned = 4) { // aligned * 2 since that should be the maximum overhead for an element - // @bug could this result in a case where the ring is considered empty/full (false positive/negative)? - // The "correct" version would probably to use ring_move_pointer in some form + // This is not 100% correct BUT it is way faster than any correct version I can come up with uint64 max_mem_required = size + aligned * 2; if (ring->tail < ring->head) { @@ -271,15 +270,17 @@ bool ring_commit_safe(const RingMemory* ring, uint64 size, byte aligned = 0) } inline -bool ring_commit_safe_atomic(const RingMemory* ring, uint64 size, byte aligned = 0) +bool ring_commit_safe_atomic(const RingMemory* ring, uint64 size, uint32 aligned = 4) { // aligned * 2 since that should be the maximum overhead for an element - // @bug could this result in a case where the ring is considered empty/full (false positive/negative)? 
- // The "correct" version would probably to use ring_move_pointer in some form + // This is not 100% correct BUT it is way faster than any correct version I can come up with uint64 max_mem_required = size + aligned * 2; + // @todo consider to switch to uintptr_t uint64 tail = atomic_get_relaxed((uint64 *) &ring->tail); - uint64 head = atomic_get_relaxed((uint64 *) &ring->head); + + // This doesn't have to be atomic since we assume single producer/consumer and a commit is performed by the consumer + uint64 head = (uint64) ring->head; if (tail < head) { return ((uint64) (ring->end - head)) > max_mem_required @@ -291,18 +292,6 @@ bool ring_commit_safe_atomic(const RingMemory* ring, uint64 size, byte aligned = } } -inline -void ring_force_head_update(const RingMemory* ring) -{ - _mm_clflush(ring->head); -} - -inline -void ring_force_tail_update(const RingMemory* ring) -{ - _mm_clflush(ring->tail); -} - inline int64 ring_dump(const RingMemory* ring, byte* data) { diff --git a/memory/ThreadedChunkMemory.h b/memory/ThreadedChunkMemory.h index 9987d4b..d1cb426 100644 --- a/memory/ThreadedChunkMemory.h +++ b/memory/ThreadedChunkMemory.h @@ -23,8 +23,8 @@ struct ThreadedChunkMemory { uint64 count; uint64 size; - uint64 chunk_size; int64 last_pos; + uint32 chunk_size; int32 alignment; // length = count diff --git a/memory/ThreadedQueue.h b/memory/ThreadedQueue.h index e21a337..6e7dfd3 100644 --- a/memory/ThreadedQueue.h +++ b/memory/ThreadedQueue.h @@ -51,7 +51,7 @@ struct ThreadedQueue { }; inline -void thrd_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint64 element_size, uint32 alignment = 64) +void thrd_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint32 element_size, uint32 alignment = 64) { element_size = ROUND_TO_NEAREST(element_size, alignment); @@ -67,7 +67,7 @@ void thrd_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint64 element } inline -void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_count, uint64 element_size, uint32 alignment = 64) +void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_count, uint32 element_size, uint32 alignment = 64) { element_size = ROUND_TO_NEAREST(element_size, alignment); @@ -83,7 +83,7 @@ void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_cou } inline -void thrd_queue_init(ThreadedQueue* queue, byte* buf, uint32 element_count, uint64 element_size, uint32 alignment = 64) +void thrd_queue_init(ThreadedQueue* queue, byte* buf, uint32 element_count, uint32 element_size, uint32 alignment = 64) { element_size = ROUND_TO_NEAREST(element_size, alignment); diff --git a/memory/ThreadedRingMemory.h b/memory/ThreadedRingMemory.h index 3dcbf83..284b98f 100644 --- a/memory/ThreadedRingMemory.h +++ b/memory/ThreadedRingMemory.h @@ -69,7 +69,7 @@ void thrd_ring_free(ThreadedRingMemory* ring) } inline -byte* thrd_ring_calculate_position(ThreadedRingMemory* ring, uint64 size, byte aligned = 0) +byte* thrd_ring_calculate_position(ThreadedRingMemory* ring, uint64 size, byte aligned = 4) { pthread_mutex_lock(&ring->mutex); byte* result = ring_calculate_position((RingMemory *) ring, size, aligned); @@ -87,14 +87,14 @@ void thrd_ring_reset(ThreadedRingMemory* ring) } // Moves a pointer based on the size you want to consume (new position = after consuming size) -void thrd_ring_move_pointer(ThreadedRingMemory* ring, byte** pos, uint64 size, byte aligned = 0) +void thrd_ring_move_pointer(ThreadedRingMemory* ring, byte** pos, uint64 size, byte aligned = 4) { 
pthread_mutex_lock(&ring->mutex); ring_move_pointer((RingMemory *) ring, pos, size, aligned); pthread_mutex_unlock(&ring->mutex); } -byte* thrd_ring_get_memory(ThreadedRingMemory* ring, uint64 size, byte aligned = 0, bool zeroed = false) +byte* thrd_ring_get_memory(ThreadedRingMemory* ring, uint64 size, byte aligned = 4, bool zeroed = false) { pthread_mutex_lock(&ring->mutex); byte* result = ring_get_memory((RingMemory *) ring, size, aligned, zeroed); @@ -104,7 +104,7 @@ byte* thrd_ring_get_memory(ThreadedRingMemory* ring, uint64 size, byte aligned = } // Same as ring_get_memory but DOESN'T move the head -byte* thrd_ring_get_memory_nomove(ThreadedRingMemory* ring, uint64 size, byte aligned = 0, bool zeroed = false) +byte* thrd_ring_get_memory_nomove(ThreadedRingMemory* ring, uint64 size, byte aligned = 4, bool zeroed = false) { pthread_mutex_lock(&ring->mutex); byte* result = ring_get_memory_nomove((RingMemory *) ring, size, aligned, zeroed); @@ -129,7 +129,7 @@ byte* thrd_ring_get_element(ThreadedRingMemory* ring, uint64 element_count, uint * Checks if one additional element can be inserted without overwriting the tail index */ inline -bool thrd_ring_commit_safe(ThreadedRingMemory* ring, uint64 size, byte aligned = 0) +bool thrd_ring_commit_safe(ThreadedRingMemory* ring, uint64 size, byte aligned = 4) { pthread_mutex_lock(&ring->mutex); bool result = ring_commit_safe((RingMemory *) ring, size, aligned); diff --git a/object/Mesh.h b/object/Mesh.h index 284d67b..e7ce776 100644 --- a/object/Mesh.h +++ b/object/Mesh.h @@ -23,9 +23,9 @@ #include "../utils/StringUtils.h" #if __aarch64__ - #include "../../../GameEngine/stdlib/sve/SVE_I32.h" + #include "../stdlib/sve/SVE_I32.h" #else - #include "../../../GameEngine/stdlib/simd/SIMD_I32.h" + #include "../stdlib/simd/SIMD_I32.h" #endif #define MESH_VERSION 1 @@ -178,7 +178,7 @@ void mesh_from_file_txt( continue; } - // NOTE: we always load a file in the format: POSITON + NORMAL + TEXTURE + COLOR + // NOTE: we always load a file in the format: POSITION + NORMAL + TEXTURE + COLOR // EVEN if some of the data is missing. This is necessary to keep the memory kinda in line. // The actual binary file later will have the minimized layout. @@ -558,7 +558,7 @@ int32 mesh_data_size(const Mesh* mesh) int32 mesh_to_data( const Mesh* mesh, byte* data, - int32 vertex_save_format = VERTEX_TYPE_ALL, + uint32 vertex_save_format = VERTEX_TYPE_ALL, int32 steps = 8 ) { diff --git a/object/Texture.h b/object/Texture.h index 257a88d..3437abf 100644 --- a/object/Texture.h +++ b/object/Texture.h @@ -36,7 +36,7 @@ #include "../image/Image.h" struct Texture { - uint64 id; + uint32 id; byte sample_id; // @question Should the texture hold the texture unit? diff --git a/platform/win32/audio/DirectSound.h b/platform/win32/audio/DirectSound.h index 8635902..22ddb6e 100644 --- a/platform/win32/audio/DirectSound.h +++ b/platform/win32/audio/DirectSound.h @@ -148,11 +148,12 @@ uint32 audio_buffer_fillable(const AudioSetting* setting, const DirectSoundSetti return 0; } - DWORD bytes_to_lock = (setting->sample_index * setting->sample_size) % setting->buffer_size; + DWORD bytes_to_lock = setting->sample_buffer_size; DWORD bytes_to_write = 0; DWORD target_cursor = (player_cursor + (setting->latency * setting->sample_size)) % setting->buffer_size; + // @bug Why does this case even exist? if (bytes_to_lock == player_cursor) { // @bug What if just started? 
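// NOTE: lock position == play cursor is ambiguous in a ring buffer: it can mean "just started /
// completely empty" (everything would be fillable) or "completely full"; writing 0 bytes is the
// conservative interpretation.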
bytes_to_write = 0; @@ -179,7 +180,7 @@ void audio_play_buffer(AudioSetting* setting, DirectSoundSetting* api_setting) void* region2; DWORD region2_size; - DWORD bytes_to_lock = (setting->sample_index * setting->sample_size) % setting->buffer_size; + DWORD bytes_to_lock = setting->sample_buffer_size; api_setting->secondary_buffer->Lock( bytes_to_lock, setting->sample_buffer_size, @@ -204,8 +205,6 @@ void audio_play_buffer(AudioSetting* setting, DirectSoundSetting* api_setting) api_setting->secondary_buffer->Unlock(region1, region1_size, region2, region2_size); - // @question Do we want to keep this here or move it to the audio mixer? - setting->sample_index += setting->sample_buffer_size / setting->sample_size; setting->sample_buffer_size = 0; } diff --git a/platform/win32/audio/XAudio2.h b/platform/win32/audio/XAudio2.h index 7fa16ec..4190112 100644 --- a/platform/win32/audio/XAudio2.h +++ b/platform/win32/audio/XAudio2.h @@ -106,8 +106,6 @@ void audio_load(HWND hwnd, AudioSetting* setting, XAudio2Setting* api_setting) { api_setting->internal_buffer[1].LoopLength = 0; api_setting->internal_buffer[1].LoopCount = 0; api_setting->internal_buffer[1].pContext = NULL; - - setting->sample_index = 0; } inline @@ -117,10 +115,7 @@ void audio_play(AudioSetting* setting, XAudio2Setting* api_setting) { } api_setting->source_voice->Start(0, XAUDIO2_COMMIT_NOW); - - if (setting->sample_index > 1) { - setting->sample_index = 0; - } } inline @@ -199,7 +194,9 @@ void audio_play_buffer(AudioSetting* setting, XAudio2Setting* api_setting) { } ++setting->sample_output; - setting->sample_index += setting->sample_buffer_size / setting->sample_size; + + // @performance Why do I even need this? + //setting->sample_index += setting->sample_buffer_size / setting->sample_size; setting->sample_buffer_size = 0; } diff --git a/platform/win32/threading/Atomic.h b/platform/win32/threading/Atomic.h index 99d304a..21d2df1 100644 --- a/platform/win32/threading/Atomic.h +++ b/platform/win32/threading/Atomic.h @@ -12,54 +12,55 @@ #include #include "../../../stdlib/Types.h" -// WARNING: Windows doesn't really support all the relaxed implementations, we therefore often use acquire as alternative. +// WARNING: Windows doesn't really have separate relaxed, release and acquire functions on x86_64.
+// You can see that by checking out how they are defined inline void atomic_set_relaxed(void** target, void* new_pointer) { - InterlockedExchangePointerAcquire(target, new_pointer); + InterlockedExchangePointerNoFence(target, new_pointer); } inline void* atomic_get_relaxed(void** target) { - return InterlockedCompareExchangePointer(target, NULL, NULL); + return InterlockedCompareExchangePointerNoFence(target, NULL, NULL); } inline void atomic_set_relaxed(volatile int32* value, int32 new_value) { - InterlockedExchangeAcquire((long *) value, new_value); + InterlockedExchangeNoFence((long *) value, new_value); } inline void atomic_set_relaxed(volatile int64* value, int64 new_value) { - InterlockedExchangeAcquire((long *) value, (long) new_value); + InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value); } inline void atomic_set_relaxed(volatile f32* value, f32 new_value) { - InterlockedExchangeAcquire((long *) value, (long) new_value); + InterlockedExchangeNoFence((long *) value, (long) new_value); } inline void atomic_set_relaxed(volatile f64* value, f64 new_value) { - InterlockedExchangeAcquire((long *) value, (long) new_value); + InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value); } inline int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value) { - return (int32) InterlockedExchangeAcquire((long *) value, new_value); + return (int32) InterlockedExchangeNoFence((long *) value, new_value); } inline int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value) { - return (int64) InterlockedExchangeAcquire((long *) value, (long) new_value); + return (int64) InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value); } inline @@ -86,25 +87,25 @@ void atomic_set_relaxed(volatile byte* value, const byte new_value[16]) inline int32 atomic_get_relaxed(volatile int32* value) { - return (int32) InterlockedCompareExchangeAcquire((long *) value, 0, 0); + return (int32) InterlockedCompareExchangeNoFence((long *) value, 0, 0); } inline int64 atomic_get_relaxed(volatile int64* value) { - return (int64) InterlockedCompareExchangeAcquire((long *) value, 0, 0); + return (int64) InterlockedCompareExchangeNoFence64((LONG64 *) value, 0, 0); } inline f32 atomic_get_relaxed(volatile f32* value) { - return (f32) InterlockedCompareExchangeAcquire((long *) value, 0, 0); + return (f32) InterlockedCompareExchangeNoFence((long *) value, 0, 0); } inline f64 atomic_get_relaxed(volatile f64* value) { - return (f64) InterlockedCompareExchangeAcquire((long *) value, 0, 0); + return (f64) InterlockedCompareExchangeNoFence64((LONG64 *) value, 0, 0); } inline @@ -116,79 +117,79 @@ void atomic_get_relaxed(volatile byte* value, byte data[16]) inline void atomic_increment_relaxed(volatile int32* value) { - InterlockedIncrementAcquire((long *) value); + InterlockedIncrementNoFence((long *) value); } inline void atomic_decrement_relaxed(volatile int32* value) { - InterlockedDecrementAcquire((long *) value); + InterlockedDecrementNoFence((long *) value); } inline void atomic_increment_relaxed(volatile int64* value) { - InterlockedIncrementAcquire((long *) value); + InterlockedIncrementNoFence64((LONG64 *) value); } inline void atomic_decrement_relaxed(volatile int64* value) { - InterlockedDecrementAcquire((long *) value); + InterlockedDecrementNoFence64((LONG64 *) value); } inline void atomic_add_relaxed(volatile int32* value, int32 increment) { - InterlockedAddAcquire((long *) value, increment); + InterlockedAddNoFence((long *) value, increment); } inline void 
atomic_sub_relaxed(volatile int32* value, int32 decrement) { - InterlockedAddAcquire((long *) value, -decrement); + InterlockedAddNoFence((long *) value, -decrement); } inline void atomic_add_relaxed(volatile int64* value, int64 increment) { - InterlockedAddAcquire((long *) value, (long) increment); + InterlockedAddNoFence64((LONG64 *) value, (LONG64) increment); } inline void atomic_sub_relaxed(volatile int64* value, int64 decrement) { - InterlockedAddAcquire((long *) value, -1 * ((long) decrement)); + InterlockedAddNoFence64((LONG64 *) value, -((LONG64) decrement)); } inline f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired) { - return (f32) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected); + return (f32) InterlockedCompareExchangeNoFence((long *) value, (long) desired, (long) *expected); } inline f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired) { - return (f64) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected); + return (f64) InterlockedCompareExchangeNoFence64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } inline int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired) { - return (int32) InterlockedCompareExchangeRelease((long *) value, desired, *expected); + return (int32) InterlockedCompareExchangeNoFence((long *) value, desired, *expected); } inline int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired) { - return (int64) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected); + return (int64) InterlockedCompareExchangeNoFence64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } inline int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand) { - return (int32) InterlockedExchangeAddRelease((long *) value, operand); + return (int32) InterlockedExchangeAddNoFence((long *) value, operand); } inline @@ -200,115 +201,115 @@ int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand) inline int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand) { - return (int64) InterlockedExchangeAddRelease((long *) value, (long) operand); + return (int64) InterlockedExchangeAddNoFence64((LONG64 *) value, (LONG64) operand); } inline int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand) { - return (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); + return (int64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); } inline void atomic_set_relaxed(volatile uint32* value, uint32 new_value) { - InterlockedExchangeAcquire((long *) value, new_value); + InterlockedExchangeNoFence((long *) value, new_value); } inline void atomic_set_relaxed(volatile uint64* value, uint64 new_value) { - InterlockedExchangeAcquire((long *) value, (long) new_value); + InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value); } inline uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value) { - return (uint32) InterlockedExchangeAcquire((long *) value, new_value); + return (uint32) InterlockedExchangeNoFence((long *) value, new_value); } inline uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value) { - return (uint64) InterlockedExchangeAcquire((long *) value, (long) new_value); + return (uint64) InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value); } inline uint32 atomic_get_relaxed(volatile uint32* value) 
{ - return (uint32) InterlockedCompareExchangeAcquire((long *) value, 0, 0); + return (uint32) InterlockedCompareExchangeNoFence((long *) value, 0, 0); } inline uint64 atomic_get_relaxed(volatile uint64* value) { - return (uint64) InterlockedCompareExchangeAcquire((long *) value, 0, 0); + return (uint64) InterlockedCompareExchangeNoFence64((LONG64 *) value, 0, 0); } inline void atomic_increment_relaxed(volatile uint32* value) { - InterlockedIncrementRelease((long *) value); + InterlockedIncrementNoFence((long *) value); } inline void atomic_decrement_relaxed(volatile uint32* value) { - InterlockedDecrementRelease((long *) value); + InterlockedDecrementNoFence((long *) value); } inline void atomic_increment_relaxed(volatile uint64* value) { - InterlockedIncrementRelease((long *) value); + InterlockedIncrementNoFence64((LONG64 *) value); } inline void atomic_decrement_relaxed(volatile uint64* value) { - InterlockedDecrementRelease((long *) value); + InterlockedDecrementNoFence64((LONG64 *) value); } inline void atomic_add_relaxed(volatile uint32* value, uint32 increment) { - InterlockedAddRelease((long *) value, increment); + InterlockedAddNoFence((long *) value, increment); } inline void atomic_sub_relaxed(volatile uint32* value, uint32 decrement) { - InterlockedAddRelease((long *) value, -1 * ((int32) decrement)); + InterlockedAddNoFence((long *) value, -1 * ((int32) decrement)); } inline void atomic_add_relaxed(volatile uint64* value, uint64 increment) { - InterlockedAddRelease((long *) value, (long) increment); + InterlockedAddNoFence64((LONG64 *) value, (LONG64) increment); } inline void atomic_sub_relaxed(volatile uint64* value, uint64 decrement) { - InterlockedAddRelease((long *) value, -1 * ((long) decrement)); + InterlockedAddNoFence64((LONG64 *) value, -((LONG64) decrement)); } inline uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired) { - return (uint32) InterlockedCompareExchangeAcquire((long *) value, desired, *expected); + return (uint32) InterlockedCompareExchangeNoFence((long *) value, desired, *expected); } inline uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired) { - return (uint64) InterlockedCompareExchangeAcquire((unsigned long long *) value, (unsigned long long) desired, (unsigned long long) *expected); + return (uint64) InterlockedCompareExchangeNoFence64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } inline uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand) { - return (uint32) InterlockedExchangeAddRelease((long *) value, operand); + return (uint32) InterlockedExchangeAddNoFence((long *) value, operand); } inline @@ -320,61 +321,61 @@ uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand) inline uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand) { - return (uint64) InterlockedExchangeAddRelease((long *) value, (long) operand); + return (uint64) InterlockedExchangeAddNoFence64((LONG64 *) value, (LONG64) operand); } inline uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand) { - return (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); + return (uint64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); } inline void atomic_and_relaxed(volatile uint32* value, uint32 mask) { - InterlockedAndRelease((volatile LONG *) value, mask); + InterlockedAndNoFence((volatile LONG *) value, mask); } inline void atomic_and_relaxed(volatile int32* value, 
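/* @performance The *_relaxed fetch_sub overloads fall back to the plain, fully fencing
   InterlockedExchangeAdd64 because InterlockedExchangeSubtract has no NoFence variant;
   the acquire/release fetch_sub fallbacks further down share the same pattern. On x86/x64
   every variant emits the same lock xadd, but on ARM the NoFence add with a negated
   operand keeps the ordering consistent with the function name. Sketch:

inline
int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand)
{
    // NoFence so "relaxed" means relaxed on ARM as well
    return (int64) InterlockedExchangeAddNoFence64((LONG64 *) value, -((LONG64) operand));
}
*/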
int32 mask) { - InterlockedAndRelease((volatile LONG *) value, (LONG)mask); + InterlockedAndNoFence((volatile LONG *) value, (LONG)mask); } inline void atomic_and_relaxed(volatile uint64* value, uint64 mask) { - InterlockedAnd64Release((volatile LONG64 *) value, mask); + InterlockedAnd64NoFence((volatile LONG64 *) value, mask); } inline void atomic_and_relaxed(volatile int64* value, int64 mask) { - InterlockedAnd64Release((volatile LONG64 *) value, mask); + InterlockedAnd64NoFence((volatile LONG64 *) value, mask); } inline void atomic_or_relaxed(volatile uint32* value, uint32 mask) { - InterlockedOrRelease((volatile LONG *) value, mask); + InterlockedOrNoFence((volatile LONG *) value, mask); } inline void atomic_or_relaxed(volatile int32* value, int32 mask) { - InterlockedOrRelease((volatile LONG *) value, (LONG)mask); + InterlockedOrNoFence((volatile LONG *) value, (LONG)mask); } inline void atomic_or_relaxed(volatile uint64* value, uint64 mask) { - InterlockedOr64Release((volatile LONG64 *) value, mask); + InterlockedOr64NoFence((volatile LONG64 *) value, mask); } inline void atomic_or_relaxed(volatile int64* value, int64 mask) { - InterlockedOr64Release((volatile LONG64 *) value, mask); + InterlockedOr64NoFence((volatile LONG64 *) value, mask); } inline @@ -398,7 +399,7 @@ void atomic_set_acquire(volatile int32* value, int32 new_value) inline void atomic_set_acquire(volatile int64* value, int64 new_value) { - InterlockedExchangeAcquire((long *) value, (long) new_value); + InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value); } inline @@ -410,7 +411,7 @@ void atomic_set_acquire(volatile f32* value, f32 new_value) inline void atomic_set_acquire(volatile f64* value, f64 new_value) { - InterlockedExchangeAcquire((long *) value, (long) new_value); + InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value); } inline @@ -422,7 +423,7 @@ int32 atomic_fetch_set_acquire(volatile int32* value, int32 new_value) inline int64 atomic_fetch_set_acquire(volatile int64* value, int64 new_value) { - return (int64) InterlockedExchangeAcquire((long *) value, (long) new_value); + return (int64) InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value); } inline @@ -467,7 +468,7 @@ f32 atomic_get_acquire(volatile f32* value) inline f64 atomic_get_acquire(volatile f64* value) { - return (f64) InterlockedCompareExchangeAcquire((long *) value, 0, 0); + return (f64) InterlockedCompareExchangeAcquire64((LONG64 *) value, 0, 0); } inline @@ -533,7 +534,7 @@ f32 atomic_compare_exchange_weak_acquire(volatile f32* value, f32* expected, f32 inline f64 atomic_compare_exchange_weak_acquire(volatile f64* value, f64* expected, f64 desired) { - return (f64) InterlockedCompareExchangeAcquire((long *) value, (long) desired, (long) *expected); + return (f64) InterlockedCompareExchangeAcquire64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } inline @@ -545,7 +546,7 @@ int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expecte inline int64 atomic_compare_exchange_weak_acquire(volatile int64* value, int64* expected, int64 desired) { - return (int64) InterlockedCompareExchangeAcquire((long *) value, (long) desired, (long) *expected); + return (int64) InterlockedCompareExchangeAcquire64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } inline @@ -563,13 +564,13 @@ int32 atomic_fetch_sub_acquire(volatile int32* value, int32 operand) inline int64 atomic_fetch_add_acquire(volatile int64* value, int64 operand) { - return (int64) 
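/* The and/or wrappers are typically used on shared flag words. A small usage sketch with
   the relaxed variants; ENTITY_FLAG_DIRTY and the helpers are illustrative, not existing code:

#define ENTITY_FLAG_DIRTY 0x01

inline
void entity_mark_dirty(volatile uint32* flags)
{
    atomic_or_relaxed(flags, (uint32) ENTITY_FLAG_DIRTY);
}

inline
void entity_clear_dirty(volatile uint32* flags)
{
    atomic_and_relaxed(flags, (uint32) ~ENTITY_FLAG_DIRTY);
}
*/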
InterlockedExchangeSubtract((unsigned long *) value, operand); + return (int64) InterlockedExchangeAddAcquire64((LONG64 *) value, (LONG64) operand); } inline int64 atomic_fetch_sub_acquire(volatile int64* value, int64 operand) { - return (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); + return (int64) InterlockedExchangeAddAcquire64((LONG64 *) value, -((LONG64) operand)); } inline @@ -581,7 +582,7 @@ void atomic_set_acquire(volatile uint32* value, uint32 new_value) inline void atomic_set_acquire(volatile uint64* value, uint64 new_value) { - InterlockedExchangeAcquire((long *) value, (long) new_value); + InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value); } inline @@ -593,7 +594,7 @@ uint32 atomic_fetch_set_acquire(volatile uint32* value, uint32 new_value) inline uint64 atomic_fetch_set_acquire(volatile uint64* value, uint64 new_value) { - return (uint64) InterlockedExchangeAcquire((long *) value, (long) new_value); + return (uint64) InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value); } inline @@ -605,7 +606,7 @@ uint32 atomic_get_acquire(volatile uint32* value) inline uint64 atomic_get_acquire(volatile uint64* value) { - return (uint64) InterlockedCompareExchangeAcquire((long *) value, 0, 0); + return (uint64) InterlockedCompareExchangeAcquire64((LONG64 *) value, 0, 0); } inline @@ -623,13 +624,13 @@ void atomic_decrement_acquire(volatile uint32* value) inline void atomic_increment_acquire(volatile uint64* value) { - InterlockedIncrementAcquire((long *) value); + InterlockedIncrementAcquire64((LONG64 *) value); } inline void atomic_decrement_acquire(volatile uint64* value) { - InterlockedDecrementAcquire((long *) value); + InterlockedDecrementAcquire64((LONG64 *) value); } inline @@ -647,13 +648,13 @@ void atomic_sub_acquire(volatile uint32* value, uint32 decrement) inline void atomic_add_acquire(volatile uint64* value, uint64 increment) { - InterlockedAddAcquire((long *) value, (long) increment); + InterlockedAddAcquire64((LONG64 *) value, (LONG64) increment); } inline void atomic_sub_acquire(volatile uint64* value, uint64 decrement) { - InterlockedAddAcquire((long *) value, -1 * ((long) decrement)); + InterlockedAddAcquire64((LONG64 *) value, -((LONG64) decrement)); } inline @@ -665,7 +666,7 @@ uint32 atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expe inline uint64 atomic_compare_exchange_weak_acquire(volatile uint64* value, uint64* expected, uint64 desired) { - return (uint64) InterlockedCompareExchangeAcquire((unsigned long long *) value, (unsigned long long) desired, (unsigned long long) *expected); + return (uint64) InterlockedCompareExchangeAcquire64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } inline @@ -683,13 +684,13 @@ uint32 atomic_fetch_sub_acquire(volatile uint32* value, uint32 operand) inline uint64 atomic_fetch_add_acquire(volatile uint64* value, uint64 operand) { - return (uint64) InterlockedExchangeAddAcquire((long *) value, (long) operand); + return (uint64) InterlockedExchangeAddAcquire64((LONG64 *) value, (LONG64) operand); } inline uint64 atomic_fetch_sub_acquire(volatile uint64* value, uint64 operand) { - return (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); + return (uint64) InterlockedExchangeAddAcquire64((LONG64 *) value, -((LONG64) operand)); }
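/* Because fetch_add/fetch_sub are emulated through a single InterlockedExchangeAdd call
   with an optionally negated operand, a sign slip silently turns an add into a sub; the
   int64 fetch_add overloads are the easy ones to get wrong. A single-threaded smoke test
   catches that at startup; ASSERT stands in for whatever assert macro this codebase uses:

inline
void atomic_fetch_add_sanity_check()
{
    volatile int64 v = 10;

    int64 old = atomic_fetch_add_acquire(&v, 5);
    ASSERT(old == 10 && v == 15); // fetch_* returns the previous value

    old = atomic_fetch_sub_acquire(&v, 3);
    ASSERT(old == 15 && v == 12);
}
*/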
inline @@ -761,7 +762,7 @@ void atomic_set_release(volatile int32* value, int32 new_value) inline void atomic_set_release(volatile int64* value, int64 new_value) { - InterlockedExchange((long *) value, (long) new_value); + InterlockedExchange64((LONG64 *) value, (LONG64) new_value); } inline @@ -773,7 +774,7 @@ void atomic_set_release(volatile f32* value, f32 new_value) inline void atomic_set_release(volatile f64* value, f64 new_value) { - InterlockedExchange((long *) value, (long) new_value); + InterlockedExchange64((LONG64 *) value, (LONG64) new_value); } inline @@ -785,7 +786,7 @@ int32 atomic_fetch_set_release(volatile int32* value, int32 new_value) inline int64 atomic_fetch_set_release(volatile int64* value, int64 new_value) { - return (int64) InterlockedExchange((long *) value, (long) new_value); + return (int64) InterlockedExchange64((LONG64 *) value, (LONG64) new_value); } inline @@ -830,7 +831,7 @@ f32 atomic_get_release(volatile f32* value) inline f64 atomic_get_release(volatile f64* value) { - return (f64) InterlockedCompareExchangeRelease((long *) value, 0, 0); + return (f64) InterlockedCompareExchangeRelease64((LONG64 *) value, 0, 0); } inline @@ -854,13 +855,13 @@ void atomic_decrement_release(volatile int32* value) inline void atomic_increment_release(volatile int64* value) { - InterlockedIncrementRelease((long *) value); + InterlockedIncrementRelease64((LONG64 *) value); } inline void atomic_decrement_release(volatile int64* value) { - InterlockedDecrementRelease((long *) value); + InterlockedDecrementRelease64((LONG64 *) value); } inline @@ -878,13 +879,13 @@ void atomic_sub_release(volatile int32* value, int32 decrement) inline void atomic_add_release(volatile int64* value, int64 increment) { - InterlockedAddRelease((long *) value, (long) increment); + InterlockedAddRelease64((LONG64 *) value, (LONG64) increment); } inline void atomic_sub_release(volatile int64* value, int64 decrement) { - InterlockedAddRelease((long *) value, -1 * ((long) decrement)); + InterlockedAddRelease64((LONG64 *) value, -((LONG64) decrement)); } inline @@ -896,7 +897,7 @@ f32 atomic_compare_exchange_weak_release(volatile f32* value, f32* expected, f32 inline f64 atomic_compare_exchange_weak_release(volatile f64* value, f64* expected, f64 desired) { - return (f64) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected); + return (f64) InterlockedCompareExchangeRelease64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } inline @@ -908,7 +909,7 @@ int32 atomic_compare_exchange_weak_release(volatile int32* value, int32* expecte inline int64 atomic_compare_exchange_weak_release(volatile int64* value, int64* expected, int64 desired) { - return (int64) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected); + return (int64) InterlockedCompareExchangeRelease64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } inline @@ -926,13 +927,13 @@ int32 atomic_fetch_sub_release(volatile int32* value, int32 operand) inline int64 atomic_fetch_add_release(volatile int64* value, int64 operand) { - return (int64) InterlockedExchangeSubtract((unsigned long *) value, operand); + return (int64) InterlockedExchangeAddRelease64((LONG64 *) value, (LONG64) operand); } inline int64 atomic_fetch_sub_release(volatile int64* value, int64 operand) { - return (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); + return (int64) InterlockedExchangeAddRelease64((LONG64 *) value, -((LONG64) operand)); } inline @@ -944,7 +945,7 @@ void atomic_set_release(volatile uint32* value, uint32 new_value) inline void atomic_set_release(volatile uint64* value, uint64 new_value) { - InterlockedExchange((long *) value, (long) new_value); + 
InterlockedExchange64((LONG64 *) value, (LONG64) new_value); } inline @@ -956,7 +957,7 @@ uint32 atomic_fetch_set_release(volatile uint32* value, uint32 new_value) inline uint64 atomic_fetch_set_release(volatile uint64* value, uint64 new_value) { - return (uint64) InterlockedExchange((long *) value, (long) new_value); + return (uint64) InterlockedExchange64((LONG64 *) value, (LONG64) new_value); } inline @@ -968,7 +969,7 @@ uint32 atomic_get_release(volatile uint32* value) inline uint64 atomic_get_release(volatile uint64* value) { - return (uint64) InterlockedCompareExchangeRelease((long *) value, 0, 0); + return (uint64) InterlockedCompareExchangeRelease64((LONG64 *) value, 0, 0); } inline @@ -986,13 +987,13 @@ void atomic_decrement_release(volatile uint32* value) inline void atomic_increment_release(volatile uint64* value) { - InterlockedIncrementRelease((long *) value); + InterlockedIncrementRelease64((LONG64 *) value); } inline void atomic_decrement_release(volatile uint64* value) { - InterlockedDecrementRelease((long *) value); + InterlockedDecrementRelease64((LONG64 *) value); } inline @@ -1010,13 +1011,13 @@ void atomic_sub_release(volatile uint32* value, uint32 decrement) inline void atomic_add_release(volatile uint64* value, uint64 increment) { - InterlockedAddRelease((long *) value, (long) increment); + InterlockedAddRelease64((LONG64 *) value, (LONG64) increment); } inline void atomic_sub_release(volatile uint64* value, uint64 decrement) { - InterlockedAddRelease((long *) value, -1 * ((long) decrement)); + InterlockedAddRelease64((LONG64 *) value, -((LONG64) decrement)); } inline @@ -1028,7 +1029,7 @@ uint32 atomic_compare_exchange_weak_release(volatile uint32* value, uint32* expe inline uint64 atomic_compare_exchange_weak_release(volatile uint64* value, uint64* expected, uint64 desired) { - return (uint64) InterlockedCompareExchangeRelease((unsigned long long *) value, (unsigned long long) desired, (unsigned long long) *expected); + return (uint64) InterlockedCompareExchangeRelease64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } inline @@ -1046,13 +1047,13 @@ uint32 atomic_fetch_sub_release(volatile uint32* value, uint32 operand) inline uint64 atomic_fetch_add_release(volatile uint64* value, uint64 operand) { - return (uint64) InterlockedExchangeAddRelease((long *) value, (long) operand); + return (uint64) InterlockedExchangeAddRelease64((LONG64 *) value, (LONG64) operand); } inline uint64 atomic_fetch_sub_release(volatile uint64* value, uint64 operand) { - return (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); + return (uint64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); } inline @@ -1124,7 +1125,7 @@ void atomic_set_acquire_release(volatile int32* value, int32 new_value) inline void atomic_set_acquire_release(volatile int64* value, int64 new_value) { - InterlockedExchange((long *) value, (long) new_value); + InterlockedExchange64((LONG64 *) value, (LONG64) new_value); } inline @@ -1136,7 +1137,7 @@ void atomic_set_acquire_release(volatile f32* value, f32 new_value) inline void atomic_set_acquire_release(volatile f64* value, f64 new_value) { - InterlockedExchange((long *) value, (long) new_value); + InterlockedExchange64((LONG64 *) value, (LONG64) new_value); } inline @@ -1148,7 +1149,7 @@ int32 atomic_fetch_set_acquire_release(volatile int32* value, int32 new_value) inline int64 atomic_fetch_set_acquire_release(volatile int64* value, int64 new_value) { - return (int64) InterlockedExchange((long *) value, (long) 
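/* @performance atomic_get_* implemented as InterlockedCompareExchange*(value, 0, 0)
   always executes a lock-prefixed read-modify-write, even though nothing needs to be
   written. On x64 an aligned 8-byte load is already atomic, and winnt.h ships plain-load
   helpers with ordering semantics. A cheaper read sketch; the _fast suffix is illustrative:

inline
uint64 atomic_get_acquire_fast(volatile uint64* value)
{
    // ReadAcquire64 is a plain load on x64 (acquire ordering is free on that ISA)
    // and a load/barrier sequence on ARM64, so no bus-locked CAS either way
    return (uint64) ReadAcquire64((volatile LONG64 *) value);
}
*/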
new_value); + return (int64) InterlockedExchange64((LONG64 *) value, (LONG64) new_value); } inline @@ -1181,7 +1182,7 @@ int32 atomic_get_acquire_release(volatile int32* value) inline int64 atomic_get_acquire_release(volatile int64* value) { - return (int64) InterlockedCompareExchange((long *) value, 0, 0); + return (int64) InterlockedCompareExchange64((LONG64 *) value, 0, 0); } inline @@ -1193,7 +1194,7 @@ f32 atomic_get_acquire_release(volatile f32* value) inline f64 atomic_get_acquire_release(volatile f64* value) { - return (f64) InterlockedCompareExchange((long *) value, 0, 0); + return (f64) InterlockedCompareExchange64((LONG64 *) value, 0, 0); } inline @@ -1217,13 +1218,13 @@ void atomic_decrement_acquire_release(volatile int32* value) inline void atomic_increment_acquire_release(volatile int64* value) { - InterlockedIncrement((long *) value); + InterlockedIncrement64((LONG64 *) value); } inline void atomic_decrement_acquire_release(volatile int64* value) { - InterlockedDecrement((long *) value); + InterlockedDecrement64((LONG64 *) value); } inline @@ -1241,13 +1242,13 @@ void atomic_sub_acquire_release(volatile int32* value, int32 decrement) inline void atomic_add_acquire_release(volatile int64* value, int64 increment) { - InterlockedAdd((long *) value, (long) increment); + InterlockedAdd64((LONG64 *) value, (LONG64) increment); } inline void atomic_sub_acquire_release(volatile int64* value, int64 decrement) { - InterlockedAdd((long *) value, -1 * ((long) decrement)); + InterlockedAdd64((LONG64 *) value, -((LONG64) decrement)); } inline @@ -1259,7 +1260,7 @@ f32 atomic_compare_exchange_weak_acquire_release(volatile f32* value, f32* expec inline f64 atomic_compare_exchange_weak_acquire_release(volatile f64* value, f64* expected, f64 desired) { - return (f64) InterlockedCompareExchange((long *) value, (long) desired, (long) *expected); + return (f64) InterlockedCompareExchange64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } inline @@ -1271,7 +1272,7 @@ int32 atomic_compare_exchange_weak_acquire_release(volatile int32* value, int32* inline int64 atomic_compare_exchange_weak_acquire_release(volatile int64* value, int64* expected, int64 desired) { - return (int64) InterlockedCompareExchange((long *) value, (long) desired, (long) *expected); + return (int64) InterlockedCompareExchange64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } inline @@ -1291,7 +1292,7 @@ int32 atomic_fetch_sub_acquire_release(volatile int32* value, int32 operand) inline int64 atomic_fetch_add_acquire_release(volatile int64* value, int64 operand) { - int64 ret = (int64) InterlockedExchangeSubtract((unsigned long *) value, operand); + int64 ret = (int64) InterlockedExchangeAdd64((LONG64 *) value, (LONG64) operand); return ret; } @@ -1299,7 +1300,7 @@ int64 atomic_fetch_add_acquire_release(volatile int64* value, int64 operand) inline int64 atomic_fetch_sub_acquire_release(volatile int64* value, int64 operand) { - int64 ret = (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); + int64 ret = (int64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); return ret; } @@ -1325,7 +1326,7 @@ uint32 atomic_fetch_set_acquire_release(volatile uint32* value, uint32 new_value inline uint64 atomic_fetch_set_acquire_release(volatile uint64* value, uint64 new_value) { - return (uint64) InterlockedExchange((long *) value, (long) new_value); + return (uint64) InterlockedExchange64((LONG64 *) value, (LONG64) new_value); } inline @@ -1337,7 +1338,7 @@ uint32 
atomic_get_acquire_release(volatile uint32* value) inline uint64 atomic_get_acquire_release(volatile uint64* value) { - return (uint64) InterlockedCompareExchange((long *) value, 0, 0); + return (uint64) InterlockedCompareExchange64((LONG64 *) value, 0, 0); } inline @@ -1355,13 +1356,13 @@ void atomic_decrement_acquire_release(volatile uint32* value) inline void atomic_increment_acquire_release(volatile uint64* value) { - InterlockedIncrement((long *) value); + InterlockedIncrement64((LONG64 *) value); } inline void atomic_decrement_acquire_release(volatile uint64* value) { - InterlockedDecrement((long *) value); + InterlockedDecrement64((LONG64 *) value); } inline @@ -1379,13 +1380,13 @@ void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement) inline void atomic_add_acquire_release(volatile uint64* value, uint64 increment) { - InterlockedAdd((long *) value, (long) increment); + InterlockedAdd64((LONG64 *) value, (LONG64) increment); } inline void atomic_sub_acquire_release(volatile uint64* value, uint64 decrement) { - InterlockedAdd((long *) value, -1 * ((long) decrement)); + InterlockedAdd64((LONG64 *) value, -((LONG64) decrement)); } inline @@ -1397,7 +1398,7 @@ uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint inline uint64 atomic_compare_exchange_weak_acquire_release(volatile uint64* value, uint64* expected, uint64 desired) { - return (uint64) InterlockedCompareExchange((unsigned long long *) value, (unsigned long long) desired, (unsigned long long) *expected); + return (uint64) InterlockedCompareExchange64((LONG64 *) value, (LONG64) desired, (LONG64) *expected); } inline @@ -1417,13 +1418,13 @@ uint32 atomic_fetch_sub_acquire_release(volatile uint32* value, uint32 operand) inline uint64 atomic_fetch_add_acquire_release(volatile uint64* value, uint64 operand) { - return (uint64) InterlockedExchangeAdd((long *) value, (long) operand); + return (uint64) InterlockedExchangeAdd64((LONG64 *) value, (LONG64) operand); } inline uint64 atomic_fetch_sub_acquire_release(volatile uint64* value, uint64 operand) { - uint64 ret = (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand); + uint64 ret = (uint64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand)); return ret; } diff --git a/stdlib/HashMap.h b/stdlib/HashMap.h index e7ca2aa..b64d599 100644 --- a/stdlib/HashMap.h +++ b/stdlib/HashMap.h @@ -376,7 +376,7 @@ int64 hashmap_dump(const HashMap* hm, byte* data) // Dump hash map content = buffer memory int32 free_index = 0; int32 bit_index = 0; - for (int32 i = 0; i < hm->buf.count; ++i) { + for (uint32 i = 0; i < hm->buf.count; ++i) { if ((hm->buf.free[free_index] & (1ULL << bit_index)) > 0) { HashEntry* entry = (HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, i); @@ -434,7 +434,7 @@ int64 hashmap_load(HashMap* hm, const byte* data) data += sizeof(uint64); // Load the table content - for (int i = 0; i < count; ++i) { + for (uint32 i = 0; i < count; ++i) { uint64 offset = SWAP_ENDIAN_LITTLE(*((uint64 *) data)); data += sizeof(offset); @@ -455,7 +455,7 @@ int64 hashmap_load(HashMap* hm, const byte* data) // Switch endian AND turn offsets to pointers int32 free_index = 0; int32 bit_index = 0; - for (int32 i = 0; i < hm->buf.count; ++i) { + for (uint32 i = 0; i < hm->buf.count; ++i) { if ((hm->buf.free[free_index] & (1ULL << bit_index)) > 0) { HashEntry* entry = (HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, i); diff --git a/stdlib/Types.h b/stdlib/Types.h index 461fe79..ce9f41f 100644 --- 
a/stdlib/Types.h +++ b/stdlib/Types.h @@ -109,7 +109,6 @@ struct v4_byte { }; }; - struct v2_int32 { union { struct { diff --git a/stdlib/simd/SIMD_I16.h b/stdlib/simd/SIMD_I16.h index a9dc646..3c3c73e 100644 --- a/stdlib/simd/SIMD_I16.h +++ b/stdlib/simd/SIMD_I16.h @@ -438,7 +438,8 @@ inline int16_16 operator<=(int16_16 a, int16_16 b) inline int16_32 operator<=(int16_32 a, int16_32 b) { int16_32 simd; - simd.s = _mm512_mask_blend_epi16(_mm512_knot(_mm512_cmpgt_epi16_mask(b.s, a.s)), b.s, a.s); + __mmask32 mask = _mm512_cmp_epi16_mask(a.s, b.s, _MM_CMPINT_LE); + simd.s = _mm512_mask_blend_epi16(mask, b.s, a.s); return simd; } @@ -716,25 +717,19 @@ inline int16_32 clamp(int16_32 min_value, int16_32 a, int16_32 max_value) return simd_min(simd_max(a, min_value), max_value); } -inline int16 which_true(int16_8 a) +inline int32 which_true(int16_8 a) { - int16 which_true = _mm_movemask_epi8(a.s); - - return which_true; + return _mm_movemask_epi8(a.s); } -inline int16 which_true(int16_16 a) +inline int32 which_true(int16_16 a) { - int16 which_true = _mm256_movemask_epi8(a.s); - - return which_true; + return _mm256_movemask_epi8(a.s); } -inline int16 which_true(int16_32 a) +inline int32 which_true(int16_32 a) { - int16 which_true = _mm512_movepi16_mask(a.s); - - return which_true; + return _mm512_movepi16_mask(a.s); } inline bool any_true(int16_8 a) diff --git a/utils/MathUtils.h b/utils/MathUtils.h index 9e2e0ca..2f83cb3 100644 --- a/utils/MathUtils.h +++ b/utils/MathUtils.h @@ -26,6 +26,8 @@ #define ROUND_TO_NEAREST(a, b) (((a) + ((b) - 1)) & ~((b) - 1)) #define CEIL_DIV(a, b) (((a) + (b) - 1) / (b)) #define OMS_CEIL(x) ((x) == (int)(x) ? (int)(x) : ((x) > 0 ? (int)(x) + 1 : (int)(x))) +#define OMS_ROUND(x) (((x) >= 0) ? ((int)((x) + 0.5f)) : ((int)((x) - 0.5f))) +#define OMS_ROUND_POSITIVE(x) ((int)((x) + 0.5f)) // Modulo function when b is a power of 2 #define MODULO_2(a, b) ((a) & (b - 1)) diff --git a/utils/StringUtils.h b/utils/StringUtils.h index 515a753..1295f1e 100644 --- a/utils/StringUtils.h +++ b/utils/StringUtils.h @@ -15,6 +15,7 @@ #include #include "../stdlib/Types.h" +#include "MathUtils.h" inline int32 utf8_encode(uint32 codepoint, char* out) @@ -821,9 +822,9 @@ void sprintf_fast(char *buffer, const char* format, ...) { case 'f': { f64 val = va_arg(args, f64); - int32 precision = 6; // Default precision + // Default precision + int32 precision = 5; - // @question Consider to implement rounding // Check for optional precision specifier const char* prec_ptr = ptr + 1; if (*prec_ptr >= '0' && *prec_ptr <= '9') { @@ -841,6 +842,15 @@ void sprintf_fast(char *buffer, const char* format, ...) 
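/* @question which_true now mixes two mask granularities: the int16_8/int16_16 overloads
   return a byte mask (_mm*_movemask_epi8 produces two bits per 16-bit lane), while the
   int16_32 overload returns a lane mask (_mm512_movepi16_mask produces one bit per lane).
   Any caller that popcounts or bit-scans the result gets a different scale per width.
   A sketch that normalizes the 256-bit case to one count per lane, assuming immintrin.h
   is already pulled in; true_count is illustrative, not an existing function:

inline
int32 true_count(int16_16 a)
{
    // two movemask bits per 16-bit lane, so halve the popcount
    return _mm_popcnt_u32((uint32) _mm256_movemask_epi8(a.s)) >> 1;
}
*/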
{ val = -val; } + if (precision < 6) { + static const float powers_of_ten[] = { + 1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f, 100000.0f + }; + + f32 scale = powers_of_ten[precision]; + val = OMS_ROUND_POSITIVE(val * scale) / scale; + } + // Handle integer part int32 int_part = (int32) val; f64 frac_part = val - int_part; @@ -896,7 +906,7 @@ void format_time_hh_mm_ss(char* time_str, int32 hours, int32 minutes, int32 secs } inline -void format_time_hh_mm_ss(char* time_str, int32 time) { +void format_time_hh_mm_ss(char* time_str, uint64 time) { int32 hours = (time / 3600) % 24; int32 minutes = (time / 60) % 60; int32 secs = time % 60; @@ -915,7 +925,7 @@ void format_time_hh_mm(char* time_str, int32 hours, int32 minutes) { } inline -void format_time_hh_mm(char* time_str, int32 time) { +void format_time_hh_mm(char* time_str, uint64 time) { int32 hours = (time / 3600) % 24; int32 minutes = (time / 60) % 60; diff --git a/utils/Utils.h b/utils/Utils.h index a032bd4..52ae558 100644 --- a/utils/Utils.h +++ b/utils/Utils.h @@ -18,6 +18,7 @@ struct FileBody { }; // @question Do we want to make the size comparison a step variable? +inline bool is_equal_aligned(const byte* region1, const byte* region2, uint64 size) { while (size > 4) {
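/* @bug The rounding added to sprintf_fast above goes through OMS_ROUND_POSITIVE, i.e.
   through an int cast, so val * scale overflows once val exceeds roughly 2^31 / 10^precision
   and produces garbage digits. A guard keeps large magnitudes on the unrounded path; sketch,
   reusing the locals from that hunk (the f64 table is an assumption, the original uses f32):

if (precision < 6) {
    static const f64 powers_of_ten[] = {
        1.0, 10.0, 100.0, 1000.0, 10000.0, 100000.0
    };

    f64 scale = powers_of_ten[precision];

    // only round while the scaled value still fits an int32
    if (val * scale < 2147483647.0) {
        val = OMS_ROUND_POSITIVE(val * scale) / scale;
    }
}
*/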