OpenGL rendering working again, also improving some code

This commit is contained in:
Dennis Eichhorn 2024-12-21 03:46:52 +01:00
parent fa9fcb6394
commit 5d7943016d
27 changed files with 706 additions and 424 deletions


@ -73,6 +73,8 @@ struct AssetArchive {
// If not remove
MMFHandle mmf;
// This is used to tell the asset archive in which AssetManagementSystem (AMS) each asset type is located.
// Remember, many AMS only contain one asset type (e.g. image, audio, ...)
int32 asset_type_map[ASSET_TYPE_SIZE];
};
@ -182,24 +184,19 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana
{
// @todo add calculation from element->type to ams index
AssetArchiveElement* element = &archive->header.asset_element[id];
// We have to mask with 0x00FFFFFF since the highest bits define the archive id, not the element id
AssetArchiveElement* element = &archive->header.asset_element[id & 0x00FFFFFF];
AssetManagementSystem* ams = &ams_array[archive->asset_type_map[element->type]];
// @todo This is a little bit stupid, reconsider
char id_str[5];
id_str[4] = '\0';
*((int32 *) id_str) = id;
uint64 hash = hash_djb2(id_str);
char id_str[32];
_itoa(id, id_str, 16);
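For context, a small sketch of what the combined id and the new hex key look like (a sketch under the assumption that the top 8 bits hold the archive id, which the mask comment above implies; the concrete values are illustrative):
// Assumed layout: top 8 bits = archive id, low 24 bits = element index (see the 0x00FFFFFF mask above)
int32 archive_id = (id >> 24) & 0xFF;
int32 element_id = id & 0x00FFFFFF;
// The hash map key is now the hex string of the full id, e.g. id = 255 -> _itoa(255, id_str, 16) -> "ff"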
Asset* asset;
// @performance I think we could optimize ams_reserver_asset so that we don't have to hold the lock the entire time
pthread_mutex_lock(&ams->mutex);
// @bug If we have multiple archive files the ids also repeat, which is not possible for the hash map
// Possible solution: also store a string name for every asset. This would add HASH_MAP_MAX_KEY_LENGTH bytes of data to every asset though (see hash map key size = 32)
asset = ams_get_asset(ams, id_str, hash);
asset = ams_get_asset(ams, id_str);
if (asset) {
// Asset already loaded
pthread_mutex_unlock(&ams->mutex);
@ -238,17 +235,15 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana
Texture* texture = (Texture *) asset->self;
texture->image.pixels = (byte *) (texture + 1);
// @todo implement qoi encoding
image_from_data(file.content, &texture->image);
asset->vram_size = texture->image.pixel_count * image_pixel_size_from_type(texture->image.pixel_type);
asset->ram_size = asset->vram_size + sizeof(Texture);
#if OPENGL
// @bug I think order_rows has the wrong value
if (texture->image.order_rows == IMAGE_ROW_ORDER_TOP_TO_BOTTOM) {
image_flip_vertical(ring, &texture->image);
texture->image.order_rows = IMAGE_ROW_ORDER_BOTTOM_TO_TOP;
}
image_flip_vertical(ring, &texture->image);
texture->image.order_rows = IMAGE_ROW_ORDER_BOTTOM_TO_TOP;
#endif
} break;
case ASSET_TYPE_AUDIO: {
@ -289,6 +284,7 @@ Asset* asset_archive_asset_load(const AssetArchive* archive, int32 id, AssetMana
pthread_mutex_unlock(&ams->mutex);
// @performance maybe do in worker threads? This just feels very slow
// @question dependencies might be stored in different archives?
for (uint32 i = 0; i < element->dependency_count; ++i) {
asset_archive_asset_load(archive, id, ams, ring);
}


@ -84,7 +84,7 @@ void ams_create(AssetManagementSystem* ams, byte* buf, int32 chunk_size, int32 c
// setup asset_memory
ams->asset_memory.count = count;
ams->asset_memory.chunk_size = sizeof(Asset);
ams->asset_memory.last_pos = -1;
ams->asset_memory.last_pos = 0;
ams->asset_memory.alignment = 64;
ams->asset_memory.memory = buf;
ams->asset_memory.free = (uint64 *) (ams->asset_memory.memory + ams->asset_memory.chunk_size * count);
@ -92,7 +92,7 @@ void ams_create(AssetManagementSystem* ams, byte* buf, int32 chunk_size, int32 c
// setup asset_data_memory
ams->asset_data_memory.count = count;
ams->asset_data_memory.chunk_size = chunk_size;
ams->asset_data_memory.last_pos = -1;
ams->asset_data_memory.last_pos = 0;
ams->asset_data_memory.alignment = 64;
ams->asset_data_memory.memory = (byte *) (ams->asset_memory.free + CEIL_DIV(count, 64));
ams->asset_data_memory.free = (uint64 *) (ams->asset_data_memory.memory + ams->asset_data_memory.chunk_size * count);
@ -204,8 +204,8 @@ Asset* ams_get_asset(AssetManagementSystem* ams, const char* key)
);
DEBUG_MEMORY_READ(
(uint64) (entry ? (Asset *) entry->value : 0),
entry ? ((Asset *) entry->value)->self + ((Asset *) entry->value)->ram_size : 0
(uint64) (entry ? ((Asset *) entry->value)->self : 0),
entry ? ((Asset *) entry->value)->ram_size : 0
);
return entry ? (Asset *) entry->value : NULL;
@ -222,8 +222,8 @@ Asset* ams_get_asset(AssetManagementSystem* ams, const char* key, uint64 hash)
);
DEBUG_MEMORY_READ(
(uint64) (entry ? (Asset *) entry->value : 0),
entry ? ((Asset *) entry->value)->self + ((Asset *) entry->value)->ram_size : 0
(uint64) (entry ? ((Asset *) entry->value)->self : 0),
entry ? ((Asset *) entry->value)->ram_size : 0
);
return entry ? (Asset *) entry->value : NULL;


@ -31,22 +31,23 @@
enum AudioEffect {
AUDIO_EFFECT_NONE,
AUDIO_EFFECT_ECHO = 1,
AUDIO_EFFECT_REVERB = 2,
AUDIO_EFFECT_UNDERWATER = 4,
AUDIO_EFFECT_CAVE = 8,
AUDIO_EFFECT_LOWPASS = 16,
AUDIO_EFFECT_HIGHPASS = 32,
AUDIO_EFFECT_FLANGER = 64,
AUDIO_EFFECT_TREMOLO = 128,
AUDIO_EFFECT_DISTORTION = 256,
AUDIO_EFFECT_CHORUS = 512,
AUDIO_EFFECT_PITCH_SHIFT = 1024,
AUDIO_EFFECT_GRANULAR_DELAY = 2048,
AUDIO_EFFECT_FM = 4096,
AUDIO_EFFECT_STEREO_PANNING = 8192,
AUDIO_EFFECT_EASE_IN = 16384,
AUDIO_EFFECT_EASE_OUT = 32768,
AUDIO_EFFECT_ECHO = 1 << 0,
AUDIO_EFFECT_REVERB = 1 << 1,
AUDIO_EFFECT_UNDERWATER = 1 << 2,
AUDIO_EFFECT_CAVE = 1 << 3,
AUDIO_EFFECT_LOWPASS = 1 << 4,
AUDIO_EFFECT_HIGHPASS = 1 << 5,
AUDIO_EFFECT_FLANGER = 1 << 6,
AUDIO_EFFECT_TREMOLO = 1 << 7,
AUDIO_EFFECT_DISTORTION = 1 << 8,
AUDIO_EFFECT_CHORUS = 1 << 9,
AUDIO_EFFECT_PITCH_SHIFT = 1 << 10,
AUDIO_EFFECT_GRANULAR_DELAY = 1 << 11,
AUDIO_EFFECT_FM = 1 << 12,
AUDIO_EFFECT_STEREO_PANNING = 1 << 13,
AUDIO_EFFECT_EASE_IN = 1 << 14,
AUDIO_EFFECT_EASE_OUT = 1 << 15,
AUDIO_EFFECT_SPEED = 1 << 16,
};
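With the effects expressed as bit flags, several of them can be combined in one mask and tested independently; a minimal usage sketch (variable names are illustrative):
uint64 effect = AUDIO_EFFECT_ECHO | AUDIO_EFFECT_LOWPASS | AUDIO_EFFECT_SPEED;
if (effect & AUDIO_EFFECT_LOWPASS) {
// only the lowpass branch runs here; echo and speed have their own checks
}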
struct AudioInstance {
@ -56,7 +57,13 @@ struct AudioInstance {
uint32 audio_size;
byte* audio_data;
uint64 effect;
uint32 sample_index;
byte channels;
bool repeat;
// @todo How to implement audio that is only supposed to be played after a certain other sound file is finished
// e.g. queueing soundtracks/ambient noise
};
enum AudioMixerState {
@ -128,10 +135,13 @@ void audio_mixer_add(AudioMixer* mixer, int64 id, Audio* audio, AudioLocationSet
return;
}
// @question Do I really want to use AudioInstance? Wouldn't Audio* be sufficient?
// Well, AudioInstance is a little bit smaller, but is this really worth it? Probably yes.
AudioInstance* instance = (AudioInstance *) chunk_get_element(&mixer->audio_instances, index);
instance->id = id;
instance->audio_size = audio->size;
instance->audio_data = audio->data;
instance->channels = audio->channels;
if (origin) {
memcpy(&instance->origin, origin, sizeof(AudioLocationSetting));
@ -166,6 +176,41 @@ void audio_mixer_remove(AudioMixer* mixer, int64 id)
}
}
int32 apply_speed(int16* buffer, uint32 buffer_size, f32 speed) {
if (speed == 1.0f) {
return 0;
}
// Has to be a multiple of 2 to ensure stereo is handled correctly
uint32 new_size = ROUND_TO_NEAREST((uint32) (buffer_size / speed), 2);
// Speed up
if (speed > 1.0f) {
for (int32 i = 0; i < new_size; ++i) {
// @bug What if 2 consecutive values fall onto the same source index for stereo? This would break it.
// The problem is that even if we did this as a stereo calculation we would still have the same issue, just not on the current value but on the next loop iteration
uint32 src_index = (uint32) (i * speed);
buffer[i] = buffer[src_index];
}
// A speed up reduces the sample_index -> we reduce the data in the buffer
return new_size - buffer_size;
}
// Slow down
for (int32 i = buffer_size - 1; i > 0; --i) {
uint32 src_index = (uint32) (i * speed);
buffer[i] = buffer[src_index];
}
return 0;
}
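A worked example of the mapping above, assuming buffer_size = 1024 int16 samples and speed = 2.0f:
// new_size = ROUND_TO_NEAREST((uint32) (1024 / 2.0f), 2) = 512
// speed-up loop: buffer[i] = buffer[i * 2] for i = 0..511
// return value: 512 - 1024 = -512, which the caller adds to its sample index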
// @performance Whenever we handle left and right the same we could halve the buffer_size
// This allows us to re-use existing helper variables without re-calculating them for the next loop (e.g. delay below)
// Or, if the multiplier is an int we can even perform the multiplication on int32 through casting instead of 2 operations on int16
// We might have to adjust some of the values to ensure correct multiplication if possible (e.g. feedback, intensity, ...)
// @todo We probably want to handle left and right channel differently to add some depth
void apply_echo(int16* buffer, uint32 buffer_size, f32 delay, f32 feedback, int32 sample_rate) {
int32 delay_samples = (int32) (delay * sample_rate);
for (uint32 i = delay_samples; i < buffer_size; ++i) {
@ -173,6 +218,7 @@ void apply_echo(int16* buffer, uint32 buffer_size, f32 delay, f32 feedback, int3
}
}
// @todo We probably want to handle left and right channel differently to add some depth
void apply_reverb(int16* buffer, uint32 buffer_size, f32 intensity) {
intensity *= 0.5f;
for (uint32 i = 1; i < buffer_size; ++i) {
@ -294,11 +340,93 @@ void apply_lowpass(int16* buffer, uint32 buffer_size, f32 cutoff, int32 sample_r
}
}
void audio_mixer_mix(AudioMixer* mixer) {
uint32 limit = OMS_MIN(
mixer->settings.sample_buffer_size / mixer->settings.sample_size,
mixer->settings.buffer_size / mixer->settings.sample_size
);
int32 mixer_effects_mono(AudioMixer* mixer, uint64 effect, int32 samples)
{
int32 sound_sample_index = 0;
if (effect & AUDIO_EFFECT_ECHO) {
apply_echo(mixer->buffer_temp, samples * 2, 0.2f, 0.4f, mixer->settings.sample_rate);
}
if (effect & AUDIO_EFFECT_REVERB) {
apply_reverb(mixer->buffer_temp, samples * 2, 0.3f);
}
if (effect & AUDIO_EFFECT_UNDERWATER) {
apply_underwater(mixer->buffer_temp, samples * 2);
}
if (effect & AUDIO_EFFECT_CAVE) {
apply_cave(mixer->buffer_temp, samples * 2, mixer->settings.sample_rate);
}
if (effect & AUDIO_EFFECT_LOWPASS) {
apply_lowpass(mixer->buffer_temp, samples * 2, 500.0f, mixer->settings.sample_rate); // Cutoff frequency 500
}
if (effect & AUDIO_EFFECT_HIGHPASS) {
apply_highpass(mixer->buffer_temp, samples * 2, 2000.0f, mixer->settings.sample_rate); // Cutoff frequency 2 kHz
}
if (effect & AUDIO_EFFECT_FLANGER) {
apply_flanger(mixer->buffer_temp, samples * 2, 0.25f, 0.005f, mixer->settings.sample_rate);
}
if (effect & AUDIO_EFFECT_TREMOLO) {
apply_tremolo(mixer->buffer_temp, samples * 2, 5.0f, 0.8f, mixer->settings.sample_rate);
}
if (effect & AUDIO_EFFECT_DISTORTION) {
apply_distortion(mixer->buffer_temp, samples * 2, 10.0f);
}
if (effect & AUDIO_EFFECT_CHORUS) {
apply_chorus(mixer->buffer_temp, samples * 2, 0.25f, 0.005f, mixer->settings.sample_rate);
}
if (effect & AUDIO_EFFECT_PITCH_SHIFT) {
apply_pitch_shift(mixer->buffer_temp, samples * 2, 1.2f); // Slight pitch increase
}
if (effect & AUDIO_EFFECT_GRANULAR_DELAY) {
apply_granular_delay(mixer->buffer_temp, samples * 2, 0.1f, 0.2f, mixer->settings.sample_rate);
}
if (effect & AUDIO_EFFECT_FM) {
apply_frequency_modulation(mixer->buffer_temp, samples * 2, 2.0f, 0.5f, mixer->settings.sample_rate);
}
if (effect & AUDIO_EFFECT_STEREO_PANNING) {
apply_stereo_panning(mixer->buffer_temp, samples * 2, 0.5f);
}
/*
if (effect & AUDIO_EFFECT_EASE_IN) {
apply_ease_in(mixer->buffer_temp, samples * 2, 0.5f);
}
if (effect & AUDIO_EFFECT_EASE_IN) {
apply_ease_out(mixer->buffer_temp, samples * 2, 0.5f);
}
*/
if (effect & AUDIO_EFFECT_SPEED) {
sound_sample_index += apply_speed(mixer->buffer_temp, samples * 2, 1.0f);
}
return sound_sample_index;
}
int32 mixer_effects_stereo()
{
return 0;
}
void audio_mixer_mix(AudioMixer* mixer, uint32 size) {
memset(mixer->settings.buffer, 0, size);
mixer->settings.sample_buffer_size = 0;
uint32 limit_max = size / mixer->settings.sample_size;
bool has_location = !is_empty((byte *) &mixer->camera.audio_location, sizeof(mixer->camera.audio_location));
@ -310,6 +438,8 @@ void audio_mixer_mix(AudioMixer* mixer) {
continue;
}
uint32 limit = limit_max;
// Compute the vector from the player to the sound's origin
v3_f32 to_sound = {};
f32 total_attenuation = 1.0f;
@ -331,105 +461,93 @@ void audio_mixer_mix(AudioMixer* mixer) {
}
uint32 sound_sample_count = sound->audio_size / mixer->settings.sample_size;
uint32 sound_sample_index = sound->sample_index;
int32 sound_sample_index = sound->sample_index;
int16* audio_data = (int16 *) sound->audio_data;
// Temporary buffer for effects processing
// @performance If there are situations where only one file in the mixer should be played we could write directly to
// the output buffer, improving performance. Some of those mixers are: music, cinematic, ui
// Careful, NOT voice since we will probably manually layer them according to their position?
for (int32 j = 0; j < limit; ++j) {
if (sound_sample_index >= sound_sample_count) {
// @todo if repeat we need to handle part of it here, else quit
if (sound->channels == 1) {
// We make it stereo
for (int32 j = 0; j < limit; ++j) {
if (sound_sample_index >= sound_sample_count) {
if (!sound->repeat) {
limit = j;
break;
}
sound_sample_index = 0;
sound_sample_index = 0;
}
// @question why are we doing this?
mixer->settings.sample_index = 0;
// We could make the temp buffer stereo here but we have to touch the array again later anyway.
// This way we can easily perform mixer effects on a mono output.
mixer->buffer_temp[j] = (int16) (audio_data[sound_sample_index] * volume_scale * total_attenuation);
++sound_sample_index;
// @performance Some adjustments could be made right here; the question is whether this is faster.
// Probably depends on how likely the adjustment is to happen. Orientation effects are probably very likely.
}
mixer->buffer_temp[j * 2] = (int16) (audio_data[sound_sample_index * 2] * volume_scale * total_attenuation);
mixer->buffer_temp[j * 2 + 1] = (int16) (audio_data[sound_sample_index * 2 + 1] * volume_scale * total_attenuation);
// Apply effects based on sound's effect type
if (sound->effect) {
int32 sample_adjustment = mixer_effects_mono(mixer, sound->effect, sound_sample_index);
sound_sample_index += sample_adjustment;
limit += sample_adjustment;
}
} else {
for (int32 j = 0; j < limit; ++j) {
if (sound_sample_index >= sound_sample_count) {
if (!sound->repeat) {
limit = j;
break;
}
++sound_sample_index;
sound_sample_index = 0;
}
// @performance Some adjustments could be made right here; the question is whether this is faster.
// Probably depends on how likely the adjustment is to happen.
mixer->buffer_temp[j * 2] = (int16) (audio_data[sound_sample_index * 2] * volume_scale * total_attenuation);
mixer->buffer_temp[j * 2 + 1] = (int16) (audio_data[sound_sample_index * 2 + 1] * volume_scale * total_attenuation);
// @todo if end of file and no repeat -> remove from list
}
++sound_sample_index;
// @question We also have to set setting->sample_index = sound_sample_index.
// But that currently happens in the sound API. Do we want to keep it there or move it here?
// Apply effects based on sound's effect type
// @performance Depending on how we implement effects we could even pull them out of this loop
// What I mean is effects could either be sound file dependent (current location correct) or mixer dependent
if (mixer->effect) {
if (mixer->effect & AUDIO_EFFECT_ECHO) {
apply_echo(mixer->buffer_temp, limit, 0.2f, 0.4f, mixer->settings.sample_rate);
// @performance Some adjustments could be made right here; the question is whether this is faster.
// Probably depends on how likely the adjustment is to happen. Orientation effects are probably very likely.
}
if (mixer->effect & AUDIO_EFFECT_REVERB) {
apply_reverb(mixer->buffer_temp, limit, 0.3f);
}
if (mixer->effect & AUDIO_EFFECT_UNDERWATER) {
apply_underwater(mixer->buffer_temp, limit);
}
if (mixer->effect & AUDIO_EFFECT_CAVE) {
apply_cave(mixer->buffer_temp, limit, mixer->settings.sample_rate);
}
if (mixer->effect & AUDIO_EFFECT_LOWPASS) {
apply_lowpass(mixer->buffer_temp, limit, 500.0f, mixer->settings.sample_rate); // Cutoff frequency 500
}
if (mixer->effect & AUDIO_EFFECT_HIGHPASS) {
apply_highpass(mixer->buffer_temp, limit, 2000.0f, mixer->settings.sample_rate); // Cutoff frequency 2 kHz
}
if (mixer->effect & AUDIO_EFFECT_FLANGER) {
apply_flanger(mixer->buffer_temp, limit, 0.25f, 0.005f, mixer->settings.sample_rate);
}
if (mixer->effect & AUDIO_EFFECT_TREMOLO) {
apply_tremolo(mixer->buffer_temp, limit, 5.0f, 0.8f, mixer->settings.sample_rate);
}
if (mixer->effect & AUDIO_EFFECT_DISTORTION) {
apply_distortion(mixer->buffer_temp, limit, 10.0f);
}
if (mixer->effect & AUDIO_EFFECT_CHORUS) {
apply_chorus(mixer->buffer_temp, limit, 0.25f, 0.005f, mixer->settings.sample_rate);
}
if (mixer->effect & AUDIO_EFFECT_PITCH_SHIFT) {
apply_pitch_shift(mixer->buffer_temp, limit, 1.2f); // Slight pitch increase
}
if (mixer->effect & AUDIO_EFFECT_GRANULAR_DELAY) {
apply_granular_delay(mixer->buffer_temp, limit, 0.1f, 0.2f, mixer->settings.sample_rate);
}
if (mixer->effect & AUDIO_EFFECT_FM) {
apply_frequency_modulation(mixer->buffer_temp, limit, 2.0f, 0.5f, mixer->settings.sample_rate);
}
if (mixer->effect & AUDIO_EFFECT_STEREO_PANNING) {
apply_stereo_panning(mixer->buffer_temp, limit, 0.5f);
// Apply effects based on sound's effect type
if (sound->effect) {
int32 sample_adjustment = mixer_effects_stereo() / 2;
sound_sample_index += sample_adjustment;
limit += sample_adjustment;
}
}
// @bug the actual output "limit" could be smaller if sound files end earlier and no repeat is defined
// In that case we would also have to adjust mixer->settings.sample_buffer_size
// @bug if we use speed up effect, this value could be negative. Fix.
sound->sample_index = sound_sample_index;
// Add the processed sound to the output buffer
for (uint32 j = 0; j < limit; j++) {
mixer->settings.buffer[j] += mixer->buffer_temp[j];
if (sound->channels == 1) {
// We turn it into stereo here
for (uint32 j = 0; j < limit; ++j) {
mixer->settings.buffer[j * 2] += mixer->buffer_temp[j];
mixer->settings.buffer[j * 2 + 1] += mixer->buffer_temp[j];
}
} else {
for (uint32 j = 0; j < limit * 2; ++j) {
mixer->settings.buffer[j] += mixer->buffer_temp[j];
}
}
mixer->settings.sample_buffer_size = OMS_MAX(
mixer->settings.sample_buffer_size,
limit * mixer->settings.sample_size
);
}
if (mixer->effect) {
mixer_effects_stereo();
}
}


@ -16,10 +16,6 @@
#define SOUND_API_XAUDIO2 1
struct AudioSetting {
// position in the audio data
// WARNING: not the byte position, but the index based on the sample size
uint32 sample_index;
f32 master_volume;
// bits per sample


@ -59,11 +59,28 @@ void font_init(Font* font, byte* data, int count)
}
inline
Glyph* font_glyph_find(Font* font, uint32 codepoint)
Glyph* font_glyph_find(const Font* font, uint32 codepoint)
{
for (uint32 i = 0; i < font->glyph_count; ++i) {
if (font->glyphs[i].codepoint == codepoint) {
return &font->glyphs[i];
int32 perfect_glyph_pos = codepoint - font->glyphs[0].codepoint;
int32 limit = OMS_MIN(perfect_glyph_pos, font->glyph_count - 1);
// We try to jump to the correct glyph based on the glyph codepoint
if (font->glyphs[limit].codepoint == codepoint) {
return &font->glyphs[limit];
}
// If that doesn't work we binary search the glyph list, but only up to the last possible match.
// Glyphs must be sorted ascending.
int32 low = 0;
int32 high = limit;
while (low <= high) {
int32 mid = low + (high - low) / 2;
if (font->glyphs[mid].codepoint == codepoint) {
return &font->glyphs[mid];
} else if (font->glyphs[mid].codepoint < codepoint) {
low = mid + 1;
} else {
high = mid - 1;
}
}
@ -254,9 +271,21 @@ int32 font_to_data(
return size;
}
inline
f32 font_line_height(Font* font, f32 size)
{
return font->line_height * size / font->size;
}
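A quick worked example of the scaling above (numbers are illustrative): a font baked at size = 32 with line_height = 38, rendered at size = 16, gives
// font_line_height(font, 16.0f) = 38 * 16 / 32 = 19 pixels per line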
inline
void font_invert_coordinates(Font* font)
{
// @todo Implement y-offset correction
for (uint32 i = 0; i < font->glyph_count; ++i) {
float temp = font->glyphs[i].coords.y1;
font->glyphs[i].coords.y1 = 1.0f - font->glyphs[i].coords.y2;
font->glyphs[i].coords.y2 = 1.0f - temp;
}
}
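A small worked example of the flip above (illustrative values): a glyph with coords.y1 = 0.20f and coords.y2 = 0.35f becomes
// temp = 0.20f; y1 = 1.0f - 0.35f = 0.65f; y2 = 1.0f - 0.20f = 0.80f
// the vertical extent stays the same, only the origin moves from top to bottom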
#endif


@ -299,8 +299,6 @@ f32 text_calculate_dimensions_width(
f32 x = 0;
f32 offset_x = 0;
uint32 first_glyph = font->glyphs[0].codepoint;
// @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value
for (int32 i = 0; i < length; ++i) {
@ -313,25 +311,7 @@ f32 text_calculate_dimensions_width(
continue;
}
Glyph* glyph = NULL;
// We try to jump to the correct glyph based on the glyph codepoint
// If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending)
int32 perfect_glyph_pos = character - first_glyph;
if (font->glyph_count > perfect_glyph_pos
&& font->glyphs[perfect_glyph_pos].codepoint == character
) {
glyph = &font->glyphs[perfect_glyph_pos];
} else {
// @performance consider to do binary search
for (int32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) {
if (font->glyphs[j].codepoint == character) {
glyph = &font->glyphs[j];
break;
}
}
}
Glyph* glyph = font_glyph_find(font, character);
if (!glyph) {
continue;
}
@ -353,8 +333,6 @@ void text_calculate_dimensions(
f32 offset_x = 0;
uint32 first_glyph = font->glyphs[0].codepoint;
// @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value
for (int32 i = 0; i < length; ++i) {
@ -369,25 +347,7 @@ void text_calculate_dimensions(
continue;
}
Glyph* glyph = NULL;
// We try to jump to the correct glyph based on the glyph codepoint
// If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending)
int32 perfect_glyph_pos = character - first_glyph;
if (font->glyph_count > perfect_glyph_pos
&& font->glyphs[perfect_glyph_pos].codepoint == character
) {
glyph = &font->glyphs[perfect_glyph_pos];
} else {
// @performance consider to do binary search
for (int32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) {
if (font->glyphs[j].codepoint == character) {
glyph = &font->glyphs[j];
break;
}
}
}
Glyph* glyph = font_glyph_find(font, character);
if (!glyph) {
continue;
}
@ -433,10 +393,6 @@ v2_f32 vertex_text_create(
}
}
uint32 first_glyph = font->glyphs[0].codepoint;
int32 first_char = is_ascii ? text[0] : utf8_get_char_at(text, 0);
f32 offset_x = x;
for (int32 i = 0; i < length; ++i) {
int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i);
@ -447,25 +403,7 @@ v2_f32 vertex_text_create(
continue;
}
Glyph* glyph = NULL;
// We try to jump to the correct glyph based on the glyph codepoint
// If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending)
int32 perfect_glyph_pos = character - first_glyph;
if (font->glyph_count > perfect_glyph_pos
&& font->glyphs[perfect_glyph_pos].codepoint == character
) {
glyph = &font->glyphs[perfect_glyph_pos];
} else {
// @performance consider to do binary search
for (uint32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) {
if (font->glyphs[j].codepoint == character) {
glyph = &font->glyphs[j];
break;
}
}
}
Glyph* glyph = font_glyph_find(font, character);
if (!glyph) {
continue;
}
@ -577,8 +515,6 @@ f32 ui_text_create(
}
}
uint32 first_glyph = theme->font.glyphs[0].codepoint;
int32 start = *index;
f32 offset_x = (f32) x->value_int;
f32 offset_y = (f32) y->value_int;
@ -594,25 +530,7 @@ f32 ui_text_create(
continue;
}
Glyph* glyph = NULL;
// We try to jump to the correct glyph based on the glyph codepoint
// If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending)
int32 perfect_glyph_pos = character - first_glyph;
if (theme->font.glyph_count > perfect_glyph_pos
&& theme->font.glyphs[perfect_glyph_pos].codepoint == character
) {
glyph = &theme->font.glyphs[perfect_glyph_pos];
} else {
// @performance consider to do binary search
for (int32 j = 0; j <= perfect_glyph_pos && j < theme->font.glyph_count; ++j) {
if (theme->font.glyphs[j].codepoint == character) {
glyph = &theme->font.glyphs[j];
break;
}
}
}
Glyph* glyph = font_glyph_find(&theme->font, character);
if (!glyph) {
continue;
}
@ -721,7 +639,7 @@ void ui_button_create(
vertex_text_create(
vertices, index, zindex,
x->value_float, y->value_float, width->value_float, height->value_float, align_h->value_float, align_v->value_float,
x->value_float, y->value_float, width->value_float, height->value_float, align_h->value_int, align_v->value_int,
&theme->font, text->value_str, size->value_float, color_index->value_float
);


@ -156,7 +156,7 @@ void texture_use_1D(const Texture* texture, uint32 texture_unit)
glBindTexture(GL_TEXTURE_1D, (GLuint) texture->id);
}
GLuint shader_make(GLenum type, const char *source, RingMemory* ring)
GLuint shader_make(GLenum type, const char* source, RingMemory* ring)
{
GLuint shader = glCreateShader(type);
glShaderSource(shader, 1, (GLchar **) &source, NULL);


@ -31,6 +31,7 @@ enum PixelType
// If has_alpha is defined it forces an alpha channel even for bitmaps
// order_pixels defines how the pixels should be ordered
// order_rows defines how the rows should be ordered
// @question Do we really ever need int32 for width/height?
struct Image {
uint32 width;
uint32 height;

image/Qoi.h (new file, +230 lines)

@ -0,0 +1,230 @@
/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef TOS_IMAGE_QOI_H
#define TOS_IMAGE_QOI_H
#include "../stdlib/Types.h"
#include "../memory/RingMemory.h"
#define QOI_OP_INDEX 0b00000000
#define QOI_OP_DIFF 0b01000000
#define QOI_OP_LUMA 0b10000000
#define QOI_OP_RUN 0b11000000 // @todo There is a HUGE step from here to QOI_OP_RGB; this leaves room for more cases or for using this range
#define QOI_OP_RGB 0b11111110
#define QOI_OP_RGBA 0b11111111
#define QOI_MASK_2 0b11000000
#define QOI_COLOR_HASH(color) (color.r * 3 + color.g * 5 + color.b * 7 + color.a * 11)
#define QOI_COLOR_HASH_2(color) ((((uint32)(color)) * 0x9E3779B1U) >> 26)
#define QOI_HEADER_SIZE 9
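For reference, a worked example of how QOI_OP_DIFF packs its three 2-bit deltas (this follows the encoder below; the deltas are illustrative): dr = +1, dg = 0, db = -1, each biased by +2, become
// QOI_OP_DIFF | (dr + 2) << 4 | (dg + 2) << 2 | (db + 2)
// = 0b01000000 | 0b00110000 | 0b00001000 | 0b00000001 = 0b01111001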
// @question Do we really ever need int32 for width/height?
struct QoiDescription {
uint32 width;
uint32 height;
byte channels;
byte colorspace;
};
uint32 qoi_encode_size(QoiDescription* desc)
{
return desc->width * desc->height * (desc->channels + 1) + QOI_HEADER_SIZE;
}
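A minimal usage sketch for the encoder, assuming the worst-case output buffer comes from the ring allocator used elsewhere in this codebase (the image and ring variables are illustrative):
QoiDescription desc = { image->width, image->height, 4, 0 };
byte* encoded = ring_get_memory(ring, qoi_encode_size(&desc)); // worst-case size
int32 encoded_size = qoi_encode(image->pixels, encoded, &desc);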
int32 qoi_encode(const byte* data, byte* output, const QoiDescription* desc) {
if (desc->width == 0 || desc->height == 0 ||
desc->channels < 3 || desc->channels > 4 ||
desc->colorspace > 1
) {
return -1; // invalid description
}
int32 p = 0;
*((uint32 *) &output[p]) = SWAP_ENDIAN_LITTLE(desc->width); p += 4;
*((uint32 *) &output[p]) = SWAP_ENDIAN_LITTLE(desc->height); p += 4;
// Channel count 1-4 requires 3 bits, colorspace requires 1 bit
output[p++] = ((desc->channels - 1) << 1) | (desc->colorspace & 0x01);
v4_byte index[64];
memset(index, 0, sizeof(index));
v4_byte px_prev = {0, 0, 0, 255};
v4_byte px = px_prev;
int32 px_len = desc->width * desc->height * desc->channels;
int32 px_end = px_len - desc->channels;
int32 channels = desc->channels;
int32 run = 0;
for (int32 px_pos = 0; px_pos < px_len; px_pos += channels) {
memcpy(&px, &data[px_pos], channels * sizeof(byte));
if (px.v == px_prev.v) {
++run;
if (run == 62 || px_pos == px_end) {
output[p++] = QOI_OP_RUN | (run - 1);
run = 0;
}
} else {
if (run) {
output[p++] = QOI_OP_RUN | (run - 1);
run = 0;
}
int32 index_pos = QOI_COLOR_HASH(px) % 64;
//int32 index_pos = QOI_COLOR_HASH_2(px);
if (index[index_pos].v == px.v) {
output[p++] = QOI_OP_INDEX | index_pos;
} else {
index[index_pos] = px;
if (px.a == px_prev.a) {
signed char vr = px.r - px_prev.r;
signed char vg = px.g - px_prev.g;
signed char vb = px.b - px_prev.b;
signed char vg_r = vr - vg;
signed char vg_b = vb - vg;
if (vr > -3 && vr < 2
&& vg > -3 && vg < 2
&& vb > -3 && vb < 2
) {
output[p++] = QOI_OP_DIFF | (vr + 2) << 4 | (vg + 2) << 2 | (vb + 2);
} else if (vg_r > -9 && vg_r < 8
&& vg > -33 && vg < 32
&& vg_b > -9 && vg_b < 8
) {
output[p++] = QOI_OP_LUMA | (vg + 32);
output[p++] = (vg_r + 8) << 4 | (vg_b + 8);
} else {
output[p++] = QOI_OP_RGB;
output[p++] = px.r;
output[p++] = px.g;
output[p++] = px.b;
}
} else {
output[p++] = QOI_OP_RGBA;
*((uint32 *) &output[p]) = SWAP_ENDIAN_LITTLE(px.val);
p += 4;
}
}
}
px_prev = px;
}
return p;
}
uint32 qoi_decode_size(QoiDescription* desc, int32 channels)
{
return desc->width * desc->height * channels;
}
void qoi_decode(const byte* data, byte* output, int32 steps = 8)
{
int32 p = 0;
uint32 width = SWAP_ENDIAN_LITTLE(*((uint32 *) &data[p])); p += 4;
uint32 height = SWAP_ENDIAN_LITTLE(*((uint32 *) &data[p])); p += 4;
// Channel count 1-4 requires 3 bits, colorspace requires 1 bit
int32 colorspace = data[p] & 0x01;
uint32 channels = ((data[p] >> 1) & 0x07) + 1;
++p;
uint32 px_len = width * height * channels;
v4_byte px = {0, 0, 0, 255};
v4_byte index[64];
memset(index, 0, sizeof(index));
int32 run = 0;
for (uint32 px_pos = 0; px_pos < px_len; px_pos += channels) {
int32 b1 = data[p++];
if (b1 == QOI_OP_RGB) {
px.r = data[p++];
px.g = data[p++];
px.b = data[p++];
} else if (b1 == QOI_OP_RGBA) {
px.val = SWAP_ENDIAN_LITTLE(*((uint32 *) &data[p]));
p += 4;
} else if ((b1 & QOI_MASK_2) == QOI_OP_INDEX) {
px = index[b1];
} else if ((b1 & QOI_MASK_2) == QOI_OP_DIFF) {
px.r += ((b1 >> 4) & 0x03) - 2;
px.g += ((b1 >> 2) & 0x03) - 2;
px.b += ( b1 & 0x03) - 2;
} else if ((b1 & QOI_MASK_2) == QOI_OP_LUMA) {
int32 b2 = data[p++];
int32 vg = (b1 & 0x3f) - 32;
px.r += vg - 8 + ((b2 >> 4) & 0x0f);
px.g += vg;
px.b += vg - 8 + (b2 & 0x0f);
} else if ((b1 & QOI_MASK_2) == QOI_OP_RUN) {
run = (b1 & 0x3f);
if (channels == 4) {
uint32 px_little_endian = SWAP_ENDIAN_LITTLE(px.val);
int32 pixel_step_size = steps * 4;
int32 i = 0;
if (steps == 16) {
__m512i simd_value = _mm512_set1_epi32(px_little_endian);
for(; i <= run - steps; i += steps, px_pos += pixel_step_size) {
_mm512_storeu_si512((__m512i *) &output[px_pos], simd_value);
}
} else if (steps >= 8) {
__m256i simd_value = _mm256_set1_epi32(px_little_endian);
for (; i <= run - steps; i += steps, px_pos += pixel_step_size) {
_mm256_storeu_si256((__m256i *) &output[px_pos], simd_value);
}
} else if (steps >= 4) {
__m128i simd_value = _mm_set1_epi32(px_little_endian);
for(; i <= run - steps; i += steps, px_pos += pixel_step_size) {
_mm_storeu_si128((__m128i *) &output[px_pos], simd_value);
}
}
for (; i < run; ++i) {
*((uint32 *) &output[px_pos]) = px_little_endian;
px_pos += channels;
}
} else if (channels == 3) {
for (int32 i = 0; i < run; ++i) {
output[px_pos++] = px.r;
output[px_pos++] = px.g;
output[px_pos++] = px.b;
}
} else if (channels == 1) {
memset(&output[px_pos], px.r, run * sizeof(byte));
px_pos += run;
}
// Correction, since the loop increments by the channel count as well
px_pos -= channels;
index[QOI_COLOR_HASH(px) % 64] = px;
//index[QOI_COLOR_HASH_2(px)] = px;
continue;
}
index[QOI_COLOR_HASH(px) % 64] = px;
//index[QOI_COLOR_HASH_2(px)] = px;
memcpy(&output[px_pos], &px, channels * sizeof(byte));
}
}
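And a matching decode sketch: the header is read first so the output buffer can be sized before calling qoi_decode (illustrative code, not part of this commit):
uint32 width = SWAP_ENDIAN_LITTLE(*((uint32 *) &encoded[0]));
uint32 height = SWAP_ENDIAN_LITTLE(*((uint32 *) &encoded[4]));
int32 channels = ((encoded[8] >> 1) & 0x07) + 1;
QoiDescription desc = { width, height };
byte* pixels = ring_get_memory(ring, qoi_decode_size(&desc, channels));
qoi_decode(encoded, pixels);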
#endif


@ -92,7 +92,7 @@ void buffer_reset(BufferMemory* buf)
}
inline
byte* buffer_get_memory(BufferMemory* buf, uint64 size, int32 aligned = 0, bool zeroed = false)
byte* buffer_get_memory(BufferMemory* buf, uint64 size, int32 aligned = 4, bool zeroed = false)
{
ASSERT_SIMPLE(size <= buf->size);


@ -34,9 +34,9 @@ struct ChunkMemory {
uint64 count;
uint64 size;
uint64 chunk_size;
int64 last_pos;
int32 alignment;
uint64 last_pos;
uint32 chunk_size;
uint32 alignment;
// length = count
// free describes which locations are used and which are free
@ -44,7 +44,7 @@ struct ChunkMemory {
};
inline
void chunk_alloc(ChunkMemory* buf, uint64 count, uint64 chunk_size, int32 alignment = 64)
void chunk_alloc(ChunkMemory* buf, uint64 count, uint32 chunk_size, int32 alignment = 64)
{
ASSERT_SIMPLE(chunk_size);
ASSERT_SIMPLE(count);
@ -58,7 +58,7 @@ void chunk_alloc(ChunkMemory* buf, uint64 count, uint64 chunk_size, int32 alignm
buf->count = count;
buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64);
buf->chunk_size = chunk_size;
buf->last_pos = -1;
buf->last_pos = 0;
buf->alignment = alignment;
// @question Could it be beneficial to have this before the element data?
@ -70,7 +70,7 @@ void chunk_alloc(ChunkMemory* buf, uint64 count, uint64 chunk_size, int32 alignm
}
inline
void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint64 chunk_size, int32 alignment = 64)
void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint32 chunk_size, int32 alignment = 64)
{
ASSERT_SIMPLE(chunk_size);
ASSERT_SIMPLE(count);
@ -82,7 +82,7 @@ void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint64 chunk
buf->count = count;
buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64);
buf->chunk_size = chunk_size;
buf->last_pos = -1;
buf->last_pos = 0;
buf->alignment = alignment;
// @question Could it be beneficial to have this before the element data?
@ -95,7 +95,7 @@ void chunk_init(ChunkMemory* buf, BufferMemory* data, uint64 count, uint64 chunk
}
inline
void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint64 chunk_size, int32 alignment = 64)
void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint32 chunk_size, int32 alignment = 64)
{
ASSERT_SIMPLE(chunk_size);
ASSERT_SIMPLE(count);
@ -108,7 +108,7 @@ void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint64 chunk_size, i
buf->count = count;
buf->size = count * chunk_size + sizeof(uint64) * CEIL_DIV(count, 64);
buf->chunk_size = chunk_size;
buf->last_pos = -1;
buf->last_pos = 0;
buf->alignment = alignment;
// @question Could it be beneficial to have this before the element data?
@ -320,15 +320,15 @@ int64 chunk_dump(const ChunkMemory* buf, byte* data)
data += sizeof(buf->size);
// Chunk Size
*((uint64 *) data) = SWAP_ENDIAN_LITTLE(buf->chunk_size);
*((uint32 *) data) = SWAP_ENDIAN_LITTLE(buf->chunk_size);
data += sizeof(buf->chunk_size);
// Last pos
*((int64 *) data) = SWAP_ENDIAN_LITTLE(buf->last_pos);
*((uint64 *) data) = SWAP_ENDIAN_LITTLE(buf->last_pos);
data += sizeof(buf->last_pos);
// Alignment
*((int32 *) data) = SWAP_ENDIAN_LITTLE(buf->alignment);
*((uint32 *) data) = SWAP_ENDIAN_LITTLE(buf->alignment);
data += sizeof(buf->alignment);
// All memory is handled in the buffer -> simply copy the buffer
@ -351,15 +351,15 @@ int64 chunk_load(ChunkMemory* buf, const byte* data)
data += sizeof(buf->size);
// Chunk Size
buf->chunk_size = SWAP_ENDIAN_LITTLE(*((uint64 *) data));
buf->chunk_size = SWAP_ENDIAN_LITTLE(*((uint32 *) data));
data += sizeof(buf->chunk_size);
// Last pos
buf->last_pos = SWAP_ENDIAN_LITTLE(*((int64 *) data));
buf->last_pos = SWAP_ENDIAN_LITTLE(*((uint64 *) data));
data += sizeof(buf->last_pos);
// Alignment
buf->alignment = SWAP_ENDIAN_LITTLE(*((int32 *) data));
buf->alignment = SWAP_ENDIAN_LITTLE(*((uint32 *) data));
data += sizeof(buf->alignment);
memcpy(buf->memory, data, buf->size);


@ -10,6 +10,7 @@
#define TOS_MEMORY_QUEUE_H
#include "../stdlib/Types.h"
#include "../utils/Utils.h"
#include "RingMemory.h"
// WARNING: Structure needs to be the same as RingMemory
@ -81,7 +82,7 @@ bool queue_is_full(Queue* queue) {
}
inline
void queue_enqueue_unique(ThreadedQueue* queue, const byte* data)
void queue_enqueue_unique(Queue* queue, const byte* data)
{
ASSERT_SIMPLE((uint64_t) data % 4 == 0);
@ -191,7 +192,7 @@ bool queue_dequeue(Queue* queue, byte* data)
inline
bool queue_dequeue_atomic(Queue* queue, byte* data)
{
if (atomic_get_relaxed((uint64 *) &queue->head) == (uint64) queue->tail) {
if (atomic_get_acquire_release((volatile uint64 *) &queue->head) == (uint64) queue->tail) {
return false;
}


@ -92,7 +92,7 @@ void ring_init(RingMemory* ring, byte* buf, uint64 size, uint32 alignment = 64)
{
ASSERT_SIMPLE(size);
ring->memory = (byte *) ROUND_TO_NEAREST((uintptr_t) buf, alignment);
ring->memory = (byte *) ROUND_TO_NEAREST((uintptr_t) buf, (uint64) alignment);
ring->end = ring->memory + size;
ring->head = ring->memory;
@ -117,7 +117,7 @@ void ring_free(RingMemory* ring)
}
inline
byte* ring_calculate_position(const RingMemory* ring, uint64 size, byte aligned = 0)
byte* ring_calculate_position(const RingMemory* ring, uint64 size, uint32 aligned = 4)
{
byte* head = ring->head;
@ -126,7 +126,7 @@ byte* ring_calculate_position(const RingMemory* ring, uint64 size, byte aligned
head += (aligned - (address & (aligned - 1))) % aligned;
}
size = ROUND_TO_NEAREST(size, aligned);
size = ROUND_TO_NEAREST(size, (uint64) aligned);
if (head + size > ring->end) {
head = ring->memory;
@ -147,7 +147,7 @@ void ring_reset(RingMemory* ring)
}
// Moves a pointer based on the size you want to consume (new position = after consuming size)
void ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, byte aligned = 0)
void ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, uint32 aligned = 4)
{
ASSERT_SIMPLE(size <= ring->size);
@ -160,7 +160,7 @@ void ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, byte aligned =
*pos += (aligned - (address& (aligned - 1))) % aligned;
}
size = ROUND_TO_NEAREST(size, aligned);
size = ROUND_TO_NEAREST(size, (uint64) aligned);
if (*pos + size > ring->end) {
*pos = ring->memory;
@ -173,7 +173,7 @@ void ring_move_pointer(RingMemory* ring, byte** pos, uint64 size, byte aligned =
*pos += size;
}
byte* ring_get_memory(RingMemory* ring, uint64 size, byte aligned = 0, bool zeroed = false)
byte* ring_get_memory(RingMemory* ring, uint64 size, uint32 aligned = 4, bool zeroed = false)
{
ASSERT_SIMPLE(size <= ring->size);
@ -182,7 +182,7 @@ byte* ring_get_memory(RingMemory* ring, uint64 size, byte aligned = 0, bool zero
ring->head += (aligned - (address& (aligned - 1))) % aligned;
}
size = ROUND_TO_NEAREST(size, aligned);
size = ROUND_TO_NEAREST(size, (uint64) aligned);
if (ring->head + size > ring->end) {
ring_reset(ring);
@ -207,7 +207,7 @@ byte* ring_get_memory(RingMemory* ring, uint64 size, byte aligned = 0, bool zero
}
// Same as ring_get_memory but DOESN'T move the head
byte* ring_get_memory_nomove(RingMemory* ring, uint64 size, byte aligned = 0, bool zeroed = false)
byte* ring_get_memory_nomove(RingMemory* ring, uint64 size, uint32 aligned = 4, bool zeroed = false)
{
ASSERT_SIMPLE(size <= ring->size);
@ -218,7 +218,7 @@ byte* ring_get_memory_nomove(RingMemory* ring, uint64 size, byte aligned = 0, bo
pos += (aligned - (address& (aligned - 1))) % aligned;
}
size = ROUND_TO_NEAREST(size, aligned);
size = ROUND_TO_NEAREST(size, (uint64) aligned);
if (pos + size > ring->end) {
ring_reset(ring);
@ -253,11 +253,10 @@ byte* ring_get_element(const RingMemory* ring, uint64 element_count, uint64 elem
* Checks if one additional element can be inserted without overwriting the tail index
*/
inline
bool ring_commit_safe(const RingMemory* ring, uint64 size, byte aligned = 0)
bool ring_commit_safe(const RingMemory* ring, uint64 size, uint32 aligned = 4)
{
// aligned * 2 since that should be the maximum overhead for an element
// @bug could this result in a case where the ring is considered empty/full (false positive/negative)?
// The "correct" version would probably to use ring_move_pointer in some form
// This is not 100% correct BUT it is way faster than any correct version I can come up with
uint64 max_mem_required = size + aligned * 2;
if (ring->tail < ring->head) {
@ -271,15 +270,17 @@ bool ring_commit_safe(const RingMemory* ring, uint64 size, byte aligned = 0)
}
inline
bool ring_commit_safe_atomic(const RingMemory* ring, uint64 size, byte aligned = 0)
bool ring_commit_safe_atomic(const RingMemory* ring, uint64 size, uint32 aligned = 4)
{
// aligned * 2 since that should be the maximum overhead for an element
// @bug could this result in a case where the ring is considered empty/full (false positive/negative)?
// The "correct" version would probably to use ring_move_pointer in some form
// This is not 100% correct BUT it is way faster than any correct version I can come up with
uint64 max_mem_required = size + aligned * 2;
// @todo consider to switch to uintptr_t
uint64 tail = atomic_get_relaxed((uint64 *) &ring->tail);
uint64 head = atomic_get_relaxed((uint64 *) &ring->head);
// This doesn't have to be atomic since we assume single producer/consumer and a commit is performed by the consumer
uint64 head = (uint64) ring->head;
if (tail < head) {
return ((uint64) (ring->end - head)) > max_mem_required
@ -291,18 +292,6 @@ bool ring_commit_safe_atomic(const RingMemory* ring, uint64 size, byte aligned =
}
}
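A quick worked example of the conservative bound used by both commit checks above (values are illustrative):
// size = 100, aligned = 64 -> max_mem_required = 100 + 64 * 2 = 228
// the commit only counts as safe if the relevant contiguous region (head..end, or the gap up to the tail) is larger than 228 bytes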
inline
void ring_force_head_update(const RingMemory* ring)
{
_mm_clflush(ring->head);
}
inline
void ring_force_tail_update(const RingMemory* ring)
{
_mm_clflush(ring->tail);
}
inline
int64 ring_dump(const RingMemory* ring, byte* data)
{


@ -23,8 +23,8 @@ struct ThreadedChunkMemory {
uint64 count;
uint64 size;
uint64 chunk_size;
int64 last_pos;
uint32 chunk_size;
int32 alignment;
// length = count


@ -51,7 +51,7 @@ struct ThreadedQueue {
};
inline
void thrd_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint64 element_size, uint32 alignment = 64)
void thrd_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint32 element_size, uint32 alignment = 64)
{
element_size = ROUND_TO_NEAREST(element_size, alignment);
@ -67,7 +67,7 @@ void thrd_queue_alloc(ThreadedQueue* queue, uint32 element_count, uint64 element
}
inline
void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_count, uint64 element_size, uint32 alignment = 64)
void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_count, uint32 element_size, uint32 alignment = 64)
{
element_size = ROUND_TO_NEAREST(element_size, alignment);
@ -83,7 +83,7 @@ void thrd_queue_init(ThreadedQueue* queue, BufferMemory* buf, uint32 element_cou
}
inline
void thrd_queue_init(ThreadedQueue* queue, byte* buf, uint32 element_count, uint64 element_size, uint32 alignment = 64)
void thrd_queue_init(ThreadedQueue* queue, byte* buf, uint32 element_count, uint32 element_size, uint32 alignment = 64)
{
element_size = ROUND_TO_NEAREST(element_size, alignment);


@ -69,7 +69,7 @@ void thrd_ring_free(ThreadedRingMemory* ring)
}
inline
byte* thrd_ring_calculate_position(ThreadedRingMemory* ring, uint64 size, byte aligned = 0)
byte* thrd_ring_calculate_position(ThreadedRingMemory* ring, uint64 size, byte aligned = 4)
{
pthread_mutex_lock(&ring->mutex);
byte* result = ring_calculate_position((RingMemory *) ring, size, aligned);
@ -87,14 +87,14 @@ void thrd_ring_reset(ThreadedRingMemory* ring)
}
// Moves a pointer based on the size you want to consume (new position = after consuming size)
void thrd_ring_move_pointer(ThreadedRingMemory* ring, byte** pos, uint64 size, byte aligned = 0)
void thrd_ring_move_pointer(ThreadedRingMemory* ring, byte** pos, uint64 size, byte aligned = 4)
{
pthread_mutex_lock(&ring->mutex);
ring_move_pointer((RingMemory *) ring, pos, size, aligned);
pthread_mutex_unlock(&ring->mutex);
}
byte* thrd_ring_get_memory(ThreadedRingMemory* ring, uint64 size, byte aligned = 0, bool zeroed = false)
byte* thrd_ring_get_memory(ThreadedRingMemory* ring, uint64 size, byte aligned = 4, bool zeroed = false)
{
pthread_mutex_lock(&ring->mutex);
byte* result = ring_get_memory((RingMemory *) ring, size, aligned, zeroed);
@ -104,7 +104,7 @@ byte* thrd_ring_get_memory(ThreadedRingMemory* ring, uint64 size, byte aligned =
}
// Same as ring_get_memory but DOESN'T move the head
byte* thrd_ring_get_memory_nomove(ThreadedRingMemory* ring, uint64 size, byte aligned = 0, bool zeroed = false)
byte* thrd_ring_get_memory_nomove(ThreadedRingMemory* ring, uint64 size, byte aligned = 4, bool zeroed = false)
{
pthread_mutex_lock(&ring->mutex);
byte* result = ring_get_memory_nomove((RingMemory *) ring, size, aligned, zeroed);
@ -129,7 +129,7 @@ byte* thrd_ring_get_element(ThreadedRingMemory* ring, uint64 element_count, uint
* Checks if one additional element can be inserted without overwriting the tail index
*/
inline
bool thrd_ring_commit_safe(ThreadedRingMemory* ring, uint64 size, byte aligned = 0)
bool thrd_ring_commit_safe(ThreadedRingMemory* ring, uint64 size, byte aligned = 4)
{
pthread_mutex_lock(&ring->mutex);
bool result = ring_commit_safe((RingMemory *) ring, size, aligned);


@ -23,9 +23,9 @@
#include "../utils/StringUtils.h"
#if __aarch64__
#include "../../../GameEngine/stdlib/sve/SVE_I32.h"
#include "../stdlib/sve/SVE_I32.h"
#else
#include "../../../GameEngine/stdlib/simd/SIMD_I32.h"
#include "../stdlib/simd/SIMD_I32.h"
#endif
#define MESH_VERSION 1
@ -178,7 +178,7 @@ void mesh_from_file_txt(
continue;
}
// NOTE: we always load a file in the format: POSITON + NORMAL + TEXTURE + COLOR
// NOTE: we always load a file in the format: POSITION + NORMAL + TEXTURE + COLOR
// EVEN if some of the data is missing. This is necessary to keep the memory layout roughly consistent.
// The actual binary file later will have the minimized layout.
@ -558,7 +558,7 @@ int32 mesh_data_size(const Mesh* mesh)
int32 mesh_to_data(
const Mesh* mesh,
byte* data,
int32 vertex_save_format = VERTEX_TYPE_ALL,
uint32 vertex_save_format = VERTEX_TYPE_ALL,
int32 steps = 8
)
{


@ -36,7 +36,7 @@
#include "../image/Image.h"
struct Texture {
uint64 id;
uint32 id;
byte sample_id;
// @question Should the texture hold the texture unit?


@ -148,11 +148,12 @@ uint32 audio_buffer_fillable(const AudioSetting* setting, const DirectSoundSetti
return 0;
}
DWORD bytes_to_lock = (setting->sample_index * setting->sample_size) % setting->buffer_size;
DWORD bytes_to_lock = setting->sample_buffer_size;
DWORD bytes_to_write = 0;
DWORD target_cursor = (player_cursor + (setting->latency * setting->sample_size)) % setting->buffer_size;
// @bug Why does this case even exist?
if (bytes_to_lock == player_cursor) {
// @bug What if just started?
bytes_to_write = 0;
@ -179,7 +180,7 @@ void audio_play_buffer(AudioSetting* setting, DirectSoundSetting* api_setting)
void* region2;
DWORD region2_size;
DWORD bytes_to_lock = (setting->sample_index * setting->sample_size) % setting->buffer_size;
DWORD bytes_to_lock = setting->sample_buffer_size;
api_setting->secondary_buffer->Lock(
bytes_to_lock, setting->sample_buffer_size,
@ -204,8 +205,6 @@ void audio_play_buffer(AudioSetting* setting, DirectSoundSetting* api_setting)
api_setting->secondary_buffer->Unlock(region1, region1_size, region2, region2_size);
// @question Do we want to keep this here or move it to the audio mixer?
setting->sample_index += setting->sample_buffer_size / setting->sample_size;
setting->sample_buffer_size = 0;
}


@ -106,8 +106,6 @@ void audio_load(HWND hwnd, AudioSetting* setting, XAudio2Setting* api_setting) {
api_setting->internal_buffer[1].LoopLength = 0;
api_setting->internal_buffer[1].LoopCount = 0;
api_setting->internal_buffer[1].pContext = NULL;
setting->sample_index = 0;
}
inline
@ -117,10 +115,7 @@ void audio_play(AudioSetting* setting, XAudio2Setting* api_setting) {
}
api_setting->source_voice->Start(0, XAUDIO2_COMMIT_NOW);
if (setting->sample_index > 1) {
setting->sample_index = 0;
}
setting->sample_index = 0;
}
inline
@ -199,7 +194,9 @@ void audio_play_buffer(AudioSetting* setting, XAudio2Setting* api_setting) {
}
++setting->sample_output;
setting->sample_index += setting->sample_buffer_size / setting->sample_size;
// @performance Why do I even need this?
//setting->sample_index += setting->sample_buffer_size / setting->sample_size;
setting->sample_buffer_size = 0;
}


@ -12,54 +12,55 @@
#include <windows.h>
#include "../../../stdlib/Types.h"
// WARNING: Windows doesn't really support all the relaxed implementations, we therefore often use acquire as alternative.
// WARNING: Windows doesn't really have distinct relaxed, release, and acquire functions on x86_64.
// You can see that by checking how they are defined
inline
void atomic_set_relaxed(void** target, void* new_pointer)
{
InterlockedExchangePointerAcquire(target, new_pointer);
InterlockedExchangePointerNoFence(target, new_pointer);
}
inline
void* atomic_get_relaxed(void** target)
{
return InterlockedCompareExchangePointer(target, NULL, NULL);
return InterlockedCompareExchangePointerNoFence(target, NULL, NULL);
}
inline
void atomic_set_relaxed(volatile int32* value, int32 new_value)
{
InterlockedExchangeAcquire((long *) value, new_value);
InterlockedExchangeNoFence((long *) value, new_value);
}
inline
void atomic_set_relaxed(volatile int64* value, int64 new_value)
{
InterlockedExchangeAcquire((long *) value, (long) new_value);
InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value);
}
inline
void atomic_set_relaxed(volatile f32* value, f32 new_value)
{
InterlockedExchangeAcquire((long *) value, (long) new_value);
InterlockedExchangeNoFence((long *) value, (long) new_value);
}
inline
void atomic_set_relaxed(volatile f64* value, f64 new_value)
{
InterlockedExchangeAcquire((long *) value, (long) new_value);
InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value);
}
inline
int32 atomic_fetch_set_relaxed(volatile int32* value, int32 new_value)
{
return (int32) InterlockedExchangeAcquire((long *) value, new_value);
return (int32) InterlockedExchangeNoFence((long *) value, new_value);
}
inline
int64 atomic_fetch_set_relaxed(volatile int64* value, int64 new_value)
{
return (int64) InterlockedExchangeAcquire((long *) value, (long) new_value);
return (int64) InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -86,25 +87,25 @@ void atomic_set_relaxed(volatile byte* value, const byte new_value[16])
inline
int32 atomic_get_relaxed(volatile int32* value)
{
return (int32) InterlockedCompareExchangeAcquire((long *) value, 0, 0);
return (int32) InterlockedCompareExchangeNoFence((long *) value, 0, 0);
}
inline
int64 atomic_get_relaxed(volatile int64* value)
{
return (int64) InterlockedCompareExchangeAcquire((long *) value, 0, 0);
return (int64) InterlockedCompareExchangeNoFence64((LONG64 *) value, 0, 0);
}
inline
f32 atomic_get_relaxed(volatile f32* value)
{
return (f32) InterlockedCompareExchangeAcquire((long *) value, 0, 0);
return (f32) InterlockedCompareExchangeNoFence((long *) value, 0, 0);
}
inline
f64 atomic_get_relaxed(volatile f64* value)
{
return (f64) InterlockedCompareExchangeAcquire((long *) value, 0, 0);
return (f64) InterlockedCompareExchangeNoFence64((LONG64 *) value, 0, 0);
}
inline
@ -116,79 +117,79 @@ void atomic_get_relaxed(volatile byte* value, byte data[16])
inline
void atomic_increment_relaxed(volatile int32* value)
{
InterlockedIncrementAcquire((long *) value);
InterlockedIncrementNoFence((long *) value);
}
inline
void atomic_decrement_relaxed(volatile int32* value)
{
InterlockedDecrementAcquire((long *) value);
InterlockedDecrementNoFence((long *) value);
}
inline
void atomic_increment_relaxed(volatile int64* value)
{
InterlockedIncrementAcquire((long *) value);
InterlockedIncrementNoFence64((LONG64 *) value);
}
inline
void atomic_decrement_relaxed(volatile int64* value)
{
InterlockedDecrementAcquire((long *) value);
InterlockedDecrementNoFence64((LONG64 *) value);
}
inline
void atomic_add_relaxed(volatile int32* value, int32 increment)
{
InterlockedAddAcquire((long *) value, increment);
InterlockedAddNoFence((long *) value, increment);
}
inline
void atomic_sub_relaxed(volatile int32* value, int32 decrement)
{
InterlockedAddAcquire((long *) value, -decrement);
InterlockedAddNoFence((long *) value, -decrement);
}
inline
void atomic_add_relaxed(volatile int64* value, int64 increment)
{
InterlockedAddAcquire((long *) value, (long) increment);
InterlockedAddNoFence64((LONG64 *) value, (LONG64) increment);
}
inline
void atomic_sub_relaxed(volatile int64* value, int64 decrement)
{
InterlockedAddAcquire((long *) value, -1 * ((long) decrement));
InterlockedAddNoFence64((LONG64 *) value, -((LONG64) decrement));
}
inline
f32 atomic_compare_exchange_weak_relaxed(volatile f32* value, f32* expected, f32 desired)
{
return (f32) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected);
return (f32) InterlockedCompareExchangeNoFence((long *) value, (long) desired, (long) *expected);
}
inline
f64 atomic_compare_exchange_weak_relaxed(volatile f64* value, f64* expected, f64 desired)
{
return (f64) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected);
return (f64) InterlockedCompareExchangeNoFence64((LONG64 *) value, (LONG64) desired, (LONG64) *expected);
}
inline
int32 atomic_compare_exchange_weak_relaxed(volatile int32* value, int32* expected, int32 desired)
{
return (int32) InterlockedCompareExchangeRelease((long *) value, desired, *expected);
return (int32) InterlockedCompareExchangeNoFence((long *) value, desired, *expected);
}
inline
int64 atomic_compare_exchange_weak_relaxed(volatile int64* value, int64* expected, int64 desired)
{
return (int64) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected);
return (int64) InterlockedCompareExchangeNoFence64((LONG64 *) value, (LONG64) desired, (LONG64) *expected);
}
inline
int32 atomic_fetch_add_relaxed(volatile int32* value, int32 operand)
{
return (int32) InterlockedExchangeAddRelease((long *) value, operand);
return (int32) InterlockedExchangeAddNoFence((long *) value, operand);
}
inline
@ -200,115 +201,115 @@ int32 atomic_fetch_sub_relaxed(volatile int32* value, int32 operand)
inline
int64 atomic_fetch_add_relaxed(volatile int64* value, int64 operand)
{
return (int64) InterlockedExchangeAddRelease((long *) value, (long) operand);
return (int64) InterlockedExchangeAddNoFence64((LONG64 *) value, (LONG64) operand);
}
inline
int64 atomic_fetch_sub_relaxed(volatile int64* value, int64 operand)
{
return (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand);
return (int64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand));
}
inline
void atomic_set_relaxed(volatile uint32* value, uint32 new_value)
{
InterlockedExchangeAcquire((long *) value, new_value);
InterlockedExchangeNoFence((long *) value, new_value);
}
inline
void atomic_set_relaxed(volatile uint64* value, uint64 new_value)
{
InterlockedExchangeAcquire((long *) value, (long) new_value);
InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value);
}
inline
uint32 atomic_fetch_set_relaxed(volatile uint32* value, uint32 new_value)
{
return (uint32) InterlockedExchangeAcquire((long *) value, new_value);
return (uint32) InterlockedExchangeNoFence((long *) value, new_value);
}
inline
uint64 atomic_fetch_set_relaxed(volatile uint64* value, uint64 new_value)
{
return (uint64) InterlockedExchangeAcquire((long *) value, (long) new_value);
return (uint64) InterlockedExchangeNoFence64((LONG64 *) value, (LONG64) new_value);
}
inline
uint32 atomic_get_relaxed(volatile uint32* value)
{
return (uint32) InterlockedCompareExchangeAcquire((long *) value, 0, 0);
return (uint32) InterlockedCompareExchangeNoFence((long *) value, 0, 0);
}
inline
uint64 atomic_get_relaxed(volatile uint64* value)
{
return (uint64) InterlockedCompareExchangeAcquire((long *) value, 0, 0);
return (uint64) InterlockedCompareExchangeNoFence64((LONG64 *) value, 0, 0);
}
inline
void atomic_increment_relaxed(volatile uint32* value)
{
InterlockedIncrementRelease((long *) value);
InterlockedIncrementNoFence((long *) value);
}
inline
void atomic_decrement_relaxed(volatile uint32* value)
{
InterlockedDecrementRelease((long *) value);
InterlockedDecrementNoFence((long *) value);
}
inline
void atomic_increment_relaxed(volatile uint64* value)
{
InterlockedIncrementRelease((long *) value);
InterlockedIncrementNoFence64((LONG64 *) value);
}
inline
void atomic_decrement_relaxed(volatile uint64* value)
{
InterlockedDecrementRelease((long *) value);
InterlockedDecrementNoFence64((LONG64 *) value);
}
inline
void atomic_add_relaxed(volatile uint32* value, uint32 increment)
{
InterlockedAddRelease((long *) value, increment);
InterlockedAddNoFence((long *) value, increment);
}
inline
void atomic_sub_relaxed(volatile uint32* value, uint32 decrement)
{
InterlockedAddRelease((long *) value, -1 * ((int32) decrement));
InterlockedAddNoFence((long *) value, -1 * ((int32) decrement));
}
inline
void atomic_add_relaxed(volatile uint64* value, uint64 increment)
{
InterlockedAddRelease((long *) value, (long) increment);
InterlockedAddNoFence64((LONG64 *) value, (LONG64) increment);
}
inline
void atomic_sub_relaxed(volatile uint64* value, uint64 decrement)
{
InterlockedAddRelease((long *) value, -1 * ((long) decrement));
InterlockedAddNoFence64((LONG64 *) value, -((LONG64) decrement));
}
inline
uint32 atomic_compare_exchange_weak_relaxed(volatile uint32* value, uint32* expected, uint32 desired)
{
return (uint32) InterlockedCompareExchangeAcquire((long *) value, desired, *expected);
return (uint32) InterlockedCompareExchangeNoFence((long *) value, desired, *expected);
}
inline
uint64 atomic_compare_exchange_weak_relaxed(volatile uint64* value, uint64* expected, uint64 desired)
{
return (uint64) InterlockedCompareExchangeAcquire((unsigned long long *) value, (unsigned long long) desired, (unsigned long long) *expected);
return (uint64) InterlockedCompareExchangeNoFence64((LONG64 *) value, (LONG64) desired, (LONG64) *expected);
}
inline
uint32 atomic_fetch_add_relaxed(volatile uint32* value, uint32 operand)
{
return (uint32) InterlockedExchangeAddRelease((long *) value, operand);
return (uint32) InterlockedExchangeAddNoFence((long *) value, operand);
}
inline
@ -320,61 +321,61 @@ uint32 atomic_fetch_sub_relaxed(volatile uint32* value, uint32 operand)
inline
uint64 atomic_fetch_add_relaxed(volatile uint64* value, uint64 operand)
{
return (uint64) InterlockedExchangeAddRelease((long *) value, (long) operand);
return (uint64) InterlockedExchangeAddNoFence64((LONG64 *) value, (LONG64) operand);
}
inline
uint64 atomic_fetch_sub_relaxed(volatile uint64* value, uint64 operand)
{
return (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand);
return (uint64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand));
}
inline
void atomic_and_relaxed(volatile uint32* value, uint32 mask)
{
InterlockedAndRelease((volatile LONG *) value, mask);
InterlockedAndNoFence((volatile LONG *) value, mask);
}
inline
void atomic_and_relaxed(volatile int32* value, int32 mask)
{
InterlockedAndRelease((volatile LONG *) value, (LONG)mask);
InterlockedAndNoFence((volatile LONG *) value, (LONG)mask);
}
inline
void atomic_and_relaxed(volatile uint64* value, uint64 mask)
{
InterlockedAnd64Release((volatile LONG64 *) value, mask);
InterlockedAnd64NoFence((volatile LONG64 *) value, mask);
}
inline
void atomic_and_relaxed(volatile int64* value, int64 mask)
{
InterlockedAnd64Release((volatile LONG64 *) value, mask);
InterlockedAnd64NoFence((volatile LONG64 *) value, mask);
}
inline
void atomic_or_relaxed(volatile uint32* value, uint32 mask)
{
InterlockedOrRelease((volatile LONG *) value, mask);
InterlockedOrNoFence((volatile LONG *) value, mask);
}
inline
void atomic_or_relaxed(volatile int32* value, int32 mask)
{
InterlockedOrRelease((volatile LONG *) value, (LONG)mask);
InterlockedOrNoFence((volatile LONG *) value, (LONG)mask);
}
inline
void atomic_or_relaxed(volatile uint64* value, uint64 mask)
{
InterlockedOr64Release((volatile LONG64 *) value, mask);
InterlockedOr64NoFence((volatile LONG64 *) value, mask);
}
inline
void atomic_or_relaxed(volatile int64* value, int64 mask)
{
InterlockedOr64Release((volatile LONG64 *) value, mask);
InterlockedOr64NoFence((volatile LONG64 *) value, mask);
}
inline
@ -398,7 +399,7 @@ void atomic_set_acquire(volatile int32* value, int32 new_value)
inline
void atomic_set_acquire(volatile int64* value, int64 new_value)
{
InterlockedExchangeAcquire((long *) value, (long) new_value);
InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -410,7 +411,7 @@ void atomic_set_acquire(volatile f32* value, f32 new_value)
inline
void atomic_set_acquire(volatile f64* value, f64 new_value)
{
InterlockedExchangeAcquire((long *) value, (long) new_value);
InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -422,7 +423,7 @@ int32 atomic_fetch_set_acquire(volatile int32* value, int32 new_value)
inline
int64 atomic_fetch_set_acquire(volatile int64* value, int64 new_value)
{
return (int64) InterlockedExchangeAcquire((long *) value, (long) new_value);
return (int64) InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -467,7 +468,7 @@ f32 atomic_get_acquire(volatile f32* value)
inline
f64 atomic_get_acquire(volatile f64* value)
{
return (f64) InterlockedCompareExchangeAcquire((long *) value, 0, 0);
return (f64) InterlockedCompareExchangeAcquire64((LONG64 *) value, 0, 0);
}
inline
@ -533,7 +534,7 @@ f32 atomic_compare_exchange_weak_acquire(volatile f32* value, f32* expected, f32
inline
f64 atomic_compare_exchange_weak_acquire(volatile f64* value, f64* expected, f64 desired)
{
return (f64) InterlockedCompareExchangeAcquire((long *) value, (long) desired, (long) *expected);
return (f64) InterlockedCompareExchangeAcquire64((LONG64 *) value, (LONG64) desired, (LONG64) *expected);
}
inline
@ -545,7 +546,7 @@ int32 atomic_compare_exchange_weak_acquire(volatile int32* value, int32* expecte
inline
int64 atomic_compare_exchange_weak_acquire(volatile int64* value, int64* expected, int64 desired)
{
return (int64) InterlockedCompareExchangeAcquire((long *) value, (long) desired, (long) *expected);
return (int64) InterlockedCompareExchangeAcquire64((LONG64 *) value, (LONG64) desired, (LONG64) *expected);
}
inline
@ -563,13 +564,13 @@ int32 atomic_fetch_sub_acquire(volatile int32* value, int32 operand)
inline
int64 atomic_fetch_add_acquire(volatile int64* value, int64 operand)
{
return (int64) InterlockedExchangeSubtract((unsigned long *) value, operand);
return (int64) InterlockedExchangeAddAcquire64((LONG64 *) value, (LONG64) operand);
}
inline
int64 atomic_fetch_sub_acquire(volatile int64* value, int64 operand)
{
return (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand);
return (int64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand));
}
inline
@ -581,7 +582,7 @@ void atomic_set_acquire(volatile uint32* value, uint32 new_value)
inline
void atomic_set_acquire(volatile uint64* value, uint64 new_value)
{
InterlockedExchangeAcquire((long *) value, (long) new_value);
InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -593,7 +594,7 @@ uint32 atomic_fetch_set_acquire(volatile uint32* value, uint32 new_value)
inline
uint64 atomic_fetch_set_acquire(volatile uint64* value, uint64 new_value)
{
return (uint64) InterlockedExchangeAcquire((long *) value, (long) new_value);
return (uint64) InterlockedExchangeAcquire64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -605,7 +606,7 @@ uint32 atomic_get_acquire(volatile uint32* value)
inline
uint64 atomic_get_acquire(volatile uint64* value)
{
return (uint64) InterlockedCompareExchangeAcquire((long *) value, 0, 0);
return (uint64) InterlockedCompareExchangeAcquire64((LONG64 *) value, 0, 0);
}
inline
@ -623,13 +624,13 @@ void atomic_decrement_acquire(volatile uint32* value)
inline
void atomic_increment_acquire(volatile uint64* value)
{
InterlockedIncrementAcquire((long *) value);
InterlockedIncrementAcquire64((LONG64 *) value);
}
inline
void atomic_decrement_acquire(volatile uint64* value)
{
InterlockedDecrementAcquire((long *) value);
InterlockedDecrementAcquire64((LONG64 *) value);
}
inline
@ -647,13 +648,13 @@ void atomic_sub_acquire(volatile uint32* value, uint32 decrement)
inline
void atomic_add_acquire(volatile uint64* value, uint64 increment)
{
InterlockedAddAcquire((long *) value, (long) increment);
InterlockedAddAcquire64((LONG64 *) value, (LONG64) increment);
}
inline
void atomic_sub_acquire(volatile uint64* value, uint64 decrement)
{
InterlockedAddAcquire((long *) value, -1 * ((long) decrement));
InterlockedAddAcquire64((LONG64 *) value, -((LONG64) decrement));
}
inline
@ -665,7 +666,7 @@ uint32 atomic_compare_exchange_weak_acquire(volatile uint32* value, uint32* expe
inline
uint64 atomic_compare_exchange_weak_acquire(volatile uint64* value, uint64* expected, uint64 desired)
{
return (uint64) InterlockedCompareExchangeAcquire((unsigned long long *) value, (unsigned long long) desired, (unsigned long long) *expected);
return (uint64) InterlockedCompareExchangeAcquire64((LONG64 *) value, (LONG64) desired, (LONG64) *expected);
}
inline
@ -683,13 +684,13 @@ uint32 atomic_fetch_sub_acquire(volatile uint32* value, uint32 operand)
inline
uint64 atomic_fetch_add_acquire(volatile uint64* value, uint64 operand)
{
return (uint64) InterlockedExchangeAddAcquire((long *) value, (long) operand);
return (uint64) InterlockedExchangeAddAcquire64((LONG64 *) value, (LONG64) operand);
}
inline
uint64 atomic_fetch_sub_acquire(volatile uint64* value, uint64 operand)
{
return (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand);
return (uint64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand));
}
inline
@ -761,7 +762,7 @@ void atomic_set_release(volatile int32* value, int32 new_value)
inline
void atomic_set_release(volatile int64* value, int64 new_value)
{
InterlockedExchange((long *) value, (long) new_value);
InterlockedExchange64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -773,7 +774,7 @@ void atomic_set_release(volatile f32* value, f32 new_value)
inline
void atomic_set_release(volatile f64* value, f64 new_value)
{
InterlockedExchange((long *) value, (long) new_value);
InterlockedExchange64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -785,7 +786,7 @@ int32 atomic_fetch_set_release(volatile int32* value, int32 new_value)
inline
int64 atomic_fetch_set_release(volatile int64* value, int64 new_value)
{
return (int64) InterlockedExchange((long *) value, (long) new_value);
return (int64) InterlockedExchange64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -830,7 +831,7 @@ f32 atomic_get_release(volatile f32* value)
inline
f64 atomic_get_release(volatile f64* value)
{
return (f64) InterlockedCompareExchangeRelease((long *) value, 0, 0);
return (f64) InterlockedCompareExchangeRelease64((LONG64 *) value, 0, 0);
}
inline
@ -854,13 +855,13 @@ void atomic_decrement_release(volatile int32* value)
inline
void atomic_increment_release(volatile int64* value)
{
InterlockedIncrementRelease((long *) value);
InterlockedIncrementRelease64((LONG64 *) value);
}
inline
void atomic_decrement_release(volatile int64* value)
{
InterlockedDecrementRelease((long *) value);
InterlockedDecrementRelease64((LONG64 *) value);
}
inline
@ -878,13 +879,13 @@ void atomic_sub_release(volatile int32* value, int32 decrement)
inline
void atomic_add_release(volatile int64* value, int64 increment)
{
InterlockedAddRelease((long *) value, (long) increment);
InterlockedAddRelease64((LONG64 *) value, (LONG64) increment);
}
inline
void atomic_sub_release(volatile int64* value, int64 decrement)
{
InterlockedAddRelease((long *) value, -1 * ((long) decrement));
InterlockedAddRelease64((LONG64 *) value, -((LONG64) decrement));
}
inline
@ -896,7 +897,7 @@ f32 atomic_compare_exchange_weak_release(volatile f32* value, f32* expected, f32
inline
f64 atomic_compare_exchange_weak_release(volatile f64* value, f64* expected, f64 desired)
{
return (f64) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected);
return (f64) InterlockedCompareExchangeRelease64((LONG64 *) value, (LONG64) desired, (LONG64) *expected);
}
inline
@ -908,7 +909,7 @@ int32 atomic_compare_exchange_weak_release(volatile int32* value, int32* expecte
inline
int64 atomic_compare_exchange_weak_release(volatile int64* value, int64* expected, int64 desired)
{
return (int64) InterlockedCompareExchangeRelease((long *) value, (long) desired, (long) *expected);
return (int64) InterlockedCompareExchangeRelease64((LONG64 *) value, (LONG64) desired, (LONG64) *expected);
}
inline
@ -926,13 +927,13 @@ int32 atomic_fetch_sub_release(volatile int32* value, int32 operand)
inline
int64 atomic_fetch_add_release(volatile int64* value, int64 operand)
{
return (int64) InterlockedExchangeSubtract((unsigned long *) value, operand);
return (int64) InterlockedExchangeAddRelease64((LONG64 *) value, (LONG64) operand);
}
inline
int64 atomic_fetch_sub_release(volatile int64* value, int64 operand)
{
return (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand);
return (int64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand));
}
inline
@ -944,7 +945,7 @@ void atomic_set_release(volatile uint32* value, uint32 new_value)
inline
void atomic_set_release(volatile uint64* value, uint64 new_value)
{
InterlockedExchange((long *) value, (long) new_value);
InterlockedExchange64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -956,7 +957,7 @@ uint32 atomic_fetch_set_release(volatile uint32* value, uint32 new_value)
inline
uint64 atomic_fetch_set_release(volatile uint64* value, uint64 new_value)
{
return (uint64) InterlockedExchange((long *) value, (long) new_value);
return (uint64) InterlockedExchange64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -968,7 +969,7 @@ uint32 atomic_get_release(volatile uint32* value)
inline
uint64 atomic_get_release(volatile uint64* value)
{
return (uint64) InterlockedCompareExchangeRelease((long *) value, 0, 0);
return (uint64) InterlockedCompareExchangeRelease64((LONG64 *) value, 0, 0);
}
inline
@ -986,13 +987,13 @@ void atomic_decrement_release(volatile uint32* value)
inline
void atomic_increment_release(volatile uint64* value)
{
InterlockedIncrementRelease((long *) value);
InterlockedIncrementRelease64((LONG64 *) value);
}
inline
void atomic_decrement_release(volatile uint64* value)
{
InterlockedDecrementRelease((long *) value);
InterlockedDecrementRelease64((LONG64 *) value);
}
inline
@ -1010,13 +1011,13 @@ void atomic_sub_release(volatile uint32* value, uint32 decrement)
inline
void atomic_add_release(volatile uint64* value, uint64 increment)
{
InterlockedAddRelease((long *) value, (long) increment);
InterlockedAddRelease64((LONG64 *) value, (LONG64) increment);
}
inline
void atomic_sub_release(volatile uint64* value, uint64 decrement)
{
InterlockedAddRelease((long *) value, -1 * ((long) decrement));
InterlockedAddRelease64((LONG64 *) value, -((LONG64) decrement));
}
inline
@ -1028,7 +1029,7 @@ uint32 atomic_compare_exchange_weak_release(volatile uint32* value, uint32* expe
inline
uint64 atomic_compare_exchange_weak_release(volatile uint64* value, uint64* expected, uint64 desired)
{
return (uint64) InterlockedCompareExchangeRelease((unsigned long long *) value, (unsigned long long) desired, (unsigned long long) *expected);
return (uint64) InterlockedCompareExchangeRelease64((LONG64 *) value, (LONG64) desired, (LONG64) *expected);
}
inline
@ -1046,13 +1047,13 @@ uint32 atomic_fetch_sub_release(volatile uint32* value, uint32 operand)
inline
uint64 atomic_fetch_add_release(volatile uint64* value, uint64 operand)
{
return (uint64) InterlockedExchangeAddRelease((long *) value, (long) operand);
return (uint64) InterlockedExchangeAddRelease64((LONG64 *) value, (LONG64) operand);
}
inline
uint64 atomic_fetch_sub_release(volatile uint64* value, uint64 operand)
{
return (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand);
return (uint64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand));
}
inline
@ -1124,7 +1125,7 @@ void atomic_set_acquire_release(volatile int32* value, int32 new_value)
inline
void atomic_set_acquire_release(volatile int64* value, int64 new_value)
{
InterlockedExchange((long *) value, (long) new_value);
InterlockedExchange64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -1136,7 +1137,7 @@ void atomic_set_acquire_release(volatile f32* value, f32 new_value)
inline
void atomic_set_acquire_release(volatile f64* value, f64 new_value)
{
InterlockedExchange((long *) value, (long) new_value);
InterlockedExchange64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -1148,7 +1149,7 @@ int32 atomic_fetch_set_acquire_release(volatile int32* value, int32 new_value)
inline
int64 atomic_fetch_set_acquire_release(volatile int64* value, int64 new_value)
{
return (int64) InterlockedExchange((long *) value, (long) new_value);
return (int64) InterlockedExchange64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -1181,7 +1182,7 @@ int32 atomic_get_acquire_release(volatile int32* value)
inline
int64 atomic_get_acquire_release(volatile int64* value)
{
return (int64) InterlockedCompareExchange((long *) value, 0, 0);
return (int64) InterlockedCompareExchange64((LONG64 *) value, 0, 0);
}
inline
@ -1193,7 +1194,7 @@ f32 atomic_get_acquire_release(volatile f32* value)
inline
f64 atomic_get_acquire_release(volatile f64* value)
{
return (f64) InterlockedCompareExchange((long *) value, 0, 0);
return (f64) InterlockedCompareExchange64((LONG64 *) value, 0, 0);
}
inline
@ -1217,13 +1218,13 @@ void atomic_decrement_acquire_release(volatile int32* value)
inline
void atomic_increment_acquire_release(volatile int64* value)
{
InterlockedIncrement((long *) value);
InterlockedIncrement64((LONG64 *) value);
}
inline
void atomic_decrement_acquire_release(volatile int64* value)
{
InterlockedDecrement((long *) value);
InterlockedDecrement64((LONG64 *) value);
}
inline
@ -1241,13 +1242,13 @@ void atomic_sub_acquire_release(volatile int32* value, int32 decrement)
inline
void atomic_add_acquire_release(volatile int64* value, int64 increment)
{
InterlockedAdd((long *) value, (long) increment);
InterlockedAdd64((LONG64 *) value, (LONG64) increment);
}
inline
void atomic_sub_acquire_release(volatile int64* value, int64 decrement)
{
InterlockedAdd((long *) value, -1 * ((long) decrement));
InterlockedAdd64((LONG64 *) value, -((LONG64) decrement));
}
inline
@ -1259,7 +1260,7 @@ f32 atomic_compare_exchange_weak_acquire_release(volatile f32* value, f32* expec
inline
f64 atomic_compare_exchange_weak_acquire_release(volatile f64* value, f64* expected, f64 desired)
{
return (f64) InterlockedCompareExchange((long *) value, (long) desired, (long) *expected);
return (f64) InterlockedCompareExchange64((LONG64 *) value, (LONG64) desired, (LONG64) *expected);
}
inline
@ -1271,7 +1272,7 @@ int32 atomic_compare_exchange_weak_acquire_release(volatile int32* value, int32*
inline
int64 atomic_compare_exchange_weak_acquire_release(volatile int64* value, int64* expected, int64 desired)
{
return (int64) InterlockedCompareExchange((long *) value, (long) desired, (long) *expected);
return (int64) InterlockedCompareExchange64((LONG64 *) value, (LONG64) desired, (LONG64) *expected);
}
inline
@ -1291,7 +1292,7 @@ int32 atomic_fetch_sub_acquire_release(volatile int32* value, int32 operand)
inline
int64 atomic_fetch_add_acquire_release(volatile int64* value, int64 operand)
{
int64 ret = (int64) InterlockedExchangeSubtract((unsigned long *) value, operand);
int64 ret = (int64) InterlockedExchangeAdd64((LONG64 *) value, (LONG64) operand);
return ret;
}
@ -1299,7 +1300,7 @@ int64 atomic_fetch_add_acquire_release(volatile int64* value, int64 operand)
inline
int64 atomic_fetch_sub_acquire_release(volatile int64* value, int64 operand)
{
int64 ret = (int64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand);
int64 ret = (int64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand));
return ret;
}
@ -1325,7 +1326,7 @@ uint32 atomic_fetch_set_acquire_release(volatile uint32* value, uint32 new_value
inline
uint64 atomic_fetch_set_acquire_release(volatile uint64* value, uint64 new_value)
{
return (uint64) InterlockedExchange((long *) value, (long) new_value);
return (uint64) InterlockedExchange64((LONG64 *) value, (LONG64) new_value);
}
inline
@ -1337,7 +1338,7 @@ uint32 atomic_get_acquire_release(volatile uint32* value)
inline
uint64 atomic_get_acquire_release(volatile uint64* value)
{
return (uint64) InterlockedCompareExchange((long *) value, 0, 0);
return (uint64) InterlockedCompareExchange64((LONG64 *) value, 0, 0);
}
inline
@ -1355,13 +1356,13 @@ void atomic_decrement_acquire_release(volatile uint32* value)
inline
void atomic_increment_acquire_release(volatile uint64* value)
{
InterlockedIncrement((long *) value);
InterlockedIncrement64((LONG64 *) value);
}
inline
void atomic_decrement_acquire_release(volatile uint64* value)
{
InterlockedDecrement((long *) value);
InterlockedDecrement64((LONG64 *) value);
}
inline
@ -1379,13 +1380,13 @@ void atomic_sub_acquire_release(volatile uint32* value, uint32 decrement)
inline
void atomic_add_acquire_release(volatile uint64* value, uint64 increment)
{
InterlockedAdd((long *) value, (long) increment);
InterlockedAdd64((LONG64 *) value, (LONG64) increment);
}
inline
void atomic_sub_acquire_release(volatile uint64* value, uint64 decrement)
{
InterlockedAdd((long *) value, -1 * ((long) decrement));
InterlockedAdd64((LONG64 *) value, -((LONG64) decrement));
}
inline
@ -1397,7 +1398,7 @@ uint32 atomic_compare_exchange_weak_acquire_release(volatile uint32* value, uint
inline
uint64 atomic_compare_exchange_weak_acquire_release(volatile uint64* value, uint64* expected, uint64 desired)
{
return (uint64) InterlockedCompareExchange((unsigned long long *) value, (unsigned long long) desired, (unsigned long long) *expected);
return (uint64) InterlockedCompareExchange64((LONG64 *) value, (LONG64) desired, (LONG64) *expected);
}
inline
@ -1417,13 +1418,13 @@ uint32 atomic_fetch_sub_acquire_release(volatile uint32* value, uint32 operand)
inline
uint64 atomic_fetch_add_acquire_release(volatile uint64* value, uint64 operand)
{
return (uint64) InterlockedExchangeAdd((long *) value, (long) operand);
return (uint64) InterlockedExchangeAdd64((LONG64 *) value, (LONG64) operand);
}
inline
uint64 atomic_fetch_sub_acquire_release(volatile uint64* value, uint64 operand)
{
uint64 ret = (uint64) InterlockedExchangeSubtract((unsigned long *) value, (long) operand);
uint64 ret = (uint64) InterlockedExchangeAdd64((LONG64 *) value, -((LONG64) operand));
return ret;
}
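For reference, a minimal sketch of how the relaxed vs. acquire/release wrappers above are typically paired; the names (g_data_ready, g_payload, g_frame_counter) are hypothetical and not part of this codebase:
// Producer: write the payload first, then publish the flag with release semantics.
static volatile uint32 g_data_ready = 0;
static uint64 g_payload = 0;
static volatile uint32 g_frame_counter = 0;
void producer_sketch() {
    g_payload = 42;
    atomic_set_release(&g_data_ready, 1);
}
// Consumer: the acquire load pairs with the release store above,
// so once the flag reads 1 the payload write is guaranteed to be visible.
void consumer_sketch() {
    if (atomic_get_acquire(&g_data_ready)) {
        uint64 local = g_payload;
        (void) local;
    }
    // Pure statistics counters only need the relaxed variants.
    atomic_increment_relaxed(&g_frame_counter);
}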

View File

@ -376,7 +376,7 @@ int64 hashmap_dump(const HashMap* hm, byte* data)
// Dump the hash map content into the buffer memory
int32 free_index = 0;
int32 bit_index = 0;
for (int32 i = 0; i < hm->buf.count; ++i) {
for (uint32 i = 0; i < hm->buf.count; ++i) {
if ((hm->buf.free[free_index] & (1ULL << bit_index)) > 0) {
HashEntry* entry = (HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, i);
@ -434,7 +434,7 @@ int64 hashmap_load(HashMap* hm, const byte* data)
data += sizeof(uint64);
// Load the table content
for (int i = 0; i < count; ++i) {
for (uint32 i = 0; i < count; ++i) {
uint64 offset = SWAP_ENDIAN_LITTLE(*((uint64 *) data));
data += sizeof(offset);
@ -455,7 +455,7 @@ int64 hashmap_load(HashMap* hm, const byte* data)
// Switch endian AND turn offsets to pointers
int32 free_index = 0;
int32 bit_index = 0;
for (int32 i = 0; i < hm->buf.count; ++i) {
for (uint32 i = 0; i < hm->buf.count; ++i) {
if ((hm->buf.free[free_index] & (1ULL << bit_index)) > 0) {
HashEntry* entry = (HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, i);
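The two loops above walk the chunk buffer together with its free bitmap; a minimal sketch of the per-element cursor advance they rely on (assuming 64-bit bitmap words, as the 1ULL shift suggests; illustrative only, not the code from this file):
bit_index++;
if (bit_index > 63) {
    bit_index = 0;
    free_index++;
}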

View File

@ -109,7 +109,6 @@ struct v4_byte {
};
};
struct v2_int32 {
union {
struct {

View File

@ -438,7 +438,8 @@ inline int16_16 operator<=(int16_16 a, int16_16 b)
inline int16_32 operator<=(int16_32 a, int16_32 b)
{
int16_32 simd;
simd.s = _mm512_mask_blend_epi16(_mm512_knot(_mm512_cmpgt_epi16_mask(b.s, a.s)), b.s, a.s);
__mmask32 mask = _mm512_cmp_epi16_mask(a.s, b.s, _MM_CMPINT_LE);
simd.s = _mm512_mask_blend_epi16(mask, b.s, a.s);
return simd;
}
@ -716,25 +717,19 @@ inline int16_32 clamp(int16_32 min_value, int16_32 a, int16_32 max_value)
return simd_min(simd_max(a, min_value), max_value);
}
inline int16 which_true(int16_8 a)
inline int32 which_true(int16_8 a)
{
int16 which_true = _mm_movemask_epi8(a.s);
return which_true;
return _mm_movemask_epi8(a.s);
}
inline int16 which_true(int16_16 a)
inline int32 which_true(int16_16 a)
{
int16 which_true = _mm256_movemask_epi8(a.s);
return which_true;
return _mm256_movemask_epi8(a.s);
}
inline int16 which_true(int16_32 a)
inline int32 which_true(int16_32 a)
{
int16 which_true = _mm512_movepi16_mask(a.s);
return which_true;
return _mm512_movepi16_mask(a.s);
}
inline bool any_true(int16_8 a)

View File

@ -26,6 +26,8 @@
#define ROUND_TO_NEAREST(a, b) (((a) + ((b) - 1)) & ~((b) - 1))
#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))
#define OMS_CEIL(x) ((x) == (int)(x) ? (int)(x) : ((x) > 0 ? (int)(x) + 1 : (int)(x)))
#define OMS_ROUND(x) (((x) >= 0) ? ((int)((x) + 0.5f)) : ((int)((x) - 0.5f)))
#define OMS_ROUND_POSITIVE(x) ((int)((x) + 0.5f))
// Modulo function when b is a power of 2
#define MODULO_2(a, b) ((a) & (b - 1))
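A few worked values for the macros above, for reference (illustrative only):
// ROUND_TO_NEAREST(13, 8)  -> (13 + 7) & ~7 == 16
// CEIL_DIV(13, 8)          -> (13 + 7) / 8  == 2
// OMS_CEIL(2.3)            -> 3,  OMS_CEIL(-2.3)   -> -2
// OMS_ROUND(2.6f)          -> 3,  OMS_ROUND(-2.6f) -> -3
// OMS_ROUND_POSITIVE(2.6f) -> 3
// MODULO_2(13, 8)          -> 13 & 7 == 5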

View File

@ -15,6 +15,7 @@
#include <ctype.h>
#include "../stdlib/Types.h"
#include "MathUtils.h"
inline
int32 utf8_encode(uint32 codepoint, char* out)
@ -821,9 +822,9 @@ void sprintf_fast(char *buffer, const char* format, ...) {
case 'f': {
f64 val = va_arg(args, f64);
int32 precision = 6; // Default precision
// Default precision
int32 precision = 5;
// @question Consider implementing rounding
// Check for optional precision specifier
const char* prec_ptr = ptr + 1;
if (*prec_ptr >= '0' && *prec_ptr <= '9') {
@ -841,6 +842,15 @@ void sprintf_fast(char *buffer, const char* format, ...) {
val = -val;
}
if (precision < 6) {
static const float powers_of_ten[] = {
1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f, 100000.0f
};
f32 scale = powers_of_ten[precision];
val = OMS_ROUND_POSITIVE(val * scale) / scale;
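// e.g. precision = 2: 3.14159 * 100.0f = 314.159 -> OMS_ROUND_POSITIVE -> 314 -> / 100.0f -> 3.14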
}
// Handle integer part
int32 int_part = (int32) val;
f64 frac_part = val - int_part;
@ -896,7 +906,7 @@ void format_time_hh_mm_ss(char* time_str, int32 hours, int32 minutes, int32 secs
}
inline
void format_time_hh_mm_ss(char* time_str, int32 time) {
void format_time_hh_mm_ss(char* time_str, uint64 time) {
int32 hours = (time / 3600) % 24;
int32 minutes = (time / 60) % 60;
int32 secs = time % 60;
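// e.g. time = 3725 -> hours = 1, minutes = 2, secs = 5 (presumably rendered as "01:02:05")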
@ -915,7 +925,7 @@ void format_time_hh_mm(char* time_str, int32 hours, int32 minutes) {
}
inline
void format_time_hh_mm(char* time_str, int32 time) {
void format_time_hh_mm(char* time_str, uint64 time) {
int32 hours = (time / 3600) % 24;
int32 minutes = (time / 60) % 60;

View File

@ -18,6 +18,7 @@ struct FileBody {
};
// @question Do we want to make the size comparison a step variable?
inline
bool is_equal_aligned(const byte* region1, const byte* region2, uint64 size)
{
while (size > 4) {