Program starts again. Input is somehow broken?! FPS and theme need to be further implemented

This commit is contained in:
Dennis Eichhorn 2024-10-28 02:43:21 +01:00
parent ba244f8155
commit c7db2069c0
19 changed files with 599 additions and 421 deletions

View File

@ -100,6 +100,8 @@ void font_from_file_txt(
while (*pos != '\n') {
*texture_pos++ = *pos++;
}
*texture_pos++ = '\0';
} else if (strcmp(block_name, "font_size") == 0) {
font->size = strtof(pos, &pos);
} else if (strcmp(block_name, "line_height") == 0) {

View File

@ -693,15 +693,15 @@ void entity_clip_space_mat_sse(f32* result_mat, const f32* model_mat, const f32*
__m128 a[4];
__m128 b[4];
a[0] = _mm_loadu_ps(projection_mat);
a[1] = _mm_loadu_ps(&projection_mat[4]);
a[2] = _mm_loadu_ps(&projection_mat[8]);
a[3] = _mm_loadu_ps(&projection_mat[12]);
a[0] = _mm_load_ps(projection_mat);
a[1] = _mm_load_ps(&projection_mat[4]);
a[2] = _mm_load_ps(&projection_mat[8]);
a[3] = _mm_load_ps(&projection_mat[12]);
b[0] = _mm_loadu_ps(view_mat);
b[1] = _mm_loadu_ps(&view_mat[4]);
b[2] = _mm_loadu_ps(&view_mat[8]);
b[3] = _mm_loadu_ps(&view_mat[12]);
b[0] = _mm_load_ps(view_mat);
b[1] = _mm_load_ps(&view_mat[4]);
b[2] = _mm_load_ps(&view_mat[8]);
b[3] = _mm_load_ps(&view_mat[12]);
_MM_TRANSPOSE4_PS(b[0], b[1], b[2], b[3]);
mat4mat4_mult_sse(a, b, temp);
@ -711,10 +711,10 @@ void entity_clip_space_mat_sse(f32* result_mat, const f32* model_mat, const f32*
a[2] = temp[2];
a[3] = temp[3];
b[0] = _mm_loadu_ps(model_mat);
b[1] = _mm_loadu_ps(&model_mat[4]);
b[2] = _mm_loadu_ps(&model_mat[8]);
b[3] = _mm_loadu_ps(&model_mat[12]);
b[0] = _mm_load_ps(model_mat);
b[1] = _mm_load_ps(&model_mat[4]);
b[2] = _mm_load_ps(&model_mat[8]);
b[3] = _mm_load_ps(&model_mat[12]);
_MM_TRANSPOSE4_PS(b[0], b[1], b[2], b[3]);
mat4mat4_mult_sse(a, b, temp);

33
gpuapi/UIUtils.h Normal file
View File

@ -0,0 +1,33 @@
/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef TOS_GPUAPI_UI_UTILS_H
#define TOS_GPUAPI_UI_UTILS_H
#include <stdio.h>
#include <string.h>
/**
 * Creates the geometry of a UI input field: a border rectangle and, on top of it,
 * a second rectangle that is inset by 1 on every side (x + 1, y + 1, width - 2, height - 2).
 *
 * The function is marked inline because it is defined in a header; without it every
 * translation unit that includes this header would emit its own external definition.
 *
 * NOTE(review): align_h, align_v, color_index and tex_x1 .. tex_y2 are accepted but not used yet.
 *      The alignment is hard coded to UI_ALIGN_H_LEFT / UI_ALIGN_V_BOTTOM and the literals 12 and 14
 *      are presumably fixed color indices - confirm against vertex_rect_border_create / vertex_rect_create.
 *
 * @param vertices    Vertex buffer that is handed to both helper calls
 * @param index       Handed to both helper calls; presumably the current write position in vertices - TODO confirm
 * @param zindex      Z-index that is handed to both helper calls
 * @param x           X position of the border rectangle
 * @param y           Y position of the border rectangle
 * @param width       Width of the border rectangle
 * @param height      Height of the border rectangle
 * @param align_h     Currently unused
 * @param align_v     Currently unused
 * @param color_index Currently unused
 * @param tex_x1      Currently unused
 * @param tex_y1      Currently unused
 * @param tex_x2      Currently unused
 * @param tex_y2      Currently unused
 */
inline
void ui_input_create(Vertex3DTextureColorIndex* __restrict vertices, uint32* __restrict index, f32 zindex,
    f32 x, f32 y, f32 width, f32 height, int32 align_h, int32 align_v,
    uint32 color_index = 0, f32 tex_x1 = 0.0f, f32 tex_y1 = 0.0f, f32 tex_x2 = 0.0f, f32 tex_y2 = 0.0f
)
{
    // Outer frame, the literal 1 is passed right after the size (presumably the border thickness)
    vertex_rect_border_create(
        vertices, index, zindex,
        x, y, width, height, 1, UI_ALIGN_H_LEFT, UI_ALIGN_V_BOTTOM,
        12, 0.0f, 0.0f
    );

    // Inner area, shrunk by 1 on each side so it sits inside the frame
    vertex_rect_create(
        vertices, index, zindex,
        x + 1, y + 1, width - 2, height - 2, UI_ALIGN_H_LEFT, UI_ALIGN_V_BOTTOM,
        14, 0.0f, 0.0f
    );
}
#endif

View File

@ -1,6 +1,7 @@
#ifndef TOS_LOG_DEBUG_MEMORY_C
#define TOS_LOG_DEBUG_MEMORY_C
#include "../stdlib/Types.h"
#include "Debug.h"
#include "DebugMemory.h"
#include "Log.h"

View File

@ -401,11 +401,11 @@ void mat3vec3_mult(const f32* __restrict matrix, const f32* __restrict vector, f
// @question could simple mul add sse be faster?
void mat3vec3_mult_sse(const f32* __restrict matrix, const f32* __restrict vector, f32* __restrict result)
{
__m128 vec = _mm_loadu_ps(vector);
__m128 vec = _mm_load_ps(vector);
vec = _mm_insert_ps(vec, _mm_setzero_ps(), 0x30); // vec[3] = 0
for (int32 i = 0; i < 3; ++i) {
__m128 row = _mm_loadu_ps(&matrix[i * 3]);
__m128 row = _mm_load_ps(&matrix[i * 3]);
row = _mm_insert_ps(row, _mm_setzero_ps(), 0x30); // row[3] = 0
__m128 dot = _mm_dp_ps(row, vec, 0xF1);
@ -444,10 +444,10 @@ void mat4vec4_mult(const f32* __restrict matrix, const f32* __restrict vector, f
// @question could simple mul add sse be faster?
void mat4vec4_mult_sse(const f32* __restrict matrix, const f32* __restrict vector, f32* __restrict result)
{
__m128 vec = _mm_loadu_ps(vector);
__m128 vec = _mm_load_ps(vector);
for (int32 i = 0; i < 4; ++i) {
__m128 row = _mm_loadu_ps(&matrix[i * 4]);
__m128 row = _mm_load_ps(&matrix[i * 4]);
__m128 dot = _mm_dp_ps(row, vec, 0xF1);
result[i] = _mm_cvtss_f32(dot);
@ -502,16 +502,16 @@ void mat4mat4_mult(const f32* __restrict a, const f32* __restrict b, f32* __rest
// @todo check http://fhtr.blogspot.com/2010/02/4x4-f32-matrix-multiplication-using.html
// @question could simple mul add sse be faster?
// Load rows of matrix a
__m128 a_1 = _mm_loadu_ps(a);
__m128 a_2 = _mm_loadu_ps(&a[4]);
__m128 a_3 = _mm_loadu_ps(&a[8]);
__m128 a_4 = _mm_loadu_ps(&a[12]);
__m128 a_1 = _mm_load_ps(a);
__m128 a_2 = _mm_load_ps(&a[4]);
__m128 a_3 = _mm_load_ps(&a[8]);
__m128 a_4 = _mm_load_ps(&a[12]);
// Load columns of matrix b
__m128 b_1 = _mm_loadu_ps(b);
__m128 b_2 = _mm_loadu_ps(&b[4]);
__m128 b_3 = _mm_loadu_ps(&b[8]);
__m128 b_4 = _mm_loadu_ps(&b[12]);
__m128 b_1 = _mm_load_ps(b);
__m128 b_2 = _mm_load_ps(&b[4]);
__m128 b_3 = _mm_load_ps(&b[8]);
__m128 b_4 = _mm_load_ps(&b[12]);
_mm_storeu_ps(&result[0],
_mm_add_ps(

View File

@ -69,12 +69,14 @@ void chunk_init(ChunkMemory* buf, byte* data, uint64 count, uint64 chunk_size, i
buf->memory = data;
buf->count = count;
buf->size = chunk_size + sizeof(buf->free) * CEIL_DIV(count, 64);
buf->size = chunk_size * count + sizeof(buf->free) * CEIL_DIV(count, 64);
buf->chunk_size = chunk_size;
buf->last_pos = -1;
buf->alignment = alignment;
// @question Could it be beneficial to have this before the element data?
// On the other hand the way we do it right now we never have to move past the free array since it is at the end
// On another hand we could by accident overwrite the values in free if we are not careful
buf->free = (uint64 *) (buf->memory + count * chunk_size);
DEBUG_MEMORY_INIT((uint64) buf->memory, buf->size);
@ -124,7 +126,7 @@ int64 chunk_reserve(ChunkMemory* buf, uint64 elements = 1, bool zeroed = false)
int32 bit_index;
int64 free_element = -1;
byte mask;
int64 mask;
int32 i = 0;
int64 max_bytes = (buf->count + 7) / 64;
@ -157,7 +159,7 @@ int64 chunk_reserve(ChunkMemory* buf, uint64 elements = 1, bool zeroed = false)
uint64 current_free_index = free_index + (bit_index + j) / 64;
int32 current_bit_index = (bit_index + j) % 64;
mask = 1 << current_bit_index;
mask = 1LL << current_bit_index;
if ((buf->free[current_free_index] & mask) == 0) {
++consecutive_free_bits;
} else {
@ -201,23 +203,23 @@ int64 chunk_reserve(ChunkMemory* buf, uint64 elements = 1, bool zeroed = false)
byte* chunk_find_free(ChunkMemory* buf)
{
int64 byte_index = (buf->last_pos + 1) / 64;
int64 free_index = (buf->last_pos + 1) / 64;
int32 bit_index;
int64 free_element = -1;
byte mask;
int64 mask;
int32 i = 0;
int64 max_bytes = (buf->count + 7) / 64;
while (free_element < 0 && i < buf->count) {
if (byte_index >= max_bytes) {
byte_index = 0;
if (free_index >= max_bytes) {
free_index = 0;
}
if (buf->free[byte_index] == 0xFF) {
if (buf->free[free_index] == 0xFF) {
++i;
++byte_index;
++free_index;
continue;
}
@ -226,10 +228,10 @@ byte* chunk_find_free(ChunkMemory* buf)
// @performance on the first iteration through the buffer we could optimize this by starting at a different bit_index
// because we know that the bit_index is based on last_pos
for (bit_index = 0; bit_index < 64; ++bit_index) {
mask = 1 << bit_index;
if ((buf->free[byte_index] & mask) == 0) {
free_element = byte_index * 64 + bit_index;
buf->free[byte_index] |= (1LL << bit_index);
mask = 1LL << bit_index;
if ((buf->free[free_index] & mask) == 0) {
free_element = free_index * 64 + bit_index;
buf->free[free_index] |= (1LL << bit_index);
break;
}
@ -248,10 +250,10 @@ void chunk_free_element(ChunkMemory* buf, uint64 element)
{
DEBUG_MEMORY_DELETE((uint64) (buf->memory + element * buf->chunk_size), buf->chunk_size);
int64 byte_index = element / 64;
int64 free_index = element / 64;
int32 bit_index = element % 64;
buf->free[byte_index] &= ~(1 << bit_index);
buf->free[free_index] &= ~(1LL << bit_index);
}
inline

View File

@ -62,7 +62,7 @@ void ring_alloc(RingMemory* ring, uint64 size, int32 alignment = 64)
inline
void ring_init(RingMemory* ring, BufferMemory* buf, uint64 size, int32 alignment = 64)
{
ring->memory = buffer_get_memory(buf, size, alignment);
ring->memory = buffer_get_memory(buf, size, alignment, true);
ring->size = size;
ring->pos = 0;
@ -71,8 +71,6 @@ void ring_init(RingMemory* ring, BufferMemory* buf, uint64 size, int32 alignment
ring->start = 0;
ring->end = 0;
memset(ring->memory, 0, buf->size);
DEBUG_MEMORY_INIT((uint64) ring->memory, ring->size);
}

View File

@ -122,6 +122,11 @@ int64 hashmap_size(const HashMap* hm)
}
void hashmap_insert(HashMap* hm, const char* key, int32 value) {
// @performance Do we really want to do this check every time?
if (hm->buf.count == 0) {
return;
}
uint64 index = hash_djb2(key) % hm->buf.count;
int64 element = chunk_reserve(&hm->buf, 1);
@ -147,6 +152,11 @@ void hashmap_insert(HashMap* hm, const char* key, int32 value) {
}
void hashmap_insert(HashMap* hm, const char* key, int64 value) {
// @performance Do we really want to do this check every time?
if (hm->buf.count == 0) {
return;
}
uint64 index = hash_djb2(key) % hm->buf.count;
int64 element = chunk_reserve(&hm->buf, 1);
@ -172,6 +182,11 @@ void hashmap_insert(HashMap* hm, const char* key, int64 value) {
}
void hashmap_insert(HashMap* hm, const char* key, uintptr_t value) {
// @performance Do we really want to do this check every time?
if (hm->buf.count == 0) {
return;
}
uint64 index = hash_djb2(key) % hm->buf.count;
int64 element = chunk_reserve(&hm->buf, 1);
@ -197,6 +212,11 @@ void hashmap_insert(HashMap* hm, const char* key, uintptr_t value) {
}
void hashmap_insert(HashMap* hm, const char* key, void* value) {
// @performance Do we really want to do this check every time?
if (hm->buf.count == 0) {
return;
}
uint64 index = hash_djb2(key) % hm->buf.count;
int64 element = chunk_reserve(&hm->buf, 1);
@ -222,6 +242,11 @@ void hashmap_insert(HashMap* hm, const char* key, void* value) {
}
void hashmap_insert(HashMap* hm, const char* key, f32 value) {
// @performance Do we really want to do this check every time?
if (hm->buf.count == 0) {
return;
}
uint64 index = hash_djb2(key) % hm->buf.count;
int64 element = chunk_reserve(&hm->buf, 1);
@ -247,6 +272,11 @@ void hashmap_insert(HashMap* hm, const char* key, f32 value) {
}
void hashmap_insert(HashMap* hm, const char* key, const char* value) {
// @performance Do we really want to do this check every time?
if (hm->buf.count == 0) {
return;
}
uint64 index = hash_djb2(key) % hm->buf.count;
int64 element = chunk_reserve(&hm->buf, 1);
@ -274,6 +304,11 @@ void hashmap_insert(HashMap* hm, const char* key, const char* value) {
}
void hashmap_insert(HashMap* hm, const char* key, byte* value) {
// @performance Do we really want to do this check every time?
if (hm->buf.count == 0) {
return;
}
uint64 index = hash_djb2(key) % hm->buf.count;
int64 element = chunk_reserve(&hm->buf, 1);
@ -302,6 +337,11 @@ void hashmap_insert(HashMap* hm, const char* key, byte* value) {
}
HashEntry* hashmap_get_entry(HashMap* hm, const char* key) {
// @performance Do we really want to do this check every time?
if (hm->buf.count == 0) {
return NULL;
}
uint64 index = hash_djb2(key) % hm->buf.count;
HashEntry* entry = (HashEntry *) hm->table[index];
@ -356,90 +396,131 @@ void hashmap_delete_entry(HashMap* hm, const char* key) {
}
}
// @bug We cannot know if the data needs endian swap (it could be int/float, but also some other 4/8 byte value)
// -> if we save this to a file and load it on a different system we will have "corrupt" data
//
// Serializes the hash map into data in this order: the element count (hm->buf.count),
// one 64 bit offset per table slot, the chunk contents and finally the free array.
// Pointers are written as offsets relative to hm->buf.memory.
//
// NOTE(review): this listing contains the removed and the added lines of the commit side by side
// (e.g. two assignments per table slot and two heads for the return expression) - confirm against the real file.
//
// @param hm   Hash map to dump
// @param data Output buffer; no bounds check is performed here - TODO confirm the caller sizes it correctly
// @return Number of bytes written according to the size formula at the end of the function
inline
int64 hashmap_dump(const HashMap* hm, byte* data)
{
*((uint64 *) data) = SWAP_ENDIAN_LITTLE(hm->buf.count);
data += sizeof(uint64);
uint64 next_count_total = 0;
// Dump the table content where the elements are relative indices/pointers
// NOTE(review): data is not advanced inside this loop, so every iteration writes to the same 8 bytes.
// Only the statement after the loop skips the table area - presumably ((uint64 *) data)[i] was intended, confirm.
for (int32 i = 0; i < hm->buf.count; ++i) {
*((uint64 *) data) = SWAP_ENDIAN_LITTLE((uintptr_t) hm->table[i] - (uintptr_t) hm->buf.memory);
// Empty slots are written as 0
// NOTE(review): 0 is also the offset of an entry located at the start of hm->buf.memory, the two cannot be told apart on load
*((uint64 *) data) = hm->table[i]
? SWAP_ENDIAN_LITTLE((uintptr_t) hm->table[i] - (uintptr_t) hm->buf.memory)
: 0ULL;
}
data += sizeof(uint64) * hm->buf.count;
// Bytes of a chunk that remain after two uint64 fields and the key
// (presumably element_id, next pointer and key of HashEntry - TODO confirm against the struct layout)
int64 value_size = hm->buf.chunk_size - sizeof(uint64) - sizeof(char) * MAX_KEY_LENGTH - sizeof(uint64);
// Dump hash map content = buffer memory
// free_index/bit_index walk the free array in parallel to i, one bit per chunk
int32 free_index = 0;
int32 bit_index = 0;
for (int32 i = 0; i < hm->buf.count; ++i) {
// A set bit marks a chunk that is in use
if ((hm->buf.free[free_index] & (1ULL << bit_index)) > 0) {
HashEntry* entry = (HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, i);
// element_id
*((uint64 *) data) = SWAP_ENDIAN_LITTLE(entry->element_id);
data += sizeof(entry->element_id);
// key
memcpy(data, entry->key, sizeof(entry->key));
data += sizeof(entry->key);
// next pointer
// Written as offset relative to the buffer start, 0 if there is no next entry
if (entry->next) {
*((uint64 *) data) = SWAP_ENDIAN_LITTLE((uintptr_t) entry->next - (uintptr_t) hm->buf.memory);
} else {
memset(data, 0, sizeof(uint64));
}
data += sizeof(uint64);
// Also dump the next pointer
// Count how many next elements we have
HashEntry* entry = ((HashEntry *) hm->table[i])->next;
int32 next_count = 0;
while (entry) {
++next_count;
entry = entry->next;
// We just assume that 4 or 8 bytes = int -> endian handling
if (value_size == 4) {
*((int32 *) data) = SWAP_ENDIAN_LITTLE(((HashEntryInt32 *) entry)->value);
} else if (value_size == 8) {
*((int64 *) data) = SWAP_ENDIAN_LITTLE(((HashEntryInt64 *) entry)->value);
} else {
memcpy(data, entry->value, value_size);
}
data += value_size;
} else {
// No entry defined -> NULL
memset(data, 0, hm->buf.chunk_size);
data += hm->buf.chunk_size;
}
next_count_total += next_count;
*((int32 *) data) = SWAP_ENDIAN_LITTLE(next_count);
data += sizeof(next_count);
if (next_count > 0) {
entry = ((HashEntry *) hm->table[i])->next;
while (entry) {
*((uint64 *) data) = SWAP_ENDIAN_LITTLE((uintptr_t) entry - (uintptr_t) hm->buf.memory);
data += sizeof(uint64);
entry = entry->next;
}
// Move on to the bit of the next chunk, wrap to the next uint64 of the free array after 64 bits
++bit_index;
if (bit_index > 63) {
bit_index = 0;
++free_index;
}
}
// @performance chunk_dump() below contains some data we already output above
// (next pointer but it is useless, since we need relative positions)
// Maybe we should manually re-create the chunk_dump here and omit the already dumped data for the next pointer?
// dump free array
memcpy(data, hm->buf.free, sizeof(uint64) * CEIL_DIV(hm->buf.count, 64));
// How many bytes were written (+ dump the chunk memory)
return sizeof(hm->buf.count)
return sizeof(hm->buf.count) // hash map count = buffer count
+ hm->buf.count * sizeof(uint64) // table content
+ hm->buf.count * sizeof(int32) // counter for the next pointer (one for every element)
+ next_count_total * sizeof(uint64) // next pointer offset
+ chunk_dump(&hm->buf, data);
+ hm->buf.size; // hash map content + free array
}
// WARNING: Requires hashmap_create first
//
// Restores a hash map from data: reads the element count, the table offsets, the chunk contents
// and the free array and turns the stored offsets back into pointers into hm->buf.memory.
//
// NOTE(review): this listing contains the removed and the added lines of the commit side by side
// (e.g. two variants of the table loop and two heads for the return expression) - confirm against the real file.
// NOTE(review): the count read from data is only used as loop bound for the table, everything else
// uses hm->buf.count - presumably both are expected to be equal, confirm.
//
// @param hm   Already created hash map that receives the content
// @param data Input buffer, presumably in the layout written by hashmap_dump - TODO confirm
// @return Number of bytes read according to the size formula at the end of the function
inline
int64 hashmap_load(HashMap* hm, const byte* data)
{
uint64 count = SWAP_ENDIAN_LITTLE(*((uint64 *) data));
data += sizeof(uint64);
uint64 next_count_total = 0;
// Load the table content, we also need to convert from relative indices to pointers
// Load the table content
for (int i = 0; i < count; ++i) {
hm->table[i] = hm->buf.memory + SWAP_ENDIAN_LITTLE(*((uint64 *) data));
data += sizeof(uint64);
uint64 offset = SWAP_ENDIAN_LITTLE(*((uint64 *) data));
data += sizeof(offset);
// Also load the next pointer
// Count how many next elements we have
int32 next_count = SWAP_ENDIAN_LITTLE(*((int32 *) data));
data += sizeof(next_count);
// the first element has no offset!
// NOTE(review): offset 0 is ambiguous. Slot 0 always becomes a pointer to the buffer start, even if it was empty,
// and any other slot with offset 0 always becomes NULL, even if it pointed to the entry at the buffer start.
hm->table[i] = offset || i == 0 ? hm->buf.memory + offset : NULL;
}
HashEntry* entry = ((HashEntry *) hm->table[i]);
for (int32 j = 0; j < next_count; ++j) {
entry->next = (HashEntry *) (hm->buf.memory + SWAP_ENDIAN_LITTLE(*((uint64 *) data)));
data += sizeof(uint64);
entry = entry->next;
// This loop here is why it is important to already have an initialized hashmap
// @question Do we maybe want to change this and not require an initialized hashmap?
// NOTE(review): this copies hm->buf.size bytes and the free array is copied again right below
// - presumably buf.size already includes the free array, confirm
memcpy(hm->buf.memory, data, hm->buf.size);
data += hm->buf.chunk_size * hm->buf.count;
// @question don't we have to possibly endian swap check the free array as well?
memcpy(hm->buf.free, data, sizeof(uint64) * CEIL_DIV(hm->buf.count, 64));
// Bytes of a chunk that remain after two uint64 fields and the key
// (presumably element_id, next pointer and key of HashEntry - TODO confirm against the struct layout)
int64 value_size = hm->buf.chunk_size - sizeof(uint64) - sizeof(char) * MAX_KEY_LENGTH - sizeof(uint64);
// Switch endian AND turn offsets to pointers
// NOTE(review): free_index and bit_index are never changed inside this loop,
// so every iteration tests bit 0 of free[0] instead of the bit that belongs to chunk i
int32 free_index = 0;
int32 bit_index = 0;
for (int32 i = 0; i < hm->buf.count; ++i) {
if ((hm->buf.free[free_index] & (1ULL << bit_index)) > 0) {
HashEntry* entry = (HashEntry *) chunk_get_element((ChunkMemory *) &hm->buf, i);
// element id
entry->element_id = SWAP_ENDIAN_LITTLE(entry->element_id);
// key is already loaded with the memcpy
// @question Do we even want to use memcpy? We are re-checking all the values here anyways
// next pointer
// After the memcpy next still holds the stored offset, 0 is treated as "no next entry"
if (entry->next) {
entry->next = (HashEntry *) (hm->buf.memory + SWAP_ENDIAN_LITTLE((uint64) entry->next));
}
// Same assumption as in the dump: a 4 or 8 byte value is treated as integer for the endian swap
if (value_size == 4) {
((HashEntryInt32 *) entry)->value = SWAP_ENDIAN_LITTLE(((HashEntryInt32 *) entry)->value);
} else if (value_size == 8) {
((HashEntryInt64 *) entry)->value = SWAP_ENDIAN_LITTLE(((HashEntryInt64 *) entry)->value);
}
}
}
// How many bytes was read from data
return sizeof(count)
return sizeof(hm->buf.count) // hash map count = buffer count
+ hm->buf.count * sizeof(uint64) // table content
+ hm->buf.count * sizeof(int32) // counter for the next pointer (one for every element)
+ next_count_total * sizeof(uint64) // next pointer offset
+ chunk_load(&hm->buf, data);
+ hm->buf.size;
}
#endif

View File

@ -39,7 +39,7 @@ struct f32_16 {
// Loads 4 f32 values starting at mem into a f32_4.
// NOTE(review): the unaligned and the aligned load are both listed, the later _mm_load_ps wins.
// _mm_load_ps requires mem to be 16 byte aligned - confirm that every caller guarantees this.
inline f32_4 load_f32_4(const f32* mem)
{
f32_4 simd;
simd.s = _mm_loadu_ps(mem);
simd.s = _mm_load_ps(mem);
return simd;
}
@ -57,7 +57,7 @@ inline void unload_f32_4(f32_4 a, f32 *array) { _mm_store_ps(array, a.s); }
// Loads 8 f32 values starting at mem into a f32_8.
// NOTE(review): the unaligned and the aligned load are both listed, the later _mm256_load_ps wins.
// _mm256_load_ps requires mem to be 32 byte aligned - confirm that every caller guarantees this.
inline f32_8 load_f32_8(const f32* mem)
{
f32_8 simd;
simd.s = _mm256_loadu_ps(mem);
simd.s = _mm256_load_ps(mem);
return simd;
}
@ -75,7 +75,7 @@ inline void unload_f32_8(f32_8 a, f32 *array) { _mm256_store_ps(array, a.s); }
// Loads 16 f32 values starting at mem into a f32_16.
// NOTE(review): the unaligned and the aligned load are both listed, the later _mm512_load_ps wins.
// _mm512_load_ps requires mem to be 64 byte aligned - confirm that every caller guarantees this.
inline f32_16 load_f32_16(const f32* mem)
{
f32_16 simd;
simd.s = _mm512_loadu_ps(mem);
simd.s = _mm512_load_ps(mem);
return simd;
}
@ -996,8 +996,8 @@ void simd_mult(const f32* a, const f32* b, f32* result, int32 size, int32 steps)
__m512 result_16;
for (; i <= size - steps; i += steps) {
a_16 = _mm512_loadu_ps(a);
b_16 = _mm512_loadu_ps(b);
a_16 = _mm512_load_ps(a);
b_16 = _mm512_load_ps(b);
result_16 = _mm512_mul_ps(a_16, b_16);
_mm512_store_ps(result, result_16);
@ -1011,8 +1011,8 @@ void simd_mult(const f32* a, const f32* b, f32* result, int32 size, int32 steps)
__m256 result_8;
for (; i <= size - steps; i += steps) {
a_8 = _mm256_loadu_ps(a);
b_8 = _mm256_loadu_ps(b);
a_8 = _mm256_load_ps(a);
b_8 = _mm256_load_ps(b);
result_8 = _mm256_mul_ps(a_8, b_8);
_mm256_store_ps(result, result_8);
@ -1026,8 +1026,8 @@ void simd_mult(const f32* a, const f32* b, f32* result, int32 size, int32 steps)
__m128 result_4;
for (; i <= size - steps; i += steps) {
a_4 = _mm_loadu_ps(a);
b_4 = _mm_loadu_ps(b);
a_4 = _mm_load_ps(a);
b_4 = _mm_load_ps(b);
result_4 = _mm_mul_ps(a_4, b_4);
_mm_store_ps(result, result_4);
@ -1057,7 +1057,7 @@ void simd_mult(const f32* a, f32 b, f32* result, int32 size, int32 steps)
__m512 result_16;
for (; i <= size - steps; i += steps) {
a_16 = _mm512_loadu_ps(a);
a_16 = _mm512_load_ps(a);
result_16 = _mm512_mul_ps(a_16, b_16);
_mm512_store_ps(result, result_16);
@ -1070,7 +1070,7 @@ void simd_mult(const f32* a, f32 b, f32* result, int32 size, int32 steps)
__m256 result_8;
for (; i <= size - steps; i += steps) {
a_8 = _mm256_loadu_ps(a);
a_8 = _mm256_load_ps(a);
result_8 = _mm256_mul_ps(a_8, b_8);
_mm256_store_ps(result, result_8);
@ -1083,7 +1083,7 @@ void simd_mult(const f32* a, f32 b, f32* result, int32 size, int32 steps)
__m128 result_4;
for (; i <= size - steps; i += steps) {
a_4 = _mm_loadu_ps(a);
a_4 = _mm_load_ps(a);
result_4 = _mm_mul_ps(a_4, b_4);
_mm_store_ps(result, result_4);
@ -1111,7 +1111,7 @@ void simd_div(const f32* a, f32 b, f32* result, int32 size, int32 steps)
__m512 result_16;
for (; i <= size - steps; i += steps) {
a_16 = _mm512_loadu_ps(a);
a_16 = _mm512_load_ps(a);
result_16 = _mm512_div_ps(a_16, b_16);
_mm512_store_ps(result, result_16);
@ -1124,7 +1124,7 @@ void simd_div(const f32* a, f32 b, f32* result, int32 size, int32 steps)
__m256 result_8;
for (; i <= size - steps; i += steps) {
a_8 = _mm256_loadu_ps(a);
a_8 = _mm256_load_ps(a);
result_8 = _mm256_div_ps(a_8, b_8);
_mm256_store_ps(result, result_8);
@ -1137,7 +1137,7 @@ void simd_div(const f32* a, f32 b, f32* result, int32 size, int32 steps)
__m128 result_4;
for (; i <= size - steps; i += steps) {
a_4 = _mm_loadu_ps(a);
a_4 = _mm_load_ps(a);
result_4 = _mm_div_ps(a_4, b_4);
_mm_store_ps(result, result_4);
@ -1166,7 +1166,7 @@ void simd_div(const f32* a, f32 b, __m256* result, int32 size)
__m256 result_8;
for (; i <= size - 8; i += 8) {
a_8 = _mm256_loadu_ps(a);
a_8 = _mm256_load_ps(a);
result_8 = _mm256_div_ps(a_8, b_8);
result[j] = result_8;
@ -1181,7 +1181,7 @@ void simd_div(const f32* a, f32 b, __m256* result, int32 size)
temp[k] = a[i + k] / b;
}
result[j] = _mm256_loadu_ps(temp);
result[j] = _mm256_load_ps(temp);
}
inline

View File

@ -39,7 +39,7 @@ struct int16_32 {
// Loads 128 bits (8 int16 values) starting at mem into an int16_8.
// NOTE(review): the unaligned and the aligned load are both listed, the later _mm_load_si128 wins.
// _mm_load_si128 requires mem to be 16 byte aligned - confirm that every caller guarantees this.
inline int16_8 load_int16_8(const int16* mem)
{
int16_8 simd;
simd.s = _mm_loadu_epi16(mem);
simd.s = _mm_load_si128((__m128i *) mem);
return simd;
}
@ -60,7 +60,7 @@ inline void unload_int16_8(int16_8 a, int16 *array) { _mm_store_si128((__m128i *
// Loads 256 bits (16 int16 values) starting at mem into an int16_16.
// NOTE(review): the unaligned and the aligned load are both listed, the later _mm256_load_si256 wins.
// _mm256_load_si256 requires mem to be 32 byte aligned - confirm that every caller guarantees this.
inline int16_16 load_int16_16(const int16* mem)
{
int16_16 simd;
simd.s = _mm256_loadu_epi16(mem);
simd.s = _mm256_load_si256((__m256i *) mem);
return simd;
}
@ -81,7 +81,7 @@ inline void unload_int16_16(int16_16 a, int16 *array) { _mm256_store_si256((__m2
// Loads 512 bits (32 int16 values) starting at mem into an int16_32.
// NOTE(review): the unaligned and the aligned load are both listed, the later _mm512_load_si512 wins.
// _mm512_load_si512 requires mem to be 64 byte aligned - confirm that every caller guarantees this.
inline int16_32 load_int16_32(const int16* mem)
{
int16_32 simd;
simd.s = _mm512_loadu_epi16(mem);
simd.s = _mm512_load_si512((__m512i *) mem);
return simd;
}

View File

@ -11,6 +11,7 @@
#include <immintrin.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#include "../Types.h"
#include "../../utils/BitUtils.h"
@ -45,7 +46,7 @@ struct int32_16 {
// Loads 128 bits (4 int32 values) starting at mem into an int32_4.
// NOTE(review): the unaligned and the aligned load are both listed, the later _mm_load_si128 wins.
// _mm_load_si128 requires mem to be 16 byte aligned - confirm that every caller guarantees this.
inline int32_4 load_int32_4(const int32* mem)
{
int32_4 simd;
simd.s = _mm_loadu_epi32(mem);
simd.s = _mm_load_si128((__m128i *) mem);
return simd;
}
@ -63,7 +64,7 @@ inline void unload_int32_4(int32_4 a, int32 *array) { _mm_store_si128((__m128i *
// Loads 256 bits (8 int32 values) starting at mem into an int32_8.
// NOTE(review): the unaligned and the aligned load are both listed, the later _mm256_load_si256 wins.
// _mm256_load_si256 requires mem to be 32 byte aligned - confirm that every caller guarantees this.
inline int32_8 load_int32_8(const int32* mem)
{
int32_8 simd;
simd.s = _mm256_loadu_epi32(mem);
simd.s = _mm256_load_si256((__m256i *) mem);
return simd;
}
@ -81,7 +82,7 @@ inline void unload_int32_8(int32_8 a, int32 *array) { _mm256_store_si256((__m256
// Loads 512 bits (16 int32 values) starting at mem into an int32_16.
// NOTE(review): the unaligned and the aligned load are both listed, the later _mm512_load_epi32 wins.
// _mm512_load_epi32 requires mem to be 64 byte aligned - confirm that every caller guarantees this.
inline int32_16 load_int32_16(const int32* mem)
{
int32_16 simd;
simd.s = _mm512_loadu_epi32(mem);
simd.s = _mm512_load_epi32(mem);
return simd;
}
@ -1039,8 +1040,8 @@ void simd_mult(const int32* a, const int32* b, int32* result, int32 size, int32
__m512i result_16;
for (; i <= size - steps; i += steps) {
a_16 = _mm512_loadu_epi32(a);
b_16 = _mm512_loadu_epi32(b);
a_16 = _mm512_load_epi32(a);
b_16 = _mm512_load_epi32(b);
result_16 = _mm512_mul_epi32(a_16, b_16);
_mm512_store_epi32(result, result_16);
@ -1054,8 +1055,8 @@ void simd_mult(const int32* a, const int32* b, int32* result, int32 size, int32
__m256i result_8;
for (; i <= size - steps; i += steps) {
a_8 = _mm256_loadu_epi32(a);
b_8 = _mm256_loadu_epi32(b);
a_8 = _mm256_load_si256((__m256i *) a);
b_8 = _mm256_load_si256((__m256i *) b);
result_8 = _mm256_mul_epi32(a_8, b_8);
_mm256_store_si256((__m256i *) result, result_8);
@ -1069,8 +1070,8 @@ void simd_mult(const int32* a, const int32* b, int32* result, int32 size, int32
__m128i result_4;
for (; i <= size - steps; i += steps) {
a_4 = _mm_loadu_epi32(a);
b_4 = _mm_loadu_epi32(b);
a_4 = _mm_load_si128((__m128i *) a);
b_4 = _mm_load_si128((__m128i *) b);
result_4 = _mm_mul_epi32(a_4, b_4);
_mm_store_si128((__m128i *) result, result_4);
@ -1101,9 +1102,9 @@ void simd_mult(const int32* a, const f32* b, f32* result, int32 size, int32 step
__m512 result_16;
for (; i <= size - steps; i += steps) {
a_16 = _mm512_loadu_epi32(a);
a_16 = _mm512_load_epi32(a);
af_16 = _mm512_cvtepi32_ps(a_16);
b_16 = _mm512_loadu_ps(b);
b_16 = _mm512_load_ps(b);
result_16 = _mm512_mul_ps(af_16, b_16);
_mm512_store_ps(result, result_16);
@ -1118,9 +1119,9 @@ void simd_mult(const int32* a, const f32* b, f32* result, int32 size, int32 step
__m256 result_8;
for (; i <= size - steps; i += steps) {
a_8 = _mm256_loadu_epi32(a);
a_8 = _mm256_load_si256((__m256i *) a);
af_8 = _mm256_cvtepi32_ps(a_8);
b_8 = _mm256_loadu_ps(b);
b_8 = _mm256_load_ps(b);
result_8 = _mm256_mul_ps(af_8, b_8);
_mm256_store_ps(result, result_8);
@ -1135,9 +1136,9 @@ void simd_mult(const int32* a, const f32* b, f32* result, int32 size, int32 step
__m128 result_4;
for (; i <= size - steps; i += steps) {
a_4 = _mm_loadu_epi32(a);
a_4 = _mm_load_si128((__m128i *) a);
af_4 = _mm_cvtepi32_ps(a_4);
b_4 = _mm_loadu_ps(b);
b_4 = _mm_load_ps(b);
result_4 = _mm_mul_ps(af_4, b_4);
_mm_store_ps(result, result_4);
@ -1169,9 +1170,9 @@ void simd_mult(const int32* a, const f32* b, int32* result, int32 size, int32 st
__m512i resulti_16;
for (; i <= size - steps; i += steps) {
a_16 = _mm512_loadu_epi32(a);
a_16 = _mm512_load_epi32(a);
af_16 = _mm512_cvtepi32_ps(a_16);
b_16 = _mm512_loadu_ps(b);
b_16 = _mm512_load_ps(b);
result_16 = _mm512_mul_ps(af_16, b_16);
resulti_16 = _mm512_cvtps_epi32(result_16);
_mm512_store_epi32(result, resulti_16);
@ -1188,9 +1189,9 @@ void simd_mult(const int32* a, const f32* b, int32* result, int32 size, int32 st
__m256i resulti_8;
for (; i <= size - steps; i += steps) {
a_8 = _mm256_loadu_epi32(a);
a_8 = _mm256_load_si256((__m256i *) a);
af_8 = _mm256_cvtepi32_ps(a_8);
b_8 = _mm256_loadu_ps(b);
b_8 = _mm256_load_ps(b);
result_8 = _mm256_mul_ps(af_8, b_8);
resulti_8 = _mm256_cvtps_epi32(result_8);
_mm256_store_si256((__m256i *) result, resulti_8);
@ -1207,9 +1208,9 @@ void simd_mult(const int32* a, const f32* b, int32* result, int32 size, int32 st
__m128i resulti_4;
for (; i <= size - steps; i += steps) {
a_4 = _mm_loadu_epi32(a);
a_4 = _mm_load_si128((__m128i *) a);
af_4 = _mm_cvtepi32_ps(a_4);
b_4 = _mm_loadu_ps(b);
b_4 = _mm_load_ps(b);
result_4 = _mm_mul_ps(af_4, b_4);
resulti_4 = _mm_cvtps_epi32(result_4);
_mm_store_si128((__m128i *) result, resulti_4);
@ -1242,7 +1243,7 @@ void simd_mult(const int32* a, f32 b, int32* result, int32 size, int32 steps)
__m512i resulti_16;
for (; i <= size - steps; i += steps) {
a_16 = _mm512_loadu_epi32(a);
a_16 = _mm512_load_epi32(a);
af_16 = _mm512_cvtepi32_ps(a_16);
result_16 = _mm512_mul_ps(af_16, b_16);
resulti_16 = _mm512_cvtps_epi32(result_16);
@ -1259,7 +1260,7 @@ void simd_mult(const int32* a, f32 b, int32* result, int32 size, int32 steps)
__m256i resulti_8;
for (; i <= size - steps; i += steps) {
a_8 = _mm256_loadu_epi32(a);
a_8 = _mm256_load_si256((__m256i *) a);
af_8 = _mm256_cvtepi32_ps(a_8);
result_8 = _mm256_mul_ps(af_8, b_8);
resulti_8 = _mm256_cvtps_epi32(result_8);
@ -1276,7 +1277,7 @@ void simd_mult(const int32* a, f32 b, int32* result, int32 size, int32 steps)
__m128i resulti_4;
for (; i <= size - steps; i += steps) {
a_4 = _mm_loadu_epi32(a);
a_4 = _mm_load_si128((__m128i *) a);
af_4 = _mm_cvtepi32_ps(a_4);
result_4 = _mm_mul_ps(af_4, b_4);
resulti_4 = _mm_cvtps_epi32(result_4);
@ -1307,7 +1308,7 @@ void simd_div(const int32* a, f32 b, f32* result, int32 size, int32 steps)
__m512 result_16;
for (; i <= size - steps; i += steps) {
a_16 = _mm512_loadu_epi32(a);
a_16 = _mm512_load_epi32(a);
af_16 = _mm512_cvtepi32_ps(a_16);
result_16 = _mm512_div_ps(af_16, b_16);
_mm512_store_ps(result, result_16);
@ -1323,7 +1324,7 @@ void simd_div(const int32* a, f32 b, f32* result, int32 size, int32 steps)
__m256 result_8;
for (; i <= size - steps; i += steps) {
a_8 = _mm256_loadu_epi32(a);
a_8 = _mm256_load_si256((__m256i *) a);
af_8 = _mm256_cvtepi32_ps(a_8);
result_8 = _mm256_div_ps(af_8, b_8);
_mm256_store_ps(result, result_8);
@ -1338,7 +1339,7 @@ void simd_div(const int32* a, f32 b, f32* result, int32 size, int32 steps)
__m128 result_4;
for (; i <= size - steps; i += steps) {
a_4 = _mm_loadu_epi32(a);
a_4 = _mm_load_si128((__m128i *) a);
af_4 = _mm_cvtepi32_ps(a_4);
result_4 = _mm_div_ps(af_4, b_4);
_mm_store_ps(result, result_4);
@ -1367,8 +1368,8 @@ void simd_add(const int32* a, const int32* b, int32* result, int32 size, int32 s
__m512i result_16;
for (; i <= size - steps; i += steps) {
a_16 = _mm512_loadu_epi32(a);
b_16 = _mm512_loadu_epi32(b);
a_16 = _mm512_load_epi32(a);
b_16 = _mm512_load_epi32(b);
result_16 = _mm512_add_epi32(a_16, b_16);
_mm512_store_epi32(result, result_16);
@ -1382,8 +1383,8 @@ void simd_add(const int32* a, const int32* b, int32* result, int32 size, int32 s
__m256i result_8;
for (; i <= size - steps; i += steps) {
a_8 = _mm256_loadu_epi32(a);
b_8 = _mm256_loadu_epi32(b);
a_8 = _mm256_load_si256((__m256i *) a);
b_8 = _mm256_load_si256((__m256i *) b);
result_8 = _mm256_add_epi32(a_8, b_8);
_mm256_store_si256((__m256i *) result, result_8);
@ -1397,8 +1398,8 @@ void simd_add(const int32* a, const int32* b, int32* result, int32 size, int32 s
__m128i result_4;
for (; i <= size - steps; i += steps) {
a_4 = _mm_loadu_epi32(a);
b_4 = _mm_loadu_epi32(b);
a_4 = _mm_load_si128((__m128i *) a);
b_4 = _mm_load_si128((__m128i *) b);
result_4 = _mm_add_epi32(a_4, b_4);
_mm_store_si128((__m128i *) result, result_4);
@ -1429,9 +1430,9 @@ void simd_add(const int32* a, const f32* b, f32* result, int32 size, int32 steps
__m512 result_16;
for (; i <= size - steps; i += steps) {
a_16 = _mm512_loadu_epi32(a);
a_16 = _mm512_load_epi32(a);
af_16 = _mm512_cvtepi32_ps(a_16);
b_16 = _mm512_loadu_ps(b);
b_16 = _mm512_load_ps(b);
result_16 = _mm512_add_ps(af_16, b_16);
_mm512_store_ps(result, result_16);
@ -1446,9 +1447,9 @@ void simd_add(const int32* a, const f32* b, f32* result, int32 size, int32 steps
__m256 result_8;
for (; i <= size - steps; i += steps) {
a_8 = _mm256_loadu_epi32(a);
a_8 = _mm256_load_si256((__m256i *) a);
af_8 = _mm256_cvtepi32_ps(a_8);
b_8 = _mm256_loadu_ps(b);
b_8 = _mm256_load_ps(b);
result_8 = _mm256_add_ps(af_8, b_8);
_mm256_store_ps(result, result_8);
@ -1463,9 +1464,9 @@ void simd_add(const int32* a, const f32* b, f32* result, int32 size, int32 steps
__m128 result_4;
for (; i <= size - steps; i += steps) {
a_4 = _mm_loadu_epi32(a);
a_4 = _mm_load_si128((__m128i *) a);
af_4 = _mm_cvtepi32_ps(a_4);
b_4 = _mm_loadu_ps(b);
b_4 = _mm_load_ps(b);
result_4 = _mm_add_ps(af_4, b_4);
_mm_store_ps(result, result_4);
@ -1497,9 +1498,9 @@ void simd_add(const int32* a, const f32* b, int32* result, int32 size, int32 ste
__m512i resulti_16;
for (; i <= size - steps; i += steps) {
a_16 = _mm512_loadu_epi32(a);
a_16 = _mm512_load_epi32(a);
af_16 = _mm512_cvtepi32_ps(a_16);
b_16 = _mm512_loadu_ps(b);
b_16 = _mm512_load_ps(b);
result_16 = _mm512_add_ps(af_16, b_16);
resulti_16 = _mm512_cvtps_epi32(result_16);
_mm512_store_epi32(result, resulti_16);
@ -1516,9 +1517,9 @@ void simd_add(const int32* a, const f32* b, int32* result, int32 size, int32 ste
__m256i resulti_8;
for (; i <= size - steps; i += steps) {
a_8 = _mm256_loadu_epi32(a);
a_8 = _mm256_load_si256((__m256i *) a);
af_8 = _mm256_cvtepi32_ps(a_8);
b_8 = _mm256_loadu_ps(b);
b_8 = _mm256_load_ps(b);
result_8 = _mm256_add_ps(af_8, b_8);
resulti_8 = _mm256_cvtps_epi32(result_8);
_mm256_store_si256((__m256i *) result, resulti_8);
@ -1535,9 +1536,9 @@ void simd_add(const int32* a, const f32* b, int32* result, int32 size, int32 ste
__m128i resulti_4;
for (; i <= size - steps; i += steps) {
a_4 = _mm_loadu_epi32(a);
a_4 = _mm_load_si128((__m128i *) a);
af_4 = _mm_cvtepi32_ps(a_4);
b_4 = _mm_loadu_ps(b);
b_4 = _mm_load_ps(b);
result_4 = _mm_add_ps(af_4, b_4);
resulti_4 = _mm_cvtps_epi32(result_4);
_mm_store_si128((__m128i *) result, resulti_4);
@ -1560,8 +1561,8 @@ void simd_add(const int32* a, const f32* b, int32* result, int32 size, int32 ste
// WARNING: only works with SSE4.2
// WARNING: incl. \0 both strings must be <= 16 length
//
// Compares two short strings with a single SSE4.2 string comparison (_mm_cmpistrc, equal-each mode).
// Despite the name only 128 bit intrinsics are used.
//
// NOTE(review): the unaligned and the aligned loads are both listed, the later _mm_load_si128 wins.
// _mm_load_si128 reads 16 bytes from a 16 byte aligned address, so both strings must be 16 byte aligned
// and 16 bytes must be readable even for shorter strings - confirm that every caller guarantees this.
// NOTE(review): without _SIDD_NEGATIVE_POLARITY the carry flag is set when any position compares as equal,
// so the "== 0" check looks inverted - confirm against the Intel Intrinsics Guide.
bool str_compare_avx512(const char* str1, const char* str2) {
__m128i s1 = _mm_loadu_si128((const __m128i *) str1);
__m128i s2 = _mm_loadu_si128((const __m128i *) str2);
__m128i s1 = _mm_load_si128((__m128i *) (const __m128i *) str1);
__m128i s2 = _mm_load_si128((__m128i *) (const __m128i *) str2);
return _mm_cmpistrc(s1, s2, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH) == 0;
}
@ -1580,7 +1581,7 @@ endian_swap(const int* val, int* result, int32 size, int32 steps)
);
for (i = 0; i <= size - steps; i += steps) {
__m512i vec = _mm512_loadu_si512((const __m512i *) (val + i));
__m512i vec = _mm512_load_si512((const __m512i *) (val + i));
vec = _mm512_shuffle_epi8(vec, mask_512);
_mm512_storeu_si512((__m512i *) (result + i), vec);
@ -1594,7 +1595,7 @@ endian_swap(const int* val, int* result, int32 size, int32 steps)
);
for (i = 0; i <= size - steps; i += steps) {
__m256i vec = _mm256_loadu_si256((const __m256i *) (val + i));
__m256i vec = _mm256_load_si256((const __m256i *) (val + i));
vec = _mm256_shuffle_epi8(vec, mask_256);
_mm256_storeu_si256((__m256i *) (result + i), vec);
@ -1608,7 +1609,7 @@ endian_swap(const int* val, int* result, int32 size, int32 steps)
);
for (i = 0; i <= size - steps; i += steps) {
__m128i vec = _mm_loadu_si128((const __m128i *) (val + i));
__m128i vec = _mm_load_si128((__m128i *) (const __m128i *) (val + i));
vec = _mm_shuffle_epi8(vec, mask_128);
_mm_storeu_si128((__m128i *) (result + i), vec);

View File

@ -40,7 +40,7 @@ struct int8_64 {
inline int8_16 load_int8_16(const int8* mem)
{
int8_16 simd;
simd.s = _mm_loadu_epi8(mem);
simd.s = _mm_load_si128((__m128i *) mem);
return simd;
}
@ -63,7 +63,7 @@ inline void unload_int8_16(int8_16 a, int8 *array) { _mm_store_si128((__m128i *)
inline int8_32 load_int8_32(const int8* mem)
{
int8_32 simd;
simd.s = _mm256_loadu_epi8(mem);
simd.s = _mm256_load_si256((__m256i *) mem);
return simd;
}
@ -86,7 +86,7 @@ inline void unload_int8_32(int8_32 a, int8 *array) { _mm256_store_si256((__m256i
inline int8_64 load_int8_64(const int8* mem)
{
int8_64 simd;
simd.s = _mm512_loadu_epi8(mem);
simd.s = _mm512_load_si512((__m512i *) mem);
return simd;
}
@ -830,19 +830,19 @@ inline
f32 simd_mult(const int8* a, f32 b, int32 size, int32 steps)
{
if (steps == 16) {
__m512i a_16 = _mm512_loadu_epi8(a);
__m512i a_16 = _mm512_load_si512((__m512i *) a);
__m512 af_16 = _mm512_cvtepi32_ps(a_16);
__m512 b_16 = _mm512_set1_ps(b);
__m512 result = _mm512_mul_ps(af_16, b_16);
} else if (steps == 8) {
__m256i a_8 = _mm256_loadu_epi8(a);
__m256i a_8 = _mm256_load_si256((__m256i *) a);
__m256 af_8 = _mm256_cvtepi32_ps(a_8);
__m256 b_8 = _mm256_set1_ps(b);
__m256 result = _mm256_mul_ps(af_8, b_8);
} else if (steps == 4) {
__m128i a_4 = _mm_loadu_epi8(a);
__m128i a_4 = _mm_load_si128((__m128i *) a);
__m128 af_4 = _mm_cvtepi32_ps(a_4);
__m128 b_4 = _mm_set1_ps(b);
@ -855,11 +855,11 @@ f32 simd_mult(const int8* a, f32 b, int32 size, int32 steps)
bool simd_compare_64(const byte* a, const byte* b)
{
__m256i chunk1_a = _mm256_loadu_si256((__m256i*) a);
__m256i chunk1_b = _mm256_loadu_si256((__m256i*) b);
__m256i chunk1_a = _mm256_load_si256((__m256i*) a);
__m256i chunk1_b = _mm256_load_si256((__m256i*) b);
__m256i chunk2_a = _mm256_loadu_si256((__m256i*) (a + 32));
__m256i chunk2_b = _mm256_loadu_si256((__m256i*) (b + 32));
__m256i chunk2_a = _mm256_load_si256((__m256i*) (a + 32));
__m256i chunk2_b = _mm256_load_si256((__m256i*) (b + 32));
__m256i result1 = _mm256_cmpeq_epi8(chunk1_a, chunk1_b);
__m256i result2 = _mm256_cmpeq_epi8(chunk2_a, chunk2_b);
@ -879,8 +879,8 @@ int simd_compare(const byte* a, const byte* b, uint32 size, uint32 steps = 8) {
__mmask64 result_mask;
for (; i <= size - 64; i += 64) { // 64 bytes per iteration
a_16 = _mm512_loadu_si512((__m512i*) a);
b_16 = _mm512_loadu_si512((__m512i*) b);
a_16 = _mm512_load_si512((__m512i*) a);
b_16 = _mm512_load_si512((__m512i*) b);
result_mask = _mm512_cmpeq_epi8_mask(a_16, b_16);
@ -905,8 +905,8 @@ int simd_compare(const byte* a, const byte* b, uint32 size, uint32 steps = 8) {
__m256i result_8;
for (; i <= size - steps; i += steps) {
a_8 = _mm256_loadu_si256((__m256i*) a);
b_8 = _mm256_loadu_si256((__m256i*) b);
a_8 = _mm256_load_si256((__m256i*) a);
b_8 = _mm256_load_si256((__m256i*) b);
result_8 = _mm256_cmpeq_epi8(a_8, b_8);
@ -929,8 +929,8 @@ int simd_compare(const byte* a, const byte* b, uint32 size, uint32 steps = 8) {
__m128i result_4;
for (; i <= size - steps; i += steps) {
a_4 = _mm_loadu_si128((__m128i*) a);
b_4 = _mm_loadu_si128((__m128i*) b);
a_4 = _mm_load_si128((__m128i*) a);
b_4 = _mm_load_si128((__m128i*) b);
result_4 = _mm_cmpeq_epi8(a_4, b_4);

View File

@ -25,7 +25,7 @@
result[i] = a_array[i] / b_array[i];
}
return _mm_loadu_si128((__m128i*)result);
return _mm_load_si128((__m128i*)result);
}
inline __m256i _mm256_div_epi32(__m256i a, __m256i b) {
@ -38,7 +38,7 @@
result[i] = a_array[i] / b_array[i];
}
return _mm256_loadu_si256((__m256i*)result);
return _mm256_load_si256((__m256i*)result);
}
inline __m512i _mm512_div_epi32(__m512i a, __m512i b) {
@ -51,7 +51,7 @@
result[i] = a_array[i] / b_array[i];
}
return _mm512_loadu_si512((__m512i*)result);
return _mm512_load_si512((__m512i*)result);
}
inline __m128 _mm_sin_ps(__m128 a) {
@ -60,7 +60,7 @@
for (int i = 0; i < 4; ++i) {
result[i] = sinf(a_array[i]);
}
return _mm_loadu_ps(result);
return _mm_load_ps(result);
}
inline __m128 _mm_cos_ps(__m128 a) {
@ -69,7 +69,7 @@
for (int i = 0; i < 4; ++i) {
result[i] = cosf(a_array[i]);
}
return _mm_loadu_ps(result);
return _mm_load_ps(result);
}
inline __m128 _mm_asin_ps(__m128 a) {
@ -78,7 +78,7 @@
for (int i = 0; i < 4; ++i) {
result[i] = asinf(a_array[i]);
}
return _mm_loadu_ps(result);
return _mm_load_ps(result);
}
inline __m128 _mm_acos_ps(__m128 a) {
@ -87,7 +87,7 @@
for (int i = 0; i < 4; ++i) {
result[i] = acosf(a_array[i]);
}
return _mm_loadu_ps(result);
return _mm_load_ps(result);
}
inline __m256 _mm256_sin_ps(__m256 a) {
@ -96,7 +96,7 @@
for (int i = 0; i < 8; ++i) {
result[i] = sinf(a_array[i]);
}
return _mm256_loadu_ps(result);
return _mm256_load_ps(result);
}
inline __m256 _mm256_cos_ps(__m256 a) {
@ -105,7 +105,7 @@
for (int i = 0; i < 8; ++i) {
result[i] = cosf(a_array[i]);
}
return _mm256_loadu_ps(result);
return _mm256_load_ps(result);
}
inline __m256 _mm256_asin_ps(__m256 a) {
@ -114,7 +114,7 @@
for (int i = 0; i < 8; ++i) {
result[i] = asinf(a_array[i]);
}
return _mm256_loadu_ps(result);
return _mm256_load_ps(result);
}
inline __m256 _mm256_acos_ps(__m256 a) {
@ -123,7 +123,7 @@
for (int i = 0; i < 16; ++i) {
result[i] = acosf(a_array[i]);
}
return _mm256_loadu_ps(result);
return _mm256_load_ps(result);
}
inline __m512 _mm512_sin_ps(__m512 a) {
@ -132,7 +132,7 @@
for (int i = 0; i < 16; ++i) {
result[i] = sinf(a_array[i]);
}
return _mm512_loadu_ps(result);
return _mm512_load_ps(result);
}
inline __m512 _mm512_cos_ps(__m512 a) {
@ -141,7 +141,7 @@
for (int i = 0; i < 16; ++i) {
result[i] = cosf(a_array[i]);
}
return _mm512_loadu_ps(result);
return _mm512_load_ps(result);
}
inline __m512 _mm512_asin_ps(__m512 a) {
@ -150,7 +150,7 @@
for (int i = 0; i < 16; ++i) {
result[i] = asinf(a_array[i]);
}
return _mm512_loadu_ps(result);
return _mm512_load_ps(result);
}
inline __m512 _mm512_acos_ps(__m512 a) {
@ -159,7 +159,7 @@
for (int i = 0; i < 16; ++i) {
result[i] = acosf(a_array[i]);
}
return _mm512_loadu_ps(result);
return _mm512_load_ps(result);
}
#endif

View File

@ -96,9 +96,23 @@ enum UIAttributeType {
UIAttribute* ui_attribute_from_group(UIAttributeGroup* group, UIAttributeType type)
{
for (int i = 0; i < UI_ATTRIBUTE_TYPE_SIZE && i <= type; ++i) {
if (group->attributes[i].attribute_id == type) {
return &group->attributes[i];
if (!group->attributes) {
return NULL;
}
int32 left = 0;
int32 right = type;
// Binary search since attributes are sorted by attribute_id
while (left <= right) {
int32 mid = left + (right - left) / 2;
if (group->attributes[mid].attribute_id == type) {
return &group->attributes[mid];
} else if (group->attributes[mid].attribute_id < type) {
left = mid + 1;
} else {
right = mid - 1;
}
}
@ -199,98 +213,4 @@ constexpr const char* ui_attribute_type_to_string_const(UIAttributeType e)
return NULL;
}
// Returns the lowercase string name of a UI attribute type.
// Values that have no case below yield NULL.
const char* ui_attribute_type_to_string(UIAttributeType e)
{
    const char* name = NULL;

    switch (e) {
        // General
        case UI_ATTRIBUTE_TYPE_TYPE: name = "type"; break;
        case UI_ATTRIBUTE_TYPE_STYLE: name = "style"; break;

        // Dimension
        case UI_ATTRIBUTE_TYPE_DIMENSION_X: name = "x"; break;
        case UI_ATTRIBUTE_TYPE_DIMENSION_Y: name = "y"; break;
        case UI_ATTRIBUTE_TYPE_DIMENSION_WIDTH: name = "width"; break;
        case UI_ATTRIBUTE_TYPE_DIMENSION_HEIGHT: name = "height"; break;

        // Font
        case UI_ATTRIBUTE_TYPE_FONT_NAME: name = "font_name"; break;
        case UI_ATTRIBUTE_TYPE_FONT_COLOR: name = "font_color"; break;
        case UI_ATTRIBUTE_TYPE_FONT_SIZE: name = "font_size"; break;
        case UI_ATTRIBUTE_TYPE_FONT_WEIGHT: name = "font_weight"; break;
        case UI_ATTRIBUTE_TYPE_FONT_LINE_HEIGHT: name = "font_line_height"; break;

        // Alignment / ordering
        case UI_ATTRIBUTE_TYPE_ALIGN_H: name = "align_h"; break;
        case UI_ATTRIBUTE_TYPE_ALIGN_V: name = "align_v"; break;
        case UI_ATTRIBUTE_TYPE_ZINDEX: name = "zindex"; break;

        // Background
        case UI_ATTRIBUTE_TYPE_BACKGROUND_COLOR: name = "background_color"; break;
        case UI_ATTRIBUTE_TYPE_BACKGROUND_IMG: name = "background_img"; break;
        case UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_OPACITY: name = "background_img_opacity"; break;
        case UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_POSITION_V: name = "background_img_position_v"; break;
        case UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_POSITION_H: name = "background_img_position_h"; break;
        case UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_STYLE: name = "background_img_style"; break;

        // Border
        case UI_ATTRIBUTE_TYPE_BORDER_COLOR: name = "border_color"; break;
        case UI_ATTRIBUTE_TYPE_BORDER_WIDTH: name = "border_width"; break;
        case UI_ATTRIBUTE_TYPE_BORDER_TOP_COLOR: name = "border_top_color"; break;
        case UI_ATTRIBUTE_TYPE_BORDER_TOP_WIDTH: name = "border_top_width"; break;
        case UI_ATTRIBUTE_TYPE_BORDER_RIGHT_COLOR: name = "border_right_color"; break;
        case UI_ATTRIBUTE_TYPE_BORDER_RIGHT_WIDTH: name = "border_right_width"; break;
        case UI_ATTRIBUTE_TYPE_BORDER_BOTTOM_COLOR: name = "border_bottom_color"; break;
        case UI_ATTRIBUTE_TYPE_BORDER_BOTTOM_WIDTH: name = "border_bottom_width"; break;
        case UI_ATTRIBUTE_TYPE_BORDER_LEFT_COLOR: name = "border_left_color"; break;
        case UI_ATTRIBUTE_TYPE_BORDER_LEFT_WIDTH: name = "border_left_width"; break;

        // Padding
        case UI_ATTRIBUTE_TYPE_PADDING: name = "padding"; break;
        case UI_ATTRIBUTE_TYPE_PADDING_TOP: name = "padding_top"; break;
        case UI_ATTRIBUTE_TYPE_PADDING_RIGHT: name = "padding_right"; break;
        case UI_ATTRIBUTE_TYPE_PADDING_BOTTOM: name = "padding_bottom"; break;
        case UI_ATTRIBUTE_TYPE_PADDING_LEFT: name = "padding_left"; break;

        // Shadow
        case UI_ATTRIBUTE_TYPE_SHADOW_INNER_COLOR: name = "shadow_inner_color"; break;
        case UI_ATTRIBUTE_TYPE_SHADOW_INNER_ANGLE: name = "shadow_inner_angle"; break;
        case UI_ATTRIBUTE_TYPE_SHADOW_INNER_DISTANCE: name = "shadow_inner_distance"; break;
        case UI_ATTRIBUTE_TYPE_SHADOW_OUTER_COLOR: name = "shadow_outer_color"; break;
        case UI_ATTRIBUTE_TYPE_SHADOW_OUTER_ANGLE: name = "shadow_outer_angle"; break;
        case UI_ATTRIBUTE_TYPE_SHADOW_OUTER_DISTANCE: name = "shadow_outer_distance"; break;

        // Transition
        case UI_ATTRIBUTE_TYPE_TRANSITION_ANIMATION: name = "transition_animation"; break;
        case UI_ATTRIBUTE_TYPE_TRANSITION_DURATION: name = "transition_duration"; break;

        // Anything else keeps the NULL result
        default: break;
    }

    return name;
}
#endif

View File

@ -5,6 +5,9 @@
#include "UIElementType.h"
#include "../object/Vertex.h"
#include <immintrin.h>
#include <xmmintrin.h>
struct UIElementDimension {
int16 x1;
int16 y1;
@ -22,6 +25,7 @@ struct UIElement {
const char* name;
int32 id;
UIElementType type;
bool is_dynamic;
int16 window_id;
int16 panel_id;

View File

@ -53,36 +53,4 @@ constexpr const char* ui_element_type_to_string_const(UIElementType e)
return NULL;
}
// Returns the lowercase string name of a UI element type.
// Values that have no case below yield NULL.
const char* ui_element_type_to_string(UIElementType e)
{
    const char* name = NULL;

    switch (e) {
        // Input widgets
        case UI_ELEMENT_TYPE_BUTTON: name = "button"; break;
        case UI_ELEMENT_TYPE_SELECT: name = "select"; break;
        case UI_ELEMENT_TYPE_DROPDOWN: name = "dropdown"; break;
        case UI_ELEMENT_TYPE_TEXTFIELD: name = "textfield"; break;
        case UI_ELEMENT_TYPE_TEXTAREA: name = "textarea"; break;

        // Content
        case UI_ELEMENT_TYPE_IMAGE: name = "image"; break;
        case UI_ELEMENT_TYPE_TEXT: name = "text"; break;
        case UI_ELEMENT_TYPE_LINK: name = "link"; break;
        case UI_ELEMENT_TYPE_TABLE: name = "table"; break;

        // Views
        case UI_ELEMENT_TYPE_VIEW_WINDOW: name = "view_window"; break;
        case UI_ELEMENT_TYPE_VIEW_PANEL: name = "view_panel"; break;
        case UI_ELEMENT_TYPE_VIEW_TAB: name = "view_tab"; break;

        // Anything else keeps the NULL result
        default: break;
    }

    return name;
}
#endif

View File

@ -7,30 +7,136 @@
// Modified for every scene
struct UILayout {
int32 ui_deadzone_size = 5;
UIElementDimension ui_deadzone[5];
// This array has the size of the game window and represents in color codes where interactible ui elements are
// Size is based on screen size (we don't need full screen size since we assume an interactible element is at least 4 pixels width and height)
// width = 25% of screen size
// height = 25% of screen size
uint16 width;
uint16 height;
int32 element_hoverable_size;
int32 element_hoverable_pos;
UIElementDimension* elements_hoverable;
// Contains all UI elements also dynamic ones (e.g. movable windows)
uint32* ui_chroma_codes;
int32 element_interactible_size;
int32 element_interactible_pos;
UIElementDimension* elements_interactible;
// Contains constant UI elements that usually don't change (e.g. HUD)
uint32* ui_chroma_codes_static;
// @question Since we use a hashmap below, do we even need the size?
// Isn't the size exactly the same as the hash_map buf size
int32 element_size;
int32 element_pos;
HashMap hash_map; // Used to directly find element by name
// @question Do we even need this or should the hashmap values be the elements directly?
// In other places (e.g. theme) we simply define a byte* data variable which actually holds the info.
UIElement* elements;
// Used to directly find element by name
// The values are the UIElements
HashMap hash_map;
int32 vertex_size;
int32 vertex_pos;
Vertex3DTextureColorIndex* vertices;
};
// Looks up the chroma code stored for the grid cell that contains position (x, y).
//
// The grid is written by layout_chroma_codes_update() as rows of layout->width cells,
// with element coordinates divided by 4, so the lookup must divide x and y by 4
// *before* applying the row stride. The previous expression (width * y / 4 + x / 4)
// multiplied first and therefore pointed into the middle of a row for most y values.
//
// @param layout Layout whose ui_chroma_codes grid is queried
// @param x      Horizontal position in the same units as UIElement::dimension
// @param y      Vertical position in the same units as UIElement::dimension
// @return The stored chroma code; 0 (the value the static grid is cleared to)
//         when the position lies outside the width x height grid
inline
uint32 layout_element_from_location(UILayout* layout, uint16 x, uint16 y)
{
    const uint32 cell_x = x / 4;
    const uint32 cell_y = y / 4;

    // Guard against reading past the grid for positions outside of it
    if (cell_x >= layout->width || cell_y >= layout->height) {
        return 0;
    }

    return layout->ui_chroma_codes[layout->width * cell_y + cell_x];
}
// This function should only get called if the location of a UI Element changes
// @performance How to handle moving elements (= dragging a window). We don't want to update this while dragging!
//
// Rebuilds layout->ui_chroma_codes:
//  1. restore the cached static layer (ui_chroma_codes_static)
//  2. paint the id of every dynamic element on top of it
//
// Every entry of every hash bucket is visited, including the collision chain,
// and each entry is filtered individually by is_dynamic. Rectangles are clamped
// to the width x height grid so that elements reaching outside of it can neither
// write out of bounds nor wrap into the next row.
void layout_chroma_codes_update(UILayout* layout)
{
    // Reset all
    memcpy(layout->ui_chroma_codes, layout->ui_chroma_codes_static, layout->width * layout->height * sizeof(uint32));

    const int32 grid_width = layout->width;
    const int32 grid_height = layout->height;

    // @question Are the dimension values below even absolute? They may be in relation to the parent?!
    for (int32 i = 0; i < layout->hash_map.buf.count; ++i) {
        // Walk the bucket head and all chained entries the same way
        for (HashEntry* entry = (HashEntry *) layout->hash_map.table[i]; entry; entry = entry->next) {
            UIElement* element = (UIElement *) entry->value;

            // Static elements are already part of the layer restored above
            if (!element->is_dynamic) {
                continue;
            }

            int32 y_start = element->dimension.y1 / 4;
            int32 y_end = element->dimension.y2 / 4;
            int32 x_start = element->dimension.x1 / 4;
            int32 x_end = element->dimension.x2 / 4;

            // Clamp to the grid
            if (y_start < 0) { y_start = 0; }
            if (x_start < 0) { x_start = 0; }
            if (y_end > grid_height) { y_end = grid_height; }
            if (x_end > grid_width) { x_end = grid_width; }

            for (int32 y = y_start; y < y_end; ++y) {
                int32 y_offset = grid_width * y;

                for (int32 x = x_start; x < x_end; ++x) {
                    layout->ui_chroma_codes[y_offset + x] = (uint32) element->id;
                }
            }
        }
    }
}
// Rebuilds layout->ui_chroma_codes_static, the cached layer of elements that are
// not flagged is_dynamic:
//  1. clear the whole grid to 0
//  2. paint the id of every non-dynamic element
//
// Every entry of every hash bucket is visited, including the collision chain,
// and each entry is filtered individually by is_dynamic. Rectangles are clamped
// to the width x height grid so that elements reaching outside of it can neither
// write out of bounds nor wrap into the next row.
void layout_chroma_codes_update_static(UILayout* layout)
{
    // Reset all
    memset(layout->ui_chroma_codes_static, 0, layout->width * layout->height * sizeof(uint32));

    const int32 grid_width = layout->width;
    const int32 grid_height = layout->height;

    // @question Are the dimension values below even absolute? They may be in relation to the parent?!
    for (int32 i = 0; i < layout->hash_map.buf.count; ++i) {
        // Walk the bucket head and all chained entries the same way
        for (HashEntry* entry = (HashEntry *) layout->hash_map.table[i]; entry; entry = entry->next) {
            UIElement* element = (UIElement *) entry->value;

            // Dynamic elements do not belong into the static layer
            if (element->is_dynamic) {
                continue;
            }

            int32 y_start = element->dimension.y1 / 4;
            int32 y_end = element->dimension.y2 / 4;
            int32 x_start = element->dimension.x1 / 4;
            int32 x_end = element->dimension.x2 / 4;

            // Clamp to the grid
            if (y_start < 0) { y_start = 0; }
            if (x_start < 0) { x_start = 0; }
            if (y_end > grid_height) { y_end = grid_height; }
            if (x_end > grid_width) { x_end = grid_width; }

            for (int32 y = y_start; y < y_end; ++y) {
                int32 y_offset = grid_width * y;

                for (int32 x = x_start; x < x_end; ++x) {
                    layout->ui_chroma_codes_static[y_offset + x] = (uint32) element->id;
                }
            }
        }
    }
}
#endif

View File

@ -66,6 +66,11 @@ inline
// Finds the attribute group stored under group_name.
// The hash map value is used as a byte offset into theme->data.
// An unknown name triggers ASSERT_SIMPLE(false) and returns NULL.
UIAttributeGroup* theme_style_group(UIThemeStyle* theme, const char* group_name)
{
    HashEntryInt64* found = (HashEntryInt64 *) hashmap_get_entry(&theme->hash_map, group_name);

    if (found) {
        return (UIAttributeGroup *) (theme->data + found->value);
    }

    ASSERT_SIMPLE(false);

    return NULL;
}
@ -73,6 +78,11 @@ inline
// Finds the attribute group stored under group_name, passing group_id through to the hash map lookup.
// The hash map value is used as a byte offset into theme->data.
// A failed lookup triggers ASSERT_SIMPLE(false) and returns NULL.
UIAttributeGroup* theme_style_group(UIThemeStyle* theme, const char* group_name, int32 group_id)
{
    HashEntryInt64* found = (HashEntryInt64 *) hashmap_get_entry(&theme->hash_map, group_name, group_id);

    if (found) {
        return (UIAttributeGroup *) (theme->data + found->value);
    }

    ASSERT_SIMPLE(false);

    return NULL;
}
@ -229,7 +239,7 @@ void theme_from_file_txt(
*temp = '\0';
for (int32 j = 0; j < UI_ELEMENT_TYPE_SIZE; ++j) {
if (strcmp(str, ui_element_type_to_string((UIElementType) j)) == 0) {
if (strcmp(str, ui_element_type_to_string_const((UIElementType) j)) == 0) {
attribute.value_int = j;
break;
@ -246,13 +256,11 @@ void theme_from_file_txt(
}
*temp = '\0';
++pos;
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_FONT_COLOR), attribute_name) == 0) {
++pos; // Skip '#'
attribute.attribute_id = UI_ATTRIBUTE_TYPE_FONT_COLOR;
uint32 value = (uint32) strtoul(pos, &pos, 16);
pos += 4;
attribute.value_v4_f32.r = (f32) ((value >> 24) & 0xFF) / 255.0f;
attribute.value_v4_f32.g = (f32) ((value >> 16) & 0xFF) / 255.0f;
@ -260,28 +268,27 @@ void theme_from_file_txt(
attribute.value_v4_f32.a = (f32) (value & 0xFF) / 255.0f;
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_FONT_SIZE), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_FONT_SIZE;
attribute.value_float = strtof(pos, &pos); ++pos;
attribute.value_float = strtof(pos, &pos);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_FONT_WEIGHT), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_FONT_WEIGHT;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_FONT_LINE_HEIGHT), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_FONT_LINE_HEIGHT;
attribute.value_float = strtof(pos, &pos); ++pos;
attribute.value_float = strtof(pos, &pos);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_ALIGN_H), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_ALIGN_H;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_ALIGN_V), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_ALIGN_V;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_ZINDEX), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_ZINDEX;
attribute.value_float = SWAP_ENDIAN_LITTLE(strtof(pos, &pos)); ++pos;
attribute.value_float = SWAP_ENDIAN_LITTLE(strtof(pos, &pos));
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BACKGROUND_COLOR), attribute_name) == 0) {
++pos; // Skip '#'
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BACKGROUND_COLOR;
uint32 value = (uint32) strtoul(pos, &pos, 16);
pos += 4;
attribute.value_v4_f32.r = (f32) ((value >> 24) & 0xFF) / 255.0f;
attribute.value_v4_f32.g = (f32) ((value >> 16) & 0xFF) / 255.0f;
@ -298,22 +305,21 @@ void theme_from_file_txt(
attribute.value_str[i] = '\0';
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_OPACITY), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_OPACITY;
attribute.value_float = SWAP_ENDIAN_LITTLE(strtof(pos, &pos)); ++pos;
attribute.value_float = SWAP_ENDIAN_LITTLE(strtof(pos, &pos));
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_POSITION_V), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_POSITION_V;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_POSITION_H), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_POSITION_H;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_STYLE), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BACKGROUND_IMG_STYLE;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_COLOR), attribute_name) == 0) {
++pos; // Skip '#'
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_COLOR;
uint32 value = (uint32) strtoul(pos, &pos, 16);
pos += 4;
attribute.value_v4_f32.r = (f32) ((value >> 24) & 0xFF) / 255.0f;
attribute.value_v4_f32.g = (f32) ((value >> 16) & 0xFF) / 255.0f;
@ -321,13 +327,12 @@ void theme_from_file_txt(
attribute.value_v4_f32.a = (f32) (value & 0xFF) / 255.0f;
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_WIDTH), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_WIDTH;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_TOP_COLOR), attribute_name) == 0) {
++pos; // Skip '#'
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_TOP_COLOR;
uint32 value = (uint32) strtoul(pos, &pos, 16);
pos += 4;
attribute.value_v4_f32.r = (f32) ((value >> 24) & 0xFF) / 255.0f;
attribute.value_v4_f32.g = (f32) ((value >> 16) & 0xFF) / 255.0f;
@ -335,13 +340,12 @@ void theme_from_file_txt(
attribute.value_v4_f32.a = (f32) (value & 0xFF) / 255.0f;
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_TOP_WIDTH), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_TOP_WIDTH;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_RIGHT_COLOR), attribute_name) == 0) {
++pos; // Skip '#'
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_RIGHT_COLOR;
uint32 value = (uint32) strtoul(pos, &pos, 16);
pos += 4;
attribute.value_v4_f32.r = (f32) ((value >> 24) & 0xFF) / 255.0f;
attribute.value_v4_f32.g = (f32) ((value >> 16) & 0xFF) / 255.0f;
@ -349,13 +353,12 @@ void theme_from_file_txt(
attribute.value_v4_f32.a = (f32) (value & 0xFF) / 255.0f;
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_RIGHT_WIDTH), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_RIGHT_WIDTH;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_BOTTOM_COLOR), attribute_name) == 0) {
++pos; // Skip '#'
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_BOTTOM_COLOR;
uint32 value = (uint32) strtoul(pos, &pos, 16);
pos += 4;
attribute.value_v4_f32.r = (f32) ((value >> 24) & 0xFF) / 255.0f;
attribute.value_v4_f32.g = (f32) ((value >> 16) & 0xFF) / 255.0f;
@ -363,13 +366,12 @@ void theme_from_file_txt(
attribute.value_v4_f32.a = (f32) (value & 0xFF) / 255.0f;
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_BOTTOM_WIDTH), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_BOTTOM_WIDTH;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_LEFT_COLOR), attribute_name) == 0) {
++pos; // Skip '#'
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_LEFT_COLOR;
uint32 value = (uint32) strtoul(pos, &pos, 16);
pos += 4;
attribute.value_v4_f32.r = (f32) ((value >> 24) & 0xFF) / 255.0f;
attribute.value_v4_f32.g = (f32) ((value >> 16) & 0xFF) / 255.0f;
@ -377,28 +379,27 @@ void theme_from_file_txt(
attribute.value_v4_f32.a = (f32) (value & 0xFF) / 255.0f;
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_BORDER_LEFT_WIDTH), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_BORDER_LEFT_WIDTH;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_PADDING), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_PADDING;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_PADDING_TOP), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_PADDING_TOP;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_PADDING_RIGHT), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_PADDING_RIGHT;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_PADDING_BOTTOM), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_PADDING_BOTTOM;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_PADDING_LEFT), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_PADDING_LEFT;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_SHADOW_INNER_COLOR), attribute_name) == 0) {
++pos; // Skip '#'
attribute.attribute_id = UI_ATTRIBUTE_TYPE_SHADOW_INNER_COLOR;
uint32 value = (uint32) strtoul(pos, &pos, 16);
pos += 4;
attribute.value_v4_f32.r = (f32) ((value >> 24) & 0xFF) / 255.0f;
attribute.value_v4_f32.g = (f32) ((value >> 16) & 0xFF) / 255.0f;
@ -406,16 +407,15 @@ void theme_from_file_txt(
attribute.value_v4_f32.a = (f32) (value & 0xFF) / 255.0f;
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_SHADOW_INNER_ANGLE), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_SHADOW_INNER_ANGLE;
attribute.value_float = strtof(pos, &pos); ++pos;
attribute.value_float = strtof(pos, &pos);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_SHADOW_INNER_DISTANCE), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_SHADOW_INNER_DISTANCE;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_SHADOW_OUTER_COLOR), attribute_name) == 0) {
++pos; // Skip '#'
attribute.attribute_id = UI_ATTRIBUTE_TYPE_SHADOW_OUTER_COLOR;
uint32 value = (uint32) strtoul(pos, &pos, 16);
pos += 4;
attribute.value_v4_f32.r = (f32) ((value >> 24) & 0xFF) / 255.0f;
attribute.value_v4_f32.g = (f32) ((value >> 16) & 0xFF) / 255.0f;
@ -423,16 +423,16 @@ void theme_from_file_txt(
attribute.value_v4_f32.a = (f32) (value & 0xFF) / 255.0f;
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_SHADOW_OUTER_ANGLE), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_SHADOW_OUTER_ANGLE;
attribute.value_float = strtof(pos, &pos); ++pos;
attribute.value_float = strtof(pos, &pos);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_SHADOW_OUTER_DISTANCE), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_SHADOW_OUTER_DISTANCE;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_TRANSITION_ANIMATION), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_TRANSITION_ANIMATION;
attribute.value_int = strtoul(pos, &pos, 10); ++pos;
attribute.value_int = strtoul(pos, &pos, 10);
} else if (strcmp(ui_attribute_type_to_string_const(UI_ATTRIBUTE_TYPE_TRANSITION_DURATION), attribute_name) == 0) {
attribute.attribute_id = UI_ATTRIBUTE_TYPE_TRANSITION_DURATION;
attribute.value_float = strtof(pos, &pos); ++pos;
attribute.value_float = strtof(pos, &pos);
} else {
while (*pos != '\n' && *pos != '\0') {
++pos;
@ -487,12 +487,20 @@ void theme_from_file(
// Of course we still need to populate the data using hashmap_load()
// The value is a int64 (because this is the value of the chunk buffer size but the hashmap only allows int32)
hashmap_create(&theme->hash_map, (int32) SWAP_ENDIAN_LITTLE(*((uint64 *) pos)), sizeof(HashEntryInt64), theme->data);
const byte* start = theme->hash_map.buf.memory;
pos += hashmap_load(&theme->hash_map, pos);
// theme data
// Layout: first load the size of the group, then load the individual attributes
for (int32 i = 0; i < theme->hash_map.buf.count; ++i) {
if (!theme->hash_map.table[i]) {
continue;
}
HashEntryInt64* entry = (HashEntryInt64 *) theme->hash_map.table[i];
pos = start + entry->value;
UIAttributeGroup* group = (UIAttributeGroup *) (theme->data + entry->value);
group->attribute_size = SWAP_ENDIAN_LITTLE(*((int32 *) pos));
@ -501,6 +509,21 @@ void theme_from_file(
// @performance The UIAttribute contains a char array which makes this WAY larger than it needs to be in 99% of the cases
memcpy(group->attributes, pos, group->attribute_size * sizeof(UIAttribute));
pos += group->attribute_size * sizeof(UIAttribute);
// load all the next elements
while (entry->next) {
pos = start + entry->value;
group = (UIAttributeGroup *) (theme->data + entry->value);
group->attribute_size = SWAP_ENDIAN_LITTLE(*((int32 *) pos));
pos += sizeof(group->attribute_size);
// @performance The UIAttribute contains a char array which makes this WAY larger than it needs to be in 99% of the cases
memcpy(group->attributes, pos, group->attribute_size * sizeof(UIAttribute));
pos += group->attribute_size * sizeof(UIAttribute);
entry = entry->next;
}
}
}
@ -541,7 +564,7 @@ void theme_to_file(
// I also don't want to add a size variable to the theme as it is useless in all other cases
file.size = theme_size(theme);
file.content = ring_get_memory(ring, file.size, 64);
file.content = ring_get_memory(ring, file.size, 64, true);
byte* pos = file.content;
// version
@ -549,12 +572,19 @@ void theme_to_file(
pos += sizeof(theme->version);
// hashmap
byte* start = pos;
pos += hashmap_dump(&theme->hash_map, pos);
// theme data
// Layout: first save the size of the group, then save the individual attributes
for (int32 i = 0; i < theme->hash_map.buf.count; ++i) {
if (!theme->hash_map.table[i]) {
continue;
}
HashEntryInt64* entry = (HashEntryInt64 *) theme->hash_map.table[i];
pos = start + entry->value;
UIAttributeGroup* group = (UIAttributeGroup *) (theme->data + entry->value);
*((int32 *) pos) = SWAP_ENDIAN_LITTLE(group->attribute_size);
@ -562,7 +592,22 @@ void theme_to_file(
// @performance The UIAttribute contains a char array which makes this WAY larger than it needs to be in 99% of the cases
memcpy(pos, group->attributes, group->attribute_size * sizeof(UIAttribute));
pos += group->attribute_size * sizeof(UIAttribute);
pos += sizeof(UIAttribute);
// save all the next elements
while (entry->next) {
pos = start + entry->value;
group = (UIAttributeGroup *) (theme->data + entry->value);
*((int32 *) pos) = SWAP_ENDIAN_LITTLE(group->attribute_size);
pos += sizeof(group->attribute_size);
// @performance The UIAttribute contains a char array which makes this WAY larger than it needs to be in 99% of the cases
memcpy(pos, group->attributes, group->attribute_size * sizeof(UIAttribute));
pos += sizeof(UIAttribute);
entry = entry->next;
}
}
file.size = pos - file.content;

View File

@ -176,7 +176,7 @@ void wchar_to_char(const wchar_t* __restrict src, char* __restrict dest, int32 l
*dest = '\0';
}
inline
inline constexpr
int32 str_to_int(const char *str)
{
int32 result = 0;
@ -197,7 +197,47 @@ int32 str_to_int(const char *str)
return result * sign;
}
inline size_t str_count(const char* __restrict str, const char* __restrict substr)
/**
 * Converts a signed 64-bit integer to its decimal string representation.
 *
 * The characters are first written in reverse order (least significant digit first)
 * and the buffer is reversed in place at the end.
 *
 * @param number    Value to convert
 * @param str       Output buffer; must have room for the digits, the optional separators,
 *                  an optional '-' and the terminating '\0' (27 bytes cover every int64 value)
 * @param thousands Separator written between groups of three digits; '\0' disables grouping
 *
 * @return Index of the last written character, i.e. string length - 1
 */
inline constexpr
int32 int_to_str(int64 number, char *str, const char thousands = ',') {
    int32 i = 0;
    int32 digit_count = 0;
    const bool is_negative = number < 0;

    // Work on the unsigned magnitude: negating the most negative int64 directly would overflow
    unsigned long long magnitude = is_negative
        ? 0ULL - (unsigned long long) number
        : (unsigned long long) number;

    if (magnitude == 0) {
        str[i++] = '0';
    }

    while (magnitude > 0) {
        // A separator follows every complete group of three digits (also the 18th digit)
        if (thousands && digit_count > 0 && digit_count % 3 == 0) {
            str[i++] = thousands;
        }

        str[i++] = (char) (magnitude % 10 + '0');
        magnitude /= 10;
        ++digit_count;
    }

    if (is_negative) {
        str[i++] = '-';
    }

    str[i] = '\0';

    // Bring the reversed characters into reading order
    for (int32 j = 0, k = i - 1; j < k; ++j, --k) {
        char temp = str[j];
        str[j] = str[k];
        str[k] = temp;
    }

    return i - 1;
}
inline
size_t str_count(const char* __restrict str, const char* __restrict substr)
{
size_t l1 = strlen(str);
size_t l2 = strlen(substr);
@ -296,45 +336,7 @@ char* strtok(char* str, const char* __restrict delim, char* *key) {
return result;
}
/**
 * Converts a signed 64-bit integer to its decimal string representation.
 *
 * The characters are first written in reverse order (least significant digit first)
 * and the buffer is reversed in place at the end.
 *
 * @param number    Value to convert
 * @param str       Output buffer; must have room for the digits, the optional separators,
 *                  an optional '-' and the terminating '\0' (27 bytes cover every int64 value)
 * @param thousands Separator written between groups of three digits; '\0' disables grouping
 *
 * @return Index of the last written character, i.e. string length - 1
 */
inline
int32 int_to_str(int64 number, char *str, const char thousands = ',') {
    int32 i = 0;
    int32 digit_count = 0;
    const bool is_negative = number < 0;

    // Work on the unsigned magnitude: negating the most negative int64 directly would overflow
    unsigned long long magnitude = is_negative
        ? 0ULL - (unsigned long long) number
        : (unsigned long long) number;

    if (magnitude == 0) {
        str[i++] = '0';
    }

    while (magnitude > 0) {
        // A separator follows every complete group of three digits (also the 18th digit)
        if (thousands && digit_count > 0 && digit_count % 3 == 0) {
            str[i++] = thousands;
        }

        str[i++] = (char) (magnitude % 10 + '0');
        magnitude /= 10;
        ++digit_count;
    }

    if (is_negative) {
        str[i++] = '-';
    }

    str[i] = '\0';

    // Bring the reversed characters into reading order
    for (int32 j = 0, k = i - 1; j < k; ++j, --k) {
        char temp = str[j];
        str[j] = str[k];
        str[k] = temp;
    }

    return i - 1;
}
inline constexpr
char toupper_ascii(char c)
{
return c >= 'a' && c <= 'z'
@ -342,6 +344,7 @@ char toupper_ascii(char c)
: c;
}
inline constexpr
char tolower_ascii(char c)
{
return c >= 'A' && c <= 'Z'
@ -349,6 +352,7 @@ char tolower_ascii(char c)
: c;
}
inline constexpr
void create_const_name(const unsigned char* name, char* modified_name)
{
// Print block
@ -365,6 +369,7 @@ void create_const_name(const unsigned char* name, char* modified_name)
}
}
inline constexpr
bool str_ends_with(const char* str, const char* suffix) {
if (!str || !suffix) {
return false;
@ -431,4 +436,16 @@ void print_bytes(const void* ptr, size_t size)
}
}
/**
 * Checks whether a string starts with an end-of-line sequence.
 *
 * @param str String to inspect; the second character is only read when the first one is '\r'
 *
 * @return Length of the line break at the start of str: 1 for "\n", 2 for "\r\n", 0 otherwise
 */
inline constexpr
int32 is_eol(const char* str)
{
    // A lone '\r' is not treated as a line break, it has to be followed by '\n'
    return str[0] == '\n'
        ? 1
        : ((str[0] == '\r' && str[1] == '\n') ? 2 : 0);
}
#endif