minor code optimizations

This commit is contained in:
Dennis Eichhorn 2024-11-22 10:25:47 +01:00
parent ab9ab3fceb
commit 333c7d6e53
5 changed files with 190 additions and 158 deletions

View File

@ -20,6 +20,8 @@
// @todo Please check out if we can switch to quaternions. We tried but failed. // @todo Please check out if we can switch to quaternions. We tried but failed.
struct Camera { struct Camera {
bool is_changed;
v3_f32 location; v3_f32 location;
v4_f32 orientation; v4_f32 orientation;
@ -39,6 +41,8 @@ struct Camera {
f32 znear; f32 znear;
f32 zfar; f32 zfar;
f32 aspect; f32 aspect;
f32 view[16];
}; };
void void
@ -48,17 +52,19 @@ camera_update_vectors(Camera* camera)
camera->front.x = cos_ori_x * cosf(OMS_DEG2RAD(camera->orientation.y)); camera->front.x = cos_ori_x * cosf(OMS_DEG2RAD(camera->orientation.y));
camera->front.y = sinf(OMS_DEG2RAD(camera->orientation.x)); camera->front.y = sinf(OMS_DEG2RAD(camera->orientation.x));
camera->front.z = cos_ori_x * sinf(OMS_DEG2RAD(camera->orientation.y)); camera->front.z = cos_ori_x * sinf(OMS_DEG2RAD(camera->orientation.y));
vec3_normalize(&camera->front);
vec3_cross(&camera->right, &camera->front, &camera->world_up); vec3_cross(&camera->right, &camera->front, &camera->world_up);
vec3_normalize(&camera->right);
vec3_cross(&camera->up, &camera->right, &camera->front); vec3_cross(&camera->up, &camera->right, &camera->front);
// We checked whether combining these 3 normalizations into a single SIMD function would be faster, but it was slower
vec3_normalize(&camera->right);
vec3_normalize(&camera->front);
vec3_normalize(&camera->up); vec3_normalize(&camera->up);
} }
void camera_rotate(Camera* camera, int32 dx, int32 dy, f32 dt) void camera_rotate(Camera* camera, int32 dx, int32 dy, f32 dt)
{ {
camera->is_changed = true;
camera->orientation.x += dy * camera->sensitivity; camera->orientation.x += dy * camera->sensitivity;
camera->orientation.y -= dx * camera->sensitivity; camera->orientation.y -= dx * camera->sensitivity;
@ -82,6 +88,7 @@ void camera_rotate(Camera* camera, int32 dx, int32 dy, f32 dt)
// you can have up to 4 camera movement inputs at the same time // you can have up to 4 camera movement inputs at the same time
void camera_movement(Camera* camera, CameraMovement* movement, f32 dt, bool relative_to_world = true) void camera_movement(Camera* camera, CameraMovement* movement, f32 dt, bool relative_to_world = true)
{ {
camera->is_changed = true;
f32 velocity = camera->speed * dt; f32 velocity = camera->speed * dt;
if (relative_to_world) { if (relative_to_world) {
@ -137,10 +144,11 @@ void camera_movement(Camera* camera, CameraMovement* movement, f32 dt, bool rela
v3_f32 right; v3_f32 right;
vec3_cross(&right, &camera->world_up, &forward); vec3_cross(&right, &camera->world_up, &forward);
vec3_normalize(&right);
v3_f32 up; v3_f32 up;
vec3_cross(&up, &right, &forward); vec3_cross(&up, &right, &forward);
vec3_normalize(&right);
vec3_normalize(&up); vec3_normalize(&up);
for (int32 i = 0; i < CAMERA_MAX_INPUTS; i++) { for (int32 i = 0; i < CAMERA_MAX_INPUTS; i++) {
@ -275,10 +283,8 @@ void camera_translation_matrix_sparse_lh(const Camera* __restrict camera, f32* t
translation[11] = camera->location.z; translation[11] = camera->location.z;
} }
// @performance This function might be optimizable with simd?
// the normalization might also be not required?
void void
camera_view_matrix_lh(const Camera* __restrict camera, f32* __restrict view) camera_view_matrix_lh(Camera* __restrict camera)
{ {
v3_f32 zaxis = { camera->front.x, camera->front.y, camera->front.z }; v3_f32 zaxis = { camera->front.x, camera->front.y, camera->front.z };
@ -289,28 +295,28 @@ camera_view_matrix_lh(const Camera* __restrict camera, f32* __restrict view)
v3_f32 yaxis; v3_f32 yaxis;
vec3_cross(&yaxis, &zaxis, &xaxis); vec3_cross(&yaxis, &zaxis, &xaxis);
view[0] = xaxis.x; // We tested if it would make sense to create a vec3_dot_sse version for the 3 dot products
view[1] = yaxis.x; // The result was that it is not faster, only if we would do 4 dot products would we see an improvement
view[2] = zaxis.x; camera->view[0] = xaxis.x;
view[3] = 0.0f; camera->view[1] = yaxis.x;
view[4] = xaxis.y; camera->view[2] = zaxis.x;
view[5] = yaxis.y; camera->view[3] = 0.0f;
view[6] = zaxis.y; camera->view[4] = xaxis.y;
view[7] = 0.0f; camera->view[5] = yaxis.y;
view[8] = xaxis.z; camera->view[6] = zaxis.y;
view[9] = yaxis.z; camera->view[7] = 0.0f;
view[10] = zaxis.z; camera->view[8] = xaxis.z;
view[11] = 0; camera->view[9] = yaxis.z;
view[12] = -vec3_dot(&xaxis, &camera->location); camera->view[10] = zaxis.z;
view[13] = -vec3_dot(&yaxis, &camera->location); camera->view[11] = 0;
view[14] = -vec3_dot(&zaxis, &camera->location); camera->view[12] = -vec3_dot(&xaxis, &camera->location);
view[15] = 1.0f; camera->view[13] = -vec3_dot(&yaxis, &camera->location);
camera->view[14] = -vec3_dot(&zaxis, &camera->location);
camera->view[15] = 1.0f;
} }
// @performance This function might be optimizable with simd?
// the normalization might also be not required?
void void
camera_view_matrix_rh(const Camera* __restrict camera, f32* __restrict view) camera_view_matrix_rh(Camera* __restrict camera)
{ {
v3_f32 zaxis = { -camera->front.x, -camera->front.y, -camera->front.z }; v3_f32 zaxis = { -camera->front.x, -camera->front.y, -camera->front.z };
@ -321,22 +327,24 @@ camera_view_matrix_rh(const Camera* __restrict camera, f32* __restrict view)
v3_f32 yaxis; v3_f32 yaxis;
vec3_cross(&yaxis, &zaxis, &xaxis); vec3_cross(&yaxis, &zaxis, &xaxis);
view[0] = xaxis.x; // We tested if it would make sense to create a vec3_dot_sse version for the 3 dot products
view[1] = yaxis.x; // The result was that it is not faster, only if we would do 4 dot products would we see an improvement
view[2] = zaxis.x; camera->view[0] = xaxis.x;
view[3] = 0.0f; camera->view[1] = yaxis.x;
view[4] = xaxis.y; camera->view[2] = zaxis.x;
view[5] = yaxis.y; camera->view[3] = 0.0f;
view[6] = zaxis.y; camera->view[4] = xaxis.y;
view[7] = 0.0f; camera->view[5] = yaxis.y;
view[8] = xaxis.z; camera->view[6] = zaxis.y;
view[9] = yaxis.z; camera->view[7] = 0.0f;
view[10] = zaxis.z; camera->view[8] = xaxis.z;
view[11] = 0; camera->view[9] = yaxis.z;
view[12] = -vec3_dot(&xaxis, &camera->location); camera->view[10] = zaxis.z;
view[13] = -vec3_dot(&yaxis, &camera->location); camera->view[11] = 0;
view[14] = -vec3_dot(&zaxis, &camera->location); camera->view[12] = -vec3_dot(&xaxis, &camera->location);
view[15] = 1.0f; camera->view[13] = -vec3_dot(&yaxis, &camera->location);
camera->view[14] = -vec3_dot(&zaxis, &camera->location);
camera->view[15] = 1.0f;
} }
#endif #endif

View File

@ -71,27 +71,27 @@ void vertex_line_create(
y2 -= thickness / 2; y2 -= thickness / 2;
} }
float n1 = -(y2 - y1); f32 n1 = -(y2 - y1);
float n2 = x2 - x1; f32 n2 = x2 - x1;
float n_ = sqrtf(n2 * n2 + n1 * n1); f32 n_ = sqrtf(n2 * n2 + n1 * n1);
float norm1 = n1 / n_; f32 norm1 = n1 / n_;
float norm2 = n2 / n_; f32 norm2 = n2 / n_;
// @todo Currently we always use p1 and never p2 // @todo Currently we always use p1 and never p2
// This is wrong and depends on the Alignment, no? Maybe not // This is wrong and depends on the Alignment, no? Maybe not
// Calculate both parallel points to the start position // Calculate both parallel points to the start position
float p1_x1 = x1 + thickness * norm1; f32 p1_x1 = x1 + thickness * norm1;
float p1_y1 = y1 + thickness * norm2; f32 p1_y1 = y1 + thickness * norm2;
// float p2_x1 = x1 - thickness * norm1; // f32 p2_x1 = x1 - thickness * norm1;
// float p2_y1 = y1 - thickness * norm2; // f32 p2_y1 = y1 - thickness * norm2;
// Calculate both parallel points to the end position // Calculate both parallel points to the end position
float p1_x2 = x2 + thickness * norm1; f32 p1_x2 = x2 + thickness * norm1;
float p1_y2 = y2 + thickness * norm2; f32 p1_y2 = y2 + thickness * norm2;
// float p2_x2 = x2 - thickness * norm1; // f32 p2_x2 = x2 - thickness * norm1;
// float p2_y2 = y2 - thickness * norm2; // f32 p2_y2 = y2 - thickness * norm2;
vertex_degenerate_create(vertices, index, zindex, x1, y1); vertex_degenerate_create(vertices, index, zindex, x1, y1);
@ -148,6 +148,9 @@ void vertex_rect_create(
vertex_degenerate_create(vertices, index, zindex, x, y); vertex_degenerate_create(vertices, index, zindex, x, y);
f32 y_height = y + height;
f32 x_width = x + width;
// Rectangle // Rectangle
vertices[*index].position.x = x; vertices[*index].position.x = x;
vertices[*index].position.y = y; vertices[*index].position.y = y;
@ -158,14 +161,14 @@ void vertex_rect_create(
++(*index); ++(*index);
vertices[*index].position.x = x; vertices[*index].position.x = x;
vertices[*index].position.y = y + height; vertices[*index].position.y = y_height;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x1; vertices[*index].tex_coord.x = tex_x1;
vertices[*index].tex_coord.y = tex_y2; vertices[*index].tex_coord.y = tex_y2;
vertices[*index].color = color_index; vertices[*index].color = color_index;
++(*index); ++(*index);
vertices[*index].position.x = x + width; vertices[*index].position.x = x_width;
vertices[*index].position.y = y; vertices[*index].position.y = y;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.x = tex_x2;
@ -173,8 +176,8 @@ void vertex_rect_create(
vertices[*index].color = color_index; vertices[*index].color = color_index;
++(*index); ++(*index);
vertices[*index].position.x = x + width; vertices[*index].position.x = x_width;
vertices[*index].position.y = y + height; vertices[*index].position.y = y_height;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.x = tex_x2;
vertices[*index].tex_coord.y = tex_y2; vertices[*index].tex_coord.y = tex_y2;
@ -205,6 +208,11 @@ void vertex_rect_border_create(
// @bug While this works for the whole rectangle it doesn't work for individual borders // @bug While this works for the whole rectangle it doesn't work for individual borders
// @todo We need a version where you can define individual borders // @todo We need a version where you can define individual borders
f32 y_height = y + height;
f32 y_thickness = y + thickness;
f32 x_width = x + width;
f32 x_thickness = x + thickness;
// Rectangle // Rectangle
// Top border // Top border
vertices[*index].position.x = x; vertices[*index].position.x = x;
@ -216,14 +224,14 @@ void vertex_rect_border_create(
++(*index); ++(*index);
vertices[*index].position.x = x; vertices[*index].position.x = x;
vertices[*index].position.y = y + thickness; vertices[*index].position.y = y_thickness;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x1; vertices[*index].tex_coord.x = tex_x1;
vertices[*index].tex_coord.y = tex_y2; vertices[*index].tex_coord.y = tex_y2;
vertices[*index].color = color_index; vertices[*index].color = color_index;
++(*index); ++(*index);
vertices[*index].position.x = x + width; vertices[*index].position.x = x_width;
vertices[*index].position.y = y; vertices[*index].position.y = y;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.x = tex_x2;
@ -231,8 +239,8 @@ void vertex_rect_border_create(
vertices[*index].color = color_index; vertices[*index].color = color_index;
++(*index); ++(*index);
vertices[*index].position.x = x + width; vertices[*index].position.x = x_width;
vertices[*index].position.y = y + thickness; vertices[*index].position.y = y_thickness;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.x = tex_x2;
vertices[*index].tex_coord.y = tex_y2; vertices[*index].tex_coord.y = tex_y2;
@ -240,24 +248,24 @@ void vertex_rect_border_create(
++(*index); ++(*index);
// Right border // Right border
vertices[*index].position.x = x + width - thickness; vertices[*index].position.x = x_width - thickness;
vertices[*index].position.y = y + thickness; vertices[*index].position.y = y_thickness;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.x = tex_x2;
vertices[*index].tex_coord.y = tex_y2; vertices[*index].tex_coord.y = tex_y2;
vertices[*index].color = color_index; vertices[*index].color = color_index;
++(*index); ++(*index);
vertices[*index].position.x = x + width; vertices[*index].position.x = x_width;
vertices[*index].position.y = y + height; vertices[*index].position.y = y_height;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x1; vertices[*index].tex_coord.x = tex_x1;
vertices[*index].tex_coord.y = tex_y2; vertices[*index].tex_coord.y = tex_y2;
vertices[*index].color = color_index; vertices[*index].color = color_index;
++(*index); ++(*index);
vertices[*index].position.x = x + width - thickness; vertices[*index].position.x = x_width - thickness;
vertices[*index].position.y = y + height; vertices[*index].position.y = y_height;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.x = tex_x2;
vertices[*index].tex_coord.y = tex_y1; vertices[*index].tex_coord.y = tex_y1;
@ -265,8 +273,8 @@ void vertex_rect_border_create(
++(*index); ++(*index);
// Bottom border // Bottom border
vertices[*index].position.x = x + width - thickness; vertices[*index].position.x = x_width - thickness;
vertices[*index].position.y = y + height - thickness; vertices[*index].position.y = y_height - thickness;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.x = tex_x2;
vertices[*index].tex_coord.y = tex_y2; vertices[*index].tex_coord.y = tex_y2;
@ -274,7 +282,7 @@ void vertex_rect_border_create(
++(*index); ++(*index);
vertices[*index].position.x = x; vertices[*index].position.x = x;
vertices[*index].position.y = y + height; vertices[*index].position.y = y_height;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x1; vertices[*index].tex_coord.x = tex_x1;
vertices[*index].tex_coord.y = tex_y2; vertices[*index].tex_coord.y = tex_y2;
@ -282,7 +290,7 @@ void vertex_rect_border_create(
++(*index); ++(*index);
vertices[*index].position.x = x; vertices[*index].position.x = x;
vertices[*index].position.y = y + height - thickness; vertices[*index].position.y = y_height - thickness;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.x = tex_x2;
vertices[*index].tex_coord.y = tex_y1; vertices[*index].tex_coord.y = tex_y1;
@ -290,8 +298,8 @@ void vertex_rect_border_create(
++(*index); ++(*index);
// Left border // Left border
vertices[*index].position.x = x + thickness; vertices[*index].position.x = x_thickness;
vertices[*index].position.y = y + height - thickness; vertices[*index].position.y = y_height - thickness;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.x = tex_x2;
vertices[*index].tex_coord.y = tex_y2; vertices[*index].tex_coord.y = tex_y2;
@ -299,15 +307,15 @@ void vertex_rect_border_create(
++(*index); ++(*index);
vertices[*index].position.x = x; vertices[*index].position.x = x;
vertices[*index].position.y = y + thickness; vertices[*index].position.y = y_thickness;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x1; vertices[*index].tex_coord.x = tex_x1;
vertices[*index].tex_coord.y = tex_y2; vertices[*index].tex_coord.y = tex_y2;
vertices[*index].color = color_index; vertices[*index].color = color_index;
++(*index); ++(*index);
vertices[*index].position.x = x + thickness; vertices[*index].position.x = x_thickness;
vertices[*index].position.y = y + thickness; vertices[*index].position.y = y_thickness;
vertices[*index].position.z = zindex; vertices[*index].position.z = zindex;
vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.x = tex_x2;
vertices[*index].tex_coord.y = tex_y1; vertices[*index].tex_coord.y = tex_y1;
@ -338,13 +346,14 @@ f32 text_calculate_dimensions_height(
f32 height, f32 height,
const Font* __restrict font, const char* __restrict text, f32 scale, int32 length const Font* __restrict font, const char* __restrict text, f32 scale, int32 length
) { ) {
f32 y = font->line_height * scale; f32 line_height = font->line_height * scale;
f32 y = line_height;
// @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value // @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value
for (int i = 0; i < length; ++i) { for (int32 i = 0; i < length; ++i) {
if (text[i] == '\n') { if (text[i] == '\n') {
y += font->line_height * scale; y += line_height;
} }
} }
@ -363,7 +372,7 @@ f32 text_calculate_dimensions_width(
// @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value // @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value
for (int i = 0; i < length; ++i) { for (int32 i = 0; i < length; ++i) {
int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i); int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i);
if (character == '\n') { if (character == '\n') {
@ -376,13 +385,14 @@ f32 text_calculate_dimensions_width(
Glyph* glyph = NULL; Glyph* glyph = NULL;
// We try to jump to the correct glyph based on the glyph codepoint // We try to jump to the correct glyph based on the glyph codepoint
// If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending) // If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending)
if (font->glyph_count > character - first_glyph int32 perfect_glyph_pos = character - first_glyph;
&& font->glyphs[character - first_glyph].codepoint == character if (font->glyph_count > perfect_glyph_pos
&& font->glyphs[perfect_glyph_pos].codepoint == character
) { ) {
glyph = &font->glyphs[character - first_glyph]; glyph = &font->glyphs[perfect_glyph_pos];
} else { } else {
// @performance consider to do binary search // @performance consider to do binary search
for (int j = 0; j <= character - first_glyph && j < font->glyph_count; ++j) { for (int32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) {
if (font->glyphs[j].codepoint == character) { if (font->glyphs[j].codepoint == character) {
glyph = &font->glyphs[j]; glyph = &font->glyphs[j];
@ -406,8 +416,9 @@ void text_calculate_dimensions(
f32* __restrict width, f32* __restrict height, f32* __restrict width, f32* __restrict height,
const Font* __restrict font, const char* __restrict text, bool is_ascii, f32 scale, int32 length const Font* __restrict font, const char* __restrict text, bool is_ascii, f32 scale, int32 length
) { ) {
f32 line_height = font->line_height * scale;
f32 x = 0; f32 x = 0;
f32 y = font->line_height * scale; f32 y = line_height;
f32 offset_x = 0; f32 offset_x = 0;
@ -415,12 +426,12 @@ void text_calculate_dimensions(
// @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value // @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value
for (int i = 0; i < length; ++i) { for (int32 i = 0; i < length; ++i) {
int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i); int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i);
if (character == '\n') { if (character == '\n') {
x = OMS_MAX(x, offset_x); x = OMS_MAX(x, offset_x);
y += font->line_height * scale; y += line_height;
offset_x = 0; offset_x = 0;
@ -430,13 +441,14 @@ void text_calculate_dimensions(
Glyph* glyph = NULL; Glyph* glyph = NULL;
// We try to jump to the correct glyph based on the glyph codepoint // We try to jump to the correct glyph based on the glyph codepoint
// If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending) // If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending)
if (font->glyph_count > character - first_glyph int32 perfect_glyph_pos = character - first_glyph;
&& font->glyphs[character - first_glyph].codepoint == character if (font->glyph_count > perfect_glyph_pos
&& font->glyphs[perfect_glyph_pos].codepoint == character
) { ) {
glyph = &font->glyphs[character - first_glyph]; glyph = &font->glyphs[perfect_glyph_pos];
} else { } else {
// @performance consider to do binary search // @performance consider to do binary search
for (int j = 0; j <= character - first_glyph && j < font->glyph_count; ++j) { for (int32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) {
if (font->glyphs[j].codepoint == character) { if (font->glyphs[j].codepoint == character) {
glyph = &font->glyphs[j]; glyph = &font->glyphs[j];
@ -465,7 +477,7 @@ f32 vertex_text_create(
) { ) {
int32 length = utf8_strlen(text); int32 length = utf8_strlen(text);
bool is_ascii = strlen(text) == length; bool is_ascii = strlen(text) == length;
float scale = size / font->size; f32 scale = size / font->size;
// If we do a different alignment we need to pre-calculate the width and height // If we do a different alignment we need to pre-calculate the width and height
if (align_h != 0 || align_v != 0) { if (align_h != 0 || align_v != 0) {
@ -493,7 +505,7 @@ f32 vertex_text_create(
uint32 first_glyph = font->glyphs[0].codepoint; uint32 first_glyph = font->glyphs[0].codepoint;
f32 offset_x = x; f32 offset_x = x;
for (int i = 0; i < length; ++i) { for (int32 i = 0; i < length; ++i) {
int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i); int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i);
if (character == '\n') { if (character == '\n') {
y += font->line_height * scale; y += font->line_height * scale;
@ -505,13 +517,14 @@ f32 vertex_text_create(
Glyph* glyph = NULL; Glyph* glyph = NULL;
// We try to jump to the correct glyph based on the glyph codepoint // We try to jump to the correct glyph based on the glyph codepoint
// If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending) // If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending)
if (font->glyph_count > character - first_glyph int32 perfect_glyph_pos = character - first_glyph;
&& font->glyphs[character - first_glyph].codepoint == character if (font->glyph_count > perfect_glyph_pos
&& font->glyphs[perfect_glyph_pos].codepoint == character
) { ) {
glyph = &font->glyphs[character - first_glyph]; glyph = &font->glyphs[perfect_glyph_pos];
} else { } else {
// @performance consider to do binary search // @performance consider to do binary search
for (int j = 0; j <= character - first_glyph && j < font->glyph_count; ++j) { for (int32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) {
if (font->glyphs[j].codepoint == character) { if (font->glyphs[j].codepoint == character) {
glyph = &font->glyphs[j]; glyph = &font->glyphs[j];
@ -602,7 +615,7 @@ f32 ui_text_create(
int32 length = utf8_strlen(text->value_str); int32 length = utf8_strlen(text->value_str);
bool is_ascii = strlen(text->value_str) == length; bool is_ascii = strlen(text->value_str) == length;
float scale = size->value_float / theme->font.size; f32 scale = size->value_float / theme->font.size;
// If we do a different alignment we need to pre-calculate the width and height // If we do a different alignment we need to pre-calculate the width and height
if (align_h != NULL || align_v != NULL) { if (align_h != NULL || align_v != NULL) {
@ -635,7 +648,7 @@ f32 ui_text_create(
int32 start = *index; int32 start = *index;
f32 offset_x = x->value_int; f32 offset_x = x->value_int;
f32 offset_y = y->value_int; f32 offset_y = y->value_int;
for (int i = 0; i < length; ++i) { for (int32 i = 0; i < length; ++i) {
int32 character = is_ascii ? text->value_str[i] : utf8_get_char_at(text->value_str, i); int32 character = is_ascii ? text->value_str[i] : utf8_get_char_at(text->value_str, i);
if (character == '\n') { if (character == '\n') {
@ -648,13 +661,14 @@ f32 ui_text_create(
Glyph* glyph = NULL; Glyph* glyph = NULL;
// We try to jump to the correct glyph based on the glyph codepoint // We try to jump to the correct glyph based on the glyph codepoint
// If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending) // If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending)
if (theme->font.glyph_count > character - first_glyph int32 perfect_glyph_pos = character - first_glyph;
&& theme->font.glyphs[character - first_glyph].codepoint == character if (theme->font.glyph_count > perfect_glyph_pos
&& theme->font.glyphs[perfect_glyph_pos].codepoint == character
) { ) {
glyph = &theme->font.glyphs[character - first_glyph]; glyph = &theme->font.glyphs[perfect_glyph_pos];
} else { } else {
// @performance consider to do binary search // @performance consider to do binary search
for (int j = 0; j <= character - first_glyph && j < theme->font.glyph_count; ++j) { for (int32 j = 0; j <= perfect_glyph_pos && j < theme->font.glyph_count; ++j) {
if (theme->font.glyphs[j].codepoint == character) { if (theme->font.glyphs[j].codepoint == character) {
glyph = &theme->font.glyphs[j]; glyph = &theme->font.glyphs[j];

View File

@ -533,16 +533,18 @@ void
input_hotkey_state(Input* input) input_hotkey_state(Input* input)
{ {
uint8 old_hotkeys[MAX_KEY_PRESSES]; uint8 old_hotkeys[MAX_KEY_PRESSES];
memcpy(old_hotkeys, input->state.state_hotkeys, sizeof(uint8) * MAX_KEY_PRESSES); InputState* state = &input->state;
memset(input->state.state_hotkeys, 0, sizeof(uint8) * MAX_KEY_PRESSES); memcpy(old_hotkeys, state->state_hotkeys, sizeof(uint8) * MAX_KEY_PRESSES);
memset(state->state_hotkeys, 0, sizeof(uint8) * MAX_KEY_PRESSES);
int32 active_hotkeys = 0; int32 active_hotkeys = 0;
// Check every key down state // Check every key down state
for (int key_state = 0; key_state < MAX_KEY_STATES; ++key_state) { for (int key_state = 0; key_state < MAX_KEY_STATES; ++key_state) {
if (input->state.state_keys[key_state].key_id == 0 if (state->state_keys[key_state].key_id == 0
|| input->state.state_keys[key_state].key_state == KEY_STATE_RELEASED || state->state_keys[key_state].key_state == KEY_STATE_RELEASED
) { ) {
// no key defined for this down state // no key defined for this down state
continue; continue;
@ -551,7 +553,7 @@ input_hotkey_state(Input* input)
// Is a key defined for this state AND is at least one hotkey defined for this key // Is a key defined for this state AND is at least one hotkey defined for this key
// If no hotkey is defined we don't care // If no hotkey is defined we don't care
// Careful, remember MAX_MOUSE_KEYS offset // Careful, remember MAX_MOUSE_KEYS offset
InputKey* key = &input->state.state_keys[key_state]; InputKey* key = &state->state_keys[key_state];
int32 internal_key_id = (key->key_id & ~(INPUT_KEYBOARD_PREFIX | INPUT_CONTROLLER_PREFIX)) int32 internal_key_id = (key->key_id & ~(INPUT_KEYBOARD_PREFIX | INPUT_CONTROLLER_PREFIX))
+ ((bool) (key->key_id & INPUT_KEYBOARD_PREFIX)) * MAX_MOUSE_KEYS + ((bool) (key->key_id & INPUT_KEYBOARD_PREFIX)) * MAX_MOUSE_KEYS
+ ((bool) (key->key_id & INPUT_CONTROLLER_PREFIX)) * (MAX_MOUSE_KEYS + MAX_KEYBOARD_KEYS); + ((bool) (key->key_id & INPUT_CONTROLLER_PREFIX)) * (MAX_MOUSE_KEYS + MAX_KEYBOARD_KEYS);
@ -589,17 +591,17 @@ input_hotkey_state(Input* input)
// Hotkey already active // Hotkey already active
// @question Do we even need this? This shouldn't happen anyway?! // @question Do we even need this? This shouldn't happen anyway?!
if (hotkey_is_active(input->state.state_hotkeys, hotkeys_for_key[possible_hotkey_idx])) { if (hotkey_is_active(state->state_hotkeys, hotkeys_for_key[possible_hotkey_idx])) {
continue; continue;
} }
// store active hotkey, if it is not already active // store active hotkey, if it is not already active
bool is_pressed = hotkey_keys_are_active(input->state.state_keys, mapping, hotkeys_for_key[possible_hotkey_idx]); bool is_pressed = hotkey_keys_are_active(state->state_keys, mapping, hotkeys_for_key[possible_hotkey_idx]);
if (!is_pressed) { if (!is_pressed) {
continue; continue;
} }
input->state.state_hotkeys[active_hotkeys] = hotkeys_for_key[possible_hotkey_idx]; state->state_hotkeys[active_hotkeys] = hotkeys_for_key[possible_hotkey_idx];
++active_hotkeys; ++active_hotkeys;
// Run callback if defined // Run callback if defined

View File

@ -84,18 +84,17 @@ void update_timing_stat(uint32 stat, const char* function)
{ {
uint64 new_tick_count = __rdtsc(); uint64 new_tick_count = __rdtsc();
debug_container->perf_stats[stat].function = function; TimingStat* timing_stat = &debug_container->perf_stats[stat];
debug_container->perf_stats[stat].delta_tick = new_tick_count - debug_container->perf_stats[stat].old_tick_count; timing_stat->function = function;
debug_container->perf_stats[stat].delta_time = (double) debug_container->perf_stats[stat].delta_tick / (double) debug_container->performance_count_frequency; timing_stat->delta_tick = new_tick_count - timing_stat->old_tick_count;
debug_container->perf_stats[stat].old_tick_count = new_tick_count; timing_stat->delta_time = (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency;
timing_stat->old_tick_count = new_tick_count;
} }
inline inline
void update_timing_stat_start(uint32 stat, const char*) void update_timing_stat_start(uint32 stat, const char*)
{ {
uint64 new_tick_count = __rdtsc(); debug_container->perf_stats[stat].old_tick_count = __rdtsc();
debug_container->perf_stats[stat].old_tick_count = new_tick_count;
} }
inline inline
@ -103,10 +102,11 @@ void update_timing_stat_end(uint32 stat, const char* function)
{ {
uint64 new_tick_count = __rdtsc(); uint64 new_tick_count = __rdtsc();
debug_container->perf_stats[stat].function = function; TimingStat* timing_stat = &debug_container->perf_stats[stat];
debug_container->perf_stats[stat].delta_tick = new_tick_count - debug_container->perf_stats[stat].old_tick_count; timing_stat->function = function;
debug_container->perf_stats[stat].delta_time = (double) debug_container->perf_stats[stat].delta_tick / (double) debug_container->performance_count_frequency; timing_stat->delta_tick = new_tick_count - timing_stat->old_tick_count;
debug_container->perf_stats[stat].old_tick_count = new_tick_count; timing_stat->delta_time = (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency;
timing_stat->old_tick_count = new_tick_count;
} }
inline inline
@ -114,12 +114,11 @@ void update_timing_stat_end_continued(uint32 stat, const char* function)
{ {
uint64 new_tick_count = __rdtsc(); uint64 new_tick_count = __rdtsc();
debug_container->perf_stats[stat].function = function; TimingStat* timing_stat = &debug_container->perf_stats[stat];
debug_container->perf_stats[stat].delta_tick = debug_container->perf_stats[stat].delta_tick timing_stat->function = function;
+ new_tick_count - debug_container->perf_stats[stat].old_tick_count; timing_stat->delta_tick = timing_stat->delta_tick + new_tick_count - timing_stat->old_tick_count;
debug_container->perf_stats[stat].delta_time = debug_container->perf_stats[stat].delta_time timing_stat->delta_time = timing_stat->delta_time + (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency;
+ (double) debug_container->perf_stats[stat].delta_tick / (double) debug_container->performance_count_frequency; timing_stat->old_tick_count = new_tick_count;
debug_container->perf_stats[stat].old_tick_count = new_tick_count;
} }
inline inline
@ -172,23 +171,25 @@ void debug_memory_init(uint64 start, uint64 size)
return; return;
} }
if (debug_container->dmc.memory_size <= debug_container->dmc.memory_element_idx) { DebugMemoryContainer* dmc = &debug_container->dmc;
DebugMemory* old = debug_container->dmc.memory_stats; if (dmc->memory_size <= dmc->memory_element_idx) {
DebugMemory* old = dmc->memory_stats;
debug_container->dmc.memory_size += 3; dmc->memory_size += 3;
debug_container->dmc.memory_stats = (DebugMemory *) calloc(debug_container->dmc.memory_size, sizeof(DebugMemory)); dmc->memory_stats = (DebugMemory *) calloc(dmc->memory_size, sizeof(DebugMemory));
if (old) { if (old) {
memcpy(debug_container->dmc.memory_stats, old, (debug_container->dmc.memory_size - 3) * sizeof(DebugMemory)); memcpy(dmc->memory_stats, old, (dmc->memory_size - 3) * sizeof(DebugMemory));
free(old); free(old);
} }
} }
debug_container->dmc.memory_stats[debug_container->dmc.memory_element_idx].start = start; DebugMemory* debug_mem = &dmc->memory_stats[dmc->memory_element_idx];
debug_container->dmc.memory_stats[debug_container->dmc.memory_element_idx].size = size; debug_mem->start = start;
debug_container->dmc.memory_stats[debug_container->dmc.memory_element_idx].usage = 0; debug_mem->size = size;
debug_mem->usage = 0;
++debug_container->dmc.memory_element_idx; ++dmc->memory_element_idx;
} }
void debug_memory_log(uint64 start, uint64 size, int32 type, const char* function) void debug_memory_log(uint64 start, uint64 size, int32 type, const char* function)
@ -206,13 +207,14 @@ void debug_memory_log(uint64 start, uint64 size, int32 type, const char* functio
mem->action_idx = 0; mem->action_idx = 0;
} }
mem->last_action[mem->action_idx].type = type; DebugMemoryRange* dmr = &mem->last_action[mem->action_idx];
mem->last_action[mem->action_idx].start = start - mem->start; dmr->type = type;
mem->last_action[mem->action_idx].size = size; dmr->start = start - mem->start;
dmr->size = size;
// We are using rdtsc since it is faster -> less debugging overhead than using time() // We are using rdtsc since it is faster -> less debugging overhead than using time()
mem->last_action[mem->action_idx].time = __rdtsc(); dmr->time = __rdtsc();
mem->last_action[mem->action_idx].function_name = function; dmr->function_name = function;
++mem->action_idx; ++mem->action_idx;
@ -238,13 +240,14 @@ void debug_memory_reserve(uint64 start, uint64 size, int32 type, const char* fun
mem->reserve_action_idx = 0; mem->reserve_action_idx = 0;
} }
mem->reserve_action[mem->reserve_action_idx].type = type; DebugMemoryRange* dmr = &mem->reserve_action[mem->reserve_action_idx];
mem->reserve_action[mem->reserve_action_idx].start = start - mem->start; dmr->type = type;
mem->reserve_action[mem->reserve_action_idx].size = size; dmr->start = start - mem->start;
dmr->size = size;
// We are using rdtsc since it is faster -> less debugging overhead than using time() // We are using rdtsc since it is faster -> less debugging overhead than using time()
mem->reserve_action[mem->reserve_action_idx].time = __rdtsc(); dmr->time = __rdtsc();
mem->reserve_action[mem->reserve_action_idx].function_name = function; dmr->function_name = function;
++mem->reserve_action_idx; ++mem->reserve_action_idx;
} }
@ -273,29 +276,30 @@ byte* log_get_memory(uint64 size, byte aligned = 1, bool zeroed = false)
return 0; return 0;
} }
ASSERT_SIMPLE(size <= debug_container->log_memory.size); LogMemory* log_mem = &debug_container->log_memory;
ASSERT_SIMPLE(size <= log_mem->size);
if (aligned > 1) { if (aligned > 1) {
uintptr_t address = (uintptr_t) debug_container->log_memory.memory; uintptr_t address = (uintptr_t) log_mem->memory;
debug_container->log_memory.pos += (aligned - ((address + debug_container->log_memory.pos) & (aligned - 1))) % aligned; log_mem->pos += (aligned - ((address + log_mem->pos) & (aligned - 1))) % aligned;
} }
size = ROUND_TO_NEAREST(size, aligned); size = ROUND_TO_NEAREST(size, aligned);
if (debug_container->log_memory.pos + size > debug_container->log_memory.size) { if (log_mem->pos + size > log_mem->size) {
debug_container->log_memory.pos = 0; log_mem->pos = 0;
if (aligned > 1) { if (aligned > 1) {
uintptr_t address = (uintptr_t) debug_container->log_memory.memory; uintptr_t address = (uintptr_t) log_mem->memory;
debug_container->log_memory.pos += (aligned - ((address + debug_container->log_memory.pos) & (aligned - 1))) % aligned; log_mem->pos += (aligned - ((address + log_mem->pos) & (aligned - 1))) % aligned;
} }
} }
byte* offset = (byte *) (debug_container->log_memory.memory + debug_container->log_memory.pos); byte* offset = (byte *) (log_mem->memory + log_mem->pos);
if (zeroed) { if (zeroed) {
memset((void *) offset, 0, size); memset((void *) offset, 0, size);
} }
debug_container->log_memory.pos += size; log_mem->pos += size;
return offset; return offset;
} }

View File

@ -23,6 +23,7 @@
// @todo Implement intrinsic versions! // @todo Implement intrinsic versions!
inline
void vec2_normalize(f32* __restrict x, f32* __restrict y) void vec2_normalize(f32* __restrict x, f32* __restrict y)
{ {
f32 d = sqrtf((*x) * (*x) + (*y) * (*y)); f32 d = sqrtf((*x) * (*x) + (*y) * (*y));
@ -94,6 +95,7 @@ f32 vec2_dot(const v2_f32* a, const v2_f32* b) {
return a->x * b->x + a->y * b->y; return a->x * b->x + a->y * b->y;
} }
inline
void vec3_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z) void vec3_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z)
{ {
f32 d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z)); f32 d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z));
@ -103,6 +105,7 @@ void vec3_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z)
*z /= d; *z /= d;
} }
inline
void vec3_normalize(v3_f32* vec) void vec3_normalize(v3_f32* vec)
{ {
f32 d = sqrtf(vec->x * vec->x + vec->y * vec->y + vec->z * vec->z); f32 d = sqrtf(vec->x * vec->x + vec->y * vec->y + vec->z * vec->z);
@ -179,6 +182,7 @@ void vec3_cross(v3_f32* __restrict vec, const v3_f32* a, const v3_f32* b) {
vec->z = a->x * b->y - a->y * b->x; vec->z = a->x * b->y - a->y * b->x;
} }
/**
 * Dot product of two 3-component vectors.
 *
 * @param a First operand
 * @param b Second operand
 *
 * @return a.x * b.x + a.y * b.y + a.z * b.z
 */
inline
f32 vec3_dot(const v3_f32* a, const v3_f32* b)
{
    return a->x * b->x
        + a->y * b->y
        + a->z * b->z;
}