diff --git a/camera/Camera.h b/camera/Camera.h index 62246b8..e102f34 100644 --- a/camera/Camera.h +++ b/camera/Camera.h @@ -20,6 +20,8 @@ // @todo Please check out if we can switch to quaternions. We tried but failed. struct Camera { + bool is_changed; + v3_f32 location; v4_f32 orientation; @@ -39,6 +41,8 @@ struct Camera { f32 znear; f32 zfar; f32 aspect; + + f32 view[16]; }; void @@ -48,17 +52,19 @@ camera_update_vectors(Camera* camera) camera->front.x = cos_ori_x * cosf(OMS_DEG2RAD(camera->orientation.y)); camera->front.y = sinf(OMS_DEG2RAD(camera->orientation.x)); camera->front.z = cos_ori_x * sinf(OMS_DEG2RAD(camera->orientation.y)); - vec3_normalize(&camera->front); vec3_cross(&camera->right, &camera->front, &camera->world_up); - vec3_normalize(&camera->right); - vec3_cross(&camera->up, &camera->right, &camera->front); + + // We checked if combining these 3 into a single SIMD function, but it was slower + vec3_normalize(&camera->right); + vec3_normalize(&camera->front); vec3_normalize(&camera->up); } void camera_rotate(Camera* camera, int32 dx, int32 dy, f32 dt) { + camera->is_changed = true; camera->orientation.x += dy * camera->sensitivity; camera->orientation.y -= dx * camera->sensitivity; @@ -82,6 +88,7 @@ void camera_rotate(Camera* camera, int32 dx, int32 dy, f32 dt) // you can have up to 4 camera movement inputs at the same time void camera_movement(Camera* camera, CameraMovement* movement, f32 dt, bool relative_to_world = true) { + camera->is_changed = true; f32 velocity = camera->speed * dt; if (relative_to_world) { @@ -137,10 +144,11 @@ void camera_movement(Camera* camera, CameraMovement* movement, f32 dt, bool rela v3_f32 right; vec3_cross(&right, &camera->world_up, &forward); - vec3_normalize(&right); v3_f32 up; vec3_cross(&up, &right, &forward); + + vec3_normalize(&right); vec3_normalize(&up); for (int32 i = 0; i < CAMERA_MAX_INPUTS; i++) { @@ -275,10 +283,8 @@ void camera_translation_matrix_sparse_lh(const Camera* __restrict camera, f32* t translation[11] = camera->location.z; } -// @performance This function might be optimizable with simd? -// the normalization might also be not required? void -camera_view_matrix_lh(const Camera* __restrict camera, f32* __restrict view) +camera_view_matrix_lh(Camera* __restrict camera) { v3_f32 zaxis = { camera->front.x, camera->front.y, camera->front.z }; @@ -289,28 +295,28 @@ camera_view_matrix_lh(const Camera* __restrict camera, f32* __restrict view) v3_f32 yaxis; vec3_cross(&yaxis, &zaxis, &xaxis); - view[0] = xaxis.x; - view[1] = yaxis.x; - view[2] = zaxis.x; - view[3] = 0.0f; - view[4] = xaxis.y; - view[5] = yaxis.y; - view[6] = zaxis.y; - view[7] = 0.0f; - view[8] = xaxis.z; - view[9] = yaxis.z; - view[10] = zaxis.z; - view[11] = 0; - view[12] = -vec3_dot(&xaxis, &camera->location); - view[13] = -vec3_dot(&yaxis, &camera->location); - view[14] = -vec3_dot(&zaxis, &camera->location); - view[15] = 1.0f; + // We tested if it would make sense to create a vec3_dot_sse version for the 3 dot products + // The result was that it is not faster, only if we would do 4 dot products would we see an improvement + camera->view[0] = xaxis.x; + camera->view[1] = yaxis.x; + camera->view[2] = zaxis.x; + camera->view[3] = 0.0f; + camera->view[4] = xaxis.y; + camera->view[5] = yaxis.y; + camera->view[6] = zaxis.y; + camera->view[7] = 0.0f; + camera->view[8] = xaxis.z; + camera->view[9] = yaxis.z; + camera->view[10] = zaxis.z; + camera->view[11] = 0; + camera->view[12] = -vec3_dot(&xaxis, &camera->location); + camera->view[13] = -vec3_dot(&yaxis, &camera->location); + camera->view[14] = -vec3_dot(&zaxis, &camera->location); + camera->view[15] = 1.0f; } -// @performance This function might be optimizable with simd? -// the normalization might also be not required? void -camera_view_matrix_rh(const Camera* __restrict camera, f32* __restrict view) +camera_view_matrix_rh(Camera* __restrict camera) { v3_f32 zaxis = { -camera->front.x, -camera->front.y, -camera->front.z }; @@ -321,22 +327,24 @@ camera_view_matrix_rh(const Camera* __restrict camera, f32* __restrict view) v3_f32 yaxis; vec3_cross(&yaxis, &zaxis, &xaxis); - view[0] = xaxis.x; - view[1] = yaxis.x; - view[2] = zaxis.x; - view[3] = 0.0f; - view[4] = xaxis.y; - view[5] = yaxis.y; - view[6] = zaxis.y; - view[7] = 0.0f; - view[8] = xaxis.z; - view[9] = yaxis.z; - view[10] = zaxis.z; - view[11] = 0; - view[12] = -vec3_dot(&xaxis, &camera->location); - view[13] = -vec3_dot(&yaxis, &camera->location); - view[14] = -vec3_dot(&zaxis, &camera->location); - view[15] = 1.0f; + // We tested if it would make sense to create a vec3_dot_sse version for the 3 dot products + // The result was that it is not faster, only if we would do 4 dot products would we see an improvement + camera->view[0] = xaxis.x; + camera->view[1] = yaxis.x; + camera->view[2] = zaxis.x; + camera->view[3] = 0.0f; + camera->view[4] = xaxis.y; + camera->view[5] = yaxis.y; + camera->view[6] = zaxis.y; + camera->view[7] = 0.0f; + camera->view[8] = xaxis.z; + camera->view[9] = yaxis.z; + camera->view[10] = zaxis.z; + camera->view[11] = 0; + camera->view[12] = -vec3_dot(&xaxis, &camera->location); + camera->view[13] = -vec3_dot(&yaxis, &camera->location); + camera->view[14] = -vec3_dot(&zaxis, &camera->location); + camera->view[15] = 1.0f; } #endif \ No newline at end of file diff --git a/gpuapi/RenderUtils.h b/gpuapi/RenderUtils.h index 02c7005..8a643cf 100644 --- a/gpuapi/RenderUtils.h +++ b/gpuapi/RenderUtils.h @@ -71,27 +71,27 @@ void vertex_line_create( y2 -= thickness / 2; } - float n1 = -(y2 - y1); - float n2 = x2 - x1; - float n_ = sqrtf(n2 * n2 + n1 * n1); - float norm1 = n1 / n_; - float norm2 = n2 / n_; + f32 n1 = -(y2 - y1); + f32 n2 = x2 - x1; + f32 n_ = sqrtf(n2 * n2 + n1 * n1); + f32 norm1 = n1 / n_; + f32 norm2 = n2 / n_; // @todo Currently we always use p1 and never p2 // This is wrong and depends on the Alignment, no? Maybe not // Calculate both parallel points to the start position - float p1_x1 = x1 + thickness * norm1; - float p1_y1 = y1 + thickness * norm2; + f32 p1_x1 = x1 + thickness * norm1; + f32 p1_y1 = y1 + thickness * norm2; - // float p2_x1 = x1 - thickness * norm1; - // float p2_y1 = y1 - thickness * norm2; + // f32 p2_x1 = x1 - thickness * norm1; + // f32 p2_y1 = y1 - thickness * norm2; // Calculate both parallel points to the end position - float p1_x2 = x2 + thickness * norm1; - float p1_y2 = y2 + thickness * norm2; + f32 p1_x2 = x2 + thickness * norm1; + f32 p1_y2 = y2 + thickness * norm2; - // float p2_x2 = x2 - thickness * norm1; - // float p2_y2 = y2 - thickness * norm2; + // f32 p2_x2 = x2 - thickness * norm1; + // f32 p2_y2 = y2 - thickness * norm2; vertex_degenerate_create(vertices, index, zindex, x1, y1); @@ -148,6 +148,9 @@ void vertex_rect_create( vertex_degenerate_create(vertices, index, zindex, x, y); + f32 y_height = y + height; + f32 x_width = x + width; + // Rectangle vertices[*index].position.x = x; vertices[*index].position.y = y; @@ -158,14 +161,14 @@ void vertex_rect_create( ++(*index); vertices[*index].position.x = x; - vertices[*index].position.y = y + height; + vertices[*index].position.y = y_height; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x1; vertices[*index].tex_coord.y = tex_y2; vertices[*index].color = color_index; ++(*index); - vertices[*index].position.x = x + width; + vertices[*index].position.x = x_width; vertices[*index].position.y = y; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x2; @@ -173,8 +176,8 @@ void vertex_rect_create( vertices[*index].color = color_index; ++(*index); - vertices[*index].position.x = x + width; - vertices[*index].position.y = y + height; + vertices[*index].position.x = x_width; + vertices[*index].position.y = y_height; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.y = tex_y2; @@ -205,6 +208,11 @@ void vertex_rect_border_create( // @bug While this works for the whole rectangle it doesn't work for individual borders // @todo We need a version where you can define individual borders + f32 y_height = y + height; + f32 y_thickness = y + thickness; + f32 x_width = x + width; + f32 x_thickness = x + thickness; + // Rectangle // Top border vertices[*index].position.x = x; @@ -216,14 +224,14 @@ void vertex_rect_border_create( ++(*index); vertices[*index].position.x = x; - vertices[*index].position.y = y + thickness; + vertices[*index].position.y = y_thickness; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x1; vertices[*index].tex_coord.y = tex_y2; vertices[*index].color = color_index; ++(*index); - vertices[*index].position.x = x + width; + vertices[*index].position.x = x_width; vertices[*index].position.y = y; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x2; @@ -231,8 +239,8 @@ void vertex_rect_border_create( vertices[*index].color = color_index; ++(*index); - vertices[*index].position.x = x + width; - vertices[*index].position.y = y + thickness; + vertices[*index].position.x = x_width; + vertices[*index].position.y = y_thickness; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.y = tex_y2; @@ -240,24 +248,24 @@ void vertex_rect_border_create( ++(*index); // Right border - vertices[*index].position.x = x + width - thickness; - vertices[*index].position.y = y + thickness; + vertices[*index].position.x = x_width - thickness; + vertices[*index].position.y = y_thickness; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.y = tex_y2; vertices[*index].color = color_index; ++(*index); - vertices[*index].position.x = x + width; - vertices[*index].position.y = y + height; + vertices[*index].position.x = x_width; + vertices[*index].position.y = y_height; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x1; vertices[*index].tex_coord.y = tex_y2; vertices[*index].color = color_index; ++(*index); - vertices[*index].position.x = x + width - thickness; - vertices[*index].position.y = y + height; + vertices[*index].position.x = x_width - thickness; + vertices[*index].position.y = y_height; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.y = tex_y1; @@ -265,8 +273,8 @@ void vertex_rect_border_create( ++(*index); // Bottom border - vertices[*index].position.x = x + width - thickness; - vertices[*index].position.y = y + height - thickness; + vertices[*index].position.x = x_width - thickness; + vertices[*index].position.y = y_height - thickness; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.y = tex_y2; @@ -274,7 +282,7 @@ void vertex_rect_border_create( ++(*index); vertices[*index].position.x = x; - vertices[*index].position.y = y + height; + vertices[*index].position.y = y_height; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x1; vertices[*index].tex_coord.y = tex_y2; @@ -282,7 +290,7 @@ void vertex_rect_border_create( ++(*index); vertices[*index].position.x = x; - vertices[*index].position.y = y + height - thickness; + vertices[*index].position.y = y_height - thickness; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.y = tex_y1; @@ -290,8 +298,8 @@ void vertex_rect_border_create( ++(*index); // Left border - vertices[*index].position.x = x + thickness; - vertices[*index].position.y = y + height - thickness; + vertices[*index].position.x = x_thickness; + vertices[*index].position.y = y_height - thickness; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.y = tex_y2; @@ -299,15 +307,15 @@ void vertex_rect_border_create( ++(*index); vertices[*index].position.x = x; - vertices[*index].position.y = y + thickness; + vertices[*index].position.y = y_thickness; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x1; vertices[*index].tex_coord.y = tex_y2; vertices[*index].color = color_index; ++(*index); - vertices[*index].position.x = x + thickness; - vertices[*index].position.y = y + thickness; + vertices[*index].position.x = x_thickness; + vertices[*index].position.y = y_thickness; vertices[*index].position.z = zindex; vertices[*index].tex_coord.x = tex_x2; vertices[*index].tex_coord.y = tex_y1; @@ -338,13 +346,14 @@ f32 text_calculate_dimensions_height( f32 height, const Font* __restrict font, const char* __restrict text, f32 scale, int32 length ) { - f32 y = font->line_height * scale; + f32 line_height = font->line_height * scale; + f32 y = line_height; // @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value - for (int i = 0; i < length; ++i) { + for (int32 i = 0; i < length; ++i) { if (text[i] == '\n') { - y += font->line_height * scale; + y += line_height; } } @@ -363,7 +372,7 @@ f32 text_calculate_dimensions_width( // @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value - for (int i = 0; i < length; ++i) { + for (int32 i = 0; i < length; ++i) { int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i); if (character == '\n') { @@ -376,13 +385,14 @@ f32 text_calculate_dimensions_width( Glyph* glyph = NULL; // We try to jump t othe correct glyph based on the glyph codepoint // If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending) - if (font->glyph_count > character - first_glyph - && font->glyphs[character - first_glyph].codepoint == character + int32 perfect_glyph_pos = character - first_glyph; + if (font->glyph_count > perfect_glyph_pos + && font->glyphs[perfect_glyph_pos].codepoint == character ) { - glyph = &font->glyphs[character - first_glyph]; + glyph = &font->glyphs[perfect_glyph_pos]; } else { // @performance consider to do binary search - for (int j = 0; j <= character - first_glyph && j < font->glyph_count; ++j) { + for (int32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) { if (font->glyphs[j].codepoint == character) { glyph = &font->glyphs[j]; @@ -406,8 +416,9 @@ void text_calculate_dimensions( f32* __restrict width, f32* __restrict height, const Font* __restrict font, const char* __restrict text, bool is_ascii, f32 scale, int32 length ) { + f32 line_height = font->line_height * scale; f32 x = 0; - f32 y = font->line_height * scale; + f32 y = line_height; f32 offset_x = 0; @@ -415,12 +426,12 @@ void text_calculate_dimensions( // @todo remember to restrict to width/height if value > 0 -> force width to remain below certain value - for (int i = 0; i < length; ++i) { + for (int32 i = 0; i < length; ++i) { int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i); if (character == '\n') { x = OMS_MAX(x, offset_x); - y += font->line_height * scale; + y += line_height; offset_x = 0; @@ -430,13 +441,14 @@ void text_calculate_dimensions( Glyph* glyph = NULL; // We try to jump t othe correct glyph based on the glyph codepoint // If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending) - if (font->glyph_count > character - first_glyph - && font->glyphs[character - first_glyph].codepoint == character + int32 perfect_glyph_pos = character - first_glyph; + if (font->glyph_count > perfect_glyph_pos + && font->glyphs[perfect_glyph_pos].codepoint == character ) { - glyph = &font->glyphs[character - first_glyph]; + glyph = &font->glyphs[perfect_glyph_pos]; } else { // @performance consider to do binary search - for (int j = 0; j <= character - first_glyph && j < font->glyph_count; ++j) { + for (int32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) { if (font->glyphs[j].codepoint == character) { glyph = &font->glyphs[j]; @@ -465,7 +477,7 @@ f32 vertex_text_create( ) { int32 length = utf8_strlen(text); bool is_ascii = strlen(text) == length; - float scale = size / font->size; + f32 scale = size / font->size; // If we do a different alignment we need to pre-calculate the width and height if (align_h != 0 || align_v != 0) { @@ -493,7 +505,7 @@ f32 vertex_text_create( uint32 first_glyph = font->glyphs[0].codepoint; f32 offset_x = x; - for (int i = 0; i < length; ++i) { + for (int32 i = 0; i < length; ++i) { int32 character = is_ascii ? text[i] : utf8_get_char_at(text, i); if (character == '\n') { y += font->line_height * scale; @@ -505,13 +517,14 @@ f32 vertex_text_create( Glyph* glyph = NULL; // We try to jump t othe correct glyph based on the glyph codepoint // If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending) - if (font->glyph_count > character - first_glyph - && font->glyphs[character - first_glyph].codepoint == character + int32 perfect_glyph_pos = character - first_glyph; + if (font->glyph_count > perfect_glyph_pos + && font->glyphs[perfect_glyph_pos].codepoint == character ) { - glyph = &font->glyphs[character - first_glyph]; + glyph = &font->glyphs[perfect_glyph_pos]; } else { // @performance consider to do binary search - for (int j = 0; j <= character - first_glyph && j < font->glyph_count; ++j) { + for (int32 j = 0; j <= perfect_glyph_pos && j < font->glyph_count; ++j) { if (font->glyphs[j].codepoint == character) { glyph = &font->glyphs[j]; @@ -602,7 +615,7 @@ f32 ui_text_create( int32 length = utf8_strlen(text->value_str); bool is_ascii = strlen(text->value_str) == length; - float scale = size->value_float / theme->font.size; + f32 scale = size->value_float / theme->font.size; // If we do a different alignment we need to pre-calculate the width and height if (align_h != NULL || align_v != NULL) { @@ -635,7 +648,7 @@ f32 ui_text_create( int32 start = *index; f32 offset_x = x->value_int; f32 offset_y = y->value_int; - for (int i = 0; i < length; ++i) { + for (int32 i = 0; i < length; ++i) { int32 character = is_ascii ? text->value_str[i] : utf8_get_char_at(text->value_str, i); if (character == '\n') { @@ -648,13 +661,14 @@ f32 ui_text_create( Glyph* glyph = NULL; // We try to jump t othe correct glyph based on the glyph codepoint // If that doesn't work we iterate the glyph list BUT only until the last possible match (glyphs must be sorted ascending) - if (theme->font.glyph_count > character - first_glyph - && theme->font.glyphs[character - first_glyph].codepoint == character + int32 perfect_glyph_pos = character - first_glyph; + if (theme->font.glyph_count > perfect_glyph_pos + && theme->font.glyphs[perfect_glyph_pos].codepoint == character ) { - glyph = &theme->font.glyphs[character - first_glyph]; + glyph = &theme->font.glyphs[perfect_glyph_pos]; } else { // @performance consider to do binary search - for (int j = 0; j <= character - first_glyph && j < theme->font.glyph_count; ++j) { + for (int32 j = 0; j <= perfect_glyph_pos && j < theme->font.glyph_count; ++j) { if (theme->font.glyphs[j].codepoint == character) { glyph = &theme->font.glyphs[j]; diff --git a/input/Input.h b/input/Input.h index 030925e..7bb66a7 100644 --- a/input/Input.h +++ b/input/Input.h @@ -533,16 +533,18 @@ void input_hotkey_state(Input* input) { uint8 old_hotkeys[MAX_KEY_PRESSES]; - memcpy(old_hotkeys, input->state.state_hotkeys, sizeof(uint8) * MAX_KEY_PRESSES); + InputState* state = &input->state; - memset(input->state.state_hotkeys, 0, sizeof(uint8) * MAX_KEY_PRESSES); + memcpy(old_hotkeys, state->state_hotkeys, sizeof(uint8) * MAX_KEY_PRESSES); + + memset(state->state_hotkeys, 0, sizeof(uint8) * MAX_KEY_PRESSES); int32 active_hotkeys = 0; // Check every key down state for (int key_state = 0; key_state < MAX_KEY_STATES; ++key_state) { - if (input->state.state_keys[key_state].key_id == 0 - || input->state.state_keys[key_state].key_state == KEY_STATE_RELEASED + if (state->state_keys[key_state].key_id == 0 + || state->state_keys[key_state].key_state == KEY_STATE_RELEASED ) { // no key defined for this down state continue; @@ -551,7 +553,7 @@ input_hotkey_state(Input* input) // Is a key defined for this state AND is at least one hotkey defined for this key // If no hotkey is defined we don't care // Careful, remember MAX_MOUSE_KEYS offset - InputKey* key = &input->state.state_keys[key_state]; + InputKey* key = &state->state_keys[key_state]; int32 internal_key_id = (key->key_id & ~(INPUT_KEYBOARD_PREFIX | INPUT_CONTROLLER_PREFIX)) + ((bool) (key->key_id & INPUT_KEYBOARD_PREFIX)) * MAX_MOUSE_KEYS + ((bool) (key->key_id & INPUT_CONTROLLER_PREFIX)) * (MAX_MOUSE_KEYS + MAX_KEYBOARD_KEYS); @@ -589,17 +591,17 @@ input_hotkey_state(Input* input) // Hotkey already active // @question Do we even need this? This shouldn't happen anyway?! - if (hotkey_is_active(input->state.state_hotkeys, hotkeys_for_key[possible_hotkey_idx])) { + if (hotkey_is_active(state->state_hotkeys, hotkeys_for_key[possible_hotkey_idx])) { continue; } // store active hotkey, if it is not already active - bool is_pressed = hotkey_keys_are_active(input->state.state_keys, mapping, hotkeys_for_key[possible_hotkey_idx]); + bool is_pressed = hotkey_keys_are_active(state->state_keys, mapping, hotkeys_for_key[possible_hotkey_idx]); if (!is_pressed) { continue; } - input->state.state_hotkeys[active_hotkeys] = hotkeys_for_key[possible_hotkey_idx]; + state->state_hotkeys[active_hotkeys] = hotkeys_for_key[possible_hotkey_idx]; ++active_hotkeys; // Run callback if defined diff --git a/log/Debug.cpp b/log/Debug.cpp index f8d5ca1..af4f191 100644 --- a/log/Debug.cpp +++ b/log/Debug.cpp @@ -84,18 +84,17 @@ void update_timing_stat(uint32 stat, const char* function) { uint64 new_tick_count = __rdtsc(); - debug_container->perf_stats[stat].function = function; - debug_container->perf_stats[stat].delta_tick = new_tick_count - debug_container->perf_stats[stat].old_tick_count; - debug_container->perf_stats[stat].delta_time = (double) debug_container->perf_stats[stat].delta_tick / (double) debug_container->performance_count_frequency; - debug_container->perf_stats[stat].old_tick_count = new_tick_count; + TimingStat* timing_stat = &debug_container->perf_stats[stat]; + timing_stat->function = function; + timing_stat->delta_tick = new_tick_count - timing_stat->old_tick_count; + timing_stat->delta_time = (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency; + timing_stat->old_tick_count = new_tick_count; } inline void update_timing_stat_start(uint32 stat, const char*) { - uint64 new_tick_count = __rdtsc(); - - debug_container->perf_stats[stat].old_tick_count = new_tick_count; + debug_container->perf_stats[stat].old_tick_count = __rdtsc(); } inline @@ -103,10 +102,11 @@ void update_timing_stat_end(uint32 stat, const char* function) { uint64 new_tick_count = __rdtsc(); - debug_container->perf_stats[stat].function = function; - debug_container->perf_stats[stat].delta_tick = new_tick_count - debug_container->perf_stats[stat].old_tick_count; - debug_container->perf_stats[stat].delta_time = (double) debug_container->perf_stats[stat].delta_tick / (double) debug_container->performance_count_frequency; - debug_container->perf_stats[stat].old_tick_count = new_tick_count; + TimingStat* timing_stat = &debug_container->perf_stats[stat]; + timing_stat->function = function; + timing_stat->delta_tick = new_tick_count - timing_stat->old_tick_count; + timing_stat->delta_time = (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency; + timing_stat->old_tick_count = new_tick_count; } inline @@ -114,12 +114,11 @@ void update_timing_stat_end_continued(uint32 stat, const char* function) { uint64 new_tick_count = __rdtsc(); - debug_container->perf_stats[stat].function = function; - debug_container->perf_stats[stat].delta_tick = debug_container->perf_stats[stat].delta_tick - + new_tick_count - debug_container->perf_stats[stat].old_tick_count; - debug_container->perf_stats[stat].delta_time = debug_container->perf_stats[stat].delta_time - + (double) debug_container->perf_stats[stat].delta_tick / (double) debug_container->performance_count_frequency; - debug_container->perf_stats[stat].old_tick_count = new_tick_count; + TimingStat* timing_stat = &debug_container->perf_stats[stat]; + timing_stat->function = function; + timing_stat->delta_tick = timing_stat->delta_tick + new_tick_count - timing_stat->old_tick_count; + timing_stat->delta_time = timing_stat->delta_time + (double) timing_stat->delta_tick / (double) debug_container->performance_count_frequency; + timing_stat->old_tick_count = new_tick_count; } inline @@ -172,23 +171,25 @@ void debug_memory_init(uint64 start, uint64 size) return; } - if (debug_container->dmc.memory_size <= debug_container->dmc.memory_element_idx) { - DebugMemory* old = debug_container->dmc.memory_stats; + DebugMemoryContainer* dmc = &debug_container->dmc; + if (dmc->memory_size <= dmc->memory_element_idx) { + DebugMemory* old = dmc->memory_stats; - debug_container->dmc.memory_size += 3; - debug_container->dmc.memory_stats = (DebugMemory *) calloc(debug_container->dmc.memory_size, sizeof(DebugMemory)); + dmc->memory_size += 3; + dmc->memory_stats = (DebugMemory *) calloc(dmc->memory_size, sizeof(DebugMemory)); if (old) { - memcpy(debug_container->dmc.memory_stats, old, (debug_container->dmc.memory_size - 3) * sizeof(DebugMemory)); + memcpy(dmc->memory_stats, old, (dmc->memory_size - 3) * sizeof(DebugMemory)); free(old); } } - debug_container->dmc.memory_stats[debug_container->dmc.memory_element_idx].start = start; - debug_container->dmc.memory_stats[debug_container->dmc.memory_element_idx].size = size; - debug_container->dmc.memory_stats[debug_container->dmc.memory_element_idx].usage = 0; + DebugMemory* debug_mem = &dmc->memory_stats[dmc->memory_element_idx]; + debug_mem->start = start; + debug_mem->size = size; + debug_mem->usage = 0; - ++debug_container->dmc.memory_element_idx; + ++dmc->memory_element_idx; } void debug_memory_log(uint64 start, uint64 size, int32 type, const char* function) @@ -206,13 +207,14 @@ void debug_memory_log(uint64 start, uint64 size, int32 type, const char* functio mem->action_idx = 0; } - mem->last_action[mem->action_idx].type = type; - mem->last_action[mem->action_idx].start = start - mem->start; - mem->last_action[mem->action_idx].size = size; + DebugMemoryRange* dmr = &mem->last_action[mem->action_idx]; + dmr->type = type; + dmr->start = start - mem->start; + dmr->size = size; // We are using rdtsc since it is faster -> less debugging overhead than using time() - mem->last_action[mem->action_idx].time = __rdtsc(); - mem->last_action[mem->action_idx].function_name = function; + dmr->time = __rdtsc(); + dmr->function_name = function; ++mem->action_idx; @@ -238,13 +240,14 @@ void debug_memory_reserve(uint64 start, uint64 size, int32 type, const char* fun mem->reserve_action_idx = 0; } - mem->reserve_action[mem->reserve_action_idx].type = type; - mem->reserve_action[mem->reserve_action_idx].start = start - mem->start; - mem->reserve_action[mem->reserve_action_idx].size = size; + DebugMemoryRange* dmr = &mem->reserve_action[mem->reserve_action_idx]; + dmr->type = type; + dmr->start = start - mem->start; + dmr->size = size; // We are using rdtsc since it is faster -> less debugging overhead than using time() - mem->reserve_action[mem->reserve_action_idx].time = __rdtsc(); - mem->reserve_action[mem->reserve_action_idx].function_name = function; + dmr->time = __rdtsc(); + dmr->function_name = function; ++mem->reserve_action_idx; } @@ -273,29 +276,30 @@ byte* log_get_memory(uint64 size, byte aligned = 1, bool zeroed = false) return 0; } - ASSERT_SIMPLE(size <= debug_container->log_memory.size); + LogMemory* log_mem = &debug_container->log_memory; + ASSERT_SIMPLE(size <= log_mem->size); if (aligned > 1) { - uintptr_t address = (uintptr_t) debug_container->log_memory.memory; - debug_container->log_memory.pos += (aligned - ((address + debug_container->log_memory.pos) & (aligned - 1))) % aligned; + uintptr_t address = (uintptr_t) log_mem->memory; + log_mem->pos += (aligned - ((address + log_mem->pos) & (aligned - 1))) % aligned; } size = ROUND_TO_NEAREST(size, aligned); - if (debug_container->log_memory.pos + size > debug_container->log_memory.size) { - debug_container->log_memory.pos = 0; + if (log_mem->pos + size > log_mem->size) { + log_mem->pos = 0; if (aligned > 1) { - uintptr_t address = (uintptr_t) debug_container->log_memory.memory; - debug_container->log_memory.pos += (aligned - ((address + debug_container->log_memory.pos) & (aligned - 1))) % aligned; + uintptr_t address = (uintptr_t) log_mem->memory; + log_mem->pos += (aligned - ((address + log_mem->pos) & (aligned - 1))) % aligned; } } - byte* offset = (byte *) (debug_container->log_memory.memory + debug_container->log_memory.pos); + byte* offset = (byte *) (log_mem->memory + log_mem->pos); if (zeroed) { memset((void *) offset, 0, size); } - debug_container->log_memory.pos += size; + log_mem->pos += size; return offset; } diff --git a/math/matrix/MatrixFloat32.h b/math/matrix/MatrixFloat32.h index 1f3a6b5..12a8fe6 100644 --- a/math/matrix/MatrixFloat32.h +++ b/math/matrix/MatrixFloat32.h @@ -23,6 +23,7 @@ // @todo Implement intrinsic versions! +inline void vec2_normalize(f32* __restrict x, f32* __restrict y) { f32 d = sqrtf((*x) * (*x) + (*y) * (*y)); @@ -94,6 +95,7 @@ f32 vec2_dot(const v2_f32* a, const v2_f32* b) { return a->x * b->x + a->y * b->y; } +inline void vec3_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z) { f32 d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z)); @@ -103,6 +105,7 @@ void vec3_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z) *z /= d; } +inline void vec3_normalize(v3_f32* vec) { f32 d = sqrtf(vec->x * vec->x + vec->y * vec->y + vec->z * vec->z); @@ -179,6 +182,7 @@ void vec3_cross(v3_f32* __restrict vec, const v3_f32* a, const v3_f32* b) { vec->z = a->x * b->y - a->y * b->x; } +inline f32 vec3_dot(const v3_f32* a, const v3_f32* b) { return a->x * b->x + a->y * b->y + a->z * b->z; }