diff --git a/camera/Camera.h b/camera/Camera.h index 63401cb..62246b8 100644 --- a/camera/Camera.h +++ b/camera/Camera.h @@ -18,7 +18,6 @@ #define CAMERA_MAX_INPUTS 4 // @todo Please check out if we can switch to quaternions. We tried but failed. -// The functions with a 2 at the end are our current backup solution which shouldn't be used (probably) struct Camera { v3_f32 location; @@ -45,16 +44,17 @@ struct Camera { void camera_update_vectors(Camera* camera) { - camera->front.x = cosf(OMS_DEG2RAD(camera->orientation.x)) * cosf(OMS_DEG2RAD(camera->orientation.y)); + f32 cos_ori_x = cosf(OMS_DEG2RAD(camera->orientation.x)); + camera->front.x = cos_ori_x * cosf(OMS_DEG2RAD(camera->orientation.y)); camera->front.y = sinf(OMS_DEG2RAD(camera->orientation.x)); - camera->front.z = cosf(OMS_DEG2RAD(camera->orientation.x)) * sinf(OMS_DEG2RAD(camera->orientation.y)); - vec3_normalize_f32(&camera->front); + camera->front.z = cos_ori_x * sinf(OMS_DEG2RAD(camera->orientation.y)); + vec3_normalize(&camera->front); vec3_cross(&camera->right, &camera->front, &camera->world_up); - vec3_normalize_f32(&camera->right); + vec3_normalize(&camera->right); vec3_cross(&camera->up, &camera->right, &camera->front); - vec3_normalize_f32(&camera->up); + vec3_normalize(&camera->up); } void camera_rotate(Camera* camera, int32 dx, int32 dy, f32 dt) @@ -137,11 +137,11 @@ void camera_movement(Camera* camera, CameraMovement* movement, f32 dt, bool rela v3_f32 right; vec3_cross(&right, &camera->world_up, &forward); - vec3_normalize_f32(&right); + vec3_normalize(&right); v3_f32 up; vec3_cross(&up, &right, &forward); - vec3_normalize_f32(&up); + vec3_normalize(&up); for (int32 i = 0; i < CAMERA_MAX_INPUTS; i++) { switch(movement[i]) { @@ -275,6 +275,8 @@ void camera_translation_matrix_sparse_lh(const Camera* __restrict camera, f32* t translation[11] = camera->location.z; } +// @performance This function might be optimizable with simd? +// the normalization might also be not required? void camera_view_matrix_lh(const Camera* __restrict camera, f32* __restrict view) { @@ -282,7 +284,7 @@ camera_view_matrix_lh(const Camera* __restrict camera, f32* __restrict view) v3_f32 xaxis; vec3_cross(&xaxis, &camera->world_up, &zaxis); - vec3_normalize_f32(&xaxis); + vec3_normalize(&xaxis); v3_f32 yaxis; vec3_cross(&yaxis, &zaxis, &xaxis); @@ -305,6 +307,8 @@ camera_view_matrix_lh(const Camera* __restrict camera, f32* __restrict view) view[15] = 1.0f; } +// @performance This function might be optimizable with simd? +// the normalization might also be not required? void camera_view_matrix_rh(const Camera* __restrict camera, f32* __restrict view) { @@ -312,7 +316,7 @@ camera_view_matrix_rh(const Camera* __restrict camera, f32* __restrict view) v3_f32 xaxis; vec3_cross(&xaxis, &zaxis, &camera->world_up); - vec3_normalize_f32(&xaxis); + vec3_normalize(&xaxis); v3_f32 yaxis; vec3_cross(&yaxis, &zaxis, &xaxis); diff --git a/input/Input.h b/input/Input.h index dd91f12..030925e 100644 --- a/input/Input.h +++ b/input/Input.h @@ -189,121 +189,119 @@ void input_init(Input* input, uint8 size, void* callback_data, BufferMemory* buf } inline -void input_clean_state(InputState* state) +void input_clean_state(InputKey* state_keys) { for (int32 i = 0; i < MAX_KEY_STATES; ++i) { - if (state->state_keys[i].key_state == KEY_STATE_RELEASED) { - state->state_keys[i].key_id = 0; + if (state_keys[i].key_state == KEY_STATE_RELEASED) { + state_keys[i].key_id = 0; } } } inline -bool input_action_exists(const InputState* state, int16 key) +bool input_action_exists(const InputKey* state_keys, int16 key) { - return state->state_keys[0].key_id == key - || state->state_keys[1].key_id == key - || state->state_keys[2].key_id == key - || state->state_keys[3].key_id == key - || state->state_keys[4].key_id == key - || state->state_keys[4].key_id == key - || state->state_keys[5].key_id == key - || state->state_keys[6].key_id == key - || state->state_keys[7].key_id == key - || state->state_keys[8].key_id == key - || state->state_keys[9].key_id == key; + return state_keys[0].key_id == key + || state_keys[1].key_id == key + || state_keys[2].key_id == key + || state_keys[3].key_id == key + || state_keys[4].key_id == key + || state_keys[4].key_id == key + || state_keys[5].key_id == key + || state_keys[6].key_id == key + || state_keys[7].key_id == key + || state_keys[8].key_id == key + || state_keys[9].key_id == key; } inline -bool input_is_down(const InputState* state, int16 key) +bool input_is_down(const InputKey* state_keys, int16 key) { - return (state->state_keys[0].key_id == key && state->state_keys[0].key_state != KEY_STATE_RELEASED) - || (state->state_keys[1].key_id == key && state->state_keys[1].key_state != KEY_STATE_RELEASED) - || (state->state_keys[2].key_id == key && state->state_keys[2].key_state != KEY_STATE_RELEASED) - || (state->state_keys[3].key_id == key && state->state_keys[3].key_state != KEY_STATE_RELEASED) - || (state->state_keys[4].key_id == key && state->state_keys[4].key_state != KEY_STATE_RELEASED) - || (state->state_keys[4].key_id == key && state->state_keys[4].key_state != KEY_STATE_RELEASED) - || (state->state_keys[5].key_id == key && state->state_keys[5].key_state != KEY_STATE_RELEASED) - || (state->state_keys[6].key_id == key && state->state_keys[6].key_state != KEY_STATE_RELEASED) - || (state->state_keys[7].key_id == key && state->state_keys[7].key_state != KEY_STATE_RELEASED) - || (state->state_keys[8].key_id == key && state->state_keys[8].key_state != KEY_STATE_RELEASED) - || (state->state_keys[9].key_id == key && state->state_keys[9].key_state != KEY_STATE_RELEASED); + return (state_keys[0].key_id == key && state_keys[0].key_state != KEY_STATE_RELEASED) + || (state_keys[1].key_id == key && state_keys[1].key_state != KEY_STATE_RELEASED) + || (state_keys[2].key_id == key && state_keys[2].key_state != KEY_STATE_RELEASED) + || (state_keys[3].key_id == key && state_keys[3].key_state != KEY_STATE_RELEASED) + || (state_keys[4].key_id == key && state_keys[4].key_state != KEY_STATE_RELEASED) + || (state_keys[5].key_id == key && state_keys[5].key_state != KEY_STATE_RELEASED) + || (state_keys[6].key_id == key && state_keys[6].key_state != KEY_STATE_RELEASED) + || (state_keys[7].key_id == key && state_keys[7].key_state != KEY_STATE_RELEASED) + || (state_keys[8].key_id == key && state_keys[8].key_state != KEY_STATE_RELEASED) + || (state_keys[9].key_id == key && state_keys[9].key_state != KEY_STATE_RELEASED); } inline -bool input_is_pressed(const InputState* state, int16 key) +bool input_is_pressed(const InputKey* state_keys, int16 key) { - return (state->state_keys[0].key_id == key && state->state_keys[0].key_state == KEY_STATE_PRESSED) - || (state->state_keys[1].key_id == key && state->state_keys[1].key_state == KEY_STATE_PRESSED) - || (state->state_keys[2].key_id == key && state->state_keys[2].key_state == KEY_STATE_PRESSED) - || (state->state_keys[3].key_id == key && state->state_keys[3].key_state == KEY_STATE_PRESSED) - || (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_PRESSED) - || (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_PRESSED) - || (state->state_keys[5].key_id == key && state->state_keys[5].key_state == KEY_STATE_PRESSED) - || (state->state_keys[6].key_id == key && state->state_keys[6].key_state == KEY_STATE_PRESSED) - || (state->state_keys[7].key_id == key && state->state_keys[7].key_state == KEY_STATE_PRESSED) - || (state->state_keys[8].key_id == key && state->state_keys[8].key_state == KEY_STATE_PRESSED) - || (state->state_keys[9].key_id == key && state->state_keys[9].key_state == KEY_STATE_PRESSED); + return (state_keys[0].key_id == key && state_keys[0].key_state == KEY_STATE_PRESSED) + || (state_keys[1].key_id == key && state_keys[1].key_state == KEY_STATE_PRESSED) + || (state_keys[2].key_id == key && state_keys[2].key_state == KEY_STATE_PRESSED) + || (state_keys[3].key_id == key && state_keys[3].key_state == KEY_STATE_PRESSED) + || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_PRESSED) + || (state_keys[5].key_id == key && state_keys[5].key_state == KEY_STATE_PRESSED) + || (state_keys[6].key_id == key && state_keys[6].key_state == KEY_STATE_PRESSED) + || (state_keys[7].key_id == key && state_keys[7].key_state == KEY_STATE_PRESSED) + || (state_keys[8].key_id == key && state_keys[8].key_state == KEY_STATE_PRESSED) + || (state_keys[9].key_id == key && state_keys[9].key_state == KEY_STATE_PRESSED); } inline -bool input_is_held(const InputState* state, int16 key) +bool input_is_held(const InputKey* state_keys, int16 key) { - return (state->state_keys[0].key_id == key && state->state_keys[0].key_state == KEY_STATE_HELD) - || (state->state_keys[1].key_id == key && state->state_keys[1].key_state == KEY_STATE_HELD) - || (state->state_keys[2].key_id == key && state->state_keys[2].key_state == KEY_STATE_HELD) - || (state->state_keys[3].key_id == key && state->state_keys[3].key_state == KEY_STATE_HELD) - || (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_HELD) - || (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_HELD) - || (state->state_keys[5].key_id == key && state->state_keys[5].key_state == KEY_STATE_HELD) - || (state->state_keys[6].key_id == key && state->state_keys[6].key_state == KEY_STATE_HELD) - || (state->state_keys[7].key_id == key && state->state_keys[7].key_state == KEY_STATE_HELD) - || (state->state_keys[8].key_id == key && state->state_keys[8].key_state == KEY_STATE_HELD) - || (state->state_keys[9].key_id == key && state->state_keys[9].key_state == KEY_STATE_HELD); + return (state_keys[0].key_id == key && state_keys[0].key_state == KEY_STATE_HELD) + || (state_keys[1].key_id == key && state_keys[1].key_state == KEY_STATE_HELD) + || (state_keys[2].key_id == key && state_keys[2].key_state == KEY_STATE_HELD) + || (state_keys[3].key_id == key && state_keys[3].key_state == KEY_STATE_HELD) + || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_HELD) + || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_HELD) + || (state_keys[5].key_id == key && state_keys[5].key_state == KEY_STATE_HELD) + || (state_keys[6].key_id == key && state_keys[6].key_state == KEY_STATE_HELD) + || (state_keys[7].key_id == key && state_keys[7].key_state == KEY_STATE_HELD) + || (state_keys[8].key_id == key && state_keys[8].key_state == KEY_STATE_HELD) + || (state_keys[9].key_id == key && state_keys[9].key_state == KEY_STATE_HELD); } inline -bool input_is_released(const InputState* state, int16 key) +bool input_is_released(const InputKey* state_keys, int16 key) { - return (state->state_keys[0].key_id == key && state->state_keys[0].key_state == KEY_STATE_RELEASED) - || (state->state_keys[1].key_id == key && state->state_keys[1].key_state == KEY_STATE_RELEASED) - || (state->state_keys[2].key_id == key && state->state_keys[2].key_state == KEY_STATE_RELEASED) - || (state->state_keys[3].key_id == key && state->state_keys[3].key_state == KEY_STATE_RELEASED) - || (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_RELEASED) - || (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_RELEASED) - || (state->state_keys[5].key_id == key && state->state_keys[5].key_state == KEY_STATE_RELEASED) - || (state->state_keys[6].key_id == key && state->state_keys[6].key_state == KEY_STATE_RELEASED) - || (state->state_keys[7].key_id == key && state->state_keys[7].key_state == KEY_STATE_RELEASED) - || (state->state_keys[8].key_id == key && state->state_keys[8].key_state == KEY_STATE_RELEASED) - || (state->state_keys[9].key_id == key && state->state_keys[9].key_state == KEY_STATE_RELEASED); + return (state_keys[0].key_id == key && state_keys[0].key_state == KEY_STATE_RELEASED) + || (state_keys[1].key_id == key && state_keys[1].key_state == KEY_STATE_RELEASED) + || (state_keys[2].key_id == key && state_keys[2].key_state == KEY_STATE_RELEASED) + || (state_keys[3].key_id == key && state_keys[3].key_state == KEY_STATE_RELEASED) + || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_RELEASED) + || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_RELEASED) + || (state_keys[5].key_id == key && state_keys[5].key_state == KEY_STATE_RELEASED) + || (state_keys[6].key_id == key && state_keys[6].key_state == KEY_STATE_RELEASED) + || (state_keys[7].key_id == key && state_keys[7].key_state == KEY_STATE_RELEASED) + || (state_keys[8].key_id == key && state_keys[8].key_state == KEY_STATE_RELEASED) + || (state_keys[9].key_id == key && state_keys[9].key_state == KEY_STATE_RELEASED); } inline -bool input_was_down(const InputState* state, int16 key) +bool input_was_down(const InputKey* state_keys, int16 key) { - return (state->state_keys[0].key_id == key && state->state_keys[0].key_state == KEY_STATE_RELEASED) - || (state->state_keys[1].key_id == key && state->state_keys[1].key_state == KEY_STATE_RELEASED) - || (state->state_keys[2].key_id == key && state->state_keys[2].key_state == KEY_STATE_RELEASED) - || (state->state_keys[3].key_id == key && state->state_keys[3].key_state == KEY_STATE_RELEASED) - || (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_RELEASED) - || (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_RELEASED) - || (state->state_keys[5].key_id == key && state->state_keys[5].key_state == KEY_STATE_RELEASED) - || (state->state_keys[6].key_id == key && state->state_keys[6].key_state == KEY_STATE_RELEASED) - || (state->state_keys[7].key_id == key && state->state_keys[7].key_state == KEY_STATE_RELEASED) - || (state->state_keys[8].key_id == key && state->state_keys[8].key_state == KEY_STATE_RELEASED) - || (state->state_keys[9].key_id == key && state->state_keys[9].key_state == KEY_STATE_RELEASED); + return (state_keys[0].key_id == key && state_keys[0].key_state == KEY_STATE_RELEASED) + || (state_keys[1].key_id == key && state_keys[1].key_state == KEY_STATE_RELEASED) + || (state_keys[2].key_id == key && state_keys[2].key_state == KEY_STATE_RELEASED) + || (state_keys[3].key_id == key && state_keys[3].key_state == KEY_STATE_RELEASED) + || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_RELEASED) + || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_RELEASED) + || (state_keys[5].key_id == key && state_keys[5].key_state == KEY_STATE_RELEASED) + || (state_keys[6].key_id == key && state_keys[6].key_state == KEY_STATE_RELEASED) + || (state_keys[7].key_id == key && state_keys[7].key_state == KEY_STATE_RELEASED) + || (state_keys[8].key_id == key && state_keys[8].key_state == KEY_STATE_RELEASED) + || (state_keys[9].key_id == key && state_keys[9].key_state == KEY_STATE_RELEASED); } inline bool inputs_are_down( - const InputState* state, + const InputKey* state_keys, int16 key0, int16 key1 = 0, int16 key2 = 0, int16 key3 = 0, int16 key4 = 0 ) { - return (key0 != 0 && input_is_down(state, key0)) - && (key1 == 0 || input_is_down(state, key1)) - && (key2 == 0 || input_is_down(state, key2)) - && (key3 == 0 || input_is_down(state, key3)) - && (key4 == 0 || input_is_down(state, key4)); + return (key0 != 0 && input_is_down(state_keys, key0)) + && (key1 == 0 || input_is_down(state_keys, key1)) + && (key2 == 0 || input_is_down(state_keys, key2)) + && (key3 == 0 || input_is_down(state_keys, key3)) + && (key4 == 0 || input_is_down(state_keys, key4)); } void input_add_callback(InputMapping* mapping, uint8 hotkey, InputCallback callback) @@ -387,19 +385,19 @@ input_add_hotkey( } inline -bool hotkey_is_active(const InputState* state, uint8 hotkey) +bool hotkey_is_active(const uint8* state_hotkeys, uint8 hotkey) { - return state->state_hotkeys[0] == hotkey - || state->state_hotkeys[1] == hotkey - || state->state_hotkeys[2] == hotkey - || state->state_hotkeys[3] == hotkey - || state->state_hotkeys[4] == hotkey; + return state_hotkeys[0] == hotkey + || state_hotkeys[1] == hotkey + || state_hotkeys[2] == hotkey + || state_hotkeys[3] == hotkey + || state_hotkeys[4] == hotkey; } // similar to hotkey_is_active but instead of just performing a lookup in the input_hotkey_state created results // this is actively checking the current input state (not the hotkey state) inline -bool hotkey_keys_are_active(const InputState* state, const InputMapping* mapping, uint8 hotkey) +bool hotkey_keys_are_active(const InputKey* state_keys, const InputMapping* mapping, uint8 hotkey) { int16 key0 = mapping->hotkeys[(hotkey - 1) * MAX_HOTKEY_COMBINATION]; int16 key1 = mapping->hotkeys[(hotkey - 1) * MAX_HOTKEY_COMBINATION + 1]; @@ -411,32 +409,32 @@ bool hotkey_keys_are_active(const InputState* state, const InputMapping* mapping // Therefore, if a key has a state -> treat it as if active // The code below also allows optional keys which have a negative sign (at least one of the optional keys must be valid) - bool is_active = input_action_exists(state, (int16) OMS_ABS(key0)); + bool is_active = input_action_exists(state_keys, (int16) OMS_ABS(key0)); if ((!is_active && (key0 > 0 || key1 >= 0)) || (is_active && key0 < 0)) { return is_active; } - is_active = input_action_exists(state, (int16) OMS_ABS(key1)); + is_active = input_action_exists(state_keys, (int16) OMS_ABS(key1)); if ((!is_active && (key1 > 0 || key2 >= 0)) || (is_active && key1 < 0)) { return is_active; } - return input_action_exists(state, (int16) OMS_ABS(key2)); + return input_action_exists(state_keys, (int16) OMS_ABS(key2)); } inline -void input_set_state(InputState* state, InputKey* __restrict new_key) +void input_set_state(InputKey* state_keys, InputKey* __restrict new_key) { InputKey* free_state = NULL; bool action_required = true; for (int32 i = 0; i < MAX_KEY_STATES; ++i) { - if (!free_state && state->state_keys[i].key_id == 0) { - free_state = &state->state_keys[i]; - } else if (state->state_keys[i].key_id == new_key->key_id) { - state->state_keys[i].key_state = new_key->key_state; - state->state_keys[i].value += new_key->value; - state->state_keys[i].time = new_key->time; + if (!free_state && state_keys[i].key_id == 0) { + free_state = &state_keys[i]; + } else if (state_keys[i].key_id == new_key->key_id) { + state_keys[i].key_state = new_key->key_state; + state_keys[i].value += new_key->value; + state_keys[i].time = new_key->time; action_required = false; } } @@ -524,7 +522,7 @@ void input_set_controller_state(Input* input, ControllerInput* controller, uint6 if (count > 0) { for (int32 i = 0; i < count; ++i) { - input_set_state(&input->state, &keys[i]); + input_set_state(input->state.state_keys, &keys[i]); } } @@ -566,7 +564,9 @@ input_hotkey_state(Input* input) InputMapping* mapping; if (i == 0) { mapping = &input->input_mapping1; - } else if (input->handle_controller && key->key_id > INPUT_CONTROLLER_PREFIX) { + } else if ((input->handle_controller || input->direct_controller) + && key->key_id > INPUT_CONTROLLER_PREFIX + ) { mapping = &input->input_mapping2; } else { continue; @@ -581,7 +581,7 @@ input_hotkey_state(Input* input) // Check every possible hotkey // Since multiple input devices have their own button/key indices whe have to do this weird range handling - for (int possible_hotkey_idx = 0; possible_hotkey_idx < MAX_KEY_TO_HOTKEY; ++possible_hotkey_idx) { + for (int32 possible_hotkey_idx = 0; possible_hotkey_idx < MAX_KEY_TO_HOTKEY; ++possible_hotkey_idx) { // We only support a slimited amount of active hotkeys if (active_hotkeys >= MAX_KEY_PRESSES) { return; @@ -589,12 +589,12 @@ input_hotkey_state(Input* input) // Hotkey already active // @question Do we even need this? This shouldn't happen anyway?! - if (hotkey_is_active(&input->state, hotkeys_for_key[possible_hotkey_idx])) { + if (hotkey_is_active(input->state.state_hotkeys, hotkeys_for_key[possible_hotkey_idx])) { continue; } // store active hotkey, if it is not already active - bool is_pressed = hotkey_keys_are_active(&input->state, mapping, hotkeys_for_key[possible_hotkey_idx]); + bool is_pressed = hotkey_keys_are_active(input->state.state_keys, mapping, hotkeys_for_key[possible_hotkey_idx]); if (!is_pressed) { continue; } diff --git a/log/Debug.cpp b/log/Debug.cpp index aeabce5..f8d5ca1 100644 --- a/log/Debug.cpp +++ b/log/Debug.cpp @@ -215,7 +215,12 @@ void debug_memory_log(uint64 start, uint64 size, int32 type, const char* functio mem->last_action[mem->action_idx].function_name = function; ++mem->action_idx; - mem->usage += size * type; + + if (type < 0 && mem->usage < size * -type) { + mem->usage = 0; + } else { + mem->usage += size * type; + } } void debug_memory_reserve(uint64 start, uint64 size, int32 type, const char* function) diff --git a/math/PerlinNoise.h b/math/PerlinNoise.h index c4cb769..7adba48 100644 --- a/math/PerlinNoise.h +++ b/math/PerlinNoise.h @@ -1,8 +1,13 @@ #include #include -#include "../stdlib/Intrinsics.h" #include "Animation.h" +#if ARM + #include "../stdlib/IntrinsicsArm.h" +#else + #include "../stdlib/Intrinsics.h" +#endif + double fade(double t) { return t * t * t * (t * (t * 6 - 15) + 10); } diff --git a/math/matrix/MatrixFloat32.h b/math/matrix/MatrixFloat32.h index 6d69a3a..1f3a6b5 100644 --- a/math/matrix/MatrixFloat32.h +++ b/math/matrix/MatrixFloat32.h @@ -12,13 +12,18 @@ #include #include #include -#include "../../stdlib/Intrinsics.h" #include "../../utils/MathUtils.h" #include "../../utils/TestUtils.h" +#if ARM + #include "../../stdlib/IntrinsicsArm.h" +#else + #include "../../stdlib/Intrinsics.h" +#endif + // @todo Implement intrinsic versions! -void vec2_normalize_f32(f32* __restrict x, f32* __restrict y) +void vec2_normalize(f32* __restrict x, f32* __restrict y) { f32 d = sqrtf((*x) * (*x) + (*y) * (*y)); @@ -89,7 +94,7 @@ f32 vec2_dot(const v2_f32* a, const v2_f32* b) { return a->x * b->x + a->y * b->y; } -void vec3_normalize_f32(f32* __restrict x, f32* __restrict y, f32* __restrict z) +void vec3_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z) { f32 d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z)); @@ -98,7 +103,7 @@ void vec3_normalize_f32(f32* __restrict x, f32* __restrict y, f32* __restrict z) *z /= d; } -void vec3_normalize_f32(v3_f32* vec) +void vec3_normalize(v3_f32* vec) { f32 d = sqrtf(vec->x * vec->x + vec->y * vec->y + vec->z * vec->z); @@ -178,7 +183,7 @@ f32 vec3_dot(const v3_f32* a, const v3_f32* b) { return a->x * b->x + a->y * b->y + a->z * b->z; } -void vec4_normalize_f32(f32* __restrict x, f32* __restrict y, f32* __restrict z, f32* __restrict w) +void vec4_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z, f32* __restrict w) { f32 d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z) + (*w) * (*w)); diff --git a/math/matrix/MatrixInt32.h b/math/matrix/MatrixInt32.h index 814d394..78d9662 100644 --- a/math/matrix/MatrixInt32.h +++ b/math/matrix/MatrixInt32.h @@ -9,7 +9,12 @@ #ifndef TOS_MATH_MATRIX_INT32_H #define TOS_MATH_MATRIX_INT32_H -#include "../../stdlib/Intrinsics.h" #include "../../utils/MathUtils.h" +#if ARM + #include "../../stdlib/IntrinsicsArm.h" +#else + #include "../../stdlib/Intrinsics.h" +#endif + #endif \ No newline at end of file diff --git a/math/matrix/MatrixInt64.h b/math/matrix/MatrixInt64.h index 784535e..e983447 100644 --- a/math/matrix/MatrixInt64.h +++ b/math/matrix/MatrixInt64.h @@ -9,9 +9,14 @@ #ifndef TOS_MATH_MATRIX_INT64_H #define TOS_MATH_MATRIX_INT64_H -#include "../../stdlib/Intrinsics.h" #include "../../utils/MathUtils.h" +#if ARM + #include "../../stdlib/IntrinsicsArm.h" +#else + #include "../../stdlib/Intrinsics.h" +#endif + #endif // Remarks: sizes for the second matrix/vector are often implied by the first parameter and the rules for matrix/vector diff --git a/math/matrix/QuaternionFloat32.h b/math/matrix/QuaternionFloat32.h index c577554..a7337fd 100644 --- a/math/matrix/QuaternionFloat32.h +++ b/math/matrix/QuaternionFloat32.h @@ -10,9 +10,15 @@ #ifndef TOS_MATH_MATRIX_QUATERNION_FLOAT32_H #define TOS_MATH_MATRIX_QUATERNION_FLOAT32_H -#include "../../stdlib/Intrinsics.h" #include "../../utils/MathUtils.h" #include "../../utils/TestUtils.h" +#include "MatrixFloat32.h" + +#if ARM + #include "../../stdlib/IntrinsicsArm.h" +#else + #include "../../stdlib/Intrinsics.h" +#endif // @todo Remove unused functions there are a lot (AFTER you implemented quaternion handling in the camera) diff --git a/math/matrix/VectorFloat32.h b/math/matrix/VectorFloat32.h index 9f34188..a96363d 100644 --- a/math/matrix/VectorFloat32.h +++ b/math/matrix/VectorFloat32.h @@ -9,7 +9,6 @@ #ifndef TOS_MATH_MATRIX_VECTOR_FLOAT32_H #define TOS_MATH_MATRIX_VECTOR_FLOAT32_H -#include "../../stdlib/Intrinsics.h" #include "../../utils/MathUtils.h" #include "../../stdlib/simd/SIMD_F32.h" diff --git a/math/matrix/VectorFloat64.h b/math/matrix/VectorFloat64.h index 7130ece..ca1bdd2 100644 --- a/math/matrix/VectorFloat64.h +++ b/math/matrix/VectorFloat64.h @@ -9,7 +9,6 @@ #ifndef TOS_MATH_MATRIX_VECTOR_FLOAT64_H #define TOS_MATH_MATRIX_VECTOR_FLOAT64_H -#include "../../stdlib/Intrinsics.h" #include "../../utils/MathUtils.h" #include "../../stdlib/simd/SIMD_F64.h" diff --git a/math/matrix/VectorInt32.h b/math/matrix/VectorInt32.h index 90045b0..e4699de 100644 --- a/math/matrix/VectorInt32.h +++ b/math/matrix/VectorInt32.h @@ -12,7 +12,6 @@ #include #include -#include "../../stdlib/Intrinsics.h" #include "../../utils/MathUtils.h" #include "../../stdlib/simd/SIMD_I32.h" diff --git a/math/matrix/VectorInt64.h b/math/matrix/VectorInt64.h index e12c06d..2a987a7 100644 --- a/math/matrix/VectorInt64.h +++ b/math/matrix/VectorInt64.h @@ -12,7 +12,6 @@ #include #include -#include "../../stdlib/Intrinsics.h" #include "../../utils/MathUtils.h" #include "../../stdlib/simd/SIMD_I64.h" diff --git a/models/settings/setting_types.h b/models/settings/setting_types.h index 8cc8f22..694dfe7 100644 --- a/models/settings/setting_types.h +++ b/models/settings/setting_types.h @@ -71,7 +71,7 @@ #define SETTING_UI_VISIBILITY_FPS 1 #define SETTING_UI_VISIBILITY_APM 2 -#define SETTING_UI_VISIBILITY__ 4 +#define SETTING_UI_VISIBILITY_NET_GRAPH 4 #define SETTING_UI_VISIBILITY___ 8 #define SETTING_UI_VISIBILITY_HOTKEYS 16 #define SETTING_UI_VISIBILITY_XP_BAR 32 @@ -80,9 +80,9 @@ #define SETTING_UI_VISIBILITY_CHAT 256 #define SETTING_UI_VISIBILITY_CLOCK 512 #define SETTING_UI_VISIBILITY_SUBTITLES 1024 -#define SETTING_UI_VISIBILITY_BAR 1024 -#define SETTING_UI_VISIBILITY_HEALTH 2048 -#define SETTING_UI_VISIBILITY_RESOURCE 4096 +#define SETTING_UI_VISIBILITY_BAR 2048 +#define SETTING_UI_VISIBILITY_HEALTH 4096 +#define SETTING_UI_VISIBILITY_RESOURCE 8192 #define SETTING_UI_VISIBILITY_INFO 8192 // = e.g. quest info #define SETTING_GAME_VISIBILITY_BAR_SELF 1 diff --git a/network/NetworkOSWrapper.h b/network/NetworkOSWrapper.h deleted file mode 100644 index 05e9352..0000000 --- a/network/NetworkOSWrapper.h +++ /dev/null @@ -1,27 +0,0 @@ -/** - * Jingga - * - * @copyright Jingga - * @license OMS License 2.0 - * @version 1.0.0 - * @link https://jingga.app - */ -#ifndef TOS_NETWORK_OS_WRAPPER_H -#define TOS_NETWORK_OS_WRAPPER_H - -#if _WIN32 - #include - #include - - inline - int close(SOCKET sock) { - return closesocket(sock); - } - - inline - void sleep(unsigned long time) { - Sleep(time); - } -#endif - -#endif \ No newline at end of file diff --git a/network/SocketConnection.h b/network/SocketConnection.h index 31560eb..ce836cc 100644 --- a/network/SocketConnection.h +++ b/network/SocketConnection.h @@ -13,7 +13,7 @@ #if _WIN32 #include - #include + #include #else #include #include diff --git a/network/packet/OMSPacket.h b/network/packet/OMSPacket.h index 24e5bac..ea20a5a 100644 --- a/network/packet/OMSPacket.h +++ b/network/packet/OMSPacket.h @@ -10,7 +10,8 @@ #include "PacketHeader.h" #if _WIN32 - #include + #include + #include #elif __linux__ #include #include diff --git a/network/packet/PacketCache.h b/network/packet/PacketCache.h index 9717100..3964c9d 100644 --- a/network/packet/PacketCache.h +++ b/network/packet/PacketCache.h @@ -14,7 +14,7 @@ #if _WIN32 #include - #include + #include #include #else #include diff --git a/network/packet/PacketHeader.h b/network/packet/PacketHeader.h index c7f2635..f04d701 100644 --- a/network/packet/PacketHeader.h +++ b/network/packet/PacketHeader.h @@ -6,8 +6,8 @@ #include "../../stdlib/Types.h" #if _WIN32 - #include - #include + #include + #include #include #elif __linux__ #include diff --git a/network/packet/UDPPacket.h b/network/packet/UDPPacket.h index aecc2e2..04be133 100644 --- a/network/packet/UDPPacket.h +++ b/network/packet/UDPPacket.h @@ -10,7 +10,7 @@ #include "PacketHeader.h" #if _WIN32 - #include + #include #elif __linux__ #include #include @@ -94,7 +94,7 @@ uint16 packet_udp_create_raw( in6_addr* __restrict ipv6_src, uint16 port_src, in6_addr* __restrict ipv6_dst, uint16 port_dst, uint16 flow, - byte* __restrict data, uint16 data_length + const byte* __restrict data, uint16 data_length ) { // create ipv6 header HeaderIPv6Unpacked* ip6_header = (HeaderIPv6Unpacked *) packet; @@ -113,6 +113,7 @@ uint16 packet_udp_create_raw( udp_header->len = ip6_header->ip6_plen; udp_header->check = 0; + // @performance consider to do the compression right here instead of the memcpy // create payload memcpy(packet + sizeof(HeaderIPv6Unpacked) + sizeof(UDPHeaderIPv6Unpacked), data, data_length); @@ -129,19 +130,11 @@ uint16 packet_udp_create_raw( inline uint16 packet_udp_create( byte* __restrict packet, - uint16 port_src, uint16 port_dst, - byte* __restrict data, uint16 data_length + const byte* __restrict data, uint16 data_length ) { - // create udp header - UDPHeaderIPv6Unpacked* udp_header = (UDPHeaderIPv6Unpacked *) packet; - - udp_header->source = port_src; - udp_header->dest = port_dst; - udp_header->len = SWAP_ENDIAN_BIG((uint16) (sizeof(UDPHeaderIPv6Unpacked) + data_length)); - udp_header->check = 0; - + // @performance consider to do the compression right here instead of the memcpy // create payload - memcpy(packet + sizeof(UDPHeaderIPv6Unpacked), data, data_length); + memcpy(packet, data, data_length); return data_length; } diff --git a/platform/linux/Socket.h b/platform/linux/Socket.h new file mode 100644 index 0000000..afa619f --- /dev/null +++ b/platform/linux/Socket.h @@ -0,0 +1,14 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_PLATFORM_LINUX_SOCKET_H +#define TOS_PLATFORM_LINUX_SOCKET_H + +#define socket_close close + +#endif \ No newline at end of file diff --git a/platform/linux/SystemInfo.h b/platform/linux/SystemInfo.h index 7f2b288..85197da 100644 --- a/platform/linux/SystemInfo.h +++ b/platform/linux/SystemInfo.h @@ -19,7 +19,6 @@ #include #include - // @todo implement for arm? uint16 system_language_code() diff --git a/platform/win32/Client.h b/platform/win32/Client.h index 4348db3..37e52c3 100644 --- a/platform/win32/Client.h +++ b/platform/win32/Client.h @@ -48,7 +48,7 @@ void socket_client_udp_create(SocketConnection* con, uint16 port = 0) { // Bind socket con->addr.sin6_family = AF_INET6; con->addr.sin6_addr = in6addr_any; - con->addr.sin6_port = port; // 0 = OS decides the port + con->addr.sin6_port = SWAP_ENDIAN_BIG(port); // 0 = OS decides the port if (bind(con->sd, (struct sockaddr*) &con->addr, sizeof(con->addr)) == SOCKET_ERROR) { closesocket(con->sd); diff --git a/platform/win32/Socket.h b/platform/win32/Socket.h new file mode 100644 index 0000000..472adde --- /dev/null +++ b/platform/win32/Socket.h @@ -0,0 +1,14 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_PLATFORM_WIN32_SOCKET_H +#define TOS_PLATFORM_WIN32_SOCKET_H + +#define socket_close closesocket + +#endif \ No newline at end of file diff --git a/platform/win32/SystemInfo.cpp b/platform/win32/SystemInfo.cpp index ce0f986..1b2558a 100644 --- a/platform/win32/SystemInfo.cpp +++ b/platform/win32/SystemInfo.cpp @@ -35,6 +35,7 @@ #pragma comment(lib, "iphlpapi.lib") #pragma comment(lib, "d3d12.lib") #pragma comment(lib, "dxgi.lib") + #pragma comment(lib, "Ws2_32.lib") #endif // @todo implement for arm? diff --git a/platform/win32/input/RawInput.h b/platform/win32/input/RawInput.h index 7e2ca01..8ad575f 100644 --- a/platform/win32/input/RawInput.h +++ b/platform/win32/input/RawInput.h @@ -86,7 +86,7 @@ int rawinput_init_mousekeyboard(HWND hwnd, Input* __restrict states, RingMemory* } } break; case RIM_TYPEKEYBOARD: { - if (states[keyboard_found].handle_keyboard != NULL) { + if (states[keyboard_found].handle_keyboard != NULL) { ++keyboard_found; } @@ -219,13 +219,12 @@ void input_mouse_position(HWND hwnd, v2_int32* pos) } } -int32 input_raw_handle(RAWINPUT* __restrict raw, Input* states, int32 state_count, uint64 time) +int32 input_raw_handle(RAWINPUT* __restrict raw, Input* __restrict states, int32 state_count, uint64 time) { int32 input_count = 0; int32 i = 0; if (raw->header.dwType == RIM_TYPEMOUSE) { - // @performance Change so we can directly access the correct state (maybe map handle address to index?) while (i < state_count && states[i].handle_mouse != raw->header.hDevice ) { @@ -270,10 +269,12 @@ int32 input_raw_handle(RAWINPUT* __restrict raw, Input* states, int32 state_coun key.key_state = KEY_STATE_RELEASED; key.key_id = INPUT_MOUSE_BUTTON_5; } else if (raw->data.mouse.usButtonFlags & RI_MOUSE_WHEEL) { + // @bug not working key.key_state = KEY_STATE_RELEASED; key.key_id = INPUT_MOUSE_BUTTON_WHEEL; key.value = (int16) raw->data.mouse.usButtonData; } else if (raw->data.mouse.usButtonFlags & RI_MOUSE_HWHEEL) { + // @bug not working key.key_state = KEY_STATE_RELEASED; key.key_id = INPUT_MOUSE_BUTTON_HWHEEL; key.value = (int16) raw->data.mouse.usButtonData; @@ -281,17 +282,15 @@ int32 input_raw_handle(RAWINPUT* __restrict raw, Input* states, int32 state_coun return 0; } - // @question is mouse wheel really considered a button change? - ++input_count; key.key_id |= INPUT_MOUSE_PREFIX; key.time = time; - input_set_state(&states[i].state, &key); + input_set_state(states[i].state.state_keys, &key); states[i].state_change_button = true; } else if (states[i].mouse_movement) { - // do we want to handle mouse movement for every individual movement, or do we want to pull it + // @question do we want to handle mouse movement for every individual movement, or do we want to pull it if (raw->data.mouse.usFlags & MOUSE_MOVE_ABSOLUTE) { RECT rect; @@ -351,7 +350,7 @@ int32 input_raw_handle(RAWINPUT* __restrict raw, Input* states, int32 state_coun // @todo change to MakeCode instead of VKey InputKey key = {(uint16) (raw->data.keyboard.VKey | INPUT_KEYBOARD_PREFIX), new_state, 0, time}; - input_set_state(&states[i].state, &key); + input_set_state(states[i].state.state_keys, &key); states[i].state_change_button = true; } else if (raw->header.dwType == RIM_TYPEHID && raw->header.dwSize > sizeof(RAWINPUT) @@ -407,7 +406,7 @@ void input_handle(LPARAM lParam, Input* __restrict states, int state_count, Ring input_raw_handle((RAWINPUT *) lpb, states, state_count, time); } -int32 input_handle_buffered(int buffer_size, Input* __restrict states, int state_count, RingMemory* ring, uint64 time) +int32 input_handle_buffered(int32 buffer_size, Input* __restrict states, int state_count, RingMemory* ring, uint64 time) { uint32 cb_size; GetRawInputBuffer(NULL, &cb_size, sizeof(RAWINPUTHEADER)); diff --git a/stdlib/IntrinsicsArm.h b/stdlib/IntrinsicsArm.h new file mode 100644 index 0000000..1b73689 --- /dev/null +++ b/stdlib/IntrinsicsArm.h @@ -0,0 +1,86 @@ +/** + * Jingga + * + * @copyright Jingga + * @license OMS License 2.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TOS_STDLIB_INTRINSICS_ARM_H +#define TOS_STDLIB_INTRINSICS_ARM_H + +#include + +inline float oms_sqrt(float a) { + svfloat32_t input = svdup_f32(a); + svfloat32_t result = svsqrt_f32(input); + + return svget1_f32(result); +} + +inline double oms_sqrt(double a) { + svfloat64_t input = svdup_f64(a); + svfloat64_t result = svsqrt_f64(input); + + return svget1_f64(result); +} + +inline float oms_rsqrt(float a) { + svfloat32_t input = svdup_f32(a); + svfloat32_t result = svrsqrte_f32(input); + + return svget1_f32(result); +} + +inline double oms_rsqrt(double a) { + svfloat64_t input = svdup_f64(a); + svfloat64_t result = svrsqrte_f64(input); + + return svget1_f64(result); +} + +inline float oms_round(float a) { + svfloat32_t input = svdup_f32(a); + svfloat32_t result = svrndn_f32(input); + + return svget1_f32(result); +} + +inline uint32_t round_to_int(float a) { + svfloat32_t input = svdup_f32(a); + svint32_t result = svcvtn_f32_s32(input, SVE_32B); + + return svget1_s32(result); +} + +inline float oms_floor(float a) { + svfloat32_t input = svdup_f32(a); + svfloat32_t result = svfloor_f32(input); + + return svget1_f32(result); +} + +inline float oms_ceil(float a) { + svfloat32_t input = svdup_f32(a); + svfloat32_t result = svceil_f32(input); + + return svget1_f32(result); +} + +inline void atomic_increment(int32_t* a, int32_t b) { + __atomic_add_fetch(a, b, __ATOMIC_SEQ_CST); +} + +inline void atomic_increment(int64_t* a, int64_t b) { + __atomic_add_fetch(a, b, __ATOMIC_SEQ_CST); +} + +inline void atomic_decrement(int32_t* a, int32_t b) { + __atomic_sub_fetch(a, b, __ATOMIC_SEQ_CST); +} + +inline void atomic_decrement(int64_t* a, int64_t b) { + __atomic_sub_fetch(a, b, __ATOMIC_SEQ_CST); +} + +#endif \ No newline at end of file diff --git a/stdlib/Types.h b/stdlib/Types.h index 23a4c69..030a06e 100644 --- a/stdlib/Types.h +++ b/stdlib/Types.h @@ -14,11 +14,14 @@ #ifdef _MSC_VER #define PACKED_STRUCT __pragma(pack(push, 1)) #define UNPACKED_STRUCT __pragma(pack(pop)) + typedef SSIZE_T ssize_t; #else #define PACKED_STRUCT __attribute__((__packed__)) #define UNPACKED_STRUCT #endif +#define ARRAY_COUNT(a) (sizeof(a) / sizeof((a)[0])) + typedef int8_t int8; typedef int16_t int16; typedef int32_t int32; diff --git a/stdlib/simd/SIMD_F32.h b/stdlib/simd/SIMD_F32.h index 24a5124..5ad0563 100644 --- a/stdlib/simd/SIMD_F32.h +++ b/stdlib/simd/SIMD_F32.h @@ -17,21 +17,36 @@ struct f32_4 { union { - __m128 s; + #if ARM + svfloat32_t s; + #else + __m128 s; + #endif + f32 v[4]; }; }; struct f32_8 { union { - __m256 s; + #if ARM + svfloat32_t s; + #else + __m256 s; + #endif + f32 v[8]; }; }; struct f32_16 { union { - __m512 s; + #if ARM + svfloat32_t s; + #else + __m512 s; + #endif + f32 v[16]; }; }; diff --git a/stdlib/simd/SIMD_F64.h b/stdlib/simd/SIMD_F64.h index 061093f..145e53c 100644 --- a/stdlib/simd/SIMD_F64.h +++ b/stdlib/simd/SIMD_F64.h @@ -16,21 +16,36 @@ struct f64_2 { union { - __m128 s; + #if ARM + svfloat64_t s; + #else + __m128 s; + #endif + f64 v[2]; }; }; struct f64_4 { union { - __m256 s; + #if ARM + svfloat64_t s; + #else + __m256 s; + #endif + f64 v[4]; }; }; struct f64_8 { union { - __m512 s; + #if ARM + svfloat64_t s; + #else + __m512 s; + #endif + f64 v[8]; }; }; diff --git a/stdlib/simd/SIMD_Helper.h b/stdlib/simd/SIMD_Helper.h index 7358125..4ca8d93 100644 --- a/stdlib/simd/SIMD_Helper.h +++ b/stdlib/simd/SIMD_Helper.h @@ -14,8 +14,26 @@ #include #include "../Types.h" -#ifdef _MSC_VER - #include +// @todo split into platform code for windows and linux + +#if _WIN32 + #include + #include + + #ifdef _MSC_VER + #include + #endif +#elif __linux__ + #include + #include +#endif + +#if ARM + #include +#else + int32 svcntw() { + return 0; + } #endif enum SIMDVersion { @@ -23,149 +41,203 @@ enum SIMDVersion { SIMD_VERSION_128, SIMD_VERSION_256, SIMD_VERSION_512, + SIMD_VERSION_SVE, + SIMD_VERSION_NEON, }; // @todo implement for arm? +inline int32 max_neon_supported() +{ + #if ARM + #if _WIN32 + int cpu_info[4] = {0}; + __cpuid(cpu_info, 0); + + if (cpu_info[3] & (1 << 1)) { + return 1; + } + #else + unsigned int eax, ebx, ecx, edx; + __asm__ volatile ( + "cpuid" + : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(0) + ); + + if (edx & (1 << 1)) { + return 1; + } + #endif + + return 0; + #else + return 0; + #endif +} + +inline int32 max_sve_supported() +{ + #if ARM + int32 hwcaps = getauxval(AT_HWCAP); + + return (int32) ((bool) (hwcaps & (1 << 19))); + #else + return 0; + #endif +} + inline int32 max_sse_supported() { - #ifdef _MSC_VER - int32 cpuInfo[4] = {-1}; - __cpuid(cpuInfo, 1); // CPUID function 1 - - uint32 ecx = cpuInfo[2]; - uint32 edx = cpuInfo[3]; + #if ARM + return 0; #else - uint32 eax, ebx, ecx, edx; + #ifdef _MSC_VER + int32 cpuInfo[4] = {-1}; + __cpuid(cpuInfo, 1); // CPUID function 1 - eax = 1; // CPUID function 1 - __asm__ __volatile__("cpuid;" - : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) - : "a"(eax)); + uint32 ecx = cpuInfo[2]; + uint32 edx = cpuInfo[3]; + #else + uint32 eax, ebx, ecx, edx; + + eax = 1; // CPUID function 1 + __asm__ __volatile__("cpuid;" + : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(eax)); + #endif + + bool sse42_supported = (ecx >> 20) & 1; + if (sse42_supported) { + return 42; + } + + bool sse41_supported = (ecx >> 19) & 1; + if (sse41_supported) { + return 41; + } + + bool sse3_supported = (ecx >> 0) & 1; + if (sse3_supported) { + return 3; + } + + bool sse2_supported = (edx >> 26) & 1; + if (sse2_supported) { + return 2; + } + + return 0; #endif - - bool sse42_supported = (ecx >> 20) & 1; - if (sse42_supported) { - return 42; - } - - bool sse41_supported = (ecx >> 19) & 1; - if (sse41_supported) { - return 41; - } - - bool sse3_supported = (ecx >> 0) & 1; - if (sse3_supported) { - return 3; - } - - bool sse2_supported = (edx >> 26) & 1; - if (sse2_supported) { - return 2; - } - - return 0; } inline -int max_avx256_supported() +int32 max_avx256_supported() { - int32 max_version = 0; - - #ifdef _MSC_VER - int32 cpuInfo[4]; - __cpuid(cpuInfo, 1); - - if ((cpuInfo[2] >> 28) & 1) { - __cpuid(cpuInfo, 7); // Query extended features - - if ((cpuInfo[1] >> 5) & 1) { - max_version = 2; - } - } + #if ARM + return 0; #else - uint32 eax, ebx, ecx, edx; + int32 max_version = 0; - __asm__ __volatile__("cpuid" - : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) - : "a"(1)); - if ((ecx >> 28) & 1) { - eax = 7; - ecx = 0; - __asm__ __volatile__("cpuid" - : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) - : "a"(eax), "c"(ecx)); + #ifdef _MSC_VER + int32 cpuInfo[4]; + __cpuid(cpuInfo, 1); - if ((ebx >> 5) & 1) { - max_version = 2; + if ((cpuInfo[2] >> 28) & 1) { + __cpuid(cpuInfo, 7); // Query extended features + + if ((cpuInfo[1] >> 5) & 1) { + max_version = 2; + } } - } - #endif + #else + uint32 eax, ebx, ecx, edx; - return max_version; + __asm__ __volatile__("cpuid" + : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(1)); + if ((ecx >> 28) & 1) { + eax = 7; + ecx = 0; + __asm__ __volatile__("cpuid" + : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(eax), "c"(ecx)); + + if ((ebx >> 5) & 1) { + max_version = 2; + } + } + #endif + + return max_version; + #endif } inline -int max_avx512_supported() +int32 max_avx512_supported() { - #ifdef _MSC_VER - int32 cpuInfo[4]; - __cpuid(cpuInfo, 1); - int32 ebx = 0; - - if ((cpuInfo[2] >> 28) & 1) { - __cpuid(cpuInfo, 7); - - ebx = cpuInfo[1]; - } + #if ARM + return 0; #else - uint32 eax, ebx, ecx, edx; + #ifdef _MSC_VER + int32 cpuInfo[4]; + __cpuid(cpuInfo, 1); + int32 ebx = 0; + + if ((cpuInfo[2] >> 28) & 1) { + __cpuid(cpuInfo, 7); + + ebx = cpuInfo[1]; + } + #else + uint32 eax, ebx, ecx, edx; - __asm__ __volatile__("cpuid" - : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) - : "a"(1)); - if ((ecx >> 28) & 1) { - eax = 7; - ecx = 0; __asm__ __volatile__("cpuid" - : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) - : "a"(eax), "c"(ecx)); + : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(1)); + if ((ecx >> 28) & 1) { + eax = 7; + ecx = 0; + __asm__ __volatile__("cpuid" + : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(eax), "c"(ecx)); + } + #endif + + if ((ebx >> 16) & 1) { + return 1; // AVX-512F } + + if ((ebx >> 17) & 1) { + return 2; // AVX-512DQ + } + + if ((ebx >> 21) & 1) { + return 3; // AVX-512IFMA + } + + if ((ebx >> 26) & 1) { + return 4; // AVX-512PF + } + + if ((ebx >> 27) & 1) { + return 5; // AVX-512ER + } + + if ((ebx >> 28) & 1) { + return 6; // AVX-512CD + } + + if ((ebx >> 30) & 1) { + return 7; // AVX-512BW + } + + if ((ebx >> 31) & 1) { + return 8; // AVX-512VL + } + + return 0; #endif - - if ((ebx >> 16) & 1) { - return 1; // AVX-512F - } - - if ((ebx >> 17) & 1) { - return 2; // AVX-512DQ - } - - if ((ebx >> 21) & 1) { - return 3; // AVX-512IFMA - } - - if ((ebx >> 26) & 1) { - return 4; // AVX-512PF - } - - if ((ebx >> 27) & 1) { - return 5; // AVX-512ER - } - - if ((ebx >> 28) & 1) { - return 6; // AVX-512CD - } - - if ((ebx >> 30) & 1) { - return 7; // AVX-512BW - } - - if ((ebx >> 31) & 1) { - return 8; // AVX-512VL - } - - return 0; } const char AVX512_VERSIONS[8][12] = { @@ -180,32 +252,35 @@ const char AVX512_VERSIONS[8][12] = { }; bool supports_abm() { - bool popcnt_supported; - bool lzcnt_supported; - - #ifdef _MSC_VER - int cpuInfo[4]; - __cpuid(cpuInfo, 0x80000001); - - popcnt_supported = (cpuInfo[2] & (1 << 5)) != 0; - lzcnt_supported = (cpuInfo[1] & (1 << 5)) != 0; + #if ARM + return 0; #else - uint32 eax, ebx, ecx, edx; - eax = 0x80000001; + bool popcnt_supported; + bool lzcnt_supported; - __asm__ __volatile__ ( - "cpuid" - : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) - : "a"(eax) - ); + #ifdef _MSC_VER + int cpuInfo[4]; + __cpuid(cpuInfo, 0x80000001); - // Check if the ABM (POPCNT and LZCNT) bits are set - popcnt_supported = (ecx & (1 << 5)) != 0; - lzcnt_supported = (ebx & (1 << 5)) != 0; + popcnt_supported = (cpuInfo[2] & (1 << 5)) != 0; + lzcnt_supported = (cpuInfo[1] & (1 << 5)) != 0; + #else + uint32 eax, ebx, ecx, edx; + eax = 0x80000001; + + __asm__ __volatile__ ( + "cpuid" + : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(eax) + ); + + // Check if the ABM (POPCNT and LZCNT) bits are set + popcnt_supported = (ecx & (1 << 5)) != 0; + lzcnt_supported = (ebx & (1 << 5)) != 0; + #endif + + return popcnt_supported && lzcnt_supported; #endif - - - return popcnt_supported && lzcnt_supported; } #endif \ No newline at end of file diff --git a/stdlib/simd/SIMD_I16.h b/stdlib/simd/SIMD_I16.h index 7b38e2d..a9dc646 100644 --- a/stdlib/simd/SIMD_I16.h +++ b/stdlib/simd/SIMD_I16.h @@ -16,21 +16,36 @@ struct int16_8 { union { - __m128i s; + #if ARM + svint16_t s; + #else + __m128i s; + #endif + int16 v[8]; }; }; struct int16_16 { union { - __m256i s; + #if ARM + svint16_t s; + #else + __m256i s; + #endif + int16 v[16]; }; }; struct int16_32 { union { - __m512i s; + #if ARM + svint16_t s; + #else + __m512i s; + #endif + int16 v[32]; }; }; diff --git a/stdlib/simd/SIMD_I32.h b/stdlib/simd/SIMD_I32.h index 38c55f1..95c9eea 100644 --- a/stdlib/simd/SIMD_I32.h +++ b/stdlib/simd/SIMD_I32.h @@ -24,21 +24,36 @@ struct int32_4 { union { - __m128i s; + #if ARM + svint32_t s; + #else + __m128i s; + #endif + int32 v[4]; }; }; struct int32_8 { union { - __m256i s; + #if ARM + svint32_t s; + #else + __m256i s; + #endif + int32 v[8]; }; }; struct int32_16 { union { - __m512i s; + #if ARM + svint32_t s; + #else + __m512i s; + #endif + int32 v[16]; }; }; diff --git a/stdlib/simd/SIMD_I64.h b/stdlib/simd/SIMD_I64.h index 86368dc..370398e 100644 --- a/stdlib/simd/SIMD_I64.h +++ b/stdlib/simd/SIMD_I64.h @@ -17,21 +17,36 @@ struct int64_2 { union { - __m128i s; + #if ARM + svint64_t s; + #else + __m128i s; + #endif + int64 v[2]; }; }; struct int64_4 { union { - __m256i s; + #if ARM + svint64_t s; + #else + __m256i s; + #endif + int64 v[4]; }; }; struct int64_8 { union { - __m512i s; + #if ARM + svint64_t s; + #else + __m512i s; + #endif + int64 v[8]; }; }; diff --git a/stdlib/simd/SIMD_I8.h b/stdlib/simd/SIMD_I8.h index 7809a40..a3ad406 100644 --- a/stdlib/simd/SIMD_I8.h +++ b/stdlib/simd/SIMD_I8.h @@ -18,21 +18,36 @@ struct int8_16 { union { - __m128i s; + #if ARM + svint8_t s; + #else + __m128i s; + #endif + int8 v[16]; }; }; struct int8_32 { union { - __m256i s; + #if ARM + svint8_t s; + #else + __m256i s; + #endif + int8 v[32]; }; }; struct int8_64 { union { - __m512i s; + #if ARM + svint8_t s; + #else + __m512i s; + #endif + int8 v[64]; }; }; diff --git a/utils/MathUtils.h b/utils/MathUtils.h index 9aa33eb..3aa9b34 100644 --- a/utils/MathUtils.h +++ b/utils/MathUtils.h @@ -10,9 +10,14 @@ #ifndef TOS_UTILS_MATH_UTILS_H #define TOS_UTILS_MATH_UTILS_H -#include "../stdlib/Intrinsics.h" #include +#if ARM + #include "../stdlib/IntrinsicsArm.h" +#else + #include "../stdlib/Intrinsics.h" +#endif + #define OMS_PI 3.14159265358979323846f #define OMS_PI_OVER_TWO (OMS_PI / 2.0f) #define OMS_PI_OVER_FOUR (OMS_PI / 4.0f) diff --git a/utils/StringUtils.h b/utils/StringUtils.h index 616b09f..40c13ec 100644 --- a/utils/StringUtils.h +++ b/utils/StringUtils.h @@ -222,7 +222,7 @@ int32 int_to_str(int64 number, char *str, const char thousands = ',') { str[k] = temp; } - return i - 1; + return i; } inline @@ -301,6 +301,18 @@ str_concat( return src1_length + src2_length; } +inline +void str_concat( + const char* src, size_t src_length, + int64 data, + char* dst +) { + memcpy(dst, src, src_length); + int32 len = int_to_str(data, dst + src_length); + + dst[src_length + len] = '\0'; +} + inline char* strtok(char* str, const char* __restrict delim, char* *key) { char* result; diff --git a/utils/Utils.h b/utils/Utils.h index 69d95b9..8536f15 100644 --- a/utils/Utils.h +++ b/utils/Utils.h @@ -14,8 +14,6 @@ #include "../stdlib/Types.h" -#define ARRAY_COUNT(a) (sizeof(a) / sizeof((a)[0])) - struct FileBody { uint64 size = 0; // doesn't include null termination (same as strlen) byte* content;