performance fixes, working. Some socket fixes, server socket fails to receive

This commit is contained in:
Dennis Eichhorn 2024-11-21 15:17:11 +01:00
parent b26db7a2d7
commit ab9ab3fceb
37 changed files with 643 additions and 349 deletions

View File

@ -18,7 +18,6 @@
#define CAMERA_MAX_INPUTS 4 #define CAMERA_MAX_INPUTS 4
// @todo Please check out if we can switch to quaternions. We tried but failed. // @todo Please check out if we can switch to quaternions. We tried but failed.
// The functions with a 2 at the end are our current backup solution which shouldn't be used (probably)
struct Camera { struct Camera {
v3_f32 location; v3_f32 location;
@ -45,16 +44,17 @@ struct Camera {
void void
camera_update_vectors(Camera* camera) camera_update_vectors(Camera* camera)
{ {
camera->front.x = cosf(OMS_DEG2RAD(camera->orientation.x)) * cosf(OMS_DEG2RAD(camera->orientation.y)); f32 cos_ori_x = cosf(OMS_DEG2RAD(camera->orientation.x));
camera->front.x = cos_ori_x * cosf(OMS_DEG2RAD(camera->orientation.y));
camera->front.y = sinf(OMS_DEG2RAD(camera->orientation.x)); camera->front.y = sinf(OMS_DEG2RAD(camera->orientation.x));
camera->front.z = cosf(OMS_DEG2RAD(camera->orientation.x)) * sinf(OMS_DEG2RAD(camera->orientation.y)); camera->front.z = cos_ori_x * sinf(OMS_DEG2RAD(camera->orientation.y));
vec3_normalize_f32(&camera->front); vec3_normalize(&camera->front);
vec3_cross(&camera->right, &camera->front, &camera->world_up); vec3_cross(&camera->right, &camera->front, &camera->world_up);
vec3_normalize_f32(&camera->right); vec3_normalize(&camera->right);
vec3_cross(&camera->up, &camera->right, &camera->front); vec3_cross(&camera->up, &camera->right, &camera->front);
vec3_normalize_f32(&camera->up); vec3_normalize(&camera->up);
} }
void camera_rotate(Camera* camera, int32 dx, int32 dy, f32 dt) void camera_rotate(Camera* camera, int32 dx, int32 dy, f32 dt)
@ -137,11 +137,11 @@ void camera_movement(Camera* camera, CameraMovement* movement, f32 dt, bool rela
v3_f32 right; v3_f32 right;
vec3_cross(&right, &camera->world_up, &forward); vec3_cross(&right, &camera->world_up, &forward);
vec3_normalize_f32(&right); vec3_normalize(&right);
v3_f32 up; v3_f32 up;
vec3_cross(&up, &right, &forward); vec3_cross(&up, &right, &forward);
vec3_normalize_f32(&up); vec3_normalize(&up);
for (int32 i = 0; i < CAMERA_MAX_INPUTS; i++) { for (int32 i = 0; i < CAMERA_MAX_INPUTS; i++) {
switch(movement[i]) { switch(movement[i]) {
@ -275,6 +275,8 @@ void camera_translation_matrix_sparse_lh(const Camera* __restrict camera, f32* t
translation[11] = camera->location.z; translation[11] = camera->location.z;
} }
// @performance This function might be optimizable with simd?
// The normalization might also not be required?
void void
camera_view_matrix_lh(const Camera* __restrict camera, f32* __restrict view) camera_view_matrix_lh(const Camera* __restrict camera, f32* __restrict view)
{ {
@ -282,7 +284,7 @@ camera_view_matrix_lh(const Camera* __restrict camera, f32* __restrict view)
v3_f32 xaxis; v3_f32 xaxis;
vec3_cross(&xaxis, &camera->world_up, &zaxis); vec3_cross(&xaxis, &camera->world_up, &zaxis);
vec3_normalize_f32(&xaxis); vec3_normalize(&xaxis);
v3_f32 yaxis; v3_f32 yaxis;
vec3_cross(&yaxis, &zaxis, &xaxis); vec3_cross(&yaxis, &zaxis, &xaxis);
@ -305,6 +307,8 @@ camera_view_matrix_lh(const Camera* __restrict camera, f32* __restrict view)
view[15] = 1.0f; view[15] = 1.0f;
} }
// @performance This function might be optimizable with simd?
// The normalization might also not be required?
void void
camera_view_matrix_rh(const Camera* __restrict camera, f32* __restrict view) camera_view_matrix_rh(const Camera* __restrict camera, f32* __restrict view)
{ {
@ -312,7 +316,7 @@ camera_view_matrix_rh(const Camera* __restrict camera, f32* __restrict view)
v3_f32 xaxis; v3_f32 xaxis;
vec3_cross(&xaxis, &zaxis, &camera->world_up); vec3_cross(&xaxis, &zaxis, &camera->world_up);
vec3_normalize_f32(&xaxis); vec3_normalize(&xaxis);
v3_f32 yaxis; v3_f32 yaxis;
vec3_cross(&yaxis, &zaxis, &xaxis); vec3_cross(&yaxis, &zaxis, &xaxis);

View File

@ -189,121 +189,119 @@ void input_init(Input* input, uint8 size, void* callback_data, BufferMemory* buf
} }
inline inline
void input_clean_state(InputState* state) void input_clean_state(InputKey* state_keys)
{ {
for (int32 i = 0; i < MAX_KEY_STATES; ++i) { for (int32 i = 0; i < MAX_KEY_STATES; ++i) {
if (state->state_keys[i].key_state == KEY_STATE_RELEASED) { if (state_keys[i].key_state == KEY_STATE_RELEASED) {
state->state_keys[i].key_id = 0; state_keys[i].key_id = 0;
} }
} }
} }
inline inline
bool input_action_exists(const InputState* state, int16 key) bool input_action_exists(const InputKey* state_keys, int16 key)
{ {
return state->state_keys[0].key_id == key return state_keys[0].key_id == key
|| state->state_keys[1].key_id == key || state_keys[1].key_id == key
|| state->state_keys[2].key_id == key || state_keys[2].key_id == key
|| state->state_keys[3].key_id == key || state_keys[3].key_id == key
|| state->state_keys[4].key_id == key || state_keys[4].key_id == key
|| state->state_keys[4].key_id == key || state_keys[4].key_id == key
|| state->state_keys[5].key_id == key || state_keys[5].key_id == key
|| state->state_keys[6].key_id == key || state_keys[6].key_id == key
|| state->state_keys[7].key_id == key || state_keys[7].key_id == key
|| state->state_keys[8].key_id == key || state_keys[8].key_id == key
|| state->state_keys[9].key_id == key; || state_keys[9].key_id == key;
} }
inline inline
bool input_is_down(const InputState* state, int16 key) bool input_is_down(const InputKey* state_keys, int16 key)
{ {
return (state->state_keys[0].key_id == key && state->state_keys[0].key_state != KEY_STATE_RELEASED) return (state_keys[0].key_id == key && state_keys[0].key_state != KEY_STATE_RELEASED)
|| (state->state_keys[1].key_id == key && state->state_keys[1].key_state != KEY_STATE_RELEASED) || (state_keys[1].key_id == key && state_keys[1].key_state != KEY_STATE_RELEASED)
|| (state->state_keys[2].key_id == key && state->state_keys[2].key_state != KEY_STATE_RELEASED) || (state_keys[2].key_id == key && state_keys[2].key_state != KEY_STATE_RELEASED)
|| (state->state_keys[3].key_id == key && state->state_keys[3].key_state != KEY_STATE_RELEASED) || (state_keys[3].key_id == key && state_keys[3].key_state != KEY_STATE_RELEASED)
|| (state->state_keys[4].key_id == key && state->state_keys[4].key_state != KEY_STATE_RELEASED) || (state_keys[4].key_id == key && state_keys[4].key_state != KEY_STATE_RELEASED)
|| (state->state_keys[4].key_id == key && state->state_keys[4].key_state != KEY_STATE_RELEASED) || (state_keys[5].key_id == key && state_keys[5].key_state != KEY_STATE_RELEASED)
|| (state->state_keys[5].key_id == key && state->state_keys[5].key_state != KEY_STATE_RELEASED) || (state_keys[6].key_id == key && state_keys[6].key_state != KEY_STATE_RELEASED)
|| (state->state_keys[6].key_id == key && state->state_keys[6].key_state != KEY_STATE_RELEASED) || (state_keys[7].key_id == key && state_keys[7].key_state != KEY_STATE_RELEASED)
|| (state->state_keys[7].key_id == key && state->state_keys[7].key_state != KEY_STATE_RELEASED) || (state_keys[8].key_id == key && state_keys[8].key_state != KEY_STATE_RELEASED)
|| (state->state_keys[8].key_id == key && state->state_keys[8].key_state != KEY_STATE_RELEASED) || (state_keys[9].key_id == key && state_keys[9].key_state != KEY_STATE_RELEASED);
|| (state->state_keys[9].key_id == key && state->state_keys[9].key_state != KEY_STATE_RELEASED);
} }
inline inline
bool input_is_pressed(const InputState* state, int16 key) bool input_is_pressed(const InputKey* state_keys, int16 key)
{ {
return (state->state_keys[0].key_id == key && state->state_keys[0].key_state == KEY_STATE_PRESSED) return (state_keys[0].key_id == key && state_keys[0].key_state == KEY_STATE_PRESSED)
|| (state->state_keys[1].key_id == key && state->state_keys[1].key_state == KEY_STATE_PRESSED) || (state_keys[1].key_id == key && state_keys[1].key_state == KEY_STATE_PRESSED)
|| (state->state_keys[2].key_id == key && state->state_keys[2].key_state == KEY_STATE_PRESSED) || (state_keys[2].key_id == key && state_keys[2].key_state == KEY_STATE_PRESSED)
|| (state->state_keys[3].key_id == key && state->state_keys[3].key_state == KEY_STATE_PRESSED) || (state_keys[3].key_id == key && state_keys[3].key_state == KEY_STATE_PRESSED)
|| (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_PRESSED) || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_PRESSED)
|| (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_PRESSED) || (state_keys[5].key_id == key && state_keys[5].key_state == KEY_STATE_PRESSED)
|| (state->state_keys[5].key_id == key && state->state_keys[5].key_state == KEY_STATE_PRESSED) || (state_keys[6].key_id == key && state_keys[6].key_state == KEY_STATE_PRESSED)
|| (state->state_keys[6].key_id == key && state->state_keys[6].key_state == KEY_STATE_PRESSED) || (state_keys[7].key_id == key && state_keys[7].key_state == KEY_STATE_PRESSED)
|| (state->state_keys[7].key_id == key && state->state_keys[7].key_state == KEY_STATE_PRESSED) || (state_keys[8].key_id == key && state_keys[8].key_state == KEY_STATE_PRESSED)
|| (state->state_keys[8].key_id == key && state->state_keys[8].key_state == KEY_STATE_PRESSED) || (state_keys[9].key_id == key && state_keys[9].key_state == KEY_STATE_PRESSED);
|| (state->state_keys[9].key_id == key && state->state_keys[9].key_state == KEY_STATE_PRESSED);
} }
inline inline
bool input_is_held(const InputState* state, int16 key) bool input_is_held(const InputKey* state_keys, int16 key)
{ {
return (state->state_keys[0].key_id == key && state->state_keys[0].key_state == KEY_STATE_HELD) return (state_keys[0].key_id == key && state_keys[0].key_state == KEY_STATE_HELD)
|| (state->state_keys[1].key_id == key && state->state_keys[1].key_state == KEY_STATE_HELD) || (state_keys[1].key_id == key && state_keys[1].key_state == KEY_STATE_HELD)
|| (state->state_keys[2].key_id == key && state->state_keys[2].key_state == KEY_STATE_HELD) || (state_keys[2].key_id == key && state_keys[2].key_state == KEY_STATE_HELD)
|| (state->state_keys[3].key_id == key && state->state_keys[3].key_state == KEY_STATE_HELD) || (state_keys[3].key_id == key && state_keys[3].key_state == KEY_STATE_HELD)
|| (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_HELD) || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_HELD)
|| (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_HELD) || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_HELD)
|| (state->state_keys[5].key_id == key && state->state_keys[5].key_state == KEY_STATE_HELD) || (state_keys[5].key_id == key && state_keys[5].key_state == KEY_STATE_HELD)
|| (state->state_keys[6].key_id == key && state->state_keys[6].key_state == KEY_STATE_HELD) || (state_keys[6].key_id == key && state_keys[6].key_state == KEY_STATE_HELD)
|| (state->state_keys[7].key_id == key && state->state_keys[7].key_state == KEY_STATE_HELD) || (state_keys[7].key_id == key && state_keys[7].key_state == KEY_STATE_HELD)
|| (state->state_keys[8].key_id == key && state->state_keys[8].key_state == KEY_STATE_HELD) || (state_keys[8].key_id == key && state_keys[8].key_state == KEY_STATE_HELD)
|| (state->state_keys[9].key_id == key && state->state_keys[9].key_state == KEY_STATE_HELD); || (state_keys[9].key_id == key && state_keys[9].key_state == KEY_STATE_HELD);
} }
inline inline
bool input_is_released(const InputState* state, int16 key) bool input_is_released(const InputKey* state_keys, int16 key)
{ {
return (state->state_keys[0].key_id == key && state->state_keys[0].key_state == KEY_STATE_RELEASED) return (state_keys[0].key_id == key && state_keys[0].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[1].key_id == key && state->state_keys[1].key_state == KEY_STATE_RELEASED) || (state_keys[1].key_id == key && state_keys[1].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[2].key_id == key && state->state_keys[2].key_state == KEY_STATE_RELEASED) || (state_keys[2].key_id == key && state_keys[2].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[3].key_id == key && state->state_keys[3].key_state == KEY_STATE_RELEASED) || (state_keys[3].key_id == key && state_keys[3].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_RELEASED) || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_RELEASED) || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[5].key_id == key && state->state_keys[5].key_state == KEY_STATE_RELEASED) || (state_keys[5].key_id == key && state_keys[5].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[6].key_id == key && state->state_keys[6].key_state == KEY_STATE_RELEASED) || (state_keys[6].key_id == key && state_keys[6].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[7].key_id == key && state->state_keys[7].key_state == KEY_STATE_RELEASED) || (state_keys[7].key_id == key && state_keys[7].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[8].key_id == key && state->state_keys[8].key_state == KEY_STATE_RELEASED) || (state_keys[8].key_id == key && state_keys[8].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[9].key_id == key && state->state_keys[9].key_state == KEY_STATE_RELEASED); || (state_keys[9].key_id == key && state_keys[9].key_state == KEY_STATE_RELEASED);
} }
inline inline
bool input_was_down(const InputState* state, int16 key) bool input_was_down(const InputKey* state_keys, int16 key)
{ {
return (state->state_keys[0].key_id == key && state->state_keys[0].key_state == KEY_STATE_RELEASED) return (state_keys[0].key_id == key && state_keys[0].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[1].key_id == key && state->state_keys[1].key_state == KEY_STATE_RELEASED) || (state_keys[1].key_id == key && state_keys[1].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[2].key_id == key && state->state_keys[2].key_state == KEY_STATE_RELEASED) || (state_keys[2].key_id == key && state_keys[2].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[3].key_id == key && state->state_keys[3].key_state == KEY_STATE_RELEASED) || (state_keys[3].key_id == key && state_keys[3].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_RELEASED) || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[4].key_id == key && state->state_keys[4].key_state == KEY_STATE_RELEASED) || (state_keys[4].key_id == key && state_keys[4].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[5].key_id == key && state->state_keys[5].key_state == KEY_STATE_RELEASED) || (state_keys[5].key_id == key && state_keys[5].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[6].key_id == key && state->state_keys[6].key_state == KEY_STATE_RELEASED) || (state_keys[6].key_id == key && state_keys[6].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[7].key_id == key && state->state_keys[7].key_state == KEY_STATE_RELEASED) || (state_keys[7].key_id == key && state_keys[7].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[8].key_id == key && state->state_keys[8].key_state == KEY_STATE_RELEASED) || (state_keys[8].key_id == key && state_keys[8].key_state == KEY_STATE_RELEASED)
|| (state->state_keys[9].key_id == key && state->state_keys[9].key_state == KEY_STATE_RELEASED); || (state_keys[9].key_id == key && state_keys[9].key_state == KEY_STATE_RELEASED);
} }
inline inline
bool inputs_are_down( bool inputs_are_down(
const InputState* state, const InputKey* state_keys,
int16 key0, int16 key1 = 0, int16 key2 = 0, int16 key3 = 0, int16 key4 = 0 int16 key0, int16 key1 = 0, int16 key2 = 0, int16 key3 = 0, int16 key4 = 0
) { ) {
return (key0 != 0 && input_is_down(state, key0)) return (key0 != 0 && input_is_down(state_keys, key0))
&& (key1 == 0 || input_is_down(state, key1)) && (key1 == 0 || input_is_down(state_keys, key1))
&& (key2 == 0 || input_is_down(state, key2)) && (key2 == 0 || input_is_down(state_keys, key2))
&& (key3 == 0 || input_is_down(state, key3)) && (key3 == 0 || input_is_down(state_keys, key3))
&& (key4 == 0 || input_is_down(state, key4)); && (key4 == 0 || input_is_down(state_keys, key4));
} }
void input_add_callback(InputMapping* mapping, uint8 hotkey, InputCallback callback) void input_add_callback(InputMapping* mapping, uint8 hotkey, InputCallback callback)
@ -387,19 +385,19 @@ input_add_hotkey(
} }
inline inline
bool hotkey_is_active(const InputState* state, uint8 hotkey) bool hotkey_is_active(const uint8* state_hotkeys, uint8 hotkey)
{ {
return state->state_hotkeys[0] == hotkey return state_hotkeys[0] == hotkey
|| state->state_hotkeys[1] == hotkey || state_hotkeys[1] == hotkey
|| state->state_hotkeys[2] == hotkey || state_hotkeys[2] == hotkey
|| state->state_hotkeys[3] == hotkey || state_hotkeys[3] == hotkey
|| state->state_hotkeys[4] == hotkey; || state_hotkeys[4] == hotkey;
} }
// similar to hotkey_is_active but instead of just performing a lookup in the input_hotkey_state created results // similar to hotkey_is_active but instead of just performing a lookup in the input_hotkey_state created results
// this is actively checking the current input state (not the hotkey state) // this is actively checking the current input state (not the hotkey state)
inline inline
bool hotkey_keys_are_active(const InputState* state, const InputMapping* mapping, uint8 hotkey) bool hotkey_keys_are_active(const InputKey* state_keys, const InputMapping* mapping, uint8 hotkey)
{ {
int16 key0 = mapping->hotkeys[(hotkey - 1) * MAX_HOTKEY_COMBINATION]; int16 key0 = mapping->hotkeys[(hotkey - 1) * MAX_HOTKEY_COMBINATION];
int16 key1 = mapping->hotkeys[(hotkey - 1) * MAX_HOTKEY_COMBINATION + 1]; int16 key1 = mapping->hotkeys[(hotkey - 1) * MAX_HOTKEY_COMBINATION + 1];
@ -411,32 +409,32 @@ bool hotkey_keys_are_active(const InputState* state, const InputMapping* mapping
// Therefore, if a key has a state -> treat it as if active // Therefore, if a key has a state -> treat it as if active
// The code below also allows optional keys which have a negative sign (at least one of the optional keys must be valid) // The code below also allows optional keys which have a negative sign (at least one of the optional keys must be valid)
bool is_active = input_action_exists(state, (int16) OMS_ABS(key0)); bool is_active = input_action_exists(state_keys, (int16) OMS_ABS(key0));
if ((!is_active && (key0 > 0 || key1 >= 0)) || (is_active && key0 < 0)) { if ((!is_active && (key0 > 0 || key1 >= 0)) || (is_active && key0 < 0)) {
return is_active; return is_active;
} }
is_active = input_action_exists(state, (int16) OMS_ABS(key1)); is_active = input_action_exists(state_keys, (int16) OMS_ABS(key1));
if ((!is_active && (key1 > 0 || key2 >= 0)) || (is_active && key1 < 0)) { if ((!is_active && (key1 > 0 || key2 >= 0)) || (is_active && key1 < 0)) {
return is_active; return is_active;
} }
return input_action_exists(state, (int16) OMS_ABS(key2)); return input_action_exists(state_keys, (int16) OMS_ABS(key2));
} }
inline inline
void input_set_state(InputState* state, InputKey* __restrict new_key) void input_set_state(InputKey* state_keys, InputKey* __restrict new_key)
{ {
InputKey* free_state = NULL; InputKey* free_state = NULL;
bool action_required = true; bool action_required = true;
for (int32 i = 0; i < MAX_KEY_STATES; ++i) { for (int32 i = 0; i < MAX_KEY_STATES; ++i) {
if (!free_state && state->state_keys[i].key_id == 0) { if (!free_state && state_keys[i].key_id == 0) {
free_state = &state->state_keys[i]; free_state = &state_keys[i];
} else if (state->state_keys[i].key_id == new_key->key_id) { } else if (state_keys[i].key_id == new_key->key_id) {
state->state_keys[i].key_state = new_key->key_state; state_keys[i].key_state = new_key->key_state;
state->state_keys[i].value += new_key->value; state_keys[i].value += new_key->value;
state->state_keys[i].time = new_key->time; state_keys[i].time = new_key->time;
action_required = false; action_required = false;
} }
} }
@ -524,7 +522,7 @@ void input_set_controller_state(Input* input, ControllerInput* controller, uint6
if (count > 0) { if (count > 0) {
for (int32 i = 0; i < count; ++i) { for (int32 i = 0; i < count; ++i) {
input_set_state(&input->state, &keys[i]); input_set_state(input->state.state_keys, &keys[i]);
} }
} }
@ -566,7 +564,9 @@ input_hotkey_state(Input* input)
InputMapping* mapping; InputMapping* mapping;
if (i == 0) { if (i == 0) {
mapping = &input->input_mapping1; mapping = &input->input_mapping1;
} else if (input->handle_controller && key->key_id > INPUT_CONTROLLER_PREFIX) { } else if ((input->handle_controller || input->direct_controller)
&& key->key_id > INPUT_CONTROLLER_PREFIX
) {
mapping = &input->input_mapping2; mapping = &input->input_mapping2;
} else { } else {
continue; continue;
@ -581,7 +581,7 @@ input_hotkey_state(Input* input)
// Check every possible hotkey // Check every possible hotkey
// Since multiple input devices have their own button/key indices we have to do this weird range handling // Since multiple input devices have their own button/key indices we have to do this weird range handling
for (int possible_hotkey_idx = 0; possible_hotkey_idx < MAX_KEY_TO_HOTKEY; ++possible_hotkey_idx) { for (int32 possible_hotkey_idx = 0; possible_hotkey_idx < MAX_KEY_TO_HOTKEY; ++possible_hotkey_idx) {
// We only support a limited number of active hotkeys // We only support a limited number of active hotkeys
if (active_hotkeys >= MAX_KEY_PRESSES) { if (active_hotkeys >= MAX_KEY_PRESSES) {
return; return;
@ -589,12 +589,12 @@ input_hotkey_state(Input* input)
// Hotkey already active // Hotkey already active
// @question Do we even need this? This shouldn't happen anyway?! // @question Do we even need this? This shouldn't happen anyway?!
if (hotkey_is_active(&input->state, hotkeys_for_key[possible_hotkey_idx])) { if (hotkey_is_active(input->state.state_hotkeys, hotkeys_for_key[possible_hotkey_idx])) {
continue; continue;
} }
// store active hotkey, if it is not already active // store active hotkey, if it is not already active
bool is_pressed = hotkey_keys_are_active(&input->state, mapping, hotkeys_for_key[possible_hotkey_idx]); bool is_pressed = hotkey_keys_are_active(input->state.state_keys, mapping, hotkeys_for_key[possible_hotkey_idx]);
if (!is_pressed) { if (!is_pressed) {
continue; continue;
} }

View File

@ -215,7 +215,12 @@ void debug_memory_log(uint64 start, uint64 size, int32 type, const char* functio
mem->last_action[mem->action_idx].function_name = function; mem->last_action[mem->action_idx].function_name = function;
++mem->action_idx; ++mem->action_idx;
mem->usage += size * type;
if (type < 0 && mem->usage < size * -type) {
mem->usage = 0;
} else {
mem->usage += size * type;
}
} }
void debug_memory_reserve(uint64 start, uint64 size, int32 type, const char* function) void debug_memory_reserve(uint64 start, uint64 size, int32 type, const char* function)

View File

@ -1,8 +1,13 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include "../stdlib/Intrinsics.h"
#include "Animation.h" #include "Animation.h"
#if ARM
#include "../stdlib/IntrinsicsArm.h"
#else
#include "../stdlib/Intrinsics.h"
#endif
double fade(double t) { double fade(double t) {
return t * t * t * (t * (t * 6 - 15) + 10); return t * t * t * (t * (t * 6 - 15) + 10);
} }

View File

@ -12,13 +12,18 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <math.h> #include <math.h>
#include "../../stdlib/Intrinsics.h"
#include "../../utils/MathUtils.h" #include "../../utils/MathUtils.h"
#include "../../utils/TestUtils.h" #include "../../utils/TestUtils.h"
#if ARM
#include "../../stdlib/IntrinsicsArm.h"
#else
#include "../../stdlib/Intrinsics.h"
#endif
// @todo Implement intrinsic versions! // @todo Implement intrinsic versions!
void vec2_normalize_f32(f32* __restrict x, f32* __restrict y) void vec2_normalize(f32* __restrict x, f32* __restrict y)
{ {
f32 d = sqrtf((*x) * (*x) + (*y) * (*y)); f32 d = sqrtf((*x) * (*x) + (*y) * (*y));
@ -89,7 +94,7 @@ f32 vec2_dot(const v2_f32* a, const v2_f32* b) {
return a->x * b->x + a->y * b->y; return a->x * b->x + a->y * b->y;
} }
void vec3_normalize_f32(f32* __restrict x, f32* __restrict y, f32* __restrict z) void vec3_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z)
{ {
f32 d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z)); f32 d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z));
@ -98,7 +103,7 @@ void vec3_normalize_f32(f32* __restrict x, f32* __restrict y, f32* __restrict z)
*z /= d; *z /= d;
} }
void vec3_normalize_f32(v3_f32* vec) void vec3_normalize(v3_f32* vec)
{ {
f32 d = sqrtf(vec->x * vec->x + vec->y * vec->y + vec->z * vec->z); f32 d = sqrtf(vec->x * vec->x + vec->y * vec->y + vec->z * vec->z);
@ -178,7 +183,7 @@ f32 vec3_dot(const v3_f32* a, const v3_f32* b) {
return a->x * b->x + a->y * b->y + a->z * b->z; return a->x * b->x + a->y * b->y + a->z * b->z;
} }
void vec4_normalize_f32(f32* __restrict x, f32* __restrict y, f32* __restrict z, f32* __restrict w) void vec4_normalize(f32* __restrict x, f32* __restrict y, f32* __restrict z, f32* __restrict w)
{ {
f32 d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z) + (*w) * (*w)); f32 d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z) + (*w) * (*w));

View File

@ -9,7 +9,12 @@
#ifndef TOS_MATH_MATRIX_INT32_H #ifndef TOS_MATH_MATRIX_INT32_H
#define TOS_MATH_MATRIX_INT32_H #define TOS_MATH_MATRIX_INT32_H
#include "../../stdlib/Intrinsics.h"
#include "../../utils/MathUtils.h" #include "../../utils/MathUtils.h"
#if ARM
#include "../../stdlib/IntrinsicsArm.h"
#else
#include "../../stdlib/Intrinsics.h"
#endif
#endif #endif

View File

@ -9,9 +9,14 @@
#ifndef TOS_MATH_MATRIX_INT64_H #ifndef TOS_MATH_MATRIX_INT64_H
#define TOS_MATH_MATRIX_INT64_H #define TOS_MATH_MATRIX_INT64_H
#include "../../stdlib/Intrinsics.h"
#include "../../utils/MathUtils.h" #include "../../utils/MathUtils.h"
#if ARM
#include "../../stdlib/IntrinsicsArm.h"
#else
#include "../../stdlib/Intrinsics.h"
#endif
#endif #endif
// Remarks: sizes for the second matrix/vector are often implied by the first parameter and the rules for matrix/vector // Remarks: sizes for the second matrix/vector are often implied by the first parameter and the rules for matrix/vector

View File

@ -10,9 +10,15 @@
#ifndef TOS_MATH_MATRIX_QUATERNION_FLOAT32_H #ifndef TOS_MATH_MATRIX_QUATERNION_FLOAT32_H
#define TOS_MATH_MATRIX_QUATERNION_FLOAT32_H #define TOS_MATH_MATRIX_QUATERNION_FLOAT32_H
#include "../../stdlib/Intrinsics.h"
#include "../../utils/MathUtils.h" #include "../../utils/MathUtils.h"
#include "../../utils/TestUtils.h" #include "../../utils/TestUtils.h"
#include "MatrixFloat32.h"
#if ARM
#include "../../stdlib/IntrinsicsArm.h"
#else
#include "../../stdlib/Intrinsics.h"
#endif
// @todo Remove unused functions there are a lot (AFTER you implemented quaternion handling in the camera) // @todo Remove unused functions there are a lot (AFTER you implemented quaternion handling in the camera)

View File

@ -9,7 +9,6 @@
#ifndef TOS_MATH_MATRIX_VECTOR_FLOAT32_H #ifndef TOS_MATH_MATRIX_VECTOR_FLOAT32_H
#define TOS_MATH_MATRIX_VECTOR_FLOAT32_H #define TOS_MATH_MATRIX_VECTOR_FLOAT32_H
#include "../../stdlib/Intrinsics.h"
#include "../../utils/MathUtils.h" #include "../../utils/MathUtils.h"
#include "../../stdlib/simd/SIMD_F32.h" #include "../../stdlib/simd/SIMD_F32.h"

View File

@ -9,7 +9,6 @@
#ifndef TOS_MATH_MATRIX_VECTOR_FLOAT64_H #ifndef TOS_MATH_MATRIX_VECTOR_FLOAT64_H
#define TOS_MATH_MATRIX_VECTOR_FLOAT64_H #define TOS_MATH_MATRIX_VECTOR_FLOAT64_H
#include "../../stdlib/Intrinsics.h"
#include "../../utils/MathUtils.h" #include "../../utils/MathUtils.h"
#include "../../stdlib/simd/SIMD_F64.h" #include "../../stdlib/simd/SIMD_F64.h"

View File

@ -12,7 +12,6 @@
#include <immintrin.h> #include <immintrin.h>
#include <xmmintrin.h> #include <xmmintrin.h>
#include "../../stdlib/Intrinsics.h"
#include "../../utils/MathUtils.h" #include "../../utils/MathUtils.h"
#include "../../stdlib/simd/SIMD_I32.h" #include "../../stdlib/simd/SIMD_I32.h"

View File

@ -12,7 +12,6 @@
#include <immintrin.h> #include <immintrin.h>
#include <xmmintrin.h> #include <xmmintrin.h>
#include "../../stdlib/Intrinsics.h"
#include "../../utils/MathUtils.h" #include "../../utils/MathUtils.h"
#include "../../stdlib/simd/SIMD_I64.h" #include "../../stdlib/simd/SIMD_I64.h"

View File

@ -71,7 +71,7 @@
#define SETTING_UI_VISIBILITY_FPS 1 #define SETTING_UI_VISIBILITY_FPS 1
#define SETTING_UI_VISIBILITY_APM 2 #define SETTING_UI_VISIBILITY_APM 2
#define SETTING_UI_VISIBILITY__ 4 #define SETTING_UI_VISIBILITY_NET_GRAPH 4
#define SETTING_UI_VISIBILITY___ 8 #define SETTING_UI_VISIBILITY___ 8
#define SETTING_UI_VISIBILITY_HOTKEYS 16 #define SETTING_UI_VISIBILITY_HOTKEYS 16
#define SETTING_UI_VISIBILITY_XP_BAR 32 #define SETTING_UI_VISIBILITY_XP_BAR 32
@ -80,9 +80,9 @@
#define SETTING_UI_VISIBILITY_CHAT 256 #define SETTING_UI_VISIBILITY_CHAT 256
#define SETTING_UI_VISIBILITY_CLOCK 512 #define SETTING_UI_VISIBILITY_CLOCK 512
#define SETTING_UI_VISIBILITY_SUBTITLES 1024 #define SETTING_UI_VISIBILITY_SUBTITLES 1024
#define SETTING_UI_VISIBILITY_BAR 1024 #define SETTING_UI_VISIBILITY_BAR 2048
#define SETTING_UI_VISIBILITY_HEALTH 2048 #define SETTING_UI_VISIBILITY_HEALTH 4096
#define SETTING_UI_VISIBILITY_RESOURCE 4096 #define SETTING_UI_VISIBILITY_RESOURCE 8192
#define SETTING_UI_VISIBILITY_INFO 8192 // = e.g. quest info #define SETTING_UI_VISIBILITY_INFO 8192 // = e.g. quest info
#define SETTING_GAME_VISIBILITY_BAR_SELF 1 #define SETTING_GAME_VISIBILITY_BAR_SELF 1

View File

@ -1,27 +0,0 @@
/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef TOS_NETWORK_OS_WRAPPER_H
#define TOS_NETWORK_OS_WRAPPER_H
#if _WIN32
#include <winsock2.h>
#include <windows.h>
inline
int close(SOCKET sock) {
return closesocket(sock);
}
inline
void sleep(unsigned long time) {
Sleep(time);
}
#endif
#endif

View File

@ -13,7 +13,7 @@
#if _WIN32 #if _WIN32
#include <winsock2.h> #include <winsock2.h>
#include <ws2tcpip.h> #include <ws2ipdef.h>
#else #else
#include <netdb.h> #include <netdb.h>
#include <unistd.h> #include <unistd.h>

View File

@ -10,7 +10,8 @@
#include "PacketHeader.h" #include "PacketHeader.h"
#if _WIN32 #if _WIN32
#include <ws2def.h> #include <winsock2.h>
#include <Ws2ipdef.h>
#elif __linux__ #elif __linux__
#include <arpa/inet.h> #include <arpa/inet.h>
#include <sys/socket.h> #include <sys/socket.h>

View File

@ -14,7 +14,7 @@
#if _WIN32 #if _WIN32
#include <winsock2.h> #include <winsock2.h>
#include <ws2tcpip.h> #include <Ws2ipdef.h>
#include <windows.h> #include <windows.h>
#else #else
#include <sys/socket.h> #include <sys/socket.h>

View File

@ -6,8 +6,8 @@
#include "../../stdlib/Types.h" #include "../../stdlib/Types.h"
#if _WIN32 #if _WIN32
#include <ws2def.h> #include <winsock2.h>
#include <in6addr.h> #include <Ws2ipdef.h>
#include <ws2tcpip.h> #include <ws2tcpip.h>
#elif __linux__ #elif __linux__
#include <netinet/in.h> #include <netinet/in.h>

View File

@ -10,7 +10,7 @@
#include "PacketHeader.h" #include "PacketHeader.h"
#if _WIN32 #if _WIN32
#include <ws2def.h> #include <winsock2.h>
#elif __linux__ #elif __linux__
#include <arpa/inet.h> #include <arpa/inet.h>
#include <sys/socket.h> #include <sys/socket.h>
@ -94,7 +94,7 @@ uint16 packet_udp_create_raw(
in6_addr* __restrict ipv6_src, uint16 port_src, in6_addr* __restrict ipv6_src, uint16 port_src,
in6_addr* __restrict ipv6_dst, uint16 port_dst, in6_addr* __restrict ipv6_dst, uint16 port_dst,
uint16 flow, uint16 flow,
byte* __restrict data, uint16 data_length const byte* __restrict data, uint16 data_length
) { ) {
// create ipv6 header // create ipv6 header
HeaderIPv6Unpacked* ip6_header = (HeaderIPv6Unpacked *) packet; HeaderIPv6Unpacked* ip6_header = (HeaderIPv6Unpacked *) packet;
@ -113,6 +113,7 @@ uint16 packet_udp_create_raw(
udp_header->len = ip6_header->ip6_plen; udp_header->len = ip6_header->ip6_plen;
udp_header->check = 0; udp_header->check = 0;
// @performance consider to do the compression right here instead of the memcpy
// create payload // create payload
memcpy(packet + sizeof(HeaderIPv6Unpacked) + sizeof(UDPHeaderIPv6Unpacked), data, data_length); memcpy(packet + sizeof(HeaderIPv6Unpacked) + sizeof(UDPHeaderIPv6Unpacked), data, data_length);
@ -129,19 +130,11 @@ uint16 packet_udp_create_raw(
inline inline
uint16 packet_udp_create( uint16 packet_udp_create(
byte* __restrict packet, byte* __restrict packet,
uint16 port_src, uint16 port_dst, const byte* __restrict data, uint16 data_length
byte* __restrict data, uint16 data_length
) { ) {
// create udp header // @performance consider to do the compression right here instead of the memcpy
UDPHeaderIPv6Unpacked* udp_header = (UDPHeaderIPv6Unpacked *) packet;
udp_header->source = port_src;
udp_header->dest = port_dst;
udp_header->len = SWAP_ENDIAN_BIG((uint16) (sizeof(UDPHeaderIPv6Unpacked) + data_length));
udp_header->check = 0;
// create payload // create payload
memcpy(packet + sizeof(UDPHeaderIPv6Unpacked), data, data_length); memcpy(packet, data, data_length);
return data_length; return data_length;
} }

14
platform/linux/Socket.h Normal file
View File

@ -0,0 +1,14 @@
/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef TOS_PLATFORM_LINUX_SOCKET_H
#define TOS_PLATFORM_LINUX_SOCKET_H
// Linux spelling of the platform-neutral socket_close name: expands to close.
// NOTE(review): presumably the POSIX close() from <unistd.h>; this header does
// not include it itself, so the includer has to - confirm.
#define socket_close close
#endif

View File

@ -19,7 +19,6 @@
#include <locale.h> #include <locale.h>
#include <cpuid.h> #include <cpuid.h>
// @todo implement for arm? // @todo implement for arm?
uint16 system_language_code() uint16 system_language_code()

View File

@ -48,7 +48,7 @@ void socket_client_udp_create(SocketConnection* con, uint16 port = 0) {
// Bind socket // Bind socket
con->addr.sin6_family = AF_INET6; con->addr.sin6_family = AF_INET6;
con->addr.sin6_addr = in6addr_any; con->addr.sin6_addr = in6addr_any;
con->addr.sin6_port = port; // 0 = OS decides the port con->addr.sin6_port = SWAP_ENDIAN_BIG(port); // 0 = OS decides the port
if (bind(con->sd, (struct sockaddr*) &con->addr, sizeof(con->addr)) == SOCKET_ERROR) { if (bind(con->sd, (struct sockaddr*) &con->addr, sizeof(con->addr)) == SOCKET_ERROR) {
closesocket(con->sd); closesocket(con->sd);

14
platform/win32/Socket.h Normal file
View File

@ -0,0 +1,14 @@
/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef TOS_PLATFORM_WIN32_SOCKET_H
#define TOS_PLATFORM_WIN32_SOCKET_H
// Win32 spelling of the platform-neutral socket_close name: expands to closesocket.
// NOTE(review): presumably the Winsock closesocket() from <winsock2.h>; this
// header does not include it itself, so the includer has to - confirm.
#define socket_close closesocket
#endif

View File

@ -35,6 +35,7 @@
#pragma comment(lib, "iphlpapi.lib") #pragma comment(lib, "iphlpapi.lib")
#pragma comment(lib, "d3d12.lib") #pragma comment(lib, "d3d12.lib")
#pragma comment(lib, "dxgi.lib") #pragma comment(lib, "dxgi.lib")
#pragma comment(lib, "Ws2_32.lib")
#endif #endif
// @todo implement for arm? // @todo implement for arm?

View File

@ -86,7 +86,7 @@ int rawinput_init_mousekeyboard(HWND hwnd, Input* __restrict states, RingMemory*
} }
} break; } break;
case RIM_TYPEKEYBOARD: { case RIM_TYPEKEYBOARD: {
if (states[keyboard_found].handle_keyboard != NULL) { if (states[keyboard_found].handle_keyboard != NULL) {
++keyboard_found; ++keyboard_found;
} }
@ -219,13 +219,12 @@ void input_mouse_position(HWND hwnd, v2_int32* pos)
} }
} }
int32 input_raw_handle(RAWINPUT* __restrict raw, Input* states, int32 state_count, uint64 time) int32 input_raw_handle(RAWINPUT* __restrict raw, Input* __restrict states, int32 state_count, uint64 time)
{ {
int32 input_count = 0; int32 input_count = 0;
int32 i = 0; int32 i = 0;
if (raw->header.dwType == RIM_TYPEMOUSE) { if (raw->header.dwType == RIM_TYPEMOUSE) {
// @performance Change so we can directly access the correct state (maybe map handle address to index?)
while (i < state_count while (i < state_count
&& states[i].handle_mouse != raw->header.hDevice && states[i].handle_mouse != raw->header.hDevice
) { ) {
@ -270,10 +269,12 @@ int32 input_raw_handle(RAWINPUT* __restrict raw, Input* states, int32 state_coun
key.key_state = KEY_STATE_RELEASED; key.key_state = KEY_STATE_RELEASED;
key.key_id = INPUT_MOUSE_BUTTON_5; key.key_id = INPUT_MOUSE_BUTTON_5;
} else if (raw->data.mouse.usButtonFlags & RI_MOUSE_WHEEL) { } else if (raw->data.mouse.usButtonFlags & RI_MOUSE_WHEEL) {
// @bug not working
key.key_state = KEY_STATE_RELEASED; key.key_state = KEY_STATE_RELEASED;
key.key_id = INPUT_MOUSE_BUTTON_WHEEL; key.key_id = INPUT_MOUSE_BUTTON_WHEEL;
key.value = (int16) raw->data.mouse.usButtonData; key.value = (int16) raw->data.mouse.usButtonData;
} else if (raw->data.mouse.usButtonFlags & RI_MOUSE_HWHEEL) { } else if (raw->data.mouse.usButtonFlags & RI_MOUSE_HWHEEL) {
// @bug not working
key.key_state = KEY_STATE_RELEASED; key.key_state = KEY_STATE_RELEASED;
key.key_id = INPUT_MOUSE_BUTTON_HWHEEL; key.key_id = INPUT_MOUSE_BUTTON_HWHEEL;
key.value = (int16) raw->data.mouse.usButtonData; key.value = (int16) raw->data.mouse.usButtonData;
@ -281,17 +282,15 @@ int32 input_raw_handle(RAWINPUT* __restrict raw, Input* states, int32 state_coun
return 0; return 0;
} }
// @question is mouse wheel really considered a button change?
++input_count; ++input_count;
key.key_id |= INPUT_MOUSE_PREFIX; key.key_id |= INPUT_MOUSE_PREFIX;
key.time = time; key.time = time;
input_set_state(&states[i].state, &key); input_set_state(states[i].state.state_keys, &key);
states[i].state_change_button = true; states[i].state_change_button = true;
} else if (states[i].mouse_movement) { } else if (states[i].mouse_movement) {
// do we want to handle mouse movement for every individual movement, or do we want to pull it // @question do we want to handle mouse movement for every individual movement, or do we want to pull it
if (raw->data.mouse.usFlags & MOUSE_MOVE_ABSOLUTE) { if (raw->data.mouse.usFlags & MOUSE_MOVE_ABSOLUTE) {
RECT rect; RECT rect;
@ -351,7 +350,7 @@ int32 input_raw_handle(RAWINPUT* __restrict raw, Input* states, int32 state_coun
// @todo change to MakeCode instead of VKey // @todo change to MakeCode instead of VKey
InputKey key = {(uint16) (raw->data.keyboard.VKey | INPUT_KEYBOARD_PREFIX), new_state, 0, time}; InputKey key = {(uint16) (raw->data.keyboard.VKey | INPUT_KEYBOARD_PREFIX), new_state, 0, time};
input_set_state(&states[i].state, &key); input_set_state(states[i].state.state_keys, &key);
states[i].state_change_button = true; states[i].state_change_button = true;
} else if (raw->header.dwType == RIM_TYPEHID } else if (raw->header.dwType == RIM_TYPEHID
&& raw->header.dwSize > sizeof(RAWINPUT) && raw->header.dwSize > sizeof(RAWINPUT)
@ -407,7 +406,7 @@ void input_handle(LPARAM lParam, Input* __restrict states, int state_count, Ring
input_raw_handle((RAWINPUT *) lpb, states, state_count, time); input_raw_handle((RAWINPUT *) lpb, states, state_count, time);
} }
int32 input_handle_buffered(int buffer_size, Input* __restrict states, int state_count, RingMemory* ring, uint64 time) int32 input_handle_buffered(int32 buffer_size, Input* __restrict states, int state_count, RingMemory* ring, uint64 time)
{ {
uint32 cb_size; uint32 cb_size;
GetRawInputBuffer(NULL, &cb_size, sizeof(RAWINPUTHEADER)); GetRawInputBuffer(NULL, &cb_size, sizeof(RAWINPUTHEADER));

86
stdlib/IntrinsicsArm.h Normal file
View File

@ -0,0 +1,86 @@
/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef TOS_STDLIB_INTRINSICS_ARM_H
#define TOS_STDLIB_INTRINSICS_ARM_H
#include <arm_sve.h>
// Square root of a single float using the SVE square-root instruction.
//
// @param a Input value
// @return sqrt(a)
inline float oms_sqrt(float a) {
    // SVE arithmetic is predicated: ACLE has no unpredicated svsqrt_f32(),
    // the intrinsic is svsqrt_f32_x(predicate, operand).
    const svbool_t all_lanes = svptrue_b32();
    const svfloat32_t root = svsqrt_f32_x(all_lanes, svdup_n_f32(a));

    // The input was broadcast, so every lane holds the same result and reading
    // the last active lane is enough (there is no svget1_* for plain vectors).
    return svlastb_f32(all_lanes, root);
}
// Square root of a single double using the SVE square-root instruction.
//
// @param a Input value
// @return sqrt(a)
inline double oms_sqrt(double a) {
    // SVE arithmetic is predicated: ACLE has no unpredicated svsqrt_f64(),
    // the intrinsic is svsqrt_f64_x(predicate, operand).
    const svbool_t all_lanes = svptrue_b64();
    const svfloat64_t root = svsqrt_f64_x(all_lanes, svdup_n_f64(a));

    // The input was broadcast, so every lane holds the same result and reading
    // the last active lane is enough (there is no svget1_* for plain vectors).
    return svlastb_f64(all_lanes, root);
}
// Reciprocal square root estimate (1 / sqrt(a)) of a single float.
//
// This uses the SVE estimate instruction without a refinement step, so the
// result is an approximation, not a correctly rounded value.
//
// @param a Input value
// @return Estimate of 1 / sqrt(a)
inline float oms_rsqrt(float a) {
    // svrsqrte is one of the few unpredicated SVE float intrinsics.
    const svfloat32_t estimate = svrsqrte_f32(svdup_n_f32(a));

    // The input was broadcast, so every lane holds the same result and reading
    // the last active lane is enough (there is no svget1_* for plain vectors).
    return svlastb_f32(svptrue_b32(), estimate);
}
// Reciprocal square root estimate (1 / sqrt(a)) of a single double.
//
// This uses the SVE estimate instruction without a refinement step, so the
// result is an approximation, not a correctly rounded value.
//
// @param a Input value
// @return Estimate of 1 / sqrt(a)
inline double oms_rsqrt(double a) {
    // svrsqrte is one of the few unpredicated SVE float intrinsics.
    const svfloat64_t estimate = svrsqrte_f64(svdup_n_f64(a));

    // The input was broadcast, so every lane holds the same result and reading
    // the last active lane is enough (there is no svget1_* for plain vectors).
    return svlastb_f64(svptrue_b64(), estimate);
}
// Rounds a single float to the nearest integral value (ties go to even).
//
// @param a Input value
// @return a rounded to the nearest integral float
inline float oms_round(float a) {
    // The ACLE name for "round to nearest" is svrintn (predicated); svrndn_f32
    // is not an SVE intrinsic.
    const svbool_t all_lanes = svptrue_b32();
    const svfloat32_t rounded = svrintn_f32_x(all_lanes, svdup_n_f32(a));

    // The input was broadcast, so every lane holds the same result and reading
    // the last active lane is enough (there is no svget1_* for plain vectors).
    return svlastb_f32(all_lanes, rounded);
}
// Rounds a single float to the nearest integer (ties go to even) and returns
// it as an integer.
//
// NOTE(review): the conversion is signed but the declared return type is
// uint32_t, so negative inputs come back wrapped - confirm whether int32_t
// was intended.
//
// @param a Input value
// @return a rounded to the nearest integer
inline uint32_t round_to_int(float a) {
    const svbool_t all_lanes = svptrue_b32();

    // svcvt_s32_f32 truncates toward zero, so round to nearest first.
    // (svcvtn_f32_s32 / SVE_32B are not part of the SVE ACLE.)
    const svfloat32_t rounded = svrintn_f32_x(all_lanes, svdup_n_f32(a));
    const svint32_t converted = svcvt_s32_f32_x(all_lanes, rounded);

    // The input was broadcast, so every lane holds the same result and reading
    // the last active lane is enough (there is no svget1_* for plain vectors).
    return (uint32_t) svlastb_s32(all_lanes, converted);
}
// Rounds a single float toward negative infinity.
//
// @param a Input value
// @return Largest integral float that is <= a
inline float oms_floor(float a) {
    // The ACLE name for "round toward minus infinity" is svrintm (predicated);
    // svfloor_f32 is not an SVE intrinsic.
    const svbool_t all_lanes = svptrue_b32();
    const svfloat32_t floored = svrintm_f32_x(all_lanes, svdup_n_f32(a));

    // The input was broadcast, so every lane holds the same result and reading
    // the last active lane is enough (there is no svget1_* for plain vectors).
    return svlastb_f32(all_lanes, floored);
}
// Rounds a single float toward positive infinity.
//
// @param a Input value
// @return Smallest integral float that is >= a
inline float oms_ceil(float a) {
    // The ACLE name for "round toward plus infinity" is svrintp (predicated);
    // svceil_f32 is not an SVE intrinsic.
    const svbool_t all_lanes = svptrue_b32();
    const svfloat32_t ceiled = svrintp_f32_x(all_lanes, svdup_n_f32(a));

    // The input was broadcast, so every lane holds the same result and reading
    // the last active lane is enough (there is no svget1_* for plain vectors).
    return svlastb_f32(all_lanes, ceiled);
}
// Atomically adds b to the 32-bit value at a (sequentially consistent).
//
// @param a Address of the counter to modify
// @param b Amount to add
inline void atomic_increment(int32_t* a, int32_t b)
{
    // The previous value is not needed, so the fetch result is dropped.
    (void) __atomic_fetch_add(a, b, __ATOMIC_SEQ_CST);
}
// Atomically adds b to the 64-bit value at a (sequentially consistent).
//
// @param a Address of the counter to modify
// @param b Amount to add
inline void atomic_increment(int64_t* a, int64_t b)
{
    // The previous value is not needed, so the fetch result is dropped.
    (void) __atomic_fetch_add(a, b, __ATOMIC_SEQ_CST);
}
// Atomically subtracts b from the 32-bit value at a (sequentially consistent).
//
// @param a Address of the counter to modify
// @param b Amount to subtract
inline void atomic_decrement(int32_t* a, int32_t b)
{
    // The previous value is not needed, so the fetch result is dropped.
    (void) __atomic_fetch_sub(a, b, __ATOMIC_SEQ_CST);
}
// Atomically subtracts b from the 64-bit value at a (sequentially consistent).
//
// @param a Address of the counter to modify
// @param b Amount to subtract
inline void atomic_decrement(int64_t* a, int64_t b)
{
    // The previous value is not needed, so the fetch result is dropped.
    (void) __atomic_fetch_sub(a, b, __ATOMIC_SEQ_CST);
}
#endif

View File

@ -14,11 +14,14 @@
#ifdef _MSC_VER #ifdef _MSC_VER
#define PACKED_STRUCT __pragma(pack(push, 1)) #define PACKED_STRUCT __pragma(pack(push, 1))
#define UNPACKED_STRUCT __pragma(pack(pop)) #define UNPACKED_STRUCT __pragma(pack(pop))
typedef SSIZE_T ssize_t;
#else #else
#define PACKED_STRUCT __attribute__((__packed__)) #define PACKED_STRUCT __attribute__((__packed__))
#define UNPACKED_STRUCT #define UNPACKED_STRUCT
#endif #endif
#define ARRAY_COUNT(a) (sizeof(a) / sizeof((a)[0]))
typedef int8_t int8; typedef int8_t int8;
typedef int16_t int16; typedef int16_t int16;
typedef int32_t int32; typedef int32_t int32;

View File

@ -17,21 +17,36 @@
struct f32_4 { struct f32_4 {
union { union {
__m128 s; #if ARM
svfloat32_t s;
#else
__m128 s;
#endif
f32 v[4]; f32 v[4];
}; };
}; };
struct f32_8 { struct f32_8 {
union { union {
__m256 s; #if ARM
svfloat32_t s;
#else
__m256 s;
#endif
f32 v[8]; f32 v[8];
}; };
}; };
struct f32_16 { struct f32_16 {
union { union {
__m512 s; #if ARM
svfloat32_t s;
#else
__m512 s;
#endif
f32 v[16]; f32 v[16];
}; };
}; };

View File

@ -16,21 +16,36 @@
struct f64_2 { struct f64_2 {
union { union {
__m128 s; #if ARM
svfloat64_t s;
#else
__m128 s;
#endif
f64 v[2]; f64 v[2];
}; };
}; };
struct f64_4 { struct f64_4 {
union { union {
__m256 s; #if ARM
svfloat64_t s;
#else
__m256 s;
#endif
f64 v[4]; f64 v[4];
}; };
}; };
struct f64_8 { struct f64_8 {
union { union {
__m512 s; #if ARM
svfloat64_t s;
#else
__m512 s;
#endif
f64 v[8]; f64 v[8];
}; };
}; };

View File

@ -14,8 +14,26 @@
#include <xmmintrin.h> #include <xmmintrin.h>
#include "../Types.h" #include "../Types.h"
#ifdef _MSC_VER // @todo split into platform code for windows and linux
#include <intrin.h>
#if _WIN32
#include <windows.h>
#include <stdio.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
#elif __linux__
#include <sys/auxv.h>
#include <unistd.h>
#endif
#if ARM
#include <arm_sve.h>
#else
// Stand-in for svcntw() on builds that do not include <arm_sve.h>, so callers
// can query the SVE lane count unconditionally; it always reports 0 here.
// Marked inline because this lives in a header: a plain definition would be
// emitted by every translation unit that includes it.
inline int32 svcntw() {
    return 0;
}
#endif #endif
enum SIMDVersion { enum SIMDVersion {
@ -23,149 +41,203 @@ enum SIMDVersion {
SIMD_VERSION_128, SIMD_VERSION_128,
SIMD_VERSION_256, SIMD_VERSION_256,
SIMD_VERSION_512, SIMD_VERSION_512,
SIMD_VERSION_SVE,
SIMD_VERSION_NEON,
}; };
// @todo implement for arm? // @todo implement for arm?
// Reports whether the NEON / Advanced SIMD unit is available.
//
// CPUID is an x86 instruction and cannot be executed on ARM, so the operating
// system is asked instead.
//
// @return 1 when NEON is available, 0 otherwise (always 0 on non-ARM builds)
inline int32 max_neon_supported()
{
    #if ARM
        #if _WIN32
            return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0;
        #elif __linux__
            // The kernel publishes CPU features as bits of AT_HWCAP.
            #if defined(__aarch64__)
                const unsigned long neon_bit = 1UL << 1;  // HWCAP_ASIMD
            #else
                const unsigned long neon_bit = 1UL << 12; // HWCAP_NEON (32-bit ARM)
            #endif

            return (getauxval(AT_HWCAP) & neon_bit) ? 1 : 0;
        #else
            return 0;
        #endif
    #else
        return 0;
    #endif
}
// Reports whether the Scalable Vector Extension (SVE) is available.
//
// Detection goes through the Linux auxiliary vector, so other targets report
// 0 until a platform-specific check is added.
//
// @return 1 when SVE is available, 0 otherwise
inline int32 max_sve_supported()
{
    #if ARM && __linux__ && defined(__aarch64__)
        // HWCAP_SVE is bit 22 of AT_HWCAP on arm64 (bit 19 is HWCAP_SM4).
        // Keep the full unsigned long so no capability bits are truncated.
        const unsigned long sve_bit = 1UL << 22;

        return (getauxval(AT_HWCAP) & sve_bit) ? 1 : 0;
    #else
        return 0;
    #endif
}
inline int32 max_sse_supported() inline int32 max_sse_supported()
{ {
#ifdef _MSC_VER #if ARM
int32 cpuInfo[4] = {-1}; return 0;
__cpuid(cpuInfo, 1); // CPUID function 1
uint32 ecx = cpuInfo[2];
uint32 edx = cpuInfo[3];
#else #else
uint32 eax, ebx, ecx, edx; #ifdef _MSC_VER
int32 cpuInfo[4] = {-1};
__cpuid(cpuInfo, 1); // CPUID function 1
eax = 1; // CPUID function 1 uint32 ecx = cpuInfo[2];
__asm__ __volatile__("cpuid;" uint32 edx = cpuInfo[3];
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) #else
: "a"(eax)); uint32 eax, ebx, ecx, edx;
eax = 1; // CPUID function 1
__asm__ __volatile__("cpuid;"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(eax));
#endif
bool sse42_supported = (ecx >> 20) & 1;
if (sse42_supported) {
return 42;
}
bool sse41_supported = (ecx >> 19) & 1;
if (sse41_supported) {
return 41;
}
bool sse3_supported = (ecx >> 0) & 1;
if (sse3_supported) {
return 3;
}
bool sse2_supported = (edx >> 26) & 1;
if (sse2_supported) {
return 2;
}
return 0;
#endif #endif
bool sse42_supported = (ecx >> 20) & 1;
if (sse42_supported) {
return 42;
}
bool sse41_supported = (ecx >> 19) & 1;
if (sse41_supported) {
return 41;
}
bool sse3_supported = (ecx >> 0) & 1;
if (sse3_supported) {
return 3;
}
bool sse2_supported = (edx >> 26) & 1;
if (sse2_supported) {
return 2;
}
return 0;
} }
inline inline
int max_avx256_supported() int32 max_avx256_supported()
{ {
int32 max_version = 0; #if ARM
return 0;
#ifdef _MSC_VER
int32 cpuInfo[4];
__cpuid(cpuInfo, 1);
if ((cpuInfo[2] >> 28) & 1) {
__cpuid(cpuInfo, 7); // Query extended features
if ((cpuInfo[1] >> 5) & 1) {
max_version = 2;
}
}
#else #else
uint32 eax, ebx, ecx, edx; int32 max_version = 0;
__asm__ __volatile__("cpuid" #ifdef _MSC_VER
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) int32 cpuInfo[4];
: "a"(1)); __cpuid(cpuInfo, 1);
if ((ecx >> 28) & 1) {
eax = 7;
ecx = 0;
__asm__ __volatile__("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(eax), "c"(ecx));
if ((ebx >> 5) & 1) { if ((cpuInfo[2] >> 28) & 1) {
max_version = 2; __cpuid(cpuInfo, 7); // Query extended features
if ((cpuInfo[1] >> 5) & 1) {
max_version = 2;
}
} }
} #else
#endif uint32 eax, ebx, ecx, edx;
return max_version; __asm__ __volatile__("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(1));
if ((ecx >> 28) & 1) {
eax = 7;
ecx = 0;
__asm__ __volatile__("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(eax), "c"(ecx));
if ((ebx >> 5) & 1) {
max_version = 2;
}
}
#endif
return max_version;
#endif
} }
inline inline
int max_avx512_supported() int32 max_avx512_supported()
{ {
#ifdef _MSC_VER #if ARM
int32 cpuInfo[4]; return 0;
__cpuid(cpuInfo, 1);
int32 ebx = 0;
if ((cpuInfo[2] >> 28) & 1) {
__cpuid(cpuInfo, 7);
ebx = cpuInfo[1];
}
#else #else
uint32 eax, ebx, ecx, edx; #ifdef _MSC_VER
int32 cpuInfo[4];
__cpuid(cpuInfo, 1);
int32 ebx = 0;
if ((cpuInfo[2] >> 28) & 1) {
__cpuid(cpuInfo, 7);
ebx = cpuInfo[1];
}
#else
uint32 eax, ebx, ecx, edx;
__asm__ __volatile__("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(1));
if ((ecx >> 28) & 1) {
eax = 7;
ecx = 0;
__asm__ __volatile__("cpuid" __asm__ __volatile__("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(eax), "c"(ecx)); : "a"(1));
if ((ecx >> 28) & 1) {
eax = 7;
ecx = 0;
__asm__ __volatile__("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(eax), "c"(ecx));
}
#endif
if ((ebx >> 16) & 1) {
return 1; // AVX-512F
} }
if ((ebx >> 17) & 1) {
return 2; // AVX-512DQ
}
if ((ebx >> 21) & 1) {
return 3; // AVX-512IFMA
}
if ((ebx >> 26) & 1) {
return 4; // AVX-512PF
}
if ((ebx >> 27) & 1) {
return 5; // AVX-512ER
}
if ((ebx >> 28) & 1) {
return 6; // AVX-512CD
}
if ((ebx >> 30) & 1) {
return 7; // AVX-512BW
}
if ((ebx >> 31) & 1) {
return 8; // AVX-512VL
}
return 0;
#endif #endif
if ((ebx >> 16) & 1) {
return 1; // AVX-512F
}
if ((ebx >> 17) & 1) {
return 2; // AVX-512DQ
}
if ((ebx >> 21) & 1) {
return 3; // AVX-512IFMA
}
if ((ebx >> 26) & 1) {
return 4; // AVX-512PF
}
if ((ebx >> 27) & 1) {
return 5; // AVX-512ER
}
if ((ebx >> 28) & 1) {
return 6; // AVX-512CD
}
if ((ebx >> 30) & 1) {
return 7; // AVX-512BW
}
if ((ebx >> 31) & 1) {
return 8; // AVX-512VL
}
return 0;
} }
const char AVX512_VERSIONS[8][12] = { const char AVX512_VERSIONS[8][12] = {
@ -180,32 +252,35 @@ const char AVX512_VERSIONS[8][12] = {
}; };
bool supports_abm() { bool supports_abm() {
bool popcnt_supported; #if ARM
bool lzcnt_supported; return 0;
#ifdef _MSC_VER
int cpuInfo[4];
__cpuid(cpuInfo, 0x80000001);
popcnt_supported = (cpuInfo[2] & (1 << 5)) != 0;
lzcnt_supported = (cpuInfo[1] & (1 << 5)) != 0;
#else #else
uint32 eax, ebx, ecx, edx; bool popcnt_supported;
eax = 0x80000001; bool lzcnt_supported;
__asm__ __volatile__ ( #ifdef _MSC_VER
"cpuid" int cpuInfo[4];
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) __cpuid(cpuInfo, 0x80000001);
: "a"(eax)
);
// Check if the ABM (POPCNT and LZCNT) bits are set popcnt_supported = (cpuInfo[2] & (1 << 5)) != 0;
popcnt_supported = (ecx & (1 << 5)) != 0; lzcnt_supported = (cpuInfo[1] & (1 << 5)) != 0;
lzcnt_supported = (ebx & (1 << 5)) != 0; #else
uint32 eax, ebx, ecx, edx;
eax = 0x80000001;
__asm__ __volatile__ (
"cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(eax)
);
// Check if the ABM (POPCNT and LZCNT) bits are set
popcnt_supported = (ecx & (1 << 5)) != 0;
lzcnt_supported = (ebx & (1 << 5)) != 0;
#endif
return popcnt_supported && lzcnt_supported;
#endif #endif
return popcnt_supported && lzcnt_supported;
} }
#endif #endif

View File

@ -16,21 +16,36 @@
struct int16_8 { struct int16_8 {
union { union {
__m128i s; #if ARM
svint16_t s;
#else
__m128i s;
#endif
int16 v[8]; int16 v[8];
}; };
}; };
struct int16_16 { struct int16_16 {
union { union {
__m256i s; #if ARM
svint16_t s;
#else
__m256i s;
#endif
int16 v[16]; int16 v[16];
}; };
}; };
struct int16_32 { struct int16_32 {
union { union {
__m512i s; #if ARM
svint16_t s;
#else
__m512i s;
#endif
int16 v[32]; int16 v[32];
}; };
}; };

View File

@ -24,21 +24,36 @@
struct int32_4 { struct int32_4 {
union { union {
__m128i s; #if ARM
svint32_t s;
#else
__m128i s;
#endif
int32 v[4]; int32 v[4];
}; };
}; };
struct int32_8 { struct int32_8 {
union { union {
__m256i s; #if ARM
svint32_t s;
#else
__m256i s;
#endif
int32 v[8]; int32 v[8];
}; };
}; };
struct int32_16 { struct int32_16 {
union { union {
__m512i s; #if ARM
svint32_t s;
#else
__m512i s;
#endif
int32 v[16]; int32 v[16];
}; };
}; };

View File

@ -17,21 +17,36 @@
struct int64_2 { struct int64_2 {
union { union {
__m128i s; #if ARM
svint64_t s;
#else
__m128i s;
#endif
int64 v[2]; int64 v[2];
}; };
}; };
struct int64_4 { struct int64_4 {
union { union {
__m256i s; #if ARM
svint64_t s;
#else
__m256i s;
#endif
int64 v[4]; int64 v[4];
}; };
}; };
struct int64_8 { struct int64_8 {
union { union {
__m512i s; #if ARM
svint64_t s;
#else
__m512i s;
#endif
int64 v[8]; int64 v[8];
}; };
}; };

View File

@ -18,21 +18,36 @@
struct int8_16 { struct int8_16 {
union { union {
__m128i s; #if ARM
svint8_t s;
#else
__m128i s;
#endif
int8 v[16]; int8 v[16];
}; };
}; };
struct int8_32 { struct int8_32 {
union { union {
__m256i s; #if ARM
svint8_t s;
#else
__m256i s;
#endif
int8 v[32]; int8 v[32];
}; };
}; };
struct int8_64 { struct int8_64 {
union { union {
__m512i s; #if ARM
svint8_t s;
#else
__m512i s;
#endif
int8 v[64]; int8 v[64];
}; };
}; };

View File

@ -10,9 +10,14 @@
#ifndef TOS_UTILS_MATH_UTILS_H #ifndef TOS_UTILS_MATH_UTILS_H
#define TOS_UTILS_MATH_UTILS_H #define TOS_UTILS_MATH_UTILS_H
#include "../stdlib/Intrinsics.h"
#include <math.h> #include <math.h>
#if ARM
#include "../stdlib/IntrinsicsArm.h"
#else
#include "../stdlib/Intrinsics.h"
#endif
#define OMS_PI 3.14159265358979323846f #define OMS_PI 3.14159265358979323846f
#define OMS_PI_OVER_TWO (OMS_PI / 2.0f) #define OMS_PI_OVER_TWO (OMS_PI / 2.0f)
#define OMS_PI_OVER_FOUR (OMS_PI / 4.0f) #define OMS_PI_OVER_FOUR (OMS_PI / 4.0f)

View File

@ -222,7 +222,7 @@ int32 int_to_str(int64 number, char *str, const char thousands = ',') {
str[k] = temp; str[k] = temp;
} }
return i - 1; return i;
} }
inline inline
@ -301,6 +301,18 @@ str_concat(
return src1_length + src2_length; return src1_length + src2_length;
} }
inline
void str_concat(
const char* src, size_t src_length,
int64 data,
char* dst
) {
memcpy(dst, src, src_length);
int32 len = int_to_str(data, dst + src_length);
dst[src_length + len] = '\0';
}
inline inline
char* strtok(char* str, const char* __restrict delim, char* *key) { char* strtok(char* str, const char* __restrict delim, char* *key) {
char* result; char* result;

View File

@ -14,8 +14,6 @@
#include "../stdlib/Types.h" #include "../stdlib/Types.h"
#define ARRAY_COUNT(a) (sizeof(a) / sizeof((a)[0]))
struct FileBody { struct FileBody {
uint64 size = 0; // doesn't include null termination (same as strlen) uint64 size = 0; // doesn't include null termination (same as strlen)
byte* content; byte* content;