diff --git a/camera/Camera.h b/camera/Camera.h index 39eb3f8..8b0fe53 100644 --- a/camera/Camera.h +++ b/camera/Camera.h @@ -16,110 +16,428 @@ #include "CameraMovement.h" +#define CAMERA_MAX_INPUTS 4 + +// @todo Please check out if we can switch to quaternions. We tried but failed. +// The functions with a 2 at the end are our current backup solution which shouldn't be used (probably) + struct Camera { - // left handed cartesian coordinates v3_f32 location; v4_f32 orientation; + v3_f32 front; + v3_f32 right; + v3_f32 up; + v3_f32 world_up; + float speed; float sensitivity; float zoom; + + float fov; + float znear; + float zfar; + float aspect; }; -void camera_look_at(Camera* camera, const v3_f32* at) +void +camera_update_vectors2(Camera* camera) { + camera->front.x = cosf(OMS_DEG2RAD(camera->orientation.x)) * cosf(OMS_DEG2RAD(camera->orientation.y)); + camera->front.y = sinf(OMS_DEG2RAD(camera->orientation.x)); + camera->front.z = cosf(OMS_DEG2RAD(camera->orientation.x)) * sinf(OMS_DEG2RAD(camera->orientation.y)); + vec3_normalize_f32(&camera->front); + vec3_cross(&camera->right, &camera->front, &camera->world_up); // @bug + vec3_normalize_f32(&camera->right); + + vec3_cross(&camera->up, &camera->right, &camera->front); + vec3_normalize_f32(&camera->up); } -// you can have up to 4 camera movement inputs at the same time -void camera_movement(Camera* camera, CameraMovement* movement, float dt) +void +camera_update_vectors(Camera* camera) { - f32 velocity = camera->speed * dt; + v3_f32 z = {0.0f, 0.0f, -1.0f}; + quaternion_rotate_vector(&camera->front, &camera->orientation, &z); + vec3_normalize_f32(&camera->front); - bool has_pos = false; - v4_f32 pos = {}; + vec3_cross(&camera->right, &camera->front, &camera->world_up); + vec3_normalize_f32(&camera->right); - bool has_view = false; - v3_f32 view = {}; - v4_f32 quaternion = {}; + vec3_cross(&camera->up, &camera->right, &camera->front); + vec3_normalize_f32(&camera->up); +} - for (int i = 0; i < 4; i++) { - 
switch(movement[i]) { - case CAMERA_MOVEMENT_FORWARD: { - pos.z = velocity; - has_pos = true; - } break; - case CAMERA_MOVEMENT_BACK: { - pos.z = velocity; - has_pos = true; - } break; - case CAMERA_MOVEMENT_LEFT: { - pos.x = velocity; - has_pos = true; - } break; - case CAMERA_MOVEMENT_RIGHT: { - pos.x = velocity; - has_pos = true; - } break; - case CAMERA_MOVEMENT_UP: { - pos.y = velocity; - has_pos = true; - } break; - case CAMERA_MOVEMENT_DOWN: { - pos.y = velocity; - has_pos = true; - } break; - case CAMERA_MOVEMENT_PITCH_UP: { - view.pitch += velocity; - has_view = true; - } break; - case CAMERA_MOVEMENT_PITCH_DOWN: { - view.pitch -= velocity; - has_view = true; - } break; - case CAMERA_MOVEMENT_ROLL_LEFT: { - view.roll += velocity; - has_view = true; - } break; - case CAMERA_MOVEMENT_ROLL_RIGHT: { - view.roll -= velocity; - has_view = true; - } break; - case CAMERA_MOVEMENT_YAW_LEFT: { - view.yaw += velocity; - has_view = true; - } break; - case CAMERA_MOVEMENT_YAW_RIGHT: { - view.yaw -= velocity; - has_view = true; - } break; - case CAMERA_MOVEMENT_ZOOM_IN: { - camera->zoom += velocity; - } break; - case CAMERA_MOVEMENT_ZOOM_OUT: { - camera->zoom -= velocity; - } break; - default: {} +void camera_rotate2(Camera* camera, float dx, float dy, float dt) +{ + f32 velocity = camera->sensitivity; // @todo do we need dt? 
+ + dx *= velocity; + dy *= velocity; + + camera->orientation.x += dy; + camera->orientation.y += dx; + + if (true) { + if (camera->orientation.x > 89.0f) { + camera->orientation.x = 89.0f; + } else if (camera->orientation.x < -89.0f) { + camera->orientation.x = -89.0f; + } + + if (camera->orientation.y > 360.0f || camera->orientation.y < -360.0f) { + camera->orientation.y -= (camera->orientation.y > 0.0f ? 360.0f : -360.0f); /* wrap toward zero for both signs; a plain -= 360 pushes values below -360 further out of range */ } } - // A position change updates the position AND the quaternion - if (has_pos) { - // @question this might be wrong/bad since pos is not a normalized vector - v4_f32 quat_temp = camera->orientation; - quaternion_rotate_euler(&camera->orientation, &quat_temp, &pos); + camera_update_vectors2(camera); +} - camera->location.x += pos.x; - camera->location.y += pos.y; - camera->location.z += pos.z; +void camera_rotate(Camera* camera, float dx, float dy, float dt) +{ + f32 velocity = camera->sensitivity; // @todo do we need dt? + + dx *= velocity; + dy *= velocity; + + v4_f32 yaw_quat; + quaternion_from_axis_angle(&yaw_quat, &camera->world_up, dx); + + v4_f32 pitch_quat; + quaternion_from_axis_angle(&pitch_quat, &camera->right, dy); + + v4_f32 result; + quaternion_multiply(&result, &camera->orientation, &pitch_quat); + quaternion_multiply(&camera->orientation, &yaw_quat, &result); + quaternion_unit(&camera->orientation); + + // constrain pitch + if (true) { + v3_f32 euler; + quaternion_to_euler(&camera->orientation, &euler); + + bool found_constrain = false; + + float pitch = euler.x; + if (pitch > 89.0f) { + pitch = 89.0f; + found_constrain = true; + } else if (pitch < -89.0f) { + pitch = -89.0f; + found_constrain = true; + } + + if (found_constrain) { + v4_f32 constrained; + quaternion_from_axis_angle(&constrained, &camera->right, pitch); + quaternion_multiply(&camera->orientation, &yaw_quat, &constrained); + quaternion_unit(&camera->orientation); + } } - // A view change only updates the quaternion - if (has_view) { - v4_f32 quat_temp = camera->orientation; - 
quaternion_from_euler(&quaternion, &view); - quaternion_multiply(&camera->orientation, &quat_temp, &quaternion); + camera_update_vectors(camera); +} + +// you can have up to 4 camera movement inputs at the same time +void camera_movement(Camera* camera, CameraMovement* movement, float dt, bool relative_to_world = true) +{ + f32 velocity = camera->speed * dt; + + if (relative_to_world) { + for (int i = 0; i < CAMERA_MAX_INPUTS; i++) { + switch(movement[i]) { + case CAMERA_MOVEMENT_FORWARD: { + camera->location.z += velocity; + } break; + case CAMERA_MOVEMENT_BACK: { + camera->location.z -= velocity; + } break; + case CAMERA_MOVEMENT_LEFT: { + camera->location.x -= velocity; + } break; + case CAMERA_MOVEMENT_RIGHT: { + camera->location.x += velocity; + } break; + case CAMERA_MOVEMENT_UP: { + camera->location.y += velocity; + } break; + case CAMERA_MOVEMENT_DOWN: { + camera->location.y -= velocity; + } break; + case CAMERA_MOVEMENT_PITCH_UP: { + camera->orientation.x += velocity; + } break; + case CAMERA_MOVEMENT_PITCH_DOWN: { + camera->orientation.x -= velocity; + } break; + case CAMERA_MOVEMENT_ROLL_LEFT: { + camera->orientation.z += velocity; + } break; + case CAMERA_MOVEMENT_ROLL_RIGHT: { + camera->orientation.z -= velocity; + } break; + case CAMERA_MOVEMENT_YAW_LEFT: { + camera->orientation.y += velocity; + } break; + case CAMERA_MOVEMENT_YAW_RIGHT: { + camera->orientation.y -= velocity; + } break; + case CAMERA_MOVEMENT_ZOOM_IN: { + camera->zoom += velocity; + } break; + case CAMERA_MOVEMENT_ZOOM_OUT: { + camera->zoom -= velocity; + } break; + default: {} + } + } + } else { + v3_f32 forward = camera->front; + + v3_f32 right; + vec3_cross(&right, &forward, &camera->world_up); + vec3_normalize_f32(&right); + + v3_f32 up; + vec3_cross(&up, &right, &forward); + vec3_normalize_f32(&up); + + for (int i = 0; i < CAMERA_MAX_INPUTS; i++) { + switch(movement[i]) { + case CAMERA_MOVEMENT_FORWARD: { + camera->location.x += forward.x * velocity; + camera->location.y += 
forward.y * velocity; + camera->location.z += forward.z * velocity; + } break; + case CAMERA_MOVEMENT_BACK: { + camera->location.x -= forward.x * velocity; + camera->location.y -= forward.y * velocity; + camera->location.z -= forward.z * velocity; + } break; + case CAMERA_MOVEMENT_LEFT: { + camera->location.x -= right.x * velocity; + camera->location.y -= right.y * velocity; + camera->location.z -= right.z * velocity; + } break; + case CAMERA_MOVEMENT_RIGHT: { + camera->location.x += right.x * velocity; + camera->location.y += right.y * velocity; + camera->location.z += right.z * velocity; + } break; + case CAMERA_MOVEMENT_UP: { + camera->location.x += up.x * velocity; + camera->location.y += up.y * velocity; + camera->location.z += up.z * velocity; + } break; + case CAMERA_MOVEMENT_DOWN: { + camera->location.x -= up.x * velocity; + camera->location.y -= up.y * velocity; + camera->location.z -= up.z * velocity; + } break; + case CAMERA_MOVEMENT_PITCH_UP: { + camera->orientation.x += velocity; + } break; + case CAMERA_MOVEMENT_PITCH_DOWN: { + camera->orientation.x -= velocity; + } break; + case CAMERA_MOVEMENT_ROLL_LEFT: { + camera->orientation.z += velocity; + } break; + case CAMERA_MOVEMENT_ROLL_RIGHT: { + camera->orientation.z -= velocity; + } break; + case CAMERA_MOVEMENT_YAW_LEFT: { + camera->orientation.y += velocity; /* yaw is orientation.y, as in the world-relative branch; .z is roll */ + } break; + case CAMERA_MOVEMENT_YAW_RIGHT: { + camera->orientation.y -= velocity; + } break; + case CAMERA_MOVEMENT_ZOOM_IN: { + camera->zoom += velocity; + } break; + case CAMERA_MOVEMENT_ZOOM_OUT: { + camera->zoom -= velocity; + } break; + default: {} + } + } } } +inline +void camera_projection_matrix_lh(const Camera* __restrict camera, float* __restrict projection) +{ + mat4_identity_sparse(projection); + mat4_perspective_sparse_lh( + projection, + camera->fov, + camera->aspect, + camera->znear, + camera->zfar + ); +} + +inline +void camera_projection_matrix_rh(const Camera* __restrict camera, float* __restrict projection) +{ + 
mat4_identity_sparse(projection); + mat4_perspective_sparse_rh( + projection, + camera->fov, + camera->aspect, + camera->znear, + camera->zfar + ); +} + +// This is usually not used, since it is included in the view matrix +// expects the identity matrix +inline +void camera_translation_matrix_sparse(const Camera* __restrict camera, float* translation) +{ + translation[3] = camera->location.x; + translation[7] = camera->location.y; + translation[11] = camera->location.z; +} + +// @performance this function seems worth while to fully convert to simd +// even if we are not really looping anything we do have some repetitive operations (rotate, dot) +/* +void +camera_view_matrix_sparse(const Camera* __restrict camera, float* __restrict view) +{ + // @performance orientation gets converted to a quat every time, pull this out + + v3_f32 up = {0.0f, 1.0f, 0.0f}; + quaternion_rotate_active(&up, camera->orientation.pitch, camera->orientation.yaw, camera->orientation.roll); + + v3_f32 right = {1.0f, 0.0f, 0.0f}; + quaternion_rotate_active(&up, camera->orientation.pitch, camera->orientation.yaw, camera->orientation.roll); + + v3_f32 forward = {0.0f, 0.0f, 1.0f}; + quaternion_rotate_active(&forward, camera->orientation.pitch, camera->orientation.yaw, camera->orientation.roll); + + view[0] = right.x; + view[1] = right.y; + view[2] = right.z; + + view[4] = up.x; + view[5] = up.y; + view[6] = up.z; + + view[8] = -forward.x; + view[9] = -forward.y; + view[10] = -forward.z; + + // Set the translation part + v3_f32 right_v3 = {right.x, right.y, right.z}; + view[3] = -v3_dot(&right_v3, &camera->location); + + v3_f32 up_v3 = {up.x, up.y, up.z}; + view[7] = -v3_dot(&up_v3, &camera->location); + + v3_f32 forward_v3 = {forward.x, forward.y, forward.z}; + view[11] = v3_dot(&forward_v3, &camera->location); + + // Last element of matrix (homogeneous coordinate) + view[15] = 1.0f; +} +*/ + +// 
https://github.com/g-truc/glm/blob/33b4a621a697a305bc3a7610d290677b96beb181/glm/ext/matrix_transform.inl +// https://learnopengl.com/code_viewer_gh.php?code=includes/learnopengl/camera.h +void +camera_view_matrix_sparse_lh(const Camera* __restrict camera, float* __restrict view) +{ + // We are skipping some things because some things either get neutralized + // (e.g. position - (position + front), other values are already normalized (e.g. front) + v3_f32 f = { camera->front.x, camera->front.y, camera->front.z }; + + v3_f32 s; + vec3_cross(&s, &camera->up, &f); + vec3_normalize_f32(&s); + + v3_f32 u; + vec3_cross(&u, &f, &s); + + view[0] = s.x; + view[1] = s.y; + view[2] = s.z; + view[3] = 0.0f; + view[4] = u.x; + view[5] = u.y; + view[6] = u.z; + view[7] = 0.0f; + view[8] = f.x; + view[9] = f.y; + view[10] = f.z; + view[11] = 0; + view[12] = -vec3_dot(&s, &camera->location); + view[13] = -vec3_dot(&u, &camera->location); + view[14] = -vec3_dot(&f, &camera->location); + view[15] = 1.0f; +} + +void +camera_view_matrix_sparse_rh(const Camera* __restrict camera, float* __restrict view) +{ + // We are skipping some things because some things either get neutralized + // (e.g. position - (position + front), other values are already normalized (e.g. 
front) + v3_f32 f = { -camera->front.x, -camera->front.y, -camera->front.z }; + + v3_f32 s; + vec3_cross(&s, &f, &camera->up); + vec3_normalize_f32(&s); + + v3_f32 u; + vec3_cross(&u, &s, &f); + + view[0] = s.x; + view[1] = s.y; + view[2] = s.z; + view[3] = 0.0f; + view[4] = u.x; + view[5] = u.y; + view[6] = u.z; + view[7] = 0.0f; + view[8] = f.x; + view[9] = f.y; + view[10] = f.z; + view[11] = 0; + view[12] = -vec3_dot(&s, &camera->location); + view[13] = -vec3_dot(&u, &camera->location); + view[14] = vec3_dot(&f, &camera->location); + view[15] = 1.0f; +} + +void +camera_view_right_handed2(float* view) +{ + // Translation part + view[12] = view[3]; + view[13] = view[7]; + view[14] = view[11]; + view[15] = 1.0f; // @todo could be removed + + float temp; + temp = view[1]; + view[1] = view[4]; + view[4] = temp; + + temp = view[2]; + view[2] = view[8]; + view[8] = -temp; + + view[3] = 0.0f; // @todo could be removed + + temp = view[6]; + view[6] = view[9]; + view[9] = -temp; + + view[7] = 0.0f; // @todo could be removed + view[10] = -view[10]; + view[11] = 0.0f; // @todo could be removed +} + #endif \ No newline at end of file diff --git a/camera/CameraMovement.h b/camera/CameraMovement.h index 6798af1..65885e1 100644 --- a/camera/CameraMovement.h +++ b/camera/CameraMovement.h @@ -10,6 +10,8 @@ #define TOS_CAMERA_MOVEMENT_H enum CameraMovement { + CAMERA_MOVEMENT_NONE, + CAMERA_MOVEMENT_FORWARD, CAMERA_MOVEMENT_BACK, @@ -19,6 +21,8 @@ enum CameraMovement { CAMERA_MOVEMENT_UP, CAMERA_MOVEMENT_DOWN, + CAMERA_MOVEMENT_FREE_ORIENTATION, + CAMERA_MOVEMENT_PITCH_UP, CAMERA_MOVEMENT_PITCH_DOWN, diff --git a/gpuapi/opengl/Opengl.h b/gpuapi/opengl/Opengl.h index 2691ff4..d798c5c 100644 --- a/gpuapi/opengl/Opengl.h +++ b/gpuapi/opengl/Opengl.h @@ -727,6 +727,9 @@ typedef char GLchar; typedef ptrdiff_t GLsizeiptr; typedef ptrdiff_t GLintptr; +// Some apis require a different sign for various operations (left/right) +#define GPU_API_SIGN (-1) + #if _WIN32 #include "OpenglWin32.h" 
#else diff --git a/gpuapi/opengl/OpenglUtils.h b/gpuapi/opengl/OpenglUtils.h index 00ec9fd..ca2d0d7 100644 --- a/gpuapi/opengl/OpenglUtils.h +++ b/gpuapi/opengl/OpenglUtils.h @@ -14,6 +14,7 @@ #include "../../utils/TestUtils.h" #include "../../models/Attrib.h" #include "../../object/Texture.h" +#include "../../utils/StringUtils.h" #include "../RenderUtils.h" #include "Opengl.h" @@ -24,64 +25,53 @@ #include "../../platform/win32/Window.h" #endif -/* -struct Window { - bool is_fullscreen; - int32 width; - int32 height; - char name[32]; +inline +void change_viewport(Window* w, int offset_x = 0, int offset_y = 0) +{ + glViewport(offset_x, offset_y, w->width, w->height); +} - int32 x; - int32 y; +inline +void vsync_set(bool on) +{ + if (wglSwapIntervalEXT) { wglSwapIntervalEXT((int) on); } // pointer is null when wglGetProcAddress did not find the extension; opengl_init applies the same check +} - GLFWwindow* hwnd_lib; +inline +void wireframe_mode(bool on) +{ + if (on) { + glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); + } else { + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + } +} - #ifdef _WIN32 - HWND hwnd; - #endif +struct OpenglInfo { + char* renderer; + int major; + int minor; }; -*/ -/* -inline -void window_create(Window* window, void*) +void opengl_info(OpenglInfo* info) { - //GLFWmonitor *monitor = glfwGetPrimaryMonitor(); - window->hwnd_lib = glfwCreateWindow( - window->width, - window->height, - window->name, - NULL, - NULL - ); + info->renderer = (char *) glGetString(GL_RENDERER); + info->major = 1; + info->minor = 0; - ASSERT_SIMPLE(window->hwnd_lib); + char* version = (char *) glGetString(GL_VERSION); - //glfwSetInputMode(window->hwnd_lib, GLFW_CURSOR, GLFW_CURSOR_DISABLED); + for (char *at = version; at && *at; ++at) { // glGetString can return NULL, in which case the 1.0 defaults above are kept + if (*at == '.') { + info->major = str_to_int(version); - glfwMakeContextCurrent(window->hwnd_lib); - glfwWindowHint(GLFW_VISIBLE, GLFW_FALSE); - - #if GLFW_EXPOSE_NATIVE_WIN32 - window->hwnd = glfwGetWin32Window(window->hwnd_lib); - #endif + ++at; + info->minor = str_to_int(at); + break; + } + } } -inline -void window_open(Window* window) -{ - 
glfwMakeContextCurrent(window->hwnd_lib); - glViewport(window->x, window->y, window->width, window->height); - glfwWindowHint(GLFW_VISIBLE, GLFW_FALSE); -} - -inline -void window_close(Window* window) -{ - glfwWindowShouldClose(window->hwnd_lib); -} -*/ - inline uint32 get_texture_data_type(uint32 texture_data_type) { diff --git a/gpuapi/opengl/OpenglWin32.h b/gpuapi/opengl/OpenglWin32.h index edb2883..e71730f 100644 --- a/gpuapi/opengl/OpenglWin32.h +++ b/gpuapi/opengl/OpenglWin32.h @@ -1184,8 +1184,10 @@ typedef HGLRC WINAPI wgl_create_context_attribs_arb(HDC hDC, HGLRC hShareContext typedef BOOL WINAPI wgl_get_pixel_format_attrib_iv_arb(HDC hdc, int iPixelFormat, int iLayerPlane, UINT nAttributes, const int *piAttributes, int *piValues); typedef BOOL WINAPI wgl_get_pixel_format_attrib_fv_arb(HDC hdc, int iPixelFormat, int iLayerPlane, UINT nAttributes, const int *piAttributes, FLOAT *pfValues); typedef BOOL WINAPI wgl_choose_pixel_format_arb(HDC hdc, const int *piAttribIList, const FLOAT *pfAttribFList, UINT nMaxFormats, int *piFormats, UINT *nNumFormats); +typedef BOOL WINAPI wgl_swap_interval_ext(int interval); typedef const char * WINAPI wgl_get_extensions_string_ext(void); +// @question consider to make all these functions global struct OpenGL { type_glTexImage2DMultisample* glTexImage2DMultisample; type_glBindFramebuffer* glBindFramebuffer; @@ -1249,9 +1251,12 @@ struct OpenGL { wgl_choose_pixel_format_arb* wglChoosePixelFormatARB; wgl_create_context_attribs_arb* wglCreateContextAttribsARB; + wgl_swap_interval_ext* wglSwapIntervalEXT; wgl_get_extensions_string_ext* wglGetExtensionsStringEXT; }; +static wgl_swap_interval_ext* wglSwapIntervalEXT; + void set_pixel_format(HDC hdc, OpenGL* gl) { int suggested_pixel_format_idx = 0; @@ -1368,6 +1373,8 @@ void opengl_init(Window* window, OpenGL* gl) gl->wglChoosePixelFormatARB = (wgl_choose_pixel_format_arb *) wglGetProcAddress("wglChoosePixelFormatARB"); gl->wglCreateContextAttribsARB = 
(wgl_create_context_attribs_arb *) wglGetProcAddress("wglCreateContextAttribsARB"); + gl->wglSwapIntervalEXT = (wgl_swap_interval_ext *) wglGetProcAddress("wglSwapIntervalEXT"); + wglSwapIntervalEXT = gl->wglSwapIntervalEXT; gl->wglGetExtensionsStringEXT = (wgl_get_extensions_string_ext *) wglGetProcAddress("wglGetExtensionsStringEXT"); set_pixel_format(window->hdc, gl); @@ -1445,6 +1452,10 @@ void opengl_init(Window* window, OpenGL* gl) gl->glDrawArraysInstanced = (type_glDrawArraysInstanced *) wglGetProcAddress("glDrawArraysInstanced"); gl->glDrawElementsInstanced = (type_glDrawElementsInstanced *) wglGetProcAddress("glDrawElementsInstanced"); + if (gl->wglSwapIntervalEXT) { + gl->wglSwapIntervalEXT(0); + } + // @todo now do: OpenGLInit } diff --git a/input/Input.h b/input/Input.h index d41b025..1c34c40 100644 --- a/input/Input.h +++ b/input/Input.h @@ -9,25 +9,88 @@ #ifndef TOS_INPUT_H #define TOS_INPUT_H +// @question Consider to change mouse to secondary input device and keyboard to primary input device and also rename the functions etc. + +// How many concurrent mouse/secondary input device presses do we recognize +#define MAX_MOUSE_PRESSES 3 + +// How many concurrent primary key/button presses can be handled? 
#define MAX_KEY_PRESSES 5 + +// How many keys/buttons do we support for the primary input device +#define MAX_KEYBOARD_KEYS 255 + +// How many mouse/secondary input device keys/buttons do we support +#define MAX_MOUSE_KEYS 5 + #define MIN_INPUT_DEVICES 2 +// How often can a key be assigned to a different hotkey +#define MAX_KEY_TO_HOTKEY 5 + +// How many buttons together are allowed to form a hotkey +#define MAX_HOTKEY_COMBINATION 3 + +// These values are used as bit flags to hint if a "key" is a keyboard/primary or mouse/secondary input +// When adding a keybind the "key" can only be uint8 but we expand it to an int and set the first bit accordingly +#define INPUT_KEYBOARD_PREFIX 0x80000000 /* highest bit of the 32 bit key value, so it never overlaps the uint8 key id */ +#define INPUT_MOUSE_PREFIX 0 + #define INPUT_TYPE_MOUSE_KEYBOARD 0x01 #define INPUT_TYPE_OTHER 0x03 #define MIN_CONTROLLER_DEVICES 4 #include "../stdlib/Types.h" +#include "../utils/BitUtils.h" #ifdef _WIN32 #include #endif +// @todo I'm not sure if I like the general input handling +// Having separate keyboard_down and mouse_down etc. is a little bit weird in the functions below + +struct InputMapping { + // A key/button can be bound to up to 5 different hotkeys + // This is used to check if a key/button has a hotkey association + uint8 keys[MAX_KEYBOARD_KEYS + MAX_MOUSE_KEYS][MAX_KEY_TO_HOTKEY]; + + // A hotkey can be bound to a combination of up to 3 key/button presses + uint8 hotkey_count; + uint8* hotkeys; +}; + +// @question Maybe we should also add a third key_down array for controllers and some special controller functions here to just handle everything in one struct +// Or think about completely splitting all states (mouse, keyboard, other) struct InputState { + // State of the hotkeys, resulting from the device input + // @question maybe create a separate define and make it a little bit larger? + uint8 state_hotkeys[MAX_KEY_PRESSES]; + + uint8 keys_down[MAX_KEY_PRESSES]; + + // @question Why do we even need this? shouldn't we only care about the current keys down? 
+ uint8 keys_up[MAX_KEY_PRESSES]; + + uint32 mouse_down; + + int32 dx; + int32 dy; + + uint32 x; + uint32 y; + + int16 wheel_delta = 0; + int16 hwheel_delta = 0; + + uint64 keys_down_time[MAX_MOUSE_PRESSES + MAX_KEY_PRESSES]; +}; + +struct Input { // Device bool is_connected = false; byte type = INPUT_TYPE_OTHER; - double time; #ifdef _WIN32 // @todo maybe replace with id?! @@ -36,46 +99,20 @@ struct InputState { HANDLE handle_mouse; #endif - // After handling the keyboard state change the game loop should set this to false - bool state_change_keyboard = false; - - // We only consider up to 4 pressed keys - // Depending on the keyboard you may only be able to detect a limited amount of key presses anyway - int up_index; - uint8 keys_down_old[MAX_KEY_PRESSES]; - - int down_index; - uint8 keys_down[MAX_KEY_PRESSES]; - - // Mouse - // After handling the mouse state change the game loop should set this to false + bool state_change_button = false; bool state_change_mouse = false; + bool state_change_mouse_button = true; - uint32 x; - uint32 y; + bool mouse_movement; - uint32 x_last; - uint32 y_last; + InputState state; + InputMapping input_mapping; - // https://usb.org/sites/default/files/hid1_11.pdf Page 71 or 61 = 18 - // the bitfield represents which button is pressed - uint32 mouse_down_old; - uint32 mouse_down; - - int16 wheel_delta = 0; - uint32 raw_button = 0; + // @todo we probably don't need this + InputState state_old; }; -void input_transition(InputState* state) -{ - // Mouse - state->x_last = state->x; - state->y_last = state->y; - - state->state_change_mouse = false; -} - -struct ControllerState { +struct ControllerInput { uint32 id = 0; bool is_connected = false; @@ -104,4 +141,263 @@ struct ControllerState { bool stickr_press = false; }; +inline +void mouse_backup_state(Input* input) +{ + input->state_old.mouse_down = input->state.mouse_down; + + input->state_old.x = input->state.x; + input->state_old.y = input->state.y; + + input->state_old.wheel_delta = 
input->state.wheel_delta; + input->state_old.hwheel_delta = input->state.hwheel_delta; /* back up the horizontal wheel from its own field, not from wheel_delta */ +} + +inline +void keyboard_clean_state(InputState* state) +{ + memset(state->keys_down, 0, MAX_KEY_PRESSES * sizeof(uint8)); + memset(state->keys_up, 0, MAX_KEY_PRESSES * sizeof(uint8)); + memset(state->keys_down_time, 0, (MAX_MOUSE_PRESSES + MAX_KEY_PRESSES) * sizeof(uint64)); +} + +inline +void keyboard_backup_state(Input* input) +{ + memcpy(input->state_old.keys_down, input->state.keys_down, MAX_KEY_PRESSES * sizeof(uint8)); + memcpy(input->state_old.keys_up, input->state.keys_up, MAX_KEY_PRESSES * sizeof(uint8)); +} + +inline +bool keyboard_is_pressed(const InputState* state, byte key) +{ + return state->keys_down[0] == key + || state->keys_down[1] == key + || state->keys_down[2] == key + || state->keys_down[3] == key + || state->keys_down[4] == key; +} + +inline +bool keyboard_is_released(const InputState* state, byte key) +{ + return state->keys_up[0] == key + || state->keys_up[1] == key + || state->keys_up[2] == key + || state->keys_up[3] == key + || state->keys_up[4] == key; +} + +inline +bool keyboard_are_pressed( + const InputState* state, + byte key0, byte key1 = 0, byte key2 = 0, byte key3 = 0, byte key4 = 0 +) { + return (key0 != 0 && keyboard_is_pressed(state, key0)) + && (key1 == 0 || keyboard_is_pressed(state, key1)) + && (key2 == 0 || keyboard_is_pressed(state, key2)) + && (key3 == 0 || keyboard_is_pressed(state, key3)) + && (key4 == 0 || keyboard_is_pressed(state, key4)); +} + +// We are binding hotkeys bi-directional +void +input_add_hotkey( + InputMapping* mapping, uint8 hotkey, + uint32 key0, uint32 key1 = 0, uint32 key2 = 0 +) +{ + int count = 0; + + int key0_offset = ((bool) (key0 & INPUT_KEYBOARD_PREFIX)) * MAX_MOUSE_KEYS; + int key1_offset = ((bool) (key1 & INPUT_KEYBOARD_PREFIX)) * MAX_MOUSE_KEYS; + int key2_offset = ((bool) (key2 & INPUT_KEYBOARD_PREFIX)) * MAX_MOUSE_KEYS; + + key0 = key0 & ~INPUT_KEYBOARD_PREFIX; + key1 = key1 & ~INPUT_KEYBOARD_PREFIX; 
+ key2 = key2 & ~INPUT_KEYBOARD_PREFIX; + + // Define required keys for hotkey + if (key0 != 0) { + // Note: -1 since the hotkeys MUST start at 1 (0 is a special value for empty) + mapping->hotkeys[(hotkey - 1) * MAX_HOTKEY_COMBINATION] = (uint8) (key0 + key0_offset); + ++count; + } + + if (key1 != 0) { + // Note: -1 since the hotkeys MUST start at 1 (0 is a special value for empty) + mapping->hotkeys[(hotkey - 1) * MAX_HOTKEY_COMBINATION + count] = (uint8) (key1 + key1_offset); + ++count; + } + + if (key2 != 0) { + // Note: -1 since the hotkeys MUST start at 1 (0 is a special value for empty) + mapping->hotkeys[(hotkey - 1) * MAX_HOTKEY_COMBINATION + count] = (uint8) (key2 + key2_offset); + } + + // Bind key to hotkey + for (int i = 0; i < MAX_KEY_TO_HOTKEY; ++i) { + if (key0 == 0 && key1 == 0 && key2 == 0) { + break; + } + + if (key0 != 0 && mapping->keys[key0 + key0_offset - 1][i] == 0) { + mapping->keys[key0 + key0_offset - 1][i] = hotkey; + key0 = 0; // prevent adding same key again + } + + if (key1 != 0 && mapping->keys[key1 + key1_offset - 1][i] == 0) { + mapping->keys[key1 + key1_offset - 1][i] = hotkey; + key1 = 0; // prevent adding same key again + } + + if (key2 != 0 && mapping->keys[key2 + key2_offset - 1][i] == 0) { + mapping->keys[key2 + key2_offset - 1][i] = hotkey; + key2 = 0; // prevent adding same key again + } + } +} + +inline +bool hotkey_is_active(const InputState* state, uint8 hotkey) +{ + return state->state_hotkeys[0] == hotkey + || state->state_hotkeys[1] == hotkey + || state->state_hotkeys[2] == hotkey + || state->state_hotkeys[3] == hotkey + || state->state_hotkeys[4] == hotkey; +} + +// similar to hotkey_is_active but instead of just performing a lookup in the input_hotkey_state created results +// this is actively checking the current input state (not the hotkey state) +// @performance This seems like a much better simpler solution no? +// However, it is probably a slower solution after calling this function many times? 
+// Remember, we would call this function for almost every possible hotkey (depending on context) per frame +inline +bool hotkey_is_pressed(const InputState* __restrict state, const InputMapping* __restrict mapping, uint8 hotkey) +{ + uint8 key0 = mapping->hotkeys[(hotkey - 1) * MAX_HOTKEY_COMBINATION]; + uint8 key1 = mapping->hotkeys[(hotkey - 1) * MAX_HOTKEY_COMBINATION + 1]; + uint8 key2 = mapping->hotkeys[(hotkey - 1) * MAX_HOTKEY_COMBINATION + 2]; + + bool is_pressed = false; + if (key0 > MAX_MOUSE_KEYS) { + key0 -= MAX_MOUSE_KEYS; + is_pressed = keyboard_is_pressed(state, key0); + } else if (key0 > 0) { + is_pressed = IS_BIT_SET(state->mouse_down, key0 - 1); + } + + if (!is_pressed || key1 == 0) { + return is_pressed; + } + + if (key1 > MAX_MOUSE_KEYS) { + key1 -= MAX_MOUSE_KEYS; + is_pressed &= keyboard_is_pressed(state, key1); + } else if (key1 > 0) { + is_pressed &= IS_BIT_SET(state->mouse_down, key1 - 1); + } + + if (!is_pressed || key2 == 0) { + return is_pressed; + } + + if (key2 > MAX_MOUSE_KEYS) { + key2 -= MAX_MOUSE_KEYS; + is_pressed &= keyboard_is_pressed(state, key2); + } else if (key2 > 0) { + is_pressed &= IS_BIT_SET(state->mouse_down, key2 - 1); + } + + return is_pressed; +} + +void +input_hotkey_state(InputState* __restrict state, const InputMapping* mapping) +{ + // @bug isn't there a bug, MAX_KEY_PRESSES is the keyboard limit, what about additional mouse inputs? 
+ + memset(state->state_hotkeys, 0, sizeof(uint8) * MAX_KEY_PRESSES); + + int i = 0; + + // @performance It would be nice if we could skip this loop by checking keyboard_changed similar to the mouse loop further down + // The problem is that this loop checks both mouse and keyboard + + // Check every key down state + for (int down_state = 0; down_state < MAX_KEY_PRESSES; ++down_state) { + if (state->keys_down[down_state] == 0) { + // no key defined for this down state + continue; + } + + // Is a key defined for this state AND is at least one hotkey defined for this key + // If no hotkey is defined we don't care + // Careful, remember MAX_MOUSE_KEYS offset + const uint8* hotkeys_for_key = mapping->keys[state->keys_down[down_state] + MAX_MOUSE_KEYS - 1]; + if (hotkeys_for_key[0] == 0) { + // no possible hotkey associated with this key + continue; + } + + // Check every possible hotkey + // Since multiple input devices have their own button/key indices we have to do this weird range handling + for (int possible_hotkey_idx = 0; possible_hotkey_idx < MAX_KEY_TO_HOTKEY; ++possible_hotkey_idx) { + // We only support a limited amount of active hotkeys + if (i >= MAX_KEY_PRESSES) { + return; + } + + bool is_pressed = hotkeys_for_key[possible_hotkey_idx] != 0 && hotkey_is_pressed(state, mapping, hotkeys_for_key[possible_hotkey_idx]); /* slot value 0 means empty; passing it on would index mapping->hotkeys at (0 - 1) * MAX_HOTKEY_COMBINATION */ + + // store active hotkey, if it is not already active + if (is_pressed && !hotkey_is_active(state, hotkeys_for_key[possible_hotkey_idx])) { + state->state_hotkeys[i] = hotkeys_for_key[possible_hotkey_idx]; + ++i; + } + } + } + + // @performance we could also check if the mouse state even changed + if (state->mouse_down == 0 || i >= MAX_KEY_PRESSES) { + return; + } + + // We now also need to check if there are hotkeys for the mouse buttons + // Some are already handled in the previous section, but some might not be handled, since they are mouse only + // But this also means, that we ONLY have to search for mouse only hotkeys. It's impossible to find NEW matches with keyboard keys. 
+ for (int down_state = 0; down_state < MAX_MOUSE_KEYS; ++down_state) { + if (!IS_BIT_SET(state->mouse_down, down_state)) { + continue; + } + + const uint8* hotkeys_for_key = mapping->keys[down_state]; + if (hotkeys_for_key[0] == 0) { + // no possible hotkey associated with this key + continue; + } + + for (int possible_hotkey_idx = 0; possible_hotkey_idx < MAX_KEY_TO_HOTKEY; ++possible_hotkey_idx) { + // We only support a limited amount of active hotkeys + if (i >= MAX_KEY_PRESSES) { + return; + } + + bool is_pressed = hotkeys_for_key[possible_hotkey_idx] != 0 && hotkey_is_pressed(state, mapping, hotkeys_for_key[possible_hotkey_idx]); /* skip empty slots, same reason as in the keyboard loop */ + + // store active hotkey, if it is not already active + if (is_pressed && !hotkey_is_active(state, hotkeys_for_key[possible_hotkey_idx])) { + state->state_hotkeys[i] = hotkeys_for_key[possible_hotkey_idx]; + ++i; + } + } + } + + // @bug how to handle long press vs click + // @bug how to handle priority? e.g. there might be a hotkey for 1 and one for alt+1 + // in this case only the hotkey for alt+1 should be triggered + // @bug how to handle other conditions besides buttons pressed together? some hotkeys are only available in certain situations +} + #endif \ No newline at end of file diff --git a/math/matrix/MatrixFloat32.h b/math/matrix/MatrixFloat32.h index c1d3eeb..75f014c 100644 --- a/math/matrix/MatrixFloat32.h +++ b/math/matrix/MatrixFloat32.h @@ -15,6 +15,261 @@ #include "../../utils/TestUtils.h" #include +// @todo Implement intrinsic versions! 
+ +void vec2_normalize_f32(float* __restrict x, float* __restrict y) +{ + float d = sqrtf((*x) * (*x) + (*y) * (*y)); + + *x /= d; + *y /= d; +} + +inline +void vec2_add(v2_f32* __restrict vec, const v2_f32* a, const v2_f32* b) { + vec->x = a->x + b->x; + vec->y = a->y + b->y; +} + +inline +void vec2_add(v2_f32* __restrict vec, const v2_f32* b) { + vec->x += b->x; + vec->y += b->y; +} + +inline +void vec2_sub(v2_f32* __restrict vec, const v2_f32* a, const v2_f32* b) { + vec->x = a->x - b->x; + vec->y = a->y - b->y; +} + +inline +void vec2_sub(v2_f32* __restrict vec, const v2_f32* b) { + vec->x -= b->x; + vec->y -= b->y; +} + +inline +void vec2_mul(v2_f32* vec, const v2_f32* a, float s) { + vec->x = a->x * s; + vec->y = a->y * s; +} + +inline +void vec2_mul(v2_f32* vec, float s) { + vec->x *= s; + vec->y *= s; +} + +inline +float vec2_mul(const v2_f32* a, const v2_f32* b) { + return a->x * b->x + a->y * b->y; +} + +inline +void vec2_mul(v2_f32* __restrict vec, const v2_f32* a, const v2_f32* b) { + vec->x = a->x * b->x; + vec->y = a->y * b->y; +} + +inline +void vec2_mul(v2_f32* __restrict vec, const v2_f32* b) { + vec->x *= b->x; + vec->y *= b->y; +} + +inline +float vec2_cross(const v2_f32* a, const v2_f32* b) { + return a->x * b->y - a->y * b->x; +} + +inline +float vec2_dot(const v2_f32* a, const v2_f32* b) { + return a->x * b->x + a->y * b->y; +} + +void vec3_normalize_f32(float* __restrict x, float* __restrict y, float* __restrict z) +{ + float d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z)); + + *x /= d; + *y /= d; + *z /= d; +} + +void vec3_normalize_f32(v3_f32* vec) +{ + float d = sqrtf(vec->x * vec->x + vec->y * vec->y + vec->z * vec->z); + + vec->x /= d; + vec->y /= d; + vec->z /= d; +} + +inline +void vec3_add(v3_f32* __restrict vec, const v3_f32* a, const v3_f32* b) { + vec->x = a->x + b->x; + vec->y = a->y + b->y; + vec->z = a->z + b->z; +} + +inline +void vec3_add(v3_f32* __restrict vec, const v3_f32* b) { + vec->x += b->x; + vec->y += b->y; + 
vec->z += b->z; +} + +inline +void vec3_sub(v3_f32* __restrict vec, const v3_f32* a, const v3_f32* b) { + vec->x = a->x - b->x; + vec->y = a->y - b->y; + vec->z = a->z - b->z; +} + +inline +void vec3_sub(v3_f32* __restrict vec, const v3_f32* b) { + vec->x -= b->x; + vec->y -= b->y; + vec->z -= b->z; +} + +inline +void vec3_mul(v3_f32* vec, const v3_f32* a, float s) { + vec->x = a->x * s; + vec->y = a->y * s; + vec->z = a->z * s; +} + +inline +void vec3_mul(v3_f32* vec, float s) { + vec->x *= s; + vec->y *= s; + vec->z *= s; +} + +inline +float vec3_mul(const v3_f32* a, const v3_f32* b) { + return a->x * b->x + a->y * b->y + a->z * b->z; +} + +inline +void vec3_mul(v3_f32* __restrict vec, const v3_f32* a, const v3_f32* b) { + vec->x = a->x * b->x; + vec->y = a->y * b->y; + vec->z = a->z * b->z; +} + +inline +void vec3_mul(v3_f32* __restrict vec, const v3_f32* b) { + vec->x *= b->x; + vec->y *= b->y; + vec->z *= b->z; +} + +void vec3_cross(v3_f32* __restrict vec, const v3_f32* a, const v3_f32* b) { + vec->x = a->y * b->z - a->z * b->y; + vec->y = a->z * b->x - a->x * b->z; + vec->z = a->x * b->y - a->y * b->x; +} + +float vec3_dot(const v3_f32* a, const v3_f32* b) { + return a->x * b->x + a->y * b->y + a->z * b->z; +} + +void vec4_normalize_f32(float* __restrict x, float* __restrict y, float* __restrict z, float* __restrict w) +{ + float d = sqrtf((*x) * (*x) + (*y) * (*y) + (*z) * (*z) + (*w) * (*w)); + + *x /= d; + *y /= d; + *z /= d; + *w /= d; +} + +inline +void vec4_add(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b) { + vec->x = a->x + b->x; + vec->y = a->y + b->y; + vec->z = a->z + b->z; + vec->w = a->w + b->w; +} + +inline +void vec4_add(v4_f32* __restrict vec, const v4_f32* b) { + vec->x += b->x; + vec->y += b->y; + vec->z += b->z; + vec->w += b->w; +} + +inline +void vec4_sub(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b) { + vec->x = a->x - b->x; + vec->y = a->y - b->y; + vec->z = a->z - b->z; + vec->w = a->w - b->w; +} + +inline +void 
vec4_sub(v4_f32* __restrict vec, const v4_f32* b) { + vec->x -= b->x; + vec->y -= b->y; + vec->z -= b->z; + vec->w -= b->w; +} + +inline +void vec4_mul(v4_f32* vec, const v4_f32* a, float s) { + vec->x = a->x * s; + vec->y = a->y * s; + vec->z = a->z * s; + vec->w = a->w * s; +} + +inline +void vec4_mul(v4_f32* vec, float s) { + vec->x *= s; + vec->y *= s; + vec->z *= s; + vec->w *= s; +} + +inline +float vec4_mul(const v4_f32* a, const v4_f32* b) { + return a->x * b->x + a->y * b->y + a->z * b->z + a->w * b->w; +} + +inline +void vec4_mul(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b) { + vec->x = a->x * b->x; + vec->y = a->y * b->y; + vec->z = a->z * b->z; + vec->w = a->w * b->w; +} + +inline +void vec4_mul(v4_f32* __restrict vec, const v4_f32* b) { + vec->x *= b->x; + vec->y *= b->y; + vec->z *= b->z; + vec->w *= b->w; +} + +inline +float vec4_dot(const v4_f32* a, const v4_f32* b) { + return a->x * b->x + a->y * b->y + a->z * b->z + a->w * b->w; +} + +inline +void vec4_cross(v4_f32* __restrict vec, const v4_f32* a, const v4_f32* b, const v4_f32* c) { + vec->x = a->y * (b->z * c->w - b->w * c->z) - a->z * (b->y * c->w - b->w * c->y) + a->w * (b->y * c->z - b->z * c->y); + vec->y = -(a->x * (b->z * c->w - b->w * c->z) - a->z * (b->x * c->w - b->w * c->x) + a->w * (b->x * c->z - b->z * c->x)); + vec->z = a->x * (b->y * c->w - b->w * c->y) - a->y * (b->x * c->w - b->w * c->x) + a->w * (b->x * c->y - b->y * c->x); + vec->w = -(a->x * (b->y * c->z - b->z * c->y) - a->y * (b->x * c->z - b->z * c->x) + a->z * (b->x * c->y - b->y * c->x)); +} + +inline void mat3_identity(float* matrix) { matrix[0] = 1.0f; matrix[1] = 0.0f; matrix[2] = 0.0f; @@ -22,11 +277,13 @@ void mat3_identity(float* matrix) matrix[6] = 0.0f; matrix[7] = 0.0f; matrix[8] = 1.0f; } +inline void mat3_identity_sparse(float* matrix) { matrix[0] = 1.0f; matrix[4] = 1.0f; matrix[8] = 1.0f; } +inline void mat3_identity(__m128* matrix) { matrix[0] = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); @@ -34,6 
+291,7 @@ void mat3_identity(__m128* matrix) matrix[2] = _mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f); } +inline void mat4_identity(float* matrix) { matrix[0] = 1.0f; matrix[1] = 0.0f; matrix[2] = 0.0f; matrix[3] = 0.0f; @@ -42,11 +300,13 @@ void mat4_identity(float* matrix) matrix[12] = 0.0f; matrix[13] = 0.0f; matrix[14] = 0.0f; matrix[15] = 1.0f; } +inline void mat4_identity_sparse(float* matrix) { matrix[0] = 1.0f; matrix[5] = 1.0f; matrix[10] = 1.0f; matrix[15] = 1.0f; } +inline void mat4_identity(__m128* matrix) { matrix[0] = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f); @@ -59,7 +319,7 @@ void mat4_identity(__m128* matrix) // https://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula void mat4_rotation(float* matrix, float x, float y, float z, float angle) { - ASSERT_SIMPLE(OMS_ABS(x * x + y * y + z * z - 1.0f) < 0.01) + ASSERT_SIMPLE(OMS_ABS(x * x + y * y + z * z - 1.0f) < 0.01); // @todo replace with quaternions float s = sinf(angle); @@ -129,23 +389,16 @@ void mat4_rotation(float* matrix, float pitch, float yaw, float roll) matrix[15] = 1.0f; } -void mat3vec3_mult(const float* matrix, const float* vector, float* result) +inline +void mat3vec3_mult(const float* __restrict matrix, const float* __restrict vector, float* __restrict result) { result[0] = matrix[0] * vector[0] + matrix[1] * vector[1] + matrix[2] * vector[2]; result[1] = matrix[3] * vector[0] + matrix[4] * vector[1] + matrix[5] * vector[2]; result[2] = matrix[6] * vector[0] + matrix[7] * vector[1] + matrix[8] * vector[2]; - - /* - for (int i = 0; i < 3; ++i) { - result[i] = matrix[i * 3 + 0] * vector[0] - + matrix[i * 3 + 1] * vector[1] - + matrix[i * 3 + 2] * vector[2]; - } - */ } // @question could simple mul add sse be faster? 
-void mat3vec3_mult_sse(const float* matrix, const float* vector, float* result) +void mat3vec3_mult_sse(const float* __restrict matrix, const float* __restrict vector, float* __restrict result) { __m128 vec = _mm_loadu_ps(vector); vec = _mm_insert_ps(vec, _mm_setzero_ps(), 0x30); // vec[3] = 0 @@ -161,7 +414,7 @@ void mat3vec3_mult_sse(const float* matrix, const float* vector, float* result) } // @question could simple mul add sse be faster? -void mat3vec3_mult_sse(const __m128* matrix, const __m128* vector, float* result) +void mat3vec3_mult_sse(const __m128* __restrict matrix, const __m128* __restrict vector, float* __restrict result) { for (int i = 0; i < 3; ++i) { __m128 dot = _mm_dp_ps(matrix[i], *vector, 0xF1); @@ -171,14 +424,15 @@ void mat3vec3_mult_sse(const __m128* matrix, const __m128* vector, float* result } // @question could simple mul add sse be faster? -void mat3vec3_mult_sse(const __m128* matrix, const __m128* vector, __m128* result) +void mat3vec3_mult_sse(const __m128* __restrict matrix, const __m128* __restrict vector, __m128* __restrict result) { for (int i = 0; i < 4; ++i) { result[i] = _mm_dp_ps(matrix[i], *vector, 0xF1); } } -void mat4vec4_mult(const float* matrix, const float* vector, float* result) +inline +void mat4vec4_mult(const float* __restrict matrix, const float* __restrict vector, float* __restrict result) { result[0] = matrix[0] * vector[0] + matrix[1] * vector[1] + matrix[2] * vector[2] + matrix[3] * vector[3]; result[1] = matrix[4] * vector[0] + matrix[5] * vector[1] + matrix[6] * vector[2] + matrix[7] * vector[3]; @@ -187,7 +441,7 @@ void mat4vec4_mult(const float* matrix, const float* vector, float* result) } // @question could simple mul add sse be faster? 
-void mat4vec4_mult_sse(const float* matrix, const float* vector, float* result) +void mat4vec4_mult_sse(const float* __restrict matrix, const float* __restrict vector, float* __restrict result) { __m128 vec = _mm_loadu_ps(vector); @@ -200,7 +454,7 @@ void mat4vec4_mult_sse(const float* matrix, const float* vector, float* result) } // @question could simple mul add sse be faster? -void mat4vec4_mult_sse(const __m128* matrix, const __m128* vector, float* result) +void mat4vec4_mult_sse(const __m128* __restrict matrix, const __m128* __restrict vector, float* __restrict result) { for (int i = 0; i < 4; ++i) { __m128 dot = _mm_dp_ps(matrix[i], *vector, 0xF1); @@ -210,14 +464,38 @@ void mat4vec4_mult_sse(const __m128* matrix, const __m128* vector, float* result } // @question could simple mul add sse be faster? -void mat4vec4_mult_sse(const __m128* matrix, const __m128* vector, __m128* result) +void mat4vec4_mult_sse(const __m128* __restrict matrix, const __m128* __restrict vector, __m128* __restrict result) { for (int i = 0; i < 4; ++i) { result[i] = _mm_dp_ps(matrix[i], *vector, 0xF1); } } -void mat4mat4_mult(const float* a, const float* b, float* result, int steps = 8) +inline +void mat4mat4_mult(const float* __restrict a, const float* __restrict b, float* __restrict result) +{ + result[0] = a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12]; + result[1] = a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13]; + result[2] = a[0] * b[2] + a[1] * b[6] + a[2] * b[10] + a[3] * b[14]; + result[3] = a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3] * b[15]; + + result[4] = a[4] * b[0] + a[5] * b[4] + a[6] * b[8] + a[7] * b[12]; + result[5] = a[4] * b[1] + a[5] * b[5] + a[6] * b[9] + a[7] * b[13]; + result[6] = a[4] * b[2] + a[5] * b[6] + a[6] * b[10] + a[7] * b[14]; + result[7] = a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7] * b[15]; + + result[8] = a[8] * b[0] + a[9] * b[4] + a[10] * b[8] + a[11] * b[12]; + result[9] = a[8] * b[1] + a[9] * b[5] + a[10] * b[9] + a[11] 
* b[13]; + result[10] = a[8] * b[2] + a[9] * b[6] + a[10] * b[10] + a[11] * b[14]; + result[11] = a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11] * b[15]; + + result[12] = a[12] * b[0] + a[13] * b[4] + a[14] * b[8] + a[15] * b[12]; + result[13] = a[12] * b[1] + a[13] * b[5] + a[14] * b[9] + a[15] * b[13]; + result[14] = a[12] * b[2] + a[13] * b[6] + a[14] * b[10] + a[15] * b[14]; + result[15] = a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15]; +} + +void mat4mat4_mult(const float* __restrict a, const float* __restrict b, float* __restrict result, int steps) { if (steps > 1) { // @todo check http://fhtr.blogspot.com/2010/02/4x4-float-matrix-multiplication-using.html @@ -286,29 +564,11 @@ void mat4mat4_mult(const float* a, const float* b, float* result, int steps = 8) ) ); } else { - result[0] = a[0] * b[0] + a[1] * b[4] + a[2] * b[8] + a[3] * b[12]; - result[1] = a[0] * b[1] + a[1] * b[5] + a[2] * b[9] + a[3] * b[13]; - result[2] = a[0] * b[2] + a[1] * b[6] + a[2] * b[10] + a[3] * b[14]; - result[3] = a[0] * b[3] + a[1] * b[7] + a[2] * b[11] + a[3] * b[15]; - - result[4] = a[4] * b[0] + a[5] * b[4] + a[6] * b[8] + a[7] * b[12]; - result[5] = a[4] * b[1] + a[5] * b[5] + a[6] * b[9] + a[7] * b[13]; - result[6] = a[4] * b[2] + a[5] * b[6] + a[6] * b[10] + a[7] * b[14]; - result[7] = a[4] * b[3] + a[5] * b[7] + a[6] * b[11] + a[7] * b[15]; - - result[8] = a[8] * b[0] + a[9] * b[4] + a[10] * b[8] + a[11] * b[12]; - result[9] = a[8] * b[1] + a[9] * b[5] + a[10] * b[9] + a[11] * b[13]; - result[10] = a[8] * b[2] + a[9] * b[6] + a[10] * b[10] + a[11] * b[14]; - result[11] = a[8] * b[3] + a[9] * b[7] + a[10] * b[11] + a[11] * b[15]; - - result[12] = a[12] * b[0] + a[13] * b[4] + a[14] * b[8] + a[15] * b[12]; - result[13] = a[12] * b[1] + a[13] * b[5] + a[14] * b[9] + a[15] * b[13]; - result[14] = a[12] * b[2] + a[13] * b[6] + a[14] * b[10] + a[15] * b[14]; - result[15] = a[12] * b[3] + a[13] * b[7] + a[14] * b[11] + a[15] * b[15]; + mat4mat4_mult(a, b, result); 
} } -void mat4mat4_mult_sse(const __m128* a, const __m128* b_transposed, float* result) +void mat4mat4_mult_sse(const __m128* __restrict a, const __m128* __restrict b_transposed, float* __restrict result) { __m128 dot; @@ -366,7 +626,8 @@ void mat4mat4_mult_sse(const __m128* a, const __m128* b_transposed, float* resul result[15] = _mm_cvtss_f32(dot); } -void mat4mat4_mult_sse(const __m128* a, const __m128* b_transpose, __m128* result) +inline +void mat4mat4_mult_sse(const __m128* __restrict a, const __m128* __restrict b_transpose, __m128* __restrict result) { for (int i = 0; i < 4; ++i) { result[i] = _mm_mul_ps(a[0], b_transpose[i]); @@ -418,7 +679,7 @@ void mat4_frustum_planes(float planes[6][4], float radius, float *matrix) { planes[5][3] = zfar * m[15] - m[14]; } -void mat4_frustum_sparse( +void mat4_frustum_sparse_rh( float *matrix, float left, float right, float bottom, float top, float znear, float zfar @@ -450,8 +711,40 @@ void mat4_frustum_sparse( //matrix[15] = 0.0f; } +void mat4_frustum_sparse_lh( + float *matrix, + float left, float right, float bottom, float top, + float znear, float zfar + ) { + float temp, temp2, temp3, temp4; + temp = 2.0f * znear; + temp2 = right - left; + temp3 = top - bottom; + temp4 = zfar - znear; + + matrix[0] = temp / temp2; + //matrix[1] = 0.0f; + //matrix[2] = 0.0f; + //matrix[3] = 0.0f; + + //matrix[4] = 0.0f; + matrix[5] = temp / temp3; + //matrix[6] = 0.0f; + //matrix[7] = 0.0f; + + matrix[8] = (right + left) / temp2; + matrix[9] = (top + bottom) / temp3; + matrix[10] = (zfar + znear) / temp4; + matrix[11] = 1.0f; + + //matrix[12] = 0.0f; + //matrix[13] = 0.0f; + matrix[14] = (temp * zfar) / temp4; + //matrix[15] = 0.0f; +} + // fov needs to be in rad -void mat4_perspective_sparse( +void mat4_perspective_sparse_lh( float *matrix, float fov, float aspect, float znear, float zfar) { @@ -461,7 +754,20 @@ void mat4_perspective_sparse( ymax = znear * tanf(fov * 0.5f); xmax = ymax * aspect; - mat4_frustum_sparse(matrix, -xmax, 
xmax, -ymax, ymax, znear, zfar); + mat4_frustum_sparse_lh(matrix, -xmax, xmax, -ymax, ymax, znear, zfar); +} + +void mat4_perspective_sparse_rh( + float *matrix, float fov, float aspect, + float znear, float zfar) +{ + ASSERT_SIMPLE(znear > 0.0f); + + float ymax, xmax; + ymax = znear * tanf(fov * 0.5f); + xmax = ymax * aspect; + + mat4_frustum_sparse_rh(matrix, -xmax, xmax, -ymax, ymax, znear, zfar); } void mat4_ortho( @@ -494,7 +800,21 @@ void mat4_ortho( matrix[15] = 1.0f; } -void mat4_translate(float* matrix, float dx, float dy, float dz, int steps = 8) +void mat4_translate(float* matrix, float dx, float dy, float dz) +{ + float temp[16]; + memcpy(temp, matrix, sizeof(float) * 16); + + float translation_matrix[16]; + translation_matrix[0] = 1.0f; translation_matrix[1] = 0.0f; translation_matrix[2] = 0.0f; translation_matrix[3] = dx; + translation_matrix[4] = 0.0f; translation_matrix[5] = 1.0f; translation_matrix[6] = 0.0f; translation_matrix[7] = dy; + translation_matrix[8] = 0.0f; translation_matrix[9] = 0.0f; translation_matrix[10] = 1.0f; translation_matrix[11] = dz; + translation_matrix[12] = 0.0f; translation_matrix[13] = 0.0f; translation_matrix[14] = 0.0f; translation_matrix[15] = 1.0f; + + mat4mat4_mult(temp, translation_matrix, matrix); +} + +void mat4_translate(float* matrix, float dx, float dy, float dz, int steps) { alignas(64) float temp[16]; memcpy(temp, matrix, sizeof(float) * 16); @@ -505,9 +825,10 @@ void mat4_translate(float* matrix, float dx, float dy, float dz, int steps = 8) translation_matrix[8] = 0.0f; translation_matrix[9] = 0.0f; translation_matrix[10] = 1.0f; translation_matrix[11] = dz; translation_matrix[12] = 0.0f; translation_matrix[13] = 0.0f; translation_matrix[14] = 0.0f; translation_matrix[15] = 1.0f; - mat4mat4_mult(temp, translation_matrix, matrix, 1); + mat4mat4_mult(temp, translation_matrix, matrix, steps); } +inline void mat4_translation(float* matrix, float dx, float dy, float dz) { matrix[0] = 1.0f; matrix[1] = 0.0f; 
matrix[2] = 0.0f; matrix[3] = dx; @@ -516,6 +837,7 @@ void mat4_translation(float* matrix, float dx, float dy, float dz) matrix[12] = 0.0f; matrix[13] = 0.0f; matrix[14] = 0.0f; matrix[15] = 1.0f; } +inline void mat4_translation_sparse(float* matrix, float dx, float dy, float dz) { matrix[3] = dx; @@ -523,92 +845,112 @@ void mat4_translation_sparse(float* matrix, float dx, float dy, float dz) matrix[11] = dz; } -// @todo unroll these loops below -void mat4_transpose(const float* matrix, float* transposed) +inline +void mat4_scale(float* matrix, float dx, float dy, float dz) { - for (int i = 0; i < 4; ++i) { - for (int j = i + 1; j < 4; ++j) { - int index1 = i * 4 + j; - int index2 = j * 4 + i; - - transposed[index1] = transposed[index2]; - transposed[index2] = matrix[index1]; - } - } + matrix[0] = dx; matrix[1] = 0.0f; matrix[2] = 0.0f; matrix[3] = 0.0f; + matrix[4] = 0.0f; matrix[5] = dy; matrix[6] = 0.0f; matrix[7] = 0.0f; + matrix[8] = 0.0f; matrix[9] = 0.0f; matrix[10] = dz; matrix[11] = 0.0f; + matrix[12] = 0.0f; matrix[13] = 0.0f; matrix[14] = 0.0f; matrix[15] = 1.0f; } +inline +void mat4_scale_sparse(float* matrix, float dx, float dy, float dz) +{ + matrix[0] = dx; + matrix[5] = dy; + matrix[10] = dz; +} + +inline +void mat4_transpose(const float* __restrict matrix, float* __restrict transposed) +{ + transposed[1] = matrix[4]; + transposed[2] = matrix[8]; + transposed[3] = matrix[12]; + transposed[4] = matrix[1]; + transposed[6] = matrix[9]; + transposed[7] = matrix[13]; + transposed[8] = matrix[2]; + transposed[9] = matrix[6]; + transposed[11] = matrix[14]; + transposed[12] = matrix[3]; + transposed[13] = matrix[7]; + transposed[14] = matrix[11]; +} + +inline void mat4_transpose(float* matrix) { float temp; - for (int i = 0; i < 4; ++i) { - for (int j = i + 1; j < 4; ++j) { - int index1 = i * 4 + j; - int index2 = j * 4 + i; + temp = matrix[1]; + matrix[1] = matrix[4]; + matrix[4] = temp; - temp = matrix[index1]; - matrix[index1] = matrix[index2]; - 
matrix[index2] = temp; - } - } + temp = matrix[2]; + matrix[2] = matrix[8]; + matrix[8] = temp; + + temp = matrix[3]; + matrix[3] = matrix[12]; + matrix[12] = temp; + + temp = matrix[6]; + matrix[6] = matrix[9]; + matrix[9] = temp; + + temp = matrix[7]; + matrix[7] = matrix[13]; + matrix[13] = temp; + + temp = matrix[11]; + matrix[11] = matrix[14]; + matrix[14] = temp; } -void mat3_transpose(const float* matrix, float* transposed) +inline +void mat3_transpose(const float* __restrict matrix, float* __restrict transposed) { - for (int i = 0; i < 3; ++i) { - for (int j = i + 1; j < 3; ++j) { - int index1 = i * 3 + j; - int index2 = j * 3 + i; - - transposed[index1] = transposed[index2]; - transposed[index2] = matrix[index1]; - } - } + transposed[1] = matrix[3]; + transposed[2] = matrix[6]; + transposed[3] = matrix[1]; + transposed[5] = matrix[7]; + transposed[6] = matrix[2]; + transposed[7] = matrix[5]; } +inline void mat3_transpose(float* matrix) { float temp; - for (int i = 0; i < 3; ++i) { - for (int j = i + 1; j < 3; ++j) { - int index1 = i * 3 + j; - int index2 = j * 3 + i; + temp = matrix[1]; + matrix[1] = matrix[3]; + matrix[3] = temp; - temp = matrix[index1]; - matrix[index1] = matrix[index2]; - matrix[index2] = temp; - } - } + temp = matrix[2]; + matrix[2] = matrix[6]; + matrix[6] = temp; + + temp = matrix[5]; + matrix[5] = matrix[7]; + matrix[7] = temp; } -void mat2_transpose(const float* matrix, float* transposed) +inline +void mat2_transpose(const float* __restrict matrix, float* __restrict transposed) { - for (int i = 0; i < 2; ++i) { - for (int j = i + 1; j < 2; ++j) { - int index1 = i * 2 + j; - int index2 = j * 2 + i; - - transposed[index1] = transposed[index2]; - transposed[index2] = matrix[index1]; - } - } + transposed[1] = matrix[2]; + transposed[2] = matrix[1]; } +inline void mat2_transpose(float* matrix) { - float temp; - - for (int i = 0; i < 2; ++i) { - for (int j = i + 1; j < 2; ++j) { - int index1 = i * 2 + j; - int index2 = j * 2 + i; - - 
temp = matrix[index1]; - matrix[index1] = matrix[index2]; - matrix[index2] = temp; - } - } + float temp = matrix[1]; + matrix[1] = matrix[2]; + matrix[2] = temp; } #endif \ No newline at end of file diff --git a/math/matrix/QuaternionFloat32.h b/math/matrix/QuaternionFloat32.h index efbd9d4..70e5004 100644 --- a/math/matrix/QuaternionFloat32.h +++ b/math/matrix/QuaternionFloat32.h @@ -15,6 +15,8 @@ #include "../../utils/MathUtils.h" #include "../../utils/TestUtils.h" +// @todo Remove unused functions there are a lot (AFTER you implemented quaternion handling in the camera) + inline void quaternion_unit(v4_f32* quat) { @@ -29,17 +31,17 @@ void quaternion_unit(v4_f32* quat) inline void quaternion_from_euler(v4_f32* quat, float pitch, float yaw, float roll) { - float y = OMS_RAD2DEG(yaw * 0.5f); - float cy = cosf(y); - float sy = sinf(y); + float y = OMS_DEG2RAD(yaw); + float cy = cosf(y / 2); + float sy = sinf(y / 2); - float p = OMS_RAD2DEG(pitch * 0.5f); - float cp = cosf(p); - float sp = sinf(p); + float p = OMS_DEG2RAD(pitch); + float cp = cosf(p / 2); + float sp = sinf(p / 2); - float r = OMS_RAD2DEG(roll * 0.5f); - float cr = cosf(r); - float sr = sinf(r); + float r = OMS_DEG2RAD(roll); + float cr = cosf(r / 2); + float sr = sinf(r / 2); quat->w = cr * cp * cy + sr * sp * sy; quat->x = sr * cp * cy - cr * sp * sy; @@ -50,9 +52,9 @@ void quaternion_from_euler(v4_f32* quat, float pitch, float yaw, float roll) } inline -void quaternion_from_euler(v4_f32* quat, const v3_f32* v) +void quaternion_from_euler(v4_f32* __restrict quat, const v3_f32* __restrict v) { - float y = OMS_RAD2DEG(v->v * 0.5f); + float y = OMS_RAD2DEG(v->v / 2); float cy = cosf(y); float sy = sinf(y); @@ -70,15 +72,27 @@ void quaternion_from_euler(v4_f32* quat, const v3_f32* v) quat->z = cr * cp * sy - sr * sp * cy; } -void quaternion_to_euler(const v4_f32* quat, v3_f32* v) { +inline +void quaternion_from_axis_angle(v4_f32* quat, const v3_f32* __restrict axis, float rad) { + float half_angle = 
rad / 2.0f; + float s = sinf(half_angle); + + quat->x = axis->x * s; + quat->y = axis->y * s; + quat->z = axis->z * s; + quat->w = cosf(half_angle); + + quaternion_unit(quat); +} + +void quaternion_to_euler(const v4_f32* __restrict quat, v3_f32* __restrict v) { // Pitch float sinp = 2.0f * (quat->w * quat->x + quat->y * quat->z); float cosp = 1.0f - 2.0f * (quat->x * quat->x + quat->y * quat->y); v->pitch = atan2f(sinp, cosp); // Check for gimbal lock - float sinp_check = 2.0f * (quat->w * quat->x + quat->y * quat->z); - if (OMS_ABS(sinp_check) >= 0.9999f) { + if (OMS_ABS(sinp) >= 0.9999f) { v->yaw = atan2f(quat->x * quat->z - quat->w * quat->y, quat->w * quat->x + quat->y * quat->z); v->roll = 0.0f; } else { @@ -93,15 +107,15 @@ void quaternion_to_euler(const v4_f32* quat, v3_f32* v) { } } -void quaternion_multiply(v4_f32* quat, const v4_f32* quat1, const v4_f32* quat2) +void quaternion_multiply(v4_f32* __restrict quat, const v4_f32* __restrict quat1, const v4_f32* __restrict quat2) { - quat->w = quat1->w * quat2->w - quat1->x * quat2->x - quat1->y * quat2->y - quat1->z * quat2->z; quat->x = quat1->w * quat2->x + quat1->x * quat2->w + quat1->y * quat2->z - quat1->z * quat2->y; quat->y = quat1->w * quat2->y - quat1->x * quat2->z + quat1->y * quat2->w + quat1->z * quat2->x; quat->z = quat1->w * quat2->z + quat1->x * quat2->y - quat1->y * quat2->x + quat1->z * quat2->w; + quat->w = quat1->w * quat2->w - quat1->x * quat2->x - quat1->y * quat2->y - quat1->z * quat2->z; } -void quaternion_inverse(v4_f32* quat, const v4_f32* quat_origin) { +void quaternion_inverse(v4_f32* __restrict quat, const v4_f32* __restrict quat_origin) { float norm = quat_origin->w * quat_origin->w + quat_origin->x * quat_origin->x + quat_origin->y * quat_origin->y @@ -114,7 +128,7 @@ void quaternion_inverse(v4_f32* quat, const v4_f32* quat_origin) { } inline -void quaternion_to_rotation(f32* matrix, const v4_f32* quat) +void quaternion_to_rotation(f32* __restrict matrix, const v4_f32* __restrict 
quat) { matrix[0] = 1.0f - 2.0f * (quat->y * quat->y + quat->z * quat->z); matrix[1] = 2.0f * (quat->x * quat->y - quat->z * quat->w); @@ -138,7 +152,7 @@ void quaternion_to_rotation(f32* matrix, const v4_f32* quat) } inline -void quaternion_to_rotation(f32* matrix, const v4_f32* quat) +void quaternion_to_rotation_sparse(f32* __restrict matrix, const v4_f32* __restrict quat) { matrix[0] = 1.0f - 2.0f * (quat->y * quat->y + quat->z * quat->z); matrix[1] = 2.0f * (quat->x * quat->y - quat->z * quat->w); @@ -154,7 +168,7 @@ void quaternion_to_rotation(f32* matrix, const v4_f32* quat) } inline -void quaternion_from_vec(v4_f32* quat, const v4_f32* vec) +void quaternion_from_vec(v4_f32* __restrict quat, const v4_f32* __restrict vec) { quat->x = vec->x; quat->y = vec->y; @@ -163,7 +177,7 @@ void quaternion_from_vec(v4_f32* quat, const v4_f32* vec) } inline -void quaternion_from_vec(v4_f32* quat, const v3_f32* vec) +void quaternion_from_vec(v4_f32* __restrict quat, const v3_f32* __restrict vec) { quat->x = vec->x; quat->y = vec->y; @@ -172,7 +186,7 @@ void quaternion_from_vec(v4_f32* quat, const v3_f32* vec) } inline -void quaternion_to_vec(v4_f32* vec, const v4_f32* quat) +void quaternion_to_vec(v4_f32* __restrict vec, const v4_f32* __restrict quat) { vec->x = quat->x; vec->y = quat->y; @@ -181,18 +195,32 @@ void quaternion_to_vec(v4_f32* vec, const v4_f32* quat) } inline -void quaternion_to_vec(v3_f32* vec, const v4_f32* quat) +void quaternion_to_vec(v3_f32* __restrict vec, const v4_f32* __restrict quat) { vec->x = quat->x; vec->y = quat->y; vec->z = quat->z; } +inline +void quaternion_rotate_vector(v3_f32* __restrict vec, const v4_f32* __restrict quat, v3_f32* __restrict a) +{ + // @todo consider to not create this variable and cast quat to a v3_f32 pointer in cross! 
(the order is correct) + v3_f32 q2 = {quat->x, quat->y, quat->z}; + + v3_f32 cross; + vec3_cross(&cross, &q2, a); + + vec->x = a->x + 2.0f * cross.x * quat->w + q2.y * cross.z - q2.z * cross.y; + vec->y = a->y + 2.0f * cross.y * quat->w + q2.z * cross.x - q2.x * cross.z; + vec->z = a->z + 2.0f * cross.z * quat->w + q2.x * cross.y - q2.y * cross.x; +} + // active = point rotated respective to coordinate system inline -void quaternion_rotate_active(v4_f32* p, const v4_f32* quat, const v4_f32* quat_inv) +void quaternion_rotate_active(v4_f32* __restrict p, const v4_f32* __restrict quat, const v4_f32* __restrict quat_inv) { - ASSERT_SIMPLE(OMS_ABS(x * x + y * y + z * z + w * z - 1.0f) < 0.01); + //ASSERT_SIMPLE(OMS_ABS(x * x + y * y + z * z + w * z - 1.0f) < 0.01); v4_f32 p_tmp; quaternion_multiply(&p_tmp, quat_inv, p); @@ -201,9 +229,9 @@ void quaternion_rotate_active(v4_f32* p, const v4_f32* quat, const v4_f32* quat_ // passive = coordinate system is rotated inline -void quaternion_rotate_passive(v4_f32* p, const v4_f32* quat, const v4_f32* quat_inv) +void quaternion_rotate_passive(v4_f32* __restrict p, const v4_f32* __restrict quat, const v4_f32* __restrict quat_inv) { - ASSERT_SIMPLE(OMS_ABS(x * x + y * y + z * z + w * w - 1.0f) < 0.01); + //ASSERT_SIMPLE(OMS_ABS(x * x + y * y + z * z + w * w - 1.0f) < 0.01); v4_f32 p_tmp; quaternion_multiply(&p_tmp, quat, p); @@ -218,7 +246,7 @@ void quaternion_rotate_passive(v4_f32* p, const v4_f32* quat, const v4_f32* quat // 5. call quat_rotate_* // 6. 
convert quat to vec // @todo Since this is usually done on multiple vecs, we should probably accept an array of vecs and then use simd -void quaternion_rotate_active(v4_f32* vec, float pitch, float yaw, float roll) +void quaternion_rotate_active(v3_f32* vec, float pitch, float yaw, float roll) { v4_f32 q; quaternion_from_euler(&q, pitch, yaw, roll); // q is already in unit length @@ -226,15 +254,16 @@ void quaternion_rotate_active(v4_f32* vec, float pitch, float yaw, float roll) v4_f32 q_inv; quaternion_inverse(&q_inv, &q); - v4_f32 p; - quaternion_from_vec(&p, vec); + v4_f32 p = { vec->x, vec->y, vec->z, 0.0f }; quaternion_rotate_active(&p, &q, &q_inv); - quaternion_to_vec(vec, &p); + vec->x = p.x; + vec->y = p.y; + vec->z = p.z; } -void quaternion_rotate_passive(v4_f32* vec, float pitch, float yaw, float roll) +void quaternion_rotate_active(v4_f32* quat, float pitch, float yaw, float roll) { v4_f32 q; quaternion_from_euler(&q, pitch, yaw, roll); // q is already in unit length @@ -242,12 +271,35 @@ void quaternion_rotate_passive(v4_f32* vec, float pitch, float yaw, float roll) v4_f32 q_inv; quaternion_inverse(&q_inv, &q); - v4_f32 p; - quaternion_from_vec(&p, vec); + quaternion_rotate_active(quat, &q, &q_inv); +} + +void quaternion_rotate_passive(v3_f32* vec, float pitch, float yaw, float roll) +{ + v4_f32 q; + quaternion_from_euler(&q, pitch, yaw, roll); // q is already in unit length + + v4_f32 q_inv; + quaternion_inverse(&q_inv, &q); + + v4_f32 p = { vec->x, vec->y, vec->z, 0.0f }; quaternion_rotate_passive(&p, &q, &q_inv); - quaternion_to_vec(vec, &p); + vec->x = p.x; + vec->y = p.y; + vec->z = p.z; +} + +void quaternion_rotate_passive(v4_f32* quat, float pitch, float yaw, float roll) +{ + v4_f32 q; + quaternion_from_euler(&q, pitch, yaw, roll); // q is already in unit length + + v4_f32 q_inv; + quaternion_inverse(&q_inv, &q); + + quaternion_rotate_passive(quat, &q, &q_inv); } #endif \ No newline at end of file diff --git a/math/matrix/VectorFloat32.h 
b/math/matrix/VectorFloat32.h index a52b145..9f34188 100644 --- a/math/matrix/VectorFloat32.h +++ b/math/matrix/VectorFloat32.h @@ -151,23 +151,4 @@ struct v4_f32_16 { }; }; -void vec3_normalize_f32(float* x, float* y, float* z) -{ - float d = sqrt((*x) * (*x) + (*y) * (*y) + (*z) * (*z)); - - *x /= d; - *y /= d; - *z /= d; -} - -void vec4_normalize_f32(float* x, float* y, float* z, float* w) -{ - float d = sqrt((*x) * (*x) + (*y) * (*y) + (*z) * (*z) + (*w) * (*w)); - - *x /= d; - *y /= d; - *z /= d; - *w /= d; -} - #endif diff --git a/models/item/Equipment.h b/models/item/Equipment.h index e854f27..993f01d 100644 --- a/models/item/Equipment.h +++ b/models/item/Equipment.h @@ -25,6 +25,9 @@ struct SEquipmentStatsPoints { // Item requirements PrimaryStatsPoints requirements; + // @todo Find a way to add/multiply stats on conditions + // e.g. x% or x amount of health/resource + // Item stats // items cannot have stats like str, they can only modify primary stats of chars (see below) SecondaryStatsPoints secondary_item; diff --git a/models/mob/skill/Skill.h b/models/mob/skill/Skill.h index 3d88952..0e126e4 100644 --- a/models/mob/skill/Skill.h +++ b/models/mob/skill/Skill.h @@ -22,6 +22,12 @@ struct Skill // const char name[MAX_SKILL_NAME]; // const char description[MAX_SKILL_DESCRIPTION]; + // @todo implement charged skills + // e.g. you gain one charge every: x seconds, x mob kills, x dmg, ... 
+ // max charges + // you can then use these charges + // -> we could then have things that also reduce charges + int id; // @todo animations diff --git a/platform/win32/UtilsWindows.h b/platform/win32/UtilsWindows.h index 5d9babf..bd5f1f0 100644 --- a/platform/win32/UtilsWindows.h +++ b/platform/win32/UtilsWindows.h @@ -14,7 +14,87 @@ #include "../../stdlib/Types.h" #include "../../utils/TestUtils.h" -void window_create(Window* window, void* proc) +inline +void window_inactive(Window* w) +{ + LONG_PTR style = GetWindowLongPtrA(w->hwnd, GWL_STYLE); + style |= WS_OVERLAPPEDWINDOW; + SetWindowLongPtr(w->hwnd, GWL_STYLE, style); + + ClipCursor(NULL); + + // WARNING: Apparently this has an internal reference count, effecting if true/false actually take effect! + ShowCursor(true); + + w->mouse_captured = false; +} + +inline +void monitor_resolution(const Window* __restrict w, v2_int32* __restrict resolution) +{ + resolution->width = GetDeviceCaps(w->hdc, HORZRES); + resolution->height = GetDeviceCaps(w->hdc, VERTRES); +} + +inline +void monitor_resolution(Window* __restrict w) +{ + w->width = GetDeviceCaps(w->hdc, HORZRES); + w->height = GetDeviceCaps(w->hdc, VERTRES); +} + +inline +void window_active(Window* __restrict w) +{ + LONG_PTR style = GetWindowLongPtrA(w->hwnd, GWL_STYLE); + style &= ~WS_OVERLAPPEDWINDOW; + SetWindowLongPtr(w->hwnd, GWL_STYLE, style); + + SetWindowPos( + w->hwnd, HWND_TOP, + w->x, w->y, + w->width, w->height, + SWP_NOACTIVATE | SWP_NOZORDER + ); + + RECT rect; + GetWindowRect(w->hwnd, &rect); + ClipCursor(&rect); + + // WARNING: Apparently this has an internal reference count, effecting if true/false actually take effect! 
+ ShowCursor(false); + + w->mouse_captured = true; +} + +inline +void window_fullscreen(Window* __restrict w) +{ + monitor_resolution(w); + w->x = 0; + w->y = 0; + + LONG style = GetWindowLong(w->hwnd, GWL_STYLE); + SetWindowLongPtr(w->hwnd, GWL_STYLE, style & ~WS_OVERLAPPEDWINDOW); + + SetWindowPos(w->hwnd, HWND_TOP, 0, 0, w->width, w->height, SWP_NOACTIVATE | SWP_NOZORDER | SWP_NOMOVE); +} + +inline +void window_restore(Window* __restrict w) +{ + window_restore_state(w); + + SetWindowLongPtr(w->hwnd, GWL_STYLE, w->state_old.style); + SetWindowPos( + w->hwnd, HWND_TOP, + w->state_old.x, w->state_old.y, + w->state_old.width, w->state_old.height, + SWP_NOACTIVATE | SWP_NOZORDER + ); +} + +void window_create(Window* __restrict window, void* proc) { ASSERT_SIMPLE(proc); @@ -26,6 +106,7 @@ void window_create(Window* window, void* proc) wc.style = CS_OWNDC; wc.lpfnWndProc = wndproc; wc.hInstance = hinstance; + wc.hCursor = LoadCursor(NULL, IDC_ARROW); wc.lpszClassName = (LPCSTR) window->name; if (!RegisterClassExA(&wc)) { @@ -63,20 +144,17 @@ void window_create(Window* window, void* proc) window->hdc = GetDC(window->hwnd); ASSERT_SIMPLE(window->hwnd); - - //SetWindowLongA(window->hwnd, GWL_STYLE, 0); } -void window_open(const Window* window) +void window_open(const Window* __restrict window) { ShowWindow(window->hwnd, SW_SHOW); SetForegroundWindow(window->hwnd); SetFocus(window->hwnd); - ShowCursor(false); UpdateWindow(window->hwnd); } -void window_close(Window* window) +void window_close(Window* __restrict window) { CloseWindow(window->hwnd); } diff --git a/platform/win32/Window.h b/platform/win32/Window.h index 6b88da1..1ddc45d 100644 --- a/platform/win32/Window.h +++ b/platform/win32/Window.h @@ -12,17 +12,49 @@ #include #include "../../stdlib/Types.h" +struct WindowState { + uint64 style; + int32 width; + int32 height; + + int32 x; + int32 y; +}; + struct Window { bool is_fullscreen; int32 width; int32 height; - char name[32]; int32 x; int32 y; + bool 
mouse_captured; + HWND hwnd; HDC hdc; + + char name[32]; + WindowState state_old; }; +inline +void window_backup_state(Window* __restrict w) +{ + w->state_old.style = GetWindowLongPtr(w->hwnd, GWL_STYLE); + w->state_old.width = w->width; + w->state_old.height = w->height; + w->state_old.x = w->x; + w->state_old.y = w->y; +} + +inline +void window_restore_state(Window* __restrict w) +{ + w->width = w->state_old.width; + w->height = w->state_old.height; + w->x = w->state_old.x; + w->y = w->state_old.y; +} + #endif \ No newline at end of file diff --git a/platform/win32/input/RawInput.h b/platform/win32/input/RawInput.h index cd58f2d..621fdef 100644 --- a/platform/win32/input/RawInput.h +++ b/platform/win32/input/RawInput.h @@ -19,11 +19,17 @@ #include "../../../memory/BufferMemory.h" #include +#define INPUT_MOUSE_BUTTON_1 1 +#define INPUT_MOUSE_BUTTON_2 2 +#define INPUT_MOUSE_BUTTON_3 4 +#define INPUT_MOUSE_BUTTON_4 8 +#define INPUT_MOUSE_BUTTON_5 16 + // IMPORTANT: // Even if it is nowhere documented (at least not to our knowledge) the GetRawInputDeviceInfoA, GetRawInputBuffer functions requried // aligned memory. So far we only figured out that 4 bytes works, maybe this needs to be 8 in the future?! 
-int input_init(HWND hwnd, InputState* states, RingMemory* ring, BufferMemory* buf) +int input_init(HWND hwnd, Input* __restrict states, RingMemory* ring) { uint32 device_count; GetRawInputDeviceList(NULL, &device_count, sizeof(RAWINPUTDEVICELIST)); @@ -111,7 +117,16 @@ int input_init(HWND hwnd, InputState* states, RingMemory* ring, BufferMemory* bu return i; } -void input_raw_handle(RAWINPUT* raw, InputState* states, int state_count) +void input_mouse_position(HWND hwnd, v2_int32* pos) +{ + POINT p; + if (GetCursorPos(&p) && ScreenToClient(hwnd, &p)) { + pos->x = p.x; + pos->y = p.y; + } +} + +void input_raw_handle(RAWINPUT* __restrict raw, Input* states, int state_count, uint64 time) { uint32 i = 0; if (raw->header.dwType == RIM_TYPEMOUSE) { @@ -126,40 +141,93 @@ void input_raw_handle(RAWINPUT* raw, InputState* states, int state_count) return; } - InputState* input_state = states + i; - - if (raw->data.mouse.usFlags & MOUSE_MOVE_ABSOLUTE) { - RECT rect; - - // @todo move out, this is slow and should be stored in Window - // @performance this is slow and should be handled in the WindowProc !!! - if (raw->data.mouse.usFlags & MOUSE_VIRTUAL_DESKTOP) { - rect.left = GetSystemMetrics(SM_XVIRTUALSCREEN); - rect.top = GetSystemMetrics(SM_YVIRTUALSCREEN); - rect.right = GetSystemMetrics(SM_CXVIRTUALSCREEN); - rect.bottom = GetSystemMetrics(SM_CYVIRTUALSCREEN); - } else { - rect.left = 0; - rect.top = 0; - rect.right = GetSystemMetrics(SM_CXSCREEN); - rect.bottom = GetSystemMetrics(SM_CYSCREEN); + if (raw->data.mouse.usButtonFlags) { + // @question should all of these be else ifs? 
+ if (raw->data.mouse.usButtonFlags & RI_MOUSE_LEFT_BUTTON_DOWN) { + states[i].state.mouse_down |= INPUT_MOUSE_BUTTON_1; + states[i].state.keys_down_time[0] = time; + } else if (raw->data.mouse.usButtonFlags & RI_MOUSE_LEFT_BUTTON_UP) { + states[i].state.mouse_down &= ~INPUT_MOUSE_BUTTON_1; } - input_state->x_last = input_state->x; - input_state->y_last = input_state->y; + if (raw->data.mouse.usButtonFlags & RI_MOUSE_RIGHT_BUTTON_DOWN) { + states[i].state.mouse_down |= INPUT_MOUSE_BUTTON_2; + states[i].state.keys_down_time[1] = time; + } else if (raw->data.mouse.usButtonFlags & RI_MOUSE_RIGHT_BUTTON_UP) { + states[i].state.mouse_down &= ~INPUT_MOUSE_BUTTON_2; + } - input_state->x = MulDiv(raw->data.mouse.lLastX, rect.right, 65535) + rect.left; - input_state->y = MulDiv(raw->data.mouse.lLastY, rect.bottom, 65535) + rect.top; + if (raw->data.mouse.usButtonFlags & RI_MOUSE_MIDDLE_BUTTON_DOWN) { + states[i].state.mouse_down |= INPUT_MOUSE_BUTTON_3; + states[i].state.keys_down_time[2] = time; + } else if (raw->data.mouse.usButtonFlags & RI_MOUSE_MIDDLE_BUTTON_UP) { + states[i].state.mouse_down &= ~INPUT_MOUSE_BUTTON_3; + } - input_state->state_change_mouse = true; - } else if (raw->data.mouse.lLastX != 0 || raw->data.mouse.lLastY != 0) { - input_state->x_last = input_state->x; - input_state->y_last = input_state->y; + if (raw->data.mouse.usButtonFlags & RI_MOUSE_BUTTON_4_DOWN) { + states[i].state.mouse_down |= INPUT_MOUSE_BUTTON_4; + states[i].state.keys_down_time[3] = time; + } else if (raw->data.mouse.usButtonFlags & RI_MOUSE_BUTTON_4_UP) { + states[i].state.mouse_down &= ~INPUT_MOUSE_BUTTON_4; + } - input_state->x = input_state->x + raw->data.mouse.lLastX; - input_state->y = input_state->y + raw->data.mouse.lLastY; + if (raw->data.mouse.usButtonFlags & RI_MOUSE_BUTTON_5_DOWN) { + states[i].state.mouse_down |= INPUT_MOUSE_BUTTON_5; + states[i].state.keys_down_time[4] = time; + } else if (raw->data.mouse.usButtonFlags & RI_MOUSE_BUTTON_5_UP) { + 
states[i].state.mouse_down &= ~INPUT_MOUSE_BUTTON_5; + } - input_state->state_change_mouse = true; + if (raw->data.mouse.usButtonFlags & RI_MOUSE_WHEEL) { + states[i].state.wheel_delta += raw->data.mouse.usButtonData; + } + + if (raw->data.mouse.usButtonFlags & RI_MOUSE_HWHEEL) { + states[i].state.hwheel_delta += raw->data.mouse.usButtonData; + } + + states[i].state_change_mouse = true; + states[i].state_change_mouse_button = true; + + // @question is mouse wheel really considered a button change? + states[i].state_change_button = true; + } + + if (states[i].mouse_movement) { + // do we want to handle mouse movement for every individual movement, or do we want to pull it + if (raw->data.mouse.usFlags & MOUSE_MOVE_ABSOLUTE) { + RECT rect; + + // @todo move out, this is slow and should be stored in Window + // @performance this is slow and should be handled in the WindowProc !!! + if (raw->data.mouse.usFlags & MOUSE_VIRTUAL_DESKTOP) { + rect.left = GetSystemMetrics(SM_XVIRTUALSCREEN); + rect.top = GetSystemMetrics(SM_YVIRTUALSCREEN); + rect.right = GetSystemMetrics(SM_CXVIRTUALSCREEN); + rect.bottom = GetSystemMetrics(SM_CYVIRTUALSCREEN); + } else { + rect.left = 0; + rect.top = 0; + rect.right = GetSystemMetrics(SM_CXSCREEN); + rect.bottom = GetSystemMetrics(SM_CYSCREEN); + } + + states[i].state.dx += raw->data.mouse.lLastX; + states[i].state.dy += raw->data.mouse.lLastY; + + states[i].state.x = MulDiv(raw->data.mouse.lLastX, rect.right, 65535) + rect.left; + states[i].state.y = MulDiv(raw->data.mouse.lLastY, rect.bottom, 65535) + rect.top; + + states[i].state_change_mouse = true; + } else if (raw->data.mouse.lLastX != 0 || raw->data.mouse.lLastY != 0) { + states[i].state.dx += raw->data.mouse.lLastX; + states[i].state.dy += raw->data.mouse.lLastY; + + states[i].state.x = states[i].state.x + raw->data.mouse.lLastX; + states[i].state.y = states[i].state.y + raw->data.mouse.lLastY; + + states[i].state_change_mouse = true; + } } } else if (raw->header.dwType == 
RIM_TYPEKEYBOARD) { // @todo Change so we can directly access the correct state (maybe map handle address to index?) @@ -173,23 +241,69 @@ void input_raw_handle(RAWINPUT* raw, InputState* states, int state_count) return; } - InputState* input_state = states + i; + // @todo change to MakeCode instead of VKey + // @performance Some of the things down here seem unneccessary. We shouldn't have to loop all elements! + if (raw->data.keyboard.Flags == RI_KEY_BREAK) { + // Key is already released + if (keyboard_is_released(&states[i].state, (uint8) raw->data.keyboard.VKey)) { + for (int j = 0; j < MAX_KEY_PRESSES; ++j) { + if (states[i].state.keys_down[j] == (uint8) raw->data.keyboard.VKey) { + states[i].state.keys_down[j] = 0; - RAWKEYBOARD raw_kb = raw->data.keyboard; + break; + } + } - if (raw_kb.Flags & RI_KEY_BREAK) { - input_state->keys_down_old[input_state->up_index++] = (uint8) raw_kb.VKey; + return; + } + + bool empty = true; + for (int j = 0; j < MAX_KEY_PRESSES; ++j) { + if (empty && states[i].state.keys_up[j] == 0) { + states[i].state.keys_up[j] = (uint8) raw->data.keyboard.VKey; + + empty = false; + } + + // remove pressed key + if (states[i].state.keys_down[j] == (uint8) raw->data.keyboard.VKey) { + states[i].state.keys_down[j] = 0; + } + } + } else if (raw->data.keyboard.Flags == RI_KEY_MAKE) { + // Key is already released + if (keyboard_is_pressed(&states[i].state, (uint8) raw->data.keyboard.VKey)) { + for (int j = 0; j < MAX_KEY_PRESSES; ++j) { + if (states[i].state.keys_up[j] == (uint8) raw->data.keyboard.VKey) { + states[i].state.keys_up[j] = 0; + + break; + } + } + + return; + } + + bool empty = true; + for (int j = 0; j < MAX_KEY_PRESSES; ++j) { + if (empty && states[i].state.keys_down[j] == 0) { + states[i].state.keys_down[j] = (uint8) raw->data.keyboard.VKey; + states[i].state.keys_down_time[MAX_MOUSE_PRESSES + j] = time; + empty = false; + } + + // remove released key + if (states[i].state.keys_up[j] == (uint8) raw->data.keyboard.VKey) { + 
states[i].state.keys_up[j] = 0; + } + } } - if (raw_kb.Flags & RI_KEY_MAKE) { - input_state->keys_down[input_state->down_index++] = (uint8) raw_kb.VKey; - } - - input_state->state_change_keyboard = true; + states[i].state_change_button = true; } } -void input_handle(LPARAM lParam, InputState* states, int state_count, RingMemory* ring) +void input_handle(LPARAM lParam, Input* __restrict states, int state_count, RingMemory* ring, uint64 time) { uint32 db_size; GetRawInputData((HRAWINPUT) lParam, RID_INPUT, NULL, &db_size, sizeof(RAWINPUTHEADER)); @@ -203,13 +317,18 @@ void input_handle(LPARAM lParam, InputState* states, int state_count, RingMemory return; } - input_raw_handle((RAWINPUT *) lpb, states, state_count); + input_raw_handle((RAWINPUT *) lpb, states, state_count, time); } -void input_handle_buffered(LPARAM lParam, int buffer_size, InputState* states, int state_count, RingMemory* ring) +// @bug Somehow this function skips some inputs (input_handle works)!!!!! +void input_handle_buffered(int buffer_size, Input* __restrict states, int state_count, RingMemory* ring, uint64 time) { uint32 cb_size; + GetRawInputBuffer(NULL, &cb_size, sizeof(RAWINPUTHEADER)); + if (!cb_size) { + return; + } // Max input messages (e.g. 
16) cb_size *= buffer_size; @@ -217,19 +336,28 @@ void input_handle_buffered(LPARAM lParam, int buffer_size, InputState* states, i PRAWINPUT raw_input = (PRAWINPUT) ring_get_memory(ring, cb_size, 4); uint32 input; - uint32 cb_size_t = cb_size; - while ((input = GetRawInputBuffer(raw_input, &cb_size_t, sizeof(RAWINPUTHEADER))) > 0) { + while (true) { + uint32 cb_size_t = cb_size; + input = GetRawInputBuffer(raw_input, &cb_size_t, sizeof(RAWINPUTHEADER)); + + if (input == 0 || input == (uint32) -1) { + break; + } + PRAWINPUT pri = raw_input; for (uint32 i = 0; i < input; ++i) { - input_raw_handle(pri, states, state_count); + if (!pri->header.hDevice) { + break; + } + + input_raw_handle(pri, states, state_count, time); pri = NEXTRAWINPUTBLOCK(pri); } - - // @question is this asign necessary? - cb_size_t = cb_size; } + + ASSERT_SIMPLE(input != (uint32) -1) } #endif \ No newline at end of file diff --git a/platform/win32/input/XInput.h b/platform/win32/input/XInput.h index e6ac177..65dee55 100644 --- a/platform/win32/input/XInput.h +++ b/platform/win32/input/XInput.h @@ -55,7 +55,7 @@ void xinput_load() { } // END: Dynamically load XInput -ControllerState* init_controllers() +ControllerInput* init_controllers() { uint32 c = 0; for (uint32 controller_index = 0; controller_index < XUSER_MAX_COUNT; ++controller_index) { @@ -67,7 +67,7 @@ ControllerState* init_controllers() // We always want at least one empty controller slot // @todo Change so that we store the actual number of devices - ControllerState *controllers = (ControllerState *) calloc((c + 1), sizeof(ControllerState)); + ControllerInput *controllers = (ControllerInput *) calloc((c + 1), sizeof(ControllerInput)); if (c == 0) { return controllers; @@ -87,7 +87,7 @@ ControllerState* init_controllers() return controllers; } -void handle_controller_input(ControllerState* states) +void handle_controller_input(ControllerInput* states) { uint32 controller_index = 0; while(states[controller_index].is_connected) { diff 
--git a/stdlib/Mathtypes.h b/stdlib/Mathtypes.h index 7037ae8..9feca27 100644 --- a/stdlib/Mathtypes.h +++ b/stdlib/Mathtypes.h @@ -11,6 +11,8 @@ #include "Types.h" +// @todo Move to matrix + struct v2_int32 { union { struct { @@ -18,6 +20,11 @@ struct v2_int32 { int32 y; }; + struct { + int32 width; + int32 height; + }; + int32 v[2]; }; }; diff --git a/utils/BitUtils.h b/utils/BitUtils.h index 4d6307e..f76e059 100644 --- a/utils/BitUtils.h +++ b/utils/BitUtils.h @@ -12,6 +12,12 @@ #include #include "../stdlib/Types.h" +#define IS_BIT_SET(num, pos) ((bool) ((num) & (1 << (pos)))) +#define BIT_SET(num, pos) ((num) | ((uint32) 1 << (pos))) +#define BIT_UNSET(num, pos) ((num) & ~((uint32) 1 << (pos))) +#define BIT_FLIP(num, pos) ((num) ^ ((uint32) 1 << (pos))) +#define BIT_SET_TO(num, pos, x) ((num) & ~((uint32) 1 << (pos)) | ((uint32) (x) << (pos))) + inline uint32 bytes_merge(byte b0, byte b1, byte b2, byte b3) { uint32 result = 0; @@ -71,4 +77,64 @@ inline int find_first_set_bit(int value) { #endif } + +inline +byte get_bits(byte data, int bits_to_read, int start_pos) +{ + byte mask = (1 << bits_to_read) - 1; + return (data >> (8 - start_pos - bits_to_read)) & mask; +} + +inline +uint64 get_bits(const byte* data, int bits_to_read, int start_pos) +{ + if (bits_to_read <= 0 || bits_to_read > sizeof(uint64)) { + return 0; + } + + int byte_index = start_pos / 8; + int bit_offset = start_pos % 8; + + uint64_t mask = (1ULL << bits_to_read) - 1; + uint64_t result = 0; + + int bits_read = 0; + + while (bits_read < bits_to_read) { + int bits_in_current_byte = 8 - bit_offset; + int bits_to_take = bits_to_read - bits_read; + + if (bits_to_take > bits_in_current_byte) { + bits_to_take = bits_in_current_byte; + } + + uint8_t current_byte = data[byte_index]; + current_byte >>= bit_offset; + current_byte &= (1 << bits_to_take) - 1; + + result |= ((uint64_t)current_byte << bits_read); + + bits_read += bits_to_take; + bit_offset = 0; + byte_index++; + } + + result &= mask; + + 
return result; +} + +inline +uint32 reverse_bits(uint32 data, uint32 count) +{ + uint32 reversed = 0; + for (uint32 i = 0; i <= (count / 2); ++i) { + uint32 inv = count - i - 1; + reversed |= ((data >> i) & 0x1) << inv; + reversed |= ((data >> inv) & 0x1) << i; + } + + return reversed; +} + #endif \ No newline at end of file diff --git a/utils/MathUtils.h b/utils/MathUtils.h index eecc99f..e5714e0 100644 --- a/utils/MathUtils.h +++ b/utils/MathUtils.h @@ -29,88 +29,4 @@ #define SQRT_2 1.4142135623730950488016887242097f -// @question Consider to implement table based sine wave + approximation if necessary -// [-PI/2, PI/2] -inline -float sin_approx_pih_pih(float x) -{ - return x - (x * x * x / 6.0f); -} - -inline -float sinf_approx(float x) -{ - return 4 * x * (180 - x) / (40500 - x * (180 - x)); -} - -inline -float cosf_approx(float x) -{ - return sinf_approx(x + OMS_RAD2DEG(OMS_PI_OVER_TWO)); -} - -inline -float tanf_approx(float x) -{ - float sin_x = sinf_approx(x); - float cos_x = cosf_approx(x); - - if (cos_x == 0.0f) { - return (sin_x > 0.0f) ? 1e10f : -1e10f; - } - - return sin_x / cos_x; -} - -inline -float atanf_approx(float x) -{ - float abs_x = OMS_ABS(x); - float result; - - if (abs_x > 1.0f) { - result = OMS_PI_OVER_TWO - (1.0f / abs_x); - } else { - result = abs_x - (abs_x * abs_x * abs_x / 3.0f); - } - - return (x < 0.0f) ? -result : result; -} - -inline -float atan2f_approx(float y, float x) -{ - float abs_y = (float) (OMS_ABS(y) + 1.175494e-038); // prevent division by zero - float angle; - - if (x >= 0.0f) { - float r = (x - abs_y) / (x + abs_y); - angle = OMS_PI_OVER_FOUR - OMS_PI_OVER_FOUR * r; - } else { - float r = (x + abs_y) / (abs_y - x); - angle = (3.0f * OMS_PI / 4.0f) - OMS_PI_OVER_FOUR * r; - } - - return (y < 0.0f) ? -angle : angle; -} - -inline -float asinf_approx(float x) -{ - float negate = (x < 0) ? 
1.0f : 0.0f; - x = OMS_ABS(x); - - float result = -0.0187293f; - result *= x; - result += 0.0742610f; - result *= x; - result -= 0.2121144f; - result *= x; - result += 1.5707288f; - result *= sqrtf(1.0f - x); - result -= 2 * negate * result; - - return negate * OMS_PI + result; -} - #endif diff --git a/utils/StringUtils.h b/utils/StringUtils.h index 9ecbfec..89c4329 100644 --- a/utils/StringUtils.h +++ b/utils/StringUtils.h @@ -37,6 +37,27 @@ void wchar_to_char(const wchar_t* src, char* dest, int length = 0) *dest = '\0'; } +inline +int str_to_int(const char *str) +{ + int result = 0; + + int sign = 1; + if (*str == '-') { + sign = -1; + ++str; + } + + while (*str >= '0' && *str <= '9') { + result *= 10; + result += (*str - '0'); + + ++str; + } + + return result * sign; +} + inline size_t str_count(const char* str, const char* substr) { size_t l1 = strlen(str); diff --git a/utils/TestUtils.h b/utils/TestUtils.h index c5c2215..df12fa6 100644 --- a/utils/TestUtils.h +++ b/utils/TestUtils.h @@ -46,8 +46,10 @@ void update_timing_stat(TimingStat *stat) // In such cases use the following macro. 
#if DEBUG #define UPDATE_TIMING_STAT(stat) update_timing_stat(stat) + #define DEBUG_OUTPUT(str) OutputDebugStringA(str) #else #define UPDATE_TIMING_STAT(stat) ((void)0) + #define DEBUG_OUTPUT(str) ((void)0) #endif void profile_function(const char* func_name, void (*func)(void*), void* data, int iterations) diff --git a/utils/Utils.h b/utils/Utils.h index 27a0e90..2440d62 100644 --- a/utils/Utils.h +++ b/utils/Utils.h @@ -45,83 +45,6 @@ f32 fast_rand_percentage(void) { return (f32) fast_rand1() / (f32) FAST_RAND_MAX; } -inline -bool is_bit_set(byte data, int bit) -{ - return data & (1 << bit); -} - -inline -bool is_bit_set(int data, int bit) -{ - return data & (1 << bit); -} - -inline -bool is_bit_set(uint32 data, int bit) -{ - return data & (1 << bit); -} - -inline -byte get_bits(byte data, int bits_to_read, int start_pos) -{ - byte mask = (1 << bits_to_read) - 1; - return (data >> (8 - start_pos - bits_to_read)) & mask; -} - -inline -uint64 get_bits(const byte* data, int bits_to_read, int start_pos) -{ - if (bits_to_read <= 0 || bits_to_read > sizeof(uint64)) { - return 0; - } - - int byte_index = start_pos / 8; - int bit_offset = start_pos % 8; - - uint64_t mask = (1ULL << bits_to_read) - 1; - uint64_t result = 0; - - int bits_read = 0; - - while (bits_read < bits_to_read) { - int bits_in_current_byte = 8 - bit_offset; - int bits_to_take = bits_to_read - bits_read; - - if (bits_to_take > bits_in_current_byte) { - bits_to_take = bits_in_current_byte; - } - - uint8_t current_byte = data[byte_index]; - current_byte >>= bit_offset; - current_byte &= (1 << bits_to_take) - 1; - - result |= ((uint64_t)current_byte << bits_read); - - bits_read += bits_to_take; - bit_offset = 0; - byte_index++; - } - - result &= mask; - - return result; -} - -inline -uint32 reverse_bits(uint32 data, uint32 count) -{ - uint32 reversed = 0; - for (uint32 i = 0; i <= (count / 2); ++i) { - uint32 inv = count - i - 1; - reversed |= ((data >> i) & 0x1) << inv; - reversed |= ((data >> inv) & 
0x1) << i; - } - - return reversed; -} - /** * Picks n random elements from end and stores them in begin. */