diff --git a/Math/Matrix/VectorFloat32.h b/Math/Matrix/VectorFloat32.h index e69de29..c971a72 100644 --- a/Math/Matrix/VectorFloat32.h +++ b/Math/Matrix/VectorFloat32.h @@ -0,0 +1,916 @@ +/** + * Karaka + * + * @package Stdlib + * @copyright Dennis Eichhorn + * @license OMS License 1.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef MATH_MATRIX_VECTORFLOAT32_H +#define MATH_MATRIX_VECTORFLOAT32_H + +#include "Types.h" +#include <immintrin.h> +#include <xmmintrin.h> + +struct simd_f32_4 { + union { + __m128 P; + f32 v[4]; + }; +}; + +struct simd_f32_8 { + union { + __m256 P; + f32 v[8]; + }; +}; + +struct simd_f32_16 { + union { + __m512 P; + f32 v[16]; + }; +}; + +inline +simd_f32_4 init_zero_simd_f32_4() +{ + simd_f32_4 simd; + simd.P = _mm_setzero_ps(); + + return simd; +} + +inline +simd_f32_8 init_zero_simd_f32_8() +{ + simd_f32_8 simd; + simd.P = _mm256_setzero_ps(); + + return simd; +} + +inline +simd_f32_16 init_zero_simd_f32_16() +{ + simd_f32_16 simd; + simd.P = _mm512_setzero_ps(); + + return simd; +} + +inline +simd_f32_4 operator+(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_add_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 operator+(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_add_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_16 operator+(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_add_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_4 operator-(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_sub_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_4 operator-(simd_f32_4 a) +{ + return init_zero_simd_f32_4() - a; +} + +inline +simd_f32_8 operator-(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_sub_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 operator-(simd_f32_8 a) +{ + return init_zero_simd_f32_8() - a; +} + +inline +simd_f32_16 operator-(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_sub_ps(a.P, 
b.P); + + return simd; +} + +inline +simd_f32_16 operator-(simd_f32_16 a) +{ + return init_zero_simd_f32_16() - a; +} + +inline +simd_f32_4 operator*(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_mul_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 operator*(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_mul_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_16 operator*(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_mul_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_4 operator/(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_div_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 operator/(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_div_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_16 operator/(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_div_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_4 operator^(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_xor_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 operator^(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_xor_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_16 operator^(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_xor_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_4& operator-=(simd_f32_4 &a, simd_f32_4 b) +{ + a = a - b; + + return a; +} + +inline +simd_f32_8& operator-=(simd_f32_8 &a, simd_f32_8 b) +{ + a = a - b; + + return a; +} + +inline +simd_f32_16& operator-=(simd_f32_16 &a, simd_f32_16 b) +{ + a = a - b; + + return a; +} + +inline +simd_f32_4& operator+=(simd_f32_4 &a, simd_f32_4 b) +{ + a = a + b; + + return a; +} + +inline +simd_f32_8& operator+=(simd_f32_8 &a, simd_f32_8 b) +{ + a = a + b; + + return a; +} + +inline +simd_f32_16& operator+=(simd_f32_16 &a, simd_f32_16 b) +{ + a = a + b; + + return a; +} + +inline +simd_f32_4& operator*=(simd_f32_4 &a, 
simd_f32_4 b) +{ + a = a * b; + + return a; +} + +inline +simd_f32_8& operator*=(simd_f32_8 &a, simd_f32_8 b) +{ + a = a * b; + + return a; +} + +inline +simd_f32_16& operator*=(simd_f32_16 &a, simd_f32_16 b) +{ + a = a * b; + + return a; +} + +inline +simd_f32_4& operator/=(simd_f32_4 &a, simd_f32_4 b) +{ + a = a / b; + + return a; +} + +inline +simd_f32_8& operator/=(simd_f32_8 &a, simd_f32_8 b) +{ + a = a / b; + + return a; +} + +inline +simd_f32_16& operator/=(simd_f32_16 &a, simd_f32_16 b) +{ + a = a / b; + + return a; +} + +inline +simd_f32_4& operator^=(simd_f32_4 &a, simd_f32_4 b) +{ + a = a ^ b; + + return a; +} + +inline +simd_f32_8& operator^=(simd_f32_8 &a, simd_f32_8 b) +{ + a = a ^ b; + + return a; +} + +inline +simd_f32_16& operator^=(simd_f32_16 &a, simd_f32_16 b) +{ + a = a ^ b; + + return a; +} + +inline +simd_f32_4 operator<(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_cmplt_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 operator<(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_cmp_ps(a.P, b.P, _CMP_LT_OQ); + + return simd; +} + +inline +simd_f32_16 operator<(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_mask_mov_ps( + _mm512_setzero_ps(), + _mm512_cmp_ps_mask(a.P, b.P, _CMP_LT_OQ), + _mm512_set1_ps(1.0f) + ); + + return simd; +} + +inline +simd_f32_4 operator<=(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_cmple_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 operator<=(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_cmp_ps(a.P, b.P, _CMP_LE_OQ); + + return simd; +} + +inline +simd_f32_16 operator<=(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_mask_mov_ps( + _mm512_setzero_ps(), + _mm512_cmp_ps_mask(a.P, b.P, _CMP_LE_OQ), + _mm512_set1_ps(1.0f) + ); + + return simd; +} + +inline +simd_f32_4 operator>(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_cmpgt_ps(a.P, b.P); + + return simd; 
+} + +inline +simd_f32_8 operator>(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_cmp_ps(a.P, b.P, _CMP_GT_OQ); + + return simd; +} + +inline +simd_f32_16 operator>(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_mask_mov_ps( + _mm512_setzero_ps(), + _mm512_cmp_ps_mask(a.P, b.P, _CMP_GT_OQ), + _mm512_set1_ps(1.0f) + ); + + return simd; +} + +inline +simd_f32_4 operator>=(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_cmpge_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 operator>=(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_cmp_ps(a.P, b.P, _CMP_GE_OQ); + + return simd; +} + +inline +simd_f32_16 operator>=(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_mask_mov_ps( + _mm512_setzero_ps(), + _mm512_cmp_ps_mask(a.P, b.P, _CMP_GE_OQ), + _mm512_set1_ps(1.0f) + ); + + return simd; +} + +inline +simd_f32_4 operator==(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_cmpeq_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 operator==(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_cmp_ps(a.P, b.P, _CMP_EQ_OQ); + + return simd; +} + +inline +simd_f32_16 operator==(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_mask_mov_ps( + _mm512_setzero_ps(), + _mm512_cmp_ps_mask(a.P, b.P, _CMP_EQ_OQ), + _mm512_set1_ps(1.0f) + ); + + return simd; +} + +inline +simd_f32_4 operator!=(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_cmpneq_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 operator!=(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_cmp_ps(a.P, b.P, _CMP_NEQ_OQ); + + return simd; +} + +inline +simd_f32_16 operator!=(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_mask_mov_ps( + _mm512_setzero_ps(), + _mm512_cmp_ps_mask(a.P, b.P, _CMP_NEQ_OQ), + _mm512_set1_ps(1.0f) + ); + + return simd; +} + +inline +simd_f32_4 operator&(simd_f32_4 a, 
simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_and_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 operator&(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_and_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_16 operator&(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_and_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_4 operator|(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_or_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 operator|(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_or_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_16 operator|(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_or_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_4& operator&=(simd_f32_4 &a, simd_f32_4 b) +{ + a = a & b; + + return a; +} + +inline +simd_f32_8& operator&=(simd_f32_8 &a, simd_f32_8 b) +{ + a = a & b; + + return a; +} + +inline +simd_f32_16& operator&=(simd_f32_16 &a, simd_f32_16 b) +{ + a = a & b; + + return a; +} + +inline +simd_f32_4& operator|=(simd_f32_4 &a, simd_f32_4 b) +{ + a = a | b; + + return a; +} + +inline +simd_f32_8& operator|=(simd_f32_8 &a, simd_f32_8 b) +{ + a = a | b; + + return a; +} + +inline +simd_f32_16& operator|=(simd_f32_16 &a, simd_f32_16 b) +{ + a = a | b; + + return a; +} + +inline +simd_f32_4 abs(simd_f32_4 a) +{ + unsigned int unsigned_mask = 1u << 31; + __m128 mask = _mm_set1_ps(*(float *) &unsigned_mask); + + simd_f32_4 simd; + simd.P = _mm_andnot_ps(mask, a.P); + + return simd; +} + +inline +simd_f32_8 abs(simd_f32_8 a) +{ + unsigned int unsigned_mask = 1u << 31; + __m256 mask = _mm256_set1_ps(*(float *) &unsigned_mask); + + simd_f32_8 simd; + simd.P = _mm256_andnot_ps(mask, a.P); + + return simd; +} + +inline +simd_f32_16 abs(simd_f32_16 a) +{ + unsigned int unsigned_mask = 1u << 31; + __m512 mask = _mm512_set1_ps(*(float *) &unsigned_mask); + + 
simd_f32_16 simd; + simd.P = _mm512_andnot_ps(mask, a.P); + + return simd; +} + +inline +simd_f32_4 min(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_min_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 min(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_min_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_16 min(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_min_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_4 max(simd_f32_4 a, simd_f32_4 b) +{ + simd_f32_4 simd; + simd.P = _mm_max_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_8 max(simd_f32_8 a, simd_f32_8 b) +{ + simd_f32_8 simd; + simd.P = _mm256_max_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_16 max(simd_f32_16 a, simd_f32_16 b) +{ + simd_f32_16 simd; + simd.P = _mm512_max_ps(a.P, b.P); + + return simd; +} + +inline +simd_f32_4 sign(simd_f32_4 a) +{ + unsigned int umask = 1u << 31; + __m128 mask = _mm_set1_ps(*(float *) &umask); + + simd_f32_4 signBit; + signBit.P = _mm_and_ps(a.P, mask); + + simd_f32_4 b; + b.P = _mm_set1_ps(1.0f); + + simd_f32_4 simd = b | signBit; + + return simd; +} + +inline +simd_f32_8 sign(simd_f32_8 a) +{ + unsigned int umask = 1u << 31; + __m256 mask = _mm256_set1_ps(*(float *) &umask); + + simd_f32_8 signBit; + signBit.P = _mm256_and_ps(a.P, mask); + + simd_f32_8 b; + b.P = _mm256_set1_ps(1.0f); + + simd_f32_8 simd = b | signBit; + + return simd; +} + +inline +simd_f32_16 sign(simd_f32_16 a) +{ + unsigned int umask = 1u << 31; + __m512 mask = _mm512_set1_ps(*(float *) &umask); + + simd_f32_16 signBit; + signBit.P = _mm512_and_ps(a.P, mask); + + simd_f32_16 b; + b.P = _mm512_set1_ps(1.0f); + + simd_f32_16 simd = b | signBit; + + return simd; +} + +// sqrt +// approxinvsquareroot +// approx1over +// clamp +// floor +// ceil +// anytrue +// alltrue +// anyfalse +// allfalse + +struct v3_simd_f32_4 { + union { + struct { + union { + simd_f32_4 
x; + simd_f32_4 r; + }; + union { + simd_f32_4 y; + simd_f32_4 g; + }; + union { + simd_f32_4 z; + simd_f32_4 b; + }; + }; + + simd_f32_4 v[3]; + }; +}; + +struct v3_simd_f32_8 { + union { + struct { + union { + simd_f32_8 x; + simd_f32_8 r; + }; + union { + simd_f32_8 y; + simd_f32_8 g; + }; + union { + simd_f32_8 z; + simd_f32_8 b; + }; + }; + + simd_f32_8 v[3]; + }; +}; + +struct v3_simd_f32_16 { + union { + struct { + union { + simd_f32_16 x; + simd_f32_16 r; + }; + union { + simd_f32_16 y; + simd_f32_16 g; + }; + union { + simd_f32_16 z; + simd_f32_16 b; + }; + }; + + simd_f32_16 v[3]; + }; +}; + +struct v4_simd_f32_4 { + union { + struct { + union { + simd_f32_4 x; + simd_f32_4 r; + }; + union { + simd_f32_4 y; + simd_f32_4 g; + }; + union { + simd_f32_4 z; + simd_f32_4 b; + }; + union { + simd_f32_4 w; + simd_f32_4 a; + }; + }; + + simd_f32_4 v[4]; + }; +}; + +struct v4_simd_f32_8 { + union { + struct { + union { + simd_f32_8 x; + simd_f32_8 r; + }; + union { + simd_f32_8 y; + simd_f32_8 g; + }; + union { + simd_f32_8 z; + simd_f32_8 b; + }; + union { + simd_f32_8 w; + simd_f32_8 a; + }; + }; + + simd_f32_8 v[4]; + }; +}; + +struct v4_simd_f32_16 { + union { + struct { + union { + simd_f32_16 x; + simd_f32_16 r; + }; + union { + simd_f32_16 y; + simd_f32_16 g; + }; + union { + simd_f32_16 z; + simd_f32_16 b; + }; + union { + simd_f32_16 w; + simd_f32_16 a; + }; + }; + + simd_f32_16 v[4]; + }; +}; + +#endif diff --git a/Math/Matrix/VectorInt32.h b/Math/Matrix/VectorInt32.h index e69de29..12cb085 100644 --- a/Math/Matrix/VectorInt32.h +++ b/Math/Matrix/VectorInt32.h @@ -0,0 +1,176 @@ +/** + * Karaka + * + * @package Stdlib + * @copyright Dennis Eichhorn + * @license OMS License 1.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef MATH_MATRIX_VECTORINT32_H +#define MATH_MATRIX_VECTORINT32_H + +#include "Types.h" +#include <immintrin.h> +#include <xmmintrin.h> + +struct simd_int32_4 { + union { + __m128i P; + int32 v[4]; + }; +}; + +struct simd_int32_8 { + union { + 
__m256i P; + int32 v[8]; + }; +}; + +struct simd_int32_16 { + union { + __m512i P; + int32 v[16]; + }; +}; + +struct v3_simd_int32_4 { + union { + struct { + union { + simd_int32_4 x; + simd_int32_4 r; + }; + union { + simd_int32_4 y; + simd_int32_4 g; + }; + union { + simd_int32_4 z; + simd_int32_4 b; + }; + }; + + simd_int32_4 v[3]; + }; +}; + +struct v3_simd_int32_8 { + union { + struct { + union { + simd_int32_8 x; + simd_int32_8 r; + }; + union { + simd_int32_8 y; + simd_int32_8 g; + }; + union { + simd_int32_8 z; + simd_int32_8 b; + }; + }; + + simd_int32_8 v[3]; + }; +}; + +struct v3_simd_int32_16 { + union { + struct { + union { + simd_int32_16 x; + simd_int32_16 r; + }; + union { + simd_int32_16 y; + simd_int32_16 g; + }; + union { + simd_int32_16 z; + simd_int32_16 b; + }; + }; + + simd_int32_16 v[3]; + }; +}; + +struct v4_simd_int32_4 { + union { + struct { + union { + simd_int32_4 x; + simd_int32_4 r; + }; + union { + simd_int32_4 y; + simd_int32_4 g; + }; + union { + simd_int32_4 z; + simd_int32_4 b; + }; + union { + simd_int32_4 w; + simd_int32_4 a; + }; + }; + + simd_int32_4 v[4]; + }; +}; + +struct v4_simd_int32_8 { + union { + struct { + union { + simd_int32_8 x; + simd_int32_8 r; + }; + union { + simd_int32_8 y; + simd_int32_8 g; + }; + union { + simd_int32_8 z; + simd_int32_8 b; + }; + union { + simd_int32_8 w; + simd_int32_8 a; + }; + }; + + simd_int32_8 v[4]; + }; +}; + +struct v4_simd_int32_16 { + union { + struct { + union { + simd_int32_16 x; + simd_int32_16 r; + }; + union { + simd_int32_16 y; + simd_int32_16 g; + }; + union { + simd_int32_16 z; + simd_int32_16 b; + }; + union { + simd_int32_16 w; + simd_int32_16 a; + }; + }; + + simd_int32_16 v[4]; + }; +}; + +#endif diff --git a/Math/Matrix/VectorInt64.h b/Math/Matrix/VectorInt64.h index c1c8b2f..a861de0 100644 --- a/Math/Matrix/VectorInt64.h +++ b/Math/Matrix/VectorInt64.h @@ -1,186 +1,176 @@ -// Remarks: sizes for the second matrix/vector are often implied by the first parameter and the 
rules for matrix/vector multiplication. +/** + * Karaka + * + * @package Stdlib + * @copyright Dennis Eichhorn + * @license OMS License 1.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef MATH_MATRIX_VECTORINT64_H +#define MATH_MATRIX_VECTORINT64_H -///////////////////////////////// -// Multiplication -///////////////////////////////// +#include "Types.h" +#include <immintrin.h> +#include <xmmintrin.h> -// Array vector multiplication -///////////////////////////////// -// mult_vec_int32(int64_t *a, size_t a, int32_t *b) -// mult_vec_int64(int64_t *a, size_t a, int64_t *b) -// mult_vec_float(int64_t *a, size_t a, float *b) +struct simd_int64_2 { + union { + __m128i P; + int64 v[2]; + }; +}; -// mult_scal_int32(int64_t *a, size_t a, int32_t b) -// mult_scal_int64(int64_t *a, size_t a, int64_t b) -// mult_scal_float(int64_t *a, size_t a, float b) +struct simd_int64_4 { + union { + __m256i P; + int64 v[4]; + }; +}; -// Vector 2 vector multiplication -///////////////////////////////// -// v2_mult_vec_int32(v2 *a, int32_t *b) -// v2_mult_vec_int64(v2 *a, int64_t *b) -// v2_mult_vec_float(v2 *a, float *b) -// v2_mult_vec_v2(v2 *a, v2 *b) +struct simd_int64_8 { + union { + __m512i P; + int64 v[8]; + }; +}; -// v2_mult_scal_int32(v2 *a, int32_t b) -// v2_mult_scal_int64(v2 *a, int64_t b) -// v2_mult_scal_float(v2 *a, float b) +struct v3_simd_int64_2 { + union { + struct { + union { + simd_int64_2 x; + simd_int64_2 r; + }; + union { + simd_int64_2 y; + simd_int64_2 g; + }; + union { + simd_int64_2 z; + simd_int64_2 b; + }; + }; -// Vector 3 vector multiplication -///////////////////////////////// -// v3_mult_vec_int32(v3 *a, int32_t *b) -// v3_mult_vec_int64(v3 *a, int64_t *b) -// v3_mult_vec_float(v3 *a, float *b) -// v3_mult_vec_v3(v3 *a, v3 *b) + simd_int64_2 v[3]; + }; +}; -// v3_mult_scal_int32(v3 *a, int32_t b) -// v3_mult_scal_int64(v3 *a, int64_t b) -// v3_mult_scal_float(v3 *a, float b) +struct v3_simd_int64_4 { + union { + struct { + union { + simd_int64_4 x; + simd_int64_4 
r; + }; + union { + simd_int64_4 y; + simd_int64_4 g; + }; + union { + simd_int64_4 z; + simd_int64_4 b; + }; + }; -// Vector 4 vector multiplication -///////////////////////////////// -// v4_mult_vec_int32(v4 *a, int32_t *b) -// v4_mult_vec_int64(v4 *a, int64_t *b) -// v4_mult_vec_float(v4 *a, float *b) -// v4_mult_vec_v4(v4 *a, v4 *b) + simd_int64_4 v[3]; + }; +}; -// v4_mult_scal_int32(v4 *a, int32_t b) -// v4_mult_scal_int64(v4 *a, int64_t b) -// v4_mult_scal_float(v4 *a, float b) +struct v3_simd_int64_8 { + union { + struct { + union { + simd_int64_8 x; + simd_int64_8 r; + }; + union { + simd_int64_8 y; + simd_int64_8 g; + }; + union { + simd_int64_8 z; + simd_int64_8 b; + }; + }; -///////////////////////////////// -// Addition -///////////////////////////////// + simd_int64_8 v[3]; + }; +}; -// Array vector addition -///////////////////////////////// -// add_vec_int32(int64_t *a, size_t a, int32_t *b) -// add_vec_int64(int64_t *a, size_t a, int64_t *b) -// add_vec_float(int64_t *a, size_t a, float *b) +struct v4_simd_int64_2 { + union { + struct { + union { + simd_int64_2 x; + simd_int64_2 r; + }; + union { + simd_int64_2 y; + simd_int64_2 g; + }; + union { + simd_int64_2 z; + simd_int64_2 b; + }; + union { + simd_int64_2 w; + simd_int64_2 a; + }; + }; -// add_scal_int32(int64_t *a, size_t a, int32_t b) -// add_scal_int64(int64_t *a, size_t a, int64_t b) -// add_scal_float(int64_t *a, size_t a, float b) + simd_int64_2 v[4]; + }; +}; -// Vector 2 vector addition -///////////////////////////////// -// v2_add_vec_int32(v2 *a, int32_t *b) -// v2_add_vec_int64(v2 *a, int64_t *b) -// v2_add_vec_float(v2 *a, float *b) -// v2_add_vec_v2(v2 *a, v2 *b) +struct v4_simd_int64_4 { + union { + struct { + union { + simd_int64_4 x; + simd_int64_4 r; + }; + union { + simd_int64_4 y; + simd_int64_4 g; + }; + union { + simd_int64_4 z; + simd_int64_4 b; + }; + union { + simd_int64_4 w; + simd_int64_4 a; + }; + }; -// v2_add_scal_int32(v2 *a, int32_t b) -// v2_add_scal_int64(v2 
*a, int64_t b) -// v2_add_scal_float(v2 *a, float b) + simd_int64_4 v[4]; + }; +}; -// Vector 3 vector addition -///////////////////////////////// -// v3_add_vec_int32(v3 *a, int32_t *b) -// v3_add_vec_int64(v3 *a, int64_t *b) -// v3_add_vec_float(v3 *a, float *b) -// v3_add_vec_v3(v3 *a, v3 *b) +struct v4_simd_int64_8 { + union { + struct { + union { + simd_int64_8 x; + simd_int64_8 r; + }; + union { + simd_int64_8 y; + simd_int64_8 g; + }; + union { + simd_int64_8 z; + simd_int64_8 b; + }; + union { + simd_int64_8 w; + simd_int64_8 a; + }; + }; -// v3_add_scal_int32(v3 *a, int32_t b) -// v3_add_scal_int64(v3 *a, int64_t b) -// v3_add_scal_float(v3 *a, float b) + simd_int64_8 v[4]; + }; +}; -// Vector 4 vector addition -///////////////////////////////// -// v4_add_vec_int32(v4 *a, int32_t *b) -// v4_add_vec_int64(v4 *a, int64_t *b) -// v4_add_vec_float(v4 *a, float *b) -// v4_add_vec_v4(v4 *a, v4 *b) - -// v4_add_scal_int32(v4 *a, int32_t b) -// v4_add_scal_int64(v4 *a, int64_t b) -// v4_add_scal_float(v4 *a, float b) - -///////////////////////////////// -// Subtraction -///////////////////////////////// - -// Array vector subtraction -///////////////////////////////// -// sub_vec_int32(int64_t *a, size_t a, int32_t *b) -// sub_vec_int64(int64_t *a, size_t a, int64_t *b) -// sub_vec_float(int64_t *a, size_t a, float *b) - -// sub_scal_int32(int64_t *a, size_t a, int32_t b) -// sub_scal_int64(int64_t *a, size_t a, int64_t b) -// sub_scal_float(int64_t *a, size_t a, float b) - -// Vector 2 vector subtraction -///////////////////////////////// -// v2_sub_vec_int32(v2 *a, int32_t *b) -// v2_sub_vec_int64(v2 *a, int64_t *b) -// v2_sub_vec_float(v2 *a, float *b) -// v2_sub_vec_v2(v2 *a, v2 *b) - -// v2_sub_scal_int32(v2 *a, int32_t b) -// v2_sub_scal_int64(v2 *a, int64_t b) -// v2_sub_scal_float(v2 *a, float b) - -// Vector 3 vector subtraction -///////////////////////////////// -// v3_sub_vec_int32(v3 *a, int32_t *b) -// v3_sub_vec_int64(v3 *a, int64_t *b) -// 
v3_sub_vec_float(v3 *a, float *b) -// v3_sub_vec_v3(v3 *a, v3 *b) - -// v3_sub_scal_int32(v3 *a, int32_t b) -// v3_sub_scal_int64(v3 *a, int64_t b) -// v3_sub_scal_float(v3 *a, float b) - -// Vector 4 vector subtraction -///////////////////////////////// -// v4_sub_vec_int32(v4 *a, int32_t *b) -// v4_sub_vec_int64(v4 *a, int64_t *b) -// v4_sub_vec_float(v4 *a, float *b) -// v4_sub_vec_v4(v4 *a, v4 *b) - -// v4_sub_scal_int32(v4 *a, int32_t b) -// v4_sub_scal_int64(v4 *a, int64_t b) -// v4_sub_scal_float(v4 *a, float b) - -///////////////////////////////// -// Other -///////////////////////////////// - -// Cross product -///////////////////////////////// -// cross_int32(int64_t *a, size_t a, int32_t *b) -// cross_int64(int64_t *a, size_t a, int64_t *b) -// cross_float(int64_t *a, size_t a, float *b) - -// v2_cross_v2(v2 *a, v2 *b) -// v3_cross_v3(v3 *a, v3 *b) -// v4_cross_v4(v4 *a, v4 *b) - -// Dot product -///////////////////////////////// -// dot_int32(int64_t *a, size_t a, int32_t *b) -// dot_int64(int64_t *a, size_t a, int64_t *b) -// dot_float(int64_t *a, size_t a, float *b) - -// v2_dot_v2(v2 *a, v2 *b) -// v3_dot_v3(v3 *a, v3 *b) -// v4_dot_v4(v4 *a, v4 *b) - -// Angle -///////////////////////////////// -// angle_int32(int64_t *a, size_t a, int32_t *b) -// angle_int64(int64_t *a, size_t a, int64_t *b) -// angle_float(int64_t *a, size_t a, float *b) - -// v2_angle_v2(v2 *a, v2 *b) -// v3_angle_v3(v3 *a, v3 *b) -// v4_angle_v4(v4 *a, v4 *b) - -// Cosine -///////////////////////////////// -// cosine_int32(int64_t *a, size_t a, int32_t *b) -// cosine_int64(int64_t *a, size_t a, int64_t *b) -// cosine_float(int64_t *a, size_t a, float *b) - -// v2_cosine_v2(v2 *a, v2 *b) -// v3_cosine_v3(v3 *a, v3 *b) -// v4_cosine_v4(v4 *a, v4 *b) \ No newline at end of file +#endif diff --git a/Types.h b/Types.h new file mode 100644 index 0000000..402a29d --- /dev/null +++ b/Types.h @@ -0,0 +1,24 @@ +/** + * Karaka + * + * @package Stdlib + * @copyright Dennis Eichhorn + * 
@license OMS License 1.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef TYPES_H +#define TYPES_H + +#include <stdint.h> +#include <stddef.h> + +typedef int8_t int8; +typedef int16_t int16; +typedef int32_t int32; +typedef int64_t int64; + +typedef float f32; +typedef double f64; + +#endif