From 5615f3899bd5298894f141b1f86e5465a1152d84 Mon Sep 17 00:00:00 2001 From: Dennis Eichhorn Date: Tue, 12 Dec 2023 19:46:16 +0000 Subject: [PATCH] doodling around --- Math/Matrix/MatrixFloat32.h | 0 Math/Matrix/MatrixInt32.h | 0 Math/Matrix/MatrixInt64.h | 203 ++++++++++++++++++++++++++++++++++++ Math/Matrix/VectorFloat32.h | 0 Math/Matrix/VectorInt32.h | 0 Math/Matrix/VectorInt64.h | 186 +++++++++++++++++++++++++++++++++ Stdlib/Mathtypes.h | 106 +++++++++++++++++++ Utils/ColorUtils.h | 10 +- Utils/Intrinsics.h | 97 +++++++++++++++++ 9 files changed, 597 insertions(+), 5 deletions(-) create mode 100644 Math/Matrix/MatrixFloat32.h create mode 100644 Math/Matrix/MatrixInt32.h create mode 100644 Math/Matrix/MatrixInt64.h create mode 100644 Math/Matrix/VectorFloat32.h create mode 100644 Math/Matrix/VectorInt32.h create mode 100644 Math/Matrix/VectorInt64.h create mode 100644 Stdlib/Mathtypes.h create mode 100644 Utils/Intrinsics.h diff --git a/Math/Matrix/MatrixFloat32.h b/Math/Matrix/MatrixFloat32.h new file mode 100644 index 0000000..e69de29 diff --git a/Math/Matrix/MatrixInt32.h b/Math/Matrix/MatrixInt32.h new file mode 100644 index 0000000..e69de29 diff --git a/Math/Matrix/MatrixInt64.h b/Math/Matrix/MatrixInt64.h new file mode 100644 index 0000000..156d5d6 --- /dev/null +++ b/Math/Matrix/MatrixInt64.h @@ -0,0 +1,203 @@ +// Remarks: sizes for the second matrix/vector are often implied by the first parameter and the rules for matrix/vector multiplication. 
+ +// First element is always a matrix of int64_t +///////////////////////////////// + +///////////////////////////////// +// Multiplication +///////////////////////////////// + +// Array matrix multiplication +///////////////////////////////// +// mult_mat_int32(int64_t **a, size_t ai, size_t aj, int32_t **b) +// mult_mat_int64(int64_t **a, size_t ai, size_t aj, int64_t **b) +// mult_mat_float(int64_t **a, size_t ai, size_t aj, float **b) + +// mult_vec_int32(int64_t **a, size_t ai, size_t aj, int32_t *b) +// mult_vec_int64(int64_t **a, size_t ai, size_t aj, int64_t *b) +// mult_vec_float(int64_t **a, size_t ai, size_t aj, float *b) + +// mult_scal_int32(int64_t **a, size_t ai, size_t aj, int32_t b) +// mult_scal_int64(int64_t **a, size_t ai, size_t aj, int64_t b) +// mult_scal_float(int64_t **a, size_t ai, size_t aj, float b) + +// Modifies the original matrix +// vector 2 matrix multiplication +///////////////////////////////// +// v2_mult_mat_int32(v2 *a, size_t a, int32_t **b) +// v2_mult_mat_int64(v2 *a, size_t a, int64_t **b) +// v2_mult_mat_float(v2 *a, size_t a, float **b) +// v2_mult_mat_v2(v2 *a, size_t a, v2 *b) + +// v2_mult_vec_int32(v2 *a, size_t a, int32_t *b) +// v2_mult_vec_int64(v2 *a, size_t a, int64_t *b) +// v2_mult_vec_float(v2 *a, size_t a, float *b) +// v2_mult_vec_v2(v2 *a, size_t a, v2 *b) + +// v2_mult_scal_int32(v2 *a, size_t a, int32_t b) +// v2_mult_scal_int64(v2 *a, size_t a, int64_t b) +// v2_mult_scal_float(v2 *a, size_t a, float b) + +// vector 3 matrix multiplication +///////////////////////////////// +// v3_mult_mat_int32(v3 *a, size_t a, int32_t **b) +// v3_mult_mat_int64(v3 *a, size_t a, int64_t **b) +// v3_mult_mat_float(v3 *a, size_t a, float **b) +// v3_mult_mat_v3(v3 *a, size_t a, v3 *b) + +// v3_mult_vec_int32(v3 *a, size_t a, int32_t *b) +// v3_mult_vec_int64(v3 *a, size_t a, int64_t *b) +// v3_mult_vec_float(v3 *a, size_t a, float *b) +// v3_mult_vec_v3(v3 *a, size_t a, v3 *b) + +// v3_mult_scal_int32(v3 *a, size_t a, 
int32_t b) +// v3_mult_scal_int64(v3 *a, size_t a, int64_t b) +// v3_mult_scal_float(v3 *a, size_t a, float b) + +// vector 4 matrix multiplication +///////////////////////////////// +// v4_mult_mat_int32(v4 *a, size_t a, int32_t **b) +// v4_mult_mat_int64(v4 *a, size_t a, int64_t **b) +// v4_mult_mat_float(v4 *a, size_t a, float **b) +// v4_mult_mat_v4(v4 *a, size_t a, v4 *b) + +// v4_mult_vec_int32(v4 *a, size_t a, int32_t *b) +// v4_mult_vec_int64(v4 *a, size_t a, int64_t *b) +// v4_mult_vec_float(v4 *a, size_t a, float *b) +// v4_mult_vec_v4(v4 *a, size_t a, v4 *b) + +// v4_mult_scal_int32(v4 *a, size_t a, int32_t b) +// v4_mult_scal_int64(v4 *a, size_t a, int64_t b) +// v4_mult_scal_float(v4 *a, size_t a, float b) + +///////////////////////////////// +// Addition +///////////////////////////////// + +// Array matrix addition +///////////////////////////////// +// add_mat_int32(int64_t **a, size_t ai, size_t aj, int32_t **b) +// add_mat_int64(int64_t **a, size_t ai, size_t aj, int64_t **b) +// add_mat_float(int64_t **a, size_t ai, size_t aj, float **b) + +// add_vec_int32(int64_t **a, size_t ai, size_t aj, int32_t *b) +// add_vec_int64(int64_t **a, size_t ai, size_t aj, int64_t *b) +// add_vec_float(int64_t **a, size_t ai, size_t aj, float *b) + +// add_scal_int32(int64_t **a, size_t ai, size_t aj, int32_t b) +// add_scal_int64(int64_t **a, size_t ai, size_t aj, int64_t b) +// add_scal_float(int64_t **a, size_t ai, size_t aj, float b) + +// vector 2 matrix addition +///////////////////////////////// +// v2_add_mat_int32(v2 *a, size_t a, int32_t **b) +// v2_add_mat_int64(v2 *a, size_t a, int64_t **b) +// v2_add_mat_float(v2 *a, size_t a, float **b) +// v2_add_mat_v2(v2 *a, size_t a, v2 *b) + +// v2_add_vec_int32(v2 *a, size_t a, int32_t *b) +// v2_add_vec_int64(v2 *a, size_t a, int64_t *b) +// v2_add_vec_float(v2 *a, size_t a, float *b) +// v2_add_vec_v2(v2 *a, size_t a, v2 *b) + +// v2_add_scal_int32(v2 *a, size_t a, int32_t b) +// v2_add_scal_int64(v2 *a, 
size_t a, int64_t b) +// v2_add_scal_float(v2 *a, size_t a, float b) + +// vector 3 matrix addition +///////////////////////////////// +// v3_add_mat_int32(v3 *a, size_t a, int32_t **b) +// v3_add_mat_int64(v3 *a, size_t a, int64_t **b) +// v3_add_mat_float(v3 *a, size_t a, float **b) +// v3_add_mat_v3(v3 *a, size_t a, v3 *b) + +// v3_add_vec_int32(v3 *a, size_t a, int32_t *b) +// v3_add_vec_int64(v3 *a, size_t a, int64_t *b) +// v3_add_vec_float(v3 *a, size_t a, float *b) +// v3_add_vec_v3(v3 *a, size_t a, v3 *b) + +// v3_add_scal_int32(v3 *a, size_t a, int32_t b) +// v3_add_scal_int64(v3 *a, size_t a, int64_t b) +// v3_add_scal_float(v3 *a, size_t a, float b) + +// vector 4 matrix addition +///////////////////////////////// +// v4_add_mat_int32(v4 *a, size_t a, int32_t **b) +// v4_add_mat_int64(v4 *a, size_t a, int64_t **b) +// v4_add_mat_float(v4 *a, size_t a, float **b) +// v4_add_mat_v4(v4 *a, size_t a, v4 *b) + +// v4_add_vec_int32(v4 *a, size_t a, int32_t *b) +// v4_add_vec_int64(v4 *a, size_t a, int64_t *b) +// v4_add_vec_float(v4 *a, size_t a, float *b) +// v4_add_vec_v4(v4 *a, size_t a, v4 *b) + +// v4_add_scal_int32(v4 *a, size_t a, int32_t b) +// v4_add_scal_int64(v4 *a, size_t a, int64_t b) +// v4_add_scal_float(v4 *a, size_t a, float b) + +///////////////////////////////// +// Subtraction +///////////////////////////////// + +// Array matrix subtraction +///////////////////////////////// +// sub_mat_int32(int64_t **a, size_t ai, size_t aj, int32_t **b) +// sub_mat_int64(int64_t **a, size_t ai, size_t aj, int64_t **b) +// sub_mat_float(int64_t **a, size_t ai, size_t aj, float **b) + +// sub_vec_int32(int64_t **a, size_t ai, size_t aj, int32_t *b) +// sub_vec_int64(int64_t **a, size_t ai, size_t aj, int64_t *b) +// sub_vec_float(int64_t **a, size_t ai, size_t aj, float *b) + +// sub_scal_int32(int64_t **a, size_t ai, size_t aj, int32_t b) +// sub_scal_int64(int64_t **a, size_t ai, size_t aj, int64_t b) +// sub_scal_float(int64_t **a, size_t ai, size_t 
aj, float b) + +// vector 2 matrix subtraction +///////////////////////////////// +// v2_sub_mat_int32(v2 *a, size_t a, int32_t **b) +// v2_sub_mat_int64(v2 *a, size_t a, int64_t **b) +// v2_sub_mat_float(v2 *a, size_t a, float **b) +// v2_sub_mat_v2(v2 *a, size_t a, v2 *b) + +// v2_sub_vec_int32(v2 *a, size_t a, int32_t *b) +// v2_sub_vec_int64(v2 *a, size_t a, int64_t *b) +// v2_sub_vec_float(v2 *a, size_t a, float *b) +// v2_sub_vec_v2(v2 *a, size_t a, v2 *b) + +// v2_sub_scal_int32(v2 *a, size_t a, int32_t b) +// v2_sub_scal_int64(v2 *a, size_t a, int64_t b) +// v2_sub_scal_float(v2 *a, size_t a, float b) + +// vector 3 matrix subtraction +///////////////////////////////// +// v3_sub_mat_int32(v3 *a, size_t a, int32_t **b) +// v3_sub_mat_int64(v3 *a, size_t a, int64_t **b) +// v3_sub_mat_float(v3 *a, size_t a, float **b) +// v3_sub_mat_v3(v3 *a, size_t a, v3 *b) + +// v3_sub_vec_int32(v3 *a, size_t a, int32_t *b) +// v3_sub_vec_int64(v3 *a, size_t a, int64_t *b) +// v3_sub_vec_float(v3 *a, size_t a, float *b) +// v3_sub_vec_v3(v3 *a, size_t a, v3 *b) + +// v3_sub_scal_int32(v3 *a, size_t a, int32_t b) +// v3_sub_scal_int64(v3 *a, size_t a, int64_t b) +// v3_sub_scal_float(v3 *a, size_t a, float b) + +// vector 4 matrix subtraction +///////////////////////////////// +// v4_sub_mat_int32(v4 *a, size_t a, int32_t **b) +// v4_sub_mat_int64(v4 *a, size_t a, int64_t **b) +// v4_sub_mat_float(v4 *a, size_t a, float **b) +// v4_sub_mat_v4(v4 *a, size_t a, v4 *b) + +// v4_sub_vec_int32(v4 *a, size_t a, int32_t *b) +// v4_sub_vec_int64(v4 *a, size_t a, int64_t *b) +// v4_sub_vec_float(v4 *a, size_t a, float *b) +// v4_sub_vec_v4(v4 *a, size_t a, v4 *b) + +// v4_sub_scal_int32(v4 *a, size_t a, int32_t b) +// v4_sub_scal_int64(v4 *a, size_t a, int64_t b) +// v4_sub_scal_float(v4 *a, size_t a, float b) diff --git a/Math/Matrix/VectorFloat32.h b/Math/Matrix/VectorFloat32.h new file mode 100644 index 0000000..e69de29 diff --git a/Math/Matrix/VectorInt32.h 
b/Math/Matrix/VectorInt32.h new file mode 100644 index 0000000..e69de29 diff --git a/Math/Matrix/VectorInt64.h b/Math/Matrix/VectorInt64.h new file mode 100644 index 0000000..c1c8b2f --- /dev/null +++ b/Math/Matrix/VectorInt64.h @@ -0,0 +1,186 @@ +// Remarks: sizes for the second matrix/vector are often implied by the first parameter and the rules for matrix/vector multiplication. + +///////////////////////////////// +// Multiplication +///////////////////////////////// + +// Array vector multiplication +///////////////////////////////// +// mult_vec_int32(int64_t *a, size_t a, int32_t *b) +// mult_vec_int64(int64_t *a, size_t a, int64_t *b) +// mult_vec_float(int64_t *a, size_t a, float *b) + +// mult_scal_int32(int64_t *a, size_t a, int32_t b) +// mult_scal_int64(int64_t *a, size_t a, int64_t b) +// mult_scal_float(int64_t *a, size_t a, float b) + +// Vector 2 vector multiplication +///////////////////////////////// +// v2_mult_vec_int32(v2 *a, int32_t *b) +// v2_mult_vec_int64(v2 *a, int64_t *b) +// v2_mult_vec_float(v2 *a, float *b) +// v2_mult_vec_v2(v2 *a, v2 *b) + +// v2_mult_scal_int32(v2 *a, int32_t b) +// v2_mult_scal_int64(v2 *a, int64_t b) +// v2_mult_scal_float(v2 *a, float b) + +// Vector 3 vector multiplication +///////////////////////////////// +// v3_mult_vec_int32(v3 *a, int32_t *b) +// v3_mult_vec_int64(v3 *a, int64_t *b) +// v3_mult_vec_float(v3 *a, float *b) +// v3_mult_vec_v3(v3 *a, v3 *b) + +// v3_mult_scal_int32(v3 *a, int32_t b) +// v3_mult_scal_int64(v3 *a, int64_t b) +// v3_mult_scal_float(v3 *a, float b) + +// Vector 4 vector multiplication +///////////////////////////////// +// v4_mult_vec_int32(v4 *a, int32_t *b) +// v4_mult_vec_int64(v4 *a, int64_t *b) +// v4_mult_vec_float(v4 *a, float *b) +// v4_mult_vec_v4(v4 *a, v4 *b) + +// v4_mult_scal_int32(v4 *a, int32_t b) +// v4_mult_scal_int64(v4 *a, int64_t b) +// v4_mult_scal_float(v4 *a, float b) + +///////////////////////////////// +// Addition +///////////////////////////////// + +// 
Array vector addition +///////////////////////////////// +// add_vec_int32(int64_t *a, size_t a, int32_t *b) +// add_vec_int64(int64_t *a, size_t a, int64_t *b) +// add_vec_float(int64_t *a, size_t a, float *b) + +// add_scal_int32(int64_t *a, size_t a, int32_t b) +// add_scal_int64(int64_t *a, size_t a, int64_t b) +// add_scal_float(int64_t *a, size_t a, float b) + +// Vector 2 vector addition +///////////////////////////////// +// v2_add_vec_int32(v2 *a, int32_t *b) +// v2_add_vec_int64(v2 *a, int64_t *b) +// v2_add_vec_float(v2 *a, float *b) +// v2_add_vec_v2(v2 *a, v2 *b) + +// v2_add_scal_int32(v2 *a, int32_t b) +// v2_add_scal_int64(v2 *a, int64_t b) +// v2_add_scal_float(v2 *a, float b) + +// Vector 3 vector addition +///////////////////////////////// +// v3_add_vec_int32(v3 *a, int32_t *b) +// v3_add_vec_int64(v3 *a, int64_t *b) +// v3_add_vec_float(v3 *a, float *b) +// v3_add_vec_v3(v3 *a, v3 *b) + +// v3_add_scal_int32(v3 *a, int32_t b) +// v3_add_scal_int64(v3 *a, int64_t b) +// v3_add_scal_float(v3 *a, float b) + +// Vector 4 vector addition +///////////////////////////////// +// v4_add_vec_int32(v4 *a, int32_t *b) +// v4_add_vec_int64(v4 *a, int64_t *b) +// v4_add_vec_float(v4 *a, float *b) +// v4_add_vec_v4(v4 *a, v4 *b) + +// v4_add_scal_int32(v4 *a, int32_t b) +// v4_add_scal_int64(v4 *a, int64_t b) +// v4_add_scal_float(v4 *a, float b) + +///////////////////////////////// +// Subtraction +///////////////////////////////// + +// Array vector subtraction +///////////////////////////////// +// sub_vec_int32(int64_t *a, size_t a, int32_t *b) +// sub_vec_int64(int64_t *a, size_t a, int64_t *b) +// sub_vec_float(int64_t *a, size_t a, float *b) + +// sub_scal_int32(int64_t *a, size_t a, int32_t b) +// sub_scal_int64(int64_t *a, size_t a, int64_t b) +// sub_scal_float(int64_t *a, size_t a, float b) + +// Vector 2 vector subtraction +///////////////////////////////// +// v2_sub_vec_int32(v2 *a, int32_t *b) +// v2_sub_vec_int64(v2 *a, int64_t *b) +// 
v2_sub_vec_float(v2 *a, float *b) +// v2_sub_vec_v2(v2 *a, v2 *b) + +// v2_sub_scal_int32(v2 *a, int32_t b) +// v2_sub_scal_int64(v2 *a, int64_t b) +// v2_sub_scal_float(v2 *a, float b) + +// Vector 3 vector subtraction +///////////////////////////////// +// v3_sub_vec_int32(v3 *a, int32_t *b) +// v3_sub_vec_int64(v3 *a, int64_t *b) +// v3_sub_vec_float(v3 *a, float *b) +// v3_sub_vec_v3(v3 *a, v3 *b) + +// v3_sub_scal_int32(v3 *a, int32_t b) +// v3_sub_scal_int64(v3 *a, int64_t b) +// v3_sub_scal_float(v3 *a, float b) + +// Vector 4 vector subtraction +///////////////////////////////// +// v4_sub_vec_int32(v4 *a, int32_t *b) +// v4_sub_vec_int64(v4 *a, int64_t *b) +// v4_sub_vec_float(v4 *a, float *b) +// v4_sub_vec_v4(v4 *a, v4 *b) + +// v4_sub_scal_int32(v4 *a, int32_t b) +// v4_sub_scal_int64(v4 *a, int64_t b) +// v4_sub_scal_float(v4 *a, float b) + +///////////////////////////////// +// Other +///////////////////////////////// + +// Cross product +///////////////////////////////// +// cross_int32(int64_t *a, size_t a, int32_t *b) +// cross_int64(int64_t *a, size_t a, int64_t *b) +// cross_float(int64_t *a, size_t a, float *b) + +// v2_cross_v2(v2 *a, v2 *b) +// v3_cross_v3(v3 *a, v3 *b) +// v4_cross_v4(v4 *a, v4 *b) + +// Dot product +///////////////////////////////// +// dot_int32(int64_t *a, size_t a, int32_t *b) +// dot_int64(int64_t *a, size_t a, int64_t *b) +// dot_float(int64_t *a, size_t a, float *b) + +// v2_dot_v2(v2 *a, v2 *b) +// v3_dot_v3(v3 *a, v3 *b) +// v4_dot_v4(v4 *a, v4 *b) + +// Angle +///////////////////////////////// +// angle_int32(int64_t *a, size_t a, int32_t *b) +// angle_int64(int64_t *a, size_t a, int64_t *b) +// angle_float(int64_t *a, size_t a, float *b) + +// v2_angle_v2(v2 *a, v2 *b) +// v3_angle_v3(v3 *a, v3 *b) +// v4_angle_v4(v4 *a, v4 *b) + +// Cosine +///////////////////////////////// +// cosine_int32(int64_t *a, size_t a, int32_t *b) +// cosine_int64(int64_t *a, size_t a, int64_t *b) +// cosine_float(int64_t *a, size_t a, 
float *b) + +// v2_cosine_v2(v2 *a, v2 *b) +// v3_cosine_v3(v3 *a, v3 *b) +// v4_cosine_v4(v4 *a, v4 *b) \ No newline at end of file diff --git a/Stdlib/Mathtypes.h b/Stdlib/Mathtypes.h new file mode 100644 index 0000000..b5d7738 --- /dev/null +++ b/Stdlib/Mathtypes.h @@ -0,0 +1,106 @@ +/** + * Karaka + * + * @package Stdlib + * @copyright Dennis Eichhorn + * @license OMS License 1.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef STDLIB_MATHTYPES_H +#define STDLIB_MATHTYPES_H + +#include <stdint.h> +#include <stddef.h> + +// int32_t vectors +typedef union { + struct { + int32_t x, y; + }; + + int32_t e[2]; +} i2; + +typedef union { + struct { + int32_t x, y, z; + }; + + int32_t e[3]; +} i3; + +typedef union { + struct { + int32_t x, y, z, w; + }; + + int32_t e[4]; +} i4; + +// int64_t vectors +typedef union { + struct { + int64_t x, y; + }; + + int64_t e[2]; +} l2; + +typedef union { + struct { + int64_t x, y, z; + }; + + int64_t e[3]; +} l3; + +typedef union { + struct { + int64_t x, y, z, w; + }; + + int64_t e[4]; +} l4; + +// float vectors +typedef union { + struct { + float x, y; + }; + + float e[2]; +} f2; + +typedef union { + struct { + float x, y, z; + }; + + float e[3]; +} f3; + +typedef union { + struct { + float x, y, z, w; + }; + + float e[4]; +} f4; + +typedef struct { + int32_t *e; + size_t m, n; +} mi32; + +typedef struct { + int64_t *e; + size_t m, n; +} mi64; + +typedef struct { + float *e; + size_t m, n; +} mf; + +#endif \ No newline at end of file diff --git a/Utils/ColorUtils.h b/Utils/ColorUtils.h index 8dbe74d..70ae286 100644 --- a/Utils/ColorUtils.h +++ b/Utils/ColorUtils.h @@ -7,8 +7,8 @@ * @version 1.0.0 * @link https://jingga.app */ -#ifndef UTILS_STRING_UTILS_H -#define UTILS_STRING_UTILS_H +#ifndef UTILS_COLOR_UTILS_H +#define UTILS_COLOR_UTILS_H #include #include @@ -16,9 +16,9 @@ namespace Utils::ColorUtils { typedef struct { - char r = 0; - char g = 0; - char b = 0; + unsigned char r = 0; + unsigned char g = 0; + unsigned char b = 0; } RGB;
inline diff --git a/Utils/Intrinsics.h b/Utils/Intrinsics.h new file mode 100644 index 0000000..8367950 --- /dev/null +++ b/Utils/Intrinsics.h @@ -0,0 +1,97 @@ +/** + * Karaka + * + * @package Utils + * @copyright Dennis Eichhorn + * @license OMS License 1.0 + * @version 1.0.0 + * @link https://jingga.app + */ +#ifndef UTILS_INTRINSICS_H +#define UTILS_INTRINSICS_H + +#include +#include + +/* +MMX +Introduce eight 64 bit registers (MM0-MM7) and instructions to work with eight signed/unsigned bytes, four signed/unsigned words, two signed/unsigned dwords. + +3DNow! +Add support for single precision floating point operands to MMX. Few operations are supported, for example addition, subtraction, multiplication. + +SSE +Introduce eight/sixteen 128 bit registers (XMM0-XMM7/15) and instructions to work with four single precision floating point operands. Add integer operations on MMX registers too. (The MMX-integer part of SSE is sometimes called MMXEXT, and was implemented on a few non-Intel CPUs without xmm registers and the floating point part of SSE.) + +SSE2 +Introduces instructions to work with 2 double precision floating point operands, and with packed byte/word/dword/qword integers in 128-bit xmm registers. + +SSE3 +Add a few varied instructions (mostly floating point), including a special kind of unaligned load (lddqu) that was better on Pentium 4, synchronization instructions, horizontal add/sub. + +SSSE3 +Again a varied set of instructions, mostly integer. The first shuffle that takes its control operand from a register instead of hard-coded (pshufb). More horizontal processing, shuffle, packing/unpacking, mul+add on bytes, and some specialized integer add/mul stuff. + +SSE4 (SSE4.1, SSE4.2) +Add a lot of instructions: Filling in a lot of the gaps by providing min and max and other operations for all integer data types (especially 32-bit integer had been lacking), where previously integer min was only available for unsigned bytes and signed 16-bit.
Also scaling, FP rounding, blending, linear algebra operations, text processing, comparisons. Also a non temporal load for reading video memory, or copying it back to main memory. (Previously only NT stores were available.) + +AESNI +Add support for accelerating AES symmetric encryption/decryption. + +AVX +Add eight/sixteen 256 bit registers (YMM0-YMM7/15). Support all previous floating point datatypes. Three operand instructions. + +FMA +Add Fused Multiply Add and correlated instructions. + +AVX2 +Add support for integer data types. + +AVX512F +Add eight/thirty-two 512 bit registers (ZMM0-ZMM7/31) and eight 64-bit mask registers (k0-k7). Promote most previous instructions to 512 bits wide. Optional parts of AVX512 add instructions for exponentials & reciprocals (AVX512ER), scatter/gather prefetching (AVX512PF), scatter conflict detection (AVX512CD), compress, expand. + +IMCI (Intel Xeon Phi) +Early development of AVX512 for the first-gen Intel Xeon Phi (Knights Corner) coprocessor. +*/ + +#ifndef __AVX__ + #define __AVX__ 0 +#endif + +#ifndef __AVX2__ + #define __AVX2__ 0 +#endif + +#ifndef __SSE__ + #define __SSE__ 0 +#endif + +#ifndef __SSE2__ + #define __SSE2__ 0 +#endif + +#ifndef __SSE2_MATH__ + #define __SSE2_MATH__ 0 +#endif + +#ifndef __SSE3__ + #define __SSE3__ 0 +#endif + +#ifndef __SSE4_1__ + #define __SSE4_1__ 0 +#endif + +#ifndef __SSE4_2__ + #define __SSE4_2__ 0 +#endif + +#ifndef __SSE_MATH__ + #define __SSE_MATH__ 0 +#endif + +#ifndef __SSSE3__ + #define __SSSE3__ 0 +#endif + +#endif \ No newline at end of file