doodling around

This commit is contained in:
Dennis Eichhorn 2023-12-12 19:46:16 +00:00
parent 33b480fbae
commit 5615f3899b
9 changed files with 597 additions and 5 deletions

View File

View File

203
Math/Matrix/MatrixInt64.h Normal file
View File

@ -0,0 +1,203 @@
// Remarks: sizes for the second matrix/vector are often implied by the first parameter and the rules for matrix/vector multiplication.
// First element is always a matrix of int64_t
/////////////////////////////////
/////////////////////////////////
// Multiplication
/////////////////////////////////
// Array matrix multiplication
/////////////////////////////////
// mult_mat_int32(int64_t **a, size_t ai, size_t aj, int32_t **b)
// mult_mat_int64(int64_t **a, size_t ai, size_t aj, int64_t **b)
// mult_mat_float(int64_t **a, size_t ai, size_t aj, float **b)
// mult_vec_int32(int64_t **a, size_t ai, size_t aj, int32_t *b)
// mult_vec_int64(int64_t **a, size_t ai, size_t aj, int64_t *b)
// mult_vec_float(int64_t **a, size_t ai, size_t aj, float *b)
// mult_scal_int32(int64_t **a, size_t ai, size_t aj, int32_t b)
// mult_scal_int64(int64_t **a, size_t ai, size_t aj, int64_t b)
// mult_scal_float(int64_t **a, size_t ai, size_t aj, float b)
// Modifies the original matrix
// vector 2 matrix multiplication
/////////////////////////////////
// v2_mult_mat_int32(v2 *a, size_t a, int32_t **b)
// v2_mult_mat_int64(v2 *a, size_t a, int64_t **b)
// v2_mult_mat_float(v2 *a, size_t a, float **b)
// v2_mult_mat_v2(v2 *a, size_t a, v2 *b)
// v2_mult_vec_int32(v2 *a, size_t a, int32_t *b)
// v2_mult_vec_int64(v2 *a, size_t a, int64_t *b)
// v2_mult_vec_float(v2 *a, size_t a, float *b)
// v2_mult_vec_v2(v2 *a, size_t a, v2 *b)
// v2_mult_scal_int32(v2 *a, size_t a, int32_t b)
// v2_mult_scal_int64(v2 *a, size_t a, int64_t b)
// v2_mult_scal_float(v2 *a, size_t a, float b)
// vector 3 matrix multiplication
/////////////////////////////////
// v3_mult_mat_int32(v3 *a, size_t a, int32_t **b)
// v3_mult_mat_int64(v3 *a, size_t a, int64_t **b)
// v3_mult_mat_float(v3 *a, size_t a, float **b)
// v3_mult_mat_v3(v3 *a, size_t a, v3 *b)
// v3_mult_vec_int32(v3 *a, size_t a, int32_t *b)
// v3_mult_vec_int64(v3 *a, size_t a, int64_t *b)
// v3_mult_vec_float(v3 *a, size_t a, float *b)
// v3_mult_vec_v3(v3 *a, size_t a, v3 *b)
// v3_mult_scal_int32(v3 *a, size_t a, int32_t b)
// v3_mult_scal_int64(v3 *a, size_t a, int64_t b)
// v3_mult_scal_float(v3 *a, size_t a, float b)
// vector 4 matrix multiplication
/////////////////////////////////
// v4_mult_mat_int32(v4 *a, size_t a, int32_t **b)
// v4_mult_mat_int64(v4 *a, size_t a, int64_t **b)
// v4_mult_mat_float(v4 *a, size_t a, float **b)
// v4_mult_mat_v4(v4 *a, size_t a, v4 *b)
// v4_mult_vec_int32(v4 *a, size_t a, int32_t *b)
// v4_mult_vec_int64(v4 *a, size_t a, int64_t *b)
// v4_mult_vec_float(v4 *a, size_t a, float *b)
// v4_mult_vec_v4(v4 *a, size_t a, v4 *b)
// v4_mult_scal_int32(v4 *a, size_t a, int32_t b)
// v4_mult_scal_int64(v4 *a, size_t a, int64_t b)
// v4_mult_scal_float(v4 *a, size_t a, float b)
/////////////////////////////////
// Addition
/////////////////////////////////
// Array matrix addition
/////////////////////////////////
// add_mat_int32(int64_t **a, size_t ai, size_t aj, int32_t **b)
// add_mat_int64(int64_t **a, size_t ai, size_t aj, int64_t **b)
// add_mat_float(int64_t **a, size_t ai, size_t aj, float **b)
// add_vec_int32(int64_t **a, size_t ai, size_t aj, int32_t *b)
// add_vec_int64(int64_t **a, size_t ai, size_t aj, int64_t *b)
// add_vec_float(int64_t **a, size_t ai, size_t aj, float *b)
// add_scal_int32(int64_t **a, size_t ai, size_t aj, int32_t b)
// add_scal_int64(int64_t **a, size_t ai, size_t aj, int64_t b)
// add_scal_float(int64_t **a, size_t ai, size_t aj, float b)
// vector 2 matrix addition
/////////////////////////////////
// v2_add_mat_int32(v2 *a, size_t a, int32_t **b)
// v2_add_mat_int64(v2 *a, size_t a, int64_t **b)
// v2_add_mat_float(v2 *a, size_t a, float **b)
// v2_add_mat_v2(v2 *a, size_t a, v2 *b)
// v2_add_vec_int32(v2 *a, size_t a, int32_t *b)
// v2_add_vec_int64(v2 *a, size_t a, int64_t *b)
// v2_add_vec_float(v2 *a, size_t a, float *b)
// v2_add_vec_v2(v2 *a, size_t a, v2 *b)
// v2_add_scal_int32(v2 *a, size_t a, int32_t b)
// v2_add_scal_int64(v2 *a, size_t a, int64_t b)
// v2_add_scal_float(v2 *a, size_t a, float b)
// vector 3 matrix addition
/////////////////////////////////
// v3_add_mat_int32(v3 *a, size_t a, int32_t **b)
// v3_add_mat_int64(v3 *a, size_t a, int64_t **b)
// v3_add_mat_float(v3 *a, size_t a, float **b)
// v3_add_mat_v3(v3 *a, size_t a, v3 *b)
// v3_add_vec_int32(v3 *a, size_t a, int32_t *b)
// v3_add_vec_int64(v3 *a, size_t a, int64_t *b)
// v3_add_vec_float(v3 *a, size_t a, float *b)
// v3_add_vec_v3(v3 *a, size_t a, v3 *b)
// v3_add_scal_int32(v3 *a, size_t a, int32_t b)
// v3_add_scal_int64(v3 *a, size_t a, int64_t b)
// v3_add_scal_float(v3 *a, size_t a, float b)
// vector 4 matrix addition
/////////////////////////////////
// v4_add_mat_int32(v4 *a, size_t a, int32_t **b)
// v4_add_mat_int64(v4 *a, size_t a, int64_t **b)
// v4_add_mat_float(v4 *a, size_t a, float **b)
// v4_add_mat_v4(v4 *a, size_t a, v4 *b)
// v4_add_vec_int32(v4 *a, size_t a, int32_t *b)
// v4_add_vec_int64(v4 *a, size_t a, int64_t *b)
// v4_add_vec_float(v4 *a, size_t a, float *b)
// v4_add_vec_v4(v4 *a, size_t a, v4 *b)
// v4_add_scal_int32(v4 *a, size_t a, int32_t b)
// v4_add_scal_int64(v4 *a, size_t a, int64_t b)
// v4_add_scal_float(v4 *a, size_t a, float b)
/////////////////////////////////
// Subtraction
/////////////////////////////////
// Array matrix subtraction
/////////////////////////////////
// sub_mat_int32(int64_t **a, size_t ai, size_t aj, int32_t **b)
// sub_mat_int64(int64_t **a, size_t ai, size_t aj, int64_t **b)
// sub_mat_float(int64_t **a, size_t ai, size_t aj, float **b)
// sub_vec_int32(int64_t **a, size_t ai, size_t aj, int32_t *b)
// sub_vec_int64(int64_t **a, size_t ai, size_t aj, int64_t *b)
// sub_vec_float(int64_t **a, size_t ai, size_t aj, float *b)
// sub_scal_int32(int64_t **a, size_t ai, size_t aj, int32_t b)
// sub_scal_int64(int64_t **a, size_t ai, size_t aj, int64_t b)
// sub_scal_float(int64_t **a, size_t ai, size_t aj, float b)
// vector 2 matrix subtraction
/////////////////////////////////
// v2_sub_mat_int32(v2 *a, size_t a, int32_t **b)
// v2_sub_mat_int64(v2 *a, size_t a, int64_t **b)
// v2_sub_mat_float(v2 *a, size_t a, float **b)
// v2_sub_mat_v2(v2 *a, size_t a, v2 *b)
// v2_sub_vec_int32(v2 *a, size_t a, int32_t *b)
// v2_sub_vec_int64(v2 *a, size_t a, int64_t *b)
// v2_sub_vec_float(v2 *a, size_t a, float *b)
// v2_sub_vec_v2(v2 *a, size_t a, v2 *b)
// v2_sub_scal_int32(v2 *a, size_t a, int32_t b)
// v2_sub_scal_int64(v2 *a, size_t a, int64_t b)
// v2_sub_scal_float(v2 *a, size_t a, float b)
// vector 3 matrix subtraction
/////////////////////////////////
// v3_sub_mat_int32(v3 *a, size_t a, int32_t **b)
// v3_sub_mat_int64(v3 *a, size_t a, int64_t **b)
// v3_sub_mat_float(v3 *a, size_t a, float **b)
// v3_sub_mat_v3(v3 *a, size_t a, v3 *b)
// v3_sub_vec_int32(v3 *a, size_t a, int32_t *b)
// v3_sub_vec_int64(v3 *a, size_t a, int64_t *b)
// v3_sub_vec_float(v3 *a, size_t a, float *b)
// v3_sub_vec_v3(v3 *a, size_t a, v3 *b)
// v3_sub_scal_int32(v3 *a, size_t a, int32_t b)
// v3_sub_scal_int64(v3 *a, size_t a, int64_t b)
// v3_sub_scal_float(v3 *a, size_t a, float b)
// vector 4 matrix subtraction
/////////////////////////////////
// v4_sub_mat_int32(v4 *a, size_t a, int32_t **b)
// v4_sub_mat_int64(v4 *a, size_t a, int64_t **b)
// v4_sub_mat_float(v4 *a, size_t a, float **b)
// v4_sub_mat_v4(v4 *a, size_t a, v4 *b)
// v4_sub_vec_int32(v4 *a, size_t a, int32_t *b)
// v4_sub_vec_int64(v4 *a, size_t a, int64_t *b)
// v4_sub_vec_float(v4 *a, size_t a, float *b)
// v4_sub_vec_v4(v4 *a, size_t a, v4 *b)
// v4_sub_scal_int32(v4 *a, size_t a, int32_t b)
// v4_sub_scal_int64(v4 *a, size_t a, int64_t b)
// v4_sub_scal_float(v4 *a, size_t a, float b)

View File

View File

186
Math/Matrix/VectorInt64.h Normal file
View File

@ -0,0 +1,186 @@
// Remarks: sizes for the second matrix/vector are often implied by the first parameter and the rules for matrix/vector multiplication.
/////////////////////////////////
// Multiplication
/////////////////////////////////
// Array vector multiplication
/////////////////////////////////
// mult_vec_int32(int64_t *a, size_t a, int32_t *b)
// mult_vec_int64(int64_t *a, size_t a, int64_t *b)
// mult_vec_float(int64_t *a, size_t a, float *b)
// mult_scal_int32(int64_t *a, size_t a, int32_t b)
// mult_scal_int64(int64_t *a, size_t a, int64_t b)
// mult_scal_float(int64_t *a, size_t a, float b)
// Vector 2 vector multiplication
/////////////////////////////////
// v2_mult_vec_int32(v2 *a, int32_t *b)
// v2_mult_vec_int64(v2 *a, int64_t *b)
// v2_mult_vec_float(v2 *a, float *b)
// v2_mult_vec_v2(v2 *a, v2 *b)
// v2_mult_scal_int32(v2 *a, int32_t b)
// v2_mult_scal_int64(v2 *a, int64_t b)
// v2_mult_scal_float(v2 *a, float b)
// Vector 3 vector multiplication
/////////////////////////////////
// v3_mult_vec_int32(v3 *a, int32_t *b)
// v3_mult_vec_int64(v3 *a, int64_t *b)
// v3_mult_vec_float(v3 *a, float *b)
// v3_mult_vec_v3(v3 *a, v3 *b)
// v3_mult_scal_int32(v3 *a, int32_t b)
// v3_mult_scal_int64(v3 *a, int64_t b)
// v3_mult_scal_float(v3 *a, float b)
// Vector 4 vector multiplication
/////////////////////////////////
// v4_mult_vec_int32(v4 *a, int32_t *b)
// v4_mult_vec_int64(v4 *a, int64_t *b)
// v4_mult_vec_float(v4 *a, float *b)
// v4_mult_vec_v4(v4 *a, v4 *b)
// v4_mult_scal_int32(v4 *a, int32_t b)
// v4_mult_scal_int64(v4 *a, int64_t b)
// v4_mult_scal_float(v4 *a, float b)
/////////////////////////////////
// Addition
/////////////////////////////////
// Array vector addition
/////////////////////////////////
// add_vec_int32(int64_t *a, size_t a, int32_t *b)
// add_vec_int64(int64_t *a, size_t a, int64_t *b)
// add_vec_float(int64_t *a, size_t a, float *b)
// add_scal_int32(int64_t *a, size_t a, int32_t b)
// add_scal_int64(int64_t *a, size_t a, int64_t b)
// add_scal_float(int64_t *a, size_t a, float b)
// Vector 2 vector addition
/////////////////////////////////
// v2_add_vec_int32(v2 *a, int32_t *b)
// v2_add_vec_int64(v2 *a, int64_t *b)
// v2_add_vec_float(v2 *a, float *b)
// v2_add_vec_v2(v2 *a, v2 *b)
// v2_add_scal_int32(v2 *a, int32_t b)
// v2_add_scal_int64(v2 *a, int64_t b)
// v2_add_scal_float(v2 *a, float b)
// Vector 3 vector addition
/////////////////////////////////
// v3_add_vec_int32(v3 *a, int32_t *b)
// v3_add_vec_int64(v3 *a, int64_t *b)
// v3_add_vec_float(v3 *a, float *b)
// v3_add_vec_v3(v3 *a, v3 *b)
// v3_add_scal_int32(v3 *a, int32_t b)
// v3_add_scal_int64(v3 *a, int64_t b)
// v3_add_scal_float(v3 *a, float b)
// Vector 4 vector addition
/////////////////////////////////
// v4_add_vec_int32(v4 *a, int32_t *b)
// v4_add_vec_int64(v4 *a, int64_t *b)
// v4_add_vec_float(v4 *a, float *b)
// v4_add_vec_v4(v4 *a, v4 *b)
// v4_add_scal_int32(v4 *a, int32_t b)
// v4_add_scal_int64(v4 *a, int64_t b)
// v4_add_scal_float(v4 *a, float b)
/////////////////////////////////
// Subtraction
/////////////////////////////////
// Array vector subtraction
/////////////////////////////////
// sub_vec_int32(int64_t *a, size_t a, int32_t *b)
// sub_vec_int64(int64_t *a, size_t a, int64_t *b)
// sub_vec_float(int64_t *a, size_t a, float *b)
// sub_scal_int32(int64_t *a, size_t a, int32_t b)
// sub_scal_int64(int64_t *a, size_t a, int64_t b)
// sub_scal_float(int64_t *a, size_t a, float b)
// Vector 2 vector subtraction
/////////////////////////////////
// v2_sub_vec_int32(v2 *a, int32_t *b)
// v2_sub_vec_int64(v2 *a, int64_t *b)
// v2_sub_vec_float(v2 *a, float *b)
// v2_sub_vec_v2(v2 *a, v2 *b)
// v2_sub_scal_int32(v2 *a, int32_t b)
// v2_sub_scal_int64(v2 *a, int64_t b)
// v2_sub_scal_float(v2 *a, float b)
// Vector 3 vector subtraction
/////////////////////////////////
// v3_sub_vec_int32(v3 *a, int32_t *b)
// v3_sub_vec_int64(v3 *a, int64_t *b)
// v3_sub_vec_float(v3 *a, float *b)
// v3_sub_vec_v3(v3 *a, v3 *b)
// v3_sub_scal_int32(v3 *a, int32_t b)
// v3_sub_scal_int64(v3 *a, int64_t b)
// v3_sub_scal_float(v3 *a, float b)
// Vector 4 vector subtraction
/////////////////////////////////
// v4_sub_vec_int32(v4 *a, int32_t *b)
// v4_sub_vec_int64(v4 *a, int64_t *b)
// v4_sub_vec_float(v4 *a, float *b)
// v4_sub_vec_v4(v4 *a, v4 *b)
// v4_sub_scal_int32(v4 *a, int32_t b)
// v4_sub_scal_int64(v4 *a, int64_t b)
// v4_sub_scal_float(v4 *a, float b)
/////////////////////////////////
// Other
/////////////////////////////////
// Cross product
/////////////////////////////////
// cross_int32(int64_t *a, size_t a, int32_t *b)
// cross_int64(int64_t *a, size_t a, int64_t *b)
// cross_float(int64_t *a, size_t a, float *b)
// v2_cross_v2(v2 *a, v2 *b)
// v3_cross_v3(v3 *a, v3 *b)
// v4_cross_v4(v4 *a, v4 *b)
// Dot product
/////////////////////////////////
// dot_int32(int64_t *a, size_t a, int32_t *b)
// dot_int64(int64_t *a, size_t a, int64_t *b)
// dot_float(int64_t *a, size_t a, float *b)
// v2_dot_v2(v2 *a, v2 *b)
// v3_dot_v3(v3 *a, v3 *b)
// v4_dot_v4(v4 *a, v4 *b)
// Angle
/////////////////////////////////
// angle_int32(int64_t *a, size_t a, int32_t *b)
// angle_int64(int64_t *a, size_t a, int64_t *b)
// angle_float(int64_t *a, size_t a, float *b)
// v2_angle_v2(v2 *a, v2 *b)
// v3_angle_v3(v3 *a, v3 *b)
// v4_angle_v4(v4 *a, v4 *b)
// Cosine
/////////////////////////////////
// cosine_int32(int64_t *a, size_t a, int32_t *b)
// cosine_int64(int64_t *a, size_t a, int64_t *b)
// cosine_float(int64_t *a, size_t a, float *b)
// v2_cosine_v2(v2 *a, v2 *b)
// v3_cosine_v3(v3 *a, v3 *b)
// v4_cosine_v4(v4 *a, v4 *b)

106
Stdlib/Mathtypes.h Normal file
View File

@ -0,0 +1,106 @@
/**
* Karaka
*
* @package Utils
* @copyright Dennis Eichhorn
* @license OMS License 1.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef STDLIB_MATHTYPES_H
#define STDLIB_MATHTYPES_H
#include <stdint.h>
#include <stdlib.h>
// int32_t vectors
/**
 * Two-component int32_t vector.
 *
 * The components can be accessed by name (x, y) or by index through e[];
 * both views are members of the same union.
 */
typedef union {
    struct {
        int32_t x;
        int32_t y;
    };
    int32_t e[2];
} i2;
/**
 * Three-component int32_t vector.
 *
 * The components can be accessed by name (x, y, z) or by index through e[];
 * both views are members of the same union.
 */
typedef union {
    struct {
        int32_t x;
        int32_t y;
        int32_t z;
    };
    int32_t e[3];
} i3;
/**
 * Four-component int32_t vector.
 *
 * The components can be accessed by name (x, y, z, w) or by index through
 * e[]; both views are members of the same union.
 */
typedef union {
    struct {
        int32_t x;
        int32_t y;
        int32_t z;
        int32_t w;
    };
    int32_t e[4];
} i4;
// int64_t vectors
/**
 * Two-component int64_t vector.
 *
 * The components can be accessed by name (x, y) or by index through e[];
 * both views are members of the same union.
 */
typedef union {
    struct {
        int64_t x;
        int64_t y;
    };
    int64_t e[2];
} l2;
/**
 * Three-component int64_t vector.
 *
 * The components can be accessed by name (x, y, z) or by index through e[];
 * both views are members of the same union.
 */
typedef union {
    struct {
        int64_t x;
        int64_t y;
        int64_t z;
    };
    int64_t e[3];
} l3;
/**
 * Four-component int64_t vector.
 *
 * The components can be accessed by name (x, y, z, w) or by index through
 * e[]; both views are members of the same union.
 */
typedef union {
    struct {
        int64_t x;
        int64_t y;
        int64_t z;
        int64_t w;
    };
    int64_t e[4];
} l4;
// float vectors
/**
 * Two-component float vector.
 *
 * The components can be accessed by name (x, y) or by index through e[];
 * both views are members of the same union.
 */
typedef union {
    struct {
        float x;
        float y;
    };
    float e[2];
} f2;
/**
 * Three-component float vector.
 *
 * The components can be accessed by name (x, y, z) or by index through e[];
 * both views are members of the same union.
 */
typedef union {
    struct {
        float x;
        float y;
        float z;
    };
    float e[3];
} f3;
/**
 * Four-component float vector.
 *
 * The components can be accessed by name (x, y, z, w) or by index through
 * e[]; both views are members of the same union.
 */
typedef union {
    struct {
        float x;
        float y;
        float z;
        float w;
    };
    float e[4];
} f4;
/**
 * int32_t matrix descriptor: a pointer to the elements plus two sizes.
 *
 * NOTE(review): presumably e is flat element storage and m/n are the row and
 * column counts - the layout and ownership of e are not visible here, confirm
 * against the code that fills this struct.
 */
typedef struct {
    int32_t *e;
    size_t m;
    size_t n;
} mi32;
/**
 * int64_t matrix descriptor: a pointer to the elements plus two sizes.
 *
 * NOTE(review): presumably e is flat element storage and m/n are the row and
 * column counts - the layout and ownership of e are not visible here, confirm
 * against the code that fills this struct.
 */
typedef struct {
    int64_t *e;
    size_t m;
    size_t n;
} mi64;
/**
 * float matrix descriptor: a pointer to the elements plus two sizes.
 *
 * NOTE(review): presumably e is flat element storage and m/n are the row and
 * column counts - the layout and ownership of e are not visible here, confirm
 * against the code that fills this struct.
 */
typedef struct {
    float *e;
    size_t m;
    size_t n;
} mf;
#endif

View File

@ -7,8 +7,8 @@
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef UTILS_STRING_UTILS_H
#define UTILS_STRING_UTILS_H
#ifndef UTILS_COLOR_UTILS_H
#define UTILS_COLOR_UTILS_H
#include <stdio.h>
#include <stdlib.h>
@ -16,9 +16,9 @@
namespace Utils::ColorUtils
{
typedef struct {
char r = 0;
char g = 0;
char b = 0;
unsigned char r = 0;
unsigned char g = 0;
unsigned char b = 0;
} RGB;
inline

97
Utils/Intrinsics.h Normal file
View File

@ -0,0 +1,97 @@
/**
* Karaka
*
* @package Utils
* @copyright Dennis Eichhorn
* @license OMS License 1.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef UTILS_INTRINSICS_H
#define UTILS_INTRINSICS_H
#include <stdio.h>
#include <stdlib.h>
/*
MMX
Introduce eight 64 bit registers (MM0-MM7) and instructions to work with eight signed/unsigned bytes, four signed/unsigned words, two signed/unsigned dwords.
3DNow!
Add support for single precision floating point operand to MMX. Few operation supported, for example addition, subtraction, multiplication.
SSE
Introduce eight/sixteen 128 bit registers (XMM0-XMM7/15) and instruction to work with four single precision floating point operands. Add integer operations on MMX registers too. (The MMX-integer part of SSE is sometimes called MMXEXT, and was implemented on a few non-Intel CPUs without xmm registers and the floating point part of SSE.)
SSE2
Introduces instruction to work with 2 double precision floating point operands, and with packed byte/word/dword/qword integers in 128-bit xmm registers.
SSE3
Add a few varied instructions (mostly floating point), including a special kind of unaligned load (lddqu) that was better on Pentium 4, synchronization instruction, horizontal add/sub.
SSSE3
Again a varied set of instructions, mostly integer. The first shuffle that takes its control operand from a register instead of hard-coded (pshufb). More horizontal processing, shuffle, packing/unpacking, mul+add on bytes, and some specialized integer add/mul stuff.
SSE4 (SSE4.1, SSE4.2)
Add a lot of instructions: Filling in a lot of the gaps by providing min and max and other operations for all integer data types (especially 32-bit integer had been lacking), where previously integer min was only available for unsigned bytes and signed 16-bit. Also scaling, FP rounding, blending, linear algebra operation, text processing, comparisons. Also a non temporal load for reading video memory, or copying it back to main memory. (Previously only NT stores were available.)
AESNI
Add support for accelerating AES symmetric encryption/decryption.
AVX
Add eight/sixteen 256-bit registers (YMM0-YMM7/15). Supports all previous floating point data types. Three-operand instructions.
FMA
Add Fused Multiply Add and correlated instructions.
AVX2
Add support for integer data types.
AVX512F
Add eight/thirty-two 512 bit registers (ZMM0-ZMM7/31) and eight 64-bit mask register (k0-k7). Promote most previous instruction to 512 bit wide. Optional parts of AVX512 add instruction for exponentials & reciprocals (AVX512ER), scatter/gather prefetching (AVX512PF), scatter conflict detection (AVX512CD), compress, expand.
IMCI (Intel Xeon Phi)
Early development of AVX512 for the first-gen Intel Xeon Phi (Knight's Corner) coprocessor.
*/
/*
 * Fallback values for the SIMD feature-test macros.
 *
 * Each macro below is defined to 0 only if it is not already defined when
 * this header is processed, so afterwards every one of them expands to a
 * value and can be used in an expression (for example `#if __AVX2__`).
 *
 * NOTE(review): once a macro is defined here, `#ifdef __AVX__` and
 * `defined(__AVX__)` evaluate to true even though the value is 0. Any code
 * processed after this header that tests these names with #ifdef/defined()
 * instead of by value will take its "feature available" path - confirm that
 * no such check exists (system intrinsic headers included later are worth
 * checking too).
 *
 * NOTE(review): identifiers containing a double underscore are reserved for
 * the implementation; project-prefixed macros would avoid redefining them.
 */

/* AVX family */
#ifndef __AVX__
#define __AVX__ 0
#endif
#ifndef __AVX2__
#define __AVX2__ 0
#endif

/* SSE family */
#ifndef __SSE__
#define __SSE__ 0
#endif
#ifndef __SSE2__
#define __SSE2__ 0
#endif
#ifndef __SSE2_MATH__
#define __SSE2_MATH__ 0
#endif
#ifndef __SSE3__
#define __SSE3__ 0
#endif
#ifndef __SSE4_1__
#define __SSE4_1__ 0
#endif
#ifndef __SSE4_2__
#define __SSE4_2__ 0
#endif
#ifndef __SSE_MATH__
#define __SSE_MATH__ 0
#endif
#ifndef __SSSE3__
#define __SSSE3__ 0
#endif
#endif