cOMS/Utils/Intrinsics.h
Dennis Eichhorn 146dc9afdc fix style
2024-04-24 17:50:47 +00:00

111 lines
3.2 KiB
C

/**
* Jingga
*
* @package Utils
* @copyright Dennis Eichhorn
* @license OMS License 1.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef UTILS_INTRINSICS_H
#define UTILS_INTRINSICS_H
#include <stdio.h>
#include <stdlib.h>
/*
MMX
Introduce eight 64 bit registers (MM0-MM7) and instructions to work with eight signed/unsigned bytes, four
signed/unsigned words, two signed/unsigned dwords.
3DNow!
Add support for single precision floating point operands to MMX. Only a few operations are supported, for example
addition, subtraction and multiplication.
SSE
Introduce eight/sixteen 128 bit registers (XMM0-XMM7/15) and instructions to work with four single precision floating
point operands. Add integer operations on MMX registers too. (The MMX-integer part of SSE is sometimes called MMXEXT,
and was implemented on a few non-Intel CPUs without xmm registers and the floating point part of SSE.)
SSE2
Introduce instructions to work with two double precision floating point operands, and with packed byte/word/dword/qword
integers in 128-bit xmm registers.
SSE3
Add a few varied instructions (mostly floating point), including a special kind of unaligned load (lddqu) that was
better on Pentium 4, synchronization instructions, horizontal add/sub.
SSSE3
Again a varied set of instructions, mostly integer. The first shuffle that takes its control operand from a register
instead of hard-coded (pshufb). More horizontal processing, shuffle, packing/unpacking, mul+add on bytes, and some
specialized integer add/mul stuff.
SSE4 (SSE4.1, SSE4.2)
Add a lot of instructions: Filling in a lot of the gaps by providing min and max and other operations for all integer
data types (especially 32-bit integer had been lacking), where previously integer min was only available for unsigned
bytes and signed 16-bit. Also scaling, FP rounding, blending, linear algebra operations, text processing, comparisons.
Also a non temporal load for reading video memory, or copying it back to main memory. (Previously only NT stores were
available.)
AESNI
Add support for accelerating AES symmetric encryption/decryption.
AVX
Add eight/sixteen 256 bit registers (YMM0-YMM7/15). Support all previous floating point data types. Three operand
instructions.
FMA
Add Fused Multiply Add and correlated instructions.
AVX2
Add support for integer data types.
AVX512F
Add eight/thirty-two 512 bit registers (ZMM0-ZMM7/31) and eight 64-bit mask registers (k0-k7). Promote most previous
instructions to 512 bit wide. Optional parts of AVX512 add instructions for exponentials & reciprocals (AVX512ER),
scatter/gather prefetching (AVX512PF), scatter conflict detection (AVX512CD), compress, expand.
IMCI (Intel Xeon Phi)
Early development of AVX512 for the first-gen Intel Xeon Phi (Knights Corner) coprocessor.
*/
/**
* Fallback values for the SIMD feature macros.
*
* Each macro below that is still undefined at this point is defined as 0.
* A macro that already has a definition is left untouched.
*
* NOTE(review): once this header has been included every one of these names
* is defined, so `#ifdef` / `defined()` checks on them always succeed.
* Test the value instead, e.g. `#if __AVX2__`.
*/

/* SSE family */
#if !defined(__SSE__)
    #define __SSE__ 0
#endif

#if !defined(__SSE_MATH__)
    #define __SSE_MATH__ 0
#endif

#if !defined(__SSE2__)
    #define __SSE2__ 0
#endif

#if !defined(__SSE2_MATH__)
    #define __SSE2_MATH__ 0
#endif

#if !defined(__SSE3__)
    #define __SSE3__ 0
#endif

#if !defined(__SSSE3__)
    #define __SSSE3__ 0
#endif

#if !defined(__SSE4_1__)
    #define __SSE4_1__ 0
#endif

#if !defined(__SSE4_2__)
    #define __SSE4_2__ 0
#endif

/* AVX family */
#if !defined(__AVX__)
    #define __AVX__ 0
#endif

#if !defined(__AVX2__)
    #define __AVX2__ 0
#endif
#endif