/**
 * Jingga
 *
 * Pulls in the x86 SIMD intrinsic headers and guarantees that every
 * instruction-set feature macro listed at the bottom of this file is
 * defined, falling back to 0 when the compiler did not define it.
 *
 * @package   Utils
 * @copyright Dennis Eichhorn
 * @license   OMS License 1.0
 * @version   1.0.0
 * @link      https://jingga.app
 */
#ifndef UTILS_INTRINSICS_H
#define UTILS_INTRINSICS_H

/*
 * NOTE(review): the header names of the two #include directives were missing
 * in the reviewed text; <immintrin.h> and <xmmintrin.h> are assumed here.
 * Confirm against the repository.
 *
 * The headers only exist for x86 targets, so they are skipped elsewhere; the
 * fallback macros below are still provided on every architecture.
 */
#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
    #include <immintrin.h>
    #include <xmmintrin.h>
#endif

/*
MMX
    Introduces eight 64 bit registers (MM0-MM7) and instructions to work with
    eight signed/unsigned bytes, four signed/unsigned words, two
    signed/unsigned dwords.

3DNow!
    Adds support for single precision floating point operands to MMX. Few
    operations are supported, for example addition, subtraction,
    multiplication.

SSE
    Introduces eight/sixteen 128 bit registers (XMM0-XMM7/15) and instructions
    to work with four single precision floating point operands. Adds integer
    operations on MMX registers too. (The MMX-integer part of SSE is sometimes
    called MMXEXT, and was implemented on a few non-Intel CPUs without xmm
    registers and the floating point part of SSE.)

SSE2
    Introduces instructions to work with 2 double precision floating point
    operands, and with packed byte/word/dword/qword integers in 128-bit xmm
    registers.

SSE3
    Adds a few varied instructions (mostly floating point), including a special
    kind of unaligned load (lddqu) that was better on Pentium 4, a
    synchronization instruction, horizontal add/sub.

SSSE3
    Again a varied set of instructions, mostly integer. The first shuffle that
    takes its control operand from a register instead of hard-coded (pshufb).
    More horizontal processing, shuffle, packing/unpacking, mul+add on bytes,
    and some specialized integer add/mul stuff.

SSE4 (SSE4.1, SSE4.2)
    Adds a lot of instructions: Filling in a lot of the gaps by providing min
    and max and other operations for all integer data types (especially 32-bit
    integer had been lacking), where previously integer min was only available
    for unsigned bytes and signed 16-bit. Also scaling, FP rounding, blending,
    linear algebra operations, text processing, comparisons. Also a non
    temporal load for reading video memory, or copying it back to main memory.
    (Previously only NT stores were available.)

AESNI
    Adds support for accelerating AES symmetric encryption/decryption.

AVX
    Adds eight/sixteen 256 bit registers (YMM0-YMM7/15). Supports all previous
    floating point data types. Three operand instructions.

FMA
    Adds Fused Multiply Add and correlated instructions.

AVX2
    Adds support for integer data types.

AVX512F
    Adds eight/thirty-two 512 bit registers (ZMM0-ZMM7/31) and eight 64-bit
    mask registers (k0-k7). Promotes most previous instructions to 512 bit
    wide. Optional parts of AVX512 add instructions for exponentials &
    reciprocals (AVX512ER), scatter/gather prefetching (AVX512PF), scatter
    conflict detection (AVX512CD), compress, expand.

IMCI (Intel Xeon Phi)
    Early development of AVX512 for the first-gen Intel Xeon Phi
    (Knights Corner) coprocessor.
*/

/*
 * Fallback definitions for the compiler's instruction-set feature macros.
 *
 * Each macro keeps the compiler-provided value when one exists and is defined
 * as 0 otherwise, so it can always be used as a value, both in `#if __AVX__`
 * and in ordinary expressions such as `if (__AVX__)`.
 *
 * Consequences for code that includes this header:
 *   - `#ifdef __AVX__` / `defined(__AVX__)` is ALWAYS true afterwards; test the
 *     value with `#if __AVX__` instead.
 *   - The fallbacks must stay below the #include lines above so the system
 *     headers still see the compiler's original macro state.
 *
 * These names are in the implementation-reserved namespace; they are defined
 * here on purpose, only when the compiler has not already defined them.
 */
#ifndef __AVX__
    #define __AVX__ 0
#endif

#ifndef __AVX2__
    #define __AVX2__ 0
#endif

#ifndef __SSE__
    #define __SSE__ 0
#endif

#ifndef __SSE2__
    #define __SSE2__ 0
#endif

#ifndef __SSE2_MATH__
    #define __SSE2_MATH__ 0
#endif

#ifndef __SSE3__
    #define __SSE3__ 0
#endif

#ifndef __SSE4_1__
    #define __SSE4_1__ 0
#endif

#ifndef __SSE4_2__
    #define __SSE4_2__ 0
#endif

#ifndef __SSE_MATH__
    #define __SSE_MATH__ 0
#endif

#ifndef __SSSE3__
    #define __SSSE3__ 0
#endif

#endif