cOMS/stdlib/SIMD_Helper.h

284 lines
6.0 KiB
C

/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef TOS_STDLIB_SIMD_HELPER_H
#define TOS_STDLIB_SIMD_HELPER_H
#include <stdint.h>
#include <immintrin.h>
#include <xmmintrin.h>
#include "Types.h"
// @todo split into platform code for windows and linux
#if _WIN32
#include <windows.h>
#ifdef _MSC_VER
#include <intrin.h>
#endif
#elif __linux__
#include <sys/auxv.h>
#include <unistd.h>
#endif
#if ARM
#include <arm_sve.h>
#else
int32 svcntw() {
return 0;
}
#endif
enum SIMDVersion {
SIMD_VERSION_NONE,
SIMD_VERSION_128,
SIMD_VERSION_256,
SIMD_VERSION_512,
SIMD_VERSION_SVE,
SIMD_VERSION_NEON,
};
// @todo implement for arm?
inline int32 max_neon_supported()
{
#if ARM
#if _WIN32
int cpu_info[4] = {0};
__cpuid(cpu_info, 0);
if (cpu_info[3] & (1 << 1)) {
return 1;
}
#else
unsigned int eax, ebx, ecx, edx;
__asm__ volatile (
"cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(0)
);
if (edx & (1 << 1)) {
return 1;
}
#endif
return 0;
#else
return 0;
#endif
}
inline int32 max_sve_supported()
{
#if ARM
int32 hwcaps = getauxval(AT_HWCAP);
return (int32) ((bool) (hwcaps & (1 << 19)));
#else
return 0;
#endif
}
inline int32 max_sse_supported()
{
#if ARM
return 0;
#else
#ifdef _MSC_VER
int32 cpuInfo[4] = {-1};
__cpuid(cpuInfo, 1); // CPUID function 1
uint32 ecx = cpuInfo[2];
uint32 edx = cpuInfo[3];
#else
uint32 eax, ebx, ecx, edx;
eax = 1; // CPUID function 1
__asm__ __volatile__("cpuid;"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(eax));
#endif
bool sse42_supported = (ecx >> 20) & 1;
if (sse42_supported) {
return 42;
}
bool sse41_supported = (ecx >> 19) & 1;
if (sse41_supported) {
return 41;
}
bool sse3_supported = (ecx >> 0) & 1;
if (sse3_supported) {
return 3;
}
bool sse2_supported = (edx >> 26) & 1;
if (sse2_supported) {
return 2;
}
return 0;
#endif
}
inline
int32 max_avx256_supported()
{
#if ARM
return 0;
#else
int32 max_version = 0;
#ifdef _MSC_VER
int32 cpuInfo[4];
__cpuid(cpuInfo, 1);
if ((cpuInfo[2] >> 28) & 1) {
__cpuid(cpuInfo, 7); // Query extended features
if ((cpuInfo[1] >> 5) & 1) {
max_version = 2;
}
}
#else
uint32 eax, ebx, ecx, edx;
__asm__ __volatile__("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(1));
if ((ecx >> 28) & 1) {
eax = 7;
ecx = 0;
__asm__ __volatile__("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(eax), "c"(ecx));
if ((ebx >> 5) & 1) {
max_version = 2;
}
}
#endif
return max_version;
#endif
}
inline
int32 max_avx512_supported()
{
#if ARM
return 0;
#else
#ifdef _MSC_VER
int32 cpuInfo[4];
__cpuid(cpuInfo, 1);
int32 ebx = 0;
if ((cpuInfo[2] >> 28) & 1) {
__cpuid(cpuInfo, 7);
ebx = cpuInfo[1];
}
#else
uint32 eax, ebx, ecx, edx;
__asm__ __volatile__("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(1));
if ((ecx >> 28) & 1) {
eax = 7;
ecx = 0;
__asm__ __volatile__("cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(eax), "c"(ecx));
}
#endif
if ((ebx >> 16) & 1) {
return 1; // AVX-512F
}
if ((ebx >> 17) & 1) {
return 2; // AVX-512DQ
}
if ((ebx >> 21) & 1) {
return 3; // AVX-512IFMA
}
if ((ebx >> 26) & 1) {
return 4; // AVX-512PF
}
if ((ebx >> 27) & 1) {
return 5; // AVX-512ER
}
if ((ebx >> 28) & 1) {
return 6; // AVX-512CD
}
if ((ebx >> 30) & 1) {
return 7; // AVX-512BW
}
if ((ebx >> 31) & 1) {
return 8; // AVX-512VL
}
return 0;
#endif
}
const char AVX512_VERSIONS[8][12] = {
"AVX-512F",
"AVX-512DQ",
"AVX-512IFMA",
"AVX-512PF",
"AVX-512ER",
"AVX-512CD",
"AVX-512BW",
"AVX-512VL"
};
bool supports_abm() {
#if ARM
return 0;
#else
bool popcnt_supported;
bool lzcnt_supported;
#ifdef _MSC_VER
int cpuInfo[4];
__cpuid(cpuInfo, 0x80000001);
popcnt_supported = (cpuInfo[2] & (1 << 5)) != 0;
lzcnt_supported = (cpuInfo[1] & (1 << 5)) != 0;
#else
uint32 eax, ebx, ecx, edx;
eax = 0x80000001;
__asm__ __volatile__ (
"cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(eax)
);
// Check if the ABM (POPCNT and LZCNT) bits are set
popcnt_supported = (ecx & (1 << 5)) != 0;
lzcnt_supported = (ebx & (1 << 5)) != 0;
#endif
return popcnt_supported && lzcnt_supported;
#endif
}
#endif