mirror of
https://github.com/Karaka-Management/cOMS.git
synced 2026-02-15 01:48:40 +00:00
new tests and minor fixes
This commit is contained in:
parent
b68c8702e0
commit
b13b0e9483
File diff suppressed because it is too large
Load Diff
70
Stdlib/Intrinsics.h
Normal file
70
Stdlib/Intrinsics.h
Normal file
|
|
@ -0,0 +1,70 @@
|
||||||
|
/**
|
||||||
|
* Jingga
|
||||||
|
*
|
||||||
|
* @package Stdlib
|
||||||
|
* @copyright Dennis Eichhorn
|
||||||
|
* @license OMS License 1.0
|
||||||
|
* @version 1.0.0
|
||||||
|
* @link https://jingga.app
|
||||||
|
*/
|
||||||
|
#ifndef STDLIB_INTRINSICS_H
|
||||||
|
#define STDLIB_INTRINSICS_H
|
||||||
|
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
#include "Types.h"
|
||||||
|
|
||||||
|
namespace Stdlib::Intrinsics
|
||||||
|
{
|
||||||
|
inline
|
||||||
|
f32 sqrt(f32 a) {
|
||||||
|
return _mm_cvtss_f32(_mm_sqrt_ss(_mm_set_ss(a)));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
f32 round(f32 a) {
|
||||||
|
return _mm_cvtss_f32(
|
||||||
|
_mm_round_ss(
|
||||||
|
_mm_setzero_ps(),
|
||||||
|
_mm_set_ss(a),
|
||||||
|
(_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
uint32 round_to_int(f32 a) {
|
||||||
|
return (uint32) _mm_cvtss_si32(_mm_set_ss(a));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
f32 floor(f32 a) {
|
||||||
|
return _mm_cvtss_f32(_mm_floor_ss(_mm_setzero_ps(), _mm_set_ss(a)));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
f32 ceil(f32 a) {
|
||||||
|
return _mm_cvtss_f32(_mm_ceil_ss(_mm_setzero_ps(), _mm_set_ss(a)));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
uint32 hash(uint64 a, uint64 b = 0) {
|
||||||
|
uint8 seed[16] = {
|
||||||
|
0xaa, 0x9b, 0xbd, 0xb8,
|
||||||
|
0xa1, 0x98, 0xac, 0x3f,
|
||||||
|
0x1f, 0x94, 0x07, 0xb3,
|
||||||
|
0x8c, 0x27, 0x93, 0x69,
|
||||||
|
};
|
||||||
|
|
||||||
|
__m128i hash = _mm_set_epi64x(a, b);
|
||||||
|
hash = _mm_aesdec_si128(hash, _mm_loadu_si128((__m128i *) seed));
|
||||||
|
hash = _mm_aesdec_si128(hash, _mm_loadu_si128((__m128i *) seed));
|
||||||
|
|
||||||
|
return _mm_extract_epi32(hash, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -13,6 +13,8 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// @todo check Vectors, we can simplify this!!!
|
||||||
|
|
||||||
// int32_t vectors
|
// int32_t vectors
|
||||||
typedef union {
|
typedef union {
|
||||||
struct {
|
struct {
|
||||||
|
|
|
||||||
1101
Stdlib/SIMD/SIMD_F32.h
Normal file
1101
Stdlib/SIMD/SIMD_F32.h
Normal file
File diff suppressed because it is too large
Load Diff
70
Stdlib/SIMD/SIMD_Helper.h
Normal file
70
Stdlib/SIMD/SIMD_Helper.h
Normal file
|
|
@ -0,0 +1,70 @@
|
||||||
|
/**
|
||||||
|
* Karaka
|
||||||
|
*
|
||||||
|
* @package Stdlib
|
||||||
|
* @copyright Dennis Eichhorn
|
||||||
|
* @license OMS License 1.0
|
||||||
|
* @version 1.0.0
|
||||||
|
* @link https://jingga.app
|
||||||
|
*/
|
||||||
|
#ifndef STDLIB_SIMD_HELPER_H
|
||||||
|
#define STDLIB_SIMD_HELPER_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
namespace Stdlib::SIMD
{
    /**
     * Register values produced by one CPUID invocation.
     */
    struct CpuidRegisters {
        uint32_t eax;
        uint32_t ebx;
        uint32_t ecx;
        uint32_t edx;
    };

    /**
     * Executes the CPUID instruction for the given leaf / sub-leaf.
     *
     * @param leaf    Value loaded into EAX before the instruction
     * @param subleaf Value loaded into ECX before the instruction
     *
     * @return EAX, EBX, ECX and EDX as written by the instruction
     */
    inline
    CpuidRegisters cpuid_query(uint32_t leaf, uint32_t subleaf = 0)
    {
        CpuidRegisters regs = {leaf, 0, subleaf, 0};

        __asm__ __volatile__(
            "cpuid;"
            : "=a" (regs.eax), "=b" (regs.ebx), "=c" (regs.ecx), "=d" (regs.edx)
            : "a" (regs.eax), "c" (regs.ecx)
        );

        return regs;
    }

    /**
     * Reads one feature bit from CPUID leaf 7, sub-leaf 0, register EBX.
     *
     * Leaf 0 reports the highest supported basic leaf in EAX; if that is
     * below 7 the leaf 7 output is not meaningful, so the feature is
     * reported as absent instead of reading unrelated bits.
     *
     * @param bit Bit index inside EBX
     *
     * @return true if the bit is set
     */
    inline
    bool cpuid_leaf7_ebx_bit(uint32_t bit)
    {
        if (cpuid_query(0).eax < 7) {
            return false;
        }

        return (cpuid_query(7, 0).ebx >> bit) & 1;
    }

    /**
     * Checks the AVX feature bit (CPUID function 1, ECX bit 28).
     *
     * NOTE(review): this only reports what the CPU advertises; it does not
     * verify that the operating system enabled the extended register state.
     *
     * The functions in this header are `inline` so that including the header
     * from several translation units does not produce duplicate definitions.
     *
     * @return true if the bit is set
     */
    inline
    bool is_avx_supported()
    {
        return (cpuid_query(1).ecx >> 28) & 1;
    }

    /**
     * Checks the AVX-256 (AVX2) feature bit (CPUID function 7, EBX bit 5).
     *
     * @return true if the bit is set
     */
    inline
    bool is_avx256_supported()
    {
        return cpuid_leaf7_ebx_bit(5);
    }

    /**
     * Checks the AVX-512 feature bit (CPUID function 7, EBX bit 16).
     *
     * @return true if the bit is set
     */
    inline
    bool is_avx512_supported()
    {
        return cpuid_leaf7_ebx_bit(16);
    }
}
|
||||||
|
|
||||||
|
#endif
|
||||||
1117
Stdlib/SIMD/SIMD_I32.h
Normal file
1117
Stdlib/SIMD/SIMD_I32.h
Normal file
File diff suppressed because it is too large
Load Diff
|
|
@ -7,8 +7,8 @@
|
||||||
* @version 1.0.0
|
* @version 1.0.0
|
||||||
* @link https://jingga.app
|
* @link https://jingga.app
|
||||||
*/
|
*/
|
||||||
#ifndef TYPES_H
|
#ifndef STDLIB_TYPES_H
|
||||||
#define TYPES_H
|
#define STDLIB_TYPES_H
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
|
|
@ -18,6 +18,11 @@ typedef int16_t int16;
|
||||||
typedef int32_t int32;
|
typedef int32_t int32;
|
||||||
typedef int64_t int64;
|
typedef int64_t int64;
|
||||||
|
|
||||||
|
typedef uint8_t uint8;
|
||||||
|
typedef uint16_t uint16;
|
||||||
|
typedef uint32_t uint32;
|
||||||
|
typedef uint64_t uint64;
|
||||||
|
|
||||||
typedef float f32;
|
typedef float f32;
|
||||||
typedef double f64;
|
typedef double f64;
|
||||||
|
|
||||||
|
|
@ -19,6 +19,7 @@ namespace Threads
|
||||||
struct job_t {
|
struct job_t {
|
||||||
JobFunc func;
|
JobFunc func;
|
||||||
void *arg;
|
void *arg;
|
||||||
|
int state;
|
||||||
job_t *next;
|
job_t *next;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,38 +19,26 @@ namespace Threads
|
||||||
{
|
{
|
||||||
/**
 * Allocates and initialises a job for the thread pool.
 *
 * The job is created with state 0 and no successor.
 *
 * NOTE(review): nothing visible in this diff frees the returned job any
 * more - presumably the caller owns it; confirm.
 *
 * @param func Callback stored on the job; NULL is rejected
 * @param arg  Opaque pointer stored on the job
 *
 * @return The new job, or NULL if func is NULL or the allocation failed
 */
Job *pool_work_create(JobFunc func, void *arg)
{
    if (func == NULL) {
        return NULL;
    }

    Job *work = (Job *) malloc(sizeof(*work));
    if (work == NULL) {
        // Out of memory: report failure instead of writing through NULL below.
        return NULL;
    }

    work->func  = func;
    work->arg   = arg;
    work->state = 0;
    work->next  = NULL;

    return work;
}
|
||||||
|
|
||||||
void pool_work_destroy(Job *work)
|
Job *pool_work_poll(Threads::ThreadPool *pool)
|
||||||
{
|
{
|
||||||
if (work == NULL) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
free(work);
|
|
||||||
}
|
|
||||||
|
|
||||||
Job *pool_work_get(Threads::ThreadPool *pool)
|
|
||||||
{
|
|
||||||
Job *work;
|
|
||||||
|
|
||||||
if (pool == NULL) {
|
if (pool == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
work = pool->work_first;
|
Job *work = pool->work_first;
|
||||||
if (work == NULL) {
|
if (work == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
@ -70,7 +58,7 @@ namespace Threads
|
||||||
Threads::ThreadPool *pool = (Threads::ThreadPool *) arg;
|
Threads::ThreadPool *pool = (Threads::ThreadPool *) arg;
|
||||||
Threads::Job *work;
|
Threads::Job *work;
|
||||||
|
|
||||||
while (1) {
|
while (true) {
|
||||||
pthread_mutex_lock(&(pool->work_mutex));
|
pthread_mutex_lock(&(pool->work_mutex));
|
||||||
|
|
||||||
while (pool->work_first == NULL && !pool->stop) {
|
while (pool->work_first == NULL && !pool->stop) {
|
||||||
|
|
@ -81,13 +69,12 @@ namespace Threads
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
work = Threads::pool_work_get(pool);
|
work = Threads::pool_work_poll(pool);
|
||||||
++(pool->working_cnt);
|
++(pool->working_cnt);
|
||||||
pthread_mutex_unlock(&(pool->work_mutex));
|
pthread_mutex_unlock(&(pool->work_mutex));
|
||||||
|
|
||||||
if (work != NULL) {
|
if (work != NULL) {
|
||||||
work->func(work->arg);
|
work->func(work);
|
||||||
pool_work_destroy(work);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_lock(&(pool->work_mutex));
|
pthread_mutex_lock(&(pool->work_mutex));
|
||||||
|
|
@ -155,20 +142,15 @@ namespace Threads
|
||||||
|
|
||||||
void pool_destroy(Threads::ThreadPool *pool)
|
void pool_destroy(Threads::ThreadPool *pool)
|
||||||
{
|
{
|
||||||
Threads::Job *work;
|
|
||||||
Threads::Job *work2;
|
|
||||||
|
|
||||||
if (pool == NULL) {
|
if (pool == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_lock(&(pool->work_mutex));
|
pthread_mutex_lock(&(pool->work_mutex));
|
||||||
work = pool->work_first;
|
Threads::Job *work = pool->work_first;
|
||||||
|
|
||||||
while (work != NULL) {
|
while (work != NULL) {
|
||||||
work2 = work->next;
|
work = work->next;
|
||||||
pool_work_destroy(work);
|
|
||||||
work = work2;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pool->stop = true;
|
pool->stop = true;
|
||||||
|
|
@ -184,17 +166,15 @@ namespace Threads
|
||||||
free(pool);
|
free(pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool pool_add_work(Threads::ThreadPool *pool, JobFunc func, void *arg)
|
Threads::Job* pool_add_work(Threads::ThreadPool *pool, JobFunc func, void *arg)
|
||||||
{
|
{
|
||||||
Threads::Job *work;
|
|
||||||
|
|
||||||
if (pool == NULL) {
|
if (pool == NULL) {
|
||||||
return false;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
work = Threads::pool_work_create(func, arg);
|
Threads::Job *work = Threads::pool_work_create(func, arg);
|
||||||
if (work == NULL) {
|
if (work == NULL) {
|
||||||
return false;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_lock(&(pool->work_mutex));
|
pthread_mutex_lock(&(pool->work_mutex));
|
||||||
|
|
@ -209,7 +189,7 @@ namespace Threads
|
||||||
pthread_cond_broadcast(&(pool->work_cond));
|
pthread_cond_broadcast(&(pool->work_cond));
|
||||||
pthread_mutex_unlock(&(pool->work_mutex));
|
pthread_mutex_unlock(&(pool->work_mutex));
|
||||||
|
|
||||||
return true;
|
return work;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@
|
||||||
if ((a) == (b)) { \
|
if ((a) == (b)) { \
|
||||||
printf("."); \
|
printf("."); \
|
||||||
} else { \
|
} else { \
|
||||||
printf("[F]"); \
|
printf("\033[31m[F]\033[0m"); \
|
||||||
printf("\n\n%s - %i: ", __FILE__, __LINE__); \
|
printf("\n\n%s - %i: ", __FILE__, __LINE__); \
|
||||||
printf((t1), (a)); printf(" != "); printf((t2), (b)); printf("\n"); \
|
printf((t1), (a)); printf(" != "); printf((t2), (b)); printf("\n"); \
|
||||||
return 0; } \
|
return 0; } \
|
||||||
|
|
@ -27,7 +27,7 @@
|
||||||
if (oms_abs((a) - (b)) <= (delta)) { \
|
if (oms_abs((a) - (b)) <= (delta)) { \
|
||||||
printf("."); \
|
printf("."); \
|
||||||
} else { \
|
} else { \
|
||||||
printf("[F]"); \
|
printf("\033[31m[F]\033[0m"); \
|
||||||
printf("\n\n%s - %i: ", __FILE__, __LINE__); \
|
printf("\n\n%s - %i: ", __FILE__, __LINE__); \
|
||||||
printf((t1), (a)); printf(" != "); printf((t2), (b)); printf("\n"); \
|
printf((t1), (a)); printf(" != "); printf((t2), (b)); printf("\n"); \
|
||||||
return 0; } \
|
return 0; } \
|
||||||
|
|
@ -37,7 +37,7 @@
|
||||||
if (strstr((a), (b)) != NULL) { \
|
if (strstr((a), (b)) != NULL) { \
|
||||||
printf("."); \
|
printf("."); \
|
||||||
} else { \
|
} else { \
|
||||||
printf("[F]"); \
|
printf("\033[31m[F]\033[0m"); \
|
||||||
printf("\n\n%s - %i: ", __FILE__, __LINE__); \
|
printf("\n\n%s - %i: ", __FILE__, __LINE__); \
|
||||||
printf("%s", (a)); printf(" !contains "); printf("%s", (b)); printf("\n"); \
|
printf("%s", (a)); printf(" !contains "); printf("%s", (b)); printf("\n"); \
|
||||||
return 0; } \
|
return 0; } \
|
||||||
|
|
@ -47,7 +47,7 @@
|
||||||
if ((a) == true) { \
|
if ((a) == true) { \
|
||||||
printf("."); \
|
printf("."); \
|
||||||
} else { \
|
} else { \
|
||||||
printf("[F]"); \
|
printf("\033[31m[F]\033[0m"); \
|
||||||
printf("\n\n%s - %i: ", __FILE__, __LINE__); \
|
printf("\n\n%s - %i: ", __FILE__, __LINE__); \
|
||||||
printf("%d", (a)); printf(" != "); printf("1"); printf("\n"); \
|
printf("%d", (a)); printf(" != "); printf("1"); printf("\n"); \
|
||||||
return 0; } \
|
return 0; } \
|
||||||
|
|
@ -57,7 +57,7 @@
|
||||||
if ((a) == false) { \
|
if ((a) == false) { \
|
||||||
printf("."); \
|
printf("."); \
|
||||||
} else { \
|
} else { \
|
||||||
printf("[F]"); \
|
printf("\033[31m[F]\033[0m"); \
|
||||||
printf("\n\n%s - %i: ", __FILE__, __LINE__); \
|
printf("\n\n%s - %i: ", __FILE__, __LINE__); \
|
||||||
printf("%d", (a)); printf(" != "); printf("1"); printf("\n"); \
|
printf("%d", (a)); printf(" != "); printf("1"); printf("\n"); \
|
||||||
return 0; } \
|
return 0; } \
|
||||||
|
|
|
||||||
386
tests/Stdlib/SIMD/SIMD_F32Test.cpp
Normal file
386
tests/Stdlib/SIMD/SIMD_F32Test.cpp
Normal file
|
|
@ -0,0 +1,386 @@
|
||||||
|
/**
|
||||||
|
* Jingga
|
||||||
|
*
|
||||||
|
* @package Test
|
||||||
|
* @copyright Dennis Eichhorn
|
||||||
|
* @license OMS License 1.0
|
||||||
|
* @version 1.0.0
|
||||||
|
* @link https://jingga.app
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "../../../Utils/TestUtils.h"
|
||||||
|
#include "../../../Stdlib/SIMD/SIMD_F32.h"
|
||||||
|
#include "../../../Stdlib/SIMD/SIMD_Helper.h"
|
||||||
|
|
||||||
|
/**
 * Allocates an aligned scratch buffer that can hold `count` floats.
 *
 * aligned_alloc expects the requested size to be a multiple of the
 * alignment, so the byte size is rounded up accordingly (e.g. 4 floats at
 * 32 byte alignment occupy 32 bytes instead of 16). The process is aborted
 * if the allocation fails because every test writes through these pointers.
 *
 * @param count     Number of floats the buffer must hold
 * @param alignment Required alignment in bytes
 *
 * @return Pointer to the buffer, never NULL
 */
static float* alloc_f32_buffer(size_t count, size_t alignment)
{
    size_t bytes = count * sizeof(float);

    const size_t remainder = bytes % alignment;
    if (remainder != 0) {
        bytes += alignment - remainder;
    }

    float* buffer = (float *) aligned_alloc(alignment, bytes);
    if (buffer == NULL) {
        printf("aligned_alloc failed\n");
        exit(1);
    }

    return buffer;
}

// Scratch buffers shared by all tests (4 lanes)
float* a_array_4 = alloc_f32_buffer(4, 32);
float* b_array_4 = alloc_f32_buffer(4, 32);
float* expected_array_4 = alloc_f32_buffer(4, 32);
float* result_array_4 = alloc_f32_buffer(4, 32);

// Scratch buffers shared by all tests (8 lanes)
float* a_array_8 = alloc_f32_buffer(8, 32);
float* b_array_8 = alloc_f32_buffer(8, 32);
float* expected_array_8 = alloc_f32_buffer(8, 32);
float* result_array_8 = alloc_f32_buffer(8, 32);

// Scratch buffers shared by all tests (16 lanes).
// 64 byte alignment is what aligned 512 bit loads/stores require; the
// load/unload helpers live in SIMD_F32.h - TODO confirm which variant they use.
float* a_array_16 = alloc_f32_buffer(16, 64);
float* b_array_16 = alloc_f32_buffer(16, 64);
float* expected_array_16 = alloc_f32_buffer(16, 64);
float* result_array_16 = alloc_f32_buffer(16, 64);
|
||||||
|
|
||||||
|
int test_operator_plus();
|
||||||
|
int test_operator_minus();
|
||||||
|
int test_operator_mul();
|
||||||
|
|
||||||
|
int main(int argc, char** argv)
|
||||||
|
{
|
||||||
|
printf("SIMD_F32:\n");
|
||||||
|
|
||||||
|
test_operator_plus();
|
||||||
|
test_operator_minus();
|
||||||
|
test_operator_mul();
|
||||||
|
|
||||||
|
printf("\n\n");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int test_operator_plus()
|
||||||
|
{
|
||||||
|
printf("\noperator+:\n");
|
||||||
|
printf("[4]: ");
|
||||||
|
if (!Stdlib::SIMD::is_avx_supported()) {
|
||||||
|
printf("[\033[33mNot supported\033[0m]");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
a_array_4[0] = 0.0f; a_array_4[1] = 1.0f; a_array_4[2] = 2.0f; a_array_4[3] = 3.0f;
|
||||||
|
b_array_4[0] = 0.0f; b_array_4[1] = 1.0f; b_array_4[2] = 2.0f; b_array_4[3] = 3.0f;
|
||||||
|
|
||||||
|
expected_array_4[0] = 0.0f; expected_array_4[1] = 2.0f; expected_array_4[2] = 4.0f; expected_array_4[3] = 6.0f;
|
||||||
|
Stdlib::SIMD::f32_4_simd expected_simd_4 = Stdlib::SIMD::load_f32_4_simd(expected_array_4);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_4_simd a_simd_4 = Stdlib::SIMD::load_f32_4_simd(a_array_4);
|
||||||
|
Stdlib::SIMD::f32_4_simd b_simd_4 = Stdlib::SIMD::load_f32_4_simd(b_array_4);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_4_simd result_simd_4 = a_simd_4 + b_simd_4;
|
||||||
|
Stdlib::SIMD::unload_f32_4_simd(result_simd_4, result_array_4);
|
||||||
|
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_4[0], expected_array_4[0], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_4[1], expected_array_4[1], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_4[2], expected_array_4[2], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_4[3], expected_array_4[3], 0.01, "%f", "%f");
|
||||||
|
ASSERT_TRUE(Stdlib::SIMD::all_true(result_simd_4 == expected_simd_4));
|
||||||
|
|
||||||
|
printf("\n[8]: ");
|
||||||
|
|
||||||
|
if (!Stdlib::SIMD::is_avx256_supported()) {
|
||||||
|
printf("[\033[33mNot supported\033[0m]");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
a_array_8[0] = 0.0f; a_array_8[1] = 1.0f; a_array_8[2] = 2.0f; a_array_8[3] = 3.0f;
|
||||||
|
a_array_8[4] = 0.0f; a_array_8[5] = 1.0f; a_array_8[6] = 2.0f; a_array_8[7] = 3.0f;
|
||||||
|
|
||||||
|
b_array_8[0] = 0.0f; b_array_8[1] = 1.0f; b_array_8[2] = 2.0f; b_array_8[3] = 3.0f;
|
||||||
|
b_array_8[4] = 0.0f; b_array_8[5] = 1.0f; b_array_8[6] = 2.0f; b_array_8[7] = 3.0f;
|
||||||
|
|
||||||
|
expected_array_8[0] = 0.0f; expected_array_8[1] = 2.0f; expected_array_8[2] = 4.0f; expected_array_8[3] = 6.0f;
|
||||||
|
expected_array_8[4] = 0.0f; expected_array_8[5] = 2.0f; expected_array_8[6] = 4.0f; expected_array_8[7] = 6.0f;
|
||||||
|
Stdlib::SIMD::f32_8_simd expected_simd_8 = Stdlib::SIMD::load_f32_8_simd(expected_array_8);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_8_simd a_simd_8 = Stdlib::SIMD::load_f32_8_simd(a_array_8);
|
||||||
|
Stdlib::SIMD::f32_8_simd b_simd_8 = Stdlib::SIMD::load_f32_8_simd(b_array_8);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_8_simd result_simd_8 = a_simd_8 + b_simd_8;
|
||||||
|
Stdlib::SIMD::unload_f32_8_simd(result_simd_8, result_array_8);
|
||||||
|
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[0], expected_array_8[0], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[1], expected_array_8[1], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[2], expected_array_8[2], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[3], expected_array_8[3], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[4], expected_array_8[4], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[5], expected_array_8[5], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[6], expected_array_8[6], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[7], expected_array_8[7], 0.01, "%f", "%f");
|
||||||
|
ASSERT_TRUE(Stdlib::SIMD::all_true(result_simd_8 == expected_simd_8));
|
||||||
|
|
||||||
|
printf("\n[16]: ");
|
||||||
|
|
||||||
|
if (!Stdlib::SIMD::is_avx512_supported()) {
|
||||||
|
printf("[\033[33mNot supported\033[0m]");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
a_array_16[0] = 0.0f; a_array_16[1] = 1.0f; a_array_16[2] = 2.0f; a_array_16[3] = 3.0f;
|
||||||
|
a_array_16[4] = 0.0f; a_array_16[5] = 1.0f; a_array_16[6] = 2.0f; a_array_16[7] = 3.0f;
|
||||||
|
a_array_16[8] = 0.0f; a_array_16[9] = 1.0f; a_array_16[10] = 2.0f; a_array_16[11] = 3.0f;
|
||||||
|
a_array_16[12] = 0.0f; a_array_16[13] = 1.0f; a_array_16[14] = 2.0f; a_array_16[15] = 3.0f;
|
||||||
|
|
||||||
|
b_array_16[0] = 0.0f; b_array_16[1] = 1.0f; b_array_16[2] = 2.0f; b_array_16[3] = 3.0f;
|
||||||
|
b_array_16[4] = 0.0f; b_array_16[5] = 1.0f; b_array_16[6] = 2.0f; b_array_16[7] = 3.0f;
|
||||||
|
b_array_16[8] = 0.0f; b_array_16[9] = 1.0f; b_array_16[10] = 2.0f; b_array_16[11] = 3.0f;
|
||||||
|
b_array_16[12] = 0.0f; b_array_16[13] = 1.0f; b_array_16[14] = 2.0f; b_array_16[15] = 3.0f;
|
||||||
|
|
||||||
|
expected_array_16[0] = 0.0f; expected_array_16[1] = 2.0f; expected_array_16[2] = 4.0f; expected_array_16[3] = 6.0f;
|
||||||
|
expected_array_16[4] = 0.0f; expected_array_16[5] = 2.0f; expected_array_16[6] = 4.0f; expected_array_16[7] = 6.0f;
|
||||||
|
expected_array_16[8] = 0.0f; expected_array_16[9] = 2.0f; expected_array_16[10] = 4.0f; expected_array_16[11] = 6.0f;
|
||||||
|
expected_array_16[12] = 0.0f; expected_array_16[13] = 2.0f; expected_array_16[14] = 4.0f; expected_array_16[15] = 6.0f;
|
||||||
|
Stdlib::SIMD::f32_16_simd expected_simd_16 = Stdlib::SIMD::load_f32_16_simd(expected_array_16);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_16_simd a_simd_16 = Stdlib::SIMD::load_f32_16_simd(a_array_16);
|
||||||
|
Stdlib::SIMD::f32_16_simd b_simd_16 = Stdlib::SIMD::load_f32_16_simd(b_array_16);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_16_simd result_simd_16 = a_simd_16 + b_simd_16;
|
||||||
|
Stdlib::SIMD::unload_f32_16_simd(result_simd_16, result_array_16);
|
||||||
|
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[0], expected_array_16[0], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[1], expected_array_16[1], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[2], expected_array_16[2], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[3], expected_array_16[3], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[4], expected_array_16[4], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[5], expected_array_16[5], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[6], expected_array_16[6], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[7], expected_array_16[7], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[8], expected_array_16[8], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[9], expected_array_16[9], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[10], expected_array_16[10], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[11], expected_array_16[11], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[12], expected_array_16[12], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[13], expected_array_16[13], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[14], expected_array_16[14], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[15], expected_array_16[15], 0.01, "%f", "%f");
|
||||||
|
ASSERT_TRUE(Stdlib::SIMD::all_true(result_simd_16 == expected_simd_16));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int test_operator_minus()
|
||||||
|
{
|
||||||
|
printf("\noperator-:\n");
|
||||||
|
printf("[4]: ");
|
||||||
|
if (!Stdlib::SIMD::is_avx_supported()) {
|
||||||
|
printf("[\033[33mNot supported\033[0m]");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
a_array_4[0] = 0.0f; a_array_4[1] = 1.0f; a_array_4[2] = 2.0f; a_array_4[3] = 3.0f;
|
||||||
|
b_array_4[0] = 1.0f; b_array_4[1] = 1.0f; b_array_4[2] = 1.0f; b_array_4[3] = 1.0f;
|
||||||
|
|
||||||
|
expected_array_4[0] = -1.0f; expected_array_4[1] = 0.0f; expected_array_4[2] = 1.0f; expected_array_4[3] = 2.0f;
|
||||||
|
Stdlib::SIMD::f32_4_simd expected_simd_4 = Stdlib::SIMD::load_f32_4_simd(expected_array_4);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_4_simd a_simd_4 = Stdlib::SIMD::load_f32_4_simd(a_array_4);
|
||||||
|
Stdlib::SIMD::f32_4_simd b_simd_4 = Stdlib::SIMD::load_f32_4_simd(b_array_4);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_4_simd result_simd_4 = a_simd_4 - b_simd_4;
|
||||||
|
Stdlib::SIMD::unload_f32_4_simd(result_simd_4, result_array_4);
|
||||||
|
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_4[0], expected_array_4[0], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_4[1], expected_array_4[1], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_4[2], expected_array_4[2], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_4[3], expected_array_4[3], 0.01, "%f", "%f");
|
||||||
|
ASSERT_TRUE(Stdlib::SIMD::all_true(result_simd_4 == expected_simd_4));
|
||||||
|
|
||||||
|
printf("\n[8]: ");
|
||||||
|
|
||||||
|
if (!Stdlib::SIMD::is_avx256_supported()) {
|
||||||
|
printf("[\033[33mNot supported\033[0m]");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
a_array_8[0] = 0.0f; a_array_8[1] = 1.0f; a_array_8[2] = 2.0f; a_array_8[3] = 3.0f;
|
||||||
|
a_array_8[4] = 0.0f; a_array_8[5] = 1.0f; a_array_8[6] = 2.0f; a_array_8[7] = 3.0f;
|
||||||
|
|
||||||
|
b_array_8[0] = 1.0f; b_array_8[1] = 1.0f; b_array_8[2] = 1.0f; b_array_8[3] = 1.0f;
|
||||||
|
b_array_8[4] = 1.0f; b_array_8[5] = 1.0f; b_array_8[6] = 1.0f; b_array_8[7] = 1.0f;
|
||||||
|
|
||||||
|
expected_array_8[0] = -1.0f; expected_array_8[1] = 0.0f; expected_array_8[2] = 1.0f; expected_array_8[3] = 2.0f;
|
||||||
|
expected_array_8[4] = -1.0f; expected_array_8[5] = 0.0f; expected_array_8[6] = 1.0f; expected_array_8[7] = 2.0f;
|
||||||
|
Stdlib::SIMD::f32_8_simd expected_simd_8 = Stdlib::SIMD::load_f32_8_simd(expected_array_8);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_8_simd a_simd_8 = Stdlib::SIMD::load_f32_8_simd(a_array_8);
|
||||||
|
Stdlib::SIMD::f32_8_simd b_simd_8 = Stdlib::SIMD::load_f32_8_simd(b_array_8);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_8_simd result_simd_8 = a_simd_8 - b_simd_8;
|
||||||
|
Stdlib::SIMD::unload_f32_8_simd(result_simd_8, result_array_8);
|
||||||
|
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[0], expected_array_8[0], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[1], expected_array_8[1], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[2], expected_array_8[2], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[3], expected_array_8[3], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[4], expected_array_8[4], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[5], expected_array_8[5], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[6], expected_array_8[6], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[7], expected_array_8[7], 0.01, "%f", "%f");
|
||||||
|
ASSERT_TRUE(Stdlib::SIMD::all_true(result_simd_8 == expected_simd_8));
|
||||||
|
|
||||||
|
printf("\n[16]: ");
|
||||||
|
|
||||||
|
if (!Stdlib::SIMD::is_avx512_supported()) {
|
||||||
|
printf("[\033[33mNot supported\033[0m]");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
a_array_16[0] = 0.0f; a_array_16[1] = 1.0f; a_array_16[2] = 2.0f; a_array_16[3] = 3.0f;
|
||||||
|
a_array_16[4] = 0.0f; a_array_16[5] = 1.0f; a_array_16[6] = 2.0f; a_array_16[7] = 3.0f;
|
||||||
|
a_array_16[8] = 0.0f; a_array_16[9] = 1.0f; a_array_16[10] = 2.0f; a_array_16[11] = 3.0f;
|
||||||
|
a_array_16[12] = 0.0f; a_array_16[13] = 1.0f; a_array_16[14] = 2.0f; a_array_16[15] = 3.0f;
|
||||||
|
|
||||||
|
b_array_16[0] = 1.0f; b_array_16[1] = 1.0f; b_array_16[2] = 1.0f; b_array_16[3] = 1.0f;
|
||||||
|
b_array_16[4] = 1.0f; b_array_16[5] = 1.0f; b_array_16[6] = 1.0f; b_array_16[7] = 1.0f;
|
||||||
|
b_array_16[8] = 1.0f; b_array_16[9] = 1.0f; b_array_16[10] = 1.0f; b_array_16[11] = 1.0f;
|
||||||
|
b_array_16[12] = 1.0f; b_array_16[13] = 1.0f; b_array_16[14] = 1.0f; b_array_16[15] = 1.0f;
|
||||||
|
|
||||||
|
expected_array_16[0] = -1.0f; expected_array_16[1] = 0.0f; expected_array_16[2] = 1.0f; expected_array_16[3] = 2.0f;
|
||||||
|
expected_array_16[4] = -1.0f; expected_array_16[5] = 0.0f; expected_array_16[6] = 1.0f; expected_array_16[7] = 2.0f;
|
||||||
|
expected_array_16[8] = -1.0f; expected_array_16[9] = 0.0f; expected_array_16[10] = 1.0f; expected_array_16[11] = 2.0f;
|
||||||
|
expected_array_16[12] = -1.0f; expected_array_16[13] = 0.0f; expected_array_16[14] = 1.0f; expected_array_16[15] = 2.0f;
|
||||||
|
Stdlib::SIMD::f32_16_simd expected_simd_16 = Stdlib::SIMD::load_f32_16_simd(expected_array_16);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_16_simd a_simd_16 = Stdlib::SIMD::load_f32_16_simd(a_array_16);
|
||||||
|
Stdlib::SIMD::f32_16_simd b_simd_16 = Stdlib::SIMD::load_f32_16_simd(b_array_16);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_16_simd result_simd_16 = a_simd_16 - b_simd_16;
|
||||||
|
Stdlib::SIMD::unload_f32_16_simd(result_simd_16, result_array_16);
|
||||||
|
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[0], expected_array_16[0], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[1], expected_array_16[1], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[2], expected_array_16[2], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[3], expected_array_16[3], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[4], expected_array_16[4], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[5], expected_array_16[5], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[6], expected_array_16[6], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[7], expected_array_16[7], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[8], expected_array_16[8], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[9], expected_array_16[9], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[10], expected_array_16[10], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[11], expected_array_16[11], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[12], expected_array_16[12], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[13], expected_array_16[13], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[14], expected_array_16[14], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[15], expected_array_16[15], 0.01, "%f", "%f");
|
||||||
|
ASSERT_TRUE(Stdlib::SIMD::all_true(result_simd_16 == expected_simd_16));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int test_operator_mul()
|
||||||
|
{
|
||||||
|
printf("\noperator*:\n");
|
||||||
|
printf("[4]: ");
|
||||||
|
if (!Stdlib::SIMD::is_avx_supported()) {
|
||||||
|
printf("[\033[33mNot supported\033[0m]");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
a_array_4[0] = 0.0f; a_array_4[1] = 1.0f; a_array_4[2] = 2.0f; a_array_4[3] = 3.0f;
|
||||||
|
b_array_4[0] = 0.0f; b_array_4[1] = 1.0f; b_array_4[2] = 2.0f; b_array_4[3] = 3.0f;
|
||||||
|
|
||||||
|
expected_array_4[0] = 0.0f; expected_array_4[1] = 1.0f; expected_array_4[2] = 4.0f; expected_array_4[3] = 9.0f;
|
||||||
|
Stdlib::SIMD::f32_4_simd expected_simd_4 = Stdlib::SIMD::load_f32_4_simd(expected_array_4);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_4_simd a_simd_4 = Stdlib::SIMD::load_f32_4_simd(a_array_4);
|
||||||
|
Stdlib::SIMD::f32_4_simd b_simd_4 = Stdlib::SIMD::load_f32_4_simd(b_array_4);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_4_simd result_simd_4 = a_simd_4 * b_simd_4;
|
||||||
|
Stdlib::SIMD::unload_f32_4_simd(result_simd_4, result_array_4);
|
||||||
|
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_4[0], expected_array_4[0], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_4[1], expected_array_4[1], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_4[2], expected_array_4[2], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_4[3], expected_array_4[3], 0.01, "%f", "%f");
|
||||||
|
ASSERT_TRUE(Stdlib::SIMD::all_true(result_simd_4 == expected_simd_4));
|
||||||
|
|
||||||
|
printf("\n[8]: ");
|
||||||
|
|
||||||
|
if (!Stdlib::SIMD::is_avx256_supported()) {
|
||||||
|
printf("[\033[33mNot supported\033[0m]");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
a_array_8[0] = 0.0f; a_array_8[1] = 1.0f; a_array_8[2] = 2.0f; a_array_8[3] = 3.0f;
|
||||||
|
a_array_8[4] = 0.0f; a_array_8[5] = 1.0f; a_array_8[6] = 2.0f; a_array_8[7] = 3.0f;
|
||||||
|
|
||||||
|
b_array_8[0] = 0.0f; b_array_8[1] = 1.0f; b_array_8[2] = 2.0f; b_array_8[3] = 3.0f;
|
||||||
|
b_array_8[4] = 0.0f; b_array_8[5] = 1.0f; b_array_8[6] = 2.0f; b_array_8[7] = 3.0f;
|
||||||
|
|
||||||
|
expected_array_8[0] = 0.0f; expected_array_8[1] = 1.0f; expected_array_8[2] = 4.0f; expected_array_8[3] = 9.0f;
|
||||||
|
expected_array_8[4] = 0.0f; expected_array_8[5] = 1.0f; expected_array_8[6] = 4.0f; expected_array_8[7] = 9.0f;
|
||||||
|
Stdlib::SIMD::f32_8_simd expected_simd_8 = Stdlib::SIMD::load_f32_8_simd(expected_array_8);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_8_simd a_simd_8 = Stdlib::SIMD::load_f32_8_simd(a_array_8);
|
||||||
|
Stdlib::SIMD::f32_8_simd b_simd_8 = Stdlib::SIMD::load_f32_8_simd(b_array_8);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_8_simd result_simd_8 = a_simd_8 * b_simd_8;
|
||||||
|
Stdlib::SIMD::unload_f32_8_simd(result_simd_8, result_array_8);
|
||||||
|
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[0], expected_array_8[0], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[1], expected_array_8[1], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[2], expected_array_8[2], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[3], expected_array_8[3], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[4], expected_array_8[4], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[5], expected_array_8[5], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[6], expected_array_8[6], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_8[7], expected_array_8[7], 0.01, "%f", "%f");
|
||||||
|
ASSERT_TRUE(Stdlib::SIMD::all_true(result_simd_8 == expected_simd_8));
|
||||||
|
|
||||||
|
printf("\n[16]: ");
|
||||||
|
|
||||||
|
if (!Stdlib::SIMD::is_avx512_supported()) {
|
||||||
|
printf("[\033[33mNot supported\033[0m]");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
a_array_16[0] = 0.0f; a_array_16[1] = 1.0f; a_array_16[2] = 2.0f; a_array_16[3] = 3.0f;
|
||||||
|
a_array_16[4] = 0.0f; a_array_16[5] = 1.0f; a_array_16[6] = 2.0f; a_array_16[7] = 3.0f;
|
||||||
|
a_array_16[8] = 0.0f; a_array_16[9] = 1.0f; a_array_16[10] = 2.0f; a_array_16[11] = 3.0f;
|
||||||
|
a_array_16[12] = 0.0f; a_array_16[13] = 1.0f; a_array_16[14] = 2.0f; a_array_16[15] = 3.0f;
|
||||||
|
|
||||||
|
b_array_16[0] = 0.0f; b_array_16[1] = 1.0f; b_array_16[2] = 2.0f; b_array_16[3] = 3.0f;
|
||||||
|
b_array_16[4] = 0.0f; b_array_16[5] = 1.0f; b_array_16[6] = 2.0f; b_array_16[7] = 3.0f;
|
||||||
|
b_array_16[8] = 0.0f; b_array_16[9] = 1.0f; b_array_16[10] = 2.0f; b_array_16[11] = 3.0f;
|
||||||
|
b_array_16[12] = 0.0f; b_array_16[13] = 1.0f; b_array_16[14] = 2.0f; b_array_16[15] = 3.0f;
|
||||||
|
|
||||||
|
expected_array_16[0] = 0.0f; expected_array_16[1] = 1.0f; expected_array_16[2] = 4.0f; expected_array_16[3] = 9.0f;
|
||||||
|
expected_array_16[4] = 0.0f; expected_array_16[5] = 1.0f; expected_array_16[6] = 4.0f; expected_array_16[7] = 9.0f;
|
||||||
|
expected_array_16[8] = 0.0f; expected_array_16[9] = 1.0f; expected_array_16[10] = 4.0f; expected_array_16[11] = 9.0f;
|
||||||
|
expected_array_16[12] = 0.0f; expected_array_16[13] = 1.0f; expected_array_16[14] = 4.0f; expected_array_16[15] = 9.0f;
|
||||||
|
Stdlib::SIMD::f32_16_simd expected_simd_16 = Stdlib::SIMD::load_f32_16_simd(expected_array_16);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_16_simd a_simd_16 = Stdlib::SIMD::load_f32_16_simd(a_array_16);
|
||||||
|
Stdlib::SIMD::f32_16_simd b_simd_16 = Stdlib::SIMD::load_f32_16_simd(b_array_16);
|
||||||
|
|
||||||
|
Stdlib::SIMD::f32_16_simd result_simd_16 = a_simd_16 * b_simd_16;
|
||||||
|
Stdlib::SIMD::unload_f32_16_simd(result_simd_16, result_array_16);
|
||||||
|
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[0], expected_array_16[0], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[1], expected_array_16[1], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[2], expected_array_16[2], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[3], expected_array_16[3], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[4], expected_array_16[4], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[5], expected_array_16[5], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[6], expected_array_16[6], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[7], expected_array_16[7], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[8], expected_array_16[8], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[9], expected_array_16[9], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[10], expected_array_16[10], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[11], expected_array_16[11], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[12], expected_array_16[12], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[13], expected_array_16[13], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[14], expected_array_16[14], 0.01, "%f", "%f");
|
||||||
|
ASSERT_EQUALS_WITH_DELTA(result_array_16[15], expected_array_16[15], 0.01, "%f", "%f");
|
||||||
|
ASSERT_TRUE(Stdlib::SIMD::all_true(result_simd_16 == expected_simd_16));
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
42
tests/Stdlib/SIMD/SIMD_HelperTest.cpp
Normal file
42
tests/Stdlib/SIMD/SIMD_HelperTest.cpp
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
/**
|
||||||
|
* Jingga
|
||||||
|
*
|
||||||
|
* @package Test
|
||||||
|
* @copyright Dennis Eichhorn
|
||||||
|
* @license OMS License 1.0
|
||||||
|
* @version 1.0.0
|
||||||
|
* @link https://jingga.app
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "../../../Utils/TestUtils.h"
|
||||||
|
#include "../../../Stdlib/SIMD/SIMD_Helper.h"
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char** argv)
|
||||||
|
{
|
||||||
|
printf("SIMD_Helper:\n");
|
||||||
|
|
||||||
|
if (Stdlib::SIMD::is_avx_supported()) {
|
||||||
|
printf("\nAVX is supported");
|
||||||
|
} else {
|
||||||
|
printf("\033[33m\nAVX is NOT supported\033[0m");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Stdlib::SIMD::is_avx256_supported()) {
|
||||||
|
printf("\nAVX 256 is supported");
|
||||||
|
} else {
|
||||||
|
printf("\033[33m\nAVX 256 is NOT supported\033[0m");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Stdlib::SIMD::is_avx512_supported()) {
|
||||||
|
printf("\nAVX 512 is supported");
|
||||||
|
} else {
|
||||||
|
printf("\033[33m\nAVX 512 is NOT supported\033[0m");
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("\n\n");
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -8,6 +8,7 @@
|
||||||
* @link https://jingga.app
|
* @link https://jingga.app
|
||||||
*/
|
*/
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
#include "../../Threads/Thread.h"
|
#include "../../Threads/Thread.h"
|
||||||
#include "../../Utils/TestUtils.h"
|
#include "../../Utils/TestUtils.h"
|
||||||
|
|
@ -15,17 +16,19 @@
|
||||||
static const size_t num_threads = 4;
|
static const size_t num_threads = 4;
|
||||||
static const size_t num_items = 10;
|
static const size_t num_items = 10;
|
||||||
|
|
||||||
|
// increase value by 100
|
||||||
void worker(void *arg)
|
void worker(void *arg)
|
||||||
{
|
{
|
||||||
int *val = (int *) arg;
|
Threads::Job *job = (Threads::Job *) arg;
|
||||||
int old = *val;
|
|
||||||
|
|
||||||
|
int *val = (int *) job->arg;
|
||||||
*val += 100;
|
*val += 100;
|
||||||
// printf("tid=%p, old=%d, val=%d\n", (void *) pthread_self(), old, *val);
|
|
||||||
|
|
||||||
if (*val % 2) {
|
if (*val % 2) {
|
||||||
sleep(1);
|
sleep(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
job->state = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv)
|
int main(int argc, char** argv)
|
||||||
|
|
@ -36,25 +39,33 @@ int main(int argc, char** argv)
|
||||||
int i;
|
int i;
|
||||||
Threads::ThreadPool *pool = Threads::pool_create(num_threads);
|
Threads::ThreadPool *pool = Threads::pool_create(num_threads);
|
||||||
int *vals = (int *) calloc(num_items, sizeof(int));
|
int *vals = (int *) calloc(num_items, sizeof(int));
|
||||||
|
Threads::Job **works = (Threads::Job **) calloc(num_items, sizeof(Threads::Job));
|
||||||
|
|
||||||
for (i = 0; i < num_items; ++i) {
|
for (i = 0; i < num_items; ++i) {
|
||||||
vals[i] = i;
|
vals[i] = i;
|
||||||
Threads::pool_add_work(pool, worker, vals + i);
|
works[i] = Threads::pool_add_work(pool, worker, vals + i);
|
||||||
}
|
}
|
||||||
|
|
||||||
Threads::pool_wait(pool);
|
// @bug wait is not working as expected
|
||||||
sleep(1);
|
// I thought wait works similarly to what the do/while construct below does
|
||||||
|
//Threads::pool_wait(pool);
|
||||||
|
|
||||||
|
bool finished = false;
|
||||||
|
do {
|
||||||
|
finished = true;
|
||||||
|
for (i = 0; i < num_items; ++i) {
|
||||||
|
finished = finished && (works[i]->state == 1);
|
||||||
|
}
|
||||||
|
} while (!finished);
|
||||||
|
|
||||||
bool test = true;
|
bool test = true;
|
||||||
|
|
||||||
for (i = 0; i < num_items; ++i) {
|
for (i = 0; i < num_items; ++i) {
|
||||||
// printf("%d\n", vals[i]);
|
ASSERT_EQUALS(vals[i], 100 + i, "%d", "%d");
|
||||||
test = test && 100 + i == vals[i];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT_EQUALS(test, true, "%d", "%d");
|
|
||||||
|
|
||||||
free(vals);
|
free(vals);
|
||||||
|
free(works);
|
||||||
Threads::pool_destroy(pool);
|
Threads::pool_destroy(pool);
|
||||||
|
|
||||||
printf("\n\n");
|
printf("\n\n");
|
||||||
|
|
|
||||||
|
|
@ -9,3 +9,7 @@ g++ $BASEDIR/Image/ImageUtilsTest.cpp -o $BASEDIR/Image/ImageUtilsTest && $BASED
|
||||||
g++ $BASEDIR/Threads/ThreadPoolTest.cpp -o $BASEDIR/Threads/ThreadPoolTest && $BASEDIR/Threads/ThreadPoolTest && rm $BASEDIR/Threads/ThreadPoolTest
|
g++ $BASEDIR/Threads/ThreadPoolTest.cpp -o $BASEDIR/Threads/ThreadPoolTest && $BASEDIR/Threads/ThreadPoolTest && rm $BASEDIR/Threads/ThreadPoolTest
|
||||||
|
|
||||||
# g++ $BASEDIR/Utils/WebUtilsTest.cpp -o $BASEDIR/Utils/WebUtilsTest -l curl -l xml2 -l libxml2 -I /usr/include/libxml2 -f permissive && $BASEDIR/Utils/WebUtilsTest && rm $BASEDIR/Utils/WebUtilsTest
|
# g++ $BASEDIR/Utils/WebUtilsTest.cpp -o $BASEDIR/Utils/WebUtilsTest -l curl -l xml2 -l libxml2 -I /usr/include/libxml2 -f permissive && $BASEDIR/Utils/WebUtilsTest && rm $BASEDIR/Utils/WebUtilsTest
|
||||||
|
|
||||||
|
g++ $BASEDIR/Stdlib/SIMD/SIMD_HelperTest.cpp -o $BASEDIR/Stdlib/SIMD/SIMD_HelperTest && $BASEDIR/Stdlib/SIMD/SIMD_HelperTest && rm $BASEDIR/Stdlib/SIMD/SIMD_HelperTest
|
||||||
|
|
||||||
|
g++ -mavx -msse -maes -msse3 -msse4.1 -mavx512f -mpclmul -mavx512dq -march=native $BASEDIR/Stdlib/SIMD/SIMD_F32Test.cpp -o $BASEDIR/Stdlib/SIMD/SIMD_F32Test && $BASEDIR/Stdlib/SIMD/SIMD_F32Test && rm $BASEDIR/Stdlib/SIMD/SIMD_F32Test
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user