cOMS/hash/Sha1SimdX86.h
Dennis Eichhorn dc9f37b726
Some checks failed
CodeQL / Analyze (${{ matrix.language }}) (autobuild, c-cpp) (push) Has been cancelled
Microsoft C++ Code Analysis / Analyze (push) Has been cancelled
update
2025-04-06 10:34:47 +00:00

125 lines
3.4 KiB
C

/**
* Jingga
*
* @copyright Jingga
* @license OMS License 2.0
* @version 1.0.0
* @link https://jingga.app
*/
#ifndef COMS_HASH_SHA1_SIMD_X86_H
#define COMS_HASH_SHA1_SIMD_X86_H
#include <immintrin.h>
#include "../stdlib/Types.h"
#include "Sha1Definitions.h"
/**
 * Processes a single 64-byte message block and folds it into the running
 * SHA-1 state stored in ctx.
 *
 * The function expands the 16 input words into the 80-word message schedule
 * and then runs the 80 compression rounds.
 *
 * @param ctx   Hash context; ctx->state[0..4] is read and updated in place
 * @param data  The 64-byte block, read as 16 big-endian 32-bit words
 * @param steps Requested SIMD width in 32-bit lanes. Values >= 4 select the
 *              128-bit message-schedule path when the file is compiled with
 *              __SSE4_2__, every other value (or build) uses the scalar path.
 *              Both paths produce the same schedule.
 */
static
void sha1_transform(SHA1_CTX* ctx, const byte data[64], int32 steps) {
    alignas(64) uint32 w[80];

    // Load the block as 16 big-endian words
    // @question Does it make sense to also do SIMD here?
    for (int32 i = 0; i < 16; ++i) {
        w[i] = ((uint32) data[i * 4 + 0] << 24)
            | ((uint32) data[i * 4 + 1] << 16)
            | ((uint32) data[i * 4 + 2] << 8)
            | ((uint32) data[i * 4 + 3]);
    }

    // Message schedule: w[i] = ROTL1(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16])
    //
    // The w[i-3] term limits how many words can be produced independently:
    // lanes 0..2 of a 4-word group only need already computed words, lane 3
    // needs w[i], which is lane 0 of the same group. Wider vectors (8/16 lanes)
    // would need even more not-yet-computed words, which is why only a 4-lane
    // path is provided and larger step values fall back to it.
    #ifdef __SSE4_2__
    if (steps >= 4) {
        for (int32 i = 16; i < 80; i += 4) {
            // (w[i-3], w[i-2], w[i-1], 0): aligned load of w[i-4..i-1] shifted
            // down by one word, so the unknown w[i] is never read
            __m128i v = _mm_srli_si128(_mm_load_si128((const __m128i *) &w[i - 4]), 4);

            v = _mm_xor_si128(v, _mm_load_si128((const __m128i *) &w[i - 8]));
            v = _mm_xor_si128(v, _mm_loadu_si128((const __m128i *) &w[i - 14]));
            v = _mm_xor_si128(v, _mm_load_si128((const __m128i *) &w[i - 16]));
            v = _mm_or_si128(_mm_slli_epi32(v, 1), _mm_srli_epi32(v, 31));

            // Lane 3 is still missing the w[i] term. The rotation is linear
            // over XOR, so it can be added afterwards as ROTL1(w[i]) where
            // w[i] is the finished lane 0.
            __m128i fix = _mm_slli_si128(v, 12);
            fix = _mm_or_si128(_mm_slli_epi32(fix, 1), _mm_srli_epi32(fix, 31));
            v = _mm_xor_si128(v, fix);

            _mm_store_si128((__m128i *) &w[i], v);
        }
    } else
    #endif
    {
        // Scalar fallback, also used when no SIMD path is compiled in
        (void) steps;

        for (int32 i = 16; i < 80; ++i) {
            w[i] = SHA1_ROTL32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);
        }
    }

    uint32 a = ctx->state[0];
    uint32 b = ctx->state[1];
    uint32 c = ctx->state[2];
    uint32 d = ctx->state[3];
    uint32 e = ctx->state[4];

    // 80 rounds, the round function and constant change every 20 rounds
    for (int32 i = 0; i < 80; ++i) {
        uint32 temp;

        if (i < 20) {
            temp = SHA1_ROTL32(a, 5) + SHA1_Ch(b, c, d) + e + K1 + w[i];
        } else if (i < 40) {
            temp = SHA1_ROTL32(a, 5) + SHA1_Parity(b, c, d) + e + K2 + w[i];
        } else if (i < 60) {
            temp = SHA1_ROTL32(a, 5) + SHA1_Maj(b, c, d) + e + K3 + w[i];
        } else {
            temp = SHA1_ROTL32(a, 5) + SHA1_Parity(b, c, d) + e + K4 + w[i];
        }

        e = d;
        d = c;
        c = SHA1_ROTL32(b, 30);
        b = a;
        a = temp;
    }

    // Add the working variables of this block to the running state
    ctx->state[0] += a;
    ctx->state[1] += b;
    ctx->state[2] += c;
    ctx->state[3] += d;
    ctx->state[4] += e;
}
#endif