-rw-r--r-- 2689 libmceliece-20230612/crypto_kem/348864/avx/vec256.h raw
/*
This file is for functions related to 256-bit vectors
including functions for bitsliced field operations
*/
#ifndef VEC256_H
#define VEC256_H
#define vec256_ama_asm CRYPTO_SHARED_NAMESPACE(vec256_ama_asm)
#define vec256_inv CRYPTO_NAMESPACE(vec256_inv)
#define vec256_maa_asm CRYPTO_SHARED_NAMESPACE(vec256_maa_asm)
#define vec256_mul_asm CRYPTO_SHARED_NAMESPACE(vec256_mul_asm)
#define vec256_sq CRYPTO_NAMESPACE(vec256_sq)
#include "vec128.h"
#include <immintrin.h>
typedef __m256i vec256;
static inline vec256 vec256_set1_16b(uint16_t a)
{
return _mm256_set1_epi16(a);
}
static inline vec256 vec256_set1_32b(uint64_t a)
{
return _mm256_set1_epi32(a);
}
static inline vec256 vec256_setzero()
{
return _mm256_setzero_si256();
}
static inline vec256 vec256_set4x(uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3)
{
return _mm256_set_epi64x(a3, a2, a1, a0);
}
#define vec256_extract2x(a,i) ((vec128) _mm256_extractf128_si256((vec256) (a),(i)))
#define vec256_extract(a,i) ((uint64_t) _mm256_extract_epi64((vec256) (a),(i)))
static inline int vec256_testz(vec256 a)
{
return _mm256_testz_si256(a, a);
}
static inline vec256 vec256_and(vec256 a, vec256 b)
{
return _mm256_and_si256(a, b);
}
static inline vec256 vec256_xor(vec256 a, vec256 b)
{
return _mm256_xor_si256(a, b);
}
static inline vec256 vec256_or(vec256 a, vec256 b)
{
return _mm256_or_si256(a, b);
}
#define vec256_sll_4x(a, s) ((vec256) _mm256_slli_epi64((vec256) (a), (s)))
#define vec256_srl_4x(a, s) ((vec256) _mm256_srli_epi64((vec256) (a), (s)))
static inline vec256 vec256_unpack_low(vec256 a, vec256 b)
{
return _mm256_permute2x128_si256 (a, b, 0x20);
}
static inline vec256 vec256_unpack_high(vec256 a, vec256 b)
{
return _mm256_permute2x128_si256 (a, b, 0x31);
}
static inline vec256 vec256_unpack_low_2x(vec256 a, vec256 b)
{
return _mm256_unpacklo_epi64 (a, b);
}
static inline vec256 vec256_unpack_high_2x(vec256 a, vec256 b)
{
return _mm256_unpackhi_epi64 (a, b);
}
static inline vec256 vec256_or_reduce(vec256 * a)
{
int i;
vec256 ret;
ret = a[0];
for (i = 1; i < GFBITS; i++)
ret = vec256_or(ret, a[i]);
return ret;
}
static inline void vec256_copy(vec256 *dest, vec256 *src)
{
int i;
for (i = 0; i < GFBITS; i++)
dest[i] = src[i];
}
extern void vec256_mul_asm(vec256 *, vec256 *, const vec256 *);
/* bitsliced field multiplications */
static inline void vec256_mul(vec256 *h, vec256 *f, const vec256 *g)
{
vec256_mul_asm(h, f, g);
}
void vec256_sq(vec256 *, vec256 *);
void vec256_inv(vec256 *, vec256 *);
extern void vec256_maa_asm(vec256 *, vec256 *, const vec256 *);
extern void vec256_ama_asm(vec256 *, vec256 *, const vec256 *);
#endif