1 #ifndef OPENMM_VECTORIZE8_H_
2 #define OPENMM_VECTORIZE8_H_
35 #include "vectorize.h"
36 #include <immintrin.h>
/**
 * Construct a vector from eight individual floats.
 *
 * The arguments are passed to _mm256_set_ps in reverse order (v8 first, v1 last).
 * That intrinsic takes its operands from the highest element down to the lowest,
 * so v1 ends up in element 0 and v8 in element 7.
 */
51 fvec8(
float v1,
float v2,
float v3,
float v4,
float v5,
float v6,
float v7,
float v8) :
val(_mm256_set_ps(v8, v7, v6, v5, v4, v3, v2, v1)) {}
/**
 * Construct a vector by loading eight consecutive floats starting at v.
 *
 * _mm256_loadu_ps is the unaligned load, so v needs no particular alignment,
 * but it must point to at least eight readable floats.
 */
53 fvec8(
const float* v) :
val(_mm256_loadu_ps(v)) {}
54 operator __m256()
const {
58 return _mm256_castps256_ps128(
val);
61 return _mm256_extractf128_ps(
val, 1);
64 _mm256_storeu_ps(v,
val);
67 return _mm256_add_ps(
val, other);
70 return _mm256_sub_ps(
val, other);
73 return _mm256_mul_ps(
val, other);
76 return _mm256_div_ps(
val, other);
79 val = _mm256_add_ps(
val, other);
82 val = _mm256_sub_ps(
val, other);
85 val = _mm256_mul_ps(
val, other);
88 val = _mm256_div_ps(
val, other);
91 return _mm256_sub_ps(_mm256_set1_ps(0.0f),
val);
94 return _mm256_and_ps(
val, other);
97 return _mm256_or_ps(
val, other);
100 return _mm256_cmp_ps(
val, other, _CMP_EQ_OQ);
103 return _mm256_cmp_ps(
val, other, _CMP_NEQ_OQ);
106 return _mm256_cmp_ps(
val, other, _CMP_GT_OQ);
109 return _mm256_cmp_ps(
val, other, _CMP_LT_OQ);
112 return _mm256_cmp_ps(
val, other, _CMP_GE_OQ);
115 return _mm256_cmp_ps(
val, other, _CMP_LE_OQ);
/**
 * Convert this vector of floats to an ivec8.
 *
 * Only declared here; the out-of-line definition (inline fvec8::operator ivec8)
 * later in this file uses _mm256_cvttps_epi32, which converts each float to a
 * 32-bit int with truncation.
 */
117 operator ivec8()
const;
/**
 * Construct a vector from eight individual ints.
 *
 * The arguments are passed to _mm256_set_epi32 in reverse order (v8 first, v1 last).
 * That intrinsic takes its operands from the highest element down to the lowest,
 * so v1 ends up in element 0 and v8 in element 7.
 */
129 ivec8(
int v1,
int v2,
int v3,
int v4,
int v5,
int v6,
int v7,
int v8) :
val(_mm256_set_epi32(v8, v7, v6, v5, v4, v3, v2, v1)) {}
/**
 * Construct a vector by loading 256 bits (eight 32-bit ints) starting at v.
 *
 * The pointer is cast to const __m256i* only to satisfy the intrinsic's signature;
 * _mm256_loadu_si256 is the unaligned load, so v needs no particular alignment.
 */
131 ivec8(
const int* v) :
val(_mm256_loadu_si256((const __m256i*) v)) {}
132 operator __m256i()
const {
136 return _mm256_castsi256_si128(
val);
139 return _mm256_extractf128_si256(
val, 1);
142 _mm256_storeu_si256((__m256i*) v,
val);
145 return _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(
val), _mm256_castsi256_ps(other.
val)));
148 return _mm256_castps_si256(_mm256_or_ps(_mm256_castsi256_ps(
val), _mm256_castsi256_ps(other.
val)));
/**
 * Convert this vector of ints to an fvec8.
 *
 * Only declared here; the out-of-line definition (inline ivec8::operator fvec8)
 * later in this file uses _mm256_cvtepi32_ps to convert each 32-bit int to a float.
 */
150 operator fvec8()
const;
155 inline fvec8::operator
ivec8()
const {
156 return _mm256_cvttps_epi32(val);
159 inline ivec8::operator
fvec8()
const {
160 return _mm256_cvtepi32_ps(val);
166 return fvec8(_mm256_round_ps(v.
val, 0x09));
170 return fvec8(_mm256_round_ps(v.
val, 0x0A));
174 return fvec8(_mm256_round_ps(v.
val, _MM_FROUND_TO_NEAREST_INT));
186 static const __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF));
187 return fvec8(_mm256_and_ps(v.
val, mask));
191 return fvec8(_mm256_sqrt_ps(v.
val));
194 static inline float dot8(
const fvec8& v1,
const fvec8& v2) {
195 fvec8 result = _mm256_dp_ps(v1, v2, 0xF1);
199 static inline void transpose(
const fvec4& in1,
const fvec4& in2,
const fvec4& in3,
const fvec4& in4,
const fvec4& in5,
const fvec4& in6,
const fvec4& in7,
const fvec4& in8,
fvec8& out1,
fvec8& out2,
fvec8& out3,
fvec8& out4) {
200 fvec4 i1 = in1, i2 = in2, i3 = in3, i4 = in4;
201 fvec4 i5 = in5, i6 = in6, i7 = in7, i8 = in8;
202 _MM_TRANSPOSE4_PS(i1, i2, i3, i4);
203 _MM_TRANSPOSE4_PS(i5, i6, i7, i8);
206 out1 = _mm256_insertf128_ps(out1, i1, 0);
207 out2 = _mm256_insertf128_ps(out2, i2, 0);
208 out3 = _mm256_insertf128_ps(out3, i3, 0);
209 out4 = _mm256_insertf128_ps(out4, i4, 0);
211 out1 = _mm256_castps128_ps256(i1);
212 out2 = _mm256_castps128_ps256(i2);
213 out3 = _mm256_castps128_ps256(i3);
214 out4 = _mm256_castps128_ps256(i4);
216 out1 = _mm256_insertf128_ps(out1, i5, 1);
217 out2 = _mm256_insertf128_ps(out2, i6, 1);
218 out3 = _mm256_insertf128_ps(out3, i7, 1);
219 out4 = _mm256_insertf128_ps(out4, i8, 1);
222 static inline void transpose(
const fvec8& in1,
const fvec8& in2,
const fvec8& in3,
const fvec8& in4,
fvec4& out1,
fvec4& out2,
fvec4& out3,
fvec4& out4,
fvec4& out5,
fvec4& out6,
fvec4& out7,
fvec4& out8) {
227 _MM_TRANSPOSE4_PS(out1, out2, out3, out4);
232 _MM_TRANSPOSE4_PS(out5, out6, out7, out8);
237 static inline bool any(
const ivec8& v) {
238 return !_mm256_testz_si256(v, _mm256_set1_epi32(0xFFFFFFFF));
243 static inline fvec8 operator+(
float v1,
const fvec8& v2) {
247 static inline fvec8 operator-(
float v1,
const fvec8& v2) {
251 static inline fvec8 operator*(
float v1,
const fvec8& v2) {
255 static inline fvec8 operator/(
float v1,
const fvec8& v2) {
262 return fvec8(_mm256_blendv_ps(v1.
val, v2.
val, _mm256_castsi256_ps(mask.
val)));
fvec8 operator|(const fvec8 &other) const
Definition: vectorize8.h:96
fvec8 operator!=(const fvec8 &other) const
Definition: vectorize8.h:102
A four-element vector of ints.
Definition: vectorize_neon.h:150
fvec8 operator*(const fvec8 &other) const
Definition: vectorize8.h:72
fvec8 operator/(const fvec8 &other) const
Definition: vectorize8.h:75
ivec4 lowerVec() const
Definition: vectorize8.h:135
void operator/=(const fvec8 &other)
Definition: vectorize8.h:87
fvec4 lowerVec() const
Definition: vectorize8.h:57
An eight-element vector of ints.
Definition: vectorize8.h:123
fvec8()
Definition: vectorize8.h:49
fvec4 upperVec() const
Definition: vectorize8.h:60
void operator*=(const fvec8 &other)
Definition: vectorize8.h:84
fvec8 operator+(const fvec8 &other) const
Definition: vectorize8.h:66
fvec8 operator>(const fvec8 &other) const
Definition: vectorize8.h:105
fvec8(float v1, float v2, float v3, float v4, float v5, float v6, float v7, float v8)
Definition: vectorize8.h:51
A four-element vector of floats.
Definition: vectorize_neon.h:56
void store(int *v) const
Definition: vectorize8.h:141
ivec8(int v)
Definition: vectorize8.h:128
ivec4 upperVec() const
Definition: vectorize8.h:138
ivec8(int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8)
Definition: vectorize8.h:129
fvec8(float v)
Definition: vectorize8.h:50
fvec8 operator<=(const fvec8 &other) const
Definition: vectorize8.h:114
An eight-element vector of floats.
Definition: vectorize8.h:45
ivec8 operator&(const ivec8 &other) const
Definition: vectorize8.h:144
fvec8 operator-(const fvec8 &other) const
Definition: vectorize8.h:69
fvec8 operator>=(const fvec8 &other) const
Definition: vectorize8.h:111
__m256i val
Definition: vectorize8.h:125
__m256 val
Definition: vectorize8.h:47
fvec8 operator==(const fvec8 &other) const
Definition: vectorize8.h:99
ivec8 operator|(const ivec8 &other) const
Definition: vectorize8.h:147
ivec8(__m256i v)
Definition: vectorize8.h:130
void operator+=(const fvec8 &other)
Definition: vectorize8.h:78
ivec8()
Definition: vectorize8.h:127
fvec8 operator&(const fvec8 &other) const
Definition: vectorize8.h:93
void operator-=(const fvec8 &other)
Definition: vectorize8.h:81
fvec8 operator-() const
Definition: vectorize8.h:90
ivec8(const int *v)
Definition: vectorize8.h:131
void store(float *v) const
Definition: vectorize8.h:63
fvec8(const float *v)
Definition: vectorize8.h:53
fvec8(__m256 v)
Definition: vectorize8.h:52
fvec8 operator<(const fvec8 &other) const
Definition: vectorize8.h:108