1 #ifndef OPENMM_VECTORIZE_SSE_H_
2 #define OPENMM_VECTORIZE_SSE_H_
35 #include <smmintrin.h>
43 static bool isVec4Supported() {
46 if (cpuInfo[0] >= 1) {
48 return ((cpuInfo[2] & ((
int) 1 << 19)) != 0);
64 fvec4(
float v1,
float v2,
float v3,
float v4) :
val(_mm_set_ps(v4, v3, v2, v1)) {}
66 fvec4(
const float* v) :
val(_mm_loadu_ps(v)) {}
67 operator __m128()
const {
76 _mm_storeu_ps(v,
val);
79 return _mm_add_ps(
val, other);
82 return _mm_sub_ps(
val, other);
85 return _mm_mul_ps(
val, other);
88 return _mm_div_ps(
val, other);
91 val = _mm_add_ps(
val, other);
94 val = _mm_sub_ps(
val, other);
97 val = _mm_mul_ps(
val, other);
100 val = _mm_div_ps(
val, other);
103 return _mm_sub_ps(_mm_set1_ps(0.0f),
val);
106 return _mm_and_ps(
val, other);
109 return _mm_or_ps(
val, other);
112 return _mm_cmpeq_ps(
val, other);
115 return _mm_cmpneq_ps(
val, other);
118 return _mm_cmpgt_ps(
val, other);
121 return _mm_cmplt_ps(
val, other);
124 return _mm_cmpge_ps(
val, other);
127 return _mm_cmple_ps(
val, other);
129 operator ivec4()
const;
141 ivec4(
int v1,
int v2,
int v3,
int v4) :
val(_mm_set_epi32(v4, v3, v2, v1)) {}
143 ivec4(
const int* v) :
val(_mm_loadu_si128((const __m128i*) v)) {}
144 operator __m128i()
const {
153 _mm_storeu_si128((__m128i*) v,
val);
156 return _mm_add_epi32(
val, other);
159 return _mm_sub_epi32(
val, other);
162 return _mm_mullo_epi32(
val, other);
165 val = _mm_add_epi32(
val, other);
168 val = _mm_sub_epi32(
val, other);
171 val = _mm_mullo_epi32(
val, other);
174 return _mm_sub_epi32(_mm_set1_epi32(0),
val);
177 return _mm_and_si128(
val, other);
180 return _mm_or_si128(
val, other);
183 return _mm_cmpeq_epi32(
val, other);
186 return _mm_xor_si128(*
this==other, _mm_set1_epi32(0xFFFFFFFF));
189 return _mm_cmpgt_epi32(
val, other);
192 return _mm_cmplt_epi32(
val, other);
195 return _mm_xor_si128(_mm_cmplt_epi32(
val, other), _mm_set1_epi32(0xFFFFFFFF));
198 return _mm_xor_si128(_mm_cmpgt_epi32(
val, other), _mm_set1_epi32(0xFFFFFFFF));
200 operator fvec4()
const;
205 inline fvec4::operator
ivec4()
const {
206 return _mm_cvttps_epi32(val);
209 inline ivec4::operator
fvec4()
const {
210 return _mm_cvtepi32_ps(val);
224 return fvec4(_mm_round_ps(v.
val, _MM_FROUND_TO_NEAREST_INT));
236 static const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
237 return fvec4(_mm_and_ps(v.
val, mask));
244 static inline float dot3(
const fvec4& v1,
const fvec4& v2) {
245 return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0x71));
248 static inline float dot4(
const fvec4& v1,
const fvec4& v2) {
249 return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0xF1));
253 _MM_TRANSPOSE4_PS(v1, v2, v3, v4);
270 static inline bool any(
const ivec4& v) {
271 return !_mm_test_all_zeros(v, _mm_set1_epi32(0xFFFFFFFF));
276 static inline fvec4 operator+(
float v1,
const fvec4& v2) {
280 static inline fvec4 operator-(
float v1,
const fvec4& v2) {
284 static inline fvec4 operator*(
float v1,
const fvec4& v2) {
288 static inline fvec4 operator/(
float v1,
const fvec4& v2) {
295 return fvec4(_mm_blendv_ps(v1.
val, v2.
val, _mm_castsi128_ps(mask.
val)));
fvec4 operator>=(const fvec4 &other) const
Definition: vectorize_sse.h:123
fvec4 operator+(const fvec4 &other) const
Definition: vectorize_sse.h:78
A four-element vector of ints.
Definition: vectorize_neon.h:150
ivec4 operator==(const ivec4 &other) const
Definition: vectorize_sse.h:182
int operator[](int i) const
Definition: vectorize_sse.h:147
fvec4 operator-(const fvec4 &other) const
Definition: vectorize_sse.h:81
float32x4_t val
Definition: vectorize_neon.h:58
float operator[](int i) const
Definition: vectorize_sse.h:70
ivec4 operator<(const ivec4 &other) const
Definition: vectorize_sse.h:191
fvec4 operator<(const fvec4 &other) const
Definition: vectorize_sse.h:120
fvec4(__m128 v)
Definition: vectorize_sse.h:65
void operator/=(const fvec4 &other)
Definition: vectorize_sse.h:99
void operator*=(const fvec4 &other)
Definition: vectorize_sse.h:96
fvec4(const float *v)
Definition: vectorize_sse.h:66
ivec4 operator-(const ivec4 &other) const
Definition: vectorize_sse.h:158
void store(int *v) const
Definition: vectorize_neon.h:179
ivec4 operator>=(const ivec4 &other) const
Definition: vectorize_sse.h:194
ivec4(__m128i v)
Definition: vectorize_sse.h:142
A four-element vector of floats.
Definition: vectorize_neon.h:56
ivec4 operator!=(const ivec4 &other) const
Definition: vectorize_sse.h:185
ivec4(int v1, int v2, int v3, int v4)
Definition: vectorize_sse.h:141
ivec4(const int *v)
Definition: vectorize_sse.h:143
fvec4 operator==(const fvec4 &other) const
Definition: vectorize_sse.h:111
fvec4 operator<=(const fvec4 &other) const
Definition: vectorize_sse.h:126
void store(float *v) const
Definition: vectorize_neon.h:84
void operator-=(const ivec4 &other)
Definition: vectorize_sse.h:167
void operator+=(const ivec4 &other)
Definition: vectorize_sse.h:164
ivec4 operator+(const ivec4 &other) const
Definition: vectorize_sse.h:155
fvec4()
Definition: vectorize_sse.h:62
ivec4 operator-() const
Definition: vectorize_sse.h:173
fvec4 operator/(const fvec4 &other) const
Definition: vectorize_sse.h:87
ivec4 operator|(const ivec4 &other) const
Definition: vectorize_sse.h:179
fvec4 operator!=(const fvec4 &other) const
Definition: vectorize_sse.h:114
ivec4 operator>(const ivec4 &other) const
Definition: vectorize_sse.h:188
ivec4()
Definition: vectorize_sse.h:139
void operator-=(const fvec4 &other)
Definition: vectorize_sse.h:93
fvec4 operator&(const fvec4 &other) const
Definition: vectorize_sse.h:105
fvec4 operator*(const fvec4 &other) const
Definition: vectorize_sse.h:84
fvec4(float v1, float v2, float v3, float v4)
Definition: vectorize_sse.h:64
fvec4 operator|(const fvec4 &other) const
Definition: vectorize_sse.h:108
fvec4(float v)
Definition: vectorize_sse.h:63
fvec4 operator>(const fvec4 &other) const
Definition: vectorize_sse.h:117
fvec4 operator-() const
Definition: vectorize_sse.h:102
void operator+=(const fvec4 &other)
Definition: vectorize_sse.h:90
ivec4(int v)
Definition: vectorize_sse.h:140
ivec4 operator*(const ivec4 &other) const
Definition: vectorize_sse.h:161
ivec4 operator<=(const ivec4 &other) const
Definition: vectorize_sse.h:197
ivec4 operator&(const ivec4 &other) const
Definition: vectorize_sse.h:176
void operator*=(const ivec4 &other)
Definition: vectorize_sse.h:170
int32x4_t val
Definition: vectorize_neon.h:153