1 #ifndef OPENMM_VECTORIZE_H_
2 #define OPENMM_VECTORIZE_H_
35 #include <smmintrin.h>
51 fvec4(
float v1,
float v2,
float v3,
float v4) :
val(_mm_set_ps(v4, v3, v2, v1)) {}
53 fvec4(
const float* v) :
val(_mm_loadu_ps(v)) {}
54 operator __m128()
const {
63 _mm_storeu_ps(v,
val);
66 return _mm_add_ps(
val, other);
69 return _mm_sub_ps(
val, other);
72 return _mm_mul_ps(
val, other);
75 return _mm_div_ps(
val, other);
78 val = _mm_add_ps(
val, other);
81 val = _mm_sub_ps(
val, other);
84 val = _mm_mul_ps(
val, other);
87 val = _mm_div_ps(
val, other);
90 return _mm_sub_ps(_mm_set1_ps(0.0f),
val);
93 return _mm_and_ps(
val, other);
96 return _mm_or_ps(
val, other);
99 return _mm_cmpeq_ps(
val, other);
102 return _mm_cmpneq_ps(
val, other);
105 return _mm_cmpgt_ps(
val, other);
108 return _mm_cmplt_ps(
val, other);
111 return _mm_cmpge_ps(
val, other);
114 return _mm_cmple_ps(
val, other);
116 operator ivec4()
const;
128 ivec4(
int v1,
int v2,
int v3,
int v4) :
val(_mm_set_epi32(v4, v3, v2, v1)) {}
130 ivec4(
const int* v) :
val(_mm_loadu_si128((const __m128i*) v)) {}
131 operator __m128i()
const {
140 _mm_storeu_si128((__m128i*) v,
val);
143 return _mm_add_epi32(
val, other);
146 return _mm_sub_epi32(
val, other);
149 return _mm_mul_epi32(
val, other);
152 val = _mm_add_epi32(
val, other);
155 val = _mm_sub_epi32(
val, other);
158 val = _mm_mul_epi32(
val, other);
161 return _mm_sub_epi32(_mm_set1_epi32(0),
val);
164 return _mm_and_si128(
val, other);
167 return _mm_or_si128(
val, other);
170 return _mm_cmpeq_epi32(
val, other);
173 return _mm_xor_si128(*
this==other, _mm_set1_epi32(0xFFFFFFFF));
176 return _mm_cmpgt_epi32(
val, other);
179 return _mm_cmplt_epi32(
val, other);
182 return _mm_xor_si128(_mm_cmplt_epi32(
val, other), _mm_set1_epi32(0xFFFFFFFF));
185 return _mm_xor_si128(_mm_cmpgt_epi32(
val, other), _mm_set1_epi32(0xFFFFFFFF));
187 operator fvec4()
const;
192 inline fvec4::operator
ivec4()
const {
193 return _mm_cvttps_epi32(val);
196 inline ivec4::operator
fvec4()
const {
197 return _mm_cvtepi32_ps(val);
211 return fvec4(_mm_round_ps(v.
val, _MM_FROUND_TO_NEAREST_INT));
223 static const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
224 return fvec4(_mm_and_ps(v.
val, mask));
231 static inline float dot3(
const fvec4& v1,
const fvec4& v2) {
232 return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0x71));
235 static inline float dot4(
const fvec4& v1,
const fvec4& v2) {
236 return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0xF1));
240 _MM_TRANSPOSE4_PS(v1, v2, v3, v4);
257 static inline bool any(
const ivec4& v) {
258 return !_mm_test_all_zeros(v, _mm_set1_epi32(0xFFFFFFFF));
263 static inline fvec4 operator+(
float v1,
const fvec4& v2) {
267 static inline fvec4 operator-(
float v1,
const fvec4& v2) {
271 static inline fvec4 operator*(
float v1,
const fvec4& v2) {
275 static inline fvec4 operator/(
float v1,
const fvec4& v2) {
282 return fvec4(_mm_blendv_ps(v1.
val, v2.
val, _mm_castsi128_ps(mask.
val)));
fvec4 operator>=(const fvec4 &other) const
Definition: vectorize.h:110
fvec4 operator+(const fvec4 &other) const
Definition: vectorize.h:65
A four element vector of ints.
Definition: vectorize.h:122
ivec4 operator==(const ivec4 &other) const
Definition: vectorize.h:169
int operator[](int i) const
Definition: vectorize.h:134
fvec4 operator-(const fvec4 &other) const
Definition: vectorize.h:68
__m128 val
Definition: vectorize.h:47
float operator[](int i) const
Definition: vectorize.h:57
ivec4 operator<(const ivec4 &other) const
Definition: vectorize.h:178
fvec4 operator<(const fvec4 &other) const
Definition: vectorize.h:107
fvec4(__m128 v)
Definition: vectorize.h:52
void operator/=(const fvec4 &other)
Definition: vectorize.h:86
void operator*=(const fvec4 &other)
Definition: vectorize.h:83
fvec4(const float *v)
Definition: vectorize.h:53
ivec4 operator-(const ivec4 &other) const
Definition: vectorize.h:145
void store(int *v) const
Definition: vectorize.h:139
ivec4 operator>=(const ivec4 &other) const
Definition: vectorize.h:181
ivec4(__m128i v)
Definition: vectorize.h:129
A four element vector of floats.
Definition: vectorize.h:45
ivec4 operator!=(const ivec4 &other) const
Definition: vectorize.h:172
ivec4(int v1, int v2, int v3, int v4)
Definition: vectorize.h:128
ivec4(const int *v)
Definition: vectorize.h:130
fvec4 operator==(const fvec4 &other) const
Definition: vectorize.h:98
fvec4 operator<=(const fvec4 &other) const
Definition: vectorize.h:113
void store(float *v) const
Definition: vectorize.h:62
void operator-=(const ivec4 &other)
Definition: vectorize.h:154
void operator+=(const ivec4 &other)
Definition: vectorize.h:151
ivec4 operator+(const ivec4 &other) const
Definition: vectorize.h:142
fvec4()
Definition: vectorize.h:49
ivec4 operator-() const
Definition: vectorize.h:160
fvec4 operator/(const fvec4 &other) const
Definition: vectorize.h:74
ivec4 operator|(const ivec4 &other) const
Definition: vectorize.h:166
fvec4 operator!=(const fvec4 &other) const
Definition: vectorize.h:101
ivec4 operator>(const ivec4 &other) const
Definition: vectorize.h:175
ivec4()
Definition: vectorize.h:126
void operator-=(const fvec4 &other)
Definition: vectorize.h:80
fvec4 operator&(const fvec4 &other) const
Definition: vectorize.h:92
fvec4 operator*(const fvec4 &other) const
Definition: vectorize.h:71
fvec4(float v1, float v2, float v3, float v4)
Definition: vectorize.h:51
fvec4 operator|(const fvec4 &other) const
Definition: vectorize.h:95
fvec4(float v)
Definition: vectorize.h:50
fvec4 operator>(const fvec4 &other) const
Definition: vectorize.h:104
fvec4 operator-() const
Definition: vectorize.h:89
void operator+=(const fvec4 &other)
Definition: vectorize.h:77
ivec4(int v)
Definition: vectorize.h:127
ivec4 operator*(const ivec4 &other) const
Definition: vectorize.h:148
ivec4 operator<=(const ivec4 &other) const
Definition: vectorize.h:184
ivec4 operator&(const ivec4 &other) const
Definition: vectorize.h:163
void operator*=(const ivec4 &other)
Definition: vectorize.h:157
__m128i val
Definition: vectorize.h:124