OpenMM
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Pages
vectorize_sse.h
1 #ifndef OPENMM_VECTORIZE_SSE_H_
2 #define OPENMM_VECTORIZE_SSE_H_
3 
4 /* -------------------------------------------------------------------------- *
5  * OpenMM *
6  * -------------------------------------------------------------------------- *
7  * This is part of the OpenMM molecular simulation toolkit originating from *
8  * Simbios, the NIH National Center for Physics-Based Simulation of *
9  * Biological Structures at Stanford, funded under the NIH Roadmap for *
10  * Medical Research, grant U54 GM072970. See https://simtk.org. *
11  * *
12  * Portions copyright (c) 2013 Stanford University and the Authors. *
13  * Authors: Peter Eastman *
14  * Contributors: *
15  * *
16  * Permission is hereby granted, free of charge, to any person obtaining a *
17  * copy of this software and associated documentation files (the "Software"), *
18  * to deal in the Software without restriction, including without limitation *
19  * the rights to use, copy, modify, merge, publish, distribute, sublicense, *
20  * and/or sell copies of the Software, and to permit persons to whom the *
21  * Software is furnished to do so, subject to the following conditions: *
22  * *
23  * The above copyright notice and this permission notice shall be included in *
24  * all copies or substantial portions of the Software. *
25  * *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
27  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
28  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
29  * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
30  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
31  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
32  * USE OR OTHER DEALINGS IN THE SOFTWARE. *
33  * -------------------------------------------------------------------------- */
34 
35 #include <smmintrin.h>
36 #include "hardware.h"
37 
38 // This file defines classes and functions to simplify vectorizing code with SSE.
39 
43 static bool isVec4Supported() {
44  int cpuInfo[4];
45  cpuid(cpuInfo, 0);
46  if (cpuInfo[0] >= 1) {
47  cpuid(cpuInfo, 1);
48  return ((cpuInfo[2] & ((int) 1 << 19)) != 0);
49  }
50  return false;
51 }
52 
class ivec4;

/**
 * A four element vector of floats, stored in a single SSE __m128 value.
 *
 * Arithmetic operators act element-wise.  Comparison operators do not return
 * a bool: they return an fvec4 holding the per-element mask produced by the
 * matching SSE compare intrinsic (all bits set where the comparison holds,
 * all bits clear where it does not).
 */
class fvec4 {
public:
    __m128 val;

    /** Create a vector whose contents are left uninitialized. */
    fvec4() {}
    /** Create a vector with all four elements set to v. */
    fvec4(float v) : val(_mm_set1_ps(v)) {}
    /** Create a vector from four values; v1 is element 0 and v4 is element 3. */
    fvec4(float v1, float v2, float v3, float v4) : val(_mm_set_ps(v4, v3, v2, v1)) {}
    /** Wrap an existing __m128 value. */
    fvec4(__m128 v) : val(v) {}
    /** Load four consecutive floats starting at v.  No alignment is required. */
    fvec4(const float* v) : val(_mm_loadu_ps(v)) {}

    /** Allow an fvec4 to be passed directly to SSE intrinsics. */
    operator __m128() const {
        return val;
    }

    /**
     * Get a single element.  The index is not range checked; i must be in [0, 3].
     */
    float operator[](int i) const {
        float elements[4];
        store(elements);
        return elements[i];
    }

    /** Write the four elements to v[0..3].  No alignment is required. */
    void store(float* v) const {
        _mm_storeu_ps(v, val);
    }

    // Element-wise arithmetic.

    fvec4 operator+(const fvec4& other) const {
        return fvec4(_mm_add_ps(val, other.val));
    }
    fvec4 operator-(const fvec4& other) const {
        return fvec4(_mm_sub_ps(val, other.val));
    }
    fvec4 operator*(const fvec4& other) const {
        return fvec4(_mm_mul_ps(val, other.val));
    }
    fvec4 operator/(const fvec4& other) const {
        return fvec4(_mm_div_ps(val, other.val));
    }
    void operator+=(const fvec4& other) {
        val = _mm_add_ps(val, other.val);
    }
    void operator-=(const fvec4& other) {
        val = _mm_sub_ps(val, other.val);
    }
    void operator*=(const fvec4& other) {
        val = _mm_mul_ps(val, other.val);
    }
    void operator/=(const fvec4& other) {
        val = _mm_div_ps(val, other.val);
    }

    /**
     * Negate every element.
     *
     * This flips the sign bit instead of computing 0 - val, because
     * 0.0f - 0.0f is +0.0f whereas the negation of +0.0f must be -0.0f
     * (matching scalar float negation).
     */
    fvec4 operator-() const {
        return fvec4(_mm_xor_ps(val, _mm_set1_ps(-0.0f)));
    }

    // Bitwise operations, typically used to combine comparison masks.

    fvec4 operator&(const fvec4& other) const {
        return fvec4(_mm_and_ps(val, other.val));
    }
    fvec4 operator|(const fvec4& other) const {
        return fvec4(_mm_or_ps(val, other.val));
    }

    // Element-wise comparisons.  Each returns a mask vector, not a bool.

    fvec4 operator==(const fvec4& other) const {
        return fvec4(_mm_cmpeq_ps(val, other.val));
    }
    fvec4 operator!=(const fvec4& other) const {
        return fvec4(_mm_cmpneq_ps(val, other.val));
    }
    fvec4 operator>(const fvec4& other) const {
        return fvec4(_mm_cmpgt_ps(val, other.val));
    }
    fvec4 operator<(const fvec4& other) const {
        return fvec4(_mm_cmplt_ps(val, other.val));
    }
    fvec4 operator>=(const fvec4& other) const {
        return fvec4(_mm_cmpge_ps(val, other.val));
    }
    fvec4 operator<=(const fvec4& other) const {
        return fvec4(_mm_cmple_ps(val, other.val));
    }

    /** Convert to a vector of ints (defined after ivec4 is complete). */
    operator ivec4() const;
};
131 
135 class ivec4 {
136 public:
137  __m128i val;
138 
139  ivec4() {}
140  ivec4(int v) : val(_mm_set1_epi32(v)) {}
141  ivec4(int v1, int v2, int v3, int v4) : val(_mm_set_epi32(v4, v3, v2, v1)) {}
142  ivec4(__m128i v) : val(v) {}
143  ivec4(const int* v) : val(_mm_loadu_si128((const __m128i*) v)) {}
144  operator __m128i() const {
145  return val;
146  }
147  int operator[](int i) const {
148  int result[4];
149  store(result);
150  return result[i];
151  }
152  void store(int* v) const {
153  _mm_storeu_si128((__m128i*) v, val);
154  }
155  ivec4 operator+(const ivec4& other) const {
156  return _mm_add_epi32(val, other);
157  }
158  ivec4 operator-(const ivec4& other) const {
159  return _mm_sub_epi32(val, other);
160  }
161  ivec4 operator*(const ivec4& other) const {
162  return _mm_mullo_epi32(val, other);
163  }
164  void operator+=(const ivec4& other) {
165  val = _mm_add_epi32(val, other);
166  }
167  void operator-=(const ivec4& other) {
168  val = _mm_sub_epi32(val, other);
169  }
170  void operator*=(const ivec4& other) {
171  val = _mm_mullo_epi32(val, other);
172  }
173  ivec4 operator-() const {
174  return _mm_sub_epi32(_mm_set1_epi32(0), val);
175  }
176  ivec4 operator&(const ivec4& other) const {
177  return _mm_and_si128(val, other);
178  }
179  ivec4 operator|(const ivec4& other) const {
180  return _mm_or_si128(val, other);
181  }
182  ivec4 operator==(const ivec4& other) const {
183  return _mm_cmpeq_epi32(val, other);
184  }
185  ivec4 operator!=(const ivec4& other) const {
186  return _mm_xor_si128(*this==other, _mm_set1_epi32(0xFFFFFFFF));
187  }
188  ivec4 operator>(const ivec4& other) const {
189  return _mm_cmpgt_epi32(val, other);
190  }
191  ivec4 operator<(const ivec4& other) const {
192  return _mm_cmplt_epi32(val, other);
193  }
194  ivec4 operator>=(const ivec4& other) const {
195  return _mm_xor_si128(_mm_cmplt_epi32(val, other), _mm_set1_epi32(0xFFFFFFFF));
196  }
197  ivec4 operator<=(const ivec4& other) const {
198  return _mm_xor_si128(_mm_cmpgt_epi32(val, other), _mm_set1_epi32(0xFFFFFFFF));
199  }
200  operator fvec4() const;
201 };
202 
203 // Conversion operators.
204 
205 inline fvec4::operator ivec4() const {
206  return _mm_cvttps_epi32(val);
207 }
208 
209 inline ivec4::operator fvec4() const {
210  return _mm_cvtepi32_ps(val);
211 }
212 
213 // Functions that operate on fvec4s.
214 
215 static inline fvec4 floor(const fvec4& v) {
216  return fvec4(_mm_floor_ps(v.val));
217 }
218 
219 static inline fvec4 ceil(const fvec4& v) {
220  return fvec4(_mm_ceil_ps(v.val));
221 }
222 
223 static inline fvec4 round(const fvec4& v) {
224  return fvec4(_mm_round_ps(v.val, _MM_FROUND_TO_NEAREST_INT));
225 }
226 
227 static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
228  return fvec4(_mm_min_ps(v1.val, v2.val));
229 }
230 
231 static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
232  return fvec4(_mm_max_ps(v1.val, v2.val));
233 }
234 
235 static inline fvec4 abs(const fvec4& v) {
236  static const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
237  return fvec4(_mm_and_ps(v.val, mask));
238 }
239 
240 static inline fvec4 sqrt(const fvec4& v) {
241  return fvec4(_mm_sqrt_ps(v.val));
242 }
243 
244 static inline float dot3(const fvec4& v1, const fvec4& v2) {
245  return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0x71));
246 }
247 
248 static inline float dot4(const fvec4& v1, const fvec4& v2) {
249  return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0xF1));
250 }
251 
252 static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
253  _MM_TRANSPOSE4_PS(v1, v2, v3, v4);
254 }
255 
256 // Functions that operate on ivec4s.
257 
258 static inline ivec4 min(const ivec4& v1, const ivec4& v2) {
259  return ivec4(_mm_min_epi32(v1.val, v2.val));
260 }
261 
262 static inline ivec4 max(const ivec4& v1, const ivec4& v2) {
263  return ivec4(_mm_max_epi32(v1.val, v2.val));
264 }
265 
266 static inline ivec4 abs(const ivec4& v) {
267  return ivec4(_mm_abs_epi32(v.val));
268 }
269 
270 static inline bool any(const ivec4& v) {
271  return !_mm_test_all_zeros(v, _mm_set1_epi32(0xFFFFFFFF));
272 }
273 
274 // Mathematical operators involving a scalar and a vector.
275 
276 static inline fvec4 operator+(float v1, const fvec4& v2) {
277  return fvec4(v1)+v2;
278 }
279 
280 static inline fvec4 operator-(float v1, const fvec4& v2) {
281  return fvec4(v1)-v2;
282 }
283 
284 static inline fvec4 operator*(float v1, const fvec4& v2) {
285  return fvec4(v1)*v2;
286 }
287 
288 static inline fvec4 operator/(float v1, const fvec4& v2) {
289  return fvec4(v1)/v2;
290 }
291 
292 // Operations for blending fvec4s based on an ivec4.
293 
294 static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const ivec4& mask) {
295  return fvec4(_mm_blendv_ps(v1.val, v2.val, _mm_castsi128_ps(mask.val)));
296 }
297 
298 #endif /*OPENMM_VECTORIZE_SSE_H_*/
299 
fvec4 operator>=(const fvec4 &other) const
Definition: vectorize_sse.h:123
fvec4 operator+(const fvec4 &other) const
Definition: vectorize_sse.h:78
A four element vector of ints.
Definition: vectorize_neon.h:150
ivec4 operator==(const ivec4 &other) const
Definition: vectorize_sse.h:182
int operator[](int i) const
Definition: vectorize_sse.h:147
fvec4 operator-(const fvec4 &other) const
Definition: vectorize_sse.h:81
float32x4_t val
Definition: vectorize_neon.h:58
float operator[](int i) const
Definition: vectorize_sse.h:70
ivec4 operator<(const ivec4 &other) const
Definition: vectorize_sse.h:191
fvec4 operator<(const fvec4 &other) const
Definition: vectorize_sse.h:120
fvec4(__m128 v)
Definition: vectorize_sse.h:65
void operator/=(const fvec4 &other)
Definition: vectorize_sse.h:99
void operator*=(const fvec4 &other)
Definition: vectorize_sse.h:96
fvec4(const float *v)
Definition: vectorize_sse.h:66
ivec4 operator-(const ivec4 &other) const
Definition: vectorize_sse.h:158
void store(int *v) const
Definition: vectorize_neon.h:179
ivec4 operator>=(const ivec4 &other) const
Definition: vectorize_sse.h:194
ivec4(__m128i v)
Definition: vectorize_sse.h:142
A four element vector of floats.
Definition: vectorize_neon.h:56
ivec4 operator!=(const ivec4 &other) const
Definition: vectorize_sse.h:185
ivec4(int v1, int v2, int v3, int v4)
Definition: vectorize_sse.h:141
ivec4(const int *v)
Definition: vectorize_sse.h:143
fvec4 operator==(const fvec4 &other) const
Definition: vectorize_sse.h:111
fvec4 operator<=(const fvec4 &other) const
Definition: vectorize_sse.h:126
void store(float *v) const
Definition: vectorize_neon.h:84
void operator-=(const ivec4 &other)
Definition: vectorize_sse.h:167
void operator+=(const ivec4 &other)
Definition: vectorize_sse.h:164
ivec4 operator+(const ivec4 &other) const
Definition: vectorize_sse.h:155
fvec4()
Definition: vectorize_sse.h:62
ivec4 operator-() const
Definition: vectorize_sse.h:173
fvec4 operator/(const fvec4 &other) const
Definition: vectorize_sse.h:87
ivec4 operator|(const ivec4 &other) const
Definition: vectorize_sse.h:179
fvec4 operator!=(const fvec4 &other) const
Definition: vectorize_sse.h:114
ivec4 operator>(const ivec4 &other) const
Definition: vectorize_sse.h:188
ivec4()
Definition: vectorize_sse.h:139
void operator-=(const fvec4 &other)
Definition: vectorize_sse.h:93
fvec4 operator&(const fvec4 &other) const
Definition: vectorize_sse.h:105
fvec4 operator*(const fvec4 &other) const
Definition: vectorize_sse.h:84
fvec4(float v1, float v2, float v3, float v4)
Definition: vectorize_sse.h:64
fvec4 operator|(const fvec4 &other) const
Definition: vectorize_sse.h:108
fvec4(float v)
Definition: vectorize_sse.h:63
fvec4 operator>(const fvec4 &other) const
Definition: vectorize_sse.h:117
fvec4 operator-() const
Definition: vectorize_sse.h:102
void operator+=(const fvec4 &other)
Definition: vectorize_sse.h:90
ivec4(int v)
Definition: vectorize_sse.h:140
ivec4 operator*(const ivec4 &other) const
Definition: vectorize_sse.h:161
ivec4 operator<=(const ivec4 &other) const
Definition: vectorize_sse.h:197
ivec4 operator&(const ivec4 &other) const
Definition: vectorize_sse.h:176
void operator*=(const ivec4 &other)
Definition: vectorize_sse.h:170
int32x4_t val
Definition: vectorize_neon.h:153