OpenMM
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
vectorize.h
1 #ifndef OPENMM_VECTORIZE_H_
2 #define OPENMM_VECTORIZE_H_
3 
4 /* -------------------------------------------------------------------------- *
5  * OpenMM *
6  * -------------------------------------------------------------------------- *
7  * This is part of the OpenMM molecular simulation toolkit originating from *
8  * Simbios, the NIH National Center for Physics-Based Simulation of *
9  * Biological Structures at Stanford, funded under the NIH Roadmap for *
10  * Medical Research, grant U54 GM072970. See https://simtk.org. *
11  * *
12  * Portions copyright (c) 2013 Stanford University and the Authors. *
13  * Authors: Peter Eastman *
14  * Contributors: *
15  * *
16  * Permission is hereby granted, free of charge, to any person obtaining a *
17  * copy of this software and associated documentation files (the "Software"), *
18  * to deal in the Software without restriction, including without limitation *
19  * the rights to use, copy, modify, merge, publish, distribute, sublicense, *
20  * and/or sell copies of the Software, and to permit persons to whom the *
21  * Software is furnished to do so, subject to the following conditions: *
22  * *
23  * The above copyright notice and this permission notice shall be included in *
24  * all copies or substantial portions of the Software. *
25  * *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
27  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
28  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
29  * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
30  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
31  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
32  * USE OR OTHER DEALINGS IN THE SOFTWARE. *
33  * -------------------------------------------------------------------------- */
34 
35 #include <smmintrin.h>
36 
37 
38 // This file defines classes and functions to simplify vectorizing code with SSE.
39 
class ivec4;

/**
 * A four element vector of floats.
 *
 * Wraps a single SSE __m128 value and exposes element-wise arithmetic,
 * bitwise and comparison operators on it.  Comparison operators do not
 * return bool: they return an fvec4 whose elements have every bit set where
 * the comparison holds and every bit clear where it does not.
 */
class fvec4 {
public:
    /** The wrapped SSE value. */
    __m128 val;

    /** Creates a vector without initializing its elements. */
    fvec4() {}

    /** Creates a vector with all four elements equal to v. */
    fvec4(float v) : val(_mm_set1_ps(v)) {}

    /** Creates a vector whose elements 0..3 are v1, v2, v3, v4 respectively. */
    fvec4(float v1, float v2, float v3, float v4) : val(_mm_set_ps(v4, v3, v2, v1)) {}

    /** Wraps an existing SSE value. */
    fvec4(__m128 v) : val(v) {}

    /** Loads four consecutive floats starting at v (no alignment required). */
    fvec4(const float* v) : val(_mm_loadu_ps(v)) {}

    /** Exposes the wrapped value so the vector can be passed directly to intrinsics. */
    operator __m128() const {
        return val;
    }

    /**
     * Returns element i.  No bounds checking is performed, so i must be in
     * the range 0..3.
     */
    float operator[](int i) const {
        float elements[4];
        _mm_storeu_ps(elements, val);
        return elements[i];
    }

    /** Writes the four elements to v[0..3] (no alignment required). */
    void store(float* v) const {
        _mm_storeu_ps(v, val);
    }

    // Element-wise arithmetic.

    fvec4 operator+(const fvec4& other) const {
        return fvec4(_mm_add_ps(val, other.val));
    }
    fvec4 operator-(const fvec4& other) const {
        return fvec4(_mm_sub_ps(val, other.val));
    }
    fvec4 operator*(const fvec4& other) const {
        return fvec4(_mm_mul_ps(val, other.val));
    }
    fvec4 operator/(const fvec4& other) const {
        return fvec4(_mm_div_ps(val, other.val));
    }

    // In-place element-wise arithmetic.  These return void, so they cannot be chained.

    void operator+=(const fvec4& other) {
        *this = *this + other;
    }
    void operator-=(const fvec4& other) {
        *this = *this - other;
    }
    void operator*=(const fvec4& other) {
        *this = *this * other;
    }
    void operator/=(const fvec4& other) {
        *this = *this / other;
    }

    /** Negates each element by subtracting it from zero. */
    fvec4 operator-() const {
        return fvec4(_mm_sub_ps(_mm_setzero_ps(), val));
    }

    // Bitwise operations on the raw bits of the elements.

    fvec4 operator&(const fvec4& other) const {
        return fvec4(_mm_and_ps(val, other.val));
    }
    fvec4 operator|(const fvec4& other) const {
        return fvec4(_mm_or_ps(val, other.val));
    }

    // Element-wise comparisons producing all-ones / all-zeros masks.

    fvec4 operator==(const fvec4& other) const {
        return fvec4(_mm_cmpeq_ps(val, other.val));
    }
    fvec4 operator!=(const fvec4& other) const {
        return fvec4(_mm_cmpneq_ps(val, other.val));
    }
    fvec4 operator>(const fvec4& other) const {
        return fvec4(_mm_cmpgt_ps(val, other.val));
    }
    fvec4 operator<(const fvec4& other) const {
        return fvec4(_mm_cmplt_ps(val, other.val));
    }
    fvec4 operator>=(const fvec4& other) const {
        return fvec4(_mm_cmpge_ps(val, other.val));
    }
    fvec4 operator<=(const fvec4& other) const {
        return fvec4(_mm_cmple_ps(val, other.val));
    }

    /** Converts to a vector of ints; defined after ivec4 is complete. */
    operator ivec4() const;
};
118 
122 class ivec4 {
123 public:
124  __m128i val;
125 
126  ivec4() {}
127  ivec4(int v) : val(_mm_set1_epi32(v)) {}
128  ivec4(int v1, int v2, int v3, int v4) : val(_mm_set_epi32(v4, v3, v2, v1)) {}
129  ivec4(__m128i v) : val(v) {}
130  ivec4(const int* v) : val(_mm_loadu_si128((const __m128i*) v)) {}
131  operator __m128i() const {
132  return val;
133  }
134  int operator[](int i) const {
135  int result[4];
136  store(result);
137  return result[i];
138  }
139  void store(int* v) const {
140  _mm_storeu_si128((__m128i*) v, val);
141  }
142  ivec4 operator+(const ivec4& other) const {
143  return _mm_add_epi32(val, other);
144  }
145  ivec4 operator-(const ivec4& other) const {
146  return _mm_sub_epi32(val, other);
147  }
148  ivec4 operator*(const ivec4& other) const {
149  return _mm_mul_epi32(val, other);
150  }
151  void operator+=(const ivec4& other) {
152  val = _mm_add_epi32(val, other);
153  }
154  void operator-=(const ivec4& other) {
155  val = _mm_sub_epi32(val, other);
156  }
157  void operator*=(const ivec4& other) {
158  val = _mm_mul_epi32(val, other);
159  }
160  ivec4 operator-() const {
161  return _mm_sub_epi32(_mm_set1_epi32(0), val);
162  }
163  ivec4 operator&(const ivec4& other) const {
164  return _mm_and_si128(val, other);
165  }
166  ivec4 operator|(const ivec4& other) const {
167  return _mm_or_si128(val, other);
168  }
169  ivec4 operator==(const ivec4& other) const {
170  return _mm_cmpeq_epi32(val, other);
171  }
172  ivec4 operator!=(const ivec4& other) const {
173  return _mm_xor_si128(*this==other, _mm_set1_epi32(0xFFFFFFFF));
174  }
175  ivec4 operator>(const ivec4& other) const {
176  return _mm_cmpgt_epi32(val, other);
177  }
178  ivec4 operator<(const ivec4& other) const {
179  return _mm_cmplt_epi32(val, other);
180  }
181  ivec4 operator>=(const ivec4& other) const {
182  return _mm_xor_si128(_mm_cmplt_epi32(val, other), _mm_set1_epi32(0xFFFFFFFF));
183  }
184  ivec4 operator<=(const ivec4& other) const {
185  return _mm_xor_si128(_mm_cmpgt_epi32(val, other), _mm_set1_epi32(0xFFFFFFFF));
186  }
187  operator fvec4() const;
188 };
189 
190 // Conversion operators.
191 
192 inline fvec4::operator ivec4() const {
193  return _mm_cvttps_epi32(val);
194 }
195 
196 inline ivec4::operator fvec4() const {
197  return _mm_cvtepi32_ps(val);
198 }
199 
200 // Functions that operate on fvec4s.
201 
202 static inline fvec4 floor(const fvec4& v) {
203  return fvec4(_mm_floor_ps(v.val));
204 }
205 
206 static inline fvec4 ceil(const fvec4& v) {
207  return fvec4(_mm_ceil_ps(v.val));
208 }
209 
210 static inline fvec4 round(const fvec4& v) {
211  return fvec4(_mm_round_ps(v.val, _MM_FROUND_TO_NEAREST_INT));
212 }
213 
214 static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
215  return fvec4(_mm_min_ps(v1.val, v2.val));
216 }
217 
218 static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
219  return fvec4(_mm_max_ps(v1.val, v2.val));
220 }
221 
222 static inline fvec4 abs(const fvec4& v) {
223  static const __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
224  return fvec4(_mm_and_ps(v.val, mask));
225 }
226 
227 static inline fvec4 sqrt(const fvec4& v) {
228  return fvec4(_mm_sqrt_ps(v.val));
229 }
230 
231 static inline float dot3(const fvec4& v1, const fvec4& v2) {
232  return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0x71));
233 }
234 
235 static inline float dot4(const fvec4& v1, const fvec4& v2) {
236  return _mm_cvtss_f32(_mm_dp_ps(v1, v2, 0xF1));
237 }
238 
239 static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
240  _MM_TRANSPOSE4_PS(v1, v2, v3, v4);
241 }
242 
243 // Functions that operate on ivec4s.
244 
245 static inline ivec4 min(const ivec4& v1, const ivec4& v2) {
246  return ivec4(_mm_min_epi32(v1.val, v2.val));
247 }
248 
249 static inline ivec4 max(const ivec4& v1, const ivec4& v2) {
250  return ivec4(_mm_max_epi32(v1.val, v2.val));
251 }
252 
253 static inline ivec4 abs(const ivec4& v) {
254  return ivec4(_mm_abs_epi32(v.val));
255 }
256 
257 static inline bool any(const ivec4& v) {
258  return !_mm_test_all_zeros(v, _mm_set1_epi32(0xFFFFFFFF));
259 }
260 
261 // Mathematical operators involving a scalar and a vector.
262 
263 static inline fvec4 operator+(float v1, const fvec4& v2) {
264  return fvec4(v1)+v2;
265 }
266 
267 static inline fvec4 operator-(float v1, const fvec4& v2) {
268  return fvec4(v1)-v2;
269 }
270 
271 static inline fvec4 operator*(float v1, const fvec4& v2) {
272  return fvec4(v1)*v2;
273 }
274 
275 static inline fvec4 operator/(float v1, const fvec4& v2) {
276  return fvec4(v1)/v2;
277 }
278 
279 // Operations for blending fvec4s based on an ivec4.
280 
281 static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const ivec4& mask) {
282  return fvec4(_mm_blendv_ps(v1.val, v2.val, _mm_castsi128_ps(mask.val)));
283 }
284 
285 #endif /*OPENMM_VECTORIZE_H_*/
286 
fvec4 operator>=(const fvec4 &other) const
Definition: vectorize.h:110
fvec4 operator+(const fvec4 &other) const
Definition: vectorize.h:65
A four element vector of ints.
Definition: vectorize.h:122
ivec4 operator==(const ivec4 &other) const
Definition: vectorize.h:169
int operator[](int i) const
Definition: vectorize.h:134
fvec4 operator-(const fvec4 &other) const
Definition: vectorize.h:68
__m128 val
Definition: vectorize.h:47
float operator[](int i) const
Definition: vectorize.h:57
ivec4 operator<(const ivec4 &other) const
Definition: vectorize.h:178
fvec4 operator<(const fvec4 &other) const
Definition: vectorize.h:107
fvec4(__m128 v)
Definition: vectorize.h:52
void operator/=(const fvec4 &other)
Definition: vectorize.h:86
void operator*=(const fvec4 &other)
Definition: vectorize.h:83
fvec4(const float *v)
Definition: vectorize.h:53
ivec4 operator-(const ivec4 &other) const
Definition: vectorize.h:145
void store(int *v) const
Definition: vectorize.h:139
ivec4 operator>=(const ivec4 &other) const
Definition: vectorize.h:181
ivec4(__m128i v)
Definition: vectorize.h:129
A four element vector of floats.
Definition: vectorize.h:45
ivec4 operator!=(const ivec4 &other) const
Definition: vectorize.h:172
ivec4(int v1, int v2, int v3, int v4)
Definition: vectorize.h:128
ivec4(const int *v)
Definition: vectorize.h:130
fvec4 operator==(const fvec4 &other) const
Definition: vectorize.h:98
fvec4 operator<=(const fvec4 &other) const
Definition: vectorize.h:113
void store(float *v) const
Definition: vectorize.h:62
void operator-=(const ivec4 &other)
Definition: vectorize.h:154
void operator+=(const ivec4 &other)
Definition: vectorize.h:151
ivec4 operator+(const ivec4 &other) const
Definition: vectorize.h:142
fvec4()
Definition: vectorize.h:49
ivec4 operator-() const
Definition: vectorize.h:160
fvec4 operator/(const fvec4 &other) const
Definition: vectorize.h:74
ivec4 operator|(const ivec4 &other) const
Definition: vectorize.h:166
fvec4 operator!=(const fvec4 &other) const
Definition: vectorize.h:101
ivec4 operator>(const ivec4 &other) const
Definition: vectorize.h:175
ivec4()
Definition: vectorize.h:126
void operator-=(const fvec4 &other)
Definition: vectorize.h:80
fvec4 operator&(const fvec4 &other) const
Definition: vectorize.h:92
fvec4 operator*(const fvec4 &other) const
Definition: vectorize.h:71
fvec4(float v1, float v2, float v3, float v4)
Definition: vectorize.h:51
fvec4 operator|(const fvec4 &other) const
Definition: vectorize.h:95
fvec4(float v)
Definition: vectorize.h:50
fvec4 operator>(const fvec4 &other) const
Definition: vectorize.h:104
fvec4 operator-() const
Definition: vectorize.h:89
void operator+=(const fvec4 &other)
Definition: vectorize.h:77
ivec4(int v)
Definition: vectorize.h:127
ivec4 operator*(const ivec4 &other) const
Definition: vectorize.h:148
ivec4 operator<=(const ivec4 &other) const
Definition: vectorize.h:184
ivec4 operator&(const ivec4 &other) const
Definition: vectorize.h:163
void operator*=(const ivec4 &other)
Definition: vectorize.h:157
__m128i val
Definition: vectorize.h:124