OpenMM
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends Pages
vectorize_pnacl.h
1 #ifndef OPENMM_VECTORIZE_PNACL_H_
2 #define OPENMM_VECTORIZE_PNACL_H_
3 
4 /* -------------------------------------------------------------------------- *
5  * OpenMM *
6  * -------------------------------------------------------------------------- *
7  * This is part of the OpenMM molecular simulation toolkit originating from *
8  * Simbios, the NIH National Center for Physics-Based Simulation of *
9  * Biological Structures at Stanford, funded under the NIH Roadmap for *
10  * Medical Research, grant U54 GM072970. See https://simtk.org. *
11  * *
12  * Portions copyright (c) 2013-2014 Stanford University and the Authors. *
13  * Authors: Peter Eastman *
14  * Contributors: *
15  * *
16  * Permission is hereby granted, free of charge, to any person obtaining a *
17  * copy of this software and associated documentation files (the "Software"), *
18  * to deal in the Software without restriction, including without limitation *
19  * the rights to use, copy, modify, merge, publish, distribute, sublicense, *
20  * and/or sell copies of the Software, and to permit persons to whom the *
21  * Software is furnished to do so, subject to the following conditions: *
22  * *
23  * The above copyright notice and this permission notice shall be included in *
24  * all copies or substantial portions of the Software. *
25  * *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
27  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
28  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
29  * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
30  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
31  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
32  * USE OR OTHER DEALINGS IN THE SOFTWARE. *
33  * -------------------------------------------------------------------------- */
34 
#include <algorithm>
#include <cmath>
#include <cstdlib>
37 
38 // This file defines classes and functions to simplify vectorizing code with portable SIMD vectors.
39 
/**
 * Report whether four-element vectors can be used.
 *
 * This implementation unconditionally returns true.  The function is declared
 * inline, like every other free function in this header, so that translation
 * units which include the header without calling it do not get an
 * unused-function warning.
 */
static inline bool isVec4Supported() {
    return true;
}
46 
// Compiler-native 128 bit vector types: four floats and four ints respectively.
// vector_size(16) makes each type 16 bytes wide; aligned(4) sets the required
// alignment of the type to 4 bytes rather than the full vector width.
// NOTE(review): the reduced alignment is presumably what lets the classes below
// load and store through ordinary float*/int* pointers -- confirm.
typedef float __m128 __attribute__((vector_size(16), aligned(4)));
typedef int __m128i __attribute__((vector_size(16), aligned(4)));

// Forward declaration so that fvec4 can declare its conversion to ivec4.
class ivec4;
51 
55 class fvec4 {
56 public:
57  __m128 val;
58 
59  fvec4() {}
60  fvec4(float v) {
61  val = {v, v, v, v};
62  }
63  fvec4(float v1, float v2, float v3, float v4) {
64  val = {v1, v2, v3, v4};
65  }
66  fvec4(__m128 v) : val(v) {}
67  fvec4(const float* v) {
68  val = *((__m128*) v);
69  }
70  operator __m128() const {
71  return val;
72  }
73  float operator[](int i) const {
74  return val[i];
75  }
76  void store(float* v) const {
77  *((__m128*) v) = val;
78  }
79  fvec4 operator+(const fvec4& other) const {
80  return val+other;
81  }
82  fvec4 operator-(const fvec4& other) const {
83  return val-other;
84  }
85  fvec4 operator*(const fvec4& other) const {
86  return val*other;
87  }
88  fvec4 operator/(const fvec4& other) const {
89  return val/other;
90  }
91  void operator+=(const fvec4& other) {
92  val = val+other;
93  }
94  void operator-=(const fvec4& other) {
95  val = val-other;
96  }
97  void operator*=(const fvec4& other) {
98  val = val*other;
99  }
100  void operator/=(const fvec4& other) {
101  val = val/other;
102  }
103  fvec4 operator-() const {
104  return -val;
105  }
106  fvec4 operator&(const fvec4& other) const {
107  return (fvec4) (((__m128i)val)&((__m128i)other.val));
108  }
109  fvec4 operator|(const fvec4& other) const {
110  return (fvec4) (((__m128i)val)|((__m128i)other.val));
111  }
112  fvec4 operator==(const fvec4& other) const {
113  return (val==other.val);
114  }
115  fvec4 operator!=(const fvec4& other) const {
116  return (val!=other.val);
117  }
118  fvec4 operator>(const fvec4& other) const {
119  return (val>other.val);
120  }
121  fvec4 operator<(const fvec4& other) const {
122  return (val<other.val);
123  }
124  fvec4 operator>=(const fvec4& other) const {
125  return (val>=other.val);
126  }
127  fvec4 operator<=(const fvec4& other) const {
128  return (val<=other.val);
129  }
130  operator ivec4() const;
131 };
132 
136 class ivec4 {
137 public:
138  __m128i val;
139 
140  ivec4() {}
141  ivec4(int v) {
142  val = {v, v, v, v};
143  }
144  ivec4(int v1, int v2, int v3, int v4) {
145  val = {v1, v2, v3, v4};
146  }
147  ivec4(__m128i v) : val(v) {}
148  ivec4(const int* v) {
149  val = *((__m128*) v);
150  }
151  operator __m128i() const {
152  return val;
153  }
154  int operator[](int i) const {
155  return val[i];
156  }
157  void store(int* v) const {
158  *((__m128*) v) = val;
159  }
160  ivec4 operator+(const ivec4& other) const {
161  return val+other;
162  }
163  ivec4 operator-(const ivec4& other) const {
164  return val-other;
165  }
166  ivec4 operator*(const ivec4& other) const {
167  return val*other;
168  }
169  void operator+=(const ivec4& other) {
170  val = val+other;
171  }
172  void operator-=(const ivec4& other) {
173  val = val-other;
174  }
175  void operator*=(const ivec4& other) {
176  val = val*other;
177  }
178  ivec4 operator-() const {
179  return -val;
180  }
181  ivec4 operator&(const ivec4& other) const {
182  return val&other.val;
183  }
184  ivec4 operator|(const ivec4& other) const {
185  return val|other.val;
186  }
187  ivec4 operator==(const ivec4& other) const {
188  return (val==other.val);
189  }
190  ivec4 operator!=(const ivec4& other) const {
191  return (val!=other.val);
192  }
193  ivec4 operator>(const ivec4& other) const {
194  return (val>other.val);
195  }
196  ivec4 operator<(const ivec4& other) const {
197  return (val<other.val);
198  }
199  ivec4 operator>=(const ivec4& other) const {
200  return (val>=other.val);
201  }
202  ivec4 operator<=(const ivec4& other) const {
203  return (val<=other.val);
204  }
205  operator fvec4() const;
206 };
207 
208 // Conversion operators.
209 
210 inline fvec4::operator ivec4() const {
211  return __builtin_convertvector(val, __m128i);
212 }
213 
214 inline ivec4::operator fvec4() const {
215  return __builtin_convertvector(val, __m128);
216 }
217 
218 // Functions that operate on fvec4s.
219 
220 static inline fvec4 floor(const fvec4& v) {
221  return fvec4(std::floor(v[0]), std::floor(v[1]), std::floor(v[2]), std::floor(v[3]));
222 }
223 
224 static inline fvec4 ceil(const fvec4& v) {
225  return fvec4(std::ceil(v[0]), std::ceil(v[1]), std::ceil(v[2]), std::ceil(v[3]));
226 }
227 
228 static inline fvec4 round(const fvec4& v) {
229  return fvec4(std::round(v[0]), std::round(v[1]), std::round(v[2]), std::round(v[3]));
230 }
231 
232 static inline fvec4 min(const fvec4& v1, const fvec4& v2) {
233  return fvec4(std::min(v1[0], v2[0]), std::min(v1[1], v2[1]), std::min(v1[2], v2[2]), std::min(v1[3], v2[3]));
234 }
235 
236 static inline fvec4 max(const fvec4& v1, const fvec4& v2) {
237  return fvec4(std::max(v1[0], v2[0]), std::max(v1[1], v2[1]), std::max(v1[2], v2[2]), std::max(v1[3], v2[3]));
238 }
239 
240 static inline fvec4 abs(const fvec4& v) {
241  return fvec4(std::abs(v[0]), std::abs(v[1]), std::abs(v[2]), std::abs(v[3]));
242 }
243 
244 static inline fvec4 sqrt(const fvec4& v) {
245  return fvec4(std::sqrt(v[0]), std::sqrt(v[1]), std::sqrt(v[2]), std::sqrt(v[3]));
246 }
247 
248 static inline float dot3(const fvec4& v1, const fvec4& v2) {
249  fvec4 r = v1*v2;
250  return r[0]+r[1]+r[2];
251 }
252 
253 static inline float dot4(const fvec4& v1, const fvec4& v2) {
254  fvec4 r = v1*v2;
255  return r[0]+r[1]+r[2]+r[3];
256 }
257 
258 static inline void transpose(fvec4& v1, fvec4& v2, fvec4& v3, fvec4& v4) {
259  __m128 a1 = __builtin_shufflevector(v1.val, v2.val, 0, 4, 2, 6);
260  __m128 a2 = __builtin_shufflevector(v1.val, v2.val, 1, 5, 3, 7);
261  __m128 a3 = __builtin_shufflevector(v3.val, v4.val, 0, 4, 2, 6);
262  __m128 a4 = __builtin_shufflevector(v3.val, v4.val, 1, 5, 3, 7);
263  v1 = __builtin_shufflevector(a1, a3, 0, 1, 4, 5);
264  v2 = __builtin_shufflevector(a2, a4, 0, 1, 4, 5);
265  v3 = __builtin_shufflevector(a1, a3, 2, 3, 6, 7);
266  v4 = __builtin_shufflevector(a2, a4, 2, 3, 6, 7);
267 }
268 
269 // Functions that operate on ivec4s.
270 
271 static inline ivec4 min(const ivec4& v1, const ivec4& v2) {
272  return ivec4(std::min(v1[0], v2[0]), std::min(v1[1], v2[1]), std::min(v1[2], v2[2]), std::min(v1[3], v2[3]));
273 }
274 
275 static inline ivec4 max(const ivec4& v1, const ivec4& v2) {
276  return ivec4(std::max(v1[0], v2[0]), std::max(v1[1], v2[1]), std::max(v1[2], v2[2]), std::max(v1[3], v2[3]));
277 }
278 
279 static inline ivec4 abs(const ivec4& v) {
280  return ivec4(abs(v[0]), abs(v[1]), abs(v[2]), abs(v[3]));
281 }
282 
283 static inline bool any(const __m128i& v) {
284  return (v[0] || v[1] || v[2] || v[3]);
285 }
286 
287 // Mathematical operators involving a scalar and a vector.
288 
289 static inline fvec4 operator+(float v1, const fvec4& v2) {
290  return fvec4(v1)+v2;
291 }
292 
293 static inline fvec4 operator-(float v1, const fvec4& v2) {
294  return fvec4(v1)-v2;
295 }
296 
297 static inline fvec4 operator*(float v1, const fvec4& v2) {
298  return fvec4(v1)*v2;
299 }
300 
301 static inline fvec4 operator/(float v1, const fvec4& v2) {
302  return fvec4(v1)/v2;
303 }
304 
305 // Operations for blending fvec4s based on an ivec4.
306 
307 static inline fvec4 blend(const fvec4& v1, const fvec4& v2, const __m128i& mask) {
308  return fvec4(mask[0] ? v2[0] : v1[0], mask[1] ? v2[1] : v1[1], mask[2] ? v2[2] : v1[2], mask[3] ? v2[3] : v1[3]);
309 }
310 
311 #endif /*OPENMM_VECTORIZE_PNACL_H_*/
312 
fvec4 operator>=(const fvec4 &other) const
Definition: vectorize_pnacl.h:124
fvec4 operator+(const fvec4 &other) const
Definition: vectorize_pnacl.h:79
A four element vector of ints.
Definition: vectorize_neon.h:150
ivec4 operator==(const ivec4 &other) const
Definition: vectorize_pnacl.h:187
int operator[](int i) const
Definition: vectorize_pnacl.h:154
fvec4 operator-(const fvec4 &other) const
Definition: vectorize_pnacl.h:82
__m128 val
Definition: vectorize_pnacl.h:57
float operator[](int i) const
Definition: vectorize_pnacl.h:73
ivec4 operator<(const ivec4 &other) const
Definition: vectorize_pnacl.h:196
fvec4 operator<(const fvec4 &other) const
Definition: vectorize_pnacl.h:121
fvec4(__m128 v)
Definition: vectorize_pnacl.h:66
void operator/=(const fvec4 &other)
Definition: vectorize_pnacl.h:100
void operator*=(const fvec4 &other)
Definition: vectorize_pnacl.h:97
fvec4(const float *v)
Definition: vectorize_pnacl.h:67
ivec4 operator-(const ivec4 &other) const
Definition: vectorize_pnacl.h:163
void store(int *v) const
Definition: vectorize_pnacl.h:157
ivec4 operator>=(const ivec4 &other) const
Definition: vectorize_pnacl.h:199
ivec4(__m128i v)
Definition: vectorize_pnacl.h:147
A four element vector of floats.
Definition: vectorize_neon.h:56
ivec4 operator!=(const ivec4 &other) const
Definition: vectorize_pnacl.h:190
ivec4(int v1, int v2, int v3, int v4)
Definition: vectorize_pnacl.h:144
ivec4(const int *v)
Definition: vectorize_pnacl.h:148
fvec4 operator==(const fvec4 &other) const
Definition: vectorize_pnacl.h:112
fvec4 operator<=(const fvec4 &other) const
Definition: vectorize_pnacl.h:127
void store(float *v) const
Definition: vectorize_pnacl.h:76
void operator-=(const ivec4 &other)
Definition: vectorize_pnacl.h:172
void operator+=(const ivec4 &other)
Definition: vectorize_pnacl.h:169
ivec4 operator+(const ivec4 &other) const
Definition: vectorize_pnacl.h:160
fvec4()
Definition: vectorize_pnacl.h:59
ivec4 operator-() const
Definition: vectorize_pnacl.h:178
fvec4 operator/(const fvec4 &other) const
Definition: vectorize_pnacl.h:88
ivec4 operator|(const ivec4 &other) const
Definition: vectorize_pnacl.h:184
fvec4 operator!=(const fvec4 &other) const
Definition: vectorize_pnacl.h:115
ivec4 operator>(const ivec4 &other) const
Definition: vectorize_pnacl.h:193
ivec4()
Definition: vectorize_pnacl.h:140
void operator-=(const fvec4 &other)
Definition: vectorize_pnacl.h:94
fvec4 operator&(const fvec4 &other) const
Definition: vectorize_pnacl.h:106
fvec4 operator*(const fvec4 &other) const
Definition: vectorize_pnacl.h:85
fvec4(float v1, float v2, float v3, float v4)
Definition: vectorize_pnacl.h:63
fvec4 operator|(const fvec4 &other) const
Definition: vectorize_pnacl.h:109
fvec4(float v)
Definition: vectorize_pnacl.h:60
fvec4 operator>(const fvec4 &other) const
Definition: vectorize_pnacl.h:118
fvec4 operator-() const
Definition: vectorize_pnacl.h:103
void operator+=(const fvec4 &other)
Definition: vectorize_pnacl.h:91
ivec4(int v)
Definition: vectorize_pnacl.h:141
ivec4 operator*(const ivec4 &other) const
Definition: vectorize_pnacl.h:166
ivec4 operator<=(const ivec4 &other) const
Definition: vectorize_pnacl.h:202
ivec4 operator&(const ivec4 &other) const
Definition: vectorize_pnacl.h:181
void operator*=(const ivec4 &other)
Definition: vectorize_pnacl.h:175
__m128i val
Definition: vectorize_pnacl.h:138