10
float __attribute__((__aligned__(16))) ar[4];
11
__m128 v = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
// Fragment of a test for _mm_set1_ps. The enclosing function header and the
// store/assert lines that follow are not visible in this excerpt.
20
float __attribute__((__aligned__(16))) ar[4];
21
// Vector under test, built from the single scalar 5.5.
__m128 v = _mm_set1_ps(5.5);
// Test for _mm_setzero_ps. Only the setup is visible in this excerpt: a
// float[4] buffer declared with 16-byte alignment and the vector under test.
// The store into `ar` and the assertions are on lines not shown here.
29
void testSetZeroPs() {
30
float __attribute__((__aligned__(16))) ar[4];
31
__m128 v = _mm_setzero_ps();
// Fragment of a test for _mm_set_epi32 combined with _mm_store_si128. The
// enclosing function header is not visible in this excerpt.
40
int32_t __attribute__((__aligned__(16))) ar[4];
// First vector: the checks for these four values are on lines not shown here.
41
__m128i v = _mm_set_epi32(5, 7, 126, 381);
42
_mm_store_si128((__m128i *)ar, v);
// Second vector: the asserts below show the argument order is reversed in
// memory, i.e. the LAST argument lands in ar[0] and the FIRST in ar[3].
47
v = _mm_set_epi32(0x55555555, 0xaaaaaaaa, 0xffffffff, 0x12345678);
48
_mm_store_si128((__m128i *)ar, v);
49
assert(ar[0] == 0x12345678);
// NOTE: ar holds int32_t while literals such as 0xffffffff do not fit in a
// 32-bit int and are therefore unsigned; the int32_t operand is converted to
// unsigned for the comparison, so the bit patterns are what get compared.
50
assert(ar[1] == 0xffffffff);
51
assert(ar[2] == 0xaaaaaaaa);
52
assert(ar[3] == 0x55555555);
// Test for _mm_set1_epi32: builds a vector from the single value -5 and stores
// it into a 16-byte-aligned int32_t[4]. The assertions on `ar` are on lines
// not visible in this excerpt.
55
void testSet1Epi32() {
56
int32_t __attribute__((__aligned__(16))) ar[4];
57
__m128i v = _mm_set1_epi32(-5);
58
_mm_store_si128((__m128i *)ar, v);
// Test for _mm_setzero_si128: stores the result into a 16-byte-aligned
// int32_t[4]. The assertions on `ar` are on lines not visible in this excerpt.
65
void testSetZeroSi128() {
66
int32_t __attribute__((__aligned__(16))) ar[4];
67
__m128i v = _mm_setzero_si128();
68
_mm_store_si128((__m128i *)ar, v);
// Fragment of a test for the casts _mm_castsi128_ps and _mm_castps_si128.
// The asserts check that a cast reinterprets the 128 bits unchanged rather
// than converting values. The enclosing function header and several
// intermediate lines are not visible in this excerpt.
76
int32_t __attribute__((__aligned__(16))) ar1[4];
77
float __attribute__((__aligned__(16))) ar2[4];
// Integer -> float: these four words are the bit patterns that the asserts
// below read back as 1.0, 2.0, 3.0 and 4.0. The last argument of
// _mm_set_epi32 ends up in element 0, hence ar2[0] == 4.0.
78
__m128i v1 = _mm_set_epi32(0x3f800000, 0x40000000, 0x40400000, 0x40800000);
79
__m128 v2 = _mm_castsi128_ps(v1);
80
_mm_store_ps(ar2, v2);
81
assert(ar2[0] == 4.0);
82
assert(ar2[1] == 3.0);
83
assert(ar2[2] == 2.0);
84
assert(ar2[3] == 1.0);
// Float -> integer: 8.0, 7.0, 6.0, 5.0 (element 0 first) must show up as
// their raw 32-bit encodings after the cast.
85
v2 = _mm_set_ps(5.0, 6.0, 7.0, 8.0);
86
v1 = _mm_castps_si128(v2);
87
_mm_store_si128((__m128i *)ar1, v1);
88
assert(ar1[0] == 0x41000000);
89
assert(ar1[1] == 0x40e00000);
90
assert(ar1[2] == 0x40c00000);
91
assert(ar1[3] == 0x40a00000);
// Round trip float -> integer -> float of w, z, y, x. Those four variables
// are declared on lines not visible here.
96
v1 = _mm_castps_si128(_mm_set_ps(w, z, y, x));
97
_mm_store_ps(ar2, _mm_castsi128_ps(v1));
// The initialisers of the eight bitsets below are cut off in this excerpt, so
// what each one wraps cannot be confirmed from here. Presumably bits1* hold
// the raw bits of x/y/z/w and bits2* the raw bits of ar2[0..3] — TODO confirm
// against the full file.
103
std::bitset<sizeof(float)*CHAR_BIT> bits1x(*reinterpret_cast<unsigned
105
std::bitset<sizeof(float)*CHAR_BIT> bits1y(*reinterpret_cast<unsigned
107
std::bitset<sizeof(float)*CHAR_BIT> bits1z(*reinterpret_cast<unsigned
109
std::bitset<sizeof(float)*CHAR_BIT> bits1w(*reinterpret_cast<unsigned
111
std::bitset<sizeof(float)*CHAR_BIT> bits2x(*reinterpret_cast<unsigned
113
std::bitset<sizeof(float)*CHAR_BIT> bits2y(*reinterpret_cast<unsigned
115
std::bitset<sizeof(float)*CHAR_BIT> bits2z(*reinterpret_cast<unsigned
117
std::bitset<sizeof(float)*CHAR_BIT> bits2w(*reinterpret_cast<unsigned
// Each bits1*/bits2* pair must be bit-for-bit equal.
119
assert(bits1x == bits2x);
120
assert(bits1y == bits2y);
121
assert(bits1z == bits2z);
122
assert(bits1w == bits2w);
// Integer -> float -> integer round trip with arbitrary bit patterns; the
// words must come back unchanged. The check for ar1[2] is on a line not
// visible in this excerpt.
124
v2 = _mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0, 0x5555cccc, 0xaaaaaaaa));
125
_mm_store_si128((__m128i *)ar1, _mm_castps_si128(v2));
126
assert(ar1[0] == 0xaaaaaaaa);
127
assert(ar1[1] == 0x5555cccc);
129
assert(ar1[3] == 0xffffffff);
// Test for the value conversions _mm_cvtepi32_ps (int32 -> float) and
// _mm_cvtps_epi32 (float -> int32). Unlike the casts, these change the bit
// patterns: integer 256 becomes float 256.0, and so on. Some assertions are
// on lines not visible in this excerpt.
132
void testConversions() {
133
int32_t __attribute__((__aligned__(16))) ar1[4];
134
float __attribute__((__aligned__(16))) ar2[4];
// Element 0 is the last argument (256), element 3 the first (0).
135
__m128i v1 = _mm_set_epi32(0, -3, -517, 256);
136
__m128 v2 = _mm_cvtepi32_ps(v1);
137
_mm_store_ps(ar2, v2);
138
assert(ar2[0] == 256.0);
139
assert(ar2[1] == -517.0);
140
assert(ar2[2] == -3.0);
// Float -> int32 direction; only the check for element 0 (-8.0 -> -8) is
// visible here, the checks for the remaining elements are not shown.
142
v2 = _mm_set_ps(5.0, 6.0, 7.45, -8.0);
143
v1 = _mm_cvtps_epi32(v2);
144
_mm_store_si128((__m128i *)ar1, v1);
145
assert(ar1[0] == -8);
// Test for _mm_movemask_ps. The cast expression below is the tail of a
// statement whose first line is not visible in this excerpt (presumably the
// declaration of `v` — TODO confirm). The check on `mask` is also not shown.
151
void testMoveMaskPs() {
153
_mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
154
int mask = _mm_movemask_ps(v);
// Fragment of a test for _mm_add_ps (lane-wise float addition). The enclosing
// function header and the store of `v` into `ar` are on lines not visible in
// this excerpt.
// With _mm_set_ps placing its last argument in element 0, the lanes are
// v1 = {1, 2, 3, 4} and v2 = {40, 30, 20, 10}, giving {41, 32, 23, 14}.
159
float __attribute__((__aligned__(16))) ar[4];
160
__m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
161
__m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
162
__m128 v = _mm_add_ps(v1, v2);
164
assert(ar[0] == 41.0);
165
assert(ar[1] == 32.0);
166
assert(ar[2] == 23.0);
167
assert(ar[3] == 14.0);
// Fragment of a test for _mm_sub_ps (lane-wise v1 - v2). The enclosing
// function header and the store of `v` into `ar` are on lines not visible in
// this excerpt.
// Lanes (element 0 first): v1 = {1, 2, 3, 4}, v2 = {40, 30, 20, 10},
// so the expected difference is {-39, -28, -17, -6}.
171
float __attribute__((__aligned__(16))) ar[4];
172
__m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
173
__m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
174
__m128 v = _mm_sub_ps(v1, v2);
176
assert(ar[0] == -39.0);
177
assert(ar[1] == -28.0);
178
assert(ar[2] == -17.0);
179
assert(ar[3] == -6.0);
// Fragment of a test for _mm_mul_ps (lane-wise float multiplication). The
// enclosing function header and the store of `v` into `ar` are on lines not
// visible in this excerpt.
// Lanes (element 0 first): v1 = {1, 2, 3, 4}, v2 = {40, 30, 20, 10},
// so the expected product is {40, 60, 60, 40}.
183
float __attribute__((__aligned__(16))) ar[4];
184
__m128 v1 = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
185
__m128 v2 = _mm_set_ps(10.0, 20.0, 30.0, 40.0);
186
__m128 v = _mm_mul_ps(v1, v2);
188
assert(ar[0] == 40.0);
189
assert(ar[1] == 60.0);
190
assert(ar[2] == 60.0);
191
assert(ar[3] == 40.0);
// Fragment of a test for _mm_div_ps (lane-wise v1 / v2). The enclosing
// function header and the store of `v` into `ar` are on lines not visible in
// this excerpt.
// Lanes (element 0 first): v1 = {1, 8, 9, 4}, v2 = {0.5, 1, 3, 2},
// so the expected quotient is {2, 8, 3, 2}.
195
float __attribute__((__aligned__(16))) ar[4];
196
__m128 v1 = _mm_set_ps(4.0, 9.0, 8.0, 1.0);
197
__m128 v2 = _mm_set_ps(2.0, 3.0, 1.0, 0.5);
198
__m128 v = _mm_div_ps(v1, v2);
200
assert(ar[0] == 2.0);
201
assert(ar[1] == 8.0);
202
assert(ar[2] == 3.0);
203
assert(ar[3] == 2.0);
// Fragment of a test for _mm_min_ps (lane-wise minimum). The enclosing
// function header and the store of `v` into `ar` are on lines not visible in
// this excerpt.
// Lanes (element 0 first): v1 = {0.5, 30, 10, -20}, v2 = {0, 50, 1, 2},
// so the expected minimum is {0, 30, 1, -20}.
207
float __attribute__((__aligned__(16))) ar[4];
208
__m128 v1 = _mm_set_ps(-20.0, 10.0, 30.0, 0.5);
209
__m128 v2 = _mm_set_ps(2.0, 1.0, 50.0, 0.0);
210
__m128 v = _mm_min_ps(v1, v2);
212
assert(ar[0] == 0.0);
213
assert(ar[1] == 30.0);
214
assert(ar[2] == 1.0);
215
assert(ar[3] == -20.0);
// Fragment of a test for _mm_max_ps (lane-wise maximum). The enclosing
// function header and the store of `v` into `ar` are on lines not visible in
// this excerpt.
// Lanes (element 0 first): v1 = {0.5, 30, 10, -20}, v2 = {1, 55, 5, 2.5},
// so the expected maximum is {1, 55, 10, 2.5}.
219
float __attribute__((__aligned__(16))) ar[4];
220
__m128 v1 = _mm_set_ps(-20.0, 10.0, 30.0, 0.5);
221
__m128 v2 = _mm_set_ps(2.5, 5.0, 55.0, 1.0);
222
__m128 v = _mm_max_ps(v1, v2);
224
assert(ar[0] == 1.0);
225
assert(ar[1] == 55.0);
226
assert(ar[2] == 10.0);
227
assert(ar[3] == 2.5);
// Fragment of a test for _mm_sqrt_ps (lane-wise square root). The enclosing
// function header and the store of `v` into `ar` are on lines not visible in
// this excerpt.
// Inputs are perfect squares {1, 4, 9, 16} (element 0 first), so the exact
// comparisons against {1, 2, 3, 4} are meaningful.
231
float __attribute__((__aligned__(16))) ar[4];
232
__m128 v1 = _mm_set_ps(16.0, 9.0, 4.0, 1.0);
233
__m128 v = _mm_sqrt_ps(v1);
235
assert(ar[0] == 1.0);
236
assert(ar[1] == 2.0);
237
assert(ar[2] == 3.0);
238
assert(ar[3] == 4.0);
// Fragment of a test for _mm_cmplt_ps (lane-wise v1 < v2). The enclosing
// function header and the checks for ar[1] and ar[2] are on lines not visible
// in this excerpt.
// Lanes (element 0 first): v1 = {0.001, 0.1, 2, 1}, v2 = {0.1, 0.001, 2, 2}.
// The comparison holds in elements 0 and 3, which the asserts expect to be
// all-ones words; the movemask value 9 is bits 0 and 3 set.
// `ar` is int32_t so the result can be inspected as raw 32-bit words; the
// unsigned literal 0xffffffff makes the comparison happen on bit patterns.
242
int32_t __attribute__((__aligned__(16))) ar[4];
243
__m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
244
__m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
245
__m128 v = _mm_cmplt_ps(v1, v2);
246
_mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
247
assert(ar[0] == 0xffffffff);
250
assert(ar[3] == 0xffffffff);
251
assert(_mm_movemask_ps(v) == 9);
// Fragment of a test for _mm_cmple_ps (lane-wise v1 <= v2). The enclosing
// function header and the check for ar[1] are on lines not visible in this
// excerpt.
// Lanes (element 0 first): v1 = {0.001, 0.1, 2, 1}, v2 = {0.1, 0.001, 2, 2}.
// The comparison holds in elements 0, 2 (equal values) and 3, which the
// asserts expect to be all-ones words; the movemask value 13 is bits 0, 2
// and 3 set.
255
int32_t __attribute__((__aligned__(16))) ar[4];
256
__m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
257
__m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
258
__m128 v = _mm_cmple_ps(v1, v2);
259
_mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
260
assert(ar[0] == 0xffffffff);
262
assert(ar[2] == 0xffffffff);
263
assert(ar[3] == 0xffffffff);
264
assert(_mm_movemask_ps(v) == 13);
// Fragment of a test for _mm_cmpeq_ps (lane-wise v1 == v2). The enclosing
// function header and the checks for the other elements are on lines not
// visible in this excerpt.
// Lanes (element 0 first): v1 = {0.001, 0.1, 2, 1}, v2 = {0.1, 0.001, 2, 2}.
// Only element 2 compares equal, so it is expected to be an all-ones word and
// the movemask value is 4 (bit 2 set).
268
int32_t __attribute__((__aligned__(16))) ar[4];
269
__m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
270
__m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
271
__m128 v = _mm_cmpeq_ps(v1, v2);
272
_mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
275
assert(ar[2] == 0xffffffff);
277
assert(_mm_movemask_ps(v) == 4);
// Fragment of a test for _mm_cmpge_ps (lane-wise v1 >= v2). The enclosing
// function header and the checks for ar[0] and ar[3] are on lines not visible
// in this excerpt.
// Lanes (element 0 first): v1 = {0.001, 0.1, 2, 1}, v2 = {0.1, 0.001, 2, 2}.
// The comparison holds in elements 1 and 2, which the asserts expect to be
// all-ones words; the movemask value 6 is bits 1 and 2 set.
281
int32_t __attribute__((__aligned__(16))) ar[4];
282
__m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
283
__m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
284
__m128 v = _mm_cmpge_ps(v1, v2);
285
_mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
287
assert(ar[1] == 0xffffffff);
288
assert(ar[2] == 0xffffffff);
290
assert(_mm_movemask_ps(v) == 6);
// Fragment of a test for _mm_cmpgt_ps (lane-wise v1 > v2). The enclosing
// function header and the checks for the other elements are on lines not
// visible in this excerpt.
// Lanes (element 0 first): v1 = {0.001, 0.1, 2, 1}, v2 = {0.1, 0.001, 2, 2}.
// Only element 1 satisfies the comparison, so it is expected to be an
// all-ones word and the movemask value is 2 (bit 1 set).
294
int32_t __attribute__((__aligned__(16))) ar[4];
295
__m128 v1 = _mm_set_ps(1.0, 2.0, 0.1, 0.001);
296
__m128 v2 = _mm_set_ps(2.0, 2.0, 0.001, 0.1);
297
__m128 v = _mm_cmpgt_ps(v1, v2);
298
_mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
300
assert(ar[1] == 0xffffffff);
303
assert(_mm_movemask_ps(v) == 2);
// Fragment of a test for _mm_and_ps (bitwise AND on float vectors). The
// enclosing function header, the first line of the `v2` declaration, the
// store of `v` into `ar` and several assertions are on lines not visible in
// this excerpt.
// Part 1: v1 lanes are {68, -32, -501, 425} (element 0 first). The mask
// expression below has all-ones words everywhere except element 1, so
// ANDing keeps elements 2 and 3 unchanged, as the visible asserts check.
307
float __attribute__((__aligned__(16))) ar[4];
308
__m128 v1 = _mm_set_ps(425, -501, -32, 68);
310
_mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
311
__m128 v = _mm_and_ps(v1, v2);
315
assert(ar[2] == -501);
316
assert(ar[3] == 425);
// Part 2: AND of complementary bit patterns (0xaaaaaaaa with 0x55555555).
// -1431655766 is the same 32-bit pattern as 0xaaaaaaaa written as a signed
// decimal. The assertions on ar2 are not visible in this excerpt.
317
int32_t __attribute__((__aligned__(16))) ar2[4];
318
v1 = _mm_castsi128_ps(
319
_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa));
320
v2 = _mm_castsi128_ps(
321
_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
322
v = _mm_and_ps(v1, v2);
323
_mm_store_si128((__m128i *)ar2, _mm_castps_si128(v));
// Test for _mm_andnot_ps, which computes (~first) & second. The first line of
// the `v2` declaration, the store of `v` into `ar` and some assertions are on
// lines not visible in this excerpt.
330
void testAndNotPs() {
331
float __attribute__((__aligned__(16))) ar[4];
// v1 lanes are {68, -32, -501, 425} (element 0 first).
332
__m128 v1 = _mm_set_ps(425, -501, -32, 68);
334
_mm_castsi128_ps(_mm_set_epi32(0xffffffff, 0xffffffff, 0, 0xffffffff));
// The mask expression above is zero only in element 1, so its complement
// keeps just element 1 of v1 (-32), which the visible assert checks.
335
__m128 v = _mm_andnot_ps(v2, v1);
338
assert(ar[1] == -32);
// Part 2: ~0xaaaaaaaa & 0x55555555 == 0x55555555 in every element.
// -1431655766 is the same 32-bit pattern as 0xaaaaaaaa written as a signed
// decimal.
341
int32_t __attribute__((__aligned__(16))) ar2[4];
342
v1 = _mm_castsi128_ps(
343
_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa));
344
v2 = _mm_castsi128_ps(
345
_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
346
v = _mm_andnot_ps(v1, v2);
347
_mm_store_si128((__m128i *)ar2, _mm_castps_si128(v));
348
assert(ar2[0] == 0x55555555);
349
assert(ar2[1] == 0x55555555);
350
assert(ar2[2] == 0x55555555);
351
assert(ar2[3] == 0x55555555);
// Fragment of a test for _mm_or_ps (bitwise OR on float vectors). The
// enclosing function header and the first line of the `v1` declaration are on
// lines not visible in this excerpt; the cast expression right after `ar` is
// presumably v1's initialiser — TODO confirm.
// With that initialiser, words (element 0 first) are
// {0, 0xffffffff, 0xaaaaaaaa, 0xaaaaaaaa} OR 0x55555555 in every element,
// giving {0x55555555, 0xffffffff, 0xffffffff, 0xffffffff}.
355
int32_t __attribute__((__aligned__(16))) ar[4];
357
_mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0));
358
__m128 v2 = _mm_castsi128_ps(
359
_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
360
__m128 v = _mm_or_ps(v1, v2);
361
_mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
362
assert(ar[0] == 0x55555555);
363
assert(ar[1] == 0xffffffff);
364
assert(ar[2] == 0xffffffff);
365
assert(ar[3] == 0xffffffff);
// Fragment of a test for _mm_xor_ps (bitwise XOR on float vectors). The
// enclosing function header and the first line of the `v1` declaration are on
// lines not visible in this excerpt; the cast expression right after `ar` is
// presumably v1's initialiser — TODO confirm.
// With that initialiser, words (element 0 first) are
// {0, 0xffffffff, 0xaaaaaaaa, 0xaaaaaaaa} XOR 0x55555555 in every element,
// giving {0x55555555, 0xaaaaaaaa, 0xffffffff, 0xffffffff}.
369
int32_t __attribute__((__aligned__(16))) ar[4];
371
_mm_castsi128_ps(_mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0));
372
__m128 v2 = _mm_castsi128_ps(
373
_mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555));
374
__m128 v = _mm_xor_ps(v1, v2);
375
_mm_store_si128((__m128i *)ar, _mm_castps_si128(v));
376
assert(ar[0] == 0x55555555);
377
assert(ar[1] == 0xaaaaaaaa);
378
assert(ar[2] == 0xffffffff);
379
assert(ar[3] == 0xffffffff);
// Test for _mm_and_si128 (bitwise AND on integer vectors) using the
// complementary patterns 0xaaaaaaaa and 0x55555555. -1431655766 is the same
// 32-bit pattern as 0xaaaaaaaa written as a signed decimal. The assertions on
// `ar` are on lines not visible in this excerpt.
382
void testAndSi128() {
383
int32_t __attribute__((__aligned__(16))) ar[4];
384
__m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa);
385
__m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
386
__m128i v = _mm_and_si128(v1, v2);
387
_mm_store_si128((__m128i *)ar, v);
// Test for _mm_andnot_si128, which computes (~v1) & v2 on integer vectors.
// Every element of v1 is the pattern 0xaaaaaaaa (-1431655766 is that same
// pattern written as a signed decimal), so ~v1 is 0x55555555 and the result
// equals v2 in every element.
394
void testAndNotSi128() {
395
int32_t __attribute__((__aligned__(16))) ar[4];
396
__m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, -1431655766, 0xaaaaaaaa);
397
__m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
398
__m128i v = _mm_andnot_si128(v1, v2);
399
_mm_store_si128((__m128i *)ar, v);
400
assert(ar[0] == 0x55555555);
401
assert(ar[1] == 0x55555555);
402
assert(ar[2] == 0x55555555);
403
assert(ar[3] == 0x55555555);
// Fragment of a test for _mm_or_si128 (bitwise OR on integer vectors). The
// enclosing function header is not visible in this excerpt.
// Words (element 0 first): v1 = {0, 0xffffffff, 0xaaaaaaaa, 0xaaaaaaaa},
// v2 = 0x55555555 in every element, so the expected result is
// {0x55555555, 0xffffffff, 0xffffffff, 0xffffffff}.
407
int32_t __attribute__((__aligned__(16))) ar[4];
408
__m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0);
409
__m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
410
__m128i v = _mm_or_si128(v1, v2);
411
_mm_store_si128((__m128i *)ar, v);
412
assert(ar[0] == 0x55555555);
413
assert(ar[1] == 0xffffffff);
414
assert(ar[2] == 0xffffffff);
415
assert(ar[3] == 0xffffffff);
// Test for _mm_xor_si128 (bitwise XOR on integer vectors).
// Words (element 0 first): v1 = {0, 0xffffffff, 0xaaaaaaaa, 0xaaaaaaaa},
// v2 = 0x55555555 in every element, so the expected result is
// {0x55555555, 0xaaaaaaaa, 0xffffffff, 0xffffffff}.
418
void testXorSi128() {
419
int32_t __attribute__((__aligned__(16))) ar[4];
420
__m128i v1 = _mm_set_epi32(0xaaaaaaaa, 0xaaaaaaaa, 0xffffffff, 0);
421
__m128i v2 = _mm_set_epi32(0x55555555, 0x55555555, 0x55555555, 0x55555555);
422
__m128i v = _mm_xor_si128(v1, v2);
423
_mm_store_si128((__m128i *)ar, v);
424
assert(ar[0] == 0x55555555);
425
assert(ar[1] == 0xaaaaaaaa);
426
assert(ar[2] == 0xffffffff);
427
assert(ar[3] == 0xffffffff);
// Test for _mm_add_epi32 (lane-wise 32-bit integer addition). With
// _mm_set_epi32 placing its last argument in element 0, the lanes are
// v1 = {1, 2, 3, 4} and v2 = {40, 30, 20, 10}. The assertions on `ar` are on
// lines not visible in this excerpt.
430
void testAddEpi32() {
431
int32_t __attribute__((__aligned__(16))) ar[4];
432
__m128i v1 = _mm_set_epi32(4, 3, 2, 1);
433
__m128i v2 = _mm_set_epi32(10, 20, 30, 40);
434
__m128i v = _mm_add_epi32(v1, v2);
435
_mm_store_si128((__m128i *)ar, v);
// Test for _mm_sub_epi32 (lane-wise 32-bit integer v1 - v2).
// Lanes (element 0 first): v1 = {1, 2, 3, 4}, v2 = {40, 30, 20, 10}, so the
// first three expected differences are {-39, -28, -17}. The check for ar[3]
// is on a line not visible in this excerpt.
442
void testSubEpi32() {
443
int32_t __attribute__((__aligned__(16))) ar[4];
444
__m128i v1 = _mm_set_epi32(4, 3, 2, 1);
445
__m128i v2 = _mm_set_epi32(10, 20, 30, 40);
446
__m128i v = _mm_sub_epi32(v1, v2);
447
_mm_store_si128((__m128i *)ar, v);
448
assert(ar[0] == -39);
449
assert(ar[1] == -28);
450
assert(ar[2] == -17);
454
int main(int argc, char **argv) {