1
/* This file is part of the Vc library.
3
Copyright (C) 2009-2012 Matthias Kretz <kretz@kde.org>
5
Vc is free software: you can redistribute it and/or modify
6
it under the terms of the GNU Lesser General Public License as
7
published by the Free Software Foundation, either version 3 of
8
the License, or (at your option) any later version.
10
Vc is distributed in the hope that it will be useful, but
11
WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU Lesser General Public License for more details.
15
You should have received a copy of the GNU Lesser General Public
16
License along with Vc. If not, see <http://www.gnu.org/licenses/>.
21
#include <sse/intrinsics.h>
25
// Stream-insertion operator for __m128i values.
// NOTE(review): only part of the body is visible in this listing; `x` is not
// declared in the visible lines -- presumably a view of `v` that exposes its
// elements as `m`; confirm against the full file.
ostream &operator<<(ostream &out, const __m128i &v)
33
// Writes x.m[1] through x.m[7], each preceded by ", ".
for (int i = 1; i < 8; ++i) {
34
out << ", " << x.m[i];
40
// Specialization of unittest_compareHelper for two __m128i operands.
// _mm_cmpeq_epi16 compares a and b as eight 16-bit lanes, and
// _mm_movemask_epi8 gathers the top bit of each of the 16 bytes of that
// result, so the mask equals 0xffff only when every lane compared equal.
template<> inline bool unittest_compareHelper<__m128i, __m128i>(const __m128i &a, const __m128i &b)
42
return _mm_movemask_epi8(_mm_cmpeq_epi16(a, b)) == 0xffff;
48
// Two alternative definitions of `blend` for the double-precision test.
// NOTE(review): presumably selected by a preprocessor conditional that is not
// visible in this listing -- confirm against the full file.
#define blend _mm_blend_pd
50
#define blend Vc::SSE::_mm_blend_pd
52
// _mm_set_pd lists the high element first: a = {10, 11}, b = {20, 21} in element order.
__m128d a = _mm_set_pd(11, 10);
53
__m128d b = _mm_set_pd(21, 20);
55
// Each check blends with one immediate mask and requires both lanes to compare
// equal to the expected vector (movemask 0x3). As the expected values show,
// bit k of the mask takes element k from b, otherwise from a.
COMPARE(_mm_movemask_pd(_mm_cmpeq_pd(blend(a, b, 0x0), a)), 0x3);
56
COMPARE(_mm_movemask_pd(_mm_cmpeq_pd(blend(a, b, 0x1), _mm_set_pd(11, 20))), 0x3);
57
COMPARE(_mm_movemask_pd(_mm_cmpeq_pd(blend(a, b, 0x2), _mm_set_pd(21, 10))), 0x3);
58
COMPARE(_mm_movemask_pd(_mm_cmpeq_pd(blend(a, b, 0x3), b)), 0x3);
64
// Two alternative definitions of `blend` for the single-precision test.
// NOTE(review): presumably selected by a preprocessor conditional that is not
// visible in this listing -- confirm against the full file.
#define blend _mm_blend_ps
66
#define blend Vc::SSE::_mm_blend_ps
68
// _mm_set_ps lists the highest element first: a = {10, 11, 12, 13},
// b = {20, 21, 22, 23} in element order.
__m128 a = _mm_set_ps(13, 12, 11, 10);
69
__m128 b = _mm_set_ps(23, 22, 21, 20);
71
// All sixteen immediate masks 0x0..0xf are checked. As the expected values
// show, bit k of the mask takes element k from b, otherwise from a; a movemask
// of 0xf means all four lanes matched the expected vector.
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x0), a)), 0xf);
72
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x1), _mm_set_ps(13, 12, 11, 20))), 0xf);
73
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x2), _mm_set_ps(13, 12, 21, 10))), 0xf);
74
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x3), _mm_set_ps(13, 12, 21, 20))), 0xf);
75
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x4), _mm_set_ps(13, 22, 11, 10))), 0xf);
76
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x5), _mm_set_ps(13, 22, 11, 20))), 0xf);
77
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x6), _mm_set_ps(13, 22, 21, 10))), 0xf);
78
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x7), _mm_set_ps(13, 22, 21, 20))), 0xf);
79
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x8), _mm_set_ps(23, 12, 11, 10))), 0xf);
80
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0x9), _mm_set_ps(23, 12, 11, 20))), 0xf);
81
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0xa), _mm_set_ps(23, 12, 21, 10))), 0xf);
82
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0xb), _mm_set_ps(23, 12, 21, 20))), 0xf);
83
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0xc), _mm_set_ps(23, 22, 11, 10))), 0xf);
84
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0xd), _mm_set_ps(23, 22, 11, 20))), 0xf);
85
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0xe), _mm_set_ps(23, 22, 21, 10))), 0xf);
86
COMPARE(_mm_movemask_ps(_mm_cmpeq_ps(blend(a, b, 0xf), b)), 0xf);
92
// Two alternative definitions of `blend` for the 16-bit integer test.
// NOTE(review): presumably selected by a preprocessor conditional that is not
// visible in this listing -- confirm against the full file.
#define blend _mm_blend_epi16
94
#define blend Vc::SSE::_mm_blend_epi16
96
// _mm_set_epi16 lists the highest element first: element j of a is 10 + j and
// element j of b is 20 + j.
__m128i a = _mm_set_epi16(17, 16, 15, 14, 13, 12, 11, 10);
97
__m128i b = _mm_set_epi16(27, 26, 25, 24, 23, 22, 21, 20);
99
// Compile-time "loop unrolling" helpers.
//
// CALL_<N>(_i, code) expands `code` N times. Each expansion sits in its own
// brace scope in which `i` is an enumerator -- i.e. an integral constant
// expression -- with the values _i, _i + 1, ..., _i + N - 1 in that order.
// This lets `code` use `i` where a compile-time constant is required, such as
// the immediate operand in blend(a, b, i).
//
// CALL_100(code) expands `code` for i = 0 .. 99 (64 + 32 + 4 expansions).
//
// The start-index parameter is parenthesized wherever an offset is added to
// it, so arguments that are not simple additive expressions (e.g. `1 << 2`)
// keep their intended value.
#define CALL_2(_i, code) { enum { i = (_i) }; code } { enum { i = (_i) + 1 }; code }
#define CALL_4(_i, code) CALL_2(_i, code) CALL_2((_i) + 2, code)
#define CALL_8(_i, code) CALL_4(_i, code) CALL_4((_i) + 4, code)
#define CALL_16(_i, code) CALL_8(_i, code) CALL_8((_i) + 8, code)
#define CALL_32(_i, code) CALL_16(_i, code) CALL_16((_i) + 16, code)
#define CALL_64(_i, code) CALL_32(_i, code) CALL_32((_i) + 32, code)
#define CALL_100(code) CALL_64(0, code) CALL_32(64, code) CALL_4(96, code)
109
// Builds the expected blend result for mask i: element j is j + 10 when bit j
// of i is clear and j + 20 when it is set.
// NOTE(review): `r` is declared in lines not visible in this listing.
for (int j = 0; j < 8; ++j) {
110
r[j] = j + ((((i >> j) & 1) == 0) ? 10 : 20);
112
// _mm_set_epi16 takes its arguments highest element first, hence r[7] .. r[0].
__m128i reference = _mm_set_epi16(r[7], r[6], r[5], r[4], r[3], r[2], r[1], r[0]);
113
COMPARE_NOEQ(blend(a, b, i), reference);