/*
 * Copyright 2015 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Author: Oded Gabbay <oded.gabbay@redhat.com>
 */

/*
 * POWER8 intrinsics portability header: emulates the SSE __m128i integer
 * intrinsics on top of VMX/VSX vector operations.
 */
#if defined(_ARCH_PWR8) && UTIL_ARCH_LITTLE_ENDIAN
37
#define VECTOR_ALIGN_16 __attribute__ ((__aligned__ (16)))
39
typedef VECTOR_ALIGN_16 vector unsigned char __m128i;
41
typedef VECTOR_ALIGN_16 union m128i {
43
vector signed int m128si;
44
vector unsigned int m128ui;
52
vec_set_epi32 (int i3, int i2, int i1, int i0)
56
#if UTIL_ARCH_LITTLE_ENDIAN
68
return (__m128i) vdst.m128si;
72
vec_setr_epi32 (int i0, int i1, int i2, int i3)
74
return vec_set_epi32 (i3, i2, i1, i0);
78
vec_unpacklo_epi32 (__m128i even, __m128i odd)
80
static const __m128i perm_mask =
81
#if UTIL_ARCH_LITTLE_ENDIAN
82
{ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
84
{24, 25, 26, 27, 8, 9, 10, 11, 28, 29, 30, 31, 12, 13, 14, 15};
87
return vec_perm (even, odd, perm_mask);
91
vec_unpackhi_epi32 (__m128i even, __m128i odd)
93
static const __m128i perm_mask =
94
#if UTIL_ARCH_LITTLE_ENDIAN
95
{ 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
97
{16, 17, 18, 19, 0, 1, 2, 3, 20, 21, 22, 23, 4, 5, 6, 7};
100
return vec_perm (even, odd, perm_mask);
103
static inline __m128i
104
vec_unpacklo_epi64 (__m128i even, __m128i odd)
106
static const __m128i perm_mask =
107
#if UTIL_ARCH_LITTLE_ENDIAN
108
{ 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
110
{24, 25, 26, 27, 28, 29, 30, 31, 8, 9, 10, 11, 12, 13, 14, 15};
113
return vec_perm (even, odd, perm_mask);
116
static inline __m128i
117
vec_unpackhi_epi64 (__m128i even, __m128i odd)
119
static const __m128i perm_mask =
120
#if UTIL_ARCH_LITTLE_ENDIAN
121
{ 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
123
{16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7};
126
return vec_perm (even, odd, perm_mask);
129
static inline __m128i
130
vec_add_epi32 (__m128i a, __m128i b)
132
return (__m128i) vec_add ((vector signed int) a, (vector signed int) b);
135
static inline __m128i
136
vec_sub_epi32 (__m128i a, __m128i b)
138
return (__m128i) vec_sub ((vector signed int) a, (vector signed int) b);
141
/* Call this function ONLY on POWER8 and newer platforms */
142
static inline __m128i
143
vec_mullo_epi32 (__m128i a, __m128i b)
148
"vmuluwm %0, %1, %2 \n"
156
static inline __m128i
157
vec_andnot_si128 (__m128i a, __m128i b)
159
return vec_andc (b, a);
163
transpose4_epi32(const __m128i * restrict a,
164
const __m128i * restrict b,
165
const __m128i * restrict c,
166
const __m128i * restrict d,
167
__m128i * restrict o,
168
__m128i * restrict p,
169
__m128i * restrict q,
170
__m128i * restrict r)
172
__m128i t0 = vec_unpacklo_epi32(*a, *b);
173
__m128i t1 = vec_unpacklo_epi32(*c, *d);
174
__m128i t2 = vec_unpackhi_epi32(*a, *b);
175
__m128i t3 = vec_unpackhi_epi32(*c, *d);
177
*o = vec_unpacklo_epi64(t0, t1);
178
*p = vec_unpackhi_epi64(t0, t1);
179
*q = vec_unpacklo_epi64(t2, t3);
180
*r = vec_unpackhi_epi64(t2, t3);
183
static inline __m128i
184
vec_slli_epi32 (__m128i vsrc, unsigned int count)
186
__m128i_union vec_count;
189
return (__m128i) vec_splats (0);
193
/* In VMX, all shift count fields must contain the same value */
194
vec_count.m128si = (vector signed int) vec_splats (count);
195
return (__m128i) vec_sl ((vector signed int) vsrc, vec_count.m128ui);
198
static inline __m128i
199
vec_srli_epi32 (__m128i vsrc, unsigned int count)
201
__m128i_union vec_count;
204
return (__m128i) vec_splats (0);
208
/* In VMX, all shift count fields must contain the same value */
209
vec_count.m128si = (vector signed int) vec_splats (count);
210
return (__m128i) vec_sr ((vector signed int) vsrc, vec_count.m128ui);
213
static inline __m128i
214
vec_srai_epi32 (__m128i vsrc, unsigned int count)
216
__m128i_union vec_count;
219
return (__m128i) vec_splats (0);
223
/* In VMX, all shift count fields must contain the same value */
224
vec_count.m128si = (vector signed int) vec_splats (count);
225
return (__m128i) vec_sra ((vector signed int) vsrc, vec_count.m128ui);
228
static inline __m128i
229
vec_cmpeq_epi32 (__m128i a, __m128i b)
231
return (__m128i) vec_cmpeq ((vector signed int) a, (vector signed int) b);
234
static inline __m128i
235
vec_loadu_si128 (const uint32_t* src)
239
#if UTIL_ARCH_LITTLE_ENDIAN
241
vsrc.m128ui = *((vector unsigned int *) src);
245
__m128i vmask, tmp1, tmp2;
247
vmask = vec_lvsl(0, src);
249
tmp1 = (__m128i) vec_ld (0, src);
250
tmp2 = (__m128i) vec_ld (15, src);
251
vsrc.m128ui = (vector unsigned int) vec_perm (tmp1, tmp2, vmask);
258
static inline __m128i
259
vec_load_si128 (const uint32_t* src)
263
vsrc.m128ui = *((vector unsigned int *) src);
269
vec_store_si128 (uint32_t* dest, __m128i vdata)
271
vec_st ((vector unsigned int) vdata, 0, dest);
274
/* Call this function ONLY on POWER8 and newer platforms */
276
vec_movemask_epi8 (__m128i vsrc)
281
vtemp.m128i = vec_vgbbd(vsrc);
283
#if UTIL_ARCH_LITTLE_ENDIAN
284
result = vtemp.ub[15] << 8 | vtemp.ub[7];
286
result = vtemp.ub[0] << 8 | vtemp.ub[8];
292
static inline __m128i
293
vec_packs_epi16 (__m128i a, __m128i b)
295
#if UTIL_ARCH_LITTLE_ENDIAN
296
return (__m128i) vec_packs ((vector signed short) a,
297
(vector signed short) b);
299
return (__m128i) vec_packs ((vector signed short) b,
300
(vector signed short) a);
304
static inline __m128i
305
vec_packs_epi32 (__m128i a, __m128i b)
307
#if UTIL_ARCH_LITTLE_ENDIAN
308
return (__m128i) vec_packs ((vector signed int) a, (vector signed int) b);
310
return (__m128i) vec_packs ((vector signed int) b, (vector signed int) a);
#endif /* _ARCH_PWR8 && UTIL_ARCH_LITTLE_ENDIAN */

#endif /* U_PWR8_H_ */