; RUN: opt < %s -instcombine -S | FileCheck %s

; This should never happen, but make sure we don't crash handling a non-constant immediate byte.

; The immediate is a function argument rather than a constant, so the
; intrinsic call is expected to be left in place by instcombine.
define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_non_const_imm
; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
; CHECK-NEXT: ret <4 x double>
}

; In the following 4 tests, both zero mask bits of the immediate are set.

; imm 136 (0x88): the call is expected to fold to an all-zero <4 x double>.
define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x88
; CHECK-NEXT: ret <4 x double> zeroinitializer
}
; imm 136 (0x88): the call is expected to fold to an all-zero <8 x float>.
define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) {
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136)
  ret <8 x float> %res

; CHECK-LABEL: @perm2ps_0x88
; CHECK-NEXT: ret <8 x float> zeroinitializer
}
; imm 136 (0x88): the call is expected to fold to an all-zero <8 x i32>.
define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) {
  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136)
  ret <8 x i32> %res

; CHECK-LABEL: @perm2si_0x88
; CHECK-NEXT: ret <8 x i32> zeroinitializer
}
; imm 136 (0x88), AVX2 integer form: the call is expected to fold to an
; all-zero <4 x i64>.
define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136)
  ret <4 x i64> %res

; CHECK-LABEL: @perm2i_0x88
; CHECK-NEXT: ret <4 x i64> zeroinitializer
}

; The other control bits are ignored when zero mask bits of the immediate are set.

; imm 255 (0xff): every control bit is set, and the call is still expected to
; fold to an all-zero vector.
define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0xff
; CHECK-NEXT: ret <4 x double> zeroinitializer
}

; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the
; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible.

; imm 0 (0x00): both halves of the result are expected to be elements 0-1 of
; %a0, so the second shuffle operand is undef.
define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x00
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 1 (0x01): the result is expected to be %a0 with its two halves swapped
; (elements 2-3 followed by elements 0-1).
define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x01
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 2 (0x02): the result is expected to be elements 0-1 of %a1 followed by
; elements 0-1 of %a0.
define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x02
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 3 (0x03): the result is expected to be elements 2-3 of %a1 followed by
; elements 0-1 of %a0.
define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x03
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 16 (0x10): one of the two cases where no shuffle is needed; the call is
; expected to fold to %a0 itself.
define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x10
; CHECK-NEXT: ret <4 x double> %a0
}
; imm 17 (0x11): both halves of the result are expected to be elements 2-3 of
; %a0, so the second shuffle operand is undef.
define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x11
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 18 (0x12): the result is expected to be elements 0-1 of %a1 followed by
; elements 2-3 of %a0.
define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x12
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 19 (0x13): the result is expected to be elements 2-3 of %a1 followed by
; elements 2-3 of %a0.
define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x13
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 32 (0x20): the result is expected to be elements 0-1 of %a0 followed by
; elements 0-1 of %a1.
define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x20
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 33 (0x21): the result is expected to be elements 2-3 of %a0 followed by
; elements 0-1 of %a1.
define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x21
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 34 (0x22): both halves of the result are expected to be elements 0-1 of
; %a1, so the second shuffle operand is undef.
define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x22
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 35 (0x23): the result is expected to be %a1 with its two halves swapped
; (elements 2-3 followed by elements 0-1).
define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x23
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 48 (0x30): the result is expected to be elements 0-1 of %a0 followed by
; elements 2-3 of %a1.
define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x30
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 49 (0x31): the result is expected to be elements 2-3 of %a0 followed by
; elements 2-3 of %a1.
define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x31
; CHECK-NEXT: %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> %1
}
; imm 50 (0x32): one of the two cases where no shuffle is needed; the call is
; expected to fold to %a1 itself.
define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x32
; CHECK-NEXT: ret <4 x double> %a1
}
; imm 51 (0x33): both halves of the result are expected to be elements 2-3 of
; %a1, so the second shuffle operand is undef.
define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x33
; CHECK-NEXT: %1 = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: ret <4 x double> %1
}

; Confirm that a mask for 32-bit elements is also correct.

; imm 49 (0x31) on 8 x 32-bit elements: the result is expected to be elements
; 4-7 of %a0 followed by elements 4-7 of %a1.
define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49)
  ret <8 x float> %res

; CHECK-LABEL: @perm2ps_0x31
; CHECK-NEXT: %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x float> %1
}

; Confirm that the AVX2 version works the same.

; imm 51 (0x33), AVX2 integer form: both halves of the result are expected to
; be elements 2-3 of %a1, so the second shuffle operand is undef.
define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51)
  ret <4 x i64> %res

; CHECK-LABEL: @perm2i_0x33
; CHECK-NEXT: %1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: ret <4 x i64> %1
}

; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.

; imm 129 (0x81): the result is expected to be elements 2-3 of %a0 followed by
; two elements taken from a constant vector that starts with 0.0.
define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x81
; CHECK-NEXT: shufflevector <4 x double> %a0, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double>
}
; imm 131 (0x83): the result is expected to be elements 2-3 of %a1 followed by
; two elements taken from a constant vector that starts with 0.0.
define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x83
; CHECK-NEXT: shufflevector <4 x double> %a1, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double>
}
; imm 40 (0x28): the result is expected to be two elements taken from a
; constant vector that starts with 0.0, followed by elements 0-1 of %a1.
define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x28
; CHECK-NEXT: shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double>
}
; imm 8 (0x08): the result is expected to be two elements taken from a
; constant vector that starts with 0.0, followed by elements 0-1 of %a0.
define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x08
; CHECK-NEXT: shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double>
}

; Check one more with the AVX2 version.

; imm 40 (0x28), AVX2 integer form: the result is expected to be two elements
; taken from a constant vector that starts with 0, followed by elements 0-1 of
; %a1.
define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40)
  ret <4 x i64> %res

; CHECK-LABEL: @perm2i_0x28
; CHECK-NEXT: shufflevector <4 x i64> <i64 0{{.*}}, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i64>
}
; Declarations of the x86 intrinsics called by the test functions in this
; file: three AVX vperm2f128 variants (pd, ps, si) and the AVX2 vperm2i128.
; Each takes two 256-bit source vectors and an i8 immediate.
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readnone