; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; Splat the scalar %A into all 8 lanes of a <8 x i8> vector with an
; insertelement chain; every lane of the zero vector gets overwritten.
define <8 x i8> @v_dup8(i8 %A) nounwind {
;CHECK: v_dup8:
;CHECK: vdup.8
  %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
  %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
  %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
  %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
  %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
  %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
  %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
  %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
  ret <8 x i8> %tmp8
}
; Splat the scalar %A into all 4 lanes of a <4 x i16> vector with an
; insertelement chain; every lane of the zero vector gets overwritten.
define <4 x i16> @v_dup16(i16 %A) nounwind {
;CHECK: v_dup16:
;CHECK: vdup.16
  %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
  %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
  %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
  %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
  ret <4 x i16> %tmp4
}
; Splat the scalar %A into both lanes of a <2 x i32> vector with an
; insertelement chain; every lane of the zero vector gets overwritten.
define <2 x i32> @v_dup32(i32 %A) nounwind {
;CHECK: v_dup32:
;CHECK: vdup.32
  %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
  %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
  ret <2 x i32> %tmp2
}
; Splat the scalar float %A into both lanes of a <2 x float> vector with an
; insertelement chain; every lane of the zero vector gets overwritten.
define <2 x float> @v_dupfloat(float %A) nounwind {
;CHECK: v_dupfloat:
;CHECK: vdup.32
  %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
  %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
  ret <2 x float> %tmp2
}
; Splat the scalar %A into all 16 lanes of a <16 x i8> vector with an
; insertelement chain; every lane of the zero vector gets overwritten.
define <16 x i8> @v_dupQ8(i8 %A) nounwind {
;CHECK: v_dupQ8:
;CHECK: vdup.8
  %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
  %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
  %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
  %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
  %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
  %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
  %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
  %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
  %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
  %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
  %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
  %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
  %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
  %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
  %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
  %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
  ret <16 x i8> %tmp16
}
; Splat the scalar %A into all 8 lanes of a <8 x i16> vector with an
; insertelement chain; every lane of the zero vector gets overwritten.
define <8 x i16> @v_dupQ16(i16 %A) nounwind {
;CHECK: v_dupQ16:
;CHECK: vdup.16
  %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
  %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
  %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
  %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
  %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
  %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
  %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
  %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
  ret <8 x i16> %tmp8
}
; Splat the scalar %A into all 4 lanes of a <4 x i32> vector with an
; insertelement chain; every lane of the zero vector gets overwritten.
define <4 x i32> @v_dupQ32(i32 %A) nounwind {
;CHECK: v_dupQ32:
;CHECK: vdup.32
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
  %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
  %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
  %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
  ret <4 x i32> %tmp4
}
; Splat the scalar float %A into all 4 lanes of a <4 x float> vector with an
; insertelement chain; every lane of the zero vector gets overwritten.
define <4 x float> @v_dupQfloat(float %A) nounwind {
;CHECK: v_dupQfloat:
;CHECK: vdup.32
  %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
  %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
  %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
  %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
  ret <4 x float> %tmp4
}
; Check to make sure it works with shuffles, too.
; Splat %A by placing it in lane 0 of an undef vector and then broadcasting
; lane 0 with an all-zero shuffle mask.
define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
;CHECK: v_shuffledup8:
;CHECK: vdup.8
  %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %tmp2
}
; Splat %A by placing it in lane 0 of an undef vector and then broadcasting
; lane 0 with an all-zero shuffle mask.
define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
;CHECK: v_shuffledup16:
;CHECK: vdup.16
  %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %tmp2
}
; Splat %A by placing it in lane 0 of an undef vector and then broadcasting
; lane 0 with an all-zero shuffle mask.
define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
;CHECK: v_shuffledup32:
;CHECK: vdup.32
  %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %tmp2
}
; Splat the float %A by placing it in lane 0 of an undef vector and then
; broadcasting lane 0 with an all-zero shuffle mask.
define <2 x float> @v_shuffledupfloat(float %A) nounwind {
;CHECK: v_shuffledupfloat:
;CHECK: vdup.32
  %tmp1 = insertelement <2 x float> undef, float %A, i32 0
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %tmp2
}
; Splat %A by placing it in lane 0 of an undef vector and then broadcasting
; lane 0 with an all-zero shuffle mask.
define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
;CHECK: v_shuffledupQ8:
;CHECK: vdup.8
  %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %tmp2
}
; Splat %A by placing it in lane 0 of an undef vector and then broadcasting
; lane 0 with an all-zero shuffle mask.
define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
;CHECK: v_shuffledupQ16:
;CHECK: vdup.16
  %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %tmp2
}
; Splat %A by placing it in lane 0 of an undef vector and then broadcasting
; lane 0 with an all-zero shuffle mask.
define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
;CHECK: v_shuffledupQ32:
;CHECK: vdup.32
  %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %tmp2
}
; Splat the float %A by placing it in lane 0 of an undef vector and then
; broadcasting lane 0 with an all-zero shuffle mask.
define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
;CHECK: v_shuffledupQfloat:
;CHECK: vdup.32
  %tmp1 = insertelement <4 x float> undef, float %A, i32 0
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %tmp2
}
; Same splat-by-shuffle pattern as the scalar-argument variants, except the
; float is first loaded from memory through %A.
define <2 x float> @v_shuffledupfloat2(float* %A) nounwind {
;CHECK: v_shuffledupfloat2:
;CHECK: vdup.32
  %tmp0 = load float* %A
  %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %tmp2
}
; Same splat-by-shuffle pattern as the scalar-argument variants, except the
; float is first loaded from memory through %A.
define <4 x float> @v_shuffledupQfloat2(float* %A) nounwind {
;CHECK: v_shuffledupQfloat2:
;CHECK: vdup.32
  %tmp0 = load float* %A
  %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %tmp2
}
; Load a <8 x i8> vector and broadcast its lane 1 to every lane of the result.
define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
;CHECK: vduplane8:
;CHECK: vdup.8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
  ret <8 x i8> %tmp2
}
; Load a <4 x i16> vector and broadcast its lane 1 to every lane of the result.
define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
;CHECK: vduplane16:
;CHECK: vdup.16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
  ret <4 x i16> %tmp2
}
; Load a <2 x i32> vector and broadcast its lane 1 to every lane of the result.
define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
;CHECK: vduplane32:
;CHECK: vdup.32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
  ret <2 x i32> %tmp2
}
; Load a <2 x float> vector and broadcast its lane 1 to every lane of the
; result.
define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
;CHECK: vduplanefloat:
;CHECK: vdup.32
  %tmp1 = load <2 x float>* %A
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
  ret <2 x float> %tmp2
}
; Load a <8 x i8> vector and broadcast its lane 1 into all 16 lanes of a
; <16 x i8> result (the shuffle mask is wider than the source vector).
define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
;CHECK: vduplaneQ8:
;CHECK: vdup.8
  %tmp1 = load <8 x i8>* %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
  ret <16 x i8> %tmp2
}
; Load a <4 x i16> vector and broadcast its lane 1 into all 8 lanes of a
; <8 x i16> result (the shuffle mask is wider than the source vector).
define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
;CHECK: vduplaneQ16:
;CHECK: vdup.16
  %tmp1 = load <4 x i16>* %A
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
  ret <8 x i16> %tmp2
}
; Load a <2 x i32> vector and broadcast its lane 1 into all 4 lanes of a
; <4 x i32> result (the shuffle mask is wider than the source vector).
define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
;CHECK: vduplaneQ32:
;CHECK: vdup.32
  %tmp1 = load <2 x i32>* %A
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
  ret <4 x i32> %tmp2
}
; Load a <2 x float> vector and broadcast its lane 1 into all 4 lanes of a
; <4 x float> result (the shuffle mask is wider than the source vector).
define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
;CHECK: vduplaneQfloat:
;CHECK: vdup.32
  %tmp1 = load <2 x float>* %A
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
  ret <4 x float> %tmp2
}
; Broadcast lane 1 of a <2 x i64> argument to both lanes. No FileCheck
; directives are attached to this function.
define arm_apcscc <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
; The explicit block label keeps value number 0 free for the instruction
; below; an unnamed entry block would otherwise take slot 0 itself.
entry:
  %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %0
}
; Broadcast lane 0 of a <2 x i64> argument to both lanes. No FileCheck
; directives are attached to this function.
define arm_apcscc <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
; The explicit block label keeps value number 0 free for the instruction
; below; an unnamed entry block would otherwise take slot 0 itself.
entry:
  %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %0
}
; Broadcast lane 1 of a <2 x double> argument to both lanes. No FileCheck
; directives are attached to this function.
define arm_apcscc <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
; The explicit block label keeps value number 0 free for the instruction
; below; an unnamed entry block would otherwise take slot 0 itself.
entry:
  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %0
}
; Broadcast lane 0 of a <2 x double> argument to both lanes. No FileCheck
; directives are attached to this function.
define arm_apcscc <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
; The explicit block label keeps value number 0 free for the instruction
; below; an unnamed entry block would otherwise take slot 0 itself.
entry:
  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %0
}