; RUN: opt < %s -instcombine | \
; RUN: llc -march=ppc32 -mcpu=g5 | not grep vperm
; RUN: llc < %s -march=ppc32 -mcpu=g5 > %t
; RUN: grep vsldoi %t | count 2
; RUN: grep vmrgh %t | count 7
; RUN: grep vmrgl %t | count 6
; RUN: grep vpkuhum %t | count 1
; RUN: grep vpkuwum %t | count 1
; VSLDOI_xy: byte shift across two different source vectors.
; Both inputs are reinterpreted as <16 x i8>. Result bytes 0..10 are bytes
; 5..15 of the vector loaded from %A, and result bytes 11..15 are bytes 0..4
; of the vector loaded from %B. The result is stored back through %A.
; Presumably one of the two vsldoi matches counted by the RUN lines.
define void @VSLDOI_xy(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  %tmp.upgrd.1 = bitcast <8 x i16> %tmp to <16 x i8>
  %tmp2.upgrd.2 = bitcast <8 x i16> %tmp2 to <16 x i8>
  ; Bytes 5..15 of the first operand.
  %tmp.upgrd.3 = extractelement <16 x i8> %tmp.upgrd.1, i32 5
  %tmp3 = extractelement <16 x i8> %tmp.upgrd.1, i32 6
  %tmp4 = extractelement <16 x i8> %tmp.upgrd.1, i32 7
  %tmp5 = extractelement <16 x i8> %tmp.upgrd.1, i32 8
  %tmp6 = extractelement <16 x i8> %tmp.upgrd.1, i32 9
  %tmp7 = extractelement <16 x i8> %tmp.upgrd.1, i32 10
  %tmp8 = extractelement <16 x i8> %tmp.upgrd.1, i32 11
  %tmp9 = extractelement <16 x i8> %tmp.upgrd.1, i32 12
  %tmp10 = extractelement <16 x i8> %tmp.upgrd.1, i32 13
  %tmp11 = extractelement <16 x i8> %tmp.upgrd.1, i32 14
  %tmp12 = extractelement <16 x i8> %tmp.upgrd.1, i32 15
  ; Bytes 0..4 of the second operand.
  %tmp13 = extractelement <16 x i8> %tmp2.upgrd.2, i32 0
  %tmp14 = extractelement <16 x i8> %tmp2.upgrd.2, i32 1
  %tmp15 = extractelement <16 x i8> %tmp2.upgrd.2, i32 2
  %tmp16 = extractelement <16 x i8> %tmp2.upgrd.2, i32 3
  %tmp17 = extractelement <16 x i8> %tmp2.upgrd.2, i32 4
  ; Rebuild the result vector one lane at a time, lanes 0..15.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.3, i32 0
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
  %tmp33.upgrd.4 = bitcast <16 x i8> %tmp33 to <8 x i16>
  store <8 x i16> %tmp33.upgrd.4, <8 x i16>* %A
  ret void
}
; VSLDOI_xx: the same byte-shift pattern as a two-operand shift by 5, but both
; operands are loaded from %A (%B is unused). Result bytes 0..10 are bytes
; 5..15 of the first load and result bytes 11..15 are bytes 0..4 of the second
; load. The result is stored back through %A.
; Presumably one of the two vsldoi matches counted by the RUN lines.
define void @VSLDOI_xx(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %A
  %tmp.upgrd.5 = bitcast <8 x i16> %tmp to <16 x i8>
  %tmp2.upgrd.6 = bitcast <8 x i16> %tmp2 to <16 x i8>
  ; Bytes 5..15 of the first load.
  %tmp.upgrd.7 = extractelement <16 x i8> %tmp.upgrd.5, i32 5
  %tmp3 = extractelement <16 x i8> %tmp.upgrd.5, i32 6
  %tmp4 = extractelement <16 x i8> %tmp.upgrd.5, i32 7
  %tmp5 = extractelement <16 x i8> %tmp.upgrd.5, i32 8
  %tmp6 = extractelement <16 x i8> %tmp.upgrd.5, i32 9
  %tmp7 = extractelement <16 x i8> %tmp.upgrd.5, i32 10
  %tmp8 = extractelement <16 x i8> %tmp.upgrd.5, i32 11
  %tmp9 = extractelement <16 x i8> %tmp.upgrd.5, i32 12
  %tmp10 = extractelement <16 x i8> %tmp.upgrd.5, i32 13
  %tmp11 = extractelement <16 x i8> %tmp.upgrd.5, i32 14
  %tmp12 = extractelement <16 x i8> %tmp.upgrd.5, i32 15
  ; Bytes 0..4 of the second load (same memory location).
  %tmp13 = extractelement <16 x i8> %tmp2.upgrd.6, i32 0
  %tmp14 = extractelement <16 x i8> %tmp2.upgrd.6, i32 1
  %tmp15 = extractelement <16 x i8> %tmp2.upgrd.6, i32 2
  %tmp16 = extractelement <16 x i8> %tmp2.upgrd.6, i32 3
  %tmp17 = extractelement <16 x i8> %tmp2.upgrd.6, i32 4
  ; Rebuild the result vector one lane at a time, lanes 0..15.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.7, i32 0
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
  %tmp33.upgrd.8 = bitcast <16 x i8> %tmp33 to <8 x i16>
  store <8 x i16> %tmp33.upgrd.8, <8 x i16>* %A
  ret void
}
; VPERM_promote: calls the @llvm.ppc.altivec.vperm intrinsic on the vectors
; loaded from %A and %B (both reinterpreted as <4 x i32>) with a constant
; control vector whose sixteen bytes are all 14, then stores the result back
; through %A as <8 x i16>.
; The first RUN line pipes this file through instcombine and llc and requires
; that no "vperm" appears in the output.
define void @VPERM_promote(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A
  %tmp.upgrd.9 = bitcast <8 x i16> %tmp to <4 x i32>
  %tmp2 = load <8 x i16>* %B
  %tmp2.upgrd.10 = bitcast <8 x i16> %tmp2 to <4 x i32>
  %tmp3 = call <4 x i32> @llvm.ppc.altivec.vperm( <4 x i32> %tmp.upgrd.9, <4 x i32> %tmp2.upgrd.10, <16 x i8> < i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14 > )
  %tmp3.upgrd.11 = bitcast <4 x i32> %tmp3 to <8 x i16>
  store <8 x i16> %tmp3.upgrd.11, <8 x i16>* %A
  ret void
}
; Declaration of the AltiVec permute intrinsic called by @VPERM_promote:
; two <4 x i32> data operands and a <16 x i8> control vector.
declare <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32>, <4 x i32>, <16 x i8>)
; tb_l: byte interleave of the upper-indexed halves of two vectors.
; Result lanes are A[8], B[8], A[9], B[9], ..., A[15], B[15], where A and B
; are the <16 x i8> vectors loaded from %A and %B. The result is stored
; back through %A.
; Presumably one of the vmrgl matches counted by the RUN lines.
define void @tb_l(<16 x i8>* %A, <16 x i8>* %B) {
entry:
  %tmp = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  ; Alternate between the two sources for element indices 8..15.
  %tmp.upgrd.12 = extractelement <16 x i8> %tmp, i32 8
  %tmp3 = extractelement <16 x i8> %tmp2, i32 8
  %tmp4 = extractelement <16 x i8> %tmp, i32 9
  %tmp5 = extractelement <16 x i8> %tmp2, i32 9
  %tmp6 = extractelement <16 x i8> %tmp, i32 10
  %tmp7 = extractelement <16 x i8> %tmp2, i32 10
  %tmp8 = extractelement <16 x i8> %tmp, i32 11
  %tmp9 = extractelement <16 x i8> %tmp2, i32 11
  %tmp10 = extractelement <16 x i8> %tmp, i32 12
  %tmp11 = extractelement <16 x i8> %tmp2, i32 12
  %tmp12 = extractelement <16 x i8> %tmp, i32 13
  %tmp13 = extractelement <16 x i8> %tmp2, i32 13
  %tmp14 = extractelement <16 x i8> %tmp, i32 14
  %tmp15 = extractelement <16 x i8> %tmp2, i32 14
  %tmp16 = extractelement <16 x i8> %tmp, i32 15
  %tmp17 = extractelement <16 x i8> %tmp2, i32 15
  ; Rebuild the result vector one lane at a time, lanes 0..15.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.12, i32 0
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
  store <16 x i8> %tmp33, <16 x i8>* %A
  ret void
}
; th_l: halfword interleave of the upper-indexed halves of two vectors.
; Result lanes are A[4], B[4], A[5], B[5], A[6], B[6], A[7], B[7], where A and
; B are the <8 x i16> vectors loaded from %A and %B. The result is stored
; back through %A.
; Presumably one of the vmrgl matches counted by the RUN lines.
define void @th_l(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  ; Alternate between the two sources for element indices 4..7.
  %tmp.upgrd.13 = extractelement <8 x i16> %tmp, i32 4
  %tmp3 = extractelement <8 x i16> %tmp2, i32 4
  %tmp4 = extractelement <8 x i16> %tmp, i32 5
  %tmp5 = extractelement <8 x i16> %tmp2, i32 5
  %tmp6 = extractelement <8 x i16> %tmp, i32 6
  %tmp7 = extractelement <8 x i16> %tmp2, i32 6
  %tmp8 = extractelement <8 x i16> %tmp, i32 7
  %tmp9 = extractelement <8 x i16> %tmp2, i32 7
  ; Rebuild the result vector one lane at a time, lanes 0..7.
  %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.13, i32 0
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7
  store <8 x i16> %tmp17, <8 x i16>* %A
  ret void
}
; tw_l: word interleave of the upper-indexed halves of two vectors.
; Result lanes are A[2], B[2], A[3], B[3], where A and B are the <4 x i32>
; vectors loaded from %A and %B. The result is stored back through %A.
; Presumably one of the vmrgl matches counted by the RUN lines.
define void @tw_l(<4 x i32>* %A, <4 x i32>* %B) {
entry:
  %tmp = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  ; Alternate between the two sources for element indices 2..3.
  %tmp.upgrd.14 = extractelement <4 x i32> %tmp, i32 2
  %tmp3 = extractelement <4 x i32> %tmp2, i32 2
  %tmp4 = extractelement <4 x i32> %tmp, i32 3
  %tmp5 = extractelement <4 x i32> %tmp2, i32 3
  ; Rebuild the result vector one lane at a time, lanes 0..3.
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.14, i32 0
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}
; tb_h: byte interleave of the lower-indexed halves of two vectors.
; Result lanes are A[0], B[0], A[1], B[1], ..., A[7], B[7], where A and B are
; the <16 x i8> vectors loaded from %A and %B. The result is stored back
; through %A.
; Presumably one of the vmrgh matches counted by the RUN lines.
define void @tb_h(<16 x i8>* %A, <16 x i8>* %B) {
entry:
  %tmp = load <16 x i8>* %A
  %tmp2 = load <16 x i8>* %B
  ; Alternate between the two sources for element indices 0..7.
  %tmp.upgrd.15 = extractelement <16 x i8> %tmp, i32 0
  %tmp3 = extractelement <16 x i8> %tmp2, i32 0
  %tmp4 = extractelement <16 x i8> %tmp, i32 1
  %tmp5 = extractelement <16 x i8> %tmp2, i32 1
  %tmp6 = extractelement <16 x i8> %tmp, i32 2
  %tmp7 = extractelement <16 x i8> %tmp2, i32 2
  %tmp8 = extractelement <16 x i8> %tmp, i32 3
  %tmp9 = extractelement <16 x i8> %tmp2, i32 3
  %tmp10 = extractelement <16 x i8> %tmp, i32 4
  %tmp11 = extractelement <16 x i8> %tmp2, i32 4
  %tmp12 = extractelement <16 x i8> %tmp, i32 5
  %tmp13 = extractelement <16 x i8> %tmp2, i32 5
  %tmp14 = extractelement <16 x i8> %tmp, i32 6
  %tmp15 = extractelement <16 x i8> %tmp2, i32 6
  %tmp16 = extractelement <16 x i8> %tmp, i32 7
  %tmp17 = extractelement <16 x i8> %tmp2, i32 7
  ; Rebuild the result vector one lane at a time, lanes 0..15.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.15, i32 0
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
  store <16 x i8> %tmp33, <16 x i8>* %A
  ret void
}
; th_h: halfword interleave of the lower-indexed halves of two vectors.
; Result lanes are A[0], B[0], A[1], B[1], A[2], B[2], A[3], B[3], where A and
; B are the <8 x i16> vectors loaded from %A and %B. The result is stored
; back through %A.
; Presumably one of the vmrgh matches counted by the RUN lines.
define void @th_h(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A
  %tmp2 = load <8 x i16>* %B
  ; Alternate between the two sources for element indices 0..3.
  %tmp.upgrd.16 = extractelement <8 x i16> %tmp, i32 0
  %tmp3 = extractelement <8 x i16> %tmp2, i32 0
  %tmp4 = extractelement <8 x i16> %tmp, i32 1
  %tmp5 = extractelement <8 x i16> %tmp2, i32 1
  %tmp6 = extractelement <8 x i16> %tmp, i32 2
  %tmp7 = extractelement <8 x i16> %tmp2, i32 2
  %tmp8 = extractelement <8 x i16> %tmp, i32 3
  %tmp9 = extractelement <8 x i16> %tmp2, i32 3
  ; Rebuild the result vector one lane at a time, lanes 0..7.
  %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.16, i32 0
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7
  store <8 x i16> %tmp17, <8 x i16>* %A
  ret void
}
; tw_h: word interleave of the lower-indexed halves of two vectors, taking the
; vector loaded from %B first. Result lanes are B[0], A[0], B[1], A[1], where
; A and B are the <4 x i32> vectors loaded from %A and %B. The result is
; stored back through %A. Compare @tw_h_flop, which uses the opposite
; operand order.
; Presumably one of the vmrgh matches counted by the RUN lines.
define void @tw_h(<4 x i32>* %A, <4 x i32>* %B) {
entry:
  %tmp = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  ; Note the operand order: %tmp2 (from %B) supplies the even result lanes.
  %tmp.upgrd.17 = extractelement <4 x i32> %tmp2, i32 0
  %tmp3 = extractelement <4 x i32> %tmp, i32 0
  %tmp4 = extractelement <4 x i32> %tmp2, i32 1
  %tmp5 = extractelement <4 x i32> %tmp, i32 1
  ; Rebuild the result vector one lane at a time, lanes 0..3.
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.17, i32 0
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}
; tw_h_flop: word interleave of the lower-indexed halves of two vectors,
; taking the vector loaded from %A first. Result lanes are A[0], B[0], A[1],
; B[1], where A and B are the <4 x i32> vectors loaded from %A and %B. The
; result is stored back through %A. This is @tw_h with the two sources
; swapped.
; Presumably one of the vmrgh matches counted by the RUN lines.
define void @tw_h_flop(<4 x i32>* %A, <4 x i32>* %B) {
entry:
  %tmp = load <4 x i32>* %A
  %tmp2 = load <4 x i32>* %B
  ; %tmp (from %A) supplies the even result lanes here.
  %tmp.upgrd.18 = extractelement <4 x i32> %tmp, i32 0
  %tmp3 = extractelement <4 x i32> %tmp2, i32 0
  %tmp4 = extractelement <4 x i32> %tmp, i32 1
  %tmp5 = extractelement <4 x i32> %tmp2, i32 1
  ; Rebuild the result vector one lane at a time, lanes 0..3.
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.18, i32 0
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}
; VMRG_UNARY_tb_l: single-source variant of @tb_l. Each of bytes 8..15 of the
; <16 x i8> vector loaded from %A is duplicated into two adjacent result
; lanes: A[8], A[8], A[9], A[9], ..., A[15], A[15]. %B is unused. The result
; is stored back through %A.
; Presumably one of the vmrgl matches counted by the RUN lines.
define void @VMRG_UNARY_tb_l(<16 x i8>* %A, <16 x i8>* %B) {
entry:
  %tmp = load <16 x i8>* %A
  ; Each source element 8..15 is extracted twice.
  %tmp.upgrd.19 = extractelement <16 x i8> %tmp, i32 8
  %tmp3 = extractelement <16 x i8> %tmp, i32 8
  %tmp4 = extractelement <16 x i8> %tmp, i32 9
  %tmp5 = extractelement <16 x i8> %tmp, i32 9
  %tmp6 = extractelement <16 x i8> %tmp, i32 10
  %tmp7 = extractelement <16 x i8> %tmp, i32 10
  %tmp8 = extractelement <16 x i8> %tmp, i32 11
  %tmp9 = extractelement <16 x i8> %tmp, i32 11
  %tmp10 = extractelement <16 x i8> %tmp, i32 12
  %tmp11 = extractelement <16 x i8> %tmp, i32 12
  %tmp12 = extractelement <16 x i8> %tmp, i32 13
  %tmp13 = extractelement <16 x i8> %tmp, i32 13
  %tmp14 = extractelement <16 x i8> %tmp, i32 14
  %tmp15 = extractelement <16 x i8> %tmp, i32 14
  %tmp16 = extractelement <16 x i8> %tmp, i32 15
  %tmp17 = extractelement <16 x i8> %tmp, i32 15
  ; Rebuild the result vector one lane at a time, lanes 0..15.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.19, i32 0
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
  store <16 x i8> %tmp33, <16 x i8>* %A
  ret void
}
; VMRG_UNARY_th_l: single-source variant of @th_l. Each of halfwords 4..7 of
; the <8 x i16> vector loaded from %A is duplicated into two adjacent result
; lanes: A[4], A[4], A[5], A[5], A[6], A[6], A[7], A[7]. %B is unused. The
; result is stored back through %A.
; Presumably one of the vmrgl matches counted by the RUN lines.
define void @VMRG_UNARY_th_l(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A
  ; Each source element 4..7 is extracted twice.
  %tmp.upgrd.20 = extractelement <8 x i16> %tmp, i32 4
  %tmp3 = extractelement <8 x i16> %tmp, i32 4
  %tmp4 = extractelement <8 x i16> %tmp, i32 5
  %tmp5 = extractelement <8 x i16> %tmp, i32 5
  %tmp6 = extractelement <8 x i16> %tmp, i32 6
  %tmp7 = extractelement <8 x i16> %tmp, i32 6
  %tmp8 = extractelement <8 x i16> %tmp, i32 7
  %tmp9 = extractelement <8 x i16> %tmp, i32 7
  ; Rebuild the result vector one lane at a time, lanes 0..7.
  %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.20, i32 0
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7
  store <8 x i16> %tmp17, <8 x i16>* %A
  ret void
}
; VMRG_UNARY_tw_l: single-source variant of @tw_l. Words 2 and 3 of the
; <4 x i32> vector loaded from %A are each duplicated into two adjacent
; result lanes: A[2], A[2], A[3], A[3]. %B is unused. The result is stored
; back through %A.
; Presumably one of the vmrgl matches counted by the RUN lines.
define void @VMRG_UNARY_tw_l(<4 x i32>* %A, <4 x i32>* %B) {
entry:
  %tmp = load <4 x i32>* %A
  ; Each source element 2..3 is extracted twice.
  %tmp.upgrd.21 = extractelement <4 x i32> %tmp, i32 2
  %tmp3 = extractelement <4 x i32> %tmp, i32 2
  %tmp4 = extractelement <4 x i32> %tmp, i32 3
  %tmp5 = extractelement <4 x i32> %tmp, i32 3
  ; Rebuild the result vector one lane at a time, lanes 0..3.
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.21, i32 0
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}
; VMRG_UNARY_tb_h: single-source variant of @tb_h. Each of bytes 0..7 of the
; <16 x i8> vector loaded from %A is duplicated into two adjacent result
; lanes: A[0], A[0], A[1], A[1], ..., A[7], A[7]. %B is unused. The result is
; stored back through %A.
; Presumably one of the vmrgh matches counted by the RUN lines.
define void @VMRG_UNARY_tb_h(<16 x i8>* %A, <16 x i8>* %B) {
entry:
  %tmp = load <16 x i8>* %A
  ; Each source element 0..7 is extracted twice.
  %tmp.upgrd.22 = extractelement <16 x i8> %tmp, i32 0
  %tmp3 = extractelement <16 x i8> %tmp, i32 0
  %tmp4 = extractelement <16 x i8> %tmp, i32 1
  %tmp5 = extractelement <16 x i8> %tmp, i32 1
  %tmp6 = extractelement <16 x i8> %tmp, i32 2
  %tmp7 = extractelement <16 x i8> %tmp, i32 2
  %tmp8 = extractelement <16 x i8> %tmp, i32 3
  %tmp9 = extractelement <16 x i8> %tmp, i32 3
  %tmp10 = extractelement <16 x i8> %tmp, i32 4
  %tmp11 = extractelement <16 x i8> %tmp, i32 4
  %tmp12 = extractelement <16 x i8> %tmp, i32 5
  %tmp13 = extractelement <16 x i8> %tmp, i32 5
  %tmp14 = extractelement <16 x i8> %tmp, i32 6
  %tmp15 = extractelement <16 x i8> %tmp, i32 6
  %tmp16 = extractelement <16 x i8> %tmp, i32 7
  %tmp17 = extractelement <16 x i8> %tmp, i32 7
  ; Rebuild the result vector one lane at a time, lanes 0..15.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.22, i32 0
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
  store <16 x i8> %tmp33, <16 x i8>* %A
  ret void
}
; VMRG_UNARY_th_h: single-source variant of @th_h. Each of halfwords 0..3 of
; the <8 x i16> vector loaded from %A is duplicated into two adjacent result
; lanes: A[0], A[0], A[1], A[1], A[2], A[2], A[3], A[3]. %B is unused. The
; result is stored back through %A.
; Presumably one of the vmrgh matches counted by the RUN lines.
define void @VMRG_UNARY_th_h(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A
  ; Each source element 0..3 is extracted twice.
  %tmp.upgrd.23 = extractelement <8 x i16> %tmp, i32 0
  %tmp3 = extractelement <8 x i16> %tmp, i32 0
  %tmp4 = extractelement <8 x i16> %tmp, i32 1
  %tmp5 = extractelement <8 x i16> %tmp, i32 1
  %tmp6 = extractelement <8 x i16> %tmp, i32 2
  %tmp7 = extractelement <8 x i16> %tmp, i32 2
  %tmp8 = extractelement <8 x i16> %tmp, i32 3
  %tmp9 = extractelement <8 x i16> %tmp, i32 3
  ; Rebuild the result vector one lane at a time, lanes 0..7.
  %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.23, i32 0
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7
  store <8 x i16> %tmp17, <8 x i16>* %A
  ret void
}
; VMRG_UNARY_tw_h: single-source variant of @tw_h. Words 0 and 1 of the
; <4 x i32> vector loaded from %A are each duplicated into two adjacent
; result lanes: A[0], A[0], A[1], A[1]. %B is unused. The result is stored
; back through %A.
; Presumably one of the vmrgh matches counted by the RUN lines.
define void @VMRG_UNARY_tw_h(<4 x i32>* %A, <4 x i32>* %B) {
entry:
  %tmp = load <4 x i32>* %A
  ; Each source element 0..1 is extracted twice.
  %tmp.upgrd.24 = extractelement <4 x i32> %tmp, i32 0
  %tmp3 = extractelement <4 x i32> %tmp, i32 0
  %tmp4 = extractelement <4 x i32> %tmp, i32 1
  %tmp5 = extractelement <4 x i32> %tmp, i32 1
  ; Rebuild the result vector one lane at a time, lanes 0..3.
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.24, i32 0
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}
; VPCKUHUM_unary: single-source pack of the odd-indexed bytes.
; The <8 x i16> vector loaded from %A is reinterpreted twice as <16 x i8>.
; Result bytes 0..7 are bytes 1, 3, 5, ..., 15 of the first reinterpretation
; and result bytes 8..15 are the same odd-indexed bytes of the second one.
; %B is unused. The result is stored back through %A as <8 x i16>.
; Presumably the single vpkuhum match counted by the RUN lines.
define void @VPCKUHUM_unary(<8 x i16>* %A, <8 x i16>* %B) {
entry:
  %tmp = load <8 x i16>* %A
  %tmp.upgrd.25 = bitcast <8 x i16> %tmp to <16 x i8>
  %tmp3 = bitcast <8 x i16> %tmp to <16 x i8>
  ; Odd-indexed bytes of the first bitcast.
  %tmp.upgrd.26 = extractelement <16 x i8> %tmp.upgrd.25, i32 1
  %tmp4 = extractelement <16 x i8> %tmp.upgrd.25, i32 3
  %tmp5 = extractelement <16 x i8> %tmp.upgrd.25, i32 5
  %tmp6 = extractelement <16 x i8> %tmp.upgrd.25, i32 7
  %tmp7 = extractelement <16 x i8> %tmp.upgrd.25, i32 9
  %tmp8 = extractelement <16 x i8> %tmp.upgrd.25, i32 11
  %tmp9 = extractelement <16 x i8> %tmp.upgrd.25, i32 13
  %tmp10 = extractelement <16 x i8> %tmp.upgrd.25, i32 15
  ; Odd-indexed bytes of the second bitcast (same underlying value).
  %tmp11 = extractelement <16 x i8> %tmp3, i32 1
  %tmp12 = extractelement <16 x i8> %tmp3, i32 3
  %tmp13 = extractelement <16 x i8> %tmp3, i32 5
  %tmp14 = extractelement <16 x i8> %tmp3, i32 7
  %tmp15 = extractelement <16 x i8> %tmp3, i32 9
  %tmp16 = extractelement <16 x i8> %tmp3, i32 11
  %tmp17 = extractelement <16 x i8> %tmp3, i32 13
  %tmp18 = extractelement <16 x i8> %tmp3, i32 15
  ; Rebuild the result vector one lane at a time, lanes 0..15.
  %tmp19 = insertelement <16 x i8> undef, i8 %tmp.upgrd.26, i32 0
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 1
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 2
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 3
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 4
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 5
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 6
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 7
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 8
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 9
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 10
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 11
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 12
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 13
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 14
  %tmp34 = insertelement <16 x i8> %tmp33, i8 %tmp18, i32 15
  %tmp34.upgrd.27 = bitcast <16 x i8> %tmp34 to <8 x i16>
  store <8 x i16> %tmp34.upgrd.27, <8 x i16>* %A
  ret void
}
; VPCKUWUM_unary: single-source pack of the odd-indexed halfwords.
; The <4 x i32> vector loaded from %A is reinterpreted twice as <8 x i16>.
; Result halfwords 0..3 are halfwords 1, 3, 5, 7 of the first reinterpretation
; and result halfwords 4..7 are the same odd-indexed halfwords of the second
; one. %B is unused. The result is stored back through %A as <4 x i32>.
; Presumably the single vpkuwum match counted by the RUN lines.
define void @VPCKUWUM_unary(<4 x i32>* %A, <4 x i32>* %B) {
entry:
  %tmp = load <4 x i32>* %A
  %tmp.upgrd.28 = bitcast <4 x i32> %tmp to <8 x i16>
  %tmp3 = bitcast <4 x i32> %tmp to <8 x i16>
  ; Odd-indexed halfwords of the first bitcast.
  %tmp.upgrd.29 = extractelement <8 x i16> %tmp.upgrd.28, i32 1
  %tmp4 = extractelement <8 x i16> %tmp.upgrd.28, i32 3
  %tmp5 = extractelement <8 x i16> %tmp.upgrd.28, i32 5
  %tmp6 = extractelement <8 x i16> %tmp.upgrd.28, i32 7
  ; Odd-indexed halfwords of the second bitcast (same underlying value).
  %tmp7 = extractelement <8 x i16> %tmp3, i32 1
  %tmp8 = extractelement <8 x i16> %tmp3, i32 3
  %tmp9 = extractelement <8 x i16> %tmp3, i32 5
  %tmp10 = extractelement <8 x i16> %tmp3, i32 7
  ; Rebuild the result vector one lane at a time, lanes 0..7.
  %tmp11 = insertelement <8 x i16> undef, i16 %tmp.upgrd.29, i32 0
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 1
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 2
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 3
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 4
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 5
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 6
  %tmp18 = insertelement <8 x i16> %tmp17, i16 %tmp10, i32 7
  %tmp18.upgrd.30 = bitcast <8 x i16> %tmp18 to <4 x i32>
  store <4 x i32> %tmp18.upgrd.30, <4 x i32>* %A
  ret void
}