; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
; Vector fadd on <4 x float>: both operands are loaded with ld.w, combined
; with a single fadd.w, and the result is stored with st.w.
define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: add_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <4 x float> %1, %2
  ; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v4f32
}
; Vector fadd on <2 x double>: both operands are loaded with ld.d, combined
; with a single fadd.d, and the result is stored with st.d.
define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: add_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fadd <2 x double> %1, %2
  ; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size add_v2f64
}
; Vector fsub on <4 x float>: expects ld.w / ld.w / fsub.w / st.w, with the
; fsub.w operands in the same order as the IR operands (%a - %b).
define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: sub_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <4 x float> %1, %2
  ; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v4f32
}
; Vector fsub on <2 x double>: expects ld.d / ld.d / fsub.d / st.d, with the
; fsub.d operands in the same order as the IR operands (%a - %b).
define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: sub_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fsub <2 x double> %1, %2
  ; CHECK-DAG: fsub.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size sub_v2f64
}
; Vector fmul on <4 x float>: expects ld.w / ld.w / fmul.w / st.w.
define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: mul_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <4 x float> %1, %2
  ; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v4f32
}
; Vector fmul on <2 x double>: expects ld.d / ld.d / fmul.d / st.d.
define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: mul_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fmul <2 x double> %1, %2
  ; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size mul_v2f64
}
; llvm.fma on <4 x float>: three vectors are loaded and the intrinsic is
; expected to select fmadd.w. The instruction accumulates into its first
; register, so the store is matched against R1 rather than a fresh register.
define void @fma_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                       <4 x float>* %c) nounwind {
  ; CHECK: fma_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>, <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <4 x float> @llvm.fma.v4f32 (<4 x float> %1, <4 x float> %2,
                                              <4 x float> %3)
  ; CHECK-DAG: fmadd.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %4, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v4f32
}
; llvm.fma on <2 x double>: three vectors are loaded and the intrinsic is
; expected to select fmadd.d. The instruction accumulates into its first
; register, so the store is matched against R1 rather than a fresh register.
define void @fma_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                       <2 x double>* %c) nounwind {
  ; CHECK: fma_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>, <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = tail call <2 x double> @llvm.fma.v2f64 (<2 x double> %1, <2 x double> %2,
                                               <2 x double> %3)
  ; CHECK-DAG: fmadd.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %4, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fma_v2f64
}
; %a - (%b * %c) on <4 x float>: the separate fmul and fsub are expected to
; be combined into one fmsub.w that updates R1 in place, so the store is
; matched against R1.
define void @fmsub_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b,
                         <4 x float>* %c) nounwind {
  ; CHECK: fmsub_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = load <4 x float>, <4 x float>* %c
  ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
  %4 = fmul <4 x float> %2, %3
  %5 = fsub <4 x float> %1, %4
  ; CHECK-DAG: fmsub.w [[R1]], [[R2]], [[R3]]
  store <4 x float> %5, <4 x float>* %d
  ; CHECK-DAG: st.w [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v4f32
}
; %a - (%b * %c) on <2 x double>: the separate fmul and fsub are expected to
; be combined into one fmsub.d that updates R1 in place, so the store is
; matched against R1.
define void @fmsub_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b,
                         <2 x double>* %c) nounwind {
  ; CHECK: fmsub_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = load <2 x double>, <2 x double>* %c
  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
  %4 = fmul <2 x double> %2, %3
  %5 = fsub <2 x double> %1, %4
  ; CHECK-DAG: fmsub.d [[R1]], [[R2]], [[R3]]
  store <2 x double> %5, <2 x double>* %d
  ; CHECK-DAG: st.d [[R1]], 0($4)

  ret void
  ; CHECK: .size fmsub_v2f64
}
; Vector fdiv on <4 x float>: expects ld.w / ld.w / fdiv.w / st.w, with the
; fdiv.w operands in the same order as the IR operands (%a / %b).
define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
  ; CHECK: fdiv_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = load <4 x float>, <4 x float>* %b
  ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <4 x float> %1, %2
  ; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v4f32
}
; Vector fdiv on <2 x double>: expects ld.d / ld.d / fdiv.d / st.d, with the
; fdiv.d operands in the same order as the IR operands (%a / %b).
define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
  ; CHECK: fdiv_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = load <2 x double>, <2 x double>* %b
  ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
  %3 = fdiv <2 x double> %1, %2
  ; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fdiv_v2f64
}
; llvm.fabs on <4 x float>: expected to select fmax_a.w with the loaded
; vector supplied as both source operands.
define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fabs_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.fabs.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v4f32
}
; llvm.fabs on <2 x double>: expected to select fmax_a.d with the loaded
; vector supplied as both source operands.
define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fabs_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.fabs.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fabs_v2f64
}
; llvm.exp2 on <4 x float>: expected to materialise an integer 1 with ldi.w,
; convert it with ffint_u.w, and feed it to fexp2.w together with the loaded
; exponent vector.
; NOTE(review): the fexp2.w pattern below rebinds R4 and names R3 (the ldi.w
; result) as its source, whereas fexp2_v4f32_2 uses the ffint_u.w result. It
; presumably only matches when ldi.w and ffint_u.w share a register - confirm
; before tightening.
define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32
}
; llvm.exp2 on <2 x double>: expected to materialise an integer 1 with ldi.d,
; convert it with ffint_u.d, and feed it to fexp2.d together with the loaded
; exponent vector.
; NOTE(review): the fexp2.d pattern below rebinds R4 and names R3 (the ldi.d
; result) as its source, whereas fexp2_v2f64_2 uses the ffint_u.d result. It
; presumably only matches when ldi.d and ffint_u.d share a register - confirm
; before tightening.
define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64
}
; 2.0 * llvm.exp2(x) on <4 x float>: the multiply by a constant splat is
; expected to fold into the fexp2.w, leaving the chain ldi.w -> ffint_u.w ->
; fexp2.w with no separate fmul.w matched.
define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fexp2_v4f32_2:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
  %3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
  ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
  store <4 x float> %3, <4 x float>* %c
  ; CHECK-DAG: st.w [[R5]], 0($4)

  ret void
  ; CHECK: .size fexp2_v4f32_2
}
; 2.0 * llvm.exp2(x) on <2 x double>: the multiply by a constant splat is
; expected to fold into the fexp2.d, leaving the chain ldi.d -> ffint_u.d ->
; fexp2.d with no separate fmul.d matched.
define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fexp2_v2f64_2:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
  %3 = fmul <2 x double> <double 2.0, double 2.0>, %2
  ; CHECK-DAG: ldi.d [[R2:\$w[0-9]+]], 1
  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R2]]
  ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
  store <2 x double> %3, <2 x double>* %c
  ; CHECK-DAG: st.d [[R4]], 0($4)

  ret void
  ; CHECK: .size fexp2_v2f64_2
}
; llvm.sqrt on <4 x float>: expects ld.w / fsqrt.w / st.w.
define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
  ; CHECK: fsqrt_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %1)
  ; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v4f32
}
; llvm.sqrt on <2 x double>: expects ld.d / fsqrt.d / st.d.
define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
  ; CHECK: fsqrt_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = tail call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %1)
  ; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size fsqrt_v2f64
}
; uitofp <4 x i32> -> <4 x float>: expects ld.w / ffint_u.w / st.w.
define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_u_v4f32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v4f32
}
; uitofp <2 x i64> -> <2 x double>: expects ld.d / ffint_u.d / st.d.
define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_u_v2f64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = uitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_u_v2f64
}
; sitofp <4 x i32> -> <4 x float>: expects ld.w / ffint_s.w / st.w.
define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
  ; CHECK: ffint_s_v4f32:

  %1 = load <4 x i32>, <4 x i32>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <4 x i32> %1 to <4 x float>
  ; CHECK-DAG: ffint_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x float> %2, <4 x float>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v4f32
}
; sitofp <2 x i64> -> <2 x double>: expects ld.d / ffint_s.d / st.d.
define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
  ; CHECK: ffint_s_v2f64:

  %1 = load <2 x i64>, <2 x i64>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = sitofp <2 x i64> %1 to <2 x double>
  ; CHECK-DAG: ffint_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x double> %2, <2 x double>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ffint_s_v2f64
}
; fptoui <4 x float> -> <4 x i32>: expects ld.w / ftrunc_u.w / st.w.
define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_u_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_u.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v4f32
}
; fptoui <2 x double> -> <2 x i64>: expects ld.d / ftrunc_u.d / st.d.
define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_u_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptoui <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_u.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_u_v2f64
}
; fptosi <4 x float> -> <4 x i32>: expects ld.w / ftrunc_s.w / st.w.
define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
  ; CHECK: ftrunc_s_v4f32:

  %1 = load <4 x float>, <4 x float>* %a
  ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <4 x float> %1 to <4 x i32>
  ; CHECK-DAG: ftrunc_s.w [[R3:\$w[0-9]+]], [[R1]]
  store <4 x i32> %2, <4 x i32>* %c
  ; CHECK-DAG: st.w [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v4f32
}
; fptosi <2 x double> -> <2 x i64>: expects ld.d / ftrunc_s.d / st.d.
define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
  ; CHECK: ftrunc_s_v2f64:

  %1 = load <2 x double>, <2 x double>* %a
  ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
  %2 = fptosi <2 x double> %1 to <2 x i64>
  ; CHECK-DAG: ftrunc_s.d [[R3:\$w[0-9]+]], [[R1]]
  store <2 x i64> %2, <2 x i64>* %c
  ; CHECK-DAG: st.d [[R3]], 0($4)

  ret void
  ; CHECK: .size ftrunc_s_v2f64
}
; Declarations of the vector intrinsics called by the tests in this file.
; The fma intrinsics take three operands of the same vector type.
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %Val)
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %Val)
declare <4 x float> @llvm.exp2.v4f32(<4 x float> %val)
declare <2 x double> @llvm.exp2.v2f64(<2 x double> %val)
declare <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b,
                                    <4 x float> %c)
declare <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b,
                                     <2 x double> %c)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %Val)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double> %Val)