1
; RUN: llc -march=amdgcn -verify-machineinstrs -mtriple=amdgcn-- -o - %s | FileCheck %s
3
declare float @llvm.fma.f32(float, float, float)
5
; This checks that rematerialization support of the coalescer does not
6
; unnecessarily widen the register class. Without those fixes, more than
; 20 VGPRs are used.
8
; Also check that some rematerialization of the 0 constant happened.
10
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
11
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
12
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
13
; CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0
14
; It's probably OK if this is slightly higher:
15
; CHECK: ; NumVgprs: 9
16
define void @foobar(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in, i32 %flag) {
18
%cmpflag = icmp eq i32 %flag, 1
19
br i1 %cmpflag, label %loop, label %exit
22
%c = phi i32 [0, %entry], [%cnext, %loop]
23
%v0 = phi float [0.0, %entry], [%fma.0, %loop]
24
%v1 = phi float [0.0, %entry], [%fma.1, %loop]
25
%v2 = phi float [0.0, %entry], [%fma.2, %loop]
26
%v3 = phi float [0.0, %entry], [%fma.3, %loop]
28
; Try to get the 0 constant to get coalesced into a wide register
29
%blup = insertelement <4 x float> undef, float %v0, i32 0
30
store <4 x float> %blup, <4 x float> addrspace(1)* %out
32
%load = load <4 x float>, <4 x float> addrspace(1)* %in
33
%load.0 = extractelement <4 x float> %load, i32 0
34
%load.1 = extractelement <4 x float> %load, i32 1
35
%load.2 = extractelement <4 x float> %load, i32 2
36
%load.3 = extractelement <4 x float> %load, i32 3
37
%fma.0 = call float @llvm.fma.f32(float %v0, float %load.0, float %v0)
38
%fma.1 = call float @llvm.fma.f32(float %v1, float %load.1, float %v1)
39
%fma.2 = call float @llvm.fma.f32(float %v2, float %load.2, float %v2)
40
%fma.3 = call float @llvm.fma.f32(float %v3, float %load.3, float %v3)
42
%cnext = add nsw i32 %c, 1
43
%cmp = icmp eq i32 %cnext, 42
44
br i1 %cmp, label %exit, label %loop
47
%ev0 = phi float [0.0, %entry], [%fma.0, %loop]
48
%ev1 = phi float [0.0, %entry], [%fma.1, %loop]
49
%ev2 = phi float [0.0, %entry], [%fma.2, %loop]
50
%ev3 = phi float [0.0, %entry], [%fma.3, %loop]
51
%dst.0 = insertelement <4 x float> undef, float %ev0, i32 0
52
%dst.1 = insertelement <4 x float> %dst.0, float %ev1, i32 1
53
%dst.2 = insertelement <4 x float> %dst.1, float %ev2, i32 2
54
%dst.3 = insertelement <4 x float> %dst.2, float %ev3, i32 3
55
store <4 x float> %dst.3, <4 x float> addrspace(1)* %out