; REQUIRES: asserts
; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched -stats 2>&1 | FileCheck %s
;
; Verify that register pressure heuristics are working in MachineScheduler.
;
; We can further reduce spills in this case with a global register
; pressure heuristic, like sethi-ullman numbers or biasing toward
; scheduled subtrees. However, these heuristics are marginally
; beneficial on x86_64 and exacerbate register pressure in other
; more complex cases.
;
; CHECK: @wrap_mul4
; CHECK: 23 regalloc - Number of spills inserted
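;
; The function below is a fully unrolled 4x4 matrix multiply over doubles,
; Out = A * B stored row-major. All 16 elements of B are loaded up front and
; stay live while the four row blocks are computed, which is what makes this
; a good register-pressure stress test. A minimal C sketch of the computation
; (reconstructed from the IR, not necessarily the original source) is:
;
;   void wrap_mul4(double Out[16], double A[4][4], double B[4][4]) {
;     for (int i = 0; i < 4; ++i)
;       for (int j = 0; j < 4; ++j) {
;         double sum = 0.0;
;         for (int k = 0; k < 4; ++k)
;           sum += A[i][k] * B[k][j]; /* row of A times column of B */
;         Out[i * 4 + j] = sum;
;       }
;   }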

define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 {
entry:
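  ; Out[0..3]: row 0 of A dotted with each column of B. This block also loads
  ; all 16 elements of B (%1, %3, %5, %7, %8..%19), which remain live through
  ; the rest of the function.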
  %arrayidx1.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 0
  %0 = load double, double* %arrayidx1.i, align 8
  %arrayidx3.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 0
  %1 = load double, double* %arrayidx3.i, align 8
  %mul.i = fmul double %0, %1
  %arrayidx5.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 1
  %2 = load double, double* %arrayidx5.i, align 8
  %arrayidx7.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 0
  %3 = load double, double* %arrayidx7.i, align 8
  %mul8.i = fmul double %2, %3
  %add.i = fadd double %mul.i, %mul8.i
  %arrayidx10.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 2
  %4 = load double, double* %arrayidx10.i, align 8
  %arrayidx12.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 0
  %5 = load double, double* %arrayidx12.i, align 8
  %mul13.i = fmul double %4, %5
  %add14.i = fadd double %add.i, %mul13.i
  %arrayidx16.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 3
  %6 = load double, double* %arrayidx16.i, align 8
  %arrayidx18.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 0
  %7 = load double, double* %arrayidx18.i, align 8
  %mul19.i = fmul double %6, %7
  %add20.i = fadd double %add14.i, %mul19.i
  %arrayidx25.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 1
  %8 = load double, double* %arrayidx25.i, align 8
  %mul26.i = fmul double %0, %8
  %arrayidx30.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 1
  %9 = load double, double* %arrayidx30.i, align 8
  %mul31.i = fmul double %2, %9
  %add32.i = fadd double %mul26.i, %mul31.i
  %arrayidx36.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 1
  %10 = load double, double* %arrayidx36.i, align 8
  %mul37.i = fmul double %4, %10
  %add38.i = fadd double %add32.i, %mul37.i
  %arrayidx42.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 1
  %11 = load double, double* %arrayidx42.i, align 8
  %mul43.i = fmul double %6, %11
  %add44.i = fadd double %add38.i, %mul43.i
  %arrayidx49.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 2
  %12 = load double, double* %arrayidx49.i, align 8
  %mul50.i = fmul double %0, %12
  %arrayidx54.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 2
  %13 = load double, double* %arrayidx54.i, align 8
  %mul55.i = fmul double %2, %13
  %add56.i = fadd double %mul50.i, %mul55.i
  %arrayidx60.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 2
  %14 = load double, double* %arrayidx60.i, align 8
  %mul61.i = fmul double %4, %14
  %add62.i = fadd double %add56.i, %mul61.i
  %arrayidx66.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 2
  %15 = load double, double* %arrayidx66.i, align 8
  %mul67.i = fmul double %6, %15
  %add68.i = fadd double %add62.i, %mul67.i
  %arrayidx73.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 3
  %16 = load double, double* %arrayidx73.i, align 8
  %mul74.i = fmul double %0, %16
  %arrayidx78.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 3
  %17 = load double, double* %arrayidx78.i, align 8
  %mul79.i = fmul double %2, %17
  %add80.i = fadd double %mul74.i, %mul79.i
  %arrayidx84.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 3
  %18 = load double, double* %arrayidx84.i, align 8
  %mul85.i = fmul double %4, %18
  %add86.i = fadd double %add80.i, %mul85.i
  %arrayidx90.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 3
  %19 = load double, double* %arrayidx90.i, align 8
  %mul91.i = fmul double %6, %19
  %add92.i = fadd double %add86.i, %mul91.i
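  ; Out[4..7]: row 1 of A (%20..%23) dotted with each column of B, reusing
  ; the 16 B values loaded above.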
  %arrayidx95.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 0
  %20 = load double, double* %arrayidx95.i, align 8
  %mul98.i = fmul double %1, %20
  %arrayidx100.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 1
  %21 = load double, double* %arrayidx100.i, align 8
  %mul103.i = fmul double %3, %21
  %add104.i = fadd double %mul98.i, %mul103.i
  %arrayidx106.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 2
  %22 = load double, double* %arrayidx106.i, align 8
  %mul109.i = fmul double %5, %22
  %add110.i = fadd double %add104.i, %mul109.i
  %arrayidx112.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 3
  %23 = load double, double* %arrayidx112.i, align 8
  %mul115.i = fmul double %7, %23
  %add116.i = fadd double %add110.i, %mul115.i
  %mul122.i = fmul double %8, %20
  %mul127.i = fmul double %9, %21
  %add128.i = fadd double %mul122.i, %mul127.i
  %mul133.i = fmul double %10, %22
  %add134.i = fadd double %add128.i, %mul133.i
  %mul139.i = fmul double %11, %23
  %add140.i = fadd double %add134.i, %mul139.i
  %mul146.i = fmul double %12, %20
  %mul151.i = fmul double %13, %21
  %add152.i = fadd double %mul146.i, %mul151.i
  %mul157.i = fmul double %14, %22
  %add158.i = fadd double %add152.i, %mul157.i
  %mul163.i = fmul double %15, %23
  %add164.i = fadd double %add158.i, %mul163.i
  %mul170.i = fmul double %16, %20
  %mul175.i = fmul double %17, %21
  %add176.i = fadd double %mul170.i, %mul175.i
  %mul181.i = fmul double %18, %22
  %add182.i = fadd double %add176.i, %mul181.i
  %mul187.i = fmul double %19, %23
  %add188.i = fadd double %add182.i, %mul187.i
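  ; Out[8..11]: row 2 of A (%24..%27) dotted with each column of B.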
  %arrayidx191.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 0
  %24 = load double, double* %arrayidx191.i, align 8
  %mul194.i = fmul double %1, %24
  %arrayidx196.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 1
  %25 = load double, double* %arrayidx196.i, align 8
  %mul199.i = fmul double %3, %25
  %add200.i = fadd double %mul194.i, %mul199.i
  %arrayidx202.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 2
  %26 = load double, double* %arrayidx202.i, align 8
  %mul205.i = fmul double %5, %26
  %add206.i = fadd double %add200.i, %mul205.i
  %arrayidx208.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 3
  %27 = load double, double* %arrayidx208.i, align 8
  %mul211.i = fmul double %7, %27
  %add212.i = fadd double %add206.i, %mul211.i
  %mul218.i = fmul double %8, %24
  %mul223.i = fmul double %9, %25
  %add224.i = fadd double %mul218.i, %mul223.i
  %mul229.i = fmul double %10, %26
  %add230.i = fadd double %add224.i, %mul229.i
  %mul235.i = fmul double %11, %27
  %add236.i = fadd double %add230.i, %mul235.i
  %mul242.i = fmul double %12, %24
  %mul247.i = fmul double %13, %25
  %add248.i = fadd double %mul242.i, %mul247.i
  %mul253.i = fmul double %14, %26
  %add254.i = fadd double %add248.i, %mul253.i
  %mul259.i = fmul double %15, %27
  %add260.i = fadd double %add254.i, %mul259.i
  %mul266.i = fmul double %16, %24
  %mul271.i = fmul double %17, %25
  %add272.i = fadd double %mul266.i, %mul271.i
  %mul277.i = fmul double %18, %26
  %add278.i = fadd double %add272.i, %mul277.i
  %mul283.i = fmul double %19, %27
  %add284.i = fadd double %add278.i, %mul283.i
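  ; Out[12..15]: row 3 of A (%28..%31) dotted with each column of B.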
  %arrayidx287.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 0
  %28 = load double, double* %arrayidx287.i, align 8
  %mul290.i = fmul double %1, %28
  %arrayidx292.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 1
  %29 = load double, double* %arrayidx292.i, align 8
  %mul295.i = fmul double %3, %29
  %add296.i = fadd double %mul290.i, %mul295.i
  %arrayidx298.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 2
  %30 = load double, double* %arrayidx298.i, align 8
  %mul301.i = fmul double %5, %30
  %add302.i = fadd double %add296.i, %mul301.i
  %arrayidx304.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 3
  %31 = load double, double* %arrayidx304.i, align 8
  %mul307.i = fmul double %7, %31
  %add308.i = fadd double %add302.i, %mul307.i
  %mul314.i = fmul double %8, %28
  %mul319.i = fmul double %9, %29
  %add320.i = fadd double %mul314.i, %mul319.i
  %mul325.i = fmul double %10, %30
  %add326.i = fadd double %add320.i, %mul325.i
  %mul331.i = fmul double %11, %31
  %add332.i = fadd double %add326.i, %mul331.i
  %mul338.i = fmul double %12, %28
  %mul343.i = fmul double %13, %29
  %add344.i = fadd double %mul338.i, %mul343.i
  %mul349.i = fmul double %14, %30
  %add350.i = fadd double %add344.i, %mul349.i
  %mul355.i = fmul double %15, %31
  %add356.i = fadd double %add350.i, %mul355.i
  %mul362.i = fmul double %16, %28
  %mul367.i = fmul double %17, %29
  %add368.i = fadd double %mul362.i, %mul367.i
  %mul373.i = fmul double %18, %30
  %add374.i = fadd double %add368.i, %mul373.i
  %mul379.i = fmul double %19, %31
  %add380.i = fadd double %add374.i, %mul379.i
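  ; Store the 16 results to Out in row-major order.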
  store double %add20.i, double* %Out, align 8
  %Res.i.sroa.1.8.idx2 = getelementptr inbounds double, double* %Out, i64 1
  store double %add44.i, double* %Res.i.sroa.1.8.idx2, align 8
  %Res.i.sroa.2.16.idx4 = getelementptr inbounds double, double* %Out, i64 2
  store double %add68.i, double* %Res.i.sroa.2.16.idx4, align 8
  %Res.i.sroa.3.24.idx6 = getelementptr inbounds double, double* %Out, i64 3
  store double %add92.i, double* %Res.i.sroa.3.24.idx6, align 8
  %Res.i.sroa.4.32.idx8 = getelementptr inbounds double, double* %Out, i64 4
  store double %add116.i, double* %Res.i.sroa.4.32.idx8, align 8
  %Res.i.sroa.5.40.idx10 = getelementptr inbounds double, double* %Out, i64 5
  store double %add140.i, double* %Res.i.sroa.5.40.idx10, align 8
  %Res.i.sroa.6.48.idx12 = getelementptr inbounds double, double* %Out, i64 6
  store double %add164.i, double* %Res.i.sroa.6.48.idx12, align 8
  %Res.i.sroa.7.56.idx14 = getelementptr inbounds double, double* %Out, i64 7
  store double %add188.i, double* %Res.i.sroa.7.56.idx14, align 8
  %Res.i.sroa.8.64.idx16 = getelementptr inbounds double, double* %Out, i64 8
  store double %add212.i, double* %Res.i.sroa.8.64.idx16, align 8
  %Res.i.sroa.9.72.idx18 = getelementptr inbounds double, double* %Out, i64 9
  store double %add236.i, double* %Res.i.sroa.9.72.idx18, align 8
  %Res.i.sroa.10.80.idx20 = getelementptr inbounds double, double* %Out, i64 10
  store double %add260.i, double* %Res.i.sroa.10.80.idx20, align 8
  %Res.i.sroa.11.88.idx22 = getelementptr inbounds double, double* %Out, i64 11
  store double %add284.i, double* %Res.i.sroa.11.88.idx22, align 8
  %Res.i.sroa.12.96.idx24 = getelementptr inbounds double, double* %Out, i64 12
  store double %add308.i, double* %Res.i.sroa.12.96.idx24, align 8
  %Res.i.sroa.13.104.idx26 = getelementptr inbounds double, double* %Out, i64 13
  store double %add332.i, double* %Res.i.sroa.13.104.idx26, align 8
  %Res.i.sroa.14.112.idx28 = getelementptr inbounds double, double* %Out, i64 14
  store double %add356.i, double* %Res.i.sroa.14.112.idx28, align 8
  %Res.i.sroa.15.120.idx30 = getelementptr inbounds double, double* %Out, i64 15
  store double %add380.i, double* %Res.i.sroa.15.120.idx30, align 8
  ret void
}

attributes #0 = { noinline nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }