16
16
#define DEBUG_TYPE "misched"
18
18
#include "R600MachineScheduler.h"
19
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
19
20
#include "llvm/CodeGen/MachineRegisterInfo.h"
20
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
21
21
#include "llvm/Pass.h"
22
22
#include "llvm/PassManager.h"
23
23
#include "llvm/Support/raw_ostream.h"
26
25
using namespace llvm;
31
30
TII = static_cast<const R600InstrInfo*>(DAG->TII);
32
31
TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
34
Available[IDAlu]->clear();
35
Available[IDFetch]->clear();
36
Available[IDOther]->clear();
37
33
CurInstKind = IDOther;
39
35
OccupedSlotsMask = 15;
40
36
InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
37
InstKindLimit[IDOther] = 32;
43
39
const AMDGPUSubtarget &ST = DAG->TM.getSubtarget<AMDGPUSubtarget>();
44
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD5XXX) {
45
InstKindLimit[IDFetch] = 7; // 8 minus 1 for security
47
InstKindLimit[IDFetch] = 15; // 16 minus 1 for security
40
InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
51
void R600SchedStrategy::MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst)
43
void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc,
44
std::vector<SUnit *> &QDst)
55
for (ReadyQueue::iterator I = QSrc->begin(),
56
E = QSrc->end(); I != E; ++I) {
57
(*I)->NodeQueueId &= ~QSrc->getID();
46
QDst.insert(QDst.end(), QSrc.begin(), QSrc.end());
63
50
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
66
52
NextInstKind = IDOther;
68
56
// check if we might want to switch current clause type
69
bool AllowSwitchToAlu = (CurInstKind == IDOther) ||
70
(CurEmitted > InstKindLimit[CurInstKind]) ||
71
(Available[CurInstKind]->empty());
72
bool AllowSwitchFromAlu = (CurEmitted > InstKindLimit[CurInstKind]) &&
73
(!Available[IDFetch]->empty() || !Available[IDOther]->empty());
57
bool AllowSwitchToAlu = (CurEmitted >= InstKindLimit[CurInstKind]) ||
58
(Available[CurInstKind].empty());
59
bool AllowSwitchFromAlu = (CurEmitted >= InstKindLimit[CurInstKind]) &&
60
(!Available[IDFetch].empty() || !Available[IDOther].empty());
75
62
if ((AllowSwitchToAlu && CurInstKind != IDAlu) ||
76
63
(!AllowSwitchFromAlu && CurInstKind == IDAlu)) {
80
if (CurEmitted > InstKindLimit[IDAlu])
67
if (CurEmitted >= InstKindLimit[IDAlu])
82
69
NextInstKind = IDAlu;
102
dbgs() << "picked node: ";
89
dbgs() << " ** Pick node **\n";
105
dbgs() << "NO NODE ";
106
for (int i = 0; i < IDLast; ++i) {
107
Available[i]->dump();
92
dbgs() << "NO NODE \n";
110
93
for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
111
94
const SUnit &S = DAG->SUnits[i];
112
95
if (!S.isScheduled)
121
104
void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
123
DEBUG(dbgs() << "scheduled: ");
124
DEBUG(SU->dump(DAG));
126
106
if (NextInstKind != CurInstKind) {
127
107
DEBUG(dbgs() << "Instruction Type Switch\n");
128
108
if (NextInstKind != IDAlu)
158
138
if (CurInstKind != IDFetch) {
159
139
MoveUnits(Pending[IDFetch], Available[IDFetch]);
161
MoveUnits(Pending[IDOther], Available[IDOther]);
164
143
void R600SchedStrategy::releaseTopNode(SUnit *SU) {
144
DEBUG(dbgs() << "Top Releasing ";SU->dump(DAG););
148
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
149
DEBUG(dbgs() << "Bottom Releasing ";SU->dump(DAG););
165
151
int IK = getInstKind(SU);
167
DEBUG(dbgs() << IK << " <= ");
168
DEBUG(SU->dump(DAG));
170
Pending[IK]->push(SU);
173
void R600SchedStrategy::releaseBottomNode(SUnit *SU) {
152
// There is no export clause; we can schedule one as soon as it's ready
154
Available[IDOther].push_back(SU);
156
Pending[IK].push_back(SU);
176
160
bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
186
170
MachineInstr *MI = SU->getInstr();
188
172
switch (MI->getOpcode()) {
189
175
case AMDGPU::INTERP_PAIR_XY:
190
176
case AMDGPU::INTERP_PAIR_ZW:
191
177
case AMDGPU::INTERP_VEC_LOAD:
192
179
return AluT_XYZW;
193
180
case AMDGPU::COPY:
194
if (TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
195
// %vregX = COPY Tn_X is likely to be discarded in favor of an
196
// assignment of Tn_X to %vregX; don't consider it in scheduling
199
else if (MI->getOperand(1).isUndef()) {
181
if (MI->getOperand(1).isUndef()) {
200
182
// MI will become a KILL; don't consider it in scheduling
201
183
return AluDiscarded;
246
228
int R600SchedStrategy::getInstKind(SUnit* SU) {
247
229
int Opcode = SU->getInstr()->getOpcode();
231
if (TII->usesTextureCache(Opcode) || TII->usesVertexCache(Opcode))
249
234
if (TII->isALUInstr(Opcode)) {
253
238
switch (Opcode) {
254
240
case AMDGPU::COPY:
255
241
case AMDGPU::CONST_COPY:
256
242
case AMDGPU::INTERP_PAIR_XY:
257
243
case AMDGPU::INTERP_PAIR_ZW:
258
244
case AMDGPU::INTERP_VEC_LOAD:
259
case AMDGPU::DOT4_eg_pseudo:
260
case AMDGPU::DOT4_r600_pseudo:
262
case AMDGPU::TEX_VTX_CONSTBUF:
263
case AMDGPU::TEX_VTX_TEXBUF:
265
case AMDGPU::TEX_GET_TEXTURE_RESINFO:
266
case AMDGPU::TEX_GET_GRADIENTS_H:
267
case AMDGPU::TEX_GET_GRADIENTS_V:
268
case AMDGPU::TEX_SET_GRADIENTS_H:
269
case AMDGPU::TEX_SET_GRADIENTS_V:
270
case AMDGPU::TEX_SAMPLE:
271
case AMDGPU::TEX_SAMPLE_C:
272
case AMDGPU::TEX_SAMPLE_L:
273
case AMDGPU::TEX_SAMPLE_C_L:
274
case AMDGPU::TEX_SAMPLE_LB:
275
case AMDGPU::TEX_SAMPLE_C_LB:
276
case AMDGPU::TEX_SAMPLE_G:
277
case AMDGPU::TEX_SAMPLE_C_G:
279
case AMDGPU::TXD_SHADOW:
283
dbgs() << "other inst: ";
290
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
252
SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
293
for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
255
for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
296
258
InstructionsGroupCandidate.push_back(SU->getInstr());
297
259
if (TII->canBundle(InstructionsGroupCandidate)) {
298
260
InstructionsGroupCandidate.pop_back();
261
Q.erase((It + 1).base());
302
264
InstructionsGroupCandidate.pop_back();
308
270
void R600SchedStrategy::LoadAlu() {
309
ReadyQueue *QSrc = Pending[IDAlu];
310
for (ReadyQueue::iterator I = QSrc->begin(),
311
E = QSrc->end(); I != E; ++I) {
312
(*I)->NodeQueueId &= ~QSrc->getID();
313
AluKind AK = getAluKind(*I);
314
AvailableAlus[AK].insert(*I);
271
std::vector<SUnit *> &QSrc = Pending[IDAlu];
272
for (unsigned i = 0, e = QSrc.size(); i < e; ++i) {
273
AluKind AK = getAluKind(QSrc[i]);
274
AvailableAlus[AK].push_back(QSrc[i]);
319
279
void R600SchedStrategy::PrepareNextSlot() {
355
315
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
356
316
static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
357
317
SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
358
320
SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
361
} else if (!SlotedSU) {
362
322
AssignSlot(UnslotedSU->getInstr(), Slot);
365
//Determine which one to pick (the lesser one)
366
if (CompareSUnit()(SlotedSU, UnslotedSU)) {
367
AvailableAlus[AluAny].insert(UnslotedSU);
370
AvailableAlus[IndexToID[Slot]].insert(SlotedSU);
371
AssignSlot(UnslotedSU->getInstr(), Slot);
377
326
bool R600SchedStrategy::isAvailablesAluEmpty() const {
378
return Pending[IDAlu]->empty() && AvailableAlus[AluAny].empty() &&
327
return Pending[IDAlu].empty() && AvailableAlus[AluAny].empty() &&
379
328
AvailableAlus[AluT_XYZW].empty() && AvailableAlus[AluT_X].empty() &&
380
329
AvailableAlus[AluT_Y].empty() && AvailableAlus[AluT_Z].empty() &&
381
AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty();
330
AvailableAlus[AluT_W].empty() && AvailableAlus[AluDiscarded].empty() &&
331
AvailableAlus[AluPredX].empty();
384
334
SUnit* R600SchedStrategy::pickAlu() {
385
335
while (!isAvailablesAluEmpty()) {
386
336
if (!OccupedSlotsMask) {
337
// Bottom-up scheduling: predX must come first
338
if (!AvailableAlus[AluPredX].empty()) {
339
OccupedSlotsMask = 15;
340
return PopInst(AvailableAlus[AluPredX]);
387
342
// Flush physical reg copies (RA will discard them)
388
343
if (!AvailableAlus[AluDiscarded].empty()) {
389
344
OccupedSlotsMask = 15;
395
350
return PopInst(AvailableAlus[AluT_XYZW]);
398
for (unsigned Chan = 0; Chan < 4; ++Chan) {
353
for (int Chan = 3; Chan > -1; --Chan) {
399
354
bool isOccupied = OccupedSlotsMask & (1 << Chan);
400
355
if (!isOccupied) {
401
356
SUnit *SU = AttemptFillSlot(Chan);
414
369
SUnit* R600SchedStrategy::pickOther(int QID) {
416
ReadyQueue *AQ = Available[QID];
371
std::vector<SUnit *> &AQ = Available[QID];
419
374
MoveUnits(Pending[QID], AQ);
423
AQ->remove(AQ->begin());
378
AQ.resize(AQ.size() - 1);