1
//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
3
// The LLVM Compiler Infrastructure
5
// This file is distributed under the University of Illinois Open Source
6
// License. See LICENSE.TXT for details.
8
//===----------------------------------------------------------------------===//
10
// This implements a fast scheduler.
12
//===----------------------------------------------------------------------===//
14
#define DEBUG_TYPE "pre-RA-sched"
15
#include "ScheduleDAGSDNodes.h"
16
#include "llvm/CodeGen/SchedulerRegistry.h"
17
#include "llvm/CodeGen/SelectionDAGISel.h"
18
#include "llvm/Target/TargetRegisterInfo.h"
19
#include "llvm/Target/TargetData.h"
20
#include "llvm/Target/TargetInstrInfo.h"
21
#include "llvm/Support/Debug.h"
22
#include "llvm/ADT/SmallSet.h"
23
#include "llvm/ADT/Statistic.h"
24
#include "llvm/ADT/STLExtras.h"
25
#include "llvm/Support/ErrorHandling.h"
26
#include "llvm/Support/raw_ostream.h"
29
STATISTIC(NumUnfolds, "Number of nodes unfolded");
30
STATISTIC(NumDups, "Number of duplicated nodes");
31
STATISTIC(NumPRCopies, "Number of physical copies");
33
static RegisterScheduler
34
fastDAGScheduler("fast", "Fast suboptimal list scheduling",
35
createFastDAGScheduler);
38
/// FastPriorityQueue - A degenerate priority queue that considers
39
/// all nodes to have the same priority.
41
struct FastPriorityQueue {
42
SmallVector<SUnit *, 16> Queue;
44
bool empty() const { return Queue.empty(); }
51
if (empty()) return NULL;
52
SUnit *V = Queue.back();
58
//===----------------------------------------------------------------------===//
59
/// ScheduleDAGFast - The actual "fast" list scheduler implementation.
61
class ScheduleDAGFast : public ScheduleDAGSDNodes {
63
/// AvailableQueue - The priority queue to use for the available SUnits.
64
FastPriorityQueue AvailableQueue;
66
/// LiveRegDefs - A set of physical registers and their definition
67
/// that are "live". These nodes must be scheduled before any other nodes that
68
/// modifies the registers can be scheduled.
70
std::vector<SUnit*> LiveRegDefs;
71
std::vector<unsigned> LiveRegCycles;
74
ScheduleDAGFast(MachineFunction &mf)
75
: ScheduleDAGSDNodes(mf) {}
79
/// AddPred - adds a predecessor edge to SUnit SU.
80
/// This returns true if this is a new predecessor.
81
void AddPred(SUnit *SU, const SDep &D) {
85
/// RemovePred - removes a predecessor edge from SUnit SU.
86
/// This returns true if an edge was removed.
87
void RemovePred(SUnit *SU, const SDep &D) {
92
void ReleasePred(SUnit *SU, SDep *PredEdge);
93
void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
94
void ScheduleNodeBottomUp(SUnit*, unsigned);
95
SUnit *CopyAndMoveSuccessors(SUnit*);
96
void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
97
const TargetRegisterClass*,
98
const TargetRegisterClass*,
99
SmallVector<SUnit*, 2>&);
100
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
101
void ListScheduleBottomUp();
103
/// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
104
bool ForceUnitLatencies() const { return true; }
106
} // end anonymous namespace
109
/// Schedule - Schedule the DAG using list scheduling.
110
void ScheduleDAGFast::Schedule() {
111
DEBUG(dbgs() << "********** List Scheduling **********\n");
114
LiveRegDefs.resize(TRI->getNumRegs(), NULL);
115
LiveRegCycles.resize(TRI->getNumRegs(), 0);
117
// Build the scheduling graph.
118
BuildSchedGraph(NULL);
120
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
121
SUnits[su].dumpAll(this));
123
// Execute the actual scheduling loop.
124
ListScheduleBottomUp();
127
//===----------------------------------------------------------------------===//
128
// Bottom-Up Scheduling
129
//===----------------------------------------------------------------------===//
131
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
132
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
133
void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
134
SUnit *PredSU = PredEdge->getSUnit();
137
if (PredSU->NumSuccsLeft == 0) {
138
dbgs() << "*** Scheduling failed! ***\n";
140
dbgs() << " has been released too many times!\n";
144
--PredSU->NumSuccsLeft;
146
// If all the node's successors are scheduled, this node is ready
147
// to be scheduled. Ignore the special EntrySU node.
148
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
149
PredSU->isAvailable = true;
150
AvailableQueue.push(PredSU);
154
void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
155
// Bottom up: release predecessors
156
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
158
ReleasePred(SU, &*I);
159
if (I->isAssignedRegDep()) {
160
// This is a physical register dependency and it's impossible or
161
// expensive to copy the register. Make sure nothing that can
162
// clobber the register is scheduled between the predecessor and
164
if (!LiveRegDefs[I->getReg()]) {
166
LiveRegDefs[I->getReg()] = I->getSUnit();
167
LiveRegCycles[I->getReg()] = CurCycle;
173
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
174
/// count of its predecessors. If a predecessor pending count is zero, add it to
175
/// the Available queue.
176
void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
177
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
178
DEBUG(SU->dump(this));
180
assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
181
SU->setHeightToAtLeast(CurCycle);
182
Sequence.push_back(SU);
184
ReleasePredecessors(SU, CurCycle);
186
// Release all the implicit physical register defs that are live.
187
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
189
if (I->isAssignedRegDep()) {
190
if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
191
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
192
assert(LiveRegDefs[I->getReg()] == SU &&
193
"Physical register dependency violated?");
195
LiveRegDefs[I->getReg()] = NULL;
196
LiveRegCycles[I->getReg()] = 0;
201
SU->isScheduled = true;
204
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
205
/// successors to the newly created node.
206
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
207
if (SU->getNode()->getFlaggedNode())
210
SDNode *N = SU->getNode();
215
bool TryUnfold = false;
216
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
217
EVT VT = N->getValueType(i);
220
else if (VT == MVT::Other)
223
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
224
const SDValue &Op = N->getOperand(i);
225
EVT VT = Op.getNode()->getValueType(Op.getResNo());
231
SmallVector<SDNode*, 2> NewNodes;
232
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
235
DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
236
assert(NewNodes.size() == 2 && "Expected a load folding node!");
239
SDNode *LoadNode = NewNodes[0];
240
unsigned NumVals = N->getNumValues();
241
unsigned OldNumVals = SU->getNode()->getNumValues();
242
for (unsigned i = 0; i != NumVals; ++i)
243
DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
244
DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
245
SDValue(LoadNode, 1));
247
SUnit *NewSU = NewSUnit(N);
248
assert(N->getNodeId() == -1 && "Node already inserted!");
249
N->setNodeId(NewSU->NodeNum);
251
const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
252
for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
253
if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
254
NewSU->isTwoAddress = true;
258
if (TID.isCommutable())
259
NewSU->isCommutable = true;
261
// LoadNode may already exist. This can happen when there is another
262
// load from the same location and producing the same type of value
263
// but it has different alignment or volatileness.
264
bool isNewLoad = true;
266
if (LoadNode->getNodeId() != -1) {
267
LoadSU = &SUnits[LoadNode->getNodeId()];
270
LoadSU = NewSUnit(LoadNode);
271
LoadNode->setNodeId(LoadSU->NodeNum);
275
SmallVector<SDep, 4> ChainSuccs;
276
SmallVector<SDep, 4> LoadPreds;
277
SmallVector<SDep, 4> NodePreds;
278
SmallVector<SDep, 4> NodeSuccs;
279
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
283
else if (I->getSUnit()->getNode() &&
284
I->getSUnit()->getNode()->isOperandOf(LoadNode))
285
LoadPreds.push_back(*I);
287
NodePreds.push_back(*I);
289
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
292
ChainSuccs.push_back(*I);
294
NodeSuccs.push_back(*I);
297
if (ChainPred.getSUnit()) {
298
RemovePred(SU, ChainPred);
300
AddPred(LoadSU, ChainPred);
302
for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
303
const SDep &Pred = LoadPreds[i];
304
RemovePred(SU, Pred);
306
AddPred(LoadSU, Pred);
309
for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
310
const SDep &Pred = NodePreds[i];
311
RemovePred(SU, Pred);
312
AddPred(NewSU, Pred);
314
for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
315
SDep D = NodeSuccs[i];
316
SUnit *SuccDep = D.getSUnit();
318
RemovePred(SuccDep, D);
322
for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
323
SDep D = ChainSuccs[i];
324
SUnit *SuccDep = D.getSUnit();
326
RemovePred(SuccDep, D);
333
AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
338
if (NewSU->NumSuccsLeft == 0) {
339
NewSU->isAvailable = true;
345
DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
348
// New SUnit has the exact same predecessors.
349
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
351
if (!I->isArtificial())
354
// Only copy scheduled successors. Cut them from old node's successor
355
// list and move them over.
356
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
357
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
359
if (I->isArtificial())
361
SUnit *SuccSU = I->getSUnit();
362
if (SuccSU->isScheduled) {
367
DelDeps.push_back(std::make_pair(SuccSU, D));
370
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
371
RemovePred(DelDeps[i].first, DelDeps[i].second);
377
/// InsertCopiesAndMoveSuccs - Insert register copies and move all
378
/// scheduled successors of the given SUnit to the last copy.
379
void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
380
const TargetRegisterClass *DestRC,
381
const TargetRegisterClass *SrcRC,
382
SmallVector<SUnit*, 2> &Copies) {
383
SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
384
CopyFromSU->CopySrcRC = SrcRC;
385
CopyFromSU->CopyDstRC = DestRC;
387
SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
388
CopyToSU->CopySrcRC = DestRC;
389
CopyToSU->CopyDstRC = SrcRC;
391
// Only copy scheduled successors. Cut them from old node's successor
392
// list and move them over.
393
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
394
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
396
if (I->isArtificial())
398
SUnit *SuccSU = I->getSUnit();
399
if (SuccSU->isScheduled) {
401
D.setSUnit(CopyToSU);
403
DelDeps.push_back(std::make_pair(SuccSU, *I));
406
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
407
RemovePred(DelDeps[i].first, DelDeps[i].second);
410
AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
411
AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
413
Copies.push_back(CopyFromSU);
414
Copies.push_back(CopyToSU);
419
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
420
/// definition of the specified node.
421
/// FIXME: Move to SelectionDAG?
422
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
423
const TargetInstrInfo *TII) {
424
const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
425
assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
426
unsigned NumRes = TID.getNumDefs();
427
for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
432
return N->getValueType(NumRes);
435
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
436
/// scheduling of the given node to satisfy live physical register dependencies.
437
/// If the specific node is the last one that's available to schedule, do
438
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
439
bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
440
SmallVector<unsigned, 4> &LRegs){
441
if (NumLiveRegs == 0)
444
SmallSet<unsigned, 4> RegAdded;
445
// If this node would clobber any "live" register, then it's not ready.
446
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
448
if (I->isAssignedRegDep()) {
449
unsigned Reg = I->getReg();
450
if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
451
if (RegAdded.insert(Reg))
452
LRegs.push_back(Reg);
454
for (const unsigned *Alias = TRI->getAliasSet(Reg);
456
if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
457
if (RegAdded.insert(*Alias))
458
LRegs.push_back(*Alias);
463
for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
464
if (!Node->isMachineOpcode())
466
const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
467
if (!TID.ImplicitDefs)
469
for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
470
if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
471
if (RegAdded.insert(*Reg))
472
LRegs.push_back(*Reg);
474
for (const unsigned *Alias = TRI->getAliasSet(*Reg);
476
if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
477
if (RegAdded.insert(*Alias))
478
LRegs.push_back(*Alias);
482
return !LRegs.empty();
486
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
488
void ScheduleDAGFast::ListScheduleBottomUp() {
489
unsigned CurCycle = 0;
491
// Release any predecessors of the special Exit node.
492
ReleasePredecessors(&ExitSU, CurCycle);
494
// Add root to Available queue.
495
if (!SUnits.empty()) {
496
SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
497
assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
498
RootSU->isAvailable = true;
499
AvailableQueue.push(RootSU);
502
// While Available queue is not empty, grab the node with the highest
503
// priority. If it is not ready put it back. Schedule the node.
504
SmallVector<SUnit*, 4> NotReady;
505
DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
506
Sequence.reserve(SUnits.size());
507
while (!AvailableQueue.empty()) {
508
bool Delayed = false;
510
SUnit *CurSU = AvailableQueue.pop();
512
SmallVector<unsigned, 4> LRegs;
513
if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
516
LRegsMap.insert(std::make_pair(CurSU, LRegs));
518
CurSU->isPending = true; // This SU is not in AvailableQueue right now.
519
NotReady.push_back(CurSU);
520
CurSU = AvailableQueue.pop();
523
// All candidates are delayed due to live physical reg dependencies.
524
// Try code duplication or inserting cross class copies
526
if (Delayed && !CurSU) {
528
// Try duplicating the nodes that produces these
529
// "expensive to copy" values to break the dependency. In case even
530
// that doesn't work, insert cross class copies.
531
SUnit *TrySU = NotReady[0];
532
SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
533
assert(LRegs.size() == 1 && "Can't handle this yet!");
534
unsigned Reg = LRegs[0];
535
SUnit *LRDef = LiveRegDefs[Reg];
536
EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
537
const TargetRegisterClass *RC =
538
TRI->getPhysicalRegisterRegClass(Reg, VT);
539
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
541
// If cross copy register class is null, then it must be possible copy
542
// the value directly. Do not try duplicate the def.
545
NewDef = CopyAndMoveSuccessors(LRDef);
549
// Issue copies, these can be expensive cross register class copies.
550
SmallVector<SUnit*, 2> Copies;
551
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
552
DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
553
<< " to SU #" << Copies.front()->NodeNum << "\n");
554
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
555
/*Reg=*/0, /*isNormalMemory=*/false,
556
/*isMustAlias=*/false, /*isArtificial=*/true));
557
NewDef = Copies.back();
560
DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
561
<< " to SU #" << TrySU->NodeNum << "\n");
562
LiveRegDefs[Reg] = NewDef;
563
AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
564
/*Reg=*/0, /*isNormalMemory=*/false,
565
/*isMustAlias=*/false, /*isArtificial=*/true));
566
TrySU->isAvailable = false;
571
llvm_unreachable("Unable to resolve live physical register dependencies!");
575
// Add the nodes that aren't ready back onto the available list.
576
for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
577
NotReady[i]->isPending = false;
578
// May no longer be available due to backtracking.
579
if (NotReady[i]->isAvailable)
580
AvailableQueue.push(NotReady[i]);
585
ScheduleNodeBottomUp(CurSU, CurCycle);
589
// Reverse the order since it is bottom up.
590
std::reverse(Sequence.begin(), Sequence.end());
593
VerifySchedule(/*isBottomUp=*/true);
597
//===----------------------------------------------------------------------===//
598
// Public Constructor Functions
599
//===----------------------------------------------------------------------===//
601
llvm::ScheduleDAGSDNodes *
602
llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
603
return new ScheduleDAGFast(*IS->MF);