10
10
// This pass identifies loops where we can generate the PPC branch instructions
11
11
// that decrement and test the count register (CTR) (bdnz and friends).
12
// This pass is based on the HexagonHardwareLoops pass.
14
13
// The pattern that defines the induction variable can changed depending on
15
14
// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
16
15
// normalizes induction variables, and the Loop Strength Reduction pass
17
16
// run by 'llc' may also make changes to the induction variable.
18
// The pattern detected by this phase is due to running Strength Reduction.
20
18
// Criteria for CTR loops:
21
19
// - Countable loops (w/ ind. var for a trip count)
22
// - Assumes loops are normalized by IndVarSimplify
23
20
// - Try inner-most loops first
24
21
// - No nested CTR loops.
25
22
// - No function calls in loops.
27
// Note: As with unconverted loops, PPCBranchSelector must be run after this
28
// pass in order to convert long-displacement jumps into jump pairs.
30
24
//===----------------------------------------------------------------------===//
32
26
#define DEBUG_TYPE "ctrloops"
28
#include "llvm/Transforms/Scalar.h"
29
#include "llvm/ADT/Statistic.h"
30
#include "llvm/ADT/STLExtras.h"
31
#include "llvm/Analysis/Dominators.h"
32
#include "llvm/Analysis/LoopInfo.h"
33
#include "llvm/Analysis/ScalarEvolutionExpander.h"
34
#include "llvm/IR/Constants.h"
35
#include "llvm/IR/DerivedTypes.h"
36
#include "llvm/IR/InlineAsm.h"
37
#include "llvm/IR/Instructions.h"
38
#include "llvm/IR/IntrinsicInst.h"
39
#include "llvm/IR/Module.h"
40
#include "llvm/PassSupport.h"
41
#include "llvm/Support/CommandLine.h"
42
#include "llvm/Support/Debug.h"
43
#include "llvm/Support/ValueHandle.h"
44
#include "llvm/Support/raw_ostream.h"
45
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
46
#include "llvm/Transforms/Utils/Local.h"
47
#include "llvm/Transforms/Utils/LoopUtils.h"
48
#include "llvm/Target/TargetLibraryInfo.h"
49
#include "PPCTargetMachine.h"
34
#include "MCTargetDesc/PPCPredicates.h"
35
#include "PPCTargetMachine.h"
36
#include "llvm/ADT/DenseMap.h"
37
#include "llvm/ADT/Statistic.h"
38
53
#include "llvm/CodeGen/MachineDominators.h"
39
54
#include "llvm/CodeGen/MachineFunction.h"
40
55
#include "llvm/CodeGen/MachineFunctionPass.h"
41
#include "llvm/CodeGen/MachineInstrBuilder.h"
42
#include "llvm/CodeGen/MachineLoopInfo.h"
43
56
#include "llvm/CodeGen/MachineRegisterInfo.h"
44
#include "llvm/CodeGen/Passes.h"
45
#include "llvm/CodeGen/RegisterScavenging.h"
46
#include "llvm/IR/Constants.h"
47
#include "llvm/PassSupport.h"
48
#include "llvm/Support/Debug.h"
49
#include "llvm/Support/raw_ostream.h"
50
#include "llvm/Target/TargetInstrInfo.h"
51
59
#include <algorithm>
53
62
using namespace llvm;
65
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
55
68
STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
58
71
void initializePPCCTRLoopsPass(PassRegistry&);
73
void initializePPCCTRLoopsVerifyPass(PassRegistry&);
63
struct PPCCTRLoops : public MachineFunctionPass {
65
MachineRegisterInfo *MRI;
66
const TargetInstrInfo *TII;
69
static char ID; // Pass identification, replacement for typeid
71
PPCCTRLoops() : MachineFunctionPass(ID) {
72
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
75
virtual bool runOnMachineFunction(MachineFunction &MF);
77
const char *getPassName() const { return "PPC CTR Loops"; }
79
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
78
struct PPCCTRLoops : public FunctionPass {
87
PPCCTRLoops() : FunctionPass(ID), TM(0) {
88
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
90
PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
91
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
94
virtual bool runOnFunction(Function &F);
96
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
97
AU.addRequired<LoopInfo>();
98
AU.addPreserved<LoopInfo>();
99
AU.addRequired<DominatorTree>();
100
AU.addPreserved<DominatorTree>();
101
AU.addRequired<ScalarEvolution>();
105
bool mightUseCTR(const Triple &TT, BasicBlock *BB);
106
bool convertToCTRLoop(Loop *L);
109
PPCTargetMachine *TM;
114
const TargetLibraryInfo *LibInfo;
117
char PPCCTRLoops::ID = 0;
119
int PPCCTRLoops::Counter = 0;
123
struct PPCCTRLoopsVerify : public MachineFunctionPass {
127
PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
128
initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
131
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
81
132
AU.addRequired<MachineDominatorTree>();
82
AU.addPreserved<MachineDominatorTree>();
83
AU.addRequired<MachineLoopInfo>();
84
AU.addPreserved<MachineLoopInfo>();
85
133
MachineFunctionPass::getAnalysisUsage(AU);
89
/// getCanonicalInductionVariable - Check to see if the loop has a canonical
90
/// induction variable.
91
/// Should be defined in MachineLoop. Based upon version in class Loop.
92
void getCanonicalInductionVariable(MachineLoop *L,
93
SmallVector<MachineInstr *, 4> &IVars,
94
SmallVector<MachineInstr *, 4> &IOps) const;
96
/// getTripCount - Return a loop-invariant LLVM register indicating the
97
/// number of times the loop will be executed. If the trip-count cannot
98
/// be determined, this return null.
99
CountValue *getTripCount(MachineLoop *L,
100
SmallVector<MachineInstr *, 2> &OldInsts) const;
102
/// isInductionOperation - Return true if the instruction matches the
103
/// pattern for an opertion that defines an induction variable.
104
bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
106
/// isInvalidOperation - Return true if the instruction is not valid within
108
bool isInvalidLoopOperation(const MachineInstr *MI) const;
110
/// containsInavlidInstruction - Return true if the loop contains an
111
/// instruction that inhibits using the CTR loop.
112
bool containsInvalidInstruction(MachineLoop *L) const;
114
/// converToCTRLoop - Given a loop, check if we can convert it to a
115
/// CTR loop. If so, then perform the conversion and return true.
116
bool convertToCTRLoop(MachineLoop *L);
118
/// isDead - Return true if the instruction is now dead.
119
bool isDead(const MachineInstr *MI,
120
SmallVector<MachineInstr *, 1> &DeadPhis) const;
122
/// removeIfDead - Remove the instruction if it is now dead.
123
void removeIfDead(MachineInstr *MI);
126
char PPCCTRLoops::ID = 0;
129
// CountValue class - Abstraction for a trip count of a loop. A
130
// smaller vesrsion of the MachineOperand class without the concerns
131
// of changing the operand representation.
134
enum CountValueType {
143
Values(unsigned r) : RegNum(r) {}
144
Values(int64_t i) : ImmVal(i) {}
149
CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
151
explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
153
CountValueType getType() const { return Kind; }
154
bool isReg() const { return Kind == CV_Register; }
155
bool isImm() const { return Kind == CV_Immediate; }
156
bool isNeg() const { return isNegative; }
158
unsigned getReg() const {
159
assert(isReg() && "Wrong CountValue accessor");
160
return Contents.RegNum;
162
void setReg(unsigned Val) {
163
Contents.RegNum = Val;
165
int64_t getImm() const {
166
assert(isImm() && "Wrong CountValue accessor");
168
return -Contents.ImmVal;
170
return Contents.ImmVal;
172
void setImm(int64_t Val) {
173
Contents.ImmVal = Val;
176
void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
177
if (isReg()) { OS << PrintReg(getReg()); }
178
if (isImm()) { OS << getImm(); }
136
virtual bool runOnMachineFunction(MachineFunction &MF);
139
MachineDominatorTree *MDT;
142
char PPCCTRLoopsVerify::ID = 0;
181
144
} // end anonymous namespace
183
146
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
185
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
186
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
148
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
149
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
150
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
187
151
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
190
/// isCompareEquals - Returns true if the instruction is a compare equals
191
/// instruction with an immediate operand.
192
static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
194
if (MI->getOpcode() == PPC::CMPWI) {
198
} else if (MI->getOpcode() == PPC::CMPDI) {
202
} else if (MI->getOpcode() == PPC::CMPLWI) {
206
} else if (MI->getOpcode() == PPC::CMPLDI) {
216
/// createPPCCTRLoops - Factory for creating
217
/// the CTR loop phase.
218
FunctionPass *llvm::createPPCCTRLoops() {
219
return new PPCCTRLoops();
223
bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
224
DEBUG(dbgs() << "********* PPC CTR Loops *********\n");
226
bool Changed = false;
228
// get the loop information
229
MLI = &getAnalysis<MachineLoopInfo>();
230
// get the register information
231
MRI = &MF.getRegInfo();
232
// the target specific instructio info.
233
TII = MF.getTarget().getInstrInfo();
235
for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
154
FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) {
155
return new PPCCTRLoops(TM);
159
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
160
"PowerPC CTR Loops Verify", false, false)
161
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
162
INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
163
"PowerPC CTR Loops Verify", false, false)
165
FunctionPass *llvm::createPPCCTRLoopsVerify() {
166
return new PPCCTRLoopsVerify();
170
bool PPCCTRLoops::runOnFunction(Function &F) {
171
LI = &getAnalysis<LoopInfo>();
172
SE = &getAnalysis<ScalarEvolution>();
173
DT = &getAnalysis<DominatorTree>();
174
TD = getAnalysisIfAvailable<DataLayout>();
175
LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
177
bool MadeChange = false;
179
for (LoopInfo::iterator I = LI->begin(), E = LI->end();
238
if (!L->getParentLoop()) {
239
Changed |= convertToCTRLoop(L);
246
/// getCanonicalInductionVariable - Check to see if the loop has a canonical
247
/// induction variable. We check for a simple recurrence pattern - an
248
/// integer recurrence that decrements by one each time through the loop and
249
/// ends at zero. If so, return the phi node that corresponds to it.
251
/// Based upon the similar code in LoopInfo except this code is specific to
253
/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
256
PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L,
257
SmallVector<MachineInstr *, 4> &IVars,
258
SmallVector<MachineInstr *, 4> &IOps) const {
259
MachineBasicBlock *TopMBB = L->getTopBlock();
260
MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
261
assert(PI != TopMBB->pred_end() &&
262
"Loop must have more than one incoming edge!");
263
MachineBasicBlock *Backedge = *PI++;
264
if (PI == TopMBB->pred_end()) return; // dead loop
265
MachineBasicBlock *Incoming = *PI++;
266
if (PI != TopMBB->pred_end()) return; // multiple backedges?
268
// make sure there is one incoming and one backedge and determine which
270
if (L->contains(Incoming)) {
271
if (L->contains(Backedge))
273
std::swap(Incoming, Backedge);
274
} else if (!L->contains(Backedge))
277
// Loop over all of the PHI nodes, looking for a canonical induction variable:
278
// - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
279
// - The recurrence comes from the backedge.
280
// - the definition is an induction operatio.n
281
for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
282
I != E && I->isPHI(); ++I) {
283
MachineInstr *MPhi = &*I;
284
unsigned DefReg = MPhi->getOperand(0).getReg();
285
for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
286
// Check each operand for the value from the backedge.
287
MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
288
if (L->contains(MBB)) { // operands comes from the backedge
289
// Check if the definition is an induction operation.
290
MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
291
if (isInductionOperation(DI, DefReg)) {
293
IVars.push_back(MPhi);
301
/// getTripCount - Return a loop-invariant LLVM value indicating the
302
/// number of times the loop will be executed. The trip count can
303
/// be either a register or a constant value. If the trip-count
304
/// cannot be determined, this returns null.
306
/// We find the trip count from the phi instruction that defines the
307
/// induction variable. We follow the links to the CMP instruction
308
/// to get the trip count.
310
/// Based upon getTripCount in LoopInfo.
312
CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
313
SmallVector<MachineInstr *, 2> &OldInsts) const {
314
MachineBasicBlock *LastMBB = L->getExitingBlock();
315
// Don't generate a CTR loop if the loop has more than one exit.
319
MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
320
if (LastI->getOpcode() != PPC::BCC)
323
// We need to make sure that this compare is defining the condition
324
// register actually used by the terminating branch.
326
unsigned PredReg = LastI->getOperand(1).getReg();
327
DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);
329
unsigned PredCond = LastI->getOperand(0).getImm();
330
if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
333
// Check that the loop has a induction variable.
334
SmallVector<MachineInstr *, 4> IVars, IOps;
335
getCanonicalInductionVariable(L, IVars, IOps);
336
for (unsigned i = 0; i < IVars.size(); ++i) {
337
MachineInstr *IOp = IOps[i];
338
MachineInstr *IV_Inst = IVars[i];
340
// Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
341
// if Imm is 0, get the count from the PHI opnd
342
// if Imm is -M, than M is the count
343
// Otherwise, Imm is the count
344
MachineOperand *IV_Opnd;
345
const MachineOperand *InitialValue;
346
if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
347
InitialValue = &IV_Inst->getOperand(1);
348
IV_Opnd = &IV_Inst->getOperand(3);
350
InitialValue = &IV_Inst->getOperand(3);
351
IV_Opnd = &IV_Inst->getOperand(1);
354
DEBUG(dbgs() << "Considering:\n");
355
DEBUG(dbgs() << " induction operation: " << *IOp);
356
DEBUG(dbgs() << " induction variable: " << *IV_Inst);
357
DEBUG(dbgs() << " initial value: " << *InitialValue << "\n");
359
// Look for the cmp instruction to determine if we
360
// can get a useful trip count. The trip count can
361
// be either a register or an immediate. The location
362
// of the value depends upon the type (reg or imm).
363
for (MachineRegisterInfo::reg_iterator
364
RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
366
IV_Opnd = &RI.getOperand();
367
bool SignedCmp, Int64Cmp;
368
MachineInstr *MI = IV_Opnd->getParent();
369
if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
370
MI->getOperand(0).getReg() == PredReg) {
372
OldInsts.push_back(MI);
373
OldInsts.push_back(IOp);
375
DEBUG(dbgs() << " compare: " << *MI);
377
const MachineOperand &MO = MI->getOperand(2);
378
assert(MO.isImm() && "IV Cmp Operand should be an immediate");
382
ImmVal = (short) MO.getImm();
384
ImmVal = MO.getImm();
386
const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
387
assert(L->contains(IV_DefInstr->getParent()) &&
388
"IV definition should occurs in loop");
389
int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();
391
assert(InitialValue->isReg() && "Expecting register for init value");
392
unsigned InitialValueReg = InitialValue->getReg();
394
MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
396
// Here we need to look for an immediate load (an li or lis/ori pair).
397
if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
398
DefInstr->getOpcode() == PPC::ORI)) {
399
int64_t start = DefInstr->getOperand(2).getImm();
400
MachineInstr *DefInstr2 =
401
MRI->getVRegDef(DefInstr->getOperand(1).getReg());
402
if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
403
DefInstr2->getOpcode() == PPC::LIS)) {
404
DEBUG(dbgs() << " initial constant: " << *DefInstr);
405
DEBUG(dbgs() << " initial constant: " << *DefInstr2);
407
start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;
409
int64_t count = ImmVal - start;
410
if ((count % iv_value) != 0) {
414
OldInsts.push_back(DefInstr);
415
OldInsts.push_back(DefInstr2);
417
// count/iv_value, the trip count, should be positive here. If it
418
// is negative, that indicates that the counter will wrap.
420
return new CountValue(count/iv_value);
422
return new CountValue(uint32_t(count/iv_value));
424
} else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
425
DefInstr->getOpcode() == PPC::LI)) {
426
DEBUG(dbgs() << " initial constant: " << *DefInstr);
428
int64_t count = ImmVal -
429
int64_t(short(DefInstr->getOperand(1).getImm()));
430
if ((count % iv_value) != 0) {
434
OldInsts.push_back(DefInstr);
437
return new CountValue(count/iv_value);
439
return new CountValue(uint32_t(count/iv_value));
440
} else if (iv_value == 1 || iv_value == -1) {
441
// We can't determine a constant starting value.
443
return new CountValue(InitialValueReg, iv_value > 0);
445
// FIXME: handle non-zero end value.
447
// FIXME: handle non-unit increments (we might not want to introduce
448
// division but we can handle some 2^n cases with shifts).
456
/// isInductionOperation - return true if the operation is matches the
457
/// pattern that defines an induction variable:
461
PPCCTRLoops::isInductionOperation(const MachineInstr *MI,
462
unsigned IVReg) const {
463
return ((MI->getOpcode() == PPC::ADDI || MI->getOpcode() == PPC::ADDI8) &&
464
MI->getOperand(1).isReg() && // could be a frame index instead
465
MI->getOperand(1).getReg() == IVReg);
468
/// isInvalidOperation - Return true if the operation is invalid within
471
PPCCTRLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
473
// call is not allowed because the callee may use a CTR loop
474
if (MI->getDesc().isCall()) {
477
// check if the instruction defines a CTR loop register
478
// (this will also catch nested CTR loops)
479
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
480
const MachineOperand &MO = MI->getOperand(i);
481
if (MO.isReg() && MO.isDef() &&
482
(MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) {
489
/// containsInvalidInstruction - Return true if the loop contains
490
/// an instruction that inhibits the use of the CTR loop function.
492
bool PPCCTRLoops::containsInvalidInstruction(MachineLoop *L) const {
493
const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
494
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
495
MachineBasicBlock *MBB = Blocks[i];
496
for (MachineBasicBlock::iterator
497
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
498
const MachineInstr *MI = &*MII;
499
if (isInvalidLoopOperation(MI)) {
507
/// isDead returns true if the instruction is dead
508
/// (this was essentially copied from DeadMachineInstructionElim::isDead, but
509
/// with special cases for inline asm, physical registers and instructions with
510
/// side effects removed)
511
bool PPCCTRLoops::isDead(const MachineInstr *MI,
512
SmallVector<MachineInstr *, 1> &DeadPhis) const {
513
// Examine each operand.
514
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
515
const MachineOperand &MO = MI->getOperand(i);
516
if (MO.isReg() && MO.isDef()) {
517
unsigned Reg = MO.getReg();
518
if (!MRI->use_nodbg_empty(Reg)) {
519
// This instruction has users, but if the only user is the phi node for
520
// the parent block, and the only use of that phi node is this
521
// instruction, then this instruction is dead: both it (and the phi
522
// node) can be removed.
523
MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
524
if (llvm::next(I) == MRI->use_end() &&
525
I.getOperand().getParent()->isPHI()) {
526
MachineInstr *OnePhi = I.getOperand().getParent();
528
for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
529
const MachineOperand &OPO = OnePhi->getOperand(j);
530
if (OPO.isReg() && OPO.isDef()) {
531
unsigned OPReg = OPO.getReg();
533
MachineRegisterInfo::use_iterator nextJ;
534
for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg),
535
E = MRI->use_end(); J!=E; J=nextJ) {
536
nextJ = llvm::next(J);
537
MachineOperand& Use = J.getOperand();
538
MachineInstr *UseMI = Use.getParent();
541
// The phi node has a user that is not MI, bail...
548
DeadPhis.push_back(OnePhi);
550
// This def has a non-debug use. Don't delete the instruction!
557
// If there are no defs with uses, the instruction is dead.
561
void PPCCTRLoops::removeIfDead(MachineInstr *MI) {
562
// This procedure was essentially copied from DeadMachineInstructionElim
564
SmallVector<MachineInstr *, 1> DeadPhis;
565
if (isDead(MI, DeadPhis)) {
566
DEBUG(dbgs() << "CTR looping will remove: " << *MI);
568
// It is possible that some DBG_VALUE instructions refer to this
569
// instruction. Examine each def operand for such references;
570
// if found, mark the DBG_VALUE as undef (but don't delete it).
571
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
572
const MachineOperand &MO = MI->getOperand(i);
573
if (!MO.isReg() || !MO.isDef())
182
if (!L->getParentLoop())
183
MadeChange |= convertToCTRLoop(L);
189
bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
190
for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
192
if (CallInst *CI = dyn_cast<CallInst>(J)) {
193
if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
194
// Inline ASM is okay, unless it clobbers the ctr register.
195
InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
196
for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
197
InlineAsm::ConstraintInfo &C = CIV[i];
198
if (C.Type != InlineAsm::isInput)
199
for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
200
if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
575
unsigned Reg = MO.getReg();
576
MachineRegisterInfo::use_iterator nextI;
577
for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
578
E = MRI->use_end(); I!=E; I=nextI) {
579
nextI = llvm::next(I); // I is invalidated by the setReg
580
MachineOperand& Use = I.getOperand();
581
MachineInstr *UseMI = Use.getParent();
584
if (Use.isDebug()) // this might also be a instr -> phi -> instr case
585
// which can also be removed.
586
UseMI->getOperand(0).setReg(0U);
590
MI->eraseFromParent();
591
for (unsigned i = 0; i < DeadPhis.size(); ++i) {
592
DeadPhis[i]->eraseFromParent();
209
const TargetLowering *TLI = TM->getTargetLowering();
211
if (Function *F = CI->getCalledFunction()) {
212
// Most intrinsics don't become function calls, but some might.
213
// sin, cos, exp and log are always calls.
215
if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
216
switch (F->getIntrinsicID()) {
219
// VisualStudio defines setjmp as _setjmp
220
#if defined(_MSC_VER) && defined(setjmp) && \
221
!defined(setjmp_undefined_for_msvc)
222
# pragma push_macro("setjmp")
224
# define setjmp_undefined_for_msvc
227
case Intrinsic::setjmp:
229
#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
230
// let's return it to _setjmp state
231
# pragma pop_macro("setjmp")
232
# undef setjmp_undefined_for_msvc
235
case Intrinsic::longjmp:
236
case Intrinsic::memcpy:
237
case Intrinsic::memmove:
238
case Intrinsic::memset:
239
case Intrinsic::powi:
241
case Intrinsic::log2:
242
case Intrinsic::log10:
244
case Intrinsic::exp2:
249
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
250
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
251
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
252
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
253
case Intrinsic::rint: Opcode = ISD::FRINT; break;
254
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
258
// PowerPC does not use [US]DIVREM or other library calls for
259
// operations on regular types which are not otherwise library calls
260
// (i.e. soft float or atomics). If adapting for targets that do,
261
// additional care is required here.
264
if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
265
LibInfo->getLibFunc(F->getName(), Func) &&
266
LibInfo->hasOptimizedCodeGen(Func)) {
267
// Non-read-only functions are never treated as intrinsics.
268
if (!CI->onlyReadsMemory())
271
// Conversion happens only for FP calls.
272
if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
276
default: return true;
277
case LibFunc::copysign:
278
case LibFunc::copysignf:
279
case LibFunc::copysignl:
280
continue; // ISD::FCOPYSIGN is never a library call.
284
continue; // ISD::FABS is never a library call.
288
Opcode = ISD::FSQRT; break;
290
case LibFunc::floorf:
291
case LibFunc::floorl:
292
Opcode = ISD::FFLOOR; break;
293
case LibFunc::nearbyint:
294
case LibFunc::nearbyintf:
295
case LibFunc::nearbyintl:
296
Opcode = ISD::FNEARBYINT; break;
300
Opcode = ISD::FCEIL; break;
304
Opcode = ISD::FRINT; break;
306
case LibFunc::truncf:
307
case LibFunc::truncl:
308
Opcode = ISD::FTRUNC; break;
312
TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
313
if (VTy == MVT::Other)
316
if (TLI->isOperationLegalOrCustom(Opcode, VTy))
318
else if (VTy.isVector() &&
319
TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
327
} else if (isa<BinaryOperator>(J) &&
328
J->getType()->getScalarType()->isPPC_FP128Ty()) {
329
// Most operations on ppc_f128 values become calls.
331
} else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
332
isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
333
CastInst *CI = cast<CastInst>(J);
334
if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
335
CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
337
(CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
338
CI->getDestTy()->getScalarType()->isIntegerTy(64))
341
} else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
342
// On PowerPC, indirect jumps use the counter register.
344
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
347
const TargetLowering *TLI = TM->getTargetLowering();
349
if (TLI->supportJumpTables() &&
350
SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
597
/// converToCTRLoop - check if the loop is a candidate for
598
/// converting to a CTR loop. If so, then perform the
601
/// This function works on innermost loops first. A loop can
602
/// be converted if it is a counting loop; either a register
603
/// value or an immediate.
605
/// The code makes several assumptions about the representation
606
/// of the loop in llvm.
607
bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
608
bool Changed = false;
358
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
359
bool MadeChange = false;
361
Triple TT = Triple(L->getHeader()->getParent()->getParent()->
363
if (!TT.isArch32Bit() && !TT.isArch64Bit())
364
return MadeChange; // Unknown arch. type.
609
366
// Process nested loops first.
610
for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
611
Changed |= convertToCTRLoop(*I);
367
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
368
MadeChange |= convertToCTRLoop(*I);
613
371
// If a nested loop has been converted, then we can't convert this loop.
618
SmallVector<MachineInstr *, 2> OldInsts;
619
// Are we able to determine the trip count for the loop?
620
CountValue *TripCount = getTripCount(L, OldInsts);
621
if (TripCount == 0) {
622
DEBUG(dbgs() << "failed to get trip count!\n");
626
if (TripCount->isImm()) {
627
DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
629
// FIXME: We currently can't form 64-bit constants
630
// (including 32-bit unsigned constants)
631
if (!isInt<32>(TripCount->getImm()))
635
// Does the loop contain any invalid instructions?
636
if (containsInvalidInstruction(L)) {
639
MachineBasicBlock *Preheader = L->getLoopPreheader();
640
// No preheader means there's not place for the loop instr.
641
if (Preheader == 0) {
644
MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
647
if (InsertPos != Preheader->end())
648
dl = InsertPos->getDebugLoc();
650
MachineBasicBlock *LastMBB = L->getExitingBlock();
651
// Don't generate CTR loop if the loop has more than one exit.
655
MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
657
// Determine the loop start.
658
MachineBasicBlock *LoopStart = L->getTopBlock();
659
if (L->getLoopLatch() != LastMBB) {
660
// When the exit and latch are not the same, use the latch block as the
662
// The loop start address is used only after the 1st iteration, and the loop
663
// latch may contains instrs. that need to be executed after the 1st iter.
664
LoopStart = L->getLoopLatch();
665
// Make sure the latch is a successor of the exit, otherwise it won't work.
666
if (!LastMBB->isSuccessor(LoopStart)) {
671
// Convert the loop to a CTR loop
672
DEBUG(dbgs() << "Change to CTR loop at "; L->dump());
674
MachineFunction *MF = LastMBB->getParent();
675
const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>();
676
bool isPPC64 = Subtarget.isPPC64();
678
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
679
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
680
const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
683
if (TripCount->isReg()) {
684
// Create a copy of the loop count register.
685
const TargetRegisterClass *SrcRC =
686
MF->getRegInfo().getRegClass(TripCount->getReg());
687
CountReg = MF->getRegInfo().createVirtualRegister(RC);
688
unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
689
(unsigned) PPC::EXTSW_32_64 :
690
(unsigned) TargetOpcode::COPY;
691
BuildMI(*Preheader, InsertPos, dl,
692
TII->get(CopyOp), CountReg).addReg(TripCount->getReg());
693
if (TripCount->isNeg()) {
694
unsigned CountReg1 = CountReg;
695
CountReg = MF->getRegInfo().createVirtualRegister(RC);
696
BuildMI(*Preheader, InsertPos, dl,
697
TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG),
698
CountReg).addReg(CountReg1);
701
assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
702
// Put the trip count in a register for transfer into the count register.
704
int64_t CountImm = TripCount->getImm();
705
if (TripCount->isNeg())
706
CountImm = -CountImm;
708
CountReg = MF->getRegInfo().createVirtualRegister(RC);
709
if (abs64(CountImm) > 0x7FFF) {
710
BuildMI(*Preheader, InsertPos, dl,
711
TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
712
CountReg).addImm((CountImm >> 16) & 0xFFFF);
713
unsigned CountReg1 = CountReg;
714
CountReg = MF->getRegInfo().createVirtualRegister(RC);
715
BuildMI(*Preheader, InsertPos, dl,
716
TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
717
CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF);
719
BuildMI(*Preheader, InsertPos, dl,
720
TII->get(isPPC64 ? PPC::LI8 : PPC::LI),
721
CountReg).addImm(CountImm);
725
// Add the mtctr instruction to the beginning of the loop.
726
BuildMI(*Preheader, InsertPos, dl,
727
TII->get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(CountReg,
728
TripCount->isImm() ? RegState::Kill : 0);
730
// Make sure the loop start always has a reference in the CFG. We need to
731
// create a BlockAddress operand to get this mechanism to work both the
732
// MachineBasicBlock and BasicBlock objects need the flag set.
733
LoopStart->setHasAddressTaken();
734
// This line is needed to set the hasAddressTaken flag on the BasicBlock
736
BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
738
// Replace the loop branch with a bdnz instruction.
739
dl = LastI->getDebugLoc();
740
const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
741
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
742
MachineBasicBlock *MBB = Blocks[i];
743
if (MBB != Preheader)
744
MBB->addLiveIn(isPPC64 ? PPC::CTR8 : PPC::CTR);
747
// The loop ends with either:
748
// - a conditional branch followed by an unconditional branch, or
749
// - a conditional branch to the loop start.
750
assert(LastI->getOpcode() == PPC::BCC &&
751
"loop end must start with a BCC instruction");
752
// Either the BCC branches to the beginning of the loop, or it
753
// branches out of the loop and there is an unconditional branch
754
// to the start of the loop.
755
MachineBasicBlock *BranchTarget = LastI->getOperand(2).getMBB();
756
BuildMI(*LastMBB, LastI, dl,
757
TII->get((BranchTarget == LoopStart) ?
758
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
759
(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget);
761
// Conditional branch; just delete it.
762
DEBUG(dbgs() << "Removing old branch: " << *LastI);
763
LastMBB->erase(LastI);
767
// The induction operation (add) and the comparison (cmpwi) may now be
768
// unneeded. If these are unneeded, then remove them.
769
for (unsigned i = 0; i < OldInsts.size(); ++i)
770
removeIfDead(OldInsts[i]);
376
// Stop trying after reaching the limit (if any).
377
int Limit = CTRLoopLimit;
379
if (Counter >= CTRLoopLimit)
385
// We don't want to spill/restore the counter register, and so we don't
386
// want to use the counter register if the loop contains calls.
387
for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
389
if (mightUseCTR(TT, *I))
392
SmallVector<BasicBlock*, 4> ExitingBlocks;
393
L->getExitingBlocks(ExitingBlocks);
395
BasicBlock *CountedExitBlock = 0;
396
const SCEV *ExitCount = 0;
397
BranchInst *CountedExitBranch = 0;
398
for (SmallVector<BasicBlock*, 4>::iterator I = ExitingBlocks.begin(),
399
IE = ExitingBlocks.end(); I != IE; ++I) {
400
const SCEV *EC = SE->getExitCount(L, *I);
401
DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
402
(*I)->getName() << ": " << *EC << "\n");
403
if (isa<SCEVCouldNotCompute>(EC))
405
if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
406
if (ConstEC->getValue()->isZero())
408
} else if (!SE->isLoopInvariant(EC, L))
411
// We now have a loop-invariant count of loop iterations (which is not the
412
// constant zero) for which we know that this loop will not exit via this
415
// We need to make sure that this block will run on every loop iteration.
416
// For this to be true, we must dominate all blocks with backedges. Such
417
// blocks are in-loop predecessors to the header block.
418
bool NotAlways = false;
419
for (pred_iterator PI = pred_begin(L->getHeader()),
420
PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
421
if (!L->contains(*PI))
424
if (!DT->dominates(*I, *PI)) {
433
// Make sure this blocks ends with a conditional branch.
434
Instruction *TI = (*I)->getTerminator();
438
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
439
if (!BI->isConditional())
442
CountedExitBranch = BI;
446
// Note that this block may not be the loop latch block, even if the loop
447
// has a latch block.
448
CountedExitBlock = *I;
453
if (!CountedExitBlock)
456
BasicBlock *Preheader = L->getLoopPreheader();
458
// If we don't have a preheader, then insert one. If we already have a
459
// preheader, then we can use it (except if the preheader contains a use of
460
// the CTR register because some such uses might be reordered by the
461
// selection DAG after the mtctr instruction).
462
if (!Preheader || mightUseCTR(TT, Preheader))
463
Preheader = InsertPreheaderForLoop(L, this);
467
DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");
469
// Insert the count into the preheader and replace the condition used by the
473
SCEVExpander SCEVE(*SE, "loopcnt");
474
LLVMContext &C = SE->getContext();
475
Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
477
if (!ExitCount->getType()->isPointerTy() &&
478
ExitCount->getType() != CountType)
479
ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
480
ExitCount = SE->getAddExpr(ExitCount,
481
SE->getConstant(CountType, 1));
482
Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
483
Preheader->getTerminator());
485
IRBuilder<> CountBuilder(Preheader->getTerminator());
486
Module *M = Preheader->getParent()->getParent();
487
Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
489
CountBuilder.CreateCall(MTCTRFunc, ECValue);
491
IRBuilder<> CondBuilder(CountedExitBranch);
493
Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
494
Value *NewCond = CondBuilder.CreateCall(DecFunc);
495
Value *OldCond = CountedExitBranch->getCondition();
496
CountedExitBranch->setCondition(NewCond);
498
// The false branch must exit the loop.
499
if (!L->contains(CountedExitBranch->getSuccessor(0)))
500
CountedExitBranch->swapSuccessors();
502
// The old condition may be dead now, and may have even created a dead PHI
503
// (the original induction variable).
504
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
505
DeleteDeadPHIs(CountedExitBlock);
512
static bool clobbersCTR(const MachineInstr *MI) {
513
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
514
const MachineOperand &MO = MI->getOperand(i);
516
if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
518
} else if (MO.isRegMask()) {
519
if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
527
static bool verifyCTRBranch(MachineBasicBlock *MBB,
528
MachineBasicBlock::iterator I) {
529
MachineBasicBlock::iterator BI = I;
530
SmallSet<MachineBasicBlock *, 16> Visited;
531
SmallVector<MachineBasicBlock *, 8> Preds;
534
if (I == MBB->begin()) {
546
for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
547
unsigned Opc = I->getOpcode();
548
if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
553
if (I != BI && clobbersCTR(I)) {
554
DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" <<
555
MBB->getFullName() << ") instruction " << *I <<
556
" clobbers CTR, invalidating " << "BB#" <<
557
BI->getParent()->getNumber() << " (" <<
558
BI->getParent()->getFullName() << ") instruction " <<
567
if (!CheckPreds && Preds.empty())
572
if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
573
DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" <<
574
BI->getParent()->getNumber() << " (" <<
575
BI->getParent()->getFullName() << ") instruction " <<
580
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
581
PIE = MBB->pred_end(); PI != PIE; ++PI)
582
Preds.push_back(*PI);
586
MBB = Preds.pop_back_val();
587
if (!Visited.count(MBB)) {
588
I = MBB->getLastNonDebugInstr();
591
} while (!Preds.empty());
596
bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
597
MDT = &getAnalysis<MachineDominatorTree>();
599
// Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
600
// any other instructions that might clobber the ctr register.
601
for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
603
MachineBasicBlock *MBB = I;
604
if (!MDT->isReachableFromEntry(MBB))
607
for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
608
MIIE = MBB->end(); MII != MIIE; ++MII) {
609
unsigned Opc = MII->getOpcode();
610
if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
611
Opc == PPC::BDZ8 || Opc == PPC::BDZ)
612
if (!verifyCTRBranch(MBB, MII))
613
llvm_unreachable("Invalid PPC CTR loop!");