//===- GVN.cpp - Eliminate redundant values and loads ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs global value numbering to eliminate fully redundant
// instructions.  It also performs simple dead load elimination.
//
// Note that this pass does the value numbering itself; it does not use the
// ValueNumbering analysis passes.
//
//===----------------------------------------------------------------------===//
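//
// For example, given the IR below, GVN assigns %a and %b the same value
// number and rewrites all uses of %b to use %a, deleting %b:
//
//   %a = add i32 %x, %y
//   %b = add i32 %x, %y   ; fully redundant with %a
//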
#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/BasicBlock.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumGVNInstr,  "Number of instructions deleted");
STATISTIC(NumGVNLoad,   "Number of loads deleted");
STATISTIC(NumGVNPRE,    "Number of instructions PRE'd");
STATISTIC(NumGVNBlocks, "Number of blocks merged");
STATISTIC(NumPRELoad,   "Number of loads PRE'd");

static cl::opt<bool> EnablePRE("enable-pre",
                               cl::init(true), cl::Hidden);
static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
static cl::opt<bool> EnableFullLoadPRE("enable-full-load-pre", cl::init(false));
//===----------------------------------------------------------------------===//
//                         ValueTable Class
//===----------------------------------------------------------------------===//

/// This class holds the mapping between values and value numbers.  It is used
/// as an efficient mechanism to determine the expression-wise equivalence of
/// two values.
namespace {
  struct Expression {
    enum ExpressionOpcode {
      ADD = Instruction::Add,
      FADD = Instruction::FAdd,
      SUB = Instruction::Sub,
      FSUB = Instruction::FSub,
      MUL = Instruction::Mul,
      FMUL = Instruction::FMul,
      UDIV = Instruction::UDiv,
      SDIV = Instruction::SDiv,
      FDIV = Instruction::FDiv,
      UREM = Instruction::URem,
      SREM = Instruction::SRem,
      FREM = Instruction::FRem,
      SHL = Instruction::Shl,
      LSHR = Instruction::LShr,
      ASHR = Instruction::AShr,
      AND = Instruction::And,
      OR = Instruction::Or,
      XOR = Instruction::Xor,
      TRUNC = Instruction::Trunc,
      ZEXT = Instruction::ZExt,
      SEXT = Instruction::SExt,
      FPTOUI = Instruction::FPToUI,
      FPTOSI = Instruction::FPToSI,
      UITOFP = Instruction::UIToFP,
      SITOFP = Instruction::SIToFP,
      FPTRUNC = Instruction::FPTrunc,
      FPEXT = Instruction::FPExt,
      PTRTOINT = Instruction::PtrToInt,
      INTTOPTR = Instruction::IntToPtr,
      BITCAST = Instruction::BitCast,
      ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,
      ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,
      FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,
      FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,
      FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,
      SHUFFLE, SELECT, GEP, CALL, CONSTANT,
      INSERTVALUE, EXTRACTVALUE, EMPTY, TOMBSTONE };
    ExpressionOpcode opcode;
    const Type* type;
    SmallVector<uint32_t, 4> varargs;
    Value *function;

    Expression() { }
    Expression(ExpressionOpcode o) : opcode(o) { }

    bool operator==(const Expression &other) const {
      if (opcode != other.opcode)
        return false;
      else if (opcode == EMPTY || opcode == TOMBSTONE)
        return true;
      else if (type != other.type)
        return false;
      else if (function != other.function)
        return false;
      else {
        if (varargs.size() != other.varargs.size())
          return false;

        for (size_t i = 0; i < varargs.size(); ++i)
          if (varargs[i] != other.varargs[i])
            return false;

        return true;
      }
    }

    bool operator!=(const Expression &other) const {
      return !(*this == other);
    }
  };
  class ValueTable {
    private:
      DenseMap<Value*, uint32_t> valueNumbering;
      DenseMap<Expression, uint32_t> expressionNumbering;
      AliasAnalysis* AA;
      MemoryDependenceAnalysis* MD;
      DominatorTree* DT;

      uint32_t nextValueNumber;

      Expression::ExpressionOpcode getOpcode(CmpInst* C);
      Expression create_expression(BinaryOperator* BO);
      Expression create_expression(CmpInst* C);
      Expression create_expression(ShuffleVectorInst* V);
      Expression create_expression(ExtractElementInst* C);
      Expression create_expression(InsertElementInst* V);
      Expression create_expression(SelectInst* V);
      Expression create_expression(CastInst* C);
      Expression create_expression(GetElementPtrInst* G);
      Expression create_expression(CallInst* C);
      Expression create_expression(Constant* C);
      Expression create_expression(ExtractValueInst* C);
      Expression create_expression(InsertValueInst* C);

      uint32_t lookup_or_add_call(CallInst* C);
    public:
      ValueTable() : nextValueNumber(1) { }
      uint32_t lookup_or_add(Value *V);
      uint32_t lookup(Value *V) const;
      void add(Value *V, uint32_t num);
      void clear();
      void erase(Value *v);
      void setAliasAnalysis(AliasAnalysis* A) { AA = A; }
      AliasAnalysis *getAliasAnalysis() const { return AA; }
      void setMemDep(MemoryDependenceAnalysis* M) { MD = M; }
      void setDomTree(DominatorTree* D) { DT = D; }
      uint32_t getNextUnusedValueNumber() { return nextValueNumber; }
      void verifyRemoved(const Value *) const;
  };
}
namespace llvm {
template <> struct DenseMapInfo<Expression> {
  static inline Expression getEmptyKey() {
    return Expression(Expression::EMPTY);
  }

  static inline Expression getTombstoneKey() {
    return Expression(Expression::TOMBSTONE);
  }

  static unsigned getHashValue(const Expression e) {
    unsigned hash = e.opcode;

    hash = ((unsigned)((uintptr_t)e.type >> 4) ^
            (unsigned)((uintptr_t)e.type >> 9));

    for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
         E = e.varargs.end(); I != E; ++I)
      hash = *I + hash * 37;

    hash = ((unsigned)((uintptr_t)e.function >> 4) ^
            (unsigned)((uintptr_t)e.function >> 9)) +
           hash * 37;

    return hash;
  }
  static bool isEqual(const Expression &LHS, const Expression &RHS) {
    return LHS == RHS;
  }
};

template <>
struct isPodLike<Expression> { static const bool value = true; };

}
//===----------------------------------------------------------------------===//
//                     ValueTable Internal Functions
//===----------------------------------------------------------------------===//

Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {
  if (isa<ICmpInst>(C)) {
    switch (C->getPredicate()) {
    default:  // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;
    case ICmpInst::ICMP_NE:  return Expression::ICMPNE;
    case ICmpInst::ICMP_UGT: return Expression::ICMPUGT;
    case ICmpInst::ICMP_UGE: return Expression::ICMPUGE;
    case ICmpInst::ICMP_ULT: return Expression::ICMPULT;
    case ICmpInst::ICMP_ULE: return Expression::ICMPULE;
    case ICmpInst::ICMP_SGT: return Expression::ICMPSGT;
    case ICmpInst::ICMP_SGE: return Expression::ICMPSGE;
    case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;
    case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;
    }
  } else {
    switch (C->getPredicate()) {
    default: // THIS SHOULD NEVER HAPPEN
      llvm_unreachable("Comparison with unknown predicate?");
    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ;
    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT;
    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE;
    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT;
    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE;
    case FCmpInst::FCMP_ONE: return Expression::FCMPONE;
    case FCmpInst::FCMP_ORD: return Expression::FCMPORD;
    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO;
    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ;
    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT;
    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE;
    case FCmpInst::FCMP_ULT: return Expression::FCMPULT;
    case FCmpInst::FCMP_ULE: return Expression::FCMPULE;
    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE;
    }
  }
}
Expression ValueTable::create_expression(CallInst* C) {
  Expression e;

  e.type = C->getType();
  e.function = C->getCalledFunction();
  e.opcode = Expression::CALL;

  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

Expression ValueTable::create_expression(BinaryOperator* BO) {
  Expression e;
  e.varargs.push_back(lookup_or_add(BO->getOperand(0)));
  e.varargs.push_back(lookup_or_add(BO->getOperand(1)));
  e.function = 0;
  e.type = BO->getType();
  e.opcode = static_cast<Expression::ExpressionOpcode>(BO->getOpcode());

  return e;
}

Expression ValueTable::create_expression(CmpInst* C) {
  Expression e;

  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
  e.varargs.push_back(lookup_or_add(C->getOperand(1)));
  e.function = 0;
  e.type = C->getType();
  e.opcode = getOpcode(C);

  return e;
}

Expression ValueTable::create_expression(CastInst* C) {
  Expression e;

  e.varargs.push_back(lookup_or_add(C->getOperand(0)));
  e.function = 0;
  e.type = C->getType();
  e.opcode = static_cast<Expression::ExpressionOpcode>(C->getOpcode());

  return e;
}

Expression ValueTable::create_expression(ShuffleVectorInst* S) {
  Expression e;

  e.varargs.push_back(lookup_or_add(S->getOperand(0)));
  e.varargs.push_back(lookup_or_add(S->getOperand(1)));
  e.varargs.push_back(lookup_or_add(S->getOperand(2)));
  e.function = 0;
  e.type = S->getType();
  e.opcode = Expression::SHUFFLE;

  return e;
}

Expression ValueTable::create_expression(ExtractElementInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getOperand(0)));
  e.varargs.push_back(lookup_or_add(E->getOperand(1)));
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACT;

  return e;
}

Expression ValueTable::create_expression(InsertElementInst* I) {
  Expression e;

  e.varargs.push_back(lookup_or_add(I->getOperand(0)));
  e.varargs.push_back(lookup_or_add(I->getOperand(1)));
  e.varargs.push_back(lookup_or_add(I->getOperand(2)));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::INSERT;

  return e;
}

Expression ValueTable::create_expression(SelectInst* I) {
  Expression e;

  e.varargs.push_back(lookup_or_add(I->getCondition()));
  e.varargs.push_back(lookup_or_add(I->getTrueValue()));
  e.varargs.push_back(lookup_or_add(I->getFalseValue()));
  e.function = 0;
  e.type = I->getType();
  e.opcode = Expression::SELECT;

  return e;
}

Expression ValueTable::create_expression(GetElementPtrInst* G) {
  Expression e;

  e.varargs.push_back(lookup_or_add(G->getPointerOperand()));
  e.function = 0;
  e.type = G->getType();
  e.opcode = Expression::GEP;

  for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();
       I != E; ++I)
    e.varargs.push_back(lookup_or_add(*I));

  return e;
}

Expression ValueTable::create_expression(ExtractValueInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
  for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::EXTRACTVALUE;

  return e;
}

Expression ValueTable::create_expression(InsertValueInst* E) {
  Expression e;

  e.varargs.push_back(lookup_or_add(E->getAggregateOperand()));
  e.varargs.push_back(lookup_or_add(E->getInsertedValueOperand()));
  for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
       II != IE; ++II)
    e.varargs.push_back(*II);
  e.function = 0;
  e.type = E->getType();
  e.opcode = Expression::INSERTVALUE;

  return e;
}
//===----------------------------------------------------------------------===//
//                     ValueTable External Functions
//===----------------------------------------------------------------------===//

/// add - Insert a value into the table with a specified value number.
void ValueTable::add(Value *V, uint32_t num) {
  valueNumbering.insert(std::make_pair(V, num));
}
uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
  if (AA->doesNotAccessMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) e = nextValueNumber++;
    valueNumbering[C] = e;
    return e;
  } else if (AA->onlyReadsMemory(C)) {
    Expression exp = create_expression(C);
    uint32_t& e = expressionNumbering[exp];
    if (!e) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }
    if (!MD) {
      e = nextValueNumber++;
      valueNumbering[C] = e;
      return e;
    }

    MemDepResult local_dep = MD->getDependency(C);

    if (!local_dep.isDef() && !local_dep.isNonLocal()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (local_dep.isDef()) {
      CallInst* local_cdep = cast<CallInst>(local_dep.getInst());

      if (local_cdep->getNumOperands() != C->getNumOperands()) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }

      for (unsigned i = 1; i < C->getNumOperands(); ++i) {
        uint32_t c_vn = lookup_or_add(C->getOperand(i));
        uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
        if (c_vn != cd_vn) {
          valueNumbering[C] = nextValueNumber;
          return nextValueNumber++;
        }
      }

      uint32_t v = lookup_or_add(local_cdep);
      valueNumbering[C] = v;
      return v;
    }

    // Non-local case.
    const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
      MD->getNonLocalCallDependency(CallSite(C));
    // FIXME: call/call dependencies for readonly calls should return def, not
    // clobber!  Move the checking logic to MemDep!
    CallInst* cdep = 0;

    // Check to see if we have a single dominating call instruction that is
    // identical to C.
    for (unsigned i = 0, e = deps.size(); i != e; ++i) {
      const NonLocalDepEntry *I = &deps[i];
      // Ignore non-local dependencies.
      if (I->getResult().isNonLocal())
        continue;

      // We don't handle non-dependencies.  If we already have a call, reject
      // instruction dependencies.
      if (I->getResult().isClobber() || cdep != 0) {
        cdep = 0;
        break;
      }

      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
      // FIXME: All duplicated with non-local case.
      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
        cdep = NonLocalDepCall;
        continue;
      }

      cdep = 0;
      break;
    }

    if (!cdep) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }

    if (cdep->getNumOperands() != C->getNumOperands()) {
      valueNumbering[C] = nextValueNumber;
      return nextValueNumber++;
    }
    for (unsigned i = 1; i < C->getNumOperands(); ++i) {
      uint32_t c_vn = lookup_or_add(C->getOperand(i));
      uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
      if (c_vn != cd_vn) {
        valueNumbering[C] = nextValueNumber;
        return nextValueNumber++;
      }
    }

    uint32_t v = lookup_or_add(cdep);
    valueNumbering[C] = v;
    return v;

  } else {
    valueNumbering[C] = nextValueNumber;
    return nextValueNumber++;
  }
}
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
uint32_t ValueTable::lookup_or_add(Value *V) {
  DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
  if (VI != valueNumbering.end())
    return VI->second;

  if (!isa<Instruction>(V)) {
    valueNumbering[V] = nextValueNumber;
    return nextValueNumber++;
  }

  Instruction* I = cast<Instruction>(V);
  Expression exp;
  switch (I->getOpcode()) {
    case Instruction::Call:
      return lookup_or_add_call(cast<CallInst>(I));
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
      exp = create_expression(cast<BinaryOperator>(I));
      break;
    case Instruction::ICmp:
    case Instruction::FCmp:
      exp = create_expression(cast<CmpInst>(I));
      break;
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
      exp = create_expression(cast<CastInst>(I));
      break;
    case Instruction::Select:
      exp = create_expression(cast<SelectInst>(I));
      break;
    case Instruction::ExtractElement:
      exp = create_expression(cast<ExtractElementInst>(I));
      break;
    case Instruction::InsertElement:
      exp = create_expression(cast<InsertElementInst>(I));
      break;
    case Instruction::ShuffleVector:
      exp = create_expression(cast<ShuffleVectorInst>(I));
      break;
    case Instruction::ExtractValue:
      exp = create_expression(cast<ExtractValueInst>(I));
      break;
    case Instruction::InsertValue:
      exp = create_expression(cast<InsertValueInst>(I));
      break;
    case Instruction::GetElementPtr:
      exp = create_expression(cast<GetElementPtrInst>(I));
      break;
    default:
      valueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
  }

  uint32_t& e = expressionNumbering[exp];
  if (!e) e = nextValueNumber++;
  valueNumbering[V] = e;
  return e;
}
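// For example, once %x and %y have been numbered, the two adds below build
// identical Expression keys (same opcode, type, and operand value numbers),
// so expressionNumbering hands back the same number for both:
//
//   %a = add i32 %x, %y   ; assigned, say, value number 3
//   %b = add i32 %x, %y   ; same expression, also value number 3
//
// Note that create_expression records operands in order without
// canonicalization, so the commuted form add i32 %y, %x would receive a
// different number.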
/// lookup - Returns the value number of the specified value.  Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
  DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
  assert(VI != valueNumbering.end() && "Value not numbered?");
  return VI->second;
}

/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
  valueNumbering.clear();
  expressionNumbering.clear();
  nextValueNumber = 1;
}

/// erase - Remove a value from the value numbering.
void ValueTable::erase(Value *V) {
  valueNumbering.erase(V);
}

/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
  for (DenseMap<Value*, uint32_t>::const_iterator
         I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
    assert(I->first != V && "Inst still occurs in value numbering map!");
  }
}
//===----------------------------------------------------------------------===//
//                                GVN Pass
//===----------------------------------------------------------------------===//

namespace {
  struct ValueNumberScope {
    ValueNumberScope* parent;
    DenseMap<uint32_t, Value*> table;

    ValueNumberScope(ValueNumberScope* p) : parent(p) { }
  };
}

namespace {

  class GVN : public FunctionPass {
    bool runOnFunction(Function &F);
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit GVN(bool noloads = false)
      : FunctionPass(&ID), NoLoads(noloads), MD(0) { }

  private:
    bool NoLoads;
    MemoryDependenceAnalysis *MD;
    DominatorTree *DT;

    ValueTable VN;
    DenseMap<BasicBlock*, ValueNumberScope*> localAvail;

    // List of critical edges to be split between iterations.
    SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;

    // This transformation requires dominator postdominator info
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      if (!NoLoads)
        AU.addRequired<MemoryDependenceAnalysis>();
      AU.addRequired<AliasAnalysis>();

      AU.addPreserved<DominatorTree>();
      AU.addPreserved<AliasAnalysis>();
    }

    // Helper functions.
    // FIXME: eliminate or document these better
    bool processLoad(LoadInst* L,
                     SmallVectorImpl<Instruction*> &toErase);
    bool processInstruction(Instruction *I,
                            SmallVectorImpl<Instruction*> &toErase);
    bool processNonLocalLoad(LoadInst* L,
                             SmallVectorImpl<Instruction*> &toErase);
    bool processBlock(BasicBlock *BB);
    void dump(DenseMap<uint32_t, Value*>& d);
    bool iterateOnFunction(Function &F);
    Value *CollapsePhi(PHINode* p);
    bool performPRE(Function& F);
    Value *lookupNumber(BasicBlock *BB, uint32_t num);
    void cleanupGlobalSets();
    void verifyRemoved(const Instruction *I) const;
    bool splitCriticalEdges();
  };

  char GVN::ID = 0;
}

// createGVNPass - The public interface to this file...
FunctionPass *llvm::createGVNPass(bool NoLoads) {
  return new GVN(NoLoads);
}

static RegisterPass<GVN> X("gvn",
                           "Global Value Numbering");

void GVN::dump(DenseMap<uint32_t, Value*>& d) {
  errs() << "{\n";
  for (DenseMap<uint32_t, Value*>::iterator I = d.begin(),
       E = d.end(); I != E; ++I) {
    errs() << I->first << "\n";
    I->second->dump();
  }
  errs() << "}\n";
}
static bool isSafeReplacement(PHINode* p, Instruction *inst) {
  if (!isa<PHINode>(inst))
    return true;

  for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
       UI != E; ++UI)
    if (PHINode* use_phi = dyn_cast<PHINode>(UI))
      if (use_phi->getParent() == inst->getParent())
        return false;

  return true;
}

Value *GVN::CollapsePhi(PHINode *PN) {
  Value *ConstVal = PN->hasConstantValue(DT);
  if (!ConstVal) return 0;

  Instruction *Inst = dyn_cast<Instruction>(ConstVal);
  if (!Inst)
    return ConstVal;

  if (DT->dominates(Inst, PN))
    if (isSafeReplacement(PN, Inst))
      return Inst;
  return 0;
}
/// IsValueFullyAvailableInBlock - Return true if we can prove that the value
/// we're analyzing is fully available in the specified block.  As we go, keep
/// track of which blocks we know are fully alive in FullyAvailableBlocks.  This
/// map is actually a tri-state map with the following values:
///   0) we know the block *is not* fully available.
///   1) we know the block *is* fully available.
///   2) we do not know whether the block is fully available or not, but we are
///      currently speculating that it will be.
///   3) we are speculating for this block and have used that to speculate for
///      other blocks.
static bool IsValueFullyAvailableInBlock(BasicBlock *BB,
                            DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {
  // Optimistically assume that the block is fully available and check to see
  // if we already know about this block in one lookup.
  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =
    FullyAvailableBlocks.insert(std::make_pair(BB, 2));

  // If the entry already existed for this block, return the precomputed value.
  if (!IV.second) {
    // If this is a speculative "available" value, mark it as being used for
    // speculation of other blocks.
    if (IV.first->second == 2)
      IV.first->second = 3;
    return IV.first->second != 0;
  }

  // Otherwise, see if it is fully available in all predecessors.
  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);

  // If this block has no predecessors, it isn't live-in here.
  if (PI == PE)
    goto SpeculationFailure;

  for (; PI != PE; ++PI)
    // If the value isn't fully available in one of our predecessors, then it
    // isn't fully available in this block either.  Undo our previous
    // optimistic assumption and bail out.
    if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))
      goto SpeculationFailure;

  return true;

// SpeculationFailure - If we get here, we found out that this is not, after
// all, a fully-available block.  We have a problem if we speculated on this and
// used the speculation to mark other blocks as available.
SpeculationFailure:
  char &BBVal = FullyAvailableBlocks[BB];

  // If we didn't speculate on this, just return with it set to false.
  if (BBVal == 2) {
    BBVal = 0;
    return false;
  }

  // If we did speculate on this value, we could have blocks set to 1 that are
  // incorrect.  Walk the (transitive) successors of this block and mark them as
  // 0 if set to one.
  SmallVector<BasicBlock*, 32> BBWorklist;
  BBWorklist.push_back(BB);

  do {
    BasicBlock *Entry = BBWorklist.pop_back_val();
    // Note that this sets blocks to 0 (unavailable) if they happen to not
    // already be in FullyAvailableBlocks.  This is safe.
    char &EntryVal = FullyAvailableBlocks[Entry];
    if (EntryVal == 0) continue;  // Already unavailable.

    // Mark as unavailable.
    EntryVal = 0;

    for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)
      BBWorklist.push_back(*I);
  } while (!BBWorklist.empty());

  return false;
}
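// For example, in a diamond CFG (entry -> {left, right} -> merge) where the
// value is known available in both left and right, querying merge first
// inserts a speculative entry (2) for merge, then finds both predecessors
// fully available, so the query succeeds.  Had right been unavailable,
// control would reach SpeculationFailure, which walks merge's successors and
// clears any blocks whose availability was derived from the bad speculation.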
/// CanCoerceMustAliasedValueToLoad - Return true if
/// CoerceAvailableValueToLoadType will succeed.
static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
                                            const Type *LoadTy,
                                            const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
      StoredVal->getType()->isStructTy() ||
      StoredVal->getType()->isArrayTy())
    return false;

  // The store has to be at least as big as the load.
  if (TD.getTypeSizeInBits(StoredVal->getType()) <
        TD.getTypeSizeInBits(LoadTy))
    return false;

  return true;
}
/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value.  LoadedTy is the type of the load we want to replace and
/// InsertPt is the place to insert new instructions.
///
/// If we can't do it, return null.
static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
                                             const Type *LoadedTy,
                                             Instruction *InsertPt,
                                             const TargetData &TD) {
  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD))
    return 0;

  const Type *StoredValTy = StoredVal->getType();

  uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);

  // If the store and reload are the same size, we can always reuse it.
  if (StoreSize == LoadSize) {
    if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
      // Pointer to Pointer -> use bitcast.
      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
    }

    // Convert source pointers to integers, which can be bitcast.
    if (StoredValTy->isPointerTy()) {
      StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
    }

    const Type *TypeToCastTo = LoadedTy;
    if (TypeToCastTo->isPointerTy())
      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());

    if (StoredValTy != TypeToCastTo)
      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);

    // Cast to pointer if the load needs a pointer type.
    if (LoadedTy->isPointerTy())
      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);

    return StoredVal;
  }

  // If the loaded value is smaller than the available value, then we can
  // extract out a piece from it.  If the available value is too small, then we
  // can't do anything.
  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");

  // Convert source pointers to integers, which can be manipulated.
  if (StoredValTy->isPointerTy()) {
    StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // Convert vectors and fp to integer, which can be manipulated.
  if (!StoredValTy->isIntegerTy()) {
    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
  }

  // If this is a big-endian system, we need to shift the value down to the low
  // bits so that a truncate will work.
  if (TD.isBigEndian()) {
    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize);
    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt);
  }

  // Truncate the integer to the right size now.
  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize);
  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt);

  if (LoadedTy == NewIntTy)
    return StoredVal;

  // If the result is a pointer, inttoptr.
  if (LoadedTy->isPointerTy())
    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);

  // Otherwise, bitcast.
  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt);
}
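// For example, a store of an i64 feeding a must-aliased load of an i32 takes
// the truncation path above: on a little-endian target the stored value is
// simply truncated to i32, while on a big-endian target it is first shifted
// right by StoreSize-LoadSize = 32 bits so the same low-addressed bytes are
// kept.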
/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
/// be expressed as a base pointer plus a constant offset.  Return the base and
/// offset to the caller.
static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
                                        const TargetData &TD) {
  Operator *PtrOp = dyn_cast<Operator>(Ptr);
  if (PtrOp == 0) return Ptr;

  // Just look through bitcasts.
  if (PtrOp->getOpcode() == Instruction::BitCast)
    return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);

  // If this is a GEP with constant indices, we can look through it.
  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;

  gep_type_iterator GTI = gep_type_begin(GEP);
  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
       ++I, ++GTI) {
    ConstantInt *OpC = cast<ConstantInt>(*I);
    if (OpC->isZero()) continue;

    // Handle struct and array indices which add their offset to the pointer.
    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
    } else {
      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
      Offset += OpC->getSExtValue()*Size;
    }
  }

  // Re-sign extend from the pointer size if needed to get overflow edge cases.
  unsigned PtrSize = TD.getPointerSizeInBits();
  if (PtrSize < 64)
    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);

  return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
}
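// For example, with 64-bit pointers the decomposition of
//
//   %p = getelementptr {i32, i32}* %base, i64 1, i32 1
//
// accumulates 8 bytes for the array index (one 8-byte struct) plus 4 bytes
// for field 1, reporting base %base with Offset == 12.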
/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
/// memdep query of a load that ends up being a clobbering memory write (store,
/// memset, memcpy, memmove).  This means that the write *may* provide bits used
/// by the load but we can't be sure because the pointers don't mustalias.
///
/// Check this case to see if there is anything more we can do before we give
/// up.  This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
                                          Value *WritePtr,
                                          uint64_t WriteSizeInBits,
                                          const TargetData &TD) {
  // If the loaded or stored value is a first-class array or struct, don't try
  // to transform them.  We need to be able to bitcast to integer.
  if (LoadTy->isStructTy() || LoadTy->isArrayTy())
    return -1;

  int64_t StoreOffset = 0, LoadOffset = 0;
  Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
  Value *LoadBase =
    GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
  if (StoreBase != LoadBase)
    return -1;

  // If the load and store are to the exact same address, they should have been
  // a must alias.  AA must have gotten confused.
  // FIXME: Study to see if/when this happens.
  if (LoadOffset == StoreOffset) {
#if 0
    dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
    << "Base       = " << *StoreBase << "\n"
    << "Store Ptr  = " << *WritePtr << "\n"
    << "Store Offs = " << StoreOffset << "\n"
    << "Load Ptr   = " << *LoadPtr << "\n";
#endif
    return -1;
  }

  // If the load and store don't overlap at all, the store doesn't provide
  // anything to the load.  In this case, they really don't alias at all, AA
  // must have gotten confused.
  // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
  // remove this check, as it is duplicated with what we have below.
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);

  if ((WriteSizeInBits & 7) | (LoadSize & 7))
    return -1;
  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
  LoadSize >>= 3;

  bool isAAFailure = false;
  if (StoreOffset < LoadOffset) {
    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
  } else {
    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
  }

  if (isAAFailure) {
#if 0
    dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
    << "Base       = " << *StoreBase << "\n"
    << "Store Ptr  = " << *WritePtr << "\n"
    << "Store Offs = " << StoreOffset << "\n"
    << "Load Ptr   = " << *LoadPtr << "\n";
#endif
    return -1;
  }

  // If the load isn't completely contained within the stored bits, we don't
  // have all the bits to feed it.  We could do something crazy in the future
  // (issue a smaller load then merge the bits in) but this seems unlikely to be
  // valuable.
  if (StoreOffset > LoadOffset ||
      StoreOffset+StoreSize < LoadOffset+LoadSize)
    return -1;

  // Okay, we can do this transformation.  Return the number of bytes into the
  // store that the load is.
  return LoadOffset-StoreOffset;
}
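// For example, given
//
//   store i64 %v, i64* %p    ; bytes [0,8) from the common base
//   %x = load i16* %q        ; %q is base+4, so bytes [4,6)
//
// the load is fully contained in the store, and the function returns
// LoadOffset-StoreOffset = 4: the load reads the bytes starting four bytes
// into the stored value.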
/// AnalyzeLoadFromClobberingStore - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
                                          StoreInst *DepSI,
                                          const TargetData &TD) {
  // Cannot handle reading from store of first-class aggregate yet.
  if (DepSI->getOperand(0)->getType()->isStructTy() ||
      DepSI->getOperand(0)->getType()->isArrayTy())
    return -1;

  Value *StorePtr = DepSI->getPointerOperand();
  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                        StorePtr, StoreSize, TD);
}
static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
                                            MemIntrinsic *MI,
                                            const TargetData &TD) {
  // If the mem operation is a non-constant size, we can't handle it.
  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
  if (SizeCst == 0) return -1;
  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;

  // If this is memset, we just need to see if the offset is valid in the size
  // of the memset.
  if (MI->getIntrinsicID() == Intrinsic::memset)
    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
                                          MemSizeInBits, TD);

  // If we have a memcpy/memmove, the only case we can handle is if this is a
  // copy from constant memory.  In that case, we can read directly from the
  // constant memory.
  MemTransferInst *MTI = cast<MemTransferInst>(MI);

  Constant *Src = dyn_cast<Constant>(MTI->getSource());
  if (Src == 0) return -1;

  GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
  if (GV == 0 || !GV->isConstant()) return -1;

  // See if the access is within the bounds of the transfer.
  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
                                              MI->getDest(), MemSizeInBits, TD);
  if (Offset == -1)
    return Offset;

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  if (ConstantFoldLoadFromConstPtr(Src, &TD))
    return Offset;
  return -1;
}
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.  This means
/// that the store *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias.  Check this case to see if there is
/// anything more we can do before we give up.
static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
                                   const Type *LoadTy,
                                   Instruction *InsertPt, const TargetData &TD){
  LLVMContext &Ctx = SrcVal->getType()->getContext();

  uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // Compute which bits of the stored value are being used by the load.  Convert
  // to an integer type to start with.
  if (SrcVal->getType()->isPointerTy())
    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
  if (!SrcVal->getType()->isIntegerTy())
    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
                                   "tmp");

  // Shift the bits to the least significant depending on endianness.
  unsigned ShiftAmt;
  if (TD.isLittleEndian())
    ShiftAmt = Offset*8;
  else
    ShiftAmt = (StoreSize-LoadSize-Offset)*8;

  if (ShiftAmt)
    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");

  if (LoadSize != StoreSize)
    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
                                 "tmp");

  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
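// Continuing the example above (an 8-byte store feeding a 2-byte load at
// Offset == 4): StoreSize == 8 and LoadSize == 2, so ShiftAmt is 4*8 == 32 on
// a little-endian target and (8-2-4)*8 == 16 on a big-endian one; either way
// the desired bytes land in the low bits before the trunc to i16.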
/// GetMemInstValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
                                     const Type *LoadTy, Instruction *InsertPt,
                                     const TargetData &TD){
  LLVMContext &Ctx = LoadTy->getContext();
  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;

  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);

  // We know that this method is only called when the mem transfer fully
  // provides the bits for the load.
  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
    // independently of what the offset is.
    Value *Val = MSI->getValue();
    if (LoadSize != 1)
      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));

    Value *OneElt = Val;

    // Splat the value out to the right number of bits.
    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
      // If we can double the number of bytes set, do it.
      if (NumBytesSet*2 <= LoadSize) {
        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
        Val = Builder.CreateOr(Val, ShVal);
        NumBytesSet <<= 1;
        continue;
      }

      // Otherwise insert one byte at a time.
      Value *ShVal = Builder.CreateShl(Val, 1*8);
      Val = Builder.CreateOr(OneElt, ShVal);
      ++NumBytesSet;
    }

    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
  }

  // Otherwise, this is a memcpy/memmove from a constant global.
  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
  Constant *Src = cast<Constant>(MTI->getSource());

  // Otherwise, see if we can constant fold a load from the constant with the
  // offset applied as appropriate.
  Src = ConstantExpr::getBitCast(Src,
                                 llvm::Type::getInt8PtrTy(Src->getContext()));
  Constant *OffsetCst =
    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
  return ConstantFoldLoadFromConstPtr(Src, &TD);
}
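// For example, forwarding a 4-byte load from memset(%p, %ch, 64) splats the
// set byte: %ch is zext'd to i32, the doubling loop computes
// %ch | (%ch << 8), then or's that with itself shifted left 16, producing
// the i32 0xChChChCh.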
namespace {

struct AvailableValueInBlock {
  /// BB - The basic block in question.
  BasicBlock *BB;
  enum ValType {
    SimpleVal,  // A simple offsetted value that is accessed.
    MemIntrin   // A memory intrinsic which is loaded from.
  };

  /// V - The value that is live out of the block.
  PointerIntPair<Value *, 1, ValType> Val;

  /// Offset - The byte offset in Val that is interesting for the load query.
  unsigned Offset;

  static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                   unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(V);
    Res.Val.setInt(SimpleVal);
    Res.Offset = Offset;
    return Res;
  }

  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
                                     unsigned Offset = 0) {
    AvailableValueInBlock Res;
    Res.BB = BB;
    Res.Val.setPointer(MI);
    Res.Val.setInt(MemIntrin);
    Res.Offset = Offset;
    return Res;
  }

  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
  Value *getSimpleValue() const {
    assert(isSimpleValue() && "Wrong accessor");
    return Val.getPointer();
  }

  MemIntrinsic *getMemIntrinValue() const {
    assert(!isSimpleValue() && "Wrong accessor");
    return cast<MemIntrinsic>(Val.getPointer());
  }

  /// MaterializeAdjustedValue - Emit code into this block to adjust the value
  /// defined here to the specified type.  This handles various coercion cases.
  Value *MaterializeAdjustedValue(const Type *LoadTy,
                                  const TargetData *TD) const {
    Value *Res;
    if (isSimpleValue()) {
      Res = getSimpleValue();
      if (Res->getType() != LoadTy) {
        assert(TD && "Need target data to handle type mismatch case");
        Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(),
                                   *TD);

        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
                     << *getSimpleValue() << '\n'
                     << *Res << '\n' << "\n\n\n");
      }
    } else {
      Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset,
                                   LoadTy, BB->getTerminator(), *TD);
      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
                   << "  " << *getMemIntrinValue() << '\n'
                   << *Res << '\n' << "\n\n\n");
    }
    return Res;
  }
};

}
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate LI.  This returns the value
/// that should be used at LI's definition site.
static Value *ConstructSSAForLoadSet(LoadInst *LI,
                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
                                     const TargetData *TD,
                                     const DominatorTree &DT,
                                     AliasAnalysis *AA) {
  // Check for the fully redundant, dominating load case.  In this case, we can
  // just use the dominating value directly.
  if (ValuesPerBlock.size() == 1 &&
      DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent()))
    return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD);

  // Otherwise, we have to construct SSA form.
  SmallVector<PHINode*, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI);

  const Type *LoadTy = LI->getType();

  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    const AvailableValueInBlock &AV = ValuesPerBlock[i];
    BasicBlock *BB = AV.BB;

    if (SSAUpdate.HasValueForBlock(BB))
      continue;

    SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD));
  }

  // Perform PHI construction.
  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());

  // If new PHI nodes were created, notify alias analysis.
  if (V->getType()->isPointerTy())
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      AA->copyValue(LI, NewPHIs[i]);

  return V;
}
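// For example, if the loaded value is available as %v1 in %pred1 and %v2 in
// %pred2, the SSAUpdater replaces
//
//   merge:
//     %x = load i32* %p
//
// with
//
//   merge:
//     %x = phi i32 [ %v1, %pred1 ], [ %v2, %pred2 ]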
static bool isLifetimeStart(Instruction *Inst) {
  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
    return II->getIntrinsicID() == Intrinsic::lifetime_start;
  return false;
}
/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
                              SmallVectorImpl<Instruction*> &toErase) {
  // Find the non-local dependencies of the load.
  SmallVector<NonLocalDepResult, 64> Deps;
  MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
                                   Deps);
  //DEBUG(dbgs() << "INVESTIGATING NONLOCAL LOAD: "
  //             << Deps.size() << *LI << '\n');

  // If we had to process more than one hundred blocks to find the
  // dependencies, this load isn't worth worrying about.  Optimizing
  // it will be too expensive.
  if (Deps.size() > 100)
    return false;

  // If we had a phi translation failure, we'll have a single entry which is a
  // clobber in the current block.  Reject this early.
  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
    DEBUG(
      dbgs() << "GVN: non-local load ";
      WriteAsOperand(dbgs(), LI);
      dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
    );
    return false;
  }

  // Filter out useless results (non-locals, etc).  Keep track of the blocks
  // where we have a value available in repl, also keep track of whether we see
  // dependencies that produce an unknown value for the load (such as a call
  // that could potentially clobber the load).
  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
  SmallVector<BasicBlock*, 16> UnavailableBlocks;

  const TargetData *TD = 0;

  for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
    BasicBlock *DepBB = Deps[i].getBB();
    MemDepResult DepInfo = Deps[i].getResult();

    if (DepInfo.isClobber()) {
      // The address being loaded in this non-local block may not be the same as
      // the pointer operand of the load if PHI translation occurs.  Make sure
      // to consider the right address.
      Value *Address = Deps[i].getAddress();

      // If the dependence is to a store that writes to a superset of the bits
      // read by the load, we can extract the bits we need for the load from the
      // stored value.
      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
                                                      DepSI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                           DepSI->getOperand(0),
                                                                Offset));
            continue;
          }
        }
      }

      // If the clobbering value is a memset/memcpy/memmove, see if we can
      // forward a value on from it.
      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();
        if (TD && Address) {
          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
                                                        DepMI, *TD);
          if (Offset != -1) {
            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
                                                                  Offset));
            continue;
          }
        }
      }

      UnavailableBlocks.push_back(DepBB);
      continue;
    }
    Instruction *DepInst = DepInfo.getInst();

    // Loading the allocation -> undef.
    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
        // Loading immediately after lifetime begin -> undef.
        isLifetimeStart(DepInst)) {
      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                             UndefValue::get(LI->getType())));
      continue;
    }

    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
      // Reject loads and stores that are to the same address but are of
      // different types if we have to.
      if (S->getOperand(0)->getType() != LI->getType()) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();

        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0),
                                                        LI->getType(), *TD)) {
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                          S->getOperand(0)));
      continue;
    }

    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) {
      // If the types mismatch and we can't handle it, reject reuse of the load.
      if (LD->getType() != LI->getType()) {
        if (TD == 0)
          TD = getAnalysisIfAvailable<TargetData>();

        // If the stored value is larger or equal to the loaded value, we can
        // reuse it.
        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){
          UnavailableBlocks.push_back(DepBB);
          continue;
        }
      }

      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));
      continue;
    }

    UnavailableBlocks.push_back(DepBB);
    continue;
  }

  // If we have no predecessors that produce a known value for this load, exit
  // early.
  if (ValuesPerBlock.empty()) return false;

  // If all of the instructions we depend on produce a known value for this
  // load, then it is fully redundant and we can use PHI insertion to compute
  // its value.  Insert PHIs and remove the fully redundant value now.
  if (UnavailableBlocks.empty()) {
    DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n');

    // Perform PHI construction.
    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                      VN.getAliasAnalysis());
    LI->replaceAllUsesWith(V);

    if (isa<PHINode>(V))
      V->takeName(LI);
    if (V->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(V);
    VN.erase(LI);
    toErase.push_back(LI);
    ++NumGVNLoad;
    return true;
  }
  if (!EnablePRE || !EnableLoadPRE)
    return false;

  // Okay, we have *some* definitions of the value.  This means that the value
  // is available in some of our (transitive) predecessors.  Lets think about
  // doing PRE of this load.  This will involve inserting a new load into the
  // predecessor when it's not available.  We could do this in general, but
  // prefer to not increase code size.  As such, we only do this when we know
  // that we only have to insert *one* load (which means we're basically moving
  // the load, not inserting a new one).

  SmallPtrSet<BasicBlock *, 4> Blockers;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    Blockers.insert(UnavailableBlocks[i]);

  // Lets find first basic block with more than one predecessor.  Walk backwards
  // through predecessors if needed.
  BasicBlock *LoadBB = LI->getParent();
  BasicBlock *TmpBB = LoadBB;

  bool isSinglePred = false;
  bool allSingleSucc = true;
  while (TmpBB->getSinglePredecessor()) {
    isSinglePred = true;
    TmpBB = TmpBB->getSinglePredecessor();
    if (TmpBB == LoadBB) // Infinite (unreachable) loop.
      return false;
    if (Blockers.count(TmpBB))
      return false;
    if (TmpBB->getTerminator()->getNumSuccessors() != 1)
      allSingleSucc = false;
  }

  assert(TmpBB);
  LoadBB = TmpBB;

  // If we have a repl set with LI itself in it, this means we have a loop where
  // at least one of the values is LI.  Since this means that we won't be able
  // to eliminate LI even if we insert uses in the other predecessors, we will
  // end up increasing code size.  Reject this by scanning for LI.
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
    if (ValuesPerBlock[i].isSimpleValue() &&
        ValuesPerBlock[i].getSimpleValue() == LI) {
      // Skip cases where LI is the only definition, even for EnableFullLoadPRE.
      if (!EnableFullLoadPRE || e == 1)
        return false;
    }
  }
  // FIXME: It is extremely unclear what this loop is doing, other than
  // artificially restricting loadpre.
  if (isSinglePred) {
    bool isHot = false;
    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
      const AvailableValueInBlock &AV = ValuesPerBlock[i];
      if (AV.isSimpleValue())
        // "Hot" Instruction is in some loop (because it dominates its dep.
        // instruction).
        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
          if (DT->dominates(LI, I)) {
            isHot = true;
            break;
          }
    }

    // We are interested only in "hot" instructions. We don't want to do any
    // mis-optimizations here.
    if (!isHot)
      return false;
  }

  // Check to see how many predecessors have the loaded value fully
  // available.
  DenseMap<BasicBlock*, Value*> PredLoads;
  DenseMap<BasicBlock*, char> FullyAvailableBlocks;
  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
  for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
    FullyAvailableBlocks[UnavailableBlocks[i]] = false;

  bool NeedToSplitEdges = false;
  for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
       PI != E; ++PI) {
    BasicBlock *Pred = *PI;
    if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks))
      continue;
    PredLoads[Pred] = 0;

    if (Pred->getTerminator()->getNumSuccessors() != 1) {
      if (isa<IndirectBrInst>(Pred->getTerminator())) {
        DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
              << Pred->getName() << "': " << *LI << '\n');
        return false;
      }
      unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
      toSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
      NeedToSplitEdges = true;
    }
  }
  if (NeedToSplitEdges)
    return false;

  // Decide whether PRE is profitable for this load.
  unsigned NumUnavailablePreds = PredLoads.size();
  assert(NumUnavailablePreds != 0 &&
         "Fully available value should be eliminated above!");
  if (!EnableFullLoadPRE) {
    // If this load is unavailable in multiple predecessors, reject it.
    // FIXME: If we could restructure the CFG, we could make a common pred with
    // all the preds that don't have an available LI and insert a new load into
    // that one block.
    if (NumUnavailablePreds != 1)
      return false;
  }
  // Check if the load can safely be moved to all the unavailable predecessors.
  bool CanDoPRE = true;
  SmallVector<Instruction*, 8> NewInsts;
  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;

    // Do PHI translation to get its value in the predecessor if necessary.  The
    // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.

    // If all preds have a single successor, then we know it is safe to insert
    // the load on the pred (?!?), so we can insert code to materialize the
    // pointer if it is not available.
    PHITransAddr Address(LI->getOperand(0), TD);
    Value *LoadPtr = 0;
    if (allSingleSucc) {
      LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
                                                  *DT, NewInsts);
    } else {
      Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
      LoadPtr = Address.getAddr();
    }

    // If we couldn't find or insert a computation of this phi translated value,
    // we fail PRE.
    if (LoadPtr == 0) {
      DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
            << *LI->getOperand(0) << "\n");
      CanDoPRE = false;
      break;
    }

    // Make sure it is valid to move this load here.  We have to watch out for:
    //  @1 = getelementptr (i8* p, ...
    //  test p and branch if == 0
    //  load @1
    // It is valid to have the getelementptr before the test, even if p can be 0,
    // as getelementptr only does address arithmetic.
    // If we are not pushing the value through any multiple-successor blocks
    // we do not have this case.  Otherwise, check that the load is safe to
    // put anywhere; this can be improved, but should be conservatively safe.
    if (!allSingleSucc &&
        // FIXME: REEVALUATE THIS.
        !isSafeToLoadUnconditionally(LoadPtr,
                                     UnavailablePred->getTerminator(),
                                     LI->getAlignment(), TD)) {
      CanDoPRE = false;
      break;
    }

    I->second = LoadPtr;
  }

  if (!CanDoPRE) {
    while (!NewInsts.empty())
      NewInsts.pop_back_val()->eraseFromParent();
    return false;
  }
  // Okay, we can eliminate this load by inserting a reload in the predecessor
  // and using PHI construction to get the value in the other predecessors, do
  // it.
  DEBUG(dbgs() << "GVN REMOVING PRE LOAD: " << *LI << '\n');
  DEBUG(if (!NewInsts.empty())
          dbgs() << "INSERTED " << NewInsts.size() << " INSTS: "
                 << *NewInsts.back() << '\n');

  // Assign value numbers to the new instructions.
  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
    // FIXME: We really _ought_ to insert these value numbers into their
    // parent's availability map.  However, in doing so, we risk getting into
    // ordering issues.  If a block hasn't been processed yet, we would be
    // marking a value as AVAIL-IN, which isn't what we intend.
    VN.lookup_or_add(NewInsts[i]);
  }

  for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
         E = PredLoads.end(); I != E; ++I) {
    BasicBlock *UnavailablePred = I->first;
    Value *LoadPtr = I->second;

    Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
                                  LI->getAlignment(),
                                  UnavailablePred->getTerminator());

    // Add the newly created load.
    ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
                                                        NewLoad));
    MD->invalidateCachedPointerInfo(LoadPtr);
    DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
  }

  // Perform PHI construction.
  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT,
                                    VN.getAliasAnalysis());
  LI->replaceAllUsesWith(V);
  if (isa<PHINode>(V))
    V->takeName(LI);
  if (V->getType()->isPointerTy())
    MD->invalidateCachedPointerInfo(V);
  VN.erase(LI);
  toErase.push_back(LI);
  ++NumPRELoad;
  return true;
}
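// For example, when the loaded value is available on only one path into a
// block, load PRE inserts a copy of the load into the predecessor that lacks
// it and then phis the two values together:
//
//   pred1:                            pred1:
//     %v = load i32* %p                 %v = load i32* %p
//     br label %merge                   br label %merge
//   pred2:                    =>      pred2:
//     br label %merge                   %v.pre = load i32* %p
//   merge:                            merge:
//     %x = load i32* %p                 %x = phi i32 [ %v, %pred1 ],
//                                                    [ %v.pre, %pred2 ]
//
// after which the original load is fully redundant and is erased.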
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
  if (!MD)
    return false;

  if (L->isVolatile())
    return false;

  // ... to a pointer that has been loaded from before...
  MemDepResult Dep = MD->getDependency(L);

  // If the value isn't available, don't do anything!
  if (Dep.isClobber()) {
    // Check to see if we have something like this:
    //   store i32 123, i32* %P
    //   %A = bitcast i32* %P to i8*
    //   %B = gep i8* %A, i32 1
    //   %C = load i8* %B
    //
    // We could do that by recognizing if the clobber instructions are obviously
    // a common base + constant offset, and if the previous store (or memset)
    // completely covers this load.  This sort of thing can happen in bitfield
    // access code.
    Value *AvailVal = 0;
    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
                                                    L->getPointerOperand(),
                                                    DepSI, *TD);
        if (Offset != -1)
          AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
                                          L->getType(), L, *TD);
      }

    // If the clobbering value is a memset/memcpy/memmove, see if we can forward
    // a value on from it.
    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
                                                      L->getPointerOperand(),
                                                      DepMI, *TD);
        if (Offset != -1)
          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
      }
    }

    if (AvailVal) {
      DEBUG(dbgs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
                   << *AvailVal << '\n' << *L << "\n\n\n");

      // Replace the load!
      L->replaceAllUsesWith(AvailVal);
      if (AvailVal->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(AvailVal);
      VN.erase(L);
      toErase.push_back(L);
      ++NumGVNLoad;
      return true;
    }

    DEBUG(
      // fast print dep, using operator<< on instruction would be too slow
      dbgs() << "GVN: load ";
      WriteAsOperand(dbgs(), L);
      Instruction *I = Dep.getInst();
      dbgs() << " is clobbered by " << *I << '\n';
    );
    return false;
  }
1798
// If it is defined in another block, try harder.
1799
if (Dep.isNonLocal())
1800
return processNonLocalLoad(L, toErase);

  Instruction *DepInst = Dep.getInst();
  if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
    Value *StoredVal = DepSI->getOperand(0);

    // The store and load are to a must-aliased pointer, but they may not
    // actually have the same type. See if we know how to reuse the stored
    // value (depending on its type).
    const TargetData *TD = 0;
    if (StoredVal->getType() != L->getType()) {
      if ((TD = getAnalysisIfAvailable<TargetData>())) {
        StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
                                                   L, *TD);
        if (StoredVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
                     << '\n' << *L << "\n\n\n");
      } else {
        return false;
      }
    }

    // Remove it!
    L->replaceAllUsesWith(StoredVal);
    if (StoredVal->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(StoredVal);
    VN.erase(L);
    toErase.push_back(L);
    NumGVNLoad++;
    return true;
  }
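  // Illustrative example of the coercion above (not from the original
  // source):
  //   store float 1.0, float* %P
  //   %Q = bitcast float* %P to i32*
  //   %v = load i32* %Q
  // The stored float is reinterpreted as an i32 bit pattern instead of
  // being re-loaded from memory.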

  if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) {
    Value *AvailableVal = DepLI;

    // The loads are of a must-aliased pointer, but they may not actually have
    // the same type. See if we know how to reuse the previously loaded value
    // (depending on its type).
    const TargetData *TD = 0;
    if (DepLI->getType() != L->getType()) {
      if ((TD = getAnalysisIfAvailable<TargetData>())) {
        AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
                                                      L, *TD);
        if (AvailableVal == 0)
          return false;

        DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
                     << "\n" << *L << "\n\n\n");
      } else {
        return false;
      }
    }

    // Remove it!
    L->replaceAllUsesWith(AvailableVal);
    if (DepLI->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(DepLI);
    VN.erase(L);
    toErase.push_back(L);
    NumGVNLoad++;
    return true;
  }

  // If this load really doesn't depend on anything, then we must be loading
  // an undef value. This can happen when loading from a fresh allocation
  // with no intervening stores, for example.
  if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
    L->replaceAllUsesWith(UndefValue::get(L->getType()));
    VN.erase(L);
    toErase.push_back(L);
    NumGVNLoad++;
    return true;
  }

  // If this load occurs right after a lifetime begin, the loaded value is
  // undefined.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(DepInst)) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      L->replaceAllUsesWith(UndefValue::get(L->getType()));
      VN.erase(L);
      toErase.push_back(L);
      NumGVNLoad++;
      return true;
    }
  }

  return false;
}

Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {
  DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);
  if (I == localAvail.end())
    return 0;

  // Walk up the chain of value-number scopes (one per dominator) until the
  // number is found or the chain is exhausted.
  ValueNumberScope *Locals = I->second;
  while (Locals) {
    DenseMap<uint32_t, Value*>::iterator FI = Locals->table.find(num);
    if (FI != Locals->table.end())
      return FI->second;
    Locals = Locals->parent;
  }

  return 0;
}

/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets.
bool GVN::processInstruction(Instruction *I,
                             SmallVectorImpl<Instruction*> &toErase) {
  // Ignore dbg info intrinsics.
  if (isa<DbgInfoIntrinsic>(I))
    return false;

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    bool Changed = processLoad(LI, toErase);

    if (!Changed) {
      unsigned Num = VN.lookup_or_add(LI);
      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));
    }

    return Changed;
  }

  uint32_t NextNum = VN.getNextUnusedValueNumber();
  unsigned Num = VN.lookup_or_add(I);

  if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));

    if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
      return false;

    Value *BranchCond = BI->getCondition();
    uint32_t CondVN = VN.lookup_or_add(BranchCond);

    BasicBlock *TrueSucc = BI->getSuccessor(0);
    BasicBlock *FalseSucc = BI->getSuccessor(1);

    // In a successor reached only through this branch, the condition has a
    // known constant value.
    if (TrueSucc->getSinglePredecessor())
      localAvail[TrueSucc]->table[CondVN] =
        ConstantInt::getTrue(TrueSucc->getContext());
    if (FalseSucc->getSinglePredecessor())
      localAvail[FalseSucc]->table[CondVN] =
        ConstantInt::getFalse(FalseSucc->getContext());
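    // Illustrative example (not from the original source): after
    //   br i1 %cmp, label %then, label %else
    // a use of %cmp inside %then (when %then has no other predecessors)
    // resolves to 'true' through the table, and to 'false' inside %else.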
    return false;

  // Allocations are always uniquely numbered, so we can save time and memory
  // by fast failing them.
  } else if (isa<AllocaInst>(I) || isa<TerminatorInst>(I)) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
    return false;
  }

  // Collapse PHI nodes.
  if (PHINode *p = dyn_cast<PHINode>(I)) {
    Value *constVal = CollapsePhi(p);

    if (constVal) {
      p->replaceAllUsesWith(constVal);
      if (MD && constVal->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(constVal);
      VN.erase(p);

      toErase.push_back(p);
    } else {
      localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
    }

  // If the number we were assigned was a brand new VN, then we don't
  // need to do a lookup to see if the number already exists
  // somewhere in the domtree: it can't!
  } else if (Num == NextNum) {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));

  // Perform fast-path value-number based elimination of values inherited from
  // dominators.
  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {
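    // Illustrative example (not from the original source):
    //   %a = add i32 %x, %y   ; in a dominating block, numbered N
    //   %b = add i32 %x, %y   ; here, also numbered N
    // lookupNumber finds %a for N in a dominating scope, so %b is replaced
    // with %a and deleted.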
    // Remove it!
    VN.erase(I);
    I->replaceAllUsesWith(repl);
    if (MD && repl->getType()->isPointerTy())
      MD->invalidateCachedPointerInfo(repl);
    toErase.push_back(I);
    return true;

  } else {
    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));
  }

  return false;
}

/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function& F) {
  if (!NoLoads)
    MD = &getAnalysis<MemoryDependenceAnalysis>();
  DT = &getAnalysis<DominatorTree>();
  VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
  VN.setMemDep(MD);
  VN.setDomTree(DT);

  bool Changed = false;
  bool ShouldContinue = true;

  // Merge unconditional branches, allowing PRE to catch more
  // optimization opportunities.
  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
    BasicBlock *BB = FI;
    ++FI;
    bool removedBlock = MergeBlockIntoPredecessor(BB, this);
    if (removedBlock) NumGVNBlocks++;

    Changed |= removedBlock;
  }

  unsigned Iteration = 0;

  while (ShouldContinue) {
    DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
    ShouldContinue = iterateOnFunction(F);
    if (splitCriticalEdges())
      ShouldContinue = true;
    Changed |= ShouldContinue;
    ++Iteration;
  }

  if (EnablePRE) {
    bool PREChanged = true;
    while (PREChanged) {
      PREChanged = performPRE(F);
      Changed |= PREChanged;
    }
  }

  // FIXME: Should perform GVN again after PRE does something. PRE can move
  // computations into blocks where they become fully redundant. Note that
  // we can't do this until PRE's critical edge splitting updates memdep.
  // Actually, when this happens, we should just fully integrate PRE into GVN.

  cleanupGlobalSets();

  return Changed;
}

bool GVN::processBlock(BasicBlock *BB) {
  // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and
  // incrementing BI before processing an instruction).
  SmallVector<Instruction*, 8> toErase;
  bool ChangedFunction = false;

  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
       BI != BE;) {
    ChangedFunction |= processInstruction(BI, toErase);
    if (toErase.empty()) {
      ++BI;
      continue;
    }

    // If we need some instructions deleted, do it now.
    NumGVNInstr += toErase.size();

    // Avoid iterator invalidation.
    bool AtStart = BI == BB->begin();
    if (!AtStart)
      --BI;

    for (SmallVectorImpl<Instruction*>::iterator I = toErase.begin(),
         E = toErase.end(); I != E; ++I) {
      DEBUG(dbgs() << "GVN removed: " << **I << '\n');
      if (MD) MD->removeInstruction(*I);
      (*I)->eraseFromParent();
      DEBUG(verifyRemoved(*I));
    }
    toErase.clear();

    // Re-establish the iterator around the deleted range.
    if (AtStart)
      BI = BB->begin();
    else
      ++BI;
  }

  return ChangedFunction;
}

/// performPRE - Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
  bool Changed = false;
  DenseMap<BasicBlock*, Value*> predMap;
  for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
       DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
    BasicBlock *CurrentBlock = *DI;

    // Nothing to PRE in the entry block.
    if (CurrentBlock == &F.getEntryBlock()) continue;

    for (BasicBlock::iterator BI = CurrentBlock->begin(),
         BE = CurrentBlock->end(); BI != BE; ) {
      Instruction *CurInst = BI++;

      if (isa<AllocaInst>(CurInst) ||
          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) ||
          CurInst->getType()->isVoidTy() ||
          CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
          isa<DbgInfoIntrinsic>(CurInst))
        continue;

      uint32_t ValNo = VN.lookup(CurInst);

      // Look for the predecessors for PRE opportunities. We're
      // only trying to solve the basic diamond case, where
      // a value is computed in the successor and one predecessor,
      // but not the other. We also explicitly disallow cases
      // where the successor is its own predecessor, because they're
      // more complicated to get right.
      unsigned NumWith = 0;
      unsigned NumWithout = 0;
      BasicBlock *PREPred = 0;
      predMap.clear();
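      // Illustrative diamond (not from the original source):
      //         BB0
      //        /   \
      //      BB1   BB2
      //        \   /
      //     CurrentBlock
      // If CurInst's value is available in BB1 but not in BB2, the scan
      // below finishes with NumWith == 1, NumWithout == 1 and PREPred ==
      // BB2, so the computation is inserted into BB2 and the two values
      // are joined with a phi here.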

      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
        // We're not interested in PRE where the block is its
        // own predecessor, or in blocks with predecessors
        // that are not reachable.
        if (*PI == CurrentBlock) {
          NumWithout = 2;
          break;
        } else if (!localAvail.count(*PI)) {
          NumWithout = 2;
          break;
        }

        DenseMap<uint32_t, Value*>::iterator predV =
          localAvail[*PI]->table.find(ValNo);
        if (predV == localAvail[*PI]->table.end()) {
          PREPred = *PI;
          NumWithout++;
        } else if (predV->second == CurInst) {
          NumWithout = 2;
        } else {
          predMap[*PI] = predV->second;
          NumWith++;
        }
      }

      // Don't do PRE when it might increase code size, i.e. when
      // we would need to insert instructions in more than one pred.
      if (NumWithout != 1 || NumWith == 0)
        continue;

      // Don't do PRE across indirect branches.
      if (isa<IndirectBrInst>(PREPred->getTerminator()))
        continue;

      // We can't do PRE safely on a critical edge, so instead we schedule
      // the edge to be split and perform the PRE the next time we iterate
      // on the function.
      unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
        continue;
      }
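      // Illustrative note (not from the original source): on a critical
      // edge, PREPred has another successor, so an instruction inserted at
      // the end of PREPred would also execute on paths that never reach
      // CurrentBlock. Splitting the edge first creates a safe insertion
      // point.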

      // Instantiate the expression in the predecessor that lacked it.
      // Because we are going top-down through the block, all value numbers
      // will be available in the predecessor by the time we need them. Any
      // that weren't originally present will have been instantiated earlier
      // in this loop.
      Instruction *PREInstr = CurInst->clone();
      bool success = true;
      for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {
        Value *Op = PREInstr->getOperand(i);
        if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))
          continue;

        if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {
          PREInstr->setOperand(i, V);
        } else {
          success = false;
          break;
        }
      }

      // Fail out if we encounter an operand that is not available in
      // the PRE predecessor. This is typically because of loads which
      // are not value numbered precisely.
      if (!success) {
        DEBUG(verifyRemoved(PREInstr));
        delete PREInstr;
        continue;
      }

      PREInstr->insertBefore(PREPred->getTerminator());
      PREInstr->setName(CurInst->getName() + ".pre");
      predMap[PREPred] = PREInstr;
      VN.add(PREInstr, ValNo);
      NumGVNPRE++;

      // Update the availability map to include the new instruction.
      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));

      // Create a PHI to make the value available in this block.
      PHINode *Phi = PHINode::Create(CurInst->getType(),
                                     CurInst->getName() + ".pre-phi",
                                     CurrentBlock->begin());
      for (pred_iterator PI = pred_begin(CurrentBlock),
           PE = pred_end(CurrentBlock); PI != PE; ++PI)
        Phi->addIncoming(predMap[*PI], *PI);

      VN.add(Phi, ValNo);
      localAvail[CurrentBlock]->table[ValNo] = Phi;

      CurInst->replaceAllUsesWith(Phi);
      if (MD && Phi->getType()->isPointerTy())
        MD->invalidateCachedPointerInfo(Phi);
      VN.erase(CurInst);

      DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
      if (MD) MD->removeInstruction(CurInst);
      CurInst->eraseFromParent();
      DEBUG(verifyRemoved(CurInst));
      Changed = true;
    }
  }

  if (splitCriticalEdges())
    Changed = true;

  return Changed;
}

/// splitCriticalEdges - Split critical edges found during the previous
/// iteration that may enable further optimization.
bool GVN::splitCriticalEdges() {
  if (toSplit.empty())
    return false;

  do {
    std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
    SplitCriticalEdge(Edge.first, Edge.second, this);
  } while (!toSplit.empty());

  if (MD) MD->invalidateCachedPredecessors();
  return true;
}

/// iterateOnFunction - Executes one iteration of GVN.
bool GVN::iterateOnFunction(Function &F) {
  cleanupGlobalSets();

  // Recreate the value-number scopes, one per block, each chained to the
  // scope of the block's immediate dominator.
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    if (DI->getIDom())
      localAvail[DI->getBlock()] =
        new ValueNumberScope(localAvail[DI->getIDom()->getBlock()]);
    else
      localAvail[DI->getBlock()] = new ValueNumberScope(0);
  }

  // Top-down walk of the dominator tree.
  bool Changed = false;
#if 0
  // Needed for value numbering with phi construction to work.
  ReversePostOrderTraversal<Function*> RPOT(&F);
  for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),
       RE = RPOT.end(); RI != RE; ++RI)
    Changed |= processBlock(*RI);
#else
  for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
       DE = df_end(DT->getRootNode()); DI != DE; ++DI)
    Changed |= processBlock(DI->getBlock());
#endif

  return Changed;
}

void GVN::cleanupGlobalSets() {
  VN.clear();

  for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
       I = localAvail.begin(), E = localAvail.end(); I != E; ++I)
    delete I->second;
  localAvail.clear();
}

/// verifyRemoved - Verify that the specified instruction does not occur in
/// our internal data structures.
void GVN::verifyRemoved(const Instruction *Inst) const {
  VN.verifyRemoved(Inst);

  // Walk through the value number scope to make sure the instruction isn't
  // ferreted away in it.
  for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
       I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
    const ValueNumberScope *VNS = I->second;

    while (VNS) {
      for (DenseMap<uint32_t, Value*>::const_iterator
           II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
        assert(II->second != Inst && "Inst still in value numbering scope!");
      }

      VNS = VNS->parent;
    }
  }
}