23
23
#include "BarrierTailReplication.h"
24
#include "llvm/Function.h"
25
24
#include "llvm/InstrTypes.h"
26
25
#include "llvm/Instructions.h"
30
27
using namespace llvm;
31
28
using namespace pocl;
33
30
#define BARRIER_FUNCTION_NAME "barrier"
35
static void find_barriers_dfs(BasicBlock *bb, std::set<BasicBlock *> &processed_bbs);
36
32
static bool block_has_barrier(const BasicBlock *bb);
37
static BasicBlock *replicate_subgraph(BasicBlock *entry,
39
static void find_subgraph(std::set<BasicBlock *> &subgraph,
41
static void replicate_basicblocks(std::set<BasicBlock *> &new_graph,
42
std::map<Value *, Value *> &reference_map,
43
const std::set<BasicBlock *> &graph,
45
static void update_references(const std::set<BasicBlock *>&graph,
46
const std::map<Value *, Value *> &reference_map);
50
36
RegisterPass<BarrierTailReplication> X("barriertails",
51
"Barrier tail replication pass",
37
"Barrier tail replication pass");
55
40
char BarrierTailReplication::ID = 0;
43
BarrierTailReplication::getAnalysisUsage(AnalysisUsage &AU) const
45
AU.addRequired<DominatorTree>();
46
AU.addPreserved<DominatorTree>();
47
AU.addRequired<LoopInfo>();
48
AU.addPreserved<LoopInfo>();
58
52
BarrierTailReplication::runOnFunction(Function &F)
60
std::set<BasicBlock *> processed_bbs;
62
find_barriers_dfs(&(F.getEntryBlock()), processed_bbs);
54
DT = &getAnalysis<DominatorTree>();
55
LI = &getAnalysis<LoopInfo>();
57
return ProcessFunction(F);
61
BarrierTailReplication::ProcessFunction(Function &F)
63
BasicBlockSet processed_bbs;
65
FindBarriersDFS(&F.getEntryBlock(), processed_bbs);
68
find_barriers_dfs(BasicBlock *bb, std::set<BasicBlock *> &processed_bbs)
74
// Recursively (depht-first) look for barriers in all possible
75
// execution paths starting on entry, replicating the barrier
76
// successors to ensure there is a separate function exit BB
77
// for each combination of traversed barriers. The set
78
// processed_bbs stores the
80
BarrierTailReplication::FindBarriersDFS(BasicBlock *bb,
81
BasicBlockSet &processed_bbs)
83
// Check if we already visited this BB (to avoid
84
// infinite recursion in case of unbarriered loops).
70
85
if (processed_bbs.count(bb) != 0)
73
88
processed_bbs.insert(bb);
90
TerminatorInst *t = bb->getTerminator();
75
91
Function *f = bb->getParent();
76
TerminatorInst *t = bb->getTerminator();
78
93
if (block_has_barrier(bb)) {
79
// Replicate all successors.
80
for (unsigned i = 0, e = t->getNumSuccessors(); i != e; ++i) {
81
BasicBlock *subgraph_entry = t->getSuccessor(i);
82
BasicBlock *replicated_subgraph_entry =
83
replicate_subgraph(subgraph_entry, f);
84
t->setSuccessor(i, replicated_subgraph_entry);
94
// This block has a barrier, replicate all successors.
95
// Even the path starting in an unique successor is replicated,
96
// as it the path might be joined by another path in a
97
// sucessor BB (see ifbarrier4.ll in tests).
99
// Loop check should not be needed here anymore, barriers
100
// have been canonicalized so they have exactly one succesor.
101
assert((t->getNumSuccessors() == 1) && "Not canonicalized barrier found!");
102
BasicBlock *subgraph_entry = t->getSuccessor(0);
103
BasicBlock *replicated_subgraph_entry =
104
ReplicateSubgraph(subgraph_entry, f);
105
t->setSuccessor(0, replicated_subgraph_entry);
107
// We have modified the function. Possibly created new loops.
108
// Update analysis passes.
109
DT->runOnFunction(*f);
111
LI->getBase().Calculate(DT->getBase());
88
114
// Find barriers in the successors (depth first).
89
115
for (unsigned i = 0, e = t->getNumSuccessors(); i != e; ++i)
90
find_barriers_dfs(t->getSuccessor(i), processed_bbs);
94
block_has_barrier(const BasicBlock *bb)
96
for (BasicBlock::const_iterator i = bb->begin(), e = bb->end();
98
if (const CallInst *c = dyn_cast<CallInst>(i)) {
99
const Value *v = c->getCalledValue();
100
if (v->getName().equals(BARRIER_FUNCTION_NAME))
109
replicate_subgraph(BasicBlock *entry, Function *f)
116
FindBarriersDFS(t->getSuccessor(i), processed_bbs);
121
BarrierTailReplication::ReplicateSubgraph(BasicBlock *entry,
111
124
// Find all basic blocks to replicate.
112
std::set<BasicBlock *> subgraph;
113
find_subgraph(subgraph, entry);
125
BasicBlockSet subgraph;
126
FindSubgraph(subgraph, entry);
115
128
// Replicate subgraph maintaining control flow.
116
std::set<BasicBlock *> v;
117
std::map<Value *, Value *> m;
118
replicate_basicblocks(v, m, subgraph, f);
119
update_references(v, m);
131
ReplicateBasicBlocks(v, m, subgraph, f);
132
UpdateReferences(v, m);
121
134
// Return entry block of replicated subgraph.
122
135
return cast<BasicBlock>(m[entry]);
126
find_subgraph(std::set<BasicBlock *> &subgraph,
140
BarrierTailReplication::FindSubgraph(BasicBlockSet &subgraph,
129
if (subgraph.count(entry) != 0)
132
143
subgraph.insert(entry);
134
145
const TerminatorInst *t = entry->getTerminator();
135
for (unsigned i = 0, e = t->getNumSuccessors(); i != e; ++i)
136
find_subgraph(subgraph, t->getSuccessor(i));
146
Loop *l = LI->getLoopFor(entry);
147
for (unsigned i = 0, e = t->getNumSuccessors(); i != e; ++i) {
148
BasicBlock *successor = t->getSuccessor(i);
149
if ((l != NULL) && (l->getHeader() == successor)) {
150
// This is a loop backedge. Do not find subgraphs across
154
FindSubgraph(subgraph, successor);
140
replicate_basicblocks(std::set<BasicBlock *> &new_graph,
141
std::map<Value *, Value *> &reference_map,
142
const std::set<BasicBlock *> &graph,
160
BarrierTailReplication::ReplicateBasicBlocks(BasicBlockSet &new_graph,
161
ValueValueMap &reference_map,
162
BasicBlockSet &graph,
145
for (std::set<BasicBlock *>::const_iterator i = graph.begin(),
165
for (BasicBlockSet::const_iterator i = graph.begin(),
148
168
BasicBlock *b = *i;
149
169
BasicBlock *new_b = BasicBlock::Create(b->getContext(),
170
b->getName() + ".btr",
152
172
reference_map.insert(std::make_pair(b, new_b));
153
173
new_graph.insert(new_b);