49
//////////////////////////////////////////////////////////////////////////////
47
//////////////////////////////////////////////////////////////////////////////
49
// |------| p1=p_port |------| p2=p_portNext
51
// | p1|>--------------------->| p2|----------------
52
// | | \ channels | | \ channels
53
// |------| \--|=====| |------| \--|=====|
54
// | VL0 |-| next chan | VL0 |
55
// |-----| | |========| |-----|
56
// | VL1 | \ | P0-VL0 | ---->| VL1 |
57
// |-----| \ |--------| / |-----|
58
// | ... | \->| P0-VL1 |-/ | ... |
59
// |=====| sl2vl|--------| |=====|
73
//////////////////////////////////////////////////////////////////////////////
77
//////////////////////////////////////////////////////////////////////////////
78
// We keep global flags to control how the check is being done:
80
// If non-zero, all switch-to-switch paths are considered too
81
static int CrdLoopIncludeUcastSwitchPaths = 0;
83
// If non-zero, multicast paths are considered too
84
static int CrdLoopIncludeMcastPaths = 0;
86
// Map each MLID to a list of SLs that may be used for this MGRP traffic
87
// If no entry is set then we will assume all traffic on SL=0
88
#define map_mlid_sl_list map<int, list< int >, less<int> >
89
static map_mlid_sl_list mlidSLs;
92
//////////////////////////////////////////////////////////////////////////////
51
93
// Apply DFS on a dependency graph
53
int CrdLoopDFS(VChannel* ch) {
55
if (ch->getFlag() == Closed)
94
int CrdLoopDFS(VChannel* ch)
97
if (ch->getFlag() == Closed)
100
if (ch->getFlag() == Open) {
101
cout << "Found credit loop on: " << ch->pPort->getName()
102
<< " VL: " << ch->vl << endl;
107
// Make recursive steps
108
for (int i=0; i<ch->getDependSize();i++) {
109
VChannel* next = ch->getDependency(i);
111
if (CrdLoopDFS(next)) {
112
cout << " - BT credit loop through: " << ch->pPort->getName()
113
<< " VL: " << ch->vl << endl;
58
if (ch->getFlag() == Open) {
63
// Make recursive steps
64
for (int i=0; i<ch->getDependSize();i++) {
65
VChannel* next = ch->getDependency(i);
76
124
//////////////////////////////////////////////////////////////////////////////
78
125
// Go over the CAs and apply DFS on the dependency graphs, starting from each CA's port
80
int CrdLoopFindLoops(IBFabric* p_fabric) {
81
unsigned int lidStep = 1 << p_fabric->lmc;
83
// go over all CA ports in the fabric
84
for (int i = p_fabric->minLid; i <= p_fabric->maxLid; i += lidStep) {
85
IBPort *p_Port = p_fabric->PortByLid[i];
86
if (!p_Port || (p_Port->p_node->type == IB_SW_NODE)) continue;
87
// Go over all CA's channels and find untouched one
88
for (int j=0;j < p_fabric->getNumSLs(); j++) {
89
dfs_t state = p_Port->channels[j]->getFlag();
91
cout << "-E- open channel outside of DFS" << endl;
94
// Already processed, continue
97
// Found starting point
98
if (CrdLoopDFS(p_Port->channels[j]))
126
int CrdLoopFindLoops(IBFabric* p_fabric)
128
unsigned int lidStep = 1 << p_fabric->lmc;
130
// go over all CA ports in the fabric
131
for (int i = p_fabric->minLid; i <= p_fabric->maxLid; i += lidStep) {
132
IBPort *p_Port = p_fabric->PortByLid[i];
133
if (!p_Port || (p_Port->p_node->type == IB_SW_NODE)) continue;
134
// Go over all CA's channels and find untouched one
135
for (int j=0;j < p_fabric->getNumSLs(); j++) {
136
dfs_t state = p_Port->channels[j]->getFlag();
138
cout << "-E- open channel outside of DFS" << endl;
141
// Already processed, continue
144
// Found starting point
145
if (CrdLoopDFS(p_Port->channels[j]))
106
153
//////////////////////////////////////////////////////////////////////////////
108
154
// Trace a route from slid to dlid by LFT
109
155
// Add dependency edges
110
int CrdLoopMarkRouteByLFT (
112
unsigned int sLid , unsigned int dLid
115
IBPort *p_port = p_fabric->getPortByLid(sLid);
118
unsigned int lidStep = 1 << p_fabric->lmc;
119
int outPortNum = 0, inputPortNum = 0, hopCnt = 0;
124
cout << "-E- Provided source:" << sLid
125
<< " lid is not mapped to a port!" << endl;
129
// Retrieve the relevant SL
131
SL = VL = p_port->p_node->getPSLForLid(dLid);
133
if (!p_port->p_remotePort) {
134
cout << "-E- Provided starting point is not connected !"
135
<< "lid:" << sLid << endl;
139
if (SL == IB_SLT_UNASSIGNED) {
140
cout << "-E- SL to destination is unassigned !"
141
<< "slid: " << sLid << "dlid:" << dLid << endl;
145
// check if we are done:
146
done = ((p_port->p_remotePort->base_lid <= dLid) &&
147
(p_port->p_remotePort->base_lid+lidStep - 1 >= dLid));
149
// Get the node on the remote side
150
p_node = p_port->p_remotePort->p_node;
151
// Get remote port's number
152
inputPortNum = p_port->p_remotePort->num;
153
// Get number of ports on the remote side
154
int numPorts = p_node->numPorts;
155
// Init vchannel's number of possible dependencies
156
p_port->channels[VL]->setDependSize((numPorts+1)*p_fabric->getNumVLs());
158
// Get port num of the next hop
159
outPortNum = p_node->getLFTPortForLid(dLid);
160
// Get VL of the next hop
161
int nextVL = p_node->getSLVL(inputPortNum,outPortNum,SL);
163
if (outPortNum == IB_LFT_UNASSIGNED) {
164
cout << "-E- Unassigned LFT for lid:" << dLid << " Dead end at:" << p_node->name << endl;
168
if (nextVL == IB_SLT_UNASSIGNED) {
169
cout << "-E- Unassigned SL2VL entry, iport: "<< inputPortNum<<", oport:"<<outPortNum<<", SL:"<<(int)SL<< endl;
173
// get the next port on the other side
174
p_portNext = p_node->getPort(outPortNum);
177
p_portNext->p_remotePort &&
178
p_portNext->p_remotePort->p_node)) {
179
cout << "-E- Dead end at:" << p_node->name << endl;
183
p_port->channels[VL]->setDependency(outPortNum*p_fabric->getNumVLs()+nextVL,p_portNext->channels[nextVL]);
187
if (hopCnt++ > 256) {
188
cout << "-E- Aborting after 256 hops - loop in LFT?" << endl;
156
int CrdLoopMarkRouteByLFT(IBFabric *p_fabric,
157
unsigned int sLid , unsigned int dLid)
159
IBPort *p_port = p_fabric->getPortByLid(sLid);
162
unsigned int lidStep = 1 << p_fabric->lmc;
163
int outPortNum = 0, inputPortNum = 0, hopCnt = 0;
168
cout << "-E- Provided source:" << sLid
169
<< " lid is not mapped to a port!" << endl;
173
// If started on a switch, need to use the correct output
174
// port, not the first one found by getPortByLid
175
if (p_port->p_node->type == IB_SW_NODE) {
176
int outPortNum = p_port->p_node->getLFTPortForLid(dLid);
177
if (outPortNum == IB_LFT_UNASSIGNED) {
178
cout << "-E- Unassigned LFT for lid:" << dLid
179
<< " Dead end at:" << p_port->p_node->name << endl;
182
p_port = p_port->p_node->getPort(outPortNum);
185
// Retrieve the relevant SL
187
SL = VL = p_port->p_node->getPSLForLid(dLid);
189
if (!p_port->p_remotePort) {
190
cout << "-E- Provided starting point is not connected !"
191
<< "lid:" << sLid << endl;
195
if (SL == IB_SLT_UNASSIGNED) {
196
cout << "-E- SL to destination is unassigned !"
197
<< "slid: " << sLid << "dlid:" << dLid << endl;
201
// check if we are done:
192
202
done = ((p_port->p_remotePort->base_lid <= dLid) &&
193
(p_port->p_remotePort->base_lid+lidStep - 1 >= dLid));
203
(p_port->p_remotePort->base_lid+lidStep - 1 >= dLid));
205
// Get the node on the remote side
206
p_node = p_port->p_remotePort->p_node;
207
// Get remote port's number
208
inputPortNum = p_port->p_remotePort->num;
209
// Get number of ports on the remote side
210
int numPorts = p_node->numPorts;
211
// Init vchannel's number of possible dependencies
212
p_port->channels[VL]->setDependSize((numPorts+1)*p_fabric->getNumVLs());
214
// Get port num of the next hop
215
outPortNum = p_node->getLFTPortForLid(dLid);
216
// Get VL of the next hop
217
int nextVL = p_node->getSLVL(inputPortNum,outPortNum,SL);
219
if (outPortNum == IB_LFT_UNASSIGNED) {
220
cout << "-E- Unassigned LFT for lid:" << dLid << " Dead end at:" << p_node->name << endl;
224
if (nextVL == IB_SLT_UNASSIGNED) {
225
cout << "-E- Unassigned SL2VL entry, iport: "<< inputPortNum<<", oport:"<<outPortNum<<", SL:"<<(int)SL<< endl;
229
// get the next port on the other side
230
p_portNext = p_node->getPort(outPortNum);
233
p_portNext->p_remotePort &&
234
p_portNext->p_remotePort->p_node)) {
235
cout << "-E- Dead end at:" << p_node->name << endl;
239
p_port->channels[VL]->setDependency(
240
outPortNum*p_fabric->getNumVLs()+nextVL,p_portNext->channels[nextVL]);
244
if (hopCnt++ > 256) {
245
cout << "-E- Aborting after 256 hops - loop in LFT?" << endl;
249
done = ((p_port->p_remotePort->base_lid <= dLid) &&
250
(p_port->p_remotePort->base_lid+lidStep - 1 >= dLid));
199
256
/////////////////////////////////////////////////////////////////////////////
201
257
// Go over all CA to CA paths and connect dependent vchannels by an edge
204
CrdLoopConnectDepend(IBFabric* p_fabric)
206
unsigned int lidStep = 1 << p_fabric->lmc;
210
// go over all ports in the fabric
211
for ( i = p_fabric->minLid; i <= p_fabric->maxLid; i += lidStep) {
212
IBPort *p_srcPort = p_fabric->PortByLid[i];
214
if (!p_srcPort || (p_srcPort->p_node->type == IB_SW_NODE)) continue;
216
unsigned int sLid = p_srcPort->base_lid;
218
// go over all the rest of the ports:
219
for ( j = p_fabric->minLid; j <= p_fabric->maxLid; j += lidStep ) {
220
IBPort *p_dstPort = p_fabric->PortByLid[j];
222
// Avoid tracing to itself
223
if (i == j) continue;
225
if (! p_dstPort) continue;
227
if (p_dstPort->p_node->type == IB_SW_NODE) continue;
228
unsigned int dLid = p_dstPort->base_lid;
229
// go over all LMC combinations:
230
for (unsigned int l1 = 0; l1 < lidStep; l1++) {
231
for (unsigned int l2 = 0; l2 < lidStep; l2++) {
232
// Trace the path but record the input to output ports used.
233
if (CrdLoopMarkRouteByLFT(p_fabric, sLid + l1, dLid + l2)) {
234
cout << "-E- Fail to find a path from:"
235
<< p_srcPort->p_node->name << "/" << p_srcPort->num
236
<< " to:" << p_dstPort->p_node->name << "/" << p_dstPort->num
240
}// all LMC lids 2 */
241
} // all LMC lids 1 */
246
cout << "-E- Fail to traverse:" << anyError << " CA to CA paths" << endl;
259
CrdLoopConnectUcastDepend(IBFabric* p_fabric)
261
unsigned int lidStep = 1 << p_fabric->lmc;
265
// go over all ports in the fabric
266
for ( i = p_fabric->minLid; i <= p_fabric->maxLid; i += lidStep) {
267
IBPort *p_srcPort = p_fabric->PortByLid[i];
271
if (!CrdLoopIncludeUcastSwitchPaths &&
272
(p_srcPort->p_node->type == IB_SW_NODE))
275
unsigned int sLid = p_srcPort->base_lid;
277
// go over all the rest of the ports:
278
for ( j = p_fabric->minLid; j <= p_fabric->maxLid; j += lidStep ) {
279
IBPort *p_dstPort = p_fabric->PortByLid[j];
281
// Avoid tracing to itself
286
if (!CrdLoopIncludeUcastSwitchPaths &&
287
(p_dstPort->p_node->type == IB_SW_NODE))
290
unsigned int dLid = p_dstPort->base_lid;
291
// go over all LMC combinations:
292
for (unsigned int l1 = 0; l1 < lidStep; l1++) {
293
for (unsigned int l2 = 0; l2 < lidStep; l2++) {
294
// Trace the path but record the input to output ports used.
295
if (CrdLoopMarkRouteByLFT(p_fabric, sLid + l1, dLid + l2)) {
296
cout << "-E- Fail to find a path from:"
297
<< p_srcPort->p_node->name << "/" << p_srcPort->num
298
<< " to:" << p_dstPort->p_node->name << "/" << p_dstPort->num
302
}// all LMC lids 2 */
303
} // all LMC lids 1 */
308
cout << "-E- Fail to traverse:"
309
<< anyError << " CA to CA paths" << endl;
316
/////////////////////////////////////////////////////////////////////////////
317
// Go over all Multicast Groups on all switches and add the dependencies
318
// they create to the dependency graph
319
// Return number of errors it found
321
CrdLoopConnectMcastDepend(IBFabric* p_fabric)
323
// We support providing an SL list to each MLID by providing a special file
324
// HACK: we can ignore connectivity check of the MCG and treat every switch
325
// separately. The connectivity analysis can be run independently if loops
328
// HACK: the algorithm assumes a constant SL2VL which is not port dependent!
329
// otherwise it should have been propagating traffic from each CA and SW on
330
// the MGRP such that the out VL of the previous port is known...
334
// Create empty port-to-port P2P(sl,in,out) mask matrix for each SL
336
// Foreach SL of MLID
337
// Copy the MFT(MLID) port mask to the matrix
339
// Lookup VL by SL - this is where the hack comes in handy
342
// Create the dependency edge (port-driving-in-port, VL, out-port, VL)
348
for (map_str_pnode::const_iterator nI = p_fabric->NodeByName.begin();
349
nI != p_fabric->NodeByName.end(); nI++) {
350
IBNode *p_node = (*nI).second;
352
// we only do MFT on switches
353
if (p_node->type != IB_SW_NODE)
356
// allocate the array of dependencies:
357
uint8_t sl_in_out_dep[16][p_node->numPorts+1][p_node->numPorts+1];
358
memset(sl_in_out_dep, 0, sizeof(uint8_t)*16*(p_node->numPorts+1)*(p_node->numPorts+1));
361
for (unsigned int i = 0; i < p_node->MFT.size(); i++) {
365
map_mlid_sl_list::const_iterator mlidI = mlidSLs.find(i+0xc000);
366
if (mlidI != mlidSLs.end()) {
367
sls = (*mlidI).second;
372
// now go over each SL at a time
373
for (list<int>::const_iterator lI = sls.begin();
377
// check all ports of the MFT
378
uint64_t port_mask = p_node->MFT[i];
379
for (unsigned int inPortNum = 1; inPortNum <= p_node->numPorts; inPortNum++) {
380
// we only care about port that are part of the MCG
381
if ((((uint64_t)1) << inPortNum) & port_mask) {
382
for (unsigned int outPortNum = 1; outPortNum <= p_node->numPorts; outPortNum++) {
383
if ((((uint64_t)1) << outPortNum) & port_mask) {
384
if (inPortNum != outPortNum) {
385
sl_in_out_dep[sl][inPortNum][outPortNum] = 1;
394
// now convert the dependency matrix into channel graph edges:
396
for (unsigned int sl = 0; sl < 16; sl++) {
397
for (unsigned int inPortNum = 1; inPortNum <= p_node->numPorts; inPortNum++) {
398
for (unsigned int outPortNum = 1; outPortNum <= p_node->numPorts; outPortNum++) {
400
if (sl_in_out_dep[sl][inPortNum][outPortNum] != 1)
404
int vl = p_node->getSLVL(inPortNum,outPortNum,sl);
406
// Create the dependency edge (port-driving-in-port, VL, out-port, VL)
407
IBPort *p_outPort = p_node->getPort(outPortNum);
409
cout << "-E- Switch:" << p_node->name << " port:" << outPortNum
410
<< " is included in some MFT but is not connnected" << endl;
414
IBPort *p_inPort = p_node->getPort(inPortNum);
416
cout << "-E- Switch:" << p_node->name << " port:" << inPortNum
417
<< " is included in some MFT but is not connnected" << endl;
421
IBPort *p_drvPort = p_inPort->p_remotePort;
423
cout << "-E- Switch:" << p_node->name << " port:" << inPortNum
424
<< " is included in some MFT but has no remote port." << endl;
429
if (p_drvPort->p_node->type != IB_SW_NODE)
432
// Init vchannel's number of possible dependencies
433
p_drvPort->channels[vl]->setDependSize((p_node->numPorts+1)*p_fabric->getNumVLs());
435
// HACK: we assume the same VL was used entering to this node
436
p_drvPort->channels[vl]->setDependency(outPortNum*p_fabric->getNumVLs()+vl,
437
p_outPort->channels[vl]);
444
// Ref from LFT code:
445
// outPortNum is the FBD port
446
// get the next port on the other side
447
// p_portNext = p_node->getPort(outPortNum);
449
// p_port->channels[VL]->setDependency(outPortNum*p_fabric->getNumVLs()+nextVL,
450
// p_portNext->channels[nextVL]);
451
cout << "-I- MFT added " << addedEdges << " edges to links dependency graph" << endl;
253
456
//////////////////////////////////////////////////////////////////////////////
255
457
// Prepare the data model
257
CrdLoopPrepare(IBFabric *p_fabric) {
258
unsigned int lidStep = 1 << p_fabric->lmc;
260
// go over all ports in the fabric
261
for (int i = p_fabric->minLid; i <= p_fabric->maxLid; i += lidStep) {
262
IBPort *p_Port = p_fabric->PortByLid[i];
263
if (!p_Port) continue;
264
IBNode *p_node = p_Port->p_node;
266
if (p_node->type == IB_CA_NODE)
267
nL = p_fabric->getNumSLs();
459
CrdLoopPrepare(IBFabric *p_fabric)
461
unsigned int lidStep = 1 << p_fabric->lmc;
463
// go over all ports in the fabric
464
for (int i = p_fabric->minLid; i <= p_fabric->maxLid; i += lidStep) {
465
IBPort *p_Port = p_fabric->PortByLid[i];
468
IBNode *p_node = p_Port->p_node;
470
if (p_node->type == IB_CA_NODE)
471
nL = p_fabric->getNumSLs();
473
nL = p_fabric->getNumVLs();
474
// Go over all node's ports
475
for (int k=0;k<p_node->Ports.size();k++) {
476
IBPort* p_Port = p_node->Ports[k];
477
// Init virtual channel array
478
p_Port->channels.resize(nL);
479
for (int j=0;j<nL;j++)
480
p_Port->channels[j] = new VChannel(p_Port, j);
486
// Cleanup the data model
488
CrdLoopCleanup(IBFabric *p_fabric)
490
unsigned int lidStep = 1 << p_fabric->lmc;
492
// go over all ports in the fabric
493
for (int i = p_fabric->minLid; i <= p_fabric->maxLid; i += lidStep) {
494
IBPort *p_Port = p_fabric->PortByLid[i];
497
IBNode *p_node = p_Port->p_node;
499
if (p_node->type == IB_CA_NODE)
500
nL = p_fabric->getNumSLs();
502
nL = p_fabric->getNumVLs();
503
// Go over all node's ports
504
for (int k=0;k<p_node->Ports.size();k++) {
505
IBPort* p_Port = p_node->Ports[k];
506
for (int j=0;j<nL;j++)
507
if (p_Port->channels[j])
508
;//delete p_Port->channels[j];
514
//////////////////////////////////////////////////////////////////////////////
515
// Top Level Subroutine:
517
CrdLoopAnalyze(IBFabric *p_fabric)
520
cout << "-I- Analyzing Fabric for Credit Loops "
521
<< (int)p_fabric->getNumSLs() <<" SLs, "
522
<< (int)p_fabric->getNumVLs() << " VLs used." << endl;
524
// Init data structures
525
if (CrdLoopPrepare(p_fabric)) {
526
cout << "-E- Fail to prepare data structures." << endl;
529
// Create the dependencies for unicast traffic
530
if (CrdLoopConnectUcastDepend(p_fabric)) {
531
cout << "-E- Fail to build dependency graphs." << endl;
534
// Do multicast if required
535
if (CrdLoopIncludeMcastPaths) {
536
if ( CrdLoopConnectMcastDepend(p_fabric) ) {
537
cout << "-E- Fail to build multicast dependency graphs." << endl;
542
// Find the loops if exist
543
res = CrdLoopFindLoops(p_fabric);
545
cout << "-I- no credit loops found" << endl;
269
nL = p_fabric->getNumVLs();
270
// Go over all node's ports
271
for (int k=0;k<p_node->Ports.size();k++) {
272
IBPort* p_Port = p_node->Ports[k];
273
// Init virtual channel array
274
p_Port->channels.resize(nL);
275
for (int j=0;j<nL;j++)
276
p_Port->channels[j] = new VChannel;
547
cout << "-E- credit loops in routing"<<endl;
550
CrdLoopCleanup(p_fabric);
282
555
//////////////////////////////////////////////////////////////////////////////
284
// Top Level Subroutine:
286
CrdLoopAnalyze(IBFabric *p_fabric) {
289
cout << "-I- Analyzing Fabric for Credit Loops "<<(int)p_fabric->getNumSLs()<<" SLs, "<<(int)p_fabric->getNumVLs()<< " VLs used...";
290
// Init data structures
291
if (CrdLoopPrepare(p_fabric)) {
292
cout << "-E- Fail to prepare data structures." << endl;
295
// Create the dependencies
296
if (CrdLoopConnectDepend(p_fabric)) {
297
cout << "-E- Fail to build dependency graphs." << endl;
300
// Find the loops if exist
301
res = CrdLoopFindLoops(p_fabric);
303
cout << " no credit loops found" << endl;
305
cout << endl << "-E- credit loops in routing"<<endl;
557
CredLoopMode(int include_switch_to_switch_paths, int include_multicast)
559
CrdLoopIncludeUcastSwitchPaths = include_switch_to_switch_paths;
560
CrdLoopIncludeMcastPaths = include_multicast;