1
# This is the checker for a fat-tree routing check
3
##############################################################################
5
# Start up the test applications
6
# This flow starts OpenSM with the fat-tree routing engine (-R ftree) and the -V verbosity flag
7
# Return a list of process ids it started (to be killed on exit)
9
proc runner {simDir osmPath osmPortGuid} {
# Launches OpenSM in the background with the fat-tree routing engine
# (-R ftree) and then starts the log analyzer on the OpenSM log file.
#   simDir      - directory in which osm.log and osm.stdout.log are created
#   osmPath     - OpenSM executable to run
#   osmPortGuid - value handed to OpenSM through its -g option
10
set osmStdOutLog [file join $simDir osm.stdout.log]
11
set osmLog [file join $simDir osm.log]
12
# NOTE(review): the message below advertises "-d2", but the exec call that
# is actually issued does not pass -d2 - confirm which one is intended.
puts "-I- Starting: $osmPath -R ftree -d2 -V -g $osmPortGuid ..."
13
# Earlier invocation without "-R ftree", kept commented out for reference.
#set osmPid [exec $osmPath -f $osmLog -V -g $osmPortGuid > $osmStdOutLog &]
14
# The trailing "&" makes exec start OpenSM in the background and return the
# process id(s) instead of waiting; stdout is redirected to osm.stdout.log
# and the log file is selected with -f.
set osmPid [exec $osmPath -R ftree -f $osmLog -V -g $osmPortGuid > $osmStdOutLog &]
15
# Disabled variant that runs the same OpenSM command under valgrind memcheck.
#set osmPid [exec valgrind --tool=memcheck -v --log-file-exactly=/tmp/kliteyn/osm.valgrind.log $osmPath -R ftree -f $osmLog -V -g $osmPortGuid > $osmStdOutLog &]
17
# start a tracker on the log file and process:
startOsmLogAnalyzer $osmLog
23
##############################################################################
25
# Check for the test results
26
# 1. Make sure we got a "SUBNET UP"
27
# 2. Run ibdiagnet to check routing
28
# 3. Check that fat-tree routing has run to completion
29
# 4. Run congestion analysis
30
# 5. At each step, return the exit code in case of any failure
32
proc checker {simDir osmPath osmPortGuid} {
34
set osmLog [file join $simDir osm.log]
36
puts "-I- Waiting max time of 100sec...."
38
if {[osmWaitForUpOrDeadWithTimeout $osmLog 1000000]} {
44
set ibdiagnetLog [file join $simDir ibdiagnet.log]
45
set cmd "ibdiagnet -o $simDir"
47
puts "-I- Invoking $cmd "
48
if {[catch {set res [eval "exec $cmd > $ibdiagnetLog"]} e]} {
49
puts "-E- ibdiagnet failed with status:$e"
55
# Check that the fat-tree routing has run to completion.
56
# If it has, then opensm-ftree-ca-order.dump file should exist
57
# in the simulation directory.
58
set osmFtreeCAOrderDump [file join $simDir opensm-ftree-ca-order.dump]
59
if {[file exists $osmFtreeCAOrderDump]} {
60
puts "-I- Fat-tree CA ordering file exists"
62
puts "-E- Fat-tree CA ordering file doesn't exist"
63
puts "-E- Fat-tree routing hasn't run to normal completion"
67
set congestionScript "congestion"
68
set ibdiagnetLstFile [file join $simDir ibdiagnet.lst]
69
set ibdiagnetFdbsFile [file join $simDir ibdiagnet.fdbs]
70
set congestionLog [file join $simDir congestion.log]
71
set cmd "$congestionScript -o $ibdiagnetLstFile $ibdiagnetFdbsFile $osmFtreeCAOrderDump"
73
puts "-I- Running congestion analysis"
74
if {[catch {set res [eval "exec $cmd > $congestionLog"]} e]} {
75
puts "-E- Congestion analysis failed with status: $e"
79
puts "-I- Congestion analysis completed"
80
puts "-I- Parsing congestion log"
84
set f [open $congestionLog]
85
while {[gets $f sLine] >= 0} {
87
if {[regexp {.*TOTAL CONGESTION HISTOGRAM.*} $sLine match]} {
88
#seek three lines forward in the file
89
if {[gets $f sLine] < 0 || [gets $f sLine] < 0 || [gets $f sLine] < 0} {
90
puts "-E- Failed parsing congestion log: $congestionLog"
93
puts "-I- Total congestion histogram:"
94
while {[regexp {\s*(\d+)\s*(\d+)} $sLine match numPath numOutPorts]} {
95
puts "-I- - NumPaths: $numPath, NumOutPorts: $numOutPorts"
96
if { $maxNumPath < $numPath } {
97
set maxNumPath $numPath
100
if {[gets $f sLine] < 0} {
101
puts "-E- Failed parsing congestion log: $congestionLog"
107
if {[regexp {.*STAGE CONGESTION HISTOGRAM.*} $sLine match]} {
108
#seek three lines forward in the file
109
if {[gets $f sLine] < 0 || [gets $f sLine] < 0 || [gets $f sLine] < 0} {
110
puts "-E- Failed parsing congestion log: $congestionLog"
113
puts "-I- Stage congestion histogram:"
114
while {[regexp {\s*(\d+)\s*(\d+)} $sLine match worstCong numStages]} {
115
puts "-I- - WorstCong: $worstCong, NumStages: $numStages"
116
if { $maxWorstCong < $worstCong } {
117
set maxWorstCong $worstCong
120
if {[gets $f sLine] < 0} {
121
puts "-E- Failed parsing congestion log: $congestionLog"
129
if {$maxNumPath > 1 || $maxWorstCong > 1} {
130
puts "-E- FatTree routing is unbalanced"
134
puts "-I- FatTree routing is well-balanced"