10
10
# Copyright (C) 2006-2007 The Regents of the University of California.
11
11
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
12
12
# Written by Danny Auble <da@llnl.gov>
13
# CODE-OCEC-09-009. All rights reserved.
15
15
# This file is part of SLURM, a resource management program.
16
# For details, see <http://www.llnl.gov/linux/slurm/>.
16
# For details, see <https://computing.llnl.gov/linux/slurm/>.
17
# Please also read the included file: DISCLAIMER.
18
19
# SLURM is free software; you can redistribute it and/or modify it under
19
20
# the terms of the GNU General Public License as published by the Free
46
proc run_and_test { size } {
47
global number sbatch scontrol
48
global file_out file_err file_in procs_per_cnode
53
set sbatch_pid [spawn $sbatch --output=$file_out --error=$file_err -t2 -N$size-$size $file_in]
55
-re "Submitted batch job ($number)" {
56
set job_id $expect_out(1,string)
60
send_user "\nFAILURE: sbatch not responding\n"
69
if {!$job_id || $exit_code} {
70
send_user "\nFAILURE: batch submit failure\n"
77
if {[wait_for_job $job_id "DONE"] != 0} {
78
send_user "\nFAILURE: waiting for job to run\n"
83
set requested_cpus [expr $size * $procs_per_cnode]
85
set scon_pid [spawn $scontrol show job $job_id]
87
-re "AllocCPUs=$requested_cpus" {
92
send_user "\nFAILURE: scontrol not responding\n"
101
send_user "\nFAILURE: Bluegene $size cnode block not created correctly\n"
109
proc run_bgl_test { } {
112
# make a 512 cnode block
113
if {[run_and_test 512]} {
117
if {[run_and_test 128]} {
122
if {[run_and_test 32]} {
131
proc run_bgp_test { } {
134
# make a 512 cnode block
135
if {[run_and_test 512]} {
139
if {[run_and_test 256]} {
142
if {[run_and_test 128]} {
147
if {[run_and_test 64]} {
151
if {[run_and_test 32]} {
155
if {[run_and_test 16]} {
45
166
print_header $test_id
47
168
if {[test_bluegene] == 0} {
48
169
send_user "\nWARNING: This test is only compatable with bluegene systems\n"
173
if {[string compare [get_bluegene_layout] Dynamic]} {
174
send_user "\nWARNING: This test is only compatable with dynamic bluegene systems\n"
178
set psets [get_bluegene_psets]
181
send_user "\nFAILURE: No psets are set on this system\n"
185
set procs_per_cnode [get_bluegene_procs_per_cnode]
187
if {$procs_per_cnode == 0} {
188
send_user "\nFAILURE: Couldn't determine procs per cnode\n"
192
set type [get_bluegene_type]
195
send_user "\nFAILURE: No bluegene type found \n"
52
201
# Delete left-over input script files
53
202
# Build input script file
61
210
set timeout [expr $max_job_delay + $sleep_time]
63
# make a 512 cnode block
65
set sbatch_pid [spawn $sbatch --output=$file_out --error=$file_err -t2 -N512-512 $file_in]
67
-re "Submitted batch job ($number)" {
68
set job_id $expect_out(1,string)
72
send_user "\nFAILURE: sbatch not responding\n"
81
send_user "\nFAILURE: batch submit failure\n"
88
if {[wait_for_job $job_id "DONE"] != 0} {
89
send_user "\nFAILURE: waiting for job to run\n"
96
spawn $scontrol show job $job_id
98
-re "BP_List=$alpha_numeric " {
102
-re "AllocCPUs=($number)" {
103
set cpu_count $expect_out(1,string)
104
set cpus_per_node [expr $cpu_count / 512]
105
if {($cpu_count != 512) && ($cpu_count != 1024)} {
106
send_user "\nFAILURE: Bad node count allocated\n"
112
send_user "\nFAILURE: scontrol not responding\n"
120
send_user "\nFAILURE: Blue Gene 512 cnode block not created correctly\n"
123
if {$exit_code != 0} {
127
# make a 128 cnode block
130
set sbatch_pid [spawn $sbatch --output=$file_out --error=$file_err -t2 -N128-128 $file_in]
132
-re "Submitted batch job ($number)" {
133
set job_id $expect_out(1,string)
137
send_user "\nFAILURE: sbatch not responding\n"
138
slow_kill $sbatch_pid
146
send_user "\nFAILURE: batch submit failure\n"
151
# Wait for job to run
153
if {[wait_for_job $job_id "DONE"] != 0} {
154
send_user "\nFAILURE: waiting for job to run\n"
160
spawn $scontrol show job $job_id
162
-re "BP_List=$alpha_numeric\[$number-$number\] " {
166
-re "AllocCPUs=($number)" {
167
set cpu_count $expect_out(1,string)
168
set node_count [expr $cpu_count / $cpus_per_node]
169
if {$node_count != 128} {
170
send_user "\n=======================================\n"
171
send_user "\nFAILURE: Bad node count allocated\n"
172
send_user "This can indicate a low configured value of Numpsets\n"
173
send_user " in bluegene.conf (few available I/O nodes).\n"
174
send_user "This could indicate Static Bluegene partitioning\n"
175
send_user " with no small blocks (1/4 of a base partition).\n"
176
send_user "This could also indicate MinNodes for the partition\n"
177
send_user " being too high for this test.\n"
178
send_user "=======================================\n"
184
send_user "\nFAILURE: scontrol not responding\n"
192
send_user "\nFAILURE: Blue Gene 128 cnode block not created correctly\n"
196
# make a 32 cnode block
198
set sbatch_pid [spawn $sbatch --output=$file_out --error=$file_err -t2 -N32-32 $file_in]
200
-re "Submitted batch job ($number)" {
201
set job_id $expect_out(1,string)
205
send_user "\nFAILURE: sbatch not responding\n"
206
slow_kill $sbatch_pid
214
send_user "\nFAILURE: batch submit failure\n"
219
# Wait for job to run
221
if {[wait_for_job $job_id "DONE"] != 0} {
222
send_user "\nFAILURE: waiting for job to run\n"
227
spawn $scontrol show job $job_id
229
# Could be one or multiple node cards (e.g. "bgl000[0]" or "bgl000[0-1]")
230
-re "BP_List=$alpha_numeric\[$number" {
234
-re "AllocCPUs=($number)" {
235
set cpu_count $expect_out(1,string)
236
set node_count [expr $cpu_count / $cpus_per_node]
237
if {$node_count != 32} {
238
send_user "\n=======================================\n"
239
send_user "FAILURE: Bad node count allocated\n"
240
send_user "This can indicate a low configured value of Numpsets\n"
241
send_user " in bluegene.conf (few available I/O nodes).\n"
242
send_user "This could indicate Static Bluegene partitioning\n"
243
send_user " with no small blocks (1/16 of a base partition).\n"
244
send_user "This could also indicate MinNodes for the partition\n"
245
send_user " being too high for this test.\n"
246
send_user "=======================================\n"
252
send_user "\nFAILURE: scontrol not responding\n"
260
send_user "\nFAILURE: Blue Gene 32 cnode block not created correctly\n"
212
if {![string compare $type "P"]} {
213
set exit_code [run_bgp_test]
214
} elseif {![string compare $type "L"]} {
215
set exit_code [run_bgl_test]
217
send_user "\nFAILURE: unknown bluegene system type '$type'\n";
264
222
if {$exit_code == 0} {
265
223
exec rm -f $file_in $file_out $file_err