~ubuntu-branches/ubuntu/precise/slurm-llnl/precise

1.3.7 by Gennaro Oliva
Import upstream version 2.1.0
1
#!/usr/bin/expect
2
############################################################################
3
# Purpose: Test of SLURM functionality
4
#          Test of sinfo cpu total and allocated
5
#
6
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
7
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
8
#          anything else indicates a failure mode that must be investigated.
9
############################################################################
10
# Copyright (C) 2009 Lawrence Livermore National Security.
11
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
12
# Written by Joseph Donaghy <donaghy1@llnl.gov>
13
# CODE-OCEC-09-009. All rights reserved.
14
#
15
# This file is part of SLURM, a resource management program.
16
# For details, see <https://computing.llnl.gov/linux/slurm/>.
17
# Please also read the included file: DISCLAIMER.
18
#
19
# SLURM is free software; you can redistribute it and/or modify it under
20
# the terms of the GNU General Public License as published by the Free
21
# Software Foundation; either version 2 of the License, or (at your option)
22
# any later version.
23
#
24
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
25
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
26
# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
27
# details.
28
#
29
# You should have received a copy of the GNU General Public License along
30
# with SLURM; if not, write to the Free Software Foundation, Inc.,
31
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
32
############################################################################
33
source ./globals

# Identification of this test.
set test_id "4.12"
set file_in "test$test_id.in"

# Numeric state shared by the script, all starting at zero
# (classic multi-assignment idiom: foreach ... break).
foreach {exit_code pnumsc pnumsi aprocsc aprocsi inode_found node_scaling} \
	{0 0 0 0 0 0 0} break

# Smallest allocation unit; recomputed once the select plugin is known.
set smallest 1

# String state; filled in after the system has been probed.
set inode_name ""
set select_type ""
set layout "static"
48
49
# scontrol_test - check the allocated CPU count scontrol reports for a node.
#
# Sends "scontrol show node <node>" to the process spawned by the caller and
# compares every CPUAlloc=<n> value found in the reply against proc_cnt.
#
# Arguments:
#   node     - name of the node to query
#   proc_cnt - number of CPUs expected to be allocated on the node
#
# Returns:
#   0 if a CPUAlloc value was seen and matched proc_cnt;
#   1 on mismatch, if no CPUAlloc value was seen, or on timeout.
#
# The caller's spawn_id and mypid are reached through upvar, so this must be
# called from the procedure that spawned the process.
proc scontrol_test { node proc_cnt } {
	global scontrol number prompt
	upvar spawn_id spawn_id
	# mypid is a local of the caller; without this link the timeout branch
	# fails with "can't read mypid" instead of killing the spawned process.
	upvar mypid mypid

	set found 0
	set rc 0

	send "$scontrol show node $node\r"
	expect {
		-re "CPUAlloc=($number)" {
			set num_alloc $expect_out(1,string)
			set found 1
			if {$proc_cnt != $num_alloc} {
				send_user "\nFAILURE: requested $proc_cnt but got $num_alloc instead\n"
				set rc 1
			}
			exp_continue
		}
		-re $prompt {
		}
		timeout {
			send_user "\nFAILURE: scontrol not responding\n"
			slow_kill $mypid
			return 1
		}
		eof {
			wait
		}
	}

	if {!$found} {
		send_user "\nFAILURE: didn't get expected output from scontrol\n"
		set rc 1
	}

	return $rc
}
86
87
# sinfo_test_1 - check the per-state CPU counts sinfo reports for a node.
#
# Sends 'sinfo -o "%C %A %N" -h -n <node>' to the process spawned by the
# caller. The first four numbers of the reply are read as allocated, idle,
# other and total CPUs; a trailing "K" on any of them multiplies it by 1024.
# The two node counts that follow (%A) are matched but not checked here.
#
# Arguments:
#   node        - name of the node to query
#   proc_cnt    - expected number of allocated CPUs
#   total_procs - expected total number of CPUs
#   idle_cpus   - expected number of idle CPUs
#
# Returns:
#   0 if a matching line was seen and all three checked counts agree;
#   1 on the first mismatch, if no matching line was seen, or on timeout.
#
# The caller's spawn_id and mypid are reached through upvar, so this must be
# called from the procedure that spawned the process.
proc sinfo_test_1 { node proc_cnt total_procs idle_cpus } {
	global sinfo number prompt
	upvar spawn_id spawn_id
	# mypid is a local of the caller; without this link the timeout branch
	# fails with "can't read mypid" instead of killing the spawned process.
	upvar mypid mypid

	set found 0
	set rc 0

	send "$sinfo -o \"%C %A %N\" -h -n $node\r"
	# make sure we get by the echoed sinfo command so we don't
	# think the %'s are a prompt
	expect {
		"%C %A %N" {
		}
	}
	expect {
		-re "($number)(K?).($number)(K?).($number)(K?).($number)(K?) ($number)(K?).($number)(K?) $node" {
			set found 1
			# Groups 1/3/5/7 are the counts, 2/4/6/8 the optional
			# "K" suffix meaning "times 1024".
			set num_alloc $expect_out(1,string)
			if {[string compare $expect_out(2,string) ""]} {
				set num_alloc [expr {$num_alloc * 1024}]
			}
			set num_idle $expect_out(3,string)
			if {[string compare $expect_out(4,string) ""]} {
				set num_idle [expr {$num_idle * 1024}]
			}
			set num_other $expect_out(5,string)
			if {[string compare $expect_out(6,string) ""]} {
				set num_other [expr {$num_other * 1024}]
			}
			set num_total $expect_out(7,string)
			if {[string compare $expect_out(8,string) ""]} {
				set num_total [expr {$num_total * 1024}]
			}

			if { $num_alloc != $proc_cnt } {
				send_user "\nFAILURE: sinfo 1 allocated cpus wrong, got $num_alloc but needed $proc_cnt\n"
				set rc 1
			} elseif { $num_idle != $idle_cpus } {
				send_user "\nFAILURE: sinfo 1 idle cpus wrong, got $num_idle but needed $idle_cpus\n"
				set rc 1
			} elseif { $num_total != $total_procs } {
				send_user "\nFAILURE: sinfo 1 total cpus wrong, got $num_total but needed $total_procs\n"
				set rc 1
			}
			exp_continue
		}
		-re $prompt {
		}
		timeout {
			send_user "\nFAILURE: sinfo not responding\n"
			slow_kill $mypid
			return 1
		}
		eof {
			wait
		}
	}

	if {!$found} {
		send_user "\nFAILURE: didn't get expected output from sinfo\n"
		set rc 1
	}

	return $rc
}
152
153
# sinfo_test_2 - check the allocated/idle node counts sinfo reports for a node.
#
# Sends 'sinfo -o "%t %D %N" -h -n <node>' to the process spawned by the
# caller and reads the counts on the "alloc" and "idle" lines; a trailing
# "K" on a count multiplies it by 1024.
#
# Expected values: one allocated and one total node, unless the global
# node_scaling is non-zero, in which case proc_cnt and total_procs are each
# divided by node_scaling. Expected idle nodes = total - allocated.
#
# Arguments:
#   node        - name of the node to query
#   proc_cnt    - number of CPUs allocated by the caller
#   total_procs - total number of CPUs on the node
#
# Returns:
#   0 if the reported allocated and idle counts match the expected values;
#   1 on mismatch or on timeout.
#
# The caller's spawn_id and mypid are reached through upvar, so this must be
# called from the procedure that spawned the process.
proc sinfo_test_2 { node proc_cnt total_procs } {
	global sinfo number prompt node_scaling
	upvar spawn_id spawn_id
	# mypid is a local of the caller; without this link the timeout branch
	# fails with "can't read mypid" instead of killing the spawned process.
	upvar mypid mypid

	set rc 0
	set num_alloc 0
	set num_idle 0
	set alloc_nodes 1
	set total_nodes 1

	if {$node_scaling} {
		set alloc_nodes [expr {$proc_cnt / $node_scaling}]
		set total_nodes [expr {$total_procs / $node_scaling}]
	}
	set idle_nodes [expr {$total_nodes - $alloc_nodes}]

	send "$sinfo -o \"%t %D %N\" -h -n $node\r"
	# make sure we get by the echoed sinfo command so we don't
	# think the %'s are a prompt
	expect {
		"%t %D %N" {
		}
	}
	expect {
		-re "alloc ($number)(K?) $node" {
			set num_alloc $expect_out(1,string)
			if {[string compare $expect_out(2,string) ""]} {
				# Scale the value just read; the previous code
				# multiplied an undefined variable here.
				set num_alloc [expr {$num_alloc * 1024}]
			}
			exp_continue
		}
		-re "idle ($number)(K?) $node" {
			set num_idle $expect_out(1,string)
			if {[string compare $expect_out(2,string) ""]} {
				set num_idle [expr {$num_idle * 1024}]
			}
			exp_continue
		}
		-re $prompt {
		}
		timeout {
			send_user "\nFAILURE: sinfo not responding\n"
			slow_kill $mypid
			return 1
		}
		eof {
			wait
		}
	}

	if { $num_alloc != $alloc_nodes } {
		send_user "\nFAILURE: sinfo 2 allocated nodes wrong, got $num_alloc but needed $alloc_nodes\n"
		set rc 1
	} elseif { $num_idle != $idle_nodes } {
		send_user "\nFAILURE: sinfo 2 idle nodes wrong, got $num_idle but needed $idle_nodes\n"
		set rc 1
	}

	return $rc
}
213
214
# allocate_and_quit - allocate proc_cnt CPUs on one node, verify what
# scontrol and sinfo report for it, then leave the allocation.
#
# Spawns "salloc -w <node> -N1 -n <proc_cnt> bash". At the shell prompt it
# runs scontrol_test, sinfo_test_1 and sinfo_test_2 in that order; the first
# one that fails makes this procedure send "exit" to the shell and return 1
# immediately. Otherwise "exit" is sent and output is read until eof.
#
# Arguments:
#   node        - name of the node to allocate on
#   proc_cnt    - number of CPUs to request
#   total_procs - total number of CPUs on the node
#
# Returns:
#   0 on success; 1 if any check failed, salloc could not contact slurm,
#   or salloc stopped responding (the job is cancelled if its id is known).
#
# spawn_id and mypid are locals here that the three check procedures reach
# through upvar.
proc allocate_and_quit { node proc_cnt total_procs } {
	global salloc number prompt

	set job_id 0
	set rc 0
	set timeout 60
	set idle_cpus [expr {$total_procs - $proc_cnt}]

	set mypid [spawn $salloc -w $node -N1 -n $proc_cnt bash]
	expect {
		-re "Granted job allocation ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}

		-re $prompt {
			# test for scontrol to give me the correct cpu count
			if { [scontrol_test $node $proc_cnt] } {
				send "exit\r"
				return 1
			}

			# test for sinfo to give me the correct cpu count
			if { [sinfo_test_1 $node $proc_cnt $total_procs $idle_cpus] } {
				send "exit\r"
				return 1
			}
			# test for sinfo to give me the correct node count
			if { [sinfo_test_2 $node $proc_cnt $total_procs] } {
				send "exit\r"
				return 1
			}
			send "exit\r"
			exp_continue
		}

		-re "Unable to contact" {
			send_user "\nFAILURE: slurm appears to be down\n"
			# A reported FAILURE must also be reflected in the
			# return code, otherwise the caller prints SUCCESS.
			set rc 1
			exp_continue
		}
		timeout {
			send_user "\nFAILURE: salloc not responding\n"
			if {$job_id != 0} {
				cancel_job $job_id
			}
			slow_kill $mypid
			return 1
		}
		eof {
			wait
		}
	}

	return $rc
}
274
275
############################################################################
276
# test starts here
277
############################################################################
278
279
print_header $test_id

# find the default partition
set def_part [default_partition]

# Find the nodes in the default partition, and note whether the partition
# is configured Shared=EXCLUSIVE. Output of the spawned commands is hidden
# while parsing (log_user 0).
log_user 0
set def_hostlist ""
set part_exclusive 0
spawn $scontrol show part $def_part
expect {
	-re " Shared=EXCLUSIVE" {
		set part_exclusive 1
		exp_continue
	}
	-re " Nodes=($alpha_numeric_nodelist)"  {
		set def_hostlist $expect_out(1,string)
		exp_continue
	}
	-re " BasePartitions=($alpha_numeric_nodelist)" {
		set def_hostlist $expect_out(1,string)
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}

# Expand the host list into individual names; only the count is used below.
set host_cnt 0
spawn $scontrol show hostnames $def_hostlist
expect {
	-re "($alpha_numeric_under)"  {
		set host_name($host_cnt) $expect_out(1,string)
		incr host_cnt
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: scontrol not responding\n"
		set exit_code 1
	}
	eof {
		wait
	}
}
log_user 1
if {$host_cnt == 0} {
	send_user "\nFAILURE: could not find any nodes in default partition\n"
	exit 1
}
# exit_code is reassigned further down by the allocation test, so a timeout
# recorded above would otherwise be silently discarded. Stop here instead.
if {$exit_code != 0} {
	exit $exit_code
}
331
332
# Pick the first idle node of the default partition and record its CPU
# layout (total CPUs, sockets, cores per socket, threads per core).
log_user 0
set inode_name              ""
set units                   ""
set inode_procs             0
set inode_sockets           0
set inode_cores_per_socket  0
set inode_threads_per_core  0

# Read the one-line-per-node listing from a pipe and stop at the first line
# describing a node in State=IDLE.
set fd [open "|$scontrol --oneliner show node $def_hostlist"]
exp_internal 1
while {[gets $fd line] >= 0} {
	if {[regexp {NodeName=(\w+).*CoresPerSocket=(\d+).*CPUTot=(\d+)(K?).*Sockets=(\d+) State=IDLE ThreadsPerCore=(\d+)} $line frag inode_name inode_cores_per_socket inode_procs units inode_sockets inode_threads_per_core]} {
		break
	}
}
exp_internal 0
# Closing may fail if scontrol is still writing after the early break.
catch {close $fd}

# A "K" suffix on CPUTot means the count is given in units of 1024.
if {[string length $units] > 0} {
	set inode_procs [expr $inode_procs * 1024]
}

log_user 1

if {!$inode_procs} {
	send_user "\nFAILURE: couldn't find an idle node in the default partition\n"
	exit 1
}

send_user "found idle node $inode_name with $inode_procs processors\n"
362
363
# Work out "smallest", the smallest CPU count that can be allocated on the
# idle node, from the select plugin in use and its parameters.
set select_type [test_select_type]
switch -exact -- $select_type {
	bluegene {
		# Gather layout, pset count and system type; each is required.
		set layout [get_bluegene_layout]
		if {$layout == 0} {
			send_user "\nFAILURE: No layout mode found for this system\n"
			exit 1
		}

		set psets [get_bluegene_psets]
		if {$psets == 0} {
			send_user "\nFAILURE: No psets are set on this system\n"
			exit 1
		}

		set type [get_bluegene_type]
		if {$type == 0} {
			send_user "\nFAILURE: No bluegene type found \n"
			exit 1
		}

		switch -exact -- $type {
			P {
				# Threshold table: first pset limit reached wins,
				# 128 applies when none is reached.
				set smallest 128
				foreach {limit size} {32 16 16 32 8 64} {
					if {$psets >= $limit} {
						set smallest $size
						break
					}
				}
			}
			L {
				set smallest [expr {$psets >= 16 ? 32 : 128}]
			}
			default {
				send_user "\nFAILURE: unknown bluegene system type '$type'\n"
				exit 1
			}
		}

		# Convert the block size into a processor count.
		set node_scaling [get_bluegene_procs_per_cnode]
		set smallest [expr $smallest * $node_scaling]
	}
	linear {
		# Whole nodes only.
		set smallest $inode_procs
	}
	default {
		set select_params [test_select_type_params]
		if {$part_exclusive == 1} {
			set smallest $inode_procs
		} else {
			switch -exact -- $select_params {
				CR_CPU -
				CR_CPU_MEMORY -
				NONE -
				CR_CORE -
				CR_CORE_MEMORY {
					set smallest $inode_threads_per_core
				}
				CR_SOCKET -
				CR_SOCKET_MEMORY {
					set smallest [expr $inode_cores_per_socket *$inode_threads_per_core]
				}
				default {
					send_user "\nWARNING: failed to parse SelectTypeParameters '$select_params'\n"
					set smallest $inode_procs
				}
			}
		}
	}
}
432
433
# First pass: allocate the smallest unit on the idle node.
set exit_code [allocate_and_quit $inode_name $smallest $inode_procs]

# Second pass: allocate the whole node, unless the first pass already did
# (smallest equals the node's CPU count) or failed.
if {$exit_code == 0 && $smallest != $inode_procs} {
	# brief pause so the node is back in a clean state
	sleep 1
	set exit_code [allocate_and_quit $inode_name $inode_procs $inode_procs]
}

# Report the outcome: a non-zero code becomes the exit status.
if {$exit_code != 0} {
	exit $exit_code
}
send_user "\nSUCCESS\n"