1
/*****************************************************************************\
2
* topology_tree.c - Build configuration information for hierarchical switch topology
4
*****************************************************************************
5
* Copyright (C) 2009 Lawrence Livermore National Security.
6
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
7
* Written by Morris Jette <jette1@llnl.gov>
8
* CODE-OCEC-09-009. All rights reserved.
10
* This file is part of SLURM, a resource management program.
11
* For details, see <https://computing.llnl.gov/linux/slurm/>.
12
* Please also read the included file: DISCLAIMER.
14
* SLURM is free software; you can redistribute it and/or modify it under
15
* the terms of the GNU General Public License as published by the Free
16
* Software Foundation; either version 2 of the License, or (at your option) any later version.
19
* In addition, as a special exception, the copyright holders give permission
20
* to link the code of portions of this program with the OpenSSL library under
21
* certain conditions as described in each individual source file, and
22
* distribute linked combinations including the two. You must obey the GNU
23
* General Public License in all respects for all of the code used other than
24
* OpenSSL. If you modify file(s) with this exception, you may extend this
25
* exception to your version of the file(s), but you are not obligated to do
26
* so. If you do not wish to do so, delete this exception statement from your
27
* version. If you delete this exception statement from all source files in
28
* the program, then also delete it here.
30
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
31
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
32
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
35
* You should have received a copy of the GNU General Public License along
36
* with SLURM; if not, write to the Free Software Foundation, Inc.,
37
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
38
\*****************************************************************************/
46
#include <sys/types.h>
48
#include <slurm/slurm_errno.h>
49
#include "src/common/slurm_xlator.h"
50
#include "src/slurmctld/slurmctld.h"
53
/*
 * These variables are required by the generic plugin interface.  If they
 * are not found in the plugin, the plugin loader will ignore it.
 *
 * plugin_name - a string giving a human-readable description of the
 * plugin.  There is no maximum length, but the symbol must refer to
 * a valid string.
 *
 * plugin_type - a string suggesting the type of the plugin or its
 * applicability to a particular form of data or method of data handling.
 * If the low-level plugin API is used, the contents of this string are
 * unimportant and may be anything.  SLURM uses the higher-level plugin
 * interface which requires this string to be of the form
 *
 *	<application>/<method>
 *
 * where <application> is a description of the intended application of
 * the plugin (e.g., "task" for task control) and <method> is a description
 * of how this plugin satisfies that application.  SLURM will only load
 * a task plugin if the plugin_type string has a prefix of "task/".
 *
 * plugin_version - an unsigned 32-bit integer giving the version number
 * of the plugin.  If major and minor revisions are desired, the major
 * version number may be multiplied by a suitable magnitude constant such
 * as 100 or 1000.  Various SLURM versions will likely require a certain
 * minimum version for their plugins as this API matures.
 */
79
/* Identification symbols for this plugin: human-readable name,
 * "<application>/<method>" type string and numeric version */
const char plugin_name[]	= "topology tree plugin";
const char plugin_type[]	= "topology/tree";
const uint32_t plugin_version	= 100;
83
/*
 * One "SwitchName" entry of the topology configuration file, as filled in
 * by _parse_switches().  All strings are owned by the record.
 */
typedef struct slurm_conf_switches {
	uint32_t link_speed;	/* link speed, arbitrary units */
	char *nodes;		/* names of nodes directly connected to
				 * this switch, if any */
	char *switch_name;	/* name of this switch */
	char *switches;		/* names of child switches directly
				 * connected to this switch, if any */
} slurm_conf_switches_t;
91
static s_p_hashtbl_t *conf_hashtbl = NULL;
92
static char* topo_conf = NULL;
94
static void _destroy_switches(void *ptr);
95
static void _free_switch_record_table(void);
96
static int _get_switch_inx(const char *name);
97
static char *_get_topo_conf(void);
98
static void _log_switches(void);
99
static int _parse_switches(void **dest, slurm_parser_enum_t type,
100
const char *key, const char *value,
101
const char *line, char **leftover);
102
extern int _read_topo_file(slurm_conf_switches_t **ptr_array[]);
103
static void _validate_switches(void);
107
* init() is called when the plugin is loaded, before any other functions
108
* are called. Put global initialization here.
110
extern int init(void)
112
verbose("%s loaded", plugin_name);
113
return SLURM_SUCCESS;
117
* fini() is called when the plugin is removed. Clear any allocated
120
extern int fini(void)
122
_free_switch_record_table();
124
return SLURM_SUCCESS;
128
* topo_build_config - build or rebuild system topology information
129
* after a system startup or reconfiguration.
131
extern int topo_build_config(void)
133
_validate_switches();
134
return SLURM_SUCCESS;
137
/*
 * _validate_switches - rebuild switch_record_table from the topology
 *	configuration file.
 *
 * Steps visible in this copy:
 *  1. free any existing table and read the configured switches with
 *     _read_topo_file()
 *  2. copy every entry into switch_record_table; a switch with nodes is a
 *     leaf (level 0) and gets a node bitmap, a switch with child switches
 *     is marked level -1 to be resolved later
 *  3. sweep the table repeatedly, giving each level -1 switch a level and a
 *     node bitmap derived from its children once they are all resolved
 *  4. report switches without nodes, nodes reachable from no leaf switch,
 *     and nodes found on more than one leaf switch
 *
 * NOTE(review): this copy of the function is incomplete.  The bare integers
 * are line-number residue, and braces, some declarations, branch heads and
 * continuation lines are missing.  The comments below describe only what the
 * surviving statements show; anything marked "presumably" must be confirmed
 * against an intact copy.
 */
static void _validate_switches(void)
139
slurm_conf_switches_t *ptr, **ptr_array;
141
struct switch_record *switch_ptr;
144
bitstr_t *multi_homed_bitmap = NULL; /* nodes on >1 leaf switch */
145
bitstr_t *switches_bitmap = NULL; /* nodes on any leaf switch */
146
bitstr_t *tmp_bitmap = NULL;
148
/* Discard whatever a previous build left behind */
_free_switch_record_table();
150
/* ptr_array receives the parsed entries, the return value their count */
switch_record_cnt = _read_topo_file(&ptr_array);
151
/* No entries: log it and release the parser table.
 * NOTE(review): presumably returns here; the line is missing from this copy */
if (switch_record_cnt == 0) {
152
error("No switches configured");
153
s_p_hashtbl_destroy(conf_hashtbl);
157
/* NOTE(review): the element count multiplied in here is on a missing line;
 * presumably switch_record_cnt */
switch_record_table = xmalloc(sizeof(struct switch_record) *
159
multi_homed_bitmap = bit_alloc(node_record_count);
160
/* Pass 1: copy each configuration entry into the table.
 * NOTE(review): ptr is presumably loaded from ptr_array[i] on a missing line */
switch_ptr = switch_record_table;
161
for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
163
switch_ptr->name = xstrdup(ptr->switch_name);
164
switch_ptr->link_speed = ptr->link_speed;
166
/* Entry lists nodes (branch head missing here): record it as a leaf
 * and translate the node names into a bitmap */
switch_ptr->level = 0; /* leaf switch */
167
switch_ptr->nodes = xstrdup(ptr->nodes);
168
if (node_name2bitmap(ptr->nodes, true,
169
&switch_ptr->node_bitmap)) {
170
fatal("Invalid node name (%s) in switch "
172
ptr->nodes, ptr->switch_name);
174
/* Nodes of this leaf that were already seen on an earlier leaf are
 * added to multi_homed_bitmap, then all of its nodes are merged
 * into switches_bitmap */
if (switches_bitmap) {
175
tmp_bitmap = bit_copy(switch_ptr->node_bitmap);
176
bit_and(tmp_bitmap, switches_bitmap);
177
bit_or(multi_homed_bitmap, tmp_bitmap);
178
bit_free(tmp_bitmap);
179
bit_or(switches_bitmap,
180
switch_ptr->node_bitmap);
182
/* First leaf switch seen: its bitmap seeds switches_bitmap */
switches_bitmap = bit_copy(switch_ptr->
185
/* Entry lists child switches instead of nodes */
} else if (ptr->switches) {
186
switch_ptr->level = -1; /* determine later */
187
switch_ptr->switches = xstrdup(ptr->switches);
189
/* Entry has neither nodes nor child switches */
fatal("Switch configuration (%s) lacks children",
194
/* Pass 2: resolve the switches still at level -1.
 * NOTE(review): the loop header has no bound and the exit test is not
 * visible in this copy (presumably a break once "resolved" stays true).
 * Confirm that switches naming each other as children cannot keep
 * this loop running forever. */
for (depth=1; ; depth++) {
195
bool resolved = true;
196
switch_ptr = switch_record_table;
197
for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
198
/* Only unresolved switches need work (presumably "continue" follows) */
if (switch_ptr->level != -1)
200
hl = hostlist_create(switch_ptr->switches);
202
fatal("hostlist_create: malloc failure");
203
/* Visit every child switch named by this switch */
while ((child = hostlist_pop(hl))) {
204
j = _get_switch_inx(child);
205
/* Child must be a configured switch other than this one */
if ((j < 0) || (j == i)) {
206
fatal("Switch configuration %s has "
207
"invalid child (%s)",
208
switch_ptr->name, child);
210
/* A child without a level yet: undo any partial result so this
 * switch is examined again on a later sweep */
if (switch_record_table[j].level == -1) {
211
/* Children not resolved */
213
switch_ptr->level = -1;
214
FREE_NULL_BITMAP(switch_ptr->
219
/* First resolved child: start from its level + 1 and a copy of
 * its node bitmap */
if (switch_ptr->level == -1) {
220
switch_ptr->level = 1 +
221
switch_record_table[j].level;
222
switch_ptr->node_bitmap =
223
bit_copy(switch_record_table[j].
227
/* Later children: keep the larger level and merge in their nodes */
MAX(switch_ptr->level,
228
(switch_record_table[j].
230
bit_or(switch_ptr->node_bitmap,
231
switch_record_table[j].
236
hostlist_destroy(hl);
242
/* Report every switch that ended up without a node bitmap */
switch_ptr = switch_record_table;
243
for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
244
if (switch_ptr->node_bitmap == NULL)
245
error("switch %s has no nodes", switch_ptr->name);
247
/* Invert switches_bitmap in place so that set bits mark nodes that are
 * on no leaf switch, then count and name them */
if (switches_bitmap) {
248
bit_not(switches_bitmap);
249
i = bit_set_count(switches_bitmap);
251
child = bitmap2node_name(switches_bitmap);
252
error("WARNING: switches lack access to %d nodes: %s",
256
bit_free(switches_bitmap);
258
/* Presumably the else branch: no leaf switch supplied any nodes */
fatal("switches contain no nodes");
260
/* Report nodes on multiple leaf switches,
 * possibly due to bad configuration file */
262
i = bit_set_count(multi_homed_bitmap);
264
child = bitmap2node_name(multi_homed_bitmap);
265
error("WARNING: Multiple leaf switches contain nodes: %s",
269
bit_free(multi_homed_bitmap);
271
/* The parsed configuration is no longer needed */
s_p_hashtbl_destroy(conf_hashtbl);
275
static void _log_switches(void)
278
struct switch_record *switch_ptr;
280
switch_ptr = switch_record_table;
281
for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
282
if (!switch_ptr->nodes) {
283
switch_ptr->nodes = bitmap2node_name(switch_ptr->
286
debug("Switch level:%d name:%s nodes:%s switches:%s",
287
switch_ptr->level, switch_ptr->name,
288
switch_ptr->nodes, switch_ptr->switches);
292
/* Return the index of a given switch name or -1 if not found */
293
static int _get_switch_inx(const char *name)
296
struct switch_record *switch_ptr;
298
switch_ptr = switch_record_table;
299
for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
300
if (strcmp(switch_ptr->name, name) == 0)
307
/* Free all memory associated with switch_record_table structure */
308
static void _free_switch_record_table(void)
312
if (switch_record_table) {
313
for (i=0; i<switch_record_cnt; i++) {
314
xfree(switch_record_table[i].name);
315
xfree(switch_record_table[i].nodes);
316
xfree(switch_record_table[i].switches);
317
FREE_NULL_BITMAP(switch_record_table[i].node_bitmap);
319
xfree(switch_record_table);
320
switch_record_cnt = 0;
324
static char *_get_topo_conf(void)
326
char *val = getenv("SLURM_CONF");
331
return xstrdup(TOPOLOGY_CONFIG_FILE);
333
/* Replace file name on end of path */
334
i = strlen(val) - strlen("slurm.conf") + strlen("topology.conf") + 1;
337
val = strrchr(rc, (int)'/');
338
if (val) /* absolute path */
340
else /* not absolute path */
342
strcpy(val, "topology.conf");
346
/* Return count of switch configuration entries read */
347
extern int _read_topo_file(slurm_conf_switches_t **ptr_array[])
349
static s_p_options_t switch_options[] = {
350
{"SwitchName", S_P_ARRAY, _parse_switches, _destroy_switches},
354
slurm_conf_switches_t **ptr;
356
debug("Reading the topology.conf file");
358
topo_conf = _get_topo_conf();
360
conf_hashtbl = s_p_hashtbl_create(switch_options);
361
if(s_p_parse_file(conf_hashtbl, topo_conf) == SLURM_ERROR)
362
fatal("something wrong with opening/reading %s: %m", topo_conf);
364
if (s_p_get_array((void ***)&ptr, &count, "SwitchName", conf_hashtbl)) {
373
/*
 * _parse_switches - handler for one "SwitchName" entry; it is registered for
 *	that key in the options table of _read_topo_file().
 * IN value - the switch name; copied into the new record
 * IN/OUT leftover - on entry the unparsed remainder of the line; passed to
 *	s_p_parse_line() to read LinkSpeed, Nodes and Switches and to be
 *	advanced past them
 * dest, type, key, line - not referenced by the statements visible here
 *
 * A record naming both nodes and child switches, or neither, is logged as
 * an error and destroyed.
 *
 * NOTE(review): this copy of the function is incomplete.  The bare integers
 * are line-number residue; the declaration of tbl, the array terminator,
 * the LinkSpeed default, every return statement and the store through dest
 * are missing, so the return values are not documented here.
 */
static int _parse_switches(void **dest, slurm_parser_enum_t type,
374
const char *key, const char *value,
375
const char *line, char **leftover)
378
slurm_conf_switches_t *s;
379
/* Keys accepted after SwitchName on the same line */
static s_p_options_t _switch_options[] = {
380
{"LinkSpeed", S_P_UINT32},
381
{"Nodes", S_P_STRING},
382
{"Switches", S_P_STRING},
386
/* Parse the rest of the line into a temporary table */
tbl = s_p_hashtbl_create(_switch_options);
387
s_p_parse_line(tbl, *leftover, leftover);
389
/* Build the record from the switch name and the parsed keys */
s = xmalloc(sizeof(slurm_conf_switches_t));
390
s->switch_name = xstrdup(value);
391
/* NOTE(review): the statement that supplies the value used when LinkSpeed
 * is absent is missing from this copy */
if (!s_p_get_uint32(&s->link_speed, "LinkSpeed", tbl))
393
s_p_get_string(&s->nodes, "Nodes", tbl);
394
s_p_get_string(&s->switches, "Switches", tbl);
395
/* The temporary table is destroyed once its values have been fetched */
s_p_hashtbl_destroy(tbl);
397
/* A switch must have nodes or child switches, but not both */
if (s->nodes && s->switches) {
398
error("switch %s has both child switches and nodes",
400
_destroy_switches(s);
403
if (!s->nodes && !s->switches) {
404
error("switch %s has neither child switches nor nodes",
406
_destroy_switches(s);
415
static void _destroy_switches(void *ptr)
417
slurm_conf_switches_t *s = (slurm_conf_switches_t *)ptr;
419
xfree(s->switch_name);