~ubuntu-branches/ubuntu/vivid/slurm-llnl/vivid

« back to all changes in this revision

Viewing changes to src/plugins/topology/tree/topology_tree.c

  • Committer: Bazaar Package Importer
  • Author(s): Gennaro Oliva
  • Date: 2009-09-24 23:28:15 UTC
  • mfrom: (1.1.11 upstream) (3.2.4 sid)
  • Revision ID: james.westby@ubuntu.com-20090924232815-enh65jn32q1ebg07
Tags: 2.0.5-1
* New upstream release 
* Changed dependency from lib-mysqlclient15 to lib-mysqlclient 
* Added Default-Start for runlevel 2 and 4 and $remote_fs requirement in
  init.d scripts (Closes: #541252)
* Postinst checks for wrong runlevels 2 and 4 links
* Upgraded to standard version 3.8.3
* Add lintian overrides for missing slurm-llnl-configurator.html in doc
  base registration
* modified postrm scripts to ignore pkill return value in order to avoid
  postrm failure when no slurm process is running
* Checking for slurmctld.pid before cancelling running and pending
  jobs during package removal 

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*****************************************************************************\
 
2
 *  topology_tree.c - Build configuration information for hierarchical
 
3
 *      switch topology
 
4
 *****************************************************************************
 
5
 *  Copyright (C) 2009 Lawrence Livermore National Security.
 
6
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 
7
 *  Written by Morris Jette <jette1@llnl.gov>
 
8
 *  CODE-OCEC-09-009. All rights reserved.
 
9
 *  
 
10
 *  This file is part of SLURM, a resource management program.
 
11
 *  For details, see <https://computing.llnl.gov/linux/slurm/>.
 
12
 *  Please also read the included file: DISCLAIMER.
 
13
 *  
 
14
 *  SLURM is free software; you can redistribute it and/or modify it under
 
15
 *  the terms of the GNU General Public License as published by the Free
 
16
 *  Software Foundation; either version 2 of the License, or (at your option)
 
17
 *  any later version.
 
18
 *
 
19
 *  In addition, as a special exception, the copyright holders give permission 
 
20
 *  to link the code of portions of this program with the OpenSSL library under 
 
21
 *  certain conditions as described in each individual source file, and 
 
22
 *  distribute linked combinations including the two. You must obey the GNU 
 
23
 *  General Public License in all respects for all of the code used other than 
 
24
 *  OpenSSL. If you modify file(s) with this exception, you may extend this 
 
25
 *  exception to your version of the file(s), but you are not obligated to do 
 
26
 *  so. If you do not wish to do so, delete this exception statement from your
 
27
 *  version.  If you delete this exception statement from all source files in 
 
28
 *  the program, then also delete it here.
 
29
 *  
 
30
 *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
 
31
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 
32
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 
33
 *  details.
 
34
 *  
 
35
 *  You should have received a copy of the GNU General Public License along
 
36
 *  with SLURM; if not, write to the Free Software Foundation, Inc.,
 
37
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
 
38
\*****************************************************************************/
 
39
 
 
40
#if     HAVE_CONFIG_H
 
41
#  include "config.h"
 
42
#endif
 
43
 
 
44
#include <signal.h>
 
45
#include <stdlib.h>
 
46
#include <sys/types.h>
 
47
 
 
48
#include <slurm/slurm_errno.h>
 
49
#include "src/common/slurm_xlator.h"
 
50
#include "src/slurmctld/slurmctld.h"
 
51
 
 
52
/*
 
53
 * These variables are required by the generic plugin interface.  If they
 
54
 * are not found in the plugin, the plugin loader will ignore it.
 
55
 *
 
56
 * plugin_name - a string giving a human-readable description of the
 
57
 * plugin.  There is no maximum length, but the symbol must refer to
 
58
 * a valid string.
 
59
 *
 
60
 * plugin_type - a string suggesting the type of the plugin or its
 
61
 * applicability to a particular form of data or method of data handling.
 
62
 * If the low-level plugin API is used, the contents of this string are
 
63
 * unimportant and may be anything.  SLURM uses the higher-level plugin
 
64
 * interface which requires this string to be of the form
 
65
 *
 
66
 *      <application>/<method>
 
67
 *
 
68
 * where <application> is a description of the intended application of
 
69
 * the plugin (e.g., "task" for task control) and <method> is a description 
 
70
 * of how this plugin satisfies that application.  SLURM will only load
 
71
 * a task plugin if the plugin_type string has a prefix of "task/".
 
72
 *
 
73
 * plugin_version - an unsigned 32-bit integer giving the version number
 
74
 * of the plugin.  If major and minor revisions are desired, the major
 
75
 * version number may be multiplied by a suitable magnitude constant such
 
76
 * as 100 or 1000.  Various SLURM versions will likely require certain
 
77
 * minimum versions for their plugins as this API matures.
 
78
 */
 
79
/* Identification symbols required by the generic plugin interface */
const char plugin_name[] = "topology tree plugin";
const char plugin_type[] = "topology/tree";
const uint32_t plugin_version = 100;
 
82
 
 
83
/* One "SwitchName" entry as parsed from the topology configuration file */
typedef struct slurm_conf_switches {
	/* link speed, arbitrary units */
	uint32_t link_speed;
	/* names of nodes directly connected to this switch, if any */
	char *nodes;
	/* name of this switch */
	char *switch_name;
	/* names of child switches directly connected to this switch,
	 * if any */
	char *switches;
} slurm_conf_switches_t;
 
91
static s_p_hashtbl_t *conf_hashtbl = NULL;
 
92
static char* topo_conf = NULL;
 
93
 
 
94
static void _destroy_switches(void *ptr);
 
95
static void _free_switch_record_table(void);
 
96
static int  _get_switch_inx(const char *name);
 
97
static char *_get_topo_conf(void);
 
98
static void _log_switches(void);
 
99
static int  _parse_switches(void **dest, slurm_parser_enum_t type,
 
100
                            const char *key, const char *value,
 
101
                            const char *line, char **leftover);
 
102
extern int  _read_topo_file(slurm_conf_switches_t **ptr_array[]);
 
103
static void _validate_switches(void);
 
104
 
 
105
 
 
106
/*
 
107
 * init() is called when the plugin is loaded, before any other functions
 
108
 *      are called.  Put global initialization here.
 
109
 */
 
110
extern int init(void)
 
111
{
 
112
        verbose("%s loaded", plugin_name);
 
113
        return SLURM_SUCCESS;
 
114
}
 
115
 
 
116
/*
 
117
 * fini() is called when the plugin is removed. Clear any allocated 
 
118
 *      storage here.
 
119
 */
 
120
extern int fini(void)
 
121
{
 
122
        _free_switch_record_table();
 
123
        xfree(topo_conf);
 
124
        return SLURM_SUCCESS;
 
125
}
 
126
 
 
127
/*
 
128
 * topo_build_config - build or rebuild system topology information
 
129
 *      after a system startup or reconfiguration.
 
130
 */
 
131
extern int topo_build_config(void)
 
132
{
 
133
        _validate_switches();
 
134
        return SLURM_SUCCESS;
 
135
}
 
136
 
 
137
static void _validate_switches(void)
 
138
{
 
139
        slurm_conf_switches_t *ptr, **ptr_array;
 
140
        int depth, i, j;
 
141
        struct switch_record *switch_ptr;
 
142
        hostlist_t hl;
 
143
        char *child;
 
144
        bitstr_t *multi_homed_bitmap = NULL;    /* nodes on >1 leaf switch */
 
145
        bitstr_t *switches_bitmap = NULL;       /* nodes on any leaf switch */
 
146
        bitstr_t *tmp_bitmap = NULL;
 
147
 
 
148
        _free_switch_record_table();
 
149
 
 
150
        switch_record_cnt = _read_topo_file(&ptr_array);
 
151
        if (switch_record_cnt == 0) {
 
152
                error("No switches configured");
 
153
                s_p_hashtbl_destroy(conf_hashtbl);
 
154
                return;
 
155
        }
 
156
 
 
157
        switch_record_table = xmalloc(sizeof(struct switch_record) * 
 
158
                                      switch_record_cnt);
 
159
        multi_homed_bitmap = bit_alloc(node_record_count);
 
160
        switch_ptr = switch_record_table;
 
161
        for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
 
162
                ptr = ptr_array[i];
 
163
                switch_ptr->name = xstrdup(ptr->switch_name);
 
164
                switch_ptr->link_speed = ptr->link_speed;
 
165
                if (ptr->nodes) {
 
166
                        switch_ptr->level = 0;  /* leaf switch */
 
167
                        switch_ptr->nodes = xstrdup(ptr->nodes);
 
168
                        if (node_name2bitmap(ptr->nodes, true, 
 
169
                                             &switch_ptr->node_bitmap)) {
 
170
                                fatal("Invalid node name (%s) in switch "
 
171
                                      "config (%s)", 
 
172
                                      ptr->nodes, ptr->switch_name);
 
173
                        }
 
174
                        if (switches_bitmap) {
 
175
                                tmp_bitmap = bit_copy(switch_ptr->node_bitmap);
 
176
                                bit_and(tmp_bitmap, switches_bitmap);
 
177
                                bit_or(multi_homed_bitmap, tmp_bitmap);
 
178
                                bit_free(tmp_bitmap);
 
179
                                bit_or(switches_bitmap, 
 
180
                                       switch_ptr->node_bitmap);
 
181
                        } else {
 
182
                                switches_bitmap = bit_copy(switch_ptr->
 
183
                                                           node_bitmap);
 
184
                        }
 
185
                } else if (ptr->switches) {
 
186
                        switch_ptr->level = -1; /* determine later */
 
187
                        switch_ptr->switches = xstrdup(ptr->switches);
 
188
                } else {
 
189
                        fatal("Switch configuration (%s) lacks children",
 
190
                              ptr->switch_name);
 
191
                }
 
192
        }
 
193
 
 
194
        for (depth=1; ; depth++) {
 
195
                bool resolved = true;
 
196
                switch_ptr = switch_record_table;
 
197
                for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
 
198
                        if (switch_ptr->level != -1)
 
199
                                continue;
 
200
                        hl = hostlist_create(switch_ptr->switches);
 
201
                        if (!hl)
 
202
                                fatal("hostlist_create: malloc failure");
 
203
                        while ((child = hostlist_pop(hl))) {
 
204
                                j = _get_switch_inx(child);
 
205
                                if ((j < 0) || (j == i)) {
 
206
                                        fatal("Switch configuration %s has "
 
207
                                              "invalid child (%s)",
 
208
                                              switch_ptr->name, child);
 
209
                                }
 
210
                                if (switch_record_table[j].level == -1) {
 
211
                                        /* Children not resolved */
 
212
                                        resolved = false;
 
213
                                        switch_ptr->level = -1;
 
214
                                        FREE_NULL_BITMAP(switch_ptr->
 
215
                                                         node_bitmap);
 
216
                                        free(child);
 
217
                                        break;
 
218
                                }
 
219
                                if (switch_ptr->level == -1) {
 
220
                                        switch_ptr->level = 1 +
 
221
                                                switch_record_table[j].level;
 
222
                                        switch_ptr->node_bitmap = 
 
223
                                                bit_copy(switch_record_table[j].
 
224
                                                         node_bitmap);
 
225
                                } else {
 
226
                                        switch_ptr->level = 
 
227
                                                MAX(switch_ptr->level,
 
228
                                                     (switch_record_table[j].
 
229
                                                      level + 1));
 
230
                                        bit_or(switch_ptr->node_bitmap,
 
231
                                               switch_record_table[j].
 
232
                                               node_bitmap);
 
233
                                }
 
234
                                free(child);
 
235
                        }
 
236
                        hostlist_destroy(hl);
 
237
                }
 
238
                if (resolved)
 
239
                        break;
 
240
        }
 
241
 
 
242
        switch_ptr = switch_record_table;
 
243
        for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
 
244
                if (switch_ptr->node_bitmap == NULL)
 
245
                        error("switch %s has no nodes", switch_ptr->name);
 
246
        }
 
247
        if (switches_bitmap) {
 
248
                bit_not(switches_bitmap);
 
249
                i = bit_set_count(switches_bitmap);
 
250
                if (i > 0) {
 
251
                        child = bitmap2node_name(switches_bitmap);
 
252
                        error("WARNING: switches lack access to %d nodes: %s", 
 
253
                              i, child);
 
254
                        xfree(child);
 
255
                }
 
256
                bit_free(switches_bitmap);
 
257
        } else
 
258
                fatal("switches contain no nodes");
 
259
 
 
260
        /* Report nodes on multiple leaf switches, 
 
261
         * possibly due to bad configuration file */
 
262
        i = bit_set_count(multi_homed_bitmap);
 
263
        if (i > 0) {
 
264
                child = bitmap2node_name(multi_homed_bitmap);
 
265
                error("WARNING: Multiple leaf switches contain nodes: %s", 
 
266
                      child);
 
267
                xfree(child);
 
268
        }
 
269
        bit_free(multi_homed_bitmap);
 
270
 
 
271
        s_p_hashtbl_destroy(conf_hashtbl);
 
272
        _log_switches();
 
273
}
 
274
 
 
275
static void _log_switches(void)
 
276
{
 
277
        int i;
 
278
        struct switch_record *switch_ptr;
 
279
 
 
280
        switch_ptr = switch_record_table;
 
281
        for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
 
282
                if (!switch_ptr->nodes) {
 
283
                        switch_ptr->nodes = bitmap2node_name(switch_ptr->
 
284
                                                             node_bitmap);
 
285
                }
 
286
                debug("Switch level:%d name:%s nodes:%s switches:%s",
 
287
                      switch_ptr->level, switch_ptr->name,
 
288
                      switch_ptr->nodes, switch_ptr->switches);
 
289
        }
 
290
}
 
291
 
 
292
/* Return the index of a given switch name or -1 if not found */
 
293
static int _get_switch_inx(const char *name)
 
294
{
 
295
        int i;
 
296
        struct switch_record *switch_ptr;
 
297
 
 
298
        switch_ptr = switch_record_table;
 
299
        for (i=0; i<switch_record_cnt; i++, switch_ptr++) {
 
300
                if (strcmp(switch_ptr->name, name) == 0)
 
301
                        return i;
 
302
        }
 
303
 
 
304
        return -1;
 
305
}
 
306
 
 
307
/* Free all memory associated with switch_record_table structure */
 
308
static void _free_switch_record_table(void)
 
309
{
 
310
        int i;
 
311
 
 
312
        if (switch_record_table) {
 
313
                for (i=0; i<switch_record_cnt; i++) {
 
314
                        xfree(switch_record_table[i].name);
 
315
                        xfree(switch_record_table[i].nodes);
 
316
                        xfree(switch_record_table[i].switches);
 
317
                        FREE_NULL_BITMAP(switch_record_table[i].node_bitmap);
 
318
                }
 
319
                xfree(switch_record_table);
 
320
                switch_record_cnt = 0;
 
321
        }
 
322
}
 
323
 
 
324
static char *_get_topo_conf(void)
 
325
{
 
326
        char *val = getenv("SLURM_CONF");
 
327
        char *rc;
 
328
        int i;
 
329
 
 
330
        if (!val)
 
331
                return xstrdup(TOPOLOGY_CONFIG_FILE);
 
332
 
 
333
        /* Replace file name on end of path */
 
334
        i = strlen(val) - strlen("slurm.conf") + strlen("topology.conf") + 1;
 
335
        rc = xmalloc(i);
 
336
        strcpy(rc, val);
 
337
        val = strrchr(rc, (int)'/');
 
338
        if (val)        /* absolute path */
 
339
                val++;
 
340
        else            /* not absolute path */
 
341
                val = rc;
 
342
        strcpy(val, "topology.conf");
 
343
        return rc;
 
344
}
 
345
 
 
346
/* Return count of switch configuration entries read */
 
347
extern int  _read_topo_file(slurm_conf_switches_t **ptr_array[])
 
348
{
 
349
        static s_p_options_t switch_options[] = {
 
350
                {"SwitchName", S_P_ARRAY, _parse_switches, _destroy_switches},
 
351
                {NULL}
 
352
        };
 
353
        int count;
 
354
        slurm_conf_switches_t **ptr;
 
355
 
 
356
        debug("Reading the topology.conf file");
 
357
        if (!topo_conf)
 
358
                topo_conf = _get_topo_conf();
 
359
 
 
360
        conf_hashtbl = s_p_hashtbl_create(switch_options);
 
361
        if(s_p_parse_file(conf_hashtbl, topo_conf) == SLURM_ERROR)
 
362
                fatal("something wrong with opening/reading %s: %m", topo_conf);
 
363
 
 
364
        if (s_p_get_array((void ***)&ptr, &count, "SwitchName", conf_hashtbl)) {
 
365
                *ptr_array = ptr;
 
366
        } else {
 
367
                *ptr_array = NULL;
 
368
                count = 0;
 
369
        }
 
370
        return count;
 
371
}
 
372
 
 
373
/*
 * _parse_switches - parser callback for one "SwitchName" line
 * OUT dest - set to a newly allocated slurm_conf_switches_t on success
 * IN type, key, line - not used here
 * IN value - the switch name
 * IN/OUT leftover - remainder of the line holding the LinkSpeed, Nodes
 *	and Switches options; advanced by s_p_parse_line()
 * RET 1 on success, -1 if the switch has both or neither of Nodes and
 *	Switches
 *
 * LinkSpeed defaults to 1 when not specified.
 */
static int  _parse_switches(void **dest, slurm_parser_enum_t type,
			    const char *key, const char *value,
			    const char *line, char **leftover)
{
	static s_p_options_t _switch_options[] = {
		{"LinkSpeed", S_P_UINT32},
		{"Nodes", S_P_STRING},
		{"Switches", S_P_STRING},
		{NULL}
	};
	s_p_hashtbl_t *tbl;
	slurm_conf_switches_t *s;
	bool have_nodes, have_switches;

	tbl = s_p_hashtbl_create(_switch_options);
	s_p_parse_line(tbl, *leftover, leftover);

	s = xmalloc(sizeof(slurm_conf_switches_t));
	s->switch_name = xstrdup(value);
	if (!s_p_get_uint32(&s->link_speed, "LinkSpeed", tbl))
		s->link_speed = 1;
	s_p_get_string(&s->nodes, "Nodes", tbl);
	s_p_get_string(&s->switches, "Switches", tbl);
	s_p_hashtbl_destroy(tbl);

	have_nodes = (s->nodes != NULL);
	have_switches = (s->switches != NULL);
	if (have_nodes != have_switches) {
		/* Exactly one kind of child given: valid entry */
		*dest = (void *)s;
		return 1;
	}

	if (have_nodes) {
		error("switch %s has both child switches and nodes",
		      s->switch_name);
	} else {
		error("switch %s has neither child switches nor nodes",
		      s->switch_name);
	}
	_destroy_switches(s);
	return -1;
}
 
414
 
 
415
static void _destroy_switches(void *ptr)
 
416
{
 
417
        slurm_conf_switches_t *s = (slurm_conf_switches_t *)ptr;
 
418
        xfree(s->nodes);
 
419
        xfree(s->switch_name);
 
420
        xfree(s->switches);
 
421
        xfree(ptr);
 
422
}
 
423