~ubuntu-branches/ubuntu/vivid/slurm-llnl/vivid

« back to all changes in this revision

Viewing changes to src/common/select_job_res.c

  • Committer: Bazaar Package Importer
  • Author(s): Gennaro Oliva
  • Date: 2009-09-24 23:28:15 UTC
  • mfrom: (1.1.11 upstream) (3.2.4 sid)
  • Revision ID: james.westby@ubuntu.com-20090924232815-enh65jn32q1ebg07
Tags: 2.0.5-1
* New upstream release 
* Changed dependency from lib-mysqlclient15 to lib-mysqlclient
* Added Default-Start for runlevel 2 and 4 and $remote_fs requirement in
  init.d scripts (Closes: #541252)
* Postinst checks for wrong runlevels 2 and 4 links
* Upgraded to standard version 3.8.3
* Add lintian overrides for missing slurm-llnl-configurator.html in doc
  base registration
* modified postrm scripts to ignore pkill return value in order to avoid
  postrm failure when no slurm process is running
* Checking for slurmctld.pid before cancelling running and pending
  jobs during package removal 

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*****************************************************************************\
 
2
 *  select_job_res.c - functions to manage data structure identifying specific
 
3
 *      CPUs allocated to a job, step or partition
 
4
 *****************************************************************************
 
5
 *  Copyright (C) 2008 Lawrence Livermore National Security.
 
6
 *  Written by Morris Jette <jette1@llnl.gov>.
 
7
 *  CODE-OCEC-09-009. All rights reserved.
 
8
 *  
 
9
 *  This file is part of SLURM, a resource management program.
 
10
 *  For details, see <https://computing.llnl.gov/linux/slurm/>.
 
11
 *  Please also read the included file: DISCLAIMER.
 
12
 *  
 
13
 *  SLURM is free software; you can redistribute it and/or modify it under
 
14
 *  the terms of the GNU General Public License as published by the Free
 
15
 *  Software Foundation; either version 2 of the License, or (at your option)
 
16
 *  any later version.
 
17
 *  
 
18
 *  In addition, as a special exception, the copyright holders give permission 
 
19
 *  to link the code of portions of this program with the OpenSSL library under
 
20
 *  certain conditions as described in each individual source file, and 
 
21
 *  distribute linked combinations including the two. You must obey the GNU 
 
22
 *  General Public License in all respects for all of the code used other than 
 
23
 *  OpenSSL. If you modify file(s) with this exception, you may extend this 
 
24
 *  exception to your version of the file(s), but you are not obligated to do 
 
25
 *  so. If you do not wish to do so, delete this exception statement from your
 
26
 *  version.  If you delete this exception statement from all source files in 
 
27
 *  the program, then also delete it here.
 
28
 *  
 
29
 *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
 
30
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 
31
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 
32
 *  details.
 
33
 *  
 
34
 *  You should have received a copy of the GNU General Public License along
 
35
 *  with SLURM; if not, write to the Free Software Foundation, Inc.,
 
36
 *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 
37
\*****************************************************************************/
 
38
 
 
39
#include <stdlib.h>
 
40
#include <string.h>
 
41
#include <slurm/slurm_errno.h>
 
42
 
 
43
#include "src/common/hostlist.h"
 
44
#include "src/common/log.h"
 
45
#include "src/common/select_job_res.h"
 
46
#include "src/common/xmalloc.h"
 
47
#include "src/common/xassert.h"
 
48
#include "src/slurmctld/slurmctld.h"
 
49
 
 
50
 
 
51
/* Create an empty select_job_res data structure */
 
52
extern select_job_res_t create_select_job_res(void)
 
53
{
 
54
        select_job_res_t select_job_res;
 
55
 
 
56
        select_job_res = xmalloc(sizeof(struct select_job_res));
 
57
        return select_job_res;
 
58
}
 
59
 
 
60
/* Set the socket and core counts associated with a set of selected
 
61
 * nodes of a select_job_res data structure based upon slurmctld state.
 
62
 * (sets cores_per_socket, sockets_per_node, and sock_core_rep_count based
 
63
 * upon the value of node_bitmap, also creates core_bitmap based upon
 
64
 * the total number of cores in the allocation). Call this ONLY from 
 
65
 * slurmctld. Example of use:
 
66
 *
 
67
 * select_job_res_t select_job_res_ptr = create_select_job_res();
 
68
 * node_name2bitmap("dummy[2,5,12,16]", true, &(select_res_ptr->node_bitmap));
 
69
 * rc = build_select_job_res(select_job_res_ptr, node_record_table_ptr,
 
70
 *                           slurmctld_conf.fast_schedule);
 
71
 */
 
72
extern int build_select_job_res(select_job_res_t select_job_res,
 
73
                                void *node_rec_table,
 
74
                                uint16_t fast_schedule)
 
75
{
 
76
        int i, bitmap_len;
 
77
        int core_cnt = 0, sock_inx = -1;
 
78
        uint32_t cores, socks;
 
79
        struct node_record *node_ptr, *node_record_table;
 
80
 
 
81
        if (select_job_res->node_bitmap == NULL) {
 
82
                error("build_select_job_res: node_bitmap is NULL");
 
83
                return SLURM_ERROR;
 
84
        }
 
85
 
 
86
        node_record_table = (struct node_record *) node_rec_table;
 
87
        xfree(select_job_res->sockets_per_node);
 
88
        xfree(select_job_res->cores_per_socket);
 
89
        xfree(select_job_res->sock_core_rep_count);
 
90
        select_job_res->sockets_per_node = xmalloc(sizeof(uint16_t) * 
 
91
                                                   select_job_res->nhosts);
 
92
        select_job_res->cores_per_socket = xmalloc(sizeof(uint16_t) * 
 
93
                                                   select_job_res->nhosts);
 
94
        select_job_res->sock_core_rep_count = xmalloc(sizeof(uint32_t) * 
 
95
                                                      select_job_res->nhosts);
 
96
 
 
97
        bitmap_len = bit_size(select_job_res->node_bitmap);
 
98
        for (i=0; i<bitmap_len; i++) {
 
99
                if (!bit_test(select_job_res->node_bitmap, i))
 
100
                        continue;
 
101
                node_ptr = node_record_table + i;
 
102
                if (fast_schedule) {
 
103
                        socks = node_ptr->config_ptr->sockets;
 
104
                        cores = node_ptr->config_ptr->cores;
 
105
                } else {
 
106
                        socks = node_ptr->sockets;
 
107
                        cores = node_ptr->cores;
 
108
                }
 
109
                if ((sock_inx < 0) ||
 
110
                    (socks != select_job_res->sockets_per_node[sock_inx]) ||
 
111
                    (cores != select_job_res->cores_per_socket[sock_inx])) {
 
112
                        sock_inx++;
 
113
                        select_job_res->sockets_per_node[sock_inx] = socks;
 
114
                        select_job_res->cores_per_socket[sock_inx] = cores;
 
115
                }
 
116
                select_job_res->sock_core_rep_count[sock_inx]++;
 
117
                core_cnt += (cores * socks);
 
118
        }
 
119
        select_job_res->core_bitmap      = bit_alloc(core_cnt);
 
120
        select_job_res->core_bitmap_used = bit_alloc(core_cnt);
 
121
        if ((select_job_res->core_bitmap == NULL) ||
 
122
            (select_job_res->core_bitmap_used == NULL))
 
123
                fatal("bit_alloc malloc failure");
 
124
        return SLURM_SUCCESS;
 
125
}
 
126
 
 
127
/* Rebuild cpu_array_cnt, cpu_array_value, and cpu_array_reps based upon the
 
128
 * values of nhosts and cpus in an existing data structure
 
129
 * Return total CPU count or -1 on error */
 
130
extern int build_select_job_res_cpu_array(select_job_res_t select_job_res_ptr)
 
131
{
 
132
        int cpu_count = 0, i;
 
133
        uint32_t last_cpu_cnt = 0;
 
134
 
 
135
        if (select_job_res_ptr->nhosts == 0)
 
136
                return cpu_count;       /* no work to do */
 
137
        if (select_job_res_ptr->cpus == NULL) {
 
138
                error("build_select_job_res_cpu_array: cpus==NULL");
 
139
                return -1;
 
140
        }
 
141
 
 
142
        /* clear vestigial data and create new arrays of max size */
 
143
        select_job_res_ptr->cpu_array_cnt = 0;
 
144
        xfree(select_job_res_ptr->cpu_array_reps);
 
145
        select_job_res_ptr->cpu_array_reps = 
 
146
                xmalloc(select_job_res_ptr->nhosts * sizeof(uint32_t));
 
147
        xfree(select_job_res_ptr->cpu_array_value);
 
148
        select_job_res_ptr->cpu_array_value = 
 
149
                xmalloc(select_job_res_ptr->nhosts * sizeof(uint16_t));
 
150
 
 
151
        for (i=0; i<select_job_res_ptr->nhosts; i++) {
 
152
                if (select_job_res_ptr->cpus[i] != last_cpu_cnt) {
 
153
                        last_cpu_cnt = select_job_res_ptr->cpus[i];
 
154
                        select_job_res_ptr->cpu_array_value[
 
155
                                select_job_res_ptr->cpu_array_cnt] 
 
156
                                = last_cpu_cnt;
 
157
                        select_job_res_ptr->cpu_array_reps[
 
158
                                select_job_res_ptr->cpu_array_cnt] = 1;
 
159
                        select_job_res_ptr->cpu_array_cnt++;
 
160
                } else {
 
161
                        select_job_res_ptr->cpu_array_reps[
 
162
                                select_job_res_ptr->cpu_array_cnt-1]++;
 
163
                }
 
164
                cpu_count += last_cpu_cnt;
 
165
        }
 
166
        return cpu_count;
 
167
}
 
168
 
 
169
/* Rebuild cpus array based upon the values of nhosts, cpu_array_value and
 
170
 * cpu_array_reps in an existing data structure
 
171
 * Return total CPU count or -1 on error */
 
172
extern int build_select_job_res_cpus_array(select_job_res_t select_job_res_ptr)
 
173
{
 
174
        int cpu_count = 0, cpu_inx, i, j;
 
175
 
 
176
        if (select_job_res_ptr->nhosts == 0)
 
177
                return cpu_count;       /* no work to do */
 
178
        if (select_job_res_ptr->cpu_array_cnt == 0) {
 
179
                error("build_select_job_res_cpus_array: cpu_array_cnt==0");
 
180
                return -1;
 
181
        }
 
182
        if (select_job_res_ptr->cpu_array_value == NULL) {
 
183
                error("build_select_job_res_cpus_array: cpu_array_value==NULL");
 
184
                return -1;
 
185
        }
 
186
        if (select_job_res_ptr->cpu_array_reps == NULL) {
 
187
                error("build_select_job_res_cpus_array: cpu_array_reps==NULL");
 
188
                return -1;
 
189
        }
 
190
 
 
191
        /* clear vestigial data and create new arrays of max size */
 
192
        xfree(select_job_res_ptr->cpus);
 
193
        select_job_res_ptr->cpus = 
 
194
                xmalloc(select_job_res_ptr->nhosts * sizeof(uint16_t));
 
195
 
 
196
        cpu_inx = 0;
 
197
        for (i=0; i<select_job_res_ptr->cpu_array_cnt; i++) {
 
198
                for (j=0; j<select_job_res_ptr->cpu_array_reps[i]; j++) {
 
199
                        if (cpu_inx >= select_job_res_ptr->nhosts) {
 
200
                                error("build_select_job_res_cpus_array: "
 
201
                                      "cpu_array is too long");
 
202
                                return -1;
 
203
                        }
 
204
                        cpu_count += select_job_res_ptr->cpus[i];
 
205
                        select_job_res_ptr->cpus[cpu_inx++] = 
 
206
                                select_job_res_ptr->cpus[i];
 
207
                }
 
208
        }
 
209
        if (cpu_inx < select_job_res_ptr->nhosts) {
 
210
                error("build_select_job_res_cpus_array: "
 
211
                      "cpu_array is incomplete");
 
212
                return -1;
 
213
        }
 
214
        return cpu_count;
 
215
}
 
216
 
 
217
/* Reset the node_bitmap in a select_job_res data structure
 
218
 * This is needed after a restart/reconfiguration since nodes can 
 
219
 * be added or removed from the system resulting in changing in 
 
220
 * the bitmap size or bit positions */
 
221
extern void reset_node_bitmap(select_job_res_t select_job_res_ptr,
 
222
                              bitstr_t *new_node_bitmap)
 
223
{
 
224
        if (select_job_res_ptr) {
 
225
                if (select_job_res_ptr->node_bitmap)
 
226
                        bit_free(select_job_res_ptr->node_bitmap);
 
227
                if (new_node_bitmap) {
 
228
                        select_job_res_ptr->node_bitmap =
 
229
                                bit_copy(new_node_bitmap);
 
230
                }
 
231
        }
 
232
}
 
233
 
 
234
/* Check the socket/core records of a select_job_res data structure against
 * the node table.
 *
 * Each node whose bit is set in node_bitmap is compared, in order, with the
 * run-length encoded sockets_per_node/cores_per_socket record that covers
 * it. Socket and core counts are taken from the node's configuration record
 * when fast_schedule is non-zero, otherwise from the node record itself.
 *
 * IN select_job_res - structure to check
 * IN node_rec_table - node record table (struct node_record *)
 * IN fast_schedule  - selects which socket/core counts are used
 * RET SLURM_SUCCESS if every node matches; SLURM_ERROR if node_bitmap or any
 *     socket/core array is NULL, or on the first mismatch (which is logged)
 */
extern int valid_select_job_res(select_job_res_t select_job_res,
                                void *node_rec_table,
                                uint16_t fast_schedule)
{
        struct node_record *node_table, *node;
        uint32_t node_socks, node_cores;
        int rec = 0;            /* current socket/core record */
        int rec_nodes = 0;      /* nodes already matched to that record */
        int node_inx, node_cnt;

        if (!select_job_res->node_bitmap) {
                error("valid_select_job_res: node_bitmap is NULL");
                return SLURM_ERROR;
        }
        if (!select_job_res->sockets_per_node ||
            !select_job_res->cores_per_socket ||
            !select_job_res->sock_core_rep_count) {
                error("valid_select_job_res: socket/core array is NULL");
                return SLURM_ERROR;
        }

        node_table = (struct node_record *) node_rec_table;
        node_cnt = bit_size(select_job_res->node_bitmap);
        for (node_inx = 0; node_inx < node_cnt; node_inx++) {
                if (!bit_test(select_job_res->node_bitmap, node_inx))
                        continue;

                node = &node_table[node_inx];
                if (fast_schedule) {
                        node_socks = node->config_ptr->sockets;
                        node_cores = node->config_ptr->cores;
                } else {
                        node_socks = node->sockets;
                        node_cores = node->cores;
                }

                /* Advance to the next record once the current one has
                 * accounted for all of its repetitions */
                if (rec_nodes >= select_job_res->sock_core_rep_count[rec]) {
                        rec++;
                        rec_nodes = 0;
                }

                if ((node_socks != select_job_res->sockets_per_node[rec]) ||
                    (node_cores != select_job_res->cores_per_socket[rec])) {
                        error("valid_select_job_res: "
                              "%s sockets:%u,%u, cores %u,%u",
                              node->name,
                              node_socks,
                              select_job_res->sockets_per_node[rec],
                              node_cores,
                              select_job_res->cores_per_socket[rec]);
                        return SLURM_ERROR;
                }
                rec_nodes++;
        }

        return SLURM_SUCCESS;
}
 
286
 
 
287
/* Build and return a copy of a select_job_res data structure.
 *
 * Scalar fields are copied directly. The bitmaps and the cpu, cpu_array and
 * memory arrays are duplicated only when present in the source. The
 * sockets_per_node, cores_per_socket and sock_core_rep_count arrays are
 * always allocated with nhosts entries, and only the records needed to
 * cover nhosts hosts are copied into them.
 *
 * IN select_job_res_ptr - structure to copy (must not be NULL)
 * RET newly allocated copy
 */
extern select_job_res_t copy_select_job_res(select_job_res_t
                                            select_job_res_ptr)
{
        int rec_cnt, host_cnt = 0;
        select_job_res_t copy = xmalloc(sizeof(struct select_job_res));

        xassert(select_job_res_ptr);

        /* Scalars */
        copy->nhosts   = select_job_res_ptr->nhosts;
        copy->nprocs   = select_job_res_ptr->nprocs;
        copy->node_req = select_job_res_ptr->node_req;

        /* Bitmaps */
        if (select_job_res_ptr->core_bitmap)
                copy->core_bitmap =
                        bit_copy(select_job_res_ptr->core_bitmap);
        if (select_job_res_ptr->core_bitmap_used)
                copy->core_bitmap_used =
                        bit_copy(select_job_res_ptr->core_bitmap_used);
        if (select_job_res_ptr->node_bitmap)
                copy->node_bitmap =
                        bit_copy(select_job_res_ptr->node_bitmap);

        /* Run-length encoded CPU counts */
        copy->cpu_array_cnt = select_job_res_ptr->cpu_array_cnt;
        if (select_job_res_ptr->cpu_array_cnt) {
                if (select_job_res_ptr->cpu_array_reps) {
                        copy->cpu_array_reps =
                                xmalloc(sizeof(uint32_t) *
                                        select_job_res_ptr->cpu_array_cnt);
                        memcpy(copy->cpu_array_reps,
                               select_job_res_ptr->cpu_array_reps,
                               (sizeof(uint32_t) *
                                select_job_res_ptr->cpu_array_cnt));
                }
                if (select_job_res_ptr->cpu_array_value) {
                        copy->cpu_array_value =
                                xmalloc(sizeof(uint16_t) *
                                        select_job_res_ptr->cpu_array_cnt);
                        memcpy(copy->cpu_array_value,
                               select_job_res_ptr->cpu_array_value,
                               (sizeof(uint16_t) *
                                select_job_res_ptr->cpu_array_cnt));
                }
        }

        /* Per-host CPU counts */
        if (select_job_res_ptr->cpus) {
                copy->cpus = xmalloc(sizeof(uint16_t) *
                                     select_job_res_ptr->nhosts);
                memcpy(copy->cpus, select_job_res_ptr->cpus,
                       (sizeof(uint16_t) * select_job_res_ptr->nhosts));
        }
        if (select_job_res_ptr->cpus_used) {
                copy->cpus_used = xmalloc(sizeof(uint16_t) *
                                          select_job_res_ptr->nhosts);
                memcpy(copy->cpus_used, select_job_res_ptr->cpus_used,
                       (sizeof(uint16_t) * select_job_res_ptr->nhosts));
        }

        /* Per-host memory counts */
        if (select_job_res_ptr->memory_allocated) {
                copy->memory_allocated = xmalloc(sizeof(uint32_t) *
                                                 copy->nhosts);
                memcpy(copy->memory_allocated,
                       select_job_res_ptr->memory_allocated,
                       (sizeof(uint32_t) * select_job_res_ptr->nhosts));
        }
        if (select_job_res_ptr->memory_used) {
                copy->memory_used = xmalloc(sizeof(uint32_t) *
                                            copy->nhosts);
                memcpy(copy->memory_used,
                       select_job_res_ptr->memory_used,
                       (sizeof(uint32_t) * select_job_res_ptr->nhosts));
        }

        /* Copy sockets_per_node, cores_per_socket and sock_core_rep_count.
         * First count how many records are in use: walk the repetition
         * counts until they account for every host, stopping early (with an
         * error logged) if a zero count is found */
        copy->sockets_per_node    = xmalloc(sizeof(uint16_t) * copy->nhosts);
        copy->cores_per_socket    = xmalloc(sizeof(uint16_t) * copy->nhosts);
        copy->sock_core_rep_count = xmalloc(sizeof(uint32_t) * copy->nhosts);
        for (rec_cnt = 0; rec_cnt < copy->nhosts; rec_cnt++) {
                if (select_job_res_ptr->sock_core_rep_count[rec_cnt] == 0) {
                        error("copy_select_job_res: sock_core_rep_count=0");
                        break;
                }
                host_cnt += select_job_res_ptr->sock_core_rep_count[rec_cnt];
                if (host_cnt >= select_job_res_ptr->nhosts) {
                        rec_cnt++;      /* include this final record */
                        break;
                }
        }
        memcpy(copy->sockets_per_node,
               select_job_res_ptr->sockets_per_node,
               (sizeof(uint16_t) * rec_cnt));
        memcpy(copy->cores_per_socket,
               select_job_res_ptr->cores_per_socket,
               (sizeof(uint16_t) * rec_cnt));
        memcpy(copy->sock_core_rep_count,
               select_job_res_ptr->sock_core_rep_count,
               (sizeof(uint32_t) * rec_cnt));

        return copy;
}
 
386
 
 
387
/* Release a select_job_res data structure and everything it owns.
 *
 * IN/OUT select_job_res_pptr - address of the structure pointer; if the
 *                              pointer is non-NULL, its bitmaps, arrays and
 *                              the structure itself are freed and the
 *                              pointer is set to NULL
 */
extern void free_select_job_res(select_job_res_t *select_job_res_pptr)
{
        select_job_res_t res = *select_job_res_pptr;

        if (!res)
                return;         /* nothing to release */

        /* Bitmaps */
        if (res->core_bitmap)
                bit_free(res->core_bitmap);
        if (res->core_bitmap_used)
                bit_free(res->core_bitmap_used);
        if (res->node_bitmap)
                bit_free(res->node_bitmap);

        /* Arrays */
        xfree(res->cores_per_socket);
        xfree(res->cpu_array_reps);
        xfree(res->cpu_array_value);
        xfree(res->cpus);
        xfree(res->cpus_used);
        xfree(res->memory_allocated);
        xfree(res->memory_used);
        xfree(res->sock_core_rep_count);
        xfree(res->sockets_per_node);

        /* The structure itself */
        xfree(res);
        *select_job_res_pptr = NULL;
}
 
411
 
 
412
/* Log the contents of a select_job_res data structure using info() */
 
413
extern void log_select_job_res(uint32_t job_id,
 
414
                               select_job_res_t select_job_res_ptr)
 
415
{
 
416
        int bit_inx = 0, bit_reps, i;
 
417
        int array_size, node_inx;
 
418
        int sock_inx = 0, sock_reps = 0;
 
419
 
 
420
        if (select_job_res_ptr == NULL) {
 
421
                error("log_select_job_res: select_job_res_ptr is NULL");
 
422
                return;
 
423
        }
 
424
 
 
425
        info("====================");
 
426
        info("job_id:%u nhosts:%u nprocs:%u node_req:%u", 
 
427
             job_id, select_job_res_ptr->nhosts, select_job_res_ptr->nprocs,
 
428
             select_job_res_ptr->node_req);
 
429
 
 
430
        if (select_job_res_ptr->cpus == NULL) {
 
431
                error("log_select_job_res: cpus array is NULL");
 
432
                return;
 
433
        }
 
434
        if (select_job_res_ptr->memory_allocated == NULL) {
 
435
                error("log_select_job_res: memory array is NULL");
 
436
                return;
 
437
        }
 
438
        if ((select_job_res_ptr->cores_per_socket == NULL) ||
 
439
            (select_job_res_ptr->sockets_per_node == NULL) ||
 
440
            (select_job_res_ptr->sock_core_rep_count == NULL)) {
 
441
                error("log_select_job_res: socket/core array is NULL");
 
442
                return;
 
443
        }
 
444
        if (select_job_res_ptr->core_bitmap == NULL) {
 
445
                error("log_select_job_res: core_bitmap is NULL");
 
446
                return;
 
447
        }
 
448
        if (select_job_res_ptr->core_bitmap_used == NULL) {
 
449
                error("log_select_job_res: core_bitmap_used is NULL");
 
450
                return;
 
451
        }
 
452
        array_size = bit_size(select_job_res_ptr->core_bitmap);
 
453
 
 
454
        /* Can only log node_bitmap from slurmctld, so don't bother here */
 
455
        for (node_inx=0; node_inx<select_job_res_ptr->nhosts; node_inx++) {
 
456
                uint32_t cpus_used = 0, memory_allocated = 0, memory_used = 0;
 
457
                info("Node[%d]:", node_inx);
 
458
 
 
459
                if (sock_reps >= 
 
460
                    select_job_res_ptr->sock_core_rep_count[sock_inx]) {
 
461
                        sock_inx++;
 
462
                        sock_reps = 0;
 
463
                }
 
464
                sock_reps++;
 
465
 
 
466
                if (select_job_res_ptr->cpus_used)
 
467
                        cpus_used = select_job_res_ptr->cpus_used[node_inx];
 
468
                if (select_job_res_ptr->memory_used)
 
469
                        memory_used = select_job_res_ptr->memory_used[node_inx];
 
470
                if (select_job_res_ptr->memory_allocated)
 
471
                        memory_allocated = select_job_res_ptr->
 
472
                                           memory_allocated[node_inx];
 
473
 
 
474
                info("  Mem(MB):%u:%u  Sockets:%u  Cores:%u  CPUs:%u:%u", 
 
475
                     memory_allocated, memory_used,
 
476
                     select_job_res_ptr->sockets_per_node[sock_inx],
 
477
                     select_job_res_ptr->cores_per_socket[sock_inx],
 
478
                     select_job_res_ptr->cpus[node_inx],
 
479
                     cpus_used);
 
480
 
 
481
                bit_reps = select_job_res_ptr->sockets_per_node[sock_inx] *
 
482
                           select_job_res_ptr->cores_per_socket[sock_inx];
 
483
                for (i=0; i<bit_reps; i++) {
 
484
                        if (bit_inx >= array_size) {
 
485
                                error("log_select_job_res: array size wrong");
 
486
                                break;
 
487
                        }
 
488
                        if (bit_test(select_job_res_ptr->core_bitmap,
 
489
                                     bit_inx)) {
 
490
                                char *core_used = "";
 
491
                                if (bit_test(select_job_res_ptr->
 
492
                                             core_bitmap_used, bit_inx))
 
493
                                        core_used = " and in use";
 
494
                                info("  Socket[%d] Core[%d] is allocated%s",
 
495
                                     (i / select_job_res_ptr->
 
496
                                          cores_per_socket[sock_inx]),
 
497
                                     (i % select_job_res_ptr->
 
498
                                          cores_per_socket[sock_inx]),
 
499
                                     core_used);
 
500
                        }
 
501
                        bit_inx++;
 
502
                }
 
503
        }
 
504
        for (node_inx=0; node_inx<select_job_res_ptr->cpu_array_cnt; 
 
505
             node_inx++) {
 
506
                if (node_inx == 0)
 
507
                        info("--------------------");
 
508
                info("cpu_array_value[%d]:%u reps:%u", node_inx,
 
509
                     select_job_res_ptr->cpu_array_value[node_inx],
 
510
                     select_job_res_ptr->cpu_array_reps[node_inx]);
 
511
        }
 
512
        info("====================");
 
513
}
 
514
 
 
515
extern void pack_select_job_res(select_job_res_t select_job_res_ptr, 
 
516
                                Buf buffer)
 
517
{
 
518
        int i;
 
519
        uint32_t core_cnt = 0, sock_recs = 0;
 
520
 
 
521
        if (select_job_res_ptr == NULL) {
 
522
                uint32_t empty = NO_VAL;
 
523
                pack32(empty, buffer);
 
524
                return;
 
525
        }
 
526
 
 
527
        xassert(select_job_res_ptr->core_bitmap);
 
528
        xassert(select_job_res_ptr->core_bitmap_used);
 
529
        xassert(select_job_res_ptr->cores_per_socket);
 
530
        xassert(select_job_res_ptr->cpus);
 
531
        xassert(select_job_res_ptr->nhosts);
 
532
        xassert(select_job_res_ptr->sock_core_rep_count);
 
533
        xassert(select_job_res_ptr->sockets_per_node);
 
534
 
 
535
        pack32(select_job_res_ptr->nhosts, buffer);
 
536
        pack32(select_job_res_ptr->nprocs, buffer);
 
537
        pack8(select_job_res_ptr->node_req, buffer);
 
538
 
 
539
        if (select_job_res_ptr->cpu_array_cnt &&
 
540
            select_job_res_ptr->cpu_array_reps &&
 
541
            select_job_res_ptr->cpu_array_value) {
 
542
                pack32(select_job_res_ptr->cpu_array_cnt, buffer);
 
543
                pack32_array(select_job_res_ptr->cpu_array_reps,
 
544
                             select_job_res_ptr->cpu_array_cnt, buffer);
 
545
                pack16_array(select_job_res_ptr->cpu_array_value,
 
546
                             select_job_res_ptr->cpu_array_cnt, buffer);
 
547
        } else {
 
548
                pack32((uint32_t) 0, buffer);
 
549
        }
 
550
 
 
551
        pack16_array(select_job_res_ptr->cpus,
 
552
                     select_job_res_ptr->nhosts, buffer);
 
553
        if (select_job_res_ptr->cpus_used) {
 
554
                pack16_array(select_job_res_ptr->cpus_used,
 
555
                             select_job_res_ptr->nhosts, buffer);
 
556
        } else
 
557
                pack16_array(select_job_res_ptr->cpus_used, 0, buffer);
 
558
 
 
559
        if (select_job_res_ptr->memory_allocated) {
 
560
                pack32_array(select_job_res_ptr->memory_allocated,  
 
561
                             select_job_res_ptr->nhosts, buffer);
 
562
        } else
 
563
                pack32_array(select_job_res_ptr->memory_allocated, 0, buffer);
 
564
        if (select_job_res_ptr->memory_used) {
 
565
                pack32_array(select_job_res_ptr->memory_used,  
 
566
                             select_job_res_ptr->nhosts, buffer);
 
567
        } else
 
568
                pack32_array(select_job_res_ptr->memory_used, 0, buffer);
 
569
 
 
570
        for (i=0; i<select_job_res_ptr->nhosts; i++) {
 
571
                core_cnt += select_job_res_ptr->sockets_per_node[i] *
 
572
                            select_job_res_ptr->cores_per_socket[i] *
 
573
                            select_job_res_ptr->sock_core_rep_count[i];
 
574
                sock_recs += select_job_res_ptr->sock_core_rep_count[i];
 
575
                if (sock_recs >= select_job_res_ptr->nhosts)
 
576
                        break;
 
577
        }
 
578
        i++;
 
579
        pack16_array(select_job_res_ptr->sockets_per_node,
 
580
                     (uint32_t) i, buffer);
 
581
        pack16_array(select_job_res_ptr->cores_per_socket,
 
582
                     (uint32_t) i, buffer);
 
583
        pack32_array(select_job_res_ptr->sock_core_rep_count, 
 
584
                     (uint32_t) i, buffer);
 
585
 
 
586
        pack32(core_cnt, buffer);
 
587
        xassert(core_cnt == bit_size(select_job_res_ptr->core_bitmap));
 
588
        pack_bit_fmt(select_job_res_ptr->core_bitmap, buffer);
 
589
        xassert(core_cnt == bit_size(select_job_res_ptr->core_bitmap_used));
 
590
        pack_bit_fmt(select_job_res_ptr->core_bitmap_used, buffer);
 
591
        /* Do not pack the node_bitmap, but rebuild it in reset_node_bitmap()
 
592
         * based upon job_ptr->nodes and the current node table */
 
593
}
 
594
 
 
595
extern int unpack_select_job_res(select_job_res_t *select_job_res_pptr, 
 
596
                                 Buf buffer)
 
597
{
 
598
        char *bit_fmt = NULL;
 
599
        uint32_t core_cnt, empty, tmp32;
 
600
        select_job_res_t select_job_res;
 
601
 
 
602
        xassert(select_job_res_pptr);
 
603
        safe_unpack32(&empty, buffer);
 
604
        if (empty == NO_VAL) {
 
605
                *select_job_res_pptr = NULL;
 
606
                return SLURM_SUCCESS;
 
607
        }
 
608
 
 
609
        select_job_res = xmalloc(sizeof(struct select_job_res));
 
610
        select_job_res->nhosts = empty;
 
611
        safe_unpack32(&select_job_res->nprocs, buffer);
 
612
        safe_unpack8(&select_job_res->node_req, buffer);
 
613
 
 
614
        safe_unpack32(&select_job_res->cpu_array_cnt, buffer);
 
615
        if (select_job_res->cpu_array_cnt) {
 
616
                safe_unpack32_array(&select_job_res->cpu_array_reps,
 
617
                                    &tmp32, buffer);
 
618
                if (tmp32 != select_job_res->cpu_array_cnt)
 
619
                        goto unpack_error;
 
620
                safe_unpack16_array(&select_job_res->cpu_array_value,
 
621
                                    &tmp32, buffer);
 
622
                if (tmp32 != select_job_res->cpu_array_cnt)
 
623
                        goto unpack_error;
 
624
        }
 
625
 
 
626
        safe_unpack16_array(&select_job_res->cpus, &tmp32, buffer);
 
627
        if (tmp32 != select_job_res->nhosts)
 
628
                goto unpack_error;
 
629
        safe_unpack16_array(&select_job_res->cpus_used, &tmp32, buffer);
 
630
        if (tmp32 == 0)
 
631
                xfree(select_job_res->cpus_used);
 
632
 
 
633
        safe_unpack32_array(&select_job_res->memory_allocated,
 
634
                            &tmp32, buffer);
 
635
        if (tmp32 == 0)
 
636
                xfree(select_job_res->memory_allocated);
 
637
        else if (tmp32 != select_job_res->nhosts)
 
638
                goto unpack_error;
 
639
        safe_unpack32_array(&select_job_res->memory_used, &tmp32, buffer);
 
640
        if (tmp32 == 0)
 
641
                xfree(select_job_res->memory_used);
 
642
 
 
643
        safe_unpack16_array(&select_job_res->sockets_per_node, &tmp32, buffer);
 
644
        safe_unpack16_array(&select_job_res->cores_per_socket, &tmp32, buffer);
 
645
        safe_unpack32_array(&select_job_res->sock_core_rep_count,
 
646
                            &tmp32, buffer);
 
647
 
 
648
        safe_unpack32(&core_cnt, buffer);    /* NOTE: Not part of struct */
 
649
        safe_unpackstr_xmalloc(&bit_fmt, &tmp32, buffer);
 
650
        select_job_res->core_bitmap = bit_alloc((bitoff_t) core_cnt);
 
651
        if (bit_unfmt(select_job_res->core_bitmap, bit_fmt))
 
652
                goto unpack_error;
 
653
        xfree(bit_fmt);
 
654
        safe_unpackstr_xmalloc(&bit_fmt, &tmp32, buffer);
 
655
        select_job_res->core_bitmap_used = bit_alloc((bitoff_t) core_cnt);
 
656
        if (bit_unfmt(select_job_res->core_bitmap_used, bit_fmt))
 
657
                goto unpack_error;
 
658
        xfree(bit_fmt);
 
659
        /* node_bitmap is not packed, but rebuilt in reset_node_bitmap()
 
660
         * based upon job_ptr->nodes and the current node table */
 
661
 
 
662
        *select_job_res_pptr = select_job_res;
 
663
        return SLURM_SUCCESS;
 
664
 
 
665
  unpack_error:
 
666
        free_select_job_res(&select_job_res);
 
667
        xfree(bit_fmt);
 
668
        *select_job_res_pptr = NULL;
 
669
        return SLURM_ERROR;
 
670
}
 
671
 
 
672
/*
 * get_select_job_res_offset - compute the index within core_bitmap of one
 *      core of one socket of one of the job's nodes
 * IN select_job_res_ptr - job resource record to search
 * IN node_id - zero-origin index of the node within the job's nhosts nodes
 * IN socket_id - zero-origin socket index on that node
 * IN core_id - zero-origin core index on that socket
 * RET the bit offset, or -1 if any index is out of range
 *
 * sockets_per_node[i] and cores_per_socket[i] describe the layout shared by
 * sock_core_rep_count[i] consecutive nodes. Bits are ordered by node, then
 * socket, then core.
 */
extern int get_select_job_res_offset(select_job_res_t select_job_res_ptr,
                                     uint32_t node_id, uint16_t socket_id,
                                     uint16_t core_id)
{
        uint32_t i;
        int bit_inx = 0, bitmap_size;

        xassert(select_job_res_ptr);

        /* Reject bad node indexes before walking the records: the record
         * arrays may hold fewer than nhosts entries, so scanning for a
         * node that does not exist could run past their end */
        if (node_id >= select_job_res_ptr->nhosts) {
                error("get_select_job_res_offset: invalid node_id: %u",
                      node_id);
                return -1;
        }

        for (i = 0; i < select_job_res_ptr->nhosts; i++) {
                if (select_job_res_ptr->sock_core_rep_count[i] <= node_id) {
                        /* Node lies beyond this record: skip all its bits */
                        bit_inx += select_job_res_ptr->sockets_per_node[i] *
                                   select_job_res_ptr->cores_per_socket[i] *
                                   select_job_res_ptr->sock_core_rep_count[i];
                        node_id -= select_job_res_ptr->sock_core_rep_count[i];
                } else if (socket_id >= select_job_res_ptr->
                                        sockets_per_node[i]) {
                        error("get_select_job_res_offset: socket_id >= "
                              "socket_cnt (%u >= %u)", socket_id,
                              select_job_res_ptr->sockets_per_node[i]);
                        return -1;
                } else if (core_id >= select_job_res_ptr->cores_per_socket[i]) {
                        error("get_select_job_res_offset: core_id >= core_cnt "
                              "(%u >= %u)", core_id,
                              select_job_res_ptr->cores_per_socket[i]);
                        return -1;
                } else {
                        /* Node is covered by this record; node_id is now
                         * relative to the record's first node */
                        bit_inx += select_job_res_ptr->sockets_per_node[i] *
                                   select_job_res_ptr->cores_per_socket[i] *
                                   node_id;
                        bit_inx += select_job_res_ptr->cores_per_socket[i] *
                                   socket_id;
                        bit_inx += core_id;
                        break;
                }
        }

        bitmap_size = bit_size(select_job_res_ptr->core_bitmap);
        if (bit_inx >= bitmap_size) {
                error("get_select_job_res_offset: offset >= bitmap size "
                      "(%d >= %d)", bit_inx, bitmap_size);
                return -1;
        }

        return bit_inx;
}
 
716
 
 
717
/*
 * get_select_job_res_bit - test the core_bitmap bit for one core
 * IN select_job_res_ptr - job resource record to examine
 * IN node_id, socket_id, core_id - location of the core, passed unchanged
 *      to get_select_job_res_offset()
 * RET the value returned by bit_test() for that core, or SLURM_ERROR if
 *      the location could not be mapped to a bit offset
 */
extern int get_select_job_res_bit(select_job_res_t select_job_res_ptr,
                                  uint32_t node_id, uint16_t socket_id,
                                  uint16_t core_id)
{
        int offset;

        offset = get_select_job_res_offset(select_job_res_ptr, node_id,
                                           socket_id, core_id);

        return (offset < 0) ? SLURM_ERROR :
               bit_test(select_job_res_ptr->core_bitmap, offset);
}
 
728
 
 
729
/*
 * set_select_job_res_bit - set the core_bitmap bit for one core
 * IN/OUT select_job_res_ptr - job resource record to update
 * IN node_id, socket_id, core_id - location of the core, passed unchanged
 *      to get_select_job_res_offset()
 * RET SLURM_SUCCESS, or SLURM_ERROR if the location could not be mapped to
 *      a bit offset (the bitmap is then left untouched)
 */
extern int set_select_job_res_bit(select_job_res_t select_job_res_ptr,
                                  uint32_t node_id, uint16_t socket_id,
                                  uint16_t core_id)
{
        int offset;

        offset = get_select_job_res_offset(select_job_res_ptr, node_id,
                                           socket_id, core_id);
        if (offset >= 0) {
                bit_set(select_job_res_ptr->core_bitmap, offset);
                return SLURM_SUCCESS;
        }

        return SLURM_ERROR;
}
 
741
 
 
742
/*
 * get_select_job_res_node - report whether any core of a node is set in
 *      core_bitmap
 * IN select_job_res_ptr - job resource record to examine
 * IN node_id - zero-origin index of the node within the job's nhosts nodes
 * RET 1 if at least one of the node's core bits is set, otherwise 0
 *      (0 is also returned, after logging an error, for a bad node_id or
 *      a bitmap too small for the node's cores)
 */
extern int get_select_job_res_node(select_job_res_t select_job_res_ptr,
                                   uint32_t node_id)
{
        uint32_t i;
        int c, bit_inx = 0, core_cnt = 0, bitmap_size;

        xassert(select_job_res_ptr);

        /* Reject bad node indexes before walking the records: the record
         * arrays may hold fewer than nhosts entries, so scanning for a
         * node that does not exist could run past their end */
        if (node_id >= select_job_res_ptr->nhosts) {
                error("get_select_job_res_node: invalid node_id: %u", node_id);
                return 0;
        }

        for (i = 0; i < select_job_res_ptr->nhosts; i++) {
                if (select_job_res_ptr->sock_core_rep_count[i] <= node_id) {
                        /* Node lies beyond this record: skip all its bits */
                        bit_inx += select_job_res_ptr->sockets_per_node[i] *
                                   select_job_res_ptr->cores_per_socket[i] *
                                   select_job_res_ptr->sock_core_rep_count[i];
                        node_id -= select_job_res_ptr->sock_core_rep_count[i];
                } else {
                        /* Node is covered by this record; node_id is now
                         * relative to the record's first node */
                        bit_inx += select_job_res_ptr->sockets_per_node[i] *
                                   select_job_res_ptr->cores_per_socket[i] *
                                   node_id;
                        core_cnt = select_job_res_ptr->sockets_per_node[i] *
                                   select_job_res_ptr->cores_per_socket[i];
                        break;
                }
        }
        if (core_cnt < 1) {
                error("get_select_job_res_node: core_cnt=0");
                return 0;
        }
        bitmap_size = bit_size(select_job_res_ptr->core_bitmap);
        if ((bit_inx + core_cnt) > bitmap_size) {
                error("get_select_job_res_node: offset > bitmap size "
                      "(%d >= %d)", (bit_inx + core_cnt), bitmap_size);
                return 0;
        }

        /* Scan the node's contiguous run of core bits */
        for (c = 0; c < core_cnt; c++) {
                if (bit_test(select_job_res_ptr->core_bitmap, bit_inx++))
                        return 1;
        }
        return 0;
}
 
781
 
 
782
/*
 * set_select_job_res_node - set every core bit of a node in core_bitmap
 * IN/OUT select_job_res_ptr - job resource record to update
 * IN node_id - zero-origin index of the node within the job's nhosts nodes
 * RET SLURM_SUCCESS, or SLURM_ERROR (after logging) for a bad node_id or a
 *      bitmap too small for the node's cores; the bitmap is not modified
 *      on error
 */
extern int set_select_job_res_node(select_job_res_t select_job_res_ptr,
                                   uint32_t node_id)
{
        uint32_t i;
        int c, bit_inx = 0, core_cnt = 0, bitmap_size;

        xassert(select_job_res_ptr);

        /* Reject bad node indexes before walking the records: the record
         * arrays may hold fewer than nhosts entries, so scanning for a
         * node that does not exist could run past their end */
        if (node_id >= select_job_res_ptr->nhosts) {
                error("set_select_job_res_node: invalid node_id: %u", node_id);
                return SLURM_ERROR;
        }

        for (i = 0; i < select_job_res_ptr->nhosts; i++) {
                if (select_job_res_ptr->sock_core_rep_count[i] <= node_id) {
                        /* Node lies beyond this record: skip all its bits */
                        bit_inx += select_job_res_ptr->sockets_per_node[i] *
                                   select_job_res_ptr->cores_per_socket[i] *
                                   select_job_res_ptr->sock_core_rep_count[i];
                        node_id -= select_job_res_ptr->sock_core_rep_count[i];
                } else {
                        /* Node is covered by this record; node_id is now
                         * relative to the record's first node */
                        bit_inx += select_job_res_ptr->sockets_per_node[i] *
                                   select_job_res_ptr->cores_per_socket[i] *
                                   node_id;
                        core_cnt = select_job_res_ptr->sockets_per_node[i] *
                                   select_job_res_ptr->cores_per_socket[i];
                        break;
                }
        }
        if (core_cnt < 1) {
                error("set_select_job_res_node: core_cnt=0");
                return SLURM_ERROR;
        }

        bitmap_size = bit_size(select_job_res_ptr->core_bitmap);
        if ((bit_inx + core_cnt) > bitmap_size) {
                error("set_select_job_res_node: offset > bitmap size "
                      "(%d >= %d)", (bit_inx + core_cnt), bitmap_size);
                return SLURM_ERROR;
        }

        /* Mark the node's contiguous run of core bits */
        for (c = 0; c < core_cnt; c++)
                bit_set(select_job_res_ptr->core_bitmap, bit_inx++);

        return SLURM_SUCCESS;
}
 
821
 
 
822
/*
 * get_select_job_res_cnt - look up the socket and core counts of one node
 * IN select_job_res_ptr - job resource record to examine
 * IN node_id - zero-origin index of the node within the job's nhosts nodes
 * OUT socket_cnt - sockets on the node, or 0 on error
 * OUT cores_per_socket_cnt - cores per socket on the node, or 0 on error
 * RET SLURM_SUCCESS, or SLURM_ERROR (after logging) if node_id is invalid
 */
extern int get_select_job_res_cnt(select_job_res_t select_job_res_ptr,
                                  uint32_t node_id,
                                  uint16_t *socket_cnt,
                                  uint16_t *cores_per_socket_cnt)
{
        uint32_t i, node_cnt = 0;

        xassert(select_job_res_ptr);
        xassert(socket_cnt);
        xassert(cores_per_socket_cnt);
        xassert(select_job_res_ptr->cores_per_socket);
        xassert(select_job_res_ptr->sock_core_rep_count);
        xassert(select_job_res_ptr->sockets_per_node);

        /* Only walk the records for an in-range node: the record arrays
         * may hold fewer than nhosts entries, so scanning for a node that
         * does not exist could run past their end */
        if (node_id < select_job_res_ptr->nhosts) {
                for (i = 0; i < select_job_res_ptr->nhosts; i++) {
                        /* node_cnt = number of nodes covered by records
                         * 0..i. Kept unsigned so a record with a zero
                         * repetition count cannot match every node_id. */
                        node_cnt += select_job_res_ptr->sock_core_rep_count[i];
                        if (node_id < node_cnt) {
                                *cores_per_socket_cnt = select_job_res_ptr->
                                                        cores_per_socket[i];
                                *socket_cnt = select_job_res_ptr->
                                              sockets_per_node[i];
                                return SLURM_SUCCESS;
                        }
                }
        }

        error("get_select_job_res_cnt: invalid node_id: %u", node_id);
        *cores_per_socket_cnt = 0;
        *socket_cnt = 0;
        return SLURM_ERROR;
}
 
850
 
 
851
/* Return 1 if the given job can fit into the given full-length core_bitmap,
 
852
 * else return 0.
 
853
 */
 
854
extern int can_select_job_cores_fit(select_job_res_t select_ptr,
 
855
                                    bitstr_t *full_bitmap,
 
856
                                    const uint16_t *bits_per_node,
 
857
                                    const uint32_t *bit_rep_count)
 
858
{
 
859
        uint32_t i, n, count = 1, last_bit = 0;
 
860
        uint32_t c = 0, j = 0, k = 0;
 
861
        
 
862
        if (!full_bitmap)
 
863
                return 1;
 
864
        
 
865
        for (i = 0, n = 0; i < select_ptr->nhosts; n++) {
 
866
                last_bit += bits_per_node[k];
 
867
                if (++count > bit_rep_count[k]) {
 
868
                        k++;
 
869
                        count = 1;
 
870
                }
 
871
                if (bit_test(select_ptr->node_bitmap, n) == 0) {
 
872
                        c = last_bit;
 
873
                        continue;
 
874
                }
 
875
                for (; c < last_bit; c++, j++) {
 
876
                        if (bit_test(full_bitmap, c) &&
 
877
                            bit_test(select_ptr->core_bitmap, j))
 
878
                                return 0;
 
879
                }
 
880
                i++;
 
881
        }
 
882
        return 1;
 
883
}
 
884
 
 
885
/* add the given job to the given full_core_bitmap */
 
886
extern void add_select_job_to_row(select_job_res_t select_ptr,
 
887
                                  bitstr_t **full_core_bitmap,
 
888
                                  const uint16_t *cores_per_node,
 
889
                                  const uint32_t *core_rep_count)
 
890
{
 
891
        uint32_t i, n, count = 1, last_bit = 0;
 
892
        uint32_t c = 0, j = 0, k = 0;
 
893
        
 
894
        if (!select_ptr->core_bitmap)
 
895
                return;
 
896
 
 
897
        /* add the job to the row_bitmap */
 
898
        if (*full_core_bitmap == NULL) {
 
899
                uint32_t size = 0;
 
900
                for (i = 0; core_rep_count[i]; i++) {
 
901
                        size += cores_per_node[i] * core_rep_count[i];
 
902
                }
 
903
                *full_core_bitmap = bit_alloc(size);
 
904
                if (!*full_core_bitmap)
 
905
                        fatal("add_select_job_to_row: bitmap memory error");
 
906
        }
 
907
 
 
908
        for (i = 0, n = 0; i < select_ptr->nhosts; n++) {
 
909
                last_bit += cores_per_node[k];
 
910
                if (++count > core_rep_count[k]) {
 
911
                        k++;
 
912
                        count = 1;
 
913
                }
 
914
                if (bit_test(select_ptr->node_bitmap, n) == 0) {
 
915
                        c = last_bit;
 
916
                        continue;
 
917
                }
 
918
                for (; c < last_bit; c++, j++) {
 
919
                        if (bit_test(select_ptr->core_bitmap, j))
 
920
                                bit_set(*full_core_bitmap, c);
 
921
                }
 
922
                i++;
 
923
        }
 
924
}