1
/*****************************************************************************\
2
* reservation.c - resource reservation management
3
*****************************************************************************
4
* Copyright (C) 2009 Lawrence Livermore National Security.
5
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
6
* Written by Morris Jette <jette1@llnl.gov> et al.
7
* CODE-OCEC-09-009. All rights reserved.
9
* This file is part of SLURM, a resource management program.
10
* For details, see <https://computing.llnl.gov/linux/slurm/>.
11
* Please also read the included file: DISCLAIMER.
13
* SLURM is free software; you can redistribute it and/or modify it under
14
* the terms of the GNU General Public License as published by the Free
15
* Software Foundation; either version 2 of the License, or (at your option)
18
* In addition, as a special exception, the copyright holders give permission
19
* to link the code of portions of this program with the OpenSSL library under
20
* certain conditions as described in each individual source file, and
21
* distribute linked combinations including the two. You must obey the GNU
22
* General Public License in all respects for all of the code used other than
23
* OpenSSL. If you modify file(s) with this exception, you may extend this
24
* exception to your version of the file(s), but you are not obligated to do
25
* so. If you do not wish to do so, delete this exception statement from your
26
* version. If you delete this exception statement from all source files in
27
* the program, then also delete it here.
29
* SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
30
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
31
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
34
* You should have received a copy of the GNU General Public License along
35
* with SLURM; if not, write to the Free Software Foundation, Inc.,
36
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
37
\*****************************************************************************/
45
#endif /* WITH_PTHREADS */
52
#include <slurm/slurm.h>
53
#include <slurm/slurm_errno.h>
55
#include <sys/types.h>
57
#include "src/common/assoc_mgr.h"
58
#include "src/common/bitstring.h"
59
#include "src/common/hostlist.h"
60
#include "src/common/list.h"
61
#include "src/common/log.h"
62
#include "src/common/macros.h"
63
#include "src/common/pack.h"
64
#include "src/common/parse_time.h"
65
#include "src/common/uid.h"
66
#include "src/common/xassert.h"
67
#include "src/common/xmalloc.h"
68
#include "src/common/xstring.h"
69
#include "src/common/slurm_accounting_storage.h"
71
#include "src/slurmctld/locks.h"
72
#include "src/slurmctld/slurmctld.h"
73
#include "src/slurmctld/state_save.h"
76
#define ONE_YEAR (365 * 24 * 60 * 60)
77
#define RESV_MAGIC 0x3b82
79
/* Change RESV_STATE_VERSION value when changing the state save format
80
* Add logic to permit reading of the previous version's state in order
81
* to avoid losing reservations between major SLURM releases. */
82
#define RESV_STATE_VERSION "VER002"
84
time_t last_resv_update = (time_t) 0;
85
List resv_list = (List) NULL;
86
uint32_t resv_over_run;
87
uint32_t top_suffix = 0;
89
static int _build_account_list(char *accounts, int *account_cnt,
90
char ***account_list);
91
static int _build_uid_list(char *users, int *user_cnt, uid_t **user_list);
92
static void _clear_job_resv(slurmctld_resv_t *resv_ptr);
93
static slurmctld_resv_t *_copy_resv(slurmctld_resv_t *resv_orig_ptr);
94
static void _del_resv_rec(void *x);
95
static void _dump_resv_req(resv_desc_msg_t *resv_ptr, char *mode);
96
static int _find_resv_id(void *x, void *key);
97
static int _find_resv_name(void *x, void *key);
98
static void _generate_resv_id(void);
99
static void _generate_resv_name(resv_desc_msg_t *resv_ptr);
100
static bool _is_account_valid(char *account);
101
static bool _is_resv_used(slurmctld_resv_t *resv_ptr);
102
static bool _job_overlap(time_t start_time, uint16_t flags,
103
bitstr_t *node_bitmap);
104
static void _pack_resv(slurmctld_resv_t *resv_ptr, Buf buffer,
106
static int _post_resv_create(slurmctld_resv_t *resv_ptr);
107
static int _post_resv_delete(slurmctld_resv_t *resv_ptr);
108
static int _post_resv_update(slurmctld_resv_t *resv_ptr,
109
slurmctld_resv_t *old_resv_ptr);
110
static bitstr_t *_pick_idle_nodes(bitstr_t *avail_nodes,
111
resv_desc_msg_t *resv_desc_ptr);
112
static bitstr_t *_pick_idle_nodes2(bitstr_t *avail_nodes,
113
resv_desc_msg_t *resv_desc_ptr);
114
static int _resize_resv(slurmctld_resv_t *resv_ptr, uint32_t node_cnt);
115
static bool _resv_overlap(time_t start_time, time_t end_time,
116
uint16_t flags, bitstr_t *node_bitmap,
117
slurmctld_resv_t *this_resv_ptr);
118
static int _select_nodes(resv_desc_msg_t *resv_desc_ptr,
119
struct part_record **part_ptr,
120
bitstr_t **resv_bitmap);
121
static int _set_assoc_list(slurmctld_resv_t *resv_ptr);
122
static void _set_cpu_cnt(slurmctld_resv_t *resv_ptr);
123
static void _set_nodes_maint(slurmctld_resv_t *resv_ptr, time_t now);
124
static void _swap_resv(slurmctld_resv_t *resv_backup,
125
slurmctld_resv_t *resv_ptr);
126
static int _update_account_list(slurmctld_resv_t *resv_ptr,
128
static int _update_uid_list(slurmctld_resv_t *resv_ptr, char *users);
129
static void _validate_all_reservations(void);
130
static int _valid_job_access_resv(struct job_record *job_ptr,
131
slurmctld_resv_t *resv_ptr);
132
static bool _validate_one_reservation(slurmctld_resv_t *resv_ptr);
133
static void _validate_node_choice(slurmctld_resv_t *resv_ptr);
135
/*
 * _copy_resv - build a deep copy of a reservation record
 * IN resv_orig_ptr - reservation to duplicate, must carry RESV_MAGIC
 * RET newly allocated record. Strings, the account and user arrays and the
 *	node bitmap are duplicated; part_ptr is shared with the original.
 * NOTE: update_resv() uses the copy as a backup that is swapped back in by
 *	_swap_resv() on failure, so every member of the record must be
 *	carried over here (including duration, which create_resv() sets and
 *	_pack_resv() saves).
 */
static slurmctld_resv_t *_copy_resv(slurmctld_resv_t *resv_orig_ptr)
{
	int i;
	slurmctld_resv_t *resv_copy_ptr;

	xassert(resv_orig_ptr->magic == RESV_MAGIC);
	resv_copy_ptr = xmalloc(sizeof(slurmctld_resv_t));

	/* Accounts: the string plus one duplicated name per array entry */
	resv_copy_ptr->accounts = xstrdup(resv_orig_ptr->accounts);
	resv_copy_ptr->account_cnt = resv_orig_ptr->account_cnt;
	resv_copy_ptr->account_list = xmalloc(sizeof(char *) *
					      resv_orig_ptr->account_cnt);
	for (i = 0; i < resv_copy_ptr->account_cnt; i++) {
		resv_copy_ptr->account_list[i] =
				xstrdup(resv_orig_ptr->account_list[i]);
	}

	resv_copy_ptr->assoc_list = xstrdup(resv_orig_ptr->assoc_list);
	resv_copy_ptr->cpu_cnt = resv_orig_ptr->cpu_cnt;
	resv_copy_ptr->duration = resv_orig_ptr->duration;
	resv_copy_ptr->end_time = resv_orig_ptr->end_time;
	resv_copy_ptr->features = xstrdup(resv_orig_ptr->features);
	resv_copy_ptr->flags = resv_orig_ptr->flags;
	resv_copy_ptr->job_pend_cnt = resv_orig_ptr->job_pend_cnt;
	resv_copy_ptr->job_run_cnt = resv_orig_ptr->job_run_cnt;
	resv_copy_ptr->magic = resv_orig_ptr->magic;
	resv_copy_ptr->name = xstrdup(resv_orig_ptr->name);

	/* The bitmap may be unset (other code in this file tests it for
	 * NULL before use), so only copy it when present; the member is
	 * otherwise left as xmalloc() returned it (assumed zero-filled). */
	if (resv_orig_ptr->node_bitmap) {
		resv_copy_ptr->node_bitmap =
				bit_copy(resv_orig_ptr->node_bitmap);
	}
	resv_copy_ptr->node_cnt = resv_orig_ptr->node_cnt;
	resv_copy_ptr->node_list = xstrdup(resv_orig_ptr->node_list);
	resv_copy_ptr->partition = xstrdup(resv_orig_ptr->partition);
	resv_copy_ptr->part_ptr = resv_orig_ptr->part_ptr;
	resv_copy_ptr->resv_id = resv_orig_ptr->resv_id;
	resv_copy_ptr->start_time = resv_orig_ptr->start_time;
	resv_copy_ptr->start_time_first = resv_orig_ptr->start_time_first;
	resv_copy_ptr->start_time_prev = resv_orig_ptr->start_time_prev;

	/* Users: the string plus a plain copy of the uid array */
	resv_copy_ptr->users = xstrdup(resv_orig_ptr->users);
	resv_copy_ptr->user_cnt = resv_orig_ptr->user_cnt;
	resv_copy_ptr->user_list = xmalloc(sizeof(uid_t) *
					   resv_orig_ptr->user_cnt);
	for (i = 0; i < resv_copy_ptr->user_cnt; i++) {
		resv_copy_ptr->user_list[i] = resv_orig_ptr->user_list[i];
	}

	return resv_copy_ptr;
}
178
/* Swaping the contents of two reservation records */
179
static void _swap_resv(slurmctld_resv_t *resv_backup,
180
slurmctld_resv_t *resv_ptr)
182
resv_desc_msg_t *resv_copy_ptr;
184
xassert(resv_backup->magic == RESV_MAGIC);
185
xassert(resv_ptr->magic == RESV_MAGIC);
186
resv_copy_ptr = xmalloc(sizeof(slurmctld_resv_t));
187
memcpy(resv_copy_ptr, resv_backup, sizeof(slurmctld_resv_t));
188
memcpy(resv_backup, resv_ptr, sizeof(slurmctld_resv_t));
189
memcpy(resv_ptr, resv_copy_ptr, sizeof(slurmctld_resv_t));
190
xfree(resv_copy_ptr);
193
static void _del_resv_rec(void *x)
196
slurmctld_resv_t *resv_ptr = (slurmctld_resv_t *) x;
199
xassert(resv_ptr->magic == RESV_MAGIC);
201
xfree(resv_ptr->accounts);
202
for (i=0; i<resv_ptr->account_cnt; i++)
203
xfree(resv_ptr->account_list[i]);
204
xfree(resv_ptr->account_list);
205
xfree(resv_ptr->assoc_list);
206
xfree(resv_ptr->features);
207
xfree(resv_ptr->name);
208
if (resv_ptr->node_bitmap)
209
bit_free(resv_ptr->node_bitmap);
210
xfree(resv_ptr->node_list);
211
xfree(resv_ptr->partition);
212
xfree(resv_ptr->users);
213
xfree(resv_ptr->user_list);
218
static int _find_resv_id(void *x, void *key)
220
slurmctld_resv_t *resv_ptr = (slurmctld_resv_t *) x;
221
uint32_t *resv_id = (uint32_t *) key;
223
xassert(resv_ptr->magic == RESV_MAGIC);
225
if (resv_ptr->resv_id != *resv_id)
228
return 1; /* match */
231
static int _find_resv_name(void *x, void *key)
233
slurmctld_resv_t *resv_ptr = (slurmctld_resv_t *) x;
235
xassert(resv_ptr->magic == RESV_MAGIC);
237
if (strcmp(resv_ptr->name, (char *) key))
240
return 1; /* match */
243
static void _dump_resv_req(resv_desc_msg_t *resv_ptr, char *mode)
247
char start_str[32] = "-1", end_str[32] = "-1", *flag_str = NULL;
250
if (resv_ptr->start_time != (time_t) NO_VAL) {
251
slurm_make_time_str(&resv_ptr->start_time,
252
start_str, sizeof(start_str));
254
if (resv_ptr->end_time != (time_t) NO_VAL) {
255
slurm_make_time_str(&resv_ptr->end_time,
256
end_str, sizeof(end_str));
258
if (resv_ptr->flags != (uint16_t) NO_VAL)
259
flag_str = reservation_flags_string(resv_ptr->flags);
261
if (resv_ptr->duration == NO_VAL)
264
duration = resv_ptr->duration;
266
info("%s: Name=%s StartTime=%s EndTime=%s Duration=%d "
267
"Flags=%s NodeCnt=%d NodeList=%s Features=%s "
268
"PartitionName=%s Users=%s Accounts=%s",
269
mode, resv_ptr->name, start_str, end_str, duration,
270
flag_str, resv_ptr->node_cnt, resv_ptr->node_list,
271
resv_ptr->features, resv_ptr->partition,
272
resv_ptr->users, resv_ptr->accounts);
278
static void _generate_resv_id(void)
281
if (top_suffix >= 9999)
282
top_suffix = 1; /* wrap around */
285
if (!list_find_first(resv_list, _find_resv_id, &top_suffix))
290
static void _generate_resv_name(resv_desc_msg_t *resv_ptr)
292
char *key, *name, *sep;
295
/* Generate name prefix, based upon the first account
296
* name if provided otherwise first user name */
297
if (resv_ptr->accounts && resv_ptr->accounts[0])
298
key = resv_ptr->accounts;
300
key = resv_ptr->users;
301
sep = strchr(key, ',');
306
name = xmalloc(len + 16);
307
strncpy(name, key, len);
309
xstrfmtcat(name, "_%d", top_suffix);
312
resv_ptr->name = name;
315
/* Validate an account name */
316
static bool _is_account_valid(char *account)
318
acct_association_rec_t assoc_rec, *assoc_ptr;
320
if (!(accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS))
321
return true; /* don't worry about account validity */
323
memset(&assoc_rec, 0, sizeof(acct_association_rec_t));
324
assoc_rec.uid = NO_VAL;
325
assoc_rec.acct = account;
327
if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec,
328
accounting_enforce, &assoc_ptr)) {
334
static int _append_assoc_list(List assoc_list, acct_association_rec_t *assoc)
336
int rc = ESLURM_INVALID_BANK_ACCOUNT;
337
acct_association_rec_t *assoc_ptr = NULL;
338
if (assoc_mgr_fill_in_assoc(
342
if(accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS) {
343
error("No association for user %u and account %s",
344
assoc->uid, assoc->acct);
346
verbose("No association for user %u and account %s",
347
assoc->uid, assoc->acct);
353
list_append(assoc_list, assoc_ptr);
359
/* Set an association list based upon accounts and users.
 * Collects association records for the reservation's users and accounts
 * into a temporary list, then rewrites resv_ptr->assoc_list as a comma
 * separated string with one "%u" value per collected association (the
 * value argument is not visible in this view; presumably the association
 * id - confirm).
 * IN/OUT resv_ptr - reservation whose user_list/account_list are read and
 *	whose assoc_list string is rebuilt
 * NOTE(review): several interior lines of this function (returns, error
 *	exits, closing braces) are not visible in this view. */
360
static int _set_assoc_list(slurmctld_resv_t *resv_ptr)
362
int rc = SLURM_SUCCESS, i = 0, j = 0;
363
List assoc_list = NULL;
364
acct_association_rec_t assoc, *assoc_ptr = NULL;
366
/* no need to do this if we can't ;) */
367
if(!association_based_accounting)
370
/* Created with a NULL delete-function argument; presumably so that
 * list_destroy() below does not free the association records - confirm */
assoc_list = list_create(NULL);
372
memset(&assoc, 0, sizeof(acct_association_rec_t));
374
/* Users given: pair each user with each account, or, when no accounts
 * were given, gather the user's associations from the association manager */
if(resv_ptr->user_cnt) {
375
for(i=0; i < resv_ptr->user_cnt; i++) {
376
if(resv_ptr->account_cnt) {
377
for(j=0; j < resv_ptr->account_cnt; j++) {
379
sizeof(acct_association_rec_t));
380
assoc.uid = resv_ptr->user_list[i];
381
assoc.acct = resv_ptr->account_list[j];
382
if((rc = _append_assoc_list(
390
sizeof(acct_association_rec_t));
391
assoc.uid = resv_ptr->user_list[i];
392
if((rc = assoc_mgr_get_user_assocs(
393
acct_db_conn, &assoc,
394
accounting_enforce, assoc_list))
396
rc = ESLURM_INVALID_BANK_ACCOUNT;
401
/* No users, accounts only: look up each account with uid NO_VAL */
} else if(resv_ptr->account_cnt) {
402
for(i=0; i < resv_ptr->account_cnt; i++) {
404
sizeof(acct_association_rec_t));
405
assoc.uid = (uint32_t)NO_VAL;
406
/* NOTE(review): this loop iterates on i but indexes with j. This branch
 * is only reached when user_cnt is zero, so j still holds its initial
 * value 0 and every iteration looks up account_list[0]. This looks like
 * it should be account_list[i] - confirm and fix. */
assoc.acct = resv_ptr->account_list[j];
407
if((rc = _append_assoc_list(assoc_list, &assoc))
412
} else if(accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS) {
413
/* NOTE(review): "reservtion" in the message below is a typo in a
 * runtime string; left untouched here */
error("We need at least 1 user or 1 account to "
414
"create a reservtion.");
418
/* Rebuild the string form only if at least one association was found */
if(list_count(assoc_list)) {
419
ListIterator itr = list_iterator_create(assoc_list);
420
xfree(resv_ptr->assoc_list); /* clear for modify */
421
while((assoc_ptr = list_next(itr))) {
422
/* A leading comma is used for every entry after the first */
if(resv_ptr->assoc_list)
423
xstrfmtcat(resv_ptr->assoc_list, ",%u",
426
xstrfmtcat(resv_ptr->assoc_list, "%u",
429
list_iterator_destroy(itr);
433
list_destroy(assoc_list);
437
/* Post reservation create */
438
static int _post_resv_create(slurmctld_resv_t *resv_ptr)
440
int rc = SLURM_SUCCESS;
441
acct_reservation_rec_t resv;
442
char temp_bit[BUF_SIZE];
444
memset(&resv, 0, sizeof(acct_reservation_rec_t));
446
resv.assocs = resv_ptr->assoc_list;
447
resv.cluster = slurmctld_cluster_name;
448
resv.cpus = resv_ptr->cpu_cnt;
449
resv.flags = resv_ptr->flags;
450
resv.id = resv_ptr->resv_id;
451
resv.name = resv_ptr->name;
452
resv.nodes = resv_ptr->node_list;
453
if(resv_ptr->node_bitmap)
454
resv.node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
455
resv_ptr->node_bitmap);
457
resv.time_end = resv_ptr->end_time;
458
resv.time_start = resv_ptr->start_time;
460
rc = acct_storage_g_add_reservation(acct_db_conn, &resv);
465
/* Note that a reservation has been deleted */
466
static int _post_resv_delete(slurmctld_resv_t *resv_ptr)
468
int rc = SLURM_SUCCESS;
469
acct_reservation_rec_t resv;
470
memset(&resv, 0, sizeof(acct_reservation_rec_t));
472
resv.cluster = slurmctld_cluster_name;
473
resv.id = resv_ptr->resv_id;
474
resv.name = resv_ptr->name;
475
resv.time_start = resv_ptr->start_time;
476
/* This is just a time stamp here to delete if the reservation
477
* hasn't started yet so we don't get trash records in the
478
* database if said database isn't up right now */
479
resv.time_start_prev = time(NULL);
480
rc = acct_storage_g_remove_reservation(acct_db_conn, &resv);
485
/* Note that a reservation has been updated */
486
static int _post_resv_update(slurmctld_resv_t *resv_ptr,
487
slurmctld_resv_t *old_resv_ptr)
489
int rc = SLURM_SUCCESS;
490
acct_reservation_rec_t resv;
491
char temp_bit[BUF_SIZE];
493
memset(&resv, 0, sizeof(acct_reservation_rec_t));
495
resv.cluster = slurmctld_cluster_name;
496
resv.id = resv_ptr->resv_id;
497
resv.time_end = resv_ptr->end_time;
500
resv.assocs = resv_ptr->assoc_list;
501
resv.cpus = resv_ptr->cpu_cnt;
502
resv.flags = resv_ptr->flags;
503
resv.nodes = resv_ptr->node_list;
505
time_t now = time(NULL);
507
if(old_resv_ptr->assoc_list && resv_ptr->assoc_list) {
508
if(strcmp(old_resv_ptr->assoc_list,
509
resv_ptr->assoc_list))
510
resv.assocs = resv_ptr->assoc_list;
511
} else if(resv_ptr->assoc_list)
512
resv.assocs = resv_ptr->assoc_list;
514
if(old_resv_ptr->cpu_cnt != resv_ptr->cpu_cnt)
515
resv.cpus = resv_ptr->cpu_cnt;
517
resv.cpus = (uint32_t)NO_VAL;
519
if(old_resv_ptr->flags != resv_ptr->flags)
520
resv.flags = resv_ptr->flags;
522
resv.flags = (uint16_t)NO_VAL;
524
if(old_resv_ptr->node_list && resv_ptr->node_list) {
525
if(strcmp(old_resv_ptr->node_list,
526
resv_ptr->node_list))
527
resv.nodes = resv_ptr->node_list;
528
} else if(resv_ptr->node_list)
529
resv.nodes = resv_ptr->node_list;
531
/* Here if the reservation has started already we need
532
* to mark a new start time for it if certain
533
* variables are needed in accounting. Right now if
534
* the assocs, nodes, flags or cpu count changes we need a
535
* new start time of now. */
536
if((resv_ptr->start_time < now)
539
|| (resv.flags != (uint16_t)NO_VAL)
540
|| (resv.cpus != (uint32_t)NO_VAL))) {
541
resv_ptr->start_time_prev = resv_ptr->start_time;
542
resv_ptr->start_time = now;
545
/* now set the (maybe new) start_times */
546
resv.time_start = resv_ptr->start_time;
547
resv.time_start_prev = resv_ptr->start_time_prev;
549
if(resv.nodes && resv_ptr->node_bitmap)
550
resv.node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
551
resv_ptr->node_bitmap);
553
rc = acct_storage_g_modify_reservation(acct_db_conn, &resv);
559
* Validate a comma delimited list of account names and build an array of
561
* IN account - a list of account names
562
* OUT account_cnt - number of accounts in the list
563
* OUT account_list - list of the account names,
564
* CALLER MUST XFREE this plus each individual record
565
* RETURN 0 on success
567
static int _build_account_list(char *accounts, int *account_cnt,
568
char ***account_list)
570
char *last = NULL, *tmp, *tok;
575
*account_list = (char **) NULL;
578
return ESLURM_INVALID_BANK_ACCOUNT;
580
i = strlen(accounts);
581
ac_list = xmalloc(sizeof(char *) * (i + 2));
582
tmp = xstrdup(accounts);
583
tok = strtok_r(tmp, ",", &last);
585
if (!_is_account_valid(tok)) {
586
info("Reservation request has invalid account %s",
590
ac_list[ac_cnt++] = xstrdup(tok);
591
tok = strtok_r(NULL, ",", &last);
593
*account_cnt = ac_cnt;
594
*account_list = ac_list;
596
return SLURM_SUCCESS;
598
inval: for (i=0; i<ac_cnt; i++)
602
return ESLURM_INVALID_BANK_ACCOUNT;
606
* Update an account list for an existing reservation based upon an
607
* update comma delimited specification of accounts to add (+name),
608
* remove (-name), or set value of
609
* IN/OUT resv_ptr - pointer to reservation structure being updated
610
* IN accounts - a list of account names, to set, add, or remove
611
* RETURN 0 on success
613
static int _update_account_list(slurmctld_resv_t *resv_ptr,
616
char *last = NULL, *ac_cpy, *tok;
617
int ac_cnt = 0, i, j, k;
618
int *ac_type, minus_account = 0, plus_account = 0;
623
return ESLURM_INVALID_BANK_ACCOUNT;
625
i = strlen(accounts);
626
ac_list = xmalloc(sizeof(char *) * (i + 2));
627
ac_type = xmalloc(sizeof(int) * (i + 2));
628
ac_cpy = xstrdup(accounts);
629
tok = strtok_r(ac_cpy, ",", &last);
632
ac_type[ac_cnt] = 1; /* minus */
635
} else if (tok[0] == '+') {
636
ac_type[ac_cnt] = 2; /* plus */
639
} else if (tok[0] == '\0') {
641
} else if (plus_account || minus_account) {
642
info("Reservation account expression invalid %s",
646
ac_type[ac_cnt] = 3; /* set */
647
if (!_is_account_valid(tok)) {
648
info("Reservation request has invalid account %s",
652
ac_list[ac_cnt++] = xstrdup(tok);
653
tok = strtok_r(NULL, ",", &last);
656
if ((plus_account == 0) && (minus_account == 0)) {
657
/* Just a reset of account list */
658
xfree(resv_ptr->accounts);
659
if (accounts[0] != '\0')
660
resv_ptr->accounts = xstrdup(accounts);
661
xfree(resv_ptr->account_list);
662
resv_ptr->account_list = ac_list;
663
resv_ptr->account_cnt = ac_cnt;
666
return SLURM_SUCCESS;
669
/* Modification of existing account list */
671
if (resv_ptr->account_cnt == 0)
673
for (i=0; i<ac_cnt; i++) {
677
for (j=0; j<resv_ptr->account_cnt; j++) {
678
if (strcmp(resv_ptr->account_list[j],
683
xfree(resv_ptr->account_list[j]);
684
resv_ptr->account_cnt--;
685
for (k=j; k<resv_ptr->account_cnt; k++) {
686
resv_ptr->account_list[k] =
687
resv_ptr->account_list[k+1];
694
xfree(resv_ptr->accounts);
695
for (i=0; i<resv_ptr->account_cnt; i++) {
697
resv_ptr->accounts = xstrdup(resv_ptr->
700
xstrcat(resv_ptr->accounts, ",");
701
xstrcat(resv_ptr->accounts,
702
resv_ptr->account_list[i]);
708
for (i=0; i<ac_cnt; i++) {
712
for (j=0; j<resv_ptr->account_cnt; j++) {
713
if (strcmp(resv_ptr->account_list[j],
721
continue; /* duplicate entry */
722
xrealloc(resv_ptr->account_list,
723
sizeof(char *) * (resv_ptr->account_cnt + 1));
724
resv_ptr->account_list[resv_ptr->account_cnt++] =
727
xfree(resv_ptr->accounts);
728
for (i=0; i<resv_ptr->account_cnt; i++) {
730
resv_ptr->accounts = xstrdup(resv_ptr->
733
xstrcat(resv_ptr->accounts, ",");
734
xstrcat(resv_ptr->accounts,
735
resv_ptr->account_list[i]);
739
for (i=0; i<ac_cnt; i++)
744
return SLURM_SUCCESS;
746
inval: for (i=0; i<ac_cnt; i++)
751
return ESLURM_INVALID_BANK_ACCOUNT;
755
* Validate a comma delimited list of user names and build an array of
757
* IN users - a list of user names
758
* OUT user_cnt - number of UIDs in the list
759
* OUT user_list - list of the user's uid, CALLER MUST XFREE;
760
* RETURN 0 on success
762
static int _build_uid_list(char *users, int *user_cnt, uid_t **user_list)
764
char *last = NULL, *tmp = NULL, *tok;
766
uid_t *u_list, u_tmp;
769
*user_list = (uid_t *) NULL;
772
return ESLURM_USER_ID_MISSING;
775
u_list = xmalloc(sizeof(uid_t) * (i + 2));
776
tmp = xstrdup(users);
777
tok = strtok_r(tmp, ",", &last);
779
if (uid_from_string (tok, &u_tmp) < 0) {
780
info("Reservation request has invalid user %s", tok);
783
u_list[u_cnt++] = u_tmp;
784
tok = strtok_r(NULL, ",", &last);
789
return SLURM_SUCCESS;
793
return ESLURM_USER_ID_MISSING;
797
* Update a user/uid list for an existing reservation based upon an
798
* update comma delimited specification of users to add (+name),
799
* remove (-name), or set value of
800
* IN/OUT resv_ptr - pointer to reservation structure being updated
801
* IN users - a list of user names, to set, add, or remove
802
* RETURN 0 on success
804
static int _update_uid_list(slurmctld_resv_t *resv_ptr, char *users)
806
char *last = NULL, *u_cpy = NULL, *tmp = NULL, *tok;
807
int u_cnt = 0, i, j, k;
808
uid_t *u_list, u_tmp;
809
int *u_type, minus_user = 0, plus_user = 0;
814
return ESLURM_USER_ID_MISSING;
816
/* Parse the incoming user expression */
818
u_list = xmalloc(sizeof(uid_t) * (i + 2));
819
u_name = xmalloc(sizeof(char *) * (i + 2));
820
u_type = xmalloc(sizeof(int) * (i + 2));
821
u_cpy = xstrdup(users);
822
tok = strtok_r(u_cpy, ",", &last);
825
u_type[u_cnt] = 1; /* minus */
828
} else if (tok[0] == '+') {
829
u_type[u_cnt] = 2; /* plus */
832
} else if (tok[0] == '\0') {
834
} else if (plus_user || minus_user) {
835
info("Reservation user expression invalid %s", users);
838
u_type[u_cnt] = 3; /* set */
840
if (uid_from_string (tok, &u_tmp) < 0) {
841
info("Reservation request has invalid user %s", tok);
846
u_list[u_cnt++] = u_tmp;
847
tok = strtok_r(NULL, ",", &last);
850
if ((plus_user == 0) && (minus_user == 0)) {
851
/* Just a reset of user list */
852
xfree(resv_ptr->users);
853
xfree(resv_ptr->user_list);
854
if (users[0] != '\0')
855
resv_ptr->users = xstrdup(users);
856
resv_ptr->user_cnt = u_cnt;
857
resv_ptr->user_list = u_list;
861
return SLURM_SUCCESS;
864
/* Modification of existing user list */
866
for (i=0; i<u_cnt; i++) {
870
for (j=0; j<resv_ptr->user_cnt; j++) {
871
if (resv_ptr->user_list[j] != u_list[i])
874
resv_ptr->user_cnt--;
875
for (k=j; k<resv_ptr->user_cnt; k++) {
876
resv_ptr->user_list[k] =
877
resv_ptr->user_list[k+1];
883
/* Now we need to remove from users string */
884
k = strlen(u_name[i]);
885
tmp = resv_ptr->users;
886
while ((tok = strstr(tmp, u_name[i]))) {
887
if (((tok != resv_ptr->users) &&
889
((tok[k] != '\0') && (tok[k] != ','))) {
893
if (tok[-1] == ',') {
896
} else if (tok[k] == ',')
908
for (i=0; i<u_cnt; i++) {
912
for (j=0; j<resv_ptr->user_cnt; j++) {
913
if (resv_ptr->user_list[j] != u_list[i])
919
continue; /* duplicate entry */
920
if (resv_ptr->users && resv_ptr->users[0])
921
xstrcat(resv_ptr->users, ",");
922
xstrcat(resv_ptr->users, u_name[i]);
923
xrealloc(resv_ptr->user_list,
924
sizeof(uid_t) * (resv_ptr->user_cnt + 1));
925
resv_ptr->user_list[resv_ptr->user_cnt++] =
933
return SLURM_SUCCESS;
939
return ESLURM_USER_ID_MISSING;
943
* _pack_resv - dump configuration information about a specific reservation
944
* in machine independent form (for network transmission or state save)
945
* IN resv_ptr - pointer to reservation for which information is requested
946
* IN/OUT buffer - buffer in which data is placed, pointers automatically
948
* IN internal - true if for internal save state, false for xmit to users
949
* NOTE: if you make any changes here be sure to make the corresponding changes
950
* to _unpack_reserve_info_members() in common/slurm_protocol_pack.c
951
* plus load_all_resv_state() below.
953
static void _pack_resv(slurmctld_resv_t *resv_ptr, Buf buffer,
956
packstr(resv_ptr->accounts, buffer);
957
pack_time(resv_ptr->end_time, buffer);
958
packstr(resv_ptr->features, buffer);
959
packstr(resv_ptr->name, buffer);
960
pack32(resv_ptr->node_cnt, buffer);
961
packstr(resv_ptr->node_list, buffer);
962
packstr(resv_ptr->partition, buffer);
963
pack_time(resv_ptr->start_time_first, buffer);
964
pack16(resv_ptr->flags, buffer);
965
packstr(resv_ptr->users, buffer);
968
packstr(resv_ptr->assoc_list, buffer);
969
pack32(resv_ptr->cpu_cnt, buffer);
970
pack32(resv_ptr->resv_id, buffer);
971
pack_time(resv_ptr->start_time_prev, buffer);
972
pack_time(resv_ptr->start_time, buffer);
973
pack32(resv_ptr->duration, buffer);
975
pack_bit_fmt(resv_ptr->node_bitmap, buffer);
980
* Test if a new/updated reservation request will overlap running jobs
981
* RET true if overlap
983
static bool _job_overlap(time_t start_time, uint16_t flags,
984
bitstr_t *node_bitmap)
986
ListIterator job_iterator;
987
struct job_record *job_ptr;
988
bool overlap = false;
990
if (flags & RESERVE_FLAG_IGN_JOBS) /* ignore job overlap */
993
job_iterator = list_iterator_create(job_list);
994
while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
995
if ((job_ptr->job_state == JOB_RUNNING) &&
996
(job_ptr->end_time > start_time) &&
997
(bit_overlap(job_ptr->node_bitmap, node_bitmap) > 0)) {
1002
list_iterator_destroy(job_iterator);
1008
* Test if a new/updated reservation request overlaps an existing
1010
* RET true if overlap
1012
static bool _resv_overlap(time_t start_time, time_t end_time,
1013
uint16_t flags, bitstr_t *node_bitmap,
1014
slurmctld_resv_t *this_resv_ptr)
1017
slurmctld_resv_t *resv_ptr;
1019
uint32_t delta_t, i, j;
1020
time_t s_time1, s_time2, e_time1, e_time2;
1022
if ((!node_bitmap) || (flags & RESERVE_FLAG_MAINT))
1025
iter = list_iterator_create(resv_list);
1027
fatal("malloc: list_iterator_create");
1029
while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) {
1030
if (resv_ptr == this_resv_ptr)
1031
continue; /* skip self */
1032
if (resv_ptr->node_bitmap == NULL)
1033
continue; /* no specific nodes in reservation */
1034
if (!bit_overlap(resv_ptr->node_bitmap, node_bitmap))
1035
continue; /* no overlap */
1037
for (i=0; ((i<7) && (!rc)); i++) { /* look forward one week */
1038
delta_t = i * (24 * 60 * 60);
1039
s_time1 = start_time + delta_t;
1040
e_time1 = end_time + delta_t;
1041
for (j=0; ((j<7) && (!rc)); j++) {
1042
delta_t = j * (24 * 60 * 60);
1043
s_time2 = resv_ptr->start_time + delta_t;
1044
e_time2 = resv_ptr->end_time + delta_t;
1045
if ((s_time1 < e_time2) &&
1046
(e_time1 > s_time2)) {
1047
verbose("Reservation overlap with %s",
1052
if (!(resv_ptr->flags & RESERVE_FLAG_DAILY))
1055
if ((flags & RESERVE_FLAG_DAILY) == 0)
1059
list_iterator_destroy(iter);
1064
/* Set a reservation's CPU count. Requires that the reservation's
1065
* node_bitmap be set. */
1066
static void _set_cpu_cnt(slurmctld_resv_t *resv_ptr)
1069
uint32_t cpu_cnt = 0;
1070
struct node_record *node_ptr = node_record_table_ptr;
1072
if (!resv_ptr->node_bitmap)
1075
for (i=0; i<node_record_count; i++, node_ptr++) {
1076
if (!bit_test(resv_ptr->node_bitmap, i))
1078
if (slurmctld_conf.fast_schedule)
1079
cpu_cnt += node_ptr->config_ptr->cpus;
1081
cpu_cnt += node_ptr->cpus;
1083
resv_ptr->cpu_cnt = cpu_cnt;
1086
/* Create a resource reservation */
1087
extern int create_resv(resv_desc_msg_t *resv_desc_ptr)
1089
int i, rc = SLURM_SUCCESS;
1090
time_t now = time(NULL);
1091
struct part_record *part_ptr = NULL;
1092
bitstr_t *node_bitmap = NULL;
1093
slurmctld_resv_t *resv_ptr;
1094
int account_cnt = 0, user_cnt = 0;
1095
char **account_list = NULL;
1096
uid_t *user_list = NULL;
1097
char start_time[32], end_time[32];
1100
resv_list = list_create(_del_resv_rec);
1101
_dump_resv_req(resv_desc_ptr, "create_resv");
1103
/* Validate the request */
1104
if (resv_desc_ptr->start_time != (time_t) NO_VAL) {
1105
if (resv_desc_ptr->start_time < (now - 60)) {
1106
info("Reservation request has invalid start time");
1107
rc = ESLURM_INVALID_TIME_VALUE;
1111
resv_desc_ptr->start_time = now;
1113
if (resv_desc_ptr->end_time != (time_t) NO_VAL) {
1114
if (resv_desc_ptr->end_time < (now - 60)) {
1115
info("Reservation request has invalid end time");
1116
rc = ESLURM_INVALID_TIME_VALUE;
1119
} else if (resv_desc_ptr->duration) {
1120
resv_desc_ptr->end_time = resv_desc_ptr->start_time +
1121
(resv_desc_ptr->duration * 60);
1123
resv_desc_ptr->end_time = INFINITE;
1124
if (resv_desc_ptr->flags == (uint16_t) NO_VAL)
1125
resv_desc_ptr->flags = 0;
1127
resv_desc_ptr->flags &= RESERVE_FLAG_MAINT |
1128
RESERVE_FLAG_IGN_JOBS |
1129
RESERVE_FLAG_DAILY |
1130
RESERVE_FLAG_WEEKLY;
1132
if (resv_desc_ptr->partition) {
1133
part_ptr = find_part_record(resv_desc_ptr->partition);
1135
info("Reservation request has invalid partition %s",
1136
resv_desc_ptr->partition);
1137
rc = ESLURM_INVALID_PARTITION_NAME;
1141
if ((resv_desc_ptr->accounts == NULL) &&
1142
(resv_desc_ptr->users == NULL)) {
1143
info("Reservation request lacks users or accounts");
1144
rc = ESLURM_INVALID_BANK_ACCOUNT;
1147
if (resv_desc_ptr->accounts) {
1148
rc = _build_account_list(resv_desc_ptr->accounts,
1149
&account_cnt, &account_list);
1153
if (resv_desc_ptr->users) {
1154
rc = _build_uid_list(resv_desc_ptr->users,
1155
&user_cnt, &user_list);
1159
if (resv_desc_ptr->node_list) {
1160
resv_desc_ptr->flags |= RESERVE_FLAG_SPEC_NODES;
1161
if (strcasecmp(resv_desc_ptr->node_list, "ALL") == 0) {
1162
node_bitmap = bit_alloc(node_record_count);
1163
bit_nset(node_bitmap, 0, (node_record_count - 1));
1164
} else if (node_name2bitmap(resv_desc_ptr->node_list,
1165
false, &node_bitmap)) {
1166
rc = ESLURM_INVALID_NODE_NAME;
1169
if (resv_desc_ptr->node_cnt == NO_VAL)
1170
resv_desc_ptr->node_cnt = 0;
1171
if (_resv_overlap(resv_desc_ptr->start_time,
1172
resv_desc_ptr->end_time,
1173
resv_desc_ptr->flags, node_bitmap,
1175
info("Reservation request overlaps another");
1176
rc = ESLURM_RESERVATION_OVERLAP;
1179
resv_desc_ptr->node_cnt = bit_set_count(node_bitmap);
1180
if (_job_overlap(resv_desc_ptr->start_time,
1181
resv_desc_ptr->flags, node_bitmap)) {
1182
info("Reservation request overlaps jobs");
1183
rc = ESLURM_NODES_BUSY;
1186
} else if (resv_desc_ptr->node_cnt == NO_VAL) {
1187
info("Reservation request lacks node specification");
1188
rc = ESLURM_INVALID_NODE_NAME;
1190
} else if ((rc = _select_nodes(resv_desc_ptr, &part_ptr, &node_bitmap))
1195
_generate_resv_id();
1196
if (resv_desc_ptr->name) {
1197
resv_ptr = (slurmctld_resv_t *) list_find_first (resv_list,
1198
_find_resv_name, resv_desc_ptr->name);
1200
info("Reservation request name duplication (%s)",
1201
resv_desc_ptr->name);
1202
rc = ESLURM_RESERVATION_INVALID;
1207
_generate_resv_name(resv_desc_ptr);
1208
resv_ptr = (slurmctld_resv_t *)
1209
list_find_first (resv_list,
1210
_find_resv_name, resv_desc_ptr->name);
1213
_generate_resv_id(); /* makes new suffix */
1214
/* Same as previously created name, retry */
1218
/* Create a new reservation record */
1219
resv_ptr = xmalloc(sizeof(slurmctld_resv_t));
1220
resv_ptr->accounts = resv_desc_ptr->accounts;
1221
resv_desc_ptr->accounts = NULL; /* Nothing left to free */
1222
resv_ptr->account_cnt = account_cnt;
1223
resv_ptr->account_list = account_list;
1224
resv_ptr->duration = resv_desc_ptr->duration;
1225
resv_ptr->end_time = resv_desc_ptr->end_time;
1226
resv_ptr->features = resv_desc_ptr->features;
1227
resv_desc_ptr->features = NULL; /* Nothing left to free */
1228
resv_ptr->resv_id = top_suffix;
1229
xassert(resv_ptr->magic = RESV_MAGIC); /* Sets value */
1230
resv_ptr->name = xstrdup(resv_desc_ptr->name);
1231
resv_ptr->node_cnt = resv_desc_ptr->node_cnt;
1232
resv_ptr->node_list = resv_desc_ptr->node_list;
1233
resv_desc_ptr->node_list = NULL; /* Nothing left to free */
1234
resv_ptr->node_bitmap = node_bitmap; /* May be unset */
1235
resv_ptr->partition = resv_desc_ptr->partition;
1236
resv_desc_ptr->partition = NULL; /* Nothing left to free */
1237
resv_ptr->part_ptr = part_ptr;
1238
resv_ptr->start_time = resv_desc_ptr->start_time;
1239
resv_ptr->start_time_first = resv_ptr->start_time;
1240
resv_ptr->start_time_prev = resv_ptr->start_time;
1241
resv_ptr->flags = resv_desc_ptr->flags;
1242
resv_ptr->users = resv_desc_ptr->users;
1243
resv_ptr->user_cnt = user_cnt;
1244
resv_ptr->user_list = user_list;
1245
resv_desc_ptr->users = NULL; /* Nothing left to free */
1246
_set_cpu_cnt(resv_ptr);
1247
if((rc = _set_assoc_list(resv_ptr)) != SLURM_SUCCESS)
1250
/* This needs to be done after all other setup is done. */
1251
_post_resv_create(resv_ptr);
1253
slurm_make_time_str(&resv_ptr->start_time, start_time,
1254
sizeof(start_time));
1255
slurm_make_time_str(&resv_ptr->end_time, end_time, sizeof(end_time));
1256
info("Created reservation %s accounts=%s users=%s "
1257
"nodes=%s start=%s end=%s",
1258
resv_ptr->name, resv_ptr->accounts, resv_ptr->users,
1259
resv_ptr->node_list, start_time, end_time);
1260
list_append(resv_list, resv_ptr);
1261
last_resv_update = now;
1262
schedule_resv_save();
1264
return SLURM_SUCCESS;
1267
for (i=0; i<account_cnt; i++)
1268
xfree(account_list[i]);
1269
xfree(account_list);
1271
bit_free(node_bitmap);
1276
/* Purge all reservation data structures */
1277
extern void resv_fini(void)
1280
list_destroy(resv_list);
1281
resv_list = (List) NULL;
1285
/* Update an existing resource reservation */
1286
extern int update_resv(resv_desc_msg_t *resv_desc_ptr)
1288
time_t now = time(NULL);
1289
slurmctld_resv_t *resv_backup, *resv_ptr;
1290
int error_code = SLURM_SUCCESS, rc;
1291
char start_time[32], end_time[32];
1294
resv_list = list_create(_del_resv_rec);
1295
_dump_resv_req(resv_desc_ptr, "update_resv");
1297
/* Find the specified reservation */
1298
if ((resv_desc_ptr->name == NULL))
1299
return ESLURM_RESERVATION_INVALID;
1300
resv_ptr = (slurmctld_resv_t *) list_find_first (resv_list,
1301
_find_resv_name, resv_desc_ptr->name);
1303
return ESLURM_RESERVATION_INVALID;
1305
/* Make backup to restore state in case of failure */
1306
resv_backup = _copy_resv(resv_ptr);
1308
/* Process the request */
1309
if (resv_desc_ptr->flags != (uint16_t) NO_VAL) {
1310
if (resv_desc_ptr->flags & RESERVE_FLAG_MAINT)
1311
resv_ptr->flags |= RESERVE_FLAG_MAINT;
1312
if (resv_desc_ptr->flags & RESERVE_FLAG_NO_MAINT)
1313
resv_ptr->flags &= (~RESERVE_FLAG_MAINT);
1314
if (resv_desc_ptr->flags & RESERVE_FLAG_IGN_JOBS)
1315
resv_ptr->flags |= RESERVE_FLAG_IGN_JOBS;
1316
if (resv_desc_ptr->flags & RESERVE_FLAG_NO_IGN_JOB)
1317
resv_ptr->flags &= (~RESERVE_FLAG_IGN_JOBS);
1318
if (resv_desc_ptr->flags & RESERVE_FLAG_DAILY)
1319
resv_ptr->flags |= RESERVE_FLAG_DAILY;
1320
if (resv_desc_ptr->flags & RESERVE_FLAG_NO_DAILY)
1321
resv_ptr->flags &= (~RESERVE_FLAG_DAILY);
1322
if (resv_desc_ptr->flags & RESERVE_FLAG_WEEKLY)
1323
resv_ptr->flags |= RESERVE_FLAG_WEEKLY;
1324
if (resv_desc_ptr->flags & RESERVE_FLAG_NO_WEEKLY)
1325
resv_ptr->flags &= (~RESERVE_FLAG_WEEKLY);
1327
if (resv_desc_ptr->partition && (resv_desc_ptr->partition[0] == '\0')) {
1328
/* Clear the partition */
1329
xfree(resv_desc_ptr->partition);
1330
xfree(resv_ptr->partition);
1331
resv_ptr->part_ptr = NULL;
1333
if (resv_desc_ptr->partition) {
1334
struct part_record *part_ptr = NULL;
1335
part_ptr = find_part_record(resv_desc_ptr->partition);
1337
info("Reservation request has invalid partition (%s)",
1338
resv_desc_ptr->partition);
1339
error_code = ESLURM_INVALID_PARTITION_NAME;
1340
goto update_failure;
1342
xfree(resv_ptr->partition);
1343
resv_ptr->partition = resv_desc_ptr->partition;
1344
resv_desc_ptr->partition = NULL; /* Nothing left to free */
1345
resv_ptr->part_ptr = part_ptr;
1347
if (resv_desc_ptr->accounts) {
1348
rc = _update_account_list(resv_ptr, resv_desc_ptr->accounts);
1351
goto update_failure;
1354
if (resv_desc_ptr->features && (resv_desc_ptr->features[0] == '\0')) {
1355
xfree(resv_desc_ptr->features);
1356
xfree(resv_ptr->features);
1358
if (resv_desc_ptr->features) {
1359
xfree(resv_ptr->features);
1360
resv_ptr->features = resv_desc_ptr->features;
1361
resv_desc_ptr->features = NULL; /* Nothing left to free */
1363
if (resv_desc_ptr->users) {
1364
rc = _update_uid_list(resv_ptr, resv_desc_ptr->users);
1367
goto update_failure;
1370
if ((resv_ptr->users == NULL) && (resv_ptr->accounts == NULL)) {
1371
info("Reservation request lacks users or accounts");
1372
error_code = ESLURM_INVALID_BANK_ACCOUNT;
1373
goto update_failure;
1376
if (resv_desc_ptr->start_time != (time_t) NO_VAL) {
1377
if (resv_desc_ptr->start_time < (now - 60)) {
1378
info("Reservation request has invalid start time");
1379
error_code = ESLURM_INVALID_TIME_VALUE;
1380
goto update_failure;
1382
resv_ptr->start_time_prev = resv_ptr->start_time;
1383
resv_ptr->start_time = resv_desc_ptr->start_time;
1384
resv_ptr->start_time_first = resv_desc_ptr->start_time;
1385
if(resv_ptr->duration) {
1386
resv_ptr->end_time = resv_ptr->start_time_first +
1387
(resv_ptr->duration * 60);
1390
if (resv_desc_ptr->end_time != (time_t) NO_VAL) {
1391
if (resv_desc_ptr->end_time < (now - 60)) {
1392
info("Reservation request has invalid end time");
1393
error_code = ESLURM_INVALID_TIME_VALUE;
1394
goto update_failure;
1396
resv_ptr->end_time = resv_desc_ptr->end_time;
1397
resv_ptr->duration = 0;
1399
if (resv_desc_ptr->duration != NO_VAL) {
1400
resv_ptr->duration = resv_desc_ptr->duration;
1401
resv_ptr->end_time = resv_ptr->start_time_first +
1402
(resv_desc_ptr->duration * 60);
1405
if (resv_ptr->start_time >= resv_ptr->end_time) {
1406
error_code = ESLURM_INVALID_TIME_VALUE;
1407
goto update_failure;
1409
if (resv_desc_ptr->node_list &&
1410
(resv_desc_ptr->node_list[0] == '\0')) { /* Clear bitmap */
1411
resv_ptr->flags &= (~RESERVE_FLAG_SPEC_NODES);
1412
xfree(resv_desc_ptr->node_list);
1413
xfree(resv_ptr->node_list);
1414
FREE_NULL_BITMAP(resv_ptr->node_bitmap);
1415
resv_ptr->node_bitmap = bit_alloc(node_record_count);
1416
if (resv_desc_ptr->node_cnt == NO_VAL)
1417
resv_desc_ptr->node_cnt = resv_ptr->node_cnt;
1418
resv_ptr->node_cnt = 0;
1420
if (resv_desc_ptr->node_list) { /* Change bitmap last */
1421
bitstr_t *node_bitmap;
1422
resv_ptr->flags |= RESERVE_FLAG_SPEC_NODES;
1423
if (strcasecmp(resv_desc_ptr->node_list, "ALL") == 0) {
1424
node_bitmap = bit_alloc(node_record_count);
1425
bit_nset(node_bitmap, 0, (node_record_count - 1));
1426
} else if (node_name2bitmap(resv_desc_ptr->node_list,
1427
false, &node_bitmap)) {
1428
error_code = ESLURM_INVALID_NODE_NAME;
1429
goto update_failure;
1431
xfree(resv_ptr->node_list);
1432
resv_ptr->node_list = resv_desc_ptr->node_list;
1433
resv_desc_ptr->node_list = NULL; /* Nothing left to free */
1434
FREE_NULL_BITMAP(resv_ptr->node_bitmap);
1435
resv_ptr->node_bitmap = node_bitmap;
1436
resv_ptr->node_cnt = bit_set_count(resv_ptr->node_bitmap);
1438
if (resv_desc_ptr->node_cnt != NO_VAL) {
1439
rc = _resize_resv(resv_ptr, resv_desc_ptr->node_cnt);
1442
goto update_failure;
1444
resv_ptr->node_cnt = bit_set_count(resv_ptr->node_bitmap);
1446
if (_resv_overlap(resv_ptr->start_time, resv_ptr->end_time,
1447
resv_ptr->flags, resv_ptr->node_bitmap, resv_ptr)) {
1448
info("Reservation request overlaps another");
1449
error_code = ESLURM_RESERVATION_OVERLAP;
1450
goto update_failure;
1452
if (_job_overlap(resv_ptr->start_time, resv_ptr->flags,
1453
resv_ptr->node_bitmap)) {
1454
info("Reservation request overlaps jobs");
1455
error_code = ESLURM_NODES_BUSY;
1456
goto update_failure;
1458
_set_cpu_cnt(resv_ptr);
1459
if((error_code = _set_assoc_list(resv_ptr)) != SLURM_SUCCESS)
1460
goto update_failure;
1462
slurm_make_time_str(&resv_ptr->start_time, start_time,
1463
sizeof(start_time));
1464
slurm_make_time_str(&resv_ptr->end_time, end_time, sizeof(end_time));
1465
info("Update reservation %s accounts=%s users=%s "
1466
"nodes=%s start=%s end=%s",
1467
resv_ptr->name, resv_ptr->accounts, resv_ptr->users,
1468
resv_ptr->node_list, start_time, end_time);
1470
_post_resv_update(resv_ptr, resv_backup);
1471
_del_resv_rec(resv_backup);
1472
last_resv_update = now;
1473
schedule_resv_save();
1477
_swap_resv(resv_backup, resv_ptr);
1478
_del_resv_rec(resv_backup);
1482
/* Determine if a running or pending job is using a reservation */
1483
static bool _is_resv_used(slurmctld_resv_t *resv_ptr)
1485
ListIterator job_iterator;
1486
struct job_record *job_ptr;
1489
job_iterator = list_iterator_create(job_list);
1490
while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
1491
if ((!IS_JOB_FINISHED(job_ptr)) &&
1492
(job_ptr->resv_id == resv_ptr->resv_id)) {
1497
list_iterator_destroy(job_iterator);
1502
/* Clear the reservation points for jobs referencing a defunct reservation */
1503
static void _clear_job_resv(slurmctld_resv_t *resv_ptr)
1505
ListIterator job_iterator;
1506
struct job_record *job_ptr;
1508
job_iterator = list_iterator_create(job_list);
1509
while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
1510
if (job_ptr->resv_ptr != resv_ptr)
1512
if (!IS_JOB_FINISHED(job_ptr)) {
1513
info("Job %u linked to defunct reservation %s, "
1514
"clearing that reservation",
1515
job_ptr->job_id, job_ptr->resv_name);
1517
job_ptr->resv_id = 0;
1518
job_ptr->resv_ptr = NULL;
1519
xfree(job_ptr->resv_name);
1521
list_iterator_destroy(job_iterator);
1524
/* Delete an exiting resource reservation */
1525
extern int delete_resv(reservation_name_msg_t *resv_desc_ptr)
1528
slurmctld_resv_t *resv_ptr;
1529
int rc = SLURM_SUCCESS;
1530
time_t now = time(NULL);
1533
info("delete_resv: Name=%s", resv_desc_ptr->name);
1536
iter = list_iterator_create(resv_list);
1538
fatal("malloc: list_iterator_create");
1539
while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) {
1540
if (strcmp(resv_ptr->name, resv_desc_ptr->name))
1542
if (_is_resv_used(resv_ptr)) {
1543
rc = ESLURM_RESERVATION_BUSY;
1547
if (resv_ptr->maint_set_node) {
1548
resv_ptr->maint_set_node = false;
1549
_set_nodes_maint(resv_ptr, now);
1550
last_node_update = now;
1553
rc = _post_resv_delete(resv_ptr);
1554
_clear_job_resv(resv_ptr);
1555
list_delete_item(iter);
1558
list_iterator_destroy(iter);
1561
info("Reservation %s not found for deletion",
1562
resv_desc_ptr->name);
1563
return ESLURM_RESERVATION_INVALID;
1566
last_resv_update = time(NULL);
1567
schedule_resv_save();
1571
/* Dump the reservation records to a buffer */
1572
extern void show_resv(char **buffer_ptr, int *buffer_size, uid_t uid)
1575
slurmctld_resv_t *resv_ptr;
1576
uint32_t resv_packed;
1579
time_t now = time(NULL);
1584
resv_list = list_create(_del_resv_rec);
1586
buffer_ptr[0] = NULL;
1589
buffer = init_buf(BUF_SIZE);
1591
/* write header: version and time */
1593
pack32(resv_packed, buffer);
1594
pack_time(now, buffer);
1596
/* write individual reservation records */
1597
iter = list_iterator_create(resv_list);
1599
fatal("malloc: list_iterator_create");
1600
while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) {
1601
if ((slurmctld_conf.private_data & PRIVATE_DATA_RESERVATIONS)
1602
&& !validate_super_user(uid)) {
1604
for(i=0; i<resv_ptr->user_cnt; i++) {
1605
if(resv_ptr->user_list[i] == uid)
1609
if(i >= resv_ptr->user_cnt)
1613
_pack_resv(resv_ptr, buffer, false);
1616
list_iterator_destroy(iter);
1618
/* put the real record count in the message body header */
1619
tmp_offset = get_buf_offset(buffer);
1620
set_buf_offset(buffer, 0);
1621
pack32(resv_packed, buffer);
1622
set_buf_offset(buffer, tmp_offset);
1624
*buffer_size = get_buf_offset(buffer);
1625
buffer_ptr[0] = xfer_buf_data(buffer);
1626
END_TIMER2("show_resv");
1629
/* Save the state of all reservations to file */
1630
extern int dump_all_resv_state(void)
1633
slurmctld_resv_t *resv_ptr;
1634
int error_code = 0, log_fd;
1635
char *old_file, *new_file, *reg_file;
1636
/* Locks: Read node */
1637
slurmctld_lock_t resv_read_lock =
1638
{ READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK };
1639
Buf buffer = init_buf(BUF_SIZE);
1644
resv_list = list_create(_del_resv_rec);
1646
/* write header: time */
1647
packstr(RESV_STATE_VERSION, buffer);
1648
pack_time(time(NULL), buffer);
1649
pack32(top_suffix, buffer);
1651
/* write reservation records to buffer */
1652
lock_slurmctld(resv_read_lock);
1653
iter = list_iterator_create(resv_list);
1655
fatal("malloc: list_iterator_create");
1656
while ((resv_ptr = (slurmctld_resv_t *) list_next(iter)))
1657
_pack_resv(resv_ptr, buffer, true);
1658
list_iterator_destroy(iter);
1659
/* Maintain config read lock until we copy state_save_location *\
1660
\* unlock_slurmctld(resv_read_lock); - see below */
1662
/* write the buffer to file */
1663
old_file = xstrdup(slurmctld_conf.state_save_location);
1664
xstrcat(old_file, "/resv_state.old");
1665
reg_file = xstrdup(slurmctld_conf.state_save_location);
1666
xstrcat(reg_file, "/resv_state");
1667
new_file = xstrdup(slurmctld_conf.state_save_location);
1668
xstrcat(new_file, "/resv_state.new");
1669
unlock_slurmctld(resv_read_lock);
1671
log_fd = creat(new_file, 0600);
1673
error("Can't save state, error creating file %s, %m",
1677
int pos = 0, nwrite = get_buf_offset(buffer), amount;
1678
char *data = (char *)get_buf_data(buffer);
1680
while (nwrite > 0) {
1681
amount = write(log_fd, &data[pos], nwrite);
1682
if ((amount < 0) && (errno != EINTR)) {
1683
error("Error writing file %s, %m", new_file);
1694
(void) unlink(new_file);
1695
else { /* file shuffle */
1696
(void) unlink(old_file);
1697
if(link(reg_file, old_file))
1698
debug4("unable to create link for %s -> %s: %m",
1699
reg_file, old_file);
1700
(void) unlink(reg_file);
1701
if(link(new_file, reg_file))
1702
debug4("unable to create link for %s -> %s: %m",
1703
new_file, reg_file);
1704
(void) unlink(new_file);
1709
unlock_state_files();
1712
END_TIMER2("dump_all_resv_state");
1716
/* Validate one reservation record, return true if good */
1717
static bool _validate_one_reservation(slurmctld_resv_t *resv_ptr)
1719
if ((resv_ptr->name == NULL) || (resv_ptr->name[0] == '\0')) {
1720
error("Read reservation without name");
1723
if (resv_ptr->partition) {
1724
struct part_record *part_ptr = NULL;
1725
part_ptr = find_part_record(resv_ptr->partition);
1727
error("Reservation %s has invalid partition (%s)",
1728
resv_ptr->name, resv_ptr->partition);
1731
resv_ptr->part_ptr = part_ptr;
1733
if (resv_ptr->accounts) {
1734
int account_cnt = 0, i, rc;
1735
char **account_list;
1736
rc = _build_account_list(resv_ptr->accounts,
1737
&account_cnt, &account_list);
1739
error("Reservation %s has invalid accounts (%s)",
1740
resv_ptr->name, resv_ptr->accounts);
1743
for (i=0; i<resv_ptr->account_cnt; i++)
1744
xfree(resv_ptr->account_list[i]);
1745
xfree(resv_ptr->account_list);
1746
resv_ptr->account_cnt = account_cnt;
1747
resv_ptr->account_list = account_list;
1749
if (resv_ptr->users) {
1750
int rc, user_cnt = 0;
1751
uid_t *user_list = NULL;
1752
rc = _build_uid_list(resv_ptr->users,
1753
&user_cnt, &user_list);
1755
error("Reservation %s has invalid users (%s)",
1756
resv_ptr->name, resv_ptr->users);
1759
xfree(resv_ptr->user_list);
1760
resv_ptr->user_cnt = user_cnt;
1761
resv_ptr->user_list = user_list;
1763
if (resv_ptr->node_list) { /* Change bitmap last */
1764
bitstr_t *node_bitmap;
1765
if (strcasecmp(resv_ptr->node_list, "ALL") == 0) {
1766
node_bitmap = bit_alloc(node_record_count);
1767
bit_nset(node_bitmap, 0, (node_record_count - 1));
1768
} else if (node_name2bitmap(resv_ptr->node_list,
1769
false, &node_bitmap)) {
1770
error("Reservation %s has invalid nodes (%s)",
1771
resv_ptr->name, resv_ptr->node_list);
1774
FREE_NULL_BITMAP(resv_ptr->node_bitmap);
1775
resv_ptr->node_bitmap = node_bitmap;
1781
* Validate all reservation records, reset bitmaps, etc.
1782
* Purge any invalid reservation.
1784
static void _validate_all_reservations(void)
1787
slurmctld_resv_t *resv_ptr;
1788
struct job_record *job_ptr;
1792
iter = list_iterator_create(resv_list);
1794
fatal("malloc: list_iterator_create");
1795
while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) {
1796
if (!_validate_one_reservation(resv_ptr)) {
1797
error("Purging invalid reservation record %s",
1799
_post_resv_delete(resv_ptr);
1800
_clear_job_resv(resv_ptr);
1801
list_delete_item(iter);
1803
_set_assoc_list(resv_ptr);
1804
tmp = strrchr(resv_ptr->name, '_');
1806
res_num = atoi(tmp + 1);
1807
top_suffix = MAX(top_suffix, res_num);
1811
list_iterator_destroy(iter);
1813
/* Validate all job reservation pointers */
1814
iter = list_iterator_create(job_list);
1815
while ((job_ptr = (struct job_record *) list_next(iter))) {
1816
if (job_ptr->resv_name == NULL)
1819
if ((job_ptr->resv_ptr == NULL) ||
1820
(job_ptr->resv_ptr->magic != RESV_MAGIC)) {
1821
job_ptr->resv_ptr = (slurmctld_resv_t *)
1822
list_find_first(resv_list,
1824
job_ptr->resv_name);
1826
if (!job_ptr->resv_ptr) {
1827
error("JobId %u linked to defunct reservation %s",
1828
job_ptr->job_id, job_ptr->resv_name);
1829
job_ptr->resv_id = 0;
1830
xfree(job_ptr->resv_name);
1833
list_iterator_destroy(iter);
1838
* Validate the the reserved nodes are not DOWN or DRAINED and
1839
* select different nodes as needed.
1841
static void _validate_node_choice(slurmctld_resv_t *resv_ptr)
1843
bitstr_t *tmp_bitmap = NULL;
1845
resv_desc_msg_t resv_desc;
1847
if (resv_ptr->flags & RESERVE_FLAG_SPEC_NODES)
1850
i = bit_overlap(resv_ptr->node_bitmap, avail_node_bitmap);
1851
if (i == resv_ptr->node_cnt)
1854
/* Reservation includes DOWN, DRAINED/DRAINING, FAILING or
1855
* NO_RESPOND nodes. Generate new request using _select_nodes()
1856
* in attempt to replace this nodes */
1857
memset(&resv_desc, 0, sizeof(resv_desc_msg_t));
1858
resv_desc.start_time = resv_ptr->start_time;
1859
resv_desc.end_time = resv_ptr->end_time;
1860
resv_desc.features = resv_ptr->features;
1861
resv_desc.node_cnt = resv_ptr->node_cnt - i;
1862
i = _select_nodes(&resv_desc, &resv_ptr->part_ptr, &tmp_bitmap);
1863
xfree(resv_desc.node_list);
1864
xfree(resv_desc.partition);
1865
if (i == SLURM_SUCCESS) {
1866
bit_and(resv_ptr->node_bitmap, avail_node_bitmap);
1867
bit_or(resv_ptr->node_bitmap, tmp_bitmap);
1868
bit_free(tmp_bitmap);
1869
xfree(resv_ptr->node_list);
1870
resv_ptr->node_list = bitmap2node_name(resv_ptr->node_bitmap);
1871
info("modified reservation %s due to unusable nodes, "
1872
"new nodes: %s", resv_ptr->name, resv_ptr->node_list);
1873
} else if (difftime(resv_ptr->start_time, time(NULL)) < 600) {
1874
info("reservation %s contains unusable nodes, "
1875
"can't reallocate now", resv_ptr->name);
1877
debug("reservation %s contains unusable nodes, "
1878
"can't reallocate now", resv_ptr->name);
1883
* Load the reservation state from file, recover on slurmctld restart.
1884
* Reset reservation pointers for all jobs.
1885
* Execute this after loading the configuration file data.
1886
* IN recover - 0 = validate current reservations ONLY if already recovered,
1887
* otherwise recover from disk
1888
* 1+ = recover all reservation state from disk
1889
* RET SLURM_SUCCESS or error code
1890
* NOTE: READ lock_slurmctld config before entry
1892
extern int load_all_resv_state(int recover)
1894
char *state_file, *data = NULL, *ver_str = NULL;
1896
uint32_t data_size = 0, uint32_tmp;
1897
int data_allocated, data_read = 0, error_code = 0, state_fd;
1899
slurmctld_resv_t *resv_ptr = NULL;
1901
last_resv_update = time(NULL);
1902
if ((recover == 0) && resv_list) {
1903
_validate_all_reservations();
1904
return SLURM_SUCCESS;
1907
/* Read state file and validate */
1909
list_flush(resv_list);
1911
resv_list = list_create(_del_resv_rec);
1914
state_file = xstrdup(slurmctld_conf.state_save_location);
1915
xstrcat(state_file, "/resv_state");
1917
state_fd = open(state_file, O_RDONLY);
1919
info("No reservation state file (%s) to recover",
1921
error_code = ENOENT;
1923
data_allocated = BUF_SIZE;
1924
data = xmalloc(data_allocated);
1926
data_read = read(state_fd, &data[data_size],
1928
if (data_read < 0) {
1932
error("Read error on %s: %m",
1936
} else if (data_read == 0) /* eof */
1938
data_size += data_read;
1939
data_allocated += data_read;
1940
xrealloc(data, data_allocated);
1945
unlock_state_files();
1947
buffer = create_buf(data, data_size);
1949
safe_unpackstr_xmalloc( &ver_str, &uint32_tmp, buffer);
1950
debug3("Version string in resv_state header is %s", ver_str);
1951
if ((!ver_str) || (strcmp(ver_str, RESV_STATE_VERSION) != 0)) {
1952
error("************************************************************");
1953
error("Can not recover reservation state, data version incompatable");
1954
error("************************************************************");
1957
schedule_resv_save(); /* Schedule save with new format */
1961
safe_unpack_time(&now, buffer);
1962
safe_unpack32(&top_suffix, buffer);
1964
while (remaining_buf(buffer) > 0) {
1965
resv_ptr = xmalloc(sizeof(slurmctld_resv_t));
1966
xassert(resv_ptr->magic = RESV_MAGIC); /* Sets value */
1967
safe_unpackstr_xmalloc(&resv_ptr->accounts,
1968
&uint32_tmp, buffer);
1969
safe_unpack_time(&resv_ptr->end_time, buffer);
1970
safe_unpackstr_xmalloc(&resv_ptr->features,
1971
&uint32_tmp, buffer);
1972
safe_unpackstr_xmalloc(&resv_ptr->name, &uint32_tmp, buffer);
1973
safe_unpack32(&resv_ptr->node_cnt, buffer);
1974
safe_unpackstr_xmalloc(&resv_ptr->node_list,
1975
&uint32_tmp, buffer);
1976
safe_unpackstr_xmalloc(&resv_ptr->partition,
1977
&uint32_tmp, buffer);
1978
safe_unpack_time(&resv_ptr->start_time_first, buffer);
1979
safe_unpack16(&resv_ptr->flags, buffer);
1980
safe_unpackstr_xmalloc(&resv_ptr->users,&uint32_tmp, buffer);
1982
/* Fields saved for internal use only (save state) */
1983
safe_unpackstr_xmalloc(&resv_ptr->assoc_list,
1984
&uint32_tmp, buffer);
1985
safe_unpack32(&resv_ptr->cpu_cnt, buffer);
1986
safe_unpack32(&resv_ptr->resv_id, buffer);
1987
safe_unpack_time(&resv_ptr->start_time_prev, buffer);
1988
safe_unpack_time(&resv_ptr->start_time, buffer);
1989
safe_unpack32(&resv_ptr->duration, buffer);
1991
list_append(resv_list, resv_ptr);
1992
info("Recovered state of reservation %s", resv_ptr->name);
1995
_validate_all_reservations();
1996
info("Recovered state of %d reservations", list_count(resv_list));
2001
_validate_all_reservations();
2003
error("Incomplete reservation data checkpoint file");
2004
info("Recovered state of %d reservations", list_count(resv_list));
2006
_del_resv_rec(resv_ptr);
2012
* Determine if a job request can use the specified reservations
2013
* IN/OUT job_ptr - job to validate, set its resv_id and resv_flags
2014
* RET SLURM_SUCCESS or error code (not found or access denied)
2016
extern int validate_job_resv(struct job_record *job_ptr)
2018
slurmctld_resv_t *resv_ptr = NULL;
2023
if ((job_ptr->resv_name == NULL) || (job_ptr->resv_name[0] == '\0')) {
2024
xfree(job_ptr->resv_name);
2025
job_ptr->resv_id = 0;
2026
job_ptr->resv_flags = 0;
2027
return SLURM_SUCCESS;
2031
return ESLURM_RESERVATION_INVALID;
2033
/* Find the named reservation */
2034
resv_ptr = (slurmctld_resv_t *) list_find_first (resv_list,
2035
_find_resv_name, job_ptr->resv_name);
2037
info("Reservation name not found (%s)", job_ptr->resv_name);
2038
return ESLURM_RESERVATION_INVALID;
2041
rc = _valid_job_access_resv(job_ptr, resv_ptr);
2042
if (rc == SLURM_SUCCESS) {
2043
job_ptr->resv_id = resv_ptr->resv_id;
2044
job_ptr->resv_flags = resv_ptr->flags;
2049
static int _resize_resv(slurmctld_resv_t *resv_ptr, uint32_t node_cnt)
2051
bitstr_t *tmp1_bitmap = NULL, *tmp2_bitmap = NULL;
2052
int delta_node_cnt, i;
2053
resv_desc_msg_t resv_desc;
2055
delta_node_cnt = resv_ptr->node_cnt - node_cnt;
2056
if (delta_node_cnt == 0) /* Already correct node count */
2057
return SLURM_SUCCESS;
2059
if (delta_node_cnt > 0) { /* Must decrease node count */
2060
if (bit_overlap(resv_ptr->node_bitmap, idle_node_bitmap)) {
2061
/* Start by eliminating idle nodes from reservation */
2062
tmp1_bitmap = bit_copy(resv_ptr->node_bitmap);
2063
bit_and(tmp1_bitmap, idle_node_bitmap);
2064
i = bit_set_count(tmp1_bitmap);
2065
if (i > delta_node_cnt) {
2066
tmp2_bitmap = bit_pick_cnt(tmp1_bitmap,
2068
bit_not(tmp2_bitmap);
2069
bit_and(resv_ptr->node_bitmap, tmp2_bitmap);
2070
FREE_NULL_BITMAP(tmp1_bitmap);
2071
FREE_NULL_BITMAP(tmp2_bitmap);
2072
delta_node_cnt = 0; /* ALL DONE */
2074
bit_not(idle_node_bitmap);
2075
bit_and(resv_ptr->node_bitmap,
2077
bit_not(idle_node_bitmap);
2078
resv_ptr->node_cnt = bit_set_count(
2079
resv_ptr->node_bitmap);
2080
delta_node_cnt = resv_ptr->node_cnt -
2083
FREE_NULL_BITMAP(tmp1_bitmap);
2085
if (delta_node_cnt > 0) {
2086
/* Now eliminate allocated nodes from reservation */
2087
tmp1_bitmap = bit_pick_cnt(resv_ptr->node_bitmap,
2089
bit_free(resv_ptr->node_bitmap);
2090
resv_ptr->node_bitmap = tmp1_bitmap;
2092
xfree(resv_ptr->node_list);
2093
resv_ptr->node_list = bitmap2node_name(resv_ptr->node_bitmap);
2094
resv_ptr->node_cnt = node_cnt;
2095
return SLURM_SUCCESS;
2098
/* Must increase node count. Make this look like new request so
2099
* we can use _select_nodes() for selecting the nodes */
2100
memset(&resv_desc, 0, sizeof(resv_desc_msg_t));
2101
resv_desc.start_time = resv_ptr->start_time;
2102
resv_desc.end_time = resv_ptr->end_time;
2103
resv_desc.features = resv_ptr->features;
2104
resv_desc.flags = resv_ptr->flags;
2105
resv_desc.node_cnt = 0 - delta_node_cnt;
2106
i = _select_nodes(&resv_desc, &resv_ptr->part_ptr, &tmp1_bitmap);
2107
xfree(resv_desc.node_list);
2108
xfree(resv_desc.partition);
2109
if (i == SLURM_SUCCESS) {
2110
bit_or(resv_ptr->node_bitmap, tmp1_bitmap);
2111
bit_free(tmp1_bitmap);
2112
xfree(resv_ptr->node_list);
2113
resv_ptr->node_list = bitmap2node_name(resv_ptr->node_bitmap);
2114
resv_ptr->node_cnt = node_cnt;
2119
/* Given a reservation create request, select appropriate nodes for use */
2120
static int _select_nodes(resv_desc_msg_t *resv_desc_ptr,
2121
struct part_record **part_ptr,
2122
bitstr_t **resv_bitmap)
2124
slurmctld_resv_t *resv_ptr;
2125
bitstr_t *node_bitmap;
2126
struct node_record *node_ptr;
2130
if (*part_ptr == NULL) {
2131
*part_ptr = default_part_loc;
2132
if (*part_ptr == NULL)
2133
return ESLURM_DEFAULT_PARTITION_NOT_SET;
2134
xfree(resv_desc_ptr->partition); /* should be no-op */
2135
resv_desc_ptr->partition = xstrdup((*part_ptr)->name);
2138
/* Start with all nodes in the partition */
2139
node_bitmap = bit_copy((*part_ptr)->node_bitmap);
2141
/* Don't use node already reserved */
2142
iter = list_iterator_create(resv_list);
2144
fatal("malloc: list_iterator_create");
2145
while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) {
2146
if ((resv_ptr->node_bitmap == NULL) ||
2147
(resv_ptr->start_time >= resv_desc_ptr->end_time) ||
2148
(resv_ptr->end_time <= resv_desc_ptr->start_time))
2150
bit_not(resv_ptr->node_bitmap);
2151
bit_and(node_bitmap, resv_ptr->node_bitmap);
2152
bit_not(resv_ptr->node_bitmap);
2154
list_iterator_destroy(iter);
2156
/* Satisfy feature specification */
2157
if (resv_desc_ptr->features) {
2158
/* FIXME: Just support a single feature name for now */
2159
node_ptr = node_record_table_ptr;
2160
for (i=0; i<node_record_count; i++, node_ptr++) {
2161
if (!bit_test(node_bitmap, i))
2163
if (!node_ptr->config_ptr->feature_array) {
2164
bit_clear(node_bitmap, i);
2167
for (j=0; node_ptr->config_ptr->feature_array[j]; j++){
2168
if (!strcmp(resv_desc_ptr->features,
2169
node_ptr->config_ptr->
2173
if (!node_ptr->config_ptr->feature_array[j]) {
2174
bit_clear(node_bitmap, i);
2180
if ((resv_desc_ptr->flags & RESERVE_FLAG_MAINT) == 0) {
2181
/* Nodes must be available */
2182
bit_and(node_bitmap, avail_node_bitmap);
2184
*resv_bitmap = NULL;
2185
if (bit_set_count(node_bitmap) < resv_desc_ptr->node_cnt)
2186
verbose("reservation requests more nodes than are available");
2187
else if ((i = bit_overlap(node_bitmap, idle_node_bitmap)) >=
2188
resv_desc_ptr->node_cnt) { /* Reserve idle nodes */
2189
bit_and(node_bitmap, idle_node_bitmap);
2190
*resv_bitmap = bit_pick_cnt(node_bitmap,
2191
resv_desc_ptr->node_cnt);
2192
} else if (resv_desc_ptr->flags & RESERVE_FLAG_IGN_JOBS) {
2193
/* Reserve nodes that are idle first, then busy nodes */
2194
*resv_bitmap = _pick_idle_nodes2(node_bitmap, resv_desc_ptr);
2196
/* Reserve nodes that are or will be idle.
2197
* This algorithm is slower than above logic that just
2198
* selects from the idle nodes. */
2199
*resv_bitmap = _pick_idle_nodes(node_bitmap, resv_desc_ptr);
2202
bit_free(node_bitmap);
2203
if (*resv_bitmap == NULL)
2204
return ESLURM_NODES_BUSY;
2205
resv_desc_ptr->node_list = bitmap2node_name(*resv_bitmap);
2206
return SLURM_SUCCESS;
2210
* Select nodes for a reservation to use
2211
* IN,OUT avail_nodes - nodes to choose from with proper features, partition
2212
* destructively modified by this function
2213
* IN resv_desc_ptr - reservation request
2214
* RET bitmap of selected nodes or NULL if request can not be satisfied
2216
static bitstr_t *_pick_idle_nodes(bitstr_t *avail_nodes,
2217
resv_desc_msg_t *resv_desc_ptr)
2219
ListIterator job_iterator;
2220
struct job_record *job_ptr;
2222
job_iterator = list_iterator_create(job_list);
2223
while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
2224
if ((job_ptr->job_state != JOB_RUNNING) ||
2225
(job_ptr->end_time < resv_desc_ptr->start_time))
2227
bit_not(job_ptr->node_bitmap);
2228
bit_and(avail_nodes, job_ptr->node_bitmap);
2229
bit_not(job_ptr->node_bitmap);
2231
list_iterator_destroy(job_iterator);
2233
return bit_pick_cnt(avail_nodes, resv_desc_ptr->node_cnt);
2237
* Select nodes for a reservation to use
2238
* IN,OUT avail_nodes - nodes to choose from with proper features, partition
2239
* destructively modified by this function
2240
* IN resv_desc_ptr - reservation request
2241
* RET bitmap of selected nodes or NULL if request can not be satisfied
2243
static bitstr_t *_pick_idle_nodes2(bitstr_t *avail_nodes,
2244
resv_desc_msg_t *resv_desc_ptr)
2246
ListIterator job_iterator;
2247
struct job_record *job_ptr;
2248
bitstr_t *tmp_bitmap;
2250
job_iterator = list_iterator_create(job_list);
2251
while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
2252
if ((job_ptr->job_state != JOB_RUNNING) ||
2253
(job_ptr->end_time < resv_desc_ptr->start_time))
2255
tmp_bitmap = bit_copy(avail_nodes);
2256
if (tmp_bitmap == NULL)
2257
fatal("malloc failure");
2258
bit_not(job_ptr->node_bitmap);
2259
bit_and(avail_nodes, job_ptr->node_bitmap);
2260
bit_not(job_ptr->node_bitmap);
2261
if (bit_set_count(avail_nodes) < resv_desc_ptr->node_cnt) {
2262
/* Removed too many nodes, put them back */
2263
bit_or(avail_nodes, tmp_bitmap);
2265
bit_free(tmp_bitmap);
2267
list_iterator_destroy(job_iterator);
2269
return bit_pick_cnt(avail_nodes, resv_desc_ptr->node_cnt);
2272
/* Determine if a job has access to a reservation
2273
* RET SLURM_SUCCESS if true, ESLURM_RESERVATION_ACCESS otherwise */
2274
static int _valid_job_access_resv(struct job_record *job_ptr,
2275
slurmctld_resv_t *resv_ptr)
2279
/* Determine if we have access */
2280
if (/*association_enforced*/ 0) {
2281
/* FIXME: add association checks
2282
if (job_ptr->assoc_id in reservation association list)
2283
return SLURM_SUCCESS;
2286
for (i=0; i<resv_ptr->user_cnt; i++) {
2287
if (job_ptr->user_id == resv_ptr->user_list[i])
2288
return SLURM_SUCCESS;
2290
for (i=0; (i<resv_ptr->account_cnt) && job_ptr->account; i++) {
2291
if (resv_ptr->account_list[i] &&
2292
(strcmp(job_ptr->account,
2293
resv_ptr->account_list[i]) == 0)) {
2294
return SLURM_SUCCESS;
2298
info("Security violation, uid=%u attempt to use reservation %s",
2299
job_ptr->user_id, resv_ptr->name);
2300
return ESLURM_RESERVATION_ACCESS;
2304
* Determine if a job can start now based only upon reservations
2305
* IN job_ptr - job to test
2306
* RET SLURM_SUCCESS if runable now, otherwise an error code
2308
extern int job_test_resv_now(struct job_record *job_ptr)
2310
slurmctld_resv_t * resv_ptr;
2313
if (job_ptr->resv_name == NULL)
2314
return SLURM_SUCCESS;
2316
resv_ptr = (slurmctld_resv_t *) list_find_first (resv_list,
2317
_find_resv_name, job_ptr->resv_name);
2318
job_ptr->resv_ptr = resv_ptr;
2320
return ESLURM_RESERVATION_INVALID;
2322
if (_valid_job_access_resv(job_ptr, resv_ptr) != SLURM_SUCCESS)
2323
return ESLURM_RESERVATION_ACCESS;
2325
if (now < resv_ptr->start_time) {
2326
/* reservation starts later */
2327
return ESLURM_INVALID_TIME_VALUE;
2329
if (now > resv_ptr->end_time) {
2330
/* reservation ended earlier */
2331
return ESLURM_RESERVATION_INVALID;
2334
return SLURM_SUCCESS;
2338
* Determine which nodes a job can use based upon reservations
2339
* IN job_ptr - job to test
2340
* IN/OUT when - when we want the job to start (IN)
2341
* when the reservation is available (OUT)
2342
* IN move_time - if true, then permit the start time to advance from
2343
"when" as needed IF job has no reservation
2344
* OUT node_bitmap - nodes which the job can use, caller must free unless error
2345
* RET SLURM_SUCCESS if runable now
2346
* ESLURM_RESERVATION_ACCESS access to reservation denied
2347
* ESLURM_RESERVATION_INVALID reservation invalid
2348
* ESLURM_INVALID_TIME_VALUE reservation invalid at time "when"
2349
* ESLURM_NODES_BUSY job has no reservation, but required nodes are
2352
extern int job_test_resv(struct job_record *job_ptr, time_t *when,
2353
bool move_time, bitstr_t **node_bitmap)
2355
slurmctld_resv_t * resv_ptr, *res2_ptr;
2356
time_t job_start_time, job_end_time;
2359
int i, rc = SLURM_SUCCESS;
2361
if (job_ptr->time_limit == INFINITE)
2362
duration = ONE_YEAR;
2363
else if (job_ptr->time_limit != NO_VAL)
2364
duration = (job_ptr->time_limit * 60);
2365
else { /* partition time limit */
2366
if (job_ptr->part_ptr->max_time == INFINITE)
2367
duration = ONE_YEAR;
2369
duration = (job_ptr->part_ptr->max_time * 60);
2371
job_start_time = job_end_time = *when;
2372
job_end_time += duration;
2374
*node_bitmap = (bitstr_t *) NULL;
2376
if (job_ptr->resv_name) {
2377
bool overlap_resv = false;
2378
resv_ptr = (slurmctld_resv_t *) list_find_first (resv_list,
2379
_find_resv_name, job_ptr->resv_name);
2380
job_ptr->resv_ptr = resv_ptr;
2382
return ESLURM_RESERVATION_INVALID;
2383
if (_valid_job_access_resv(job_ptr, resv_ptr) != SLURM_SUCCESS)
2384
return ESLURM_RESERVATION_ACCESS;
2385
if (*when < resv_ptr->start_time) {
2386
/* reservation starts later */
2387
*when = resv_ptr->start_time;
2388
return ESLURM_INVALID_TIME_VALUE;
2390
if (*when > resv_ptr->end_time) {
2391
/* reservation ended earlier */
2392
*when = resv_ptr->end_time;
2393
job_ptr->priority = 0; /* administrative hold */
2394
return ESLURM_RESERVATION_INVALID;
2396
if (job_ptr->details->req_node_bitmap &&
2397
!bit_super_set(job_ptr->details->req_node_bitmap,
2398
resv_ptr->node_bitmap)) {
2399
return ESLURM_RESERVATION_INVALID;
2401
*node_bitmap = bit_copy(resv_ptr->node_bitmap);
2403
/* if there are any overlapping reservations, we need to
2404
* prevent the job from using those nodes (e.g. MAINT nodes) */
2405
iter = list_iterator_create(resv_list);
2407
fatal("malloc: list_iterator_create");
2408
while ((res2_ptr = (slurmctld_resv_t *) list_next(iter))) {
2409
if ((resv_ptr->flags & RESERVE_FLAG_MAINT) ||
2410
(res2_ptr == resv_ptr) ||
2411
(res2_ptr->node_bitmap == NULL) ||
2412
(res2_ptr->start_time >= job_end_time) ||
2413
(res2_ptr->end_time <= job_start_time))
2415
bit_not(res2_ptr->node_bitmap);
2416
bit_and(*node_bitmap, res2_ptr->node_bitmap);
2417
bit_not(res2_ptr->node_bitmap);
2418
overlap_resv = true;
2420
list_iterator_destroy(iter);
2423
char *nodes=bitmap2node_name(*node_bitmap);
2424
info("nodes:%s", nodes);
2428
return SLURM_SUCCESS;
2431
job_ptr->resv_ptr = NULL; /* should be redundant */
2432
*node_bitmap = bit_alloc(node_record_count);
2433
bit_nset(*node_bitmap, 0, (node_record_count - 1));
2434
if (list_count(resv_list) == 0)
2435
return SLURM_SUCCESS;
2437
/* Job has no reservation, try to find time when this can
2438
* run and get it's required nodes (if any) */
2440
iter = list_iterator_create(resv_list);
2442
fatal("malloc: list_iterator_create");
2443
while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) {
2444
if ((resv_ptr->node_bitmap == NULL) ||
2445
(resv_ptr->start_time >= job_end_time) ||
2446
(resv_ptr->end_time <= job_start_time))
2448
if (job_ptr->details->req_node_bitmap &&
2449
bit_overlap(job_ptr->details->req_node_bitmap,
2450
resv_ptr->node_bitmap)) {
2451
*when = resv_ptr->end_time;
2452
rc = ESLURM_NODES_BUSY;
2455
bit_not(resv_ptr->node_bitmap);
2456
bit_and(*node_bitmap, resv_ptr->node_bitmap);
2457
bit_not(resv_ptr->node_bitmap);
2459
list_iterator_destroy(iter);
2461
if (rc == SLURM_SUCCESS)
2463
/* rc == ESLURM_NODES_BUSY here from above break */
2464
if (move_time && (i<10)) { /* Retry for later start time */
2465
bit_nset(*node_bitmap, 0, (node_record_count - 1));
2469
FREE_NULL_BITMAP(*node_bitmap);
2470
break; /* Give up */
2476
/* Begin scan of all jobs for valid reservations */
2477
extern void begin_job_resv_check(void)
2480
slurmctld_resv_t *resv_ptr;
2481
slurm_ctl_conf_t *conf;
2486
conf = slurm_conf_lock();
2487
resv_over_run = conf->resv_over_run;
2488
slurm_conf_unlock();
2489
if (resv_over_run == (uint16_t) INFINITE)
2490
resv_over_run = 365 * 24 * 60 * 60;
2492
resv_over_run *= 60;
2494
iter = list_iterator_create(resv_list);
2496
fatal("malloc: list_iterator_create");
2497
while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) {
2498
resv_ptr->job_pend_cnt = 0;
2499
resv_ptr->job_run_cnt = 0;
2501
list_iterator_destroy(iter);
2504
/* Test a particular job for valid reservation
2505
* RET ESLURM_INVALID_TIME_VALUE if reservation is terminated
2506
* SLURM_SUCCESS if reservation is still valid */
2507
extern int job_resv_check(struct job_record *job_ptr)
2509
bool run_flag = false;
2511
if (!job_ptr->resv_name)
2512
return SLURM_SUCCESS;
2514
if ((job_ptr->job_state == JOB_RUNNING) ||
2515
(job_ptr->job_state == JOB_SUSPENDED))
2517
else if (job_ptr->job_state == JOB_PENDING)
2520
return SLURM_SUCCESS;
2522
xassert(job_ptr->resv_ptr->magic == RESV_MAGIC);
2524
job_ptr->resv_ptr->job_run_cnt++;
2526
job_ptr->resv_ptr->job_pend_cnt++;
2528
if (job_ptr->resv_ptr->end_time < (time(NULL) + resv_over_run))
2529
return ESLURM_INVALID_TIME_VALUE;
2530
return SLURM_SUCCESS;
2533
/* Finish scan of all jobs for valid reservations */
2534
extern void fini_job_resv_check(void)
2537
slurmctld_resv_t *resv_ptr;
2538
time_t now = time(NULL);
2543
iter = list_iterator_create(resv_list);
2545
fatal("malloc: list_iterator_create");
2546
while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) {
2547
if (resv_ptr->end_time > now) { /* reservation not over */
2548
_validate_node_choice(resv_ptr);
2552
if ((resv_ptr->job_run_cnt == 0) &&
2553
(resv_ptr->flags & RESERVE_FLAG_DAILY)) {
2554
verbose("Advance reservation %s one day",
2556
resv_ptr->start_time += 24 * 60 * 60;
2557
resv_ptr->start_time_prev = resv_ptr->start_time;
2558
resv_ptr->start_time_first = resv_ptr->start_time;
2559
resv_ptr->end_time += 24 * 60 * 60;
2560
_post_resv_create(resv_ptr);
2561
last_resv_update = now;
2562
schedule_resv_save();
2565
if ((resv_ptr->job_run_cnt == 0) &&
2566
(resv_ptr->flags & RESERVE_FLAG_WEEKLY)) {
2567
verbose("Advance reservation %s one week",
2569
resv_ptr->start_time += 7 * 24 * 60 * 60;
2570
resv_ptr->start_time_prev = resv_ptr->start_time;
2571
resv_ptr->start_time_first = resv_ptr->start_time;
2572
resv_ptr->end_time += 7 * 24 * 60 * 60;
2573
_post_resv_create(resv_ptr);
2574
last_resv_update = now;
2575
schedule_resv_save();
2578
if ((resv_ptr->job_pend_cnt == 0) &&
2579
(resv_ptr->job_run_cnt == 0) &&
2580
(resv_ptr->maint_set_node == 0) &&
2581
((resv_ptr->flags & RESERVE_FLAG_DAILY ) == 0) &&
2582
((resv_ptr->flags & RESERVE_FLAG_WEEKLY) == 0)) {
2583
debug("Purging vestigial reservation record %s",
2585
_clear_job_resv(resv_ptr);
2586
list_delete_item(iter);
2587
last_resv_update = now;
2588
schedule_resv_save();
2592
list_iterator_destroy(iter);
2595
/* send all reservations to accounting. Only needed at
2596
* first registration
2598
extern int send_resvs_to_accounting(void)
2600
ListIterator itr = NULL;
2601
slurmctld_resv_t *resv_ptr;
2604
return SLURM_SUCCESS;
2606
itr = list_iterator_create(resv_list);
2607
while ((resv_ptr = list_next(itr))) {
2608
_post_resv_create(resv_ptr);
2610
list_iterator_destroy(itr);
2612
return SLURM_SUCCESS;
2616
/* Set or clear NODE_STATE_MAINT for node_state as needed */
2617
extern void set_node_maint_mode(void)
2620
slurmctld_resv_t *resv_ptr;
2621
time_t now = time(NULL);
2626
iter = list_iterator_create(resv_list);
2628
fatal("malloc: list_iterator_create");
2629
while ((resv_ptr = (slurmctld_resv_t *) list_next(iter))) {
2630
if ((resv_ptr->flags & RESERVE_FLAG_MAINT) == 0)
2632
if ((now >= resv_ptr->start_time) &&
2633
(now < resv_ptr->end_time )) {
2634
if (!resv_ptr->maint_set_node) {
2635
resv_ptr->maint_set_node = true;
2636
_set_nodes_maint(resv_ptr, now);
2637
last_node_update = now;
2639
} else if (resv_ptr->maint_set_node) {
2640
resv_ptr->maint_set_node = false;
2641
_set_nodes_maint(resv_ptr, now);
2642
last_node_update = now;
2645
list_iterator_destroy(iter);
2648
static void _set_nodes_maint(slurmctld_resv_t *resv_ptr, time_t now)
2650
int i, i_first, i_last;
2651
struct node_record *node_ptr;
2653
if (!resv_ptr->node_bitmap) {
2654
error("reservation %s lacks a bitmap", resv_ptr->name);
2658
i_first = bit_ffs(resv_ptr->node_bitmap);
2659
i_last = bit_fls(resv_ptr->node_bitmap);
2660
for (i=i_first; i<=i_last; i++) {
2661
if (!bit_test(resv_ptr->node_bitmap, i))
2664
node_ptr = node_record_table_ptr + i;
2665
if (resv_ptr->maint_set_node)
2666
node_ptr->node_state |= NODE_STATE_MAINT;
2668
node_ptr->node_state &= (~NODE_STATE_MAINT);
2669
/* mark that this node is now down and in maint mode
2670
or was removed from maint mode
2672
if(((node_ptr->node_state & NODE_STATE_BASE)
2673
== NODE_STATE_DOWN) ||
2674
(node_ptr->node_state & NODE_STATE_DRAIN) ||
2675
(node_ptr->node_state & NODE_STATE_FAIL)) {
2676
clusteracct_storage_g_node_down(
2678
slurmctld_cluster_name,
2679
node_ptr, now, NULL);