1
/*___INFO__MARK_BEGIN__*/
2
/*************************************************************************
4
* The Contents of this file are made available subject to the terms of
5
* the Sun Industry Standards Source License Version 1.2
7
* Sun Microsystems Inc., March, 2001
10
* Sun Industry Standards Source License Version 1.2
11
* =================================================
12
* The contents of this file are subject to the Sun Industry Standards
13
* Source License Version 1.2 (the "License"); You may not use this file
14
* except in compliance with the License. You may obtain a copy of the
15
* License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
17
* Software provided under this License is provided on an "AS IS" basis,
18
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
19
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
20
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
21
* See the License for the specific provisions governing your rights and
22
* obligations concerning the Software.
24
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
26
* Copyright: 2001 by Sun Microsystems, Inc.
28
* All Rights Reserved.
30
************************************************************************/
31
/*___INFO__MARK_END__*/
38
#include "uti/sge_stdlib.h"
39
#include "uti/sge_stdio.h"
41
#include "sgeobj/sge_advance_reservation.h"
42
#include "sge_advance_reservation_qmaster.h"
44
#include "sge_persistence_qmaster.h"
45
#include "rmon/sgermon.h"
46
#include "uti/sge_log.h"
47
#include "sge_answer.h"
48
#include "spool/sge_spooling.h"
49
#include "sgeobj/sge_conf.h"
51
#include "msg_common.h"
52
#include "msg_qmaster.h"
53
#include "msg_daemons_common.h"
56
#include "sge_mtutil.h"
57
#include "uti/sge_time.h"
58
#include "uti/sge_uidgid.h"
59
#include "uti/sge_string.h"
60
#include "sge_utility.h"
61
#include "sge_range.h"
62
#include "sgeobj/msg_sgeobjlib.h"
63
#include "sgeobj/sge_qinstance.h"
64
#include "sgeobj/sge_hgroup.h"
65
#include "sgeobj/sge_userset.h"
66
#include "sgeobj/sge_id.h"
67
#include "sgeobj/sge_manop.h"
68
#include "sgeobj/sge_job.h"
69
#include "sgeobj/sge_ja_task.h"
70
#include "sgeobj/sge_cqueue.h"
71
#include "sgeobj/sge_qinstance_state.h"
72
#include "sgeobj/sge_host.h"
73
#include "sgeobj/sge_schedd_conf.h"
74
#include "sgeobj/sge_centry.h"
75
#include "sgeobj/sge_pe.h"
76
#include "sgeobj/sge_str.h"
77
#include "sgeobj/sge_calendar.h"
78
#include "sgeobj/sge_ulong.h"
79
#include "sgeobj/sge_qref.h"
80
#include "sgeobj/sge_pe.h"
82
#include "sched/sge_resource_utilization.h"
83
#include "sched/sge_select_queue.h"
84
#include "sched/schedd_monitor.h"
85
#include "sched/sge_job_schedd.h"
86
#include "sched/sge_serf.h"
87
#include "sched/valid_queue_user.h"
89
#include "evm/sge_event_master.h"
90
#include "evm/sge_queue_event_master.h"
92
#include "sge_utility_qmaster.h"
93
#include "sge_host_qmaster.h"
94
#include "sge_cqueue_qmaster.h"
95
#include "sge_job_qmaster.h"
97
#include "evm/sge_event_master.h"
98
#include "sge_reporting_qmaster.h"
99
#include "sge_give_jobs.h"
100
#include "sge_qinstance_qmaster.h"
104
#include "sge_complex_schedd.h"
109
/* NOTE(review): presumably the mutex member of the ar_id_t struct; the rest of
   that declaration is not visible in this listing -- confirm. */
pthread_mutex_t ar_id_mutex;
112
/* File-scope state of the advance reservation id counter. The functions below
   use its ar_id, changed and ar_id_mutex members; the initializer presumably
   maps to them in that order (TODO confirm against the ar_id_t declaration). */
ar_id_t ar_id_control = {0, false, PTHREAD_MUTEX_INITIALIZER};
114
/* Forward declarations of the file-local helpers. */
static bool ar_reserve_queues(lList **alpp, lListElem *ar);
115
static u_long32 sge_get_ar_id(sge_gdi_ctx_class_t *ctx, monitoring_t *monitor);
116
static u_long32 guess_highest_ar_id(void);
118
static void sge_ar_send_mail(lListElem *ar, int type);
121
/*
 * ar_initialize_timer() -- bring the advance reservations (ARs) of the master
 * AR list in line with the current time.
 *
 * ctx         - GDI context; handed to sge_ar_remove_all_jobs() and asked for
 *               the job spooling setting via ctx->get_job_spooling(ctx)
 * answer_list - answer list handed to spool_delete_object()
 * monitor     - monitoring structure handed to sge_ar_remove_all_jobs()
 *
 * For each AR, with now = sge_get_gmt():
 *   now < AR_start_time : state is set to waiting and a one-time AR_RUNNING
 *                         event for the start time is created
 *   now < AR_end_time   : state is set to running and a one-time AR_EXITED
 *                         event for the end time is created
 *   following statements: jobs of the AR are removed, the reservation is
 *                         undone, log and accounting records are written and
 *                         the AR is removed from the list and from spooling
 *                         (presumably the branch for ARs whose end time has
 *                         passed -- its header is not visible here)
 *
 * NOTE(review): this listing carries bare line-number lines and lacks
 * brace-only and other short lines (among them the return type and the
 * statement advancing next_ar); restore them before compiling.
 */
ar_initialize_timer(sge_gdi_ctx_class_t *ctx, lList **answer_list, monitoring_t *monitor)
123
object_description *object_base = NULL;
124
lListElem *ar, *next_ar;
125
u_long32 now = sge_get_gmt();
126
lList *ar_master_list = NULL;
128
DENTER(TOP_LAYER, "ar_initialize_timer");
130
object_base = object_type_get_object_description();
131
ar_master_list = *object_base[SGE_TYPE_AR].list;
133
/* iterate through next_ar because the current element may be removed below */
next_ar = lFirst(ar_master_list);
135
while((ar = next_ar)) {
136
te_event_t ev = NULL;
140
/* start time not reached yet: wait and schedule the start trigger */
if (now < lGetUlong(ar, AR_start_time)) {
141
sge_ar_state_set_waiting(ar);
143
ev = te_new_event((time_t)lGetUlong(ar, AR_start_time), TYPE_AR_EVENT,
144
ONE_TIME_EVENT, lGetUlong(ar, AR_id), AR_RUNNING, NULL);
149
/* started but end time not reached yet: schedule the end trigger */
} else if (now < lGetUlong(ar, AR_end_time)) {
150
sge_ar_state_set_running(ar);
152
ev = te_new_event((time_t)lGetUlong(ar, AR_end_time), TYPE_AR_EVENT,
153
ONE_TIME_EVENT, lGetUlong(ar, AR_id), AR_EXITED, NULL);
157
/* clean-up path; buffer receives the AR id as text and is used as the key
   for spool_delete_object() */
dstring buffer = DSTRING_INIT;
158
u_long32 ar_id = lGetUlong(ar, AR_id);
160
sge_ar_state_set_running(ar);
162
sge_ar_remove_all_jobs(ctx, ar_id, 1, monitor);
164
/* second argument false: undo the reservation */
ar_do_reservation(ar, false);
166
reporting_create_ar_log_record(NULL, ar, ARL_TERMINATED,
167
"end time of AR reached",
169
reporting_create_ar_acct_records(NULL, ar, now);
171
sge_dstring_sprintf(&buffer, sge_U32CFormat, ar_id);
173
lRemoveElem(ar_master_list, &ar);
175
spool_delete_object(answer_list, spool_get_default_context(),
177
sge_dstring_get_string(&buffer),
178
ctx->get_job_spooling(ctx));
180
sge_dstring_free(&buffer);
186
/****** sge_advance_reservation_qmaster/ar_mod() *******************************
*  NAME
*     ar_mod() -- gdi callback function for adding or modifying advance
*                 reservations
*
*  SYNOPSIS
*     int ar_mod(sge_gdi_ctx_class_t *ctx, lList **alpp, lListElem *new_ar,
*     lListElem *ar, int add, const char *ruser, const char *rhost,
*     gdi_object_t *object, int sub_command, monitoring_t *monitor)
*
*  FUNCTION
*     This function is called from the framework that
*     add/modify/delete generic gdi objects.
*     The purpose of this function is to add a new advance reservation.
*     Modifying is currently not supported; the code reports
*     MSG_NOTYETIMPLEMENTED_S with STATUS_ESEMANTIC for that case.
*
*     Visible steps: the request is checked with ar_validate(); a new id is
*     drawn with sge_get_ar_id() until ar_list_locate() finds no AR with that
*     id; AR_owner is set to ruser and AR_group to ctx->get_groupname(ctx);
*     the number of existing ARs is compared with
*     mconf_get_max_advance_reservations(); the attributes are transferred
*     from ar to new_ar; AR_submission_time is set to sge_get_gmt() and
*     AR_state to AR_WAITING; finally ar_reserve_queues() is called.
*
*  INPUTS
*     sge_gdi_ctx_class_t *ctx - gdi context pointer
*     lList **alpp             - the answer_list
*     lListElem *new_ar        - if a new ar object will be created by this
*                                function, then new_ar is a newly initialized
*                                element
*     lListElem *ar            - a reduced ar object that carries the attribute
*                                values to be applied to new_ar
*     int add                  - 1 for add requests
*     const char *ruser        - username who invoked this GDI request
*     const char *rhost        - hostname of where the GDI request was invoked
*     gdi_object_t *object     - structure of the GDI framework that contains
*                                additional information to perform the request
*     int sub_command          - GDI sub command
*     monitoring_t *monitor    - monitoring structure
*
*  RESULT
*     int - STATUS_EUNKNOWN if an error occurred
*           STATUS_NOTOK_DOAGAIN if a temporary error occurred
*
*  NOTES
*     MT-NOTE: ar_mod() is not MT safe
*     NOTE(review): AR_group is taken from ctx->get_groupname(ctx) and not from
*     a value supplied with the request -- confirm that this is the intended
*     group.
*     NOTE(review): this listing carries bare line-number lines and lacks
*     brace-only and other short lines; restore them before compiling.
*******************************************************************************/
227
int ar_mod(sge_gdi_ctx_class_t *ctx, lList **alpp, lListElem *new_ar,
228
lListElem *ar, int add, const char *ruser,
229
const char *rhost, gdi_object_t *object, int sub_command,
230
monitoring_t *monitor)
232
object_description *object_base = object_type_get_object_description();
234
/* configured upper limit of ARs; the check further down skips a limit of 0 */
u_long32 max_advance_reservations = mconf_get_max_advance_reservations();
236
DENTER(TOP_LAYER, "ar_mod");
238
/* validate the request first; the statements executed on failure are not
   visible in this listing */
if (!ar_validate(ar, alpp, true, false)) {
243
/* get new ar ids until we find one that is not yet used */
245
ar_id = sge_get_ar_id(ctx, monitor);
246
} while (ar_list_locate(*object_base[SGE_TYPE_AR].list, ar_id));
247
lSetUlong(new_ar, AR_id, ar_id);
249
** set the owner of new_ar, don't overwrite it with
250
** attr_mod_str(alpp, ar, new_ar, AR_owner, object->object_name);
252
lSetString(new_ar, AR_owner, ruser);
253
lSetString(new_ar, AR_group, ctx->get_groupname(ctx));
255
ERROR((SGE_EVENT, MSG_NOTYETIMPLEMENTED_S, "advance reservation modification"));
256
answer_list_add(alpp, SGE_EVENT, STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR);
260
if (max_advance_reservations > 0 &&
261
max_advance_reservations <= lGetNumberOfElem(*object_base[SGE_TYPE_AR].list)) {
262
ERROR((SGE_EVENT, MSG_AR_MAXARSPERCLUSTER_U, sge_u32c(max_advance_reservations)));
263
answer_list_add(alpp, SGE_EVENT, STATUS_NOTOK_DOAGAIN, ANSWER_QUALITY_ERROR);
267
/* AR_name, SGE_STRING */
268
attr_mod_zerostr(ar, new_ar, AR_name, object->object_name);
269
/* AR_account, SGE_STRING */
270
attr_mod_zerostr(ar, new_ar, AR_account, object->object_name);
271
/* AR_submission_time, SGE_ULONG */
272
lSetUlong(new_ar, AR_submission_time, sge_get_gmt());
273
/* AR_start_time, SGE_ULONG required */
274
attr_mod_ulong(ar, new_ar, AR_start_time, object->object_name);
275
/* AR_end_time, SGE_ULONG required */
276
attr_mod_ulong(ar, new_ar, AR_end_time, object->object_name);
277
/* AR_duration, SGE_ULONG */
278
attr_mod_ulong(ar, new_ar, AR_duration, object->object_name);
279
/* AR_verify, SGE_ULONG just verify the reservation or final case */
280
attr_mod_ulong(ar, new_ar, AR_verify, object->object_name);
281
/* AR_error_handling, SGE_ULONG how to deal with soft and hard exceptions */
282
attr_mod_ulong(ar, new_ar, AR_error_handling, object->object_name);
283
/* AR_state, SGE_ULONG state of the AR */
284
lSetUlong(new_ar, AR_state, AR_WAITING);
285
/* AR_checkpoint_name, SGE_STRING Named checkpoint */
286
attr_mod_zerostr(ar, new_ar, AR_checkpoint_name, object->object_name);
287
/* AR_resource_list, SGE_LIST */
288
attr_mod_sub_list(alpp, new_ar, AR_resource_list, AR_name, ar, sub_command, SGE_ATTR_COMPLEX_VALUES, SGE_OBJ_AR, 0);
289
/* AR_queue_list, SGE_LIST */
290
attr_mod_sub_list(alpp, new_ar, AR_queue_list, AR_name, ar, sub_command, SGE_ATTR_QUEUE_LIST, SGE_OBJ_AR, 0);
291
/* AR_mail_options, SGE_ULONG */
292
attr_mod_ulong(ar, new_ar, AR_mail_options, object->object_name);
293
/* AR_mail_list, SGE_LIST */
294
attr_mod_sub_list(alpp, new_ar, AR_mail_list, AR_name, ar, sub_command, SGE_ATTR_MAIL_LIST, SGE_OBJ_AR, 0);
295
/* AR_pe, SGE_STRING */
296
attr_mod_zerostr(ar, new_ar, AR_pe, object->object_name);
297
/* AR_master_queue_list, SGE_LIST */
298
attr_mod_sub_list(alpp, new_ar ,AR_master_queue_list, AR_name, ar, sub_command, SGE_ATTR_QUEUE_LIST, SGE_OBJ_AR, 0);
299
/* AR_pe_range, SGE_LIST */
300
attr_mod_sub_list(alpp, new_ar, AR_pe_range, AR_name, ar, sub_command, SGE_ATTR_PE_LIST, SGE_OBJ_AR, 0);
301
/* AR_acl_list, SGE_LIST */
302
attr_mod_sub_list(alpp, new_ar, AR_acl_list, AR_name, ar, sub_command, SGE_ATTR_USER_LISTS, SGE_OBJ_AR, 0);
303
/* AR_xacl_list, SGE_LIST */
304
attr_mod_sub_list(alpp, new_ar, AR_xacl_list, AR_name, ar, sub_command, SGE_ATTR_XUSER_LISTS, SGE_OBJ_AR, 0);
305
/* AR_type, SGE_ULONG */
306
attr_mod_ulong(ar, new_ar, AR_type, object->object_name);
308
/* try to reserve the queues */
309
if (!ar_reserve_queues(alpp, new_ar)) {
313
INFO((SGE_EVENT, MSG_AR_GRANTED_U, sge_u32c(ar_id)));
314
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
318
DRETURN(STATUS_EUNKNOWN);
320
DRETURN(STATUS_NOTOK_DOAGAIN);
323
/****** sge_advance_reservation_qmaster/ar_spool() *****************************
*  NAME
*     ar_spool() -- gdi callback function to spool an advance reservation
*
*  SYNOPSIS
*     int ar_spool(sge_gdi_ctx_class_t *ctx, lList **alpp, lListElem *ep,
*     gdi_object_t *object)
*
*  FUNCTION
*     This function is called from the framework that
*     add/modify/delete generic gdi objects.
*     After an object was modified/added successfully it
*     is necessary to spool the current state to the filesystem.
*     The element is written with spool_write_object() under a key that is
*     the textual form of its AR_id.
*
*  INPUTS
*     sge_gdi_ctx_class_t *ctx - GDI context
*     lList **alpp             - answer_list
*     lListElem *ep            - element to spool
*     gdi_object_t *object     - structure from the GDI framework
*
*  RESULT
*     [alpp] - error messages will be added to this list
*     int    - 0 if spool_write_object() returned true, 1 otherwise
*
*  NOTES
*     MT-NOTE: ar_spool() is MT safe
*     NOTE(review): this header used to name STATUS_EEXIST as the error
*     result while the code returns the literal 1 -- confirm which one callers
*     rely on.
*     NOTE(review): this listing carries bare line-number lines and lacks
*     brace-only and other short lines; restore them before compiling.
*******************************************************************************/
351
int ar_spool(sge_gdi_ctx_class_t *ctx, lList **alpp, lListElem *ep, gdi_object_t *object)
353
/* local answer list for the spooling call; printed via answer_list_output() */
lList *answer_list = NULL;
355
bool job_spooling = ctx->get_job_spooling(ctx);
356
dstring buffer = DSTRING_INIT;
358
DENTER(TOP_LAYER, "ar_spool");
360
/* the spooling key is the AR id rendered as text */
sge_dstring_sprintf(&buffer, sge_U32CFormat, lGetUlong(ep, AR_id));
361
dbret = spool_write_object(&answer_list, spool_get_default_context(), ep,
362
sge_dstring_get_string(&buffer), SGE_TYPE_AR,
364
answer_list_output(&answer_list);
367
/* presumably only reached when dbret is false -- the guarding condition is
   not visible in this listing */
answer_list_add_sprintf(alpp, STATUS_EUNKNOWN,
368
ANSWER_QUALITY_ERROR,
369
MSG_PERSISTENCE_WRITE_FAILED_S,
370
sge_dstring_get_string(&buffer));
372
sge_dstring_free(&buffer);
374
DRETURN(dbret ? 0 : 1);
377
/****** sge_advance_reservation_qmaster/ar_success() ***************************
*  NAME
*     ar_success() -- follow-up work after a successful add or modify request
*
*  SYNOPSIS
*     int ar_success(sge_gdi_ctx_class_t *ctx, lListElem *ep, lListElem
*     *old_ep, gdi_object_t *object, lList **ppList, monitoring_t *monitor)
*
*  FUNCTION
*     This function is called from the framework that
*     add/modify/delete generic gdi objects.
*     After an object was modified/added and spooled successfully
*     it is possibly necessary to perform additional tasks.
*     For example it is necessary to send some events.
*
*     Visible steps: reporting records are written, a copy of ep is appended
*     to *ppList, ep is put into the waiting state, an sgeE_AR_ADD or
*     sgeE_AR_MOD event is sent and a one-time AR_RUNNING timer event for
*     AR_start_time is created.
*
*  INPUTS
*     sge_gdi_ctx_class_t *ctx - GDI context
*     lListElem *ep            - new added object
*     lListElem *old_ep        - old object before modifications or NULL
*     gdi_object_t *object     - structure from the GDI framework
*     lList **ppList           - if not NULL, receives a copy of ep; the list
*                                is created with lCreateList() when *ppList
*                                is NULL
*     monitoring_t *monitor    - monitoring structure
*
*  NOTES
*     MT-NOTE: ar_success() is not MT safe
*     NOTE(review): this listing carries bare line-number lines and lacks
*     brace-only and other short lines (among them the return statement);
*     restore them before compiling.
*******************************************************************************/
407
int ar_success(sge_gdi_ctx_class_t *ctx, lListElem *ep, lListElem *old_ep,
409
gdi_object_t *object, lList **ppList, monitoring_t *monitor)
412
/* buffer holds the AR id as text for sge_add_event() */
dstring buffer = DSTRING_INIT;
413
u_long32 timestamp = 0;
415
DENTER(TOP_LAYER, "ar_success");
417
/* with old_ep it is possible to identify if it is an add or modify request */
418
timestamp = sge_get_gmt();
419
/* add request: write the new-AR record followed by the attribute record */
if (old_ep == NULL) {
420
reporting_create_new_ar_record(NULL, ep, timestamp);
421
reporting_create_ar_attribute_record(NULL, ep, timestamp);
423
/* presumably the modify branch: attribute record only */
reporting_create_ar_attribute_record(NULL, ep, timestamp);
427
** return element with correct id
429
if (ppList != NULL) {
430
if (*ppList == NULL) {
431
*ppList = lCreateList("", AR_Type);
433
lAppendElem(*ppList, lCopyElem(ep));
436
sge_ar_state_set_waiting(ep);
439
** send sgeE_AR_MOD/sgeE_AR_ADD event
441
sge_dstring_sprintf(&buffer, sge_U32CFormat, lGetUlong(ep, AR_id));
442
sge_add_event(0, old_ep?sgeE_AR_MOD:sgeE_AR_ADD, lGetUlong(ep, AR_id), 0,
443
sge_dstring_get_string(&buffer), NULL, NULL, ep);
444
lListElem_clear_changed_info(ep);
445
sge_dstring_free(&buffer);
448
** add the timer to trigger the state change
450
ev = te_new_event((time_t)lGetUlong(ep, AR_start_time), TYPE_AR_EVENT, ONE_TIME_EVENT, lGetUlong(ep, AR_id), AR_RUNNING, NULL);
457
/****** sge_advance_reservation_qmaster/ar_del() *******************************
*  NAME
*     ar_del() -- removes advance reservations from the master list
*
*  SYNOPSIS
*     int ar_del(sge_gdi_ctx_class_t *ctx, lListElem *ep, lList **alpp,
*     lList **master_ar_list, const char *ruser, const char *rhost,
*     monitoring_t *monitor)
*
*  FUNCTION
*     This function removes an advance reservation from the master list and
*     performs the necessary cleanup.
*
*     The request element selects ARs through ID_user_list (owner names or
*     patterns) and/or ID_str (a numeric AR id, or an AR name or pattern; the
*     string 0 adds no id condition). Only managers may use patterns in the
*     user list. An AR is only deleted by a manager or by its owner.
*
*  INPUTS
*     sge_gdi_ctx_class_t *ctx - GDI context
*     lListElem *ep            - element that should be removed (ID_Type)
*     lList **alpp             - answer list
*     lList **master_ar_list   - list from where the element should be removed
*                                (normally a reference to the master ar list)
*     const char *ruser        - user who invoked this GDI request
*     const char *rhost        - host where the request was invoked
*     monitoring_t *monitor    - monitoring structure, handed to
*                                sge_ar_remove_all_jobs()
*
*  RESULT
*     int - STATUS_EUNKNOWN on failure (NULL arguments, missing ID_str field,
*           missing privileges for patterns, invalid id, neither id nor user
*           list given)
*           STATUS_EEXIST after reporting that no advance reservation matched
*           (presumably when nothing was removed -- the guarding condition is
*           not visible in this listing)
*
*  NOTES
*     MT-NOTE: ar_del() is not MT safe
*     NOTE(review): this listing carries bare line-number lines and lacks
*     brace-only and other short lines (among them the loop header over the
*     master list and the final return); restore them before compiling.
*******************************************************************************/
485
int ar_del(sge_gdi_ctx_class_t *ctx, lListElem *ep, lList **alpp, lList **master_ar_list,
486
const char *ruser, const char *rhost, monitoring_t *monitor)
488
const char *id_str = NULL;
489
lList *user_list = NULL;
491
bool removed_one = false;
492
bool has_manager_privileges = false;
493
dstring buffer = DSTRING_INIT;
494
/* selection condition built from the request; NULL means no restriction */
lCondition *ar_where = NULL;
497
DENTER(TOP_LAYER, "ar_del");
499
if (!ep || !ruser || !rhost) {
500
CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
501
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
502
sge_dstring_free(&buffer);
503
DRETURN(STATUS_EUNKNOWN);
506
/* ep is no ar_del element, if ep has no ID_str */
507
if (lGetPosViaElem(ep, ID_str, SGE_NO_ABORT) < 0) {
508
CRITICAL((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS,
509
lNm2Str(ID_str), SGE_FUNC));
510
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
511
sge_dstring_free(&buffer);
512
DRETURN(STATUS_EUNKNOWN);
515
id_str = lGetString(ep, ID_str);
517
/* user list given: one owner condition per entry, combined with OR */
if ((user_list = lGetList(ep, ID_user_list)) != NULL) {
518
lCondition *new_where = NULL;
521
for_each(user, user_list) {
522
/* owner patterns are reserved for managers */
if (sge_is_pattern(lGetString(user, ST_name)) && !manop_is_manager(ruser)) {
523
ERROR((SGE_EVENT, MSG_SGETEXT_MUST_BE_MGR_TO_SS,
524
ruser, "modify all advance reservations"));
525
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
526
sge_dstring_free(&buffer);
527
lFreeWhere(&ar_where);
528
DRETURN(STATUS_EUNKNOWN);
531
new_where = lWhere("%T(%I p= %s)", AR_Type, AR_owner, lGetString(user, ST_name));
532
if (ar_where == NULL) {
533
ar_where = new_where;
535
ar_where = lOrWhere(ar_where, new_where);
538
/* NOTE(review): id_str reaches sge_is_pattern() here although it is only
   compared with NULL further down -- confirm that sge_is_pattern() accepts
   NULL */
} else if (sge_is_pattern(id_str)) {
539
/* if no userlist and wildcard jobs was requested only delete the own ars */
540
lCondition *new_where = NULL;
541
new_where = lWhere("%T(%I p= %s)", AR_Type, AR_owner, ruser);
542
if (ar_where == NULL) {
543
ar_where = new_where;
545
ar_where = lOrWhere(ar_where, new_where);
549
/* an id other than 0 narrows the selection: a fully numeric string selects
   by AR_id, anything else by AR_name */
if (id_str != NULL && (strcmp(id_str, "0") != 0)) {
551
lCondition *new_where = NULL;
553
u_long32 value = strtol(id_str, &dptr, 0);
554
if (dptr[0] == '\0') {
555
/* is numeric value */
556
new_where = lWhere("%T(%I==%u)", AR_Type, AR_id, value);
559
/* not fully numeric: a leading digit is reported as an invalid name */
if (isdigit(id_str[0])) {
560
ERROR((SGE_EVENT, MSG_OBJECT_INVALID_NAME_S, id_str));
561
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
563
} else if (verify_str_key(alpp, id_str, MAX_VERIFY_STRING,
564
lNm2Str(AR_name), KEY_TABLE) != STATUS_OK) {
567
new_where = lWhere("%T(%I p= %s)", AR_Type, AR_name, id_str);
571
/* error exit of the id parsing: release everything built so far */
sge_dstring_free(&buffer);
572
lFreeWhere(&new_where);
573
lFreeWhere(&ar_where);
574
DRETURN(STATUS_EUNKNOWN);
579
ar_where = new_where;
581
/* the id condition is combined with the owner condition using AND */
ar_where = lAndWhere(ar_where, new_where);
587
if (id_str == NULL && user_list == NULL) {
588
CRITICAL((SGE_EVENT, MSG_SGETEXT_SPECIFYUSERORID_S, SGE_OBJ_AR));
589
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
590
sge_dstring_free(&buffer);
591
lFreeWhere(&ar_where);
592
DRETURN(STATUS_EUNKNOWN);
595
if (manop_is_manager(ruser)) {
596
has_manager_privileges = true;
600
nxt = lFirst(*master_ar_list);
602
u_long32 ar_id = lGetUlong(ar, AR_id);
603
sge_dstring_sprintf(&buffer, sge_U32CFormat, sge_u32c(ar_id));
607
/* ARs that do not match the selection condition are not handled */
if ((ar_where != NULL) && !lCompare(ar, ar_where)) {
613
/* only managers and the owner of the AR may delete it */
if (!has_manager_privileges && strcmp(ruser, lGetString(ar, AR_owner))) {
614
ERROR((SGE_EVENT, MSG_DELETEPERMS_SSU,
615
ruser, SGE_OBJ_AR, sge_u32c(ar_id)));
616
answer_list_add(alpp, SGE_EVENT, STATUS_ENOTOWNER, ANSWER_QUALITY_ERROR);
620
sge_ar_state_set_deleted(ar);
622
/* remove timer for this advance reservation */
623
te_delete_one_time_event(TYPE_AR_EVENT, ar_id, AR_RUNNING, NULL);
624
te_delete_one_time_event(TYPE_AR_EVENT, ar_id, AR_EXITED, NULL);
626
sge_ar_send_mail(ar, MAIL_AT_EXIT);
628
/* remove all jobs refering to the AR */
629
if (sge_ar_remove_all_jobs(ctx, ar_id, lGetUlong(ep, ID_force), monitor)) {
630
/* either all jobs were successfull removed or we had no jobs */
632
/* unblock reserved queues */
633
ar_do_reservation(ar, false);
635
reporting_create_ar_log_record(NULL, ar, ARL_DELETED,
638
reporting_create_ar_acct_records(NULL, ar, now);
640
gdil_del_all_orphaned(ctx, lGetList(ar, AR_granted_slots), alpp);
642
lRemoveElem(*master_ar_list, &ar);
644
INFO((SGE_EVENT, MSG_JOB_DELETEX_SSU,
645
ruser, SGE_OBJ_AR, sge_u32c(ar_id)));
646
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
648
sge_event_spool(ctx, alpp, 0, sgeE_AR_DEL,
649
ar_id, 0, sge_dstring_get_string(&buffer), NULL, NULL,
650
NULL, NULL, NULL, true, true);
652
/* presumably the branch taken while jobs of the AR still exist: the AR is
   kept and a modify event carrying it is sent -- the branch header is not
   visible in this listing */
INFO((SGE_EVENT, MSG_JOB_REGDELX_SSU,
653
ruser, SGE_OBJ_AR, sge_u32c(ar_id)));
654
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
655
sge_event_spool(ctx, alpp, 0, sgeE_AR_MOD,
656
ar_id, 0, sge_dstring_get_string(&buffer), NULL, NULL,
657
ar, NULL, NULL, true, true);
663
/* error reporting for a request that matched nothing: name the id if one
   was given, otherwise list the requested users */
if (id_str != NULL) {
664
sge_dstring_sprintf(&buffer, "%s", id_str);
665
ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXIST_SS, SGE_OBJ_AR, sge_dstring_get_string(&buffer)));
671
sge_dstring_sprintf(&buffer, "%s", "");
672
for_each(user, user_list) {
674
sge_dstring_append(&buffer, ",");
679
sge_dstring_append(&buffer, "...");
682
sge_dstring_append(&buffer, lGetString(user, ST_name));
685
ERROR((SGE_EVENT, MSG_SGETEXT_THEREARENOXFORUSERS_SS, SGE_OBJ_AR, sge_dstring_get_string(&buffer)));
688
answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
689
sge_dstring_free(&buffer);
690
lFreeWhere(&ar_where);
691
DRETURN(STATUS_EEXIST);
694
sge_dstring_free(&buffer);
695
lFreeWhere(&ar_where);
699
/****** sge_advance_reservation_qmaster/sge_get_ar_id() ************************
*  NAME
*     sge_get_ar_id() -- returns the next advance reservation id
*
*  SYNOPSIS
*     static u_long32 sge_get_ar_id(sge_gdi_ctx_class_t *ctx, monitoring_t
*     *monitor)
*
*  FUNCTION
*     Increments the id counter in ar_id_control while holding its mutex and
*     marks the counter as changed. When the counter exceeds MAX_SEQNUM it
*     starts over with 1. The id is not compared with existing advance
*     reservations here; ar_mod() repeats the call until ar_list_locate()
*     finds no reservation with the returned id.
*
*  INPUTS
*     sge_gdi_ctx_class_t *ctx - gdi context
*     monitoring_t *monitor    - monitoring structure
*
*  RESULT
*     static u_long32 - ar id
*
*  NOTES
*     MT-NOTE: sge_get_ar_id() is MT safe
*     NOTE(review): this listing carries bare line-number lines and lacks
*     brace-only and other short lines (among them the return statement);
*     restore them before compiling.
*******************************************************************************/
720
static u_long32 sge_get_ar_id(sge_gdi_ctx_class_t *ctx, monitoring_t *monitor)
723
/* NOTE(review): presumably decides whether sge_store_ar_id() is called at
   the end; the statements setting and testing it are not visible here */
bool is_store_ar = false;
725
DENTER(TOP_LAYER, "sge_get_ar_id");
727
sge_mutex_lock("ar_id_mutex", "sge_get_ar_id", __LINE__,
728
&ar_id_control.ar_id_mutex);
730
ar_id_control.ar_id++;
731
ar_id_control.changed = true;
732
/* wrap around once the counter passes MAX_SEQNUM */
if (ar_id_control.ar_id > MAX_SEQNUM) {
733
DPRINTF(("highest ar number MAX_SEQNUM %d exceeded, starting over with 1\n", MAX_SEQNUM));
734
ar_id_control.ar_id = 1;
737
/* copy the value while the mutex is still held */
ar_id = ar_id_control.ar_id;
739
sge_mutex_unlock("ar_id_mutex", "sge_get_ar_id", __LINE__,
740
&ar_id_control.ar_id_mutex);
743
sge_store_ar_id(ctx, NULL, monitor);
749
/****** sge_advance_reservation_qmaster/sge_store_ar_id() **********************
*  NAME
*     sge_store_ar_id() -- store ar id
*
*  SYNOPSIS
*     void sge_store_ar_id(sge_gdi_ctx_class_t *ctx, te_event_t anEvent,
*     monitoring_t *monitor)
*
*  FUNCTION
*     At qmaster shutdown it is necessary to store the latest highest ar id to
*     reinitialize the counter at the next qmaster start. This is done by an
*     event timer in a specific interval.
*
*     The counter is read and its changed flag is reset under the
*     ar_id_control mutex; the value is then written to ARSEQ_NUM_FILE
*     outside of the lock.
*
*  INPUTS
*     sge_gdi_ctx_class_t *ctx - GDI context
*     te_event_t anEvent       - event that triggered this function
*     monitoring_t *monitor    - pointer to monitor (not used here)
*
*  NOTES
*     MT-NOTE: sge_store_ar_id() is not MT safe
*     NOTE(review): this listing carries bare line-number lines and lacks
*     brace-only and other short lines (among them the conditions around the
*     file handling); restore them before compiling.
*******************************************************************************/
770
void sge_store_ar_id(sge_gdi_ctx_class_t *ctx, te_event_t anEvent, monitoring_t *monitor) {
772
/* NOTE(review): presumably records whether the counter changed since the
   last store; the statements using it are not visible here */
bool changed = false;
774
DENTER(TOP_LAYER, "sge_store_ar_id");
776
sge_mutex_lock("ar_id_mutex", "sge_store_ar_id", __LINE__,
777
&ar_id_control.ar_id_mutex);
778
/* take a snapshot of the counter and clear the changed flag under the lock */
if (ar_id_control.changed) {
779
ar_id = ar_id_control.ar_id;
780
ar_id_control.changed = false;
783
sge_mutex_unlock("ar_id_mutex", "sge_store_ar_id", __LINE__,
784
&ar_id_control.ar_id_mutex);
786
/* here we got a race condition that can (very unlikely)
cause concurrent writing of the sequence number file */
789
FILE *fp = fopen(ARSEQ_NUM_FILE, "w");
792
ERROR((SGE_EVENT, MSG_NOSEQFILECREATE_SSS, "ar", ARSEQ_NUM_FILE, strerror(errno)));
794
/* the file content is the id followed by a newline */
FPRINTF((fp, sge_u32"\n", ar_id));
802
ERROR((SGE_EVENT, MSG_NOSEQFILECLOSE_SSS, "ar", ARSEQ_NUM_FILE, strerror(errno)));
806
/****** sge_advance_reservation_qmaster/sge_init_ar_id() ***********************
*  NAME
*     sge_init_ar_id() -- init ar id counter
*
*  SYNOPSIS
*     void sge_init_ar_id(void)
*
*  FUNCTION
*     Called during startup and sets the advance reservation id counter.
*     The value is read from ARSEQ_NUM_FILE, compared with the result of
*     guess_highest_ar_id() and the larger of the two is stored in
*     ar_id_control under its mutex; the changed flag is set as well.
*
*  NOTES
*     MT-NOTE: sge_init_ar_id() is MT safe
*     NOTE(review): this listing carries bare line-number lines and lacks
*     brace-only and other short lines (among them the declarations of fp and
*     ar_id and the closing of the file); restore them before compiling.
*******************************************************************************/
819
void sge_init_ar_id(void)
823
u_long32 guess_ar_id = 0;
825
DENTER(TOP_LAYER, "sge_init_ar_id");
827
if ((fp = fopen(ARSEQ_NUM_FILE, "r"))) {
828
/* exactly one value is expected in the file */
if (fscanf(fp, sge_u32, &ar_id) != 1) {
829
ERROR((SGE_EVENT, MSG_NOSEQNRREAD_SSS, "ar", ARSEQ_NUM_FILE, strerror(errno)));
835
/* presumably the branch for a file that could not be opened -- its header is
   not visible in this listing */
WARNING((SGE_EVENT, MSG_NOSEQFILEOPEN_SSS, "ar", ARSEQ_NUM_FILE, strerror(errno)));
838
/* never start below the highest id found in the master AR list */
guess_ar_id = guess_highest_ar_id();
839
ar_id = MAX(ar_id, guess_ar_id);
841
sge_mutex_lock("ar_id_mutex", "sge_init_ar_id", __LINE__,
842
&ar_id_control.ar_id_mutex);
843
ar_id_control.ar_id = ar_id;
844
ar_id_control.changed = true;
845
sge_mutex_unlock("ar_id_mutex", "sge_init_ar_id", __LINE__,
846
&ar_id_control.ar_id_mutex);
851
/****** sge_advance_reservation_qmaster/guess_highest_ar_id() ******************
*  NAME
*     guess_highest_ar_id() -- guesses the highest ar id
*
*  SYNOPSIS
*     static u_long32 guess_highest_ar_id(void)
*
*  FUNCTION
*     Iterates over all advance reservations in the master AR list while
*     holding the global lock for reading and determines the maximum of their
*     AR_id values.
*
*  RESULT
*     static u_long32 - determined id
*
*  NOTES
*     MT-NOTE: guess_highest_ar_id() is MT safe
*     NOTE(review): this listing carries bare line-number lines and lacks
*     brace-only and other short lines (among them the declarations of ar,
*     pos and maxid and the return statement); restore them before compiling.
*******************************************************************************/
868
static u_long32 guess_highest_ar_id(void)
872
lList *master_ar_list = *(object_type_get_master_list(SGE_TYPE_AR));
874
DENTER(TOP_LAYER, "guess_highest_ar_id");
876
/* this function is called during qmaster startup and not while it is running,
we do not need to monitor this lock */
878
SGE_LOCK(LOCK_GLOBAL, LOCK_READ);
880
ar = lFirst(master_ar_list);
883
/* look up the position of AR_id once, using the first element, and reuse it
   for every element in the loop */
pos = lGetPosViaElem(ar, AR_id, SGE_NO_ABORT);
885
for_each(ar, master_ar_list) {
886
maxid = MAX(maxid, lGetPosUlong(ar, pos));
890
SGE_UNLOCK(LOCK_GLOBAL, LOCK_READ);
895
/****** sge_advance_reservation_qmaster/sge_ar_event_handler() *****************
*  NAME
*     sge_ar_event_handler() -- advance reservation event handler
*
*  SYNOPSIS
*     void sge_ar_event_handler(sge_gdi_ctx_class_t *ctx, te_event_t anEvent,
*     monitoring_t *monitor)
*
*  FUNCTION
*     Registered function in the timed event framework. For every granted
*     advance reservation a trigger for its start time is registered. When the
*     function is executed at start time it registers an additional timer for
*     the end time of the advance reservation.
*
*     The first numeric key of the event is the AR id, the second one the
*     state to enter. For AR_EXITED the jobs of the AR are removed, the
*     reservation is undone, reporting records and mail are produced and the
*     AR is removed from the master list with an sgeE_AR_DEL event. Otherwise
*     the AR is set to running, an AR_EXITED event for AR_end_time is created
*     and an sgeE_AR_MOD event is sent. The work is done under the global
*     write lock.
*
*  INPUTS
*     sge_gdi_ctx_class_t *ctx - GDI context
*     te_event_t anEvent       - triggered timed event
*     monitoring_t *monitor    - monitoring structure
*
*  NOTES
*     MT-NOTE: sge_ar_event_handler() is MT safe
*     NOTE(review): this listing carries bare line-number lines and lacks
*     brace-only and other short lines (among them the else header and the
*     early return after the failed lookup); restore them before compiling.
*******************************************************************************/
917
void sge_ar_event_handler(sge_gdi_ctx_class_t *ctx, te_event_t anEvent, monitoring_t *monitor)
920
/* the event carries the AR id and the state to switch to */
u_long32 ar_id = te_get_first_numeric_key(anEvent);
921
u_long32 state = te_get_second_numeric_key(anEvent);
923
dstring buffer = DSTRING_INIT;
925
DENTER(TOP_LAYER, "sge_ar_event_handler");
928
To guarantee all jobs are removed from the cluster when AR end time is
929
reached it is necessary to consider the DURATION_OFFSET for Advance Reservation also.
930
This means all jobs submitted to a AR will have a resulting runtime limit of AR duration - DURATION_OFFSET.
931
Jobs requesting a longer runtime will not be scheduled.
932
The AR requester needs to keep this in mind when he creates a new AR.
934
MONITOR_WAIT_TIME(SGE_LOCK(LOCK_GLOBAL, LOCK_WRITE), monitor);
936
/* an event for an AR that is no longer in the master list is reported and
   the lock is released again */
if (!(ar = ar_list_locate(*(object_type_get_master_list(SGE_TYPE_AR)), ar_id))) {
937
ERROR((SGE_EVENT, MSG_EVE_TE4AR_U, sge_u32c(ar_id)));
938
SGE_UNLOCK(LOCK_GLOBAL, LOCK_WRITE);
942
/* textual AR id, used as key for the events sent below */
sge_dstring_sprintf(&buffer, sge_U32CFormat, ar_id);
944
if (state == AR_EXITED) {
945
time_t timestamp = (time_t) sge_get_gmt();
947
sge_ar_state_set_exited(ar);
949
/* remove all jobs running in this AR */
950
sge_ar_remove_all_jobs(ctx, ar_id, 1, monitor);
952
/* unblock reserved queues */
953
ar_do_reservation(ar, false);
955
reporting_create_ar_log_record(NULL, ar, ARL_TERMINATED,
956
"end time of AR reached",
958
reporting_create_ar_acct_records(NULL, ar, timestamp);
960
sge_ar_send_mail(ar, MAIL_AT_EXIT);
962
/* remove all orphaned queue intances, which are empty. */
963
gdil_del_all_orphaned(ctx, lGetList(ar, AR_granted_slots), NULL);
965
/* remove the AR itself */
966
DPRINTF(("AR: exited, removing AR %s\n", sge_dstring_get_string(&buffer)));
967
lRemoveElem(*(object_type_get_master_list(SGE_TYPE_AR)), &ar);
968
sge_event_spool(ctx, NULL, 0, sgeE_AR_DEL,
969
ar_id, 0, sge_dstring_get_string(&buffer), NULL, NULL,
970
NULL, NULL, NULL, true, true);
974
/* presumably the branch for every other state, i.e. the start of the AR --
   the branch header is not visible in this listing */
DPRINTF(("AR: started, changing state of AR "sge_u32"\n", ar_id));
976
sge_ar_state_set_running(ar);
978
/* schedule the trigger that ends the AR */
ev = te_new_event((time_t)lGetUlong(ar, AR_end_time), TYPE_AR_EVENT, ONE_TIME_EVENT, ar_id, AR_EXITED, NULL);
982
/* this info is not spooled */
983
sge_add_event(0, sgeE_AR_MOD, ar_id, 0,
984
sge_dstring_get_string(&buffer), NULL, NULL, ar);
985
lListElem_clear_changed_info(ar);
987
reporting_create_ar_log_record(NULL, ar, ARL_STARTTIME_REACHED,
988
"start time of AR reached",
991
sge_ar_send_mail(ar, MAIL_AT_BEGINNING);
994
SGE_UNLOCK(LOCK_GLOBAL, LOCK_WRITE);
995
sge_dstring_free(&buffer);
1000
/****** sge_advance_reservation_qmaster/ar_reserve_queues() ********************
*  NAME
*     ar_reserve_queues() -- selects the queues for reserving
*
*  SYNOPSIS
*     static bool ar_reserve_queues(lList **alpp, lListElem *ar)
*
*  FUNCTION
*     The function executes the scheduler code to select queues matching the
*     advance reservation request for reserving. The function works on temporary
*     lists and creates the AR_granted_slots list.
*
*  INPUTS
*     lList **alpp  - answer list pointer pointer
*     lListElem *ar - ar object
*
*  RESULT
*     static bool - true on success, enough resources reservable
*                   false in verify mode or not enough resources available
*
*  NOTES
*     MT-NOTE: ar_reserve_queues() is not MT safe, needs GLOBAL_LOCK
*******************************************************************************/
1023
static bool ar_reserve_queues(lList **alpp, lListElem *ar)
1025
lList **splitted_job_lists[SPLIT_LAST];
1026
lList *suspended_list = NULL; /* JB_Type */
1027
lList *running_list = NULL; /* JB_Type */
1029
int verify_mode = lGetUlong(ar, AR_verify);
1031
lList *ar_queue_request = lGetList(ar, AR_queue_list);
1032
const char *ar_pe_request = lGetString(ar, AR_pe);
1034
lListElem *cqueue = NULL;
1037
lListElem *dummy_job = lCreateElem(JB_Type);
1038
sge_assignment_t a = SGE_ASSIGNMENT_INIT;
1039
object_description *object_base = object_type_get_object_description();
1040
lList *master_cqueue_list = *object_base[SGE_TYPE_CQUEUE].list;
1041
lList *master_userset_list = *object_base[SGE_TYPE_USERSET].list;
1042
lList *master_job_list = *object_base[SGE_TYPE_JOB].list;
1043
lList *master_centry_list = *object_base[SGE_TYPE_CENTRY].list;
1044
lList *master_hgroup_list = *object_base[SGE_TYPE_HGROUP].list;
1045
lList *master_cal_list = *object_base[SGE_TYPE_CALENDAR].list;
1047
/* These lists must be copied */
1048
lList *master_pe_list = lCopyList("", *object_base[SGE_TYPE_PE].list);
1049
lList *master_exechost_list = lCopyList("", *object_base[SGE_TYPE_EXECHOST].list);
1051
dispatch_t result = DISPATCH_NEVER_CAT;
1053
DENTER(TOP_LAYER, "ar_reserve_queues");
1055
if (lGetList(ar, AR_acl_list) != NULL) {
1056
lSetString(dummy_job, JB_owner, "*");
1057
lSetString(dummy_job, JB_group, "*");
1059
lSetString(dummy_job, JB_owner, lGetString(ar, AR_owner));
1060
lSetString(dummy_job, JB_group, lGetString(ar, AR_group));
1063
assignment_init(&a, dummy_job, NULL, false);
1064
a.host_list = master_exechost_list;
1065
a.centry_list = master_centry_list;
1066
a.acl_list = master_userset_list;
1067
a.hgrp_list = master_hgroup_list;
1068
a.gep = host_list_locate(master_exechost_list, SGE_GLOBAL_NAME);
1069
a.start = lGetUlong(ar, AR_start_time);
1070
a.duration = lGetUlong(ar, AR_duration);
1071
a.is_reservation = true;
1072
a.is_advance_reservation = true;
1075
* Current scheduler code expects all queue instances in a plain list. We use
1076
* a copy of all queue instances that needs to be free'd explicitely after
1077
* deciding about assignment. This is because assignment_release() sees
1078
* queue_list only as a list pointer.
1080
a.queue_list = lCreateList("", QU_Type);
1082
for_each(cqueue, master_cqueue_list) {
1083
const char *cqname = lGetString(cqueue, CQ_name);
1084
lList *qinstance_list = lGetList(cqueue, CQ_qinstances);
1085
lListElem *qinstance;
1087
if (cqueue_match_static(cqname, &a) != DISPATCH_OK) {
1091
for_each(qinstance, qinstance_list) {
1092
const char *cal_name;
1094
/* skip orphaned queues */
1095
if (qinstance_state_is_orphaned(qinstance)) {
1099
/* we only have to consider requested queues */
1100
if (ar_queue_request != NULL) {
1101
if (qref_list_cq_rejected(ar_queue_request, cqname,
1102
lGetHost(qinstance, QU_qhostname), master_hgroup_list)) {
1107
/* we only have to consider queues containing the requested pe */
1108
if (ar_pe_request != NULL) {
1112
for_each(pe_ref, lGetList(qinstance, QU_pe_list)) {
1113
if (pe_name_is_matching(lGetString(pe_ref, ST_name), ar_pe_request)) {
1124
/* sort out queue that are calendar disabled in requested time frame */
1125
if ((cal_name = lGetString(qinstance, QU_calendar)) != NULL) {
1126
lListElem *cal_ep = calendar_list_locate(master_cal_list, cal_name);
1128
if (!calendar_open_in_time_frame(cal_ep, lGetUlong(ar, AR_start_time), lGetUlong(ar, AR_duration))) {
1130
answer_list_add_sprintf(alpp, STATUS_OK, ANSWER_QUALITY_INFO, MSG_AR_QUEUEDISABLEDINTIMEFRAME,
1131
lGetString(qinstance, QU_full_name));
1135
/* sort out queues where not all users have access */
1136
if (lGetList(ar, AR_acl_list) != NULL) {
1137
if (!sge_ar_have_users_access(alpp, ar, lGetString(qinstance, QU_full_name),
1138
lGetList(qinstance, QU_acl),
1139
lGetList(qinstance, QU_xacl),
1140
master_userset_list)) {
1145
lAppendElem(a.queue_list, lCopyElem(qinstance));
1153
/* initialize all job lists */
1154
for (i = SPLIT_FIRST; i < SPLIT_LAST; i++) {
1155
splitted_job_lists[i] = NULL;
1157
splitted_job_lists[SPLIT_SUSPENDED] = &suspended_list;
1158
splitted_job_lists[SPLIT_RUNNING] = &running_list;
1160
/* splitted job lists must be freed */
1161
split_jobs(&master_job_list, NULL, a.queue_list,
1162
mconf_get_max_aj_instances(), splitted_job_lists, false, true);
1166
* prepare resource schedule
1168
prepare_resource_schedules(*(splitted_job_lists[SPLIT_RUNNING]),
1169
*(splitted_job_lists[SPLIT_SUSPENDED]),
1170
master_pe_list, a.host_list, a.queue_list,
1171
NULL, a.centry_list, a.acl_list,
1172
a.hgrp_list, NULL, false);
1174
/* free generated job lists */
1175
lFreeList(splitted_job_lists[SPLIT_RUNNING]);
1176
lFreeList(splitted_job_lists[SPLIT_SUSPENDED]);
1178
lSetUlong(dummy_job, JB_execution_time, lGetUlong(ar, AR_start_time));
1179
lSetUlong(dummy_job, JB_deadline, lGetUlong(ar, AR_end_time));
1180
lSetList(dummy_job, JB_hard_resource_list, lCopyList("", lGetList(ar, AR_resource_list)));
1181
lSetList(dummy_job, JB_hard_queue_list, lCopyList("", lGetList(ar, AR_queue_list)));
1182
lSetList(dummy_job, JB_master_hard_queue_list, lCopyList("", lGetList(ar, AR_master_queue_list)));
1183
lSetUlong(dummy_job, JB_type, lGetUlong(ar, AR_type));
1184
lSetString(dummy_job, JB_checkpoint_name, lGetString(ar, AR_checkpoint_name));
1186
/* imagine qs is empty */
1187
sconf_set_qs_state(QS_STATE_EMPTY);
1189
/* redirect scheduler monitoring into answer list */
1190
if (verify_mode == AR_JUST_VERIFY) {
1191
DPRINTF(("AR Verify Mode\n"));
1192
set_monitor_alpp(&talp);
1195
if (lGetString(ar, AR_pe)) {
1196
lSetString(dummy_job, JB_pe, lGetString(ar, AR_pe));
1197
lSetList(dummy_job, JB_pe_range, lCopyList("", lGetList(ar, AR_pe_range)));
1199
result = sge_select_parallel_environment(&a, master_pe_list);
1200
if (result == DISPATCH_OK) {
1201
lSetString(ar, AR_granted_pe, lGetString(a.pe, PE_name));
1204
result = sge_sequential_assignment(&a);
1207
/* stop redirection of scheduler monitoring messages */
1208
if (verify_mode == AR_JUST_VERIFY) {
1209
/* copy error msgs from talp into alpp */
1210
answer_list_append_list(alpp, &talp);
1212
set_monitor_alpp(NULL);
1214
if (result == DISPATCH_OK) {
1216
answer_list_add_sprintf(alpp, STATUS_OK, ANSWER_QUALITY_INFO, MSG_JOB_VERIFYFOUNDQ);
1218
int ngranted = nslots_granted(a.gdil, NULL);
1219
answer_list_add_sprintf(alpp, STATUS_OK, ANSWER_QUALITY_INFO, MSG_JOB_VERIFYFOUNDSLOTS_I, ngranted);
1222
answer_list_add_sprintf(alpp, STATUS_ESEMANTIC, ANSWER_QUALITY_INFO, MSG_JOB_NOSUITABLEQ_S, MSG_JOB_VERIFYVERIFY);
1224
/* ret has to be false in verify mode, otherwise the framework adds the object to the master list */
1227
if (result != DISPATCH_OK) {
1228
answer_list_add_sprintf(alpp, STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR, MSG_JOB_NOSUITABLEQ_S, SGE_OBJ_AR);
1231
lSetList(ar, AR_granted_slots, a.gdil);
1232
ar_initialize_reserved_queue_list(ar);
1235
ar_do_reservation(ar, true);
1240
sconf_set_qs_state(QS_STATE_FULL);
1242
lFreeList(&(a.queue_list));
1243
lFreeList(&master_pe_list);
1244
lFreeList(&master_exechost_list);
1245
lFreeElem(&dummy_job);
1247
assignment_release(&a);
1252
/****** sge_advance_reservation_qmaster/ar_do_reservation() ********************
1254
* ar_do_reservation() -- do the reservation in the selected queue instances
1257
* int ar_do_reservation(lListElem *ar, bool incslots)
1260
* This function does the (un)reservation in the selected parallel environment
1261
* and the selected queue instances.
*
* For each element of AR_granted_slots the slot amount (JG_slots) is handed
* to rc_add_job_utilization() for the global host, for the execution host
* of the queue instance and for the queue instance itself, always for the
* time frame given by AR_start_time and AR_duration. If AR_granted_pe is
* set, the slot amounts summed up over all granted queue instances are
* additionally passed to utilization_add() for that parallel environment.
* Changed objects are announced with sge_add_event() or
* qinstance_add_event().
1264
* lListElem *ar - ar object (AR_Type)
1265
* bool incslots - increase or decrease usage
1271
* MT-NOTE: ar_do_reservation() is not MT safe
1274
* sge_resource_utilization/rqs_add_job_utilization()
1275
*******************************************************************************/
1276
int ar_do_reservation(lListElem *ar, bool incslots)
1278
/* temporary job element that carries the requests of the AR */
lListElem *dummy_job = lCreateElem(JB_Type);
1280
lListElem *global_host_ep = NULL;
1283
const char *granted_pe = lGetString(ar, AR_granted_pe);
1284
u_long32 start_time = lGetUlong(ar, AR_start_time);
1285
u_long32 duration = lGetUlong(ar, AR_duration);
1286
object_description *object_base = object_type_get_object_description();
1287
lList *master_cqueue_list = *object_base[SGE_TYPE_CQUEUE].list;
1288
lList *master_centry_list = *object_base[SGE_TYPE_CENTRY].list;
1289
lList *master_exechost_list = *object_base[SGE_TYPE_EXECHOST].list;
1290
lList *master_pe_list = *object_base[SGE_TYPE_PE].list;
1292
DENTER(TOP_LAYER, "ar_do_reservation");
1294
/* the dummy job gets its own copies of the AR request lists */
lSetList(dummy_job, JB_hard_resource_list, lCopyList("", lGetList(ar, AR_resource_list)));
1295
lSetList(dummy_job, JB_hard_queue_list, lCopyList("", lGetList(ar, AR_queue_list)));
1297
global_host_ep = host_list_locate(master_exechost_list, SGE_GLOBAL_NAME);
1299
/* one pass per granted queue instance */
for_each(qep, lGetList(ar, AR_granted_slots)) {
1300
lListElem *host_ep = NULL;
1301
const char *queue_hostname = NULL;
1302
const char *queue_name = lGetString(qep, JG_qname);
1303
/* the granted slots only store the name, the object comes from the master list */
lListElem *queue = cqueue_list_locate_qinstance(master_cqueue_list, queue_name);
1309
queue_hostname = lGetHost(queue, QU_qhostname);
1312
/* signed slot amount: the negated value takes the slots back.
   NOTE(review): presumably selected by 'incslots' - confirm */
tmp_slots = -lGetUlong(qep, JG_slots);
1314
tmp_slots = lGetUlong(qep, JG_slots);
1317
/* collect the total that is later booked in the parallel environment */
pe_slots += tmp_slots;
1319
/* reserve global host */
1320
if (rc_add_job_utilization(dummy_job, 0, SCHEDULING_RECORD_ENTRY_TYPE_RESERVING,
1321
global_host_ep, master_centry_list, tmp_slots,
1322
EH_consumable_config_list, EH_resource_utilization,
1323
SGE_GLOBAL_NAME, start_time, duration, GLOBAL_TAG,
1325
/* this info is not spooled */
1326
sge_add_event(0, sgeE_EXECHOST_MOD, 0, 0,
1327
SGE_GLOBAL_NAME, NULL, NULL, global_host_ep);
1328
lListElem_clear_changed_info(global_host_ep);
1331
/* reserve exec host */
1332
host_ep = host_list_locate(master_exechost_list, queue_hostname);
1333
/* the host is only announced via event if the call returns a value != 0 */
if (rc_add_job_utilization(dummy_job, 0, SCHEDULING_RECORD_ENTRY_TYPE_RESERVING,
1334
host_ep, master_centry_list, tmp_slots, EH_consumable_config_list,
1335
EH_resource_utilization, queue_hostname, start_time,
1336
duration, HOST_TAG, false) != 0) {
1337
/* this info is not spooled */
1338
sge_add_event(0, sgeE_EXECHOST_MOD, 0, 0,
1339
queue_hostname, NULL, NULL, host_ep);
1340
lListElem_clear_changed_info(host_ep);
1343
/* reserve queue instance */
1344
rc_add_job_utilization(dummy_job, 0, SCHEDULING_RECORD_ENTRY_TYPE_RESERVING,
1345
queue, master_centry_list, tmp_slots, QU_consumable_config_list,
1346
QU_resource_utilization, queue_name, start_time, duration,
1349
/* the queue version is increased before the modify event is sent */
qinstance_increase_qversion(queue);
1350
/* this info is not spooled */
1351
qinstance_add_event(queue, sgeE_QINSTANCE_MOD);
1354
/* book the accumulated slots in the granted parallel environment */
if (granted_pe != NULL) {
1355
lListElem *pe = pe_list_locate(master_pe_list, granted_pe);
1358
ERROR((SGE_EVENT, MSG_OBJ_UNABLE2FINDPE_S, granted_pe));
1360
utilization_add(lFirst(lGetList(pe, PE_resource_utilization)), start_time,
1361
duration, pe_slots, 0, 0, PE_TAG, granted_pe,
1362
SCHEDULING_RECORD_ENTRY_TYPE_RESERVING, false);
1363
sge_add_event(0, sgeE_PE_MOD, 0, 0, granted_pe, NULL, NULL, pe);
1364
lListElem_clear_changed_info(pe);
1368
/* the dummy job and the list copies stored in it are released again */
lFreeElem(&dummy_job);
1373
/****** libs/sgeobj/ar_list_has_reservation_due_to_ckpt() **********************
1375
* ar_list_has_reservation_due_to_ckpt() -- does ckpt change break an ar
1378
* bool ar_list_has_reservation_due_to_ckpt(lList *ar_master_list,
1379
* lList **answer_list,
1380
* const char *qinstance_name,
1381
* lList *ckpt_string_list)
1384
* This function tests if a modification of a ckpt list in a qinstance is
1385
* allowed according to the advance reservations.
1387
* Input parameters are: the advance reservation master list, the name of the
1388
* qinstance which should be modified and the ST_Type string list of ckpt
1389
* names which represents the new setting for the qinstance.
1391
* If there is no reservation for this qinstance-ckpt combination or if
1392
* the reservation would be still valid after the modification then
1393
* the function returns 'false'. Otherwise 'true'.
*
* An AR is affected if it has a checkpoint name (AR_checkpoint_name), was
* granted slots in the given qinstance (AR_granted_slots) and its
* checkpoint name is not contained in the new list. In that case an error
* is logged and added to the answer list.
1396
* lList *ar_master_list - advance reservation master list
1397
* lList **answer_list - answer list which will contain the reason why a
1398
* modification is not valid
1399
* const char *qinstance_name - name of a qinstance <cqname@hostname>
1400
* lList *ckpt_string_list - ST_Type list containing ckpt names
1404
* true - modification would break at least one ar
1405
* false - no ar will be broken if the ckpt list is modified
1408
* MT-NOTE: ar_list_has_reservation_due_to_ckpt() is MT safe
1409
*******************************************************************************/
1411
ar_list_has_reservation_due_to_ckpt(lList *ar_master_list, lList **answer_list,
1412
const char *qinstance_name, lList *ckpt_string_list)
1416
/* NOTE(review): the trace label differs from the function name ("ar_list_..." prefix is missing) */
DENTER(TOP_LAYER, "ar_has_reservation_due_to_ckpt");
1418
for_each(ar, ar_master_list) {
1419
const char *ckpt_string = lGetString(ar, AR_checkpoint_name);
1421
/* only ARs with a ckpt request that were granted slots in this qinstance matter */
if (ckpt_string != NULL && lGetElemStr(lGetList(ar, AR_granted_slots), JG_qname, qinstance_name)) {
1422
/* the ckpt name used by the AR is missing in the new list */
if (lGetElemStr(ckpt_string_list, ST_name, ckpt_string) == NULL) {
1423
ERROR((SGE_EVENT, MSG_PARSE_MOD_REJECTED_DUE_TO_AR_SSU, ckpt_string,
1424
SGE_ATTR_CKPT_LIST, sge_u32c(lGetUlong(ar, AR_id))));
1425
answer_list_add(answer_list, SGE_EVENT,
1426
STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
1434
/****** libs/sgeobj/ar_list_has_reservation_due_to_pe() **********************
1436
* ar_list_has_reservation_due_to_pe() -- does pe change break an ar
1439
* bool ar_list_has_reservation_due_to_pe(lList *ar_master_list,
1440
* lList **answer_list,
1441
* const char *qinstance_name,
1442
* lList *pe_string_list)
1445
* This function tests if a modification of a pe list in a qinstance is
1446
* allowed according to the advance reservations.
1448
* Input parameters are: the advance reservation master list, the name of the
1449
* qinstance which should be modified and the ST_Type string list of pe
1450
* names which represents the new setting for the qinstance.
1452
* If there is no reservation for this qinstance-pe combination or if
1453
* the reservation would be still valid after the modification then
1454
* the function returns 'false'. Otherwise 'true'.
*
* An AR is affected if it has a pe request (AR_pe), was granted slots in
* the given qinstance (AR_granted_slots) and the requested pe name is not
* contained in the new list. In that case an error is logged and added to
* the answer list.
1457
* lList *ar_master_list - advance reservation master list
1458
* lList **answer_list - answer list which will contain the reason why a
1459
* modification is not valid
1460
* const char *qinstance_name - name of a qinstance <cqname@hostname>
1461
* lList *pe_string_list - ST_Type list containing pe names
1465
* true - modification would break at least one ar
1466
* false - no ar will be broken if the pe list is modified
1469
* MT-NOTE: ar_list_has_reservation_due_to_pe() is MT safe
1470
*******************************************************************************/
1472
ar_list_has_reservation_due_to_pe(lList *ar_master_list, lList **answer_list,
1473
const char *qinstance_name, lList *pe_string_list)
1477
DENTER(TOP_LAYER, "ar_list_has_reservation_due_to_pe");
1479
for_each(ar, ar_master_list) {
1480
/* NOTE(review): this is the requested pe (AR_pe), not AR_granted_pe; the
   name is looked up literally below - confirm this is intended if AR_pe
   may hold a pattern */
const char *pe_string = lGetString(ar, AR_pe);
1482
/* only ARs with a pe request that were granted slots in this qinstance matter */
if (pe_string != NULL && lGetElemStr(lGetList(ar, AR_granted_slots), JG_qname, qinstance_name)) {
1483
/* the pe name requested by the AR is missing in the new list */
if (lGetElemStr(pe_string_list, ST_name, pe_string) == NULL) {
1484
ERROR((SGE_EVENT, MSG_PARSE_MOD_REJECTED_DUE_TO_AR_SSU, pe_string,
1485
SGE_ATTR_PE_LIST, sge_u32c(lGetUlong(ar, AR_id))));
1486
answer_list_add(answer_list, SGE_EVENT,
1487
STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
1495
/****** sgeobj/ar_list_has_reservation_for_pe_with_slots() ********************
1497
* ar_list_has_reservation_for_pe_with_slots() -- Does PE change violate AR
1501
* ar_list_has_reservation_for_pe_with_slots(lList *ar_master_list,
1502
* lList **answer_list,
1503
* const char *pe_name,
1504
* u_long32 new_slots)
1507
* This function tests if a modification of slots entry in a pe is
1508
* allowed according to the advance reservations.
1510
* Input parameters are: the advance reservation master list, the name of the
1511
* pe which should be modified and the new slots value which should
1512
* be set in the pe which might violate the advance reservations in
* the system.
1515
* If there is no reservation for this pe or if the new slots setting
1516
* does not violate the advance reservations in the system then this
1517
* function returns 'false'. Otherwise 'true'
*
* The granted slots (JG_slots in AR_granted_slots) of all ARs whose AR_pe
* equals 'pe_name' are summed up. If this sum is bigger than 'new_slots'
* an error is logged and added to the answer list.
1520
* lList *ar_master_list - master advance reservation list
1521
* lList **answer_list - answer list
1522
* const char *pe_name - pe name
1523
* u_long32 new_slots - new slots setting for pe with 'pe_name'
1527
* true - modification would break the ar's currently known
1528
* false - modification is valid
1531
* MT-NOTE: ar_list_has_reservation_for_pe_with_slots() is MT safe
1532
*******************************************************************************/
1534
ar_list_has_reservation_for_pe_with_slots(lList *ar_master_list,
1535
lList **answer_list,
1536
const char *pe_name,
1542
/* despite its name this is a running sum over all matching ARs, the time
   frames of the ARs are not taken into account */
u_long32 max_res_slots = 0;
1544
DENTER(TOP_LAYER, "ar_list_has_reservation_for_pe_with_slots");
1546
for_each(ar, ar_master_list) {
1547
const char *pe_string = lGetString(ar, AR_pe);
1549
/* exact string comparison of the requested pe name with 'pe_name' */
if (pe_name != NULL && pe_string != NULL && strcmp(pe_string, pe_name) == 0) {
1550
for_each(gs, lGetList(ar, AR_granted_slots)) {
1551
u_long32 slots = lGetUlong(gs, JG_slots);
1553
max_res_slots += slots;
1557
/* more slots are granted to ARs than the pe would provide afterwards */
if (max_res_slots > new_slots) {
1558
ERROR((SGE_EVENT, MSG_PARSE_MOD_REJECTED_DUE_TO_AR_PE_SLOTS_U,
1559
sge_u32c(max_res_slots)));
1560
answer_list_add(answer_list, SGE_EVENT,
1561
STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
1567
/****** sge_advance_reservation_qmaster/ar_initialize_reserved_queue_list() ******
1569
* ar_initialize_reserved_queue_list() -- initialize reserved queue structure
1572
* void ar_initialize_reserved_queue_list(lListElem *ar)
1575
* The function creates the list AR_reserved_queues that stores the necessary
1576
* data to debit jobs in an AR. The elements in the list are reduced
1577
* elements of type QU_Type.
*
* One element is created per entry of AR_granted_slots. It gets the host
* and queue names of the granted queue instance, the AR duration as soft
* and hard runtime limit, "INFINITY" for the other limits listed in the
* function, the granted slots as QU_job_slots and the consumable requests
* of the AR multiplied by the granted slots as QU_consumable_config_list.
* The states ambiguous, alarm, suspend alarm, disabled, unknown and error
* of the corresponding master queue instance are taken over with
* qinstance_set_error().
1580
* lListElem *ar - advance reservation that should be initialized
1583
* MT-NOTE: ar_initialize_reserved_queue_list() is not MT safe
1584
*******************************************************************************/
1585
void ar_initialize_reserved_queue_list(lListElem *ar)
1588
lList *gdil = lGetList(ar, AR_granted_slots);
1589
object_description *object_base = object_type_get_object_description();
1590
lList *master_centry_list = *object_base[SGE_TYPE_CENTRY].list;
1591
lList *master_cqueue_list = *object_base[SGE_TYPE_CQUEUE].list;
1592
dstring buffer = DSTRING_INIT;
1594
/* QU_Type fields that make up the reduced descriptor of the reserved queues */
static int queue_field[] = { QU_qhostname,
1598
QU_consumable_config_list,
1599
QU_resource_utilization,
1619
/* value that is stored in every limit attribute listed in attr[] */
static char *value = "INFINITY";
1620
/* NoName terminated list of limit attributes */
static int attr[] = {
1621
QU_s_cpu, QU_h_cpu, QU_s_fsize, QU_h_fsize, QU_s_data,
1622
QU_h_data, QU_s_stack, QU_h_stack, QU_s_core, QU_h_core,
1623
QU_s_rss, QU_h_rss, QU_s_vmem, QU_h_vmem, NoName
1630
DENTER(TOP_LAYER, "ar_initialize_reserved_queue_list");
1632
/* build the reduced QU_Type descriptor used for all elements of the list */
what = lIntVector2What(QU_Type, queue_field);
1633
lReduceDescr(&rdp, QU_Type, what);
1636
queue_list = lCreateList("", rdp);
1638
/* one reserved queue element per granted queue instance */
for_each(gep, gdil) {
1640
u_long32 slots = lGetUlong(gep, JG_slots);
1641
lListElem *queue = lCreateElem(rdp);
1646
const char *queue_name = lGetString(gep, JG_qname);
1647
char *cqueue_name = cqueue_get_name_from_qinstance(queue_name);
1649
lSetHost(queue, QU_qhostname, lGetHost(gep, JG_qhostname));
1650
lSetString(queue, QU_full_name, queue_name);
1651
lSetString(queue, QU_qname, cqueue_name);
1653
/* the AR duration becomes the hard and the soft runtime limit */
double_print_time_to_dstring(lGetUlong(ar, AR_duration), &buffer);
1654
lSetString(queue, QU_h_rt, sge_dstring_get_string(&buffer));
1655
lSetString(queue, QU_s_rt, sge_dstring_get_string(&buffer));
1660
/* all limits from attr[] are set to "INFINITY" */
while (attr[index] != NoName) {
1661
lSetString(queue, attr[index], value);
1665
lSetUlong(queue, QU_job_slots, slots);
1667
/* consumable requests of the AR are multiplied by the granted slots and
   stored as string and as double value in a copy of the request */
for_each(cr, lGetList(ar, AR_resource_list)) {
1668
if (lGetBool(cr, CE_consumable)) {
1669
double newval = lGetDouble(cr, CE_doubleval) * slots;
1671
sge_dstring_sprintf(&buffer, "%f", newval);
1672
new_cr = lCopyElem(cr);
1673
lSetString(new_cr, CE_stringval, sge_dstring_get_string(&buffer));
1674
lSetDouble(new_cr, CE_doubleval, newval);
1677
crl = lCreateList("", CE_Type);
1679
lAppendElem(crl, new_cr);
1683
/* the collected consumables become the capacity of the reserved queue */
lSetList(queue, QU_consumable_config_list, crl);
1684
lAppendElem(queue_list, queue);
1686
/* ensure availability of implicit slot request */
1687
qinstance_set_conf_slots_used(queue);
1689
/* initialize QU_resource_utilization */
1690
qinstance_debit_consumable(queue, NULL, master_centry_list, 0);
1692
/* initialize QU_state */
1694
lListElem *master_cqueue;
1695
lListElem *master_queue;
1697
/* find the master queue instance the reservation refers to */
master_cqueue = lGetElemStr(master_cqueue_list, CQ_name, cqueue_name);
1698
if (master_cqueue != NULL) {
1699
if ((master_queue = lGetSubStr(master_cqueue, QU_full_name,
1700
queue_name, CQ_qinstances)) != NULL){
1701
/* each state found in the master queue instance is set in the reserved
   queue together with a message that names the queue and the state */
if (qinstance_state_is_ambiguous(master_queue)) {
1702
sge_dstring_sprintf(&buffer, "reserved queue %s is %s", queue_name,
1703
qinstance_state_as_string(QI_AMBIGUOUS));
1704
qinstance_set_error(queue, QI_AMBIGUOUS, sge_dstring_get_string(&buffer), true);
1706
if (qinstance_state_is_alarm(master_queue)) {
1707
sge_dstring_sprintf(&buffer, "reserved queue %s is %s", queue_name,
1708
qinstance_state_as_string(QI_ALARM));
1709
qinstance_set_error(queue, QI_ALARM, sge_dstring_get_string(&buffer), true);
1711
if (qinstance_state_is_suspend_alarm(master_queue)) {
1712
sge_dstring_sprintf(&buffer, "reserved queue %s is %s", queue_name,
1713
qinstance_state_as_string(QI_SUSPEND_ALARM));
1714
qinstance_set_error(queue, QI_SUSPEND_ALARM, sge_dstring_get_string(&buffer), true);
1716
if (qinstance_state_is_manual_disabled(master_queue)) {
1717
sge_dstring_sprintf(&buffer, "reserved queue %s is %s", queue_name,
1718
qinstance_state_as_string(QI_DISABLED));
1719
qinstance_set_error(queue, QI_DISABLED, sge_dstring_get_string(&buffer), true);
1721
if (qinstance_state_is_unknown(master_queue)) {
1722
sge_dstring_sprintf(&buffer, "reserved queue %s is %s", queue_name,
1723
qinstance_state_as_string(QI_UNKNOWN));
1724
qinstance_set_error(queue,QI_UNKNOWN, sge_dstring_get_string(&buffer), true);
1726
if (qinstance_state_is_error(master_queue)) {
1727
sge_dstring_sprintf(&buffer, "reserved queue %s is %s", queue_name,
1728
qinstance_state_as_string(QI_ERROR));
1729
qinstance_set_error(queue, QI_ERROR, sge_dstring_get_string(&buffer), true);
1737
/* attach the finished list to the AR */
lSetList(ar, AR_reserved_queues, queue_list);
1740
sge_dstring_free(&buffer);
1745
/****** sge_advance_reservation_qmaster/sge_ar_remove_all_jobs() ***************
1747
* sge_ar_remove_all_jobs() -- removes all jobs of an AR
1750
* bool sge_ar_remove_all_jobs(sge_gdi_ctx_class_t *ctx, u_long32 ar_id,
1751
* int forced, monitoring_t *monitor)
1754
* The function deletes all jobs (and tasks) that requested the advance
* reservation with the id 'ar_id'. The master job list is walked and for
* every job with a matching JB_ar all defined array tasks between the
* smallest and the biggest task id are handled.
1758
* sge_gdi_ctx_class_t *ctx - context handler
1759
* u_long32 ar_id - advance reservation id
* int forced - forced deletion (a comment in the function
* says that already deleted, recently signaled
* tasks are left alone if deletion is not forced)
1760
* monitoring_t *monitor - monitoring structure
1763
* MT-NOTE: sge_ar_remove_all_jobs() is not MT safe
1764
*******************************************************************************/
1765
bool sge_ar_remove_all_jobs(sge_gdi_ctx_class_t *ctx, u_long32 ar_id, int forced, monitoring_t *monitor)
1767
lListElem *nextjep, *jep;
1768
lListElem *tmp_task;
1771
DENTER(TOP_LAYER, "sge_ar_remove_all_jobs");
1773
nextjep = lFirst(*(object_type_get_master_list(SGE_TYPE_JOB)));
1774
while ((jep=nextjep)) {
1775
u_long32 task_number;
1776
/* task id range that covers the enrolled and the unenrolled tasks of the job */
u_long32 start = MIN(job_get_smallest_unenrolled_task_id(jep),
1777
job_get_smallest_enrolled_task_id(jep));
1778
u_long32 end = MAX(job_get_biggest_unenrolled_task_id(jep),
1779
job_get_biggest_enrolled_task_id(jep));
1781
/* the successor is fetched before the current job is handled */
nextjep = lNext(jep);
1783
/* test whether the job belongs to the given AR */
if (lGetUlong(jep, JB_ar) != ar_id) {
1787
/* NOTE(review): %d is used for u_long32 values in the DPRINTF calls of this
   function, other places in this file use sge_u32c() for such values */
DPRINTF(("removing job %d\n", lGetUlong(jep, JB_job_number)));
1788
DPRINTF((" ----> task_start = %d, task_end = %d\n", start, end));
1790
for (task_number = start;
1794
if (job_is_ja_task_defined(jep, task_number)) {
1796
if (job_is_enrolled(jep, task_number)) {
1797
/* delete enrolled tasks (they have an element in JB_ja_tasks) */
1798
DPRINTF(("removing enrolled task %d.%d\n", lGetUlong(jep, JB_job_number), task_number));
1799
tmp_task = lGetSubUlong(jep, JAT_task_number, task_number, JB_ja_tasks);
1802
* if task is already in status deleted and was signaled
1803
* only recently and deletion is not forced, do nothing
1805
if (ISSET(lGetUlong(tmp_task, JAT_status), JFINISHED)) {
1810
sge_commit_job(ctx, jep, tmp_task, NULL, COMMIT_ST_FINISHED_FAILED_EE,
1811
COMMIT_DEFAULT | COMMIT_NEVER_RAN, monitor);
1813
/* tasks that are not yet flagged JDELETED get marked as deleted */
if (!ISSET(lGetUlong(tmp_task, JAT_state), JDELETED)) {
1814
job_mark_job_as_deleted(ctx, jep, tmp_task);
1819
/* delete unenrolled tasks, they are represented by the pending task template */
1820
DPRINTF(("removing unenrolled task %d.%d\n", lGetUlong(jep, JB_job_number), task_number));
1821
tmp_task = job_get_ja_task_template_pending(jep, task_number);
1823
sge_commit_job(ctx, jep, tmp_task, NULL, COMMIT_ST_FINISHED_FAILED,
1824
COMMIT_NO_SPOOLING | COMMIT_UNENROLLED_TASK | COMMIT_NEVER_RAN,
1834
/****** sge_advance_reservation_qmaster/sge_ar_list_conflicts_with_calendar() ******
1836
* sge_ar_list_conflicts_with_calendar() -- checks if the given calendar
1837
* conflicts with AR open time frame
1840
* bool sge_ar_list_conflicts_with_calendar(lList **answer_list, const char
1841
* *qinstance_name, lListElem *cal_ep, lList *master_ar_list)
1844
* Iterates over all existing advance reservations and verifies for every AR
1845
* that was granted slots in the given queue instance that the new calendar
* does not invalidate the AR. An AR conflicts if
* calendar_open_in_time_frame() returns false for its start time and
* duration. In that case an error is logged and added to the answer list.
1848
* lList **answer_list - answer list
1849
* const char *qinstance_name - qinstance name the calendar was configured
1850
* lListElem *cal_ep - the calendar object (CAL_Type)
1851
* lList *master_ar_list - master AR list
1854
* bool - true if conflicts
1858
* MT-NOTE: sge_ar_list_conflicts_with_calendar() is MT safe
1859
*******************************************************************************/
1861
sge_ar_list_conflicts_with_calendar(lList **answer_list, const char *qinstance_name,
1862
lListElem *cal_ep, lList *master_ar_list)
1866
/* NOTE(review): the trace label lacks the "sge_" prefix of the function name */
DENTER(TOP_LAYER, "ar_list_conflicts_with_calendar");
1868
for_each(ar, master_ar_list) {
1869
/* only ARs that were granted slots in this qinstance are checked */
if (lGetElemStr(lGetList(ar, AR_granted_slots), JG_qname, qinstance_name)) {
1870
u_long32 start_time = lGetUlong(ar, AR_start_time);
1871
u_long32 duration = lGetUlong(ar, AR_duration);
1873
/* the calendar is tested against the time frame of the AR */
if (!calendar_open_in_time_frame(cal_ep, start_time, duration)) {
1874
ERROR((SGE_EVENT, MSG_PARSE_MOD2_REJECTED_DUE_TO_AR_SSU, lGetString(cal_ep, CAL_name),
1875
SGE_ATTR_CALENDAR, sge_u32c(lGetUlong(ar, AR_id))));
1876
answer_list_add(answer_list, SGE_EVENT,
1877
STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
1885
/****** sge_advance_reservation_qmaster/sge_ar_state_set_running() *************
1887
* sge_ar_state_set_running() -- set ar in running state
1890
* void sge_ar_state_set_running(lListElem *ar)
1893
* Sets the AR state to running. A running state can result in error state
1894
* if one of the reserved queues is unable to run a job. This is covered by
* this function: if sge_ar_has_errors() reports errors the state AR_ERROR
* is set instead of AR_RUNNING. State changes are written to the reporting
* log with reporting_create_ar_log_record() and mailed with
* sge_ar_send_mail() as commented in the function body.
1898
* lListElem *ar - advance reservation object (AR_Type)
1901
* MT-NOTE: sge_ar_state_set_running() is MT safe
1904
* sge_advance_reservation_qmaster/sge_ar_state_set_exited()
1905
* sge_advance_reservation_qmaster/sge_ar_state_set_deleted()
1906
* sge_advance_reservation_qmaster/sge_ar_state_set_waiting()
1907
*******************************************************************************/
1908
void sge_ar_state_set_running(lListElem *ar)
1910
/* the previous state decides which log records and mails are generated */
u_long32 old_state = lGetUlong(ar, AR_state);
1912
/* special handling for ARs that are already deleted or exited */
if (old_state == AR_DELETED || old_state == AR_EXITED) {
1916
if (sge_ar_has_errors(ar)) {
1917
lSetUlong(ar, AR_state, AR_ERROR);
1918
if (old_state != AR_WARNING && old_state != lGetUlong(ar, AR_state)) {
1919
/* state change from "running" to "error" */
1920
reporting_create_ar_log_record(NULL, ar, ARL_UNSATISFIED, "AR resources unsatisfied", sge_get_gmt());
1921
sge_ar_send_mail(ar, MAIL_AT_ABORT);
1922
} else if (old_state != lGetUlong(ar, AR_state)) {
1923
/* state change from "warning" to "error" */
1924
sge_ar_send_mail(ar, MAIL_AT_ABORT);
1927
lSetUlong(ar, AR_state, AR_RUNNING);
1928
if (old_state != AR_WAITING && old_state != lGetUlong(ar, AR_state)) {
1929
/* state change from "error" to "running" */
1930
reporting_create_ar_log_record(NULL, ar, ARL_OK, "AR resources satisfied", sge_get_gmt());
1931
/* NOTE(review): MAIL_AT_ABORT is also used for this recovery transition - confirm this is intended */
sge_ar_send_mail(ar, MAIL_AT_ABORT);
1936
/****** sge_advance_reservation_qmaster/sge_ar_state_set_waiting() *************
1938
* sge_ar_state_set_waiting() -- set ar in waiting state
1941
* void sge_ar_state_set_waiting(lListElem *ar)
1944
* Sets the AR state to waiting. A waiting state can result in warning state
1945
* if one of the reserved queues is unable to run a job. This is covered by
* this function: if sge_ar_has_errors() reports errors the state AR_WARNING
* is set instead of AR_WAITING. A reporting log record is written with
* reporting_create_ar_log_record() when the state differs from the
* previous one. No mail is sent by this function.
1949
* lListElem *ar - advance reservation object (AR_Type)
1952
* MT-NOTE: sge_ar_state_set_waiting() is MT safe
1955
* sge_advance_reservation_qmaster/sge_ar_state_set_exited()
1956
* sge_advance_reservation_qmaster/sge_ar_state_set_deleted()
1957
* sge_advance_reservation_qmaster/sge_ar_state_set_running()
1958
*******************************************************************************/
1959
void sge_ar_state_set_waiting(lListElem *ar)
1961
/* the previous state decides whether a log record is generated */
u_long32 old_state = lGetUlong(ar, AR_state);
1963
/* special handling for ARs that are already deleted or exited */
if (old_state == AR_DELETED || old_state == AR_EXITED) {
1967
if (sge_ar_has_errors(ar)) {
1968
lSetUlong(ar, AR_state, AR_WARNING);
1969
/* log only a real state change */
if (old_state != lGetUlong(ar, AR_state)) {
1970
reporting_create_ar_log_record(NULL, ar, ARL_UNSATISFIED, "AR resources unsatisfied", sge_get_gmt());
1973
lSetUlong(ar, AR_state, AR_WAITING);
1974
/* log only a real state change */
if (old_state != lGetUlong(ar, AR_state)) {
1975
reporting_create_ar_log_record(NULL, ar, ARL_OK, "AR resources satisfied", sge_get_gmt());
1980
/****** sge_advance_reservation_qmaster/sge_ar_state_set_deleted() *************
1982
* sge_ar_state_set_deleted() -- sets AR into deleted state
1985
* void sge_ar_state_set_deleted(lListElem *ar)
1988
* Sets the AR state to deleted. AR_state is overwritten with AR_DELETED
* without looking at the previous state.
1991
* lListElem *ar - advance reservation object (AR_Type)
1994
* MT-NOTE: sge_ar_state_set_deleted() is MT safe
1997
* sge_advance_reservation_qmaster/sge_ar_state_set_exited()
1998
* sge_advance_reservation_qmaster/sge_ar_state_set_waiting()
1999
* sge_advance_reservation_qmaster/sge_ar_state_set_running()
2000
*******************************************************************************/
2001
void sge_ar_state_set_deleted(lListElem *ar) {
2002
lSetUlong(ar, AR_state, AR_DELETED);
2005
/****** sge_advance_reservation_qmaster/sge_ar_state_set_exited() **************
2007
* sge_ar_state_set_exited() -- sets AR into exited state
2010
* void sge_ar_state_set_exited(lListElem *ar)
2013
* Sets the AR state to exited. AR_state is overwritten with AR_EXITED
* without looking at the previous state.
2016
* lListElem *ar - advance reservation object (AR_Type)
2019
* MT-NOTE: sge_ar_state_set_exited() is MT safe
2022
* sge_advance_reservation_qmaster/sge_ar_state_set_deleted()
2023
* sge_advance_reservation_qmaster/sge_ar_state_set_waiting()
2024
* sge_advance_reservation_qmaster/sge_ar_state_set_running()
2025
*******************************************************************************/
2026
void sge_ar_state_set_exited(lListElem *ar) {
2027
lSetUlong(ar, AR_state, AR_EXITED);
2030
/****** sge_advance_reservation_qmaster/sge_ar_list_set_error_state() **********
2032
* sge_ar_list_set_error_state() -- Set/unset all ARs reserved in a specific queue
2036
* void sge_ar_list_set_error_state(lList *ar_list, const char *qname,
2037
* u_long32 error_type, bool send_events, bool set_error)
2040
* The function sets/unsets all ARs that reserved in a queue in the error state and
2041
* generates the error messages for qrstat -explain
*
* For every AR whose AR_reserved_queues list contains 'qname' the error
* type is set or unset in the reserved queue with qinstance_set_error().
* Afterwards the AR state is recomputed with sge_ar_state_set_running()
* or sge_ar_state_set_waiting(), depending on whether the AR was in state
* AR_RUNNING/AR_ERROR or not.
2045
* lList *ar_list - master advance reservation list
2046
* const char *qname - queue name
2047
* u_long32 error_type - error type
2048
* bool send_events - send events?
2049
* bool set_error - set or unset
2052
* MT-NOTE: sge_ar_list_set_error_state() is MT safe
2053
*******************************************************************************/
2054
void sge_ar_list_set_error_state(lList *ar_list, const char *qname, u_long32 error_type,
2055
bool send_events, bool set_error)
2058
bool start_time_reached;
2059
dstring buffer = DSTRING_INIT;
2061
DENTER(TOP_LAYER, "sge_ar_list_set_error_state");
2063
for_each(ar, ar_list) {
2064
lListElem *qinstance;
2065
/* despite its name this is the AR_reserved_queues list (reduced queue
   elements), not AR_granted_slots */
lList *granted_slots = lGetList(ar, AR_reserved_queues);
2068
if (lGetUlong(ar, AR_state) == AR_ERROR || lGetUlong(ar, AR_state) == AR_WARNING) {
2069
/* ignore, AR is already in error state */
2073
if (lGetUlong(ar, AR_state) == AR_RUNNING || lGetUlong(ar, AR_state) == AR_WAITING) {
2074
/* ignore, AR is already in non error state */
2079
/* AR_RUNNING and AR_ERROR are the states of an AR whose start time is reached */
if (lGetUlong(ar, AR_state) == AR_RUNNING || lGetUlong(ar, AR_state) == AR_ERROR) {
2080
start_time_reached= true;
2082
start_time_reached= false;
2085
/* only ARs that reserved the given queue are touched */
if ((qinstance =lGetElemStr(granted_slots, QU_full_name, qname)) != NULL) {
2086
sge_dstring_sprintf(&buffer, MSG_AR_RESERVEDQUEUEHASERROR_SS, qname,
2087
qinstance_state_as_string(error_type));
2088
qinstance_set_error(qinstance, error_type, sge_dstring_get_string(&buffer), set_error);
2091
/* recompute the AR state on the same side of the start time */
if (start_time_reached) {
2092
sge_ar_state_set_running(ar);
2094
sge_ar_state_set_waiting(ar);
2098
/* this info is not spooled */
2099
/* the AR id in string form is the key of the event.
   NOTE(review): sge_U32CFormat is used without sge_u32c() here, other
   places in this file wrap u_long32 values with sge_u32c() - confirm */
sge_dstring_sprintf(&buffer, sge_U32CFormat, lGetUlong(ar, AR_id));
2100
sge_add_event(0, sgeE_AR_MOD, 0, 0,
2101
sge_dstring_get_string(&buffer), NULL, NULL, ar);
2102
lListElem_clear_changed_info(ar);
2107
sge_dstring_free(&buffer);
2111
/****** sge_advance_reservation_qmaster/sge_ar_send_mail() *********************
2113
* sge_ar_send_mail() -- send mail for advance reservation state change
2116
* static void sge_ar_send_mail(lListElem *ar, int type)
2119
* Create and send mail for a specific event
2122
* lListElem *ar - advance reservation object (AR_Type)
2123
* int type - event type
2126
* MT-NOTE: sge_ar_send_mail() is MT safe
2127
*******************************************************************************/
2128
/*
 * Compose subject and body for an advance reservation (AR) notification
 * and hand them to cull_mail() together with the AR's mail list.
 *
 * ar   - AR element; AR_mail_options, AR_id, AR_name, AR_owner, AR_state,
 *        AR_start_time, AR_end_time and AR_mail_list are read from it
 * type - mail event; checked against AR_mail_options with VALID()
 *
 * NOTE(review): several lines of this function (braces, the switch header,
 * some case labels, else lines and the return statements) are not visible
 * in this copy of the file; comments below are hedged where that matters.
 */
static void sge_ar_send_mail(lListElem *ar, int type)
2130
/* scratch string: holds the mail option text for the debug message below,
   and later the time stamp that is inserted into the mail body */
dstring buffer = DSTRING_INIT;
2131
dstring subject = DSTRING_INIT;
2132
dstring body = DSTRING_INIT;
2134
const char *ar_name;
2135
/* stays NULL unless one of the branches below assigns a MSG_MAIL_TYPE_* value */
const char *mail_type = NULL;
2137
DENTER(TOP_LAYER, "sge_ar_send_mail");
2139
/* the AR did not ask for this kind of mail: log it and release all three
   strings (presumably followed by an early return - not visible here) */
if (!VALID(type, lGetUlong(ar, AR_mail_options))) {
2140
sge_dstring_append_mailopt(&buffer, type);
2141
DPRINTF(("mailopt %s was not requested\n", sge_dstring_get_string(&buffer)));
2142
sge_dstring_free(&subject);
2143
sge_dstring_free(&body);
2144
sge_dstring_free(&buffer);
2148
ar_id = lGetUlong(ar, AR_id);
2149
/* may be NULL; every use below substitutes "none" in that case */
ar_name = lGetString(ar, AR_name);
2152
/* AR started: the time stamp is the AR's start time */
case MAIL_AT_BEGINNING:
2153
sge_ctime((time_t)lGetUlong(ar, AR_start_time), &buffer);
2154
sge_dstring_sprintf(&subject, MSG_MAIL_ARSTARTEDSUBJ_US,
2155
sge_u32c(ar_id), ar_name?ar_name:"none");
2156
sge_dstring_sprintf(&body, MSG_MAIL_ARSTARTBODY_USSS,
2157
sge_u32c(ar_id), ar_name?ar_name:"none", lGetString(ar, AR_owner), sge_dstring_get_string(&buffer));
2158
mail_type = MSG_MAIL_TYPE_ARSTART;
2161
/* NOTE(review): the case label for this branch is not visible here.
   A deleted AR is reported with the current time ... */
if (lGetUlong(ar, AR_state) == AR_DELETED) {
2162
sge_ctime((time_t)sge_get_gmt(), &buffer);
2163
sge_dstring_sprintf(&subject, MSG_MAIL_ARDELETEDSUBJ_US,
2164
sge_u32c(ar_id), ar_name?ar_name:"none");
2165
sge_dstring_sprintf(&body, MSG_MAIL_ARDELETETBODY_USSS,
2166
sge_u32c(ar_id), ar_name?ar_name:"none", lGetString(ar, AR_owner), sge_dstring_get_string(&buffer));
2167
mail_type = MSG_MAIL_TYPE_ARDELETE;
2169
/* ... while the "exited" mail uses the AR's end time (presumably the
   else branch of the AR_DELETED test - the else line is not visible) */
sge_ctime((time_t)lGetUlong(ar, AR_end_time), &buffer);
2170
sge_dstring_sprintf(&subject, MSG_MAIL_AREXITEDSUBJ_US,
2171
sge_u32c(ar_id), ar_name?ar_name:"none");
2172
sge_dstring_sprintf(&body, MSG_MAIL_AREXITBODY_USSS,
2173
sge_u32c(ar_id), ar_name?ar_name:"none", lGetString(ar, AR_owner), sge_dstring_get_string(&buffer));
2174
mail_type = MSG_MAIL_TYPE_AREND;
2178
/* NOTE(review): the case label for this branch is not visible here.
   AR currently in error state: send the error mail, stamped with the
   current time */
if (lGetUlong(ar, AR_state) == AR_ERROR) {
2179
sge_ctime((time_t)sge_get_gmt(), &buffer);
2180
sge_dstring_sprintf(&subject, MSG_MAIL_ARERRORSUBJ_US,
2181
sge_u32c(ar_id), ar_name?ar_name:"none");
2182
sge_dstring_sprintf(&body, MSG_MAIL_ARERRORBODY_USSS,
2183
sge_u32c(ar_id), ar_name?ar_name:"none", lGetString(ar, AR_owner), sge_dstring_get_string(&buffer));
2184
mail_type = MSG_MAIL_TYPE_ARERROR;
2186
/* otherwise the "ok" mail is sent, also stamped with the current time
   (presumably the else branch of the AR_ERROR test) */
sge_ctime((time_t)sge_get_gmt(), &buffer);
2187
sge_dstring_sprintf(&subject, MSG_MAIL_AROKSUBJ_US,
2188
sge_u32c(ar_id), ar_name?ar_name:"none");
2189
sge_dstring_sprintf(&body, MSG_MAIL_AROKBODY_USSS,
2190
sge_u32c(ar_id), ar_name?ar_name:"none", lGetString(ar, AR_owner), sge_dstring_get_string(&buffer));
2191
mail_type = MSG_MAIL_TYPE_AROK;
2195
/* should never happen */
2199
/* NOTE(review): if the "should never happen" branch does not return,
   this call runs with unset subject/body and mail_type == NULL - verify */
cull_mail(QMASTER, lGetList(ar, AR_mail_list), sge_dstring_get_string(&subject), sge_dstring_get_string(&body), mail_type);
2201
/* release the three strings on the normal path as well */
sge_dstring_free(&buffer);
2202
sge_dstring_free(&subject);
2203
sge_dstring_free(&body);
2208
/****** sge_advance_reservation_qmaster/ar_list_has_reservation_due_to_qinstance_complex_attr() ******
2210
* ar_list_has_reservation_due_to_qinstance_complex_attr() -- check
2211
* if change of complex values is valid concerning ar
2214
* bool ar_list_has_reservation_due_to_qinstance_complex_attr(
2215
* lList *ar_master_list,
2216
* lList **answer_list,
2217
* lListElem *qinstance,
2218
* lList *ce_master_list)
2221
* Check if the modification of the complex_values of a qinstance
2222
* would break existing advance reservations
2225
* lList *ar_master_list - master AR list
2226
* lList **answer_list - answer list
2227
* lListElem *qinstance - qinstance
2228
* lList *ce_master_list - master centry list
2232
* true - modification is not allowed
2233
* false - modification is allowed
2236
* MT-NOTE: ar_list_has_reservation_due_to_qinstance_complex_attr() is
2238
*******************************************************************************/
2240
/*
 * Check whether the complex_values now configured on a qinstance still
 * satisfy the advance reservations that hold slots on that qinstance.
 *
 * ar_master_list - ARs to check
 * answer_list    - receives an error entry for every violation reported
 * qinstance      - qinstance whose QU_consumable_config_list is checked
 * ce_master_list - complex entry definitions, looked up by CE_name
 *
 * Two passes are made per AR that has a granted-slots entry for this
 * qinstance: one over the AR's non-consumable resource requests and one
 * over the qinstance's consumable resource utilization.
 *
 * NOTE(review): several lines of this function (braces, some declarations,
 * else lines and the return statements) are not visible in this copy of
 * the file; they are left untouched here.
 */
ar_list_has_reservation_due_to_qinstance_complex_attr(lList *ar_master_list,
2241
lList **answer_list,
2242
lListElem *qinstance,
2243
lList *ce_master_list)
2245
lListElem *ar = NULL;
2248
DENTER(TOP_LAYER, "ar_list_has_reservation_due_to_qinstance_complex_attr");
2250
for_each(ar, ar_master_list) {
2251
const char *qinstance_name = lGetString(qinstance, QU_full_name);
2253
/* only ARs that were granted slots on this qinstance are affected */
if ((gs =lGetElemStr(lGetList(ar, AR_granted_slots), JG_qname, qinstance_name))) {
2255
lListElem *rue = NULL;
2256
lListElem *request = NULL;
2259
/* pass 1: non-consumable requests of the AR */
for_each(request, lGetList(ar, AR_resource_list)) {
2260
const char *ce_name = lGetString(request, CE_name);
2261
/* NOTE(review): ce is used without a NULL check - confirm that every
   requested name is guaranteed to exist in ce_master_list */
lListElem *ce = lGetElemStr(ce_master_list, CE_name, ce_name);
2262
bool is_consumable = (lGetBool(ce, CE_consumable) > 0) ? true : false;
2264
if (!is_consumable) {
2266
u_long32 slots = lGetUlong(gs, JG_slots);
2267
lListElem *current = lGetSubStr(qinstance, CE_name,
2268
ce_name, QU_consumable_config_list);
2269
if (current != NULL) {
2270
/* work on a private copy so the qinstance's own entry is not modified */
current = lCopyElem(current);
2271
/* take the relop from the master definition and mirror the configured
   values into the pj_ fields before comparing against the request */
lSetUlong(current, CE_relop, lGetUlong(ce, CE_relop));
2272
lSetDouble(current, CE_pj_doubleval, lGetDouble(current, CE_doubleval));
2273
lSetString(current, CE_pj_stringval, lGetString(current, CE_stringval));
2275
/* a result of 0 is reported as "modification denied due to AR" */
if (compare_complexes(slots, request, current, text, false, true) == 0) {
2276
ERROR((SGE_EVENT, MSG_QUEUE_MODCMPLXDENYDUETOAR_SS, ce_name,
2277
SGE_ATTR_COMPLEX_VALUES));
2278
answer_list_add(answer_list, SGE_EVENT,
2279
STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
2280
/* release the private copy on the error path */
lFreeElem(&current);
2283
/* release the private copy when the comparison passed */
lFreeElem(&current);
2288
/* now it gets expensive. Before we can start the check at first we have to build the
2289
consumable config list. */
2290
qinstance_reinit_consumable_actual_list(qinstance, answer_list);
2291
rue_list = lGetList(qinstance, QU_resource_utilization);
2293
/* pass 2: consumables booked in the qinstance's resource utilization */
for_each(rue, rue_list) {
2294
const char *ce_name = lGetString(rue, RUE_name);
2295
/* NOTE(review): same unchecked lookup as in pass 1 */
lListElem *ce = lGetElemStr(ce_master_list, CE_name, ce_name);
2296
bool is_consumable = (lGetBool(ce, CE_consumable) > 0) ? true : false;
2298
if (is_consumable) {
2299
lListElem *rde = NULL;
2300
lList * rde_list = lGetList(rue, RUE_utilized);
2301
lListElem *cv = lGetSubStr(qinstance, CE_name, ce_name, QU_consumable_config_list);
2304
/* presumably reached when cv is NULL, i.e. the consumable is no longer
   configured on the qinstance - the condition line is not visible here */
ERROR((SGE_EVENT, MSG_QUEUE_MODNOCMPLXDENYDUETOAR_SS,
2305
ce_name, SGE_ATTR_COMPLEX_VALUES));
2306
answer_list_add(answer_list, SGE_EVENT,
2307
STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
2310
double configured = lGetDouble(cv, CE_doubleval);
2312
/* no utilization entry may exceed the configured capacity */
for_each(rde, rde_list) {
2313
double amount = lGetDouble(rde, RDE_amount);
2315
if (amount > configured) {
2316
ERROR((SGE_EVENT, MSG_QUEUE_MODCMPLXDENYDUETOAR_SS, ce_name,
2317
SGE_ATTR_COMPLEX_VALUES));
2318
answer_list_add(answer_list, SGE_EVENT,
2319
STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
2331
/****** sge_advance_reservation_qmaster/ar_list_has_reservation_due_to_host_complex_attr() ******
2333
* ar_list_has_reservation_due_to_host_complex_attr() -- check
2334
* if change of complex values is valid concerning ar
2337
* bool ar_list_has_reservation_due_to_host_complex_attr(
2338
* lList *ar_master_list,
2339
* lList **answer_list,
2341
* lList *ce_master_list)
2344
* Check if the modification of the complex_values of a host
2345
* would break existing advance reservations.
2348
* lList *ar_master_list - master AR list
2349
* lList **answer_list - AN_Type list
2350
* lListElem *host - host
2351
* lList *ce_master_list - master centry list
2355
* true - modification is not allowed
2356
* false - modification is allowed
2359
* MT-NOTE: ar_list_has_reservation_due_to_host_complex_attr() is MT
2361
*******************************************************************************/
2363
ar_list_has_reservation_due_to_host_complex_attr(lList *ar_master_list, lList **answer_list,
2364
lListElem *host, lList *ce_master_list)
2366
lListElem *ar = NULL;
2367
const char *hostname = lGetHost(host, EH_name);
2369
DENTER(TOP_LAYER, "ar_list_has_reservation_due_to_host_complex_attr");
2371
for_each(ar, ar_master_list) {
2372
lListElem *gs = NULL;
2374
for_each(gs, lGetList(ar, AR_granted_slots)) {
2375
const char *gh = lGetHost(gs, JG_qhostname);
2377
if (!sge_hostcmp(gh, hostname)) {
2378
lListElem *rue = NULL;
2379
lListElem *request = NULL;
2380
lList *rue_list = lGetList(host, EH_resource_utilization);
2382
for_each(request, lGetList(ar, AR_resource_list)) {
2383
const char *ce_name = lGetString(request, CE_name);
2384
lListElem *ce = lGetElemStr(ce_master_list, CE_name, ce_name);
2385
bool is_consumable = (lGetBool(ce, CE_consumable) > 0) ? true : false;
2387
if (!is_consumable) {
2389
u_long32 slots = lGetUlong(gs, JG_slots);
2390
lListElem *current = lGetSubStr(host, CE_name,
2391
ce_name, EH_consumable_config_list);
2392
if (current != NULL) {
2393
current = lCopyElem(current);
2394
lSetUlong(current, CE_relop, lGetUlong(ce, CE_relop));
2395
lSetDouble(current, CE_pj_doubleval, lGetDouble(current, CE_doubleval));
2396
lSetString(current, CE_pj_stringval, lGetString(current, CE_stringval));
2398
if (compare_complexes(slots, request, current, text, false, true) == 0) {
2399
ERROR((SGE_EVENT, MSG_QUEUE_MODCMPLXDENYDUETOAR_SS, ce_name,
2400
SGE_ATTR_COMPLEX_VALUES));
2401
answer_list_add(answer_list, SGE_EVENT,
2402
STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
2403
lFreeElem(¤t);
2406
lFreeElem(¤t);
2410
for_each(rue, rue_list) {
2411
const char *ce_name = lGetString(rue, RUE_name);
2412
lListElem *ce = lGetElemStr(ce_master_list, CE_name, ce_name);
2413
bool is_consumable = (lGetBool(ce, CE_consumable) > 0) ? true : false;
2415
if (is_consumable) {
2416
lListElem *rde = NULL;
2417
lList *rde_list = lGetList(rue, RUE_utilized);
2418
lListElem *cv = lGetSubStr(host, CE_name, ce_name, EH_consumable_config_list);
2421
ERROR((SGE_EVENT, MSG_QUEUE_MODNOCMPLXDENYDUETOAR_SS,
2422
ce_name, SGE_ATTR_COMPLEX_VALUES));
2423
answer_list_add(answer_list, SGE_EVENT,
2424
STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);
2427
double configured = lGetDouble(cv, CE_doubleval);
2429
for_each(rde, rde_list) {
2430
double amount = lGetDouble(rde, RDE_amount);
2432
if (amount > configured) {
2433
ERROR((SGE_EVENT, MSG_QUEUE_MODCMPLXDENYDUETOAR_SS,
2434
ce_name, SGE_ATTR_COMPLEX_VALUES));
2435
answer_list_add(answer_list, SGE_EVENT,
2436
STATUS_ESYNTAX, ANSWER_QUALITY_ERROR);