1
/*___INFO__MARK_BEGIN__*/
2
/**************************************************************************
4
* The Contents of this file are made available subject to the terms of
5
* the Sun Industry Standards Source License Version 1.2
7
* Sun Microsystems Inc., March, 2001
9
* Sun Industry Standards Source License Version 1.2
10
* =================================================
11
* The contents of this file are subject to the Sun Industry Standards
12
* Source License Version 1.2 (the "License"); You may not use this file
13
* except in compliance with the License. You may obtain a copy of the
14
* License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
16
* Software provided under this License is provided on an "AS IS" basis,
17
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
18
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
19
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
20
* See the License for the specific provisions governing your rights and
21
* obligations concerning the Software.
23
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
25
* Copyright: 2001 by Sun Microsystems, Inc.
27
* All Rights Reserved.
29
************************************************************************/
30
/*___INFO__MARK_END__*/
38
#include "uti/sge_stdlib.h"
39
#include "uti/sge_stdio.h"
48
#include "sge_sched.h"
49
#include "sge_object.h"
50
#include "sge_feature.h"
51
#include "sge_manop.h"
53
#include "sge_ja_task.h"
56
#include "sge_job_qmaster.h"
57
#include "sge_cqueue_qmaster.h"
58
#include "sge_give_jobs.h"
59
#include "sge_pe_qmaster.h"
60
#include "sge_qmod_qmaster.h"
61
#include "sge_userset_qmaster.h"
62
#include "sge_ckpt_qmaster.h"
63
#include "job_report_qmaster.h"
64
#include "sge_parse_num_par.h"
65
#include "sge_event_master.h"
66
#include "sge_signal.h"
67
#include "sge_subordinate_qmaster.h"
68
#include "sge_advance_reservation.h"
69
#include "sge_userset.h"
70
#include "sge_userprj_qmaster.h"
72
#include "cull_parse_util.h"
73
#include "schedd_monitor.h"
74
#include "sge_messageL.h"
76
#include "sge_afsutil.h"
77
#include "sge_ulongL.h"
78
#include "setup_path.h"
79
#include "sge_string.h"
80
#include "sge_security.h"
81
#include "sge_range.h"
83
#include "sge_job_schedd.h"
84
#include "sge_qmaster_main.h"
85
#include "sge_suser.h"
87
#include "sge_hostname.h"
89
#include "sge_answer.h"
90
#include "sge_schedd_conf.h"
91
#include "sge_qinstance.h"
93
#include "sge_userprj.h"
94
#include "sge_centry.h"
95
#include "sge_cqueue.h"
97
#include "sge_utility.h"
99
#include "sge_mtutil.h"
100
#include "sge_task_depend.h"
101
#include "sgeobj/sge_pe_taskL.h"
102
#include "sgeobj/sge_pe_task.h"
104
#include "sge_persistence_qmaster.h"
105
#include "sge_reporting_qmaster.h"
106
#include "spool/sge_spooling.h"
107
#include "uti/sge_profiling.h"
108
#include "uti/sge_bootstrap.h"
109
#include "uti/sge_string.h"
111
#include "msg_common.h"
112
#include "msg_qmaster.h"
113
#include "msg_daemons_common.h"
116
/****** qmaster/job/spooling ***************************************************
119
* job spooling -- when are jobs/ja_tasks/pe_tasks spooled?
122
* Spooling of jobs is done when
123
* - a new job is added
124
* - a job is modified (qalter)
125
* - a ja_task has been created
126
* - the jobs ja_tasks are partly deleted (not all tasks)
127
* - a job leaves qmaster (all tasks finished)
129
* Spooling of ja_tasks is done when
130
* - a ja_task is created (as result of schedd start order)
131
* - a ja_task is sent to execd
132
* - a ja_task has been received (ack) by an execd
133
* - a ja_task is rescheduled
134
* - ja_task delivery to execd failed (reschedule)
135
* - the ja_task is marked as deleted
136
* - jobs are notified about exec host shutdown
137
* - for long running ja_tasks, the reported usage is spooled once a day
138
* - a ja_task is (un)suspended on threshold
139
* - a job is (un)suspended (qmod)
140
* - a job error state is cleared
142
* Spooling of pe_tasks is done when
143
* - a new pe_task has been reported from execd
144
* - for long running pe_tasks, the reported usage is spooled once a day
145
* - for finished pe_tasks, usage is summed up in a container pe_task.
146
* This container is spooled whenever usage is summed up.
147
* - a pe_task is deleted
149
*******************************************************************************/
154
/* NOTE(review): judging by its name this mutex guards job number handling; its users are not part of this excerpt - confirm */
pthread_mutex_t job_number_mutex;
157
/* NOTE(review): the members of job_number_t are declared elsewhere; only the initializer values (0, false, a statically initialized pthread mutex) are visible here */
job_number_t job_number_control = {0, false, PTHREAD_MUTEX_INITIALIZER};
159
/* Forward declarations of the file-local (static) helper functions. */
static int mod_task_attributes(lListElem *job, lListElem *new_ja_task, lListElem *tep,
160
lList **alpp, char *ruser, char *rhost, int *trigger,
161
int is_array, int is_task_enrolled);
163
static int mod_job_attributes(lListElem *new_job, lListElem *jep, lList **alpp,
164
char *ruser, char *rhost, int *trigger);
166
static void set_context(lList *jbctx, lListElem *job);
168
static u_long32 guess_highest_job_number(void);
170
static int verify_suitable_queues(lList **alpp, lListElem *jep, int *trigger);
172
static int job_verify_predecessors(lListElem *job, lList **alpp);
174
static int job_verify_predecessors_ad(lListElem *job, lList **alpp);
176
/* NOTE(review): the remaining parameters of this prototype are not part of this excerpt */
static bool contains_dependency_cycles(const lListElem * new_job, u_long32 job_number,
179
static int verify_job_list_filter(lList **alpp, int all_users_flag, int all_jobs_flag,
180
int jid_flag, int user_list_flag, char *ruser);
182
/* used by sge_gdi_del_job() to report that the job filter selected nothing */
static void empty_job_list_filter(lList **alpp, int was_modify, int user_list_flag,
183
lList *user_list, int jid_flag, const char *jobid,
184
int all_users_flag, int all_jobs_flag, char *ruser,
185
int is_array, u_long32 start, u_long32 end, u_long32 step);
187
static u_long32 sge_get_job_number(sge_gdi_ctx_class_t *ctx, monitoring_t *monitor);
189
static void get_rid_of_schedd_job_messages(u_long32 job_number);
191
static bool is_changes_consumables(lList **alpp, lList* new, lList* old);
193
static int deny_soft_consumables(lList **alpp, lList *srl, const lList *master_centry_list);
195
static void job_list_filter(lList *user_list, const char* jobid, lCondition **job_filter);
197
static int sge_delete_all_tasks_of_job(sge_gdi_ctx_class_t *ctx, lList **alpp, const char *ruser, const char *rhost, lListElem *job, u_long32 *r_start, u_long32 *r_end, u_long32 *step, lList* ja_structure, int *alltasks, u_long32 *deleted_tasks, u_long32 start_time, monitoring_t *monitor, int forced, bool *deletion_time_reached);
200
/* when this character is modified, the same change must also be made to
201
the JOB_NAME_DEL in clients/qalter/qalter.c
203
static const char JOB_NAME_DEL = ':';
205
/*-------------------------------------------------------------------------*/
206
/* sge_gdi_add_job */
207
/* called in sge_c_gdi_add */
210
/* jepp is set to NULL, if the job was successfully added */
212
/* MT-Note: it is thread safe. It is using the global lock to secure the */
213
/* non-thread-safe functions */
214
/*-------------------------------------------------------------------------*/
215
/*
 * Validates a job submitted through GDI, assigns it a job number, spools it
 * and stores a copy of it in the master job list.
 *
 * Checks visible in this excerpt, in order: NULL arguments, minimum uid/gid,
 * qsh display, id sublists, host resolution of the path lists, script
 * presence, account and job name, array task limit, job limits per cluster
 * and per user, global access lists, resource requests, queue references,
 * parallel environment, checkpointing, queue permissions, user, project,
 * department, deadline, advance reservation, schedulability, priority,
 * predecessors and credentials.
 *
 * Most visible failure paths append an answer to *alpp and leave through
 * DRETURN with a STATUS_* code.
 *
 * NOTE(review): this excerpt does not contain every line of the definition
 * (several declarations, braces and statements are missing), so the comments
 * added here only describe what is visible.
 */
int sge_gdi_add_job(sge_gdi_ctx_class_t *ctx,
216
lListElem *jep, lList **alpp, lList **lpp, char *ruser,
217
char *rhost, uid_t uid, gid_t gid, char *group,
218
sge_gdi_packet_class_t *packet, sge_gdi_task_class_t *task, monitoring_t *monitor)
221
const char *pe_name = NULL;
222
const char *project = NULL;
223
const char *ckpt_name = NULL;
224
u_long32 ckpt_attr, ckpt_inter;
226
lListElem *ckpt_ep = NULL;
227
char str[1024 + 1]="";
233
lList *pe_range = NULL;
234
lList* user_lists = NULL;
235
lList* xuser_lists = NULL;
236
bool job_spooling = ctx->get_job_spooling(ctx);
237
const char *sge_root = ctx->get_sge_root(ctx);
239
DENTER(TOP_LAYER, "sge_gdi_add_job");
241
/* reject the request if one of the mandatory arguments is missing */
if ( !jep || !ruser || !rhost ) {
242
CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
243
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
244
DRETURN(STATUS_EUNKNOWN);
248
/* refuse submitters whose uid or gid is below the configured minimum */
if (uid < mconf_get_min_uid()) {
249
ERROR((SGE_EVENT, MSG_JOB_UID2LOW_II, (int)uid, (int)mconf_get_min_uid()));
250
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
251
DRETURN(STATUS_EUNKNOWN);
255
if (gid < mconf_get_min_gid()) {
256
ERROR((SGE_EVENT, MSG_JOB_GID2LOW_II, (int)gid, (int)mconf_get_min_gid()));
257
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
258
DRETURN(STATUS_EUNKNOWN);
261
/* check for qsh without DISPLAY set */
262
if (JOB_TYPE_IS_QSH(lGetUlong(jep, JB_type))) {
263
int ret = job_check_qsh_display(jep, alpp, false);
264
if (ret != STATUS_OK) {
270
* fill in user and group
272
* this is not done by the submitter because we want to implement an
273
* gdi submit request it would be bad if you could say
274
* job->uid = 0 before submitting
276
lSetString(jep, JB_owner, ruser);
277
lSetUlong(jep, JB_uid, uid);
278
lSetString(jep, JB_group, group);
279
lSetUlong(jep, JB_gid, gid);
281
job_check_correct_id_sublists(jep, alpp);
282
if (answer_list_has_error(alpp)) {
283
DRETURN(STATUS_EUNKNOWN);
287
* resolve host names. If this is not possible an error is produced
291
if ((status = job_resolve_host_for_path_list(jep, alpp, JB_stdout_path_list)) != STATUS_OK) {
295
if ((status = job_resolve_host_for_path_list(jep, alpp, JB_stdin_path_list)) != STATUS_OK) {
299
if ((status = job_resolve_host_for_path_list(jep, alpp,JB_shell_list)) != STATUS_OK) {
303
if ((status = job_resolve_host_for_path_list(jep, alpp, JB_stderr_path_list)) != STATUS_OK) {
308
/* a non-binary job that names a script file but carries no script content is rejected */
if ((!JOB_TYPE_IS_BINARY(lGetUlong(jep, JB_type)) &&
309
!lGetString(jep, JB_script_ptr) && lGetString(jep, JB_script_file))) {
310
ERROR((SGE_EVENT, MSG_JOB_NOSCRIPT));
311
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN,
312
ANSWER_QUALITY_ERROR);
313
DRETURN(STATUS_EUNKNOWN);
316
/* command line -c switch has higher precedence than ckpt "when" */
317
ckpt_attr = lGetUlong(jep, JB_checkpoint_attr);
318
ckpt_inter = lGetUlong(jep, JB_checkpoint_interval);
319
ckpt_name = lGetString(jep, JB_checkpoint_name);
321
/* stamp the submission time and reset the task and successor lists of the submitted element */
lSetUlong(jep, JB_submission_time, sge_get_gmt());
323
lSetList(jep, JB_ja_tasks, NULL);
324
lSetList(jep, JB_jid_successor_list, NULL);
325
lSetList(jep, JB_ja_ad_successor_list, NULL);
327
/* make sure the job has a ja task template */
if (lGetList(jep, JB_ja_template) == NULL) {
328
lAddSubUlong(jep, JAT_task_number, 0, JB_ja_template, JAT_Type);
331
/* fall back to the default account if none was requested */
if (!lGetString(jep, JB_account)) {
332
lSetString(jep, JB_account, DEFAULT_ACCOUNT);
334
if (verify_str_key(alpp, lGetString(jep, JB_account), MAX_VERIFY_STRING,
335
"account string", QSUB_TABLE) != STATUS_OK) {
336
DRETURN(STATUS_EUNKNOWN);
340
if (object_verify_name(jep, alpp, JB_job_name, SGE_OBJ_JOB)) {
341
DRETURN(STATUS_EUNKNOWN);
345
* Is the max. size of array jobs exceeded?
348
u_long32 max_aj_tasks = mconf_get_max_aj_tasks();
349
if (max_aj_tasks > 0) {
350
lList *range_list = lGetList(jep, JB_ja_structure);
351
u_long32 submit_size = range_list_get_number_of_ids(range_list);
353
if (submit_size > max_aj_tasks) {
354
ERROR((SGE_EVENT, MSG_JOB_MORETASKSTHAN_U, sge_u32c(max_aj_tasks)));
355
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
356
DRETURN(STATUS_EUNKNOWN);
361
{ /* JB_context contains a raw context list, which needs to be transformed into
362
a real context. For that, we have to take out the raw context and add it back
366
lXchgList(jep, JB_context, &temp);
367
set_context(temp, jep);
372
object_description *object_base = object_type_get_object_description();
374
/* get new job numbers until we find one that is not yet used */
376
job_number = sge_get_job_number(ctx, monitor);
377
} while (job_list_locate(*object_base[SGE_TYPE_JOB].list, job_number));
378
lSetUlong(jep, JB_job_number, job_number);
381
** with interactive jobs, JB_exec_file is not set
383
if (lGetString(jep, JB_script_file)) {
384
/* the exec file name is built from EXEC_DIR and the job number */
sprintf(str, "%s/%d", EXEC_DIR, (int)job_number);
385
lSetString(jep, JB_exec_file, str);
389
/* enforce the cluster-wide job limit (max_jobs) */
if (job_list_register_new_job(*object_base[SGE_TYPE_JOB].list, mconf_get_max_jobs(), 0)) {/*read*/
390
INFO((SGE_EVENT, MSG_JOB_ALLOWEDJOBSPERCLUSTER, sge_u32c(mconf_get_max_jobs())));
391
answer_list_add(alpp, SGE_EVENT, STATUS_NOTOK_DOAGAIN, ANSWER_QUALITY_ERROR);
392
DRETURN(STATUS_NOTOK_DOAGAIN);
395
/* enforce the per-user job limit (max_u_jobs) unless the job is only to be verified */
if ((lGetUlong(jep, JB_verify_suitable_queues) != JUST_VERIFY)) {
396
if (suser_check_new_job(jep, mconf_get_max_u_jobs()) != 0) { /*mod*/
397
INFO((SGE_EVENT, MSG_JOB_ALLOWEDJOBSPERUSER_UU, sge_u32c(mconf_get_max_u_jobs()),
398
sge_u32c(suser_job_count(jep))));
399
answer_list_add(alpp, SGE_EVENT, STATUS_NOTOK_DOAGAIN, ANSWER_QUALITY_ERROR);
400
DRETURN(STATUS_NOTOK_DOAGAIN);
404
/* check the submitter against the global user_lists/xuser_lists; both lists are freed on the two visible paths below */
user_lists = mconf_get_user_lists();
405
xuser_lists = mconf_get_xuser_lists();
406
if (!sge_has_access_(ruser, lGetString(jep, JB_group), /* read */
407
user_lists, xuser_lists, *object_base[SGE_TYPE_USERSET].list)) {
408
ERROR((SGE_EVENT, MSG_JOB_NOPERMS_SS, ruser, rhost));
409
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
410
lFreeList(&user_lists);
411
lFreeList(&xuser_lists);
412
DRETURN(STATUS_EUNKNOWN);
414
lFreeList(&user_lists);
415
lFreeList(&xuser_lists);
417
/* fill name and shortcut for all requests
418
* fill numeric values for all bool, time, memory and int type requests
419
* use the master_CEntry_list for all fills
420
* JB_hard/soft_resource_list points to a CE_Type list
423
lList *master_centry_list = *object_base[SGE_TYPE_CENTRY].list;
425
if (centry_list_fill_request(lGetList(jep, JB_hard_resource_list),
426
alpp, master_centry_list, false, true,
428
DRETURN(STATUS_EUNKNOWN);
430
if (compress_ressources(alpp, lGetList(jep, JB_hard_resource_list), SGE_OBJ_JOB)) {
431
DRETURN(STATUS_EUNKNOWN);
434
if (centry_list_fill_request(lGetList(jep, JB_soft_resource_list),
435
alpp, master_centry_list, false, true,
437
DRETURN(STATUS_EUNKNOWN);
439
if (compress_ressources(alpp, lGetList(jep, JB_soft_resource_list), SGE_OBJ_JOB)) {
440
DRETURN(STATUS_EUNKNOWN);
442
if (deny_soft_consumables(alpp, lGetList(jep, JB_soft_resource_list), master_centry_list)) {
443
DRETURN(STATUS_EUNKNOWN);
445
if (!centry_list_is_correct(lGetList(jep, JB_hard_resource_list), alpp)) {
446
DRETURN(STATUS_EUNKNOWN);
448
if (!centry_list_is_correct(lGetList(jep, JB_soft_resource_list), alpp)) {
449
DRETURN(STATUS_EUNKNOWN);
453
if (!qref_list_is_valid(lGetList(jep, JB_hard_queue_list), alpp)) {
454
DRETURN(STATUS_EUNKNOWN);
456
if (!qref_list_is_valid(lGetList(jep, JB_soft_queue_list), alpp)) {
457
DRETURN(STATUS_EUNKNOWN);
459
if (!qref_list_is_valid(lGetList(jep, JB_master_hard_queue_list), alpp)) {
460
DRETURN(STATUS_EUNKNOWN);
464
here we test (if requested) the
465
parallel environment exists;
466
if not the job is refused
468
pe_name = lGetString(jep, JB_pe);
470
const lListElem *pep;
471
pep = pe_list_find_matching(*object_base[SGE_TYPE_PE].list, pe_name);
473
ERROR((SGE_EVENT, MSG_JOB_PEUNKNOWN_S, pe_name));
474
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
475
DRETURN(STATUS_EUNKNOWN);
478
pe_range = lGetList(jep, JB_pe_range);
479
if (object_verify_pe_range(alpp, pe_name, pe_range, SGE_OBJ_JOB)!=STATUS_OK) {
480
DRETURN(STATUS_EUNKNOWN);
485
/* verify PE qsort_args */
486
if ((qsort_args=lGetString(pep, PE_qsort_argv)) != NULL) {
487
sge_assignment_t a = SGE_ASSIGNMENT_INIT;
494
ret = sge_call_pe_qsort(&a, qsort_args, 1, err_str);
496
answer_list_add(alpp, err_str, STATUS_EUNKNOWN,
497
ANSWER_QUALITY_ERROR);
498
DRETURN(STATUS_EUNKNOWN);
507
/* request for non existing ckpt object will be refused */
508
if ((ckpt_name != NULL)) {
509
if (!(ckpt_ep = ckpt_list_locate(*object_base[SGE_TYPE_CKPT].list, ckpt_name)))
511
else if (!ckpt_attr) {
512
/* no attribute requested with the job: take it from the "when" setting of the checkpoint object */
ckpt_attr = sge_parse_checkpoint_attr(lGetString(ckpt_ep, CK_when));
513
lSetUlong(jep, JB_checkpoint_attr, ckpt_attr);
518
/* the following chain tests inconsistent combinations of checkpoint name, attribute and interval */
if ((ckpt_attr & NO_CHECKPOINT) && (ckpt_attr & ~NO_CHECKPOINT)) {
521
else if (ckpt_name && (ckpt_attr & NO_CHECKPOINT)) {
524
else if ((!ckpt_name && (ckpt_attr & ~NO_CHECKPOINT))) {
527
else if (!ckpt_name && ckpt_inter) {
535
/* NOTE(review): the selection logic around the following messages is not visible in this excerpt */
sprintf(str, MSG_JOB_CKPTUNKNOWN_S, ckpt_name);
539
sprintf(str, MSG_JOB_CKPTMINUSC);
543
sprintf(str, MSG_JOB_NOCKPTREQ);
546
sprintf(str, MSG_JOB_CKPTDENIED);
549
ERROR((SGE_EVENT, "%s", str));
550
answer_list_add(alpp, SGE_EVENT, STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR);
551
DRETURN(STATUS_ESEMANTIC);
554
/* first check user permissions */
556
lListElem *cqueue = NULL;
557
int has_permissions = 0;
559
/* walk all queue instances of all cluster queues looking for one the submitter may access */
for_each (cqueue, *object_base[SGE_TYPE_CQUEUE].list) {
560
lList *qinstance_list = lGetList(cqueue, CQ_qinstances);
561
lListElem *qinstance = NULL;
562
lList *master_userset_list = *object_base[SGE_TYPE_USERSET].list;
564
for_each(qinstance, qinstance_list) {
565
if (sge_has_access(ruser, lGetString(jep, JB_group),
566
qinstance, master_userset_list)) {
567
DPRINTF(("job has access to queue "SFQ"\n", lGetString(qinstance, QU_qname)));
572
if (has_permissions == 1) {
576
if (has_permissions == 0) {
577
SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_JOB_NOTINANYQ_S, ruser));
578
answer_list_add(alpp, SGE_EVENT, STATUS_ESEMANTIC, ANSWER_QUALITY_ERROR);
582
/* check sge attributes */
584
/* if enforce_user flag is "auto", add or update the user */
586
char* enforce_user = mconf_get_enforce_user();
587
if (enforce_user && !strcasecmp(enforce_user, "auto")) {
588
int status = sge_add_auto_user(ctx, ruser, alpp, monitor);
589
if (status != STATUS_OK) {
595
/* ensure user exists if enforce_user flag is set */
596
if (enforce_user && !strcasecmp(enforce_user, "true") &&
597
!user_list_locate(*object_base[SGE_TYPE_USER].list, ruser)) {
598
ERROR((SGE_EVENT, MSG_JOB_USRUNKNOWN_S, ruser));
599
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
601
DRETURN(STATUS_EUNKNOWN);
606
/* set default project */
607
if (!lGetString(jep, JB_project) && ruser && *object_base[SGE_TYPE_USER].list) {
608
lListElem *uep = NULL;
609
if ((uep = user_list_locate(*object_base[SGE_TYPE_USER].list, ruser)))
610
lSetString(jep, JB_project, lGetString(uep, UU_default_project));
615
/* projects is released with lFreeList() on every visible exit below */
lList* projects = mconf_get_projects();
616
if ((project=lGetString(jep, JB_project))) {
619
if (!(pep = prj_list_locate(*object_base[SGE_TYPE_PROJECT].list , project))) {
620
ERROR((SGE_EVENT, MSG_JOB_PRJUNKNOWN_S, project));
621
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
622
lFreeList(&projects);
623
DRETURN(STATUS_EUNKNOWN);
626
/* ensure user belongs to this project */
627
if (!sge_has_access_(ruser, group,
628
lGetList(pep, PR_acl),
629
lGetList(pep, PR_xacl),
630
*object_base[SGE_TYPE_USERSET].list)) {
631
ERROR((SGE_EVENT, MSG_SGETEXT_NO_ACCESS2PRJ4USER_SS,
633
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
634
lFreeList(&projects);
635
DRETURN(STATUS_EUNKNOWN);
638
/* verify project can submit jobs */
639
xprojects = mconf_get_xprojects();
640
if ((xprojects && prj_list_locate(xprojects, project)) ||
641
(projects && !prj_list_locate(projects, project))) {
642
ERROR((SGE_EVENT, MSG_JOB_PRJNOSUBMITPERMS_S, project));
643
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
644
lFreeList(&xprojects);
645
lFreeList(&projects);
646
DRETURN(STATUS_EUNKNOWN);
648
lFreeList(&xprojects);
651
/* NOTE(review): presumably the branch for jobs without a project (the enclosing else is not visible): a project is demanded when projects are configured or enforce_project is "true" */
char* enforce_project = mconf_get_enforce_project();
652
if (lGetNumberOfElem(projects)>0) {
653
ERROR((SGE_EVENT, MSG_JOB_PRJREQUIRED));
654
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
655
lFreeList(&projects);
656
FREE(enforce_project);
657
DRETURN(STATUS_EUNKNOWN);
660
if (enforce_project && !strcasecmp(enforce_project, "true")) {
661
ERROR((SGE_EVENT, MSG_SGETEXT_NO_PROJECT));
662
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
663
lFreeList(&projects);
664
FREE(enforce_project);
665
DRETURN(STATUS_EUNKNOWN);
667
FREE(enforce_project);
669
lFreeList(&projects);
672
/* try to dispatch a department to the job */
673
if (set_department(alpp, jep, *object_base[SGE_TYPE_USERSET].list) != 1) {
674
/* alpp gets filled by set_department */
675
DRETURN(STATUS_EUNKNOWN);
679
If it is a deadline job the user has to be a deadline user
681
if (lGetUlong(jep, JB_deadline)) {
682
if (!userset_is_deadline_user(*object_base[SGE_TYPE_USERSET].list, ruser)) {
683
ERROR((SGE_EVENT, MSG_JOB_NODEADLINEUSER_S, ruser));
684
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
685
DRETURN(STATUS_EUNKNOWN);
689
/* Verify existence of ar, if ar exists */
690
if ((ar_id=lGetUlong(jep, JB_ar))) {
692
u_long32 ar_start_time, ar_end_time, job_execution_time, job_duration, now_time;
694
DPRINTF(("job -ar "sge_u32"\n", sge_u32c(ar_id)));
696
ar=ar_list_locate(*object_base[SGE_TYPE_AR].list, ar_id);
698
ERROR((SGE_EVENT, MSG_JOB_NOAREXISTS_U, sge_u32c(ar_id)));
699
/* NOTE(review): the answer carries STATUS_EUNKNOWN while the function returns STATUS_EEXIST (same pattern a few lines below) */
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
700
DRETURN(STATUS_EEXIST);
701
} else if ((lGetUlong(ar, AR_state) == AR_DELETED) ||
702
(lGetUlong(ar, AR_state) == AR_EXITED)) {
703
ERROR((SGE_EVENT, MSG_JOB_ARNOLONGERAVAILABE_U, sge_u32c(ar_id)));
704
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
705
DRETURN(STATUS_EEXIST);
707
/* fill the job and ar values */
708
ar_start_time = lGetUlong(ar, AR_start_time);
709
ar_end_time = lGetUlong(ar, AR_end_time);
710
now_time = sge_get_gmt();
711
job_execution_time = lGetUlong(jep, JB_execution_time);
713
/* execution before now is set to at least now */
714
if (job_execution_time < now_time) {
715
job_execution_time = now_time;
718
/* to be sure the execution time is NOT before AR start time */
719
if (job_execution_time < ar_start_time) {
720
job_execution_time = ar_start_time;
723
/* hard_resources h_rt limit */
724
if (job_get_wallclock_limit(&job_duration, jep) == true) {
725
DPRINTF(("job -ar "sge_u32", ar_start_time "sge_u32", ar_end_time "sge_u32
726
", job_execution_time "sge_u32", job duration "sge_u32" \n",
727
sge_u32c(ar_id),sge_u32c( ar_start_time),sge_u32c(ar_end_time),
728
sge_u32c(job_execution_time),sge_u32c(job_duration)));
730
/* fit the timeframe */
731
if (job_duration > (ar_end_time - ar_start_time)) {
732
ERROR((SGE_EVENT, MSG_JOB_HRTLIMITTOOLONG_U, sge_u32c(ar_id)));
733
/* NOTE(review): the answer carries STATUS_EUNKNOWN while the function returns STATUS_DENIED (also in the next check) */
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
734
DRETURN(STATUS_DENIED);
736
if ((job_execution_time + job_duration) > ar_end_time) {
737
ERROR((SGE_EVENT, MSG_JOB_HRTLIMITOVEREND_U, sge_u32c(ar_id)));
738
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
739
DRETURN(STATUS_DENIED);
744
/* verify schedulability */
746
int ret = verify_suitable_queues(alpp, jep, NULL);
747
if (lGetUlong(jep, JB_verify_suitable_queues)==JUST_VERIFY || ret != 0) {
753
* only operators and managers are allowed to submit
754
* jobs with higher priority than 0 (=BASE_PRIORITY)
756
if (lGetUlong(jep, JB_priority) > BASE_PRIORITY && !manop_is_operator(ruser)) {
757
ERROR((SGE_EVENT, MSG_JOB_NONADMINPRIO));
758
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
759
DRETURN(STATUS_EUNKNOWN);
762
/* checks on -hold_jid */
763
if (job_verify_predecessors(jep, alpp)) {
764
DRETURN(STATUS_EUNKNOWN);
767
/* checks on -hold_jid_ad */
768
if (job_verify_predecessors_ad(jep, alpp)) {
769
DRETURN(STATUS_EUNKNOWN);
775
** Execute command to store the client's DCE or Kerberos credentials.
776
** This also creates a forwardable credential for the user.
778
if (mconf_get_do_credentials()) {
779
if (store_sec_cred(sge_root, packet, jep, mconf_get_do_authentication(), alpp) != 0) {
780
DRETURN(STATUS_EUNKNOWN);
788
/* write script to file */
789
/* open a spooling transaction; it is rolled back on the two visible failure paths and committed afterwards */
spool_transaction(alpp, spool_get_default_context(), STC_begin);
791
if (lGetString(jep, JB_script_file) &&
792
!JOB_TYPE_IS_BINARY(lGetUlong(jep, JB_type))) {
793
if (spool_write_script(alpp, job_number, jep)==false) {
794
spool_transaction(alpp, spool_get_default_context(), STC_rollback);
795
ERROR((SGE_EVENT, MSG_JOB_NOWRITE_US, sge_u32c(job_number), strerror(errno)));
796
answer_list_add(alpp, SGE_EVENT, STATUS_EDISK, ANSWER_QUALITY_ERROR);
797
DRETURN(STATUS_EDISK);
801
/* clean file out of memory */
802
lSetString(jep, JB_script_ptr, NULL);
803
lSetUlong(jep, JB_script_size, 0);
806
/* hand the job to sge_event_spool() together with the sgeE_JOB_ADD event type */
if (!sge_event_spool(ctx, alpp, 0, sgeE_JOB_ADD,
807
job_number, 0, NULL, NULL, NULL,
808
jep, NULL, NULL, true, true)) {
809
spool_transaction(alpp, spool_get_default_context(), STC_rollback);
810
ERROR((SGE_EVENT, MSG_JOB_NOWRITE_U, sge_u32c(job_number)));
811
answer_list_add(alpp, SGE_EVENT, STATUS_EDISK, ANSWER_QUALITY_ERROR);
812
/* when job spooling is enabled, unlink the exec file and clear JB_exec_file */
if ((lGetString(jep, JB_exec_file) != NULL) && job_spooling) {
813
unlink(lGetString(jep, JB_exec_file));
814
lSetString(jep, JB_exec_file, NULL);
816
DRETURN(STATUS_EDISK);
818
spool_transaction(alpp, spool_get_default_context(), STC_commit);
820
if (!job_is_array(jep)) {
821
DPRINTF(("Added Job "sge_u32"\n", lGetUlong(jep, JB_job_number)));
823
job_get_submit_task_ids(jep, &start, &end, &step);
824
DPRINTF(("Added JobArray "sge_u32"."sge_u32"-"sge_u32":"sge_u32"\n",
825
lGetUlong(jep, JB_job_number), start, end, step));
828
/* add into job list */
829
/* a copy of jep is stored in the master list */
if (job_list_add_job(object_base[SGE_TYPE_JOB].list, "Master_Job_List", lCopyElem(jep), 0)) {
830
/* NOTE(review): SGE_EVENT is not written on this path before it is used, and the answer status (STATUS_EDISK) differs from the returned STATUS_EUNKNOWN */
answer_list_add(alpp, SGE_EVENT, STATUS_EDISK, ANSWER_QUALITY_ERROR);
831
DRETURN(STATUS_EUNKNOWN);
834
/** increase user counter */
835
suser_increase_job_counter(suser_list_add(object_base[SGE_TYPE_SUSER].list, NULL, ruser));
837
/* JG: TODO: error handling:
838
* if job can't be spooled, no event is sent (in sge_event_spool)
839
* if job can't be added to master list, it remains spooled
840
* make checks earlier
844
** immediate jobs trigger scheduling immediately
846
if (JOB_TYPE_IS_IMMEDIATE(lGetUlong(jep, JB_type))) {
847
sge_deliver_events_immediately(EV_ID_SCHEDD);
850
if (!job_is_array(jep)) {
851
(sprintf(SGE_EVENT, MSG_JOB_SUBMITJOB_US,
852
sge_u32c(lGetUlong(jep, JB_job_number)),
853
lGetString(jep, JB_job_name)));
855
sprintf(SGE_EVENT, MSG_JOB_SUBMITJOBARRAY_UUUUS,
856
sge_u32c(lGetUlong(jep, JB_job_number)), sge_u32c(start),
857
sge_u32c(end), sge_u32c(step),
858
lGetString(jep, JB_job_name));
860
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
863
reporting_create_new_job_record(NULL, jep);
864
reporting_create_job_log(NULL, lGetUlong(jep, JB_submission_time),
865
JL_PENDING, ruser, rhost, NULL,
866
jep, NULL, NULL, MSG_LOG_NEWJOB);
869
** add element to return list if necessary
873
*lpp = lCreateList("Job Return", JB_Type);
875
lAppendElem(*lpp, lCopyElem(jep));
882
/*-------------------------------------------------------------------------*/
883
/* sge_gdi_del_job */
884
/* called in sge_c_gdi_del */
885
/*-------------------------------------------------------------------------*/
886
/*
 * Handles a GDI job deletion request.
 *
 * From the id element (idep) and sub_command it derives which jobs are
 * selected (all jobs, all users, a user list, a job id or name), builds a
 * filter condition, walks the master job list and calls
 * sge_delete_all_tasks_of_job() for the jobs that pass the owner check.
 * If nothing was deleted, empty_job_list_filter() writes the reason into
 * *alpp and STATUS_EEXIST is returned.
 *
 * NOTE(review): this excerpt does not contain every line of the definition
 * (several declarations, braces and statements are missing), so the comments
 * added here only describe what is visible.
 */
int sge_gdi_del_job(sge_gdi_ctx_class_t *ctx, lListElem *idep, lList **alpp, char *ruser,
887
char *rhost, int sub_command, monitoring_t *monitor)
892
int user_list_flag = false;
894
lCondition *job_where = NULL;
895
lList *user_list = NULL;
897
u_long32 deleted_tasks = 0;
898
lList *master_job_list = *(object_type_get_master_list(SGE_TYPE_JOB));
899
u_long32 r_start = 0;
903
lListElem *nxt, *job = NULL;
907
DENTER(TOP_LAYER, "sge_gdi_del_job");
909
/* reject the request if one of the mandatory arguments is missing */
if ( !idep || !ruser || !rhost ) {
910
CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
911
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
912
DRETURN(STATUS_EUNKNOWN);
915
/* the ID_force field has to be part of the element; the CRITICAL branch below reports a missing field */
if (lGetPosViaElem(idep, ID_force, SGE_NO_ABORT) >= 0) {
916
if (lGetUlong(idep, ID_force) == 1) {
920
CRITICAL((SGE_EVENT, MSG_NMNOTINELEMENT_S, "ID_force"));
921
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
922
DRETURN(STATUS_EUNKNOWN);
925
/* first lets make sure they have permission if a force is involved */
926
if (!mconf_get_enable_forced_qdel()) {/* Flag ENABLE_FORCED_QDEL in qmaster_params */
929
if (forced && !manop_is_manager(ruser)) {
930
ERROR((SGE_EVENT, MSG_JOB_FORCEDDELETEPERMS_S, ruser));
931
/* NOTE(review): the answer carries STATUS_EEXIST while the function returns STATUS_EUNKNOWN */
answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
932
DRETURN(STATUS_EUNKNOWN);
937
/* derive the selection flags from the bits of the GDI sub command */
all_jobs_flag = ((sub_command & SGE_GDI_ALL_JOBS) != 0);
938
all_users_flag = ((sub_command & SGE_GDI_ALL_USERS) != 0);
940
/* Did we get a user list or something else ? */
941
if (lGetPosViaElem(idep, ID_user_list, SGE_NO_ABORT) >= 0) {
942
user_list = lGetList(idep, ID_user_list);
944
CRITICAL((SGE_EVENT, MSG_NMNOTINELEMENT_S, "ID_user_list"));
945
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
946
DRETURN(STATUS_EUNKNOWN);
949
/* Did we get a user list? */
950
if (user_list && lGetNumberOfElem(user_list) > 0) {
951
lListElem *user = NULL;
952
for_each(user, user_list) {
953
/* a "*" entry in the user list selects all users */
if (strcmp(lGetString(user, ST_name), "*") == 0) {
954
all_users_flag = true;
957
if (!all_users_flag) {
958
user_list_flag = true;
962
jid_str = lGetString(idep, ID_str);
964
/* Did we get a valid jobid? */
965
if (!all_jobs_flag && (jid_str != NULL) && (strcmp(jid_str, "0") != 0)) {
971
/* no user is set, though; only work on the jobs of the current user if
972
a job name is specified. We do not care about users if we work on a jid */
973
if (!all_users_flag && !user_list_flag && (jid_str != NULL) &&
974
!isdigit(jid_str[0])) {
975
/* NOTE(review): this local user_list shadows the function-level variable of the same name */
lList *user_list = lGetList(idep, ID_user_list);
976
lListElem *current_user = lCreateElem(ST_Type);
977
if (user_list == NULL) {
978
user_list = lCreateList("user list", ST_Type);
979
lSetList(idep, ID_user_list, user_list);
981
lSetString(current_user, ST_name, ruser);
982
lAppendElem(user_list, current_user);
983
user_list_flag = true;
986
if (verify_job_list_filter(alpp, all_users_flag, all_jobs_flag,
987
jid_flag, user_list_flag, ruser)) {
988
DRETURN(STATUS_EUNKNOWN);
991
/* build the job filter condition from the user list and/or the job id string */
job_list_filter(user_list_flag? lGetList(idep, ID_user_list):NULL,
992
jid_flag?jid_str:NULL, &job_where);
994
start_time = sge_get_gmt();
995
nxt = lFirst(master_job_list);
997
u_long32 job_number = 0;
998
bool deletion_time_reached = false;
1002
/* jobs that do not match the filter; the body of this branch is not visible in this excerpt */
if ((job_where != NULL) && !lCompare(job, job_where)) {
1005
job_number = lGetUlong(job, JB_job_number);
1007
/* Does user have privileges to delete the job/task? */
1008
if (job_check_owner(ruser, job_number, master_job_list)) {
1009
ERROR((SGE_EVENT, MSG_DELETEPERMS_SSU, ruser, SGE_OBJ_JOB,
1010
sge_u32c(job_number)));
1011
answer_list_add(alpp, SGE_EVENT, STATUS_ENOTOWNER, ANSWER_QUALITY_ERROR);
1013
/* continue with next job */
1017
/* delete the selected tasks of this job; the return value is added to njobs */
njobs += sge_delete_all_tasks_of_job(ctx, alpp, ruser, rhost, job, &r_start, &r_end, &step, lGetList(idep, ID_ja_structure),
1018
&alltasks, &deleted_tasks, start_time, monitor, forced, &deletion_time_reached);
1020
if (deletion_time_reached) {
1021
lFreeWhere(&job_where);
1026
lFreeWhere(&job_where);
1028
/* nothing was deleted: report why the filter selected no job */
if (!njobs && !deleted_tasks) {
1029
empty_job_list_filter(alpp, 0, user_list_flag,
1030
lGetList(idep, ID_user_list), jid_flag,
1031
jid_flag?lGetString(idep, ID_str):"0",
1032
all_users_flag, all_jobs_flag, ruser,
1033
alltasks == 0 ? 1 : 0, r_start, r_end, step);
1034
DRETURN(STATUS_EEXIST);
1038
/* remove all orphaned queue instances, which are empty. */
1039
cqueue_list_del_all_orphaned(ctx, *(object_type_get_master_list(SGE_TYPE_CQUEUE)), alpp, NULL, NULL);
1045
/****** sge_job_qmaster/is_pe_master_task_send() *******************************
1047
* is_pe_master_task_send() -- figures out if all slaves are sent
1050
* bool is_pe_master_task_send(lListElem *jatep)
1053
* In case of tightly integrated pe jobs the slaves are sent first. Once
1054
* all execds acknowledged the slaves, the master can be sent. This function
1055
* figures out, if all slaves are acknowledged.
1058
* lListElem *jatep - ja task in question
1061
* bool - true, if all slaves are acknowledged
1064
* MT-NOTE: is_pe_master_task_send() is MT safe
1066
*******************************************************************************/
1068
/* NOTE(review): the return type line and the braces of this definition are not part of this excerpt */
is_pe_master_task_send(lListElem *jatep)
1070
/* result; stays true unless an entry with a non-zero slave tag is found */
bool is_all_slaves_arrived = true;
1071
lListElem *gdil_ep = NULL;
1073
/* walk the granted destination identifier list of the ja task */
for_each (gdil_ep, lGetList(jatep, JAT_granted_destin_identifier_list)) {
1074
/* a non-zero JG_tag_slave_job clears the result; per the function header this presumably marks a slave that is not acknowledged yet */
if (lGetUlong(gdil_ep, JG_tag_slave_job) != 0) {
1075
is_all_slaves_arrived= false;
1080
return is_all_slaves_arrived;
1083
/*
 * empty_job_list_filter() -- report that a job selection matched nothing
 *
 * Logs an ERROR whose text depends on which selection flag is set
 * (all_users_flag, user_list_flag, all_jobs_flag, jid_flag) and, for task
 * related messages, on start, end and step. The message left in SGE_EVENT
 * is added to alpp with STATUS_EEXIST and ANSWER_QUALITY_ERROR.
 * was_modify selects between the modified and the deleted wording of the
 * fallback message.
 */
static void empty_job_list_filter(
1098
DENTER(TOP_LAYER, "empty_job_list_filter");
1100
if (all_users_flag) {
1101
ERROR((SGE_EVENT, MSG_SGETEXT_THEREARENOJOBS));
1102
} else if (user_list_flag) {
1103
/* Collects the ST_name values of user_list into one string for the message */
dstring user_list_string = DSTRING_INIT;
1105
sge_dstring_sprintf(&user_list_string, "");
1107
if (lGetNumberOfElem(user_list) > 0) {
1112
for_each(user, user_list) {
1114
sge_dstring_append(&user_list_string, ",");
1119
sge_dstring_append(&user_list_string, "...");
1122
sge_dstring_append(&user_list_string, lGetString(user, ST_name));
1129
ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXISTTASK_SUS,
1130
jobid, sge_u32c(start), sge_dstring_get_string(&user_list_string)));
1132
ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXISTTASKRANGE_SUUUS,
1133
jobid, sge_u32c(start), sge_u32c(end), sge_u32c(step),
1134
sge_dstring_get_string(&user_list_string)));
1137
ERROR((SGE_EVENT,MSG_SGETEXT_DEL_JOB_SS, jobid, sge_dstring_get_string(&user_list_string)));
1140
ERROR((SGE_EVENT, MSG_SGETEXT_THEREARENOXFORUSERS_SS, SGE_OBJ_JOB, sge_dstring_get_string(&user_list_string)));
1143
/* The temporary user list string is released once the message is built */
sge_dstring_free(&user_list_string);
1145
} else if (all_jobs_flag) {
1146
ERROR((SGE_EVENT, MSG_SGETEXT_THEREARENOXFORUSERS_SS, SGE_OBJ_JOB ,ruser));
1147
} else if (jid_flag) {
1148
/* should not be possible */
1151
ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXISTTASK_SU,
1152
jobid, sge_u32c(start)));
1154
ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXISTTASKRANGE_SUUU,
1155
jobid, sge_u32c(start), sge_u32c(end), sge_u32c(step)));
1158
ERROR((SGE_EVENT,MSG_SGETEXT_DOESNOTEXIST_SS, SGE_OBJ_JOB, jobid));
1161
/* Should not be possible */
1163
was_modify?MSG_SGETEXT_NOJOBSMODIFIED:MSG_SGETEXT_NOJOBSDELETED));
1166
/* Hand the message currently held in SGE_EVENT to the caller */
answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
1170
/****** sge_job_qmaster/job_list_filter() **************************************
1172
* job_list_filter() -- Build filter for the joblist
1175
* static void job_list_filter(lList *user_list, const char* jobid, char
1176
* *ruser, bool all_users_flag, lCondition **job_filter, lCondition
1180
* Builds two where filters: one for users and one for jobs.
1183
* lList *user_list - user list or NULL if no user exists
1184
* const char* jobid - a job id or a job name or a pattern
1185
* lCondition **job_filter - pointer to the target filter. If a where
1186
* does exist, it will be extended by the new ones
1192
* MT-NOTE: job_list_filter() is MT safe
1194
*******************************************************************************/
1195
static void job_list_filter( lList *user_list, const char* jobid,
1196
lCondition **job_filter) {
1197
lCondition *new_where = NULL;
1199
DENTER(TOP_LAYER, "job_list_filter");
1201
/* A NULL job_filter is reported as an error */
if (job_filter == NULL) {
1202
ERROR((SGE_EVENT, "job_list_filter() got no filters"));
1206
/* User part: one pattern condition on JB_owner per ST_name in user_list;
   conditions are stored in *job_filter or merged into it with lOrWhere() */
if (user_list != NULL) {
1209
DPRINTF(("Add all users given in userlist to filter\n"));
1210
for_each(user, user_list) {
1212
new_where = lWhere("%T(%I p= %s)", JB_Type, JB_owner,
1213
lGetString(user, ST_name));
1215
*job_filter = new_where;
1217
*job_filter = lOrWhere(*job_filter, new_where);
1222
/* Job part: a jobid starting with a digit is compared with JB_job_number,
   anything else is pattern matched against JB_job_name; the condition is
   stored in *job_filter or merged into it with lAndWhere() */
if (jobid != NULL) {
1223
DPRINTF(("Add jid %s to filter\n", jobid));
1224
if (isdigit(jobid[0])) {
1225
new_where = lWhere("%T(%I==%u)", JB_Type, JB_job_number, atol(jobid));
1227
new_where = lWhere("%T(%I p= %s)", JB_Type, JB_job_name, jobid);
1230
*job_filter = new_where;
1232
*job_filter = lAndWhere(*job_filter, new_where);
1240
qalter -uall => all_users_flag = true
1241
qalter ... <jid> ... => jid_flag = true
1242
qalter -u <username> ... => user_list_flag = true
1243
qalter ... all => all_jobs_flag = true
1245
1) all_users_flag && all_jobs_flag => all jobs of all users (requires
1247
2) all_users_flag && jid_flag => not valid
1248
3) all_users_flag => all jobs of all users (requires
1250
4) user_list_flag && all_jobs_flag => all jobs of all users given in
1252
5) user_list_flag && jid_flag => not valid
1253
6) user_list_flag => all jobs of all users given in
1255
7) all_jobs_flag => all jobs of current user
1256
8) jid_flag => <jid>
1257
9) all_users_flag && user_list_flag => not valid
1260
/*
 * verify_job_list_filter() -- validate the combination of selection flags
 *
 * Adds an error to alpp and returns STATUS_EUNKNOWN when
 *  - none of all_users_flag, all_jobs_flag, jid_flag, user_list_flag is set,
 *  - all_users_flag and user_list_flag are both set, or
 *  - all_users_flag is set without jid_flag and ruser is not a manager.
 */
static int verify_job_list_filter(
1268
DENTER(TOP_LAYER, "verify_job_list_filter");
1270
/* Reject incorrect requests */
1271
if (!all_users_flag && !all_jobs_flag && !jid_flag && !user_list_flag) {
1272
ERROR((SGE_EVENT, MSG_SGETEXT_SPECIFYUSERORID_S, SGE_OBJ_JOB));
1273
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
1274
DRETURN(STATUS_EUNKNOWN);
1279
/* Selecting all users and an explicit user list at once is rejected */
if (all_users_flag && user_list_flag) {
1280
ERROR((SGE_EVENT, MSG_SGETEXT_SPECIFYONEORALLUSER));
1281
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
1282
DRETURN(STATUS_EUNKNOWN);
1286
/* if ((all_users_flag || user_list_flag) && jid_flag) {
1287
ERROR((SGE_EVENT, MSG_SGETEXT_NOTALLOWEDTOSPECUSERANDJID));
1288
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
1289
DRETURN(STATUS_EUNKNOWN);
1293
/* case 1,3: Only manager can modify all jobs of all users */
1294
if (all_users_flag && !jid_flag && !manop_is_manager(ruser)) {
1295
ERROR((SGE_EVENT, MSG_SGETEXT_MUST_BE_MGR_TO_SS, ruser,
1296
"modify all jobs"));
1297
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
1298
DRETURN(STATUS_EUNKNOWN);
1304
/*
 * get_rid_of_schedd_job_messages() -- drop scheduler messages of one job
 *
 * Works on the SME_message_list of the first element of the master
 * SGE_TYPE_JOB_SCHEDD_INFO list. For every message that references
 * job_number in its MES_job_number_list: if the message lists more than
 * one job only the entry for job_number is removed, otherwise the whole
 * message is removed.
 */
static void get_rid_of_schedd_job_messages(u_long32 job_number)
1306
lListElem *sme = NULL;
1307
lListElem *mes = NULL;
1308
lListElem *next = NULL;
1309
lList *mes_list = NULL;
1310
lList *master_job_schedd_info_list = *(object_type_get_master_list(SGE_TYPE_JOB_SCHEDD_INFO));
1313
DENTER(TOP_LAYER, "get_rid_of_schedd_job_messages");
1314
if (master_job_schedd_info_list != NULL) {
1315
sme = lFirst(master_job_schedd_info_list);
1316
mes_list = lGetList(sme, SME_message_list);
1319
** remove all messages for job_number
1321
next = lFirst(mes_list);
1322
while ((mes = next)) {
1323
lListElem *job_ulng;
1326
/* Does this message reference job_number at all? */
if ((job_ulng = lGetElemUlong(lGetList(mes, MES_job_number_list), ULNG, job_number)) != NULL) {
1328
** more than one job in list for this message => remove job id
1329
** else => remove whole message
1331
if (lGetNumberOfElem(lGetList(mes, MES_job_number_list)) > 1) {
1332
lRemoveElem(lGetList(mes, MES_job_number_list), &job_ulng);
1333
DPRINTF(("Removed jobid "sge_u32" from list of scheduler messages\n", job_number));
1335
lRemoveElem(mes_list, &mes);
1336
DPRINTF(("Removed message from list of scheduler messages "sge_u32"\n", job_number));
1344
/*
 * job_ja_task_send_abort_mail() -- mail the job mail list about a killed job
 *
 * Mail goes to the users in JB_mail_list, and only if MAIL_AT_ABORT is set
 * in JB_mail_options and the task does not carry JDELETED in JAT_state.
 * Array jobs get a subject and body that include the task id. A non-NULL
 * err_str is appended to the body after MSG_MAIL_BECAUSE.
 */
void job_ja_task_send_abort_mail(const lListElem *job,
1345
const lListElem *ja_task,
1348
const char *err_str)
1350
dstring subject = DSTRING_INIT;
1351
dstring body = DSTRING_INIT;
1352
lList *users = NULL;
1354
u_long32 ja_task_id;
1355
const char *job_name = NULL;
1356
int send_abort_mail = 0;
1358
ja_task_id = lGetUlong(ja_task, JAT_task_number);
1359
job_name = lGetString(job, JB_job_name);
1360
job_id = lGetUlong(job, JB_job_number);
1361
users = lGetList(job, JB_mail_list);
1362
/* Abort mail must be requested and the task must not be flagged JDELETED */
send_abort_mail = VALID(MAIL_AT_ABORT, lGetUlong(job, JB_mail_options))
1363
&& !(lGetUlong(ja_task, JAT_state) & JDELETED);
1365
if (send_abort_mail) {
1366
if (job_is_array(job)) {
1367
sge_dstring_sprintf(&subject, MSG_MAIL_TASKKILLEDSUBJ_UUS,
1368
sge_u32c(job_id), sge_u32c(ja_task_id), job_name);
1369
sge_dstring_sprintf(&body, MSG_MAIL_TASKKILLEDBODY_UUSSS,
1370
sge_u32c(job_id), sge_u32c(ja_task_id), job_name, ruser, rhost);
1372
sge_dstring_sprintf(&subject, MSG_MAIL_JOBKILLEDSUBJ_US,
1373
sge_u32c(job_id), job_name);
1374
sge_dstring_sprintf(&body, MSG_MAIL_JOBKILLEDBODY_USSS,
1375
sge_u32c(job_id), job_name, ruser, rhost);
1377
/* Optional reason text is appended on a new line */
if (err_str != NULL) {
1378
sge_dstring_append(&body, "\n");
1379
sge_dstring_append(&body, MSG_MAIL_BECAUSE);
1380
sge_dstring_append(&body, err_str);
1382
cull_mail(QMASTER, users, sge_dstring_get_string(&subject),
1383
sge_dstring_get_string(&body), "job abortion");
1386
/* Both dstrings are released again */
sge_dstring_free(&subject);
1387
sge_dstring_free(&body);
1390
/*
 * get_rid_of_job_due_to_qdel() -- handle deletion of task t of job j
 *
 * Looks up the queue instance named by JAT_master_queue of the task and
 * sends SGE_SIGKILL through sge_signal_queue(). Depending on the path taken
 * the task is either committed with COMMIT_ST_FINISHED_FAILED_EE (and its
 * job resend cancelled) or only registered for deletion through
 * job_mark_job_as_deleted(). The resulting message is added to answer_list.
 *
 * NOTE(review): the exact conditions selecting each path (for example a
 * forced deletion) should be confirmed against the complete function.
 */
void get_rid_of_job_due_to_qdel(sge_gdi_ctx_class_t *ctx,
1393
lList **answer_list,
1396
monitoring_t *monitor)
1398
u_long32 job_number, task_number;
1399
lListElem *qep = NULL;
1401
DENTER(TOP_LAYER, "get_rid_of_job_due_to_qdel");
1403
job_number = lGetUlong(j, JB_job_number);
1404
task_number = lGetUlong(t, JAT_task_number);
1405
/* Locate the queue instance named by JAT_master_queue of the task */
qep = cqueue_list_locate_qinstance(*(object_type_get_master_list(SGE_TYPE_CQUEUE)), lGetString(t, JAT_master_queue));
1407
ERROR((SGE_EVENT, MSG_JOB_UNABLE2FINDQOFJOB_S,
1408
lGetString(t, JAT_master_queue)));
1409
answer_list_add(answer_list, SGE_EVENT, STATUS_EEXIST,
1410
ANSWER_QUALITY_ERROR);
1412
/* NOTE(review): a non-zero result presumably means the kill signal could
   not be delivered -- confirm against sge_signal_queue() */
if (sge_signal_queue(ctx, SGE_SIGKILL, qep, j, t, monitor)) {
1414
/* 3: JOB_FINISH reports aborted */
1415
sge_commit_job(ctx, j, t, NULL, COMMIT_ST_FINISHED_FAILED_EE, COMMIT_DEFAULT | COMMIT_NEVER_RAN, monitor);
1416
cancel_job_resend(job_number, task_number);
1419
if (job_is_array(j)) {
1420
ERROR((SGE_EVENT, MSG_JOB_FORCEDDELTASK_SUU,
1421
ruser, sge_u32c(job_number), sge_u32c(task_number)));
1423
ERROR((SGE_EVENT, MSG_JOB_FORCEDDELJOB_SU,
1424
ruser, sge_u32c(job_number)));
1426
answer_list_add(answer_list, SGE_EVENT, STATUS_OK,
1427
ANSWER_QUALITY_INFO);
1429
ERROR((SGE_EVENT, MSG_COM_NOSYNCEXECD_SU,
1430
ruser, sge_u32c(job_number)));
1431
answer_list_add(answer_list, SGE_EVENT, STATUS_EEXIST,
1432
ANSWER_QUALITY_ERROR);
1436
if (job_is_array(j)) {
1437
ERROR((SGE_EVENT, MSG_JOB_FORCEDDELTASK_SUU,
1438
ruser, sge_u32c(job_number), sge_u32c(task_number)));
1440
ERROR((SGE_EVENT, MSG_JOB_FORCEDDELJOB_SU,
1441
ruser, sge_u32c(job_number)));
1443
/* 3: JOB_FINISH reports aborted */
1444
sge_commit_job(ctx, j, t, NULL, COMMIT_ST_FINISHED_FAILED_EE, COMMIT_DEFAULT | COMMIT_NEVER_RAN, monitor);
1445
cancel_job_resend(job_number, task_number);
1449
* the job gets registered for deletion:
1450
* 0. send signal to execd
1451
* 1. JB_pending_signal = SGE_SIGKILL
1452
* 2. ACK from execd resets JB_pending_signal to 0
1453
* Here we need a state for the job displaying its
1455
* 3. execd signals shepherd and reaps job after job exit
1456
* 4. execd informs master of job exits and job is
1457
* deleted from master lists
1460
if (job_is_array(j)) {
1461
INFO((SGE_EVENT, MSG_JOB_REGDELTASK_SUU,
1462
ruser, sge_u32c(job_number), sge_u32c(task_number)));
1464
INFO((SGE_EVENT, MSG_JOB_REGDELX_SSU,
1465
ruser, SGE_OBJ_JOB, sge_u32c(job_number)));
1468
answer_list_add(answer_list, SGE_EVENT, STATUS_OK,
1469
ANSWER_QUALITY_INFO);
1471
/* Flag the task as deleted; it stays in the lists for now */
job_mark_job_as_deleted(ctx, j, t);
1475
/*
 * job_mark_job_as_deleted() -- flag task t of job j as deleted
 *
 * Sets the JDELETED bit in JAT_state, stores the current time from
 * sge_get_gmt() in JAT_stop_initiate_time, writes the job through
 * spool_write_object() and clears the changed info of the task.
 * Messages collected while spooling are emitted with answer_list_output().
 */
void job_mark_job_as_deleted(sge_gdi_ctx_class_t *ctx,
1479
bool job_spooling = ctx->get_job_spooling(ctx);
1481
DENTER(TOP_LAYER, "job_mark_job_as_deleted");
1483
lList *answer_list = NULL;
1484
dstring buffer = DSTRING_INIT;
1485
u_long32 state = lGetUlong(t, JAT_state);
1487
/* Add JDELETED to the current state bits and store them back */
SETBIT(JDELETED, state);
1488
lSetUlong(t, JAT_state, state);
1489
lSetUlong(t, JAT_stop_initiate_time, sge_get_gmt());
1490
spool_write_object(&answer_list, spool_get_default_context(), j,
1491
job_get_key(lGetUlong(j, JB_job_number),
1492
lGetUlong(t, JAT_task_number), NULL,
1496
lListElem_clear_changed_info(t);
1497
answer_list_output(&answer_list);
1498
sge_dstring_free(&buffer);
1503
/*-------------------------------------------------------------------------*/
1504
/* sge_gdi_modify_job */
1505
/* called in sge_c_gdi_mod */
1506
/*-------------------------------------------------------------------------*/
1509
this is our strategy:
1511
do common checks and search old job
1512
make a copy of the old job (this will be the new job)
1513
modify new job using reduced job as instruction
1514
on error: dispose new job
1515
store new job to disc
1516
on error: dispose new job
1518
replace old job by new job
1521
/* actions to be done after successful
1522
saving to disk of a modified job */
1526
RECHAIN_JID_HOLD = 4,
1527
RECHAIN_JA_AD_HOLD = 8,
1531
/*
 * sge_gdi_mod_job() -- modify jobs as requested by a reduced job element
 *
 * jep is a reduced JB_Type element carrying the requested changes.
 * sub_command supplies the SGE_GDI_ALL_JOBS and SGE_GDI_ALL_USERS flags.
 * A filter built by job_list_filter() selects jobs from the master job
 * list. For each selected job a copy is changed by mod_job_attributes(),
 * spooled with spool_write_object() and then put into the master list in
 * place of the old element.
 *
 * Visible return values:
 *   STATUS_EUNKNOWN  - jep, ruser or rhost is NULL, or mod_job_attributes()
 *                      reported a failure
 *   STATUS_ENOTOWNER - ruser is neither owner, operator nor manager
 *   STATUS_EDISK     - reported together with MSG_JOB_NOALTERNOWRITE_U
 *   STATUS_EEXIST    - returned after empty_job_list_filter() reported
 *                      that nothing was modified
 */
int sge_gdi_mod_job(
1532
sge_gdi_ctx_class_t *ctx,
1533
lListElem *jep, /* reduced JB_Type */
1539
lListElem *nxt, *jobep = NULL; /* pointer to old job */
1543
lCondition *job_where = NULL;
1545
int njobs = 0, ret, jid_flag;
1548
bool job_name_flag = false;
1549
char *job_mod_name = NULL;
1550
const char *job_name = NULL;
1551
bool job_spooling = ctx->get_job_spooling(ctx);
1553
DENTER(TOP_LAYER, "sge_gdi_mod_job");
1555
if ( !jep || !ruser || !rhost ) {
1556
CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
1557
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
1558
DRETURN(STATUS_EUNKNOWN);
1562
/* Selection flags encoded in sub_command */
all_jobs_flag = ((sub_command & SGE_GDI_ALL_JOBS) > 0);
1563
all_users_flag = ((sub_command & SGE_GDI_ALL_USERS) > 0);
1565
/* Did we get a user list? */
1566
if (((user_list_pos = lGetPosViaElem(jep, JB_user_list, SGE_NO_ABORT)) >= 0)
1567
&& lGetNumberOfElem(lGetPosList(jep, user_list_pos)) > 0)
1572
job_name_pos = lGetPosViaElem(jep, JB_job_name, SGE_NO_ABORT);
1573
if (job_name_pos >= 0){
1574
job_name = lGetPosString(jep, job_name_pos);
1576
/* Did we get a job - with a jobid? */
1579
(((job_id_pos = lGetPosViaElem(jep, JB_job_number, SGE_NO_ABORT)) >= 0) &&
1580
lGetPosUlong(jep, job_id_pos) > 0) ||
1581
((job_name != NULL) &&
1582
(job_name_flag = (job_name[0] == JOB_NAME_DEL) ? true : false))
1588
if ((ret=verify_job_list_filter(alpp, all_users_flag, all_jobs_flag,
1589
jid_flag, user_list_flag, ruser))) {
1593
const char *job_id_str = NULL;
1595
if (!job_name_flag){
1596
sprintf(job_id, sge_u32, lGetPosUlong(jep, job_id_pos));
1597
job_id_str = job_id;
1599
/* format: <delimiter>old_name<delimiter>new_name */
1600
char *del_pos = NULL;
1601
job_id_str = lGetPosString(jep, job_name_pos);
1603
del_pos = strchr(job_id_str, JOB_NAME_DEL);
1607
job_mod_name = sge_strdup(NULL, job_id_str);
1608
job_id_str = job_mod_name;
1610
if (strlen(del_pos)>0) {
1611
lSetPosString(jep, job_name_pos, del_pos);
1613
lSetPosString(jep, job_name_pos, NULL);
1617
/* Build the where condition used to pick jobs from the master list */
job_list_filter(user_list_flag?lGetPosList(jep, user_list_pos):NULL,
1618
jid_flag?job_id_str:NULL, &job_where);
1621
nxt = lFirst(*(object_type_get_master_list(SGE_TYPE_JOB)));
1622
while ((jobep=nxt)) {
1624
lListElem *new_job; /* new job */
1625
lList *tmp_alp = NULL;
1630
if ((job_where != NULL ) && !lCompare(jobep, job_where)) {
1635
jobid = lGetUlong(jobep, JB_job_number);
1637
/* general check whether ruser is allowed to modify this job */
1638
if (strcmp(ruser, lGetString(jobep, JB_owner)) && !manop_is_operator(ruser) && !manop_is_manager(ruser)) {
1639
ERROR((SGE_EVENT, MSG_SGETEXT_MUST_BE_JOB_OWN_TO_SUS, ruser, sge_u32c(jobid), MSG_JOB_CHANGEATTR));
1640
answer_list_add(alpp, SGE_EVENT, STATUS_ENOTOWNER, ANSWER_QUALITY_ERROR);
1641
lFreeWhere(&job_where);
1643
DRETURN(STATUS_ENOTOWNER);
1646
/* operate on a cull copy of the job */
1647
new_job = lCopyElem(jobep);
1649
if (mod_job_attributes(new_job, jep, &tmp_alp, ruser, rhost, &trigger)) {
1650
/* failure: just append last elem in tmp_alp
1651
elements before may contain invalid success messages */
1653
failure = lLast(tmp_alp);
1654
lDechainElem(tmp_alp, failure);
1656
*alpp = lCreateList("answer", AN_Type);
1658
lAppendElem(*alpp, failure);
1659
lFreeList(&tmp_alp);
1660
lFreeElem(&new_job);
1662
DPRINTF(("---------- removed messages\n"));
1663
lFreeWhere(&job_where);
1665
DRETURN(STATUS_EUNKNOWN);
1668
if (!(trigger & VERIFY_EVENT)) {
1669
dstring buffer = DSTRING_INIT;
1671
lList *answer_list = NULL;
1673
/* A real modification bumps the job version */
if (trigger & MOD_EVENT) {
1674
lSetUlong(new_job, JB_version, lGetUlong(new_job, JB_version)+1);
1677
/* all job modifications to be saved on disk must be made in new_job */
1678
dbret = spool_write_object(&answer_list, spool_get_default_context(), new_job,
1679
job_get_key(jobid, 0, NULL, &buffer),
1680
SGE_TYPE_JOB, job_spooling);
1681
answer_list_output(&answer_list);
1684
ERROR((SGE_EVENT, MSG_JOB_NOALTERNOWRITE_U, sge_u32c(jobid)));
1685
answer_list_add(alpp, SGE_EVENT, STATUS_EDISK, ANSWER_QUALITY_ERROR);
1686
sge_dstring_free(&buffer);
1687
lFreeList(&tmp_alp);
1688
lFreeElem(&new_job);
1689
lFreeWhere(&job_where);
1691
DRETURN(STATUS_EDISK);
1694
sge_dstring_free(&buffer);
1696
/* all elems in tmp_alp need to be appended to alpp */
1698
*alpp = lCreateList("answer", AN_Type);
1700
lAddList(*alpp, &tmp_alp);
1702
/* Emit job and per task modify events for the new job */
if (trigger & MOD_EVENT) {
1703
sge_add_job_event(sgeE_JOB_MOD, new_job, NULL);
1704
for_each(jatep, lGetList(new_job, JB_ja_tasks)) {
1705
sge_add_jatask_event(sgeE_JATASK_MOD, new_job, jatep);
1708
if (trigger & PRIO_EVENT) {
1709
sge_add_job_event(sgeE_JOB_MOD_SCHED_PRIORITY, new_job, NULL);
1712
lListElem_clear_changed_info(new_job);
1714
/* remove all existing trigger links -
1715
this has to be done using the old
1716
jid_predecessor_list */
1718
if (trigger & RECHAIN_JID_HOLD) {
1719
lListElem *suc_jobep, *jid;
1720
for_each(jid, lGetList(jobep, JB_jid_predecessor_list)) {
1721
u_long32 pre_ident = lGetUlong(jid, JRE_job_number);
1723
DPRINTF((" JOB #"sge_u32": P: "sge_u32"\n", jobid, pre_ident));
1725
if ((suc_jobep = job_list_locate(*(object_type_get_master_list(SGE_TYPE_JOB)), pre_ident))) {
1726
lListElem *temp_job = NULL;
1728
temp_job = lGetElemUlong(lGetList(suc_jobep, JB_jid_successor_list), JRE_job_number, jobid);
1729
DPRINTF((" JOB "sge_u32" removed from trigger "
1730
"list of job "sge_u32"\n", jobid, pre_ident));
1731
lRemoveElem(lGetList(suc_jobep, JB_jid_successor_list), &temp_job);
1736
/* write data back into job list */
1738
lListElem *prev = lPrev(jobep);
1739
lList *master_job_list = *(object_type_get_master_list(SGE_TYPE_JOB));
1741
lRemoveElem(master_job_list, &jobep);
1742
lInsertElem(master_job_list, prev, new_job);
1744
/* no need to spool these mods */
1745
if (trigger & RECHAIN_JID_HOLD)
1746
job_suc_pre(new_job);
1748
INFO((SGE_EVENT, MSG_SGETEXT_MODIFIEDINLIST_SSUS, ruser,
1749
rhost, sge_u32c(jobid), MSG_JOB_JOB));
1752
lFreeWhere(&job_where);
1755
const char *job_id_str = NULL;
1757
if (!job_name_flag){
1758
sprintf(job_id, sge_u32,lGetPosUlong(jep, job_id_pos));
1759
job_id_str = job_id;
1762
job_id_str = job_mod_name;
1765
/* Report through the shared helper that no job was modified */
empty_job_list_filter(alpp, 1, user_list_flag,
1766
user_list_flag?lGetPosList(jep, user_list_pos):NULL,
1767
jid_flag, jid_flag?job_id_str:"0",
1768
all_users_flag, all_jobs_flag, ruser, 0, 0, 0, 0);
1770
DRETURN(STATUS_EEXIST);
1777
/*
 * sge_add_job_event() -- send an event of the given type for job jep
 *
 * Calls sge_add_event() with the job number, the task number of jatask
 * (0 when jatask is NULL), the JB_session of the job, and jep itself as
 * the event payload element.
 */
void sge_add_job_event(ev_event type, lListElem *jep, lListElem *jatask)
1779
DENTER(TOP_LAYER, "sge_add_job_event");
1780
sge_add_event( 0, type, lGetUlong(jep, JB_job_number),
1781
jatask ? lGetUlong(jatask, JAT_task_number) : 0,
1782
NULL, NULL, lGetString(jep, JB_session), jep);
1786
/*
 * sge_add_jatask_event() -- send an event of the given type for one task
 *
 * Calls sge_add_event() with the job number of jep, the task number of
 * jatask, the JB_session of the job, and jatask as the event payload
 * element. Unlike sge_add_job_event() jatask is read unconditionally.
 */
void sge_add_jatask_event(ev_event type, lListElem *jep, lListElem *jatask)
1788
DENTER(TOP_LAYER, "sge_add_jatask_event");
1789
sge_add_event( 0, type, lGetUlong(jep, JB_job_number),
1790
lGetUlong(jatask, JAT_task_number),
1791
NULL, NULL, lGetString(jep, JB_session), jatask);
1796
build up jid hold links for a job
1797
no need to spool them or to send
1798
events to update schedd data
1803
/* Works on JB_jid_predecessor_list of jep: a predecessor that is located in
   the master job list gets this job added to its JB_jid_successor_list;
   the debug messages below cover predecessors that already exited or do
   not exist, followed by removal of the entry from the predecessor list. */
lListElem *parent_jep, *prep, *task;
1805
DENTER(TOP_LAYER, "job_suc_pre");
1808
here we check whether every job
1809
in the predecessor list has exited
1811
prep = lFirst(lGetList(jep, JB_jid_predecessor_list));
1813
u_long32 pre_ident = lGetUlong(prep, JRE_job_number);
1814
/* Look the predecessor up by job number in the master job list */
parent_jep = job_list_locate(*(object_type_get_master_list(SGE_TYPE_JOB)), pre_ident);
1820
if (lGetList(parent_jep, JB_ja_n_h_ids) != NULL ||
1821
lGetList(parent_jep, JB_ja_u_h_ids) != NULL ||
1822
lGetList(parent_jep, JB_ja_o_h_ids) != NULL ||
1823
lGetList(parent_jep, JB_ja_s_h_ids) != NULL) {
1827
for_each(ja_task, lGetList(parent_jep, JB_ja_tasks)) {
1828
if (lGetUlong(ja_task, JAT_status) != JFINISHED) {
1832
for_each(task, lGetList(ja_task, JAT_task_list)) {
1833
if (lGetUlong(lFirst(lGetList(task, JB_ja_tasks)), JAT_status)
1835
/* at least one task exists */
1845
DPRINTF(("adding jid "sge_u32" into successor list of job "sge_u32"\n",
1846
lGetUlong(jep, JB_job_number), pre_ident));
1848
/* add jid to successor_list of parent job */
1849
lAddSubUlong(parent_jep, JRE_job_number, lGetUlong(jep, JB_job_number),
1850
JB_jid_successor_list, JRE_Type);
1855
DPRINTF(("job "sge_u32" from predecessor list already exited - ignoring it\n",
1859
lDelSubUlong(jep, JRE_job_number, pre_ident, JB_jid_predecessor_list);
1862
DPRINTF(("predecessor job "sge_u32" does not exist\n", pre_ident));
1864
lDelSubUlong(jep, JRE_job_number, pre_ident, JB_jid_predecessor_list);
1871
build up jid_ad hold links for a job
1872
no need to spool them or to send
1873
events to update schedd data
1875
/*
 * job_suc_pre_ad() -- counterpart of job_suc_pre() for the ja_ad lists
 *
 * Works on JB_ja_ad_predecessor_list of jep: a predecessor that is located
 * in the master job list gets this job added to its
 * JB_ja_ad_successor_list; the debug messages cover predecessors that
 * already exited or do not exist, followed by removal of the entry from
 * the predecessor list. JB_ja_a_h_ids is checked here in addition to the
 * id lists checked by job_suc_pre().
 */
void job_suc_pre_ad(
1878
lListElem *parent_jep, *prep, *task;
1880
DENTER(TOP_LAYER, "job_suc_pre_ad");
1883
here we check whether every job
1884
in the predecessor list has exited
1886
prep = lFirst(lGetList(jep, JB_ja_ad_predecessor_list));
1888
u_long32 pre_ident = lGetUlong(prep, JRE_job_number);
1889
/* Look the predecessor up by job number in the master job list */
parent_jep = job_list_locate(*(object_type_get_master_list(SGE_TYPE_JOB)), pre_ident);
1895
if (lGetList(parent_jep, JB_ja_n_h_ids) != NULL ||
1896
lGetList(parent_jep, JB_ja_u_h_ids) != NULL ||
1897
lGetList(parent_jep, JB_ja_o_h_ids) != NULL ||
1898
lGetList(parent_jep, JB_ja_a_h_ids) != NULL ||
1899
lGetList(parent_jep, JB_ja_s_h_ids) != NULL) {
1903
for_each(ja_task, lGetList(parent_jep, JB_ja_tasks)) {
1904
if (lGetUlong(ja_task, JAT_status) != JFINISHED) {
1908
for_each(task, lGetList(ja_task, JAT_task_list)) {
1909
if (lGetUlong(lFirst(lGetList(task, JB_ja_tasks)), JAT_status)
1911
/* at least one task exists */
1921
DPRINTF(("adding jid "sge_u32" into successor list of job "sge_u32"\n",
1922
lGetUlong(jep, JB_job_number), pre_ident));
1924
/* add jid to successor_list of parent job */
1925
lAddSubUlong(parent_jep, JRE_job_number, lGetUlong(jep, JB_job_number),
1926
JB_ja_ad_successor_list, JRE_Type);
1931
DPRINTF(("job "sge_u32" from predecessor list already exited - ignoring it\n",
1935
lDelSubUlong(jep, JRE_job_number, pre_ident, JB_ja_ad_predecessor_list);
1938
DPRINTF(("predecessor job "sge_u32" does not exist\n", pre_ident));
1940
lDelSubUlong(jep, JRE_job_number, pre_ident, JB_ja_ad_predecessor_list);
1946
/* handle all per task attributes which are changeable
1947
from outside using gdi requests
1950
new_ja_task - new task structure DST; may be NULL for not enrolled tasks
1952
tep - reduced task element SRC
1954
/*
 * mod_task_attributes() -- apply per task change requests from tep
 *
 * Handles two attributes of the reduced task element tep:
 *  - JAT_fshare: only when is_task_enrolled is set; ruser must be an
 *    operator. A changed value is stored in new_ja_task.
 *  - JAT_hold: the request value carries a command (MINUS_H_CMD_SUB, _ADD
 *    or _SET) and the hold targets selected by MINUS_H_TGT_ALL. System
 *    holds need manager rights, operator holds need operator rights, user
 *    holds need the job owner or an operator.
 * MOD_EVENT is set in *trigger when a value changed and result messages
 * are added to alpp. Returns STATUS_ENOOPR when a permission check fails.
 */
static int mod_task_attributes(
1956
lListElem *new_ja_task,
1963
int is_task_enrolled
1965
u_long32 jobid = lGetUlong(job, JB_job_number);
1966
u_long32 jataskid = lGetUlong(new_ja_task, JAT_task_number);
1969
DENTER(TOP_LAYER, "mod_task_attributes");
1971
if (is_task_enrolled) {
1973
/* --- JAT_fshare */
1974
if ((pos=lGetPosViaElem(tep, JAT_fshare, SGE_NO_ABORT))>=0) {
1977
/* need to be operator */
1978
if (!manop_is_operator(ruser)) {
1979
ERROR((SGE_EVENT, MSG_SGETEXT_MUST_BE_OPR_TO_SS, ruser,
1980
MSG_JOB_CHANGESHAREFUNC));
1981
answer_list_add(alpp, SGE_EVENT, STATUS_ENOOPR, ANSWER_QUALITY_ERROR);
1982
DRETURN(STATUS_ENOOPR);
1984
uval = lGetPosUlong(tep, pos);
1985
if (uval != lGetUlong(new_ja_task, JAT_fshare)) {
1986
lSetUlong(new_ja_task, JAT_fshare, uval);
1987
DPRINTF(("JAT_fshare = "sge_u32"\n", uval));
1988
*trigger |= MOD_EVENT;
1991
sprintf(SGE_EVENT, MSG_JOB_SETSHAREFUNC_SSUUU,
1992
ruser, rhost, sge_u32c(jobid), sge_u32c(jataskid), sge_u32c(uval));
1993
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
1999
if ((pos=lGetPosViaElem(tep, JAT_hold, SGE_NO_ABORT))>=0) {
2000
/* The request value packs the hold targets (bits of MINUS_H_TGT_ALL)
   together with the command (all remaining bits) */
u_long32 op_code_and_hold = lGetPosUlong(tep, pos);
2001
u_long32 op_code = op_code_and_hold & ~MINUS_H_TGT_ALL;
2002
u_long32 target = op_code_and_hold & MINUS_H_TGT_ALL;
2003
int is_sub_op_code = (op_code == MINUS_H_CMD_SUB);
2004
u_long32 old_hold = job_get_hold_state(job, jataskid);
2008
DPRINTF(("******** jo_id = %d\n", jobid ));
2009
DPRINTF(("******** task_id = %d\n", jataskid ));
2011
DPRINTF(("********** op_code_and_hold = %x\n", op_code_and_hold ));
2012
DPRINTF(("******************* op_code = %x\n", op_code ));
2013
DPRINTF(("*************is_sub_op_code = %x\n", is_sub_op_code));
2014
DPRINTF(("****************** old_hold = %x\n", old_hold));
2015
DPRINTF(("******************** target = %x\n", target ));
2016
DPRINTF(("******* MINUS_H_TGT_SYSTEM = %x\n", MINUS_H_TGT_SYSTEM ));
2017
DPRINTF(("***** MINUS_H_TGT_OPERATOR = %x\n", MINUS_H_TGT_OPERATOR ));
2018
DPRINTF(("********* MINUS_H_TGT_USER = %x\n", MINUS_H_TGT_USER));
2021
/* Tasks that are not enrolled use the pending task template of the job */
if (!is_task_enrolled) {
2022
new_ja_task = job_get_ja_task_template_pending(job, jataskid);
2026
case MINUS_H_CMD_SUB:
2027
new_hold = old_hold & ~target;
2028
/* DPRINTF(("MINUS_H_CMD_SUB = "sge_u32"\n", new_hold)); */
2030
case MINUS_H_CMD_ADD:
2031
new_hold = old_hold | target;
2032
/* DPRINTF(("MINUS_H_CMD_ADD = "sge_u32"\n", new_hold)); */
2034
case MINUS_H_CMD_SET:
2036
/* DPRINTF(("MINUS_H_CMD_SET = "sge_u32"\n", new_hold)); */
2039
new_hold = old_hold;
2040
/* DPRINTF(("MINUS_H_CMD_[default] = "sge_u32"\n", new_hold)); */
2044
if (new_hold != old_hold) {
2045
/* System holds require manager rights */
if ((target & MINUS_H_TGT_SYSTEM) == MINUS_H_TGT_SYSTEM) {
2046
if (!manop_is_manager(ruser)) {
2047
u_long32 new_mask = op_code_and_hold & ~MINUS_H_TGT_SYSTEM;
2048
lSetPosUlong(tep, pos, new_mask);
2049
ERROR((SGE_EVENT, MSG_SGETEXT_MUST_BE_MGR_TO_SS, ruser,
2050
is_sub_op_code ? MSG_JOB_RMHOLDMNG : MSG_JOB_SETHOLDMNG));
2051
answer_list_add(alpp, SGE_EVENT, STATUS_ENOOPR, ANSWER_QUALITY_ERROR);
2052
DRETURN(STATUS_ENOOPR);
2056
/* Operator holds require operator rights */
if ( (target & MINUS_H_TGT_OPERATOR) == MINUS_H_TGT_OPERATOR) {
2057
if (!manop_is_operator(ruser)) {
2058
u_long32 new_mask = op_code_and_hold & ~MINUS_H_TGT_OPERATOR;
2059
lSetPosUlong(tep, pos, new_mask);
2061
ERROR((SGE_EVENT, MSG_SGETEXT_MUST_BE_OPR_TO_SS, ruser,
2062
is_sub_op_code ? MSG_JOB_RMHOLDOP : MSG_JOB_SETHOLDOP));
2063
answer_list_add(alpp, SGE_EVENT, STATUS_ENOOPR, ANSWER_QUALITY_ERROR);
2064
DRETURN(STATUS_ENOOPR);
2069
/* User holds may be changed by the job owner or by an operator */
if ((target & MINUS_H_TGT_USER) == MINUS_H_TGT_USER) {
2070
if (strcmp(ruser, lGetString(job, JB_owner)) &&
2071
!manop_is_operator(ruser)) {
2072
u_long32 new_mask = op_code_and_hold & ~MINUS_H_TGT_USER;
2073
lSetPosUlong(tep, pos, new_mask);
2074
ERROR((SGE_EVENT, MSG_SGETEXT_MUST_BE_JOB_OWN_TO_SUS, ruser,
2075
sge_u32c(jobid), is_sub_op_code ?
2076
MSG_JOB_RMHOLDUSER : MSG_JOB_SETHOLDUSER));
2077
answer_list_add(alpp, SGE_EVENT, STATUS_ENOOPR, ANSWER_QUALITY_ERROR);
2078
DRETURN(STATUS_ENOOPR);
2083
/* Store the new hold state for this task and flag the modification */
job_set_hold_state(job, NULL, jataskid, new_hold);
2084
*trigger |= MOD_EVENT;
2086
if (new_hold != old_hold) {
2088
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JATASK_SUU, MSG_JOB_HOLD,
2089
sge_u32c(jobid), sge_u32c(jataskid));
2091
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_HOLD,
2094
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2101
/****** sge_job/is_changes_consumables() ******************************************
2103
* is_changes_consumables() -- detect changes with consumable resource request
2106
* static bool is_changes_consumables(lList **alpp, lList* new, lList* old)
2109
* lList** alpp - answer list pointer pointer
2110
* lList* new - jobs new JB_hard_resource_list
2111
* lList* old - jobs old JB_hard_resource_list
2114
* bool - false, nothing changed
2116
* MT-NOTE: is thread safe (works only on parsed in variables)
2118
*******************************************************************************/
2119
static bool is_changes_consumables(lList **alpp, lList* new, lList* old)
2121
lListElem *new_entry = NULL;
2122
lListElem *old_entry = NULL;
2123
const char *name = NULL;
2125
DENTER(TOP_LAYER, "is_changes_consumables");
2127
/* ensure all old resource requests implying consumables
2128
debitation are still contained in new resource request list */
2129
for_each(old_entry, old) {
2131
/* ignore non-consumables */
2132
if (!lGetBool(old_entry, CE_consumable)) {
2135
name = lGetString(old_entry, CE_name);
2137
/* search it in new hard resource list */
2138
if (lGetElemStr(new, CE_name, name) == NULL) {
2139
ERROR((SGE_EVENT, MSG_JOB_MOD_MISSINGRUNNINGJOBCONSUMABLE_S, name));
2140
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2145
/* ensure all new resource requests implying consumable
2146
debitation were also contained in old resource request list
2147
AND have not changed the requested amount */
2148
for_each(new_entry, new) {
2150
/* ignore non-consumables */
2151
if (!lGetBool(new_entry, CE_consumable)) {
2154
name = lGetString(new_entry, CE_name);
2156
/* search it in old hard resource list */
2157
if ((old_entry = lGetElemStr(old, CE_name, name)) == NULL) {
2158
ERROR((SGE_EVENT, MSG_JOB_MOD_ADDEDRUNNINGJOBCONSUMABLE_S, name));
2159
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2163
/* compare request in old_entry with new_entry */
2164
DPRINTF(("request: \"%s\" old: %f new: %f\n", name,
2165
lGetDouble(old_entry, CE_doubleval),
2166
lGetDouble(new_entry, CE_doubleval)));
2168
/* The requested amounts (CE_doubleval) must be exactly equal */
if (lGetDouble(old_entry, CE_doubleval) !=
2169
lGetDouble(new_entry, CE_doubleval)) {
2170
ERROR((SGE_EVENT, MSG_JOB_MOD_CHANGEDRUNNINGJOBCONSUMABLE_S, name));
2171
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2180
/****** sge_job/deny_soft_consumables() ****************************************
2182
* deny_soft_consumables() -- Deny soft consumables
2185
* static int deny_soft_consumables(lList **alpp, lList *srl, const lList *master_centry_list)
2188
* Find out if consumables are requested and deny them.
2191
* lList** alpp - answer list pointer pointer
2192
* lList *srl - jobs JB_soft_resource_list
2193
* const lList *master_centry_list - the master centry list
2196
* static int - 0 request can pass
2197
* !=0 consumables requested soft
2199
*******************************************************************************/
2200
static int deny_soft_consumables(lList **alpp, lList *srl, const lList *master_centry_list)
2202
lListElem *entry, *dcep;
2205
DENTER(TOP_LAYER, "deny_soft_consumables");
2207
/* ensure no consumables are requested in JB_soft_resource_list */
2208
for_each(entry, srl) {
2209
name = lGetString(entry, CE_name);
2211
/* A request naming an attribute unknown to master_centry_list is an error */
if (!(dcep = centry_list_locate(master_centry_list, name))) {
2212
ERROR((SGE_EVENT, MSG_ATTRIB_MISSINGATTRIBUTEXINCOMPLEXES_S , name));
2213
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2217
/* ignore non-consumables */
2218
if (lGetBool(dcep, CE_consumable)) {
2219
ERROR((SGE_EVENT, MSG_JOB_MOD_SOFTREQCONSUMABLE_S, name));
2220
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2230
/*
 * mod_job_attributes() -- copy the attributes present in a reduced job
 * element into the job that is being modified.
 *
 * new_job receives the changes; jep is the reduced element that carries only
 * the fields of the change request.  For each JB_* field located in jep via
 * lGetPosViaElem() the visible code validates the value, stores it in
 * new_job, appends an answer to alpp and, for most fields, ORs an event flag
 * into *trigger.  Failed checks leave the function through DRETURN() with a
 * STATUS_* code.
 *
 * A changed JB_ar and any JB_project request set may_not_be_running; the
 * final check rejects the request when is_running is set as well.
 *
 * NOTE(review): alpp, ruser, rhost and trigger are used below but their
 * declarations are not part of this listing; the statement that sets
 * is_running, several closing braces and other statements are absent as
 * well -- confirm against the complete file.
 */
static int mod_job_attributes(
2231
lListElem *new_job, /* new job */
2232
lListElem *jep, /* reduced job element */
2239
int is_running = 0, may_not_be_running = 0;
2241
u_long32 jobid = lGetUlong(new_job, JB_job_number);
2243
DENTER(TOP_LAYER, "mod_job_attributes");
2245
/* is job running ? */
2248
for_each(ja_task, lGetList(new_job, JB_ja_tasks)) {
2249
if (lGetUlong(ja_task, JAT_status) & JTRANSFERING ||
2250
lGetUlong(ja_task, JAT_status) & JRUNNING) {
2258
* Do we have per task change request?
2260
if ((pos=lGetPosViaElem(jep, JB_ja_tasks, SGE_NO_ABORT))>=0) {
2261
lList *ja_task_list = lGetPosList(jep, pos);
2262
lListElem *ja_task = lFirst(ja_task_list);
2263
int new_job_is_array = job_is_array(new_job);
2264
u_long32 jep_ja_task_number = lGetNumberOfElem(ja_task_list);
2267
* Is it a valid per task request:
2268
* - at least one task element
2270
* - multi tasks requests are only valid for array jobs
2273
ERROR((SGE_EVENT, MSG_SGETEXT_NEEDONEELEMENT_SS,
2274
lNm2Str(JB_ja_tasks), SGE_FUNC));
2275
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2276
DRETURN(STATUS_EUNKNOWN);
2278
if ((pos = lGetPosViaElem(ja_task, JAT_task_number, SGE_NO_ABORT)) < 0) {
2279
ERROR((SGE_EVENT, MSG_SGETEXT_MISSINGCULLFIELD_SS,
2280
lNm2Str(JAT_task_number), SGE_FUNC));
2281
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2282
DRETURN(STATUS_EUNKNOWN);
2284
if (!new_job_is_array && jep_ja_task_number > 1) {
2285
ERROR((SGE_EVENT, MSG_JOB_NOJOBARRAY_U, sge_u32c(jobid)));
2286
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2287
DRETURN(STATUS_EUNKNOWN);
2293
if (ja_task_list != NULL) {
2294
lListElem *first = lFirst(ja_task_list);
/* a task number of 0 in the first request element selects all tasks */
2295
u_long32 handle_all_tasks = !lGetUlong(first, JAT_task_number);
2297
if (handle_all_tasks) {
2298
int list_id[] = {JB_ja_n_h_ids, JB_ja_u_h_ids, JB_ja_o_h_ids,
2300
lListElem *dst_ja_task = NULL;
2304
* Visit all unenrolled tasks
2306
while (list_id[++i] != -1) {
2308
lCopyList("task_id_range", lGetList(new_job, list_id[i]));
2309
lListElem *range = NULL;
2312
for_each(range, range_list) {
2313
for(id = lGetUlong(range, RN_min);
2314
id <= lGetUlong(range, RN_max);
2315
id += lGetUlong(range, RN_step)) {
2318
job_get_ja_task_template_pending(new_job, id);
2320
mod_task_attributes(new_job, dst_ja_task, ja_task,
2321
alpp, ruser, rhost, trigger,
2322
job_is_array(new_job), 0);
2325
lFreeList(&range_list);
2328
* Visit enrolled tasks
2330
for_each (dst_ja_task, lGetList(new_job, JB_ja_tasks)) {
2331
mod_task_attributes(new_job, dst_ja_task, ja_task, alpp,
2332
ruser, rhost, trigger,
2333
job_is_array(new_job), 1);
2336
for_each (ja_task, ja_task_list) {
2337
u_long32 ja_task_id = lGetUlong(ja_task, JAT_task_number);
2338
int is_defined = job_is_ja_task_defined(new_job, ja_task_id);
2341
lListElem *dst_ja_task = NULL;
2342
int is_enrolled = 1;
2344
dst_ja_task = job_search_task(new_job, NULL, ja_task_id);
2345
if (dst_ja_task == NULL) {
2348
job_get_ja_task_template_pending(new_job,
2351
mod_task_attributes(new_job, dst_ja_task, ja_task, alpp,
2352
ruser, rhost, trigger,
2353
job_is_array(new_job), is_enrolled);
2355
; /* Ignore silently */
2363
/* ---- JB_override_tickets
2364
A attribute that must be allowed to
2365
be changed when job is running
2367
if ((pos=lGetPosViaElem(jep, JB_override_tickets, SGE_NO_ABORT))>=0) {
2368
uval=lGetPosUlong(jep, pos);
2370
/* need to be operator */
2371
if (!manop_is_operator(ruser)) {
2372
ERROR((SGE_EVENT, MSG_SGETEXT_MUST_BE_OPR_TO_SS, ruser,
2373
MSG_JOB_CHANGEOVERRIDETICKS));
2374
answer_list_add(alpp, SGE_EVENT, STATUS_ENOOPR, ANSWER_QUALITY_ERROR);
2375
DRETURN(STATUS_ENOOPR);
2379
if (uval!=lGetUlong(new_job, JB_override_tickets)) {
2380
lSetUlong(new_job, JB_override_tickets, uval);
2381
*trigger |= MOD_EVENT;
2384
sprintf(SGE_EVENT, MSG_JOB_SETOVERRIDETICKS_SSUU,
2385
ruser, rhost, sge_u32c(jobid), sge_u32c(uval));
2386
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2389
/* ---- JB_priority */
2390
if ((pos=lGetPosViaElem(jep, JB_priority, SGE_NO_ABORT))>=0) {
2391
u_long32 old_priority;
2392
uval=lGetPosUlong(jep, pos);
/* only raising the priority requires operator rights */
2393
if (uval > (old_priority=lGetUlong(new_job, JB_priority))) {
2394
/* need to be at least operator */
2395
if (!manop_is_operator(ruser)) {
2396
ERROR((SGE_EVENT, MSG_SGETEXT_MUST_BE_OPR_TO_SS, ruser, MSG_JOB_PRIOINC));
2397
answer_list_add(alpp, SGE_EVENT, STATUS_ENOOPR, ANSWER_QUALITY_ERROR);
2398
DRETURN(STATUS_ENOOPR);
2402
if (uval!=old_priority)
2403
*trigger |= PRIO_EVENT;
2405
lSetUlong(new_job, JB_priority, uval);
2407
sprintf(SGE_EVENT, MSG_JOB_PRIOSET_SSUI,
2408
ruser, rhost, sge_u32c(jobid), ((int)(uval)) - BASE_PRIORITY);
2409
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2413
/* ---- JB_jobshare */
2414
if ((pos=lGetPosViaElem(jep, JB_jobshare, SGE_NO_ABORT))>=0) {
2415
u_long32 old_jobshare;
2416
uval=lGetPosUlong(jep, pos);
2417
if (uval != (old_jobshare=lGetUlong(new_job, JB_jobshare))) {
2418
/* need to be owner or at least operator */
2419
if (strcmp(ruser, lGetString(new_job, JB_owner)) && !manop_is_operator(ruser)) {
2420
ERROR((SGE_EVENT, MSG_SGETEXT_MUST_BE_OPR_TO_SS, ruser, MSG_JOB_CHANGEJOBSHARE));
2421
answer_list_add(alpp, SGE_EVENT, STATUS_ENOOPR, ANSWER_QUALITY_ERROR);
2422
DRETURN(STATUS_ENOOPR);
2426
if (uval!=old_jobshare)
2427
*trigger |= PRIO_EVENT;
2429
lSetUlong(new_job, JB_jobshare, uval);
2431
sprintf(SGE_EVENT, MSG_JOB_JOBSHARESET_SSUU,
2432
ruser, rhost, sge_u32c(jobid), sge_u32c(uval));
2433
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2438
/* ---- JB_ar: a changed advance reservation id sets may_not_be_running */
if ((pos=lGetPosViaElem(jep, JB_ar, SGE_NO_ABORT))>=0) {
2439
u_long32 ar_id=lGetUlong(new_job, JB_ar);
2440
uval=lGetPosUlong(jep, pos);
2441
if (uval != ar_id) {
2442
/* need to be owner or at least operator */
2443
if (strcmp(ruser, lGetString(new_job, JB_owner)) && !manop_is_operator(ruser)) {
2444
ERROR((SGE_EVENT, MSG_SGETEXT_MUST_BE_OPR_TO_SS, ruser, MSG_JOB_CHANGEJOBAR));
2445
answer_list_add(alpp, SGE_EVENT, STATUS_ENOOPR, ANSWER_QUALITY_ERROR);
2446
DRETURN(STATUS_ENOOPR);
2448
*trigger |= PRIO_EVENT;
2449
may_not_be_running = 1;
2452
lSetUlong(new_job, JB_ar, uval);
2454
sprintf(SGE_EVENT, MSG_JOB_JOBARSET_SSUU,
2455
ruser, rhost, sge_u32c(jobid), sge_u32c(uval));
2456
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2461
/* ---- JB_deadline */
2462
/* If it is a deadline job the user has to be a deadline user */
2463
if ((pos=lGetPosViaElem(jep, JB_deadline, SGE_NO_ABORT))>=0) {
2464
if (!userset_is_deadline_user(*object_type_get_master_list(SGE_TYPE_USERSET), ruser)) {
2465
ERROR((SGE_EVENT, MSG_JOB_NODEADLINEUSER_S, ruser));
2466
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2467
DRETURN(STATUS_EUNKNOWN);
2469
lSetUlong(new_job, JB_deadline, lGetUlong(jep, JB_deadline));
2470
*trigger |= MOD_EVENT;
2471
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_DEADLINETIME, sge_u32c(jobid));
2472
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2477
/* ---- JB_execution_time */
2478
if ((pos=lGetPosViaElem(jep, JB_execution_time, SGE_NO_ABORT))>=0) {
2479
DPRINTF(("got new JB_execution_time\n"));
2480
lSetUlong(new_job, JB_execution_time, lGetUlong(jep, JB_execution_time));
2481
*trigger |= MOD_EVENT;
2482
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_STARTTIME, sge_u32c(jobid));
2483
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2486
/* ---- JB_account */
2487
if ((pos=lGetPosViaElem(jep, JB_account, SGE_NO_ABORT))>=0) {
2488
DPRINTF(("got new JB_account\n"));
2489
if (verify_str_key(alpp, lGetString(jep, JB_account), MAX_VERIFY_STRING,
2490
"account string", QSUB_TABLE) != STATUS_OK) {
2491
DRETURN(STATUS_EUNKNOWN);
2493
lSetString(new_job, JB_account, lGetString(jep, JB_account));
2494
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_ACCOUNT, sge_u32c(jobid));
2495
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2499
/* ---- JB_cwd */
if ((pos=lGetPosViaElem(jep, JB_cwd, SGE_NO_ABORT))>=0) {
2500
DPRINTF(("got new JB_cwd\n"));
2501
lSetString(new_job, JB_cwd, lGetString(jep, JB_cwd));
2502
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_WD, sge_u32c(jobid));
2503
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2506
/* ---- JB_checkpoint_name */
2507
if ((pos=lGetPosViaElem(jep, JB_checkpoint_name, SGE_NO_ABORT))>=0) {
2508
const char *ckpt_name;
2510
DPRINTF(("got new JB_checkpoint_name\n"));
2511
ckpt_name = lGetString(jep, JB_checkpoint_name);
/* a NULL name skips the lookup and is stored as is */
2512
if (ckpt_name && !ckpt_list_locate(*object_type_get_master_list(SGE_TYPE_CKPT), ckpt_name)) {
2513
ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXIST_SS,
2514
MSG_OBJ_CKPT, ckpt_name));
2515
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2516
DRETURN(STATUS_EUNKNOWN);
2518
lSetString(new_job, JB_checkpoint_name, ckpt_name);
2519
*trigger |= MOD_EVENT;
2520
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_OBJ_CKPT, sge_u32c(jobid));
2521
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2524
/* ---- JB_stderr_path_list */
2525
if ((pos=lGetPosViaElem(jep, JB_stderr_path_list, SGE_NO_ABORT))>=0) {
2527
DPRINTF(("got new JB_stderr_path_list\n"));
2529
if ((status = job_resolve_host_for_path_list(jep, alpp, JB_stderr_path_list)) != STATUS_OK){
2532
lSetList(new_job, JB_stderr_path_list,
2533
lCopyList("", lGetList(jep, JB_stderr_path_list)));
2534
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_STDERRPATHLIST, sge_u32c(jobid));
2535
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2538
/* ---- JB_stdin_path_list */
2539
if ((pos=lGetPosViaElem(jep, JB_stdin_path_list, SGE_NO_ABORT))>=0) {
2541
DPRINTF(("got new JB_stdin_path_list\n"));
2543
if ((status = job_resolve_host_for_path_list(jep, alpp,JB_stdin_path_list)) != STATUS_OK) {
2548
lSetList(new_job, JB_stdin_path_list,
2549
lCopyList("", lGetList(jep, JB_stdin_path_list)));
2550
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_STDINPATHLIST,
2552
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2555
/* ---- JB_reserve */
2556
if ((pos=lGetPosViaElem(jep, JB_reserve, SGE_NO_ABORT))>=0) {
2557
DPRINTF(("got new JB_reserve\n"));
2558
lSetBool(new_job, JB_reserve, lGetBool(jep, JB_reserve));
2559
*trigger |= MOD_EVENT;
2560
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_RESERVE, sge_u32c(jobid));
2561
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2564
/* ---- JB_merge_stderr */
2565
if ((pos=lGetPosViaElem(jep, JB_merge_stderr, SGE_NO_ABORT))>=0) {
2566
DPRINTF(("got new JB_merge_stderr\n"));
2567
lSetBool(new_job, JB_merge_stderr, lGetBool(jep, JB_merge_stderr));
2568
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_MERGEOUTPUT, sge_u32c(jobid));
2569
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2572
/* ---- JB_hard_resource_list */
2574
lList *master_centry_list = *object_type_get_master_list(SGE_TYPE_CENTRY);
2576
if ((pos=lGetPosViaElem(jep, JB_hard_resource_list, SGE_NO_ABORT))>=0) {
2577
bool is_changed = false;
2579
DPRINTF(("got new JB_hard_resource_list\n"));
2580
if (centry_list_fill_request(lGetList(jep, JB_hard_resource_list),
2581
alpp, master_centry_list,
2582
false, true, false)) {
2583
DRETURN(STATUS_EUNKNOWN);
2585
if (compress_ressources(alpp, lGetList(jep,JB_hard_resource_list), SGE_OBJ_JOB)) {
2586
DRETURN(STATUS_EUNKNOWN);
2589
/* to prevent inconsistent consumable mgmnt:
2590
- deny resource requests changes on consumables for running jobs (IZ #251)
2591
- a better solution is to store for each running job the amount of resources */
2593
is_changed = is_changes_consumables(alpp, lGetList(jep, JB_hard_resource_list),
2594
lGetList(new_job, JB_hard_resource_list));
2595
if (is_running && is_changed) {
2596
DRETURN(STATUS_EUNKNOWN);
2599
if (!centry_list_is_correct(lGetList(jep, JB_hard_resource_list), alpp)) {
2600
DRETURN(STATUS_EUNKNOWN);
2603
lSetList(new_job, JB_hard_resource_list, lCopyList("", lGetList(jep, JB_hard_resource_list)));
2604
*trigger |= MOD_EVENT;
2605
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_HARDRESOURCELIST, sge_u32c(jobid));
2606
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2609
/* ---- JB_soft_resource_list */
2610
if ((pos=lGetPosViaElem(jep, JB_soft_resource_list, SGE_NO_ABORT))>=0) {
2611
DPRINTF(("got new JB_soft_resource_list\n"));
2612
if (centry_list_fill_request(lGetList(jep, JB_soft_resource_list), alpp,
2613
master_centry_list, false, true, false)) {
2614
DRETURN(STATUS_EUNKNOWN);
2616
if (compress_ressources(alpp, lGetList(jep, JB_soft_resource_list), SGE_OBJ_JOB)) {
2617
DRETURN(STATUS_EUNKNOWN);
2619
if (deny_soft_consumables(alpp, lGetList(jep, JB_soft_resource_list), master_centry_list)) {
2620
DRETURN(STATUS_EUNKNOWN);
2622
if (!centry_list_is_correct(lGetList(jep, JB_soft_resource_list), alpp)) {
2623
DRETURN(STATUS_EUNKNOWN);
2626
lSetList(new_job, JB_soft_resource_list,
2627
lCopyList("", lGetList(jep, JB_soft_resource_list)));
2628
*trigger |= MOD_EVENT;
2629
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_SOFTRESOURCELIST, sge_u32c(jobid));
2630
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2634
/* ---- JB_mail_options */
2635
if ((pos=lGetPosViaElem(jep, JB_mail_options, SGE_NO_ABORT))>=0) {
2636
DPRINTF(("got new JB_mail_options\n"));
2637
lSetUlong(new_job, JB_mail_options, lGetUlong(jep, JB_mail_options));
2638
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_MAILOPTIONS, sge_u32c(jobid));
2639
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2642
/* ---- JB_mail_list */
2643
if ((pos=lGetPosViaElem(jep, JB_mail_list, SGE_NO_ABORT))>=0) {
2644
DPRINTF(("got new JB_mail_list\n"));
2645
lSetList(new_job, JB_mail_list,
2646
lCopyList("", lGetList(jep, JB_mail_list)));
2647
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_MAILLIST, sge_u32c(jobid));
2648
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2651
/* ---- JB_job_name */
2652
if ((pos=lGetPosViaElem(jep, JB_job_name, SGE_NO_ABORT))>=0 && lGetString(jep, JB_job_name)) {
2653
/* u_long32 succ_jid;*/
2654
const char *new_name = lGetString(jep, JB_job_name);
2656
DPRINTF(("got new JB_job_name\n"));
2658
/* perform checks only if job name _really_ changes */
2659
if (strcmp(new_name, lGetString(new_job, JB_job_name))) {
2660
char job_descr[100];
2661
const char *job_name;
2663
sprintf(job_descr, "job "sge_u32, jobid);
2664
job_name = lGetString(new_job, JB_job_name);
/* the new name is set first so it can be verified in place; the old one is
   written back when verification fails */
/* NOTE(review): job_name points at the string held by new_job before it is
   replaced; whether it stays valid after lSetString() depends on the CULL
   implementation -- confirm */
2665
lSetString(new_job, JB_job_name, new_name);
2666
if (object_verify_name(new_job, alpp, JB_job_name, job_descr)) {
2667
lSetString(new_job, JB_job_name, job_name);
2668
DRETURN(STATUS_EUNKNOWN);
2672
*trigger |= MOD_EVENT;
2673
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_JOBNAME, sge_u32c(jobid));
2674
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2677
/* ---- JB_jid_predecessor_list */
2678
if ((pos=lGetPosViaElem(jep, JB_jid_request_list, SGE_NO_ABORT))>=0 &&
2679
lGetList(jep,JB_jid_request_list)) {
2680
lList *new_pre_list = NULL, *exited_pre_list = NULL;
2681
lListElem *pre, *exited, *nxt, *job;
2683
lList *req_list = NULL, *pred_list = NULL;
/* the request is refused when it also carries per task changes */
2685
if (lGetPosViaElem(jep, JB_ja_tasks, SGE_NO_ABORT) != -1) {
2686
sprintf(SGE_EVENT, MSG_SGETEXT_OPTIONONLEONJOBS_U, sge_u32c(jobid));
2687
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2689
DRETURN(STATUS_EUNKNOWN);
2692
DPRINTF(("got new JB_jid_predecessor_list\n"));
2694
if (lGetNumberOfElem(lGetList(jep, JB_jid_request_list )) > 0)
2695
req_list = lCopyList("requested_jid_list", lGetList(jep, JB_jid_request_list ));
/* put the requested list into new_job and take the previous request and
   predecessor lists out; they are exchanged back if verification fails and
   freed in either case */
2697
lXchgList(new_job, JB_jid_request_list, &req_list);
2698
lXchgList(new_job, JB_jid_predecessor_list, &pred_list);
2700
if (job_verify_predecessors(new_job, alpp)) {
2701
lXchgList(new_job, JB_jid_request_list, &req_list);
2702
lXchgList(new_job, JB_jid_predecessor_list, &pred_list);
2703
lFreeList(&req_list);
2704
lFreeList(&pred_list);
2705
DRETURN(STATUS_EUNKNOWN);
2708
lFreeList(&req_list);
2709
lFreeList(&pred_list);
2711
new_pre_list = lGetList(new_job, JB_jid_predecessor_list);
2713
/* remove jobid's of all no longer existing jobs from this
2714
new job - this must be done before event is sent to schedd */
2715
nxt = lFirst(new_pre_list);
2717
int move_to_exited = 0;
2718
u_long32 pre_ident = lGetUlong(pre, JRE_job_number);
2721
DPRINTF(("jid: "sge_u32"\n", pre_ident));
2723
job = job_list_locate(*(object_type_get_master_list(SGE_TYPE_JOB)), pre_ident);
2725
/* in SGE jobs are exited when they don't exist */
2730
if (move_to_exited) {
2731
if (!exited_pre_list)
2732
exited_pre_list = lCreateList("exited list", JRE_Type);
2733
exited = lDechainElem(new_pre_list, pre);
2734
lAppendElem(exited_pre_list, exited);
2738
if (!lGetNumberOfElem(new_pre_list)){
2739
lSetList(new_job, JB_jid_predecessor_list, NULL);
2740
new_pre_list = NULL;
2742
else if (contains_dependency_cycles(new_job, lGetUlong(new_job, JB_job_number), alpp)) {
2743
DRETURN(STATUS_EUNKNOWN);
2747
*trigger |= (RECHAIN_JID_HOLD|MOD_EVENT);
2749
/* added primarily for own debugging purposes */
2751
char str_predec[256], str_exited[256];
2752
const char *delis[] = {NULL, ",", ""};
2754
int fields[] = { JRE_job_number, 0 };
2755
uni_print_list(NULL, str_predec, sizeof(str_predec)-1, new_pre_list, fields, delis, 0);
2756
uni_print_list(NULL, str_exited, sizeof(str_exited)-1, exited_pre_list, fields, delis, 0);
2757
sprintf(SGE_EVENT, MSG_JOB_HOLDLISTMOD_USS,
2758
sge_u32c(jobid), str_predec, str_exited);
2759
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2761
lFreeList(&exited_pre_list);
2764
/* ---- JB_ja_ad_predecessor_list */
2765
if ((pos=lGetPosViaElem(jep, JB_ja_ad_request_list, SGE_NO_ABORT))>=0 &&
2766
lGetList(jep,JB_ja_ad_request_list)) {
2767
lList *new_pre_list = NULL, *exited_pre_list = NULL;
2768
lListElem *pre, *exited, *nxt, *job;
2770
lList *req_list = NULL, *pred_list = NULL;
2772
if (lGetPosViaElem(jep, JB_ja_tasks, SGE_NO_ABORT) != -1) {
2773
sprintf(SGE_EVENT, MSG_SGETEXT_OPTIONONLEONJOBS_U, sge_u32c(jobid));
2774
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2776
DRETURN(STATUS_EUNKNOWN);
2779
DPRINTF(("got new JB_ja_ad_predecessor_list\n"));
2781
if (lGetNumberOfElem(lGetList(jep, JB_ja_ad_request_list )) > 0)
2782
req_list = lCopyList("requested_ja_ad_list", lGetList(jep, JB_ja_ad_request_list ));
/* same exchange, verify and restore-on-failure sequence as for
   JB_jid_request_list, applied to the array dependency lists */
2784
lXchgList(new_job, JB_ja_ad_request_list, &req_list);
2785
lXchgList(new_job, JB_ja_ad_predecessor_list, &pred_list);
2787
if (job_verify_predecessors_ad(new_job, alpp)) {
2788
lXchgList(new_job, JB_ja_ad_request_list, &req_list);
2789
lXchgList(new_job, JB_ja_ad_predecessor_list, &pred_list);
2790
lFreeList(&req_list);
2791
lFreeList(&pred_list);
2792
DRETURN(STATUS_EUNKNOWN);
2795
lFreeList(&req_list);
2796
lFreeList(&pred_list);
2798
new_pre_list = lGetList(new_job, JB_ja_ad_predecessor_list);
2800
/* remove jobid's of all no longer existing jobs from this
2801
new job - this must be done before event is sent to schedd */
2802
nxt = lFirst(new_pre_list);
2804
int move_to_exited = 0;
2805
u_long32 pre_ident = lGetUlong(pre, JRE_job_number);
2808
DPRINTF(("jid: "sge_u32"\n", pre_ident));
2810
job = job_list_locate(*(object_type_get_master_list(SGE_TYPE_JOB)), pre_ident);
2812
/* in SGE jobs are exited when they don't exist */
2817
if (move_to_exited) {
2818
if (!exited_pre_list)
2819
exited_pre_list = lCreateList("exited list", JRE_Type);
2820
exited = lDechainElem(new_pre_list, pre);
2821
lAppendElem(exited_pre_list, exited);
2825
if (!lGetNumberOfElem(new_pre_list)){
2826
lSetList(new_job, JB_ja_ad_predecessor_list, NULL);
2827
new_pre_list = NULL;
2829
else if (contains_dependency_cycles(new_job, lGetUlong(new_job, JB_job_number), alpp)) {
2830
DRETURN(STATUS_EUNKNOWN);
2833
*trigger |= (RECHAIN_JA_AD_HOLD|MOD_EVENT);
2835
/* added primarily for debugging purposes */
2837
char str_predec[256], str_exited[256];
2838
const char *delis[] = {NULL, ",", ""};
2840
int fields[] = { JRE_job_number, 0 };
2841
uni_print_list(NULL, str_predec, sizeof(str_predec)-1, new_pre_list, fields, delis, 0);
2842
uni_print_list(NULL, str_exited, sizeof(str_exited)-1, exited_pre_list, fields, delis, 0);
2843
sprintf(SGE_EVENT, MSG_JOB_HOLDARRAYLISTMOD_USS,
2844
sge_u32c(jobid), str_predec, str_exited);
2845
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2847
lFreeList(&exited_pre_list);
2850
/* ---- JB_notify */
2851
if ((pos=lGetPosViaElem(jep, JB_notify, SGE_NO_ABORT))>=0) {
2852
DPRINTF(("got new JB_notify\n"));
2853
lSetBool(new_job, JB_notify, lGetBool(jep, JB_notify));
2854
*trigger |= MOD_EVENT;
2855
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_NOTIFYBEHAVIOUR, sge_u32c(jobid));
2856
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2859
/* ---- JB_stdout_path_list */
2860
if ((pos=lGetPosViaElem(jep, JB_stdout_path_list, SGE_NO_ABORT))>=0) {
2862
DPRINTF(("got new JB_stdout_path_list?\n"));
2864
if ((status = job_resolve_host_for_path_list(jep, alpp, JB_stdout_path_list)) != STATUS_OK) {
2868
lSetList(new_job, JB_stdout_path_list,
2869
lCopyList("", lGetList(jep, JB_stdout_path_list)));
2870
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_STDOUTPATHLIST, sge_u32c(jobid));
2871
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2874
/* ---- JB_project */
2875
if ((pos=lGetPosViaElem(jep, JB_project, SGE_NO_ABORT))>=0) {
2876
const char *project;
2877
char* enforce_project;
2879
DPRINTF(("got new JB_project\n"));
/* enforce_project is released with FREE() on every visible exit of this section */
2881
enforce_project = mconf_get_enforce_project();
2883
project = lGetString(jep, JB_project);
2884
if (project && !prj_list_locate(*object_type_get_master_list(SGE_TYPE_PROJECT),
2886
ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXIST_SS, MSG_JOB_PROJECT, project));
2887
answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
2888
FREE(enforce_project);
/* NOTE(review): the answer carries STATUS_EEXIST while STATUS_EUNKNOWN is returned */
2889
DRETURN(STATUS_EUNKNOWN);
2891
if (!project && enforce_project &&
2892
!strcasecmp(enforce_project, "true")) {
2893
ERROR((SGE_EVENT, MSG_SGETEXT_NO_PROJECT));
2894
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
2895
FREE(enforce_project);
2896
DRETURN(STATUS_EUNKNOWN);
2898
lSetString(new_job, JB_project, project);
2899
may_not_be_running = 1;
2900
*trigger |= MOD_EVENT;
2901
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_PROJECT, sge_u32c(jobid));
2902
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2903
FREE(enforce_project);
2907
/* ---- JB_pe */
if ((pos=lGetPosViaElem(jep, JB_pe, SGE_NO_ABORT))>=0) {
2908
const char *pe_name;
2910
DPRINTF(("got new JB_pe\n"));
2911
pe_name = lGetString(jep, JB_pe);
2912
if (pe_name && !pe_list_find_matching(*object_type_get_master_list(SGE_TYPE_PE),
2914
ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXIST_SS, MSG_OBJ_PE, pe_name));
2915
answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
2916
DRETURN(STATUS_EUNKNOWN);
2918
lSetString(new_job, JB_pe, pe_name);
2919
*trigger |= MOD_EVENT;
2920
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_OBJ_PE, sge_u32c(jobid));
2921
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2924
/* ---- JB_pe_range */
2925
if ((pos=lGetPosViaElem(jep, JB_pe_range, SGE_NO_ABORT))>=0 && lGetList(jep, JB_pe_range)) {
2927
const char *pe_name;
2928
DPRINTF(("got new JB_pe_range\n"));
2930
/* reject PE ranges change requests for jobs without PE request */
2931
if (!(pe_name=lGetString(new_job, JB_pe))) {
2932
ERROR((SGE_EVENT, MSG_JOB_PERANGE_ONLY_FOR_PARALLEL));
2933
answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
2934
DRETURN(STATUS_EUNKNOWN);
/* the copy is verified before it is handed to new_job; it is freed here when
   verification fails */
2937
pe_range = lCopyList("", lGetList(jep, JB_pe_range));
2938
if (object_verify_pe_range(alpp, pe_name, pe_range, SGE_OBJ_JOB)!=STATUS_OK) {
2939
lFreeList(&pe_range);
2940
DRETURN(STATUS_EUNKNOWN);
2942
lSetList(new_job, JB_pe_range, pe_range);
2944
*trigger |= MOD_EVENT;
2945
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_SLOTRANGE, sge_u32c(jobid));
2946
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2949
/* ---- JB_hard_queue_list */
2950
if ((pos=lGetPosViaElem(jep, JB_hard_queue_list, SGE_NO_ABORT))>=0) {
2951
DPRINTF(("got new JB_hard_queue_list\n"));
2953
if (!qref_list_is_valid(lGetList(jep, JB_hard_queue_list), alpp)) {
2954
DRETURN(STATUS_EUNKNOWN);
2957
lSetList(new_job, JB_hard_queue_list,
2958
lCopyList("", lGetList(jep, JB_hard_queue_list)));
2959
*trigger |= MOD_EVENT;
2960
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_HARDQLIST, sge_u32c(jobid));
2961
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2964
/* ---- JB_soft_queue_list */
2965
if ((pos=lGetPosViaElem(jep, JB_soft_queue_list, SGE_NO_ABORT))>=0) {
2966
DPRINTF(("got new JB_soft_queue_list\n"));
2968
if (!qref_list_is_valid(lGetList(jep, JB_soft_queue_list), alpp)) {
2969
DRETURN(STATUS_EUNKNOWN);
2972
lSetList(new_job, JB_soft_queue_list,
2973
lCopyList("", lGetList(jep, JB_soft_queue_list)));
2974
*trigger |= MOD_EVENT;
2975
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_SOFTQLIST, sge_u32c(jobid));
2976
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2979
/* ---- JB_master_hard_queue_list */
2980
if ((pos=lGetPosViaElem(jep, JB_master_hard_queue_list, SGE_NO_ABORT))>=0) {
2981
DPRINTF(("got new JB_master_hard_queue_list\n"));
2983
if (!qref_list_is_valid(lGetList(jep, JB_master_hard_queue_list), alpp)) {
2984
DRETURN(STATUS_EUNKNOWN);
2987
lSetList(new_job, JB_master_hard_queue_list,
2988
lCopyList("", lGetList(jep, JB_master_hard_queue_list)));
2989
*trigger |= MOD_EVENT;
2990
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_MASTERHARDQLIST, sge_u32c(jobid));
2991
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
2994
/* ---- JB_restart */
2995
if ((pos=lGetPosViaElem(jep, JB_restart, SGE_NO_ABORT))>=0) {
2996
DPRINTF(("got new JB_restart\n"));
2997
lSetUlong(new_job, JB_restart, lGetUlong(jep, JB_restart));
2998
*trigger |= MOD_EVENT;
2999
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_RESTARTBEHAVIOR, sge_u32c(jobid));
3000
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
3003
/* ---- JB_shell_list */
3004
if ((pos=lGetPosViaElem(jep, JB_shell_list, SGE_NO_ABORT))>=0) {
3006
DPRINTF(("got new JB_shell_list\n"));
3008
if ((status = job_resolve_host_for_path_list(jep, alpp,JB_shell_list)) != STATUS_OK){
3012
lSetList(new_job, JB_shell_list,
3013
lCopyList("", lGetList(jep, JB_shell_list)));
3014
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_SHELLLIST, sge_u32c(jobid));
3015
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
3018
/* ---- JB_env_list */
3019
if ((pos=lGetPosViaElem(jep, JB_env_list, SGE_NO_ABORT))>=0) {
3020
lList *prefix_vars = NULL;
3021
lList *tmp_var_list = NULL;
3023
DPRINTF(("got new JB_env_list\n"));
3025
/* check for qsh without DISPLAY set */
3026
if (JOB_TYPE_IS_QSH(lGetUlong(new_job, JB_type))) {
3027
int ret = job_check_qsh_display(jep, alpp, false);
3028
if (ret != STATUS_OK) {
3033
/* save existing prefix env vars from being overwritten
3034
TODO: can we rule out that after that step a prefix
3035
env var appears two times in the env var list ? */
3036
tmp_var_list = lGetList(new_job, JB_env_list);
3037
var_list_split_prefix_vars(&tmp_var_list, &prefix_vars, VAR_PREFIX);
3038
lSetList(new_job, JB_env_list,
3039
lCopyList("", lGetList(jep, JB_env_list)));
3040
lAddList(lGetList(new_job, JB_env_list), &prefix_vars);
3041
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_ENVLIST, sge_u32c(jobid));
3042
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
3045
/* ---- JB_qs_args */
3046
if ((pos=lGetPosViaElem(jep, JB_qs_args, SGE_NO_ABORT))>=0) {
3047
DPRINTF(("got new JB_qs_args\n"));
3048
lSetList(new_job, JB_qs_args,
3049
lCopyList("", lGetList(jep, JB_qs_args)));
3050
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_QSARGS, sge_u32c(jobid));
3051
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
3055
/* ---- JB_job_args */
3056
if ((pos=lGetPosViaElem(jep, JB_job_args, SGE_NO_ABORT))>=0) {
3057
DPRINTF(("got new JB_job_args\n"));
3058
lSetList(new_job, JB_job_args,
3059
lCopyList("", lGetList(jep, JB_job_args)));
3060
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_SCRIPTARGS, sge_u32c(jobid));
3061
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
3064
/* ---- JB_verify_suitable_queues */
3065
if ((pos=lGetPosViaElem(jep, JB_verify_suitable_queues, SGE_NO_ABORT))>=0) {
3067
lSetUlong(new_job, JB_verify_suitable_queues,
3068
lGetUlong(jep, JB_verify_suitable_queues));
3069
ret = verify_suitable_queues(alpp, new_job, trigger);
3070
if (lGetUlong(new_job, JB_verify_suitable_queues)==JUST_VERIFY
3076
/* ---- JB_context */
3077
if ((pos=lGetPosViaElem(jep, JB_context, SGE_NO_ABORT))>=0) {
3078
DPRINTF(("got new JB_context\n"));
3079
set_context(lGetList(jep, JB_context), new_job);
3080
sprintf(SGE_EVENT, MSG_SGETEXT_MOD_JOBS_SU, MSG_JOB_CONTEXT, sge_u32c(jobid));
3081
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
3084
/* deny certain modifications of running jobs */
3085
if (may_not_be_running && is_running) {
3086
ERROR((SGE_EVENT, MSG_SGETEXT_CANT_MOD_RUNNING_JOBS_U, sge_u32c(jobid)));
3087
answer_list_add(alpp, SGE_EVENT, STATUS_EEXIST, ANSWER_QUALITY_ERROR);
3088
DRETURN(STATUS_EEXIST);
3094
/****** sge_job_qmaster/contains_dependency_cycles() ***************************
3096
* contains_dependency_cycles() -- detects cycles in the job dependencies
3099
* static bool contains_dependency_cycles(const lListElem * new_job,
3100
* u_long32 job_number, lList **alpp)
3103
* This function follows the depth-first search algorithm to look for cycles
3104
* in the job dependency list. It stops, when the first cycle is found. It
3105
* only performs the cycle check for a given job and not for all jobs in
3109
* const lListElem * new_job - job whose dependencies have to be evaluated
3110
* u_long32 job_number - job number, of the first job
3111
* lList **alpp - answer list
3114
* static bool - true, if there is a dependency cycle
3117
* Is not thread safe. Reads from the global Job-List
3119
*******************************************************************************/
3120
/*
 * Looks for job_number among the predecessors of new_job, first in
 * JB_jid_predecessor_list and then in JB_ja_ad_predecessor_list.  A
 * predecessor whose JRE_job_number equals job_number is reported through
 * ERROR() and the answer list; the other visible statement in each loop
 * recurses into the predecessor job taken from the master job list.
 *
 * NOTE(review): this listing lacks the declaration of pre_nr, the branch
 * structure around the recursive calls, the loop exits and the return
 * statement.  The result of job_list_locate() is handed to the recursive
 * call without a visible NULL check -- confirm against the complete file.
 */
static bool contains_dependency_cycles(const lListElem * new_job, u_long32 job_number, lList **alpp) {
3121
bool is_cycle = false;
3122
const lList *predecessor_list = lGetList(new_job, JB_jid_predecessor_list);
3123
const lList *predecessor_list_ad = lGetList(new_job, JB_ja_ad_predecessor_list);
3124
lListElem *pre_elem = NULL;
3127
DENTER(TOP_LAYER, "contains_dependency_cycles");
/* job dependencies (JB_jid_predecessor_list) */
3129
for_each(pre_elem, predecessor_list) {
3130
pre_nr = lGetUlong(pre_elem, JRE_job_number);
/* the searched job number shows up among the predecessors */
3131
if (pre_nr == job_number) {
3132
u_long32 temp = lGetUlong(new_job, JB_job_number);
3133
ERROR((SGE_EVENT, MSG_JOB_DEPENDENCY_CYCLE_UU, sge_u32c(job_number), sge_u32c(temp)));
3134
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3139
is_cycle = contains_dependency_cycles(job_list_locate(*(object_type_get_master_list(SGE_TYPE_JOB)), pre_nr), job_number, alpp);
/* array dependencies (JB_ja_ad_predecessor_list), handled the same way */
3145
for_each(pre_elem, predecessor_list_ad) {
3146
pre_nr = lGetUlong(pre_elem, JRE_job_number);
3147
if (pre_nr == job_number) {
3148
u_long32 temp = lGetUlong(new_job, JB_job_number);
3149
ERROR((SGE_EVENT, MSG_JOB_DEPENDENCY_CYCLE_UU, sge_u32c(job_number), sge_u32c(temp)));
3150
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3155
is_cycle = contains_dependency_cycles(job_list_locate(*(object_type_get_master_list(SGE_TYPE_JOB)), pre_nr), job_number, alpp);
3166
/****** qmaster/job/job_is_referenced_by_jobname() ****************************
3168
* job_is_referenced_by_jobname() -- is job referenced by another one
3171
* static u_long32 job_is_referenced_by_jobname(lListElem *jep)
3174
* Check whether a certain job is (still) referenced by a second
3175
* job in its -hold_jid list.
3178
* lListElem *jep - the job
3181
* static u_long32 - job ID of the job referencing 'jep' or 0 if no such
3182
******************************************************************************/
3184
/*
 * Walks JB_jid_successor_list of jep.  Each successor is looked up in the
 * master job list by its JRE_job_number and its JB_jid_predecessor_list is
 * searched for an entry whose JRE_job_name equals the JB_job_name of jep.
 *
 * NOTE(review): the declarations of succ_lp and succ_jid, the body executed
 * on a match and the return statements are not part of this listing.
 */
static u_long32 job_is_referenced_by_jobname(lListElem *jep)
3188
DENTER(TOP_LAYER, "job_is_referenced_by_jobname");
3190
succ_lp = lGetList(jep, JB_jid_successor_list);
3192
lListElem *succ_ep, *succ_jep;
3193
const char *job_name = lGetString(jep, JB_job_name);
3195
for_each (succ_ep, succ_lp) {
3197
succ_jid = lGetUlong(succ_ep, JRE_job_number);
/* the successor must still exist and name this job in its predecessor list */
3198
if ((succ_jep = job_list_locate(*(object_type_get_master_list(SGE_TYPE_JOB)), succ_jid)) &&
3199
lGetSubStr(succ_jep, JRE_job_name,
3200
job_name, JB_jid_predecessor_list)) {
3210
/****** qmaster/job/job_verify_predecessors() *********************************
3212
* job_verify_predecessors() -- verify -hold_jid list of a job
3215
* static int job_verify_predecessors(const lListElem *job,
3217
* lList *predecessors)
3220
* These checks are done:
3221
* #1 Ensure the job will not become its own predecessor
3222
* #2 resolve job names and regular expressions. The
3223
* job ids will be stored in JB_jid_predecessor_list
3226
* const lListElem *job - JB_Type element (JB_job_number may be 0 if
3227
* not yet known (at submit time))
3228
* lList **alpp - the answer list
3231
* int - returns != 0 if there is a problem with predecessors
3232
******************************************************************************/
3233
/*
 * Builds JB_jid_predecessor_list of job from its JB_jid_request_list.
 *
 * Request entries whose JRE_job_name starts with a digit are taken as job
 * ids: an id containing '.' or equal to the job's own id is rejected with
 * STATUS_EUNKNOWN, any other id is appended to the new list.  For the
 * remaining entries the jobs of the same owner are walked in the master job
 * list, each name is compared with string_base_cmp(TYPE_RESTR, ...), and the
 * visible code appends job numbers other than the job's own.  An empty
 * result list is freed before it is stored, so the job then receives the
 * pointer as left by lFreeList().
 *
 * NOTE(review): the declaration of pre, the branch that separates the
 * numeric and the name case, the test applied to result and the final return
 * are not part of this listing -- confirm against the complete file.
 */
static int job_verify_predecessors(lListElem *job, lList **alpp)
3235
u_long32 jobid = lGetUlong(job, JB_job_number);
3236
const lList *predecessors_req = NULL;
3237
lList *predecessors_id = NULL;
3239
lListElem *pre_temp;
3241
DENTER(TOP_LAYER, "job_verify_predecessors");
3243
predecessors_req = lGetList(job, JB_jid_request_list);
3244
predecessors_id = lCreateList("job_predecessors", JRE_Type);
3245
if (!predecessors_id) {
3246
ERROR((SGE_EVENT, MSG_JOB_MOD_JOBDEPENDENCY_MEMORY ));
3247
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3248
DRETURN(STATUS_EUNKNOWN);
3251
for_each(pre, predecessors_req) {
3252
const char *pre_ident = lGetString(pre, JRE_job_name);
/* NOTE(review): pre_ident is dereferenced without a visible NULL check, and
   isdigit() receives a plain char; the C library expects a value
   representable as unsigned char (or EOF) */
3254
if (isdigit(pre_ident[0])) {
/* ids of the form job.task are refused */
3255
if (strchr(pre_ident, '.')) {
3256
lFreeList(&predecessors_id);
3257
DPRINTF(("a job cannot wait for a task to finish\n"));
3258
ERROR((SGE_EVENT, MSG_JOB_MOD_UNKOWNJOBTOWAITFOR_S, pre_ident));
3259
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3260
DRETURN(STATUS_EUNKNOWN);
/* NOTE(review): atoi() yields an int that is compared with the u_long32 jobid
   and reports no conversion errors */
3262
if (atoi(pre_ident) == jobid) {
3263
lFreeList(&predecessors_id);
3264
DPRINTF(("got my own jobid in JRE_job_name\n"));
3265
ERROR((SGE_EVENT, MSG_JOB_MOD_GOTOWNJOBIDINHOLDJIDOPTION_U, sge_u32c(jobid)));
3266
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3267
DRETURN(STATUS_EUNKNOWN);
3269
pre_temp = lCreateElem(JRE_Type);
3271
lSetUlong(pre_temp, JRE_job_number, atoi(pre_ident));
3272
lAppendElem(predecessors_id, pre_temp);
3276
lListElem *user_job = NULL; /* JB_Type */
3277
lListElem *next_user_job = NULL; /* JB_Type */
3278
const void *user_iterator = NULL;
3279
const char *owner = lGetString(job, JB_owner);
/* iterate over the jobs in the master list that belong to the same owner */
3281
next_user_job = lGetElemStrFirst(*(object_type_get_master_list(SGE_TYPE_JOB)), JB_owner, owner, &user_iterator);
3283
while ((user_job = next_user_job)) {
3284
const char *job_name = lGetString(user_job, JB_job_name);
3285
int result = string_base_cmp(TYPE_RESTR, pre_ident, job_name) ;
/* a job never becomes its own predecessor */
3288
if (lGetUlong(user_job, JB_job_number) != jobid) {
3289
pre_temp = lCreateElem(JRE_Type);
3291
lSetUlong(pre_temp, JRE_job_number, lGetUlong(user_job, JB_job_number));
3292
lAppendElem(predecessors_id, pre_temp);
3297
next_user_job = lGetElemStrNext(*(object_type_get_master_list(SGE_TYPE_JOB)), JB_owner,
3298
owner, &user_iterator);
3301
/* if no matching job has been found we have to assume
3302
the job finished already */
3305
if (lGetNumberOfElem(predecessors_id) == 0) {
3306
lFreeList(&predecessors_id);
3309
lSetList(job, JB_jid_predecessor_list, predecessors_id);
3314
/****** qmaster/job/job_verify_predecessors_ad() *********************************
3316
* job_verify_predecessors_ad() -- verify -hold_jid_ad list of a job
3319
* static int job_verify_predecessors_ad(lListElem *job, lList **alpp)
3322
* These checks are done:
3323
* #1 Ensure the job will not become it's own predecessor
3324
* #2 Resolve job names and regulare expressions. The
3325
* job ids will be stored in JB_ja_ad_predecessor_list
3326
* #3 Ensure the jobs in the predecessor list are equivalent array jobs
3327
* #4 Update JB_ja_a_h_ids and JB_ja_a_n_ids according to the
3331
* lListElem *job - JB_Type element (JB_job_number may be 0 if
3332
* not yet know (at submit time)
3333
* lList **alpp - the answer list
3336
* int - returns != 0 if there is a problem with predecessors
3337
******************************************************************************/
3338
static int job_verify_predecessors_ad(lListElem *job, lList **alpp)
3340
u_long32 jobid = lGetUlong(job, JB_job_number);
3341
const lList *predecessors_req = NULL;
3342
lList *predecessors_id = NULL;
3344
lListElem *pre_temp;
3346
DENTER(TOP_LAYER, "job_verify_predecessors_ad");
3348
predecessors_req = lGetList(job, JB_ja_ad_request_list);
3349
predecessors_id = lCreateList("job_predecessors_ad", JRE_Type);
3350
if (!predecessors_id) {
3351
ERROR((SGE_EVENT, MSG_JOB_MOD_JOBDEPENDENCY_MEMORY ));
3352
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3353
DRETURN(STATUS_EUNKNOWN);
3356
/* only verify -hold_jid_ad option if predecessors are requested */
3357
if (lGetNumberOfElem(predecessors_req) > 0) {
3358
/* verify -t option was used to create this job */
3359
if (!job_is_array(job)) {
3360
lFreeList(&predecessors_id);
3361
DPRINTF(("could not create array dependence for non-array job\n"));
3362
ERROR((SGE_EVENT, MSG_JOB_MOD_CANONLYSPECIFYHOLDJIDADWITHADOPT));
3363
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3364
DRETURN(STATUS_EUNKNOWN);
3368
for_each(pre, predecessors_req) {
3369
const char *pre_ident = lGetString(pre, JRE_job_name);
3370
if (isdigit(pre_ident[0])) {
3371
if (strchr(pre_ident, '.')) {
3372
lFreeList(&predecessors_id);
3373
DPRINTF(("a job cannot wait for a task to finish\n"));
3374
ERROR((SGE_EVENT, MSG_JOB_MOD_UNKOWNJOBTOWAITFOR_S, pre_ident));
3375
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3376
DRETURN(STATUS_EUNKNOWN);
3378
if (atoi(pre_ident) == jobid) {
3379
lFreeList(&predecessors_id);
3380
DPRINTF(("got my own jobid in JRE_job_name\n"));
3381
ERROR((SGE_EVENT, MSG_JOB_MOD_GOTOWNJOBIDINHOLDJIDOPTION_U, sge_u32c(jobid)));
3382
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3383
DRETURN(STATUS_EUNKNOWN);
3385
pre_temp = lCreateElem(JRE_Type);
3387
lSetUlong(pre_temp, JRE_job_number, atoi(pre_ident));
3388
lAppendElem(predecessors_id, pre_temp);
3391
lListElem *user_job = NULL; /* JB_Type */
3392
lListElem *next_user_job = NULL; /* JB_Type */
3393
const void *user_iterator = NULL;
3394
const char *owner = lGetString(job, JB_owner);
3396
next_user_job = lGetElemStrFirst(*(object_type_get_master_list(SGE_TYPE_JOB)), JB_owner, owner, &user_iterator);
3398
while ((user_job = next_user_job)) {
3399
const char *job_name = lGetString(user_job, JB_job_name);
3400
int result = string_base_cmp(TYPE_RESTR, pre_ident, job_name) ;
3402
if (lGetUlong(user_job, JB_job_number) != jobid) {
3403
pre_temp = lCreateElem(JRE_Type);
3405
lSetUlong(pre_temp, JRE_job_number, lGetUlong(user_job, JB_job_number));
3406
lAppendElem(predecessors_id, pre_temp);
3411
next_user_job = lGetElemStrNext(*(object_type_get_master_list(SGE_TYPE_JOB)), JB_owner,
3412
owner, &user_iterator);
3415
/* if no matching job has been found we have to assume
3416
the job finished already */
3420
/* to prevent iterating over task ids when no predecessors are matched */
3421
if (lGetNumberOfElem(predecessors_id) == 0) {
3422
lFreeList(&predecessors_id);
3423
lSetList(job, JB_ja_ad_predecessor_list, predecessors_id);
3424
/* flush task dependency state for empty predecessors list */
3425
sge_task_depend_flush(job, alpp);
3429
/* verify the predecessor list before we try to calculate dependency info */
3430
for_each(pre, predecessors_id) {
3431
/* locate the job id in the master list, if not found we can't do much here */
3432
lListElem *pred_job = job_list_locate(*(object_type_get_master_list(SGE_TYPE_JOB)),
3433
lGetUlong(pre, JRE_job_number));
3434
if (!pred_job) continue;
3435
/* verify this job is an array job */
3436
if (!job_is_array(pred_job)) {
3437
lFreeList(&predecessors_id);
3438
DPRINTF(("could not create array dependence on non-array job\n"));
3439
ERROR((SGE_EVENT, MSG_JOB_MOD_CANONLYSPECIFYHOLDJIDADWITHADOPT));
3440
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3441
DRETURN(STATUS_EUNKNOWN);
3443
/* verify this job has the same range of dependent sub-tasks */
3444
if (!sge_task_depend_is_same_range(pred_job, job)) {
3445
lFreeList(&predecessors_id);
3446
DPRINTF(("could not create array dependence for jobs with different sub-task range\n"));
3447
ERROR((SGE_EVENT, MSG_JOB_MOD_ARRAYJOBMUSTHAVESAMERANGEWITHADOPT));
3448
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3449
DRETURN(STATUS_EUNKNOWN);
3453
/* this obviously needs to be done before we call the update function */
3454
lSetList(job, JB_ja_ad_predecessor_list, predecessors_id);
3456
/* recalculate dependence information for each task of this job */
3457
sge_task_depend_init(job, alpp);
3461
/* The context comes as a VA_Type list with certain groups of
3462
** elements: A group starts with either:
3463
** (+, ): All following elements are appended to the job's
3464
** current context values, or replaces the current value
3465
** (-, ): The following context values are removed from the
3466
** job's current list of values
3467
** (=, ): The following elements replace the job's current
3469
** Any combination of groups is possible.
3470
** To ensure portablity with common sge_gdi, (=, ) is the default
3471
** when no group tag is given at the beginning of the incoming list
3473
static void set_context(
3474
lList *jbctx, /* VA_Type */
3475
lListElem *job /* JB_Type */
3477
lList* newjbctx = NULL;
3482
newjbctx = lGetList(job, JB_context);
3484
/* if the incoming list is empty, then simply clear the context */
3485
if (!jbctx || !lGetNumberOfElem(jbctx)) {
3486
lSetList(job, JB_context, NULL);
3490
/* if first element contains no tag => assume (=, ) */
3491
switch(*lGetString(lFirst(jbctx), VA_variable)) {
3497
lSetList(job, JB_context, NULL);
3503
for_each(jbctxep, jbctx) {
3504
switch(*(lGetString(jbctxep, VA_variable))) {
3512
lSetList(job, JB_context, NULL);
3520
lSetList(job, JB_context, newjbctx = lCreateList("context_list", VA_Type));
3521
if ((temp = lGetElemStr(newjbctx, VA_variable, lGetString(jbctxep, VA_variable))))
3522
lSetString(temp, VA_value, lGetString(jbctxep, VA_value));
3524
lAppendElem(newjbctx, lCopyElem(jbctxep));
3528
lDelSubStr(job, VA_variable, lGetString(jbctxep, VA_variable), JB_context);
3529
/* WARNING: newjbctx is not valid when complete list was deleted */
3537
/************************************************************************/
3538
/*
 * Hands out the next job number. The counter is advanced under
 * job_number_mutex; when it passes MAX_SEQNUM it starts over with 1 and the
 * new value is written out right away via sge_store_job_number().
 */
static u_long32 sge_get_job_number(sge_gdi_ctx_class_t *ctx, monitoring_t *monitor)
{
   u_long32 next_id;
   bool wrapped_around = false;

   DENTER(TOP_LAYER, "sge_get_job_number");

   sge_mutex_lock("job_number_mutex", "sge_get_job_number", __LINE__,
                  &job_number_control.job_number_mutex);

   job_number_control.job_number++;
   job_number_control.changed = true;
   if (job_number_control.job_number > MAX_SEQNUM) {
      DPRINTF(("highest job number MAX_SEQNUM %d exceeded, starting over with 1\n", MAX_SEQNUM));
      job_number_control.job_number = 1;
      wrapped_around = true;
   }
   next_id = job_number_control.job_number;

   sge_mutex_unlock("job_number_mutex", "sge_get_job_number", __LINE__,
                    &job_number_control.job_number_mutex);

   /* done outside the critical section: sge_store_job_number() takes the
      same mutex itself */
   if (wrapped_around) {
      sge_store_job_number(ctx, NULL, monitor);
   }

   DRETURN(next_id);
}
3567
void sge_init_job_number(void)
3570
u_long32 job_nr = 0;
3571
u_long32 guess_job_nr;
3573
DENTER(TOP_LAYER, "sge_init_job_number");
3575
if ((fp = fopen(SEQ_NUM_FILE, "r"))) {
3576
if (fscanf(fp, sge_u32, &job_nr) != 1) {
3577
ERROR((SGE_EVENT, MSG_NOSEQNRREAD_SSS, SGE_OBJ_JOB, SEQ_NUM_FILE, strerror(errno)));
3583
WARNING((SGE_EVENT, MSG_NOSEQFILEOPEN_SSS, SGE_OBJ_JOB, SEQ_NUM_FILE, strerror(errno)));
3586
guess_job_nr = guess_highest_job_number();
3587
job_nr = MAX(job_nr, guess_job_nr);
3589
sge_mutex_lock("job_number_mutex", "sge_init_job_number", __LINE__,
3590
&job_number_control.job_number_mutex);
3591
job_number_control.job_number = job_nr;
3592
job_number_control.changed = true;
3593
sge_mutex_unlock("job_number_mutex", "sge_init_job_number", __LINE__,
3594
&job_number_control.job_number_mutex);
3599
/*
 * Writes the current job number to SEQ_NUM_FILE if it changed since the
 * last call. The counter is read and the changed flag cleared under
 * job_number_mutex; the file itself is written without holding the mutex.
 *
 * ctx, anEvent and monitor are not used by this function.
 */
void sge_store_job_number(sge_gdi_ctx_class_t *ctx, te_event_t anEvent, monitoring_t *monitor) {
   u_long32 job_nr = 0;
   bool changed = false;

   DENTER(TOP_LAYER, "sge_store_job_number");

   sge_mutex_lock("job_number_mutex", "sge_store_job_number", __LINE__,
                  &job_number_control.job_number_mutex);
   if (job_number_control.changed) {
      job_nr = job_number_control.job_number;
      job_number_control.changed = false;
      changed = true;
   }
   sge_mutex_unlock("job_number_mutex", "sge_store_job_number", __LINE__,
                    &job_number_control.job_number_mutex);

   /* here we got a race condition that can (very unlikely)
      cause concurrent writing of the sequence number file */
   if (changed) {
      FILE *fp = fopen(SEQ_NUM_FILE, "w");

      if (fp == NULL) {
         ERROR((SGE_EVENT, MSG_NOSEQFILECREATE_SSS, SGE_OBJ_JOB, SEQ_NUM_FILE, strerror(errno)));
      } else {
         bool failed = false;
         int saved_errno = 0;

         if (fprintf(fp, sge_u32"\n", job_nr) < 0) {
            failed = true;
            saved_errno = errno;
         }
         /* always close the stream, also after a failed write, so that the
            FILE object is not leaked; keep the errno of the first failure */
         if (fclose(fp) != 0 && !failed) {
            failed = true;
            saved_errno = errno;
         }
         if (failed) {
            ERROR((SGE_EVENT, MSG_NOSEQFILECLOSE_SSS, SGE_OBJ_JOB, SEQ_NUM_FILE, strerror(saved_errno)));
         }
      }
   }

   DEXIT;
   return;
}
3635
static u_long32 guess_highest_job_number()
3640
lList *master_job_list = *(object_type_get_master_list(SGE_TYPE_JOB));
3642
DENTER(TOP_LAYER, "guess_highest_job_number");
3644
/* this function is called during qmaster startup and not while it is running,
3645
we do not need to monitor this lock */
3647
SGE_LOCK(LOCK_GLOBAL, LOCK_READ);
3649
jep = lFirst(master_job_list);
3651
pos = lGetPosViaElem(jep, JB_job_number, SGE_NO_ABORT);
3653
for_each(jep, master_job_list) {
3654
maxid = MAX(maxid, lGetPosUlong(jep, pos));
3658
SGE_UNLOCK(LOCK_GLOBAL, LOCK_READ);
3663
/*
 * verify_suitable_queues() -- all modifications are done, now verify
 * schedulability
 *
 * Reads the verification mode from JB_verify_suitable_queues of jep and,
 * unless verification is skipped, performs a trial assignment of the job:
 *   - a temporary sge_assignment_t is filled from the master object lists
 *   - copies of the candidate queue instances are collected in a.queue_list
 *   - sge_select_parallel_environment() or sge_sequential_assignment() is
 *     run and the granted slots are counted in ngranted
 *
 * Results are reported through the answer list alpp. When nothing was
 * granted a STATUS_ESEMANTIC answer is added whose quality depends on the
 * mode, and the DRETURN below yields 0 for JUST_VERIFY and STATUS_ESEMANTIC
 * otherwise. In JUST_VERIFY mode VERIFY_EVENT is or-ed into *trigger.
 *
 * NOTE(review): this listing lacks the short lines of the original file
 * (braces, else, break, short declarations such as talp, ngranted, try_it,
 * cqueue and pe_ref) and carries stray line numbers between statements.
 * The nesting described here is inferred and must be confirmed against the
 * complete source.
 */
3664
static int verify_suitable_queues(lList **alpp, lListElem *jep, int *trigger)
3666
int verify_mode = lGetUlong(jep, JB_verify_suitable_queues);
3668
DENTER(TOP_LAYER, "verify_suitable_queues");
3670
/* dispatch on the verification mode requested for this job */
switch (verify_mode) {
3672
DPRINTF(("skip expensive verification of schedulability\n"));
3675
case WARNING_VERIFY:
3683
const char *ckpt_name;
3684
lList *job_hard_queue_list = lGetList(jep, JB_hard_queue_list);
3685
const char *pe_name = lGetString(jep, JB_pe);
3686
object_description *object_base = object_type_get_object_description();
3688
sge_assignment_t a = SGE_ASSIGNMENT_INIT;
3690
/* temporary assignment object used for the trial run below */
assignment_init(&a, jep, NULL, false);
3692
DPRINTF(("verify schedulability = %c\n", OPTION_VERIFY_STR[verify_mode]));
3695
/* look up the requested checkpoint environment, if any */
if ((ckpt_name=lGetString(jep, JB_checkpoint_name)))
3696
if (!(a.ckpt = ckpt_list_locate(*object_base[SGE_TYPE_CKPT].list, ckpt_name)))
3701
/* advance reservation requested by the job; the granted slots of the
   reservation are used further down to filter queue instances */
u_long32 ar_id = lGetUlong(jep, JB_ar);
3702
lList *ar_granted_slots = NULL;
3705
lListElem *ar = NULL;
3706
ar = ar_list_locate(*object_base[SGE_TYPE_AR].list, ar_id);
3708
ar_granted_slots = lGetList(ar, AR_granted_slots);
3712
a.host_list = *object_base[SGE_TYPE_EXECHOST].list;
3713
a.centry_list = *object_base[SGE_TYPE_CENTRY].list;
3714
a.acl_list = *object_base[SGE_TYPE_USERSET].list;
3715
a.hgrp_list = *object_base[SGE_TYPE_HGROUP].list;
3716
/* the resource quota list is only set when JB_ar is 0 */
if (lGetUlong(jep, JB_ar) == 0) {
3717
a.rqs_list = *object_base[SGE_TYPE_RQS].list;
3719
a.ar_list = *object_base[SGE_TYPE_AR].list;
3721
a.gep = host_list_locate(*object_base[SGE_TYPE_EXECHOST].list, SGE_GLOBAL_NAME);
3722
a.start = DISPATCH_TIME_NOW;
3723
a.duration = 0; /* indicator for schedule based mode */
3724
a.is_job_verify = true;
3727
* Current scheduler code expects all queue instances in a plain list. We use
3728
* a copy of all queue instances that needs to be free'd explicitely after
3729
* deciding about assignment. This is because assignment_release() sees
3730
* queue_list only as a list pointer.
3732
a.queue_list = lCreateList("", QU_Type);
3734
for_each(cqueue, *object_base[SGE_TYPE_CQUEUE].list) {
3735
const char *cqname = lGetString(cqueue, CQ_name);
3736
lList *qinstance_list = lGetList(cqueue, CQ_qinstances);
3737
lListElem *qinstance;
3739
if (cqueue_match_static(cqname, &a) != DISPATCH_OK) {
3743
for_each(qinstance, qinstance_list) {
3745
/* we only have to consider requested queues */
3746
if (job_hard_queue_list != NULL) {
3747
if (qref_list_cq_rejected(job_hard_queue_list, cqname,
3748
lGetHost(qinstance, QU_qhostname), a.hgrp_list)) {
3753
if (ar_granted_slots != NULL) {
3754
if (lGetElemStr(ar_granted_slots, JG_qname, lGetString(qinstance, QU_full_name)) == NULL) {
3760
/* we only have to consider queues containing the requested pe */
3761
if (pe_name != NULL) {
3765
for_each(pe_ref, lGetList(qinstance, QU_pe_list)) {
3766
if (pe_name_is_matching(lGetString(pe_ref, ST_name), pe_name)) {
3777
lAppendElem(a.queue_list, lCopyElem(qinstance));
3782
/* imagine qs is empty */
3783
sconf_set_qs_state(QS_STATE_EMPTY);
3785
/* redirect scheduler monitoring into answer list */
3786
if (verify_mode == JUST_VERIFY) {
3787
set_monitor_alpp(&talp);
3790
if (lGetString(jep, JB_pe)) {
3791
sge_select_parallel_environment(&a, *object_base[SGE_TYPE_PE].list);
3793
sge_sequential_assignment(&a);
3795
ngranted += nslots_granted(a.gdil, NULL);
3797
/* stop redirection of scheduler monitoring messages */
3798
if (verify_mode==JUST_VERIFY) {
3799
set_monitor_alpp(NULL);
3803
sconf_set_qs_state(QS_STATE_FULL);
3805
lFreeList(&(a.queue_list));
3808
assignment_release(&a);
3811
if (!ngranted || !try_it) {
3812
/* copy error msgs from talp into alpp */
3813
if (verify_mode==JUST_VERIFY) {
3815
*alpp = lCreateList("answer", AN_Type);
3816
lAddList(*alpp, &talp);
3821
SGE_ADD_MSG_ID(sprintf(SGE_EVENT, MSG_JOB_NOSUITABLEQ_S,
3822
(verify_mode==JUST_VERIFY ? MSG_JOB_VERIFYVERIFY:
3823
(verify_mode==ERROR_VERIFY)?MSG_JOB_VERIFYERROR:MSG_JOB_VERIFYWARN)));
3824
answer_list_add(alpp, SGE_EVENT, STATUS_ESEMANTIC, (verify_mode==JUST_VERIFY ? ANSWER_QUALITY_INFO:
3825
(verify_mode==ERROR_VERIFY)?ANSWER_QUALITY_ERROR:ANSWER_QUALITY_WARNING));
3827
if (verify_mode != WARNING_VERIFY) {
3828
DRETURN((verify_mode==JUST_VERIFY)?0:STATUS_ESEMANTIC);
3832
if (verify_mode==JUST_VERIFY) {
3834
*trigger |= VERIFY_EVENT;
3837
sprintf(SGE_EVENT, MSG_JOB_VERIFYFOUNDQ);
3839
sprintf(SGE_EVENT, MSG_JOB_VERIFYFOUNDSLOTS_I, ngranted);
3841
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
3851
int sge_gdi_copy_job(sge_gdi_ctx_class_t *ctx,
3852
lListElem *jep, lList **alpp, lList **lpp, char *ruser, char *rhost,
3853
uid_t uid, gid_t gid, char *group, sge_gdi_packet_class_t *packet, sge_gdi_task_class_t *task,
3854
monitoring_t *monitor)
3858
lListElem *old_jep, *new_jep;
3859
int dummy_trigger = 0;
3860
bool job_spooling = ctx->get_job_spooling(ctx);
3862
DENTER(TOP_LAYER, "sge_gdi_copy_job");
3864
if ( !jep || !ruser || !rhost ) {
3865
CRITICAL((SGE_EVENT, MSG_SGETEXT_NULLPTRPASSED_S, SGE_FUNC));
3866
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3867
DRETURN(STATUS_EUNKNOWN);
3871
seek_jid = lGetUlong(jep, JB_job_number);
3872
DPRINTF(("SEEK jobid "sge_u32" for COPY operation\n", seek_jid));
3874
if (!(old_jep = job_list_locate(*(object_type_get_master_list(SGE_TYPE_JOB)), seek_jid))) {
3875
ERROR((SGE_EVENT, MSG_SGETEXT_DOESNOTEXIST_SU, SGE_OBJ_JOB, sge_u32c(seek_jid)));
3876
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3877
DRETURN(STATUS_EUNKNOWN);
3880
/* ensure copy is allowed */
3881
if (strcmp(ruser, lGetString(old_jep, JB_owner)) && !manop_is_manager(ruser)) {
3882
ERROR((SGE_EVENT, MSG_JOB_NORESUBPERMS_SSS, ruser, rhost, lGetString(old_jep, JB_owner)));
3883
answer_list_add(alpp, SGE_EVENT, STATUS_EUNKNOWN, ANSWER_QUALITY_ERROR);
3884
DRETURN(STATUS_EUNKNOWN);
3887
new_jep = lCopyElem(old_jep);
3889
/* read script from old job and reuse it */
3890
if (lGetString(new_jep, JB_exec_file) && job_spooling) {
3891
spool_read_script(alpp, seek_jid, new_jep);
3894
job_initialize_id_lists(new_jep, NULL);
3896
/* override settings of old job with new settings of jep */
3897
if (mod_job_attributes(new_jep, jep, alpp, ruser, rhost, &dummy_trigger)) {
3898
DRETURN(STATUS_EUNKNOWN);
3901
/* call add() method */
3902
ret = sge_gdi_add_job(ctx, new_jep, alpp, lpp, ruser, rhost, uid, gid, group, packet, task, monitor);
3904
lFreeElem(&new_jep);
3909
/****** sge_job_qmaster/sge_job_spool() ***************************************
*  NAME
*     sge_job_spool() -- stores the Master_Job_List into the database
*
*  SYNOPSIS
*     void sge_job_spool(sge_gdi_ctx_class_t *ctx)
*
*  FUNCTION
*     This function stores the current Master_Job_List into the database.
*     This also includes the job scripts, which are stored in the common
*     place.
*
*     For every job a spooling transaction is opened, the job script, the
*     array tasks, their pe tasks and finally the job itself are written,
*     and the transaction is committed or rolled back depending on
*     is_success.
*
*  INPUTS
*     sge_gdi_ctx_class_t *ctx - context; its job spooling flag is read,
*                                switched on and reset to false again
*
*  NOTES
*     MT-NOTE: sge_job_spool() is MT safe, it uses the global lock (read)
*
*     NOTE(review): this listing lacks the short lines of the original file
*     (braces, else, break, short declarations such as dbret) and carries
*     stray line numbers between statements. Nesting and the bodies of
*     several branches are not visible and must be confirmed against the
*     complete source.
*******************************************************************************/
3925
void sge_job_spool(sge_gdi_ctx_class_t *ctx) {
3926
lListElem *jep = NULL;
3927
lList *answer_list = NULL;
3928
/* NOTE(review): this local copy is taken before set_job_spooling(ctx, true)
   below and is tested again later on; no refresh is visible in this listing,
   confirm whether one exists */
bool job_spooling = ctx->get_job_spooling(ctx);
3930
DENTER(TOP_LAYER, "sge_job_spool");
3932
if (!job_spooling) {
3934
/* the job spooling is disabled, we have to force the spooling */
3935
ctx->set_job_spooling(ctx, true);
3937
INFO((SGE_EVENT, "job spooling is disabled - spooling the jobs"));
3939
/* this function is used on qmaster shutdown, no need to monitor this lock */
3940
SGE_LOCK(LOCK_GLOBAL, LOCK_READ);
3942
/* store each job */
3943
for_each(jep, *(object_type_get_master_list(SGE_TYPE_JOB))) {
3944
u_long32 job_number = lGetUlong(jep, JB_job_number);
3945
lListElem *ja_task = NULL;
3946
lListElem *pe_task = NULL;
3947
/* decides between commit and rollback at the end of the iteration */
bool is_success = true;
3950
/* open one spooling transaction per job */
dbret = spool_transaction(NULL, spool_get_default_context(),
3954
answer_list_add_sprintf(&answer_list, STATUS_EUNKNOWN,
3955
ANSWER_QUALITY_ERROR,
3956
MSG_PERSISTENCE_OPENTRANSACTION_FAILED);
3958
/* store job script */
3959
if (lGetString(jep, JB_exec_file) != NULL && job_spooling) {
3960
if (spool_write_script(&answer_list, job_number, jep) == false) {
3961
ERROR((SGE_EVENT, MSG_JOB_NOWRITE_US, sge_u32c(job_number), strerror(errno)));
3964
/* clean file out of memory */
3965
lSetString(jep, JB_script_ptr, NULL);
3966
lSetUlong(jep, JB_script_size, 0);
3970
/* store each ja task */
3972
for_each(ja_task, lGetList(jep, JB_ja_tasks)) {
3973
int jataskid = lGetUlong(ja_task, JAT_task_number);
3974
/* holds the spooling key built by job_get_key() */
dstring buffer = DSTRING_INIT;
3976
/* NOTE(review): the body of this branch is not visible in this listing;
   confirm that buffer is released on that path as well */
if (spool_write_object(&answer_list, spool_get_default_context(), ja_task,
3977
job_get_key(job_number, jataskid, NULL, &buffer),
3978
SGE_TYPE_JATASK, job_spooling)) {
3983
sge_dstring_free(&buffer);
3985
/* store each pe task of this ja task */
for_each(pe_task, lGetList(ja_task, JAT_task_list)) {
3986
const char *pe_task_id_str = lGetString(pe_task, PET_id);
3988
if (!sge_event_spool(ctx, &answer_list, 0, sgeE_PETASK_ADD,
3989
job_number, jataskid, pe_task_id_str, NULL,
3990
NULL, jep, ja_task, pe_task, false, true)) {
4000
/* the job itself is only spooled when everything before succeeded */
if (is_success && !sge_event_spool(ctx, &answer_list, 0, sgeE_JOB_ADD,
4001
job_number, 0, NULL, NULL, NULL,
4002
jep, NULL, NULL, false, true)) {
4010
/* commit or rollback database transaction */
4011
spool_transaction(&answer_list, spool_get_default_context(),
4012
is_success ? STC_commit : STC_rollback);
4018
SGE_UNLOCK(LOCK_GLOBAL, LOCK_READ);
4020
/* reset spooling */
4021
ctx->set_job_spooling(ctx, false);
4022
/* print the collected answers */
answer_list_output(&answer_list);
4028
/****** sge_job_qmaster/spool_write_script() ***********************************
4030
* spool_write_script() -- Write job script
4033
* bool spool_write_script(lList **answer_list,u_long32 jobid, lListElem *jep)
4036
* The function stores the script of a '-b n' job into a file.
4039
* lList **answer_list
4040
* u_long32 jobid - job id (needed for Dtrace only)
4041
* lListElem *jep - the job
4044
* static bool - true on success
4047
* MT-NOTE: spool_write_script() is MT safe
4050
* spool_delete_script()
4051
* spool_read_script()
4052
*******************************************************************************/
4053
bool spool_write_script(lList **answer_list, u_long32 jobid, lListElem *jep)
4056
dstring buffer = DSTRING_INIT;
4058
DENTER(TOP_LAYER, "spool_write_script");
4059
PROF_START_MEASUREMENT(SGE_PROF_JOBSCRIPT);
4060
/* The whole job object is needed for spooling classic */
4061
ret = spool_write_object(answer_list, spool_get_default_context(),
4062
jep, jobscript_get_key(jep, &buffer),
4063
SGE_TYPE_JOBSCRIPT, true);
4064
PROF_STOP_MEASUREMENT(SGE_PROF_JOBSCRIPT);
4065
sge_dstring_free(&buffer);
4070
/****** sge_job_qmaster/spool_read_script() **************************************
4072
* spool_read_script() -- Read job script
4075
* bool spool_read_script(lList **answer_list, u_long32 jobid, lListElem *jep)
4078
* The function reads the script of a '-b n' job from file.
4081
* lList **answer_list
4082
* u_long32 jobid - job id (needed for Dtrace only)
4083
* lListElem *jep - the job
4086
* bool - true on success
4089
* MT-NOTE: spool_read_script() is MT safe
4092
* spool_write_script()
4093
* spool_delete_script()
4094
*******************************************************************************/
4095
bool spool_read_script(lList **answer_list, u_long32 jobid, lListElem *jep)
4098
dstring buffer = DSTRING_INIT;
4099
lListElem *script_el = NULL;
4100
DENTER(TOP_LAYER, "spool_read_script");
4101
PROF_START_MEASUREMENT(SGE_PROF_JOBSCRIPT);
4102
script_el = spool_read_object(answer_list, spool_get_default_context(),
4103
SGE_TYPE_JOBSCRIPT, jobscript_get_key(jep, &buffer));
4104
PROF_STOP_MEASUREMENT(SGE_PROF_JOBSCRIPT);
4105
/* The spooled out structure must be restored */
4106
if (script_el != NULL) {
4107
char *script = (char *) lGetString(script_el, STU_name);
4109
int len = strlen(script);
4110
lXchgString(jep, JB_script_ptr, &script);
4111
lXchgString(script_el, STU_name, &dummy);
4112
lFreeElem(&script_el);
4113
lSetUlong(jep, JB_script_size, len);
4117
sge_dstring_free(&buffer);
4121
/****** sge_job_qmaster/spool_delete_script() ************************************
4123
* spool_delete_script() -- Delete job script
4126
* bool spool_delete_script(lList **answer_list, u_long32 jobid, lListElem *jep)
4129
* The function removes the file where the script of a '-b n' job is stored.
4132
* lList **answer_list
4133
* u_long32 jobid - job id (needed for Dtrace only)
4134
* lListElem *jep - the job
4137
* bool - true on success
4140
* MT-NOTE: spool_delete_script() is MT safe
4143
* spool_write_script()
4144
* spool_read_script()
4145
*******************************************************************************/
4146
bool spool_delete_script(lList **answer_list, u_long32 jobid, lListElem *jep)
4149
dstring buffer = DSTRING_INIT;
4150
DENTER(TOP_LAYER, "spool_delete_script");
4151
PROF_START_MEASUREMENT(SGE_PROF_JOBSCRIPT);
4152
ret = spool_delete_object(answer_list, spool_get_default_context(),
4153
SGE_TYPE_JOBSCRIPT, jobscript_get_key(jep, &buffer), true);
4154
PROF_STOP_MEASUREMENT(SGE_PROF_JOBSCRIPT);
4155
sge_dstring_free(&buffer);
4156
lSetString(jep, JB_exec_file, NULL);
4157
lSetString(jep, JB_script_file, NULL);
4158
lSetUlong(jep, JB_script_size, 0);
4162
static int sge_delete_all_tasks_of_job(sge_gdi_ctx_class_t *ctx, lList **alpp, const char *ruser, const char *rhost, lListElem *job, u_long32 *r_start, u_long32 *r_end, u_long32 *step, lList* ja_structure, int *alltasks, u_long32 *deleted_tasks, u_long32 start_time, monitoring_t *monitor, int forced, bool *deletion_time_reached)
4166
char *dupped_session = NULL;
4167
int deleted_unenrolled_tasks;
4168
u_long32 task_number = 0;
4169
u_long32 existing_tasks;
4170
lList *range_list = NULL; /* RN_Type */
4171
u_long32 job_number = lGetUlong(job, JB_job_number);
4173
DENTER(TOP_LAYER, "sge_delete_all_tasks_of_job");
4175
/* In certain cases sge_commit_job() free's the job structure passed.
4176
* The session information is needed after sge_commit_job() so we make
4177
* a copy of the job session before calling sge_commit_job(). This copy
4180
if (lGetString(job, JB_session)) {
4181
dupped_session = strdup(lGetString(job, JB_session));
4185
* Repeat until all requested taskid ranges are handled
4187
rn = lFirst(ja_structure);
4189
u_long32 max_job_deletion_time = mconf_get_max_job_deletion_time();
4190
int showmessage = 0;
4191
u_long32 enrolled_start = 0;
4192
u_long32 enrolled_end = 0;
4193
u_long32 unenrolled_start = 0;
4194
u_long32 unenrolled_end = 0;
4197
* delete tasks or the whole job?
4198
* if ja_structure not empty delete specified tasks
4199
* otherwise delete whole job
4201
unenrolled_start = job_get_smallest_unenrolled_task_id(job);
4202
unenrolled_end = job_get_biggest_unenrolled_task_id(job);
4203
enrolled_start = job_get_smallest_enrolled_task_id(job);
4204
enrolled_end = job_get_biggest_enrolled_task_id(job);
4207
*r_start = lGetUlong(rn, RN_min);
4208
*r_end = lGetUlong(rn, RN_max);
4210
*step = lGetUlong(rn, RN_step);
4215
if (*r_start > unenrolled_start) {
4216
unenrolled_start = (*r_start);
4218
u_long32 temp_start;
4220
/* we have to figure out the first task we can delete and we do want */
4221
/* to start with the first existing task. For that, we compute: */
4223
/* - the delta between the requested task id and the first existing one */
4224
/* - we devide the delta by the step size, to get the number of steps we */
4225
/* need ot get there. */
4226
/* - the number of steps multiplied by the step size + the start value */
4227
/* will get us the first task, or a very close. If we just right befor */
4228
/* it, we add another step to get there. */
4229
temp_start = ((unenrolled_start - (*r_start)) / (*step)) * (*step) + (*r_start);
4231
if (temp_start < unenrolled_start) {
4232
unenrolled_start = temp_start + (*step);
4235
unenrolled_start = temp_start;
4239
unenrolled_end = MIN(*r_end, unenrolled_end);
4242
if ((*r_start) > enrolled_start) {
4243
enrolled_start = *r_start;
4245
u_long32 temp_start;
4247
temp_start = ((enrolled_start - *r_start) / (*step)) * (*step) + (*r_start);
4249
if (temp_start < enrolled_start) {
4250
enrolled_start = temp_start + (*step);
4253
enrolled_start = temp_start;
4257
enrolled_end = MIN(*r_end, enrolled_end);
4265
DPRINTF(("Request: alltasks = %d, start = %d, end = %d, step = %d\n",
4266
*alltasks, *r_start, *r_end, *step));
4267
DPRINTF(("unenrolled ----> start = %d, end = %d, step = %d\n",
4268
unenrolled_start, unenrolled_end, *step));
4269
DPRINTF(("enrolled ----> start = %d, end = %d, step = %d\n",
4270
enrolled_start, enrolled_end, *step));
4275
* Delete all unenrolled pending tasks
4277
deleted_unenrolled_tasks = 0;
4279
existing_tasks = job_get_ja_tasks(job);
4281
if (*alltasks == 1 && existing_tasks == 0) {
4283
* This job has no pending and no running tasks.
4285
lListElem *tmp_task = job_get_ja_task_template_pending(job, task_number);
4287
sge_commit_job(ctx, job, tmp_task, NULL, COMMIT_ST_FINISHED_FAILED,
4288
COMMIT_UNENROLLED_TASK | COMMIT_NEVER_RAN, monitor);
4290
INFO((SGE_EVENT, MSG_JOB_DELETEX_SSU, ruser, SGE_OBJ_JOB, sge_u32c(job_number)));
4291
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
4296
for (task_number = unenrolled_start;
4297
task_number <= unenrolled_end;
4298
task_number += *step) {
4301
is_defined = job_is_ja_task_defined(job, task_number);
4306
is_enrolled = job_is_enrolled(job, task_number);
4308
lListElem *tmp_task = job_get_ja_task_template_pending(job, task_number);
4312
reporting_create_job_log(NULL, sge_get_gmt(), JL_DELETED,
4313
ruser, rhost, NULL, job, tmp_task,
4314
NULL, MSG_LOG_DELETED);
4315
sge_commit_job(ctx, job, tmp_task, NULL, COMMIT_ST_FINISHED_FAILED,
4316
COMMIT_NO_SPOOLING | COMMIT_UNENROLLED_TASK | COMMIT_NEVER_RAN, monitor);
4317
deleted_unenrolled_tasks = 1;
4319
if (!*alltasks && showmessage) {
4320
range_list_insert_id(&range_list, NULL, task_number);
4326
if (deleted_unenrolled_tasks) {
4328
if (existing_tasks > *deleted_tasks) {
4329
dstring buffer = DSTRING_INIT;
4330
/* write only the common part - pass only the jobid, no jatask or petask id */
4331
lList *answer_list = NULL;
4332
spool_write_object(&answer_list, spool_get_default_context(),
4333
job, job_get_job_key(job_number, &buffer),
4335
ctx->get_job_spooling(ctx));
4336
answer_list_output(&answer_list);
4337
lListElem_clear_changed_info(job);
4338
sge_dstring_free(&buffer);
4340
/* JG: TODO: this joblog seems to have an invalid job object! */
4341
/* reporting_create_job_log(NULL, sge_get_gmt(), JL_DELETED, ruser, rhost, NULL, job, NULL, NULL, MSG_LOG_DELETED); */
4343
#if 0 /* EB: TODO: this should not be necessary because events have been sent in sge_commit_job() above */
4344
sge_add_event(start_time, sgeE_JOB_DEL, job_number, 0,
4345
NULL, NULL, dupped_session, NULL);
4351
* Delete enrolled ja tasks
4353
if (existing_tasks > *deleted_tasks) {
4354
for (task_number = enrolled_start;
4355
task_number <= enrolled_end;
4356
task_number += *step) {
4358
int is_defined = job_is_ja_task_defined(job, task_number);
4361
lListElem *tmp_task = lGetElemUlong(lGetList(job, JB_ja_tasks),
4362
JAT_task_number, task_number);
4364
if (tmp_task == NULL) {
4365
/* ja task does not exist anymore - ignore silently */
4372
* if task is already in status deleted and was signaled
4373
* only recently and deletion is not forced, do nothing
4375
if ((lGetUlong(tmp_task, JAT_status) & JFINISHED) ||
4376
(lGetUlong(tmp_task, JAT_state) & JDELETED &&
4377
lGetUlong(tmp_task, JAT_pending_signal_delivery_time) > sge_get_gmt() &&
4379
INFO((SGE_EVENT, MSG_JOB_ALREADYDELETED_U, sge_u32c(job_number)));
4380
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
4384
/*If job has large array of tasks, and time to delete the array
4385
* of jobs is greater than MAX_JOB_DELETION_TIME, break out of
4386
* qdel and delete remaining jobs later
4388
if ((njobs > 0 || (*deleted_tasks) > 0) && ((sge_get_gmt() - start_time) > max_job_deletion_time)) {
4389
INFO((SGE_EVENT, MSG_JOB_DISCONTTASKTRANS_SUU, ruser,
4390
sge_u32c(job_number), sge_u32c(task_number)));
4391
answer_list_add(alpp, SGE_EVENT, STATUS_OK_DOAGAIN, ANSWER_QUALITY_INFO);
4392
*deletion_time_reached = true;
4393
FREE(dupped_session);
4394
lFreeList(&range_list);
4398
reporting_create_job_log(NULL, sge_get_gmt(), JL_DELETED, ruser, rhost, NULL, job, tmp_task, NULL, MSG_LOG_DELETED);
4400
if (lGetString(tmp_task, JAT_master_queue) && is_pe_master_task_send(tmp_task)) {
4401
job_ja_task_send_abort_mail(job, tmp_task, ruser,
4403
get_rid_of_job_due_to_qdel(ctx,
4408
sge_commit_job(ctx, job, tmp_task, NULL, COMMIT_ST_FINISHED_FAILED_EE, spool_job | COMMIT_NEVER_RAN, monitor);
4410
if (!*alltasks && showmessage) {
4411
range_list_insert_id(&range_list, NULL, task_number);
4415
; /* Task did never exist! - Ignore silently */
4420
if (range_list && showmessage) {
4421
if (range_list_get_number_of_ids(range_list) > 1) {
4422
dstring tid_string = DSTRING_INIT;
4424
range_list_sort_uniq_compress(range_list, NULL, true);
4425
range_list_print_to_string(range_list, &tid_string, false, false, false);
4426
INFO((SGE_EVENT, MSG_JOB_DELETETASKS_SSU,
4427
ruser, sge_dstring_get_string(&tid_string),
4428
sge_u32c(job_number)));
4429
sge_dstring_free(&tid_string);
4431
u_long32 task_id = range_list_get_first_id(range_list, NULL);
4433
INFO((SGE_EVENT, MSG_JOB_DELETETASK_SUU,
4434
ruser, sge_u32c(job_number), sge_u32c(task_id)));
4436
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
4439
if ((*alltasks) && showmessage) {
4440
get_rid_of_schedd_job_messages(job_number);
4441
INFO((SGE_EVENT, MSG_JOB_DELETEX_SSU, ruser, SGE_OBJ_JOB, sge_u32c(job_number)));
4442
answer_list_add(alpp, SGE_EVENT, STATUS_OK, ANSWER_QUALITY_INFO);
4445
if ((njobs > 0 || (*deleted_tasks) > 0) && (( sge_get_gmt() - start_time) > max_job_deletion_time)) {
4446
INFO((SGE_EVENT, MSG_JOB_DISCONTINUEDTRANS_SU, ruser,
4447
sge_u32c(job_number)));
4448
answer_list_add(alpp, SGE_EVENT, STATUS_OK_DOAGAIN, ANSWER_QUALITY_INFO);
4449
*deletion_time_reached = true;
4450
FREE(dupped_session);
4451
lFreeList(&range_list);
4456
} while (rn != NULL);
4458
/* free task id range list of this iteration */
4459
lFreeList(&range_list);
4460
FREE(dupped_session);