1
/*___INFO__MARK_BEGIN__*/
2
/*************************************************************************
4
* The Contents of this file are made available subject to the terms of
5
* the Sun Industry Standards Source License Version 1.2
7
* Sun Microsystems Inc., March, 2001
10
* Sun Industry Standards Source License Version 1.2
11
* =================================================
12
* The contents of this file are subject to the Sun Industry Standards
13
* Source License Version 1.2 (the "License"); You may not use this file
14
* except in compliance with the License. You may obtain a copy of the
15
* License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
17
* Software provided under this License is provided on an "AS IS" basis,
18
* WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
19
* WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
20
* MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
21
* See the License for the specific provisions governing your rights and
22
* obligations concerning the Software.
24
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
26
* Copyright: 2001 by Sun Microsystems, Inc.
28
* All Rights Reserved.
30
************************************************************************/
31
/*___INFO__MARK_END__*/
38
#include "basis_types.h"
41
#include "sge_ja_task.h"
43
#include "cull_file.h"
44
#include "cull_list.h"
45
#include "sge_spool.h"
46
#include "spool/sge_dirent.h"
48
#include "sge_job_qmaster.h"
50
#include "sge_answer.h"
51
#include "sge_suser.h"
53
#include "sge_unistd.h"
54
#include "sge_pe_task.h"
57
#include "uti/sge_profiling.h"
58
#include "sgeobj/sge_object.h"
60
#include "msg_common.h"
61
#include "msg_spoollib_classic.h"
63
#include "read_write_job.h"
65
/*
 * Forward declarations of the file-local (static) helpers defined further
 * down in this file.
 *
 * NOTE(review): several prototypes as shown here do not match the later
 * definitions/call sites (e.g. ja_task_list_create_from_file() is called with
 * job_id, ja_task_id and flags, and job_has_to_spool_one_file() is defined
 * with an additional pe_list parameter). Argument lines appear to be missing
 * from this listing -- confirm against the complete file.
 */
static lList *ja_task_list_create_from_file(u_long32 job_id,
67
sge_spool_flags_t flags);
69
static lListElem *ja_task_create_from_file(u_long32 job_id,
71
const char *pe_task_id,
72
sge_spool_flags_t flags);
74
static int ja_task_write_to_disk(lListElem *ja_task, u_long32 job_id,
75
const char *pe_task_id,
76
sge_spool_flags_t flags);
78
static int job_write_ja_task_part(lListElem *job, u_long32 ja_task_id,
79
const char *pe_task_id,
80
sge_spool_flags_t flags);
82
static int job_write_as_single_file(lListElem *job, u_long32 ja_task_id,
83
sge_spool_flags_t flags);
85
static lListElem *job_create_from_file(u_long32 job_id, u_long32 task_id,
86
sge_spool_flags_t flags);
88
static int job_has_to_spool_one_file(const lListElem *job,
90
sge_spool_flags_t flags);
92
static lListElem *pe_task_create_from_file(u_long32 job_id,
94
const char *pe_task_id,
95
sge_spool_flags_t flags);
97
static int job_remove_script_file(u_long32 job_id);
99
/* Here we cache the path of the last task spool dir that has been created.
100
In case a task spool dir is removed the cache is no longer a proof of the
101
existence of the task spool dir and is reinitialized */
102
/* Updated by ja_task_write_to_disk() immediately before it calls sge_mkdir()
   on a task spool dir; reset to the empty string by job_remove_spool_file()
   after it has attempted rmdir() on a task spool dir. */
static char old_task_spool_dir[SGE_PATH_MAX] = "";
104
/*
 * job_create_from_file() -- read one job (JB_Type) back from the spool area.
 *
 * The spool location is computed from job_id, ja_task_id and flags with
 * sge_get_file_path(JOB_SPOOL_DIR, FORMAT_DEFAULT, ...).
 *
 *  - If that location is a directory, the job element is read from the
 *    JOB_SPOOL_FILE path, the spooled array tasks are collected with
 *    ja_task_list_create_from_file() and combined with the job's
 *    JB_ja_tasks list, and the task list is sorted ascending by
 *    JAT_task_number.
 *  - The last statement reads the job directly from the JOB_SPOOL_DIR path
 *    (presumably the branch taken when the location is a plain file).
 *
 * NOTE(review): this listing lacks several lines (braces, the else keyword,
 * the declaration of ja_task_list, NULL checks, the return statement).
 * Confirm the exact control flow and the return value against the complete
 * file before relying on this description.
 */
static lListElem *job_create_from_file(u_long32 job_id, u_long32 ja_task_id,
105
sge_spool_flags_t flags)
107
lListElem *job = NULL;
108
char spool_path[SGE_PATH_MAX] = "";
110
DENTER(TOP_LAYER, "job_create_from_file");
111
sge_get_file_path(spool_path, JOB_SPOOL_DIR, FORMAT_DEFAULT,
112
flags, job_id, ja_task_id, NULL);
114
/* directory layout: job data and task data are spooled separately */
if (sge_is_directory(spool_path)) {
115
char spool_path_common[SGE_PATH_MAX];
116
lList *ja_tasks = NULL;
118
sge_get_file_path(spool_path_common, JOB_SPOOL_FILE, FORMAT_DEFAULT,
119
flags, job_id, ja_task_id, NULL);
120
job = lReadElemFromDisk(NULL, spool_path_common, JB_Type, "job");
122
ja_tasks = ja_task_list_create_from_file(job_id, ja_task_id, flags);
126
/* merge the tasks read from disk with the list already stored in the job */
ja_task_list = lGetList(job, JB_ja_tasks);
128
lAddList(ja_task_list, &ja_tasks);
130
lSetList(job, JB_ja_tasks, ja_tasks);
133
/* keep the task list ordered by ascending task number */
lPSortList(ja_tasks, "%I+", JAT_task_number);
136
* This is no error! It only means that there is no enrolled
137
* task in the spool area (all tasks are unenrolled)
142
/* single file layout: the JOB_SPOOL_DIR path itself holds the job */
job = lReadElemFromDisk(NULL, spool_path, JB_Type, "job");
147
/*
 * ja_task_list_create_from_file() -- collect the spooled array tasks of a job.
 *
 * Creates a JAT_Type list named "ja_tasks" and fills it by walking the job's
 * spool directory (JOB_SPOOL_DIR path):
 *
 *  - every directory entry except ".", ".." and "common" is treated as a
 *    sub directory and listed with sge_get_dirents();
 *  - every entry of such a sub directory (except "." and "..") is converted
 *    to a task id with atol();
 *  - if the corresponding path is itself a directory, the task is read from
 *    the TASK_SPOOL_FILE path and each of its entries (except ".", ".." and
 *    "common") is read with pe_task_create_from_file(), appended to a
 *    PET_Type list and stored in JAT_task_list;
 *  - a further call reads the task with ja_task_create_from_file()
 *    (presumably the else branch, i.e. the task is spooled as a plain file);
 *  - the task is appended to the result list.
 *
 * The directory entry lists are released with lFreeList() as the walk
 * proceeds.
 *
 * NOTE(review): lines are missing from this listing (declarations of entry,
 * ja_task, pe_task and ja_task_id, the body of the "ja_task_id == 0" test,
 * the tail of one sprintf() call, else keywords, braces, return statements).
 * The four lFreeList() calls at the end look like an error/cleanup path
 * following the empty-list test; confirm against the complete file.
 *
 * NOTE(review): the sprintf() calls write into SGE_PATH_MAX sized buffers
 * without a bound, and atol() cannot distinguish "0" from a non-numeric name.
 */
static lList *ja_task_list_create_from_file(u_long32 job_id,
149
sge_spool_flags_t flags)
151
lList *dir_entries = NULL;
152
lList *ja_task_entries = NULL;
153
lList *pe_task_entries = NULL;
154
lList *ja_tasks = NULL;
155
lList *pe_tasks = NULL;
156
lListElem *dir_entry;
157
char spool_dir_job[SGE_PATH_MAX];
158
DENTER(TOP_LAYER, "ja_task_list_create_from_file");
160
ja_tasks = lCreateList("ja_tasks", JAT_Type);
165
sge_get_file_path(spool_dir_job, JOB_SPOOL_DIR, FORMAT_DEFAULT, flags,
166
job_id, ja_task_id, NULL);
167
dir_entries = sge_get_dirents(spool_dir_job);
168
/* first level: sub directories of the job spool directory */
for_each(dir_entry, dir_entries) {
171
entry = lGetString(dir_entry, ST_name);
172
/* strcmp() != 0 means "differs": skip ".", ".." and the "common" file */
if (strcmp(entry, ".") && strcmp(entry, "..") &&
173
strcmp(entry, "common")) {
174
char spool_dir_tasks[SGE_PATH_MAX];
175
lListElem *ja_task_entry;
177
sprintf(spool_dir_tasks, SFN"/"SFN, spool_dir_job, entry);
178
ja_task_entries = sge_get_dirents(spool_dir_tasks);
179
/* second level: one entry per spooled array task */
for_each(ja_task_entry, ja_task_entries) {
180
const char *ja_task_string;
182
ja_task_string = lGetString(ja_task_entry, ST_name);
183
if (strcmp(ja_task_string, ".") && strcmp(ja_task_string, "..")) {
184
char spool_dir_pe_tasks[SGE_PATH_MAX];
185
lListElem *pe_task_entry;
189
/* the entry name is expected to be the task number */
ja_task_id = atol(ja_task_string);
190
if (ja_task_id == 0) {
194
sprintf(spool_dir_pe_tasks, SFN"/"SFN, spool_dir_tasks,
197
/* a directory here means the task has separately spooled pe tasks */
if (sge_is_directory(spool_dir_pe_tasks)) {
198
char spool_path_ja_task[SGE_PATH_MAX];
200
sge_get_file_path(spool_path_ja_task, TASK_SPOOL_FILE,
201
FORMAT_DEFAULT, flags, job_id, ja_task_id, NULL);
202
ja_task = lReadElemFromDisk(NULL, spool_path_ja_task, JAT_Type, "ja_task");
204
pe_task_entries = sge_get_dirents(spool_dir_pe_tasks);
205
/* third level: one entry per spooled pe task */
for_each(pe_task_entry, pe_task_entries) {
206
const char *pe_task_string;
208
pe_task_string = lGetString(pe_task_entry, ST_name);
209
if (strcmp(pe_task_string, ".") &&
210
strcmp(pe_task_string, "..") &&
211
strcmp(pe_task_string, "common")) {
214
pe_task = pe_task_create_from_file(job_id, ja_task_id, pe_task_string, flags);
217
pe_tasks = lCreateList("pe_tasks", PET_Type);
219
lAppendElem(pe_tasks, pe_task);
226
lFreeList(&pe_task_entries);
227
lSetList(ja_task, JAT_task_list, pe_tasks);
229
ja_task = ja_task_create_from_file(job_id, ja_task_id, NULL, flags);
232
lAppendElem(ja_tasks, ja_task);
239
lFreeList(&ja_task_entries);
242
lFreeList(&dir_entries);
244
/* no task could be read from the spool area */
if (!lGetNumberOfElem(ja_tasks)) {
251
lFreeList(&ja_tasks);
252
lFreeList(&dir_entries);
253
lFreeList(&ja_task_entries);
254
lFreeList(&pe_task_entries);
259
/*
 * ja_task_create_from_file() -- read one array task (JAT_Type) from disk.
 *
 * Builds the TASK_SPOOL_DIR_AS_FILE path for job_id/ja_task_id/flags and
 * reads the element stored there with lReadElemFromDisk().
 *
 * NOTE(review): pe_task_id is not used in the visible lines, and the
 * ja_task_id parameter, the declaration of ja_task and the return statement
 * are missing from this listing -- confirm against the complete file.
 */
static lListElem *ja_task_create_from_file(u_long32 job_id,
261
const char *pe_task_id,
262
sge_spool_flags_t flags)
265
char spool_path_ja_task[SGE_PATH_MAX];
267
sge_get_file_path(spool_path_ja_task, TASK_SPOOL_DIR_AS_FILE,
268
FORMAT_DEFAULT, flags, job_id, ja_task_id, NULL);
269
ja_task = lReadElemFromDisk(NULL, spool_path_ja_task, JAT_Type, "ja_task");
273
/*
 * pe_task_create_from_file() -- read one pe task (PET_Type) from disk.
 *
 * Builds the PE_TASK_SPOOL_FILE path for job_id/ja_task_id/pe_task_id/flags
 * and reads the element stored there with lReadElemFromDisk().
 *
 * NOTE(review): the ja_task_id parameter, the declaration of pe_task and the
 * return statement are missing from this listing -- confirm against the
 * complete file.
 */
static lListElem *pe_task_create_from_file(u_long32 job_id,
275
const char *pe_task_id,
276
sge_spool_flags_t flags)
279
char spool_path_pe_task[SGE_PATH_MAX];
281
sge_get_file_path(spool_path_pe_task, PE_TASK_SPOOL_FILE,
282
FORMAT_DEFAULT, flags, job_id, ja_task_id, pe_task_id);
283
pe_task = lReadElemFromDisk(NULL, spool_path_pe_task, PET_Type, "pe_task");
288
/****** spool/classic/job_write_spool_file() **********************************
290
* job_write_spool_file() -- makes a job/task persistent
293
* int job_write_spool_file(lListElem *job, u_long32 ja_taskid,
* const char *pe_task_id,
294
* sge_spool_flags_t flags)
297
* This function writes a job or a task of an array job into the spool
298
* area. It may be used within the qmaster or execd code.
300
* The result from this function looks like this within the spool area
301
* of the master for the job 10001, the array job 10002.1-3,
302
* the tightly integrated job 20011 (two pe_tasks).
305
* $SGE_ROOT/default/spool/qmaster/jobs
308
* | +---0001 (JB_Type file)
310
* | +---common (JB_Type without JB_ja_tasks)
312
* | +---1 (JAT_Type file)
313
* | +---2 (JAT_Type file)
314
* | +---3 (JAT_Type file)
317
* +---common (JB_Type without JB_ja_tasks)
320
* +--- common (JAT_Type file without JAT_task_list)
321
* +--- 1.speedy (PET_Type file)
322
* +--- 2.speedy (PET_Type file)
323
* +--- past_usage (PET_Type file)
325
* To optimize the spool behaviour please find the defines
326
* MAX_JA_TASK_PER_DIR and MAX_JA_TASK_PER_FILE
329
* lListElem *job - full job (JB_Type)
330
* u_long32 ja_taskid - 0 or an allowed array job task id
331
* const char *pe_task_id - pe task id
332
* sge_spool_flags_t flags - where/how should we spool the object
333
* SPOOL_HANDLE_AS_ZOMBIE -> has to be used for zombie jobs
334
* SPOOL_WITHIN_EXECD -> has to be used within the execd
335
* SPOOL_DEFAULT -> if no other flags are needed
338
* int - 0 on success otherwise != 0
*
* Implementation outline (from the visible code): if
* job_has_to_spool_one_file() is true the whole job is written with
* job_write_as_single_file(); the other visible path writes the common
* part with job_write_common_part() and, if that succeeded and
* SPOOL_IGNORE_TASK_INSTANCES is not set, the task part with
* job_write_ja_task_part(). When SPOOL_WITHIN_EXECD is set the elapsed
* time is measured with sge_get_gmt() and a warning can be logged.
*
* NOTE(review): lines are missing from this listing (declarations of ret
* and start, the else keyword, the condition guarding the WARNING, the
* return statement) -- confirm details against the complete file.
339
******************************************************************************/
340
int job_write_spool_file(lListElem *job, u_long32 ja_taskid,
341
const char *pe_task_id,
342
sge_spool_flags_t flags)
345
/* spooling time is only measured when running within the execd */
int report_long_delays = flags & SPOOL_WITHIN_EXECD;
348
DENTER(TOP_LAYER, "job_write_spool_file");
350
if (report_long_delays) {
351
start = sge_get_gmt();
354
if (job_has_to_spool_one_file(job, *object_type_get_master_list(SGE_TYPE_PE),
356
ret = job_write_as_single_file(job, ja_taskid, flags);
358
ret = job_write_common_part(job, ja_taskid, flags);
359
/* task instances are skipped on error or when explicitly not wanted */
if (!ret && !(flags & SPOOL_IGNORE_TASK_INSTANCES)) {
360
ret = job_write_ja_task_part(job, ja_taskid, pe_task_id, flags);
364
if (report_long_delays) {
365
u_long32 time = sge_get_gmt() - start;
367
/* administrators need to be aware of suspicious spooling delays */
368
WARNING((SGE_EVENT, MSG_CONFIG_JOBSPOOLINGLONGDELAY_UUI,
369
sge_u32c(lGetUlong(job, JB_job_number)), sge_u32c(ja_taskid), (int)time));
376
/*
 * job_has_to_spool_one_file() -- decide between the single-file and the
 * directory spool layout for a job.
 *
 * Two tests are visible:
 *  1. flags contain SPOOL_HANDLE_AS_ZOMBIE or SPOOL_WITHIN_EXECD;
 *  2. the job might be a tightly integrated parallel job
 *     (job_might_be_tight_parallel() with pe_list) or it was submitted with
 *     more array tasks than sge_get_ja_tasks_per_file().
 *
 * NOTE(review): the values returned for each test are not part of this
 * listing. Judging from the callers, the first test presumably selects the
 * single-file layout and the second the directory layout -- confirm against
 * the complete file.
 */
static int job_has_to_spool_one_file(const lListElem *job,
377
const lList *pe_list,
378
sge_spool_flags_t flags)
380
DENTER(TOP_LAYER, "job_has_to_spool_one_file");
382
if ((flags & SPOOL_HANDLE_AS_ZOMBIE) || (flags & SPOOL_WITHIN_EXECD)) {
386
if (job_might_be_tight_parallel(job, pe_list)
387
|| (job_get_submit_ja_tasks(job) > sge_get_ja_tasks_per_file())) {
394
/*
 * job_write_as_single_file() -- spool a complete job element into one file.
 *
 * Creates the directory returned for FORMAT_THIRD_PART with sge_mkdir(),
 * writes the job with lWriteElemToDisk() to the FORMAT_DOT_FILENAME path
 * and, if the write succeeded, renames that temporary file to the
 * FORMAT_DEFAULT path so that the final file is replaced in one step.
 *
 * NOTE(review): the declarations of job_id and ret, the handling of a
 * failed rename() and the return statement are missing from this listing.
 * The result of sge_mkdir() is ignored in the visible code.
 */
static int job_write_as_single_file(lListElem *job, u_long32 ja_task_id,
395
sge_spool_flags_t flags)
399
char job_dir_third[SGE_PATH_MAX] = "";
400
char spool_file[SGE_PATH_MAX] = "";
401
char tmp_spool_file[SGE_PATH_MAX] = "";
403
DENTER(TOP_LAYER, "job_write_as_single_file");
404
job_id = lGetUlong(job, JB_job_number);
406
sge_get_file_path(job_dir_third, JOB_SPOOL_DIR, FORMAT_THIRD_PART,
407
flags, job_id, ja_task_id, NULL);
408
sge_mkdir(job_dir_third, 0755, 0, 0);
409
sge_get_file_path(spool_file, JOB_SPOOL_DIR, FORMAT_DEFAULT,
410
flags, job_id, ja_task_id, NULL);
411
sge_get_file_path(tmp_spool_file, JOB_SPOOL_DIR, FORMAT_DOT_FILENAME,
412
flags, job_id, ja_task_id, NULL);
413
/* write to the temporary name first, then move it into place */
ret = lWriteElemToDisk(job, tmp_spool_file, NULL, "job");
414
if (!ret && (rename(tmp_spool_file, spool_file) == -1)) {
423
/*
 * job_write_ja_task_part() -- spool the array task(s) of a job.
 *
 * The start element is either the task with JAT_task_number == ja_task_id
 * (lGetElemUlong()) or the first element of JB_ja_tasks (lFirst()).
 * Each visited task is written with ja_task_write_to_disk() when
 * SPOOL_WITHIN_EXECD is set or job_is_enrolled() reports the task as
 * enrolled. For a job that might be tightly integrated parallel,
 * SPOOL_HANDLE_PARALLEL_TASKS is added to flags before writing.
 *
 * NOTE(review): the condition choosing between the two start elements, the
 * declarations of job_id and ret, the loop termination/error handling and
 * the return statement are missing from this listing; presumably
 * ja_task_id == 0 means "all tasks" -- confirm against the complete file.
 */
static int job_write_ja_task_part(lListElem *job, u_long32 ja_task_id,
424
const char *pe_task_id,
425
sge_spool_flags_t flags)
427
lListElem *ja_task, *next_ja_task;
430
DENTER(TOP_LAYER, "job_write_ja_task_part");
432
job_id = lGetUlong(job, JB_job_number);
434
next_ja_task = lGetElemUlong(lGetList(job, JB_ja_tasks),
435
JAT_task_number, ja_task_id);
437
next_ja_task = lFirst(lGetList(job, JB_ja_tasks));
439
while((ja_task = next_ja_task)) {
443
/* fetch the successor before the current task is processed */
next_ja_task = lNext(ja_task);
446
if ((flags & SPOOL_WITHIN_EXECD) ||
447
job_is_enrolled(job, lGetUlong(ja_task, JAT_task_number))) {
448
if (job_might_be_tight_parallel(job, *object_type_get_master_list(SGE_TYPE_PE))) {
449
flags |= SPOOL_HANDLE_PARALLEL_TASKS;
452
ret = ja_task_write_to_disk(ja_task, job_id, pe_task_id, flags);
463
/*
 * job_write_common_part() -- spool the job element without its task list.
 *
 * Creates the job spool directory (JOB_SPOOL_DIR path) with sge_mkdir(),
 * temporarily exchanges the job's JB_ja_tasks list with the local ja_tasks
 * list via lXchgList(), writes the job to the FORMAT_DOT_FILENAME variant of
 * the JOB_SPOOL_FILE path, exchanges the lists back and, if the write
 * succeeded, renames the temporary file to the final JOB_SPOOL_FILE path.
 *
 * NOTE(review): the declarations of job_id, ret and ja_tasks (presumably
 * initialised to NULL so that the job is written with an empty task list),
 * the handling of a failed rename() and the return statement are missing
 * from this listing. The result of sge_mkdir() is ignored in the visible
 * code.
 */
int job_write_common_part(lListElem *job, u_long32 ja_task_id,
464
sge_spool_flags_t flags)
468
char spool_dir[SGE_PATH_MAX];
469
char spoolpath_common[SGE_PATH_MAX], tmp_spoolpath_common[SGE_PATH_MAX];
472
DENTER(TOP_LAYER, "job_write_common_part");
474
job_id = lGetUlong(job, JB_job_number);
475
sge_get_file_path(spool_dir, JOB_SPOOL_DIR, FORMAT_DEFAULT,
476
flags, job_id, ja_task_id, NULL);
477
sge_mkdir(spool_dir, 0755, 0, 0);
478
sge_get_file_path(spoolpath_common, JOB_SPOOL_FILE, FORMAT_DEFAULT,
479
flags, job_id, ja_task_id, NULL);
480
sge_get_file_path(tmp_spoolpath_common, JOB_SPOOL_FILE,
481
FORMAT_DOT_FILENAME, flags, job_id, ja_task_id, NULL);
484
/* detach the task list while writing, then put it back */
lXchgList(job, JB_ja_tasks, &ja_tasks);
485
ret = lWriteElemToDisk(job, tmp_spoolpath_common, NULL, "job");
486
lXchgList(job, JB_ja_tasks, &ja_tasks);
488
if (!ret && (rename(tmp_spoolpath_common, spoolpath_common) == -1)) {
499
/*
 * ja_task_write_to_disk() -- spool one array task and, optionally, its
 * pe tasks.
 *
 * With SPOOL_HANDLE_PARALLEL_TASKS set in flags:
 *  - the TASK_SPOOL_DIR directory is created with sge_mkdir() unless the
 *    cached old_task_spool_dir already names it (within the execd the
 *    directory is always (re)created);
 *  - the task is written without its JAT_task_list (the list is swapped out
 *    with lXchgList() around lWriteElemToDisk()) to a temporary file which
 *    is then renamed to the TASK_SPOOL_FILE path;
 *  - starting at the pe task with PET_id == pe_task_id (lGetElemStr()) or at
 *    the first element of the list (lFirst()), pe tasks are written to a
 *    temporary PE_TASK_SPOOL_FILE path and renamed into place.
 *
 * The remaining visible code (presumably the branch without
 * SPOOL_HANDLE_PARALLEL_TASKS) creates the TASKS_SPOOL_DIR directory under
 * the same caching rule and writes the complete task to a temporary file
 * that is renamed to the TASK_SPOOL_DIR_AS_FILE path.
 *
 * NOTE(review): lines are missing from this listing (declaration of ret,
 * the conditions around the task write and around the choice of the first
 * pe task, trailing arguments of some calls, error handling after rename(),
 * else keywords, braces, the return statement) -- confirm against the
 * complete file. strcpy() into old_task_spool_dir relies on both buffers
 * having SGE_PATH_MAX bytes.
 */
static int ja_task_write_to_disk(lListElem *ja_task, u_long32 job_id,
500
const char *pe_task_id,
501
sge_spool_flags_t flags)
503
int handle_pe_tasks = flags & SPOOL_HANDLE_PARALLEL_TASKS;
505
DENTER(TOP_LAYER, "ja_task_write_to_disk");
507
if (handle_pe_tasks) {
508
char task_spool_dir[SGE_PATH_MAX];
509
char task_spool_file[SGE_PATH_MAX];
510
char tmp_task_spool_file[SGE_PATH_MAX];
511
lListElem *pe_task = NULL;
512
lListElem *next_pe_task = NULL;
513
u_long32 ja_task_id = lGetUlong(ja_task, JAT_task_number);
514
lList *pe_task_list = lGetList(ja_task, JAT_task_list);
516
sge_get_file_path(task_spool_dir, TASK_SPOOL_DIR, FORMAT_DEFAULT, flags,
517
job_id, ja_task_id, NULL);
518
sge_get_file_path(task_spool_file, TASK_SPOOL_FILE, FORMAT_DEFAULT, flags,
519
job_id, ja_task_id, NULL);
520
sge_get_file_path(tmp_task_spool_file, TASK_SPOOL_FILE,
521
FORMAT_DOT_FILENAME, flags, job_id, ja_task_id, NULL);
523
/* skip mkdir when the cache says this directory was created last time */
if ((flags & SPOOL_WITHIN_EXECD) ||
524
strcmp(old_task_spool_dir, task_spool_dir)) {
525
strcpy(old_task_spool_dir, task_spool_dir);
526
sge_mkdir(task_spool_dir, 0755, 0, 0);
530
lList *tmp_task_list = NULL;
532
/* write the task without its pe task list, then restore the list */
lXchgList(ja_task, JAT_task_list, &tmp_task_list);
533
ret = lWriteElemToDisk(ja_task, tmp_task_spool_file, NULL, "ja_task");
534
lXchgList(ja_task, JAT_task_list, &tmp_task_list);
535
if (!ret && (rename(tmp_task_spool_file, task_spool_file) == -1)) {
542
next_pe_task = lGetElemStr(pe_task_list, PET_id, pe_task_id);
544
next_pe_task = lFirst(pe_task_list);
546
while ((pe_task = next_pe_task)) {
547
char pe_task_spool_file[SGE_PATH_MAX];
548
char tmp_pe_task_spool_file[SGE_PATH_MAX];
549
const char* pe_task_id_string = lGetString(pe_task, PET_id);
554
next_pe_task = lNext(pe_task);
559
sge_get_file_path(pe_task_spool_file, PE_TASK_SPOOL_FILE,
560
FORMAT_DEFAULT, flags, job_id, ja_task_id,
562
sge_get_file_path(tmp_pe_task_spool_file, PE_TASK_SPOOL_FILE,
563
FORMAT_DOT_FILENAME, flags, job_id, ja_task_id,
566
ret = lWriteElemToDisk(pe_task, tmp_pe_task_spool_file,
569
(rename(tmp_pe_task_spool_file, pe_task_spool_file) == -1)) {
577
char task_spool_dir[SGE_PATH_MAX];
578
char task_spool_file[SGE_PATH_MAX];
579
char tmp_task_spool_file[SGE_PATH_MAX];
581
sge_get_file_path(task_spool_dir, TASKS_SPOOL_DIR, FORMAT_DEFAULT, flags,
582
job_id, lGetUlong(ja_task, JAT_task_number), NULL);
583
sge_get_file_path(task_spool_file, TASK_SPOOL_DIR_AS_FILE,
584
FORMAT_DEFAULT, flags, job_id,
585
lGetUlong(ja_task, JAT_task_number), NULL);
586
sge_get_file_path(tmp_task_spool_file, TASK_SPOOL_DIR_AS_FILE,
587
FORMAT_DOT_FILENAME, flags, job_id,
588
lGetUlong(ja_task, JAT_task_number), NULL);
590
/* same directory cache rule as in the parallel branch */
if ((flags & SPOOL_WITHIN_EXECD) ||
591
strcmp(old_task_spool_dir, task_spool_dir)) {
592
strcpy(old_task_spool_dir, task_spool_dir);
593
sge_mkdir(task_spool_dir, 0755, 0, 0);
596
ret = lWriteElemToDisk(ja_task, tmp_task_spool_file, NULL, "ja_task");
597
if (!ret && (rename(tmp_task_spool_file, task_spool_file) == -1)) {
608
/*
 * job_remove_spool_file() -- remove spooled data of a job, an array task or
 * a pe task.
 *
 * The job is looked up with job_list_locate() in the zombie master list when
 * SPOOL_HANDLE_AS_ZOMBIE is set, otherwise in the job master list, and
 * job_has_to_spool_one_file() decides which spool layout applies.
 *
 * Visible steps:
 *  - ja_taskid != 0, pe_task_id != NULL, directory layout: unlink the
 *    PE_TASK_SPOOL_FILE of that pe task (only if it is a regular file).
 *  - ja_taskid != 0, pe_task_id == NULL, directory layout: if
 *    remove_task_spool_file is set, remove the TASK_SPOOL_DIR_AS_FILE path
 *    (sge_rmdir() when it is a directory, a separate sge_unlink() call is
 *    visible for the other case), then try rmdir() on the TASKS_SPOOL_DIR
 *    directory and reset the old_task_spool_dir cache.
 *  - ja_taskid == 0: unlink the JOB_SPOOL_FILE ("common") and remove the job
 *    spool directory; a further sge_unlink() of the JOB_SPOOL_DIR path is
 *    visible (presumably the single-file layout).
 *  - if try_to_remove_sub_dirs is set: rmdir() the FORMAT_THIRD_PART
 *    directory and, if that succeeded, the FORMAT_SECOND_PART directory.
 *    Failures of these rmdir() calls are intentionally ignored.
 *
 * Failures of the other removals are logged with ERROR().
 *
 * NOTE(review): lines are missing from this listing (declaration of
 * one_file, the condition selecting how remove_task_spool_file is set --
 * within_execd is declared but its use is not visible --, else keywords,
 * braces, comment delimiters, the return statement). job returned by
 * job_list_locate() is used without a visible NULL check. Confirm against
 * the complete file.
 */
int job_remove_spool_file(u_long32 jobid, u_long32 ja_taskid,
609
const char *pe_task_id,
610
sge_spool_flags_t flags)
612
char spool_dir[SGE_PATH_MAX] = "";
613
char spool_dir_second[SGE_PATH_MAX] = "";
614
char spool_dir_third[SGE_PATH_MAX] = "";
615
char spoolpath_common[SGE_PATH_MAX] = "";
616
int within_execd = flags & SPOOL_WITHIN_EXECD;
617
int handle_as_zombie = flags & SPOOL_HANDLE_AS_ZOMBIE;
619
/* zombie jobs are kept in their own master list */
lList *master_list = handle_as_zombie ?
620
*(object_type_get_master_list(SGE_TYPE_ZOMBIE)) :
621
*(object_type_get_master_list(SGE_TYPE_JOB));
622
lListElem *job = job_list_locate(master_list, jobid);
623
int try_to_remove_sub_dirs = 0;
625
DENTER(TOP_LAYER, "job_remove_spool_file");
627
one_file = job_has_to_spool_one_file(job, *object_type_get_master_list(SGE_TYPE_PE),
629
/* case 1: a single pe task of an array task */
if (ja_taskid != 0 && pe_task_id != NULL && !one_file) {
630
char pe_task_spool_file[SGE_PATH_MAX];
632
sge_get_file_path(pe_task_spool_file, PE_TASK_SPOOL_FILE,
633
FORMAT_DEFAULT, flags, jobid, ja_taskid, pe_task_id);
635
DPRINTF(("try to remove "SFN"\n", pe_task_spool_file));
636
if (sge_is_file(pe_task_spool_file) &&
637
!sge_unlink(NULL, pe_task_spool_file)) {
638
ERROR((SGE_EVENT, MSG_JOB_CANNOT_REMOVE_SS,
639
MSG_JOB_PE_TASK_SPOOL_FILE, pe_task_spool_file));
644
/* case 2: a whole array task */
if (ja_taskid != 0 && pe_task_id == NULL && !one_file) {
645
char task_spool_dir[SGE_PATH_MAX];
646
char task_spool_file[SGE_PATH_MAX];
647
int remove_task_spool_file = 0;
649
sge_get_file_path(task_spool_dir, TASKS_SPOOL_DIR, FORMAT_DEFAULT, flags,
650
jobid, ja_taskid, NULL);
651
sge_get_file_path(task_spool_file, TASK_SPOOL_DIR_AS_FILE,
652
FORMAT_DEFAULT, flags, jobid, ja_taskid, NULL);
655
remove_task_spool_file = 1;
657
remove_task_spool_file = job_is_enrolled(job, ja_taskid);
659
DPRINTF(("remove_task_spool_file = %d\n", remove_task_spool_file));;
661
if (remove_task_spool_file) {
662
DPRINTF(("removing "SFN"\n", task_spool_file));
664
/* the task may have been spooled as a directory or as a single file */
if (sge_is_directory(task_spool_file)) {
665
dstring task_spool_file_msg;
666
char task_spool_file_msg_buffer[SGE_PATH_MAX];
668
sge_dstring_init(&task_spool_file_msg, task_spool_file_msg_buffer,
669
sizeof(task_spool_file_msg_buffer));
670
if (sge_rmdir(task_spool_file, &task_spool_file_msg)) {
671
ERROR((SGE_EVENT, MSG_JOB_CANNOT_REMOVE_SS,
672
MSG_JOB_TASK_SPOOL_FILE, task_spool_file_msg_buffer));
676
if (!sge_unlink(NULL, task_spool_file)) {
677
ERROR((SGE_EVENT, MSG_JOB_CANNOT_REMOVE_SS,
678
MSG_JOB_TASK_SPOOL_FILE, task_spool_file));
684
* Following rmdir call may fail. We can ignore this error.
685
* This is only an indicator that another task is running which has
686
* been spooled in the directory.
688
DPRINTF(("try to remove "SFN"\n", task_spool_dir));
689
rmdir(task_spool_dir);
692
* a task spool directory has been removed: reinit
693
* old_task_spool_dir to ensure mkdir() is performed
695
old_task_spool_dir[0] = '\0';
699
sge_get_file_path(spool_dir, JOB_SPOOL_DIR,
700
FORMAT_DEFAULT, flags, jobid, ja_taskid, NULL);
701
sge_get_file_path(spool_dir_third, JOB_SPOOL_DIR,
702
FORMAT_THIRD_PART, flags, jobid, ja_taskid, NULL);
703
sge_get_file_path(spool_dir_second, JOB_SPOOL_DIR,
704
FORMAT_SECOND_PART, flags, jobid, ja_taskid, NULL);
705
sge_get_file_path(spoolpath_common, JOB_SPOOL_FILE,
706
FORMAT_DEFAULT, flags, jobid, ja_taskid, NULL);
707
try_to_remove_sub_dirs = 0;
709
/* case 3: the whole job */
if (ja_taskid == 0) {
710
DPRINTF(("removing "SFN"\n", spoolpath_common));
711
if (!sge_unlink(NULL, spoolpath_common)) {
712
ERROR((SGE_EVENT, MSG_JOB_CANNOT_REMOVE_SS,
713
MSG_JOB_JOB_SPOOL_FILE, spoolpath_common));
716
DPRINTF(("removing "SFN"\n", spool_dir));
717
if (sge_rmdir(spool_dir, NULL)) {
718
ERROR((SGE_EVENT, MSG_JOB_CANNOT_REMOVE_SS,
719
MSG_JOB_JOB_SPOOL_DIRECTORY, spool_dir));
722
try_to_remove_sub_dirs = 1;
725
DPRINTF(("removing "SFN"\n", spool_dir));
726
if (!sge_unlink(NULL, spool_dir)) {
727
ERROR((SGE_EVENT, MSG_JOB_CANNOT_REMOVE_SS, MSG_JOB_JOB_SPOOL_FILE,
731
try_to_remove_sub_dirs = 1;
734
* Following rmdir calls may fail. We can ignore these errors.
735
* This is only an indicator that another job is running which has been
736
* spooled in the same directory.
738
if (try_to_remove_sub_dirs) {
739
DPRINTF(("try to remove "SFN"\n", spool_dir_third));
740
/* the parent is only tried once the child directory is gone */
if (!rmdir(spool_dir_third)) {
741
DPRINTF(("try to remove "SFN"\n", spool_dir_second));
742
rmdir(spool_dir_second);
750
/*
 * job_remove_script_file() -- delete the spooled script file of a job.
 *
 * Builds the JOB_SCRIPT_FILE path for job_id (SPOOL_DEFAULT, task id 0) and
 * removes it with unlink(). A failure is logged with ERROR() including
 * strerror(errno); an INFO() message naming the script file is also visible.
 * The operation is bracketed by SGE_PROF_JOBSCRIPT profiling.
 *
 * NOTE(review): the declaration of the result variable, the lines between
 * the unlink() test and the ERROR()/INFO() calls (possibly a further errno
 * check and the else keyword) and the return statement are missing from
 * this listing -- confirm against the complete file.
 */
static int job_remove_script_file(u_long32 job_id)
752
char script_file[SGE_PATH_MAX] = "";
754
DENTER(TOP_LAYER, "job_remove_script_file");
756
PROF_START_MEASUREMENT(SGE_PROF_JOBSCRIPT);
757
sge_get_file_path(script_file, JOB_SCRIPT_FILE, FORMAT_DEFAULT,
758
SPOOL_DEFAULT, job_id, 0, NULL);
759
/* unlink() returns non-zero on failure */
if (unlink(script_file)) {
761
ERROR((SGE_EVENT, MSG_CONFIG_FAILEDREMOVINGSCRIPT_SS,
762
strerror(errno), script_file));
767
INFO((SGE_EVENT, MSG_CONFIG_REMOVEDSCRIPTOFBADJOBFILEX_S, script_file));
769
PROF_STOP_MEASUREMENT(SGE_PROF_JOBSCRIPT);
774
/*
 * job_list_read_from_disk() -- read all spooled jobs into a job list.
 *
 * Walks three nested directory levels below the JOBS_SPOOL_DIR
 * (FORMAT_FIRST_PART) path. On each level the directory entries are
 * consumed front to back (lFirst() / lRemoveElem()). Entries of the first
 * two levels that are not directories are reported with ERROR().
 *
 * For every third-level entry:
 *  - the job id is parsed with strtol() from the concatenation of the three
 *    entry names; an optional part after a "." is parsed as array task id;
 *  - the entry's path must equal the JOB_SPOOL_DIR path computed for these
 *    ids, otherwise it is reported as invalid directory;
 *  - the job is read with job_create_from_file();
 *  - a call to job_remove_script_file() is visible right after the read
 *    (presumably only when reading the job failed);
 *  - if check is set, not all tasks are finished and the job has a
 *    JB_script_file, the script file must exist (SGE_STAT()); otherwise the
 *    error is logged and the job's spool data is removed;
 *  - if the JB_job_number stored in the file differs from the id derived
 *    from the path, the spool data is removed (the script is kept);
 *  - JB_jid_successor_list is cleared and the job is added to *job_list;
 *    unless zombies are handled, the job is also registered with
 *    job_list_register_new_job() and suser_register_new_job().
 *
 * Progress is shown with the sge_status_*() functions.
 *
 * NOTE(review): lines are missing from this listing (the "for (;" heads,
 * declarations of lasts and all_finished, the ja_task_id default, continue
 * statements, braces, the use of init_function, the return statement) --
 * confirm against the complete file. The DENTER() tag names
 * "job_read_job_list_from_disk" although the function is called
 * job_list_read_from_disk. The sprintf() calls write into SGE_PATH_MAX
 * sized buffers without a bound.
 */
int job_list_read_from_disk(lList **job_list, char *list_name, int check,
775
sge_spool_flags_t flags,
776
int (*init_function)(lListElem*))
778
char first_dir[SGE_PATH_MAX] = "";
779
lList *first_direnties;
780
lListElem *first_direntry;
781
char path[SGE_PATH_MAX];
782
int handle_as_zombie = (flags & SPOOL_HANDLE_AS_ZOMBIE) > 0;
784
DENTER(TOP_LAYER, "job_read_job_list_from_disk");
785
sge_get_file_path(first_dir, JOBS_SPOOL_DIR, FORMAT_FIRST_PART,
787
first_direnties = sge_get_dirents(first_dir);
789
/* announce the read on stdout unless running silently */
if (first_direnties && !sge_silent_get()) {
790
printf(MSG_CONFIG_READINGIN_S, list_name);
794
sge_status_set_type(STATUS_DOTS);
796
/* level 1: each pass handles and then removes the first list element */
(first_direntry = lFirst(first_direnties));
797
lRemoveElem(first_direnties, &first_direntry)) {
798
char second_dir[SGE_PATH_MAX] = "";
799
lList *second_direnties;
800
lListElem *second_direntry;
801
const char *first_entry_string;
804
first_entry_string = lGetString(first_direntry, ST_name);
805
sprintf(path, "%s/%s", first_dir, first_entry_string);
806
if (!sge_is_directory(path)) {
807
ERROR((SGE_EVENT, MSG_CONFIG_NODIRECTORY_S, path));
811
sprintf(second_dir, SFN"/"SFN, first_dir, first_entry_string);
812
second_direnties = sge_get_dirents(second_dir);
814
/* level 2 */
(second_direntry = lFirst(second_direnties));
815
lRemoveElem(second_direnties, &second_direntry)) {
816
char third_dir[SGE_PATH_MAX] = "";
817
lList *third_direnties;
818
lListElem *third_direntry;
819
const char *second_entry_string;
821
second_entry_string = lGetString(second_direntry, ST_name);
822
sprintf(path, "%s/%s/%s", first_dir, first_entry_string,
823
second_entry_string);
824
if (!sge_is_directory(path)) {
825
ERROR((SGE_EVENT, MSG_CONFIG_NODIRECTORY_S, path));
829
sprintf(third_dir, SFN"/"SFN, second_dir, second_entry_string);
830
third_direnties = sge_get_dirents(third_dir);
832
/* level 3: one entry per spooled job (file or directory) */
(third_direntry = lFirst(third_direnties));
833
lRemoveElem(third_direnties, &third_direntry)) {
834
lListElem *job, *ja_task;
836
char job_dir[SGE_PATH_MAX] = "";
837
char fourth_dir[SGE_PATH_MAX] = "";
838
char job_id_string[SGE_PATH_MAX] = "";
839
char *ja_task_id_string;
840
u_long32 job_id, ja_task_id;
843
sge_status_next_turn();
844
sprintf(fourth_dir, SFN"/"SFN, third_dir,
845
lGetString(third_direntry, ST_name));
846
/* the job id is spread over the three directory entry names */
sprintf(job_id_string, SFN SFN SFN,
847
lGetString(first_direntry, ST_name),
848
lGetString(second_direntry, ST_name),
849
lGetString(third_direntry, ST_name));
850
job_id = (u_long32) strtol(job_id_string, NULL, 10);
851
/* an optional task id follows the first "." */
strtok_r(job_id_string, ".", &lasts);
852
ja_task_id_string = strtok_r(NULL, ".", &lasts);
853
if (ja_task_id_string) {
854
ja_task_id = (u_long32) strtol(ja_task_id_string, NULL, 10);
858
sge_get_file_path(job_dir, JOB_SPOOL_DIR, FORMAT_DEFAULT,
859
flags, job_id, ja_task_id, NULL);
861
/* check directory name */
862
if (strcmp(fourth_dir, job_dir)) {
863
fprintf(stderr, "%s %s\n", fourth_dir, job_dir);
864
DPRINTF(("Invalid directory "SFN"\n", fourth_dir));
869
job = job_create_from_file(job_id, ja_task_id, flags);
871
job_remove_script_file(job_id);
875
/* check for scriptfile before adding job */
877
for_each (ja_task, lGetList(job, JB_ja_tasks)) {
878
if (lGetUlong(ja_task, JAT_status) != JFINISHED) {
883
if (check && !all_finished && lGetString(job, JB_script_file)) {
884
char script_file[SGE_PATH_MAX];
885
SGE_STRUCT_STAT stat_buffer;
887
sge_get_file_path(script_file, JOB_SCRIPT_FILE, FORMAT_DEFAULT,
888
flags, job_id, 0, NULL);
889
/* SGE_STAT() returns non-zero when the script cannot be found */
if (SGE_STAT(script_file, &stat_buffer)) {
890
ERROR((SGE_EVENT, MSG_CONFIG_CANTFINDSCRIPTFILE_U,
891
sge_u32c(lGetUlong(job, JB_job_number))));
892
/* the job is added to the master list, its spool data removed, and it is taken out again */
job_list_add_job(object_type_get_master_list(SGE_TYPE_JOB), "job list", job, 0);
893
job_remove_spool_file(job_id, 0, NULL, SPOOL_DEFAULT);
894
lRemoveElem( *(object_type_get_master_list(SGE_TYPE_JOB)), &job);
899
/* check if filename has same name which is stored job id */
900
if (lGetUlong(job, JB_job_number) != job_id) {
901
ERROR((SGE_EVENT, MSG_CONFIG_JOBFILEXHASWRONGFILENAMEDELETING_U,
903
job_remove_spool_file(job_id, 0, NULL, flags);
905
* script is not deleted here,
906
* since it may belong to a valid job
914
lSetList(job, JB_jid_successor_list, NULL);
915
job_list_add_job(job_list, list_name, job, 0);
917
if (!handle_as_zombie) {
918
job_list_register_new_job(*(object_type_get_master_list(SGE_TYPE_JOB)), mconf_get_max_jobs(), 1);
919
suser_register_new_job(job, mconf_get_max_u_jobs(), 1);
922
lFreeList(&third_direnties);
924
lFreeList(&second_direnties);
926
lFreeList(&first_direnties);
929
sge_status_end_turn();