~ubuntu-branches/ubuntu/utopic/gridengine/utopic

« back to all changes in this revision

Viewing changes to source/libs/spool/classic/read_write_job.c

  • Committer: Bazaar Package Importer
  • Author(s): Mark Hymers
  • Date: 2008-06-25 22:36:13 UTC
  • Revision ID: james.westby@ubuntu.com-20080625223613-tvd9xlhuoct9kyhm
Tags: upstream-6.2~beta2
Import upstream version 6.2~beta2

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*___INFO__MARK_BEGIN__*/
 
2
/*************************************************************************
 
3
 * 
 
4
 *  The Contents of this file are made available subject to the terms of
 
5
 *  the Sun Industry Standards Source License Version 1.2
 
6
 * 
 
7
 *  Sun Microsystems Inc., March, 2001
 
8
 * 
 
9
 * 
 
10
 *  Sun Industry Standards Source License Version 1.2
 
11
 *  =================================================
 
12
 *  The contents of this file are subject to the Sun Industry Standards
 
13
 *  Source License Version 1.2 (the "License"); You may not use this file
 
14
 *  except in compliance with the License. You may obtain a copy of the
 
15
 *  License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
 
16
 * 
 
17
 *  Software provided under this License is provided on an "AS IS" basis,
 
18
 *  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
 
19
 *  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 
20
 *  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 
21
 *  See the License for the specific provisions governing your rights and
 
22
 *  obligations concerning the Software.
 
23
 * 
 
24
 *   The Initial Developer of the Original Code is: Sun Microsystems, Inc.
 
25
 * 
 
26
 *   Copyright: 2001 by Sun Microsystems, Inc.
 
27
 * 
 
28
 *   All Rights Reserved.
 
29
 * 
 
30
 ************************************************************************/
 
31
/*___INFO__MARK_END__*/
 
32
#include <stdio.h>
 
33
#include <stdlib.h>
 
34
#include <string.h>
 
35
#include <unistd.h>
 
36
#include <errno.h>
 
37
 
 
38
#include "basis_types.h"
 
39
#include "sge.h"
 
40
#include "sge_log.h"
 
41
#include "sge_ja_task.h"
 
42
#include "sgermon.h"
 
43
#include "cull_file.h"
 
44
#include "cull_list.h"
 
45
#include "sge_spool.h"
 
46
#include "spool/sge_dirent.h"
 
47
#include "sge_str.h"
 
48
#include "sge_job_qmaster.h"
 
49
#include "sge_job.h"
 
50
#include "sge_answer.h"
 
51
#include "sge_suser.h"
 
52
#include "sge_conf.h"
 
53
#include "sge_unistd.h"
 
54
#include "sge_pe_task.h"
 
55
#include "sge_pe.h"
 
56
#include "sge_time.h"
 
57
#include "uti/sge_profiling.h"
 
58
#include "sgeobj/sge_object.h"
 
59
 
 
60
#include "msg_common.h"
 
61
#include "msg_spoollib_classic.h"
 
62
 
 
63
#include "read_write_job.h"
 
64
 
 
65
static lList *ja_task_list_create_from_file(u_long32 job_id, 
 
66
                                            u_long32 ja_task_id,
 
67
                                            sge_spool_flags_t flags);
 
68
 
 
69
static lListElem *ja_task_create_from_file(u_long32 job_id,
 
70
                                           u_long32 ja_task_id,
 
71
                                           const char *pe_task_id,
 
72
                                           sge_spool_flags_t flags);
 
73
 
 
74
static int ja_task_write_to_disk(lListElem *ja_task, u_long32 job_id,
 
75
                                 const char *pe_task_id,
 
76
                                 sge_spool_flags_t flags); 
 
77
 
 
78
static int job_write_ja_task_part(lListElem *job, u_long32 ja_task_id,
 
79
                                  const char *pe_task_id,
 
80
                                  sge_spool_flags_t flags);
 
81
 
 
82
static int job_write_as_single_file(lListElem *job, u_long32 ja_task_id,
 
83
                                   sge_spool_flags_t flags);
 
84
 
 
85
static lListElem *job_create_from_file(u_long32 job_id, u_long32 task_id,
 
86
                                       sge_spool_flags_t flags);    
 
87
 
 
88
static int job_has_to_spool_one_file(const lListElem *job,
 
89
                                     const lList *pe_list,
 
90
                                     sge_spool_flags_t flags);
 
91
 
 
92
static lListElem *pe_task_create_from_file(u_long32 job_id,
 
93
                                           u_long32 ja_task_id,
 
94
                                           const char *pe_task_id,
 
95
                                           sge_spool_flags_t flags);
 
96
 
 
97
static int job_remove_script_file(u_long32 job_id);
 
98
 
 
99
/* Here we cache the path of the last task spool dir that has been created.
 
100
   In case a task spool dir is removed the cache is no longer a proof of the
 
101
   existence of the task spool dir and is reinitialized */
 
102
static char old_task_spool_dir[SGE_PATH_MAX] = "";
 
103
 
 
104
static lListElem *job_create_from_file(u_long32 job_id, u_long32 ja_task_id,
 
105
                                       sge_spool_flags_t flags)
 
106
{
 
107
   lListElem *job = NULL;
 
108
   char spool_path[SGE_PATH_MAX] = "";
 
109
 
 
110
   DENTER(TOP_LAYER, "job_create_from_file");
 
111
   sge_get_file_path(spool_path, JOB_SPOOL_DIR, FORMAT_DEFAULT, 
 
112
                     flags, job_id, ja_task_id, NULL);  
 
113
 
 
114
   if (sge_is_directory(spool_path)) {
 
115
      char spool_path_common[SGE_PATH_MAX];
 
116
      lList *ja_tasks = NULL;
 
117
 
 
118
      sge_get_file_path(spool_path_common, JOB_SPOOL_FILE, FORMAT_DEFAULT, 
 
119
                        flags, job_id, ja_task_id, NULL);  
 
120
      job = lReadElemFromDisk(NULL, spool_path_common, JB_Type, "job");
 
121
      if (job) {
 
122
         ja_tasks = ja_task_list_create_from_file(job_id, ja_task_id, flags); 
 
123
         if (ja_tasks) {
 
124
            lList *ja_task_list;
 
125
 
 
126
            ja_task_list = lGetList(job, JB_ja_tasks);
 
127
            if (ja_task_list) {
 
128
               lAddList(ja_task_list, &ja_tasks);
 
129
            } else {
 
130
               lSetList(job, JB_ja_tasks, ja_tasks);
 
131
            }
 
132
            ja_tasks = NULL;
 
133
            lPSortList(ja_tasks, "%I+", JAT_task_number); 
 
134
         } else {
 
135
            /*
 
136
             * This is no error! It only means that there is no enrolled
 
137
             * task in the spool area (all tasks are unenrolled)
 
138
             */
 
139
         }
 
140
      }
 
141
   } else {
 
142
      job = lReadElemFromDisk(NULL, spool_path, JB_Type, "job");
 
143
   }
 
144
   DRETURN(job);
 
145
}
 
146
 
 
147
static lList *ja_task_list_create_from_file(u_long32 job_id, 
 
148
                                            u_long32 ja_task_id,
 
149
                                            sge_spool_flags_t flags)
 
150
{
 
151
   lList *dir_entries = NULL;
 
152
   lList *ja_task_entries = NULL;
 
153
   lList *pe_task_entries = NULL;
 
154
   lList *ja_tasks = NULL;
 
155
   lList *pe_tasks = NULL;
 
156
   lListElem *dir_entry;
 
157
   char spool_dir_job[SGE_PATH_MAX];
 
158
   DENTER(TOP_LAYER, "ja_task_list_create_from_file");
 
159
 
 
160
   ja_tasks = lCreateList("ja_tasks", JAT_Type); 
 
161
   if (!ja_tasks) {
 
162
      DTRACE;
 
163
      goto error;
 
164
   }
 
165
   sge_get_file_path(spool_dir_job, JOB_SPOOL_DIR, FORMAT_DEFAULT, flags, 
 
166
                     job_id, ja_task_id, NULL);
 
167
   dir_entries = sge_get_dirents(spool_dir_job);
 
168
   for_each(dir_entry, dir_entries) {
 
169
      const char *entry;
 
170
 
 
171
      entry = lGetString(dir_entry, ST_name);
 
172
      if (strcmp(entry, ".") && strcmp(entry, "..") && 
 
173
          strcmp(entry, "common")) {
 
174
         char spool_dir_tasks[SGE_PATH_MAX];
 
175
         lListElem *ja_task_entry; 
 
176
 
 
177
         sprintf(spool_dir_tasks, SFN"/"SFN, spool_dir_job, entry);
 
178
         ja_task_entries = sge_get_dirents(spool_dir_tasks);
 
179
         for_each(ja_task_entry, ja_task_entries) {
 
180
            const char *ja_task_string;
 
181
 
 
182
            ja_task_string = lGetString(ja_task_entry, ST_name);
 
183
            if (strcmp(ja_task_string, ".") && strcmp(ja_task_string, "..")) {
 
184
               char spool_dir_pe_tasks[SGE_PATH_MAX];
 
185
               lListElem *pe_task_entry;
 
186
               u_long32 ja_task_id;
 
187
               lListElem *ja_task;
 
188
 
 
189
               ja_task_id = atol(ja_task_string);
 
190
               if (ja_task_id == 0) {
 
191
                  DTRACE;
 
192
                  goto error;
 
193
               }
 
194
               sprintf(spool_dir_pe_tasks, SFN"/"SFN, spool_dir_tasks,
 
195
                       ja_task_string);
 
196
 
 
197
               if (sge_is_directory(spool_dir_pe_tasks)) {
 
198
                  char spool_path_ja_task[SGE_PATH_MAX];
 
199
 
 
200
                  sge_get_file_path(spool_path_ja_task, TASK_SPOOL_FILE,
 
201
                                    FORMAT_DEFAULT, flags, job_id, ja_task_id, NULL);
 
202
                  ja_task = lReadElemFromDisk(NULL, spool_path_ja_task, JAT_Type, "ja_task");
 
203
                  pe_tasks = NULL;
 
204
                  pe_task_entries = sge_get_dirents(spool_dir_pe_tasks);
 
205
                  for_each(pe_task_entry, pe_task_entries) {
 
206
                     const char *pe_task_string;
 
207
 
 
208
                     pe_task_string = lGetString(pe_task_entry, ST_name);
 
209
                     if (strcmp(pe_task_string, ".") && 
 
210
                         strcmp(pe_task_string, "..") &&
 
211
                         strcmp(pe_task_string, "common")) {
 
212
                        lListElem *pe_task;
 
213
                        
 
214
                        pe_task = pe_task_create_from_file(job_id, ja_task_id, pe_task_string, flags);
 
215
                        if (pe_task) {
 
216
                           if (!pe_tasks) {
 
217
                              pe_tasks = lCreateList("pe_tasks", PET_Type); 
 
218
                           }
 
219
                           lAppendElem(pe_tasks, pe_task);
 
220
                        } else {
 
221
                           DTRACE;
 
222
                           goto error;
 
223
                        }
 
224
                     }
 
225
                  }
 
226
                  lFreeList(&pe_task_entries);
 
227
                  lSetList(ja_task, JAT_task_list, pe_tasks);
 
228
               } else {
 
229
                  ja_task = ja_task_create_from_file(job_id, ja_task_id, NULL, flags);
 
230
               }
 
231
               if (ja_task) {
 
232
                  lAppendElem(ja_tasks, ja_task);
 
233
               } else {
 
234
                  DTRACE;
 
235
                  goto error;
 
236
               }
 
237
            }
 
238
         } 
 
239
         lFreeList(&ja_task_entries);
 
240
      }
 
241
   }
 
242
   lFreeList(&dir_entries);
 
243
 
 
244
   if (!lGetNumberOfElem(ja_tasks)) {
 
245
      DTRACE;
 
246
      goto error; 
 
247
   } 
 
248
   DEXIT;
 
249
   return ja_tasks;
 
250
error:
 
251
   lFreeList(&ja_tasks);
 
252
   lFreeList(&dir_entries);
 
253
   lFreeList(&ja_task_entries);  
 
254
   lFreeList(&pe_task_entries);
 
255
   DEXIT;
 
256
   return NULL; 
 
257
}
 
258
 
 
259
static lListElem *ja_task_create_from_file(u_long32 job_id, 
 
260
                                           u_long32 ja_task_id, 
 
261
                                           const char *pe_task_id,
 
262
                                           sge_spool_flags_t flags) 
 
263
{
 
264
   lListElem *ja_task;
 
265
   char spool_path_ja_task[SGE_PATH_MAX];
 
266
 
 
267
   sge_get_file_path(spool_path_ja_task, TASK_SPOOL_DIR_AS_FILE,
 
268
                     FORMAT_DEFAULT, flags, job_id, ja_task_id, NULL);
 
269
   ja_task = lReadElemFromDisk(NULL, spool_path_ja_task, JAT_Type, "ja_task"); 
 
270
   return ja_task;
 
271
}
 
272
 
 
273
static lListElem *pe_task_create_from_file(u_long32 job_id,
 
274
                                           u_long32 ja_task_id,
 
275
                                           const char *pe_task_id,
 
276
                                           sge_spool_flags_t flags)
 
277
{
 
278
   lListElem *pe_task;
 
279
   char spool_path_pe_task[SGE_PATH_MAX];
 
280
 
 
281
   sge_get_file_path(spool_path_pe_task, PE_TASK_SPOOL_FILE,
 
282
                     FORMAT_DEFAULT, flags, job_id, ja_task_id, pe_task_id);
 
283
   pe_task = lReadElemFromDisk(NULL, spool_path_pe_task, PET_Type, "pe_task"); 
 
284
   return pe_task;
 
285
   
 
286
}
 
287
 
 
288
/****** spool/classic/job_write_spool_file() **********************************
 
289
*  NAME
 
290
*     job_write_spool_file() -- makes a job/task persistent 
 
291
*
 
292
*  SYNOPSIS
 
293
*     int job_write_spool_file(lListElem *job, u_long32 ja_taskid, 
 
294
*                              sge_spool_flags_t flags) 
 
295
*
 
296
*  FUNCTION
 
297
*     This function writes a job or a task of an array job into the spool 
 
298
*     area. It may be used within the qmaster or execd code.
 
299
*   
 
300
*     The result from this function looks like this within the spool area
 
301
*     of the master for the job 10001, the array job 10002.1-3, 
 
302
*     the tightly integrated job 20011 (two pe_tasks).
 
303
*     
 
304
*   
 
305
*     $SGE_ROOT/default/spool/qmaster/jobs
 
306
*     +---00
 
307
*         +---0001
 
308
*         |   +---0001                     (JB_Type file)
 
309
*         |   +---0002 
 
310
*         |       +---common               (JB_Type without JB_ja_tasks)
 
311
*         |       +---1-4096
 
312
*         |           +---1                (JAT_Type file) 
 
313
*         |           +---2                (JAT_Type file)
 
314
*         |           +---3                (JAT_Type file)
 
315
*         +---0002
 
316
*             +---0011
 
317
*                 +---common               (JB_Type without JB_ja_tasks)
 
318
*                 +---1-4096
 
319
*                     +---1
 
320
*                         +--- common      (JAT_Type file witout JAT_task_list)
 
321
*                         +--- 1.speedy    (PET_Type file)
 
322
*                         +--- 2.speedy    (PET_Type file)
 
323
*                         +--- past_usage  (PET_Type file)
 
324
*
 
325
*     To optimize the spool behaviour please find the defines
 
326
*     MAX_JA_TASK_PER_DIR and MAX_JA_TASK_PER_FILE
 
327
*
 
328
*  INPUTS
 
329
*     lListElem *job          - full job (JB_Type) 
 
330
*     u_long32 ja_taskid      - 0 or a allowed array job task id 
 
331
*     const char *pe_task_id  - pe task id
 
332
*     sge_spool_flags_t flags - where/how should we spool the object 
 
333
*        SPOOL_HANDLE_AS_ZOMBIE   -> has to be used for zombie jobs 
 
334
*        SPOOL_WITHIN_EXECD       -> has to be used within the execd 
 
335
*        SPOOL_DEFAULT            -> if no other flags are needed
 
336
*
 
337
*  RESULT
 
338
*     int - 0 on success otherwise != 0 
 
339
******************************************************************************/
 
340
int job_write_spool_file(lListElem *job, u_long32 ja_taskid, 
 
341
                         const char *pe_task_id,
 
342
                         sge_spool_flags_t flags) 
 
343
{
 
344
   int ret;
 
345
   int report_long_delays = flags & SPOOL_WITHIN_EXECD;
 
346
   u_long32 start = 0;
 
347
   
 
348
   DENTER(TOP_LAYER, "job_write_spool_file");
 
349
 
 
350
   if (report_long_delays) {
 
351
      start = sge_get_gmt();
 
352
   }
 
353
 
 
354
   if (job_has_to_spool_one_file(job, *object_type_get_master_list(SGE_TYPE_PE), 
 
355
                                        flags)) {
 
356
      ret = job_write_as_single_file(job, ja_taskid, flags); 
 
357
   } else {
 
358
      ret = job_write_common_part(job, ja_taskid, flags);
 
359
      if (!ret && !(flags & SPOOL_IGNORE_TASK_INSTANCES)) {
 
360
         ret = job_write_ja_task_part(job, ja_taskid, pe_task_id, flags); 
 
361
      }
 
362
   }
 
363
 
 
364
   if (report_long_delays) {
 
365
      u_long32 time = sge_get_gmt() - start;
 
366
      if (time > 30) {
 
367
         /* administrators need to be aware of suspicious spooling delays */
 
368
         WARNING((SGE_EVENT, MSG_CONFIG_JOBSPOOLINGLONGDELAY_UUI, 
 
369
         sge_u32c(lGetUlong(job, JB_job_number)), sge_u32c(ja_taskid), (int)time));
 
370
      }
 
371
   }
 
372
 
 
373
   DRETURN(ret);
 
374
}
 
375
 
 
376
static int job_has_to_spool_one_file(const lListElem *job, 
 
377
                                     const lList *pe_list,
 
378
                                     sge_spool_flags_t flags) 
 
379
{
 
380
   DENTER(TOP_LAYER, "job_has_to_spool_one_file");
 
381
 
 
382
   if ((flags & SPOOL_HANDLE_AS_ZOMBIE) || (flags & SPOOL_WITHIN_EXECD)) {
 
383
      DRETURN(1);
 
384
   } 
 
385
   
 
386
   if (job_might_be_tight_parallel(job, pe_list)
 
387
               || (job_get_submit_ja_tasks(job) > sge_get_ja_tasks_per_file())) {
 
388
      DRETURN(0);
 
389
   }
 
390
 
 
391
   DRETURN(1);
 
392
}
 
393
 
 
394
static int job_write_as_single_file(lListElem *job, u_long32 ja_task_id,
 
395
                                   sge_spool_flags_t flags) 
 
396
{
 
397
   int ret = 0;
 
398
   u_long32 job_id;
 
399
   char job_dir_third[SGE_PATH_MAX] = "";
 
400
   char spool_file[SGE_PATH_MAX] = "";
 
401
   char tmp_spool_file[SGE_PATH_MAX] = "";
 
402
 
 
403
   DENTER(TOP_LAYER, "job_write_as_single_file");
 
404
   job_id = lGetUlong(job, JB_job_number);
 
405
 
 
406
   sge_get_file_path(job_dir_third, JOB_SPOOL_DIR, FORMAT_THIRD_PART,
 
407
                     flags, job_id, ja_task_id, NULL);
 
408
   sge_mkdir(job_dir_third, 0755, 0, 0);
 
409
   sge_get_file_path(spool_file, JOB_SPOOL_DIR, FORMAT_DEFAULT,
 
410
                     flags, job_id, ja_task_id, NULL);
 
411
   sge_get_file_path(tmp_spool_file, JOB_SPOOL_DIR, FORMAT_DOT_FILENAME,
 
412
                     flags, job_id, ja_task_id, NULL);
 
413
   ret = lWriteElemToDisk(job, tmp_spool_file, NULL, "job");
 
414
   if (!ret && (rename(tmp_spool_file, spool_file) == -1)) {
 
415
      DTRACE;
 
416
      ret = 1;
 
417
   }
 
418
 
 
419
   DEXIT;
 
420
   return ret;  
 
421
}
 
422
 
 
423
static int job_write_ja_task_part(lListElem *job, u_long32 ja_task_id,
 
424
                                  const char *pe_task_id,
 
425
                                  sge_spool_flags_t flags)
 
426
{
 
427
   lListElem *ja_task, *next_ja_task;
 
428
   u_long32 job_id;
 
429
   int ret = 0;
 
430
   DENTER(TOP_LAYER, "job_write_ja_task_part"); 
 
431
 
 
432
   job_id = lGetUlong(job, JB_job_number);
 
433
   if (ja_task_id) {
 
434
      next_ja_task = lGetElemUlong(lGetList(job, JB_ja_tasks),
 
435
                                   JAT_task_number, ja_task_id);
 
436
   } else {
 
437
      next_ja_task = lFirst(lGetList(job, JB_ja_tasks));
 
438
   }
 
439
   while((ja_task = next_ja_task)) {
 
440
      if (ja_task_id) {
 
441
         next_ja_task = NULL;
 
442
      } else {
 
443
         next_ja_task = lNext(ja_task);
 
444
      }
 
445
 
 
446
      if ((flags & SPOOL_WITHIN_EXECD) ||
 
447
          job_is_enrolled(job, lGetUlong(ja_task, JAT_task_number))) {
 
448
         if (job_might_be_tight_parallel(job, *object_type_get_master_list(SGE_TYPE_PE))) {
 
449
            flags |= SPOOL_HANDLE_PARALLEL_TASKS;
 
450
         }
 
451
 
 
452
         ret = ja_task_write_to_disk(ja_task, job_id, pe_task_id, flags);
 
453
         if (ret) {
 
454
            DTRACE;
 
455
            break;
 
456
         }
 
457
      }
 
458
   }
 
459
   DEXIT;
 
460
   return ret;
 
461
}
 
462
 
 
463
int job_write_common_part(lListElem *job, u_long32 ja_task_id,
 
464
                                 sge_spool_flags_t flags) 
 
465
{
 
466
   int ret = 0;
 
467
   u_long32 job_id;
 
468
   char spool_dir[SGE_PATH_MAX];
 
469
   char spoolpath_common[SGE_PATH_MAX], tmp_spoolpath_common[SGE_PATH_MAX];
 
470
   lList *ja_tasks;
 
471
 
 
472
   DENTER(TOP_LAYER, "job_write_common_part");
 
473
 
 
474
   job_id = lGetUlong(job, JB_job_number);
 
475
   sge_get_file_path(spool_dir, JOB_SPOOL_DIR, FORMAT_DEFAULT,
 
476
                     flags, job_id, ja_task_id, NULL);
 
477
   sge_mkdir(spool_dir, 0755, 0, 0);
 
478
   sge_get_file_path(spoolpath_common, JOB_SPOOL_FILE, FORMAT_DEFAULT,
 
479
                     flags, job_id, ja_task_id, NULL);
 
480
   sge_get_file_path(tmp_spoolpath_common, JOB_SPOOL_FILE,
 
481
                     FORMAT_DOT_FILENAME, flags, job_id, ja_task_id, NULL);
 
482
 
 
483
   ja_tasks = NULL;
 
484
   lXchgList(job, JB_ja_tasks, &ja_tasks);
 
485
   ret = lWriteElemToDisk(job, tmp_spoolpath_common, NULL, "job");
 
486
   lXchgList(job, JB_ja_tasks, &ja_tasks);
 
487
 
 
488
   if (!ret && (rename(tmp_spoolpath_common, spoolpath_common) == -1)) {
 
489
      DTRACE;
 
490
      ret = 1;
 
491
   }
 
492
 
 
493
   DEXIT;
 
494
   return ret;
 
495
}
 
496
 
 
497
 
 
498
 
 
499
static int ja_task_write_to_disk(lListElem *ja_task, u_long32 job_id,
 
500
                                 const char *pe_task_id,
 
501
                                 sge_spool_flags_t flags)
 
502
{
 
503
   int handle_pe_tasks = flags & SPOOL_HANDLE_PARALLEL_TASKS;
 
504
   int ret = 0;
 
505
   DENTER(TOP_LAYER, "ja_task_write_to_disk");
 
506
 
 
507
   if (handle_pe_tasks) {
 
508
      char task_spool_dir[SGE_PATH_MAX];
 
509
      char task_spool_file[SGE_PATH_MAX];
 
510
      char tmp_task_spool_file[SGE_PATH_MAX];
 
511
      lListElem *pe_task = NULL;
 
512
      lListElem *next_pe_task = NULL;
 
513
      u_long32 ja_task_id = lGetUlong(ja_task, JAT_task_number);
 
514
      lList *pe_task_list = lGetList(ja_task, JAT_task_list);
 
515
 
 
516
      sge_get_file_path(task_spool_dir, TASK_SPOOL_DIR, FORMAT_DEFAULT, flags,
 
517
                        job_id, ja_task_id, NULL);
 
518
      sge_get_file_path(task_spool_file, TASK_SPOOL_FILE, FORMAT_DEFAULT, flags,
 
519
                        job_id, ja_task_id, NULL);
 
520
      sge_get_file_path(tmp_task_spool_file, TASK_SPOOL_FILE, 
 
521
                        FORMAT_DOT_FILENAME, flags, job_id, ja_task_id, NULL);
 
522
 
 
523
      if ((flags & SPOOL_WITHIN_EXECD) || 
 
524
          strcmp(old_task_spool_dir, task_spool_dir)) {
 
525
         strcpy(old_task_spool_dir, task_spool_dir);
 
526
         sge_mkdir(task_spool_dir, 0755, 0, 0);
 
527
      }
 
528
 
 
529
      {
 
530
         lList *tmp_task_list = NULL;
 
531
 
 
532
         lXchgList(ja_task, JAT_task_list, &tmp_task_list);
 
533
         ret = lWriteElemToDisk(ja_task, tmp_task_spool_file, NULL, "ja_task");
 
534
         lXchgList(ja_task, JAT_task_list, &tmp_task_list);
 
535
         if (!ret && (rename(tmp_task_spool_file, task_spool_file) == -1)) {
 
536
            DTRACE;
 
537
            goto error;
 
538
         }
 
539
      }
 
540
 
 
541
      if (pe_task_id) {
 
542
         next_pe_task = lGetElemStr(pe_task_list, PET_id, pe_task_id);
 
543
      } else {
 
544
         next_pe_task = lFirst(pe_task_list);
 
545
      }
 
546
      while ((pe_task = next_pe_task)) {
 
547
         char pe_task_spool_file[SGE_PATH_MAX];
 
548
         char tmp_pe_task_spool_file[SGE_PATH_MAX];
 
549
         const char* pe_task_id_string = lGetString(pe_task, PET_id);
 
550
 
 
551
         if (pe_task_id) {
 
552
            next_pe_task = NULL;
 
553
         } else {
 
554
            next_pe_task = lNext(pe_task);
 
555
         }
 
556
 
 
557
         DTRACE;
 
558
 
 
559
         sge_get_file_path(pe_task_spool_file, PE_TASK_SPOOL_FILE, 
 
560
                           FORMAT_DEFAULT, flags, job_id, ja_task_id, 
 
561
                           pe_task_id_string);
 
562
         sge_get_file_path(tmp_pe_task_spool_file, PE_TASK_SPOOL_FILE, 
 
563
                           FORMAT_DOT_FILENAME, flags, job_id, ja_task_id, 
 
564
                           pe_task_id_string);
 
565
 
 
566
         ret = lWriteElemToDisk(pe_task, tmp_pe_task_spool_file, 
 
567
                                NULL, "pe_task");
 
568
         if (!ret && 
 
569
             (rename(tmp_pe_task_spool_file, pe_task_spool_file) == -1)) {
 
570
            DTRACE;
 
571
            goto error;
 
572
         }
 
573
   
 
574
         DTRACE;
 
575
      }
 
576
   } else {
 
577
      char task_spool_dir[SGE_PATH_MAX];
 
578
      char task_spool_file[SGE_PATH_MAX];
 
579
      char tmp_task_spool_file[SGE_PATH_MAX];
 
580
 
 
581
      sge_get_file_path(task_spool_dir, TASKS_SPOOL_DIR, FORMAT_DEFAULT, flags,
 
582
                        job_id, lGetUlong(ja_task, JAT_task_number), NULL);
 
583
      sge_get_file_path(task_spool_file, TASK_SPOOL_DIR_AS_FILE, 
 
584
                        FORMAT_DEFAULT, flags, job_id, 
 
585
                        lGetUlong(ja_task, JAT_task_number), NULL);
 
586
      sge_get_file_path(tmp_task_spool_file, TASK_SPOOL_DIR_AS_FILE, 
 
587
                        FORMAT_DOT_FILENAME, flags, job_id, 
 
588
                        lGetUlong(ja_task, JAT_task_number), NULL);
 
589
 
 
590
      if ((flags & SPOOL_WITHIN_EXECD) ||
 
591
          strcmp(old_task_spool_dir, task_spool_dir)) {
 
592
         strcpy(old_task_spool_dir, task_spool_dir);
 
593
         sge_mkdir(task_spool_dir, 0755, 0, 0);
 
594
      }
 
595
 
 
596
      ret = lWriteElemToDisk(ja_task, tmp_task_spool_file, NULL, "ja_task");
 
597
      if (!ret && (rename(tmp_task_spool_file, task_spool_file) == -1)) {
 
598
         DTRACE;
 
599
         goto error;
 
600
      }    
 
601
   }
 
602
 
 
603
error:
 
604
   DEXIT;
 
605
   return ret;
 
606
}
 
607
 
 
608
int job_remove_spool_file(u_long32 jobid, u_long32 ja_taskid, 
 
609
                          const char *pe_task_id,
 
610
                          sge_spool_flags_t flags)
 
611
{
 
612
   char spool_dir[SGE_PATH_MAX] = "";
 
613
   char spool_dir_second[SGE_PATH_MAX] = "";
 
614
   char spool_dir_third[SGE_PATH_MAX] = "";
 
615
   char spoolpath_common[SGE_PATH_MAX] = "";
 
616
   int within_execd = flags & SPOOL_WITHIN_EXECD;
 
617
   int handle_as_zombie = flags & SPOOL_HANDLE_AS_ZOMBIE;
 
618
   int one_file;
 
619
   lList *master_list = handle_as_zombie ? 
 
620
                        *(object_type_get_master_list(SGE_TYPE_ZOMBIE)) : 
 
621
                        *(object_type_get_master_list(SGE_TYPE_JOB));
 
622
   lListElem *job = job_list_locate(master_list, jobid);
 
623
   int try_to_remove_sub_dirs = 0;
 
624
 
 
625
   DENTER(TOP_LAYER, "job_remove_spool_file");
 
626
 
 
627
   one_file = job_has_to_spool_one_file(job, *object_type_get_master_list(SGE_TYPE_PE), 
 
628
                                         flags);
 
629
   if (ja_taskid != 0 && pe_task_id != NULL && !one_file) {
 
630
       char pe_task_spool_file[SGE_PATH_MAX];
 
631
 
 
632
       sge_get_file_path(pe_task_spool_file, PE_TASK_SPOOL_FILE, 
 
633
                         FORMAT_DEFAULT, flags, jobid, ja_taskid, pe_task_id);
 
634
      
 
635
       DPRINTF(("try to remove "SFN"\n", pe_task_spool_file));
 
636
       if (sge_is_file(pe_task_spool_file) &&
 
637
           !sge_unlink(NULL, pe_task_spool_file)) {
 
638
         ERROR((SGE_EVENT, MSG_JOB_CANNOT_REMOVE_SS, 
 
639
                MSG_JOB_PE_TASK_SPOOL_FILE, pe_task_spool_file));
 
640
         DTRACE;
 
641
      }
 
642
   }
 
643
 
 
644
   if (ja_taskid != 0 && pe_task_id == NULL && !one_file) {
 
645
      char task_spool_dir[SGE_PATH_MAX];
 
646
      char task_spool_file[SGE_PATH_MAX];
 
647
      int remove_task_spool_file = 0;
 
648
 
 
649
      sge_get_file_path(task_spool_dir, TASKS_SPOOL_DIR, FORMAT_DEFAULT, flags,
 
650
                        jobid, ja_taskid, NULL);
 
651
      sge_get_file_path(task_spool_file, TASK_SPOOL_DIR_AS_FILE, 
 
652
                        FORMAT_DEFAULT, flags, jobid, ja_taskid, NULL);
 
653
 
 
654
      if (within_execd) {
 
655
         remove_task_spool_file = 1;
 
656
      } else {
 
657
         remove_task_spool_file = job_is_enrolled(job, ja_taskid);
 
658
      }
 
659
      DPRINTF(("remove_task_spool_file = %d\n", remove_task_spool_file));;
 
660
 
 
661
      if (remove_task_spool_file) {
 
662
         DPRINTF(("removing "SFN"\n", task_spool_file));
 
663
         
 
664
         if (sge_is_directory(task_spool_file)) {
 
665
            dstring task_spool_file_msg;
 
666
            char task_spool_file_msg_buffer[SGE_PATH_MAX];
 
667
            
 
668
            sge_dstring_init(&task_spool_file_msg, task_spool_file_msg_buffer,
 
669
                             sizeof(task_spool_file_msg_buffer));
 
670
            if (sge_rmdir(task_spool_file, &task_spool_file_msg)) {
 
671
               ERROR((SGE_EVENT, MSG_JOB_CANNOT_REMOVE_SS, 
 
672
                      MSG_JOB_TASK_SPOOL_FILE, task_spool_file_msg_buffer));
 
673
               DTRACE;
 
674
            } 
 
675
         } else {
 
676
            if (!sge_unlink(NULL, task_spool_file)) {
 
677
               ERROR((SGE_EVENT, MSG_JOB_CANNOT_REMOVE_SS, 
 
678
                      MSG_JOB_TASK_SPOOL_FILE, task_spool_file));
 
679
               DTRACE;
 
680
            }
 
681
         }
 
682
 
 
683
         /*
 
684
          * Following rmdir call may fail. We can ignore this error.
 
685
          * This is only an indicator that another task is running which has 
 
686
          * been spooled in the directory.
 
687
          */  
 
688
         DPRINTF(("try to remove "SFN"\n", task_spool_dir));
 
689
         rmdir(task_spool_dir);
 
690
 
 
691
         /* 
 
692
          * a task spool directory has been removed: reinit 
 
693
          * old_task_spool_dir to ensure mkdir() is performed 
 
694
          */
 
695
         old_task_spool_dir[0] = '\0';
 
696
      }
 
697
   }
 
698
 
 
699
   sge_get_file_path(spool_dir, JOB_SPOOL_DIR, 
 
700
                     FORMAT_DEFAULT, flags, jobid, ja_taskid, NULL);
 
701
   sge_get_file_path(spool_dir_third, JOB_SPOOL_DIR, 
 
702
                     FORMAT_THIRD_PART, flags, jobid, ja_taskid, NULL);
 
703
   sge_get_file_path(spool_dir_second, JOB_SPOOL_DIR, 
 
704
                     FORMAT_SECOND_PART, flags, jobid, ja_taskid, NULL);
 
705
   sge_get_file_path(spoolpath_common, JOB_SPOOL_FILE, 
 
706
                     FORMAT_DEFAULT, flags, jobid, ja_taskid, NULL);
 
707
   try_to_remove_sub_dirs = 0;
 
708
   if (!one_file) {
 
709
      if (ja_taskid == 0) { 
 
710
         DPRINTF(("removing "SFN"\n", spoolpath_common));
 
711
         if (!sge_unlink(NULL, spoolpath_common)) {
 
712
            ERROR((SGE_EVENT, MSG_JOB_CANNOT_REMOVE_SS, 
 
713
                   MSG_JOB_JOB_SPOOL_FILE, spoolpath_common)); 
 
714
            DTRACE;
 
715
         }
 
716
         DPRINTF(("removing "SFN"\n", spool_dir));
 
717
         if (sge_rmdir(spool_dir, NULL)) {
 
718
            ERROR((SGE_EVENT, MSG_JOB_CANNOT_REMOVE_SS, 
 
719
                   MSG_JOB_JOB_SPOOL_DIRECTORY, spool_dir));
 
720
            DTRACE;
 
721
         }
 
722
         try_to_remove_sub_dirs = 1;
 
723
      }
 
724
   } else {
 
725
      DPRINTF(("removing "SFN"\n", spool_dir));
 
726
      if (!sge_unlink(NULL, spool_dir)) {
 
727
         ERROR((SGE_EVENT, MSG_JOB_CANNOT_REMOVE_SS, MSG_JOB_JOB_SPOOL_FILE,
 
728
                spool_dir));
 
729
         DTRACE;
 
730
      }
 
731
      try_to_remove_sub_dirs = 1;
 
732
   }
 
733
   /*
 
734
    * Following rmdir calls may fail. We can ignore these errors.
 
735
    * This is only an indicator that another job is running which has been
 
736
    * spooled in the same directory.
 
737
    */
 
738
   if (try_to_remove_sub_dirs) {
 
739
      DPRINTF(("try to remove "SFN"\n", spool_dir_third));
 
740
      if (!rmdir(spool_dir_third)) {
 
741
         DPRINTF(("try to remove "SFN"\n", spool_dir_second));
 
742
         rmdir(spool_dir_second); 
 
743
      }
 
744
   }
 
745
 
 
746
   DEXIT;
 
747
   return 0;
 
748
}
 
749
 
 
750
/*
 * job_remove_script_file() - delete the spooled script file of a job.
 *
 * The script path is obtained from sge_get_file_path() for JOB_SCRIPT_FILE
 * using FORMAT_DEFAULT / SPOOL_DEFAULT and the given job_id.
 *
 * A script file that does not exist (ENOENT) is silently accepted; any
 * other unlink() failure is logged as an error.
 *
 * Returns 0 if the file was removed or did not exist,
 *         1 if unlink() failed for any other reason.
 */
static int job_remove_script_file(u_long32 job_id)
{
   char script_file[SGE_PATH_MAX] = "";
   int ret = 0;

   DENTER(TOP_LAYER, "job_remove_script_file");

   PROF_START_MEASUREMENT(SGE_PROF_JOBSCRIPT);
   sge_get_file_path(script_file, JOB_SCRIPT_FILE, FORMAT_DEFAULT,
                     SPOOL_DEFAULT, job_id, 0, NULL);
   if (unlink(script_file) == 0) {
      INFO((SGE_EVENT, MSG_CONFIG_REMOVEDSCRIPTOFBADJOBFILEX_S, script_file));
   } else {
      /*
       * Save errno right after the failing call: the logging macro below
       * may execute other library code before its arguments are formatted,
       * and any such call is allowed to modify errno.
       */
      const int unlink_errno = errno;

      if (unlink_errno != ENOENT) {
         ERROR((SGE_EVENT, MSG_CONFIG_FAILEDREMOVINGSCRIPT_SS,
               strerror(unlink_errno), script_file));
         DTRACE;
         ret = 1;
      }
   }
   PROF_STOP_MEASUREMENT(SGE_PROF_JOBSCRIPT);
   DEXIT;
   return ret;
}
 
773
 
 
774
int job_list_read_from_disk(lList **job_list, char *list_name, int check,
 
775
                                sge_spool_flags_t flags, 
 
776
                                int (*init_function)(lListElem*)) 
 
777
{
 
778
   char first_dir[SGE_PATH_MAX] = ""; 
 
779
   lList *first_direnties; 
 
780
   lListElem *first_direntry;
 
781
   char path[SGE_PATH_MAX];
 
782
   int handle_as_zombie = (flags & SPOOL_HANDLE_AS_ZOMBIE) > 0;
 
783
 
 
784
   DENTER(TOP_LAYER, "job_read_job_list_from_disk"); 
 
785
   sge_get_file_path(first_dir, JOBS_SPOOL_DIR, FORMAT_FIRST_PART, 
 
786
                     flags, 0, 0, NULL);  
 
787
   first_direnties = sge_get_dirents(first_dir);
 
788
 
 
789
   if (first_direnties && !sge_silent_get()) {
 
790
      printf(MSG_CONFIG_READINGIN_S, list_name);
 
791
      printf("\n");
 
792
   }
 
793
 
 
794
   sge_status_set_type(STATUS_DOTS);
 
795
   for (;
 
796
        (first_direntry = lFirst(first_direnties)); 
 
797
        lRemoveElem(first_direnties, &first_direntry)) {
 
798
      char second_dir[SGE_PATH_MAX] = "";
 
799
      lList *second_direnties;
 
800
      lListElem *second_direntry;
 
801
      const char *first_entry_string;
 
802
 
 
803
 
 
804
      first_entry_string = lGetString(first_direntry, ST_name);
 
805
      sprintf(path, "%s/%s", first_dir, first_entry_string);
 
806
      if (!sge_is_directory(path)) {
 
807
         ERROR((SGE_EVENT, MSG_CONFIG_NODIRECTORY_S, path)); 
 
808
         break;
 
809
      }
 
810
   
 
811
      sprintf(second_dir, SFN"/"SFN, first_dir, first_entry_string); 
 
812
      second_direnties = sge_get_dirents(second_dir);
 
813
      for (;
 
814
           (second_direntry = lFirst(second_direnties));
 
815
           lRemoveElem(second_direnties, &second_direntry)) {
 
816
         char third_dir[SGE_PATH_MAX] = "";
 
817
         lList *third_direnties;
 
818
         lListElem *third_direntry;
 
819
         const char *second_entry_string;
 
820
 
 
821
         second_entry_string = lGetString(second_direntry, ST_name);
 
822
         sprintf(path, "%s/%s/%s", first_dir, first_entry_string,
 
823
                 second_entry_string);
 
824
         if (!sge_is_directory(path)) {
 
825
            ERROR((SGE_EVENT, MSG_CONFIG_NODIRECTORY_S, path));
 
826
            break;
 
827
         } 
 
828
 
 
829
         sprintf(third_dir, SFN"/"SFN, second_dir, second_entry_string);
 
830
         third_direnties = sge_get_dirents(third_dir);
 
831
         for (;
 
832
              (third_direntry = lFirst(third_direnties));
 
833
              lRemoveElem(third_direnties, &third_direntry)) {
 
834
            lListElem *job, *ja_task;
 
835
            char *lasts = NULL;
 
836
            char job_dir[SGE_PATH_MAX] = "";
 
837
            char fourth_dir[SGE_PATH_MAX] = "";
 
838
            char job_id_string[SGE_PATH_MAX] = "";
 
839
            char *ja_task_id_string;
 
840
            u_long32 job_id, ja_task_id;
 
841
            int all_finished;
 
842
 
 
843
            sge_status_next_turn();
 
844
            sprintf(fourth_dir, SFN"/"SFN, third_dir,
 
845
                    lGetString(third_direntry, ST_name));
 
846
            sprintf(job_id_string, SFN SFN SFN, 
 
847
                    lGetString(first_direntry, ST_name),
 
848
                    lGetString(second_direntry, ST_name),
 
849
                    lGetString(third_direntry, ST_name)); 
 
850
            job_id = (u_long32) strtol(job_id_string, NULL, 10);
 
851
            strtok_r(job_id_string, ".", &lasts);
 
852
            ja_task_id_string = strtok_r(NULL, ".", &lasts);
 
853
            if (ja_task_id_string) {
 
854
               ja_task_id = (u_long32) strtol(ja_task_id_string, NULL, 10);
 
855
            } else {
 
856
               ja_task_id = 0;
 
857
            }
 
858
            sge_get_file_path(job_dir, JOB_SPOOL_DIR, FORMAT_DEFAULT,
 
859
                              flags, job_id, ja_task_id, NULL);
 
860
 
 
861
            /* check directory name */
 
862
            if (strcmp(fourth_dir, job_dir)) {
 
863
               fprintf(stderr, "%s %s\n", fourth_dir, job_dir);
 
864
               DPRINTF(("Invalid directory "SFN"\n", fourth_dir));
 
865
               continue;
 
866
            }
 
867
 
 
868
            /* read job */
 
869
            job = job_create_from_file(job_id, ja_task_id, flags);
 
870
            if (!job) {
 
871
               job_remove_script_file(job_id);
 
872
               continue;
 
873
            }
 
874
 
 
875
            /* check for scriptfile before adding job */
 
876
            all_finished = 1;
 
877
            for_each (ja_task, lGetList(job, JB_ja_tasks)) {
 
878
               if (lGetUlong(ja_task, JAT_status) != JFINISHED) {
 
879
                  all_finished = 0;
 
880
                  break;
 
881
               }
 
882
            }
 
883
            if (check && !all_finished && lGetString(job, JB_script_file)) {
 
884
               char script_file[SGE_PATH_MAX];
 
885
               SGE_STRUCT_STAT stat_buffer;
 
886
 
 
887
               sge_get_file_path(script_file, JOB_SCRIPT_FILE, FORMAT_DEFAULT,
 
888
                                 flags, job_id, 0, NULL);
 
889
               if (SGE_STAT(script_file, &stat_buffer)) {
 
890
                  ERROR((SGE_EVENT, MSG_CONFIG_CANTFINDSCRIPTFILE_U,
 
891
                         sge_u32c(lGetUlong(job, JB_job_number))));
 
892
                  job_list_add_job(object_type_get_master_list(SGE_TYPE_JOB), "job list", job, 0);
 
893
                  job_remove_spool_file(job_id, 0, NULL, SPOOL_DEFAULT);
 
894
                  lRemoveElem( *(object_type_get_master_list(SGE_TYPE_JOB)), &job);
 
895
                  continue;
 
896
               }
 
897
            }  
 
898
 
 
899
            /* check if filename has same name which is stored job id */
 
900
            if (lGetUlong(job, JB_job_number) != job_id) {
 
901
               ERROR((SGE_EVENT, MSG_CONFIG_JOBFILEXHASWRONGFILENAMEDELETING_U,
 
902
                     sge_u32c(job_id)));
 
903
               job_remove_spool_file(job_id, 0, NULL, flags);
 
904
               /* 
 
905
                * script is not deleted here, 
 
906
                * since it may belong to a valid job 
 
907
                */
 
908
            } 
 
909
 
 
910
            if (init_function) {
 
911
               init_function(job);
 
912
            }
 
913
 
 
914
            lSetList(job, JB_jid_successor_list, NULL); 
 
915
            job_list_add_job(job_list, list_name, job, 0);
 
916
            
 
917
            if (!handle_as_zombie) {
 
918
               job_list_register_new_job(*(object_type_get_master_list(SGE_TYPE_JOB)), mconf_get_max_jobs(), 1);
 
919
               suser_register_new_job(job, mconf_get_max_u_jobs(), 1);
 
920
            }
 
921
         }
 
922
         lFreeList(&third_direnties);
 
923
      }
 
924
      lFreeList(&second_direnties);
 
925
   } 
 
926
   lFreeList(&first_direnties);
 
927
 
 
928
   if (*job_list) {
 
929
      sge_status_end_turn();
 
930
   }      
 
931
 
 
932
   DEXIT;
 
933
   return 0;
 
934
}