~ubuntu-branches/ubuntu/vivid/slurm-llnl/vivid


Viewing changes to src/slurmctld/job_scheduler.c

  • Committer: Bazaar Package Importer
  • Author(s): Gennaro Oliva
  • Date: 2009-09-24 23:28:15 UTC
  • mfrom: (1.1.11 upstream) (3.2.4 sid)
  • Revision ID: james.westby@ubuntu.com-20090924232815-enh65jn32q1ebg07
Tags: 2.0.5-1
* New upstream release 
* Changed dependency from lib-mysqlclient15 to lib-mysqlclient
* Added Default-Start for runlevels 2 and 4 and a $remote_fs requirement in
  the init.d scripts (Closes: #541252)
* Postinst checks for wrong runlevel 2 and 4 links
* Upgraded to standards version 3.8.3
* Added lintian overrides for the missing slurm-llnl-configurator.html in the
  doc-base registration
* Modified the postrm scripts to ignore the pkill return value, so postrm no
  longer fails when no slurm process is running (sketched below)
* Check for slurmctld.pid before cancelling running and pending jobs during
  package removal
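
The last two items boil down to two small maintainer-script guards. A minimal
sketch of the idea, assuming a conventional pid-file path and process names;
this is an illustration only, not the packaged debian/*.postrm:

    #!/bin/sh
    # Hypothetical postrm fragment (illustration, not the shipped script).
    # pkill exits non-zero when nothing matches, so mask its status to keep
    # a "set -e" maintainer script from aborting when no slurm process runs.
    pkill slurmctld || true
    pkill slurmd || true

    # Cancel running and pending jobs only if the controller is actually up;
    # the pid-file path below is an assumption, not the packaged value.
    if [ -f /var/run/slurm-llnl/slurmctld.pid ]; then
        : # cancel running and pending jobs here (e.g. via scancel)
    fi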

--- src/slurmctld/job_scheduler.c
+++ src/slurmctld/job_scheduler.c
@@ -3,13 +3,14 @@
  *      Note there is a global job list (job_list)
  *****************************************************************************
  *  Copyright (C) 2002-2007 The Regents of the University of California.
- *  Copyright (C) 2008 Lawrence Livermore National Security.
+ *  Copyright (C) 2008-2009 Lawrence Livermore National Security.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Morris Jette <jette1@llnl.gov>
- *  LLNL-CODE-402394.
+ *  CODE-OCEC-09-009. All rights reserved.
  *  
  *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  For details, see <https://computing.llnl.gov/linux/slurm/>.
+ *  Please also read the included file: DISCLAIMER.
  *  
  *  SLURM is free software; you can redistribute it and/or modify it under
  *  the terms of the GNU General Public License as published by the Free
@@ -48,10 +49,12 @@
 #include <unistd.h>
 
 #include "src/common/assoc_mgr.h"
+#include "src/common/env.h"
 #include "src/common/list.h"
 #include "src/common/macros.h"
 #include "src/common/node_select.h"
 #include "src/common/slurm_accounting_storage.h"
+#include "src/common/uid.h"
 #include "src/common/xassert.h"
 #include "src/common/xstring.h"
 
@@ -61,17 +64,20 @@
 #include "src/slurmctld/licenses.h"
 #include "src/slurmctld/locks.h"
 #include "src/slurmctld/node_scheduler.h"
+#include "src/slurmctld/reservation.h"
 #include "src/slurmctld/slurmctld.h"
 #include "src/slurmctld/srun_comm.h"
 
 #define _DEBUG 0
 #define MAX_RETRIES 10
 
-static void _depend_list_del(void *dep_ptr);
-static void _feature_list_delete(void *x);
-static int  _valid_feature_list(uint32_t job_id, List feature_list);
-static int  _valid_node_feature(char *feature);
-static char **_xduparray(uint16_t size, char ** array);
+static char **  _build_env(struct job_record *job_ptr);
+static void     _depend_list_del(void *dep_ptr);
+static void     _feature_list_delete(void *x);
+static void *   _run_epilog(void *arg);
+static void *   _run_prolog(void *arg);
+static int      _valid_feature_list(uint32_t job_id, List feature_list);
+static int      _valid_node_feature(char *feature);
+static char **  _xduparray(uint16_t size, char ** array);
 
 
 /*
@@ -134,6 +141,9 @@
         int job_buffer_size, job_queue_size;
         struct job_queue *my_job_queue;
 
+        if (job_list == NULL)
+                return 0;
+
         /* build list pending jobs */
         job_buffer_size = job_queue_size = 0;
         job_queue[0] = my_job_queue = NULL;
@@ -175,11 +185,13 @@
         bool completing = false;
         ListIterator job_iterator;
         struct job_record *job_ptr = NULL;
-        time_t recent = time(NULL) - (slurmctld_conf.kill_wait + 2);
+        uint16_t complete_wait = slurm_get_complete_wait();
+        time_t recent;
 
-        if (!job_list)
+        if ((job_list == NULL) || (complete_wait == 0))
                 return completing;
 
+        recent = time(NULL) - complete_wait;
         job_iterator = list_iterator_create(job_list);
         while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
                 if ((job_ptr->job_state & JOB_COMPLETING) &&
@@ -224,6 +236,7 @@
                     ((job_ptr->details->max_nodes < part_ptr->min_nodes) ||
                      (job_ptr->details->min_nodes > part_ptr->max_nodes)))
                         continue;
+                /* Job's eligible time is set in job_independent() */
                 if (!job_independent(job_ptr))
                         continue;
         }
@@ -274,9 +287,13 @@
         static bool backfill_sched = false;
         static bool sched_test = false;
         static bool wiki_sched = false;
+        static int sched_timeout = 0;
         time_t now = time(NULL);
 
         DEF_TIMERS;
+
+        if(!sched_timeout)
+                sched_timeout = MIN(slurm_get_msg_timeout(), 10);
 
         START_TIMER;
         if (!sched_test) {
@@ -294,7 +311,7 @@
 
         lock_slurmctld(job_write_lock);
         /* Avoid resource fragmentation if important */
-        if ((!wiki_sched) && switch_no_frag() && job_is_completing()) {
+        if ((!wiki_sched) && job_is_completing()) {
                 unlock_slurmctld(job_write_lock);
                 debug("schedule() returning, some job still completing");
                 return SLURM_SUCCESS;
@@ -316,9 +333,11 @@
                 if (job_ptr->priority == 0)     /* held */
                         continue;
 
-                if (_failed_partition(job_ptr->part_ptr, failed_parts, 
+                if ((job_ptr->resv_name == NULL) &&
+                    _failed_partition(job_ptr->part_ptr, failed_parts, 
                                       failed_part_cnt)) {
                         job_ptr->state_reason = WAIT_PRIORITY;
+                        xfree(job_ptr->state_desc);
                         continue;
                 }
                 if (bit_overlap(avail_node_bitmap, 
@@ -330,10 +349,12 @@
                 }
                 if (license_job_test(job_ptr) != SLURM_SUCCESS) {
                         job_ptr->state_reason = WAIT_LICENSES;
+                        xfree(job_ptr->state_desc);
                         continue;
                 }
 
-                if (assoc_mgr_validate_assoc_id(acct_db_conn, job_ptr->assoc_id,
+                if (assoc_mgr_validate_assoc_id(acct_db_conn, 
+                                                job_ptr->assoc_id,
                                                 accounting_enforce)) {
                         /* NOTE: This only happens if a user's account is 
                          * disabled between when the job was submitted and 
@@ -345,6 +366,7 @@
                         job_ptr->job_state = JOB_FAILED;
                         job_ptr->exit_code = 1;
                         job_ptr->state_reason = FAIL_BANK_ACCOUNT;
+                        xfree(job_ptr->state_desc);
                         job_ptr->start_time = job_ptr->end_time = time(NULL);
                         job_completion_logger(job_ptr);
                         delete_job_details(job_ptr);
@@ -374,6 +396,19 @@
                                         job_ptr->part_ptr->node_bitmap);
                                 bit_not(job_ptr->part_ptr->node_bitmap);
                         }
+                } else if (error_code == ESLURM_RESERVATION_NOT_USABLE) {
+                        if (job_ptr->resv_ptr 
+                            && job_ptr->resv_ptr->node_bitmap) {
+                                bit_not(job_ptr->resv_ptr->node_bitmap);
+                                bit_and(avail_node_bitmap, 
+                                        job_ptr->resv_ptr->node_bitmap);
+                                bit_not(job_ptr->resv_ptr->node_bitmap);
+                        } else {
+                                /* The job has no reservation but requires
+                                 * nodes that are currently in some reservation
+                                 * so just skip over this job and try running
+                                 * the next lower priority job */
+                        }
                 } else if (error_code == SLURM_SUCCESS) {       
                         /* job initiated */
                         last_job_update = now;
@@ -394,10 +429,10 @@
                         info("schedule: JobId=%u NodeList=%s",
                              job_ptr->job_id, job_ptr->nodes);
 #endif
-                        if (job_ptr->batch_flag)
+                        if (job_ptr->batch_flag == 0)
+                                srun_allocate(job_ptr->job_id);
+                        else if (job_ptr->details->prolog_running == 0)
                                 launch_job(job_ptr);
-                        else
-                                srun_allocate(job_ptr->job_id);
                         job_cnt++;
                 } else if ((error_code !=
                             ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE)
@@ -410,11 +445,17 @@
                                 job_ptr->job_state = JOB_FAILED;
                                 job_ptr->exit_code = 1;
                                 job_ptr->state_reason = FAIL_BAD_CONSTRAINTS;
+                                xfree(job_ptr->state_desc);
                                 job_ptr->start_time = job_ptr->end_time = now;
                                 job_completion_logger(job_ptr);
                                 delete_job_details(job_ptr);
                         }
                 }
+
+                if((time(NULL) - now) >= sched_timeout) {
+                        debug("schedule: loop taking to long breaking out");
+                        break;
+                }
         }
 
         bit_free(avail_node_bitmap);
@@ -464,8 +505,9 @@
                 tmp_part_prio = job_queue[i].part_priority;
 
                 job_queue[i].job_ptr       = job_queue[top_prio_inx].job_ptr;
-                job_queue[i].job_priority  = job_queue[top_prio_inx].job_priority;
-                job_queue[i].part_priority = job_queue[top_prio_inx].part_priority;
+                job_queue[i].job_priority  = job_queue[top_prio_inx].
+                                             job_priority;
+
 
                 job_queue[top_prio_inx].job_ptr       = tmp_job_ptr;
                 job_queue[top_prio_inx].job_priority  = tmp_job_prio;
@@ -500,6 +542,8 @@
         launch_msg_ptr->overcommit = job_ptr->details->overcommit;
         launch_msg_ptr->open_mode  = job_ptr->details->open_mode;
         launch_msg_ptr->acctg_freq = job_ptr->details->acctg_freq;
+        launch_msg_ptr->cpus_per_task = job_ptr->details->cpus_per_task;
+        launch_msg_ptr->restart_cnt   = job_ptr->restart_cnt;
 
         if (make_batch_job_cred(launch_msg_ptr, job_ptr)) {
                 error("aborting batch job %u", job_ptr->job_id);
@@ -517,6 +561,8 @@
         launch_msg_ptr->in = xstrdup(job_ptr->details->in);
         launch_msg_ptr->out = xstrdup(job_ptr->details->out);
         launch_msg_ptr->work_dir = xstrdup(job_ptr->details->work_dir);
+        launch_msg_ptr->ckpt_dir = xstrdup(job_ptr->details->ckpt_dir);
+        launch_msg_ptr->restart_dir = xstrdup(job_ptr->details->restart_dir);
         launch_msg_ptr->argc = job_ptr->details->argc;
         launch_msg_ptr->argv = _xduparray(job_ptr->details->argc,
                                         job_ptr->details->argv);
@@ -524,15 +570,18 @@
         launch_msg_ptr->environment =
             get_job_env(job_ptr, &launch_msg_ptr->envc);
         launch_msg_ptr->job_mem = job_ptr->details->job_min_memory;
-        launch_msg_ptr->num_cpu_groups = job_ptr->num_cpu_groups;
-        launch_msg_ptr->cpus_per_node  = xmalloc(sizeof(uint32_t) *
-                        job_ptr->num_cpu_groups);
-        memcpy(launch_msg_ptr->cpus_per_node, job_ptr->cpus_per_node,
-                        (sizeof(uint32_t) * job_ptr->num_cpu_groups));
+
+        launch_msg_ptr->num_cpu_groups = job_ptr->select_job->cpu_array_cnt;
+        launch_msg_ptr->cpus_per_node  = xmalloc(sizeof(uint16_t) *
+                        job_ptr->select_job->cpu_array_cnt);
+        memcpy(launch_msg_ptr->cpus_per_node, 
+               job_ptr->select_job->cpu_array_value,
+               (sizeof(uint16_t) * job_ptr->select_job->cpu_array_cnt));
         launch_msg_ptr->cpu_count_reps  = xmalloc(sizeof(uint32_t) *
-                        job_ptr->num_cpu_groups);
-        memcpy(launch_msg_ptr->cpu_count_reps, job_ptr->cpu_count_reps,
-                        (sizeof(uint32_t) * job_ptr->num_cpu_groups));
+                        job_ptr->select_job->cpu_array_cnt);
+        memcpy(launch_msg_ptr->cpu_count_reps, 
+               job_ptr->select_job->cpu_array_reps,
+               (sizeof(uint32_t) * job_ptr->select_job->cpu_array_cnt));
 
         launch_msg_ptr->select_jobinfo = select_g_copy_jobinfo(
                         job_ptr->select_jobinfo);
@@ -574,6 +623,7 @@
                                struct job_record *job_ptr)
 {
         slurm_cred_arg_t cred_arg;
+        select_job_res_t select_ptr;
 
         cred_arg.jobid     = launch_msg_ptr->job_id;
         cred_arg.stepid    = launch_msg_ptr->step_id;
@@ -586,14 +636,23 @@
         if (job_ptr->details == NULL)
                 cred_arg.job_mem = 0;
         else if (job_ptr->details->job_min_memory & MEM_PER_CPU) {
+                xassert(job_ptr->select_job);
+                xassert(job_ptr->select_job->cpus);
                 cred_arg.job_mem = job_ptr->details->job_min_memory;
                 cred_arg.job_mem &= (~MEM_PER_CPU);
-                cred_arg.job_mem *= job_ptr->alloc_lps[0];
+                cred_arg.job_mem *= job_ptr->select_job->cpus[0];
         } else
                 cred_arg.job_mem = job_ptr->details->job_min_memory;
 
-        cred_arg.alloc_lps_cnt = 0;
-        cred_arg.alloc_lps = NULL;
+        /* Identify the cores allocated to this job. */
+        xassert(job_ptr->select_job);
+        select_ptr = job_ptr->select_job;
+        cred_arg.core_bitmap         = select_ptr->core_bitmap;
+        cred_arg.cores_per_socket    = select_ptr->cores_per_socket;
+        cred_arg.sockets_per_node    = select_ptr->sockets_per_node;
+        cred_arg.sock_core_rep_count = select_ptr->sock_core_rep_count;
+        cred_arg.job_nhosts          = select_ptr->nhosts;
+        cred_arg.job_hostlist        = job_ptr->nodes;
 
         launch_msg_ptr->cred = slurm_cred_create(slurmctld_config.cred_ctx,
                          &cred_arg);
@@ -885,9 +944,10 @@
 {
         struct job_record *job_ptr;
         struct part_record *part_ptr;
-        bitstr_t *avail_bitmap = NULL;
+        bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
         uint32_t min_nodes, max_nodes, req_nodes;
-        int rc = SLURM_SUCCESS;
+        int i, rc = SLURM_SUCCESS;
+        time_t now = time(NULL), start_res;
 
         job_ptr = find_job_record(job_desc_msg->job_id);
         if (job_ptr == NULL)
@@ -904,21 +964,18 @@
         if ((job_desc_msg->req_nodes == NULL) || 
             (job_desc_msg->req_nodes == '\0')) {
                 /* assume all nodes available to job for testing */
-                avail_bitmap = bit_copy(avail_node_bitmap);
+                avail_bitmap = bit_alloc(node_record_count);
+                bit_nset(avail_bitmap, 0, (node_record_count - 1));
         } else if (node_name2bitmap(job_desc_msg->req_nodes, false, 
                                     &avail_bitmap) != 0) {
                 return ESLURM_INVALID_NODE_NAME;
         }
 
-        /* Only consider nodes that are not DOWN or DRAINED */
-        bit_and(avail_bitmap, avail_node_bitmap);
-
         /* Consider only nodes in this job's partition */
         if (part_ptr->node_bitmap)
                 bit_and(avail_bitmap, part_ptr->node_bitmap);
         else
                 rc = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
-
         if (job_req_node_filter(job_ptr, avail_bitmap))
                 rc = ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE;
         if (job_ptr->details->exc_node_bitmap) {
@@ -937,6 +994,20 @@
                 }
         }
 
+        /* Enforce reservation: access control, time and nodes */
+        if (job_ptr->details->begin_time)
+                start_res = job_ptr->details->begin_time;
+        else
+                start_res = now;
+        i = job_test_resv(job_ptr, &start_res, false, &resv_bitmap);
+        if (i != SLURM_SUCCESS)
+                return i;
+        bit_and(avail_bitmap, resv_bitmap);
+        FREE_NULL_BITMAP(resv_bitmap);
+
+        /* Only consider nodes that are not DOWN or DRAINED */
+        bit_and(avail_bitmap, avail_node_bitmap);
+
         if (rc == SLURM_SUCCESS) {
                 min_nodes = MAX(job_ptr->details->min_nodes, 
                                 part_ptr->min_nodes);
@@ -968,7 +1039,7 @@
 #else
                 resp_data->proc_cnt = job_ptr->total_procs;
 #endif
-                resp_data->start_time = job_ptr->start_time;
+                resp_data->start_time = MAX(job_ptr->start_time, start_res);
                 job_ptr->start_time   = 0;  /* restore pending job start time */
                 resp_data->node_list  = bitmap2node_name(avail_bitmap);
                 FREE_NULL_BITMAP(avail_bitmap);
@@ -982,6 +1053,252 @@
 }
 
 /*
+ * epilog_slurmctld - execute the prolog_slurmctld for a job that has just
+ *      terminated.
+ * IN job_ptr - pointer to job that has been terminated
+ * RET SLURM_SUCCESS(0) or error code
+ */
+extern int epilog_slurmctld(struct job_record *job_ptr)
+{
+        int rc;
+        pthread_t thread_id_epilog;
+        pthread_attr_t thread_attr_epilog;
+
+        if ((slurmctld_conf.epilog_slurmctld == NULL) ||
+            (slurmctld_conf.epilog_slurmctld[0] == '\0'))
+                return SLURM_SUCCESS;
+
+        if (access(slurmctld_conf.epilog_slurmctld, X_OK) < 0) {
+                error("Invalid EpilogSlurmctld: %m");
+                return errno;
+        }
+
+        slurm_attr_init(&thread_attr_epilog);
+        pthread_attr_setdetachstate(&thread_attr_epilog, 
+                                    PTHREAD_CREATE_DETACHED);
+        while(1) {
+                rc = pthread_create(&thread_id_epilog,
+                                    &thread_attr_epilog,
+                                    _run_epilog, (void *) job_ptr);
+                if (rc == 0)
+                        return SLURM_SUCCESS;
+                if (errno == EAGAIN)
+                        continue;
+                error("pthread_create: %m");
+                return errno;
+        }
+}
+
+static char **_build_env(struct job_record *job_ptr)
+{
+        char **my_env, *name;
+
+        my_env = xmalloc(sizeof(char *));
+        my_env[0] = NULL;
+#ifdef HAVE_CRAY_XT
+        select_g_get_jobinfo(job_ptr->select_jobinfo, 
+                             SELECT_DATA_RESV_ID, &name);
+        setenvf(&env, "BASIL_RESERVATION_ID", "%s", name);
+        xfree(name);
+#endif
+#ifdef HAVE_BG
+        select_g_get_jobinfo(job_ptr->select_jobinfo, 
+                             SELECT_DATA_BLOCK_ID, &name);
+        setenvf(&my_env, "MPIRUN_PARTITION", "%s", name);
+#endif
+        setenvf(&my_env, "SLURM_JOB_ACCOUNT", "%s", job_ptr->account);
+        if (job_ptr->details) {
+                setenvf(&my_env, "SLURM_JOB_CONSTRAINTS", 
+                        "%s", job_ptr->details->features);
+        }
+        setenvf(&my_env, "SLURM_JOB_GID", "%u", job_ptr->group_id);
+        name = gid_to_string((uid_t) job_ptr->group_id);
+        setenvf(&my_env, "SLURM_JOB_GROUP", "%s", name);
+        xfree(name);
+        setenvf(&my_env, "SLURM_JOB_ID", "%u", job_ptr->job_id);
+        setenvf(&my_env, "SLURM_JOB_NAME", "%s", job_ptr->name);
+        setenvf(&my_env, "SLURM_JOB_NODELIST", "%s", job_ptr->nodes);
+        setenvf(&my_env, "SLURM_JOB_PARTITION", "%s", job_ptr->partition);
+        setenvf(&my_env, "SLURM_JOB_UID", "%u", job_ptr->user_id);
+        name = uid_to_string((uid_t) job_ptr->user_id);
+        setenvf(&my_env, "SLURM_JOB_USER", "%s", name);
+        xfree(name);
+
+        return my_env;
+}
+
+static void *_run_epilog(void *arg)
+{
+        struct job_record *job_ptr = (struct job_record *) arg;
+        uint32_t job_id;
+        pid_t cpid;
+        int i, status, wait_rc;
+        char *argv[2], **my_env;
+        /* Locks: Read config, job */
+        slurmctld_lock_t config_read_lock = { 
+                READ_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
+
+        lock_slurmctld(config_read_lock);
+        argv[0] = xstrdup(slurmctld_conf.epilog_slurmctld);
+        argv[1] = NULL;
+        my_env = _build_env(job_ptr);
+        job_id = job_ptr->job_id;
+        unlock_slurmctld(config_read_lock);
+
+        if ((cpid = fork()) < 0) {
+                error("epilog_slurmctld fork error: %m");
+                goto fini;
+        }
+        if (cpid == 0) {
+#ifdef SETPGRP_TWO_ARGS
+                setpgrp(0, 0);
+#else
+                setpgrp();
+#endif
+                execve(argv[0], argv, my_env);
+                exit(127);
+        }
+
+        while (1) {
+                wait_rc = waitpid(cpid, &status, 0);
+                if (wait_rc < 0) {
+                        if (errno == EINTR)
+                                continue;
+                        error("epilog_slurmctld waitpid error: %m");
+                        break;
+                } else if (wait_rc > 0) {
+                        killpg(cpid, SIGKILL);  /* kill children too */
+                        break;
+                }
+        }
+        if (status != 0) {
+                error("epilog_slurmctld job %u epilog exit status %u:%u",
+                      job_id, WEXITSTATUS(status), WTERMSIG(status));
+        } else
+                debug2("epilog_slurmctld job %u prolog completed", job_id);
+
+ fini:  xfree(argv[0]);
+        for (i=0; my_env[i]; i++)
+                xfree(my_env[i]);
+        xfree(my_env);
+        return NULL;
+}
+
+/*
+ * prolog_slurmctld - execute the prolog_slurmctld for a job that has just
+ *      been allocated resources.
+ * IN job_ptr - pointer to job that will be initiated
+ * RET SLURM_SUCCESS(0) or error code
+ */
+extern int prolog_slurmctld(struct job_record *job_ptr)
+{
+        int rc;
+        pthread_t thread_id_prolog;
+        pthread_attr_t thread_attr_prolog;
+
+        if ((slurmctld_conf.prolog_slurmctld == NULL) ||
+            (slurmctld_conf.prolog_slurmctld[0] == '\0'))
+                return SLURM_SUCCESS;
+
+        if (access(slurmctld_conf.prolog_slurmctld, X_OK) < 0) {
+                error("Invalid PrologSlurmctld: %m");
+                return errno;
+        }
+
+        if (job_ptr->details)
+                job_ptr->details->prolog_running = 1;
+
+        slurm_attr_init(&thread_attr_prolog);
+        pthread_attr_setdetachstate(&thread_attr_prolog, 
+                                    PTHREAD_CREATE_DETACHED);
+        while(1) {
+                rc = pthread_create(&thread_id_prolog,
+                                    &thread_attr_prolog,
+                                    _run_prolog, (void *) job_ptr);
+                if (rc == 0)
+                        return SLURM_SUCCESS;
+                if (errno == EAGAIN)
+                        continue;
+                error("pthread_create: %m");
+                return errno;
+        }
+}
+
+static void *_run_prolog(void *arg)
+{
+        struct job_record *job_ptr = (struct job_record *) arg;
+        uint32_t job_id;
+        pid_t cpid;
+        int i, status, wait_rc;
+        char *argv[2], **my_env;
+        /* Locks: Read config, job */
+        slurmctld_lock_t config_read_lock = { 
+                READ_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
+
+        lock_slurmctld(config_read_lock);
+        argv[0] = xstrdup(slurmctld_conf.prolog_slurmctld);
+        argv[1] = NULL;
+        my_env = _build_env(job_ptr);
+        job_id = job_ptr->job_id;
+        unlock_slurmctld(config_read_lock);
+
+        if ((cpid = fork()) < 0) {
+                error("prolog_slurmctld fork error: %m");
+                goto fini;
+        }
+        if (cpid == 0) {
+#ifdef SETPGRP_TWO_ARGS
+                setpgrp(0, 0);
+#else
+                setpgrp();
+#endif
+                execve(argv[0], argv, my_env);
+                exit(127);
+        }
+
+        while (1) {
+                wait_rc = waitpid(cpid, &status, 0);
+                if (wait_rc < 0) {
+                        if (errno == EINTR)
+                                continue;
+                        error("prolog_slurmctld waitpid error: %m");
+                        break;
+                } else if (wait_rc > 0) {
+                        killpg(cpid, SIGKILL);  /* kill children too */
+                        break;
+                }
+        }
+        if (status != 0) {
+                error("prolog_slurmctld job %u prolog exit status %u:%u",
+                      job_id, WEXITSTATUS(status), WTERMSIG(status));
+        } else
+                debug2("prolog_slurmctld job %u prolog completed", job_id);
+
+ fini:  xfree(argv[0]);
+        for (i=0; my_env[i]; i++)
+                xfree(my_env[i]);
+        xfree(my_env);
+        lock_slurmctld(config_read_lock);
+        if (job_ptr->job_id != job_id) {
+                error("prolog_slurmctld job %u pointer invalid", job_id);
+                job_ptr = find_job_record(job_id);
+                if (job_ptr == NULL)
+                        error("prolog_slurmctld job %u now defunct", job_id);
+        }
+        if (job_ptr) {
+                if (job_ptr->details)
+                        job_ptr->details->prolog_running = 0;
+                if (job_ptr->batch_flag &&
+                    ((job_ptr->job_state == JOB_RUNNING) ||
+                     (job_ptr->job_state == JOB_SUSPENDED)))
+                        launch_job(job_ptr);
+        }
+        unlock_slurmctld(config_read_lock);
+
+        return NULL;
+}
+
+/*
  * build_feature_list - Translate a job's feature string into a feature_list
  * IN  details->features
  * OUT details->feature_list