--- a/src/slurmd/slurmd/req.c
+++ b/src/slurmd/slurmd/req.c
@@ -1 +1 @@
 /*****************************************************************************\
  *  src/slurmd/slurmd/req.c - slurmd request handling
  *****************************************************************************
- *  Copyright (C) 2002-2006 The Regents of the University of California.
- *  Copyright (C) 2008 Lawrence Livermore National Security.
+ *  Copyright (C) 2002-2007 The Regents of the University of California.
+ *  Copyright (C) 2008-2009 Lawrence Livermore National Security.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Mark Grondona <mgrondona@llnl.gov>.
+ *  CODE-OCEC-09-009. All rights reserved.
  *
  *  This file is part of SLURM, a resource management program.
- *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  For details, see <https://computing.llnl.gov/linux/slurm/>.
  *  Please also read the included file: DISCLAIMER.
  *
  *  SLURM is free software; you can redistribute it and/or modify it under
  *  the terms of the GNU General Public License as published by the Free
@@ -687 +696 @@
-    if ((arg.alloc_lps_cnt > 0) && (tasks_to_launch > 0)) {
-        host_index = hostset_find(hset, conf->node_name);
-
-        /* Left for debugging purposes */
-        info(" cons_res %u alloc_lps_cnt %u "
-             "task[%d] = %u = task_to_launch %d host %s ",
-             arg.jobid, arg.alloc_lps_cnt, host_index,
-             arg.alloc_lps[host_index],
-             tasks_to_launch, conf->node_name);
-
-        if (host_index < 0) {
+    if ((arg.job_nhosts > 0) && (tasks_to_launch > 0)) {
+        uint32_t i, i_first_bit=0, i_last_bit=0;
+        host_index = hostset_find(hset, conf->node_name);
+        if ((host_index < 0) || (host_index >= arg.job_nhosts)) {
             error("job cr credential invalid host_index %d for "
                   "job %u", host_index, arg.jobid);
             goto fail;
         }
-        if (host_index > arg.alloc_lps_cnt)
-            error("host_index > alloc_lps_cnt in credential");
-        else if (arg.alloc_lps[host_index] == 0)
+        host_index++;    /* change from 0-origin to 1-origin */
+        for (i=0; host_index; i++) {
+            if (host_index > arg.sock_core_rep_count[i]) {
+                i_first_bit += arg.sockets_per_node[i] *
+                               arg.cores_per_socket[i] *
+                               arg.sock_core_rep_count[i];
+                host_index -= arg.sock_core_rep_count[i];
+            } else {
+                i_first_bit += arg.sockets_per_node[i] *
+                               arg.cores_per_socket[i] *
+                               (host_index - 1);
+                i_last_bit = i_first_bit +
+                             arg.sockets_per_node[i] *
+                             arg.cores_per_socket[i];
+                break;
+            }
+        }
+        /* Now count the allocated processors */
+        for (i = i_first_bit; i < i_last_bit; i++) {
+            if (bit_test(arg.core_bitmap, i))
+                alloc_lps++;
+        }
+        if (alloc_lps == 0) {
             error("cons_res: zero processors allocated to step");
-        if (tasks_to_launch > arg.alloc_lps[host_index]) {
-            /* This is expected with the --overcommit option */
-            verbose("cons_res: More than one tasks per logical "
-                    "processor (%d > %u) on host [%u.%u %ld %s] ",
-                    tasks_to_launch, arg.alloc_lps[host_index],
-                    arg.jobid, arg.stepid, (long) arg.uid,
-                    arg.hostlist);
-            verbose("cons_res: Use task/affinity plug-in to bind "
-                    "the tasks to the allocated resources");
-        }
+            alloc_lps = 1;
+        }
+
+        if (tasks_to_launch > alloc_lps) {
+            /* This is expected with the --overcommit option */
+            debug("cons_res: More than one tasks per logical "
+                  "processor (%d > %u) on host [%u.%u %ld %s] ",
+                  tasks_to_launch, alloc_lps, arg.jobid,
+                  arg.stepid, (long) arg.uid, arg.hostlist);
+        }
+
+        /* NOTE: alloc_lps is the count of allocated resources
+         * (typically cores). Convert to CPU count as needed */
+        if (i_last_bit <= i_first_bit)
+            error("step credential has no CPUs selected");
+        else {
+            i = conf->cpus / (i_last_bit - i_first_bit);
+            if (i > 1)
+                alloc_lps *= i;
+        }
     }

     /* Overwrite any memory limits in the RPC with contents of the
      * memory limit within the credential.
      * Reset the CPU count on this node to correct value. */
     if (arg.job_mem & MEM_PER_CPU) {
         req->job_mem = arg.job_mem & (~MEM_PER_CPU);
-        if ((host_index >= 0) && (host_index < arg.alloc_lps_cnt) &&
-            (arg.alloc_lps[host_index] > 0))
-            req->job_mem *= arg.alloc_lps[host_index];
+        req->job_mem *= alloc_lps;
     } else
         req->job_mem = arg.job_mem;
-    req->task_mem = arg.task_mem;    /* Defunct */
-    if ((host_index >= 0) && (host_index < arg.alloc_lps_cnt))
-        req->cpus_allocated[node_id] = arg.alloc_lps[host_index];
+    req->cpus_allocated[node_id] = alloc_lps;

     info("mem orig:%u cpus:%u limit:%u",
-         arg.job_mem, arg.alloc_lps[host_index], req->job_mem);
+         arg.job_mem, alloc_lps, req->job_mem);

     *step_hset = hset;
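In the new credential check the per-node CPU count is no longer read from an alloc_lps[] array carried in the credential; it is recomputed from the run-length encoded socket/core layout arrays plus the job's core bitmap. The standalone sketch below isolates that counting logic; the helper name count_alloc_cores, the plain int arrays, and the 0/1 bitmap are illustrative simplifications, not SLURM's actual types (req.c uses bitstr_t with bit_test() and uint16_t/uint32_t credential fields).

/*
 * Minimal sketch (not part of req.c): derive one node's allocated-core
 * count from the credential's run-length encoded layout arrays plus a
 * flat per-core allocation bitmap.
 */
#include <stdio.h>

/* sock_core_rep_count[i] consecutive nodes share sockets_per_node[i]
 * and cores_per_socket[i]; host_index is 0-origin on entry. */
static int count_alloc_cores(const int *core_bitmap,
                             const int *sockets_per_node,
                             const int *cores_per_socket,
                             const int *sock_core_rep_count,
                             int host_index)
{
    int i, first = 0, last = 0, alloc = 0;

    host_index++;                     /* 0-origin -> 1-origin, as in req.c */
    for (i = 0; host_index; i++) {
        if (host_index > sock_core_rep_count[i]) {
            /* skip every core of this whole group of identical nodes */
            first += sockets_per_node[i] * cores_per_socket[i] *
                     sock_core_rep_count[i];
            host_index -= sock_core_rep_count[i];
        } else {
            /* skip the nodes before ours inside this group */
            first += sockets_per_node[i] * cores_per_socket[i] *
                     (host_index - 1);
            last = first + sockets_per_node[i] * cores_per_socket[i];
            break;
        }
    }
    for (i = first; i < last; i++) {
        if (core_bitmap[i])           /* stands in for bit_test() */
            alloc++;
    }
    return alloc;
}

int main(void)
{
    /* two groups: 3 nodes with 2x4 cores, then 1 node with 1x2 cores */
    int sockets[] = { 2, 1 };
    int cores[]   = { 4, 2 };
    int rep[]     = { 3, 1 };
    /* 3*8 + 1*2 = 26 cores total; allocate cores 8..11 (node index 1) */
    int bitmap[26] = { 0 };
    for (int i = 8; i < 12; i++)
        bitmap[i] = 1;

    printf("node 1 allocated cores: %d\n",
           count_alloc_cores(bitmap, sockets, cores, rep, 1));
    return 0;
}

For the sample layout (three 2x4-core nodes followed by one 1x2-core node) the program prints 4, because exactly cores 8..11 of the flat core map belong to node index 1 and are set in the bitmap.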
@@ -950 +979 @@
     if (slurm_cred_get_args(req->cred, &arg) != SLURM_SUCCESS)
         return;

     if (arg.job_mem & MEM_PER_CPU) {
+        uint32_t alloc_lps = 0, last_bit = 0;
+        if (arg.job_nhosts > 0) {
+            last_bit = arg.sockets_per_node[0] *
+                       arg.cores_per_socket[0];
+            for (i=0; i<last_bit; i++) {
+                if (bit_test(arg.core_bitmap, i))
+                    alloc_lps++;
+            }
+        }
+        if (alloc_lps == 0) {
+            error("_set_batch_job_limit: alloc_lps is zero");
+            alloc_lps = 1;
+        }
+
+        /* NOTE: alloc_lps is the count of allocated resources
+         * (typically cores). Convert to CPU count as needed */
+        if (last_bit < 1)
+            error("Batch job credential allocates no CPUs");
+        else {
+            i = conf->cpus / last_bit;
+            if (i > 1)
+                alloc_lps *= i;
+        }
+
         req->job_mem = arg.job_mem & (~MEM_PER_CPU);
-        if (arg.alloc_lps_cnt > 1)
-            req->job_mem *= arg.alloc_lps_cnt;
+        req->job_mem *= alloc_lps;
     } else
         req->job_mem = arg.job_mem;
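Both hunks above convert a per-CPU memory limit into a per-node limit: when the MEM_PER_CPU flag bit is set in job_mem, the remaining bits give megabytes per CPU and are multiplied by the CPUs allocated on this node; otherwise job_mem is already a per-node value. Below is a minimal sketch of that arithmetic, assuming the conventional definition of MEM_PER_CPU as the high bit of the 32-bit field.

/*
 * Sketch of the MEM_PER_CPU arithmetic (standalone, not part of req.c).
 * Assumes MEM_PER_CPU is the high bit of the 32-bit job_mem field, which
 * flags "megabytes per CPU" rather than "megabytes per node".
 */
#include <stdint.h>
#include <stdio.h>

#define MEM_PER_CPU 0x80000000

/* Return the effective per-node memory limit in MB for this node. */
static uint32_t node_mem_limit(uint32_t job_mem, uint32_t alloc_lps)
{
    if (job_mem & MEM_PER_CPU)
        return (job_mem & ~MEM_PER_CPU) * alloc_lps;
    return job_mem;    /* already a per-node limit */
}

int main(void)
{
    /* per-CPU limit of 1024 MB with 4 allocated CPUs on this node */
    printf("%u MB\n", node_mem_limit(1024 | MEM_PER_CPU, 4));
    /* a plain per-node limit is passed through regardless of CPU count */
    printf("%u MB\n", node_mem_limit(2048, 4));
    return 0;
}

With a per-CPU limit of 1024 MB and 4 CPUs on the node this yields 4096 MB, while a plain 2048 MB per-node limit passes through unchanged.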
@@ -1016 +1071 @@
     /*
      * Run job prolog on this node
      */
-    select_g_get_jobinfo(req->select_jobinfo,
-                         SELECT_DATA_BLOCK_ID,
-                         &bg_part_id);
-    rc = _run_prolog(req->job_id, req->uid, bg_part_id);
+#ifdef HAVE_BG
+    select_g_get_jobinfo(req->select_jobinfo,
+                         SELECT_DATA_BLOCK_ID, &resv_id);
+#endif
+#ifdef HAVE_CRAY_XT
+    select_g_get_jobinfo(req->select_jobinfo,
+                         SELECT_DATA_RESV_ID, &resv_id);
+#endif
+    rc = _run_prolog(req->job_id, req->uid, resv_id);
     if (rc) {
         int term_sig, exit_status;
         if (WIFSIGNALED(rc)) {
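The trailing context of this hunk shows how slurmd classifies a failed prolog: rc holds a wait()-style status, so WIFSIGNALED()/WTERMSIG() separate death-by-signal from a nonzero exit code. A standalone sketch of that decoding follows; system() and the /bin/false command are stand-ins for the fork()/exec() of the configured prolog.

/*
 * Standalone sketch (not part of req.c): decode a wait()-style status the
 * way the prolog error path above does.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>

int main(void)
{
    int rc = system("/bin/false");    /* example command that exits 1 */
    int term_sig = 0, exit_status = 0;

    if (WIFSIGNALED(rc))
        term_sig = WTERMSIG(rc);         /* killed by a signal */
    else if (WIFEXITED(rc))
        exit_status = WEXITSTATUS(rc);   /* normal exit code */

    printf("exit_status=%d term_sig=%d\n", exit_status, term_sig);
    return 0;
}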
@@ -3035 +3122 @@
     slurm_send_rc_msg(msg, rc);
 }

-/* NOTE: xfree returned value */
+/* NOTE: call _destroy_env() to free returned value */
 static char **
-_build_env(uint32_t jobid, uid_t uid, char *bg_part_id)
+_build_env(uint32_t jobid, uid_t uid, char *resv_id)
 {
+    char *name;
     char **env = xmalloc(sizeof(char *));
+    setenvf(&env, "SLURM_JOB_ID", "%u", jobid);
+    setenvf(&env, "SLURM_JOB_UID", "%u", uid);
+    name = uid_to_string(uid);
+    setenvf(&env, "SLURM_JOB_USER", "%s", name);
+    xfree(name);
     setenvf(&env, "SLURM_JOBID", "%u", jobid);
     setenvf(&env, "SLURM_UID", "%u", uid);
-    if (bg_part_id) {
-        setenvf(&env, "MPIRUN_PARTITION",
-                "%s", bg_part_id);
-    }
+    if (resv_id) {
+#ifdef HAVE_BG
+        setenvf(&env, "MPIRUN_PARTITION", "%s", resv_id);
+#endif
+#ifdef HAVE_CRAY_XT
+        setenvf(&env, "BASIL_RESERVATION_ID", "%s", resv_id);
+#endif
+    }
     return env;
 }
@@ -3080 +3180 @@
     xfree(my_prolog);
     _destroy_env(my_env);

+    diff_time = difftime(time(NULL), start_time);
+    if (msg_timeout == 0)
+        msg_timeout = slurm_get_msg_timeout();
+    if (diff_time >= msg_timeout) {
+        error("prolog for job %u ran for %d seconds",
+              jobid, diff_time);
+    }
+
     return error_code;
 }

 static int
-_run_epilog(uint32_t jobid, uid_t uid, char *bg_part_id)
+_run_epilog(uint32_t jobid, uid_t uid, char *resv_id)
 {
     int error_code;
     char *my_epilog;
-    char **my_env = _build_env(jobid, uid, bg_part_id);
+    char **my_env = _build_env(jobid, uid, resv_id);

     slurm_mutex_lock(&conf->config_mutex);
     my_epilog = xstrdup(conf->epilog);
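_run_prolog() now also times the prolog and logs an error once it has run at least as long as the RPC message timeout, since a slow prolog can make job-launch RPCs appear to hang or fail. The sketch below reproduces that timing pattern in isolation; run_script() and the hard-coded 2-second limit stand in for the real prolog execution and slurm_get_msg_timeout().

/*
 * Standalone sketch (not part of req.c): warn when a child script runs
 * longer than a message timeout, as the new _run_prolog() code does.
 */
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static int run_script(void)
{
    sleep(3);    /* pretend the site prolog is slow */
    return 0;
}

int main(void)
{
    time_t start_time = time(NULL);
    int msg_timeout = 2;    /* assumption; slurmd asks slurm_get_msg_timeout() */
    int rc = run_script();
    double diff_time = difftime(time(NULL), start_time);

    if (diff_time >= msg_timeout) {
        /* slurmd logs this so admins can spot prologs that delay
         * job launch beyond the RPC timeout */
        fprintf(stderr, "prolog ran for %.0f seconds\n", diff_time);
    }
    return rc;
}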