219
225
dump_step_desc(job_step_create_request_msg_t *step_spec)
221
if (step_spec == NULL)
224
debug3("StepDesc: user_id=%u job_id=%u node_count=%u, cpu_count=%u",
225
step_spec->user_id, step_spec->job_id,
226
step_spec->node_count, step_spec->cpu_count);
227
debug3("StepDesc: user_id=%u job_id=%u node_count=%u cpu_count=%u",
228
step_spec->user_id, step_spec->job_id,
229
step_spec->node_count, step_spec->cpu_count);
227
230
debug3(" num_tasks=%u relative=%u task_dist=%u node_list=%s",
228
step_spec->num_tasks, step_spec->relative,
229
step_spec->task_dist, step_spec->node_list);
230
debug3(" host=%s port=%u name=%s network=%s checkpoint=%u",
231
step_spec->host, step_spec->port, step_spec->name,
232
step_spec->network, step_spec->ckpt_interval);
233
debug3(" checkpoint-path=%s exclusive=%u immediate=%u mem_per_task=%u",
234
step_spec->ckpt_path, step_spec->exclusive,
235
step_spec->immediate, step_spec->mem_per_task);
231
step_spec->num_tasks, step_spec->relative,
232
step_spec->task_dist, step_spec->node_list);
233
debug3(" host=%s port=%u name=%s network=%s exclusive=%u",
234
step_spec->host, step_spec->port, step_spec->name,
235
step_spec->network, step_spec->exclusive);
236
debug3(" checkpoint-dir=%s checkpoint_int=%u",
237
step_spec->ckpt_dir, step_spec->ckpt_interval);
238
debug3(" mem_per_task=%u resv_port_cnt=%u immediate=%u no_kill=%u",
239
step_spec->mem_per_task, step_spec->resv_port_cnt,
240
step_spec->immediate, step_spec->no_kill);
241
debug3(" overcommit=%d",
242
step_spec->overcommit);
456
464
fatal("bit_copy malloc failure");
457
465
bit_and (nodes_avail, up_node_bitmap);
459
/* In exclusive mode, satisfy the processor count.
460
* Do not use nodes that have no unused CPUs */
467
if (step_spec->mem_per_task &&
468
((select_ptr->memory_allocated == NULL) ||
469
(select_ptr->memory_used == NULL))) {
470
error("_pick_step_nodes: lack memory allocation details "
471
"to enforce memory limits for job %u", job_ptr->job_id);
472
step_spec->mem_per_task = 0;
475
if (job_ptr->next_step_id == 0) {
476
if (job_ptr->details && job_ptr->details->prolog_running) {
477
*return_code = ESLURM_PROLOG_RUNNING;
480
for (i=bit_ffs(job_ptr->node_bitmap); i<node_record_count;
482
if (!bit_test(job_ptr->node_bitmap, i))
484
if ((node_record_table_ptr[i].node_state &
485
NODE_STATE_POWER_SAVE) ||
486
(node_record_table_ptr[i].node_state &
487
NODE_STATE_NO_RESPOND)) {
488
/* Node is/was powered down. Need to wait
489
* for it to start responding again. */
490
FREE_NULL_BITMAP(nodes_avail);
491
*return_code = ESLURM_NODES_BUSY;
492
/* Update job's end-time to allow for node
494
if (job_ptr->time_limit != INFINITE) {
495
job_ptr->end_time = time(NULL) +
496
(job_ptr->time_limit * 60);
503
/* In exclusive mode, just satisfy the processor count.
504
* Do not use nodes that have no unused CPUs or insufficient
461
506
if (step_spec->exclusive) {
462
int i, j=0, avail, tot_cpus = 0;
507
int avail_cpus, avail_tasks, total_cpus, total_tasks, node_inx;
509
uint32_t avail_mem, total_mem;
510
uint32_t nodes_picked_cnt = 0;
511
uint32_t tasks_picked_cnt = 0, total_task_cnt = 0;
463
512
bitstr_t *selected_nodes = NULL;
465
514
if (step_spec->node_list) {
490
for (i=bit_ffs(job_ptr->node_bitmap); i<node_record_count;
492
if (!bit_test(job_ptr->node_bitmap, i))
541
i_first = bit_ffs(select_ptr->node_bitmap);
542
i_last = bit_fls(select_ptr->node_bitmap);
543
for (i=i_first; i<=i_last; i++) {
544
if (!bit_test(select_ptr->node_bitmap, i))
494
if (selected_nodes && (!bit_test(selected_nodes, i)))
547
if (!bit_test(nodes_avail, i))
548
continue; /* node now DOWN */
549
avail_cpus = select_ptr->cpus[node_inx] -
550
select_ptr->cpus_used[node_inx];
551
total_cpus = select_ptr->cpus[node_inx];
552
if (cpus_per_task > 0) {
553
avail_tasks = avail_cpus / cpus_per_task;
554
total_tasks = total_cpus / cpus_per_task;
556
avail_tasks = step_spec->num_tasks;
557
total_tasks = step_spec->num_tasks;
559
if (step_spec->mem_per_task) {
560
avail_mem = select_ptr->
561
memory_allocated[node_inx] -
562
select_ptr->memory_used[node_inx];
563
avail_mem /= step_spec->mem_per_task;
564
avail_tasks = MIN(avail_tasks, avail_mem);
565
total_mem = select_ptr->
566
memory_allocated[node_inx];
567
total_mem /= step_spec->mem_per_task;
568
total_tasks = MIN(total_tasks, total_mem);
570
if ((avail_tasks <= 0) ||
571
((selected_nodes == NULL) &&
572
(nodes_picked_cnt >= step_spec->node_count) &&
573
(tasks_picked_cnt > 0) &&
574
(tasks_picked_cnt >= step_spec->num_tasks)))
575
bit_clear(nodes_avail, i);
497
avail = job_ptr->alloc_lps[j] -
498
job_ptr->used_lps[j];
499
tot_cpus += job_ptr->alloc_lps[j];
578
tasks_picked_cnt += avail_tasks;
502
((selected_nodes == NULL) &&
503
(cpus_picked_cnt > 0) &&
504
(cpus_picked_cnt >= step_spec->cpu_count)))
505
bit_clear(nodes_avail, i);
507
cpus_picked_cnt += avail;
508
if (++j >= job_ptr->node_cnt)
580
total_task_cnt += total_tasks;
511
583
if (selected_nodes) {
512
584
if (!bit_equal(selected_nodes, nodes_avail)) {
513
585
/* some required nodes have no available
514
586
* processors, defer request */
587
tasks_picked_cnt = 0;
517
589
bit_free(selected_nodes);
519
if (cpus_picked_cnt >= step_spec->cpu_count)
592
if (tasks_picked_cnt >= step_spec->num_tasks)
520
593
return nodes_avail;
522
594
FREE_NULL_BITMAP(nodes_avail);
523
if (tot_cpus >= step_spec->cpu_count)
595
if (total_task_cnt >= step_spec->num_tasks)
524
596
*return_code = ESLURM_NODES_BUSY;
526
598
*return_code = ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
530
if ( step_spec->node_count == INFINITE) /* use all nodes */
602
if (step_spec->mem_per_task) {
603
int node_inx = 0, usable_mem;
604
for (i=bit_ffs(select_ptr->node_bitmap); i<node_record_count;
606
if (!bit_test(select_ptr->node_bitmap, i))
608
usable_mem = select_ptr->memory_allocated[node_inx] -
609
select_ptr->memory_used[node_inx];
610
usable_mem /= step_spec->mem_per_task;
611
if (usable_mem <= 0) {
612
if (step_spec->node_count == INFINITE) {
613
FREE_NULL_BITMAP(nodes_avail);
615
ESLURM_INVALID_TASK_MEMORY;
618
bit_clear(nodes_avail, i);
620
if (++node_inx >= select_ptr->nhosts)
625
if (step_spec->node_count == INFINITE) /* use all nodes */
531
626
return nodes_avail;
533
628
if (step_spec->node_list) {
534
629
bitstr_t *selected_nodes = NULL;
536
info("selected nodelist is %s", step_spec->node_list);
630
if (slurm_get_debug_flags() & DEBUG_FLAG_STEPS)
631
info("selected nodelist is %s", step_spec->node_list);
538
632
error_code = node_name2bitmap(step_spec->node_list, false,
539
633
&selected_nodes);
636
731
while ((step_p = (struct step_record *)
637
732
list_next(step_iterator))) {
638
733
bit_or(nodes_idle, step_p->step_node_bitmap);
640
temp = bitmap2node_name(step_p->step_node_bitmap);
641
info("step %d has nodes %s", step_p->step_id, temp);
734
if (slurm_get_debug_flags() & DEBUG_FLAG_STEPS) {
736
temp = bitmap2node_name(step_p->
738
info("step %u.%u has nodes %s",
739
job_ptr->job_id, step_p->step_id, temp);
645
743
list_iterator_destroy (step_iterator);
646
744
bit_not(nodes_idle);
647
745
bit_and(nodes_idle, nodes_avail);
650
temp = bitmap2node_name(nodes_avail);
651
info("can pick from %s %d", temp, step_spec->node_count);
653
temp = bitmap2node_name(nodes_idle);
654
info("can pick from %s", temp);
748
if (slurm_get_debug_flags() & DEBUG_FLAG_STEPS) {
750
temp1 = bitmap2node_name(nodes_avail);
751
temp2 = bitmap2node_name(nodes_idle);
752
info("step pick %u nodes, avail:%s idle:%s",
753
step_spec->node_count, temp1, temp2);
658
758
/* if user specifies step needs a specific processor count and
659
759
* all nodes have the same processor count, just translate this to
660
760
* a node count */
661
if (step_spec->cpu_count && (job_ptr->num_cpu_groups == 1) &&
662
job_ptr->cpus_per_node[0]) {
663
i = (step_spec->cpu_count + (job_ptr->cpus_per_node[0] - 1) )
664
/ job_ptr->cpus_per_node[0];
761
if (step_spec->cpu_count && job_ptr->select_job &&
762
(job_ptr->select_job->cpu_array_cnt == 1) &&
763
job_ptr->select_job->cpu_array_value) {
764
i = (step_spec->cpu_count +
765
(job_ptr->select_job->cpu_array_value[0] - 1)) /
766
job_ptr->select_job->cpu_array_value[0];
665
767
step_spec->node_count = (i > step_spec->node_count) ?
666
i : step_spec->node_count ;
768
i : step_spec->node_count ;
667
769
//step_spec->cpu_count = 0;
670
772
if (step_spec->node_count) {
671
773
nodes_picked_cnt = bit_set_count(nodes_picked);
673
info("got %u %d", step_spec->node_count, nodes_picked_cnt);
774
if (slurm_get_debug_flags() & DEBUG_FLAG_STEPS) {
775
verbose("got %u %d", step_spec->node_count,
676
779
&& (bit_set_count(nodes_idle) >= step_spec->node_count)
677
780
&& (step_spec->node_count > nodes_picked_cnt)) {
859
/* Update the step's core bitmaps, create as needed.
860
* Add the specified task count for a specific node in the job's
861
* and step's allocation */
862
static void _pick_step_cores(struct step_record *step_ptr,
863
select_job_res_t select_ptr,
864
int job_node_inx, uint16_t task_cnt)
866
int bit_offset, core_inx, i, sock_inx;
867
uint16_t sockets, cores;
868
int cpu_cnt = (int) task_cnt;
870
static int last_core_inx;
872
if (!step_ptr->core_bitmap_job) {
873
step_ptr->core_bitmap_job = bit_alloc(bit_size(select_ptr->
876
if (get_select_job_res_cnt(select_ptr, job_node_inx, &sockets, &cores))
877
fatal("get_select_job_res_cnt");
879
if (task_cnt == (cores * sockets))
880
use_all_cores = true;
882
use_all_cores = false;
883
if (step_ptr->cpus_per_task > 0)
884
cpu_cnt *= step_ptr->cpus_per_task;
886
/* select idle cores first */
887
for (core_inx=0; core_inx<cores; core_inx++) {
888
for (sock_inx=0; sock_inx<sockets; sock_inx++) {
889
bit_offset = get_select_job_res_offset(select_ptr,
894
fatal("get_select_job_res_offset");
895
if (!bit_test(select_ptr->core_bitmap, bit_offset))
897
if ((use_all_cores == false) &&
898
bit_test(select_ptr->core_bitmap_used, bit_offset))
900
bit_set(select_ptr->core_bitmap_used, bit_offset);
901
bit_set(step_ptr->core_bitmap_job, bit_offset);
903
info("step alloc N:%d S:%dC :%d",
904
job_node_inx, sock_inx, core_inx);
913
/* We need to over-subscribe one or more cores.
914
* Use last_core_inx to avoid putting all of the extra
915
* work onto core zero */
916
verbose("job step needs to over-subscribe cores");
917
last_core_inx = (last_core_inx + 1) % cores;
918
for (i=0; i<cores; i++) {
919
core_inx = (last_core_inx + i) % cores;
920
for (sock_inx=0; sock_inx<sockets; sock_inx++) {
921
bit_offset = get_select_job_res_offset(select_ptr,
926
fatal("get_select_job_res_offset");
927
if (!bit_test(select_ptr->core_bitmap, bit_offset))
929
if (bit_test(step_ptr->core_bitmap_job, bit_offset))
930
continue; /* already taken by this step */
931
bit_set(step_ptr->core_bitmap_job, bit_offset);
933
info("step alloc N:%d S:%dC :%d",
934
job_node_inx, sock_inx, core_inx);
756
943
/* Update a job's record of allocated CPUs when a job step gets scheduled */
757
944
extern void step_alloc_lps(struct step_record *step_ptr)
759
946
struct job_record *job_ptr = step_ptr->job_ptr;
947
select_job_res_t select_ptr = job_ptr->select_job;
760
949
int i_node, i_first, i_last;
761
950
int job_node_inx = -1, step_node_inx = -1;
951
bool pick_step_cores = true;
954
xassert(select_ptr->core_bitmap);
955
xassert(select_ptr->core_bitmap_used);
956
xassert(select_ptr->cpus);
957
xassert(select_ptr->cpus_used);
763
959
if (step_ptr->step_layout == NULL) /* batch step */
766
i_first = bit_ffs(job_ptr->node_bitmap);
767
i_last = bit_fls(job_ptr->node_bitmap);
962
i_first = bit_ffs(select_ptr->node_bitmap);
963
i_last = bit_fls(select_ptr->node_bitmap);
768
964
if (i_first == -1) /* empty bitmap */
967
if (step_ptr->core_bitmap_job) {
968
/* "scontrol reconfig" of live system */
969
pick_step_cores = false;
970
} else if ((step_ptr->exclusive == 0) ||
971
(step_ptr->cpu_count == job_ptr->total_procs)) {
972
/* Step uses all of job's cores
973
* Just copy the bitmap to save time */
974
step_ptr->core_bitmap_job = bit_copy(select_ptr->core_bitmap);
975
pick_step_cores = false;
978
if (step_ptr->mem_per_task &&
979
((select_ptr->memory_allocated == NULL) ||
980
(select_ptr->memory_used == NULL))) {
981
error("step_alloc_lps: lack memory allocation details "
982
"to enforce memory limits for job %u", job_ptr->job_id);
983
step_ptr->mem_per_task = 0;
770
986
for (i_node = i_first; i_node <= i_last; i_node++) {
771
if (!bit_test(job_ptr->node_bitmap, i_node))
987
if (!bit_test(select_ptr->node_bitmap, i_node))
774
990
if (!bit_test(step_ptr->step_node_bitmap, i_node))
777
if (step_ptr->cpus_per_task) {
778
job_ptr->used_lps[job_node_inx] +=
779
step_ptr->step_layout->tasks[step_node_inx];
782
info("step alloc of %s procs: %u of %u",
783
node_record_table_ptr[i_node].name,
784
job_ptr->used_lps[job_node_inx],
785
job_ptr->alloc_lps[job_node_inx]);
993
if (job_node_inx >= select_ptr->nhosts)
994
fatal("step_alloc_lps: node index bad");
995
/* NOTE: The --overcommit option can result in
996
* cpus_used[] having a higher value than cpus[] */
997
cpus_alloc = step_ptr->step_layout->tasks[step_node_inx] *
998
step_ptr->cpus_per_task;
999
select_ptr->cpus_used[job_node_inx] += cpus_alloc;
1000
if (step_ptr->mem_per_task) {
1001
select_ptr->memory_used[job_node_inx] +=
1002
(step_ptr->mem_per_task *
1003
step_ptr->step_layout->tasks[step_node_inx]);
1005
if (pick_step_cores) {
1006
_pick_step_cores(step_ptr, select_ptr,
1008
step_ptr->step_layout->
1009
tasks[step_node_inx]);
1011
if (slurm_get_debug_flags() & DEBUG_FLAG_CPU_BIND)
1012
_dump_step_layout(step_ptr);
1013
if (slurm_get_debug_flags() & DEBUG_FLAG_STEPS) {
1014
info("step alloc of %s procs: %u of %u",
1015
node_record_table_ptr[i_node].name,
1016
select_ptr->cpus_used[job_node_inx],
1017
select_ptr->cpus[job_node_inx]);
787
1019
if (step_node_inx == (step_ptr->step_layout->node_cnt - 1))
1025
/* Dump a job step's CPU binding information.
1026
* NOTE: The core_bitmap_job and node index are based upon
1027
* the _job_ allocation */
1028
static void _dump_step_layout(struct step_record *step_ptr)
1030
struct job_record* job_ptr = step_ptr->job_ptr;
1031
select_job_res_t select_ptr = job_ptr->select_job;
1032
int i, bit_inx, core_inx, node_inx, rep, sock_inx;
1034
if ((step_ptr->core_bitmap_job == NULL) ||
1035
(select_ptr == NULL) || (select_ptr->cores_per_socket == NULL))
1038
info("====================");
1039
info("step_id:%u.%u", job_ptr->job_id, step_ptr->step_id);
1040
for (i=0, bit_inx= 0, node_inx=0; node_inx<select_ptr->nhosts; i++) {
1041
for (rep=0; rep<select_ptr->sock_core_rep_count[i]; rep++) {
1043
sock_inx<select_ptr->sockets_per_node[i];
1046
core_inx<select_ptr->cores_per_socket[i];
1048
if (bit_test(step_ptr->
1051
info("JobNode[%d] Socket[%d] "
1052
"Core[%d] is allocated",
1061
info("====================");
793
1064
static void _step_dealloc_lps(struct step_record *step_ptr)
795
1066
struct job_record *job_ptr = step_ptr->job_ptr;
1067
select_job_res_t select_ptr = job_ptr->select_job;
796
1069
int i_node, i_first, i_last;
797
1070
int job_node_inx = -1, step_node_inx = -1;
1072
xassert(select_ptr);
1073
xassert(select_ptr->core_bitmap);
1074
xassert(select_ptr->core_bitmap_used);
1075
xassert(select_ptr->cpus);
1076
xassert(select_ptr->cpus_used);
799
1078
if (step_ptr->step_layout == NULL) /* batch step */
802
i_first = bit_ffs(job_ptr->node_bitmap);
803
i_last = bit_fls(job_ptr->node_bitmap);
1081
i_first = bit_ffs(select_ptr->node_bitmap);
1082
i_last = bit_fls(select_ptr->node_bitmap);
804
1083
if (i_first == -1) /* empty bitmap */
1086
if (step_ptr->mem_per_task &&
1087
((select_ptr->memory_allocated == NULL) ||
1088
(select_ptr->memory_used == NULL))) {
1089
error("_step_dealloc_lps: lack memory allocation details "
1090
"to enforce memory limits for job %u", job_ptr->job_id);
1091
step_ptr->mem_per_task = 0;
806
1094
for (i_node = i_first; i_node <= i_last; i_node++) {
807
if (!bit_test(job_ptr->node_bitmap, i_node))
1095
if (!bit_test(select_ptr->node_bitmap, i_node))
810
1098
if (!bit_test(step_ptr->step_node_bitmap, i_node))
812
1100
step_node_inx++;
813
if (step_ptr->cpus_per_task == 0)
814
; /* no CPUs allocated */
815
else if (job_ptr->used_lps[job_node_inx] >=
816
step_ptr->step_layout->tasks[step_node_inx]) {
817
job_ptr->used_lps[job_node_inx] -=
818
step_ptr->step_layout->tasks[step_node_inx];
820
error("_step_dealloc_lps: underflow for %u.%u",
1101
if (job_node_inx >= select_ptr->nhosts)
1102
fatal("_step_dealloc_lps: node index bad");
1103
cpus_alloc = step_ptr->step_layout->tasks[step_node_inx] *
1104
step_ptr->cpus_per_task;
1105
if (select_ptr->cpus_used[job_node_inx] >= cpus_alloc)
1106
select_ptr->cpus_used[job_node_inx] -= cpus_alloc;
1108
error("_step_dealloc_lps: cpu underflow for %u.%u",
821
1109
job_ptr->job_id, step_ptr->step_id);
822
job_ptr->used_lps[job_node_inx] = 0;
825
info("step dealloc of %s procs: %u of %u",
826
node_record_table_ptr[i_node].name,
827
job_ptr->used_lps[job_node_inx],
828
job_ptr->alloc_lps[job_node_inx]);
1110
select_ptr->cpus_used[job_node_inx] = 0;
1112
if (step_ptr->mem_per_task) {
1113
uint32_t mem_use = step_ptr->mem_per_task *
1114
step_ptr->step_layout->
1115
tasks[step_node_inx];
1116
if (select_ptr->memory_used[job_node_inx] >= mem_use) {
1117
select_ptr->memory_used[job_node_inx] -=
1120
error("_step_dealloc_lps: "
1121
"mem underflow for %u.%u",
1122
job_ptr->job_id, step_ptr->step_id);
1123
select_ptr->memory_used[job_node_inx] = 0;
1126
if (slurm_get_debug_flags() & DEBUG_FLAG_STEPS) {
1127
info("step dealloc of %s procs: %u of %u",
1128
node_record_table_ptr[i_node].name,
1129
select_ptr->cpus_used[job_node_inx],
1130
select_ptr->cpus[job_node_inx]);
830
1132
if (step_node_inx == (step_ptr->step_layout->node_cnt - 1))
1135
if (step_ptr->core_bitmap_job) {
1136
/* Mark the job's cores as no longer in use */
1137
bit_not(step_ptr->core_bitmap_job);
1138
bit_and(select_ptr->core_bitmap_used,
1139
step_ptr->core_bitmap_job);
1140
/* no need for bit_not(step_ptr->core_bitmap_job); */
1141
FREE_NULL_BITMAP(step_ptr->core_bitmap_job);
1072
1403
extern slurm_step_layout_t *step_layout_create(struct step_record *step_ptr,
1073
1404
char *step_node_list,
1074
uint16_t node_count,
1405
uint32_t node_count,
1075
1406
uint32_t num_tasks,
1076
1407
uint16_t cpus_per_task,
1077
1408
uint16_t task_dist,
1078
1409
uint32_t plane_size)
1080
uint32_t cpus_per_node[node_count];
1411
uint16_t cpus_per_node[node_count];
1081
1412
uint32_t cpu_count_reps[node_count];
1082
1413
int cpu_inx = -1;
1083
int usable_cpus = 0, i;
1084
int set_nodes = 0, set_cpus = 0;
1414
int i, usable_cpus, usable_mem;
1415
int set_cpus = 0, set_nodes = 0, set_tasks = 0;
1417
int first_bit, last_bit;
1086
1418
struct job_record *job_ptr = step_ptr->job_ptr;
1419
select_job_res_t select_ptr = job_ptr->select_job;
1421
xassert(select_ptr);
1422
xassert(select_ptr->cpus);
1423
xassert(select_ptr->cpus_used);
1425
if (step_ptr->mem_per_task &&
1426
((select_ptr->memory_allocated == NULL) ||
1427
(select_ptr->memory_used == NULL))) {
1428
error("step_layout_create: lack memory allocation details "
1429
"to enforce memory limits for job %u", job_ptr->job_id);
1430
step_ptr->mem_per_task = 0;
1088
1433
/* build the cpus-per-node arrays for the subset of nodes
1089
used by this job step */
1090
for (i = 0; i < node_record_count; i++) {
1434
* used by this job step */
1435
first_bit = bit_ffs(step_ptr->step_node_bitmap);
1436
last_bit = bit_fls(step_ptr->step_node_bitmap);
1437
for (i = first_bit; i <= last_bit; i++) {
1091
1438
if (bit_test(step_ptr->step_node_bitmap, i)) {
1092
1439
/* find out the position in the job */
1093
pos = bit_get_pos_num(job_ptr->node_bitmap, i);
1440
pos = bit_get_pos_num(select_ptr->node_bitmap, i);
1443
if (pos >= select_ptr->nhosts)
1444
fatal("step_layout_create: node index bad");
1096
1445
if (step_ptr->exclusive) {
1097
usable_cpus = job_ptr->alloc_lps[pos] -
1098
job_ptr->used_lps[pos];
1099
if (usable_cpus < 0) {
1100
error("step_layout_create exclusive");
1103
usable_cpus = MIN(usable_cpus,
1104
(num_tasks - set_cpus));
1446
usable_cpus = select_ptr->cpus[pos] -
1447
select_ptr->cpus_used[pos];
1106
usable_cpus = job_ptr->alloc_lps[pos];
1449
usable_cpus = select_ptr->cpus[pos];
1450
if (step_ptr->mem_per_task) {
1451
usable_mem = select_ptr->memory_allocated[pos] -
1452
select_ptr->memory_used[pos];
1453
usable_mem /= step_ptr->mem_per_task;
1454
if (cpus_per_task > 0)
1455
usable_mem *= cpus_per_task;
1456
usable_cpus = MIN(usable_cpus, usable_mem);
1458
if (usable_cpus <= 0) {
1459
error("step_layout_create no usable cpus");
1107
1462
debug3("step_layout cpus = %d pos = %d",
1108
1463
usable_cpus, pos);
1228
1588
list_iterator_destroy(job_iterator);
1230
} else if (step_id == 0) {
1231
/* Return all steps for specific job_id */
1232
1591
job_ptr = find_job_record(job_id);
1233
if (((show_flags & SHOW_ALL) == 0) &&
1234
(job_ptr->part_ptr) &&
1235
(job_ptr->part_ptr->hidden))
1237
else if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS)
1238
&& (job_ptr->user_id != uid) && !validate_super_user(uid))
1592
/* first lets filter this step based on permission and
1593
request; if not allowable, set job_ptr = NULL */
1595
if (((show_flags & SHOW_ALL) == 0)
1596
&& (job_ptr->part_ptr)
1597
&& (job_ptr->part_ptr->hidden))
1599
else if ((slurmctld_conf.private_data
1600
& PRIVATE_DATA_JOBS)
1601
&& (job_ptr->user_id != uid)
1602
&& !validate_super_user(uid))
1606
/* now send the requested steps */
1242
1608
step_iterator =
1243
1609
list_iterator_create(job_ptr->step_list);
1245
(struct step_record *)
1246
list_next(step_iterator))) {
1247
_pack_ctld_job_step_info(step_ptr, buffer);
1610
/* If step_id is 0 that means to send all
1611
steps (We understand this is incorrect
1612
since 0 is a valid job step,
1613
but changing it would need to be done in
1614
the api and so we wait until 2.1) */
1615
while ((step_ptr = list_next(step_iterator))) {
1617
|| (step_ptr->step_id == step_id)) {
1618
_pack_ctld_job_step_info(
1250
1623
list_iterator_destroy(step_iterator);
1626
error_code = ESLURM_INVALID_JOB_ID;
1252
1628
error_code = ESLURM_INVALID_JOB_ID;
1254
/* Return data for specific job_id.step_id */
1255
job_ptr = find_job_record(job_id);
1256
if (((show_flags & SHOW_ALL) == 0)
1257
&& (job_ptr != NULL)
1258
&& (job_ptr->part_ptr)
1259
&& (job_ptr->part_ptr->hidden))
1261
else if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS)
1262
&& (job_ptr->user_id != uid) && !validate_super_user(uid))
1265
step_ptr = find_step_record(job_ptr, step_id);
1266
if (step_ptr == NULL)
1267
error_code = ESLURM_INVALID_JOB_ID;
1269
_pack_ctld_job_step_info(step_ptr, buffer);
1273
1630
part_filter_clear();
1275
1632
/* put the real record count in the message body header */
1818
* Create a new job step from data in a buffer (as created by dump_job_step_state)
2193
* Create a new job step from data in a buffer (as created by
2194
* dump_job_step_state)
1819
2195
* IN/OUT - job_ptr - pointer to the job for which the step is to be loaded.
1820
* IN/OUT buffer - location from which to get data, pointers automatically advanced
2196
* IN/OUT buffer - location to get data from, pointers advanced
1822
2198
extern int load_step_state(struct job_record *job_ptr, Buf buffer)
1824
2200
struct step_record *step_ptr = NULL;
1825
uint16_t step_id, cyclic_alloc, port, batch_step, bit_cnt;
1826
uint16_t ckpt_interval, mem_per_task;
1827
uint32_t exit_code, name_len;
2202
uint16_t cyclic_alloc, port, batch_step, bit_cnt;
2203
uint16_t ckpt_interval, cpus_per_task, resv_port_cnt;
2204
uint32_t core_size, cpu_count, exit_code, mem_per_task, name_len;
1828
2206
time_t start_time, pre_sus_time, tot_sus_time, ckpt_time;
1829
char *host = NULL, *ckpt_path = NULL;
1830
char *name = NULL, *network = NULL, *bit_fmt = NULL;
2207
char *host = NULL, *ckpt_dir = NULL, *core_job = NULL;
2208
char *resv_ports = NULL, *name = NULL, *network = NULL, *bit_fmt = NULL;
1831
2209
switch_jobinfo_t switch_tmp = NULL;
1832
2210
check_jobinfo_t check_tmp = NULL;
1833
2211
slurm_step_layout_t *step_layout = NULL;
1835
safe_unpack16(&step_id, buffer);
2213
safe_unpack32(&step_id, buffer);
1836
2214
safe_unpack16(&cyclic_alloc, buffer);
1837
2215
safe_unpack16(&port, buffer);
1838
2216
safe_unpack16(&ckpt_interval, buffer);
1839
safe_unpack16(&mem_per_task, buffer);
2217
safe_unpack16(&cpus_per_task, buffer);
2218
safe_unpack16(&resv_port_cnt, buffer);
2220
safe_unpack8(&no_kill, buffer);
2222
safe_unpack32(&cpu_count, buffer);
2223
safe_unpack32(&mem_per_task, buffer);
1841
2224
safe_unpack32(&exit_code, buffer);
1842
2225
if (exit_code != NO_VAL) {
1843
2226
safe_unpackstr_xmalloc(&bit_fmt, &name_len, buffer);
1844
2227
safe_unpack16(&bit_cnt, buffer);
2229
safe_unpack32(&core_size, buffer);
2231
safe_unpackstr_xmalloc(&core_job, &name_len, buffer);
1847
2233
safe_unpack_time(&start_time, buffer);
1848
2234
safe_unpack_time(&pre_sus_time, buffer);
1849
2235
safe_unpack_time(&tot_sus_time, buffer);
1850
2236
safe_unpack_time(&ckpt_time, buffer);
1852
2238
safe_unpackstr_xmalloc(&host, &name_len, buffer);
2239
safe_unpackstr_xmalloc(&resv_ports, &name_len, buffer);
1853
2240
safe_unpackstr_xmalloc(&name, &name_len, buffer);
1854
2241
safe_unpackstr_xmalloc(&network, &name_len, buffer);
1855
safe_unpackstr_xmalloc(&ckpt_path, &name_len, buffer);
2242
safe_unpackstr_xmalloc(&ckpt_dir, &name_len, buffer);
1856
2243
safe_unpack16(&batch_step, buffer);
1857
2244
if (!batch_step) {
1858
2245
if (unpack_slurm_step_layout(&step_layout, buffer))
1967
2374
while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
1968
2375
if (job_ptr->job_state != JOB_RUNNING)
2377
if (job_ptr->batch_flag &&
2378
(job_ptr->ckpt_interval != 0)) { /* periodic job ckpt */
2379
ckpt_due = job_ptr->ckpt_time +
2380
(job_ptr->ckpt_interval * 60);
2384
* DO NOT initiate a checkpoint request if the job is
2385
* started just now, in case it is restarting from checkpoint.
2387
ckpt_due = job_ptr->start_time +
2388
(job_ptr->ckpt_interval * 60);
2392
ckpt_req.op = CHECK_CREATE;
2394
ckpt_req.job_id = job_ptr->job_id;
2395
ckpt_req.step_id = SLURM_BATCH_SCRIPT;
2396
ckpt_req.image_dir = NULL;
2397
job_checkpoint(&ckpt_req, getuid(), -1);
2398
job_ptr->ckpt_time = now;
2399
last_job_update = now;
2400
continue; /* ignore periodic step ckpt */
1970
2402
step_iterator = list_iterator_create (job_ptr->step_list);
1971
2403
while ((step_ptr = (struct step_record *)
1972
2404
list_next (step_iterator))) {
2405
char *image_dir = NULL;
1973
2406
if (step_ptr->ckpt_interval == 0)
1975
2408
ckpt_due = step_ptr->ckpt_time +
1976
(step_ptr->ckpt_interval * 60);
2409
(step_ptr->ckpt_interval * 60);
1977
2410
if (ckpt_due > now)
2413
* DO NOT initiate a checkpoint request if the step is
2414
* started just now, in case it is restarting from
2417
ckpt_due = step_ptr->start_time +
2418
(step_ptr->ckpt_interval * 60);
1979
2422
step_ptr->ckpt_time = now;
1980
2423
last_job_update = now;
1981
(void) checkpoint_op(CHECK_CREATE, 0,
1982
(void *)step_ptr, &event_time,
1983
&error_code, &error_msg);
2424
image_dir = xstrdup(step_ptr->ckpt_dir);
2425
xstrfmtcat(image_dir, "/%u.%u", job_ptr->job_id,
2427
(void) checkpoint_op(job_ptr->job_id,
2429
step_ptr, CHECK_CREATE, 0,
2430
image_dir, &event_time,
2431
&error_code, &error_msg);
1985
2434
list_iterator_destroy (step_iterator);