 * Copyright (C) 2008 Lawrence Livermore National Security.
 * Produced at Lawrence Livermore National Laboratory (cf. DISCLAIMER).
 * Written by Joey Ekstrom <ekstrom1@llnl.gov>, Morris Jette <jette1@llnl.gov>
 * CODE-OCEC-09-009. All rights reserved.
 *
 * This file is part of SLURM, a resource management program.
 * For details, see <https://computing.llnl.gov/linux/slurm/>.
 * Please also read the included file: DISCLAIMER.
 *
 * SLURM is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free

	bg_info_record_t *bg_info_record = NULL;
	int node_scaling = partition_msg->partition_array[0].node_scaling;

	for (i=0; i<node_msg->record_count; i++) {
		node_ptr = &(node_msg->node_array[i]);
		/* In each node_ptr we overload the threads var with
		 * the number of cnodes in the drained state, the
		 * sockets var with the number of cnodes in the
		 * draining state, and the used_cpus var with the
		 * number of allocated cnodes.  The cores var
		 * represents the cnodes in an error state.  So we can
		 * get an idle count by subtracting those 3 numbers
		 * from the total possible cnodes (which are the idle
		 * cnodes).
		 */
		node_ptr->threads = node_scaling;
		node_ptr->cores = 0;
		node_ptr->sockets = 0;
		node_ptr->used_cpus = 0;
		if ((node_ptr->node_state & NODE_STATE_BASE)
		    == NODE_STATE_DOWN)
			continue;

		if (node_ptr->node_state & NODE_STATE_DRAIN) {
			if (node_ptr->node_state & NODE_STATE_FAIL) {
				node_ptr->node_state &= ~NODE_STATE_DRAIN;
				node_ptr->node_state &= ~NODE_STATE_FAIL;
				node_ptr->cores += node_scaling;
			}
		}
		node_ptr->node_state |= NODE_STATE_IDLE;
	}
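
	/* A minimal worked example of the overloading above, assuming
	 * hypothetical counts: with node_scaling = 512 and a node
	 * carrying 128 error cnodes and 256 allocated cnodes, the
	 * fields end up threads = 512, cores = 128, sockets = 0,
	 * used_cpus = 256, so idle cnodes =
	 * threads - (cores + sockets + used_cpus) = 128. */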

	for (i=0; i<node_select_msg->record_count; i++) {
		bg_info_record = &(node_select_msg->bg_info_array[i]);
		/* this block is idle, we won't mark it */
		if (bg_info_record->job_running == NO_JOB_RUNNING)
			continue;
		_update_nodes_for_bg(node_scaling, node_msg, bg_info_record);
	}

	/* by default every partition is shown, even if it has no nodes */

	if (params.filtering && _filter_out(node_ptr))
		continue;

	if ((node_ptr->node_state & NODE_STATE_DRAIN)
	    && (node_ptr->node_state & NODE_STATE_FAIL)) {
		node_ptr->node_state &= ~NODE_STATE_DRAIN;
		node_ptr->node_state &= ~NODE_STATE_FAIL;
	}

	node_ptr->threads = node_scaling;
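	/* A node carrying both DRAIN and FAIL is the error case from
	 * the first pass; with the flags cleared, threads again
	 * reports the node's full cnode count. */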

	case SINFO_BG_IDLE_STATE:

		node_ptr->threads = node_ptr->used_cpus;
		break;
	case SINFO_BG_DRAINING_STATE:
		/* get the draining node count */
		if (!node_ptr->sockets)
			continue;
		node_ptr->node_state = NODE_STATE_ALLOCATED;
		node_ptr->node_state |= NODE_STATE_DRAIN;
		node_ptr->threads = node_ptr->sockets;
		break;
	case SINFO_BG_ERROR_STATE:
		/* get the error node count */
		if (!node_ptr->cores)
			continue;
		node_ptr->node_state &= NODE_STATE_FLAGS;
		node_ptr->node_state |= NODE_STATE_ERROR;
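
		/* Pattern used by these cases, assuming the usual
		 * node_state layout (base state in the low bits, flag
		 * bits above):
		 *   state = (state & NODE_STATE_FLAGS) | new_base;
		 * keeps the flags while swapping the base state. */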

	bool match = false;
	iterator = list_iterator_create(params.state_list);
	while ((node_state = list_next(iterator))) {
		if (*node_state ==
		    (NODE_STATE_DRAIN | NODE_STATE_ALLOCATED)) {
			/* We search for anything that gets mapped to
			 * DRAINING in node_state_string */
			if (!(node_ptr->node_state & NODE_STATE_DRAIN))
				continue;
			if (((node_ptr->node_state & NODE_STATE_BASE) ==
			     NODE_STATE_ALLOCATED) ||
			    (node_ptr->node_state &
			     NODE_STATE_COMPLETING)) {
				match = true;
				break;
			}
		} else if (*node_state ==
			   (NODE_STATE_DRAIN | NODE_STATE_IDLE)) {
			/* We search for anything that gets mapped to
			 * DRAINED in node_state_string */
			if (!(node_ptr->node_state & NODE_STATE_DRAIN))
				continue;
			if (((node_ptr->node_state & NODE_STATE_BASE) !=
			     NODE_STATE_ALLOCATED) &&
			    (!(node_ptr->node_state &
			       NODE_STATE_COMPLETING))) {
				match = true;
				break;
			}
		} else if (*node_state & NODE_STATE_FLAGS) {
			if (*node_state & node_ptr->node_state) {
				match = true;
				break;
			}
		}
	}
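
/* A minimal sketch (hypothetical helpers, not part of this file)
 * restating the mapping the loop above searches for; assumes the
 * uint16_t node_state layout used throughout. */
static bool _is_draining_state(uint16_t state)
{
	return (state & NODE_STATE_DRAIN)
		&& (((state & NODE_STATE_BASE) == NODE_STATE_ALLOCATED)
		    || (state & NODE_STATE_COMPLETING));
}

static bool _is_drained_state(uint16_t state)
{
	return (state & NODE_STATE_DRAIN) && !_is_draining_state(state);
}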

	if (params.match_flags.max_nodes_flag &&
	    (part_ptr->max_nodes != sinfo_ptr->part_info->max_nodes))
		return false;
	if (params.match_flags.default_time_flag &&
	    (part_ptr->default_time != sinfo_ptr->part_info->default_time))
		return false;
	if (params.match_flags.max_time_flag &&
	    (part_ptr->max_time != sinfo_ptr->part_info->max_time))
		return false;

static void _update_nodes_for_bg(int node_scaling,
				 node_info_msg_t *node_msg,
				 bg_info_record_t *bg_info_record)
{
	node_info_t *node_ptr = NULL;
	int j = 0, i2 = 0;

	/* we are using less than one node */
	if (bg_info_record->conn_type == SELECT_SMALL)
		node_scaling = bg_info_record->node_cnt;

	while (bg_info_record->bp_inx[j] >= 0) {
		for (i2 = bg_info_record->bp_inx[j];
		     i2 <= bg_info_record->bp_inx[j+1];
		     i2++) {
			node_ptr = &(node_msg->node_array[i2]);
			/* cores is overloaded to be the cnodes in an
			 * error state and used_cpus is overloaded to
			 * be the nodes in use.  No block should be
			 * sent in here if it isn't in use (which
			 * doesn't mean in a free state; it means the
			 * user isn't slurm or the block is in an
			 * error state).
			 */
			if (((node_ptr->node_state & NODE_STATE_BASE)
			     == NODE_STATE_DOWN)
			    || (node_ptr->node_state & NODE_STATE_DRAIN)) {
				if (bg_info_record->job_running
				    > NO_JOB_RUNNING) {
					node_ptr->sockets += node_scaling;
					node_ptr->cores -= node_scaling;
				}
				continue;
			}
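
			/* Note: the first pass counted a down or
			 * drained node's cnodes as error (cores), so
			 * when a job is still running on the block
			 * the code above shifts node_scaling of them
			 * from error to draining (sockets). */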

			if (bg_info_record->state == RM_PARTITION_ERROR) {
				node_ptr->cores += node_scaling;
			} else if (bg_info_record->job_running
				   > NO_JOB_RUNNING) {
				node_ptr->used_cpus += node_scaling;
			} else
				error("_update_nodes_for_bg: block neither "
				      "in use nor in an error state");
		}
		j += 2;
	}
}
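
/* Illustration of the bp_inx convention assumed by the loop above
 * (hypothetical data): inclusive [start, end] pairs of node_array
 * indexes, terminated by -1, which is why j advances by 2.  This
 * example covers node_array[0..3] and node_array[8..8]. */
static const int bp_inx_example[] = { 0, 3, 8, 8, -1 };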