1
1
/*****************************************************************************\
2
2
* select_bluegene.c - node selection plugin for Blue Gene system.
4
* $Id: select_bluegene.c 17175 2009-04-07 17:24:20Z da $
5
3
*****************************************************************************
6
* Copyright (C) 2004-2006 The Regents of the University of California.
4
* Copyright (C) 2004-2007 The Regents of the University of California.
5
* Copyright (C) 2008-2009 Lawrence Livermore National Security.
7
6
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8
7
* Written by Dan Phung <phung4@llnl.gov> Danny Auble <da@llnl.gov>
8
* CODE-OCEC-09-009. All rights reserved.
11
10
* This file is part of SLURM, a resource management program.
12
* For details, see <http://www.llnl.gov/linux/slurm/>.
11
* For details, see <https://computing.llnl.gov/linux/slurm/>.
12
* Please also read the included file: DISCLAIMER.
14
14
* SLURM is free software; you can redistribute it and/or modify it under
15
15
* the terms of the GNU General Public License as published by the Free
727
694
FREE_NULL_BITMAP(ionode_bitmap);
730
node_name = xstrdup_printf("%s%s", bg_slurm_node_prefix, coord);
697
node_name = xstrdup_printf("%s%s", bg_conf->slurm_node_prefix, coord);
731
698
/* find out how many nodecards to get for each ionode */
732
699
if(!part_desc_ptr->state_up) {
733
700
info("Admin setting %s[%s] in an error state",
734
701
node_name, ionodes);
735
for(i = 0; i<bluegene_numpsets; i++) {
702
for(i = 0; i<bg_conf->numpsets; i++) {
736
703
if(bit_test(ionode_bitmap, i)) {
737
704
if((int)nc_pos != (int)last_pos) {
738
down_nodecard(node_name, i);
705
/* find first bit in nc */
707
(int)nc_pos * bg_conf->io_ratio;
708
down_nodecard(node_name, start_io);
739
709
last_pos = nc_pos;
742
nc_pos += bluegene_nc_ratio;
712
nc_pos += bg_conf->nc_ratio;
744
714
} else if(part_desc_ptr->state_up){
745
715
info("Admin setting %s[%s] in an free state",
762
extern int select_p_get_extra_jobinfo (struct node_record *node_ptr,
763
struct job_record *job_ptr,
764
enum select_data_info info,
767
if (info == SELECT_AVAIL_CPUS) {
768
/* Needed to track CPUs allocated to jobs on whole nodes
769
* for sched/wiki2 (Moab scheduler). Small block allocations
770
* handled through use of job_ptr->num_procs in slurmctld */
771
uint16_t *cpus_per_bp = (uint16_t *) data;
772
*cpus_per_bp = procs_per_node;
774
return SLURM_SUCCESS;
777
732
extern int select_p_get_info_from_plugin (enum select_data_info info,
733
struct job_record *job_ptr,
736
if (info == SELECT_STATIC_PART) {
737
uint16_t *tmp16 = (uint16_t *) data;
738
if (bg_conf->layout_mode == LAYOUT_STATIC)
744
return SLURM_SUCCESS;
747
extern int select_p_update_node_config (int index)
780
749
return SLURM_SUCCESS;
783
752
extern int select_p_update_node_state (int index, uint16_t state)
789
755
for (y = DIM_SIZE[Y] - 1; y >= 0; y--) {
790
756
for (z = 0; z < DIM_SIZE[Z]; z++) {
791
757
for (x = 0; x < DIM_SIZE[X]; x++) {
877
836
for (i=0; i<BA_SYSTEM_DIMENSIONS; i++)
878
837
job_desc->min_nodes *=
879
838
(uint16_t)req_geometry[i];
880
job_desc->min_nodes *= bluegene_bp_node_cnt;
839
job_desc->min_nodes *= bg_conf->bp_node_cnt;
881
840
job_desc->max_nodes = job_desc->min_nodes;
884
843
if(job_desc->num_procs != NO_VAL) {
844
job_desc->num_procs /= bg_conf->proc_ratio;
885
845
if(job_desc->min_nodes < job_desc->num_procs)
886
846
job_desc->min_nodes = job_desc->num_procs;
887
847
if(job_desc->max_nodes < job_desc->num_procs)
888
848
job_desc->max_nodes = job_desc->num_procs;
890
850
/* See if min_nodes is greater than one base partition */
891
if(job_desc->min_nodes > bluegene_bp_node_cnt) {
851
if(job_desc->min_nodes > bg_conf->bp_node_cnt) {
893
853
* if it is, make sure it is a factor of
894
* bluegene_bp_node_cnt, if it isn't make it
854
* bg_conf->bp_node_cnt; if it isn't, make it
897
tmp = job_desc->min_nodes % bluegene_bp_node_cnt;
857
tmp = job_desc->min_nodes % bg_conf->bp_node_cnt;
899
859
job_desc->min_nodes +=
900
(bluegene_bp_node_cnt-tmp);
860
(bg_conf->bp_node_cnt-tmp);
902
tmp = job_desc->min_nodes / bluegene_bp_node_cnt;
862
tmp = job_desc->min_nodes / bg_conf->bp_node_cnt;
904
864
/* this means it is greater than or equal to one bp */
907
867
SELECT_DATA_NODE_CNT,
908
868
&job_desc->min_nodes);
909
869
job_desc->min_nodes = tmp;
910
job_desc->num_procs = procs_per_node * tmp;
870
job_desc->num_procs = bg_conf->procs_per_bp * tmp;
913
if(job_desc->min_nodes <= bluegene_nodecard_node_cnt
914
&& bluegene_nodecard_ionode_cnt)
873
if(job_desc->min_nodes <= bg_conf->nodecard_node_cnt
874
&& bg_conf->nodecard_ionode_cnt)
915
875
job_desc->min_nodes =
916
bluegene_nodecard_node_cnt;
876
bg_conf->nodecard_node_cnt;
917
877
else if(job_desc->min_nodes
918
<= bluegene_quarter_node_cnt)
878
<= bg_conf->quarter_node_cnt)
919
879
job_desc->min_nodes =
920
bluegene_quarter_node_cnt;
880
bg_conf->quarter_node_cnt;
922
882
job_desc->min_nodes =
923
bluegene_bp_node_cnt;
883
bg_conf->bp_node_cnt;
925
885
select_g_set_jobinfo(job_desc->select_jobinfo,
926
886
SELECT_DATA_NODE_CNT,
927
887
&job_desc->min_nodes);
929
tmp = bluegene_bp_node_cnt/job_desc->min_nodes;
889
tmp = bg_conf->bp_node_cnt/job_desc->min_nodes;
931
job_desc->num_procs = procs_per_node/tmp;
891
job_desc->num_procs = bg_conf->procs_per_bp/tmp;
932
892
job_desc->min_nodes = 1;
934
i = bluegene_smallest_block;
935
while(i <= bluegene_bp_node_cnt) {
894
i = bg_conf->smallest_block;
895
while(i <= bg_conf->bp_node_cnt) {
936
896
if(job_desc->min_nodes <= i) {
937
897
job_desc->min_nodes = i;
953
913
if(job_desc->max_nodes == (uint32_t) NO_VAL)
954
914
return SLURM_SUCCESS;
956
if(job_desc->max_nodes > bluegene_bp_node_cnt) {
957
tmp = job_desc->max_nodes % bluegene_bp_node_cnt;
916
if(job_desc->max_nodes > bg_conf->bp_node_cnt) {
917
tmp = job_desc->max_nodes % bg_conf->bp_node_cnt;
959
919
job_desc->max_nodes +=
960
(bluegene_bp_node_cnt-tmp);
920
(bg_conf->bp_node_cnt-tmp);
962
tmp = job_desc->max_nodes / bluegene_bp_node_cnt;
922
tmp = job_desc->max_nodes / bg_conf->bp_node_cnt;
964
924
job_desc->max_nodes = tmp;
968
if(job_desc->max_nodes <= bluegene_nodecard_node_cnt
969
&& bluegene_nodecard_ionode_cnt)
928
if(job_desc->max_nodes <= bg_conf->nodecard_node_cnt
929
&& bg_conf->nodecard_ionode_cnt)
970
930
job_desc->max_nodes =
971
bluegene_nodecard_node_cnt;
931
bg_conf->nodecard_node_cnt;
972
932
else if(job_desc->max_nodes
973
<= bluegene_quarter_node_cnt)
933
<= bg_conf->quarter_node_cnt)
974
934
job_desc->max_nodes =
975
bluegene_quarter_node_cnt;
935
bg_conf->quarter_node_cnt;
977
937
job_desc->max_nodes =
978
bluegene_bp_node_cnt;
938
bg_conf->bp_node_cnt;
980
tmp = bluegene_bp_node_cnt/job_desc->max_nodes;
981
tmp = procs_per_node/tmp;
940
tmp = bg_conf->bp_node_cnt/job_desc->max_nodes;
941
tmp = bg_conf->procs_per_bp/tmp;
983
943
select_g_set_jobinfo(job_desc->select_jobinfo,
984
944
SELECT_DATA_MAX_PROCS,
986
946
job_desc->max_nodes = 1;
988
i = bluegene_smallest_block;
989
while(i <= bluegene_bp_node_cnt) {
948
i = bg_conf->smallest_block;
949
while(i <= bg_conf->bp_node_cnt) {
990
950
if(job_desc->max_nodes <= i) {
991
951
job_desc->max_nodes = i;
1015
975
extern int select_p_reconfigure(void)
1017
return SLURM_SUCCESS;
1020
extern int select_p_step_begin(struct step_record *step_ptr)
1022
return SLURM_SUCCESS;
1025
extern int select_p_step_fini(struct step_record *step_ptr)
1027
return SLURM_SUCCESS;
978
if(!slurmctld_conf.slurm_user_name
979
|| strcmp(bg_conf->slurm_user_name, slurmctld_conf.slurm_user_name))
980
error("The slurm user has changed from '%s' to '%s'. "
981
"If this is really what you "
982
"want you will need to restart slurm for this "
983
"change to be enforced in the bluegene plugin.",
984
bg_conf->slurm_user_name, slurmctld_conf.slurm_user_name);
985
if(!slurmctld_conf.node_prefix
986
|| strcmp(bg_conf->slurm_node_prefix, slurmctld_conf.node_prefix))
987
error("Node Prefix has changed from '%s' to '%s'. "
988
"If this is really what you "
989
"want you will need to restart slurm for this "
990
"change to be enforced in the bluegene plugin.",
991
bg_conf->slurm_node_prefix, slurmctld_conf.node_prefix);
994
return SLURM_SUCCESS;
997
extern List select_p_get_config(void)
999
config_key_pair_t *key_pair;
1000
List my_list = list_create(destroy_config_key_pair);
1003
fatal("malloc failure on list_create");
1005
key_pair = xmalloc(sizeof(config_key_pair_t));
1006
key_pair->name = xstrdup("BasePartitionNodeCnt");
1007
key_pair->value = xstrdup_printf("%u", bg_conf->bp_node_cnt);
1008
list_append(my_list, key_pair);
1010
key_pair = xmalloc(sizeof(config_key_pair_t));
1011
key_pair->name = xstrdup("NodeCPUCnt");
1012
key_pair->value = xstrdup_printf("%u", bg_conf->proc_ratio);
1013
list_append(my_list, key_pair);
1017
key_pair = xmalloc(sizeof(config_key_pair_t));
1018
key_pair->name = xstrdup("BlrtsImage");
1019
key_pair->value = xstrdup(bg_conf->default_blrtsimage);
1020
list_append(my_list, key_pair);
1022
key_pair = xmalloc(sizeof(config_key_pair_t));
1023
key_pair->name = xstrdup("LinuxImage");
1024
key_pair->value = xstrdup(bg_conf->default_linuximage);
1025
list_append(my_list, key_pair);
1027
key_pair = xmalloc(sizeof(config_key_pair_t));
1028
key_pair->name = xstrdup("RamDiskImage");
1029
key_pair->value = xstrdup(bg_conf->default_ramdiskimage);
1030
list_append(my_list, key_pair);
1032
key_pair = xmalloc(sizeof(config_key_pair_t));
1033
key_pair->name = xstrdup("CnloadImage");
1034
key_pair->value = xstrdup(bg_conf->default_linuximage);
1035
list_append(my_list, key_pair);
1037
key_pair = xmalloc(sizeof(config_key_pair_t));
1038
key_pair->name = xstrdup("IoloadImage");
1039
key_pair->value = xstrdup(bg_conf->default_ramdiskimage);
1040
list_append(my_list, key_pair);
1043
key_pair = xmalloc(sizeof(config_key_pair_t));
1044
key_pair->name = xstrdup("BridgeAPILogFile");
1045
key_pair->value = xstrdup(bg_conf->bridge_api_file);
1046
list_append(my_list, key_pair);
1048
key_pair = xmalloc(sizeof(config_key_pair_t));
1049
key_pair->name = xstrdup("BridgeAPIVerbose");
1050
key_pair->value = xstrdup_printf("%u", bg_conf->bridge_api_verb);
1051
list_append(my_list, key_pair);
1053
if(bg_conf->deny_pass) {
1054
key_pair = xmalloc(sizeof(config_key_pair_t));
1055
key_pair->name = xstrdup("DenyPassThrough");
1056
if(bg_conf->deny_pass & PASS_DENY_X)
1057
xstrcat(key_pair->value, "X,");
1058
if(bg_conf->deny_pass & PASS_DENY_Y)
1059
xstrcat(key_pair->value, "Y,");
1060
if(bg_conf->deny_pass & PASS_DENY_Z)
1061
xstrcat(key_pair->value, "Z,");
1063
key_pair->value[strlen(key_pair->value)-1] = '\0';
1064
list_append(my_list, key_pair);
1067
key_pair = xmalloc(sizeof(config_key_pair_t));
1068
key_pair->name = xstrdup("LayoutMode");
1069
switch(bg_conf->layout_mode) {
1071
key_pair->value = xstrdup("Static");
1073
case LAYOUT_OVERLAP:
1074
key_pair->value = xstrdup("Overlap");
1076
case LAYOUT_DYNAMIC:
1077
key_pair->value = xstrdup("Dynamic");
1080
key_pair->value = xstrdup("Unknown");
1083
list_append(my_list, key_pair);
1085
key_pair = xmalloc(sizeof(config_key_pair_t));
1086
key_pair->name = xstrdup("MloaderImage");
1087
key_pair->value = xstrdup(bg_conf->default_mloaderimage);
1088
list_append(my_list, key_pair);
1090
key_pair = xmalloc(sizeof(config_key_pair_t));
1091
key_pair->name = xstrdup("NodeCardNodeCnt");
1092
key_pair->value = xstrdup_printf("%u", bg_conf->nodecard_node_cnt);
1093
list_append(my_list, key_pair);
1095
key_pair = xmalloc(sizeof(config_key_pair_t));
1096
key_pair->name = xstrdup("Numpsets");
1097
key_pair->value = xstrdup_printf("%u", bg_conf->numpsets);
1098
list_append(my_list, key_pair);
1100
list_sort(my_list, (ListCmpF) sort_key_pairs);