~ubuntu-branches/ubuntu/vivid/slurm-llnl/vivid

« back to all changes in this revision

Viewing changes to src/plugins/select/bluegene/plugin/bg_job_place.c

  • Committer: Bazaar Package Importer
  • Author(s): Gennaro Oliva
  • Date: 2009-09-24 23:28:15 UTC
  • mfrom: (1.1.11 upstream) (3.2.4 sid)
  • Revision ID: james.westby@ubuntu.com-20090924232815-enh65jn32q1ebg07
Tags: 2.0.5-1
* New upstream release 
* Changed dependency from lib-mysqlclient15 to lib-mysqlclient 
* Added Default-Start for runlevel 2 and 4 and $remote_fs requirement in
  init.d scripts (Closes: #541252)
* Postinst checks for wrong runlevels 2 and 4 links
* Upgraded to Standards-Version 3.8.3
* Add lintian overrides for missing slurm-llnl-configurator.html in doc
  base registration
* Modified postrm scripts to ignore the pkill return value, to avoid
  postrm failure when no slurm process is running
* Checking for slurmctld.pid before cancelling running and pending
  jobs during package removal 

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*****************************************************************************\
2
2
 *  bg_job_place.c - blue gene job placement (e.g. base block selection)
3
3
 *  functions.
4
 
 *
5
 
 *  $Id: bg_job_place.c 17205 2009-04-09 17:24:11Z da $ 
6
4
 *****************************************************************************
7
5
 *  Copyright (C) 2004-2007 The Regents of the University of California.
 
6
 *  Copyright (C) 2008 Lawrence Livermore National Security.
8
7
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
9
8
 *  Written by Dan Phung <phung4@llnl.gov> and Morris Jette <jette1@llnl.gov>
10
9
 *  
11
10
 *  This file is part of SLURM, a resource management program.
12
 
 *  For details, see <http://www.llnl.gov/linux/slurm/>.
 
11
 *  For details, see <https://computing.llnl.gov/linux/slurm/>.
 
12
 *  Please also read the included file: DISCLAIMER.
13
13
 *  
14
14
 *  SLURM is free software; you can redistribute it and/or modify it under
15
15
 *  the terms of the GNU General Public License as published by the Free
46
46
#include "bluegene.h"
47
47
#include "dynamic_block.h"
48
48
 
49
 
#ifdef HAVE_BG 
50
 
 
51
49
#define _DEBUG 0
52
50
#define MAX_GROUPS 128
53
51
 
63
61
pthread_mutex_t job_list_test_mutex = PTHREAD_MUTEX_INITIALIZER;
64
62
 
65
63
/* This list is for the test_job_list function because we will be
66
 
 * adding and removing blocks off the bg_job_block_list and don't want
67
 
 * to ruin that list in submit_job it should = bg_job_block_list
 
64
 * adding and removing blocks off the bg_lists->job_running and don't want
 
65
 * to ruin that list in submit_job it should = bg_lists->job_running
68
66
 * otherwise it should be a copy of that list.
69
67
 */
70
68
List job_block_test_list = NULL;
99
97
        bool test_only);
100
98
static int _dynamically_request(List block_list, int *blocks_added,
101
99
                                ba_request_t *request,
102
 
                                bitstr_t* slurm_block_bitmap,
103
 
                                char *user_req_nodes);
 
100
                                char *user_req_nodes,
 
101
                                bool test_only);
104
102
static int _find_best_block_match(List block_list, int *blocks_added,
105
103
                                  struct job_record* job_ptr,
106
104
                                  bitstr_t* slurm_block_bitmap,
284
282
                             SELECT_DATA_BLRTS_IMAGE, blrtsimage);
285
283
        
286
284
        if (*blrtsimage) {
287
 
                allow = _test_image_perms(*blrtsimage, bg_blrtsimage_list, 
 
285
                allow = _test_image_perms(*blrtsimage, bg_conf->blrts_list, 
288
286
                                          job_ptr);
289
287
                if (!allow) {
290
288
                        error("User %u:%u is not allowed to use BlrtsImage %s",
298
296
        select_g_get_jobinfo(job_ptr->select_jobinfo,
299
297
                             SELECT_DATA_LINUX_IMAGE, linuximage);
300
298
        if (*linuximage) {
301
 
                allow = _test_image_perms(*linuximage, bg_linuximage_list, 
 
299
                allow = _test_image_perms(*linuximage, bg_conf->linux_list, 
302
300
                                          job_ptr);
303
301
                if (!allow) {
304
302
                        error("User %u:%u is not allowed to use LinuxImage %s",
310
308
        select_g_get_jobinfo(job_ptr->select_jobinfo,
311
309
                             SELECT_DATA_MLOADER_IMAGE, mloaderimage);
312
310
        if (*mloaderimage) {
313
 
                allow = _test_image_perms(*mloaderimage, bg_mloaderimage_list, 
 
311
                allow = _test_image_perms(*mloaderimage,
 
312
                                          bg_conf->mloader_list, 
314
313
                                          job_ptr);
315
314
                if(!allow) {
316
315
                        error("User %u:%u is not allowed "
324
323
        select_g_get_jobinfo(job_ptr->select_jobinfo,
325
324
                             SELECT_DATA_RAMDISK_IMAGE, ramdiskimage);
326
325
        if (*ramdiskimage) {
327
 
                allow = _test_image_perms(*ramdiskimage, bg_ramdiskimage_list, 
 
326
                allow = _test_image_perms(*ramdiskimage,
 
327
                                          bg_conf->ramdisk_list, 
328
328
                                          job_ptr);
329
329
                if(!allow) {
330
330
                        error("User %u:%u is not allowed "
372
372
                        continue;
373
373
                } else if((bg_record->job_running != NO_JOB_RUNNING) 
374
374
                          && (bg_record->job_running != job_ptr->job_id)
375
 
                          && (bluegene_layout_mode == LAYOUT_DYNAMIC 
 
375
                          && (bg_conf->layout_mode == LAYOUT_DYNAMIC 
376
376
                              || (!test_only 
377
 
                                  && bluegene_layout_mode != LAYOUT_DYNAMIC))) {
 
377
                                  && bg_conf->layout_mode != LAYOUT_DYNAMIC))) {
378
378
                        debug("block %s in use by %s job %d", 
379
379
                              bg_record->bg_block_id,
380
380
                              bg_record->user_name,
478
478
                                        continue;                       
479
479
                                } 
480
480
                                goto good_conn_type;
481
 
                        } 
 
481
                        } else if(bg_record->conn_type >= SELECT_SMALL) {
 
482
                                /* since we already checked to see if
 
483
                                   the cpus were good this means we are
 
484
                                   looking for a block in a range that
 
485
                                   includes small and regular blocks.
 
486
                                   So we can just continue on.
 
487
                                */
 
488
                                goto good_conn_type;                            
 
489
                        }
 
490
                        
482
491
#endif
483
492
                        debug("bg block %s conn-type not usable asking for %s "
484
493
                              "bg_record is %s", 
537
546
        int overlap = 0;
538
547
 
539
548
         /* this test only is for actually picking a block not testing */
540
 
        if(test_only && bluegene_layout_mode == LAYOUT_DYNAMIC)
 
549
        if(test_only && bg_conf->layout_mode == LAYOUT_DYNAMIC)
541
550
                return rc;
542
551
 
543
552
        /* Make sure no other blocks are under this block 
604
613
                         * overlapping that we could avoid freeing if
605
614
                         * we choose something else
606
615
                         */
607
 
                        if(bluegene_layout_mode == LAYOUT_OVERLAP
 
616
                        if(bg_conf->layout_mode == LAYOUT_OVERLAP
608
617
                           && ((overlap_check == 0 && bg_record->state 
609
618
                                != RM_PARTITION_READY)
610
619
                               || (overlap_check == 1 && found_record->state 
636
645
                                              found_record->job_running,
637
646
                                              found_record->bg_block_id);
638
647
                                
639
 
                                if(bluegene_layout_mode == LAYOUT_DYNAMIC) {
 
648
                                if(bg_conf->layout_mode == LAYOUT_DYNAMIC) {
 
649
                                        List temp_list = list_create(NULL);
640
650
                                        /* this will remove and
641
651
                                         * destroy the memory for
642
652
                                         * bg_record
649
659
                                                found_record =
650
660
                                                        bg_record->original;
651
661
                                                remove_from_bg_list(
652
 
                                                        bg_list, found_record);
 
662
                                                        bg_lists->main,
 
663
                                                        found_record);
653
664
                                        } else {
654
665
                                                debug("looking for original");
655
666
                                                found_record =
656
667
                                                        find_and_remove_org_from_bg_list(
657
 
                                                                bg_list,
 
668
                                                                bg_lists->main,
658
669
                                                                bg_record);
659
670
                                        }
660
 
                                        destroy_bg_record(bg_record);
 
671
 
 
672
                                        debug("Removing unusable block %s "
 
673
                                              "from the system.",
 
674
                                              bg_record->bg_block_id);
 
675
                                        
661
676
                                        if(!found_record) {
662
 
                                                /* There may be a bug
663
 
                                                   here where on a real
664
 
                                                   system we don't go
665
 
                                                   destroy this block
666
 
                                                   in the real system.
667
 
                                                   If that is the case we
668
 
                                                   need to add the
669
 
                                                   bg_record to the
670
 
                                                   free_block_list
671
 
                                                   instead of destroying
672
 
                                                   it like above.
673
 
                                                */ 
674
 
                                                debug("This record wasn't "
675
 
                                                      "found in the bg_list, "
 
677
                                                debug("This record %s wasn't "
 
678
                                                      "found in the "
 
679
                                                      "bg_lists->main, "
676
680
                                                      "no big deal, it "
677
 
                                                      "probably wasn't added");
678
 
                                                //rc = SLURM_ERROR;
679
 
                                        } else {
680
 
                                                debug("removing the block "
681
 
                                                      "from the system");
682
 
                                                List temp_list =
683
 
                                                        list_create(NULL);
684
 
                                                list_push(temp_list, 
685
 
                                                          found_record);
686
 
                                                num_block_to_free++;
687
 
                                                free_block_list(temp_list);
688
 
                                                list_destroy(temp_list);
689
 
                                        }
 
681
                                                      "probably wasn't added",
 
682
                                                      bg_record->bg_block_id);
 
683
                                                found_record = bg_record;
 
684
                                        } else
 
685
                                                destroy_bg_record(bg_record);
 
686
                                        
 
687
                                        list_push(temp_list, found_record);
 
688
                                        free_block_list(temp_list);
 
689
                                        list_destroy(temp_list);
 
690
                                        
690
691
                                        slurm_mutex_unlock(&block_state_mutex);
691
692
                                } 
692
693
                                rc = 1;
708
709
 
709
710
static int _dynamically_request(List block_list, int *blocks_added,
710
711
                                ba_request_t *request,
711
 
                                bitstr_t* slurm_block_bitmap,
712
 
                                char *user_req_nodes)
 
712
                                char *user_req_nodes,
 
713
                                bool test_only)
713
714
{
714
715
        List list_of_lists = NULL;
715
716
        List temp_list = NULL;
727
728
                list_append(list_of_lists, job_block_test_list);
728
729
        else {
729
730
                list_append(list_of_lists, block_list);
730
 
                if(job_block_test_list == bg_job_block_list &&
731
 
                   list_count(block_list) != list_count(bg_booted_block_list)) {
732
 
                        list_append(list_of_lists, bg_booted_block_list);
733
 
                        if(list_count(bg_booted_block_list) 
 
731
                if(job_block_test_list == bg_lists->job_running &&
 
732
                   list_count(block_list) != list_count(bg_lists->booted)) {
 
733
                        list_append(list_of_lists, bg_lists->booted);
 
734
                        if(list_count(bg_lists->booted) 
734
735
                           != list_count(job_block_test_list)) 
735
736
                                list_append(list_of_lists, job_block_test_list);
736
737
                } else if(list_count(block_list) 
756
757
                        while((bg_record = list_pop(new_blocks))) {
757
758
                                if(block_exist_in_list(block_list, bg_record))
758
759
                                        destroy_bg_record(bg_record);
759
 
                                else {
 
760
                                else if(test_only) {
 
761
                                        list_append(block_list, bg_record);
 
762
                                        (*blocks_added) = 1;
 
763
                                } else {
760
764
                                        if(job_block_test_list 
761
 
                                           == bg_job_block_list) {
 
765
                                           == bg_lists->job_running) {
762
766
                                                if(configure_block(bg_record)
763
767
                                                   == SLURM_ERROR) {
764
768
                                                        destroy_bg_record(
775
779
                                        list_append(block_list, bg_record);
776
780
                                        print_bg_record(bg_record);
777
781
                                        (*blocks_added) = 1;
778
 
                                }
 
782
                                }       
779
783
                        }
780
784
                        list_destroy(new_blocks);
781
785
                        if(!*blocks_added) {
782
 
                                memcpy(request->geometry, start_geo,      
 
786
                                memcpy(request->geometry, start_geo,
783
787
                                       sizeof(int)*BA_SYSTEM_DIMENSIONS); 
784
788
                                rc = SLURM_ERROR;
785
789
                                continue;
786
790
                        }
787
791
                        list_sort(block_list,
788
792
                                  (ListCmpF)_bg_record_sort_aval_dec);
789
 
        
 
793
                        
790
794
                        rc = SLURM_SUCCESS;
791
795
                        break;
792
796
                } else if (errno == ESLURM_INTERCONNECT_FAILURE) {
848
852
 
849
853
        if(!total_cpus)
850
854
                total_cpus = DIM_SIZE[X] * DIM_SIZE[Y] * DIM_SIZE[Z] 
851
 
                        * procs_per_node;
 
855
                        * bg_conf->procs_per_bp;
852
856
 
853
857
        if(req_nodes > max_nodes) {
854
858
                error("can't run this job max bps is %u asking for %u",
908
912
                }
909
913
                if(!req_nodes)
910
914
                        req_nodes = min_nodes;
911
 
        }
912
 
        if (target_size == 0) { /* no geometry specified */
913
 
                if(job_ptr->details->req_nodes 
914
 
                   && !start_req) {
915
 
                        bg_record_t *tmp_record = NULL;
916
 
                        char *tmp_nodes= job_ptr->details->req_nodes;
917
 
                        int len = strlen(tmp_nodes);
918
 
                        
919
 
                        i = 0;
920
 
                        while(i<len 
921
 
                              && tmp_nodes[i] != '[' 
922
 
                              && (tmp_nodes[i] < '0' || tmp_nodes[i] > 'Z'
923
 
                                  || (tmp_nodes[i] > '9'
924
 
                                      && tmp_nodes[i] < 'A')))
925
 
                                i++;
926
 
                        
927
 
                        if(i<len) {
928
 
                                len -= i;
929
 
                                tmp_record = xmalloc(sizeof(bg_record_t));
930
 
                                tmp_record->bg_block_list =
931
 
                                        list_create(destroy_ba_node);
932
 
                                
933
 
                                len += strlen(bg_slurm_node_prefix)+1;
934
 
                                tmp_record->nodes = xmalloc(len);
935
 
                                
936
 
                                snprintf(tmp_record->nodes,
937
 
                                         len,
938
 
                                         "%s%s", 
939
 
                                         bg_slurm_node_prefix, 
940
 
                                         tmp_nodes+i);
941
 
                                
942
 
                        
943
 
                                process_nodes(tmp_record, false);
944
 
                                for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) {
945
 
                                        req_geometry[i] = tmp_record->geo[i];
946
 
                                        start[i] = tmp_record->start[i];
947
 
                                }
948
 
                                destroy_bg_record(tmp_record);
949
 
                                select_g_set_jobinfo(job_ptr->select_jobinfo,
950
 
                                                     SELECT_DATA_GEOMETRY, 
951
 
                                                     &req_geometry);
952
 
                                select_g_set_jobinfo(job_ptr->select_jobinfo,
953
 
                                                     SELECT_DATA_START, 
954
 
                                                     &start);
955
 
                                start_req = 1;
956
 
                        }  else 
957
 
                                error("BPs=%s is in a weird format", 
958
 
                                      tmp_nodes); 
959
 
                } else {
960
 
                        req_geometry[X] = (uint16_t)NO_VAL;
961
 
                }
 
915
        } else {
 
916
                req_geometry[X] = (uint16_t)NO_VAL;
962
917
                target_size = min_nodes;
963
918
        }
964
919
        
1000
955
         *  need to set a max_procs if given
1001
956
         */
1002
957
        if(max_procs == (uint32_t)NO_VAL) 
1003
 
                max_procs = max_nodes * procs_per_node;
 
958
                max_procs = max_nodes * bg_conf->procs_per_bp;
1004
959
        
1005
960
        while(1) {
1006
961
                /* Here we are creating a list of all the blocks that
1008
963
                 * works we will have can look and see the earliest
1009
964
                 * the job can start.  This doesn't apply to Dynamic mode.
1010
965
                 */ 
1011
 
                if(test_only && bluegene_layout_mode != LAYOUT_DYNAMIC) 
 
966
                if(test_only && bg_conf->layout_mode != LAYOUT_DYNAMIC) 
1012
967
                        overlapped_list = list_create(NULL);
1013
968
                
1014
969
                bg_record = _find_matching_block(block_list, 
1021
976
                                                 overlapped_list,
1022
977
                                                 test_only);
1023
978
                if(!bg_record && test_only
1024
 
                   && bluegene_layout_mode != LAYOUT_DYNAMIC
 
979
                   && bg_conf->layout_mode != LAYOUT_DYNAMIC
1025
980
                   && list_count(overlapped_list)) {
1026
981
                        ListIterator itr =
1027
982
                                list_iterator_create(overlapped_list);
1035
990
                        list_iterator_destroy(itr);
1036
991
                }
1037
992
                
1038
 
                if(test_only && bluegene_layout_mode != LAYOUT_DYNAMIC)
 
993
                if(test_only && bg_conf->layout_mode != LAYOUT_DYNAMIC)
1039
994
                        list_destroy(overlapped_list);
1040
995
 
1041
996
                /* set the bitmap and do other allocation activities */
1080
1035
 
1081
1036
                /* all these assume that the *bg_record is NULL */
1082
1037
 
1083
 
                if(bluegene_layout_mode == LAYOUT_OVERLAP
 
1038
                if(bg_conf->layout_mode == LAYOUT_OVERLAP
1084
1039
                   && !test_only && overlap_check < 2) {
1085
1040
                        overlap_check++;
1086
1041
                        continue;
1087
1042
                }
1088
1043
                
1089
 
                if(create_try || bluegene_layout_mode != LAYOUT_DYNAMIC)
 
1044
                if(create_try || bg_conf->layout_mode != LAYOUT_DYNAMIC)
1090
1045
                        goto no_match;
1091
1046
                
1092
1047
                if((rc = _dynamically_request(block_list, blocks_added,
1093
1048
                                              &request, 
1094
 
                                              slurm_block_bitmap, 
1095
 
                                              job_ptr->details->req_nodes))
 
1049
                                              job_ptr->details->req_nodes,
 
1050
                                              test_only))
1096
1051
                   == SLURM_SUCCESS) {
1097
1052
                        create_try = 1;
1098
1053
                        continue;
1104
1059
                        List job_list = NULL;
1105
1060
                        debug("trying with empty machine");
1106
1061
                        slurm_mutex_lock(&block_state_mutex);
1107
 
                        if(job_block_test_list == bg_job_block_list) 
 
1062
                        if(job_block_test_list == bg_lists->job_running) 
1108
1063
                                job_list = copy_bg_list(job_block_test_list);
1109
1064
                        else
1110
1065
                                job_list = job_block_test_list;
1134
1089
                                                debug2("taking off (%s) "
1135
1090
                                                       "which is in an error "
1136
1091
                                                       "state",
1137
 
                                                       bg_record->job_running,
1138
 
                                                       bg_record->bg_block_id,
1139
 
                                                       bg_record->job_ptr->
1140
 
                                                       start_time,
1141
 
                                                       bg_record->job_ptr->
1142
 
                                                       end_time);
 
1092
                                                       bg_record->bg_block_id);
1143
1093
                                } else 
1144
1094
                                        /* This means we didn't have
1145
1095
                                           any jobs to take off
1154
1104
                                        destroy_bg_record(bg_record);
1155
1105
                                        if(errno == ESLURM_INTERCONNECT_FAILURE
1156
1106
                                           || !list_count(job_list)) {
1157
 
                                                error("this job will never "
1158
 
                                                      "run on this system");
 
1107
                                                char *nodes;
 
1108
                                                if (slurmctld_conf.
 
1109
                                                    slurmctld_debug < 5)
 
1110
                                                        break;
 
1111
                                                nodes = bitmap2node_name(
 
1112
                                                        slurm_block_bitmap);
 
1113
                                                debug("job %u not "
 
1114
                                                      "runable on %s",
 
1115
                                                      job_ptr->job_id,
 
1116
                                                      nodes);
 
1117
                                                xfree(nodes);
1159
1118
                                                break;
1160
1119
                                        }
1161
1120
                                        continue;
1186
1145
                                        destroy_bg_record(bg_record);
1187
1146
                                }
1188
1147
                                        
1189
 
                                if(job_block_test_list != bg_job_block_list) {
 
1148
                                if(job_block_test_list 
 
1149
                                   != bg_lists->job_running) {
1190
1150
                                        list_append(block_list,
1191
1151
                                                    (*found_bg_record));
1192
1152
                                        while((bg_record = 
1208
1168
                                break;
1209
1169
                        }
1210
1170
 
1211
 
                        if(job_block_test_list == bg_job_block_list) 
 
1171
                        if(job_block_test_list == bg_lists->job_running) 
1212
1172
                                list_destroy(job_list);
1213
1173
 
1214
1174
                        goto end_it;
1244
1204
        itr = list_iterator_create(full_list);
1245
1205
        itr2 = list_iterator_create(incomp_list);
1246
1206
        while((new_record = list_next(itr))) {
 
1207
                /* Make sure we aren't adding any block that doesn't
 
1208
                   have a block_id.
 
1209
                */
 
1210
                if(!new_record->bg_block_id)
 
1211
                        continue;
1247
1212
                while((bg_record = list_next(itr2))) {
1248
1213
                        if(bit_equal(bg_record->bitmap, new_record->bitmap)
1249
1214
                           && bit_equal(bg_record->ionode_bitmap,
1252
1217
                } 
1253
1218
 
1254
1219
                if(!bg_record) {
1255
 
                        bg_record = xmalloc(sizeof(bg_record_t));
1256
 
                        copy_bg_record(new_record, bg_record);
1257
 
                        debug4("adding %s", bg_record->bg_block_id);
1258
 
                        list_append(incomp_list, bg_record);
 
1220
                        list_remove(itr);
 
1221
                        debug4("adding %s", new_record->bg_block_id);
 
1222
                        list_append(incomp_list, new_record);
1259
1223
                        count++;
1260
1224
                } 
1261
1225
                list_iterator_reset(itr2);
1267
1231
        return count;
1268
1232
}
1269
1233
 
1270
 
#endif // HAVE_BG
 
1234
static void _build_select_struct(struct job_record *job_ptr, bitstr_t *bitmap)
 
1235
{
 
1236
        int i, j, k;
 
1237
        int first_bit, last_bit;
 
1238
        uint32_t node_cpus, total_cpus = 0, node_cnt;
 
1239
        select_job_res_t select_ptr;
 
1240
 
 
1241
        if (job_ptr->select_job) {
 
1242
                error("select_p_job_test: already have select_job");
 
1243
                free_select_job_res(&job_ptr->select_job);
 
1244
        }
 
1245
 
 
1246
 
 
1247
        node_cnt = bit_set_count(bitmap);
 
1248
        job_ptr->select_job = select_ptr = create_select_job_res();
 
1249
        select_ptr->cpu_array_reps = xmalloc(sizeof(uint32_t) * node_cnt);
 
1250
        select_ptr->cpu_array_value = xmalloc(sizeof(uint16_t) * node_cnt);
 
1251
        select_ptr->cpus = xmalloc(sizeof(uint16_t) * node_cnt);
 
1252
        select_ptr->cpus_used = xmalloc(sizeof(uint16_t) * node_cnt);
 
1253
        select_ptr->nhosts = node_cnt;
 
1254
        select_ptr->node_bitmap = bit_copy(bitmap);
 
1255
        if (select_ptr->node_bitmap == NULL)
 
1256
                fatal("bit_copy malloc failure");
 
1257
        select_ptr->nprocs = job_ptr->num_procs;
 
1258
        if (build_select_job_res(select_ptr, (void *)node_record_table_ptr, 1))
 
1259
                error("select_p_job_test: build_select_job_res: %m");
 
1260
 
 
1261
        if (job_ptr->num_procs <= bg_conf->procs_per_bp)
 
1262
                node_cpus = job_ptr->num_procs;
 
1263
        else
 
1264
                node_cpus = bg_conf->procs_per_bp;
 
1265
 
 
1266
        first_bit = bit_ffs(bitmap);
 
1267
        last_bit  = bit_fls(bitmap);
 
1268
        for (i=first_bit, j=0, k=-1; i<=last_bit; i++) {
 
1269
                if (!bit_test(bitmap, i))
 
1270
                        continue;
 
1271
 
 
1272
                select_ptr->cpus[j] = node_cpus;
 
1273
                if ((k == -1) || 
 
1274
                    (select_ptr->cpu_array_value[k] != node_cpus)) {
 
1275
                        select_ptr->cpu_array_cnt++;
 
1276
                        select_ptr->cpu_array_reps[++k] = 1;
 
1277
                        select_ptr->cpu_array_value[k] = node_cpus;
 
1278
                } else
 
1279
                        select_ptr->cpu_array_reps[k]++;
 
1280
                total_cpus += node_cpus;
 
1281
 
 
1282
                if (set_select_job_res_node(select_ptr, j))
 
1283
                        error("select_p_job_test: set_select_job_res_node: %m");
 
1284
                j++;
 
1285
        }
 
1286
        if (select_ptr->nprocs != total_cpus) {
 
1287
                error("select_p_job_test: nprocs mismatch %u != %u",
 
1288
                      select_ptr->nprocs, total_cpus);
 
1289
        }
 
1290
}
1271
1291
 
1272
1292
/*
1273
1293
 * Try to find resources for a given job request
1286
1306
                      uint32_t req_nodes, int mode)
1287
1307
{
1288
1308
        int rc = SLURM_SUCCESS;
1289
 
#ifdef HAVE_BG
1290
1309
        bg_record_t* bg_record = NULL;
1291
1310
        char buf[100];
1292
1311
        uint16_t conn_type = (uint16_t)NO_VAL;
1302
1321
        else    
1303
1322
                return EINVAL;  /* something not yet supported */
1304
1323
 
1305
 
        if(bluegene_layout_mode == LAYOUT_DYNAMIC)
 
1324
        if(bg_conf->layout_mode == LAYOUT_DYNAMIC)
1306
1325
                slurm_mutex_lock(&create_dynamic_mutex);
1307
1326
 
1308
 
        job_block_test_list = bg_job_block_list;
 
1327
        job_block_test_list = bg_lists->job_running;
1309
1328
        
1310
1329
        select_g_get_jobinfo(job_ptr->select_jobinfo,
1311
1330
                             SELECT_DATA_CONN_TYPE, &conn_type);
1312
1331
        if(conn_type == SELECT_NAV) {
1313
1332
                uint32_t max_procs = (uint32_t)NO_VAL;
1314
 
                if(bluegene_bp_node_cnt == bluegene_nodecard_node_cnt)
 
1333
                if(bg_conf->bp_node_cnt == bg_conf->nodecard_node_cnt)
1315
1334
                        conn_type = SELECT_SMALL;
1316
1335
                else if(min_nodes > 1) {
1317
1336
                        conn_type = SELECT_TORUS;
1324
1343
                        select_g_get_jobinfo(job_ptr->select_jobinfo,
1325
1344
                                             SELECT_DATA_MAX_PROCS,
1326
1345
                                             &max_procs);
1327
 
                        if((max_procs > procs_per_node)
 
1346
                        if((max_procs > bg_conf->procs_per_bp)
1328
1347
                           || (max_procs == NO_VAL))
1329
1348
                                conn_type = SELECT_TORUS;
1330
1349
                        else
1336
1355
        }
1337
1356
        select_g_sprint_jobinfo(job_ptr->select_jobinfo, buf, sizeof(buf), 
1338
1357
                                SELECT_PRINT_MIXED);
1339
 
        debug("bluegene:submit_job: %s nodes=%u-%u-%u", 
1340
 
              buf, min_nodes, req_nodes, max_nodes);
 
1358
        debug("bluegene:submit_job: %d %s nodes=%u-%u-%u", 
 
1359
              mode, buf, min_nodes, req_nodes, max_nodes);
1341
1360
        select_g_sprint_jobinfo(job_ptr->select_jobinfo, buf, sizeof(buf), 
1342
1361
                                SELECT_PRINT_BLRTS_IMAGE);
1343
1362
#ifdef HAVE_BGL
1362
1381
        debug2("RamDiskIoLoadImage=%s", buf);
1363
1382
#endif  
1364
1383
        slurm_mutex_lock(&block_state_mutex);
1365
 
        block_list = copy_bg_list(bg_list);
 
1384
        block_list = copy_bg_list(bg_lists->main);
1366
1385
        slurm_mutex_unlock(&block_state_mutex);
1367
1386
        
1368
1387
        list_sort(block_list, (ListCmpF)_bg_record_sort_aval_dec);
1401
1420
                                             bg_record->ionodes);
1402
1421
                        
1403
1422
                        if(!bg_record->bg_block_id) {
1404
 
                                uint16_t geo[BA_SYSTEM_DIMENSIONS];
1405
 
                                
1406
1423
                                debug2("%d can start unassigned job %u at "
1407
1424
                                       "%u on %s",
1408
1425
                                       test_only, job_ptr->job_id, starttime,
1409
1426
                                       bg_record->nodes);
1410
1427
                                select_g_set_jobinfo(job_ptr->select_jobinfo,
1411
 
                                             SELECT_DATA_BLOCK_ID,
1412
 
                                             "unassigned");
1413
 
 
 
1428
                                                     SELECT_DATA_BLOCK_ID,
 
1429
                                                     "unassigned");
 
1430
                                
1414
1431
                                min_nodes = bg_record->node_cnt;
1415
1432
                                select_g_set_jobinfo(job_ptr->select_jobinfo,
1416
 
                                             SELECT_DATA_NODE_CNT,
1417
 
                                             &min_nodes);
1418
 
                                memset(geo, 0, 
1419
 
                                       sizeof(uint16_t) * BA_SYSTEM_DIMENSIONS);
1420
 
                                select_g_set_jobinfo(job_ptr->select_jobinfo,
1421
 
                                                     SELECT_DATA_GEOMETRY, 
1422
 
                                                     &geo);
 
1433
                                                     SELECT_DATA_NODE_CNT,
 
1434
                                                     &min_nodes);
1423
1435
                                /* This is a fake record so we need to
1424
1436
                                 * destroy it after we get the info from
1425
 
                                 * it */
1426
 
                                destroy_bg_record(bg_record);
 
1437
                                 * it.  If it was just testing then
 
1438
                                 * we added this record to the
 
1439
                                 * block_list.  If this is the case
 
1440
                                 * it will be set below, but set
 
1441
                                 * blocks_added to 0 since we don't
 
1442
                                 * want to sync this with the list. */
 
1443
                                if(!blocks_added)
 
1444
                                        destroy_bg_record(bg_record);
 
1445
                                blocks_added = 0;
1427
1446
                        } else {
1428
1447
                                if((bg_record->ionodes)
1429
1448
                                   && (job_ptr->part_ptr->max_share <= 1))
1441
1460
                                select_g_set_jobinfo(job_ptr->select_jobinfo,
1442
1461
                                                     SELECT_DATA_NODE_CNT, 
1443
1462
                                                     &bg_record->node_cnt);
1444
 
                                select_g_set_jobinfo(job_ptr->select_jobinfo,
1445
 
                                                     SELECT_DATA_GEOMETRY, 
1446
 
                                                     &bg_record->geo);
1447
1463
 
1448
1464
                                /* tmp16 = bg_record->conn_type; */
1449
1465
/*                              select_g_set_jobinfo(job_ptr->select_jobinfo, */
1450
1466
/*                                                   SELECT_DATA_CONN_TYPE,  */
1451
1467
/*                                                   &tmp16); */
1452
1468
                        }
 
1469
                        if (mode == SELECT_MODE_RUN_NOW) {
 
1470
                                _build_select_struct(job_ptr, 
 
1471
                                                     slurm_block_bitmap);
 
1472
                        }
1453
1473
                } else {
1454
1474
                        error("we got a success, but no block back");
1455
1475
                }
1456
1476
        }
1457
1477
 
1458
 
        if(bluegene_layout_mode == LAYOUT_DYNAMIC) {            
 
1478
        if(bg_conf->layout_mode == LAYOUT_DYNAMIC) {            
1459
1479
                slurm_mutex_lock(&block_state_mutex);
1460
1480
                if(blocks_added) 
1461
 
                        _sync_block_lists(block_list, bg_list);         
 
1481
                        _sync_block_lists(block_list, bg_lists->main);          
1462
1482
                slurm_mutex_unlock(&block_state_mutex);
1463
1483
                slurm_mutex_unlock(&create_dynamic_mutex);
1464
1484
        }
1465
1485
 
1466
1486
        list_destroy(block_list);
1467
 
#endif
1468
1487
        return rc;
1469
1488
}
1470
1489
 
1471
1490
extern int test_job_list(List req_list)
1472
1491
{
1473
1492
        int rc = SLURM_SUCCESS;
1474
 
#ifdef HAVE_BG
1475
1493
        bg_record_t* bg_record = NULL;
1476
1494
        bg_record_t* new_record = NULL;
1477
1495
        char buf[100];
1484
1502
 
1485
1503
        slurm_mutex_lock(&job_list_test_mutex);
1486
1504
        
1487
 
        if(bluegene_layout_mode == LAYOUT_DYNAMIC)
 
1505
        if(bg_conf->layout_mode == LAYOUT_DYNAMIC)
1488
1506
                slurm_mutex_lock(&create_dynamic_mutex);
1489
1507
 
1490
 
        job_block_test_list = copy_bg_list(bg_job_block_list);
 
1508
        job_block_test_list = copy_bg_list(bg_lists->job_running);
1491
1509
 
1492
1510
        slurm_mutex_lock(&block_state_mutex);
1493
 
        block_list = copy_bg_list(bg_list);
 
1511
        block_list = copy_bg_list(bg_lists->main);
1494
1512
        slurm_mutex_unlock(&block_state_mutex);
1495
1513
 
1496
1514
        itr = list_iterator_create(req_list);
1520
1538
                                        will_run->job_ptr->select_jobinfo,
1521
1539
                                        SELECT_DATA_MAX_PROCS,
1522
1540
                                        &max_procs);
1523
 
                                if((max_procs > procs_per_node)
 
1541
                                if((max_procs > bg_conf->procs_per_bp)
1524
1542
                                   || (max_procs == NO_VAL))
1525
1543
                                        conn_type = SELECT_TORUS;
1526
1544
                                else
1640
1658
/*                                              SELECT_DATA_BLOCK_ID, */
1641
1659
/*                                              "unassigned"); */
1642
1660
/*                                      if(will_run->job_ptr->num_procs */
1643
 
/*                                         < bluegene_bp_node_cnt  */
 
1661
/*                                         < bg_conf->bp_node_cnt  */
1644
1662
/*                                         && will_run->job_ptr->num_procs */
1645
1663
/*                                         > 0) { */
1646
 
/*                                              i = procs_per_node/ */
 
1664
/*                                              i = bg_conf->procs_per_bp/ */
1647
1665
/*                                                      will_run->job_ptr-> */
1648
1666
/*                                                      num_procs; */
1649
1667
/*                                              debug2("divide by %d", i); */
1650
1668
/*                                      } else  */
1651
1669
/*                                              i = 1; */
1652
1670
/*                                      will_run->min_nodes *=  */
1653
 
/*                                              bluegene_bp_node_cnt/i; */
 
1671
/*                                              bg_conf->bp_node_cnt/i; */
1654
1672
/*                                      select_g_set_jobinfo( */
1655
1673
/*                                              will_run->job_ptr-> */
1656
1674
/*                                              select_jobinfo, */
1708
1726
        }
1709
1727
        list_iterator_destroy(itr);
1710
1728
 
1711
 
        if(bluegene_layout_mode == LAYOUT_DYNAMIC)              
 
1729
        if(bg_conf->layout_mode == LAYOUT_DYNAMIC)              
1712
1730
                slurm_mutex_unlock(&create_dynamic_mutex);
1713
1731
        
1714
1732
 
1716
1734
        list_destroy(job_block_test_list);
1717
1735
        
1718
1736
        slurm_mutex_unlock(&job_list_test_mutex);
1719
 
#endif
1720
1737
        return rc;
1721
1738
}