~ubuntu-branches/ubuntu/vivid/slurm-llnl/vivid

« back to all changes in this revision

Viewing changes to src/plugins/select/bluegene/plugin/bluegene.c

  • Committer: Bazaar Package Importer
  • Author(s): Gennaro Oliva
  • Date: 2009-09-24 23:28:15 UTC
  • mfrom: (1.1.11 upstream) (3.2.4 sid)
  • Revision ID: james.westby@ubuntu.com-20090924232815-enh65jn32q1ebg07
Tags: 2.0.5-1
* New upstream release 
* Changed dependency from lib-mysqlclient15 to lib-mysqlclient
* Added Default-Start for runlevel 2 and 4 and $remote_fs requirement in
  init.d scripts (Closes: #541252)
* Postinst checks for wrong runlevels 2 and 4 links
* Upgraded to Standards-Version 3.8.3
* Add lintian overrides for missing slurm-llnl-configurator.html in the
  doc-base registration
* Modified postrm scripts to ignore the pkill return value, so that postrm
  does not fail when no slurm process is running
* Checking for slurmctld.pid before cancelling running and pending
  jobs during package removal 

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*****************************************************************************\
2
2
 *  bluegene.c - blue gene node configuration processing module. 
3
3
 *
4
 
 *  $Id: bluegene.c 17202 2009-04-09 16:56:23Z da $
 
4
 *  $Id: bluegene.c 17982 2009-06-26 16:25:33Z da $
5
5
 *****************************************************************************
6
6
 *  Copyright (C) 2004 The Regents of the University of California.
7
7
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8
8
 *  Written by Danny Auble <auble1@llnl.gov> et. al.
9
9
 *  
10
10
 *  This file is part of SLURM, a resource management program.
11
 
 *  For details, see <http://www.llnl.gov/linux/slurm/>.
 
11
 *  For details, see <https://computing.llnl.gov/linux/slurm/>.
 
12
 *  Please also read the included file: DISCLAIMER.
12
13
 *  
13
14
 *  SLURM is free software; you can redistribute it and/or modify it under
14
15
 *  the terms of the GNU General Public License as published by the Free
46
47
 
47
48
#define _DEBUG 0
48
49
 
49
 
char* bg_conf = NULL;
50
 
 
51
50
/* Global variables */
52
 
List bg_list = NULL;                    /* total list of bg_record entries */
53
 
List bg_curr_block_list = NULL;         /* current bg blocks in bluegene.conf*/
54
 
List bg_job_block_list = NULL;          /* jobs running in these blocks */
55
 
List bg_booted_block_list = NULL;       /* blocks that are booted */
56
 
List bg_freeing_list = NULL;            /* blocks that being freed */
57
 
 
58
 
#ifdef HAVE_BGL
59
 
List bg_blrtsimage_list = NULL;
60
 
#endif
61
 
List bg_linuximage_list = NULL;
62
 
List bg_mloaderimage_list = NULL;
63
 
List bg_ramdiskimage_list = NULL;
64
 
#ifdef HAVE_BGL
65
 
char *default_blrtsimage = NULL;
66
 
#endif
67
 
List bg_valid_small32 = NULL;
68
 
List bg_valid_small64 = NULL;
69
 
List bg_valid_small128 = NULL;
70
 
List bg_valid_small256 = NULL;
71
 
char *default_linuximage = NULL;
72
 
char *default_mloaderimage = NULL, *default_ramdiskimage = NULL;
73
 
char *bridge_api_file = NULL; 
74
 
char *bg_slurm_user_name = NULL;
75
 
char *bg_slurm_node_prefix = NULL;
76
 
bg_layout_t bluegene_layout_mode = NO_VAL;
77
 
double bluegene_io_ratio = 0.0;
78
 
double bluegene_nc_ratio = 0.0;
79
 
uint32_t bluegene_smallest_block = 512;
80
 
uint16_t bluegene_proc_ratio = 0;
81
 
uint16_t bluegene_numpsets = 0;
82
 
uint16_t bluegene_bp_node_cnt = 0;
83
 
uint16_t bluegene_bp_nodecard_cnt = 0;
84
 
uint16_t bluegene_quarter_node_cnt = 0;
85
 
uint16_t bluegene_quarter_ionode_cnt = 0;
86
 
uint16_t bluegene_nodecard_node_cnt = 0;
87
 
uint16_t bluegene_nodecard_ionode_cnt = 0;
88
 
uint16_t bridge_api_verb = 0;
89
 
 
 
51
 
 
52
bg_config_t *bg_conf = NULL;
 
53
bg_lists_t *bg_lists = NULL;
90
54
bool agent_fini = false;
91
55
time_t last_bg_update;
92
56
pthread_mutex_t block_state_mutex = PTHREAD_MUTEX_INITIALIZER;
112
76
#endif
113
77
 
114
78
 
 
79
static void _destroy_bg_config(bg_config_t *bg_conf);
 
80
static void _destroy_bg_lists(bg_lists_t *bg_lists);
 
81
 
115
82
static void _set_bg_lists();
116
 
static int  _validate_config_nodes(List *bg_found_block_list, char *dir);
117
 
static int _delete_old_blocks(List bg_found_block_list);
 
83
static int  _validate_config_nodes(List curr_block_list, 
 
84
                                   List found_block_list, char *dir);
 
85
static int _delete_old_blocks(List curr_block_list, 
 
86
                              List found_block_list);
118
87
static char *_get_bg_conf(void);
119
88
static int  _reopen_bridge_log(void);
120
89
static void _destroy_bitmap(void *object);
122
91
/* Initialize all plugin variables */
123
92
extern int init_bg(void)
124
93
{
 
94
        _set_bg_lists();
 
95
 
 
96
        if(!bg_conf)
 
97
                bg_conf = xmalloc(sizeof(bg_config_t));
 
98
 
 
99
        xfree(bg_conf->slurm_user_name);
 
100
        xfree(bg_conf->slurm_node_prefix);
 
101
        slurm_conf_lock();
 
102
        xassert(slurmctld_conf.slurm_user_name);
 
103
        xassert(slurmctld_conf.node_prefix);
 
104
        bg_conf->slurm_user_name = xstrdup(slurmctld_conf.slurm_user_name);
 
105
        bg_conf->slurm_node_prefix = xstrdup(slurmctld_conf.node_prefix);
 
106
        slurm_conf_unlock();    
 
107
 
 
108
#ifdef HAVE_BGL
 
109
        if(bg_conf->blrts_list)
 
110
                list_destroy(bg_conf->blrts_list);
 
111
        bg_conf->blrts_list = list_create(destroy_image);
 
112
#endif
 
113
        if(bg_conf->linux_list)
 
114
                list_destroy(bg_conf->linux_list);
 
115
        bg_conf->linux_list = list_create(destroy_image);
 
116
        if(bg_conf->mloader_list)
 
117
                list_destroy(bg_conf->mloader_list);
 
118
        bg_conf->mloader_list = list_create(destroy_image);
 
119
        if(bg_conf->ramdisk_list)
 
120
                list_destroy(bg_conf->ramdisk_list);
 
121
        bg_conf->ramdisk_list = list_create(destroy_image);     
 
122
 
125
123
        ba_init(NULL);
126
124
 
127
125
        info("BlueGene plugin loaded successfully");
144
142
                pthread_cond_wait(&freed_cond, &freed_cnt_mutex);
145
143
        if(destroy_cnt)
146
144
                pthread_cond_wait(&destroy_cond, &freed_cnt_mutex);
147
 
        
148
 
        if (bg_list) {
149
 
                list_destroy(bg_list);
150
 
                bg_list = NULL;
151
 
        }       
152
 
        if (bg_curr_block_list) {
153
 
                list_destroy(bg_curr_block_list);
154
 
                bg_curr_block_list = NULL;
155
 
        }       
156
 
        if (bg_job_block_list) {
157
 
                list_destroy(bg_job_block_list);
158
 
                bg_job_block_list = NULL;
159
 
                num_unused_cpus = 0;
160
 
        }
161
 
        if (bg_booted_block_list) {
162
 
                list_destroy(bg_booted_block_list);
163
 
                bg_booted_block_list = NULL;
164
 
        }
 
145
 
 
146
        _destroy_bg_config(bg_conf);
 
147
        _destroy_bg_lists(bg_lists);
165
148
                
166
 
#ifdef HAVE_BGL
167
 
        if(bg_blrtsimage_list) {
168
 
                list_destroy(bg_blrtsimage_list);
169
 
                bg_blrtsimage_list = NULL;
170
 
        }
171
 
#endif  
172
 
        if(bg_linuximage_list) {
173
 
                list_destroy(bg_linuximage_list);
174
 
                bg_linuximage_list = NULL;
175
 
        }
176
 
        
177
 
        if(bg_mloaderimage_list) {
178
 
                list_destroy(bg_mloaderimage_list);
179
 
                bg_mloaderimage_list = NULL;
180
 
        }
181
 
 
182
 
        if(bg_ramdiskimage_list) {
183
 
                list_destroy(bg_ramdiskimage_list);
184
 
                bg_ramdiskimage_list = NULL;
185
 
        }
186
 
        
187
 
        if(bg_valid_small32) {
188
 
                list_destroy(bg_valid_small32);
189
 
                bg_valid_small32 = NULL;
190
 
        }
191
 
        if(bg_valid_small64) {
192
 
                list_destroy(bg_valid_small64);
193
 
                bg_valid_small64 = NULL;
194
 
        }
195
 
        if(bg_valid_small128) {
196
 
                list_destroy(bg_valid_small128);
197
 
                bg_valid_small128 = NULL;
198
 
        }
199
 
        if(bg_valid_small256) {
200
 
                list_destroy(bg_valid_small256);
201
 
                bg_valid_small256 = NULL;
202
 
        }
203
 
 
204
 
#ifdef HAVE_BGL
205
 
        xfree(default_blrtsimage);
206
 
#endif
207
 
        xfree(default_linuximage);
208
 
        xfree(default_mloaderimage);
209
 
        xfree(default_ramdiskimage);
210
 
        xfree(bridge_api_file);
211
 
        xfree(bg_conf);
212
 
        xfree(bg_slurm_user_name);
213
 
        xfree(bg_slurm_node_prefix);
214
 
        
215
149
        ba_fini();
216
150
}
217
151
 
233
167
        if (!bit_overlap(rec_a->bitmap, rec_b->bitmap)) 
234
168
                return false;
235
169
 
236
 
        if((rec_a->node_cnt >= bluegene_bp_node_cnt)
237
 
           || (rec_b->node_cnt >= bluegene_bp_node_cnt))
 
170
        if((rec_a->node_cnt >= bg_conf->bp_node_cnt)
 
171
           || (rec_b->node_cnt >= bg_conf->bp_node_cnt))
238
172
                return true;
239
173
        
240
174
        if (!bit_overlap(rec_a->ionode_bitmap, rec_b->ionode_bitmap)) 
253
187
 
254
188
        if ((rc = bridge_get_block(bg_block_id,  &block_ptr)) != STATUS_OK) {
255
189
                if(rc == INCONSISTENT_DATA
256
 
                   && bluegene_layout_mode == LAYOUT_DYNAMIC)
 
190
                   && bg_conf->layout_mode == LAYOUT_DYNAMIC)
257
191
                        return REMOVE_USER_FOUND;
258
192
                        
259
193
                error("bridge_get_block(%s): %s", 
299
233
                        error("No user was returned from database");
300
234
                        continue;
301
235
                }
302
 
                if(!strcmp(user, bg_slurm_user_name)) {
 
236
                if(!strcmp(user, bg_conf->slurm_user_name)) {
303
237
                        free(user);
304
238
                        continue;
305
239
                }
351
285
                rc = SLURM_ERROR;
352
286
        }       
353
287
        xfree(bg_record->target_name);
354
 
        bg_record->target_name = xstrdup(bg_slurm_user_name);
 
288
        bg_record->target_name = xstrdup(bg_conf->slurm_user_name);
355
289
 
356
290
        return rc;
357
291
}
435
369
                                        last_bg_update = now;
436
370
                                } else if(rc == -1)
437
371
                                        error("Error with update_block_list");
438
 
                                if(bluegene_layout_mode == LAYOUT_DYNAMIC) {
 
372
                                if(bg_conf->layout_mode == LAYOUT_DYNAMIC) {
439
373
                                        if((rc = update_freeing_block_list())
440
374
                                           == 1) {
441
375
                                                last_bg_update = now;
577
511
                if (bg_record->state != NO_VAL
578
512
                    && bg_record->state != RM_PARTITION_FREE 
579
513
                    && bg_record->state != RM_PARTITION_DEALLOCATING) {
 
514
                        debug2("bridge_destroy %s", bg_record->bg_block_id);
580
515
#ifdef HAVE_BG_FILES
581
 
                        debug2("bridge_destroy %s",bg_record->bg_block_id);
582
516
                        
583
517
                        rc = bridge_destroy_block(bg_record->bg_block_id);
584
518
                        if (rc != STATUS_OK) {
615
549
                slurm_mutex_unlock(&block_state_mutex);                 
616
550
                sleep(3);
617
551
        }
618
 
        remove_from_bg_list(bg_booted_block_list, bg_record);
 
552
        remove_from_bg_list(bg_lists->booted, bg_record);
619
553
        slurm_mutex_unlock(&block_state_mutex);                 
620
554
                
621
555
        return SLURM_SUCCESS;
675
609
        int rc;
676
610
#endif
677
611
        slurm_mutex_lock(&freed_cnt_mutex);
678
 
        if ((bg_freeing_list == NULL) 
679
 
            && ((bg_freeing_list = list_create(destroy_bg_record)) == NULL))
680
 
                fatal("malloc failure in bg_freeing_list");
 
612
        if ((bg_lists->freeing == NULL) 
 
613
            && ((bg_lists->freeing = list_create(destroy_bg_record)) == NULL))
 
614
                fatal("malloc failure in bg_lists->freeing");
681
615
        slurm_mutex_unlock(&freed_cnt_mutex);
682
616
        
683
617
        /*
695
629
                        continue;
696
630
                }
697
631
                slurm_mutex_lock(&block_state_mutex);
698
 
                remove_from_bg_list(bg_list, bg_record);
699
 
                list_push(bg_freeing_list, bg_record);
 
632
                remove_from_bg_list(bg_lists->main, bg_record);
 
633
                list_push(bg_lists->freeing, bg_record);
700
634
                
701
635
                /* 
702
636
                 * we only are sorting this so when we send it to a
703
637
                 * tool such as smap it will be in a nice order
704
638
                 */
705
 
                sort_bg_record_inc_size(bg_freeing_list);
706
 
                if(remove_from_bg_list(bg_job_block_list, bg_record) 
 
639
                sort_bg_record_inc_size(bg_lists->freeing);
 
640
                if(remove_from_bg_list(bg_lists->job_running, bg_record) 
707
641
                   == SLURM_SUCCESS) {
708
642
                        num_unused_cpus += bg_record->cpu_cnt;
709
643
                }
719
653
                }
720
654
                debug2("done destroying");
721
655
                slurm_mutex_lock(&block_state_mutex);
722
 
                remove_from_bg_list(bg_freeing_list, bg_record);
 
656
                remove_from_bg_list(bg_lists->freeing, bg_record);
723
657
                slurm_mutex_unlock(&block_state_mutex);
724
658
                                                                
725
659
#ifdef HAVE_BG_FILES
755
689
        slurm_mutex_lock(&freed_cnt_mutex);
756
690
        destroy_cnt--;
757
691
        if(destroy_cnt == 0) {
758
 
                if(bg_freeing_list) {
759
 
                        list_destroy(bg_freeing_list);
760
 
                        bg_freeing_list = NULL;
 
692
                if(bg_lists->freeing) {
 
693
                        list_destroy(bg_lists->freeing);
 
694
                        bg_lists->freeing = NULL;
761
695
                }
762
696
                list_destroy(bg_destroy_block_list);
763
697
                bg_destroy_block_list = NULL;
781
715
                return SLURM_SUCCESS;
782
716
 
783
717
        /* set up which list to push onto */
784
 
        if(bluegene_layout_mode == LAYOUT_DYNAMIC) {
 
718
        if(bg_conf->layout_mode == LAYOUT_DYNAMIC) {
785
719
                block_list = &bg_destroy_block_list;
786
720
                count = &destroy_cnt;
787
721
        } else {
799
733
                /* push job onto queue in a FIFO */
800
734
                debug3("adding %s to be freed", found_record->bg_block_id);
801
735
                if(!block_ptr_exist_in_list(*block_list, found_record)) {
 
736
                        num_block_to_free++;
802
737
                        if (list_push(*block_list, found_record) == NULL)
803
738
                                fatal("malloc failure in _block_op/list_push");
804
739
                } else {
805
740
                        error("we had block %s already on the freeing list",
806
741
                              found_record->bg_block_id);
807
 
                        num_block_to_free--;
808
742
                        continue;
809
743
                }
810
744
                /* already running MAX_AGENTS we don't really need more 
820
754
                            PTHREAD_CREATE_DETACHED))
821
755
                        error("pthread_attr_setdetachstate error %m");
822
756
                retries = 0;
823
 
                if(bluegene_layout_mode == LAYOUT_DYNAMIC) {
 
757
                if(bg_conf->layout_mode == LAYOUT_DYNAMIC) {
824
758
                        while (pthread_create(&thread_agent, 
825
759
                                              &attr_agent, 
826
760
                                              mult_destroy_block,
869
803
        static time_t last_config_update = (time_t) 0;
870
804
        struct stat config_stat;
871
805
        ListIterator itr = NULL;
872
 
        
 
806
        char* bg_conf_file = NULL;
 
807
 
873
808
        debug("Reading the bluegene.conf file");
874
809
 
875
810
        /* check if config file has changed */
876
 
        if (!bg_conf)
877
 
                bg_conf = _get_bg_conf();
878
 
        if (stat(bg_conf, &config_stat) < 0)
879
 
                fatal("can't stat bluegene.conf file %s: %m", bg_conf);
 
811
        bg_conf_file = _get_bg_conf();
 
812
 
 
813
        if (stat(bg_conf_file, &config_stat) < 0)
 
814
                fatal("can't stat bluegene.conf file %s: %m", bg_conf_file);
880
815
        if (last_config_update) {
881
816
                _reopen_bridge_log();
882
817
                if(last_config_update == config_stat.st_mtime)
883
 
                        debug("%s unchanged", bg_conf);
 
818
                        debug("%s unchanged", bg_conf_file);
884
819
                else {
885
820
                        info("Restart slurmctld for %s changes to take effect", 
886
 
                             bg_conf);
 
821
                             bg_conf_file);
887
822
                }
888
823
                last_config_update = config_stat.st_mtime; 
 
824
                xfree(bg_conf_file);
889
825
                return SLURM_SUCCESS;
890
826
        }
891
827
        last_config_update = config_stat.st_mtime; 
894
830
        /* bg_conf defined in bg_node_alloc.h */
895
831
        tbl = s_p_hashtbl_create(bg_conf_file_options);
896
832
        
897
 
        if(s_p_parse_file(tbl, bg_conf) == SLURM_ERROR)
 
833
        if(s_p_parse_file(tbl, bg_conf_file) == SLURM_ERROR)
898
834
                fatal("something wrong with opening/reading bluegene "
899
835
                      "conf file");
 
836
        xfree(bg_conf_file);
900
837
        
901
 
        _set_bg_lists();        
902
838
#ifdef HAVE_BGL
903
839
        if (s_p_get_array((void ***)&image_array, 
904
840
                          &count, "AltBlrtsImage", tbl)) {
905
841
                for (i = 0; i < count; i++) {
906
 
                        list_append(bg_blrtsimage_list, image_array[i]);
 
842
                        list_append(bg_conf->blrts_list, image_array[i]);
907
843
                        image_array[i] = NULL;
908
844
                }
909
845
        }
910
 
        if (!s_p_get_string(&default_blrtsimage, "BlrtsImage", tbl)) {
911
 
                if(!list_count(bg_blrtsimage_list))
 
846
        if (!s_p_get_string(&bg_conf->default_blrtsimage, "BlrtsImage", tbl)) {
 
847
                if(!list_count(bg_conf->blrts_list))
912
848
                        fatal("BlrtsImage not configured "
913
849
                              "in bluegene.conf");
914
 
                itr = list_iterator_create(bg_blrtsimage_list);
 
850
                itr = list_iterator_create(bg_conf->blrts_list);
915
851
                image = list_next(itr);
916
852
                image->def = true;
917
853
                list_iterator_destroy(itr);
918
 
                default_blrtsimage = xstrdup(image->name);
 
854
                bg_conf->default_blrtsimage = xstrdup(image->name);
919
855
                info("Warning: using %s as the default BlrtsImage.  "
920
856
                     "If this isn't correct please set BlrtsImage",
921
 
                     default_blrtsimage); 
 
857
                     bg_conf->default_blrtsimage); 
922
858
        } else {
923
 
                debug3("default BlrtsImage %s", default_blrtsimage);
 
859
                debug3("default BlrtsImage %s", bg_conf->default_blrtsimage);
924
860
                image = xmalloc(sizeof(image_t));
925
 
                image->name = xstrdup(default_blrtsimage);
 
861
                image->name = xstrdup(bg_conf->default_blrtsimage);
926
862
                image->def = true;
927
863
                image->groups = NULL;
928
864
                /* we want it to be first */
929
 
                list_push(bg_blrtsimage_list, image);
 
865
                list_push(bg_conf->blrts_list, image);
930
866
        }
931
867
                
932
868
        if (s_p_get_array((void ***)&image_array, 
933
869
                          &count, "AltLinuxImage", tbl)) {
934
870
                for (i = 0; i < count; i++) {
935
 
                        list_append(bg_linuximage_list, image_array[i]);
 
871
                        list_append(bg_conf->linux_list, image_array[i]);
936
872
                        image_array[i] = NULL;
937
873
                }
938
874
        }
939
 
        if (!s_p_get_string(&default_linuximage, "LinuxImage", tbl)) {
940
 
                if(!list_count(bg_linuximage_list))
 
875
        if (!s_p_get_string(&bg_conf->default_linuximage, "LinuxImage", tbl)) {
 
876
                if(!list_count(bg_conf->linux_list))
941
877
                        fatal("LinuxImage not configured "
942
878
                              "in bluegene.conf");
943
 
                itr = list_iterator_create(bg_linuximage_list);
 
879
                itr = list_iterator_create(bg_conf->linux_list);
944
880
                image = list_next(itr);
945
881
                image->def = true;
946
882
                list_iterator_destroy(itr);
947
 
                default_linuximage = xstrdup(image->name);
 
883
                bg_conf->default_linuximage = xstrdup(image->name);
948
884
                info("Warning: using %s as the default LinuxImage.  "
949
885
                     "If this isn't correct please set LinuxImage",
950
 
                     default_linuximage); 
 
886
                     bg_conf->default_linuximage); 
951
887
        } else {
952
 
                debug3("default LinuxImage %s", default_linuximage);
 
888
                debug3("default LinuxImage %s", bg_conf->default_linuximage);
953
889
                image = xmalloc(sizeof(image_t));
954
 
                image->name = xstrdup(default_linuximage);
 
890
                image->name = xstrdup(bg_conf->default_linuximage);
955
891
                image->def = true;
956
892
                image->groups = NULL;
957
893
                /* we want it to be first */
958
 
                list_push(bg_linuximage_list, image);           
 
894
                list_push(bg_conf->linux_list, image);          
959
895
        }
960
896
 
961
897
        if (s_p_get_array((void ***)&image_array, 
962
898
                          &count, "AltRamDiskImage", tbl)) {
963
899
                for (i = 0; i < count; i++) {
964
 
                        list_append(bg_ramdiskimage_list, image_array[i]);
 
900
                        list_append(bg_conf->ramdisk_list, image_array[i]);
965
901
                        image_array[i] = NULL;
966
902
                }
967
903
        }
968
 
        if (!s_p_get_string(&default_ramdiskimage,
 
904
        if (!s_p_get_string(&bg_conf->default_ramdiskimage,
969
905
                            "RamDiskImage", tbl)) {
970
 
                if(!list_count(bg_ramdiskimage_list))
 
906
                if(!list_count(bg_conf->ramdisk_list))
971
907
                        fatal("RamDiskImage not configured "
972
908
                              "in bluegene.conf");
973
 
                itr = list_iterator_create(bg_ramdiskimage_list);
 
909
                itr = list_iterator_create(bg_conf->ramdisk_list);
974
910
                image = list_next(itr);
975
911
                image->def = true;
976
912
                list_iterator_destroy(itr);
977
 
                default_ramdiskimage = xstrdup(image->name);
 
913
                bg_conf->default_ramdiskimage = xstrdup(image->name);
978
914
                info("Warning: using %s as the default RamDiskImage.  "
979
915
                     "If this isn't correct please set RamDiskImage",
980
 
                     default_ramdiskimage); 
 
916
                     bg_conf->default_ramdiskimage); 
981
917
        } else {
982
 
                debug3("default RamDiskImage %s", default_ramdiskimage);
 
918
                debug3("default RamDiskImage %s",
 
919
                       bg_conf->default_ramdiskimage);
983
920
                image = xmalloc(sizeof(image_t));
984
 
                image->name = xstrdup(default_ramdiskimage);
 
921
                image->name = xstrdup(bg_conf->default_ramdiskimage);
985
922
                image->def = true;
986
923
                image->groups = NULL;
987
924
                /* we want it to be first */
988
 
                list_push(bg_ramdiskimage_list, image);         
 
925
                list_push(bg_conf->ramdisk_list, image);                
989
926
        }
990
927
#else
991
928
 
992
929
        if (s_p_get_array((void ***)&image_array, 
993
930
                          &count, "AltCnloadImage", tbl)) {
994
931
                for (i = 0; i < count; i++) {
995
 
                        list_append(bg_linuximage_list, image_array[i]);
 
932
                        list_append(bg_conf->linux_list, image_array[i]);
996
933
                        image_array[i] = NULL;
997
934
                }
998
935
        }
999
 
        if (!s_p_get_string(&default_linuximage, "CnloadImage", tbl)) {
1000
 
                if(!list_count(bg_linuximage_list))
 
936
        if (!s_p_get_string(&bg_conf->default_linuximage, "CnloadImage", tbl)) {
 
937
                if(!list_count(bg_conf->linux_list))
1001
938
                        fatal("CnloadImage not configured "
1002
939
                              "in bluegene.conf");
1003
 
                itr = list_iterator_create(bg_linuximage_list);
 
940
                itr = list_iterator_create(bg_conf->linux_list);
1004
941
                image = list_next(itr);
1005
942
                image->def = true;
1006
943
                list_iterator_destroy(itr);
1007
 
                default_linuximage = xstrdup(image->name);
 
944
                bg_conf->default_linuximage = xstrdup(image->name);
1008
945
                info("Warning: using %s as the default CnloadImage.  "
1009
946
                     "If this isn't correct please set CnloadImage",
1010
 
                     default_linuximage); 
 
947
                     bg_conf->default_linuximage); 
1011
948
        } else {
1012
 
                debug3("default CnloadImage %s", default_linuximage);
 
949
                debug3("default CnloadImage %s", bg_conf->default_linuximage);
1013
950
                image = xmalloc(sizeof(image_t));
1014
 
                image->name = xstrdup(default_linuximage);
 
951
                image->name = xstrdup(bg_conf->default_linuximage);
1015
952
                image->def = true;
1016
953
                image->groups = NULL;
1017
954
                /* we want it to be first */
1018
 
                list_push(bg_linuximage_list, image);           
 
955
                list_push(bg_conf->linux_list, image);          
1019
956
        }
1020
957
 
1021
958
        if (s_p_get_array((void ***)&image_array, 
1022
959
                          &count, "AltIoloadImage", tbl)) {
1023
960
                for (i = 0; i < count; i++) {
1024
 
                        list_append(bg_ramdiskimage_list, image_array[i]);
 
961
                        list_append(bg_conf->ramdisk_list, image_array[i]);
1025
962
                        image_array[i] = NULL;
1026
963
                }
1027
964
        }
1028
 
        if (!s_p_get_string(&default_ramdiskimage,
 
965
        if (!s_p_get_string(&bg_conf->default_ramdiskimage,
1029
966
                            "IoloadImage", tbl)) {
1030
 
                if(!list_count(bg_ramdiskimage_list))
 
967
                if(!list_count(bg_conf->ramdisk_list))
1031
968
                        fatal("IoloadImage not configured "
1032
969
                              "in bluegene.conf");
1033
 
                itr = list_iterator_create(bg_ramdiskimage_list);
 
970
                itr = list_iterator_create(bg_conf->ramdisk_list);
1034
971
                image = list_next(itr);
1035
972
                image->def = true;
1036
973
                list_iterator_destroy(itr);
1037
 
                default_ramdiskimage = xstrdup(image->name);
 
974
                bg_conf->default_ramdiskimage = xstrdup(image->name);
1038
975
                info("Warning: using %s as the default IoloadImage.  "
1039
976
                     "If this isn't correct please set IoloadImage",
1040
 
                     default_ramdiskimage); 
 
977
                     bg_conf->default_ramdiskimage); 
1041
978
        } else {
1042
 
                debug3("default IoloadImage %s", default_ramdiskimage);
 
979
                debug3("default IoloadImage %s", bg_conf->default_ramdiskimage);
1043
980
                image = xmalloc(sizeof(image_t));
1044
 
                image->name = xstrdup(default_ramdiskimage);
 
981
                image->name = xstrdup(bg_conf->default_ramdiskimage);
1045
982
                image->def = true;
1046
983
                image->groups = NULL;
1047
984
                /* we want it to be first */
1048
 
                list_push(bg_ramdiskimage_list, image);         
 
985
                list_push(bg_conf->ramdisk_list, image);                
1049
986
        }
1050
987
 
1051
988
#endif
1052
989
        if (s_p_get_array((void ***)&image_array, 
1053
990
                          &count, "AltMloaderImage", tbl)) {
1054
991
                for (i = 0; i < count; i++) {
1055
 
                        list_append(bg_mloaderimage_list, image_array[i]);
 
992
                        list_append(bg_conf->mloader_list, image_array[i]);
1056
993
                        image_array[i] = NULL;
1057
994
                }
1058
995
        }
1059
 
        if (!s_p_get_string(&default_mloaderimage,
 
996
        if (!s_p_get_string(&bg_conf->default_mloaderimage,
1060
997
                            "MloaderImage", tbl)) {
1061
 
                if(!list_count(bg_mloaderimage_list))
 
998
                if(!list_count(bg_conf->mloader_list))
1062
999
                        fatal("MloaderImage not configured "
1063
1000
                              "in bluegene.conf");
1064
 
                itr = list_iterator_create(bg_mloaderimage_list);
 
1001
                itr = list_iterator_create(bg_conf->mloader_list);
1065
1002
                image = list_next(itr);
1066
1003
                image->def = true;
1067
1004
                list_iterator_destroy(itr);
1068
 
                default_mloaderimage = xstrdup(image->name);
 
1005
                bg_conf->default_mloaderimage = xstrdup(image->name);
1069
1006
                info("Warning: using %s as the default MloaderImage.  "
1070
1007
                     "If this isn't correct please set MloaderImage",
1071
 
                     default_mloaderimage); 
 
1008
                     bg_conf->default_mloaderimage); 
1072
1009
        } else {
1073
 
                debug3("default MloaderImage %s", default_mloaderimage);
 
1010
                debug3("default MloaderImage %s",
 
1011
                       bg_conf->default_mloaderimage);
1074
1012
                image = xmalloc(sizeof(image_t));
1075
 
                image->name = xstrdup(default_mloaderimage);
 
1013
                image->name = xstrdup(bg_conf->default_mloaderimage);
1076
1014
                image->def = true;
1077
1015
                image->groups = NULL;
1078
1016
                /* we want it to be first */
1079
 
                list_push(bg_mloaderimage_list, image);         
 
1017
                list_push(bg_conf->mloader_list, image);                
1080
1018
        }
1081
1019
 
1082
1020
        if (!s_p_get_uint16(
1083
 
                    &bluegene_bp_node_cnt, "BasePartitionNodeCnt", tbl)) {
 
1021
                    &bg_conf->bp_node_cnt, "BasePartitionNodeCnt", tbl)) {
1084
1022
                error("BasePartitionNodeCnt not configured in bluegene.conf "
1085
1023
                      "defaulting to 512 as BasePartitionNodeCnt");
1086
 
                bluegene_bp_node_cnt = 512;
1087
 
                bluegene_quarter_node_cnt = 128;
 
1024
                bg_conf->bp_node_cnt = 512;
 
1025
                bg_conf->quarter_node_cnt = 128;
1088
1026
        } else {
1089
 
                if(bluegene_bp_node_cnt<=0)
 
1027
                if(bg_conf->bp_node_cnt <= 0)
1090
1028
                        fatal("You should have more than 0 nodes "
1091
1029
                              "per base partition");
1092
1030
 
1093
 
                bluegene_quarter_node_cnt = bluegene_bp_node_cnt/4;
1094
 
        }
1095
 
 
1096
 
        /* select_p_node_init needs to be called before this to set
1097
 
           this up correctly
1098
 
        */
1099
 
        bluegene_proc_ratio = procs_per_node/bluegene_bp_node_cnt;
1100
 
        if(!bluegene_proc_ratio)
 
1031
                bg_conf->quarter_node_cnt = bg_conf->bp_node_cnt/4;
 
1032
        }
 
1033
        /* bg_conf->procs_per_bp should have already been set from the
 
1034
         * node_init */
 
1035
        if(bg_conf->procs_per_bp < bg_conf->bp_node_cnt) {
 
1036
                fatal("For some reason we have only %u procs per bp, but "
 
1037
                      "have %u cnodes per bp.  You need at least the same "
 
1038
                      "number of procs as you have cnodes per bp.  "
 
1039
                      "Check the NodeName Procs= "
 
1040
                      "definition in the slurm.conf.", 
 
1041
                      bg_conf->procs_per_bp, bg_conf->bp_node_cnt); 
 
1042
        }
 
1043
        
 
1044
        bg_conf->proc_ratio = bg_conf->procs_per_bp/bg_conf->bp_node_cnt;
 
1045
        if(!bg_conf->proc_ratio)
1101
1046
                fatal("We appear to have less than 1 proc on a cnode.  "
1102
1047
                      "You specified %u for BasePartitionNodeCnt "
1103
1048
                      "in the blugene.conf and %u procs "
1104
1049
                      "for each node in the slurm.conf",
1105
 
                      bluegene_bp_node_cnt, procs_per_node);
 
1050
                      bg_conf->bp_node_cnt, bg_conf->procs_per_bp);
 
1051
        num_unused_cpus = 
 
1052
                DIM_SIZE[X] * DIM_SIZE[Y] * DIM_SIZE[Z] 
 
1053
                * bg_conf->procs_per_bp;
1106
1054
 
1107
1055
        if (!s_p_get_uint16(
1108
 
                    &bluegene_nodecard_node_cnt, "NodeCardNodeCnt", tbl)) {
 
1056
                    &bg_conf->nodecard_node_cnt, "NodeCardNodeCnt", tbl)) {
1109
1057
                error("NodeCardNodeCnt not configured in bluegene.conf "
1110
1058
                      "defaulting to 32 as NodeCardNodeCnt");
1111
 
                bluegene_nodecard_node_cnt = 32;
 
1059
                bg_conf->nodecard_node_cnt = 32;
1112
1060
        }
1113
1061
        
1114
 
        if(bluegene_nodecard_node_cnt<=0)
 
1062
        if(bg_conf->nodecard_node_cnt<=0)
1115
1063
                fatal("You should have more than 0 nodes per nodecard");
1116
1064
 
1117
 
        bluegene_bp_nodecard_cnt = 
1118
 
                bluegene_bp_node_cnt / bluegene_nodecard_node_cnt;
 
1065
        bg_conf->bp_nodecard_cnt = 
 
1066
                bg_conf->bp_node_cnt / bg_conf->nodecard_node_cnt;
1119
1067
 
1120
 
        if (!s_p_get_uint16(&bluegene_numpsets, "Numpsets", tbl))
 
1068
        if (!s_p_get_uint16(&bg_conf->numpsets, "Numpsets", tbl))
1121
1069
                fatal("Warning: Numpsets not configured in bluegene.conf");
1122
1070
 
1123
 
        if(bluegene_numpsets) {
 
1071
        if(bg_conf->numpsets) {
1124
1072
                bitstr_t *tmp_bitmap = NULL;
1125
1073
                int small_size = 1;
1126
1074
 
1127
1075
                /* THIS IS A HACK TO MAKE A 1 NODECARD SYSTEM WORK */
1128
 
                if(bluegene_bp_node_cnt == bluegene_nodecard_node_cnt) {
1129
 
                        bluegene_quarter_ionode_cnt = 2;
1130
 
                        bluegene_nodecard_ionode_cnt = 2;
 
1076
                if(bg_conf->bp_node_cnt == bg_conf->nodecard_node_cnt) {
 
1077
                        bg_conf->quarter_ionode_cnt = 2;
 
1078
                        bg_conf->nodecard_ionode_cnt = 2;
1131
1079
                } else {
1132
 
                        bluegene_quarter_ionode_cnt = bluegene_numpsets/4;
1133
 
                        bluegene_nodecard_ionode_cnt =
1134
 
                                bluegene_quarter_ionode_cnt/4;
 
1080
                        bg_conf->quarter_ionode_cnt = bg_conf->numpsets/4;
 
1081
                        bg_conf->nodecard_ionode_cnt =
 
1082
                                bg_conf->quarter_ionode_cnt/4;
1135
1083
                }
1136
1084
                        
1137
1085
                /* How many nodecards per ionode */
1138
 
                bluegene_nc_ratio = 
1139
 
                        ((double)bluegene_bp_node_cnt 
1140
 
                         / (double)bluegene_nodecard_node_cnt) 
1141
 
                        / (double)bluegene_numpsets;
 
1086
                bg_conf->nc_ratio = 
 
1087
                        ((double)bg_conf->bp_node_cnt 
 
1088
                         / (double)bg_conf->nodecard_node_cnt) 
 
1089
                        / (double)bg_conf->numpsets;
1142
1090
                /* How many ionodes per nodecard */
1143
 
                bluegene_io_ratio = 
1144
 
                        (double)bluegene_numpsets /
1145
 
                        ((double)bluegene_bp_node_cnt 
1146
 
                         / (double)bluegene_nodecard_node_cnt);
1147
 
                //info("got %f %f", bluegene_nc_ratio, bluegene_io_ratio);
 
1091
                bg_conf->io_ratio = 
 
1092
                        (double)bg_conf->numpsets /
 
1093
                        ((double)bg_conf->bp_node_cnt 
 
1094
                         / (double)bg_conf->nodecard_node_cnt);
 
1095
                //info("got %f %f", bg_conf->nc_ratio, bg_conf->io_ratio);
1148
1096
                /* figure out the smallest block we can have on the
1149
1097
                   system */
1150
1098
#ifdef HAVE_BGL
1151
 
                if(bluegene_io_ratio >= 2)
1152
 
                        bluegene_smallest_block=32;
 
1099
                if(bg_conf->io_ratio >= 1)
 
1100
                        bg_conf->smallest_block=32;
1153
1101
                else
1154
 
                        bluegene_smallest_block=128;
 
1102
                        bg_conf->smallest_block=128;
1155
1103
#else
1156
 
                if(bluegene_io_ratio >= 2)
1157
 
                        bluegene_smallest_block=16;
1158
 
                else if(bluegene_io_ratio == 1)
1159
 
                        bluegene_smallest_block=32;
1160
 
                else if(bluegene_io_ratio == .5)
1161
 
                        bluegene_smallest_block=64;
1162
 
                else if(bluegene_io_ratio == .25)
1163
 
                        bluegene_smallest_block=128;
1164
 
                else if(bluegene_io_ratio == .125)
1165
 
                        bluegene_smallest_block=256;
 
1104
                if(bg_conf->io_ratio >= 2)
 
1105
                        bg_conf->smallest_block=16;
 
1106
                else if(bg_conf->io_ratio == 1)
 
1107
                        bg_conf->smallest_block=32;
 
1108
                else if(bg_conf->io_ratio == .5)
 
1109
                        bg_conf->smallest_block=64;
 
1110
                else if(bg_conf->io_ratio == .25)
 
1111
                        bg_conf->smallest_block=128;
 
1112
                else if(bg_conf->io_ratio == .125)
 
1113
                        bg_conf->smallest_block=256;
1166
1114
                else {
1167
1115
                        error("unknown ioratio %f.  Can't figure out "
1168
1116
                              "smallest block size, setting it to midplane");
1169
 
                        bluegene_smallest_block=512;
 
1117
                        bg_conf->smallest_block=512;
1170
1118
                }
1171
1119
#endif
1172
1120
                debug("Smallest block possible on this system is %u",
1173
 
                      bluegene_smallest_block);
 
1121
                      bg_conf->smallest_block);
1174
1122
                /* below we are creating all the possible bitmaps for
1175
1123
                 * each size of small block
1176
1124
                 */
1177
 
                if((int)bluegene_nodecard_ionode_cnt < 1) {
1178
 
                        bluegene_nodecard_ionode_cnt = 0;
 
1125
                if((int)bg_conf->nodecard_ionode_cnt < 1) {
 
1126
                        bg_conf->nodecard_ionode_cnt = 0;
1179
1127
                } else {
1180
 
                        bg_valid_small32 = list_create(_destroy_bitmap);
1181
 
                        if((small_size = bluegene_nodecard_ionode_cnt))
 
1128
                        bg_lists->valid_small32 = list_create(_destroy_bitmap);
 
1129
                        if((small_size = bg_conf->nodecard_ionode_cnt))
1182
1130
                                small_size--;
1183
1131
                        i = 0;
1184
 
                        while(i<bluegene_numpsets) {
1185
 
                                tmp_bitmap = bit_alloc(bluegene_numpsets);
 
1132
                        while(i<bg_conf->numpsets) {
 
1133
                                tmp_bitmap = bit_alloc(bg_conf->numpsets);
1186
1134
                                bit_nset(tmp_bitmap, i, i+small_size);
1187
1135
                                i += small_size+1;
1188
 
                                list_append(bg_valid_small32, tmp_bitmap);
 
1136
                                list_append(bg_lists->valid_small32,
 
1137
                                            tmp_bitmap);
1189
1138
                        }
1190
1139
                }
1191
1140
                /* If we only have 1 nodecard just jump to the end
1192
1141
                   since this will never need to happen below.
1193
1142
                   Pretty much a hack to avoid seg fault;). */
1194
 
                if(bluegene_bp_node_cnt == bluegene_nodecard_node_cnt) 
 
1143
                if(bg_conf->bp_node_cnt == bg_conf->nodecard_node_cnt) 
1195
1144
                        goto no_calc;
1196
1145
 
1197
 
                bg_valid_small128 = list_create(_destroy_bitmap);
1198
 
                if((small_size = bluegene_quarter_ionode_cnt))
 
1146
                bg_lists->valid_small128 = list_create(_destroy_bitmap);
 
1147
                if((small_size = bg_conf->quarter_ionode_cnt))
1199
1148
                        small_size--;
1200
1149
                i = 0;
1201
 
                while(i<bluegene_numpsets) {
1202
 
                        tmp_bitmap = bit_alloc(bluegene_numpsets);
 
1150
                while(i<bg_conf->numpsets) {
 
1151
                        tmp_bitmap = bit_alloc(bg_conf->numpsets);
1203
1152
                        bit_nset(tmp_bitmap, i, i+small_size);
1204
1153
                        i += small_size+1;
1205
 
                        list_append(bg_valid_small128, tmp_bitmap);
 
1154
                        list_append(bg_lists->valid_small128, tmp_bitmap);
1206
1155
                }
1207
1156
 
1208
1157
#ifndef HAVE_BGL
1209
 
                bg_valid_small64 = list_create(_destroy_bitmap);
1210
 
                if((small_size = bluegene_nodecard_ionode_cnt * 2))
 
1158
                bg_lists->valid_small64 = list_create(_destroy_bitmap);
 
1159
                if((small_size = bg_conf->nodecard_ionode_cnt * 2))
1211
1160
                        small_size--;
1212
1161
                i = 0;
1213
 
                while(i<bluegene_numpsets) {
1214
 
                        tmp_bitmap = bit_alloc(bluegene_numpsets);
 
1162
                while(i<bg_conf->numpsets) {
 
1163
                        tmp_bitmap = bit_alloc(bg_conf->numpsets);
1215
1164
                        bit_nset(tmp_bitmap, i, i+small_size);
1216
1165
                        i += small_size+1;
1217
 
                        list_append(bg_valid_small64, tmp_bitmap);
 
1166
                        list_append(bg_lists->valid_small64, tmp_bitmap);
1218
1167
                }
1219
1168
 
1220
 
                bg_valid_small256 = list_create(_destroy_bitmap);
1221
 
                if((small_size = bluegene_quarter_ionode_cnt * 2))
 
1169
                bg_lists->valid_small256 = list_create(_destroy_bitmap);
 
1170
                if((small_size = bg_conf->quarter_ionode_cnt * 2))
1222
1171
                        small_size--;
1223
1172
                i = 0;
1224
 
                while(i<bluegene_numpsets) {
1225
 
                        tmp_bitmap = bit_alloc(bluegene_numpsets);
 
1173
                while(i<bg_conf->numpsets) {
 
1174
                        tmp_bitmap = bit_alloc(bg_conf->numpsets);
1226
1175
                        bit_nset(tmp_bitmap, i, i+small_size);
1227
1176
                        i += small_size+1;
1228
 
                        list_append(bg_valid_small256, tmp_bitmap);
 
1177
                        list_append(bg_lists->valid_small256, tmp_bitmap);
1229
1178
                }
1230
1179
#endif                  
1231
1180
        } else {
1234
1183
 
1235
1184
no_calc:
1236
1185
 
1237
 
        if (!s_p_get_uint16(&bridge_api_verb, "BridgeAPIVerbose", tbl))
 
1186
        if (!s_p_get_uint16(&bg_conf->bridge_api_verb, "BridgeAPIVerbose", tbl))
1238
1187
                info("Warning: BridgeAPIVerbose not configured "
1239
1188
                     "in bluegene.conf");
1240
 
        if (!s_p_get_string(&bridge_api_file, "BridgeAPILogFile", tbl)) 
 
1189
        if (!s_p_get_string(&bg_conf->bridge_api_file,
 
1190
                            "BridgeAPILogFile", tbl)) 
1241
1191
                info("BridgeAPILogFile not configured in bluegene.conf");
1242
1192
        else
1243
1193
                _reopen_bridge_log();
1251
1201
                        ba_deny_pass |= PASS_DENY_Z;
1252
1202
                if(!strcasecmp(layout, "ALL")) 
1253
1203
                        ba_deny_pass |= PASS_DENY_ALL;
1254
 
                
 
1204
                bg_conf->deny_pass = ba_deny_pass;
1255
1205
                xfree(layout);
1256
1206
        }
1257
1207
 
1258
1208
        if (!s_p_get_string(&layout, "LayoutMode", tbl)) {
1259
1209
                info("Warning: LayoutMode was not specified in bluegene.conf "
1260
1210
                     "defaulting to STATIC partitioning");
1261
 
                bluegene_layout_mode = LAYOUT_STATIC;
 
1211
                bg_conf->layout_mode = LAYOUT_STATIC;
1262
1212
        } else {
1263
1213
                if(!strcasecmp(layout,"STATIC")) 
1264
 
                        bluegene_layout_mode = LAYOUT_STATIC;
 
1214
                        bg_conf->layout_mode = LAYOUT_STATIC;
1265
1215
                else if(!strcasecmp(layout,"OVERLAP")) 
1266
 
                        bluegene_layout_mode = LAYOUT_OVERLAP;
 
1216
                        bg_conf->layout_mode = LAYOUT_OVERLAP;
1267
1217
                else if(!strcasecmp(layout,"DYNAMIC")) 
1268
 
                        bluegene_layout_mode = LAYOUT_DYNAMIC;
 
1218
                        bg_conf->layout_mode = LAYOUT_DYNAMIC;
1269
1219
                else {
1270
1220
                        fatal("I don't understand this LayoutMode = %s", 
1271
1221
                              layout);
1274
1224
        }
1275
1225
 
1276
1226
        /* add blocks defined in file */
1277
 
        if(bluegene_layout_mode != LAYOUT_DYNAMIC) {
 
1227
        if(bg_conf->layout_mode != LAYOUT_DYNAMIC) {
1278
1228
                if (!s_p_get_array((void ***)&blockreq_array, 
1279
1229
                                   &count, "BPs", tbl)) {
1280
1230
                        info("WARNING: no blocks defined in bluegene.conf, "
1283
1233
                }
1284
1234
                
1285
1235
                for (i = 0; i < count; i++) {
1286
 
                        add_bg_record(bg_list, NULL, blockreq_array[i], 0, 0);
 
1236
                        add_bg_record(bg_lists->main, NULL,
 
1237
                                      blockreq_array[i], 0, 0);
1287
1238
                }
1288
1239
        }
1289
1240
        s_p_hashtbl_destroy(tbl);
1294
1245
extern int validate_current_blocks(char *dir)
1295
1246
{
1296
1247
        /* found bg blocks already on system */
1297
 
        List bg_found_block_list = NULL;
 
1248
        List curr_block_list = NULL;
 
1249
        List found_block_list = NULL;
1298
1250
        static time_t last_config_update = (time_t) 0;
1299
1251
        ListIterator itr = NULL;
1300
1252
        bg_record_t *bg_record = NULL;
1304
1256
                return SLURM_SUCCESS;
1305
1257
 
1306
1258
        last_config_update = time(NULL);
1307
 
        bg_found_block_list = list_create(NULL);
 
1259
        curr_block_list = list_create(destroy_bg_record);
 
1260
        found_block_list = list_create(NULL);
1308
1261
//#if 0 
1309
1262
        /* Check to see if the configs we have are correct */
1310
 
        if (_validate_config_nodes(&bg_found_block_list, dir) == SLURM_ERROR) { 
1311
 
                _delete_old_blocks(bg_found_block_list);
 
1263
        if (_validate_config_nodes(curr_block_list, found_block_list, dir)
 
1264
            == SLURM_ERROR) { 
 
1265
                _delete_old_blocks(curr_block_list, found_block_list);
1312
1266
        }
1313
1267
//#endif
1314
1268
        /* looking for blocks only I created */
1315
 
        if(bluegene_layout_mode == LAYOUT_DYNAMIC) {
 
1269
        if(bg_conf->layout_mode == LAYOUT_DYNAMIC) {
1316
1270
                init_wires();
1317
1271
                info("No blocks created until jobs are submitted");
1318
1272
        } else {
1319
 
                if (create_defined_blocks(bluegene_layout_mode,
1320
 
                                          bg_found_block_list) 
 
1273
                if (create_defined_blocks(bg_conf->layout_mode,
 
1274
                                          found_block_list) 
1321
1275
                    == SLURM_ERROR) {
1322
1276
                        /* error in creating the static blocks, so
1323
1277
                         * blocks referenced by submitted jobs won't
1328
1282
                }
1329
1283
        } 
1330
1284
        
1331
 
        /* ok now since bg_list has been made we now can put blocks in
 
1285
        /* ok now since bg_lists->main has been made we now can put blocks in
1332
1286
           an error state this needs to be done outside of a lock
1333
1287
           it doesn't matter much in the first place though since
1334
1288
           no threads are started before this function. */
1335
 
        itr = list_iterator_create(bg_list);
 
1289
        itr = list_iterator_create(bg_lists->main);
1336
1290
        while((bg_record = list_next(itr))) {
1337
1291
                if(bg_record->state == RM_PARTITION_ERROR) 
1338
1292
                        put_block_in_error_state(bg_record, BLOCK_ERROR_STATE);
1339
1293
        }
1340
1294
        list_iterator_destroy(itr);
1341
1295
 
 
1296
        list_destroy(curr_block_list);
 
1297
        curr_block_list = NULL;
 
1298
        list_destroy(found_block_list);
 
1299
        found_block_list = NULL;
 
1300
 
1342
1301
        slurm_mutex_lock(&block_state_mutex);
1343
 
        list_destroy(bg_curr_block_list);
1344
 
        bg_curr_block_list = NULL;
1345
 
        if(bg_found_block_list) {
1346
 
                list_destroy(bg_found_block_list);
1347
 
                bg_found_block_list = NULL;
1348
 
        }
1349
 
 
1350
1302
        last_bg_update = time(NULL);
1351
1303
        blocks_are_created = 1;
1352
 
        sort_bg_record_inc_size(bg_list);
 
1304
        sort_bg_record_inc_size(bg_lists->main);
1353
1305
        slurm_mutex_unlock(&block_state_mutex);
1354
1306
        debug("Blocks have finished being created.");
1355
1307
        return SLURM_SUCCESS;
1356
1308
}
1357
1309
 
 
1310
/*
 * _destroy_bg_config - release a bg_config_t and the members it holds.
 * IN bg_conf - configuration structure to free; a NULL pointer is ignored.
 *
 * Every image list that is set is passed to list_destroy() and its member
 * is cleared, the string members are passed to xfree(), and finally the
 * structure itself is passed to xfree().
 *
 * NOTE(review): the parameter carries the same name as the bg_conf object
 * used by other functions in this file, so inside this function the
 * argument is what gets referenced - confirm that callers reset their own
 * pointer after calling this.
 */
static void _destroy_bg_config(bg_config_t *bg_conf)
 
1311
{
 
1312
        if(bg_conf) {
 
1313
/* the blrts image list and default image are only handled on
 * HAVE_BGL builds */
#ifdef HAVE_BGL
 
1314
                if(bg_conf->blrts_list) {
 
1315
                        list_destroy(bg_conf->blrts_list);
 
1316
                        bg_conf->blrts_list = NULL;
 
1317
                }
 
1318
                xfree(bg_conf->default_blrtsimage);
 
1319
#endif  
 
1320
                /* string members */
                xfree(bg_conf->bridge_api_file);
 
1321
                xfree(bg_conf->default_linuximage);
 
1322
                xfree(bg_conf->default_mloaderimage);
 
1323
                xfree(bg_conf->default_ramdiskimage);
 
1324
                /* image lists common to every build */
                if(bg_conf->linux_list) {
 
1325
                        list_destroy(bg_conf->linux_list);
 
1326
                        bg_conf->linux_list = NULL;
 
1327
                }
 
1328
        
 
1329
                if(bg_conf->mloader_list) {
 
1330
                        list_destroy(bg_conf->mloader_list);
 
1331
                        bg_conf->mloader_list = NULL;
 
1332
                }
 
1333
 
 
1334
                if(bg_conf->ramdisk_list) {
 
1335
                        list_destroy(bg_conf->ramdisk_list);
 
1336
                        bg_conf->ramdisk_list = NULL;
 
1337
                }
 
1338
                xfree(bg_conf->slurm_user_name);
 
1339
                xfree(bg_conf->slurm_node_prefix);
 
1340
                /* the structure itself goes last, after all members */
                xfree(bg_conf);
 
1341
        }
 
1342
}
 
1343
 
 
1344
/*
 * _destroy_bg_lists - release a bg_lists_t and every list it holds.
 * IN bg_lists - list container to free; a NULL pointer is ignored.
 *
 * Each member list that is set (booted, freeing, job_running, main and the
 * valid_small32/64/128/256 lists) is passed to list_destroy() and its
 * member is cleared; the container itself is then passed to xfree().
 * Destroying the job_running list also resets num_unused_cpus to 0.
 *
 * NOTE(review): the parameter carries the same name as the bg_lists object
 * used by other functions in this file, so inside this function the
 * argument is what gets referenced - confirm that callers reset their own
 * pointer after calling this.
 */
static void _destroy_bg_lists(bg_lists_t *bg_lists)
 
1345
{
 
1346
        if(bg_lists) {
 
1347
                if (bg_lists->booted) {
 
1348
                        list_destroy(bg_lists->booted);
 
1349
                        bg_lists->booted = NULL;
 
1350
                }
 
1351
 
 
1352
                if (bg_lists->freeing) {
 
1353
                        list_destroy(bg_lists->freeing);
 
1354
                        bg_lists->freeing = NULL;
 
1355
                }       
 
1356
 
 
1357
                if (bg_lists->job_running) {
 
1358
                        list_destroy(bg_lists->job_running);
 
1359
                        bg_lists->job_running = NULL;
 
1360
                        /* the cpu counter is cleared together with the
                         * job list; presumably it is only meaningful
                         * while that list exists - TODO confirm */
                        num_unused_cpus = 0;
 
1361
                }
 
1362
 
 
1363
                if (bg_lists->main) {
 
1364
                        list_destroy(bg_lists->main);
 
1365
                        bg_lists->main = NULL;
 
1366
                }       
 
1367
 
 
1368
                /* lists of small-block bitmaps, one per block size */
                if(bg_lists->valid_small32) {
 
1369
                        list_destroy(bg_lists->valid_small32);
 
1370
                        bg_lists->valid_small32 = NULL;
 
1371
                }
 
1372
                if(bg_lists->valid_small64) {
 
1373
                        list_destroy(bg_lists->valid_small64);
 
1374
                        bg_lists->valid_small64 = NULL;
 
1375
                }
 
1376
                if(bg_lists->valid_small128) {
 
1377
                        list_destroy(bg_lists->valid_small128);
 
1378
                        bg_lists->valid_small128 = NULL;
 
1379
                }
 
1380
                if(bg_lists->valid_small256) {
 
1381
                        list_destroy(bg_lists->valid_small256);
 
1382
                        bg_lists->valid_small256 = NULL;
 
1383
                }
 
1384
 
 
1385
                /* the container itself goes last */
                xfree(bg_lists);
 
1386
        }
 
1387
}
1358
1388
 
1359
1389
/*
 * _set_bg_lists - (re)create the block lists kept in bg_lists.
 *
 * Allocates bg_lists with xmalloc() when it is not set yet.  Then, while
 * holding block_state_mutex, any existing booted, job_running and main
 * list is destroyed and a fresh one is created in its place.  booted and
 * job_running are created with list_create(NULL); main is created with
 * destroy_bg_record as the list_create() argument.
 *
 * NOTE(review): this text is a rendering of a diff.  The statements that
 * work on bg_booted_block_list, bg_job_block_list, bg_curr_block_list,
 * bg_list, num_unused_cpus and the bg_*image_list variables appear to be
 * the removed (pre-change) lines of this function - confirm against the
 * real file before relying on them.
 */
static void _set_bg_lists()
1360
1390
{
 
1391
        if(!bg_lists)
 
1392
                bg_lists = xmalloc(sizeof(bg_lists_t));
 
1393
 
1361
1394
        slurm_mutex_lock(&block_state_mutex);
1362
 
        if(bg_booted_block_list) 
1363
 
                list_destroy(bg_booted_block_list);
1364
 
        bg_booted_block_list = list_create(NULL);
1365
 
        if(bg_job_block_list) 
1366
 
                list_destroy(bg_job_block_list);
1367
 
        bg_job_block_list = list_create(NULL);  
1368
 
        num_unused_cpus = 
1369
 
                DIM_SIZE[X] * DIM_SIZE[Y] * DIM_SIZE[Z] * procs_per_node;
1370
 
        if(bg_curr_block_list)
1371
 
                list_destroy(bg_curr_block_list);       
1372
 
        bg_curr_block_list = list_create(destroy_bg_record);
1373
 
        
1374
 
        if(bg_list) 
1375
 
                list_destroy(bg_list);
1376
 
        bg_list = list_create(destroy_bg_record);
 
1395
 
 
1396
        if(bg_lists->booted) 
 
1397
                list_destroy(bg_lists->booted);
 
1398
        bg_lists->booted = list_create(NULL);
 
1399
 
 
1400
        if(bg_lists->job_running) 
 
1401
                list_destroy(bg_lists->job_running);
 
1402
        bg_lists->job_running = list_create(NULL);      
 
1403
 
 
1404
        if(bg_lists->main) 
 
1405
                list_destroy(bg_lists->main);
 
1406
 
 
1407
        bg_lists->main = list_create(destroy_bg_record);
1377
1408
 
1378
1409
        slurm_mutex_unlock(&block_state_mutex); 
1379
1410
        
1380
 
#ifdef HAVE_BGL
1381
 
        if(bg_blrtsimage_list)
1382
 
                list_destroy(bg_blrtsimage_list);
1383
 
        bg_blrtsimage_list = list_create(destroy_image);
1384
 
#endif
1385
 
        if(bg_linuximage_list)
1386
 
                list_destroy(bg_linuximage_list);
1387
 
        bg_linuximage_list = list_create(destroy_image);
1388
 
        if(bg_mloaderimage_list)
1389
 
                list_destroy(bg_mloaderimage_list);
1390
 
        bg_mloaderimage_list = list_create(destroy_image);
1391
 
        if(bg_ramdiskimage_list)
1392
 
                list_destroy(bg_ramdiskimage_list);
1393
 
        bg_ramdiskimage_list = list_create(destroy_image);      
1394
1411
}
1395
1412
 
1396
1413
/*
1397
1414
 * _validate_config_nodes - Match slurm configuration information with
1398
1415
 *                          current BG block configuration.
1399
 
 * IN/OUT bg_found_block_list - if NULL is created and then any blocks
1400
 
 *                              found on the system are then pushed on.
 
1416
 * IN/OUT curr_block_list -  List of blocks already existing on the system.
 
1417
 * IN/OUT found_block_list - List of blocks found on the system
 
1418
 *                              that are listed in the bluegene.conf.
 
1419
 * NOTE: Both of the lists above should be created with list_create(NULL)
 
1420
 *       since the bg_lists->main will contain the complete list of pointers
 
1421
 *       and be destroyed with it.
 
1422
 *
1401
1423
 * RET - SLURM_SUCCESS if they match, else an error 
1402
 
 * code. Writes bg_block_id into bg_list records.
 
1424
 * code. Writes bg_block_id into bg_lists->main records.
1403
1425
 */
1404
1426
 
1405
 
static int _validate_config_nodes(List *bg_found_block_list, char *dir)
 
1427
static int _validate_config_nodes(List curr_block_list, 
 
1428
                                  List found_block_list, char *dir)
1406
1429
{
1407
1430
        int rc = SLURM_ERROR;
1408
1431
        bg_record_t* bg_record = NULL;  
1412
1435
        ListIterator itr_curr;
1413
1436
        char tmp_char[256];
1414
1437
 
 
1438
        xassert(curr_block_list);
 
1439
        xassert(found_block_list);
 
1440
 
1415
1441
#ifdef HAVE_BG_FILES
1416
 
        /* read current bg block info into bg_curr_block_list This
 
1442
        /* read current bg block info into curr_block_list This
1417
1443
         * happens in the state load before this in emulation mode */
1418
 
        if (read_bg_blocks() == SLURM_ERROR)
 
1444
        if (read_bg_blocks(curr_block_list) == SLURM_ERROR)
1419
1445
                return SLURM_ERROR;
1420
1446
        /* since we only care about error states here we don't care
1421
 
           about the return code this must be done after the bg_list
 
1447
           about the return code this must be done after the bg_lists->main
1422
1448
           is created */
1423
 
        load_state_file(dir);
 
1449
        load_state_file(curr_block_list, dir);
1424
1450
#else
1425
1451
        /* read in state from last run. */
1426
 
        if ((rc = load_state_file(dir)) != SLURM_SUCCESS)
 
1452
        if ((rc = load_state_file(curr_block_list, dir)) != SLURM_SUCCESS)
1427
1453
                return rc;
1428
1454
        /* This needs to be reset to SLURM_ERROR or it will never be
1429
1455
           that way again ;). */
1432
1458
        if(!bg_recover) 
1433
1459
                return SLURM_ERROR;
1434
1460
 
1435
 
        if(!bg_curr_block_list)
1436
 
                return SLURM_ERROR;
1437
 
        
1438
 
        if(!*bg_found_block_list)
1439
 
                (*bg_found_block_list) = list_create(NULL);
1440
 
 
1441
 
        itr_curr = list_iterator_create(bg_curr_block_list);
1442
 
        itr_conf = list_iterator_create(bg_list);
1443
 
        while ((bg_record = (bg_record_t*) list_next(itr_conf))) {
 
1461
        itr_curr = list_iterator_create(curr_block_list);
 
1462
        itr_conf = list_iterator_create(bg_lists->main);
 
1463
        while ((bg_record = list_next(itr_conf))) {
1444
1464
                list_iterator_reset(itr_curr);
1445
1465
                while ((init_bg_record = list_next(itr_curr))) {
1446
 
                        if (strcasecmp(bg_record->nodes, init_bg_record->nodes))
 
1466
                        if (strcasecmp(bg_record->nodes, 
 
1467
                                       init_bg_record->nodes))
1447
1468
                                continue; /* wrong nodes */
1448
1469
                        if(!bit_equal(bg_record->ionode_bitmap,
1449
1470
                                      init_bg_record->ionode_bitmap))
1451
1472
#ifdef HAVE_BGL
1452
1473
                        if (bg_record->conn_type != init_bg_record->conn_type)
1453
1474
                                continue; /* wrong conn_type */
1454
 
                        if(bg_record->blrtsimage &&
1455
 
                           strcasecmp(bg_record->blrtsimage,
1456
 
                                      init_bg_record->blrtsimage)) 
1457
 
                                continue;
1458
1475
#else
1459
1476
                        if ((bg_record->conn_type != init_bg_record->conn_type)
1460
1477
                            && ((bg_record->conn_type < SELECT_SMALL)
1461
1478
                                && (init_bg_record->conn_type < SELECT_SMALL)))
1462
1479
                                continue; /* wrong conn_type */
1463
1480
#endif
1464
 
                        if(bg_record->linuximage &&
1465
 
                           strcasecmp(bg_record->linuximage,
1466
 
                                      init_bg_record->linuximage))
1467
 
                                continue;
1468
 
                        if(bg_record->mloaderimage &&
1469
 
                           strcasecmp(bg_record->mloaderimage,
1470
 
                                      init_bg_record->mloaderimage))
1471
 
                                continue;
1472
 
                        if(bg_record->ramdiskimage &&
1473
 
                           strcasecmp(bg_record->ramdiskimage,
1474
 
                                      init_bg_record->ramdiskimage))
1475
 
                                continue;
1476
1481
                                        
1477
1482
                        copy_bg_record(init_bg_record, bg_record);
1478
1483
                        /* remove from the curr list since we just
1493
1498
                        if(bg_record->full_block)
1494
1499
                                full_created = 1;
1495
1500
 
1496
 
                        list_push(*bg_found_block_list, bg_record);
 
1501
                        list_push(found_block_list, bg_record);
1497
1502
                        format_node_name(bg_record, tmp_char,
1498
1503
                                         sizeof(tmp_char));
1499
1504
                        info("Existing: BlockID:%s Nodes:%s Conn:%s",
1502
1507
                             convert_conn_type(bg_record->conn_type));
1503
1508
                        if(((bg_record->state == RM_PARTITION_READY)
1504
1509
                            || (bg_record->state == RM_PARTITION_CONFIGURING))
1505
 
                           && !block_ptr_exist_in_list(bg_booted_block_list, 
 
1510
                           && !block_ptr_exist_in_list(bg_lists->booted, 
1506
1511
                                                       bg_record))
1507
 
                                list_push(bg_booted_block_list, bg_record);
 
1512
                                list_push(bg_lists->booted, bg_record);
1508
1513
                }
1509
1514
        }               
1510
 
        if(bluegene_layout_mode == LAYOUT_DYNAMIC)
 
1515
        if(bg_conf->layout_mode == LAYOUT_DYNAMIC)
1511
1516
                goto finished;
1512
1517
 
1513
1518
        if(!full_created) {
1516
1521
                        if(init_bg_record->full_block) {
1517
1522
                                list_remove(itr_curr);
1518
1523
                                bg_record = init_bg_record;
1519
 
                                list_append(bg_list, bg_record);
1520
 
                                list_push(*bg_found_block_list, bg_record);
 
1524
                                list_append(bg_lists->main, bg_record);
 
1525
                                list_push(found_block_list, bg_record);
1521
1526
                                format_node_name(bg_record, tmp_char,
1522
1527
                                                 sizeof(tmp_char));
1523
1528
                                info("Existing: BlockID:%s Nodes:%s Conn:%s",
1528
1533
                                    || (bg_record->state 
1529
1534
                                        == RM_PARTITION_CONFIGURING))
1530
1535
                                   && !block_ptr_exist_in_list(
1531
 
                                           bg_booted_block_list, bg_record))
1532
 
                                        list_push(bg_booted_block_list,
 
1536
                                           bg_lists->booted, bg_record))
 
1537
                                        list_push(bg_lists->booted,
1533
1538
                                                  bg_record);
1534
1539
                                break;
1535
1540
                        }
1539
1544
finished:
1540
1545
        list_iterator_destroy(itr_conf);
1541
1546
        list_iterator_destroy(itr_curr);
1542
 
        if(!list_count(bg_curr_block_list))
 
1547
        if(!list_count(curr_block_list))
1543
1548
                rc = SLURM_SUCCESS;
1544
1549
        return rc;
1545
1550
}
1546
1551
 
1547
 
static int _delete_old_blocks(List bg_found_block_list)
 
1552
static int _delete_old_blocks(List curr_block_list, List found_block_list)
1548
1553
{
1549
1554
        ListIterator itr_curr, itr_found;
1550
1555
        bg_record_t *found_record = NULL, *init_record = NULL;
1551
1556
        pthread_attr_t attr_agent;
1552
1557
        pthread_t thread_agent;
1553
1558
        int retries;
1554
 
        List bg_destroy_list = list_create(NULL);
 
1559
        List destroy_list = list_create(NULL);
 
1560
 
 
1561
        xassert(curr_block_list);
 
1562
        xassert(found_block_list);
1555
1563
 
1556
1564
        info("removing unspecified blocks");
1557
1565
        if(!bg_recover) {
1558
 
                if(bg_curr_block_list) {
1559
 
                        itr_curr = list_iterator_create(bg_curr_block_list);
1560
 
                        while ((init_record = 
1561
 
                                (bg_record_t*)list_next(itr_curr))) {
 
1566
                itr_curr = list_iterator_create(curr_block_list);
 
1567
                while ((init_record = list_next(itr_curr))) {
 
1568
                        list_remove(itr_curr);
 
1569
                        list_push(destroy_list, init_record);
 
1570
                }
 
1571
                list_iterator_destroy(itr_curr);
 
1572
        } else {
 
1573
                itr_curr = list_iterator_create(curr_block_list);
 
1574
                while ((init_record = list_next(itr_curr))) {
 
1575
                        itr_found = list_iterator_create(found_block_list);
 
1576
                        while ((found_record = list_next(itr_found))) {
 
1577
                                if (!strcmp(init_record->bg_block_id, 
 
1578
                                            found_record->bg_block_id)) {
 
1579
                                        /* don't delete this one */
 
1580
                                        break;  
 
1581
                                }
 
1582
                        }
 
1583
                        list_iterator_destroy(itr_found);
 
1584
                        
 
1585
                        if(found_record == NULL) {
1562
1586
                                list_remove(itr_curr);
1563
 
                                list_push(bg_destroy_list, init_record);
 
1587
                                list_push(destroy_list, init_record);
1564
1588
                        }
1565
 
                        list_iterator_destroy(itr_curr);
1566
 
                } else {
1567
 
                        error("_delete_old_blocks: "
1568
 
                              "no bg_curr_block_list 1");
1569
 
                        list_destroy(bg_destroy_list);
1570
 
                        return SLURM_ERROR;
1571
 
                }
1572
 
        } else {
1573
 
                if(bg_curr_block_list) {
1574
 
                        itr_curr = list_iterator_create(bg_curr_block_list);
1575
 
                        while ((init_record = list_next(itr_curr))) {
1576
 
                                if(bg_found_block_list) {
1577
 
                                        itr_found = list_iterator_create(
1578
 
                                                bg_found_block_list);
1579
 
                                        while ((found_record 
1580
 
                                                = list_next(itr_found)) 
1581
 
                                               != NULL) {
1582
 
                                                if (!strcmp(init_record->
1583
 
                                                            bg_block_id, 
1584
 
                                                            found_record->
1585
 
                                                            bg_block_id)) {
1586
 
                                                        /* don't delete 
1587
 
                                                           this one 
1588
 
                                                        */
1589
 
                                                        break;  
1590
 
                                                }
1591
 
                                        }
1592
 
                                        list_iterator_destroy(itr_found);
1593
 
                                } else {
1594
 
                                        error("_delete_old_blocks: "
1595
 
                                              "no bg_found_block_list");
1596
 
                                        list_iterator_destroy(itr_curr);
1597
 
                                        list_destroy(bg_destroy_list);
1598
 
                                        return SLURM_ERROR;
1599
 
                                }
1600
 
                                if(found_record == NULL) {
1601
 
                                        list_remove(itr_curr);
1602
 
                                        list_push(bg_destroy_list, 
1603
 
                                                  init_record);
1604
 
                                }
1605
 
                        }               
1606
 
                        list_iterator_destroy(itr_curr);
1607
 
                } else {
1608
 
                        error("_delete_old_blocks: "
1609
 
                              "no bg_curr_block_list 2");
1610
 
                        list_destroy(bg_destroy_list);
1611
 
                        return SLURM_ERROR;
1612
 
                }
 
1589
                }               
 
1590
                list_iterator_destroy(itr_curr);
1613
1591
        }
1614
1592
 
1615
1593
        slurm_mutex_lock(&freed_cnt_mutex);
1617
1595
            && ((bg_destroy_block_list = list_create(NULL)) == NULL))
1618
1596
                fatal("malloc failure in block_list");
1619
1597
 
1620
 
        itr_curr = list_iterator_create(bg_destroy_list);
 
1598
        itr_curr = list_iterator_create(destroy_list);
1621
1599
        while ((init_record = (bg_record_t*) list_next(itr_curr))) {
1622
1600
                list_push(bg_destroy_block_list, init_record);
1623
1601
                num_block_to_free++;
1648
1626
        }
1649
1627
        list_iterator_destroy(itr_curr);
1650
1628
        slurm_mutex_unlock(&freed_cnt_mutex);
1651
 
        list_destroy(bg_destroy_list);
 
1629
        list_destroy(destroy_list);
1652
1630
                
1653
1631
        retries=30;
1654
1632
        while(num_block_to_free > num_block_freed) {
1665
1643
                retries++;
1666
1644
                sleep(1);
1667
1645
        }
1668
 
        
 
1646
 
 
1647
        num_block_to_free = num_block_freed = 0;
 
1648
 
1669
1649
        info("I am done deleting");
1670
1650
 
1671
1651
        return SLURM_SUCCESS;
1674
1654
static char *_get_bg_conf(void)
1675
1655
{
1676
1656
        char *val = getenv("SLURM_CONF");
1677
 
        char *rc;
 
1657
        char *rc = NULL;
1678
1658
        int i;
1679
1659
 
1680
1660
        if (!val)
1697
1677
{
1698
1678
        int rc = SLURM_SUCCESS;
1699
1679
 
1700
 
        if (bridge_api_file == NULL)
 
1680
        if (bg_conf->bridge_api_file == NULL)
1701
1681
                return rc;
1702
1682
        
1703
1683
#ifdef HAVE_BG_FILES
1704
 
        rc = bridge_set_log_params(bridge_api_file, bridge_api_verb);
 
1684
        rc = bridge_set_log_params(bg_conf->bridge_api_file,
 
1685
                                   bg_conf->bridge_api_verb);
1705
1686
#endif
1706
1687
        debug3("Bridge api file set to %s, verbose level %d\n", 
1707
 
               bridge_api_file, bridge_api_verb);
 
1688
               bg_conf->bridge_api_file, bg_conf->bridge_api_verb);
1708
1689
        
1709
1690
        return rc;
1710
1691
}