~ubuntu-branches/ubuntu/vivid/slurm-llnl/vivid

« back to all changes in this revision

Viewing changes to src/sinfo/sinfo.c

  • Committer: Bazaar Package Importer
  • Author(s): Gennaro Oliva
  • Date: 2009-09-24 23:28:15 UTC
  • mfrom: (1.3.4 upstream)
  • mto: (16.1.1 maverick)
  • mto: This revision was merged to the branch mainline in revision 14.
  • Revision ID: james.westby@ubuntu.com-20090924232815-uu5ku2gqk7appy26
Tags: upstream-2.0.5
Import upstream version 2.0.5

Show diffs side-by-side

added added

removed removed

Lines of Context:
5
5
 *  Copyright (C) 2008 Lawrence Livermore National Security.
6
6
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
7
7
 *  Written by Joey Ekstrom <ekstrom1@llnl.gov>, Morris Jette <jette1@llnl.gov>
8
 
 *  LLNL-CODE-402394.
 
8
 *  CODE-OCEC-09-009. All rights reserved.
9
9
 *  
10
10
 *  This file is part of SLURM, a resource management program.
11
 
 *  For details, see <http://www.llnl.gov/linux/slurm/>.
 
11
 *  For details, see <https://computing.llnl.gov/linux/slurm/>.
 
12
 *  Please also read the included file: DISCLAIMER.
12
13
 *  
13
14
 *  SLURM is free software; you can redistribute it and/or modify it under
14
15
 *  the terms of the GNU General Public License as published by the Free
49
50
 
50
51
#ifdef HAVE_BG                       
51
52
# include "src/plugins/select/bluegene/wrap_rm_api.h"
 
53
# include "src/plugins/select/bluegene/plugin/bluegene.h"
52
54
#endif
53
55
 
54
56
/********************
90
92
enum {
91
93
        SINFO_BG_IDLE_STATE,
92
94
        SINFO_BG_ALLOC_STATE,
 
95
        SINFO_BG_DRAINING_STATE,
93
96
        SINFO_BG_ERROR_STATE    
94
97
};
95
98
#endif
341
344
        int i=0;
342
345
        bg_info_record_t *bg_info_record = NULL;
343
346
        int node_scaling = partition_msg->partition_array[0].node_scaling;
344
 
        char *slurm_user = xstrdup(slurmctld_conf.slurm_user_name);
 
347
        int block_error = 0;
345
348
 
346
349
        for (i=0; i<node_msg->record_count; i++) {
347
350
                node_ptr = &(node_msg->node_array[i]);
348
 
                /* in each node_ptr we overload the threads var
349
 
                 * with the number of cnodes in the used_cpus var
350
 
                 * will be used to tell how many cnodes are
351
 
                 * allocated and the cores will represent the cnodes
 
351
                /* In each node_ptr we overload the threads var
 
352
                 * with the number of cnodes in drained state, the
 
353
                 * sockets var with the nodes in draining state, and
 
354
                 * the used_cpus var will be used to tell how many cnodes are
 
355
                 * allocated.  The cores will also represent the cnodes
352
356
                 * in an error state. So we can get an idle count by
353
 
                 * subtracting those 2 numbers from the total possible
 
357
                 * subtracting those 3 numbers from the total possible
354
358
                 * cnodes (which are the idle cnodes).
355
359
                 */
356
360
                node_ptr->threads = node_scaling;
357
361
                node_ptr->cores = 0;
 
362
                node_ptr->sockets = 0;
358
363
                node_ptr->used_cpus = 0;
 
364
                if((node_ptr->node_state & NODE_STATE_BASE) == NODE_STATE_DOWN) 
 
365
                        continue;
 
366
 
 
367
                if(node_ptr->node_state & NODE_STATE_DRAIN) {
 
368
                        if(node_ptr->node_state & NODE_STATE_FAIL) {
 
369
                                node_ptr->node_state &= ~NODE_STATE_DRAIN;
 
370
                                node_ptr->node_state &= ~NODE_STATE_FAIL;
 
371
                        } else {
 
372
                                node_ptr->cores += node_scaling;
 
373
                        }
 
374
                }
 
375
                node_ptr->node_state |= NODE_STATE_IDLE;
359
376
        }
360
377
 
361
378
        for (i=0; i<node_select_msg->record_count; i++) {
362
379
                bg_info_record = &(node_select_msg->bg_info_array[i]);
363
380
                
364
381
                /* this block is idle we won't mark it */
365
 
                if (bg_info_record->state != RM_PARTITION_ERROR
366
 
                    && !strcmp(slurm_user, bg_info_record->owner_name))
 
382
                if (bg_info_record->job_running == NO_JOB_RUNNING)
367
383
                        continue;
 
384
 
368
385
                _update_nodes_for_bg(node_scaling, node_msg, bg_info_record);
369
386
        }
370
 
        xfree(slurm_user);
371
387
 
372
388
#endif
373
389
        /* by default every partition is shown, even if no nodes */
402
418
                        if (params.filtering && _filter_out(node_ptr))
403
419
                                continue;
404
420
#ifdef HAVE_BG
405
 
                        for(i=0; i<3; i++) {
 
421
                        if((node_ptr->node_state & NODE_STATE_DRAIN) 
 
422
                           && (node_ptr->node_state & NODE_STATE_FAIL)) {
 
423
                                node_ptr->node_state &= ~NODE_STATE_DRAIN;
 
424
                                node_ptr->node_state &= ~NODE_STATE_FAIL;
 
425
                                block_error = 1;
 
426
                        } else
 
427
                                block_error = 0;
 
428
                        node_ptr->threads = node_scaling;
 
429
                        for(i=0; i<4; i++) {
406
430
                                int norm = 0;
407
431
                                switch(i) {
408
432
                                case SINFO_BG_IDLE_STATE:
425
449
                                         * as it's current state 
426
450
                                         */
427
451
                                        node_ptr->threads -=
428
 
                                                (node_ptr->cores
 
452
                                                (node_ptr->cores 
 
453
                                                 + node_ptr->sockets
429
454
                                                 + node_ptr->used_cpus);
430
 
                                        
 
455
                                        if((int16_t)node_ptr->threads < 0)
 
456
                                                node_ptr->threads = 0;
431
457
                                        if(node_ptr->threads == node_scaling)
432
458
                                                norm = 1;
433
459
                                        else {
449
475
                                        node_ptr->threads =
450
476
                                                node_ptr->used_cpus;
451
477
                                        break;
 
478
                                case SINFO_BG_DRAINING_STATE:
 
479
                                        /* get the drained node count */
 
480
                                        if(!node_ptr->sockets) 
 
481
                                                continue;
 
482
                                        node_ptr->node_state =
 
483
                                                NODE_STATE_ALLOCATED;
 
484
                                        node_ptr->node_state |= 
 
485
                                                NODE_STATE_DRAIN;
 
486
                                        node_ptr->threads = node_ptr->sockets;
 
487
                                        break;
452
488
                                case SINFO_BG_ERROR_STATE:
453
489
                                        /* get the error node count */
454
490
                                        if(!node_ptr->cores) 
455
491
                                                continue;
 
492
                                        
456
493
                                        node_ptr->node_state &=
457
494
                                                NODE_STATE_FLAGS;
458
495
                                        node_ptr->node_state |= 
486
523
                         */
487
524
                        if(norm) 
488
525
                                break;
 
526
                        else if(i == SINFO_BG_ERROR_STATE) {
 
527
                                if(block_error)
 
528
                                        node_ptr->node_state
 
529
                                                |= NODE_STATE_FAIL;
 
530
                        }
489
531
                        }
490
532
#endif
491
533
                }
529
571
 
530
572
                iterator = list_iterator_create(params.state_list);
531
573
                while ((node_state = list_next(iterator))) {
532
 
                        if (*node_state & NODE_STATE_FLAGS) {
 
574
                        if (*node_state == 
 
575
                            (NODE_STATE_DRAIN | NODE_STATE_ALLOCATED)) {
 
576
                                /* We search for anything that gets mapped to
 
577
                                 * DRAINING in node_state_string */
 
578
                                if (!(node_ptr->node_state & NODE_STATE_DRAIN))
 
579
                                        continue;
 
580
                                if (((node_ptr->node_state & NODE_STATE_BASE) ==
 
581
                                     NODE_STATE_ALLOCATED) ||
 
582
                                    (node_ptr->node_state & 
 
583
                                     NODE_STATE_COMPLETING)) {
 
584
                                        match = true;
 
585
                                        break;
 
586
                                }
 
587
                        } else if (*node_state == 
 
588
                                   (NODE_STATE_DRAIN | NODE_STATE_IDLE)) {
 
589
                                /* We search for anything that gets mapped to
 
590
                                 * DRAINED in node_state_string */
 
591
                                if (!(node_ptr->node_state & NODE_STATE_DRAIN))
 
592
                                        continue;
 
593
                                if (((node_ptr->node_state & NODE_STATE_BASE) !=
 
594
                                     NODE_STATE_ALLOCATED) &&
 
595
                                    (!(node_ptr->node_state & 
 
596
                                       NODE_STATE_COMPLETING))) {
 
597
                                        match = true;
 
598
                                        break;
 
599
                                }
 
600
                        } else if (*node_state & NODE_STATE_FLAGS) {
533
601
                                if (*node_state & node_ptr->node_state) {
534
602
                                        match = true;
535
603
                                        break;
650
718
            (part_ptr->max_nodes != sinfo_ptr->part_info->max_nodes))
651
719
                return false;
652
720
 
 
721
        if (params.match_flags.default_time_flag &&
 
722
            (part_ptr->default_time != sinfo_ptr->part_info->default_time))
 
723
                return false;
 
724
 
653
725
        if (params.match_flags.max_time_flag &&
654
726
            (part_ptr->max_time != sinfo_ptr->part_info->max_time))
655
727
                return false;
781
853
                                 bg_info_record_t *bg_info_record)
782
854
{
783
855
        node_info_t *node_ptr = NULL;
784
 
        hostlist_t hl;
785
 
        char *node_name = NULL;
 
856
        int j = 0;
786
857
 
787
858
        /* we are using less than one node */
788
859
        if(bg_info_record->conn_type == SELECT_SMALL) 
789
860
                node_scaling = bg_info_record->node_cnt;
790
 
                   
791
 
        hl = hostlist_create(bg_info_record->nodes);
792
 
        while (1) {
793
 
                if (node_name)
794
 
                        free(node_name);
795
 
                node_name = hostlist_shift(hl);
796
 
                if (!node_name)
797
 
                        break;
798
 
                node_ptr = _find_node(node_name, node_msg);
799
 
                if (!node_ptr)
800
 
                        continue;
801
 
                /* cores is overloaded to be the cnodes in an error
802
 
                 * state and used_cpus is overloaded to be the nodes in
803
 
                 * use.  No block should be sent in here if it isn't
804
 
                 * in use (that doesn't mean in a free state, it means
805
 
                 * the user isn't slurm or the block is in an error state.  
806
 
                 */
807
 
                if(bg_info_record->state == RM_PARTITION_ERROR) 
808
 
                        node_ptr->cores += node_scaling;
809
 
                else
810
 
                        node_ptr->used_cpus += node_scaling;
 
861
        
 
862
        j = 0;
 
863
        while(bg_info_record->bp_inx[j] >= 0) {
 
864
                int i2 = 0;
 
865
                for(i2 = bg_info_record->bp_inx[j];
 
866
                    i2 <= bg_info_record->bp_inx[j+1];
 
867
                    i2++) {
 
868
                        node_ptr = &(node_msg->node_array[i2]);
 
869
                        /* cores is overloaded to be the
 
870
                         * cnodes in an error state and
 
871
                         * used_cpus is overloaded to be the nodes in
 
872
                         * use.  No block should be sent in
 
873
                         * here if it isn't in use (that
 
874
                         * doesn't mean in a free state, it means
 
875
                         * the user isn't slurm or the block 
 
876
                         * is in an error state.  
 
877
                         */
 
878
                        if(((node_ptr->node_state & NODE_STATE_BASE) 
 
879
                            == NODE_STATE_DOWN)
 
880
                           || (node_ptr->node_state & NODE_STATE_DRAIN)) {
 
881
                                if(bg_info_record->job_running 
 
882
                                   > NO_JOB_RUNNING) {
 
883
                                        node_ptr->sockets += node_scaling;
 
884
                                        node_ptr->cores -= node_scaling;
 
885
                                }
 
886
                                continue;
 
887
                        }
 
888
                        
 
889
                        if(bg_info_record->state == RM_PARTITION_ERROR) {
 
890
                                node_ptr->cores += node_scaling;
 
891
                                node_ptr->node_state 
 
892
                                        |= NODE_STATE_DRAIN;
 
893
                                node_ptr->node_state
 
894
                                        |= NODE_STATE_FAIL;
 
895
                        } else if(bg_info_record->job_running 
 
896
                                  > NO_JOB_RUNNING)
 
897
                                node_ptr->used_cpus += node_scaling;
 
898
                        else 
 
899
                                error("Hey we didn't get anything here");
 
900
                }
 
901
                j += 2;
811
902
        }
812
 
        hostlist_destroy(hl);
813
 
        
814
903
}
815
904
#endif
816
905