~ubuntu-branches/ubuntu/vivid/slurm-llnl/vivid

« back to all changes in this revision

Viewing changes to src/srun/opt.c

  • Committer: Bazaar Package Importer
  • Author(s): Gennaro Oliva
  • Date: 2009-09-24 23:28:15 UTC
  • mfrom: (1.1.11 upstream) (3.2.4 sid)
  • Revision ID: james.westby@ubuntu.com-20090924232815-enh65jn32q1ebg07
Tags: 2.0.5-1
* New upstream release 
* Changed dependency from lib-mysqlclient15 to lib-mysqlclient 
* Added Default-Start for runlevel 2 and 4 and $remote_fs requirement in
  init.d scripts (Closes: #541252)
* Postinst checks for wrong runlevels 2 and 4 links
* Upgraded to standard version 3.8.3
* Add lintian overrides for missing slurm-llnl-configurator.html in doc
  base registration
* modified postrm scripts to ignore pkill return value in order to avoid
  postrm failure when no slurm process is running
* Checking for slurmctld.pid before cancelling running and pending
  jobs during package removal 

Show diffs side-by-side

added added

removed removed

Lines of Context:
2
2
 *  opt.c - options processing for srun
3
3
 *****************************************************************************
4
4
 *  Copyright (C) 2002-2007 The Regents of the University of California.
5
 
 *  Copyright (C) 2008 Lawrence Livermore National Security.
 
5
 *  Copyright (C) 2008-2009 Lawrence Livermore National Security.
6
6
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
7
7
 *  Written by Mark Grondona <grondona1@llnl.gov>, et. al.
8
 
 *  LLNL-CODE-402394.
 
8
 *  CODE-OCEC-09-009. All rights reserved.
9
9
 *  
10
10
 *  This file is part of SLURM, a resource management program.
11
 
 *  For details, see <http://www.llnl.gov/linux/slurm/>.
 
11
 *  For details, see <https://computing.llnl.gov/linux/slurm/>.
 
12
 *  Please also read the included file: DISCLAIMER.
12
13
 *  
13
14
 *  SLURM is free software; you can redistribute it and/or modify it under
14
15
 *  the terms of the GNU General Public License as published by the Free
61
62
#include <stdio.h>
62
63
#include <stdlib.h>             /* getenv     */
63
64
#include <pwd.h>                /* getpwuid   */
64
 
#include <ctype.h>              /* isdigit    */
65
65
#include <sys/param.h>          /* MAXPATHLEN */
66
66
#include <sys/stat.h>
67
67
#include <unistd.h>
71
71
 
72
72
#include "src/common/list.h"
73
73
#include "src/common/log.h"
 
74
#include "src/common/mpi.h"
 
75
#include "src/common/optz.h"
74
76
#include "src/common/parse_time.h"
 
77
#include "src/common/plugstack.h"
75
78
#include "src/common/proc_args.h"
76
79
#include "src/common/slurm_protocol_api.h"
77
80
#include "src/common/slurm_protocol_interface.h"
 
81
#include "src/common/slurm_rlimits_info.h"
 
82
#include "src/common/slurm_resource_info.h"
78
83
#include "src/common/uid.h"
79
84
#include "src/common/xmalloc.h"
80
85
#include "src/common/xstring.h"
81
 
#include "src/common/slurm_rlimits_info.h"
82
 
#include "src/common/plugstack.h"
83
 
#include "src/common/optz.h"
 
86
 
84
87
#include "src/api/pmi_server.h"
85
88
 
86
89
#include "src/srun/multi_prog.h"
87
90
#include "src/srun/opt.h"
88
91
#include "src/srun/debugger.h"
89
 
#include "src/common/mpi.h"
90
92
 
91
93
/* generic OPT_ definitions -- mainly for use with env vars  */
92
94
#define OPT_NONE        0x00
93
95
#define OPT_INT         0x01
94
96
#define OPT_STRING      0x02
 
97
#define OPT_IMMEDIATE   0x03
95
98
#define OPT_DISTRIB     0x04
96
99
#define OPT_NODES       0x05
97
100
#define OPT_OVERCOMMIT  0x06
98
101
#define OPT_CORE        0x07
99
102
#define OPT_CONN_TYPE   0x08
 
103
#define OPT_RESV_PORTS  0x09
100
104
#define OPT_NO_ROTATE   0x0a
101
105
#define OPT_GEOMETRY    0x0b
102
106
#define OPT_MPI         0x0c
125
129
#define LONG_OPT_UID         0x10a
126
130
#define LONG_OPT_GID         0x10b
127
131
#define LONG_OPT_MPI         0x10c
128
 
#define LONG_OPT_CORE        0x10e
 
132
#define LONG_OPT_RESV_PORTS  0x10d
 
133
#define LONG_OPT_CORE        0x10e
129
134
#define LONG_OPT_DEBUG_TS    0x110
130
135
#define LONG_OPT_CONNTYPE    0x111
131
136
#define LONG_OPT_TEST_ONLY   0x113
142
147
#define LONG_OPT_NICE        0x11e
143
148
#define LONG_OPT_CPU_BIND    0x11f
144
149
#define LONG_OPT_MEM_BIND    0x120
145
 
#define LONG_OPT_CTRL_COMM_IFHN 0x121
146
150
#define LONG_OPT_MULTI       0x122
147
151
#define LONG_OPT_COMMENT     0x124
148
152
#define LONG_OPT_SOCKETSPERNODE  0x130
164
168
#define LONG_OPT_GET_USER_ENV    0x145
165
169
#define LONG_OPT_PTY             0x146
166
170
#define LONG_OPT_CHECKPOINT      0x147
167
 
#define LONG_OPT_CHECKPOINT_PATH 0x148
 
171
#define LONG_OPT_CHECKPOINT_DIR  0x148
168
172
#define LONG_OPT_OPEN_MODE       0x149
169
173
#define LONG_OPT_ACCTG_FREQ      0x14a
170
174
#define LONG_OPT_WCKEY           0x14b
 
175
#define LONG_OPT_RESERVATION     0x14c
 
176
#define LONG_OPT_RESTART_DIR     0x14d
171
177
 
172
178
/*---- global variables, defined in opt.h ----*/
173
179
int _verbose;
203
209
static void _process_env_var(env_vars_t *e, const char *val);
204
210
 
205
211
static bool  _under_parallel_debugger(void);
206
 
 
207
212
static void  _usage(void);
208
213
static bool  _valid_node_list(char **node_list_pptr);
209
 
static int   _verify_cpu_bind(const char *arg, char **cpu_bind,
210
 
                              cpu_bind_type_t *flags);
211
 
static int   _verify_mem_bind(const char *arg, char **mem_bind,
212
 
                              mem_bind_type_t *flags);
213
214
 
214
215
/*---[ end forward declarations of static functions ]---------------------*/
215
216
 
241
242
 */
242
243
static bool _valid_node_list(char **node_list_pptr)
243
244
{
244
 
        char *nodelist = NULL;
245
 
        
246
 
        if (strchr(*node_list_pptr, '/') == NULL)
247
 
                return true;    /* not a file name */
 
245
        int count = NO_VAL;
248
246
 
249
247
        /* If we are using Arbitrary and we specified the number of
250
248
           procs to use then we need exactly this many since we are
251
249
           saying, lay it out this way!  Same for max and min nodes.  
252
250
           Other than that just read in as many in the hostfile */
253
 
        if(opt.distribution == SLURM_DIST_ARBITRARY) {
254
 
                if(opt.nprocs_set) 
255
 
                        nodelist = slurm_read_hostfile(*node_list_pptr,
256
 
                                                       opt.nprocs);
257
 
                else if(opt.max_nodes)
258
 
                        nodelist = slurm_read_hostfile(*node_list_pptr,
259
 
                                                       opt.max_nodes);
 
251
        if(opt.nprocs_set) 
 
252
                count = opt.nprocs;
 
253
        else if(opt.nodes_set) {
 
254
                if(opt.max_nodes)
 
255
                        count = opt.max_nodes;
260
256
                else if(opt.min_nodes)
261
 
                        nodelist = slurm_read_hostfile(*node_list_pptr,
262
 
                                                       opt.min_nodes);
263
 
         } else
264
 
                nodelist = slurm_read_hostfile(*node_list_pptr, NO_VAL);
265
 
                
266
 
        if (nodelist == NULL) 
267
 
                return false;
268
 
        xfree(*node_list_pptr);
269
 
        *node_list_pptr = xstrdup(nodelist);
270
 
        free(nodelist);
271
 
        return true;
272
 
}
273
 
 
274
 
/*
 * _isvalue
 * Returns 1 if the argument appears to be a value, 0 otherwise.
 *
 * A string counts as a value when it starts with a decimal digit (this
 * covers decimal numbers and 0x... hex numbers), or when it starts with a
 * possibly empty run of hex digits that is immediately followed by ','
 * or by the end of the string.
 */
static int _isvalue(char *arg) {
        /* The <ctype.h> classifiers are only defined for values that fit
         * in an unsigned char (or EOF); plain char may be negative, so
         * walk the string through an unsigned char pointer. */
        const unsigned char *p = (const unsigned char *) arg;

        if (isdigit(*p)) {       /* decimal values and 0x... hex values */
                return 1;
        }

        while (isxdigit(*p)) {   /* hex values not preceded by 0x */
                p++;
        }
        if (*p == ',' || *p == '\0') { /* end of field or string */
                return 1;
        }

        return 0;       /* not a value */
}
292
 
 
293
 
/*
 * Replace one group of bits in "*data": every bit present in
 * "clear_mask" is dropped first, and afterwards every bit present in
 * "set_mask" is turned on.
 */
static void clear_then_set(int *data, int clear_mask, int set_mask)
{
        int kept = *data & ~clear_mask;

        *data = kept | set_mask;
}
302
 
 
303
 
/*
 * Print the list of accepted --cpu_bind keywords to stdout.
 * Declared with a (void) prototype so that calls passing arguments are
 * rejected by the compiler.
 */
static void _print_cpu_bind_help(void)
{
        printf(
"CPU bind options:\n"
"    --cpu_bind=         Bind tasks to CPUs\n"
"        q[uiet]         quietly bind before task runs (default)\n"
"        v[erbose]       verbosely report binding before task runs\n"
"        no[ne]          don't bind tasks to CPUs (default)\n"
"        rank            bind by task rank\n"
"        map_cpu:<list>  specify a CPU ID binding for each task\n"
"                        where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n"
"        mask_cpu:<list> specify a CPU ID binding mask for each task\n"
"                        where <list> is <mask1>,<mask2>,...<maskN>\n"
"        sockets         auto-generated masks bind to sockets\n"
"        cores           auto-generated masks bind to cores\n"
"        threads         auto-generated masks bind to threads\n"
"        help            show this help message\n");
}
321
 
 
322
 
/*
323
 
 * verify cpu_bind arguments
324
 
 *
325
 
 * we support different launch policy names
326
 
 * we also allow a verbose setting to be specified
327
 
 *     --cpu_bind=threads
328
 
 *     --cpu_bind=cores
329
 
 *     --cpu_bind=sockets
330
 
 *     --cpu_bind=v
331
 
 *     --cpu_bind=rank,v
332
 
 *     --cpu_bind=rank
333
 
 *     --cpu_bind={MAP_CPU|MASK_CPU}:0,1,2,3,4
334
 
 *
335
 
 *
336
 
 * returns -1 on error, 0 otherwise
337
 
 */
338
 
static int _verify_cpu_bind(const char *arg, char **cpu_bind, 
339
 
                            cpu_bind_type_t *flags)
340
 
{
341
 
        char *buf, *p, *tok;
342
 
        int bind_bits =
343
 
                CPU_BIND_NONE|CPU_BIND_RANK|CPU_BIND_MAP|CPU_BIND_MASK;
344
 
        int bind_to_bits =
345
 
                CPU_BIND_TO_SOCKETS|CPU_BIND_TO_CORES|CPU_BIND_TO_THREADS;
346
 
 
347
 
        if (arg == NULL) {
348
 
                return 0;
349
 
        }
350
 
 
351
 
        buf = xstrdup(arg);
352
 
        p = buf;
353
 
        /* change all ',' delimiters not followed by a digit to ';'  */
354
 
        /* simplifies parsing tokens while keeping map/mask together */
355
 
        while (p[0] != '\0') {
356
 
                if ((p[0] == ',') && (!_isvalue(&(p[1]))))
357
 
                        p[0] = ';';
358
 
                p++;
359
 
        }
360
 
 
361
 
        p = buf;
362
 
        while ((tok = strsep(&p, ";"))) {
363
 
                if (strcasecmp(tok, "help") == 0) {
364
 
                        _print_cpu_bind_help();
365
 
                        return 1;
366
 
                } else if ((strcasecmp(tok, "q") == 0) ||
367
 
                           (strcasecmp(tok, "quiet") == 0)) {
368
 
                        *flags &= ~CPU_BIND_VERBOSE;
369
 
                } else if ((strcasecmp(tok, "v") == 0) ||
370
 
                           (strcasecmp(tok, "verbose") == 0)) {
371
 
                        *flags |= CPU_BIND_VERBOSE;
372
 
                } else if ((strcasecmp(tok, "no") == 0) ||
373
 
                           (strcasecmp(tok, "none") == 0)) {
374
 
                        clear_then_set((int *)flags, bind_bits, CPU_BIND_NONE);
375
 
                        xfree(*cpu_bind);
376
 
                } else if (strcasecmp(tok, "rank") == 0) {
377
 
                        clear_then_set((int *)flags, bind_bits, CPU_BIND_RANK);
378
 
                        xfree(*cpu_bind);
379
 
                } else if ((strncasecmp(tok, "map_cpu", 7) == 0) ||
380
 
                           (strncasecmp(tok, "mapcpu", 6) == 0)) {
381
 
                        char *list;
382
 
                        list = strsep(&tok, ":=");
383
 
                        list = strsep(&tok, ":=");
384
 
                        clear_then_set((int *)flags, bind_bits, CPU_BIND_MAP);
385
 
                        xfree(*cpu_bind);
386
 
                        if (list && *list) {
387
 
                                *cpu_bind = xstrdup(list);
388
 
                        } else {
389
 
                                error("missing list for \"--cpu_bind=map_cpu:<list>\"");
390
 
                                xfree(buf);
391
 
                                return 1;
392
 
                        }
393
 
                } else if ((strncasecmp(tok, "mask_cpu", 8) == 0) ||
394
 
                           (strncasecmp(tok, "maskcpu", 7) == 0)) {
395
 
                        char *list;
396
 
                        list = strsep(&tok, ":=");
397
 
                        list = strsep(&tok, ":=");
398
 
                        clear_then_set((int *)flags, bind_bits, CPU_BIND_MASK);
399
 
                        xfree(*cpu_bind);
400
 
                        if (list && *list) {
401
 
                                *cpu_bind = xstrdup(list);
402
 
                        } else {
403
 
                                error("missing list for \"--cpu_bind=mask_cpu:<list>\"");
404
 
                                xfree(buf);
405
 
                                return 1;
406
 
                        }
407
 
                } else if ((strcasecmp(tok, "socket") == 0) ||
408
 
                           (strcasecmp(tok, "sockets") == 0)) {
409
 
                        clear_then_set((int *)flags, bind_to_bits,
410
 
                                       CPU_BIND_TO_SOCKETS);
411
 
                } else if ((strcasecmp(tok, "core") == 0) ||
412
 
                           (strcasecmp(tok, "cores") == 0)) {
413
 
                        clear_then_set((int *)flags, bind_to_bits,
414
 
                                       CPU_BIND_TO_CORES);
415
 
                } else if ((strcasecmp(tok, "thread") == 0) ||
416
 
                           (strcasecmp(tok, "threads") == 0)) {
417
 
                        clear_then_set((int *)flags, bind_to_bits,
418
 
                                       CPU_BIND_TO_THREADS);
419
 
                } else {
420
 
                        error("unrecognized --cpu_bind argument \"%s\"", tok);
421
 
                        xfree(buf);
422
 
                        return 1;
423
 
                }
424
 
        }
425
 
 
426
 
        xfree(buf);
427
 
        return 0;
428
 
}
429
 
 
430
 
/*
 * Print the list of accepted --mem_bind keywords to stdout.
 * Declared with a (void) prototype so that calls passing arguments are
 * rejected by the compiler.
 */
static void _print_mem_bind_help(void)
{
        printf(
"Memory bind options:\n"
"    --mem_bind=         Bind memory to locality domains (ldom)\n"
"        q[uiet]         quietly bind before task runs (default)\n"
"        v[erbose]       verbosely report binding before task runs\n"
"        no[ne]          don't bind tasks to memory (default)\n"
"        rank            bind by task rank\n"
"        local           bind to memory local to processor\n"
"        map_mem:<list>  specify a memory binding for each task\n"
"                        where <list> is <cpuid1>,<cpuid2>,...<cpuidN>\n"
"        mask_mem:<list> specify a memory binding mask for each tasks\n"
"                        where <list> is <mask1>,<mask2>,...<maskN>\n"
"        help            show this help message\n");
}
446
 
 
447
 
/*
448
 
 * verify mem_bind arguments
449
 
 *
450
 
 * we support different memory binding names
451
 
 * we also allow a verbose setting to be specified
452
 
 *     --mem_bind=v
453
 
 *     --mem_bind=rank,v
454
 
 *     --mem_bind=rank
455
 
 *     --mem_bind={MAP_MEM|MASK_MEM}:0,1,2,3,4
456
 
 *
457
 
 * returns -1 on error, 0 otherwise
458
 
 */
459
 
static int _verify_mem_bind(const char *arg, char **mem_bind, 
460
 
                            mem_bind_type_t *flags)
461
 
{
462
 
        char *buf, *p, *tok;
463
 
        int bind_bits = MEM_BIND_NONE|MEM_BIND_RANK|MEM_BIND_LOCAL|
464
 
                MEM_BIND_MAP|MEM_BIND_MASK;
465
 
 
466
 
        if (arg == NULL) {
467
 
                return 0;
468
 
        }
469
 
 
470
 
        buf = xstrdup(arg);
471
 
        p = buf;
472
 
        /* change all ',' delimiters not followed by a digit to ';'  */
473
 
        /* simplifies parsing tokens while keeping map/mask together */
474
 
        while (p[0] != '\0') {
475
 
                if ((p[0] == ',') && (!_isvalue(&(p[1]))))
476
 
                        p[0] = ';';
477
 
                p++;
478
 
        }
479
 
 
480
 
        p = buf;
481
 
        while ((tok = strsep(&p, ";"))) {
482
 
                if (strcasecmp(tok, "help") == 0) {
483
 
                        _print_mem_bind_help();
484
 
                        return 1;
485
 
                        
486
 
                } else if ((strcasecmp(tok, "q") == 0) ||
487
 
                           (strcasecmp(tok, "quiet") == 0)) {
488
 
                        *flags &= ~MEM_BIND_VERBOSE;
489
 
                } else if ((strcasecmp(tok, "v") == 0) ||
490
 
                           (strcasecmp(tok, "verbose") == 0)) {
491
 
                        *flags |= MEM_BIND_VERBOSE;
492
 
                } else if ((strcasecmp(tok, "no") == 0) ||
493
 
                           (strcasecmp(tok, "none") == 0)) {
494
 
                        clear_then_set((int *)flags, bind_bits, MEM_BIND_NONE);
495
 
                        xfree(*mem_bind);
496
 
                } else if (strcasecmp(tok, "rank") == 0) {
497
 
                        clear_then_set((int *)flags, bind_bits, MEM_BIND_RANK);
498
 
                        xfree(*mem_bind);
499
 
                } else if (strcasecmp(tok, "local") == 0) {
500
 
                        clear_then_set((int *)flags, bind_bits, MEM_BIND_LOCAL);
501
 
                        xfree(*mem_bind);
502
 
                } else if ((strncasecmp(tok, "map_mem", 7) == 0) ||
503
 
                           (strncasecmp(tok, "mapmem", 6) == 0)) {
504
 
                        char *list;
505
 
                        list = strsep(&tok, ":=");
506
 
                        list = strsep(&tok, ":=");
507
 
                        clear_then_set((int *)flags, bind_bits, MEM_BIND_MAP);
508
 
                        xfree(*mem_bind);
509
 
                        if (list && *list) {
510
 
                                *mem_bind = xstrdup(list);
511
 
                        } else {
512
 
                                error("missing list for \"--mem_bind=map_mem:<list>\"");
513
 
                                xfree(buf);
514
 
                                return 1;
515
 
                        }
516
 
                } else if ((strncasecmp(tok, "mask_mem", 8) == 0) ||
517
 
                           (strncasecmp(tok, "maskmem", 7) == 0)) {
518
 
                        char *list;
519
 
                        list = strsep(&tok, ":=");
520
 
                        list = strsep(&tok, ":=");
521
 
                        clear_then_set((int *)flags, bind_bits, MEM_BIND_MASK);
522
 
                        xfree(*mem_bind);
523
 
                        if (list && *list) {
524
 
                                *mem_bind = xstrdup(list);
525
 
                        } else {
526
 
                                error("missing list for \"--mem_bind=mask_mem:<list>\"");
527
 
                                xfree(buf);
528
 
                                return 1;
529
 
                        }
530
 
                } else {
531
 
                        error("unrecognized --mem_bind argument \"%s\"", tok);
532
 
                        xfree(buf);
533
 
                        return 1;
534
 
                }
535
 
        }
536
 
 
537
 
        xfree(buf);
538
 
        return 0;
 
257
                        count = opt.min_nodes;
 
258
        }
 
259
 
 
260
        return verify_node_list(node_list_pptr, opt.distribution, count);
539
261
}
540
262
 
541
263
/*
607
329
        opt.time_limit_str = NULL;
608
330
        opt.ckpt_interval = 0;
609
331
        opt.ckpt_interval_str = NULL;
610
 
        opt.ckpt_path = NULL;
 
332
        opt.ckpt_dir = NULL;
 
333
        opt.restart_dir = NULL;
611
334
        opt.partition = NULL;
612
335
        opt.max_threads = MAX_THREADS;
613
336
        pmi_server_max_threads(opt.max_threads);
614
337
 
615
338
        opt.relative = NO_VAL;
616
339
        opt.relative_set = false;
 
340
        opt.resv_port_cnt = NO_VAL;
617
341
        opt.cmd_name = NULL;
618
342
        opt.job_name = NULL;
619
343
        opt.job_name_set_cmd = false;
641
365
        opt.no_kill = false;
642
366
        opt.kill_bad_exit = false;
643
367
 
644
 
        opt.immediate   = false;
 
368
        opt.immediate   = 0;
645
369
 
646
370
        opt.join        = false;
647
371
        opt.max_wait    = slurm_get_wait_time();
649
373
        opt.quit_on_intr = false;
650
374
        opt.disable_status = false;
651
375
        opt.test_only   = false;
 
376
        opt.preserve_env = false;
652
377
 
653
378
        opt.quiet = 0;
654
379
        _verbose = 0;
689
414
 
690
415
        opt.prolog = slurm_get_srun_prolog();
691
416
        opt.epilog = slurm_get_srun_epilog();
 
417
        opt.begin = (time_t)0;
692
418
 
693
419
        opt.task_prolog     = NULL;
694
420
        opt.task_epilog     = NULL;
695
421
 
696
 
        opt.ctrl_comm_ifhn  = NULL;
697
 
 
698
422
        /*
699
423
         * Reset some default values if running under a parallel debugger
700
424
         */
708
432
        opt.pty = false;
709
433
        opt.open_mode = 0;
710
434
        opt.acctg_freq = -1;
 
435
        opt.reservation = NULL;
711
436
        opt.wckey = NULL;
712
437
}
713
438
 
741
466
{"SLURM_DEPENDENCY",    OPT_STRING,     &opt.dependency,    NULL             },
742
467
{"SLURM_DISTRIBUTION",  OPT_DISTRIB,    NULL,               NULL             },
743
468
{"SLURM_GEOMETRY",      OPT_GEOMETRY,   NULL,               NULL             },
744
 
{"SLURM_IMMEDIATE",     OPT_INT,        &opt.immediate,     NULL             },
745
 
{"SLURM_JOB_NAME",      OPT_STRING,     &opt.job_name,      
746
 
                                        &opt.job_name_set_env},
 
469
{"SLURM_IMMEDIATE",     OPT_IMMEDIATE,  NULL,               NULL             },
 
470
{"SLURM_JOB_NAME",      OPT_STRING,     &opt.job_name,  &opt.job_name_set_env},
 
471
/* SLURM_JOBID was used in slurm version 1.3 and below, it is now vestigial */
747
472
{"SLURM_JOBID",         OPT_INT,        &opt.jobid,         NULL             },
 
473
{"SLURM_JOB_ID",        OPT_INT,        &opt.jobid,         NULL             },
748
474
{"SLURM_KILL_BAD_EXIT", OPT_INT,        &opt.kill_bad_exit, NULL             },
749
475
{"SLURM_LABELIO",       OPT_INT,        &opt.labelio,       NULL             },
750
476
{"SLURM_LINUX_IMAGE",   OPT_STRING,     &opt.linuximage,    NULL             },
754
480
{"SLURM_NSOCKETS_PER_NODE",OPT_NSOCKETS,NULL,               NULL             },
755
481
{"SLURM_NCORES_PER_SOCKET",OPT_NCORES,  NULL,               NULL             },
756
482
{"SLURM_NTHREADS_PER_CORE",OPT_NTHREADS,NULL,               NULL             },
 
483
{"SLURM_NTASKS_PER_NODE", OPT_INT,      &opt.ntasks_per_node, NULL           },
757
484
{"SLURM_NO_ROTATE",     OPT_NO_ROTATE,  NULL,               NULL             },
758
485
{"SLURM_NPROCS",        OPT_INT,        &opt.nprocs,        &opt.nprocs_set  },
759
486
{"SLURM_OVERCOMMIT",    OPT_OVERCOMMIT, NULL,               NULL             },
761
488
{"SLURM_RAMDISK_IMAGE", OPT_STRING,     &opt.ramdiskimage,  NULL             },
762
489
{"SLURM_IOLOAD_IMAGE",  OPT_STRING,     &opt.ramdiskimage,  NULL             },
763
490
{"SLURM_REMOTE_CWD",    OPT_STRING,     &opt.cwd,           NULL             },
 
491
{"SLURM_RESV_PORTS",    OPT_RESV_PORTS, NULL,               NULL             },
764
492
{"SLURM_STDERRMODE",    OPT_STRING,     &opt.efname,        NULL             },
765
493
{"SLURM_STDINMODE",     OPT_STRING,     &opt.ifname,        NULL             },
766
494
{"SLURM_STDOUTMODE",    OPT_STRING,     &opt.ofname,        NULL             },
767
495
{"SLURM_THREADS",       OPT_INT,        &opt.max_threads,   NULL             },
768
496
{"SLURM_TIMELIMIT",     OPT_STRING,     &opt.time_limit_str,NULL             },
769
497
{"SLURM_CHECKPOINT",    OPT_STRING,     &opt.ckpt_interval_str, NULL         },
770
 
{"SLURM_CHECKPOINT_PATH",OPT_STRING,    &opt.ckpt_path,     NULL             },
 
498
{"SLURM_CHECKPOINT_DIR",OPT_STRING,     &opt.ckpt_dir,      NULL             },
 
499
{"SLURM_RESTART_DIR",   OPT_STRING,     &opt.restart_dir ,  NULL             },
771
500
{"SLURM_WAIT",          OPT_INT,        &opt.max_wait,      NULL             },
772
501
{"SLURM_DISABLE_STATUS",OPT_INT,        &opt.disable_status,NULL             },
773
502
{"SLURM_MPI_TYPE",      OPT_MPI,        NULL,               NULL             },
774
 
{"SLURM_SRUN_COMM_IFHN",OPT_STRING,     &opt.ctrl_comm_ifhn,NULL             },
775
503
{"SLURM_SRUN_MULTI",    OPT_MULTI,      NULL,               NULL             },
776
504
{"SLURM_UNBUFFEREDIO",  OPT_INT,        &opt.unbuffered,    NULL             },
777
505
{"SLURM_NODELIST",      OPT_STRING,     &opt.alloc_nodelist,NULL             },
826
554
        case OPT_INT:
827
555
                if (val != NULL) {
828
556
                        *((int *) e->arg) = (int) strtol(val, &end, 10);
829
 
                        if (!(end && *end == '\0')) 
830
 
                                error("%s=%s invalid. ignoring...", e->var, val);
 
557
                        if (!(end && *end == '\0')) {
 
558
                                error("%s=%s invalid. ignoring...", 
 
559
                                      e->var, val);
 
560
                        }
831
561
                }
832
562
                break;
833
563
 
843
573
                break;
844
574
 
845
575
        case OPT_CPU_BIND:
846
 
                if (_verify_cpu_bind(val, &opt.cpu_bind,
847
 
                                     &opt.cpu_bind_type))
 
576
                if (slurm_verify_cpu_bind(val, &opt.cpu_bind,
 
577
                                          &opt.cpu_bind_type))
848
578
                        exit(1);
849
579
                break;
850
580
 
851
581
        case OPT_MEM_BIND:
852
 
                if (_verify_mem_bind(val, &opt.mem_bind,
853
 
                                     &opt.mem_bind_type))
 
582
                if (slurm_verify_mem_bind(val, &opt.mem_bind,
 
583
                                          &opt.mem_bind_type))
854
584
                        exit(1);
855
585
                break;
856
586
 
873
603
                opt.shared = 0;
874
604
                break;
875
605
 
 
606
        case OPT_RESV_PORTS:
 
607
                if (val)
 
608
                        opt.resv_port_cnt = strtol(val, NULL, 10);
 
609
                else
 
610
                        opt.resv_port_cnt = 0;
 
611
                break;
 
612
 
876
613
        case OPT_OPEN_MODE:
877
614
                if ((val[0] == 'a') || (val[0] == 'A'))
878
615
                        opt.open_mode = OPEN_MODE_APPEND;
901
638
                }
902
639
                break;
903
640
 
 
641
        case OPT_IMMEDIATE:
 
642
                if (val)
 
643
                        opt.immediate = strtol(val, NULL, 10);
 
644
                else
 
645
                        opt.immediate = DEFAULT_IMMEDIATE;
 
646
                break;
 
647
 
904
648
        case OPT_MPI:
905
649
                if (mpi_hook_client_init((char *)val) == SLURM_ERROR) {
906
650
                        fatal("\"%s=%s\" -- invalid MPI type, "
954
698
                {"slurmd-debug",  required_argument, 0, 'd'},
955
699
                {"chdir",         required_argument, 0, 'D'},
956
700
                {"error",         required_argument, 0, 'e'},
 
701
                {"preserve-env",  no_argument,       0, 'E'},
 
702
                {"preserve-slurm-env", no_argument,  0, 'E'},
957
703
                {"geometry",      required_argument, 0, 'g'},
958
704
                {"hold",          no_argument,       0, 'H'},
959
705
                {"input",         required_argument, 0, 'i'},
960
 
                {"immediate",     no_argument,       0, 'I'},
 
706
                {"immediate",     optional_argument, 0, 'I'},
961
707
                {"join",          no_argument,       0, 'j'},
962
708
                {"job-name",      required_argument, 0, 'J'},
963
709
                {"no-kill",       no_argument,       0, 'k'},
997
743
                {"mincores",         required_argument, 0, LONG_OPT_MINCORES},
998
744
                {"minthreads",       required_argument, 0, LONG_OPT_MINTHREADS},
999
745
                {"mem",              required_argument, 0, LONG_OPT_MEM},
1000
 
                {"job-mem",          required_argument, 0, LONG_OPT_MEM_PER_CPU},
1001
 
                {"task-mem",         required_argument, 0, LONG_OPT_MEM_PER_CPU},
1002
746
                {"mem-per-cpu",      required_argument, 0, LONG_OPT_MEM_PER_CPU},
1003
747
                {"hint",             required_argument, 0, LONG_OPT_HINT},
1004
748
                {"mpi",              required_argument, 0, LONG_OPT_MPI},
 
749
                {"resv-ports",       optional_argument, 0, LONG_OPT_RESV_PORTS},
1005
750
                {"tmp",              required_argument, 0, LONG_OPT_TMP},
1006
751
                {"jobid",            required_argument, 0, LONG_OPT_JOBID},
1007
752
                {"msg-timeout",      required_argument, 0, LONG_OPT_TIMEO},
1024
769
                {"task-prolog",      required_argument, 0, LONG_OPT_TASK_PROLOG},
1025
770
                {"task-epilog",      required_argument, 0, LONG_OPT_TASK_EPILOG},
1026
771
                {"nice",             optional_argument, 0, LONG_OPT_NICE},
1027
 
                {"ctrl-comm-ifhn",   required_argument, 0, LONG_OPT_CTRL_COMM_IFHN},
1028
772
                {"multi-prog",       no_argument,       0, LONG_OPT_MULTI},
1029
773
                {"comment",          required_argument, 0, LONG_OPT_COMMENT},
1030
774
                {"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE},
1044
788
                {"get-user-env",     optional_argument, 0, LONG_OPT_GET_USER_ENV},
1045
789
                {"pty",              no_argument,       0, LONG_OPT_PTY},
1046
790
                {"checkpoint",       required_argument, 0, LONG_OPT_CHECKPOINT},
1047
 
                {"checkpoint-path",  required_argument, 0, LONG_OPT_CHECKPOINT_PATH},
 
791
                {"checkpoint-dir",   required_argument, 0, LONG_OPT_CHECKPOINT_DIR},
1048
792
                {"open-mode",        required_argument, 0, LONG_OPT_OPEN_MODE},
1049
793
                {"acctg-freq",       required_argument, 0, LONG_OPT_ACCTG_FREQ},
1050
794
                {"wckey",            required_argument, 0, LONG_OPT_WCKEY},
 
795
                {"reservation",      required_argument, 0, LONG_OPT_RESERVATION},
 
796
                {"restart-dir",      required_argument, 0, LONG_OPT_RESTART_DIR},
1051
797
                {NULL,               0,                 0, 0}
1052
798
        };
1053
 
        char *opt_string = "+aAbB:c:C:d:D:e:g:Hi:IjJ:kKlL:m:n:N:"
1054
 
                "o:Op:P:qQr:R:st:T:uU:vVw:W:x:XZ";
 
799
        char *opt_string = "+aAbB:c:C:d:D:e:Eg:Hi:IjJ:kKlL:m:n:N:"
 
800
                "o:Op:P:qQr:Rst:T:uU:vVw:W:x:XZ";
1055
801
 
1056
802
        struct option *optz = spank_option_table_create (long_options);
1057
803
 
1123
869
                        opt.cwd = xstrdup(optarg);
1124
870
                        break;
1125
871
                case (int)'e':
1126
 
                        if (opt.pty)
1127
 
                                fatal("--error incompatable with --pty option");
 
872
                        if (opt.pty) {
 
873
                                fatal("--error incompatable with --pty "
 
874
                                      "option");
 
875
                        }
1128
876
                        xfree(opt.efname);
1129
877
                        if (strncasecmp(optarg, "none", (size_t) 4) == 0)
1130
878
                                opt.efname = xstrdup("/dev/null");
1131
879
                        else
1132
880
                                opt.efname = xstrdup(optarg);
1133
881
                        break;
 
882
                case (int)'E':
 
883
                        opt.preserve_env = true;
 
884
                        break;
1134
885
                case (int)'g':
1135
886
                        if (verify_geometry(optarg, opt.geometry))
1136
887
                                exit(1);
1139
890
                        opt.hold = true;
1140
891
                        break;
1141
892
                case (int)'i':
1142
 
                        if (opt.pty)
1143
 
                                fatal("--input incompatable with --pty option");
 
893
                        if (opt.pty) {
 
894
                                fatal("--input incompatable with "
 
895
                                      "--pty option");
 
896
                        }
1144
897
                        xfree(opt.ifname);
1145
898
                        if (strncasecmp(optarg, "none", (size_t) 4) == 0)
1146
899
                                opt.ifname = xstrdup("/dev/null");
1148
901
                                opt.ifname = xstrdup(optarg);
1149
902
                        break;
1150
903
                case (int)'I':
1151
 
                        opt.immediate = true;
 
904
                        if (optarg)
 
905
                                opt.immediate = strtol(optarg, NULL, 10);
 
906
                        else
 
907
                                opt.immediate = DEFAULT_IMMEDIATE;
1152
908
                        break;
1153
909
                case (int)'j':
1154
910
                        opt.join = true;
1287
1043
                        opt.shared = 0;
1288
1044
                        break;
1289
1045
                case LONG_OPT_CPU_BIND:
1290
 
                        if (_verify_cpu_bind(optarg, &opt.cpu_bind,
1291
 
                                             &opt.cpu_bind_type))
 
1046
                        if (slurm_verify_cpu_bind(optarg, &opt.cpu_bind,
 
1047
                                                  &opt.cpu_bind_type))
1292
1048
                                exit(1);
1293
1049
                        break;
1294
1050
                case LONG_OPT_MEM_BIND:
1295
 
                        if (_verify_mem_bind(optarg, &opt.mem_bind,
1296
 
                                             &opt.mem_bind_type))
 
1051
                        if (slurm_verify_mem_bind(optarg, &opt.mem_bind,
 
1052
                                                  &opt.mem_bind_type))
1297
1053
                                exit(1);
1298
1054
                        break;
1299
1055
                case LONG_OPT_CORE:
1340
1096
                                      optarg);
1341
1097
                        }
1342
1098
                        break;
 
1099
                case LONG_OPT_RESV_PORTS:
 
1100
                        if (optarg)
 
1101
                                opt.resv_port_cnt = strtol(optarg, NULL, 10);
 
1102
                        else
 
1103
                                opt.resv_port_cnt = 0;
 
1104
                        break;
1343
1105
                case LONG_OPT_TMP:
1344
1106
                        opt.job_min_tmp_disk = str_to_bytes(optarg);
1345
1107
                        if (opt.job_min_tmp_disk < 0) {
1366
1128
                case LONG_OPT_UID:
1367
1129
                        if (opt.euid != (uid_t) -1)
1368
1130
                                fatal ("duplicate --uid option");
1369
 
                        opt.euid = uid_from_string (optarg);
1370
 
                        if (opt.euid == (uid_t) -1)
 
1131
                        if (uid_from_string (optarg, &opt.euid) < 0)
1371
1132
                                fatal ("--uid=\"%s\" invalid", optarg);
1372
1133
                        break;
1373
1134
                case LONG_OPT_GID:
1374
1135
                        if (opt.egid != (gid_t) -1)
1375
1136
                                fatal ("duplicate --gid option");
1376
 
                        opt.egid = gid_from_string (optarg);
1377
 
                        if (opt.egid == (gid_t) -1)
 
1137
                        if (gid_from_string (optarg, &opt.egid) < 0)
1378
1138
                                fatal ("--gid=\"%s\" invalid", optarg);
1379
1139
                        break;
1380
1140
                case LONG_OPT_DEBUG_TS:
1464
1224
                                }
1465
1225
                        }
1466
1226
                        break;
1467
 
                case LONG_OPT_CTRL_COMM_IFHN:
1468
 
                        xfree(opt.ctrl_comm_ifhn);
1469
 
                        opt.ctrl_comm_ifhn = xstrdup(optarg);
1470
 
                        break;
1471
1227
                case LONG_OPT_MULTI:
1472
1228
                        opt.multi_prog = true;
1473
1229
                        break;
1537
1293
                        opt.reboot = true;
1538
1294
                        break;
1539
1295
                case LONG_OPT_GET_USER_ENV:
1540
 
                        error("--get-user-env is no longer supported in srun, use sbatch");
 
1296
                        error("--get-user-env is no longer supported in srun, "
 
1297
                              "use sbatch");
1541
1298
                        break;
1542
1299
                case LONG_OPT_PTY:
1543
1300
#ifdef HAVE_PTY_H
1575
1332
                        xfree(opt.wckey);
1576
1333
                        opt.wckey = xstrdup(optarg);
1577
1334
                        break;
1578
 
                case LONG_OPT_CHECKPOINT_PATH:
1579
 
                        xfree(opt.ckpt_path);
1580
 
                        opt.ckpt_path = xstrdup(optarg);
 
1335
                case LONG_OPT_RESERVATION:
 
1336
                        xfree(opt.reservation);
 
1337
                        opt.reservation = xstrdup(optarg);
 
1338
                        break;
 
1339
                case LONG_OPT_CHECKPOINT_DIR:
 
1340
                        xfree(opt.ckpt_dir);
 
1341
                        opt.ckpt_dir = xstrdup(optarg);
 
1342
                        break;
 
1343
                case LONG_OPT_RESTART_DIR:
 
1344
                        xfree(opt.restart_dir);
 
1345
                        opt.restart_dir = xstrdup(optarg);
1581
1346
                        break;
1582
1347
                default:
1583
1348
                        if (spank_process_option (opt_char, optarg) < 0) {
1741
1506
         *   these debug messages cause the generation of more
1742
1507
         *   debug messages ad infinitum)
1743
1508
         */
1744
 
        if (opt.slurmd_debug + LOG_LEVEL_ERROR > LOG_LEVEL_DEBUG2)
 
1509
        if (opt.slurmd_debug + LOG_LEVEL_ERROR > LOG_LEVEL_DEBUG2) {
1745
1510
                opt.slurmd_debug = LOG_LEVEL_DEBUG2 - LOG_LEVEL_ERROR;
 
1511
                info("Using srun's max debug increment of %d", opt.slurmd_debug);
 
1512
        }
1746
1513
 
1747
1514
        if (opt.quiet && _verbose) {
1748
1515
                error ("don't specify both --verbose (-v) and --quiet (-Q)");
1801
1568
                if (!_valid_node_list(&opt.nodelist))
1802
1569
                        exit(1);
1803
1570
        }
1804
 
        
 
1571
 
 
1572
        /* set up the proc and node counts based on the arbitrary list
 
1573
           of nodes */
 
1574
        if((opt.distribution == SLURM_DIST_ARBITRARY)
 
1575
           && (!opt.nodes_set || !opt.nprocs_set)) {            
 
1576
                hostlist_t hl = hostlist_create(opt.nodelist);
 
1577
                if(!opt.nprocs_set) {
 
1578
                        opt.nprocs_set = 1;
 
1579
                        opt.nprocs = hostlist_count(hl);
 
1580
                } 
 
1581
                if(!opt.nodes_set) {
 
1582
                        opt.nodes_set = 1;
 
1583
                        hostlist_uniq(hl);
 
1584
                        opt.min_nodes = opt.max_nodes = hostlist_count(hl);
 
1585
                }
 
1586
                hostlist_destroy(hl);
 
1587
        }               
 
1588
 
1805
1589
        /* now if max is set make sure we have <= max_nodes in the
1806
1590
         * nodelist but only if it isn't arbitrary since the user has
1807
1591
         * laid it out how it should be so don't mess with it print an
1837
1621
 
1838
1622
        /* check for realistic arguments */
1839
1623
        if (opt.nprocs <= 0) {
1840
 
                error("%s: invalid number of processes (-n %d)",
1841
 
                      opt.progname, opt.nprocs);
 
1624
                error("invalid number of processes (-n %d)", opt.nprocs);
1842
1625
                verified = false;
1843
1626
        }
1844
1627
 
1845
1628
        if (opt.cpus_per_task < 0) {
1846
 
                error("%s: invalid number of cpus per task (-c %d)\n",
1847
 
                      opt.progname, opt.cpus_per_task);
 
1629
                error("invalid number of cpus per task (-c %d)\n",
 
1630
                      opt.cpus_per_task);
1848
1631
                verified = false;
1849
1632
        }
1850
1633
 
1851
1634
        if ((opt.min_nodes <= 0) || (opt.max_nodes < 0) || 
1852
1635
            (opt.max_nodes && (opt.min_nodes > opt.max_nodes))) {
1853
 
                error("%s: invalid number of nodes (-N %d-%d)\n",
1854
 
                      opt.progname, opt.min_nodes, opt.max_nodes);
 
1636
                error("invalid number of nodes (-N %d-%d)\n",
 
1637
                      opt.min_nodes, opt.max_nodes);
 
1638
                verified = false;
 
1639
        }
 
1640
 
 
1641
#ifdef HAVE_BGL
 
1642
        if (opt.blrtsimage && strchr(opt.blrtsimage, ' ')) {
 
1643
                error("invalid BlrtsImage given '%s'", opt.blrtsimage);
 
1644
                verified = false;
 
1645
        }
 
1646
#endif
 
1647
 
 
1648
        if (opt.linuximage && strchr(opt.linuximage, ' ')) {
 
1649
#ifdef HAVE_BGL
 
1650
                error("invalid LinuxImage given '%s'", opt.linuximage);
 
1651
#else
 
1652
                error("invalid CnloadImage given '%s'", opt.linuximage);
 
1653
#endif
 
1654
                verified = false;
 
1655
        }
 
1656
 
 
1657
        if (opt.mloaderimage && strchr(opt.mloaderimage, ' ')) {
 
1658
                error("invalid MloaderImage given '%s'", opt.mloaderimage);
 
1659
                verified = false;
 
1660
        }
 
1661
 
 
1662
        if (opt.ramdiskimage && strchr(opt.ramdiskimage, ' ')) {
 
1663
#ifdef HAVE_BGL
 
1664
                error("invalid RamDiskImage given '%s'", opt.ramdiskimage);
 
1665
#else
 
1666
                error("invalid IoloadImage given '%s'", opt.ramdiskimage);
 
1667
#endif
1855
1668
                verified = false;
1856
1669
        }
1857
1670
 
1864
1677
                 */
1865
1678
                if (!(opt.cpu_bind_type & (CPU_BIND_TO_SOCKETS |
1866
1679
                                           CPU_BIND_TO_CORES |
1867
 
                                           CPU_BIND_TO_THREADS))) {
 
1680
                                           CPU_BIND_TO_THREADS |
 
1681
                                           CPU_BIND_TO_LDOMS))) {
1868
1682
                        opt.cpu_bind_type |= CPU_BIND_TO_CORES;
1869
1683
                }
1870
1684
        }
1876
1690
                 */
1877
1691
                if (!(opt.cpu_bind_type & (CPU_BIND_TO_SOCKETS |
1878
1692
                                           CPU_BIND_TO_CORES |
1879
 
                                           CPU_BIND_TO_THREADS))) {
 
1693
                                           CPU_BIND_TO_THREADS |
 
1694
                                           CPU_BIND_TO_LDOMS))) {
1880
1695
                        opt.cpu_bind_type |= CPU_BIND_TO_SOCKETS;
1881
1696
                }
1882
1697
        }
1897
1712
                        opt.nodes_set = true;
1898
1713
                }
1899
1714
        }
1900
 
        if ((opt.nodes_set || opt.extra_set) && !opt.nprocs_set) {
 
1715
        if ((opt.nodes_set || opt.extra_set)                            && 
 
1716
            ((opt.min_nodes == opt.max_nodes) || (opt.max_nodes == 0))  && 
 
1717
            !opt.nprocs_set) {
1901
1718
                /* 1 proc / node default */
1902
1719
                opt.nprocs = opt.min_nodes;
1903
1720
 
2012
1829
 
2013
1830
        if (opt.ckpt_interval_str) {
2014
1831
                opt.ckpt_interval = time_str2mins(opt.ckpt_interval_str);
2015
 
                if ((opt.ckpt_interval < 0) && (opt.ckpt_interval != INFINITE)) {
 
1832
                if ((opt.ckpt_interval < 0) && 
 
1833
                    (opt.ckpt_interval != INFINITE)) {
2016
1834
                        error("Invalid checkpoint interval specification");
2017
1835
                        exit(1);
2018
1836
                }
2019
1837
        }
2020
1838
 
2021
 
        if (! opt.ckpt_path)
2022
 
                opt.ckpt_path = xstrdup(opt.cwd);
 
1839
        if (! opt.ckpt_dir)
 
1840
                opt.ckpt_dir = xstrdup(opt.cwd);
2023
1841
 
2024
1842
        if ((opt.euid != (uid_t) -1) && (opt.euid != opt.uid)) 
2025
1843
                opt.uid = opt.euid;
2027
1845
        if ((opt.egid != (gid_t) -1) && (opt.egid != opt.gid)) 
2028
1846
                opt.gid = opt.egid;
2029
1847
 
2030
 
        if (opt.immediate) {
2031
 
                char *sched_name = slurm_get_sched_type();
2032
 
                if (strcmp(sched_name, "sched/wiki") == 0) {
2033
 
                        info("WARNING: Ignoring the -I/--immediate option "
2034
 
                                "(not supported by Maui)");
2035
 
                        opt.immediate = false;
2036
 
                }
2037
 
                xfree(sched_name);
2038
 
        }
 
1848
         if (slurm_verify_cpu_bind(NULL, &opt.cpu_bind,
 
1849
                                   &opt.cpu_bind_type))
 
1850
                exit(1);
2039
1851
 
2040
1852
        return verified;
2041
1853
}
2112
1924
        info("partition      : %s",
2113
1925
             opt.partition == NULL ? "default" : opt.partition);
2114
1926
        info("job name       : `%s'", opt.job_name);
 
1927
        info("reservation    : `%s'", opt.reservation);
2115
1928
        info("wckey          : `%s'", opt.wckey);
2116
1929
        info("distribution   : %s", format_task_dist_states(opt.distribution));
2117
1930
        if(opt.distribution == SLURM_DIST_PLANE)
2123
1936
        info("core format    : %s", core_format_name (opt.core_type));
2124
1937
        info("verbose        : %d", _verbose);
2125
1938
        info("slurmd_debug   : %d", opt.slurmd_debug);
2126
 
        info("immediate      : %s", tf_(opt.immediate));
 
1939
        if (opt.immediate <= 1)
 
1940
                info("immediate      : %s", tf_(opt.immediate));
 
1941
        else
 
1942
                info("immediate      : %d secs", (opt.immediate - 1));
2127
1943
        info("label output   : %s", tf_(opt.labelio));
2128
1944
        info("unbuffered IO  : %s", tf_(opt.unbuffered));
2129
1945
        info("overcommit     : %s", tf_(opt.overcommit));
2134
1950
                info("time_limit     : %d", opt.time_limit);
2135
1951
        if (opt.ckpt_interval)
2136
1952
                info("checkpoint     : %d secs", opt.ckpt_interval);
2137
 
        info("checkpoint_path: %s", opt.ckpt_path);
 
1953
        info("checkpoint_dir : %s", opt.ckpt_dir);
 
1954
        if (opt.restart_dir)
 
1955
                info("restart_dir    : %s", opt.restart_dir);
2138
1956
        info("wait           : %d", opt.max_wait);
2139
1957
        if (opt.nice)
2140
1958
                info("nice           : %d", opt.nice);
2155
1973
        xfree(str);
2156
1974
        info("reboot         : %s", opt.reboot ? "no" : "yes");
2157
1975
        info("rotate         : %s", opt.no_rotate ? "yes" : "no");
 
1976
        info("preserve_env   : %s", tf_(opt.preserve_env));
2158
1977
        
2159
1978
#ifdef HAVE_BGL
2160
1979
        if (opt.blrtsimage)
2189
2008
        info("mail_user      : %s", opt.mail_user);
2190
2009
        info("task_prolog    : %s", opt.task_prolog);
2191
2010
        info("task_epilog    : %s", opt.task_epilog);
2192
 
        info("ctrl_comm_ifhn : %s", opt.ctrl_comm_ifhn);
2193
2011
        info("multi_prog     : %s", opt.multi_prog ? "yes" : "no");
2194
2012
        info("sockets-per-node  : %d - %d", opt.min_sockets_per_node,
2195
2013
                                            opt.max_sockets_per_node);
2201
2019
        info("ntasks-per-socket : %d", opt.ntasks_per_socket);
2202
2020
        info("ntasks-per-core   : %d", opt.ntasks_per_core);
2203
2021
        info("plane_size        : %u", opt.plane_size);
 
2022
        if (opt.resv_port_cnt != NO_VAL)
 
2023
                info("resv_port_cnt     : %d", opt.resv_port_cnt);
2204
2024
        str = print_commandline(opt.argc, opt.argv);
2205
2025
        info("remote command    : `%s'", str);
2206
2026
        xfree(str);
2213
2033
        return (MPIR_being_debugged != 0);
2214
2034
}
2215
2035
 
 
2036
 
2216
2037
static void _usage(void)
2217
2038
{
2218
2039
        printf(
2219
2040
"Usage: srun [-N nnodes] [-n ntasks] [-i in] [-o out] [-e err]\n"
2220
2041
"            [-c ncpus] [-r n] [-p partition] [--hold] [-t minutes]\n"
2221
 
"            [-D path] [--immediate] [--overcommit] [--no-kill]\n"
 
2042
"            [-D path] [--immediate[=secs]] [--overcommit] [--no-kill]\n"
2222
2043
"            [--share] [--label] [--unbuffered] [-m dist] [-J jobname]\n"
2223
2044
"            [--jobid=id] [--verbose] [--slurmd_debug=#]\n"
2224
2045
"            [--core=type] [-T threads] [-W sec] [--checkpoint=time]\n"
2225
 
"            [--checkpoint-path=dir]  [--licenses=names]\n"
 
2046
"            [--checkpoint-dir=dir]  [--licenses=names]\n"
 
2047
"            [--restart-dir=dir]\n"
2226
2048
"            [--contiguous] [--mincpus=n] [--mem=MB] [--tmp=MB] [-C list]\n"
2227
2049
"            [--mpi=type] [--account=name] [--dependency=type:jobid]\n"
2228
2050
"            [--kill-on-bad-exit] [--propagate[=rlimits] [--comment=name]\n"
2229
2051
"            [--cpu_bind=...] [--mem_bind=...] [--network=type]\n"
2230
 
"            [--ntasks-per-node=n] [--ntasks-per-socket=n]\n"
2231
 
"            [--ntasks-per-core=n] [--mem-per-cpu=MB]\n"
 
2052
"            [--ntasks-per-node=n] [--ntasks-per-socket=n] [reservation=name]\n"
 
2053
"            [--ntasks-per-core=n] [--mem-per-cpu=MB] [--preserve-env]\n"
2232
2054
#ifdef HAVE_BG          /* Blue gene specific options */
2233
2055
"            [--geometry=XxYxZ] [--conn-type=type] [--no-rotate] [--reboot]\n"
2234
2056
#ifdef HAVE_BGL
2254
2076
"Usage: srun [OPTIONS...] executable [args...]\n"
2255
2077
"\n"
2256
2078
"Parallel run options:\n"
2257
 
"  -n, --ntasks=ntasks         number of tasks to run\n"
2258
 
"  -N, --nodes=N               number of nodes on which to run (N = min[-max])\n"
 
2079
"      --begin=time            defer job until HH:MM DD/MM/YY\n"
2259
2080
"  -c, --cpus-per-task=ncpus   number of cpus required per task\n"
2260
 
"      --ntasks-per-node=n     number of tasks to invoke on each node\n"
2261
 
"  -i, --input=in              location of stdin redirection\n"
2262
 
"  -o, --output=out            location of stdout redirection\n"
 
2081
"      --checkpoint=time       job step checkpoint interval\n"
 
2082
"      --checkpoint-dir=dir    directory to store job step checkpoint image \n"
 
2083
"                              files\n"
 
2084
"      --comment=name          arbitrary comment\n"
 
2085
"      --core=type             change default corefile format type\n"
 
2086
"                              (type=\"list\" to list of valid formats)\n"
 
2087
"  -d, --slurmd-debug=level    slurmd debug level\n"
 
2088
"  -D, --chdir=path            change remote current working directory\n"
2263
2089
"  -e, --error=err             location of stderr redirection\n"
2264
 
"  -r, --relative=n            run job step relative to node n of allocation\n"
2265
 
"  -p, --partition=partition   partition requested\n"
 
2090
"      --epilog=program        run \"program\" after launching job step\n"
 
2091
"  -E, --preserve-env          env vars for node and task counts override\n"
 
2092
"                              command-line flags\n"
 
2093
"      --get-user-env          used by Moab.  See srun man page.\n"
2266
2094
"  -H, --hold                  submit job in held state\n"
2267
 
"  -t, --time=minutes          time limit\n"
2268
 
"  -D, --chdir=path            change remote current working directory\n"
2269
 
"  -I, --immediate             exit if resources are not immediately available\n"
2270
 
"  -O, --overcommit            overcommit resources\n"
 
2095
"  -i, --input=in              location of stdin redirection\n"
 
2096
"  -I, --immediate[=secs]      exit if resources not available in \"secs\"\n"
 
2097
"      --jobid=id              run under already allocated job\n"
 
2098
"  -J, --job-name=jobname      name of job\n"
2271
2099
"  -k, --no-kill               do not kill job on node failure\n"
2272
2100
"  -K, --kill-on-bad-exit      kill the job if any task terminates with a\n"
2273
2101
"                              non-zero exit code\n"
2274
 
"  -s, --share                 share nodes with other jobs\n"
2275
2102
"  -l, --label                 prepend task number to lines of stdout/err\n"
2276
 
"  -u, --unbuffered            do not line-buffer stdout/err\n"
 
2103
"  -L, --licenses=names        required license, comma separated\n"
2277
2104
"  -m, --distribution=type     distribution method for processes to nodes\n"
2278
2105
"                              (type = block|cyclic|arbitrary)\n"
2279
 
"  -J, --job-name=jobname      name of job\n"
2280
 
"      --jobid=id              run under already allocated job\n"
 
2106
"      --mail-type=type        notify on state change: BEGIN, END, FAIL or ALL\n"
 
2107
"      --mail-user=user        who to send email notification for job state\n"
 
2108
"                              changes\n"
2281
2109
"      --mpi=type              type of MPI being used\n"
2282
 
"  -b, --batch                 submit as batch job for later execution\n"
 
2110
"      --multi-prog            if set the program name specified is the\n"
 
2111
"                              configuration specification for multiple programs\n"
 
2112
"  -n, --ntasks=ntasks         number of tasks to run\n"
 
2113
"      --nice[=value]          decrease secheduling priority by value\n"
 
2114
"      --ntasks-per-node=n     number of tasks to invoke on each node\n"
 
2115
"  -N, --nodes=N               number of nodes on which to run (N = min[-max])\n"
 
2116
"  -o, --output=out            location of stdout redirection\n"
 
2117
"  -O, --overcommit            overcommit resources\n"
 
2118
"  -p, --partition=partition   partition requested\n"
 
2119
"      --prolog=program        run \"program\" before launching job step\n"
 
2120
"      --propagate[=rlimits]   propagate all [or specific list of] rlimits\n"
 
2121
#ifdef HAVE_PTY_H
 
2122
"      --pty                   run task zero in pseudo terminal\n"
 
2123
#endif
 
2124
"  -P, --dependency=type:jobid defer job until condition on jobid is satisfied\n"
 
2125
"  -q, --quit-on-interrupt     quit on single Ctrl-C\n"
 
2126
"  -Q, --quiet                 quiet mode (suppress informational messages)\n"
 
2127
"  -r, --relative=n            run job step relative to node n of allocation\n"
 
2128
"      --restart-dir=dir       directory of checkpoint image files to restart\n"
 
2129
"                              from\n"
 
2130
"  -s, --share                 share nodes with other jobs\n"
 
2131
"  -t, --time=minutes          time limit\n"
 
2132
"      --task-epilog=program   run \"program\" after launching task\n"
 
2133
"      --task-prolog=program   run \"program\" before launching task\n"
2283
2134
"  -T, --threads=threads       set srun launch fanout\n"
 
2135
"  -u, --unbuffered            do not line-buffer stdout/err\n"
 
2136
"  -U, --account=name          charge job to specified account\n"
 
2137
"  -v, --verbose               verbose mode (multiple -v's increase verbosity)\n"
2284
2138
"  -W, --wait=sec              seconds to wait after first task exits\n"
2285
2139
"                              before killing job\n"
2286
 
"  -q, --quit-on-interrupt     quit on single Ctrl-C\n"
2287
2140
"  -X, --disable-status        Disable Ctrl-C status feature\n"
2288
 
"  -v, --verbose               verbose mode (multiple -v's increase verbosity)\n"
2289
 
"  -Q, --quiet                 quiet mode (suppress informational messages)\n"
2290
 
"  -d, --slurmd-debug=level    slurmd debug level\n"
2291
 
"      --core=type             change default corefile format type\n"
2292
 
"                              (type=\"list\" to list of valid formats)\n"
2293
 
"  -P, --dependency=type:jobid defer job until condition on jobid is satisfied\n"
2294
 
"      --nice[=value]          decrease secheduling priority by value\n"
2295
 
"  -U, --account=name          charge job to specified account\n"
2296
 
"      --comment=name          arbitrary comment\n"
2297
 
"      --propagate[=rlimits]   propagate all [or specific list of] rlimits\n"
2298
 
"      --mpi=type              specifies version of MPI to use\n"
2299
 
"      --prolog=program        run \"program\" before launching job step\n"
2300
 
"      --epilog=program        run \"program\" after launching job step\n"
2301
 
"      --task-prolog=program   run \"program\" before launching task\n"
2302
 
"      --task-epilog=program   run \"program\" after launching task\n"
2303
 
"      --begin=time            defer job until HH:MM DD/MM/YY\n"
2304
 
"      --mail-type=type        notify on state change: BEGIN, END, FAIL or ALL\n"
2305
 
"      --mail-user=user        who to send email notification for job state changes\n"
2306
 
"      --ctrl-comm-ifhn=addr   interface hostname for PMI communications from srun\n"
2307
 
"      --multi-prog            if set the program name specified is the\n"
2308
 
"                              configuration specification for multiple programs\n"
2309
 
"      --get-user-env          used by Moab.  See srun man page.\n"
2310
 
"  -L, --licenses=names        required license, comma separated\n"
2311
 
"      --checkpoint=time       job step checkpoint interval\n"
2312
 
"      --checkpoint-path=dir   path to store job step checkpoint image files\n"
2313
 
#ifdef HAVE_PTY_H
2314
 
"      --pty                   run task zero in pseudo terminal\n"
2315
 
#endif
2316
2141
"\n"
2317
2142
"Constraint options:\n"
 
2143
"  -C, --constraint=list       specify a list of constraints\n"
 
2144
"      --contiguous            demand a contiguous range of nodes\n"
2318
2145
"      --mincpus=n             minimum number of cpus per node\n"
 
2146
"      --mincores=n            minimum number of cores per cpu\n"
2319
2147
"      --minsockets=n          minimum number of sockets per node\n"
2320
 
"      --mincores=n            minimum number of cores per cpu\n"
2321
2148
"      --minthreads=n          minimum number of threads per core\n"
2322
2149
"      --mem=MB                minimum amount of real memory\n"
 
2150
"      --reservation=name      allocate resources from named reservation\n"
2323
2151
"      --tmp=MB                minimum amount of temporary disk\n"
2324
 
"      --contiguous            demand a contiguous range of nodes\n"
2325
 
"  -C, --constraint=list       specify a list of constraints\n"
2326
2152
"  -w, --nodelist=hosts...     request a specific list of hosts\n"
2327
2153
"  -x, --exclude=hosts...      exclude a specific list of hosts\n"
2328
2154
"  -Z, --no-allocate           don't allocate nodes (must supply -w)\n"
2333
2159
"                              or don't share CPUs for job steps\n"
2334
2160
"      --mem-per-cpu=MB        maximum amount of real memory per allocated\n"
2335
2161
"                              CPU required by the job.\n" 
2336
 
"                              --mem >= --job-mem if --mem is specified.\n" 
 
2162
"                              --mem >= --mem-per-cpu if --mem is specified.\n" 
 
2163
"      --resv-ports            reserve communication ports\n" 
2337
2164
"\n"
2338
2165
"Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n" 
2339
 
"  -B --extra-node-info=S[:C[:T]]            Expands to:\n"
2340
 
"      --sockets-per-node=S    number of sockets per node to allocate\n"
2341
 
"      --cores-per-socket=C    number of cores per socket to allocate\n"
2342
 
"      --threads-per-core=T    number of threads per core to allocate\n"
 
2166
"  -B  --extra-node-info=S[:C[:T]]            Expands to:\n"
 
2167
"       --sockets-per-node=S   number of sockets per node to allocate\n"
 
2168
"       --cores-per-socket=C   number of cores per socket to allocate\n"
 
2169
"       --threads-per-core=T   number of threads per core to allocate\n"
2343
2170
"                              each field can be 'min[-max]' or wildcard '*'\n"
2344
2171
"                              total cpus requested = (N x S x C x T)\n"
2345
 
"\n"
2346
2172
"      --ntasks-per-socket=n   number of tasks to invoke on each socket\n"
2347
2173
"      --ntasks-per-core=n     number of tasks to invoke on each core\n"
 
2174
"\n"
2348
2175
"\n");
2349
2176
        conf = slurm_conf_lock();
2350
2177
        if (conf->task_plugin != NULL
2351
2178
            && strcasecmp(conf->task_plugin, "task/affinity") == 0) {
2352
2179
                printf(
 
2180
"      --cpu_bind=             Bind tasks to CPUs\n"
 
2181
"                              (see \"--cpu_bind=help\" for options)\n"
2353
2182
"      --hint=                 Bind tasks according to application hints\n"
2354
2183
"                              (see \"--hint=help\" for options)\n"
2355
 
"      --cpu_bind=             Bind tasks to CPUs\n"
2356
 
"                              (see \"--cpu_bind=help\" for options)\n"
2357
2184
"      --mem_bind=             Bind memory to locality domains (ldom)\n"
2358
2185
"                              (see \"--mem_bind=help\" for options)\n"
2359
2186
                        );
2360
2187
        }
2361
2188
        slurm_conf_unlock();
2362
 
        printf("\n");
2363
2189
        spank_print_options (stdout, 6, 30);
2364
 
        printf("\n");
2365
2190
 
2366
 
        printf(
 
2191
        printf("\n"
2367
2192
#ifdef HAVE_AIX                         /* AIX/Federation specific options */
2368
 
                "AIX related options:\n"
2369
 
                "  --network=type              communication protocol to be used\n"
2370
 
                "\n"
 
2193
"AIX related options:\n"
 
2194
"  --network=type              communication protocol to be used\n"
 
2195
"\n"
2371
2196
#endif
2372
2197
 
2373
2198
#ifdef HAVE_BG                          /* Blue gene specific options */
2374
 
                "Blue Gene related options:\n"
2375
 
                "  -g, --geometry=XxYxZ        geometry constraints of the job\n"
2376
 
                "  -R, --no-rotate             disable geometry rotation\n"
2377
 
                "      --reboot                reboot block before starting job\n"
2378
 
                "      --conn-type=type        constraint on type of connection, MESH or TORUS\n"
2379
 
                "                              if not set, then tries to fit TORUS else MESH\n"
 
2199
"Blue Gene related options:\n"
 
2200
"  -g, --geometry=XxYxZ        geometry constraints of the job\n"
 
2201
"  -R, --no-rotate             disable geometry rotation\n"
 
2202
"      --reboot                reboot block before starting job\n"
 
2203
"      --conn-type=type        constraint on type of connection, MESH or TORUS\n"
 
2204
"                              if not set, then tries to fit TORUS else MESH\n"
2380
2205
#ifndef HAVE_BGL
2381
 
                "                              If wanting to run in HTC mode (only for 1\n"
2382
 
                "                              midplane and below).  You can use HTC_S for\n"
2383
 
                "                              SMP, HTC_D for Dual, HTC_V for\n"
2384
 
                "                              virtual node mode, and HTC_L for Linux mode.\n" 
2385
 
                "      --cnload-image=path     path to compute node image for bluegene block.  Default if not set\n"
2386
 
                "      --mloader-image=path    path to mloader image for bluegene block.  Default if not set\n"
2387
 
                "      --ioload-image=path     path to ioload image for bluegene block.  Default if not set\n"
 
2206
"                              If wanting to run in HTC mode (only for 1\n"
 
2207
"                              midplane and below).  You can use HTC_S for\n"
 
2208
"                              SMP, HTC_D for Dual, HTC_V for\n"
 
2209
"                              virtual node mode, and HTC_L for Linux mode.\n" 
 
2210
"      --cnload-image=path     path to compute node image for bluegene block.  Default if not set\n"
 
2211
"      --mloader-image=path    path to mloader image for bluegene block.  Default if not set\n"
 
2212
"      --ioload-image=path     path to ioload image for bluegene block.  Default if not set\n"
2388
2213
#else
2389
 
                "      --blrts-image=path      path to blrts image for bluegene block.  Default if not set\n"
2390
 
                "      --linux-image=path      path to linux image for bluegene block.  Default if not set\n"
2391
 
                "      --mloader-image=path    path to mloader image for bluegene block.  Default if not set\n"
2392
 
                "      --ramdisk-image=path    path to ramdisk image for bluegene block.  Default if not set\n"
2393
 
#endif
2394
 
#endif
2395
 
                "\n"
2396
 
                "Help options:\n"
2397
 
                "      --help                  show this help message\n"
2398
 
                "      --usage                 display brief usage message\n"
2399
 
                "      --print-request         Display job's layout without scheduling it\n"
2400
 
                "\n"
2401
 
                "Other options:\n"
2402
 
                "  -V, --version               output version information and exit\n"
2403
 
                "\n"
 
2214
"      --blrts-image=path      path to blrts image for bluegene block.  Default if not set\n"
 
2215
"      --linux-image=path      path to linux image for bluegene block.  Default if not set\n"
 
2216
"      --mloader-image=path    path to mloader image for bluegene block.  Default if not set\n"
 
2217
"      --ramdisk-image=path    path to ramdisk image for bluegene block.  Default if not set\n"
 
2218
#endif
 
2219
#endif
 
2220
"\n"
 
2221
"Help options:\n"
 
2222
"      --help                  show this help message\n"
 
2223
"      --usage                 display brief usage message\n"
 
2224
"      --print-request         Display job's layout without scheduling it\n"
 
2225
"\n"
 
2226
"Other options:\n"
 
2227
"  -V, --version               output version information and exit\n"
 
2228
"\n"
2404
2229
                );
2405
2230
 
2406
2231
}