~ubuntu-branches/ubuntu/vivid/slurm-llnl/vivid

« back to all changes in this revision

Viewing changes to src/common/slurm_protocol_api.c

  • Committer: Bazaar Package Importer
  • Author(s): Gennaro Oliva
  • Date: 2009-09-24 23:28:15 UTC
  • mfrom: (1.1.11 upstream) (3.2.4 sid)
  • Revision ID: james.westby@ubuntu.com-20090924232815-enh65jn32q1ebg07
Tags: 2.0.5-1
* New upstream release 
* Changed dependency from lib-mysqlclient15 to lib-mysqlclient 
* Added Default-Start for runlevel 2 and 4 and $remote_fs requirement in
  init.d scripts (Closes: #541252)
* Postinst checks for wrong runlevels 2 and 4 links
* Upgraded to Standards-Version 3.8.3
* Add lintian overrides for missing slurm-llnl-configurator.html in doc
  base registration
* Modified postrm scripts to ignore pkill return value in order to avoid
  postrm failure when no slurm process is running
* Checking for slurmctld.pid before cancelling running and pending
  jobs during package removal 

Show diffs side-by-side

added added

removed removed

Lines of Context:
2
2
 *  slurm_protocol_api.c - high-level slurm communication functions
3
3
 *****************************************************************************
4
4
 *  Copyright (C) 2002-2007 The Regents of the University of California.
5
 
 *  Copyright (C) 2008 Lawrence Livermore National Security.
 
5
 *  Copyright (C) 2008-2009 Lawrence Livermore National Security.
6
6
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
7
7
 *  Written by Kevin Tew <tew1@llnl.gov>, et. al.
8
 
 *  LLNL-CODE-402394.
 
8
 *  CODE-OCEC-09-009. All rights reserved.
9
9
 *  
10
10
 *  This file is part of SLURM, a resource management program.
11
 
 *  For details, see <http://www.llnl.gov/linux/slurm/>.
 
11
 *  For details, see <https://computing.llnl.gov/linux/slurm/>.
 
12
 *  Please also read the included file: DISCLAIMER.
12
13
 *  
13
14
 *  SLURM is free software; you can redistribute it and/or modify it under
14
15
 *  the terms of the GNU General Public License as published by the Free
91
92
static void  _remap_slurmctld_errno(void);
92
93
static int   _unpack_msg_uid(Buf buffer);
93
94
 
 
95
#if _DEBUG
 
96
static void _print_data(char *data, int len);
 
97
#endif
 
98
 
94
99
/* define the slurmdbd_options flag */
95
100
slurm_dbd_conf_t *slurmdbd_conf = NULL;
96
101
 
181
186
        slurm_conf_destroy();
182
187
}
183
188
 
 
189
/* slurm_get_complete_wait
 
190
 * RET CompleteWait value from slurm.conf
 
191
 */
 
192
uint16_t slurm_get_complete_wait(void)
 
193
{
 
194
        uint16_t complete_wait = 0;
 
195
        slurm_ctl_conf_t *conf;
 
196
 
 
197
        if(slurmdbd_conf) {
 
198
        } else {
 
199
                conf = slurm_conf_lock();
 
200
                complete_wait = conf->complete_wait;
 
201
                slurm_conf_unlock();
 
202
        }
 
203
        return complete_wait;
 
204
}
 
205
 
184
206
/* update internal configuration data structure as needed.
185
207
 *      exit with lock set */
186
208
/* static inline void _lock_update_config() */
206
228
        return batch_start_timeout;
207
229
}
208
230
 
 
231
/* slurm_get_resume_timeout
 
232
 * RET ResumeTimeout value from slurm.conf
 
233
 */
 
234
uint16_t slurm_get_resume_timeout(void)
 
235
{
 
236
        uint16_t resume_timeout = 0;
 
237
        slurm_ctl_conf_t *conf;
 
238
 
 
239
        if(slurmdbd_conf) {
 
240
        } else {
 
241
                conf = slurm_conf_lock();
 
242
                resume_timeout = conf->resume_timeout;
 
243
                slurm_conf_unlock();
 
244
        }
 
245
        return resume_timeout;
 
246
}
 
247
 
209
248
/* slurm_get_def_mem_per_task
210
249
 * RET DefMemPerTask value from slurm.conf
211
250
 */
223
262
        return mem_per_task;
224
263
}
225
264
 
 
265
/* slurm_get_kill_on_bad_exit
 
266
 * RET KillOnBadExit value from slurm.conf
 
267
 */
 
268
uint16_t slurm_get_kill_on_bad_exit(void)
 
269
{
 
270
        uint16_t kill_on_bad_exit = 0;
 
271
        slurm_ctl_conf_t *conf;
 
272
 
 
273
        if(slurmdbd_conf) {
 
274
        } else {
 
275
                conf = slurm_conf_lock();
 
276
                kill_on_bad_exit = conf->kill_on_bad_exit;
 
277
                slurm_conf_unlock();
 
278
        }
 
279
        return kill_on_bad_exit;
 
280
}
 
281
 
 
282
/* slurm_get_debug_flags
 
283
 * RET DebugFlags value from slurm.conf
 
284
 */
 
285
uint32_t slurm_get_debug_flags(void)
 
286
{
 
287
        uint32_t debug_flags = 0;
 
288
        slurm_ctl_conf_t *conf;
 
289
 
 
290
        if(slurmdbd_conf) {
 
291
        } else {
 
292
                conf = slurm_conf_lock();
 
293
                debug_flags = conf->debug_flags;
 
294
                slurm_conf_unlock();
 
295
        }
 
296
        return debug_flags;
 
297
}
 
298
 
226
299
/* slurm_get_max_mem_per_task
227
300
 * RET MaxMemPerTask value from slurm.conf
228
301
 */
292
365
        return mpi_default;
293
366
}
294
367
 
 
368
/* slurm_get_mpi_params
 
369
 * get mpi parameters value from slurmctld_conf object
 
370
 * RET char *   - mpi parameters value from slurm.conf,  MUST be xfreed by caller
 
371
 */
 
372
char *slurm_get_mpi_params(void)
 
373
{
 
374
        char *mpi_params = NULL;
 
375
        slurm_ctl_conf_t *conf;
 
376
 
 
377
        if(slurmdbd_conf) {
 
378
        } else {
 
379
                conf = slurm_conf_lock();
 
380
                mpi_params = xstrdup(conf->mpi_params);
 
381
                slurm_conf_unlock();
 
382
        }
 
383
        return mpi_params;
 
384
}
 
385
 
295
386
/* slurm_get_msg_timeout
296
387
 * get default message timeout value from slurmctld_conf object
297
388
 */
332
423
        return plugin_dir;
333
424
}
334
425
 
 
426
/* slurm_get_priority_decay_hl
 
427
 * returns the priority decay half life in seconds from slurmctld_conf object
 
428
 * RET uint32_t - decay_hl in secs.
 
429
 */
 
430
uint32_t slurm_get_priority_decay_hl(void)
 
431
{
 
432
        uint32_t priority_hl = NO_VAL;
 
433
        slurm_ctl_conf_t *conf;
 
434
 
 
435
        if(slurmdbd_conf) {             
 
436
        } else {
 
437
                conf = slurm_conf_lock();
 
438
                priority_hl = conf->priority_decay_hl;
 
439
                slurm_conf_unlock();
 
440
        }
 
441
 
 
442
        return priority_hl;
 
443
}
 
444
 
 
445
/* slurm_get_priority_favor_small
 
446
 * returns whether or not we are favoring small jobs from slurmctld_conf object
 
447
 * RET bool - true if favor small, false else.
 
448
 */
 
449
bool slurm_get_priority_favor_small(void)
 
450
{
 
451
        bool factor = 0;
 
452
        slurm_ctl_conf_t *conf;
 
453
 
 
454
        if(slurmdbd_conf) {             
 
455
        } else {
 
456
                conf = slurm_conf_lock();
 
457
                factor = conf->priority_favor_small;
 
458
                slurm_conf_unlock();
 
459
        }
 
460
 
 
461
        return factor;
 
462
}
 
463
 
 
464
 
 
465
/* slurm_get_priority_max_age
 
466
 * returns the priority age max in seconds from slurmctld_conf object
 
467
 * RET uint32_t - age_max in secs.
 
468
 */
 
469
uint32_t slurm_get_priority_max_age(void)
 
470
{
 
471
        uint32_t age = NO_VAL;
 
472
        slurm_ctl_conf_t *conf;
 
473
 
 
474
        if(slurmdbd_conf) {             
 
475
        } else {
 
476
                conf = slurm_conf_lock();
 
477
                age = conf->priority_max_age;
 
478
                slurm_conf_unlock();
 
479
        }
 
480
 
 
481
        return age;
 
482
}
 
483
 
 
484
/* slurm_get_priority_reset_period
 
485
 * returns the priority usage reset period from slurmctld_conf object
 
486
 * RET uint16_t - flag, see PRIORITY_RESET_* in slurm/slurm.h.
 
487
 */
 
488
uint16_t slurm_get_priority_reset_period(void)
 
489
{
 
490
        uint16_t reset_period = (uint16_t) 0;
 
491
        slurm_ctl_conf_t *conf;
 
492
 
 
493
        if(slurmdbd_conf) {             
 
494
        } else {
 
495
                conf = slurm_conf_lock();
 
496
                reset_period = conf->priority_reset_period;
 
497
                slurm_conf_unlock();
 
498
        }
 
499
 
 
500
        return reset_period;
 
501
}
 
502
 
 
503
/* slurm_get_priority_type
 
504
 * returns the priority type from slurmctld_conf object
 
505
 * RET char *    - priority type, MUST be xfreed by caller
 
506
 */
 
507
char *slurm_get_priority_type(void)
 
508
{
 
509
        char *priority_type = NULL;
 
510
        slurm_ctl_conf_t *conf;
 
511
 
 
512
        if(slurmdbd_conf) {             
 
513
        } else {
 
514
                conf = slurm_conf_lock();
 
515
                priority_type = xstrdup(conf->priority_type);
 
516
                slurm_conf_unlock();
 
517
        }
 
518
 
 
519
        return priority_type;
 
520
}
 
521
 
 
522
/* slurm_get_priority_weight_age
 
523
 * returns the priority weight for age from slurmctld_conf object
 
524
 * RET uint32_t - factor weight.
 
525
 */
 
526
uint32_t slurm_get_priority_weight_age(void)
 
527
{
 
528
        uint32_t factor = NO_VAL;
 
529
        slurm_ctl_conf_t *conf;
 
530
 
 
531
        if(slurmdbd_conf) {             
 
532
        } else {
 
533
                conf = slurm_conf_lock();
 
534
                factor = conf->priority_weight_age;
 
535
                slurm_conf_unlock();
 
536
        }
 
537
 
 
538
        return factor;
 
539
}
 
540
 
 
541
 
 
542
/* slurm_get_priority_weight_fairshare
 
543
 * returns the priority weight for fairshare from slurmctld_conf object
 
544
 * RET uint32_t - factor weight.
 
545
 */
 
546
uint32_t slurm_get_priority_weight_fairshare(void)
 
547
{
 
548
        uint32_t factor = NO_VAL;
 
549
        slurm_ctl_conf_t *conf;
 
550
 
 
551
        if(slurmdbd_conf) {             
 
552
        } else {
 
553
                conf = slurm_conf_lock();
 
554
                factor = conf->priority_weight_fs;
 
555
                slurm_conf_unlock();
 
556
        }
 
557
 
 
558
        return factor;
 
559
}
 
560
 
 
561
 
 
562
/* slurm_get_priority_weight_job_size
 
563
 * returns the priority weight for job size from slurmctld_conf object
 
564
 * RET uint32_t - factor weight.
 
565
 */
 
566
uint32_t slurm_get_priority_weight_job_size(void)
 
567
{
 
568
        uint32_t factor = NO_VAL;
 
569
        slurm_ctl_conf_t *conf;
 
570
 
 
571
        if(slurmdbd_conf) {             
 
572
        } else {
 
573
                conf = slurm_conf_lock();
 
574
                factor = conf->priority_weight_js;
 
575
                slurm_conf_unlock();
 
576
        }
 
577
 
 
578
        return factor;
 
579
}
 
580
 
 
581
/* slurm_get_priority_weight_partition
 
582
 * returns the priority weight for partitions from slurmctld_conf object
 
583
 * RET uint32_t - factor weight.
 
584
 */
 
585
uint32_t slurm_get_priority_weight_partition(void)
 
586
{
 
587
        uint32_t factor = NO_VAL;
 
588
        slurm_ctl_conf_t *conf;
 
589
 
 
590
        if(slurmdbd_conf) {             
 
591
        } else {
 
592
                conf = slurm_conf_lock();
 
593
                factor = conf->priority_weight_part;
 
594
                slurm_conf_unlock();
 
595
        }
 
596
 
 
597
        return factor;
 
598
}
 
599
 
 
600
 
 
601
/* slurm_get_priority_weight_qos
 
602
 * returns the priority weight for QOS from slurmctld_conf object
 
603
 * RET uint32_t - factor weight.
 
604
 */
 
605
uint32_t slurm_get_priority_weight_qos(void)
 
606
{
 
607
        uint32_t factor = NO_VAL;
 
608
        slurm_ctl_conf_t *conf;
 
609
 
 
610
        if(slurmdbd_conf) {             
 
611
        } else {
 
612
                conf = slurm_conf_lock();
 
613
                factor = conf->priority_weight_qos;
 
614
                slurm_conf_unlock();
 
615
        }
 
616
 
 
617
        return factor;
 
618
}
 
619
 
 
620
 
335
621
/* slurm_get_private_data
336
622
 * get private data from slurmctld_conf object
337
623
 */
441
727
        return crypto_type;
442
728
}
443
729
 
 
730
/* slurm_get_topology_plugin
 
731
 * returns the value of topology_plugin in slurmctld_conf object
 
732
 * RET char *    - topology type, MUST be xfreed by caller
 
733
 */
 
734
extern char * slurm_get_topology_plugin(void)
 
735
{
 
736
        char *topology_plugin = NULL;
 
737
        slurm_ctl_conf_t *conf;
 
738
 
 
739
        if(slurmdbd_conf) {
 
740
        } else {
 
741
                conf = slurm_conf_lock();
 
742
                topology_plugin = xstrdup(conf->topology_plugin);
 
743
                slurm_conf_unlock();
 
744
        }
 
745
        return topology_plugin;
 
746
}
 
747
 
444
748
/* slurm_get_propagate_prio_process
445
749
 * return the PropagatePrioProcess flag from slurmctld_conf object
446
750
 */
608
912
        return storage_user;    
609
913
}
610
914
 
 
915
/* slurm_set_accounting_storage_user
 
916
 * IN: char *user (name of the accounting storage user)
 
917
 * RET 0 or error code
 
918
 */
 
919
int slurm_set_accounting_storage_user(char *user)
 
920
{
 
921
        slurm_ctl_conf_t *conf;
 
922
 
 
923
        if(slurmdbd_conf) {
 
924
                xfree(slurmdbd_conf->storage_user);
 
925
                slurmdbd_conf->storage_user = xstrdup(user);
 
926
        } else {
 
927
                conf = slurm_conf_lock();
 
928
                xfree(conf->accounting_storage_user);
 
929
                conf->accounting_storage_user = xstrdup(user);
 
930
                slurm_conf_unlock();
 
931
        }
 
932
        return 0;       
 
933
}
 
934
 
 
935
/* slurm_get_accounting_storage_backup_host
 
936
 * returns the storage backup host from slurmctld_conf object
 
937
 * RET char *    - storage backup host,  MUST be xfreed by caller
 
938
 */
 
939
char *slurm_get_accounting_storage_backup_host(void)
 
940
{
 
941
        char *storage_host;
 
942
        slurm_ctl_conf_t *conf;
 
943
 
 
944
        if(slurmdbd_conf) {
 
945
                storage_host = xstrdup(slurmdbd_conf->storage_backup_host);
 
946
        } else {
 
947
                conf = slurm_conf_lock();
 
948
                storage_host = xstrdup(conf->accounting_storage_backup_host);
 
949
                slurm_conf_unlock();
 
950
        }
 
951
        return storage_host;    
 
952
}
 
953
 
611
954
/* slurm_get_accounting_storage_host
612
955
 * returns the storage host from slurmctld_conf object
613
956
 * RET char *    - storage host,  MUST be xfreed by caller
627
970
        return storage_host;    
628
971
}
629
972
 
 
973
/* slurm_set_accounting_storage_host
 
974
 * IN: char *host (name of the accounting storage host)
 
975
 * RET 0 or error code
 
976
 */
 
977
int slurm_set_accounting_storage_host(char *host)
 
978
{
 
979
        slurm_ctl_conf_t *conf;
 
980
 
 
981
        if(slurmdbd_conf) {
 
982
                xfree(slurmdbd_conf->storage_host);
 
983
                slurmdbd_conf->storage_host = xstrdup(host);
 
984
        } else {
 
985
                conf = slurm_conf_lock();
 
986
                xfree(conf->accounting_storage_host);
 
987
                conf->accounting_storage_host = xstrdup(host);
 
988
                slurm_conf_unlock();
 
989
        }
 
990
        return 0;       
 
991
}
 
992
 
630
993
/* slurm_get_accounting_storage_loc
631
994
 * returns the storage location from slurmctld_conf object
632
995
 * RET char *    - storage location,  MUST be xfreed by caller
646
1009
        return storage_loc;     
647
1010
}
648
1011
 
 
1012
/* slurm_set_accounting_storage_loc
 
1013
 * IN: char *loc (name of file or database)
 
1014
 * RET 0 or error code
 
1015
 */
 
1016
int slurm_set_accounting_storage_loc(char *loc)
 
1017
{
 
1018
        slurm_ctl_conf_t *conf;
 
1019
 
 
1020
        if(slurmdbd_conf) {
 
1021
                xfree(slurmdbd_conf->storage_loc);
 
1022
                slurmdbd_conf->storage_loc = xstrdup(loc);
 
1023
        } else {
 
1024
                conf = slurm_conf_lock();
 
1025
                xfree(conf->accounting_storage_loc);
 
1026
                conf->accounting_storage_loc = xstrdup(loc);
 
1027
                slurm_conf_unlock();
 
1028
        }
 
1029
        return 0;       
 
1030
}
 
1031
 
649
1032
/* slurm_get_accounting_storage_enforce
650
1033
 * returns what level to enforce associations at
651
1034
 */
687
1070
 
688
1071
}
689
1072
 
690
 
/* slurm_set_accounting_storage_loc
691
 
 * IN: char *loc (name of file or database)
692
 
 * RET 0 or error code
693
 
 */
694
 
int slurm_set_accounting_storage_loc(char *loc)
695
 
{
696
 
        slurm_ctl_conf_t *conf;
697
 
 
698
 
        if(slurmdbd_conf) {
699
 
                xfree(slurmdbd_conf->storage_loc);
700
 
                slurmdbd_conf->storage_loc = xstrdup(loc);
701
 
        } else {
702
 
                conf = slurm_conf_lock();
703
 
                xfree(conf->accounting_storage_loc);
704
 
                conf->accounting_storage_loc = xstrdup(loc);
705
 
                slurm_conf_unlock();
706
 
        }
707
 
        return 0;       
708
 
}
709
 
 
710
1073
/* slurm_get_accounting_storage_pass
711
1074
 * returns the storage password from slurmctld_conf object
712
1075
 * RET char *    - storage password,  MUST be xfreed by caller
1013
1376
}
1014
1377
 
1015
1378
/* slurm_get_slurm_user_id
1016
 
 * returns slurmd uid from slurmctld_conf object
 
1379
 * returns slurm uid from slurmctld_conf object
1017
1380
 * RET uint32_t - slurm user id
1018
1381
 */
1019
1382
uint32_t slurm_get_slurm_user_id(void)
1031
1394
        return slurm_uid;
1032
1395
}
1033
1396
 
 
1397
/* slurm_get_slurmd_user_id
 
1398
 * returns slurmd uid from slurmctld_conf object
 
1399
 * RET uint32_t - slurmd user id
 
1400
 */
 
1401
uint32_t slurm_get_slurmd_user_id(void)
 
1402
{
 
1403
        uint32_t slurmd_uid = 0;
 
1404
        slurm_ctl_conf_t *conf;
 
1405
 
 
1406
        if(slurmdbd_conf) {
 
1407
        } else {
 
1408
                conf = slurm_conf_lock();
 
1409
                slurmd_uid = conf->slurmd_user_id;
 
1410
                slurm_conf_unlock();
 
1411
        }
 
1412
        return slurmd_uid;
 
1413
}
 
1414
 
1034
1415
/* slurm_get_root_filter
1035
1416
 * RET uint16_t  - Value of SchedulerRootFilter */
1036
1417
extern uint16_t slurm_get_root_filter(void)
1046
1427
        }
1047
1428
        return root_filter;
1048
1429
}
 
1430
 
 
1431
/* slurm_get_sched_params
 
1432
 * RET char * - Value of SchedulerParameters, MUST be xfreed by caller */
 
1433
extern char *slurm_get_sched_params(void)
 
1434
{
 
1435
        char *params = 0;
 
1436
        slurm_ctl_conf_t *conf;
 
1437
 
 
1438
        if(slurmdbd_conf) {
 
1439
        } else {
 
1440
                conf = slurm_conf_lock();
 
1441
                params = conf->sched_params;
 
1442
                slurm_conf_unlock();
 
1443
        }
 
1444
        return params;
 
1445
}
 
1446
 
1049
1447
/* slurm_get_sched_port
1050
1448
 * RET uint16_t  - Value of SchedulerPort */
1051
1449
extern uint16_t slurm_get_sched_port(void)
1098
1496
        return select_type;
1099
1497
}
1100
1498
 
 
1499
/* slurm_get_srun_io_timeout
 
1500
 * get default srun I/O task timeout value from slurmctld_conf object
 
1501
 */
 
1502
uint16_t slurm_get_srun_io_timeout(void)
 
1503
{
 
1504
        uint16_t srun_io_timeout = 0;
 
1505
        slurm_ctl_conf_t *conf;
 
1506
 
 
1507
        if(slurmdbd_conf) {
 
1508
        } else {
 
1509
                conf = slurm_conf_lock();
 
1510
                srun_io_timeout = conf->srun_io_timeout;
 
1511
                slurm_conf_unlock();
 
1512
        }
 
1513
        return srun_io_timeout;
 
1514
}
 
1515
 
1101
1516
/* slurm_get_switch_type
1102
1517
 * get switch type from slurmctld_conf object
1103
1518
 * RET char *   - switch type, MUST be xfreed by caller
1246
1661
 * general message management functions used by slurmctld, slurmd
1247
1662
\**********************************************************************/
1248
1663
 
1249
 
/* 
1250
 
 *  Initialize a slurm server at port "port"
 
1664
/* In the socket implementation it creates a socket, binds to it, and 
 
1665
 *      listens for connections.
1251
1666
 * 
1252
1667
 * IN  port     - port to bind the msg server to
1253
1668
 * RET slurm_fd - file descriptor of the connection created
1260
1675
        return _slurm_init_msg_engine(&addr);
1261
1676
}
1262
1677
 
 
1678
/* In the socket implementation it creates a socket, binds to it, and 
 
1679
 *      listens for connections.
 
1680
 *
 
1681
 * IN  addr_name - address to bind the msg server to (NULL means any)
 
1682
 * IN  port      - port to bind the msg server to
 
1683
 * RET slurm_fd  - file descriptor of the connection created
 
1684
 */
 
1685
slurm_fd slurm_init_msg_engine_addrname_port(char *addr_name, uint16_t port)
 
1686
{
 
1687
        slurm_addr addr;
 
1688
 
 
1689
#ifdef BIND_SPECIFIC_ADDR
 
1690
        if (addr_name != NULL)
 
1691
                slurm_set_addr(&addr, port, addr_name);
 
1692
        else
 
1693
                slurm_set_addr_any(&addr, port);
 
1694
#else
 
1695
        slurm_set_addr_any(&addr, port);
 
1696
#endif
 
1697
 
 
1698
        return _slurm_init_msg_engine(&addr);
 
1699
}
 
1700
 
1263
1701
/* 
1264
1702
 *  Same as above, but initialize using a slurm address "addr"
1265
1703
 *
1479
1917
        }
1480
1918
        
1481
1919
#if     _DEBUG
1482
 
        _print_data (buftemp, rc);
 
1920
        _print_data (buf, buflen);
1483
1921
#endif
1484
1922
        buffer = create_buf(buf, buflen);
1485
1923
 
1490
1928
        }
1491
1929
        
1492
1930
        if (check_header_version(&header) < 0) {
 
1931
                slurm_addr resp_addr;
 
1932
                char addr_str[32];
1493
1933
                int uid = _unpack_msg_uid(buffer);
1494
 
                error("Invalid Protocol Version %u from uid=%d", 
1495
 
                        header.version, uid);
 
1934
                slurm_get_peer_addr(fd, &resp_addr);
 
1935
                slurm_print_slurm_addr(&resp_addr, addr_str, sizeof(addr_str));
 
1936
                error("Invalid Protocol Version %u from uid=%d at %s", 
 
1937
                        header.version, uid, addr_str);
1496
1938
                free_buf(buffer);
1497
1939
                rc = SLURM_PROTOCOL_VERSION_ERROR;
1498
1940
                goto total_return;
1641
2083
        }
1642
2084
        
1643
2085
#if     _DEBUG
1644
 
        _print_data (buftemp, rc);
 
2086
        _print_data (buf, buflen);
1645
2087
#endif
1646
2088
        buffer = create_buf(buf, buflen);
1647
2089
 
1652
2094
        }
1653
2095
        
1654
2096
        if(check_header_version(&header) < 0) {
 
2097
                slurm_addr resp_addr;
 
2098
                char addr_str[32];
1655
2099
                int uid = _unpack_msg_uid(buffer);
1656
 
                error("Invalid Protocol Version %u from uid=%d",
1657
 
                        header.version, uid);
 
2100
                slurm_get_peer_addr(fd, &resp_addr);
 
2101
                slurm_print_slurm_addr(&resp_addr, addr_str, sizeof(addr_str));
 
2102
                error("Invalid Protocol Version %u from uid=%d at %s", 
 
2103
                        header.version, uid, addr_str);
1658
2104
                free_buf(buffer);
1659
2105
                rc = SLURM_PROTOCOL_VERSION_ERROR;
1660
2106
                goto total_return;
1661
2107
        }
1662
2108
        //info("ret_cnt = %d",header.ret_cnt);
1663
2109
        if(header.ret_cnt > 0) {
1664
 
                ret_list = list_create(destroy_data_info);
1665
 
                while((ret_data_info = list_pop(header.ret_list)))
1666
 
                        list_push(ret_list, ret_data_info);
 
2110
                if(header.ret_list)
 
2111
                        ret_list = header.ret_list;
 
2112
                else
 
2113
                        ret_list = list_create(destroy_data_info);
1667
2114
                header.ret_cnt = 0;
1668
 
                list_destroy(header.ret_list);
1669
2115
                header.ret_list = NULL;
1670
2116
        }
1671
2117
        
1824
2270
        }
1825
2271
        
1826
2272
#if     _DEBUG
1827
 
        _print_data (buftemp, rc);
 
2273
        _print_data (buf, buflen);
1828
2274
#endif
1829
2275
        buffer = create_buf(buf, buflen);
1830
2276
 
1835
2281
        }
1836
2282
        
1837
2283
        if (check_header_version(&header) < 0) {
 
2284
                slurm_addr resp_addr;
 
2285
                char addr_str[32];
1838
2286
                int uid = _unpack_msg_uid(buffer);
1839
 
                error("Invalid Protocol Version %u from uid=%d", 
1840
 
                        header.version, uid);
 
2287
                slurm_get_peer_addr(fd, &resp_addr);
 
2288
                slurm_print_slurm_addr(&resp_addr, addr_str, sizeof(addr_str));
 
2289
                error("Invalid Protocol Version %u from uid=%d at %s", 
 
2290
                        header.version, uid, addr_str);
1841
2291
                free_buf(buffer);
1842
2292
                rc = SLURM_PROTOCOL_VERSION_ERROR;
1843
2293
                goto total_return;
1872
2322
        if(header.forward.cnt > 0) {
1873
2323
                debug("forwarding to %u", header.forward.cnt);
1874
2324
                msg->forward_struct = xmalloc(sizeof(forward_struct_t));
 
2325
                slurm_mutex_init(&msg->forward_struct->forward_mutex);
 
2326
                pthread_cond_init(&msg->forward_struct->notify, NULL);
 
2327
 
 
2328
                msg->forward_struct->forward_msg = 
 
2329
                        xmalloc(sizeof(forward_msg_t) * header.forward.cnt);
 
2330
                
1875
2331
                msg->forward_struct->buf_len = remaining_buf(buffer);
1876
2332
                msg->forward_struct->buf = 
1877
2333
                        xmalloc(sizeof(char) * msg->forward_struct->buf_len);
2011
2467
                msg->ret_list = NULL;
2012
2468
        }
2013
2469
        forward_wait(msg);
2014
 
        
 
2470
 
2015
2471
        init_header(&header, msg, msg->flags);
2016
2472
        
2017
2473
        /*
2698
3154
                          int timeout, bool quiet)
2699
3155
{
2700
3156
        List ret_list = NULL;
2701
 
        List tmp_ret_list = NULL;
2702
 
        slurm_fd fd = -1;
2703
 
        char *name = NULL;
2704
 
        char buf[8192];
 
3157
//      List tmp_ret_list = NULL;
 
3158
//      slurm_fd fd = -1;
 
3159
//      char buf[8192];
2705
3160
        hostlist_t hl = NULL;
2706
 
        ret_data_info_t *ret_data_info = NULL;
2707
 
        ListIterator itr;
 
3161
//      ret_data_info_t *ret_data_info = NULL;
 
3162
//      ListIterator itr;
2708
3163
 
2709
3164
        if(!nodelist || !strlen(nodelist)) {
2710
3165
                error("slurm_send_recv_msgs: no nodelist given");
2711
3166
                return NULL;
2712
3167
        }
 
3168
        
2713
3169
#ifdef HAVE_FRONT_END
 
3170
{
 
3171
        char *name = NULL;
2714
3172
        /* only send to the front end node */
2715
3173
        name = nodelist_nth_host(nodelist, 0);
2716
3174
        if (!name) {
2721
3179
        }
2722
3180
        hl = hostlist_create(name);
2723
3181
        free(name);
 
3182
}
2724
3183
#else
2725
3184
/*      info("total sending to %s",nodelist); */
2726
3185
        hl = hostlist_create(nodelist);
2727
3186
#endif
2728
 
        while((name = hostlist_shift(hl))) {
2729
 
                
2730
 
                if(slurm_conf_get_addr(name, &msg->address) == SLURM_ERROR) {
2731
 
                        if (quiet) {
2732
 
                                debug("slurm_send_recv_msgs: can't find "
2733
 
                                      "address for host %s, check slurm.conf", 
2734
 
                                      name);
2735
 
                        } else {
2736
 
                                error("slurm_send_recv_msgs: can't find "
2737
 
                                      "address for host %s, check slurm.conf", 
2738
 
                                      name);
2739
 
                        }
2740
 
                        mark_as_failed_forward(&tmp_ret_list, name, 
2741
 
                                        SLURM_COMMUNICATIONS_CONNECTION_ERROR);
2742
 
                        free(name);
2743
 
                        continue;
2744
 
                }
2745
 
                
2746
 
                if ((fd = slurm_open_msg_conn(&msg->address)) < 0) {
2747
 
                        if (quiet)
2748
 
                                debug("slurm_send_recv_msgs to %s: %m", name);
2749
 
                        else
2750
 
                                error("slurm_send_recv_msgs to %s: %m", name);
2751
 
                        mark_as_failed_forward(&tmp_ret_list, name, 
2752
 
                                        SLURM_COMMUNICATIONS_CONNECTION_ERROR);
2753
 
                        free(name);
2754
 
                        continue;
2755
 
                }
2756
3187
 
2757
 
                hostlist_ranged_string(hl, sizeof(buf), buf);
2758
 
                forward_init(&msg->forward, NULL);
2759
 
                msg->forward.nodelist = xstrdup(buf);
2760
 
                msg->forward.timeout = timeout;
2761
 
                msg->forward.cnt = hostlist_count(hl);
2762
 
                if (msg->forward.nodelist[0]) {
2763
 
                        debug3("sending to %s along with to %s", 
2764
 
                               name, msg->forward.nodelist);
2765
 
                } else
2766
 
                        debug3("sending to %s", name);
2767
 
                
2768
 
                if(!(ret_list = _send_and_recv_msgs(fd, msg, timeout))) {
2769
 
                        xfree(msg->forward.nodelist);
2770
 
                        if (quiet) {
2771
 
                                debug("slurm_send_recv_msgs"
2772
 
                                      "(_send_and_recv_msgs) to %s: %m", 
2773
 
                                      name);
2774
 
                        } else {
2775
 
                                error("slurm_send_recv_msgs"
2776
 
                                      "(_send_and_recv_msgs) to %s: %m", 
2777
 
                                      name);
2778
 
                        }
2779
 
                        mark_as_failed_forward(&tmp_ret_list, name, errno);
2780
 
                        free(name);
2781
 
                        continue;
2782
 
                } else {
2783
 
                        itr = list_iterator_create(ret_list);
2784
 
                        while((ret_data_info = list_next(itr))) 
2785
 
                                if(!ret_data_info->node_name) {
2786
 
                                        ret_data_info->node_name =
2787
 
                                                xstrdup(name);
2788
 
                                }
2789
 
                        list_iterator_destroy(itr);
2790
 
                }
2791
 
                xfree(msg->forward.nodelist);
2792
 
                free(name);
2793
 
                break;          
 
3188
        if(!hl) {
 
3189
                error("slurm_send_recv_msgs: problem creating hostlist");
 
3190
                return NULL;
2794
3191
        }
 
3192
 
 
3193
        ret_list = start_msg_tree(hl, msg, timeout);
2795
3194
        hostlist_destroy(hl);
2796
3195
 
2797
 
        if(tmp_ret_list) {
2798
 
                if(!ret_list)
2799
 
                        ret_list = tmp_ret_list;
2800
 
                else {
2801
 
                        while((ret_data_info = list_pop(tmp_ret_list))) 
2802
 
                                list_push(ret_list, ret_data_info);
2803
 
                        list_destroy(tmp_ret_list);
2804
 
                }
2805
 
        } 
2806
3196
        return ret_list;
 
3197
 
 
3198
        /* The below code will start from the first node in the list
 
3199
         * to start the tree.  The start_msg_tree function starts the
 
3200
         * tree from the calling node. */
 
3201
 
 
3202
/*      while((name = hostlist_shift(hl))) { */
 
3203
                
 
3204
/*              if(slurm_conf_get_addr(name, &msg->address) == SLURM_ERROR) { */
 
3205
/*                      if (quiet) { */
 
3206
/*                              debug("slurm_send_recv_msgs: can't find " */
 
3207
/*                                    "address for host %s, check slurm.conf",  */
 
3208
/*                                    name); */
 
3209
/*                      } else { */
 
3210
/*                              error("slurm_send_recv_msgs: can't find " */
 
3211
/*                                    "address for host %s, check slurm.conf",  */
 
3212
/*                                    name); */
 
3213
/*                      } */
 
3214
/*                      mark_as_failed_forward(&tmp_ret_list, name,  */
 
3215
/*                                      SLURM_COMMUNICATIONS_CONNECTION_ERROR); */
 
3216
/*                      free(name); */
 
3217
/*                      continue; */
 
3218
/*              } */
 
3219
                
 
3220
/*              if ((fd = slurm_open_msg_conn(&msg->address)) < 0) { */
 
3221
/*                      if (quiet) */
 
3222
/*                              debug("slurm_send_recv_msgs to %s: %m", name); */
 
3223
/*                      else */
 
3224
/*                              error("slurm_send_recv_msgs to %s: %m", name); */
 
3225
/*                      mark_as_failed_forward(&tmp_ret_list, name,  */
 
3226
/*                                      SLURM_COMMUNICATIONS_CONNECTION_ERROR); */
 
3227
/*                      free(name); */
 
3228
/*                      continue; */
 
3229
/*              } */
 
3230
 
 
3231
/*              hostlist_ranged_string(hl, sizeof(buf), buf); */
 
3232
/*              forward_init(&msg->forward, NULL); */
 
3233
/*              msg->forward.nodelist = xstrdup(buf); */
 
3234
/*              msg->forward.timeout = timeout; */
 
3235
/*              msg->forward.cnt = hostlist_count(hl); */
 
3236
/*              if (msg->forward.nodelist[0]) { */
 
3237
/*                      debug3("sending to %s along with %s",  */
 
3238
/*                             name, msg->forward.nodelist); */
 
3239
/*              } else */
 
3240
/*                      debug3("sending to %s", name); */
 
3241
                
 
3242
/*              if(!(ret_list = _send_and_recv_msgs(fd, msg, timeout))) { */
 
3243
/*                      xfree(msg->forward.nodelist); */
 
3244
/*                      if (quiet) { */
 
3245
/*                              debug("slurm_send_recv_msgs" */
 
3246
/*                                    "(_send_and_recv_msgs) to %s: %m",  */
 
3247
/*                                    name); */
 
3248
/*                      } else { */
 
3249
/*                              error("slurm_send_recv_msgs" */
 
3250
/*                                    "(_send_and_recv_msgs) to %s: %m",  */
 
3251
/*                                    name); */
 
3252
/*                      } */
 
3253
/*                      mark_as_failed_forward(&tmp_ret_list, name, errno); */
 
3254
/*                      free(name); */
 
3255
/*                      continue; */
 
3256
/*              } else { */
 
3257
/*                      itr = list_iterator_create(ret_list); */
 
3258
/*                      while((ret_data_info = list_next(itr)))  */
 
3259
/*                              if(!ret_data_info->node_name) { */
 
3260
/*                                      ret_data_info->node_name = */
 
3261
/*                                              xstrdup(name); */
 
3262
/*                              } */
 
3263
/*                      list_iterator_destroy(itr); */
 
3264
/*              } */
 
3265
/*              xfree(msg->forward.nodelist); */
 
3266
/*              free(name); */
 
3267
/*              break;           */
 
3268
/*      } */
 
3269
/*      hostlist_destroy(hl); */
 
3270
 
 
3271
/*      if(tmp_ret_list) { */
 
3272
/*              if(!ret_list) */
 
3273
/*                      ret_list = tmp_ret_list; */
 
3274
/*              else { */
 
3275
/*                      list_transfer(ret_list, tmp_ret_list); */
 
3276
/*                      list_destroy(tmp_ret_list); */
 
3277
/*              } */
 
3278
/*      }  */
 
3279
/*      return ret_list; */
2807
3280
}
2808
3281
 
2809
3282
/*
2818
3291
List slurm_send_addr_recv_msgs(slurm_msg_t *msg, char *name, int timeout)
2819
3292
{
2820
3293
        List ret_list = NULL;
2821
 
        List tmp_ret_list = NULL;
2822
3294
        slurm_fd fd = -1;
2823
3295
        ret_data_info_t *ret_data_info = NULL;
2824
3296
        ListIterator itr;
2826
3298
        if ((fd = slurm_open_msg_conn(&msg->address)) < 0) {
2827
3299
                mark_as_failed_forward(&ret_list, name, 
2828
3300
                                       SLURM_COMMUNICATIONS_CONNECTION_ERROR);
 
3301
                errno = SLURM_COMMUNICATIONS_CONNECTION_ERROR;
2829
3302
                return ret_list;
2830
3303
        }
2831
3304
 
2832
 
        /*just to make sure */
2833
 
        forward_init(&msg->forward, NULL);
2834
3305
        msg->ret_list = NULL;
2835
3306
        msg->forward_struct = NULL;
2836
3307
        if(!(ret_list = _send_and_recv_msgs(fd, msg, timeout))) {
2837
 
                mark_as_failed_forward(&tmp_ret_list, name, errno);
 
3308
                mark_as_failed_forward(&ret_list, name, errno);
 
3309
                errno = SLURM_COMMUNICATIONS_CONNECTION_ERROR;
2838
3310
                return ret_list;
2839
3311
        } else {
2840
3312
                itr = list_iterator_create(ret_list);
2994
3466
        char *unit = "\0KMGP?";
2995
3467
        int i = (int)num % 512;
2996
3468
 
2997
 
        if((i > 0 && num < 1024) || (int)num == 0) {
 
3469
        if((int)num == 0) {
 
3470
                snprintf(buf, buf_size, "%d", (int)num);
 
3471
                return;
 
3472
        } else if((i > 0 && num < 1024)) {
2998
3473
                snprintf(buf, buf_size, "%d%c", (int)num, unit[orig_type]);
2999
3474
                return;
3000
3475
        }