~ubuntu-branches/ubuntu/vivid/slurm-llnl/vivid

« back to all changes in this revision

Viewing changes to src/slurmdbd/slurmdbd.c

  • Committer: Bazaar Package Importer
  • Author(s): Gennaro Oliva
  • Date: 2009-09-24 23:28:15 UTC
  • mfrom: (1.1.11 upstream) (3.2.4 sid)
  • Revision ID: james.westby@ubuntu.com-20090924232815-enh65jn32q1ebg07
Tags: 2.0.5-1
* New upstream release 
* Changed dependency from lib-mysqlclient15 to lib-mysqlclient
* Added Default-Start for runlevel 2 and 4 and $remote_fs requirement in
  init.d scripts (Closes: #541252)
* Postinst checks for wrong links in runlevels 2 and 4
* Upgraded to standard version 3.8.3
* Add lintian overrides for missing slurm-llnl-configurator.html in doc
  base registration
* Modified postrm scripts to ignore the pkill return value in order to avoid
  postrm failure when no slurm process is running
* Checking for slurmctld.pid before cancelling running and pending
  jobs during package removal 

Show diffs side-by-side

added added

removed removed

Lines of Context:
5
5
 *  Copyright (C) 2008 Lawrence Livermore National Security.
6
6
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
7
7
 *  Written by Morris Jette <jette@llnl.gov>
8
 
 *  LLNL-CODE-402394.
 
8
 *  CODE-OCEC-09-009. All rights reserved.
9
9
 *  
10
10
 *  This file is part of SLURM, a resource management program.
11
 
 *  For details, see <http://www.llnl.gov/linux/slurm/>.
 
11
 *  For details, see <https://computing.llnl.gov/linux/slurm/>.
 
12
 *  Please also read the included file: DISCLAIMER.
12
13
 *  
13
14
 *  SLURM is free software; you can redistribute it and/or modify it under
14
15
 *  the terms of the GNU General Public License as published by the Free
59
60
#include "src/common/xstring.h"
60
61
#include "src/slurmdbd/read_config.h"
61
62
#include "src/slurmdbd/rpc_mgr.h"
 
63
#include "src/slurmdbd/backup.h"
62
64
 
63
65
/* Global variables */
64
66
time_t shutdown_time = 0;               /* when shutdown request arrived */
85
87
static void  _init_pidfile(void);
86
88
static void  _kill_old_slurmdbd(void);
87
89
static void  _parse_commandline(int argc, char *argv[]);
88
 
static void _rollup_handler_cancel();
 
90
static void  _rollup_handler_cancel();
89
91
static void *_rollup_handler(void *no_data);
90
92
static void *_signal_handler(void *no_data);
91
93
static void  _update_logging(void);
106
108
        _parse_commandline(argc, argv);
107
109
        _update_logging();
108
110
 
109
 
        if (gethostname_short(node_name, sizeof(node_name)))
110
 
                fatal("getnodename: %m");
111
 
        if (slurmdbd_conf->dbd_host &&
112
 
            strcmp(slurmdbd_conf->dbd_host, node_name) &&
113
 
            strcmp(slurmdbd_conf->dbd_host, "localhost")) {
114
 
                fatal("This host not configured to run SlurmDBD (%s != %s)",
115
 
                      node_name, slurmdbd_conf->dbd_host);
116
 
        }
117
111
        if (slurm_auth_init(NULL) != SLURM_SUCCESS) {
118
112
                fatal("Unable to initialize %s authentication plugin",
119
113
                        slurmdbd_conf->auth_type);
147
141
        assoc_init_arg.cache_level = ASSOC_MGR_CACHE_USER;
148
142
        if(slurmdbd_conf->track_wckey)
149
143
                assoc_init_arg.cache_level |= ASSOC_MGR_CACHE_WCKEY;
150
 
 
 
144
        
151
145
        if(assoc_mgr_init(db_conn, &assoc_init_arg) == SLURM_ERROR) {
152
146
                error("Problem getting cache of data");
153
147
                acct_storage_g_close_connection(&db_conn);
154
148
                goto end_it;
155
149
        }
156
150
 
157
 
        if(!shutdown_time) {
158
 
                /* Create attached thread to process incoming RPCs */
159
 
                slurm_attr_init(&thread_attr);
160
 
                if (pthread_create(&rpc_handler_thread, &thread_attr, 
161
 
                                   rpc_mgr, NULL))
162
 
                        fatal("pthread_create error %m");
163
 
                slurm_attr_destroy(&thread_attr);
164
 
        }
165
 
 
166
 
        if(!shutdown_time) {
167
 
                /* Create attached thread to do usage rollup */
168
 
                slurm_attr_init(&thread_attr);
169
 
                if (pthread_create(&rollup_handler_thread, &thread_attr,
170
 
                                   _rollup_handler, db_conn))
171
 
                        fatal("pthread_create error %m");
172
 
                slurm_attr_destroy(&thread_attr);
173
 
        }
174
 
 
175
 
        /* Daemon is fully operational here */
176
 
        info("slurmdbd version %s started", SLURM_VERSION);
177
 
 
 
151
        if (gethostname_short(node_name, sizeof(node_name)))
 
152
                fatal("getnodename: %m");
 
153
 
 
154
        while(1) {
 
155
                if (slurmdbd_conf->dbd_backup &&
 
156
                    (!strcmp(node_name, slurmdbd_conf->dbd_backup) ||
 
157
                     !strcmp(slurmdbd_conf->dbd_backup, "localhost"))) {
 
158
                        info("slurmdbd running in background mode");
 
159
                        have_control = false;
 
160
                        backup = true;
 
161
                        run_backup();
 
162
                        if(!shutdown_time)
 
163
                                assoc_mgr_refresh_lists(db_conn, NULL);         
 
164
                } else if (slurmdbd_conf->dbd_host &&
 
165
                           (!strcmp(slurmdbd_conf->dbd_host, node_name) ||
 
166
                            !strcmp(slurmdbd_conf->dbd_host, "localhost"))) {
 
167
                        backup = false;
 
168
                        have_control = true;
 
169
                } else {
 
170
                        fatal("This host not configured to run SlurmDBD "
 
171
                              "(%s != %s | (backup) %s)",
 
172
                              node_name, slurmdbd_conf->dbd_host,
 
173
                              slurmdbd_conf->dbd_backup);
 
174
                }
 
175
                
 
176
                if(!shutdown_time) {
 
177
                        /* Create attached thread to process incoming RPCs */
 
178
                        slurm_attr_init(&thread_attr);
 
179
                        if (pthread_create(&rpc_handler_thread, &thread_attr, 
 
180
                                           rpc_mgr, NULL))
 
181
                                fatal("pthread_create error %m");
 
182
                        slurm_attr_destroy(&thread_attr);
 
183
                }
 
184
 
 
185
                if(!shutdown_time) {
 
186
                        /* Create attached thread to do usage rollup */
 
187
                        slurm_attr_init(&thread_attr);
 
188
                        if (pthread_create(&rollup_handler_thread,
 
189
                                           &thread_attr,
 
190
                                           _rollup_handler, db_conn))
 
191
                                fatal("pthread_create error %m");
 
192
                        slurm_attr_destroy(&thread_attr);
 
193
                }
 
194
 
 
195
                /* Daemon is fully operational here */
 
196
                if(!shutdown_time || primary_resumed) {
 
197
                        shutdown_time = 0;
 
198
                        info("slurmdbd version %s started", SLURM_VERSION);
 
199
                        if(backup)
 
200
                                run_backup();
 
201
                }
 
202
 
 
203
                /* this is only ran if not backup */
 
204
                if(rollup_handler_thread)
 
205
                        pthread_join(rollup_handler_thread, NULL);
 
206
                if(rpc_handler_thread)
 
207
                        pthread_join(rpc_handler_thread, NULL);
 
208
 
 
209
                if(backup && primary_resumed) { 
 
210
                        shutdown_time = 0;
 
211
                        info("Backup has given up control");
 
212
                }
 
213
 
 
214
                if(shutdown_time)
 
215
                        break;
 
216
        }
178
217
        /* Daemon termination handled here */
179
 
        if(rollup_handler_thread)
180
 
                pthread_join(rollup_handler_thread, NULL);
181
 
 
182
 
        if(rpc_handler_thread)
183
 
                pthread_join(rpc_handler_thread, NULL);
184
 
 
 
218
        
185
219
        if(signal_handler_thread)
186
220
                pthread_join(signal_handler_thread, NULL);
187
 
 
 
221
        
188
222
end_it:
189
223
        acct_storage_g_close_connection(&db_conn);
190
 
 
 
224
        
191
225
        if (slurmdbd_conf->pid_file &&
192
226
            (unlink(slurmdbd_conf->pid_file) < 0)) {
193
227
                verbose("Unable to remove pidfile '%s': %m",
194
228
                        slurmdbd_conf->pid_file);
195
229
        }
196
 
 
 
230
        
197
231
        assoc_mgr_fini(NULL);
198
232
        slurm_acct_storage_fini();
199
233
        slurm_auth_fini();
202
236
        exit(0);
203
237
}
204
238
 
 
239
extern void shutdown_threads()
 
240
{
 
241
        shutdown_time = time(NULL);
 
242
        rpc_mgr_wake();
 
243
        _rollup_handler_cancel();
 
244
}
 
245
 
205
246
/* Reset some of the processes resource limits to the hard limits */
206
247
static void  _init_config(void)
207
248
{
398
439
                slurm_mutex_lock(&rollup_lock);
399
440
                running_rollup = 1;
400
441
                debug2("running rollup at %s", ctime(&start_time));
401
 
                acct_storage_g_roll_usage(db_conn, 0);
 
442
                acct_storage_g_roll_usage(db_conn, 0, 0, 1);
402
443
                running_rollup = 0;
403
444
                slurm_mutex_unlock(&rollup_lock);       
404
445
 
457
498
                case SIGINT:    /* kill -2  or <CTRL-C> */
458
499
                case SIGTERM:   /* kill -15 */
459
500
                        info("Terminate signal (SIGINT or SIGTERM) received");
460
 
                        shutdown_time = time(NULL);
461
 
                        rpc_mgr_wake();
462
 
                        _rollup_handler_cancel();
463
 
 
 
501
                        shutdown_threads();
464
502
                        return NULL;    /* Normal termination */
465
503
                case SIGABRT:   /* abort */
466
504
                        info("SIGABRT received");
467
505
                        abort();        /* Should terminate here */
468
 
                        shutdown_time = time(NULL);
469
 
                        rpc_mgr_wake();
470
 
                        _rollup_handler_cancel();
 
506
                        shutdown_threads();
471
507
                        return NULL;
472
508
                default:
473
509
                        error("Invalid signal (%d) received", sig);