# jinja2 may not be importable until the install hook has installed the
# required packages.
def Template(*args, **kw):
    """jinja2.Template with deferred jinja2 import"""
    from jinja2 import Template
    return Template(*args, **kw)


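# Example usage (illustrative only; 'template_text' is a stand-in name, not
# one used by this charm): the wrapper behaves exactly like jinja2.Template,
# so hooks can render configuration files without jinja2 being importable at
# module load time:
#
#     conf = Template(template_text).render(config_data)

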
def log(msg, lvl=INFO):
    '''
    Per Bug #1208787, log messages sent via juju-log are being lost.
    Spit messages out to a log file to work around the problem.
    It is also rather nice to have the log messages we explicitly emit
    in a separate log file, rather than just mashed up with all the
    juju noise.
    '''
    myname = hookenv.local_unit().replace('/', '-')
    ts = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
    with open('/var/log/juju/{}-debug.log'.format(myname), 'a') as f:
        f.write('{} {}: {}\n'.format(ts, lvl, msg))
    hookenv.log(msg, lvl)


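# postgresql_is_running() is called throughout this file but defined outside
# this excerpt. The sketch below is a plausible stand-in, not the charm's
# actual implementation; it assumes the init script's 'status' action exits
# non-zero when the cluster is down.
def postgresql_is_running():
    '''Return True if the PostgreSQL service appears to be running.'''
    status, output = commands.getstatusoutput('service postgresql status')
    return status == 0

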
def postgresql_stop():
    '''Shutdown PostgreSQL.'''
    host.service_stop('postgresql')
    # Judge by the final state rather than service_stop()'s return value;
    # stopping an already stopped service can report failure.
    return not postgresql_is_running()


def postgresql_start():
    '''Start PostgreSQL if it is not already running.'''
    host.service_start('postgresql')
    # As above, judge by the final state; starting an already running
    # service can report failure.
    return postgresql_is_running()


def postgresql_restart():
    '''Restart PostgreSQL, or start it if it is not already running.'''
    if postgresql_is_running():
        # If the database is in backup mode, we don't want to restart
        # PostgreSQL, as that would abort the procedure. This may be
        # another unit being cloned, or a filesystem level backup being
        # made. There is no timeout here, as backups can take hours or
        # days. Instead, keep logging so admins know what is going on.
        last_warning = time.time()
        while postgresql_is_in_backup_mode():
            if time.time() > last_warning + 120:
                log("In backup mode. PostgreSQL restart blocked.", WARNING)
                log(
                    "Run \"psql -U postgres -c 'SELECT pg_stop_backup()'\" "
                    "to cancel backup mode and forcefully unblock this hook.")
                last_warning = time.time()
            time.sleep(5)  # Don't busy-wait while the backup runs.

        with restart_lock(hookenv.local_unit(), True):
            # 'service postgresql restart' fails; it only does a reload.
            # success = host.service_restart('postgresql')
            try:
                run('pg_ctlcluster -force {version} {cluster_name} '
                    'restart'.format(**config_data))
                success = True
            except subprocess.CalledProcessError:
                success = False
    else:
        success = host.service_start('postgresql')

    # Store a copy of our known live configuration so
    # postgresql_reload_or_restart() can make good choices.
    if success and 'saved_config' in local_state:
        local_state['live_config'] = local_state['saved_config']
        local_state.save()

    return success and postgresql_is_running()


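# postgresql_is_in_backup_mode() is used by postgresql_restart() above but
# defined outside this excerpt. A minimal sketch, assuming the default
# Debian/Ubuntu cluster layout: pg_start_backup() drops a 'backup_label'
# file in the data directory and pg_stop_backup() removes it, so its
# presence means a backup or clone is still in progress.
def postgresql_is_in_backup_mode():
    data_dir = '/var/lib/postgresql/{version}/{cluster_name}'.format(
        **config_data)
    return os.path.exists(os.path.join(data_dir, 'backup_label'))

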
def postgresql_reload():
    '''Make PostgreSQL reload its configuration.'''
    # reload returns a reliable exit status
    status, output = commands.getstatusoutput("invoke-rc.d postgresql reload")
    return (status == 0)


def requires_restart():
    '''Check for configuration changes requiring a restart to take effect.'''
    if not postgresql_is_running():
        return True

    # Suck in the config last written to postgresql.conf.
    saved_config = local_state.get('saved_config', None)
    if not saved_config:
        # No record of postgresql.conf state, perhaps an upgrade.
        # Better restart.
        return True

    # Suck in our live config from last time we restarted.
    live_config = local_state.setdefault('live_config', {})

    # Pull in a list of PostgreSQL settings.
    cur = db_cursor()
    cur.execute("SELECT name, context FROM pg_settings")
    restart_needed = False
    for name, context in cur.fetchall():
        live_value = live_config.get(name, None)
        new_value = saved_config.get(name, None)
        if new_value != live_value and context == 'postmaster':
            # A setting has changed that requires PostgreSQL to be
            # restarted before it will take effect.
            restart_needed = True
    return restart_needed


def postgresql_reload_or_restart():
    """Reload PostgreSQL configuration, restarting if necessary."""
    if requires_restart():
        log("Configuration change requires PostgreSQL restart. Restarting.",
            WARNING)
        success = postgresql_restart()
        if not success or requires_restart():
            log("Configuration changes failed to apply", WARNING)
            success = False
    else:
        log("PostgreSQL reload, config changes taking effect.", DEBUG)
        success = host.service_reload('postgresql')

    # Record the configuration now in effect so requires_restart() can
    # make good choices.
    if success and 'saved_config' in local_state:
        local_state['live_config'] = local_state['saved_config']
        local_state.save()

    return success


def get_service_port(postgresql_config):
    if local_state['state'] == 'master':
        log("I am already the master", DEBUG)
        return hookenv.local_unit()

    if local_state['state'] == 'hot standby':
        log("I am already following {}".format(
            local_state['following']), DEBUG)
        return local_state['following']

    replication_relid = hookenv.relation_ids('replication')[0]
    replication_units = hookenv.related_units(replication_relid)

    if local_state['state'] == 'standalone':
        log("I'm a standalone unit wanting to participate in replication")
        existing_replication = False
        for unit in replication_units:
            # If another peer thinks it is the master, believe it.
            remote_state = hookenv.relation_get(
                'state', unit, replication_relid)
            if remote_state == 'master':
                log("{} thinks it is the master, believing it".format(
                    unit), DEBUG)
                return unit

            # If we find a peer that isn't standalone, we know
            # replication has already been set up at some point.
            if remote_state != 'standalone':
                existing_replication = True

        # If we are joining a peer relation where replication has
        # already been set up, but there is currently no master, wait
        # until one of the remaining participating units has been
        # promoted to master. Only they have the data we need to
        # clone.
        if existing_replication:
            log("Peers participating in replication need to elect a master",
                DEBUG)
            return None

        # There are no peers claiming to be master, and there is no
        # election in progress, so lowest numbered unit wins.
        units = replication_units + [hookenv.local_unit()]
        master = unit_sorted(units)[0]
        if master == hookenv.local_unit():
            log("I'm Master - lowest numbered unit in new peer group")
            return master
        log("Waiting on {} to declare itself Master".format(master), DEBUG)
        return None

    if local_state['state'] == 'failover':
        former_master = local_state['following']
        log("Failover from {}".format(former_master))

        units_not_in_failover = set()
        candidates = set()
        for unit in replication_units:
            if unit == former_master:
                log("Found dying master {}".format(unit), DEBUG)
                continue

            relation = hookenv.relation_get(unit=unit, rid=replication_relid)

            if relation['state'] == 'master':
                log("{} says it already won the election".format(unit),
                    WARNING)
                return unit

            if relation['state'] == 'failover':
                candidates.add(unit)
            elif relation['state'] != 'standalone':
                units_not_in_failover.add(unit)

        if units_not_in_failover:
            log("{} unaware of impending election. Deferring result.".format(
                " ".join(unit_sorted(units_not_in_failover))), DEBUG)
            return None

        log("Election in progress")
        winner = None
        winning_offset = -1
        candidates.add(hookenv.local_unit())
        # Sort the unit lists so we get consistent results in a tie
        # and lowest unit number wins.
        for unit in unit_sorted(candidates):
            relation = hookenv.relation_get(unit=unit, rid=replication_relid)
            if int(relation['wal_received_offset']) > winning_offset:
                winner = unit
                winning_offset = int(relation['wal_received_offset'])

        # All remaining hot standbys are in failover mode and have
        # reported their wal_received_offset. We can declare victory.
        if winner == hookenv.local_unit():
            log("I won the election, announcing myself winner")
            return winner
        log("Waiting for {} to announce its victory".format(winner), DEBUG)
        return None


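# unit_sorted() provides the deterministic ordering the election logic above
# depends on, but its definition is outside this excerpt. A minimal sketch:
# Juju unit names look like 'postgresql/12', so units must be ordered
# numerically by unit number ('postgresql/9' before 'postgresql/10', which
# plain lexical sorting gets wrong).
def unit_sorted(units):
    '''Return the given units sorted by unit number.'''
    return sorted(units, key=lambda unit: int(unit.split('/')[-1]))

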
@hooks.hook('replication-relation-joined', 'replication-relation-changed')
os.chdir(org_dir)

@contextmanager
def restart_lock(unit, exclusive):
    '''Acquire the database restart lock on the given unit.

    A database needing a restart should grab an exclusive lock before
    doing so. To block a remote database from doing a restart, grab a
    shared lock.
    '''
    key = long(config_data['advisory_lock_restart_key'])
    if exclusive:
        lock_function = 'pg_advisory_lock'
    else:
        lock_function = 'pg_advisory_lock_shared'
    q = 'SELECT {}({})'.format(lock_function, key)

    # We will get an exception if the database is rebooted while waiting
    # for a shared lock. If the connection is killed, we retry a few
    # times to cope.
    num_retries = 3  # assumption: "a few" retries
    for count in range(0, num_retries):
        try:
            if unit == hookenv.local_unit():
                cur = db_cursor(autocommit=True)
            else:
                host = hookenv.relation_get('private-address', unit)
                cur = db_cursor(
                    autocommit=True, db='postgres',
                    user='juju_replication', host=host)
            cur.execute(q)
            break
        except psycopg2.Error:
            if count == num_retries - 1:
                raise

    try:
        yield
    finally:
        # Close our connection, swallowing any exceptions as the database
        # may be being rebooted now we have released our lock.
        try:
            cur.connection.close()
        except psycopg2.Error:
            pass


def clone_database(master_unit, master_host):
    with restart_lock(master_unit, False):
        postgresql_stop()
        log("Cloning master {}".format(master_unit))