~stub/charms/precise/postgresql/bug-1205286

Changes to hooks/hooks.py

Failover test passing

@@ -77,8 +77,11 @@
         client_state = {}
         add(client_state, 'state')
 
-        for relid in relation_ids(relation_types=['db', 'db-admin']):
-            relation_set(client_state, relid)
+        for relid in hookenv.relation_ids('db'):
+            hookenv.relation_set(relid, client_state)
+
+        for relid in hookenv.relation_ids('db-admin'):
+            hookenv.relation_set(relid, client_state)
 
         replication_state = dict(client_state)
 
@@ -92,8 +95,8 @@
         if authorized:
             replication_state['authorized'] = ' '.join(sorted(authorized))
 
-        for relid in relation_ids(relation_types=replication_relation_types):
-            relation_set(replication_state, relid)
+        for relid in hookenv.relation_ids('replication'):
+            hookenv.relation_set(relid, replication_state)
 
         self.save()
 
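The two hunks above replace the charm's local relation_ids()/relation_set() helpers with the charmhelpers.core.hookenv API, publishing the same settings dict on every relation of each named type. A minimal sketch of how those hookenv calls fit together; the helper function and its name are illustrative, not part of the branch:

    from charmhelpers.core import hookenv

    def publish_to_relations(state, relation_names):
        """Push the same settings dict onto every relation of each named type."""
        for name in relation_names:
            for relid in hookenv.relation_ids(name):
                # relation_set() accepts the relation id and a dict of settings.
                hookenv.relation_set(relid, state)

    # Usage matching the hook above:
    # publish_to_relations(client_state, ['db', 'db-admin'])
    # publish_to_relations(replication_state, ['replication'])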
@@ -1575,15 +1578,16 @@
     '''Connect the database as a streaming replica of the master.'''
     master_relation = hookenv.relation_get(unit=master)
 
-    recovery_conf = dedent("""\
-        standby_mode = on
-        primary_conninfo = 'host={} user=juju_replication'
-        """.format(master_relation['private-address']))
-    log(recovery_conf, DEBUG)
+    recovery_conf = Template(
+        open("templates/recovery.conf.tmpl").read()).render({
+            'host': master_relation['private-address'],
+            'password': local_state['replication_password']})
+    juju_log(MSG_DEBUG, recovery_conf)
     install_file(
         recovery_conf,
         os.path.join(postgresql_cluster_dir, 'recovery.conf'),
         owner="postgres", group="postgres")
+
     postgresql_restart()
 
 
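Here the hand-built, dedent()-ed recovery.conf gives way to a rendered template that also carries the replication password. The hunk does not show the Template import or the template file itself; the sketch below assumes Template is jinja2.Template and shows one plausible shape for templates/recovery.conf.tmpl:

    # Assumed template contents (not shown in this diff):
    #
    #   standby_mode = on
    #   primary_conninfo = 'host={{ host }} user=juju_replication password={{ password }}'
    #
    from jinja2 import Template

    def render_recovery_conf(host, password,
                             template_path="templates/recovery.conf.tmpl"):
        # Render the standby configuration from the template file.
        with open(template_path) as f:
            return Template(f.read()).render({'host': host, 'password': password})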
@@ -1660,9 +1664,8 @@
     # Now that pg_hba.conf has been regenerated and loaded, inform related
     # units that they have been granted replication access.
     authorized_units = set()
-    for relid in relation_ids(relation_types=replication_relation_types):
-        for unit in relation_list(relid):
-            authorized_units.add(unit)
+    for unit in hookenv.related_units():
+        authorized_units.add(unit)
     local_state['authorized'] = authorized_units
 
     master = elected_master()
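The authorized-unit bookkeeping now asks hookenv.related_units() for the peers on the relation the running hook fired on, rather than walking every replication relation type. A small illustrative sketch, reusing the space-separated 'authorized' format published earlier in this diff:

    from charmhelpers.core import hookenv

    def authorized_units_setting():
        # related_units() defaults to the relation the running hook fires on.
        units = set(hookenv.related_units())
        return ' '.join(sorted(units))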
@@ -1681,6 +1684,10 @@
             promote_database()
             if 'following' in local_state:
                 del local_state['following']
+            if 'wal_received_offset' in local_state:
+                del local_state['wal_received_offset']
+            if 'paused_at_failover' in local_state:
+                del local_state['paused_at_failover']
             local_state['state'] = 'master'
 
             # Publish credentials to hot standbys so they can connect.
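On promotion to master the unit now also discards its failover bookkeeping ('wal_received_offset' and 'paused_at_failover') along with 'following'. The same cleanup can be written as below; the helper name is hypothetical and assumes local_state behaves like a plain dict:

    FAILOVER_KEYS = ('following', 'wal_received_offset', 'paused_at_failover')

    def clear_failover_state(state, keys=FAILOVER_KEYS):
        # Equivalent to the guarded del statements above.
        for key in keys:
            state.pop(key, None)
        state['state'] = 'master'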
@@ -1811,40 +1818,57 @@
 
     assert remote_unit is not None
 
-    log("{} {} has left the peer group".format(remote_state, remote_unit))
-
-    # If the unit being removed was our master, we need to failover.
-    if local_state.get('following', None) == remote_unit:
-
-        # Prepare for failover. We need to suspend replication to ensure
-        # that the replay point remains consistent throughout the
-        # election, and publish that replay point. By comparing these
-        # replay points, the most up to date hot standby can be
-        # identified and promoted to the new master.
+    log("{} has left the peer group".format(remote_unit))
+
+    # If we are the last unit standing, we become standalone
+    remaining_peers = set(hookenv.related_units(hookenv.relation_id()))
+    remaining_peers.discard(remote_unit)  # Bug #1192433
+
+    # True if we were following the departed unit.
+    following_departed = (local_state.get('following', None) == remote_unit)
+
+    if remaining_peers and not following_departed:
+        log("Remaining {}".format(local_state['state']))
+
+    elif remaining_peers and following_departed:
+        # If the unit being removed was our master, prepare for failover.
+        # We need to suspend replication to ensure that the replay point
+        # remains consistent throughout the election, and publish that
+        # replay point. Once all units have entered this steady state,
+        # we can identify the most up to date hot standby and promote it
+        # to be the new master.
+        log("Entering failover state")
         cur = db_cursor(autocommit=True)
-        cur.execute(
-            "SELECT pg_is_xlog_replay_paused()")
+        cur.execute("SELECT pg_is_xlog_replay_paused()")
         already_paused = cur.fetchone()[0]
         local_state["paused_at_failover"] = already_paused
         if not already_paused:
             cur.execute("SELECT pg_xlog_replay_pause()")
+        # Switch to failover state. Don't cleanup the 'following'
+        # setting because having access to the former master is still
+        # useful.
         local_state['state'] = 'failover'
         local_state['wal_received_offset'] = postgresql_wal_received_offset()
 
-        # Now do nothing. We can't elect a new master until all the
-        # remaining peers are in a steady state and have published their
-        # wal_received_offset. Only then can we select a node to be
-        # master.
-        pass
+    else:
+        log("Last unit standing. Switching from {} to standalone.".format(
+            local_state['state']))
+        promote_database()
+        local_state['state'] = 'standalone'
+        if 'following' in local_state:
+            del local_state['following']
+        if 'wal_received_offset' in local_state:
+            del local_state['wal_received_offset']
+        if 'paused_at_failover' in local_state:
+            del local_state['paused_at_failover']
 
     config_changed(postgresql_config)
     local_state.publish()
 
 
 def replication_relation_broken():
+    # This unit has been removed from the service.
     promote_database()
-    local_state['state'] = 'standalone'
-    local_state.save()
     if os.path.exists(charm_pgpass):
         os.unlink(charm_pgpass)
     config_changed(postgresql_config)
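The failover branch above freezes WAL replay so each standby's replay point stays fixed while the election runs, records whether replay was already paused, and publishes its receive offset so the most up-to-date standby can be promoted. A standalone sketch of that freeze-and-report step, assuming a psycopg2 connection to the local cluster and that postgresql_wal_received_offset() reports something like pg_last_xlog_receive_location() (the pre-9.6 function names this precise-era charm targets); the DSN is illustrative:

    import psycopg2

    def pause_replay_and_report(dsn="dbname=postgres user=postgres"):
        con = psycopg2.connect(dsn)
        con.autocommit = True
        cur = con.cursor()
        # Remember whether replay was already paused so failover can be undone
        # without unpausing a deliberately paused standby.
        cur.execute("SELECT pg_is_xlog_replay_paused()")
        already_paused = cur.fetchone()[0]
        if not already_paused:
            cur.execute("SELECT pg_xlog_replay_pause()")
        # The offset each standby publishes; the furthest-ahead unit wins
        # the election for new master.
        cur.execute("SELECT pg_last_xlog_receive_location()")
        return already_paused, cur.fetchone()[0]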
@@ -2123,9 +2147,6 @@
     elif hook_name == "nrpe-external-master-relation-changed":
         update_nrpe_checks()
 
-    elif hook_name.startswith('master') or hook_name.startswith('slave'):
-        raise NotImplementedError(hook_name)
-
     elif hook_name == 'replication-relation-joined':
         replication_relation_joined_changed()
 
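The dispatcher drops the never-implemented master/slave branches and routes the replication-relation-joined hook to replication_relation_joined_changed(). For illustration only, the same if/elif routing expressed as a lookup table; the handlers here are stand-in stubs, not the charm's functions:

    def update_nrpe_checks():
        pass

    def replication_relation_joined_changed():
        pass

    HOOK_HANDLERS = {
        'nrpe-external-master-relation-changed': update_nrpe_checks,
        'replication-relation-joined': replication_relation_joined_changed,
    }

    def dispatch(hook_name):
        handler = HOOK_HANDLERS.get(hook_name)
        if handler is None:
            raise SystemExit("Unknown hook: {}".format(hook_name))
        handler()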