~stub/charms/precise/postgresql/bug-1205286

« back to all changes in this revision

Viewing changes to test.py

Committer: Stuart Bishop
Date: 2013-06-25 11:29:13 UTC
mto: (50.1.4 postgresql) (46.7.3 use-charm-helpers)
mto: This revision was merged to the branch mainline in revision 63.
Revision ID: stuart.bishop@canonical.com-20130625112913-xgawha9tu3fown6x

Failover test passing

files modified:
hooks/hooks.py

metadata.yaml

test.py

Show diffs side-by-side

added added

removed removed

test.py

242

self.addDetail('sql', text_content(repr((sql, result))))

243

return result

244

245

def pg_ctlcluster(self, unit, command):
    """Run a pg_ctlcluster action for the 9.1/main cluster on a unit.

    :param unit: juju unit name handed to ``juju ssh``.
    :param command: pg_ctlcluster action, appended verbatim to the
        remote command line.
    """
    # Due to Bug #1191079, the whole remote command has to be sent to
    # 'juju ssh' as one single argument rather than as separate words.
    remote_command = 'sudo pg_ctlcluster 9.1 main -force {}'.format(command)
    _run(self, ['juju', 'ssh', unit, remote_command])

251

245

252

def test_basic(self):

246

253

'''Set up a single unit service'''

247

254

self.juju.deploy(TEST_CHARM, 'postgresql')

258

265

self.assertEqual(result, [['t']])

259

266

260

267

def is_master(self, postgres_unit, dbname=None):
    """Report whether the database on a unit is out of recovery.

    Runs ``SELECT NOT pg_is_in_recovery()`` through ``self.sql`` and
    compares the first column of the first row with ``'t'``, which is
    how boolean true appears in the results this file compares against.

    :param postgres_unit: unit passed through to ``self.sql``.
    :param dbname: optional database name passed through to
        ``self.sql`` unchanged.
    :returns: ``True`` when the query yields ``'t'``, else ``False``.
    """
    rows = self.sql(
        'SELECT NOT pg_is_in_recovery()',
        postgres_unit, dbname=dbname)
    return rows[0][0] == 't'

265

272

266

273

def test_failover(self):
    """Set up a multi-unit service and perform failovers.

    Deploys three postgresql units plus a psql client related over the
    ``db`` interface, then removes the master twice. After each removal
    it checks that a master exists and that rows written on it can be
    read back from the remaining standby. Finally it checks that the
    last unit still accepts writes and that queries addressed to
    'master' and 'hot standby' fail.
    """
    self.juju.deploy(TEST_CHARM, 'postgresql', num_units=3)
    self.juju.deploy(PSQL_CHARM, 'psql')
    self.juju.do(['add-relation', 'postgresql:db', 'psql:db'])
    self.juju.wait_until_ready()

    # On a freshly set up service, the lowest numbered unit is always
    # the master.
    units = unit_sorted(
        self.juju.status['services']['postgresql']['units'].keys())
    master_unit, standby_unit_1, standby_unit_2 = units

    self.assertIs(True, self.is_master(master_unit))
    self.assertIs(False, self.is_master(standby_unit_1))
    self.assertIs(False, self.is_master(standby_unit_2))

    self.sql('CREATE TABLE Token (x int)', master_unit)

    # Simple helpers to send data via the master and check whether it
    # was replicated to the hot standbys. The counter lives in a list
    # so the nested functions can update it in place.
    _counter = [0]

    def send_token(unit):
        """Insert the next token value through the given unit."""
        _counter[0] += 1
        self.sql("INSERT INTO Token VALUES (%d)" % _counter[0], unit)

    def token_received(unit):
        """Return True if the most recent token is visible on unit."""
        r = self.sql(
            "SELECT TRUE FROM Token WHERE x=%d" % _counter[0], unit)
        return (r == [['t']])

    # Confirm that replication is actually happening.
    send_token(master_unit)
    self.assertIs(True, token_received(standby_unit_1))
    self.assertIs(True, token_received(standby_unit_2))

    # Remove the master unit.
    self.juju.do(['remove-unit', master_unit])
    self.juju.wait_until_ready()

    # When we failover, the unit that has received the most WAL
    # information from the old master (most in sync) is elected the
    # new master. We cannot predict which one that is here, only that
    # exactly one of the two remaining units is now the master.
    standby_unit_1_is_master = self.is_master(standby_unit_1)
    standby_unit_2_is_master = self.is_master(standby_unit_2)
    self.assertNotEqual(
        standby_unit_1_is_master, standby_unit_2_is_master)

    if standby_unit_1_is_master:
        master_unit = standby_unit_1
        standby_unit = standby_unit_2
    else:
        master_unit = standby_unit_2
        standby_unit = standby_unit_1

    # Confirm replication is still working.
    send_token(master_unit)
    self.assertIs(True, token_received(standby_unit))

    # Remove the master again, leaving a single unit.
    self.juju.do(['remove-unit', master_unit])
    self.juju.wait_until_ready()

    # Last unit is a working, standalone database. Assert the result
    # instead of discarding it, so a unit still stuck in recovery is
    # reported here rather than by the INSERT that follows.
    self.assertIs(True, self.is_master(standby_unit))
    send_token(standby_unit)

    # We can tell it is correctly reporting that it is standalone by
    # seeing if the -master and -hot-standby scripts no longer exist
    # on the psql unit.
    self.assertRaises(
        subprocess.CalledProcessError,
        self.sql, 'SELECT TRUE', 'master')
    self.assertRaises(
        subprocess.CalledProcessError,
        self.sql, 'SELECT TRUE', 'hot standby')

350

351

def test_failover_election(self):
    """Ensure master elected in a failover is the best choice

    Deploys three postgresql units and a psql client related over
    ``db-admin``. It stops PostgreSQL on the first standby, switches
    the WAL segment on the master, drops the master's cluster and
    restarts the first standby. After removing the master unit, the
    second standby is expected to be the new master.
    """
    self.juju.deploy(TEST_CHARM, 'postgresql', num_units=3)
    self.juju.deploy(PSQL_CHARM, 'psql')
    self.juju.do(['add-relation', 'postgresql:db-admin', 'psql:db-admin'])
    self.juju.wait_until_ready()

    # On a freshly set up service, the lowest numbered unit is always
    # the master.
    units = unit_sorted(
        self.juju.status['services']['postgresql']['units'].keys())
    master_unit, standby_unit_1, standby_unit_2 = units

    # Shutdown PostgreSQL on standby_unit_1 and ensure
    # standby_unit_2 will have received more WAL information from
    # the master. The action is passed as a string because
    # pg_ctlcluster() formats it into the remote shell command.
    self.pg_ctlcluster(standby_unit_1, 'stop')
    self.sql("SELECT pg_switch_xlog()", master_unit, dbname='postgres')

    # Destroy the master database, just like this was a real
    # disaster.
    cmd = ['juju', 'ssh', master_unit,
           # Due to Bug #1191079, we need to send the whole remote
           # command as a single argument.
           'sudo pg_dropcluster --stop 9.1 main']
    _run(self, cmd)

    # Restart standby_unit_1 now the master unit is dead and it has
    # no way of resyncing.
    self.pg_ctlcluster(standby_unit_1, 'start')

    # Failover. Note that this also tests we can remove a unit that
    # does not have a working database.
    self.juju.do(['remove-unit', master_unit])
    self.juju.wait_until_ready()

    # Ensure the election went as predicted.
    self.assertIs(False, self.is_master(standby_unit_1))
    self.assertIs(True, self.is_master(standby_unit_2))

378

390

379

391

380

392

def unit_sorted(units):

Older »