39
39
stop - stop all slaves
40
40
switchover - perform slave promotion
42
Note: elect, failover, gtid, and health require --master and either
43
--slaves or --discover-slave-login
44
47
_VALID_COMMANDS = ["elect", "failover", "gtid", "health", "reset", "start",
55
58
_DATE_FORMAT = '%Y-%m-%d %H:%M:%S %p'
61
_HOST_IP_WARNING = "You may be mixing host names and IP " + \
62
"addresses. This may result in negative status " + \
63
"reporting if your DNS services do not support " + \
64
"reverse name lookup."
58
66
def get_valid_rpl_command_text():
59
67
"""Provide list of valid command descriptions to caller.
149
157
self.quiet = self.options.get("quiet", False)
150
158
self.logging = self.options.get("logging", False)
151
159
self.candidates = self.options.get("candidates", None)
161
# Replace all local host IP addresses (i.e. 127.0.0.1) by localhost
162
for candidate in self.candidates:
163
if candidate['host'] == '127.0.0.1':
164
candidate['host'] = 'localhost'
165
for slave in slave_vals:
166
if slave['host'] == '127.0.0.1':
167
slave['host'] = 'localhost'
169
self.rpl_user = self.options.get("rpl_user", None)
152
170
self.topology = Topology(master_vals, slave_vals, self.options,
259
277
print_list(sys.stdout, format, _GTID_COLS, owned)
280
def _check_host_references(self):
281
"""Check to see if using all host or all IP addresses
283
Returns bool - True = all references are consistent
285
from mysql.utilities.common.options import hostname_is_ip
287
uses_ip = hostname_is_ip(self.topology.master.host)
288
for slave_dict in self.topology.slaves:
289
slave = slave_dict['instance']
290
if slave is not None:
291
host, port = slave.get_master_host_port()
292
if uses_ip != hostname_is_ip(slave.host) or \
293
uses_ip != hostname_is_ip(host):
262
298
def _switchover(self):
263
299
"""Perform switchover from master to candidate slave
268
304
Returns bool - True = no errors, False = errors reported.
270
from mysql.utilities.exception import FormatError
271
from mysql.utilities.common.options import parse_connection
306
# Check for --master-info-repository=TABLE if rpl_user is None
307
if not self._check_master_info_type():
310
# Check for mixing IP and hostnames
311
if not self._check_host_references():
312
print "# WARNING: %s" % _HOST_IP_WARNING
313
self._report(_HOST_IP_WARNING, logging.WARN, False)
273
315
# Check prerequisites - need valid candidate
274
316
candidate = self.options.get("new_master", None)
275
317
if candidate is None:
299
341
self._report("# WARNING: slave election requires GTID_MODE=ON "
300
342
"for all servers.", logging.WARN)
345
# Check for mixing IP and hostnames
346
if not self._check_host_references():
347
print "# WARNING: %s" % _HOST_IP_WARNING
348
self._report(_HOST_IP_WARNING, logging.WARN, False)
302
350
candidates = self.options.get("candidates", None)
303
351
if candidates is None or len(candidates) == 0:
304
352
self._report("# Electing candidate slave from known slaves.")
331
379
self._report("# WARNING: slave election requires GTID_MODE=ON "
332
380
"for all servers.", logging.WARN)
383
# Check for --master-info-repository=TABLE if rpl_user is None
384
if not self._check_master_info_type():
334
387
self._report("# Performing failover.")
335
388
if not self.topology.failover(self.candidates, strict):
336
389
self._report("# Errors found.", logging.ERROR)
394
def _check_master_info_type(self, halt=True):
395
"""Check for master information set to TABLE if rpl_user not provided
397
halt[in] if True, raise error on failure. Default is True
399
Returns bool - True if rpl_user is specified or False if rpl_user not
400
specified and at least one slave does not have
401
--master-info-repository=TABLE.
403
error = "You must specify either the --rpl-user or set all slaves " + \
404
"to use --master-info-repository=TABLE."
405
# Check for --master-info-repository=TABLE if rpl_user is None
406
if self.rpl_user is None:
407
if not self.topology.check_master_info_type("TABLE"):
409
raise UtilRplError(error)
410
self._report(error, logging.ERROR)
341
415
def execute_command(self, command):
342
416
"""Execute a replication admin command
400
474
def auto_failover(self, interval):
401
475
"""Automatic failover
477
Wrapper method for running automatic failover. See
478
run_auto_failover for details on implementation.
480
This method ensures the registration/deregistration occurs
481
regardless of exception or errors.
483
interval[in] time in seconds to wait to check status of servers
485
Returns bool - True = success, raises exception on error
488
from mysql.utilities.command.failover_console import FailoverConsole
490
failover_mode = self.options.get("failover_mode", "auto")
491
force = self.options.get("force", False)
493
# Initialize a console
494
console = FailoverConsole(self.topology.master,
495
self.topology.get_health,
496
self.topology.get_gtid_data,
497
self.topology.get_server_uuids,
501
self._report("Registering instance on master.", logging.INFO, False)
502
old_mode = failover_mode
503
failover_mode = console.register_instance(force)
504
if failover_mode != old_mode:
505
self._report("Multiple instances of failover console found for "
506
"master %s:%s." % (self.topology.master.host,
507
self.topology.master.port),
509
print "If this is an error, restart the console with --force. "
510
print "Failover mode changed to 'FAIL' for this instance. "
511
print "Console will start in 10 seconds.",
513
for i in range(0, 9):
515
sys.stdout.write('.')
517
print "starting Console."
521
res = self.run_auto_failover(console, interval);
526
# Unregister instance
527
self._report("Unregistering instance on master.", logging.INFO,
529
console.register_instance(False, False)
530
self._report("Failover console stopped.", logging.INFO, False)
537
def run_auto_failover(self, console, interval):
538
"""Run automatic failover
403
540
This method implements the automatic failover facility. It uses the
404
541
FailoverConsole class from the failover_console.py to implement all
405
542
user interface commands and uses the existing failover() method of
411
548
2) failover to list of candidates only
551
console[in] instance of the failover console class
414
552
interval[in] time in seconds to wait to check status of servers
416
554
Returns bool - True = success, raises exception on error
419
from mysql.utilities.command.failover_console import FailoverConsole
420
557
from mysql.utilities.common.tools import ping_host
421
558
from mysql.utilities.common.tools import execute_script
423
560
failover_mode = self.options.get("failover_mode", "auto")
424
561
pingtime = self.options.get("pingtime", 3)
425
timeout = self.options.get("timeout", 3)
562
timeout = int(self.options.get("timeout", 3))
426
563
exec_fail = self.options.get("exec_fail", None)
427
force = self.options.get("force", False)
428
564
post_fail = self.options.get("post_fail", None)
430
566
# Only works for GTID_MODE=ON
441
577
msg = "User %s on %s does not have sufficient privileges to " + \
442
578
"execute the %s command."
443
579
for error in errors:
444
self._report(msg % (error[0], error[1], command),
580
self._report(msg % (error[0], error[1], 'failover'),
445
581
logging.CRITICAL)
446
582
raise UtilRplError("Not enough privileges to execute command.")
584
# Require --master-info-repository=TABLE for all slaves
585
if not self.topology.check_master_info_type("TABLE"):
586
msg = "Failover requires --master-info-repository=TABLE for " + \
588
self._report(msg, logging.ERROR, False)
589
raise UtilRplError(msg)
591
# Check for mixing IP and hostnames
592
if not self._check_host_references():
593
print "# WARNING: %s" % _HOST_IP_WARNING
594
self._report(_HOST_IP_WARNING, logging.WARN, False)
595
print "#\n# Failover console will start in 10 seconds."
448
598
# Test failover script. If it doesn't exist, fail.
449
599
no_exec_fail_msg = "Failover check script cannot be found. Please " + \
450
600
"check the path and filename for accuracy and " + \
451
601
"restart the failover console."
452
if exec_fail is not None and not os.path.exists(fail_check):
602
if exec_fail is not None and not os.path.exists(exec_fail):
453
603
self._report(no_exec_fail_msg, logging.CRITICAL, False)
454
604
raise UtilRplError(no_exec_fail_msg)
456
# Initialize a console
457
console = FailoverConsole(self.topology.master, self.topology.get_health,
458
self.topology.get_gtid_data,
459
self.topology.get_server_uuids,
463
self._report("Registering instance on master.", logging.INFO, False)
464
old_mode = failover_mode
465
failover_mode = console.register_instance(force)
466
if failover_mode != old_mode:
467
self._report("Multiple instances of failover console found for "
468
"master %s:%s." % (self.topology.master.host,
469
self.topology.master.port),
471
print "Failover mode changed to 'FAIL'. Console will start in 5 seconds."
474
606
self._report("Failover console started.", logging.INFO, False)
475
607
self._report("Failover mode = %s." % failover_mode, logging.INFO, False)
477
609
# Main loop - loop and fire on interval.
479
611
first_pass = True
561
693
self.topology.run_script(post_fail, False)
563
695
# discover slaves if option was specified at startup
564
elif self.options.get("discover", None) is not None \
696
elif self.options.get("discover", None) is not None and \
697
(not first_pass or self.options.get("rediscover", False)):
566
698
# Force refresh of health list if new slaves found
567
699
if self.topology.discover_slaves():
568
700
console.list_data = None
574
706
done = True # User has quit
575
707
first_pass = False
577
# Unregister instance
578
self._report("Unregistering instance on master.", logging.INFO, False)
579
console.register_instance(False, False)
580
self._report("Failover console stopped.", logging.INFO, False)