2
from collections import defaultdict
3
from contextlib import contextmanager
13
from remote import remote_from_unit
24
def background_chaos(env, client, log_dir, time):
25
monkey = MonkeyRunner(env, client, enablement_timeout=time)
26
monkey.deploy_chaos_monkey()
27
monkey_ids = monkey.unleash_once()
28
monkey.wait_for_chaos(state='start')
31
monkey.wait_for_chaos(state='complete', timeout=time)
32
except BaseException as e:
36
# Copy the chaos logs to the log directory.
37
# Get the remote machine. Currently the remote machine will always be
38
# ubuntu/0. If background_chaos() is enhanced to take a target service,
39
# then log collection will also need to be updated.
40
remote = remote_from_unit(client, "ubuntu/0")
42
monkey_log = ['chaos-monkey/chaos_monkey.{}/log/*'.format(id)]
43
dest_dir = '{}/chaos-monkey-{}'.format(log_dir, id)
46
remote.copy(dest_dir, monkey_log)
47
except subprocess.CalledProcessError as e:
49
'Could not retrieve Chaos Monkey log for {}:'.format(id))
50
logging.warning(e.output)
55
def __init__(self, env, client, service='0', health_checker=None,
56
enablement_timeout=120, pause_timeout=0, total_timeout=0):
59
self.service = 'ubuntu'
62
self.service = service
64
self.health_checker = health_checker
66
self.enablement_timeout = enablement_timeout
67
self.pause_timeout = pause_timeout
68
self.total_timeout = total_timeout
69
self.expire_time = (datetime.now() + timedelta(seconds=total_timeout))
72
def deploy_chaos_monkey(self):
73
"""Juju deploy chaos-monkey and add a relation.
75
JUJU_REPOSITORY must be set in the OS environment so a local
76
chaos-monkey charm can be found.
80
'Deploying ubuntu to machine {}.'.format(self.machine))
81
charm = local_charm_path(
82
charm='ubuntu', juju_ver=self.client.version)
83
self.client.deploy(charm, to=self.machine)
84
logging.debug('Deploying local:chaos-monkey.')
85
charm = local_charm_path(
86
charm='chaos-monkey', juju_ver=self.client.version)
87
self.client.deploy(charm)
88
logging.debug('Relating chaos-monkey to {}.'.format(self.service))
89
self.client.juju('add-relation', (self.service, 'chaos-monkey'))
90
logging.debug('Waiting for services to start.')
91
self.client.wait_for_started()
92
self.client.wait_for_subordinate_units(self.service, 'chaos-monkey')
94
def iter_chaos_monkey_units(self):
95
status = self.client.get_status()
96
for unit_name, unit in status.service_subordinate_units(self.service):
97
if not unit_name.startswith('chaos-monkey'):
101
def unleash_once(self):
    """Start a single chaos run on every chaos-monkey unit.

    For each unit produced by iter_chaos_monkey_units() the 'start' action
    is invoked with 'mode=single' and the configured enablement timeout.
    When a monkey id is already recorded for the unit it is passed along as
    'monkey-id=<id>'; otherwise the value returned by action_do() is
    recorded as that unit's monkey id.

    :return: the values of self.monkey_ids (one recorded id per unit).
    """
    for unit_name, _unit in self.iter_chaos_monkey_units():
        logging.info('Starting the chaos monkey on: {}'.format(unit_name))
        monkey_id = self.monkey_ids.get(unit_name)
        args = [
            unit_name,
            'start',
            'mode=single',
            'enablement-timeout={}'.format(self.enablement_timeout),
        ]
        if monkey_id is not None:
            args.append('monkey-id={}'.format(monkey_id))
        action_id = self.client.action_do(*args)
        # A falsy recorded id (not only a missing one) is replaced by the
        # id returned from this action.
        if not monkey_id:
            logging.info('Setting the monkey-id for {} to: {}'.format(
                unit_name, action_id))
            self.monkey_ids[unit_name] = action_id
    return self.monkey_ids.values()
119
def is_healthy(self):
120
"""Returns a boolean after running the health_checker."""
121
if self.health_checker:
123
sub_output = subprocess.check_output(self.health_checker)
124
logging.info('Health check output: {}'.format(sub_output))
127
'The health check failed to execute with: {}'.format(
130
except subprocess.CalledProcessError as e:
131
logging.error('Non-zero exit code returned from {}: {}'.format(
132
self.health_checker, e))
133
logging.error(e.output)
137
def get_unit_status(self, unit_name):
138
"""Return 'done' if no lock file otherwise 'running'"""
139
service_config = self.client.get_service_config('chaos-monkey')
140
logging.debug('{}'.format(service_config))
141
logging.debug('Checking if chaos is done on: {}'.format(unit_name))
143
check_cmd += service_config['settings']['chaos-dir']['value']
144
check_cmd += '/chaos_monkey.' + self.monkey_ids[unit_name]
145
check_cmd += '/chaos_runner.lock'
147
if self.client.juju('run', ('--unit', unit_name, check_cmd),
152
def wait_for_chaos(self, state='complete', timeout=300):
    """Poll the chaos-monkey units until all of them reach *state*.

    :param state: 'start' waits until every unit reports 'running';
        'complete' waits until every unit reports 'done'.
    :param timeout: passed to until_timeout() to bound the polling loop.
    :raises Exception: if *state* is not 'start' or 'complete', or if the
        units did not all reach the requested state before polling ended.
    """
    # Exact membership, not a substring test: 'restart' or 'completed'
    # must be rejected here instead of polling until the timeout.
    if state not in ('complete', 'start'):
        raise Exception('Unexpected state value: {}'.format(state))
    wanted = 'done' if state == 'complete' else 'running'
    for _ in until_timeout(timeout):
        # Group unit names by the status get_unit_status() reports.
        locks = defaultdict(list)
        for unit_name, _unit in self.iter_chaos_monkey_units():
            locks[self.get_unit_status(unit_name)].append(unit_name)
        # list(locks) instead of locks.keys(): on Python 3 keys() is a view
        # object that never compares equal to a list, so the original
        # comparison could not succeed there.
        if list(locks) == [wanted]:
            # NOTE(review): the call that emits these messages was not
            # visible in the reviewed extract; debug level is assumed.
            if state == 'complete':
                logging.debug(
                    'All lock files removed, chaos complete: {}'.format(
                        locks))
            else:
                logging.debug(
                    'All lock files found, chaos started: {}'.format(locks))
            return
    raise Exception('Chaos operations did not {}.'.format(state))