~nskaggs/juju-ci-tools/add-essential-operations

1014.2.1 by John George
background_chaos WIP
1
#!/usr/bin/env python
2
from collections import defaultdict
3
from contextlib import contextmanager
4
from datetime import (
5
    datetime,
6
    timedelta,
7
)
8
import logging
1014.2.5 by John George
deploy_dummy_stack with chaos.
9
import os
1014.2.1 by John George
background_chaos WIP
10
import subprocess
11
import sys
12
1485.1.1 by Martin
Switch all imports of local_charm_path to using jujucharm over utility
13
from jujucharm import (
14
    local_charm_path,
15
)
1014.2.5 by John George
deploy_dummy_stack with chaos.
16
from remote import remote_from_unit
1014.2.1 by John George
background_chaos WIP
17
from utility import (
18
    until_timeout,
19
)
20
21
1092.2.2 by Aaron Bentley
Fix lint.
22
__metaclass__ = type
23
24
1014.2.1 by John George
background_chaos WIP
25
@contextmanager
def background_chaos(env, client, log_dir, time):
    """Run Chaos Monkey in the background while the ``with`` body executes.

    On entry a MonkeyRunner is created, the chaos-monkey charm is deployed,
    chaos is unleashed once and the function blocks until every unit reports
    that chaos has started.  When the ``with`` body finishes, the function
    waits for chaos to complete.  Whatever happens, the Chaos Monkey logs
    are then copied from the remote machine into ``log_dir``.

    :param env: Environment object handed through to MonkeyRunner.
    :param client: Juju client used to deploy, run actions and find the
        remote unit.
    :param log_dir: Local directory under which one
        ``chaos-monkey-<monkey id>`` directory is created per monkey id.
    :param time: Used both as the runner's enablement timeout and as the
        timeout while waiting for chaos to complete.

    Any exception raised by the ``with`` body or by the wait for completion
    is logged and converted into ``sys.exit(1)``.
    """
    monkey = MonkeyRunner(env, client, enablement_timeout=time)
    monkey.deploy_chaos_monkey()
    monkey_ids = monkey.unleash_once()
    monkey.wait_for_chaos(state='start')
    try:
        yield
        monkey.wait_for_chaos(state='complete', timeout=time)
    except BaseException as e:
        logging.exception(e)
        sys.exit(1)
    finally:
        # Copy the chaos logs to the log directory.
        # Get the remote machine. Currently the remote machine will always be
        # ubuntu/0. If background_chaos() is enhanced to take a target
        # service, then log collection will also need to be updated.
        remote = remote_from_unit(client, "ubuntu/0")
        for monkey_id in monkey_ids:
            monkey_log = [
                'chaos-monkey/chaos_monkey.{}/log/*'.format(monkey_id)]
            dest_dir = '{}/chaos-monkey-{}'.format(log_dir, monkey_id)
            # A directory left over from an earlier run must not abort log
            # collection (and mask the real failure) with an OSError.
            if not os.path.isdir(dest_dir):
                os.mkdir(dest_dir)
            try:
                remote.copy(dest_dir, monkey_log)
            except subprocess.CalledProcessError as e:
                # Log retrieval is best effort: warn and carry on.
                logging.warning(
                    'Could not retrieve Chaos Monkey log for {}:'.format(
                        monkey_id))
                logging.warning(e.output)
1014.2.1 by John George
background_chaos WIP
53
54
55
class MonkeyRunner:
    """Deploy the chaos-monkey charm and drive chaos runs through Juju.

    The runner remembers the monkey-id returned for each chaos-monkey unit
    in ``self.monkey_ids`` (unit name -> monkey-id) so that later runs and
    status checks address the same Chaos Monkey instance.
    """

    def __init__(self, env, client, service='0', health_checker=None,
                 enablement_timeout=120, pause_timeout=0, total_timeout=0):
        """Store the configuration for a chaos run.

        :param env: Environment object; only stored on the instance here.
        :param client: Juju client used for every Juju operation.
        :param service: Service to relate chaos-monkey to.  The default
            '0' means "deploy an 'ubuntu' service to machine 0 and use it".
        :param health_checker: Optional command passed to
            subprocess.check_output() by is_healthy().
        :param enablement_timeout: Value sent as the ``enablement-timeout``
            argument of the chaos-monkey ``start`` action.
        :param pause_timeout: Stored on the instance; not used by this
            class.
        :param total_timeout: Seconds from now used to compute
            ``expire_time``.
        """
        self.env = env
        if service == '0':
            self.service = 'ubuntu'
            self.machine = '0'
        else:
            self.service = service
            self.machine = None
        self.health_checker = health_checker
        self.client = client
        self.enablement_timeout = enablement_timeout
        self.pause_timeout = pause_timeout
        self.total_timeout = total_timeout
        self.expire_time = (datetime.now() + timedelta(seconds=total_timeout))
        self.monkey_ids = {}

    def deploy_chaos_monkey(self):
        """Juju deploy chaos-monkey and add a relation.

        JUJU_REPOSITORY must be set in the OS environment so a local
        chaos-monkey charm can be found.
        """
        if self.machine:
            logging.debug(
                'Deploying ubuntu to machine {}.'.format(self.machine))
            charm = local_charm_path(
                charm='ubuntu', juju_ver=self.client.version)
            self.client.deploy(charm, to=self.machine)
        logging.debug('Deploying local:chaos-monkey.')
        charm = local_charm_path(
            charm='chaos-monkey', juju_ver=self.client.version)
        self.client.deploy(charm)
        logging.debug('Relating chaos-monkey to {}.'.format(self.service))
        self.client.juju('add-relation', (self.service, 'chaos-monkey'))
        logging.debug('Waiting for services to start.')
        self.client.wait_for_started()
        self.client.wait_for_subordinate_units(self.service, 'chaos-monkey')

    def iter_chaos_monkey_units(self):
        """Yield (unit_name, unit) for each chaos-monkey subordinate unit.

        Subordinates of ``self.service`` whose name does not start with
        'chaos-monkey' are skipped.
        """
        status = self.client.get_status()
        for unit_name, unit in status.service_subordinate_units(self.service):
            if not unit_name.startswith('chaos-monkey'):
                continue
            yield unit_name, unit

    def unleash_once(self):
        """Run the chaos-monkey ``start`` action once on every unit.

        The first id returned for a unit is recorded as that unit's
        monkey-id; on later calls the recorded monkey-id is passed back to
        the action.

        :return: A list of the known monkey-ids.
        """
        for unit_name, unit in self.iter_chaos_monkey_units():
            logging.info('Starting the chaos monkey on: {}'.format(unit_name))
            enablement_arg = ('enablement-timeout={}'.format(
                self.enablement_timeout))
            monkey_id = self.monkey_ids.get(unit_name)
            args = (unit_name, 'start', 'mode=single', enablement_arg)
            if monkey_id is not None:
                args = args + ('monkey-id={}'.format(monkey_id),)

            action_id = self.client.action_do(*args)
            if not monkey_id:
                logging.info('Setting the monkey-id for {} to: {}'.format(
                    unit_name, action_id))
                self.monkey_ids[unit_name] = action_id
        # Return a real list so callers get the same type on Python 2 and 3
        # (dict.values() is a live view on Python 3).
        return list(self.monkey_ids.values())

    def is_healthy(self):
        """Returns a boolean after running the health_checker.

        Returns True when no health_checker is configured or when it exits
        successfully, and False when it exits with a non-zero status.  An
        OSError from trying to execute the checker is logged and re-raised.
        """
        if self.health_checker:
            try:
                sub_output = subprocess.check_output(self.health_checker)
                logging.info('Health check output: {}'.format(sub_output))
            except OSError as e:
                logging.error(
                    'The health check failed to execute with: {}'.format(
                        e))
                raise
            except subprocess.CalledProcessError as e:
                logging.error('Non-zero exit code returned from {}: {}'.format(
                    self.health_checker, e))
                logging.error(e.output)
                return False
        return True

    def get_unit_status(self, unit_name):
        """Return 'done' if no lock file otherwise 'running'.

        The lock file path is built from the chaos-monkey service's
        ``chaos-dir`` setting and the monkey-id recorded for ``unit_name``;
        a KeyError is raised if unleash_once() has not recorded one.
        """
        service_config = self.client.get_service_config('chaos-monkey')
        logging.debug('{}'.format(service_config))
        logging.debug('Checking if chaos is done on: {}'.format(unit_name))
        chaos_dir = service_config['settings']['chaos-dir']['value']
        check_cmd = '[ -f {}/chaos_monkey.{}/chaos_runner.lock ]'.format(
            chaos_dir, self.monkey_ids[unit_name])
        # A truthy result from client.juju() is treated as "lock file
        # absent" (presumably the non-zero exit status of the shell test;
        # confirm against the client implementation).
        if self.client.juju('run', ('--unit', unit_name, check_cmd),
                            check=False):
            return 'done'
        return 'running'

    def wait_for_chaos(self, state='complete', timeout=300):
        """Poll until every chaos-monkey unit reaches ``state``.

        :param state: 'start' waits until all units report 'running';
            'complete' waits until all units report 'done'.
        :param timeout: Value handed to until_timeout() to bound the wait.
        :raises Exception: If ``state`` is not 'start' or 'complete', or if
            the units do not reach the state before the timeout expires.
        """
        # Exact match: a substring test would accept values such as
        # 'started' that can never satisfy either branch below.
        if state not in ('complete', 'start'):
            raise Exception('Unexpected state value: {}'.format(state))
        for ignored in until_timeout(timeout):
            # Group unit names by their current status.
            locks = defaultdict(list)
            for unit_name, unit in self.iter_chaos_monkey_units():
                locks[self.get_unit_status(unit_name)].append(unit_name)
            # list(locks) rather than locks.keys(): on Python 3 a keys view
            # never compares equal to a list.
            statuses = list(locks)
            if state == 'complete' and statuses == ['done']:
                logging.debug(
                    'All lock files removed, chaos complete: {}'.format(locks))
                break
            if state == 'start' and statuses == ['running']:
                logging.debug(
                    'All lock files found, chaos started: {}'.format(locks))
                break
        else:
            raise Exception('Chaos operations did not {}.'.format(state))