1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
#!/usr/bin/env python
__metaclass__ = type
from argparse import ArgumentParser
from datetime import datetime
import logging
import sys
from time import sleep
from chaos import MonkeyRunner
from jujupy import (
EnvJujuClient,
SimpleEnvironment,
)
from utility import configure_logging
def run_while_healthy_or_timeout(monkey):
    """Unleash chaos repeatedly until the run expires or health fails.

    Each round checks monkey.is_healthy(), unleashes one chaos operation
    and waits for it to finish.  The function returns normally once the
    current time has passed monkey.expire_time.  If a health check fails
    before that, an error is logged and the process exits with status 1.

    :param monkey: The chaos runner object; it must provide is_healthy(),
        unleash_once(), wait_for_chaos(), expire_time, pause_timeout and
        health_checker.
    """
    logging.debug('run_while_healthy_or_timeout')
    while monkey.is_healthy():
        logging.debug('Unleashing chaos.')
        monkey.unleash_once()
        monkey.wait_for_chaos()

        # The deadline is only examined after a full round of chaos.
        timed_out = datetime.now() > monkey.expire_time
        if timed_out:
            logging.debug('Reached run timeout, all done running chaos.')
            return

        pause_seconds = monkey.pause_timeout
        if pause_seconds:
            pause_message = 'Pausing {} seconds after running chaos.'.format(
                pause_seconds)
            logging.debug(pause_message)
            sleep(pause_seconds)

    # Falling out of the loop means a health check failed before the
    # deadline was reached.
    failure_message = 'The health check reported an error: {}'.format(
        monkey.health_checker)
    logging.error(failure_message)
    sys.exit(1)
def get_args(argv=None):
    """Parse and validate the command-line arguments.

    :param argv: List of argument strings to parse.  When None, argparse
        reads the arguments of the running process.
    :return: The parsed namespace with env, service, health_checker,
        enablement_timeout, total_timeout and pause_timeout attributes.
        A missing (or zero) total timeout falls back to the enablement
        timeout.
    :raises SystemExit: Raised through parser.error() when a timeout
        value is out of range.
    """
    parser = ArgumentParser()
    parser.add_argument('env', help='The name of the environment.')
    parser.add_argument('service', help='A service name to monkey with.')
    parser.add_argument(
        'health_checker',
        help='A binary for checking the health of the environment.')
    parser.add_argument(
        '-et', '--enablement-timeout', default=30, type=int,
        help="Enablement timeout in seconds.", metavar='SECONDS')
    parser.add_argument(
        '-tt', '--total-timeout', type=int, help="Total timeout in seconds.",
        metavar='SECONDS')
    parser.add_argument(
        '-pt', '--pause-timeout', default=0, type=int,
        help="Pause timeout in seconds.", metavar='SECONDS')
    args = parser.parse_args(argv)

    # Check each option's own range first, so the error names the option
    # the user actually got wrong rather than a value derived from it.
    if args.enablement_timeout < 0:
        parser.error("Invalid enablement-timeout value: timeout must be "
                     "zero or greater.")
    # A negative pause would only fail later, when it is handed to
    # time.sleep() in the middle of a chaos run; reject it up front.
    if args.pause_timeout < 0:
        parser.error("Invalid pause-timeout value: timeout must be "
                     "zero or greater.")

    if not args.total_timeout:
        args.total_timeout = args.enablement_timeout
    if args.enablement_timeout > args.total_timeout:
        parser.error("total-timeout can not be less than "
                     "enablement-timeout.")
    if args.total_timeout <= 0:
        parser.error("Invalid total-timeout value: timeout must be "
                     "greater than zero.")
    return args
def main(argv=None):
    """Deploy and run chaos monkey, while checking env health.

    The Chaos Monkey is deployed into the environment and related to
    the specified service. Juju actions are then used to run one chaos
    operation at a time. After each operation, the provided health
    check script is executed, to ensure the Juju environment or
    software stack is still healthy.

    :param argv: Optional list of command-line argument strings, handed
        unchanged to get_args().  When None, the arguments of the running
        process are parsed.
    """
    configure_logging(logging.INFO)
    args = get_args(argv)
    environment = SimpleEnvironment.from_config(args.env)
    client = EnvJujuClient.by_version(environment)
    monkey_runner = MonkeyRunner(
        args.env, client, service=args.service,
        health_checker=args.health_checker,
        enablement_timeout=args.enablement_timeout,
        pause_timeout=args.pause_timeout,
        total_timeout=args.total_timeout)
    logging.info("Chaos Monkey Start.")
    monkey_runner.deploy_chaos_monkey()
    # Exits the process with status 1 if a health check fails.
    run_while_healthy_or_timeout(monkey_runner)
    logging.info("Chaos Monkey Complete.")
# Only run the chaos monkey when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|