import json
import subprocess
import time

import fixtures
from testtools.content import text_content


__all__ = ['JujuFixture', 'run']


class JujuFixture(fixtures.Fixture):
    """Interact with juju.

    Assumes the juju environment is already bootstrapped.
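
    A minimal usage sketch (the test case and charm name below are
    hypothetical; assumes import testtools and the standard useFixture
    pattern):

        class PostgreSQLTests(testtools.TestCase):

            def setUp(self):
                super(PostgreSQLTests, self).setUp()
                self.juju = self.useFixture(JujuFixture())

            def test_deploy(self):
                self.juju.deploy('local:postgresql', 'postgresql')
                self.juju.wait_until_ready()
                self.assertIn('postgresql', self.juju.status['services'])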
    """

    def __init__(self, reuse_machines=False, do_teardown=True):
        super(JujuFixture, self).__init__()

        self._deployed_charms = set()

        self.reuse_machines = reuse_machines

        # Optionally, don't tear down services and machines after running
        # a test. If a subsequent test is run, they will be torn down at
        # that point. This option is only useful when running a single
        # test, or when the test harness is set to abort after the first
        # failed test.
        self.do_teardown = do_teardown

        self._deployed_services = set()

    def do(self, cmd):
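        """Run a juju subcommand, capturing its output as test details."""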
        cmd = ['juju'] + cmd
        run(self, cmd)

    def get_result(self, cmd):
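        """Run a juju subcommand with --format=json and decode its output."""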
        cmd = ['juju'] + cmd + ['--format=json']
        out = run(self, cmd)
        if out:
            return json.loads(out)
        return None

    def deploy(self, charm, name=None, num_units=1):
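        """Deploy a charm, reusing a free machine when possible."""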
        # The first time we deploy a local: charm in the test run, it
        # needs to deploy with --update to ensure we are testing the
        # desired revision of the charm. Subsequent deploys do not use
        # --update, to avoid overhead and needless incrementing of the
        # revision number.
        if not charm.startswith('local:') or charm in self._deployed_charms:
            cmd = ['deploy']
        else:
            cmd = ['deploy', '-u']
            self._deployed_charms.add(charm)

        cmd.append(charm)

        if name is None:
            name = charm.split(':', 1)[-1]

        cmd.append(name)
        self._deployed_services.add(name)

        if self.reuse_machines and self._free_machines:
            cmd.extend(['--to', str(self._free_machines.pop())])
            self.do(cmd)
            if num_units > 1:
                self.add_unit(charm, name, num_units - 1)
        else:
            cmd.extend(['-n', str(num_units)])
            self.do(cmd)

    def add_unit(self, charm, name=None, num_units=1):
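        """Add units to a service, reusing free machines when enabled."""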
        if name is None:
            name = charm.split(':', 1)[-1]

        num_units_spawned = 0
        while self.reuse_machines and self._free_machines:
            cmd = ['add-unit', '--to', str(self._free_machines.pop()), name]
            self.do(cmd)
            num_units_spawned += 1
            if num_units_spawned == num_units:
                return

        cmd = ['add-unit', '-n', str(num_units - num_units_spawned), name]
        self.do(cmd)

    # The most recent environment status, updated by refresh_status().
    status = None

    def refresh_status(self):
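        """Refresh self.status and the set of free machines, and return it."""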
        self.status = self.get_result(['status'])

        self._free_machines = set(
            int(k) for k, m in self.status['machines'].items() if
            k != '0'
            and m.get('life', None) not in ('dead', 'dying')
            and m.get('agent-state', 'pending') in ('started', 'ready'))
        for service in self.status.get('services', {}).values():
            for unit in service.get('units', {}).values():
                if 'machine' in unit:
                    self._free_machines.discard(int(unit['machine']))

        return self.status

    def wait_until_ready(self, extra=60):
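        """Block until all units are started, then sleep extra seconds."""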
        ready = False
        while not ready:
            self.refresh_status()
            ready = True
            for service in self.status['services']:
                if self.status['services'][service].get('life', '') == 'dying':
                    ready = False
                units = self.status['services'][service].get('units', {})
                for unit in units.keys():
                    agent_state = units[unit].get('agent-state', '')
                    if agent_state == 'error':
                        raise RuntimeError('{} error: {}'.format(
                            unit, units[unit].get('agent-state-info', '')))
                    if agent_state != 'started':
                        ready = False
            time.sleep(1)
        # Unfortunately, there is no way to tell when a system is
        # actually ready for us to test. Juju only tells us that a
        # relation has started being set up, and that no errors have
        # been encountered yet. It utterly fails to inform us when the
        # cascade of hooks this triggers has finished and the
        # environment is in a stable and actually testable state.
        # So as a workaround for Bug #1200267, we need to sleep long
        # enough that our system is probably stable. This means we have
        # extremely slow and flaky tests, but that is possibly better
        # than no tests.
        time.sleep(extra)

    def setUp(self):
        super(JujuFixture, self).setUp()
        self.reset()
        if self.do_teardown:
            self.addCleanup(self.reset)

    def reset(self):
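        """Destroy services we deployed and, unless reusing, their machines."""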
        # Tear down any services left running that we know we spawned.
        while True:
            found_services = False
            self.refresh_status()

            # Kill any services started by the deploy() method.
            for service_name, service in self.status.get(
                    'services', {}).items():
                if service_name in self._deployed_services:
                    found_services = True
                    if service.get('life', '') not in ('dying', 'dead'):
                        self.do(['destroy-service', service_name])
                    # If any units have failed hooks, unstick them.
                    for unit_name, unit in service.get('units', {}).items():
                        if unit.get('agent-state', None) == 'error':
                            try:
                                self.do(['resolved', unit_name])
                            except subprocess.CalledProcessError:
                                # More race conditions in juju. A
                                # previous 'resolved' call may cause a
                                # subsequent one to fail if it is still
                                # being processed. However, we need to
                                # keep retrying because after a
                                # successful resolution a subsequent hook
                                # may cause an error state.
                                pass
            if not found_services:
                break
            time.sleep(1)

        self._deployed_services = set()

        # We shouldn't reuse machines, as we have no guarantee they are
        # still in a usable state, so tear them down too. Per
        # Bug #1190492 (INVALID), in the future this will be much nicer
        # when we can use containers for isolation and can happily reuse
        # machines.
        if self.reuse_machines:
            # If we are reusing machines, wait until pending machines
            # are ready and dying machines are dead.
            while True:
                for k, machine in self.status['machines'].items():
                    if (k != '0' and machine.get('agent-state', 'pending')
                            not in ('ready', 'started')):
                        time.sleep(1)
                        self.refresh_status()
                        break
                else:
                    break
        elif self._free_machines:
            self.do(
                ['terminate-machine']
                + [str(m) for m in self._free_machines])


def run(detail_collector, cmd, input=''):
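    """Run cmd, attaching its output as details on detail_collector.

    Raises subprocess.CalledProcessError if the command exits non-zero.
    """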
    proc = subprocess.Popen(
        cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    (out, err) = proc.communicate(input)
    if out:
        detail_collector.addDetail('stdout', text_content(out))
    if err:
        detail_collector.addDetail('stderr', text_content(err))
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(
            proc.returncode, cmd, err)
    return out