~andrewjbeach/juju-ci-tools/get-juju-dict

1091.3.1 by James Tunnicliffe
Supporting functions for testing networking between containers in a Juju environment.
1
#!/usr/bin/env python
2
from __future__ import print_function
1091.5.19 by Aaron Bentley
Fix imports
3
from argparse import ArgumentParser
1226.1.1 by Martin Packman
Fix assess_spaces_subnets by adapting to new BootstrapManager code from container networking test
4
import contextlib
1091.3.2 by James Tunnicliffe
clean_environment bails early if there is no environment to clean.
5
from copy import (
6
    copy,
7
    deepcopy,
1591.1.1 by Andrew Beach
Cleaned up duplicate declarations of JujuAssertionError.
8
    )
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
9
import logging
10
import re
11
import os
1091.5.4 by James Tunnicliffe
SSH wrapper now trying even harder to retry on connection errors.
12
import subprocess
1215.1.1 by Martin Packman
Switch assess_container_networking to using BootstrapManager
13
import sys
1091.5.19 by Aaron Bentley
Fix imports
14
import tempfile
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
15
from textwrap import dedent
1091.5.19 by Aaron Bentley
Fix imports
16
import time
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
17
1091.5.19 by Aaron Bentley
Fix imports
18
from deploy_stack import (
1215.1.1 by Martin Packman
Switch assess_container_networking to using BootstrapManager
19
    BootstrapManager,
20
    get_random_string,
1091.5.19 by Aaron Bentley
Fix imports
21
    update_env,
1591.1.1 by Andrew Beach
Cleaned up duplicate declarations of JujuAssertionError.
22
    )
1341.2.1 by Aaron Bentley
Move container support knowledge to jujupy.
23
from jujupy import (
24
    KVM_MACHINE,
25
    LXC_MACHINE,
26
    LXD_MACHINE,
1591.1.1 by Andrew Beach
Cleaned up duplicate declarations of JujuAssertionError.
27
    )
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
28
from utility import (
1591.1.1 by Andrew Beach
Cleaned up duplicate declarations of JujuAssertionError.
29
    JujuAssertionError,
1215.1.1 by Martin Packman
Switch assess_container_networking to using BootstrapManager
30
    add_basic_testing_arguments,
31
    configure_logging,
1091.5.13 by James Tunnicliffe
Fixed up tests.
32
    wait_for_port,
1591.1.1 by Andrew Beach
Cleaned up duplicate declarations of JujuAssertionError.
33
    )
1091.5.19 by Aaron Bentley
Fix imports
34
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
35
1091.5.2 by James Tunnicliffe
retry ssh commands if they fail with code 255, which seems to happen on heavily loaded machines.
36
# Declare every class in this module as new-style when run on Python 2.
__metaclass__ = type


# Logger shared by all of the assessment helpers below.
log = logging.getLogger("assess_container_networking")
40
1528.1.4 by Leo Zhang
Cleaned up
41
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
42
def parse_args(argv=None):
    """Build the argument parser for this test and parse *argv* with it.

    :param argv: list of argument strings, handed unchanged to
        ArgumentParser.parse_args
    :return: the parsed namespace; keep_env is forced to True whenever
        --clean-environment was given
    """
    description = dedent("""\
    Test container address allocation.
    For LXC and KVM, create machines of each type and test the network
    between LXC <--> LXC, KVM <--> KVM and LXC <--> KVM. Also test machine
    to outside world, DNS and that these tests still pass after a reboot. In
    case of failure pull logs and configuration files from the machine that
    we detected a problem on for later analysis.
    """)
    clean_environment_help = dedent("""\
        Attempts to re-use an existing environment rather than destroying it
        and creating a new one.

        On launch, if an environment exists, clean out services and machines
        from it rather than destroying it. If an environment doesn't exist,
        create one and use it.

        At termination, clean out services and machines from the environment
        rather than destroying it.""")
    machine_choices = [KVM_MACHINE, LXC_MACHINE, LXD_MACHINE]

    base_parser = ArgumentParser(description=description)
    parser = add_basic_testing_arguments(base_parser)
    parser.add_argument(
        '--machine-type',
        help='Which virtual machine/container type to test. Defaults to all.',
        choices=machine_choices)
    parser.add_argument(
        '--clean-environment', action='store_true',
        help=clean_environment_help)

    parsed = parser.parse_args(argv)
    # Re-using the environment only makes sense if it is not torn down, so
    # --clean-environment implies --keep-env.
    if parsed.clean_environment:
        parsed.keep_env = True
    return parsed
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
76
77
78
def ssh(client, machine, cmd):
    """Convenience function: run a juju ssh command and get back the output.

    The command is attempted up to four times. A retry only happens when the
    failure's stderr shows that the remote host closed the connection during
    the ssh identification exchange (usually a sign of heavy load); the wait
    before each retry doubles, starting at two seconds. Any other failure,
    or a failure on the final attempt, is re-raised unchanged.

    :param client: A Juju client
    :param machine: ID of the machine on which to run a command
    :param cmd: the command to run
    :return: text output of the command
    :raises subprocess.CalledProcessError: if the command fails and is not
        (or is no longer) eligible for a retry
    """
    back_off = 2
    attempts = 4
    for attempt in range(attempts):
        try:
            return client.get_juju_output('ssh', '--proxy', machine, cmd)
        except subprocess.CalledProcessError as e:
            # stderr is not guaranteed to be present (or to be text) on the
            # exception, so normalise it before searching; otherwise the
            # original error would be masked by an AttributeError/TypeError.
            stderr = getattr(e, 'stderr', None) or ''
            if isinstance(stderr, bytes):
                stderr = stderr.decode('utf-8', 'replace')
            connection_dropped = re.search(
                'ssh_exchange_identification: '
                'Connection closed by remote host', stderr)
            if attempt == attempts - 1 or not connection_dropped:
                raise
            # If the connection to the host failed, try again in a couple of
            # seconds. This is usually due to heavy load.
            time.sleep(back_off)
            back_off *= 2
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
100
101
1091.3.1 by James Tunnicliffe
Supporting functions for testing networking between containers in a Juju environment.
102
def clean_environment(client, services_only=False):
    """Strip an environment back so it can be re-used by another test run.

    Use as an alternative to destroying an environment and creating a new one
    to save a bit of time.

    :param client: a Juju client
    :param services_only: when true only the services are removed; containers
        and machines are left in place
    :return: False if the status of the environment could not be fetched,
        True once the clean-up has been carried out
    """
    # get_status is given a short timeout: if it does not answer quickly the
    # environment almost certainly doesn't exist or needs recreating.
    try:
        status = client.get_status(5)
    except Exception as e:
        # TODO(gz): get_status should return a more specific error type.
        log.info("Could not clean existing env: %s", e)
        return False

    for application in status.get_applications():
        client.remove_service(application)

    if services_only:
        client.wait_for_started()
        return True

    # Containers go first; a machine that still hosts containers cannot be
    # removed.
    for container_id, _ in status.iter_machines(
            containers=True, machines=False):
        client.juju('remove-machine', container_id)

    client.wait_for('containers', 'none')

    for machine_id, _ in status.iter_machines(
            containers=False, machines=True):
        if machine_id == '0':
            continue
        try:
            client.juju('remove-machine', machine_id)
        except subprocess.CalledProcessError:
            # Juju sometimes still believes the host has containers even
            # though it reported them as removed. A short pause followed by
            # a second attempt is normally all that is needed.
            time.sleep(2)
            client.wait_for_started()
            client.juju('remove-machine', machine_id)

    client.wait_for('machines-not-0', 'none')

    client.wait_for_started()
    return True
149
150
151
def make_machines(client, container_types):
    """Make a test environment consisting of:
       Two host machines.
       Two of each container_type on one host machine.
       One of each container_type on one host machine.

    Machines and containers that already exist are re-used; only the missing
    ones are added. Containers that live on machines other than the two
    selected hosts are ignored.

    :param client: An EnvJujuClient
    :param container_types: list of containers to create
    :return: hosts (list), containers {container_type}{host}[containers]
    """
    # Find existing host machines
    old_hosts = client.get_status().status['machines']
    machines_to_add = 2 - len(old_hosts)

    # Allocate more hosts as needed
    if machines_to_add > 0:
        client.juju('add-machine', ('-n', str(machines_to_add)))
    status = client.wait_for_started()
    hosts = sorted(status.status['machines'].keys())[:2]

    # Work out which containers are still missing: one of each type per
    # host, plus a second of each type on the first host.
    required = dict((host, list(container_types)) for host in hosts)
    required[hosts[0]] += container_types
    for name, _ in status.iter_machines(containers=True, machines=False):
        host, container_type, _ = name.split('/')
        # Containers may exist on machines that were not selected as hosts;
        # those must not cause a KeyError here.
        if container_type in required.get(host, ()):
            required[host].remove(container_type)

    # Start any new containers we need
    for host, missing_types in required.items():
        for container_type in missing_types:
            # The placement is passed as a plain string, exactly as before
            # (the original's extra parentheses did not build a tuple).
            client.juju('add-machine', '{}:{}'.format(container_type, host))

    status = client.wait_for_started()

    # Build a list of containers, now they have all started
    containers = dict(
        (container_type, dict((host, []) for host in hosts))
        for container_type in container_types)
    for name, _ in status.iter_machines(containers=True, machines=False):
        host, container_type, _ = name.split('/')
        if (container_type in containers and
                host in containers[container_type]):
            containers[container_type][host].append(name)

    return hosts, containers
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
195
196
197
def find_network(client, machine, addr):
    """Find a connected subnet containing the given address.

    When using this to find the subnet of a container, don't use the container
    as the machine to run the ip route show command on ("machine"), use a real
    box because lxc will just send everything to its host machine, so it is on
    a subnet containing itself. Not much use.

    :param client: A Juju client
    :param machine: ID of the machine on which to run a command
    :param addr: find the connected subnet containing this address
    :return: CIDR (e.g. '10.0.0.0/24') of the first listed route whose
        destination is written in prefix/length form
    :raises ValueError: if no such route is listed
    """
    ip_cmd = ' '.join(['ip', 'route', 'show', 'to', 'match', addr])
    routes = ssh(client, machine, ip_cmd)

    # Only accept lines whose first field is a complete prefix/length. The
    # previous pattern backtracked inside that field and returned a truncated
    # prefix such as '10.0.0.' instead of the CIDR. The 'default' route has
    # no prefix length, so it is never matched.
    match = re.search(r'^(\S+/\d+)(?=\s|$)', routes, re.MULTILINE)
    if match is None:
        raise ValueError("Unable to find route to %r" % addr)
    return match.group(1)
217
218
219
def assess_network_traffic(client, targets):
    """Test that all containers in target can talk to target[0]

    A small script that starts a detached netcat listener is copied to
    targets[0]. For every other target the listener is (re)started, a random
    message is sent to it from that target, and the text the listener
    captured is compared with what was sent.

    :param client: Juju client
    :param targets: machine IDs of machines to test
    :return: None
    :raises ValueError: if the listener did not capture the message sent
    """
    status = client.wait_for_started().status
    log.info('Assessing network traffic.')
    source = targets[0]
    dests = targets[1:]

    # Text mode, because a str is written to the file.
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
        f.write('tmux new-session -d -s test "nc -l 6778 > nc_listen.out"')
    try:
        client.juju(
            'scp', ('--proxy', f.name, source + ':/home/ubuntu/listen.sh'))
    finally:
        # Remove the local copy even when the upload fails.
        os.remove(f.name)

    # Containers are named 'x/type/y' where x is the host of the container.
    # The address of the source is looked up under that host in the status.
    host = source.split('/')[0]
    address = status['machines'][host]['containers'][source]['dns-name']

    for dest in dests:
        log.info('Assessing network traffic for {}.'.format(dest))
        msg = get_random_string()
        ssh(client, source, 'rm nc_listen.out; bash ./listen.sh')
        ssh(client, dest,
            'echo "{msg}" | nc {addr} 6778'.format(msg=msg, addr=address))
        result = ssh(client, source, 'more nc_listen.out')
        if result.rstrip() != msg:
            raise ValueError("Wrong or missing message: %r" % result.rstrip())
        log.info('SUCCESS.')
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
249
250
1091.5.6 by James Tunnicliffe
A bit more reliability hacking around ssh and reboots.
251
def private_address(client, host):
    """Return the IPv4 address of the device that carries host's default route.

    :param client: Juju client
    :param host: ID of the machine to inspect
    :return: the address found on the device, without its prefix length
    :raises JujuAssertionError: if either the device or its address cannot be
        picked out of the command output
    """
    route_output = ssh(client, host, 'ip -4 -o route list 0/0')
    log.info("Default route from {}: {}".format(host, route_output))

    # The device name is taken to be the last word of the output.
    device_match = re.search(r'([\w-]+)\s*$', route_output)
    if device_match is None:
        raise JujuAssertionError(
            "Failed to find device in {}".format(route_output))
    device = device_match.group(1)

    log.info("Fetching the device IP of {}".format(device))
    addr_output = ssh(client, host, 'ip -4 -o addr show {}'.format(device))
    log.info("Device IP for {}: {}".format(host, addr_output))

    address_match = re.search(r'inet\s+(\S+)/\d+\s', addr_output)
    if address_match is None:
        raise JujuAssertionError(
            "Failed to find ip for device: {}".format(device))
    return address_match.group(1)
1091.5.6 by James Tunnicliffe
A bit more reliability hacking around ssh and reboots.
267
268
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
269
def assess_address_range(client, targets):
    """Check that every target container shares a subnet with its host.

    :param client: Juju client
    :param targets: machine IDs of machines to test
    :return: None; raises ValueError on failure
    """
    log.info('Assessing address range.')
    status = client.wait_for_started().status

    # Subnet of each host, looked up once per host and then re-used.
    subnet_by_host = {}

    for target in targets:
        log.info('Assessing address range for {}.'.format(target))
        host = target.split('/')[0]

        if host not in subnet_by_host:
            host_address = private_address(client, host)
            subnet_by_host[host] = find_network(client, host, host_address)
        host_subnet = subnet_by_host[host]

        container_address = (
            status['machines'][host]['containers'][target]['dns-name'])
        container_subnet = find_network(client, host, container_address)
        if container_subnet != host_subnet:
            raise ValueError(
                '{} ({}) not on the same subnet as {} ({})'.format(
                    target, container_subnet, host, host_subnet))
        log.info('SUCCESS.')
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
298
299
300
def assess_internet_connection(client, targets):
    """Check that each target can ping the gateway of its default route.

    :param client: Juju client
    :param targets: machine IDs of machines to test
    :return: None; raises ValueError on failure
    """
    log.info('Assessing internet connection.')
    for target in targets:
        log.info("Assessing internet connection for {}".format(target))
        routes = ssh(client, target, 'ip route show')

        gateway = re.search(
            r'^default\s+via\s+([\d\.]+)\s+', routes, re.MULTILINE)
        if not gateway:
            raise ValueError("Default route not found")

        # check=False: the exit status is inspected here rather than having
        # the client act on it.
        ping_command = 'ping -c1 -q ' + gateway.group(1)
        rc = client.juju(
            'ssh', ('--proxy', target, ping_command), check=False)
        if rc != 0:
            raise ValueError('%s unable to ping default route' % target)
        log.info("SUCCESS")
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
320
321
322
def _assessment_iteration(client, containers):
    """Run each of the network checks, in order, on one set of containers.

    :param client: Juju client
    :param containers: list of containers to run tests between
    :return: None
    """
    checks = (
        assess_internet_connection,
        assess_address_range,
        assess_network_traffic,
    )
    for check in checks:
        check(client, containers)
332
333
1477.2.9 by Leo Zhang
clean-up finish
334
def _assess_container_networking(client, types, hosts, containers):
    """Run _assessment_iteration on all useful combinations of containers.

    :param client: Juju client
    :param types: container types under test
    :param hosts: list of host machine IDs; the first two are used
    :param containers: container IDs indexed as
        containers[container_type][host]
    :return: None
    """
    first_host, second_host = hosts[0], hosts[1]

    for container_type in types:
        # Test with two containers on the same host
        _assessment_iteration(client, containers[container_type][first_host])

        # Now test with two containers on two different hosts
        split_pair = [
            containers[container_type][first_host][0],
            containers[container_type][second_host][0],
        ]
        _assessment_iteration(client, split_pair)

    if not (KVM_MACHINE in types and LXC_MACHINE in types):
        return

    # Test with an LXC and a KVM on the same machine
    same_host_pair = [
        containers[LXC_MACHINE][first_host][0],
        containers[KVM_MACHINE][first_host][0],
    ]
    _assessment_iteration(client, same_host_pair)

    # Test with an LXC and a KVM on different machines
    cross_host_pair = [
        containers[LXC_MACHINE][first_host][0],
        containers[KVM_MACHINE][second_host][0],
    ]
    _assessment_iteration(client, cross_host_pair)
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
364
365
1570.1.9 by Curtis Hovey
Check the uptime before waiting.
366
def get_uptime(client, host):
    """Return how long host has been up, in whole minutes.

    The value is parsed from the output of ``uptime -p`` (for example
    "up 1 hour, 35 minutes"). Every "<number> <unit>" pair is converted to
    minutes and summed, so later readings compare correctly with earlier
    ones. The previous pattern only ever captured the final digit of the
    output.

    :param client: Juju client
    :param host: ID of the machine to query
    :return: uptime in minutes; 0 if no duration could be parsed
    """
    minutes_per_unit = {
        'minute': 1,
        'hour': 60,
        'day': 60 * 24,
        'week': 60 * 24 * 7,
        'year': 60 * 24 * 365,
        'decade': 60 * 24 * 365 * 10,
    }
    uptime_output = ssh(client, host, 'uptime -p')
    log.info('uptime -p: {}'.format(uptime_output))
    durations = re.findall(
        r'(\d+)\s+(decade|year|week|day|hour|minute)s?\b', uptime_output)
    return sum(
        int(amount) * minutes_per_unit[unit] for amount, unit in durations)
375
376
1319.1.1 by Martin Packman
Allow testing of lxd with assess_container_networking
377
def assess_container_networking(client, types):
    """Runs _assess_container_networking, reboots hosts, repeat.

    The machines and containers are created (or re-used), the networking
    checks are run, every host and then the controller machine is rebooted,
    and the same checks are run a second time.

    :param client: Juju client
    :param types: Container types to test
    :return: None
    :raises subprocess.CalledProcessError: if issuing a reboot fails; the
        captured output is logged before the error is re-raised
    """
    log.info("Setting up test.")
    hosts, containers = make_machines(client, types)
    status = client.wait_for_started().status
    log.info("Setup complete.")
    log.info("Test started.")

    _assess_container_networking(client, types, hosts, containers)

    # Reboot all hosted model machines, then the controller.
    log.info("Instrumenting reboot of all machines.")
    try:
        for host in hosts:
            log.info("Restarting hosted machine: {}".format(host))
            client.juju(
                'run', ('--machine', host, 'sudo shutdown -r now'))
        client.juju('show-action-status', ('--name', 'juju-run'))

        log.info("Restarting controller machine 0")
        controller_client = client.get_controller_client()
        controller_status = controller_client.get_status()
        controller_host = controller_status.status['machines']['0']['dns-name']
        first_uptime = get_uptime(controller_client, '0')
        # juju run cannot be used to restart the controller, so use ssh.
        ssh(controller_client, '0', 'sudo shutdown -r now')
    except subprocess.CalledProcessError as e:
        # Use the module logger, like every other message in this file,
        # rather than the root logger.
        log.info(
            "Error running shutdown:\nstdout: %s\nstderr: %s",
            e.output, getattr(e, 'stderr', None))
        raise

    # Wait for the controller to shut down if it has not yet restarted.
    # This ensures the call to wait_for_started happens after each host
    # has restarted.
    second_uptime = get_uptime(controller_client, '0')
    if second_uptime > first_uptime:
        wait_for_port(controller_host, 22, closed=True, timeout=300)
    client.wait_for_started()

    # Once Juju is up it can take a little while before ssh responds.
    # The host names come from the status captured before the reboot.
    for host in hosts:
        hostname = status['machines'][host]['dns-name']
        wait_for_port(hostname, 22, timeout=240)
    log.info("Reboot complete and all hosts ready for retest.")

    _assess_container_networking(client, types, hosts, containers)
    log.info("PASS")
1091.5.2 by James Tunnicliffe
retry ssh commands if they fail with code 255, which seems to happen on heavily loaded machines.
429
430
1226.1.2 by Martin Packman
Make dedicated _CleanedContext object for mutability
431
class _CleanedContext:
432
433
    def __init__(self, client):
434
        self.client = client
435
        self.return_code = None
436
437
1226.1.1 by Martin Packman
Fix assess_spaces_subnets by adapting to new BootstrapManager code from container networking test
438
@contextlib.contextmanager
def cleaned_bootstrap_context(bs_manager, args):
    """Context manager that bootstraps, or cleans and re-uses, an environment.

    With args.clean_environment set, an existing environment is cleaned and
    re-used when possible, and cleaned again after the managed block; without
    it a bootstrap is always performed.

    :param bs_manager: the BootstrapManager driving the environment
    :param args: parsed command line arguments; clean_environment and
        upload_tools are read
    :return: yields a _CleanedContext. After the managed block completes its
        return_code is 0, or 1 if the final clean-up was requested and failed
    """
    ctx = _CleanedContext(bs_manager.client)
    client = ctx.client
    # TODO(gz): Having to manipulate client env state here to get the temp env
    #           is ugly, would ideally be captured in an explicit scope.
    update_env(client.env, bs_manager.temp_env_name, series=bs_manager.series,
               agent_url=bs_manager.agent_url,
               agent_stream=bs_manager.agent_stream, region=bs_manager.region)
    with bs_manager.top_context() as machines:
        # A successful clean of an existing environment replaces bootstrap.
        reused_existing = args.clean_environment and clean_environment(client)
        if not reused_existing:
            with bs_manager.bootstrap_context(machines):
                client.bootstrap(args.upload_tools)
        with bs_manager.runtime_context(machines):
            yield ctx
        ctx.return_code = 0
        if args.clean_environment:
            cleaned = clean_environment(client)
            if not cleaned:
                ctx.return_code = 1
459
460
1341.2.1 by Aaron Bentley
Move container support knowledge to jujupy.
461
def _get_container_types(client, machine_type):
1319.1.1 by Martin Packman
Allow testing of lxd with assess_container_networking
462
    """
463
    Give list of container types to run testing against.
464
465
    If a machine_type was explictly specified, only test against those kind
466
    of containers. Otherwise, test all possible containers for the given
467
    juju version.
468
    """
469
    if machine_type:
1341.2.1 by Aaron Bentley
Move container support knowledge to jujupy.
470
        if machine_type not in client.supported_container_types:
471
            raise Exception(
472
                "no {} support on juju {}".format(machine_type,
473
                                                  client.version))
1319.1.1 by Martin Packman
Allow testing of lxd with assess_container_networking
474
        return [machine_type]
475
    # TODO(gz): Only include LXC for 1.X clients
1341.2.1 by Aaron Bentley
Move container support knowledge to jujupy.
476
    types = list(client.supported_container_types)
477
    types.sort()
1319.1.1 by Martin Packman
Allow testing of lxd with assess_container_networking
478
    return types
479
480
1226.1.1 by Martin Packman
Fix assess_spaces_subnets by adapting to new BootstrapManager code from container networking test
481
def main(argv=None):
    """Parse the arguments, run the container networking assessment.

    :param argv: list of argument strings, handed to parse_args
    :return: the return_code recorded on the context yielded by
        cleaned_bootstrap_context
    """
    args = parse_args(argv)
    configure_logging(args.verbose)
    bs_manager = BootstrapManager.from_args(args)
    # The container types are resolved before anything is bootstrapped.
    machine_types = _get_container_types(bs_manager.client, args.machine_type)
    with cleaned_bootstrap_context(bs_manager, args) as ctx:
        assess_container_networking(bs_manager.client, machine_types)
    return ctx.return_code
1091.5.4 by James Tunnicliffe
SSH wrapper now trying even harder to retry on connection errors.
490
1091.5.1 by James Tunnicliffe
Test communication between containers and the wider network.
491
492
if __name__ == '__main__':
    # The value returned by main() becomes the exit status of the process.
    exit_status = main()
    sys.exit(exit_status)