1
1
#!/usr/bin/env python
2
2
from __future__ import print_function
3
from argparse import ArgumentParser
15
from textwrap import dedent
18
from deploy_stack import (
30
add_basic_testing_arguments,
36
10
__metaclass__ = type
39
log = logging.getLogger("assess_container_networking")
42
def parse_args(argv=None):
43
"""Parse all arguments."""
45
description = dedent("""\
46
Test container address allocation.
47
For LXC and KVM, create machines of each type and test the network
48
between LXC <--> LXC, KVM <--> KVM and LXC <--> KVM. Also test machine
49
to outside world, DNS and that these tests still pass after a reboot. In
50
case of failure pull logs and configuration files from the machine that
51
we detected a problem on for later analysis.
53
parser = add_basic_testing_arguments(ArgumentParser(
54
description=description
58
help='Which virtual machine/container type to test. Defaults to all.',
59
choices=[KVM_MACHINE, LXC_MACHINE, LXD_MACHINE])
61
'--clean-environment', action='store_true', help=dedent("""\
62
Attempts to re-use an existing environment rather than destroying it
63
and creating a new one.
65
On launch, if an environment exists, clean out services and machines
66
from it rather than destroying it. If an environment doesn't exist,
67
create one and use it.
69
At termination, clean out services and machines from the environment
70
rather than destroying it."""))
71
args = parser.parse_args(argv)
72
# Passing --clean-environment implies --keep-env
73
if args.clean_environment:
78
def ssh(client, machine, cmd):
79
"""Convenience function: run a juju ssh command and get back the output
80
:param client: A Juju client
81
:param machine: ID of the machine on which to run a command
82
:param cmd: the command to run
83
:return: text output of the command
87
for attempt in range(attempts):
89
return client.get_juju_output('ssh', '--proxy', machine, cmd)
90
except subprocess.CalledProcessError as e:
91
# If the connection to the host failed, try again in a couple of
92
# seconds. This is usually due to heavy load.
93
if(attempt < attempts - 1 and
94
re.search('ssh_exchange_identification: '
95
'Connection closed by remote host', e.stderr)):
102
17
def clean_environment(client, services_only=False):
192
102
containers[type][host].append(c[0])
194
104
return hosts, containers
197
def find_network(client, machine, addr):
    """Find a connected subnet containing the given address.

    When using this to find the subnet of a container, don't use the container
    as the machine to run the ip route show command on ("machine"), use a real
    box because lxc will just send everything to its host machine, so it is on
    a subnet containing itself. Not much use.

    :param client: A Juju client
    :param machine: ID of the machine on which to run a command
    :param addr: find the connected subnet containing this address
    :return: CIDR (e.g. '10.0.0.0/24') of the first route whose destination
        is written in CIDR form
    :raises ValueError: if no such route is reported for the address
    """
    ip_cmd = ' '.join(['ip', 'route', 'show', 'to', 'match', addr])
    routes = ssh(client, machine, ip_cmd)

    # Anchor on the complete destination token. Matching a loose first token
    # followed by "any later CIDR" lets the regex engine backtrack on lines
    # that contain a single CIDR and hand back a truncated prefix such as
    # '10.0.0.' instead of '10.0.0.0/24'. Lines starting with 'default' (or
    # any other keyword) never match, so they need no separate filtering.
    found = re.search(r'^([\d.]+/\d+)(?=\s|$)', routes, re.MULTILINE)
    if found is None:
        raise ValueError("Unable to find route to %r" % addr)
    return found.group(1)
219
def assess_network_traffic(client, targets):
220
"""Test that all containers in target can talk to target[0]
221
:param client: Juju client
222
:param targets: machine IDs of machines to test
225
status = client.wait_for_started().status
226
log.info('Assessing network traffic.')
230
with tempfile.NamedTemporaryFile(delete=False) as f:
231
f.write('tmux new-session -d -s test "nc -l 6778 > nc_listen.out"')
232
client.juju('scp', ('--proxy', f.name, source + ':/home/ubuntu/listen.sh'))
235
# Containers are named 'x/type/y' where x is the host of the container. We
236
host = source.split('/')[0]
237
address = status['machines'][host]['containers'][source]['dns-name']
240
log.info('Assessing network traffic for {}.'.format(dest))
241
msg = get_random_string()
242
ssh(client, source, 'rm nc_listen.out; bash ./listen.sh')
244
'echo "{msg}" | nc {addr} 6778'.format(msg=msg, addr=address))
245
result = ssh(client, source, 'more nc_listen.out')
246
if result.rstrip() != msg:
247
raise ValueError("Wrong or missing message: %r" % result.rstrip())
251
def private_address(client, host):
    """Return the IPv4 address of the device that carries the default route.

    :param client: Juju client
    :param host: ID of the machine to inspect
    :return: first IPv4 address listed for the default-route device, without
        the prefix length
    :raises JujuAssertionError: if the device name or its address cannot be
        parsed from the command output
    """
    default_route = ssh(client, host, 'ip -4 -o route list 0/0')
    log.info("Default route from {}: {}".format(host, default_route))
    # The device follows the 'dev' keyword; further fields (proto, src,
    # metric, ...) may come after it, so the last word of the line is not
    # reliably the device name. Fall back to the last word only when no
    # 'dev' token is present.
    route_match = re.search(r'\bdev\s+(\S+)', default_route)
    if route_match is None:
        route_match = re.search(r'([\w-]+)\s*$', default_route)
    if route_match is None:
        raise JujuAssertionError(
            "Failed to find device in {}".format(default_route))
    device = route_match.group(1)

    log.info("Fetching the device IP of {}".format(device))
    device_ip = ssh(client, host, 'ip -4 -o addr show {}'.format(device))
    log.info("Device IP for {}: {}".format(host, device_ip))
    ip_match = re.search(r'inet\s+(\S+)/\d+\s', device_ip)
    if ip_match is None:
        raise JujuAssertionError(
            "Failed to find ip for device: {}".format(device))
    return ip_match.group(1)
269
def assess_address_range(client, targets):
    """Test that two containers are in the same subnet as their host

    :param client: Juju client
    :param targets: machine IDs of machines to test
    :return: None; raises ValueError on failure
    """
    log.info('Assessing address range.')
    status = client.wait_for_started().status

    # Several targets usually share a host; look each host's subnet up once.
    host_subnet_cache = {}

    for target in targets:
        log.info('Assessing address range for {}.'.format(target))
        # Container IDs look like 'x/type/y' where x is the hosting machine.
        host = target.split('/')[0]

        if host not in host_subnet_cache:
            host_address = private_address(client, host)
            host_subnet_cache[host] = find_network(client, host, host_address)
        host_subnet = host_subnet_cache[host]

        addr = status['machines'][host]['containers'][target]['dns-name']
        # Query the host, not the container, for the container's subnet.
        subnet = find_network(client, host, addr)
        if host_subnet != subnet:
            raise ValueError(
                '{} ({}) not on the same subnet as {} ({})'.format(
                    target, subnet, host, host_subnet))
300
def assess_internet_connection(client, targets):
    """Test that targets can ping their default route

    :param client: Juju client
    :param targets: machine IDs of machines to test
    :return: None; raises ValueError on failure
    """
    log.info('Assessing internet connection.')
    for target in targets:
        log.info("Assessing internet connection for {}".format(target))
        routes = ssh(client, target, 'ip route show')

        d = re.search(r'^default\s+via\s+([\d\.]+)\s+', routes, re.MULTILINE)
        if d is None:
            raise ValueError("Default route not found")

        gateway = d.group(1)
        rc = client.juju('ssh', ('--proxy', target,
                                 'ping -c1 -q ' + gateway), check=False)
        # NOTE(review): assumes client.juju(..., check=False) returns the
        # command's exit status, with 0 meaning success -- confirm against
        # the client implementation.
        if rc != 0:
            raise ValueError('%s unable to ping default route' % target)
322
def _assessment_iteration(client, containers):
    """Run the network tests on this collection of containers

    The assessments run in a fixed order: default-route reachability, then
    subnet membership, then container-to-container traffic.

    :param client: Juju client
    :param containers: list of containers to run tests between
    """
    assessments = (
        assess_internet_connection,
        assess_address_range,
        assess_network_traffic,
    )
    for assess in assessments:
        assess(client, containers)
334
def _assess_container_networking(client, types, hosts, containers):
    """Run _assessment_iteration on all useful combinations of containers

    :param client: Juju client
    :param types: container types to test
    :param hosts: host machine IDs; hosts[0] and hosts[1] are used
    :param containers: container IDs indexed as containers[type][host][n]
    """
    for container_type in types:
        by_host = containers[container_type]

        # Test with two containers on the same host
        _assessment_iteration(client, by_host[hosts[0]])

        # Now test with two containers on two different hosts
        test_containers = [
            by_host[hosts[0]][0],
            by_host[hosts[1]][0],
        ]
        _assessment_iteration(client, test_containers)

    # The mixed-type checks do not depend on the loop variable, so they run
    # once, after every single-type combination has been exercised.
    if KVM_MACHINE in types and LXC_MACHINE in types:
        # Test with an LXC and a KVM on the same machine
        test_containers = [
            containers[LXC_MACHINE][hosts[0]][0],
            containers[KVM_MACHINE][hosts[0]][0],
        ]
        _assessment_iteration(client, test_containers)

        # Test with an LXC and a KVM on different machines
        test_containers = [
            containers[LXC_MACHINE][hosts[0]][0],
            containers[KVM_MACHINE][hosts[1]][0],
        ]
        _assessment_iteration(client, test_containers)
366
def get_uptime(client, host):
    """Return the uptime of host in whole minutes, from ``uptime -p``.

    Every "<count> <unit>" pair in the output (for example
    "up 1 day, 2 hours, 35 minutes") is converted to minutes and summed, so
    that two readings can be compared meaningfully.

    :param client: Juju client
    :param host: ID of the machine to query
    :return: uptime in minutes; 0 when the output contains no duration
    """
    minutes_per_unit = {
        'minute': 1,
        'hour': 60,
        'day': 60 * 24,
        'week': 60 * 24 * 7,
        'year': 60 * 24 * 365,
        'decade': 60 * 24 * 365 * 10,
    }
    uptime_output = ssh(client, host, 'uptime -p')
    log.info('uptime -p: {}'.format(uptime_output))
    # A greedy '.*(\d+)' would capture only the final digit of the output
    # (e.g. '5' from '35 minutes'), so parse each field explicitly instead.
    fields = re.findall(
        r'(\d+)\s+(decade|year|week|day|hour|minute)s?\b', uptime_output)
    return sum(int(count) * minutes_per_unit[unit] for count, unit in fields)
377
def assess_container_networking(client, types):
378
"""Runs _assess_address_allocation, reboots hosts, repeat.
380
:param client: Juju client
381
:param types: Container types to test
384
log.info("Setting up test.")
385
hosts, containers = make_machines(client, types)
386
status = client.wait_for_started().status
387
log.info("Setup complete.")
388
log.info("Test started.")
390
_assess_container_networking(client, types, hosts, containers)
392
# Reboot all hosted modelled machines then the controller.
393
log.info("Instrumenting reboot of all machines.")
396
log.info("Restarting hosted machine: {}".format(host))
398
'run', ('--machine', host, 'sudo shutdown -r now'))
399
client.juju('show-action-status', ('--name', 'juju-run'))
401
log.info("Restarting controller machine 0")
402
controller_client = client.get_controller_client()
403
controller_status = controller_client.get_status()
404
controller_host = controller_status.status['machines']['0']['dns-name']
405
first_uptime = get_uptime(controller_client, '0')
406
ssh(controller_client, '0', 'sudo shutdown -r now')
407
except subprocess.CalledProcessError as e:
409
"Error running shutdown:\nstdout: %s\nstderr: %s",
410
e.output, getattr(e, 'stderr', None))
413
# Wait for the controller to shut down if it has not yet restarted.
414
# This ensure the call to wait_for_started happens after each host
416
second_uptime = get_uptime(controller_client, '0')
417
if second_uptime > first_uptime:
418
wait_for_port(controller_host, 22, closed=True, timeout=300)
419
client.wait_for_started()
421
# Once Juju is up it can take a little while before ssh responds.
423
hostname = status['machines'][host]['dns-name']
424
wait_for_port(hostname, 22, timeout=240)
425
log.info("Reboot complete and all hosts ready for retest.")
427
_assess_container_networking(client, types, hosts, containers)
431
class _CleanedContext:
433
def __init__(self, client):
435
self.return_code = None
438
@contextlib.contextmanager
439
def cleaned_bootstrap_context(bs_manager, args):
440
ctx = _CleanedContext(bs_manager.client)
442
# TODO(gz): Having to manipulate client env state here to get the temp env
443
# is ugly, would ideally be captured in an explicit scope.
444
update_env(client.env, bs_manager.temp_env_name, series=bs_manager.series,
445
agent_url=bs_manager.agent_url,
446
agent_stream=bs_manager.agent_stream, region=bs_manager.region)
447
with bs_manager.top_context() as machines:
448
bootstrap_required = True
449
if args.clean_environment and clean_environment(client):
450
bootstrap_required = False
451
if bootstrap_required:
452
with bs_manager.bootstrap_context(machines):
453
client.bootstrap(args.upload_tools)
454
with bs_manager.runtime_context(machines):
457
if args.clean_environment and not clean_environment(client):
461
def _get_container_types(client, machine_type):
463
Give list of container types to run testing against.
465
If a machine_type was explictly specified, only test against those kind
466
of containers. Otherwise, test all possible containers for the given
470
if machine_type not in client.supported_container_types:
472
"no {} support on juju {}".format(machine_type,
474
return [machine_type]
475
# TODO(gz): Only include LXC for 1.X clients
476
types = list(client.supported_container_types)
482
args = parse_args(argv)
483
configure_logging(args.verbose)
484
bs_manager = BootstrapManager.from_args(args)
485
client = bs_manager.client
486
machine_types = _get_container_types(client, args.machine_type)
487
with cleaned_bootstrap_context(bs_manager, args) as ctx:
488
assess_container_networking(bs_manager.client, machine_types)
489
return ctx.return_code
492
if __name__ == '__main__':