~andrewjbeach/juju-ci-tools/make-local-patcher

296 by Curtis Hovey
Added first draft of the backup_restore_juju.py test. It doesn't
1
#!/usr/bin/env python
2
# Backup and restore a stack.
3
4
from __future__ import print_function
5
6
# Python 2 idiom: classes defined in this module without explicit bases
# are created as new-style classes.
__metaclass__ = type
7
8
from argparse import ArgumentParser
742.1.1 by Aaron Bentley
Show traceback in exception handling.
9
import logging
296 by Curtis Hovey
Added first draft of the backup_restore_juju.py test. It doesn't
10
import os
11
import re
12
import subprocess
13
import sys
14
451 by Aaron Bentley
Handle logging and cleanup in python.
15
from deploy_stack import (
611.1.6 by Menno Smits
Call dump_env_logs when test_recovery.py fails
16
    dump_env_logs,
451 by Aaron Bentley
Handle logging and cleanup in python.
17
    get_machine_dns_name,
717.2.2 by Aaron Bentley
Checkpoint with assess_recovery working.
18
    wait_for_state_server_to_shutdown,
451 by Aaron Bentley
Handle logging and cleanup in python.
19
)
717.2.2 by Aaron Bentley
Checkpoint with assess_recovery working.
20
from jujuconfig import (
796.3.5 by Curtis Hovey
Ensure that jenv files are removed before bootstrapping.
21
    get_jenv_path,
717.2.2 by Aaron Bentley
Checkpoint with assess_recovery working.
22
    get_juju_home,
953.3.9 by Nate Finch
more code review changes
23
)
296 by Curtis Hovey
Added first draft of the backup_restore_juju.py test. It doesn't
24
from jujupy import (
717.2.2 by Aaron Bentley
Checkpoint with assess_recovery working.
25
    temp_bootstrap_env,
296 by Curtis Hovey
Added first draft of the backup_restore_juju.py test. It doesn't
26
    until_timeout,
953.3.7 by Nate Finch
update for code review comments
27
    make_client,
953.3.8 by Nate Finch
more review changes
28
    parse_new_state_server_from_error,
296 by Curtis Hovey
Added first draft of the backup_restore_juju.py test. It doesn't
29
)
717.2.2 by Aaron Bentley
Checkpoint with assess_recovery working.
30
from substrate import (
31
    terminate_instances,
953.3.9 by Nate Finch
more code review changes
32
)
379.1.1 by Aaron Bentley
Move portions of deploy job to Python.
33
from utility import (
796.3.5 by Curtis Hovey
Ensure that jenv files are removed before bootstrapping.
34
    ensure_deleted,
379.1.1 by Aaron Bentley
Move portions of deploy job to Python.
35
    print_now,
36
)
296 by Curtis Hovey
Added first draft of the backup_restore_juju.py test. It doesn't
37
38
39
# Matches a double-quoted value wrapped in square brackets, e.g. ["i-1234"],
# and captures the text between the quotes.  A raw string is used so the
# regex escapes are not treated as (invalid) string escapes.
running_instance_pattern = re.compile(r'\["([^"]+)"\]')
40
41
953.3.9 by Nate Finch
more code review changes
42
def setup_juju_path(juju_path):
    """Ensure the binaries and scripts under test are found first.

    The absolute form of juju_path is prepended to the PATH environment
    variable and inserted at the front of sys.path.

    :param juju_path: Directory holding the juju binaries and scripts.
    :raises ValueError: If juju_path is not an existing directory.
    """
    full_path = os.path.abspath(juju_path)
    if not os.path.isdir(full_path):
        raise ValueError("The juju_path does not exist: %s" % full_path)
    # PATH may be unset in a stripped-down environment.  Avoid a KeyError,
    # and avoid leaving a trailing separator (an empty PATH entry).
    current_path = os.environ.get('PATH')
    if current_path:
        os.environ['PATH'] = os.pathsep.join([full_path, current_path])
    else:
        os.environ['PATH'] = full_path
    sys.path.insert(0, full_path)
49
50
717.2.1 by Aaron Bentley
Extract EnvJujuClient.backup from assess_recovery.
51
def deploy_stack(client, charm_prefix):
    """Deploy a simple stack, state-server and ubuntu.

    Records the instance-id of machine 0, polls status (bounded by
    until_timeout(30)) until a single known agent version is reported,
    upgrades juju when that version is not the client's matching agent
    version, then deploys the ubuntu charm and waits for it to start.

    :param client: The juju client used to drive the environment.
    :param charm_prefix: Prefix for charm urls; a trailing '/' is added
        when a non-empty prefix lacks one.
    :return: The instance-id of machine 0 as reported by status.
    """
    if charm_prefix and not charm_prefix.endswith('/'):
        charm_prefix = charm_prefix + '/'
    agent_version = client.get_matching_agent_version()
    instance_id = client.get_status().status['machines']['0']['instance-id']
    for ignored in until_timeout(30):
        agent_versions = client.get_status().get_agent_versions()
        if 'unknown' not in agent_versions and len(agent_versions) == 1:
            break
    # Compare as a list: on Python 3 a keys() view never equals a list, which
    # would force an upgrade every time.
    current_versions = list(agent_versions.keys())
    if current_versions != [agent_version]:
        print_now("Current versions: %s" % ', '.join(current_versions))
        client.juju('upgrade-juju', ('--version', agent_version))
    client.wait_for_version(client.get_matching_agent_version())
    client.juju('deploy', (charm_prefix + 'ubuntu',))
    client.wait_for_started()
    print_now("%s is ready to testing" % client.env.environment)
    return instance_id
296 by Curtis Hovey
Added first draft of the backup_restore_juju.py test. It doesn't
69
70
717.2.1 by Aaron Bentley
Extract EnvJujuClient.backup from assess_recovery.
71
def restore_present_state_server(client, backup_file):
    """juju-restore won't restore when the state-server is still present.

    Runs 'juju restore' against the client's environment and expects it to
    fail.  The instance id is then extracted from the command's stderr using
    running_instance_pattern.

    :param client: The juju client whose environment is restored.
    :param backup_file: Path of the backup file passed to 'juju restore'.
    :return: The instance id found in stderr, or None (after printing a
        warning) when no instance id could be found.
    :raises Exception: If the restore command unexpectedly exits with 0.
    """
    proc = subprocess.Popen(
        ['juju', '--show-log', 'restore', '-e', client.env.environment,
         backup_file],
        env=dict(os.environ), stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        # Read the pipes as text so the str regex and the %-formatting below
        # also work on Python 3, where pipes yield bytes by default.
        universal_newlines=True)
    err = proc.communicate()[1]
    if proc.returncode == 0:
        raise Exception(
            "juju-restore restored to an operational state-server: %s" % err)
    print_now(
        "juju-restore correctly refused to restore "
        "because the state-server was still up.")
    match = running_instance_pattern.search(err)
    if match is None:
        # Only warn: a missing instance id is not treated as a failure.
        print_now("WARNING: Could not find the instance_id in output:")
        print_now(err)
        print_now("")
        return None
    return match.group(1)
94
95
717.2.1 by Aaron Bentley
Extract EnvJujuClient.backup from assess_recovery.
96
def delete_instance(client, instance_id):
    """Terminate a single instance through terminate_instances.

    :param client: The juju client; its env is passed to terminate_instances.
    :param instance_id: The id of the instance to terminate.
    :return: Whatever terminate_instances returns.
    """
    print_now("Instrumenting a bootstrap node failure.")
    doomed = [instance_id]
    return terminate_instances(client.env, doomed)
296 by Curtis Hovey
Added first draft of the backup_restore_juju.py test. It doesn't
100
571 by Curtis Hovey
When deleting instances in openstack, poll nova to be certain the instance is gone.
101
717.2.1 by Aaron Bentley
Extract EnvJujuClient.backup from assess_recovery.
102
def delete_extra_state_servers(client, instance_id):
    """Delete every state-server instance other than instance_id.

    A machine counts as a state-server when its status entry has a
    'state-server-member-status' value.  Each such machine is deleted and
    then waited on until it has shut down.

    :param client: The juju client used to query status.
    :param instance_id: The instance id that must be left alone.
    """
    for name, details in client.get_status().iter_machines():
        member_status = details.get('state-server-member-status')
        candidate_id = details.get('instance-id')
        if member_status is None or candidate_id == instance_id:
            continue
        print_now("Deleting state-server-member {}".format(name))
        # Look up the host before deleting the instance.
        dns_name = get_machine_dns_name(client, name)
        delete_instance(client, candidate_id)
        wait_for_state_server_to_shutdown(dns_name, client, candidate_id)
113
114
115
def restore_missing_state_server(client, backup_file):
    """juju-restore creates a replacement state-server for the services.

    Runs 'juju restore' with a 'mem=2G' constraint against the client's
    environment, prints the command output, then waits (with a 600 argument
    to wait_for_started) for the environment to be started again.

    :param client: The juju client whose environment is restored.
    :param backup_file: Path of the backup file passed to 'juju restore'.
    :raises Exception: If the restore command exits with a non-zero status;
        the message includes the command's stderr.
    """
    print_now("Starting restore.")
    proc = subprocess.Popen(
        ['juju', '--show-log', 'restore', '-e', client.env.environment,
         '--constraints', 'mem=2G', backup_file],
        env=dict(os.environ), stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        # Read the pipes as text so messages are not rendered as bytes
        # reprs on Python 3.
        universal_newlines=True)
    output, err = proc.communicate()
    if proc.returncode != 0:
        # Print immediately so the order of events in the log is clear.
        print_now('Call of juju restore exited with an error\n')
        message = 'Restore failed: \n%s' % err
        print_now(message)
        print_now('\n')
        raise Exception(message)
    print_now(output)
    client.wait_for_started(600)
    print_now("%s restored" % client.env.environment)
    print_now("PASS")
296 by Curtis Hovey
Added first draft of the backup_restore_juju.py test. It doesn't
134
135
817.1.3 by Aaron Bentley
Add --debug to assess_recovery.
136
def parse_args(argv=None):
    """Parse the command line of the recovery test.

    :param argv: Argument list to parse; None makes argparse read sys.argv.
    :return: A namespace with charm_prefix, debug, strategy ('backup' by
        default, or 'ha' / 'ha-backup'), juju_path, env_name, logs and the
        optional temp_env_name (None when omitted).
    """
    # The text is the description; passed positionally it would be taken as
    # the program name and appear in the usage line.
    parser = ArgumentParser(description='Test recovery strategies.')
    parser.add_argument(
        '--charm-prefix', help='A prefix for charm urls.', default='')
    parser.add_argument(
        '--debug', action='store_true', default=False,
        help='Use --debug juju logging.')
    # All three strategy flags store into the same dest; the last one given
    # on the command line wins.
    strategy = parser.add_argument_group('test strategy')
    strategy.add_argument(
        '--ha', action='store_const', dest='strategy', const='ha',
        default='backup', help="Test HA.")
    strategy.add_argument(
        '--backup', action='store_const', dest='strategy', const='backup',
        help="Test backup/restore.")
    strategy.add_argument(
        '--ha-backup', action='store_const', dest='strategy',
        const='ha-backup', help="Test backup/restore of HA.")
    parser.add_argument('juju_path')
    parser.add_argument('env_name')
    parser.add_argument('logs', help='Directory to store logs in.')
    parser.add_argument(
        'temp_env_name', nargs='?',
        help='Temporary environment name to use for this test.')
    return parser.parse_args(argv)
160
161
990.1.5 by Curtis Hovey
Added main test for assess_recovery.py ha.
162
def main(argv):
    """Run the selected recovery test and exit with status 1 on failure.

    Bootstraps the environment, deploys the stack, optionally enables HA
    and/or takes a backup, deletes the state-server instance(s), and then
    either checks status (strategy 'ha') or restores from the backup.
    Environment logs are dumped and the environment destroyed once the
    test body has run, whether or not it succeeded.

    :param argv: Command-line arguments, excluding the program name.
    """
    args = parse_args(argv)
    log_dir = args.logs
    try:
        setup_juju_path(args.juju_path)
        client = make_client(args.juju_path, args.debug, args.env_name,
                             args.temp_env_name)
        juju_home = get_juju_home()
        # Remove any stale jenv file before bootstrapping.
        ensure_deleted(get_jenv_path(juju_home, client.env.environment))
        with temp_bootstrap_env(juju_home, client):
            client.bootstrap()
        bootstrap_host = get_machine_dns_name(client, 0)
        try:
            instance_id = deploy_stack(client, args.charm_prefix)
            if args.strategy in ('ha', 'ha-backup'):
                client.juju('ensure-availability', ('-n', '3'))
                client.wait_for_ha()
            if args.strategy in ('ha-backup', 'backup'):
                backup_file = client.backup()
                restore_present_state_server(client, backup_file)
            if args.strategy == 'ha-backup':
                delete_extra_state_servers(client, instance_id)
            delete_instance(client, instance_id)
            wait_for_state_server_to_shutdown(bootstrap_host, client,
                                              instance_id)
            # The original host is gone; any later log collection needs the
            # address of a replacement state-server instead.
            bootstrap_host = None
            if args.strategy == 'ha':
                client.get_status(600)
            else:
                restore_missing_state_server(client, backup_file)
        except Exception as e:
            if bootstrap_host is None:
                # Try to learn the new state-server from the error so its
                # logs can still be collected below.
                bootstrap_host = parse_new_state_server_from_error(e)
            raise
        finally:
            # Destroy the environment even when dumping the logs fails, so
            # a log-collection error cannot leak a running environment.
            try:
                dump_env_logs(client, bootstrap_host, log_dir)
            finally:
                client.destroy_environment()
    except Exception as e:
        print_now("\nEXCEPTION CAUGHT:\n")
        logging.exception(e)
        # Show any output captured from a failed command.
        if getattr(e, 'output', None):
            print_now('\n')
            print_now(e.output)
        print_now("\nFAIL")
        sys.exit(1)
207
208
209
if __name__ == '__main__':
    # Run the test with the command-line arguments, minus the script name.
    cli_args = sys.argv[1:]
    main(cli_args)