~ubuntuone-pqm-team/juju-deployer/trunk

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
#!/usr/bin/python
# TODO: Add expose
import tempfile
import subprocess
import time
import os
import yaml
import json
import optparse
import pprint
import signal
import __builtin__

from os.path import dirname, abspath
from pdb import *
from utils import *


def timed_out(signal, frame):
    """Signal handler that aborts the run once the deployment timeout hits.

    Both arguments are the standard signal-handler arguments and are not
    used.  The handler logs the configured ``opts.timeout`` and exits the
    process with status 1.  ``log`` is not defined in this file; it is
    presumably provided by ``from utils import *`` -- TODO confirm.
    """
    limit = opts.timeout
    log.error("Deployment timed out after %s sec.", limit)
    exit(1)

# Wall-clock start of the run; used at the end of the script to report how
# long the whole deployment took.
start_time = time.time()

# Command-line interface.  All boolean switches default to a real boolean so
# that truthiness tests on the parsed options behave as expected (the
# destroy/terminate switches previously defaulted to the *string* "False",
# which is truthy).
parser = optparse.OptionParser()
parser.add_option('-c', '--config',
                  help=('File containing deployment(s) json config. This '
                        'option can be repeated, with later files overriding '
                        'values in earlier ones.'),
                  dest='configs', action='append')
parser.add_option('-d', '--debug', help='Enable debugging to stdout',
                  dest="debug",
                  action="store_true", default=False)
# Note the inverted sense: passing -L *clears* no_local_mods.
parser.add_option('-L', '--local-mods',
                  help='Allow deployment of locally-modified charms',
                  dest="no_local_mods", default=True, action='store_false')
parser.add_option('-u', '--update-charms',
                  help='Update existing charm branches',
                  dest="update_charms", default=False, action="store_true")
parser.add_option('-l', '--ls', help='List available deployments',
                  dest="list_deploys", action="store_true", default=False)
parser.add_option('-D', '--destroy-services',
                  help='Destroy all services (do not terminate machines)',
                  dest="destroy_services", action="store_true",
                  default=False)
parser.add_option('-S', '--scrub-zk', action='store_true', default=False,
                  dest='scrub_zk',
                  help='Remove charm nodes from ZK after service destroy.')
parser.add_option('-T', '--terminate-machines',
                  help=('Terminate all machines but the bootstrap node.  '
                        'Destroy any services that exist on each'),
                  dest="terminate_machines", action="store_true",
                  default=False)
parser.add_option('-t', '--timeout',
                  help='Timeout (sec) for entire deployment (45min default)',
                  dest='timeout', action='store', type='int', default=2700)
parser.add_option("-f", '--find-service', action="store", type="string",
                  help='Find hostname from first unit of a specific service.',
                  dest="find_service")
parser.add_option("-m", '--max-concurrent', action="store", type="int",
                  help=("Maximum number of concurrent deployments to send "
                        "to provider. Default: no limit"),
                  dest="max_concur_deploy", default=0)
parser.add_option('-s', '--deploy-delay', action='store', type='float',
                  help=("Time in seconds to sleep between 'deploy' commands, "
                        "to allow machine provider to process requests. This "
                        "delay is also enforced between calls to "
                        "terminate_machine"),
                  dest="deploy_delay", default=0)
# The environment falls back to $JUJU_ENV when -e is not given.
parser.add_option('-e', '--environment', action='store', dest='juju_env',
                  help='Deploy to a specific Juju environment.',
                  default=os.getenv('JUJU_ENV'))
parser.add_option('-o', '--override', action='append', type='string',
                  help=('Override *all* config options of the same name '
                        'across all services.  Input as key=value.'),
                  dest='overrides', default=None)
# Kept as a string; the script converts it with float() where it sleeps.
parser.add_option('-w', '--relation-wait', action='store', dest='rel_wait',
                  default='60',
                  help=('Number of seconds to wait before checking for '
                        'relation errors after all relations have been added '
                        'and subordinates started. (default: 60)'))
opts, args = parser.parse_args()

# Fall back to the conventional config file name when -c was never given.
opts.configs = opts.configs or ['deployments.cfg']
update_charms = opts.update_charms

# temporarily abuse __builtin__ till this is setup properly: the open log
# file and the list of issued juju commands are published as builtins.
juju_log = open("juju.log", "w")
__builtin__.juju_log = juju_log
__builtin__.juju_cmds = []

init_logging("debug.log", opts.debug)

# Directory the script was started from; later used as the base of the
# charm repository path.
ORIGCWD = os.getcwd()

# Tear-down mode: -D and/or -T short-circuit the deployment entirely.
# Each switch is reduced to a strict boolean first, so that a non-boolean
# option value (for instance a truthy string default) can never be forwarded
# to destroy_all() and be mistaken there for "terminate the machines".
destroy_requested = opts.destroy_services is True
terminate_requested = opts.terminate_machines is True
if destroy_requested or terminate_requested:
    destroy_all(juju_status(opts.juju_env), opts.juju_env,
                terminate_machines=terminate_requested,
                scrub_zk=opts.scrub_zk,
                delay=opts.deploy_delay)
    exit(0)

# Lookup mode: the result of find_service() becomes the exit status.
if opts.find_service is not None:
    rc = find_service(juju_status(opts.juju_env), opts.find_service)
    exit(rc)

# Load the JSON configuration describing the possible deployments.  Every
# requested file must exist; files are merged in command-line order.
config_paths = opts.configs
missing_configs = [path for path in config_paths
                   if not os.path.exists(path)]
if missing_configs:
    log.error("Configuration not found: {}".format(", ".join(missing_configs)))
    exit(1)

debug_msg("Loading deployments from {}".format(", ".join(config_paths)))
# The directory of each config file is remembered and later handed to
# generate_deployment_config().
include_dirs = [dirname(abspath(path)) for path in config_paths]
cfg = {}
for config_path in config_paths:
    with open(config_path, 'r') as handle:
        try:
            cfg = dict_merge(cfg, json.load(handle))
        except ValueError as parse_error:
            log.error("Error parsing config: {}".format(config_path))
            log.error(parse_error)
            exit(1)

# Either the user asked for the list of deployments (exit 0), or no
# deployment name was given, which is an error (exit 1).  Both cases print
# the available deployments.
if opts.list_deploys or not args:
    listing_only = opts.list_deploys
    if not listing_only:
        log.error("You must specify a deployment.")
    display_deploys(cfg)
    exit(0 if listing_only else 1)

# The first positional argument names the deployment to run.
deployment = args[0]
SERIES, CHARMS, RELATIONS, overrides = load_deployment(cfg, deployment)

# The series store ends up being the local juju charm repository; it lives
# below the directory the script was started from.
series_store = "%s/%s" % (ORIGCWD, SERIES)
if os.path.exists(series_store):
    debug_msg("Series charm store already exists: %s" % series_store)
else:
    debug_msg("Creating series charm store: %s" % series_store)
    os.mkdir(series_store)

# For every charm of the deployment: clone its branch if we do not have it,
# or (with -u) update the existing branch; optionally pin a revision and run
# the charm's build command; finally load metadata.yaml / config.yaml into
# the charm's entry in CHARMS.
for k in CHARMS:
    charm = CHARMS[k]
    charm_path = "%s/%s" % (series_store, k)
    debug_msg("Charm '%s' - using charm path '%s'" % (k, charm_path))
    # "branch" may carry a revision suffix: <branch>@<revno>
    (branch, sep, revno) = charm.get("branch", '').partition('@')
    needs_build = update_charms
    if branch:
        debug_msg("Branch: {}, revision: {}".format(branch, revno))
    else:
        debug_msg("No remote branch specified")
        needs_build = False
    if os.path.exists(charm_path):
        if opts.no_local_mods:
            with cd(charm_path):
                # is there a better way to check for changes?
                bzrstatus = subprocess.check_output(['bzr', 'st']).strip()
                if bzrstatus not in (
                        "", "working tree is out of date, run 'bzr update'"):
                    log.error("Charm is locally modified: {}".format(
                        charm_path))
                    log.error("Aborting")
                    exit(1)
        debug_msg("Charm path exists @ %s." % charm_path)
        if update_charms and branch:
            debug_msg("Updating charm branch '%s'" % k)
            code = subprocess.call(
                ["bzr", "pull", "-d", charm_path, '--remember', branch])
            if code != 0:
                log.error("Could not update branch at {} from {}".format(
                    charm_path, branch))
                exit(code)
    elif branch:
        print("- Cloning %s from %s" % (k, branch))
        # Fail right away on a broken clone, the same way the pull and
        # update steps do, instead of carrying on with a missing or partial
        # checkout.
        code = subprocess.call(["bzr", "branch", branch, charm_path])
        if code != 0:
            log.error("Could not clone branch {} to {}".format(
                branch, charm_path))
            exit(code)
        # a fresh checkout always needs its build step
        needs_build = True
    if revno:
        cmd = ["bzr", "update", charm_path]
        # 'tip' means "latest": plain update without an explicit revision
        if revno != 'tip':
            cmd.extend(['-r', revno])
        code = subprocess.call(cmd)
        if code != 0:
            log.error("Unable to check out branch revision {}".format(revno))
            exit(code)
    if charm.get("build") is not None and needs_build:
        cmd = charm["build"]
        debug_msg("Running build command at {}...".format(charm_path))
        with cd(charm_path):
            code = subprocess.call(cmd)
        if code != 0:
            log.error("Failed to build charm {}".format(k))
            exit(code)
    # load charms metadata
    if not os.path.isdir(charm_path):
        print("Branch for {} does not exist ({})".format(k, charm_path))
        exit(1)
    metadata_path = "%s/metadata.yaml" % charm_path
    debug_msg("Loading metadata from %s" % metadata_path)
    # NOTE(review): yaml.load can construct arbitrary Python objects and the
    # charm may have just been fetched from a remote branch; consider
    # yaml.safe_load if charms never rely on custom YAML tags.
    with open(metadata_path, "r") as mdf:
        charm["metadata"] = yaml.load(mdf)
    # load charms config.yaml if it has one
    config_path = "%s/config.yaml" % charm_path
    if os.path.exists(config_path):
        debug_msg("Loading config.yaml from %s" % config_path)
        # NOTE(review): same yaml.load caveat as for metadata.yaml above.
        with open(config_path) as conf:
            charm["config"] = yaml.load(conf)["options"]
    # a charm without an explicit unit count gets a single unit
    charm.setdefault("units", 1)

# Overrides given on the command line (-o key=value) are layered on top of
# the overrides returned by load_deployment(); only the first '=' separates
# key from value, so values may themselves contain '='.
if opts.overrides:
    for override in opts.overrides:
        key, _, value = override.partition('=')
        overrides[key] = value

# apply overrides to relevant charms: an override is only applied to a
# service whose charm declares an option of that name.
for k, v in overrides.iteritems():
    for svc in CHARMS:
        # "config" is only set for charms that ship a config.yaml, so its
        # absence must not raise; such charms simply have nothing to
        # override.
        declared_options = CHARMS[svc].get('config') or {}
        if k in declared_options:
            CHARMS[svc].setdefault('options', {})[k] = v

# Create a temporary deploy-time config yaml.  The file object stays bound
# to `temp` for the rest of the script, and its path is what the deploy
# commands are given via --config.
temp = tempfile.NamedTemporaryFile()
CONFIG = generate_deployment_config(temp, CHARMS, include_dirs)
deploy_config = temp.name
rendered_config = pprint.pformat(CONFIG)
log.debug("Using the following config:\n%s", rendered_config)

# Make sure we're bootstrapped.  A status value of 1 is treated as
# "could not get status".
status = juju_status(opts.juju_env)
if status == 1:
    log.error("Is juju bootstrapped?")
    exit(1)
# Machine 0 is checked as the bootstrap node.
# NOTE(review): the node is accepted when EITHER its instance is
# "provisioned" OR its agent is "running"; confirm that requiring only one of
# the two is intended.
bootstrap_node = status["machines"][0]
if not (bootstrap_node["instance-state"] == "provisioned" or
        bootstrap_node["agent-state"] == "running"):
    log.error("Bootstrap node not running?")
    exit(1)

# Arm a process-wide alarm so the whole deployment is bounded by --timeout;
# timed_out() handles the resulting SIGALRM.
debug_msg("Deploying with timeout %s sec." % opts.timeout)
signal.signal(signal.SIGALRM, timed_out)
signal.alarm(opts.timeout)

# Figure out what needs to be done: only services that juju does not already
# report are deployed.
deployed_services = status["services"]
to_deploy = []
for c in CHARMS:
    if c in deployed_services:
        print("* Services '%s' already deployed. Skipping" % c)
    else:
        to_deploy.append(c)

# Size of each deployment batch.  --max-concurrent only applies when it is a
# positive number not larger than the amount of work; 0 (the default) means
# "no limit".  A negative value is treated the same way, because it would
# otherwise become a negative slice step and silently deploy nothing.
if 0 < opts.max_concur_deploy <= len(to_deploy):
    groups_of = opts.max_concur_deploy
else:
    groups_of = len(to_deploy)

# Deploy the pending services batch by batch, waiting for each batch to
# reach the started state before sending the next one.
start_groups = []
if to_deploy:
    # go through to_deploy in chunks of groups_of
    start_groups = [to_deploy[offset:offset + groups_of]
                    for offset in range(0, len(to_deploy), groups_of)]
    total_groups = len(start_groups)
    for group_num, group in enumerate(start_groups):
        for c in group:
            print("- Deploying %s in group %d/%d" %
                  (c, group_num + 1, total_groups))
            charm = CHARMS[c]
            # Assemble the juju sub-command piece by piece; every optional
            # piece carries its own leading space.
            pieces = ["deploy"]
            if "units" in charm:
                pieces.append(" -n %s" % charm["units"])
            if "constraints" in charm:
                pieces.append(" --constraints=%s" % charm["constraints"])
            if c in CONFIG.keys():
                pieces.append(" --config=%s" % deploy_config)
            pieces.append(" --repository=%s local:%s %s" % (
                ORIGCWD, charm["metadata"]["name"], c))
            if opts.juju_env:
                pieces.append(" -e %s" % opts.juju_env)
            cmd = "".join(pieces)
            juju_call(cmd)
            if opts.deploy_delay > 0:
                debug_msg("Delaying %s sec. between deployment" %
                          opts.deploy_delay)
                time.sleep(opts.deploy_delay)
        wait_for_started(opts.debug, opts.juju_env, sleep=3.0,
                         msg="- Waiting for started: %s" % group)

# Refresh the status snapshot if anything was deployed above, so the unit
# counts below reflect the new services.
if start_groups:
    status = juju_status(opts.juju_env)

# add additional units to any services that want/need them
# TODO: need max_concur_deploy support here also
for c in CHARMS:
    # Coerce once, up front: the configured count is not guaranteed to be an
    # int, and every comparison below must be numeric.
    wanted_units = int(CHARMS[c]["units"])
    if wanted_units <= 1:
        continue
    current_units = len(status["services"][c]["units"])
    needed_units = wanted_units - current_units
    if needed_units > 0:
        print("- Adding %d more units to %s" % (needed_units, c))
        cmd = "add-unit --num-units %d %s" % (needed_units, c)
        # Target the selected environment, exactly like the deploy and
        # add-relation commands do; without this the units would be added
        # to whatever environment juju picks by default.
        if opts.juju_env:
            cmd += " -e %s" % opts.juju_env
        juju_call(cmd)
    else:
        debug_msg("Service '%s' does not need any more units added." % c)

# Poll juju status until all services report started; fail on any error.
wait_for_started(opts.debug, opts.juju_env,
                 "- Waiting for all service units to reach 'started' state.")

# Add all relations, ordered by weight (highest weight first).  RELATIONS
# maps a weight to the list of endpoint pairs to relate.
if RELATIONS:
    print("- Adding relations:")
    if opts.juju_env:
        env_suffix = " -e %s" % opts.juju_env
    else:
        env_suffix = ""
    for weight in sorted(RELATIONS, reverse=True):
        for relation in RELATIONS[weight]:
            left, right = relation[0], relation[1]
            print("  -> Relation: %s <-> %s" % (left, right))
            # failures are tolerated here; relation health is verified
            # after all relations have been requested
            juju_call("add-relation %s %s" % (left, right) + env_suffix,
                      ignore_failure=True)
            # to be safe
            time.sleep(5)

# Subordinate units spawn only after relations have been added.
# Ensure they're started, if they exist.
wait_for_subordinates_started(opts.debug, opts.juju_env)

if RELATIONS:
    print("- Sleeping for %s before ensuring relation state." % opts.rel_wait)
    # Give all relations time to settle down (--relation-wait seconds),
    # then make sure no errors are reported.
    settle_seconds = float(opts.rel_wait)
    time.sleep(settle_seconds)

relations_ok = ensure_relations_up(juju_status(opts.juju_env))
if not relations_ok:
    exit(1)

elapsed_seconds = int(time.time() - start_time)
print("- Deployment complete in %d seconds.\n\n" % elapsed_seconds)

# Replay every juju command recorded in the shared builtin list.
print("- Juju command log:")
for logged_cmd in __builtin__.juju_cmds:
    print(logged_cmd)