~jcsackett/charmworld/bac-tag-constraints

« back to all changes in this revision

Viewing changes to charmworld/jobs/ingest.py

  • Committer: Aaron Bentley
  • Date: 2013-02-12 18:57:02 UTC
  • mfrom: (149 charmworld)
  • mto: This revision was merged to the branch mainline in revision 150.
  • Revision ID: aaron@canonical.com-20130212185702-9gnf40ao17a7uw7v
Merged trunk into mongo-urls.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
from datetime import datetime
 
2
import json
 
3
import logging
 
4
import os
 
5
import shutil
 
6
import subprocess
 
7
import urllib2
 
8
 
 
9
from bzrlib.branch import Branch
 
10
from bzrlib.revisionspec import RevisionSpec
 
11
from bzrlib.transport import get_transport
 
12
import yaml
 
13
import xappy
 
14
 
 
15
from charmworld.models import getconnection
 
16
from charmworld.models import getdb
 
17
from charmworld.models import getfs
 
18
from charmworld.models import CharmFileSet
 
19
from charmworld.utils import quote_key
 
20
from charmworld.utils import quote_yaml
 
21
 
 
22
from config import settings
 
23
from config import STORE_URL
 
24
 
 
25
# Provider names substituted into the Jenkins job URLs below.
JENKINS_PROVIDERS = ['ec2', 'openstack', 'local']

# URL template for one artifact of a specific build of a charm test job.
# Expects the keys: series, provider, charm, build (integer), artifact.
JENKINS_ARTIFACT_URL = (
    "https://jenkins.qa.ubuntu.com/job/" +
    "%(series)s-%(provider)s-charm-%(charm)s/%(build)d/artifact/%(artifact)s"
)

# URL template for the JSON description of a charm test job's last build.
# Expects the keys: series, provider, charm.
JENKINS_QA_URL = (
    "https://jenkins.qa.ubuntu.com/job/" +
    "%(series)s-%(provider)s-charm-%(charm)s/lastBuild/api/json"
)
 
32
 
 
33
 
 
34
def add_files(charm_data):
    """Store the branch files and record them under charm_data['files'].

    Every item yielded by store_branch_files() is converted to a plain
    dict and keyed by the quote_key()-escaped form of its filename.

    Returns the same charm_data mapping, mutated in place.
    """
    stored = {}
    for branch_file in store_branch_files(charm_data):
        stored[quote_key(branch_file.filename)] = dict(branch_file)
    charm_data['files'] = stored
    return charm_data
 
40
 
 
41
 
 
42
def fetch_branch(root_dir, charm_data, retry=True):
    """Fetch a branch from bzr, and augment charm data.

    The checkout lives at <root_dir>/<series>/<owner>/<name>/<bname>; that
    path is recorded in charm_data["branch_dir"].  Whenever the checkout is
    created, found current, or successfully updated, add_files() is run to
    fill in charm_data['files'].

    If ``bzr update`` fails and ``retry`` is true, the checkout is deleted
    and the fetch is attempted once more from scratch; otherwise the
    subprocess.CalledProcessError propagates.
    """
    log = logging.getLogger("charm.bzr")
    branch_dir = os.path.abspath(str(os.path.join(
        root_dir,
        charm_data["series"],
        charm_data["owner"],
        charm_data["name"],
        charm_data["bname"])))

    parent_dir = os.path.dirname(branch_dir)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)

    # Store the branch directory
    charm_data["branch_dir"] = branch_dir

    if not os.path.exists(branch_dir):
        # Never seen before: make a fresh checkout and we are done.
        log.info("Branching charm lp:%s", charm_data["branch_spec"])
        checkout_cmd = [
            "/usr/bin/bzr", "co", "-q",
            "lp:%s" % charm_data["branch_spec"], branch_dir]
        subprocess.check_output(checkout_cmd)
        add_files(charm_data)
        return

    # The checkout exists; see whether it is already at the wanted revision.
    log.debug("Existing charm from lp:%s", charm_data["branch_spec"])
    branch = Branch.open_from_transport(get_transport(branch_dir))
    if branch.last_revision() == charm_data['commit']:
        add_files(charm_data)
        log.debug("Already up to date lp:%s", charm_data["branch_spec"])
        return

    log.debug("Updating branch lp:%s", charm_data["branch_spec"])

    try:
        # It exists, but it's not up to date, so update it.
        subprocess.check_output(
            ["/usr/bin/bzr", "update", "-q"],
            cwd=branch_dir,
            stderr=subprocess.STDOUT)
        add_files(charm_data)
    except subprocess.CalledProcessError:
        if not retry:
            raise
        # The update failed for some reason.  Strip the whole tree and
        # start over with a fresh checkout, exactly once.
        shutil.rmtree(branch_dir)
        return fetch_branch(root_dir, charm_data, retry=False)
 
93
 
 
94
 
 
95
def store_branch_files(charm_data, db=None):
    """Process the bzr branch for files that need to be stored in gridfs.

    :param charm_data: charm mapping; charm_data['branch_dir'] is the
        directory handed to CharmFileSet.save_files().
    :param db: database to store into.  When None, a connection is opened
        from ``settings`` and the 'mongo.database' database is used.
    :return: whatever CharmFileSet.save_files() returns.
    """
    log = logging.getLogger("charm.bzr")
    log.info('Storing files of branch into gridfs')
    if db is None:
        connection = getconnection(settings)
        db = getdb(connection, settings.get('mongo.database'))
    fs = getfs(db)
    filestore = CharmFileSet.save_files(
        fs, charm_data, charm_data['branch_dir'])
    # This message used to sit after the ``return`` and was never emitted.
    # NOTE(review): if save_files() is lazy, this fires before the files
    # are actually written -- confirm against CharmFileSet.
    log.info('Completed gridfs storage.')
    return filestore
 
107
 
 
108
 
 
109
def _rev_info(r, branch):
    """Summarise revision ``r`` of ``branch`` as a plain dict.

    The result has the keys "revno" (looked up on the branch from the
    revision id), "committer", "created" (the revision's timestamp) and
    "message".
    """
    info = {}
    info["revno"] = branch.revision_id_to_revno(r.revision_id)
    info["committer"] = r.committer
    info["created"] = r.timestamp
    info["message"] = r.message
    return info
 
117
 
 
118
 
 
119
def fetch_changes(charm_data):
    """Record recent revision history of the charm's branch in charm_data.

    Opens the branch at charm_data["branch_dir"] and sets:

    * charm_data["changes"]: _rev_info() dicts for the left-hand ancestry
      walked from the branch tip towards the "revno:-10" revision, tip
      first;
    * charm_data["last_change"]: the first of those entries;
    * charm_data["first_change"]: _rev_info() for "revno:1".
    """
    branch = Branch.open_from_transport(
        get_transport(charm_data["branch_dir"]))

    # We only want the last 10 changes, in descending order.
    _revno, tip_rev_id = branch.last_revision_info()
    oldest_rev_id = RevisionSpec.from_string(
        "revno:-10").as_revision_id(branch)

    branch.lock_read()
    try:
        ancestry = branch.repository.get_graph().iter_lefthand_ancestry(
            tip_rev_id, (oldest_rev_id,))
        rev_ids = list(ancestry)
        # Make sure the tip itself always leads the list.
        if tip_rev_id not in rev_ids:
            rev_ids.insert(0, tip_rev_id)
    finally:
        branch.unlock()
    revisions = [
        branch.repository.get_revision(rev_id) for rev_id in rev_ids]

    changes = []
    charm_data["changes"] = changes
    for revision in revisions:
        changes.append(_rev_info(revision, branch))
    charm_data["last_change"] = changes[0]

    first_rev_id = RevisionSpec.from_string("revno:1").as_revision_id(branch)
    charm_data["first_change"] = _rev_info(
        branch.repository.get_revision(first_rev_id), branch)
 
149
 
 
150
 
 
151
def index_charm(indexer, charm):
    """Build a xappy document for ``charm`` and store it in ``indexer``.

    Indexed fields: name, summary and description (weighted 10 when the
    owner is 'charmers', otherwise 1), owner, short_url, label, series,
    an optional 'subordinate' flag, the config option names and
    descriptions, the requires/provides interfaces, a combined
    "relations" text, the change messages and committers, and the store
    url when present.  The document id is charm["_id"]; the document
    replaces any existing one and the indexer is flushed.

    Relation entries that are not dicts are skipped with a warning, for
    both "requires" and "provides".
    """
    log = logging.getLogger("charm.index")
    doc = xappy.UnprocessedDocument()

    # Weight critical fields higher for official charms.
    weight = 10 if charm['owner'] == 'charmers' else 1
    for field_name in ("name", "summary", "description"):
        doc.fields.append(
            xappy.Field(field_name, charm[field_name], weight=weight))

    for field_name in ("owner", "short_url", "label", "series"):
        doc.fields.append(xappy.Field(field_name, charm[field_name]))

    if charm.get('subordinate'):
        doc.fields.append(xappy.Field('subordinate', 'true'))

    if ("config" in charm
            and charm['config']
            and 'options' in charm['config']
            and charm['config']['options']):
        config_text = []
        for key, option in charm["config"]["options"].items():
            config_text.append(key)
            config_text.append(option.get("description", ""))
        doc.fields.append(xappy.Field("config", " ".join(config_text)))

    relation_text = []
    # Both relation kinds are handled identically.  Previously only
    # "provides" tolerated malformed (non-dict) entries, so a bad
    # "requires" entry raised a TypeError.
    for kind in ("requires", "provides"):
        if kind not in charm or not charm[kind]:
            continue
        for key, option in charm[kind].items():
            if not isinstance(option, dict):
                log.warning(
                    "invalid charm %s %s" % (kind, charm['branch_spec']))
                continue
            relation_text.append(key)
            relation_text.append(option["interface"])
            doc.fields.append(xappy.Field(kind, option["interface"]))

    if relation_text:
        doc.fields.append(xappy.Field("relations", " ".join(relation_text)))

    change_text = []
    for change in charm.get("changes", ()):
        change_text.append(change['message'])
        change_text.append(change['committer'])

    if change_text:
        doc.fields.append(xappy.Field("changes", " ".join(change_text)))

    if 'store_url' in charm:
        doc.fields.append(xappy.Field('store_url', charm['store_url']))
    else:
        log.warning("No store url found for %s", charm["_id"])
    doc.id = charm["_id"]
    indexer.replace(doc)
    indexer.flush()
 
220
 
 
221
 
 
222
def check_jenkins(db, fs, charm):
    """Attach Jenkins test status to a ~charmers charm.

    Charms whose branch_spec does not start with '~charmers' are left
    untouched.  Otherwise charm['tests'] and charm['test_results'] are
    reset to empty dicts and, for every provider in JENKINS_PROVIDERS
    that store_provider_results() returns a result id for, filled with
    the status and the result id respectively.

    A failure for one provider is logged and does not stop the others.
    """
    log = logging.getLogger("charm.jenkins")
    if not charm['branch_spec'].startswith('~charmers'):
        return

    charm['tests'] = {}
    charm['test_results'] = {}

    for p in JENKINS_PROVIDERS:
        try:
            result_id, status = store_provider_results(db, fs, p, charm)
            if result_id is None:
                continue
            charm['tests'][p] = status
            charm['test_results'][p] = result_id
        except Exception:
            # Deliberately best-effort per provider, but only for ordinary
            # errors: KeyboardInterrupt and SystemExit must still propagate
            # (a bare ``except:`` swallowed them).
            log.exception("Unknown error while processing %s %s",
                          charm['branch_spec'], p)
 
240
 
 
241
 
 
242
def _fetch_artifacts(provider, charm, result, fs):
    """Inspect the artifacts of a Jenkins build result.

    Side effects on ``result``:

    * the contents of a 'charm-revision' artifact are downloaded and, if
      non-empty, stored as the integer result['revno'];
    * result['charmrunner'] is set to True when an artifact's display
      path contains "graph-tests".

    Downloading artifacts into ``fs`` is currently short-circuited, so the
    returned list is always empty.
    """
    stored = []
    for artifact in result['artifacts']:
        a_path = "%s/%s/%s/%s" % (
            charm['branch_spec'],
            provider,
            result['number'],
            artifact['displayPath'])
        url_params = {
            'series': charm['series'],
            'provider': provider,
            'charm': charm['name'],
            'build': result['number'],
            'artifact': artifact['relativePath'],
        }
        a_url = JENKINS_ARTIFACT_URL % url_params

        ## File sniffing
        display_path = artifact['displayPath']

        # Load up the charm revision as a result property.
        if display_path == 'charm-revision':
            charm_revision = urllib2.urlopen(a_url).read().strip()
            if charm_revision:
                result['revno'] = int(charm_revision)
            continue

        # Mark the test result as graph runner enabled.
        if "graph-tests" in display_path:
            result['charmrunner'] = True

        # Skip the actual charm content
        if display_path == "charm-%s.zip" % charm['name']:
            continue

        # XXX Short circuit before actual fetching.
        continue

        # Disabled: everything below is unreachable until the short
        # circuit above is removed.
        a_file = urllib2.urlopen(a_url)
        file_id = fs.put(a_file, path=a_path)
        artifact['file_id'] = file_id
        stored.append(artifact)

    return stored
 
284
 
 
285
 
 
286
def store_provider_results(db, fs, provider, charm):
    """Fetch and store the last Jenkins build result for charm/provider.

    Returns a ``(result_id, status)`` pair, where status is the build's
    'result' value.  Returns ``(None, None)`` when the Jenkins URL cannot
    be opened.  If a document with the same id already exists in
    ``db.jenkins`` its stored status is returned and nothing is
    refetched; otherwise the result is annotated with branch_spec,
    provider, artifacts and _id, then inserted.
    """
    log = logging.getLogger("charm.jenkins")
    url_params = {
        'series': charm['series'],
        'provider': provider,
        'charm': charm['name'],
    }
    charm_result_url = JENKINS_QA_URL % url_params

    log.debug("Loading %s from %s", charm['name'], charm_result_url)

    try:
        contents = urllib2.urlopen(charm_result_url).read()
    except urllib2.URLError:
        log.debug("No test result for %s @ %s", charm['branch_spec'], provider)
        return None, None

    result = json.loads(contents)

    # If we already have this build's results there is no point refetching.
    result_id = "%s::%s-%s" % (
        charm['branch_spec'], provider, result['number'])
    known = db.jenkins.find_one({'_id': result_id})
    if known is not None:
        return result_id, known['result']

    # Fetch test artifacts.
    artifacts = _fetch_artifacts(provider, charm, result, fs)

    # Inject test metadata.
    result['branch_spec'] = charm['branch_spec']
    result['provider'] = provider
    result['artifacts'] = artifacts
    result['_id'] = result_id
    db.jenkins.insert(result)
    return result_id, result['result']
 
320
 
 
321
 
 
322
def proof_charm(charm, prooflib):
    """Run ``prooflib`` over the charm's branch and store the findings.

    prooflib.run(charm['branch_dir']) must return ``(lines, exit_code)``.
    Each line of the form "<level>:<message>" is grouped under the
    lower-cased level in charm['proof']; lines without a colon are
    ignored, as are "W" lines whose message mentions 'name'.
    """
    findings = {}
    lint_lines, _exit_code = prooflib.run(charm['branch_dir'])
    for lint_line in lint_lines:
        if ':' not in lint_line:
            continue
        severity, text = lint_line.split(':', 1)
        if severity == "W" and 'name' in text:
            continue
        findings.setdefault(severity.lower(), []).append(text)
    charm['proof'] = findings
 
333
 
 
334
 
 
335
def _flatten_interfaces(relations):
    """Return the non-empty "interface" values of a relations mapping.

    ``relations`` may be None or empty.  Entries that are not dicts, or
    that have no truthy "interface", are skipped.
    """
    interfaces = []
    if relations:
        for relation in relations.values():
            if not isinstance(relation, dict):
                continue
            interface = relation.get("interface")
            if interface:
                interfaces.append(interface)
    return interfaces


def process_charm(charm):
    """Enrich charm metadata for the webapp.

    Adds to ``charm`` (mutated in place and returned):

    * "short_url" and "label": unqualified for charms owned by
      "charmers", qualified with ~owner otherwise;
    * "i_provides" / "i_requires": flat lists of the interfaces named in
      the "provides" / "requires" mappings.

    Malformed (non-dict) relation entries are ignored on both sides;
    previously such an entry under "requires" raised AttributeError.
    """
    owner = charm["owner"]
    if owner == "charmers":
        # Charm url
        charm["short_url"] = "/charms/%s/%s" % (
            charm["series"], charm["name"])
        # Charm label
        charm["label"] = "%s/%s" % (charm["series"], charm["name"])
    else:
        charm["short_url"] = "/~%s/%s/%s" % (
            owner, charm["series"], charm["name"])
        charm["label"] = "~%s:%s/%s" % (
            owner, charm["series"], charm["name"])

    # Flatten the interfaces provided and required.
    charm["i_provides"] = _flatten_interfaces(charm.get("provides"))
    charm["i_requires"] = _flatten_interfaces(charm.get("requires"))
    return charm
 
379
 
 
380
 
 
381
def scan_repo(db, root_dir):
    """Scan every sub-directory of ``root_dir`` as a charm.

    Entries of ``root_dir`` that are not directories are skipped.  Any
    error raised while scanning a charm is logged and re-raised, which
    stops the scan.
    """
    log = logging.getLogger("charm.scan")
    for c in os.listdir(root_dir):
        charm_dir = os.path.join(root_dir, c)
        if not os.path.isdir(charm_dir):
            continue
        try:
            # NOTE(review): scan_charm() as defined in this module takes
            # (db, charm_data), so this call looks stale and would raise
            # TypeError -- confirm the intended arguments before relying
            # on scan_repo().
            scan_charm(db, c, charm_dir, repo="~charmers/charm/oneiric/%s" % c)
        except Exception:
            # Log with traceback, then let the caller see the failure.
            # (Unreachable pdb/traceback debugging code that used to
            # follow the ``raise`` has been removed.)
            log.exception("Unknown scan error")
            raise
 
400
 
 
401
 
 
402
def scan_charm(db, charm_data):
    """Parse a charm's stored files and upsert the result into db.charms.

    Reads metadata.yaml (required), config.yaml and revision (optional)
    from the files listed in charm_data['files'], collects the names of
    files in the 'hooks' sub-directory, merges in ``charm_data``, runs
    process_charm() and upserts the document keyed by branch_spec.

    Nothing is stored when the charm has no metadata.yaml or when it
    cannot be parsed; both cases are logged.  An unparsable config.yaml
    is logged and the "config"/"config_raw" keys are not set from it.
    (Previously either parse failure led to a NameError further down.)
    """
    log = logging.getLogger("charm.scan")
    fs = getfs(db)
    files = charm_data['files']
    # Some files have bad characters in them since they are used as mongo
    # keys. Use their escaped forms instead.
    metadata_file = quote_key('metadata.yaml')
    config_file = quote_key('config.yaml')

    if metadata_file not in files:
        log.info("Charm has no metadata: %s", charm_data["branch_spec"])
        return

    cfile = CharmFileSet.get_by_id(fs, files[metadata_file]['fileid'])
    try:
        # NOTE(review): yaml.load() on branch content can construct
        # arbitrary objects; consider yaml.safe_load() if branches are
        # untrusted.
        metadata = quote_yaml(yaml.load(cfile.read()))
    except Exception as exc:
        log.info(
            'Invalid charm metadata %s: %s', charm_data['branch_spec'], exc)
        # Without metadata there is nothing to build the document from.
        return

    if config_file in files:
        cfile = CharmFileSet.get_by_id(fs, files[config_file]['fileid'])
        config_raw = cfile.read()

        try:
            # NOTE(review): same yaml.load() caveat as above.
            config = quote_yaml(yaml.load(config_raw))
        except Exception as exc:
            log.info(
                'Invalid charm config yaml. %s: %s',
                charm_data['branch_spec'], exc)
        else:
            metadata["config"] = config
            metadata["config_raw"] = config_raw

    if 'revision' in files:
        cfile = CharmFileSet.get_by_id(fs, files['revision']['fileid'])
        rev_raw = cfile.read()
        metadata["revision"] = int(rev_raw.strip())
    elif "revision" not in metadata:
        log.info("Invalid revision %s", charm_data["branch_spec"])
        metadata["revision"] = 0

    metadata["hooks"] = sorted(
        filedata['filename'] for filedata in files.values()
        if filedata['subdir'] == 'hooks')

    # Stuff into the db
    metadata.update(charm_data)
    metadata["_id"] = metadata["branch_spec"]
    item = db.charms.find_one({"_id": metadata["_id"]})
    if item is None:
        item = metadata
    else:
        item.update(metadata)
    item = process_charm(item)
    db.charms.update({"_id": item["_id"]}, item, upsert=True)
 
468
 
 
469
 
 
470
def check_store(charm):
    """Look the charm up in the charm store and record the answer.

    Charms owned by 'charmers' are first queried by their unqualified
    address ("cs:series/name"); if the reply contains 'errors' or
    'warnings' the query is repeated with the owner-qualified address
    ("cs:~owner/series/name") and that second reply is used regardless of
    its content.  Other charms are only queried by the qualified address.

    Sets charm['store_data'] (the reply plus a "store_checked" timestamp)
    and charm['store_url'] (the address used, suffixed with
    "-<revision>").  A reply that still has errors or warnings is logged.
    """
    log = logging.getLogger("charm.store")
    owner = charm['owner']
    series = charm["series"]
    name = charm["name"]
    is_official = owner == 'charmers'
    qualified_address = "cs:~%s/%s/%s" % (owner, series, name)

    if is_official:
        address = "cs:%s/%s" % (series, name)
    else:
        address = qualified_address

    data = _store_get(address)

    if is_official and ('errors' in data or 'warnings' in data):
        log.info("rechecking %s with ~charmers", address)
        # Update the url to the user qualified name.
        address = qualified_address
        data = _store_get(address)

    if 'errors' in data or 'warnings' in data:
        log.warning("store error on %s %s" % (address, data))

    data["store_checked"] = datetime.now().ctime()

    charm['store_data'] = data
    charm['store_url'] = address + "-%d" % data['revision']
 
500
 
 
501
 
 
502
def _store_get(address):
    """Return the store's charm-info entry for ``address``.

    Requests STORE_URL's /charm-info endpoint for the single charm,
    decodes the JSON reply, picks the entry keyed by ``address`` and adds
    an 'address' key holding the queried address.
    """
    url = STORE_URL + "/charm-info?charms=%s&stats=0" % address
    reply = json.loads(urllib2.urlopen(url).read())
    info = reply[address]
    info['address'] = address
    return info