~abentley/juju-ci-tools/client-from-config-4

Viewing changes to add-missing-result-yaml-files.py

  • Committer: Aaron Bentley
  • Date: 2014-02-24 17:18:29 UTC
  • mto: This revision was merged to the branch mainline in revision 252.
  • Revision ID: aaron.bentley@canonical.com-20140224171829-sz644yhoygu7m9dm
Use tags to identify and shut down instances.

#!/usr/bin/env python

"""Add missing result.yaml files in S3; ensure that existing files contain
the final result.
"""

from __future__ import print_function
from argparse import ArgumentParser
from datetime import datetime
import json
import os
import re
from tempfile import NamedTemporaryFile
import yaml

from utility import (
    s3_cmd,
    temp_dir,
)

ARCHIVE_URL = 's3://juju-qa-data/juju-ci/products/'
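# A timestamp rendered with ISO_8601_FORMAT looks like
# 2014-02-24T17:18:29.000000Z (illustrative example).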
ISO_8601_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'
LONG_AGO = datetime(2000, 1, 1)


def get_ci_director_state():
    state_file_path = os.path.join(
        os.environ['HOME'], '.config/ci-director-state')
    with open(state_file_path) as state_file:
        return yaml.load(state_file)['versions']


def list_s3_files():
    text = s3_cmd(['ls', '-r', ARCHIVE_URL])
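    # Illustrative example of a listing line, assuming s3cmd's usual
    # `ls` output (date, time, size and URL separated by whitespace):
    #   2014-02-24 17:18   1234   s3://juju-qa-data/juju-ci/products/version-251/result.yaml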
    for line in text.strip().split('\n'):
        file_date, file_time, size, url = re.split(r'\s+', line)
        file_date = [int(part) for part in file_date.split('-')]
        file_time = [int(part) for part in file_time.split(':')]
        file_time = datetime(*(file_date + file_time))
        revision_number, filename = re.search(
            r'^{}version-(\d+)/(.*)$'.format(ARCHIVE_URL), url).groups()
        yield int(revision_number), filename, file_time


def get_s3_revision_info():
    all_revisions = {}
    for revision_number, file_name, file_time in list_s3_files():
        revision = all_revisions.setdefault(revision_number, {
            'result': {},
            'artifact_time': LONG_AGO,
            })
        if file_name in ('result.yaml', 'result.json'):
            # Many result.json files were added on 2014-08-14 for older
            # builds, so we may have both a result.yaml file and a
            # result.json file.
            revision['result'][file_time] = file_name
        else:
            revision['artifact_time'] = max(
                revision['artifact_time'], file_time)
    # The most recent revision may still be building, so checking whether
    # its result file exists would tell us nothing; skip it.
    del all_revisions[max(all_revisions)]
    result_file_time = revision['artifact_time']
    for revision_number, revision_data in sorted(all_revisions.items()):
        if not revision_data['result']:
            result_file_name = None
        else:
            result_file_time = min(revision_data['result'])
            # If both a result.yaml and a result.json file exist, use
            # the newer one.
            newer = max(revision_data['result'])
            result_file_name = revision_data['result'][newer]
        yield revision_number, result_file_name, result_file_time


def main(args):
    ci_director_state = get_ci_director_state()
    for revision_number, result_file, artifact_time in get_s3_revision_info():
        state_file_result = ci_director_state.get(revision_number)
        if state_file_result is None:
            print(
                "Warning: No state file data available for revision",
                revision_number)
            continue
        if result_file is not None:
            with temp_dir() as workspace:
                copy_from = '{}version-{}/{}'.format(
                    ARCHIVE_URL, revision_number, result_file)
                copy_to = os.path.join(workspace, result_file)
                s3_cmd(['--no-progress', 'get', copy_from, copy_to])
                with open(copy_to) as f:
                    s3_result = yaml.load(f)
                # For paranoids: check that the data from S3 is a subset
                # of the data from the state file.
                s3_keys = set(s3_result)
                state_keys = set(ci_director_state[revision_number])
                if not s3_keys.issubset(state_keys):
                    print(
                        "Warning: S3 result file for {} contains keys that do "
                        "not exist in the main state file: {}".format(
                            revision_number, s3_keys.difference(state_keys)))
                    continue
                comparable_state_data = dict(
                    (k, v)
                    for k, v in ci_director_state[revision_number].items()
                    if k in s3_keys)
                if comparable_state_data != s3_result:
                    # This can happen when the result file was written
                    # while a -devel job was still running.
                    print(
                        "Warning: Diverging data for revision {} in S3 ({}) "
                        "and in state file ({}).".format(
                            revision_number, s3_result,
                            ci_director_state[revision_number]))
                if 'result' in s3_result:
                    continue

        if 'finished' not in state_file_result:
            state_file_result['finished'] = artifact_time.strftime(
                ISO_8601_FORMAT)
        with NamedTemporaryFile() as new_result_file:
            json.dump(state_file_result, new_result_file)
            new_result_file.flush()
            dest_url = '{}version-{}/result.json'.format(
                ARCHIVE_URL, revision_number)
            params = ['put', new_result_file.name, dest_url]
            if args.dry_run:
                print(*(['s3cmd'] + params))
            else:
                s3_cmd(params)


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('--dry-run', action='store_true')
    args = parser.parse_args()
    main(args)
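
The script leans on two helpers imported from utility, s3_cmd and temp_dir,
which are not shown in this diff. A minimal sketch of what they might look
like, inferred only from how they are called above; the real juju-ci-tools
implementations likely differ (for example by passing an s3cmd config file):

# Hypothetical sketch, not the actual utility module from juju-ci-tools.
from contextlib import contextmanager
import shutil
import subprocess
import tempfile


def s3_cmd(params):
    # Run s3cmd with the given arguments and return its output as text.
    return subprocess.check_output(['s3cmd'] + params)


@contextmanager
def temp_dir():
    # Yield a scratch directory that is removed when the block exits.
    dirname = tempfile.mkdtemp()
    try:
        yield dirname
    finally:
        shutil.rmtree(dirname)

Since the script uploads result.json files to S3, running it with --dry-run
(python add-missing-result-yaml-files.py --dry-run) only prints the s3cmd put
commands it would issue, which is presumably the safer way to try it first.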