~nskaggs/qa-dashboard/contribtrack

« back to all changes in this revision

Viewing changes to eventstat/management/commands/jenkins_pull_eventstat.py

  • Committer: Joe Talbott
  • Date: 2013-04-18 19:07:54 UTC
  • mto: (335.4.8 fix_zip_errors)
  • mto: This revision was merged to the branch mainline in revision 356.
  • Revision ID: joe.talbott@canonical.com-20130418190754-yl63qvq6e5ghnn3u
eventstat - Add initial eventstat app.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# QA Dashboard
 
2
# Copyright 2013 Canonical Ltd.
 
3
 
 
4
# This program is free software: you can redistribute it and/or modify it
 
5
# under the terms of the GNU Affero General Public License version 3, as
 
6
# published by the Free Software Foundation.
 
7
 
 
8
# This program is distributed in the hope that it will be useful, but
 
9
# WITHOUT ANY WARRANTY; without even the implied warranties of
 
10
# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR
 
11
# PURPOSE.  See the GNU Affero General Public License for more details.
 
12
 
 
13
# You should have received a copy of the GNU Affero General Public
 
14
# License along with this program.  If not, see
 
15
# <http://www.gnu.org/licenses/>.
 
16
 
 
17
import csv
import functools
import logging
import os
import re

from django.db import models

from common.management import (
    jenkins_get,
    JenkinsBaseCommand,
)
from eventstat.models import (
    EventstatImage,
    EventstatLog,
    EventstatMachine,
    EventstatMetric,
    EventstatResult,
    EventstatUpgrade,
    TASK_TYPE_CHOICES,
)
from performance.management import (
    get_image_build_number,
    get_run_number,
)
 
41
 
 
42
 
 
43
def whoami(fn):
    """ Decorator that logs the wrapped function's name on each call.

    :param fn: the function to wrap.
    :returns: the wrapped function.

    """

    # functools.wraps preserves fn's __name__/__doc__ on the wrapper so
    # logging and introspection report the real function, not 'wrapped'.
    @functools.wraps(fn)
    def wrapped(*args, **kwargs):
        # logging.warning is the non-deprecated spelling of logging.warn.
        logging.warning("fn: %s", fn.__name__)

        return fn(*args, **kwargs)

    return wrapped
 
51
 
 
52
 
 
53
class Command(JenkinsBaseCommand):

    # Parses jenkins job names of the form:
    #   eventstat-<release>-<variant>-<arch>-<install|upgrade>-<workload>-<machine>
    # Groups 1..6 are consumed in extract_data() below.
    job_regex = re.compile(
        ur"eventstat-([^-]+)-([^-]+)-([^-]+)-(install|upgrade)-([^-]+)-(.*)",
    )

    # How far back to pull builds, in days (presumably consumed by
    # JenkinsBaseCommand -- confirm against common.management).
    history_days = 30
 
60
 
 
61
    def _get_eventstat_logs(self, artifacts):
 
62
        """ Check for valid eventstat results.
 
63
 
 
64
        :returns: a list of eventstat logs.
 
65
 
 
66
        """
 
67
 
 
68
        logs = {}
 
69
        eventstat_log_regex = re.compile(
 
70
            ur'^(results\/|)(\d+\/|)eventstat.csv',
 
71
        )
 
72
        log_regex = re.compile(ur'^(results\/|)(\d+\/|)eventstat.(csv,log)')
 
73
        utah_log_regex = re.compile(ur'^(clientlogs\/|)utah.*.yaml')
 
74
 
 
75
        for artifact in artifacts:
 
76
            logging.info("artifact: %s", artifact)
 
77
            path = artifact['relativePath']
 
78
            num = get_run_number(path, prefix="results\/")
 
79
 
 
80
            if utah_log_regex.match(path):
 
81
                logging.debug("found %s", path)
 
82
                logs['utah'] = artifact
 
83
 
 
84
            if num is None:
 
85
                num = 1
 
86
 
 
87
            if num not in logs:
 
88
                logs[num] = {
 
89
                    'data_file': None,
 
90
                    'logs': [],
 
91
                }
 
92
 
 
93
            logging.debug("artifact: %s", path)
 
94
            if eventstat_log_regex.match(path):
 
95
                logging.debug("found %s", path)
 
96
                logs[num]['data_file'] = artifact
 
97
            if log_regex.match(path):
 
98
                logging.debug("found eventstat log %s", path)
 
99
                logs[num]['logs'].append(artifact)
 
100
 
 
101
        return logs
 
102
 
 
103
    def _process_logs(self, build, build_date, install_data, func):
        """ Process eventstat logs calling func() for each log.

        :param build: jenkins build dict; must provide 'artifacts' and
            'url'.
        :param build_date: timestamp recorded into each dashboard_data.
        :param install_data: dict with at least 'release', 'variant',
            'arch', and 'method'; optionally 'jenkins_build'.  NOTE: it is
            mutated -- 'unzip_path' is stored into it below.
        :param func: callable invoked once per complete log with the
            assembled dashboard_data dict.
        :raises Exception: if func is not callable.

        """

        if not hasattr(func, '__call__'):
            raise Exception("invalid callable: {}".format(func))

        unzip_path = self.get_artifacts(build)
        install_data['unzip_path'] = unzip_path

        logs = self._get_eventstat_logs(build['artifacts'])
        # NOTE(review): logs is keyed by run number (plus 'utah'), so
        # 'data_file' is never a top-level key and this branch looks dead
        # -- confirm the intended condition.
        if 'data_file' in logs:
            logging.info("logs: %s", logs)

        image_build_number = get_image_build_number(build)

        # The utah yaml log is consumed here for kernel detection and
        # removed so the loop below only sees per-run entries.
        utah_log = logs.pop('utah', None)

        kernel = self.get_kernel(build['url'], utah_log)

        # Default to reading artifacts straight off the jenkins server.
        unzip_path = '{}artifact'.format(build['url'])
        online_path = unzip_path

        # Prefer the locally unpacked copy when one exists.
        if 'unzip_path' in install_data:
            unzip_path = "{}/archive".format(install_data['unzip_path'])

        for num in logs:
            # skip incomplete logs
            if logs[num]['data_file'] is None:
                logging.info(
                    "missing data: %s",
                    logs[num]['data_file'],
                )
                continue

            log_name = logs[num]['data_file']['relativePath']
            # Local (or jenkins) path used for reading the file.
            log_path = "{}/{}".format(
                unzip_path,
                log_name,
            )
            # Always-online URL recorded for display/linking.
            log_path_url = "{}/{}".format(
                online_path,
                log_name,
            )
            logging.info("log_path_url: %s", log_path_url)

            install_type = install_data['method']
            dashboard_data = dict(
                release=install_data['release'],
                variant=install_data['variant'],
                arch=install_data['arch'],
                build_date=build_date,
                kernel=kernel,
            )
            if 'jenkins_build' in install_data:
                dashboard_data['jenkins_build'] = install_data['jenkins_build']

            if install_type == 'install':
                dashboard_data['build_number'] = image_build_number
            else:
                dashboard_data['date'] = build_date

                # XXX: if we start having builds that upgrade across
                # releases we will need to update this
                dashboard_data['from_release'] = install_data['release']

            # Assume we need to retrieve the logs if there's no 'unzip_path'
            # NOTE(review): install_data['unzip_path'] is assigned
            # unconditionally above, so this branch appears unreachable and
            # 'csv' is always set -- confirm whether get_artifacts() can
            # leave it unset.
            if 'unzip_path' not in install_data:
                logging.warn("Getting logs from jenkins directly...")
                dashboard_data['output'] = jenkins_get(
                    log_path_url,
                    as_json=False,
                )
            else:
                dashboard_data['output'] = self.get_local_file(log_path)
                dashboard_data['csv'] = log_path

            dashboard_data['log_path'] = log_path
            dashboard_data['log_path_url'] = log_path_url
            dashboard_data['log_name'] = log_name

            func(dashboard_data)
 
184
 
 
185
    def _parse_task(self, task):
 
186
 
 
187
        if task[0] == '[':
 
188
            return task.split()[0]
 
189
 
 
190
        return os.path.basename(task.split()[0])
 
191
 
 
192
    def _task_type(self, task):
 
193
 
 
194
        if task[0] == '[':
 
195
            return TASK_TYPE_CHOICES[0][1]
 
196
        else:
 
197
            return TASK_TYPE_CHOICES[1][1]
 
198
 
 
199
    def _parse_log(self, filename):
 
200
        """ Parse an eventstat.csv file. """
 
201
 
 
202
        mapping = {
 
203
            'Task': 'task',
 
204
            'Init Function': 'init_function',
 
205
            'Callback': 'callback',
 
206
            'Total': 'total',
 
207
            'Max': 'max',
 
208
            'Min': 'min',
 
209
            'Average': 'avg',
 
210
            'Std.Dev.': 'stddev',
 
211
        }
 
212
 
 
213
        data = []
 
214
        with open(filename, 'rb') as f:
 
215
 
 
216
            reader = csv.reader(f)
 
217
 
 
218
            for row in reader:
 
219
                row_key = row[0].rstrip(':')
 
220
                logging.info("row_key type: %s", type(row_key))
 
221
 
 
222
                try:
 
223
                    interval = int(row_key)
 
224
                except ValueError:
 
225
                    interval = None
 
226
 
 
227
                if row_key not in mapping and interval is None:
 
228
                    logging.warn("unkown row: %s", row_key)
 
229
                    continue
 
230
 
 
231
                field = mapping.get(row_key, row_key)
 
232
 
 
233
                # strip off row key
 
234
                row = row[1:]
 
235
                for i in range(len(row)):
 
236
                    if len(data) <= i:
 
237
                        data.insert(i, dict())
 
238
 
 
239
                    value = row[i]
 
240
 
 
241
                    # Add interval values to a sub-array.
 
242
                    if interval is not None:
 
243
                        if 'intervals' not in data[i]:
 
244
                            data[i]['intervals'] = []
 
245
 
 
246
                        data[i]['intervals'].append(
 
247
                            dict(
 
248
                                interval=interval,
 
249
                                value=value,
 
250
                            ),
 
251
                        )
 
252
                        continue
 
253
 
 
254
                    if field == 'task':
 
255
                        data[i]['command'] = value
 
256
                        value = self._parse_task(value)
 
257
                        logging.info("value: %s", value)
 
258
                        data[i]['task_type'] = self._task_type(value)
 
259
 
 
260
                    data[i][field] = value
 
261
 
 
262
        return data
 
263
 
 
264
    def _valid_log_format(self, filename):
 
265
        """ Determine if an eventstat log is valid.
 
266
        Right now this means it has a 'Max:' row.
 
267
 
 
268
        """
 
269
 
 
270
        valid = False
 
271
        with open(filename, 'rb') as f:
 
272
 
 
273
            reader = csv.reader(f)
 
274
 
 
275
            for row in reader:
 
276
                if row[0].rstrip(':') == 'Max':
 
277
                    valid = True
 
278
                    break
 
279
 
 
280
        return valid
 
281
 
 
282
    def add_result(self, dashboard_data):
        """ Add eventstat data to the database.

        Parses the csv referenced by dashboard_data['csv'], records each
        per-interval sample as an EventstatResult, and then recomputes
        the published aggregate EventstatMetric per task type.

        Side effects: sets self.image/self.upgrade (one of them None,
        depending on whether this was an install or an upgrade run).
        Requires self.machine to be set (see process_job).

        """

        # Logs lacking a 'Max:' row are considered truncated/invalid.
        if not self._valid_log_format(dashboard_data['csv']):
            logging.info(
                "Invalid log format: %s",
                dashboard_data['log_path_url'],
            )
            return

        logging.info("Adding eventstat result.")
        logging.info(
            "Found good log: %s",
            dashboard_data['log_path_url'],
        )

        items = self._parse_log(dashboard_data['csv'])
        logging.info("items: %s", items)

        # 'build_number' is only set for install runs (see _process_logs),
        # so its presence distinguishes install from upgrade.
        if 'build_number' in dashboard_data:
            image, new_image = EventstatImage.objects.get_or_create(
                release=dashboard_data['release'],
                variant=dashboard_data['variant'],
                arch=dashboard_data['arch'],
                build_number=dashboard_data['build_number'],
                # fake an md5 since we don't have the data
                md5="{}{}{}{}".format(
                    self.arch,
                    self.release,
                    self.variant,
                    dashboard_data['build_number'],
                )
            )
            self.image = image
            self.upgrade = None
        else:  # upgrade
            upg, new_upgr = EventstatUpgrade.objects.get_or_create(
                release=dashboard_data['release'],
                variant=dashboard_data['variant'],
                arch=dashboard_data['arch'],
                date=dashboard_data['date'],
                from_release=dashboard_data['from_release'],
            )
            self.upgrade = upg
            self.image = None

        log, new_log = EventstatLog.objects.get_or_create(
            path=dashboard_data['log_path_url'],
            name=dashboard_data['log_name'],
        )

        jenkins_url = ""
        if 'jenkins_build' in dashboard_data:
            jenkins_url = dashboard_data['jenkins_build'].url

        # One metric per task type plus an overall 'total'; created
        # unpublished, then published below once aggregates are computed.
        metrics = {}
        task_types = [x[1] for x in TASK_TYPE_CHOICES]
        task_types.append('total')
        for task_type in task_types:
            metric, new_metric = EventstatMetric.objects.get_or_create(
                image=self.image,
                upgrade=self.upgrade,
                machine=self.machine,
                name=task_type,
                kernel=dashboard_data['kernel'],
                defaults=dict(
                    ran_at=dashboard_data['build_date'],
                    publish=False,
                ),
            )

            metrics[task_type] = metric

        for item in items:
            logging.info("item: %s", item)
            task_type = item['task_type']

            # NOTE(review): items without any numeric sample rows have no
            # 'intervals' key, which would raise KeyError here -- confirm
            # eventstat output always includes interval rows.
            for interval in item['intervals']:
                # Values arrive as strings like "1.0"; int() alone would
                # reject them, hence the float() round-trip.
                value = int(float(interval['value']))
                interval = int(float(interval['interval']))

                result, new_result = EventstatResult.objects.get_or_create(
                    image=self.image,
                    upgrade=self.upgrade,
                    machine=self.machine,
                    ran_at=dashboard_data['build_date'],
                    kernel=dashboard_data['kernel'],
                    metric=metrics[task_type],
                    interval=interval,
                    task=item['task'],
                    # name has a 200-char column limit, presumably.
                    name=item['task'][:200],
                    task_type=task_type,
                    init_function=item['init_function'],
                    callback=item['callback'],
                    value=value,
                    log=log,
                    jenkins_url=jenkins_url,
                )

        # Recompute aggregate statistics per task type from the published
        # results and store them on the corresponding metric.
        for task_type in task_types:
            values = [
                'image',
                'upgrade',
                'machine',
            ]

            results = EventstatResult.objects.filter(
                publish=True,
                image=self.image,
                upgrade=self.upgrade,
                machine=self.machine,
            )

            # 'total' aggregates across all task types; others narrow
            # the queryset and group by task_type as well.
            if task_type != "total":
                results = results.filter(
                    task_type=task_type,
                )
                values.append('task_type')

            results = results.values(*values).annotate(
                models.Avg('value'),
                models.Min('value'),
                models.Max('value'),
                models.StdDev('value'),
            )

            # Grouping by image/upgrade/machine(/task_type) should yield
            # exactly one aggregate row; anything else means the data is
            # inconsistent, so log the identifiers and bail out.
            agg_len = len(results)
            if agg_len != 1:
                logging.warn("Strange aggregation result count: %s", agg_len)
                logging.warn("  machine.id: %s", self.machine.id)
                if self.image is not None:
                    logging.warn("  image.id: %s", self.image.id)
                if self.upgrade is not None:
                    logging.warn("  upgrade.id: %s", self.upgrade.id)
                logging.warn("  task_type: %s", task_type)
                return

            result = results[0]

            metrics[task_type].average = result['value__avg']
            metrics[task_type].minimum = result['value__min']
            metrics[task_type].maximum = result['value__max']
            metrics[task_type].stddev = result['value__stddev']
            metrics[task_type].publish = True
            metrics[task_type].save()
 
427
 
 
428
    def remove_result(self, dashboard_data):
 
429
        """ Remove eventstat data from the database. """
 
430
 
 
431
        logging.info("Removing eventstat result.")
 
432
 
 
433
    def extract_data(self, name):
 
434
        m = self.job_regex.match(name)
 
435
 
 
436
        if m:
 
437
            self.release = m.group(1)
 
438
            self.variant = m.group(2)
 
439
            self.arch = m.group(3)
 
440
            self.method = m.group(4)
 
441
            self.workload = m.group(5)
 
442
            self.machine_name = m.group(6)
 
443
 
 
444
            logging.info("name: %s", name)
 
445
            logging.info("method: %s", self.method)
 
446
 
 
447
            self.install_data = dict(
 
448
                release=self.release,
 
449
                variant=self.variant,
 
450
                arch=self.arch,
 
451
                method=self.method,
 
452
            )
 
453
 
 
454
    def process_job(self, job):
 
455
        logging.info("job.name: %s", job['name'])
 
456
        self.machine, new_machine = EventstatMachine.objects.get_or_create(
 
457
            name=self.machine_name,
 
458
        )