~mvo/daisy/problem-type-column-family

« back to all changes in this revision

Viewing changes to tools/back_populate_problem_type.py

  • Committer: Michael Vogt
  • Date: 2017-02-21 16:53:02 UTC
  • Revision ID: michael.vogt@ubuntu.com-20170221165302-zxn29ne2mvc7ti3f
add ColumnFamily for ProblemType

Show diffs side-by-side

added

removed

Lines of Context:
 
1
#!/usr/bin/python
 
2
 
 
3
import sys
 
4
import pycassa
 
5
from pycassa.cassandra.ttypes import NotFoundException
 
6
from collections import defaultdict
 
7
from daisy import config
 
8
 
 
9
# Credentials for the Cassandra cluster, taken from the daisy configuration.
creds = dict(username=config.cassandra_username,
             password=config.cassandra_password)

# One shared connection pool for every column family used by this script.
pool = pycassa.ConnectionPool(
    config.cassandra_keyspace,
    config.cassandra_hosts,
    timeout=600,
    credentials=creds,
)

# Column families this script works with: DayOOPS and OOPS are only read,
# ProblemType is the one being back-populated.
dayoops_cf = pycassa.ColumnFamily(pool, 'DayOOPS')
oops_cf = pycassa.ColumnFamily(pool, 'OOPS')
problem_type_cf = pycassa.ColumnFamily(pool, 'ProblemType')
 
18
 
 
19
# Main
 
20
 
 
21
def collect_oops_ids(day_cf, date):
    """Return the set of OOPS ids stored under row *date* in *day_cf*.

    The row is read in pages of 1000 columns so that a very wide row is
    never fetched in one request.  A missing row yields an empty set.
    """
    page_size = 1000
    oops_ids = set()
    start = ''
    while True:
        try:
            page = day_cf.get(date, column_start=start,
                              column_count=page_size)
        except NotFoundException:
            # No (more) columns for this date.
            break
        # Resume from the last column seen.  The boundary column is fetched
        # again on the next page (slice starts are inclusive); collecting
        # into a set absorbs that duplicate.
        start = list(page)[-1]
        oops_ids.update(page.values())
        if len(page) < page_size:
            # A short page means the end of the row was reached.
            break
    return oops_ids


def back_populate(oops_ids, source_cf, target_cf):
    """Copy each OOPS's ProblemType from *source_cf* into *target_cf*.

    Rows that already exist in *target_cf* are left untouched.  OOPS ids
    with no row, or no ``ProblemType`` column, in *source_cf* are skipped.
    """
    for oops_id in oops_ids:
        # Only the source lookup is guarded: a failure while writing to the
        # target must not be mistaken for "OOPS missing" and silently dropped.
        try:
            data = source_cf.get(str(oops_id), columns=['ProblemType'])
            problem_type = data['ProblemType']
        except (NotFoundException, KeyError):
            # Sometimes the full OOPS was never inserted (cause unknown),
            # so there is nothing to copy for this id.
            continue
        # NOTE(review): the source is keyed by str(oops_id) while the target
        # uses oops_id as-is; kept unchanged here -- confirm this is intended.
        try:
            target_cf.get(oops_id)
        except NotFoundException:
            target_cf.insert(oops_id, {"ProblemType": problem_type})


if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.stderr.write("Usage: [date]\n")
        sys.exit(1)
    back_populate(collect_oops_ids(dayoops_cf, sys.argv[1]),
                  oops_cf, problem_type_cf)