~jcsackett/charmworld/bac-tag-constraints

« back to all changes in this revision

Viewing changes to charmworld/jobs/index.py

  • Committer: Aaron Bentley
  • Date: 2013-02-12 18:57:02 UTC
  • mfrom: (149 charmworld)
  • mto: This revision was merged to the branch mainline in revision 150.
  • Revision ID: aaron@canonical.com-20130212185702-9gnf40ao17a7uw7v
Merged trunk into mongo-urls.

Show diffs side-by-side

added added

removed removed

Lines of Context:
10
10
from config import CHARM_INDEX_DIR
11
11
from config import INDEX_IN_QUEUE
12
12
from config import MONGO_URL
 
13
from worker import index_queue
 
14
from worker import reindex
 
15
from worker import setup_indexer
13
16
from utils import get_queues
14
17
 
15
 
log = logging.getLogger("charm.index")
16
 
 
17
 
 
18
 
def setup_indexer(indexer):
    """Register every charm field action on *indexer*.

    Three groups of actions are declared, in this order:

    * exact-match indexing for the filterable fields,
    * stored content for the fields registered with STORE_CONTENT,
    * English free-text indexing, with name, summary and description
      weighted 10, 5 and 3 respectively and the remaining fields left
      at the library default.
    """
    actions = xappy.FieldActions

    exact_fields = ('owner', 'series', 'subordinate', 'provides', 'requires')
    for field in exact_fields:
        indexer.add_field_action(field, actions.INDEX_EXACT)

    # Store content
    stored_fields = (
        'name', 'store_url', 'summary', 'short_url', 'label', 'series',
        'owner', 'subordinate',
    )
    for field in stored_fields:
        indexer.add_field_action(field, actions.STORE_CONTENT)

    # Full text search fields
    weighted_fields = (('name', 10), ('summary', 5), ('description', 3))
    for field, weight in weighted_fields:
        indexer.add_field_action(
            field, actions.INDEX_FREETEXT, weight=weight, language='en')

    for field in ('config', 'relations', 'changes'):
        indexer.add_field_action(
            field, actions.INDEX_FREETEXT, language='en')
56
 
 
57
 
 
58
 
def _add_relation_fields(doc, charm, kind, relation_text):
    """Add one *kind* field to *doc* per relation declared in charm[kind].

    *kind* is the charm key to read ('requires' or 'provides') and is also
    used as the field name.  For each well-formed relation, the relation
    name and its interface are appended to *relation_text* (mutated in
    place).  Entries that are not dicts are logged and skipped, so one
    malformed relation does not abort indexing of the whole charm.
    """
    relations = charm.get(kind)
    if not relations:
        return
    for key, option in relations.items():
        if not isinstance(option, dict):
            log.warning("invalid charm %s %s", kind, charm['branch_spec'])
            continue
        relation_text.append(key)
        relation_text.append(option["interface"])
        doc.fields.append(xappy.Field(kind, option["interface"]))


def index_charm(indexer, charm):
    """Build a search document from *charm* and store it in *indexer*.

    *charm* is a dict-like record.  The keys 'owner', 'name', 'summary',
    'description', 'short_url', 'label', 'series' and '_id' are read
    unconditionally, so a missing one raises KeyError.  'subordinate',
    'config', 'requires', 'provides', 'changes' and 'store_url' are
    optional.

    The document id is the charm's '_id'; it is written with
    ``indexer.replace`` and the indexer is flushed before returning.
    """
    doc = xappy.UnprocessedDocument()

    # Weight critical fields higher for official charms.
    weight = 10 if charm['owner'] == 'charmers' else 1
    for field in ("name", "summary", "description"):
        doc.fields.append(xappy.Field(field, charm[field], weight=weight))

    for field in ("owner", "short_url", "label", "series"):
        doc.fields.append(xappy.Field(field, charm[field]))

    if charm.get('subordinate'):
        doc.fields.append(xappy.Field('subordinate', 'true'))

    # Only index configuration when there is a non-empty 'options' mapping.
    config = charm.get('config')
    options = config['options'] if config and 'options' in config else None
    if options:
        config_text = []
        for key, option in options.items():
            config_text.append(key)
            config_text.append(option.get("description", ""))
        doc.fields.append(xappy.Field("config", " ".join(config_text)))

    # 'requires' and 'provides' share one code path so that both tolerate
    # malformed (non-dict) relation entries in the same way.
    relation_text = []
    _add_relation_fields(doc, charm, 'requires', relation_text)
    _add_relation_fields(doc, charm, 'provides', relation_text)
    if relation_text:
        doc.fields.append(xappy.Field("relations", " ".join(relation_text)))

    change_text = []
    for change in charm.get("changes", ()):
        change_text.append(change['message'])
        change_text.append(change['committer'])
    if change_text:
        doc.fields.append(xappy.Field("changes", " ".join(change_text)))

    if 'store_url' in charm:
        doc.fields.append(xappy.Field('store_url', charm['store_url']))
    else:
        log.warning("No store url found for %s", charm["_id"])

    doc.id = charm["_id"]
    indexer.replace(doc)
    indexer.flush()
126
 
 
127
 
 
128
 
def reindex(db, indexer):
    """Index every record returned by ``db.find()``.

    Each record is passed to ``index_charm``.  If indexing a record fails,
    the offending branch spec is logged at error level and the exception
    is re-raised, which stops the run.  On success the number of records
    processed is logged at info level.
    """
    total = 0
    for total, record in enumerate(db.find(), 1):
        log.debug("Indexing %s", record['branch_spec'])
        try:
            index_charm(indexer, record)
        except:
            # Name the failing charm, then let the error propagate.
            log.error("Indexing charm %s", record['branch_spec'])
            raise
    log.info("Indexed %d Charms" % total)
139
 
 
140
 
 
141
 
def index_queue(db, indexer, index_queue):
    """Index every charm named by the jobs waiting in *index_queue*.

    Jobs are pulled with ``index_queue.next()`` until it returns a false
    value.  Each job's payload must carry a 'branch_spec', which is looked
    up in *db* by '_id'; charms that are found are passed to
    ``index_charm``, unknown ones are logged and skipped.

    A failure while handling one job is logged with its traceback and does
    not stop the run.  KeyboardInterrupt and SystemExit are not caught.
    Every job is marked complete whether or not it was indexed.

    When the queue is drained, the number of charms actually indexed is
    logged at info level.
    """
    count = 0
    while True:
        item = index_queue.next()
        if not item:
            # Leave the loop (rather than return) so the summary is logged.
            break
        # Bound before the try block so the error handler can always
        # report it, even when reading the payload itself fails.
        charm_data = None
        try:
            charm_data = item.payload
            charm = db.find_one({"_id": charm_data["branch_spec"]})

            log.debug("Indexing charm %s", charm_data["branch_spec"])
            if charm:
                index_charm(indexer, charm)
                # Count only charms that were really indexed.
                count += 1
            else:
                log.info(
                    "Skipping unknown charm %s", charm_data["branch_spec"])
        except Exception:
            log.exception("Error indexing %s", charm_data)
        finally:
            # Remove the job from the input queue.
            item.complete()
    log.info("Indexed %d Charms", count)
167
 
 
168
 
 
169
 
def main():
 
18
 
 
19
if __name__ == '__main__':
 
20
    log = logging.getLogger("charm.index")
170
21
    logging.basicConfig(
171
22
        level=logging.WARNING,
172
23
        format="%(asctime)s: %(name)s@%(levelname)s: %(message)s")
180
31
    # Index queue
181
32
    in_queue = get_queues(INDEX_IN_QUEUE)
182
33
    index_queue(db, indexer, in_queue)
 
34
    # XXX j.c.sackett Reindexing should be its own job, outside of the ingest
 
35
    # queue.
183
36
    #reindex(db, indexer)
184
37
 
185
38
    indexer.close()
186
39
    log.info("Indexed Repo")
187
 
 
188
 
if __name__ == '__main__':
189
 
    main()