~jcsackett/charmworld/bac-tag-constraints

« back to all changes in this revision

Viewing changes to charmworld/search.py

  • Committer: Tarmac
  • Author(s): Reed O'Brien
  • Date: 2014-05-14 16:16:48 UTC
  • mfrom: (506.2.10 ngrams)
  • Revision ID: tarmac-20140514161648-ax50xhzkvot5vv4p
Adds ngrams as a multi_field index on name. Skips the destructive test -- see Bug #1317567.

Approved by Juju Gui Bot, Richard Harding, j.c.sackett.

Show diffs side-by-side

added added

removed removed

Lines of Context:
32
32
]
33
33
charm_free_text_fields = {
34
34
    'name': 10,
 
35
    'ngrams': None,
35
36
    'summary': 5,
36
37
    'description': 3,
37
38
    'config.options.description': None,
45
46
]
46
47
bundle_free_text_fields = {
47
48
    'name': 10,
 
49
    'ngrams': None,
48
50
    'basket_name': 5,
49
51
    'description': 3,
50
52
    'title': None,
160
162
                "index": {
161
163
                    "number_of_shards": settings['es_shards'],
162
164
                    "number_of_replicas": settings['es_replicas'],
 
165
                    "analysis": {
 
166
                        "filter": {
 
167
                            "n3_20grams_filter": {
 
168
                                "type": "ngram",
 
169
                                "min_gram": 3,
 
170
                                "max_gram": 20
 
171
                            }
 
172
                        },
 
173
                        "analyzer": {
 
174
                            "n3_20grams": {
 
175
                                "type": "custom",
 
176
                                "tokenizer": "standard",
 
177
                                "filter": [
 
178
                                    "lowercase",
 
179
                                    "n3_20grams_filter"
 
180
                                ]
 
181
                            }
 
182
                        }
 
183
                    },
 
184
                    "mappings": {
 
185
                        "charm": {
 
186
                            "properties": {
 
187
                                "name": {
 
188
                                    "type": "multi_field",
 
189
                                    "fields": {
 
190
                                        "ngrams": {
 
191
                                            "type": "string",
 
192
                                            "analyzer": "n3_20grams"
 
193
                                        },
 
194
                                        "name": {
 
195
                                            "type": "string",
 
196
                                            "index": "not_analyzed"
 
197
                                        }
 
198
                                    }
 
199
                                }
 
200
                            }
 
201
 
 
202
                        },
 
203
                        "bundle": {
 
204
                            "properties": {
 
205
                                "name": {
 
206
                                    "type": "multi_field",
 
207
                                    "fields": {
 
208
                                        "ngrams": {
 
209
                                            "type": "string",
 
210
                                            "analyzer": "n3_20grams"
 
211
                                        },
 
212
                                        "name": {
 
213
                                            "type": "string",
 
214
                                            "index": "not_analyzed"
 
215
                                        }
 
216
                                    }
 
217
                                }
 
218
                            }
 
219
                        }
 
220
                    }
163
221
                }
164
222
            })
165
223
        # The ES server may need some time to actually create the index
221
279
        """
222
280
        charm_exact_index = [
223
281
            'categories',
224
 
            'name',
225
282
            'owner',
226
283
            'i_provides',
227
284
            'i_requires',
229
286
            'store_url',
230
287
            '_id',
231
288
        ]
 
289
 
232
290
        inner_charm_properties = dict(
233
291
            (name, {'type': 'string', 'index': 'not_analyzed'})
234
292
            for name in charm_exact_index)
235
293
 
236
 
        CHARM_TEXT_INDEX = (
 
294
        charm_n3_20gram_index = [
 
295
            'name'
 
296
        ]
 
297
 
 
298
        charm_text_index = (
237
299
            'summary',
238
300
            'description',
239
301
        )
 
302
 
240
303
        inner_charm_properties.update(dict(
241
 
            (name, {'type': 'string'}) for name in CHARM_TEXT_INDEX))
 
304
            (name, {'type': 'string'}) for name in charm_text_index))
 
305
 
 
306
        inner_charm_properties.update(
 
307
            dict((name, {
 
308
                "type": "multi_field",
 
309
                "fields": {
 
310
                    "ngrams": {
 
311
                        "type": "string",
 
312
                        "analyzer": "n3_20grams"
 
313
                    },
 
314
                    "name": {
 
315
                        "type": "string",
 
316
                        "index": "not_analyzed"
 
317
                    }
 
318
                }
 
319
            }) for name in charm_n3_20gram_index))
242
320
 
243
321
        CHARM_INTEGER_INDEX = (
244
322
            'downloads',
290
368
        }
291
369
 
292
370
        bundle_exact_index = [
293
 
            'name',
294
371
            'owner',
295
372
            'series',
296
373
            'title',
297
374
        ]
298
375
 
 
376
        bundle_n3_20gram_index = [
 
377
            "name",
 
378
        ]
 
379
 
299
380
        bundle_properties = {
300
381
            'data': {
301
382
                'properties':  # XXX The linter won't let me indent this well.
302
383
                dict((name, {'type': 'string', 'index': 'not_analyzed'})
303
 
                for name in bundle_exact_index)
 
384
                     for name in bundle_exact_index)
304
385
            }}
305
386
 
 
387
        bundle_properties['data']['properties'].update(
 
388
            dict((name, {
 
389
                "type": "multi_field",
 
390
                "fields": {
 
391
                    "ngrams": {
 
392
                        "type": "string",
 
393
                        "analyzer": "n3_20grams"
 
394
                    },
 
395
                    "name": {
 
396
                        "type": "string",
 
397
                        "index": "not_analyzed"
 
398
                    }
 
399
                }
 
400
            }) for name in bundle_n3_20gram_index))
 
401
 
306
402
        with translate_error():
307
403
            try:
308
404
                for (name, properties, dynamic) in (
407
503
                text = text[:-1]
408
504
 
409
505
            if autocomplete:
410
 
                return {'prefix': {'data.name': text}}
 
506
                return {'match': {'ngrams': text}}
411
507
 
412
508
        charm_fields = [field + ('' if boost is None else '^%d' % boost)
413
509
                        for field, boost in charm_free_text_fields.items()]
425
521
                fields.extend(other_fields)
426
522
                return {
427
523
                    'query_string': {
428
 
                    'query': text,
429
 
                    'fields': fields,
 
524
                        'query': text,
 
525
                        'fields': fields,
430
526
                    }}
431
527
 
432
528
            charm_dsl = {'filtered': {