1
# Copyright (c) 2012 Mitch Garnaat http://garnaat.org/
2
# Copyright (c) 2012 Amazon.com, Inc. or its affiliates.
5
# Permission is hereby granted, free of charge, to any person obtaining a
6
# copy of this software and associated documentation files (the
7
# "Software"), to deal in the Software without restriction, including
8
# without limitation the rights to use, copy, modify, merge, publish, dis-
9
# tribute, sublicense, and/or sell copies of the Software, and to permit
10
# persons to whom the Software is furnished to do so, subject to the fol-
13
# The above copyright notice and this permission notice shall be included
14
# in all copies or substantial portions of the Software.
16
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
18
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27
from boto.compat import json
31
class SearchServiceException(Exception):
    """Raised when the CloudSearch search endpoint reports an error.

    ``SearchConnection.__call__`` raises it with the error text as the first
    argument and the offending query object as the second, so both are
    available through ``args``.
    """
35
class CommitMismatchError(Exception):
    """Error type declared alongside the search exceptions.

    NOTE(review): nothing in the search code visible here raises it;
    presumably it is used by the document-upload side -- confirm against
    its callers.
    """
39
class SearchResults(object):
    """One page of results from a CloudSearch search request.

    Instances are built from the decoded JSON response, extended with the
    ``query`` that produced it and the ``search_service`` callable used to
    fetch further pages.
    """

    def __init__(self, **attrs):
        """Populate the page from a decoded search response.

        :param attrs: Response fields. Required keys are ``info`` (with
            ``rid``, ``cpu-time-ms``, ``time-ms``), ``hits`` (with ``found``,
            ``hit``, ``start``), ``rank``, ``match-expr``, ``query`` and
            ``search_service``. ``facets`` is optional.
        :raises KeyError: if a required key is missing.
        """
        info = attrs['info']
        hits = attrs['hits']

        self.rid = info['rid']
        # self.doc_coverage_pct = attrs['info']['doc-coverage-pct']
        self.cpu_time_ms = info['cpu-time-ms']
        self.time_ms = info['time-ms']
        self.hits = hits['found']
        self.docs = hits['hit']
        self.start = hits['start']
        self.rank = attrs['rank']
        self.match_expression = attrs['match-expr']
        self.query = attrs['query']
        self.search_service = attrs['search_service']

        # Always define ``facets`` so callers can read it even when the
        # response carries none; map each facet to {value: count}.
        self.facets = {}
        for facet, values in attrs.get('facets', {}).items():
            self.facets[facet] = dict(
                (constraint['value'], constraint['count'])
                for constraint in values.get('constraints', []))

        # Integer ceiling division: a plain ``/`` truncates under Python 2
        # before any rounding up could happen, under-counting partial pages.
        self.num_pages_needed = -(-self.hits // self.query.real_size)

    def __iter__(self):
        """Iterate over the documents of this page."""
        return iter(self.docs)

    def next_page(self):
        """Call Cloudsearch to get the next page of search results

        Advances ``query.start`` by one page and bumps ``query.page`` before
        re-running the query through ``search_service``.

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: the following page of search results
        :raises StopIteration: when the page counter has passed
            ``num_pages_needed``.
        """
        if self.query.page <= self.num_pages_needed:
            self.query.start += self.query.real_size
            self.query.page += 1
            return self.search_service(self.query)
        raise StopIteration
83
class Query(object):
    """Search criteria for one CloudSearch request, plus paging state."""

    # Upper bound that ``update_size`` applies to ``real_size``, the number
    # of results actually requested per call.
    RESULTS_PER_PAGE = 500

    def __init__(self, q=None, bq=None, rank=None,
                 return_fields=None, size=10,
                 start=0, facet=None, facet_constraints=None,
                 facet_sort=None, facet_top_n=None, t=None):
        """Store the search criteria; ``None`` collections become empty.

        :param q: text search string.
        :param bq: boolean search expression.
        :param rank: list of fields / rank expressions.
        :param return_fields: list of fields to return.
        :param size: requested number of results (see ``update_size``).
        :param start: offset of the first result.
        :param facet: list of facet field names.
        :param facet_constraints: dict of facet name -> constraint string.
        :param facet_sort: dict of facet name -> sort rule.
        :param facet_top_n: dict of facet name -> number of values.
        :param t: dict of field name -> range string.
        """
        self.q = q
        self.bq = bq
        self.rank = rank or []
        self.return_fields = return_fields or []
        self.start = start
        self.facet = facet or []
        self.facet_constraints = facet_constraints or {}
        self.facet_sort = facet_sort or {}
        self.facet_top_n = facet_top_n or {}
        self.t = t or {}
        # Page counter read and advanced by SearchResults.next_page.
        self.page = 0
        self.update_size(size)

    def update_size(self, new_size):
        """Set the requested size and derive the per-request page size.

        ``real_size`` is ``new_size`` unless that is 0 or larger than
        ``RESULTS_PER_PAGE``, in which case ``RESULTS_PER_PAGE`` is used.

        :param new_size: requested number of results.
        """
        self.size = new_size
        if self.size == 0 or self.size > self.RESULTS_PER_PAGE:
            self.real_size = self.RESULTS_PER_PAGE
        else:
            self.real_size = self.size

    def to_params(self):
        """Transform search parameters from instance properties to a dictionary

        Unset or empty criteria are left out of the result.

        :rtype: dict
        :return: search parameters
        """
        params = {'start': self.start, 'size': self.real_size}

        if self.q:
            params['q'] = self.q

        if self.bq:
            params['bq'] = self.bq

        # List-valued criteria are sent as comma-separated strings.
        if self.rank:
            params['rank'] = ','.join(self.rank)

        if self.return_fields:
            params['return-fields'] = ','.join(self.return_fields)

        if self.facet:
            params['facet'] = ','.join(self.facet)

        # Dict-valued criteria expand to one parameter per key; an empty
        # dict simply contributes nothing.
        keyed_criteria = (
            ('facet-%s-constraints', self.facet_constraints),
            ('facet-%s-sort', self.facet_sort),
            ('facet-%s-top-n', self.facet_top_n),
            ('t-%s', self.t),
        )
        for template, mapping in keyed_criteria:
            for k, v in mapping.items():
                params[template % k] = v

        return params
149
class SearchConnection(object):
    """Client for the search endpoint of a CloudSearch domain.

    The instance is callable: calling it with a ``Query`` performs the HTTP
    request and returns a ``SearchResults`` page.
    """

    def __init__(self, domain=None, endpoint=None):
        """Remember which search endpoint to talk to.

        :param domain: object exposing ``search_service_endpoint``; only
            consulted when ``endpoint`` is not given.
        :param endpoint: host name of the search endpoint; takes precedence
            over ``domain``.
        """
        self.domain = domain
        self.endpoint = endpoint
        # Fall back to the domain only when no explicit endpoint was passed,
        # so an endpoint-only connection does not need a domain object.
        if not endpoint:
            self.endpoint = domain.search_service_endpoint

    def build_query(self, q=None, bq=None, rank=None, return_fields=None,
                    size=10, start=0, facet=None, facet_constraints=None,
                    facet_sort=None, facet_top_n=None, t=None):
        """Build a ``Query`` from the given criteria without sending it.

        Parameters have the same meaning as for :meth:`search`.

        :rtype: :class:`boto.cloudsearch.search.Query`
        :return: the assembled query
        """
        return Query(q=q, bq=bq, rank=rank, return_fields=return_fields,
                     size=size, start=start, facet=facet,
                     facet_constraints=facet_constraints,
                     facet_sort=facet_sort, facet_top_n=facet_top_n, t=t)

    def search(self, q=None, bq=None, rank=None, return_fields=None,
               size=10, start=0, facet=None, facet_constraints=None,
               facet_sort=None, facet_top_n=None, t=None):
        """
        Send a query to CloudSearch

        Each search query should use at least the q or bq argument to specify
        the search parameter. The other options are used to specify the
        criteria of the search.

        :type q: string
        :param q: A string to search the default search fields for.

        :type bq: string
        :param bq: A string to perform a Boolean search. This can be used to
            create advanced searches.

        :type rank: List of strings
        :param rank: A list of fields or rank expressions used to order the
            search results. A field can be reversed by using the - operator.
            ``['-year', 'author']``

        :type return_fields: List of strings
        :param return_fields: A list of fields which should be returned by the
            search. If this field is not specified, only IDs will be returned.

        :type size: int
        :param size: Number of search results to return

        :type start: int
        :param start: Offset of the first search result to return (can be used
            for paging)

        :type facet: list
        :param facet: List of fields for which facets should be returned
            ``['colour', 'size']``

        :type facet_constraints: dict
        :param facet_constraints: Use to limit facets to specific values
            specified as comma-delimited strings in a Dictionary of facets
            ``{'colour': "'blue','white','red'", 'size': "big"}``

        :type facet_sort: dict
        :param facet_sort: Rules used to specify the order in which facet
            values should be returned. Allowed values are *alpha*, *count*,
            *max*, *sum*. Use *alpha* to sort alphabetical, and *count* to sort
            the facet by number of available result.
            ``{'color': 'alpha', 'size': 'count'}``

        :type facet_top_n: dict
        :param facet_top_n: Dictionary of facets and number of facets to
            return.
            ``{'author': 1}``

        :type t: dict
        :param t: Specify ranges for specific fields
            ``{'year': '2000..2005'}``

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: Returns the results of this search

        The following examples all assume we have indexed a set of documents
        with fields: *author*, *date*, *headline*

        A simple search will look for documents whose default text search
        fields will contain the search word exactly:

        >>> search(q='Tim') # Return documents with the word Tim in them (but not Timothy)

        A simple search with more keywords will return documents whose default
        text search fields contain the search strings together or separately.

        >>> search(q='Tim apple') # Will match "tim" and "apple"

        More complex searches require the boolean search operator.

        Wildcard searches can be used to search for any words that start with
        the search string.

        >>> search(bq="'Tim*'") # Return documents with words like Tim or Timothy

        Search terms can also be combined. Allowed operators are "and", "or",
        "not", "field", "optional", "token", "phrase", or "filter"

        >>> search(bq="(and 'Tim' (field author 'John Smith'))")

        Facets allow you to show classification information about the search
        results. For example, you can retrieve the authors who have written
        about Tim:

        >>> search(q='Tim', facet=['Author'])

        With facet_constraints, facet_top_n and facet_sort more complicated
        constraints can be specified such as returning the top author out of
        John Smith and Mark Smith who have a document with the word Tim in it.

        >>> search(q='Tim',
        ...     facet=['author'],
        ...     facet_constraints={'author': "'John Smith','Mark Smith'"},
        ...     facet_top_n={'author': 1},
        ...     facet_sort={'author': 'count'})
        """
        query = self.build_query(q=q, bq=bq, rank=rank,
                                 return_fields=return_fields,
                                 size=size, start=start, facet=facet,
                                 facet_constraints=facet_constraints,
                                 facet_sort=facet_sort,
                                 facet_top_n=facet_top_n, t=t)
        return self(query)

    def __call__(self, query):
        """Make a call to CloudSearch

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A group of search criteria

        :rtype: :class:`boto.cloudsearch.search.SearchResults`
        :return: search results
        :raises SearchServiceException: if the response carries an ``error``
            together with a message of severity ``fatal``, or an ``error``
            with no ``messages`` at all.
        """
        url = "http://%s/2011-02-01/search" % (self.endpoint)
        params = query.to_params()

        r = requests.get(url, params=params)
        data = json.loads(r.content)
        # SearchResults needs these to support next_page().
        data['query'] = query
        data['search_service'] = self

        if 'messages' in data and 'error' in data:
            for m in data['messages']:
                if m['severity'] == 'fatal':
                    raise SearchServiceException(
                        "Error processing search %s "
                        "=> %s" % (params, m['message']), query)
        elif 'error' in data:
            raise SearchServiceException(
                "Unknown error processing search %s" % (params), query)

        return SearchResults(**data)

    def _iter_pages(self, query):
        """Yield successive result pages, advancing ``query.start`` each time.

        The bound is inclusive and the counter starts at 0, so one request is
        always made and a trailing page past the reported page count may be
        requested (it can come back empty).
        """
        page = 0
        num_pages_needed = 0
        while page <= num_pages_needed:
            results = self(query)
            num_pages_needed = results.num_pages_needed
            yield results
            query.start += query.real_size
            page += 1

    def get_all_paged(self, query, per_page):
        """Get a generator to iterate over all pages of search results

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A group of search criteria

        :type per_page: int
        :param per_page: Number of docs in each :class:`boto.cloudsearch.search.SearchResults` object.

        :rtype: generator
        :return: Generator containing :class:`boto.cloudsearch.search.SearchResults`
        """
        query.update_size(per_page)
        for results in self._iter_pages(query):
            yield results

    def get_all_hits(self, query):
        """Get a generator to iterate over all search results

        Transparently handles the results paging from Cloudsearch
        search results so even if you have many thousands of results
        you can iterate over all results in a reasonably efficient
        manner.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: A group of search criteria

        :rtype: generator
        :return: All docs matching query
        """
        for results in self._iter_pages(query):
            for doc in results:
                yield doc

    def get_num_hits(self, query):
        """Return the total number of hits for query

        Runs the query once and reads ``hits`` from the returned page.

        :type query: :class:`boto.cloudsearch.search.Query`
        :param query: a group of search criteria

        :rtype: int
        :return: Total number of hits for query
        """
        return self(query).hits