# Copyright (c) 2006,2007 Mitch Garnaat http://garnaat.org/
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

import datetime
import xml.sax

import boto.mturk.notification
from boto import handler
from boto.connection import AWSQueryConnection
from boto.exception import EC2ResponseError
from boto.mturk.price import Price
from boto.resultset import ResultSet


class MTurkConnection(AWSQueryConnection):
    """
    Connection used to issue Amazon Mechanical Turk Requester operations.

    Each public method builds the parameter dict for one operation and
    submits it through _process_request, which parses the XML reply into
    a ResultSet or raises EC2ResponseError when the reply reports errors.
    """

    APIVersion = '2008-08-02'
    SignatureVersion = '1'

    def __init__(self, aws_access_key_id=None, aws_secret_access_key=None,
                 is_secure=False, port=None, proxy=None, proxy_port=None,
                 proxy_user=None, proxy_pass=None,
                 host='mechanicalturk.amazonaws.com', debug=0,
                 https_connection_factory=None):
        """
        Create the connection.

        Every argument is forwarded unchanged, in the same order, to
        AWSQueryConnection.__init__.
        """
        AWSQueryConnection.__init__(self, aws_access_key_id, aws_secret_access_key,
                                    is_secure, port, proxy, proxy_port,
                                    proxy_user, proxy_pass,
                                    host, debug, https_connection_factory)

    def get_account_balance(self):
        """
        Perform a GetAccountBalance operation.

        Returns a ResultSet in which AvailableBalance and OnHoldBalance
        elements are parsed into Price objects.
        """
        # The operation takes no operation-specific parameters.
        params = {}
        return self._process_request('GetAccountBalance', params,
                                     [('AvailableBalance', Price),
                                      ('OnHoldBalance', Price)])

    def register_hit_type(self, title, description, reward, duration,
                          keywords=None, approval_delay=None, qual_req=None):
        """
        Register a new HIT Type

        title, description are strings
        reward is a Price object (any other value is wrapped in a Price)
        duration can be an integer or string
        keywords is an optional string or list of strings
        approval_delay is sent as AutoApprovalDelayInSeconds when not None
        qual_req is accepted but not used by this method
        """
        params = {'Title': title,
                  'Description': description,
                  'AssignmentDurationInSeconds': duration}
        params.update(MTurkConnection.get_price_as_price(reward).get_as_params('Reward'))

        # Only send keywords when some were given; normalise them the same
        # way create_hit does so a list becomes a single string.
        if keywords:
            params['Keywords'] = MTurkConnection.get_keywords_as_string(keywords)

        if approval_delay is not None:
            params['AutoApprovalDelayInSeconds'] = approval_delay

        return self._process_request('RegisterHITType', params)

    def set_email_notification(self, hit_type, email, event_types=None):
        """
        Performs a SetHITTypeNotification operation to set email notification for a specified HIT type
        """
        return self._set_notification(hit_type, 'Email', email, event_types)

    def set_rest_notification(self, hit_type, url, event_types=None):
        """
        Performs a SetHITTypeNotification operation to set REST notification for a specified HIT type
        """
        return self._set_notification(hit_type, 'REST', url, event_types)

    def _set_notification(self, hit_type, transport, destination, event_types=None):
        """
        Common SetHITTypeNotification operation to set notification for a specified HIT type

        hit_type must be a str (checked with an assert).
        transport and destination are sent as the notification's
        Transport and Destination values.
        event_types, when given, is expanded into EventType list parameters.
        """
        assert type(hit_type) is str, "hit_type argument should be a string."

        params = {'HITTypeId': hit_type}

        # from the Developer Guide:
        # The 'Active' parameter is optional. If omitted, the active status of the HIT type's
        # notification specification is unchanged. All HIT types begin with their
        # notification specifications in the "inactive" status.
        # NOTE(review): 'Active' is therefore sent explicitly so the
        # notification actually takes effect -- confirm the value format
        # expected by the service.
        notification_params = {'Destination': destination,
                               'Transport': transport,
                               'Version': boto.mturk.notification.NotificationMessage.NOTIFICATION_VERSION,
                               'Active': True,
                               }

        # add specific event types if required
        if event_types:
            self.build_list_params(notification_params, event_types, 'EventType')

        # Set up dict of 'Notification.1.Transport' etc. values
        # (a single notification specification is sent, hence index 1)
        num = 1
        notification_rest_params = {}
        for key in notification_params:
            notification_rest_params['Notification.%d.%s' % (num, key)] = notification_params[key]

        # Update main params dict
        params.update(notification_rest_params)

        # Execute operation
        return self._process_request('SetHITTypeNotification', params)

    def create_hit(self, hit_type=None, question=None, lifetime=60*60*24*7, max_assignments=1,
                   title=None, description=None, keywords=None, reward=None,
                   duration=60*60*24*7, approval_delay=None, annotation=None, qual_req=None,
                   questions=None, qualifications=None, response_groups=None):
        """
        Creates a new HIT.
        Returns a ResultSet whose HIT elements are parsed into HIT objects.
        See: http://docs.amazonwebservices.com/AWSMechanicalTurkRequester/2006-10-31/ApiReference_CreateHITOperation.html

        Exactly one of question / questions must be supplied.  The request
        is built from a single object providing get_as_xml(), so a
        questions list is accepted only when it holds exactly one entry.

        When hit_type is given it is sent as HITTypeId; otherwise title,
        description, keywords, reward, duration and (optionally)
        approval_delay are sent instead.  qual_req is accepted but not
        used by this method; use qualifications.

        Raises ValueError when the question arguments are inconsistent.
        """
        # handle single or multiple questions
        if question is not None and questions is not None:
            raise ValueError("Must specify either question (single Question instance) or questions (list), but not both")
        if question is None:
            # Without this guard the get_as_xml() call below would fail
            # with an AttributeError on None.
            if questions is None or len(questions) != 1:
                raise ValueError("Must specify either question (single Question instance) or questions (list holding exactly one entry)")
            question = questions[0]

        # Handle basic required arguments and set up params dict
        params = {'Question': question.get_as_xml(),
                  'LifetimeInSeconds': lifetime,
                  'MaxAssignments': max_assignments,
                  }

        # if hit type specified then add it
        # else add the additional required parameters
        if hit_type is not None:
            params['HITTypeId'] = hit_type
        else:
            # Handle keywords
            final_keywords = MTurkConnection.get_keywords_as_string(keywords)

            # Handle price argument
            final_price = MTurkConnection.get_price_as_price(reward)

            additional_params = {'Title': title,
                                 'Description': description,
                                 'Keywords': final_keywords,
                                 'AssignmentDurationInSeconds': duration,
                                 }
            additional_params.update(final_price.get_as_params('Reward'))

            if approval_delay is not None:
                additional_params['AutoApprovalDelayInSeconds'] = approval_delay

            # add these params to the others
            params.update(additional_params)

        # add the annotation if specified
        if annotation is not None:
            params['RequesterAnnotation'] = annotation

        # Add the Qualifications if specified
        if qualifications is not None:
            params.update(qualifications.get_as_params())

        # Handle optional response groups argument
        if response_groups:
            self.build_list_params(params, response_groups, 'ResponseGroup')

        # Submit
        return self._process_request('CreateHIT', params, [('HIT', HIT),])

    def get_reviewable_hits(self, hit_type=None, status='Reviewable',
                            sort_by='Expiration', sort_direction='Ascending',
                            page_size=10, page_number=1):
        """
        Retrieve the HITs that have a status of Reviewable, or HITs that
        have a status of Reviewing, and that belong to the Requester calling the operation.

        hit_type, when not None, is sent as HITTypeId.
        Returns a ResultSet whose HIT elements are parsed into HIT objects.
        """
        params = {'Status': status,
                  'SortProperty': sort_by,
                  'SortDirection': sort_direction,
                  'PageSize': page_size,
                  'PageNumber': page_number}

        # Handle optional hit_type argument
        if hit_type is not None:
            params.update({'HITTypeId': hit_type})

        return self._process_request('GetReviewableHITs', params, [('HIT', HIT),])

    def search_hits(self, sort_by='CreationTime', sort_direction='Ascending',
                    page_size=10, page_number=1):
        """
        Return all of a Requester's HITs, on behalf of the Requester.
        The operation returns HITs of any status, except for HITs that have been disposed
        with the DisposeHIT operation.

        The SearchHITs operation does not accept any search parameters that filter the results.

        Returns a ResultSet whose HIT elements are parsed into HIT objects.
        """
        params = {'SortProperty': sort_by,
                  'SortDirection': sort_direction,
                  'PageSize': page_size,
                  'PageNumber': page_number}

        return self._process_request('SearchHITs', params, [('HIT', HIT),])

    def get_assignments(self, hit_id, status=None,
                        sort_by='SubmitTime', sort_direction='Ascending',
                        page_size=10, page_number=1):
        """
        Retrieves completed assignments for a HIT.
        Use this operation to retrieve the results for a HIT.

        status, when not None, is sent as AssignmentStatus.

        The returned ResultSet will have the following attributes:

        NumResults
                The number of assignments on the page in the filtered results list,
                equivalent to the number of assignments being returned by this call.
                A non-negative integer
        PageNumber
                The number of the page in the filtered results list being returned.
        TotalNumResults
                The total number of HITs in the filtered results list based on this call.
                A non-negative integer

        The ResultSet will contain zero or more Assignment objects
        """
        params = {'HITId': hit_id,
                  'SortProperty': sort_by,
                  'SortDirection': sort_direction,
                  'PageSize': page_size,
                  'PageNumber': page_number}

        if status is not None:
            params['AssignmentStatus'] = status

        return self._process_request('GetAssignmentsForHIT', params, [('Assignment', Assignment),])

    def approve_assignment(self, assignment_id, feedback=None):
        """
        Perform an ApproveAssignment operation for assignment_id.

        feedback, when given, is sent as RequesterFeedback.
        """
        params = {'AssignmentId': assignment_id,}
        if feedback:
            params['RequesterFeedback'] = feedback
        return self._process_request('ApproveAssignment', params)

    def reject_assignment(self, assignment_id, feedback=None):
        """
        Perform a RejectAssignment operation for assignment_id.

        feedback, when given, is sent as RequesterFeedback.
        """
        params = {'AssignmentId': assignment_id,}
        if feedback:
            params['RequesterFeedback'] = feedback
        return self._process_request('RejectAssignment', params)

    def get_hit(self, hit_id):
        """
        Perform a GetHIT operation for hit_id.

        Returns a ResultSet whose HIT elements are parsed into HIT objects.
        """
        params = {'HITId': hit_id,}
        return self._process_request('GetHIT', params, [('HIT', HIT),])

    def set_reviewing(self, hit_id, revert=None):
        """
        Update a HIT with a status of Reviewable to have a status of Reviewing,
        or reverts a Reviewing HIT back to the Reviewable status.

        Only HITs with a status of Reviewable can be updated with a status of Reviewing.
        Similarly, only Reviewing HITs can be reverted back to a status of Reviewable.

        revert, when truthy, is sent as the Revert parameter.
        """
        params = {'HITId': hit_id,}
        if revert:
            params['Revert'] = revert
        return self._process_request('SetHITAsReviewing', params)

    def disable_hit(self, hit_id):
        """
        Remove a HIT from the Mechanical Turk marketplace, approves all submitted assignments
        that have not already been approved or rejected, and disposes of the HIT and all
        assignment data.

        Assignments for the HIT that have already been submitted, but not yet approved or rejected, will be
        automatically approved. Assignments in progress at the time of the call to DisableHIT will be
        approved once the assignments are submitted. You will be charged for approval of these assignments.
        DisableHIT completely disposes of the HIT and all submitted assignment data. Assignment results
        data cannot be retrieved for a HIT that has been disposed.

        It is not possible to re-enable a HIT once it has been disabled. To make the work from a disabled HIT
        available again, create a new HIT.
        """
        params = {'HITId': hit_id,}
        return self._process_request('DisableHIT', params)

    def dispose_hit(self, hit_id):
        """
        Dispose of a HIT that is no longer needed.

        Only HITs in the "reviewable" state, with all submitted assignments approved or rejected,
        can be disposed. A Requester can call GetReviewableHITs to determine which HITs are
        reviewable, then call GetAssignmentsForHIT to retrieve the assignments.
        Disposing of a HIT removes the HIT from the results of a call to GetReviewableHITs.
        """
        params = {'HITId': hit_id,}
        return self._process_request('DisposeHIT', params)

    def expire_hit(self, hit_id):
        """
        Expire a HIT that is no longer needed.

        The effect is identical to the HIT expiring on its own. The HIT no longer appears on the
        Mechanical Turk web site, and no new Workers are allowed to accept the HIT. Workers who
        have accepted the HIT prior to expiration are allowed to complete it or return it, or
        allow the assignment duration to elapse (abandon the HIT). Once all remaining assignments
        have been submitted, the expired HIT becomes "reviewable", and will be returned by a call
        to GetReviewableHITs.
        """
        params = {'HITId': hit_id,}
        return self._process_request('ForceExpireHIT', params)

    def extend_hit(self, hit_id, assignments_increment=None, expiration_increment=None):
        """
        Increase the maximum number of assignments, or extend the expiration date, of an existing HIT.

        NOTE: If a HIT has a status of Reviewable and the HIT is extended to make it Available, the
        HIT will not be returned by GetReviewableHITs, and its submitted assignments will not
        be returned by GetAssignmentsForHIT, until the HIT is Reviewable again.
        Assignment auto-approval will still happen on its original schedule, even if the HIT has
        been extended. Be sure to retrieve and approve (or reject) submitted assignments before
        extending the HIT, if so desired.

        Raises ValueError unless exactly one of assignments_increment and
        expiration_increment is supplied.
        """
        # must provide assignment *or* expiration increment
        if (assignments_increment is None and expiration_increment is None) or \
           (assignments_increment is not None and expiration_increment is not None):
            raise ValueError("Must specify either assignments_increment or expiration_increment, but not both")

        params = {'HITId': hit_id,}
        if assignments_increment:
            params['MaxAssignmentsIncrement'] = assignments_increment
        if expiration_increment:
            params['ExpirationIncrementInSeconds'] = expiration_increment

        return self._process_request('ExtendHIT', params)

    def get_help(self, about, help_type='Operation'):
        """
        Return information about the Mechanical Turk Service operations and response group
        NOTE - this is basically useless as it just returns the URL of the documentation

        help_type: either 'Operation' or 'ResponseGroup'
        """
        params = {'About': about, 'HelpType': help_type,}
        return self._process_request('Help', params)

    def grant_bonus(self, worker_id, assignment_id, bonus_price, reason):
        """
        Issues a payment of money from your account to a Worker.
        To be eligible for a bonus, the Worker must have submitted results for one of your
        HITs, and have had those results approved or rejected. This payment happens separately
        from the reward you pay to the Worker when you approve the Worker's assignment.
        The Bonus must be passed in as an instance of the Price object.
        """
        params = bonus_price.get_as_params('BonusAmount', 1)
        params['WorkerId'] = worker_id
        params['AssignmentId'] = assignment_id
        params['Reason'] = reason

        return self._process_request('GrantBonus', params)

    def _process_request(self, request_type, params, marker_elems=None):
        """
        Helper to issue a request and process the xml response from AWS

        request_type and params are handed to make_request; marker_elems
        is passed on to _process_response.
        """
        response = self.make_request(request_type, params)
        return self._process_response(response, marker_elems)

    def _process_response(self, response, marker_elems=None):
        """
        Helper to process the xml response from AWS

        Returns a ResultSet built from marker_elems and filled by parsing
        the response body.  Raises EC2ResponseError (with the response
        status, reason and body) when the body contains an <Errors> element.
        """
        body = response.read()

        if '<Errors>' in body:
            raise EC2ResponseError(response.status, response.reason, body)

        rs = ResultSet(marker_elems)
        h = handler.XmlHandler(rs, self)
        xml.sax.parseString(body, h)
        return rs

    @staticmethod
    def get_keywords_as_string(keywords):
        """
        Returns a comma+space-separated string of keywords from either a list or a string

        None yields an empty string and a Python 2 unicode value is
        encoded to UTF-8.  Any other type raises TypeError.
        """
        # None is handled first so the Python 2-only ``unicode`` name is
        # never looked up for the common "no keywords" case.
        if keywords is None:
            return ""
        if isinstance(keywords, list):
            return ', '.join(keywords)
        if isinstance(keywords, str):
            return keywords

        try:
            unicode_type = unicode
        except NameError:
            # No separate unicode type on this interpreter.
            unicode_type = None
        if unicode_type is not None and isinstance(keywords, unicode_type):
            return keywords.encode('utf-8')

        raise TypeError("keywords argument must be a string or a list of strings; got a %s" % type(keywords))

    @staticmethod
    def get_price_as_price(reward):
        """
        Returns a Price data structure from either a float or a Price

        A Price instance is returned as-is; anything else is passed to
        the Price constructor.
        """
        if isinstance(reward, Price):
            return reward
        return Price(reward)


class BaseAutoResultElement:
    """
    Base class to automatically add attributes when parsing XML

    Every element reported through endElement is stored on the instance
    as an attribute named after the element.
    """

    def __init__(self, connection):
        """Remember the connection this element was created for."""
        self.connection = connection

    def startElement(self, name, attrs, connection):
        """No-op: nothing is recorded when an element opens."""
        pass

    def endElement(self, name, value, connection):
        """Store the element's text as the attribute called *name*."""
        setattr(self, name, value)


class HIT(BaseAutoResultElement):
    """
    Class to extract a HIT structure from a response (used in ResultSet)

    Will have attributes named as per the Developer Guide,
    e.g. HITId, HITTypeId, CreationTime
    """

    # property helper to determine if HIT has expired
    def _has_expired(self):
        """
        Has this HIT expired yet?

        Compares the current UTC time with the Expiration attribute,
        parsed with the format '%Y-%m-%dT%H:%M:%SZ'.
        Raises ValueError when the HIT has no Expiration attribute.
        """
        if not hasattr(self, 'Expiration'):
            raise ValueError("ERROR: Request for expired property, but no Expiration in HIT!")

        now = datetime.datetime.utcnow()
        expiration = datetime.datetime.strptime(self.Expiration, '%Y-%m-%dT%H:%M:%SZ')
        return now >= expiration

    # are we there yet?
    expired = property(_has_expired)


class Assignment(BaseAutoResultElement):
    """
    Class to extract an Assignment structure from a response (used in ResultSet)

    Will have attributes named as per the Developer Guide,
    e.g. AssignmentId, WorkerId, HITId, Answer, etc

    Parsed Answer elements are collected in the ``answers`` list, one
    ResultSet per Answer element.
    """

    def __init__(self, connection):
        BaseAutoResultElement.__init__(self, connection)
        # One ResultSet per Answer element; filled in by endElement.
        self.answers = []

    def endElement(self, name, value, connection):
        """
        Parse an Answer element's embedded XML into a ResultSet appended
        to self.answers; store every other element as a plain attribute.
        """
        # the answer consists of embedded XML, so it needs to be parsed independently
        if name == 'Answer':
            answer_rs = ResultSet([('Answer', QuestionFormAnswer),])
            h = handler.XmlHandler(answer_rs, connection)
            value = self.connection.get_utf8_value(value)
            xml.sax.parseString(value, h)
            self.answers.append(answer_rs)
        else:
            BaseAutoResultElement.endElement(self, name, value, connection)


class QuestionFormAnswer(BaseAutoResultElement):
489
Class to extract Answers from inside the embedded XML QuestionFormAnswers element inside the
490
Answer element which is part of the Assignment structure
492
A QuestionFormAnswers element contains an Answer element for each question in the HIT or
493
Qualification test for which the Worker provided an answer. Each Answer contains a
494
QuestionIdentifier element whose value corresponds to the QuestionIdentifier of a
495
Question in the QuestionForm. See the QuestionForm data structure for more information about
496
questions and answer specifications.
498
If the question expects a free-text answer, the Answer element contains a FreeText element. This
499
element contains the Worker's answer
501
*NOTE* - currently really only supports free-text answers
504
def __init__(self, connection):
505
BaseAutoResultElement.__init__(self, connection)
509
def endElement(self, name, value, connection):
510
if name == 'QuestionIdentifier':
512
elif name == 'FreeText' and self.qid:
513
self.fields.append((self.qid,value))
514
elif name == 'Answer':