~nchohan/+junk/mytools

« back to all changes in this revision

Viewing changes to sample_apps/httpmr/httpmr/driver.py

Committer: root
Date: 2010-11-03 07:43:57 UTC
Revision ID: root@appscale-image0-20101103074357-xea7ja3sor3x93oc

init

files added:

CHANGELOG

LICENSE

README

Rakefile

bin/appscale-add-keypair

bin/appscale-describe-instances

bin/appscale-remove-app

bin/appscale-reset-pwd

bin/appscale-run-instances

bin/appscale-terminate-instances

bin/appscale-upload-app

bin/dbtranstest

bin/dbtranstest/README

bin/dbtranstest/app.yaml

bin/dbtranstest/clientside

bin/dbtranstest/clientside/README

bin/dbtranstest/clientside/curllib.py

bin/dbtranstest/clientside/logs

bin/dbtranstest/clientside/master.py

bin/dbtranstest/clientside/mysqlnodes

bin/dbtranstest/clientside/newips

bin/dbtranstest/clientside/newips_direct

bin/dbtranstest/clientside/nohup.out

bin/dbtranstest/clientside/run_ten_slaves.sh

bin/dbtranstest/clientside/script

bin/dbtranstest/clientside/slave.py

bin/dbtranstest/clientside/slaves

bin/dbtranstest/clientside/targets

bin/dbtranstest/clientside/test_functions.py

bin/dbtranstest/clientside/tester.py

bin/dbtranstest/clientside/testingips_direct

bin/dbtranstest/dbtranstest.py

bin/dbtranstest/index.yaml

bin/dbtranstest2

bin/dbtranstest2/README

bin/dbtranstest2/app.yaml

bin/dbtranstest2/clientside

bin/dbtranstest2/clientside/README

bin/dbtranstest2/clientside/curllib.py

bin/dbtranstest2/clientside/logs

bin/dbtranstest2/clientside/master.py

bin/dbtranstest2/clientside/mysqlnodes

bin/dbtranstest2/clientside/newips

bin/dbtranstest2/clientside/newips_direct

bin/dbtranstest2/clientside/nohup.out

bin/dbtranstest2/clientside/run_ten_slaves.sh

bin/dbtranstest2/clientside/script

bin/dbtranstest2/clientside/slave.py

bin/dbtranstest2/clientside/slaves

bin/dbtranstest2/clientside/targets

bin/dbtranstest2/clientside/test_functions.py

bin/dbtranstest2/clientside/tester.py

bin/dbtranstest2/clientside/testingips_direct

bin/dbtranstest2/dbtranstest.py

bin/dbtranstest2/index.yaml

bin/ips-10.yaml

bin/ips-12.yaml

bin/ips-2-1.yaml

bin/ips-2.yaml

bin/ips-4.yaml

bin/ips-6.yaml

bin/ips-8.yaml

bin/notranstest

bin/notranstest/README

bin/notranstest/app.yaml

bin/notranstest/clientside

bin/notranstest/clientside/README

bin/notranstest/clientside/curllib.py

bin/notranstest/clientside/logs

bin/notranstest/clientside/master.py

bin/notranstest/clientside/mysqlnodes

bin/notranstest/clientside/newips

bin/notranstest/clientside/newips_direct

bin/notranstest/clientside/nohup.out

bin/notranstest/clientside/run_ten_slaves.sh

bin/notranstest/clientside/script

bin/notranstest/clientside/slave.py

bin/notranstest/clientside/slaves

bin/notranstest/clientside/targets

bin/notranstest/clientside/test_functions.py

bin/notranstest/clientside/tester.py

bin/notranstest/clientside/testingips_direct

bin/notranstest/dbtranstest.py

bin/notranstest/index.yaml

bin/run_script

debian

debian/appscale_build.sh

debian/appscale_install_functions.sh

debian/appscale_install_jaunty.sh

debian/appscale_install_karmic.sh

debian/appscale_install_lucid.sh

debian/changelog.jaunty

debian/changelog.karmic

debian/changelog.lucid

debian/compat

debian/control.jaunty

debian/control.karmic

debian/control.lucid

debian/copyright

debian/makedeb.sh

debian/package-list.awk

debian/postinst.jaunty

debian/postinst.karmic

debian/postinst.lucid

debian/rules

lib/app_controller_client.rb

lib/common_functions.rb

lib/encryption_helper.rb

lib/node_layout.rb

lib/parse_args.rb

lib/user_app_client.rb

lib/vm_tools.rb

package_app.rb

sample_apps

sample_apps/ModelUploader

sample_apps/ModelUploader/ModelUploader.py

sample_apps/ModelUploader/ModelUploader2.py

sample_apps/ModelUploader/ModelUploaderBlobstoreAPI

sample_apps/ModelUploader/ModelUploaderBlobstoreAPI.py

sample_apps/ModelUploader/README

sample_apps/ModelUploader/app.yaml

sample_apps/ModelUploader/cron.yaml

sample_apps/ModelUploader/css

sample_apps/ModelUploader/css/cmxform.css

sample_apps/ModelUploader/css/cmxformTemplate.css

sample_apps/ModelUploader/css/lavalamp_test.css

sample_apps/ModelUploader/img

sample_apps/ModelUploader/img/bg.png

sample_apps/ModelUploader/img/error.png

sample_apps/ModelUploader/img/logo.png

sample_apps/ModelUploader/img/success.png

sample_apps/ModelUploader/index.html

sample_apps/ModelUploader/index.yaml

sample_apps/ModelUploader/js

sample_apps/ModelUploader/js/jquery-1.2.3.min.js

sample_apps/ModelUploader/js/jquery-1.4.2.js

sample_apps/ModelUploader/js/jquery.easing.min.js

sample_apps/ModelUploader/js/jquery.form.js

sample_apps/ModelUploader/js/jquery.js

sample_apps/ModelUploader/js/jquery.lavalamp.js

sample_apps/ModelUploader/js/jquery.lavalamp.min.js

sample_apps/ModelUploader/js/jquery.metadata.js

sample_apps/admintest

sample_apps/admintest/admintest.py

sample_apps/admintest/app.yaml

sample_apps/ajaxchat

sample_apps/ajaxchat/app.yaml

sample_apps/ajaxchat/css

sample_apps/ajaxchat/css/main.css

sample_apps/ajaxchat/devchat.py

sample_apps/ajaxchat/index.yaml

sample_apps/ajaxchat/js

sample_apps/ajaxchat/js/util.js

sample_apps/ajaxchat/templates

sample_apps/ajaxchat/templates/chats.html

sample_apps/ajaxchat/templates/edit_user.html

sample_apps/ajaxchat/templates/index.html

sample_apps/ajaxchat/templates/user.html

sample_apps/badtrans

sample_apps/badtrans/app.yaml

sample_apps/badtrans/badtrans.py

sample_apps/badtrans/index.yaml

sample_apps/counter

sample_apps/counter/._app.yaml

sample_apps/counter/._counters.html

sample_apps/counter/._main.py

sample_apps/counter/app.yaml

sample_apps/counter/counters.html

sample_apps/counter/index.yaml

sample_apps/counter/main.py

sample_apps/cronbook

sample_apps/cronbook/app.yaml

sample_apps/cronbook/cron.yaml

sample_apps/cronbook/guestbook.py

sample_apps/cronbook/static

sample_apps/cronbook/static/images

sample_apps/cronbook/static/images/._favicon.ico

sample_apps/cronbook/static/images/favicon.ico

sample_apps/dbtest

sample_apps/dbtest/app.yaml

sample_apps/dbtest/dbtest.py

sample_apps/dbtest/index.yaml

sample_apps/dbtest/tests

sample_apps/dbtest/tests/test1.rb

sample_apps/dbtest/tests/write1.rb

sample_apps/dbtranstest

sample_apps/dbtranstest/app.yaml

sample_apps/dbtranstest/dbtranstest.py

sample_apps/dbtranstest/index.yaml

sample_apps/dbtranstest/tests

sample_apps/dbtranstest/tests/curllib.py

sample_apps/dbtranstest/tests/logs

sample_apps/dbtranstest/tests/logs/appscale01_batchdelete

sample_apps/dbtranstest/tests/logs/appscale01_batchget

sample_apps/dbtranstest/tests/logs/appscale01_batchput

sample_apps/dbtranstest/tests/logs/appscale01_query

sample_apps/dbtranstest/tests/logs/appscale02_batchdelete

sample_apps/dbtranstest/tests/logs/appscale02_batchget

sample_apps/dbtranstest/tests/logs/appscale02_batchincrement

sample_apps/dbtranstest/tests/logs/appscale02_batchput

sample_apps/dbtranstest/tests/logs/appscale02_query

sample_apps/dbtranstest/tests/logs/appscale03_batchdelete

sample_apps/dbtranstest/tests/logs/appscale03_batchget

sample_apps/dbtranstest/tests/logs/appscale03_batchput

sample_apps/dbtranstest/tests/logs/appscale03_query

sample_apps/dbtranstest/tests/logs/memcache_1rps_5trials_batchallowance

sample_apps/dbtranstest/tests/logs/memcache_1rps_5trials_batchget

sample_apps/dbtranstest/tests/logs/memcache_1rps_5trials_batchincrement

sample_apps/dbtranstest/tests/logs/memcache_1rps_5trials_batchput

sample_apps/dbtranstest/tests/logs/memcache_1rps_5trials_query

sample_apps/dbtranstest/tests/logs/test1_batchallowance

sample_apps/dbtranstest/tests/logs/test2_batchallowance

sample_apps/dbtranstest/tests/logs/test3_batchallowance

sample_apps/dbtranstest/tests/logs/test4_batchallowance

sample_apps/dbtranstest/tests/logs/test5_batchallowance

sample_apps/dbtranstest/tests/logs/test6_batchallowance

sample_apps/dbtranstest/tests/logs/test6_batchdelete

sample_apps/dbtranstest/tests/logs/test6_batchget

sample_apps/dbtranstest/tests/logs/test6_batchincrement

sample_apps/dbtranstest/tests/logs/test6_batchput

sample_apps/dbtranstest/tests/logs/test6_query

sample_apps/dbtranstest/tests/logs/test7_batchallowance

sample_apps/dbtranstest/tests/logs/test7_batchdelete

sample_apps/dbtranstest/tests/logs/test7_batchget

sample_apps/dbtranstest/tests/logs/test7_batchincrement

sample_apps/dbtranstest/tests/logs/test7_batchput

sample_apps/dbtranstest/tests/logs/test7_query

sample_apps/dbtranstest/tests/test_functions.py

sample_apps/dbtranstest/tests/tester.py

sample_apps/ec2demo

sample_apps/ec2demo/app.yaml

sample_apps/ec2demo/images

sample_apps/ec2demo/images/cloud.png

sample_apps/ec2demo/index.html

sample_apps/ec2demo/index.yaml

sample_apps/ec2demo/main.py

sample_apps/ec2demo/queue.yaml

sample_apps/ec2demo/stylesheets

sample_apps/ec2demo/stylesheets/all.css

sample_apps/email

sample_apps/email/__init__.py

sample_apps/email/app.yaml

sample_apps/email/index.yaml

sample_apps/email/main.py

sample_apps/email/manage.py

sample_apps/email/sendmail

sample_apps/email/sendmail/__init__.py

sample_apps/email/sendmail/views.py

sample_apps/email/settings.py

sample_apps/email/urls.py

sample_apps/guestbook

sample_apps/guestbook/app.yaml

sample_apps/guestbook/guestbook.py

sample_apps/guestbook/static

sample_apps/guestbook/static/images

sample_apps/guestbook/static/images/._favicon.ico

sample_apps/guestbook/static/images/favicon.ico

sample_apps/httpmr

sample_apps/httpmr/app.yaml

sample_apps/httpmr/construct_document_index.py

sample_apps/httpmr/construct_document_index.sh

sample_apps/httpmr/httpmr

sample_apps/httpmr/httpmr/__init__.py

sample_apps/httpmr/httpmr/appengine.py

sample_apps/httpmr/httpmr/base.py

sample_apps/httpmr/httpmr/driver.py

sample_apps/httpmr/httpmr/mappers.py

sample_apps/httpmr/httpmr/master.py

sample_apps/httpmr/httpmr/reducers.py

sample_apps/httpmr/httpmr/sinks.py

sample_apps/httpmr/httpmr/templates

sample_apps/httpmr/httpmr/templates/base.html

sample_apps/httpmr/httpmr/templates/cleanup.html

sample_apps/httpmr/httpmr/templates/cleanup_master.html

sample_apps/httpmr/httpmr/templates/map_master.html

sample_apps/httpmr/httpmr/templates/mapper.html

sample_apps/httpmr/httpmr/templates/reduce_master.html

sample_apps/httpmr/httpmr/templates/reducer.html

sample_apps/httpmr/index.html

sample_apps/httpmr/load_fake_documents.py

sample_apps/images-api-in-action

sample_apps/images-api-in-action/app.yaml

sample_apps/images-api-in-action/images

sample_apps/images-api-in-action/images/cloud.png

sample_apps/images-api-in-action/index.html

sample_apps/images-api-in-action/index.yaml

sample_apps/images-api-in-action/main.py

sample_apps/images-api-in-action/queue.yaml

sample_apps/images-api-in-action/stylesheets

sample_apps/images-api-in-action/stylesheets/all.css

sample_apps/javabook

sample_apps/javabook/src

sample_apps/javabook/src/META-INF

sample_apps/javabook/src/META-INF/jdoconfig.xml

sample_apps/javabook/src/guestbook

sample_apps/javabook/src/guestbook/DeleteServlet.java

sample_apps/javabook/src/guestbook/Greeting.java

sample_apps/javabook/src/guestbook/GuestbookServlet.java

sample_apps/javabook/src/guestbook/PMF.java

sample_apps/javabook/src/guestbook/SignGuestbookServlet.java

sample_apps/javabook/src/log4j.properties

sample_apps/javabook/war

sample_apps/javabook/war/WEB-INF

sample_apps/javabook/war/WEB-INF/appengine-generated

sample_apps/javabook/war/WEB-INF/appengine-generated/datastore-indexes-auto.xml

sample_apps/javabook/war/WEB-INF/appengine-generated/local_db.bin

sample_apps/javabook/war/WEB-INF/appengine-web.xml

sample_apps/javabook/war/WEB-INF/classes

sample_apps/javabook/war/WEB-INF/classes/META-INF

sample_apps/javabook/war/WEB-INF/classes/META-INF/jdoconfig.xml

sample_apps/javabook/war/WEB-INF/classes/guestbook

sample_apps/javabook/war/WEB-INF/classes/guestbook/DeleteServlet.class

sample_apps/javabook/war/WEB-INF/classes/guestbook/Greeting.class

sample_apps/javabook/war/WEB-INF/classes/guestbook/GuestbookServlet.class

sample_apps/javabook/war/WEB-INF/classes/guestbook/PMF.class

sample_apps/javabook/war/WEB-INF/classes/guestbook/SignGuestbookServlet.class

sample_apps/javabook/war/WEB-INF/classes/log4j.properties

sample_apps/javabook/war/WEB-INF/lib

sample_apps/javabook/war/WEB-INF/lib/appengine-api-1.0-sdk-1.2.6.jar

sample_apps/javabook/war/WEB-INF/lib/appengine-api-labs-1.2.6.jar

sample_apps/javabook/war/WEB-INF/lib/datanucleus-appengine-1.0.3.jar

sample_apps/javabook/war/WEB-INF/lib/datanucleus-core-1.1.5.jar

sample_apps/javabook/war/WEB-INF/lib/datanucleus-jpa-1.1.5.jar

sample_apps/javabook/war/WEB-INF/lib/geronimo-jpa_3.0_spec-1.1.1.jar

sample_apps/javabook/war/WEB-INF/lib/geronimo-jta_1.1_spec-1.1.1.jar

sample_apps/javabook/war/WEB-INF/lib/jdo2-api-2.3-eb.jar

sample_apps/javabook/war/WEB-INF/logging.properties

sample_apps/javabook/war/WEB-INF/web.xml

sample_apps/javabook/war/guestbook.jsp

sample_apps/javabook/war/stylesheets

sample_apps/javabook/war/stylesheets/main.css

sample_apps/mapreduce

sample_apps/mapreduce/app.yaml

sample_apps/mapreduce/gen_input.rb

sample_apps/mapreduce/index.yaml

sample_apps/mapreduce/main.py

sample_apps/mapreduce/map.rb

sample_apps/mapreduce/mapreduce.html

sample_apps/mapreduce/queue.yaml

sample_apps/mapreduce/reduce.rb

sample_apps/memcachebook

sample_apps/memcachebook/app.yaml

sample_apps/memcachebook/index.html

sample_apps/memcachebook/index.yaml

sample_apps/memcachebook/memcachebook.py

sample_apps/petlog

sample_apps/petlog/app.yaml

sample_apps/petlog/deletetest.py

sample_apps/putgetquerydelete

sample_apps/putgetquerydelete/app.yaml

sample_apps/putgetquerydelete/index.yaml

sample_apps/putgetquerydelete/putgetquerydelete.py

sample_apps/querytest

sample_apps/querytest/app.yaml

sample_apps/querytest/querytest.py

sample_apps/shell

sample_apps/shell/README

sample_apps/shell/app.yaml

sample_apps/shell/shell.py

sample_apps/shell/static

sample_apps/shell/static/shell.js

sample_apps/shell/static/spinner.gif

sample_apps/shell/templates

sample_apps/shell/templates/shell.html

sample_apps/sleep

sample_apps/sleep/app.yaml

sample_apps/sleep/sleep.py

sample_apps/tasks

sample_apps/tasks/app.yaml

sample_apps/tasks/index.yaml

sample_apps/tasks/static

sample_apps/tasks/static/css

sample_apps/tasks/static/css/base.css

sample_apps/tasks/static/favicon.ico

sample_apps/tasks/static/images

sample_apps/tasks/static/images/button-background.gif

sample_apps/tasks/static/images/logo.png

sample_apps/tasks/static/images/zip-plus.gif

sample_apps/tasks/static/javascript

sample_apps/tasks/static/javascript/debug

sample_apps/tasks/static/javascript/debug/browser.js

sample_apps/tasks/static/javascript/debug/dialog.js

sample_apps/tasks/static/javascript/debug/dom.js

sample_apps/tasks/static/javascript/debug/drag.js

sample_apps/tasks/static/javascript/debug/event.js

sample_apps/tasks/static/javascript/debug/externs_tasks.js

sample_apps/tasks/static/javascript/debug/io.js

sample_apps/tasks/static/javascript/debug/lang.js

sample_apps/tasks/static/javascript/debug/offscreen.js

sample_apps/tasks/static/javascript/debug/tasklist.js

sample_apps/tasks/static/javascript/tasks.js

sample_apps/tasks/tasks.py

sample_apps/tasks/templatefilters.py

sample_apps/tasks/templates

sample_apps/tasks/templates/base.html

sample_apps/tasks/templates/index.html

sample_apps/tasks/templates/tasklist_atom.xml

sample_apps/tasks/templates/tasklist_default.html

sample_apps/tasks/templates/tasklist_html.html

sample_apps/transtest

sample_apps/transtest/app.yaml

sample_apps/transtest/index.yaml

sample_apps/transtest/transtest.py

sample_apps/xmpptest

sample_apps/xmpptest/app.yaml

sample_apps/xmpptest/static

sample_apps/xmpptest/static/images

sample_apps/xmpptest/static/images/._favicon.ico

sample_apps/xmpptest/static/images/favicon.ico

sample_apps/xmpptest/xmpptest.py

templates

templates/ips.yaml

test

test/test_common_functions.rb

test/test_helper.rb

test/test_node_layout.rb

test/test_parse_args.rb

test_apps

test_apps/memcachetest.tar.gz

test_apps/querytest.tar.gz

Show diffs side-by-side

added added

removed removed

sample_apps/httpmr/httpmr/driver.py

#!/usr/bin/python

"""Simple multithreaded HTTP request driver for HTTPMR.

Command-line tool for driving HTTPMR operations. Spawns multiple threads for

concurrent shard operation, handles statistics collection and operation failure

retries.

Sample usage:

driver.py --httpmr_base=http://your.app.com/httpmr_base_url \

--max_operations_inflight=10 \

--max_per_operation_failures=10

"""

import HTMLParser

import logging

import optparse

import time

import sys

import threading

import urllib

import urllib2

import urlparse

# Values of the "task" query parameter appended to the HTTPMR base URL to
# fetch the master page of the map, reduce and cleanup phases respectively.
MAP_MASTER_TASK_NAME = "map_master"

REDUCE_MASTER_TASK_NAME = "reduce_master"

INTERMEDIATE_DATA_CLEANUP_MASTER_TASK_NAME = "cleanup_master"

# Name of the operation URL query parameter whose value is lowered each time
# an operation is retried after an HTTPError.
OPERATION_TIMEOUT_SEC = "operation_timeout"

# Lower bound applied when the operation timeout parameter is reduced.
MIN_OPERATION_TIMEOUT_SEC_VALUE = 0.5

# Sentinel meaning "no limit" for the maximum number of tries and for the
# maximum number of operations in flight.
INFINITE_PARAMETER_VALUE = -1

class Error(Exception):
    """Root of the exception hierarchy raised by this driver module."""

class UncrecoverableOperationError(Error):
    """Parent of every operation error that cannot be fixed by retrying.

    The spelling of the class name ("Uncrecoverable") is intentional to keep:
    other code in this module catches the exception under this exact name.
    """

class TooManyTriesError(UncrecoverableOperationError):
    """Raised once an operation has used up all of its allowed tries."""

class OperationResult(object):
    """Simple data object that holds the result of a map or reduce operation.

    To use, set public instance attributes directly.  Not meant to be used
    outside the context of this module.

    Attributes:
      url: URL of the operation this result describes.
      next_url: URL of the follow-up operation, or None when there is none.
      errors: List of the exceptions seen while trying the operation.
      tries: Number of times the operation has been attempted.
      statistics: Dict mapping a statistic name to its float value.
    """

    def __init__(self):
        self.url = None
        self.next_url = None
        self.errors = []
        self.tries = 0
        self.statistics = {}

    def __str__(self):
        return str({"url": self.url,
                    "next_url": self.next_url,
                    "errors": self.errors,
                    "tries": self.tries,
                    "statistics": self.statistics})

    def ParseStatisticsString(self, statistics_string):
        """Replace self.statistics with the values parsed from a text block.

        Each line is expected to look like "<name> <number>".  Lines that do
        not consist of exactly two whitespace-separated fields are ignored, as
        are lines whose second field is not a valid float (a warning is logged
        for the latter instead of aborting the whole parse).

        Args:
          statistics_string: The raw text of the statistics block.
        """
        logging.debug("Parsing statistics from %s", statistics_string)
        self.statistics = {}
        for line in statistics_string.splitlines():
            # split() without arguments tolerates leading/trailing padding
            # and runs of whitespace between the two fields.
            fields = line.split()
            if len(fields) != 2:
                continue
            key, raw_value = fields
            try:
                self.statistics[key] = float(raw_value)
            except ValueError:
                logging.warning("Ignoring non-numeric statistic %r", line)
        logging.debug("Got statistics: %s", self.statistics)

class OperationResultHTMLParser(HTMLParser.HTMLParser):
    """HTMLParser that reads the HTML page from a Map or Reduce operation.

    After parsing, two attributes may exist on the parser:
      url: The 'href' of the last <a> tag seen (None if that tag had no
        'href').  The attribute is absent when the page had no <a> tag.
      statistics: The text found inside the last <pre> element.  The
        attribute is absent when the page had no <pre> tag.

    Callers are expected to probe for these attributes with hasattr(), so
    they are deliberately not pre-initialised.
    """

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            self.handle_start_a_tag(attrs)
        elif tag == "pre":
            self.handle_start_pre_tag(attrs)

    def handle_endtag(self, tag):
        """Finish collecting statistics text when the <pre> element closes."""
        if tag == "pre" and getattr(self, "_in_pre_tag", False):
            self._FinishPreTag()

    def handle_start_a_tag(self, attrs):
        """Determine the next operation's URL."""
        self.url = None
        for name, value in attrs:
            if name == "href":
                self.url = value

    def handle_start_pre_tag(self, attrs):
        """Start collecting the statistics text held by the <pre> tag."""
        self._in_pre_tag = True
        self._pre_chunks = []

    def handle_data(self, data):
        # The parser may deliver the text of one element in several pieces,
        # so the pieces are accumulated until the element is closed.
        if getattr(self, "_in_pre_tag", False):
            self._pre_chunks.append(data)

    def close(self):
        """Flush the parser; also finish a <pre> that was never closed."""
        HTMLParser.HTMLParser.close(self)
        if getattr(self, "_in_pre_tag", False):
            self._FinishPreTag()

    def _FinishPreTag(self):
        """Publish the collected <pre> text as self.statistics."""
        self.statistics = "".join(self._pre_chunks)
        self._in_pre_tag = False

103

104

105

class MasterPageResultHTMLParser(HTMLParser.HTMLParser):
    """HTMLParser that reads the HTML page from a Master page.

    Attributes:
      urls: List of the 'href' values of every <a> tag seen, in page order.
    """

    def __init__(self, *args, **kwargs):
        HTMLParser.HTMLParser.__init__(self, *args, **kwargs)
        # Make the parser usable even when the caller forgets to call Init().
        self.Init()

    def Init(self):
        """Reset the list of collected URLs to empty."""
        self.urls = []

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            self.handle_start_a_tag(attrs)

    def handle_start_a_tag(self, attrs):
        """Read the 'href' attribute from an 'a' tag and add it to self.urls.

        The master page of an HTTPMR phase lists a set of <a> tags, each
        representing the first operation of the relevant shard.  Each of these
        links should be retained and used to populate the initial set of
        operation threads.
        """
        logging.debug("Reading 'a' tag: %s", attrs)
        for name, value in attrs:
            if name == "href":
                self.urls.append(value)

127

128

129

class OperationThread(threading.Thread):
    """An OperationThread handles the execution and retry of an HTTP request.

    The OperationThread handles executing and retrying an HTTP request to a
    single Map or Reduce operation.  Once the thread has successfully
    completed its operation (successfully fetched the url assigned via #SetUrl
    and parsed the operation result page HTML), the callback set via
    #SetOperationCallback is invoked.  If there is an unrecoverable error
    (i.e., too many operation failures), the callback set via
    #SetUnrecoverableErrorCallback is invoked instead.
    """

    def __init__(self, *args, **kwargs):
        threading.Thread.__init__(self, *args, **kwargs)
        # Defaults so that a partially configured thread trips the explicit
        # checks in run() rather than failing with an AttributeError.
        self.url = None
        self.operation_callback = None
        self.operation_callback_kwargs = {}
        self.error_callback = None
        self.error_callback_kwargs = {}
        self.max_tries = INFINITE_PARAMETER_VALUE
        self.html = None
        self.results = None
        self._cancel = False

    def SetOperationCallback(self, callback, **kwargs):
        """Set the callback that will be invoked when this operation is finished.

        args:
          callback: A callable that takes one parameter, the OperationResult
            constructed by this thread when the operation has completed, and
            the supplied keyword arguments.
          kwargs: Keyword arguments that should be passed to the callback.
        """
        self.operation_callback = callback
        self.operation_callback_kwargs = kwargs

    def SetUnrecoverableErrorCallback(self, callback, **kwargs):
        """Set the callback that will be invoked on unrecoverable errors.

        args:
          callback: A callable that takes the failed URL as its first
            argument, the unrecoverable exception as its second, and the
            supplied keyword arguments.
          kwargs: The keyword arguments that should be supplied to the
            callback.
        """
        self.error_callback = callback
        self.error_callback_kwargs = kwargs

    def SetMaxTries(self, max_tries):
        """Set the maximum number of tries that the operation can be performed.

        If the operation is attempted unsuccessfully this number of times, the
        operation is considered to fail and a TooManyTriesError is handed to
        the unrecoverable error callback.  INFINITE_PARAMETER_VALUE removes
        the limit.
        """
        self.max_tries = max_tries

    def SetUrl(self, url):
        """Specify the URL that this operation should operate on.

        Also clears any earlier cancellation request.
        """
        self.url = url
        self._cancel = False

    def run(self):
        """Fetch the URL, retry on failures, invoke error or operation callbacks."""
        assert self.url is not None
        assert self.operation_callback is not None
        assert self.error_callback is not None
        self.html = None

        logging.info("Starting operation on %s.", self.url)

        self.results = OperationResult()
        self.results.url = self.url

        if self._cancel:
            return
        try:
            self.html = self._FetchWithRetries(self.url, self.max_tries)
        except UncrecoverableOperationError as e:
            self.error_callback(self.url, e, **self.error_callback_kwargs)
            # Nothing was fetched, so there is no page to parse and the
            # success callback must not run.
            return
        logging.debug("Retrieved HTML %s", self.html)

        self._PopulateResults()
        if not self._cancel:
            self.operation_callback(self.results,
                                    **self.operation_callback_kwargs)

    def Cancel(self):
        """Ask the thread to stop; no operation callback is invoked afterwards."""
        self._cancel = True

    def _FetchWithRetries(self, url, max_tries):
        """Fetch url, retrying after every HTTPError.

        Before each retry the operation timeout carried in the URL is lowered
        and the thread sleeps for a period that grows with the try count.

        Args:
          url: The URL to fetch.
          max_tries: Maximum number of attempts, or INFINITE_PARAMETER_VALUE.

        Returns:
          The body of the fetched page.

        Raises:
          TooManyTriesError: If every allowed attempt failed with an HTTPError.
        """
        # NOTE(review): only urllib2.HTTPError is retried; other fetch
        # failures propagate out of the thread -- confirm this is intended.
        unlimited = max_tries == INFINITE_PARAMETER_VALUE
        tries = 0
        while unlimited or tries < max_tries:
            tries += 1
            self.results.tries = tries
            try:
                return self._Fetch(url)
            except urllib2.HTTPError as e:
                logging.warning("HTTPError on fetch of %s: %s", url, e)
                url = self._ReduceOperationTimeout(url)
                self.results.errors.append(e)
            # Only sleep when another attempt will actually be made.
            if unlimited or tries < max_tries:
                self._WaitForRetry(tries)
        raise TooManyTriesError("Too many tries on URL %s" % url)

    def _WaitForRetry(self, tries):
        """Sleep 30 seconds per try made so far, capped at 600 seconds."""
        wait_time_sec = min(30 * tries, 600)
        logging.info("Sleeping for %s seconds.", wait_time_sec)
        time.sleep(wait_time_sec)

    def _Fetch(self, url):
        """Return the body of url, always closing the response."""
        safe_url = self._GetSafeUrl(url)
        logging.debug("Fetching %s", safe_url)
        response = urllib2.urlopen(safe_url)
        try:
            return response.read()
        finally:
            response.close()

    def _GetSafeUrl(self, url):
        """Return url with its query string percent-quoted.

        The '&' and '=' separators are restored after quoting so that the
        parameter structure of the query survives.
        """
        parts = urlparse.urlsplit(url)
        safe_query = \
            urllib.quote(parts.query).replace("%26", "&").replace("%3D", "=")
        return urlparse.urlunsplit((parts.scheme,
                                    parts.netloc,
                                    parts.path,
                                    safe_query,
                                    parts.fragment))

    def _ReduceOperationTimeout(self, url):
        """Return url with its operation timeout parameter lowered by one.

        The new value never drops below MIN_OPERATION_TIMEOUT_SEC_VALUE.  If
        the URL carries no usable timeout parameter, a warning is logged and
        the URL is returned unchanged.
        """
        current_timeout = None
        # TODO: Hand URL parameter parsing off to a library, here and elsewhere
        # partition() copes with URLs lacking a query string and with
        # parameters that have no '=' or more than one.
        query = url.partition("?")[2]
        for key_value in query.split("&"):
            key, _, value = key_value.partition("=")
            if key == OPERATION_TIMEOUT_SEC:
                # At this point current_timeout is a string
                current_timeout = value

        new_timeout = None
        if current_timeout is not None:
            try:
                new_timeout = max(float(current_timeout) - 1,
                                  MIN_OPERATION_TIMEOUT_SEC_VALUE)
            except ValueError:
                # Present but not a number; handled like a missing value.
                new_timeout = None

        if new_timeout is None:
            logging.warning("Could not parse the operation timeout from URL '%s', "
                            "operation retry with original timeout value." % url)
            return url
        return url.replace("%s=%s" % (OPERATION_TIMEOUT_SEC, current_timeout),
                           "%s=%s" % (OPERATION_TIMEOUT_SEC, new_timeout))

    def _PopulateResults(self):
        """Parse the fetched page into self.results (next URL and statistics)."""
        parser = OperationResultHTMLParser()
        parser.feed(self.html)
        parser.close()

        # The parser only creates these attributes when the matching tag was
        # present on the page.
        self.results.next_url = None
        if hasattr(parser, "url"):
            self.results.next_url = parser.url
        if hasattr(parser, "statistics"):
            self.results.ParseStatisticsString(parser.statistics)

271

272

273

class HTTPMRDriver(object):
    """Drives the Map, Reduce and Cleanup phases of one HTTPMR job.

    For each phase the driver fetches the phase's master page, starts one
    OperationThread per link found there, follows each operation's "next"
    link until a shard has none, and moves on to the following phase once no
    operation is in flight.
    """

    # NOTE: these two lists are class attributes and are therefore shared by
    # every HTTPMRDriver instance in the process.
    # All operation threads created so far (used for cancellation).
    threads = []
    # Threads that have been created but not started yet.
    threads_pending = []

    def __init__(self,
                 httpmr_base,
                 max_operation_tries=-1,
                 max_operations_inflight=-1):
        """Create a driver.

        Args:
          httpmr_base: Base URL of the HTTPMR operation.
          max_operation_tries: Maximum tries per operation; -1 for no limit.
          max_operations_inflight: Maximum number of concurrently running
            operations; -1 for no limit.
        """
        self.httpmr_base = httpmr_base
        self.max_operation_tries = max_operation_tries
        self.max_operations_inflight = max_operations_inflight
        self.results = []
        self.threads_inflight = 0
        self.lock = threading.Lock()

    def Run(self):
        """Begin the Driver's Map - Reduce - Cleanup phase.

        It is important to use this method as the primary entry point, as it
        may be utilized in the future to precompute optimal operation
        parameters.
        """
        logging.info("Beginning HTTPMR Driver Run with base URL %s",
                     self.httpmr_base)
        self.Map()

    def _HandleUnrecoverableOperationError(self, url, error):
        """Cancel all known operations and jump straight to the cleanup phase."""
        logging.error("Unrecoverable error on url %s: %s; %s",
                      url, type(error), error)
        # Held so that the shared thread lists are not modified while a
        # completion handler is working on them.
        with self.lock:
            for thread in HTTPMRDriver.threads:
                thread.Cancel()
            # Threads of the failed phase that were never started must not be
            # picked up (and counted as in flight) during cleanup.
            del HTTPMRDriver.threads_pending[:]
            logging.info("Going to cleanup.")
            self.Cleanup()

    def Map(self):
        """Start the map phase; the reduce phase follows when it completes."""
        self._LaunchPhase(MAP_MASTER_TASK_NAME, self._AllMapOperationsComplete)

    def _AllMapOperationsComplete(self):
        logging.info("Done Mapping!")
        self.Reduce()

    def Reduce(self):
        """Start the reduce phase; the cleanup phase follows when it completes."""
        self._LaunchPhase(REDUCE_MASTER_TASK_NAME,
                          self._AllReduceOperationsComplete)

    def _AllReduceOperationsComplete(self):
        logging.info("Done Reducing!")
        self.Cleanup()

    def Cleanup(self):
        """Start the intermediate data cleanup phase."""
        self._LaunchPhase(INTERMEDIATE_DATA_CLEANUP_MASTER_TASK_NAME,
                          self._AllCleanupOperationsComplete)

    def _AllCleanupOperationsComplete(self):
        logging.info("Done Cleaning Up!")
        logging.debug("Results: %s", self.results)
        logging.info("Comprehensive Results: %s", self._GetAggregateResults())

    def _GetAggregateResults(self):
        """Return the per-key sum of the statistics of all collected results.

        A key contributes to the total even when only some of the results
        report it.  Returns an empty dict when there are no results.
        """
        totals = {}
        for result in self.results:
            for key, value in result.statistics.items():
                totals[key] = totals.get(key, 0) + value
        return totals

    def _LaunchPhase(self, phase_task_name, all_operations_complete_callback):
        """Fetch the phase's master page and start its initial operations.

        Operations beyond max_operations_inflight are queued as pending and
        started as running ones complete.

        Args:
          phase_task_name: Value of the "task" parameter naming the phase.
          all_operations_complete_callback: Callable invoked, without
            arguments, once no operation of the phase is in flight any more.
        """
        logging.info("Starting %s phase.", phase_task_name)
        base_urls = self._GetInitialUrls(phase_task_name)
        logging.debug("Initial URLs: %s", ", ".join(base_urls))
        self.threads_inflight = 0
        unlimited = self.max_operations_inflight == INFINITE_PARAMETER_VALUE
        for url in base_urls:
            thread = self._CreateOperationThread(
                url, all_operations_complete_callback)
            HTTPMRDriver.threads.append(thread)
            if unlimited or self.threads_inflight < self.max_operations_inflight:
                self.threads_inflight += 1
                thread.start()
            else:
                HTTPMRDriver.threads_pending.append(thread)

    def _GetInitialUrls(self, task):
        """Return the list of links found on the master page of the given task."""
        url = "%s?task=%s" % (self.httpmr_base, task)
        response = urllib2.urlopen(url)
        try:
            html = response.read()
        finally:
            response.close()
        parser = MasterPageResultHTMLParser()
        parser.Init()
        parser.feed(html)
        parser.close()
        return parser.urls

    def _CreateOperationThread(self, url, all_operations_complete_callback):
        """Build (but do not start) an OperationThread for url."""
        thread = OperationThread()
        thread.SetUrl(url)
        thread.SetMaxTries(self.max_operation_tries)
        thread.SetOperationCallback(
            self._HandleThreadCompletion,
            all_operations_complete_callback=all_operations_complete_callback)
        thread.SetUnrecoverableErrorCallback(
            self._HandleUnrecoverableOperationError)
        return thread

    def _HandleThreadCompletion(self, results, all_operations_complete_callback):
        """Record a finished operation and schedule whatever comes next.

        Runs on the finishing operation's thread.  The whole body executes
        under self.lock, which is released even if a callback raises.

        Args:
          results: The OperationResult of the finished operation.
          all_operations_complete_callback: Callable invoked when no operation
            of the current phase is in flight any more.
        """
        with self.lock:
            self.threads_inflight -= 1

            self.results.append(results)
            if results.next_url is not None:
                logging.debug("Initializing new thread to handle %s",
                              results.next_url)
                thread = self._CreateOperationThread(
                    results.next_url, all_operations_complete_callback)
                # Registered so that it can be cancelled like the initial
                # threads of the phase.
                HTTPMRDriver.threads.append(thread)
                HTTPMRDriver.threads_pending.insert(0, thread)

            if HTTPMRDriver.threads_pending:
                logging.debug("Starting the next pending thread.")
                thread = HTTPMRDriver.threads_pending.pop()
                self.threads_inflight += 1
                thread.start()

            if not self.threads_inflight:
                logging.debug("All threads completed for this phase.")
                all_operations_complete_callback()

400

401

402

def main():
    """Parse the command line and run the driver.

    Runs the full Map - Reduce - Cleanup sequence, or only the cleanup phase
    when --cleanup_only is given.  Exits with a usage error when the
    mandatory --httpmr_base option is missing.
    """
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)-8s %(message)s',
                        datefmt='%a, %d %b %Y %H:%M:%S',
                        stream=sys.stdout)
    options_parser = optparse.OptionParser()
    options_parser.add_option("-b",
                              "--httpmr_base",
                              action="store",
                              type="string",
                              dest="httpmr_base",
                              help="The base URL of the HTTPMR operation.")
    options_parser.add_option("-i",
                              "--max_operations_inflight",
                              action="store",
                              type="int",
                              dest="max_operations_inflight",
                              default=-1,
                              help="The maximum number of operations to keep "
                                   "simultaneously inflight.  -1 for inf.")
    options_parser.add_option("-f",
                              "--max_per_operation_failures",
                              action="store",
                              type="int",
                              dest="max_per_operation_failures",
                              default=-1,
                              help="The maximum number of times any given "
                                   "operation can fail before a fatal error is"
                                   " thrown.  -1 for inf.")
    options_parser.add_option("-c",
                              "--cleanup_only",
                              action="store_true",
                              dest="cleanup_only",
                              default=False,
                              help="Only execute the intermediate data cleanup "
                                   "phase.")
    options, _ = options_parser.parse_args()

    # Without a base URL every request would go to "None?task=...".
    if not options.httpmr_base:
        options_parser.error("--httpmr_base is required.")

    driver = HTTPMRDriver(options.httpmr_base,
                          options.max_per_operation_failures,
                          options.max_operations_inflight)
    if options.cleanup_only:
        driver.Cleanup()
    else:
        driver.Run()

447

448

449

if __name__ == "__main__":

450

main()

b'\\ No newline at end of file'

Older »