import os
import sys

from optparse import OptionParser
from StringIO import StringIO

from twisted.internet.threads import deferToThread
from twisted.internet.defer import DeferredList
class FetchError(Exception):
    """Raised when retrieving a URL fails.

    Base class for fetch-related errors; carries the error message passed
    by the fetching code.
    """
def fetch_async(*args, **kwargs):
    """Retrieve a URL asynchronously.

    All positional and keyword arguments are forwarded to the blocking
    C{fetch} function, which is run in a thread via C{deferToThread} so
    the reactor is not blocked.

    @return: A C{Deferred} resulting in the URL content.
    """
    return deferToThread(fetch, *args, **kwargs)
def fetch_many_async(urls, callback=None, errback=None, **kwargs):
    """
    Retrieve a list of URLs asynchronously.

    @param urls: The URLs to download.
    @param callback: Optionally, a function that will be fired one time for
        each successful URL, and will be passed its content and the URL
        itself.
    @param errback: Optionally, a function that will be fired one time for
        each failing URL, and will be passed the failure and the URL itself.
    @param kwargs: Extra keyword arguments forwarded to C{fetch_async} for
        each URL.
    @return: A C{DeferredList} whose callback chain will be fired as soon as
        all downloads have terminated. If an error occurs, the errback chain
        of the C{DeferredList} will be fired immediately.
    """
    results = []
    for url in urls:
        result = fetch_async(url, **kwargs)
        # Only attach the hooks the caller actually supplied; each receives
        # the URL as an extra argument so it can tell results apart.
        if callback:
            result.addCallback(callback, url)
        if errback:
            result.addErrback(errback, url)
        results.append(result)
    # fireOnOneErrback makes the aggregate fail fast; consumeErrors prevents
    # "Unhandled error in Deferred" noise for the individual failures.
    return DeferredList(results, fireOnOneErrback=True, consumeErrors=True)
def fetch_to_files(urls, directory, logger=None, **kwargs):
    """
    Retrieve a list of URLs and save their content as files in a directory.

    @param urls: The list URLs to fetch.
    @param directory: The directory to save the files to, the name of the file
        will equal the last fragment of the URL.
    @param logger: Optional function to be used to log errors for failed URLs.
    @param kwargs: Extra keyword arguments forwarded to C{fetch_many_async}.
    @return: A C{DeferredList} firing when all downloads have terminated.
    """
    def write(data, url):
        # Save the downloaded content under the last path segment of the URL.
        filename = os.path.join(directory, url.rstrip("/").split("/")[-1])
        fd = open(filename, "w")
        try:
            fd.write(data)
        finally:
            # Always release the file descriptor, even if the write fails.
            fd.close()

    def log_error(failure, url):
        if logger:
            logger("Couldn't fetch file from %s (%s)" % (
                url, str(failure.value)))
        # Re-raise the failure so the aggregate DeferredList still errbacks.
        return failure

    return fetch_many_async(urls, callback=write, errback=log_error, **kwargs)
if __name__ == "__main__":
    # Manual smoke test: fetch the URLs given on the command line.
    # NOTE(review): `test` is defined elsewhere in this file (not in view).
    test(sys.argv[1:])