1.1.12
by Free Ekanayaka
Import upstream version 1.4.0 |
1 |
import os |
2 |
import sys |
|
3 |
||
1.1.1
by Rick Clark
Import upstream version 1.0.18 |
4 |
from optparse import OptionParser |
5 |
from StringIO import StringIO |
|
6 |
||
1.1.6
by Christopher Armstrong
Import upstream version 1.0.26 |
7 |
from twisted.internet.threads import deferToThread |
1.1.12
by Free Ekanayaka
Import upstream version 1.4.0 |
8 |
from twisted.internet.defer import DeferredList |
9 |
||
1.1.6
by Christopher Armstrong
Import upstream version 1.0.26 |
10 |
|
1.1.9
by Mathias Gug
Import upstream version 1.3.2.2 |
11 |
class FetchError(Exception):
    """Base class of the errors defined by this module."""
|
|
1.2.1
by Free Ekanayaka
Import upstream version 1.3.2.3 |
13 |
|
1.1.12
by Free Ekanayaka
Import upstream version 1.4.0 |
14 |
|
1.1.9
by Mathias Gug
Import upstream version 1.3.2.2 |
15 |
class HTTPCodeError(FetchError):
    """Error describing a server response by its HTTP code and body.

    @ivar http_code: The HTTP code given at construction time.
    @ivar body: The response body given at construction time.
    """

    def __init__(self, http_code, body):
        self.body = body
        self.http_code = http_code

    def __repr__(self):
        code = self.http_code
        return "<HTTPCodeError http_code=%d>" % code

    def __str__(self):
        code = self.http_code
        return "Server returned HTTP code %d" % code
|
26 |
||
27 |
||
1.1.9
by Mathias Gug
Import upstream version 1.3.2.2 |
28 |
class PyCurlError(FetchError):
    """Error holding a numeric error code and its textual message.

    @ivar error_code: The numeric error code given at construction time.
    @ivar message: The error message given at construction time (read-only).
    """

    def __init__(self, error_code, message):
        # The message is stored under a private name and exposed through
        # the read-only C{message} property.
        self._message = message
        self.error_code = error_code

    @property
    def message(self):
        """The error message given at construction time."""
        return self._message

    def __repr__(self):
        details = (self.error_code, self.message)
        return "<PyCurlError args=(%d, '%s')>" % details

    def __str__(self):
        details = (self.error_code, self.message)
        return "Error %d: %s" % details
|
44 |
||
45 |
||
1.1.9
by Mathias Gug
Import upstream version 1.3.2.2 |
46 |
def fetch(url, post=False, data="", headers=None, cainfo=None, curl=None,
          connect_timeout=30, total_timeout=600):
    """Retrieve a URL and return the content.

    @param url: The url to be fetched.
    @param post: If true, the POST method will be used (defaults to GET).
    @param data: Data to be sent to the server as the POST content. Text
        is encoded as UTF-8 before being sent.
    @param headers: Dictionary of header => value entries to be used
        on the request (C{None} means no extra headers).
    @param cainfo: Path to the file with CA certificates. It is only used
        when the C{url} starts with C{https:}.
    @param curl: Optionally, the curl handle to perform the request with.
        If C{None}, a new C{pycurl.Curl} is created and closed once the
        request is over; a handle passed by the caller is never closed.
    @param connect_timeout: Value for curl's C{CONNECTTIMEOUT} option.
    @param total_timeout: Value for curl's C{LOW_SPEED_TIME} option (the
        C{LOW_SPEED_LIMIT} option is set to 1).
    @return: The body of the response.
    @raise PyCurlError: If performing the request raises C{pycurl.error}.
    @raise HTTPCodeError: If the HTTP code of the response is not 200.
    """
    import io
    import pycurl

    # curl reads and writes raw bytes, so a text payload is encoded up
    # front; this also makes POSTFIELDSIZE the real byte length of the
    # request body rather than its number of characters.
    if data is None:
        data = b""
    elif not isinstance(data, bytes):
        data = data.encode("utf-8")
    request_body = io.BytesIO(data)
    response_body = io.BytesIO()

    # Only a handle created here is ours to close.
    owns_curl = curl is None
    if owns_curl:
        curl = pycurl.Curl()

    try:
        if post:
            curl.setopt(pycurl.POST, True)

            # The payload is only attached to POST requests.
            if data:
                curl.setopt(pycurl.POSTFIELDSIZE, len(data))
                curl.setopt(pycurl.READFUNCTION, request_body.read)

        if cainfo and url.startswith("https:"):
            curl.setopt(pycurl.CAINFO, cainfo)

        if headers:
            # Sorted so the header order does not depend on dict ordering.
            curl.setopt(pycurl.HTTPHEADER,
                        ["%s: %s" % pair for pair in sorted(headers.items())])

        curl.setopt(pycurl.URL, str(url))
        curl.setopt(pycurl.FOLLOWLOCATION, True)
        curl.setopt(pycurl.MAXREDIRS, 5)
        curl.setopt(pycurl.CONNECTTIMEOUT, connect_timeout)
        curl.setopt(pycurl.LOW_SPEED_LIMIT, 1)
        curl.setopt(pycurl.LOW_SPEED_TIME, total_timeout)
        curl.setopt(pycurl.NOSIGNAL, 1)
        curl.setopt(pycurl.WRITEFUNCTION, response_body.write)
        curl.setopt(pycurl.DNS_CACHE_TIMEOUT, 0)
        curl.setopt(pycurl.ENCODING, "gzip,deflate")

        try:
            curl.perform()
        except pycurl.error as e:
            raise PyCurlError(e.args[0], e.args[1])

        # Must be read before the handle is closed.
        http_code = curl.getinfo(pycurl.HTTP_CODE)
    finally:
        if owns_curl:
            curl.close()

    body = response_body.getvalue()

    if http_code != 200:
        raise HTTPCodeError(http_code, body)

    return body
|
1.1.1
by Rick Clark
Import upstream version 1.0.18 |
101 |
|
102 |
||
103 |
def test(args):
    """Fetch the URL given on the command line and print its content.

    @param args: The command line arguments, without the program name:
        exactly one URL, plus the optional C{--post}, C{--data} and
        C{--cainfo} options, which are passed on to L{fetch}.
    """
    parser = OptionParser(usage="%prog [options] URL")
    parser.add_option("--post", action="store_true")
    parser.add_option("--data", default="")
    parser.add_option("--cainfo")
    options, positional = parser.parse_args(args)
    if len(positional) != 1:
        # Report a usage error and exit, rather than failing with a bare
        # tuple-unpacking ValueError.
        parser.error("expected exactly one URL argument")
    (url,) = positional
    # A single parenthesised argument prints the same way whether print
    # is a statement or a function.
    print(fetch(url, post=options.post, data=options.data,
                cainfo=options.cainfo))
|
1.2.1
by Free Ekanayaka
Import upstream version 1.3.2.3 |
111 |
|
112 |
||
1.1.6
by Christopher Armstrong
Import upstream version 1.0.26 |
113 |
def fetch_async(*args, **kwargs):
    """Call L{fetch} through C{deferToThread}, forwarding all arguments.

    @return: A C{Deferred} resulting in the URL content.
    """
    deferred = deferToThread(fetch, *args, **kwargs)
    return deferred
119 |
||
120 |
||
1.1.12
by Free Ekanayaka
Import upstream version 1.4.0 |
121 |
def fetch_many_async(urls, callback=None, errback=None, **kwargs):
    """
    Start an asynchronous fetch for each of the given URLs.

    @param urls: The URLs to fetch; extra keyword arguments are passed on
        to L{fetch_async} for every one of them.
    @param callback: Optionally, a function that will be fired one time for
        each successful URL, and will be passed its content and the URL itself.
    @param errback: Optionally, a function that will be fired one time for each
        failing URL, and will be passed the failure and the URL itself.
    @return: A C{DeferredList} whose callback chain will be fired as soon as
        all downloads have terminated. If an error occurs, the errback chain
        of the C{DeferredList} will be fired immediately.
    """

    def start(url):
        # Kick off one download and hook the optional handlers to it, each
        # receiving the URL as an extra argument.
        deferred = fetch_async(url, **kwargs)
        if callback:
            deferred.addCallback(callback, url)
        if errback:
            deferred.addErrback(errback, url)
        return deferred

    pending = [start(url) for url in urls]
    return DeferredList(pending, fireOnOneErrback=True, consumeErrors=True)
|
142 |
||
143 |
||
144 |
def url_to_filename(url, directory=None):
    """Build a file name out of the last component of the given C{url}.

    @param url: The URL to get the filename from.
    @param directory: Optionally a path to prepend to the returned filename.
    @return: The text after the last slash of the C{url}, joined to
        C{directory} when one is given.

    @note: Any trailing slash in the C{url} will be removed
    """
    basename = url.rstrip("/").rsplit("/", 1)[-1]
    if directory is None:
        return basename
    return os.path.join(directory, basename)
|
156 |
||
157 |
||
158 |
def fetch_to_files(urls, directory, logger=None, **kwargs):
    """
    Retrieve a list of URLs and save their content as files in a directory.

    @param urls: The list of URLs to fetch.
    @param directory: The directory to save the files to, the name of the file
        will equal the last fragment of the URL.
    @param logger: Optional function to be used to log errors for failed URLs.
    @param kwargs: Extra keyword arguments, passed on to L{fetch_many_async}.
    @return: The C{DeferredList} returned by L{fetch_many_async}.
    """

    def write(data, url):
        filename = url_to_filename(url, directory=directory)
        # Binary mode stores the content exactly as it was downloaded, and
        # the context manager closes the file even if the write fails.
        with open(filename, "wb") as fd:
            fd.write(data)

    def log_error(failure, url):
        if logger:
            logger("Couldn't fetch file from %s (%s)" % (
                url, str(failure.value)))
        # Hand the failure back so that it keeps propagating down the
        # errback chain after being logged.
        return failure

    return fetch_many_async(urls, callback=write, errback=log_error, **kwargs)
|
181 |
||
182 |
||
1.1.1
by Rick Clark
Import upstream version 1.0.18 |
183 |
# Script entry point: run test() with the command-line arguments, leaving
# out the program name.
if __name__ == "__main__":
    test(sys.argv[1:])