~noskcaj/ubuntu/vivid/urlgrabber/3.10.1

« back to all changes in this revision

Viewing changes to debian/patches/grabber_fix.diff

Committer: Jackson Doak
Date: 2014-12-13 22:18:24 UTC
mfrom: (1.1.5)
Revision ID: noskcaj@ubuntu.com-20141213221824-urw2c597urpzwqt3

* New upstream release.
* Drop all patches, fixed upstream

files added:
scripts/urlgrabber-ext-down

files removed:
.pc/applied-patches

.pc/grabber_fix.diff

.pc/grabber_fix.diff/urlgrabber

.pc/grabber_fix.diff/urlgrabber/grabber.py

.pc/progress_fix.diff

.pc/progress_fix.diff/urlgrabber

.pc/progress_fix.diff/urlgrabber/progress.py

.pc/progress_object_callback_fix.diff

.pc/progress_object_callback_fix.diff/urlgrabber

.pc/progress_object_callback_fix.diff/urlgrabber/grabber.py

debian/patches/grabber_fix.diff

debian/patches/progress_fix.diff

debian/patches/progress_object_callback_fix.diff

files modified:
ChangeLog

MANIFEST

PKG-INFO

README

debian/changelog

debian/patches/series

scripts/urlgrabber

setup.py

test/base_test_code.py

test/munittest.py

test/test_byterange.py

test/test_grabber.py

test/test_mirror.py

urlgrabber/__init__.py

urlgrabber/byterange.py

urlgrabber/grabber.py

urlgrabber/mirror.py

urlgrabber/progress.py

Show diffs side-by-side

added

removed

debian/patches/grabber_fix.diff

--- urlgrabber-3.9.1/urlgrabber/grabber.py.orig 2010-07-02 21:24:12.000000000 -0400

+++ urlgrabber-3.9.1/urlgrabber/grabber.py 2010-07-02 20:30:25.000000000 -0400

@@ -68,14 +68,14 @@

(which can be set on default_grabber.throttle) is used. See

BANDWIDTH THROTTLING for more information.

- timeout = None

+ timeout = 300

- a positive float expressing the number of seconds to wait for socket

- operations. If the value is None or 0.0, socket operations will block

- forever. Setting this option causes urlgrabber to call the settimeout

- method on the Socket object used for the request. See the Python

- documentation on settimeout for more information.

- http://www.python.org/doc/current/lib/socket-objects.html

+ a positive integer expressing the number of seconds to wait before

+ timing out attempts to connect to a server. If the value is None

+ or 0, connection attempts will not time out. The timeout is passed

+ to the underlying pycurl object as its CONNECTTIMEOUT option, see

+ the curl documentation on CURLOPT_CONNECTTIMEOUT for more information.

+ http://curl.haxx.se/libcurl/c/curl_easy_setopt.html#CURLOPTCONNECTTIMEOUT

bandwidth = 0

@@ -439,6 +439,12 @@

except:

__version__ = '???'

+try:

+ # this part isn't going to do much - need to talk to gettext

+ from i18n import _

+except ImportError, msg:

+ def _(st): return st

########################################################################

# functions for debugging output. These functions are here because they

# are also part of the module initialization.

@@ -808,7 +814,7 @@

self.prefix = None

self.opener = None

self.cache_openers = True

- self.timeout = None

+ self.timeout = 300

self.text = None

self.http_headers = None

self.ftp_headers = None

@@ -1052,9 +1058,15 @@

self._reget_length = 0

self._prog_running = False

self._error = (None, None)

- self.size = None

+ self.size = 0

+ self._hdr_ended = False

self._do_open()

+ def geturl(self):

+ """ Provide the geturl() method, used to be got from

+ urllib.addinfourl, via. urllib.URLopener.* """

+ return self.url

def __getattr__(self, name):

"""This effectively allows us to wrap at the instance level.

@@ -1085,9 +1097,14 @@

return -1

def _hdr_retrieve(self, buf):

+ if self._hdr_ended:

+ self._hdr_dump = ''

+ self.size = 0

+ self._hdr_ended = False

if self._over_max_size(cur=len(self._hdr_dump),

max_size=self.opts.max_header_size):

- return -1

+ return -1

try:

self._hdr_dump += buf

# we have to get the size before we do the progress obj start

@@ -1104,7 +1121,17 @@

s = parse150(buf)

if s:

self.size = int(s)

+ if buf.lower().find('location') != -1:

+ location = ':'.join(buf.split(':')[1:])

+ location = location.strip()

+ self.scheme = urlparse.urlsplit(location)[0]

+ self.url = location

+ if len(self._hdr_dump) != 0 and buf == '\r\n':

+ self._hdr_ended = True

+ if DEBUG: DEBUG.info('header ended:')

return len(buf)

except KeyboardInterrupt:

return pycurl.READFUNC_ABORT

@@ -1113,8 +1140,10 @@

100

if self._parsed_hdr:

101

return self._parsed_hdr

102

statusend = self._hdr_dump.find('\n')

103

+ statusend += 1 # ridiculous as it may seem.

104

hdrfp = StringIO()

105

hdrfp.write(self._hdr_dump[statusend:])

106

+ hdrfp.seek(0)

107

self._parsed_hdr = mimetools.Message(hdrfp)

108

return self._parsed_hdr

109

110

@@ -1136,6 +1165,7 @@

111

self.curl_obj.setopt(pycurl.PROGRESSFUNCTION, self._progress_update)

112

self.curl_obj.setopt(pycurl.FAILONERROR, True)

113

self.curl_obj.setopt(pycurl.OPT_FILETIME, True)

114

+ self.curl_obj.setopt(pycurl.FOLLOWLOCATION, True)

115

116

if DEBUG:

117

self.curl_obj.setopt(pycurl.VERBOSE, True)

118

@@ -1148,9 +1178,11 @@

119

120

# timeouts

121

timeout = 300

122

- if opts.timeout:

123

- timeout = int(opts.timeout)

124

- self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)

125

+ if hasattr(opts, 'timeout'):

126

+ timeout = int(opts.timeout or 0)

127

+ self.curl_obj.setopt(pycurl.CONNECTTIMEOUT, timeout)

128

+ self.curl_obj.setopt(pycurl.LOW_SPEED_LIMIT, 1)

129

+ self.curl_obj.setopt(pycurl.LOW_SPEED_TIME, timeout)

130

131

# ssl options

132

if self.scheme == 'https':

133

@@ -1276,7 +1308,7 @@

134

raise err

135

136

elif errcode == 60:

137

- msg = _("client cert cannot be verified or client cert incorrect")

138

+ msg = _("Peer cert cannot be verified or peer cert invalid")

139

err = URLGrabError(14, msg)

140

err.url = self.url

141

raise err

142

@@ -1291,7 +1323,12 @@

143

raise err

144

145

elif str(e.args[1]) == '' and self.http_code != 0: # fake it until you make it

146

- msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)

147

+ if self.scheme in ['http', 'https']:

148

+ msg = 'HTTP Error %s : %s ' % (self.http_code, self.url)

149

+ elif self.scheme in ['ftp']:

150

+ msg = 'FTP Error %s : %s ' % (self.http_code, self.url)

151

+ else:

152

+ msg = "Unknown Error: URL=%s , scheme=%s" % (self.url, self.scheme)

153

else:

154

msg = 'PYCURL ERROR %s - "%s"' % (errcode, str(e.args[1]))

155

code = errcode

156

@@ -1299,6 +1336,12 @@

157

err.code = code

158

err.exception = e

159

raise err

160

+ else:

161

+ if self._error[1]:

162

+ msg = self._error[1]

163

+ err = URLGRabError(14, msg)

164

+ err.url = self.url

165

+ raise err

166

167

def _do_open(self):

168

self.curl_obj = _curl_cache

169

@@ -1446,9 +1489,23 @@

170

# set the time

171

mod_time = self.curl_obj.getinfo(pycurl.INFO_FILETIME)

172

if mod_time != -1:

173

- os.utime(self.filename, (mod_time, mod_time))

174

+ try:

175

+ os.utime(self.filename, (mod_time, mod_time))

176

+ except OSError, e:

177

+ err = URLGrabError(16, _(\

178

+ 'error setting timestamp on file %s from %s, OSError: %s')

179

+ % (self.filenameself.url, e))

180

+ err.url = self.url

181

+ raise err

182

# re open it

183

- self.fo = open(self.filename, 'r')

184

+ try:

185

+ self.fo = open(self.filename, 'r')

186

+ except IOError, e:

187

+ err = URLGrabError(16, _(\

188

+ 'error opening file from %s, IOError: %s') % (self.url, e))

189

+ err.url = self.url

190

+ raise err

191

192

else:

193

#self.fo = open(self._temp_name, 'r')

194

self.fo.seek(0)

195

@@ -1532,11 +1589,14 @@

196

def _over_max_size(self, cur, max_size=None):

197

198

if not max_size:

199

- max_size = self.size

200

- if self.opts.size: # if we set an opts size use that, no matter what

201

- max_size = self.opts.size

202

+ if not self.opts.size:

203

+ max_size = self.size

204

+ else:

205

+ max_size = self.opts.size

206

207

if not max_size: return False # if we have None for all of the Max then this is dumb

208

- if cur > max_size + max_size*.10:

209

210

+ if cur > int(float(max_size) * 1.10):

211

212

msg = _("Downloaded more than max size for %s: %s > %s") \

213

% (self.url, cur, max_size)

214

@@ -1582,9 +1642,21 @@

215

self.opts.progress_obj.end(self._amount_read)

216

self.fo.close()

217

218

219

+ def geturl(self):

220

+ """ Provide the geturl() method, used to be got from

221

+ urllib.addinfourl, via. urllib.URLopener.* """

222

+ return self.url

223

224

_curl_cache = pycurl.Curl() # make one and reuse it over and over and over

225

226

+def reset_curl_obj():

227

+ """To make sure curl has reread the network/dns info we force a reload"""

228

+ global _curl_cache

229

+ _curl_cache.close()

230

+ _curl_cache = pycurl.Curl()

231

232

233

234

235

#####################################################################

236

# DEPRECATED FUNCTIONS

Older »