11
by Chris Oliver
Added urlgrabber to libkeryx |
1 |
# This library is free software; you can redistribute it and/or
|
2 |
# modify it under the terms of the GNU Lesser General Public
|
|
3 |
# License as published by the Free Software Foundation; either
|
|
4 |
# version 2.1 of the License, or (at your option) any later version.
|
|
5 |
#
|
|
6 |
# This library is distributed in the hope that it will be useful,
|
|
7 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
8 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
9 |
# Lesser General Public License for more details.
|
|
10 |
#
|
|
11 |
# You should have received a copy of the GNU Lesser General Public
|
|
12 |
# License along with this library; if not, write to the
|
|
13 |
# Free Software Foundation, Inc.,
|
|
14 |
# 59 Temple Place, Suite 330,
|
|
15 |
# Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
|
|
18 |
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
|
|
19 |
||
20 |
# $Id: progress.py,v 1.7 2005/08/19 21:59:07 mstenner Exp $
|
|
21 |
||
22 |
import sys |
|
23 |
import time |
|
24 |
import math |
|
25 |
import thread |
|
26 |
||
27 |
class BaseMeter:
    """Base class for single-transfer progress meters.

    Keeps the bookkeeping for one transfer (names, size, timestamps and a
    RateEstimator) and throttles refreshes so that the display hook runs
    at most once every ``update_period`` seconds.  The ``_do_start``,
    ``_do_update`` and ``_do_end`` hooks do nothing here; subclasses
    override them to produce output.
    """

    def __init__(self):
        self.update_period = 0.3 # seconds

        self.filename = None
        self.url = None
        self.basename = None
        self.text = None
        self.size = None
        self.fsize = None # formatted size; stays None while size is unknown
        self.start_time = None
        self.last_amount_read = 0
        self.last_update_time = None
        self.re = RateEstimator()

    def start(self, filename=None, url=None, basename=None,
              size=None, now=None, text=None):
        """Begin metering a transfer.

        filename, url, basename and text are stored as given.  size is
        the expected total (None when unknown).  now is the start
        timestamp and defaults to time.time().
        """
        self.filename = filename
        self.url = url
        self.basename = basename
        self.text = text

        self.size = size
        # reset first so a previous transfer's formatted size cannot leak
        # into a transfer whose size is unknown
        self.fsize = None
        if size is not None: self.fsize = format_number(size) + 'B'

        if now is None: now = time.time()
        self.start_time = now
        self.re.start(size, now)
        self.last_amount_read = 0
        self.last_update_time = now
        self._do_start(now)

    def _do_start(self, now=None):
        """Hook called at the end of start(); does nothing by default."""
        pass

    def update(self, amount_read, now=None):
        """Record progress, refreshing at most once per update_period.

        amount_read is the running total read so far.  Calls that arrive
        less than update_period seconds after the previous refresh are
        ignored entirely.
        """
        # for a real gui, you probably want to override and put a call
        # to your mainloop iteration function here
        if now is None: now = time.time()
        # The "no previous update" test must come first: with
        # last_update_time still None the addition would raise TypeError.
        if (not self.last_update_time or
            now >= self.last_update_time + self.update_period):
            self.re.update(amount_read, now)
            self.last_amount_read = amount_read
            self.last_update_time = now
            self._do_update(amount_read, now)

    def _do_update(self, amount_read, now=None):
        """Hook called for every accepted update(); does nothing by default."""
        pass

    def end(self, amount_read, now=None):
        """Record the final amount read (never throttled) and finish."""
        if now is None: now = time.time()
        self.re.update(amount_read, now)
        self.last_amount_read = amount_read
        self.last_update_time = now
        self._do_end(amount_read, now)

    def _do_end(self, amount_read, now=None):
        """Hook called at the end of end(); does nothing by default."""
        pass
|
|
85 |
||
86 |
class TextMeter(BaseMeter):
    """Single-transfer meter that redraws one status line on a file object.

    Each line starts with a carriage return so it overwrites the previous
    one; the final line written by _do_end is terminated with a newline.
    """

    def __init__(self, fo=sys.stderr):
        BaseMeter.__init__(self)
        self.fo = fo

    def _do_update(self, amount_read, now=None):
        """Redraw the status line for an in-progress transfer."""
        elapsed = format_time(self.re.elapsed_time())
        amount = format_number(amount_read)
        # the explicit text wins over the basename as the line label
        label = self.text
        if label is None:
            label = self.basename
        if self.size is not None:
            # known size: percentage, 25-column bar and ETA
            eta = format_time(self.re.remaining_time())
            fraction = self.re.fraction_read()
            line = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ETA ' % (
                label, fraction * 100, '=' * int(25 * fraction), amount, eta)
        else:
            # unknown size: only the amount read and the elapsed time
            line = '\r%-60.60s %5sB %s ' % (label, amount, elapsed)

        self.fo.write(line)
        self.fo.flush()

    def _do_end(self, amount_read, now=None):
        """Draw the final status line (full bar, total time) and a newline."""
        elapsed = format_time(self.re.elapsed_time())
        amount = format_number(amount_read)
        label = self.text
        if label is None:
            label = self.basename
        if self.size is not None:
            line = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ' % (
                label, 100, '=' * 25, amount, elapsed)
        else:
            line = '\r%-60.60s %5sB %s ' % (label, amount, elapsed)
        self.fo.write(line + '\n')
        self.fo.flush()

# lowercase name bound to the same class as TextMeter
text_progress_meter = TextMeter
|
133 |
||
134 |
class MultiFileHelper(BaseMeter):
    """Per-file meter that forwards every event to a master meter.

    The master is the object passed to the constructor; each hook hands
    this helper itself to the corresponding ``*_meter`` method on it.
    """

    def __init__(self, master):
        BaseMeter.__init__(self)
        self.master = master

    def _do_start(self, now):
        """Tell the master that this file has started."""
        owner = self.master
        owner.start_meter(self, now)

    def _do_update(self, amount_read, now):
        """Tell the master that this file has made progress."""
        owner = self.master
        owner.update_meter(self, now)

    def _do_end(self, amount_read, now):
        """Store formatted totals on self, then tell the master we are done."""
        duration = now - self.start_time
        self.ftotal_time = format_time(duration)
        self.ftotal_size = format_number(self.last_amount_read)
        owner = self.master
        owner.end_meter(self, now)

    def failure(self, message, now=None):
        """Report a failed transfer, with its message, to the master."""
        owner = self.master
        owner.failure_meter(self, message, now)

    def message(self, message):
        """Pass a message about this file on to the master."""
        owner = self.master
        owner.message_meter(self, message)
|
156 |
||
157 |
class MultiFileMeter:
    """Aggregate progress meter for a batch of transfers.

    Child meters are created with newMeter() (instances of
    ``helperclass``) and report back through start_meter, update_meter,
    end_meter, failure_meter and message_meter.  This class keeps the
    counters and a RateEstimator for the whole batch; the ``_do_*`` hooks
    do nothing here and are meant to be overridden to produce output.
    Counter changes in start_meter, end_meter and failure_meter are made
    while holding an internal lock.
    """
    helperclass = MultiFileHelper

    def __init__(self):
        self.meters = []
        self.in_progress_meters = []
        self._lock = thread.allocate_lock()
        self.update_period = 0.3 # seconds

        self.numfiles = None
        self.finished_files = 0
        self.failed_files = 0
        self.open_files = 0
        self.total_size = None
        self.failed_size = 0
        self.start_time = None
        self.finished_file_size = 0
        self.last_update_time = None
        self.re = RateEstimator()

    def start(self, numfiles=None, total_size=None, now=None):
        """Reset all counters and begin metering a batch.

        numfiles and total_size may be None when unknown; now defaults
        to time.time().
        """
        if now is None: now = time.time()
        self.numfiles = numfiles
        self.finished_files = 0
        self.failed_files = 0
        self.open_files = 0
        self.total_size = total_size
        self.failed_size = 0
        self.start_time = now
        self.finished_file_size = 0
        self.last_update_time = now
        self.re.start(total_size, now)
        self._do_start(now)

    def _do_start(self, now):
        """Hook called at the end of start(); does nothing by default."""
        pass

    def end(self, now=None):
        """Finish the batch; now defaults to time.time()."""
        if now is None: now = time.time()
        self._do_end(now)

    def _do_end(self, now):
        """Hook called by end(); does nothing by default."""
        pass

    def lock(self): self._lock.acquire()
    def unlock(self): self._lock.release()

    ###########################################################
    # child meter creation and destruction
    def newMeter(self):
        """Create, register and return a new child meter."""
        newmeter = self.helperclass(self)
        self.meters.append(newmeter)
        return newmeter

    def removeMeter(self, meter):
        """Unregister a child meter (ValueError if it is not registered)."""
        self.meters.remove(meter)

    ###########################################################
    # child functions - these should only be called by helpers
    def start_meter(self, meter, now):
        """Mark a child as in progress.

        Raises ValueError if meter is not registered with this master.
        """
        if meter not in self.meters:
            raise ValueError('attempt to use orphaned meter')
        self._lock.acquire()
        try:
            if meter not in self.in_progress_meters:
                self.in_progress_meters.append(meter)
                self.open_files += 1
        finally:
            self._lock.release()
        self._do_start_meter(meter, now)

    def _do_start_meter(self, meter, now):
        """Hook called by start_meter(); does nothing by default."""
        pass

    def update_meter(self, meter, now):
        """Fold a child's progress into the batch totals.

        Refreshes at most once per update_period.  Raises ValueError if
        meter is not registered with this master.
        """
        if meter not in self.meters:
            raise ValueError('attempt to use orphaned meter')
        # The "no previous update" test must come first: with
        # last_update_time still None the addition would raise TypeError.
        if (not self.last_update_time or
            now >= self.last_update_time + self.update_period):
            self.re.update(self._amount_read(), now)
            self.last_update_time = now
            self._do_update_meter(meter, now)

    def _do_update_meter(self, meter, now):
        """Hook called for every accepted update_meter(); does nothing."""
        pass

    def end_meter(self, meter, now):
        """Count a child as finished and add its bytes to the finished total.

        Raises ValueError if meter is not registered with this master.
        """
        if meter not in self.meters:
            raise ValueError('attempt to use orphaned meter')
        self._lock.acquire()
        try:
            # a meter that was never started is simply not in the list
            try: self.in_progress_meters.remove(meter)
            except ValueError: pass
            self.open_files -= 1
            self.finished_files += 1
            self.finished_file_size += meter.last_amount_read
        finally:
            self._lock.release()
        self._do_end_meter(meter, now)

    def _do_end_meter(self, meter, now):
        """Hook called by end_meter(); does nothing by default."""
        pass

    def failure_meter(self, meter, message, now):
        """Count a child as failed.

        failed_size accumulates the sizes of failed files; it becomes
        None once a failed file has no known size (or was already None).
        Raises ValueError if meter is not registered with this master.
        """
        if meter not in self.meters:
            raise ValueError('attempt to use orphaned meter')
        self._lock.acquire()
        try:
            try: self.in_progress_meters.remove(meter)
            except ValueError: pass
            self.open_files -= 1
            self.failed_files += 1
            if meter.size and self.failed_size is not None:
                self.failed_size += meter.size
            else:
                self.failed_size = None
        finally:
            self._lock.release()
        self._do_failure_meter(meter, message, now)

    def _do_failure_meter(self, meter, message, now):
        """Hook called by failure_meter(); does nothing by default."""
        pass

    def message_meter(self, meter, message):
        """Receive a message from a child; ignored by default."""
        pass

    ########################################################
    # internal functions
    def _amount_read(self):
        """Return bytes read so far: finished files plus files in progress."""
        tot = self.finished_file_size
        for m in self.in_progress_meters:
            tot += m.last_amount_read
        return tot
|
289 |
||
290 |
||
291 |
class TextMultiFileMeter(MultiFileMeter):
    """MultiFileMeter that draws a text summary on a file object.

    One status line (files / data / time) is redrawn in place using a
    carriage return.  Each finished or failed file gets its own line
    above it.  All writes happen while holding the inherited lock, which
    is never held across a call to another method that takes it.
    """

    def __init__(self, fo=sys.stderr):
        self.fo = fo
        MultiFileMeter.__init__(self)

    # files: ###/### ###% data: ######/###### ###% time: ##:##:##/##:##:##
    def _do_update_meter(self, meter, now):
        """Redraw the batch status line; meter and now are not used."""
        self._lock.acquire()
        try:
            fmt = "files: %3i/%-3i %3i%% data: %6.6s/%-6.6s %3i%% " \
                  "time: %8.8s/%8.8s"
            df = self.finished_files
            tf = self.numfiles or 1
            # + 0.49 because %i truncates the float percentage
            pf = 100 * float(df)/tf + 0.49
            dd = self.re.last_amount_read
            td = self.total_size
            pd = 100 * (self.re.fraction_read() or 0) + 0.49
            dt = self.re.elapsed_time()
            rt = self.re.remaining_time()
            # total time is only known when a remaining-time estimate exists
            if rt is None: tt = None
            else: tt = dt + rt

            fdd = format_number(dd) + 'B'
            ftd = format_number(td) + 'B'
            fdt = format_time(dt, 1)
            ftt = format_time(tt, 1)

            out = '%-79.79s' % (fmt % (df, tf, pf, fdd, ftd, pd, fdt, ftt))
            self.fo.write('\r' + out)
            self.fo.flush()
        finally:
            self._lock.release()

    def _do_end_meter(self, meter, now):
        """Write a line for a finished file, then redraw the status line."""
        self._lock.acquire()
        try:
            fmt = "%-30.30s %6.6s %8.8s %9.9s"
            fn = meter.basename
            size = meter.last_amount_read
            fsize = format_number(size) + 'B'
            et = meter.re.elapsed_time()
            fet = format_time(et, 1)
            # a transfer that finished within the same timestamp has no
            # measurable rate; report 0 rather than dividing by zero
            if et: rate = float(size) / et
            else: rate = 0.0
            frate = format_number(rate) + 'B/s'

            out = '%-79.79s' % (fmt % (fn, fsize, fet, frate))
            self.fo.write('\r' + out + '\n')
        finally:
            self._lock.release()
        self._do_update_meter(meter, now)

    def _do_failure_meter(self, meter, message, now):
        """Write a FAILED line for a file, then redraw the status line.

        message may be a string (split into lines) or a sequence of
        lines; the first line shares the FAILED line and the rest follow
        on lines of their own.
        """
        self._lock.acquire()
        try:
            fmt = "%-30.30s %6.6s %s"
            fn = meter.basename
            if isinstance(message, (type(''), type(u''))):
                message = message.splitlines()
            if not message: message = ['']
            out = '%-79s' % (fmt % (fn, 'FAILED', message[0] or ''))
            self.fo.write('\r' + out + '\n')
            for m in message[1:]: self.fo.write(' ' + m + '\n')
        finally:
            # release in finally: if a write fails the lock must not stay
            # held, or every later meter call would block forever
            self._lock.release()
        # outside the locked region because _do_update_meter takes the
        # (non-reentrant) lock itself
        self._do_update_meter(meter, now)

    def message_meter(self, meter, message):
        """Messages are not displayed; the lock is taken and released only."""
        self._lock.acquire()
        try:
            pass
        finally:
            self._lock.release()

    def _do_end(self, now):
        """Draw the status line one last time and finish it with a newline."""
        self._do_update_meter(None, now)
        self._lock.acquire()
        try:
            self.fo.write('\n')
            self.fo.flush()
        finally:
            self._lock.release()
|
371 |
||
372 |
######################################################################
|
|
373 |
# support classes and functions
|
|
374 |
||
375 |
class RateEstimator:
    """Estimate transfer rate, elapsed time and remaining time.

    Feed it the running total of data read via update(); it keeps a
    time-weighted rolling average of the rate.  timescale (seconds)
    controls how quickly the average follows new samples.
    """

    def __init__(self, timescale=5.0):
        self.timescale = timescale
        # defined here so the result methods work before start() is called
        self.total = None
        self.start_time = None
        self.last_update_time = None
        self.last_amount_read = 0
        self.ave_rate = None

    def start(self, total=None, now=None):
        """Reset the estimator; total may be None, now defaults to time.time()."""
        if now is None: now = time.time()
        self.total = total
        self.start_time = now
        self.last_update_time = now
        self.last_amount_read = 0
        self.ave_rate = None

    def update(self, amount_read, now=None):
        """Fold a new sample (running total amount_read at time now) in."""
        if now is None: now = time.time()
        if self.start_time is None:
            # update() without start(): time the transfer from this sample
            self.start_time = now
            self.last_update_time = now
        if amount_read == 0:
            # if we just started this file, all bets are off
            self.last_update_time = now
            self.last_amount_read = 0
            self.ave_rate = None
            return

        time_diff = now - self.last_update_time
        read_diff = amount_read - self.last_amount_read
        self.last_update_time = now
        self.last_amount_read = amount_read
        self.ave_rate = self._temporal_rolling_ave(
            time_diff, read_diff, self.ave_rate, self.timescale)

    #####################################################################
    # result methods
    def average_rate(self):
        "get the average transfer rate (in bytes/second)"
        return self.ave_rate

    def elapsed_time(self):
        """the time between the start of the transfer and the most recent
        update (None if the transfer has not been started)"""
        if self.start_time is None: return None
        return self.last_update_time - self.start_time

    def remaining_time(self):
        "estimated time remaining (None when rate or total is unknown or zero)"
        if not self.ave_rate or not self.total: return None
        # float() so integer inputs cannot trigger floor division
        return float(self.total - self.last_amount_read) / self.ave_rate

    def fraction_read(self):
        """the fraction of the data that has been read
        (can be None for unknown transfer size)"""
        if self.total is None: return None
        elif self.total == 0: return 1.0
        else: return float(self.last_amount_read)/self.total

    #########################################################################
    # support methods
    def _temporal_rolling_ave(self, time_diff, read_diff, last_ave, timescale):
        """a temporal rolling average performs smooth averaging even when
        updates come at irregular intervals.  This is performed by scaling
        the "epsilon" according to the time since the last update.
        Specifically, epsilon = time_diff / timescale (capped at 1.0)

        As a general rule, the average will take on a completely new value
        after 'timescale' seconds."""
        epsilon = float(time_diff) / timescale
        if epsilon > 1: epsilon = 1.0
        return self._rolling_ave(time_diff, read_diff, last_ave, epsilon)

    def _rolling_ave(self, time_diff, read_diff, last_ave, epsilon):
        """perform a "rolling average" iteration
        a rolling average "folds" new data into an existing average with
        some weight, epsilon.  epsilon must be between 0.0 and 1.0 (inclusive)
        a value of 0.0 means only the old value (initial value) counts,
        and a value of 1.0 means only the newest value is considered.

        Returns whichever of the two rates is available when the other is
        None (no previous average, or a zero time_diff)."""

        try:
            recent_rate = float(read_diff) / time_diff
        except ZeroDivisionError:
            recent_rate = None
        if last_ave is None: return recent_rate
        elif recent_rate is None: return last_ave

        # at this point, both last_ave and recent_rate are numbers
        return epsilon * recent_rate + (1 - epsilon) * last_ave

    def _round_remaining_time(self, rt, start_time=15.0):
        """round the remaining time down, more coarsely as it grows

        rt is truncated to whole seconds and then rounded down to a
        multiple of 2**shift, where shift = int(log2(rt / start_time)).
        For rt below 2 * start_time the shift is not positive and only
        the truncation applies.  Zero and negative values give 0.0.
        The result is always a float.
        For example (for start_time = 15.0):
         2.7  -> 2.0
         25.2 -> 25.0
         26.4 -> 26.0
         35.3 -> 34.0
         63.6 -> 60.0
        """

        # <= rather than <: math.log(0) is a domain error
        if rt <= 0: return 0.0
        shift = int(math.log(float(rt) / start_time) / math.log(2))
        rt = int(rt)
        if shift <= 0: return float(rt)
        return float(rt >> shift << shift)
|
476 |
||
477 |
||
478 |
def format_time(seconds, use_hours=0):
    """Format a duration in seconds as MM:SS, or HH:MM:SS if use_hours.

    None and negative durations are rendered as dashes ('--:--' or
    '--:--:--').  Fractional seconds are truncated.  Without use_hours
    the minutes field is not wrapped at 60.
    """
    if seconds is None or seconds < 0:
        if use_hours:
            return '--:--:--'
        return '--:--'

    minutes, secs = divmod(int(seconds), 60)
    if not use_hours:
        return '%02i:%02i' % (minutes, secs)

    hours, minutes = divmod(minutes, 60)
    return '%02i:%02i:%02i' % (hours, minutes, secs)
|
492 |
||
493 |
def format_number(number, SI=0, space=' '):
    """Turn numbers into human-readable metric-like numbers

    number is divided by 1024 (or by 1000 when SI is true) until it is
    no larger than 999 or the prefixes run out.  The result is the
    scaled value, then space, then the prefix ('', 'k', 'M', ...).
    Integers that needed no scaling are shown without decimals, scaled
    values below 9.95 with one decimal and anything larger with none.
    None is treated as 0.0 and therefore formats as '0.0'.
    """
    symbols = ['',  # (none)
               'k', # kilo
               'M', # mega
               'G', # giga
               'T', # tera
               'P', # peta
               'E', # exa
               'Z', # zetta
               'Y'] # yotta

    if SI: step = 1000.0
    else: step = 1024.0

    thresh = 999
    depth = 0
    max_depth = len(symbols) - 1

    # Callers pass None for an unknown amount.  Handle it explicitly
    # instead of relying on how None happens to compare with numbers.
    if number is None: number = 0.0

    # "long" only exists on Python 2; looking it up by name keeps this
    # function importable on interpreters that do not have it.
    try:
        integer_types = (int, long)
    except NameError:
        integer_types = (int,)

    # we want numbers between 0 and thresh, but don't exceed the length
    # of our list.  In that event, the formatting will be screwed up,
    # but it'll still show the right number.
    while number > thresh and depth < max_depth:
        depth = depth + 1
        number = number / step

    if isinstance(number, integer_types):
        # it's an int or a long, which means it didn't get divided,
        # which means it's already short enough
        fmt = '%i%s%s'
    elif number < 9.95:
        # must use 9.95 for proper sizing.  For example, 9.99 will be
        # rounded to 10.0 with the .1f format string (which is too long)
        fmt = '%.1f%s%s'
    else:
        fmt = '%.0f%s%s'

    return fmt % (float(number or 0), space, symbols[depth])