~romaimperator/keryx/devel

11 by Chris Oliver
Added urlgrabber to libkeryx
1
#   This library is free software; you can redistribute it and/or
2
#   modify it under the terms of the GNU Lesser General Public
3
#   License as published by the Free Software Foundation; either
4
#   version 2.1 of the License, or (at your option) any later version.
5
#
6
#   This library is distributed in the hope that it will be useful,
7
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
8
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
9
#   Lesser General Public License for more details.
10
#
11
#   You should have received a copy of the GNU Lesser General Public
12
#   License along with this library; if not, write to the 
13
#      Free Software Foundation, Inc., 
14
#      59 Temple Place, Suite 330, 
15
#      Boston, MA  02111-1307  USA
16
17
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
18
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
19
20
# $Id: progress.py,v 1.7 2005/08/19 21:59:07 mstenner Exp $
21
22
import sys
23
import time
24
import math
25
import thread
26
    
27
class BaseMeter:
    """Base class for single-file progress meters.

    Records the state of one transfer, feeds a RateEstimator and
    throttles redraws to at most one per ``update_period`` seconds.  The
    ``_do_start``/``_do_update``/``_do_end`` hooks do nothing here and
    are meant to be overridden by subclasses.
    """

    def __init__(self):
        self.update_period = 0.3 # seconds

        self.filename   = None
        self.url        = None
        self.basename   = None
        self.text       = None
        self.size       = None
        # formatted size ('<number>B'), or None while the size is unknown
        self.fsize      = None
        self.start_time = None
        self.last_amount_read = 0
        self.last_update_time = None
        self.re = RateEstimator()

    def start(self, filename=None, url=None, basename=None,
              size=None, now=None, text=None):
        """Begin metering a transfer.

        filename, url, basename and text are stored as given.  size is
        the expected total (None when unknown) and now is the start
        timestamp, defaulting to time.time().
        """
        self.filename = filename
        self.url      = url
        self.basename = basename
        self.text     = text

        self.size = size
        # Recompute on every start so a reused meter never keeps the
        # formatted size of a previous transfer.
        if size is None:
            self.fsize = None
        else:
            self.fsize = format_number(size) + 'B'

        if now is None: now = time.time()
        self.start_time = now
        self.re.start(size, now)
        self.last_amount_read = 0
        self.last_update_time = now
        self._do_start(now)

    def _do_start(self, now=None):
        """Hook called at the end of start(); no-op by default."""
        pass

    def update(self, amount_read, now=None):
        """Record progress; redraw only if update_period has elapsed.

        amount_read is the total amount read so far, now the timestamp
        of this update (defaults to time.time()).
        """
        # for a real gui, you probably want to override and put a call
        # to your mainloop iteration function here
        if now is None: now = time.time()
        # Test the "never updated" case first: adding update_period to a
        # None timestamp would raise before the guard could take effect.
        if (not self.last_update_time) or \
               (now >= self.last_update_time + self.update_period):
            self.re.update(amount_read, now)
            self.last_amount_read = amount_read
            self.last_update_time = now
            self._do_update(amount_read, now)

    def _do_update(self, amount_read, now=None):
        """Hook called for every update that is not throttled; no-op."""
        pass

    def end(self, amount_read, now=None):
        """Record the final amount read (never throttled) and finish."""
        if now is None: now = time.time()
        self.re.update(amount_read, now)
        self.last_amount_read = amount_read
        self.last_update_time = now
        self._do_end(amount_read, now)

    def _do_end(self, amount_read, now=None):
        """Hook called at the end of end(); no-op by default."""
        pass
85
        
86
class TextMeter(BaseMeter):
    """Single-file meter that draws a one-line text display on ``fo``."""

    def __init__(self, fo=sys.stderr):
        BaseMeter.__init__(self)
        self.fo = fo

    def _do_update(self, amount_read, now=None):
        """Redraw the line in place (carriage return, no newline)."""
        elapsed = format_time(self.re.elapsed_time())
        amount = format_number(amount_read)

        # explicit text wins over the basename
        label = self.text
        if label is None:
            label = self.basename

        if self.size is not None:
            # known size: percentage, bar and ETA
            remaining = format_time(self.re.remaining_time())
            fraction = self.re.fraction_read()
            filled = '=' * int(25 * fraction)
            line = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s ETA ' % \
                   (label, fraction*100, filled, amount, remaining)
        else:
            # unknown size: amount read and elapsed time only
            line = '\r%-60.60s    %5sB %s ' % \
                   (label, amount, elapsed)

        self.fo.write(line)
        self.fo.flush()

    def _do_end(self, amount_read, now=None):
        """Draw the final line (full bar, total time) and a newline."""
        elapsed = format_time(self.re.elapsed_time())
        amount = format_number(amount_read)

        label = self.text
        if label is None:
            label = self.basename

        if self.size is not None:
            line = '\r%-25.25s %3i%% |%-25.25s| %5sB %8s     ' % \
                   (label, 100, '=' * 25, amount, elapsed)
        else:
            line = '\r%-60.60s    %5sB %s ' % \
                   (label, amount, elapsed)

        self.fo.write(line + '\n')
        self.fo.flush()
131
132
# Lowercase alias for the TextMeter class.
text_progress_meter = TextMeter
133
134
class MultiFileHelper(BaseMeter):
    """Per-file meter that forwards its events to a master meter.

    ``master`` must provide start_meter, update_meter, end_meter,
    failure_meter and message_meter; each is called with this helper as
    its first argument.
    """

    def __init__(self, master):
        BaseMeter.__init__(self)
        self.master = master

    def _do_start(self, now):
        self.master.start_meter(self, now)

    def _do_update(self, amount_read, now):
        self.master.update_meter(self, now)

    def _do_end(self, amount_read, now):
        # keep formatted totals on the helper before notifying the master
        self.ftotal_time = format_time(now - self.start_time)
        self.ftotal_size = format_number(self.last_amount_read)
        self.master.end_meter(self, now)

    def failure(self, message, now=None):
        """Report a failed transfer to the master.

        now defaults to time.time(), as in start/update/end, so the
        master never receives a None timestamp.
        """
        if now is None: now = time.time()
        self.master.failure_meter(self, message, now)

    def message(self, message):
        """Pass a message through to the master."""
        self.master.message_meter(self, message)
156
157
class MultiFileMeter:
    """Aggregate progress over several per-file helper meters.

    Helpers are created with newMeter() and report back through the
    start_meter/update_meter/end_meter/failure_meter/message_meter
    methods.  The ``_do_*`` hooks do nothing here and are meant to be
    overridden by subclasses that draw something.
    """
    helperclass = MultiFileHelper

    def __init__(self):
        self.meters = []
        self.in_progress_meters = []
        self._lock = thread.allocate_lock()
        self.update_period = 0.3 # seconds

        self.numfiles         = None
        self.finished_files   = 0
        self.failed_files     = 0
        self.open_files       = 0
        self.total_size       = None
        self.failed_size      = 0
        self.start_time       = None
        self.finished_file_size = 0
        self.last_update_time = None
        self.re = RateEstimator()

    def start(self, numfiles=None, total_size=None, now=None):
        """Reset all counters and begin a batch.

        numfiles and total_size may be None when unknown; now defaults
        to time.time().
        """
        if now is None: now = time.time()
        self.numfiles         = numfiles
        self.finished_files   = 0
        self.failed_files     = 0
        self.open_files       = 0
        self.total_size       = total_size
        self.failed_size      = 0
        self.start_time       = now
        self.finished_file_size = 0
        self.last_update_time = now
        self.re.start(total_size, now)
        self._do_start(now)

    def _do_start(self, now):
        pass

    def end(self, now=None):
        """Finish the batch; now defaults to time.time()."""
        if now is None: now = time.time()
        self._do_end(now)

    def _do_end(self, now):
        pass

    def lock(self): self._lock.acquire()
    def unlock(self): self._lock.release()

    ###########################################################
    # child meter creation and destruction
    def newMeter(self):
        """Create, register and return a new helper meter."""
        newmeter = self.helperclass(self)
        self.meters.append(newmeter)
        return newmeter

    def removeMeter(self, meter):
        """Unregister a helper; raises ValueError if it is not known."""
        self.meters.remove(meter)

    ###########################################################
    # child functions - these should only be called by helpers
    def start_meter(self, meter, now):
        """Mark a helper as in progress (counted once per helper)."""
        if not meter in self.meters:
            raise ValueError('attempt to use orphaned meter')
        self._lock.acquire()
        try:
            if not meter in self.in_progress_meters:
                self.in_progress_meters.append(meter)
                self.open_files += 1
        finally:
            self._lock.release()
        self._do_start_meter(meter, now)

    def _do_start_meter(self, meter, now):
        pass

    def update_meter(self, meter, now):
        """Fold a helper's progress into the totals, throttled by
        update_period."""
        if not meter in self.meters:
            raise ValueError('attempt to use orphaned meter')
        # Test the "never updated" case first: adding update_period to a
        # None timestamp would raise before the guard could take effect.
        if (not self.last_update_time) or \
               (now >= self.last_update_time + self.update_period):
            self.re.update(self._amount_read(), now)
            self.last_update_time = now
            self._do_update_meter(meter, now)

    def _do_update_meter(self, meter, now):
        pass

    def end_meter(self, meter, now):
        """Account for a helper that finished successfully."""
        if not meter in self.meters:
            raise ValueError('attempt to use orphaned meter')
        self._lock.acquire()
        try:
            try: self.in_progress_meters.remove(meter)
            except ValueError: pass
            self.open_files     -= 1
            self.finished_files += 1
            self.finished_file_size += meter.last_amount_read
        finally:
            self._lock.release()
        self._do_end_meter(meter, now)

    def _do_end_meter(self, meter, now):
        pass

    def failure_meter(self, meter, message, now):
        """Account for a helper whose transfer failed."""
        if not meter in self.meters:
            raise ValueError('attempt to use orphaned meter')
        self._lock.acquire()
        try:
            try: self.in_progress_meters.remove(meter)
            except ValueError: pass
            self.open_files     -= 1
            self.failed_files   += 1
            # once one failed file has a falsy size the failed total
            # becomes None and stays None
            if meter.size and self.failed_size is not None:
                self.failed_size += meter.size
            else:
                self.failed_size = None
        finally:
            self._lock.release()
        self._do_failure_meter(meter, message, now)

    def _do_failure_meter(self, meter, message, now):
        pass

    def message_meter(self, meter, message):
        pass

    ########################################################
    # internal functions
    def _amount_read(self):
        """Total read so far: finished files plus in-progress helpers."""
        tot = self.finished_file_size
        for m in self.in_progress_meters:
            tot += m.last_amount_read
        return tot
289
290
291
class TextMultiFileMeter(MultiFileMeter):
    """MultiFileMeter that writes a text summary line to ``fo``.

    Output is written while holding the meter's lock.  The lock is not
    reentrant, so each method releases it before calling another method
    that takes it.
    """

    def __init__(self, fo=sys.stderr):
        self.fo = fo
        MultiFileMeter.__init__(self)

    # files: ###/### ###%  data: ######/###### ###%  time: ##:##:##/##:##:##
    def _do_update_meter(self, meter, now):
        """Redraw the overall status line in place."""
        self._lock.acquire()
        try:
            fmt = "files: %3i/%-3i %3i%%   data: %6.6s/%-6.6s %3i%%   " \
                  "time: %8.8s/%8.8s"
            df = self.finished_files
            tf = self.numfiles or 1
            pf = 100 * float(df)/tf + 0.49
            dd = self.re.last_amount_read
            # NOTE(review): td is None when start() got no total_size;
            # this relies on format_number accepting None - confirm.
            td = self.total_size
            pd = 100 * (self.re.fraction_read() or 0) + 0.49
            dt = self.re.elapsed_time()
            rt = self.re.remaining_time()
            if rt is None: tt = None
            else: tt = dt + rt

            fdd = format_number(dd) + 'B'
            ftd = format_number(td) + 'B'
            fdt = format_time(dt, 1)
            ftt = format_time(tt, 1)

            out = '%-79.79s' % (fmt % (df, tf, pf, fdd, ftd, pd, fdt, ftt))
            self.fo.write('\r' + out)
            self.fo.flush()
        finally:
            self._lock.release()

    def _do_end_meter(self, meter, now):
        """Print a summary line for a finished file, then redraw."""
        self._lock.acquire()
        try:
            fmt = "%-30.30s %6.6s    %8.8s    %9.9s"
            fn = meter.basename
            size = meter.last_amount_read
            fsize = format_number(size) + 'B'
            et = meter.re.elapsed_time()
            fet = format_time(et, 1)
            if et:
                frate = format_number(size / et) + 'B/s'
            else:
                # zero elapsed time: no meaningful rate, and dividing
                # would raise ZeroDivisionError
                frate = '--- B/s'

            out = '%-79.79s' % (fmt % (fn, fsize, fet, frate))
            self.fo.write('\r' + out + '\n')
        finally:
            self._lock.release()
        self._do_update_meter(meter, now)

    def _do_failure_meter(self, meter, message, now):
        """Print a FAILED line (plus any extra message lines), then redraw.

        message may be a string (split into lines) or a sequence of
        lines.
        """
        self._lock.acquire()
        try:
            fmt = "%-30.30s %6.6s %s"
            fn = meter.basename
            if type(message) in (type(''), type(u'')):
                message = message.splitlines()
            if not message: message = ['']
            out = '%-79s' % (fmt % (fn, 'FAILED', message[0] or ''))
            self.fo.write('\r' + out + '\n')
            for m in message[1:]: self.fo.write('  ' + m + '\n')
        finally:
            # Always release here: _do_update_meter takes the same
            # non-reentrant lock, so leaking it on an error would
            # deadlock the redraw below.
            self._lock.release()
        self._do_update_meter(meter, now)

    def message_meter(self, meter, message):
        # takes and releases the lock, writes nothing
        self._lock.acquire()
        try:
            pass
        finally:
            self._lock.release()

    def _do_end(self, now):
        """Draw the final status line and terminate it with a newline."""
        self._do_update_meter(None, now)
        self._lock.acquire()
        try:
            self.fo.write('\n')
            self.fo.flush()
        finally:
            self._lock.release()
371
        
372
######################################################################
373
# support classes and functions
374
375
class RateEstimator:
    """Estimate transfer rate and remaining time from periodic updates.

    Call start() once, then update() with the running total of data
    read.  The rate is a temporal rolling average over roughly
    ``timescale`` seconds.
    """

    def __init__(self, timescale=5.0):
        self.timescale = timescale

    def start(self, total=None, now=None):
        """Reset the estimator.

        total is the expected total amount (None when unknown); now is
        the start timestamp and defaults to time.time().
        """
        if now is None: now = time.time()
        self.total = total
        self.start_time = now
        self.last_update_time = now
        self.last_amount_read = 0
        self.ave_rate = None

    def update(self, amount_read, now=None):
        """Fold a new reading (running total amount_read at time now)
        into the average rate."""
        if now is None: now = time.time()
        if amount_read == 0:
            # if we just started this file, all bets are off
            self.last_update_time = now
            self.last_amount_read = 0
            self.ave_rate = None
            return

        time_diff = now         - self.last_update_time
        read_diff = amount_read - self.last_amount_read
        self.last_update_time = now
        self.last_amount_read = amount_read
        self.ave_rate = self._temporal_rolling_ave(\
            time_diff, read_diff, self.ave_rate, self.timescale)

    #####################################################################
    # result methods
    def average_rate(self):
        "get the average transfer rate (in bytes/second)"
        return self.ave_rate

    def elapsed_time(self):
        "the time between the start of the transfer and the most recent update"
        return self.last_update_time - self.start_time

    def remaining_time(self):
        "estimated time remaining (None if the rate or total is unknown)"
        if not self.ave_rate or not self.total: return None
        return (self.total - self.last_amount_read) / self.ave_rate

    def fraction_read(self):
        """the fraction of the data that has been read
        (can be None for unknown transfer size)"""
        if self.total is None: return None
        elif self.total == 0: return 1.0
        else: return float(self.last_amount_read)/self.total

    #########################################################################
    # support methods
    def _temporal_rolling_ave(self, time_diff, read_diff, last_ave, timescale):
        """a temporal rolling average performs smooth averaging even when
        updates come at irregular intervals.  This is performed by scaling
        the "epsilon" according to the time since the last update.
        Specifically, epsilon = time_diff / timescale

        As a general rule, the average will take on a completely new value
        after 'timescale' seconds."""
        epsilon = time_diff / timescale
        if epsilon > 1: epsilon = 1.0
        return self._rolling_ave(time_diff, read_diff, last_ave, epsilon)

    def _rolling_ave(self, time_diff, read_diff, last_ave, epsilon):
        """perform a "rolling average" iteration
        a rolling average "folds" new data into an existing average with
        some weight, epsilon.  epsilon must be between 0.0 and 1.0 (inclusive)
        a value of 0.0 means only the old value (initial value) counts,
        and a value of 1.0 means only the newest value is considered."""

        try:
            recent_rate = read_diff / time_diff
        except ZeroDivisionError:
            # two updates with the same timestamp: no new rate sample
            recent_rate = None
        if last_ave is None: return recent_rate
        elif recent_rate is None: return last_ave

        # at this point, both last_ave and recent_rate are numbers
        return epsilon * recent_rate  +  (1 - epsilon) * last_ave

    def _round_remaining_time(self, rt, start_time=15.0):
        """round the remaining time, depending on its size
        With n = int(log2(rt / start_time)), rt is truncated to a whole
        number and, when n >= 1, rounded down to a multiple of 2**n.
        If rt < 2*start_time it is only truncated to a whole number;
        if rt <= 0 the result is 0.0.
        For example (for start_time = 15.0):
         2.7  -> 2
         25.2 -> 25
         26.4 -> 26
         35.3 -> 34.0
         63.6 -> 60.0
        """

        # rt == 0 must be caught here too: math.log(0) raises ValueError
        if rt <= 0: return 0.0
        shift = int(math.log(rt/start_time)/math.log(2))
        rt = int(rt)
        if shift <= 0: return rt
        return float(rt >> shift << shift)
476
        
477
478
def format_time(seconds, use_hours=0):
    """Format a duration in seconds as 'MM:SS' or 'HH:MM:SS'.

    seconds is truncated to a whole number.  None or a negative value
    yields a dashed placeholder of the same shape.  Without use_hours
    the minutes field is not wrapped at 60 (3725 -> '62:05').
    """
    if seconds is None or seconds < 0:
        if use_hours: return '--:--:--'
        else:         return '--:--'

    # divmod keeps the arithmetic integral on both Python 2 and 3
    minutes, seconds = divmod(int(seconds), 60)
    if use_hours:
        hours, minutes = divmod(minutes, 60)
        return '%02i:%02i:%02i' % (hours, minutes, seconds)
    return '%02i:%02i' % (minutes, seconds)
492
            
493
def format_number(number, SI=0, space=' '):
    """Turn numbers into human-readable metric-like numbers

    number is divided by 1024 (or by 1000 when SI is true) until it is
    at most 999 or the largest prefix is reached.  The result is the
    scaled number, then space, then the prefix symbol; no unit is
    appended.  None is formatted like 0.0.
    """
    symbols = ['',  # (none)
               'k', # kilo
               'M', # mega
               'G', # giga
               'T', # tera
               'P', # peta
               'E', # exa
               'Z', # zetta
               'Y'] # yotta

    if SI: step = 1000.0
    else: step = 1024.0

    # `long` exists only on Python 2; a `1L` literal would not even
    # parse on Python 3.
    try:
        integer_types = (int, long)
    except NameError:
        integer_types = (int,)

    # None used to work only through Python 2's ordering of None below
    # every number; make that explicit, with the same output ('0.0').
    if number is None:
        number = 0.0

    thresh = 999
    depth = 0
    max_depth = len(symbols) - 1

    # we want numbers between 0 and thresh, but don't exceed the length
    # of our list.  In that event, the formatting will be screwed up,
    # but it'll still show the right number.
    while number > thresh and depth < max_depth:
        depth  = depth + 1
        number = number / step

    if isinstance(number, integer_types):
        # still an integer, which means it didn't get divided,
        # which means it's already short enough
        template = '%i%s%s'
    elif number < 9.95:
        # must use 9.95 for proper sizing.  For example, 9.99 will be
        # rounded to 10.0 with the .1f format string (which is too long)
        template = '%.1f%s%s'
    else:
        template = '%.0f%s%s'

    return template % (float(number or 0), space, symbols[depth])