~jderose/dmedia/trunk

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python3

import sys
import os
from os import path
import optparse
import time
import hashlib
import platform


parser = optparse.OptionParser(
	usage='Usage: %prog FILE',
)
parser.add_option('--chunk',
    help='Read buffer size in KiB; default=1024',
    metavar='KiB',
    default=1024,
    type='int',
)
parser.add_option('--runs',
    help='Runs per algorithm; default=4',
    metavar='N',
    default=4,
    type='int',
)

(options, args) = parser.parse_args()
if len(args) != 1:
    parser.print_usage()
    sys.exit('ERROR: must provide FILE to hash')
src = path.abspath(args[0])
if not path.isfile(src):
    parser.print_usage()
    sys.exit('ERROR: not a file: %r' % src)
size = path.getsize(src)
chunk = options.chunk * 1024


# Build list of hashes:
hashes = [
    hashlib.md5,
    hashlib.sha1,
    hashlib.sha224,
    hashlib.sha256,
    hashlib.sha384,
    hashlib.sha512,
]
try:
    import skein
    hashes.extend([
        skein.skein256,
        skein.skein512,
        skein.skein1024,
    ])
except ImportError:
    print('Could not import `skein`.')
    print('Download pyskein at http://packages.python.org/pyskein/')
    print('')


def hash_file(filename, hashfunc):
    """
    Compute the content-hash of the file *filename*.
    """
    fp = open(filename, 'rb')
    h = hashfunc()
    while True:
        try:
            buf = fp.read(chunk)
        except KeyboardInterrupt:
            print('')
            sys.exit()
        if not buf:
            break
        h.update(buf)
    return h.hexdigest()


def benchmark(hashfunc):
    start = time.time()
    for i in range(options.runs):
        hash_file(src, hashfunc)
    return (time.time() - start) / options.runs


print('-' * 80)
print('File size: %d bytes' % size)
print('Read buffer size: %d KiB' % options.chunk)
print('Runs per algorithm: %d' % options.runs)
print('Python: %s, %s, %s' %
    (platform.python_version(), platform.machine(), platform.system())
)

# Do an md5sum once to get the file into the page cache:
hash_file(src, hashlib.md5)

report = []
for hashfunc in hashes:
    avg = benchmark(hashfunc)
    bytes_per_second = (size / avg)
    report.append(
        dict(
            name=hashfunc.__name__,
            avg=avg,
            bytes_per_second=bytes_per_second,
            mbps=(bytes_per_second / 10 ** 6),
        )
    )

output = [['Hash', 'Time', 'MB/s']]
for d in report:
    output.append(
        [
            '%(name)s' % d,
            '%(avg).2f' % d,
            '%(mbps).1f' % d,
        ]
    )

widths = [
    max(len(r[i]) for r in output) for i in range(3)
]
output.insert(1, ['=' * w for w in widths])

print('')
for row in output:
    print('  '.join(row[i].ljust(widths[i]) for i in range(3)))
print('-' * 80)