"""
Example demonstrating how to use compression and other special options
for storing datasets in HDF5.

Compression is supported in HDF5 via a "filter pipeline" which is applied
to data as it is written to and read from disk. Each dataset in the
file has its own pipeline, which allows the compression strategy to be
specified on a per-dataset basis.

Compression is only available for the actual data, and not for attributes.

As of h5py 1.1, three compression techniques are available: "gzip", "lzf",
and "szip". The non-compression filters "shuffle" and "fletcher32" are
also available. See the docstring for the module h5py.filters for more
information.

Please note LZF is an h5py-only filter. While reference C source is
available, other HDF5-aware applications may be unable to read data in
this format.
"""
# Shape of the demonstration array; the element count is derived from it.
SHAPE = (100, 100, 100, 20)
# np.prod is used instead of np.product: the latter is a deprecated alias
# that was removed in NumPy 2.0, while np.prod works on every version.
SIZE = np.prod(SHAPE)

# Opened in 'w' mode, so any existing compress_test.hdf5 is overwritten.
f = h5py.File('compress_test.hdf5', 'w')

# NOTE(review): DTYPE is not defined in the visible part of this file; it is
# presumably a module-level constant declared elsewhere -- confirm.
mydata = np.arange(SIZE, dtype=DTYPE).reshape(SHAPE)

# Each section below stores the same array under a different filter setup.
# Single-argument print(...) calls behave identically on Python 2 and 3.

print("Creating dataset with gzip")
dset = f.create_dataset("gzipped", data=mydata, compression="gzip",
                        compression_opts=4)  # compression_opts is optional

print("Creating dataset with LZF")
dset = f.create_dataset("lzfcompressed", data=mydata, compression="lzf")

if 'szip' in h5py.filters.encode:  # Not distributed with all versions of HDF5
    print("Creating dataset with SZIP")
    dset = f.create_dataset("szipped", data=mydata, compression="szip",
                            compression_opts=('nn', 8))

# This dataset is gzip-compressed with the fletcher32 checksum filter enabled;
# the progress message names gzip to match the call below.
print("Creating dataset with gzip and error detection")
dset = f.create_dataset("gzip_error_detection", data=mydata,
                        compression="gzip", fletcher32=True)

print("Creating uncompressed dataset")
dset = f.create_dataset("uncompressed", data=mydata)
def showsettings(dataset):
    """Print the public storage attributes of a dataset, one per line.

    Each line is a label followed by the value of the matching attribute
    read from ``dataset``: ``name``, ``shape``, ``chunks``, ``dtype``,
    ``compression``, ``compression_opts``, ``shuffle`` and ``fletcher32``.

    Parameters
    ----------
    dataset : object
        Any object exposing the attributes listed above (for example a
        dataset returned by ``create_dataset``).

    Returns
    -------
    None
        The settings are written to standard output.
    """
    # (label, attribute name) pairs, in the order they are printed.
    fields = (
        ("Dataset ", "name"),
        ("Shape ", "shape"),
        ("Chunk size ", "chunks"),
        ("Datatype ", "dtype"),
        ("Compression ", "compression"),
        ("Settings ", "compression_opts"),
        ("Shuffle ", "shuffle"),
        ("Fletcher32 ", "fletcher32"),
    )
    for label, attr in fields:
        # "%s %s" reproduces the single separating space that the old
        # two-argument print statement emitted, and the single-argument
        # print(...) call behaves the same on Python 2 and Python 3.
        print("%s %s" % (label, getattr(dataset, attr)))