1
########################################################################
4
# Created: February 10, 2004
5
# Author: Francesc Alted - falted@pytables.org
7
# $Id: ptrepack.py 1488 2006-02-28 11:35:23Z faltet $
9
########################################################################
11
"""This utility lets you repack your data files in a flexible way.
13
Pass the -h flag to this script for help on usage.
22
from tables.File import openFile
23
from tables.Group import Group
24
from tables.Leaf import Filters
28
def newdstGroup(dstfileh, dstgroup, title, filters):
30
# Now, create the new group. This works even if dstgroup == '/'
31
for nodeName in dstgroup.split('/'):
34
if not hasattr(dstfileh, nodeName):
35
group2 = dstfileh.createGroup(group, nodeName,
39
# We assume that nodeName is a group. If not, an error will
41
group2 = dstfileh._f_getChild(nodeName)
45
def copyLeaf(srcfile, dstfile, srcnode, dstnode, title,
46
filters, copyuserattrs, overwritefile, overwrtnodes, stats,
48
# Open the source file
49
srcfileh = openFile(srcfile, "r")
50
# Get the source node (that should exist)
51
srcNode = srcfileh.getNode(srcnode)
53
# Get the destination node and its parent
54
last_slash = dstnode.rindex('/')
55
if last_slash == len(dstnode)-1:
56
# print "Detected a trailing slash in destination node. Interpreting it as a destination group."
57
dstgroup = dstnode[:-1]
59
dstgroup = dstnode[:last_slash]
62
dstleaf = dstnode[last_slash+1:]
64
dstleaf = srcNode.name
65
# Check whether the destination group exists or not
66
if os.path.isfile(dstfile) and not overwritefile:
67
dstfileh = openFile(dstfile, "a")
69
dstGroup = dstfileh.getNode(dstgroup)
71
# The dstgroup does not seem to exist. Try creating it.
72
dstGroup = newdstGroup(dstfileh, dstgroup, title, filters)
74
# The node exists, but is it really a group?
75
if not isinstance(dstGroup, Group):
76
# No. Should we overwrite it?
78
parent = dstGroup._v_parent
79
last_slash = dstGroup._v_pathname.rindex('/')
80
dstgroupname = dstGroup._v_pathname[last_slash+1:]
82
dstGroup = dstfileh.createGroup(parent, dstgroupname,
86
raise RuntimeError, "Please, check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired."
88
# The destination file does not exist or will be overwritten.
89
dstfileh = openFile(dstfile, "w", title=title, filters=filters)
90
dstGroup = newdstGroup(dstfileh, dstgroup, title="", filters=filters)
92
# Finally, copy srcNode to dstNode
94
dstNode = srcNode.copy(
95
dstGroup, dstleaf, filters = filters,
96
copyuserattrs = copyuserattrs, overwrite = overwrtnodes,
97
stats = stats, start = start, stop = stop, step = step)
99
(type, value, traceback) = sys.exc_info()
100
print "Problems doing the copy from '%s:%s' to '%s:%s'" % \
101
(srcfile, srcnode, dstfile, dstnode)
102
print "The error was --> %s: %s" % (type, value)
103
print "The destination file looks like:\n", dstfileh
104
# Close all the open files:
107
raise RuntimeError, "Please, check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired."
109
# Close all the open files:
113
def copyChildren(srcfile, dstfile, srcgroup, dstgroup, title,
114
recursive, filters, copyuserattrs, overwritefile,
115
overwrtnodes, stats, start, stop, step):
116
"Copy the children from source group to destination group"
117
# Open the source file with srcgroup as rootUEP
118
srcfileh = openFile(srcfile, "r", rootUEP=srcgroup)
119
# Assign the root to srcGroup
120
srcGroup = srcfileh.root
122
# Check whether the destination group exists or not
123
if os.path.isfile(dstfile) and not overwritefile:
124
dstfileh = openFile(dstfile, "a")
126
dstGroup = dstfileh.getNode(dstgroup)
128
# The dstgroup does not seem to exist. Try creating it.
129
dstGroup = newdstGroup(dstfileh, dstgroup, title, filters)
131
# The node exists, but is it really a group?
132
if not isinstance(dstGroup, Group):
133
# No. Should we overwrite it?
135
parent = dstGroup._v_parent
136
last_slash = dstGroup._v_pathname.rindex('/')
137
dstgroupname = dstGroup._v_pathname[last_slash+1:]
139
dstGroup = dstfileh.createGroup(parent, dstgroupname,
143
raise RuntimeError, "Please, check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired."
145
# The destination file does not exist or will be overwritten.
146
dstfileh = openFile(dstfile, "w", title=title, filters=filters)
147
dstGroup = newdstGroup(dstfileh, dstgroup, title="", filters=filters)
149
# Finally, copy srcGroup to dstGroup
151
srcGroup._f_copyChildren(
152
dstGroup, recursive = recursive, filters = filters,
153
copyuserattrs = copyuserattrs, overwrite = overwrtnodes,
154
stats = stats, start = start, stop = stop, step = step)
156
(type, value, traceback) = sys.exc_info()
157
print "Problems doing the copy from '%s:%s' to '%s:%s'" % \
158
(srcfile, srcgroup, dstfile, dstgroup)
159
print "The error was --> %s: %s" % (type, value)
160
print "The destination file looks like:\n", dstfileh
161
# Close all the open files:
164
raise RuntimeError, "Please, check that the node names are not duplicated in destination, and if so, add the --overwrite-nodes flag if desired. In particular, pay attention that rootUEP is not fooling you."
166
# Close all the open files:
172
usage = """usage: %s [-h] [-v] [-o] [-R start,stop,step] [--non-recursive] [--dest-title=title] [--dont-copyuser-attrs] [--overwrite-nodes] [--complevel=(0-9)] [--complib=lib] [--shuffle=(0|1)] [--fletcher32=(0|1)] [--keep-source-filters] sourcefile:sourcegroup destfile:destgroup
173
-h -- Print usage message.
174
-v -- Show more information.
175
-o -- Overwite destination file.
176
-R RANGE -- Select a RANGE of rows (in the form "start,stop,step")
177
during the copy of *all* the leaves.
178
--non-recursive -- Do not do a recursive copy. Default is to do it.
179
--dest-title=title -- Title for the new file (if not specified,
180
the source is copied).
181
--dont-copy-userattrs -- Do not copy the user attrs (default is to do it)
182
--overwrite-nodes -- Overwrite destination nodes if they exist. Default is
183
to not overwrite them.
184
--complevel=(0-9) -- Set a compression level (0 for no compression, which
186
--complib=lib -- Set the compression library to be used during the copy.
187
lib can be set to "zlib", "lzo", "ucl" or "bzip2". Defaults to "zlib".
188
--shuffle=(0|1) -- Activate or not the shuffling filter (default is active
190
--fletcher32=(0|1) -- Whether to activate or not the fletcher32 filter (not
192
--keep-source-filters -- Use the original filters in source files. The
193
default is not doing that if any of --complevel, --complib, --shuffle
194
or --fletcher32 option is specified.
195
\n""" % os.path.basename(sys.argv[0])
199
opts, pargs = getopt.getopt(sys.argv[1:], 'hvoR:',
202
'dont-copy-userattrs',
208
'keep-source-filters'
211
(type, value, traceback) = sys.exc_info()
212
print "Error parsing the options. The error was:", value
213
sys.stderr.write(usage)
232
if option[0] == '-h':
233
sys.stderr.write(usage)
235
elif option[0] == '-v':
237
elif option[0] == '-o':
239
elif option[0] == '-R':
241
rng = eval("slice("+option[1]+")")
243
print "Error when getting the range parameter."
244
(type, value, traceback) = sys.exc_info()
245
print " The error was:", value
246
sys.stderr.write(usage)
248
elif option[0] == '--dest-title':
250
elif option[0] == '--dont-copy-userattrs':
252
elif option[0] == '--non-recursive':
254
elif option[0] == '--overwrite-nodes':
256
elif option[0] == '--keep-source-filters':
258
elif option[0] == '--complevel':
259
complevel = int(option[1])
260
elif option[0] == '--complib':
262
elif option[0] == '--shuffle':
263
shuffle = int(option[1])
264
elif option[0] == '--fletcher32':
265
fletcher32 = int(option[1])
267
print option[0], ": Unrecognized option"
268
sys.stderr.write(usage)
271
# Abort unless exactly two arguments (source and destination) were passed
273
print "You need to pass both source and destination!."
274
sys.stderr.write(usage)
277
# Catch the files passed as the last arguments
278
src = pargs[0].split(':')
279
dst = pargs[1].split(':')
281
srcfile, srcnode = src[0], "/"
283
srcfile, srcnode = src
285
dstfile, dstnode = dst[0], "/"
287
dstfile, dstnode = dst
290
# case where filename == "filename:" instead of "filename:/"
294
# case where filename == "filename:" instead of "filename:/"
297
# Build the Filters instance
298
if ((complevel, complib, shuffle, fletcher32) == (None,)*4 or keepfilters):
301
if complevel is None: complevel = 0
302
if complevel > 0 and shuffle is None:
306
if complib is None: complib = "zlib"
307
if fletcher32 is None: fletcher32 = 0
308
filters = Filters(complevel=complevel, complib=complib,
309
shuffle=shuffle, fletcher32=fletcher32)
311
# The start, stop and step params:
312
start, stop, step = 0, None, 1 # Defaults
314
start, stop, step = rng.start, rng.stop, rng.step
322
print "Recursive copy:", recursive
323
print "Applying filters:", filters
324
print "Starting copying %s:%s to %s:%s" % (srcfile, srcnode,
328
# Check whether the specified source node is a group or a leaf
329
h5srcfile = openFile(srcfile, 'r')
330
srcnodeobject = h5srcfile.getNode(srcnode)
331
objectclass = srcnodeobject.__class__.__name__
332
# Close the file again
335
stats = {'groups': 0, 'leaves': 0, 'bytes': 0}
336
if isinstance(srcnodeobject, Group):
338
srcfile, dstfile, srcnode, dstnode,
339
title = title, recursive = recursive, filters = filters,
340
copyuserattrs = copyuserattrs, overwritefile = overwritefile,
341
overwrtnodes = overwrtnodes, stats = stats,
342
start = start, stop = stop, step = step)
344
# If not a Group, it should be a Leaf
346
srcfile, dstfile, srcnode, dstnode,
347
title = title, filters = filters, copyuserattrs = copyuserattrs,
348
overwritefile = overwritefile, overwrtnodes = overwrtnodes,
349
stats = stats, start = start, stop = stop, step = step)
351
# Gather some statistics
354
tcopy = round(t2-t1, 3)
355
cpucopy = round(cpu2-cpu1, 3)
356
tpercent = int(round(cpucopy/tcopy, 2)*100)
359
ngroups = stats['groups']
360
nleafs = stats['leaves']
361
nbytescopied = stats['bytes']
363
print "Groups copied:", ngroups, " Leaves copied:", nleafs
365
print "User attrs copied"
367
print "User attrs not copied"
368
print "KBytes copied:", round(nbytescopied/1024.,3)
369
print "Time copying: %s s (real) %s s (cpu) %s%%" % \
370
(tcopy, cpucopy, tpercent)
371
print "Copied nodes/sec: ", round((ngroups+nleafs) / float(tcopy),1)
372
print "Copied KB/s :", int(nbytescopied / (tcopy * 1024))