4
4
# Created: December 15, 2003
5
# Author: Francesc Alted - falted@pytables.org
5
# Author: Francesc Altet - faltet@carabos.com
7
# $Source: /cvsroot/pytables/pytables/tables/EArray.py,v $
8
# $Id: EArray.py,v 1.21.2.4 2004/11/24 12:19:56 falted Exp $
7
# $Source: /home/ivan/_/programari/pytables/svn/cvs/pytables/pytables/tables/EArray.py,v $
8
# $Id: EArray.py 1153 2005-09-06 11:32:10Z faltet $
10
10
########################################################################
30
__version__ = "$Revision: 1.21.2.4 $"
33
import numarray.records as records
35
from tables.utils import convertIntoNA, processRangeRead
36
from tables.Atom import Atom, EnumAtom
37
from tables.Array import Array
41
__version__ = "$Revision: 1153 $"
31
44
# default version for EARRAY objects
32
45
#obversion = "1.0" # initial version
33
obversion = "1.1" # support for complex datatypes
35
import types, warnings, sys
36
from Array import Array
37
from VLArray import Atom
38
from utils import convertIntoNA, processRangeRead
41
import numarray.strings as strings
42
import numarray.records as records
50
class EArray(Array, hdf5Extension.Array, object):
46
#obversion = "1.1" # support for complex datatypes
47
#obversion = "1.2" # This adds support for time datatypes.
48
obversion = "1.3" # This adds support for enumerated datatypes.
51
53
"""Represent an homogeneous dataset in HDF5 file.
53
55
It enables to create new datasets on-disk from Numeric and
79
81
Specific of EArray:
81
83
extdim -- The enlargeable dimension.
82
84
nrows -- The value of the enlargeable dimension.
94
_c_canUndoCreate = True # Can creation/copying be undone and redone?
95
_c_canUndoRemove = True # Can removal be undone and redone?
96
_c_canUndoMove = True # Can movement/renaming be undone and redone?
97
# </undo-redo support>
87
100
def __init__(self, atom = None, title = "",
88
101
filters = None, expectedrows = 1000):
89
102
"""Create EArray instance.
94
107
of the atomic objects to be saved. One of the shape
95
108
dimensions must be 0. The dimension being 0 means that the
96
109
resulting EArray object can be extended along it.
98
111
title -- Sets a TITLE attribute on the array entity.
100
113
filters -- An instance of the Filters class that provides
125
138
def _calcBufferSize(self, atom, extdim, expectedrows, compress):
126
139
"""Calculate the buffer size and the HDF5 chunk size.
128
The logic to do that is based purely in experiments playing
129
with different buffer sizes, chunksize and compression
130
flag. It is obvious that using big buffers optimize the I/O
131
speed. This might (should) be further optimized doing more
136
143
rowsize = atom.atomsize()
138
# Increasing the bufmultfactor would enable a good compression
139
# ratio (up to an extend), but it would affect to reading
140
# performance. Be careful when touching this
141
# F. Altet 2004-11-10
142
#bufmultfactor = int(1000 * 5) # Conservative value
143
bufmultfactor = int(1000 * 10) # Medium value
144
#bufmultfactor = int(1000 * 20) # Agressive value
145
#bufmultfactor = int(1000 * 50) # Very Aggresive value
147
rowsizeinfile = rowsize
148
expectedfsizeinKb = (expectedrows * rowsizeinfile) / 1024
150
if expectedfsizeinKb <= 100:
151
# Values for files less than 100 KB of size
152
buffersize = 5 * bufmultfactor
153
elif (expectedfsizeinKb > 100 and
154
expectedfsizeinKb <= 1000):
155
# Values for files less than 1 MB of size
156
buffersize = 10 * bufmultfactor
157
elif (expectedfsizeinKb > 1000 and
158
expectedfsizeinKb <= 20 * 1000):
159
# Values for sizes between 1 MB and 20 MB
160
buffersize = 20 * bufmultfactor
161
elif (expectedfsizeinKb > 20 * 1000 and
162
expectedfsizeinKb <= 200 * 1000):
163
# Values for sizes between 20 MB and 200 MB
164
buffersize = 40 * bufmultfactor
165
elif (expectedfsizeinKb > 200 * 1000 and
166
expectedfsizeinKb <= 2000 * 1000):
167
# Values for sizes between 200 MB and 2 GB
168
buffersize = 50 * bufmultfactor
169
else: # Greater than 2 GB
170
buffersize = 60 * bufmultfactor
144
expectedfsizeinKb = (expectedrows * rowsize) / 1024
145
buffersize = self._g_calcBufferSize(expectedfsizeinKb)
172
147
# Max Tuples to fill the buffer
173
148
maxTuples = buffersize // rowsize
207
182
def _create(self):
208
183
"""Save a fresh array (i.e., not present on HDF5 file)."""
211
assert isinstance(self.atom, Atom), "The object passed to the IndexArray constructor must be a descendent of the Atom class."
212
assert isinstance(self.atom.shape, types.TupleType), "The Atom shape has to be a tuple for IndexArrays, and you passed a '%s' object." % (self.atom.shape)
187
assert isinstance(self.atom, Atom), "The object passed to the EArray constructor must be a descendent of the Atom class."
188
assert isinstance(self.atom.shape, tuple), "The Atom shape has to be a tuple for EArrays, and you passed a '%s' object." % (self.atom.shape)
213
189
# Version, type, shape, flavor, byteorder
214
190
self._v_version = obversion
215
191
self.type = self.atom.type
192
self.stype = self.atom.stype
216
193
self.shape = self.atom.shape
217
self.flavor = self.atom.flavor
194
self.flavor = self.atom.flavor
218
195
if self.type == "CharType" or isinstance(self.type, records.Char):
219
196
self.byteorder = "non-relevant"
221
198
# Only support for creating objects in system byteorder
222
199
self.byteorder = sys.byteorder
224
201
# extdim computation
225
202
zerodims = numarray.sum(numarray.array(self.shape) == 0)
243
220
#print "chunksizes-->", self._v_chunksize
244
221
self.nrows = 0 # No rows initially
245
222
self.itemsize = self.atom.itemsize
246
self._createEArray("EARRAY", self._v_new_title)
223
self._createEArray(self._v_new_title)
228
Get the enumerated type associated with this array.
230
If this array is of an enumerated type, the corresponding `Enum`
231
instance is returned. If it is not of an enumerated type, a
232
``TypeError`` is raised.
235
if self.atom.stype != 'Enum':
236
raise TypeError("array ``%s`` is not of an enumerated type"
239
return self.atom.enum
248
242
def _checkTypeShape(self, naarr):
249
243
"Test that naarr parameter is shape and type compliant"
264
258
(naarr, self.type)
266
260
# The arrays conforms self expandibility?
267
assert len(self.shape) == len(naarr.shape), \
268
"Sorry, the ranks of the EArray %r (%d) and object to be appended (%d) differ." % (self._v_pathname, len(self.shape), len(naarr.shape))
269
for i in range(len(self.shape)):
271
assert self.shape[i] == naarr.shape[i], \
272
"Sorry, shapes of EArray '%r' and object differ in non-enlargeable dimension (%d) " % (self._v_pathname, i)
261
myshlen = len(self.shape)
262
nashlen = len(naarr.shape)
263
if myshlen != nashlen:
264
raise ValueError("""\
265
the ranks of the appended object (%d) and the ``%s`` EArray (%d) differ"""
266
% (nashlen, self._v_pathname, myshlen))
267
for i in range(myshlen):
268
if i != self.extdim and self.shape[i] != naarr.shape[i]:
269
raise ValueError("""\
270
the shapes of the appended object and the ``%s`` EArray \
271
differ in non-enlargeable dimension %d""" % (self._v_pathname, i))
273
272
# Ok. all conditions are met. Return the numarray object
276
def append(self, object):
277
"""Append the object to this (enlargeable) object"""
278
assert self._v_file.mode <> "r", "Attempt to write over a file opened in read-only mode"
280
# Convert the object into a numarray object
281
naarr = convertIntoNA(object, self.atom)
275
def append(self, sequence):
276
"""Append the sequence to this (enlargeable) object"""
278
if self._v_file.mode == 'r':
279
raise IOError("attempt to write over a file opened in read-only mode")
281
# The sequence needs to be copied to make the operation safe
282
# to in-place conversion.
283
copy = self.stype in ['Time64']
284
# Convert the sequence into a numarray object
285
naarr = convertIntoNA(sequence, self.atom, copy)
282
286
# Check if it is correct type and shape
283
287
naarr = self._checkTypeShape(naarr)
284
288
self._append(naarr)
286
290
def truncate(self, size):
287
291
"Truncate the extendable dimension to at most size rows"
289
#assert size >= 0, "Size should be 0 or a positive value"
290
assert size > 0, "Size should be an integer greater than 0"
291
return self._truncateArray(size)
294
raise ValueError("`size` must be greater than 0")
295
self._truncateArray(size)
294
298
"""Get the metadata info for an array in file."""
295
(self.type, self.shape, self.itemsize, self.byteorder,
300
(type_, self.stype, self.shape, self.itemsize, self.byteorder,
296
301
self._v_chunksize) = self._openArray()
297
304
#print "chunksizes-->", self._v_chunksize
299
306
assert self.extdim >= 0, "extdim < 0: this should never happen!"
300
307
# Compute the real shape for atom:
301
308
shape = list(self.shape)
302
309
shape[self.extdim] = 0
303
if self.type == "CharType" or isinstance(self.type, records.Char):
310
if type_ == "CharType" or isinstance(type_, records.Char):
304
311
# Add the length of the array at the end of the shape for atom
305
312
shape.append(self.itemsize)
306
313
shape = tuple(shape)
307
# Create the atom instance
308
self.atom = Atom(dtype=self.type, shape=shape,
309
flavor=self.attrs.FLAVOR)
314
# Create the atom instance and set definitive type
316
(enum, type_) = self._loadEnum()
317
self.atom = EnumAtom(enum, type_, shape, flavor)
319
self.atom = Atom(stype, shape, flavor)
310
321
# Compute the rowsize for each element
311
322
self.rowsize = self.atom.atomsize()
312
323
# nrows in this instance
321
332
self._v_maxTuples = self._v_buffersize // chunksize
322
333
#print "maxTuples-->", self._v_maxTuples
324
def _g_copy(self, group, name, start, stop, step, title, filters):
335
def _g_copyWithStats(self, group, name, start, stop, step, title, filters):
325
336
"Private part of Leaf.copy() for each kind of leaf"
326
337
# Build the new EArray object
327
object = EArray(atom=self.atom,
330
expectedrows=self.nrows)
331
setattr(group, name, object)
338
object = self._v_file.createEArray(
339
group, name, atom=self.atom, title=title, filters=filters,
340
expectedrows=self.nrows, _log = False)
332
341
# Now, fill the new earray with values from source
333
342
nrowsinbuf = self._v_maxTuples
334
343
# The slices parameter for self.__getitem__
358
367
"""This provides more metainfo in addition to standard __str__"""
367
byteorder = %r""" % (self, self.type, self.shape, self.itemsize, self.nrows,
368
self.extdim, self.flavor, self.byteorder)
374
byteorder = %r""" % (self, self.atom, self.nrows, self.extdim, self.flavor,