228
228
self._coldefs = ColDefs(self)
229
229
self.formats = self._coldefs.formats
232
def from_columns(cls, columns, nrows=0, fill=False):
234
Given a ColDefs object of unknown origin, initialize a new FITS_rec
237
This was originally part of the new_table function in the table module
238
but was moved into a class method since most of its functionality
239
always had more to do with initializing a FITS_rec object than anything
240
else, and much of it also overlapped with FITS_rec._scale_back.
244
columns : sequence of Columns or a ColDefs
245
The columns from which to create the table data. If these
246
columns have data arrays attached that data may be used in
247
initializing the new table. Otherwise the input columns
248
will be used as a template for a new table with the requested
252
Number of rows in the new table. If the input columns have data
253
associated with them, the size of the largest input column is used.
254
Otherwise the default is 0.
257
If `True`, will fill all cells with zeros or blanks. If
258
`False`, copy the data from input, undefined cells will still
259
be filled with zeros/blanks.
262
# read the delayed data
263
for idx in range(len(columns)):
264
arr = columns._arrays[idx]
265
if isinstance(arr, Delayed):
266
if arr.hdu.data is None:
267
columns._arrays[idx] = None
269
columns._arrays[idx] = np.rec.recarray.field(arr.hdu.data,
272
# use the largest column shape as the shape of the record
274
for arr in columns._arrays:
282
raw_data = np.empty(columns.dtype.itemsize * nrows, dtype=np.uint8)
283
raw_data.fill(ord(columns._padding_byte))
284
data = np.recarray(nrows, dtype=columns.dtype, buf=raw_data).view(cls)
286
# Previously this assignment was made from hdu.columns, but that's a
287
# bug since if a _TableBaseHDU has a FITS_rec in its .data attribute
288
# the _TableBaseHDU.columns property is actually returned from
289
# .data._coldefs, so this assignment was circular! Don't make that
291
# All of this is an artifact of the fragility of the FITS_rec class,
292
# and that it can't just be initialized by columns...
293
data._coldefs = columns
294
data.formats = columns.formats
296
# If fill is True we don't copy anything from the column arrays. We're
297
# just using them as a template, and returning a table filled with
302
# Otherwise we have to fill the recarray with data from the input
304
for idx in range(len(columns)):
305
# For each column in the ColDef object, determine the number of
306
# rows in that column. This will be either the number of rows in
307
# the ndarray associated with the column, or the number of rows
308
# given in the call to this function, which ever is smaller. If
309
# the input FILL argument is true, the number of rows is set to
310
# zero so that no data is copied from the original input data.
311
arr = columns._arrays[idx]
316
array_size = len(arr)
318
n = min(array_size, nrows)
320
# TODO: At least *some* of this logic is mostly redundant with the
321
# _convert_foo methods in this class; see if we can eliminate some
322
# of that duplication.
325
# The input column had an empty array, so just use the fill
329
field = np.rec.recarray.field(data, idx)
330
fitsformat = columns.formats[idx]
331
recformat = columns._recformats[idx]
336
if isinstance(recformat, _FormatX):
337
# Data is a bit array
338
if inarr.shape[-1] == recformat.repeat:
339
_wrapx(inarr, outarr, recformat.repeat)
341
elif isinstance(recformat, _FormatP):
342
data._convert[idx] = _makep(inarr, field, recformat,
345
# TODO: Find a better way of determining that the column is meant
346
# to be FITS L formatted
347
elif recformat[-2:] == FITS2NUMPY['L'] and inarr.dtype == bool:
349
# The raw data field should be filled with either 'T' or 'F'
350
# (not 0). Use 'F' as a default
352
# Also save the original boolean array in data._converted so
353
# that it doesn't have to be re-converted
354
data._convert[idx] = np.zeros(field.shape, dtype=bool)
355
data._convert[idx][:n] = inarr
356
# TODO: Maybe this step isn't necessary at all if _scale_back
358
inarr = np.where(inarr == False, ord('F'), ord('T'))
359
elif (columns[idx]._physical_values and
360
columns[idx]._pseudo_unsigned_ints):
362
bzero = columns[idx].bzero
363
data._convert[idx] = np.zeros(field.shape, dtype=inarr.dtype)
364
data._convert[idx][:n] = inarr
366
# Pre-scale rows below the input data
369
inarr = inarr - bzero
370
elif isinstance(columns, _AsciiColDefs):
371
# Regardless whether the format is character or numeric, if the
372
# input array contains characters then it's already in the raw
373
# format for ASCII tables
374
if fitsformat._pseudo_logical:
375
# Hack to support converting from 8-bit T/F characters
376
# Normally the column array is a chararray of 1 character
377
# strings, but we need to view it as a normal ndarray of
378
# 8-bit ints to fill it with ASCII codes for 'T' and 'F'
379
outarr = field.view(np.uint8, np.ndarray)[:n]
380
elif not isinstance(arr, chararray.chararray):
381
# Fill with the appropriate blanks for the column format
382
data._convert[idx] = np.zeros(nrows, dtype=arr.dtype)
383
outarr = data._convert[idx][:n]
388
if inarr.shape != outarr.shape:
389
if inarr.dtype != outarr.dtype:
390
inarr = inarr.view(outarr.dtype)
392
# This is a special case to handle input arrays with
394
# By design each row of the outarray is 1-D, while each row of
395
# the input array may be n-D
397
# The normal case where the first dimension is the rows
398
inarr_rowsize = inarr[0].size
399
inarr = inarr.reshape((n, inarr_rowsize))
400
outarr[:, :inarr_rowsize] = inarr
402
# Special case for strings where the out array only has one
403
# dimension (the second dimension is rolled up into the
405
outarr[:n] = inarr.ravel()
231
411
def __repr__(self):
    """Return the string representation of this record array.

    Delegates directly to ``np.recarray.__repr__`` so the object is
    displayed as a plain recarray.
    """
    return np.recarray.__repr__(self)
349
551
field = np.recarray.field(base, indx)
351
553
if self._convert[indx] is None:
353
if isinstance(recformat, _FormatX):
355
dummy = np.zeros(self.shape + (_nx,), dtype=np.bool_)
356
_unwrapx(field, dummy, _nx)
357
self._convert[indx] = dummy
358
return self._convert[indx]
360
(_str, _bool, _number, _scale, _zero, bscale, bzero, dim) = \
361
self._get_scale_factors(indx)
364
554
if isinstance(recformat, _FormatP):
365
dummy = _VLF([None] * len(self), dtype=recformat.dtype)
366
raw_data = self._get_raw_data()
369
"Could not find heap data for the %r variable-length "
370
"array column." % self.names[indx])
371
for i in range(len(self)):
372
offset = field[i, 1] + self._heapoffset
375
if recformat.dtype == 'a':
376
dt = np.dtype(recformat.dtype + str(1))
377
arr_len = count * dt.itemsize
378
da = raw_data[offset:offset + arr_len].view(dt)
379
da = np.char.array(da.view(dtype=dt), itemsize=count)
380
dummy[i] = decode_ascii(da)
382
dt = np.dtype(recformat.dtype)
383
arr_len = count * dt.itemsize
384
dummy[i] = raw_data[offset:offset + arr_len].view(dt)
385
dummy[i].dtype = dummy[i].dtype.newbyteorder('>')
387
# scale by TSCAL and TZERO
389
for i in range(len(self)):
390
dummy[i][:] = dummy[i] * bscale + bzero
392
# Boolean (logical) column
393
if recformat.dtype == FITS2NUMPY['L']:
394
for i in range(len(self)):
395
dummy[i] = np.equal(dummy[i], ord('T'))
397
self._convert[indx] = dummy
398
return self._convert[indx]
400
# ASCII table, convert strings to numbers
401
if not _str and self._coldefs._tbtype == 'TableHDU':
402
_fmap = {'I': np.int32, 'F': np.float32, 'E': np.float32,
404
_type = _fmap[self._coldefs.formats[indx][0]]
406
# if the string = TNULL, return ASCIITNULL
407
nullval = self._coldefs.nulls[indx].strip().encode('ascii')
408
dummy = field.replace('D'.encode('ascii'),
410
dummy = np.where(dummy.strip() == nullval, str(ASCIITNULL),
413
dummy = np.array(dummy, dtype=_type)
414
except ValueError, e:
416
'%s; the header may be missing the necessary TNULL%d '
417
'keyword or the table contains invalid data' %
420
self._convert[indx] = dummy
424
# Test that the dimensions given in dim are sensible; otherwise
425
# display a warning and ignore them
427
# See if the dimensions already match, if not, make sure the
428
# number items will fit in the specified dimensions
430
actual_shape = dummy[0].shape
432
actual_shape = (dummy[0].itemsize,) + actual_shape
556
converted = self._convert_p(indx, field, recformat)
558
# Handle all other column data types which are fixed-width
560
converted = self._convert_other(indx, field, recformat)
562
self._convert[indx] = converted
565
return self._convert[indx]
567
def _convert_x(self, field, recformat):
568
"""Convert a raw table column to a bit array as specified by the
572
dummy = np.zeros(self.shape + (recformat.repeat,), dtype=np.bool_)
573
_unwrapx(field, dummy, recformat.repeat)
576
def _convert_p(self, indx, field, recformat):
577
"""Convert a raw table column of FITS P or Q format descriptors
578
to a VLA column with the array data returned from the heap.
581
dummy = _VLF([None] * len(self), dtype=recformat.dtype)
582
raw_data = self._get_raw_data()
586
"Could not find heap data for the %r variable-length "
587
"array column." % self.names[indx])
589
for idx in xrange(len(self)):
590
offset = field[idx, 1] + self._heapoffset
591
count = field[idx, 0]
593
if recformat.dtype == 'a':
594
dt = np.dtype(recformat.dtype + str(1))
595
arr_len = count * dt.itemsize
596
da = raw_data[offset:offset + arr_len].view(dt)
597
da = np.char.array(da.view(dtype=dt), itemsize=count)
598
dummy[idx] = decode_ascii(da)
600
dt = np.dtype(recformat.dtype)
601
arr_len = count * dt.itemsize
602
dummy[idx] = raw_data[offset:offset + arr_len].view(dt)
603
dummy[idx].dtype = dummy[idx].dtype.newbyteorder('>')
604
# Each array in the field may now require additional
605
# scaling depending on the other scaling parameters
606
# TODO: The same scaling parameters apply to every
607
# array in the column so this is currently very slow; we
608
# really only need to check once whether any scaling will
609
# be necessary and skip this step if not
610
# TODO: Test that this works for X format; I don't think
611
# that it does--the recformat variable only applies to the P
612
# format not the X format
613
dummy[idx] = self._convert_other(indx, dummy[idx], recformat)
617
def _convert_ascii(self, indx, field):
618
"""Special handling for ASCII table columns to convert columns
619
containing numeric types to actual numeric arrays from the string
623
format = self._coldefs.formats[indx]
624
recformat = ASCII2NUMPY[format[0]]
625
# if the string = TNULL, return ASCIITNULL
626
nullval = str(self._coldefs.nulls[indx]).strip().encode('ascii')
627
if len(nullval) > format.width:
628
nullval = nullval[:format.width]
629
dummy = field.replace('D'.encode('ascii'), 'E'.encode('ascii'))
630
dummy = np.where(dummy.strip() == nullval, str(ASCIITNULL), dummy)
633
dummy = np.array(dummy, dtype=recformat)
634
except ValueError, e:
636
'%s; the header may be missing the necessary TNULL%d '
637
'keyword or the table contains invalid data' % (e, indx + 1))
641
def _convert_other(self, indx, field, recformat):
642
"""Perform conversions on any other fixed-width column data types.
644
This may not perform any conversion at all if it's not necessary, in
645
which case the original column array is returned.
648
if isinstance(recformat, _FormatX):
649
# special handling for the X format
650
return self._convert_x(field, recformat)
652
(_str, _bool, _number, _scale, _zero, bscale, bzero, dim) = \
653
self._get_scale_factors(indx)
655
# ASCII table, convert strings to numbers
657
# For now, check that these are ASCII columns by checking the coldefs
658
# type; in the future all columns (for binary tables, ASCII tables, or
659
# otherwise) should "know" what type they are already and how to handle
660
# converting their data from FITS format to native format and vice
662
if not _str and isinstance(self._coldefs, _AsciiColDefs):
663
field = self._convert_ascii(indx, field)
665
# Test that the dimensions given in dim are sensible; otherwise
666
# display a warning and ignore them
668
# See if the dimensions already match, if not, make sure the
669
# number items will fit in the specified dimensions
671
actual_shape = field[0].shape
673
actual_shape = (field[0].itemsize,) + actual_shape
675
actual_shape = len(field[0])
677
if dim == actual_shape:
678
# The array already has the correct dimensions, so we
679
# ignore dim and don't convert
682
nitems = reduce(operator.mul, dim)
684
actual_nitems = field.itemsize
434
actual_shape = len(dummy[0])
435
if dim == actual_shape:
436
# The array already has the correct dimensions, so we
437
# ignore dim and don't convert
686
actual_nitems = field.shape[1]
687
if nitems > actual_nitems:
689
'TDIM%d value %s does not fit with the size of '
690
'the array items (%d). TDIM%d will be ignored.'
691
% (indx + 1, self._coldefs.dims[indx],
692
actual_nitems, indx + 1))
695
# further conversion for both ASCII and binary tables
696
# For now we've made columns responsible for *knowing* whether their
697
# data has been scaled, but we make the FITS_rec class responsible for
698
# actually doing the scaling
699
# TODO: This also needs to be fixed in the effort to make Columns
700
# responsible for scaling their arrays to/from FITS native values
701
column = self._coldefs[indx]
702
if (_number and (_scale or _zero) and not column._physical_values):
703
# This is to handle pseudo unsigned ints in table columns
704
# TODO: For now this only really works correctly for binary tables
705
# Should it work for ASCII tables as well?
707
if bzero == 2**15 and 'I' in self._coldefs.formats[indx]:
708
field = np.array(field, dtype=np.uint16)
709
elif bzero == 2**31 and 'J' in self._coldefs.formats[indx]:
710
field = np.array(field, dtype=np.uint32)
711
elif bzero == 2**63 and 'K' in self._coldefs.formats[indx]:
712
field = np.array(field, dtype=np.uint64)
713
bzero64 = np.uint64(2 ** 63)
440
nitems = reduce(operator.mul, dim)
442
actual_nitems = dummy.itemsize
715
field = np.array(field, dtype=np.float64)
717
field = np.array(field, dtype=np.float64)
720
np.multiply(field, bscale, field)
722
if self._uint and 'K' in self._coldefs.formats[indx]:
723
# There is a chance of overflow, so be careful
724
test_overflow = field.copy()
726
test_overflow += bzero64
727
except OverflowError:
729
"Overflow detected while applying TZERO{0:d}. "
730
"Returning unscaled data.".format(indx))
444
actual_nitems = dummy.shape[1]
445
if nitems > actual_nitems:
447
'TDIM%d value %s does not fit with the size of '
448
'the array items (%d). TDIM%d will be ignored.'
449
% (indx + 1, self._coldefs.dims[indx],
450
actual_nitems, indx + 1))
453
# further conversion for both ASCII and binary tables
454
if _number and (_scale or _zero):
456
# only do the scaling the first time and store it in _convert
457
self._convert[indx] = np.array(dummy, dtype=np.float64)
459
np.multiply(self._convert[indx], bscale,
462
self._convert[indx] += bzero
463
elif _bool and dummy.dtype != bool:
464
self._convert[indx] = np.equal(dummy, ord('T'))
467
self._convert[indx] = decode_ascii(dummy)
468
except UnicodeDecodeError:
472
nitems = reduce(operator.mul, dim)
473
if self._convert[indx] is None:
474
self._convert[indx] = dummy[:,:nitems]
476
fmt = self._convert[indx].dtype.char
477
dtype = ('|%s%d' % (fmt, dim[-1]), dim[:-1])
478
self._convert[indx].dtype = dtype
732
field = test_overflow
480
self._convert[indx].shape = (dummy.shape[0],) + dim
482
if self._convert[indx] is not None:
483
return self._convert[indx]
735
elif _bool and field.dtype != bool:
736
field = np.equal(field, ord('T'))
739
field = decode_ascii(field)
740
except UnicodeDecodeError:
744
# Apply the new field item dimensions
745
nitems = reduce(operator.mul, dim)
747
field = field[:, :nitems]
749
fmt = field.dtype.char
750
dtype = ('|%s%d' % (fmt, dim[-1]), dim[:-1])
753
field.shape = (field.shape[0],) + dim
487
757
def _clone(self, shape):
599
863
# conversion for both ASCII and binary tables
600
864
if _number or _str:
601
if _number and (_scale or _zero):
865
column = self._coldefs[indx]
866
if _number and (_scale or _zero) and column._physical_values:
602
867
dummy = self._convert[indx].copy()
872
# This will set the raw values in the recarray back to
873
# their non-physical storage values, so the column should
874
# be mark is not scaled
875
column._physical_values = False
608
877
dummy = self._convert[indx]
609
elif self._coldefs._tbtype == 'TableHDU':
878
elif isinstance(self._coldefs, _AsciiColDefs):
610
879
dummy = self._convert[indx]
614
883
# ASCII table, convert numbers to strings
615
if self._coldefs._tbtype == 'TableHDU':
884
if isinstance(self._coldefs, _AsciiColDefs):
885
starts = self._coldefs.starts[:]
886
spans = self._coldefs.spans
616
887
format = self._coldefs.formats[indx].strip()
617
lead = self._coldefs.starts[indx] - loc[indx]
889
# The the index of the "end" column of the record, beyond
890
# which we can't write
891
end = super(FITS_rec, self).field(-1).itemsize
892
starts.append(end + starts[-1])
895
lead = (starts[indx] - starts[indx - 1] -
620
'Column `%s` starting point overlaps to the '
621
'previous column.' % indx + 1)
622
trail = (loc[indx + 1] - widths[indx] -
623
self._coldefs.starts[indx])
902
'Column %r starting point overlaps the '
903
'previous column.' % (indx + 1))
905
trail = starts[indx + 1] - starts[indx] - spans[indx]
626
'Column `%s` ending point overlaps to the next '
627
'column.' % indx + 1)
909
'Column %r ending point overlaps the next '
910
'column.' % (indx + 1))
912
# TODO: It would be nice if these string column formatting
913
# details were left to a specialized class, as is the case
914
# with FormatX and FormatP
628
915
if 'A' in format:
633
fmt = ''.join([(' ' * lead), _pc, format[1:],
634
_fmap[format[0]], (' ' * trail)])
920
fmt = ''.join([_pc, format[1:], ASCII2STR[format[0]],
636
923
# not using numarray.strings's num2char because the
637
924
# result is not allowed to expand (as C/Python does).
638
925
for jdx in range(len(dummy)):
639
926
x = fmt % dummy[jdx]
640
if len(x) > (loc[indx + 1] - loc[indx]):
927
if len(x) > starts[indx + 1] - starts[indx]:
641
928
raise ValueError(
642
"Number `%s` does not fit into the output's "
643
"itemsize of %s." % (x, widths[indx]))
929
"Value %r does not fit into the output's "
930
"itemsize of %s." % (x, spans[indx]))
646
933
# Replace exponent separator in floating point numbers