~ubuntu-branches/ubuntu/quantal/pytables/quantal

« back to all changes in this revision

Viewing changes to tables/nestedrecords.py

Committer: Bazaar Package Importer
Author(s): Sebastian Dröge
Date: 2005-11-27 20:25:34 UTC
mfrom: (1.1.1 upstream)
Revision ID: james.westby@ubuntu.com-20051127202534-l8jzyd8357krw40h

Tags: 1.1.1-1ubuntu1

* Sync with Debian:
+ Use python 2.4 as default

files added:
ANNOUNCE.txt.in

Makefile

README.txt.in

RELEASE-NOTES.txt.in

VERSION

bench/create-large-number-objects.py

bench/open_close-bench-gzip.h5

bench/open_close-bench.py

bench/open_close-bench2.py

bench/undo_redo.py

bench/undo_redo.txt

doc/Makefile

doc/html/logo3-ombra-web.jpg

doc/html/logo3-ombra.jpg

doc/html/usersguide10.html

doc/html/usersguide11.html

doc/scripts/filenode.py

doc/scripts/pickletrouble.py

doc/text

doc/text/nestedrecords.txt

doc/xml/Makefile

doc/xml/html.css

doc/xml/objecttree.dia

doc/xml/rootUEP1.fig

doc/xml/rootUEP2.fig

doc/xml/saxon

examples/add-column.py

examples/carray1.py

examples/enum.py

examples/nested-iter.py

examples/nested-tut.py

examples/nested1.py

examples/tutorial3-1.py

examples/tutorial3-2.py

examples/undo-redo.py

src/H5Zbzip2.c

src/H5Zbzip2.h

src/Makefile

src/TableExtension.c

src/TableExtension.pyx

src/definitions.pxd

src/typeconv.c

src/typeconv.h

src/utilsExtension.c

src/utilsExtension.pyx

src/version.h.in

tables/Atom.py

tables/CArray.py

tables/Node.py

tables/attributeaccess.py

tables/constants.py

tables/enum.py

tables/exceptions.py

tables/nestedrecords.py

tables/nriterators.py

tables/registry.py

tables/undoredo.py

test/Tables_lzo1.h5

test/Tables_lzo1_shuffle.h5

test/Tables_lzo2.h5

test/Tables_lzo2_shuffle.h5

test/test_attributes.py

test/test_carray.py

test/test_do_undo.py

test/test_enum.py

test/test_filenode_v1.h5

test/test_nestedrecords.py

test/test_nestedtypes.py

test/test_nriterators.py

test/test_timetype.py

test/util.py

test/zerodim-attrs-1.3.h5

test/zerodim-attrs-1.4.h5

files removed:
doc/html/generahtml.sh

doc/html/makebib

doc/html/makeclean

doc/html/makeepss

doc/html/makeeqns

doc/html/makeidx

doc/html/makepage

doc/html/makepdfs

doc/html/makewebs

doc/html/paperbk4.jpg

doc/html/pretty.jpg

doc/html/read-medium-psyco-lzo-comparison-web.png

doc/html/tile.jpg

doc/html/usersguide.bib

doc/html/write-medium-psyco-lzo-comparison-web.png

doc/objecttree.dia

doc/rootUEP1.fig

doc/rootUEP2.fig

doc/xml/logo4-ombra.jpg

doc/xml/objecttree.eps

doc/xml/rootUEP1.eps

doc/xml/rootUEP2.eps

src/calcoffset.c

src/calcoffset.h

src/getfieldfmt.c

src/getfieldfmt.h

src/type-longlong.h

files modified:
ANNOUNCE.txt

LICENSE

MANIFEST.in

README.txt

RELEASE-NOTES.txt

THANKS

TODO.txt

bench/bsddb-table-bench.py

bench/opteron-stress-test.txt

bench/search-bench.py

bench/searchsorted-bench.py

bench/searchsorted-bench2.py

bench/stress-test.py

bench/stress-test2.py

bench/stress-test3.py

bench/table-bench.py

bench/table-bench2.py

bench/widetree.py

bench/widetree2.py

contrib/make_hdf.py

debian/README.Debian

debian/changelog

debian/control

debian/copyright

debian/nctoh5.1

debian/ptdump.1

debian/ptrepack.1

debian/rules

doc/html/indexTimes-itanium-web.png

doc/html/objecttree-h5-web.jpg

doc/html/objecttree-h5.jpg

doc/html/objecttree-web.png

doc/html/read-medium-lzo-zlib-ucl-comparison-web.png

doc/html/read-medium-psyco-lzo-zlib-ucl-comparison-web.png

doc/html/read-medium-psyco-nopsyco-comparison-web.png

doc/html/rootUEP1-web.png

doc/html/rootUEP2-web.png

doc/html/searchTimes-float-itanium-web.png

doc/html/searchTimes-int-itanium-web.png

doc/html/tutorial1-general-web.jpg

doc/html/tutorial1-general.jpg

doc/html/tutorial1-tableview-web.jpg

doc/html/tutorial1-tableview.jpg

doc/html/tutorial2-tableview-web.jpg

doc/html/tutorial2-tableview.jpg

doc/html/usersguide.html

doc/html/usersguide1.html

doc/html/usersguide2.html

doc/html/usersguide3.html

doc/html/usersguide4.html

doc/html/usersguide5.html

doc/html/usersguide6.html

doc/html/usersguide7.html

doc/html/usersguide8.html

doc/html/usersguide9.html

doc/html/write-medium-lzo-zlib-ucl-comparison-web.png

doc/html/write-medium-psyco-lzo-zlib-ucl-comparison-web.png

doc/html/write-medium-psyco-nopsyco-comparison-web.png

doc/usersguide.pdf

doc/xml/objecttree-h5.jpg

doc/xml/tutorial1-general.jpg

doc/xml/tutorial1-tableview.jpg

doc/xml/tutorial2-tableview.jpg

doc/xml/usersguide.bib

doc/xml/usersguide.xml

examples/check_examples.sh

examples/earray1.py

examples/filenodes1.py

examples/tutorial1-1.py

examples/tutorial1-2.py

examples/tutorial2.py

examples/vlarray1.py

examples/vlarray2.py

examples/vlarray3.py

setup.cfg

setup.py

src/H5ARRAY-opt.c

src/H5ARRAY.c

src/H5ARRAY.h

src/H5LT.c

src/H5LT.h

src/H5TB-opt.c

src/H5TB-opt.h

src/H5TB.c

src/H5TB.h

src/H5VLARRAY.c

src/H5VLARRAY.h

src/H5Zlzo.c

src/H5Zucl.c

src/arraytypes.c

src/hdf5Extension.c

src/hdf5Extension.pyx

src/tables.h

src/utils.c

src/utils.h

src/version.h

tables/Array.py

tables/AttributeSet.py

tables/EArray.py

tables/File.py

tables/Group.py

tables/Index.py

tables/IndexArray.py

tables/IsDescription.py

tables/Leaf.py

tables/Table.py

tables/UnImplemented.py

tables/VLArray.py

tables/__init__.py

tables/nodes/FileNode.py

tables/nodes/__init__.py

tables/utils.py

test/test_Numeric.py

test/test_all.py

test/test_backcompat.py

test/test_basics.py

test/test_create.py

test/test_earray.py

test/test_filenode.py

test/test_indexes.py

test/test_indexvalues.py

test/test_lists.py

test/test_numarray.py

test/test_tables.py

test/test_tablesMD.py

test/test_tree.py

test/test_types.py

test/test_vlarray.py

utils/nctoh5

utils/ptdump

utils/ptrepack

Show diffs side-by-side

added added

removed removed

tables/nestedrecords.py

"""

Support for arrays of nested records.

This module provides the `NestedRecArray` and `NestedRecord` classes,

which can be used to handle arrays of nested records in a way which is

compatible with ``numarray.records``.

Nested record arrays are made up by a sequence of nested records. A

nested record is made up of a set of non-nested and nested fields, each

of them having a different name. Non-nested fields have homogeneous

n-dimensional values (where n >= 1), while nested fields consist of a

set of fields (sub-fields), each of them with a different name.

Sub-fields can also be nested.

Several utility functions are provided for creating nested record

arrays.

"""

import sys

import types

import numarray

import numarray.strings

import numarray.records

from tables.attributeaccess import AttributeAccess

import nriterators

__docformat__ = 'reStructuredText'

"""The format of documentation strings in this module."""

def _isThereStructure(formats, descr, buffer):

"""

Check if buffer structure is given. It must be given in order to

disambiguate possible ambiguities.

For an explanation of argument meanings see the `array()` function.

"""

if not (formats or descr):

if buffer is None:

raise ValueError("""``formats`` or ``descr`` arguments """

"""must be given if ``buffer`` is ``None``""")

else:

raise NotImplementedError("""unable to infer the buffer """

"""structure; it must be supplied with ``formats`` or """

"""``descr`` arguments""")

def _onlyOneSyntax(descr, formats, names):

"""

Ensure that buffer structure is specified using either `descr` or

`formats`.

For an explanation of argument meanings see the `array()` function.

"""

if descr and (formats or names):

raise ValueError("""only one syntax can be used to specify """

"""the buffer structure; please use either ``descr`` or """

"""``formats`` and ``names``""")

def _checkFormats(formats):

"""

Check the format of the `formats` list.

For an explanation of argument meanings see the `array()` function.

"""

# Formats description must be a list or a tuple

if not (isinstance(formats, list) or isinstance(formats, tuple)):

raise TypeError("""``formats`` argument must be a list or a tuple""")

# Formats elements must be strings or sequences

for item in nriterators.flattenFormats(formats, check=True):

if item is None:

raise TypeError("""elements of the ``formats`` list must """

"""be strings or sequences""")

def _checkNames(names):

"""

Check the format of the `names` list.

For an explanation of argument meanings see the `array()` function.

"""

# Names description must be a list or a tuple

if not (isinstance(names, list) or isinstance(names, tuple)):

raise TypeError("""``names`` argument must be a list""")

# Names elements must be strings or 2-tuples

for item in nriterators.flattenNames(names, check=True):

if item is None:

raise TypeError("""elements of the ``names`` list must be"""

100

""" strings or 2-tuples""")

101

102

# The names used in the names list should not contain the '/' string

103

for item in nriterators.getSubNames(names):

104

if '/' in item:

105

raise ValueError(

106

"""field names cannot contain the ``/`` character""")

107

108

# For every level of the names structure names must be unique

109

nriterators.checkNamesUniqueness(names)

110

111

112

def _checkDescr(descr):

113

"""

114

Check the format of the `descr` list.

115

116

For an explanation of argument meanings see the `array()` function.

117

"""

118

119

# descr must be a list

120

if not isinstance(descr, list):

121

raise TypeError("""the descr argument must be a list!""")

122

123

# descr must be a list of 2-tuples

124

for item in nriterators.flattenDescr(descr, check=True):

125

if item is None:

126

raise TypeError(

127

"""elements of the `descr` list must be 2-tuples!""")

128

129

130

def _checkFieldsInDescr(descr):
    """
    Validate the field names contained in `descr`.

    The names are extracted from `descr` and handed over to
    `_checkNames()`, which (among other checks) rejects names
    containing the ``/`` character.

    For an explanation of argument meanings see the `array()` function.
    """

    _checkNames(list(nriterators.getNamesFromDescr(descr)))

140

141

142

def makeDescr(formats, names=None):
    """
    Build a ``descr`` list out of `formats` and (optionally) `names`.

    If no `names` are passed fields are automatically named as ``c1``,
    ``c2``...
    """

    return list(nriterators.getDescr(names, formats))

151

152

153

def makeFormats(descr):
    """Build a ``formats`` list out of the given `descr` list."""

    return list(nriterators.getFormatsFromDescr(descr))

157

158

159

def makeNames(descr):
    """Build a ``names`` list out of the given `descr` list."""

    return list(nriterators.getNamesFromDescr(descr))

163

164

165

def _checkBufferStructure(structure, buffer):

166

"""

167

Check the `buffer` structure using the given `structure`.

168

169

The checking is done after flattening both the `structure` and the

170

`buffer`. `structure` is the descr list that describes the buffer

171

structure. buffer` has its usual meaning in this module.

172

"""

173

174

for row in buffer:

175

for item in nriterators.zipBufferDescr(row, structure):

176

if not (isinstance(item, tuple) and len(item) == 2):

177

raise ValueError("""row structure doesn't match that """

178

"""provided by the format specification""")

179

if not isinstance(item[1], str):

180

raise TypeError("""field descriptors must be strings""")

181

182

183

def _matchFormats(seq1, seq2):

184

"""Check if two flat formats lists are equivalent."""

185

186

# Lists must have the same length

187

if len(seq1) != len(seq2):

188

raise ValueError("""buffer structure doesn't match that """

189

"""provided by the format specification""")

190

191

# Elements in the same position must describe the same format

192

for (f1, f2) in zip(seq1, seq2):

193

ra1 = numarray.records.array(buffer=None, formats = [f1])

194

ra2 = numarray.records.array(buffer=None, formats = [f2])

195

if ra1._formats != ra2._formats:

196

raise ValueError("""buffer formats don't match those """

197

"""provided by the format specification""")

198

199

200

def _narrowRecArray(recarray, startField, newFieldNames):
    """
    Take a set of contiguous columns from a ``RecArray``.

    A new ``RecArray`` is built from `recarray` by selecting as many
    contiguous columns as there are names in `newFieldNames`, beginning
    with the column called `startField`.  The selected columns are
    renamed after `newFieldNames`.  The new array is constructed on top
    of the data object of `recarray`, so both share their data.
    """

    firstIndex = recarray._names.index(startField)
    stopIndex = firstIndex + len(newFieldNames)

    # The new array starts where the first selected column starts
    startOffset = recarray.field(firstIndex)._byteoffset
    selectedFormats = recarray._formats[firstIndex:stopIndex]

    return numarray.records.RecArray(
        recarray._data,
        selectedFormats,
        shape=recarray._shape,
        names=newFieldNames,
        byteoffset=startOffset,
        bytestride=recarray._bytestride,
        byteorder=recarray._byteorder,
        aligned=recarray.isaligned())

219

220

221

def array(buffer=None, formats=None, shape=0, names=None,
          byteorder=sys.byteorder, aligned=0, descr=None):
    """
    Create a new instance of a `NestedRecArray`.

    This function can be used to build a new array of nested records.
    The new array is returned as a result.

    The function works much like ``numarray.records.array()``, with some
    differences:

    1. In addition to flat buffers and regular sequences of non-nested
       elements, the `buffer` argument can take regular sequences where
       each element has a structure nested to an arbitrary depth.  Of
       course, all elements in a non-flat buffer must have the same
       format.

    2. The `formats` argument only supports sequences of strings and
       other sequences.  Each string defines the shape and type of a
       non-nested field.  Each sequence contains the formats of the
       sub-fields of a nested field.

       The structure of this argument must match that of the elements in
       `buffer`.  This argument may have a recursive structure.

    3. The `names` argument only supports lists of strings and 2-tuples.
       Each string defines the name of a non-nested field.  Each 2-tuple
       contains the name of a nested field and a list describing the
       names of its sub-fields.

       The structure of this argument must match that of the elements in
       `buffer`.  This argument may have a recursive structure.

    The `descr` argument is a new-style description of the structure of
    the `buffer`.  It is intended to replace the `formats` and `names`
    arguments, so they can not be used at the same time [#descr]_.

    The `descr` argument is a list of 2-tuples, each of them describing
    a field.  The first value in a tuple is the *name* of the field,
    while the second one is a description of its *structure*.  If the
    second value is a string, it defines the format (shape and type) of
    a non-nested field.  Else, it is a list of 2-tuples describing the
    sub-fields of a nested field.

    If `descr` is ``None`` (or omitted), the whole structure of the
    array is tried to be inferred from that of the `buffer`, and
    automatic names (``c1``, ``c2`` etc. on each nested field) are
    assigned to all fields.

    The `descr` argument may have a recursive structure.

    Please note that names used in `names` or `descr` should *not*
    contain the string ``'/'``, since it is used as the field/sub-field
    separator by `NestedRecArray.asRecArray()`.  If the separator is
    found in a name, a ``ValueError`` is raised.

    .. [#descr] The syntax of `descr` is based on that of the
       ``__array_descr__`` attribute in the proposed standard
       `N-dimensional array interface`__.

    __ http://numeric.scipy.org/array_interface.html


    When to use `descr` or `formats`
    ================================

    Since `descr` requires both the name and structure of fields to
    always be specified, the `formats` argument comes more handy when
    one does not want to explicitly specify names.  However it is not
    allowed to use the `names` argument without the `formats` one.  This
    is due to the fact that automatic inference of the `buffer`
    structure is not implemented.  When fully specifying names and
    structure, the `descr` argument is preferred over `formats` and
    `names` for the sake of code legibility and conciseness.


    Examples
    ========

    The following examples will help to clarify the words above.  In
    them, an array of two elements is created.  Each element has three
    fields: a 64-bit integer (``id``), a bi-dimensional 32-bit floating
    point (``pos``) and a nested field (``info``); the nested field has
    two sub-fields: a two-character string (``name``) and a 64-bit
    complex (``value``).

    Example 1
    ---------

    In this example the array is created by specifying both its contents
    and its structure, so the structure of the used arguments must be
    coherent.

    This is how the array would be created in the old-style way,
    i.e. using the `formats` and `names` arguments:

    >>> nra = array(
    ...     [(1, (0.5, 1.0), ('a1', 1j)), (2, (0, 0), ('a2', 1+.1j))],
    ...     names=['id', 'pos', ('info', ['name', 'value'])],
    ...     formats=['Int64', '(2,)Float32', ['a2', 'Complex64']])

    And this is how the array would be created in the new-style way,
    i.e. using the `descr` argument:

    >>> nra = array(
    ...     [(1, (0.5, 1.0), ('a1', 1j)), (2, (0, 0), ('a2', 1+.1j))],
    ...     descr=[('id', 'Int64'), ('pos', '(2,)Float32'),
    ...            ('info', [('name', 'a2'), ('value', 'Complex64')])])

    Note how `formats` and `descr` mimic the structure of each of the
    elements in `buffer`.

    Example 2
    ---------

    Now the array is created from a flat string representing the data in
    memory.  Names will be automatically assigned.  For that to work,
    the resulting array shape and record format must be fully specified.

    >>> datastring = binary_representation_of_data
    >>> nra = array(
    ...     datastring, shape=2,
    ...     formats=['Int64', '(2,)Float32', ['a2', 'Complex64']])

    Byte ordering and alignment is assumed to be that of the host
    machine, since it has not been explicitly stated via the `byteorder`
    and `aligned` arguments.
    """

    # A description of the structure is mandatory, and it must come
    # either as ``descr`` or as ``formats`` (plus, optionally, ``names``)
    _isThereStructure(formats, descr, buffer)
    _onlyOneSyntax(descr, formats, names)

    # Comma-separated strings are accepted in order to keep
    # compatibility with the ``numarray.records.array()`` function
    if isinstance(formats, str):
        formats = formats.split(',')
    if isinstance(names, str):
        names = names.split(',')

    # Whatever the syntax used, ``descr``, ``formats`` and ``names`` get
    # completed from each other and validated
    if descr is None:
        descr = makeDescr(formats, names)
    _checkDescr(descr)
    _checkFieldsInDescr(descr)

    if formats is None:
        formats = makeFormats(descr)
    _checkFormats(formats)

    if names is None:
        names = makeNames(descr)
    _checkNames(names)

    # Flat versions of the structure descriptors
    flatFormats = list(nriterators.flattenFormats(formats))
    flatNames = list(nriterators.flattenNames(names))

    # Record arrays (nested or not): once their formats are known to
    # match, a copy of the flat data is renamed and wrapped
    if isinstance(buffer, numarray.records.RecArray):
        if isinstance(buffer, NestedRecArray):
            _matchFormats(flatFormats, buffer._flatArray._formats)
            flatBuffer = buffer.asRecArray()
        else:
            _matchFormats(flatFormats, buffer._formats)
            flatBuffer = buffer.copy()
        narrowed = _narrowRecArray(
            flatBuffer, flatBuffer._names[0], flatNames)
        return NestedRecArray(narrowed, descr)

    # Lists and tuples: either a list of field arrays, which is the job
    # of ``fromarrays()``, or a list of rows whose structure is checked
    if isinstance(buffer, (list, tuple)):
        firstItem = buffer[0]
        if isinstance(firstItem,
                      (numarray.NumArray, numarray.strings.CharArray)):
            return fromarrays(
                buffer, formats=formats, shape=shape, names=names,
                byteorder=byteorder, aligned=aligned)
        elif buffer:
            _checkBufferStructure(descr, buffer)

    # Flatten the buffer (if any).  Every item of the buffer is taken
    # as a row of the array.
    # NOTE(review): any non-``None`` buffer reaching this point (e.g. a
    # flat string as in Example 2 of the docstring) is iterated as a
    # sequence of rows -- confirm that this is intended.
    if buffer is None:
        flatRows = None
    else:
        flatRows = [
            tuple([value for (value, fmt)
                   in nriterators.zipBufferDescr(row, descr)])
            for row in buffer]

    # A flat record array is created first and then wrapped
    flatArray = numarray.records.array(
        flatRows, flatFormats, shape, flatNames, byteorder, aligned)
    return NestedRecArray(flatArray, descr)

426

427

428

def _checkArrayList(arrayList):

429

"""

430

Check the type of the arraylist argument of fromarrays.

431

432

For an explanation of argument meanings see the `array()` function.

433

"""

434

435

# The argument must be a list or a tuple

436

if not (isinstance(arrayList, list) or isinstance(arrayList, tuple)):

437

raise TypeError("""``arrayList`` argument must be a list or a tuple""")

438

439

def fromarrays(arrayList, formats=None, names=None, shape=0,
               byteorder=sys.byteorder, aligned=0, descr=None):
    """
    Create a new instance of a `NestedRecArray` from field arrays.

    This function can be used to build a new array of nested records
    from a list of arrays, one for each field.  The new array is
    returned as a result.

    The function works much like ``numarray.records.fromarrays()``, but
    `arrayList` may also contain nested fields, i.e. sequences of other
    arrays (nested or not).  All non-nested arrays appearing in
    `arrayList` must have the same length.

    The rest of arguments work as explained in `array()`.


    Example
    =======

    Let us build the sample array used in the examples of `array()`.  In
    the old way:

    >>> nra = fromarrays(
    ...     [[1, 2], [(0.5, 1.0), (0, 0)], [['a1', 'a2'], [1j, 1+.1j]]],
    ...     names=['id', 'pos', ('info', ['name', 'value'])],
    ...     formats=['Int64', '(2,)Float32', ['a2', 'Complex64']])

    In the new way:

    >>> nra = fromarrays(
    ...     [[1, 2], [(0.5, 1.0), (0, 0)], [['a1', 'a2'], [1j, 1+.1j]]],
    ...     descr=[('id', 'Int64'), ('pos', '(2,)Float32'),
    ...            ('info', [('name', 'a2'), ('value', 'Complex64')])])

    Note how `formats` and `descr` mimic the structure of the whole
    `arrayList`.
    """

    _checkArrayList(arrayList)

    # A description of the structure is mandatory, and it must come
    # either as ``descr`` or as ``formats`` (plus, optionally, ``names``)
    _isThereStructure(formats, descr, arrayList)
    _onlyOneSyntax(descr, formats, names)

    # Comma-separated strings are accepted in order to keep
    # compatibility with the ``numarray.records.array()`` function
    if isinstance(formats, str):
        formats = formats.split(',')
    if isinstance(names, str):
        names = names.split(',')

    # Whatever the syntax used, ``descr``, ``formats`` and ``names`` get
    # completed from each other and validated
    if descr is None:
        descr = makeDescr(formats, names)
    _checkDescr(descr)
    _checkFieldsInDescr(descr)

    if formats is None:
        formats = makeFormats(descr)
    _checkFormats(formats)

    if names is None:
        names = makeNames(descr)
    _checkNames(names)

    # Flat versions of the structure descriptors
    flatFormats = list(nriterators.flattenFormats(formats))
    flatNames = list(nriterators.flattenNames(names))

    # The (possibly nested) list of arrays is flattened into a plain
    # list, from which a regular record array is created
    flatArrays = []
    nriterators.flattenArraysList(arrayList, descr, flatArrays)
    flatRecArray = numarray.records.fromarrays(
        flatArrays, formats=flatFormats, names=flatNames,
        shape=shape, byteorder=byteorder, aligned=aligned)

    # Wrap the flat array into a nested one
    return NestedRecArray(flatRecArray, descr)

525

526

527

class NestedRecArray(numarray.records.RecArray):

528

529

"""

530

Array of nested records.

531

532

This is a generalization of the ``numarray.records.RecArray`` class.

533

It supports nested fields and records via the `NestedRecord` class.

534

535

This class is compatible with ``RecArray``. However, part of its

536

behaviour has been extended to support nested fields:

537

538

1. Getting a single item from an array will return a `NestedRecord`,

539

a special kind of ``Record`` with support for nested structures.

540

541

2. Getting a range of items will return another `NestedRecArray`

542

instead of an ordinary ``RecArray``.

543

544

3. Getting a whole field may return a `NestedRecArray` instead of a

545

``NumArray`` or ``CharArray``, if the field is nested.

546

547

Fields and sub-fields can be accessed using both the `field()`

548

method and the ``fields`` interface, which allows accessing fields

549

as Python attributes: ``nrec = nrarr.fields.f1.fields.subf1[4]``.

550

The `field()` method supports the ``'/'`` separator to access

551

sub-fields.

552

553

Nested record arrays can be converted to ordinary record arrays by

554

using the `asRecArray()` method.

555

556

Finally, the constructor of this class is not intended to be used

557

directly by users. Instead, use one of the creation functions

558

(`array()`, `fromarrays()` or the others).

559

"""

560

561

def __init__(self, recarray, descr):

562

super(NestedRecArray, self).__init__(

563

recarray._data, recarray._formats, shape=recarray._shape,

564

names=recarray._names, byteoffset=recarray._byteoffset,

565

bytestride=recarray._bytestride,

566

byteorder=recarray._byteorder, aligned=recarray.isaligned())

567

# ``_strides`` is not properly copied from the original array,

568

# so, the copy must be made by hand. :[

569

self._strides = recarray._strides

570

571

self._flatArray = recarray

572

self.descr = descr

573

574

self.fields = AttributeAccess(self, 'field')

575

"""

576

Provides attribute access to fields.

577

578

For instance, accessing ``recarray.fields.x`` is equivalent to

579

``recarray.field('x')``, and ``recarray.fields.x.fields.y`` is

580

equivalent to ``recarray.field('x/y')``. This functionality is

581

mainly intended for interactive usage from the Python console.

582

"""

583

584

585

def __str__(self):

586

"""Return a string representation of the nested record array."""

587

588

psData = {}

589

psData['psClassName'] = self.__class__.__name__

590

psData['psElems'] = ',\n'.join([str(elem) for elem in self])

591

592

return '''\

593

%(psClassName)s[

594

%(psElems)s

595

]''' % psData

596

597

598

def __repr__(self):

599

"""

600

Return the canonical string representation of the nested record

601

array.

602

"""

603

604

rsData = {}

605

rsData['rsElems'] = '[%s]' % ',\n'.join([str(elem) for elem in self])

606

rsData['rsDescr'] = str(self.descr)

607

rsData['rsShape'] = str(self.shape[0])

608

609

return '''\

610

array(

611

%(rsElems)s,

612

descr=%(rsDescr)s,

613

shape=%(rsShape)s)''' % rsData

614

615

616

def __getitem__(self, key):

617

if not isinstance(key, slice):

618

# The `key` must be a single index.

619

# Let `self._getitem()` do the job.

620

return super(NestedRecArray, self).__getitem__(key)

621

622

# The `key` is a slice.

623

# Delegate selection to flat array and build a nested one from that.

624

return NestedRecArray(self._flatArray[key], self.descr)

625

626

627

def _getitem(self, offset):

628

flatArray = self._flatArray

629

row = (offset - flatArray._byteoffset) / flatArray._strides[0]

630

return NestedRecord(self, row)

631

632

633

def __setitem__(self, key, value):

634

_RecArray = numarray.records.RecArray # Should't it be NestedRecArray?

635

if isinstance(key, slice) and not isinstance(value, _RecArray):

636

# Conversion of the value to an array will need a little help

637

# until structure inference is supported.

638

print 'Value', value

639

print 'Description', self.descr

640

value = array(value, descr=self.descr)

641

#super(NestedRecArray, self).__setitem__(key, value)

642

# Call the setitem method with the flatArray mate instead.

643

# It's extremely important doing this because the shape can

644

# be temporarily modified during the assign process, and self

645

# and self._flatArray may end having different shapes, which

646

# gives problems (specially with numarray > 1.1.1)

647

# F. Altet 2005-06-09

648

self._flatArray.__setitem__(key, value)

649

650

651

# It seems like this method is never called, because __setitem__ calls

652

# the flatArray (RecArray object) __setitem__

653

# F. Altet 2005-06-09

654

# def _setitem(self, offset, value):

655

# row = (offset - self._byteoffset) / self._strides[0]

656

# for i in range(0, self._nfields):

657

# self._flatArray.field(self._names[i])[row] = \

658

# value.field(self._names[i])

659

660

661

def __add__(self, other):

662

"""Add two NestedRecArray objects in a row wise manner."""

663

664

if isinstance(other, NestedRecArray):

665

return NestedRecArray(self._flatArray + other._flatArray,

666

self.descr)

667

else:

668

# Assume other is a RecArray

669

return NestedRecArray(self._flatArray + other, self.descr)

670

671

672

def field(self, fieldName):

673

"""

674

Get field data as an array.

675

676

`fieldName` can be the name or the index of a field in the

677

record array. If it is not nested, a ``NumArray`` or

678

``CharArray`` object representing the values in that field is

679

returned. Else, a `NestedRecArray` object is returned.

680

681

`fieldName` can be used to provide the name of sub-fields. In

682

that case, it will consist of several field name components

683

separated by the string ``'/'``. For instance, if there is a

684

nested field named ``x`` with a sub-field named ``y``, the last

685

one can be accesed by using ``'x/y'`` as the value of

686

`fieldName`.

687

"""

688

689

# fieldName can be an integer, get the corresponding name

690

if isinstance(fieldName, int):

691

fieldName = self.descr[fieldName][0]

692

693

# The descr list of the field whose content is being extracted

694

fieldDescr = [

695

item for item in nriterators.getFieldDescr(fieldName, self.descr)]

696

if fieldDescr == []:

697

raise ValueError("there is no field named ``%s``" % (fieldName,))

698

fieldDescr = fieldDescr[0][1]

699

700

# Case 1) non nested fields (bottom level)

701

if isinstance(fieldDescr, str):

702

# The field content is returned as numarray or chararray

703

return self._flatArray.field(fieldName)

704

705

# Case 2) nested fields (both top and intermediate levels)

706

# We need fully qualified names to access the flat array fields

707

fieldNames = [

708

name for name in nriterators.getNamesFromDescr(fieldDescr)]

709

flatNames = [

710

name for name in nriterators.flattenNames(fieldNames)]

711

712

# This is the flattened name of the original first bottom field.

713

startField = '%s/%s' % (fieldName, flatNames[0])

714

# Get the requested fields from the flat array and build a nested one.

715

newFlatArray = _narrowRecArray(self._flatArray, startField, flatNames)

716

return NestedRecArray(newFlatArray, fieldDescr)

717

718

719

def asRecArray(self, copy=True):
    """
    Return the flat (non-nested) counterpart of this nested array.

    The result is a plain ``RecArray`` containing only the
    bottom-level fields.  Every sub-field is named after the chain of
    its parent fields, from the top level down, joined with ``'/'``.

    When `copy` is true (the default) a copy of the underlying flat
    array is returned.  When it is false the underlying flat array
    itself is returned, so its data area is shared with this object.

    Example
    -------

    Given the following nested array:

    >>> nra = array([(1, (0, 0), ('a1', 1j)), (2, (0, 0), ('a2', 2j))],
    ...     names=['id', 'pos', ('info', ['name', 'value'])],
    ...     formats=['Int64', '(2,)Float32', ['a2', 'Complex64']])

    ``nra.asRecArray()`` yields the same array as:

    >>> ra = numarray.records.array(
    ...     [(1, (0, 0), 'a1', 1j), (2, (0, 0), 'a2', 2j)],
    ...     names=['id', 'pos', 'info/name', 'info/value'],
    ...     formats=['Int64', '(2,)Float32', 'a2', 'Complex64'])

    Note that multi-dimensional fields keep their shape.
    """

    flat = self._flatArray
    if not copy:
        # The caller asked for the shared flat array, not a duplicate.
        return flat
    return flat.copy()

757

758

759

def copy(self):
    """
    Return a new nested array built on a copy of the flat array.

    The underlying flat array is duplicated with its own ``copy()``
    method; the nested description (``descr``) is passed on as is.
    """
    flatCopy = self._flatArray.copy()
    return NestedRecArray(flatCopy, self.descr)

761

762

763

764

class NestedRecord(numarray.records.Record):

    """
    Nested record.

    This is a generalization of the ``numarray.records.Record`` class to
    support nested fields.  It represents a record in a `NestedRecArray`
    or an isolated record.  In the second case, its names are
    automatically set to ``c1``, ``c2`` etc. on each nested field.

    NOTE(review): the constructor below reads ``input._flatArray``, so
    as written it needs an object that already wraps a flat array;
    building an isolated record from a plain sequence is not handled
    in this class -- confirm where (or whether) that case is supported.

    This class is compatible with ``Record``.  However, getting a field
    may return a `NestedRecord` instead of a Python scalar, ``NumArray``
    or ``CharArray``, if the field is nested.

    Fields and sub-fields can be accessed using both the `field()`
    method and the ``fields`` interface, which allows accessing fields
    as Python attributes: ``nfld = nrec.fields.f1.fields.subf1[4]``.
    The `field()` method supports the ``'/'`` separator to access
    sub-fields.

    Nested records can be converted to ordinary records by using the
    `asRecord()` method.
    """

    def __init__(self, input, row=0):
        """
        Bind this record to row `row` of the nested array `input`.

        The base ``Record`` is initialised on the flat array behind
        `input` (``input._flatArray``); afterwards ``self.array`` is
        pointed at the nested array itself, so the methods of this
        class operate on the nested structure.  ``self.row`` is
        presumably set by the base initialiser -- it is read, but never
        assigned, in this class.
        """
        numarray.records.Record.__init__(self, input._flatArray, row)
        self.array = input

        self.fields = AttributeAccess(self, 'field')
        """
        Provides attribute access to fields.

        For instance, accessing ``record.fields.x`` is equivalent to
        ``record.field('x')``, and ``record.fields.x.fields.y`` is
        equivalent to ``record.field('x/y')``.  This functionality is
        mainly intended for interactive usage from the Python console.
        """


    def __str__(self):
        """Return a string representation of the nested record."""

        # This is only defined to avoid falling back to ``Record.__str__()``.
        return repr(self)


    def __repr__(self):
        """Return the canonical string representation of the nested record."""

        nra = self.array
        row = self.row

        # One ``repr()`` per top-level field, taken from this record's
        # row of the nested array, joined into a tuple-like string.
        valueReprs = [
            repr(nra.field(fieldName)[row])
            for (fieldName, fieldFormat) in nra.descr]
        return '(%s)' % ', '.join(valueReprs)


    def field(self, fieldName):
        """
        Get field data.

        If the named field (`fieldName`, a string) is not nested, a
        Python scalar, ``NumArray`` or ``CharArray`` object with the
        value of that field is returned.  Else, a `NestedRecord` object
        is returned.

        `fieldName` can be used to provide the name of sub-fields.  In
        that case, it will consist of several field name components
        separated by the string ``'/'``.  For instance, if there is a
        nested field named ``x`` with a sub-field named ``y``, the last
        one can be accessed by using ``'x/y'`` as the value of
        `fieldName`.
        """
        # Delegate to the nested array and pick this record's row.
        return self.array.field(fieldName)[self.row]


    def asRecord(self):
        """
        Convert a nested record to a non-nested equivalent record.

        This function creates a new vanilla ``Record`` instance
        equivalent to this one by *flattening* its fields.  Only
        bottom-level fields are included in the array.

        The *whole array* to which the record belongs is copied.  If you
        want to repeatedly access nested records as flat records you
        should consider converting the whole nested array into a flat
        one and access its records normally.

        Example
        -------

        Let us take the following nested record:

        >>> nr = NestedRecord([1, (0, 0), ('a1', 1j)])

        Calling ``nr.asRecord()`` would return the same record as
        calling:

        >>> r = numarray.records.Record([1, (0, 0), 'a1', 1j])

        Please note that the shape of multi-dimensional fields is kept.

        NOTE(review): the constructor call in the example passes a plain
        list, while ``__init__`` reads ``input._flatArray`` -- confirm
        that the example is runnable as written.
        """
        return self.array.asRecArray()[self.row]


    def __len__(self):
        """Get the number of fields in this record."""
        return len(self.array.descr)


    def __getitem__(self, fieldName):
        """Get the value of the field `fieldName`."""
        return self.field(fieldName)


    def __setitem__(self, fieldName, value):
        """Set the `value` of the field `fieldName`."""
        self.setfield(fieldName, value)


    def copy(self):
        """
        Make a copy of this record.

        Only one row of the nested recarray is copied.  This is useful in
        some corner cases, for instance

          (nra[0], nra[-1]) = (nra[-1], nra[0])

        doesn't work but

          (nra[0], nra[-1]) = (nra[-1].copy(), nra[0].copy())

        works just fine.

        A record addressed with the negative row ``-1`` is handled
        explicitly, so that the last row is copied rather than an empty
        slice.

        No data are shared between the copy and the source.
        """
        row = self.row

        # ``array[-1:0]`` is an empty slice, so a record bound to row -1
        # needs an open-ended stop to select the last row.  Every other
        # row value (including other negative ones) slices correctly
        # with ``row + 1``.
        if row == -1:
            stop = None
        else:
            stop = row + 1

        # ``asRecArray()`` copies by default, which detaches the
        # one-row array from the source data.
        oneRow = self.array[row:stop].asRecArray()
        nra = NestedRecArray(oneRow, self.array.descr)
        return NestedRecord(nra, 0)

910

911

912

913

## Local Variables:

914

## mode: python

915

## py-indent-offset: 4

916

## tab-width: 4

917

## fill-column: 72

918

## End:

Older »