~ubuntu-branches/ubuntu/quantal/pytables/quantal

« back to all changes in this revision

Viewing changes to bench/search-bench.py

Committer: Bazaar Package Importer
Author(s): Sebastian Dröge
Date: 2005-11-27 20:25:34 UTC
mfrom: (1.1.1 upstream)
Revision ID: james.westby@ubuntu.com-20051127202534-l8jzyd8357krw40h

Tags: 1.1.1-1ubuntu1

* Sync with Debian:
+ Use python 2.4 as default

files added:
ANNOUNCE.txt.in

Makefile

README.txt.in

RELEASE-NOTES.txt.in

VERSION

bench/create-large-number-objects.py

bench/open_close-bench-gzip.h5

bench/open_close-bench.py

bench/open_close-bench2.py

bench/undo_redo.py

bench/undo_redo.txt

doc/Makefile

doc/html/logo3-ombra-web.jpg

doc/html/logo3-ombra.jpg

doc/html/usersguide10.html

doc/html/usersguide11.html

doc/scripts/filenode.py

doc/scripts/pickletrouble.py

doc/text

doc/text/nestedrecords.txt

doc/xml/Makefile

doc/xml/html.css

doc/xml/objecttree.dia

doc/xml/rootUEP1.fig

doc/xml/rootUEP2.fig

doc/xml/saxon

examples/add-column.py

examples/carray1.py

examples/enum.py

examples/nested-iter.py

examples/nested-tut.py

examples/nested1.py

examples/tutorial3-1.py

examples/tutorial3-2.py

examples/undo-redo.py

src/H5Zbzip2.c

src/H5Zbzip2.h

src/Makefile

src/TableExtension.c

src/TableExtension.pyx

src/definitions.pxd

src/typeconv.c

src/typeconv.h

src/utilsExtension.c

src/utilsExtension.pyx

src/version.h.in

tables/Atom.py

tables/CArray.py

tables/Node.py

tables/attributeaccess.py

tables/constants.py

tables/enum.py

tables/exceptions.py

tables/nestedrecords.py

tables/nriterators.py

tables/registry.py

tables/undoredo.py

test/Tables_lzo1.h5

test/Tables_lzo1_shuffle.h5

test/Tables_lzo2.h5

test/Tables_lzo2_shuffle.h5

test/test_attributes.py

test/test_carray.py

test/test_do_undo.py

test/test_enum.py

test/test_filenode_v1.h5

test/test_nestedrecords.py

test/test_nestedtypes.py

test/test_nriterators.py

test/test_timetype.py

test/util.py

test/zerodim-attrs-1.3.h5

test/zerodim-attrs-1.4.h5

files removed:
doc/html/generahtml.sh

doc/html/makebib

doc/html/makeclean

doc/html/makeepss

doc/html/makeeqns

doc/html/makeidx

doc/html/makepage

doc/html/makepdfs

doc/html/makewebs

doc/html/paperbk4.jpg

doc/html/pretty.jpg

doc/html/read-medium-psyco-lzo-comparison-web.png

doc/html/tile.jpg

doc/html/usersguide.bib

doc/html/write-medium-psyco-lzo-comparison-web.png

doc/objecttree.dia

doc/rootUEP1.fig

doc/rootUEP2.fig

doc/xml/logo4-ombra.jpg

doc/xml/objecttree.eps

doc/xml/rootUEP1.eps

doc/xml/rootUEP2.eps

src/calcoffset.c

src/calcoffset.h

src/getfieldfmt.c

src/getfieldfmt.h

src/type-longlong.h

files modified:
ANNOUNCE.txt

LICENSE

MANIFEST.in

README.txt

RELEASE-NOTES.txt

THANKS

TODO.txt

bench/bsddb-table-bench.py

bench/opteron-stress-test.txt

bench/search-bench.py

bench/searchsorted-bench.py

bench/searchsorted-bench2.py

bench/stress-test.py

bench/stress-test2.py

bench/stress-test3.py

bench/table-bench.py

bench/table-bench2.py

bench/widetree.py

bench/widetree2.py

contrib/make_hdf.py

debian/README.Debian

debian/changelog

debian/control

debian/copyright

debian/nctoh5.1

debian/ptdump.1

debian/ptrepack.1

debian/rules

doc/html/indexTimes-itanium-web.png

doc/html/objecttree-h5-web.jpg

doc/html/objecttree-h5.jpg

doc/html/objecttree-web.png

doc/html/read-medium-lzo-zlib-ucl-comparison-web.png

doc/html/read-medium-psyco-lzo-zlib-ucl-comparison-web.png

doc/html/read-medium-psyco-nopsyco-comparison-web.png

doc/html/rootUEP1-web.png

doc/html/rootUEP2-web.png

doc/html/searchTimes-float-itanium-web.png

doc/html/searchTimes-int-itanium-web.png

doc/html/tutorial1-general-web.jpg

doc/html/tutorial1-general.jpg

doc/html/tutorial1-tableview-web.jpg

doc/html/tutorial1-tableview.jpg

doc/html/tutorial2-tableview-web.jpg

doc/html/tutorial2-tableview.jpg

doc/html/usersguide.html

doc/html/usersguide1.html

doc/html/usersguide2.html

doc/html/usersguide3.html

doc/html/usersguide4.html

doc/html/usersguide5.html

doc/html/usersguide6.html

doc/html/usersguide7.html

doc/html/usersguide8.html

doc/html/usersguide9.html

doc/html/write-medium-lzo-zlib-ucl-comparison-web.png

doc/html/write-medium-psyco-lzo-zlib-ucl-comparison-web.png

doc/html/write-medium-psyco-nopsyco-comparison-web.png

doc/usersguide.pdf

doc/xml/objecttree-h5.jpg

doc/xml/tutorial1-general.jpg

doc/xml/tutorial1-tableview.jpg

doc/xml/tutorial2-tableview.jpg

doc/xml/usersguide.bib

doc/xml/usersguide.xml

examples/check_examples.sh

examples/earray1.py

examples/filenodes1.py

examples/tutorial1-1.py

examples/tutorial1-2.py

examples/tutorial2.py

examples/vlarray1.py

examples/vlarray2.py

examples/vlarray3.py

setup.cfg

setup.py

src/H5ARRAY-opt.c

src/H5ARRAY.c

src/H5ARRAY.h

src/H5LT.c

src/H5LT.h

src/H5TB-opt.c

src/H5TB-opt.h

src/H5TB.c

src/H5TB.h

src/H5VLARRAY.c

src/H5VLARRAY.h

src/H5Zlzo.c

src/H5Zucl.c

src/arraytypes.c

src/hdf5Extension.c

src/hdf5Extension.pyx

src/tables.h

src/utils.c

src/utils.h

src/version.h

tables/Array.py

tables/AttributeSet.py

tables/EArray.py

tables/File.py

tables/Group.py

tables/Index.py

tables/IndexArray.py

tables/IsDescription.py

tables/Leaf.py

tables/Table.py

tables/UnImplemented.py

tables/VLArray.py

tables/__init__.py

tables/nodes/FileNode.py

tables/nodes/__init__.py

tables/utils.py

test/test_Numeric.py

test/test_all.py

test/test_backcompat.py

test/test_basics.py

test/test_create.py

test/test_earray.py

test/test_filenode.py

test/test_indexes.py

test/test_indexvalues.py

test/test_lists.py

test/test_numarray.py

test/test_tables.py

test/test_tablesMD.py

test/test_tree.py

test/test_types.py

test/test_vlarray.py

utils/nctoh5

utils/ptdump

utils/ptrepack

Show diffs side-by-side

added added

removed removed

bench/search-bench.py

#!/usr/bin/env python

import copy

import sys

import time

import numarray as NA

from numarray import strings

from numarray import random_array

randomvalues = 0

standarddeviation = 10000

# Initialize the random generator always with the same integer

# in order to have reproducible results

random.seed(19)

random_array.seed(19, 20)

randomvalues = 0

worst=0

# class Small(IsDescription):

# _v_indexprops = IndexProps(auto=0, filters=Filters(complevel=1, complib="zlib", shuffle=1))

# var1 = StringCol(length=4, dflt="", indexed=1)

# var2 = IntCol(0, indexed=1)

# var3 = FloatCol(0, indexed=1)

# var4 = BoolCol(0, indexed=1)

Small = {

#"_v_indexprops" : IndexProps(auto=1),

# var1 column will be indexed if not heavy test

"var1" : StringCol(length=4, dflt="", indexed=0, pos=2),

"var1" : StringCol(length=4, dflt="", indexed=1, pos=2),

"var2" : IntCol(0, indexed=1, pos=1),

"var3" : FloatCol(0, indexed=1, pos=0),

#"var4" : BoolCol(0, indexed=1),

100

else:

101

auto = 0

102

Small["_v_indexprops"] = IndexProps(auto=0, filters=filters)

103

if not heavy:

104

# make the index entry indexed as well

105

Small["var1"] = StringCol(length=4, dflt="", indexed=1)

if heavy:

# make the string entry not indexed

Small["var1"] = StringCol(length=4, dflt="", indexed=0)

106

107

# Create the test table

108

100

table = fileh.createTable(fileh.root, 'table', Small, "test table",

109

101

None, nrows)

110

102

t1 = time.time()

111

103

cpu1 = time.clock()

112

# for i in xrange(nrows):

113

# # Assigning a string takes lots of time!

114

# if not heavy:

115

# table.row['var1'] = str(i)

116

# #table.row['var2'] = random.randrange(nrows)

117

# table.row['var2'] = i

118

# table.row['var3'] = nrows-i

119

# #table.row['var3'] = float(nrows-i)

120

# #table.row['var4'] = i % 2

121

# #table.row['var4'] = i > 2

122

# table.row.append()

123

# This way of filling is much faster

124

104

nrowsbuf = table._v_maxTuples

125

#mean = nrows / 2.; stddev = nrows/100.

126

# with a fixed stddev, the compression rate does not change

127

mean = nrows / 2.; stddev = float(standarddeviation)

105

minimum = 0

106

maximum = nrows

128

107

for i in xrange(0, nrows, nrowsbuf):

129

108

if i+nrowsbuf > nrows:

130

109

j = nrows

131

110

else:

132

111

j = i+nrowsbuf

112

if randomvalues:

113

var3 = random_array.uniform(minimum, maximum, shape=[j-i])

114

else:

115

var3 = numarray.arange(i, j, type=numarray.Float64)

116

#var3 += random_array.uniform(-3, 3, shape=[j-i])

117

var2 = numarray.array(var3, type=numarray.Int32)

133

118

var1 = strings.array(None, shape=[j-i], itemsize=4)

134

if randomvalues:

135

var3 = random_array.normal(mean, stddev, shape=[j-i])

136

var2 = numarray.array(var3, type=numarray.Int32)

137

else:

138

var2 = numarray.arange(i, j, type=numarray.Int32)

139

# var3 = numarray.arange(i, j, type=numarray.Float64)

140

var3 = numarray.arange(nrows-i, nrows-j, -1, type=numarray.Float64)

141

119

if not heavy:

142

120

for n in xrange(j-i):

143

121

var1[n] = str("%.4s" % var2[n])

144

122

table.append([var3, var2, var1])

123

table.flush()

145

124

rowswritten += nrows

146

table.flush()

147

125

time1 = time.time()-t1

148

126

tcpu1 = time.clock()-cpu1

149

127

print "Time for filling:", round(time1,3),\

232

210

(round(tidxrows,3), cpuidxrows, tpercent)

233

211

rowseci = irows / tidxrows

234

212

table.row["rowseci"] = rowseci

235

#print "Index rows/sec: ", rowseci

236

213

table.row.append()

237

214

bf.close()

238

215

244

221

var1 = table.cols.var1

245

222

var2 = table.cols.var2

246

223

var3 = table.cols.var3

247

#var4 = table.cols.var4

248

224

if indexmode == "indexed":

249

225

if var2.index.nelements > 0:

250

where = table.whereIndexed

226

where = table._whereIndexed

251

227

else:

252

228

warnings.warn("Not indexed table or empty index. Defaulting to in-kernel selection")

253

229

indexmode = "inkernel"

254

where = table.whereInRange

230

where = table._whereInRange

255

231

elif indexmode == "inkernel":

256

where = table.whereInRange

232

where = table._whereInRange

257

233

if verbose:

258

234

print "Max rows in buf:", table._v_maxTuples

259

235

print "Rows in", table._v_pathname, ":", table.nrows

269

245

tcpu2 = 0.

270

246

results = []

271

247

print "Select mode:", indexmode, ". Selecting for type:", atom

272

if randomvalues:

273

# algorithm to choose a value separated from mean

274

# # If you want to select fewer values, select this

275

# if table.nrows/2 > standarddeviation*3:

276

# # Choose five standard deviations away from mean value

277

# dev = standarddeviation*5

278

# #dev = standarddeviation*math.log10(table.nrows/1000.)

279

280

# This algorithm gives rise to overly asymmetric result values

281

# if table.nrows/2 > standarddeviation*10:

282

# # Choose five standard deviations away from mean value

283

# dev = standarddeviation*4

284

# #dev = standarddeviation*math.log10(table.nrows/1000.)

285

# else:

286

# dev = 100

287

# Yet Another Algorithm

288

if table.nrows/2 > standarddeviation*10:

289

dev = standarddeviation*4.

290

elif table.nrows/2 > standarddeviation:

291

dev = standarddeviation*2.

292

elif table.nrows/2 > standarddeviation/10.:

293

dev = standarddeviation/10.

294

else:

295

dev = standarddeviation/100.

296

297

valmax = int(round((table.nrows/2.)-dev))

298

# split the selection range in regular chunks

299

if riter > valmax*2:

300

riter = valmax*2

301

#print "valmax, riter-->", valmax, riter

302

#chunksize = valmax*2/riter

303

# use a chunksize ten times larger

304

#chunksize = int(round(valmax*2/riter))*10

305

chunksize = (valmax*2/riter)*10

306

# Get a list of integers for the intervals

307

randlist = range(0, valmax, chunksize)

308

randlist.extend(range(table.nrows-valmax, table.nrows, chunksize))

309

# expand the list ten times so as to use the cache

310

randlist = randlist*10

311

# shuffle the list

312

random.shuffle(randlist)

313

# reset the value of chunksize

314

#chunksize = int(round(chunksize/10))

315

# Protection to avoid too large chunksizes with small tables

316

chunksize = chunksize/10

317

# if table.nrows > 1000000:

318

# chunksize = chunksize/10

319

# else:

320

# # chunksize = chunksize/100

321

# chunksize = 100

322

#print "chunksize-->", chunksize

323

#randlist.sort();print "randlist-->", randlist

324

else:

325

chunksize = 3

326

#print "riter-->", riter

248

# The interval in which to look for values. This is approximately equivalent to

249

# the number of elements to select

250

chunksize = 1000 # Change here for selecting more or less entries

251

# Initialize the random generator always with the same integer

252

# in order to have reproducible results

253

random.seed(19)

254

random_array.seed(19, 20)

327

255

for i in xrange(riter):

328

#randlist.sort();print "randlist-->", randlist

329

if randomvalues:

330

rnd = randlist[i]

331

else:

332

rnd = random.randrange(table.nrows)

256

rnd = random.randrange(table.nrows)

333

257

cpu1 = time.clock()

334

258

t1 = time.time()

335

259

if atom == "string":

364

288

if rnd <= p["var2"] < rnd+chunksize]

365

289

elif atom == "float":

366

290

if indexmode in ["indexed", "inkernel"]:

291

t1=time.time()

367

292

results = [p.nrow()

368

293

# for p in where(var3 < 5.)]

369

294

#for p in where(3. <= var3 < 5.)]

372

297

for p in where(rnd <= var3 < rnd+chunksize)]

373

298

# for p in where(1000.-i <= var3 < 1000.+i)]

374

299

# for p in where(100*i <= var3 < 100*(i+1))]

300

#print "time for complete selection-->", time.time()-t1

301

#print "results-->", results, rnd

375

302

else:

376

303

results = [p.nrow() for p in table

377

304

# if p["var3"] < 5.]

380

307

if float(rnd) <= p["var3"] < float(rnd+chunksize)]

381

308

# if 1000.-i <= p["var3"] < 1000.+i]

382

309

# if 100*i <= p["var3"] < 100*(i+1)]

383

# elif atom == "bool":

384

# if indexmode in ["indexed", "inkernel"]:

385

# results = [p.nrow() for p in where(var4 == 0)]

386

# else:

387

# results = [p.nrow() for p in table if p["var4"] == 0]

388

310

else:

389

311

raise ValueError, "Value for atom '%s' not supported." % atom

390

312

rowselected += len(results)

391

#results.sort(); print "selected values-->", results

313

#print "selected values-->", results

392

314

if i == 0:

393

315

# First iteration

394

316

time1 = time.time() - t1

508

430

-r only read test

509

431

-w only write test

510

432

-c sets a compression level (do not set it or 0 for no compression)

511

-l sets the compression library ("zlib", "lzo", "ucl" or "none")

433

-l sets the compression library ("zlib", "lzo", "ucl", "bzip2" or "none")

512

434

-S activate shuffling filter

513

435

-F activate fletcher32 filter

514

436

-n set the number of rows in tables (in krows)

Older »