12
12
from numarray import strings
13
13
from numarray import random_array
16
standarddeviation = 10000
17
15
# Initialize the random generator always with the same integer
18
16
# in order to have reproducible results
20
18
random_array.seed(19, 20)
24
# class Small(IsDescription):
25
# _v_indexprops = IndexProps(auto=0, filters=Filters(complevel=1, complib="zlib", shuffle=1))
26
# var1 = StringCol(length=4, dflt="", indexed=1)
27
# var2 = IntCol(0, indexed=1)
28
# var3 = FloatCol(0, indexed=1)
29
# var4 = BoolCol(0, indexed=1)
32
24
#"_v_indexprops" : IndexProps(auto=1),
33
25
# var1 column will be indexed if not heavy test
34
"var1" : StringCol(length=4, dflt="", indexed=0, pos=2),
26
"var1" : StringCol(length=4, dflt="", indexed=1, pos=2),
35
27
"var2" : IntCol(0, indexed=1, pos=1),
36
28
"var3" : FloatCol(0, indexed=1, pos=0),
37
29
#"var4" : BoolCol(0, indexed=1),
102
94
Small["_v_indexprops"] = IndexProps(auto=0, filters=filters)
104
# make the string entry (var1) indexed as well
105
Small["var1"] = StringCol(length=4, dflt="", indexed=1)
96
# make the string entry not indexed
97
Small["var1"] = StringCol(length=4, dflt="", indexed=0)
107
99
# Create the test table
108
100
table = fileh.createTable(fileh.root, 'table', Small, "test table",
111
103
cpu1 = time.clock()
112
# for i in xrange(nrows):
113
# # Assigning a string takes lots of time!
115
# table.row['var1'] = str(i)
116
# #table.row['var2'] = random.randrange(nrows)
117
# table.row['var2'] = i
118
# table.row['var3'] = nrows-i
119
# #table.row['var3'] = float(nrows-i)
120
# #table.row['var4'] = i % 2
121
# #table.row['var4'] = i > 2
123
# This way of filling is much faster
124
104
nrowsbuf = table._v_maxTuples
125
#mean = nrows / 2.; stddev = nrows/100.
126
# with a fixed stddev, the compression rate does not change
127
mean = nrows / 2.; stddev = float(standarddeviation)
128
107
for i in xrange(0, nrows, nrowsbuf):
129
108
if i+nrowsbuf > nrows:
113
var3 = random_array.uniform(minimum, maximum, shape=[j-i])
115
var3 = numarray.arange(i, j, type=numarray.Float64)
116
#var3 += random_array.uniform(-3, 3, shape=[j-i])
117
var2 = numarray.array(var3, type=numarray.Int32)
133
118
var1 = strings.array(None, shape=[j-i], itemsize=4)
135
var3 = random_array.normal(mean, stddev, shape=[j-i])
136
var2 = numarray.array(var3, type=numarray.Int32)
138
var2 = numarray.arange(i, j, type=numarray.Int32)
139
# var3 = numarray.arange(i, j, type=numarray.Float64)
140
var3 = numarray.arange(nrows-i, nrows-j, -1, type=numarray.Float64)
142
120
for n in xrange(j-i):
143
121
var1[n] = str("%.4s" % var2[n])
144
122
table.append([var3, var2, var1])
145
124
rowswritten += nrows
147
125
time1 = time.time()-t1
148
126
tcpu1 = time.clock()-cpu1
149
127
print "Time for filling:", round(time1,3),\
244
221
var1 = table.cols.var1
245
222
var2 = table.cols.var2
246
223
var3 = table.cols.var3
247
#var4 = table.cols.var4
248
224
if indexmode == "indexed":
249
225
if var2.index.nelements > 0:
250
where = table.whereIndexed
226
where = table._whereIndexed
252
228
warnings.warn("Not indexed table or empty index. Defaulting to in-kernel selection")
253
229
indexmode = "inkernel"
254
where = table.whereInRange
230
where = table._whereInRange
255
231
elif indexmode == "inkernel":
256
where = table.whereInRange
232
where = table._whereInRange
258
234
print "Max rows in buf:", table._v_maxTuples
259
235
print "Rows in", table._v_pathname, ":", table.nrows
271
247
print "Select mode:", indexmode, ". Selecting for type:", atom
273
# algorithm to choose a value separated from mean
274
# # If you want to select fewer values, use this
275
# if table.nrows/2 > standarddeviation*3:
276
# # Choose five standard deviations away from mean value
277
# dev = standarddeviation*5
278
# #dev = standarddeviation*math.log10(table.nrows/1000.)
280
# This algorithm gives rise to overly asymmetric result values
281
# if table.nrows/2 > standarddeviation*10:
282
# # Choose five standard deviations away from mean value
283
# dev = standarddeviation*4
284
# #dev = standarddeviation*math.log10(table.nrows/1000.)
287
# Yet Another Algorithm
288
if table.nrows/2 > standarddeviation*10:
289
dev = standarddeviation*4.
290
elif table.nrows/2 > standarddeviation:
291
dev = standarddeviation*2.
292
elif table.nrows/2 > standarddeviation/10.:
293
dev = standarddeviation/10.
295
dev = standarddeviation/100.
297
valmax = int(round((table.nrows/2.)-dev))
298
# split the selection range in regular chunks
301
#print "valmax, riter-->", valmax, riter
302
#chunksize = valmax*2/riter
303
# use a chunksize ten times larger
304
#chunksize = int(round(valmax*2/riter))*10
305
chunksize = (valmax*2/riter)*10
306
# Get a list of integers for the intervals
307
randlist = range(0, valmax, chunksize)
308
randlist.extend(range(table.nrows-valmax, table.nrows, chunksize))
309
# expand the list ten times so as to use the cache
310
randlist = randlist*10
312
random.shuffle(randlist)
313
# reset the value of chunksize
314
#chunksize = int(round(chunksize/10))
315
# Protection to avoid too large chunksizes with small tables
316
chunksize = chunksize/10
317
# if table.nrows > 1000000:
318
# chunksize = chunksize/10
320
# # chunksize = chunksize/100
322
#print "chunksize-->", chunksize
323
#randlist.sort();print "randlist-->", randlist
326
#print "riter-->", riter
248
# The interval in which to look for values. This is approximately equivalent to
249
# the number of elements to select
250
chunksize = 1000 # Change here for selecting more or less entries
251
# Initialize the random generator always with the same integer
252
# in order to have reproducible results
254
random_array.seed(19, 20)
327
255
for i in xrange(riter):
328
#randlist.sort();print "randlist-->", randlist
332
rnd = random.randrange(table.nrows)
256
rnd = random.randrange(table.nrows)
333
257
cpu1 = time.clock()
335
259
if atom == "string":
380
307
if float(rnd) <= p["var3"] < float(rnd+chunksize)]
381
308
# if 1000.-i <= p["var3"] < 1000.+i]
382
309
# if 100*i <= p["var3"] < 100*(i+1)]
383
# elif atom == "bool":
384
# if indexmode in ["indexed", "inkernel"]:
385
# results = [p.nrow() for p in where(var4 == 0)]
387
# results = [p.nrow() for p in table if p["var4"] == 0]
389
311
raise ValueError, "Value for atom '%s' not supported." % atom
390
312
rowselected += len(results)
391
#results.sort(); print "selected values-->", results
313
#print "selected values-->", results
393
315
# First iteration
394
316
time1 = time.time() - t1
508
430
-r only read test
509
431
-w only write test
510
432
-c sets a compression level (do not set it or 0 for no compression)
511
-l sets the compression library ("zlib", "lzo", "ucl" or "none")
433
-l sets the compression library ("zlib", "lzo", "ucl", "bzip2" or "none")
512
434
-S activate shuffling filter
513
435
-F activate fletcher32 filter
514
436
-n set the number of rows in tables (in krows)