2
# -*- coding: utf-8 -*-
5
## Here we aim at testing and comparing the performance of different indexing technologies
8
import sys, os, time, pickle
9
from pycvf.core.errors import *
10
from pycvf.core.experiment import Experiment
12
import numpy, sys, time,os,gc
13
import cPickle as pickle
14
from pycvf.core import genericnode
15
from pycvf.lib.info import pmap
16
from pycvf.datatypes import image
17
from pycvf.core.builders import *
19
from pycvf.indexes.pseudoincremental import PseudoIncrementalIndex
20
from pycvf.indexes.load_index import CachedIndex
21
from pycvf.indexes.sashindex import SashIndex
22
from pycvf.databases.limit import DB as limit
23
from pycvf.databases.randomized import DB as randomized
24
from pycvf.nodes.free import Model as free
25
from pycvf.nodes.vectorset.index_build import Model as index_build
26
from pycvf.nodes.vectorset.index_query import Model as index_query
28
from pycvf.apps.model_run import MdlRunner
32
#import matplotlib.axes3d as p3
33
import mpl_toolkits.mplot3d.axes3d as p3
36
def pycvf_model_run(*args,**kwargs):
39
return C.call(*args,**kwargs)
43
### We need to create different databases; for near-duplicate detection we have:
47
# image / image+impairment
48
# shots / shots+impairment (time offset in video)
57
dbwang="""aggregated_database([
58
transformed(limit(classical_db('WANG'),{l}),LN('image.deteriorate.noise')),
59
transformed(limit(classical_db('WANG'),{l}),LN('image.deteriorate.jpegcompress')),
60
transformed(limit(classical_db('WANG'),{l}),LN('image.deteriorate.blackframe')),
61
transformed(limit(classical_db('WANG'),{l}),LN('image.deteriorate.randwarp')),
65
DATABASES_AND_LABELS={
67
"traindb(limit(classical_db('SZuBuD25'),{l}),0.8)",
68
"testdb(limit(classical_db('SZuBuD25'),{l}),0.8)",
69
("default", "lambda x:x[0]")
72
"traindb(limit(classical_db('SZuBuD50'),{l}),0.8)",
73
"testdb(limit(classical_db('SZuBuD50'),{l}),0.8)",
74
("default", "lambda x:x[0]")
77
"traindb(limit(classical_db('SZuBuD101'),{l}),0.8)",
78
"testdb(limit(classical_db('SZuBuD101'),{l}),0.8)",
79
("default", "lambda x:x[0]")
82
"traindb(limit(classical_db('SZuBuD202'),{l}),0.8)",
83
"testdb(limit(classical_db('SZuBuD202'),{l}),0.8)",
84
("default", "lambda x:x[0]")
87
"traindb(limit(classical_db('ALOIILL_red4'),12*12),0.8)",
88
"testdb(limit(classical_db('ALOIILL_red4'),12*12),0.8)",
89
("default", "lambda x:x[0]")
92
"traindb(limit(classical_db('ALOIILL_red4'),24*12),0.8)",
93
"testdb(limit(classical_db('ALOIILL_red4'),24*12),0.8)",
94
("default", "lambda x:x[0]")
97
"traindb(%s,0.8)"%(dbwang.format(l="12"),),
98
"testdb(%s,0.8)"%(dbwang.format(l="12"),),
99
("default", "lambda x:x[0]")
102
"traindb(%s,0.8)"%(dbwang.format(l="25"),),
103
"testdb(%s,0.8)"%(dbwang.format(l="25"),),
104
("default", "lambda x:x[0]")
108
"traindb(%s,0.8)"%(dbwang.format(l="50"),),
109
"testdb(%s,0.8)"%(dbwang.format(l="50"),),
110
("default", "lambda x:x[0]")
114
"traindb(%s,0.8)"%(dbwang.format(l="100"),),
115
"testdb(%s,0.8)"%(dbwang.format(l="100"),),
116
("default", "lambda x:x[0]")
120
"traindb(%s,0.8)"%(dbwang.format(l="200"),),
121
"testdb(%s,0.8)"%(dbwang.format(l="200"),),
122
("default", "lambda x:x[0]")
126
"traindb(%s,0.8)"%(dbwang.format(l="400"),),
127
"testdb(%s,0.8)"%(dbwang.format(l="400"),),
128
("default", "lambda x:x[0]")
131
#(traindb(),testdb())
136
'CM':'image.descriptors.CM()',
137
'HOG':'image.descriptors.HOG()',
138
'GIST':'image.descriptors.GIST()',
139
'LBP':'image.descriptors.LBP()',
142
class MyExperiment(Experiment):
144
This will generate the experiment code for us..
150
## Which regression model is to be used
151
idxclass=[ 'SashIndex', 'CvLSH', 'RCDist' ]#, 'PoullotIndex' ] #, 'krr', 'multinomial' ] ##
152
database=DATABASES_AND_LABELS.keys()
153
feature=FEATURES.keys()
159
### nice names for the parameters
161
idxclass=P0['idxclass']
162
feature=P0['feature']
163
database=P0['database']
164
pickle.dump(P0,file(os.path.join(self.directory,"xparam-%d.pcl")%(self.c,),"w"))
169
## Compute features on the database.... Some of these databases may be really large, and we currently focus
170
## on a rather small amount, so we compute feature files for declared databases and not for pre-existing ones.
171
## this choice of course depends on the situation...
173
## on this occasion we also compute the ground-truth if it has not yet been computed ...
174
from pycvf.core import builders
175
db1=builders.database_builder(DATABASES_AND_LABELS[database][1])
176
db2=builders.database_builder(DATABASES_AND_LABELS[database][0])
177
ldb1=getattr(db1,"labeling_"+DATABASES_AND_LABELS[database][2][0])()
178
ldb2=getattr(db1,"labeling_"+DATABASES_AND_LABELS[database][2][0])()
179
lot=DATABASES_AND_LABELS[database][2][1]
180
lo=eval(DATABASES_AND_LABELS[database][2][1])
182
## ############################################################################################################0
183
## enumerate the classes....
184
## ############################################################################################################0
188
C=pickle.load(file("%s/categs-%s.pcl"%(self.directory,database,),"rb"))
189
pickle.load(file("%s/addresses-%s.pcl"%(self.directory,database,),"rb"))
193
for t1 in db1.keys():
195
if (lo(ldb1[t1]) not in C):
197
for t2 in db2.keys():
199
if (lo(ldb2[t2]) not in C):
201
pickle.dump(C,file("%s/categs-%s.pcl"%(self.directory,database,),"wb"))
202
pickle.dump(A,file("%s/addresses-%s.pcl"%(self.directory,database,),"wb"))
205
## ############################################################################################################0
206
## compute groundtruth....
207
## ############################################################################################################0
209
gt=pickle.load(file("%s/ground-truth-%s.pcl"%(self.directory,database,),"rb"))
212
for t1 in db1.keys():
215
for t2 in db2.keys():
216
if (lo(ldb1[t1])==lo(ldb2[t2])):
220
pickle.dump(gt,file("%s/ground-truth-%s.pcl"%(self.directory,database,),"wb"))
223
gt=pickle.load(file("%s/tt-ground-truth-%s.pcl"%(self.directory,database,),"rb"))
226
for t1 in db1.keys():
229
for t2 in db1.keys():
230
if (lo(ldb1[t1])==lo(ldb1[t2])):
234
pickle.dump(gt,file("%s/tt-ground-truth-%s.pcl"%(self.directory,database,),"wb"))
238
gt=pickle.load(file("%s/aa-ground-truth-%s.pcl"%(self.directory,database,),"rb"))
241
for t1 in db1.keys():
243
for t2 in db1.keys():
244
if (lo(ldb1[t1])==lo(ldb1[t2])):
246
for t2 in db2.keys():
247
if (lo(ldb1[t1])==lo(ldb1[t2])):
250
pickle.dump(gt,file("%s/aa-ground-truth-%s.pcl"%(self.directory,database,),"wb"))
253
## ############################################################################################################0
254
## compute features if necessary....
255
## ############################################################################################################0
257
os.stat("%s/%s-%s-train-0000.mfa"%(self.directory,database,feature))
259
print ' '.join(("pycvf_compute_features",
260
"pycvf_compute_features",
261
"--db", DATABASES_AND_LABELS[database][0],
262
"-m", FEATURES[feature],
263
"-t", "%s/%s-%s-train"%(self.directory,database,feature),
265
r=os.spawnlp(os.P_WAIT,"pycvf_compute_features",
266
"pycvf_compute_features",
267
"--db", DATABASES_AND_LABELS[database][0],
268
"-m", FEATURES[feature],
269
"-t", "%s/%s-%s-train"%(self.directory,database,feature),
274
os.stat("%s/%s-%s-test-0000.mfa"%(self.directory,database,feature))
276
print ' '.join(["pycvf_compute_features",
277
"pycvf_compute_features",
278
"--db", DATABASES_AND_LABELS[database][1],
279
"-m", FEATURES[feature],
280
"-t", "%s/%s-%s-test"%(self.directory,database,feature),
282
r=os.spawnlp(os.P_WAIT,"pycvf_compute_features",
283
"pycvf_compute_features",
284
"--db", DATABASES_AND_LABELS[database][1],
285
"-m", FEATURES[feature],
286
"-t", "%s/%s-%s-test"%(self.directory,database,feature),
292
## ############################################################################################################0
293
## build an index on train features....
294
## ############################################################################################################0
298
os.stat("%s/"%(self.directory,database,feature))
300
print "Building Index"
301
r=os.spawnlp(os.P_WAIT,"pycvf_build_index",
303
"--db", "from_trackfile('%s/%s-%s-train')"%(self.directory,database,feature),
304
"--idxpath", "%s/%s-%s-train-idx")
309
## ############################################################################################################0
310
## train a classifier to decide whether a pair of features actually corresponds to the same clip or not....
311
## ############################################################################################################0
312
#positive_db=pickle.load("tt-ground-truth-%s.pcl")
314
## positive_couple_db : getdbitem.getdbitem()
315
## aggregated_database ( [ positive_couples_db , negative_couples_db ] )
318
#random_couple_db = "randomized(productdb(from_list(map(lambda x:(x,x),pickle.load(file('%s/categs-%s.pcl','rb')))),from_list(map(lambda x:(x,x),pickle.load(file('%s/categs-%s.pcl','rb'))))))"%(self.directory,database,self.directory,database,)
319
random_couple_db = "randomized(productdb(from_list(map(lambda x:(x,x),pickle.load(file('%s/categs-%s.pcl','rb')))),from_list(map(lambda x:(x,x),pickle.load(file('%s/categs-%s.pcl','rb'))))))"%(self.directory,database,self.directory,database,)
321
positive_couple_db_train= "transformed(exploded(from_list(pickle.load(file('%s/tt-ground-truth-%s.pcl','rb'))),LS('list').DefaultStructure()),LN('free','(((%s)(thisnode.get_curaddr())),x)'))"%(self.directory,database,lot)
322
positive_couple_db_test = "transformed(exploded(from_list(pickle.load(file('%s/ground-truth-%s.pcl','rb'))),LS('list').DefaultStructure()),LN('free', '(((%s)(thisnode.get_curaddr())),x)'))"%(self.directory,database,lot)
324
negative_couple_db_train= "transformed(exploded(transformed(from_list(pickle.load(file('%s/aa-ground-truth-%s.pcl','rb'))), free('list(pickle.load(file(\"%s/addresses-%s.pcl\",\"rb\"))-set(x))')), LS('list').DefaultStructure()),LN('free','(((%s)(thisnode.get_curaddr())),x)'))"%(self.directory,database,self.directory,database,lot)
325
negative_couple_db_test = "transformed(exploded(transformed(from_list(pickle.load(file('%s/aa-ground-truth-%s.pcl','rb'))), free('list(pickle.load(file(\"%s/addresses-%s.pcl\",\"rb\"))-set(x))') ), LS('list').DefaultStructure()),LN('free','(((%s)(thisnode.get_curaddr())),x)'))"%(self.directory,database,self.directory,database,lot)
327
#print random_couple_db
328
#print positive_couple_db_train
329
#print negative_couple_db_train
331
#reduce(lambda b,y: b+map(lambda x:(y[0],x) ,y[1]), pickle.load(file("ground-truth-ALOI12.pcl")),[])
334
#pycvf_dbshow --db "transformed(exploded(transformed(from_list(pickle.load(file('near_duplicate_detection-std/tt-ground-truth-ALOI12.pcl','rb'))),free('list(pickle.load(file(\"near_duplicate_detection-std/addresses-ALOI12.pcl\",\"rb\"))-set(x))')),LS('list').DefaultStructure()),LN('free','(((lambda x:x[0])(thisnode.get_curaddr()))[1],x[0])'))"
336
#negative_couple_db= "excludedb(positive_couple_db,random_couple_db)"
337
#print negative_couple_db_train
340
dbexpr_train="randomized(labeled_databases_from_list(dict([(1, (limit.DB({positive_couple_db},1000))),(-1,(limit.DB({negative_couple_db},1000)))])))".format(
341
positive_couple_db=positive_couple_db_train,
342
negative_couple_db=negative_couple_db_train
346
dbexpr_test="randomized(labeled_databases_from_list(dict([(1, (limit.DB({positive_couple_db},1000))),(-1,(limit.DB({negative_couple_db},1000)))])))".format(
347
positive_couple_db=positive_couple_db_test,
348
negative_couple_db=negative_couple_db_test
351
#print positive_couple_db_train
352
#print negative_couple_db_train
355
dbtrain=DATABASES_AND_LABELS[database][0]
356
dbtest=DATABASES_AND_LABELS[database][1]
358
#os.spawnlp(os.P_WAIT,"pycvf_dbshow",
361
# "--db", "%s"%(dbexpr_train,)
364
#print "transformed(%s,naive())"%(dbexpr_train,) # dbtrain,naive()
365
#print "randomized(transformed({db},free('(pycvf.core.builders.database_builder(\"%s\")[x[0]], pycvf.core.builders.database_builder(\"%s\")[x[1]])'%(\"{db1}\",\"{db2}\"))))-free('x[0]')".format(db=dbexpr_train,db1=dbtrain,db2=dbtrain)
367
#r=os.spawnlp(os.P_WAIT,"pycvf_dbshow",
370
#"--db", "randomized(transformed({db},free('(pycvf.core.builders.database_builder(\"%s\")[x[0]], pycvf.core.builders.database_builder(\"%s\")[x[1]])'%(\"{db1}\",\"{db2}\"))-LN('free','x',datatype=DTP('image'))))".format(db=dbexpr_train,db1=dbtrain,db2=dbtrain) #dbtrain,)
375
print "randomized(transformed({db},free('(pycvf.core.builders.database_builder(\"%s\")[x[0]], pycvf.core.builders.database_builder(\"%s\")[x[1]])'%(\"{db1}\",\"{db2}\"))-LN('free','x',datatype=DTP('image'))))".format(db=dbexpr_train,db1="from_trackfile(%s/%s-%s-train)"%(self.directory,database,feature),db2="from_trackfile('%s/%s-%s-train')"%(self.directory,database,feature))
377
r=os.spawnlp(os.P_WAIT,"pycvf_dbshow",
380
"--db", "randomized(transformed({db},free('(pycvf.core.builders.database_builder(\"merged(from_trackfile(\\\\\"%s\\\\\"),from_trackfile(\\\\\"%s\\\\\"))\")[x[0]], pycvf.core.builders.database_builder(\"merged(from_trackfile(\\\\\"%s\\\\\"),from_trackfile(\\\\\"%s\\\\\"))\")[x[1]])'%(\"{db1a}\",\"{db1b}\",\"{db2a}\",\"{db2b}\"))-LN('free','x',datatype=DTP('image'))))".format(db=dbexpr_train,db1a="%s/%s-%s-train"%(self.directory,database,feature),db1b="%s/%s-%s-test"%(self.directory,database,feature),db2a="%s/%s-%s-train"%(self.directory,database,feature),db2b="%s/%s-%s-test"%(self.directory,database,feature)) #dbtrain,)
384
dbtest_cpl="randomized(transformed({db},free('(pycvf.core.builders.database_builder(\"merged(from_trackfile(\\\\\"%s\\\\\"),from_trackfile(\\\\\"%s\\\\\"))\")[x[0]], pycvf.core.builders.database_builder(\"merged(from_trackfile(\\\\\"%s\\\\\"),from_trackfile(\\\\\"%s\\\\\"))\")[x[1]])'%(\"{db1a}\",\"{db1b}\",\"{db2a}\",\"{db2b}\"))-LN('free','x',datatype=DTP('image'))))".format(db=dbexpr_test,db1a="%s/%s-%s-train"%(self.directory,database,feature),db1b="%s/%s-%s-test"%(self.directory,database,feature),db2a="%s/%s-%s-train"%(self.directory,database,feature),db2b="%s/%s-%s-test"%(self.directory,database,feature))
387
#r=os.spawnlp(os.P_WAIT,"pycvf_model_run",
389
# "--db", dbexpr_train,
390
# "-m", "vectorset.train_classification_and_output_model(ML('CLS.weka_bridge','adaboost1'),'xxx')",
400
## ############################################################################################################0
401
## we actually do not need to create pairs for ....
402
## ############################################################################################################0
406
## ############################################################################################################0
407
## we are ready to evaluate the performance of our setting, based on training
408
## ############################################################################################################0
410
# retrieve features in the track file according to the element indexes...
411
#dbtotal=("transformed(model=LN('getdbitem',from_trackfile('%s')),db=transformed(model=LN('address'),db=caltech256({nelemperclass},{numclasses})))"%(feature[0],))
412
#dbtotal=dbtotal.format(**P0)
413
#seed=random.random()
414
#dbtrain="traindb(%s,0.5,%f,)"%(dbtotal,seed,)
415
#dbtest="testdb(%s,0.5,%f)"%(dbtotal,seed,)
416
return {'dbtest_cpl':dbtest_cpl,
419
def eval(self,**kwargs):
420
print "~~~~~~~~~TESTING~~~~~~~~~"
424
"--db", kwargs['dbtest_cpl'],
425
"-m", "vectorset.train_classification_and_output_model(ML('CLS.weka_bridge','adaboost1'))-free('x.save(\"titi\")')",
429
r=os.spawnlp(os.P_WAIT,"pycvf_model_run",
431
"--db", kwargs['dbtest_cpl'],
432
"-m", "vectorset.train_classification_and_output_model(ML('CLS.weka_bridge','adaboost1'))-free('x.save(\"titi\")')",
436
print "/~~~~~~~~~TESTING~~~~~~~~~"
442
def displayresults(self,):