2
// C++ Implementation: algorithms
7
// Author: Andrea Rizzi <rizzi@kde.org>, (C) 2003
9
// Copyright: See COPYING file that comes with this distribution
12
#include "algorithms.h"
13
#include <qstringlist.h>
17
#define i18n (const char*)
19
DataBaseInterface::ResultList ExactSearchAlgorithm::exec(const QString& query )
21
DataBaseInterface::ResultList res;
22
DataBaseInterface::MainEntry e=di->get(query,0);
24
QStringList trs=e.second.getTranslations();
26
for(QStringList::iterator it=trs.begin();it!=trs.end();++it)
29
emit newResult(QueryResult(*it,e.first.getString(),settings->scoreExact));
31
res.push_back(QueryResult(*it));
33
kdDebug(0) <<"Exact algo found " << res.count() << "entries" << endl;
38
DataBaseInterface::ResultList GenericSearchAlgorithm::exec(const QString& query )
40
DataBaseInterface::ResultList res;
41
// ExactSearchAlgorithm exact(query,settings);
43
for(QValueList<AbstractSearchAlgorithm *>::iterator algoit = algoChain.begin(); algoit!=algoChain.end() && countResults < maxResults; algoit++)
45
connect(*algoit,SIGNAL(newResult(QueryResult)),this,SIGNAL(newResult(QueryResult)));
46
kdDebug(0) << "Algo pointer" << (*algoit) << endl;
47
res+=(*algoit)->exec(query);
48
countResults=res.count();
49
kdDebug(0) << "Count = " << countResults << endl;
50
disconnect(*algoit,SIGNAL(newResult(QueryResult)),this,SIGNAL(newResult(QueryResult)));
55
void GenericSearchAlgorithm::addAlgorithm( AbstractSearchAlgorithm * algo )
57
algoChain.append(algo);
60
DataBaseInterface::ResultList AlphaSearchAlgorithm::exec( const QString & query )
62
DataBaseInterface::ResultList res;
63
DBItemMultiIndex::IndexList il=di->getAlpha(query);
65
for(DBItemMultiIndex::IndexList::iterator it=il.begin();it!=il.end()&&!di->stopNow();++it)
67
DataBaseInterface::MainEntry e=di->getFromIndex(*it);
68
QStringList trs=e.second.getTranslations();
69
for(QStringList::iterator it=trs.begin();it!=trs.end() && !di->stopNow();++it)
71
QueryResult r(di->format(di->simple(*it,true),query),e.first.getString(),settings->scoreAlpha);
76
kdDebug(0) <<"Alpha algo found " << res.count() << "entries" << endl;
81
DataBaseInterface::ResultList SentenceArchiveSearchAlgorithm::exec( const QString & query )
83
DataBaseInterface::ResultList res;
85
DataBaseInterface::MainEntry e = di->getSentence(query);
87
QStringList trs=e.second.getTranslations();
89
kdDebug(0) << "Count in sentence archive " << trs.count()<< endl;
91
for(QStringList::iterator it=trs.begin();it!=trs.end();++it)
93
QueryResult r(di->format(di->simple(*it,true),query),e.first.getString(),settings->scoreSentence);
98
kdDebug(0) <<"Sentence algo found " << res.count() << "entries" << endl;
103
DataBaseInterface::ResultList ChunkByChunkSearchAlgorithm::exec( const QString & query )
106
factory->setQuery(query);
107
QPtrList<AbstractChunk> chunks=factory->chunks();
108
kdDebug(0) << "Number of chunks " << chunks.count() << endl;
109
chunks.setAutoDelete(true); //I should delete the chunks myself
110
QStringList querySeparators=factory->separators();
112
//This prevents recursive loop.
113
if (chunks.count()<=1) return res;
115
QStringList translations,tmpTranslations;
117
translations.push_back(""); //FIXME this is needed to start , but is not good
120
QMap<QString,bool> translationUsed;
123
for(AbstractChunk *it=chunks.first();it && !di->stopNow(); it=chunks.next())
125
kdDebug(0) << "Process next chunk" << endl;
127
QValueList<QueryResult> r=it->translations();
128
kdDebug(0) << "Number of results for this chunk " << r.count() << endl;
131
// kdDebug(0) << "Nothing found for:" << it->translations() << endl;
137
//FIXME: check this, why 0? it is the best one?
138
chunkscore=r[0].score();
139
kdDebug(0) << "ChunkScore " << chunkscore << endl;
140
tmpTranslations.clear();
144
translationUsed.clear();
145
for(ResultList::iterator it1=r.begin();it1!=r.end() &&!di->stopNow(); ++it1)
147
QString chunkTranslation= (*it1).result();
148
if(!translationUsed.contains(chunkTranslation))
150
translationUsed[chunkTranslation]=true;
151
kdDebug(0) << "a translation is: " << chunkTranslation << endl;
152
for(QStringList::iterator it2=translations.begin();it2!=translations.end() && !di->stopNow() ; it2++)
154
QString prevTranslation=*it2;
155
tmpTranslations.push_back(prevTranslation+chunkTranslation+querySeparators[i]);
156
kdDebug(0) << "..appending it to " << prevTranslation << endl;
161
translations=tmpTranslations;
165
//kdDebug(0) << it-> << r[0].result() << "#" << querySeparators[i] << endl;
167
finalscore+=chunkscore;
169
kdDebug(0) << "partial score " << finalscore;
171
kdDebug(0) << "this is finishd" << endl;
172
if(settings->scoreChunkByChunk==0)
173
settings->scoreChunkByChunk=1;
174
// FIXME:fix the score system
175
// finalscore/=(i*100*100/settings->scoreChunkByChunk); //change 100 to 120(?) to lower this result (done)
177
if (finalscore<50) return res;
179
for(QStringList::iterator it2=translations.begin();it2!=translations.end() && !di->stopNow() ; it2++)
181
QString theTranslation=*it2;
182
QueryResult qr(di->format(theTranslation,query),i18n("CHUNK BY CHUNK"),finalscore);
183
qr.setRichOriginal(i18n("<h3>Chunk by chunk</h3>CHANGE THIS TEXT!!!!This translation is"
184
"obtained translating the sentences and using a"
185
"fuzzy sentence translation database.<br>"
186
" <b>Do not rely on it</b>. Translations may be fuzzy.<br>"));
187
qr.setRichResult("<font color=#800000>"+theTranslation+"</font>") ;
198
ChunkByChunkSearchAlgorithm::ChunkByChunkSearchAlgorithm( DataBaseInterface * dbi, DBSESettings * sets ): AbstractSearchAlgorithm(dbi,sets) , factory(0)
204
SentenceArchiveSearchAlgorithm::SentenceArchiveSearchAlgorithm( DataBaseInterface * dbi, DBSESettings * sets ): AbstractSearchAlgorithm(dbi,sets)
208
FuzzyChunkSearchAlgorithm::FuzzyChunkSearchAlgorithm( DataBaseInterface * dbi, DBSESettings * sets ) : AbstractSearchAlgorithm(dbi,sets)
214
DataBaseInterface::ResultList FuzzyChunkSearchAlgorithm::exec( const QString & query )
216
//FIXME: this code is shit too
218
factory->setQuery(query);
219
QPtrList<AbstractChunk> querychunks = factory->chunks();
220
querychunks.setAutoDelete(true);
222
typedef QMap<QString,QValueList<unsigned int> > ResultMap;
223
ResultMap rmap; //result of words index query
224
unsigned int notfound=0,frequent=0,nchunks = querychunks.count();
226
//Get index list for each word
227
for(AbstractChunk *it=querychunks.first(); it &&!di->stopNow() ; it=querychunks.next() )
229
QValueList<uint> locations = (*it).locationReferences();
231
if(locations.count()>0)
233
rmap[(*it).chunkString()] = locations;
235
if(locations.count()>1000) //FIXME NORMALIZE THIS!!!
238
kdDebug(0) << "\""<<(*it).chunkString() << "\" is frequent" <<endl;
247
//Now we have a map (rmap) "word in query->list of occurency"
249
QValueList<unsigned int>::iterator countpos[nchunks+1];
252
QValueList<unsigned int> il;
253
for(int i = 0;i<=nchunks&&!di->stopNow();i++)
254
countpos[i]=il.end();
256
unsigned int bestcount=0;
257
while(!rmap.isEmpty())
259
unsigned int ref,count;
260
ref=(unsigned int)-1;
264
//This will find the min head and count how many times it occurs
265
for(ResultMap::iterator it = rmap.begin();it!=rmap.end()&&!di->stopNow();++it)
267
unsigned int thisref=it.data().first();
281
for(ResultMap::iterator it = rmap.begin();it!=rmap.end()&&!di->stopNow();)
283
it.data().remove(ref);
285
//kdDebug(0)<< ((frequent<(nwords-notfound)) && (it.data().count()>350)) <<endl;
286
//FIXME: I think the frequent word check is not in the right place
287
if(it.data().isEmpty() || (((frequent+notfound)<nchunks) && (it.data().count()>1000)))
291
ResultMap::iterator it2=it;
299
//This should be configurable or optimized:
300
if(count>=(nchunks-notfound)*0.50 && count!=0)
302
il.insert(countpos[count],ref);
303
for(unsigned int i = nchunks;i>=count;i--)
304
if(countpos[i]==countpos[count])
309
//loop on number of words found
312
for(unsigned int wf=nchunks;wf>0;wf-- ){
313
for(QValueList<unsigned int>::iterator it=countpos[wf];it!=countpos[wf-1] ;++it)
314
{ //loop on entries with same number of word found
315
DataBaseInterface::MainEntry e;
316
e=di->getFromIndex(*it);
317
QStringList trs=e.second.getTranslations();
318
for(QStringList::iterator it=trs.begin();it!=trs.end()&&!di->stopNow();++it)
320
unsigned int cinr=factory->chunks(*it).count(); //chunk in result
321
//compute a score, lets kbabel sort now, it should be fast...
322
int score=90*wf/nchunks-(signed int)90*(((nchunks-cinr)>0)?(nchunks-cinr):(cinr-nchunks))/(nchunks*10);
323
if(score>bestscore) bestscore=score;
324
if(score>bestscore*0.40)
326
// kdDebug(0) << "s: "<<score << " wf: "<<wf<<" nwords: "<<nwords<<" winr: "<<winr
327
// <<" 90*wf/nwords: "<<90*wf/nwords << " -:" << 90*(((nwords-winr)>0)?(nwords-winr):(winr-nwords))/(nwords*10)<< endl;
328
// FIXME: format better the richtext
329
QString ori=e.first.getString();
330
QString re=di->format(di->simple(*it,true),query);
331
QueryResult r(re,ori,score);
332
for(QPtrListIterator<AbstractChunk> it(querychunks); it.current() && di->stopNow() ; ++it){
333
ori=ori.replace(QRegExp((*it)->chunkString(),false),"<font color=#000080><u><b>"+(*it)->chunkString()+"</b></u></font>");
335
r.setRichOriginal(ori);
347
DataBaseInterface::ResultList CorrelationSearchAlgorithm::exec( const QString & query )
349
//FIXME, this code is shit.
350
DataBaseInterface::ResultList res;
351
if(di->words(query).count()>1) return res;
352
QMap<QString,float> corRes = di->correlation(query,0,false);
353
float max=0,max1=0,max2=0;
354
QString best,best1,best2;
356
for(QMap<QString,float>::iterator it = corRes.begin(); it !=corRes.end(); ++it)
373
double myscore=0.01*max*settings->scoreDynamic;
374
QueryResult r(di->format(best,query),i18n("DYNAMIC DICT:"),myscore);
375
r.setRichOriginal(i18n("<h3>Dynamic Dictionary</h3>This is a dynamic dictionary created"
376
" looking for correlation of original and translated words.<br>"
377
" <b>Do not rely on it</b>. Translations may be fuzzy.<br>"));
378
r.setRichResult("<font size=+2 color=#A00000>"+di->format(best,query)+"</font>") ;
385
double myscore=0.01*max1*settings->scoreDynamic;
386
QueryResult r(di->format(best1,query),i18n("DYNAMIC DICT:"),myscore);
387
r.setRichOriginal(i18n("<h3>Dynamic Dictionary</h3>This is a dynamic dictionary created"
388
" looking for correlation of original and translated words.<br>"
389
" <b>Do not rely on it</b>. Translations may be fuzzy.<br>"));
390
r.setRichResult("<font size=+2 color=#800000>"+di->format(best1,query)+"</font>") ;
396
kdDebug(0) << "Correlation algorithm found" << res.count() << "results";
401
GenericSearchAlgorithm::GenericSearchAlgorithm( DataBaseInterface * dbi, DBSESettings * sets ): AbstractSearchAlgorithm(dbi,sets)
403
maxResults = 5; //FIXME use as default somthing from DBSESettings
406
SingleWordSearchAlgorithm::SingleWordSearchAlgorithm( DataBaseInterface * dbi, DBSESettings * sets ) : GenericSearchAlgorithm(dbi,sets),
407
exact(dbi,sets), alpha(dbi,sets), sentence(dbi,sets), corr(dbi,sets), chunk(dbi,sets),casefactory(dbi)
409
addAlgorithm(&exact);
410
addAlgorithm(&alpha);
411
addAlgorithm(&sentence);
412
chunk.setChunkFactory(&casefactory);
413
addAlgorithm(&chunk);
417
DataBaseInterface::ResultList SingleWordSearchAlgorithm::exec( const QString & query )
419
if(di->words(query).count()>1)
421
return GenericSearchAlgorithm::exec(query);
425
//#include "algorithms.moc"