1
/****************************************************************************
3
** Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies).
4
** Contact: Qt Software Information (qt-info@nokia.com)
6
** This file is part of the Qt Assistant of the Qt Toolkit.
9
** Licensees holding valid Qt Commercial licenses may use this file in
10
** accordance with the Qt Commercial License Agreement provided with the
11
** Software or, alternatively, in accordance with the terms contained in
12
** a written agreement between you and Nokia.
15
** GNU General Public License Usage
16
** Alternatively, this file may be used under the terms of the GNU
17
** General Public License versions 2.0 or 3.0 as published by the Free
18
** Software Foundation and appearing in the file LICENSE.GPL included in
19
** the packaging of this file. Please review the following information
20
** to ensure GNU General Public Licensing requirements will be met:
21
** http://www.fsf.org/licensing/licenses/info/GPLv2.html and
22
** http://www.gnu.org/copyleft/gpl.html. In addition, as a special
23
** exception, Nokia gives you certain additional rights. These rights
24
** are described in the Nokia Qt GPL Exception version 1.3, included in
25
** the file GPL_EXCEPTION.txt in this package.
27
** Qt for Windows(R) Licensees
28
** As a special exception, Nokia, as the sole copyright holder for Qt
29
** Designer, grants users of the Qt/Eclipse Integration plug-in the
30
** right for the Qt/Eclipse Integration to link to functionality
31
** provided by Qt Designer and its related libraries.
33
** If you are unsure which license is appropriate for your use, please
34
** contact the sales department at qt-sales@nokia.com.
36
****************************************************************************/
38
#include "HelpIndex.h"
42
#include <QStringList>
43
#include <QApplication>
45
#include <QTextStream>
46
#include <QtAlgorithms>
50
#include <QTextDocument>
56
// Default constructor: marks the term with a frequency of -1.
Term() : frequency(-1) {}
57
// Builds a term from its text t, its frequency f and the list l of
// documents it is associated with.
Term( const QString &t, int f, QVector<Document> l ) : term( t ), frequency( f ), documents( l ) {}
60
// Documents associated with this term.
QVector<Document>documents;
61
// Orders terms by ascending frequency.
bool operator<( const Term &i2 ) const { return frequency < i2.frequency; }
64
// Stream extraction operator for Document; l receives the result.
// NOTE(review): the body is not visible in this excerpt - presumably it
// mirrors operator<< (docNumber then frequency as qint16); confirm.
QDataStream &operator>>( QDataStream &s, Document &l )
71
// Stream insertion operator for Document: writes docNumber followed by
// frequency, each cast to qint16.
QDataStream &operator<<( QDataStream &s, const Document &l )
73
// NOTE(review): if the Document fields are wider than 16 bits, values
// outside the qint16 range will not round-trip through these casts -
// confirm the field types.
s << (qint16)l.docNumber;
74
s << (qint16)l.frequency;
78
// Constructs an index for the documentation directory dp (stored in
// docPath). The hp argument is not used in the lines visible here.
HelpIndex::HelpIndex( const QString &dp, const QString &hp )
79
: QObject( 0 ), docPath( dp )
83
// No document list has been supplied; makeIndex() tests this flag.
alreadyHaveDocList = false;
84
lastWindowClosed = false;
85
// Track the application's lastWindowClosed() signal so that
// filterNext() can stop processing documents once it has fired.
connect( qApp, SIGNAL(lastWindowClosed()),
86
this, SLOT(setLastWinClosed()) );
88
// Single-shot, zero-interval timer whose timeout runs filterNext().
// The timer is created with this object as its QObject parent.
m_pTimer = new QTimer(this);
89
m_pTimer->setSingleShot(true);
90
m_pTimer->setInterval(0);
91
connect(m_pTimer, SIGNAL(timeout()), this, SLOT(filterNext()));
94
// Constructs an index from a caller-supplied document list dl.
// NOTE(review): none of the lines visible for this constructor create
// m_pTimer, while makeIndex() calls m_pTimer->start() - confirm the timer
// is initialised on this construction path before makeIndex() is used.
HelpIndex::HelpIndex( const QStringList &dl, const QString &hp )
99
// The document list is already known; makeIndex() tests this flag.
alreadyHaveDocList = true;
100
lastWindowClosed = false;
101
// Track the application's lastWindowClosed() signal (see setLastWinClosed()).
connect( qApp, SIGNAL(lastWindowClosed()),
102
this, SLOT(setLastWinClosed()) );
105
// Slot connected to qApp's lastWindowClosed() signal by the constructors.
// Sets the flag that filterNext() checks before processing a document.
void HelpIndex::setLastWinClosed()
107
lastWindowClosed = true;
110
// Setter taking a dictionary file name f.
// NOTE(review): the body is not visible in this excerpt - presumably it
// stores f as the path used by writeDict()/readDict(); confirm.
void HelpIndex::setDictionaryFile( const QString &f )
115
// Setter taking a document-list file name f.
// NOTE(review): the body is not visible in this excerpt - presumably it
// stores f in docListFile, which writeDocumentList()/readDocumentList()
// open; confirm.
void HelpIndex::setDocListFile( const QString &f )
120
// Setter taking a list of documents lst.
// NOTE(review): the body is not visible in this excerpt - presumably it
// replaces docList with lst; confirm.
void HelpIndex::setDocList( const QStringList &lst )
125
// Starts an indexing run: announces the number of documents through
// indexingStart() and then starts m_pTimer, whose timeout is connected
// to filterNext() in the QString constructor.
void HelpIndex::makeIndex()
127
// Only taken when no document list was supplied at construction.
// NOTE(review): the guarded statement is not visible in this excerpt -
// presumably it calls setupDocumentList(); confirm.
if ( !alreadyHaveDocList )
130
// Clear any earlier "last window closed" state before starting.
lastWindowClosed = false;
131
emit indexingStart( docList.count() );
134
m_pTimer->start(); //singleshot
137
// Timer slot that indexes one document per invocation and then re-arms
// the timer, so the work is spread over successive timer timeouts.
void HelpIndex::filterNext()
139
// Stop when every document has been handled or the last window closed.
if(m_iCurItem < docList.count() && !lastWindowClosed)
141
// docList entries are parsed as URLs and converted to local file paths.
QUrl url(docList.at(m_iCurItem));
142
parseDocument( url.toLocalFile(), m_iCurItem );
143
emit indexingProgress( m_iCurItem );
145
// NOTE(review): the increment of m_iCurItem and the branch taken when
// indexing is finished are not visible in this excerpt.
m_pTimer->start(); //singleshot
151
// Fills docList and titleList from the "*.html" entries of a directory.
// NOTE(review): the declaration of d is not visible - presumably a QDir
// on docPath; confirm.
void HelpIndex::setupDocumentList()
157
filters.append(QLatin1String("*.html"));
158
QStringList lst = d.entryList(filters);
159
QStringList::ConstIterator it = lst.constBegin();
160
for ( ; it != lst.constEnd(); ++it )
162
// Each entry is stored as a "file:" URL string built from docPath.
QString filename=QLatin1String("file:") + docPath + QLatin1String("/") + *it ;
163
docList.append(filename);
164
// titleList is appended in step with docList, one title per document.
titleList.append(getDocumentTitle(filename));
168
// Records one occurrence of the word str in document number docNum.
void HelpIndex::insertInDict( const QString &str, int docNum )
170
// "amp" and "nbsp" are special-cased.
// NOTE(review): the guarded statement is not visible - presumably an early
// return so that HTML entity names are not indexed; confirm.
if ( str == QLatin1String("amp") || str == QLatin1String("nbsp"))
177
// For an existing entry e: a document other than the most recently added
// one gets a new Document record with frequency 1 ...
if ( e->documents.last().docNumber != docNum )
178
e->documents.append( Document(docNum, 1 ) );
180
// ... NOTE(review): presumably the else branch - the same document again
// only bumps the frequency of the last record; confirm.
e->documents.last().frequency++;
182
// Word not seen before: create a new entry for it.
// NOTE(review): the Entry is allocated with new and stored as a raw
// pointer - confirm where dict's entries are deleted.
dict.insert( str, new Entry( docNum ) );
186
// Looks for a charset declaration inside a "<meta" tag of the document
// and returns "utf-8" when the resulting encoding name is empty.
QString HelpIndex::getCharsetForDocument(QFile *file)
189
// NOTE(review): the declaration of s is not visible - presumably a
// QTextStream on file; confirm, and confirm the file position is restored
// for callers that read the same QFile afterwards (see parseDocument()).
QString contents = s.readAll();
192
// Only this one case-insensitive search for "<meta" is visible here.
int start = contents.indexOf(QLatin1String("<meta"), 0, Qt::CaseInsensitive);
194
int end = contents.indexOf(QLatin1String(">"), start);
195
// NOTE(review): the substring starts 5 characters after start but its
// length is end-start, so it extends past the closing '>'; the regular
// expression below still matches inside it.
QString meta = contents.mid(start+5, end-start);
196
meta = meta.toLower();
197
// Captures the characters after "charset=" up to a quote or whitespace.
QRegExp r(QLatin1String("charset=([^\"\\s]+)"));
198
if (r.indexIn(meta) != -1) {
204
// Fall back to UTF-8 when no encoding name was found.
if (encoding.isEmpty())
205
return QLatin1String("utf-8");
209
// Reads the file filename, scans its text character by character and
// passes each collected word to insertInDict() for document docNum.
void HelpIndex::parseDocument( const QString &filename, int docNum )
211
QFile file( filename );
212
if ( !file.open(QFile::ReadOnly) ) {
213
qWarning( "can not open file %s", qPrintable(filename) );
217
QTextStream s(&file);
218
// The charset is detected from the same QFile that s reads from.
QString en = getCharsetForDocument(&file);
219
// NOTE(review): QTextCodec::codecForName() returns 0 for an unknown name -
// confirm that passing 0 to setCodec() is acceptable here.
s.setCodec(QTextCodec::codecForName(en.toLatin1().constData()));
221
QString text = s.readAll();
226
const QChar *buf = text.unicode();
231
while ( j < text.length() ) {
232
// '<' and '&' begin markup (a tag or an entity); the word collected so
// far is flushed to the dictionary.
// NOTE(review): the surrounding valid/i bookkeeping is not visible.
if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {
235
insertInDict( QString(str,i), docNum );
240
// '>' and ';' end the markup started above.
if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) {
249
// Word characters are letters, digits and '_'; they are stored
// lower-cased, and the i < 63 guard caps the collected word length.
// NOTE(review): str is presumably a 64-element QChar buffer; confirm.
if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {
250
str[i] = c.toLower();
254
insertInDict( QString(str,i), docNum );
260
insertInDict( QString(str,i), docNum );
264
// Serializes the dictionary: for every entry in dict it writes the number
// of documents followed by the document vector itself.
// NOTE(review): the declarations of f and s and the write of the key are
// not visible in this excerpt.
void HelpIndex::writeDict()
267
if ( !f.open(QFile::WriteOnly ) )
270
for(QHash<QString, Entry *>::Iterator it = dict.begin(); it != dict.end(); ++it) {
272
// NOTE(review): it.value() is dereferenced without a null check, while
// query() evaluates dict[ term ], and QHash::operator[] inserts a
// default (null) value for a missing key - confirm null entries cannot
// reach this loop.
s << it.value()->documents.count();
273
s << it.value()->documents;
279
// Opens two files for writing: docListFile and docListFile + ".titles".
// NOTE(review): the statements that write to them are not visible here -
// presumably docList and titleList respectively; confirm.
void HelpIndex::writeDocumentList()
281
QFile f( docListFile );
282
if ( !f.open(QFile::WriteOnly ) )
287
// Companion file, named after the document list with a ".titles" suffix.
QFile f1( docListFile+".titles" );
288
if ( !f1.open(QFile::WriteOnly ) )
290
QDataStream s1( &f1 );
295
// Loads dictionary entries from a file into dict until the stream ends.
// NOTE(review): the declarations of f, s, key and numOfDocs and the reads
// that fill them are not visible in this excerpt.
void HelpIndex::readDict()
298
if ( !f.open(QFile::ReadOnly ) )
305
// Reused for every entry read in the loop below.
QVector<Document> docs;
306
while ( !s.atEnd() ) {
309
// Size the vector to the document count of the current entry.
docs.resize(numOfDocs);
311
// NOTE(review): each Entry is allocated with new and stored as a raw
// pointer - confirm where dict's entries are deleted, including any
// entries already present for the same key.
dict.insert( key, new Entry( docs ) );
317
// Opens two files for reading: docListFile and docListFile + ".titles".
// NOTE(review): the statements that read from them are not visible here -
// presumably they fill docList and titleList respectively; confirm.
void HelpIndex::readDocumentList()
319
QFile f( docListFile );
320
if ( !f.open(QFile::ReadOnly ) )
324
// Companion file, named after the document list with a ".titles" suffix.
QFile f1( docListFile+".titles" );
325
if ( !f1.open(QFile::ReadOnly ) )
327
QDataStream s1( &f1 );
331
// Runs a search and returns entries of docList for the matching documents.
//   terms    - search words; a word containing '*' is treated as a wildcard.
//   termSeq  - word sequences; when non-empty, each candidate document is
//              additionally checked with searchForPattern().
//   seqWords - passed through to searchForPattern().
// Returns an empty list when a plain term is unknown or no terms remain.
QStringList HelpIndex::query( const QStringList &terms, const QStringList &termSeq, const QStringList &seqWords )
333
QList<Term> termList;
334
for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it ) {
336
// Wildcard term: expand it to dictionary words and merge their documents
// into a single synthetic term named "dummy".
if ( (*it).contains(QLatin1Char('*')) ) {
337
QVector<Document> wcts = setupDummyTerm( getWildcardTerms( *it ) );
338
termList.append( Term(QLatin1String("dummy"), wcts.count(), wcts ) );
339
// NOTE(review): QHash::operator[] inserts a default (null) value when the
// key is missing, so this lookup can add null entries to dict - confirm
// that code iterating dict (e.g. writeDict()) tolerates them.
} else if ( dict[ *it ] ) {
341
termList.append( Term( *it, e->documents.count(), e->documents ) );
343
return QStringList();
346
if ( !termList.count() )
347
return QStringList();
350
// Start from the documents of the first term in termList and keep only
// those that every remaining term also contains.
// NOTE(review): any sorting of termList before this point is not visible.
QVector<Document> minDocs = termList.takeFirst().documents;
351
for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) {
353
QVector<Document> docs = t->documents;
354
// No increment in the loop header: the iterator is advanced in the body,
// either explicitly or through the value returned by erase().
for(QVector<Document>::Iterator minDoc_it = minDocs.begin(); minDoc_it != minDocs.end(); ) {
356
for (QVector<Document>::ConstIterator doc_it = docs.constBegin(); doc_it != docs.constEnd(); ++doc_it ) {
357
// Same document found in both lists: accumulate its frequency.
if ( (*minDoc_it).docNumber == (*doc_it).docNumber ) {
358
(*minDoc_it).frequency += (*doc_it).frequency;
364
// NOTE(review): presumably reached only when no match was found, dropping
// the document from the candidate list; confirm.
minDoc_it = minDocs.erase( minDoc_it );
372
// Without word sequences every remaining candidate is a result.
if ( termSeq.isEmpty() ) {
373
for(QVector<Document>::Iterator it = minDocs.begin(); it != minDocs.end(); ++it)
374
results << docList.at((int)(*it).docNumber);
379
// Otherwise each candidate file is checked against the word sequences.
for(QVector<Document>::Iterator it = minDocs.begin(); it != minDocs.end(); ++it) {
380
fileName = docList[ (int)(*it).docNumber ];
381
if ( searchForPattern( termSeq, seqWords, fileName ) )
387
// Returns the title of the document at URL fullFileName: the text between
// "<title>" and "</title>", or the translated string "Untitled" when no
// such span is found. Results are cached per local file name.
QString HelpIndex::getDocumentTitle( const QString &fullFileName )
389
QUrl url(fullFileName);
390
QString fileName = url.toLocalFile();
392
// Serve repeated requests from the cache without reopening the file.
if (documentTitleCache.contains(fileName))
393
return documentTitleCache.value(fileName);
395
QFile file( fileName );
396
if ( !file.open( QFile::ReadOnly ) ) {
397
qWarning( "cannot open file %s", qPrintable(fileName) );
400
// NOTE(review): no codec is set on this stream in the visible lines,
// unlike parseDocument(), which applies the detected charset.
QTextStream s( &file );
401
QString text = s.readAll();
403
// 7 is the length of "<title>", so start is the first title character.
// NOTE(review): when "<title>" is absent indexOf() returns -1 and start
// becomes 6; a later "</title>" would then yield unrelated text.
int start = text.indexOf(QLatin1String("<title>"), 0, Qt::CaseInsensitive) + 7;
404
int end = text.indexOf(QLatin1String("</title>"), 0, Qt::CaseInsensitive);
406
QString title = tr("Untitled");
407
// Only a non-empty span between the two tags replaces the default.
if (end - start > 0) {
408
title = text.mid(start, end - start);
409
// Titles that look like rich text are reduced to plain text.
// NOTE(review): doc is presumably a QTextDocument loaded with the title;
// its declaration is not visible here.
if (Qt::mightBeRichText(title)) {
412
title = doc.toPlainText();
415
documentTitleCache.insert(fileName, title);
419
// Matches the wildcard expression term against the keys of dict.
// NOTE(review): the code that collects matches and the return statement
// are not visible - presumably the matching keys are returned; confirm.
QStringList HelpIndex::getWildcardTerms( const QString &term )
422
// Break the expression into literal fragments and "*" markers.
QStringList terms = split( term );
423
QStringList::Iterator iter;
425
// Every dictionary key is tested against the fragment list.
for(QHash<QString, Entry*>::Iterator it = dict.begin(); it != dict.end(); ++it) {
428
QString text( it.key() );
429
for ( iter = terms.begin(); iter != terms.end(); ++iter ) {
430
if ( *iter == QLatin1String("*") ) {
434
// A leading literal fragment is compared with the key's first character.
if ( iter == terms.begin() && (*iter)[0] != text[0] ) {
438
// Search for the fragment from the current position in the key.
index = text.indexOf( *iter, index );
439
// For the final fragment the last occurrence is used and its position is
// compared with the offset at which the fragment would end the key.
if ( *iter == terms.last() && index != (int)text.length()-1 ) {
440
index = text.lastIndexOf( *iter );
441
if ( index != (int)text.length() - (int)(*iter).length() ) {
448
// Continue the search after the fragment just matched.
index += (*iter).length();
462
// Splits a wildcard expression str into literal fragments and "*" markers
// appended to lst.
// NOTE(review): the declarations of lst and j, the loop around the second
// group of statements and the return are not visible in this excerpt.
QStringList HelpIndex::split( const QString &str )
466
int i = str.indexOf(QLatin1Char('*'), j );
468
// A leading '*' is emitted as its own marker.
if (str.startsWith(QLatin1String("*")))
469
lst << QLatin1String("*");
472
// Non-empty literal text before a '*' is emitted, followed by the marker.
if ( i > j && i <= (int)str.length() ) {
473
lst << str.mid( j, i - j );
474
lst << QLatin1String("*");
477
// Look for the next '*' starting at j.
i = str.indexOf(QLatin1Char('*'), j );
480
// Whatever remains from j to the end of str is the final literal fragment.
int l = str.length() - 1;
481
if ( str.mid( j, l - j + 1 ).length() > 0 )
482
lst << str.mid( j, l - j + 1 );
487
// Builds one combined document list for the words in terms, containing
// each document at most once. query() uses the result for wildcard terms.
QVector<Document> HelpIndex::setupDummyTerm( const QStringList &terms )
489
QList<Term> termList;
490
for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it) {
494
// NOTE(review): the lookup that produces e is not visible in this excerpt.
termList.append( Term( *it, e->documents.count(), e->documents ) );
497
QVector<Document> maxList(0);
498
// NOTE(review): the guarded statement is not visible - presumably it
// returns the empty maxList when no term was found; confirm.
if ( !termList.count() )
502
// Seed the result with the documents of the last term in the list.
// NOTE(review): any sorting of termList before this point is not visible.
maxList = termList.takeLast().documents;
503
for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) {
505
QVector<Document> docs = t->documents;
506
for (QVector<Document>::iterator docIt = docs.begin(); docIt != docs.end(); ++docIt ) {
507
// Add only documents not already present in the result.
if ( maxList.indexOf( *docIt ) == -1 )
508
maxList.append( *docIt );
514
// Records the current word position (wordNum) for str when str has an
// entry in miniDict; searchForPattern() inserts those entries.
// NOTE(review): any update of wordNum is not visible in this excerpt.
void HelpIndex::buildMiniDict( const QString &str )
516
// NOTE(review): if miniDict is a QHash, operator[] inserts a null entry
// for every word that is not already present - confirm that is acceptable.
if ( miniDict[ str ] )
517
miniDict[ str ]->positions.append( wordNum );
521
bool HelpIndex::searchForPattern( const QStringList &patterns, const QStringList &words, const QString &fileName )
524
QString fName = url.toLocalFile();
526
if ( !file.open( QFile::ReadOnly ) ) {
527
qWarning( "cannot open file %s", qPrintable(fName) );
533
QStringList::ConstIterator cIt = words.begin();
534
for ( ; cIt != words.end(); ++cIt )
535
miniDict.insert( *cIt, new PosEntry( 0 ) );
537
QTextStream s( &file );
538
QString text = s.readAll();
540
const QChar *buf = text.unicode();
545
while ( j < text.length() ) {
546
if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {
549
buildMiniDict( QString(str,i) );
554
if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) {
563
if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {
564
str[i] = c.toLower();
568
buildMiniDict( QString(str,i) );
574
buildMiniDict( QString(str,i) );
577
QStringList::ConstIterator patIt = patterns.begin();
580
QList<uint>::iterator aIt;
581
for ( ; patIt != patterns.end(); ++patIt ) {
582
wordLst = (*patIt).split(QLatin1Char(' '));
583
a = miniDict[ wordLst[0] ]->positions;
584
for ( int j = 1; j < (int)wordLst.count(); ++j ) {
585
b = miniDict[ wordLst[j] ]->positions;
587
while ( aIt != a.end() ) {
588
if ( b.contains( *aIt + 1 )) {
592
aIt = a.erase( aIt );