92
94
bool XMLParser::endDocument() {
93
95
if (!text.isNull() && !text.isEmpty()) {
94
list += QStringList::split(QRegExp("\\W"), text);
96
addWords (list, text);
95
97
text = QString::null;
100
QStringList XMLParser::getList() {
102
WordMap XMLParser::getList() {
106
/***************************************************************************/
108
KProgressDialog *progressDialog() {
109
KProgressDialog *pdlg = new KProgressDialog(0, "progressDialog", i18n("Creating word list"), i18n("Parsing the KDE documentation..."), false);
110
pdlg->setAllowCancel (false);
111
pdlg->showCancelButton (false);
112
pdlg->setAutoReset(false);
113
pdlg->setAutoClose(false);
114
pdlg->progressBar()->setTotalSteps(100);
115
pdlg->progressBar()->setProgress(0);
119
bool saveWordList (WordMap map, QString filename) {
120
QFile file(filename);
121
if(!file.open(IO_WriteOnly))
124
QTextStream stream(&file);
125
stream.setEncoding (QTextStream::UnicodeUTF8);
127
stream << "WPDictFile\n";
128
WordMap::ConstIterator it;
129
for (it = map.begin(); it != map.end(); ++it)
130
stream << it.key() << "\t" << it.data() << "\t2\n";
135
/***************************************************************************/
137
void addWords (WordMap &map, QString line) {
138
QStringList words = QStringList::split(QRegExp("\\W"), line);
140
QStringList::ConstIterator it;
141
for (it = words.begin(); it != words.end(); ++it) {
142
if (!(*it).contains(QRegExp("\\d|_"))) {
143
QString key = (*it).lower();
144
if (map.contains(key))
152
void addWords (WordMap &map, WordMap add) {
153
WordList::WordMap::ConstIterator it;
154
for (it = add.begin(); it != add.end(); ++it)
155
if (map.contains(it.key()))
156
map[it.key()] += it.data();
158
map[it.key()] = it.data();
161
void addWordsFromFile (WordMap &map, QString filename, QTextStream::Encoding encoding, QTextCodec *codec) {
162
QFile xmlfile(filename);
163
QXmlInputSource source (&xmlfile);
165
QXmlSimpleReader reader;
166
reader.setFeature ("http://trolltech.com/xml/features/report-start-end-entity", true);
167
reader.setContentHandler (&parser);
170
if (reader.parse(source)) // try to load the file as an xml-file
171
addWords(map, parser.getList());
173
QFile wpdfile(filename);
174
if (wpdfile.open(IO_ReadOnly)) {
175
QTextStream stream(&wpdfile);
176
stream.setEncoding (QTextStream::UnicodeUTF8);
178
if (!stream.atEnd()) {
179
QString s = stream.readLine();
180
if (s == "WPDictFile") { // Contains the file a weighted word list?
181
// We can assume that weighted word lists are always UTF8 coded.
182
while (!stream.atEnd()) {
183
QString s = stream.readLine();
184
if (!(s.isNull() || s.isEmpty())) {
185
QStringList list = QStringList::split("\t", s);
187
int weight = list[1].toInt(&ok);
188
if (ok && (weight > 0)) {
189
if (map.contains(list[0]))
190
map[list[0]] += weight;
192
map[list[0]] = weight;
197
else { // Count the words in an ordinary text file
198
QFile file(filename);
199
if (file.open(IO_ReadOnly)) {
200
QTextStream stream(&file);
202
stream.setCodec (codec);
204
stream.setEncoding (encoding);
205
while (!stream.atEnd())
206
addWords (map, stream.readLine());
218
WordMap parseFiles (QStringList files, QTextStream::Encoding encoding, QTextCodec *codec, KProgressDialog *pdlg) {
220
int steps = files.count();
224
QStringList::ConstIterator it;
225
for (progress = 1, it = files.begin(); it != files.end(); ++progress, ++it) {
226
addWordsFromFile (map, *it, encoding, codec);
228
if (steps != 0 && progress*100/steps > percent) {
229
percent = progress*100/steps;
230
pdlg->progressBar()->setProgress(percent);
231
qApp->processEvents (20);
237
WordMap mergeFiles (QMap<QString,int> files, KProgressDialog *pdlg) {
238
pdlg->setLabel (i18n("Merging dictionaries..."));
240
qApp->processEvents (20);
243
int steps = files.count();
245
float totalWeight = 0;
246
long long maxWeight = 0;
248
QMap<QString,float> map;
249
QMap<QString,int>::ConstIterator it;
250
for (progress = 1, it = files.begin(); it != files.end(); ++progress, ++it) {
252
addWordsFromFile (fileMap, it.key(), QTextStream::UnicodeUTF8, 0);
254
long long weight = 0;
255
WordMap::ConstIterator iter;
256
for (iter = fileMap.begin(); iter != fileMap.end(); ++iter)
257
weight += iter.data();
258
float factor = 1.0 * it.data() / weight;
259
totalWeight += it.data();
260
if (weight > maxWeight)
263
for (iter = fileMap.begin(); iter != fileMap.end(); ++iter)
264
if (map.contains(iter.key()))
265
map[iter.key()] += iter.data() * factor;
267
map[iter.key()] = iter.data() * factor;
269
if (steps != 0 && progress*100/steps > percent) {
270
percent = progress*100/steps;
271
pdlg->progressBar()->setProgress(percent);
272
qApp->processEvents (20);
277
if (1.0 * maxWeight * totalWeight > 1000000000)
278
factor = 1000000000 / totalWeight;
280
factor = 1.0 * maxWeight;
283
QMap<QString,float>::ConstIterator iter;
284
for (iter = map.begin(); iter != map.end(); ++iter)
285
resultMap[iter.key()] = (int)(factor * iter.data() + 0.5);
290
WordMap parseKDEDoc (QString language, KProgressDialog *pdlg) {
291
pdlg->setLabel (i18n("Parsing the KDE documentation..."));
293
qApp->processEvents (20);
295
QStringList files = KApplication::kApplication()->dirs()->findAllResources ("html", language + "/*.docbook", true, true);
296
if ((files.count() == 0) && (language.length() == 5)) {
297
language = language.left(2);
298
files = KApplication::kApplication()->dirs()->findAllResources ("html", language + "/*.docbook", true, true);
301
return parseFiles (files, QTextStream::UnicodeUTF8, 0, pdlg);
304
WordMap parseFile (QString filename, QTextStream::Encoding encoding, QTextCodec *codec, KProgressDialog *pdlg) {
305
pdlg->setLabel (i18n("Parsing file..."));
307
qApp->processEvents (20);
309
QStringList files = filename;
311
return parseFiles (files, encoding, codec, pdlg);
314
WordMap parseDir (QString directory, QTextStream::Encoding encoding, QTextCodec *codec, KProgressDialog *pdlg) {
315
pdlg->setLabel (i18n("Parsing directory..."));
317
qApp->processEvents (20);
319
QStringList directories;
320
directories += directory;
322
for (QStringList::Iterator it = directories.begin(); it != directories.end(); it = directories.remove(it)) {
324
const QFileInfoList *entries = dir.entryInfoList ("*", QDir::Dirs | QDir::Files | QDir::NoSymLinks | QDir::Readable);
326
QFileInfoListIterator iter (*entries);
327
while ((iter.current()) != 0) {
328
QString name = iter.current()->fileName();
329
if (name != "." && name != "..") {
330
if (iter.current()->isDir())
331
directories += iter.current()->filePath ();
333
files += iter.current()->filePath ();
340
return parseFiles (files, encoding, codec, pdlg);
105
343
/***************************************************************************/
253
/***************************************************************************/
255
void addWordsFromFile (WordMap &map, QString filename) {
256
QFile file(filename);
257
QXmlInputSource source (&file);
259
QXmlSimpleReader reader;
260
reader.setFeature ("http://trolltech.com/xml/features/report-start-end-entity", true);
261
reader.setContentHandler (&parser);
263
if (reader.parse(source)) {
264
QStringList words = parser.getList();
265
QStringList::ConstIterator it;
266
for (it = words.begin(); it != words.end(); ++it) {
267
if (!(*it).contains(QRegExp("\\d|_"))) {
268
QString key = (*it).lower();
269
if (map.contains(key))
278
WordMap checkWordList(WordMap &map, QString dict, QString aff);
280
WordMap createWordList (QString language, QString dictionary) {
281
KProgressDialog *pdlg = new KProgressDialog(0, "progressDialog", i18n("Creating word list"), i18n("Parsing the KDE documentation..."), false);
282
pdlg->setAllowCancel (false);
283
pdlg->showCancelButton (false);
284
pdlg->setLabel (i18n("Parsing the KDE documentation..."));
285
pdlg->setAutoReset(false);
286
pdlg->setAutoClose(false);
287
pdlg->progressBar()->setTotalSteps(100);
288
pdlg->progressBar()->setProgress(0);
290
qApp->processEvents (20);
296
QStringList files = KApplication::kApplication()->dirs()->findAllResources ("html", language + "/*.docbook", true, true);
297
if ((files.count() == 0) && (language.length() == 5)) {
298
language = language.left(2);
299
files = KApplication::kApplication()->dirs()->findAllResources ("html", language + "/*.docbook", true, true);
302
steps = files.count();
305
QStringList::ConstIterator it;
306
for (progress = 1, it = files.begin(); it != files.end(); ++progress, ++it) {
307
addWordsFromFile (map, *it);
309
if (steps != 0 && progress*100/steps > percent) {
310
percent = progress*100/steps;
311
pdlg->progressBar()->setProgress(percent);
312
qApp->processEvents (20);
491
WordMap spellCheck (WordMap map, QString dictionary, KProgressDialog *pdlg) {
316
493
if (dictionary.endsWith(".dic")) {