/*
 * This file is part of the KDE Baloo Project
 * Copyright (C) 2013 Vishesh Handa <me@vhanda.in>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) version 3, or any
 * later version accepted by the membership of KDE e.V. (or its
 * successor approved by the membership of KDE e.V.), which shall
 * act as a proxy defined in Section 6 of version 3 of the license.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
 *
 */
23
#include "emailindexer.h"
25
#include <Akonadi/Collection>
26
#include <Akonadi/KMime/MessageFlags>
28
#include <QTextDocument>
30
EmailIndexer::EmailIndexer(const QString& path, const QString& contactDbPath):
31
AbstractIndexer(), m_doc( 0 ), m_termGen( 0 )
33
m_db = new Xapian::WritableDatabase(path.toStdString(), Xapian::DB_CREATE_OR_OPEN);
34
m_contactDb = new Xapian::WritableDatabase(contactDbPath.toStdString(), Xapian::DB_CREATE_OR_OPEN);
37
EmailIndexer::~EmailIndexer()
42
m_contactDb->commit();
46
QStringList EmailIndexer::mimeTypes() const
48
return QStringList() << KMime::Message::mimeType();
51
void EmailIndexer::index(const Akonadi::Item& item)
53
Akonadi::MessageStatus status;
54
status.setStatusFromFlags(item.flags());
58
KMime::Message::Ptr msg;
60
msg = item.payload<KMime::Message::Ptr>();
61
} catch (const Akonadi::PayloadException&) {
65
m_doc = new Xapian::Document();
66
m_termGen = new Xapian::TermGenerator();
67
m_termGen->set_document(*m_doc);
68
m_termGen->set_database(*m_db);
70
processMessageStatus(status);
74
m_doc->add_value(1, QString::number(item.size()).toStdString());
77
Q_ASSERT_X(item.parentCollection().isValid(), "Baloo::EmailIndexer::index",
78
"Item does not have a valid parent collection");
80
Akonadi::Entity::Id colId = item.parentCollection().id();
81
QByteArray term = 'C' + QByteArray::number(colId);
82
m_doc->add_boolean_term(term.data());
84
m_db->replace_document(item.id(), *m_doc);
93
void EmailIndexer::insert(const QByteArray& key, KMime::Headers::Base* unstructured)
96
m_termGen->index_text_without_positions(unstructured->asUnicodeString().toUtf8().constData(), 1, key.data());
100
void EmailIndexer::insert(const QByteArray& key, KMime::Headers::Generics::MailboxList* mlist)
103
insert(key, mlist->mailboxes());
106
void EmailIndexer::insert(const QByteArray& key, KMime::Headers::Generics::AddressList* alist)
109
insert(key, alist->mailboxes());
113
// Does some extra stuff such as lower casing the email, removing all quotes
114
// and removing extra spaces
115
// TODO: Move this into KMime?
116
// TODO: If name is all upper/lower then try to captialize it?
117
QString prettyAddress(const KMime::Types::Mailbox& mbox) {
118
QString name = mbox.name().simplified();
119
QByteArray email = mbox.address().simplified().toLower();
121
// Remove outer quotes recursively
122
while (name.size() >= 2 && (name[0] == '\'' || name[0] == '"') &&
123
(name[name.size()-1] == '\'' || name[name.size()-1] == '"')) {
124
name = name.mid(1, name.size()-2);
130
return name + QLatin1String(" <") + QString::fromUtf8(email) + QLatin1Char('>');
134
// Add once with a prefix and once without
135
void EmailIndexer::insert(const QByteArray& key, const KMime::Types::Mailbox::List& list)
137
Q_FOREACH (const KMime::Types::Mailbox& mbox, list) {
138
std::string name(mbox.name().toUtf8().constData());
139
m_termGen->index_text_without_positions(name, 1, key.data());
140
m_termGen->index_text_without_positions(name, 1);
141
m_termGen->index_text_without_positions(mbox.address().data(), 1, key.data());
142
m_termGen->index_text_without_positions(mbox.address().data(), 1);
144
m_doc->add_term((key + mbox.address()).data());
145
m_doc->add_term(mbox.address().data());
148
// Add emails for email auto-completion
150
const QString pa = prettyAddress(mbox);
153
Xapian::Document doc = m_contactDb->get_document(id);
156
catch (const Xapian::DocNotFoundError&) {
157
Xapian::Document doc;
158
std::string pretty(pa.toUtf8().constData());
159
doc.set_data(pretty);
161
Xapian::TermGenerator termGen;
162
termGen.set_document(doc);
163
termGen.index_text(pretty);
165
doc.add_term(mbox.address().data());
166
m_contactDb->replace_document(id, doc);
171
// FIXME: Only index properties that are actually searched!
172
void EmailIndexer::process(const KMime::Message::Ptr& msg)
176
// (Give the subject a higher priority)
177
KMime::Headers::Subject* subject = msg->subject(false);
179
std::string str(subject->asUnicodeString().toUtf8().constData());
180
kDebug() << "Indexing" << str.c_str();
181
m_termGen->index_text_without_positions(str, 1, "SU");
182
m_termGen->index_text_without_positions(str, 100);
183
m_doc->set_data(str);
186
KMime::Headers::Date* date = msg->date(false);
188
const QString str = QString::number(date->dateTime().toTime_t());
189
m_doc->add_value(0, str.toStdString());
190
const QString julianDay = QString::number(date->dateTime().date().toJulianDay());
191
m_doc->add_value(2, julianDay.toStdString());
194
insert("F", msg->from(false));
195
insert("T", msg->to(false));
196
insert("CC", msg->cc(false));
197
insert("BC", msg->bcc(false));
198
insert("O", msg->organization(false));
199
insert("RT", msg->replyTo(false));
200
insert("RF", msg->headerByType("Resent-From"));
201
insert("LI", msg->headerByType("List-Id"));
202
insert("XL", msg->headerByType("X-Loop"));
203
insert("XML", msg->headerByType("X-Mailing-List"));
204
insert("XSF", msg->headerByType("X-Spam-Flag"));
207
// Process Plain Text Content
211
m_termGen->index_text_without_positions(std::string(msg->head().constData()), 1, "HE");
213
KMime::Content* mainBody = msg->mainBodyPart("text/plain");
215
const std::string text(mainBody->decodedText().toUtf8().constData());
216
m_termGen->index_text_without_positions(text);
217
m_termGen->index_text_without_positions(text, 1, "BO");
220
processPart(msg.get(), 0);
224
void EmailIndexer::processPart(KMime::Content* content, KMime::Content* mainContent)
226
if (content == mainContent) {
230
KMime::Headers::ContentType* type = content->contentType(false);
232
if (type->isMultipart()) {
233
if (type->isSubtype("encrypted"))
236
Q_FOREACH (KMime::Content* c, content->contents()) {
237
processPart(c, mainContent);
241
// Only get HTML content, if no plain text content
242
if (!mainContent && type->isHTMLText()) {
244
doc.setHtml(content->decodedText());
246
const std::string text(doc.toPlainText().toUtf8().constData());
247
m_termGen->index_text_without_positions(text);
251
// FIXME: Handle attachments?
254
void EmailIndexer::processMessageStatus(const Akonadi::MessageStatus& status)
256
insertBool('R', status.isRead());
257
insertBool('A', status.hasAttachment());
258
insertBool('I', status.isImportant());
259
insertBool('W', status.isWatched());
260
insertBool('T', status.isToAct());
261
insertBool('D', status.isDeleted());
262
insertBool('S', status.isSpam());
263
insertBool('E', status.isReplied());
264
insertBool('G', status.isIgnored());
265
insertBool('F', status.isForwarded());
266
insertBool('N', status.isSent());
267
insertBool('Q', status.isQueued());
268
insertBool('H', status.isHam());
269
insertBool('C', status.isEncrypted());
270
insertBool('V', status.hasInvitation());
273
void EmailIndexer::insertBool(char key, bool value)
275
QByteArray term("B");
284
m_doc->add_boolean_term(term.data());
287
void EmailIndexer::toggleFlag(Xapian::Document& doc, const char* remove, const char* add)
290
doc.remove_term(remove);
292
catch (const Xapian::InvalidArgumentError &e) {
293
// The previous flag state was not indexed, continue
300
void EmailIndexer::updateFlags(const Akonadi::Item& item,
301
const QSet<QByteArray>& added,
302
const QSet<QByteArray>& removed)
304
Xapian::Document doc;
306
doc = m_db->get_document(item.id());
308
catch (const Xapian::DocNotFoundError&) {
312
Q_FOREACH (const QByteArray& flag, removed) {
313
if (flag == Akonadi::MessageFlags::Seen) {
314
toggleFlag(doc, "BR", "BNR");
316
else if (flag == Akonadi::MessageFlags::Flagged) {
317
toggleFlag(doc, "BI", "BNI");
319
else if (flag == Akonadi::MessageFlags::Watched) {
320
toggleFlag(doc, "BW", "BNW");
324
Q_FOREACH (const QByteArray& flag, added) {
325
if (flag == Akonadi::MessageFlags::Seen) {
326
toggleFlag(doc, "BNR", "BR");
328
else if (flag == Akonadi::MessageFlags::Flagged) {
329
toggleFlag(doc, "BNI", "BI");
331
else if (flag == Akonadi::MessageFlags::Watched) {
332
toggleFlag(doc, "BNW", "BW");
336
m_db->replace_document(doc.get_docid(), doc);
339
void EmailIndexer::remove(const Akonadi::Item& item)
342
m_db->delete_document(item.id());
343
//TODO remove contacts from contact db?
345
catch (const Xapian::DocNotFoundError&) {
350
void EmailIndexer::remove(const Akonadi::Collection& collection)
353
Xapian::Query query('C'+ QString::number(collection.id()).toStdString());
354
Xapian::Enquire enquire(*m_db);
355
enquire.set_query(query);
357
Xapian::MSet mset = enquire.get_mset(0, m_db->get_doccount());
358
Xapian::MSetIterator end = mset.end();
359
for (Xapian::MSetIterator it = mset.begin(); it != end; ++it) {
360
const qint64 id = *it;
361
remove(Akonadi::Item(id));
364
catch (const Xapian::DocNotFoundError&) {
369
void EmailIndexer::move(const Akonadi::Item::Id& itemId,
370
const Akonadi::Entity::Id& from,
371
const Akonadi::Entity::Id& to)
373
Xapian::Document doc;
375
doc = m_db->get_document(itemId);
377
catch (const Xapian::DocNotFoundError&) {
381
const QByteArray ft = 'C' + QByteArray::number(from);
382
const QByteArray tt = 'C' + QByteArray::number(to);
384
doc.remove_term(ft.data());
385
doc.add_boolean_term(tt.data());
386
m_db->replace_document(doc.get_docid(), doc);
390
void EmailIndexer::commit()
393
m_contactDb->commit();