1
/* This file is part of Strigi Desktop Search
3
* Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
5
* This library is free software; you can redistribute it and/or
6
* modify it under the terms of the GNU Library General Public
7
* License as published by the Free Software Foundation; either
8
* version 2 of the License, or (at your option) any later version.
10
* This library is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
* Library General Public License for more details.
15
* You should have received a copy of the GNU Library General Public License
16
* along with this library; see the file COPYING.LIB. If not, write to
17
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18
* Boston, MA 02110-1301, USA.
*/
20
#include "mailendanalyzer.h"
21
#include <strigi/strigiconfig.h>
22
#include "mailinputstream.h"
23
#include "encodinginputstream.h"
24
#include "analysisresult.h"
25
#include "textendanalyzer.h"
26
#include "fieldtypes.h"
28
using namespace Strigi;
31
// URI prefix that is glued onto a term name through adjacent string-literal
// concatenation (see NMO_PROPOSAL "MimePart" at the end of this list).
#define NMO_PROPOSAL "http://www.semanticdesktop.org/ontologies/nmo#"
35
// URI string constants for the ontology terms referenced in this file.
// They come from the NIE, NMO and NCO namespaces plus rdf:type.
"http://www.semanticdesktop.org/ontologies/2007/01/19/nie#subject"),
37
"http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#from"),
39
"http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#to"),
41
"http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#cc"),
43
"http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#bcc"),
45
"http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#messageId"),
47
"http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#references"),
48
emailInReplyToFieldName(
49
"http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#inReplyTo"),
52
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
54
"http://www.semanticdesktop.org/ontologies/2007/03/22/nco#fullname"),
55
hasEmailAddressFieldName(
56
"http://www.semanticdesktop.org/ontologies/2007/03/22/nco#hasEmailAddress"),
57
emailAddressFieldName(
58
"http://www.semanticdesktop.org/ontologies/2007/03/22/nco#emailAddress"),
61
// The remaining entries are URIs whose term names are capitalised
// (Email, Contact, EmailAddress, MimePart); they are used as rdf:type values.
"http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#Email"),
63
"http://www.semanticdesktop.org/ontologies/2007/03/22/nco#Contact"),
64
emailAddressClassName(
65
"http://www.semanticdesktop.org/ontologies/2007/03/22/nco#EmailAddress"),
67
NMO_PROPOSAL "MimePart");
72
// Obtains from the FieldRegister `r` the field handles this analyzer writes
// to and stores them in the factory's members, then passes handles to
// addField().
//
// @param r  register used to look up / create the fields by their URI names.
MailEndAnalyzerFactory::registerFields(FieldRegister& r) {
73
titleField = r.registerField(titleFieldName);
74
// The content type reuses the register's existing mimetype field instead of
// registering a name of its own.
contenttypeField = r.mimetypeField;
75
fromField = r.registerField(fromFieldName);
76
toField = r.registerField(toFieldName);
77
ccField = r.registerField(ccFieldName);
78
bccField = r.registerField(bccFieldName);
79
contentidField = r.registerField(contentidFieldName);
80
contentlinkField = r.registerField(contentlinkFieldName);
81
emailInReplyToField = r.registerField(emailInReplyToFieldName);
82
// Likewise, the type field is the one already exposed by the register.
typeField = r.typeField;
85
addField(contenttypeField);
90
addField(contentidField);
91
addField(contentlinkField);
92
addField(emailInReplyToField);
97
// Header check for this analyzer: forwards `header` and `headersize`
// unchanged to MailInputStream::checkHeader and returns its result.
//
// @param header      pointer to the leading bytes of the stream.
// @param headersize  number of bytes available at `header`.
MailEndAnalyzer::checkHeader(const char* header, int32_t headersize) const {
98
return MailInputStream::checkHeader(header, headersize);
101
// Extracts the charset parameter from a Content-Type value.
//
// @param contenttype  the Content-Type string to search.
charset(const string& contenttype) {
102
// Find the "charset" keyword; strstr yields a null pointer when it is absent.
const char* s = strstr(contenttype.c_str(), "charset");
105
// A value delimited by a single or double quote ends at the next occurrence
// of that same quote character.
if (c == '\'' || c == '"') {
107
const char* e = strchr(s, c);
109
// Copy the e-s characters that start at s.
return string(s, e-s);
116
// Splits an address written as `name <email>` into its two parts.
//
// @param addr   the address text to split.
// @param name   receives everything before the first '<'.
// @param email  receives the text after that '<' up to the last '>'.
splitAddress(const string& addr, string& name, string& email) {
118
if( (p = addr.find("<"))!= string::npos ) {
119
// substr(0, p) keeps any characters directly before '<' as they are.
name = addr.substr(0, p);
120
// Length is (position of last '>') - p - 1, i.e. the span between the
// brackets; substr clamps the length to the end of the string.
email = addr.substr(p+1, addr.rfind(">") -p -1);
127
// Describes one mail address as triplets on `idx`.
//
// A fresh anonymous URI stands for the contact. The address is split into
// name and email with splitAddress(); the email part, prefixed with
// "mailto:", becomes the URI of the email-address resource.
//
// @param idx      analysis result that receives the triplets.
// @param address  address text as found in the mail header.
processAddress(Strigi::AnalysisResult& idx, const string& address) {
128
string uri(idx.newAnonymousUri());
132
splitAddress(address, name, email);
133
cmUri = "mailto:" + email;
135
// Contact resource: its type, full name and link to the email address.
idx.addTriplet(uri, typeFieldName, contactClassName);
137
idx.addTriplet(uri, fullnameFieldName, name);
138
idx.addTriplet(uri, hasEmailAddressFieldName, cmUri);
139
// Email-address resource: its type and the bare address string.
idx.addTriplet(cmUri, typeFieldName, emailAddressClassName);
140
idx.addTriplet(cmUri, emailAddressFieldName, email);
145
// Analyzes a mail message read from `in`.
//
// The stream is wrapped in a MailInputStream. Header values (subject, content
// type, from/to/cc/bcc, message id, in-reply-to, references) are recorded on
// `idx`; the first entry is decoded with the charset taken from the
// Content-Type and passed to tea.analyze(); every further entry is indexed as
// a child of `idx`. When the MailInputStream reports an error, its message is
// stored in m_error.
//
// @param idx  analysis result that receives the values, triplets and children.
// @param in   input stream positioned at the start of the mail.
MailEndAnalyzer::analyze(AnalysisResult& idx, InputStream* in) {
149
MailInputStream mail(in);
150
InputStream *s = mail.nextEntry();
151
if (mail.status() == Error) {
152
m_error = mail.error();
155
// Charset named in the Content-Type header.
string enc(charset(mail.contentType()));
157
idx.setEncoding(enc.c_str());
159
idx.addValue(factory->typeField, emailClassName);
160
idx.addValue(factory->titleField, mail.subject());
161
idx.addValue(factory->contenttypeField, mail.contentType());
163
// Each address field stores the value produced by processAddress(), which
// also emits the contact / email-address triplets for it.
idx.addValue(factory->fromField, processAddress(idx, mail.from()) );
165
idx.addValue(factory->toField, processAddress(idx, mail.to()) );
166
if (mail.cc().length() > 0) idx.addValue(factory->ccField, processAddress(idx, mail.cc()) );
167
if (mail.bcc().length() > 0) idx.addValue(factory->bccField, processAddress(idx, mail.bcc()) );
168
if (mail.messageid().length() > 0)
169
idx.addValue(factory->contentidField, mail.messageid());
170
// In-Reply-To: link to an anonymous email resource that carries the
// referenced id.
if (mail.inreplyto().length() > 0) {
171
string uri(idx.newAnonymousUri());
172
idx.addValue(factory->emailInReplyToField, uri);
173
idx.addTriplet(uri, typeFieldName, emailClassName);
174
idx.addTriplet(uri, contentidFieldName, mail.inreplyto());
176
// References: same pattern; the whole references() string is stored as a
// single id value.
if (mail.references().length() > 0) {
177
string uri(idx.newAnonymousUri());
178
idx.addValue(factory->contentlinkField, uri);
179
idx.addTriplet(uri, typeFieldName, emailClassName);
180
idx.addTriplet(uri, contentidFieldName, mail.references());
185
// Decode the first entry with the mail's charset before analyzing it.
EncodingInputStream eis(s, enc.c_str());
186
tea.analyze(idx, &eis);
191
s = mail.nextEntry();
195
// Entries without a filename get a one-character name derived from n.
// NOTE(review): (char)(n+'1') is only a digit while n <= 8 - confirm how n
// is initialised and incremented.
if (mail.entryInfo().filename.length() == 0) {
196
file = (char)(n+'1');
198
file = mail.entryInfo().filename;
200
// maybe use the date of sending the mail here (currently idx.mTime() is used)
201
idx.indexChild(file, idx.mTime(), s);
203
idx.child()->addValue(factory->typeField, mimePartClassName);
206
s = mail.nextEntry();
209
if (mail.status() == Error) {
210
m_error = mail.error();