~ubuntu-branches/ubuntu/oneiric/strigi/oneiric

« back to all changes in this revision

Viewing changes to src/streamanalyzer/endanalyzers/mailendanalyzer.cpp

  • Committer: Package Import Robot
  • Author(s): Felix Geyer
  • Date: 2011-09-24 17:12:15 UTC
  • mfrom: (1.2.6 upstream)
  • mto: This revision was merged to the branch mainline in revision 44.
  • Revision ID: package-import@ubuntu.com-20110924171215-zmbi1f77jntvz65h
Tags: upstream-0.7.6
Import upstream version 0.7.6

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/* This file is part of Strigi Desktop Search
2
 
 *
3
 
 * Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
4
 
 *
5
 
 * This library is free software; you can redistribute it and/or
6
 
 * modify it under the terms of the GNU Library General Public
7
 
 * License as published by the Free Software Foundation; either
8
 
 * version 2 of the License, or (at your option) any later version.
9
 
 *
10
 
 * This library is distributed in the hope that it will be useful,
11
 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 
 * Library General Public License for more details.
14
 
 *
15
 
 * You should have received a copy of the GNU Library General Public License
16
 
 * along with this library; see the file COPYING.LIB.  If not, write to
17
 
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18
 
 * Boston, MA 02110-1301, USA.
19
 
 */
20
 
#include "mailendanalyzer.h"
21
 
#include <strigi/strigiconfig.h>
22
 
#include "mailinputstream.h"
23
 
#include "encodinginputstream.h"
24
 
#include "analysisresult.h"
25
 
#include "textendanalyzer.h"
26
 
#include "fieldtypes.h"
27
 
#include <iostream>
28
 
using namespace Strigi;
29
 
using namespace std;
30
 
 
31
 
// Prefix of the NMO proposal namespace. It is only needed while the constants
// below are being defined and is undefined again right after them.
#define NMO_PROPOSAL "http://www.semanticdesktop.org/ontologies/nmo#"

// Property URIs registered as fields for an e-mail message.
const std::string titleFieldName(
    "http://www.semanticdesktop.org/ontologies/2007/01/19/nie#subject");
const std::string fromFieldName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#from");
const std::string toFieldName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#to");
const std::string ccFieldName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#cc");
const std::string bccFieldName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#bcc");
const std::string contentidFieldName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#messageId");
const std::string contentlinkFieldName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#references");
const std::string emailInReplyToFieldName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#inReplyTo");

// Property URIs used as predicates in the triplets describing a contact.
const std::string typeFieldName(
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
const std::string fullnameFieldName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nco#fullname");
const std::string hasEmailAddressFieldName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nco#hasEmailAddress");
const std::string emailAddressFieldName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nco#emailAddress");

// Class URIs used as the value of the rdf type property.
const std::string emailClassName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nmo#Email");
const std::string contactClassName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nco#Contact");
const std::string emailAddressClassName(
    "http://www.semanticdesktop.org/ontologies/2007/03/22/nco#EmailAddress");
const std::string mimePartClassName(
    NMO_PROPOSAL "MimePart");

#undef NMO_PROPOSAL
70
 
 
71
 
void
72
 
MailEndAnalyzerFactory::registerFields(FieldRegister& r) {
73
 
    titleField = r.registerField(titleFieldName);
74
 
    contenttypeField = r.mimetypeField;
75
 
    fromField = r.registerField(fromFieldName);
76
 
    toField = r.registerField(toFieldName);
77
 
    ccField = r.registerField(ccFieldName);
78
 
    bccField = r.registerField(bccFieldName);
79
 
    contentidField = r.registerField(contentidFieldName);
80
 
    contentlinkField = r.registerField(contentlinkFieldName);
81
 
    emailInReplyToField = r.registerField(emailInReplyToFieldName);
82
 
    typeField = r.typeField;
83
 
 
84
 
    addField(titleField);
85
 
    addField(contenttypeField);
86
 
    addField(fromField);
87
 
    addField(toField);
88
 
    addField(ccField);
89
 
    addField(bccField);
90
 
    addField(contentidField);
91
 
    addField(contentlinkField);
92
 
    addField(emailInReplyToField);
93
 
    addField(typeField);
94
 
}
95
 
 
96
 
bool
97
 
MailEndAnalyzer::checkHeader(const char* header, int32_t headersize) const {
98
 
    return MailInputStream::checkHeader(header, headersize);
99
 
}
100
 
/**
 * Extract the value of the "charset=" parameter from a content type string.
 *
 * Both quoted values (charset="utf-8" or charset='utf-8') and bare values
 * (charset=utf-8) are recognized. A bare value ends at the first ';' or
 * whitespace character, or at the end of the string.
 *
 * @param contenttype content type string, e.g. the value of a Content-Type
 *                    header
 * @return the charset name, or an empty string when there is no "charset="
 *         parameter, its value is empty, or a quoted value is not terminated
 */
std::string
charset(const std::string& contenttype) {
    static const char key[] = "charset=";
    const std::string::size_type keyLength = sizeof(key) - 1;

    const std::string::size_type keyStart = contenttype.find(key);
    if (keyStart == std::string::npos) {
        return std::string();
    }

    // Never look at characters beyond the end of the string: the parameter
    // may be the very last thing in it and have no value at all.
    const std::string::size_type valueStart = keyStart + keyLength;
    if (valueStart >= contenttype.size()) {
        return std::string();
    }

    const char first = contenttype[valueStart];
    if (first == '\'' || first == '"') {
        // Quoted value: everything up to the matching closing quote.
        const std::string::size_type closing
            = contenttype.find(first, valueStart + 1);
        if (closing == std::string::npos) {
            return std::string();
        }
        return contenttype.substr(valueStart + 1, closing - valueStart - 1);
    }

    // Bare value: runs until the next parameter separator or whitespace.
    const std::string::size_type valueEnd
        = contenttype.find_first_of("; \t\r\n", valueStart);
    if (valueEnd == std::string::npos) {
        return contenttype.substr(valueStart);
    }
    return contenttype.substr(valueStart, valueEnd - valueStart);
}
115
 
/**
 * Return a copy of @p text without leading and trailing whitespace.
 */
static std::string
trimAddressPart(const std::string& text) {
    static const char whitespace[] = " \t\r\n";
    const std::string::size_type first = text.find_first_not_of(whitespace);
    if (first == std::string::npos) {
        return std::string();
    }
    const std::string::size_type last = text.find_last_not_of(whitespace);
    return text.substr(first, last - first + 1);
}

/**
 * Split an address of the form "Display Name <user@host>" into its parts.
 *
 * When the address contains no '<', the whole text is taken as the e-mail
 * address and the name is empty. When the '<' has no closing '>', everything
 * after the '<' is taken as the e-mail address. Only the first bracketed
 * address is used if the text holds several of them. Both results have
 * surrounding whitespace removed.
 *
 * @param addr  address text to split
 * @param name  receives the text in front of the '<' (may be empty)
 * @param email receives the text between '<' and the next '>'
 */
void
splitAddress(const std::string& addr, std::string& name, std::string& email) {
    const std::string::size_type open = addr.find('<');
    if (open == std::string::npos) {
        // Compute first, assign afterwards: addr may alias name or email.
        const std::string bare(trimAddressPart(addr));
        name.clear();
        email = bare;
        return;
    }

    // Use the first '>' that follows the '<' so that further addresses in
    // the same text do not end up inside this e-mail address.
    const std::string::size_type close = addr.find('>', open + 1);
    const std::string::size_type length = (close == std::string::npos)
        ? std::string::npos
        : close - open - 1;

    const std::string display(trimAddressPart(addr.substr(0, open)));
    const std::string mailbox(trimAddressPart(addr.substr(open + 1, length)));
    name = display;
    email = mailbox;
}
126
 
string 
127
 
processAddress(Strigi::AnalysisResult& idx, const string& address) {
128
 
    string uri(idx.newAnonymousUri());
129
 
    string cmUri;
130
 
    string name, email;
131
 
 
132
 
    splitAddress(address, name, email);
133
 
    cmUri = "mailto:" + email;
134
 
 
135
 
    idx.addTriplet(uri, typeFieldName, contactClassName);
136
 
    if (name.size())
137
 
        idx.addTriplet(uri, fullnameFieldName, name);
138
 
    idx.addTriplet(uri, hasEmailAddressFieldName, cmUri);
139
 
    idx.addTriplet(cmUri, typeFieldName, emailAddressClassName);
140
 
    idx.addTriplet(cmUri, emailAddressFieldName, email);
141
 
    
142
 
    return uri;
143
 
}
144
 
signed char
145
 
MailEndAnalyzer::analyze(AnalysisResult& idx, InputStream* in) {
146
 
    if(!in)
147
 
        return -1;
148
 
 
149
 
    MailInputStream mail(in);
150
 
    InputStream *s = mail.nextEntry();
151
 
    if (mail.status() == Error) {
152
 
        m_error = mail.error();
153
 
        return -1;
154
 
    }
155
 
    string enc(charset(mail.contentType()));
156
 
    if (enc.length()) {
157
 
        idx.setEncoding(enc.c_str());
158
 
    }
159
 
    idx.addValue(factory->typeField, emailClassName);
160
 
    idx.addValue(factory->titleField, mail.subject());
161
 
    idx.addValue(factory->contenttypeField, mail.contentType());
162
 
    
163
 
    idx.addValue(factory->fromField, processAddress(idx, mail.from()) );
164
 
    
165
 
    idx.addValue(factory->toField, processAddress(idx, mail.to()) );
166
 
    if (mail.cc().length() > 0) idx.addValue(factory->ccField, processAddress(idx, mail.cc()) );
167
 
    if (mail.bcc().length() > 0) idx.addValue(factory->bccField, processAddress(idx, mail.bcc()) );
168
 
    if (mail.messageid().length() > 0)
169
 
        idx.addValue(factory->contentidField, mail.messageid());
170
 
    if (mail.inreplyto().length() > 0) {
171
 
        string uri(idx.newAnonymousUri());
172
 
        idx.addValue(factory->emailInReplyToField, uri);
173
 
        idx.addTriplet(uri, typeFieldName, emailClassName);
174
 
        idx.addTriplet(uri, contentidFieldName, mail.inreplyto());
175
 
    }
176
 
    if (mail.references().length() > 0) {
177
 
        string uri(idx.newAnonymousUri());
178
 
        idx.addValue(factory->contentlinkField, uri);
179
 
        idx.addTriplet(uri, typeFieldName, emailClassName);
180
 
        idx.addTriplet(uri, contentidFieldName, mail.references());
181
 
    }
182
 
    if (s != 0) {
183
 
        TextEndAnalyzer tea;
184
 
        if (enc.length()) {
185
 
            EncodingInputStream eis(s, enc.c_str());
186
 
            tea.analyze(idx, &eis);
187
 
        } else {
188
 
            tea.analyze(idx, s);
189
 
        }
190
 
    }
191
 
    s = mail.nextEntry();
192
 
    int n = 1;
193
 
    while (s) {
194
 
        std::string file;
195
 
        if (mail.entryInfo().filename.length() == 0) {
196
 
            file = (char)(n+'1');
197
 
        } else {
198
 
            file = mail.entryInfo().filename;
199
 
        }
200
 
        // maybe use the date of sending the mail here
201
 
        idx.indexChild(file, idx.mTime(), s);
202
 
        if (idx.child()) {
203
 
            idx.child()->addValue(factory->typeField, mimePartClassName);
204
 
        }
205
 
 
206
 
        s = mail.nextEntry();
207
 
        n++;
208
 
    }
209
 
    if (mail.status() == Error) {
210
 
        m_error = mail.error();
211
 
        return -1;
212
 
    } else {
213
 
        m_error.resize(0);
214
 
    }
215
 
    return 0;
216
 
}
217