1
/* This file is part of Strigi Desktop Search
3
* Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
4
* Copyright (C) 2007 Arend van Beelen jr. <arend@auton.nl>
5
* 2009 Evgeny Egorochkin <phreedom.stdin@gmail.com>
7
* This library is free software; you can redistribute it and/or
8
* modify it under the terms of the GNU Library General Public
9
* License as published by the Free Software Foundation; either
10
* version 2 of the License, or (at your option) any later version.
12
* This library is distributed in the hope that it will be useful,
13
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
* Library General Public License for more details.
17
* You should have received a copy of the GNU Library General Public License
18
* along with this library; see the file COPYING.LIB. If not, write to
19
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20
* Boston, MA 02110-1301, USA.
23
#include "odfmetahelperanalyzer.h"
24
#include <strigi/analysisresult.h>
30
#include <strigi/analysisresult.h>
31
#include <strigi/fieldtypes.h>
32
#include "../rdfnamespaces.h"
34
using namespace Strigi;
37
// XML namespace URIs that startElement() compares against.
// dcNS: matched against the element namespace for creator/title/subject/
// description/language/date elements.
static const char *dcNS = "http://purl.org/dc/elements/1.1/";
38
// metaNS: matched against the element namespace for creation-date/keyword/
// generator/document-statistic, and against the attribute namespace of the
// document-statistic counters.
static const char *metaNS = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
39
// opfNS: matched against the namespace of the "event" attribute on a date element.
static const char *opfNS = "http://www.idpf.org/2007/opf";
40
//static const char *officeNS = "urn:oasis:names:tc:opendocument:xmlns:office:1.0";
41
//static const char *svgNS = "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0";
42
//static const char *textNS = "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
49
creationTimePropertyName(
50
NIE "contentCreated"),
57
descriptionPropertyName(
63
generatorPropertyName(
66
wordCountPropertyName(
68
pageCountPropertyName(
70
characterCountPropertyName(
71
NFO "characterCount"),
78
/*OdfMetaHelperAnalyzer::registerFields(FieldRegister &reg) {
79
tablecountField = reg.registerField("http://strigi.sf.net/ontologies/homeless#documentTableCount");
80
paragcountField = reg.registerField("http://freedesktop.org/standards/xesam/1.0/core#paragraphCount");
81
objectcountField = reg.registerField("http://strigi.sf.net/ontologies/homeless#documentObjectCount");
82
imagecountField = reg.registerField("http://strigi.sf.net/ontologies/homeless#documentImageCount");
85
// Prepares the helper for a new document: no field is selected until
// startElement() recognises a metadata element.
// NOTE(review): the parameter r is not used by any line visible here, while
// the other callbacks write through a `result` pointer — presumably it is
// bound to r in a line missing from this view; confirm against the full file.
void OdfMetaHelperAnalyzer::startAnalysis(AnalysisResult &r) {
87
m_currentField = NULL;
90
// Ends the analysis of the current document. The `complete` flag is
// deliberately unnamed and ignored; the only visible effect is clearing the
// currently selected field so that no later character data is attributed to it.
void OdfMetaHelperAnalyzer::endAnalysis(bool /*complete*/) {
92
m_currentField = NULL;
95
void OdfMetaHelperAnalyzer::startElement(const char *localname, const char *prefix,
96
const char *uri, int nb_namespaces, const char **namespaces,
97
int nb_attributes, int nb_defaulted, const char **attributes) {
100
if(uri && strcmp(uri, dcNS) == 0) {
101
if(strcmp(localname, "creator") == 0) {
102
m_currentField = &creatorPropertyName;
103
} else if(strcmp(localname, "title") == 0) {
104
m_currentField = &titlePropertyName;
105
} else if(strcmp(localname, "subject") == 0) {
106
m_currentField = &subjectPropertyName;
107
} else if(strcmp(localname, "description") == 0) {
108
m_currentField = &descriptionPropertyName;
109
} else if(strcmp(localname, "language") == 0) {
110
m_currentField = &languagePropertyName;
111
// This element never appears in ODF files. It is defined in OPF specification
112
// (see http://www.openebook.org/2007/opf/OPF_2.0_final_spec.html#Section2.2.7 ) used
113
// in ePub electronic book format. It is here only because metadata format is almost
114
// identical to one used in ODF, so it makes more sense to reuse ODF analyzer for ePub too than
115
// to duplicate whole thing just to add support for one additional field
116
} else if((strcmp(localname, "date") == 0) && nb_attributes == 1 &&
117
(strcmp(attributes[0], "event") == 0) && attributes[2] != 0 &&
118
(strcmp(attributes[2], opfNS) == 0) &&
119
(strncmp(attributes[3], "creation", attributes[4] - attributes[3]) == 0 ) ) {
120
m_currentField = &creationTimePropertyName;
122
} else if(uri && strcmp(uri, metaNS) == 0) {
123
if(strcmp(localname, "creation-date") == 0) {
124
m_currentField = &creationTimePropertyName;
125
} else if(strcmp(localname, "keyword") == 0) {
126
m_currentField = &keywordPropertyName;
127
} else if(strcmp(localname, "generator") == 0) {
128
m_currentField = &generatorPropertyName;
129
} else if(strcmp(localname, "document-statistic")==0) {
130
for(int i = 0 ; i < nb_attributes ;i++)
132
if(strcmp(attributes[2+i*5], metaNS) ==0) {
133
const char *attrName(attributes[0+i*5]);
134
int stringLength = strlen(attributes[3+i*5]) - strlen(attributes[4+i*5]);
135
string line(attributes[3+i*5],stringLength);
137
if(strcmp(attrName, "word-count") ==0 ){
138
result->addTriplet(result->path(), wordCountPropertyName, line);
140
else if(strcmp(attrName, "paragraph-count") ==0 ){
141
//FIXME: either get rid of this or replace with NIE equivalent
142
//result->addValue(m_factory->paragcountField,line);
144
else if(strcmp(attrName, "page-count") ==0 ){
145
result->addTriplet(result->path(), pageCountPropertyName,line);
147
else if(strcmp(attrName, "image-count") ==0 ){
148
//FIXME: either get rid of this or replace with NIE equivalent
149
//result->addValue(m_factory->imagecountField,line);
151
else if(strcmp(attrName, "character-count") ==0 ){
152
result->addTriplet(result->path(), characterCountPropertyName,line);
154
else if(strcmp(attrName, "object-count") ==0 ){
155
//FIXME: either get rid of this or replace with NIE equivalent
156
//result->addValue(m_factory->objectcountField,line);
158
else if(strcmp(attrName, "table-count") ==0 ){
159
//FIXME: either get rid of this or replace with NIE equivalent
160
//result->addValue(m_factory->tablecountField,line);
168
// SAX end-element callback. Whatever element is closing, the current field is
// cleared so that character data following the element is not stored under
// the property selected by startElement().
// NOTE(review): the remainder of the parameter list is not visible in this view.
void OdfMetaHelperAnalyzer::endElement(const char *localname, const char *prefix,
170
m_currentField = NULL;
173
// SAX character-data callback. `data` is used together with `length` (it is
// never read as a NUL-terminated string). Text is only recorded while a field
// has been selected, i.e. while m_currentField is non-NULL.
void OdfMetaHelperAnalyzer::characters(const char *data, uint32_t length) {
176
if(m_currentField != NULL) {
177
if(m_currentField == &creatorPropertyName) {
178
// The creator is not stored as a plain string: a fresh anonymous resource
// is created, linked from the document via the creator property, typed as
// a contact, and given the text as its full name.
string creatorUri = result->newAnonymousUri();
179
result->addTriplet(result->path(), *m_currentField, creatorUri);
180
result->addTriplet(creatorUri, typePropertyName, contactClassName);
181
result->addTriplet(creatorUri, fullnamePropertyName, string(data,length));
183
// Store the text directly as the value of the selected property.
// NOTE(review): presumably this is the else-branch for non-creator fields;
// the line separating it from the creator branch is not visible here — confirm.
result->addTriplet(result->path(), *m_currentField, string(data, length));