1
/* This file is part of Strigi Desktop Search
3
* Copyright (C) 2007 Alexandr Goncearenco <neksa@neksa.net>
5
* This library is free software; you can redistribute it and/or
6
* modify it under the terms of the GNU Library General Public
7
* License as published by the Free Software Foundation; either
8
* version 2 of the License, or (at your option) any later version.
10
* This library is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
* Library General Public License for more details.
15
* You should have received a copy of the GNU Library General Public License
16
* along with this library; see the file COPYING.LIB. If not, write to
17
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18
* Boston, MA 02110-1301, USA.
20
#include <strigi/sdfinputstream.h>
21
#include <strigi/subinputstream.h>
27
using namespace Strigi;
29
// Four-character record delimiter; the constructor installs it as the
// m_searcher query and nextEntry compares input against it.
const string SdfInputStream::delimiter("$$$$");
30
// Marker string that checkHeader searches for with a KmpSearcher.
const string SdfInputStream::label("V2000");
32
// Builds an SdfInputStream on top of `input`. The input pointer is handed to
// the SubStreamProvider base; substream, entrynumber and
// previousStartOfDelimiter all start at zero.
SdfInputStream::SdfInputStream(InputStream* input)
33
: SubStreamProvider(input), substream(0), entrynumber(0),
34
previousStartOfDelimiter(0) {
35
// Configure the searcher to look for the "$$$$" delimiter.
m_searcher.setQuery(delimiter);
37
// Destructor. The guard below only fires when substream is set and is a
// different object from m_entrystream.
// NOTE(review): presumably this avoids releasing the same stream twice, since
// nextEntry stores substream in m_entrystream -- confirm against the full body.
SdfInputStream::~SdfInputStream() {
38
if (substream && substream != m_entrystream) {
43
* Return a pointer that points past the current line. The line can end on
44
* either \r, \n, or \r\n.
45
* If the pointer would point past the end of the buffer or the line is longer
46
* than 80 bytes, NULL is returned.
49
skip80Line(const char* data, int32_t size) {
50
// An empty (or negatively sized) buffer has no line to skip.
if (size <= 0) return 0;
51
// EOL can be at position 80
52
// Scan window: at most 81 bytes, and never beyond the buffer.
int max = min(size, 81);
54
// Advance i to the first '\n' or '\r'; i ends at max when the window holds
// no line terminator.
for (i=0; i<max && data[i] != '\n' && data[i] != '\r'; ++i) {
56
// No terminator inside the window: line too long or buffer ended first.
if (i == max) return 0;
57
// Bare '\n': the next line starts right after it, unless nothing follows.
if (data[i] == '\n') return (i+1 < size) ?data+i+1 :0;
58
// data[i] is known to be '\r' by now
59
// '\r' followed by a byte other than '\n': the next line starts at i+1.
if (i+1 < size && data[i+1] != '\n') return data+i+1;
60
// data[i+1] is known to be '\n' by now
// (or the '\r' was the last byte of the buffer, in which case i+2 < size is
// false and 0 is returned below)
61
return (i+2 < size) ?data+i+2 :0;
64
SdfInputStream::checkHeader(const char* data, int32_t datasize) {
65
// Headers of 10 bytes or fewer are rejected outright.
if (datasize <= 10) return false;
66
// the fourth line must contain the string "V2000"
67
// skip three lines of at most 80 bytes
68
const char* thisLine = 0;
69
const char* nextLine = data;
70
// Up to four iterations; the loop ends early once nextLine becomes null,
// which is how skip80Line reports failure.
for (int i=0; i<4 && nextLine != 0; ++i) {
72
// Size passed on = total size minus the offset of thisLine within data.
nextLine = skip80Line(thisLine, datasize - (int32_t)(thisLine - data));
74
// Function-local static searcher constructed from the "V2000" label.
static const KmpSearcher searcher(label);
76
// Hand the span [thisLine, nextLine) to the searcher.
nextLine = searcher.search(thisLine, (int32_t)(nextLine - thisLine));
81
SdfInputStream::nextEntry() {
82
if (m_status != Ok) return 0;
84
m_input->reset(previousStartOfDelimiter);
87
// read anything that's left over in the previous substream
91
while (substream->status() == Ok) {
92
substream->read(dummy, 1, 0);
94
if (substream->status() == Error) {
97
if (substream && substream != m_entrystream) {
101
delete m_entrystream;
104
m_input->reset(previousStartOfDelimiter);
106
// eat delimiter and following newlines
107
if (m_input->status() == Ok) {
108
nread = m_input->read(dummy, 4, 4);
109
if (nread == 4 && strncmp(dummy, delimiter.c_str(), 4) == 0) {
110
m_input->read(dummy, 1, 1);
111
while (m_input->status() == Ok && (
112
strncmp(dummy, "\n", 1) == 0
113
|| strncmp(dummy, "\r", 1) == 0)) {
114
m_input->read(dummy, 1, 1);
119
// make sure it is not a MOL
120
// we can not check it in checkHeader due to low header size limit
121
// There is only one way to distinguish between MOL and SD:
122
// MOL does not have $$$$ delimiter. Return no entries if it is a MOL.
126
const int64_t pos = m_input->position();
129
while (m_input->status() == Ok) {
130
nread = m_input->read(start, 1024, 0);
132
end = m_searcher.search(start, nread);
134
len = end - start + total;
140
if (m_input->status() == Error) {
149
// this stream is an SD
150
substream = new SubInputStream(m_input, len);
151
previousStartOfDelimiter = m_input->position() + len;
152
m_entryinfo.type = EntryInfo::File;
153
m_entryinfo.size = len;
155
m_entryinfo.filename.assign("Molecule");
159
m_entryinfo.filename.append(o.str());
161
m_entrystream = substream;
162
return m_entrystream;
164
// this stream is a MOL itself, not an SD