1
/* This file is part of Strigi Desktop Search
3
* Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
5
* This library is free software; you can redistribute it and/or
6
* modify it under the terms of the GNU Library General Public
7
* License as published by the Free Software Foundation; either
8
* version 2 of the License, or (at your option) any later version.
10
* This library is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
* Library General Public License for more details.
15
* You should have received a copy of the GNU Library General Public License
16
* along with this library; see the file COPYING.LIB. If not, write to
17
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18
* Boston, MA 02110-1301, USA.
*/
20
#include <strigi/stringterminatedsubstream.h>
21
#include <strigi/strigiconfig.h>
22
#include <strigi/kmpsearcher.h>
27
using namespace Strigi;
29
// TODO add a mechanism that avoids searching for a stop point again after a
32
class StringTerminatedSubStream::Private {
34
KmpSearcher m_searcher;
35
const int64_t m_offset;
39
Private(InputStream* i, const std::string& terminator)
40
: m_offset(i->position()), furthest(0), m_input(i) {
41
m_searcher.setQuery(terminator);
45
StringTerminatedSubStream::StringTerminatedSubStream(InputStream* i,
46
const std::string& terminator) :p(new Private(i, terminator)) {
48
StringTerminatedSubStream::~StringTerminatedSubStream() {
52
StringTerminatedSubStream::offset() const {
56
StringTerminatedSubStream::read(const char*& start, int32_t min, int32_t max) {
57
if (m_status == Eof) return -1;
58
if (m_status == Error) return -2;
61
// check if we already read enough
62
const int64_t pos = p->m_input->position();
63
nread = (int32_t)(p->furthest - pos);
65
if (max <= 0 || max > nread) {
68
nread = p->m_input->read(start, min, max);
72
if (m_position == m_size) {
79
// convenience parameter
80
int32_t tl = p->m_searcher.queryLength();
82
// increase min and max to accommodate the length of the terminator
90
if (tlmax > 0 && tlmax < tlmin) tlmax = tlmin;
92
nread = p->m_input->read(start, tlmin, tlmax);
99
m_error = p->m_input->error();
103
const char* end = p->m_searcher.search(start, nread);
105
// the end signature was found
106
nread = (int32_t)(end - start);
107
// signal the end of stream at the next call
109
// set input stream to point after the terminator
110
p->m_input->reset(pos + nread + tl);
111
} else if (nread >= tlmin) {
112
// we are not at or near the end and read the required amount
113
// reserve the last bit of buffer for rereading to match the terminator
116
p->furthest = pos + nread;
117
// we rewind, but the pointer 'start' will stay valid nonetheless
118
p->m_input->reset(pos + nread);
119
} else if (max != 0 && nread > max) {
120
// we are near the end of the stream but cannot pass all data
121
// at once because the amount read is larger than the amount to pass
122
p->furthest = pos + nread;
123
p->m_input->reset(pos + max);
126
// we are at the end of the stream, so no need to rewind
127
// signal the end of stream at the next call
130
if (nread > 0) m_position += nread;
131
if (m_status == Eof) {
137
StringTerminatedSubStream::reset(int64_t newpos) {
138
m_position = p->m_input->reset(newpos + p->m_offset);
139
if (m_position >= p->m_offset) {
140
m_position -= p->m_offset;
141
if (m_position != m_size) m_status = Ok;
143
// the stream is not positioned at a valid m_position