1
/* This file is part of Strigi Desktop Search
3
* Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
5
* This library is free software; you can redistribute it and/or
6
* modify it under the terms of the GNU Library General Public
7
* License as published by the Free Software Foundation; either
8
* version 2 of the License, or (at your option) any later version.
10
* This library is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
* Library General Public License for more details.
15
* You should have received a copy of the GNU Library General Public License
16
* along with this library; see the file COPYING.LIB. If not, write to
17
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18
* Boston, MA 02110-1301, USA.
21
#include <strigi/zipinputstream.h>
22
#include <strigi/strigiconfig.h>
23
#include <strigi/gzipinputstream.h>
24
#include <strigi/subinputstream.h>
27
#include <strigi/textutils.h>
31
using namespace Strigi;
34
// Tests whether a buffer starts with the four-byte signature
// 0x50 0x4b 0x03 0x04 ("PK" followed by the bytes 3 and 4).
//   data     - bytes to inspect
//   datasize - number of bytes available in data
// `ok` is true only when the first four bytes equal the signature and
// datasize is greater than 8.
// NOTE(review): the return type and the statement returning `ok` are not
// visible here; presumably the function returns bool - confirm.
ZipInputStream::checkHeader(const char* data, int32_t datasize) {
35
static const char magic[] = {0x50, 0x4b, 0x03, 0x04};
36
// fewer than four bytes cannot hold the signature
if (datasize < 4) return false;
37
// besides the signature match, more than 8 bytes must be available
bool ok = std::memcmp(data, magic, 4) == 0 && datasize > 8;
40
// Constructs the provider on top of `input`, which is passed on to the
// SubStreamProvider base class.
ZipInputStream::ZipInputStream(InputStream* input)
41
: SubStreamProvider(input) {
42
// no helper streams exist yet; nextEntry() allocates them when an entry
// with compression method 8 and a known size is opened
compressedEntryStream = 0;
43
uncompressionStream = 0;
45
// Deletes the helper streams that are still allocated.
ZipInputStream::~ZipInputStream() {
46
// the stream limiting reads to the compressed data of the current entry
if (compressedEntryStream) {
47
delete compressedEntryStream;
49
// the stream decompressing that data
if (uncompressionStream) {
50
delete uncompressionStream;
54
// Advances to the next entry of the archive and returns the stream over
// its data (m_entrystream).
//
// Returns NULL right away when m_status is already non-zero. Otherwise the
// data of the previous entry is skipped, the streams that were built for it
// are deleted, and new streams are set up for the next entry depending on
// compressionMethod and on whether the entry size is known.
// NOTE(review): several statements of this function (declarations, branch
// headers, status updates, returns) are not visible in this copy of the
// file; the comments below describe only the statements that are shown.
ZipInputStream::nextEntry() {
55
if (m_status) return NULL;
56
// clean up the last stream(s)
58
// if this entry is a compressed entry of known size, we can skip to
59
// the end by skipping in the compressed stream, without decompressing
60
if (compressedEntryStream) {
61
compressedEntryStream->skip(compressedEntryStream->size());
62
delete compressedEntryStream;
63
compressedEntryStream = 0;
64
delete uncompressionStream;
65
uncompressionStream = 0;
67
// check for a potential signature and skip it if it is there
69
// remember where the input stands before looking at the next 16 bytes
int64_t p = m_input->position();
70
int32_t n = m_input->read(c, 16, 16);
72
// interpret the first four bytes as a little-endian integer and compare
// them with the data descriptor signature 0x08074b50
n = readLittleEndianUInt32((const unsigned char*)c);
73
if (n != 0x08074b50) {
78
// NOTE(review): presumably the path for entries that had no
// compressedEntryStream - confirm
int64_t size = m_entrystream->size();
82
// keep skipping until the entry stream leaves the Ok state
while (m_entrystream->status() == Ok) {
83
m_entrystream->skip(size);
85
// a negative size marks an entry whose sizes were not stored in its header
if (m_entryinfo.size < 0) {
86
// skip the data descriptor that occurs after the data
88
int32_t n = m_input->read(c, 4, 4);
90
n = readLittleEndianUInt32((const unsigned char*)c);
91
if (n == 0x08074b50) { // sometimes this signature appears
92
// the signature was present: request the 12 bytes that follow it
n = m_input->read(c, 12, 12);
95
// presumably the branch without signature: request 8 more bytes
n = m_input->read(c, 8, 8);
101
m_error = "No valid data descriptor after entry data.";
106
// the stream of the previous entry is no longer needed
delete m_entrystream;
109
// are we at the end of the zip file?
110
if (m_input->status() == Eof) {
115
if (m_status != Ok) return NULL;
116
// an entry without a name is reported as an error
if (m_entryinfo.filename.length()<=0) {
118
m_error = "Archived file name is empty";
121
// compression method 8: the entry data has to be decompressed
if (compressionMethod == 8) {
122
if (m_entryinfo.size >= 0) {
123
// known sizes: limit the raw input to the compressed size ...
compressedEntryStream
124
= new SubInputStream(m_input, entryCompressedSize);
125
if (uncompressionStream) {
126
delete uncompressionStream;
128
// ... decompress it ...
uncompressionStream = new GZipInputStream(compressedEntryStream,
129
GZipInputStream::ZIPFORMAT);
131
// ... and limit the decompressed data to the uncompressed size
= new SubInputStream(uncompressionStream, m_entryinfo.size);
133
// unknown sizes: decompress straight from the input
m_entrystream = new GZipInputStream(m_input,
134
GZipInputStream::ZIPFORMAT);
137
// any other method: expose m_entryinfo.size bytes of the input unchanged
m_entrystream = new SubInputStream(m_input, m_entryinfo.size);
139
return m_entrystream;
142
// Reads the header that precedes the data of an entry and fills in the
// entry metadata: m_entryinfo.size, m_entryinfo.mtime, the file name (via
// readFileName), entryCompressedSize and compressionMethod. The extra field
// that follows the file name is skipped. Problems are described in m_error.
// NOTE(review): several statements of this function (declarations, status
// updates, returns, closing braces) are not visible in this copy of the
// file; the comments below describe only the statements that are shown.
ZipInputStream::readHeader() {
143
const unsigned char *hb;
148
// read the first 30 characters
150
nread = m_input->read(b, toread, toread);
151
// anything but the requested amount is an error
if (nread != toread) {
152
m_error = "Error reading zip header: ";
154
m_error += m_input->error();
156
m_error += " premature end of file.";
159
fprintf(stderr, "%s\n", m_error.c_str());
162
// view the header as unsigned bytes for the comparisons and decoding below
hb = (const unsigned char*)b;
163
// check the signature
164
// check the first half of the signature
165
if (hb[0] != 0x50 || hb[1] != 0x4b) {
166
// signature is invalid
168
m_error = "Error: wrong zip signature.";
171
// check the second half of the signature
172
if (hb[2] != 0x03 || hb[3] != 0x04) {
173
// this may be the start of the central file header
174
if (hb[2] != 0x01 || hb[3] != 0x02) {
175
// neither of the two known codes: report the four bytes on stderr
fprintf(stderr, "This code in a zip file is strange: %x %x %x %x\n",
176
hb[0], hb[1], hb[2], hb[3]);
181
// read 2 bytes into the filename size
182
int32_t filenamelen = readLittleEndianUInt16(hb + 26);
183
// the 2 bytes after that hold the length of the extra field
int64_t extralen = readLittleEndianUInt16(hb + 28);
184
// read 4 bytes into the length of the uncompressed size
185
m_entryinfo.size = readLittleEndianUInt32(hb + 22);
186
// read 4 bytes into the length of the compressed size
187
entryCompressedSize = readLittleEndianUInt32(hb + 18);
188
// NOTE(review): this can only trigger if entryCompressedSize is a signed
// type that the 32-bit value overflows - confirm against its declaration
if (entryCompressedSize < 0) {
190
m_error = "Corrupt zip file with negative compressed size.";
193
// 2 bytes at offset 8: compression method, at offset 6: general flags
compressionMethod = readLittleEndianUInt16(hb + 8);
194
int32_t generalBitFlags = readLittleEndianUInt16(hb+6);
195
if (generalBitFlags & 8) { // is bit 3 set?
196
// the file size and compressed file size are unknown at this point;
198
// if the file is compressed with method 8 we rely on the decompression
199
// stream to signal the end of the stream properly
200
if (compressionMethod != 8) {
202
m_error = "This particular zip file format is not supported for "
203
"reading as a stream.";
206
// mark both sizes as unknown
m_entryinfo.size = -1;
207
entryCompressedSize = -1;
209
// 4 bytes at offset 10 hold the modification time in DOS format
unsigned long dost = readLittleEndianUInt32(hb+10);
210
m_entryinfo.mtime = dos2unixtime(dost);
212
// the file name follows directly after the fixed-size header
readFileName(filenamelen);
215
m_error = "Error reading file name: ";
216
m_error += m_input->error();
219
// skip the extra field; its length was taken from the header above
220
int64_t skipped = m_input->skip(extralen);
221
if (skipped != extralen) {
223
// printf("skipped %li extralen %li position: %li size: %li\n", skipped, extralen, m_input->position(), m_input->size());
224
m_error = "Error skipping extra field: ";
225
m_error += m_input->error();
230
// Reads `len` bytes from the input and stores them as the name of the
// current entry in m_entryinfo.filename. A name that ends in '/' is treated
// as a directory: the slash is removed and the entry type is set to
// EntryInfo::Dir. EntryInfo::File is assigned in the last visible statement,
// presumably for all other names.
// A failed read is described in m_error.
// NOTE(review): several statements of this function (a declaration, branch
// headers, returns, closing braces) are not visible in this copy of the
// file; the comments describe only the statements that are shown.
ZipInputStream::readFileName(int32_t len) {
231
// forget the name of the previous entry
m_entryinfo.filename.resize(0);
233
int32_t nread = m_input->read(begin, len, len);
235
m_error = "Error reading filename: ";
237
m_error += m_input->error();
239
m_error += " premature end of file.";
243
// copy the bytes that were read into the entry name
m_entryinfo.filename.assign(begin, nread);
245
// temporary hack for determining if this is a directory:
246
// does the filename end in '/'?
247
len = (int32_t)m_entryinfo.filename.length();
248
// NOTE(review): for an empty name len-1 is -1 and this index is out of
// range; no guard is visible here - confirm that len is never 0
if (m_entryinfo.filename[len-1] == '/') {
249
m_entryinfo.filename.resize(len-1);
250
m_entryinfo.type = EntryInfo::Dir;
252
m_entryinfo.type = EntryInfo::File;