1
/* This file is part of Strigi Desktop Search
3
* Copyright (C) 2006 Jos van den Oever <jos@vandenoever.info>
5
* This library is free software; you can redistribute it and/or
6
* modify it under the terms of the GNU Library General Public
7
* License as published by the Free Software Foundation; either
8
* version 2 of the License, or (at your option) any later version.
10
* This library is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
* Library General Public License for more details.
15
* You should have received a copy of the GNU Library General Public License
16
* along with this library; see the file COPYING.LIB. If not, write to
17
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18
* Boston, MA 02110-1301, USA.
*/
21
#include "zipinputstream.h"
22
#include <strigi/strigiconfig.h>
23
#include "gzipinputstream.h"
24
#include "subinputstream.h"
27
#include "textutils.h"
31
using namespace Strigi;
34
ZipInputStream::checkHeader(const char* data, int32_t datasize) {
35
static const char magic[] = {0x50, 0x4b, 0x03, 0x04};
36
if (datasize < 4) return false;
37
bool ok = std::memcmp(data, magic, 4) == 0 && datasize > 8;
40
ZipInputStream::ZipInputStream(InputStream* input)
41
: SubStreamProvider(input) {
42
compressedEntryStream = 0;
43
uncompressionStream = 0;
45
ZipInputStream::~ZipInputStream() {
46
if (compressedEntryStream) {
47
delete compressedEntryStream;
49
if (uncompressionStream) {
50
delete uncompressionStream;
54
ZipInputStream::nextEntry() {
55
if (m_status) return 0;
56
// clean up the last stream(s)
58
// if this entry is a compressed entry of know size, we can skip to
59
// the end by skipping in the compressed stream, without decompressing
60
if (compressedEntryStream) {
61
compressedEntryStream->skip(compressedEntryStream->size());
62
delete compressedEntryStream;
63
compressedEntryStream = 0;
64
delete uncompressionStream;
65
uncompressionStream = 0;
67
// check for a potential signature and skip it if it is there
69
int64_t p = m_input->position();
70
int32_t n = m_input->read(c, 16, 16);
72
n = readLittleEndianUInt32((const unsigned char*)c);
73
if (n != 0x08074b50) {
78
int64_t size = m_entrystream->size();
82
while (m_entrystream->status() == Ok) {
83
m_entrystream->skip(size);
85
if (m_entryinfo.size < 0) {
86
// skip the data descriptor that occurs after the data
88
int32_t n = m_input->read(c, 4, 4);
90
n = readLittleEndianUInt32((const unsigned char*)c);
91
if (n == 0x08074b50) { // sometimes this signature appears
92
n = m_input->read(c, 12, 12);
95
n = m_input->read(c, 8, 8);
101
m_error = "No valid data descriptor after entry data.";
106
delete m_entrystream;
109
// are we at the end of the zip file?
110
if (m_input->status() == Eof) {
115
if (m_status) return 0;
116
if (compressionMethod == 8) {
117
if (m_entryinfo.size >= 0) {
118
compressedEntryStream
119
= new SubInputStream(m_input, entryCompressedSize);
120
if (uncompressionStream) {
121
delete uncompressionStream;
123
uncompressionStream = new GZipInputStream(compressedEntryStream,
124
GZipInputStream::ZIPFORMAT);
126
= new SubInputStream(uncompressionStream, m_entryinfo.size);
128
m_entrystream = new GZipInputStream(m_input,
129
GZipInputStream::ZIPFORMAT);
132
m_entrystream = new SubInputStream(m_input, m_entryinfo.size);
134
return m_entrystream;
137
ZipInputStream::readHeader() {
138
const unsigned char *hb;
143
// read the first 30 characters
145
nread = m_input->read(b, toread, toread);
146
if (nread != toread) {
147
m_error = "Error reading zip header: ";
149
m_error += m_input->error();
151
m_error += " premature end of file.";
154
fprintf(stderr, "%s\n", m_error.c_str());
157
hb = (const unsigned char*)b;
158
// check the signature
159
// check the first half of the signature
160
if (hb[0] != 0x50 || hb[1] != 0x4b) {
161
// signature is invalid
163
m_error = "Error: wrong zip signature.";
166
// check the second half of the signature
167
if (hb[2] != 0x03 || hb[3] != 0x04) {
168
// this may be the start of the central file header
169
if (hb[2] != 0x01 || hb[3] != 0x02) {
170
fprintf(stderr, "This code in a zip file is strange: %x %x %x %x\n",
171
hb[0], hb[1], hb[2], hb[3]);
176
// read 2 bytes into the filename size
177
int32_t filenamelen = readLittleEndianUInt16(hb + 26);
178
int64_t extralen = readLittleEndianUInt16(hb + 28);
179
// read 4 bytes into the length of the uncompressed size
180
m_entryinfo.size = readLittleEndianUInt32(hb + 22);
181
// read 4 bytes into the length of the compressed size
182
entryCompressedSize = readLittleEndianUInt32(hb + 18);
183
if (entryCompressedSize < 0) {
185
m_error = "Corrupt zip file with negative compressed size.";
188
compressionMethod = readLittleEndianUInt16(hb + 8);
189
int32_t generalBitFlags = readLittleEndianUInt16(hb+6);
190
if (generalBitFlags & 8) { // is bit 3 set?
191
// ohoh, the file size and compressed file size are unknown at this
193
// if the file is compressed with method 8 we rely on the decompression
194
// stream to signal the end of the stream properly
195
if (compressionMethod != 8) {
197
m_error = "This particular zip file format is not supported for "
198
"reading as a stream.";
201
m_entryinfo.size = -1;
202
entryCompressedSize = -1;
204
unsigned long dost = readLittleEndianUInt32(hb+10);
205
m_entryinfo.mtime = dos2unixtime(dost);
207
readFileName(filenamelen);
210
m_error = "Error reading file name: ";
211
m_error += m_input->error();
214
// read 2 bytes into the length of the extra field
215
int64_t skipped = m_input->skip(extralen);
216
if (skipped != extralen) {
218
// printf("skipped %li extralen %li position: %li size: %li\n", skipped, extralen, m_input->position(), m_input->size());
219
m_error = "Error skipping extra field: ";
220
m_error += m_input->error();
225
ZipInputStream::readFileName(int32_t len) {
226
m_entryinfo.filename.resize(0);
228
int32_t nread = m_input->read(begin, len, len);
230
m_error = "Error reading filename: ";
232
m_error += m_input->error();
234
m_error += " premature end of file.";
238
m_entryinfo.filename.assign(begin, nread);
240
// temporary hack for determining if this is a directory:
241
// does the filename end in '/'?
242
len = (int32_t)m_entryinfo.filename.length();
243
if (m_entryinfo.filename[len-1] == '/') {
244
m_entryinfo.filename.resize(len-1);
245
m_entryinfo.type = EntryInfo::Dir;
247
m_entryinfo.type = EntryInfo::File;