/*
 * NOTE: this is part of libzzipfseeko (i.e. it is not libzzip).
 *
 * These routines are fully independent from the traditional zzip
 * implementation. They assume a readonly seekable stdio handle
 * representing a complete zip file. The functions show how to
 * parse the structure, find files and return a decoded bytestream.
 *
 * These routines are a bit simple and really here for documenting
 * the way to access a zip file. The complexity of zip access comes
 * from staggered reading of bytes and reposition of a filepointer in
 * a big archive with lots of files and long compressed datastreams.
 * Plus variants of drop-in stdio replacements, obfuscation routines,
 * auto fileextensions, drop-in dirent replacements, and so on...
 *
 * btw, we can _not_ use fgetpos/fsetpos since an fpos_t has no asserted
 * relation to a linear seek value as specified in zip info headers. In
 * general it is not a problem if your system has no fseeko/ftello pair
 * since we can fallback to fseek/ftell which limits the zip disk size
 * to 2 GiB but the zip-storable seek values are 32bit limited anyway.
 *
 *      Guido Draheim <guidod@gmx.de>
 *
 * Copyright (c) 2003,2004 Guido Draheim
 *          All rights reserved,
 *          use under the restrictions of the
 *          Lesser GNU General Public License
 *          or alternatively the restrictions
 *          of the Mozilla Public License 1.1
 */

#define _LARGEFILE_SOURCE 1
#define _ZZIP_ENTRY_STRUCT 1

#include <zzip/types.h>

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#if defined ZZIP_HAVE_STRING_H
#include <string.h>
#elif defined ZZIP_HAVE_STRINGS_H
#include <strings.h>
#endif

#include <zlib.h>
#include <zzip/format.h>
#include <zzip/fseeko.h>
#include <zzip/fetch.h>
#include <zzip/__mmap.h>
#include <zzip/__fnmatch.h>

/* "___ decl; ... ____;" lets the code declare a variable in the middle of
 * a block: with C99 both markers vanish, with older compilers they open
 * and close a nested block around the rest of the enclosing scope. */
#if __STDC_VERSION__+0 > 199900L
#define ___
#define ____
#else
#define ___ {
#define ____ }
#endif

/* without an fseeko/ftello pair we fall back to the long-based calls */
#ifndef ZZIP_HAVE_FSEEKO
#define fseeko fseek
#define ftello ftell
#endif

/* note that the struct zzip_entry inherits the zzip_disk_entry values
 * and usually carries a copy of its values (in disk format!). To make the
 * following code more readable, we use a shorthand notation for the
 * upcast needed in C (not needed in C++) as "disk_(entry)".
 */
#ifdef __zzip_entry_extends_zzip_disk_entry
#define disk_(_entry_) _entry_
#else
#define disk_(_entry_) (& (_entry_)->head)
#endif

/* we try to round all seeks to the pagesize - since we do not use
 * the sys/mmap interface we have to guess a good value here.
 * It must stay a power of two: the code masks with (PAGESIZE-1). */
#define PAGESIZE 8192

83
/* ====================================================================== */
84
/* helper functions */
86
/** => zzip_entry_data_offset
87
* This functions read the correspoding struct zzip_file_header from
88
* the zip disk of the given "entry". The returned off_t points to the
89
* end of the file_header where the current fseek pointer has stopped.
90
* This is used to immediatly parse out any filename/extras block following
91
* the file_header. The return value is null on error.
94
zzip_entry_fread_file_header (ZZIP_ENTRY* entry,
95
struct zzip_file_header* file_header)
97
if (! entry || ! file_header) return 0;
98
___ zzip_off_t offset = zzip_disk_entry_fileoffset (disk_(entry));
99
if (0 > offset || offset >= entry->disksize) return 0;
101
fseeko (entry->diskfile, offset, SEEK_SET);
102
return (fread (file_header, sizeof(*file_header), 1, entry->diskfile)
103
? offset+sizeof(*file_header) : 0 ); ____;
106
/** helper functions for (fseeko) zip access api
108
* This functions returns the seekval offset of the data portion of the
109
* file referenced by the given zzip_entry. It requires an intermediate
110
* check of the file_header structure (i.e. it reads it from disk). After
111
* this call, the contained diskfile readposition is already set to the
112
* data_offset returned here. On error -1 is returned.
115
zzip_entry_data_offset(ZZIP_ENTRY* entry)
117
struct zzip_file_header file_header;
118
if (! entry) return -1;
119
___ zzip_off_t offset =
120
zzip_entry_fread_file_header (entry, & file_header);
121
if (! offset) return -1;
122
offset += zzip_file_header_sizeof_tails (& file_header);
123
fseeko (entry->diskfile, offset, SEEK_SET);
127
/** => zzip_entry_data_offset
128
* This function is a big helper despite its little name: in a zip file the
129
* encoded filenames are usually NOT zero-terminated but for common usage
130
* with libc we need it that way. Secondly, the filename SHOULD be present
131
* in the zip central directory but if not then we fallback to the filename
132
* given in the file_header of each compressed data portion.
135
zzip_entry_strdup_name(ZZIP_ENTRY* entry)
137
if (! entry) return 0;
140
if ((len = zzip_disk_entry_namlen (disk_(entry)))) {
141
char* name = malloc (len+1);
142
if (! name) return 0;
143
memcpy (name, entry->tail, len);
147
___ auto struct zzip_file_header header;
148
if (zzip_entry_fread_file_header (entry, &header)
149
&& ( len = zzip_file_header_namlen(&header) )) {
150
char* name = malloc (len+1);
151
if (! name) return 0;
152
fread (name, 1, len, entry->diskfile);
161
prescan_entry(ZZIP_ENTRY* entry)
164
___ zzip_off_t tailsize = zzip_disk_entry_sizeof_tails (disk_(entry));
165
if (tailsize+1 > entry->tailalloc) {
166
char* newtail = realloc (entry->tail, tailsize+1);
167
if (! newtail) return ENOMEM;
168
entry->tail = newtail;
169
entry->tailalloc = tailsize+1;
171
fread (entry->tail, 1, tailsize, entry->diskfile);
172
/* name + comment + extras */
177
prescan_clear(ZZIP_ENTRY* entry)
180
if (entry->tail) free (entry->tail);
181
entry->tail = 0; entry->tailalloc = 0;
184
/* ====================================================================== */
186
/** => zzip_entry_findfile
188
* This function is the first call of all the zip access functions here.
189
* It contains the code to find the first entry of the zip central directory.
190
* Here we require the stdio handle to represent a real zip file where the
191
* disk_trailer is _last_ in the file area, so that its position would be at
192
* a fixed offset from the end of the file area if not for the comment field
193
* allowed to be of variable length (which needs us to do a little search
194
* for the disk_tailer). However, in this simple implementation we disregard
195
* any disk_trailer info telling about multidisk archives, so we just return
196
* a pointer to the first entry in the zip central directory of that file.
198
* For an actual means, we are going to search backwards from the end
199
* of the mmaped block looking for the PK-magic signature of a
200
* disk_trailer. If we see one then we check the rootseek value to
201
* find the first disk_entry of the root central directory. If we find
202
* the correct PK-magic signature of a disk_entry over there then we
203
* assume we are done and we are going to return a pointer to that label.
205
* The return value is a pointer to the first zzip_disk_entry being checked
206
* to be within the bounds of the file area specified by the arguments. If
207
* no disk_trailer was found then null is returned, and likewise we only
208
* accept a disk_trailer with a seekvalue that points to a disk_entry and
209
* both parts have valid PK-magic parts. Beyond some sanity check we try to
210
* catch a common brokeness with zip archives that still allows us to find
211
* the start of the zip central directory.
213
zzip__new__ ZZIP_ENTRY*
214
zzip_entry_findfirst(FILE* disk)
216
if (! disk) return 0;
217
fseeko (disk, 0, SEEK_END);
218
___ zzip_off_t disksize = ftello (disk);
219
if (disksize < (zzip_off_t) sizeof(struct zzip_disk_trailer)) return 0;
220
/* we read out chunks of 8 KiB in the hope to match disk granularity */
221
___ zzip_off_t pagesize = PAGESIZE; /* getpagesize() */
222
___ ZZIP_ENTRY* entry = malloc (sizeof(*entry)); if (! entry) return 0;
223
___ unsigned char* buffer = malloc (pagesize); if (! buffer) goto nomem;
225
assert (pagesize/2 > (zzip_off_t) sizeof (struct zzip_disk_trailer));
226
/* at each step, we will fread a pagesize block which overlaps with the
227
* previous read by means of pagesize/2 step at the end of the while(1) */
228
___ zzip_off_t mapoffs = disksize &~ (pagesize-1);
229
___ zzip_off_t mapsize = disksize - mapoffs;
230
if (mapoffs && mapsize < pagesize/2) {
231
mapoffs -= pagesize/2; mapsize += pagesize/2; }
233
fseeko (disk, mapoffs, SEEK_SET);
234
fread (buffer, 1, mapsize, disk);
235
___ unsigned char* p =
236
buffer + mapsize - sizeof(struct zzip_disk_trailer);
237
for (; p >= buffer ; p--)
239
zzip_off_t root; /* (struct zzip_disk_entry*) */
240
if (zzip_disk_trailer_check_magic(p)) {
241
root = zzip_disk_trailer_rootseek (
242
(struct zzip_disk_trailer*)p);
243
if (root > disksize - (long)sizeof(struct zzip_disk_trailer)) {
244
/* first disk_entry is after the disk_trailer? can't be! */
245
zzip_off_t rootsize = zzip_disk_trailer_rootsize (
246
(struct zzip_disk_trailer*)p);
247
if (rootsize > mapoffs) continue;
248
/* a common brokeness that can be fixed: we just assume the
249
* central directory was written directly before : */
250
root = mapoffs - rootsize;
252
} else if (zzip_disk64_trailer_check_magic(p)) {
253
if (sizeof(zzip_off_t) < 8) return 0;
254
root = zzip_disk64_trailer_rootseek (
255
(struct zzip_disk64_trailer*)p);
258
assert (0 <= root && root < mapsize);
259
fseeko (disk, root, SEEK_SET);
260
fread (disk_(entry), 1, sizeof(*disk_(entry)), disk);
261
if (zzip_disk_entry_check_magic(entry)) {
263
entry->headseek = root;
264
entry->diskfile = disk;
265
entry->disksize = disksize;
266
if (prescan_entry(entry)) goto nomem;
270
if (! mapoffs) break; assert (mapsize >= pagesize/2);
271
mapoffs -= pagesize/2; /* mapsize += pagesize/2; */
272
mapsize = pagesize; /* if (mapsize > pagesize) ... */
273
if (disksize - mapoffs > 64*1024) break;
277
free (entry); ____;____;____;____;____;____;
281
/** => zzip_entry_findfile
283
* This function takes an existing "entry" in the central root directory
284
* (e.g. from zzip_entry_findfirst) and moves it to point to the next entry.
285
* On error it returns 0, otherwise the old entry. If no further match is
286
* found then null is returned and the entry already free()d. If you want
287
* to stop searching for matches before that case then please call
288
* => zzip_entry_free on the cursor struct ZZIP_ENTRY.
290
zzip__new__ ZZIP_ENTRY*
291
zzip_entry_findnext(ZZIP_ENTRY* _zzip_restrict entry)
293
if (! entry) return entry;
294
if (! zzip_disk_entry_check_magic (entry)) goto err;
295
___ zzip_off_t seek =
296
entry->headseek + zzip_disk_entry_sizeto_end (disk_(entry));
297
if (seek + (zzip_off_t) sizeof(*disk_(entry)) > entry->disksize) goto err;
299
fseeko (entry->diskfile, seek, SEEK_SET);
300
fread (disk_(entry), 1, sizeof(*disk_(entry)), entry->diskfile);
301
entry->headseek = seek;
302
if (! zzip_disk_entry_check_magic (entry)) goto err;
303
if (prescan_entry(entry)) goto err;
306
zzip_entry_free (entry);
310
/** => zzip_entry_findfile
311
* this function releases the malloc()ed areas needed for zzip_entry, the
312
* pointer is invalid afterwards. This function has #define synonyms of
313
* zzip_entry_findlast(), zzip_entry_findlastfile(), zzip_entry_findlastmatch()
316
zzip_entry_free(ZZIP_ENTRY* entry)
318
if (! entry) return 0;
319
prescan_clear (entry);
324
/** search for files in the (fseeko) zip central directory
326
* This function is given a filename as an additional argument, to find the
327
* disk_entry matching a given filename. The compare-function is usually
328
* strcmp or strcasecmp or perhaps strcoll, if null then strcmp is used.
329
* - use null as argument for "old"-entry when searching the first
330
* matching entry, otherwise the last returned value if you look for other
331
* entries with a special "compare" function (if null then a doubled search
332
* is rather useless with this variant of _findfile). If no further entry is
333
* found then null is returned and any "old"-entry gets already free()d.
335
zzip__new__ ZZIP_ENTRY*
336
zzip_entry_findfile(FILE* disk, char* filename,
337
ZZIP_ENTRY* _zzip_restrict entry,
338
zzip_strcmp_fn_t compare)
340
if (! filename || ! disk) return 0;
341
entry = ( ! entry ) ? zzip_entry_findfirst (disk)
342
: zzip_entry_findnext (entry);
343
if (! compare) compare = (zzip_strcmp_fn_t)(strcmp);
345
for (; entry ; entry = zzip_entry_findnext (entry))
346
{ /* filenames within zip files are often not null-terminated! */
347
char* realname = zzip_entry_strdup_name (entry);
348
if (! realname) continue;
349
if (! compare (filename, realname)) {
350
free (realname); return entry;
352
free (realname); continue;
#ifdef ZZIP_HAVE_FNMATCH_H
#define _zzip_fnmatch fnmatch
# ifdef FNM_CASEFOLD
# define _zzip_fnmatch_CASEFOLD FNM_CASEFOLD
# else
# define _zzip_fnmatch_CASEFOLD 0
# endif
#else
# define _zzip_fnmatch_CASEFOLD 0
/* if your system does not have fnmatch, we fall back to strcmp:
 * the "flags" are ignored and the result is 0 only for identical strings.
 * A library routine must not write to stdout, so nothing is printed. */
static int _zzip_fnmatch(char* pattern, char* string, int flags)
{
    (void) flags;
    return strcmp (pattern, string);
}
#endif

375
/** => zzip_entry_findfile
377
* This function uses a compare-function with an additional argument
378
* and it is called just like fnmatch(3) from POSIX.2 AD:1993), i.e.
379
* the argument filespec first and the ziplocal filename second with
380
* the integer-flags put in as third to the indirect call. If the
381
* platform has fnmatch available then null-compare will use that one
382
* and otherwise we fall back to mere strcmp, so if you need fnmatch
383
* searching then please provide an implementation somewhere else.
384
* - use null as argument for "after"-entry when searching the first
385
* matching entry, or the last disk_entry return-value to find the
386
* next entry matching the given filespec. If no further entry is
387
* found then null is returned and any "old"-entry gets already free()d.
389
zzip__new__ ZZIP_ENTRY*
390
zzip_entry_findmatch(FILE* disk, char* filespec,
391
ZZIP_ENTRY* _zzip_restrict entry,
392
zzip_fnmatch_fn_t compare, int flags)
394
if (! filespec || ! disk) return 0;
395
entry = ( ! entry ) ? zzip_entry_findfirst (disk)
396
: zzip_entry_findnext (entry);
397
if (! compare) compare = (zzip_fnmatch_fn_t) _zzip_fnmatch;
399
for (; entry ; entry = zzip_entry_findnext (entry))
400
{ /* filenames within zip files are often not null-terminated! */
401
char* realname = zzip_entry_strdup_name (entry);
402
if (! realname) continue;
403
if (! compare (filespec, realname, flags)) {
404
free (realname); return entry;
406
free (realname); continue;
412
/* ====================================================================== */
415
* typedef struct zzip_disk_file ZZIP_ENTRY_FILE;
417
struct zzip_entry_file /* : zzip_file_header */
419
struct zzip_file_header header; /* fopen detected header */
420
ZZIP_ENTRY* entry; /* fopen entry */
421
zzip_off_t data; /* for stored blocks */
422
zzip_size_t avail; /* memorized for checks on EOF */
423
zzip_size_t compressed; /* compressed flag and datasize */
424
zzip_size_t dataoff; /* offset from data start */
425
z_stream zlib; /* for inflated blocks */
426
unsigned char buffer[PAGESIZE]; /* work buffer for inflate algorithm */
429
/** open a file within a zip disk for reading
431
* This function does take an "entry" argument and copies it (or just takes
432
* it over as owner) to a new ZZIP_ENTRY_FILE handle structure. That
433
* structure contains also a zlib buffer for decoding. This function does
434
* seek to the file_header of the given "entry" and validates it for the
435
* data buffer following it. We do also prefetch some data from the data
436
* buffer thereby trying to match the disk pagesize for faster access later.
437
* The => zzip_entry_fread will then read in chunks of pagesizes which is
438
* the size of the internal readahead buffer. If an error occurs then null
441
zzip__new__ ZZIP_ENTRY_FILE*
442
zzip_entry_fopen (ZZIP_ENTRY* entry, int takeover)
444
if (! entry) return 0;
446
ZZIP_ENTRY* found = malloc (sizeof(*entry));
447
if (! found) return 0;
448
memcpy (found, entry, sizeof(*entry)); /* prescan_copy */
449
found->tail = malloc (found->tailalloc);
450
if (! found->tail) { free (found); return 0; }
451
memcpy (found->tail, entry->tail, entry->tailalloc);
454
___ ZZIP_ENTRY_FILE* file = malloc(sizeof(*file));
455
if (! file) goto fail1;
457
if (! zzip_entry_fread_file_header (entry, &file->header))
459
file->avail = zzip_file_header_usize (&file->header);
460
file->data = zzip_entry_data_offset (entry);
463
if (! file->avail || zzip_file_header_data_stored (&file->header))
464
{ file->compressed = 0; return file; }
466
file->compressed = zzip_file_header_csize (&file->header);
467
file->zlib.opaque = 0;
468
file->zlib.zalloc = Z_NULL;
469
file->zlib.zfree = Z_NULL;
471
___ zzip_off_t seek = file->data;
472
seek += sizeof(file->buffer); seek -= seek & (sizeof(file->buffer)-1);
473
assert (file->data < seek); /* pre-read to next PAGESIZE boundary... */
474
fseeko (file->entry->diskfile, file->data + file->dataoff, SEEK_SET);
475
file->zlib.next_in = file->buffer;
476
file->zlib.avail_in = fread (file->buffer, 1, seek - file->data,
477
file->entry->diskfile);
478
file->dataoff += file->zlib.avail_in; ____;
480
if (! zzip_file_header_data_deflated (&file->header)
481
|| inflateInit2 (& file->zlib, -MAX_WBITS) != Z_OK) goto fail2;
487
zzip_entry_free (entry);
491
/** => zzip_entry_fopen
493
* This function opens a file found by name, so it does a search into
494
* the zip central directory with => zzip_entry_findfile and whatever
495
* is found first is given to => zzip_entry_fopen
497
zzip__new__ ZZIP_ENTRY_FILE*
498
zzip_entry_ffile (FILE* disk, char* filename)
500
ZZIP_ENTRY* entry = zzip_entry_findfile (disk, filename, 0, 0);
501
if (! entry) return 0;
502
return zzip_entry_fopen (entry, 1);
506
/** => zzip_entry_fopen
508
* This function reads more bytes into the output buffer specified as
509
* arguments. The return value is null on eof or error, the stdio-like
510
* interface can not distinguish between these so you need to check
511
* with => zzip_entry_feof for the difference.
514
zzip_entry_fread (void* ptr, zzip_size_t sized, zzip_size_t nmemb,
515
ZZIP_ENTRY_FILE* file)
517
if (! file) return 0;
518
___ zzip_size_t size = sized*nmemb;
519
if (! file->compressed) {
520
if (size > file->avail) size = file->avail;
521
fread (ptr, 1, size, file->entry->diskfile);
522
file->dataoff += size;
527
file->zlib.avail_out = size;
528
file->zlib.next_out = ptr;
529
___ zzip_size_t total_old = file->zlib.total_out;
531
if (! file->zlib.avail_in) {
532
size = file->compressed - file->dataoff;
533
if (size > sizeof(file->buffer)) size = sizeof(file->buffer);
534
/* fseek (file->data + file->dataoff, file->entry->diskfile); */
535
file->zlib.avail_in = fread (file->buffer, 1, size,
536
file->entry->diskfile);
537
file->zlib.next_in = file->buffer;
538
file->dataoff += file->zlib.avail_in;
540
if (! file->zlib.avail_in) return 0;
542
___ int err = inflate (& file->zlib, Z_NO_FLUSH);
543
if (err == Z_STREAM_END)
545
else if (err == Z_OK)
546
file->avail -= file->zlib.total_out - total_old;
550
if (file->zlib.avail_out && ! file->zlib.avail_in) continue;
551
return file->zlib.total_out - total_old;
555
/** => zzip_entry_fopen
556
* This function releases any zlib decoder info needed for decompression
557
* and dumps the ZZIP_ENTRY_FILE struct then.
560
zzip_entry_fclose (ZZIP_ENTRY_FILE* file)
562
if (! file) return 0;
563
if (file->compressed)
564
inflateEnd (& file->zlib);
565
zzip_entry_free (file->entry);
570
/** => zzip_entry_fopen
572
* This function allows to distinguish an error from an eof condition.
573
* Actually, if we found an error but we did already reach eof then we
574
* just keep on saying that it was an eof, so the app can just continue.
577
zzip_entry_feof (ZZIP_ENTRY_FILE* file)
579
return ! file || ! file->avail;