/*
 * NOTE: this is part of libzzipmmapped (i.e. it is not libzzip).
 *
 * These routines are fully independent from the traditional zzip
 * implementation. They assume a readonly mmapped sharedmem block
 * representing a complete zip file. The functions show how to
 * parse the structure, find files and return a decoded bytestream.
 *
 * These routines are a bit simple and really here for documenting
 * the way to access a zip file. The complexity of zip access comes
 * from staggered reading of bytes and repositioning of a filepointer in
 * a big archive with lots of files and long compressed datastreams.
 * Plus variants of drop-in stdio replacements, obfuscation routines,
 * auto fileextensions, drop-in dirent replacements, and so on...
 *
 *      Guido Draheim <guidod@gmx.de>
 *
 * Copyright (c) 2003,2004 Guido Draheim
 *          All rights reserved,
 *          use under the restrictions of the
 *          Lesser GNU General Public License
 *          or alternatively the restrictions
 *          of the Mozilla Public License 1.1
 */
#define _GNU_SOURCE _glibc_developers_are_idiots_to_call_this_gnu_specific_

#define _ZZIP_DISK_FILE_STRUCT 1

#include <zzip/types.h>

#include <ctype.h>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/stat.h>

#if defined ZZIP_HAVE_UNISTD_H
#include <unistd.h>
#elif defined ZZIP_HAVE_IO_H
#include <io.h>
#endif

#if defined ZZIP_HAVE_STRING_H
#include <string.h>
#elif defined ZZIP_HAVE_STRINGS_H
#include <strings.h>
#endif

#include <zlib.h>
#include <zzip/mmapped.h>
#include <zzip/format.h>
#include <zzip/fetch.h>
#include <zzip/__mmap.h>
#include <zzip/__fnmatch.h>
/*
 * Scope markers for declarations in the middle of a block: with C99 and
 * later such declarations are legal and both markers expand to nothing,
 * older compilers get an extra brace pair instead. Every `___` has to be
 * balanced by one `____` before the enclosing block ends.
 */
#if __STDC_VERSION__+0 > 199900L
#define ___
#define ____
#else
#define ___ {
#define ____ }
#endif
66
* This function does primary initialization of a disk-buffer struct.
69
zzip_disk_init(ZZIP_DISK* disk, void* buffer, zzip_size_t buflen)
71
disk->buffer = (zzip_byte_t*) buffer;
72
disk->endbuf = (zzip_byte_t*) buffer + buflen;
76
/* do not touch disk->user */
77
/* do not touch disk->code */
82
* This function allocates a new disk-buffer with => malloc(3)
84
zzip__new__ ZZIP_DISK*
87
ZZIP_DISK* disk = malloc(sizeof(disk));
88
if (! disk) return disk;
89
zzip_disk_init (disk, 0, 0);
93
/** turn a filehandle into a mmapped zip disk archive handle
95
* This function uses the given file-descriptor to detect the length of the
96
* file and calls the system => mmap(2) to put it in main memory. If it is
97
* successful then a newly allocated ZZIP_DISK* is returned with
98
* disk->buffer pointing to the mapview of the zipdisk content.
100
zzip__new__ ZZIP_DISK*
101
zzip_disk_mmap(int fd)
104
if (fstat (fd, &st) || !st.st_size) return 0;
105
___ ZZIP_DISK* disk = zzip_disk_new (); if (! disk) return 0;
106
disk->buffer = _zzip_mmap (& zzip->mapped, fd, 0, st.st_size);
107
if (disk->buffer == MAP_FAILED) { free (disk); return 0; }
108
disk->endbuf = disk->buffer + st.st_size;
112
/** => zzip_disk_mmap
113
* This function is the inverse of => zzip_disk_mmap and using the system
114
* munmap(2) on the buffer area and => free(3) on the ZZIP_DISK structure.
117
zzip_disk_munmap(ZZIP_DISK* disk)
119
if (! disk) return 0;
120
_zzip_munmap (disk->mapped, disk->buffer, disk->endbuf-disk->buffer);
125
/** => zzip_disk_mmap
127
* This function opens the given archive by name and turn the filehandle
128
* to => zzip_disk_mmap for bringing it to main memory. If it can not
129
* be => mmap(2)'ed then we slurp the whole file into a newly => malloc(2)'ed
130
* memory block. Only if that fails too then we return null. Since handling
131
* of disk->buffer is ambigous it should not be snatched away please.
133
ZZIP_DISK* zzip__new__
134
zzip_disk_open(char* filename)
140
if (stat (filename, &st) || !st.st_size) return 0;
141
___ int fd = open (filename, O_RDONLY|O_BINARY);
142
if (fd <= 0) return 0;
143
___ ZZIP_DISK* disk = zzip_disk_mmap (fd);
144
if (disk) return disk;
145
___ zzip_byte_t* buffer = malloc (st.st_size);
146
if (! buffer) return 0;
147
if ((st.st_size == read (fd, buffer, st.st_size)) &&
148
(disk = zzip_disk_new ()))
150
disk->buffer = buffer;
151
disk->endbuf = buffer+st.st_size;
154
return disk; ____;____;____;
157
/** => zzip_disk_mmap
159
* This function will release all data needed to access a (mmapped)
160
* zip archive, including any malloc()ed blocks, sharedmem mappings
161
* and it dumps the handle struct as well.
164
zzip_disk_close(ZZIP_DISK* disk)
166
if (! disk) return 0;
167
if (disk->mapped != -1) return zzip_disk_munmap (disk);
/* ====================================================================== */
/*                      helper functions                                  */
176
#ifdef ZZIP_HAVE_STRNDUP
177
#define _zzip_strndup strndup
179
/* if your system does not have strndup: */
180
zzip__new__ static char* _zzip_strndup(char* p, int maxlen)
183
___ zzip_byte_t* r = malloc (maxlen+1);
185
strncpy (r, p, maxlen);
#if defined ZZIP_HAVE_STRCASECMP || defined strcasecmp
#define _zzip_strcasecmp strcasecmp
#else
/* if your system does not have strcasecmp:
 *
 * Compares two strings bytewise after tolower(). The result is zero for
 * equal strings, otherwise the difference of the first pair of folded
 * bytes that differ. Two null pointers compare equal; a single null
 * pointer never compares equal to a string.
 * The prototype matches strcmp/strcasecmp so that the three can be used
 * interchangeably as a compare function.
 */
static int _zzip_strcasecmp(const char* a, const char* b)
{
    if (! a) return (b) ? 1 : 0;
    if (! b) return -1;
    for (;; a++, b++)
    {
        /* <ctype.h> wants unsigned char values, plain char may be negative */
        int v = tolower((unsigned char) *a) - tolower((unsigned char) *b);
        if (v) return v;
        if (! *a) return 0; /* v == 0, so both strings end right here */
    }
}
#endif
210
/** helper functions for (mmapped) zip access api
212
* This function augments the other zzip_disk_entry_* helpers: here we move
213
* a disk_entry pointer (as returned by _find* functions) into a pointer to
214
* the data block right after the file_header. Only disk->buffer would be
215
* needed to perform the seek but we check the mmapped range end as well.
218
zzip_disk_entry_to_data(ZZIP_DISK* disk, struct zzip_disk_entry* entry)
220
struct zzip_file_header* file =
221
zzip_disk_entry_to_file_header(disk, entry);
222
if (file) return zzip_file_header_to_data (file);
226
/** => zzip_disk_entry_to_data
227
* This function does half the job of => zzip_disk_entry_to_data where it
228
* can augment with => zzip_file_header_to_data helper from format/fetch.h
230
struct zzip_file_header*
231
zzip_disk_entry_to_file_header(ZZIP_DISK* disk, struct zzip_disk_entry* entry)
233
zzip_byte_t* file_header = /* (struct zzip_file_header*) */
234
(disk->buffer + zzip_disk_entry_fileoffset (entry));
235
if (disk->buffer > file_header || file_header >= disk->endbuf)
237
return (struct zzip_file_header*) file_header;
240
/** => zzip_disk_entry_to_data
241
* This function is a big helper despite its little name: in a zip file the
242
* encoded filenames are usually NOT zero-terminated but for common usage
243
* with libc we need it that way. Secondly, the filename SHOULD be present
244
* in the zip central directory but if not then we fallback to the filename
245
* given in the file_header of each compressed data portion.
248
zzip_disk_entry_strdup_name(ZZIP_DISK* disk, struct zzip_disk_entry* entry)
250
if (! disk || ! entry) return 0;
252
___ char* name; zzip_size_t len;
253
struct zzip_file_header* file;
254
if ((len = zzip_disk_entry_namlen (entry)))
255
name = zzip_disk_entry_to_filename (entry);
256
else if ((file = zzip_disk_entry_to_file_header (disk, entry)) &&
257
(len = zzip_file_header_namlen (file)))
258
name = zzip_file_header_to_filename (file);
262
if ((zzip_byte_t*) name < disk->buffer ||
263
(zzip_byte_t*) name+len > disk->endbuf)
266
return _zzip_strndup (name, len); ____;
269
/** => zzip_disk_entry_to_data
270
* This function is similar creating a reference to a zero terminated
271
* string but it can only exist in the zip central directory entry.
274
zzip_disk_entry_strdup_comment(ZZIP_DISK* disk, struct zzip_disk_entry* entry)
276
if (! disk || ! entry) return 0;
278
___ char* text; zzip_size_t len;
279
if ((len = zzip_disk_entry_comment (entry)))
280
text = zzip_disk_entry_to_comment (entry);
284
if ((zzip_byte_t*) text < disk->buffer ||
285
(zzip_byte_t*) text+len > disk->endbuf)
288
return _zzip_strndup (text, len); ____;
/* ====================================================================== */
293
/** => zzip_disk_findfile
295
* This function is the first call of all the zip access functions here.
296
* It contains the code to find the first entry of the zip central directory.
297
* Here we require the mmapped block to represent a real zip file where the
298
* disk_trailer is _last_ in the file area, so that its position would be at
299
* a fixed offset from the end of the file area if not for the comment field
300
* allowed to be of variable length (which needs us to do a little search
301
* for the disk_tailer). However, in this simple implementation we disregard
302
* any disk_trailer info telling about multidisk archives, so we just return
303
* a pointer to the zip central directory.
305
* For an actual means, we are going to search backwards from the end
306
* of the mmaped block looking for the PK-magic signature of a
307
* disk_trailer. If we see one then we check the rootseek value to
308
* find the first disk_entry of the root central directory. If we find
309
* the correct PK-magic signature of a disk_entry over there then we
310
* assume we are done and we are going to return a pointer to that label.
312
* The return value is a pointer to the first zzip_disk_entry being checked
313
* to be within the bounds of the file area specified by the arguments. If
314
* no disk_trailer was found then null is returned, and likewise we only
315
* accept a disk_trailer with a seekvalue that points to a disk_entry and
316
* both parts have valid PK-magic parts. Beyond some sanity check we try to
317
* catch a common brokeness with zip archives that still allows us to find
318
* the start of the zip central directory.
320
struct zzip_disk_entry*
321
zzip_disk_findfirst(ZZIP_DISK* disk)
323
if (disk->buffer > disk->endbuf-sizeof(struct zzip_disk_trailer))
325
___ zzip_byte_t* p = disk->endbuf-sizeof(struct zzip_disk_trailer);
326
for (; p >= disk->buffer ; p--)
328
zzip_byte_t* root; /* (struct zzip_disk_entry*) */
329
if (zzip_disk_trailer_check_magic(p)) {
330
root = disk->buffer + zzip_disk_trailer_get_rootseek (
331
(struct zzip_disk_trailer*)p);
333
{ /* the first disk_entry is after the disk_trailer? can't be! */
334
zzip_size_t rootsize = zzip_disk_trailer_get_rootsize (
335
(struct zzip_disk_trailer*)p);
336
if (disk->buffer+rootsize > p) continue;
337
/* a common brokeness that can be fixed: we just assume the
338
* central directory was written directly before the trailer:*/
341
} else if (zzip_disk64_trailer_check_magic(p)) {
342
if (sizeof(void*) < 8) return 0; /* EOVERFLOW */
343
root = disk->buffer + zzip_disk64_trailer_get_rootseek (
344
(struct zzip_disk64_trailer*)p);
345
if (root > p) continue;
348
if (root < disk->buffer) continue;
349
if (zzip_disk_entry_check_magic(root))
350
return (struct zzip_disk_entry*) root;
355
/** => zzip_disk_findfile
357
* This function takes an existing disk_entry in the central root directory
358
* (e.g. from zzip_disk_findfirst) and returns the next entry within in
359
* the given bounds of the mmapped file area.
361
struct zzip_disk_entry*
362
zzip_disk_findnext(ZZIP_DISK* disk, struct zzip_disk_entry* entry)
364
if ((zzip_byte_t*)entry < disk->buffer ||
365
(zzip_byte_t*)entry > disk->endbuf-sizeof(entry) ||
366
! zzip_disk_entry_check_magic (entry) ||
367
zzip_disk_entry_sizeto_end (entry) > 64*1024)
369
entry = zzip_disk_entry_to_next_entry (entry);
370
if ((zzip_byte_t*)entry > disk->endbuf-sizeof(entry) ||
371
! zzip_disk_entry_check_magic (entry) ||
372
zzip_disk_entry_sizeto_end (entry) > 64*1024 ||
373
zzip_disk_entry_skipto_end (entry) + sizeof(entry) > disk->endbuf)
379
/** search for files in the (mmapped) zip central directory
381
* This function is given a filename as an additional argument, to find the
382
* disk_entry matching a given filename. The compare-function is usually
383
* strcmp or strcasecmp or perhaps strcoll, if null then strcmp is used.
384
* - use null as argument for "after"-entry when searching the first
385
* matching entry, otherwise the last returned value if you look for other
386
* entries with a special "compare" function (if null then a doubled search
387
* is rather useless with this variant of _findfile).
389
struct zzip_disk_entry*
390
zzip_disk_findfile(ZZIP_DISK* disk, char* filename,
391
struct zzip_disk_entry* after, zzip_strcmp_fn_t compare)
393
struct zzip_disk_entry* entry = (! after ? zzip_disk_findfirst (disk)
394
: zzip_disk_findnext (disk, after));
396
compare = (zzip_strcmp_fn_t)( (disk->flags&1) ?
397
(_zzip_strcasecmp) : (strcmp));
398
for (; entry ; entry = zzip_disk_findnext (disk, entry))
400
/* filenames within zip files are often not null-terminated! */
401
char* realname = zzip_disk_entry_strdup_name (disk, entry);
402
if (realname && ! compare(filename, realname))
412
/** => zzip_disk_findfile
414
* This function uses a compare-function with an additional argument
415
* and it is called just like fnmatch(3) from POSIX.2 AD:1993), i.e.
416
* the argument filespec first and the ziplocal filename second with
417
* the integer-flags put in as third to the indirect call. If the
418
* platform has fnmatch available then null-compare will use that one
419
* and otherwise we fall back to mere strcmp, so if you need fnmatch
420
* searching then please provide an implementation somewhere else.
421
* - use null as argument for "after"-entry when searching the first
422
* matching entry, or the last disk_entry return-value to find the
423
* next entry matching the given filespec.
425
struct zzip_disk_entry*
426
zzip_disk_findmatch(ZZIP_DISK* disk, char* filespec,
427
struct zzip_disk_entry* after,
428
zzip_fnmatch_fn_t compare, int flags)
430
struct zzip_disk_entry* entry = (! after ? zzip_disk_findfirst (disk)
431
: zzip_disk_findnext (disk, after));
433
compare = (zzip_fnmatch_fn_t) _zzip_fnmatch;
434
if (disk->flags&1) disk->flags |= _zzip_fnmatch_CASEFOLD;
436
for (; entry ; entry = zzip_disk_findnext (disk, entry))
438
/* filenames within zip files are often not null-terminated! */
439
char* realname = zzip_disk_entry_strdup_name(disk, entry);
440
if (realname && ! compare(filespec, realname, flags))
/* ====================================================================== */
452
/** => zzip_disk_fopen
454
* the ZZIP_DISK_FILE* is rather simple in just encapsulating the
455
* arguments given to this function plus a zlib deflate buffer.
456
* Note that the ZZIP_DISK pointer does already contain the full
457
* mmapped file area of a zip disk, so open()ing a file part within
458
* that area happens to be a lookup of its bounds and encoding. That
459
* information is memorized on the ZZIP_DISK_FILE so that subsequent
460
* _read() operations will be able to get the next data portion or
461
* return an eof condition for that file part wrapped in the zip archive.
463
zzip__new__ ZZIP_DISK_FILE*
464
zzip_disk_entry_fopen (ZZIP_DISK* disk, ZZIP_DISK_ENTRY* entry)
466
/* keep this in sync with zzip_mem_entry_fopen */
467
struct zzip_file_header* header =
468
zzip_disk_entry_to_file_header (disk, entry);
469
if (! header) return 0;
470
___ ZZIP_DISK_FILE* file = malloc(sizeof(ZZIP_DISK_FILE));
471
if (! file) return file;
472
file->buffer = disk->buffer;
473
file->endbuf = disk->endbuf;
474
file->avail = zzip_file_header_usize (header);
476
if (! file->avail || zzip_file_header_data_stored (header))
477
{ file->stored = zzip_file_header_to_data (header); return file; }
480
file->zlib.opaque = 0;
481
file->zlib.zalloc = Z_NULL;
482
file->zlib.zfree = Z_NULL;
483
file->zlib.avail_in = zzip_file_header_csize (header);
484
file->zlib.next_in = zzip_file_header_to_data (header);
486
if (! zzip_file_header_data_deflated (header) ||
487
inflateInit2 (& file->zlib, -MAX_WBITS) != Z_OK)
488
{ free (file); return 0; }
493
/** openening a file part wrapped within a (mmapped) zip archive
495
* This function opens a file found by name, so it does a search into
496
* the zip central directory with => zzip_disk_findfile and whatever
497
* is found first is given to => zzip_disk_entry_fopen
499
zzip__new__ ZZIP_DISK_FILE*
500
zzip_disk_fopen (ZZIP_DISK* disk, char* filename)
502
ZZIP_DISK_ENTRY* entry = zzip_disk_findfile (disk, filename, 0, 0);
503
if (! entry) return 0; else return zzip_disk_entry_fopen (disk, entry);
507
/** => zzip_disk_fopen
509
* This function reads more bytes into the output buffer specified as
510
* arguments. The return value is null on eof or error, the stdio-like
511
* interface can not distinguish between these so you need to check
512
* with => zzip_disk_feof for the difference.
515
zzip_disk_fread (void* ptr, zzip_size_t sized, zzip_size_t nmemb,
516
ZZIP_DISK_FILE* file)
518
zzip_size_t size = sized*nmemb;
519
if (size > file->avail) size = file->avail;
522
memcpy (ptr, file->stored, size);
523
file->stored += size;
528
file->zlib.avail_out = sized*nmemb;
529
file->zlib.next_out = ptr;
530
___ zzip_size_t total_old = file->zlib.total_out;
531
___ int err = inflate (& file->zlib, Z_NO_FLUSH);
532
if (err == Z_STREAM_END)
534
else if (err == Z_OK)
535
file->avail -= file->zlib.total_out - total_old;
538
return file->zlib.total_out - total_old;
542
/** => zzip_disk_fopen
543
* This function releases any zlib decoder info needed for decompression
544
* and dumps the ZZIP_DISK_FILE* then.
547
zzip_disk_fclose (ZZIP_DISK_FILE* file)
550
inflateEnd (& file->zlib);
555
/** => zzip_disk_fopen
557
* This function allows to distinguish an error from an eof condition.
558
* Actually, if we found an error but we did already reach eof then we
559
* just keep on saying that it was an eof, so the app can just continue.
562
zzip_disk_feof (ZZIP_DISK_FILE* file)
564
return ! file || ! file->avail;