~ubuntu-branches/ubuntu/precise/unzip/precise-proposed

« back to all changes in this revision

Viewing changes to proginfo/ziplimit.txt

Committer: Bazaar Package Importer
Author(s): Santiago Vila
Date: 2004-06-06 17:57:46 UTC
Revision ID: james.westby@ubuntu.com-20040606175746-nl7p2dgp3aobyc2c

Tags: upstream-5.51

Import upstream version 5.51

files added:

BUGS

COPYING.OLD

Contents

History.550

History.551

INSTALL

LICENSE

README

ToDo

WHERE

acorn

acorn/Contents

acorn/GMakefile

acorn/ReadMe

acorn/RunMe1st

acorn/acorn.c

acorn/makefile

acorn/riscos.c

acorn/riscos.h

acorn/srcrename

acorn/swiven.h

acorn/swiven.s

amiga

amiga/Contents

amiga/amiga.c

amiga/amiga.h

amiga/crc_68.a

amiga/filedate.c

amiga/flate.a

amiga/makefile.azt

amiga/makesfx.c

amiga/smakefile

amiga/stat.c

amiga/z-stat.h

aosvs

aosvs/Contents

aosvs/README

aosvs/aosvs.c

aosvs/aosvs.h

aosvs/make_unz.cli

api.c

apihelp.c

atari

atari/Contents

atari/Makefile

atari/Makefile.old

atari/README.old

atari/atari.c

atari/make_unz.mup

beos

beos/Contents

beos/Makefile

beos/README

beos/beos.c

beos/beos.h

beos/beosmain.cpp

beos/unzip.rsc

beos/unzipsfx.rsc

cmsmvs

cmsmvs/INSTALL.CMS

cmsmvs/README.CMS

cmsmvs/README.MVS

cmsmvs/WHATSNEW.CMS

cmsmvs/ccunzip.exec

cmsmvs/mc.exec

cmsmvs/mvs.mki

cmsmvs/unzip.exec

cmsmvs/unzip.makefile

cmsmvs/unzmvsc.job

cmsmvs/unzvmc.exec

cmsmvs/vmmvs.c

cmsmvs/vmmvs.h

cmsmvs/vmstat.h

cmsmvs/zipinfo.exec

consts.h

crc32.c

crc_i386.S

crctab.c

crypt.c

crypt.h

ebcdic.h

envargs.c

explode.c

extract.c

file_id.diz

fileio.c

flexos

flexos/Contents

flexos/README

flexos/flexos.c

flexos/flxcfg.h

flexos/hc.pro

flexos/makefile

funzip.c

funzip.txt

gbloffs.c

globals.c

globals.h

human68k

human68k/Contents

human68k/Makefile

human68k/crc_68.s

human68k/flate.s

human68k/human68k.c

inflate.c

inflate.h

list.c

macos

macos/Contents

macos/HISTORY.TXT

macos/README.TXT

macos/UnZp.h

macos/UnZpLib.h

macos/UnZpPrj.xml

macos/UnZpSFX.h

macos/UnZpSx.h

macos/source

macos/source/charmap.h

macos/source/getenv.c

macos/source/helpers.c

macos/source/helpers.h

macos/source/macbin3.c

macos/source/macbin3.h

macos/source/maccfg.h

macos/source/macdir.c

macos/source/macdir.h

macos/source/macos.c

macos/source/macscreen.c

macos/source/macstat.c

macos/source/macstat.h

macos/source/macstuff.h

macos/source/mactime.c

macos/source/mactime.h

macos/source/macunzip.c

macos/source/pathname.c

macos/source/pathname.h

macos/source/sxunzip.c

macos/source/unzip_rc.hqx

man/Contents

man/funzip.1

man/unzip.1

man/unzipsfx.1

man/zipgrep.1

man/zipinfo.1

match.c

msdos

msdos/Contents

msdos/README

msdos/crc_i86.asm

msdos/doscfg.h

msdos/makefile.bc

msdos/makefile.dj1

msdos/makefile.dj2

msdos/makefile.emx

msdos/makefile.msc

msdos/makefile.tc

msdos/makefile.wat

msdos/msc51opt.dif

msdos/msdos.c

novell

novell/Contents

novell/README

novell/makefile.wat

novell/nlm_EOL.pat

novell/nlmcfg.h

novell/novell.c

novell/unzip.build

novell/unzip.link

os2/Contents

os2/makefile.os2

os2/os2.c

os2/os2acl.c

os2/os2acl.h

os2/os2cfg.h

os2/os2data.h

os2/rexxapi.c

os2/rexxapi.def

os2/rexxhelp.c

os2/rexxtest.cmd

os2/stub.def

os2/unzip.def

os2/zgrepapi.cmd

os2/zip2exe.cmd

os2/zipgrep.cmd

process.c

proginfo

proginfo/3rdparty.bug

proginfo/CONTRIBS

proginfo/Contents

proginfo/ZipPorts

proginfo/defer.in

proginfo/extra.fld

proginfo/fileinfo.cms

proginfo/nt.sd

proginfo/perform.dos

proginfo/timezone.txt

proginfo/ziplimit.txt

qdos

qdos/Contents

qdos/IZREADME.SMS

qdos/Makefile

qdos/callstub.c

qdos/config.S

qdos/crc68.s

qdos/izqdos.h

qdos/makesfx.c

qdos/qdos.c

tables.h

tandem

tandem/Contents

tandem/HISTORY

tandem/README

tandem/commacs

tandem/doit

tandem/macros

tandem/make

tandem/makesfx

tandem/tandem.c

tandem/tandem.h

tandem/tannsk.h

tandem/tanunz.c

testmake.zip

theos

theos/Contents

theos/_fprintf.c

theos/_isatty.c

theos/_setargv.c

theos/_sprintf.c

theos/_stat.c

theos/makefile

theos/oldstat.h

theos/stat.h

theos/theos.c

theos/thscfg.h

timezone.c

timezone.h

tops20

tops20/Contents

tops20/make.mic

tops20/rename.mic

tops20/tops20.c

ttyio.c

ttyio.h

unix

unix/Contents

unix/Makefile

unix/Packaging

unix/Packaging/README

unix/Packaging/pkginfo.in

unix/Packaging/postinstall

unix/Packaging/preinstall.in

unix/Packaging/prototype

unix/unix.c

unix/unxcfg.h

unix/zipgrep

unreduce.c

unshrink.c

unzip.c

unzip.h

unzip.txt

unzipsfx.txt

unzipstb.c

unzpriv.h

unzvers.h

vms/00binary.vms

vms/Contents

vms/README

vms/cmdline.c

vms/cvthelp.tpu

vms/descrip.mms

vms/link_unz.com

vms/make_unz.com

vms/makesfx.com

vms/unz_cli.cld

vms/unzip.opt

vms/unzip_cli.help

vms/unzip_def.rnh

vms/unzipsfx.hlp

vms/unzipsfx.opt

vms/vms.c

vms/vms.h

vms/vmsdefs.h

win32

win32/Contents

win32/Makefile

win32/Makefile.bc

win32/Makefile.dj

win32/Makefile.emx

win32/Makefile.gcc

win32/Makefile.lcc

win32/Makefile.wat

win32/crc_i386.asm

win32/crc_i386.c

win32/crc_lcc.asm

win32/nt.c

win32/nt.h

win32/rsxntwin.h

win32/w32cfg.h

win32/win32.c

wince

wince/Contents

wince/README

wince/ilmask.bmp

wince/imglist.2bp

wince/imglist.bmp

wince/inc

wince/inc/conio.h

wince/inc/errno.h

wince/inc/locale.h

wince/inc/signal.h

wince/inc/stdio.h

wince/intrface.cpp

wince/intrface.h

wince/punzip.h

wince/punzip.htp

wince/punzip.ic2

wince/punzip.ico

wince/punzip.rc

wince/punzip.rcv

wince/resource.h

wince/toolbar.2bp

wince/toolbar.bmp

wince/vc5

wince/vc5/punzip.dsp

wince/vc6

wince/vc6/punzip.dsp

wince/vcEMbed3

wince/vcEMbed3/punzip.vcp

wince/vcEMbed3/unzipcmd.vcp

wince/wcecfg.h

wince/wcemain.c

wince/wince.cpp

wince/wince.h

wince/winmain.cpp

wince/winmain.h

wince/zipfile.ic2

wince/zipfile.ico

windll

windll/Contents

windll/decs.h

windll/guisfx

windll/guisfx/dialog.h

windll/guisfx/sfxw.ico

windll/guisfx/sfxwiz.c

windll/guisfx/sfxwiz.dlg

windll/guisfx/sfxwiz.rc

windll/guisfx/sfxwiz16.mak

windll/guisfx/unzsfx16.def

windll/structs.h

windll/unziplib.def

windll/uzexampl.c

windll/uzexampl.h

windll/vb

windll/vb/VBReadMe.txt

windll/vb/vbunzip.bas

windll/vb/vbunzip.frm

windll/vb/vbunzip.vbp

windll/vb/vbunzip.vbw

windll/vc5

windll/vc5/dll

windll/vc5/dll/unzip32.dsp

windll/vc5/lib

windll/vc5/lib/unzip32.dsp

windll/vc5/sfx

windll/vc5/sfx/SFXWiz32.dsp

windll/vc5/sfx/SFXWiz32.dsw

windll/vc5/sfx/unzsfx32.dsp

windll/vc6

windll/vc6/dll

windll/vc6/dll/unzip32.dsp

windll/vc6/lib

windll/vc6/lib/unzip32.dsp

windll/vc6/sfx

windll/vc6/sfx/SFXWiz32.dsp

windll/vc6/sfx/SFXWiz32.dsw

windll/vc6/sfx/unzsfx32.dsp

windll/windll.c

windll/windll.h

windll/windll.rc

windll/windll.txt

windll/windll16.def

windll/windll32.def

windll/windll_lc.def

windll/windllgcc.def

zip.h

zipgrep.txt

zipinfo.c

zipinfo.txt

Show diffs side-by-side

added added

removed removed

proginfo/ziplimit.txt

ziplimit.txt

A) Hard limits of the Zip archive format:

Number of entries in Zip archive: 64 k (2^16 - 1 entries)

Compressed size of archive entry: 4 GByte (2^32 - 1 Bytes)

Uncompressed size of entry: 4 GByte (2^32 - 1 Bytes)

Size of single-volume Zip archive: 4 GByte (2^32 - 1 Bytes)

Per-volume size of multi-volume archives: 4 GByte (2^32 - 1 Bytes)

Number of parts for multi-volume archives: 64 k (2^16 - 1 parts)

Total size of multi-volume archive: 256 TByte (4G * 64k)

The number of archive entries and of multivolume parts are limited by

the structure of the "end-of-central-directory" record, where these

numbers are stored in 2-Byte fields.

Some Zip and/or UnZip implementations (for example Info-ZIP's) allow

handling of archives with more than 64k entries. (The information

from "number of entries" field in the "end-of-central-directory" record

is not really necessary to retrieve the contents of a Zip archive;

it should rather be used for consistency checks.)

Length of an archive entry name: 64 kByte (2^16 - 1)

Length of archive member comment: 64 kByte (2^16 - 1)

Total length of "extra field": 64 kByte (2^16 - 1)

Length of a single e.f. block: 64 kByte (2^16 - 1)

Length of archive comment: 64 kByte (2^16 - 1)

Additional limitation claimed by PKWARE:

Size of local-header structure (fixed fields of 30 Bytes + filename +

local extra field): < 64 kByte

Size of central-directory structure (46 Bytes + filename +

central extra field + member comment): < 64 kByte

Note:

In 2001, PKWARE published version 4.5 of the Zip format specification

(together with the release of PKZIP for Windows 4.5). This specification

defines new extra field blocks that make it possible to break the size limits of the

standard zipfile structures. In this extended Zip format, the size limits

of zip entries (and the complete zip archive) have been extended to

(2^64 - 1) Bytes and the maximum number of archive entries to (2^32-1).

Currently, these extensions are not supported by Info-ZIP software,

but it is planned to provide implementation for selected environments

with the next major release. (This may take a while, though.)

B) Implementation limits of UnZip:

1. Size limits caused by file I/O and decompression handling:

Size of Zip archive: 2 GByte (2^31 - 1 Bytes)

Compressed size of archive entry: 2 GByte (2^31 - 1 Bytes)

Note: On some systems, UnZip may support archive sizes up to 4 GByte.

To get this support, the target environment has to meet the following

requirements:

a) The compiler's intrinsic "long" data types must be able to hold

integer numbers of 2^32. In other words - the standard intrinsic

integer types "long" and "unsigned long" have to be wider than

32 bit.

b) The system has to supply a C runtime library that is compatible

with the more-than-32-bit-wide "long int" type of condition a)

c) The standard file positioning functions fseek(), ftell() (and/or

the Unix style lseek() and tell() functions) have to be capable

of moving to absolute file offsets of up to 4 GByte from the file

start.

On 32-bit CPU hardware, you generally cannot expect that a C compiler

provides a "long int" type that is wider than 32-bit. So, many of the

most popular systems (i386, PowerPC, 680x0, et al.) are out of luck.

You may find environments that meet all requirements on systems

with 64-bit CPU hardware. Examples might be Cray number crunchers

or Compaq (formerly DEC) Alpha AXP machines.

The number of Zip archive entries is unlimited. The "number-of-entries"

field of the "end-of-central-dir" record is checked against the "number

of entries found in the central directory" modulo 64k (2^16).

Multi-volume archive extraction is not supported.

Memory requirements are mostly independent of the archive size

and archive contents.

In general, UnZip needs a fixed amount of internal buffer space

plus the size to hold the complete information of the currently

processed entry's local header. Here, a large extra field

(could be up to 64 kByte) may exceed the available memory

for MSDOS 16-bit executables (when they were compiled in small

or medium memory model, with a fixed 64kByte limit on data space).

The other exception where memory requirements scale with "larger"

archives is the "restore directory attributes" feature. Here, the

directory attributes info for each restored directory has to be held

in memory until the whole archive has been processed. So, the amount

of memory needed to keep this info scales with the number of restored

directories and may cause memory problems when a lot of directories

are restored in a single run.

C) Implementation limits of the Zip executables:

1. Size limits caused by file I/O and compression handling:

Size of Zip archive: 2 GByte (2^31 - 1 Bytes)

Compressed size of archive entry: 2 GByte (2^31 - 1 Bytes)

Uncompressed size of entry: 2 GByte (2^31 - 1 Bytes),

100

(could/should be 4 GBytes...)

101

Multi-volume archive creation is not supported.

102

103

2. Limits caused by handling of archive contents lists

104

105

2.1. Number of archive entries (freshen, update, delete)

106

a) 16-bit executable: 64k (2^16 - 1) or 32k (2^15 - 1),

107

(unsigned vs. signed type of size_t)

108

a1) 16-bit executable: <16k ((2^16)/4)

109

(The smaller limit a1) results from the array size limit of

110

the "qsort()" function.)

111

32-bit executables <1G ((2^32)/4)

112

(usual system limit of the "qsort()" function on 32-bit systems)

113

114

b) stack space needed by qsort to sort list of archive entries

115

116

NOTE: In the current executables, overflows of limits a) and b) are NOT

117

checked!

118

119

c) amount of free memory to hold "central directory information" of

120

all archive entries; one entry needs:

121

96 bytes (32-bit) resp. 80 bytes (16-bit)

122

+ 3 * length of entry name

123

+ length of zip entry comment (when present)

124

+ length of extra field(s) (when present, e.g.: UT needs 9 bytes)

125

+ some bytes for book-keeping of memory allocation

126

127

Conclusion:

128

For systems with limited memory space (MSDOS, small AMIGAs, other

129

environments without virtual memory), the number of archive entries

130

is most often limited by condition c).

131

For example, with approx. 100 kBytes of free memory after loading and

132

initializing the program, a 16-bit DOS Zip cannot process more than 600

133

to 1000 (+) archive entries. (For the 16-bit Windows DLL or the 16-bit

134

OS/2 port, limit c) is less important because Windows or OS/2 executables

135

are not restricted to the 1024k area of real mode memory. These 16-bit

136

ports are limited by conditions a1) and b), say: at maximum approx.

137

16000 entries!)

138

139

140

2.2. Number of "new" entries (add operation)

141

In addition to the restrictions above (2.1.), the following limits

142

caused by the handling of the "new files" list apply:

143

144

a) 16-bit executable: <16k ((2^16)/4)

145

146

b) stack size required for "qsort" operation on "new entries" list.

147

148

NOTE: In the current executables, the overflow checks for these limits

149

are missing!

150

151

c) amount of free memory to hold the directory info list for new entries;

152

one entry needs:

153

24 bytes (32-bit) resp. 22 bytes (16-bit)

154

+ 3 * length of filename

155

156

D) Some technical remarks:

157

158

1. The 2GByte size limit on archive files is a consequence of the portable

159

C implementation of the Info-ZIP programs.

160

Zip archive processing requires random access to the archive file for

161

jumping between different parts of the archive's structure.

162

In standard C, this is done via stdio functions fseek()/ftell() resp.

163

unix-io functions lseek()/tell(). In many (most?) C implementations,

164

these functions use "signed long" variables to hold offset pointers

165

into sequential files. In most cases, this is a signed 32-bit number,

166

which is limited to ca. 2E+09. There may be specific C runtime library

167

implementations that interpret the offset numbers as unsigned, but for

168

us, this is not reliable in the context of portable programming.

169

170

2. The 2GByte limit on the size of a single compressed archive member

171

is again a consequence of the implementation in C.

172

The variables used internally to count the size of the compressed

173

data stream are of type "long", which is guaranteed to be at least

174

32-bit wide on all supported environments.

175

176

But, why do we use "signed" long and not "unsigned long"?

177

178

Throughout the I/O handling of the compressed data stream, the

179

sign bit of the "long" numbers is (mis-)used as a kind of overflow

180

detection. In the end, this is caused by the fact that standard C

181

lacks any overflow checking on integer arithmetic and does not

182

support access to the underlying hardware's overflow detection

183

(the status bits, especially "carry" and "overflow" of the CPU's

184

flags-register) in a system-independent manner.

185

186

So, we "misuse" the most-significant bit of the compressed data

187

size counters as carry bit for efficient overflow/underflow detection.

188

We could change the code to a different method of overflow detection,

189

by using a bunch of "sanity" comparisons (kind of "is the calculated

190

result plausible when compared with the operands"). But, this would

191

"blow up" the code of the "inner loop", with remarkable loss of

192

processing speed. Or, we could reduce the amount of consistency checks

193

of the compressed data (e.g. detection of premature end of stream) to

194

an absolute minimum, at the cost of the programs' stability when

195

processing corrupted data.

196

197

Summary: Changing the compression/decompression core routines to

198

be "unsigned safe" would require excessive recoding, with little

199

gain on maximum processable uncompressed size (a gain can only be

200

expected for hardly compressible data), but at severe costs on

201

performance, stability and maintainability. Therefore, it is

202

quite unlikely that this will ever happen for Zip/UnZip.

203

204

Anyway, the Zip archive format is more and more showing its age...

205

The effort to lift the 2GByte limits should be better invested in

206

creating a successor for the Zip archive format and tools.

207

208

Please report any problems to: Zip-Bugs@lists.wku.edu

209

210

Last updated: 26 January 2002, Christian Spieler

Older »