1
#!/usr/bin/python -Qwarnall
2
"""ZipSyncer, a tool to keep zip files synchronized with unzipped directories
6
zipsyncer listzipped [directory]
8
Scan directory recursively, look for zipped files that are supported by
11
zipsyncer createunzipped [directory]
13
Scan directory recursively, look for zipped files that are supported and unzip
16
zipsyncer createzipped [directory]
18
Scan directory recursively, look for unzipped directories that have the right naming and zip them.
20
zipsyncer sync [directory]
22
Scan directory recursively, look for zipped-unzipped pairs and synchronize
23
them if one has been changed.
25
zipsyncer removezipped [directory]
27
Scan directory recursively, look for zipped-unzipped pairs and delete the
28
zipped part if they are in sync.
30
zipsyncer removeunzipped [directory]
32
Scan directory recursively, look for zipped-unzipped pairs and delete the
33
unzipped part if they are in sync.
37
import struct, zlib, os, base64, time, shutil
39
""" Deflate a data blob and remove the pre- and post-fix.
40
This is how files are compressed by the DEFLATE method in zip files.
42
def deflate(data, level):
    """Compress *data* and return only the raw DEFLATE payload.

    zlib.compress() wraps the compressed stream in a zlib container; the
    first 2 bytes (header) and the last 4 bytes (checksum trailer) are
    sliced off so that only the bare stream remains, which is the form in
    which DEFLATE-compressed entries are stored inside zip files.

    data  -- the byte string to compress
    level -- compression level, passed straight through to zlib.compress()

    Returns the compressed byte string without the zlib pre- and post-fix.
    """
    wrapped = zlib.compress(data, level)
    return wrapped[2:-4]
45
""" Find the compression level that compresses to a given size.
46
To recreate a zipfile from the unzipped files, the file has to be compressed
47
to the same size as specified in the original zip file. Depending on the
48
used DEFLATE algorithm, this may or may not succeed.
50
def compressToTargetSize(data, targetsize):
51
for level in [6, 9, 5, 4, 3, 2, 1, 7, 8, 0, -1]:
52
d = deflate(data, level)
53
if len(d) == targetsize:
58
return zlib.crc32(data) & 0xFFFFFFFF
60
def dos2unixtime(dostime, dosdate):
    """ Convert date/time code to (year, month, day, hour, min, sec)

    dostime -- 16 bit time word: hour in bits 11-15, minute in bits 5-10,
               seconds divided by two in bits 0-4
    dosdate -- 16 bit date word: years since 1980 in bits 9-15, month in
               bits 5-8, day of month in bits 0-4

    Returns a 6-tuple of integers. The values are only decoded, they are
    not validated (a zero date word yields month 0 and day 0).
    """
    # The year was previously read from the undefined name 'dosdata',
    # which made every call fail with a NameError.
    year = (dosdate >> 9) + 1980
    month = (dosdate >> 5) & 0xF
    day = dosdate & 0x1F
    hour = dostime >> 11
    minute = (dostime >> 5) & 0x3F
    # seconds are stored with a resolution of two seconds
    second = (dostime & 0x1F) * 2
    return (year, month, day, hour, minute, second)
65
def unixtime2dos(secondssinceepoch):
    """ Convert seconds since the epoch to a (dostime, dosdate) pair.

    The timestamp is broken down with time.gmtime(), i.e. in UTC.

    dosdate holds the years since 1980 in bits 9 and up, the month in
    bits 5-8 and the day of month in bits 0-4. dostime holds the hour in
    bits 11 and up, the minute in bits 5-10 and the seconds divided by two
    in bits 0-4, so odd seconds are rounded down.

    Note: for timestamps before 1980 the year offset is negative and the
    resulting date word is negative as well.
    """
    t = time.gmtime(secondssinceepoch)
    dosdate = ((t.tm_year - 1980) << 9) | (t.tm_mon << 5) | t.tm_mday
    dostime = (t.tm_hour << 11) | (t.tm_min << 5) | (t.tm_sec >> 1)
    return (dostime, dosdate)
72
sigstruct = struct.Struct("<I")
74
struct = struct.Struct("<HHHHHIIIHH")
76
self.versionNeeded = 20
78
self.compressionMethod = 0
80
self.compressedSize = 0
81
self.uncompressedSize = 0
84
def unpack(self, data, offset):
85
fields = ZipEntry.Common.struct.unpack_from(data, offset)
86
(self.versionNeeded, self.flag, self.compressionMethod,
87
self.mtime, self.mdate, self.crc32,
88
self.compressedSize, self.uncompressedSize,
89
self.filenameLength, self.extraLength) = fields;
92
return ZipEntry.Common.struct.pack(self.versionNeeded, self.flag,
93
self.compressionMethod, self.mtime, self.mdate, self.crc32,
94
self.compressedSize, self.uncompressedSize,
95
self.filenameLength, self.extraLength)
97
return map(str, [self.versionNeeded, self.flag,
98
self.compressionMethod, self.mtime, self.mdate,
100
self.compressedSize, self.uncompressedSize,
101
self.filenameLength, self.extraLength])
102
def setFields(self, fields):
    """ Set the ten shared header values from their textual form.

    fields -- sequence of exactly ten items, each convertible with int(),
              in the order: versionNeeded, flag, compressionMethod,
              mtime, mdate, crc32, compressedSize, uncompressedSize,
              filenameLength, extraLength

    Raises ValueError when an item is not an integer literal or when the
    number of items is not ten.
    """
    (self.versionNeeded, self.flag, self.compressionMethod,
     self.mtime, self.mdate, self.crc32,
     self.compressedSize, self.uncompressedSize,
     self.filenameLength, self.extraLength) = [int(f) for f in fields]
108
class Header(Common):
110
ZipEntry.Common.__init__(self)
112
self.signature = 0x04034b50
113
def unpack(self, data, offset):
115
sig = ZipEntry.sigstruct.unpack_from(data, offset)
116
if sig[0] != 0x04034b50:
118
self.signature = sig[0]
119
offset = ZipEntry.Common.unpack(self, data, offset + 4)
120
filenameend = offset + self.filenameLength
121
extraend = filenameend + self.extraLength
122
self.filename = data[offset : filenameend]
123
self.extra = data[filenameend : extraend]
127
return ZipEntry.sigstruct.pack(self.signature) \
128
+ ZipEntry.Common.pack(self) + self.filename + self.extra
130
return [str(self.signature)] + ZipEntry.Common.getFields(self) \
131
+ [self.filename, base64.b64encode(self.extra)]
132
def setFields(self, fields):
    """ Set the local file header from a list of 13 textual fields.

    Layout of fields:
      [0]     signature (integer)
      [1:11]  the ten shared values, handed to ZipEntry.Common.setFields
      [11]    filename, stored as is
      [12]    extra field, base64 encoded
    """
    self.signature = int(fields[0])
    ZipEntry.Common.setFields(self, fields[1:11])
    self.filename = fields[11]
    # the extra field is binary and therefore kept base64 encoded in text
    self.extra = base64.b64decode(fields[12])
138
return 30 + self.filenameLength + self.extraLength
140
class DataDescriptor:
141
struct = struct.Struct("<III")
143
(self.signature, self.crc32, self.compressedSize,
144
self.uncompressedSize) = (0, 0, 0, 0)
145
def unpack(self, flag, data, offset):
147
if len(data) - offset > 4:
148
sig = ZipEntry.sigstruct.unpack_from(data, offset)
149
if sig[0] == 0x08074b50:
150
self.signature = 0x08074b50
152
if flag & 8 or self.signature:
153
d = ZipEntry.DataDescriptor.struct.unpack_from(data, offset)
157
(self.crc32, self.compressedSize, self.uncompressedSize) = d
161
return ZipEntry.sigstruct.pack(self.signature) \
162
+ ZipEntry.DataDescriptor.struct.pack(self.crc32,
164
self.uncompressedSize)
165
if self.crc32 or self.compressedSize \
166
or self.uncompressedSize:
167
return ZipEntry.DataDescriptor.struct.pack(
168
self.crc32, self.compressedSize,
169
self.uncompressedSize)
172
return map(str, [self.signature, self.crc32,
173
self.compressedSize, self.uncompressedSize])
174
def setFields(self, fields):
    """ Set the data descriptor from four textual fields.

    fields -- sequence of exactly four items, each convertible with int(),
              in the order: signature, crc32, compressedSize,
              uncompressedSize

    Raises ValueError when an item is not an integer literal or when the
    number of items is not four.
    """
    (self.signature, self.crc32, self.compressedSize,
     self.uncompressedSize) = [int(f) for f in fields]
178
if self.signature: return 16
179
if self.crc32: return 12
182
class CentralDirectoryData(Common):
183
struct1 = struct.Struct("<IH")
184
struct2 = struct.Struct("<HHHII")
186
ZipEntry.Common.__init__(self)
188
self.signature = 0x02014b50
190
self.commentLength = 0
192
self.internalAttr = 0
193
self.externalAttr = 0
195
def unpack(self, data, offset):
197
if len(data) - offset < 6:
199
sig = ZipEntry.CentralDirectoryData.struct1.unpack_from(
201
if sig[0] != 0x02014b50:
203
(self.signature, self.version) = sig
204
offset = ZipEntry.Common.unpack(self, data, offset + 6)
205
(self.commentLength, self.disk, self.internalAttr,
206
self.externalAttr, self.offset
207
) = ZipEntry.CentralDirectoryData.struct2.unpack_from(
210
filenameend = offset + self.filenameLength
211
extraend = filenameend + self.extraLength
212
commentend = extraend + self.commentLength
213
self.filename = data[offset : filenameend]
214
self.extra = data[filenameend : extraend]
215
self.comment = data[extraend : commentend]
219
return ZipEntry.CentralDirectoryData.struct1.pack(
220
self.signature, self.version) \
221
+ ZipEntry.Common.pack(self) \
222
+ ZipEntry.CentralDirectoryData.struct2.pack(
223
self.commentLength, self.disk, self.internalAttr,
224
self.externalAttr, self.offset) \
225
+ self.filename + self.extra + self.comment
227
return map(str, [self.signature, self.version]) \
228
+ ZipEntry.Common.getFields(self) \
229
+ map(str, [self.commentLength, self.disk,
231
self.externalAttr, self.offset]) \
232
+ [self.filename, base64.b64encode(self.extra),
233
base64.b64encode(self.comment)]
234
def setFields(self, fields):
    """ Set the central directory record from a list of 20 textual fields.

    Layout of fields:
      [0]      signature (integer)
      [1]      version (integer)
      [2:12]   the ten shared values, handed to ZipEntry.Common.setFields
      [12:17]  commentLength, disk, internalAttr, externalAttr, offset
               (integers)
      [17]     filename, stored as is
      [18]     extra field, base64 encoded
      [19]     comment, base64 encoded
    """
    self.signature = int(fields[0])
    self.version = int(fields[1])
    ZipEntry.Common.setFields(self, fields[2:12])
    (self.commentLength, self.disk, self.internalAttr,
     self.externalAttr, self.offset) = [int(f) for f in fields[12:17]]
    self.filename = fields[17]
    # extra and comment are binary and therefore kept base64 encoded
    self.extra = base64.b64decode(fields[18])
    self.comment = base64.b64decode(fields[19])
244
return 46 + self.filenameLength + self.extraLength \
250
self.header = ZipEntry.Header()
251
self.datadescriptor = ZipEntry.DataDescriptor()
253
self.cddata = ZipEntry.CentralDirectoryData()
254
def setHeader(self, header, filename, extra):
    """ Replace the local file header of this entry.

    The three arguments are passed unchanged to ZipEntry.Header().
    """
    # NOTE(review): the other visible call sites construct ZipEntry.Header()
    # without arguments - confirm that the constructor accepts these three
    # parameters before relying on this method.
    self.header = ZipEntry.Header(header, filename, extra)
256
def setData(self, data):
258
def setDataDescriptor(self, sig, datadescriptor):
259
self.datadescriptor = ZipEntry.DataDescriptor(sig,
261
def setCentralDirectoryData(self, entry, filename, extra, comment):
262
self.cddata = ZipEntry.CentralDirectoryData(entry, filename,
264
def unpackHeader(self, data, offset):
267
self.header = ZipEntry.Header()
268
offset = self.header.unpack(data, offset)
269
if not self.header.valid:
272
if self.header.compressionMethod == 8: # deflate
273
decompressobj = zlib.decompressobj(-15)
274
self.data = decompressobj.decompress(data[offset:])
275
left = decompressobj.unused_data
276
offset = len(data) - len(left)
277
elif self.header.compressionMethod == 0: # no compression
278
size = self.header.uncompressedSize
279
self.data = data[offset : offset + size ]
282
self.error = "compression method not supported"
285
# read data descriptor
286
self.datadescriptor = ZipEntry.DataDescriptor()
287
offset = self.datadescriptor.unpack(self.header.flag, data, offset)
290
def packHeader(self):
292
if self.header.compressionMethod == 8:
293
compressedSize = self.datadescriptor.compressedSize \
294
if self.datadescriptor.compressedSize \
295
else self.header.compressedSize
296
d = compressToTargetSize(d, compressedSize)
298
self.error = 'deflating to target size failed'
300
return self.header.pack() + d + self.datadescriptor.pack()
301
def unpackEntry(self, data, offset):
303
self.cddata = ZipEntry.CentralDirectoryData()
304
offset = self.cddata.unpack(data, offset)
305
if not self.cddata.valid:
310
return self.cddata.pack()
312
return self.header.getFields() + self.datadescriptor.getFields() \
313
+ self.cddata.getFields()
314
def setFields(self, fields):
    """ Distribute one line of 37 textual fields over the three parts.

    fields[:13]    go to the local file header,
    fields[13:17]  go to the data descriptor,
    fields[17:]    go to the central directory record.
    """
    header_end = 13
    descriptor_end = header_end + 4
    self.header.setFields(fields[:header_end])
    self.datadescriptor.setFields(fields[header_end:descriptor_end])
    self.cddata.setFields(fields[descriptor_end:])
318
def setEntry(self, path, mtime):
    """ Store the name and modification time of this entry.

    path  -- the entry name; it and its length are written to both the
             local file header and the central directory record
    mtime -- modification time in seconds since the epoch; converted with
             unixtime2dos() and written to both parts as well
    """
    dostime, dosdate = unixtime2dos(mtime)
    # header and central directory must carry identical name and time
    for part in (self.header, self.cddata):
        part.filenameLength = len(path)
        part.filename = path
        part.mtime = dostime
        part.mdate = dosdate
324
def setDirectory(self, offset, path, mtime):
    """ Initialize this entry as a directory entry.

    offset -- accepted for compatibility with existing callers; it is not
              stored here because updateOffsetEtc() assigns the offset of
              the entry later
    path   -- name of the directory entry
    mtime  -- modification time in seconds since the epoch
    """
    # setEntry() takes (path, mtime) only; passing offset along as a third
    # argument, as was done before, raised a TypeError on every call.
    self.setEntry(path, mtime)
326
def setFile(self, path, mtime, data, compresslevel):
327
self.setEntry(path, mtime)
330
self.cddata.compressionMethod = 8
331
self.cddata.compressedSize = len(deflate(data, compresslevel))
332
def updateOffsetEtc(self, offset):
333
self.cddata.offset = offset
334
self.cddata.uncompressedSize = len(self.data)
335
self.cddata.crc32 = getCRC(self.data)
336
csize = self.cddata.uncompressedSize
337
if self.cddata.compressionMethod:
338
cdata = compressToTargetSize(self.data,
339
self.cddata.compressedSize)
341
cdata = deflate(self.data, 6)
343
self.cddata.compressedSize = csize
344
if self.datadescriptor.compressedSize:
345
o = self.datadescriptor
348
o.crc32 = self.cddata.crc32
349
o.uncompressedSize = self.cddata.uncompressedSize
350
o.compressedSize = self.cddata.compressedSize
352
def getHeaderSize(self):
    """ Return the number of bytes of this entry before the central directory.

    Despite the name this is more than the header: it is the sum of the
    local file header size, the compressed data size as recorded in the
    central directory record, and the data descriptor size.
    """
    header_size = self.header.getSize()
    payload_size = self.cddata.compressedSize
    descriptor_size = self.datadescriptor.getSize()
    return header_size + payload_size + descriptor_size
362
""" True if the data in @entries and @filedata constitutes a
363
valid, supported zip file. """
366
""" A string describing the error that caused the object to be
368
self.error = 'No entries.'
370
""" Metadata for all entries. """
373
""" Raw uncompressed data for all entries. """
376
""" Data from the end of central directory record """
377
self.fileinfo = 9*[None]
378
self.fileinfo[0] = 0x06054b50
382
self.fileinfo[8] = ''
384
def setFromFileContents(self, data):
387
# parse the full entries
389
while offset < len(data):
391
offset = entry.unpackHeader(data, offset)
393
self.entries.append(entry)
397
if len(self.entries) == 0:
398
self.error = "No entries."
401
# parse central directory
402
for e in self.entries:
403
offset = e.unpackEntry(data, offset)
407
# parse end of central directory
408
if offset + 22 > len(data):
409
self.error = "premature end of zipfile"
411
dirend = struct.unpack_from("<IHHHHIIH", data, offset)
412
if dirend[0] != 0x06054b50:
413
self.error = 'invalid end of central directory'
417
zipcomment = data[offset:offset+l]
420
if offset != len(data):
421
self.error = "trailing data in zip file"
423
if len(data) != dirend[5] + dirend[6] + dirend[7] + 22:
424
self.error = 'zip file invalid or not supported'
426
self.fileinfo = list(dirend) + [zipcomment]
429
recreated = self.recreate()
430
# for i in range(len(recreated)):
431
# if recreated[i] != data[i]:
432
# print 'error at pos ' + str(i)
434
# print str(len(data)) + ' ' + str(len(recreated))
437
if recreated != data:
438
#print str(len(recreated))+' '+str(len(data))
439
#for i in range(0, min(len(recreated),len(data))):
440
# if recreated[i] != data[i]:
441
# print 'pos ' + hex(i)
442
self.error = "roundtripping fails"
448
def containsPath(self, path):
449
for e in self.entries:
450
if e.header.filename == path:
454
def addDirectory(self, basedir, dir):
455
p = os.path.relpath(dir, basedir) + '/'
456
if self.containsPath(p):
458
print 'adding dir ' + p
459
mtime = os.path.getmtime(dir)
462
e.setDirectory(offset, p, mtime)
463
self.entries.append(e)
465
def addFile(self, basedir, file, compresslevel):
466
p = os.path.relpath(file, basedir)
467
if self.containsPath(p):
469
print 'adding file "' + p + '"'
470
mtime = os.path.getmtime(file)
475
e.setFile(p, mtime, data, compresslevel)
476
self.entries.append(e)
478
def setFromDirectory(self, basedir, zipdatafile):
479
# first the original entry description
480
if os.path.isfile(zipdatafile):
481
self.readFromDataFile(zipdatafile)
482
# adapt it to the current directory files
484
while i < len(self.entries):
485
# if an entry does not exist anymore, remove it
487
p = os.path.join(basedir, e.header.filename)
488
if e.header.filename.endswith('/'):
489
# always keep directories as zip entries,
490
# directory entries must be removed by hand
492
elif os.path.isfile(p):
496
# read data into filedata
500
# if the archive is empty so far and, the file 'mimetype'
501
# exists, add it first, in uncompressed form
502
p = os.path.join(basedir, 'mimetype')
503
if os.path.isfile(p):
504
self.addFile(basedir, p, 0)
505
# add all directories and files that are not there yet
506
for root, directories, files in os.walk(basedir):
507
# directory entries are not created
508
#for d in directories:
509
# p = os.path.join(root, d)
510
# self.addDirectory(basedir, p)
512
p = os.path.join(root, f)
513
self.addFile(basedir, p, 6)
516
self.updateOffsetsAndSizes()
517
filesize = 22 + self.fileinfo[5] + self.fileinfo[6]
520
for e in self.entries:
521
data += e.packHeader()
522
for e in self.entries:
523
data += e.packEntry()
526
data += struct.pack("<IHHHHIIH", fi[0], fi[1], fi[2], fi[3], fi[4], fi[5], fi[6], fi[7]) + fi[8]
530
def updateOffsetsAndSizes(self):
532
for e in self.entries:
533
e.updateOffsetEtc(total)
534
total += e.getHeaderSize()
536
for e in self.entries:
537
total += e.cddata.getSize()
539
self.fileinfo[3] = self.fileinfo[4] = len(self.entries)
540
self.fileinfo[5] = total - cdstart
541
self.fileinfo[6] = cdstart
543
def writeToDirectory(self, dirpath):
544
for e in self.entries:
545
p = os.path.join(dirpath, e.header.filename)
546
if os.path.commonprefix([p, dirpath]) != dirpath:
547
# error, zip file would lie outside of parentdir
556
os.makedirs(os.path.dirname(p))
563
def writeToDataFile(self, zipdatafile):
564
f = open(zipdatafile, 'w')
565
# write file specific line with 9 fields first
567
f.write(str(self.fileinfo[i]) + '\t')
568
f.write(base64.b64encode(self.fileinfo[8]) + '\n')
569
# write one line with 37 fields per entry
570
for e in self.entries:
571
f.write('\t'.join(e.getFields()) + '\n')
573
def readFromDataFile(self, zipdatafile):
575
f = open(zipdatafile, 'r')
577
fields = line.split('\t')
579
self.fileinfo[i] = int(fields[i])
580
self.fileinfo[8] = base64.b64decode(fields[8])
581
if (len(fields) != 9):
582
self.error = 'First line does not have 9 entries.'
584
fields = line.split('\t')
585
if (len(fields) != 37):
586
self.error = 'Entry line does not have 37 entries.'
589
self.entries.append(e)
591
self.filedata = len(self.entries)*['']
593
def filenameToDirname(filename, extensions):
594
ext = filter(lambda e: filename.endswith('.' + e), extensions)
597
return filename[:-l-1] + '_' + ext[0]
600
def dirnameToFilename(dirname, extensions):
601
ext = filter(lambda e: dirname.endswith('_' + e), extensions)
604
return dirname[:-l-1] + '.' + ext[0]
608
List all files and directories that are potentially supported.
609
The list is created on the extension of the file and trailing part of the
610
name of the directory
612
def scanDirectory(rootdir, extensions):
613
if os.path.isfile(rootdir):
616
filext = map(lambda e: "." + e, extensions)
618
for root, directories, files in os.walk(rootdir):
620
if file.startswith('.'):
622
if any(map(lambda e: file.endswith(e), filext)):
623
list.append(os.path.join(root, file))
624
for dir in directories:
625
file = dirnameToFilename(dir, extensions)
627
list.append(os.path.join(root, file))
629
# remove duplicates by converting to a set
630
return frozenset(list)
632
def readZipData(filepath):
633
if not os.path.exists(filepath):
637
fd = open(filepath, "rb")
639
if magic != 'PK\3\4':
648
def writeZipped(data, filepath):
649
fd = open(filepath, "wb")
653
def writeUnzipped(data, dirpath, descriptionfile):
655
zipdata.setFromFileContents(data)
656
if not zipdata.valid:
658
zipdata.writeToDirectory(dirpath)
659
zipdata.writeToDataFile(descriptionfile)
661
def listzippedFunction(filepath, dirpath, descriptionfile, hiddenfile):
662
# if there is a problem reading, simply do not list the file
663
data = readZipData(filepath)
667
zipdata.setFromFileContents(data)
671
def createzippedFunction(filepath, dirpath, descriptionfile, hiddenfile):
672
# check that no file exists yet
673
if os.path.isfile(filepath) or os.path.isfile(hiddenfile):
678
zipdata.setFromDirectory(dirpath, descriptionfile)
682
data = zipdata.recreate()
683
writeZipped(data, filepath)
684
shutil.copy(filepath, hiddenfile)
686
def createunzippedFunction(filepath, dirpath, descriptionfile, hiddenfile):
687
# check that no directory exists yet
688
if os.path.isdir(dirpath) or os.path.isfile(hiddenfile) \
689
or os.path.isfile(descriptionfile):
692
# if there is a problem reading, simply do not unzip the file
693
data = readZipData(filepath)
696
writeUnzipped(data, dirpath, descriptionfile)
697
shutil.copy(filepath, hiddenfile)
699
""" Find which file is the newest, that is which is different from the other
700
two. Returns None if all are equal, 'error' when it cannot be determined,
701
e.g. because one version does not exist. 'unzipped' when the unzipped
702
version is different, 'zipped' when the zipped version is different and
703
'both' when no version resembles the hidden file. """
704
def findDifferentVersion(filepath, dirpath, descriptionfile, hiddenfile):
705
d = dict(source='error', data='')
706
# check that an unzipped version and a hidden file exist
707
if not os.path.isdir(dirpath) or not os.path.isfile(hiddenfile) \
708
or not os.path.isfile(filepath):
711
# check that the files are in sync
712
hidden = readZipData(hiddenfile)
713
zipped = readZipData(filepath)
716
zipdata.setFromDirectory(dirpath, descriptionfile)
719
unzipped = zipdata.recreate()
722
if hidden == unzipped:
725
d['source'] = 'unzipped'
727
if hidden == unzipped:
729
d['source'] = 'zipped'
734
def syncFunction(filepath, dirpath, descriptionfile, hiddenfile):
    """ Bring the zipped file and the unzipped directory back in sync.

    The outcome of findDifferentVersion() decides what happens:
      'both'             -- report a conflict, change nothing
      'zipped'           -- unzip the returned data into dirpath and
                            refresh the hidden copy
      'unzipped' or None -- write the returned data to filepath and
                            refresh the hidden copy
    Any other value (such as 'error') leaves everything untouched.
    """
    d = findDifferentVersion(filepath, dirpath, descriptionfile, hiddenfile)
    source = d['source']
    if source == 'both':
        print('Conflict for ' + filepath)
    elif source == 'zipped':
        writeUnzipped(d['data'], dirpath, descriptionfile)
        shutil.copy(filepath, hiddenfile)
    elif source == 'unzipped' or source is None:
        writeZipped(d['data'], filepath)
        shutil.copy(filepath, hiddenfile)
745
def removezippedFunction(filepath, dirpath, descriptionfile, hiddenfile):
746
# only delete a version if there is no different version
747
d = findDifferentVersion(filepath, dirpath, descriptionfile, hiddenfile)
748
if d['source'] != None:
751
os.remove(hiddenfile)
753
def removeunzippedFunction(filepath, dirpath, descriptionfile, hiddenfile):
754
# only delete a version if there is no different version
755
d = findDifferentVersion(filepath, dirpath, descriptionfile, hiddenfile)
756
if d['source'] != None:
758
os.remove(hiddenfile)
759
if os.path.isfile(descriptionfile):
760
os.remove(descriptionfile)
761
shutil.rmtree(dirpath)
763
if __name__ == '__main__':
766
if len(sys.argv) < 2:
770
command = sys.argv[1]
771
if len(sys.argv) == 2:
774
directories = sys.argv[2:]
776
commands = {'listzipped': listzippedFunction,
777
'createzipped': createzippedFunction,
778
'createunzipped': createunzippedFunction,
779
'sync': syncFunction,
780
'removezipped': removezippedFunction,
781
'removeunzipped': removeunzippedFunction}
783
if not command in commands:
784
print 'invalid command "' + command + '"'
787
commandFunction = commands[command]
789
extensions = ["odt", "odp", "ods", "odg", "jar", "zip"]
791
for directory in directories:
792
fileList = scanDirectory(directory, extensions)
793
for file in fileList:
794
dir = filenameToDirname(file, extensions)
795
descriptionfile = dir + '.cd'
796
if file.find('/') == -1:
797
hiddenfile = '.' + file
799
hiddenfile = '/.'.join(os.path.split(file))
800
commandFunction(file, dir, descriptionfile, hiddenfile)