2
# THIS IS WORK IN PROGRESS
4
# The Python Imaging Library
7
# stuff to deal with OLE2 Structured Storage files. this module is
8
# used by PIL to read Image Composer and FlashPix files, but can also
9
# be used to read other files of this type.
12
# 1997-01-20 fl Created
13
# 1997-01-22 fl Fixed 64-bit portability quirk
14
# 2003-09-09 fl Fixed typo in OleFileIO.loadfat (noted by Daniel Haertle)
15
# 2004-02-29 fl Changed long hex constants to signed integers
18
# FIXME: sort out sign problem (eliminate long hex constants)
19
# FIXME: change filename to use "a/b/c" instead of ["a", "b", "c"]
20
# FIXME: provide a glob mechanism function (using fnmatchcase)
24
# "FlashPix Format Specification, Appendix A", Kodak and Microsoft,
29
# "If this document and functionality of the Software conflict,
30
# the actual functionality of the Software represents the correct
31
# functionality" -- Microsoft, in the OLE format specification
33
# Copyright (c) Secret Labs AB 1997.
34
# Copyright (c) Fredrik Lundh 1997.
36
# See the README file for information on usage and redistribution.
39
import string, StringIO
43
return ord(c[o])+(ord(c[o+1])<<8)
46
return ord(c[o])+(ord(c[o+1])<<8)+(ord(c[o+2])<<16)+(ord(c[o+3])<<24)
49
# Eight-byte signature found at the very start of every OLE2 structured
# storage file (D0 CF 11 E0 A1 B1 1A E1).
MAGIC = '\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1'
52
# --------------------------------------------------------------------
55
# Property type codes, as read from the type field of each property in
# getproperties().  Note that the value 15 is not assigned.
VT_EMPTY = 0
VT_NULL = 1
VT_I2 = 2
VT_I4 = 3
VT_R4 = 4
VT_R8 = 5
VT_CY = 6
VT_DATE = 7
VT_BSTR = 8
VT_DISPATCH = 9
VT_ERROR = 10
VT_BOOL = 11
VT_VARIANT = 12
VT_UNKNOWN = 13
VT_DECIMAL = 14
VT_I1 = 16
VT_UI1 = 17
VT_UI2 = 18
VT_UI4 = 19
VT_I8 = 20
VT_UI8 = 21
VT_INT = 22
VT_UINT = 23
VT_VOID = 24
VT_HRESULT = 25
VT_PTR = 26
VT_SAFEARRAY = 27
VT_CARRAY = 28
VT_USERDEFINED = 29
VT_LPSTR = 30
VT_LPWSTR = 31

# Codes from 64 upwards.
VT_FILETIME = 64
VT_BLOB = 65
VT_STREAM = 66
VT_STORAGE = 67
VT_STREAMED_OBJECT = 68
VT_STORED_OBJECT = 69
VT_BLOB_OBJECT = 70
VT_CF = 71
VT_CLSID = 72
65
# map property id to name (for debugging purposes)
68
for k, v in vars().items():
73
# --------------------------------------------------------------------
74
# Some common document types (root.clsid fields)
76
WORD_CLSID = "00020900-0000-0000-C000-000000000046"
80
# --------------------------------------------------------------------
82
class _OleStream(StringIO.StringIO):
86
Returns a read-only file object which can be used to read
87
the contents of an OLE stream. To open a stream, use the
88
openstream method in the OleFile class.
90
This function can be used with either ordinary streams,
91
or ministreams, depending on the offset, sectorsize, and
95
# FIXME: should store the list of sects obtained by following
96
# the fat chain, and load new sectors on demand instead of
97
# loading it all in one go.
99
def __init__(self, fp, sect, size, offset, sectorsize, fat):
# Read a whole stream into memory and expose it through StringIO.
#
# fp         -- file-like object that is seeked and read
# sect       -- number of the first sector to read
# size       -- number of bytes kept from the data that was read
# offset     -- byte position of sector 0 within fp
# sectorsize -- length of one sector in bytes
# fat        -- not used by any statement visible here; presumably the
#               table that gives the next value of sect (TODO confirm,
#               the statement that advances sect is not in this excerpt)
103
# -2 is the signed spelling of 0xFFFFFFFE; reaching it ends the loop.
while sect != -2: # 0xFFFFFFFEL:
104
# Sector n starts at offset + sectorsize * n.
fp.seek(offset + sectorsize * sect)
105
data.append(fp.read(sectorsize))
108
# Glue the sector-sized pieces together into one string.
data = string.join(data, "")
110
# print len(data), size
112
# Whole sectors were read, so cut the result down to the requested size.
StringIO.StringIO.__init__(self, data[:size])
115
# --------------------------------------------------------------------
117
# FIXME: should add a counter in here to avoid looping forever
118
# if the tree is broken.
120
class _OleDirectoryEntry:
122
"""OLE2 Directory Entry
124
Encapsulates a stream directory entry. Note that the
125
constructor builds a tree of all subentries, so we only
126
have to call it with the root object.
129
def __init__(self, sidlist, sid):
131
# store directory parameters. the caller provides
132
# a complete list of directory entries, as read from
133
# the directory stream.
135
name, type, sect, size, sids, clsid = sidlist[sid]
139
self.type = type # 1=storage 2=stream
144
# process child nodes, if any
148
sid = sidlist[sid][4][2]
152
# the directory entries are organized as a red-black tree.
153
# the following piece of code does an ordered traversal of
154
# such a tree (at least that's what I hope ;-)
158
# start at leftmost position
160
left, right, child = sidlist[sid][4]
162
while left != -1: # 0xFFFFFFFFL:
165
left, right, child = sidlist[sid][4]
167
while sid != self.sid:
169
self.kids.append(_OleDirectoryEntry(sidlist, sid))
172
left, right, child = sidlist[sid][4]
173
if right != -1: # 0xFFFFFFFFL:
174
# and then back to the left
177
left, right, child = sidlist[sid][4]
178
if left == -1: # 0xFFFFFFFFL:
183
# couldn't move right; move up instead
187
left, right, child = sidlist[ptr][4]
191
left, right, child = sidlist[sid][4]
195
# in the OLE file, entries are sorted on (length, name).
196
# for convenience, we sort them on name instead.
200
def __cmp__(self, other):
201
"Compare entries by name"
203
return cmp(self.name, other.name)
205
def dump(self, tab = 0):
206
"Dump this entry, and all its subentries (for debug purposes only)"
208
TYPES = ["(invalid)", "(storage)", "(stream)", "(lockbytes)",
209
"(property)", "(root)"]
211
print " "*tab + repr(self.name), TYPES[self.type],
212
if self.type in (2, 5):
213
print self.size, "bytes",
215
if self.type in (1, 5) and self.clsid:
216
print " "*tab + "{%s}" % self.clsid
218
for kid in self.kids:
222
# --------------------------------------------------------------------
225
# This class encapsulates the interface to an OLE 2 structured
226
# storage file. Use the {@link listdir} and {@link openstream}
227
# methods to access the contents of this file.
230
"""OLE container object
232
This class encapsulates the interface to an OLE 2 structured
233
storage file. Use the listdir and openstream methods to access
234
the contents of this file.
236
Object names are given as a list of strings, one for each subentry
237
level. The root entry should be omitted. For example, the following
238
code extracts all image streams from a Microsoft Image Composer file:
240
ole = OleFileIO("fan.mic")
242
for entry in ole.listdir():
243
if entry[1:2] == ["Image"]:
244
fin = ole.openstream(entry)
245
fout = open(entry[0], "wb")
252
You can use the viewer application provided with the Python Imaging
253
Library to view the resulting files (which happens to be standard
257
def __init__(self, filename = None):
265
def open(self, filename):
265
"""Open an OLE2 file"""
268
# A string argument is treated as a path and opened in binary mode.
# (How any other kind of argument is handled is not visible in this
# excerpt.)
if type(filename) == type(""):
269
self.fp = open(filename, "rb")
273
# The checks and field reads below all work on the first 512 bytes.
header = self.fp.read(512)
275
# Reject short files and files that do not start with the signature.
if len(header) != 512 or header[:8] != MAGIC:
276
raise IOError, "not an OLE2 structured storage file"
278
# file clsid (probably never used, so we don't store it)
279
clsid = self._clsid(header[8:24])
281
# FIXME: could check version and byte order fields
283
# Both sizes are stored as shift counts (powers of two).
self.sectorsize = 1 << i16(header, 30)
284
self.minisectorsize = 1 << i16(header, 32)
286
# _open() sends requests smaller than this to the ministream.
self.minisectorcutoff = i32(header, 56)
288
# Load file allocation tables
291
# Load directory. This sets both the sidlist (ordered by id)
292
# and the root (ordered by hierarchy) members.
293
# Header offset 48 holds the first sector of the directory stream.
self.loaddirectory(i32(header, 48))
295
# The ministream is opened on first use by _open(); loadminifat()
# reads the MINIFAT starting at the sector stored here.
self.ministream = None
296
self.minifatsect = i32(header, 60)
298
def loadfat(self, header):
299
# Load the FAT table. The header contains the sector numbers
300
# for the first 109 FAT sectors. Additional sectors are
301
# described by DIF blocks (FIXME: not yet implemented)
303
# Bytes 76..511 of the header: 436 bytes, i.e. 109 four-byte entries.
sect = header[76:512]
305
# Step through the entries one 32-bit word at a time.
for i in range(0, len(sect), 4):
307
# -2 and -1 are the signed spellings of 0xFFFFFFFE and 0xFFFFFFFF.
# NOTE(review): the statement that sets ix, and what happens when this
# test succeeds, are not visible in this excerpt.
if ix == -2 or ix == -1: # ix == 0xFFFFFFFEL or ix == 0xFFFFFFFFL:
310
# Decode s as consecutive 32-bit values and append them to the table.
# s=s binds the current string as a lambda default argument.
fat = fat + map(lambda i, s=s: i32(s, i), range(0, len(s), 4))
313
def loadminifat(self):
    """Load the MINIFAT table into self.minifat.

    The table is stored in a standard substream whose first sector is
    given by the header field kept in self.minifatsect.  The stream is
    read in full and decoded as consecutive 32-bit values using i32().
    """
    s = self._open(self.minifatsect).read()

    # Build a real list: the table is looked up by index, which a lazy
    # map object would not allow.  Stopping at len(s) - 3 skips a
    # trailing partial word instead of failing inside i32().
    self.minifat = [i32(s, i) for i in range(0, len(s) - 3, 4)]
321
def getsect(self, sect):
    """Return the raw contents of regular sector number `sect`.

    The header occupies the whole first sector of the file, so sector n
    starts at byte (n + 1) * self.sectorsize.  With 512-byte sectors this
    is the same position as the former fixed 512-byte header offset; with
    larger sectors the fixed offset pointed into the header padding.

    Reads self.sectorsize bytes from self.fp and returns them.  A short
    read at the end of the file is returned as-is.
    """
    self.fp.seek(self.sectorsize * (sect + 1))
    return self.fp.read(self.sectorsize)
327
def _unicode(self, s):
328
# Map unicode string to Latin 1
330
# FIXME: some day, Python will provide an official way to handle
331
# Unicode strings, but until then, this will have to do...
332
return filter(ord, s)
334
def loaddirectory(self, sect):
335
# Load the directory. The directory is stored in a standard
336
# substream, independent of its size.
338
# read directory stream
339
fp = self._open(sect)
341
# create list of sid entries
347
type = ord(entry[66])
348
name = self._unicode(entry[0:0+i16(entry, 64)])
349
ptrs = i32(entry, 68), i32(entry, 72), i32(entry, 76)
350
sect, size = i32(entry, 116), i32(entry, 120)
351
clsid = self._clsid(entry[80:96])
352
self.sidlist.append((name, type, sect, size, ptrs, clsid))
354
# create hierarchical list of directory entries
355
self.root = _OleDirectoryEntry(self.sidlist, 0)
357
def dumpdirectory(self):
358
# Dump directory (for debugging only)
362
def _clsid(self, clsid):
# Format a raw class id as text of the form
# XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX (upper-case hex digits).
363
# An id made up of NUL bytes only is handled separately.
# NOTE(review): the result for that case is not visible in this excerpt.
if clsid == "\0" * len(clsid):
365
# Bytes 0-3 go through i32() and bytes 4-5 and 6-7 through i16(); bytes
# 8-15 are printed one by one in stored order, with the "-" falling
# after byte 9.
return (("%08X-%04X-%04X-%02X%02X-" + "%02X" * 6) %
366
((i32(clsid, 0), i16(clsid, 4), i16(clsid, 6)) +
367
tuple(map(ord, clsid[8:16]))))
369
def _list(self, files, prefix, node):
372
prefix = prefix + [node.name]
373
for entry in node.kids:
375
self._list(files, prefix, entry)
377
files.append(prefix[1:] + [entry.name])
379
def _find(self, filename):
383
for name in filename:
384
for kid in node.kids:
388
raise IOError, "file not found"
392
def _open(self, start, size = 0x7FFFFFFF):
# Return an _OleStream for the data that begins at sector `start`.
# `size` limits how many bytes the stream exposes; it also selects
# which of the two storage areas is read (see the test below).
395
# Requests below the cutoff are read from the ministream, using the
# mini sector size and the MINIFAT.
if size < self.minisectorcutoff:
397
# Open the ministream the first time it is needed.
# NOTE(review): self.minifat is used below, but nothing visible in this
# excerpt loads it here -- confirm against the full file.
if not self.ministream:
399
# sidlist[0][2] is the start sector stored in directory entry 0.  No
# size is passed, so the default size applies to this nested call.
self.ministream = self._open(self.sidlist[0][2])
400
# Mini sectors are addressed from position 0 of the ministream.
return _OleStream(self.ministream, start, size, 0,
401
self.minisectorsize, self.minifat)
404
# Everything else is read from the file itself, where sector 0 is taken
# to start after the 512 header bytes.
return _OleStream(self.fp, start, size, 512,
405
self.sectorsize, self.fat)
408
# Returns a list of streams stored in this file.
411
"""Return a list of streams stored in this file"""
414
self._list(files, [], self.root)
418
# Opens a stream as a read-only file object.
420
def openstream(self, filename):
421
"""Open a stream as a read-only file object"""
423
# _find() turns the name into an index into self.sidlist; the visible
# part of _find() raises IOError("file not found") for unknown names.
slot = self._find(filename)
424
# Each sidlist entry is (name, type, sect, size, ptrs, clsid).
name, type, sect, size, sids, clsid = self.sidlist[slot]
426
# NOTE(review): the condition guarding this raise is not visible in this
# excerpt; presumably it rejects entries whose type is not a stream.
raise IOError, "this file is not a stream"
427
# The entry's size decides between ministream and regular storage.
return self._open(sect, size)
430
# Gets a list of properties described in substream.
432
def getproperties(self, filename):
433
"""Return properties described in substream"""
435
fp = self.openstream(filename)
441
clsid = self._clsid(s[8:24])
445
fmtid = self._clsid(s[:16])
449
s = "****" + fp.read(i32(fp.read(4))-4)
451
for i in range(i32(s, 4)):
454
offset = i32(s, 12+i*8)
455
type = i32(s, offset)
457
# test for common types first (should perhaps use
458
# a dictionary instead?)
461
value = i16(s, offset+4)
463
value = value - 65536
465
value = i16(s, offset+4)
466
elif type in (VT_I4, VT_ERROR):
467
value = i32(s, offset+4)
469
value = i32(s, offset+4) # FIXME
470
elif type in (VT_BSTR, VT_LPSTR):
471
count = i32(s, offset+4)
472
value = s[offset+8:offset+8+count-1]
473
elif type == VT_BLOB:
474
count = i32(s, offset+4)
475
value = s[offset+8:offset+8+count]
476
elif type == VT_LPWSTR:
477
count = i32(s, offset+4)
478
value = self._unicode(s[offset+8:offset+8+count*2])
479
elif type == VT_FILETIME:
480
value = long(i32(s, offset+4)) + (long(i32(s, offset+8))<<32)
481
# FIXME: this is a 64-bit int: "number of 100ns periods
482
# since Jan 1,1601". Should map this to Python time
483
value = value / 10000000L # seconds
485
value = ord(s[offset+4])
486
elif type == VT_CLSID:
487
value = self._clsid(s[offset+4:offset+20])
489
count = i32(s, offset+4)
490
value = s[offset+8:offset+8+count]
492
value = None # everything else yields "None"
494
# FIXME: add support for VT_VECTOR
496
#print "%08x" % id, repr(value),
497
#print "(%s)" % VT[i32(s, offset) & 0xFFF]
504
# --------------------------------------------------------------------
505
# This script can be used to dump the directory of any OLE2 structured
508
if __name__ == "__main__":
512
for file in sys.argv[1:]:
514
ole = OleFileIO(file)
519
for file in ole.listdir():
520
if file[-1][0] == "\005":
522
props = ole.getproperties(file)
523
props = props.items()
528
print "***", "cannot read", file, "-", v