1
"""Microsoft Internet Explorer cookie loading on Windows.
3
Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code)
4
Copyright 2002-2003 John J Lee <jjl@pobox.com> (The Python port)
6
This code is free software; you can redistribute it and/or modify it under
7
the terms of the BSD License (see the file COPYING included with the
12
import os, re, string, time, struct
16
from _ClientCookie import CookieJar, Cookie, MISSING_FILENAME_TEXT
17
from _Util import startswith
18
from _Debug import getLogger
19
debug = getLogger("ClientCookie").debug
27
def regload(path, leaf):
28
key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0,
29
_winreg.KEY_ALL_ACCESS)
31
value = _winreg.QueryValueEx(key, leaf)[0]
36
# Win32 FILETIME value (count of 100-nanosecond intervals since Jan 01 1601)
# that corresponds to the Unix epoch.  Written without the Python-2-only "L"
# suffix: large literals are promoted to long automatically, and the plain
# form also parses on Python 3.
WIN32_EPOCH = 0x019db1ded53e8000  # 1970 Jan 01 00:00:00 in Win32 FILETIME
38
def epoch_time_offset_from_win32_filetime(filetime):
    """Convert from win32 filetime to seconds-since-epoch value.

    MSIE stores create and expire times as Win32 FILETIME, which is 64
    bits of 100 nanosecond intervals since Jan 01 1601.

    ClientCookie expects time expressed in whole seconds since the epoch
    (1970 Jan 01 00:00:00), so the FILETIME is rebased on WIN32_EPOCH and
    scaled down from 100 ns units to seconds.

    filetime: integer Win32 FILETIME value

    Returns the number of whole seconds between the epoch and filetime;
    any sub-second remainder is discarded.

    Raises ValueError if filetime is earlier than the epoch.

    """
    if filetime < WIN32_EPOCH:
        raise ValueError("filetime (%d) is before epoch (%d)" %
                         (filetime, WIN32_EPOCH))

    # 10,000,000 intervals of 100 ns make up one second.  Floor division
    # gives the same quotient as divmod(...)[0] without building a tuple,
    # and the plain literal avoids the Python-2-only "L" suffix.
    return (filetime - WIN32_EPOCH) // 10000000
54
def binary_to_char(c):
    """Return the two-digit upper-case hexadecimal code of character c."""
    return "%02X" % ord(c)


def binary_to_str(d):
    """Return the characters of d as one string of upper-case hex pairs.

    Each character is rendered by binary_to_char and the pairs are
    concatenated with no separator, so an empty d gives "".
    """
    # The str.join method replaces the deprecated string.join() module
    # function; map() iterates d directly, so no intermediate list copy
    # is needed.
    return "".join(map(binary_to_char, d))
57
class MSIECookieJar(CookieJar):
59
This class differs from CookieJar only in the format it uses to load cookies
62
MSIECookieJar can read the cookie files of Microsoft Internet Explorer
63
(MSIE) for Windows version 5 on Windows NT and version 6 on Windows XP and
64
Windows 98. Other configurations may also work, but are untested. Saving
65
cookies in MSIE format is NOT supported. If you save cookies, they'll be
66
in the usual Set-Cookie3 format, which you can read back in using an
67
instance of the plain old CookieJar class. Don't save using the same
68
filename that you loaded cookies from, because you may succeed in
69
clobbering your MSIE cookies index file!
71
You should be able to have LWP share Internet Explorer's cookies like
72
this (note you need to supply a username to load_from_registry if you're on
73
Windows 9x or Windows ME):
75
cj = MSIECookieJar(delayload=1)
76
# find cookies index file in registry and load cookies from it
77
cj.load_from_registry()
78
opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
79
response = opener.open("http://example.com/")
81
Iterating over a delayloaded MSIECookieJar instance will not cause any
82
cookies to be read from disk. To force reading of all cookies from disk,
83
call read_all_cookies. Note that the following methods iterate over self:
84
clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
89
load_from_registry(ignore_discard=False, ignore_expires=False,
91
load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
95
magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*")
96
padding = "\x0d\xf0\xad\x0b"
98
msie_domain_re = re.compile(r"^([^/]+)(/.*)$")
99
cookie_re = re.compile("Cookie\:.+\@([\x21-\xFF]+).*?"
100
"(.+\@[\x21-\xFF]+\.txt)")
102
# path under HKEY_CURRENT_USER from which to get location of index.dat
103
reg_path = r"software\microsoft\windows" \
104
r"\currentversion\explorer\shell folders"
107
def __init__(self, *args, **kwds):
    """Set up the jar like a plain CookieJar, with nothing awaiting delayed load.

    All positional and keyword arguments are forwarded unchanged to
    CookieJar.__init__.
    """
    # Extended call syntax replaces the deprecated apply() builtin and
    # forwards exactly the same arguments.
    CookieJar.__init__(self, *args, **kwds)
    # Maps domain -> (cookie_file, ignore_discard, ignore_expires) for
    # cookie files that have been indexed but not yet read from disk.
    self._delayload_domains = {}
111
def set_cookie(self, cookie):
113
self._delayload_domain(cookie.domain)
114
CookieJar.set_cookie(self, cookie)
116
def _cookies_for_domain(self, domain, request, unverifiable):
117
debug("Checking %s for cookies to return", domain)
118
if not self.policy.domain_return_ok(domain, request, unverifiable):
122
self._delayload_domain(domain)
124
return CookieJar._cookies_for_domain(
125
self, domain, request, unverifiable)
127
def read_all_cookies(self):
128
"""Eagerly read in all cookies."""
130
for domain in self._delayload_domains.keys():
131
self._delayload_domain(domain)
133
def _delayload_domain(self, domain):
134
# if necessary, lazily load cookies for this domain
135
delayload_info = self._delayload_domains.get(domain)
136
if delayload_info is not None:
137
cookie_file, ignore_discard, ignore_expires = delayload_info
139
self.load_cookie_data(cookie_file,
140
ignore_discard, ignore_expires)
142
debug("error reading cookie file, skipping: %s", cookie_file)
144
del self._delayload_domains[domain]
146
def _load_cookies_from_file(self, filename):
149
cookies_fh = open(filename)
153
key = cookies_fh.readline()
156
rl = cookies_fh.readline
157
def getlong(rl=rl): return long(rl().rstrip())
158
def getstr(rl=rl): return rl().rstrip()
162
domain_path = getstr()
163
flags = getlong() # 0x2000 bit is for secure I think
164
lo_expire = getlong()
165
hi_expire = getlong()
166
lo_create = getlong()
167
hi_create = getlong()
170
if "" in (key, value, domain_path, flags, hi_expire, lo_expire,
171
hi_create, lo_create, sep) or (sep != "*"):
174
m = self.msie_domain_re.search(domain_path)
179
cookies.append({"KEY": key, "VALUE": value, "DOMAIN": domain,
180
"PATH": path, "FLAGS": flags, "HIXP": hi_expire,
181
"LOXP": lo_expire, "HICREATE": hi_create,
182
"LOCREATE": lo_create})
188
def load_cookie_data(self, filename,
189
ignore_discard=False, ignore_expires=False):
190
"""Load cookies from file containing actual cookie data.
192
Old cookies are kept unless overwritten by newly loaded ones.
194
You should not call this method if the delayload attribute is set.
196
I think each of these files contains all cookies for one user, domain,
199
filename: file containing cookies -- usually found in a file like
200
C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt
203
now = int(time.time())
205
cookie_data = self._load_cookies_from_file(filename)
207
for cookie in cookie_data:
208
flags = cookie["FLAGS"]
209
secure = ((flags & 0x2000) != 0)
210
filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
211
expires = epoch_time_offset_from_win32_filetime(filetime)
216
domain = cookie["DOMAIN"]
217
initial_dot = startswith(domain, ".")
219
domain_specified = True
221
# MSIE 5 does not record whether the domain cookie-attribute
223
# Assuming it wasn't is conservative, because with strict
224
# domain matching this will match less frequently; with regular
225
# Netscape tail-matching, this will match at exactly the same
226
# times that domain_specified = True would. It also means we
227
# don't have to prepend a dot to achieve consistency with our
228
# own & Mozilla's domain-munging scheme.
229
domain_specified = False
231
# assume path_specified is false
232
# XXX is there other stuff in here? -- eg. comment, commentURL?
234
cookie["KEY"], cookie["VALUE"],
236
domain, domain_specified, initial_dot,
237
cookie["PATH"], False,
244
if not ignore_discard and c.discard:
246
if not ignore_expires and c.is_expired(now):
248
CookieJar.set_cookie(self, c)
250
def load_from_registry(self, ignore_discard=False, ignore_expires=False,
253
username: only required on win9x
256
cookies_dir = regload(self.reg_path, self.reg_key)
257
filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT"))
258
self.load(filename, ignore_discard, ignore_expires, username)
260
def load(self, filename, ignore_discard=False, ignore_expires=False,
262
"""Load cookies from an MSIE 'index.dat' cookies index file.
264
filename: full path to cookie index file
265
username: only required on win9x
269
if self.filename is not None: filename = self.filename
270
else: raise ValueError(MISSING_FILENAME_TEXT)
272
index = open(filename, "rb")
275
self._really_load(index, filename, ignore_discard, ignore_expires,
280
def _really_load(self, index, filename, ignore_discard, ignore_expires,
282
now = int(time.time())
285
username = string.lower(os.environ['USERNAME'])
287
cookie_dir = os.path.dirname(filename)
289
data = index.read(256)
291
raise IOError("%s file is too short" % filename)
293
# Cookies' index.dat file starts with 32 bytes of signature
294
# followed by an offset to the first record, stored as a little-
296
sig, size, data = data[:32], data[32:36], data[36:]
297
size = struct.unpack("<L", size)[0]
299
# check that sig is valid
300
if not self.magic_re.match(sig) or size != 0x4000:
301
raise IOError("%s ['%s' %s] does not seem to contain cookies" %
302
(str(filename), sig, size))
304
# skip to start of first record
307
sector = 128 # size of sector in bytes
312
# Cookies are usually in two contiguous sectors, so read in two
313
# sectors and adjust if not a Cookie.
315
d = index.read(to_read)
316
if len(d) != to_read:
320
# Each record starts with a 4-byte signature and a count
321
# (little-endian DWORD) of sectors for the record.
322
sig, size, data = data[:4], data[4:8], data[8:]
323
size = struct.unpack("<L", size)[0]
325
to_read = (size - 2) * sector
327
## from urllib import quote
328
## print "data", quote(data)
329
## print "sig", quote(sig)
330
## print "size in sectors", size
331
## print "size in bytes", size*sector
332
## print "size in units of 16 bytes", (size*sector) / 16
333
## print "size to read in bytes", to_read
337
assert (sig in ("HASH", "LEAK",
338
self.padding, "\x00\x00\x00\x00"),
339
"unrecognized MSIE index.dat record: %s" %
341
if sig == "\x00\x00\x00\x00":
342
# assume we've got all the cookies, and stop
344
if sig == self.padding:
346
# skip the rest of this record
350
index.seek(to_read, 1)
353
# read in rest of record if necessary
355
more_data = index.read(to_read)
356
if len(more_data) != to_read: break
357
data = data + more_data
359
cookie_re = ("Cookie\:%s\@([\x21-\xFF]+).*?" % username +
360
"(%s\@[\x21-\xFF]+\.txt)" % username)
361
m = re.search(cookie_re, data, re.I)
363
cookie_file = os.path.join(cookie_dir, m.group(2))
364
if not self.delayload:
366
self.load_cookie_data(cookie_file,
367
ignore_discard, ignore_expires)
369
debug("error reading cookie file, skipping: %s",
377
self._delayload_domains[domain] = (
378
cookie_file, ignore_discard, ignore_expires)