1
"""Mozilla / Netscape cookie loading / saving.
3
Copyright 1997-1999 Gisle Aas (libwww-perl)
4
Copyright 2002-2003 Johnny Lee <typo_pl@hotmail.com> (MSIE Perl code)
5
Copyright 2002-2003 John J Lee <jjl@pobox.com> (The Python port)
7
This code is free software; you can redistribute it and/or modify it under
8
the terms of the BSD License (see the file COPYING included with the
13
import os, re, string, time, struct
17
from _ClientCookie import CookieJar, Cookie, MISSING_FILENAME_TEXT
18
from _Util import startswith
19
from _Debug import debug
27
def regload(path, leaf):
    """Read one value from the current user's registry hive.

    path: key path below HKEY_CURRENT_USER
    leaf: name of the value to read from that key

    Returns the value's data, or None if the value cannot be queried.

    """
    # NOTE(review): the error handling and the return statement are not
    # visible in the reviewed excerpt.  Returning the queried data is what
    # load_from_registry relies on; mapping a failed query to None is an
    # assumption -- confirm against the complete file.
    #
    # Querying a value only needs read access; asking for KEY_ALL_ACCESS
    # makes the open fail for accounts that may not write to the key.
    key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, path, 0,
                          _winreg.KEY_READ)
    try:
        try:
            return _winreg.QueryValueEx(key, leaf)[0]
        except EnvironmentError:
            # WindowsError is an EnvironmentError subclass
            return None
    finally:
        # release the registry handle whether or not the query succeeded
        _winreg.CloseKey(key)
35
# Plain integer literals are used instead of the Python-2-only "L" suffix:
# integers are promoted to arbitrary precision automatically, and the suffix
# is a syntax error on Python 3.
WIN32_EPOCH = 0x019db1ded53e8000  # 1970 Jan 01 00:00:00 in Win32 FILETIME


def epoch_time_offset_from_win32_filetime(filetime):
    """Convert from win32 filetime to seconds-since-epoch value.

    MSIE stores create and expire times as Win32 FILETIME, which is 64
    bits of 100 nanosecond intervals since Jan 01 1601.

    Cookies code expects time in 32-bit value expressed in seconds since
    the epoch (Jan 01 1970).

    filetime: integer FILETIME value

    Returns the whole number of seconds between the epoch and filetime;
    any fraction of a second is discarded.

    Raises ValueError if filetime is earlier than the epoch.

    """
    if filetime < WIN32_EPOCH:
        raise ValueError("filetime (%d) is before epoch (%d)" %
                         (filetime, WIN32_EPOCH))

    # 10000000 intervals of 100 ns make up one second
    return (filetime - WIN32_EPOCH) // 10000000
53
def binary_to_char(c):
    """Return the two-digit upper-case hexadecimal code of character c."""
    return "%02X" % ord(c)


def binary_to_str(d):
    """Return the characters of d as one string of upper-case hex pairs.

    An empty d gives an empty string.

    """
    # str.join replaces string.join, which does not exist in Python 3; the
    # intermediate list(d) copy is not needed because map() iterates d itself.
    return "".join(map(binary_to_char, d))
56
class MSIECookieJar(CookieJar):
58
This class differs from CookieJar only in the format it uses to load cookies
61
MSIECookieJar can read the cookie files of Microsoft Internet Explorer
62
(MSIE) for Windows, versions 5 and 6, on Windows NT and XP respectively.
63
Other configurations may also work, but are untested. Saving cookies in
64
MSIE format is NOT supported. If you save cookies, they'll be in the usual
65
Set-Cookie3 format, which you can read back in using an instance of the
66
plain old CookieJar class. Don't save using the same filename that you
67
loaded cookies from, because you may succeed in clobbering your MSIE
70
You should be able to have LWP share Internet Explorer's cookies like
71
this (note you need to supply a username to load_from_registry if you're on
74
cookies = MSIECookieJar(delayload=1)
75
# find cookies index file in registry and load cookies from it
76
cookies.load_from_registry()
77
opener = ClientCookie.build_opener(ClientCookie.HTTPHandler(cookies))
78
response = opener.open("http://foo.bar.com/")
80
Iterating over a delayloaded MSIECookieJar instance will not cause any
81
cookies to be read from disk. To force reading of all cookies from disk,
82
call read_all_cookies. Note that the following methods iterate over self:
83
clear_temporary_cookies, clear_expired_cookies, __len__, __repr__, __str__
88
load_from_registry(ignore_discard=False, ignore_expires=False,
90
load_cookie_data(filename, ignore_discard=False, ignore_expires=False)
94
# Pattern that the 32-byte signature at the start of index.dat must match
# (checked in _really_load).
magic_re = re.compile(r"Client UrlCache MMF Ver \d\.\d.*")
95
# Four-byte record signature that _really_load compares against to detect
# records it skips over.
padding = "\x0d\xf0\xad\x0b"
97
# Captures the text before the first "/" (group 1) and the remainder
# starting at that "/" (group 2); applied to the "domain/path" field of a
# cookie file.
msie_domain_re = re.compile(r"^([^/]+)(/.*)$")
98
# NOTE(review): not referenced by the code visible in this excerpt;
# _really_load builds its own per-username pattern -- confirm whether this
# attribute is still needed.
cookie_re = re.compile("Cookie\:.+\@([\x21-\xFF]+).*?"
99
"(.+\@[\x21-\xFF]+\.txt)")
101
# path under HKEY_CURRENT_USER from which to get location of index.dat
reg_path = r"software\microsoft\windows" \
103
r"\currentversion\explorer\shell folders"
106
def __init__(self, *args, **kwargs):
    """Initialise the jar; all arguments are passed on to CookieJar."""
    # apply(CookieJar.__init__, (self, args, kwargs)) handed the args tuple
    # and the kwargs dict to the base class as two positional arguments.
    # Expand them so CookieJar.__init__ receives exactly what the caller
    # supplied.
    CookieJar.__init__(self, *args, **kwargs)
    # domain -> (cookie_file, ignore_discard, ignore_expires) for cookie
    # files whose loading has been deferred
    self._delayload_domains = {}
110
# Store a cookie in the jar.  Any cookie file whose loading was deferred for
# the cookie's domain is read first, then the cookie is handed to
# CookieJar.set_cookie.
# NOTE(review): one line between the def and the first call is missing from
# this excerpt (possibly a guard around that call) -- confirm against the
# complete file.
def set_cookie(self, cookie):
112
self._delayload_domain(cookie.domain)
113
CookieJar.set_cookie(self, cookie)
115
# Return whatever CookieJar._cookies_for_domain returns for this domain,
# after loading any cookie file whose reading was deferred for it.
# NOTE(review): the lines between the policy check and the deferred load are
# missing from this excerpt (presumably the body of the check and a guard) --
# confirm against the complete file.
def _cookies_for_domain(self, domain, request, unverifiable):
116
debug("Checking %s for cookies to return" % domain)
117
# ask the cookie policy whether this domain is acceptable for the request
if not self.policy.domain_return_ok(domain, request, unverifiable):
121
self._delayload_domain(domain)
123
return CookieJar._cookies_for_domain(
124
self, domain, request, unverifiable)
126
def read_all_cookies(self):
127
"""Eagerly read in all cookies."""
129
# NOTE(review): one line before this loop is missing from the excerpt
# (possibly a guard) -- confirm against the complete file.
# keys() is used so the loop walks a separate list (Python 2 semantics):
# _delayload_domain may delete entries from _delayload_domains while the
# loop is running.  Under Python 3 keys() is a live view and this would need
# list(...) around it.
for domain in self._delayload_domains.keys():
130
self._delayload_domain(domain)
132
def _delayload_domain(self, domain):
133
# if necessary, lazily load cookies for this domain
134
# Entries are (cookie_file, ignore_discard, ignore_expires) tuples; no entry
# means nothing is pending for this domain.
delayload_info = self._delayload_domains.get(domain)
135
if delayload_info is not None:
136
cookie_file, ignore_discard, ignore_expires = delayload_info
138
self.load_cookie_data(cookie_file,
139
ignore_discard, ignore_expires)
141
# NOTE(review): the statements that choose between the error report below
# and the deletion after it are missing from this excerpt -- presumably
# exception handling around load_cookie_data; confirm against the complete
# file.
debug("error reading cookie file, skipping: %s" % cookie_file)
143
# the domain no longer has a pending cookie file
del self._delayload_domains[domain]
145
# Parse the MSIE cookie text file `filename`, appending one dict per cookie
# to `cookies` with the keys KEY, VALUE, DOMAIN, PATH, FLAGS, HIXP, LOXP,
# HICREATE and LOCREATE.
# NOTE(review): this excerpt is incomplete -- the initialisation of
# `cookies`, the assignments of `value`, `sep`, `domain` and `path`, any
# looping, the closing of the file and the return statement are not visible;
# confirm against the complete file.
def _load_cookies_from_file(self, filename):
148
cookies_fh = open(filename)
152
# a record starts with the line holding the cookie's key
key = cookies_fh.readline()
155
# helpers that read the next line, without trailing whitespace, as a long
# integer or as a string
rl = cookies_fh.readline
156
def getlong(rl=rl): return long(rl().rstrip())
157
def getstr(rl=rl): return rl().rstrip()
161
domain_path = getstr()
162
flags = getlong() # 0x2000 bit is for secure I think
163
# expiry and creation times are each stored as two numbers, low part first
lo_expire = getlong()
164
hi_expire = getlong()
165
lo_create = getlong()
166
hi_create = getlong()
169
# detect a malformed record: an empty field, or a separator other than "*"
if "" in (key, value, domain_path, flags, hi_expire, lo_expire,
170
hi_create, lo_create, sep) or (sep != "*"):
173
# match the "domain/path" field against the domain/path pattern
m = self.msie_domain_re.search(domain_path)
178
cookies.append({"KEY": key, "VALUE": value, "DOMAIN": domain,
179
"PATH": path, "FLAGS": flags, "HIXP": hi_expire,
180
"LOXP": lo_expire, "HICREATE": hi_create,
181
"LOCREATE": lo_create})
187
# NOTE(review): parts of this method are missing from the excerpt -- the end
# of the docstring, the condition selecting domain_specified, the start and
# end of the call that builds the cookie object `c`, and the bodies of the
# two final checks; confirm against the complete file.
def load_cookie_data(self, filename,
188
ignore_discard=False, ignore_expires=False):
189
"""Load cookies from file containing actual cookie data.
191
Old cookies are kept unless overwritten by newly loaded ones.
193
You should not call this method if the delayload attribute is set.
195
I think each of these files contain all cookies for one user, domain,
198
filename: file containing cookies -- usually found in a file like
199
C:\WINNT\Profiles\joe\Cookies\joe@blah[1].txt
202
# current time, passed to is_expired() below
now = int(time.time())
204
cookie_data = self._load_cookies_from_file(filename)
206
for cookie in cookie_data:
207
flags = cookie["FLAGS"]
208
# bit 0x2000 of the flags is taken to mean "secure"
secure = ((flags & 0x2000) != 0)
209
# join the high and low halves into one 64-bit FILETIME value
filetime = (cookie["HIXP"] << 32) + cookie["LOXP"]
210
# NOTE(review): epoch_time_offset_from_win32_filetime raises ValueError for
# times before 1970; no handling of that is visible here -- confirm.
expires = epoch_time_offset_from_win32_filetime(filetime)
215
domain = cookie["DOMAIN"]
216
initial_dot = startswith(domain, ".")
218
domain_specified = True
220
# MSIE 5 does not record whether the domain cookie-attribute
222
# Assuming it wasn't is conservative, because with strict
223
# domain matching this will match less frequently; with regular
224
# Netscape tail-matching, this will match at exactly the same
225
# times that domain_specified = True would. It also means we
226
# don't have to prepend a dot to achieve consistency with our
227
# own & Mozilla's domain-munging scheme.
228
domain_specified = False
230
# assume path_specified is false
231
# XXX is there other stuff in here? -- eg. comment, commentURL?
233
cookie["KEY"], cookie["VALUE"],
235
domain, domain_specified, initial_dot,
236
cookie["PATH"], False,
243
# discardable and expired cookies are tested for unless the caller asked to
# ignore those properties
if not ignore_discard and c.discard:
245
if not ignore_expires and c.is_expired(now):
249
# Find the cookies directory through the registry (regload with reg_path and
# reg_key) and load the INDEX.DAT file found in it via load().
# NOTE(review): the rest of the signature (it must provide `username`, which
# is used below) and the docstring delimiters are missing from this excerpt.
# reg_key is not defined in the visible code either.  If regload yields no
# directory, os.path.join will fail -- confirm how that case is meant to be
# handled.
def load_from_registry(self, ignore_discard=False, ignore_expires=False,
252
username: only required on win9x
255
cookies_dir = regload(self.reg_path, self.reg_key)
256
filename = os.path.normpath(os.path.join(cookies_dir, "INDEX.DAT"))
257
self.load(filename, ignore_discard, ignore_expires, username)
259
# NOTE(review): the end of the signature, the end of the docstring, the
# condition in front of the self.filename fallback and whatever closes
# `index` are missing from this excerpt.  As visible, the fallback would
# replace the filename argument whenever self.filename is set, so it is
# presumably guarded by a check that no filename was given -- confirm
# against the complete file.
def load(self, filename, ignore_discard=False, ignore_expires=False,
261
"""Load cookies from an MSIE 'index.dat' cookies index file.
263
filename: full path to cookie index file
264
username: only required on win9x
268
if self.filename is not None: filename = self.filename
269
else: raise ValueError(MISSING_FILENAME_TEXT)
271
# index.dat is a binary file
index = open(filename, "rb")
274
self._really_load(index, filename, ignore_discard, ignore_expires,
279
# Scan the open index.dat file object `index` record by record.  For each
# record that names a cookie file belonging to `username`, either load that
# file immediately (delayload off) or remember it in _delayload_domains so
# it can be read later.
# NOTE(review): many lines of this method are missing from the excerpt (end
# of the signature, loop headers, several conditions, exception handling and
# the computation of `domain`); the comments below describe only the
# statements that are visible.
def _really_load(self, index, filename, ignore_discard, ignore_expires,
281
now = int(time.time())
284
# NOTE(review): raises KeyError when the USERNAME environment variable is
# not set; presumably only executed when no username was passed in --
# confirm.
username = string.lower(os.environ['USERNAME'])
286
# cookie files are looked for in the same directory as index.dat
cookie_dir = os.path.dirname(filename)
288
data = index.read(256)
290
raise IOError("%s file is too short" % filename)
292
# Cookies' index.dat file starts with 32 bytes of signature
293
# followed by an offset to the first record, stored as a little-
295
sig, size, data = data[:32], data[32:36], data[36:]
296
size = struct.unpack("<L", size)[0]
298
# check that sig is valid
299
if not self.magic_re.match(sig) or size != 0x4000:
300
raise IOError("%s ['%s' %s] does not seem to contain cookies" %
301
(str(filename), sig, size))
303
# skip to start of first record
306
sector = 128 # size of sector in bytes
311
# Cookies are usually in two contiguous sectors, so read in two
312
# sectors and adjust if not a Cookie.
314
d = index.read(to_read)
315
# a short read means the end of the file has been reached
if len(d) != to_read:
319
# Each record starts with a 4-byte signature and a count
320
# (little-endian DWORD) of sectors for the record.
321
sig, size, data = data[:4], data[4:8], data[8:]
322
size = struct.unpack("<L", size)[0]
324
# presumably two sectors have already been read at this point, leaving
# (size - 2) sectors of the record still unread
to_read = (size - 2) * sector
326
## from urllib import quote
327
## print "data", quote(data)
328
## print "sig", quote(sig)
329
## print "size in sectors", size
330
## print "size in bytes", size*sector
331
## print "size in units of 16 bytes", (size*sector) / 16
332
## print "size to read in bytes", to_read
336
# NOTE(review): the parentheses make this an assert on a two-element tuple
# (condition, message), which is always true, so an unrecognised signature
# is never reported.  It should read "assert condition, message".
assert (sig in ("HASH", "LEAK",
337
self.padding, "\x00\x00\x00\x00"),
338
"unrecognized MSIE index.dat record: %s" %
340
if sig == "\x00\x00\x00\x00":
341
# assume we've got all the cookies, and stop
343
if sig == self.padding:
345
# skip the rest of this record
349
# whence=1: seek relative to the current file position
index.seek(to_read, 1)
352
# read in rest of record if necessary
354
more_data = index.read(to_read)
355
if len(more_data) != to_read: break
356
data = data + more_data
358
# NOTE(review): username is inserted into the pattern without re.escape(),
# so regex metacharacters in a user name (e.g. ".") are interpreted as
# pattern syntax.
cookie_re = ("Cookie\:%s\@([\x21-\xFF]+).*?" % username +
359
"(%s\@[\x21-\xFF]+\.txt)" % username)
360
m = re.search(cookie_re, data, re.I)
362
# group 2 is the cookie file's name as recorded in index.dat
cookie_file = os.path.join(cookie_dir, m.group(2))
363
if not self.delayload:
365
self.load_cookie_data(cookie_file,
366
ignore_discard, ignore_expires)
368
debug("error reading cookie file, skipping: %s" %
376
# defer reading: remember where this domain's cookies live and how to load
# them
self._delayload_domains[domain] = (
377
cookie_file, ignore_discard, ignore_expires)