2
* Copyright (C) 2009 Julien Chaffraix <jchaffraix@pleyo.com>
3
* Copyright (C) 2010, 2011, 2012 Research In Motion Limited. All rights reserved.
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
8
* 1. Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* 2. Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in the
12
* documentation and/or other materials provided with the distribution.
14
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
#include "CookieParser.h"
31
#include "ParsedCookie.h"
32
#include <network/DomainTools.h>
33
#include <wtf/CurrentTime.h>
34
#include <wtf/text/CString.h>
38
// Error-path helper used by parseOneCookie: forwards its printf-style arguments to LOG_ERROR.
// NOTE(review): the name suggests it also disposes of the cookie under construction and leaves
// the function, but the remaining continuation lines are not visible in this excerpt - confirm.
#define LOG_AND_DELETE(format, ...) \
40
LOG_ERROR(format, ## __VA_ARGS__); \
45
// Returns true when c is one of the characters that separate two cookies inside a
// header string: carriage return or line feed.
static inline bool isCookieHeaderSeparator(UChar c)
{
    return c == '\r' || c == '\n';
}
50
// Returns true when c is "lightweight" whitespace, i.e. a space or a horizontal tab.
// The parser uses this to trim names, values and attribute tokens.
static inline bool isLightweightSpace(UChar c)
{
    return c == ' ' || c == '\t';
}
55
// Builds a parser whose defaults (URL, host and "host is an IP address" flag) are derived
// from defaultCookieURL. These defaults are applied to cookies that do not carry their own
// domain (see the end of parseOneCookie).
CookieParser::CookieParser(const KURL& defaultCookieURL)
56
: m_defaultCookieURL(defaultCookieURL)
58
m_defaultCookieHost = defaultCookieURL.host();
59
m_defaultDomainIsIPAddress = false;
60
// A non-empty result from getCanonicalIPFormat is treated as "the host is an IP address";
// in that case the canonical form replaces the host string and the flag is set.
string hostDomainCanonical = BlackBerry::Platform::getCanonicalIPFormat(m_defaultCookieHost.utf8().data()).c_str();
61
if (!hostDomainCanonical.empty()) {
62
m_defaultCookieHost = String(hostDomainCanonical.c_str());
63
m_defaultDomainIsIPAddress = true;
65
// Keep the host with a leading '.', adding one when it is missing.
m_defaultCookieHost = m_defaultCookieHost.startsWith(".") ? m_defaultCookieHost : "." + m_defaultCookieHost;
68
// Destructor. NOTE(review): its body is not visible in this excerpt.
CookieParser::~CookieParser()
72
// Parses a header string that may hold several cookies separated by '\r' or '\n'
// (see isCookieHeaderSeparator). Each segment is handed to parseOneCookie and the
// resulting ParsedCookie pointers are collected and returned.
Vector<ParsedCookie*> CookieParser::parse(const String& cookies)
74
// Only cookieEnd is initialised here; cookieStart is assigned at the top of every iteration.
unsigned cookieStart, cookieEnd = 0;
75
// Sample the clock once so that every cookie of this header is parsed with the same time value.
double curTime = currentTime();
76
Vector<ParsedCookie*, 4> parsedCookies;
78
unsigned cookiesLength = cookies.length();
79
if (!cookiesLength) // Code below doesn't handle this case
82
// Iterate over the header to parse all the cookies.
83
// NOTE(review): both loops use '<=', so cookies[cookiesLength] is read; presumably
// String::operator[] tolerates an index equal to the length - confirm.
while (cookieEnd <= cookiesLength) {
84
cookieStart = cookieEnd;
86
// Find a cookie separator.
87
while (cookieEnd <= cookiesLength && !isCookieHeaderSeparator(cookies[cookieEnd]))
90
// Detect an empty cookie and go to the next one.
91
if (cookieStart == cookieEnd) {
96
if (cookieEnd < cookiesLength && isCookieHeaderSeparator(cookies[cookieEnd]))
99
// cookieStart and cookieEnd - 1 are passed as the start/end positions of this cookie in the header.
ParsedCookie* cookie = parseOneCookie(cookies, cookieStart, cookieEnd - 1, curTime);
101
parsedCookies.append(cookie);
103
return parsedCookies;
106
// Convenience overload: parses the whole of |cookie| as one cookie, stamped with the
// current time, by delegating to the range-based overload.
//
// Returns 0 for an empty string. parse() applies the same empty-input guard; without it
// cookie.length() - 1 wraps around in unsigned arithmetic and an enormous end index is
// handed to the range-based overload.
ParsedCookie* CookieParser::parseOneCookie(const String& cookie)
{
    if (!cookie.length())
        return 0;

    return parseOneCookie(cookie, 0, cookie.length() - 1, currentTime());
}
111
// Parses one cookie out of |cookie|, looking only at the characters between |start| and |end|,
// and fills a freshly allocated ParsedCookie (constructed with |curTime|).
// The first pair is read as [NAME "="] VALUE; every following ';'-separated pair is treated as
// a cookie attribute (path, domain, expires, max-age, comment, version, secure, HttpOnly).
// Defaults for domain and path are applied at the end when the cookie did not supply them.
// NOTE(review): the return statements are not visible in this excerpt; presumably |res| is
// returned on success and LOG_AND_DELETE leaves the function on rejection - confirm.
//
// The cookie String passed into this method will only contain the name value pairs as well as other related cookie
112
// attributes such as max-age and domain. Set-Cookie should never be part of this string.
113
ParsedCookie* CookieParser::parseOneCookie(const String& cookie, unsigned start, unsigned end, double curTime)
115
ParsedCookie* res = new ParsedCookie(curTime);
118
LOG_AND_DELETE("Out of memory");
120
// The cookie inherits the protocol of the URL this parser was created for.
res->setProtocol(m_defaultCookieURL.protocol());
122
// Parse [NAME "="] VALUE
123
unsigned tokenEnd = start; // Token end contains the position of the '=' or the end of a token
124
unsigned pairEnd = start; // Pair end contains always the position of the ';'
126
// Find the first ';' which is not double-quoted and the '=' (if they exist).
127
bool foundEqual = false;
128
while (pairEnd < end && cookie[pairEnd] != ';') {
129
if (cookie[pairEnd] == '=') {
130
// tokenEnd still equal to start means no '=' has been recorded yet.
if (tokenEnd == start) {
134
} else if (cookie[pairEnd] == '"') {
135
// Jump past a double-quoted run so that a ';' inside the quotes does not end the pair.
size_t secondQuotePosition = cookie.find('"', pairEnd + 1);
136
if (secondQuotePosition != notFound && secondQuotePosition <= end) {
137
pairEnd = secondQuotePosition + 1;
144
unsigned tokenStart = start;
146
bool hasName = false; // This is a hack to avoid changing too much in this
147
// brutally brittle code.
148
if (tokenEnd != start) {
149
// There is a '=' so parse the NAME
150
unsigned nameEnd = tokenEnd;
152
// The tokenEnd is the position of the '=' so the nameEnd is one less
155
// Remove lightweight spaces.
156
while (nameEnd && isLightweightSpace(cookie[nameEnd]))
159
while (tokenStart < nameEnd && isLightweightSpace(cookie[tokenStart]))
162
if (nameEnd + 1 <= tokenStart)
163
LOG_AND_DELETE("Empty name. Rejecting the cookie");
165
// NOTE(review): the substring begins at tokenStart but its length is measured from start;
// the two differ once leading spaces have been skipped above - confirm the intended length.
String name = cookie.substring(tokenStart, nameEnd + 1 - start);
170
// Now parse the VALUE
171
tokenStart = tokenEnd + 1;
175
// Skip lightweight spaces in our token
176
while (tokenStart < pairEnd && isLightweightSpace(cookie[tokenStart]))
180
while (tokenEnd > tokenStart && isLightweightSpace(cookie[tokenEnd - 1]))
184
if (tokenEnd == tokenStart) {
185
// Firefox accepts empty value so we will do the same
188
value = cookie.substring(tokenStart, tokenEnd - tokenStart);
191
res->setValue(value);
192
else if (foundEqual) {
196
res->setName(value); // No NAME=VALUE, only NAME
198
// Attribute loop: one iteration per remaining ';'-separated pair.
while (pairEnd < end) {
199
// Switch to the next pair as pairEnd is on the ';' and fast-forward any lightweight spaces.
201
while (pairEnd < end && isLightweightSpace(cookie[pairEnd]))
204
tokenStart = pairEnd;
205
tokenEnd = tokenStart; // initialize token end to catch first '='
207
while (pairEnd < end && cookie[pairEnd] != ';') {
208
if (tokenEnd == tokenStart && cookie[pairEnd] == '=')
213
// FIXME : should we skip lightweight spaces here ?
215
// length is the size of the attribute name; tokenStartSvg remembers where that name begins,
// because tokenStart is moved on to the value below.
unsigned length = tokenEnd - tokenStart;
216
unsigned tokenStartSvg = tokenStart;
219
if (tokenStart != tokenEnd) {
220
// There is an equal sign so remove lightweight spaces in VALUE
221
tokenStart = tokenEnd + 1;
222
while (tokenStart < pairEnd && isLightweightSpace(cookie[tokenStart]))
226
while (tokenEnd > tokenStart && isLightweightSpace(cookie[tokenEnd - 1]))
229
parsedValue = cookie.substring(tokenStart, tokenEnd - tokenStart);
231
// If the parsedValue is empty, initialise it in case we need it
232
parsedValue = String();
233
// Handle a token without value.
234
length = pairEnd - tokenStart;
237
// Detect which "cookie-av" is parsed
238
// Look at the first char then parse the whole for performance issue
239
switch (cookie[tokenStartSvg]) {
242
// NOTE(review): here and in the cases below the result of cookie.find() is used directly as
// the condition, whereas the quote search earlier compares its result against notFound.
// Confirm that this really tests "the attribute name matches at tokenStartSvg".
if (length >= 4 && cookie.find("ath", tokenStartSvg + 1, false)) {
243
// We need the path to be decoded to match those returned from KURL::path().
244
// The path attribute may or may not include percent-encoded characters. Fortunately
245
// if there are no percent-encoded characters, decoding the url is a no-op.
246
res->setPath(decodeURLEscapeSequences(parsedValue));
248
// We have to disable the following check because sites like Facebook and
249
// Gmail currently do not follow the spec.
251
// Check if path attribute is a prefix of the request URI.
252
if (!m_defaultCookieURL.path().startsWith(res->path()))
253
LOG_AND_DELETE("Invalid cookie %s (path): it does not math the URL", cookie.ascii().data());
257
LOG_AND_DELETE("Invalid cookie %s (path)", cookie.ascii().data());
263
if (length >= 6 && cookie.find("omain", tokenStartSvg + 1, false)) {
264
// Strip one pair of surrounding double quotes from the domain value.
if (parsedValue.length() > 1 && parsedValue[0] == '"' && parsedValue[parsedValue.length() - 1] == '"')
265
parsedValue = parsedValue.substring(1, parsedValue.length() - 2);
267
// Check if the domain contains an embedded dot.
268
size_t dotPosition = parsedValue.find(".", 1);
269
// NOTE(review): a position returned by find() lies inside the string, so the comparison with
// parsedValue.length() may never be true; a trailing dot would sit at length() - 1 - confirm.
if (dotPosition == notFound || dotPosition == parsedValue.length())
270
LOG_AND_DELETE("Invalid cookie %s (domain): it does not contain an embedded dot", cookie.ascii().data());
272
// If the domain does not start with a dot, add one for security checks,
273
// For example: ab.c.com does not domain match b.c.com;
274
String realDomain = parsedValue[0] == '.' ? parsedValue : "." + parsedValue;
276
// Try to return a canonical IP address if the domain is an IP
278
bool isIPAddress = false;
279
// We only check if the current domain is an IP address when the default domain is an IP address
280
// We know if the default domain is not an IP address and the current domain is, it won't suffix match
281
// If it is an IP Address, we should treat it only if it matches the host exactly
282
// We determine the canonical IP format before comparing because IPv6 could be represented in multiple formats
283
if (m_defaultDomainIsIPAddress) {
284
String realDomainCanonical = String(BlackBerry::Platform::getCanonicalIPFormat(realDomain.utf8().data()).c_str());
285
if (realDomainCanonical.isEmpty() || realDomainCanonical != m_defaultCookieHost)
286
LOG_AND_DELETE("Invalid cookie %s (domain): domain is IP but does not match host's IP", cookie.ascii().data());
287
realDomain = realDomainCanonical;
290
// The request host should domain match the Domain attribute.
291
// Domain string starts with a dot, so a.b.com should domain match .a.b.com.
292
// add a "." at beginning of host name, because it can handle many cases such as
293
// a.b.com matches b.com, a.b.com matches .B.com and a.b.com matches .A.b.Com
295
// We also have to make a special case for IP addresses. If a website tries to set
296
// a cookie to 61.97, that domain is not an IP address and will end with the m_defaultCookieHost
297
if (!m_defaultCookieHost.endsWith(realDomain, false))
298
LOG_AND_DELETE("Invalid cookie %s (domain): it does not domain match the host", cookie.ascii().data());
299
// We should check for an embedded dot in the portion of string in the host not in the domain
300
// but to match firefox behaviour we do not.
302
// Check whether the domain is a top level domain, if it is throw it out
303
// http://publicsuffix.org/list/
304
if (BlackBerry::Platform::isTopLevelDomain(realDomain.utf8().data()))
305
LOG_AND_DELETE("Invalid cookie %s (domain): it did not pass the top level domain check", cookie.ascii().data());
307
res->setDomain(realDomain, isIPAddress);
309
LOG_AND_DELETE("Invalid cookie %s (domain)", cookie.ascii().data());
315
if (length >= 7 && cookie.find("xpires", tokenStartSvg + 1, false))
316
res->setExpiry(parsedValue);
318
LOG_AND_DELETE("Invalid cookie %s (expires)", cookie.ascii().data());
324
if (length >= 7 && cookie.find("ax-age", tokenStartSvg + 1, false))
325
res->setMaxAge(parsedValue);
327
LOG_AND_DELETE("Invalid cookie %s (max-age)", cookie.ascii().data());
333
if (length >= 7 && cookie.find("omment", tokenStartSvg + 1, false))
334
// We do not have room for the comment part (neither does Mozilla) so just log the comment.
335
LOG(Network, "Comment %s for ParsedCookie : %s\n", parsedValue.ascii().data(), cookie.ascii().data());
337
LOG_AND_DELETE("Invalid cookie %s (comment)", cookie.ascii().data());
343
if (length >= 7 && cookie.find("ersion", tokenStartSvg + 1, false)) {
344
// Although the outdated Cookie Spec (RFC2965, http://tools.ietf.org/html/rfc2965) defined
345
// the value of version can only contain DIGIT, some random sites, e.g. https://devforums.apple.com
346
// would use double quotation marks to quote the digit. So we need to get rid of them for compliance.
347
if (parsedValue.length() > 1 && parsedValue[0] == '"' && parsedValue[parsedValue.length() - 1] == '"')
348
parsedValue = parsedValue.substring(1, parsedValue.length() - 2);
350
if (parsedValue.toInt() != 1)
351
LOG_AND_DELETE("ParsedCookie version %d not supported (only support version=1)", parsedValue.toInt());
353
LOG_AND_DELETE("Invalid cookie %s (version)", cookie.ascii().data());
359
// Secure is a standalone token ("Secure;")
360
if (length >= 6 && cookie.find("ecure", tokenStartSvg + 1, false))
361
res->setSecureFlag(true);
363
LOG_AND_DELETE("Invalid cookie %s (secure)", cookie.ascii().data());
368
// HttpOnly is a standalone token ("HttpOnly;")
369
if (length >= 8 && cookie.find("ttpOnly", tokenStartSvg + 1, false))
370
res->setIsHttpOnly(true);
372
LOG_AND_DELETE("Invalid cookie %s (HttpOnly)", cookie.ascii().data());
377
// If length == 0, we should be at the end of the cookie (case : ";\r") so ignore it
379
LOG_ERROR("Invalid token for cookie %s", cookie.ascii().data());
384
// Check if the cookie is valid with respect to the size limit.
385
if (!res->isUnderSizeLimit())
386
LOG_AND_DELETE("ParsedCookie %s is above the 4kb in length : REJECTED", cookie.ascii().data());
388
// If some pair was not provided, during parsing then apply some default value
389
// the rest has been done in the constructor.
391
// If no domain was provided, set it to the host
393
res->setDomain(m_defaultCookieHost, m_defaultDomainIsIPAddress);
395
// According to the Cookie Specification (RFC6265, section 4.1.2.4 and 5.2.4, http://tools.ietf.org/html/rfc6265),
396
// If no path was provided or the first character of the path value is not '/', set it to the host's path
399
// 4.1.2.4. The Path Attribute
401
// The scope of each cookie is limited to a set of paths, controlled by
402
// the Path attribute. If the server omits the Path attribute, the user
403
// agent will use the "directory" of the request-uri's path component as
404
// the default value. (See Section 5.1.4 for more details.)
406
// 5.2.4. The Path Attribute
408
// If the attribute-name case-insensitively matches the string "Path",
409
// the user agent MUST process the cookie-av as follows.
411
// If the attribute-value is empty or if the first character of the
412
// attribute-value is not %x2F ("/"):
414
// Let cookie-path be the default-path.
418
// Let cookie-path be the attribute-value.
420
// Append an attribute to the cookie-attribute-list with an attribute-
421
// name of Path and an attribute-value of cookie-path.
422
if (!res->path() || !res->path().length() || !res->path().startsWith("/", false)) {
423
// Default path: cut out of the raw URL string, starting at pathStart().
// NOTE(review): presumably this ends just before the last '/' of the path - confirm against KURL.
String path = m_defaultCookieURL.string().substring(m_defaultCookieURL.pathStart(), m_defaultCookieURL.pathAfterLastSlash() - m_defaultCookieURL.pathStart() - 1);
426
// Since this is reading the raw url string, it could contain percent-encoded sequences. We
427
// want it to be comparable to the return value of url.path(), which is not percent-encoded,
428
// so we must remove the escape sequences.
429
res->setPath(decodeURLEscapeSequences(path));
435
} // namespace WebCore