1
1
/****************************************************************************
3
3
** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
4
** All rights reserved.
4
5
** Contact: Nokia Corporation (qt-info@nokia.com)
6
7
** This file is part of the QtCore module of the Qt Toolkit.
8
9
** $QT_BEGIN_LICENSE:LGPL$
10
** Licensees holding valid Qt Commercial licenses may use this file in
11
** accordance with the Qt Commercial License Agreement provided with the
12
** Software or, alternatively, in accordance with the terms contained in
13
** a written agreement between you and Nokia.
10
** No Commercial Usage
11
** This file contains pre-release code and may not be distributed.
12
** You may use this file in accordance with the terms and conditions
13
** contained in the Technology Preview License Agreement accompanying
15
16
** GNU Lesser General Public License Usage
16
17
** Alternatively, this file may be used under the terms of the GNU Lesser
20
21
** ensure the GNU Lesser General Public License version 2.1 requirements
21
22
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
23
** In addition, as a special exception, Nokia gives you certain
24
** additional rights. These rights are described in the Nokia Qt LGPL
25
** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
28
** GNU General Public License Usage
29
** Alternatively, this file may be used under the terms of the GNU
30
** General Public License version 3.0 as published by the Free Software
31
** Foundation and appearing in the file LICENSE.GPL included in the
32
** packaging of this file. Please review the following information to
33
** ensure the GNU General Public License version 3.0 requirements will be
34
** met: http://www.gnu.org/copyleft/gpl.html.
36
** If you are unsure which license is appropriate for your use, please
37
** contact the sales department at http://www.qtsoftware.com/contact.
24
** In addition, as a special exception, Nokia gives you certain additional
25
** rights. These rights are described in the Nokia Qt LGPL Exception
26
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28
** If you have questions regarding the use of this file, please contact
29
** Nokia at qt-info@nokia.com.
38
38
** $QT_END_LICENSE$
40
40
****************************************************************************/
99
102
(QTextCodecFactoryInterface_iid, QLatin1String("/codecs")))
105
// Lower-cases a single ASCII letter without consulting the C locale.
//
// Only the range 'A'..'Z' is mapped (by adding 0x20); every other value of
// \a c, including bytes outside the ASCII range, is returned unchanged.
//
// The former `register` storage-class specifier on the parameter has been
// dropped: it was deprecated in C++11 and removed in C++17, and never
// affected the result.
static char qtolower(char c)
{
    return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + 0x20) : c;
}
107
// Returns true if \a c is an ASCII digit ('0'..'9') or an ASCII letter
// ('a'..'z' / 'A'..'Z'), without consulting the C locale.
//
// Letters are tested by OR-ing in 0x20, which folds 'A'..'Z' onto 'a'..'z'
// so a single range comparison covers both cases.
//
// The former `register` storage-class specifier on the parameter has been
// dropped: it was deprecated in C++11 and removed in C++17, and never
// affected the result.
static bool qisalnum(char c)
{
    if (c >= '0' && c <= '9')
        return true;
    const int folded = c | 0x20;
    return folded >= 'a' && folded <= 'z';
}
103
110
static bool nameMatch(const QByteArray &name, const QByteArray &test)
112
119
// if the letters and numbers are the same, we have a match
113
120
while (*n != '\0') {
114
if (isalnum((uchar)*n)) {
118
if (isalnum((uchar)*h))
122
if (tolower((uchar)*n) != tolower((uchar)*h))
129
if (qtolower(*n) != qtolower(*h))
128
while (*h && !isalnum((uchar)*h))
135
while (*h && !qisalnum(*h))
130
137
return (*h == '\0');
199
#ifdef Q_DEBUG_TEXTCODEC
190
200
destroying_is_ok = true;
193
delete all->takeFirst();
203
for (QList<QTextCodec *>::const_iterator it = all->constBegin()
204
; it != all->constEnd(); ++it) {
196
209
localeMapper = 0;
211
#ifdef Q_DEBUG_TEXTCODEC
198
212
destroying_is_ok = false;
201
216
Q_GLOBAL_STATIC(QTextCodecCleanup, createQTextCodecCleanup)
341
356
const char *next = 0;
343
358
while((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
345
360
int charlength = next - mb;
346
361
int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
448
463
// static const char * const tcvnlocales[] = {
449
464
// "vi", "vi_VN", 0 };
451
static bool try_locale_list(const char * const locale[], const char * lang)
466
static bool try_locale_list(const char * const locale[], const QByteArray &lang)
454
for(i=0; locale[i] && *locale[i] && strcmp(locale[i], lang); i++)
469
for(i=0; locale[i] && lang != locale[i]; i++)
456
471
return locale[i] != 0;
505
520
#if !defined(Q_OS_WIN32) && !defined(Q_OS_WINCE)
506
static QTextCodec *checkForCodec(const char *name) {
521
static QTextCodec *checkForCodec(const QByteArray &name) {
507
522
QTextCodec *c = QTextCodec::codecForName(name);
509
const char *at = strchr(name, '@');
511
QByteArray n(name, at - name);
512
c = QTextCodec::codecForName(n.data());
524
const int index = name.indexOf('@');
526
c = QTextCodec::codecForName(name.left(index));
529
543
localeMapper = QTextCodec::codecForName("System");
532
#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
546
#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX) && !defined(Q_OS_OSF)
533
547
if (!localeMapper) {
534
548
char *charset = nl_langinfo (CODESET);
550
564
// definitely knows it, but since we cannot fully trust it, get ready
551
565
// to fall back to environment variables.
552
566
#if !defined(QT_NO_SETLOCALE)
553
char * ctype = qstrdup(setlocale(LC_CTYPE, 0));
567
const QByteArray ctype = setlocale(LC_CTYPE, 0);
555
char * ctype = qstrdup("");
569
const QByteArray ctype;
558
572
// Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
559
573
// environment variables.
560
char * lang = qstrdup(qgetenv("LC_ALL").constData());
561
if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) {
562
if (lang) delete [] lang;
563
lang = qstrdup(qgetenv("LC_CTYPE").constData());
574
QByteArray lang = qgetenv("LC_ALL");
575
if (lang.isEmpty() || lang == "C") {
576
lang = qgetenv("LC_CTYPE");
565
if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) {
566
if (lang) delete [] lang;
567
lang = qstrdup(qgetenv("LANG").constData());
578
if (lang.isEmpty() || lang == "C") {
579
lang = qgetenv("LANG");
570
582
// Now try these in order:
577
589
// 7. guess locale from lang
579
591
// 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
580
char * codeset = ctype ? strchr(ctype, '.') : 0;
581
if (codeset && *codeset == '.')
582
localeMapper = checkForCodec(codeset + 1);
592
int indexOfDot = ctype.indexOf('.');
593
if (indexOfDot != -1)
594
localeMapper = checkForCodec( ctype.mid(indexOfDot + 1) );
584
596
// 2. CODESET from lang if it contains a .CODESET part
585
codeset = lang ? strchr(lang, '.') : 0;
586
if (!localeMapper && codeset && *codeset == '.')
587
localeMapper = checkForCodec(codeset + 1);
598
indexOfDot = lang.indexOf('.');
599
if (indexOfDot != -1)
600
localeMapper = checkForCodec( lang.mid(indexOfDot + 1) );
589
603
// 3. ctype (maybe the locale is named "ISO-8859-1" or something)
590
if (!localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0)
604
if (!localeMapper && !ctype.isEmpty() && ctype != "C")
591
605
localeMapper = checkForCodec(ctype);
593
607
// 4. locale (ditto)
594
if (!localeMapper && lang && *lang != 0)
608
if (!localeMapper && !lang.isEmpty())
595
609
localeMapper = checkForCodec(lang);
598
if ((!localeMapper && ctype && strstr(ctype, "@euro")) || (lang && strstr(lang, "@euro")))
612
if ((!localeMapper && ctype.contains("@euro")) || lang.contains("@euro"))
599
613
localeMapper = checkForCodec("ISO 8859-15");
601
615
// 6. guess locale from ctype unless ctype is "C"
602
616
// 7. guess locale from lang
603
char * try_by_name = ctype;
604
if (ctype && *ctype != 0 && strcmp (ctype, "C") != 0)
617
const QByteArray &try_by_name = (!ctype.isEmpty() && ctype != "C") ? lang : ctype;
607
619
// Now do the guessing.
608
if (lang && *lang && !localeMapper && try_by_name && *try_by_name) {
620
if (!lang.isEmpty() && !localeMapper && !try_by_name.isEmpty()) {
609
621
if (try_locale_list(iso8859_15locales, lang))
610
622
localeMapper = QTextCodec::codecForName("ISO 8859-15");
611
623
else if (try_locale_list(iso8859_2locales, lang))
728
741
setupLocaleMapper();
745
\enum QTextCodec::ConversionFlag
747
\value DefaultConversion No flag is set.
748
\value ConvertInvalidToNull If this flag is set, each invalid input
749
character is output as a null character.
750
\value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any.
752
\omitvalue FreeFunction
756
\fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags)
758
Constructs a ConverterState object initialized with the given \a flags.
762
Destroys the ConverterState object.
731
764
QTextCodec::ConverterState::~ConverterState()
733
766
if (flags & FreeFunction)
873
\enum QTextCodec::ConversionFlag
875
\value DefaultConversion No flag is set.
876
\value ConvertInvalidToNull If this flag is set, each invalid input
877
character is output as a null character.
878
\value IgnoreHeader Ignore any Unicode byte-order mark and don't generate any.
880
\omitvalue FreeFunction
884
\fn QTextCodec::ConverterState::ConverterState(ConversionFlags flags)
886
Constructs a ConverterState object initialized with the given \a flags.
890
\fn QTextCodec::ConverterState::~ConverterState()
892
Destroys the ConverterState object.
898
908
Constructs a QTextCodec, and gives it the highest precedence. The
1047
1059
This might be needed for some applications that want to use their
1048
1060
own mechanism for setting the locale.
1050
Setting this codec is not supported on DOS based Windows.
1052
1062
\sa codecForLocale()
1054
1064
void QTextCodec::setCodecForLocale(QTextCodec *c)
1057
if (QSysInfo::WindowsVersion& QSysInfo::WV_DOS_based)
1060
1066
localeMapper = c;
1061
1067
if (!localeMapper)
1062
1068
setupLocaleMapper();
1511
Tries to detect the encoding of the provided snippet of HTML in the given byte array, \a ba,
1512
and returns a QTextCodec instance that is capable of decoding the HTML to Unicode.
1513
If the codec cannot be detected from the content provided, \a defaultCodec is returned.
1517
Tries to detect the encoding of the provided snippet of HTML in
1518
the given byte array, \a ba, by checking the BOM (Byte Order Mark)
1519
and the content-type meta header and returns a QTextCodec instance
1520
that is capable of decoding the HTML to Unicode. If the codec
1521
cannot be detected from the content provided, \a defaultCodec is
1524
\sa codecForUtfText()
1515
1526
QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec)
1519
1530
QTextCodec *c = 0;
1521
if (ba.size() > 1 && (((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff)
1522
|| ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe))) {
1523
c = QTextCodec::codecForMib(1015); // utf16
1524
} else if (ba.size() > 2
1525
&& (uchar)ba[0] == 0xef
1526
&& (uchar)ba[1] == 0xbb
1527
&& (uchar)ba[2] == 0xbf) {
1528
c = QTextCodec::codecForMib(106); // utf-8
1532
c = QTextCodec::codecForUtfText(ba, c);
1530
1534
QByteArray header = ba.left(512).toLower();
1531
1535
if ((pos = header.indexOf("http-equiv=")) != -1) {
1532
pos = header.indexOf("charset=", pos) + int(strlen("charset="));
1534
int pos2 = header.indexOf('\"', pos+1);
1535
QByteArray cs = header.mid(pos, pos2-pos);
1536
// qDebug("found charset: %s", cs.data());
1537
c = QTextCodec::codecForName(cs);
1536
if ((pos = header.lastIndexOf("meta ", pos)) != -1) {
1537
pos = header.indexOf("charset=", pos) + int(strlen("charset="));
1539
int pos2 = header.indexOf('\"', pos+1);
1540
QByteArray cs = header.mid(pos, pos2-pos);
1541
// qDebug("found charset: %s", cs.data());
1542
c = QTextCodec::codecForName(cs);
1550
If the codec cannot be detected, this overload returns a Latin-1 QTextCodec.
1556
Tries to detect the encoding of the provided snippet of HTML in
1557
the given byte array, \a ba, by checking the BOM (Byte Order Mark)
1558
and the content-type meta header and returns a QTextCodec instance
1559
that is capable of decoding the HTML to Unicode. If the codec cannot
1560
be detected, this overload returns a Latin-1 QTextCodec.
1552
1562
QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
1554
1564
return codecForHtml(ba, QTextCodec::codecForMib(/*Latin 1*/ 4));
1570
Tries to detect the encoding of the provided snippet \a ba by
1571
using the BOM (Byte Order Mark) and returns a QTextCodec instance
1572
that is capable of decoding the text to Unicode. If the codec
1573
cannot be detected from the content provided, \a defaultCodec is
1576
The behavior of this function is undefined if \a ba is not
1581
QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec)
1583
const int arraySize = ba.size();
1585
if (arraySize > 3) {
1586
if ((uchar)ba[0] == 0x00
1587
&& (uchar)ba[1] == 0x00
1588
&& (uchar)ba[2] == 0xFE
1589
&& (uchar)ba[3] == 0xFF)
1590
return QTextCodec::codecForMib(1018); // utf-32 be
1591
else if ((uchar)ba[0] == 0xFF
1592
&& (uchar)ba[1] == 0xFE
1593
&& (uchar)ba[2] == 0x00
1594
&& (uchar)ba[3] == 0x00)
1595
return QTextCodec::codecForMib(1019); // utf-32 le
1599
return defaultCodec;
1600
if ((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff)
1601
return QTextCodec::codecForMib(1013); // utf16 be
1602
else if ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe)
1603
return QTextCodec::codecForMib(1014); // utf16 le
1606
return defaultCodec;
1607
if ((uchar)ba[0] == 0xef
1608
&& (uchar)ba[1] == 0xbb
1609
&& (uchar)ba[2] == 0xbf)
1610
return QTextCodec::codecForMib(106); // utf-8
1612
return defaultCodec;
1618
Tries to detect the encoding of the provided snippet \a ba by
1619
using the BOM (Byte Order Mark) and returns a QTextCodec instance
1620
that is capable of decoding the text to Unicode. If the codec
1621
cannot be detected, this overload returns a Latin-1 QTextCodec.
1623
The behavior of this function is undefined if \a ba is not
1628
QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba)
1630
return codecForUtfText(ba, QTextCodec::codecForMib(/*Latin 1*/ 4));