2
* Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
3
* Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
4
* Copyright (C) 2010 Igalia S.L.
6
* This library is free software; you can redistribute it and/or
7
* modify it under the terms of the GNU Library General Public
8
* License as published by the Free Software Foundation; either
9
* version 2 of the License, or (at your option) any later version.
11
* This library is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
* Library General Public License for more details.
16
* You should have received a copy of the GNU Library General Public License
17
* along with this library; see the file COPYING.LIB. If not, write to
18
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19
* Boston, MA 02110-1301, USA.
24
#include "UnicodeGLib.h"
26
#include <wtf/Vector.h>
27
#include <wtf/unicode/UTF8.h>
29
// True when |character| lies in [0x10000, 0x10FFFF], i.e. outside the Basic
// Multilingual Plane, so it needs two UTF-16 code units (a surrogate pair).
// The argument is parenthesized so that expressions such as `a ? b : c`
// expand with the intended precedence. Note that the argument is still
// evaluated twice, so avoid passing expressions with side effects.
#define UTF8_IS_SURROGATE(character) ((character) >= 0x10000 && (character) <= 0x10FFFF)
34
// Case-folds a single code point by round-tripping through GLib's UTF-8 API:
// UCS-4 -> UTF-8 (g_ucs4_to_utf8), fold (g_utf8_casefold), then
// UTF-8 -> UCS-4 (g_utf8_to_ucs4_fast).
// NOTE(review): the braces, any check of gerror and the return statement are
// not visible in this listing — confirm against the complete file.
UChar32 foldCase(UChar32 ch)
36
// Receives the error, if any, reported by g_ucs4_to_utf8() below.
GOwnPtr<GError> gerror;
38
GOwnPtr<char> utf8char;
39
// Encode exactly one code point (length 1); the two optional count
// out-parameters are not requested (both passed as 0).
utf8char.set(g_ucs4_to_utf8(reinterpret_cast<gunichar*>(&ch), 1, 0, 0, &gerror.outPtr()));
43
GOwnPtr<char> utf8caseFolded;
44
// Length -1: the input is treated as a NUL-terminated string.
utf8caseFolded.set(g_utf8_casefold(utf8char.get(), -1));
46
GOwnPtr<gunichar> ucs4Result;
47
// Decode the folded UTF-8 text back into UCS-4 code points.
ucs4Result.set(g_utf8_to_ucs4_fast(utf8caseFolded.get(), -1, 0));
52
// Counts the UTF-16 code units needed to represent the UTF-8 text in
// |utf8String|, looking at no more than the first |length| bytes and stopping
// early at a NUL byte.
// NOTE(review): the declaration of utf16Length and the return statement are
// not visible in this listing; presumably the count starts at 0 and is
// returned — confirm against the complete file.
static int getUTF16LengthFromUTF8(const gchar* utf8String, int length)
55
// Cursor that advances one UTF-8 encoded character at a time.
const gchar* inputString = utf8String;
57
// Keep going while the cursor is inside the first |length| bytes and has not
// reached a NUL terminator.
while ((utf8String + length - inputString > 0) && *inputString) {
58
gunichar character = g_utf8_get_char(inputString);
60
// Code points in [0x10000, 0x10FFFF] count as two UTF-16 units, all others as one.
utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1;
61
inputString = g_utf8_next_char(inputString);
67
// Pointer to a UTF-8 case-mapping function as passed to convertCase() below
// (g_utf8_casefold, g_utf8_strdown, g_utf8_strup): it takes a UTF-8 string
// plus its byte length and returns a gchar* result.
typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length);
69
// Shared implementation of foldCase(), toLower() and toUpper() for UTF-16
// buffers. The |srcLength| UTF-16 units at |src| are converted to UTF-8,
// passed through |caseFunction|, and the outcome is converted back to UTF-16
// into |result|, which has room for |resultLength| units.
// The final return value is the number of UTF-16 units written, or -1 when
// nothing was written.
// NOTE(review): the braces and the statements inside the error branches
// (presumably setting *error and returning early) are not visible in this
// listing — confirm against the complete file.
static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction)
73
// Allocate a buffer big enough to hold all the characters
// (three UTF-8 bytes are reserved per UTF-16 code unit).
74
Vector<char> buffer(srcLength * 3);
75
char* utf8Target = buffer.data();
76
const UChar* utf16Source = src;
77
ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true);
78
if (conversionResult != conversionOK) {
82
// Trim the buffer down to the bytes the conversion actually produced.
buffer.shrink(utf8Target - buffer.data());
84
// Apply the requested case mapping to exactly buffer.size() bytes.
GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size()));
85
long utf8ResultLength = strlen(utf8Result.get());
87
// Calculate the destination buffer size.
88
int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength);
89
// The mapped text does not fit into the caller's buffer.
if (realLength > resultLength) {
94
// Convert the result to UTF-16.
95
UChar* utf16Target = result;
96
const char* utf8Source = utf8Result.get();
97
// Required out-parameter of convertUTF8ToUTF16(); its value is not used here.
// The name must match the one passed in the call below.
bool unusedIsAllASCII;
98
conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, &unusedIsAllASCII, true);
99
// Number of UTF-16 units written: how far the target cursor advanced.
long utf16ResultLength = utf16Target - result;
100
if (conversionResult != conversionOK)
103
// A non-positive length is reported as -1.
return utf16ResultLength <= 0 ? -1 : utf16ResultLength;
105
// Case-folds the |srcLength| UTF-16 units at |src| into |result| (capacity
// |resultLength|) by delegating to convertCase() with g_utf8_casefold.
// The return value and |error| are exactly those produced by convertCase().
int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
107
return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold);
110
// Lower-cases the |srcLength| UTF-16 units at |src| into |result| (capacity
// |resultLength|) by delegating to convertCase() with g_utf8_strdown.
// The return value and |error| are exactly those produced by convertCase().
int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
112
return convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown);
115
// Upper-cases the |srcLength| UTF-16 units at |src| into |result| (capacity
// |resultLength|) by delegating to convertCase() with g_utf8_strup.
// The return value and |error| are exactly those produced by convertCase().
int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
117
return convertCase(result, resultLength, src, srcLength, error, g_utf8_strup);
120
// Maps the Pango bidirectional type of code point |c| (as reported by
// pango_bidi_type_for_unichar) onto the corresponding Direction value.
// NOTE(review): the switch header, the return statements for
// PANGO_BIDI_TYPE_L, PANGO_BIDI_TYPE_R and PANGO_BIDI_TYPE_AN, and any
// default branch are not visible in this listing. As shown, those labels
// would fall through to the next case, which is presumably not the intent —
// confirm against the complete file.
Direction direction(UChar32 c)
122
PangoBidiType type = pango_bidi_type_for_unichar(c);
124
// Strong types.
case PANGO_BIDI_TYPE_L:
126
case PANGO_BIDI_TYPE_R:
128
case PANGO_BIDI_TYPE_AL:
129
return RightToLeftArabic;
130
// Explicit embedding, override and pop formatting types.
case PANGO_BIDI_TYPE_LRE:
131
return LeftToRightEmbedding;
132
case PANGO_BIDI_TYPE_RLE:
133
return RightToLeftEmbedding;
134
case PANGO_BIDI_TYPE_LRO:
135
return LeftToRightOverride;
136
case PANGO_BIDI_TYPE_RLO:
137
return RightToLeftOverride;
138
case PANGO_BIDI_TYPE_PDF:
139
return PopDirectionalFormat;
140
// Number-related types.
case PANGO_BIDI_TYPE_EN:
141
return EuropeanNumber;
142
case PANGO_BIDI_TYPE_AN:
144
case PANGO_BIDI_TYPE_ES:
145
return EuropeanNumberSeparator;
146
case PANGO_BIDI_TYPE_ET:
147
return EuropeanNumberTerminator;
148
case PANGO_BIDI_TYPE_CS:
149
return CommonNumberSeparator;
150
// Marks, separators and whitespace.
case PANGO_BIDI_TYPE_NSM:
151
return NonSpacingMark;
152
case PANGO_BIDI_TYPE_BN:
153
return BoundaryNeutral;
154
case PANGO_BIDI_TYPE_B:
155
return BlockSeparator;
156
case PANGO_BIDI_TYPE_S:
157
return SegmentSeparator;
158
case PANGO_BIDI_TYPE_WS:
159
return WhiteSpaceNeutral;
165
// Case-insensitive comparison of the first |len| UTF-16 units of |a| and |b|.
// Both inputs are converted to UTF-8, case-folded with g_utf8_casefold(), and
// the folded strings are compared with g_utf8_collate(), whose result is
// returned unchanged.
// NOTE(review): the declarations of utf8a and utf8b are not visible in this
// listing — confirm against the complete file.
int umemcasecmp(const UChar* a, const UChar* b, int len)
170
// Convert both inputs to UTF-8; the optional count and error out-parameters
// are not requested (all passed as 0).
utf8a.set(g_utf16_to_utf8(a, len, 0, 0, 0));
171
utf8b.set(g_utf16_to_utf8(b, len, 0, 0, 0));
173
GOwnPtr<char> foldedA;
174
GOwnPtr<char> foldedB;
176
// Length -1: the inputs are treated as NUL-terminated strings.
foldedA.set(g_utf8_casefold(utf8a.get(), -1));
177
foldedB.set(g_utf8_casefold(utf8b.get(), -1));
179
// FIXME: umemcasecmp needs to mimic u_memcasecmp of icu
180
// from the ICU docs:
181
// "Compare two strings case-insensitively using full case folding.
182
// This is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options))."
184
// So it looks like we don't need the full g_utf8_collate here,
185
// but really a bitwise comparison of casefolded unicode chars (not utf-8 bytes).
186
// As there is no direct equivalent to this icu function in GLib, for now
187
// we'll use g_utf8_collate():
189
return g_utf8_collate(foldedA.get(), foldedB.get());