7
// UTF-16 is the primary encoding mechanism used by Microsoft Windows 2000, Windows 2000 Server, Windows XP and Windows 2003 Server.
8
// Unicode Byte Order Mark (BOM)
9
// Byte order marks packed into one integer constant per encoding.
// Read left to right, the hex digits give the BOM bytes in stream order
// (e.g. 0xFFFE0000 corresponds to the bytes FF FE 00 00).
enum {UNICODE_UTF32_BE = 0x0000FEFF };
10
enum {UNICODE_UTF32_LE = 0xFFFE0000 };
11
enum {UNICODE_UTF16_BE = 0xFEFF };
12
enum {UNICODE_UTF16_LE = 0xFFFE };
13
// Three-byte UTF-8 signature EF BB BF.
enum {UNICODE_UTF8 = 0xEFBBBF };
15
// BOM byte tables. Element 0 is the count of BOM bytes (marked /*size*/);
// the remaining elements are the BOM bytes themselves, in stream order.
const BYTE UTF32_BE[] = {0x04 /*size*/, 0x00, 0x00, 0xFE, 0xFF };
16
const BYTE UTF32_LE[] = {0x04 /*size*/, 0xFF, 0xFE, 0x00, 0x00 };
17
const BYTE UTF16_BE[] = {0x02 /*size*/, 0xFE, 0xFF };
18
const BYTE UTF16_LE[] = {0x02 /*size*/, 0xFF, 0xFE };
19
const BYTE UTF8[] = {0x03 /*size*/, 0xEF, 0xBB, 0xBF };
21
// The BOM value 0xFEFF on its own; numerically equal to UNICODE_UTF16_BE.
enum {UNICODE_BOM = 0xfeff};
23
// UTF-16 is the default encoding form of the Unicode Standard
24
// On Linux and Mac OS X, wchar_t is 4 bytes!
25
// On Windows, wchar_t is 2 bytes!
28
// Convert a single ANSICHAR to TCHAR by passing it through ConvertUTF8toUTF16.
// NOTE(review): `output` is not declared in the visible lines; presumably a local of type TCHAR - confirm.
inline TCHAR ConvertAnsiCharToTCHAR(ANSICHAR In)
31
// Input range: exactly one t_UTF8 unit, the parameter itself.
const t_UTF8 *source_start = &In;
32
const t_UTF8 *source_end = source_start + 1;
33
// Output range: `output` reinterpreted as a sequence of t_UTF16 units.
t_UTF16* target_start = reinterpret_cast<t_UTF16*>(&output);
34
// NOTE(review): pointer arithmetic is in t_UTF16 units, so this bound lies
// sizeof(wchar_t) *elements* past target_start, not sizeof(wchar_t) bytes.
// Confirm `output` really has that much room, otherwise the converter may write past it.
t_UTF16* target_end = target_start + sizeof(wchar_t);
36
// The converter takes the range pointers by address and is run with the lenientConversion flag.
ConversionResult res = ConvertUTF8toUTF16(&source_start, source_end, &target_start, target_end, lenientConversion);
37
// Any result other than conversionOK takes the branch below (its body is not visible here).
if (res != conversionOK)
44
// Convert a single TCHAR to ANSICHAR by passing it through ConvertUTF16toUTF8.
// NOTE(review): `output` is not declared in the visible lines; presumably a local of type ANSICHAR - confirm.
inline ANSICHAR ConvertTCHARToAnsiChar(TCHAR In)
47
// Input range: exactly one t_UTF16 unit, the parameter itself.
const t_UTF16 *source_start = &In;
48
const t_UTF16 *source_end = source_start + 1;
49
// Output range: `output` reinterpreted as a sequence of t_UTF8 units.
t_UTF8* target_start = reinterpret_cast<t_UTF8*>(&output);
50
// NOTE(review): the bound is sizeof(wchar_t) t_UTF8 units past &output, while the
// function returns a single ANSICHAR. Confirm `output` is at least that large.
t_UTF8* target_end = target_start + sizeof(wchar_t);
52
// The converter takes the range pointers by address and is run with the lenientConversion flag.
ConversionResult res = ConvertUTF16toUTF8(&source_start, source_end, &target_start, target_end, lenientConversion);
53
// Any result other than conversionOK takes the branch below (its body is not visible here).
if (res != conversionOK)
59
// Pass-through variants: the argument is returned unchanged, relying on implicit
// conversion between UNICHAR and TCHAR.
// NOTE(review): the same function names are defined again further down with a real
// conversion, so these are presumably selected by a preprocessor condition that is
// not visible here (likely the build where TCHAR is the wide type) - confirm.
inline TCHAR ConvertUnicodeCharToTCHAR(UNICHAR In) {return In;}
60
inline UNICHAR ConvertTCHARToUnicodeChar(TCHAR In) {return In;}
62
// Converting variant: turn a single UNICHAR into a TCHAR via ConvertUTF16toUTF8.
// NOTE(review): `output` is not declared in the visible lines; presumably a local of type TCHAR - confirm.
inline TCHAR ConvertUnicodeCharToTCHAR(UNICHAR In)
65
// Input range: exactly one t_UTF16 unit, the parameter itself.
const t_UTF16 *source_start = &In;
66
const t_UTF16 *source_end = source_start + 1;
67
// Output range: `output` reinterpreted as a sequence of t_UTF8 units.
t_UTF8* target_start = reinterpret_cast<t_UTF8*>(&output);
68
// NOTE(review): the bound is sizeof(wchar_t) t_UTF8 units past &output.
// Confirm `output` is at least that large.
t_UTF8* target_end = target_start + sizeof(wchar_t);
70
// The converter takes the range pointers by address and is run with the lenientConversion flag.
ConversionResult res = ConvertUTF16toUTF8(&source_start, source_end, &target_start, target_end, lenientConversion);
71
// Any result other than conversionOK takes the branch below (its body is not visible here).
if (res != conversionOK)
78
// Converting variant: turn a single TCHAR into a UNICHAR via ConvertUTF8toUTF16.
// NOTE(review): `output` is not declared in the visible lines; presumably a local of type UNICHAR - confirm.
inline UNICHAR ConvertTCHARToUnicodeChar(TCHAR In)
81
// Input range: the parameter viewed as t_UTF8, limited to a single unit.
const t_UTF8 *source_start = reinterpret_cast<const t_UTF8*>(&In);
82
const t_UTF8 *source_end = source_start + 1;
83
// Output range: `output` reinterpreted as a sequence of t_UTF16 units.
t_UTF16* target_start = reinterpret_cast<t_UTF16*>(&output);
84
// NOTE(review): pointer arithmetic is in t_UTF16 units, so this bound lies
// sizeof(wchar_t) *elements* past target_start, not sizeof(wchar_t) bytes.
// Confirm `output` really has that much room, otherwise the converter may write past it.
t_UTF16* target_end = target_start + sizeof(wchar_t);
86
// The converter takes the range pointers by address and is run with the lenientConversion flag.
ConversionResult res = ConvertUTF8toUTF16(&source_start, source_end, &target_start, target_end, lenientConversion);
87
// Any result other than conversionOK takes the branch below (its body is not visible here).
if (res != conversionOK)
94
// Pass-through variants: the argument is returned unchanged, relying on implicit
// conversion between ANSICHAR and TCHAR.
// NOTE(review): the same function names are defined earlier with a real conversion,
// so these are presumably selected by a preprocessor condition that is not visible
// here (likely the build where TCHAR is the narrow type) - confirm.
inline TCHAR ConvertAnsiCharToTCHAR(ANSICHAR In) {return In;}
95
inline ANSICHAR ConvertTCHARToAnsiChar(TCHAR In) {return In;}
99
Convert a single UNICHAR to ANSICHAR.
101
// Convert a single UNICHAR to ANSICHAR via ConvertUTF16toUTF8.
// NOTE(review): `output` is not declared in the visible lines; presumably a local of type ANSICHAR - confirm.
inline ANSICHAR ConvertUnicodeCharToAnsiChar(UNICHAR In)
104
// Input range: exactly one t_UTF16 unit, the parameter itself.
const t_UTF16 *source_start = &In;
105
const t_UTF16 *source_end = source_start + 1;
106
// Output range: `output` reinterpreted as a sequence of t_UTF8 units.
t_UTF8* target_start = reinterpret_cast<t_UTF8*>(&output);
107
// NOTE(review): the bound is sizeof(wchar_t) t_UTF8 units past &output, while the
// function returns a single ANSICHAR. Confirm `output` is at least that large.
t_UTF8* target_end = target_start + sizeof(wchar_t);
109
// The converter takes the range pointers by address and is run with the lenientConversion flag.
ConversionResult res = ConvertUTF16toUTF8(&source_start, source_end, &target_start, target_end, lenientConversion);
110
// Any result other than conversionOK takes the branch below (its body is not visible here).
if (res != conversionOK)
118
Convert a single ANSICHAR to UNICHAR.
120
// Convert a single ANSICHAR to UNICHAR via ConvertUTF8toUTF16.
// NOTE(review): `output` is not declared in the visible lines; presumably a local of type UNICHAR - confirm.
inline UNICHAR ConvertAnsiCharToUnicodeChar(ANSICHAR In)
123
// Input range: the parameter viewed as t_UTF8, limited to a single unit.
const t_UTF8 *source_start = reinterpret_cast<const t_UTF8*>(&In);
124
const t_UTF8 *source_end = source_start + 1;
125
// Output range: `output` reinterpreted as a sequence of t_UTF16 units.
t_UTF16* target_start = reinterpret_cast<t_UTF16*>(&output);
126
// NOTE(review): pointer arithmetic is in t_UTF16 units, so this bound lies
// sizeof(wchar_t) *elements* past target_start, not sizeof(wchar_t) bytes.
// Confirm `output` really has that much room, otherwise the converter may write past it.
t_UTF16* target_end = target_start + sizeof(wchar_t);
128
// The converter takes the range pointers by address and is run with the lenientConversion flag.
ConversionResult res = ConvertUTF8toUTF16(&source_start, source_end, &target_start, target_end, lenientConversion);
129
// Any result other than conversionOK takes the branch below (its body is not visible here).
if (res != conversionOK)
136
// Converter policy class: turns a null-terminated UNICHAR string into an ANSICHAR string.
// Its Convert() has the shape NCharacterConversion expects from its BASE_CONVERTER.
class UnicharToAnsicharConvertion
139
// Default to ANSI code page
140
UnicharToAnsicharConvertion() {}
143
Convert from UNICHAR to ANSICHAR
144
@param Source String to convert. Null terminated.
145
@return Return a pointer to the new string. Null terminated.
147
ANSICHAR* Convert(const UNICHAR* Source);
149
// NOTE(review): Convert() is only declared above (it ends with ';'), yet the lines
// below read like its body. Presumably they belong to an out-of-line definition or a
// disabled block whose delimiters are not visible here - confirm.
// Copy the input into a std::wstring to obtain its length.
std::wstring utf16string(Source);
150
size_t utf16size = utf16string.length();
151
// Output budget: 6 t_UTF8 units for every input unit.
size_t utf8size = 6 * utf16size;
152
// One extra element beyond the budget; presumably for the terminator written
// at the "mark end of string" step below.
ANSICHAR *utf8string = new ANSICHAR[utf8size+1];
154
// NOTE(review): c_str() yields const wchar_t*. The comments earlier in this file say
// wchar_t is 4 bytes on Linux and Mac OS X - confirm t_UTF16 is compatible there.
const t_UTF16 *source_start = utf16string.c_str();
155
const t_UTF16 *source_end = source_start + utf16size;
156
// Output range covers utf8size units; the extra element allocated above lies outside it.
t_UTF8* target_start = reinterpret_cast<t_UTF8*>(utf8string);
157
t_UTF8* target_end = target_start + utf8size;
159
// The converter takes the range pointers by address and is run with the lenientConversion flag.
ConversionResult res = ConvertUTF16toUTF8(&source_start, source_end, &target_start, target_end, lenientConversion);
160
// Any result other than conversionOK takes the branch below (its body is not visible here).
if (res != conversionOK)
165
// mark end of string
171
//! ANSICHAR to UNICHAR conversion
172
// Converter policy class: turns a null-terminated ANSICHAR string into a UNICHAR string.
// Only the declaration of Convert() is visible here; its definition lives elsewhere.
class AnsicharToUnicharConvertion
175
AnsicharToUnicharConvertion() {}
178
Convert from ANSICHAR to UNICHAR
179
@param Source String to convert. Null terminated.
180
@return Return a pointer to the new string. Null terminated.
182
// NOTE(review): ownership of the returned pointer is not stated here. AnsiToTCharConversion
// returns it directly and NCharacterConversion releases converter results with delete[] -
// confirm the definition allocates with new[].
UNICHAR* Convert(const ANSICHAR* Source);
185
//! TCHAR to ANSI conversion
186
// TCHAR can be ANSI or Unicode, depending on whether UNICODE is defined.
187
// Converter policy class: turns a null-terminated TCHAR string into an ANSICHAR string.
class TCharToAnsiConvertion
190
INL_INLINE TCharToAnsiConvertion() {}
193
Convert from TCHAR to ANSICHAR
194
@param Source String to convert. Null terminated.
195
@return Return a pointer to the new string. Null terminated.
197
// Two alternative bodies follow. NOTE(review): they are presumably selected by a
// preprocessor condition on UNICODE that is not visible here - confirm.
INL_INLINE ANSICHAR* Convert(const TCHAR* Source)
199
// Determine whether we need to allocate memory or not
201
// Alternative 1: delegate to UnicharToAnsicharConvertion and return its result as is.
UnicharToAnsicharConvertion convert;
202
return convert.Convert(Source);
204
// Alternative 2: Source is handed to strlen directly; make a plain copy.
// length counts the characters plus the terminating null.
size_t length = strlen(Source) + 1;
205
size_t size = length * sizeof(ANSICHAR);
206
// Buffer comes from new[]; nothing in the visible lines frees it.
ANSICHAR* Dest = new ANSICHAR[size];
207
// Copy `length` characters (terminator included) into the `size`-element buffer.
STRNCPY_S(Dest, size, Source, length);
213
//! ANSI to TCHAR conversion
214
// TCHAR can be ANSI or Unicode, depending on whether UNICODE is defined.
215
// Converter policy class: turns a null-terminated ANSICHAR string into a TCHAR string.
class AnsiToTCharConversion
218
INL_INLINE AnsiToTCharConversion() {}
221
Convert from ANSICHAR to TCHAR
222
@param Source String to convert. Null terminated.
223
@return Return a pointer to the new string. Null terminated.
225
// Two alternative bodies follow. NOTE(review): they are presumably selected by a
// preprocessor condition on UNICODE that is not visible here - confirm.
INL_INLINE TCHAR* Convert(const ANSICHAR* Source)
228
// Alternative 1: delegate to AnsicharToUnicharConvertion and return its result as is.
AnsicharToUnicharConvertion convert;
229
return convert.Convert(Source);
231
// Alternative 2: make a plain copy of Source.
// length counts the characters plus the terminating null.
size_t length = strlen(Source) + 1;
232
size_t size = length;
233
// Buffer comes from new[]; nothing in the visible lines frees it.
TCHAR* Dest = new TCHAR[size];
234
// Copy `length` characters (terminator included) into the `size`-element buffer.
STRNCPY_S(Dest, size, Source, length);
241
Convert from one string format to another.
243
// Scoped string conversion: converts on construction through BASE_CONVERTER::Convert(),
// exposes the result through an implicit pointer conversion, and releases it with
// delete[] in the destructor.
//   CONVERT_TO            character type of the converted string
//   CONVERT_FROM          character type of the source string
//   BASE_CONVERTER        base class supplying Convert(const CONVERT_FROM*)
//   DefaultConversionSize not used in any of the visible lines
// NOTE(review): no copy constructor or assignment operator is visible. If none is declared,
// copying an instance would make two objects delete[] the same buffer - confirm.
template<typename CONVERT_TO,typename CONVERT_FROM, typename BASE_CONVERTER, DWORD DefaultConversionSize = 128>
244
class NCharacterConversion: public BASE_CONVERTER
246
// Converted string, or NULL. Released with delete[] by the destructor.
CONVERT_TO* ConvertedString;
248
// Hide the default constructor
249
NCharacterConversion();
253
Converts the data by using the Convert() method on the base class
255
explicit inline NCharacterConversion(const CONVERT_FROM* Source)
259
// Use base class' convert method
260
ConvertedString = BASE_CONVERTER::Convert(Source);
264
// NOTE(review): presumably the alternative branch for a null Source; the condition
// choosing between these two assignments is not visible here - confirm.
ConvertedString = NULL;
269
If memory was allocated, then it is freed below
271
inline ~NCharacterConversion()
273
// BASE_CONVERTER::Convert() must therefore return memory obtained with new[].
if (ConvertedString != NULL)
275
delete [] ConvertedString;
279
// Operator to get access to the converted string
280
// The pointer returned is the member itself, which the destructor deletes.
inline operator CONVERT_TO*(void) const
282
return ConvertedString;
286
// Conversion typedefs
287
// typedef NCharacterConversion<TCHAR, ANSICHAR, AnsiToTCharConversion> ANSI_To_TCHAR_Conversion;
288
// typedef NCharacterConversion<ANSICHAR, TCHAR, TCharToAnsiConvertion> TCHAR_To_ANSI_Conversion;
289
// typedef NCharacterConversion<ANSICHAR, UNICHAR, UnicharToAnsicharConvertion> UNICHAR_To_ANSICHAR_Conversion;
290
// typedef NCharacterConversion<UNICHAR, ANSICHAR, AnsicharToUnicharConvertion> ANSICHAR_To_UNICHAR_Conversion;