1
// Scintilla source code edit control
2
/** @file UniConversion.cxx
3
** Functions to handle UTF-8 and UTF-16 strings.
5
// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6
// The License.txt file describes the conditions under which this software may be distributed.
10
#include "UniConversion.h"
12
// UTF-16 surrogate boundaries referenced by the conversion routines in this file.
// 0xD800: lowest value treated as a surrogate; also the base added when a lead surrogate is built.
enum { SURROGATE_LEAD_FIRST = 0xD800 };
13
// 0xDC00: base added when a trail surrogate is built.
enum { SURROGATE_TRAIL_FIRST = 0xDC00 };
14
// 0xDFFF: highest value treated as a surrogate (upper bound of the range checks).
enum { SURROGATE_TRAIL_LAST = 0xDFFF };
16
// Walks the wide-character buffer uptr and classifies each unit by value.
//   uptr - input buffer of wchar_t units.
//   tlen - maximum number of units to examine.
// NOTE(review): the counter updates, the index advance and the return statement are not
// visible in this excerpt; presumably the result is the number of UTF-8 bytes needed to
// encode the input - confirm against the full source.
unsigned int UTF8Length(const wchar_t *uptr, unsigned int tlen) {
18
// Stops at tlen units or at the first zero unit, whichever comes first.
// The loop header has no increment, so i must be advanced inside the body (not shown here).
for (unsigned int i = 0; i < tlen && uptr[i];) {
19
// Widen to unsigned so the range comparisons below are done on an unsigned value.
unsigned int uch = uptr[i];
22
// Values below 0x800 that did not match the preceding branch (that branch is not shown).
} else if (uch < 0x800) {
24
// Unit lies anywhere in the surrogate range 0xD800..0xDFFF (lead or trail).
} else if ((uch >= SURROGATE_LEAD_FIRST) &&
25
(uch <= SURROGATE_TRAIL_LAST)) {
36
// Encodes the wide-character buffer uptr as UTF-8 bytes written sequentially into putf.
//   uptr - input buffer of wchar_t units.
//   tlen - maximum number of input units to examine.
//   putf - output byte buffer, filled through the running index k.
//   len  - NOTE(review): not used on any visible line; presumably the capacity or expected
//          length of putf - confirm against the full source.
// NOTE(review): the declaration of k, the first branch condition, the index advance and
// the function tail are not visible in this excerpt.
void UTF8FromUTF16(const wchar_t *uptr, unsigned int tlen, char *putf, unsigned int len) {
38
// Stops at tlen units or at the first zero unit; i is advanced inside the body.
for (unsigned int i = 0; i < tlen && uptr[i];) {
39
unsigned int uch = uptr[i];
41
// One-byte form: the value is stored unchanged (guarding condition not shown).
putf[k++] = static_cast<char>(uch);
42
} else if (uch < 0x800) {
43
// Two-byte form: 110xxxxx 10xxxxxx.
putf[k++] = static_cast<char>(0xC0 | (uch >> 6));
44
putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
45
// Any unit in 0xD800..0xDFFF is handled as the first half of a surrogate pair.
} else if ((uch >= SURROGATE_LEAD_FIRST) &&
46
(uch <= SURROGATE_TRAIL_LAST)) {
47
// Half a surrogate pair
49
// Combine the low 10 bits of uch (high half) with the low 10 bits of uptr[i] (low half)
// and add 0x10000 to form the supplementary-plane code point.
// NOTE(review): i is presumably advanced on an omitted line so uptr[i] is the following
// unit; no check that i is still below tlen, or that the unit is a trail surrogate, is
// visible here - confirm.
unsigned int xch = 0x10000 + ((uch & 0x3ff) << 10) + (uptr[i] & 0x3ff);
50
// Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
putf[k++] = static_cast<char>(0xF0 | (xch >> 18));
51
putf[k++] = static_cast<char>(0x80 | ((xch >> 12) & 0x3f));
52
putf[k++] = static_cast<char>(0x80 | ((xch >> 6) & 0x3f));
53
putf[k++] = static_cast<char>(0x80 | (xch & 0x3f));
55
// Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
putf[k++] = static_cast<char>(0xE0 | (uch >> 12));
56
putf[k++] = static_cast<char>(0x80 | ((uch >> 6) & 0x3f));
57
putf[k++] = static_cast<char>(0x80 | (uch & 0x3f));
64
// Classifies a byte by comparing it against UTF-8 lead-byte thresholds.
//   ch - the byte to classify.
// NOTE(review): the first branch and every return statement are missing from this excerpt;
// presumably the function returns the length in bytes of the sequence that ch starts -
// confirm against the full source.
unsigned int UTF8CharLength(unsigned char ch) {
67
// 0x80 + 0x40 + 0x20 == 0xE0: bytes below this did not match the earlier (not shown) branch.
} else if (ch < 0x80 + 0x40 + 0x20) {
69
// 0x80 + 0x40 + 0x20 + 0x10 == 0xF0.
} else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
76
// Scans len bytes of s, classifying each examined byte against UTF-8 lead-byte thresholds.
//   s   - input byte buffer.
//   len - number of bytes in s to scan.
// NOTE(review): the updates to ulen and i and the return statement are not visible in this
// excerpt; presumably the result is the number of UTF-16 units needed for the input -
// confirm against the full source.
unsigned int UTF16Length(const char *s, unsigned int len) {
77
// Running total, starts at zero.
unsigned int ulen = 0;
79
// No increment in the loop header: i must be advanced inside the body (not shown here).
for (unsigned int i=0; i<len;) {
80
// Read the byte as unsigned so values >= 0x80 compare correctly below.
unsigned char ch = static_cast<unsigned char>(s[i]);
83
// Below 0xE0 (and not matched by the earlier, not shown, branch).
} else if (ch < 0x80 + 0x40 + 0x20) {
85
// Below 0xF0.
} else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
97
// Decodes UTF-8 bytes from s into wide characters stored in tbuf.
//   s    - input byte buffer.
//   len  - number of input bytes available.
//   tbuf - output buffer of wchar_t units, indexed by ui.
//   tlen - capacity of tbuf in units (loop bound on ui).
// NOTE(review): the declarations of i and ui, the first branch, the re-reads of ch between
// the visible assignments, the ui advances and the return statement are not visible in this
// excerpt - confirm the details below against the full source.
unsigned int UTF16FromUTF8(const char *s, unsigned int len, wchar_t *tbuf, unsigned int tlen) {
99
// View the input as unsigned bytes so lead-byte comparisons work for values >= 0x80.
const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
101
// Continue while input remains and there is room for at least one more output unit.
while ((i<len) && (ui<tlen)) {
102
unsigned char ch = us[i++];
105
// Lead byte below 0xE0: its low 5 bits become bits 6..10 of the result.
} else if (ch < 0x80 + 0x40 + 0x20) {
106
tbuf[ui] = static_cast<wchar_t>((ch & 0x1F) << 6);
108
// Add the low bits of ch (presumably reloaded with the next byte on an omitted line).
// NOTE(review): the mask is 0x7F here but 0x3F in the four-byte branch below; the two
// give the same value only when the byte is in 0x80..0xBF.
tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F));
109
// Lead byte below 0xF0: its low 4 bits become bits 12..15 of the result.
} else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
110
tbuf[ui] = static_cast<wchar_t>((ch & 0xF) << 12);
112
tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + ((ch & 0x7F) << 6));
114
tbuf[ui] = static_cast<wchar_t>(tbuf[ui] + (ch & 0x7F));
116
// Outside the BMP so need two surrogates
117
// Accumulate the code point in val: low 3 bits of the lead byte go to bits 18..20.
int val = (ch & 0x7) << 18;
119
val += (ch & 0x3F) << 12;
121
val += (ch & 0x3F) << 6;
124
// Lead surrogate: upper bits of (val - 0x10000) added to 0xD800.
tbuf[ui] = static_cast<wchar_t>(((val - 0x10000) >> 10) + SURROGATE_LEAD_FIRST);
126
// Trail surrogate: low 10 bits of val added to 0xDC00.
// NOTE(review): ui is presumably advanced on an omitted line before this store; no check
// of the new ui against tlen is visible here - confirm.
tbuf[ui] = static_cast<wchar_t>((val & 0x3ff) + SURROGATE_TRAIL_FIRST);