48
static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49
48
static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50
49
static bool initialized;
53
* Return the name of a charset to give to iconv().
55
static const char *charset_name(charset_t ch)
67
ret = lp_unix_charset();
70
ret = lp_dos_charset();
73
ret = lp_display_charset();
82
#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
83
if (ret && !strcmp(ret, "LOCALE")) {
84
const char *ln = NULL;
87
setlocale(LC_ALL, "");
89
ln = nl_langinfo(CODESET);
91
/* Check whether the charset name is supported
93
smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
94
if (handle == (smb_iconv_t) -1) {
95
DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
98
DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
99
smb_iconv_close(handle);
106
if (!ret || !*ret) ret = "ASCII";
110
51
void lazy_initialize_conv(void)
112
53
if (!initialized) {
54
load_case_tables_library();
115
56
initialized = true;
144
76
void init_iconv(void)
147
bool did_reload = False;
149
/* so that charset_name() works we need to get the UNIX<->UCS2 going
151
if (!conv_handles[CH_UNIX][CH_UTF16LE])
152
conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
154
if (!conv_handles[CH_UTF16LE][CH_UNIX])
155
conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
157
for (c1=0;c1<NUM_CHARSETS;c1++) {
158
for (c2=0;c2<NUM_CHARSETS;c2++) {
159
const char *n1 = charset_name((charset_t)c1);
160
const char *n2 = charset_name((charset_t)c2);
161
if (conv_handles[c1][c2] &&
162
strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
163
strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
168
if (conv_handles[c1][c2])
169
smb_iconv_close(conv_handles[c1][c2]);
171
conv_handles[c1][c2] = smb_iconv_open(n2,n1);
172
if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
173
DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
174
charset_name((charset_t)c1), charset_name((charset_t)c2)));
175
if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
178
if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
181
DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
183
conv_handles[c1][c2] = smb_iconv_open(n2,n1);
184
if (!conv_handles[c1][c2]) {
185
DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
186
smb_panic("init_iconv: conv_handle initialization failed");
193
/* XXX: Does this really get called every time the dos
194
* codepage changes? */
195
/* XXX: Is the did_reload test too strict? */
78
global_iconv_convenience = smb_iconv_convenience_reinit(NULL, lp_dos_charset(),
79
lp_unix_charset(), lp_display_charset(),
80
true, global_iconv_convenience);
223
104
const char* inbuf = (const char*)src;
224
105
char* outbuf = (char*)dest;
225
106
smb_iconv_t descriptor;
107
struct smb_iconv_convenience *ic;
227
109
lazy_initialize_conv();
229
descriptor = conv_handles[from][to];
110
ic = get_iconv_convenience();
111
descriptor = get_conv_handle(ic, from, to);
231
113
if (srclen == (size_t)-1) {
232
114
if (from == CH_UTF16LE || from == CH_UTF16BE) {
264
146
if (!conv_silent) {
265
147
if (from == CH_UNIX) {
266
148
DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
267
charset_name(from), charset_name(to),
149
charset_name(ic, from), charset_name(ic, to),
268
150
(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
270
152
DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
271
charset_name(from), charset_name(to),
153
charset_name(ic, from), charset_name(ic, to),
272
154
(unsigned int)srclen, (unsigned int)destlen));
1821
Return the unicode codepoint for the next multi-byte CH_UNIX character
1822
in the string. The unicode codepoint (codepoint_t) is an unsigned 32 bit value.
1824
Also return the number of bytes consumed (which tells the caller
1825
how many bytes to skip to get to the next CH_UNIX character).
1827
Return INVALID_CODEPOINT if the next character cannot be converted.
1830
codepoint_t next_codepoint(const char *str, size_t *size)
1832
/* It cannot occupy more than 4 bytes in UTF16 format */
1834
smb_iconv_t descriptor;
1840
if ((str[0] & 0x80) == 0) {
1842
return (codepoint_t)str[0];
1845
/* We assume that no multi-byte character can take
1846
more than 5 bytes. This is OK as we only
1847
support codepoints up to 1M */
1849
ilen_orig = strnlen(str, 5);
1852
lazy_initialize_conv();
1854
descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1855
if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1857
return INVALID_CODEPOINT;
1860
/* This looks a little strange, but it is needed to cope
1861
with codepoints above 64k which are encoded as per RFC2781. */
1863
outbuf = (char *)buf;
1864
smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1866
/* We failed to convert to a 2 byte character.
1867
See if we can convert to a 4 UTF16-LE byte char encoding.
1870
outbuf = (char *)buf;
1871
smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1873
/* We didn't convert any bytes */
1875
return INVALID_CODEPOINT;
1882
*size = ilen_orig - ilen;
1885
/* 2 byte, UTF16-LE encoded value. */
1886
return (codepoint_t)SVAL(buf, 0);
1889
/* Decode a 4 byte UTF16-LE character manually.
1890
See RFC2781 for the encoding mechanism.
1892
codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1893
codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1895
return (codepoint_t)0x10000 +
1899
/* no other length is valid */
1900
return INVALID_CODEPOINT;
1904
push a single codepoint into a CH_UNIX string. The target string must
1905
be able to hold the full character, which is guaranteed if it is at
1906
least 5 bytes in size. The caller may pass less than 5 bytes if they
1907
are sure the character will fit (for example, you can assume that
1908
uppercase/lowercase of a character will not add more than 1 byte)
1910
return the number of bytes occupied by the CH_UNIX character, or
1913
_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1915
smb_iconv_t descriptor;
1925
lazy_initialize_conv();
1927
descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1928
if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1935
inbuf = (char *)buf;
1937
smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1946
buf[0] = (c>>10) & 0xFF;
1947
buf[1] = (c>>18) | 0xd8;
1949
buf[3] = ((c>>8) & 0x3) | 0xdc;
1953
inbuf = (char *)buf;
1955
smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);