1
// MeCab -- Yet Another Part-of-Speech and Morphological Analyzer
4
// Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>
5
// Copyright(C) 2004-2006 Nippon Telegraph and Telephone Corporation
7
/* ----------------------------------------------------------------- */
8
/* The Japanese TTS System "Open JTalk" */
9
/* developed by HTS Working Group */
10
/* http://open-jtalk.sourceforge.net/ */
11
/* ----------------------------------------------------------------- */
13
/* Copyright (c) 2008-2011 Nagoya Institute of Technology */
14
/* Department of Computer Science */
16
/* All rights reserved. */
18
/* Redistribution and use in source and binary forms, with or */
19
/* without modification, are permitted provided that the following */
20
/* conditions are met: */
22
/* - Redistributions of source code must retain the above copyright */
23
/* notice, this list of conditions and the following disclaimer. */
24
/* - Redistributions in binary form must reproduce the above */
25
/* copyright notice, this list of conditions and the following */
26
/* disclaimer in the documentation and/or other materials provided */
27
/* with the distribution. */
28
/* - Neither the name of the HTS working group nor the names of its */
29
/* contributors may be used to endorse or promote products derived */
30
/* from this software without specific prior written permission. */
32
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
33
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
34
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
35
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
36
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
37
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
38
/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
39
/* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
40
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
41
/* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
42
/* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
43
/* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
44
/* POSSIBILITY OF SUCH DAMAGE. */
45
/* ----------------------------------------------------------------- */
53
#include "scoped_ptr.h"
59
#include "iconv_utils.h"
60
#include "char_property.h"
62
#ifdef HAVE_WINDOWS_H /* for Open JTalk */
69
// Translates the charset name `str` into the encoding-name string that
// Iconv::open() passes to iconv_open().
//
// The name is first resolved to an integer id by MeCab::decode_charset().
// The only results visible in this excerpt are MECAB_DEFAULT_CHARSET (a
// string-literal macro, since it is concatenated with a literal below),
// returned both after the diagnostic and on a separate path.
// NOTE(review): the code that maps `charset` to other encoding names, and
// the conditions selecting each return, are not visible here -- confirm
// against the full file.
const char * decode_charset_iconv(const char *str) {
70
// Integer charset id for `str`, as reported by MeCab.
const int charset = MeCab::decode_charset(str);
85
// Diagnostic goes to stderr; no newline or flush is appended here.
std::cerr << "charset " << str
86
<< " is not defined, use " MECAB_DEFAULT_CHARSET;
87
return MECAB_DEFAULT_CHARSET;
89
return MECAB_DEFAULT_CHARSET;
93
#ifdef HAVE_WINDOWS_H /* for Open JTalk */
94
// Translates the charset name `str` into a Win32 code-page value (DWORD).
// Iconv::open() stores the result in from_cp_ / to_cp_, which
// Iconv::convert() passes as the code-page argument of
// MultiByteToWideChar() / WideCharToMultiByte().
//
// The name is first resolved to an integer id by MeCab::decode_charset().
// The only result visible in this excerpt is the fallback: a diagnostic
// on stderr followed by CP_THREAD_ACP.
// NOTE(review): the code that maps `charset` to specific code pages is not
// visible here -- confirm against the full file.
DWORD decode_charset_win32(const char *str) {
95
// Integer charset id for `str`, as reported by MeCab.
const int charset = MeCab::decode_charset(str);
111
// Diagnostic goes to stderr; no newline or flush is appended here.
std::cerr << "charset " << str
112
<< " is not defined, use 'CP_THREAD_ACP'";
113
return CP_THREAD_ACP;
121
// Prepares this converter to translate text from charset `from` to
// charset `to`. Returns bool.
//
// Two backends are visible, selected by the preprocessor:
//   - HAVE_ICONV:      opens an iconv descriptor and stores it in ic_.
//   - HAVE_WINDOWS_H:  resolves both names to Win32 code pages and stores
//                      them in from_cp_ / to_cp_.
// A third path only prints "iconv_open is not supported".
// NOTE(review): the bodies of the `if` statements, the return values and
// the #else/#endif lines are not visible in this excerpt -- confirm what
// each branch returns against the full file.
bool Iconv::open(const char* from, const char* to) {
123
#if defined HAVE_ICONV
124
// Resolve both user-supplied names to the names iconv understands.
const char *from2 = decode_charset_iconv(from);
125
const char *to2 = decode_charset_iconv(to);
126
// Source and target resolve to the same encoding name.
if (std::strcmp(from2, to2) == 0) {
130
// Note the argument order: iconv_open() takes the target encoding first.
ic_ = iconv_open(to2, from2);
131
// iconv_open() signals failure with (iconv_t)(-1).
if (ic_ == (iconv_t)(-1)) {
136
#ifdef HAVE_WINDOWS_H /* for Open JTalk */
137
// Resolve both user-supplied names to Win32 code pages.
from_cp_ = decode_charset_win32(from);
138
to_cp_ = decode_charset_win32(to);
139
// Source and target resolve to the same code page.
if (from_cp_ == to_cp_) {
144
std::cerr << "iconv_open is not supported" << std::endl;
151
// Converts the contents of `*str` in place, using the state set up by
// Iconv::open(). Returns bool.
//
// HAVE_ICONV path: runs iconv() from str's buffer into a scratch buffer
// (`tmp`) and then replaces `*str` with the bytes that were produced.
// HAVE_WINDOWS_H path: converts from from_cp_ to wide characters, then
// either copies the raw 2-byte units (code pages 1200 / 1201) or encodes
// to to_cp_ with WideCharToMultiByte().
// NOTE(review): the declarations of `tmp`, `ilen` and `olen`, the bodies
// of the error branches, the return statements and several call arguments
// are not visible in this excerpt -- confirm them against the full file.
bool Iconv::convert(std::string *str) {
159
#if defined HAVE_ICONV /* for Open JTalk */
166
// iconv() takes non-const char pointers, hence the const_casts.
char *ibuf = const_cast<char *>(str->data());
167
// Start of the output buffer, kept so the result can be read back later
// (iconv() advances `obuf` as it writes).
char *obuf_org = const_cast<char *>(tmp.data());
168
char *obuf = obuf_org;
169
// Zero the whole output area before converting.
std::fill(obuf, obuf + olen, 0);
170
// Remember the initial capacity; iconv() decrements `olen` as it writes.
size_t olen_org = olen;
171
iconv(ic_, 0, &ilen, 0, &olen); // reset iconv state
173
// Perform the actual conversion.
// NOTE(review): the value this result is compared against is not visible.
if (iconv(ic_, (ICONV_CONST char **)&ibuf, &ilen, &obuf, &olen)
178
// Bytes written = initial capacity minus the space iconv() left unused.
str->assign(obuf_org, olen_org - olen);
180
#ifdef HAVE_WINDOWS_H /* for Open JTalk */
181
// convert it to wide character first
182
// NOTE(review): the remaining arguments of this call are not visible.
const size_t wide_len = ::MultiByteToWideChar(from_cp_, 0,
189
// One extra element beyond wide_len is allocated.
scoped_array<wchar_t> wide_str(new wchar_t[wide_len + 1]);
191
// Guard against a null buffer pointer.
if (!wide_str.get()) {
195
// Decode str (passed as a NUL-terminated string, length -1) into
// wide_str; a return of 0 is treated as the failure case.
if (::MultiByteToWideChar(from_cp_, 0, str->c_str(), -1,
196
wide_str.get(), wide_len + 1) == 0) {
200
// 1200 and 1201 are the Windows code-page identifiers for UTF-16LE and
// UTF-16BE; for these the wide buffer is copied out directly.
if (to_cp_ == 1200 || to_cp_ == 1201) {
201
// Two bytes per wide character are copied.
str->resize(2 * wide_len);
202
memcpy(const_cast<char *>(str->data()),
203
reinterpret_cast<char *>(wide_str.get()), wide_len * 2);
204
// For 1201, swap the two bytes of every 16-bit unit.
if (to_cp_ == 1201) {
205
char *buf = const_cast<char *>(str->data());
206
for (size_t i = 0; i < 2 * wide_len; i += 2) {
207
std::swap(buf[i], buf[i+1]);
213
// First call passes a NULL output buffer of size 0, which asks
// WideCharToMultiByte() for the required output size only.
const size_t output_len = ::WideCharToMultiByte(to_cp_, 0,
216
NULL, 0, NULL, NULL);
218
// A size of 0 is handled as a special case (body not visible here).
if (output_len == 0) {
222
// One extra byte beyond output_len is allocated.
scoped_array<char> encoded(new char[output_len + 1]);
223
// Second call performs the real encoding into `encoded`.
if (::WideCharToMultiByte(to_cp_, 0, wide_str.get(), wide_len,
224
encoded.get(), output_len + 1,
229
// Assigned as a C string, so the result ends at the first NUL byte.
str->assign(encoded.get());
237
// Default constructor: starts with ic_ set to 0. The cleanup code in this
// file only calls iconv_close() when ic_ is non-zero, so 0 marks
// "nothing to close".
Iconv::Iconv() : ic_(0) {}
240
#if defined HAVE_ICONV
241
if (ic_ != 0) iconv_close(ic_);