~ubuntu-branches/ubuntu/karmic/vmware-view-open-client/karmic

Viewing changes to lib/bora/unicode/ucnv2022.c

Committer: Bazaar Package Importer
Author(s): Daniel Baumann
Date: 2009-04-17 20:43:00 UTC
mfrom: (1.1.1 upstream)
Revision ID: james.westby@ubuntu.com-20090417204300-kfj5m144zva0gbgc

Tags: 3.1.0-160969+dfsg-1

* Merging upstream version 3.1.0-160969+dfsg.
* Upgrading package to standards 3.8.1.
* Adding build-depends to intltool.
* Adding build-depends to libicu.
* Rediffing doc-pdf.patch.

files added:
config/mkinstalldirs

cryptoki.cc

cryptoki.hh

doc/help

doc/help/integrated_help-de.txt

doc/help/integrated_help-en.txt

doc/help/integrated_help-ja.txt

extraTranslations.hh

helpDlg.cc

helpDlg.hh

icons/list_button_hover.h

icons/list_button_normal.h

icons/list_button_open.h

icudata

icudata/NamePrepProfile.txt

icudata/NormalizationCorrections.txt

icudata/README

intltool-extract.in

intltool-merge.in

intltool-update.in

lib/gtm

lib/gtm/COPYING

lib/gtm/Makefile.inc

lib/gtm/gtm.c

lib/gtm/include

lib/gtm/include/gtm.h

lib/libp11

lib/libp11/COPYING

lib/libp11/Makefile.inc

lib/libp11/include

lib/libp11/include/pkcs11.h

m4/glib-gettext.m4

m4/intltool.m4

m4/nls.m4

mkinstalldirs

po/ChangeLog

po/Makefile.in.in

po/POTFILES.in

po/de.po

po/ja.po

scCertDetailsDlg.cc

scCertDetailsDlg.hh

scCertDlg.cc

scCertDlg.hh

scInsertPromptDlg.cc

scInsertPromptDlg.hh

scPinDlg.cc

scPinDlg.hh

usb.cc

usb.hh

vmware-view-log-collector

vmware-view.desktop.in.in

files removed:
lib/bora/unicode

lib/bora/unicode/Makefile.inc

lib/bora/unicode/cmemory.c

lib/bora/unicode/cmemory.h

lib/bora/unicode/cstring.c

lib/bora/unicode/cstring.h

lib/bora/unicode/cwchar.c

lib/bora/unicode/cwchar.h

lib/bora/unicode/locmap.c

lib/bora/unicode/locmap.h

lib/bora/unicode/putil.c

lib/bora/unicode/putilimp.h

lib/bora/unicode/uarrsort.c

lib/bora/unicode/uarrsort.h

lib/bora/unicode/uassert.h

lib/bora/unicode/ucase.h

lib/bora/unicode/ucln.h

lib/bora/unicode/ucln_cmn.c

lib/bora/unicode/ucln_cmn.h

lib/bora/unicode/ucmndata.c

lib/bora/unicode/ucmndata.h

lib/bora/unicode/ucnv.c

lib/bora/unicode/ucnv2022.c

lib/bora/unicode/ucnv_bld.c

lib/bora/unicode/ucnv_bld.h

lib/bora/unicode/ucnv_cb.c

lib/bora/unicode/ucnv_cnv.c

lib/bora/unicode/ucnv_cnv.h

lib/bora/unicode/ucnv_err.c

lib/bora/unicode/ucnv_ext.c

lib/bora/unicode/ucnv_ext.h

lib/bora/unicode/ucnv_imp.h

lib/bora/unicode/ucnv_io.c

lib/bora/unicode/ucnv_io.h

lib/bora/unicode/ucnv_lmb.c

lib/bora/unicode/ucnv_u16.c

lib/bora/unicode/ucnv_u32.c

lib/bora/unicode/ucnv_u7.c

lib/bora/unicode/ucnv_u8.c

lib/bora/unicode/ucnvbocu.c

lib/bora/unicode/ucnvhz.c

lib/bora/unicode/ucnvisci.c

lib/bora/unicode/ucnvlat1.c

lib/bora/unicode/ucnvmbcs.c

lib/bora/unicode/ucnvmbcs.h

lib/bora/unicode/ucnvscsu.c

lib/bora/unicode/udata.c

lib/bora/unicode/udatamem.c

lib/bora/unicode/udatamem.h

lib/bora/unicode/udataswp.c

lib/bora/unicode/udataswp.h

lib/bora/unicode/uenum.c

lib/bora/unicode/uenumimp.h

lib/bora/unicode/uhash.c

lib/bora/unicode/uhash.h

lib/bora/unicode/uinvchar.c

lib/bora/unicode/uinvchar.h

lib/bora/unicode/umapfile.c

lib/bora/unicode/umapfile.h

lib/bora/unicode/umath.c

lib/bora/unicode/umutex.c

lib/bora/unicode/umutex.h

lib/bora/unicode/unicode

lib/bora/unicode/unicode/brkiter.h

lib/bora/unicode/unicode/caniter.h

lib/bora/unicode/unicode/chariter.h

lib/bora/unicode/unicode/dbbi.h

lib/bora/unicode/unicode/docmain.h

lib/bora/unicode/unicode/locid.h

lib/bora/unicode/unicode/normlzr.h

lib/bora/unicode/unicode/parseerr.h

lib/bora/unicode/unicode/parsepos.h

lib/bora/unicode/unicode/platform.h

lib/bora/unicode/unicode/ppalmos.h

lib/bora/unicode/unicode/putil.h

lib/bora/unicode/unicode/pwin32.h

lib/bora/unicode/unicode/rbbi.h

lib/bora/unicode/unicode/rep.h

lib/bora/unicode/unicode/resbund.h

lib/bora/unicode/unicode/schriter.h

lib/bora/unicode/unicode/strenum.h

lib/bora/unicode/unicode/symtable.h

lib/bora/unicode/unicode/ubidi.h

lib/bora/unicode/unicode/ubrk.h

lib/bora/unicode/unicode/ucasemap.h

lib/bora/unicode/unicode/ucat.h

lib/bora/unicode/unicode/uchar.h

lib/bora/unicode/unicode/uchriter.h

lib/bora/unicode/unicode/uclean.h

lib/bora/unicode/unicode/ucnv.h

lib/bora/unicode/unicode/ucnv_cb.h

lib/bora/unicode/unicode/ucnv_err.h

lib/bora/unicode/unicode/uconfig.h

lib/bora/unicode/unicode/udata.h

lib/bora/unicode/unicode/udeprctd.h

lib/bora/unicode/unicode/udraft.h

lib/bora/unicode/unicode/uenum.h

lib/bora/unicode/unicode/uidna.h

lib/bora/unicode/unicode/uintrnal.h

lib/bora/unicode/unicode/uiter.h

lib/bora/unicode/unicode/uloc.h

lib/bora/unicode/unicode/umachine.h

lib/bora/unicode/unicode/umisc.h

lib/bora/unicode/unicode/unifilt.h

lib/bora/unicode/unicode/unifunct.h

lib/bora/unicode/unicode/unimatch.h

lib/bora/unicode/unicode/uniset.h

lib/bora/unicode/unicode/unistr.h

lib/bora/unicode/unicode/unorm.h

lib/bora/unicode/unicode/uobject.h

lib/bora/unicode/unicode/uobslete.h

lib/bora/unicode/unicode/urename.h

lib/bora/unicode/unicode/urep.h

lib/bora/unicode/unicode/ures.h

lib/bora/unicode/unicode/uscript.h

lib/bora/unicode/unicode/uset.h

lib/bora/unicode/unicode/ushape.h

lib/bora/unicode/unicode/usprep.h

lib/bora/unicode/unicode/ustring.h

lib/bora/unicode/unicode/usystem.h

lib/bora/unicode/unicode/utext.h

lib/bora/unicode/unicode/utf.h

lib/bora/unicode/unicode/utf16.h

lib/bora/unicode/unicode/utf32.h

lib/bora/unicode/unicode/utf8.h

lib/bora/unicode/unicode/utf_old.h

lib/bora/unicode/unicode/utrace.h

lib/bora/unicode/unicode/utypes.h

lib/bora/unicode/unicode/uversion.h

lib/bora/unicode/unormimp.h

lib/bora/unicode/uset_imp.h

lib/bora/unicode/ustr_cnv.c

lib/bora/unicode/ustr_cnv.h

lib/bora/unicode/ustr_imp.h

lib/bora/unicode/ustrfmt.c

lib/bora/unicode/ustrfmt.h

lib/bora/unicode/ustring.c

lib/bora/unicode/utf_impl.c

lib/bora/unicode/utracimp.h

lib/bora/unicode/utrie.c

lib/bora/unicode/utrie.h

lib/bora/unicode/wintz.c

lib/bora/unicode/wintz.h

vmware-view.desktop.in

files modified:
Makefile.am

Makefile.in

Makefile.inc

README.txt

VMware-view-open-client.spec.in

aclocal.m4

app.cc

app.hh

broker.cc

broker.hh

brokerDlg.cc

brokerDlg.hh

brokerXml.cc

brokerXml.hh

buildNumber.h.in

configure

configure.ac

debian/changelog

debian/control

debian/patches/01-doc-pdf.patch

desktop.cc

desktop.hh

desktopSelectDlg.cc

desktopSelectDlg.hh

dlg.cc

dlg.hh

doc/Makefile.inc

doc/open_source_licenses.txt

doc/vmware-view-tunnel.1

doc/vmware-view.1

lib/bora/asyncsocket/asyncsocket.c

lib/bora/basicHttp/http.c

lib/bora/include/Makefile.inc

lib/bora/include/asyncsocket.h

lib/bora/include/basicHttp.h

lib/bora/include/sslFunctionList.h

lib/bora/misc/Makefile.inc

lib/open-vm-tools/file/filePosix.c

lib/open-vm-tools/misc/posixPosix.c

loginDlg.cc

main.cc

passwordDlg.cc

prefs.cc

prefs.hh

rdesktop.cc

rdesktop.hh

securIDDlg.cc

transitionDlg.cc

tunnel.cc

tunnel/stubs.c

tunnel/tunnelMain.c

util.cc

util.hh

Show diffs side-by-side

added added

removed removed

lib/bora/unicode/ucnv2022.c

/*********************************************************

* This file is part of VMware View Open Client.

*********************************************************/

/*
**********************************************************************
*   file name:  ucnv2022.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000feb03
*   created by: Markus W. Scherer
*
*   Change history:
*
*   06/29/2000  helena  Major rewrite of the callback APIs.
*   08/08/2000  Ram     Included support for ISO-2022-JP-2
*                       Changed implementation of toUnicode
*                       function
*   08/21/2000  Ram     Added support for ISO-2022-KR
*   08/29/2000  Ram     Separated implementation of EBCDIC to
*                       ucnvebdc.c
*   09/20/2000  Ram     Added support for ISO-2022-CN
*                       Added implementations for getNextUChar()
*                       for specific 2022 country variants.
*   10/31/2000  Ram     Implemented offsets logic functions
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

#include "unicode/ucnv.h"

#include "unicode/uset.h"

#include "unicode/ucnv_err.h"

#include "unicode/ucnv_cb.h"

#include "ucnv_imp.h"

#include "ucnv_bld.h"

#include "ucnv_cnv.h"

#include "ucnvmbcs.h"

#include "cstring.h"

#include "cmemory.h"

/* Element count of a true array (not a pointer), converted to int32_t. */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

#ifdef U_ENABLE_GENERIC_ISO_2022

* I am disabling the generic ISO-2022 converter after proposing to do so on

* the icu mailing list two days ago.

* Reasons:

* 1. It does not fully support the ISO-2022/ECMA-35 specification with all of

* its designation sequences, single shifts with return to the previous state,

* switch-with-no-return to UTF-16BE or similar, etc.

* This is unlike the language-specific variants like ISO-2022-JP which

* require a much smaller repertoire of ISO-2022 features.

* These variants continue to be supported.

* 2. I believe that no one is really using the generic ISO-2022 converter

* but rather always one of the language-specific variants.

* Note that ICU's generic ISO-2022 converter has always output one escape

* sequence followed by UTF-8 for the whole stream.

* 3. Switching between subcharsets is extremely slow, because each time

* the previous converter is closed and a new one opened,

* without any kind of caching, least-recently-used list, etc.

* 4. The code is currently buggy, and given the above it does not seem

* reasonable to spend the time on maintenance.

* 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings.

* This means, for example, that when ISO-8859-7 is designated, the following

* ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff.

* The ICU ISO-2022 converter does not handle this - and has no information

* about which subconverter would have to be shifted vs. which is designed

* for 7-bit ISO-2022.

* Markus Scherer 2003-dec-03

#endif

static const char SHIFT_IN_STR[] = "\x0F";

static const char SHIFT_OUT_STR[] = "\x0E";

#define CR 0x0D

#define LF 0x0A

#define H_TAB 0x09

#define V_TAB 0x0B

#define SPACE 0x20

/*
 * Inclusive code point range HWKANA_START..HWKANA_END.
 * NOTE(review): presumably the Unicode halfwidth Katakana range handled by
 * the HWKANA_7BIT charset - confirm against the conversion functions.
 */
enum {

HWKANA_START=0xff61,

HWKANA_END=0xff9f

};

/*
 * 94-character sets with native byte values A1..FE are encoded in ISO 2022
 * as bytes 21..7E. (Subtract 0x80.)
 * 96-character sets with native byte values A0..FF are encoded in ISO 2022
 * as bytes 20..7F. (Subtract 0x80.)
 * Do not encode C1 control codes with native bytes 80..9F
 * as bytes 00..1F (C0 control codes).
 */
enum {
    GR94_START=0xa1,
    GR94_END=0xfe,
    GR96_START=0xa0,
    GR96_END=0xff
};

111

112

113

/*
 * ISO 2022 control codes must not be converted from Unicode
 * because they would mess up the byte stream.
 * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b
 * corresponding to SO, SI, and ESC.
 *
 * The argument is converted to uint32_t before the range test so that a
 * negative value can never reach the shift (shifting by a negative count
 * is undefined behavior). Note that c is evaluated twice.
 */
#define IS_2022_CONTROL(c) (((uint32_t)(c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0)

119

120

/*
 * Charset/state numbers for the ISO-2022-JP and -CN implementations.
 * The JP and CN groups deliberately reuse the small values 1..3; which
 * meaning applies depends on the variant the converter was opened for.
 */
typedef enum {
    /* shared values */
    INVALID_STATE=-1,
    ASCII = 0,

    SS2_STATE=0x10,
    SS3_STATE,

    /* JP */
    ISO8859_1 = 1 ,
    ISO8859_7 = 2 ,
    JISX201 = 3,
    JISX208 = 4,
    JISX212 = 5,
    GB2312 =6,
    KSC5601 =7,
    HWKANA_7BIT=8, /* Halfwidth Katakana 7 bit */

    /* CN */
    /* the first few enum constants must keep their values because they correspond to myConverterArray[] */
    GB2312_1=1,
    ISO_IR_165=2,
    CNS_11643=3,

    /*
     * these are used in StateEnum and ISO2022State variables,
     * but CNS_11643 must be used to index into myConverterArray[]
     */
    CNS_11643_0=0x20,
    CNS_11643_1,
    CNS_11643_2,
    CNS_11643_3,
    CNS_11643_4,
    CNS_11643_5,
    CNS_11643_6,
    CNS_11643_7
} StateEnum;

158

159

/*
 * Is the StateEnum charset value for a DBCS charset?
 * True for the contiguous JP values JISX208..KSC5601 (inclusive).
 * Note that cs is evaluated twice.
 */
#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601)

161

162

/* Charset mask: the single bit at position cs, used to build and test jpCharsetMasks[] entries. */
#define CSM(cs) ((uint16_t)1<<(cs))

163

164

165

* Each of these charset masks (with index x) contains a bit for a charset in exact correspondence

166

* to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x

167

168

* Note: The converter uses some leniency:

169

* - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in

170

* all versions, not just JIS7 and JIS8.

171

* - ICU does not distinguish between different versions of JIS X 0208.

172

173

static const uint16_t jpCharsetMasks[5]={

174

CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),

175

CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),

176

177

178

179

};

180

181

/* Kind of sub-converter currently selected (stored in UConverterDataISO2022.currentType). */
typedef enum {
    ASCII1=0,
    LATIN1,
    SBCS,
    DBCS,
    MBCS,
    HWKANA
} Cnv2022Type;

189

190

/* Designation and shift state of one conversion direction. */
typedef struct ISO2022State {
    int8_t cs[4];       /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */
    int8_t g;           /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */
    int8_t prevG;       /* g before single shift (SS2 or SS3) */
} ISO2022State;

195

196

#define UCNV_OPTIONS_VERSION_MASK 0xf

197

#define UCNV_2022_MAX_CONVERTERS 10

198

199

typedef struct{

200

UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS];

201

UConverter *currentConverter;

202

Cnv2022Type currentType;

203

ISO2022State toU2022State, fromU2022State;

204

uint32_t key;

205

uint32_t version;

206

#ifdef U_ENABLE_GENERIC_ISO_2022

207

UBool isFirstBuffer;

208

#endif

209

char name[30];

210

char locale[3];

211

}UConverterDataISO2022;

212

213

/* Protos */

214

/* ISO-2022 ----------------------------------------------------------------- */

215

216

/*Forward declaration */

217

U_CFUNC void

218

ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args,

219

UErrorCode * err);

220

U_CFUNC void

221

ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args,

222

UErrorCode * err);

223

224

#define ESC_2022 0x1B /*ESC*/

225

226

/* Result of looking up a (partial) escape sequence key in escSeqStateTable_Value_2022[]. */
typedef enum
{
    INVALID_2022 = -1,              /* Doesn't correspond to a valid iso 2022 escape sequence */
    VALID_NON_TERMINAL_2022 = 0,    /* so far corresponds to a valid iso 2022 escape sequence */
    VALID_TERMINAL_2022 = 1,        /* corresponds to a valid iso 2022 escape sequence */
    VALID_MAYBE_TERMINAL_2022 = 2   /* so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence */
} UCNV_TableStates_2022;

233

234

235

* The way these state transition arrays work is:

236

* ex : ESC$B is the sequence for JISX208

237

* a) First Iteration: char is ESC

238

* i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index

239

* int x = normalize_esq_chars_2022[27] which is equal to 1

240

* ii) Search for this value in escSeqStateTable_Key_2022[]

241

* value of x is stored at escSeqStateTable_Key_2022[0]

242

* iii) Save this index as offset

243

* iv) Get state of this sequence from escSeqStateTable_Value_2022[]

244

* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022

245

* b) Switch on this state and continue to next char

246

* i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index

247

* which is normalize_esq_chars_2022[36] == 4

248

* ii) x is currently 1(from above)

249

* x<<=5 -- x is now 32

250

* x+=normalize_esq_chars_2022[36]

251

* now x is 36

252

* iii) Search for this value in escSeqStateTable_Key_2022[]

253

* value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2

254

* iv) Get state of this sequence from escSeqStateTable_Value_2022[]

255

* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022

256

* c) Switch on this state and continue to next char

257

* i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index

258

* ii) x is currently 36 (from above)

259

* x<<=5 -- x is now 1152

260

* x+=normalize_esq_chars_2022[66]

261

* now x is 1161

262

* iii) Search for this value in escSeqStateTable_Key_2022[]

263

* value of x is stored at escSeqStateTable_Key_2022[24], so offset is 24

264

* iv) Get state of this sequence from escSeqStateTable_Value_2022[24]

265

* escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022

266

* v) Get the converter name from escSeqStateTable_Result_2022[24] which is JISX208

267

268

269

270

/*Below are the 3 arrays depicting a state transition table*/
/*
 * Maps each byte value to the small code (1..29) used to build an escape
 * sequence key; bytes that cannot occur in a recognized escape sequence
 * map to 0. Rows are 10 entries wide, so entry [27] (ESC) is in row 2.
 */
static const int8_t normalize_esq_chars_2022[256] = {
/*       0      1       2       3       4      5       6        7       8       9           */

         0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,1      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,4      ,7      ,29     ,0
        ,2     ,24     ,26     ,27     ,0      ,3      ,23     ,6      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,5      ,8      ,9      ,10     ,11     ,12
        ,13    ,14     ,15     ,16     ,17     ,18     ,19     ,20     ,25     ,28
        ,0     ,0      ,21     ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,22    ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0      ,0
        ,0     ,0      ,0      ,0      ,0      ,0
};

301

302

#ifdef U_ENABLE_GENERIC_ISO_2022

303

304

* When the generic ISO-2022 converter is completely removed, not just disabled

305

* per #ifdef, then the following state table and the associated tables that are

306

* dimensioned with MAX_STATES_2022 should be trimmed.

307

308

* Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of

309

* the associated escape sequences starting with ESC ( B should be removed.

310

* This includes the ones with key values 1097 and all of the ones above 1000000.

311

312

* For the latter, the tables can simply be truncated.

313

* For the former, since the tables must be kept parallel, it is probably best

314

* to simply duplicate an adjacent table cell, parallel in all tables.

315

316

* It may make sense to restructure the tables, especially by using small search

317

* tables for the variants instead of indexing them parallel to the table here.

318

319

#endif

320

321

/* Number of entries in each of the parallel escape-sequence state tables. */
#define MAX_STATES_2022 74
/*
 * Keys of all recognized (partial) escape sequences, in ascending order.
 * A key is built by shifting the previous key left by 5 bits and adding
 * the normalize_esq_chars_2022[] code of the next byte. The index of a
 * key here is the index into the parallel Value/Result/nextState tables.
 */
static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = {
/*   0           1           2           3           4           5           6           7           8           9           */

     1          ,34         ,36         ,39         ,55         ,57         ,60         ,61         ,1093       ,1096
    ,1097       ,1098       ,1099       ,1100       ,1101       ,1102       ,1103       ,1104       ,1105       ,1106
    ,1109       ,1154       ,1157       ,1160       ,1161       ,1176       ,1178       ,1179       ,1254       ,1257
    ,1768       ,1773       ,1957       ,35105      ,36933      ,36936      ,36937      ,36938      ,36939      ,36940
    ,36942      ,36943      ,36944      ,36945      ,36946      ,36947      ,36948      ,37640      ,37642      ,37644
    ,37646      ,37711      ,37744      ,37745      ,37746      ,37747      ,37748      ,40133      ,40136      ,40138
    ,40139      ,40140      ,40141      ,1123363    ,35947624   ,35947625   ,35947626   ,35947627   ,35947629   ,35947630
    ,35947631   ,35947635   ,35947636   ,35947638
};

334

335

#ifdef U_ENABLE_GENERIC_ISO_2022

336

337

static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = {

338

/* 0 1 2 3 4 5 6 7 8 9 */

339

340

NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1"

341

,"latin1" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"JISX0201" ,"JISX0201" ,"latin1"

342

,"latin1" ,NULL ,"JISX-208" ,"ibm-5478" ,"JISX-208" ,NULL ,NULL ,NULL ,NULL ,"UTF8"

343

,"ISO-8859-1" ,"ISO-8859-7" ,"JIS-X-208" ,NULL ,"ibm-955" ,"ibm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383"

344

,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-5478" ,"ibm-949" ,"ISO-IR-165"

345

,"CNS-11643-1992,1" ,"CNS-11643-1992,2" ,"CNS-11643-1992,3" ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6" ,"CNS-11643-1992,7" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian"

346

,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089"

347

,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1"

348

};

349

350

#endif

351

352

static const UCNV_TableStates_2022 escSeqStateTable_Value_2022[MAX_STATES_2022] = {

353

/* 0 1 2 3 4 5 6 7 8 9 */

354

VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

355

,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

356

,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022

357

,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

358

,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

359

360

361

,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022

362

};

363

364

365

/* Type def for refactoring changeState_2022 code*/
/* Identifies which ISO-2022 variant's escape sequences are being processed. */
typedef enum{
#ifdef U_ENABLE_GENERIC_ISO_2022
    ISO_2022=0,
#endif
    ISO_2022_JP=1,
    ISO_2022_KR=2,
    ISO_2022_CN=3
} Variant2022;

374

375

/*********** ISO 2022 Converter Protos ***********/

376

static void

377

_ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t options, UErrorCode *errorCode);

378

379

static void

380

_ISO2022Close(UConverter *converter);

381

382

static void

383

_ISO2022Reset(UConverter *converter, UConverterResetChoice choice);

384

385

static const char*

386

_ISO2022getName(const UConverter* cnv);

387

388

static void

389

_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err);

390

391

static UConverter *

392

_ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status);

393

394

#ifdef U_ENABLE_GENERIC_ISO_2022

395

static void

396

T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err);

397

#endif

398

399

/*const UConverterSharedData _ISO2022Data;*/

400

static const UConverterSharedData _ISO2022JPData;

401

static const UConverterSharedData _ISO2022KRData;

402

static const UConverterSharedData _ISO2022CNData;

403

404

/*************** Converter implementations ******************/

405

406

/* The purpose of this function is to get around gcc compiler warnings. */

407

static U_INLINE void

408

fromUWriteUInt8(UConverter *cnv,

409

const char *bytes, int32_t length,

410

uint8_t **target, const char *targetLimit,

411

int32_t **offsets,

412

int32_t sourceIndex,

413

UErrorCode *pErrorCode)

414

{

415

char *targetChars = (char *)*target;

416

ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit,

417

offsets, sourceIndex, pErrorCode);

418

*target = (uint8_t*)targetChars;

419

420

}

421

422

static U_INLINE void

423

setInitialStateToUnicodeKR(UConverter* converter, UConverterDataISO2022 *myConverterData){

424

if(myConverterData->version == 1) {

425

UConverter *cnv = myConverterData->currentConverter;

426

427

cnv->toUnicodeStatus=0; /* offset */

428

cnv->mode=0; /* state */

429

cnv->toULength=0; /* byteIndex */

430

}

431

}

432

433

static U_INLINE void

434

setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){

435

/* in ISO-2022-KR the designator sequence appears only once

436

* in a file so we append it only once

437

438

if( converter->charErrorBufferLength==0){

439

440

converter->charErrorBufferLength = 4;

441

converter->charErrorBuffer[0] = 0x1b;

442

converter->charErrorBuffer[1] = 0x24;

443

converter->charErrorBuffer[2] = 0x29;

444

converter->charErrorBuffer[3] = 0x43;

445

}

446

if(myConverterData->version == 1) {

447

UConverter *cnv = myConverterData->currentConverter;

448

449

cnv->fromUChar32=0;

450

cnv->fromUnicodeStatus=1; /* prevLength */

451

}

452

}

453

454

static void

455

_ISO2022Open(UConverter *cnv, const char *name, const char *locale,uint32_t options, UErrorCode *errorCode){

456

457

char myLocale[6]={' ',' ',' ',' ',' ',' '};

458

459

cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022));

460

if(cnv->extraInfo != NULL) {

461

UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;

462

uint32_t version;

463

464

uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022));

465

myConverterData->currentType = ASCII1;

466

cnv->fromUnicodeStatus =FALSE;

467

if(locale){

468

uprv_strncpy(myLocale, locale, sizeof(myLocale));

469

}

470

version = options & UCNV_OPTIONS_VERSION_MASK;

471

myConverterData->version = version;

472

if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') &&

473

(myLocale[2]=='_' || myLocale[2]=='\0'))

474

{

475

size_t len=0;

476

/* open the required converters and cache them */

477

if(jpCharsetMasks[version]&CSM(ISO8859_7)) {

478

myConverterData->myConverterArray[ISO8859_7]= ucnv_loadSharedData("ISO8859_7", NULL, errorCode);

479

}

480

myConverterData->myConverterArray[JISX201] = ucnv_loadSharedData("JISX0201", NULL, errorCode);

481

myConverterData->myConverterArray[JISX208] = ucnv_loadSharedData("jisx-208", NULL, errorCode);

482

if(jpCharsetMasks[version]&CSM(JISX212)) {

483

myConverterData->myConverterArray[JISX212] = ucnv_loadSharedData("jisx-212", NULL, errorCode);

484

}

485

if(jpCharsetMasks[version]&CSM(GB2312)) {

486

myConverterData->myConverterArray[GB2312] = ucnv_loadSharedData("ibm-5478", NULL, errorCode); /* gb_2312_80-1 */

487

}

488

if(jpCharsetMasks[version]&CSM(KSC5601)) {

489

myConverterData->myConverterArray[KSC5601] = ucnv_loadSharedData("ksc_5601", NULL, errorCode);

490

}

491

492

/* set the function pointers to appropriate funtions */

493

cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);

494

uprv_strcpy(myConverterData->locale,"ja");

495

496

(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version=");

497

len = uprv_strlen(myConverterData->name);

498

myConverterData->name[len]=(char)(myConverterData->version+(int)'0');

499

myConverterData->name[len+1]='\0';

500

}

501

else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&

502

(myLocale[2]=='_' || myLocale[2]=='\0'))

503

{

504

if (version==1){

505

myConverterData->currentConverter=

506

ucnv_open("icu-internal-25546",errorCode);

507

508

if (U_FAILURE(*errorCode)) {

509

_ISO2022Close(cnv);

510

return;

511

}

512

513

(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1");

514

uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4);

515

cnv->subCharLen = myConverterData->currentConverter->subCharLen;

516

}else{

517

myConverterData->currentConverter=ucnv_open("ibm-949",errorCode);

518

519

if (U_FAILURE(*errorCode)) {

520

_ISO2022Close(cnv);

521

return;

522

}

523

524

myConverterData->version = 0;

525

(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0");

526

}

527

528

/* initialize the state variables */

529

setInitialStateToUnicodeKR(cnv, myConverterData);

530

setInitialStateFromUnicodeKR(cnv, myConverterData);

531

532

/* set the function pointers to appropriate funtions */

533

cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;

534

uprv_strcpy(myConverterData->locale,"ko");

535

}

536

else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&&

537

(myLocale[2]=='_' || myLocale[2]=='\0'))

538

{

539

540

/* open the required converters and cache them */

541

myConverterData->myConverterArray[GB2312_1] = ucnv_loadSharedData("ibm-5478", NULL, errorCode);

542

if(version==1) {

543

myConverterData->myConverterArray[ISO_IR_165] = ucnv_loadSharedData("iso-ir-165", NULL, errorCode);

544

}

545

myConverterData->myConverterArray[CNS_11643] = ucnv_loadSharedData("cns-11643-1992", NULL, errorCode);

546

547

548

/* set the function pointers to appropriate funtions */

549

cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;

550

uprv_strcpy(myConverterData->locale,"cn");

551

552

if (version==1){

553

(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1");

554

}else{

555

myConverterData->version = 0;

556

(void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0");

557

}

558

}

559

else{

560

#ifdef U_ENABLE_GENERIC_ISO_2022

561

myConverterData->isFirstBuffer = TRUE;

562

563

/* append the UTF-8 escape sequence */

564

cnv->charErrorBufferLength = 3;

565

cnv->charErrorBuffer[0] = 0x1b;

566

cnv->charErrorBuffer[1] = 0x25;

567

cnv->charErrorBuffer[2] = 0x42;

568

569

cnv->sharedData=(UConverterSharedData*)&_ISO2022Data;

570

/* initialize the state variables */

571

uprv_strcpy(myConverterData->name,"ISO_2022");

572

#else

573

*errorCode = U_UNSUPPORTED_ERROR;

574

return;

575

#endif

576

}

577

578

cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar;

579

580

if(U_FAILURE(*errorCode)) {

581

_ISO2022Close(cnv);

582

}

583

} else {

584

*errorCode = U_MEMORY_ALLOCATION_ERROR;

585

}

586

}

587

588

589

static void

590

_ISO2022Close(UConverter *converter) {

591

UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo);

592

UConverterSharedData **array = myData->myConverterArray;

593

int32_t i;

594

595

if (converter->extraInfo != NULL) {

596

/*close the array of converter pointers and free the memory*/

597

for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {

598

if(array[i]!=NULL) {

599

ucnv_unloadSharedDataIfReady(array[i]);

600

}

601

}

602

603

ucnv_close(myData->currentConverter);

604

605

if(!converter->isExtraLocal){

606

uprv_free (converter->extraInfo);

607

converter->extraInfo = NULL;

608

}

609

}

610

}

611

612

static void

613

_ISO2022Reset(UConverter *converter, UConverterResetChoice choice) {

614

UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo);

615

if(choice<=UCNV_RESET_TO_UNICODE) {

616

uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State));

617

myConverterData->key = 0;

618

}

619

if(choice!=UCNV_RESET_TO_UNICODE) {

620

uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State));

621

}

622

#ifdef U_ENABLE_GENERIC_ISO_2022

623

if(myConverterData->locale[0] == 0){

624

if(choice<=UCNV_RESET_TO_UNICODE) {

625

myConverterData->isFirstBuffer = TRUE;

626

myConverterData->key = 0;

627

if (converter->mode == UCNV_SO){

628

ucnv_close (myConverterData->currentConverter);

629

myConverterData->currentConverter=NULL;

630

}

631

converter->mode = UCNV_SI;

632

}

633

if(choice!=UCNV_RESET_TO_UNICODE) {

634

/* re-append UTF-8 escape sequence */

635

converter->charErrorBufferLength = 3;

636

converter->charErrorBuffer[0] = 0x1b;

637

converter->charErrorBuffer[1] = 0x28;

638

converter->charErrorBuffer[2] = 0x42;

639

}

640

}

641

else

642

#endif

643

{

644

/* reset the state variables */

645

if(myConverterData->locale[0] == 'k'){

646

if(choice<=UCNV_RESET_TO_UNICODE) {

647

setInitialStateToUnicodeKR(converter, myConverterData);

648

}

649

if(choice!=UCNV_RESET_TO_UNICODE) {

650

setInitialStateFromUnicodeKR(converter, myConverterData);

651

}

652

}

653

}

654

}

655

656

static const char*

657

_ISO2022getName(const UConverter* cnv){

658

if(cnv->extraInfo){

659

UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo;

660

return myData->name;

661

}

662

return NULL;

663

}

664

665

666

/*************** to unicode *******************/

667

/****************************************************************************

668

* Recognized escape sequences are

669

* <ESC>(B ASCII

670

* <ESC>.A ISO-8859-1

671

* <ESC>.F ISO-8859-7

672

* <ESC>(J JISX-201

673

* <ESC>(I JISX-201

674

* <ESC>$B JISX-208

675

* <ESC>$@ JISX-208

676

* <ESC>$(D JISX-212

677

* <ESC>$A GB2312

678

* <ESC>$(C KSC5601

679

680

static const StateEnum nextStateToUnicodeJP[MAX_STATES_2022]= {

681

/* 0 1 2 3 4 5 6 7 8 9 */

682

INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

683

,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STATE

684

,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

685

,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STATE

686

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

687

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

688

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

689

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

690

};

691

692

/*************** to unicode *******************/

693

static const StateEnum nextStateToUnicodeCN[MAX_STATES_2022]= {

694

/* 0 1 2 3 4 5 6 7 8 9 */

695

INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

696

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

697

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

698

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

699

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165

700

,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

701

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

702

,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE

703

};

704

705

706

static UCNV_TableStates_2022

707

getKey_2022(char c,int32_t* key,int32_t* offset){

708

int32_t togo;

709

int32_t low = 0;

710

int32_t hi = MAX_STATES_2022;

711

int32_t oldmid=0;

712

713

togo = normalize_esq_chars_2022[(uint8_t)c];

714

if(togo == 0) {

715

/* not a valid character anywhere in an escape sequence */

716

*key = 0;

717

*offset = 0;

718

return INVALID_2022;

719

}

720

togo = (*key << 5) + togo;

721

722

while (hi != low) /*binary search*/{

723

724

725

726

if (mid == oldmid)

727

break;

728

729

if (escSeqStateTable_Key_2022[mid] > togo){

730

hi = mid;

731

}

732

else if (escSeqStateTable_Key_2022[mid] < togo){

733

low = mid;

734

}

735

else /*we found it*/{

736

*key = togo;

737

*offset = mid;

738

return escSeqStateTable_Value_2022[mid];

739

}

740

oldmid = mid;

741

742

}

743

744

*key = 0;

745

*offset = 0;

746

return INVALID_2022;

747

}

748

749

/*runs through a state machine to determine the escape sequence - codepage correspondance

750

751

static void

752

changeState_2022(UConverter* _this,

753

const char** source,

754

const char* sourceLimit,

755

Variant2022 var,

756

UErrorCode* err){

757

UCNV_TableStates_2022 value;

758

UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);

759

uint32_t key = myData2022->key;

760

int32_t offset = 0;

761

char c;

762

763

value = VALID_NON_TERMINAL_2022;

764

while (*source < sourceLimit) {

765

c = *(*source)++;

766

_this->toUBytes[_this->toULength++]=(uint8_t)c;

767

value = getKey_2022(c,(int32_t *) &key, &offset);

768

769

switch (value){

770

771

case VALID_NON_TERMINAL_2022 :

772

/* continue with the loop */

773

break;

774

775

case VALID_TERMINAL_2022:

776

key = 0;

777

goto DONE;

778

779

case INVALID_2022:

780

goto DONE;

781

782

case VALID_MAYBE_TERMINAL_2022:

783

#ifdef U_ENABLE_GENERIC_ISO_2022

784

/* ESC ( B is ambiguous only for ISO_2022 itself */

785

if(var == ISO_2022) {

786

/* discard toUBytes[] for ESC ( B because this sequence is correct and complete */

787

_this->toULength = 0;

788

789

/* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */

790

791

/* continue with the loop */

792

value = VALID_NON_TERMINAL_2022;

793

break;

794

} else

795

#endif

796

{

797

/* not ISO_2022 itself, finish here */

798

value = VALID_TERMINAL_2022;

799

key = 0;

800

goto DONE;

801

}

802

}

803

}

804

805

DONE:

806

myData2022->key = key;

807

808

if (value == VALID_NON_TERMINAL_2022) {

809

/* indicate that the escape sequence is incomplete: key!=0 */

810

return;

811

} else if (value == INVALID_2022 ) {

812

*err = U_ILLEGAL_ESCAPE_SEQUENCE;

813

return;

814

} else /* value == VALID_TERMINAL_2022 */ {

815

switch(var){

816

#ifdef U_ENABLE_GENERIC_ISO_2022

817

case ISO_2022:

818

{

819

const char *chosenConverterName = escSeqStateTable_Result_2022[offset];

820

if(chosenConverterName == NULL) {

821

/* SS2 or SS3 */

822

*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

823

return;

824

}

825

826

_this->mode = UCNV_SI;

827

ucnv_close(myData2022->currentConverter);

828

myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err);

829

if(U_SUCCESS(*err)) {

830

myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;

831

_this->mode = UCNV_SO;

832

}

833

break;

834

}

835

#endif

836

case ISO_2022_JP:

837

{

838

StateEnum tempState=nextStateToUnicodeJP[offset];

839

switch(tempState) {

840

case INVALID_STATE:

841

*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

842

break;

843

case SS2_STATE:

844

if(myData2022->toU2022State.cs[2]!=0) {

845

if(myData2022->toU2022State.g<2) {

846

myData2022->toU2022State.prevG=myData2022->toU2022State.g;

847

}

848

myData2022->toU2022State.g=2;

849

} else {

850

/* illegal to have SS2 before a matching designator */

851

*err = U_ILLEGAL_ESCAPE_SEQUENCE;

852

}

853

break;

854

/* case SS3_STATE: not used in ISO-2022-JP-x */

855

case ISO8859_1:

856

case ISO8859_7:

857

if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {

858

*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

859

} else {

860

/* G2 charset for SS2 */

861

myData2022->toU2022State.cs[2]=(int8_t)tempState;

862

}

863

break;

864

default:

865

if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) {

866

*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

867

} else {

868

/* G0 charset */

869

myData2022->toU2022State.cs[0]=(int8_t)tempState;

870

}

871

break;

872

}

873

}

874

break;

875

case ISO_2022_CN:

876

{

877

StateEnum tempState=nextStateToUnicodeCN[offset];

878

switch(tempState) {

879

case INVALID_STATE:

880

*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

881

break;

882

case SS2_STATE:

883

if(myData2022->toU2022State.cs[2]!=0) {

884

if(myData2022->toU2022State.g<2) {

885

myData2022->toU2022State.prevG=myData2022->toU2022State.g;

886

}

887

myData2022->toU2022State.g=2;

888

} else {

889

/* illegal to have SS2 before a matching designator */

890

*err = U_ILLEGAL_ESCAPE_SEQUENCE;

891

}

892

break;

893

case SS3_STATE:

894

if(myData2022->toU2022State.cs[3]!=0) {

895

if(myData2022->toU2022State.g<2) {

896

myData2022->toU2022State.prevG=myData2022->toU2022State.g;

897

}

898

myData2022->toU2022State.g=3;

899

} else {

900

/* illegal to have SS3 before a matching designator */

901

*err = U_ILLEGAL_ESCAPE_SEQUENCE;

902

}

903

break;

904

case ISO_IR_165:

905

if(myData2022->version==0) {

906

*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

907

break;

908

}

909

/*fall through*/

910

case GB2312_1:

911

/*fall through*/

912

case CNS_11643_1:

913

myData2022->toU2022State.cs[1]=(int8_t)tempState;

914

break;

915

case CNS_11643_2:

916

myData2022->toU2022State.cs[2]=(int8_t)tempState;

917

break;

918

default:

919

/* other CNS 11643 planes */

920

if(myData2022->version==0) {

921

*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

922

} else {

923

myData2022->toU2022State.cs[3]=(int8_t)tempState;

924

}

925

break;

926

}

927

}

928

break;

929

case ISO_2022_KR:

930

if(offset==0x30){

931

/* nothing to be done, just accept this one escape sequence */

932

} else {

933

*err = U_UNSUPPORTED_ESCAPE_SEQUENCE;

934

}

935

break;

936

937

default:

938

*err = U_ILLEGAL_ESCAPE_SEQUENCE;

939

break;

940

}

941

}

942

if(U_SUCCESS(*err)) {

943

_this->toULength = 0;

944

}

945

}

946

947

/*Checks the characters of the buffer against valid 2022 escape sequences

948

*if the match we return a pointer to the initial start of the sequence otherwise

949

*we return sourceLimit

950

951

/*for 2022 looks ahead in the stream

952

*to determine the longest possible convertible

953

*data stream

954

955

static U_INLINE const char*

956

getEndOfBuffer_2022(const char** source,

957

const char* sourceLimit,

958

UBool flush){

959

960

const char* mySource = *source;

961

962

#ifdef U_ENABLE_GENERIC_ISO_2022

963

if (*source >= sourceLimit)

964

return sourceLimit;

965

966

do{

967

968

if (*mySource == ESC_2022){

969

int8_t i;

970

int32_t key = 0;

971

int32_t offset;

972

UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022;

973

974

/* Kludge: I could not

975

* figure out the reason for validating an escape sequence

976

* twice - once here and once in changeState_2022().

977

* is it possible to have an ESC character in a ISO2022

978

* byte stream which is valid in a code page? Is it legal?

979

980

for (i=0;

981

(mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022);

982

i++) {

983

value = getKey_2022(*(mySource+i), &key, &offset);

984

}

985

if (value > 0 || *mySource==ESC_2022)

986

return mySource;

987

988

if ((value == VALID_NON_TERMINAL_2022)&&(!flush) )

989

return sourceLimit;

990

}

991

}while (++mySource < sourceLimit);

992

993

return sourceLimit;

994

#else

995

while(mySource < sourceLimit && *mySource != ESC_2022) {

996

++mySource;

997

}

998

return mySource;

999

#endif

1000

}

1001

1002

1003

/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c

1004

* any future change in _MBCSFromUChar32() function should be reflected here.

1005

* @return number of bytes in *value; negative number if fallback; 0 if no mapping

1006

1007

static U_INLINE int32_t

1008

MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData,

1009

UChar32 c,

1010

uint32_t* value,

1011

UBool useFallback,

1012

int outputType)

1013

{

1014

const int32_t *cx;

1015

const uint16_t *table;

1016

uint32_t stage2Entry;

1017

uint32_t myValue;

1018

int32_t length;

1019

const uint8_t *p;

1020

1021

* TODO(markus): Use and require new, faster MBCS conversion table structures.

1022

* Use internal version of ucnv_open() that verifies that the new structures are available,

1023

* else U_INTERNAL_PROGRAM_ERROR.

1024

1025

/* BMP-only codepages are stored without stage 1 entries for supplementary code points */

1026

if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {

1027

table=sharedData->mbcs.fromUnicodeTable;

1028

stage2Entry=MBCS_STAGE_2_FROM_U(table, c);

1029

/* get the bytes and the length for the output */

1030

if(outputType==MBCS_OUTPUT_2){

1031

myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);

1032

if(myValue<=0xff) {

1033

length=1;

1034

} else {

1035

length=2;

1036

}

1037

} else /* outputType==MBCS_OUTPUT_3 */ {

1038

p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c);

1039

myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2];

1040

if(myValue<=0xff) {

1041

length=1;

1042

} else if(myValue<=0xffff) {

1043

length=2;

1044

} else {

1045

length=3;

1046

}

1047

}

1048

1049

* TODO(markus): Use Shift-JIS table for JIS X 0208, to save mapping table space.

1050

* Pass in parameter for type of output bytes, for validation and shifting:

1051

* - Direct: Pass bytes through, but forbid control codes 00-1F (except SI/SO/ESC) and space 20?

1052

* (Need to allow some (TAB/LF/CR) or most of them for ASCII and maybe JIS X 0201.)

1053

* - A1-FE: Subtract 80 after range check.

1054

* - SJIS: Shift DBCS result to 21-7E x 21-7E.

1055

1056

/* is this code point assigned, or do we use fallbacks? */

1057

if((stage2Entry&(1<<(16+(c&0xf))))!=0) {

1058

/* assigned */

1059

*value=myValue;

1060

return length;

1061

} else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) {

1062

1063

* We allow a 0 byte output if the "assigned" bit is set for this entry.

1064

* There is no way with this data structure for fallback output

1065

* to be a zero byte.

1066

1067

*value=myValue;

1068

return -length;

1069

}

1070

}

1071

1072

cx=sharedData->mbcs.extIndexes;

1073

if(cx!=NULL) {

1074

return ucnv_extSimpleMatchFromU(cx, c, value, useFallback);

1075

}

1076

1077

/* unassigned */

1078

return 0;

1079

}

1080

1081

/* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c

1082

* any future change in _MBCSSingleFromUChar32() function should be reflected here.

1083

* @param retval pointer to output byte

1084

* @return 1 roundtrip byte 0 no mapping -1 fallback byte

1085

1086

static U_INLINE int32_t

1087

MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData,

1088

UChar32 c,

1089

uint32_t* retval,

1090

UBool useFallback)

1091

{

1092

const uint16_t *table;

1093

int32_t value;

1094

/* BMP-only codepages are stored without stage 1 entries for supplementary code points */

1095

if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) {

1096

return 0;

1097

}

1098

/* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */

1099

table=sharedData->mbcs.fromUnicodeTable;

1100

/* get the byte for the output */

1101

value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c);

1102

/* is this code point assigned, or do we use fallbacks? */

1103

*retval=(uint32_t)(value&0xff);

1104

if(value>=0xf00) {

1105

return 1; /* roundtrip */

1106

} else if(useFallback ? value>=0x800 : value>=0xc00) {

1107

return -1; /* fallback taken */

1108

} else {

1109

return 0; /* no mapping */

1110

}

1111

}

1112

1113

#ifdef U_ENABLE_GENERIC_ISO_2022

1114

1115

/**********************************************************************************

1116

* ISO-2022 Converter

1117

1118

1119

1120

1121

static void

1122

T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args,

1123

UErrorCode* err){

1124

const char* mySourceLimit, *realSourceLimit;

1125

const char* sourceStart;

1126

const UChar* myTargetStart;

1127

UConverter* saveThis;

1128

UConverterDataISO2022* myData;

1129

int8_t length;

1130

1131

saveThis = args->converter;

1132

myData=((UConverterDataISO2022*)(saveThis->extraInfo));

1133

1134

realSourceLimit = args->sourceLimit;

1135

while (args->source < realSourceLimit) {

1136

if(myData->key == 0) { /* are we in the middle of an escape sequence? */

1137

/*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/

1138

mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush);

1139

1140

if(args->source < mySourceLimit) {

1141

if(myData->currentConverter==NULL) {

1142

myData->currentConverter = ucnv_open("ASCII",err);

1143

if(U_FAILURE(*err)){

1144

return;

1145

}

1146

1147

myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP;

1148

saveThis->mode = UCNV_SO;

1149

}

1150

1151

/* convert to before the ESC or until the end of the buffer */

1152

myData->isFirstBuffer=FALSE;

1153

sourceStart = args->source;

1154

myTargetStart = args->target;

1155

args->converter = myData->currentConverter;

1156

ucnv_toUnicode(args->converter,

1157

&args->target,

1158

args->targetLimit,

1159

&args->source,

1160

mySourceLimit,

1161

args->offsets,

1162

(UBool)(args->flush && mySourceLimit == realSourceLimit),

1163

err);

1164

args->converter = saveThis;

1165

1166

if (*err == U_BUFFER_OVERFLOW_ERROR) {

1167

/* move the overflow buffer */

1168

length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength;

1169

myData->currentConverter->UCharErrorBufferLength = 0;

1170

if(length > 0) {

1171

uprv_memcpy(saveThis->UCharErrorBuffer,

1172

myData->currentConverter->UCharErrorBuffer,

1173

length*U_SIZEOF_UCHAR);

1174

}

1175

return;

1176

}

1177

1178

1179

* At least one of:

1180

* -Error while converting

1181

* -Done with entire buffer

1182

* -Need to write offsets or update the current offset

1183

* (leave that up to the code in ucnv.c)

1184

1185

* or else we just stopped at an ESC byte and continue with changeState_2022()

1186

1187

if (U_FAILURE(*err) ||

1188

(args->source == realSourceLimit) ||

1189

(args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) ||

1190

(mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0))

1191

) {

1192

/* copy partial or error input for truncated detection and error handling */

1193

if(U_FAILURE(*err)) {

1194

length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength;

1195

if(length > 0) {

1196

uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length);

1197

}

1198

} else {

1199

length = saveThis->toULength = myData->currentConverter->toULength;

1200

if(length > 0) {

1201

uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length);

1202

if(args->source < mySourceLimit) {

1203

*err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */

1204

}

1205

}

1206

}

1207

return;

1208

}

1209

}

1210

}

1211

1212

sourceStart = args->source;

1213

changeState_2022(args->converter,

1214

&(args->source),

1215

realSourceLimit,

1216

ISO_2022,

1217

err);

1218

if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) {

1219

/* let the ucnv.c code update its current offset */

1220

return;

1221

}

1222

}

1223

}

1224

1225

#endif

1226

1227

1228

* To Unicode Callback helper function

1229

1230

static void

1231

toUnicodeCallback(UConverter *cnv,

1232

const uint32_t sourceChar, const uint32_t targetUniChar,

1233

UErrorCode* err){

1234

if(sourceChar>0xff){

1235

cnv->toUBytes[0] = (uint8_t)(sourceChar>>8);

1236

cnv->toUBytes[1] = (uint8_t)sourceChar;

1237

cnv->toULength = 2;

1238

}

1239

else{

1240

cnv->toUBytes[0] =(char) sourceChar;

1241

cnv->toULength = 2;

1242

}

1243

1244

if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){

1245

*err = U_INVALID_CHAR_FOUND;

1246

}

1247

else{

1248

*err = U_ILLEGAL_CHAR_FOUND;

1249

}

1250

}

1251

1252

/**************************************ISO-2022-JP*************************************************/

1253

1254

/************************************** IMPORTANT **************************************************
* The UConverter_fromUnicode_ISO2022_JP converter does not use the ucnv_fromUnicode() functions for
* SBCS, DBCS and MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32().
* The converter iterates over each Unicode code point to obtain the equivalent code points from the
* supported codepages. Since the source buffer is processed one char at a time, it makes sense to
* reduce, as far as possible, the extra processing a canned converter would do.
*
* If the implementation of these macros or the structure of the sharedData struct changes in the
* future, make sure that ISO-2022 is also changed.
***************************************************************************************************
*/

1265

1266

1267

/***************************************************************************************************
* Rules for ISO-2022-jp encoding
* (i)   Escape sequences must be fully contained within a line; they should not
*       span new lines or CRs
* (ii)  If the last character on a line is represented by two bytes then an ASCII or
*       JIS-Roman character escape sequence should follow before the line terminates
* (iii) If the first character on the line is represented by two bytes then a two
*       byte character escape sequence should precede it
* (iv)  If no escape sequence is encountered then the characters are ASCII
* (v)   Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2,
*       and invoked with SS2 (ESC N).
* (vi)  If there is any G0 designation in text, there must be a switch to
*       ASCII or to JIS X 0201-Roman before a space character (but not
*       necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control
*       characters such as tab or CRLF.
* (vii) Supported encodings:
*       ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7
*
*       source : RFC-1554
*
*       JISX201, JISX208,JISX212 : new .cnv data files created
*       KSC5601 : alias to ibm-949 mapping table
*       GB2312 : alias to ibm-1386 mapping table
*       ISO-8859-1 : Algorithmic implemented as LATIN1 case
*       ISO-8859-7 : alias to ibm-9409 mapping table
*/

1292

1293

1294

/* preference order of JP charsets */

1295

static const StateEnum jpCharsetPref[]={

1296

ASCII,

1297

JISX201,

1298

ISO8859_1,

1299

ISO8859_7,

1300

JISX208,

1301

JISX212,

1302

GB2312,

1303

KSC5601,

1304

HWKANA_7BIT

1305

};

1306

1307

1308

/*
 * The escape sequences must be in order of the enum constants like JISX201 = 3,
 * not in order of jpCharsetPref[]!
 *
 * Each row is the designation sequence written when the from-Unicode
 * converter switches to the charset with that enum value; the matching
 * byte counts are kept in escSeqCharsLen[].
 */
static const char escSeqChars[][6] ={
    "\x1B\x28\x42",         /* <ESC>(B  ASCII       */
    "\x1B\x2E\x41",         /* <ESC>.A  ISO-8859-1  */
    "\x1B\x2E\x46",         /* <ESC>.F  ISO-8859-7  */
    "\x1B\x28\x4A",         /* <ESC>(J  JISX-201    */
    "\x1B\x24\x42",         /* <ESC>$B  JISX-208    */
    "\x1B\x24\x28\x44",     /* <ESC>$(D JISX-212    */
    "\x1B\x24\x41",         /* <ESC>$A  GB2312      */
    "\x1B\x24\x28\x43",     /* <ESC>$(C KSC5601     */
    "\x1B\x28\x49"          /* <ESC>(I  HWKANA_7BIT */

};

1323

/*
 * Byte length of each escape sequence in escSeqChars[], in the same order
 * (indexed by the charset enum value).
 */
static const int32_t escSeqCharsLen[] ={
    3, /* length of <ESC>(B  ASCII       */
    3, /* length of <ESC>.A  ISO-8859-1  */
    3, /* length of <ESC>.F  ISO-8859-7  */
    3, /* length of <ESC>(J  JISX-201    */
    3, /* length of <ESC>$B  JISX-208    */
    4, /* length of <ESC>$(D JISX-212    */
    3, /* length of <ESC>$A  GB2312      */
    4, /* length of <ESC>$(C KSC5601     */
    3  /* length of <ESC>(I  HWKANA_7BIT */
};

1334

1335

1336

/*
* The iteration over various code pages works this way:
* i)   Get the currentState from myConverterData->currentState
* ii)  Check if the character is mapped to a valid character in the currentState
*      Yes ->  a) set the initIterState to currentState
*              b) remain in this state until an invalid character is found
*      No  ->  a) go to the next code page and find the character
* iii) Before changing the state, increment the current state and check if the current state
*      is equal to the initIterState
*      Yes ->  A character that cannot be represented in any of the supported encodings;
*              break and return a U_INVALID_CHARACTER error
*      No  ->  Continue and find the character in next code page
*/

1347

1348

1349

/*
* TODO: Implement a priority technique where the users are allowed to set the priority of code pages
*/

1350

1351

1352

static void

1353

UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) {

1354

UConverter *cnv = args->converter;

1355

UConverterDataISO2022 *converterData;

1356

ISO2022State *pFromU2022State;

1357

uint8_t *target = (uint8_t *) args->target;

1358

const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;

1359

const UChar* source = args->source;

1360

const UChar* sourceLimit = args->sourceLimit;

1361

int32_t* offsets = args->offsets;

1362

UChar32 sourceChar;

1363

char buffer[8];

1364

int32_t len, outLen;

1365

int8_t choices[10];

1366

int32_t choiceCount;

1367

uint32_t targetValue = 0;

1368

UBool useFallback;

1369

1370

int32_t i;

1371

int8_t cs, g;

1372

1373

/* set up the state */

1374

converterData = (UConverterDataISO2022*)cnv->extraInfo;

1375

pFromU2022State = &converterData->fromU2022State;

1376

1377

choiceCount = 0;

1378

1379

/* check if the last codepoint of previous buffer was a lead surrogate*/

1380

if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {

1381

goto getTrail;

1382

}

1383

1384

while(source < sourceLimit) {

1385

if(target < targetLimit) {

1386

1387

sourceChar = *(source++);

1388

/*check if the char is a First surrogate*/

1389

if(UTF_IS_SURROGATE(sourceChar)) {

1390

if(UTF_IS_SURROGATE_FIRST(sourceChar)) {

1391

getTrail:

1392

/*look ahead to find the trail surrogate*/

1393

if(source < sourceLimit) {

1394

/* test the following code unit */

1395

UChar trail=(UChar) *source;

1396

if(UTF_IS_SECOND_SURROGATE(trail)) {

1397

source++;

1398

sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);

1399

cnv->fromUChar32=0x00;

1400

/* convert this supplementary code point */

1401

/* exit this condition tree */

1402

} else {

1403

/* this is an unmatched lead code unit (1st surrogate) */

1404

/* callback(illegal) */

1405

*err=U_ILLEGAL_CHAR_FOUND;

1406

cnv->fromUChar32=sourceChar;

1407

break;

1408

}

1409

} else {

1410

/* no more input */

1411

cnv->fromUChar32=sourceChar;

1412

break;

1413

}

1414

} else {

1415

/* this is an unmatched trail code unit (2nd surrogate) */

1416

/* callback(illegal) */

1417

*err=U_ILLEGAL_CHAR_FOUND;

1418

cnv->fromUChar32=sourceChar;

1419

break;

1420

}

1421

}

1422

1423

/* do not convert SO/SI/ESC */

1424

if(IS_2022_CONTROL(sourceChar)) {

1425

/* callback(illegal) */

1426

*err=U_ILLEGAL_CHAR_FOUND;

1427

cnv->fromUChar32=sourceChar;

1428

break;

1429

}

1430

1431

/* do the conversion */

1432

1433

if(choiceCount == 0) {

1434

uint16_t csm;

1435

1436

1437

* The csm variable keeps track of which charsets are allowed

1438

* and not used yet while building the choices[].

1439

1440

csm = jpCharsetMasks[converterData->version];

1441

choiceCount = 0;

1442

1443

/* JIS7/8: try single-byte half-width Katakana before JISX208 */

1444

if(converterData->version == 3 || converterData->version == 4) {

1445

choices[choiceCount++] = (int8_t)HWKANA_7BIT;

1446

}

1447

/* Do not try single-byte half-width Katakana for other versions. */

1448

csm &= ~CSM(HWKANA_7BIT);

1449

1450

/* try the current G0 charset */

1451

choices[choiceCount++] = cs = pFromU2022State->cs[0];

1452

csm &= ~CSM(cs);

1453

1454

/* try the current G2 charset */

1455

if((cs = pFromU2022State->cs[2]) != 0) {

1456

choices[choiceCount++] = cs;

1457

csm &= ~CSM(cs);

1458

}

1459

1460

/* try all the other possible charsets */

1461

for(i = 0; i < LENGTHOF(jpCharsetPref); ++i) {

1462

cs = (int8_t)jpCharsetPref[i];

1463

if(CSM(cs) & csm) {

1464

choices[choiceCount++] = cs;

1465

csm &= ~CSM(cs);

1466

}

1467

}

1468

}

1469

1470

cs = g = 0;

1471

1472

* len==0: no mapping found yet

1473

* len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks

1474

* len>0: found a roundtrip result, done

1475

1476

len = 0;

1477

1478

* We will turn off useFallback after finding a fallback,

1479

* but we still get fallbacks from PUA code points as usual.

1480

* Therefore, we will also need to check that we don't overwrite

1481

* an early fallback with a later one.

1482

1483

useFallback = cnv->useFallback;

1484

1485

for(i = 0; i < choiceCount && len <= 0; ++i) {

1486

uint32_t value;

1487

int32_t len2;

1488

int8_t cs0 = choices[i];

1489

switch(cs0) {

1490

case ASCII:

1491

if(sourceChar <= 0x7f) {

1492

targetValue = (uint32_t)sourceChar;

1493

len = 1;

1494

cs = cs0;

1495

g = 0;

1496

}

1497

break;

1498

case ISO8859_1:

1499

if(GR96_START <= sourceChar && sourceChar <= GR96_END) {

1500

targetValue = (uint32_t)sourceChar - 0x80;

1501

len = 1;

1502

cs = cs0;

1503

g = 2;

1504

}

1505

break;

1506

case HWKANA_7BIT:

1507

if((uint32_t)(HWKANA_END-sourceChar)<=(HWKANA_END-HWKANA_START)) {

1508

if(converterData->version==3) {

1509

/* JIS7: use G1 (SO) */

1510

/* Shift U+FF61..U+FF9F to bytes 21..5F. */

1511

targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21));

1512

len = 1;

1513

pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */

1514

g = 1;

1515

} else if(converterData->version==4) {

1516

/* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */

1517

/* Shift U+FF61..U+FF9F to bytes A1..DF. */

1518

targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1));

1519

len = 1;

1520

1521

cs = pFromU2022State->cs[0];

1522

if(IS_JP_DBCS(cs)) {

1523

/* switch from a DBCS charset to JISX201 */

1524

cs = (int8_t)JISX201;

1525

}

1526

/* else stay in the current G0 charset */

1527

g = 0;

1528

}

1529

/* else do not use HWKANA_7BIT with other versions */

1530

}

1531

break;

1532

case JISX201:

1533

/* G0 SBCS */

1534

len2 = MBCS_SINGLE_FROM_UCHAR32(

1535

converterData->myConverterArray[cs0],

1536

sourceChar, &value,

1537

useFallback);

1538

if(len2 != 0 && !(len2 < 0 && len != 0) && value <= 0x7f) {

1539

targetValue = value;

1540

len = len2;

1541

cs = cs0;

1542

g = 0;

1543

useFallback = FALSE;

1544

}

1545

break;

1546

case ISO8859_7:

1547

/* G0 SBCS forced to 7-bit output */

1548

len2 = MBCS_SINGLE_FROM_UCHAR32(

1549

converterData->myConverterArray[cs0],

1550

sourceChar, &value,

1551

useFallback);

1552

if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) {

1553

targetValue = value - 0x80;

1554

len = len2;

1555

cs = cs0;

1556

g = 2;

1557

useFallback = FALSE;

1558

}

1559

break;

1560

default:

1561

/* G0 DBCS */

1562

len2 = MBCS_FROM_UCHAR32_ISO2022(

1563

converterData->myConverterArray[cs0],

1564

sourceChar, &value,

1565

useFallback, MBCS_OUTPUT_2);

1566

if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */

1567

if(cs0 == KSC5601) {

1568

1569

* Check for valid bytes for the encoding scheme.

1570

* This is necessary because the sub-converter (windows-949)

1571

* has a broader encoding scheme than is valid for 2022.

1572

1573

* Check that the result is a 2-byte value with each byte in the range A1..FE

1574

* (strict EUC-KR DBCS) before accepting it and subtracting 0x80 from each byte

1575

* to move it to the ISO 2022 range 21..7E.

1576

1577

if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) &&

1578

(uint8_t)(value - 0xa1) <= (0xfe - 0xa1)

1579

) {

1580

value -= 0x8080; /* shift down to 21..7e byte range */

1581

} else {

1582

break; /* not valid for ISO 2022 */

1583

}

1584

}

1585

targetValue = value;

1586

len = len2;

1587

cs = cs0;

1588

g = 0;

1589

useFallback = FALSE;

1590

}

1591

break;

1592

}

1593

}

1594

1595

if(len != 0) {

1596

if(len < 0) {

1597

len = -len; /* fallback */

1598

}

1599

outLen = 0; /* count output bytes */

1600

1601

/* write SI if necessary (only for JIS7) */

1602

if(pFromU2022State->g == 1 && g == 0) {

1603

buffer[outLen++] = UCNV_SI;

1604

pFromU2022State->g = 0;

1605

}

1606

1607

/* write the designation sequence if necessary */

1608

if(cs != pFromU2022State->cs[g]) {

1609

int32_t escLen = escSeqCharsLen[cs];

1610

uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen);

1611

outLen += escLen;

1612

pFromU2022State->cs[g] = cs;

1613

1614

/* invalidate the choices[] */

1615

choiceCount = 0;

1616

}

1617

1618

/* write the shift sequence if necessary */

1619

if(g != pFromU2022State->g) {

1620

switch(g) {

1621

/* case 0 handled before writing escapes */

1622

case 1:

1623

buffer[outLen++] = UCNV_SO;

1624

pFromU2022State->g = 1;

1625

break;

1626

default: /* case 2 */

1627

buffer[outLen++] = 0x1b;

1628

buffer[outLen++] = 0x4e;

1629

break;

1630

/* no case 3: no SS3 in ISO-2022-JP-x */

1631

}

1632

}

1633

1634

/* write the output bytes */

1635

if(len == 1) {

1636

buffer[outLen++] = (char)targetValue;

1637

} else /* len == 2 */ {

1638

buffer[outLen++] = (char)(targetValue >> 8);

1639

buffer[outLen++] = (char)targetValue;

1640

}

1641

} else {

1642

1643

* if we cannot find the character after checking all codepages

1644

* then this is an error

1645

1646

*err = U_INVALID_CHAR_FOUND;

1647

cnv->fromUChar32=sourceChar;

1648

break;

1649

}

1650

1651

if(sourceChar == CR || sourceChar == LF) {

1652

/* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */

1653

pFromU2022State->cs[2] = 0;

1654

choiceCount = 0;

1655

}

1656

1657

/* output outLen>0 bytes in buffer[] */

1658

if(outLen == 1) {

1659

*target++ = buffer[0];

1660

if(offsets) {

1661

*offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */

1662

}

1663

} else if(outLen == 2 && (target + 2) <= targetLimit) {

1664

*target++ = buffer[0];

1665

*target++ = buffer[1];

1666

if(offsets) {

1667

int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));

1668

*offsets++ = sourceIndex;

1669

*offsets++ = sourceIndex;

1670

}

1671

} else {

1672

fromUWriteUInt8(

1673

cnv,

1674

buffer, outLen,

1675

&target, (const char *)targetLimit,

1676

&offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),

1677

err);

1678

if(U_FAILURE(*err)) {

1679

break;

1680

}

1681

}

1682

} /* end if(myTargetIndex<myTargetLength) */

1683

else{

1684

*err =U_BUFFER_OVERFLOW_ERROR;

1685

break;

1686

}

1687

1688

}/* end while(mySourceIndex<mySourceLength) */

1689

1690

1691

* the end of the input stream and detection of truncated input

1692

* are handled by the framework, but for ISO-2022-JP conversion

1693

* we need to be in ASCII mode at the very end

1694

1695

* conditions:

1696

* successful

1697

* in SO mode or not in ASCII mode

1698

* end of input and no truncated input

1699

1700

if( U_SUCCESS(*err) &&

1701

(pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) &&

1702

args->flush && source>=sourceLimit && cnv->fromUChar32==0

1703

) {

1704

int32_t sourceIndex;

1705

1706

outLen = 0;

1707

1708

if(pFromU2022State->g != 0) {

1709

buffer[outLen++] = UCNV_SI;

1710

pFromU2022State->g = 0;

1711

}

1712

1713

if(pFromU2022State->cs[0] != ASCII) {

1714

int32_t escLen = escSeqCharsLen[ASCII];

1715

uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen);

1716

outLen += escLen;

1717

pFromU2022State->cs[0] = (int8_t)ASCII;

1718

}

1719

1720

/* get the source index of the last input character */

1721

1722

* TODO this would be simpler and more reliable if we used a pair

1723

* of sourceIndex/prevSourceIndex like in ucnvmbcs.c

1724

* so that we could simply use the prevSourceIndex here;

1725

* this code gives an incorrect result for the rare case of an unmatched

1726

* trail surrogate that is alone in the last buffer of the text stream

1727

1728

sourceIndex=(int32_t)(source-args->source);

1729

if(sourceIndex>0) {

1730

--sourceIndex;

1731

if( U16_IS_TRAIL(args->source[sourceIndex]) &&

1732

(sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))

1733

) {

1734

--sourceIndex;

1735

}

1736

} else {

1737

sourceIndex=-1;

1738

}

1739

1740

fromUWriteUInt8(

1741

cnv,

1742

buffer, outLen,

1743

&target, (const char *)targetLimit,

1744

&offsets, sourceIndex,

1745

err);

1746

}

1747

1748

/*save the state and return */

1749

args->source = source;

1750

args->target = (char*)target;

1751

}

1752

1753

/*************** to unicode *******************/

1754

1755

static void

1756

UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

1757

UErrorCode* err){

1758

char tempBuf[3];

1759

const char *mySource = (char *) args->source;

1760

UChar *myTarget = args->target;

1761

const char *mySourceLimit = args->sourceLimit;

1762

uint32_t targetUniChar = 0x0000;

1763

uint32_t mySourceChar = 0x0000;

1764

UConverterDataISO2022* myData;

1765

ISO2022State *pToU2022State;

1766

StateEnum cs;

1767

1768

myData=(UConverterDataISO2022*)(args->converter->extraInfo);

1769

pToU2022State = &myData->toU2022State;

1770

1771

if(myData->key != 0) {

1772

/* continue with a partial escape sequence */

1773

goto escape;

1774

} else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {

1775

/* continue with a partial double-byte character */

1776

mySourceChar = args->converter->toUBytes[0];

1777

args->converter->toULength = 0;

1778

cs = (StateEnum)pToU2022State->cs[pToU2022State->g];

1779

goto getTrailByte;

1780

}

1781

1782

while(mySource < mySourceLimit){

1783

1784

targetUniChar =missingCharMarker;

1785

1786

if(myTarget < args->targetLimit){

1787

1788

mySourceChar= (unsigned char) *mySource++;

1789

1790

switch(mySourceChar) {

1791

case UCNV_SI:

1792

if(myData->version==3) {

1793

pToU2022State->g=0;

1794

continue;

1795

} else {

1796

/* only JIS7 uses SI/SO, not ISO-2022-JP-x */

1797

break;

1798

}

1799

1800

case UCNV_SO:

1801

if(myData->version==3) {

1802

/* JIS7: switch to G1 half-width Katakana */

1803

pToU2022State->cs[1] = (int8_t)HWKANA_7BIT;

1804

pToU2022State->g=1;

1805

continue;

1806

} else {

1807

/* only JIS7 uses SI/SO, not ISO-2022-JP-x */

1808

break;

1809

}

1810

1811

case ESC_2022:

1812

mySource--;

1813

escape:

1814

changeState_2022(args->converter,&(mySource),

1815

mySourceLimit, ISO_2022_JP,err);

1816

1817

/* invalid or illegal escape sequence */

1818

if(U_FAILURE(*err)){

1819

args->target = myTarget;

1820

args->source = mySource;

1821

return;

1822

}

1823

continue;

1824

1825

/* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */

1826

1827

case CR:

1828

/*falls through*/

1829

case LF:

1830

/* automatically reset to single-byte mode */

1831

if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) {

1832

pToU2022State->cs[0] = (int8_t)ASCII;

1833

}

1834

pToU2022State->cs[2] = 0;

1835

pToU2022State->g = 0;

1836

/* falls through */

1837

default:

1838

/* convert one or two bytes */

1839

cs = (StateEnum)pToU2022State->cs[pToU2022State->g];

1840

if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 &&

1841

!IS_JP_DBCS(cs)

1842

) {

1843

/* 8-bit halfwidth katakana in any single-byte mode for JIS8 */

1844

targetUniChar = mySourceChar + (HWKANA_START - 0xa1);

1845

1846

/* return from a single-shift state to the previous one */

1847

if(pToU2022State->g >= 2) {

1848

pToU2022State->g=pToU2022State->prevG;

1849

}

1850

} else switch(cs) {

1851

case ASCII:

1852

if(mySourceChar <= 0x7f) {

1853

targetUniChar = mySourceChar;

1854

}

1855

break;

1856

case ISO8859_1:

1857

if(mySourceChar <= 0x7f) {

1858

targetUniChar = mySourceChar + 0x80;

1859

}

1860

/* return from a single-shift state to the previous one */

1861

pToU2022State->g=pToU2022State->prevG;

1862

break;

1863

case ISO8859_7:

1864

if(mySourceChar <= 0x7f) {

1865

/* convert mySourceChar+0x80 to use a normal 8-bit table */

1866

targetUniChar =

1867

_MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(

1868

myData->myConverterArray[cs],

1869

mySourceChar + 0x80);

1870

}

1871

/* return from a single-shift state to the previous one */

1872

pToU2022State->g=pToU2022State->prevG;

1873

break;

1874

case JISX201:

1875

if(mySourceChar <= 0x7f) {

1876

targetUniChar =

1877

_MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(

1878

myData->myConverterArray[cs],

1879

mySourceChar);

1880

}

1881

break;

1882

case HWKANA_7BIT:

1883

if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) {

1884

/* 7-bit halfwidth Katakana */

1885

targetUniChar = mySourceChar + (HWKANA_START - 0x21);

1886

}

1887

break;

1888

default:

1889

/* G0 DBCS */

1890

if(mySource < mySourceLimit) {

1891

char trailByte;

1892

getTrailByte:

1893

tempBuf[0] = (char) (mySourceChar);

1894

tempBuf[1] = trailByte = *mySource++;

1895

mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);

1896

targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);

1897

} else {

1898

args->converter->toUBytes[0] = (uint8_t)mySourceChar;

1899

args->converter->toULength = 1;

1900

goto endloop;

1901

}

1902

} /* End of inner switch */

1903

break;

1904

} /* End of outer switch */

1905

if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){

1906

if(args->offsets){

1907

args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

1908

}

1909

*(myTarget++)=(UChar)targetUniChar;

1910

}

1911

else if(targetUniChar > missingCharMarker){

1912

/* disassemble the surrogate pair and write to output*/

1913

targetUniChar-=0x0010000;

1914

*myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));

1915

if(args->offsets){

1916

args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

1917

}

1918

++myTarget;

1919

if(myTarget< args->targetLimit){

1920

*myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));

1921

if(args->offsets){

1922

args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

1923

}

1924

++myTarget;

1925

}else{

1926

args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=

1927

(UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));

1928

}

1929

1930

}

1931

else{

1932

/* Call the callback function*/

1933

toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);

1934

break;

1935

}

1936

}

1937

else{ /* goes with "if(myTarget < args->targetLimit)" way up near top of function */

1938

*err =U_BUFFER_OVERFLOW_ERROR;

1939

break;

1940

}

1941

}

1942

endloop:

1943

args->target = myTarget;

1944

args->source = mySource;

1945

}

1946

1947

1948

/***************************************************************

1949

* Rules for ISO-2022-KR encoding

1950

* i) The KSC5601 designator sequence should appear only once in a file,

1951

* at the beginning of a line before any KSC5601 characters. This usually

1952

* means that it appears by itself on the first line of the file

1953

* ii) There are only 2 shifting sequences SO to shift into double byte mode

1954

* and SI to shift into single byte mode

1955

1956

static void

1957

UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){

1958

1959

UConverter* saveConv = args->converter;

1960

UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo;

1961

args->converter=myConverterData->currentConverter;

1962

1963

myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32;

1964

ucnv_MBCSFromUnicodeWithOffsets(args,err);

1965

saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32;

1966

1967

if(*err == U_BUFFER_OVERFLOW_ERROR) {

1968

if(myConverterData->currentConverter->charErrorBufferLength > 0) {

1969

uprv_memcpy(

1970

saveConv->charErrorBuffer,

1971

myConverterData->currentConverter->charErrorBuffer,

1972

myConverterData->currentConverter->charErrorBufferLength);

1973

}

1974

saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;

1975

myConverterData->currentConverter->charErrorBufferLength = 0;

1976

}

1977

args->converter=saveConv;

1978

}

1979

1980

static void

1981

UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){

1982

1983

const UChar *source = args->source;

1984

const UChar *sourceLimit = args->sourceLimit;

1985

unsigned char *target = (unsigned char *) args->target;

1986

unsigned char *targetLimit = (unsigned char *) args->targetLimit;

1987

int32_t* offsets = args->offsets;

1988

uint32_t targetByteUnit = 0x0000;

1989

UChar32 sourceChar = 0x0000;

1990

UBool isTargetByteDBCS;

1991

UBool oldIsTargetByteDBCS;

1992

UConverterDataISO2022 *converterData;

1993

UConverterSharedData* sharedData;

1994

UBool useFallback;

1995

int32_t length =0;

1996

1997

converterData=(UConverterDataISO2022*)args->converter->extraInfo;

1998

/* if the version is 1 then the user is requesting

1999

* conversion with ibm-25546 pass the arguments to

2000

* MBCS converter and return

2001

2002

if(converterData->version==1){

2003

UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);

2004

return;

2005

}

2006

2007

/* initialize data */

2008

sharedData = converterData->currentConverter->sharedData;

2009

useFallback = args->converter->useFallback;

2010

isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus;

2011

oldIsTargetByteDBCS = isTargetByteDBCS;

2012

2013

isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus;

2014

if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) {

2015

goto getTrail;

2016

}

2017

while(source < sourceLimit){

2018

2019

targetByteUnit = missingCharMarker;

2020

2021

if(target < (unsigned char*) args->targetLimit){

2022

sourceChar = *source++;

2023

2024

/* do not convert SO/SI/ESC */

2025

if(IS_2022_CONTROL(sourceChar)) {

2026

/* callback(illegal) */

2027

*err=U_ILLEGAL_CHAR_FOUND;

2028

args->converter->fromUChar32=sourceChar;

2029

break;

2030

}

2031

2032

length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2);

2033

if(length < 0) {

2034

length = -length; /* fallback */

2035

}

2036

/* only DBCS or SBCS characters are expected*/

2037

/* DB characters with high bit set to 1 are expected */

2038

if(length > 2 || length==0 ||(((targetByteUnit & 0x8080) != 0x8080)&& length==2)){

2039

targetByteUnit=missingCharMarker;

2040

}

2041

if (targetByteUnit != missingCharMarker){

2042

2043

oldIsTargetByteDBCS = isTargetByteDBCS;

2044

isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF);

2045

/* append the shift sequence */

2046

if (oldIsTargetByteDBCS != isTargetByteDBCS ){

2047

2048

if (isTargetByteDBCS)

2049

*target++ = UCNV_SO;

2050

else

2051

*target++ = UCNV_SI;

2052

if(offsets)

2053

*(offsets++) = (int32_t)(source - args->source-1);

2054

}

2055

/* write the targetUniChar to target */

2056

if(targetByteUnit <= 0x00FF){

2057

if( target < targetLimit){

2058

*(target++) = (unsigned char) targetByteUnit;

2059

if(offsets){

2060

*(offsets++) = (int32_t)(source - args->source-1);

2061

}

2062

2063

}else{

2064

args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit);

2065

*err = U_BUFFER_OVERFLOW_ERROR;

2066

}

2067

}else{

2068

if(target < targetLimit){

2069

*(target++) =(unsigned char) ((targetByteUnit>>8) -0x80);

2070

if(offsets){

2071

*(offsets++) = (int32_t)(source - args->source-1);

2072

}

2073

if(target < targetLimit){

2074

*(target++) =(unsigned char) (targetByteUnit -0x80);

2075

if(offsets){

2076

*(offsets++) = (int32_t)(source - args->source-1);

2077

}

2078

}else{

2079

args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80);

2080

*err = U_BUFFER_OVERFLOW_ERROR;

2081

}

2082

}else{

2083

args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80);

2084

args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80);

2085

*err = U_BUFFER_OVERFLOW_ERROR;

2086

}

2087

}

2088

2089

}

2090

else{

2091

/* oops.. the code point is unassingned

2092

* set the error and reason

2093

2094

2095

/*check if the char is a First surrogate*/

2096

if(UTF_IS_SURROGATE(sourceChar)) {

2097

if(UTF_IS_SURROGATE_FIRST(sourceChar)) {

2098

getTrail:

2099

/*look ahead to find the trail surrogate*/

2100

if(source < sourceLimit) {

2101

/* test the following code unit */

2102

UChar trail=(UChar) *source;

2103

if(UTF_IS_SECOND_SURROGATE(trail)) {

2104

source++;

2105

sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);

2106

*err = U_INVALID_CHAR_FOUND;

2107

/* convert this surrogate code point */

2108

/* exit this condition tree */

2109

} else {

2110

/* this is an unmatched lead code unit (1st surrogate) */

2111

/* callback(illegal) */

2112

*err=U_ILLEGAL_CHAR_FOUND;

2113

}

2114

} else {

2115

/* no more input */

2116

*err = U_ZERO_ERROR;

2117

}

2118

} else {

2119

/* this is an unmatched trail code unit (2nd surrogate) */

2120

/* callback(illegal) */

2121

*err=U_ILLEGAL_CHAR_FOUND;

2122

}

2123

} else {

2124

/* callback(unassigned) for a BMP code point */

2125

*err = U_INVALID_CHAR_FOUND;

2126

}

2127

2128

args->converter->fromUChar32=sourceChar;

2129

break;

2130

}

2131

} /* end if(myTargetIndex<myTargetLength) */

2132

else{

2133

*err =U_BUFFER_OVERFLOW_ERROR;

2134

break;

2135

}

2136

2137

}/* end while(mySourceIndex<mySourceLength) */

2138

2139

2140

* the end of the input stream and detection of truncated input

2141

* are handled by the framework, but for ISO-2022-KR conversion

2142

* we need to be in ASCII mode at the very end

2143

2144

* conditions:

2145

* successful

2146

* not in ASCII mode

2147

* end of input and no truncated input

2148

2149

if( U_SUCCESS(*err) &&

2150

isTargetByteDBCS &&

2151

args->flush && source>=sourceLimit && args->converter->fromUChar32==0

2152

) {

2153

int32_t sourceIndex;

2154

2155

/* we are switching to ASCII */

2156

isTargetByteDBCS=FALSE;

2157

2158

/* get the source index of the last input character */

2159

2160

* TODO this would be simpler and more reliable if we used a pair

2161

* of sourceIndex/prevSourceIndex like in ucnvmbcs.c

2162

* so that we could simply use the prevSourceIndex here;

2163

* this code gives an incorrect result for the rare case of an unmatched

2164

* trail surrogate that is alone in the last buffer of the text stream

2165

2166

sourceIndex=(int32_t)(source-args->source);

2167

if(sourceIndex>0) {

2168

--sourceIndex;

2169

if( U16_IS_TRAIL(args->source[sourceIndex]) &&

2170

(sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))

2171

) {

2172

--sourceIndex;

2173

}

2174

} else {

2175

sourceIndex=-1;

2176

}

2177

2178

fromUWriteUInt8(

2179

args->converter,

2180

SHIFT_IN_STR, 1,

2181

&target, (const char *)targetLimit,

2182

&offsets, sourceIndex,

2183

err);

2184

}

2185

2186

/*save the state and return */

2187

args->source = source;

2188

args->target = (char*)target;

2189

args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS;

2190

}

2191

2192

/************************ To Unicode ***************************************/

2193

2194

static void

2195

UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args,

2196

UErrorCode* err){

2197

char const* sourceStart;

2198

UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo);

2199

2200

UConverterToUnicodeArgs subArgs;

2201

int32_t minArgsSize;

2202

2203

/* set up the subconverter arguments */

2204

if(args->size<sizeof(UConverterToUnicodeArgs)) {

2205

minArgsSize = args->size;

2206

} else {

2207

minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs);

2208

}

2209

2210

uprv_memcpy(&subArgs, args, minArgsSize);

2211

subArgs.size = (uint16_t)minArgsSize;

2212

subArgs.converter = myData->currentConverter;

2213

2214

/* remember the original start of the input for offsets */

2215

sourceStart = args->source;

2216

2217

if(myData->key != 0) {

2218

/* continue with a partial escape sequence */

2219

goto escape;

2220

}

2221

2222

while(U_SUCCESS(*err) && args->source < args->sourceLimit) {

2223

/*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/

2224

subArgs.source = args->source;

2225

subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush);

2226

if(subArgs.source != subArgs.sourceLimit) {

2227

2228

* get the current partial byte sequence

2229

2230

* it needs to be moved between the public and the subconverter

2231

* so that the conversion framework, which only sees the public

2232

* converter, can handle truncated and illegal input etc.

2233

2234

if(args->converter->toULength > 0) {

2235

uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength);

2236

}

2237

subArgs.converter->toULength = args->converter->toULength;

2238

2239

2240

* Convert up to the end of the input, or to before the next escape character.

2241

* Does not handle conversion extensions because the preToU[] state etc.

2242

* is not copied.

2243

2244

ucnv_MBCSToUnicodeWithOffsets(&subArgs, err);

2245

2246

if(args->offsets != NULL && sourceStart != args->source) {

2247

/* update offsets to base them on the actual start of the input */

2248

int32_t *offsets = args->offsets;

2249

UChar *target = args->target;

2250

int32_t delta = (int32_t)(args->source - sourceStart);

2251

while(target < subArgs.target) {

2252

if(*offsets >= 0) {

2253

*offsets += delta;

2254

}

2255

++offsets;

2256

++target;

2257

}

2258

}

2259

args->source = subArgs.source;

2260

args->target = subArgs.target;

2261

args->offsets = subArgs.offsets;

2262

2263

/* copy input/error/overflow buffers */

2264

if(subArgs.converter->toULength > 0) {

2265

uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength);

2266

}

2267

args->converter->toULength = subArgs.converter->toULength;

2268

2269

if(*err == U_BUFFER_OVERFLOW_ERROR) {

2270

if(subArgs.converter->UCharErrorBufferLength > 0) {

2271

uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer,

2272

subArgs.converter->UCharErrorBufferLength);

2273

}

2274

args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength;

2275

subArgs.converter->UCharErrorBufferLength = 0;

2276

}

2277

}

2278

2279

if (U_FAILURE(*err) || (args->source == args->sourceLimit)) {

2280

return;

2281

}

2282

2283

escape:

2284

changeState_2022(args->converter,

2285

&(args->source),

2286

args->sourceLimit,

2287

ISO_2022_KR,

2288

err);

2289

}

2290

}

2291

2292

static void

2293

UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

2294

UErrorCode* err){

2295

char tempBuf[2];

2296

const char *mySource = ( char *) args->source;

2297

UChar *myTarget = args->target;

2298

const char *mySourceLimit = args->sourceLimit;

2299

UChar32 targetUniChar = 0x0000;

2300

UChar mySourceChar = 0x0000;

2301

UConverterDataISO2022* myData;

2302

UConverterSharedData* sharedData ;

2303

UBool useFallback;

2304

2305

myData=(UConverterDataISO2022*)(args->converter->extraInfo);

2306

if(myData->version==1){

2307

UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err);

2308

return;

2309

}

2310

2311

/* initialize state */

2312

sharedData = myData->currentConverter->sharedData;

2313

useFallback = args->converter->useFallback;

2314

2315

if(myData->key != 0) {

2316

/* continue with a partial escape sequence */

2317

goto escape;

2318

} else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {

2319

/* continue with a partial double-byte character */

2320

mySourceChar = args->converter->toUBytes[0];

2321

args->converter->toULength = 0;

2322

goto getTrailByte;

2323

}

2324

2325

while(mySource< mySourceLimit){

2326

2327

if(myTarget < args->targetLimit){

2328

2329

mySourceChar= (unsigned char) *mySource++;

2330

2331

if(mySourceChar==UCNV_SI){

2332

myData->toU2022State.g = 0;

2333

/*consume the source */

2334

continue;

2335

}else if(mySourceChar==UCNV_SO){

2336

myData->toU2022State.g = 1;

2337

/*consume the source */

2338

continue;

2339

}else if(mySourceChar==ESC_2022){

2340

mySource--;

2341

escape:

2342

changeState_2022(args->converter,&(mySource),

2343

mySourceLimit, ISO_2022_KR, err);

2344

if(U_FAILURE(*err)){

2345

args->target = myTarget;

2346

args->source = mySource;

2347

return;

2348

}

2349

continue;

2350

}

2351

2352

if(myData->toU2022State.g == 1) {

2353

if(mySource < mySourceLimit) {

2354

char trailByte;

2355

getTrailByte:

2356

trailByte = *mySource++;

2357

tempBuf[0] = (char)(mySourceChar + 0x80);

2358

tempBuf[1] = (char)(trailByte + 0x80);

2359

mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);

2360

if((mySourceChar & 0x8080) == 0) {

2361

targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);

2362

} else {

2363

/* illegal bytes > 0x7f */

2364

targetUniChar = missingCharMarker;

2365

}

2366

} else {

2367

args->converter->toUBytes[0] = (uint8_t)mySourceChar;

2368

args->converter->toULength = 1;

2369

break;

2370

}

2371

}

2372

else{

2373

targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);

2374

}

2375

if(targetUniChar < 0xfffe){

2376

if(args->offsets) {

2377

args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

2378

}

2379

*(myTarget++)=(UChar)targetUniChar;

2380

}

2381

else {

2382

/* Call the callback function*/

2383

toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);

2384

break;

2385

}

2386

}

2387

else{

2388

*err =U_BUFFER_OVERFLOW_ERROR;

2389

break;

2390

}

2391

}

2392

args->target = myTarget;

2393

args->source = mySource;

2394

}

2395

2396

/*************************** END ISO2022-KR *********************************/

2397

2398

/*************************** ISO-2022-CN *********************************

2399

2400

* Rules for ISO-2022-CN Encoding:

2401

* i) The designator sequence must appear once on a line before any instance

2402

* of character set it designates.

2403

* ii) If two lines contain characters from the same character set, both lines

2404

* must include the designator sequence.

2405

* iii) Once the designator sequence is known, a shifting sequence has to be found

2406

* to invoke the shifting

2407

* iv) All lines start in ASCII and end in ASCII.

2408

* v) Four shifting sequences are employed for this purpose:

2409

2410

* Sequence ASCII Eq Charsets

2411

* ---------- ------- ---------

2412

* SI <SI> US-ASCII

2413

* SO <SO> CNS-11643-1992 Plane 1, GB2312, ISO-IR-165

2414

* SS2 <ESC>N CNS-11643-1992 Plane 2

2415

* SS3 <ESC>O CNS-11643-1992 Planes 3-7

2416

2417

* vi)

2418

* SOdesignator : ESC "$" ")" finalchar_for_SO

2419

* SS2designator : ESC "$" "*" finalchar_for_SS2

2420

* SS3designator : ESC "$" "+" finalchar_for_SS3

2421

2422

* ESC $ ) A Indicates the bytes following SO are Chinese

2423

* characters as defined in GB 2312-80, until

2424

* another SOdesignation appears

2425

2426

2427

* ESC $ ) E Indicates the bytes following SO are as defined

2428

* in ISO-IR-165 (for details, see section 2.1),

2429

* until another SOdesignation appears

2430

2431

* ESC $ ) G Indicates the bytes following SO are as defined

2432

* in CNS 11643-plane-1, until another

2433

* SOdesignation appears

2434

2435

* ESC $ * H Indicates the two bytes immediately following

2436

* SS2 is a Chinese character as defined in CNS

2437

* 11643-plane-2, until another SS2designation

2438

* appears

2439

* (Meaning <ESC>N must precede every 2 byte

2440

* sequence.)

2441

2442

* ESC $ + I Indicates the immediate two bytes following SS3

2443

* is a Chinese character as defined in CNS

2444

* 11643-plane-3, until another SS3designation

2445

* appears

2446

* (Meaning <ESC>O must precede every 2 byte

2447

* sequence.)

2448

2449

* ESC $ + J Indicates the immediate two bytes following SS3

2450

* is a Chinese character as defined in CNS

2451

* 11643-plane-4, until another SS3designation

2452

* appears

2453

* (In English: <ESC>O must precede every 2 byte

2454

* sequence.)

2455

2456

* ESC $ + K Indicates the immediate two bytes following SS3

2457

* is a Chinese character as defined in CNS

2458

* 11643-plane-5, until another SS3designation

2459

* appears

2460

2461

* ESC $ + L Indicates the immediate two bytes following SS3

2462

* is a Chinese character as defined in CNS

2463

* 11643-plane-6, until another SS3designation

2464

* appears

2465

2466

* ESC $ + M Indicates the immediate two bytes following SS3

2467

* is a Chinese character as defined in CNS

2468

* 11643-plane-7, until another SS3designation

2469

* appears

2470

2471

* As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and

2472

* has its own designation information before any Chinese characters

2473

* appear

2474

2475

2476

2477

/* The following are defined this way to make the strings truly read-only */

2478

static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41";

2479

static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45";

2480

static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47";

2481

static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48";

2482

static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49";

2483

static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A";

2484

static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B";

2485

static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C";

2486

static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D";

2487

2488

/********************** ISO2022-CN Data **************************/

2489

static const char* const escSeqCharsCN[10] ={

2490

SHIFT_IN_STR, /* ASCII */

2491

GB_2312_80_STR,

2492

ISO_IR_165_STR,

2493

CNS_11643_1992_Plane_1_STR,

2494

CNS_11643_1992_Plane_2_STR,

2495

CNS_11643_1992_Plane_3_STR,

2496

CNS_11643_1992_Plane_4_STR,

2497

CNS_11643_1992_Plane_5_STR,

2498

CNS_11643_1992_Plane_6_STR,

2499

CNS_11643_1992_Plane_7_STR

2500

};

2501

2502

static void

2503

UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){

2504

UConverter *cnv = args->converter;

2505

UConverterDataISO2022 *converterData;

2506

ISO2022State *pFromU2022State;

2507

uint8_t *target = (uint8_t *) args->target;

2508

const uint8_t *targetLimit = (const uint8_t *) args->targetLimit;

2509

const UChar* source = args->source;

2510

const UChar* sourceLimit = args->sourceLimit;

2511

int32_t* offsets = args->offsets;

2512

UChar32 sourceChar;

2513

char buffer[8];

2514

int32_t len;

2515

int8_t choices[3];

2516

int32_t choiceCount;

2517

uint32_t targetValue = 0;

2518

UBool useFallback;

2519

2520

/* set up the state */

2521

converterData = (UConverterDataISO2022*)cnv->extraInfo;

2522

pFromU2022State = &converterData->fromU2022State;

2523

2524

choiceCount = 0;

2525

2526

/* check if the last codepoint of previous buffer was a lead surrogate*/

2527

if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) {

2528

goto getTrail;

2529

}

2530

2531

while( source < sourceLimit){

2532

if(target < targetLimit){

2533

2534

sourceChar = *(source++);

2535

/*check if the char is a First surrogate*/

2536

if(UTF_IS_SURROGATE(sourceChar)) {

2537

if(UTF_IS_SURROGATE_FIRST(sourceChar)) {

2538

getTrail:

2539

/*look ahead to find the trail surrogate*/

2540

if(source < sourceLimit) {

2541

/* test the following code unit */

2542

UChar trail=(UChar) *source;

2543

if(UTF_IS_SECOND_SURROGATE(trail)) {

2544

source++;

2545

sourceChar=UTF16_GET_PAIR_VALUE(sourceChar, trail);

2546

cnv->fromUChar32=0x00;

2547

/* convert this supplementary code point */

2548

/* exit this condition tree */

2549

} else {

2550

/* this is an unmatched lead code unit (1st surrogate) */

2551

/* callback(illegal) */

2552

*err=U_ILLEGAL_CHAR_FOUND;

2553

cnv->fromUChar32=sourceChar;

2554

break;

2555

}

2556

} else {

2557

/* no more input */

2558

cnv->fromUChar32=sourceChar;

2559

break;

2560

}

2561

} else {

2562

/* this is an unmatched trail code unit (2nd surrogate) */

2563

/* callback(illegal) */

2564

*err=U_ILLEGAL_CHAR_FOUND;

2565

cnv->fromUChar32=sourceChar;

2566

break;

2567

}

2568

}

2569

2570

/* do the conversion */

2571

if(sourceChar <= 0x007f ){

2572

/* do not convert SO/SI/ESC */

2573

if(IS_2022_CONTROL(sourceChar)) {

2574

/* callback(illegal) */

2575

*err=U_ILLEGAL_CHAR_FOUND;

2576

cnv->fromUChar32=sourceChar;

2577

break;

2578

}

2579

2580

/* US-ASCII */

2581

if(pFromU2022State->g == 0) {

2582

buffer[0] = (char)sourceChar;

2583

len = 1;

2584

} else {

2585

buffer[0] = UCNV_SI;

2586

buffer[1] = (char)sourceChar;

2587

len = 2;

2588

pFromU2022State->g = 0;

2589

choiceCount = 0;

2590

}

2591

if(sourceChar == CR || sourceChar == LF) {

2592

/* reset the state at the end of a line */

2593

uprv_memset(pFromU2022State, 0, sizeof(ISO2022State));

2594

choiceCount = 0;

2595

}

2596

}

2597

else{

2598

/* convert U+0080..U+10ffff */

2599

int32_t i;

2600

int8_t cs, g;

2601

2602

if(choiceCount == 0) {

2603

/* try the current SO/G1 converter first */

2604

choices[0] = pFromU2022State->cs[1];

2605

2606

/* default to GB2312_1 if none is designated yet */

2607

if(choices[0] == 0) {

2608

choices[0] = GB2312_1;

2609

}

2610

2611

if(converterData->version == 0) {

2612

/* ISO-2022-CN */

2613

2614

/* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */

2615

if(choices[0] == GB2312_1) {

2616

choices[1] = (int8_t)CNS_11643_1;

2617

} else {

2618

choices[1] = (int8_t)GB2312_1;

2619

}

2620

2621

choiceCount = 2;

2622

} else {

2623

/* ISO-2022-CN-EXT */

2624

2625

/* try one of the other converters */

2626

switch(choices[0]) {

2627

case GB2312_1:

2628

choices[1] = (int8_t)CNS_11643_1;

2629

choices[2] = (int8_t)ISO_IR_165;

2630

break;

2631

case ISO_IR_165:

2632

choices[1] = (int8_t)GB2312_1;

2633

choices[2] = (int8_t)CNS_11643_1;

2634

break;

2635

default: /* CNS_11643_x */

2636

choices[1] = (int8_t)GB2312_1;

2637

choices[2] = (int8_t)ISO_IR_165;

2638

break;

2639

}

2640

2641

choiceCount = 3;

2642

}

2643

}

2644

2645

cs = g = 0;

2646

2647

* len==0: no mapping found yet

2648

* len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks

2649

* len>0: found a roundtrip result, done

2650

2651

len = 0;

2652

2653

* We will turn off useFallback after finding a fallback,

2654

* but we still get fallbacks from PUA code points as usual.

2655

* Therefore, we will also need to check that we don't overwrite

2656

* an early fallback with a later one.

2657

2658

useFallback = cnv->useFallback;

2659

2660

for(i = 0; i < choiceCount && len <= 0; ++i) {

2661

int8_t cs0 = choices[i];

2662

if(cs0 > 0) {

2663

uint32_t value;

2664

int32_t len2;

2665

if(cs0 > CNS_11643_0) {

2666

len2 = MBCS_FROM_UCHAR32_ISO2022(

2667

converterData->myConverterArray[CNS_11643],

2668

sourceChar,

2669

&value,

2670

useFallback,

2671

MBCS_OUTPUT_3);

2672

if(len2 == 3 || (len2 == -3 && len == 0)) {

2673

targetValue = value;

2674

cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80);

2675

if(len2 >= 0) {

2676

len = 2;

2677

} else {

2678

len = -2;

2679

useFallback = FALSE;

2680

}

2681

if(cs == CNS_11643_1) {

2682

g = 1;

2683

} else if(cs == CNS_11643_2) {

2684

g = 2;

2685

} else /* plane 3..7 */ if(converterData->version == 1) {

2686

g = 3;

2687

} else {

2688

/* ISO-2022-CN (without -EXT) does not support plane 3..7 */

2689

len = 0;

2690

}

2691

}

2692

} else {

2693

/* GB2312_1 or ISO-IR-165 */

2694

len2 = MBCS_FROM_UCHAR32_ISO2022(

2695

converterData->myConverterArray[cs0],

2696

sourceChar,

2697

&value,

2698

useFallback,

2699

MBCS_OUTPUT_2);

2700

if(len2 == 2 || (len2 == -2 && len == 0)) {

2701

targetValue = value;

2702

len = len2;

2703

cs = cs0;

2704

g = 1;

2705

useFallback = FALSE;

2706

}

2707

}

2708

}

2709

}

2710

2711

if(len != 0) {

2712

len = 0; /* count output bytes; it must have been abs(len) == 2 */

2713

2714

/* write the designation sequence if necessary */

2715

if(cs != pFromU2022State->cs[g]) {

2716

if(cs < CNS_11643) {

2717

uprv_memcpy(buffer, escSeqCharsCN[cs], 4);

2718

} else {

2719

uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4);

2720

}

2721

len = 4;

2722

pFromU2022State->cs[g] = cs;

2723

if(g == 1) {

2724

/* changing the SO/G1 charset invalidates the choices[] */

2725

choiceCount = 0;

2726

}

2727

}

2728

2729

/* write the shift sequence if necessary */

2730

if(g != pFromU2022State->g) {

2731

switch(g) {

2732

case 1:

2733

buffer[len++] = UCNV_SO;

2734

2735

/* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */

2736

pFromU2022State->g = 1;

2737

break;

2738

case 2:

2739

buffer[len++] = 0x1b;

2740

buffer[len++] = 0x4e;

2741

break;

2742

default: /* case 3 */

2743

buffer[len++] = 0x1b;

2744

buffer[len++] = 0x4f;

2745

break;

2746

}

2747

}

2748

2749

/* write the two output bytes */

2750

buffer[len++] = (char)(targetValue >> 8);

2751

buffer[len++] = (char)targetValue;

2752

} else {

2753

/* if we cannot find the character after checking all codepages

2754

* then this is an error

2755

2756

*err = U_INVALID_CHAR_FOUND;

2757

cnv->fromUChar32=sourceChar;

2758

break;

2759

}

2760

}

2761

2762

/* output len>0 bytes in buffer[] */

2763

if(len == 1) {

2764

*target++ = buffer[0];

2765

if(offsets) {

2766

*offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */

2767

}

2768

} else if(len == 2 && (target + 2) <= targetLimit) {

2769

*target++ = buffer[0];

2770

*target++ = buffer[1];

2771

if(offsets) {

2772

int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar));

2773

*offsets++ = sourceIndex;

2774

*offsets++ = sourceIndex;

2775

}

2776

} else {

2777

fromUWriteUInt8(

2778

cnv,

2779

buffer, len,

2780

&target, (const char *)targetLimit,

2781

&offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)),

2782

err);

2783

if(U_FAILURE(*err)) {

2784

break;

2785

}

2786

}

2787

} /* end if(myTargetIndex<myTargetLength) */

2788

else{

2789

*err =U_BUFFER_OVERFLOW_ERROR;

2790

break;

2791

}

2792

2793

}/* end while(mySourceIndex<mySourceLength) */

2794

2795

2796

* the end of the input stream and detection of truncated input

2797

* are handled by the framework, but for ISO-2022-CN conversion

2798

* we need to be in ASCII mode at the very end

2799

2800

* conditions:

2801

* successful

2802

* not in ASCII mode

2803

* end of input and no truncated input

2804

2805

if( U_SUCCESS(*err) &&

2806

pFromU2022State->g!=0 &&

2807

args->flush && source>=sourceLimit && cnv->fromUChar32==0

2808

) {

2809

int32_t sourceIndex;

2810

2811

/* we are switching to ASCII */

2812

pFromU2022State->g=0;

2813

2814

/* get the source index of the last input character */

2815

2816

* TODO this would be simpler and more reliable if we used a pair

2817

* of sourceIndex/prevSourceIndex like in ucnvmbcs.c

2818

* so that we could simply use the prevSourceIndex here;

2819

* this code gives an incorrect result for the rare case of an unmatched

2820

* trail surrogate that is alone in the last buffer of the text stream

2821

2822

sourceIndex=(int32_t)(source-args->source);

2823

if(sourceIndex>0) {

2824

--sourceIndex;

2825

if( U16_IS_TRAIL(args->source[sourceIndex]) &&

2826

(sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1]))

2827

) {

2828

--sourceIndex;

2829

}

2830

} else {

2831

sourceIndex=-1;

2832

}

2833

2834

fromUWriteUInt8(

2835

cnv,

2836

SHIFT_IN_STR, 1,

2837

&target, (const char *)targetLimit,

2838

&offsets, sourceIndex,

2839

err);

2840

}

2841

2842

/*save the state and return */

2843

args->source = source;

2844

args->target = (char*)target;

2845

}

2846

2847

2848

static void

2849

UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,

2850

UErrorCode* err){

2851

char tempBuf[3];

2852

const char *mySource = (char *) args->source;

2853

UChar *myTarget = args->target;

2854

const char *mySourceLimit = args->sourceLimit;

2855

uint32_t targetUniChar = 0x0000;

2856

uint32_t mySourceChar = 0x0000;

2857

UConverterDataISO2022* myData;

2858

ISO2022State *pToU2022State;

2859

2860

myData=(UConverterDataISO2022*)(args->converter->extraInfo);

2861

pToU2022State = &myData->toU2022State;

2862

2863

if(myData->key != 0) {

2864

/* continue with a partial escape sequence */

2865

goto escape;

2866

} else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) {

2867

/* continue with a partial double-byte character */

2868

mySourceChar = args->converter->toUBytes[0];

2869

args->converter->toULength = 0;

2870

goto getTrailByte;

2871

}

2872

2873

while(mySource < mySourceLimit){

2874

2875

targetUniChar =missingCharMarker;

2876

2877

if(myTarget < args->targetLimit){

2878

2879

mySourceChar= (unsigned char) *mySource++;

2880

2881

switch(mySourceChar){

2882

case UCNV_SI:

2883

pToU2022State->g=0;

2884

continue;

2885

2886

case UCNV_SO:

2887

if(pToU2022State->cs[1] != 0) {

2888

pToU2022State->g=1;

2889

continue;

2890

} else {

2891

/* illegal to have SO before a matching designator */

2892

break;

2893

}

2894

2895

case ESC_2022:

2896

mySource--;

2897

escape:

2898

changeState_2022(args->converter,&(mySource),

2899

mySourceLimit, ISO_2022_CN,err);

2900

2901

/* invalid or illegal escape sequence */

2902

if(U_FAILURE(*err)){

2903

args->target = myTarget;

2904

args->source = mySource;

2905

return;

2906

}

2907

continue;

2908

2909

/* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */

2910

2911

case CR:

2912

/*falls through*/

2913

case LF:

2914

uprv_memset(pToU2022State, 0, sizeof(ISO2022State));

2915

/* falls through */

2916

default:

2917

/* convert one or two bytes */

2918

if(pToU2022State->g != 0) {

2919

if(mySource < mySourceLimit) {

2920

UConverterSharedData *cnv;

2921

StateEnum tempState;

2922

int32_t tempBufLen;

2923

char trailByte;

2924

getTrailByte:

2925

trailByte = *mySource++;

2926

tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];

2927

if(tempState > CNS_11643_0) {

2928

cnv = myData->myConverterArray[CNS_11643];

2929

tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));

2930

tempBuf[1] = (char) (mySourceChar);

2931

tempBuf[2] = trailByte;

2932

tempBufLen = 3;

2933

2934

}else{

2935

cnv = myData->myConverterArray[tempState];

2936

tempBuf[0] = (char) (mySourceChar);

2937

tempBuf[1] = trailByte;

2938

tempBufLen = 2;

2939

}

2940

mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);

2941

if(pToU2022State->g>=2) {

2942

/* return from a single-shift state to the previous one */

2943

pToU2022State->g=pToU2022State->prevG;

2944

}

2945

targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);

2946

} else {

2947

args->converter->toUBytes[0] = (uint8_t)mySourceChar;

2948

args->converter->toULength = 1;

2949

goto endloop;

2950

}

2951

}

2952

else{

2953

if(mySourceChar <= 0x7f) {

2954

targetUniChar = (UChar) mySourceChar;

2955

}

2956

}

2957

break;

2958

}

2959

if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){

2960

if(args->offsets){

2961

args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

2962

}

2963

*(myTarget++)=(UChar)targetUniChar;

2964

}

2965

else if(targetUniChar > missingCharMarker){

2966

/* disassemble the surrogate pair and write to output*/

2967

targetUniChar-=0x0010000;

2968

*myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10));

2969

if(args->offsets){

2970

args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

2971

}

2972

++myTarget;

2973

if(myTarget< args->targetLimit){

2974

*myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));

2975

if(args->offsets){

2976

args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2));

2977

}

2978

++myTarget;

2979

}else{

2980

args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]=

2981

(UChar)(0xdc00+(UChar)(targetUniChar&0x3ff));

2982

}

2983

2984

}

2985

else{

2986

/* Call the callback function*/

2987

toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err);

2988

break;

2989

}

2990

}

2991

else{

2992

*err =U_BUFFER_OVERFLOW_ERROR;

2993

break;

2994

}

2995

}

2996

endloop:

2997

args->target = myTarget;

2998

args->source = mySource;

2999

}

3000

3001

static void

3002

_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {

3003

UConverter *cnv = args->converter;

3004

UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo;

3005

ISO2022State *pFromU2022State=&myConverterData->fromU2022State;

3006

char *p, *subchar;

3007

char buffer[8];

3008

int32_t length;

3009

3010

subchar=(char *)cnv->subChars;

3011

length=cnv->subCharLen; /* assume length==1 for most variants */

3012

3013

p = buffer;

3014

switch(myConverterData->locale[0]){

3015

case 'j':

3016

{

3017

int8_t cs;

3018

3019

if(pFromU2022State->g == 1) {

3020

/* JIS7: switch from G1 to G0 */

3021

pFromU2022State->g = 0;

3022

*p++ = UCNV_SI;

3023

}

3024

3025

cs = pFromU2022State->cs[0];

3026

if(cs != ASCII && cs != JISX201) {

3027

/* not in ASCII or JIS X 0201: switch to ASCII */

3028

pFromU2022State->cs[0] = (int8_t)ASCII;

3029

*p++ = '\x1b';

3030

*p++ = '\x28';

3031

*p++ = '\x42';

3032

}

3033

3034

*p++ = subchar[0];

3035

break;

3036

}

3037

case 'c':

3038

if(pFromU2022State->g != 0) {

3039

/* not in ASCII mode: switch to ASCII */

3040

pFromU2022State->g = 0;

3041

*p++ = UCNV_SI;

3042

}

3043

*p++ = subchar[0];

3044

break;

3045

case 'k':

3046

if(myConverterData->version == 0) {

3047

if(length == 1) {

3048

if((UBool)args->converter->fromUnicodeStatus) {

3049

/* in DBCS mode: switch to SBCS */

3050

args->converter->fromUnicodeStatus = 0;

3051

*p++ = UCNV_SI;

3052

}

3053

*p++ = subchar[0];

3054

} else /* length == 2*/ {

3055

if(!(UBool)args->converter->fromUnicodeStatus) {

3056

/* in SBCS mode: switch to DBCS */

3057

args->converter->fromUnicodeStatus = 1;

3058

*p++ = UCNV_SO;

3059

}

3060

*p++ = subchar[0];

3061

*p++ = subchar[1];

3062

}

3063

break;

3064

} else {

3065

/* save the subconverter's substitution string */

3066

uint8_t *currentSubChars = myConverterData->currentConverter->subChars;

3067

int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen;

3068

3069

/* set our substitution string into the subconverter */

3070

myConverterData->currentConverter->subChars = (uint8_t *)subchar;

3071

myConverterData->currentConverter->subCharLen = (int8_t)length;

3072

3073

/* let the subconverter write the subchar, set/retrieve fromUChar32 state */

3074

args->converter = myConverterData->currentConverter;

3075

myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32;

3076

ucnv_cbFromUWriteSub(args, 0, err);

3077

cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32;

3078

args->converter = cnv;

3079

3080

/* restore the subconverter's substitution string */

3081

myConverterData->currentConverter->subChars = currentSubChars;

3082

myConverterData->currentConverter->subCharLen = currentSubCharLen;

3083

3084

if(*err == U_BUFFER_OVERFLOW_ERROR) {

3085

if(myConverterData->currentConverter->charErrorBufferLength > 0) {

3086

uprv_memcpy(

3087

cnv->charErrorBuffer,

3088

myConverterData->currentConverter->charErrorBuffer,

3089

myConverterData->currentConverter->charErrorBufferLength);

3090

}

3091

cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength;

3092

myConverterData->currentConverter->charErrorBufferLength = 0;

3093

}

3094

return;

3095

}

3096

default:

3097

/* not expected */

3098

break;

3099

}

3100

ucnv_cbFromUWriteBytes(args,

3101

buffer, (int32_t)(p - buffer),

3102

offsetIndex, err);

3103

}

3104

3105

3106

* Structure for cloning an ISO 2022 converter into a single memory block.

3107

* ucnv_safeClone() of the converter will align the entire cloneStruct,

3108

* and then ucnv_safeClone() of the sub-converter may additionally align

3109

* currentConverter inside the cloneStruct, for which we need the deadSpace

3110

* after currentConverter.

3111

* This is because UAlignedMemory may be larger than the actually

3112

* necessary alignment size for the platform.

3113

* The other cloneStruct fields will not be moved around,

3114

* and are aligned properly with cloneStruct's alignment.

3115

3116

struct cloneStruct

3117

{

3118

UConverter cnv;

3119

UConverter currentConverter;

3120

UAlignedMemory deadSpace;

3121

UConverterDataISO2022 mydata;

3122

};

3123

3124

3125

static UConverter *

3126

_ISO_2022_SafeClone(

3127

const UConverter *cnv,

3128

void *stackBuffer,

3129

int32_t *pBufferSize,

3130

UErrorCode *status)

3131

{

3132

struct cloneStruct * localClone;

3133

UConverterDataISO2022 *cnvData;

3134

int32_t i, size;

3135

3136

if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */

3137

*pBufferSize = (int32_t)sizeof(struct cloneStruct);

3138

return NULL;

3139

}

3140

3141

cnvData = (UConverterDataISO2022 *)cnv->extraInfo;

3142

localClone = (struct cloneStruct *)stackBuffer;

3143

3144

/* ucnv.c/ucnv_safeClone() copied the main UConverter already */

3145

3146

uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022));

3147

localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */

3148

localClone->cnv.isExtraLocal = TRUE;

3149

3150

/* share the subconverters */

3151

3152

if(cnvData->currentConverter != NULL) {

3153

size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */

3154

localClone->mydata.currentConverter =

3155

ucnv_safeClone(cnvData->currentConverter,

3156

&localClone->currentConverter,

3157

&size, status);

3158

if(U_FAILURE(*status)) {

3159

return NULL;

3160

}

3161

}

3162

3163

for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) {

3164

if(cnvData->myConverterArray[i] != NULL) {

3165

ucnv_incrementRefCount(cnvData->myConverterArray[i]);

3166

}

3167

}

3168

3169

return &localClone->cnv;

3170

}

3171

3172

static void

3173

_ISO_2022_GetUnicodeSet(const UConverter *cnv,

3174

const USetAdder *sa,

3175

UConverterUnicodeSet which,

3176

UErrorCode *pErrorCode)

3177

{

3178

int32_t i;

3179

UConverterDataISO2022* cnvData;

3180

3181

if (U_FAILURE(*pErrorCode)) {

3182

return;

3183

}

3184

#ifdef U_ENABLE_GENERIC_ISO_2022

3185

if (cnv->sharedData == &_ISO2022Data) {

3186

/* We use UTF-8 in this case */

3187

sa->addRange(sa->set, 0, 0xd7FF);

3188

sa->addRange(sa->set, 0xE000, 0x10FFFF);

3189

return;

3190

}

3191

#endif

3192

3193

cnvData = (UConverterDataISO2022*)cnv->extraInfo;

3194

3195

/* open a set and initialize it with code points that are algorithmically round-tripped */

3196

switch(cnvData->locale[0]){

3197

case 'j':

3198

if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) {

3199

/* include Latin-1 for some variants of JP */

3200

sa->addRange(sa->set, 0, 0xff);

3201

} else {

3202

/* include ASCII for JP */

3203

sa->addRange(sa->set, 0, 0x7f);

3204

}

3205

if(jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT)) {

3206

/* include half-width Katakana for JP */

3207

sa->addRange(sa->set, HWKANA_START, HWKANA_END);

3208

}

3209

break;

3210

case 'c':

3211

case 'z':

3212

/* include ASCII for CN */

3213

sa->addRange(sa->set, 0, 0x7f);

3214

break;

3215

case 'k':

3216

/* there is only one converter for KR, and it is not in the myConverterArray[] */

3217

cnvData->currentConverter->sharedData->impl->getUnicodeSet(

3218

cnvData->currentConverter, sa, which, pErrorCode);

3219

/* the loop over myConverterArray[] will simply not find another converter */

3220

break;

3221

default:

3222

break;

3223

}

3224

3225

3226

* Version-specific for CN:

3227

* CN version 0 does not map CNS planes 3..7 although

3228

* they are all available in the CNS conversion table;

3229

* CN version 1 does map them all.

3230

* The two versions create different Unicode sets.

3231

3232

for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) {

3233

if(cnvData->myConverterArray[i]!=NULL) {

3234

if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') &&

3235

cnvData->version==0 && i==CNS_11643

3236

) {

3237

/* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */

3238

ucnv_MBCSGetUnicodeSetForBytes(

3239

cnvData->myConverterArray[i],

3240

sa, UCNV_ROUNDTRIP_SET,

3241

0, 0x81, 0x82,

3242

pErrorCode);

3243

} else {

3244

ucnv_MBCSGetUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, pErrorCode);

3245

}

3246

}

3247

}

3248

3249

3250

* ISO 2022 converters must not convert SO/SI/ESC despite what

3251

* sub-converters do by themselves.

3252

* Remove these characters from the set.

3253

3254

sa->remove(sa->set, 0x0e);

3255

sa->remove(sa->set, 0x0f);

3256

sa->remove(sa->set, 0x1b);

3257

}

3258

3259

static const UConverterImpl _ISO2022Impl={

3260

UCNV_ISO_2022,

3261

3262

NULL,

3263

NULL,

3264

3265

_ISO2022Open,

3266

_ISO2022Close,

3267

_ISO2022Reset,

3268

3269

#ifdef U_ENABLE_GENERIC_ISO_2022

3270

T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,

3271

T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC,

3272

ucnv_fromUnicode_UTF8,

3273

ucnv_fromUnicode_UTF8_OFFSETS_LOGIC,

3274

#else

3275

NULL,

3276

NULL,

3277

NULL,

3278

NULL,

3279

#endif

3280

NULL,

3281

3282

NULL,

3283

_ISO2022getName,

3284

_ISO_2022_WriteSub,

3285

_ISO_2022_SafeClone,

3286

_ISO_2022_GetUnicodeSet,

3287

};

3288

/*
 * Static descriptor for the generic ISO 2022 converter ("ISO_2022").
 * The initializers are positional, so their meaning depends on the member
 * order of UConverterStaticData, which is declared outside this file.
 *
 * NOTE(review): this copy of the table is not valid C as it stands. The bare
 * numbers between the initializers (3289..3303) look like line numbers left
 * over from a source viewer, and the numbers 3294, 3297, 3300 and 3301 have
 * no content after them. Presumably short initializer lines were lost at
 * those positions; if so, every later value is shifted to the wrong member.
 * Restore this table from the upstream ICU ucnv2022.c - TODO confirm.
 */
static const UConverterStaticData _ISO2022StaticData={

3289

sizeof(UConverterStaticData),

3290

"ISO_2022",

3291

2022,

3292

UCNV_IBM,

3293

UCNV_ISO_2022,

3294

3295

3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */

3296

{ 0x1a, 0, 0, 0 },

3297

3298

FALSE,

3299

FALSE,

3300

3301

3302

{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

3303

};

3304

const UConverterSharedData _ISO2022Data={

3305

sizeof(UConverterSharedData),

3306

~((uint32_t) 0),

3307

NULL,

3308

NULL,

3309

&_ISO2022StaticData,

3310

FALSE,

3311

&_ISO2022Impl,

3312

3313

};

3314

3315

/*************JP****************/

3316

static const UConverterImpl _ISO2022JPImpl={

3317

UCNV_ISO_2022,

3318

3319

NULL,

3320

NULL,

3321

3322

_ISO2022Open,

3323

_ISO2022Close,

3324

_ISO2022Reset,

3325

3326

UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,

3327

UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC,

3328

UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,

3329

UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC,

3330

NULL,

3331

3332

NULL,

3333

_ISO2022getName,

3334

_ISO_2022_WriteSub,

3335

_ISO_2022_SafeClone,

3336

_ISO_2022_GetUnicodeSet

3337

};

3338

/*
 * Static descriptor for the ISO-2022-JP converter ("ISO_2022_JP").
 * The initializers are positional, so their meaning depends on the member
 * order of UConverterStaticData, which is declared outside this file.
 *
 * NOTE(review): this copy of the table is not valid C as it stands. The bare
 * numbers between the initializers (3339..3353) look like line numbers left
 * over from a source viewer, and the numbers 3341, 3344, 3347, 3350 and 3351
 * have no content after them. Presumably short initializer lines were lost
 * at those positions (the generic table has a value between the name and
 * UCNV_IBM, this one shows none). Restore this table from the upstream ICU
 * ucnv2022.c - TODO confirm.
 */
static const UConverterStaticData _ISO2022JPStaticData={

3339

sizeof(UConverterStaticData),

3340

"ISO_2022_JP",

3341

3342

UCNV_IBM,

3343

UCNV_ISO_2022,

3344

3345

6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */

3346

{ 0x1a, 0, 0, 0 },

3347

3348

FALSE,

3349

FALSE,

3350

3351

3352

{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

3353

};

3354

static const UConverterSharedData _ISO2022JPData={

3355

sizeof(UConverterSharedData),

3356

~((uint32_t) 0),

3357

NULL,

3358

NULL,

3359

&_ISO2022JPStaticData,

3360

FALSE,

3361

&_ISO2022JPImpl,

3362

3363

};

3364

3365

/************* KR ***************/

3366

static const UConverterImpl _ISO2022KRImpl={

3367

UCNV_ISO_2022,

3368

3369

NULL,

3370

NULL,

3371

3372

_ISO2022Open,

3373

_ISO2022Close,

3374

_ISO2022Reset,

3375

3376

UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,

3377

UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC,

3378

UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,

3379

UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC,

3380

NULL,

3381

3382

NULL,

3383

_ISO2022getName,

3384

_ISO_2022_WriteSub,

3385

_ISO_2022_SafeClone,

3386

_ISO_2022_GetUnicodeSet

3387

};

3388

/*
 * Static descriptor for the ISO-2022-KR converter ("ISO_2022_KR").
 * The initializers are positional, so their meaning depends on the member
 * order of UConverterStaticData, which is declared outside this file.
 *
 * NOTE(review): this copy of the table is not valid C as it stands. The bare
 * numbers between the initializers (3389..3403) look like line numbers left
 * over from a source viewer, and the numbers 3391, 3394, 3397, 3400 and 3401
 * have no content after them. Presumably short initializer lines were lost
 * at those positions (the generic table has a value between the name and
 * UCNV_IBM, this one shows none). Restore this table from the upstream ICU
 * ucnv2022.c - TODO confirm.
 */
static const UConverterStaticData _ISO2022KRStaticData={

3389

sizeof(UConverterStaticData),

3390

"ISO_2022_KR",

3391

3392

UCNV_IBM,

3393

UCNV_ISO_2022,

3394

3395

3, /* max 3 bytes per UChar: SO+DBCS */

3396

{ 0x1a, 0, 0, 0 },

3397

3398

FALSE,

3399

FALSE,

3400

3401

3402

{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

3403

};

3404

static const UConverterSharedData _ISO2022KRData={

3405

sizeof(UConverterSharedData),

3406

~((uint32_t) 0),

3407

NULL,

3408

NULL,

3409

&_ISO2022KRStaticData,

3410

FALSE,

3411

&_ISO2022KRImpl,

3412

3413

};

3414

3415

/*************** CN ***************/

3416

static const UConverterImpl _ISO2022CNImpl={

3417

3418

UCNV_ISO_2022,

3419

3420

NULL,

3421

NULL,

3422

3423

_ISO2022Open,

3424

_ISO2022Close,

3425

_ISO2022Reset,

3426

3427

UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,

3428

UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC,

3429

UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,

3430

UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC,

3431

NULL,

3432

3433

NULL,

3434

_ISO2022getName,

3435

_ISO_2022_WriteSub,

3436

_ISO_2022_SafeClone,

3437

_ISO_2022_GetUnicodeSet

3438

};

3439

/*
 * Static descriptor for the ISO-2022-CN converter ("ISO_2022_CN").
 * The initializers are positional, so their meaning depends on the member
 * order of UConverterStaticData, which is declared outside this file.
 *
 * NOTE(review): this copy of the table is not valid C as it stands. The bare
 * numbers between the initializers (3440..3454) look like line numbers left
 * over from a source viewer, and the numbers 3442, 3445, 3448, 3451 and 3452
 * have no content after them. Presumably short initializer lines were lost
 * at those positions (the generic table has a value between the name and
 * UCNV_IBM, this one shows none). Restore this table from the upstream ICU
 * ucnv2022.c - TODO confirm.
 */
static const UConverterStaticData _ISO2022CNStaticData={

3440

sizeof(UConverterStaticData),

3441

"ISO_2022_CN",

3442

3443

UCNV_IBM,

3444

UCNV_ISO_2022,

3445

3446

8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */

3447

{ 0x1a, 0, 0, 0 },

3448

3449

FALSE,

3450

FALSE,

3451

3452

3453

{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */

3454

};

3455

static const UConverterSharedData _ISO2022CNData={

3456

sizeof(UConverterSharedData),

3457

~((uint32_t) 0),

3458

NULL,

3459

NULL,

3460

&_ISO2022CNStaticData,

3461

FALSE,

3462

&_ISO2022CNImpl,

3463

3464

};

3465

3466

3467

3468

#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */

Older »