1
/****************************************************************************
3
** Copyright (C) 1992-2005 Trolltech AS. All rights reserved.
5
** This file is part of the core module of the Qt Toolkit.
7
** This file may be distributed under the terms of the Q Public License
8
** as defined by Trolltech AS of Norway and appearing in the file
9
** LICENSE.QPL included in the packaging of this file.
11
** This file may be distributed and/or modified under the terms of the
12
** GNU General Public License version 2 as published by the Free Software
13
** Foundation and appearing in the file LICENSE.GPL included in the
14
** packaging of this file.
16
** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
17
** information about Qt Commercial License Agreements.
18
** See http://www.trolltech.com/qpl/ for QPL licensing information.
19
** See http://www.trolltech.com/gpl/ for GPL licensing information.
21
** Contact info@trolltech.com if any conditions of this licensing are
24
** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
25
** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27
****************************************************************************/
29
// Most of the code here was originally written by Hans Petter Bieker,
30
// and is included in Qt with the author's permission, and the grateful
31
// thanks of the Trolltech team.
33
/*! \class QTsciiCodec
37
\brief The QTsciiCodec class provides conversion to and from the Tamil TSCII encoding.
39
TSCII, formally the Tamil Standard Code Information Interchange
40
specification, is a commonly used charset for Tamils. The
41
official page for the standard is at
42
\link http://www.tamil.net/tscii/ http://www.tamil.net/tscii/\endlink
44
This codec uses the mapping table found at
45
\link http://www.geocities.com/Athens/5180/tsciiset.html
46
http://www.geocities.com/Athens/5180/tsciiset.html\endlink.
47
Tamil uses composed Unicode which might cause some
48
problems if you are using Unicode fonts instead of TSCII fonts.
50
Most of the code here was written by Hans Petter Bieker
51
and is included in Qt with the author's permission and the
52
grateful thanks of the Trolltech team.
53
Here is the copyright statement for the code as it was at the
54
point of contribution. Trolltech's subsequent modifications
55
are covered by the usual copyright for Qt.
60
Copyright (c) 2000 Hans Petter Bieker. All rights reserved.
62
Redistribution and use in source and binary forms, with or without
63
modification, are permitted provided that the following conditions
65
1. Redistributions of source code must retain the above copyright
66
notice, this list of conditions and the following disclaimer.
67
2. Redistributions in binary form must reproduce the above copyright
68
notice, this list of conditions and the following disclaimer in the
69
documentation and/or other materials provided with the distribution.
71
THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
72
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
73
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
74
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
75
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
76
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
77
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
78
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
79
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
80
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
85
#include "qtsciicodec_p.h"
90
static unsigned char qt_UnicodeToTSCII(ushort u1, ushort u2, ushort u3);
91
static unsigned int qt_TSCIIToUnicode(unsigned int code, uint *s);
93
#define IsTSCIIChar(c) (((c) >= 0x80) && ((c) <= 0xfd))
96
Destroys the text codec object.
98
QTsciiCodec::~QTsciiCodec()
103
Converts the first \a len characters in \a uc from Unicode to this
104
encoding, and returns the result in a byte array. The \a state contains
105
some conversion flags, and is used by the codec to maintain state
108
QByteArray QTsciiCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
110
char replacement = '?';
112
if (state->flags & ConvertInvalidToNull)
119
uchar* cursor = (uchar*)rstr.data();
120
for (int i = 0; i < len; i++) {
123
if (ch.row() == 0x00 && ch.cell() < 0x80) {
126
} else if ((j = qt_UnicodeToTSCII(uc[i].unicode(),
128
uc[i + 2].unicode()))) {
129
// We have to check the combined chars first!
131
} else if ((j = qt_UnicodeToTSCII(uc[i].unicode(),
132
uc[i + 1].unicode(), 0))) {
134
} else if ((j = qt_UnicodeToTSCII(uc[i].unicode(), 0, 0))) {
142
rstr.resize(cursor - (const uchar*)rstr.constData());
145
state->invalidChars += invalid;
151
Converts the first \a len characters in \a chars from this encoding
152
to Unicode, and returns the result in a QString. The \a state contains
153
some conversion flags, and is used by the codec to maintain state
156
QString QTsciiCodec::convertToUnicode(const char* chars, int len, ConverterState *state) const
158
QChar replacement = QChar::ReplacementCharacter;
160
if (state->flags & ConvertInvalidToNull)
161
replacement = QChar::Null;
166
for (int i = 0; i < len; i++) {
170
result += QLatin1Char(ch);
171
} else if (IsTSCIIChar(ch)) {
174
uint u = qt_TSCIIToUnicode(ch, s);
181
result += replacement;
187
result += replacement;
193
state->invalidChars += invalid;
199
Returns the official name for the encoding that is handled by the codec.
201
\sa QTextCodec::name()
203
QByteArray QTsciiCodec::name() const
209
Returns the MIB enum for the encoding.
211
\sa QTextCodec::mibEnum()
213
int QTsciiCodec::mibEnum() const
215
/* There is no MIBEnum for TSCII now */
219
// Index of the final row of UnToTs (the table holds 125 rows, so the last
// valid index is 124). Used as the upper bound of the binary search.
static const int UnToTsLast = 124;

// Unicode -> TSCII lookup table.
//
// Each row is {first, second, third, tscii}: up to three UTF-16 code units
// (unused positions are 0x0000) followed by the single TSCII byte that
// represents that sequence.
//
// The rows MUST stay strictly sorted by (first, second, third) with no
// duplicate keys, because the table is searched with a binary search.
//
// NOTE(review): the consonant+sign rows use 0x0B82 as the second code unit;
// Unicode's Tamil virama (pulli) is U+0BCD -- confirm which one is intended
// before changing it, and keep TsToUn in step.
static const ushort UnToTs [][4] = {
    // *Sorted* list of TSCII mapping for unicode chars
    //FIRST  SECOND  THIRD   TSCII
    {0x00A0, 0x0000, 0x0000, 0xA0},
    {0x00A9, 0x0000, 0x0000, 0xA9},
    {0x0B83, 0x0000, 0x0000, 0xB7},
    {0x0B85, 0x0000, 0x0000, 0xAB},
    {0x0B86, 0x0000, 0x0000, 0xAC},
    {0x0B87, 0x0000, 0x0000, 0xAD},
    {0x0B88, 0x0000, 0x0000, 0xAE},
    {0x0B89, 0x0000, 0x0000, 0xAF},
    {0x0B8A, 0x0000, 0x0000, 0xB0},
    {0x0B8E, 0x0000, 0x0000, 0xB1},
    {0x0B8F, 0x0000, 0x0000, 0xB2},
    {0x0B90, 0x0000, 0x0000, 0xB3},
    {0x0B92, 0x0000, 0x0000, 0xB4},
    {0x0B93, 0x0000, 0x0000, 0xB5},
    {0x0B94, 0x0000, 0x0000, 0xB6},
    {0x0B95, 0x0000, 0x0000, 0xB8},
    {0x0B95, 0x0B82, 0x0000, 0xEC},
    {0x0B95, 0x0BC1, 0x0000, 0xCC},
    {0x0B95, 0x0BC2, 0x0000, 0xDC},
    {0x0B99, 0x0000, 0x0000, 0xB9},
    {0x0B99, 0x0B82, 0x0000, 0xED},
    {0x0B99, 0x0BC1, 0x0000, 0x99},
    {0x0B99, 0x0BC2, 0x0000, 0x9B},
    {0x0B9A, 0x0000, 0x0000, 0xBA},
    {0x0B9A, 0x0B82, 0x0000, 0xEE},
    {0x0B9A, 0x0BC1, 0x0000, 0xCD},
    {0x0B9A, 0x0BC2, 0x0000, 0xDD},
    {0x0B9C, 0x0000, 0x0000, 0x83},
    {0x0B9C, 0x0B82, 0x0000, 0x88},
    {0x0B9E, 0x0000, 0x0000, 0xBB},
    {0x0B9E, 0x0B82, 0x0000, 0xEF},
    {0x0B9E, 0x0BC1, 0x0000, 0x9A},
    {0x0B9E, 0x0BC2, 0x0000, 0x9C},
    {0x0B9F, 0x0000, 0x0000, 0xBC},
    {0x0B9F, 0x0B82, 0x0000, 0xF0},
    {0x0B9F, 0x0BBF, 0x0000, 0xCA},
    {0x0B9F, 0x0BC0, 0x0000, 0xCB},
    {0x0B9F, 0x0BC1, 0x0000, 0xCE},
    {0x0B9F, 0x0BC2, 0x0000, 0xDE},
    {0x0BA3, 0x0000, 0x0000, 0xBD},
    {0x0BA3, 0x0B82, 0x0000, 0xF1},
    {0x0BA3, 0x0BC1, 0x0000, 0xCF},
    {0x0BA3, 0x0BC2, 0x0000, 0xDF},
    {0x0BA4, 0x0000, 0x0000, 0xBE},
    // Was keyed on 0x0BA1 (not an assigned Tamil code point); 0xF2 belongs
    // to the 0x0BA4 group like 0xBE/0xD0/0xE0 around it.
    {0x0BA4, 0x0B82, 0x0000, 0xF2},
    {0x0BA4, 0x0BC1, 0x0000, 0xD0},
    {0x0BA4, 0x0BC2, 0x0000, 0xE0},
    {0x0BA8, 0x0000, 0x0000, 0xBF},
    {0x0BA8, 0x0B82, 0x0000, 0xF3},
    {0x0BA8, 0x0BC1, 0x0000, 0xD1},
    {0x0BA8, 0x0BC2, 0x0000, 0xE1},
    {0x0BA9, 0x0000, 0x0000, 0xC9},
    {0x0BA9, 0x0B82, 0x0000, 0xFD},
    {0x0BA9, 0x0BC1, 0x0000, 0xDB},
    {0x0BA9, 0x0BC2, 0x0000, 0xEB},
    {0x0BAA, 0x0000, 0x0000, 0xC0},
    {0x0BAA, 0x0B82, 0x0000, 0xF4},
    {0x0BAA, 0x0BC1, 0x0000, 0xD2},
    {0x0BAA, 0x0BC2, 0x0000, 0xE2},
    {0x0BAE, 0x0000, 0x0000, 0xC1},
    {0x0BAE, 0x0B82, 0x0000, 0xF5},
    {0x0BAE, 0x0BC1, 0x0000, 0xD3},
    {0x0BAE, 0x0BC2, 0x0000, 0xE3},
    {0x0BAF, 0x0000, 0x0000, 0xC2},
    {0x0BAF, 0x0B82, 0x0000, 0xF6},
    {0x0BAF, 0x0BC1, 0x0000, 0xD4},
    {0x0BAF, 0x0BC2, 0x0000, 0xE4},
    {0x0BB0, 0x0000, 0x0000, 0xC3},
    {0x0BB0, 0x0B82, 0x0000, 0xF7},
    {0x0BB0, 0x0BC1, 0x0000, 0xD5},
    {0x0BB0, 0x0BC2, 0x0000, 0xE5},
    {0x0BB1, 0x0000, 0x0000, 0xC8},
    {0x0BB1, 0x0B82, 0x0000, 0xFC},
    {0x0BB1, 0x0BC1, 0x0000, 0xDA},
    {0x0BB1, 0x0BC2, 0x0000, 0xEA},
    {0x0BB2, 0x0000, 0x0000, 0xC4},
    {0x0BB2, 0x0B82, 0x0000, 0xF8},
    {0x0BB2, 0x0BC1, 0x0000, 0xD6},
    {0x0BB2, 0x0BC2, 0x0000, 0xE6},
    {0x0BB3, 0x0000, 0x0000, 0xC7},
    {0x0BB3, 0x0B82, 0x0000, 0xFB},
    {0x0BB3, 0x0BC1, 0x0000, 0xD9},
    {0x0BB3, 0x0BC2, 0x0000, 0xE9},
    {0x0BB4, 0x0000, 0x0000, 0xC6},
    {0x0BB4, 0x0B82, 0x0000, 0xFA},
    {0x0BB4, 0x0BC1, 0x0000, 0xD8},
    {0x0BB4, 0x0BC2, 0x0000, 0xE8},
    {0x0BB5, 0x0000, 0x0000, 0xC5},
    {0x0BB5, 0x0B82, 0x0000, 0xF9},
    {0x0BB5, 0x0BC1, 0x0000, 0xD7},
    {0x0BB5, 0x0BC2, 0x0000, 0xE7},
    {0x0BB7, 0x0000, 0x0000, 0x84},
    {0x0BB7, 0x0B82, 0x0000, 0x89},
    {0x0BB8, 0x0000, 0x0000, 0x85},
    {0x0BB8, 0x0B82, 0x0000, 0x8A},
    {0x0BB9, 0x0000, 0x0000, 0x86},
    {0x0BB9, 0x0B82, 0x0000, 0x8B},
    {0x0BBE, 0x0000, 0x0000, 0xA1},
    {0x0BBF, 0x0000, 0x0000, 0xA2},
    {0x0BC0, 0x0000, 0x0000, 0xA3},
    {0x0BC1, 0x0000, 0x0000, 0xA4},
    {0x0BC2, 0x0000, 0x0000, 0xA5},
    {0x0BC6, 0x0000, 0x0000, 0xA6},
    {0x0BC7, 0x0000, 0x0000, 0xA7},
    {0x0BC8, 0x0000, 0x0000, 0xA8},
    {0x0BCC, 0x0000, 0x0000, 0xAA},
    {0x0BE6, 0x0000, 0x0000, 0x80},
    {0x0BE7, 0x0000, 0x0000, 0x81},
    {0x0BE7, 0x0BB7, 0x0000, 0x87},
    {0x0BE7, 0x0BB7, 0x0B82, 0x8C},
    {0x0BE8, 0x0000, 0x0000, 0x8D},
    {0x0BE9, 0x0000, 0x0000, 0x8E},
    {0x0BEA, 0x0000, 0x0000, 0x8F},
    {0x0BEB, 0x0000, 0x0000, 0x90},
    {0x0BEC, 0x0000, 0x0000, 0x95},
    {0x0BED, 0x0000, 0x0000, 0x96},
    {0x0BEE, 0x0000, 0x0000, 0x97},
    {0x0BEF, 0x0000, 0x0000, 0x98},
    {0x0BF0, 0x0000, 0x0000, 0x9D},
    {0x0BF1, 0x0000, 0x0000, 0x9E},
    {0x0BF2, 0x0000, 0x0000, 0x9F},
    {0x2018, 0x0000, 0x0000, 0x91},
    {0x2019, 0x0000, 0x0000, 0x92},
    {0x201C, 0x0000, 0x0000, 0x93},
    // Was a second 0x201C key: 0x94 is the closing counterpart of 0x93,
    // i.e. U+201D RIGHT DOUBLE QUOTATION MARK.
    {0x201D, 0x0000, 0x0000, 0x94}
};
350
// TSCII -> Unicode lookup table.
//
// Row N describes TSCII byte (0x80 + N); the decoder indexes it with
// (code & 0x7f). Each row holds up to three UTF-16 code units, padded with
// 0x0000. The table covers 0x80..0xFD (126 rows).
//
// NOTE(review): the consonant+sign rows use 0x0B82 as the second code unit;
// Unicode's Tamil virama (pulli) is U+0BCD -- confirm which one is intended
// before changing it, and keep UnToTs in step.
static const ushort TsToUn [][3] = {
    {0x0BE6, 0x0000, 0x0000}, // 0x80
    {0x0BE7, 0x0000, 0x0000}, // 0x81
    {0x0000, 0x0000, 0x0000}, // 0x82 unknown
    {0x0B9C, 0x0000, 0x0000}, // 0x83
    {0x0BB7, 0x0000, 0x0000}, // 0x84
    {0x0BB8, 0x0000, 0x0000}, // 0x85
    {0x0BB9, 0x0000, 0x0000}, // 0x86
    {0x0BE7, 0x0BB7, 0x0000}, // 0x87
    {0x0B9C, 0x0B82, 0x0000}, // 0x88
    {0x0BB7, 0x0B82, 0x0000}, // 0x89
    {0x0BB8, 0x0B82, 0x0000}, // 0x8A
    {0x0BB9, 0x0B82, 0x0000}, // 0x8B
    {0x0BE7, 0x0BB7, 0x0B82}, // 0x8C
    {0x0BE8, 0x0000, 0x0000}, // 0x8D
    {0x0BE9, 0x0000, 0x0000}, // 0x8E
    {0x0BEA, 0x0000, 0x0000}, // 0x8F
    {0x0BEB, 0x0000, 0x0000}, // 0x90
    {0x2018, 0x0000, 0x0000}, // 0x91
    {0x2019, 0x0000, 0x0000}, // 0x92
    {0x201C, 0x0000, 0x0000}, // 0x93
    {0x201D, 0x0000, 0x0000}, // 0x94 closing quote (was a duplicate of 0x93)
    {0x0BEC, 0x0000, 0x0000}, // 0x95
    {0x0BED, 0x0000, 0x0000}, // 0x96
    {0x0BEE, 0x0000, 0x0000}, // 0x97
    {0x0BEF, 0x0000, 0x0000}, // 0x98
    {0x0B99, 0x0BC1, 0x0000}, // 0x99
    {0x0B9E, 0x0BC1, 0x0000}, // 0x9A
    {0x0B99, 0x0BC2, 0x0000}, // 0x9B
    {0x0B9E, 0x0BC2, 0x0000}, // 0x9C
    {0x0BF0, 0x0000, 0x0000}, // 0x9D
    {0x0BF1, 0x0000, 0x0000}, // 0x9E
    {0x0BF2, 0x0000, 0x0000}, // 0x9F
    {0x00A0, 0x0000, 0x0000}, // 0xA0
    {0x0BBE, 0x0000, 0x0000}, // 0xA1
    {0x0BBF, 0x0000, 0x0000}, // 0xA2
    {0x0BC0, 0x0000, 0x0000}, // 0xA3
    {0x0BC1, 0x0000, 0x0000}, // 0xA4
    {0x0BC2, 0x0000, 0x0000}, // 0xA5
    {0x0BC6, 0x0000, 0x0000}, // 0xA6
    {0x0BC7, 0x0000, 0x0000}, // 0xA7
    {0x0BC8, 0x0000, 0x0000}, // 0xA8
    {0x00A9, 0x0000, 0x0000}, // 0xA9
    {0x0BCC, 0x0000, 0x0000}, // 0xAA
    {0x0B85, 0x0000, 0x0000}, // 0xAB
    {0x0B86, 0x0000, 0x0000}, // 0xAC
    {0x0B87, 0x0000, 0x0000}, // 0xAD
    {0x0B88, 0x0000, 0x0000}, // 0xAE
    {0x0B89, 0x0000, 0x0000}, // 0xAF
    {0x0B8A, 0x0000, 0x0000}, // 0xB0
    {0x0B8E, 0x0000, 0x0000}, // 0xB1
    {0x0B8F, 0x0000, 0x0000}, // 0xB2
    {0x0B90, 0x0000, 0x0000}, // 0xB3
    {0x0B92, 0x0000, 0x0000}, // 0xB4
    {0x0B93, 0x0000, 0x0000}, // 0xB5
    {0x0B94, 0x0000, 0x0000}, // 0xB6
    {0x0B83, 0x0000, 0x0000}, // 0xB7
    {0x0B95, 0x0000, 0x0000}, // 0xB8
    {0x0B99, 0x0000, 0x0000}, // 0xB9
    {0x0B9A, 0x0000, 0x0000}, // 0xBA
    {0x0B9E, 0x0000, 0x0000}, // 0xBB
    {0x0B9F, 0x0000, 0x0000}, // 0xBC
    {0x0BA3, 0x0000, 0x0000}, // 0xBD
    {0x0BA4, 0x0000, 0x0000}, // 0xBE
    {0x0BA8, 0x0000, 0x0000}, // 0xBF
    {0x0BAA, 0x0000, 0x0000}, // 0xC0
    {0x0BAE, 0x0000, 0x0000}, // 0xC1
    {0x0BAF, 0x0000, 0x0000}, // 0xC2
    {0x0BB0, 0x0000, 0x0000}, // 0xC3
    {0x0BB2, 0x0000, 0x0000}, // 0xC4
    {0x0BB5, 0x0000, 0x0000}, // 0xC5
    {0x0BB4, 0x0000, 0x0000}, // 0xC6
    {0x0BB3, 0x0000, 0x0000}, // 0xC7
    {0x0BB1, 0x0000, 0x0000}, // 0xC8
    {0x0BA9, 0x0000, 0x0000}, // 0xC9
    {0x0B9F, 0x0BBF, 0x0000}, // 0xCA
    {0x0B9F, 0x0BC0, 0x0000}, // 0xCB
    {0x0B95, 0x0BC1, 0x0000}, // 0xCC
    {0x0B9A, 0x0BC1, 0x0000}, // 0xCD
    {0x0B9F, 0x0BC1, 0x0000}, // 0xCE
    {0x0BA3, 0x0BC1, 0x0000}, // 0xCF
    {0x0BA4, 0x0BC1, 0x0000}, // 0xD0
    {0x0BA8, 0x0BC1, 0x0000}, // 0xD1
    {0x0BAA, 0x0BC1, 0x0000}, // 0xD2
    {0x0BAE, 0x0BC1, 0x0000}, // 0xD3
    {0x0BAF, 0x0BC1, 0x0000}, // 0xD4
    {0x0BB0, 0x0BC1, 0x0000}, // 0xD5
    {0x0BB2, 0x0BC1, 0x0000}, // 0xD6
    {0x0BB5, 0x0BC1, 0x0000}, // 0xD7
    {0x0BB4, 0x0BC1, 0x0000}, // 0xD8
    {0x0BB3, 0x0BC1, 0x0000}, // 0xD9
    {0x0BB1, 0x0BC1, 0x0000}, // 0xDA
    {0x0BA9, 0x0BC1, 0x0000}, // 0xDB
    {0x0B95, 0x0BC2, 0x0000}, // 0xDC
    {0x0B9A, 0x0BC2, 0x0000}, // 0xDD
    {0x0B9F, 0x0BC2, 0x0000}, // 0xDE
    {0x0BA3, 0x0BC2, 0x0000}, // 0xDF
    {0x0BA4, 0x0BC2, 0x0000}, // 0xE0
    {0x0BA8, 0x0BC2, 0x0000}, // 0xE1
    {0x0BAA, 0x0BC2, 0x0000}, // 0xE2
    {0x0BAE, 0x0BC2, 0x0000}, // 0xE3
    {0x0BAF, 0x0BC2, 0x0000}, // 0xE4
    {0x0BB0, 0x0BC2, 0x0000}, // 0xE5
    {0x0BB2, 0x0BC2, 0x0000}, // 0xE6
    {0x0BB5, 0x0BC2, 0x0000}, // 0xE7
    {0x0BB4, 0x0BC2, 0x0000}, // 0xE8
    {0x0BB3, 0x0BC2, 0x0000}, // 0xE9
    {0x0BB1, 0x0BC2, 0x0000}, // 0xEA
    {0x0BA9, 0x0BC2, 0x0000}, // 0xEB
    {0x0B95, 0x0B82, 0x0000}, // 0xEC
    {0x0B99, 0x0B82, 0x0000}, // 0xED
    {0x0B9A, 0x0B82, 0x0000}, // 0xEE
    {0x0B9E, 0x0B82, 0x0000}, // 0xEF
    {0x0B9F, 0x0B82, 0x0000}, // 0xF0
    {0x0BA3, 0x0B82, 0x0000}, // 0xF1
    {0x0BA4, 0x0B82, 0x0000}, // 0xF2 (was 0x0BA1, not an assigned Tamil code point)
    {0x0BA8, 0x0B82, 0x0000}, // 0xF3
    {0x0BAA, 0x0B82, 0x0000}, // 0xF4
    {0x0BAE, 0x0B82, 0x0000}, // 0xF5
    {0x0BAF, 0x0B82, 0x0000}, // 0xF6
    {0x0BB0, 0x0B82, 0x0000}, // 0xF7
    {0x0BB2, 0x0B82, 0x0000}, // 0xF8
    {0x0BB5, 0x0B82, 0x0000}, // 0xF9
    {0x0BB4, 0x0B82, 0x0000}, // 0xFA
    {0x0BB3, 0x0B82, 0x0000}, // 0xFB
    {0x0BB1, 0x0B82, 0x0000}, // 0xFC
    {0x0BA9, 0x0B82, 0x0000}  // 0xFD
};
480
static int cmp(const ushort *s1, const ushort *s2, size_t len)
484
while (len-- && (diff = *s1++ - *s2++) == 0)
490
static unsigned char qt_UnicodeToTSCII(ushort u1, ushort u2, ushort u3)
497
int a = 0; // start pos
498
int b = UnToTsLast; // end pos
500
// do a binary search for the composed unicode in the list
503
int j = cmp(UnToTs[w], s, 3);
518
static unsigned int qt_TSCIIToUnicode(uint code, uint *s)
521
for (int i = 0; i < 3; i++) {
522
uint u = TsToUn[code & 0x7f][i];
524
if (s[i]) len = i + 1;