170
165
if (charset == MM_MODEM_CHARSET_UTF8 || charset == MM_MODEM_CHARSET_IRA)
171
166
return unconverted;
173
return g_convert (unconverted, unconverted_len, "UTF-8//TRANSLIT", iconv_from, NULL, NULL, NULL);
168
converted = g_convert (unconverted, unconverted_len,
169
"UTF-8//TRANSLIT", iconv_from,
171
if (!converted || error) {
172
g_clear_error (&error);
173
g_free (unconverted);
181
/* GSM 03.38 encoding conversion stuff */
183
/* Number of entries in gsm_def_utf8_alphabet[]; gsm_def_char_to_utf8()
 * only accepts codes strictly below this value. */
#define GSM_DEF_ALPHABET_SIZE 128
184
/* Number of entries in gsm_ext_utf8_alphabet[]. */
#define GSM_EXT_ALPHABET_SIZE 10
186
typedef struct GsmUtf8Mapping {
189
guint8 gsm; /* only used for extended GSM charset */
192
/* Table initializer for a character whose UTF-8 form is the single byte `a`:
 * byte array {a, 0, 0}, byte count 1, and 0 in the trailing field
 * (presumably the `gsm` member, unused for the default alphabet -- confirm
 * against the struct layout). */
#define ONE(a) { {a, 0x00, 0x00}, 1, 0 }
193
/* Table initializer for a character whose UTF-8 form is the two bytes
 * `a`, `b`: byte array {a, b, 0}, byte count 2, and 0 in the trailing field
 * (presumably the `gsm` member, unused for the default alphabet -- confirm
 * against the struct layout). */
#define TWO(a, b) { {a, b, 0x00}, 2, 0 }
196
* gsm_def_utf8_alphabet:
198
* Mapping from GSM default alphabet to UTF-8.
200
* ETSI GSM 03.38, version 6.0.1, section 6.2.1; Default alphabet. Mapping to UCS-2.
201
* Mapping according to http://unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
203
static const GsmUtf8Mapping gsm_def_utf8_alphabet[GSM_DEF_ALPHABET_SIZE] = {
205
ONE(0x40), TWO(0xc2, 0xa3), ONE(0x24), TWO(0xc2, 0xa5),
207
TWO(0xc3, 0xa8), TWO(0xc3, 0xa9), TWO(0xc3, 0xb9), TWO(0xc3, 0xac),
209
TWO(0xc3, 0xb2), TWO(0xc3, 0x87), ONE(0x0a), TWO(0xc3, 0x98),
211
TWO(0xc3, 0xb8), ONE(0x0d), TWO(0xc3, 0x85), TWO(0xc3, 0xa5),
213
TWO(0xce, 0x94), ONE(0x5f), TWO(0xce, 0xa6), TWO(0xce, 0x93),
215
TWO(0xce, 0x9b), TWO(0xce, 0xa9), TWO(0xce, 0xa0), TWO(0xce, 0xa8),
216
/* Σ Θ Ξ Escape Code (slot 0x1b; the lone byte 0xa0 is a placeholder, not a valid one-byte UTF-8 sequence) */
217
TWO(0xce, 0xa3), TWO(0xce, 0x98), TWO(0xce, 0x9e), ONE(0xa0),
219
TWO(0xc3, 0x86), TWO(0xc3, 0xa6), TWO(0xc3, 0x9f), TWO(0xc3, 0x89),
221
ONE(0x20), ONE(0x21), ONE(0x22), ONE(0x23),
223
TWO(0xc2, 0xa4), ONE(0x25), ONE(0x26), ONE(0x27),
225
ONE(0x28), ONE(0x29), ONE(0x2a), ONE(0x2b),
227
ONE(0x2c), ONE(0x2d), ONE(0x2e), ONE(0x2f),
229
ONE(0x30), ONE(0x31), ONE(0x32), ONE(0x33),
231
ONE(0x34), ONE(0x35), ONE(0x36), ONE(0x37),
233
ONE(0x38), ONE(0x39), ONE(0x3a), ONE(0x3b),
235
ONE(0x3c), ONE(0x3d), ONE(0x3e), ONE(0x3f),
237
TWO(0xc2, 0xa1), ONE(0x41), ONE(0x42), ONE(0x43),
239
ONE(0x44), ONE(0x45), ONE(0x46), ONE(0x47),
241
ONE(0x48), ONE(0x49), ONE(0x4a), ONE(0x4b),
243
ONE(0x4c), ONE(0x4d), ONE(0x4e), ONE(0x4f),
245
ONE(0x50), ONE(0x51), ONE(0x52), ONE(0x53),
247
ONE(0x54), ONE(0x55), ONE(0x56), ONE(0x57),
249
ONE(0x58), ONE(0x59), ONE(0x5a), TWO(0xc3, 0x84),
251
TWO(0xc3, 0x96), TWO(0xc3, 0x91), TWO(0xc3, 0x9c), TWO(0xc2, 0xa7),
253
TWO(0xc2, 0xbf), ONE(0x61), ONE(0x62), ONE(0x63),
255
ONE(0x64), ONE(0x65), ONE(0x66), ONE(0x67),
257
ONE(0x68), ONE(0x69), ONE(0x6a), ONE(0x6b),
259
ONE(0x6c), ONE(0x6d), ONE(0x6e), ONE(0x6f),
261
ONE(0x70), ONE(0x71), ONE(0x72), ONE(0x73),
263
ONE(0x74), ONE(0x75), ONE(0x76), ONE(0x77),
265
ONE(0x78), ONE(0x79), ONE(0x7a), TWO(0xc3, 0xa4),
267
TWO(0xc3, 0xb6), TWO(0xc3, 0xb1), TWO(0xc3, 0xbc), TWO(0xc3, 0xa0)
271
/* Look up default-alphabet code `gsm` and copy its UTF-8 bytes into out_utf8.
 * Returns the number of bytes copied (the table entry's `len`), or 0 via
 * g_return_val_if_fail() when `gsm` is not below GSM_DEF_ALPHABET_SIZE. */
gsm_def_char_to_utf8 (const guint8 gsm, guint8 out_utf8[2])
273
g_return_val_if_fail (gsm < GSM_DEF_ALPHABET_SIZE, 0);
274
/* Only `len` bytes are written; the rest of out_utf8 is left untouched. */
memcpy (&out_utf8[0], &gsm_def_utf8_alphabet[gsm].chars[0], gsm_def_utf8_alphabet[gsm].len);
275
return gsm_def_utf8_alphabet[gsm].len;
279
/* Reverse lookup: linearly search gsm_def_utf8_alphabet for an entry whose
 * UTF-8 bytes equal the `len` bytes at `utf8`.
 * NOTE(review): the handling of a match (presumably storing the table index
 * in *out_gsm) and the return statements are not visible in this excerpt. */
utf8_to_gsm_def_char (const char *utf8, guint32 len, guint8 *out_gsm)
283
/* Table entries carry at most 3 bytes, so other lengths are not searched. */
if (len > 0 && len < 4) {
284
for (i = 0; i < GSM_DEF_ALPHABET_SIZE; i++) {
285
/* Only entries with exactly the same byte count are compared. */
if (gsm_def_utf8_alphabet[i].len == len) {
286
if (memcmp (&gsm_def_utf8_alphabet[i].chars[0], utf8, len) == 0) {
297
/* Extended-table initializer: single UTF-8 byte `a` (byte count 1) paired
 * with `g` in the trailing field (presumably the `gsm` member, i.e. the code
 * matched by gsm_ext_char_to_utf8() -- confirm against the struct layout). */
#define EONE(a, g) { {a, 0x00, 0x00}, 1, g }
298
/* Extended-table initializer: three UTF-8 bytes `a`, `b`, `c` (byte count 3)
 * paired with `g` in the trailing field (presumably the `gsm` member --
 * confirm against the struct layout). */
#define ETHR(a, b, c, g) { {a, b, c}, 3, g }
301
* gsm_ext_utf8_alphabet:
303
* Mapping from GSM extended alphabet to UTF-8.
306
/* Each entry pairs a character's UTF-8 bytes with the last macro argument,
 * which is the value gsm_ext_char_to_utf8() compares against the code that
 * follows the escape character. */
static const GsmUtf8Mapping gsm_ext_utf8_alphabet[GSM_EXT_ALPHABET_SIZE] = {
307
/* form feed ^ { } */
308
EONE(0x0c, 0x0a), EONE(0x5e, 0x14), EONE(0x7b, 0x28), EONE(0x7d, 0x29),
310
/* backslash [ ~ ] */
EONE(0x5c, 0x2f), EONE(0x5b, 0x3c), EONE(0x7e, 0x3d), EONE(0x5d, 0x3e),
312
/* | and the euro sign (UTF-8 e2 82 ac) */
EONE(0x7c, 0x40), ETHR(0xe2, 0x82, 0xac, 0x65)
315
/* Code that introduces an extended-alphabet character: the decoder looks up
 * the byte following it in the extended table, and the encoder emits it
 * before every extended code. */
#define GSM_ESCAPE_CHAR 0x1b
318
/* Look up extended-alphabet code `gsm` by linear search of
 * gsm_ext_utf8_alphabet and, on a match, copy the entry's UTF-8 bytes into
 * out_utf8 and return the entry's byte count.
 * NOTE(review): the no-match return value is not visible in this excerpt
 * (the caller appears to treat 0 as "not found" -- confirm). */
gsm_ext_char_to_utf8 (const guint8 gsm, guint8 out_utf8[3])
322
for (i = 0; i < GSM_EXT_ALPHABET_SIZE; i++) {
323
if (gsm == gsm_ext_utf8_alphabet[i].gsm) {
324
/* Copies 1 or 3 bytes depending on the matched entry. */
memcpy (&out_utf8[0], &gsm_ext_utf8_alphabet[i].chars[0], gsm_ext_utf8_alphabet[i].len);
325
return gsm_ext_utf8_alphabet[i].len;
332
/* Reverse lookup: linearly search gsm_ext_utf8_alphabet for an entry whose
 * UTF-8 bytes equal the `len` bytes at `utf8`; on a match the entry's
 * extended code is stored in *out_gsm.
 * NOTE(review): the return statements are not visible in this excerpt; the
 * caller uses the result as a boolean "found" flag. */
utf8_to_gsm_ext_char (const char *utf8, guint32 len, guint8 *out_gsm)
336
/* Table entries carry at most 3 bytes, so other lengths are not searched. */
if (len > 0 && len < 4) {
337
for (i = 0; i < GSM_EXT_ALPHABET_SIZE; i++) {
338
/* Only entries with exactly the same byte count are compared. */
if (gsm_ext_utf8_alphabet[i].len == len) {
339
if (memcmp (&gsm_ext_utf8_alphabet[i].chars[0], utf8, len) == 0) {
340
*out_gsm = gsm_ext_utf8_alphabet[i].gsm;
350
/* Convert `len` unpacked GSM bytes (one character code per byte) to UTF-8.
 * Returns the byte array's data with a trailing 0 byte appended; because the
 * array is released with g_byte_array_free (.., FALSE), the caller owns the
 * buffer and frees it with g_free(). Returns NULL via g_return_val_if_fail()
 * when `gsm` is NULL or `len` is 4096 or more. */
mm_charset_gsm_unpacked_to_utf8 (const guint8 *gsm, guint32 len)
355
g_return_val_if_fail (gsm != NULL, NULL);
356
g_return_val_if_fail (len < 4096, NULL);
358
/* worst case initial length */
359
utf8 = g_byte_array_sized_new (len * 2 + 1);
361
for (i = 0; i < len; i++) {
365
if (gsm[i] == GSM_ESCAPE_CHAR) {
366
/* Extended alphabet, decode next char */
/* NOTE(review): gsm[i+1] is read with no visible check that i + 1 < len;
 * if the escape code is the last input byte this reads one byte past the
 * caller-supplied length -- confirm against the elided lines. */
367
ulen = gsm_ext_char_to_utf8 (gsm[i+1], uchars);
371
/* Default alphabet */
372
ulen = gsm_def_char_to_utf8 (gsm[i], uchars);
376
g_byte_array_append (utf8, &uchars[0], ulen);
378
/* Substitute for an undecodable code (the guarding condition is on a line
 * not shown here; presumably ulen == 0). */
g_byte_array_append (utf8, (guint8 *) "?", 1);
381
g_byte_array_append (utf8, (guint8 *) "\0", 1); /* NUL terminator */
382
return g_byte_array_free (utf8, FALSE);
386
/* Convert a NUL-terminated UTF-8 string to unpacked GSM bytes. Characters
 * found in the extended table are emitted as GSM_ESCAPE_CHAR followed by
 * their extended code; otherwise the default table is tried.
 * Returns the byte array's data (released with g_byte_array_free (.., FALSE),
 * so the caller frees it with g_free()), or NULL via g_return_val_if_fail()
 * when `utf8` or `out_len` is NULL or `utf8` is not valid UTF-8.
 * NOTE(review): the assignment to *out_len and the branch taken when neither
 * table matches are not visible in this excerpt. */
mm_charset_utf8_to_unpacked_gsm (const char *utf8, guint32 *out_len)
389
const char *c = utf8, *next = c;
390
/* Addressable copy of the escape code so it can be passed to g_byte_array_append(). */
static const guint8 gesc = GSM_ESCAPE_CHAR;
393
g_return_val_if_fail (utf8 != NULL, NULL);
394
g_return_val_if_fail (out_len != NULL, NULL);
395
g_return_val_if_fail (g_utf8_validate (utf8, -1, NULL), NULL);
397
/* worst case initial length */
398
gsm = g_byte_array_sized_new (g_utf8_strlen (utf8, -1) * 2 + 1);
401
/* Zero-length string */
402
g_byte_array_append (gsm, (guint8 *) "\0", 1);
404
return g_byte_array_free (gsm, FALSE);
407
while (next && *next) {
408
guint8 gch = 0x3f; /* 0x3f == '?' */
410
/* next - c below is the byte length of the current UTF-8 character. */
next = g_utf8_next_char (c);
412
/* Try escaped chars first, then default alphabet */
413
if (utf8_to_gsm_ext_char (c, next - c, &gch)) {
414
/* Add the escape char */
415
g_byte_array_append (gsm, &gesc, 1);
416
g_byte_array_append (gsm, &gch, 1);
417
} else if (utf8_to_gsm_def_char (c, next - c, &gch))
418
g_byte_array_append (gsm, &gch, 1);
425
return g_byte_array_free (gsm, FALSE);
429
/* Unpack 7-bit GSM characters from the octet buffer `gsm`, beginning
 * `start_offset` bits into it, into one byte per character. Stores the
 * number of unpacked bytes in *out_unpacked_len and returns the byte array's
 * data (released with g_byte_array_free (.., FALSE), so the caller frees it
 * with g_free()). */
gsm_unpack (const guint8 *gsm,
431
guint8 start_offset, /* in _bits_ */
432
guint32 *out_unpacked_len)
434
GByteArray *unpacked;
437
/* Whole 7-bit characters that fit in the bits remaining after start_offset. */
nchars = ((gsm_len * 8) - start_offset) / 7;
438
unpacked = g_byte_array_sized_new (nchars + 1);
440
for (i = 0; i < nchars; i++) {
441
guint8 bits_here, bits_in_next, octet, offset, c;
444
start_bit = start_offset + (i * 7); /* Overall bit offset of char in buffer */
445
offset = start_bit % 8; /* Offset to start of char in this byte */
446
/* How many of the character's 7 bits live in the current octet. */
bits_here = offset ? (8 - offset) : 7;
447
bits_in_next = 7 - bits_here;
449
/* Grab bits in the current byte */
450
octet = gsm[start_bit / 8];
451
c = (octet >> offset) & (0xFF >> (8 - bits_here));
453
/* Grab any bits that spilled over to next byte */
/* NOTE(review): any guard on bits_in_next is on a line not shown here; if
 * this read is unconditional, a character ending exactly on the last octet
 * boundary would read one octet past the buffer -- confirm. */
455
octet = gsm[(start_bit / 8) + 1];
456
c |= (octet & (0xFF >> (8 - bits_in_next))) << bits_here;
458
g_byte_array_append (unpacked, &c, 1);
461
*out_unpacked_len = unpacked->len;
462
return g_byte_array_free (unpacked, FALSE);
466
/* Pack the low 7 bits of each byte of `src` into consecutive bit positions
 * of a new octet buffer, with the first character placed `start_offset` bits
 * in. Stores the packed byte count in *out_packed_len and returns the byte
 * array's data (released with g_byte_array_free (.., FALSE), so the caller
 * frees it with g_free()).
 * NOTE(review): several conditions around the two appends below (and the use
 * of add_last) are on lines not shown in this excerpt. */
gsm_pack (const guint8 *src,
469
guint32 *out_packed_len)
472
guint8 c, add_last = 0;
475
packed = g_byte_array_sized_new (src_len);
477
for (i = 0, c = 0; i < src_len; i++) {
478
guint8 bits_here, offset;
481
start_bit = start_offset + (i * 7); /* Overall bit offset of char in buffer */
482
offset = start_bit % 8; /* Offset to start of char in this byte */
483
/* How many of this character's 7 bits fit in the current output octet. */
bits_here = offset ? (8 - offset) : 7;
485
/* Bits shifted beyond bit 7 are dropped when stored in the 8-bit `c`; they
 * are recovered below from src[i] >> bits_here. */
c |= (src[i] & 0x7F) << offset;
487
/* Add this packed byte */
488
g_byte_array_append (packed, &c, 1);
492
/* Pack the rest of this char into the next byte */
493
if (bits_here != 7) {
494
c = (src[i] & 0x7F) >> bits_here;
499
g_byte_array_append (packed, &c, 1);
501
*out_packed_len = packed->len;
502
return g_byte_array_free (packed, FALSE);