4
* Copyright 2012-2013 Alex <alex@linuxonly.ru>
6
* This program is free software: you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 3 of the License, or
9
* (at your option) any later version.
11
* This program is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU General Public License for more details.
16
* You should have received a copy of the GNU General Public License
17
* along with this program. If not, see <http://www.gnu.org/licenses/>.
25
/*
 * Lookup table of 128 characters used by utf8_map_gsm7(): the array index is
 * the 7-bit code that function stores in its output when an entry matches.
 * Each entry holds the character's UTF-8 bytes packed big-endian into one
 * integer (0x0040 is the single byte 0x40, 0xc2a3 is the byte pair C2 A3),
 * which is the same packing utf8_map_gsm7() applies to multi-byte input
 * before comparing.
 * NOTE(review): entry 27 (0x00a0) is not in packed UTF-8 form (U+00A0 would
 * be 0xc2a0); presumably a placeholder for the escape code - confirm.
 */
static const guint gsm7_utf8_table [128] = {
25
26
0x0040, 0xc2a3, 0x0024, 0xc2a5, 0xc3a8, 0xc3a9, 0xc3b9, 0xc3ac, 0xc3b2, 0xc387,
27
0x000a, 0xc398, 0xc3b8, 0x000d, 0xc385, 0xc3a5, 0xce94, 0x005f, 0xcea6, 0xce93,
28
0xce9b, 0xcea9, 0xcea0, 0xcea8, 0xcea3, 0xce98, 0xce9e, 0x00a0, 0xc386, 0xc3a6,
29
0xc39f, 0xc389, 0x0020, 0x0021, 0x0022, 0x0023, 0xc2a4, 0x0025, 0x0026, 0x0027,
30
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 0x0030, 0x0031,
31
0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b,
32
0x003c, 0x003d, 0x003e, 0x003f, 0xc2a1, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045,
33
0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
34
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
35
0x005a, 0xc384, 0xc396, 0xc391, 0xc39c, 0xc2a7, 0xc2bf, 0x0061, 0x0062, 0x0063,
36
0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d,
37
0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
38
0x0078, 0x0079, 0x007a, 0xc3a4, 0xc3b6, 0xc3b1, 0xc3bc, 0xc3a0
41
/*
 * Extension lookup used by utf8_map_gsm7(), organised as two parallel rows of
 * ten columns:
 *   row 0 - the character's UTF-8 bytes packed big-endian into one integer
 *           (0xe282ac is the byte triple E2 82 AC);
 *   row 1 - the code utf8_map_gsm7() stores at output[optr+1] when the row 0
 *           entry in the same column matches (presumably after an escape
 *           byte at output[optr] - that store is not shown; confirm).
 */
static const guint gsm7_utf8_ext_table [2][10] = {
42
{0x00000c, 0x00005e, 0x00007b, 0x00007d, 0x00005c, 0x00005b, 0x00007e, 0x00005d, 0x00007c, 0xe282ac},
43
{ 0x0a, 0x14, 0x28, 0x29, 0x2f, 0x3c, 0x3d, 0x3e, 0x40, 0x65}
46
/* Uppercase hexadecimal digit characters, indexed by nibble value (0..15). */
static const gchar hextable[16] = {
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
49
/* Forward declaration of the file-local hexadecimal digit parser. */
static guint hex_to_dec(const guchar *input, gsize number);
52
/*
 * hex_to_dec:
 * @input:  text holding hexadecimal digits
 * @number: how many characters of @input to examine
 *
 * Walks the first @number characters of @input from the last one back to the
 * first and maps each to a digit value 0..15. Returns 0 immediately when
 * @input is NULL, starts with a NUL byte, or @number is 0.
 * NOTE(review): the statements that combine the per-digit values into the
 * returned number are not shown here - presumably a positional base-16 sum;
 * confirm.
 */
static guint hex_to_dec(const guchar *input, gsize number)
57
if ((input == NULL) || ((input != NULL) && (input[0] == '\0')) || (number == 0)) return 0;
62
// NOTE(review): termination relies on hexptr being a signed type; with an
// unsigned index "hexptr >= 0" is always true - confirm the declaration.
for (hexptr = (number-1); hexptr >= 0; hexptr--) {
63
switch (input[hexptr]) {
64
case '0': b = 0; break;
65
case '1': b = 1; break;
66
case '2': b = 2; break;
67
case '3': b = 3; break;
68
case '4': b = 4; break;
69
case '5': b = 5; break;
70
case '6': b = 6; break;
71
case '7': b = 7; break;
72
case '8': b = 8; break;
73
case '9': b = 9; break;
75
// NOTE(review): only the uppercase labels A-F are visible below; if no
// lowercase labels exist, 'a'-'f' fall through to the default and count as 0.
case 'A': b = 10; break;
77
case 'B': b = 11; break;
79
case 'C': b = 12; break;
81
case 'D': b = 13; break;
83
case 'E': b = 14; break;
85
case 'F': b = 15; break;
86
// Any other character is silently treated as digit 0 rather than an error
default: b = 0; break;
96
/*
 * utf8_to_ucs2:
 * @input:   UTF-8 encoded bytes
 * @ilength: number of bytes in @input
 * @olength: must not be NULL (presumably receives the output length - the
 *           assignment is not shown; confirm)
 *
 * Converts UTF-8 text to hexadecimal text, writing four uppercase hex digits
 * (one 16-bit value) per decoded character. The buffer comes from g_malloc0()
 * and is resized to optr+1 bytes with g_realloc() before the function ends.
 *
 * Returns NULL when @input is NULL or starts with a NUL byte, @ilength is 0,
 * or @olength is NULL.
 */
guchar *utf8_to_ucs2(const guchar *input, gsize ilength, gsize *olength)
98
guchar *output, *routput;
102
if ((input == NULL) || (ilength == 0) || (olength == NULL)) return NULL;
104
if (input[0] == '\0') return NULL;
106
// NOTE(review): the buffer holds 2*ilength+1 bytes, yet the branch for bytes
// below 0x80 writes up to output[optr+3] for a single input byte. If optr then
// advances by 4, ASCII input needs 4*ilength+1 bytes - confirm and resize.
output = g_malloc0(ilength*2+1);
108
if (output == NULL) return NULL;
112
while (iptr < ilength) {
113
// Single-byte character (0x00..0x7F)
if (input[iptr] < 0x80) {
116
output[optr+1] = '0';
117
output[optr+2] = hextable[(guchar)(value & 0xff)/16];
118
output[optr+3] = hextable[(guchar)(value & 0xff)%16];
123
// The 0xE0 mask matches every lead byte 0xE0..0xFF, so four-byte lead bytes
// (0xF0 and above) also enter this three-byte branch.
if ((input[iptr] & 0xE0) == 0xE0) {
124
// NOTE(review): iptr+1 and iptr+2 are read without comparing against
// ilength; only a NUL byte stops the decode - confirm input is NUL-terminated.
if (!((input[iptr+1] == 0) || (input[iptr+2] == 0))) {
125
// Three-byte sequence: 4 + 6 + 6 payload bits
value = ((input[iptr] & 0x0F) << 12) | ((input[iptr+1] & 0x3F) << 6) | (input[iptr+2] & 0x3F);
126
output[optr] = hextable[(guchar)((value >> 8) & 0xff)/16];
127
output[optr+1] = hextable[(guchar)((value >> 8) & 0xff)%16];
128
output[optr+2] = hextable[(guchar)(value & 0xff)/16];
129
output[optr+3] = hextable[(guchar)(value & 0xff)%16];
136
// NOTE(review): this tests input[0], not input[iptr], so the decision depends
// on the first byte of the string rather than the current one - likely a bug.
if ((input[0] & 0xC0) == 0xC0) {
138
// Two-byte sequence: 5 + 6 payload bits
value = ((input[iptr] & 0x1F) << 6) | (input[iptr+1] & 0x3F);
139
output[optr] = hextable[(guchar)((value >> 8) & 0xff)/16];
140
output[optr+1] = hextable[(guchar)((value >> 8) & 0xff)%16];
141
output[optr+2] = hextable[(guchar)(value & 0xff)/16];
142
output[optr+3] = hextable[(guchar)(value & 0xff)%16];
152
// Resize the buffer to the bytes actually produced plus one
routput = g_realloc(output, optr+1);
154
if (routput != NULL) output = routput;
161
/*
 * ucs2_to_utf8:
 * @input:   hexadecimal text, four hex digits per 16-bit value
 * @ilength: number of characters in @input; must be a multiple of 4
 * @olength: must not be NULL (presumably receives the output length - the
 *           assignment is not shown; confirm)
 *
 * Parses @input four hex digits at a time with hex_to_dec() and writes each
 * value as a one-, two- or three-byte UTF-8 sequence into a g_malloc0()
 * buffer, which is resized to optr+1 bytes with g_realloc() at the end.
 *
 * Returns NULL when @input is NULL or starts with a NUL byte, @ilength is 0
 * or not a multiple of 4, or @olength is NULL.
 */
guchar *ucs2_to_utf8(const guchar *input, gsize ilength, gsize *olength)
163
guchar *output, *routput;
167
if ((input == NULL) || (ilength == 0) || (olength == NULL)) return NULL;
169
if ((input[0] == '\0') || (ilength%4 != 0)) return NULL;
171
// ilength/4 values of at most 3 bytes each fit within this size
output = g_malloc0(ilength*2+1);
175
while (iptr < ilength) {
176
value = hex_to_dec(input+iptr, 4);
179
// Values up to 0x20 other than LF and CR are singled out here; what is done
// with them is on lines not shown (presumably replaced) - confirm.
if ((value <= 0x20) && (value != 0x0A) && (value != 0x0D)) {
182
output[optr] = value;
187
// Two-byte UTF-8 sequence: 110xxxxx 10xxxxxx
if ((value >= 0x80) && (value < 0x800)) {
188
output[optr] = (value >> 6) | 0xC0;
189
output[optr+1] = (value & 0x3F) | 0x80;
193
// Three-byte UTF-8 sequence: 1110xxxx 10xxxxxx 10xxxxxx
// NOTE(review): the strict "< 0xFFFF" leaves value 0xFFFF unmatched by the
// branches visible here - confirm whether that is intended.
if ((value >= 0x800) && (value < 0xFFFF)) {
194
output[optr] = ((value >> 12)) | 0xE0;
195
output[optr+1] = ((value >> 6) & 0x3F) | 0x80;
196
output[optr+2] = ((value) & 0x3F) | 0x80;
205
// Resize the buffer to the bytes actually produced plus one
routput = g_realloc(output, optr+1);
207
if (routput != NULL) output = routput;
214
/*
 * utf8_to_gsm7:
 * @input:   bytes to pack (presumably 7-bit codes such as those produced by
 *           utf8_map_gsm7() - confirm against callers)
 * @ilength: number of bytes in @input
 * @olength: must not be NULL (presumably receives the output length - the
 *           assignment is not shown; confirm)
 *
 * Bit-packs neighbouring input bytes into octets and writes every octet as two
 * uppercase hex digits into a g_malloc0() buffer, which is resized to optr+1
 * bytes with g_realloc() at the end.
 *
 * Returns NULL when @input is NULL, @ilength is 0, or @olength is NULL.
 */
guchar *utf8_to_gsm7(const guchar *input, gsize ilength, gsize *olength)
216
guchar *output, *routput;
220
if ((input == NULL) || (ilength == 0) || (olength == NULL)) return NULL;
222
output = g_malloc0(ilength*2+1);
224
if (output == NULL) return NULL;
228
while (iptr < ilength) {
231
// Last input byte: nothing follows it, so only its own remaining high bits
// are emitted
if ((iptr + 1) == ilength) {
232
value = (input[iptr] >> (iptr % 8)) & 0xff;
233
output[optr] = hextable[(guchar)(value & 0xff)/16];
234
output[optr+1] = hextable[(guchar)(value & 0xff)%16];
237
// Octet = remaining bits of the current byte joined with the low bits of the
// next byte; x is the shift position maintained on lines not shown here
value = (((input[iptr] >> (x - 1)) | (input[iptr+1] << (8 - x))) & 0xff) & 0xff;
238
output[optr] = hextable[(guchar)(value & 0xff)/16];
239
output[optr+1] = hextable[(guchar)(value & 0xff)%16];
248
// Resize the buffer to the bytes actually produced plus one
routput = g_realloc(output, optr+1);
250
if (routput != NULL) output = routput;
257
/*
 * gsm7_to_utf8:
 * @input:   hexadecimal text, two hex digits per packed octet
 * @ilength: number of characters in @input; must be even
 * @olength: must not be NULL (presumably receives the output length - the
 *           assignment is not shown; confirm)
 *
 * Parses @input two hex digits at a time with hex_to_dec() and unpacks the
 * octets: the masked low bits of each octet are shifted up and merged with the
 * bits carried over from the previous octet, and the result is stored as one
 * output byte. The g_malloc0() buffer is resized to optr+1 bytes at the end.
 * NOTE(review): any translation of the unpacked codes to UTF-8 characters is
 * not visible in the lines shown - confirm where it happens.
 *
 * Returns NULL when @input is NULL or starts with a NUL byte, @ilength is 0
 * or odd, or @olength is NULL.
 */
guchar *gsm7_to_utf8(const guchar *input, gsize ilength, gsize *olength)
259
guchar *output, *routput;
261
guint value, current, mask, next, left;
263
if ((input == NULL) || (ilength == 0) || (olength == NULL)) return NULL;
265
if ((input[0] == '\0') || (ilength%2 != 0)) return NULL;
267
output = g_malloc0(ilength*4+1);
269
if (output == NULL) return NULL;
277
while (iptr < ilength) {
279
value = hex_to_dec(input+iptr, 2);
280
// Bits of this octet selected by mask, moved above the bits carried in next
current = (((value & mask) << (7 - left)) | next);
281
// Bits outside mask are carried over to the following iteration
next = (value & (~mask)) >> left;
282
output[optr] = current;
298
// Resize the buffer to the bytes actually produced plus one
routput = g_realloc(output, optr+1);
300
if (routput != NULL) output = routput;
307
/*
 * utf8_map_gsm7:
 * @input:   UTF-8 encoded bytes
 * @ilength: number of bytes in @input
 * @olength: must not be NULL (presumably receives the output length - the
 *           assignment is not shown; confirm)
 *
 * Maps UTF-8 characters to the codes defined by gsm7_utf8_ext_table and
 * gsm7_utf8_table. Each character's bytes are packed big-endian into one
 * integer, which is searched for in the extension table (10 entries) and in
 * the main table (128 entries). The g_malloc0() buffer is resized to optr+1
 * bytes with g_realloc() at the end.
 *
 * Returns NULL when @input is NULL or starts with a NUL byte, @ilength is 0,
 * or @olength is NULL.
 */
guchar *utf8_map_gsm7(const guchar *input, gsize ilength, gsize *olength)
309
guchar *output, *routput;
313
gboolean detected, found;
315
if ((input == NULL) || (ilength == 0) || (olength == NULL)) return NULL;
317
if (input[0] == '\0') return NULL;
319
output = g_malloc0(ilength*2+1);
321
if (output == NULL) return NULL;
325
while (iptr < ilength) {
327
// Classify the sequence by its lead byte: 0..127 is a single byte
if (input[iptr] <= 127) {
331
// Lead byte 194..223 (0xC2..0xDF): two-byte sequence
// NOTE(review): here and in the branches below the continuation bytes are
// read without comparing against ilength - confirm input is NUL-terminated.
} else if ((input[iptr] >= 194) && (input[iptr] <= 223)) {
332
value = (((input[iptr] << 8) & 0xff00) | input[iptr+1]) & 0xffff;
335
// Lead byte 224..239 (0xE0..0xEF): three-byte sequence
} else if ((input[iptr] >= 224) && (input[iptr] <= 239)) {
336
value = ((((input[iptr] << 16) & 0xff0000) | (input[iptr+1] << 8) & 0x00ff00) | input[iptr+2]) & 0xffffff;
339
// Lead byte 240..244 (0xF0..0xF4): four-byte sequence
// NOTE(review): input[iptr] is promoted to int before "<< 24"; a lead byte
// of 0xF0 or more then shifts into the sign bit of a 32-bit int, which is
// undefined behaviour - consider casting to guint first.
} else if ((input[iptr] >= 240) && (input[iptr] <= 244)) {
340
value = (((((input[iptr] << 24) & 0xff000000) | (input[iptr+1] << 16) & 0x00ff0000) | (input[iptr+2] << 8) & 0x0000ff00) | input[iptr+3]) & 0xffffffff;
347
// Search row 0 of the extension table for the packed value
for (i=0; i<10; i++) {
348
if (gsm7_utf8_ext_table[0][i] == value) {
350
// The extension code goes to the second output byte (what is written to
// output[optr] is not shown - presumably an escape byte; confirm)
output[optr+1] = (unsigned char)gsm7_utf8_ext_table[1][i];
357
// Search the main table; the index of the matching entry is the output code
for (i=0; i<128; i++) {
358
if (gsm7_utf8_table[i] == value) {
359
output[optr] = (unsigned char)i;
376
// Resize the buffer to the bytes actually produced plus one
routput = g_realloc(output, optr+1);
378
if (routput != NULL) output = routput;
385
/*
 * bcd_to_utf8_ascii_part:
 * @input:   text to examine and decode
 * @ilength: number of characters in @input
 * @olength: must not be NULL (presumably receives the output length - the
 *           assignment is not shown; confirm)
 *
 * Works in two phases. A validation loop checks that every character is a
 * decimal digit, 'a', 'b', 'c' (either case), '*' or '#' and that @ilength is
 * greater than 6; otherwise @input is duplicated with g_strdup() instead of
 * being decoded. The decode loop then reads groups of decimal digits - three
 * digits when the group starts with '1' and at least three characters remain,
 * otherwise two - converts each group with atoi() and stores it as one byte.
 *
 * Returns NULL when @input is NULL or starts with a NUL byte, @ilength is 0,
 * or @olength is NULL.
 */
guchar *bcd_to_utf8_ascii_part(const guchar *input, gsize ilength, gsize *olength)
387
guchar *output, *routput;
392
if ((input == NULL) || (ilength == 0) || (olength == NULL)) return NULL;
394
if (input[0] == '\0') return NULL;
396
//Test if number decoded correctly
397
for (iptr=0; iptr<ilength; iptr++) {
398
value = tolower(input[iptr]);
399
// The "ilength <= 6" term does not depend on the loop variable, so short
// input takes this branch on the very first iteration
if (((!isdigit(value)) && (value != 'a') && (value != 'b') && (value != 'c') && (value != '*') && (value != '#')) || (ilength <= 6)) {
400
// NOTE(review): g_strdup() copies up to the first NUL regardless of ilength
// and takes a gchar pointer while input is a guchar pointer - confirm.
output = g_strdup(input);
406
// NOTE(review): no extra byte is reserved for a terminator here, unlike the
// "+1" allocations elsewhere in this file - confirm this is sufficient.
output = g_malloc0(ilength);
408
if (output == NULL) return NULL;
413
//Decode characters in range 32 ... 127
414
while (iptr < ilength) {
415
// Clear buf so the strncpy() results below are always NUL-terminated
// (assumes buf is larger than 3 bytes - its declaration is not shown)
memset(buf, 0, sizeof(buf));
416
if (isdigit(input[iptr])) {
417
// Group starting with '1': three decimal digits (100..199)
if ((input[iptr] == '1') && (ilength - iptr >= 3)) {
418
strncpy(buf, input+iptr, 3);
419
value = (unsigned char)atoi(buf);
421
output[optr] = value;
425
// Any other digit: two decimal digits
} else if (ilength - iptr >= 2) {
426
strncpy(buf, input+iptr, 2);
427
value = (unsigned char)atoi(buf);
429
output[optr] = value;
445
// Resize the buffer to the bytes actually produced plus one
routput = g_realloc(output, optr+1);
447
if (routput != NULL) output = routput;
454
/*
 * encoding_unescape_xml_markup:
 * @srcstr: text that may contain XML character entities
 * @srclen: number of characters in @srcstr
 *
 * Replaces the eight entities recognised below with the single character each
 * stands for. The string is scanned twice: the first scan locates entities
 * (presumably to compute newlen - that arithmetic is not shown; confirm),
 * then a buffer of newlen+1 bytes is allocated with g_malloc0() and the second
 * scan writes the replacement characters and copies everything else.
 * Matching is case-sensitive and limited to these exact spellings, so other
 * forms such as decimal character references are not recognised.
 *
 * Returns NULL when @srcstr is NULL or @srclen is 0.
 */
gchar *encoding_unescape_xml_markup(const gchar *srcstr, gsize srclen)
456
guint iptr, optr, newlen, charleft;
459
/*XML escape characters:
460
"&lt;", "&gt;", "&amp;", "&quot;", "&apos;", "&#xD;", "&#x9;", "&#xA;"
461
'<', '>', '&', '\"', '\'', '\r', '\t', '\n'
464
if ((srcstr == NULL) || (srclen == 0)) return NULL;
469
// First scan: find the entities
while (iptr < srclen) {
470
if (srcstr[iptr] == '&') {
471
// Characters left after the '&'; each test below checks this first so the
// indexed reads stay inside srclen
charleft = srclen - iptr - 1;
473
if ((charleft >= 5) && (srcstr[iptr+1] == 'q') && (srcstr[iptr+2] == 'u') && (srcstr[iptr+3] == 'o') && (srcstr[iptr+4] == 't') && (srcstr[iptr+5] == ';')) {
476
} else if ((charleft >= 5) && (srcstr[iptr+1] == 'a') && (srcstr[iptr+2] == 'p') && (srcstr[iptr+3] == 'o') && (srcstr[iptr+4] == 's') && (srcstr[iptr+5] == ';')) {
479
} else if ((charleft >= 4) && (srcstr[iptr+1] == 'a') && (srcstr[iptr+2] == 'm') && (srcstr[iptr+3] == 'p') && (srcstr[iptr+4] == ';')) {
482
} else if ((charleft >= 4) && (srcstr[iptr+1] == '#') && (srcstr[iptr+2] == 'x') && (srcstr[iptr+3] == 'D') && (srcstr[iptr+4] == ';')) {
485
} else if ((charleft >= 4) && (srcstr[iptr+1] == '#') && (srcstr[iptr+2] == 'x') && (srcstr[iptr+3] == '9') && (srcstr[iptr+4] == ';')) {
488
} else if ((charleft >= 4) && (srcstr[iptr+1] == '#') && (srcstr[iptr+2] == 'x') && (srcstr[iptr+3] == 'A') && (srcstr[iptr+4] == ';')) {
491
} else if ((charleft >= 3) && (srcstr[iptr+1] == 'l') && (srcstr[iptr+2] == 't') && (srcstr[iptr+3] == ';')) {
494
} else if ((charleft >= 3) && (srcstr[iptr+1] == 'g') && (srcstr[iptr+2] == 't') && (srcstr[iptr+3] == ';')) {
511
// Zero-filled buffer with one extra byte beyond newlen
unescaped = g_malloc0(newlen+1);
517
// Second scan: write one replacement character per recognised entity
while (iptr < srclen) {
518
if (srcstr[iptr] == '&') {
519
charleft = srclen - iptr - 1;
521
if ((charleft >= 5) && (srcstr[iptr+1] == 'q') && (srcstr[iptr+2] == 'u') && (srcstr[iptr+3] == 'o') && (srcstr[iptr+4] == 't') && (srcstr[iptr+5] == ';')) {
522
unescaped[optr] = '\"';
525
} else if ((charleft >= 5) && (srcstr[iptr+1] == 'a') && (srcstr[iptr+2] == 'p') && (srcstr[iptr+3] == 'o') && (srcstr[iptr+4] == 's') && (srcstr[iptr+5] == ';')) {
526
unescaped[optr] = '\'';
529
} else if ((charleft >= 4) && (srcstr[iptr+1] == 'a') && (srcstr[iptr+2] == 'm') && (srcstr[iptr+3] == 'p') && (srcstr[iptr+4] == ';')) {
530
unescaped[optr] = '&';
533
} else if ((charleft >= 4) && (srcstr[iptr+1] == '#') && (srcstr[iptr+2] == 'x') && (srcstr[iptr+3] == 'D') && (srcstr[iptr+4] == ';')) {
534
unescaped[optr] = '\r';
537
} else if ((charleft >= 4) && (srcstr[iptr+1] == '#') && (srcstr[iptr+2] == 'x') && (srcstr[iptr+3] == '9') && (srcstr[iptr+4] == ';')) {
538
unescaped[optr] = '\t';
541
} else if ((charleft >= 4) && (srcstr[iptr+1] == '#') && (srcstr[iptr+2] == 'x') && (srcstr[iptr+3] == 'A') && (srcstr[iptr+4] == ';')) {
542
unescaped[optr] = '\n';
545
} else if ((charleft >= 3) && (srcstr[iptr+1] == 'l') && (srcstr[iptr+2] == 't') && (srcstr[iptr+3] == ';')) {
546
unescaped[optr] = '<';
549
} else if ((charleft >= 3) && (srcstr[iptr+1] == 'g') && (srcstr[iptr+2] == 't') && (srcstr[iptr+3] == ';')) {
550
unescaped[optr] = '>';
554
// The three plain copies below pass the source character through unchanged
// (presumably for an unmatched '&' and for ordinary characters - the
// enclosing branches are not shown; confirm)
unescaped[optr] = srcstr[iptr];
560
unescaped[optr] = srcstr[iptr];
565
unescaped[optr] = srcstr[iptr];
574
gchar *encoding_clear_special_symbols(gchar *srcstr, gsize srclen)
578
if ((srcstr == NULL) || (srclen == 0)) return NULL;
582
while (iptr < srclen) {
583
if (srcstr[iptr] > 0) {
584
if ((srcstr[iptr] == '\n') || (srcstr[iptr] == '\r') || (srcstr[iptr] == '\t')) {
589
switch (srcstr[iptr] & 0xF0) {