1
/******************************************************************************
3
* SCSUUTF8 - SWFilter descendant that converts SCSU-compressed text to UTF-8
8
/* This class is based on:
9
* http://czyborra.com/scsu/scsu.c written by Roman Czyborra@dds.nl
10
* on Andrea's balcony in North Amsterdam on 1998-08-04
11
* Thanks to Richard Verhoeven <rcb5@win.tue.nl> for his suggestion
12
* to correct the haphazard "if" after UQU to "else if" on 1998-10-01
14
* This is a deflator to UTF-8 output for input compressed in SCSU,
15
* the (Reuters) Standard Compression Scheme for Unicode as described
16
* in http://www.unicode.org/unicode/reports/tr6.html
27
SCSUUTF8::SCSUUTF8() {
31
/*
 * Appends the UTF-8 encoding of one value to an output buffer.
 *
 * uchar - value to encode. A value in the UTF-16 high-surrogate range
 *         (0xd800-0xdbff) emits nothing: some of its bits are stored in `d`
 *         and `text` is returned unchanged. A value in the low-surrogate
 *         range (0xdc00-0xdfff) is combined with the stored `d` before it
 *         is encoded.
 * text  - current write position; each visible branch stores 2, 3 or 4
 *         bytes through it, advancing it one byte per store.
 *
 * Returns `text` unchanged on the high-surrogate path. The return for the
 * other paths is not visible in this excerpt; callers assign the result back
 * to their write pointer, so it is presumably the advanced position.
 *
 * NOTE(review): this excerpt omits lines of the function (the declaration of
 * `d`, the branch that precedes the first `else if`, the closing braces and
 * the final return). Comments below describe only what is visible.
 */
unsigned char* SCSUUTF8::UTF8Output(unsigned long uchar, unsigned char* text)
33
/* join UTF-16 surrogates without any pairing sanity checks */
37
// High surrogate: stash bits of it in d and emit nothing until the low half arrives.
// NOTE(review): the mask 0x3f keeps only 6 bits, but the range 0xd800-0xdbff
// spans 0x400 values (10 bits) and the next statement scales d by 0x400, so
// 0x3ff looks like the intended mask - confirm and fix.
if (uchar >= 0xd800 && uchar <= 0xdbff) { d = uchar & 0x3f; return text; }
38
// Low surrogate: 0x2400 == 0x10000 - 0xdc00, so this rebuilds
// 0x10000 + d * 0x400 + (uchar - 0xdc00) from the stored d and this value.
if (uchar >= 0xdc00 && uchar <= 0xdfff) { uchar = uchar + 0x2400 + d * 0x400; }
40
/* output one character as UTF-8 multibyte sequence */
45
// Two-byte form: 110xxxxx 10xxxxxx.
// Precedence throughout: >> binds tighter than &, which binds tighter than |.
else if (uchar < 0x800) {
46
*text++ = 0xc0 | uchar >> 6;
47
*text++ = 0x80 | uchar & 0x3f;
49
// Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.
else if (uchar < 0x10000) {
50
*text++ = 0xe0 | uchar >> 12;
51
*text++ = 0x80 | uchar >> 6 & 0x3f;
52
*text++ = 0x80 | uchar & 0x3f;
54
// Four-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
else if (uchar < 0x200000) {
55
*text++ = 0xf0 | uchar >> 18;
56
*text++ = 0x80 | uchar >> 12 & 0x3f;
57
*text++ = 0x80 | uchar >> 6 & 0x3f;
58
*text++ = 0x80 | uchar & 0x3f;
64
/*
 * Decodes the SCSU-compressed bytes held in `text` and writes the result
 * as UTF-8 through UTF8Output().
 *
 * Visible flow: the input bytes are moved to offset (buflen - len) of the
 * buffer and read from there through `from`, while the UTF-8 output is written
 * from the start of the buffer through `to`. Each input byte `c` is either
 * emitted (directly or via a window offset) or interpreted as an SCSU tag
 * that quotes a character, switches the active window, redefines a window,
 * or changes `mode`.
 *
 * NOTE(review): this excerpt omits lines of the function (the declarations
 * of `len`, `c` and `d`, several conditions and braces, and everything after
 * the UDX branch including the return). Comments below describe only what is
 * visible; the roles of `key` and `module` beyond the check on `key` cannot
 * be determined from here.
 */
char SCSUUTF8::processText(SWBuf &text, const SWKey *key, const SWModule *module) {
66
// to: UTF-8 write position; from: base of the relocated SCSU input.
unsigned char *to, *from;
67
// Working size: len scaled by FILTERPAD (both come from outside this excerpt).
unsigned long buflen = len * FILTERPAD;
68
// active: index (0-7) of the current dynamic window.
// mode: 0 initially, set to 1 by SCU and back to 0 by UCn/UDn/UDX.
char active = 0, mode = 0;
69
// NOTE(review): the statement controlled by this `if` is not visible here.
// The cast converts the pointer value to unsigned long, which truncates on
// platforms where unsigned long is narrower than a pointer - confirm intent.
if ((unsigned long)key < 2) // hack, we're en(1)/de(0)ciphering
72
// start[n]: offset added by SQn to an operand byte below 0x80.
static unsigned short start[8] = {0x0000,0x0080,0x0100,0x0300,0x2000,0x2080,0x2100,0x3000};
73
// slide[n]: offset of dynamic window n, applied to bytes mapped through a
// window; rewritten by the SDn, SDX, UDn and UDX branches below.
// NOTE(review): this is a mutable function-local static, so a window
// redefinition made while decoding one buffer stays in effect for later
// calls. The elements are unsigned short, yet SDX/UDX assign
// 0x10000 + ..., which cannot be represented where unsigned short is
// 16 bits - confirm and widen if supplementary-plane windows are needed.
static unsigned short slide[8] = {0x0080,0x00C0,0x0400,0x0600,0x0900,0x3040,0x30A0,0xFF00};
74
// win[b]: window offset selected by the operand byte b of SDn / UDn.
static unsigned short win[256] = {
75
// Indices 0x00-0x67: index * 0x80.
0x0000, 0x0080, 0x0100, 0x0180, 0x0200, 0x0280, 0x0300, 0x0380,
76
0x0400, 0x0480, 0x0500, 0x0580, 0x0600, 0x0680, 0x0700, 0x0780,
77
0x0800, 0x0880, 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80,
78
0x0C00, 0x0C80, 0x0D00, 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x0F80,
79
0x1000, 0x1080, 0x1100, 0x1180, 0x1200, 0x1280, 0x1300, 0x1380,
80
0x1400, 0x1480, 0x1500, 0x1580, 0x1600, 0x1680, 0x1700, 0x1780,
81
0x1800, 0x1880, 0x1900, 0x1980, 0x1A00, 0x1A80, 0x1B00, 0x1B80,
82
0x1C00, 0x1C80, 0x1D00, 0x1D80, 0x1E00, 0x1E80, 0x1F00, 0x1F80,
83
0x2000, 0x2080, 0x2100, 0x2180, 0x2200, 0x2280, 0x2300, 0x2380,
84
0x2400, 0x2480, 0x2500, 0x2580, 0x2600, 0x2680, 0x2700, 0x2780,
85
0x2800, 0x2880, 0x2900, 0x2980, 0x2A00, 0x2A80, 0x2B00, 0x2B80,
86
0x2C00, 0x2C80, 0x2D00, 0x2D80, 0x2E00, 0x2E80, 0x2F00, 0x2F80,
87
// NOTE(review): the last entry of the next row (index 0x67) is 0x3800, which
// breaks the index * 0x80 pattern of every entry before it (0x67 * 0x80 ==
// 0x3380). Looks like a transposition typo - confirm against the SCSU
// window offset table and correct.
0x3000, 0x3080, 0x3100, 0x3180, 0x3200, 0x3280, 0x3300, 0x3800,
88
// Indices 0x68-0xA7: index * 0x80 + 0xAC00 (0xE000 upward).
0xE000, 0xE080, 0xE100, 0xE180, 0xE200, 0xE280, 0xE300, 0xE380,
89
0xE400, 0xE480, 0xE500, 0xE580, 0xE600, 0xE680, 0xE700, 0xE780,
90
0xE800, 0xE880, 0xE900, 0xE980, 0xEA00, 0xEA80, 0xEB00, 0xEB80,
91
0xEC00, 0xEC80, 0xED00, 0xED80, 0xEE00, 0xEE80, 0xEF00, 0xEF80,
92
0xF000, 0xF080, 0xF100, 0xF180, 0xF200, 0xF280, 0xF300, 0xF380,
93
0xF400, 0xF480, 0xF500, 0xF580, 0xF600, 0xF680, 0xF700, 0xF780,
94
0xF800, 0xF880, 0xF900, 0xF980, 0xFA00, 0xFA80, 0xFB00, 0xFB80,
95
0xFC00, 0xFC80, 0xFD00, 0xFD80, 0xFE00, 0xFE80, 0xFF00, 0xFF80,
96
// Indices 0xA8-0xF7: all zero.
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
97
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
98
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
99
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
100
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
101
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
102
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
103
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
104
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
105
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
106
// Indices 0xF8-0xFF: 0xF8 is zero, 0xF9-0xFF are individually listed offsets.
0x0000, 0x00C0, 0x0250, 0x0370, 0x0530, 0x3040, 0x30A0, 0xFF60
112
// Relocate the compressed input to offset (buflen - len) so the decoded
// output can be written from the front of the same buffer.
// NOTE(review): this relies on the buffer holding at least buflen bytes;
// the resize that would guarantee it is not visible in this excerpt.
memmove(&text[buflen - len], text, len);
113
from = (unsigned char*)&text[buflen - len];
114
to = (unsigned char *)text;
116
// -------------------------------
118
// i is advanced inside the body (from[i++]) rather than in the loop header,
// because tags consume a varying number of operand bytes.
// NOTE(review): the operand reads below are not checked against len in the
// visible lines, so a truncated input may read past the end - confirm.
for (int i = 0; i < len;) {
126
// Byte mapped through the active dynamic window (its condition is not
// visible here; presumably c >= 0x80).
to = UTF8Output (c - 0x80 + slide[active], to);
128
// Bytes 0x20-0x7F pass through unchanged.
else if (c >= 0x20 && c <= 0x7F)
130
to = UTF8Output (c, to);
132
// The control bytes 0x00, 0x09, 0x0A, 0x0C and 0x0D also pass through unchanged.
else if (c == 0x0 || c == 0x9 || c == 0xA || c == 0xC || c == 0xD)
134
to = UTF8Output (c, to);
136
// SQn: emit one character from window n = c - 1 without changing `active`.
else if (c >= 0x1 && c <= 0x8) // SQn
139
d = from[i++]; // single quote
141
// Operand below 0x80 uses the start[] offset; otherwise the slide[] offset.
to = UTF8Output (d < 0x80 ? d + start [c - 0x1] :
142
d - 0x80 + slide [c - 0x1], to);
144
else if (c >= 0x10 && c <= 0x17) // SCn
146
active = c - 0x10; // change window
148
// SDn: select window n and set its offset from the operand byte via win[].
else if (c >= 0x18 && c <= 0x1F) // SDn
150
active = c - 0x18; // define window
152
slide [active] = win [from[i++]];
154
// SDX: the reads that load c and d for this branch are not visible here.
else if (c == 0xB) // SDX
162
// Top 3 bits of c pick the window; the low 5 bits of c joined with d form a
// 13-bit value that is scaled by 0x80 and offset by 0x10000.
// NOTE(review): the result is stored into an unsigned short element.
slide [active = c>>5] = 0x10000 + (((c & 0x1F) << 8 | d) << 7);
164
// SQU: the next two bytes form one 16-bit value, high byte first.
else if (c == 0xE) // SQU
167
c = from[i++]; // SQU
170
to = UTF8Output (c << 8 | from[i++], to);
172
else if (c == 0xF) // SCU
174
mode = 1; // change to Unicode mode
181
// Bytes outside 0xE0-0xF2 are treated as the high byte of a 16-bit value
// and paired with the following byte.
if (c <= 0xDF || c >= 0xF3)
184
to = UTF8Output (c << 8 | from[i++], to);
186
// UQU: the read that replaces c before the output below is not visible here.
else if (c == 0xF0) // UQU
192
to = UTF8Output (c << 8 | from[i++], to);
194
// UCn: select window n and reset mode to 0.
else if (c >= 0xE0 && c <= 0xE7) // UCn
196
active = c - 0xE0; mode = 0;
198
// UDn: define and select window n via win[], then reset mode to 0.
else if (c >= 0xE8 && c <= 0xEF) // UDn
201
slide [active=c-0xE8] = win [from[i++]]; mode = 0;
203
// UDX: same offset computation as SDX, followed by resetting mode to 0.
else if (c == 0xF1) // UDX
211
slide [active = c>>5] =
212
0x10000 + (((c & 0x1F) << 8 | d) << 7); mode = 0;