10
#include "njb_error.h"
16
extern int __sub_depth;
17
int njb_unicode_flag = NJB_UC_8859;
18
#define MAX_STRING_LENGTH 512
20
/* This flag determines whether to use ISO 8859-1
21
* or unicode UTF-8 for ALL strings */
22
void njb_set_unicode (int flag)
24
njb_unicode_flag = flag;
27
/* Gets the length (in characters, not bytes) of a unicode
28
* UCS-2 string, e.g., a string which physically is 0x00 0x41 0x00 0x00
29
* will return a value of 1. */
30
int ucs2strlen(const unsigned char *unicstr){
39
/* Unicode strings are terminated with 2 * 0x00 */
40
for(i=0; (unicstr[i] | unicstr[i+1])!='\0'; i+=2) {
48
/* This routine returns the length in bytes that this
49
* UCS-2 string would occupy if encoded as UTF-8 */
50
static int ucs2utf8len(const unsigned char *unicstr){
53
for(i=0; (unicstr[i] | unicstr[i+1]) != '\0'; i+=2) {
54
if (unicstr[i] == 0x00 && unicstr[i+1] < 0x80)
56
else if (unicstr[i] < 0x08)
64
/* Create a new, allocated UCS-2 string that is a copy
66
static unsigned char *ucs2strdup(const unsigned char *unicstr) {
67
int length = ucs2strlen(unicstr);
70
data= (char *) malloc(length*2+2);
74
memcpy(data, unicstr, length*2+2);
78
/* This function converts an ordinary ISO 8859-1 string
79
* to a unicode UTF-8 string */
80
char *strtoutf8(const unsigned char *str) {
81
unsigned char buffer[MAX_STRING_LENGTH];
85
memset(buffer,0,MAX_STRING_LENGTH);
87
for (i=0;i<strlen(str);i++) {
92
buffer[l] = 0xC0 | (str[i]>>6 & 0x03);
93
buffer[l+1] = 0x80 | (str[i] & 0x3F);
98
/* Then duplicate the string and return it */
99
return strdup(buffer);
102
/* This function approximates an ISO 8859-1 string from
103
* a UTF-8 string, leaving out untranslatable characters */
104
char *utf8tostr(const unsigned char *str) {
105
unsigned char buffer[MAX_STRING_LENGTH];
106
unsigned char *ucs2string;
110
memset(buffer,0,MAX_STRING_LENGTH);
112
ucs2string = strtoucs2(str);
113
if (ucs2string == NULL)
116
for(i=0; (ucs2string[i] | ucs2string[i+1])!='\0'; i+=2) {
117
if (ucs2string[i] == '\0') {
118
buffer[l] = ucs2string[i+1];
126
/* If there was nothing in this string, return NULL */
128
return strdup(buffer);
133
/* Converts a unicode 2-byte string to a common string
134
* quick and dirty (Japanese unicodes etc., that use all 16 bits
135
* will fail miserably) */
136
char *ucs2tostr(const unsigned char *unicstr){
147
/* Real unicode support in UTF8 */
148
if (njb_unicode_flag == NJB_UC_UTF8) {
149
int length8 = ucs2utf8len(unicstr);
150
data= (char *) malloc(length8+1);
151
if ( data == NULL ) {
156
for(l=0;(unicstr[l] | unicstr[l+1])!='\0'; l+=2) {
157
if (unicstr[l] == 0x00 && unicstr[l+1] < 0x80) {
158
data[i]=unicstr[l+1];
160
} else if (unicstr[l] < 0x08) {
161
data[i] = 0xc0 | (unicstr[l]<<2 & 0x1C) | (unicstr[l+1]>>6 & 0x03);
162
data[i+1] = 0x80 | (unicstr[l+1] & 0x3F);
165
data[i] = 0xe0 | (unicstr[l]>>4 & 0x0F);
166
data[i+1] = 0x80 | (unicstr[l]<<2 & 0x3C) | (unicstr[l+1]>>6 & 0x03);
167
data[i+2] = 0x80 | (unicstr[l+1] & 0x3F);
171
/* Terminate string */
174
/* If we're running in ISO 8859-1 mode, approximate
175
* and concatenate, losing any chars above 0xff */
176
int length=ucs2strlen(unicstr);
178
data= (char *) malloc(length+1);
179
if ( data == NULL ) {
186
for(i=0;l<length*2;){
187
if (unicstr[l] == 0x00) {
188
data[i]=unicstr[l+1];
193
/* Terminate string */
202
/* Convert a simple ISO 8859-1 or a Unicode
203
* UTF8 string to a unicode UCS2 string */
204
unsigned char *strtoucs2(const unsigned char *str) {
208
unsigned char *data = NULL;
214
/* Real unicode support in UTF8 */
215
if (njb_unicode_flag == NJB_UC_UTF8) {
216
unsigned char buffer[MAX_STRING_LENGTH*2];
221
for(i=0; str[i] != '\0';) {
223
buffer[length] = 0x00;
224
buffer[length+1] = str[i];
228
unsigned char numbytes = 0;
229
unsigned char lenbyte = 0;
231
/* Read the number of encoded bytes */
233
while (lenbyte & 0x80) {
235
lenbyte = lenbyte<<1;
237
/* UCS-2 can handle no more than 3 UTF-8 encoded bytes */
239
if (numbytes == 2 && str[i+1] > 0x80) {
240
/* This character can always be handled correctly */
241
buffer[length] = (str[i]>>3 & 0x07);
242
buffer[length+1] = (str[i]<<6 & 0xC0) | (str[i+1] & 0x3F);
245
} else if (numbytes == 3 && str[i+1] > 0x80 && str[i+2] > 0x80) {
246
buffer[length] = (str[i]<<4 & 0xF0) | (str[i+1]>>2 & 0x0F);
247
buffer[length+1]= (str[i+1]<<6 & 0xC0) | (str[i+2] & 0x3F);
251
/* Abnormal string character, just skip */
255
/* Just skip that character */
260
/* Copy the buffer contents */
261
buffer[length] = 0x00;
262
buffer[length+1] = 0x00;
263
data = ucs2strdup(buffer);
270
/* If we're running in ISO 8859-1 mode, approximate
271
* and concatenate, losing any chars above 0xff */
272
data= (unsigned char *) malloc(2*strlen(str)+2);
273
if ( data == NULL ) {
279
for(i=0;i<=strlen(str);i++){