2
/* gdkanji.c (Kanji code converter) */
3
/* written by Masahito Yamaga (ma@yama-ga.com) */
12
#if defined(HAVE_ICONV_H) || defined(HAVE_ICONV)
19
#if defined(HAVE_ICONV_H) && !defined(HAVE_ICONV)
23
#define LIBNAME "any2eucjp()"
25
#if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
50
/*
 * Encoding-name strings.
 * NEWJISSTR, OLDJISSTR and SJISSTR are passed by do_check_and_conv() as the
 * `code` (source encoding) argument of do_convert(); EUCSTR is the target
 * encoding name that do_convert() hands to iconv_open().
 */
#define NEWJISSTR "JIS7"
51
#define OLDJISSTR "jis"
52
#define EUCSTR "eucJP"
53
#define SJISSTR "SJIS"
59
debug (const char *format,...)
64
va_start (args, format);
65
fprintf (stdout, "%s: ", LIBNAME);
66
vfprintf (stdout, format, args);
67
fprintf (stdout, "\n");
73
error (const char *format,...)
77
va_start (args, format);
78
fprintf (stderr, "%s: ", LIBNAME);
79
vfprintf (stderr, format, args);
80
fprintf (stderr, "\n");
84
/* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */
87
DetectKanjiCode (unsigned char *str)
89
static int whatcode = ASCII;
97
if (whatcode != EUCORSJIS && whatcode != ASCII)
103
while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0')
105
if ((c = str[i++]) != '\0')
127
else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
132
if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160) || (c >= 224 && c <= 252))
134
else if (c >= 161 && c <= 223)
135
whatcode = EUCORSJIS;
137
else if (c >= 161 && c <= 223)
140
if (c >= 240 && c <= 254)
142
else if (c >= 161 && c <= 223)
143
whatcode = EUCORSJIS;
144
else if (c >= 224 && c <= 239)
146
whatcode = EUCORSJIS;
147
while (c >= 64 && c != '\0' && whatcode == EUCORSJIS)
151
if (c <= 141 || (c >= 143 && c <= 159))
153
else if (c >= 253 && c <= 254)
162
else if (c >= 240 && c <= 254)
164
else if (c >= 224 && c <= 239)
167
if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
169
else if (c >= 253 && c <= 254)
171
else if (c >= 161 && c <= 252)
172
whatcode = EUCORSJIS;
178
if (whatcode == ASCII)
179
debug ("Kanji code not included.");
180
else if (whatcode == EUCORSJIS)
181
debug ("Kanji code not detected.");
183
debug ("Kanji code detected at %d byte.", i);
186
if (whatcode == EUCORSJIS && oldcode != ASCII)
189
if (whatcode == EUCORSJIS)
191
if (getenv ("LC_ALL"))
192
lang = getenv ("LC_ALL");
193
else if (getenv ("LC_CTYPE"))
194
lang = getenv ("LC_CTYPE");
195
else if (getenv ("LANG"))
196
lang = getenv ("LANG");
200
if (strcmp (lang, "ja_JP.SJIS") == 0 ||
202
strcmp (lang, "japanese") == 0 ||
204
strcmp (lang, "ja_JP.mscode") == 0 ||
205
strcmp (lang, "ja_JP.PCK") == 0)
207
else if (strncmp (lang, "ja", 2) == 0)
216
if (whatcode == EUCORSJIS)
226
/* SJIStoJIS() is sjis2jis() by Ken Lunde. */
229
SJIStoJIS (int *p1, int *p2)
231
register unsigned char c1 = *p1;
232
register unsigned char c2 = *p2;
233
register int adjust = c2 < 159;
234
register int rowOffset = c1 < 160 ? 112 : 176;
235
register int cellOffset = adjust ? (31 + (c2 > 127)) : 126;
237
*p1 = ((c1 - rowOffset) << 1) - adjust;
241
/* han2zen() was derived from han2zen() written by Ken Lunde. */
243
/*
 * IS_DAKU(c): non-zero when c is 179, or lies in 182..196 or 202..206.
 * han2zen() uses it to decide whether a following byte 222 is treated as a
 * daku-ten for the character code c.
 *
 * Every use of the parameter is parenthesized so that an argument containing
 * low-precedence operators (e.g. `x & 0xFF`) expands correctly.  The argument
 * is still evaluated more than once: do not pass an expression with side
 * effects.
 */
#define IS_DAKU(c) ((((c) >= 182) && ((c) <= 196)) || (((c) >= 202) && ((c) <= 206)) || ((c) == 179))
244
/*
 * IS_HANDAKU(c): non-zero when c lies in 202..206.
 * han2zen() uses it to decide whether a following byte 223 is treated as a
 * han-daku-ten for the character code c.
 *
 * The parameter is parenthesized so that an argument containing
 * low-precedence operators (e.g. `x & 0xFF`) expands correctly.  The argument
 * is evaluated twice: do not pass an expression with side effects.
 */
#define IS_HANDAKU(c) (((c) >= 202) && ((c) <= 206))
247
han2zen (int *p1, int *p2)
319
if (*p2 == 222 && IS_DAKU (*p1))
320
daku = TRUE; /* Daku-ten */
321
else if (*p2 == 223 && IS_HANDAKU (*p1))
322
handaku = TRUE; /* Han-daku-ten */
324
*p1 = mtable[c - 161][0];
325
*p2 = mtable[c - 161][1];
329
if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122))
331
else if (*p2 == 131 && *p2 == 69)
334
else if (handaku && *p2 >= 110 && *p2 <= 122)
338
/* Recast strcpy to handle unsigned chars used below. */
339
/*
 * ustrcpy(A, B): strcpy() for unsigned char buffers.
 * Casts A to char * and B to const char * before calling strcpy(), and
 * evaluates to strcpy()'s return value.  Like strcpy(), it performs no
 * bounds checking; the caller must ensure A can hold B plus the NUL.
 */
#define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
342
do_convert (unsigned char *to, unsigned char *from, const char *code)
346
size_t from_len, to_len;
348
if ((cd = iconv_open (EUCSTR, code)) == (iconv_t) - 1)
350
error ("iconv_open() error");
353
error ("invalid code specification: \"%s\" or \"%s\"",
356
strcpy ((char *) to, (const char *) from);
360
from_len = strlen ((const char *) from) + 1;
363
if (iconv (cd, (const char **) &from, &from_len,
364
(char **) &to, &to_len) == -1)
368
error ("invalid end of input string");
369
else if (errno == EILSEQ)
370
error ("invalid code in input string");
371
else if (errno == E2BIG)
372
error ("output buffer overflow at do_convert()");
375
error ("something happen");
376
strcpy ((char *) to, (const char *) from);
380
if (iconv_close (cd) != 0)
382
error ("iconv_close() error");
386
int jisx0208 = FALSE;
390
if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0)
392
for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
403
else if (from[i] == '(')
407
if (from[i] == 'I') /* Hankaku Kana */
416
to[j++] = from[i] + 128;
420
to[j++] = from[i] + 128;
427
else if (strcmp (code, SJISSTR) == 0)
429
for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
434
else if ((p1 >= 161) && (p1 <= 223))
442
SJIStoJIS (&p1, &p2);
450
error ("invalid code specification: \"%s\"", code);
456
error ("output buffer overflow at do_convert()");
461
#endif /* HAVE_ICONV */
465
do_check_and_conv (unsigned char *to, unsigned char *from)
467
static unsigned char tmp[BUFSIZ];
471
switch (DetectKanjiCode (from))
474
debug ("Kanji code is New JIS.");
475
do_convert (tmp, from, NEWJISSTR);
478
debug ("Kanji code is Old JIS.");
479
do_convert (tmp, from, OLDJISSTR);
482
debug ("This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
483
do_convert (tmp, from, NEWJISSTR);
486
debug ("Kanji code is NEC Kanji.");
487
error ("cannot convert NEC Kanji.");
492
debug ("Kanji code is EUC.");
496
debug ("Kanji code is SJIS.");
497
do_convert (tmp, from, SJISSTR);
500
debug ("Kanji code is EUC or SJIS.");
505
debug ("This is ASCII string.");
510
debug ("This string includes unknown code.");
516
/* Hankaku Kana ---> Zenkaku Kana */
520
for (i = 0; tmp[i] != '\0' && j < BUFSIZ; i++)
525
if (tmp[i + 1] == SS2)
528
if (p2 == 222 || p2 == 223)
536
SJIStoJIS (&p1, &p2);
546
error ("output buffer overflow at Hankaku --> Zenkaku");
559
any2eucjp (unsigned char *dest, unsigned char *src, unsigned int dest_max)
561
static unsigned char tmp_dest[BUFSIZ];
564
if (strlen ((const char *) src) >= BUFSIZ)
566
error ("input string too large");
569
if (dest_max > BUFSIZ)
571
error ("invalid maximum size of destination\nit should be less than %d.", BUFSIZ);
574
ret = do_check_and_conv (tmp_dest, src);
575
if (strlen ((const char *) tmp_dest) >= dest_max)
577
error ("output buffer overflow");
581
ustrcpy (dest, tmp_dest);
587
strwidth (unsigned char *s)
592
t = (unsigned char *) gdMalloc (BUFSIZ);
593
any2eucjp (t, s, BUFSIZ);
603
unsigned char input[BUFSIZ];
604
unsigned char *output;
608
while ((c = fgetc (stdin)) != '\n' && i < BUFSIZ)
612
printf ("input : %d bytes\n", strlen ((const char *) input));
613
printf ("output: %d bytes\n", strwidth (input));
615
output = (unsigned char *) gdMalloc (BUFSIZ);
616
any2eucjp (output, input, BUFSIZ);