2
* Copyright (C) 1984-2000 Mark Nudelman
4
* You may distribute under the terms of either the GNU General Public
5
* License or the Less License, as specified in the README file.
7
* For more information about less, or for information on how to
8
* contact the author, see the README file.
13
* Functions to define the character set
14
* and do things specific to the character set.
23
public int utf_mode = 0;
26
* Predefined character sets,
27
* selected by the LESSCHARSET environment variable.
34
{ "ascii", NULL, "8bcccbcc18b95.b" },
35
{ "dos", NULL, "8bcccbcc12bc5b223.b" },
36
{ "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." },
37
{ "IBM-1047", NULL, "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" },
38
{ "iso8859", NULL, "8bcccbcc18b95.33b." },
39
{ "koi8-r", NULL, "8bcccbcc18b95.b128." },
40
{ "next", NULL, "8bcccbcc18b95.bb125.bb" },
41
{ "utf-8", &utf_mode, "8bcccbcc18b." },
49
{ "latin1", "iso8859" },
50
{ "latin9", "iso8859" },
54
#define IS_BINARY_CHAR 01
55
#define IS_CONTROL_CHAR 02
57
static char chardef[256];
58
static char *binfmt = NULL;
59
public int binattr = AT_STANDOUT;
63
* Define a charset, given a description string.
64
* The string consists of 256 letters,
65
* one for each character in the charset.
66
* If the string is shorter than 256 letters, missing letters
67
* are taken to be identical to the last one.
68
* A decimal number followed by a letter is taken to be a
69
* repetition of the letter.
71
* Each letter is one of:
98
v = IS_BINARY_CHAR|IS_CONTROL_CHAR;
101
case '0': case '1': case '2': case '3': case '4':
102
case '5': case '6': case '7': case '8': case '9':
103
n = (10 * n) + (s[-1] - '0');
107
error("invalid chardef", NULL_PARG);
114
if (cp >= chardef + sizeof(chardef))
116
error("chardef longer than 256", NULL_PARG);
125
while (cp < chardef + sizeof(chardef))
130
* Define a charset, given a charset name.
131
* The valid charset names are listed in the "charsets" array.
137
register struct charset *p;
138
register struct cs_alias *a;
140
if (name == NULL || *name == '\0')
143
/* First see if the name is an alias. */
144
for (a = cs_aliases; a->name != NULL; a++)
146
if (strcmp(name, a->name) == 0)
153
for (p = charsets; p->name != NULL; p++)
155
if (strcmp(name, p->name) == 0)
158
if (p->p_flag != NULL)
164
error("invalid charset name", NULL_PARG);
171
* Define a charset, given a locale name.
178
setlocale(LC_ALL, "");
179
for (c = 0; c < (int) sizeof(chardef); c++)
184
chardef[c] = IS_CONTROL_CHAR;
186
chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR;
192
* Define the printing format for control chars.
198
if (s == NULL || *s == '\0')
201
* Select the attributes if it starts with "*".
207
case 'd': binattr = AT_BOLD; break;
208
case 'k': binattr = AT_BLINK; break;
209
case 's': binattr = AT_STANDOUT; break;
210
case 'u': binattr = AT_UNDERLINE; break;
211
default: binattr = AT_NORMAL; break;
219
* Initialize charset data structures.
226
s = lgetenv("LESSBINFMT");
230
* See if environment variable LESSCHARSET is defined.
232
s = lgetenv("LESSCHARSET");
236
* LESSCHARSET is not defined: try LESSCHARDEF.
238
s = lgetenv("LESSCHARDEF");
239
if (s != NULL && *s != '\0')
247
* Check whether LC_ALL, LC_CTYPE or LANG indicates that UTF-8 is in use.
249
if ((s = lgetenv("LC_ALL")) != NULL ||
250
(s = lgetenv("LC_CTYPE")) != NULL ||
251
(s = lgetenv("LANG")) != NULL)
253
if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL)
254
if (icharset("utf-8"))
269
(void) icharset("dos");
272
* Default to "latin1".
274
(void) icharset("latin1");
280
* Is a given character a "binary" character?
287
return (chardef[c] & IS_BINARY_CHAR);
291
* Is a given character a "control" character?
298
return (chardef[c] & IS_CONTROL_CHAR);
302
* Return the printable form of a character.
303
* For example, in the "ascii" charset '\3' is printed as "^C".
312
if (!control_char(c))
313
sprintf(buf, "%c", c);
317
else if (!binary_char(c) && c < 64)
320
* This array roughly inverts CONTROL() #defined in less.h,
321
* and should be kept in sync with CONTROL() and IBM-1047.
327
"..V....D....TU.Z"[c]);
329
else if (c < 128 && !control_char(c ^ 0100))
330
sprintf(buf, "^%c", c ^ 0100);
333
sprintf(buf, binfmt, c);