1
#if !defined(lint) && !defined(DOS)
2
static char rcsid[] = "$Id: utf8.c 384 2007-01-24 01:22:15Z hubert@u.washington.edu $";
6
* ========================================================================
7
* Copyright 2006-2007 University of Washington
9
* Licensed under the Apache License, Version 2.0 (the "License");
10
* you may not use this file except in compliance with the License.
11
* You may obtain a copy of the License at
13
* http://www.apache.org/licenses/LICENSE-2.0
15
* ========================================================================
19
/* includable WITHOUT dependency on c-client */
20
#include "../../c-client/mail.h"
21
#include "../../c-client/utf8.h"
24
/* wingdi.h uses ERROR (!) and we aren't using the c-client ERROR so... */
30
#include "../../c-client/fs.h"
32
/* includable WITHOUT dependency on pico */
33
#include "../../pico/keydefs.h"
40
/*
 * Name of the charset used by the user's locale. Written (and always
 * NUL-terminated, or reset to the empty string) by set_locale_charmap(),
 * and tried as a candidate source charset in convert_to_utf8().
 */
static char locale_charmap[50];
42
/*
 * Non-zero when the display is UTF-8 capable. The width and conversion
 * code in this file checks it first and takes the UTF-8 path when set;
 * convert_to_locale() does no conversion at all in that case.
 */
static int native_utf8;
43
/*
 * Consulted when native_utf8 is not set. It is cast to (unsigned short *)
 * and handed to ucs4_rmaplen()/ucs4_rmapbuf() as the map used to convert
 * UCS-4 into the display charset. Presumably installed by
 * init_utf8_display(); its body is not visible here -- confirm.
 */
static void *display_data;
46
init_utf8_display(int utf8, void *rmap)
54
* Argument is a UCS-4 wide character.
55
* Returns the environment dependent cell width of the
56
* character when printed to the screen.
57
* This will be -1 if the character is not printable.
58
* It will be >= zero if it is printable.
60
* Note that in the case it is not printable but it is still sent to
61
* Writechar, Writechar will print a '?' with width 1.
70
* We believe that on modern unix systems wchar_t is a UCS-4 character.
71
* That's the assumption here.
74
if(native_utf8){ /* display is UTF-8 capable */
75
w = ucs4_width((unsigned long) ucs);
76
return(w >= U4W_ERROR ? -1 : w);
78
else if(display_data){
79
if(wtomb(dummy, ucs) < 0)
82
w = ucs4_width((unsigned long) ucs);
83
return(w >= U4W_ERROR ? -1 : w);
88
return(wcwidth((wchar_t) ucs));
96
* Argument is a UCS-4 wide character.
97
* It is converted to the multibyte version (for example UTF8 or EUC-JP).
98
* Dest is a buffer at least xx chars wide where the multi-byte version
99
* of the wide character will be written.
100
* The returned value is the number of bytes written to dest or -1
101
* if the conversion can't be done.
104
wtomb(char *dest, UCS ucs)
107
* We believe that on modern unix systems wchar_t is a UCS-4 character.
108
* That's the assumption here.
112
unsigned char *newdptr;
114
newdptr = utf8_put((unsigned char *) dest, (unsigned long) ucs);
115
return(newdptr - (unsigned char *) dest);
117
else if(display_data){
121
ucs4 = (unsigned long) ucs;
122
ret = ucs4_rmaplen(&ucs4, 1, (unsigned short *) display_data, 0);
124
ucs4_rmapbuf((unsigned char *) dest, &ucs4, 1, (unsigned short *) display_data, 0);
129
return(wcrtomb(dest, (wchar_t) ucs, NULL));
134
* This function does not necessarily update inputp and remaining_octets, so
135
* don't rely on that. The c-client version does but the other doesn't.
138
mbtow(void *input_cs, unsigned char **inputp, unsigned long *remaining_octets)
143
CHARSET *cast_input_cs;
145
cast_input_cs = (CHARSET *) input_cs;
147
switch((ucs = (UCS) ucs4_cs_get(cast_input_cs, inputp, remaining_octets))){
152
return(CCONV_BADCHAR);
156
return(CCONV_NEEDMORE);
160
return(CCONV_BADCHAR);
170
* Warning: input_cs and remaining_octets are unused in this
171
* half of the if/else.
173
* Unfortunately, we can't tell the difference between a source string
174
* that is just not long enough and one that has characters that can't
175
* be converted even though it is long enough. We return NEEDMORE in both cases.
177
ret = mbstowcs(&w, (char *) (*inputp), 1);
178
if(ret == (size_t)(-1))
179
return(CCONV_NEEDMORE);
189
set_locale_charmap(char *charmap)
192
strncpy(locale_charmap, charmap, sizeof(locale_charmap));
193
locale_charmap[sizeof(locale_charmap)-1] = '\0';
196
locale_charmap[0] = '\0';
201
* This ensures that the string is UTF-8. If str is already a UTF-8 string,
202
* NULL is returned. Otherwise, an allocated string which is UTF-8 is returned.
203
* The caller is responsible for freeing the returned value.
205
* Args str -- the string to convert
208
convert_to_utf8(char *str, char *fromcharset, int flags)
212
SIZEDTEXT src, result;
216
src.data = (unsigned char *) str;
217
src.size = strlen(str);
219
/* already UTF-8, return NULL */
220
if(!(flags & CU8_NOINFER)
221
&& (cs = utf8_infercharset(&src))
222
&& (cs->type == CT_ASCII || cs->type == CT_UTF8))
229
fcharset = fromcharset;
230
if(fcharset && strucmp("UTF-8", fcharset) != 0)
231
break; /* give it a try */
233
try++; /* fall through */
236
if(!(flags & CU8_NOINFER)){
237
fcharset = cs ? cs->name : NULL;
238
if(fcharset && strucmp("UTF-8", fcharset) != 0)
241
try++; /* fall through */
244
try++; /* fall through */
247
fcharset = locale_charmap;
248
if(fcharset && strucmp("UTF-8", fcharset) != 0)
251
try++; /* fall through */
254
fcharset = "ISO-8859-1"; /* this will "work" */
258
memset(&result, 0, sizeof(result));
260
if(fcharset && utf8_text(&src, fcharset, &result, 0L)){
261
if(!(result.size == src.size && result.data == src.data)){
262
ret = (char *) fs_get((result.size+1) * sizeof(char));
263
strncpy(ret, (char *) result.data, result.size);
264
ret[result.size] = '\0';
266
/* else no conversion necessary */
274
/* won't make it to here */
280
* Convert from UTF-8 to user's locale charset.
281
* This actually uses the wtomb routine to do the conversion, and that
282
* relies on setup_for_display_charmap having been called.
283
* If no conversion is necessary, NULL is returned, otherwise an allocated
284
* string in the locale charset is returned and the caller is responsible
288
convert_to_locale(char *utf8str)
291
char *inp, *retp, *ret = NULL;
292
unsigned char cbuf[6], *cbufp, *cbufend;
293
int r, alloced, used = 0;
295
if(native_utf8 || !utf8str || !utf8str[0])
299
cbufp = cbufend = cbuf;
303
ret = (char *) fs_get(alloced * sizeof(char));
307
* There's gotta be a better way to do this but utf8_to_locale was
308
* available and everything looks like a nail when all you have
313
* We're placing the outgoing stream of characters in ret, a multi-byte
314
* array of characters in the user's locale charset. See if there is
315
* enough room for the next wide characters worth of output chars
316
* and allocate more space if not.
318
if((alloced - (retp-ret)) < MAX(MB_LEN_MAX,32)){
320
fs_resize((void **) &ret, alloced * sizeof(char));
323
r = utf8_to_locale((int) *inp++, cbuf, sizeof(cbuf), &cbufp,
324
(unsigned char *) retp, alloced-(retp-ret));
331
fs_resize((void **) &ret, strlen(ret)+1);
338
* Pass in a stream of UTF-8 characters in 'c' and return obuf
339
* filled in with multi-byte characters. The return value is the
340
* number of valid characters in obuf to be used.
343
utf8_to_locale(int c, unsigned char cbuf[], size_t cbuf_size,
344
unsigned char **cbufp, unsigned char obuf[], size_t obuf_size)
346
int width = 0, outchars = 0, printable_ascii = 0;
348
if(!(cbufp && *cbufp))
351
if((*cbufp) < cbuf+cbuf_size){
352
unsigned char *inputp;
353
unsigned long remaining_octets;
356
*(*cbufp)++ = (unsigned char) c;
358
remaining_octets = ((*cbufp) - cbuf) * sizeof(unsigned char);
359
if(remaining_octets == 1 && (*cbuf) < 0x80){
360
/* shortcut common case */
363
printable_ascii++; /* just for efficiency */
367
* we could use mbtow(utf8_charset, ...)
368
* here to lend an air of portability, then use the CCONV_
369
* constants for the return values. However, we know we are
370
* dealing with UTF-8 so we can skip straight to the
371
* correct function instead.
373
ucs = (UCS) utf8_get(&inputp, &remaining_octets);
376
case U8G_BADCONT: /* continuation at start of char */
377
case U8G_NOTUTF8: /* invalid character */
378
case U8G_INCMPLT: /* incomplete character */
381
* None of these cases is supposed to happen. If it
382
* does happen then the input stream isn't UTF-8
383
* so something is wrong. Treat each character in the
384
* input buffer as a separate error character and
385
* print a '?' for each.
387
for(inputp = cbuf; inputp < (*cbufp); inputp++)
388
obuf[outchars++] = '?';
393
case U8G_ENDSTRG: /* incomplete character, wait */
394
case U8G_ENDSTRI: /* incomplete character, wait */
398
/* got a character */
402
width = wcellwidth(ucs);
406
* This happens when we have a UTF-8 character that
407
* we aren't able to print in our locale. For example,
408
* if the locale is set up with the terminal
409
* expecting ISO-8859-1 characters then there are
410
* lots of UTF-8 characters that can't be printed.
411
* Print a '?' instead.
413
obuf[outchars++] = '?';
417
* Convert the ucs into the multibyte
418
* character that corresponds to the
419
* ucs in the user's locale.
422
obuf[outchars++] = *cbuf;
424
outchars = wtomb((char *) obuf, ucs);
432
/* update the input buffer */
433
if(inputp >= (*cbufp)) /* this should be the case */
435
else{ /* extra chars for some reason? */
436
unsigned char *q, *newcbufp;
438
newcbufp = ((*cbufp) - inputp) + cbuf;
440
while(inputp < (*cbufp))
459
* Returns the screen cells width of the UCS-4 string argument.
460
* The source string is zero terminated.
463
ucs4_str_width(UCS *ucsstr)
470
w = wcellwidth(*ucsstr++);
472
width += (w < 0 ? 1 : w);
480
* Returns the screen cells width of the UCS-4 string argument
481
* from ucsstr[a] through (inclusive) ucsstr[b].
482
* No checking is done to make sure a starts in the middle
486
ucs4_str_width_a_to_b(UCS *ucsstr, int a, int b)
492
for(i = a; i <= b && ucsstr[i]; i++){
493
w = wcellwidth(ucsstr[i]);
495
width += (w < 0 ? 1 : w);
503
* Returns the screen cells width of the UCS-4 string argument
504
* from ustart through (exclusive) uend.
505
* No checking is done to make sure it starts in the middle
509
ucs4_str_width_ptr_to_ptr(UCS *ustart, UCS *uend)
519
for(u = ustart; u < uend; u++){
522
width += (w < 0 ? 1 : w);
530
* Return the largest possible pointer into ucs4str so that the width
531
* of the string from ucs4str to the pointer (exclusive)
532
* is maxwidth or less. Also stops at a null character.
535
ucs4_particular_width(UCS *ucs4str, int maxwidth)
538
int w_consumed = 0, w, done = 0;
543
while(!done && *u && w_consumed <= maxwidth){
545
w = (w >= 0 ? w : 1);
546
if(w_consumed + w <= maxwidth){
559
* Convert and copy a UTF-8 string into a UCS-4 NULL
560
* terminated array. Just like cpystr only it converts
561
* from UTF-8 to UCS-4.
563
* Returned UCS-4 string needs to be freed by caller.
566
utf8_to_ucs4_cpystr(char *utf8src)
571
unsigned long remaining_octets;
572
unsigned char *readptr;
576
* We don't know how big to allocate the return array
577
* because variable numbers of octets in the src array
578
* will combine to make UCS-4 characters. The number of
579
* UCS-4 characters is less than or equal to the number
580
* of src characters, though.
586
retsize = strlen(utf8src) + 1;
588
ret = (UCS *) fs_get(retsize * sizeof(*ret));
589
memset(ret, 0, retsize * sizeof(*ret));
591
readptr = (unsigned char *) utf8src;
592
remaining_octets = retsize-1;
595
while(remaining_octets > 0 && *readptr){
596
ucs = (UCS) utf8_get(&readptr, &remaining_octets);
599
remaining_octets = 0;
601
ret[arrayindex++] = ucs;
604
/* get rid of excess size */
605
if(arrayindex+1 < retsize)
606
fs_resize((void **) &ret, (arrayindex + 1) * sizeof(*ret));
613
* Convert and copy a UCS-4 zero-terminated array into a UTF-8 NULL
614
* terminated string. Just like cpystr only it converts
615
* from UCS-4 to UTF-8.
617
* Returned UTF-8 string needs to be freed by caller.
620
ucs4_to_utf8_cpystr(UCS *ucs4src)
622
unsigned char *ret = NULL;
623
unsigned char *writeptr;
630
* Over-allocate and then resize at the end.
633
/* count characters in source */
634
for(i = 0; ucs4src[i]; i++)
637
ret = (unsigned char *) fs_get((6*i + 1) * sizeof(*ret));
638
memset(ret, 0, (6*i + 1) * sizeof(*ret));
641
for(i = 0; ucs4src[i]; i++)
642
writeptr = utf8_put(writeptr, (unsigned long) ucs4src[i]);
644
/* get rid of excess size */
645
fs_resize((void **) &ret, (writeptr - ret + 1) * sizeof(*ret));
647
return ((char *) ret);
652
* Similar to above but copy a fixed number of source
653
* characters instead of going until null terminator.
656
ucs4_to_utf8_cpystr_n(UCS *ucs4src, int ucs4src_len)
658
unsigned char *ret = NULL;
659
unsigned char *writeptr;
666
* Over-allocate and then resize at the end.
669
ret = (unsigned char *) fs_get((6*ucs4src_len + 1) * sizeof(*ret));
670
memset(ret, 0, (6*ucs4src_len + 1) * sizeof(*ret));
673
for(i = 0; i < ucs4src_len; i++)
674
writeptr = utf8_put(writeptr, (unsigned long) ucs4src[i]);
676
/* get rid of excess size */
677
fs_resize((void **) &ret, (writeptr - ret + 1) * sizeof(*ret));
679
return ((char *) ret);
685
* Convert a UTF-8 argument into an LPTSTR version
686
* of that argument. The result is allocated here
687
* and should be freed by the caller.
690
utf8_to_lptstr(LPSTR arg_utf8)
693
LPTSTR lptstr_ret = NULL;
695
lptstr_len = MultiByteToWideChar( CP_UTF8, 0, arg_utf8, -1, NULL, 0 );
698
lptstr_ret = (LPTSTR)fs_get(lptstr_len * sizeof(TCHAR));
699
lptstr_len = MultiByteToWideChar( CP_UTF8, 0,
700
arg_utf8, -1, lptstr_ret, lptstr_len );
705
// check GetLastError()?
706
lptstr_ret = (LPTSTR)fs_get(sizeof(TCHAR));
715
* Convert an LPTSTR argument into a UTF-8 version
716
* of that argument. The result is allocated here
717
* and should be freed by the caller.
720
lptstr_to_utf8(LPTSTR arg_lptstr)
723
LPSTR utf8str_ret = NULL;
725
utf8str_len = WideCharToMultiByte( CP_UTF8, 0, arg_lptstr, -1, NULL, 0, NULL, NULL );
728
utf8str_ret = (LPSTR)fs_get(utf8str_len * sizeof(CHAR));
729
utf8str_len = WideCharToMultiByte( CP_UTF8, 0,
730
arg_lptstr, -1, utf8str_ret, utf8str_len, NULL, NULL );
735
// check GetLastError()?
736
utf8str_ret = (LPSTR)fs_get(sizeof(CHAR));
745
* Convert a UCS4 argument into an LPTSTR version
746
* of that argument. The result is allocated here
747
* and should be freed by the caller.
750
ucs4_to_lptstr(UCS *arg_ucs4)
752
LPTSTR ret_lptstr = NULL;
757
len = ucs4_strlen(arg_ucs4);
758
ret_lptstr = (LPTSTR) fs_get((len+1) * sizeof(TCHAR));
759
/* bogus conversion ignores UTF-16 */
760
for(i = 0; i < len; i++)
761
ret_lptstr[i] = arg_ucs4[i];
763
ret_lptstr[len] = '\0';
771
* Convert an LPTSTR argument into a UCS4 version
772
* of that argument. The result is allocated here
773
* and should be freed by the caller.
776
lptstr_to_ucs4(LPTSTR arg_lptstr)
778
UCS *ret_ucs4 = NULL;
783
len = _tcslen(arg_lptstr);
784
ret_ucs4 = (UCS *) fs_get((len+1)*sizeof(UCS));
785
/* bogus conversion ignores UTF-16 */
786
for(i = 0; i < len; i++)
787
ret_ucs4[i] = arg_lptstr[i];
789
ret_ucs4[len] = '\0';
795
#endif /* _WINDOWS */
799
* Pass in a stream of UTF-8 characters 1-at-a-time in 'c' and return obuf
800
* 1-at-a-time filled in with UCS characters. The return value is the
801
* number of valid characters in obuf to be used. It can only
802
* be 1 or 0 characters since we're only getting one UTF-8 character
806
utf8_to_ucs4_oneatatime(int c, unsigned char cbuf[], size_t cbuf_size,
807
unsigned char **cbufp, UCS *obuf, int *obufwidth)
809
int width = 0, outchars = 0, printable_ascii = 0;
811
if(!(cbufp && *cbufp))
814
if((*cbufp) < cbuf+cbuf_size){
815
unsigned char *inputp;
816
unsigned long remaining_octets;
819
*(*cbufp)++ = (unsigned char) c;
821
remaining_octets = ((*cbufp) - cbuf) * sizeof(unsigned char);
822
if(remaining_octets == 1 && (*cbuf) < 0x80){
823
/* shortcut common case */
826
printable_ascii++; /* just for efficiency */
830
* we could use mbtow(utf8_charset, ...)
831
* here to lend an air of portability, then use the CCONV_
832
* constants for the return values. However, we know we are
833
* dealing with UTF-8 so we can skip straight to the
834
* correct function instead.
836
ucs = (UCS) utf8_get(&inputp, &remaining_octets);
839
case U8G_BADCONT: /* continuation at start of char */
840
case U8G_NOTUTF8: /* invalid character */
841
case U8G_INCMPLT: /* incomplete character */
844
* None of these cases is supposed to happen. If it
845
* does happen then the input stream isn't UTF-8
846
* so something is wrong.
854
case U8G_ENDSTRG: /* incomplete character, wait */
855
case U8G_ENDSTRI: /* incomplete character, wait */
859
/* got a character */
863
width = wcellwidth(ucs);
869
* This happens when we have a UTF-8 character that
870
* we aren't able to print in our locale. For example,
871
* if the locale is set up with the terminal
872
* expecting ISO-8859-1 characters then there are
873
* lots of UTF-8 characters that can't be printed.
874
* Print a '?' instead.
875
* Don't think this should happen in Windows.
881
* Convert the ucs into the multibyte
882
* character that corresponds to the
883
* ucs in the user's locale.
891
/* update the input buffer */
892
if(inputp >= (*cbufp)) /* this should be the case */
894
else{ /* extra chars for some reason? */
895
unsigned char *q, *newcbufp;
897
newcbufp = ((*cbufp) - inputp) + cbuf;
899
while(inputp < (*cbufp))
922
* Return an allocated copy of a zero-terminated UCS-4 string.
925
ucs4_cpystr(UCS *ucs4src)
934
arraysize = ucs4_strlen(ucs4src);
936
ret = (UCS *) fs_get((arraysize+1) * sizeof(*ret));
937
memset(ret, 0, (arraysize+1) * sizeof(*ret));
939
for(i = 0; i < arraysize; i++)
947
ucs4_strncpy(UCS *ucs4dst, UCS *ucs4src, size_t n)
951
if(ucs4src && ucs4dst){
952
for(i = 0; i < n; i++){
953
ucs4dst[i] = ucs4src[i];
954
if(ucs4dst[i] == '\0')
964
ucs4_strncat(UCS *ucs4dst, UCS *ucs4src, size_t n)
969
if(ucs4src && ucs4dst){
970
for(u = ucs4dst; *u; u++)
973
for(i = 0; i < n; i++){
985
* Like strlen only this returns the number of non-zero characters
986
* in a zero-terminated UCS-4 array.
989
ucs4_strlen(UCS *ucs4str)
1002
ucs4_strcmp(UCS *s1, UCS *s2)
1004
for(; *s1 == *s2; s1++, s2++)
1008
return((*s1 < *s2) ? -1 : 1);
1013
ucs4_strchr(UCS *s, UCS c)
1018
while(*s && *s != c)
1029
ucs4_strrchr(UCS *s, UCS c)
1048
* Returns the screen cells width of the UTF-8 string argument.
1051
utf8_width(char *str)
1056
unsigned long remaining_octets;
1063
remaining_octets = readptr ? strlen(readptr) : 0;
1065
while(remaining_octets > 0 && *readptr){
1067
ucs = (UCS) utf8_get((unsigned char **) &readptr, &remaining_octets);
1069
if(ucs & U8G_ERROR){
1071
* This should not happen, but do something to handle it anyway.
1072
* Treat each character as a single width character, which is what should
1073
* probably happen when we actually go to write it out.
1080
this_width = wcellwidth(ucs);
1083
* If this_width is -1 that means we can't print this character
1084
* with our current locale. Writechar will print a '?'.
1090
width += (unsigned) this_width;
1098
* Copy UTF-8 characters from src into dst.
1099
* This is intended to be used if you want to truncate a string at
1100
* the start instead of the end. For example, you have a long string
1102
* this_is_a_long_string
1103
* but not enough space to fit it into a particular field. You want to
1106
* where that fits in a particular width. Perhaps you'd use this with ...
1108
* ...s_a_long_string
1109
* This right adjusts the end of the string in the width space and
1110
* cuts it off at the start. If there is enough width for the whole
1111
* string it will copy the string into dst with no padding.
1113
* Copy enough characters so that the result will have screen width of
1114
* want_width screen cells in current locale.
1116
* Dstlen is the available space in dst. No more than dstlen bytes will be written
1117
* to dst. This is just for protection, it shouldn't be relied on to
1118
* do anything useful. Dstlen should be large enough. Otherwise you'll get
1119
* characters truncated in the middle or something like that.
1121
* Returned value is the number of bytes written to dst, not including
1122
* the possible terminating null.
1124
* If we can't hit want_width exactly because of double width characters
1125
* then we will pad the end of the string with space in order to make
1129
utf8_to_width_rhs(char *dst, /* destination buffer */
1130
char *src, /* source string */
1131
size_t dstlen, /* space in dest */
1132
unsigned want_width) /* desired screen width */
1135
unsigned width_consumed = 0;
1137
unsigned long remaining_octets;
1138
char *readptr, *goodreadptr, *savereadptr, *endptr;
1149
* Start at the end of the source string and go backwards until we
1150
* get to the desired width, but not more than the width.
1152
readptr = src + strlen(src);
1154
goodreadptr = readptr;
1156
savereadptr = readptr;
1158
for(readptr = savereadptr-1; readptr >= src && width_consumed < want_width && (endptr - readptr) < dstlen;
1159
readptr = savereadptr-1){
1161
savereadptr = readptr;
1162
remaining_octets = goodreadptr - readptr;
1163
ucs = (UCS) utf8_get((unsigned char **) &readptr, &remaining_octets);
1166
* Handling the error case is tough because an error will be the normal thing that
1167
* happens as we back through the string. So we're just going to punt on the
1170
if(!(ucs & U8G_ERROR)){
1171
if(remaining_octets > 0){
1173
* This means there are some bad octets after this good
1174
* character so things are not going to work out well.
1177
savereadptr = src; /* we're done */
1180
this_width = wcellwidth(ucs);
1185
if(width_consumed + (unsigned) this_width <= want_width){ /* ok */
1186
width_consumed += (unsigned) this_width;
1187
goodreadptr = savereadptr;
1190
savereadptr = src; /* we're done */
1196
* Copy characters from goodreadptr to endptr into dst.
1198
nb = MIN(endptr-goodreadptr, dstlen-1);
1199
strncpy(dst, goodreadptr, nb);
1203
* Pad out with spaces in order to hit width exactly.
1205
while(width_consumed < want_width && nb < dstlen-1){
1216
* The arguments being converted are UTF-8 strings.
1217
* This routine attempts to make it possible to use screen cell
1218
* widths in a format specifier. In a one-byte per screen cell
1219
* world we might have used %10.10s to cause a string to occupy
1220
* 10 screen positions. Since the width and precision are really
1221
* referring to numbers of bytes instead of screen positions that
1222
* won't work with UTF-8 input. We emulate that behavior with
1223
* the format string %w. %m.nw means to use the m and n as
1224
* screen width indicators instead of bytes indicators.
1226
* There is no reason to use this routine unless you want to use
1227
* min field width or precision with the specifier. A plain %w without
1228
* widths is equivalent exactly to a plain %s in a regular printf.
1230
* Double-width characters complicate things. It may not be possible
1231
* to satisfy the request exactly. For example, %3w for an input
1232
* string that is made up of two double-width characters.
1233
* This routine will arbitrarily use a trailing space character if
1234
* needed to make the width come out correctly where a half of a
1235
* double-width character would have been needed. We'll see how
1236
* that works for us.
1238
* %w only works for strings (it's a %s replacement).
1240
* Buffer overflow is handled by the size argument. %.30s will work
1241
* to limit a particular string to 30 bytes, but you lose that
1242
* ability with %w, since it may write more than precision bytes
1243
* in order to get to the desired width. It is best to choose
1244
* size large enough so that it doesn't come into play, otherwise
1245
* it may be possible to get partial UTF-8 characters because of
1248
* The return value isn't quite the same as the return value
1249
* of snprintf. It is the number of bytes written, not counting
1250
* the trailing null, just like snprintf. However, if it is
1251
* truncated due to size then the return value is size, not the
1252
* number of characters that would have been written.
1255
utf8_snprintf(char *dest, size_t size, char *fmt, ...)
1257
char newfmt[100], buf[20], *q, *pdest, *p, *width_str, *end;
1258
char *start_of_specifier;
1264
int more_flags, ret, w;
1265
int min_field_width, field_precision, modifier;
1266
int flags_minus, flags_plus, flags_space, flags_zero, flags_pound;
1274
#define IS_ROOM_IN_DEST(n_more_chars) \
1275
((pdest - dest + (n_more_chars) <= size) ? 1 : 0)
1278
* Strategy: Look through the fmt string for %w's. Replace the
1279
* %w's in the format string with %s's but with possibly different
1280
* width and precision arguments which will make it come out right.
1281
* Then call the regular system vsnprintf with the altered format
1282
* string but same arguments.
1284
* That would be nice but it doesn't quite work. Why? Because a
1285
* %*w will need to have the value in the integer argument the *
1286
* refers to modified. Can't do it as far as I can tell. Or we could
1287
* remove the integer argument somehow before calling printf. Can't
1288
* do it. Or we could somehow add an additional conversion specifier
1289
* that caused nothing to be printed but ate up the integer arg.
1290
* Can't figure out how to do that either.
1292
* Since we can't figure out how to do it, the alternative is to
1293
* construct the result one piece at a time, pasting together the
1294
* pieces from the different conversions.
1296
va_start(args, fmt);
1298
while(*fmt && IS_ROOM_IN_DEST(1)){
1300
start_of_specifier = fmt++;
1302
min_field_width = field_precision = -1;
1303
flags_minus = flags_plus = flags_space = flags_zero = flags_pound = 0;
1340
/* minimum field width */
1342
min_field_width = va_arg(args, int);
1345
else if(*fmt >= '0' && *fmt <= '9'){
1347
while (*fmt >= '0' && *fmt <= '9')
1350
strncpy(buf, width_str, MIN(fmt-width_str,sizeof(buf)));
1351
if(sizeof(buf) > fmt-width_str)
1352
buf[fmt-width_str] = '\0';
1354
buf[sizeof(buf)-1] = '\0';
1356
min_field_width = atoi(width_str);
1359
/* field precision */
1363
field_precision = va_arg(args, int);
1366
else if(*fmt >= '0' && *fmt <= '9'){
1368
while (*fmt >= '0' && *fmt <= '9')
1371
strncpy(buf, width_str, MIN(fmt-width_str,sizeof(buf)));
1372
if(sizeof(buf) > fmt-width_str)
1373
buf[fmt-width_str] = '\0';
1375
buf[sizeof(buf)-1] = '\0';
1377
field_precision = atoi(width_str);
1381
/* length modifier */
1382
if(*fmt == 'h' || *fmt == 'l' || *fmt == 'L')
1385
/* conversion character */
1389
* work with va_arg(char *) to figure out width
1390
* and precision needed to produce the screen width
1391
* and precision asked for in %w using some of the
1392
* utf8 width routines we have.
1395
input_str = va_arg(args, char *);
1396
if(field_precision >=0 || min_field_width >= 0)
1397
w = utf8_width(input_str);
1399
if(field_precision >= 0){
1400
if(w <= field_precision)
1401
field_precision = -1; /* print it all */
1404
* We need to cut off some of the input_str
1407
end = utf8_count_forw_width(input_str, field_precision, &got_width);
1408
field_precision = (int) (end - input_str);
1409
/* new w with this field_precision */
1414
/* need some padding */
1415
if(min_field_width >= 0)
1416
min_field_width = ((field_precision >= 0) ? field_precision : strlen(input_str)) +
1417
MAX(0, min_field_width - w);
1420
* Now we just need to get the new format string
1421
* set correctly in newfmt.
1424
if(q-newfmt < sizeof(newfmt))
1427
if(flags_minus && q-newfmt < sizeof(newfmt))
1429
if(flags_plus && q-newfmt < sizeof(newfmt))
1431
if(flags_space && q-newfmt < sizeof(newfmt))
1433
if(flags_zero && q-newfmt < sizeof(newfmt))
1435
if(flags_pound && q-newfmt < sizeof(newfmt))
1438
if(min_field_width >= 0){
1439
snprintf(buf, sizeof(buf), "%d", min_field_width);
1440
sstrncpy(&q, buf, sizeof(newfmt)-(q-newfmt));
1443
if(field_precision >= 0){
1444
if(q-newfmt < sizeof(newfmt))
1447
snprintf(buf, sizeof(buf), "%d", field_precision);
1448
sstrncpy(&q, buf, sizeof(newfmt)-(q-newfmt));
1451
if(q-newfmt < sizeof(newfmt))
1454
if(q-newfmt < sizeof(newfmt))
1457
snprintf(pdest, size - (pdest-dest), newfmt, input_str);
1458
pdest += strlen(pdest);
1467
/* make a new format which leaves out the dynamic '*' arguments */
1469
if(q-newfmt < sizeof(newfmt))
1472
if(flags_minus && q-newfmt < sizeof(newfmt))
1474
if(flags_plus && q-newfmt < sizeof(newfmt))
1476
if(flags_space && q-newfmt < sizeof(newfmt))
1478
if(flags_zero && q-newfmt < sizeof(newfmt))
1480
if(flags_pound && q-newfmt < sizeof(newfmt))
1483
if(min_field_width >= 0){
1484
snprintf(buf, sizeof(buf), "%d", min_field_width);
1485
sstrncpy(&q, buf, sizeof(newfmt)-(q-newfmt));
1488
if(field_precision >= 0){
1489
if(q-newfmt < sizeof(newfmt))
1492
snprintf(buf, sizeof(buf), "%d", field_precision);
1493
sstrncpy(&q, buf, sizeof(newfmt)-(q-newfmt));
1496
if(q-newfmt < sizeof(newfmt))
1499
if(q-newfmt < sizeof(newfmt))
1503
case 'd': case 'i': case 'o':
1504
case 'x': case 'X': case 'u': case 'c':
1505
int_arg = va_arg(args, int);
1506
snprintf(pdest, size - (pdest-dest), newfmt, int_arg);
1507
pdest += strlen(pdest);
1511
input_str = va_arg(args, char *);
1512
snprintf(pdest, size - (pdest-dest), newfmt, input_str);
1513
pdest += strlen(pdest);
1516
case 'f': case 'e': case 'E':
1518
double_arg = va_arg(args, double);
1519
snprintf(pdest, size - (pdest-dest), newfmt, double_arg);
1520
pdest += strlen(pdest);
1524
ptr_arg = va_arg(args, void *);
1525
snprintf(pdest, size - (pdest-dest), newfmt, ptr_arg);
1526
pdest += strlen(pdest);
1530
if(IS_ROOM_IN_DEST(1))
1536
/* didn't think of this type */
1547
if(IS_ROOM_IN_DEST(1))
1554
if(IS_ROOM_IN_DEST(1))
1564
* Copy UTF-8 characters from src into dst.
1565
* Copy enough characters so that the result will have (<=) screen width of
1566
* want_width screen cells in current locale.
1568
* Dstlen is the available space in dst. No more than dstlen bytes will be written
1571
* Returned value is the number of bytes written to dst, not including
1572
* the possible terminating null.
1573
* Got_width is another returned value. It is the width in screen cells of
1574
* the string placed in dst. It will be the same as want_width if there
1575
* are enough characters in the src to do that and if the character widths
1576
* hit the width exactly. It will be less than want_width if we run out
1577
* of src characters or if the next character width would skip over the
1578
* width we want, because it is double width.
1580
* Zero width characters are collected and included at the end of the string.
1581
* That is, if we make it to want_width but there is still a zero width
1582
* character sitting in src, we add that to dst. This might be an accent
1583
* or something like that.
1586
utf8_to_width(char *dst, /* destination buffer */
1587
char *src, /* source string */
1588
size_t dstlen, /* space in dst */
1589
unsigned want_width, /* desired screen width */
1590
unsigned *got_width) /* returned screen width in dst */
1593
unsigned width_consumed = 0;
1595
unsigned long remaining_octets;
1596
char *writeptr, *readptr, *savereadptr, *endptr;
1597
int ran_out_of_space = 0;
1601
remaining_octets = readptr ? strlen(readptr) : 0;
1604
endptr = writeptr + dstlen;
1606
if(readptr && writeptr){
1607
while(width_consumed <= want_width && remaining_octets > 0 && writeptr < dst + dstlen && !ran_out_of_space){
1608
savereadptr = readptr;
1609
ucs = (UCS) utf8_get((unsigned char **) &readptr, &remaining_octets);
1612
remaining_octets = 0;
1614
this_width = wcellwidth(ucs);
1617
* If this_width is -1 that means we can't print this character
1618
* with our current locale. Writechar will print a '?'.
1623
if(width_consumed + (unsigned) this_width <= want_width){
1624
/* append this utf8 character to dst if it will fit */
1625
if(writeptr + (readptr - savereadptr) < endptr){
1626
width_consumed += this_width;
1627
while(savereadptr < readptr)
1628
*writeptr++ = *savereadptr++;
1631
ran_out_of_space++; /* no more utf8 to dst */
1634
remaining_octets = 0; /* we're done */
1638
if(writeptr < endptr)
1643
*got_width = width_consumed;
1645
return(writeptr ? (writeptr - dst) : 0);
1650
* Str is a UTF-8 string.
1651
* Count forward width screencell positions and return a pointer to the
1652
* end of the string that is width wide.
1653
* The returned pointer points at the next character (where the null would
1656
* Got_width is another returned value. It is the width in screen cells of
1657
* the string from str to the returned pointer. It will be the same as
1658
* want_width if there are enough characters in the str to do that
1659
* and if the character widths hit the width exactly. It will be less
1660
* than want_width if we run out of characters or if the next character
1661
* width would skip over the width we want, because it is double width.
1664
/*
 * utf8_count_forw_width - measure forward from str in screen cells.
 *
 *   str        -- UTF-8 string to scan; a NULL str gives zero octets to scan
 *   want_width -- largest number of screen cells that may be consumed
 *   got_width  -- receives the number of cells actually consumed
 *
 * Each pass of the loop decodes one character with utf8_get() and asks
 * wcellwidth() for its width. The width is added to the running total
 * only while the total stays within want_width; otherwise the remaining
 * octet count is zeroed, which ends the loop.
 *
 * NOTE(review): the return type, the declarations of ucs, this_width,
 * readptr and retptr, the braces, the bodies of the error and else arms
 * and the return statement are not present in this listing. Confirm
 * those details against the complete file.
 */
utf8_count_forw_width(char *str, unsigned want_width, unsigned *got_width)
1667
unsigned width_consumed = 0;
1669
unsigned long remaining_octets;
1673
/* both cursors start at the beginning of the string */
retptr = readptr = str;
1675
/* octets still to be decoded; zero when str is NULL */
remaining_octets = readptr ? strlen(readptr) : 0;
1677
while(width_consumed <= want_width && remaining_octets > 0){
1679
/*
 * utf8_get is handed the addresses of readptr and remaining_octets,
 * presumably so it can advance the cursor and reduce the count.
 */
ucs = (UCS) utf8_get((unsigned char **) &readptr, &remaining_octets);
1681
/* the U8G_ERROR bit in the result marks a failed decode */
if(ucs & U8G_ERROR){
1683
* This should not happen, but do something to handle it anyway.
1684
* Treat each character as a single width character, which is what should
1685
* probably happen when we actually go to write it out.
1692
this_width = wcellwidth(ucs);
1695
* If this_width is -1 that means we can't print this character
1696
* with our current locale. Writechar will print a '?'.
1702
if(width_consumed + (unsigned) this_width <= want_width){
1703
width_consumed += (unsigned) this_width;
1707
remaining_octets = 0; /* we're done */
1711
*got_width = width_consumed;
1718
* Copy a null terminator into a UTF-8 string in place so that the string is
1719
* no more than a certain screen width wide. If the string is already less
1720
* than or equal in width to the requested width, no change is made.
1722
* The actual width accomplished is returned. Note that it may be less than
1723
* max_width due to double width characters as well as due to the fact that
1724
* it fits wholly in the max_width.
1726
* Returned value is the actual screen width of str when done.
1728
* A side effect is that a terminating null may have been written into
1729
* the passed in string.
1732
/*
 * utf8_truncate - shorten a UTF-8 string in place to at most max_width
 *                 screen cells.
 *
 *   str       -- string to shorten; it may be modified
 *   max_width -- largest screen width the result may occupy
 *
 * Returns the number of screen cells counted (width_consumed).
 *
 * The loop remembers the start of each character in savereadptr before
 * decoding it. When adding the width of a character would pass
 * max_width, the loop is ended and a null byte is stored at the start
 * of that character.
 *
 * NOTE(review): the return type, the declarations of ucs and this_width,
 * the initial assignment of readptr, the braces and the bodies of the
 * error and else arms are not present in this listing. Confirm those
 * details against the complete file.
 */
utf8_truncate(char *str, unsigned max_width)
1735
unsigned width_consumed = 0;
1737
unsigned long remaining_octets;
1738
char *readptr, *savereadptr;
1742
/* octets still to be decoded; zero when readptr is NULL */
remaining_octets = readptr ? strlen(readptr) : 0;
1745
while(width_consumed <= max_width && remaining_octets > 0){
1747
/* keep the start of this character so it can be cut off here later */
savereadptr = readptr;
1748
ucs = (UCS) utf8_get((unsigned char **) &readptr, &remaining_octets);
1750
/* the U8G_ERROR bit in the result marks a failed decode */
if(ucs & U8G_ERROR){
1752
* This should not happen, but do something to handle it anyway.
1753
* Treat each character as a single width character, which is what should
1754
* probably happen when we actually go to write it out.
1761
this_width = wcellwidth(ucs);
1764
* If this_width is -1 that means we can't print this character
1765
* with our current locale. Writechar will print a '?'.
1771
if(width_consumed + (unsigned) this_width <= max_width){
1772
width_consumed += (unsigned) this_width;
1775
remaining_octets = 0; /* we're done */
1776
*savereadptr = '\0';
1781
return(width_consumed);
1786
* Copy UTF-8 characters from src into dst.
1787
* Copy enough characters so that the result will have screen width of
1788
* want_width screen cells in current locale.
1789
* If there aren't enough characters in src to get to want_width, pad on
1790
* left or right according to left_adjust argument.
1792
* Dstlen is the available space in dst. No more than dstlen bytes will be written
1793
* to dst. Dst will be null terminated if there is enough room, but not
1794
* if that would overflow dst's len.
1796
* Returned value is the number of bytes written to dst, not including
1797
* the possible terminating null.
1800
/*
 * utf8_pad_to_width - copy src into dst limited to want_width screen
 *                     cells, then pad with spaces up to want_width.
 *
 * The copy itself is delegated to utf8_to_width(), which reports the
 * bytes written and the screen width achieved. The shortfall in cells
 * (need_more) is then made up with padding, limited to the bytes still
 * free in dst (len_left). One arm appends the padding after the copied
 * text; the other slides the copied text toward the end of the buffer
 * and fills the space in front of it.
 *
 * dst is null terminated only when bytes_used is still below dstlen.
 *
 * NOTE(review): the return type, the tests that guard the padding and
 * choose between the two arms (presumably on howmany and left_adjust),
 * the bodies of the three for loops, the braces and the return
 * statement are not present in this listing. Confirm those details
 * against the complete file.
 */
utf8_pad_to_width(char *dst, /* destination buffer */
1801
char *src, /* source string */
1802
size_t dstlen, /* space in dst */
1803
unsigned want_width, /* desired screen width */
1804
int left_adjust) /* adjust left or right in want_width columns */
1806
unsigned got_width = 0;
1807
int need_more, howmany;
1808
size_t len_left, bytes_used;
1810
/* copy what fits; got_width receives the screen width of the copy */
bytes_used = utf8_to_width(dst, src, dstlen, want_width, &got_width);
1811
/* bytes of dst not yet used */
len_left = dstlen - bytes_used;
1813
/* screen cells still missing from the requested width */
need_more = want_width - got_width;
1814
/* padding bytes to add, limited by the room left in dst */
howmany = MIN(need_more, len_left);
1817
char *end, *newend, *p, *q;
1819
/* end is just past the copied text, newend just past the padded text */
end = dst + bytes_used;
1820
newend = end + howmany;
1823
* Add padding to end of string. Simply append
1824
* the needed number of spaces, or however many will fit
1825
* if we don't have enough space.
1827
for(q = end; q < newend; q++)
1832
* Add padding to start of string.
1835
/* slide existing string over */
1836
for(p = end - 1, q = newend - 1; p >= dst; p--, q--)
1839
/* fill rest with spaces */
1840
for(; q >= dst; q--)
1844
bytes_used += howmany;
1847
if(bytes_used < dstlen)
1848
dst[bytes_used] = '\0';
1855
* Str is a UTF-8 string.
1856
* Start_here is a pointer into the string. It points one position past
1857
* the last byte that should be considered a part of the length string.
1858
* Count back want_width screencell positions and return a pointer to the
1859
* start of the string that is want_width wide and ends with start_here.
1861
* Since characters may be more than one cell width wide we may end up
1862
* skipping over the exact width. That is, if we need to we'll go back
1863
* too far (by one cell width). Account for that in the call by looking at the returned got_width.
1866
* Note that this call gives a possible got_width == want_width+1 as
1867
* opposed to utf8_count_forw_width which gives got_width == want-1 instead.
1868
* That was just what was needed at the time, maybe it needs to be
1872
/*
 * utf8_count_back_width - measure backward from start_here in screen cells.
 *
 *   str        -- start of the UTF-8 string; the scan never goes before it
 *   start_here -- one past the last byte to be counted
 *   want_width -- number of screen cells to count back
 *   got_width  -- receives the number of cells actually counted
 *
 * Returns savereadptr, the position reached by the backward scan.
 *
 * The loop probes one octet further back on each pass and tries to
 * decode a character between the probe position and goodreadptr, the
 * start of the last character accepted. A successful decode that leaves
 * octets unconsumed is treated as damaged input and ends the scan by
 * setting savereadptr to str. Otherwise the width of the character is
 * added and goodreadptr is moved back to savereadptr. The loop test is
 * width_consumed < want_width, so the total may pass want_width when
 * the last character accepted is wider than one cell.
 *
 * NOTE(review): the return type, the declarations of ucs and this_width,
 * the braces, some else arms and any assignment to savereadptr at the
 * top of the loop body are not present in this listing. Confirm those
 * details against the complete file.
 */
utf8_count_back_width(char *str, char *start_here, unsigned want_width, unsigned *got_width)
1874
unsigned width_consumed = 0;
1877
unsigned long remaining_octets;
1878
char *ptr, *savereadptr, *goodreadptr;
1880
savereadptr = start_here;
1881
goodreadptr = start_here;
1883
for(ptr = savereadptr - 1; width_consumed < want_width && ptr >= str; ptr = savereadptr - 1){
1886
/* try to decode everything between the probe and the last good start */
remaining_octets = goodreadptr - ptr;
1887
ucs = (UCS) utf8_get((unsigned char **) &ptr, &remaining_octets);
1889
/* a result without the U8G_ERROR bit is a decoded character */
if(!(ucs & U8G_ERROR)){
1890
if(remaining_octets > 0){
1892
* This means there are some bad octets after this good
1893
* character so things are not going to work out well.
1896
savereadptr = str; /* we're done */
1899
this_width = wcellwidth(ucs);
1902
* If this_width is -1 that means we can't print this character
1903
* with our current locale. Writechar will print a '?'.
1908
width_consumed += (unsigned) this_width;
1909
goodreadptr = savereadptr;
1915
*got_width = width_consumed;
1917
return(savereadptr);
1921
/*----------------------------------------------------------------------
1922
copy the source string onto the destination string returning with
1923
the destination string pointer at the end of the destination text
1925
motivation for this is to avoid twice passing over a string that's
1926
being appended to twice (i.e., strcpy(t, x); t += strlen(t))
1928
This doesn't really belong here but it is used here.
1931
/*
 * sstrncpy - copy at most n bytes of s to *d, leaving *d at the end of
 *            the copied text.
 *
 *   d -- address of the destination cursor; advanced once per byte copied
 *   s -- null terminated source string
 *   n -- maximum number of bytes to store, terminating null included
 *
 * When s is shorter than n its terminating null is stored and *d is left
 * pointing at that null, so a further call appends to the text. When n
 * runs out first no null is stored and *d points just past the last byte
 * copied; the caller must terminate the buffer itself in that case.
 *
 * A NULL d, *d or s makes the call a no-op instead of a crash.
 */
void
sstrncpy(char **d, char *s, int n)
{
    if(!d || !*d || !s)
      return;

    /* the assignment copies the byte, the increment only runs for non-null bytes */
    while(n-- > 0 && (**d = *s++) != '\0')
      (*d)++;
}
1939
* If use_system_routines is set then NULL is the return value and it is
1940
* not an error. Display_charmap and keyboard_charmap should come over as
1941
* malloced strings and will be filled in with the result.
1943
* Returns a void pointer to the input_cs CHARSET which is
1944
* passed to mbtow via kbseq().
1945
* If !use_system_routines && NULL is returned, that is an error and err should
1947
* display_charmap and keyboard_charmap should be malloced data and may be
1948
* realloced and changed here.
1951
/*
 * setup_for_input_output - settle the display and keyboard character
 *                          sets and prepare the display conversion.
 *
 *   use_system_routines -- nonzero to take the charset from the locale
 *   display_charmap     -- in/out, allocated name of the display charset
 *   keyboard_charmap    -- in/out, allocated name of the keyboard charset
 *   input_cs_arg        -- out, receives the CHARSET chosen for input
 *   err                 -- out, receives an allocated error message
 *
 * A NULL display_charmap, keyboard_charmap or input_cs_arg is reported
 * through *err as a bad call.
 *
 * System translation path: when PREREQ_FOR_SYS_TRANSLATION is set the
 * charset comes from nl_langinfo(CODESET), falling back to US-ASCII.
 * Names supplied by the caller that differ from it get a message
 * formatted into buf, are freed, and both maps are replaced by copies
 * of the locale charset. Without the prerequisite the call is reported
 * as bad. *input_cs_arg is set to NULL on this path.
 *
 * Explicit path: a missing display map defaults to US-ASCII and a
 * missing keyboard map defaults to a copy of the display map. Each name
 * is checked with the matching *_charset_is_supported() function; an
 * unsupported name gets a message formatted into buf and is replaced,
 * with a special note for ISO-2022-JP. For the display, utf-8 selects
 * init_utf8_display(1, NULL) and any other known charset installs a
 * reverse map from utf8_rmap_gen(). The function ends by storing
 * input_cs through input_cs_arg.
 *
 * NOTE(review): the return type, the declarations of dcm, buf, cs,
 * supported and iso2022jp, the remaining snprintf arguments, the use
 * made of buf afterwards (presumably copied to *err), the retry logic
 * around already_tried, many braces and else arms, and the return
 * statements are not present in this listing. *err is also written
 * without a visible NULL test. Confirm those details against the
 * complete file.
 */
setup_for_input_output(int use_system_routines, char **display_charmap,
1952
char **keyboard_charmap, void **input_cs_arg, char **err)
1955
const CHARSET *input_cs = NULL;
1956
int already_tried = 0;
1960
/* cpstr returns a copy of s in memory obtained from fs_get */
#define cpstr(s) strcpy((char *)fs_get(1+strlen(s)), s)
1965
/* all three in/out pointers are required */
if(!display_charmap || !keyboard_charmap || !input_cs_arg){
1966
*err = cpstr("Bad call to setup_for_input_output");
1970
if(use_system_routines){
1971
#if PREREQ_FOR_SYS_TRANSLATION
1974
/* charset name from the locale, with US-ASCII as the fallback */
dcm = nl_langinfo(CODESET);
1975
dcm = dcm ? dcm : "US-ASCII";
1977
init_utf8_display(0, NULL);
1978
/* a display name given by the caller is dropped on this path */
if(*display_charmap){
1979
if(dcm && strucmp(*display_charmap, dcm)){
1980
snprintf(buf, sizeof(buf),
1981
_("Display character set \"%s\" is ignored when using system translation"),
1987
fs_give((void **) display_charmap);
1990
/* likewise for a keyboard name; only noted when no error is pending */
if(*keyboard_charmap){
1991
if(!*err && dcm && strucmp(*keyboard_charmap, dcm)){
1992
snprintf(buf, sizeof(buf),
1993
_("Keyboard character set \"%s\" is ignored when using system translation"),
1999
fs_give((void **) keyboard_charmap);
2002
/* both maps become copies of the locale charset name */
*display_charmap = cpstr(dcm);
2003
*keyboard_charmap = cpstr(dcm);
2005
*err = cpstr("Bad call to setup_for_input_output");
2008
*input_cs_arg = NULL;
2014
/* explicit path: fill in defaults for missing names */
if(!(*display_charmap))
2015
*display_charmap = cpstr("US-ASCII");
2017
if(!(*keyboard_charmap))
2018
*keyboard_charmap = cpstr(*display_charmap);
2020
/* choose the input CHARSET from the keyboard name */
if(*keyboard_charmap){
2021
supported = input_charset_is_supported(*keyboard_charmap);
2024
if(!strucmp(*keyboard_charmap, "utf-8"))
2025
input_cs = utf8_charset(*keyboard_charmap);
2026
else if((cs = utf8_charset(*keyboard_charmap)) != NULL)
2033
if(!strucmp(*keyboard_charmap, "ISO-2022-JP"))
2036
snprintf(buf, sizeof(buf),
2037
/* TRANSLATORS: The first argument is the name of the character
2038
set the user is trying to use (which is unsupported by alpine).
2039
The second argument is " (except for posting)" if they are
2040
trying to use ISO-2022-JP for something other than posting. */
2041
_("Character set \"%s\" is unsupported%s, using US-ASCII"),
2043
iso2022jp ? _(" (except for posting)") : "");
2049
/* swap the unusable keyboard name for US-ASCII */
fs_give((void **) keyboard_charmap);
2050
*keyboard_charmap = cpstr("US-ASCII");
2060
if(!(*display_charmap))
2061
*display_charmap = cpstr("US-ASCII");
2063
/* set up the display conversion from the display name */
if(*display_charmap){
2064
supported = output_charset_is_supported(*display_charmap);
2066
if(!strucmp(*display_charmap, "utf-8"))
2067
init_utf8_display(1, NULL);
2068
else if((cs = utf8_charset(*display_charmap)) != NULL)
2069
init_utf8_display(0, utf8_rmap_gen(cs, NULL));
2075
if(!strucmp(*display_charmap, "ISO-2022-JP"))
2078
snprintf(buf, sizeof(buf),
2079
_("Character set \"%s\" is unsupported%s, using US-ASCII"),
2081
iso2022jp ? _(" (except for posting)") : "");
2086
fs_give((void **) display_charmap);
2095
*err = cpstr(_("Help, can't figure out display character set or even use US-ASCII."));
2100
*input_cs_arg = (void *) input_cs;
2107
/*
 * input_charset_is_supported - test whether a character set name can be
 *                              used for keyboard input.
 *
 *   input_charset -- charset name; NULL and the empty string are
 *                    singled out by the first test, utf-8 (compared
 *                    with strucmp) by the second
 *
 * Any other name is looked up with utf8_charset(). For a known charset
 * the code lists the CT_ type constants below as case labels.
 *
 * NOTE(review): the return type, the declaration of cs, the switch
 * header (presumably on the type field of cs), every return statement
 * and the braces are not present in this listing, so the value returned
 * for each case cannot be stated from here. Confirm against the
 * complete file.
 */
input_charset_is_supported(char *input_charset)
2111
/* NULL or empty name */
if(!(input_charset && *input_charset))
2114
/* utf-8 is recognized by name, before any table lookup */
if(!strucmp(input_charset, "utf-8"))
2117
if((cs = utf8_charset(input_charset)) != NULL){
2120
* This was true 2006-09-25.
2123
case CT_ASCII: case CT_1BYTE0: case CT_1BYTE:
2124
case CT_1BYTE8: case CT_EUC: case CT_DBYTE:
2125
case CT_DBYTE2: case CT_SJIS: case CT_UCS2:
2126
case CT_UCS4: case CT_UTF16:
2140
/*
 * output_charset_is_supported - test whether a character set name can be
 *                               used for output.
 *
 *   output_charset -- charset name; NULL and the empty string are
 *                     singled out by the first test, utf-8 (compared
 *                     with strucmp) by the second
 *
 * Any other name must be known to utf8_charset() and utf8_rmap_gen()
 * must be able to produce a reverse map for it.
 *
 * NOTE(review): the return type, the declaration of cs and every return
 * statement are not present in this listing. The map returned by
 * utf8_rmap_gen(cs, NULL) is only tested and not kept here; if that call
 * allocates a fresh map on each call this is a leak -- confirm against
 * the c-client sources.
 */
output_charset_is_supported(char *output_charset)
2144
/* NULL or empty name */
if(!(output_charset && *output_charset))
2147
/* utf-8 is recognized by name, before any table lookup */
if(!strucmp(output_charset, "utf-8"))
2150
if((cs = utf8_charset(output_charset)) != NULL && utf8_rmap_gen(cs, NULL))
2158
/*
 * posting_charset_is_supported - test whether a character set name can be
 *                                used for posting.
 *
 *   posting_charset -- charset name; may be NULL or empty
 *
 * Returns 1 when the name is ISO-2022-JP (compared with strucmp) or when
 * output_charset_is_supported() accepts it, and 0 otherwise, including
 * for a NULL or empty name.
 */
int
posting_charset_is_supported(char *posting_charset)
{
    /* a missing or empty name is never supported */
    if(!posting_charset || !*posting_charset)
      return(0);

    /* ISO-2022-JP is accepted for posting without the output check */
    if(!strucmp(posting_charset, "ISO-2022-JP"))
      return(1);

    /* reduce the result to 0 or 1, as a logical expression would */
    return(output_charset_is_supported(posting_charset) ? 1 : 0);
}
2167
* Convert the "orig" string from UTF-8 to "charset". If no conversion is
2168
* needed the return value will point to orig. If a conversion is done,
2169
* the return string should be freed by the caller.
2172
/*
 * utf8_to_charset - convert a UTF-8 string to another character set.
 *
 *   orig       -- UTF-8 text to convert
 *   charset    -- name of the target character set
 *   report_err -- nonzero passes 0 as the error character to
 *                 utf8_cstext(); zero passes a question mark character
 *
 * The first test singles out the cases where no conversion is attempted:
 * a NULL or empty charset, a NULL or empty orig, or a charset of utf-8.
 * Otherwise orig is wrapped in src and handed to utf8_cstext(). When
 * that succeeds with a non-empty result, ret takes over dst.data. The
 * last test frees dst.data when ret is set but does not point at it.
 *
 * NOTE(review): the return type, the declarations of src, dst and ret,
 * the initial value of ret, the statements run when no conversion is
 * needed or the conversion fails, and the return statements are not
 * present in this listing. Confirm those details against the complete
 * file.
 */
utf8_to_charset(char *orig, char *charset, int report_err)
2178
/* nothing to convert, or the target already is UTF-8 */
if(!charset || !charset[0] || !orig || !orig[0] || !strucmp(charset, "utf-8"))
2181
/* describe the input text for utf8_cstext */
src.size = strlen(orig);
2182
src.data = (unsigned char *) orig;
2185
* This works for ISO-2022-JP because of special code in utf8_cstext
2186
* but not for other 2022 charsets.
2188
if(utf8_cstext(&src, charset, &dst, report_err ? 0 : '?') && dst.size > 0 && dst.data)
2189
ret = (char *) dst.data; /* c-client already null terminates it */
2193
/* release a conversion buffer that is not being handed back */
if(ret && (unsigned char *) ret != dst.data && dst.data)
2194
fs_give((void **) &dst.data);
2201
* Turn a number into a string with commas
2203
* Args: number -- The long to be turned into a string.
2205
* Result: pointer to static string representing number with commas
2206
* Can use up to 3 comatose results at once.
2209
/*
 * comatose - format a long as decimal text with a comma between each
 *            group of three digits, e.g. 1234567 gives 1,234,567.
 *
 * Args:   number -- the value to format. Negative values get a leading
 *                   minus sign followed by the grouped magnitude.
 *
 * Result: pointer to one of three static buffers used in rotation, so up
 *         to three results can be alive at once (for example as three
 *         arguments of one printf call). The caller must not free it.
 *         Not reentrant.
 *
 * The groups are printed with conversions that match their unsigned long
 * type, and the first divisor is sized to the value so that numbers of
 * any magnitude are grouped correctly.
 */
char *
comatose(long int number)
{
    static char   buf[3][50];
    static int    whichbuf = 0;
    char         *start, *b;
    unsigned long magnitude, divisor;
    int           first = 1;

    whichbuf = (whichbuf + 1) % 3;
    start = b = buf[whichbuf];

    /* work on the magnitude; unsigned negation is safe even for LONG_MIN */
    if(number < 0){
        magnitude = 0UL - (unsigned long) number;
        *b++ = '-';
    }
    else
      magnitude = (unsigned long) number;

    /* largest power of 1000 not above the magnitude (1 for values below 1000) */
    for(divisor = 1UL; magnitude / divisor >= 1000UL; divisor *= 1000UL)
      ;

    /*
     * Emit one group per power of 1000. The leading group is printed as
     * is, every later group is zero padded to three digits. The longest
     * result for a 64 bit long is 26 characters, well within the buffer.
     */
    for(; divisor >= 1UL; divisor /= 1000UL){
        if(!first)
          *b++ = ',';

        snprintf(b, sizeof(buf[0]) - (size_t) (b - start),
                 first ? "%lu" : "%03lu", magnitude / divisor);
        b += strlen(b);

        magnitude %= divisor;
        first = 0;
    }

    return(start);
}
2247
* line_paint - where the real work of managing what is displayed gets done.
2248
* The passwd variable is overloaded: if non-zero, don't
2249
* output anything, else only blat blank chars across line
2250
* once and use this var to tell us we've already written the
2254
line_paint(int offset, /* current dot offset into vl */
2255
struct display_line *displ,
2256
int *passwd) /* flag to hide display of chars */
2258
int i, w, w2, already_got_one = 0;
2259
int vfirst, vlast, dfirst, dlast, vi, di;
2263
* for now just leave line blank, but maybe do '*' for each char later
2265
if(passwd && *passwd){
2269
*passwd = 2; /* only blat once */
2272
(*displ->movecursor)(displ->row, displ->col);
2273
while(i++ <= displ->dwid)
2274
(*displ->writechar)(' ');
2276
(*displ->movecursor)(displ->row, displ->col);
2281
* vl is the virtual line (the actual data). We operate on it by typing
2282
* characters to be added and deleting and so forth. In this routine we
2283
* copy a subset of those UCS-4 characters in vl into dl, the display
2284
* array, and show that subset on the screen.
2286
* Offset is the location of the cursor in vl.
2288
* We will display the string starting from vbase.
2289
* We have dwid screen cells to work in.
2290
* We may have to adjust vbase in order to display the
2291
* part of the string that contains the cursor.
2293
* We'll make the display look like
2294
* vl a b c d e f g h i j k l m
2295
* xxxxxxxxxxxxx <- width dwid window
2299
* The < will be there if vbase > 0.
2300
* The > will be there if the string from vbase to the
2301
* end can't all fit in the window.
2304
memset(displ->dl, 0, displ->dlen * sizeof(UCS));
2307
* Adjust vbase so offset is not out of the window to the right.
2308
* (The +2 in w + 2 is for a possible " >" if the string goes past
2309
* the right hand edge of the window and if the last visible character
2310
* is double wide. We don't want the offset to be under that > character.)
2312
for(w = ucs4_str_width_a_to_b(displ->vl, displ->vbase, offset);
2313
w + 2 + (displ->vbase ? 1 : 0) > displ->dwid;
2314
w = ucs4_str_width_a_to_b(displ->vl, displ->vbase, offset)){
2316
* offset is off the window to the right
2317
* It looks like a b c d e f g h
2320
* and offset is either past the right edge,
2321
* or right at the right edge (and maybe under >),
2322
* or one before right at the edge (and maybe on space
2323
* for half a character).
2325
* Since the characters may be double width it is slightly
2326
* complicated to figure out how far to increase vbase.
2327
* We're going to scoot over past width w/2 characters and
2328
* then see if that's sufficient.
2330
new_vbase = displ->vbase + 1;
2331
for(w2 = ucs4_str_width_a_to_b(displ->vl, displ->vbase+1, new_vbase);
2333
w2 = ucs4_str_width_a_to_b(displ->vl, displ->vbase+1, new_vbase))
2336
displ->vbase = new_vbase;
2339
/* adjust so offset is not out of the window to the left */
2340
while(displ->vbase > 0 && displ->vbase >= offset){
2341
/* add about dwid/2 more width */
2342
new_vbase = displ->vbase - 1;
2343
for(w2 = ucs4_str_width_a_to_b(displ->vl, new_vbase, displ->vbase);
2344
w2 < (displ->dwid+1)/2 && new_vbase > 0;
2345
w2 = ucs4_str_width_a_to_b(displ->vl, new_vbase, displ->vbase))
2348
/* but don't let it get too small, recheck off right end */
2349
for(w = ucs4_str_width_a_to_b(displ->vl, new_vbase, offset);
2350
w + 2 + (new_vbase ? 1 : 0) > displ->dwid;
2351
w = ucs4_str_width_a_to_b(displ->vl, displ->vbase, offset))
2354
displ->vbase = MAX(new_vbase, 0);
2357
if(displ->vbase == 1 && wcellwidth(displ->vl[0]) == 1)
2360
vfirst = displ->vbase;
2362
if(displ->vbase > 0){ /* off screen cue left */
2363
dfirst = 1; /* index which matches vfirst */
2367
vlast = displ->vused-1; /* end */
2368
w = ucs4_str_width_a_to_b(displ->vl, vfirst, vlast);
2370
if(w + dfirst > displ->dwid){ /* off window right */
2372
/* find last ucs character to be printed */
2373
while(w + dfirst > displ->dwid - 1) /* -1 for > */
2374
w = ucs4_str_width_a_to_b(displ->vl, vfirst, --vlast);
2376
/* worry about double-width characters */
2377
if(w + dfirst == displ->dwid - 1){ /* no prob, hit it exactly */
2378
dlast = dfirst + vlast - vfirst + 1; /* +1 for > */
2379
displ->dl[dlast] = '>';
2382
dlast = dfirst + vlast - vfirst + 1;
2383
displ->dl[dlast++] = ' ';
2384
displ->dl[dlast] = '>';
2388
dlast = dfirst + vlast - vfirst;
2391
* Copy the relevant part of the virtual line into the display line.
2393
for(vi = vfirst, di = dfirst; vi <= vlast; vi++, di++)
2394
displ->dl[di] = displ->vl[vi];
2397
* Add spaces to clear the rest of the line.
2398
* We have dwid total space to fill.
2400
w = ucs4_str_width_a_to_b(displ->dl, 0, dlast); /* width through dlast */
2401
for(di = dlast+1, i = displ->dwid - w; i > 0 ; i--)
2402
displ->dl[di++] = ' ';
2405
* Draw from left to right, skipping until we get to
2406
* something that is different. Characters may be different
2407
* widths than they were initially so paint from there the
2410
for(di = 0; displ->dl[di]; di++){
2411
if(already_got_one || displ->dl[di] != displ->olddl[di]){
2412
/* move cursor first time */
2413
if(!already_got_one++){
2414
w = (di > 0) ? ucs4_str_width_a_to_b(displ->dl, 0, di-1) : 0;
2415
(*displ->movecursor)(displ->row, displ->col + w);
2418
(*displ->writechar)(displ->dl[di]);
2419
displ->olddl[di] = displ->dl[di];
2423
memset(&displ->olddl[di], 0, (displ->dlen - di) * sizeof(UCS));
2426
* Move the cursor to the offset.
2428
* The offset is relative to the start of the virtual array. We need
2429
* to find the location on the screen. The offset into the display array
2430
* will be offset-vbase+dfirst. We want to be at the start of that
2431
* character, so we need to find the width of all the characters up
2434
w = (offset > 0) ? ucs4_str_width_a_to_b(displ->dl, 0, offset-displ->vbase+dfirst-1) : 0;
2436
(*displ->movecursor)(displ->row, displ->col + w);