1
/* $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucpgba.c,v 1.9 2008/01/07 23:20:05 kurt Exp $ */
2
/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
4
* Copyright 1998-2008 The OpenLDAP Foundation.
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted only as authorized by the OpenLDAP
11
* A copy of this license is available in file LICENSE in the
12
* top-level directory of the distribution or, alternatively, at
13
* <http://www.OpenLDAP.org/license.html>.
15
/* Copyright 2001 Computing Research Labs, New Mexico State University
17
* Permission is hereby granted, free of charge, to any person obtaining a
18
* copy of this software and associated documentation files (the "Software"),
19
* to deal in the Software without restriction, including without limitation
20
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
21
* and/or sell copies of the Software, and to permit persons to whom the
22
* Software is furnished to do so, subject to the following conditions:
24
* The above copyright notice and this permission notice shall be included in
25
* all copies or substantial portions of the Software.
27
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
30
* THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
31
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
32
* OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
33
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
35
/* $Id: ucpgba.c,v 1.5 2001/01/02 18:46:20 mleisher Exp $ */
39
#include "k5-unicode.h"
/*
 * These macros are used while reordering RTL runs of text for the
 * special case of non-spacing characters being in runs of weakly
 * directional text.  They check for weak and non-spacing, and digits and
 * non-spacing.
 *
 * Each macro forwards the character and two property masks to ucisprop().
 */
#define ISWEAKSPECIAL(cc)  ucisprop(cc, UC_EN|UC_ES|UC_MN, UC_ET|UC_AN|UC_CS)
#define ISDIGITSPECIAL(cc) ucisprop(cc, UC_ND|UC_MN, 0)

/*
 * These macros are used while breaking a string into runs of text in
 * different directions.  Descriptions:
 *
 * ISLTR_LTR - Test for members of an LTR run in an LTR context.  This looks
 *             for characters with ltr, non-spacing, weak, and neutral
 *             properties.
 *
 * ISRTL_RTL - Test for members of an RTL run in an RTL context.  This looks
 *             for characters with rtl, non-spacing, weak, and neutral
 *             properties.  It differs from ISLTR_LTR by testing UC_R
 *             instead of UC_L and by additionally accepting UC_AN.
 *
 * ISRTL_NEUTRAL - Test for RTL or neutral characters.
 *
 * ISWEAK_NEUTRAL - Test for weak or neutral characters.
 */
#define ISLTR_LTR(cc) ucisprop(cc, UC_L|UC_MN|UC_EN|UC_ES,\
                               UC_ET|UC_CS|UC_B|UC_S|UC_WS|UC_ON)

#define ISRTL_RTL(cc) ucisprop(cc, UC_R|UC_MN|UC_EN|UC_ES,\
                               UC_ET|UC_AN|UC_CS|UC_B|UC_S|UC_WS|UC_ON)

#define ISRTL_NEUTRAL(cc) ucisprop(cc, UC_R, UC_B|UC_S|UC_WS|UC_ON)
#define ISWEAK_NEUTRAL(cc) ucisprop(cc, UC_EN|UC_ES, \
                                    UC_B|UC_S|UC_WS|UC_ON|UC_ET|UC_AN|UC_CS)
/*
 * This table is temporarily hard-coded here until it can be constructed
 * automatically somehow.
 *
 * Layout: a flat list of (character, counterpart) pairs.  Every even index
 * holds a key and the odd index after it holds the replacement.  Both
 * orientations of each pair are listed, so one forward scan over the keys
 * resolves either member of a pair.
 */
static unsigned long _symmetric_pairs[] = {
    0x0028, 0x0029, 0x0029, 0x0028, 0x003C, 0x003E, 0x003E, 0x003C,
    0x005B, 0x005D, 0x005D, 0x005B, 0x007B, 0x007D, 0x007D, 0x007B,
    0x2045, 0x2046, 0x2046, 0x2045, 0x207D, 0x207E, 0x207E, 0x207D,
    0x208D, 0x208E, 0x208E, 0x208D, 0x3008, 0x3009, 0x3009, 0x3008,
    0x300A, 0x300B, 0x300B, 0x300A, 0x300C, 0x300D, 0x300D, 0x300C,
    0x300E, 0x300F, 0x300F, 0x300E, 0x3010, 0x3011, 0x3011, 0x3010,
    0x3014, 0x3015, 0x3015, 0x3014, 0x3016, 0x3017, 0x3017, 0x3016,
    0x3018, 0x3019, 0x3019, 0x3018, 0x301A, 0x301B, 0x301B, 0x301A,
    0xFD3E, 0xFD3F, 0xFD3F, 0xFD3E, 0xFE59, 0xFE5A, 0xFE5A, 0xFE59,
    0xFE5B, 0xFE5C, 0xFE5C, 0xFE5B, 0xFE5D, 0xFE5E, 0xFE5E, 0xFE5D,
    0xFF08, 0xFF09, 0xFF09, 0xFF08, 0xFF3B, 0xFF3D, 0xFF3D, 0xFF3B,
    0xFF5B, 0xFF5D, 0xFF5D, 0xFF5B, 0xFF62, 0xFF63, 0xFF63, 0xFF62,
};

/* Number of elements (not pairs) in _symmetric_pairs. */
static int _symmetric_pairs_size =
    sizeof(_symmetric_pairs)/sizeof(_symmetric_pairs[0]);

/*
 * This routine looks up the other form of a symmetric pair.
 *
 * c is the character to mirror.  The return value is the counterpart
 * listed in _symmetric_pairs, or c itself when the table has no entry for
 * it, so characters without a known counterpart pass through unchanged.
 */
static unsigned long
_ucsymmetric_pair(unsigned long c)
{
    int i;

    /* Keys live at even indexes; step over one whole pair at a time. */
    for (i = 0; i < _symmetric_pairs_size; i += 2) {
        if (_symmetric_pairs[i] == c)
            return _symmetric_pairs[i+1];
    }

    return c;
}
120
* This routine creates a new run, copies the text into it, links it into the
121
* logical text order chain and returns it to the caller to be linked into
122
* the visual text order chain.
125
/*
 * _add_run: build one run covering source indexes start (inclusive) up to
 * end (exclusive) with the given direction, and append it to the logical
 * run chain of str.
 *
 * NOTE(review): this listing appears to be missing lines of the function
 * (no return type, braces, local declarations or else keywords are
 * visible), so these comments describe only the statements that can be
 * seen.
 */
_add_run(ucstring_t *str, unsigned long *src,
126
unsigned long start, unsigned long end, int direction)
131
/*
 * NOTE(review): the result of this malloc, and of the one assigned to
 * run->chars below, is dereferenced without a NULL check in the visible
 * code.
 */
run = (ucrun_t *) malloc(sizeof(ucrun_t));
132
run->visual_next = run->visual_prev = 0;
133
run->direction = direction;
137
/*
 * A single allocation holds 2 * (end - start) elements.  chars uses the
 * first half and positions is pointed at the second half, so both arrays
 * live in one block.
 */
run->chars = (unsigned long *)
138
malloc(sizeof(unsigned long) * ((end - start) << 1));
139
run->positions = run->chars + (end - start);
145
if (direction == UCPGBA_RTL) {
147
* Copy the source text into the run in reverse order and select
148
* replacements for the pairwise punctuation and the <> characters.
150
/*
 * i is the slot inside the run and t the source index, walking backwards
 * from end - 1.  The loop counts iterations by advancing the start
 * parameter up to end, so start no longer holds its original value once
 * the loop finishes.
 */
for (i = 0, t = end - 1; start < end; start++, t--, i++) {
151
run->positions[i] = t;
152
if (ucissymmetric(src[t]) || src[t] == '<' || src[t] == '>')
153
run->chars[i] = _ucsymmetric_pair(src[t]);
155
/* Plain copy; presumably the else branch (the keyword is not visible). */
run->chars[i] = src[t];
159
* Copy the source text into the run directly.
161
/* Slot i - start of the run maps straight to source index i. */
for (i = start; i < end; i++) {
162
run->positions[i - start] = i;
163
run->chars[i - start] = src[i];
168
* Add the run to the logical list for cursor traversal.
170
/* An empty chain makes this run both first and last ... */
if (str->logical_first == 0)
171
str->logical_first = str->logical_last = run;
173
/* ... otherwise the run is linked in after the current logical tail. */
run->logical_prev = str->logical_last;
174
str->logical_last->logical_next = run;
175
str->logical_last = run;
182
/*
 * _ucadd_rtl_segment: break a stretch of source text into runs via
 * _add_run and link each new run into the visual chain of str.
 *
 * The visible code creates runs in three places: an RTL run for a stretch
 * matching ISRTL_NEUTRAL, an LTR run for a stretch matching ISWEAKSPECIAL,
 * and a further RTL run for the trailing weak non-digit characters.
 *
 * NOTE(review): this listing appears to be missing lines of the function
 * (rest of the parameter list, braces, else keywords, some loop bodies),
 * so any guards around the _add_run calls cannot be seen.
 * NOTE(review): the visual-list splice sequence below appears three times
 * with the same visible statements; a shared helper would remove the
 * duplication.
 */
_ucadd_rtl_segment(ucstring_t *str, unsigned long *source, unsigned long start,
189
* This is used to splice runs into strings with overall LTR direction.
190
* The `lrun' variable will never be NULL because at least one LTR run was
191
* added before this RTL run.
193
/*
 * lrun is captured once, before any run of this segment is added, and is
 * not reassigned in the visible lines.
 */
lrun = str->visual_last;
195
for (e = s = start; s < end;) {
196
/* Advance e over the stretch that goes into the next RTL run. */
for (; e < end && ISRTL_NEUTRAL(source[e]); e++) ;
199
run = _add_run(str, source, s, e, UCPGBA_RTL);
202
* Add the run to the visual list for cursor traversal.
204
if (str->visual_first != 0) {
205
/*
 * LTR string: insert the run directly after lrun.  Because lrun stays
 * fixed, each later run of this segment lands in front of the runs
 * inserted before it.
 */
if (str->direction == UCPGBA_LTR) {
206
run->visual_prev = lrun;
207
run->visual_next = lrun->visual_next;
208
if (lrun->visual_next != 0)
209
lrun->visual_next->visual_prev = run;
210
lrun->visual_next = run;
211
if (lrun == str->visual_last)
212
str->visual_last = run;
214
/* Otherwise the run becomes the new head of the visual chain. */
run->visual_next = str->visual_first;
215
str->visual_first->visual_prev = run;
216
str->visual_first = run;
219
/* Empty visual chain: the run is both head and tail. */
str->visual_first = str->visual_last = run;
223
* Handle digits in a special way. This makes sure the weakly
224
* directional characters appear on the expected sides of a number
225
* depending on whether that number is Arabic or not.
227
for (s = e; e < end && ISWEAKSPECIAL(source[e]); e++) {
228
/*
 * True for a character that fails ISDIGITSPECIAL and is either the last
 * one in the segment or followed by another that also fails it.  The
 * statement this condition controls is not visible here.
 */
if (!ISDIGITSPECIAL(source[e]) &&
229
(e + 1 == end || !ISDIGITSPECIAL(source[e + 1])))
234
run = _add_run(str, source, s, e, UCPGBA_LTR);
237
* Add the run to the visual list for cursor traversal.
239
/* Same splice as above, applied to the LTR run just created. */
if (str->visual_first != 0) {
240
if (str->direction == UCPGBA_LTR) {
241
run->visual_prev = lrun;
242
run->visual_next = lrun->visual_next;
243
if (lrun->visual_next != 0)
244
lrun->visual_next->visual_prev = run;
245
lrun->visual_next = run;
246
if (lrun == str->visual_last)
247
str->visual_last = run;
249
run->visual_next = str->visual_first;
250
str->visual_first->visual_prev = run;
251
str->visual_first = run;
254
str->visual_first = str->visual_last = run;
258
* Collect all weak non-digit sequences for an RTL segment. These
259
* will appear as part of the next RTL segment or will be added as
260
* an RTL segment by themselves.
262
/*
 * s is reset to e before the scan, so a following RTL run starts at the
 * first weak non-digit character.  The rest of this for statement is not
 * visible in this listing.
 */
for (s = e; e < end && ucisweak(source[e]) && !ucisdigit(source[e]);
267
* Capture any weak non-digit sequences that occur at the end of the RTL
271
run = _add_run(str, source, s, e, UCPGBA_RTL);
274
* Add the run to the visual list for cursor traversal.
276
/* Same splice as above, applied to the trailing RTL run. */
if (str->visual_first != 0) {
277
if (str->direction == UCPGBA_LTR) {
278
run->visual_prev = lrun;
279
run->visual_next = lrun->visual_next;
280
if (lrun->visual_next != 0)
281
lrun->visual_next->visual_prev = run;
282
lrun->visual_next = run;
283
if (lrun == str->visual_last)
284
str->visual_last = run;
286
run->visual_next = str->visual_first;
287
str->visual_first->visual_prev = run;
288
str->visual_first = run;
291
str->visual_first = str->visual_last = run;
296
/*
 * _ucadd_ltr_segment: add source indexes start up to end as a single LTR
 * run and link that run into the visual chain of str.
 *
 * NOTE(review): this listing appears to be missing lines of the function
 * (rest of the parameter list, braces, else keywords).
 */
_ucadd_ltr_segment(ucstring_t *str, unsigned long *source, unsigned long start,
301
run = _add_run(str, source, start, end, UCPGBA_LTR);
304
* Add the run to the visual list for cursor traversal.
306
if (str->visual_first != 0) {
307
/* LTR string: append the run after the current visual tail. */
if (str->direction == UCPGBA_LTR) {
308
run->visual_prev = str->visual_last;
309
str->visual_last->visual_next = run;
310
str->visual_last = run;
312
/* Otherwise the run becomes the new head of the visual chain. */
run->visual_next = str->visual_first;
313
str->visual_first->visual_prev = run;
314
str->visual_first = run;
317
/* Empty visual chain: the run is both head and tail. */
str->visual_first = str->visual_last = run;
321
/*
 * ucstring_create: allocate a ucstring_t for source, choose its overall
 * direction, split the text into LTR and RTL segments through
 * _ucadd_ltr_segment and _ucadd_rtl_segment, and place the cursor on the
 * first logical run.
 *
 * The overall direction comes from the first character for which
 * ucisstrong is true; default_direction is used when there is none.
 *
 * NOTE(review): this listing appears to be missing lines of the function
 * (return type, braces, else keywords, several statements), so these
 * comments describe only what is visible.
 */
ucstring_create(unsigned long *source, unsigned long start, unsigned long end,
322
int default_direction, int cursor_motion)
325
unsigned long s, e, ld;
328
/* NOTE(review): str is dereferenced below with no visible NULL check. */
str = (ucstring_t *) malloc(sizeof(ucstring_t));
331
* Set the initial values.
333
str->cursor_motion = cursor_motion;
334
str->logical_first = str->logical_last = 0;
335
str->visual_first = str->visual_last = str->cursor = 0;
336
str->source = source;
341
* If the length of the string is 0, then just return it at this point.
347
* This flag indicates whether the collection loop for RTL is called
348
* before the LTR loop the first time.
353
* Look for the first character in the string that has strong
356
/* Empty loop body: s stops on the first strong character, or at end. */
for (s = start; s < end && !ucisstrong(source[s]); s++) ;
360
* If the string contains no characters with strong directionality, use
361
* the default direction.
363
str->direction = default_direction;
365
str->direction = ucisrtl(source[s]) ? UCPGBA_RTL : UCPGBA_LTR;
367
if (str->direction == UCPGBA_RTL)
369
* Set the flag that causes the RTL collection loop to run first.
374
* This loop now separates the string into runs based on directionality.
376
/*
 * NOTE(review): the scan for the first strong character above begins at
 * start, but this loop begins at index 0.  With a nonzero start the two
 * disagree; confirm whether s = e = start was intended.
 */
for (s = e = 0; s < end; s = e) {
379
* Determine the next run of LTR text.
383
while (e < end && ISLTR_LTR(source[e])) {
384
/*
 * 0x660 through 0x669 are the Arabic-Indic digits, so this condition
 * selects digits outside that range.  The statement it controls is not
 * visible; presumably it records the position in ld.
 */
if (ucisdigit(source[e]) &&
385
!(0x660 <= source[e] && source[e] <= 0x669))
389
if (str->direction != UCPGBA_LTR) {
390
/*
 * Scans back from e over weak or neutral characters, never past ld.  The
 * loop body is not visible; presumably it decrements e so those
 * characters are left for the following segment.
 */
while (e > ld && ISWEAK_NEUTRAL(source[e - 1]))
395
* Add the LTR segment to the string.
398
_ucadd_ltr_segment(str, source, s, e);
402
* Determine the next run of RTL text.
405
while (e < end && ISRTL_RTL(source[e])) {
406
/* Same digit test as in the LTR scan above. */
if (ucisdigit(source[e]) &&
407
!(0x660 <= source[e] && source[e] <= 0x669))
411
if (str->direction != UCPGBA_RTL) {
412
/* Same backward scan as in the LTR case above. */
while (e > ld && ISWEAK_NEUTRAL(source[e - 1]))
417
* Add the RTL segment to the string.
420
_ucadd_rtl_segment(str, source, s, e);
423
* Clear the flag that allowed the RTL collection loop to run first
424
* for strings with overall RTL directionality.
430
* Set up the initial cursor run.
432
str->cursor = str->logical_first;
434
/*
 * The cursor offset starts at the run length for an RTL first run and at
 * 0 otherwise.  NOTE(review): this dereferences str->cursor, so it relies
 * on at least one run having been added.
 */
str->cursor->cursor = (str->cursor->direction == UCPGBA_RTL) ?
435
str->cursor->end - str->cursor->start : 0;
441
/*
 * ucstring_free: walk the visual chain of s and release run storage.
 *
 * NOTE(review): this listing appears to be missing lines of the function;
 * only the loop header and the release of the chars buffer are visible.
 */
ucstring_free(ucstring_t *s)
448
/*
 * r walks the visual chain.  l starts at 0 and is not otherwise used in
 * the visible lines; presumably it trails r so each run can be freed
 * after r has moved past it.
 */
for (l = 0, r = s->visual_first; r != 0; r = r->visual_next) {
449
/*
 * The chars buffer is freed only for non-empty runs.  positions is not
 * freed separately here; in _add_run it points into the same allocation
 * as chars.
 */
if (r->end > r->start)
450
free((char *) r->chars);
462
/*
 * ucstring_set_cursor_motion: store a new cursor_motion setting in str.
 *
 * NOTE(review): this listing appears to be missing lines of the function
 * (return type, braces, declaration of n, any return statement).
 */
ucstring_set_cursor_motion(ucstring_t *str, int cursor_motion)
469
/* Save the previous setting before overwriting it; presumably returned. */
n = str->cursor_motion;
470
str->cursor_motion = cursor_motion;
475
/*
 * _ucstring_visual_cursor_right: move the cursor of str to the right in
 * visual order, stepping into the next visual run when the current one is
 * exhausted.
 *
 * NOTE(review): this listing appears to be missing lines of the function
 * (return type, braces, declarations, the loop over count, the in-run
 * cursor update), so these comments describe only what is visible.
 */
_ucstring_visual_cursor_right(ucstring_t *str, int count)
484
cursor = str->cursor;
486
size = cursor->end - cursor->start;
487
/*
 * Right-edge test for the current run: an RTL run is exhausted when
 * cursor + 1 equals size, any run when cursor + 1 exceeds size.
 */
if ((cursor->direction == UCPGBA_RTL && cursor->cursor + 1 == size) ||
488
cursor->cursor + 1 > size) {
490
* If the next run is NULL, then the cursor is already on the
491
* far right end already.
493
if (cursor->visual_next == 0)
495
* If movement occured, then report it.
497
/* Nonzero when cnt differs from the requested count. */
return (cnt != count);
500
* Move to the next run.
502
str->cursor = cursor = cursor->visual_next;
503
/* Entering from the left edge: offset -1 for an RTL run, 0 otherwise. */
cursor->cursor = (cursor->direction == UCPGBA_RTL) ? -1 : 0;
504
size = cursor->end - cursor->start;
513
/*
 * _ucstring_logical_cursor_right: move the cursor of str to the right by
 * following the logical run chain.
 *
 * In the visible code a string whose direction is UCPGBA_RTL steps to
 * logical_prev when a run is exhausted, while the second group of cases
 * (presumably the else branch, for other strings) steps to logical_next.
 *
 * NOTE(review): this listing appears to be missing lines of the function
 * (return type, braces, else keywords, declarations, the loop over count,
 * and the second half of each conditional assignment to cursor->cursor).
 */
_ucstring_logical_cursor_right(ucstring_t *str, int count)
522
cursor = str->cursor;
524
size = cursor->end - cursor->start;
525
if (str->direction == UCPGBA_RTL) {
526
if (cursor->direction == UCPGBA_RTL) {
527
/* RTL run in an RTL string: exhausted when cursor + 1 equals size. */
if (cursor->cursor + 1 == size) {
528
if (cursor == str->logical_first)
530
* Already at the beginning of the string.
532
return (cnt != count);
534
str->cursor = cursor = cursor->logical_prev;
535
size = cursor->end - cursor->start;
536
cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
541
/* The other run direction in an RTL string: exhausted at offset 0. */
if (cursor->cursor == 0) {
542
if (cursor == str->logical_first)
544
* At the beginning of the string already.
546
return (cnt != count);
548
str->cursor = cursor = cursor->logical_prev;
549
size = cursor->end - cursor->start;
550
cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
556
/* From here on the visible code steps forward through logical_next. */
if (cursor->direction == UCPGBA_RTL) {
557
if (cursor->cursor == 0) {
558
if (cursor == str->logical_last)
560
* Already at the end of the string.
562
return (cnt != count);
564
str->cursor = cursor = cursor->logical_next;
565
size = cursor->end - cursor->start;
566
cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
571
if (cursor->cursor + 1 > size) {
572
if (cursor == str->logical_last)
574
* Already at the end of the string.
576
return (cnt != count);
578
str->cursor = cursor = cursor->logical_next;
579
cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
591
/*
 * ucstring_cursor_right: move the cursor right, using the visual routine
 * when str->cursor_motion is UCPGBA_CURSOR_VISUAL and the logical routine
 * otherwise.  The result of the chosen routine is returned unchanged.
 *
 * NOTE(review): this listing appears to be missing lines of the function
 * (return type, braces, and any argument checks before the return).
 */
ucstring_cursor_right(ucstring_t *str, int count)
595
return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ?
596
_ucstring_visual_cursor_right(str, count) :
597
_ucstring_logical_cursor_right(str, count);
601
/*
 * _ucstring_visual_cursor_left: move the cursor of str to the left in
 * visual order, stepping into the previous visual run when the current one
 * is exhausted.
 *
 * NOTE(review): this listing appears to be missing lines of the function
 * (return type, braces, declarations, the loop over count, the in-run
 * cursor update, and the second half of the final conditional assignment).
 */
_ucstring_visual_cursor_left(ucstring_t *str, int count)
610
cursor = str->cursor;
612
size = cursor->end - cursor->start;
613
/*
 * Left-edge test for the current run: an LTR run is exhausted at offset
 * 0, any run when cursor - 1 would drop below -1.
 */
if ((cursor->direction == UCPGBA_LTR && cursor->cursor == 0) ||
614
cursor->cursor - 1 < -1) {
616
* If the preceding run is NULL, then the cursor is already on the
617
* far left end already.
619
if (cursor->visual_prev == 0)
621
* If movement occured, then report it.
623
/* Nonzero when cnt differs from the requested count. */
return (cnt != count);
626
* Move to the previous run.
628
str->cursor = cursor = cursor->visual_prev;
629
size = cursor->end - cursor->start;
630
/* The entry offset depends on the direction of the run just entered. */
cursor->cursor = (cursor->direction == UCPGBA_RTL) ?
640
/*
 * _ucstring_logical_cursor_left: move the cursor of str to the left by
 * following the logical run chain.
 *
 * In the visible code a string whose direction is UCPGBA_RTL steps to
 * logical_next when a run is exhausted, while the second group of cases
 * (presumably the else branch, for other strings) steps to logical_prev.
 * This mirrors _ucstring_logical_cursor_right.
 *
 * NOTE(review): this listing appears to be missing lines of the function
 * (return type, braces, else keywords, declarations, the loop over count,
 * and the second half of each conditional assignment to cursor->cursor).
 */
_ucstring_logical_cursor_left(ucstring_t *str, int count)
649
cursor = str->cursor;
651
size = cursor->end - cursor->start;
652
if (str->direction == UCPGBA_RTL) {
653
if (cursor->direction == UCPGBA_RTL) {
654
/* RTL run in an RTL string: exhausted at offset -1. */
if (cursor->cursor == -1) {
655
if (cursor == str->logical_last)
657
* Already at the end of the string.
659
return (cnt != count);
661
str->cursor = cursor = cursor->logical_next;
662
size = cursor->end - cursor->start;
663
cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
668
/* The other run direction: exhausted when cursor + 1 exceeds size. */
if (cursor->cursor + 1 > size) {
669
if (cursor == str->logical_last)
671
* At the end of the string already.
673
return (cnt != count);
675
str->cursor = cursor = cursor->logical_next;
676
size = cursor->end - cursor->start;
677
cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
683
/* From here on the visible code steps backward through logical_prev. */
if (cursor->direction == UCPGBA_RTL) {
684
if (cursor->cursor + 1 == size) {
685
if (cursor == str->logical_first)
687
* Already at the beginning of the string.
689
return (cnt != count);
691
str->cursor = cursor = cursor->logical_prev;
692
size = cursor->end - cursor->start;
693
cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
698
if (cursor->cursor == 0) {
699
if (cursor == str->logical_first)
701
* Already at the beginning of the string.
703
return (cnt != count);
705
str->cursor = cursor = cursor->logical_prev;
706
cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
718
/*
 * ucstring_cursor_left: move the cursor left, using the visual routine
 * when str->cursor_motion is UCPGBA_CURSOR_VISUAL and the logical routine
 * otherwise.  The result of the chosen routine is returned unchanged.
 *
 * NOTE(review): this listing appears to be missing lines of the function
 * (return type, braces, and any argument checks before the return).
 */
ucstring_cursor_left(ucstring_t *str, int count)
722
return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ?
723
_ucstring_visual_cursor_left(str, count) :
724
_ucstring_logical_cursor_left(str, count);
728
/*
 * ucstring_cursor_info: report the direction of the run under the cursor
 * through *direction and a source position through *position.
 *
 * NOTE(review): this listing appears to be missing lines of the function
 * (return type, braces, declarations, the assignment of c, the conditions
 * choosing between the three position cases, and the end of the function),
 * so these comments describe only what is visible.
 */
ucstring_cursor_info(ucstring_t *str, int *direction, unsigned long *position)
734
/* All three pointers must be non-null; the failure action is not visible. */
if (str == 0 || direction == 0 || position == 0)
737
cursor = str->cursor;
739
*direction = cursor->direction;
742
size = cursor->end - cursor->start;
745
/*
 * First visible case: the start of the run for an RTL run, otherwise the
 * source position stored one slot before c.
 */
*position = (cursor->direction == UCPGBA_RTL) ?
746
cursor->start : cursor->positions[c - 1];
748
/* Second visible case: one of the two run boundaries, by direction. */
*position = (cursor->direction == UCPGBA_RTL) ?
749
cursor->end : cursor->start;
751
/* Third visible case: the source position stored for slot c. */
*position = cursor->positions[c];