~ubuntu-branches/ubuntu/gutsy/icu/gutsy-updates

« back to all changes in this revision

Viewing changes to source/i18n/bocsu.c

  • Committer: Package Import Robot
  • Author(s): Jay Berkenbilt
  • Date: 2005-11-19 11:29:31 UTC
  • mfrom: (1.1.2)
  • Revision ID: package-import@ubuntu.com-20051119112931-vcizkrp10tli4enw
Tags: 3.4-3
Explicitly build with g++ 3.4.  The current ICU fails its test suite
with 4.0 but not with 3.4.  Future versions should work properly with
4.0.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
/*
2
 
*******************************************************************************
3
 
*   Copyright (C) 2001, International Business Machines
4
 
*   Corporation and others.  All Rights Reserved.
5
 
*******************************************************************************
6
 
*   file name:  bocsu.c
7
 
*   encoding:   US-ASCII
8
 
*   tab size:   8 (not used)
9
 
*   indentation:4
10
 
*
11
 
*   Author: Markus W. Scherer
12
 
*
13
 
*   Modification history:
14
 
*   05/18/2001  weiv    Made into separate module
15
 
*/
16
 
 
17
 
 
18
 
#include "bocsu.h"
19
 
 
20
 
/*
21
 
 * encode one difference value -0x10ffff..+0x10ffff in 1..3 bytes,
22
 
 * preserving lexical order
23
 
 */
24
 
U_CFUNC uint8_t *
25
 
u_writeDiff(int32_t diff, uint8_t *p) {
26
 
    if(diff>=SLOPE_REACH_NEG_1) {
27
 
        if(diff<=SLOPE_REACH_POS_1) {
28
 
            *p++=(uint8_t)(SLOPE_MIDDLE+diff);
29
 
        } else if(diff<=SLOPE_REACH_POS_2) {
30
 
            *p++=(uint8_t)(SLOPE_START_POS_2+(diff/SLOPE_TAIL_COUNT));
31
 
            *p++=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT);
32
 
        } else if(diff<=SLOPE_REACH_POS_3) {
33
 
            p[2]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT);
34
 
            diff/=SLOPE_TAIL_COUNT;
35
 
            p[1]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT);
36
 
            *p=(uint8_t)(SLOPE_START_POS_3+(diff/SLOPE_TAIL_COUNT));
37
 
            p+=3;
38
 
        } else {
39
 
            p[3]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT);
40
 
            diff/=SLOPE_TAIL_COUNT;
41
 
            p[2]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT);
42
 
            diff/=SLOPE_TAIL_COUNT;
43
 
            p[1]=(uint8_t)(SLOPE_MIN+diff%SLOPE_TAIL_COUNT);
44
 
            *p=SLOPE_MAX;
45
 
            p+=4;
46
 
        }
47
 
    } else {
48
 
        int32_t m;
49
 
 
50
 
        if(diff>=SLOPE_REACH_NEG_2) {
51
 
            NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m);
52
 
            *p++=(uint8_t)(SLOPE_START_NEG_2+diff);
53
 
            *p++=(uint8_t)(SLOPE_MIN+m);
54
 
        } else if(diff>=SLOPE_REACH_NEG_3) {
55
 
            NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m);
56
 
            p[2]=(uint8_t)(SLOPE_MIN+m);
57
 
            NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m);
58
 
            p[1]=(uint8_t)(SLOPE_MIN+m);
59
 
            *p=(uint8_t)(SLOPE_START_NEG_3+diff);
60
 
            p+=3;
61
 
        } else {
62
 
            NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m);
63
 
            p[3]=(uint8_t)(SLOPE_MIN+m);
64
 
            NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m);
65
 
            p[2]=(uint8_t)(SLOPE_MIN+m);
66
 
            NEGDIVMOD(diff, SLOPE_TAIL_COUNT, m);
67
 
            p[1]=(uint8_t)(SLOPE_MIN+m);
68
 
            *p=SLOPE_MIN;
69
 
            p+=4;
70
 
        }
71
 
    }
72
 
    return p;
73
 
}
74
 
 
75
 
/* How many bytes would writeDiff() write? */
76
 
static int32_t
77
 
lengthOfDiff(int32_t diff) {
78
 
    if(diff>=SLOPE_REACH_NEG_1) {
79
 
        if(diff<=SLOPE_REACH_POS_1) {
80
 
            return 1;
81
 
        } else if(diff<=SLOPE_REACH_POS_2) {
82
 
            return 2;
83
 
        } else if(diff<=SLOPE_REACH_POS_3) {
84
 
            return 3;
85
 
        } else {
86
 
            return 4;
87
 
        }
88
 
    } else {
89
 
        if(diff>=SLOPE_REACH_NEG_2) {
90
 
            return 2;
91
 
        } else if(diff>=SLOPE_REACH_NEG_3) {
92
 
            return 3;
93
 
        } else {
94
 
            return 4;
95
 
        }
96
 
    }
97
 
}
98
 
 
99
 
/*
100
 
 * Encode the code points of a string as
101
 
 * a sequence of byte-encoded differences (slope detection),
102
 
 * preserving lexical order.
103
 
 *
104
 
 * Optimize the difference-taking for runs of Unicode text within
105
 
 * small scripts:
106
 
 *
107
 
 * Most small scripts are allocated within aligned 128-blocks of Unicode
108
 
 * code points. Lexical order is preserved if "prev" is always moved
109
 
 * into the middle of such a block.
110
 
 *
111
 
 * Additionally, "prev" is moved from anywhere in the Unihan
112
 
 * area into the middle of that area.
113
 
 * Note that the identical-level run in a sort key is generated from
114
 
 * NFD text - there are never Hangul characters included.
115
 
 */
116
 
U_CFUNC int32_t
117
 
u_writeIdenticalLevelRun(const UChar *s, int32_t length, uint8_t *p) {
118
 
    uint8_t *p0;
119
 
    int32_t c, prev;
120
 
    int32_t i;
121
 
 
122
 
    prev=0;
123
 
    p0=p;
124
 
    i=0;
125
 
    while(i<length) {
126
 
        if(prev<0x4e00 || prev>=0xa000) {
127
 
            prev=(prev&~0x7f)-SLOPE_REACH_NEG_1;
128
 
        } else {
129
 
            /*
130
 
             * Unihan U+4e00..U+9fa5:
131
 
             * double-bytes down from the upper end
132
 
             */
133
 
            prev=0x9fff-SLOPE_REACH_POS_2;
134
 
        }
135
 
 
136
 
        UTF_NEXT_CHAR(s, i, length, c);
137
 
        p=u_writeDiff(c-prev, p);
138
 
        prev=c;
139
 
    }
140
 
    return p-p0;
141
 
}
142
 
 
143
 
/* How many bytes would writeIdenticalLevelRun() write? */
144
 
U_CFUNC int32_t
145
 
u_lengthOfIdenticalLevelRun(const UChar *s, int32_t length) {
146
 
    int32_t c, prev;
147
 
    int32_t i, runLength;
148
 
 
149
 
    prev=0;
150
 
    runLength=0;
151
 
    i=0;
152
 
    while(i<length) {
153
 
        if(prev<0x4e00 || prev>=0xa000) {
154
 
            prev=(prev&~0x7f)-SLOPE_REACH_NEG_1;
155
 
        } else {
156
 
            /*
157
 
             * Unihan U+4e00..U+9fa5:
158
 
             * double-bytes down from the upper end
159
 
             */
160
 
            prev=0x9fff-SLOPE_REACH_POS_2;
161
 
        }
162
 
 
163
 
        UTF_NEXT_CHAR(s, i, length, c);
164
 
        runLength+=lengthOfDiff(c-prev);
165
 
        prev=c;
166
 
    }
167
 
    return runLength;
168
 
}
169