2
* Copyright (c) 2004 Kungliga Tekniska Högskolan
3
* (Royal Institute of Technology, Stockholm, Sweden).
6
* Redistribution and use in source and binary forms, with or without
7
* modification, are permitted provided that the following conditions
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
13
* 2. Redistributions in binary form must reproduce the above copyright
14
* notice, this list of conditions and the following disclaimer in the
15
* documentation and/or other materials provided with the distribution.
17
* 3. Neither the name of the Institute nor the names of its contributors
18
* may be used to endorse or promote products derived from this software
19
* without specific prior written permission.
21
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
22
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
25
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43
#include "normalize_table.h"
48
/*
 * Comparison callback over two `struct translation` records.  Both
 * untyped arguments are cast to `const struct translation *` and the
 * result is the difference of their `key` members.
 *
 * NOTE(review): the return type, the braces and possibly other lines of
 * this definition are not visible in this excerpt.  Presumably this is
 * the comparator handed to bsearch() in compat_decomp(); the argument
 * line of that call is not visible either -- confirm in the full file.
 */
translation_cmp(const void *key, const void *data)
50
const struct translation *t1 = (const struct translation *)key;
51
const struct translation *t2 = (const struct translation *)data;
53
/*
 * NOTE(review): the type of `key` is declared elsewhere.  If it is an
 * unsigned type, this subtraction is carried out in that type before
 * being converted to the return type -- confirm the value range makes
 * the sign of the result reliable.
 */
return t1->key - t2->key;
56
/*
 * Constants used by the arithmetic in hangul_decomp() and
 * hangul_composition().
 *
 * NOTE(review): l_count, v_count and t_count are referenced by the code
 * in this file but their definitions are not visible in this excerpt.
 */
/* First value of the range [s_base, s_base + s_count) tested by both Hangul helpers. */
enum { s_base = 0xAC00};
57
/* Size of that range. */
enum { s_count = 11172};
58
/* Base added to (or subtracted from) the `l` component. */
enum { l_base = 0x1100};
60
/* Base added to (or subtracted from) the `v` component. */
enum { v_base = 0x1161};
62
/*
 * Base of the `t` component.  hangul_composition() requires
 * s_index % t_count == 0 before it adds a `t` offset.
 */
enum { t_base = 0x11A7};
64
/* Number of (v, t) combinations; used as divisor/modulus on s_index. */
enum { n_count = v_count * t_count};
67
/*
 * Arithmetic decomposition of a value in [s_base, s_base + s_count)
 * into `l`, `v` and `t` components derived from `s_index`.
 *
 * NOTE(review): only part of this definition is visible.  The
 * declarations and assignments of `u` and `s_index`, the stores into
 * `out`/`*out_len` and the non-error return values are missing from
 * this excerpt, so they are not described here.
 */
hangul_decomp(const uint32_t *in, size_t in_len,
68
uint32_t *out, size_t *out_len)
75
/* `u` lies outside [s_base, s_base + s_count); the statement taken in that case is not visible. */
if (u < s_base || u >= s_base + s_count)
78
/* Split s_index: quotient by n_count gives l, the remainder is split again by t_count into v and t. */
l = l_base + s_index / n_count;
79
v = v_base + (s_index % n_count) / t_count;
80
t = t_base + s_index % t_count;
85
/*
 * The guarding condition is not visible in this excerpt.
 * compat_decomp() tests the result of this function against
 * WIND_ERR_OVERRUN.
 */
return WIND_ERR_OVERRUN;
95
/*
 * Arithmetic composition of the pair in[0], in[1].
 *
 * Two cases are visible:
 *  - in[0] in [l_base, l_base + l_count) and in[1] in
 *    [v_base, v_base + v_count): returns
 *    (l_index * v_count + v_index) * t_count + s_base.
 *  - in[0] in [s_base, s_base + s_count) with s_index % t_count == 0 and
 *    in[1] in [t_base, t_base + t_count): returns in[0] + t_index.
 *
 * NOTE(review): the return type, the declarations of v_index/t_index
 * and the statements executed when a check fails are not visible in
 * this excerpt.  No check of `in_len` is visible before in[1] is read;
 * confirm that the full source guards against in_len < 2.
 */
hangul_composition(const uint32_t *in, size_t in_len)
99
if (in[0] >= l_base && in[0] < l_base + l_count) {
100
unsigned l_index = in[0] - l_base;
103
/* in[1] is outside the v range; the statement taken here is not visible. */
if (in[1] < v_base || in[1] >= v_base + v_count)
105
v_index = in[1] - v_base;
106
return (l_index * v_count + v_index) * t_count + s_base;
107
} else if (in[0] >= s_base && in[0] < s_base + s_count) {
108
unsigned s_index = in[0] - s_base;
111
/* Non-zero remainder: in[0] is not at a multiple of t_count from s_base (statement taken is not visible). */
if (s_index % t_count != 0)
113
/* in[1] is outside the t range; the statement taken here is not visible. */
if (in[1] < t_base || in[1] >= t_base + t_count)
115
t_index = in[1] - t_base;
116
return in[0] + t_index;
122
/*
 * Walks the `in_len` input values and, per value, tries
 * hangul_decomp() and a bsearch() lookup in _wind_normalize_table.  For
 * a table hit it calls itself on data taken from
 * _wind_normalize_val_table at the entry's `val_offset`.
 *
 * NOTE(review): only fragments of this definition are visible.  The
 * declaration and updates of `o`, the remaining call arguments, the
 * handling of each result and the success return are missing from this
 * excerpt.
 */
compat_decomp(const uint32_t *in, size_t in_len,
123
uint32_t *out, size_t *out_len)
128
for (i = 0; i < in_len; ++i) {
129
/* Search key: first member of struct translation set to the current input value (presumably `key`). */
struct translation ts = {in[i]};
130
/* *out_len minus `o`; presumably the space still free in `out` -- `o` is maintained in lines not visible here. */
size_t sub_len = *out_len - o;
133
ret = hangul_decomp(in + i, in_len - i,
136
if (ret == WIND_ERR_OVERRUN)
140
/* Binary search over _wind_normalize_table; the comparator argument is not visible in this excerpt. */
void *s = bsearch(&ts,
141
_wind_normalize_table,
142
_wind_normalize_table_size,
143
sizeof(_wind_normalize_table[0]),
146
const struct translation *t = (const struct translation *)s;
148
/* Recursive call on the table entry's replacement data. */
ret = compat_decomp(_wind_normalize_val_table + t->val_offset,
156
/* The guarding condition is not visible in this excerpt. */
return WIND_ERR_OVERRUN;
167
/*
 * Comparison callback over two uint32_t values: returns the difference
 * of their _wind_combining_class() results, so values order by that
 * class.
 *
 * NOTE(review): the return type and braces are not visible in this
 * excerpt.  Presumably this is the comparator passed to qsort() in
 * canonical_reorder(); that argument line is not visible -- confirm.
 */
cc_cmp(const void *a, const void *b)
169
const uint32_t *ua = (const uint32_t *)a;
170
const uint32_t *ub = (const uint32_t *)b;
172
return _wind_combining_class(*ua) - _wind_combining_class(*ub);
176
/*
 * Scans tmp[0..tmp_len) and sorts runs of elements in place with
 * qsort().  A run starts at index i and extends to the first index j
 * that is either tmp_len or has a _wind_combining_class() of zero.
 *
 * NOTE(review): only fragments of this definition are visible.  The
 * condition on `cc` that starts a run, the initialisation and stepping
 * of `j`, the comparator argument and the update of `i` after sorting
 * are missing from this excerpt.
 */
canonical_reorder(uint32_t *tmp, size_t tmp_len)
180
for (i = 0; i < tmp_len; ++i) {
181
int cc = _wind_combining_class(tmp[i]);
185
/* Loop condition: keep extending the run while the class at j is non-zero. */
j < tmp_len && _wind_combining_class(tmp[j]);
188
/*
 * NOTE(review): the element size is given as sizeof(unsigned) although
 * `tmp` is a uint32_t array; the two only agree where unsigned is 32
 * bits wide (sizeof(tmp[0]) would track the array type).  Also, qsort()
 * leaves the relative order of equal elements unspecified -- confirm
 * that reordering elements of equal combining class is acceptable.
 */
qsort(&tmp[i], j - i, sizeof(unsigned),
196
/*
 * Looks for a composition of the values in `in`.  It first tries
 * hangul_composition(), then iterates over _wind_canon_table and
 * _wind_canon_next_table nodes starting from index 0, until the node
 * index returns to 0.
 *
 * NOTE(review): only fragments of this definition are visible.  The
 * declarations of `cur` and `i`, how `i` is derived, what happens when a
 * check fails and the return statements are missing from this excerpt.
 */
find_composition(const uint32_t *in, unsigned in_len)
198
/* Index of the current node; the visible loop ends when this is 0 again. */
unsigned short canon_index = 0;
202
cur = hangul_composition(in, in_len);
207
const struct canon_node *c = &_wind_canon_table[canon_index];
217
/* `i` is outside the node's [next_start, next_end) window; the statement taken here is not visible. */
if (i < c->next_start || i >= c->next_end)
221
/* Next-node lookup, indexed relative to the node's window start. */
_wind_canon_next_table[c->next_offset + i - c->next_start];
222
if (canon_index != 0) {
223
/* Shift `cur` left by 4 bits and keep only the low 20 bits. */
cur = (cur << 4) & 0xFFFFF;
226
} while (canon_index != 0);
231
/*
 * Composition pass from `in` to `out`.  The visible code first skips
 * past input elements with non-zero _wind_combining_class().  It then
 * pairs out[ostarter] with a later input element and, when
 * find_composition() succeeds, writes the result back to out[ostarter].
 *
 * NOTE(review): only fragments of this definition are visible.  The
 * declarations, the body of the inner while loop, the assignment of
 * v[1], the maintenance of `ostarter`/`old_cc`, the bodies of the
 * branches and the success return are missing from this excerpt.
 */
combine(const uint32_t *in, size_t in_len,
232
uint32_t *out, size_t *out_len)
240
/* No increment in the loop header; `i` is presumably advanced inside the body (not visible). */
for (i = 0; i < in_len;) {
241
while (i < in_len && (cc = _wind_combining_class(in[i])) != 0) {
246
/* The guarding condition is not visible in this excerpt. */
return WIND_ERR_OVERRUN;
255
v[0] = out[ostarter];
258
cc = _wind_combining_class(in[i]);
259
/* Try to compose only when the class differs from `old_cc`; a non-zero result replaces out[ostarter]. */
if (old_cc != cc && (comb = find_composition(v, 2))) {
260
out[ostarter] = comb;
261
} else if (cc == 0) {
265
/* The guarding condition is not visible in this excerpt. */
return WIND_ERR_OVERRUN;
278
_wind_stringprep_normalize(const uint32_t *in, size_t in_len,
279
uint32_t *out, size_t *out_len)
285
tmp_len = in_len * 4;
286
if (tmp_len < MAX_LENGTH_CANON)
287
tmp_len = MAX_LENGTH_CANON;
288
tmp = malloc(tmp_len * sizeof(uint32_t));
292
ret = compat_decomp(in, in_len, tmp, &tmp_len);
297
canonical_reorder(tmp, tmp_len);
298
ret = combine(tmp, tmp_len, out, out_len);