4
%% Copyright Ericsson AB 2000-2010. All Rights Reserved.
6
%% The contents of this file are subject to the Erlang Public License,
7
%% Version 1.1, (the "License"); you may not use this file except in
8
%% compliance with the License. You should have received a copy of the
9
%% Erlang Public License along with this software. If not, it can be
10
%% retrieved online at http://www.erlang.org/.
12
%% Software distributed under the License is distributed on an "AS IS"
13
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
14
%% the License for the specific language governing rights and limitations
20
%% This program is used to generate a header file with data for
21
%% normalizing denormalized unicode.
23
%% The C header is generated from a text file containing tuples in the
25
%% {RevList,Translation}
26
%% Where 'RevList' is a reversed list of the denormalized repressentation of
27
%% the character 'Translation'. An example would be the swedish character
28
%% '�', which would be represented in the file as:
29
%% {[776,111],246}, as the denormalized representation of codepoint 246
30
%% is [111,776] (i.e an 'o' followed by the "double dot accent character 776),
31
%% while '�' instead is represented as {[776,97],228}, as the denormalized
32
%% form would be [97,776] (same accent but an 'a' instead).
33
%% The datafile is generated from the table on Apple's developer connection
34
%% http://developer.apple.com/library/mac/#technotes/tn/tn1150table.html
35
%% The generating is done whenever new data is present (i.e. dec.dat has
36
%% to be changed) and not for every build. The product (the C header) is copied
37
%% to $ERL_TOP/erts/beam after generation and checked in.
38
%% The program and the data file is included for reference.
44
-define(HASH_SIZE_FACTOR,2).
45
-define(BIG_PREFIX_SIZE,392).
47
-define(INPUT_FILE_NAME,"dec.dat").
48
-define(OUTPUT_FILE_NAME,"erl_unicode_normalize.h").
51
{ok,L} = file:consult(FName),
53
length(A) > 1% , hd(A) < 769
57
L = read(?INPUT_FILE_NAME),
59
{ok,Out} = file:open(?OUTPUT_FILE_NAME,[write]),
63
"* %CopyrightBegin%~n"
65
"* Copyright Ericsson AB 1999-2010. All Rights Reserved.~n"
67
"* The contents of this file are subject to the Erlang Public License,~n"
68
"* Version 1.1, (the \"License\"); you may not use this file except in~n"
69
"* compliance with the License. You should have received a copy of the~n"
70
"* Erlang Public License along with this software. If not, it can be~n"
71
"* retrieved online at http://www.erlang.org/.~n"
73
"* Software distributed under the License is distributed on an "
75
"* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See~n"
76
"* the License for the specific language governing rights and "
78
"* under the License.~n"
83
"* This file is automatically generated by ~p.erl, "
84
"do not edit manually~n"
89
"#define HASH_SIZE_FACTOR ~w~n"
90
"typedef struct _compose_entry {~n"
94
" struct _compose_entry *subs;~n"
97
"static int compose_tab_size = ~p;~n",
98
[?HASH_SIZE_FACTOR,length(G)]),
100
PreTab = tuple_to_list(make_prefix_table(G,erlang:make_tuple(102,0))),
101
dump_prefixes(Out,PreTab),
102
%% Using this cuts down on the searching in the
103
%% actual implementation, but wastes memory with little real gain..
104
%% LL = lists:flatten([PartList || {PartList,_} <- L]),
105
%% BigPreTab = tuple_to_list(
106
%% make_big_prefixes(LL,
107
%% erlang:make_tuple(?BIG_PREFIX_SIZE,0))),
108
%% dump_big_prefixes(Out,BigPreTab),
116
d_top_hash(Out,List,D,C),
118
d_sub(_Out,[],_D,_C) ->
120
d_sub(Out,[{_CP,[],_Res}|T],D,C) ->
122
d_sub(Out,[{_CP,Subs,_Res0}|T],D,C) ->
126
io:format(Out,"static CompEntry ~s[] = {~n",[format_depth(D)]),
128
io:format(Out,"}; /* ~s */ ~n",[format_depth(D)]).
130
d_top_1(_Out,[],_D,_C) ->
132
d_top_1(Out,[{CP,[],Res}|T],D,C) ->
134
"{~w, ~w, 0, NULL, NULL}",[CP,Res]),
137
io:format(Out,"~n",[]);
139
io:format(Out,",~n",[])
141
d_top_1(Out,T,D,C+1);
142
d_top_1(Out,[{CP,Subs,_Res}|T],D,C) ->
144
"{~w, 0, ~w, ~s, ~s}",[CP,length(Subs),
146
"hash_"++format_depth([C|D])]),
149
io:format(Out,"~n",[]);
151
io:format(Out,",~n",[])
153
d_top_1(Out,T,D,C+1).
156
d_top_hash(Out,List,D,_C) ->
157
HSize = length(List)*?HASH_SIZE_FACTOR,
158
io:format(Out,"static int ~s[~p] = ~n",["hash_"++format_depth(D),HSize]),
159
Tup = d_top_hash_1(List,0,erlang:make_tuple(HSize,-1),HSize),
160
io:format(Out,"~p; /* ~s */ ~n",[Tup,"hash_"++format_depth(D)]).
162
d_top_hash_1([],_,Hash,_HSize) ->
164
d_top_hash_1([{CP,_,_}|T],Index,Hash,HSize) ->
165
Bucket = hash_search(Hash,HSize,CP rem HSize),
166
d_top_hash_1(T,Index+1,erlang:setelement(Bucket+1,Hash,Index),HSize).
168
hash_search(Hash,_HSize,Bucket) when element(Bucket+1,Hash) =:= -1 ->
170
hash_search(Hash,HSize,Bucket) ->
171
hash_search(Hash,HSize,(Bucket + 1) rem HSize).
174
lists:reverse(tl(lists:reverse(lists:flatten(["compose_tab_",[ integer_to_list(X) ++ "_" || X <- lists:reverse(D) ]])))).
179
make_prefix_table([],Table) ->
181
make_prefix_table([{C,_,_}|T],Table) when C =< 4023 ->
182
Index = (C div 32) + 1 - 24,
184
X = element(Index,Table),
185
Y = X bor (1 bsl Pos),
186
NewTab = setelement(Index,Table,Y),
187
make_prefix_table(T,NewTab);
188
make_prefix_table([_|T],Tab) ->
189
make_prefix_table(T,Tab).
191
dump_prefixes(Out,L) ->
192
io:format(Out,"#define COMP_CANDIDATE_MAP_OFFSET 24~n",[]),
193
io:format(Out,"static Uint32 comp_candidate_map[] = {~n",[]),
194
dump_prefixes_1(Out,L).
195
dump_prefixes_1(Out,[H]) ->
196
io:format(Out," 0x~8.16.0BU~n",[H]),
197
io:format(Out,"};~n",[]);
198
dump_prefixes_1(Out,[H|T]) ->
199
io:format(Out," 0x~8.16.0BU,~n",[H]),
200
dump_prefixes_1(Out,T).
202
%% make_big_prefixes([],Table) ->
204
%% make_big_prefixes([C|T],Table) ->
205
%% Index = (C div 32) + 1,
207
%% X = element(Index,Table),
208
%% Y = X bor (1 bsl Pos),
209
%% NewTab = setelement(Index,Table,Y),
210
%% make_big_prefixes(T,NewTab).
212
%% dump_big_prefixes(Out,L) ->
213
%% io:format(Out,"#define BIG_COMP_CANDIDATE_SIZE ~w~n", [?BIG_PREFIX_SIZE]),
214
%% io:format(Out,"static Uint32 big_comp_candidate_map[] = {~n",[]),
215
%% dump_prefixes_1(Out,L).
218
{lists:reverse(Acc),[]};
219
pick([{[H|TT],N}|T],H,Acc) ->
220
pick(T,H,[{TT,N}|Acc]);
221
pick([{[H|_],_}|_]=L,M,Acc) when H =/= M ->
222
{lists:reverse(Acc),L}.
227
group([{[H],N}|T]) ->
228
{Part,Rest} = pick(T,H,[]),
229
[{H,group(Part),N}| group(Rest)];
230
group([{[H|_],_}|_]=L) ->
231
{Part,Rest} = pick(L,H,[]),
232
[{H,group(Part),0}| group(Rest)].