3
# Note: This file is taken, and modified, from gucharmap/gen-guch-unicode-tables.pl - svn revision 1040
7
# generates in the current directory:
10
# - unicode-nameslist.h
12
# - UnicodeCategories.h
15
# usage: ./gen-guch-unicode-tables.pl UNICODE-VERSION DIRECTORY
16
# where DIRECTORY contains UnicodeData.txt Unihan.zip NamesList.txt Blocks.txt Scripts.txt
20
# Package globals naming the external helper commands: $UNZIP is used by
# process_unihan_zip and $ICONV by process_nameslist_txt.
use vars ('$UNZIP', '$ICONV');
22
# if these things aren't in your path you can put full paths to them here
26
# Forward declarations (each with a one-scalar prototype) so the subs can be
# called from the top-level code before their definitions further down.
sub process_unicode_data_txt ($);
27
sub process_unihan_zip ($);
28
sub process_nameslist_txt ($);
29
sub process_blocks_txt ($);
30
sub process_scripts_txt ($);
32
$| = 1; # flush stdout buffer
39
Usage: $0 UNICODE-VERSION DIRECTORY
41
DIRECTORY should contain the following Unicode data files:
42
UnicodeData.txt Unihan.zip NamesList.txt Blocks.txt Scripts.txt
44
which can be found at http://www.unicode.org/Public/UNIDATA/
49
# Full paths of the input files; each stays undefined until the directory
# scan below finds a matching entry.
my ($unicodedata_txt, $unihan_zip, $nameslist_txt, $blocks_txt, $scripts_txt);
53
# Walk the entries of the data directory $d ($d is assigned on a line that is
# not visible in this part of the file).
opendir (my $dir, $d) or die "Cannot open Unicode data dir $d: $!\n";
54
for my $f (readdir ($dir))
56
# The patterns are unanchored, so any entry whose name contains the stem and,
# later, the extension matches; a later match overwrites an earlier one.
$unicodedata_txt = "$d/$f" if ($f =~ /UnicodeData.*\.txt/);
57
# $unihan_zip = "$d/$f" if ($f =~ /Unihan.*\.zip/);
58
# $nameslist_txt = "$d/$f" if ($f =~ /NamesList.*\.txt/);
59
$blocks_txt = "$d/$f" if ($f =~ /Blocks.*\.txt/);
60
$scripts_txt = "$d/$f" if ($f =~ /Scripts.*\.txt/);
63
# Abort unless every input that is still in use was found. The Unihan and
# NamesList checks are disabled together with their lookups above.
defined $unicodedata_txt or die "Did not find $d/UnicodeData.txt";
64
# defined $unihan_zip or die "Did not find $d/Unihan.zip";
65
# defined $nameslist_txt or die "Did not find $d/NamesList.txt";
66
defined $blocks_txt or die "Did not find $d/Blocks.txt";
67
defined $scripts_txt or die "Did not find $d/Scripts.txt";
69
# Run the generators; only the categories, blocks and scripts headers are
# produced, the NamesList and Unihan generators are commented out.
process_unicode_data_txt ($unicodedata_txt);
70
# process_nameslist_txt ($nameslist_txt);
71
process_blocks_txt ($blocks_txt);
72
process_scripts_txt ($scripts_txt);
73
# process_unihan_zip ($unihan_zip);
78
#------------------------#
80
# Reads UnicodeData.txt and writes UnicodeCategories.h into the current
# directory. The unicode-names.h generation that used to come first is
# commented out below.
#   $unicodedata_txt: path of the UnicodeData.txt file to read.
# Dies if the input cannot be opened.
sub process_unicode_data_txt ($)
82
my ($unicodedata_txt) = @_;
86
# NOTE(review): two-argument open, and the die carries no message or $! -
# consider the three-argument form with a diagnostic.
open (my $unicodedata, $unicodedata_txt) or die;
87
# open (my $out, "> unicode-names.h") or die;
89
# Progress message on stdout (unbuffered, see the `$| = 1` at file level).
print "processing $unicodedata_txt...";
91
# print $out "/* unicode-names.h */\n";
92
# print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
93
# print $out "/* Generated by $0 */\n";
94
# print $out "/* Generated from UCD version $v */\n\n";
96
# print $out "#ifndef UNICODE_NAMES_H\n";
97
# print $out "#define UNICODE_NAMES_H\n\n";
99
# print $out "#include <glib/gunicode.h>\n\n";
100
# print $out "#include \"gucharmap-intl.h\"\n\n";
105
# while (my $line = <$unicodedata>)
108
# $line =~ /^([^;]+);([^;]+)/ or die;
114
# push @unicode_pairs, [$hex, $name];
117
# print $out "static const char unicode_names_strings[] = \\\n";
121
# foreach my $name (sort keys %names) {
122
# print $out " \"$name\\0\"\n";
123
# $names{$name} = $offset;
124
# $offset += length($name) + 1;
131
# print $out "typedef struct _UnicodeName UnicodeName;\n\n";
133
# print $out "static const struct _UnicodeName\n";
135
# print $out " gunichar index;\n";
136
# print $out " guint32 name_offset;\n";
138
# print $out "unicode_names[] =\n";
141
# my $first_line = 1;
143
# foreach my $pair (@unicode_pairs) {
144
# if (!$first_line) {
150
# my ($hex, $name) = @{$pair};
151
# my $offset = $names{$name};
152
# print $out " {0x$hex, $offset}";
155
# print $out "\n};\n\n";
158
# static inline const char * unicode_name_get_name(const UnicodeName *entry)
160
# guint32 offset = entry->name_offset;
161
# return unicode_names_strings + offset;
166
# print $out "#endif /* #ifndef UNICODE_NAMES_H */\n";
169
# undef @unicode_pairs;
171
# close ($unicodedata);
176
# Open the input again on the same handle variable. The handle was already
# opened at the top of this sub and the close() just above is commented out;
# open() on an already-open handle closes it first, so reading restarts at
# the beginning of the file.
open ($unicodedata, $unicodedata_txt) or die;
177
# The output header is created (or truncated) in the current directory.
open (my $out, "> UnicodeCategories.h") or die;
179
# Map general category code onto symbolic name.
# The entries below are listed in the same order as the EUnicodeCategory
# enumerators that this sub prints into the header further down.
183
'Lu' => "UNICODE_UPPERCASE_LETTER",
184
'Ll' => "UNICODE_LOWERCASE_LETTER",
185
'Lt' => "UNICODE_TITLECASE_LETTER",
186
'Mn' => "UNICODE_NON_SPACING_MARK",
187
'Mc' => "UNICODE_COMBINING_MARK",
188
'Me' => "UNICODE_ENCLOSING_MARK",
189
'Nd' => "UNICODE_DECIMAL_NUMBER",
190
'Nl' => "UNICODE_LETTER_NUMBER",
191
'No' => "UNICODE_OTHER_NUMBER",
192
'Zs' => "UNICODE_SPACE_SEPARATOR",
193
'Zl' => "UNICODE_LINE_SEPARATOR",
194
'Zp' => "UNICODE_PARAGRAPH_SEPARATOR",
195
'Cc' => "UNICODE_CONTROL",
196
'Cf' => "UNICODE_FORMAT",
197
'Cs' => "UNICODE_SURROGATE",
198
'Co' => "UNICODE_PRIVATE_USE",
199
'Cn' => "UNICODE_UNASSIGNED",
202
'Lm' => "UNICODE_MODIFIER_LETTER",
203
'Lo' => "UNICODE_OTHER_LETTER",
204
'Pc' => "UNICODE_CONNECT_PUNCTUATION",
205
'Pd' => "UNICODE_DASH_PUNCTUATION",
206
'Ps' => "UNICODE_OPEN_PUNCTUATION",
207
'Pe' => "UNICODE_CLOSE_PUNCTUATION",
208
'Pi' => "UNICODE_INITIAL_PUNCTUATION",
209
'Pf' => "UNICODE_FINAL_PUNCTUATION",
210
'Po' => "UNICODE_OTHER_PUNCTUATION",
211
'Sm' => "UNICODE_MATH_SYMBOL",
212
'Sc' => "UNICODE_CURRENCY_SYMBOL",
213
'Sk' => "UNICODE_MODIFIER_SYMBOL",
214
'So' => "UNICODE_OTHER_SYMBOL"
217
# these shouldn't be -1
# State for coalescing consecutive code points into ranges. The negative
# start values are sentinels: the loop below only prints a range once
# $last_codepoint is >= 0.
218
my ($codepoint, $last_codepoint, $start_codepoint) = (-999, -999, -999);
220
# Two different placeholder strings, so the first real category never
# compares equal to $last_category.
my ($category, $last_category) = ("FAKE1", "FAKE2");
221
my ($started_range, $finished_range) = (undef, undef);
223
# File banner. NOTE(review): $v (the UCD version) is not declared in this
# sub - presumably a file-level variable set from the command line; confirm.
print $out "/* UnicodeCategories.h */\n";
224
print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
225
print $out "/* Generated by $0 */\n";
226
print $out "/* Generated from UCD version $v */\n\n";
228
# Include guard and the Qt header (the generated struct uses quint32).
print $out "#ifndef UNICODE_CATEGORIES_H\n";
229
print $out "#define UNICODE_CATEGORIES_H\n\n";
230
print $out "#include <QtCore/qglobal.h>\n\n";
231
# Enumerators are printed in the same order as the category mapping table
# defined earlier in this sub, followed by UNICODE_INVALID.
print $out "enum EUnicodeCategory\n";
233
print $out " UNICODE_UPPERCASE_LETTER,\n";
234
print $out " UNICODE_LOWERCASE_LETTER,\n";
235
print $out " UNICODE_TITLECASE_LETTER,\n";
236
print $out " UNICODE_NON_SPACING_MARK,\n";
237
print $out " UNICODE_COMBINING_MARK,\n";
238
print $out " UNICODE_ENCLOSING_MARK,\n";
239
print $out " UNICODE_DECIMAL_NUMBER,\n";
240
print $out " UNICODE_LETTER_NUMBER,\n";
241
print $out " UNICODE_OTHER_NUMBER,\n";
242
print $out " UNICODE_SPACE_SEPARATOR,\n";
243
print $out " UNICODE_LINE_SEPARATOR,\n";
244
print $out " UNICODE_PARAGRAPH_SEPARATOR,\n";
245
print $out " UNICODE_CONTROL,\n";
246
print $out " UNICODE_FORMAT,\n";
247
print $out " UNICODE_SURROGATE,\n";
248
print $out " UNICODE_PRIVATE_USE,\n";
249
print $out " UNICODE_UNASSIGNED,\n";
250
print $out " UNICODE_MODIFIER_LETTER,\n";
251
print $out " UNICODE_OTHER_LETTER,\n";
252
print $out " UNICODE_CONNECT_PUNCTUATION,\n";
253
print $out " UNICODE_DASH_PUNCTUATION,\n";
254
print $out " UNICODE_OPEN_PUNCTUATION,\n";
255
print $out " UNICODE_CLOSE_PUNCTUATION,\n";
256
print $out " UNICODE_INITIAL_PUNCTUATION,\n";
257
print $out " UNICODE_FINAL_PUNCTUATION,\n";
258
print $out " UNICODE_OTHER_PUNCTUATION,\n";
259
print $out " UNICODE_MATH_SYMBOL,\n";
260
print $out " UNICODE_CURRENCY_SYMBOL,\n";
261
print $out " UNICODE_MODIFIER_SYMBOL,\n";
262
print $out " UNICODE_OTHER_SYMBOL,\n";
264
print $out " UNICODE_INVALID\n";
266
# One table row: inclusive code point range plus its category.
print $out "struct TUnicodeCategory\n";
268
print $out " quint32 start;\n";
269
print $out " quint32 end;\n";
270
print $out " EUnicodeCategory category;\n";
272
# Opening of the table that the loop below fills in.
print $out "static const TUnicodeCategory constUnicodeCategoryList[] =\n";
275
# Read UnicodeData.txt line by line and merge runs of consecutive code
# points that share a category into single { start, end, category } rows.
while (my $line = <$unicodedata>)
277
# Every line must begin with three ';'-terminated fields, otherwise the
# script dies. Field 1 is the hexadecimal code point and field 3 the
# two-letter general category code.
$line =~ /^([0-9A-F]*);([^;]*);([^;]*);/ or die;
278
# NOTE(review): these `my` declarations shadow the $codepoint / $category
# declared before the loop.
my $codepoint = hex ($1);
280
# Yields undef for a category code that is missing from the mapping table.
my $category = $mappings{$3};
283
or ($category ne $last_category)
284
or (not $started_range and $codepoint != $last_codepoint + 1))
286
# Nothing is printed while $last_codepoint still holds its negative
# start-up sentinel, i.e. before the first record has been recorded.
if ($last_codepoint >= 0) {
287
printf $out (" { 0x%4.4X, 0x%4.4X, \%s },\n", $start_codepoint, $last_codepoint, $last_category);
290
$start_codepoint = $codepoint;
293
# $name is assigned on a line not visible here (presumably field 2 of the
# record - confirm). Names of the form "<..., First>" and "<..., Last>" are
# matched here to track the $started_range / $finished_range flags.
if ($name =~ /^<.*First>$/) {
295
$finished_range = undef;
297
elsif ($name =~ /^<.*Last>$/) {
298
$started_range = undef;
301
elsif ($finished_range) {
302
$finished_range = undef;
305
# Remember this record for the comparison made on the next iteration.
$last_codepoint = $codepoint;
306
$last_category = $category;
308
# Print the range still pending after the last record was read, then
# terminate the table with an all-zero UNICODE_INVALID sentinel row.
printf $out (" { 0x%4.4X, 0x%4.4X, \%s },\n", $start_codepoint, $last_codepoint, $last_category);
309
printf $out " { 0x0, 0x0, UNICODE_INVALID }\n";
312
print $out "#endif\n";
318
#------------------------#
320
# XXX should do kFrequency too
321
# Reads the zipped Unihan database and writes unicode-unihan.h into the
# current directory. The call to this sub in the top-level code is
# currently commented out.
#   $unihan_zip: path of Unihan.zip.
sub process_unihan_zip ($)
323
my ($unihan_zip) = @_;
325
# The archive content is streamed from "$UNZIP -c".
# NOTE(review): the path is interpolated unquoted into a shell command line;
# the list form of a piped open would avoid the shell.
open (my $unihan, "$UNZIP -c $unihan_zip |") or die;
326
open (my $out, "> unicode-unihan.h") or die;
328
print "processing $unihan_zip";
330
# File banner. NOTE(review): $v is not declared in this sub - presumably a
# file-level variable; confirm.
print $out "/* unicode-unihan.h */\n";
331
print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
332
print $out "/* Generated by $0 */\n";
333
print $out "/* Generated from UCD version $v */\n\n";
335
print $out "#ifndef UNICODE_UNIHAN_H\n";
336
print $out "#define UNICODE_UNIHAN_H\n\n";
338
print $out "#include <glib/gunicode.h>\n\n";
340
print $out "typedef struct _Unihan Unihan;\n\n";
342
# One row per character: the code point followed by one gint32 per
# supported Unihan field (the parse loop writes -1 for a field that was not
# seen for the character).
print $out "static const struct _Unihan\n";
344
print $out " gunichar index;\n";
345
print $out " gint32 kDefinition;\n";
346
print $out " gint32 kCantonese;\n";
347
print $out " gint32 kMandarin;\n";
348
print $out " gint32 kTang;\n";
349
print $out " gint32 kKorean;\n";
350
print $out " gint32 kJapaneseKun;\n";
351
print $out " gint32 kJapaneseOn;\n";
353
print $out "unihan[] =\n";
360
# Per-character accumulators, one per supported field; each holds the
# string-pool offset of the field's value, or undef when not seen.
my ($kDefinition, $kCantonese, $kMandarin, $kTang, $kKorean, $kJapaneseKun, $kJapaneseOn);
363
while (my $line = <$unihan>)
366
# Only lines of the form "U+<hex> <field> <value>" are processed; anything
# else is skipped.
$line =~ /^U\+([0-9A-F]+)\s+([^\s]+)\s+(.+)$/ or next;
368
my $new_wc = hex ($1);
372
# Escape backslashes and double quotes so that the value can be written
# inside a double-quoted C string literal.
$value =~ s/\\/\\\\/g;
373
$value =~ s/\"/\\"/g;
377
# When at least one field has been collected, write a table row; fields
# that are still undefined are written as -1.
if (defined $kDefinition or defined $kCantonese or defined $kMandarin
378
or defined $kTang or defined $kKorean or defined $kJapaneseKun
379
or defined $kJapaneseOn)
381
printf $out (" { 0x%04X, \%d, \%d, \%d, \%d, \%d, \%d, \%d },\n",
383
(defined($kDefinition) ? $kDefinition : -1),
384
(defined($kCantonese) ? $kCantonese: -1),
385
(defined($kMandarin) ? $kMandarin : -1),
386
(defined($kTang) ? $kTang : -1),
387
(defined($kKorean) ? $kKorean : -1),
388
(defined($kJapaneseKun) ? $kJapaneseKun : -1),
389
(defined($kJapaneseOn) ? $kJapaneseOn : -1));
403
# NOTE(review): a qw() list used directly as the loop list, without
# enclosing parentheses, is a syntax error in perl 5.18 and later; it needs
# to become `for my $f (qw(...))`.
for my $f qw(kDefinition kCantonese kMandarin
404
kTang kKorean kJapaneseKun kJapaneseOn) {
407
# Append the string to the pool and replace $value by the offset at which it
# starts; every pooled string is followed by a NUL, hence the "+ 1".
# NOTE(review): length() is taken after the C-escaping above, so a value
# containing a backslash or a double quote would advance $offset by more
# than the compiled string occupies - confirm whether that can occur.
push @strings, $value;
408
my $last_offset = $offset;
409
$offset += length($value) + 1;
410
$value = $last_offset;
415
# Store the offset in the accumulator that matches the field name.
if ($field eq "kDefinition") {
416
$kDefinition = $value;
418
elsif ($field eq "kCantonese") {
419
$kCantonese = $value;
421
elsif ($field eq "kMandarin") {
424
elsif ($field eq "kTang") {
427
elsif ($field eq "kKorean") {
430
elsif ($field eq "kJapaneseKun") {
431
$kJapaneseKun = $value;
433
elsif ($field eq "kJapaneseOn") {
434
$kJapaneseOn = $value;
437
# True once every 32768 evaluations (the body is not visible here).
if ($i++ % 32768 == 0) {
444
# Write the string pool as one C string built from adjacent literals, each
# ending in an explicit \0; the offsets stored in the table index into it.
print $out "static const char unihan_strings[] = \\\n";
446
for my $s (@strings) {
447
print $out " \"$s\\0\"\n";
451
# Forward declaration of the lookup helper that the generated accessors call.
print $out "static const Unihan *_get_unihan (gunichar uc)\n;";
453
# One pair of C accessor functions is generated per field name.
# NOTE(review): a qw() list used directly as the loop list, without
# enclosing parentheses, is a syntax error in perl 5.18 and later.
for my $name qw(kDefinition kCantonese kMandarin
454
kTang kKorean kJapaneseKun kJapaneseOn) {
457
static inline const char * unihan_get_$name (const Unihan *uh)
459
gint32 offset = uh->$name;
462
return unihan_strings + offset;
465
G_CONST_RETURN gchar *
466
gucharmap_get_unicode_$name (gunichar uc)
468
const Unihan *uh = _get_unihan (uc);
472
return unihan_get_$name (uh);
478
# Close the include guard.
print $out "#endif /* #ifndef UNICODE_UNIHAN_H */\n";
486
#------------------------#
490
# 0x0027 => { '=' => {
492
# 'values' => [ 'APOSTROPHE-QUOTE', 'APL quote' ]
496
# 'values' => [ 'neutral (vertical) glyph with mixed usage',
497
# '2019 is preferred for apostrophe',
498
# 'preferred characters in English for paired quotation marks are 2018 & 2019'
506
# Parses NamesList.txt into $nameslist_hash and later writes
# unicode-nameslist.h. The call to this sub in the top-level code is
# currently commented out.
#   $nameslist_txt: path of NamesList.txt.
sub process_nameslist_txt ($)
508
my ($nameslist_txt) = @_;
510
# The file is read through $ICONV, converting it from ISO8859-1 to UTF-8.
# NOTE(review): the path is interpolated unquoted into a shell command line.
open (my $nameslist, "$ICONV -f 'ISO8859-1' -t 'UTF-8' $nameslist_txt |") or die;
512
print "processing $nameslist_txt...";
514
# One running counter per annotation kind ('=', 'x', '*', '#', ':'). The
# current counter value is stored as the 'index' of a character the first
# time that kind is seen for it.
my ($equal_i, $ex_i, $star_i, $pound_i, $colon_i) = (0, 0, 0, 0, 0);
519
while (my $line = <$nameslist>)
527
# A line starting with hex digits.
elsif ($line =~ /^([0-9A-F]+)/)
531
# "= text" annotation line. As for the other text kinds below, the value is
# escaped for use inside a double-quoted C string literal.
elsif ($line =~ /^\s+=\s+(.+)$/)
534
$value =~ s/\\/\\\\/g;
535
$value =~ s/\"/\\"/g;
537
if (not defined $nameslist_hash->{$wc}->{'='}->{'index'}) {
538
$nameslist_hash->{$wc}->{'='}->{'index'} = $equal_i;
540
push (@{$nameslist_hash->{$wc}->{'='}->{'values'}}, $value);
544
# "* text" annotation line.
elsif ($line =~ /^\s+\*\s+(.+)$/)
547
$value =~ s/\\/\\\\/g;
548
$value =~ s/\"/\\"/g;
550
if (not defined $nameslist_hash->{$wc}->{'*'}->{'index'}) {
551
$nameslist_hash->{$wc}->{'*'}->{'index'} = $star_i;
553
push (@{$nameslist_hash->{$wc}->{'*'}->{'values'}}, $value);
557
# "# text" annotation line.
elsif ($line =~ /^\s+#\s+(.+)$/)
560
$value =~ s/\\/\\\\/g;
561
$value =~ s/\"/\\"/g;
563
if (not defined $nameslist_hash->{$wc}->{'#'}->{'index'}) {
564
$nameslist_hash->{$wc}->{'#'}->{'index'} = $pound_i;
566
push (@{$nameslist_hash->{$wc}->{'#'}->{'values'}}, $value);
570
# ": text" annotation line.
elsif ($line =~ /^\s+:\s+(.+)$/)
573
$value =~ s/\\/\\\\/g;
574
$value =~ s/\"/\\"/g;
576
if (not defined $nameslist_hash->{$wc}->{':'}->{'index'}) {
577
$nameslist_hash->{$wc}->{':'}->{'index'} = $colon_i;
579
push (@{$nameslist_hash->{$wc}->{':'}->{'values'}}, $value);
583
# "x ..." cross-reference line: only the 4 to 6 hex digits directly before
# the closing parenthesis are kept, and they are stored as a number rather
# than as text.
elsif ($line =~ /^\s+x\s+.*([0-9A-F]{4,6})\)$/) # this one is different
585
my $value = hex ($1);
587
if (not defined $nameslist_hash->{$wc}->{'x'}->{'index'}) {
588
$nameslist_hash->{$wc}->{'x'}->{'index'} = $ex_i;
590
push (@{$nameslist_hash->{$wc}->{'x'}->{'values'}}, $value);
598
# Create (or truncate) the output header in the current directory.
open (my $out, "> unicode-nameslist.h") or die;
600
# File banner. NOTE(review): $v is not declared in this sub - presumably a
# file-level variable; confirm.
print $out "/* unicode-nameslist.h */\n";
601
print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
602
print $out "/* Generated by $0 */\n";
603
print $out "/* Generated from UCD version $v */\n\n";
605
print $out "#ifndef UNICODE_NAMESLIST_H\n";
606
print $out "#define UNICODE_NAMESLIST_H\n\n";
608
print $out "#include <glib/gunicode.h>\n\n";
610
print $out "typedef struct _UnicharString UnicharString;\n";
611
print $out "typedef struct _UnicharUnichar UnicharUnichar;\n";
612
print $out "typedef struct _NamesList NamesList;\n\n";
614
# Row type for the text tables: code point plus one string.
print $out "struct _UnicharString\n";
616
print $out " gunichar index;\n";
617
print $out " const gchar *value;\n";
618
print $out "}; \n\n";
620
# Row type for the cross-reference table: code point plus the code point it
# refers to.
print $out "struct _UnicharUnichar\n";
622
print $out " gunichar index;\n";
623
print $out " gunichar value;\n";
624
print $out "}; \n\n";
626
# Row type for the per-character index table: one index per annotation kind.
print $out "struct _NamesList\n";
628
print $out " gunichar index;\n";
629
print $out " gint equals_index; /* -1 means */\n";
630
print $out " gint stars_index; /* this character */\n";
631
print $out " gint exes_index; /* doesn't */\n";
632
print $out " gint pounds_index; /* have any */\n";
633
print $out " gint colons_index;\n";
636
# Five tables follow, one per annotation kind. Each walks the characters in
# ascending numeric code point order, skips characters without that kind,
# prints one row per stored value and ends with a (gunichar)(-1) sentinel.

# '=' entries.
print $out "static const UnicharString names_list_equals[] = \n";
638
for $wc (sort {$a <=> $b} keys %{$nameslist_hash})
640
next if not exists $nameslist_hash->{$wc}->{'='};
641
for my $value (@{$nameslist_hash->{$wc}->{'='}->{'values'}}) {
642
printf $out (qq/ { 0x%04X, "\%s" },\n/, $wc, $value);
645
print $out " { (gunichar)(-1), 0 }\n";
648
# '*' entries.
print $out "static const UnicharString names_list_stars[] = \n";
650
for $wc (sort {$a <=> $b} keys %{$nameslist_hash})
652
next if not exists $nameslist_hash->{$wc}->{'*'};
653
for my $value (@{$nameslist_hash->{$wc}->{'*'}->{'values'}}) {
654
printf $out (qq/ { 0x%04X, "\%s" },\n/, $wc, $value);
657
print $out " { (gunichar)(-1), 0 }\n";
660
# '#' entries.
print $out "static const UnicharString names_list_pounds[] = \n";
662
for $wc (sort {$a <=> $b} keys %{$nameslist_hash})
664
next if not exists $nameslist_hash->{$wc}->{'#'};
665
for my $value (@{$nameslist_hash->{$wc}->{'#'}->{'values'}}) {
666
printf $out (qq/ { 0x%04X, "\%s" },\n/, $wc, $value);
669
print $out " { (gunichar)(-1), 0 }\n";
672
# 'x' entries: the value is a code point, so it is printed as hex instead of
# as a quoted string.
print $out "static const UnicharUnichar names_list_exes[] = \n";
674
for $wc (sort {$a <=> $b} keys %{$nameslist_hash})
676
next if not exists $nameslist_hash->{$wc}->{'x'};
677
for my $value (@{$nameslist_hash->{$wc}->{'x'}->{'values'}}) {
678
printf $out (qq/ { 0x%04X, 0x%04X },\n/, $wc, $value);
681
print $out " { (gunichar)(-1), 0 }\n";
684
# ':' entries.
print $out "static const UnicharString names_list_colons[] = \n";
686
for $wc (sort {$a <=> $b} keys %{$nameslist_hash})
688
next if not exists $nameslist_hash->{$wc}->{':'};
689
for my $value (@{$nameslist_hash->{$wc}->{':'}->{'values'}}) {
690
printf $out (qq/ { 0x%04X, "\%s" },\n/, $wc, $value);
693
print $out " { (gunichar)(-1), 0 }\n";
696
# Per-character index table: for every character, the 'index' recorded for
# each annotation kind while parsing, or -1 when it has none of that kind.
print $out "static const NamesList names_list[] =\n";
698
for $wc (sort {$a <=> $b} keys %{$nameslist_hash})
700
# Testing `exists` on the nested 'index' key autovivifies the intermediate
# per-kind hash when it was absent.
my $eq = exists $nameslist_hash->{$wc}->{'='}->{'index'} ? $nameslist_hash->{$wc}->{'='}->{'index'} : -1;
701
my $star = exists $nameslist_hash->{$wc}->{'*'}->{'index'} ? $nameslist_hash->{$wc}->{'*'}->{'index'} : -1;
702
my $ex = exists $nameslist_hash->{$wc}->{'x'}->{'index'} ? $nameslist_hash->{$wc}->{'x'}->{'index'} : -1;
703
my $pound = exists $nameslist_hash->{$wc}->{'#'}->{'index'} ? $nameslist_hash->{$wc}->{'#'}->{'index'} : -1;
704
my $colon = exists $nameslist_hash->{$wc}->{':'}->{'index'} ? $nameslist_hash->{$wc}->{':'}->{'index'} : -1;
706
# Column order matches the fields of the generated _NamesList struct:
# equals, stars, exes, pounds, colons.
printf $out (" { 0x%04X, \%d, \%d, \%d, \%d, \%d },\n", $wc, $eq, $star, $ex, $pound, $colon);
710
print $out "#endif /* #ifndef UNICODE_NAMESLIST_H */\n";
717
#------------------------#
719
# Reads Blocks.txt and writes UnicodeBlocks.h into the current directory:
# a table of { start, end, name } rows, one per Unicode block.
#   $blocks_txt: path of the Blocks.txt file to read.
# Dies if the input or the output file cannot be opened.
sub process_blocks_txt ($)
721
my ($blocks_txt) = @_;
723
# NOTE(review): two-argument open and a die without message or $!.
open (my $blocks, $blocks_txt) or die;
724
open (my $out, "> UnicodeBlocks.h") or die;
726
print "processing $blocks_txt...";
728
# File banner. NOTE(review): $v is not declared in this sub - presumably a
# file-level variable; confirm.
print $out "/* UnicodeBlocks.h */\n";
729
print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
730
print $out "/* Generated by $0 */\n";
731
print $out "/* Generated from UCD version $v */\n\n";
733
print $out "#ifndef __UNICODE_BLOCKS_H__\n";
734
print $out "#define __UNICODE_BLOCKS_H__\n\n";
736
print $out "#include <QtCore/qglobal.h>\n";
737
print $out "#include <klocalizedstring.h>\n\n";
739
print $out "struct TUnicodeBlock\n";
741
print $out " quint32 start,\n";
742
print $out " end;\n";
743
print $out " const char *blockName;\n";
745
print $out "static const struct TUnicodeBlock constUnicodeBlocks[] =\n";
747
while (my $line = <$blocks>)
749
# Only lines of the form "XXXX..YYYY; Block Name" produce a row; all other
# lines are skipped. The block name is wrapped in I18N_NOOP(). It is
# inserted into the C string literal as-is, without any escaping.
$line =~ /^([0-9A-F]+)\.\.([0-9A-F]+); (.+)$/ or next;
750
print $out qq/ { 0x$1, 0x$2, I18N_NOOP("$3") },\n/;
752
# Sentinel row terminating the table.
print $out " { 0x0, 0x0, NULL }\n";
755
print $out "#endif\n\n";
763
#------------------------#
765
# Reads Scripts.txt and writes UnicodeScripts.h into the current directory:
# a sorted list of script names and a table mapping code point ranges to
# indexes into that list.
#   $scripts_txt: path of the Scripts.txt file to read.
# Dies if the input or the output file cannot be opened.
sub process_scripts_txt ($)
767
my ($scripts_txt) = @_;
772
# NOTE(review): two-argument open and a die without message or $!.
open (my $scripts, $scripts_txt) or die;
773
open (my $out, "> UnicodeScripts.h") or die;
775
print "processing $scripts_txt...";
777
while (my $line = <$scripts>)
779
my ($start, $end, $raw_script);
781
# Two record shapes are recognised: a range "XXXX..YYYY ; Script" and a
# single code point "XXXX ; Script".
if ($line =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s+;\s+(\S+)/)
787
elsif ($line =~ /^([0-9A-F]+)\s+;\s+(\S+)/)
798
my $script = $raw_script;
800
# Normalise case: within every run of word characters the first character
# is upper-cased and the rest lower-cased.
$script =~ s/(\w+)/\u\L$1/g;
802
# Ranges are keyed by their start; %scripts collects the distinct names.
$script_hash{$start} = { 'end' => $end, 'script' => $script };
803
$scripts{$script} = 1;
808
# Add "Common" explicitly so that the script also works with UCD <= 4.0.0.
809
$scripts{"Common"} = 1;
811
# File banner. NOTE(review): $v is not declared in this sub - presumably a
# file-level variable; confirm.
print $out "/* UnicodeScripts.h */\n";
812
print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
813
print $out "/* Generated by $0 */\n";
814
print $out "/* Generated from UCD version $v */\n\n";
816
print $out "#ifndef __UNICODE_SCRIPTS_H__\n";
817
print $out "#define __UNICODE_SCRIPTS_H__\n\n";
819
print $out "#include <QtCore/qglobal.h>\n";
820
print $out "#include <klocalizedstring.h>\n\n";
822
# Script names in string-sorted order, NULL-terminated.
print $out "static const char * const constUnicodeScriptList[] =\n";
825
for my $script (sort keys %scripts)
827
# Replace the presence flag by $i so the range table below can look the
# value up by script name ($i is maintained on lines not visible here).
$scripts{$script} = $i;
828
print $out qq/ I18N_NOOP("$script"),\n/;
831
print $out " NULL\n";
834
print $out "struct TUnicodeScript\n";
836
print $out " quint32 start,\n";
837
print $out " end;\n";
838
print $out " int scriptIndex; /* index into constUnicodeScriptList */\n";
840
# Range table in ascending numeric order of the start key, closed by a
# sentinel row with index -1.
print $out "static const TUnicodeScript constUnicodeScripts[] =\n";
842
for my $start (sort { $a <=> $b } keys %script_hash)
844
printf $out (qq/ { 0x%04X, 0x%04X, \%2d },\n/,
845
$start, $script_hash{$start}->{'end'}, $scripts{$script_hash{$start}->{'script'}});
847
printf $out " { 0x0, 0x0, -1 }\n";
850
print $out "#endif\n\n";