2
# ***** BEGIN LICENSE BLOCK *****
3
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
5
# The contents of this file are subject to the Mozilla Public License Version
6
# 1.1 (the "License"); you may not use this file except in compliance with
7
# the License. You may obtain a copy of the License at
8
# http://www.mozilla.org/MPL/
10
# Software distributed under the License is distributed on an "AS IS" basis,
11
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12
# for the specific language governing rights and limitations under the
15
# The Original Code is Mozilla Communicator.
17
# The Initial Developer of the Original Code is
18
# Jungshik Shin <jshin@mailaps.org>.
19
# Portions created by the Initial Developer are Copyright (C) 2002, 2003
20
# the Initial Developer. All Rights Reserved.
24
# Alternatively, the contents of this file may be used under the terms of
25
# either the GNU General Public License Version 2 or later (the "GPL"), or
26
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27
# in which case the provisions of the GPL or the LGPL are applicable instead
28
# of those above. If you wish to allow use of your version of this file only
29
# under the terms of either the GPL or the LGPL, and not to allow others to
30
# use your version of this file under the terms of the MPL, indicate your
31
# decision by deleting the provisions above and replace them with the notice
32
# and other provisions required by the GPL or the LGPL. If you do not delete
33
# the provisions above, a recipient may use your version of this file under
34
# the terms of any one of the MPL, the GPL or the LGPL.
36
# ***** END LICENSE BLOCK *****
38
# This script is used to generate precompiled CCMap files.
39
# See bug 180266 for details.
41
# Revised to support extended CCMaps for non-BMP characters : 2003-09-19 (bug 205387)
42
# Revised to support the automatic generation of a macro defining the size
43
# of a CCMap in terms of PRUint16 : 2003-12-11 (bug 224337)
48
use vars qw($fill_fmt $fu_sz);
49
use vars qw($e_mid_offset $e_pg_offset);
51
(@ARGV < 1 ) and usage();
55
my ($ifh, $variable, $class);
56
open $ifh , "< $ifn" or die "Cannot open $ifn";
61
"$0:\n\t VARIABLE $variable is specified in the command line.\n" .
62
"\t The variable name spec. in the input file will be ignored.\n";
68
"$0:\n\t CLASS $class is specified in the command line.\n" .
69
"\t The class spec. in the input file will be ignored.\n";
72
use constant N_PLANES => 17; # BMP + 16 non-BMP planes
73
use constant PLANE_SZ => 0x10000;
74
use constant MID_SZ => PLANE_SZ / 16;
75
use constant PG_SZ => MID_SZ / 16;
77
# Unlike FillInfo() method in Mozilla, let's use 16bit integer
78
# to pack the character coverage/representability. This way,
79
# we can just copy fillinfo to fill up page maps later.
81
FILL_SZ => PLANE_SZ / 16,
82
MID_FILL_SZ => MID_SZ / 16,
83
PG_FILL_SZ => PG_SZ / 16
88
# network byte order short. actually, byte order doesn't matter.
90
$fu_sz = length(pack $fill_fmt, 0); # fillinfo unit size in byte (size of short)
100
my $planes = &read_input(\@fillinfo,$ifh,\%comments);
102
if (!defined($variable) && !defined($comments{'VARIABLE'}))
104
printf STDERR "Variable name is not specified in the cmd line. " .
105
"Neither is it found in the input file.\n\n" ;
109
$variable = $comments{'VARIABLE'} if (! defined($variable));
111
if (!defined($class) && !defined($comments{'CLASS'}))
113
printf STDERR "Class name is not specified in the cmd line. " .
114
"Neither is it found in the input file.\n\n" ;
118
$class = $comments{'CLASS'} if (! defined($class));
120
my $have_non_bmp = 0;
122
# add the non_bmp flag and the bmp ccmap size (default to 0)
123
# at the very beginning if there are non-bmp characters.
124
if ($planes & 0x1fe) {
129
my $plane_idx_offset;
130
foreach my $plane (0 .. ($have_non_bmp ? 16 : 0))
132
my @plane_ccmap = add_plane(\@ccmap, \@fillinfo, $plane);
133
my $size = @plane_ccmap;
134
push @ccmap, @plane_ccmap;
135
if ($plane == 0 && $have_non_bmp) {
137
# add 2 for non-BMP flag and BMP plane size
138
# that have negative indices in C++.
139
$plane_idx_offset = $size + 2;
141
# 'Flag' the offset as holding the plane indices (any negative
143
$pg_flags{$plane_idx_offset} = -1;
144
$pg_flags{$plane_idx_offset + 16} = -1;
146
# plane indices are 16 PRUint32's(not 16 PRUint16's).
147
# In Perl, we assign each PRUint32 two slots in @ccmap (in BE order)
148
my $e_plane_offset = $size + 16 * 2;
150
# set plane indices to the empty plane by default
151
foreach my $i (1 .. 16) {
152
# split PRUint32 into two PRUint16's in BE
153
push @ccmap, $e_plane_offset >> 16;
154
push @ccmap, $e_plane_offset & 0xffff;
156
# add 'the' empty plane;
157
push @ccmap, (0) x 16;
161
# split PRUint32 into two PRUint16's in BE.
162
# subtract 2 for non-BMP flag and BMP plane size
163
# that have negative indices in C++.
164
$ccmap[$plane_idx_offset + ($plane - 1) * 2] = (@ccmap - $size - 2) >> 16;
165
$ccmap[$plane_idx_offset + ($plane - 1) * 2 + 1] = (@ccmap - $size -2) & 0xffff;
170
&print_ccmap(\@ccmap, \%pg_flags, $variable, $class, \%comments, $have_non_bmp);
178
print STDERR <<USAGE;
179
Usage: $0 input_file [variable [class]]
181
The output file "class.ccmap" will be generated with
182
all three cases LE(16/32/64bit)/BE(16bit), BE(32bit), and BE(64bit)
183
put together. 'variable' will be used to name two macros, one for
184
dimensioning the size of a PRUin16[] and the other for the array
187
When 'variable' is omitted, it has to be specified in the input file with
188
the following syntax.
192
When 'class' is omitted, it has to be specified in the input file with
193
the following syntax.
204
my($fillinfo_p, $input, $comments_p) = @_;
205
@$fillinfo_p = (0) x (FILL_SZ * N_PLANES);
207
# init bitfield for plane flags (17bits : BMP + 16 non-BMP planes)
214
/^\s*VARIABLE::\s*([a-zA-Z][a-zA-Z0-9_]*)$/ and
215
$comments_p->{'VARIABLE'} = $1,
218
($comments_p->{'CLASS'} = $_) =~ s/^\s*CLASS::\s*([a-zA-Z0-9_]+).*$/$1/,
220
/^\s*DESCRIPTION::/ and
221
($comments_p->{'DESC'} = $_) =~ s/^\s*DESCRIPTION::\s*//, next;
223
($comments_p->{'FILE'} = $_) =~ s/^\s*FILE::\s*//, next;
225
next unless /^\s*0[Xx][0-9A-Fa-f]{4}/;
228
my ($u, $comment) = split /\s+/, $1, 2;
231
next if /^0x.*[^0-9a-f]+.*/;
234
if ( 0xd800 <= $usv && $usv <= 0xdfff || # surrogate code points
236
printf STDERR "Invalid input $u at %4d\n", $lc;
239
$fillinfo_p->[($usv >> 4)] |= (1 << ($usv & 0x0f));
240
# printf STDERR "input %s(%04x) \@line %d : put %04x @ %04x\n",
241
# $u,$usv, $lc, (1 << ($usv & 0x0f)), ($usv >> 4) & 0xfff;
243
# turn on plane flags
244
$planes |= (1 << ($usv >> 16));
246
my $key = sprintf("0X%06X", $usv);
247
$comments_p->{$key} = "";
249
# Remove '/*' and '*/' (C style comment) or '//' (C++ style comment)
250
# or ':' and store only the textual content of the comment.
251
if (defined($comment)) {
252
($comments_p->{$key} = $comment)
254
(?:/\*|//|:)? # '/*', '//' or ':' or NULL. Do not store.
255
\s* # zero or more of white space(s)
256
([^*]+) # one or more characters other than '*'. Store it
257
# in $1 for the reference in replace part.
258
\s* # zero or more of white space(s)
259
(?:\*/)? # '*/' or NONE. Do not store
260
!$1!sx # replace the whole match with $1 stored above.
269
my($ccmap_p, $f_pg_offset) = @_;
270
# add a full page if not yet added.
271
if (! $f_pg_offset) {
272
$f_pg_offset = @$ccmap_p;
273
push @$ccmap_p, (0xffff) x 16;
275
# add the full mid-pointer array with all the pointers pointing to the full page.
276
my $f_mid_offset = @$ccmap_p;
277
push @$ccmap_p, ($f_pg_offset) x 16;
278
return ($f_mid_offset, $f_pg_offset);
283
my($ccmap_p, $mid) = @_;
284
my $mid_offset = @$ccmap_p;
285
$ccmap_p->[$mid] = $mid_offset;
286
#by default, all mid-pointers point to the empty page.
287
push @$ccmap_p, ($e_pg_offset) x 16;
293
my ($full_ccmap_p, $fillinfo_p, $plane) = @_;
294
# my @ccmap = @$ccmap_p;
295
my @ccmap = (); # plane ccmap
296
my(@fillinfo) = splice @$fillinfo_p, 0, FILL_SZ;
297
# convert 4096(FILL_SZ) 16bit integers to a string of 4096 * $fu_sz
299
my($plane_str) = pack $fill_fmt x FILL_SZ, @fillinfo;
302
if ($plane_str eq "\0" x ($fu_sz * FILL_SZ)) {
303
# for non-BMP plane, the default empty plane ccmap would work.
304
# for BMP, we need 'self-referring' folded CCMap (the smallest CCMap)
305
push @ccmap, (0) x 16 if (!$plane);
309
#get all upper pointers to point at empty mid pointers
310
push @ccmap, ($e_mid_offset) x 16;
311
#get all mid-pointers to point at empty page.
312
push @ccmap, ($e_pg_offset) x 16;
313
push @ccmap, (0) x 16; # empty pg
315
my $f_mid_offset = 0;
318
foreach my $mid (0 .. 15)
320
my(@mid_fill) = splice @fillinfo, 0, MID_FILL_SZ;
321
# convert 256(MID_FILL_SZ) 16bit integers to a string of 256 * $fu_sz
323
my($mid_str) = pack $fill_fmt x MID_FILL_SZ, @mid_fill;
325
# for an empty mid, upper-pointer is already pointing to the empty mid.
326
next if ($mid_str eq "\0" x ($fu_sz * MID_FILL_SZ));
328
# for a full mid, add full mid if necessary.
329
if ($mid_str eq "\xff" x ($fu_sz * MID_FILL_SZ)) {
330
($f_mid_offset, $f_pg_offset) =
331
add_full_mid(\@ccmap, $f_pg_offset) unless ($f_mid_offset);
332
$ccmap[$mid] = $f_mid_offset;
336
my $mid_offset = add_new_mid(\@ccmap,$mid);
338
foreach my $pg (0 .. 15) {
339
my(@pg_fill) = splice @mid_fill, 0, PG_FILL_SZ;
340
my($pg_str) = pack $fill_fmt x PG_FILL_SZ, @pg_fill;
342
# for an empty pg, mid-pointer is already pointing to the empty page.
343
next if ($pg_str eq "\x0" x ($fu_sz * PG_FILL_SZ));
345
# for a full pg, add the full pg if necessary.
346
# and set the mid-pointer to the full pg offset.
347
if ($pg_str eq "\xff" x ($fu_sz * PG_FILL_SZ)) {
348
if (! $f_pg_offset) {
349
$f_pg_offset = @ccmap;
350
#for the full pg, endianness and ALU size are immaterial.
351
push @ccmap, (0xffff) x 16;
353
$ccmap[$mid_offset + $pg] = $f_pg_offset;
357
$ccmap[$mid_offset + $pg] = @ccmap;
359
# 'Flag' the offset as the beginning of a page with actual data as
360
# opposed to pointer sections.
361
$pg_flags{(scalar @$full_ccmap_p) + (scalar @ccmap)} = @ccmap;
363
push @ccmap, @pg_fill;
371
my($ccmap_p,$pg_flags_p, $variable, $class, $comments_p, $is_ext) = @_;
374
my $ofn = $class . ($is_ext ? ".x-ccmap" : ".ccmap");
376
open OUT, "> $ofn" or
377
die "cannot open $ofn for output\n";
379
print OUT print_preamble($variable, $class);
382
# defined ($comments_p->{'CLASS'}) and
383
# print OUT " CLASS:: $comments_p->{'CLASS'}\n";
384
print OUT " VARIABLE:: $variable\n";
385
print OUT " CLASS:: $class\n";
386
defined ($comments_p->{'DESC'}) and
387
print OUT " DESCRIPTION:: $comments_p->{'DESC'}\n";
388
defined ($comments_p->{'FILE'}) and
389
print OUT " FILE:: $comments_p->{'FILE'}\n";
393
for my $key (sort keys %$comments_p) {
394
next if ($key !~ /^0X/);
395
printf OUT " %s : %s\n", $key, $comments_p->{$key};
401
my(@idxlist, @int16toint32);
403
# When CCMap is accessed, (PRUint16 *) is cast to
404
# the pointer type of the ALU of a machine.
405
# For little endian machines, the size of the ALU
406
# doesn't matter (16, 32, 64). For Big endian
407
# machines with 32/64 bit ALU, two/four 16bit words
408
# have to be rearranged to be interpreted correctly
409
# as 32bit or 64bit integers with the 16bit word
410
# at the lowest address taking the highest place value.
411
# This shuffling is NOT necessary for the upper pointer section
412
# and mid-pointer sections.
414
# If non-BMP characters are present, 16 plane indices
415
# (32bit integers stored in two 16bit shorts in
416
# BE order) have to be treated differently based on the
417
# endianness as well.
419
# For BMP-only CCMap, 16BE CCMap is identical to LE CCMaps.
420
# With non-BMP characters present, to avoid the misalignment on 64bit
421
# machines, we have to store the ccmap flag (indicating whether the map
422
# is extended or not) and the BMP map size in two 32bit integers instead of
423
# two 16bit integers (bug 225340)
424
my @fmts = $is_ext ? ("64LE", "LE", "16BE", "32BE", "64BE") : ("LE", "32BE", "64BE") ;
425
foreach my $fmt (@fmts)
431
@idxlist = (0, 1, 2, 3);
432
@int16toint32 = (1, 0, 3, 2);
433
print OUT "#if (defined(IS_LITTLE_ENDIAN) && ALU_SIZE == 64)\n" .
434
"// Precompiled CCMap for Little Endian(64bit)\n";
435
printf OUT "#define ${variable}_SIZE %d\n", scalar @$ccmap_p + 2;
436
printf OUT "#define ${variable}_INITIALIZER \\\n";
437
printf OUT "/* EXTFLG */ 0x%04X,0x0000,0x%04X,0x0000, \\\n",
438
$ccmap_p->[0], $ccmap_p->[1];
442
@idxlist = (0, 1, 2, 3);
443
@int16toint32 = (1, 0, 3, 2);
445
"#elif defined(IS_LITTLE_ENDIAN)\n" .
446
"// Precompiled CCMap for Little Endian(16/32bit) \n" :
447
"#if (defined(IS_LITTLE_ENDIAN) || ALU_SIZE == 16)\n" .
448
"// Precompiled CCMap for Little Endian(16/32/64bit)\n" .
449
"// and Big Endian(16bit)\n";
450
printf OUT "#define ${variable}_SIZE %d\n", scalar @$ccmap_p;
451
printf OUT "#define ${variable}_INITIALIZER \\\n";
453
printf OUT "/* EXTFLG */ 0x%04X,0x%04X, \\\n",
454
$ccmap_p->[0], $ccmap_p->[1];
459
@idxlist = (0, 1, 2, 3);
460
@int16toint32 = (0, 1, 2, 3);
461
print OUT "#elif (ALU_SIZE == 16)\n" .
462
"// Precompiled CCMap for Big Endian(16bit)\n";
463
printf OUT "#define ${variable}_SIZE %d\n", scalar @$ccmap_p;
464
printf OUT "#define ${variable}_INITIALIZER \\\n";
465
printf OUT "/* EXTFLG */ 0x%04X,0x%04X, \\\n",
466
$ccmap_p->[0], $ccmap_p->[1];
470
@idxlist = (1, 0, 3, 2);
471
@int16toint32 = (0, 1, 2, 3);
472
print OUT "#elif (ALU_SIZE == 32)\n" .
473
"// Precompiled CCMap for Big Endian(32bit)\n";
474
printf OUT "#define ${variable}_SIZE %d\n", scalar @$ccmap_p;
475
printf OUT "#define ${variable}_INITIALIZER \\\n";
477
printf OUT "/* EXTFLG */ 0x%04X,0x%04X, \\\n",
478
$ccmap_p->[0], $ccmap_p->[1];
483
@idxlist = (3, 2, 1, 0);
484
@int16toint32 = (0, 1, 2, 3);
485
print OUT "#elif (ALU_SIZE == 64)\n" .
486
"// Precompiled CCMap for Big Endian(64bit)\n";
487
printf OUT "#define ${variable}_SIZE %d\n", scalar @$ccmap_p +
489
printf OUT "#define ${variable}_INITIALIZER \\\n";
491
printf OUT "/* EXTFLG */ 0x0000,0x%04X,0x0000,0x%04X, \\\n",
492
$ccmap_p->[0], $ccmap_p->[1];
498
$offset = $is_ext ? 2 : 0;
500
while ($offset < @$ccmap_p) {
501
printf OUT "/* %06x */ ", $offset - ($is_ext ? 2 : 0);
503
for my $j (defined($pg_flags_p->{$offset}) ?
504
($pg_flags_p->{$offset} > 0 ?
505
@idxlist : @int16toint32) : (0,1,2,3)) {
506
printf OUT "0x%04X,", $ccmap_p->[$offset + $i * 4 + $j];
508
print OUT " \\\n " if $i==1;
510
if ($offset + 16 < @$ccmap_p) {print OUT " \\\n"; }
518
#error "We don't support this architecture."
529
my($variable, $class) = @_;
531
/* ***** BEGIN LICENSE BLOCK *****
532
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
534
* The contents of this file are subject to the Mozilla Public License Version
535
* 1.1 (the "License"); you may not use this file except in compliance with
536
* the License. You may obtain a copy of the License at
537
* http://www.mozilla.org/MPL/
539
* Software distributed under the License is distributed on an "AS IS" basis,
540
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
541
* for the specific language governing rights and limitations under the
544
* The Original Code is mozilla.org code.
546
* The Initial Developer of the Original Code is
547
* Jungshik Shin <jshin\@mailaps.org>
548
* Portions created by the Initial Developer are Copyright (C) 2003
549
* the Initial Developer. All Rights Reserved.
553
* Alternatively, the contents of this file may be used under the terms of
554
* either the GNU General Public License Version 2 or later (the "GPL"), or
555
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
556
* in which case the provisions of the GPL or the LGPL are applicable instead
557
* of those above. If you wish to allow use of your version of this file only
558
* under the terms of either the GPL or the LGPL, and not to allow others to
559
* use your version of this file under the terms of the MPL, indicate your
560
* decision by deleting the provisions above and replace them with the notice
561
* and other provisions required by the GPL or the LGPL. If you do not delete
562
* the provisions above, a recipient may use your version of this file under
563
* the terms of any one of the MPL, the GPL or the LGPL.
565
* ***** END LICENSE BLOCK ***** */
567
/*========================================================
568
This file contains a precompiled CCMap for a class of Unicode
569
characters ($class) to be identified quickly by Mozilla.
570
It was generated by ccmapbin.pl which you can find under
571
mozilla/intl/unicharutil/tools.
573
Enumerated below are characters included in the precompiled CCMap
574
which is human-readable but not so human-friendly. If you
575
needs to modify the list of characters belonging to "$class",
576
you have to make a new file (with the name of your choice)
577
listing characters (one character per line) you want to put
578
into "$class" in the format
582
In addition, the input file can have the following optional lines that
587
DESCRIPTION:: description of a character class
588
FILE:: mozilla source file to include the output file
591
Then, run the following in the current directory.
593
perl ccmapbin.pl input_file [$variable [$class]]
595
which will generate $class.ccmap (or $class.x-ccmap if the ccmap
596
includes non-BMP characters.). $variable is used as the prefix
597
in macros for the array initializer and the array size.
599
(see bug 180266, bug 167136, and bug 224337)