~ubuntu-branches/ubuntu/saucy/gucharmap/saucy-proposed

« back to all changes in this revision

Viewing changes to .pc/10_perl_5_18.patch/gucharmap/gen-guch-unicode-tables.pl

Committer: Package Import Robot
Author(s): Emilio Pozuelo Monfort
Date: 2013-05-28 22:27:42 UTC
mfrom: (1.1.52) (2.4.11 sid)
Revision ID: package-import@ubuntu.com-20130528222742-54p7tpi7qdt4rdbt

http://bugs.debian.org/710038

* debian/patches/10_perl_5_18.patch:
+ Fix FTBFS with perl 5.18. Thanks to Roderich Schupp. Closes: #710038.

files added:
.pc/10_perl_5_18.patch

.pc/10_perl_5_18.patch/gucharmap

.pc/10_perl_5_18.patch/gucharmap/gen-guch-unicode-tables.pl

.pc/unicode-source-generation.patch

.pc/unicode-source-generation.patch/gucharmap

.pc/unicode-source-generation.patch/gucharmap/gen-guch-unicode-tables.pl

debian/patches/10_perl_5_18.patch

debian/patches/unicode-source-generation.patch

po/tg.po

files modified:
.pc/applied-patches

ChangeLog

Makefile.am

Makefile.in

NEWS

aclocal.m4

configure

configure.ac

debian/changelog

debian/control

debian/control.in

debian/libgucharmap-2-90-7.symbols

debian/libgucharmap-2-90-dev.install

debian/patches/99_ltmain_as-needed.patch

debian/patches/series

debian/rules

debian/watch

docs/Makefile.in

docs/reference/Makefile.in

docs/reference/html/ch01.html

docs/reference/html/gucharmap-gucharmap-chapters-model.html

docs/reference/tmpl/gucharmap-chapters-model.sgml

gucharmap.desktop.in.in

gucharmap/Makefile.am

gucharmap/Makefile.in

gucharmap/gen-guch-unicode-tables.pl

gucharmap/gucharmap-chapters-model.c

gucharmap/gucharmap-chapters-model.h

gucharmap/gucharmap.h

gucharmap/main.c

help/Makefile.in

help/gl/gl.po

help/pt_BR/pt_BR.po

pixmaps/Makefile.in

po/LINGUAS

po/ar.po

po/as.po

po/be.po

po/bg.po

po/ca.po

po/ca@valencia.po

po/cs.po

po/da.po

po/de.po

po/el.po

po/es.po

po/et.po

po/eu.po

po/fa.po

po/fr.po

po/gl.po

po/gu.po

po/he.po

po/hi.po

po/hu.po

po/id.po

po/it.po

po/ja.po

po/kn.po

po/ko.po

po/lt.po

po/lv.po

po/ml.po

po/mr.po

po/nb.po

po/nl.po

po/or.po

po/pa.po

po/pl.po

po/pt.po

po/pt_BR.po

po/ru.po

po/sk.po

po/sl.po

po/sr.po

po/sr@latin.po

po/ta.po

po/te.po

po/th.po

po/ug.po

po/uk.po

po/zh_HK.po

po/zh_TW.po

Show diffs side-by-side

added added

removed removed

.pc/10_perl_5_18.patch/gucharmap/gen-guch-unicode-tables.pl

#!/usr/bin/perl -w

# $Id$

# generates in the current directory:

# - unicode-blocks.h

# - unicode-names.h

# - unicode-nameslist.h

# - unicode-unihan.h

# - unicode-categories.h

# - unicode-scripts.h

# - unicode-versions.h

# usage: ./gen-guch-unicode-tables.pl UNICODE-VERSION DIRECTORY

# where DIRECTORY contains UnicodeData.txt Unihan_Readings.txt.bz2 NamesList.txt Blocks.txt Scripts.txt

# NOTE! Some code copied from glib/glib/gen-unicode-tables.pl; keep in sync!

use strict;

use vars ('$UNZIP', '$ICONV');

# if these things aren't in your path you can put full paths to them here

$UNZIP = 'bunzip2';

$ICONV = 'iconv';

sub process_unicode_data_txt ($);

sub process_unihan_zip ($);

sub process_nameslist_txt ($);

sub process_blocks_txt ($);

sub process_scripts_txt ($);

sub process_versions_txt ($);

$| = 1; # flush stdout buffer

if (@ARGV != 2)

{

$0 =~ s@.*/@@;

die <<EOF

Usage: $0 UNICODE-VERSION DIRECTORY

DIRECTORY should contain the following Unicode data files:

UnicodeData.txt Unihan_Readings.txt.bz2 NamesList.txt Blocks.txt Scripts.txt

which can be found at http://www.unicode.org/Public/UNIDATA/

EOF

}

my ($unicodedata_txt, $unihan_zip, $nameslist_txt, $blocks_txt, $scripts_txt, $versions_txt);

my $v = $ARGV[0];

my $d = $ARGV[1];

opendir (my $dir, $d) or die "Cannot open Unicode data dir $d: $!\n";

for my $f (readdir ($dir))

{

$unicodedata_txt = "$d/$f" if ($f =~ /UnicodeData.*\.txt/);

$unihan_zip = "$d/$f" if ($f =~ /Unihan_Readings\.txt\.bz2/);

$nameslist_txt = "$d/$f" if ($f =~ /NamesList.*\.txt/);

$blocks_txt = "$d/$f" if ($f =~ /Blocks.*\.txt/);

$scripts_txt = "$d/$f" if ($f =~ /Scripts.*\.txt/);

$versions_txt = "$d/$f" if ($f =~ /DerivedAge.*\.txt/);

}

defined $unicodedata_txt or die "Did not find $d/UnicodeData.txt";

defined $unihan_zip or die "Did not find $d/Unihan_Readings.txt.bz2";

defined $nameslist_txt or die "Did not find $d/NamesList.txt";

defined $blocks_txt or die "Did not find $d/Blocks.txt";

defined $scripts_txt or die "Did not find $d/Scripts.txt";

defined $versions_txt or die "Did not find $d/DerivedAge.txt";

process_unicode_data_txt ($unicodedata_txt);

process_nameslist_txt ($nameslist_txt);

process_blocks_txt ($blocks_txt);

process_scripts_txt ($scripts_txt);

process_versions_txt ($versions_txt);

process_unihan_zip ($unihan_zip);

exit;

#------------------------#

sub process_unicode_data_txt ($)

{

my ($unicodedata_txt) = @_;

# part 1: names

open (my $unicodedata, $unicodedata_txt) or die;

open (my $out, "> unicode-names.h") or die;

print "processing $unicodedata_txt...";

print $out "/* unicode-names.h */\n";

print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";

print $out "/* Generated by $0 */\n";

print $out "/* Generated from UCD version $v */\n\n";

100

print $out "#ifndef UNICODE_NAMES_H\n";

101

print $out "#define UNICODE_NAMES_H\n\n";

102

103

print $out "#include <glib.h>\n\n";

104

print $out "#include <glib/gi18n-lib.h>\n\n";

105

106

my @unicode_pairs;

107

my %names;

108

109

while (my $line = <$unicodedata>)

110

{

111

chomp $line;

112

$line =~ /^([^;]+);([^;]+)/ or die;

113

114

my $hex = $1;

115

my $name = $2;

116

117

$names{$name} = 1;

118

push @unicode_pairs, [$hex, $name];

119

}

120

121

print $out "static const char unicode_names_strings[] = \\\n";

122

123

my $offset = 0;

124

125

foreach my $name (sort keys %names) {

126

print $out " \"$name\\0\"\n";

127

$names{$name} = $offset;

128

$offset += length($name) + 1;

129

}

130

131

undef $offset;

132

133

print $out ";\n";

134

135

print $out "typedef struct _UnicodeName UnicodeName;\n\n";

136

137

print $out "static const struct _UnicodeName\n";

138

print $out "{\n";

139

print $out " gunichar index;\n";

140

print $out " guint32 name_offset;\n";

141

print $out "} \n";

142

print $out "unicode_names[] =\n";

143

print $out "{\n";

144

145

my $first_line = 1;

146

147

foreach my $pair (@unicode_pairs) {

148

if (!$first_line) {

149

print $out ",\n";

150

} else {

151

$first_line = 0;

152

}

153

154

my ($hex, $name) = @{$pair};

155

my $offset = $names{$name};

156

print $out " {0x$hex, $offset}";

157

}

158

159

print $out "\n};\n\n";

160

161

print $out <<EOT;

162

static inline const char * unicode_name_get_name(const UnicodeName *entry)

163

{

164

guint32 offset = entry->name_offset;

165

return unicode_names_strings + offset;

166

}

167

168

EOT

169

170

print $out "#endif /* #ifndef UNICODE_NAMES_H */\n";

171

172

undef %names;

173

undef @unicode_pairs;

174

175

close ($unicodedata);

176

close ($out);

177

178

# part 2: categories

179

180

open ($unicodedata, $unicodedata_txt) or die;

181

open ($out, "> unicode-categories.h") or die;

182

183

# Map general category code onto symbolic name.

184

my %mappings =

185

(

186

# Normative.

187

'Lu' => "G_UNICODE_UPPERCASE_LETTER",

188

'Ll' => "G_UNICODE_LOWERCASE_LETTER",

189

'Lt' => "G_UNICODE_TITLECASE_LETTER",

190

'Mn' => "G_UNICODE_NON_SPACING_MARK",

191

'Mc' => "G_UNICODE_COMBINING_MARK",

192

'Me' => "G_UNICODE_ENCLOSING_MARK",

193

'Nd' => "G_UNICODE_DECIMAL_NUMBER",

194

'Nl' => "G_UNICODE_LETTER_NUMBER",

195

'No' => "G_UNICODE_OTHER_NUMBER",

196

'Zs' => "G_UNICODE_SPACE_SEPARATOR",

197

'Zl' => "G_UNICODE_LINE_SEPARATOR",

198

'Zp' => "G_UNICODE_PARAGRAPH_SEPARATOR",

199

'Cc' => "G_UNICODE_CONTROL",

200

'Cf' => "G_UNICODE_FORMAT",

201

'Cs' => "G_UNICODE_SURROGATE",

202

'Co' => "G_UNICODE_PRIVATE_USE",

203

'Cn' => "G_UNICODE_UNASSIGNED",

204

205

# Informative.

206

'Lm' => "G_UNICODE_MODIFIER_LETTER",

207

'Lo' => "G_UNICODE_OTHER_LETTER",

208

'Pc' => "G_UNICODE_CONNECT_PUNCTUATION",

209

'Pd' => "G_UNICODE_DASH_PUNCTUATION",

210

'Ps' => "G_UNICODE_OPEN_PUNCTUATION",

211

'Pe' => "G_UNICODE_CLOSE_PUNCTUATION",

212

'Pi' => "G_UNICODE_INITIAL_PUNCTUATION",

213

'Pf' => "G_UNICODE_FINAL_PUNCTUATION",

214

'Po' => "G_UNICODE_OTHER_PUNCTUATION",

215

'Sm' => "G_UNICODE_MATH_SYMBOL",

216

'Sc' => "G_UNICODE_CURRENCY_SYMBOL",

217

'Sk' => "G_UNICODE_MODIFIER_SYMBOL",

218

'So' => "G_UNICODE_OTHER_SYMBOL"

219

);

220

221

# these shouldn't be -1

222

my ($codepoint, $last_codepoint, $start_codepoint) = (-999, -999, -999);

223

224

my ($category, $last_category) = ("G_FAKE1", "G_FAKE2");

225

my ($started_range, $finished_range) = (undef, undef);

226

227

print $out "/* unicode-categories.h */\n";

228

print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";

229

print $out "/* Generated by $0 */\n";

230

print $out "/* Generated from UCD version $v */\n\n";

231

232

print $out "#ifndef UNICODE_CATEGORIES_H\n";

233

print $out "#define UNICODE_CATEGORIES_H\n\n";

234

235

print $out "#include <glib.h>\n\n";

236

237

print $out "typedef struct _UnicodeCategory UnicodeCategory;\n\n";

238

239

print $out "static const struct _UnicodeCategory\n";

240

print $out "{\n";

241

print $out " gunichar start;\n";

242

print $out " gunichar end;\n";

243

print $out " GUnicodeType category;\n";

244

print $out "}\n";

245

print $out "unicode_categories[] =\n";

246

print $out "{\n";

247

248

while (my $line = <$unicodedata>)

249

{

250

$line =~ /^([0-9A-F]*);([^;]*);([^;]*);/ or die;

251

my $codepoint = hex ($1);

252

my $name = $2;

253

my $category = $mappings{$3};

254

255

if ($finished_range

256

or ($category ne $last_category)

257

or (not $started_range and $codepoint != $last_codepoint + 1))

258

{

259

if ($last_codepoint >= 0) {

260

printf $out (" { 0x%4.4X, 0x%4.4X, \%s },\n", $start_codepoint, $last_codepoint, $last_category);

261

}

262

263

$start_codepoint = $codepoint;

264

}

265

266

if ($name =~ /^<.*First>$/) {

267

$started_range = 1;

268

$finished_range = undef;

269

}

270

elsif ($name =~ /^<.*Last>$/) {

271

$started_range = undef;

272

$finished_range = 1;

273

}

274

elsif ($finished_range) {

275

$finished_range = undef;

276

}

277

278

$last_codepoint = $codepoint;

279

$last_category = $category;

280

}

281

printf $out (" { 0x%4.4X, 0x%4.4X, \%s },\n", $start_codepoint, $last_codepoint, $last_category);

282

283

print $out "};\n\n";

284

285

print $out "#endif /* #ifndef UNICODE_CATEGORIES_H */\n";

286

287

close ($out);

288

print " done.\n";

289

}

290

291

#------------------------#

292

293

# XXX should do kFrequency too

294

sub process_unihan_zip ($)

295

{

296

my ($unihan_zip) = @_;

297

298

open (my $unihan, "$UNZIP -c $unihan_zip |") or die;

299

open (my $out, "> unicode-unihan.h") or die;

300

301

print "processing $unihan_zip";

302

303

print $out "/* unicode-unihan.h */\n";

304

print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";

305

print $out "/* Generated by $0 */\n";

306

print $out "/* Generated from UCD version $v */\n\n";

307

308

print $out "#ifndef UNICODE_UNIHAN_H\n";

309

print $out "#define UNICODE_UNIHAN_H\n\n";

310

311

print $out "#include <glib.h>\n\n";

312

313

print $out "typedef struct _Unihan Unihan;\n\n";

314

315

print $out "static const struct _Unihan\n";

316

print $out "{\n";

317

print $out " gunichar index;\n";

318

print $out " gint32 kDefinition;\n";

319

print $out " gint32 kCantonese;\n";

320

print $out " gint32 kMandarin;\n";

321

print $out " gint32 kTang;\n";

322

print $out " gint32 kKorean;\n";

323

print $out " gint32 kJapaneseKun;\n";

324

print $out " gint32 kJapaneseOn;\n";

325

print $out "} \n";

326

print $out "unihan[] =\n";

327

print $out "{\n";

328

329

my @strings;

330

my $offset = 0;

331

332

my $wc = 0;

333

my ($kDefinition, $kCantonese, $kMandarin, $kTang, $kKorean, $kJapaneseKun, $kJapaneseOn);

334

335

my $i = 0;

336

while (my $line = <$unihan>)

337

{

338

chomp $line;

339

$line =~ /^U\+([0-9A-F]+)\s+([^\s]+)\s+(.+)$/ or next;

340

341

my $new_wc = hex ($1);

342

my $field = $2;

343

344

my $value = $3;

345

$value =~ s/\\/\\\\/g;

346

$value =~ s/\"/\\"/g;

347

348

if ($new_wc != $wc)

349

{

350

if (defined $kDefinition or defined $kCantonese or defined $kMandarin

351

or defined $kTang or defined $kKorean or defined $kJapaneseKun

352

or defined $kJapaneseOn)

353

{

354

printf $out (" { 0x%04X, \%d, \%d, \%d, \%d, \%d, \%d, \%d },\n",

355

$wc,

356

(defined($kDefinition) ? $kDefinition : -1),

357

(defined($kCantonese) ? $kCantonese: -1),

358

(defined($kMandarin) ? $kMandarin : -1),

359

(defined($kTang) ? $kTang : -1),

360

(defined($kKorean) ? $kKorean : -1),

361

(defined($kJapaneseKun) ? $kJapaneseKun : -1),

362

(defined($kJapaneseOn) ? $kJapaneseOn : -1));

363

}

364

365

$wc = $new_wc;

366

367

undef $kDefinition;

368

undef $kCantonese;

369

undef $kMandarin;

370

undef $kTang;

371

undef $kKorean;

372

undef $kJapaneseKun;

373

undef $kJapaneseOn;

374

}

375

376

for my $f qw(kDefinition kCantonese kMandarin

377

kTang kKorean kJapaneseKun kJapaneseOn) {

378

379

if ($field eq $f) {

380

push @strings, $value;

381

my $last_offset = $offset;

382

$offset += length($value) + 1;

383

$value = $last_offset;

384

last;

385

}

386

}

387

388

if ($field eq "kDefinition") {

389

$kDefinition = $value;

390

}

391

elsif ($field eq "kCantonese") {

392

$kCantonese = $value;

393

}

394

elsif ($field eq "kMandarin") {

395

$kMandarin = $value;

396

}

397

elsif ($field eq "kTang") {

398

$kTang = $value;

399

}

400

elsif ($field eq "kKorean") {

401

$kKorean = $value;

402

}

403

elsif ($field eq "kJapaneseKun") {

404

$kJapaneseKun = $value;

405

}

406

elsif ($field eq "kJapaneseOn") {

407

$kJapaneseOn = $value;

408

}

409

410

if ($i++ % 32768 == 0) {

411

print ".";

412

}

413

}

414

415

print $out "};\n\n";

416

417

print $out "static const char unihan_strings[] = \\\n";

418

419

for my $s (@strings) {

420

print $out " \"$s\\0\"\n";

421

}

422

print $out ";\n\n";

423

424

print $out "static const Unihan *_get_unihan (gunichar uc)\n;";

425

426

for my $name qw(kDefinition kCantonese kMandarin

427

kTang kKorean kJapaneseKun kJapaneseOn) {

428

print $out <<EOT;

429

430

static inline const char * unihan_get_$name (const Unihan *uh)

431

{

432

gint32 offset = uh->$name;

433

if (offset == -1)

434

return NULL;

435

return unihan_strings + offset;

436

}

437

438

G_CONST_RETURN gchar *

439

gucharmap_get_unicode_$name (gunichar uc)

440

{

441

const Unihan *uh = _get_unihan (uc);

442

if (uh == NULL)

443

return NULL;

444

else

445

return unihan_get_$name (uh);

446

}

447

448

EOT

449

}

450

451

print $out "#endif /* #ifndef UNICODE_UNIHAN_H */\n";

452

453

close ($unihan);

454

close ($out);

455

456

print " done.\n";

457

}

458

459

#------------------------#

460

461

# $nameslist_hash =

462

# {

463

# 0x0027 => { '=' => {

464

# 'index' => 30,

465

# 'values' => [ 'APOSTROPHE-QUOTE', 'APL quote' ]

466

# }

467

# '*' => {

468

# 'index' => 50,

469

# 'values' => [ 'neutral (vertical) glyph with mixed usage',

470

# '2019 is preferred for apostrophe',

471

# 'preferred characters in English for paired quotation marks are 2018 & 2019'

472

# ]

473

# }

474

# # etc

475

# }

476

# # etc

477

# };

478

479

480

sub print_names_list

481

{

482

my ($out, $nameslist_hash, $token, $variable_name) = @_;

483

484

print $out "static const char ", $variable_name, "_strings[] = \n";

485

486

my @names_pairs;

487

my %names_offsets;

488

my $offset = 0;

489

490

for my $wc (sort {$a <=> $b} keys %{$nameslist_hash})

491

{

492

next if not exists $nameslist_hash->{$wc}->{$token};

493

for my $value (@{$nameslist_hash->{$wc}->{$token}->{'values'}}) {

494

push @names_pairs, [$wc, $value];

495

next if exists $names_offsets{$value};

496

497

$names_offsets{$value} = $offset;

498

$offset += length($value) + 1;

499

500

my $printvalue = $value;

501

$printvalue =~ s/\\/\\\\/g;

502

$printvalue =~ s/\"/\\"/g;

503

504

printf $out (qq/ "\%s\\0"\n/, $printvalue);

505

}

506

}

507

508

print $out " ;\n\n";

509

510

print $out "static const UnicharStringIndex ", $variable_name, "[] = \n";

511

print $out "{\n";

512

foreach my $pair (@names_pairs) {

513

my ($wc, $value) = @{$pair};

514

printf $out (qq/ { 0x%04X, %d },\n/, $wc, $names_offsets{$value});

515

}

516

print $out " { (gunichar)(-1), 0 } /* end marker */ \n";

517

print $out "};\n\n";

518

}

519

520

sub process_nameslist_txt ($)

521

{

522

my ($nameslist_txt) = @_;

523

524

open (my $nameslist, $nameslist_txt) or die;

525

526

print "processing $nameslist_txt...";

527

528

my ($equal_i, $ex_i, $star_i, $pound_i, $colon_i) = (0, 0, 0, 0, 0);

529

my $wc = 0;

530

531

my $nameslist_hash;

532

my $in_multiline_comment = 0;

533

534

while (my $line = <$nameslist>)

535

{

536

if ($in_multiline_comment && $line =~ /^\t/)

537

{

538

next;

539

}

540

541

chomp ($line);

542

543

$in_multiline_comment = 0;

544

545

if ($line =~ /^@\+/)

546

{

547

$in_multiline_comment = 1;

548

next;

549

}

550

elsif ($line =~ /^@/)

551

{

552

next;

553

}

554

elsif ($line =~ /^([0-9A-F]+)/)

555

{

556

$wc = hex ($1);

557

}

558

elsif ($line =~ /^\s+=\s+(.+)$/)

559

{

560

my $value = $1;

561

562

if (not defined $nameslist_hash->{$wc}->{'='}->{'index'}) {

563

$nameslist_hash->{$wc}->{'='}->{'index'} = $equal_i;

564

}

565

push (@{$nameslist_hash->{$wc}->{'='}->{'values'}}, $value);

566

567

$equal_i++;

568

}

569

elsif ($line =~ /^\s+\*\s+(.+)$/)

570

{

571

my $value = $1;

572

573

if (not defined $nameslist_hash->{$wc}->{'*'}->{'index'}) {

574

$nameslist_hash->{$wc}->{'*'}->{'index'} = $star_i;

575

}

576

push (@{$nameslist_hash->{$wc}->{'*'}->{'values'}}, $value);

577

578

$star_i++;

579

}

580

elsif ($line =~ /^\s+#\s+(.+)$/)

581

{

582

my $value = $1;

583

584

if (not defined $nameslist_hash->{$wc}->{'#'}->{'index'}) {

585

$nameslist_hash->{$wc}->{'#'}->{'index'} = $pound_i;

586

}

587

push (@{$nameslist_hash->{$wc}->{'#'}->{'values'}}, $value);

588

589

$pound_i++;

590

}

591

elsif ($line =~ /^\s+:\s+(.+)$/)

592

{

593

my $value = $1;

594

595

if (not defined $nameslist_hash->{$wc}->{':'}->{'index'}) {

596

$nameslist_hash->{$wc}->{':'}->{'index'} = $colon_i;

597

}

598

push (@{$nameslist_hash->{$wc}->{':'}->{'values'}}, $value);

599

600

$colon_i++;

601

}

602

elsif ($line =~ /^\s+x\s+.*?([0-9A-F]{4,6})\)$/) # this one is different

603

{

604

my $value = hex ($1);

605

606

if (not defined $nameslist_hash->{$wc}->{'x'}->{'index'}) {

607

$nameslist_hash->{$wc}->{'x'}->{'index'} = $ex_i;

608

}

609

push (@{$nameslist_hash->{$wc}->{'x'}->{'values'}}, $value);

610

611

$ex_i++;

612

}

613

}

614

615

close ($nameslist);

616

617

open (my $out, "> unicode-nameslist.h") or die;

618

619

print $out "/* unicode-nameslist.h */\n";

620

print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";

621

print $out "/* Generated by $0 */\n";

622

print $out "/* Generated from UCD version $v */\n\n";

623

624

print $out "#ifndef UNICODE_NAMESLIST_H\n";

625

print $out "#define UNICODE_NAMESLIST_H\n\n";

626

627

print $out "#include <glib.h>\n\n";

628

629

print $out "typedef struct _UnicharStringIndex UnicharStringIndex;\n";

630

print $out "typedef struct _UnicharUnichar UnicharUnichar;\n";

631

print $out "typedef struct _NamesList NamesList;\n\n";

632

633

print $out "struct _UnicharStringIndex\n";

634

print $out "{\n";

635

print $out " gunichar index;\n";

636

print $out " guint32 string_index;\n";

637

print $out "}; \n\n";

638

639

print $out "struct _UnicharUnichar\n";

640

print $out "{\n";

641

print $out " gunichar index;\n";

642

print $out " gunichar value;\n";

643

print $out "}; \n\n";

644

645

print $out "struct _NamesList\n";

646

print $out "{\n";

647

print $out " gunichar index;\n";

648

print $out " gint16 equals_index; /* -1 means */\n";

649

print $out " gint16 stars_index; /* this character */\n";

650

print $out " gint16 exes_index; /* doesn't */\n";

651

print $out " gint16 pounds_index; /* have any */\n";

652

print $out " gint16 colons_index;\n";

653

print $out "};\n\n";

654

655

print_names_list($out, $nameslist_hash, '=', "names_list_equals");

656

print_names_list($out, $nameslist_hash, '*', "names_list_stars");

657

print_names_list($out, $nameslist_hash, '#', "names_list_pounds");

658

print_names_list($out, $nameslist_hash, ':', "names_list_colons");

659

660

print $out "static const UnicharUnichar names_list_exes[] = \n";

661

print $out "{\n";

662

for $wc (sort {$a <=> $b} keys %{$nameslist_hash})

663

{

664

next if not exists $nameslist_hash->{$wc}->{'x'};

665

for my $value (@{$nameslist_hash->{$wc}->{'x'}->{'values'}}) {

666

printf $out (qq/ { 0x%04X, 0x%04X },\n/, $wc, $value);

667

}

668

}

669

print $out " { (gunichar)(-1), 0 }\n";

670

print $out "};\n\n";

671

672

print $out "static const NamesList names_list[] =\n";

673

print $out "{\n";

674

for $wc (sort {$a <=> $b} keys %{$nameslist_hash})

675

{

676

my $eq = exists $nameslist_hash->{$wc}->{'='}->{'index'} ? $nameslist_hash->{$wc}->{'='}->{'index'} : -1;

677

my $star = exists $nameslist_hash->{$wc}->{'*'}->{'index'} ? $nameslist_hash->{$wc}->{'*'}->{'index'} : -1;

678

my $ex = exists $nameslist_hash->{$wc}->{'x'}->{'index'} ? $nameslist_hash->{$wc}->{'x'}->{'index'} : -1;

679

my $pound = exists $nameslist_hash->{$wc}->{'#'}->{'index'} ? $nameslist_hash->{$wc}->{'#'}->{'index'} : -1;

680

my $colon = exists $nameslist_hash->{$wc}->{':'}->{'index'} ? $nameslist_hash->{$wc}->{':'}->{'index'} : -1;

681

682

printf $out (" { 0x%04X, \%d, \%d, \%d, \%d, \%d },\n", $wc, $eq, $star, $ex, $pound, $colon);

683

}

684

print $out "};\n\n";

685

686

print $out "#endif /* #ifndef UNICODE_NAMESLIST_H */\n";

687

688

close ($out);

689

690

print " done.\n";

691

}

692

693

#------------------------#

694

695

sub process_blocks_txt ($)

696

{

697

my ($blocks_txt) = @_;

698

699

# Override script names

700

my %block_overrides =

701

(

702

"NKo" => "N\'Ko"

703

);

704

705

open (my $blocks, $blocks_txt) or die;

706

open (my $out, "> unicode-blocks.h") or die;

707

708

print "processing $blocks_txt...";

709

710

print $out "/* unicode-blocks.h */\n";

711

print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";

712

print $out "/* Generated by $0 */\n";

713

print $out "/* Generated from UCD version $v */\n\n";

714

715

print $out "#ifndef UNICODE_BLOCKS_H\n";

716

print $out "#define UNICODE_BLOCKS_H\n\n";

717

718

print $out "#include <glib.h>\n";

719

print $out "#include <glib/gi18n-lib.h>\n\n";

720

721

my @blocks;

722

my $offset = 0;

723

724

while (my $line = <$blocks>)

725

{

726

$line =~ /^([0-9A-F]+)\.\.([0-9A-F]+); (.+)$/ or next;

727

728

my ($start,$end,$block) = ($1, $2, $3);

729

730

if (exists $block_overrides{$block}) {

731

$block = $block_overrides{$block};

732

}

733

734

push @blocks, [$start, $end, $block, $offset];

735

$offset += length($block) + 1;

736

}

737

738

print $out "/* for extraction by intltool */\n";

739

print $out "#if 0\n";

740

foreach my $block (@blocks)

741

{

742

my ($start, $end, $name, $offset) = @{$block};

743

print $out qq/ N_("$name"),\n/;

744

}

745

print $out "#endif /* 0 */\n\n";

746

747

print $out "static const char unicode_blocks_strings[] =\n";

748

foreach my $block (@blocks)

749

{

750

my ($start, $end, $name, $offset) = @{$block};

751

print $out qq/ "$name\\0"\n/;

752

}

753

print $out " ;\n\n";

754

755

print $out "typedef struct _UnicodeBlock UnicodeBlock;\n";

756

print $out "\n";

757

print $out "static const struct _UnicodeBlock\n";

758

print $out "{\n";

759

print $out " gunichar start;\n";

760

print $out " gunichar end;\n";

761

print $out " guint16 block_name_index;\n";

762

print $out "}\n";

763

print $out "unicode_blocks[] =\n";

764

print $out "{\n";

765

foreach my $block (@blocks)

766

{

767

my ($start, $end, $name, $offset) = @{$block};

768

print $out qq/ { 0x$start, 0x$end, $offset },\n/;

769

}

770

print $out "};\n\n";

771

772

print $out "#endif /* #ifndef UNICODE_BLOCKS_H */\n";

773

774

close ($blocks);

775

close ($out);

776

777

print " done.\n";

778

}

779

780

#------------------------#

781

782

sub process_scripts_txt ($)

783

{

784

my ($scripts_txt) = @_;

785

786

# Override script names

787

my %script_overrides =

788

(

789

"Nko" => "N\'Ko"

790

);

791

792

my %script_hash;

793

my %scripts;

794

795

open (my $scripts_file, $scripts_txt) or die;

796

open (my $out, "> unicode-scripts.h") or die;

797

798

print "processing $scripts_txt...";

799

800

while (my $line = <$scripts_file>)

801

{

802

my ($start, $end, $raw_script);

803

804

if ($line =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s+;\s+(\S+)/)

805

{

806

$start = hex ($1);

807

$end = hex ($2);

808

$raw_script = $3;

809

}

810

elsif ($line =~ /^([0-9A-F]+)\s+;\s+(\S+)/)

811

{

812

$start = hex ($1);

813

$end = $start;

814

$raw_script = $2;

815

}

816

else

817

{

818

next;

819

}

820

821

my $script = $raw_script;

822

$script =~ tr/_/ /;

823

$script =~ s/(\w+)/\u\L$1/g;

824

825

if (exists $script_overrides{$script}) {

826

$script = $script_overrides{$script};

827

}

828

829

$script_hash{$start} = { 'end' => $end, 'script' => $script };

830

$scripts{$script} = 1;

831

}

832

833

close ($scripts_file);

834

835

# Adds Common to make sure works with UCD <= 4.0.0

836

$scripts{"Common"} = 1;

837

838

print $out "/* unicode-scripts.h */\n";

839

print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";

840

print $out "/* Generated by $0 */\n";

841

print $out "/* Generated from UCD version $v */\n\n";

842

843

print $out "#ifndef UNICODE_SCRIPTS_H\n";

844

print $out "#define UNICODE_SCRIPTS_H\n\n";

845

846

print $out "#include <glib.h>\n";

847

print $out "#include <glib/gi18n-lib.h>\n\n";

848

849

print $out "typedef struct _UnicodeScript UnicodeScript;\n\n";

850

851

print $out "/* for extraction by intltool */\n";

852

print $out "#if 0\n";

853

my $i = 0;

854

for my $script (sort keys %scripts)

855

{

856

$scripts{$script} = $i;

857

$i++;

858

859

print $out qq/ N_("$script"),\n/;

860

}

861

print $out "#endif /* 0 */\n\n";

862

863

print $out "static const gchar unicode_script_list_strings[] =\n";

864

my $offset = 0;

865

my %script_offsets;

866

for my $script (sort keys %scripts)

867

{

868

printf $out (qq/ "\%s\\0"\n/, $script);

869

$script_offsets{$script} = $offset;

870

$offset += length($script) + 1;

871

}

872

print $out " ;\n\n";

873

undef $offset;

874

875

print $out "static const guint16 unicode_script_list_offsets[] =\n";

876

print $out "{\n";

877

for my $script (sort keys %scripts)

878

{

879

printf $out (qq/ \%d,\n/, $script_offsets{$script});

880

}

881

print $out "};\n\n";

882

883

print $out "static const struct _UnicodeScript\n";

884

print $out "{\n";

885

print $out " gunichar start;\n";

886

print $out " gunichar end;\n";

887

print $out " guint8 script_index; /* index into unicode_script_list_offsets */\n";

888

print $out "}\n";

889

print $out "unicode_scripts[] =\n";

890

print $out "{\n";

891

for my $start (sort { $a <=> $b } keys %script_hash)

892

{

893

printf $out (qq/ { 0x%04X, 0x%04X, \%2d },\n/,

894

$start, $script_hash{$start}->{'end'}, $scripts{$script_hash{$start}->{'script'}});

895

}

896

print $out "};\n\n";

897

898

print $out "#endif /* #ifndef UNICODE_SCRIPTS_H */\n";

899

900

close ($out);

901

print " done.\n";

902

}

903

904

#------------------------#

905

906

sub process_versions_txt ($)

907

{

908

my ($versions_txt) = @_;

909

910

my %version_hash;

911

my %versions;

912

913

open (my $versions, $versions_txt) or die;

914

open (my $out, "> unicode-versions.h") or die;

915

916

print "processing $versions_txt...";

917

918

while (my $line = <$versions>)

919

{

920

my ($start, $end, $raw_version);

921

922

if ($line =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s+;\s+(\S+)/)

923

{

924

$start = hex ($1);

925

$end = hex ($2);

926

$raw_version = $3;

927

}

928

elsif ($line =~ /^([0-9A-F]+)\s+;\s+(\S+)/)

929

{

930

$start = hex ($1);

931

$end = $start;

932

$raw_version = $2;

933

}

934

else

935

{

936

next;

937

}

938

939

my $version = $raw_version;

940

$version =~ tr/_/ /;

941

$version =~ s/(\w+)/\u\L$1/g;

942

943

$versions{$version} = 1;

944

945

$version =~ s/\./_/g;

946

$version_hash{$start} = { 'end' => $end, 'version' => $version };

947

}

948

949

close ($versions);

950

951

print $out "/* unicode-versions.h */\n";

952

print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";

953

print $out "/* Generated by $0 */\n";

954

print $out "/* Generated from UCD version $v */\n\n";

955

956

print $out "#ifndef UNICODE_VERSIONS_H\n";

957

print $out "#define UNICODE_VERSIONS_H\n\n";

958

959

print $out "#include <glib.h>\n";

960

print $out "#include <glib/gi18n-lib.h>\n\n";

961

962

print $out "typedef struct {\n";

963

print $out " gunichar start;\n";

964

print $out " gunichar end;\n";

965

print $out " GucharmapUnicodeVersion version;\n";

966

print $out "} UnicodeVersion;\n\n";

967

968

print $out "static const UnicodeVersion unicode_versions[] =\n";

969

print $out "{\n";

970

for my $start (sort { $a <=> $b } keys %version_hash)

971

{

972

printf $out (qq/ { 0x%04X, 0x%04X, GUCHARMAP_UNICODE_VERSION_\%s },\n/,

973

$start, $version_hash{$start}->{'end'}, $version_hash{$start}->{'version'});

974

}

975

print $out "};\n\n";

976

977

print $out "static const gchar unicode_version_strings[] =\n";

978

my $offset = 0;

979

my %version_offsets;

980

for my $version (sort keys %versions)

981

{

982

printf $out (qq/ "\%s\\0"\n/, $version);

983

$version_offsets{$version} = $offset;

984

$offset += length($version) + 1;

985

}

986

print $out " ;\n\n";

987

undef $offset;

988

989

print $out "static const guint16 unicode_version_string_offsets[] =\n";

990

print $out "{\n";

991

for my $version (sort keys %versions)

992

{

993

printf $out (qq/ \%d,\n/, $version_offsets{$version});

994

}

995

print $out "};\n\n";

996

997

print $out "#endif /* #ifndef UNICODE_VERSIONS_H */\n";

998

999

close ($out);

1000

print " done.\n";

1001

}

Older »