1
/* Copyright (C) 2001-2006 Artifex Software, Inc.
4
This software is provided AS-IS with no warranty, either express or
7
This software is distributed under license and may not be copied, modified
8
or distributed except as expressly authorized under the terms of that
9
license. Refer to licensing information at http://www.artifex.com/
10
or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
11
San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.
14
/* $Id: gsfcmap.c 8250 2007-09-25 13:31:24Z giles $ */
15
/* CMap character decoding */
21
#include "gsutil.h" /* for gs_next_ids */
24
typedef struct gs_cmap_identity_s {
28
int code; /* 0 or num_bytes */
33
gs_private_st_suffix_add0_local(st_cmap_identity, gs_cmap_identity_t,
34
"gs_cmap_identity_t", cmap_ptrs, cmap_data,
37
/* ---------------- Client procedures ---------------- */
39
/* ------ Initialization/creation ------ */
42
* Create an Identity CMap.
45
get_integer_bytes(const byte *src, int count)
50
for (i = 0; i < count; ++i)
51
v = (v << 8) + src[i];
55
identity_decode_next(const gs_cmap_t *pcmap, const gs_const_string *str,
56
uint *pindex, uint *pfidx,
57
gs_char *pchr, gs_glyph *pglyph)
59
const gs_cmap_identity_t *const pcimap =
60
(const gs_cmap_identity_t *)pcmap;
61
int num_bytes = pcimap->num_bytes;
64
if (str->size < *pindex + num_bytes) {
65
*pglyph = gs_no_glyph;
66
return (*pindex == str->size ? 2 : -1);
68
value = get_integer_bytes(str->data + *pindex, num_bytes);
69
*pglyph = gs_min_cid_glyph + value;
76
identity_next_range(gs_cmap_ranges_enum_t *penum)
78
if (penum->index == 0) {
79
const gs_cmap_identity_t *const pcimap =
80
(const gs_cmap_identity_t *)penum->cmap;
82
memset(penum->range.first, 0, pcimap->num_bytes);
83
memset(penum->range.last, 0xff, pcimap->num_bytes);
84
penum->range.size = pcimap->num_bytes;
90
static const gs_cmap_ranges_enum_procs_t identity_range_procs = {
94
identity_enum_ranges(const gs_cmap_t *pcmap, gs_cmap_ranges_enum_t *pre)
96
gs_cmap_ranges_enum_setup(pre, pcmap, &identity_range_procs);
99
identity_next_lookup(gs_cmap_lookups_enum_t *penum)
101
if (penum->index[0] == 0) {
102
const gs_cmap_identity_t *const pcimap =
103
(const gs_cmap_identity_t *)penum->cmap;
104
int num_bytes = pcimap->num_bytes;
106
memset(penum->entry.key[0], 0, num_bytes);
107
memset(penum->entry.key[1], 0xff, num_bytes);
108
memset(penum->entry.key[1], 0, num_bytes - pcimap->varying_bytes);
109
penum->entry.key_size = num_bytes;
110
penum->entry.key_is_range = true;
111
penum->entry.value_type =
112
(pcimap->code ? CODE_VALUE_CHARS : CODE_VALUE_CID);
113
penum->entry.value.size = num_bytes;
114
penum->entry.font_index = 0;
121
no_next_lookup(gs_cmap_lookups_enum_t *penum)
126
identity_next_entry(gs_cmap_lookups_enum_t *penum)
128
const gs_cmap_identity_t *const pcimap =
129
(const gs_cmap_identity_t *)penum->cmap;
130
int num_bytes = pcimap->num_bytes;
131
int i = num_bytes - pcimap->varying_bytes;
133
memcpy(penum->temp_value, penum->entry.key[0], num_bytes);
134
memcpy(penum->entry.key[0], penum->entry.key[1], i);
136
if (++(penum->entry.key[1][i]) != 0) {
137
penum->entry.value.data = penum->temp_value;
143
static const gs_cmap_lookups_enum_procs_t identity_lookup_procs = {
144
identity_next_lookup, identity_next_entry
146
const gs_cmap_lookups_enum_procs_t gs_cmap_no_lookups_procs = {
150
identity_enum_lookups(const gs_cmap_t *pcmap, int which,
151
gs_cmap_lookups_enum_t *pre)
153
gs_cmap_lookups_enum_setup(pre, pcmap,
154
(which ? &gs_cmap_no_lookups_procs :
155
&identity_lookup_procs));
158
identity_is_identity(const gs_cmap_t *pcmap, int font_index_only)
163
static const gs_cmap_procs_t identity_procs = {
164
identity_decode_next, identity_enum_ranges, identity_enum_lookups, identity_is_identity
168
gs_cmap_identity_alloc(gs_cmap_t **ppcmap, int num_bytes, int varying_bytes,
169
int return_code, const char *cmap_name, int wmode,
173
* We could allow any value of num_bytes between 1 and
174
* min(MAX_CMAP_CODE_SIZE, 4), but if num_bytes != 2, we can't name
175
* the result "Identity-[HV]".
177
static const gs_cid_system_info_t identity_cidsi = {
178
{ (const byte *)"Adobe", 5 },
179
{ (const byte *)"Identity", 8 },
183
gs_cmap_identity_t *pcimap;
186
return_error(gs_error_rangecheck);
187
code = gs_cmap_alloc(ppcmap, &st_cmap_identity, wmode,
188
(const byte *)cmap_name, strlen(cmap_name),
189
&identity_cidsi, 1, &identity_procs, mem);
192
pcimap = (gs_cmap_identity_t *)*ppcmap;
193
pcimap->num_bytes = num_bytes;
194
pcimap->varying_bytes = varying_bytes;
195
pcimap->code = return_code;
199
gs_cmap_create_identity(gs_cmap_t **ppcmap, int num_bytes, int wmode,
202
return gs_cmap_identity_alloc(ppcmap, num_bytes, num_bytes, 0,
203
(wmode ? "Identity-V" : "Identity-H"),
207
gs_cmap_create_char_identity(gs_cmap_t **ppcmap, int num_bytes, int wmode,
210
return gs_cmap_identity_alloc(ppcmap, num_bytes, 1, num_bytes,
211
(wmode ? "Identity-BF-V" : "Identity-BF-H"),
215
/* ------ Check identity ------ */
218
* Check for identity CMap. Uses a fast check for special cases.
221
gs_cmap_is_identity(const gs_cmap_t *pcmap, int font_index_only)
223
return pcmap->procs->is_identity(pcmap, font_index_only);
226
/* ------ Decoding ------ */
229
* Decode and map a character from a string using a CMap.
230
* See gsfcmap.h for details.
233
gs_cmap_decode_next(const gs_cmap_t *pcmap, const gs_const_string *str,
234
uint *pindex, uint *pfidx,
235
gs_char *pchr, gs_glyph *pglyph)
237
return pcmap->procs->decode_next(pcmap, str, pindex, pfidx, pchr, pglyph);
240
/* ------ Enumeration ------ */
243
* Initialize the enumeration of the code space ranges, and enumerate
244
* the next range. See gxfcmap.h for details.
247
gs_cmap_ranges_enum_init(const gs_cmap_t *pcmap, gs_cmap_ranges_enum_t *penum)
249
pcmap->procs->enum_ranges(pcmap, penum);
252
gs_cmap_enum_next_range(gs_cmap_ranges_enum_t *penum)
254
return penum->procs->next_range(penum);
258
* Initialize the enumeration of the lookups, and enumerate the next
259
* lookup or entry. See gxfcmap.h for details.
262
gs_cmap_lookups_enum_init(const gs_cmap_t *pcmap, int which,
263
gs_cmap_lookups_enum_t *penum)
265
pcmap->procs->enum_lookups(pcmap, which, penum);
268
gs_cmap_enum_next_lookup(gs_cmap_lookups_enum_t *penum)
270
return penum->procs->next_lookup(penum);
273
gs_cmap_enum_next_entry(gs_cmap_lookups_enum_t *penum)
275
return penum->procs->next_entry(penum);
278
/* ---------------- Implementation procedures ---------------- */
280
/* ------ Initialization/creation ------ */
283
* Initialize a just-allocated CMap, to ensure that all pointers are clean
284
* for the GC. Note that this only initializes the common part.
287
gs_cmap_init(const gs_memory_t *mem, gs_cmap_t *pcmap, int num_fonts)
289
memset(pcmap, 0, sizeof(*pcmap));
290
/* We reserve a range of IDs for pdfwrite needs,
291
to allow an identification of submaps for a particular subfont.
293
pcmap->id = gs_next_ids(mem, num_fonts);
294
pcmap->num_fonts = num_fonts;
295
uid_set_invalid(&pcmap->uid);
299
* Allocate and initialize (the common part of) a CMap.
302
gs_cmap_alloc(gs_cmap_t **ppcmap, const gs_memory_struct_type_t *pstype,
303
int wmode, const byte *map_name, uint name_size,
304
const gs_cid_system_info_t *pcidsi_in, int num_fonts,
305
const gs_cmap_procs_t *procs, gs_memory_t *mem)
308
gs_alloc_struct(mem, gs_cmap_t, pstype, "gs_cmap_alloc(CMap)");
309
gs_cid_system_info_t *pcidsi =
310
gs_alloc_struct_array(mem, num_fonts, gs_cid_system_info_t,
311
&st_cid_system_info_element,
312
"gs_cmap_alloc(CIDSystemInfo)");
314
if (pcmap == 0 || pcidsi == 0) {
315
gs_free_object(mem, pcidsi, "gs_cmap_alloc(CIDSystemInfo)");
316
gs_free_object(mem, pcmap, "gs_cmap_alloc(CMap)");
317
return_error(gs_error_VMerror);
319
gs_cmap_init(mem, pcmap, num_fonts); /* id, uid, num_fonts */
321
pcmap->CMapName.data = map_name;
322
pcmap->CMapName.size = name_size;
324
memcpy(pcidsi, pcidsi_in, sizeof(*pcidsi) * num_fonts);
326
memset(pcidsi, 0, sizeof(*pcidsi) * num_fonts);
327
pcmap->CIDSystemInfo = pcidsi;
328
pcmap->CMapVersion = 1.0;
329
/* uid = 0, UIDOffset = 0 */
330
pcmap->WMode = wmode;
331
/* from_Unicode = 0 */
332
/* not glyph_name, glyph_name_data */
333
pcmap->procs = procs;
339
* Initialize an enumerator with convenient defaults (index = 0).
342
gs_cmap_ranges_enum_setup(gs_cmap_ranges_enum_t *penum,
343
const gs_cmap_t *pcmap,
344
const gs_cmap_ranges_enum_procs_t *procs)
347
penum->procs = procs;
351
gs_cmap_lookups_enum_setup(gs_cmap_lookups_enum_t *penum,
352
const gs_cmap_t *pcmap,
353
const gs_cmap_lookups_enum_procs_t *procs)
356
penum->procs = procs;
357
penum->index[0] = penum->index[1] = 0;
361
* For a random CMap, compute whether it is identity.
362
* It is not applicable to gs_cmap_ToUnicode_t due to
363
* different sizes of domain keys and range values.
364
* Note we reject CMaps with Registry=Artifex
365
* to force embedding special nonstandard CMaps,
366
* which are not commonly in use yet.
369
gs_cmap_compute_identity(const gs_cmap_t *pcmap, int font_index_only)
372
gs_cmap_lookups_enum_t lenum;
375
if (!bytes_compare(pcmap->CIDSystemInfo->Registry.data, pcmap->CIDSystemInfo->Registry.size,
376
(const byte *)"Artifex", 7))
378
for (gs_cmap_lookups_enum_init(pcmap, which, &lenum);
379
(code = gs_cmap_enum_next_lookup(&lenum)) == 0; ) {
380
if (font_index_only >= 0 && lenum.entry.font_index != font_index_only)
382
if (font_index_only < 0 && lenum.entry.font_index > 0)
384
while (gs_cmap_enum_next_entry(&lenum) == 0) {
385
switch (lenum.entry.value_type) {
388
case CODE_VALUE_CHARS:
389
return false; /* Not implemented yet. */
390
case CODE_VALUE_GLYPH:
393
return false; /* Must not happen. */
395
if (lenum.entry.key_size != lenum.entry.value.size)
397
if (memcmp(lenum.entry.key[0], lenum.entry.value.data,
398
lenum.entry.key_size))
405
/* ================= ToUnicode CMap ========================= */
408
* This kind of CMap keeps a character mapping from an arbitrary
409
* PS encoding to Unicode, being defined in PDF reference, "ToUnicode CMaps".
410
* It represents ranges in a closure data, without using
411
* gx_cmap_lookup_range_t. A special function gs_cmap_ToUnicode_set
412
* allows writing code pairs into the closure data.
415
/*
 * Number of bytes in each value stored in a ToUnicode map.  Within this
 * file it serves as the per-code stride of the map array and as the size
 * reported for each enumerated lookup value.
 */
static const int gs_cmap_ToUnicode_code_bytes = 2;
417
typedef struct gs_cmap_ToUnicode_s {
422
} gs_cmap_ToUnicode_t;
424
gs_private_st_suffix_add0(st_cmap_ToUnicode, gs_cmap_ToUnicode_t,
425
"gs_cmap_ToUnicode_t", cmap_ToUnicode_enum_ptrs, cmap_ToUnicode_reloc_ptrs,
429
gs_cmap_ToUnicode_next_range(gs_cmap_ranges_enum_t *penum)
430
{ const gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)penum->cmap;
431
if (penum->index == 0) {
432
memset(penum->range.first, 0, cmap->key_size);
433
memset(penum->range.last, 0xff, cmap->key_size);
434
penum->range.size = cmap->key_size;
441
static const gs_cmap_ranges_enum_procs_t gs_cmap_ToUnicode_range_procs = {
442
gs_cmap_ToUnicode_next_range
446
gs_cmap_ToUnicode_decode_next(const gs_cmap_t *pcmap, const gs_const_string *str,
447
uint *pindex, uint *pfidx,
448
gs_char *pchr, gs_glyph *pglyph)
450
return_error(gs_error_unregistered);
454
gs_cmap_ToUnicode_enum_ranges(const gs_cmap_t *pcmap, gs_cmap_ranges_enum_t *pre)
456
gs_cmap_ranges_enum_setup(pre, pcmap, &gs_cmap_ToUnicode_range_procs);
460
gs_cmap_ToUnicode_next_lookup(gs_cmap_lookups_enum_t *penum)
461
{ const gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)penum->cmap;
463
if (penum->index[0]++ > 0)
465
penum->entry.value.data = penum->temp_value;
466
penum->entry.value.size = gs_cmap_ToUnicode_code_bytes;
468
penum->entry.key_is_range = true;
469
penum->entry.value_type = CODE_VALUE_CHARS;
470
penum->entry.key_size = cmap->key_size;
471
penum->entry.value.size = gs_cmap_ToUnicode_code_bytes;
472
penum->entry.font_index = 0;
477
gs_cmap_ToUnicode_next_entry(gs_cmap_lookups_enum_t *penum)
478
{ const gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)penum->cmap;
479
const uchar *map = cmap->glyph_name_data;
480
const int num_codes = cmap->num_codes;
481
uint index = penum->index[1], i, j;
484
/* Warning: this hardcodes gs_cmap_ToUnicode_code_bytes = 2 */
485
for (i = index; i < num_codes; i++)
486
if (map[i + i + 0] != 0 || map[i + i + 1] != 0)
492
for (j = i + 1, c2 = c1 + 1; j < num_codes; j++, c2++) {
493
/* Due to PDF spec, *bfrange boundaries may differ
494
in the last byte only. */
499
if (map[j + j + 0] != c0 || map[j + j + 1] != c2)
503
penum->entry.key[0][0] = (uchar)(i >> 8);
504
penum->entry.key[0][cmap->key_size - 1] = (uchar)(i & 0xFF);
505
penum->entry.key[1][0] = (uchar)(j >> 8);
506
penum->entry.key[1][cmap->key_size - 1] = (uchar)((j - 1) & 0xFF);
507
memcpy(penum->temp_value, map + i * gs_cmap_ToUnicode_code_bytes,
508
gs_cmap_ToUnicode_code_bytes);
512
static const gs_cmap_lookups_enum_procs_t gs_cmap_ToUnicode_lookup_procs = {
513
gs_cmap_ToUnicode_next_lookup, gs_cmap_ToUnicode_next_entry
517
gs_cmap_ToUnicode_enum_lookups(const gs_cmap_t *pcmap, int which,
518
gs_cmap_lookups_enum_t *pre)
520
gs_cmap_lookups_enum_setup(pre, pcmap,
521
(which ? &gs_cmap_no_lookups_procs : /* fixme */
522
&gs_cmap_ToUnicode_lookup_procs));
526
gs_cmap_ToUnicode_is_identity(const gs_cmap_t *pcmap, int font_index_only)
527
{ const gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)pcmap;
528
return cmap->is_identity;
531
static const gs_cmap_procs_t gs_cmap_ToUnicode_procs = {
532
gs_cmap_ToUnicode_decode_next,
533
gs_cmap_ToUnicode_enum_ranges,
534
gs_cmap_ToUnicode_enum_lookups,
535
gs_cmap_ToUnicode_is_identity
539
* Allocate and initialize a ToUnicode CMap.
542
gs_cmap_ToUnicode_alloc(gs_memory_t *mem, int id, int num_codes, int key_size, gs_cmap_t **ppcmap)
544
uchar *map, *cmap_name = NULL;
545
gs_cmap_ToUnicode_t *cmap;
548
/* We don't write a CMap name to ToUnicode CMaps,
549
* because (1) there is no conventional method for
550
* generating them, and (2) Acrobat Reader ignores them.
551
* But we'd like to keep this code until beta-testing completes,
552
* and we ensure that other viewers do not need the names.
554
char sid[10], *pref = "aux-";
555
int sid_len, pref_len = strlen(pref);
557
sprintf(sid, "%d", id);
558
sid_len = strlen(sid);
559
name_len = pref_len + sid_len;
560
cmap_name = gs_alloc_string(mem, name_len, "gs_cmap_ToUnicode_alloc");
562
return_error(gs_error_VMerror);
563
memcpy(cmap_name, pref, pref_len);
564
memcpy(cmap_name + pref_len, sid, sid_len);
566
code = gs_cmap_alloc(ppcmap, &st_cmap_ToUnicode,
567
0, cmap_name, name_len, NULL, 0, &gs_cmap_ToUnicode_procs, mem);
570
map = (uchar *)gs_alloc_bytes(mem, num_codes * gs_cmap_ToUnicode_code_bytes,
571
"gs_cmap_ToUnicode_alloc");
573
return_error(gs_error_VMerror);
574
memset(map, 0, num_codes * gs_cmap_ToUnicode_code_bytes);
575
cmap = (gs_cmap_ToUnicode_t *)*ppcmap;
576
cmap->glyph_name_data = map;
579
cmap->key_size = key_size;
580
cmap->num_codes = num_codes;
581
cmap->ToUnicode = true;
582
cmap->is_identity = true;
587
* Write a code pair to ToUnicode CMap.
590
gs_cmap_ToUnicode_add_pair(gs_cmap_t *pcmap, int code0, int code1)
591
{ gs_cmap_ToUnicode_t *cmap = (gs_cmap_ToUnicode_t *)pcmap;
592
uchar *map = pcmap->glyph_name_data;
593
const int num_codes = ((gs_cmap_ToUnicode_t *)pcmap)->num_codes;
595
if (code0 >= num_codes)
596
return; /* must not happen. */
597
map[code0 * gs_cmap_ToUnicode_code_bytes + 0] = (uchar)(code1 >> 8);
598
map[code0 * gs_cmap_ToUnicode_code_bytes + 1] = (uchar)(code1 & 0xFF);
599
cmap->is_identity &= (code0 == code1);