1
/* Copyright (C) 2001-2006 Artifex Software, Inc.
4
This software is provided AS-IS with no warranty, either express or
7
This software is distributed under license and may not be copied, modified
8
or distributed except as expressly authorized under the terms of that
9
license. Refer to licensing information at http://www.artifex.com/
10
or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
11
San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.
14
/* $Id: gxfcmap.h 8022 2007-06-05 22:23:38Z giles $ */
15
/* Internal CMap structure definitions */
17
/* This file should be called gxcmap.h, except that name is already used. */
19
#ifndef gxfcmap_INCLUDED
20
# define gxfcmap_INCLUDED
/*
 * CMaps are the structures that map (possibly variable-length) characters
 * appearing in a text string to glyph numbers in some font-specific space.
 * The structure defined here generally follows Adobe's specifications, but
 * the actual implementation of the code space and the lookup tables is
 * virtual, so that the same interface can be used for direct access to the
 * corresponding "cmap" structure in TrueType fonts, rather than having to
 * convert that structure to the Adobe-based one.
 */

/*
 * A CMap conceptually consists of three parts:
 *
 *   - The code space, used for parsing the input string into (possibly
 *     variable-length) characters.
 *
 *   - A 'def' map, which maps defined parsed characters to values.
 *
 *   - A 'notdef' map, which maps parsed but undefined characters to
 *     values.
 *
 * The value of a character may be a string, a name, or a CID.  For more
 * information, see the Adobe documentation.
 */
51
/* ---------------- Code space ranges ---------------- */
54
* A code space is a non-empty, lexicographically sorted sequence of
55
* code space ranges. Ranges must not overlap. In each range,
56
* first[i] <= last[i] for 0 <= i < size.
58
#define MAX_CMAP_CODE_SIZE 4
59
typedef struct gx_code_space_range_s {
60
byte first[MAX_CMAP_CODE_SIZE];
61
byte last[MAX_CMAP_CODE_SIZE];
62
int size; /* 1 .. MAX_CMAP_CODE_SIZE */
63
} gx_code_space_range_t;
/* ---------------- Lookup tables ---------------- */

/*
 * A lookup table is a non-empty sequence of lookup ranges.  Each range has
 * an associated sorted lookup table, indexed by the num_key_bytes low-order
 * code bytes.  If key_is_range is true, each key is a range (2 x key_size
 * bytes); if false, each key is a single code (key_size bytes).
 *
 * The only difference between CODE_VALUE_CID and CODE_VALUE_NOTDEF is
 * that after looking up a CID in a table, for CODE_VALUE_CID the result
 * is incremented by the difference between the input code and the key
 * (i.e., a single CODE_VALUE_CID entry actually represents a range of
 * CIDs), whereas for CODE_VALUE_NOTDEF, the result is not incremented.
 * The defined-character map for a CMap uses the former behavior; the
 * notdef map uses the latter.
 *
 * CODE_VALUE_GLYPH and CODE_VALUE_CHARS are reserved for
 * rearranged font CMaps, which are not implemented yet.
 */
typedef enum {
    CODE_VALUE_CID,             /* CIDs */
    CODE_VALUE_GLYPH,           /* glyphs */
    CODE_VALUE_CHARS,           /* character(s) */
    CODE_VALUE_NOTDEF           /* CID - for notdef(char|range) dst */
#define CODE_VALUE_MAX CODE_VALUE_NOTDEF
} gx_cmap_code_value_type_t;
91
typedef struct gx_cmap_lookup_entry_s {
93
byte key[2][MAX_CMAP_CODE_SIZE]; /* [key_is_range + 1][key_size] */
94
int key_size; /* 0 .. MAX_CMAP_CODE_SIZE */
97
gx_cmap_code_value_type_t value_type;
98
gs_const_string value;
99
int font_index; /* for rearranged fonts */
100
} gx_cmap_lookup_entry_t;
/* ---------------- CMaps proper ---------------- */

/*
 * Define the elements common to all CMaps.  Currently we include all
 * elements from the Adobe specification except for the actual code space
 * ranges and lookup tables.
 *
 * CMapType and id are common to all CMapTypes.  We really only support the
 * single Adobe standard CMap format.  Note that the only documented values
 * of CMapType in the PLRM are 0 and 1, which are equivalent; however, in
 * the second PDF Reference, the CMapType for the example ToUnicode CMap is
 * 2.
 *
 * glyph_name and glyph_name_data are only used if the CMap has lookup
 * entries of type CODE_VALUE_GLYPH.  We deliberately chose to make
 * glyph_name a function pointer rather than including it in the procs
 * virtual functions.  The rationale is that the virtual functions are
 * dependent on the representation of the CMap, so they should be set by the
 * code that must work with this structure.  However, glyph_name is not
 * dependent on the representation of the CMap: it does not need to know
 * anything about how the CMap is stored.  Rather, it is meant to be used by
 * the client who constructs the CMap, who decides how stored
 * CODE_VALUE_GLYPH values correspond to printable glyph names.  The same
 * glyph_name procedure can, in principle, be used with multiple different
 * subclasses of gs_cmap_t.
 */
/*
 * Forward declaration of the CMap type.  The gs_cmap_DEFINED guard lets
 * other headers provide the same typedef without a duplicate definition.
 */
#ifndef gs_cmap_DEFINED
#  define gs_cmap_DEFINED
typedef struct gs_cmap_s gs_cmap_t;
#endif
133
/*
 * GS_CMAP_COMMON expands to the member list shared by every CMap
 * structure.  CMapType must stay first; CMapType and id are the only
 * members common to all CMapTypes, the rest belong to the Adobe-format
 * CMap.
 *
 * NOTE(review): the bare numeric lines interleaved with the members below
 * are not part of the macro and break its backslash continuations.  The
 * numbering also skips between CIDSystemInfo and uid, and between uid and
 * from_Unicode, so members may be missing here -- compare against the
 * canonical header before relying on this definition.
 */
#define GS_CMAP_COMMON\
134
int CMapType; /* must be first */\
135
gs_id id; /* internal ID (no relation to UID) */\
136
/* End of entries common to all CMapTypes */\
137
gs_const_string CMapName;\
138
gs_cid_system_info_t *CIDSystemInfo; /* [num_fonts] */\
141
gs_uid uid; /* XUID or nothing */\
144
bool from_Unicode; /* if true, characters are Unicode */\
145
bool ToUnicode; /* if true, it is a ToUnicode CMap */\
146
gs_glyph_name_proc_t glyph_name; /* glyph name procedure for printing */\
147
void *glyph_name_data; /* closure data */\
148
const gs_cmap_procs_t *procs
/*
 * Structure descriptor for gs_cmap_t; the macro is instantiated in
 * gsfcmap.c.  The members handed to the descriptor macros are CMapName
 * (as a const string) and CIDSystemInfo, uid.xvalues and glyph_name_data
 * (as object pointers).
 */
#define public_st_cmap()        /* in gsfcmap.c */\
  BASIC_PTRS(cmap_ptrs) {\
    GC_CONST_STRING_ELT(gs_cmap_t, CMapName),\
    GC_OBJ_ELT3(gs_cmap_t, CIDSystemInfo, uid.xvalues, glyph_name_data)\
  };\
  gs_public_st_basic(st_cmap, gs_cmap_t, "gs_cmap_t", cmap_ptrs, cmap_data)
/*
 * Forward declarations of the two enumerator types, needed by the
 * procedure table that follows.
 */
typedef struct gs_cmap_ranges_enum_s gs_cmap_ranges_enum_t;
typedef struct gs_cmap_lookups_enum_s gs_cmap_lookups_enum_t;
161
typedef struct gs_cmap_procs_s {
164
* Decode and map a character from a string using a CMap.
165
* See gsfcmap.h for details.
168
int (*decode_next)(const gs_cmap_t *pcmap, const gs_const_string *str,
169
uint *pindex, uint *pfidx,
170
gs_char *pchr, gs_glyph *pglyph);
173
* Initialize an enumeration of code space ranges. See below.
176
void (*enum_ranges)(const gs_cmap_t *pcmap,
177
gs_cmap_ranges_enum_t *penum);
180
* Initialize an enumeration of lookups. See below.
183
void (*enum_lookups)(const gs_cmap_t *pcmap, int which,
184
gs_cmap_lookups_enum_t *penum);
187
* Check if the cmap is identity.
190
bool (*is_identity)(const gs_cmap_t *pcmap, int font_index_only);
198
/* ---------------- Enumerators ---------------- */
201
* Define enumeration structures for code space ranges and lookup tables.
202
* Since all current and currently envisioned implementations are very
203
* simple, we don't bother to make this fully general, with subclasses
204
* or a "finish" procedure.
206
typedef struct gs_cmap_ranges_enum_procs_s {
207
int (*next_range)(gs_cmap_ranges_enum_t *penum);
208
} gs_cmap_ranges_enum_procs_t;
209
/*
 * Enumerator over the code space ranges of a CMap.  `range` is the
 * public result slot; `cmap` and `procs` are private to the
 * implementation.
 *
 * NOTE(review): the bare numeric lines and the undelimited comment text
 * below are not valid C, and the end of this definition is not visible
 * here (no closing brace; possibly an index member, since the setup
 * procedure is described as defaulting index to 0).  Compare against the
 * canonical header.
 */
struct gs_cmap_ranges_enum_s {
211
* Return the next code space range here.
213
gx_code_space_range_t range;
215
* The rest of the information is private to the implementation.
217
const gs_cmap_t *cmap;
218
const gs_cmap_ranges_enum_procs_t *procs;
222
typedef struct gs_cmap_lookups_enum_procs_s {
223
int (*next_lookup)(gs_cmap_lookups_enum_t *penum);
224
int (*next_entry)(gs_cmap_lookups_enum_t *penum);
225
} gs_cmap_lookups_enum_procs_t;
226
/*
 * Enumerator over the lookups (and their entries) of a CMap.  `entry` is
 * the public result slot; `cmap`, `procs` and `temp_value` are private to
 * the implementation.  temp_value is sized to hold either a gs_glyph or a
 * gs_char -- presumably the local storage for value bytes that need not
 * survive from one call to the next; confirm against the implementation.
 *
 * NOTE(review): the bare numeric lines and the undelimited comment text
 * below are not valid C, and the end of this definition is not visible
 * here (no closing brace; a member may be missing before temp_value).
 * Compare against the canonical header.
 */
struct gs_cmap_lookups_enum_s {
228
* Return the next lookup and entry here.
230
gx_cmap_lookup_entry_t entry;
232
* The rest of the information is private to the implementation.
234
const gs_cmap_t *cmap;
235
const gs_cmap_lookups_enum_procs_t *procs;
237
byte temp_value[max(sizeof(gs_glyph), sizeof(gs_char))];
240
* Define a vacuous next_lookup procedure, useful for the notdef lookups
241
* for CMaps that don't have any.
243
extern const gs_cmap_lookups_enum_procs_t gs_cmap_no_lookups_procs;
245
/* ---------------- Client procedures ---------------- */
248
* Initialize the enumeration of the code space ranges, and enumerate
249
* the next range. enum_next returns 0 if OK, 1 if finished, <0 if error.
250
* The intended usage is:
252
* for (gs_cmap_ranges_enum_init(pcmap, &renum);
253
* (code = gs_cmap_enum_next_range(&renum)) == 0; ) {
256
* if (code < 0) <<error>>
258
void gs_cmap_ranges_enum_init(const gs_cmap_t *pcmap,
259
gs_cmap_ranges_enum_t *penum);
260
int gs_cmap_enum_next_range(gs_cmap_ranges_enum_t *penum);
263
* Initialize the enumeration of the lookups, and enumerate the next
264
* the next lookup or entry. which = 0 for defined characters,
265
* which = 1 for notdef. next_xxx returns 0 if OK, 1 if finished,
266
* <0 if error. The intended usage is:
268
* for (gs_cmap_lookups_enum_init(pcmap, which, &lenum);
269
* (code = gs_cmap_enum_next_lookup(&lenum)) == 0; ) {
270
* while ((code = gs_cmap_enum_next_entry(&lenum)) == 0) {
273
* if (code < 0) <<error>>
275
* if (code < 0) <<error>>
277
* Note that next_lookup sets (at least) penum->entry.
278
* key_size, key_is_range, value_type, font_index
279
* whereas next_entry sets penum->entry.
280
* key[0][*], key[1][*], value
281
* Clients must not modify any members of the enumerator.
282
* The bytes of the value string may be allocated locally (in the enumerator
283
* itself) and not survive from one call to the next.
285
void gs_cmap_lookups_enum_init(const gs_cmap_t *pcmap, int which,
286
gs_cmap_lookups_enum_t *penum);
287
int gs_cmap_enum_next_lookup(gs_cmap_lookups_enum_t *penum);
288
int gs_cmap_enum_next_entry(gs_cmap_lookups_enum_t *penum);
290
/* ---------------- Implementation procedures ---------------- */
293
* Initialize a just-allocated CMap, to ensure that all pointers are clean
294
* for the GC. Note that this only initializes the common part.
296
void gs_cmap_init(const gs_memory_t *mem, gs_cmap_t *pcmap, int num_fonts);
299
* Allocate and initialize (the common part of) a CMap.
301
int gs_cmap_alloc(gs_cmap_t **ppcmap, const gs_memory_struct_type_t *pstype,
302
int wmode, const byte *map_name, uint name_size,
303
const gs_cid_system_info_t *pcidsi, int num_fonts,
304
const gs_cmap_procs_t *procs, gs_memory_t *mem);
307
* Initialize an enumerator with convenient defaults (index = 0).
309
void gs_cmap_ranges_enum_setup(gs_cmap_ranges_enum_t *penum,
310
const gs_cmap_t *pcmap,
311
const gs_cmap_ranges_enum_procs_t *procs);
312
void gs_cmap_lookups_enum_setup(gs_cmap_lookups_enum_t *penum,
313
const gs_cmap_t *pcmap,
314
const gs_cmap_lookups_enum_procs_t *procs);
317
* Check for identity CMap. Uses a fast check for special cases.
319
bool gs_cmap_is_identity(const gs_cmap_t *pcmap, int font_index_only);
322
* For a random CMap, compute whether it is identity.
323
* It is not applicable to gs_cmap_ToUnicode_t due to
324
* different sizes of domain keys and range values.
326
bool gs_cmap_compute_identity(const gs_cmap_t *pcmap, int font_index_only);
328
#endif /* gxfcmap_INCLUDED */