1
/*****************************************************************
4
** Copyright 1998 Clark Cooper
5
** All rights reserved.
7
** This program is free software; you can redistribute it and/or
8
** modify it under the same terms as Perl itself.
14
/* Magic number for encoding map files; the magic field of the file header should match this value. */
#define ENCMAP_MAGIC 0xfeebface
16
typedef struct prefixmap {
18
unsigned char len; /* 0 => 256 */
19
unsigned short bmap_start;
20
unsigned char ispfx[32];
21
unsigned char ischar[32];
26
unsigned short prefixes_size;
27
unsigned short bytemap_size;
30
unsigned short *bytemap;
33
typedef struct encmaphdr
37
unsigned short pfsize;
38
unsigned short bmsize;
42
/*================================================================
43
** Structure of Encoding map binary encoding
45
** Note that all shorts and ints are in network order,
46
** so when packing or unpacking with Perl, use 'n' and 'N' respectively.
47
** In C, use the htonl family of functions.
49
** The basic structure is:
51
** _______________________
52
** |Header (including map expat needs for 1st byte)
53
** |PrefixMap * pfsize
54
** | This section isn't included for single-byte encodings.
55
** | For multiple byte encodings, when a byte represents a prefix
56
** | then it indexes into this vector instead of mapping to a
57
** | Unicode character. The PrefixMap type is declared above. The
58
** | ispfx and ischar fields are bitvectors indicating whether
59
** | the byte being mapped is a prefix or character respectively.
60
** | If neither is set, then the character is not mapped to Unicode.
62
** | The min field is the 1st byte mapped for this prefix; the
63
** | len field is the number of bytes mapped; and bmap_start is
64
** | the starting index of the map for this prefix in the overall
65
** | map (next section).
66
** |unsigned short * bmsize
67
** | This section also is omitted for single-byte encodings.
68
** | Each short is either a Unicode scalar or an index into the
69
** | PrefixMap vector.
71
** The header for these files is declared above as the Encmap_Header type.
72
** The magic field is a magic number which should match the ENCMAP_MAGIC
73
** macro above. The next 40 bytes store the IANA registered name for the
74
** encoding. The pfsize field holds the number of PrefixMaps, which should
75
** be zero for single byte encodings. The bmsize field holds the number of
76
** shorts used for the overall map.
78
** The map field contains either the Unicode scalar encoded by the 1st byte
79
** or -n where n is the number of bytes that such a 1st byte implies (Expat
80
** requires that the number of bytes to encode a character is indicated by
81
** the 1st byte) or -1 if the byte doesn't map to any Unicode character.
83
** If the encoding is a multiple byte encoding, then there will be PrefixMap
84
** and character map sections. The 1st PrefixMap (index 0) covers a range
85
** of bytes that includes all 1st byte prefixes.
87
** Look at convert_to_unicode in Expat.xs to see how this data structure
91
#endif /* ndef ENCODING_H */