1
//========================================================================
3
// P2PCharCodeToUnicode.cc
7
// Based on Poppler CharCodeToUnicode.h
8
// Copyright 2001-2003 Glyph & Cog, LLC
10
//========================================================================
14
#ifdef USE_GCC_PRAGMAS
15
#pragma implementation
21
#include "goo/GooString.h"
23
#include "GlobalParams.h"
24
#include "PSTokenizer.h"
25
#include "P2PCharCodeToUnicode.h"
27
//------------------------------------------------------------------------
29
#define maxUnicodeString 8
31
// Element type of the sMap table that addMapping() fills for mappings stored
// as a sequence of Unicode values.
// NOTE(review): only the `u` member is visible in this view; any other members
// and the closing brace are on lines not shown here.
struct P2PCharCodeToUnicodeString {
33
// Unicode values of this entry; the array holds maxUnicodeString (8) slots.
Unicode u[maxUnicodeString];
37
//------------------------------------------------------------------------
39
// Character-source callback passed (together with a FILE*) to parseCMap1(),
// which hands both to PSTokenizer. `data` is the FILE* to read from; the
// return value is whatever fgetc() reports for that stream.
static int getCharFromFile(void *data) {
40
return fgetc((FILE *)data);
43
//------------------------------------------------------------------------
45
// Creates a new P2PCharCodeToUnicode and fills it from a ToUnicode CMap file.
//   fileName - name resolved to an open file via
//              globalParams->findToUnicodeFile()
//   nBits    - forwarded unchanged to parseCMap1()
// NOTE(review): several lines of this function (declaration of `f`, the
// branch separating the error report, the return) are not visible here.
P2PCharCodeToUnicode *P2PCharCodeToUnicode::parseCMapFromFile(
46
GooString *fileName, int nBits) {
47
P2PCharCodeToUnicode *ctu;
50
ctu = new P2PCharCodeToUnicode();
51
// A non-null result is the opened CMap file; it is read through
// getCharFromFile.
if ((f = globalParams->findToUnicodeFile(fileName))) {
52
ctu->parseCMap1(&getCharFromFile, f, nBits);
55
// Per its message, this reports the case where no CMap file was found.
p2pError(-1, const_cast<char *>("Couldn't find ToUnicode CMap file for '%s'"),
56
fileName->getCString());
61
// Parses a ToUnicode CMap. Characters are pulled from getCharFunc(data)
// through a PSTokenizer and every mapping found is recorded with addMapping().
// Three constructs are recognised:
//   - "usecmap": the token read just before it (first character skipped) names
//     another CMap, which is located with globalParams->findToUnicodeFile()
//     and parsed by a nested parseCMap1() call;
//   - "beginbfchar" ... "endbfchar": pairs of <code> <unicode>;
//   - "beginbfrange" ... "endbfrange": <lo> <hi> followed by either a
//     '[' ... ']' list of <unicode> strings or a single <unicode> base string.
// Code tokens must be '<' + nDigits hex digits + '>'; a token that is longer
// only because it contains '\n' / '\r' is accepted after recounting.
// Malformed input is reported through p2pError().
// NOTE(review): the rest of the signature and a number of body lines are not
// visible in this view; they are left exactly as they are.
void P2PCharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
64
char tok1[256], tok2[256], tok3[256];
65
int nDigits, n1, n2, n3;
67
CharCode code1, code2;
72
pst = new PSTokenizer(getCharFunc, data);
73
// Prime tok1 so that, inside the loop, tok1 is the token read before tok2.
pst->getToken(tok1, sizeof(tok1), &n1);
74
while (pst->getToken(tok2, sizeof(tok2), &n2)) {
75
if (!strcmp(tok2, "usecmap")) {
77
// tok1 + 1 skips the first character of the name token.
name = new GooString(tok1 + 1);
78
if ((f = globalParams->findToUnicodeFile(name))) {
79
parseCMap1(&getCharFromFile, f, nBits);
82
p2pError(-1, const_cast<char *>("Couldn't find ToUnicode CMap file for '%s'"),
87
pst->getToken(tok1, sizeof(tok1), &n1);
88
} else if (!strcmp(tok2, "beginbfchar")) {
89
while (pst->getToken(tok1, sizeof(tok1), &n1)) {
90
if (!strcmp(tok1, "endbfchar")) {
93
// A code token must be followed by a value token; end of input or
// "endbfchar" at this point means the pair is incomplete.
if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
94
!strcmp(tok2, "endbfchar")) {
95
p2pError(-1, const_cast<char *>("Illegal entry in bfchar block in ToUnicode CMap"));
98
if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
99
tok2[0] == '<' && tok2[n2 - 1] == '>')) {
101
// check there was no line jump inside the token and so the length is
102
// longer than it should be
104
for (int k = 0; k < n1; k++)
105
if (tok1[k] != '\n' && tok1[k] != '\r') countAux++;
107
if (!(countAux == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
108
tok2[0] == '<' && tok2[n2 - 1] == '>')) {
109
p2pError(-1, const_cast<char *>("Illegal entry in bfchar block in ToUnicode CMap"));
113
// Overwrite the closing '>' of both tokens so that tok + 1 is a plain,
// NUL-terminated hex string.
tok1[n1 - 1] = tok2[n2 - 1] = '\0';
114
if (sscanf(tok1 + 1, "%x", &code1) != 1) {
115
p2pError(-1, const_cast<char *>("Illegal entry in bfchar block in ToUnicode CMap"));
118
// n2 - 2 is the value length without the two angle brackets.
addMapping(code1, tok2 + 1, n2 - 2, 0);
120
pst->getToken(tok1, sizeof(tok1), &n1);
121
} else if (!strcmp(tok2, "beginbfrange")) {
122
while (pst->getToken(tok1, sizeof(tok1), &n1)) {
123
if (!strcmp(tok1, "endbfrange")) {
126
// A range entry needs two more tokens (<hi> and the destination);
// running out of input or meeting "endbfrange" early is an error.
if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
127
!strcmp(tok2, "endbfrange") ||
128
!pst->getToken(tok3, sizeof(tok3), &n3) ||
129
!strcmp(tok3, "endbfrange")) {
130
p2pError(-1, const_cast<char *>("Illegal entry in bfrange block in ToUnicode CMap"));
133
if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
134
n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>')) {
135
// check there was no line jump inside the token and so the length is
136
// longer than it should be
138
for (int k = 0; k < n1; k++)
139
if (tok1[k] != '\n' && tok1[k] != '\r') countAux++;
142
// Recount tok2 over its own length (n2) into countAux2, the counter the
// check below compares for tok2. Previously this loop ran to n1 and
// incremented countAux, so countAux2 was never counted.
for (int k = 0; k < n2; k++)
143
if (tok2[k] != '\n' && tok2[k] != '\r') countAux2++;
145
if (!(countAux == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
146
countAux2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>')) {
147
p2pError(-1, const_cast<char *>("Illegal entry in bfrange block in ToUnicode CMap"));
151
// Overwrite the closing '>' so both bounds can be read as hex strings.
tok1[n1 - 1] = tok2[n2 - 1] = '\0';
152
if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
153
sscanf(tok2 + 1, "%x", &code2) != 1) {
154
p2pError(-1, const_cast<char *>("Illegal entry in bfrange block in ToUnicode CMap"));
157
// Array form: a list of <unicode> strings, the one read at index i being
// mapped to code1 + i, until "]" or the end of the range.
if (!strcmp(tok3, "[")) {
159
while (pst->getToken(tok1, sizeof(tok1), &n1) &&
160
code1 + i <= code2) {
161
if (!strcmp(tok1, "]")) {
164
if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
166
addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
168
p2pError(-1, const_cast<char *>("Illegal entry in bfrange block in ToUnicode CMap"));
172
// Base-string form: every code in [code1, code2] maps to the same string,
// with its position in the range passed as the offset.
} else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
174
for (i = 0; code1 <= code2; ++code1, ++i) {
175
addMapping(code1, tok3 + 1, n3 - 2, i);
179
p2pError(-1, const_cast<char *>("Illegal entry in bfrange block in ToUnicode CMap"));
182
pst->getToken(tok1, sizeof(tok1), &n1);
190
// Records the mapping of character code `code` to the Unicode value(s) given
// as the hex string uStr of length n.
//   - `map` is grown as needed so that it can be indexed by `code`; its
//     length is kept a multiple of 256.
//   - In the single-value path the string is parsed as one hex number and
//     stored in map[code] with `offset` added.
//   - In the multi-value path a new sMap entry is appended (sMap grows 16
//     entries at a time); the string is read in groups of 4 hex digits, at
//     most maxUnicodeString of them, and `offset` is added to the last value.
// Unparsable hex is reported through p2pError().
// NOTE(review): the end of the signature and the condition selecting between
// the two paths are on lines not visible in this view.
void P2PCharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
197
if (code >= mapLen) {
199
// Round the new length up to a multiple of 256 that is greater than `code`.
mapLen = (code + 256) & ~255;
200
map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
201
for (i = oldLen; i < mapLen; ++i) {
206
if (sscanf(uStr, "%x", &u) != 1) {
207
p2pError(-1, const_cast<char *>("Illegal entry in ToUnicode CMap"));
210
map[code] = u + offset;
212
if (sMapLen >= sMapSize) {
213
sMapSize = sMapSize + 16;
214
sMap = (P2PCharCodeToUnicodeString *)
215
greallocn(sMap, sMapSize, sizeof(P2PCharCodeToUnicodeString));
218
sMap[sMapLen].c = code;
219
sMap[sMapLen].len = n / 4;
// u[] holds only maxUnicodeString values. Without this clamp a long string
// makes `u[len - 1] += offset` below write past the end of the array, and
// leaves an over-long `len` for anything that later iterates the entry.
if (sMap[sMapLen].len > maxUnicodeString) {
sMap[sMapLen].len = maxUnicodeString;
}
220
for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
221
// Copy the next group of 4 hex digits into the scratch buffer.
strncpy(uHex, uStr + j*4, 4);
223
if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
224
p2pError(-1, const_cast<char *>("Illegal entry in ToUnicode CMap"));
227
// The offset is applied to the last stored value of the sequence.
sMap[sMapLen].u[sMap[sMapLen].len - 1] += offset;
232
// Constructs an empty mapping: allocates `map` with mapLen entries, walks
// over all of them, and starts with an empty sMap table
// (sMapLen == sMapSize == 0).
// NOTE(review): the line that sets mapLen and the body of the loop are not
// visible in this view.
P2PCharCodeToUnicode::P2PCharCodeToUnicode() {
236
map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
237
for (i = 0; i < mapLen; ++i) {
241
sMapLen = sMapSize = 0;
244
P2PCharCodeToUnicode::~P2PCharCodeToUnicode() {
251
int P2PCharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
261
for (i = 0; i < sMapLen; ++i) {
262
if (sMap[i].c == c) {
263
for (j = 0; j < sMap[i].len && j < size; ++j) {