1
#Copyright ReportLab Europe Ltd. 2000-2004
2
#see license.txt for license details
3
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/pdfbase/cidfonts.py
5
__version__=''' $Id$ '''
6
__doc__="""CID (Asian multi-byte) font support.
8
This defines classes to represent CID fonts. They know how to calculate
9
their own width and how to write themselves into PDF files."""
12
import marshal
import os
import time
from string import find, split, strip
from types import ListType, TupleType, DictType

from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase._cidfontdata import allowedTypeFaces, allowedEncodings, CIDFontInfo
from reportlab.pdfgen.canvas import Canvas
from reportlab.pdfbase import pdfdoc
from reportlab.rl_config import CMapSearchPath
26
def findCMapFile(name):
    """Return the full filename of the CMAP file called ``name``.

    Every directory listed in CMapSearchPath is tried in order; the first
    candidate that exists as a regular file is returned.

    Raises IOError when no directory on the search path holds the file.
    """
    for dirname in CMapSearchPath:
        # Plain concatenation (rather than os.path.join) is deliberate:
        # a name that starts with a separator must still be looked up
        # underneath dirname instead of replacing it.
        cmapfile = dirname + os.sep + name
        if os.path.isfile(cmapfile):
            return cmapfile
    raise IOError('CMAP file for encodings "%s" not found!' % name)
34
def structToPDF(structure):
    """Convert a deeply nested structure to pdfdoc dictionary/array objects.

    Dictionaries become pdfdoc.PDFDictionary and lists/tuples become
    pdfdoc.PDFArray, with their contents converted recursively.  Any other
    value is a leaf and is returned unchanged.
    """
    if isinstance(structure, dict):
        newDict = {}
        for k, v in structure.items():
            newDict[k] = structToPDF(v)
        return pdfdoc.PDFDictionary(newDict)
    elif isinstance(structure, (list, tuple)):
        newList = [structToPDF(elem) for elem in structure]
        return pdfdoc.PDFArray(newList)
    else:
        # leaf value (string, number, ...): nothing to convert
        return structure
49
class CIDEncoding(pdfmetrics.Encoding):
    """Multi-byte encoding.  These are loaded from CMAP files.

    A CMAP file is like a mini-codec.  It defines the correspondence
    between code points in the (multi-byte) input data and Character
    IDs (CIDs).

    Instance state:
      name              the encoding name, which is also the CMAP file name
      source            where the tables were loaded from
      _mapFileHash      MD5 digest of the CMAP file text
      _codeSpaceRanges  list of (start, end) code point ranges
      _notDefRanges     list of (start, end, cid) fallback ranges
      _cmap             dictionary mapping code point -> CID
    """
    # aims to do similar things to Brian Hooper's CMap class,
    # but I could not get it working and had to rewrite.
    # also, we should really rearrange our current encoding
    # into a SingleByteEncoding since many of its methods
    # should not apply here.

    def __init__(self, name, useCache=1):
        """Load the encoding called ``name``.

        With ``useCache`` true, a previously saved '<name>.fastmap' in the
        'FastCMAPS' temp directory is loaded if present; otherwise the
        CMAP file is parsed and a fastmap is written for next time.
        With ``useCache`` false the CMAP file is always parsed.
        """
        self.name = name
        self._mapFileHash = None
        self._codeSpaceRanges = []
        self._notDefRanges = []
        self._cmap = {}
        self.source = None
        if useCache:
            from reportlab.lib.utils import get_rl_tempdir
            fontmapdir = get_rl_tempdir('FastCMAPS')
            fastmap = fontmapdir + os.sep + name + '.fastmap'
            if os.path.isfile(fastmap):
                self.fastLoad(fontmapdir)
                self.source = fastmap
            else:
                self.parseCMAPFile(name)
                self.source = 'CMAP: ' + name
                self.fastSave(fontmapdir)
        else:
            self.parseCMAPFile(name)

    def _hash(self, text):
        "Return the MD5 digest of ``text``."
        try:
            from hashlib import md5 as newHasher
        except ImportError:
            # interpreters that predate hashlib
            from md5 import new as newHasher
        hasher = newHasher()
        try:
            hasher.update(text)
        except TypeError:
            # a text (not byte) string: hash its encoded form instead
            hasher.update(text.encode('utf-8'))
        return hasher.digest()

    def _hexToken(self, token):
        "Convert a CMAP hex token such as '<8140>' to an integer."
        return int(token[1:-1], 16)

    def parseCMAPFile(self, name):
        """Parse the CMAP file ``name`` into this object's tables.

        This is a tricky one as CMAP files are Postscript ones.  Some
        refer to others with a 'usecmap' command; the referenced file is
        parsed first so that this file can add to or override it.

        Raises IOError (from findCMapFile) if the file cannot be found.
        """
        started = time.time()
        cmapfile = findCMapFile(name)
        f = open(cmapfile, 'r')
        try:
            rawdata = f.read()
        finally:
            f.close()

        # if it contains the token 'usecmap', parse the other
        # cmap file first....
        usecmap_pos = rawdata.find('usecmap')
        if usecmap_pos > -1:
            # they tell us to look in another file
            # for the code space ranges. The one
            # to use will be the previous word.
            words = rawdata[:usecmap_pos].split()
            # Postscript writes the operand as a name literal with a
            # leading slash, which is not part of the file name.
            otherCMAPName = words[-1].lstrip('/')
            self.parseCMAPFile(otherCMAPName)
            # now continue parsing this, as it may
            # override some settings

        # Recorded after the recursion above so that the stored hash is
        # that of the file named here, not of the file it refers to.
        self._mapFileHash = self._hash(rawdata)

        # Walk the token list with an index; re-slicing the list after
        # every token made this quadratic on the big unicode CMAPs.
        words = rawdata.split()
        total = len(words)
        hexToken = self._hexToken
        pos = 0
        while pos < total:
            token = words[pos]
            pos = pos + 1
            if token == 'begincodespacerange':
                while words[pos] != 'endcodespacerange':
                    start = hexToken(words[pos])
                    end = hexToken(words[pos + 1])
                    self._codeSpaceRanges.append((start, end))
                    pos = pos + 2
            elif token == 'beginnotdefrange':
                while words[pos] != 'endnotdefrange':
                    start = hexToken(words[pos])
                    end = hexToken(words[pos + 1])
                    value = int(words[pos + 2])
                    self._notDefRanges.append((start, end, value))
                    pos = pos + 3
            elif token == 'begincidrange':
                while words[pos] != 'endcidrange':
                    start = hexToken(words[pos])
                    end = hexToken(words[pos + 1])
                    value = int(words[pos + 2])
                    # this means that 'start' corresponds to 'value',
                    # start+1 corresponds to value+1 and so on up
                    # to and including 'end'
                    for offset in range(end - start + 1):
                        self._cmap[start + offset] = value + offset
                    pos = pos + 3
            # any other token (including the 'end...' markers) is skipped
        finished = time.time()
        print('parsed CMAP %s in %0.4f seconds' % (name, finished - started))

    def translate(self, text):
        """Convert a string into a list of CIDs.

        Characters are consumed one at a time.  A character that does not
        fall in any code space range on its own is held back and combined
        with the next one as a two-byte code.  A code inside a code space
        range but absent from the map gets the CID of the matching notdef
        range, or 0 if there is none.
        """
        output = []
        cmap = self._cmap
        lastChar = ''   # unmatched character carried over as a lead byte
        for char in text:
            if lastChar != '':
                num = ord(lastChar) * 256 + ord(char)
            else:
                num = ord(char)
            lastChar = char
            for low, high in self._codeSpaceRanges:
                # range ends are inclusive, as they are for the cid
                # ranges filled in by parseCMAPFile
                if low <= num <= high:
                    cid = cmap.get(num)
                    if cid is None:
                        # not defined.  Try to find the appropriate
                        # notdef character, or failing that use zero
                        cid = 0
                        for low2, high2, notdef in self._notDefRanges:
                            if low2 <= num <= high2:
                                cid = notdef
                                break
                    output.append(cid)
                    lastChar = ''   # code consumed, nothing carried over
                    break
        return output

    def fastSave(self, directory):
        "Marshal the parsed tables to '<name>.fastmap' in ``directory``."
        f = open(os.path.join(directory, self.name + '.fastmap'), 'wb')
        try:
            marshal.dump(self._mapFileHash, f)
            marshal.dump(self._codeSpaceRanges, f)
            marshal.dump(self._notDefRanges, f)
            marshal.dump(self._cmap, f)
        finally:
            f.close()

    def fastLoad(self, directory):
        "Restore the tables written by fastSave from ``directory``."
        # NOTE(review): marshal is not safe against corrupt or malicious
        # data; this trusts whatever '.fastmap' sits in the directory.
        f = open(os.path.join(directory, self.name + '.fastmap'), 'rb')
        try:
            # must be read back in exactly the order fastSave wrote them
            self._mapFileHash = marshal.load(f)
            self._codeSpaceRanges = marshal.load(f)
            self._notDefRanges = marshal.load(f)
            self._cmap = marshal.load(f)
        finally:
            f.close()
208
class CIDTypeFace(pdfmetrics.TypeFace):
    """Multi-byte type face.

    Conceptually similar to a single byte typeface,
    but the glyphs are identified by a numeric Character
    ID (CID) and not a glyph name. """

    def __init__(self, name):
        """Initialised from one of the canned dictionaries in CIDFontInfo.

        Raises KeyError if ``name`` has no entry there.
        """
        pdfmetrics.TypeFace.__init__(self, name)
        self._extractDictInfo(name)

    def _extractDictInfo(self, name):
        """Copy ascent, descent and width data out of CIDFontInfo[name].

        Sets self.ascent, self.descent, self._defaultWidth and
        self._explicitWidths.  Raises KeyError for an unknown name.
        """
        try:
            fontDict = CIDFontInfo[name]
        except KeyError:
            raise KeyError(
                "Unable to find information on CID typeface '%s'. "
                "Only the following font names work: %s"
                % (name, repr(allowedTypeFaces)))
        descFont = fontDict['DescendantFonts'][0]
        self.ascent = descFont['FontDescriptor']['Ascent']
        self.descent = descFont['FontDescriptor']['Descent']
        self._defaultWidth = descFont['DW']
        self._explicitWidths = self._expandWidths(descFont['W'])

        # should really support self.glyphWidths, self.glyphNames
        # but not done yet.

    def _expandWidths(self, compactWidthArray):
        """Expands Adobe nested list structure to get a dictionary of widths.

        The flat input mixes two kinds of entry:

            start, (w0, w1, ...)   CIDs start, start+1, ... get the widths
                                   listed, one each
            start, end, width      every CID from start to end inclusive
                                   gets the same width

        For example (1, (277, 305), 17, 26, 668) gives CID 1 a width of
        277, CID 2 a width of 305 and CIDs 17 to 26 a width of 668.

        Returns a dictionary mapping CID -> width.  The input is not
        modified.
        """
        widths = {}
        total = len(compactWidthArray)
        pos = 0
        while pos < total:
            start = compactWidthArray[pos]
            entry = compactWidthArray[pos + 1]
            if isinstance(entry, (list, tuple)):
                # explicit list: one width per consecutive CID
                for offset, width in enumerate(entry):
                    widths[start + offset] = width
                pos = pos + 2
            else:
                # 'entry' is the last CID of a run sharing one width
                width = compactWidthArray[pos + 2]
                for idx in range(start, entry + 1):
                    widths[idx] = width
                pos = pos + 3
        return widths

    def getCharWidth(self, characterId):
        "Return the width for a CID, or the default width if not listed."
        return self._explicitWidths.get(characterId, self._defaultWidth)
273
class CIDFont(pdfmetrics.Font):
    "Represents a built-in multi-byte font"

    def __init__(self, face, encoding):
        """Combine a CID typeface name with a CMAP encoding name.

        ``face`` must be one of allowedTypeFaces and ``encoding`` one of
        allowedEncodings; otherwise AssertionError is raised.  The font
        is named '<face>-<encoding>'.
        """
        # NOTE(review): marks the font as multi-byte for code that
        # inspects this flag -- confirm against pdfmetrics/pdfgen usage.
        self._multiByte = 1
        assert face in allowedTypeFaces, "TypeFace '%s' not supported! Use any of these instead: %s" % (face, allowedTypeFaces)
        self.faceName = face
        #should cache in registry...
        self.face = CIDTypeFace(face)

        assert encoding in allowedEncodings, "Encoding '%s' not supported! Use any of these instead: %s" % (encoding, allowedEncodings)
        self.encodingName = encoding
        self.encoding = CIDEncoding(encoding)

        #legacy hack doing quick cut and paste.
        self.fontName = self.faceName + '-' + self.encodingName
        self.name = self.fontName

        # need to know if it is vertical or horizontal;
        # vertical encoding names end in 'V'
        self.isVertical = (self.encodingName[-1] == 'V')

    def stringWidth(self, text, size):
        """Return the advance of ``text`` set at ``size``.

        The text is translated to CIDs first.  For a vertical encoding
        every CID counts as a full ``size``; otherwise the per-CID widths
        (thousandths of the font size) are summed and scaled.
        """
        cidlist = self.encoding.translate(text)
        if self.isVertical:
            #this part is "not checked!" but seems to work.
            #assume each is 1000 ems high
            return len(cidlist) * size
        w = 0
        for cid in cidlist:
            w = w + self.face.getCharWidth(cid)
        return 0.001 * w * size

    def addObjects(self, doc):
        """Add this font's PDF objects to ``doc`` and register it.

        The font dictionary comes from CIDFontInfo, is given an internal
        name ('F' plus the next font number) and this font's encoding,
        converted with structToPDF, and linked into the 'BasicFonts'
        dictionary and the document's font mapping.
        """
        internalName = 'F' + repr(len(doc.fontMapping)+1)

        # Work on a copy: the entries in CIDFontInfo are shared by every
        # font using this face and must not pick up our Name/Encoding.
        # Only top-level keys are assigned, so a shallow copy is enough.
        bigDict = CIDFontInfo[self.face.name].copy()
        bigDict['Name'] = '/' + internalName
        bigDict['Encoding'] = '/' + self.encodingName

        #convert to PDF dictionary/array objects
        cidObj = structToPDF(bigDict)

        # link into document, and add to font map
        r = doc.Reference(cidObj, internalName)
        fontDict = doc.idToObject['BasicFonts'].dict
        fontDict[internalName] = r
        doc.fontMapping[self.name] = '/' + internalName
329
def precalculate(cmapdir):
    """Crunch through all files in ``cmapdir``, making 'fastmap' files.

    A file is skipped when it is itself a '.fastmap' or when its fastmap
    already exists.  A file that cannot be loaded as a CIDEncoding is
    reported and skipped, so one bad file does not stop the run.
    """
    import os
    for fname in os.listdir(cmapdir):
        if fname.endswith('.fastmap'):
            # output of an earlier run, not a CMAP file
            continue
        if os.path.isfile(os.path.join(cmapdir, fname + '.fastmap')):
            continue
        try:
            enc = CIDEncoding(fname)
        except Exception:
            # report the file name; 'enc' is not bound when this fails
            print('cannot parse %s, skipping' % fname)
            continue
        enc.fastSave(cmapdir)
        print('saved %s.fastmap' % fname)
345
def test():
    """Smoke test: write 'test_japanese.pdf' and print some diagnostics.

    Registers two Japanese CID fonts, draws a Shift-JIS string with one
    of them, then prints the CID translation and width of that string.
    """
    # NOTE(review): the 'def' line is absent from the listing; the name
    # 'test' is assumed from the __main__ guard that follows -- confirm.
    # only works if you have correct encodings on your box!
    c = Canvas('test_japanese.pdf')
    c.setFont('Helvetica', 30)
    c.drawString(100,700, 'Japanese Font Support')

    pdfmetrics.registerFont(CIDFont('HeiseiMin-W3','90ms-RKSJ-H'))
    pdfmetrics.registerFont(CIDFont('HeiseiKakuGo-W5','90ms-RKSJ-H'))

    c.setFont('HeiseiMin-W3-90ms-RKSJ-H', 16)
    # this says "This is HeiseiMincho" in shift-JIS.  Not all our readers
    # have a Japanese PC, so I escaped it. On a Japanese-capable
    # system, print the string to see Kanji
    message1 = '\202\261\202\352\202\315\225\275\220\254\226\276\222\251\202\305\202\267\201B'
    c.drawString(100, 675, message1)
    # write the file before announcing it
    c.save()
    print('saved test_japanese.pdf')

##    print 'CMAP_DIR = ', CMAP_DIR
##    tf1 = CIDTypeFace('HeiseiMin-W3')
##    print 'ascent = ',tf1.ascent
##    print 'descent = ',tf1.descent
##    for cid in [1,2,3,4,5,18,19,28,231,1742]:
##        print 'width of cid %d = %d' % (cid, tf1.getCharWidth(cid))

    encName = '90ms-RKSJ-H'
    enc = CIDEncoding(encName)
    print('%s -> %s' % (message1, enc.translate(message1)))

    f = CIDFont('HeiseiMin-W3','90ms-RKSJ-H')
    print('width = %0.2f' % f.stringWidth(message1, 10))

    #testing all encodings
##    started = time.time()
##    for encName in _cidfontdata.allowedEncodings:
##        #encName = '90ms-RKSJ-H'
##        enc = CIDEncoding(encName)
##        print 'encoding %s:' % encName
##        print '    codeSpaceRanges = %s' % enc._codeSpaceRanges
##        print '    notDefRanges = %s' % enc._notDefRanges
##        print '    mapping size = %d' % len(enc._cmap)
##    finished = time.time()
##    print 'constructed all encodings in %0.2f seconds' % (finished - started)
394
if __name__=='__main__':