1
#Copyright ReportLab Europe Ltd. 2000-2004
2
#see license.txt for license details
3
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/pdfbase/pdfutils.py
4
__version__=''' $Id$ '''
6
# pdfutils.py - everything to do with images, streams,
7
# compression, and some constants
10
from reportlab import rl_config
11
from string import join, replace, strip, split
12
from reportlab.lib.utils import getStringIO, ImageReader
16
def _chunker(src,dst=None,chunkSize=60):
    """Splits src into consecutive slices of at most chunkSize items.

    The slices are appended, in order, to dst.  When dst is omitted a
    fresh list is created for this call.  The list that received the
    slices is returned, so the result is usable either way."""
    # A literal [] default would be created once and then shared (and
    # keep growing) across every call that omits dst.
    if dst is None:
        dst = []
    # range works on both old and new interpreters
    for i in range(0,len(src),chunkSize):
        dst.append(src[i:i+chunkSize])
    return dst
21
##########################################################
23
# Image compression helpers. Preprocessing a directory
24
# of images will offer a vast speedup.
26
##########################################################
28
# Encodes an image as PDF inline-image text (Flate compressed, then
# ASCII-85 encoded) and either returns the lines (returnInMemory true)
# or writes them to a '.a85' file named after the source file.
# When IMG is a list, the ImageReader created here is appended to it.
# NOTE(review): 'code' and 'zlib' are used below but bound on lines that
# are not visible in this view - confirm they are defined before use.
def cacheImageFile(filename, returnInMemory=0, IMG=None):
29
"Processes image as if for encoding, saves to a file with .a85 extension."
31
from reportlab.lib.utils import open_for_read
34
# the cache file has the same base name as the source, with '.a85' extension
cachedname = os.path.splitext(filename)[0] + '.a85'
35
# true when the caller passed the name of a '.a85' file directly
if filename==cachedname:
36
if cachedImageExists(filename):
37
# split on LINEEND and drop the last piece (the text after the final LINEEND)
if returnInMemory: return split(open_for_read(cachedname).read(),LINEEND)[:-1]
39
# NOTE(review): presumably the branch taken when no cached file exists;
# 'raise X, msg' is Python 2 only syntax.
raise IOError, 'No such cached image %s' % filename
41
img = ImageReader(filename)
42
if IMG is not None: IMG.append(img)
44
imgwidth, imgheight = img.getSize()
45
raw = img.getRGBData()
48
# this describes what is in the image itself
50
# width, height, 8 bits per component, RGB colour space, and the two
# filters (ASCII-85 and Flate) that a reader must undo
code.append('/W %s /H %s /BPC 8 /CS /RGB /F [/A85 /Fl]' % (imgwidth, imgheight))
52
#use a flate filter and Ascii Base 85
53
# NOTE(review): assert is given a parenthesised 2-tuple, which is always
# true, so this check can never fire.  Before turning it into a real
# assert confirm the expected length - RGB data is presumably more than
# one byte per pixel.
assert(len(raw) == imgwidth * imgheight, "Wrong amount of data for image")
54
compressed = zlib.compress(raw) #this bit is very fast...
55
encoded = _AsciiBase85Encode(compressed) #...sadly this may not be
57
#append in blocks of 60 characters
58
# _chunker appends the slices of 'encoded' to the existing 'code' list
_chunker(encoded,code)
61
if returnInMemory: return code
64
# otherwise write every line, each terminated by LINEEND, to the cache file
f = open(cachedname,'wb')
65
f.write(join(code, LINEEND)+LINEEND)
68
print 'cached image as %s' % cachedname
71
def preProcessImages(spec):
    r"""Preprocesses one or more image files.

    Accepts either a filespec ('C:\mydir\*.jpg') or a list
    of image filenames, crunches them all to save time.  Run this
    to save huge amounts of time when repeatedly building documents
    that use the same images.

    Every file that has no up to date cached version (as reported by
    cachedImageExists) is passed to cacheImageFile; the others are
    only reported."""
    import glob

    if isinstance(spec, str):
        #a single string is a wildcard pattern to expand
        filelist = glob.glob(spec)
    else:   #list or tuple OK
        filelist = spec

    for filename in filelist:
        if cachedImageExists(filename):
            print('cached version of %s already exists' % filename)
        else:
            cacheImageFile(filename)
94
def cachedImageExists(filename):
    """Determines if a cached image already exists for a given file.

    Determines if a cached image exists which has the same name
    and equal or newer date to the given file.

    Returns 1 if the '.a85' file with the same base name exists and
    was modified no earlier than filename, otherwise 0.  os.stat will
    raise its usual error if the cached file exists but filename
    itself does not."""
    cachedname = os.path.splitext(filename)[0] + '.a85'
    if not os.path.isfile(cachedname):
        return 0

    #see if it is newer; item 8 of a stat result is the modification time
    original_date = os.stat(filename)[8]
    cached_date = os.stat(cachedname)[8]
    if original_date > cached_date:
        #the source changed after the cache was written
        return 0
    return 1
112
##############################################################
114
# PDF Helper functions
116
##############################################################
119
from _rl_accel import escapePDF, _instanceEscapePDF
123
from reportlab.lib._rl_accel import escapePDF, _instanceEscapePDF
126
_instanceEscapePDF=None
127
if rl_config.sys_version>='2.1':
129
for c in range(0,256):
131
_ESCAPEDICT[chr(c)]= '\\%03o' % c
132
elif c in (ord('\\'),ord('('),ord(')')):
133
_ESCAPEDICT[chr(c)] = '\\'+chr(c)
135
_ESCAPEDICT[chr(c)] = chr(c)
137
#Michael Hudson donated this
139
return join(map(lambda c, d=_ESCAPEDICT: d[c],s),'')
142
"""Escapes some PDF symbols (in fact, parenthesis).
143
PDF escapes are almost like Python ones, but brackets
144
need slashes before them too. Uses Python's repr function
145
and chops off the quotes first."""
147
s = replace(s, '(','\(')
148
s = replace(s, ')','\)')
151
def _normalizeLineEnds(text,desired=LINEEND):
    """Normalizes different line end character(s).

    Ensures all instances of CR, LF and CRLF end up as
    the specified one.  Returns the converted text."""
    import re
    # One pass over the text: CRLF is listed first so a CR LF pair counts
    # as a single line end.  No placeholder string is involved, so text
    # that happens to contain one cannot be damaged.  The replacement is
    # a function so that backslashes in 'desired' are taken literally.
    return re.sub('\015\012|\015|\012', lambda match: desired, text)
164
def _AsciiHexEncode(input):
    """Encodes input using ASCII-Hex coding.

    This is a verbose encoding used for binary data within
    a PDF file.  One byte binary becomes two bytes of ASCII.
    Helper function used by images.

    input is a string of 8-bit characters.  The result is two lower
    case hex digits per character followed by the '>' terminator,
    which is what _AsciiHexDecode checks for."""
    digits = ['%02x' % ord(char) for char in input]
    return ''.join(digits) + '>'
177
def _AsciiHexDecode(input):
    """Decodes input using ASCII-Hex coding.

    Not used except to provide a test of the inverse function.

    Whitespace anywhere in input is ignored.  Raises AssertionError
    if the '>' terminator is missing or an odd number of hex digits
    is present, and ValueError if a pair is not valid hex."""
    #strip out all whitespace
    stripped = ''.join(input.split())
    #a slice (not an index) so that an empty stream fails the assert
    #instead of raising IndexError
    assert stripped[-1:] == '>', 'Invalid terminator for Ascii Hex Stream'
    stripped = stripped[:-1]  #chop off terminator
    assert len(stripped) % 2 == 0, 'Ascii Hex stream has odd number of bytes'

    #int(..., 16) parses the digits without passing stream data to eval
    chars = [chr(int(stripped[i:i+2], 16)) for i in range(0, len(stripped), 2)]
    return ''.join(chars)
197
if 1: # for testing always define this
    def _AsciiBase85EncodePYTHON(input):
        """Encodes input using ASCII-Base85 coding.

        This is a compact encoding used for binary data within
        a PDF file.  Four bytes of binary data become five bytes of
        ASCII.  This is the default method used for encoding images.

        input is a string of 8-bit characters.  The result is the
        encoded text followed by the '~>' terminator.  A whole word of
        four zero bytes is written as the single character 'z' (the
        decoder expands it again); a trailing partial word of n bytes
        is written as n+1 characters."""
        # special rules apply if not a multiple of four bytes.
        whole_word_count, remainder_size = divmod(len(input), 4)
        cut = 4 * whole_word_count
        body, lastbit = input[0:cut], input[cut:]

        pieces = []
        for offset in range(0, cut, 4):
            word = body[offset:offset+4]
            if word == '\000\000\000\000':
                #special case: an all zero word has a one character form
                pieces.append('z')
            else:
                pieces.append(_a85EncodeWord(word))

        # now we do the final bit at the end.
        if remainder_size > 0:
            #pad to a whole word, encode as usual and write out only the
            #leading characters, which carry the real data
            lastword = _a85EncodeWord(lastbit + '\000' * (4 - remainder_size))
            pieces.append(lastword[0:remainder_size + 1])

        #terminator code for ascii 85
        pieces.append('~>')
        return ''.join(pieces)

    def _a85EncodeWord(word):
        "Returns the five ASCII-85 characters for a four character word."
        #the word read as one big-endian number
        num = 0
        for char in word:
            num = num * 256 + ord(char)
        #solve for five base-85 digits, least significant first
        chars = []
        for unused in range(5):
            num, digit = divmod(num, 85)
            chars.append(chr(digit + 33))
        chars.reverse()
        return ''.join(chars)
269
def _AsciiBase85DecodePYTHON(input):
    """Decodes input using ASCII-Base85 coding.

    This is not used - Acrobat Reader decodes for you
    - but a round trip is essential for testing.

    Whitespace anywhere in input is ignored.  Raises AssertionError
    if the stream does not end with the '~>' terminator.  Returns the
    decoded string of 8-bit characters."""
    #strip all whitespace
    stripped = ''.join(input.split())
    #check terminator is present, then chop it off
    assert stripped[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream'
    stripped = stripped[:-2]

    #may have 'z' in it which complicates matters - expand them
    stripped = stripped.replace('z','!!!!!')
    # special rules apply if not a multiple of five bytes.
    whole_word_count, remainder_size = divmod(len(stripped), 5)
    cut = 5 * whole_word_count
    body, lastbit = stripped[0:cut], stripped[cut:]

    pieces = []
    for offset in range(0, cut, 5):
        pieces.append(_a85WordToChars(_a85WordValue(body[offset:offset+5])))

    #a single left over character carries no complete byte, so only
    #remainders of two or more produce output
    if remainder_size > 1:
        #pad with '!' (digit zero), then round up: the encoder dropped
        #the low digits, so without this the kept bytes come out one less
        padded = lastbit + '!' * (5 - remainder_size)
        num = _a85WordValue(padded) + (0,0,0xFFFFFF,0xFFFF,0xFF)[remainder_size]
        #n characters stand for n-1 bytes
        pieces.append(_a85WordToChars(num)[:remainder_size - 1])

    return ''.join(pieces)

def _a85WordValue(word):
    "Returns the number represented by five ASCII-85 characters."
    num = 0
    for char in word:
        num = num * 85 + (ord(char) - 33)
    return num

def _a85WordToChars(num):
    "Returns the four characters whose big-endian value is num."
    rest, b4 = divmod(num, 256)
    rest, b3 = divmod(rest, 256)
    b1, b2 = divmod(rest, 256)
    return chr(b1) + chr(b2) + chr(b3) + chr(b4)
345
from _rl_accel import _AsciiBase85Encode # builtin or on the path
348
from reportlab.lib._rl_accel import _AsciiBase85Encode # where we think it should be
350
_AsciiBase85Encode = _AsciiBase85EncodePYTHON
353
from _rl_accel import _AsciiBase85Decode # builtin or on the path
356
from reportlab.lib._rl_accel import _AsciiBase85Decode # where we think it should be
358
_AsciiBase85Decode = _AsciiBase85DecodePYTHON
360
def _wrap(input, columns=60):
    """Wraps input at a given column size by inserting LINEEND characters.

    input is cut into consecutive pieces of at most 'columns'
    characters, which are joined with LINEEND.  No LINEEND is added
    after the last piece, and empty input gives an empty string."""
    output = [input[pos:pos+columns] for pos in range(0, len(input), columns)]
    return LINEEND.join(output)
375
#########################################################################
377
# JPEG processing code - contributed by Eric Johnson
379
#########################################################################
381
# Read data from the JPEG file. We should probably be using PIL to
382
# get this information for us -- but this way is more fun!
383
# Returns (width, height, color components) as a triple
384
# This is based on Thomas Merz's code from GhostScript (viewjpeg.ps)
385
def readJPEGInfo(image):
    """Read width, height and number of components from open JPEG file.

    image is a file-like object opened for binary reading; it is read
    forward from its current position until a start-of-frame marker
    is found.

    Returns the tuple (width, height, number of colour components).

    Raises ValueError if the frame is not 8 bits per component or uses
    an unsupported marker.  If the data runs out before a frame marker
    is found, the struct.error raised by the short read propagates."""
    import struct

    #Acceptable JPEG Markers:
    #  SOF0=baseline, SOF1=extended sequential or SOF2=progressive
    validMarkers = (0xC0, 0xC1, 0xC2)

    #JPEG markers without additional parameters
    noParamMarkers = \
        (0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0x01)

    #Unsupported JPEG Markers
    unsupportedMarkers = \
        (0xC3, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF)

    #read JPEG marker segments until we find SOFn marker or EOF
    while 1:
        if struct.unpack('B', image.read(1))[0] != 0xFF:
            continue                            #not a marker prefix
        marker = struct.unpack('B', image.read(1))[0]
        #check marker type is acceptable and process it
        if marker in validMarkers:
            image.seek(2, 1)                    #skip segment length
            precision = struct.unpack('B', image.read(1))[0]
            if precision != 8:
                # a real exception class: string exceptions cannot be
                # raised by current interpreters
                raise ValueError(' JPEG must have 8 bits per component')
            #two big-endian 16 bit sizes, then the component count
            height, width, color = struct.unpack('>HHB', image.read(5))
            return width, height, color
        elif marker in unsupportedMarkers:
            raise ValueError(' Unsupported JPEG marker: %0.2x' % marker)
        elif marker not in noParamMarkers:
            #skip segments with parameters
            #read length and skip the data; the length counts its own
            #two bytes, which have just been read
            length = struct.unpack('>H', image.read(2))[0]
            image.seek(length - 2, 1)
432
def __init__(self,k, n):
433
assert k, 'Argument k should be a non empty string'
436
self._n = int(n) or 7
439
return self.__rotate(_AsciiBase85Encode(''.join(map(chr,self.__fusc(map(ord,s))))),self._n)
442
return ''.join(map(chr,self.__fusc(map(ord,_AsciiBase85Decode(self.__rotate(s,-self._n))))))
444
def __rotate(self,s,n):
449
return s[-n:]+s[:l-n]
453
return map(lambda x,y: x ^ y,s,map(ord,((int(slen/self._klen)+1)*self._k)[:slen]))