1
#Copyright ReportLab Europe Ltd. 2000-2004
2
#see license.txt for license details
3
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/pdfbase/pdfdoc.py
4
__version__=''' $Id$ '''
6
The module pdfdoc.py handles the 'outer structure' of PDF documents, ensuring that
7
all objects are properly cross-referenced and indexed to the nearest byte. The
8
'inner structure' - the page descriptions - are presumed to be generated before
10
pdfgen.py calls this and provides a 'canvas' object to handle page marking operators.
11
piddlePDF calls pdfgen and offers a high-level interface.
13
The classes within this generally mirror structures in the PDF file
14
and are not part of any public interface. Instead, canvas and font
15
classes are made available elsewhere for users to manipulate.
19
from reportlab.pdfbase import pdfutils
20
from reportlab.pdfbase.pdfutils import LINEEND # this constant needed in both
21
from reportlab import rl_config
22
from reportlab.lib.utils import import_zlib, open_for_read, fp_str
24
from sys import platform
26
from sys import version_info
28
# may be inaccurate but will at least
29
#work in anything which seeks to format
30
# version_info into a string
31
version_info = (1,5,2,'unknown',0)
33
if platform[:4] == 'java' and version_info[:2] == (2, 1):
34
# workaround for list()-bug in Jython 2.1 (should be fixed in 2.2)
38
return map(f, sequence)
40
class PDFError(Exception):
44
# set this flag to get more vertical whitespace (and larger files)
46
##if LongFormat: (doesn't work)
49
## LINEEND = "\n" # no wasteful carriage returns!
51
# __InternalName__ is a special attribute that can only be set by the Document arbitrator
52
__InternalName__ = "__InternalName__"
54
# __RefOnly__ marks reference only elements that must be formatted on top level
55
__RefOnly__ = "__RefOnly__"
57
# __Comment__ provides a (one line) comment to inline with an object ref, if present
58
# if it is more than one line then percentize it...
59
__Comment__ = "__Comment__"
61
# If DoComments is set then add helpful (space wasting) comment lines to PDF files
66
# name for standard font dictionary
67
BasicFonts = "BasicFonts"
69
# name for the pages object
76
LINEENDDICT = {"LINEEND": LINEEND, "PERCENT": "%"}
78
def format(element, document, toplevel=0):
79
"""Indirection step for formatting.
80
Ensures that document parameters alter behaviour
81
of formatting for all elements.
83
from types import InstanceType, FloatType, IntType
84
if type(element) is InstanceType:
85
if not toplevel and hasattr(element, __RefOnly__):
86
# the object cannot be a component at non top level.
87
# make a reference to it and return its format
88
R = document.Reference(element)
89
return R.format(document)
94
raise AttributeError, "%s has no format operation" % element
96
if not rl_config.invariant and DoComments and hasattr(element, __Comment__):
97
f = "%s%s%s%s" % ("% ", element.__Comment__, LINEEND, f)
99
elif type(element) in (FloatType, IntType):
100
#use a controlled number formatting routine
101
#instead of str, so Jython/Python etc do not differ
102
return fp_str(element)
106
def indent(s, IND=LINEEND+" "):
    """Return s with every occurrence of LINEEND replaced by IND.

    By default IND is LINEEND followed by one space, so each line after
    the first ends up shifted right by a single column.
    """
    return s.replace(LINEEND, IND)
109
def xObjectName(externalname):
    """Map a caller-supplied form name to the internal name used for it.

    The internal name is the external one prefixed with "FormXob.".
    """
    template = "FormXob.%s"
    return template % externalname
112
# backwards compatibility
113
formName = xObjectName
119
"encode a string, stream, text"
121
def prepare(self, document):
122
# get ready to do encryption
124
def register(self, objnum, version):
125
# enter a new direct object
128
# the representation of self in file if any (should be None or PDFDict)
132
"used to bypass encryption when required"
133
encrypt = NoEncryption()
135
### the global document structure manager
141
# set this to define filters
142
defaultStreamFilters = None
143
encrypt = NoEncryption() # default no encryption
146
encoding=rl_config.defaultEncoding,
148
compression=rl_config.pageCompression,
149
invariant=rl_config.invariant):
150
#self.defaultStreamFilters = [PDFBase85Encode, PDFZCompress] # for testing!
151
#self.defaultStreamFilters = [PDFZCompress] # for testing!
152
assert encoding in ['MacRomanEncoding',
155
'WinAnsi'], 'Unsupported encoding %s' % encoding
156
if encoding[-8:] <> 'Encoding':
157
encoding = encoding + 'Encoding'
159
# allow None value to be passed in to mean 'give system defaults'
160
if invariant is None:
161
self.invariant = rl_config.invariant
163
self.invariant = invariant
164
self.setCompression(compression)
165
self.encoding = encoding
166
# signature for creating PDF ID
168
sig = self.signature = md5.new()
169
sig.update("a reportlab document")
170
if not self.invariant:
171
cat = _getTimeStamp()
174
sig.update(repr(cat)) # initialize with timestamp digest
175
# mapping of internal identifier ("Page001") to PDF objectnumber and generation number (34, 0)
176
self.idToObjectNumberAndVersion = {}
177
# mapping of internal identifier ("Page001") to PDF object (PDFPage instance)
179
# internal id to file location
183
cat = self.Catalog = self._catalog = PDFCatalog()
184
pages = self.Pages = PDFPages()
187
outlines = PDFOutlines0()
189
outlines = PDFOutlines()
190
self.Outlines = self.outline = outlines
191
cat.Outlines = outlines
192
self.info = PDFInfo()
193
self.info.invariant = self.invariant
194
#self.Reference(self.Catalog)
195
#self.Reference(self.Info)
196
self.fontMapping = {}
197
#make an empty font dictionary
198
DD = PDFDictionary({})
199
DD.__Comment__ = "The standard fonts dictionary"
200
DDR = self.Reference(DD, BasicFonts)
201
self.delayedFonts = []
203
def setCompression(self, onoff):
    """Remember the compression flag for this document.

    Only self.compression is stored here.
    """
    # XXX: maybe this should also set self.defaultStreamFilters?
    self.compression = onoff
207
def updateSignature(self, thing):
    """Feed str(thing) into the document signature.

    Nothing is added once self._ID has been set, because the signature
    has already been used by then.
    """
    if not self._ID:
        # signature not consumed yet, so it may still change
        self.signature.update(str(thing))
213
"A unique fingerprint for the file (unless in invariant mode)"
216
digest = self.signature.digest()
218
ID = PDFString(digest)
220
self._ID = "%s %% ReportLab generated PDF document -- digest (http://www.reportlab.com) %s [%s %s] %s" % (
221
LINEEND, LINEEND, IDs, IDs, LINEEND)
224
def SaveToFile(self, filename, canvas):
225
if callable(getattr(filename, "write",None)):
228
filename = str(getattr(filename,'name',''))
231
filename = str(filename)
232
f = open(filename, "wb")
233
f.write(self.GetPDFData(canvas))
238
from reportlab.lib.utils import markfilename
239
markfilename(filename) # do platform specific file junk
240
if getattr(canvas,'_verbosity',None): print 'saved', filename
242
def GetPDFData(self, canvas):
243
# realize delayed fonts
244
for fnt in self.delayedFonts:
246
# add info stuff to signature
247
self.info.invariant = self.invariant
248
self.info.digest(self.signature)
249
### later: maybe add more info to sig?
251
self.Reference(self.Catalog)
252
self.Reference(self.info)
253
outline = self.outline
254
outline.prepare(self, canvas)
258
"""specify the current object as a page (enables reference binding and other page features)"""
259
if self.inObject is not None:
260
if self.inObject=="page": return
261
raise ValueError, "can't go in page already in object %s" % self.inObject
262
self.inObject = "page"
265
"""specify that we are in a form xobject (disable page features, etc)"""
266
# don't need this check anymore since going in a form pushes old context at canvas level.
267
#if self.inObject not in ["form", None]:
268
# raise ValueError, "can't go in form already in object %s" % self.inObject
269
self.inObject = "form"
270
# don't need to do anything else, I think...
272
def getInternalFontName(self, psfontname):
273
fm = self.fontMapping
274
if fm.has_key(psfontname):
275
return fm[psfontname]
278
# does pdfmetrics know about it? if so, add
279
from reportlab.pdfbase import pdfmetrics
280
fontObj = pdfmetrics.getFont(psfontname)
281
if getattr(fontObj, '_dynamicFont', 0):
282
raise PDFError, "getInternalFontName(%s) called for a dynamic font" % repr(psfontname)
283
fontObj.addObjects(self)
284
#self.addFont(fontObj)
285
return fm[psfontname]
287
raise PDFError, "Font %s not known!" % repr(psfontname)
289
def thisPageName(self):
    """Return the internal name for the page numbered self.pageCounter.

    The name is "Page" followed by the repr of the counter.
    """
    return "Page%r" % (self.pageCounter,)
292
def thisPageRef(self):
    """Return a PDFObjectReference built from thisPageName()."""
    pagename = self.thisPageName()
    return PDFObjectReference(pagename)
295
def addPage(self, page):
296
name = self.thisPageName()
297
self.Reference(page, name)
298
self.Pages.addPage(page)
299
self.pageCounter = self.pageCounter+1
303
def addForm(self, name, form):
304
"""add a Form XObject."""
305
# XXX should check that name is a legal PDF name
306
if self.inObject != "form":
308
self.Reference(form, xObjectName(name))
311
def annotationName(self, externalname):
    """Return the internal name for an annotation: "Annot." + externalname."""
    internalname = "Annot.%s" % externalname
    return internalname
314
def addAnnotation(self, name, annotation):
    """Register annotation with the document under its internal name.

    The internal name comes from annotationName(name); the object is
    registered through self.Reference.
    """
    internalname = self.annotationName(name)
    self.Reference(annotation, internalname)
317
def refAnnotation(self, name):
    """Return a PDFObjectReference to the annotation called name.

    The reference is made from the internal annotation name.
    """
    return PDFObjectReference(self.annotationName(name))
321
def setTitle(self, title):
    """Store title on the document's info object."""
    info = self.info
    info.title = title
325
def setAuthor(self, author):
    """Store author on the document's info object (embedded in PDF file)."""
    info = self.info
    info.author = author
329
def setSubject(self, subject):
    """Store subject on the document's info object."""
    info = self.info
    info.subject = subject
333
def getAvailableFonts(self):
334
fontnames = self.fontMapping.keys()
335
# the standard 14 are also always available! (even if not initialized yet)
337
for name in _fontdata.standardFonts:
338
if name not in fontnames:
339
fontnames.append(name)
346
# register the Catalog/Info and then format the objects one by one until exhausted
347
# (possible infinite loop if there is a bug that continually makes new objects/refs...)
349
self.encrypt.prepare(self)
352
self.Reference(self.Catalog)
353
self.Reference(self.info)
354
# register the encryption dictionary if present
356
encryptinfo = self.encrypt.info()
358
encryptref = self.Reference(encryptinfo)
359
# make std fonts (this could be made optional
360
counter = 0 # start at first object (object 1 after preincrement)
361
ids = [] # the collection of object ids in object number order
362
numbertoid = self.numberToId
363
idToNV = self.idToObjectNumberAndVersion
364
idToOb = self.idToObject
365
idToOf = self.idToOffset
366
### note that new entries may be "appended" DURING FORMATTING
368
File = PDFFile() # output collector
370
counter = counter+1 # do next object...
371
if numbertoid.has_key(counter):
372
id = numbertoid[counter]
375
IO = PDFIndirectObject(id, obj)
376
# register object number and version
377
#encrypt.register(id,
378
IOf = IO.format(self)
379
# add a comment to the PDF output
380
if not rl_config.invariant and DoComments:
382
classname = obj.__class__.__name__
384
classname = repr(obj)
385
File.add("%% %s: class %s %s" % (repr(id), classname[:50], LINEEND))
386
offset = File.add(IOf)
391
# sanity checks (must happen AFTER formatting)
392
lno = len(numbertoid)
394
raise ValueError, "counter %s doesn't match number to id dictionary %s" %(counter, lno)
396
xref = PDFCrossReferenceTable()
397
xref.addsection(0, ids)
398
xreff = xref.format(self)
399
xrefoffset = File.add(xreff)
400
# now add the trailer
401
trailer = PDFTrailer(
402
startxref = xrefoffset,
404
Root = self.Reference(cat),
405
Info = self.Reference(info),
406
Encrypt = encryptref,
409
trailerf = trailer.format(self)
411
# return string format for pdf file
412
return File.format(self)
414
def hasForm(self, name):
    """Tell whether a form called name has been registered.

    The lookup is done on the form's internal name in self.idToObject.
    """
    return self.idToObject.has_key(xObjectName(name))
419
def getFormBBox(self, name):
420
"get the declared bounding box of the form as a list"
421
internalname = xObjectName(name)
422
if self.idToObject.has_key(internalname):
423
theform = self.idToObject[internalname]
424
if isinstance(theform, PDFFormXObject):
425
# internally defined form
426
return theform.BBoxList()
427
elif isinstance(theform, PDFStream):
428
# externally defined form
429
return list(theform.dictionary.dict["BBox"].sequence)
431
raise ValueError, "I don't understand the form instance %s" % repr(name)
433
def getXObjectName(self, name):
    """Tell the canvas the internal name of the form called name.

    The form does not have to be defined yet.
    """
    internalname = xObjectName(name)
    return internalname
438
def xobjDict(self, formnames):
439
"""construct an xobject dict (for inclusion in a resource dict, usually)
440
from a list of form names (images not yet supported)"""
442
for name in formnames:
443
internalname = xObjectName(name)
444
reference = PDFObjectReference(internalname)
445
D[internalname] = reference
446
#print "xobjDict D", D
447
return PDFDictionary(D)
449
def Reference(self, object, name=None):
450
### note references may "grow" during the final formatting pass: don't use d.keys()!
451
# don't make references to other references, or non instances, unless they are named!
452
from types import InstanceType
453
#print"object type is ", type(object)
455
idToObject = self.idToObject
456
if name is None and (
457
(tob is not InstanceType) or (tob is InstanceType and object.__class__ is PDFObjectReference)):
459
if hasattr(object, __InternalName__):
461
intname = object.__InternalName__
462
if name is not None and name!=intname:
463
raise ValueError, "attempt to reregister object %s with new name %s" % (
464
repr(intname), repr(name))
465
if not idToObject.has_key(intname):
466
raise ValueError, "object named but not registered"
467
return PDFObjectReference(intname)
468
# otherwise register the new object
469
objectcounter = self.objectcounter = self.objectcounter+1
471
name = "R"+repr(objectcounter)
472
if idToObject.has_key(name):
473
other = idToObject[name]
475
raise ValueError, "redefining named object: "+repr(name)
476
return PDFObjectReference(name)
477
if tob is InstanceType:
478
object.__InternalName__ = name
479
#print "name", name, "counter", objectcounter
480
self.idToObjectNumberAndVersion[name] = (objectcounter, 0)
481
self.numberToId[objectcounter] = name
482
idToObject[name] = object
483
return PDFObjectReference(name)
485
### chapter 4 Objects
492
def __init__(self, t):
494
def format(self, document):
496
t = document.encrypt.encode(t)
498
for i in range(len(L)):
502
h2 = h[2:] # nuke the 0x
506
result = string.join(L, "")
507
return "<%s>" % result
509
dummydoc = DummyDoc()
510
return self.format(dummydoc)
516
def __init__(self, str):
517
# might need to change this to class for encryption
519
def format(self, document):
520
s = document.encrypt.encode(self.s)
522
return "(%s)" % pdfutils._escape(s)
524
raise ValueError, "cannot escape %s %s" %(s, repr(s))
526
return "(%s)" % pdfutils._escape(self.s)
529
# might need to change this to class for encryption
530
# NOTE: RESULT MUST ALWAYS SUPPORT MEANINGFUL COMPARISONS (EQUALITY) AND HASH
531
# first convert the name
534
for thischar in data:
535
if 0x21<=ord(thischar)<=0x7e and thischar not in "%()<>{}[]#":
538
hexord = hex(ord(thischar))[2:] # forget the 0x thing...
539
ldata[index] = "#"+hexord
541
data = string.join(ldata, "")
546
multiline = LongFormat
547
def __init__(self, dict=None):
548
"""dict should be namestring to value eg "a": 122 NOT pdfname to value NOT "/a":122"""
552
self.dict = dict.copy()
553
def __setitem__(self, name, value):
554
self.dict[name] = value
555
def Reference(name, document):
557
self.dict[name] = document.Reference(ob)
558
def format(self, document):
566
fv = format(v, document)
567
fk = format(PDFName(k), document)
573
Lj = string.join(L, LINEEND)
577
# break up every 6 elements anyway
578
for i in range(6, len(Lj), 6):
580
Lj = string.join(L, " ")
581
return "<< %s >>" % Lj
583
# stream filters are objects to support round trip and
584
# possibly in the future also support parameters
585
class PDFStreamFilterZCompress:
586
pdfname = "FlateDecode"
587
def encode(self, text):
588
from reportlab.lib.utils import import_zlib
590
if not zlib: raise ImportError, "cannot z-compress zlib unavailable"
591
return zlib.compress(text)
592
def decode(self, encoded):
593
from reportlab.lib.utils import import_zlib
595
if not zlib: raise ImportError, "cannot z-decompress zlib unavailable"
596
return zlib.decompress(encoded)
598
# need only one of these, unless we implement parameters later
599
PDFZCompress = PDFStreamFilterZCompress()
601
class PDFStreamFilterBase85Encode:
    """Stream filter that runs content through the ASCII base-85 coder.

    pdfname is the filter name that ends up in a stream's "Filter" entry
    when this filter is applied.
    """
    pdfname = "ASCII85Decode"

    def encode(self, text):
        """Return text base-85 encoded, then passed through pdfutils._wrap."""
        # Use the pdfutils module imported at the top of this file; the
        # old per-call "from pdfutils import ..." was an implicit relative
        # import, inconsistent with the absolute imports used elsewhere.
        return pdfutils._wrap(pdfutils._AsciiBase85Encode(text))

    def decode(self, text):
        """Return text decoded with pdfutils._AsciiBase85Decode."""
        return pdfutils._AsciiBase85Decode(text)
610
# need only one of these too
611
PDFBase85Encode = PDFStreamFilterBase85Encode()
613
STREAMFMT = ("%(dictionary)s%(LINEEND)s" # dictionary
614
"stream" # stream keyword
615
"%(LINEEND)s" # a line end (could be just a \n)
616
"%(content)s" # the content, with no lineend
617
"endstream%(LINEEND)s" # the endstream keyword
620
'''set dictionary elements explicitly stream.dictionary[name]=value'''
621
### compression stuff not implemented yet
622
__RefOnly__ = 1 # must be at top level
623
def __init__(self, dictionary=None, content=None):
624
if dictionary is None:
625
dictionary = PDFDictionary()
626
self.dictionary = dictionary
627
self.content = content
629
def format(self, document):
630
dictionary = self.dictionary
631
# copy it for modification
632
dictionary = PDFDictionary(dictionary.dict.copy())
633
content = self.content
634
filters = self.filters
635
if self.content is None:
636
raise ValueError, "stream content not set"
638
filters = document.defaultStreamFilters
639
# only apply filters if they haven't been applied elsewhere
640
if filters is not None and not dictionary.dict.has_key("Filter"):
641
# apply filters in reverse order listed
646
#print "*****************content:"; print repr(content[:200])
647
#print "*****************filter", f.pdfname
648
content = f.encode(content)
649
fnames.insert(0, PDFName(f.pdfname))
650
#print "*****************finally:"; print content[:200]
651
#print "****** FILTERS", fnames
653
dictionary["Filter"] = PDFArray(fnames)
654
# "stream encoding is done after all filters have been applied"
655
content = document.encrypt.encode(content)
656
fc = format(content, document)
657
#print "type(content)", type(content), len(content), type(self.dictionary)
659
#if fc!=content: burp
660
# set dictionary length parameter
661
dictionary["Length"] = lc
662
fd = format(dictionary, document)
663
sdict = LINEENDDICT.copy()
664
sdict["dictionary"] = fd
665
sdict["content"] = fc
666
return STREAMFMT % sdict
668
def teststream(content=None):
671
content = teststreamcontent
672
content = string.strip(content)
673
content = string.replace(content, "\n", LINEEND) + LINEEND
676
S.filters = [PDFBase85Encode, PDFZCompress]
677
# nothing else needed...
678
S.__Comment__ = "test stream"
681
teststreamcontent = """
682
1 0 0 1 0 0 cm BT /F9 12 Tf 14.4 TL ET
684
n 72.00 72.00 432.00 648.00 re B*
687
multiline = LongFormat
688
def __init__(self, sequence):
689
self.sequence = list(sequence)
690
def References(self, document):
    """Replace every element of self.sequence by document.Reference(element)."""
    make_ref = document.Reference
    self.sequence = [make_ref(elt) for elt in self.sequence]
693
def format(self, document):
694
#ssequence = map(str, self.sequence)
695
sequence = self.sequence
698
felt = format(elt, document)
699
fsequence.append(felt)
701
Lj = string.join(fsequence, LINEEND)
704
# break up every 10 elements anyway
706
breakline = LINEEND+" "
707
for i in range(10, len(Lj), 10):
708
Lj.insert(i,breakline)
712
# Template for an indirect object: "<n> <v> obj", the formatted content,
# then "endobj", each followed by a line end.
INDIRECTOBFMT = "".join([
    "%(n)s %(v)s obj%(LINEEND)s",
    "%(content)s",
    "%(LINEEND)s",
    "endobj",
    "%(LINEEND)s",
])
716
class PDFIndirectObject:
718
def __init__(self, name, content):
720
self.content = content
721
def format(self, document):
723
(n, v) = document.idToObjectNumberAndVersion[name]
724
# set encryption parameters
725
document.encrypt.register(n, v)
726
content = self.content
727
fcontent = format(content, document, toplevel=1) # yes this is at top level
728
sdict = LINEENDDICT.copy()
731
sdict["content"] = fcontent
732
return INDIRECTOBFMT % sdict
734
class PDFObjectReference:
735
def __init__(self, name):
737
def format(self, document):
740
(n, v) = document.idToObjectNumberAndVersion[name]
742
raise KeyError, "forward reference to %s not resolved upon final formatting" % repr(name)
743
return "%s %s R" % (n,v)
746
# Following Ken Lunde's advice and the PDF spec, this includes
747
# some high-order bytes. I chose the characters for Tokyo
748
# in Shift-JIS encoding, as these cannot be mistaken for
749
# any other encoding, and we'll be able to tell if something
750
# has run our PDF files through a dodgy Unicode conversion.
753
"%\223\214\213\236 ReportLab Generated PDF document http://www.reportlab.com"+LINEEND)
756
### just accumulates strings: keeps track of current offset
762
"""should be constructed as late as possible, return position where placed"""
764
self.offset = result+len(s)
765
self.strings.append(s)
767
def format(self, document):
    """Return everything accumulated in self.strings as one string.

    Each piece goes through str() first, in case of lazy objects.
    """
    pieces = [str(piece) for piece in self.strings]
    return "".join(pieces)
771
XREFFMT = '%0.10d %0.5d n'
773
class PDFCrossReferenceSubsection:
774
def __init__(self, firstentrynumber, idsequence):
775
self.firstentrynumber = firstentrynumber
776
self.idsequence = idsequence
777
def format(self, document):
778
"""id sequence should represent contiguous object nums else error. free numbers not supported (yet)"""
779
firstentrynumber = self.firstentrynumber
780
idsequence = self.idsequence
781
entries = list(idsequence)
782
nentries = len(idsequence)
783
# special case: object number 0 is always free
785
if firstentrynumber==0:
786
taken[0] = "standard free entry"
787
nentries = nentries+1
788
entries.insert(0, "0000000000 65535 f")
789
idToNV = document.idToObjectNumberAndVersion
790
idToOffset = document.idToOffset
791
lastentrynumber = firstentrynumber+nentries-1
792
for id in idsequence:
793
(num, version) = idToNV[id]
794
if taken.has_key(num):
795
raise ValueError, "object number collision %s %s %s" % (num, repr(id), repr(taken[id]))
796
if num>lastentrynumber or num<firstentrynumber:
797
raise ValueError, "object number %s not in range %s..%s" % (num, firstentrynumber, lastentrynumber)
798
# compute position in list
799
rnum = num-firstentrynumber
801
offset = idToOffset[id]
802
entries[num] = XREFFMT % (offset, version)
803
# now add the initial line
804
firstline = "%s %s" % (firstentrynumber, nentries)
805
entries.insert(0, firstline)
806
# make sure it ends with a LINEEND
808
if LINEEND=="\n" or LINEEND=="\r":
809
reflineend = " "+LINEEND # as per spec
810
elif LINEEND=="\r\n":
813
raise ValueError, "bad end of line! %s" % repr(LINEEND)
814
return string.join(entries, LINEEND)
816
class PDFCrossReferenceTable:
820
def addsection(self, firstentry, ids):
    """Append a new cross reference subsection to self.sections.

    The subsection is a PDFCrossReferenceSubsection made from firstentry
    and ids.
    """
    self.sections.append(PDFCrossReferenceSubsection(firstentry, ids))
823
def format(self, document):
824
sections = self.sections
826
raise ValueError, "no crossref sections"
828
for s in self.sections:
829
fs = format(s, document)
831
return string.join(L, "")
833
# Template for the file trailer: the "trailer" keyword, the trailer
# dictionary, "startxref" with its value, and the closing %%EOF marker.
TRAILERFMT = "".join([
    "trailer%(LINEEND)s",
    "%(dict)s%(LINEEND)s",
    "startxref%(LINEEND)s",
    "%(startxref)s%(LINEEND)s",
    "%(PERCENT)s%(PERCENT)sEOF%(LINEEND)s",
])
841
def __init__(self, startxref, Size=None, Prev=None, Root=None, Info=None, ID=None, Encrypt=None):
842
self.startxref = startxref
843
if Size is None or Root is None:
844
raise ValueError, "Size and Root keys required"
845
dict = self.dict = PDFDictionary()
846
for (n,v) in [("Size", Size), ("Prev", Prev), ("Root", Root),
847
("Info", Info), ("ID", ID), ("Encrypt", Encrypt)]:
850
def format(self, document):
851
fdict = format(self.dict, document)
852
D = LINEENDDICT.copy()
854
D["startxref"] = self.startxref
855
return TRAILERFMT % D
857
#### XXXX skipping incremental update,
860
#### chapter 6, doc structure
863
__Comment__ = "Document Root"
865
# to override, set as attributes
866
__Defaults__ = {"Type": PDFName("Catalog"),
867
"PageMode": PDFName("UseNone"),
869
__NoDefault__ = string.split("""
870
Dests Outlines Pages Threads AcroForm Names OpenActions PageMode URI
871
ViewerPreferences PageLabels PageLayout JavaScript StructTreeRoot SpiderInfo"""
873
__Refs__ = __NoDefault__ # make these all into references, if present
875
def format(self, document):
876
self.check_format(document)
877
defaults = self.__Defaults__
880
for k in defaults.keys():
881
default = defaults[k]
883
if hasattr(self, k) and getattr(self,k) is not None:
885
elif default is not None:
889
for k in self.__NoDefault__:
894
# force objects to be references where required
897
#print"k is", k, "value", D[k]
898
D[k] = document.Reference(D[k])
899
dict = PDFDictionary(D)
900
return format(dict, document)
902
def showOutline(self):
    """Set PageMode to the PDF name "UseOutlines"."""
    mode = PDFName("UseOutlines")
    self.PageMode = mode
905
def showFullScreen(self):
    """Set PageMode to the PDF name "FullScreen"."""
    mode = PDFName("FullScreen")
    self.PageMode = mode
908
def check_format(self, document):
909
"""for use in subclasses"""
912
# not yet implementing
913
# ViewerPreferences, PageLabelDictionaries,
915
class PDFPages(PDFCatalog):
916
"""PAGES TREE WITH ONE INTERNAL NODE, FOR "BALANCING" CHANGE IMPLEMENTATION"""
917
__Comment__ = "page tree"
919
# note: could implement page attribute inheritance...
920
__Defaults__ = {"Type": PDFName("Pages"),
922
__NoDefault__ = string.split("Kids Count Parent")
923
__Refs__ = ["Parent"]
926
def __getitem__(self, item):
927
return self.pages[item]
928
def addPage(self, page):
    """Append page to the end of self.pages."""
    pagelist = self.pages
    pagelist.append(page)
930
def check_format(self, document):
931
# convert all pages to page references
933
kids = PDFArray(pages)
934
# make sure all pages are references
935
kids.References(document)
937
self.Count = len(pages)
939
class PDFPage(PDFCatalog):
940
__Comment__ = "Page dictionary"
941
# all PDF attributes can be set explicitly
942
# if this flag is set, the "usual" behavior will be suppressed
943
Override_default_compilation = 0
945
__Defaults__ = {"Type": PDFName("Page"),
946
# "Parent": PDFObjectReference(Pages), # no! use document.Pages
948
__NoDefault__ = string.split(""" Parent
949
MediaBox Resources Contents CropBox Rotate Thumb Annots B Dur Hid Trans AA
950
PieceInfo LastModified SeparationInfo ArtBox TrimBox BleedBox ID PZ
953
__Refs__ = string.split("""
967
# set all nodefaults to None
968
for name in self.__NoDefault__:
969
setattr(self, name, None)
970
def setCompression(self, onoff):
    """Remember the compression flag for this page in self.compression."""
    self.compression = onoff
972
def setStream(self, code):
973
if self.Override_default_compilation:
974
raise ValueError, "overridden! must set stream explicitly"
975
from types import ListType
976
if type(code) is ListType:
977
code = string.join(code, LINEEND)+LINEEND
980
def setPageTransition(self, tranDict):
    """Wrap tranDict in a PDFDictionary and store it as self.Trans."""
    transition = PDFDictionary(tranDict)
    self.Trans = transition
983
def check_format(self, document):
984
# set up parameters unless usual behaviour is suppressed
985
if self.Override_default_compilation:
987
self.MediaBox = self.MediaBox or PDFArray([0, 0, self.pagewidth, self.pageheight])
992
#raise ValueError, "annotations not reimplemented yet"
993
if type(self.Annots) is not types.InstanceType:
994
self.Annots = PDFArray(self.Annots)
995
if not self.Contents:
998
self.Contents = teststream()
1001
if self.compression:
1002
S.filters = [PDFBase85Encode, PDFZCompress]
1004
S.__Comment__ = "page stream"
1006
if not self.Resources:
1007
resources = PDFResourceDictionary()
1009
resources.basicFonts()
1011
resources.allProcs()
1013
resources.basicProcs()
1015
#print "XObjects", self.XObjects.dict
1016
resources.XObject = self.XObjects
1017
self.Resources = resources
1019
pages = document.Pages
1020
self.Parent = document.Reference(pages)
1022
def testpage(document):
1024
P.Contents = teststream()
1025
pages = document.Pages
1026
P.Parent = document.Reference(pages)
1027
P.MediaBox = PDFArray([0, 0, 595, 841])
1028
resources = PDFResourceDictionary()
1029
resources.allProcs() # enable all procsets
1030
resources.basicFonts()
1031
P.Resources = resources
1034
#### DUMMY OUTLINES IMPLEMENTATION FOR testing
1045
__Comment__ = "TEST OUTLINE!"
1046
text = string.replace(DUMMYOUTLINE, "\n", LINEEND)
1048
def format(self, document):
1052
class OutlineEntryObject:
1053
"an entry in an outline"
1054
Title = Dest = Parent = Prev = Next = First = Last = Count = None
1055
def format(self, document):
1057
D["Title"] = PDFString(self.Title)
1058
D["Parent"] = self.Parent
1059
D["Dest"] = self.Dest
1060
for n in ("Prev", "Next", "First", "Last", "Count"):
1061
v = getattr(self, n)
1064
PD = PDFDictionary(D)
1065
return PD.format(document)
1069
"""takes a recursive list of outline destinations
1072
out.setNames(canvas, # requires canvas for name resolution
1075
["chapter2section1dest",
1076
"chapter2section2dest",
1077
"chapter2conclusiondest"]
1078
), # end of chapter2 description
1080
("chapter4dest", ["c4s1", "c4s2"])
1082
Higher layers may build this structure incrementally. KISS at base level.
1084
# first attempt, many possible features missing.
1086
mydestinations = ready = None
1088
currentlevel = -1 # ie, no levels yet
1091
self.destinationnamestotitles = {}
1092
self.destinationstotitles = {}
1093
self.levelstack = []
1095
self.closedict = {} # dictionary of "closed" destinations in the outline
1097
def addOutlineEntry(self, destinationname, level=0, title=None, closed=None):
1098
"""destinationname of None means "close the tree" """
1099
from types import IntType, TupleType
1100
if destinationname is None and level!=0:
1101
raise ValueError, "close tree must have level of 0"
1102
if type(level) is not IntType: raise ValueError, "level must be integer, got %s" % type(level)
1103
if level<0: raise ValueError, "negative levels not allowed"
1104
if title is None: title = destinationname
1105
currentlevel = self.currentlevel
1106
stack = self.levelstack
1107
tree = self.buildtree
1108
# adjust currentlevel and stack to match level
1109
if level>currentlevel:
1110
if level>currentlevel+1:
1111
raise ValueError, "can't jump from outline level %s to level %s, need intermediates" %(currentlevel, level)
1112
level = currentlevel = currentlevel+1
1114
while level<currentlevel:
1115
# pop off levels to match
1118
previous = stack[-1]
1119
lastinprevious = previous[-1]
1120
if type(lastinprevious) is TupleType:
1121
(name, sectionlist) = lastinprevious
1122
raise ValueError, "cannot reset existing sections: " + repr(lastinprevious)
1124
name = lastinprevious
1125
sectionlist = current
1126
previous[-1] = (name, sectionlist)
1127
#sectionlist.append(current)
1128
currentlevel = currentlevel-1
1129
if destinationname is None: return
1130
stack[-1].append(destinationname)
1131
self.destinationnamestotitles[destinationname] = title
1132
if closed: self.closedict[destinationname] = 1
1133
self.currentlevel = level
1135
def setDestinations(self, destinationtree):
    """Store destinationtree as self.mydestinations."""
    self.mydestinations = destinationtree
1138
def format(self, document):
1140
D["Type"] = PDFName("Outlines")
1144
D["First"] = self.first
1145
D["Last"] = self.last
1146
PD = PDFDictionary(D)
1147
return PD.format(document)
1149
def setNames(self, canvas, *nametree):
    """Translate the name tree given as positional arguments and store it.

    The names are resolved with translateNames(canvas, ...) and the
    result is handed to setDestinations.
    """
    translated = self.translateNames(canvas, nametree)
    self.setDestinations(translated)
1153
def setNameList(self, canvas, nametree):
    """Like setNames, but the name tree is passed as one explicit list.

    This saves the caller from having to use apply(...).
    """
    translated = self.translateNames(canvas, nametree)
    self.setDestinations(translated)
1158
def translateNames(self, canvas, object):
1159
"recursively translate tree of names into tree of destinations"
1160
from types import StringType, ListType, TupleType
1162
destinationnamestotitles = self.destinationnamestotitles
1163
destinationstotitles = self.destinationstotitles
1164
closedict = self.closedict
1165
if Ot is StringType:
1166
destination = canvas._bookmarkReference(object)
1168
if destinationnamestotitles.has_key(object):
1169
title = destinationnamestotitles[object]
1171
destinationnamestotitles[title] = title
1172
destinationstotitles[destination] = title
1173
if closedict.has_key(object):
1174
closedict[destination] = 1 # mark destination closed
1175
return {object: canvas._bookmarkReference(object)} # name-->ref
1176
if Ot is ListType or Ot is TupleType:
1179
L.append(self.translateNames(canvas, o))
1183
raise "in outline, destination name must be string: got a %s" % Ot
1185
def prepare(self, document, canvas):
1186
"""prepare all data structures required for save operation (create related objects)"""
1187
if self.mydestinations is None:
1189
self.addOutlineEntry(None) # close the tree
1190
destnames = self.levelstack[0]
1191
#from pprint import pprint; pprint(destnames); stop
1192
self.mydestinations = self.translateNames(canvas, destnames)
1194
self.first = self.last = None
1198
#self.first = document.objectReference("Outline.First")
1199
#self.last = document.objectReference("Outline.Last")
1200
# XXXX this needs to be generalized for closed entries!
1201
self.count = count(self.mydestinations, self.closedict)
1202
(self.first, self.last) = self.maketree(document, self.mydestinations, toplevel=1)
1205
def maketree(self, document, destinationtree, Parent=None, toplevel=0):
1206
from types import ListType, TupleType, DictType
1207
tdestinationtree = type(destinationtree)
1209
levelname = "Outline"
1210
Parent = document.Reference(document.Outlines)
1212
self.count = self.count+1
1213
levelname = "Outline.%s" % self.count
1215
raise ValueError, "non-top level outline elt parent must be specified"
1216
if tdestinationtree is not ListType and tdestinationtree is not TupleType:
1217
raise ValueError, "destinationtree must be list or tuple, got %s"
1218
nelts = len(destinationtree)
1220
lastelt = firstref = lastref = None
1221
destinationnamestotitles = self.destinationnamestotitles
1222
closedict = self.closedict
1223
for index in range(nelts):
1224
eltobj = OutlineEntryObject()
1225
eltobj.Parent = Parent
1226
eltname = "%s.%s" % (levelname, index)
1227
eltref = document.Reference(eltobj, eltname)
1228
#document.add(eltname, eltobj)
1229
if lastelt is not None:
1230
lastelt.Next = eltref
1231
eltobj.Prev = lastref
1232
if firstref is None:
1235
lastelt = eltobj # advance eltobj
1237
elt = destinationtree[index]
1240
# simple leaf {name: dest}
1242
elif te is TupleType:
1243
# leaf with subsections: ({name: ref}, subsections) XXXX should clean up (see count(...))
1245
(leafdict, subsections) = elt
1247
raise ValueError, "destination tree elt tuple should have two elts, got %s" % len(elt)
1248
eltobj.Count = count(subsections, closedict)
1249
(eltobj.First, eltobj.Last) = self.maketree(document, subsections, eltref)
1251
raise ValueError, "destination tree elt should be dict or tuple, got %s" % te
1253
[(Title, Dest)] = leafdict.items()
1255
raise ValueError, "bad outline leaf dictionary, should have one entry "+str(elt)
1256
eltobj.Title = destinationnamestotitles[Title]
1258
if te is TupleType and closedict.has_key(Dest):
1259
# closed subsection, count should be negative
1260
eltobj.Count = -eltobj.Count
1261
return (firstref, lastref)
1263
def count(tree, closedict=None):
1264
"""utility for outline: recursively count leaves in a tuple/list tree"""
1265
from operator import add
1266
from types import TupleType, ListType
1269
# leaf with subsections XXXX should clean up this structural usage
1270
(leafdict, subsections) = tree
1271
[(Title, Dest)] = leafdict.items()
1272
if closedict and closedict.has_key(Dest):
1273
return 1 # closed tree element
1274
if tt is TupleType or tt is ListType:
1275
#return reduce(add, map(count, tree))
1278
counts.append(count(e, closedict))
1279
return reduce(add, counts)
1288
/CreationDate (D:20001012220652)
1289
/Producer (ReportLab http://www.reportlab.com)
1290
/Subject (this file generated by an alpha test module)
1294
__Comment__ = "TEST INFO STRUCTURE"
1295
text = string.replace(DUMMYINFO, "\n", LINEEND)
1297
def format(self, document):
1301
"""PDF documents can have basic information embedded, viewable from
1302
File | Document Info in Acrobat Reader. If this is wrong, you get
1303
Postscript errors while printing, even though it does not print."""
1305
self.invariant = rl_config.invariant
1306
self.title = "untitled"
1307
self.author = "anonymous"
1308
self.subject = "unspecified"
1310
def digest(self, md5object):
    """Add this object's title, author and subject to a running signature.

    Each value is converted with str() and passed to md5object.update,
    in the order title, author, subject.
    """
    fields = (self.title, self.author, self.subject)
    for field in fields:
        md5object.update(str(field))
1315
def format(self, document):
    """Return the formatted document information dictionary.

    The dictionary carries Title, Author, CreationDate, Producer and
    Subject; CreationDate is a PDFDate built with this object's
    invariant flag.  The result is whatever PDFDictionary.format
    returns for the given document.
    """
    # D has to be bound before entries are stored in it; the block as found
    # assigned into D without ever creating it.
    D = {
        "Title": PDFString(self.title),
        "Author": PDFString(self.author),
        "CreationDate": PDFDate(invariant=self.invariant),
        "Producer": PDFString("ReportLab http://www.reportlab.com"),
        "Subject": PDFString(self.subject),
    }
    PD = PDFDictionary(D)
    return PD.format(document)
1325
# skipping thumbnails, etc
1329
"""superclass for all annotations."""
1330
defaults = [("Type", PDFName("Annot"),)]
1331
required = ("Type", "Rect", "Contents", "Subtype")
1332
permitted = required+(
1333
"Border", "C", "T", "M", "F", "H", "BS", "AA", "AS", "Popup", "P")
1334
def cvtdict(self, d):
1335
"""transform dict args from python form to pdf string rep as needed"""
1337
if type(Rect) is not types.StringType:
1338
d["Rect"] = PDFArray(Rect)
1339
d["Contents"] = PDFString(d["Contents"])
1341
def AnnotationDict(self, **kw):
1343
for (name,val) in self.defaults:
1346
for name in self.required:
1347
if not d.has_key(name):
1348
raise ValueError, "keyword argument %s missing" % name
1350
permitted = self.permitted
1351
for name in d.keys():
1352
if name not in permitted:
1353
raise ValueError, "bad annotation dictionary name %s" % name
1354
return PDFDictionary(d)
1356
raise ValueError, "DictString undefined for virtual superclass Annotation, must overload"
1358
#return self.AnnotationDict(self, Rect=(a,b,c,d)) or whatever
1359
def format(self, document):
1361
return D.format(document)
1363
class TextAnnotation(Annotation):
1364
permitted = Annotation.permitted + (
1365
"Open", "Name", "AP")
1366
def __init__(self, Rect, Contents, **kw):
1368
self.Contents = Contents
1372
d.update(self.otherkw)
1373
d["Rect"] = self.Rect
1374
d["Contents"] = self.Contents
1375
d["Subtype"] = "/Text"
1376
return apply(self.AnnotationDict, (), d)
1378
class LinkAnnotation(Annotation):
1380
permitted = Annotation.permitted + (
1382
def __init__(self, Rect, Contents, Destination, Border="[0 0 1]", **kw):
1383
self.Border = Border
1385
self.Contents = Contents
1386
self.Destination = Destination
1389
def dummyDictString(self): # old, testing
1391
<< /Type /Annot /Subtype /Link /Rect [71 717 190 734] /Border [16 16 1]
1392
/Dest [23 0 R /Fit] >>
1397
d.update(self.otherkw)
1398
d["Border"] = self.Border
1399
d["Rect"] = self.Rect
1400
d["Contents"] = self.Contents
1401
d["Subtype"] = "/Link"
1402
d["Dest"] = self.Destination
1403
return apply(self.AnnotationDict, (), d)
1406
# skipping names tree
1410
# skipping names trees
1412
# skipping to chapter 7
1415
def __init__(self, llx, lly, urx, ury):
    """Remember the lower-left and upper-right corner coordinates."""
    self.llx = llx
    self.lly = lly
    # the upper-right x value is stored under the attribute name 'ulx';
    # format() reads it back under that same name
    self.ulx = urx
    self.ury = ury
1417
def format(self, document):
    """Format the rectangle as a four element PDFArray: llx, lly, ulx, ury."""
    corners = [self.llx, self.lly, self.ulx, self.ury]
    return format(PDFArray(corners), document)
1422
def _getTimeStamp():
1430
# gmt offset not yet supported
1431
def __init__(self, yyyy=None, mm=None, dd=None, hh=None, m=None, s=None, invariant=rl_config.invariant):
1432
if None in (yyyy, mm, dd, hh, m, s):
1434
now = (2000,01,01,00,00,00,0)
1437
now = tuple(time.localtime(_getTimeStamp())[:6])
1438
if yyyy is None: yyyy=now[0]
1439
if mm is None: mm=now[1]
1440
if dd is None: dd=now[2]
1441
if hh is None: hh=now[3]
1442
if m is None: m=now[4]
1443
if s is None: s=now[5]
1444
self.yyyy=yyyy; self.mm=mm; self.dd=dd; self.hh=hh; self.m=m; self.s=s
1446
def format(self, doc):
    """Format the date as a PDFString of 14 zero-padded digits: yyyymmddhhmmss."""
    fields = (self.yyyy, self.mm, self.dd, self.hh, self.m, self.s)
    stamp = '%04d%02d%02d%02d%02d%02d' % fields
    return format(PDFString(stamp), doc)
1452
"""not a pdfobject! This is a placeholder that can delegates
1453
to a pdf object only after it has been defined by the methods
1454
below. EG a Destination can refer to Appendix A before it has been
1455
defined, but only if Appendix A is explicitly noted as a destination
1456
and resolved before the document is generated...
1457
For example the following sequence causes resolution before doc generation.
1459
d.fit() # or other format defining method call
1461
(at present setPageRef is called on generation of the page).
1463
representation = format = page = None
1464
def __init__(self,name):
1466
self.fmt = self.page = None
1467
def format(self, document):
1469
if f is None: raise ValueError, "format not resolved %s" % self.name
1471
if p is None: raise ValueError, "Page reference unbound %s" % self.name
1473
return f.format(document)
1474
def xyz(self, left, top, zoom):  # see pdfspec mar 11 99 pp184+
    """Select the XYZ form for this destination (left, top, zoom).

    The format object is created with its page set to None.
    """
    chosen = PDFDestinationXYZ(None, left, top, zoom)
    self.fmt = chosen
1477
self.fmt = PDFDestinationFit(None)
1479
self.fmt = PDFDestinationFitB(None)
1480
def fith(self, top):
    """Select the FitH form for this destination, with the given top.

    The format object is created with its page set to None.
    """
    chosen = PDFDestinationFitH(None, top)
    self.fmt = chosen
1482
def fitv(self, left):
    """Select the FitV form for this destination, with the given left.

    The format object is created with its page set to None.
    """
    chosen = PDFDestinationFitV(None, left)
    self.fmt = chosen
1484
def fitbh(self, top):
    """Select the FitBH form for this destination, with the given top.

    The format object is created with its page set to None.
    """
    chosen = PDFDestinationFitBH(None, top)
    self.fmt = chosen
1486
def fitbv(self, left):
    """Select the FitBV form for this destination, with the given left.

    The format object is created with its page set to None.
    """
    chosen = PDFDestinationFitBV(None, left)
    self.fmt = chosen
1488
def fitr(self, left, bottom, right, top):
    """Select the FitR form for this destination (left, bottom, right, top).

    The format object is created with its page set to None.
    """
    chosen = PDFDestinationFitR(None, left, bottom, right, top)
    self.fmt = chosen
1490
def setPage(self, page):
1492
#self.fmt.page = page # may not yet be defined!
1494
class PDFDestinationXYZ:
    """Destination formatted as the array [pageref /XYZ left top zoom]."""
    # format() reads self.typename; it has to exist on the class
    typename = "XYZ"

    def __init__(self, page, left, top, zoom):
        # keep every value that format() later reads back
        self.page = page
        self.left = left
        self.top = top
        self.zoom = zoom

    def format(self, document):
        """Return the formatted destination array for the given document."""
        pageref = document.Reference(self.page)
        A = PDFArray([pageref, PDFName(self.typename), self.left, self.top, self.zoom])
        return format(A, document)
1506
class PDFDestinationFit:
    """Destination formatted as the array [pageref /Fit]."""
    # format() reads self.typename; it has to exist on the class
    typename = "Fit"

    def __init__(self, page):
        # format() needs the page to build the page reference
        self.page = page

    def format(self, document):
        """Return the formatted destination array for the given document."""
        pageref = document.Reference(self.page)
        A = PDFArray([pageref, PDFName(self.typename)])
        return format(A, document)
1515
class PDFDestinationFitB(PDFDestinationFit):
    """Same array layout as PDFDestinationFit, written with the name /FitB."""
    # a class statement needs a body, and the inherited format() emits typename
    typename = "FitB"
1518
class PDFDestinationFitH:
    """Destination formatted as the array [pageref /FitH top]."""
    # format() reads self.typename; it has to exist on the class
    typename = "FitH"

    def __init__(self, page, top):
        self.page = page
        self.top = top

    def format(self, document):
        """Return the formatted destination array for the given document."""
        pageref = document.Reference(self.page)
        A = PDFArray([pageref, PDFName(self.typename), self.top])
        return format(A, document)
1527
class PDFDestinationFitBH(PDFDestinationFitH):
    """Same array layout as PDFDestinationFitH, written with the name /FitBH."""
    # a class statement needs a body, and the inherited format() emits typename
    typename = "FitBH"
1530
class PDFDestinationFitV:
    """Destination formatted as the array [pageref /FitV left]."""
    # format() reads self.typename; it has to exist on the class
    typename = "FitV"

    def __init__(self, page, left):
        self.page = page
        self.left = left

    def format(self, document):
        """Return the formatted destination array for the given document."""
        pageref = document.Reference(self.page)
        A = PDFArray([pageref, PDFName(self.typename), self.left])
        return format(A, document)
1539
class PDFDestinationFitBV(PDFDestinationFitV):
    """Same array layout as PDFDestinationFitV, written with the name /FitBV."""
    # a class statement needs a body, and the inherited format() emits typename
    typename = "FitBV"

# fitbv() in this file constructs PDFDestinationFitBV, while the class was
# declared as PDFDestinationBV; the old name is kept as an alias so that any
# code using it keeps working.
PDFDestinationBV = PDFDestinationFitBV
1542
class PDFDestinationFitR:
    """Destination formatted as the array [pageref /FitR left bottom right top]."""
    # format() reads self.typename; it has to exist on the class
    typename = "FitR"

    def __init__(self, page, left, bottom, right, top):
        self.page = page
        self.left = left
        self.bottom = bottom
        self.right = right
        self.top = top

    def format(self, document):
        """Return the formatted destination array for the given document."""
        pageref = document.Reference(self.page)
        A = PDFArray([pageref, PDFName(self.typename),
                      self.left, self.bottom, self.right, self.top])
        return format(A, document)
1551
# named destinations need nothing
1553
# skipping filespecs
1555
class PDFResourceDictionary:
1556
"""each element *could* be reset to a reference if desired"""
1558
self.ColorSpace = {}
1564
self.Properties = {}
1566
# ?by default define the basicprocs
1568
stdprocs = map(PDFName, string.split("PDF Text ImageB ImageC ImageI"))
1569
dict_attributes = ("ColorSpace", "XObject", "ExtGState", "Font", "Pattern", "Properties", "Shading")
1571
# define all standard procsets
1572
self.ProcSet = self.stdprocs
1573
def basicProcs(self):
    """Restrict ProcSet to the first two standard procsets (PDF and Text)."""
    first_two = self.stdprocs[:2]  # just PDF and Text
    self.ProcSet = first_two
1575
def basicFonts(self):
    """Replace the Font entry by PDFObjectReference(BasicFonts)."""
    fonts_ref = PDFObjectReference(BasicFonts)
    self.Font = fonts_ref
1577
def format(self, document):
1579
from types import ListType, DictType
1580
for dname in self.dict_attributes:
1581
v = getattr(self, dname)
1582
if type(v) is DictType:
1584
dv = PDFDictionary(v)
1590
if type(v) is ListType:
1596
DD = PDFDictionary(D)
1597
return format(DD, document)
1599
##############################################################################
1601
# Font objects - the PDFDocument.addFont() method knows which of these
1602
# to construct when given a user-facing Font object
1604
##############################################################################
1608
"""no init: set attributes explicitly"""
1610
# note! /Name appears to be an undocumented attribute....
1611
name_attributes = string.split("Type Subtype BaseFont Name")
1614
# these attributes are assumed to already be of the right type
1615
local_attributes = string.split("FirstChar LastChar Widths Encoding ToUnicode FontDescriptor")
1616
def format(self, document):
1618
for name in self.name_attributes:
1619
if hasattr(self, name):
1620
value = getattr(self, name)
1621
D[name] = PDFName(value)
1622
for name in self.local_attributes:
1623
if hasattr(self, name):
1624
value = getattr(self, name)
1627
PD = PDFDictionary(D)
1628
return PD.format(document)
1630
## These attribute listings will be useful in future, even if we
1631
## put them elsewhere
1633
class PDFTrueTypeFont(PDFType1Font):
    """PDFType1Font variant whose Subtype is "TrueType"."""
    # local_attributes is deliberately not overridden: the list would be the
    # same as the inherited one
    # (FirstChar LastChar Widths Encoding ToUnicode FontDescriptor).
    Subtype = "TrueType"
1637
##class PDFMMType1Font(PDFType1Font):
1638
## Subtype = "MMType1"
1640
##class PDFType3Font(PDFType1Font):
1641
## Subtype = "Type3"
1642
## local_attributes = string.split(
1643
## "FirstChar LastChar Widths CharProcs FontBBox FontMatrix Resources Encoding")
1645
##class PDFType0Font(PDFType1Font):
1646
## Subtype = "Type0"
1647
## local_attributes = string.split(
1648
## "DescendantFonts Encoding")
1650
##class PDFCIDFontType0(PDFType1Font):
1651
## Subtype = "CIDFontType0"
1652
## local_attributes = string.split(
1653
## "CIDSystemInfo FontDescriptor DW W DW2 W2 Registry Ordering Supplement")
1655
##class PDFCIDFontType0(PDFType1Font):
1656
## Subtype = "CIDFontType2"
1657
## local_attributes = string.split(
1658
## "BaseFont CIDToGIDMap CIDSystemInfo FontDescriptor DW W DW2 W2")
1660
##class PDFEncoding(PDFType1Font):
1661
## Type = "Encoding"
1662
## name_attributes = string.split("Type BaseEncoding")
1663
## # these attributes are assumed to already be of the right type
1664
## local_attributes = ["Differences"]
1667
# UGLY ALERT - this needs turning into something O-O, it was hacked
1668
# across from the pdfmetrics.Encoding class to avoid circularity
1672
class PDFFormXObject:
1673
# like page requires .info set by some higher level (doc)
1674
# XXXX any resource used in a form must be propagated up to the page that (recursively) uses
1675
# the form!! (not implemented yet).
1676
XObjects = Annots = BBox = Matrix = Contents = stream = Resources = None
1677
hasImages = 1 # probably should change
1679
def __init__(self, lowerx, lowery, upperx, uppery):
1681
self.lowerx = lowerx; self.lowery=lowery; self.upperx=upperx; self.uppery=uppery
1683
def setStreamList(self, data):
1684
if type(data) is types.ListType:
1685
data = string.join(data, LINEEND)
1689
"get the declared bounding box for the form as a list"
1691
return list(self.BBox.sequence)
1693
return [self.lowerx, self.lowery, self.upperx, self.uppery]
1695
def format(self, document):
1696
self.BBox = self.BBox or PDFArray([self.lowerx, self.lowery, self.upperx, self.uppery])
1697
self.Matrix = self.Matrix or PDFArray([1, 0, 0, 1, 0, 0])
1701
#these must be transferred to the page when the form is used
1702
raise ValueError, "annotations not reimplemented yet"
1703
if not self.Contents:
1704
stream = self.stream
1706
self.Contents = teststream()
1710
# need to add filter stuff (?)
1711
S.__Comment__ = "xobject form stream"
1713
if not self.Resources:
1714
resources = PDFResourceDictionary()
1716
resources.basicFonts()
1718
resources.allProcs()
1720
resources.basicProcs()
1722
#print "XObjects", self.XObjects.dict
1723
resources.XObject = self.XObjects
1724
if self.compression:
1725
self.Contents.filters = [PDFBase85Encode, PDFZCompress]
1726
sdict = self.Contents.dictionary
1727
sdict["Type"] = PDFName("XObject")
1728
sdict["Subtype"] = PDFName("Form")
1729
sdict["FormType"] = 1
1730
sdict["BBox"] = self.BBox
1731
sdict["Matrix"] = self.Matrix
1732
sdict["Resources"] = resources
1733
return self.Contents.format(document)
1735
class PDFPostScriptXObject:
    "For embedding PS (e.g. tray commands) in PDF"

    def __init__(self, content=None):
        """Keep the stream content to be emitted by format()."""
        self.content = content

    def format(self, document):
        """Return a formatted stream with /Type /XObject and /Subtype /PS.

        The stream content is self.content as given to the constructor.
        """
        # the stream has to be created here; the block as found used S
        # without ever binding it
        S = PDFStream()
        S.content = self.content
        S.__Comment__ = "xobject postscript stream"
        sdict = S.dictionary
        sdict["Type"] = PDFName("XObject")
        sdict["Subtype"] = PDFName("PS")
        return S.format(document)
1749
class PDFImageXObject:
1750
# first attempts at a hard-coded one
1751
# in the file, Image XObjects are stream objects. We already
1752
# have a PDFStream object with 3 attributes: dictionary, content
1753
# and filters. So the job of this thing is to construct the
1754
# right PDFStream instance and ask it to format itself.
1755
def __init__(self, name, source=None, mask=None):
1759
self.bitsPerComponent = 1
1760
self.colorSpace = 'DeviceGray'
1761
self._filters = 'ASCII85Decode',
1762
self.streamContent = """
1763
003B00 002700 002480 0E4940 114920 14B220 3CB650
1764
75FE88 17FF8C 175F14 1C07E2 3803C4 703182 F8EDFC
1765
B2BBC2 BB6F84 31BFC2 18EA3C 0E3E00 07FC00 03F800
1771
pass # use the canned one.
1772
elif type(source) == type(''):
1775
ext = string.lower(os.path.splitext(source)[1])
1776
if ext in ('.jpg', '.jpeg'):
1777
self.loadImageFromJPEG(open_for_read(source))
1779
self.loadImageFromA85(source)
1780
else: # it is already a PIL Image
1781
self.loadImageFromSRC(source)
1783
def loadImageFromA85(self,source):
    """Fill in image attributes from pdfutils.cacheImageFile(source, ...).

    Width and height are parsed from the second line of the returned data
    (words 1 and 3); the stream content is lines 3..-1 joined together.
    Colour space, bits per component and filters are fixed to
    DeviceRGB / 8 / ASCII85Decode+FlateDecode.
    """
    # IMG has to exist before it is passed on; the block as found used it
    # unbound.  Presumably cacheImageFile appends the image object to this
    # list when it had to open the image - TODO confirm against pdfutils.
    IMG = []
    imagedata = map(string.strip,pdfutils.cacheImageFile(source,returnInMemory=1,IMG=IMG))
    words = string.split(imagedata[1])
    self.width, self.height = map(string.atoi,(words[1],words[3]))
    self.colorSpace = 'DeviceRGB'
    self.bitsPerComponent = 8
    self._filters = 'ASCII85Decode','FlateDecode' #'A85','Fl'
    if IMG:
        self._checkTransparency(IMG[0])
    elif self.mask=='auto':
        # no image object is available to inspect, so 'auto' falls back to no mask
        self.mask = None
    self.streamContent = string.join(imagedata[3:-1],'')
1795
def loadImageFromJPEG(self,imageFile):
1796
info = pdfutils.readJPEGInfo(imageFile)
1797
self.width, self.height = info[0], info[1]
1798
self.bitsPerComponent = 8
1800
self.colorSpace = 'DeviceGray'
1802
self.colorSpace = 'DeviceRGB'
1803
else: #maybe should generate an error, is this right for CMYK?
1804
self.colorSpace = 'DeviceCMYK'
1805
imageFile.seek(0) #reset file pointer
1806
self.streamContent = pdfutils._AsciiBase85Encode(imageFile.read())
1807
self._filters = 'ASCII85Decode','DCTDecode' #'A85','DCT'
1810
def _checkTransparency(self,im):
1811
if self.mask=='auto':
1812
tc = im.getTransparent()
1814
self.mask = (tc[0], tc[0], tc[1], tc[1], tc[2], tc[2])
1817
elif hasattr(self.mask,'rgb'):
1819
self.mask = _[0],_[0],_[1],_[1],_[2],_[2]
1821
def loadImageFromSRC(self, im):
1822
"Extracts the stream, width and height"
1823
if im._image.format=='JPEG':
1826
self.loadImageFromJPEG(fp)
1828
zlib = import_zlib()
1830
self.width, self.height = im.getSize()
1831
raw = im.getRGBData()
1832
assert(len(raw) == self.width*self.height, "Wrong amount of data for image")
1833
self.streamContent = pdfutils._AsciiBase85Encode(zlib.compress(raw))
1834
self.colorSpace = 'DeviceRGB'
1835
self.bitsPerComponent = 8
1836
self._filters = 'ASCII85Decode','FlateDecode' #'A85','Fl'
1837
self._checkTransparency(im)
1839
def format(self, document):
    """Return the formatted image XObject stream for the given document.

    A PDFStream is created, given self.streamContent as content, and its
    dictionary is filled with Type, Subtype, Width, Height,
    BitsPerComponent, ColorSpace, Filter and Length; Mask is added only
    when self.mask is true.
    """
    # the stream and its dictionary have to be bound here: the block as
    # found used S unbound and stored the entries on the builtin 'dict'
    S = PDFStream()
    S.content = self.streamContent
    sdict = S.dictionary
    sdict["Type"] = PDFName("XObject")
    sdict["Subtype"] = PDFName("Image")
    sdict["Width"] = self.width
    sdict["Height"] = self.height
    sdict["BitsPerComponent"] = self.bitsPerComponent
    sdict["ColorSpace"] = PDFName(self.colorSpace)
    sdict["Filter"] = PDFArray(map(PDFName,self._filters))
    sdict["Length"] = len(self.streamContent)
    if self.mask: sdict["Mask"] = PDFArray(self.mask)
    return S.format(document)
1854
if __name__ == "__main__":
    # pdfdoc is a library module; running it directly only prints this notice
    print("There is no script interpretation for pdfdoc.")