1
1
#Copyright ReportLab Europe Ltd. 2000-2012
2
2
#see license.txt for license details
3
3
#history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/platypus/paragraph.py
4
__version__=''' $Id: paragraph.py 3959 2012-09-27 14:39:39Z robin $ '''
4
__version__=''' $Id$ '''
5
5
__doc__='''The standard paragraph implementation'''
6
from string import join, whitespace
6
from string import whitespace
7
7
from operator import truth
8
from types import StringType, ListType
9
8
from unicodedata import category
10
9
from reportlab.pdfbase.pdfmetrics import stringWidth, getFont, getAscentDescent
11
10
from reportlab.platypus.paraparser import ParaParser
19
18
from reportlab.lib.abag import ABag
20
19
from reportlab.rl_config import platypus_link_underline
21
20
from reportlab import rl_config
21
from reportlab.lib.utils import isBytes, unicodeT, bytesT, strTypes
22
from reportlab.lib.rl_accel import sameFrag
24
#on UTF8 branch, split and strip must be unicode-safe!
25
#on UTF8/py33 branch, split and strip must be unicode-safe!
25
26
#thanks to Dirk Holtwick for helpful discussions/insight
59
60
_wsc_re_split=re.compile('[%s]+'% re.escape(_wsc)).split
61
62
def split(text, delim=None):
62
if type(text) is str: text = text.decode('utf8')
63
if type(delim) is str: delim = delim.decode('utf8')
64
if delim is None and u'\xa0' in text:
65
return [uword.encode('utf8') for uword in _wsc_re_split(text)]
66
return [uword.encode('utf8') for uword in text.split(delim)]
63
if isBytes(text): text = text.decode('utf8')
64
if delim is not None and isBytes(delim): delim = delim.decode('utf8')
65
return [uword for uword in (_wsc_re_split(text) if delim is None and u'\xa0' in text else text.split(delim))]
69
if type(text) is str: text = text.decode('utf8')
70
return text.strip(_wsc).encode('utf8')
68
if isBytes(text): text = text.decode('utf8')
69
return text.strip(_wsc)
72
71
class ParaLines(ABag):
91
90
but could be used for line spacing.
94
#our one and only parser
95
# XXXXX if the parser has any internal state using only one is probably a BAD idea!
99
return join(filter(truth,split(strip(L))))
94
return ' '.join(list(filter(truth,split(strip(L)))))
101
96
def cleanBlockQuotedText(text,joiner=' '):
102
97
"""This is an internal utility which takes triple-
103
98
quoted text form within the document and returns
104
99
(hopefully) the paragraph the user intended originally."""
105
L=filter(truth,map(_lineClean, split(text, '\n')))
106
return join(L, joiner)
100
L=list(filter(truth,list(map(_lineClean, split(text, '\n')))))
101
return joiner.join(L)
108
103
def setXPos(tx,dx):
109
104
if dx>1e-6 or dx<-1e-6:
112
107
def _leftDrawParaLine( tx, offset, extraspace, words, last=0):
113
108
setXPos(tx,offset)
114
tx._textOut(join(words),1)
109
tx._textOut(' '.join(words),1)
115
110
setXPos(tx,-offset)
118
113
def _centerDrawParaLine( tx, offset, extraspace, words, last=0):
119
114
m = offset + 0.5 * extraspace
121
tx._textOut(join(words),1)
116
tx._textOut(' '.join(words),1)
125
120
def _rightDrawParaLine( tx, offset, extraspace, words, last=0):
126
121
m = offset + extraspace
128
tx._textOut(join(words),1)
123
tx._textOut(' '.join(words),1)
132
127
def _nbspCount(w):
133
if isinstance(w,str):
134
return w.count('\xc2\xa0')
129
return w.count(b'\xc2\xa0')
136
131
return w.count(u'\xa0')
138
133
def _justifyDrawParaLine( tx, offset, extraspace, words, last=0):
139
134
setXPos(tx,offset)
135
text = ' '.join(words)
141
136
if last or extraspace<=1e-8:
142
137
#last one, left align
143
138
tx._textOut(text,1)
400
395
_putFragLine(offset, tx, line, last, 'justify') #no space modification
401
396
setXPos(tx,-offset)
404
from _rl_accel import _sameFrag
407
from reportlab.lib._rl_accel import _sameFrag
409
#if you modify this you need to modify _rl_accel RGB
411
'returns 1 if two ParaFrags map out the same'
412
if (hasattr(f,'cbDefn') or hasattr(g,'cbDefn')
413
or hasattr(f,'lineBreak') or hasattr(g,'lineBreak')): return 0
414
for a in ('fontName', 'fontSize', 'textColor', 'rise', 'underline', 'strike', 'link', "backColor"):
415
if getattr(f,a,None)!=getattr(g,a,None): return 0
418
398
def _getFragWords(frags,maxWidth=None):
419
399
''' given a Parafrag list return a list of fragwords
420
400
[[size, (f00,w00), ..., (f0n,w0n)],....,[size, (fm0,wm0), ..., (f0n,wmn)]]
471
def _fragWordIter(w):
473
if hasattr(f,'cbDefn'):
474
yield f, getattr(f,'width'), s
477
s = s.decode('utf8') #only encoding allowed
479
yield f, stringWidth(c,f.fontName, f.fontSize), c
483
class _SplitList(list):
486
def _splitFragWord(w,maxWidth,maxWidths,lineno):
487
'''given a frag word, w, as returned by getFragWords
488
split it into frag words that fit in lines of length
494
return the new word list
497
maxlineno = len(maxWidths)-1
503
for g,cw,c in _fragWordIter(w):
504
newLineWidth = lineWidth+cw
505
tooLong = newLineWidth>maxWidth
506
if g is not f or tooLong:
508
if hasattr(f,'text'):
510
W.append((f,fragText))
512
W = _SplitList([wordWidth]+W)
515
maxWidth = maxWidths[min(maxlineno,lineno)]
517
newLineWidth = wordWidth = cw
523
lineWidth = newLineWidth
524
W.append((f,fragText))
525
W = _SplitList([wordWidth]+W)
529
class _SplitText(str):
532
def _splitWord(w,maxWidth,maxWidths,lineno,fontName,fontSize,encoding):
534
split w into words that fit in lines of length
540
then push those new words onto words
542
#TODO fix this to use binary search for the split points
544
maxlineno = len(maxWidths)-1
550
cw = stringWidth(c,fontName,fontSize,encoding)
551
newLineWidth = lineWidth+cw
552
if newLineWidth>maxWidth:
553
R.append(_SplitText(wordText))
555
maxWidth = maxWidths[min(maxlineno,lineno)]
559
lineWidth = newLineWidth
560
R.append(_SplitText(wordText))
491
563
def _split_blParaSimple(blPara,start,stop):
492
564
f = blPara.clone()
493
565
for a in ('lines', 'kind', 'text'):
519
591
tx2 = canvas.beginText(style.bulletIndent, cur_y+getattr(style,"bulletOffsetY",0))
520
592
tx2.setFont(style.bulletFontName, style.bulletFontSize)
521
593
tx2.setFillColor(hasattr(style,'bulletColor') and style.bulletColor or style.textColor)
522
if isinstance(bulletText,basestring):
594
if isinstance(bulletText,strTypes):
523
595
tx2.textOut(bulletText)
525
597
for f in bulletText:
538
610
'''work out bullet width and adjust maxWidths[0] if neccessary
541
if isinstance(bulletText,basestring):
613
if isinstance(bulletText,strTypes):
542
614
bulletWidth = stringWidth( bulletText, style.bulletFontName, style.bulletFontSize)
544
616
#it's a list of fragments
612
684
def _do_under_line(i, t_off, ws, tx, lm=-0.125):
613
685
y = tx.XtraState.cur_y - i*tx.XtraState.style.leading + lm*tx.XtraState.f.fontSize
614
textlen = tx._canvas.stringWidth(join(tx.XtraState.lines[i][1]), tx._fontname, tx._fontsize)
686
textlen = tx._canvas.stringWidth(' '.join(tx.XtraState.lines[i][1]), tx._fontname, tx._fontsize)
615
687
tx._canvas.line(t_off, y, t_off+textlen+ws, y)
617
689
_scheme_re = re.compile('^[a-zA-Z][-+a-zA-Z0-9]+$')
618
690
def _doLink(tx,link,rect):
619
if isinstance(link,unicode):
620
link = link.encode('utf8')
621
691
parts = link.split(':',1)
622
692
scheme = len(parts)==2 and parts[0].lower() or ''
623
693
if _scheme_re.match(scheme) and scheme!='document':
634
704
xs = tx.XtraState
635
705
leading = xs.style.leading
636
706
y = xs.cur_y - i*leading - xs.f.fontSize/8.0 # 8.0 factor copied from para.py
637
text = join(xs.lines[i][1])
707
text = ' '.join(xs.lines[i][1])
638
708
textlen = tx._canvas.stringWidth(text, tx._fontname, tx._fontsize)
639
709
_doLink(tx, xs.link, (t_off, y, t_off+textlen+ws, y+leading))
709
779
#single fragment the easy case
710
frags[0].text = tt(frags[0].text.decode('utf8')).encode('utf8')
711
elif tt is unicode.title:
780
frags[0].text = tt(frags[0].text)
781
elif tt is unicodeT.title:
717
786
if u.startswith(u' ') or pb:
722
791
u = u[:i]+tt(u[i:])
723
792
pb = u.endswith(u' ')
724
f.text = u.encode('utf8')
729
f.text = tt(t.decode('utf8')).encode('utf8')
800
class cjkU(unicodeT):
732
801
'''simple class to hold the frag corresponding to a str'''
733
802
def __new__(cls,value,frag,encoding):
734
self = unicode.__new__(cls,value)
803
self = unicodeT.__new__(cls,value)
735
804
self._frag = frag
736
805
if hasattr(frag,'cbDefn'):
737
806
w = getattr(frag.cbDefn,'width',0)
742
811
frag = property(lambda self: self._frag)
743
812
width = property(lambda self: self._width)
745
def makeCJKParaLine(U,extraSpace,calcBounds):
814
def makeCJKParaLine(U,maxWidth,widthUsed,extraSpace,lineBreak,calcBounds):
773
842
f0.text = u''.join(CW)
775
return FragLine(kind=1,extraSpace=extraSpace,wordCount=1,words=words[1:],fontSize=maxSize,ascent=maxAscent,descent=minDescent)
844
return FragLine(kind=1,extraSpace=extraSpace,wordCount=1,words=words[1:],fontSize=maxSize,ascent=maxAscent,descent=minDescent,maxWidth=maxWidth,currentWidth=widthUsed,lineBreak=lineBreak)
777
846
def cjkFragSplit(frags, maxWidths, calcBounds, encoding='utf8'):
778
847
'''This attempts to be wordSplit for frags using the dumb algorithm'''
780
849
U = [] #get a list of single glyphs with their widths etc etc
783
if not isinstance(text,unicode):
784
853
text = text.decode(encoding)
786
855
U.extend([cjkU(t,f,encoding) for t in text])
817
886
# - reversion to Kanji (which would be a good split point)
818
887
# - in the worst case, roughly half way back along the line
819
888
limitCheck = (lineStartPos+i)>>1 #(arbitrary taste issue)
820
for j in xrange(i-1,limitCheck,-1):
889
for j in range(i-1,limitCheck,-1):
822
891
if uj and category(uj)=='Zs' or ord(uj)>=0x3000:
826
extraSpace += sum(U[ii].width for ii in xrange(j,i))
895
extraSpace += sum(U[ii].width for ii in range(j,i))
841
910
#the i>lineStart+1 condition ensures progress
844
lines.append(makeCJKParaLine(U[lineStartPos:i],extraSpace,calcBounds))
913
lines.append(makeCJKParaLine(U[lineStartPos:i],maxWidth,widthUsed,extraSpace,lineBreak,calcBounds))
846
915
maxWidth = maxWidths[len(lines)]
847
916
except IndexError:
853
922
#any characters left?
854
923
if widthUsed > 0:
855
lines.append(makeCJKParaLine(U[lineStartPos:],maxWidth-widthUsed,calcBounds))
924
lines.append(makeCJKParaLine(U[lineStartPos:],maxWidth,widthUsed,maxWidth-widthUsed,False,calcBounds))
857
926
return ParaLines(kind=1,lines=lines)
918
987
self.encoding = encoding
919
988
self._setup(text, style, bulletText or getattr(style,'bulletText',None), frags, cleanBlockQuotedText)
921
991
def __repr__(self):
922
992
n = self.__class__.__name__
924
keys = self.__dict__.keys()
994
keys = list(self.__dict__.keys())
926
996
L.append('%s: %s' % (repr(k).replace("\n", " ").replace(" "," "),repr(getattr(self, k)).replace("\n", " ").replace(" "," ")))
927
997
L.append(") #"+n)
928
998
return '\n'.join(L)
930
1000
def _setup(self, text, style, bulletText, frags, cleaner):
1002
#This used to be a global parser to save overhead.
1003
#In the interests of thread safety it is being instantiated per paragraph.
1004
#On the next release, we'll replace with a cElementTree parser
1005
_parser = ParaParser()
931
1007
if frags is None:
932
1008
text = cleaner(text)
933
1009
_parser.caseSensitive = self.caseSensitive
998
1074
words = _getFragWords(frags)
999
1075
func = lambda x: x[0]
1000
return max(map(func,words))
1076
return max(list(map(func,words)))
1002
1078
def _get_split_blParaFunc(self):
1003
1079
return self.blPara.kind==0 and _split_blParaSimple or _split_blParaHard
1123
1199
else: maxWidths = width
1125
1201
self.height = lineno = 0
1202
maxlineno = len(maxWidths)-1
1126
1203
style = self.style
1204
splitLongWords = style.splitLongWords
1128
1206
#for bullets, work out width and ensure we wrap the right amount onto line one
1129
1207
_handleBulletWidth(self.bulletText,style,maxWidths)
1154
1232
spaceWidth = stringWidth(' ', fontName, fontSize, self.encoding)
1156
1234
currentWidth = -spaceWidth # hack to get around extra space for word 1
1158
1237
#this underscores my feeling that Unicode throughout would be easier!
1159
1238
wordWidth = stringWidth(word, fontName, fontSize, self.encoding)
1160
1239
newWidth = currentWidth + spaceWidth + wordWidth
1240
if newWidth>maxWidth:
1241
nmw = min(lineno,maxlineno)
1242
if wordWidth>max(maxWidths[nmw:nmw+1]) and not isinstance(word,_SplitText) and splitLongWords:
1244
words[0:0] = _splitWord(word,maxWidth-spaceWidth-currentWidth,maxWidths,lineno,fontName,fontSize,self.encoding)
1161
1246
if newWidth <= maxWidth or not len(cLine):
1162
1247
# fit one more on this line
1163
1248
cLine.append(word)
1212
1296
#test to see if this frag is a line break. If it is we will only act on it
1213
1297
#if the current width is non-negative or the previous thing was a deliberate lineBreak
1214
1298
lineBreak = hasattr(f,'lineBreak')
1299
if not lineBreak and newWidth>maxWidth and not isinstance(w,_SplitList) and splitLongWords:
1300
nmw = min(lineno,maxlineno)
1301
if wordWidth>max(maxWidths[nmw:nmw+1]):
1303
_words[0:0] = _splitFragWord(w,maxWidth-spaceWidth-currentWidth,maxWidths,lineno)
1215
1305
endLine = (newWidth>maxWidth and n>0) or lineBreak
1216
1306
if not endLine:
1217
1307
if lineBreak: continue #throw it away
1356
1443
_handleBulletWidth(self.bulletText, style, maxWidths)
1357
1444
frags = self.frags
1358
1445
nFrags = len(frags)
1359
if nFrags==1 and not hasattr(frags[0],'cbDefn'):
1446
if nFrags==1 and not hasattr(frags[0],'cbDefn') and not style.endDots:
1361
1448
if hasattr(self,'blPara') and getattr(self,'_splitpara',0):
1362
1449
return f.clone(kind=0, lines=self.blPara.lines)
1510
1597
if noJustifyLast and nLines==1 and style.endDots and dpl!=_rightDrawParaLine: _do_dots(0, dx, ws, xs, tx, dpl)
1512
1599
#now the middle of the paragraph, aligned with the left margin which is our origin.
1513
for i in xrange(1, nLines):
1600
for i in range(1, nLines):
1514
1601
ws = lines[i][0]
1515
1602
t_off = dpl( tx, _offsets[i], ws, lines[i][1], noJustifyLast and i==lim)
1516
1603
dx = t_off+leftIndent
1520
1607
if link: _do_link_line(i, dx, ws, tx)
1521
1608
if noJustifyLast and i==lim and style.endDots and dpl!=_rightDrawParaLine: _do_dots(i, dx, ws, xs, tx, dpl)
1523
for i in xrange(1, nLines):
1610
for i in range(1, nLines):
1524
1611
dpl( tx, _offsets[i], lines[i][0], lines[i][1], noJustifyLast and i==lim)
1573
1660
_do_post_text(tx)
1575
1662
#now the middle of the paragraph, aligned with the left margin which is our origin.
1576
for i in xrange(1, nLines):
1663
for i in range(1, nLines):
1578
1665
dpl( tx, _offsets[i], f, noJustifyLast and i==lim)
1579
1666
_do_post_text(tx)
1608
1695
func = lambda frag, w=self.width: w - frag.extraSpace
1610
1697
func = lambda frag, w=self.width: w - frag[0]
1611
return map(func,self.blPara.lines)
1698
return list(map(func,self.blPara.lines))
1613
1700
if __name__=='__main__': #NORUNTESTS
1614
1701
def dumpParagraphLines(P):
1615
print 'dumpParagraphLines(<Paragraph @ %d>)' % id(P)
1702
print('dumpParagraphLines(<Paragraph @ %d>)' % id(P))
1616
1703
lines = P.blPara.lines
1704
outw = sys.stdout.write
1617
1705
for l,line in enumerate(lines):
1618
1706
line = lines[l]
1619
1707
if hasattr(line,'words'):
1622
1710
words = line[1]
1623
1711
nwords = len(words)
1624
print 'line%d: %d(%s)\n ' % (l,nwords,str(getattr(line,'wordCount','Unknown'))),
1625
for w in xrange(nwords):
1626
print "%d:'%s'"%(w,getattr(words[w],'text',words[w])),
1712
outw('line%d: %d(%s)\n ' % (l,nwords,str(getattr(line,'wordCount','Unknown'))))
1713
for w in range(nwords):
1714
outw(" %d:'%s'"%(w,getattr(words[w],'text',words[w])))
1629
1717
def fragDump(w):
1630
1718
R= ["'%s'" % w[1]]
1634
1722
return ', '.join(R)
1636
1724
def dumpParagraphFrags(P):
1637
print 'dumpParagraphFrags(<Paragraph @ %d>) minWidth() = %.2f' % (id(P), P.minWidth())
1725
print('dumpParagraphFrags(<Paragraph @ %d>) minWidth() = %.2f' % (id(P), P.minWidth()))
1638
1726
frags = P.frags
1641
print "frag%d: '%s' %s" % (l, frags[l].text,' '.join(['%s=%s' % (k,getattr(frags[l],k)) for k in frags[l].__dict__ if k!=text]))
1729
print("frag%d: '%s' %s" % (l, frags[l].text,' '.join(['%s=%s' % (k,getattr(frags[l],k)) for k in frags[l].__dict__ if k!=text])))
1731
outw = sys.stdout.write
1645
1734
for W in _getFragWords(frags,360):
1647
print "fragword%d: cum=%3d size=%d" % (l, cum, W[0]),
1736
outw("fragword%d: cum=%3d size=%d" % (l, cum, W[0]))
1648
1737
for w in W[1:]:
1649
print '(%s)' % fragDump(w),
1738
outw(' (%s)' % fragDump(w))
1718
1807
P=Paragraph(text, B)
1719
1808
dumpParagraphFrags(P)
1720
1809
w,h = P.wrap(aW,aH)
1721
print 'After initial wrap',w,h
1810
print('After initial wrap',w,h)
1722
1811
dumpParagraphLines(P)
1723
1812
S = P.split(aW,aH)
1724
1813
dumpParagraphFrags(S[0])
1725
1814
w0,h0 = S[0].wrap(aW,aH)
1726
print 'After split wrap',w0,h0
1815
print('After split wrap',w0,h0)
1727
1816
dumpParagraphLines(S[0])