1
"""Text wrapping and filling.
4
# Copyright (C) 1999-2001 Gregory P. Ward.
5
# Copyright (C) 2002, 2003 Python Software Foundation.
6
# Written by Greg Ward <gward@python.net>
8
__revision__ = "$Id: textwrap.py,v 1.32.8.2 2004/05/13 01:48:15 gward Exp $"
18
# Do the right thing with boolean values for all known Python versions
19
# (so this module can be copied to projects that don't depend on Python
20
# 2.3, e.g. Optik and Docutils).
24
(True, False) = (1, 0)
26
__all__ = ['TextWrapper', 'wrap', 'fill']
28
# Hardcode the recognized whitespace characters to the US-ASCII
29
# whitespace characters. The main reason for doing this is that in
30
# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
31
# that character winds up in string.whitespace. Respecting
32
# string.whitespace in those cases would 1) make textwrap treat 0xa0 the
33
# same as any other whitespace char, which is clearly wrong (it's a
34
# *non-breaking* space), 2) possibly cause problems with Unicode,
35
# since 0xa0 is not in range(128).
36
_whitespace = '\t\n\x0b\x0c\r '
40
Object for wrapping/filling text. The public interface consists of
41
the wrap() and fill() methods; the other methods are just there for
42
subclasses to override in order to tweak the default behaviour.
43
If you want to completely replace the main wrapping algorithm,
44
you'll probably have to override _wrap_chunks().
46
Several instance attributes control various aspects of wrapping:
48
the maximum width of wrapped lines (unless break_long_words
50
initial_indent (default: "")
51
string that will be prepended to the first line of wrapped
52
output. Counts towards the line's width.
53
subsequent_indent (default: "")
54
string that will be prepended to all lines save the first
55
of wrapped output; also counts towards each line's width.
56
expand_tabs (default: true)
57
Expand tabs in input text to spaces before further processing.
58
Each tab will become 1 .. 8 spaces, depending on its position in
59
its line. If false, each tab is treated as a single character.
60
replace_whitespace (default: true)
61
Replace all whitespace characters in the input text by spaces
62
after tab expansion. Note that if expand_tabs is false and
63
replace_whitespace is true, every tab will be converted to a
65
fix_sentence_endings (default: false)
66
Ensure that sentence-ending punctuation is always followed
67
by two spaces. Off by default because the algorithm is
68
(unavoidably) imperfect.
69
break_long_words (default: true)
70
Break words longer than 'width'. If false, those words will not
71
be broken, and some lines might be longer than 'width'.
74
whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))
76
unicode_whitespace_trans = {}
78
uspace = eval("ord(u' ')")
80
# Python1.5 doesn't understand u'' syntax, in which case we
81
# won't actually use the unicode translation below, so it
82
# doesn't matter what value we put in the table.
84
for x in map(ord, _whitespace):
85
unicode_whitespace_trans[x] = uspace
87
# This funky little regex is just the trick for splitting
88
# text up into word-wrappable chunks. E.g.
89
# "Hello there -- you goof-ball, use the -b option!"
91
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
92
# (after stripping out empty strings).
93
wordsep_re = re.compile(r'(\s+|' # any whitespace
94
r'-*\w{2,}-(?=\w{2,}))') # hyphenated words
95
# Earlier Pythons don't have the (?<=
96
# positive look-behind assertion. It doesn't
97
# matter for the simple input SCons is going to
98
# give it, so just comment it out.
99
#r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash
101
# XXX will there be a locale-or-charset-aware version of
102
# string.lowercase in 2.3?
103
sentence_end_re = re.compile(r'[%s]' # lowercase letter
104
r'[\.\!\?]' # sentence-ending punct.
105
r'[\"\']?' # optional end-of-quote
112
subsequent_indent="",
114
replace_whitespace=True,
115
fix_sentence_endings=False,
116
break_long_words=True):
118
self.initial_indent = initial_indent
119
self.subsequent_indent = subsequent_indent
120
self.expand_tabs = expand_tabs
121
self.replace_whitespace = replace_whitespace
122
self.fix_sentence_endings = fix_sentence_endings
123
self.break_long_words = break_long_words
126
# -- Private methods -----------------------------------------------
127
# (possibly useful for subclasses to override)
129
def _munge_whitespace(self, text):
130
"""_munge_whitespace(text : string) -> string
132
Munge whitespace in text: expand tabs and convert all other
133
whitespace characters to spaces. Eg. " foo\tbar\n\nbaz"
134
becomes " foo bar baz".
137
text = string.expandtabs(text)
138
if self.replace_whitespace:
139
if type(text) == type(''):
140
text = string.translate(text, self.whitespace_trans)
141
elif isinstance(text, unicode):
142
text = string.translate(text, self.unicode_whitespace_trans)
146
def _split(self, text):
    """_split(text : string) -> [string]

    Split the text to wrap into indivisible chunks.  Chunks are
    not quite the same as words; see _wrap_chunks() for full
    details.  As an example, the text
      Look, goof-ball -- use the -b option!
    breaks into the following chunks:
      'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
      'use', ' ', 'the', ' ', '-b', ' ', 'option!'
    """
    # wordsep_re contains a capturing group, so split() keeps the
    # separators it matched but also yields empty strings between
    # adjacent matches.  Drop the empties and hand back a real list:
    # the wrapping code indexes and pops chunks, which a lazy filter
    # object would not support.
    return [chunk for chunk in self.wordsep_re.split(text) if chunk]
161
def _fix_sentence_endings(self, chunks):
162
"""_fix_sentence_endings(chunks : [string])
164
Correct for sentence endings buried in 'chunks'. Eg. when the
165
original text contains "... foo.\nBar ...", munge_whitespace()
166
and split() will convert that to [..., "foo.", " ", "Bar", ...]
167
which has one too few spaces; this method simply changes the one
171
pat = self.sentence_end_re
172
while i < len(chunks)-1:
173
if chunks[i+1] == " " and pat.search(chunks[i]):
179
def _handle_long_word(self, chunks, cur_line, cur_len, width):
180
"""_handle_long_word(chunks : [string],
182
cur_len : int, width : int)
184
Handle a chunk of text (most likely a word, not whitespace) that
185
is too long to fit in any line.
187
space_left = max(width - cur_len, 1)
189
# If we're allowed to break long words, then do so: put as much
190
# of the next chunk onto the current line as will fit.
191
if self.break_long_words:
192
cur_line.append(chunks[0][0:space_left])
193
chunks[0] = chunks[0][space_left:]
195
# Otherwise, we have to preserve the long word intact. Only add
196
# it to the current line if there's nothing already there --
197
# that minimizes how much we violate the width constraint.
199
cur_line.append(chunks.pop(0))
201
# If we're not allowed to break long words, and there's already
202
# text on the current line, do nothing. Next time through the
203
# main loop of _wrap_chunks(), we'll wind up here again, but
204
# cur_len will be zero, so the next line will be entirely
205
# devoted to the long word that we can't handle right now.
207
def _wrap_chunks(self, chunks):
208
"""_wrap_chunks(chunks : [string]) -> [string]
210
Wrap a sequence of text chunks and return a list of lines of
211
length 'self.width' or less. (If 'break_long_words' is false,
212
some lines may be longer than this.) Chunks correspond roughly
213
to words and the whitespace between them: each chunk is
214
indivisible (modulo 'break_long_words'), but a line break can
215
come between any two chunks. Chunks should not have internal
216
whitespace; ie. a chunk is either all whitespace or a "word".
217
Whitespace chunks will be removed from the beginning and end of
218
lines, but apart from that whitespace is preserved.
222
raise ValueError("invalid width %r (must be > 0)" % self.width)
226
# Start the list of chunks that will make up the current line.
227
# cur_len is just the length of all the chunks in cur_line.
231
# Figure out which static string will prefix this line.
233
indent = self.subsequent_indent
235
indent = self.initial_indent
237
# Maximum width for this line.
238
width = self.width - len(indent)
240
# First chunk on line is whitespace -- drop it, unless this
241
# is the very beginning of the text (ie. no lines started yet).
242
if string.strip(chunks[0]) == '' and lines:
248
# Can at least squeeze this chunk onto the current line.
249
if cur_len + l <= width:
250
cur_line.append(chunks.pop(0))
251
cur_len = cur_len + l
253
# Nope, this line is full.
257
# The current line is full, and the next chunk is too big to
258
# fit on *any* line (not just this one).
259
if chunks and len(chunks[0]) > width:
260
self._handle_long_word(chunks, cur_line, cur_len, width)
262
# If the last chunk on this line is all whitespace, drop it.
263
if cur_line and string.strip(cur_line[-1]) == '':
266
# Convert current line back to a string and store it in list
267
# of all lines (return value).
269
lines.append(indent + string.join(cur_line, ''))
274
# -- Public interface ----------------------------------------------
276
def wrap(self, text):
    """wrap(text : string) -> [string]

    Reformat the single paragraph in 'text' so it fits in lines of
    no more than 'self.width' columns, and return a list of wrapped
    lines.  The text is first normalised by _munge_whitespace()
    (tab expansion and conversion of other whitespace characters,
    including newline, to spaces), then cut into chunks by _split(),
    and finally assembled into lines by _wrap_chunks().
    """
    text = self._munge_whitespace(text)
    chunks = self._split(text)
    if self.fix_sentence_endings:
        # Adjusts 'chunks' in place; the return value is not used.
        self._fix_sentence_endings(chunks)
    return self._wrap_chunks(chunks)
292
def fill(self, text):
    """fill(text : string) -> string

    Reformat the single paragraph in 'text' to fit in lines of no
    more than 'self.width' columns, and return a new string
    containing the entire wrapped paragraph (the lines produced by
    wrap(), joined with newlines).
    """
    # str.join gives the same result as string.join(seq, sep) without
    # relying on the function-style API of the 'string' module.
    return "\n".join(self.wrap(text))
302
# -- Convenience interface ---------------------------------------------
304
def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space.  See TextWrapper class for available keyword args to customize
    wrapping behaviour.
    """
    # Ordinary call syntax replaces the apply() builtin; 'width' is
    # passed alongside whatever extra options the caller supplied.
    wrapper = TextWrapper(width=width, **kwargs)
    return wrapper.wrap(text)
319
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See TextWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    # Ordinary call syntax replaces the apply() builtin; 'width' is
    # passed alongside whatever extra options the caller supplied.
    wrapper = TextWrapper(width=width, **kwargs)
    return wrapper.fill(text)
334
# -- Loosely related functionality -------------------------------------
337
"""dedent(text : string) -> string
339
Remove any whitespace that can be uniformly removed from the left
340
of every line in `text`.
342
This can be used e.g. to make triple-quoted strings line up with
343
the left edge of screen/whatever, while still presenting it in the
344
source code in indented form.
349
# end first line with \ to avoid the empty line!
354
print repr(s) # prints ' hello\n world\n '
355
print repr(dedent(s)) # prints 'hello\n world\n'
357
lines = text.expandtabs().split('\n')
360
content = line.lstrip()
363
indent = len(line) - len(content)
367
margin = min(margin, indent)
369
if margin is not None and margin > 0:
370
for i in range(len(lines)):
371
lines[i] = lines[i][margin:]
373
return string.join(lines, '\n')