1
# vim: sw=4:expandtab:foldmethod=marker
3
# Copyright (c) 2006, Mathieu Fenniak
6
# Redistribution and use in source and binary forms, with or without
7
# modification, are permitted provided that the following conditions are
10
# * Redistributions of source code must retain the above copyright notice,
11
# this list of conditions and the following disclaimer.
12
# * Redistributions in binary form must reproduce the above copyright notice,
13
# this list of conditions and the following disclaimer in the documentation
14
# and/or other materials provided with the distribution.
15
# * The name of the author may not be used to endorse or promote products
16
# derived from this software without specific prior written permission.
18
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28
# POSSIBILITY OF SUCH DAMAGE.
32
Implementation of stream filters for PDF.
34
__author__ = "Mathieu Fenniak"
35
__author_email__ = "biziqe@mathieu.fenniak.net"
37
from utils import PdfReadError
39
from cStringIO import StringIO
41
from StringIO import StringIO
46
# zlib-backed path: the input is handed to zlib.decompress unchanged and the
# result is returned as-is.
return zlib.decompress(data)
48
# Mirror image of the above: returns zlib.compress(data) with zlib's default
# arguments.
return zlib.compress(data)
50
# Unable to import zlib. Attempt to use the System.IO.Compression
51
# library from the .NET framework. (IronPython only)
53
from System import IO, Collections, Array
54
# Copy a Python string into a newly allocated .NET System.Byte array, storing
# ord() of each character at the same index.
# NOTE(review): `System.Byte` needs the `System` module itself to be bound,
# not only the names pulled in by `from System import ...` - TODO confirm.
def _string_to_bytearr(buf):
55
# Array is sized to len(buf) up front, so every slot is overwritten below.
retval = Array.CreateInstance(System.Byte, len(buf))
56
for i in range(len(buf)):
57
retval[i] = ord(buf[i])
59
# Inverse of _string_to_bytearr: walks a .NET array (uses its .Length) and
# appends chr() of every element to `retval`.
# NOTE: the parameter name shadows the built-in `bytes` inside this function.
def _bytearr_to_string(bytes):
61
for i in range(bytes.Length):
62
# NOTE(review): one string concatenation per byte; may be slow for large
# arrays - consider collecting the pieces and joining once.
retval += chr(bytes[i])
64
# Drain a .NET stream into an in-memory buffer, reading through a fixed
# 2048-byte scratch array.
def _read_bytes(stream):
65
# Accumulates everything read from `stream`.
ms = IO.MemoryStream()
66
buf = Array.CreateInstance(System.Byte, 2048)
68
# `bytes` here is the value returned by Read (shadowing the built-in name);
# it is used below as the count of valid bytes in `buf`.
bytes = stream.Read(buf, 0, buf.Length)
72
# Only the first `bytes` entries of the scratch array are copied across.
ms.Write(buf, 0, bytes)
77
# .NET fallback for decompression: string -> byte array -> MemoryStream ->
# DeflateStream (Decompress mode) -> byte array -> string.
# NOTE(review): DeflateStream is documented as handling raw deflate data,
# whereas the zlib path handles zlib-framed data - confirm both fallbacks
# accept the same input.
bytes = _string_to_bytearr(data)
78
ms = IO.MemoryStream()
79
ms.Write(bytes, 0, bytes.Length)
80
# Rewind so the DeflateStream reads from the start of what was just written.
ms.Position = 0 # fseek 0
81
gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
82
# `bytes` is rebound from the compressed input to the decompressed output.
bytes = _read_bytes(gz)
83
retval = _bytearr_to_string(bytes)
87
# .NET fallback for compression: the input string is converted to a byte
# array and written through a DeflateStream (Compress mode) into `ms`.
bytes = _string_to_bytearr(data)
88
ms = IO.MemoryStream()
89
# Third argument True: per the .NET constructor signature this is
# `leaveOpen`, presumably so `ms` can still be read afterwards - TODO confirm.
gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
90
gz.Write(bytes, 0, bytes.Length)
92
# Rewind the backing stream before its contents are consumed.
ms.Position = 0 # fseek 0
94
# NOTE(review): in the lines shown here `bytes` is never rebound to the
# contents of `ms`; verify that the compressed data, not the input array, is
# what gets converted.
retval = _bytearr_to_string(bytes)
99
# /FlateDecode stream filter.  decode() inflates the data with decompress()
# and, depending on the "/Predictor" entry of decodeParms, reverses PNG row
# prediction; encode() returns compress(data).  Both are exposed as static
# methods via the old-style `name = staticmethod(name)` rebinding.
class FlateDecode(object):
100
# data: compressed stream contents (a string).
# decodeParms: mapping read with .get("/Predictor", 1) and ["/Columns"].
# NOTE(review): decodeStreamData passes stream.get("/DecodeParms") with no
# default - confirm a missing entry is guarded before .get is called on it.
def decode(data, decodeParms):
101
data = decompress(data)
104
predictor = decodeParms.get("/Predictor", 1)
105
# predictor 1 == no predictor
107
# "/Columns" is read with [] and therefore must be present on this path.
columns = decodeParms["/Columns"]
109
if predictor >= 10 and predictor <= 15:
111
# PNG prediction can vary from row to row
112
# Each row carries one leading filter-type byte followed by `columns` bytes.
rowlength = columns + 1
113
# NOTE(review): assert is stripped under -O; malformed input would then
# pass silently.
assert len(data) % rowlength == 0
114
# The row "above" the first row is treated as all zeros.
prev_rowdata = (0,) * rowlength
115
# Python 2 idioms: xrange, and `/` as integer division on ints.
for row in xrange(len(data) / rowlength):
116
rowdata = [ord(x) for x in data[(row*rowlength):((row+1)*rowlength)]]
117
filterByte = rowdata[0]
120
# Filter type 1: each byte gets the already-reconstructed byte to its left
# added, modulo 256.  Index 0 is the filter byte and index 1 has no left
# neighbour, hence the start at 2.
# NOTE(review): using i-1 assumes one byte per pixel - TODO confirm.
elif filterByte == 1:
121
for i in range(2, rowlength):
122
rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
123
# Filter type 2: each byte gets the byte at the same position of the
# previous reconstructed row added, modulo 256.
elif filterByte == 2:
124
for i in range(1, rowlength):
125
rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
127
# unsupported PNG filter
128
raise PdfReadError("Unsupported PNG filter %r" % filterByte)
129
# The reconstructed row becomes the reference row for the next iteration.
prev_rowdata = rowdata
130
# The filter byte (index 0) is dropped from the emitted row.
output.write(''.join([chr(x) for x in rowdata[1:]]))
131
data = output.getvalue()
133
# unsupported predictor
134
raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
136
decode = staticmethod(decode)
139
# encode(): returns the result of the module-level compress() on the data.
return compress(data)
140
encode = staticmethod(encode)
142
# /ASCIIHexDecode stream filter: turns hexadecimal text back into characters.
# The self-test at the bottom of this file decodes '61\n626\n3>' to "abc",
# i.e. line breaks between digits are tolerated and '>' ends the data.
class ASCIIHexDecode(object):
143
# decodeParms is accepted for signature parity with the other filters; no use
# of it is visible in this method - TODO confirm it is ignored.
def decode(data, decodeParms=None):
156
# `char` holds hex digit text; it is parsed base-16 and appended to the
# result as a single character.
retval += chr(int(char, base=16))
161
decode = staticmethod(decode)
163
# /ASCII85Decode stream filter: digits are collected into `group`, combined
# into a base-85 number `b`, and that number is split into four output
# characters (most significant first).
class ASCII85Decode(object):
164
# decodeParms is accepted for signature parity with the other filters; no use
# of it is visible in this method - TODO confirm it is ignored.
def decode(data, decodeParms=None):
169
# remove all whitespace from data
170
# Only space, LF, CR and TAB are dropped; `data` becomes a list of
# single-character strings from here on.
data = [y for y in data if not (y in ' \n\r\t')]
173
# Optional "<~" opening marker, recognised only before any output exists.
if len(retval) == 0 and c == "<" and data[x+1] == "~":
180
# Branch that emits four NUL characters at once; it is only legal on a group
# boundary (presumably the 'z' shorthand - TODO confirm the condition).
assert len(group) == 0
181
retval += '\x00\x00\x00\x00'
183
# "~>" end-of-data marker.
# NOTE(review): data[x+1] has no visible bounds check; a trailing lone "~"
# would raise IndexError - verify.
elif c == "~" and data[x+1] == ">":
185
# cannot have a final group of just 1 char
186
assert len(group) > 1
188
# Pad the partial final group with the highest digit value (85); the surplus
# output characters are trimmed via `hitEod` further down.
group += [ 85, 85, 85 ]
194
# At this point `c` is compared as a number: a valid base-85 digit value.
assert c >= 0 and c < 85
197
# Positional base-85 value of the group, most significant digit first.
b = group[0] * (85**4) + \
198
group[1] * (85**3) + \
199
group[2] * (85**2) + \
202
# NOTE(review): this rejects b == 2**32 - 1, which is the value of four 0xFF
# bytes and fits in 32 bits - verify whether `<=` (or `< 2**32`) was intended.
assert b < (2**32 - 1)
203
# Split `b` into bytes; c4 is the least significant one.
c4 = chr((b >> 0) % 256)
204
c3 = chr((b >> 8) % 256)
205
c2 = chr((b >> 16) % 256)
207
retval += (c1 + c2 + c3 + c4)
209
# Trim output produced by padding: the slice end is -4 + hitEod.
# NOTE(review): the value hitEod holds is not visible here - TODO confirm.
retval = retval[:-4+hitEod]
213
decode = staticmethod(decode)
215
# Run a stream's data through each filter named in its "/Filter" entry, in
# order, dispatching on the filter name.
# stream: object read with .get(key[, default]) for "/Filter", "/DecodeParms"
#         and "/DecodeParams".
# Raises NotImplementedError for filter names that are not handled and for
# the /Crypt case reported in the message below.
def decodeStreamData(stream):
216
# Function-scope import of the project's `generic` module (Python 2
# implicit-relative style).
from generic import NameObject
217
# No "/Filter" entry yields an empty tuple, so the loop below does nothing.
filters = stream.get("/Filter", ())
218
# If the first element is not a NameObject, "/Filter" is taken to be one
# filter name rather than a sequence of names.
if len(filters) and not isinstance(filters[0], NameObject):
219
# we have a single filter instance
222
for filterType in filters:
223
if filterType == "/FlateDecode":
224
# Decode parameters are forwarded as stored; no default is supplied here.
data = FlateDecode.decode(data, stream.get("/DecodeParms"))
225
elif filterType == "/ASCIIHexDecode":
226
data = ASCIIHexDecode.decode(data)
227
elif filterType == "/ASCII85Decode":
228
data = ASCII85Decode.decode(data)
229
elif filterType == "/Crypt":
230
# NOTE(review): the key here is "/DecodeParams", while the FlateDecode branch
# above reads "/DecodeParms" - confirm which spelling is intended; with a
# mismatched key this lookup would always fall back to {}.
decodeParams = stream.get("/DecodeParams", {})
231
if "/Name" not in decodeParams and "/Type" not in decodeParams:
234
raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
237
raise NotImplementedError("unsupported filter %s" % filterType)
240
# Ad-hoc self-test, executed only when this file is run as a script: checks
# ASCIIHexDecode on hex text containing newlines and a '>' terminator, then
# checks that ASCII85Decode turns a known encoded sample back into its
# original sentence.  Both checks are plain asserts, so they are skipped
# under `python -O`.
if __name__ == "__main__":
241
assert "abc" == ASCIIHexDecode.decode('61\n626\n3>')
244
<~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,
245
O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKY
246
i(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIa
247
l(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G
248
>uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>
250
ascii85_originalText="Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure."
251
assert ASCII85Decode.decode(ascii85Test) == ascii85_originalText