1
# -*- coding: utf-8 -*-
6
a port of the ``serialize`` and ``unserialize`` functions of
7
PHP to Python. This module implements the Python serialization
8
interface (e.g. it provides `dumps`, `loads` and similar functions).
13
>>> from phpserialize import *
14
>>> obj = dumps("Hello World")
18
Due to the fact that PHP doesn't know the concept of lists, lists
19
are serialized like hash-maps in PHP. As a consequence, the
20
reverse value of a serialized list is a dict:
22
>>> loads(dumps(range(2)))
25
If you want to have a list again, you can use the `dict_to_list`
28
>>> dict_to_list(loads(dumps(range(2))))
31
It's also possible to convert into a tuple by using the `dict_to_tuple`
34
>>> dict_to_tuple(loads(dumps((1, 2, 3))))
37
Unicode strings are another problem. By default unicode strings are
38
encoded to 'utf-8' but not decoded on `unserialize`. The reason for
39
this is that phpserialize can't guess if you have binary or text data
42
>>> loads(dumps(u'Hello W\xf6rld'))
45
If you know that you have only text data of a known charset in the result
46
you can decode strings by setting `decode_strings` to True when calling
49
>>> loads(dumps(u'Hello W\xf6rld'), decode_strings=True)
52
Dictionary keys are limited to strings and integers. `None` is converted
53
into an empty string and floats and booleans into integers for PHP
56
>>> loads(dumps({None: 14, 42.23: 'foo', True: [1, 2, 3]}))
57
{'': 14, 1: {0: 1, 1: 2, 2: 3}, 42: 'foo'}
59
It also provides functions to read from file-like objects:
61
>>> from StringIO import StringIO
62
>>> stream = StringIO('a:2:{i:0;i:1;i:1;i:2;}')
63
>>> dict_to_list(load(stream))
66
And to write to those:
68
>>> stream = StringIO()
69
>>> dump([1, 2], stream)
71
'a:2:{i:0;i:1;i:1;i:2;}'
73
Like `pickle`, chaining of objects is supported:
75
>>> stream = StringIO()
76
>>> dump([1, 2], stream)
77
>>> dump("foo", stream)
84
This feature, however, is not supported in PHP. PHP will only unserialize
91
- added `dict_to_list` and `dict_to_tuple`
92
- added support for unicode
93
- allowed chaining of objects like pickle does.
96
:copyright: 2007-2008 by Armin Ronacher.
99
from StringIO import StringIO
101
__author__ = 'Armin Ronacher <armin.ronacher@active-4.com>'
105
def dumps(data, charset='utf-8', errors='strict'):
106
"""Return the PHP-serialized representation of the object as a string,
107
instead of writing it to a file like `dump` does.
109
def _serialize(obj, keypos):
111
if isinstance(obj, (int, long, float, bool)):
113
if isinstance(obj, basestring):
114
if isinstance(obj, unicode):
115
obj = obj.encode(charset, errors)
116
return 's:%i:"%s";' % (len(obj), obj)
119
raise TypeError('can\'t serialize %r as key' % type(obj))
123
if isinstance(obj, bool):
125
if isinstance(obj, (int, long)):
127
if isinstance(obj, float):
129
if isinstance(obj, basestring):
130
if isinstance(obj, unicode):
131
obj = obj.encode(charset, errors)
132
return 's:%i:"%s";' % (len(obj), obj)
133
if isinstance(obj, (list, tuple, dict)):
135
if isinstance(obj, dict):
136
iterable = obj.iteritems()
138
iterable = enumerate(obj)
139
for key, value in iterable:
140
out.append(_serialize(key, True))
141
out.append(_serialize(value, False))
142
return 'a:%i:{%s}' % (len(obj), ''.join(out))
143
raise TypeError('can\'t serialize %r' % type(obj))
144
return _serialize(data, False)
147
def load(fp, charset='utf-8', errors='strict', decode_strings=False):
148
"""Read a string from the open file object `fp` and interpret it as a
149
data stream of PHP-serialized objects, reconstructing and returning
150
the original object hierarchy.
152
`fp` must provide a `read()` method that takes an integer argument. This
153
method should return strings. Thus `fp` can be a file object opened for
154
reading, a `StringIO` object, or any other custom object that meets this
157
`load` will read exactly one object from the stream. See the docstring of
158
the module for this chained behavior.
163
raise ValueError('failed expectation, expected %r got %r' % (e, v))
165
def _read_until(delim):
172
raise ValueError('unexpected end of stream')
177
type_ = fp.read(1).lower()
183
data = _read_until(';')
188
return int(data) != 0
191
length = int(_read_until(':'))
193
data = fp.read(length)
196
data = data.decode(charset, errors)
201
items = int(_read_until(':')) * 2
205
for idx in xrange(items):
206
item = _unserialize()
207
if last_item is Ellipsis:
210
result[last_item] = item
214
raise ValueError('unexpected opcode')
216
return _unserialize()
219
def loads(data, charset='utf-8', errors='strict', decode_strings=False):
    """Deserialize a PHP-serialized object hierarchy held in a string.

    `data` is wrapped in a `StringIO` stream and handed to `load` together
    with `charset`, `errors` and `decode_strings`, which are passed through
    unchanged.  Characters in the string past the object's representation
    are ignored.
    """
    stream = StringIO(data)
    return load(stream, charset, errors, decode_strings)
226
def dump(data, fp, charset='utf-8', errors='strict'):
    """Write a PHP-serialized representation of `data` to the open file
    object `fp`.

    The serialized string is produced by `dumps`, which receives `charset`
    and `errors` unchanged, and is then written with a single `fp.write()`
    call.

    `fp` must have a `write()` method that accepts a single string argument.
    It can thus be a file object opened for writing, a `StringIO` object, or
    any other custom object that meets this interface.
    """
    serialized = dumps(data, charset, errors)
    fp.write(serialized)
239
"""Converts an ordered dict into a list."""
241
return [d[x] for x in xrange(len(d))]
243
raise ValueError('dict is not a sequence')
246
def dict_to_tuple(d):
    """Convert an ordered dict into a tuple.

    The dict is first turned into a list by `dict_to_list`; that list is
    then converted into a tuple.
    """
    items = dict_to_list(d)
    return tuple(items)
255
if __name__ == '__main__':