1
"""Create portable serialized representations of Python objects.
3
See module cPickle for a (much) faster implementation.
4
See module copy_reg for a mechanism for registering custom picklers.
5
See module pickletools source for extensive comments.
15
dumps(object) -> string
17
loads(string) -> object
27
__version__ = "$Revision: 1.158 $" # Code version
30
from copy_reg import dispatch_table
31
from copy_reg import _extension_registry, _inverted_registry, _extension_cache
38
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
39
"Unpickler", "dump", "dumps", "load", "loads"]
41
# These are purely informational; no code uses these.
42
format_version = "2.0" # File format version we write
43
compatible_formats = ["1.0", # Original protocol 0
44
"1.1", # Protocol 0 with INST added
45
"1.2", # Original protocol 1
46
"1.3", # Protocol 1 with BINFLOAT added
48
] # Old format versions we can read
50
# Keep in synch with cPickle. This is the highest protocol number we
54
# Why use struct.pack() for pickling but marshal.loads() for
55
# unpickling? struct.pack() is 40% faster than marshal.dumps(), but
56
# marshal.loads() is twice as fast as struct.unpack()!
57
mloads = marshal.loads
59
class PickleError(Exception):
60
"""A common base class for the other pickling exceptions."""
63
class PicklingError(PickleError):
64
"""This exception is raised when an unpicklable object is passed to the
70
class UnpicklingError(PickleError):
71
"""This exception is raised when there is a problem unpickling an object,
72
such as a security violation.
74
Note that other exceptions may also be raised during unpickling, including
75
(but not necessarily limited to) AttributeError, EOFError, ImportError,
81
# An instance of _Stop is raised by Unpickler.load_stop() in response to
82
# the STOP opcode, passing the object that is the result of unpickling.
83
class _Stop(Exception):
84
def __init__(self, value):
87
# Jython has PyStringMap; it's a dict subclass with string keys
89
from org.python.core import PyStringMap
93
# UnicodeType may or may not be exported (normally imported from types)
99
# Pickle opcodes. See pickletools.py for extensive docs. The listing
100
# here is in kind-of alphabetical order of 1-character pickle code.
101
# pickletools groups them by purpose.
103
MARK = '(' # push special markobject on stack
104
STOP = '.' # every pickle ends with STOP
105
POP = '0' # discard topmost stack item
106
POP_MARK = '1' # discard stack top through topmost markobject
107
DUP = '2' # duplicate top stack item
108
FLOAT = 'F' # push float object; decimal string argument
109
INT = 'I' # push integer or bool; decimal string argument
110
BININT = 'J' # push four-byte signed int
111
BININT1 = 'K' # push 1-byte unsigned int
112
LONG = 'L' # push long; decimal string argument
113
BININT2 = 'M' # push 2-byte unsigned int
114
NONE = 'N' # push None
115
PERSID = 'P' # push persistent object; id is taken from string arg
116
BINPERSID = 'Q' # " " " ; " " " " stack
117
REDUCE = 'R' # apply callable to argtuple, both on stack
118
STRING = 'S' # push string; NL-terminated string argument
119
BINSTRING = 'T' # push string; counted binary string argument
120
SHORT_BINSTRING = 'U' # " " ; " " " " < 256 bytes
121
UNICODE = 'V' # push Unicode string; raw-unicode-escaped'd argument
122
BINUNICODE = 'X' # " " " ; counted UTF-8 string argument
123
APPEND = 'a' # append stack top to list below it
124
BUILD = 'b' # call __setstate__ or __dict__.update()
125
GLOBAL = 'c' # push self.find_class(modname, name); 2 string args
126
DICT = 'd' # build a dict from stack items
127
EMPTY_DICT = '}' # push empty dict
128
APPENDS = 'e' # extend list on stack by topmost stack slice
129
GET = 'g' # push item from memo on stack; index is string arg
130
BINGET = 'h' # " " " " " " ; " " 1-byte arg
131
INST = 'i' # build & push class instance
132
LONG_BINGET = 'j' # push item from memo on stack; index is 4-byte arg
133
LIST = 'l' # build list from topmost stack items
134
EMPTY_LIST = ']' # push empty list
135
OBJ = 'o' # build & push class instance
136
PUT = 'p' # store stack top in memo; index is string arg
137
BINPUT = 'q' # " " " " " ; " " 1-byte arg
138
LONG_BINPUT = 'r' # " " " " " ; " " 4-byte arg
139
SETITEM = 's' # add key+value pair to dict
140
TUPLE = 't' # build tuple from topmost stack items
141
EMPTY_TUPLE = ')' # push empty tuple
142
SETITEMS = 'u' # modify dict by adding topmost key+value pairs
143
BINFLOAT = 'G' # push float; arg is 8-byte float encoding
145
TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py
146
FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py
150
PROTO = '\x80' # identify pickle protocol
151
NEWOBJ = '\x81' # build object by applying cls.__new__ to argtuple
152
EXT1 = '\x82' # push object from extension registry; 1-byte index
153
EXT2 = '\x83' # ditto, but 2-byte index
154
EXT4 = '\x84' # ditto, but 4-byte index
155
TUPLE1 = '\x85' # build 1-tuple from stack top
156
TUPLE2 = '\x86' # build 2-tuple from two topmost stack items
157
TUPLE3 = '\x87' # build 3-tuple from three topmost stack items
158
NEWTRUE = '\x88' # push True
159
NEWFALSE = '\x89' # push False
160
LONG1 = '\x8a' # push long from < 256 bytes
161
LONG4 = '\x8b' # push really big long
163
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
166
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
174
def __init__(self, file, protocol=None, bin=None):
175
"""This takes a file-like object for writing a pickle data stream.
177
The optional protocol argument tells the pickler to use the
178
given protocol; supported protocols are 0, 1, 2. The default
179
protocol is 0, to be backwards compatible. (Protocol 0 is the
180
only protocol that can be written to a file opened in text
181
mode and read back successfully. When using a protocol higher
182
than 0, make sure the file is opened in binary mode, both when
183
pickling and unpickling.)
185
Protocol 1 is more efficient than protocol 0; protocol 2 is
186
more efficient than protocol 1.
188
Specifying a negative protocol version selects the highest
189
protocol version supported. The higher the protocol used, the
190
more recent the version of Python needed to read the pickle
193
The file parameter must have a write() method that accepts a single
194
string argument. It can thus be an open file object, a StringIO
195
object, or any other custom object that meets this interface.
198
if protocol is not None and bin is not None:
199
raise ValueError, "can't specify both 'protocol' and 'bin'"
201
warnings.warn("The 'bin' argument to Pickler() is deprecated",
207
protocol = HIGHEST_PROTOCOL
208
elif not 0 <= protocol <= HIGHEST_PROTOCOL:
209
raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
210
self.write = file.write
212
self.proto = int(protocol)
213
self.bin = protocol >= 1
216
def _pickle_moduledict(self, obj):
218
modict = self.module_dict_ids
219
except AttributeError:
221
from sys import modules
222
for mod in modules.values():
223
if isinstance(mod, ModuleType):
225
modict[id(mod.__dict__)] = mod
226
except KeyboardInterrupt:
228
except: # obscure: the above can fail for
229
# arbitrary reasons, because of the py lib
231
self.module_dict_ids = modict
235
themodule = modict[thisid]
238
from __builtin__ import getattr
239
return getattr, (themodule, '__dict__')
241
def clear_memo(self):
242
"""Clears the pickler's "memo".
244
The memo is the data structure that remembers which objects the
245
pickler has already seen, so that shared or recursive objects are
246
pickled by reference and not by value. This method is useful when
253
"""Write a pickled representation of obj to the open file."""
255
self.write(PROTO + chr(self.proto))
259
def memoize(self, obj):
260
"""Store an object in the memo."""
262
# The Pickler memo is a dictionary mapping object ids to 2-tuples
263
# that contain the Unpickler memo key and the object being memoized.
264
# The memo key is written to the pickle and will become
265
# the key in the Unpickler's memo. The object is stored in the
266
# Pickler memo so that transient objects are kept alive during
269
# The use of the Unpickler memo length as the memo key is just a
270
# convention. The only requirement is that the memo values be unique.
271
# But there appears no advantage to any other scheme, and this
272
# scheme allows the Unpickler memo to be implemented as a plain (but
273
# growable) array, indexed by memo key.
276
assert id(obj) not in self.memo
277
memo_len = len(self.memo)
278
self.write(self.put(memo_len))
279
self.memo[id(obj)] = memo_len, obj
281
# Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
282
def put(self, i, pack=struct.pack):
285
return BINPUT + chr(i)
287
return LONG_BINPUT + pack("<i", i)
289
return PUT + repr(i) + '\n'
291
# Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
292
def get(self, i, pack=struct.pack):
295
return BINGET + chr(i)
297
return LONG_BINGET + pack("<i", i)
299
return GET + repr(i) + '\n'
302
# Check for persistent id (defined by a subclass)
303
pid = self.persistent_id(obj)
309
x = self.memo.get(id(obj))
311
self.write(self.get(x[0]))
314
# Check the type dispatch table
316
f = self.dispatch.get(t)
318
f(self, obj) # Call unbound method with explicit self
321
# Check for a class with a custom metaclass; treat as regular class
323
issc = issubclass(t, TypeType)
324
except TypeError: # t is not a class (old Boost; see SF #502085)
327
self.save_global(obj)
330
# Check copy_reg.dispatch_table
331
reduce = dispatch_table.get(t)
335
# Check for a __reduce_ex__ method, fall back to __reduce__
336
reduce = getattr(obj, "__reduce_ex__", None)
338
rv = reduce(self.proto)
340
reduce = getattr(obj, "__reduce__", None)
344
raise PicklingError("Can't pickle %r object: %r" %
347
# Check for string returned by reduce(), meaning "save as global"
348
if type(rv) is StringType:
349
self.save_global(obj, rv)
352
# Assert that reduce() returned a tuple
353
if type(rv) is not TupleType:
354
raise PicklingError("%s must return string or tuple" % reduce)
356
# Assert that it returned an appropriately sized tuple
358
if not (2 <= l <= 5):
359
raise PicklingError("Tuple returned by %s must have "
360
"two to five elements" % reduce)
362
# Save the reduce() output and finally memoize the object
363
self.save_reduce(obj=obj, *rv)
365
def persistent_id(self, obj):
366
# This exists so a subclass can override it
369
def save_pers(self, pid):
370
# Save a persistent id reference
373
self.write(BINPERSID)
375
self.write(PERSID + str(pid) + '\n')
377
def save_reduce(self, func, args, state=None,
378
listitems=None, dictitems=None, obj=None):
379
# This API is called by some subclasses
381
# Assert that args is a tuple or None
382
if not isinstance(args, TupleType):
384
# A hack for Jim Fulton's ExtensionClass, now deprecated.
386
warnings.warn("__basicnew__ special case is deprecated",
390
"args from reduce() should be a tuple")
392
# Assert that func is callable
393
if not callable(func):
394
raise PicklingError("func from reduce should be callable")
399
# Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
400
if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
401
# A __reduce__ implementation can direct protocol 2 to
402
# use the more efficient NEWOBJ opcode, while still
403
# allowing protocol 0 and 1 to work normally. For this to
404
# work, the function returned by __reduce__ should be
405
# called __newobj__, and its first argument should be a
406
# new-style class. The implementation for __newobj__
407
# should be as follows, although pickle has no way to
410
# def __newobj__(cls, *args):
411
# return cls.__new__(cls, *args)
413
# Protocols 0 and 1 will pickle a reference to __newobj__,
414
# while protocol 2 (and above) will pickle a reference to
415
# cls, the remaining args tuple, and the NEWOBJ code,
416
# which calls cls.__new__(cls, *args) at unpickling time
417
# (see load_newobj below). If __reduce__ returns a
418
# three-tuple, the state from the third tuple item will be
419
# pickled regardless of the protocol, calling __setstate__
420
# at unpickling time (see load_build below).
422
# Note that no standard __newobj__ implementation exists;
423
# you have to provide your own. This is to enforce
424
# compatibility with Python 2.2 (pickles written using
425
# protocol 0 or 1 in Python 2.3 should be unpicklable by
428
if not hasattr(cls, "__new__"):
430
"args[0] from __newobj__ args has no __new__")
431
if obj is not None and cls is not obj.__class__:
433
"args[0] from __newobj__ args has the wrong class")
446
# More new special cases (that work with older protocols as
447
# well): when __reduce__ returns a tuple with 4 or 5 items,
448
# the 4th and 5th item should be iterators that provide list
449
# items and dict items (as (key, value) tuples), or None.
451
if listitems is not None:
452
self._batch_appends(listitems)
454
if dictitems is not None:
455
self._batch_setitems(dictitems)
457
if state is not None:
461
# Methods below this point are dispatched through the dispatch table
465
def save_none(self, obj):
467
dispatch[NoneType] = save_none
469
def save_bool(self, obj):
471
self.write(obj and NEWTRUE or NEWFALSE)
473
self.write(obj and TRUE or FALSE)
474
dispatch[bool] = save_bool
476
def save_int(self, obj, pack=struct.pack):
478
# If the int is small enough to fit in a signed 4-byte 2's-comp
479
# format, we can store it more efficiently than the general
481
# First one- and two-byte unsigned ints:
484
self.write(BININT1 + chr(obj))
487
self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8))
489
# Next check for 4-byte signed ints:
490
high_bits = obj >> 31 # note that Python shift sign-extends
491
if high_bits == 0 or high_bits == -1:
492
# All high bits are copies of bit 2**31, so the value
493
# fits in a 4-byte signed int.
494
self.write(BININT + pack("<i", obj))
496
# Text pickle, or int too big to fit in signed 4-byte format.
497
self.write(INT + repr(obj) + '\n')
498
dispatch[IntType] = save_int
500
def save_long(self, obj, pack=struct.pack):
502
bytes = encode_long(obj)
505
self.write(LONG1 + chr(n) + bytes)
507
self.write(LONG4 + pack("<i", n) + bytes)
509
self.write(LONG + repr(obj) + '\n')
510
dispatch[LongType] = save_long
512
def save_float(self, obj, pack=struct.pack):
514
self.write(BINFLOAT + pack('>d', obj))
516
self.write(FLOAT + repr(obj) + '\n')
517
dispatch[FloatType] = save_float
519
def save_string(self, obj, pack=struct.pack):
523
self.write(SHORT_BINSTRING + chr(n) + obj)
525
self.write(BINSTRING + pack("<i", n) + obj)
527
self.write(STRING + repr(obj) + '\n')
529
dispatch[StringType] = save_string
531
def save_unicode(self, obj, pack=struct.pack):
533
encoding = obj.encode('utf-8')
535
self.write(BINUNICODE + pack("<i", n) + encoding)
537
obj = obj.replace("\\", "\\u005c")
538
obj = obj.replace("\n", "\\u000a")
539
self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n')
541
dispatch[UnicodeType] = save_unicode
543
if StringType == UnicodeType:
544
# This is true for Jython
545
def save_string(self, obj, pack=struct.pack):
546
unicode = obj.isunicode()
550
obj = obj.encode("utf-8")
552
if l < 256 and not unicode:
553
self.write(SHORT_BINSTRING + chr(l) + obj)
557
self.write(BINUNICODE + s + obj)
559
self.write(BINSTRING + s + obj)
562
obj = obj.replace("\\", "\\u005c")
563
obj = obj.replace("\n", "\\u000a")
564
obj = obj.encode('raw-unicode-escape')
565
self.write(UNICODE + obj + '\n')
567
self.write(STRING + repr(obj) + '\n')
569
dispatch[StringType] = save_string
571
def save_tuple(self, obj):
585
if n <= 3 and proto >= 2:
588
# Subtle. Same as in the big comment below.
590
get = self.get(memo[id(obj)][0])
593
write(_tuplesize2code[n])
597
# proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
598
# has more than 3 elements.
604
# Subtle. d was not in memo when we entered save_tuple(), so
605
# the process of saving the tuple's elements must have saved
606
# the tuple itself: the tuple is recursive. The proper action
607
# now is to throw away everything we put on the stack, and
608
# simply GET the tuple (it's already constructed). This check
609
# could have been done in the "for element" loop instead, but
610
# recursive tuples are a rare thing.
611
get = self.get(memo[id(obj)][0])
613
write(POP_MARK + get)
614
else: # proto 0 -- POP_MARK not available
615
write(POP * (n+1) + get)
622
dispatch[TupleType] = save_tuple
624
# save_empty_tuple() isn't used by anything in Python 2.3. However, I
625
# found a Pickler subclass in Zope3 that calls it, so it's not harmless
627
def save_empty_tuple(self, obj):
    # Not used by the core pickler itself, but retained because some
    # Pickler subclasses in the wild (e.g. in Zope3) call it directly.
    self.write(EMPTY_TUPLE)
630
def save_list(self, obj):
635
else: # proto 0 -- can't use EMPTY_LIST
639
self._batch_appends(iter(obj))
641
dispatch[ListType] = save_list
643
# Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets
644
# out of synch, though.
647
def _batch_appends(self, items):
648
# Helper to batch up APPENDS sequences
658
r = xrange(self._BATCHSIZE)
659
while items is not None:
665
except StopIteration:
677
# else tmp is empty, and we're done
679
def save_dict(self, obj):
680
## Stackless addition BEGIN
681
modict_saver = self._pickle_moduledict(obj)
682
if modict_saver is not None:
683
return self.save_reduce(*modict_saver)
684
## Stackless addition END
690
else: # proto 0 -- can't use EMPTY_DICT
694
self._batch_setitems(obj.iteritems())
696
dispatch[DictionaryType] = save_dict
697
if not PyStringMap is None:
698
dispatch[PyStringMap] = save_dict
700
def _batch_setitems(self, items):
701
# Helper to batch up SETITEMS sequences; proto >= 1 only
712
r = xrange(self._BATCHSIZE)
713
while items is not None:
717
tmp.append(items.next())
718
except StopIteration:
733
# else tmp is empty, and we're done
735
def save_inst(self, obj):
742
if hasattr(obj, '__getinitargs__'):
743
args = obj.__getinitargs__()
744
len(args) # XXX Assert it's a sequence
745
_keep_alive(args, memo)
759
write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')
764
getstate = obj.__getstate__
765
except AttributeError:
769
_keep_alive(stuff, memo)
773
dispatch[InstanceType] = save_inst
775
def save_global(self, obj, name=None, pack=struct.pack):
782
module = getattr(obj, "__module__", None)
784
module = whichmodule(obj, name)
788
mod = sys.modules[module]
789
klass = getattr(mod, name)
790
except (ImportError, KeyError, AttributeError):
792
"Can't pickle %r: it's not found as %s.%s" %
797
"Can't pickle %r: it's not the same object as %s.%s" %
801
code = _extension_registry.get((module, name))
805
write(EXT1 + chr(code))
807
write("%c%c%c" % (EXT2, code&0xff, code>>8))
809
write(EXT4 + pack("<i", code))
812
write(GLOBAL + module + '\n' + name + '\n')
815
def save_function(self, obj):
817
return self.save_global(obj)
818
except PicklingError, e:
820
# Check copy_reg.dispatch_table
821
reduce = dispatch_table.get(type(obj))
825
# Check for a __reduce_ex__ method, fall back to __reduce__
826
reduce = getattr(obj, "__reduce_ex__", None)
828
rv = reduce(self.proto)
830
reduce = getattr(obj, "__reduce__", None)
835
return self.save_reduce(obj=obj, *rv)
837
dispatch[ClassType] = save_global
838
dispatch[FunctionType] = save_function
839
dispatch[BuiltinFunctionType] = save_global
840
dispatch[TypeType] = save_global
844
def _keep_alive(x, memo):
845
"""Keeps a reference to the object x in the memo.
847
Because we remember objects by their id, we have
848
to assure that possibly temporary objects are kept
849
alive by referencing them.
850
We store a reference at the id of the memo, which should
851
normally not be used unless someone tries to deepcopy
855
memo[id(memo)].append(x)
857
# aha, this is the first one :-)
861
# A cache for whichmodule(), mapping a function object to the name of
862
# the module in which the function was found.
864
classmap = {} # called classmap for backwards compatibility
866
def whichmodule(func, funcname):
867
"""Figure out the module in which a function occurs.
869
Search sys.modules for the module.
871
Return a module name.
872
If the function cannot be found, return "__main__".
874
# Python functions should always get an __module__ from their globals.
875
mod = getattr(func, "__module__", None)
879
return classmap[func]
881
for name, module in sys.modules.items():
883
continue # skip dummy package entries
884
if name != '__main__' and getattr(module, funcname, None) is func:
888
classmap[func] = name
892
# Unpickling machinery
896
def __init__(self, file):
897
"""This takes a file-like object for reading a pickle data stream.
899
The protocol version of the pickle is detected automatically, so no
900
proto argument is needed.
902
The file-like object must have two methods, a read() method that
903
takes an integer argument, and a readline() method that requires no
904
arguments. Both methods should return a string. Thus file-like
905
object can be a file object opened for reading, a StringIO object,
906
or any other custom object that meets this interface.
908
self.readline = file.readline
909
self.read = file.read
913
"""Read a pickled object representation from the open file.
915
Return the reconstituted object hierarchy specified in the file.
917
self.mark = object() # any new unique object
919
self.append = self.stack.append
921
dispatch = self.dispatch
926
except _Stop, stopinst:
927
return stopinst.value
929
# Return largest index k such that self.stack[k] is self.mark.
930
# If the stack doesn't contain a mark, eventually raises IndexError.
931
# This could be sped by maintaining another stack, of indices at which
932
# the mark appears. For that matter, the latter stack would suffice,
933
# and we wouldn't need to push mark objects on self.stack at all.
934
# Doing so is probably a good thing, though, since if the pickle is
935
# corrupt (or hostile) we may get a clue from finding self.mark embedded
936
# in unpickled objects.
941
while stack[k] is not mark: k = k-1
948
dispatch[''] = load_eof
950
def load_proto(self):
951
proto = ord(self.read(1))
952
if not 0 <= proto <= 2:
953
raise ValueError, "unsupported pickle protocol: %d" % proto
954
dispatch[PROTO] = load_proto
956
def load_persid(self):
    # PERSID opcode: the persistent id arrives as a newline-terminated
    # string argument; resolve it via the (subclass-provided) hook.
    pid = self.readline()[:-1]
    obj = self.persistent_load(pid)
    self.append(obj)
959
dispatch[PERSID] = load_persid
961
def load_binpersid(self):
    # BINPERSID opcode: like PERSID, but the persistent id is taken
    # from the top of the stack instead of the pickle stream.
    pid = self.stack.pop()
    obj = self.persistent_load(pid)
    self.append(obj)
964
dispatch[BINPERSID] = load_binpersid
968
dispatch[NONE] = load_none
970
def load_false(self):
972
dispatch[NEWFALSE] = load_false
976
dispatch[NEWTRUE] = load_true
979
data = self.readline()
980
if data == FALSE[1:]:
982
elif data == TRUE[1:]:
990
dispatch[INT] = load_int
992
def load_binint(self):
    # BININT opcode: the next 4 bytes are a little-endian signed
    # 32-bit integer; marshal does the decoding (see mloads note above).
    raw = self.read(4)
    self.append(mloads('i' + raw))
994
dispatch[BININT] = load_binint
996
def load_binint1(self):
    # BININT1 opcode: a single raw byte, pushed as an unsigned int.
    byte = self.read(1)
    self.append(ord(byte))
998
dispatch[BININT1] = load_binint1
1000
def load_binint2(self):
    # BININT2 opcode: 2-byte little-endian unsigned int; zero-pad to
    # four bytes so marshal can decode it as a full 'i'.
    raw = self.read(2)
    self.append(mloads('i' + raw + '\000\000'))
1002
dispatch[BININT2] = load_binint2
1004
def load_long(self):
    # LONG opcode: a newline-terminated numeric literal; base 0 lets
    # long() honour any 0x/0 prefix in the text.
    text = self.readline()[:-1]
    self.append(long(text, 0))
1006
dispatch[LONG] = load_long
1008
def load_long1(self):
    # LONG1 opcode: 1-byte length prefix, then that many bytes of
    # two's-complement little-endian long data.
    size = ord(self.read(1))
    self.append(decode_long(self.read(size)))
1012
dispatch[LONG1] = load_long1
1014
def load_long4(self):
    # LONG4 opcode: 4-byte little-endian length prefix, then that many
    # bytes of two's-complement little-endian long data.
    size = mloads('i' + self.read(4))
    self.append(decode_long(self.read(size)))
1018
dispatch[LONG4] = load_long4
1020
def load_float(self):
    # FLOAT opcode: decimal string argument, newline-terminated.
    text = self.readline()[:-1]
    self.append(float(text))
1022
dispatch[FLOAT] = load_float
1024
def load_binfloat(self, unpack=struct.unpack):
    # BINFLOAT opcode: 8-byte big-endian IEEE-754 double.  The unpack
    # default is bound at def time as a minor speed hack.
    (value,) = unpack('>d', self.read(8))
    self.append(value)
1026
dispatch[BINFLOAT] = load_binfloat
1028
def load_string(self):
1029
rep = self.readline()[:-1]
1030
for q in "\"'": # double or single quote
1031
if rep.startswith(q):
1032
if not rep.endswith(q):
1033
raise ValueError, "insecure string pickle"
1034
rep = rep[len(q):-len(q)]
1037
raise ValueError, "insecure string pickle"
1038
self.append(rep.decode("string-escape"))
1039
dispatch[STRING] = load_string
1041
def load_binstring(self):
    # BINSTRING opcode: 4-byte little-endian length prefix, then that
    # many raw string bytes.
    # Fixed: the local was named `len`, shadowing the builtin.
    size = mloads('i' + self.read(4))
    self.append(self.read(size))
1044
dispatch[BINSTRING] = load_binstring
1046
def load_unicode(self):
    # UNICODE opcode: raw-unicode-escape'd text, newline-terminated.
    text = self.readline()[:-1]
    self.append(unicode(text, 'raw-unicode-escape'))
1048
dispatch[UNICODE] = load_unicode
1050
def load_binunicode(self):
    # BINUNICODE opcode: 4-byte little-endian length prefix, then that
    # many bytes of UTF-8 encoded text.
    # Fixed: the local was named `len`, shadowing the builtin.
    size = mloads('i' + self.read(4))
    self.append(unicode(self.read(size), 'utf-8'))
1053
dispatch[BINUNICODE] = load_binunicode
1055
def load_short_binstring(self):
    # SHORT_BINSTRING opcode: 1-byte length prefix (< 256), then that
    # many raw string bytes.
    # Fixed: the local was named `len`, shadowing the builtin.
    size = ord(self.read(1))
    self.append(self.read(size))
1058
dispatch[SHORT_BINSTRING] = load_short_binstring
1060
def load_tuple(self):
1062
self.stack[k:] = [tuple(self.stack[k+1:])]
1063
dispatch[TUPLE] = load_tuple
1065
def load_empty_tuple(self):
    # EMPTY_TUPLE opcode: push ().
    self.stack += [()]
1067
dispatch[EMPTY_TUPLE] = load_empty_tuple
1069
def load_tuple1(self):
    # TUPLE1 opcode: wrap the stack top in a 1-tuple, in place.
    top = self.stack[-1]
    self.stack[-1] = (top,)
1071
dispatch[TUPLE1] = load_tuple1
1073
def load_tuple2(self):
    # TUPLE2 opcode: replace the two topmost items with a pair.
    second = self.stack.pop()
    first = self.stack.pop()
    self.stack.append((first, second))
1075
dispatch[TUPLE2] = load_tuple2
1077
def load_tuple3(self):
    # TUPLE3 opcode: replace the three topmost items with a 3-tuple.
    third = self.stack.pop()
    second = self.stack.pop()
    first = self.stack.pop()
    self.stack.append((first, second, third))
1079
dispatch[TUPLE3] = load_tuple3
1081
def load_empty_list(self):
    # EMPTY_LIST opcode: push a fresh empty list.
    self.stack += [[]]
1083
dispatch[EMPTY_LIST] = load_empty_list
1085
def load_empty_dictionary(self):
    # EMPTY_DICT opcode: push a fresh empty dict.
    self.stack += [{}]
1087
dispatch[EMPTY_DICT] = load_empty_dictionary
1089
def load_list(self):
1091
self.stack[k:] = [self.stack[k+1:]]
1092
dispatch[LIST] = load_list
1094
def load_dict(self):
1097
items = self.stack[k+1:]
1098
for i in range(0, len(items), 2):
1102
self.stack[k:] = [d]
1103
dispatch[DICT] = load_dict
1105
# INST and OBJ differ only in how they get a class object. It's not
1106
# only sensible to do the rest in a common routine, the two routines
1107
# previously diverged and grew different bugs.
1108
# klass is the class to instantiate, and k points to the topmost mark
1109
# object, following which are the arguments for klass.__init__.
1110
def _instantiate(self, klass, k):
1111
args = tuple(self.stack[k+1:])
1115
type(klass) is ClassType and
1116
not hasattr(klass, "__getinitargs__")):
1118
value = _EmptyClass()
1119
value.__class__ = klass
1121
except RuntimeError:
1122
# In restricted execution, assignment to inst.__class__ is
1125
if not instantiated:
1127
value = klass(*args)
1128
except TypeError, err:
1129
raise TypeError, "in constructor for %s: %s" % (
1130
klass.__name__, str(err)), sys.exc_info()[2]
1133
def load_inst(self):
    # INST opcode: two newline-terminated string arguments (module
    # name, class name); look the class up, then build an instance
    # from the arguments sitting above the topmost mark.
    module = self.readline()[:-1]
    name = self.readline()[:-1]
    cls = self.find_class(module, name)
    self._instantiate(cls, self.marker())
1138
dispatch[INST] = load_inst
1141
# Stack is ... markobject classobject arg1 arg2 ...
1143
klass = self.stack.pop(k+1)
1144
self._instantiate(klass, k)
1145
dispatch[OBJ] = load_obj
1147
def load_newobj(self):
    # NEWOBJ opcode: stack holds ... cls argtuple; replace cls with
    # cls.__new__(cls, *argtuple) (protocol 2's __init__-free build).
    argtuple = self.stack.pop()
    cls = self.stack[-1]
    self.stack[-1] = cls.__new__(cls, *argtuple)
1152
dispatch[NEWOBJ] = load_newobj
1154
def load_global(self):
1155
module = self.readline()[:-1]
1156
name = self.readline()[:-1]
1157
klass = self.find_class(module, name)
1159
dispatch[GLOBAL] = load_global
1161
def load_ext1(self):
    # EXT1 opcode: 1-byte index into the copy_reg extension registry.
    self.get_extension(ord(self.read(1)))
1164
dispatch[EXT1] = load_ext1
1166
def load_ext2(self):
    # EXT2 opcode: 2-byte little-endian extension registry index;
    # zero-padded so marshal can decode it as a 4-byte 'i'.
    raw = self.read(2)
    self.get_extension(mloads('i' + raw + '\000\000'))
1169
dispatch[EXT2] = load_ext2
1171
def load_ext4(self):
    # EXT4 opcode: 4-byte little-endian extension registry index.
    self.get_extension(mloads('i' + self.read(4)))
1174
dispatch[EXT4] = load_ext4
1176
def get_extension(self, code):
1178
obj = _extension_cache.get(code, nil)
1182
key = _inverted_registry.get(code)
1184
raise ValueError("unregistered extension code %d" % code)
1185
obj = self.find_class(*key)
1186
_extension_cache[code] = obj
1189
def find_class(self, module, name):
1190
# Subclasses may override this
1192
mod = sys.modules[module]
1193
klass = getattr(mod, name)
1196
def load_reduce(self):
1201
# A hack for Jim Fulton's ExtensionClass, now deprecated
1202
warnings.warn("__basicnew__ special case is deprecated",
1204
value = func.__basicnew__()
1208
dispatch[REDUCE] = load_reduce
1212
dispatch[POP] = load_pop
1214
def load_pop_mark(self):
1217
dispatch[POP_MARK] = load_pop_mark
1220
self.append(self.stack[-1])
1221
dispatch[DUP] = load_dup
1224
self.append(self.memo[self.readline()[:-1]])
1225
dispatch[GET] = load_get
1227
def load_binget(self):
    # BINGET opcode: 1-byte memo index.  Memo keys are the repr of the
    # integer index, matching what load_binput stores.
    key = repr(ord(self.read(1)))
    self.append(self.memo[key])
1230
dispatch[BINGET] = load_binget
1232
def load_long_binget(self):
    # LONG_BINGET opcode: 4-byte little-endian memo index; memo keys
    # are the repr of the integer index.
    key = repr(mloads('i' + self.read(4)))
    self.append(self.memo[key])
1235
dispatch[LONG_BINGET] = load_long_binget
1238
self.memo[self.readline()[:-1]] = self.stack[-1]
1239
dispatch[PUT] = load_put
1241
def load_binput(self):
    # BINPUT opcode: store the stack top in the memo under a 1-byte
    # index (keyed by its repr, as load_binget expects).
    key = repr(ord(self.read(1)))
    self.memo[key] = self.stack[-1]
1244
dispatch[BINPUT] = load_binput
1246
def load_long_binput(self):
    # LONG_BINPUT opcode: store the stack top in the memo under a
    # 4-byte little-endian index (keyed by its repr).
    key = repr(mloads('i' + self.read(4)))
    self.memo[key] = self.stack[-1]
1249
dispatch[LONG_BINPUT] = load_long_binput
1251
def load_append(self):
1256
dispatch[APPEND] = load_append
1258
def load_appends(self):
1260
mark = self.marker()
1261
list = stack[mark - 1]
1262
list.extend(stack[mark + 1:])
1264
dispatch[APPENDS] = load_appends
1266
def load_setitem(self):
1272
dispatch[SETITEM] = load_setitem
1274
def load_setitems(self):
1276
mark = self.marker()
1277
dict = stack[mark - 1]
1278
for i in range(mark + 1, len(stack), 2):
1279
dict[stack[i]] = stack[i + 1]
1282
dispatch[SETITEMS] = load_setitems
1284
def load_build(self):
1288
setstate = getattr(inst, "__setstate__", None)
1293
if isinstance(state, tuple) and len(state) == 2:
1294
state, slotstate = state
1297
inst.__dict__.update(state)
1298
except RuntimeError:
1299
# XXX In restricted execution, the instance's __dict__
1300
# is not accessible. Use the old way of unpickling
1301
# the instance variables. This is a semantic
1302
# difference when unpickling in restricted
1303
# vs. unrestricted modes.
1304
# Note, however, that cPickle has never tried to do the
1305
# .update() business, and always uses
1306
# PyObject_SetItem(inst.__dict__, key, value) in a
1307
# loop over state.items().
1308
for k, v in state.items():
1311
for k, v in slotstate.items():
1313
dispatch[BUILD] = load_build
1315
def load_mark(self):
    # MARK opcode: push the sentinel markobject onto the stack.
    sentinel = self.mark
    self.append(sentinel)
1317
dispatch[MARK] = load_mark
1319
def load_stop(self):
1320
value = self.stack.pop()
1322
dispatch[STOP] = load_stop
1324
# Helper class for load_inst/load_obj
1329
# Encode/decode longs in linear time.
1331
import binascii as _binascii
1334
r"""Encode a long to a two's complement little-endian binary string.
1335
Note that 0L is a special case, returning an empty string, to save a
1336
byte in the LONG1 pickling context.
1340
>>> encode_long(255L)
1342
>>> encode_long(32767L)
1344
>>> encode_long(-256L)
1346
>>> encode_long(-32768L)
1348
>>> encode_long(-128L)
1350
>>> encode_long(127L)
1359
assert ashex.startswith("0x")
1360
njunkchars = 2 + ashex.endswith('L')
1361
nibbles = len(ashex) - njunkchars
1363
# need an even # of nibbles for unhexlify
1364
ashex = "0x0" + ashex[2:]
1365
elif int(ashex[2], 16) >= 8:
1366
# "looks negative", so need a byte of sign bits
1367
ashex = "0x00" + ashex[2:]
1369
# Build the 256's-complement: (1L << nbytes) + x. The trick is
1370
# to find the number of bytes in linear time (although that should
1371
# really be a constant-time task).
1373
assert ashex.startswith("0x")
1374
njunkchars = 2 + ashex.endswith('L')
1375
nibbles = len(ashex) - njunkchars
1377
# Extend to a full byte.
1383
njunkchars = 2 + ashex.endswith('L')
1384
newnibbles = len(ashex) - njunkchars
1385
if newnibbles < nibbles:
1386
ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
1387
if int(ashex[2], 16) < 8:
1388
# "looks positive", so need a byte of sign bits
1389
ashex = "0xff" + ashex[2:]
1391
if ashex.endswith('L'):
1395
assert len(ashex) & 1 == 0, (x, ashex)
1396
binary = _binascii.unhexlify(ashex)
1399
def decode_long(data):
1400
r"""Decode a long from a two's complement little-endian binary string.
1404
>>> decode_long("\xff\x00")
1406
>>> decode_long("\xff\x7f")
1408
>>> decode_long("\x00\xff")
1410
>>> decode_long("\x00\x80")
1412
>>> decode_long("\x80")
1414
>>> decode_long("\x7f")
1421
ashex = _binascii.hexlify(data[::-1])
1422
n = long(ashex, 16) # quadratic time before Python 2.3; linear now
1423
if data[-1] >= '\x80':
1424
n -= 1L << (nbytes * 8)
1430
from cStringIO import StringIO
1432
from StringIO import StringIO
1434
def dump(obj, file, protocol=None, bin=None):
1435
Pickler(file, protocol, bin).dump(obj)
1437
def dumps(obj, protocol=None, bin=None):
1439
Pickler(file, protocol, bin).dump(obj)
1440
return file.getvalue()
1443
return Unpickler(file).load()
1446
file = StringIO(str)
1447
return Unpickler(file).load()
1453
return doctest.testmod()
1455
if __name__ == "__main__":