1
"""Classes to represent arbitrary sets (including sets of sets).
3
This module implements sets using dictionaries whose values are
4
ignored. The usual operations (union, intersection, deletion, etc.)
5
are provided as both methods and operators.
7
Important: sets are not sequences! While they support 'x in s',
8
'len(s)', and 'for x in s', none of those operations are unique for
9
sequences; for example, mappings support all three as well. The
10
characteristic operation for sequences is subscripting with small
11
integers: s[i], for i in range(len(s)). Sets don't support
12
subscripting at all. Also, sequences allow multiple occurrences and
13
their elements have a definite order; sets on the other hand don't
14
record multiple occurrences and don't remember the order of element
15
insertion (which is why they don't support s[i]).
17
The following classes are provided:
19
BaseSet -- All the operations common to both mutable and immutable
20
sets. This is an abstract class, not meant to be directly instantiated.
23
Set -- Mutable sets, subclass of BaseSet; not hashable.
25
ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
26
An iterable argument is mandatory to create an ImmutableSet.
28
_TemporarilyImmutableSet -- A wrapper around a Set, hashable,
29
giving the same hash value as the immutable set equivalent
30
would have. Do not use this class directly.
32
Only hashable objects can be added to a Set. In particular, you cannot
33
really add a Set as an element to another Set; if you try, what is
34
actually added is an ImmutableSet built from it (it compares equal to
35
the one you tried adding).
37
When you ask if `x in y' where x is a Set and y is a Set or
38
ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
39
what's tested is actually `z in y'.
45
# - Greg V. Wilson wrote the first version, using a different approach
46
# to the mutable/immutable problem, and inheriting from dict.
48
# - Alex Martelli modified Greg's version to implement the current
49
# Set/ImmutableSet approach, and make the data an attribute.
51
# - Guido van Rossum rewrote much of the code, made some API changes,
52
# and cleaned up the docstrings.
54
# - Raymond Hettinger added a number of speedups and other improvements.
57
# protect this import from the fixers...
58
exec('from itertools import ifilterfalse as filterfalse')
60
__all__ = ['BaseSet', 'Set', 'ImmutableSet']
62
class BaseSet(object):
63
"""Common base class for mutable and immutable sets."""
70
"""This is an abstract class."""
71
# Don't call this from a concrete subclass!
72
if self.__class__ is BaseSet:
73
raise TypeError("BaseSet is an abstract class. "
74
"Use Set or ImmutableSet.")
76
# Standard protocols: __len__, __repr__, __str__, __iter__
79
"""Return the number of elements of a set."""
80
return len(self._data)
83
"""Return string representation of a set.
85
This looks like 'Set([<list of elements>])'.
89
# __str__ is the same as __repr__
92
def _repr(self, sort_them=False):
93
elements = list(self._data.keys())
96
return '%s(%r)' % (self.__class__.__name__, elements)
99
"""Return an iterator over the elements of a set.
101
This is the keys iterator for the underlying dict.
103
# Wrapping name in () prevents fixer from "fixing" this
104
return (self._data.iterkeys)()
106
# Three-way comparison is not supported. However, because __eq__ is
107
# tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
108
# then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this case).
111
def __cmp__(self, other):
112
raise TypeError("can't compare sets using cmp()")
114
# Equality comparisons using the underlying dicts. Mixed-type comparisons
115
# are allowed here, where Set == z for non-Set z always returns False,
116
# and Set != z always True. This allows expressions like "x in y" to
117
# give the expected result when y is a sequence of mixed types, not
118
# raising a pointless TypeError just because y contains a Set, or x is
119
# a Set and y contains a non-set ("in" invokes only __eq__).
120
# Subtle: it would be nicer if __eq__ and __ne__ could return
121
# NotImplemented instead of True or False. Then the other comparand
122
# would get a chance to determine the result, and if the other comparand
123
# also returned NotImplemented then it would fall back to object address
124
# comparison (which would always return False for __eq__ and always
125
# True for __ne__). However, that doesn't work, because this type
126
# *also* implements __cmp__: if, e.g., __eq__ returns NotImplemented,
127
# Python tries __cmp__ next, and the __cmp__ here then raises TypeError.
129
def __eq__(self, other):
    """Report whether `other` is a set holding the same elements.

    Two sets are equal when their underlying dicts are equal.  A
    comparand that is not a BaseSet never raises; the result is simply
    False, so mixed-type membership tests keep working.
    """
    if isinstance(other, BaseSet):
        return self._data == other._data
    # Non-set comparands are unequal by definition.
    return False
135
def __ne__(self, other):
    """Report whether `other` differs from this set.

    Two sets differ when their underlying dicts differ.  A comparand
    that is not a BaseSet never raises; the result is simply True.
    """
    if isinstance(other, BaseSet):
        return self._data != other._data
    # Non-set comparands are always unequal to a set.
    return True
144
"""Return a shallow copy of a set."""
145
result = self.__class__()
146
result._data.update(self._data)
149
__copy__ = copy # For the copy module
151
def __deepcopy__(self, memo):
152
"""Return a deep copy of a set; used by copy module."""
153
# This pre-creates the result and inserts it in the memo
154
# early, in case the deep copy recurses into another reference
155
# to this same set. A set can't be an element of itself, but
156
# it can certainly contain an object that has a reference to itself.
158
from copy import deepcopy
159
result = self.__class__()
160
memo[id(self)] = result
164
data[deepcopy(elt, memo)] = value
167
# Standard set operations: union, intersection, both differences.
168
# Each has an operator version (e.g. __or__, invoked with |) and a
169
# method version (e.g. union).
170
# Subtle: Each pair requires distinct code so that the outcome is
171
# correct when the type of other isn't suitable. For example, if
172
# we did "union = __or__" instead, then Set().union(3) would return
173
# NotImplemented instead of raising TypeError (albeit that *why* it
174
# raises TypeError as-is is also a bit subtle).
176
def __or__(self, other):
    """Return the union of two sets as a new set.

    (I.e. all elements that are in either set.)

    Returns NotImplemented when `other` is not a BaseSet, so that the
    operator protocol can try the reflected operation.
    """
    if not isinstance(other, BaseSet):
        return NotImplemented
    return self.union(other)
185
def union(self, other):
    """Return the union of two sets as a new set.

    (I.e. all elements that are in either set.)

    The result is a new instance of this set's own class, seeded with
    this set's elements and then updated from `other`.
    """
    result = self.__class__(self)
    result._update(other)
    return result
194
def __and__(self, other):
    """Return the intersection of two sets as a new set.

    (I.e. all elements that are in both sets.)

    Returns NotImplemented when `other` is not a BaseSet, so that the
    operator protocol can try the reflected operation.
    """
    if not isinstance(other, BaseSet):
        return NotImplemented
    return self.intersection(other)
203
def intersection(self, other):
204
"""Return the intersection of two sets as a new set.
206
(I.e. all elements that are in both sets.)
208
if not isinstance(other, BaseSet):
210
if len(self) <= len(other):
211
little, big = self, other
213
little, big = other, self
214
common = iter(filter(big._data.has_key, little))
215
return self.__class__(common)
217
def __xor__(self, other):
    """Return the symmetric difference of two sets as a new set.

    (I.e. all elements that are in exactly one of the sets.)

    Returns NotImplemented when `other` is not a BaseSet, so that the
    operator protocol can try the reflected operation.
    """
    if not isinstance(other, BaseSet):
        return NotImplemented
    return self.symmetric_difference(other)
226
def symmetric_difference(self, other):
227
"""Return the symmetric difference of two sets as a new set.
229
(I.e. all elements that are in exactly one of the sets.)
231
result = self.__class__()
234
selfdata = self._data
236
otherdata = other._data
237
except AttributeError:
238
otherdata = Set(other)._data
239
for elt in filterfalse(otherdata.has_key, selfdata):
241
for elt in filterfalse(selfdata.has_key, otherdata):
245
def __sub__(self, other):
    """Return the difference of two sets as a new Set.

    (I.e. all elements that are in this set and not in the other.)

    Returns NotImplemented when `other` is not a BaseSet, so that the
    operator protocol can try the reflected operation.
    """
    if not isinstance(other, BaseSet):
        return NotImplemented
    return self.difference(other)
254
def difference(self, other):
255
"""Return the difference of two sets as a new Set.
257
(I.e. all elements that are in this set and not in the other.)
259
result = self.__class__()
262
otherdata = other._data
263
except AttributeError:
264
otherdata = Set(other)._data
266
for elt in filterfalse(otherdata.has_key, self):
272
def __contains__(self, element):
273
"""Report whether an element is a member of a set.
275
(Called in response to the expression `element in self'.)
278
return element in self._data
280
transform = getattr(element, "__as_temporarily_immutable__", None)
281
if transform is None:
282
raise # re-raise the TypeError exception we caught
283
return transform() in self._data
285
# Subset and superset test
287
def issubset(self, other):
288
"""Report whether another set contains this set."""
289
self._binary_sanity_check(other)
290
if len(self) > len(other): # Fast check for obvious cases
292
for elt in filterfalse(other._data.has_key, self):
296
def issuperset(self, other):
297
"""Report whether this set contains another set."""
298
self._binary_sanity_check(other)
299
if len(self) < len(other): # Fast check for obvious cases
301
for elt in filterfalse(self._data.has_key, other):
305
# Inequality comparisons using the is-subset relation.
309
def __lt__(self, other):
310
self._binary_sanity_check(other)
311
return len(self) < len(other) and self.issubset(other)
313
def __gt__(self, other):
314
self._binary_sanity_check(other)
315
return len(self) > len(other) and self.issuperset(other)
319
def _binary_sanity_check(self, other):
    """Ensure the other operand of a binary operation is also a set.

    Raises TypeError when `other` is not a BaseSet instance; returns
    None otherwise.
    """
    if not isinstance(other, BaseSet):
        raise TypeError("Binary operation only permitted between sets")
325
def _compute_hash(self):
326
# Calculate hash code for a set by xor'ing the hash codes of
327
# the elements. This ensures that the hash code does not depend
328
# on the order in which elements are added to the set. This is
329
# not called __hash__ because a BaseSet should not be hashable;
330
# only an ImmutableSet is hashable.
336
def _update(self, iterable):
337
# The main loop for update() and the subclass __init__() methods.
340
# Use the fast update() method when a dictionary is available.
341
if isinstance(iterable, BaseSet):
342
data.update(iterable._data)
347
if type(iterable) in (list, tuple, xrange):
348
# Optimized: we know that __iter__() and next() can't
349
# raise TypeError, so we can move 'try:' out of the loop.
354
data[element] = value
357
transform = getattr(element, "__as_immutable__", None)
358
if transform is None:
359
raise # re-raise the TypeError exception we caught
360
data[transform()] = value
362
# Safe: only catch TypeError where intended
363
for element in iterable:
365
data[element] = value
367
transform = getattr(element, "__as_immutable__", None)
368
if transform is None:
369
raise # re-raise the TypeError exception we caught
370
data[transform()] = value
373
class ImmutableSet(BaseSet):
374
"""Immutable set class."""
376
__slots__ = ['_hashcode']
380
def __init__(self, iterable=None):
381
"""Construct an immutable set from an optional iterable."""
382
self._hashcode = None
384
if iterable is not None:
385
self._update(iterable)
388
if self._hashcode is None:
389
self._hashcode = self._compute_hash()
390
return self._hashcode
392
def __getstate__(self):
393
return self._data, self._hashcode
395
def __setstate__(self, state):
396
self._data, self._hashcode = state
399
""" Mutable set class."""
403
# BaseSet + operations requiring mutability; no hashing
405
def __init__(self, iterable=None):
406
"""Construct a set from an optional iterable."""
408
if iterable is not None:
409
self._update(iterable)
411
def __getstate__(self):
412
# getstate's results are ignored if it is not
415
def __setstate__(self, data):
419
"""A Set cannot be hashed."""
420
# We inherit object.__hash__, so we must deny this explicitly
421
raise TypeError("Can't hash a Set, only an ImmutableSet.")
423
# In-place union, intersection, differences.
424
# Subtle: The xyz_update() functions deliberately return None,
425
# as do all mutating operations on built-in container types.
426
# The __xyz__ spellings have to return self, though.
428
def __ior__(self, other):
429
"""Update a set with the union of itself and another."""
430
self._binary_sanity_check(other)
431
self._data.update(other._data)
434
def union_update(self, other):
435
"""Update a set with the union of itself and another."""
438
def __iand__(self, other):
439
"""Update a set with the intersection of itself and another."""
440
self._binary_sanity_check(other)
441
self._data = (self & other)._data
444
def intersection_update(self, other):
445
"""Update a set with the intersection of itself and another."""
446
if isinstance(other, BaseSet):
449
self._data = (self.intersection(other))._data
451
def __ixor__(self, other):
452
"""Update a set with the symmetric difference of itself and another."""
453
self._binary_sanity_check(other)
454
self.symmetric_difference_update(other)
457
def symmetric_difference_update(self, other):
458
"""Update a set with the symmetric difference of itself and another."""
461
if not isinstance(other, BaseSet):
471
def __isub__(self, other):
472
"""Remove all elements of another set from this set."""
473
self._binary_sanity_check(other)
474
self.difference_update(other)
477
def difference_update(self, other):
478
"""Remove all elements of another set from this set."""
480
if not isinstance(other, BaseSet):
484
for elt in filter(data.has_key, other):
487
# Python dict-like mass mutations: update, clear
489
def update(self, iterable):
    """Add all values from an iterable (such as a list or file).

    Delegates to _update() and returns None.
    """
    self._update(iterable)
494
"""Remove all elements from this set."""
497
# Single-element mutations: add, remove, discard
499
def add(self, element):
    """Add an element to a set.

    This has no effect if the element is already present.

    If storing the element raises TypeError and the element provides
    __as_immutable__, the object returned by that method is stored
    instead.  Otherwise the TypeError propagates.
    """
    try:
        self._data[element] = True
    except TypeError:
        transform = getattr(element, "__as_immutable__", None)
        if transform is None:
            raise  # re-raise the TypeError exception we caught
        self._data[transform()] = True
512
def remove(self, element):
    """Remove an element from a set; it must be a member.

    If the element is not a member, raise a KeyError.

    If the lookup raises TypeError and the element provides
    __as_temporarily_immutable__, the deletion is retried with the
    object that method returns.  Otherwise the TypeError propagates.
    """
    try:
        del self._data[element]
    except TypeError:
        transform = getattr(element, "__as_temporarily_immutable__", None)
        if transform is None:
            raise  # re-raise the TypeError exception we caught
        del self._data[transform()]
525
def discard(self, element):
526
"""Remove an element from a set if it is a member.
528
If the element is not a member, do nothing.
536
"""Remove and return an arbitrary set element."""
537
return self._data.popitem()[0]
539
def __as_immutable__(self):
    """Return a copy of self as an ImmutableSet."""
    return ImmutableSet(self)
543
def __as_temporarily_immutable__(self):
    """Return self wrapped in a _TemporarilyImmutableSet."""
    return _TemporarilyImmutableSet(self)
548
class _TemporarilyImmutableSet(BaseSet):
549
# Wrap a mutable set as if it was temporarily immutable.
550
# This only supplies hashing and equality comparisons.
552
def __init__(self, set):
554
self._data = set._data # Needed by ImmutableSet.__eq__()
557
return self._set._compute_hash()
561
# indent-tabs-mode:nil
563
# vim: set expandtab tabstop=4 shiftwidth=4: