1
# Copyright (C) 2009, 2010 Canonical Ltd
3
# This program is free software: you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License version 3 as
5
# published by the Free Software Foundation.
7
# This program is distributed in the hope that it will be useful, but
8
# WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10
# General Public License for more details.
12
# You should have received a copy of the GNU General Public License
13
# along with this program. If not, see <http://www.gnu.org/licenses/>.
15
"""A structure for handling a set of pure integers.
17
(Such as a set of python object ids.)
21
ctypedef unsigned long size_t
23
void *realloc(void *, size_t)
25
void memset(void *, int, size_t)
28
ctypedef Py_ssize_t int_type
29
cdef int_type _singleton1, _singleton2
30
# _singleton1 is the 'no value present' value
31
# _singleton2 is the 'value deleted' value, which has us keep searching
32
# collisions after the fact
33
_singleton1 = <int_type> 0;
34
_singleton2 = <int_type> -1;
38
"""Keep a set of integer objects.
40
This is slightly more efficient than a PySet because we don't allow
44
cdef Py_ssize_t _count
47
cdef readonly int _has_singleton
49
def __init__(self, values=None):
53
# This is a separate bit mask of singletons that we have seen
54
# There are 2, the one which indicates no value, and the one that
55
# indicates 'dummy', aka removed value
56
self._has_singleton = 0
61
def __dealloc__(self):
62
if self._array != NULL:
68
def _peek_array(self):
69
cdef Py_ssize_t i, size
70
if self._array == NULL:
74
for i from 0 <= i < size:
75
result.append(self._array[i])
78
cdef int_type *_lookup(self, int_type c_val) except NULL:
79
"""Taken from the set() algorithm."""
80
cdef size_t offset, perturb
81
cdef int_type *entry, *freeslot
83
if self._array == NULL:
84
raise RuntimeError('cannot _lookup without _array allocated.')
85
offset = c_val & self._mask
86
entry = self._array + offset
87
if entry[0] == c_val or entry[0] == _singleton1:
89
if entry[0] == _singleton2:
96
offset = (offset << 2) + offset + perturb + 1
97
entry = self._array + (offset & self._mask)
98
if (entry[0] == _singleton1):
99
# We converged on an empty slot, without finding entry == c_val
100
# If we previously found a freeslot, return it, else return
106
elif (entry[0] == c_val):
109
elif (entry[0] == _singleton2 and freeslot == NULL):
110
# We found the first match that was a 'dummy' entry
112
perturb = perturb >> 5 # PERTURB_SHIFT
114
def __contains__(self, val):
115
cdef int_type c_val, *entry
117
if c_val == _singleton1:
118
if self._has_singleton & 0x01:
122
elif c_val == _singleton2:
123
if self._has_singleton & 0x02:
127
if self._array == NULL:
129
entry = self._lookup(c_val)
130
if entry[0] == c_val:
134
cdef int _grow(self) except -1:
136
cdef Py_ssize_t old_mask, old_size, new_size, old_count
137
cdef int_type *old_array, val
139
old_mask = self._mask
140
old_size = old_mask + 1
141
old_array = self._array
142
old_count = self._count
144
if old_array == NULL: # Nothing currently allocated
146
self._array = <int_type*>malloc(sizeof(int_type) * 256)
147
memset(self._array, _singleton1, sizeof(int_type) * 256)
149
new_size = old_size * 2
150
# Replace 'in place', grow to a new array, and add items back in
151
# Note that if it weren't for collisions, we could actually 'realloc()'
152
# and insert backwards. Since expanding mask means something will only
153
# fit in its old place, or the 2<<1 greater.
154
self._array = <int_type*>malloc(sizeof(int_type) * new_size)
155
memset(self._array, _singleton1, sizeof(int_type) * new_size)
156
self._mask = new_size - 1
158
if self._has_singleton & 0x01:
159
self._count = self._count + 1
160
if self._has_singleton & 0x02:
161
self._count = self._count + 1
162
for i from 0 <= i < old_size:
164
if val != _singleton1 and val != _singleton2:
166
if self._count != old_count:
167
raise RuntimeError('After resizing array from %d => %d'
168
' the count of items changed %d => %d'
169
% (old_size, new_size, old_count, self._count))
172
cdef int _add(self, int_type c_val) except -1:
174
if c_val == _singleton1:
175
if self._has_singleton & 0x01:
177
self._has_singleton = self._has_singleton | 0x01
178
self._count = self._count + 1
180
elif c_val == _singleton2:
181
if self._has_singleton & 0x02:
182
# Already had it, no-op
184
self._has_singleton = self._has_singleton | 0x02
185
self._count = self._count + 1
187
if self._array == NULL or self._count * 4 > self._mask:
189
entry = self._lookup(c_val)
190
if entry[0] == c_val:
191
# We already had it, no-op
193
if entry[0] == _singleton1 or entry[0] == _singleton2:
194
# No value stored at this location
196
self._count = self._count + 1
198
raise RuntimeError("Calling self._lookup(%x) returned %x. However"
199
" that is not the value or one of the singletons."
206
cdef class IDSet(IntSet):
207
"""Track a set of object ids (addresses).
209
This only differs from IntSet in how the integers are hashed. Object
210
addresses tend to be aligned on 16-byte boundaries (occasionally 8-byte,
211
and even more rarely on 4-byte), as such the standard hash lookup has more
212
collisions than desired.
215
cdef int_type *_lookup(self, int_type c_val) except NULL:
216
"""Taken from the set() algorithm."""
217
cdef size_t offset, perturb
218
cdef int_type *entry, *freeslot
219
cdef int_type internal_val
221
if self._array == NULL:
222
raise RuntimeError('cannot _lookup without _array allocated.')
223
# For addresses, we shift the last 4 bits into the beginning of the
225
internal_val = ((c_val & 0xf) << (sizeof(int_type)*8 - 4))
226
internal_val = internal_val | (c_val >> 4)
227
offset = internal_val & self._mask
228
entry = self._array + offset
229
if entry[0] == c_val or entry[0] == _singleton1:
231
if entry[0] == _singleton2:
238
offset = (offset << 2) + offset + perturb + 1
239
entry = self._array + (offset & self._mask)
240
if (entry[0] == _singleton1):
241
# We converged on an empty slot, without finding entry == c_val
242
# If we previously found a freeslot, return it, else return
248
elif (entry[0] == c_val):
251
elif (entry[0] == _singleton2 and freeslot == NULL):
252
# We found the first match that was a 'dummy' entry
254
perturb = perturb >> 5 # PERTURB_SHIFT