1
{-# OPTIONS -cpp -fglasgow-exts #-}
2
--------------------------------------------------------------------------------
4
Copyright : (c) Daan Leijen 2002
7
Maintainer : daan@cs.uu.nl
8
Stability : provisional
11
An efficient implementation of integer sets.
13
1) The 'filter' function clashes with the "Prelude".
14
If you want to use "IntSet" unqualified, this function should be hidden.
16
> import Prelude hiding (filter)
19
Another solution is to use qualified names.
21
> import qualified IntSet
23
> ... IntSet.fromList [1..5]
25
Or, if you prefer a terse coding style:
27
> import qualified IntSet as S
29
> ... S.fromList [1..5]
31
2) The implementation is based on /big-endian patricia trees/. This data structure
32
performs especially well on binary operations like 'union' and 'intersection'. However,
33
my benchmarks show that it is also (much) faster on insertions and deletions when
34
compared to a generic size-balanced set implementation (see "Set").
36
* Chris Okasaki and Andy Gill, \"/Fast Mergeable Integer Maps/\",
37
Workshop on ML, September 1998, pages 77--86, <http://www.cse.ogi.edu/~andy/pub/finite.htm>
39
* D.R. Morrison, \"/PATRICIA -- Practical Algorithm To Retrieve Information
40
Coded In Alphanumeric/\", Journal of the ACM, 15(4), October 1968, pages 514--534.
42
3) Many operations have a worst-case complexity of /O(min(n,W))/. This means that the
43
operation can become linear in the number of elements
44
with a maximum of /W/ -- the number of bits in an 'Int' (32 or 64).
46
---------------------------------------------------------------------------------}
47
module UU.DData.IntSet (
49
IntSet -- instance Eq,Show
98
import Prelude hiding (lookup,filter)
104
import List (nub,sort)
105
import qualified List
109
#ifdef __GLASGOW_HASKELL__
110
{--------------------------------------------------------------------
111
GHC: use unboxing to get @shiftRL@ inlined.
112
--------------------------------------------------------------------}
113
#if __GLASGOW_HASKELL__ >= 503
115
import GHC.Exts ( Word(..), Int(..), shiftRL# )
118
import GlaExts ( Word(..), Int(..), shiftRL# )
124
natFromInt :: Int -> Nat
125
natFromInt i = fromIntegral i
127
intFromNat :: Nat -> Int
128
intFromNat w = fromIntegral w
130
shiftRL :: Nat -> Int -> Nat
131
shiftRL (W# x) (I# i)
135
{--------------------------------------------------------------------
137
* raises errors on boundary values when using 'fromIntegral'
138
but not with the deprecated 'fromInt/toInt'.
139
* Older Hugs doesn't define 'Word'.
140
* Newer Hugs defines 'Word' in the Prelude but no operations.
141
--------------------------------------------------------------------}
144
type Nat = Word32 -- illegal on 64-bit platforms!
146
natFromInt :: Int -> Nat
147
natFromInt i = fromInt i
149
intFromNat :: Nat -> Int
150
intFromNat w = toInt w
152
shiftRL :: Nat -> Int -> Nat
153
shiftRL x i = shiftR x i
156
{--------------------------------------------------------------------
158
* A "Nat" is a natural machine word (an unsigned Int)
159
--------------------------------------------------------------------}
164
natFromInt :: Int -> Nat
165
natFromInt i = fromIntegral i
167
intFromNat :: Nat -> Int
168
intFromNat w = fromIntegral w
170
shiftRL :: Nat -> Int -> Nat
171
shiftRL w i = shiftR w i
177
{--------------------------------------------------------------------
179
--------------------------------------------------------------------}
180
(\\) :: IntSet -> IntSet -> IntSet
181
m1 \\ m2 = difference m1 m2
183
{--------------------------------------------------------------------
185
--------------------------------------------------------------------}
188
| Bin !Prefix !Mask !IntSet !IntSet
193
{--------------------------------------------------------------------
195
--------------------------------------------------------------------}
196
isEmpty :: IntSet -> Bool
198
isEmpty other = False
200
size :: IntSet -> Int
203
Bin p m l r -> size l + size r
207
member :: Int -> IntSet -> Bool
211
| nomatch x p m -> False
212
| zero x m -> member x l
213
| otherwise -> member x r
217
lookup :: Int -> IntSet -> Maybe Int
221
| nomatch x p m -> Nothing
222
| zero x m -> lookup x l
223
| otherwise -> lookup x r
226
| otherwise -> Nothing
229
{--------------------------------------------------------------------
231
--------------------------------------------------------------------}
236
single :: Int -> IntSet
240
{--------------------------------------------------------------------
242
--------------------------------------------------------------------}
243
insert :: Int -> IntSet -> IntSet
247
| nomatch x p m -> join x (Tip x) p t
248
| zero x m -> Bin p m (insert x l) r
249
| otherwise -> Bin p m l (insert x r)
252
| otherwise -> join x (Tip x) y t
255
insertR :: Int -> IntSet -> IntSet
259
| nomatch x p m -> join x (Tip x) p t
260
| zero x m -> Bin p m (insert x l) r
261
| otherwise -> Bin p m l (insert x r)
264
| otherwise -> join x (Tip x) y t
267
delete :: Int -> IntSet -> IntSet
272
| zero x m -> bin p m (delete x l) r
273
| otherwise -> bin p m l (delete x r)
280
{--------------------------------------------------------------------
282
--------------------------------------------------------------------}
283
unions :: [IntSet] -> IntSet
285
= foldlStrict union empty xs
288
union :: IntSet -> IntSet -> IntSet
289
union t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
290
| shorter m1 m2 = union1
291
| shorter m2 m1 = union2
292
| p1 == p2 = Bin p1 m1 (union l1 l2) (union r1 r2)
293
| otherwise = join p1 t1 p2 t2
295
union1 | nomatch p2 p1 m1 = join p1 t1 p2 t2
296
| zero p2 m1 = Bin p1 m1 (union l1 t2) r1
297
| otherwise = Bin p1 m1 l1 (union r1 t2)
299
union2 | nomatch p1 p2 m2 = join p1 t1 p2 t2
300
| zero p1 m2 = Bin p2 m2 (union t1 l2) r2
301
| otherwise = Bin p2 m2 l2 (union t1 r2)
303
union (Tip x) t = insert x t
304
union t (Tip x) = insertR x t -- right bias
309
{--------------------------------------------------------------------
311
--------------------------------------------------------------------}
312
difference :: IntSet -> IntSet -> IntSet
313
difference t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
314
| shorter m1 m2 = difference1
315
| shorter m2 m1 = difference2
316
| p1 == p2 = bin p1 m1 (difference l1 l2) (difference r1 r2)
319
difference1 | nomatch p2 p1 m1 = t1
320
| zero p2 m1 = bin p1 m1 (difference l1 t2) r1
321
| otherwise = bin p1 m1 l1 (difference r1 t2)
323
difference2 | nomatch p1 p2 m2 = t1
324
| zero p1 m2 = difference t1 l2
325
| otherwise = difference t1 r2
327
difference t1@(Tip x) t2
331
difference Nil t = Nil
332
difference t (Tip x) = delete x t
337
{--------------------------------------------------------------------
339
--------------------------------------------------------------------}
340
intersection :: IntSet -> IntSet -> IntSet
341
intersection t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
342
| shorter m1 m2 = intersection1
343
| shorter m2 m1 = intersection2
344
| p1 == p2 = bin p1 m1 (intersection l1 l2) (intersection r1 r2)
347
intersection1 | nomatch p2 p1 m1 = Nil
348
| zero p2 m1 = intersection l1 t2
349
| otherwise = intersection r1 t2
351
intersection2 | nomatch p1 p2 m2 = Nil
352
| zero p1 m2 = intersection t1 l2
353
| otherwise = intersection t1 r2
355
intersection t1@(Tip x) t2
358
intersection t (Tip x)
362
intersection Nil t = Nil
363
intersection t Nil = Nil
367
{--------------------------------------------------------------------
369
--------------------------------------------------------------------}
370
properSubset :: IntSet -> IntSet -> Bool
372
= case subsetCmp t1 t2 of
376
subsetCmp t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
378
| shorter m2 m1 = subsetCmpLt
379
| p1 == p2 = subsetCmpEq
380
| otherwise = GT -- disjoint
382
subsetCmpLt | nomatch p1 p2 m2 = GT
383
| zero p1 m2 = subsetCmp t1 l2
384
| otherwise = subsetCmp t1 r2
385
subsetCmpEq = case (subsetCmp l1 l2, subsetCmp r1 r2) of
391
subsetCmp (Bin p m l r) t = GT
392
subsetCmp (Tip x) (Tip y)
394
| otherwise = GT -- disjoint
397
| otherwise = GT -- disjoint
398
subsetCmp Nil Nil = EQ
401
subset :: IntSet -> IntSet -> Bool
402
subset t1@(Bin p1 m1 l1 r1) t2@(Bin p2 m2 l2 r2)
403
| shorter m1 m2 = False
404
| shorter m2 m1 = match p1 p2 m2 && (if zero p1 m2 then subset t1 l2
406
| otherwise = (p1==p2) && subset l1 l2 && subset r1 r2
407
subset (Bin p m l r) t = False
408
subset (Tip x) t = member x t
412
{--------------------------------------------------------------------
414
--------------------------------------------------------------------}
415
filter :: (Int -> Bool) -> IntSet -> IntSet
419
-> bin p m (filter pred l) (filter pred r)
425
partition :: (Int -> Bool) -> IntSet -> (IntSet,IntSet)
429
-> let (l1,l2) = partition pred l
430
(r1,r2) = partition pred r
431
in (bin p m l1 r1, bin p m l2 r2)
434
| otherwise -> (Nil,t)
438
split :: Int -> IntSet -> (IntSet,IntSet)
442
| zero x m -> let (lt,gt) = split x l in (lt,union gt r)
443
| otherwise -> let (lt,gt) = split x r in (union l lt,gt)
447
| otherwise -> (Nil,Nil)
450
splitMember :: Int -> IntSet -> (Bool,IntSet,IntSet)
454
| zero x m -> let (found,lt,gt) = splitMember x l in (found,lt,union gt r)
455
| otherwise -> let (found,lt,gt) = splitMember x r in (found,union l lt,gt)
457
| x>y -> (False,t,Nil)
458
| x<y -> (False,Nil,t)
459
| otherwise -> (True,Nil,Nil)
460
Nil -> (False,Nil,Nil)
463
{--------------------------------------------------------------------
465
--------------------------------------------------------------------}
467
fold :: (Int -> b -> b) -> b -> IntSet -> b
471
foldR :: (Int -> b -> b) -> b -> IntSet -> b
474
Bin p m l r -> foldR f (foldR f z r) l
478
{--------------------------------------------------------------------
480
--------------------------------------------------------------------}
481
elems :: IntSet -> [Int]
485
{--------------------------------------------------------------------
487
--------------------------------------------------------------------}
488
toList :: IntSet -> [Int]
492
toAscList :: IntSet -> [Int]
494
= -- NOTE: the following algorithm only works for big-endian trees
495
let (pos,neg) = span (>=0) (foldR (:) [] t) in neg ++ pos
497
fromList :: [Int] -> IntSet
499
= foldlStrict ins empty xs
503
fromAscList :: [Int] -> IntSet
507
fromDistinctAscList :: [Int] -> IntSet
508
fromDistinctAscList xs
512
{--------------------------------------------------------------------
514
--------------------------------------------------------------------}
515
instance Eq IntSet where
516
t1 == t2 = equal t1 t2
517
t1 /= t2 = nequal t1 t2
519
equal :: IntSet -> IntSet -> Bool
520
equal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
521
= (m1 == m2) && (p1 == p2) && (equal l1 l2) && (equal r1 r2)
522
equal (Tip x) (Tip y)
527
nequal :: IntSet -> IntSet -> Bool
528
nequal (Bin p1 m1 l1 r1) (Bin p2 m2 l2 r2)
529
= (m1 /= m2) || (p1 /= p2) || (nequal l1 l2) || (nequal r1 r2)
530
nequal (Tip x) (Tip y)
532
nequal Nil Nil = False
535
{--------------------------------------------------------------------
537
--------------------------------------------------------------------}
538
instance Show IntSet where
539
showsPrec d s = showSet (toList s)
541
showSet :: [Int] -> ShowS
545
= showChar '{' . shows x . showTail xs
547
showTail [] = showChar '}'
548
showTail (x:xs) = showChar ',' . shows x . showTail xs
550
{--------------------------------------------------------------------
552
--------------------------------------------------------------------}
553
showTree :: IntSet -> String
555
= showTreeWith True False s
558
{- | /O(n)/. The expression (@showTreeWith hang wide map@) shows
559
the tree that implements the set. If @hang@ is
560
@True@, a /hanging/ tree is shown otherwise a rotated tree is shown. If
561
@wide@ is true, an extra wide version is shown.
563
showTreeWith :: Bool -> Bool -> IntSet -> String
564
showTreeWith hang wide t
565
| hang = (showsTreeHang wide [] t) ""
566
| otherwise = (showsTree wide [] [] t) ""
568
showsTree :: Bool -> [String] -> [String] -> IntSet -> ShowS
569
showsTree wide lbars rbars t
572
-> showsTree wide (withBar rbars) (withEmpty rbars) r .
573
showWide wide rbars .
574
showsBars lbars . showString (showBin p m) . showString "\n" .
575
showWide wide lbars .
576
showsTree wide (withEmpty lbars) (withBar lbars) l
578
-> showsBars lbars . showString " " . shows x . showString "\n"
579
Nil -> showsBars lbars . showString "|\n"
581
showsTreeHang :: Bool -> [String] -> IntSet -> ShowS
582
showsTreeHang wide bars t
585
-> showsBars bars . showString (showBin p m) . showString "\n" .
587
showsTreeHang wide (withBar bars) l .
589
showsTreeHang wide (withEmpty bars) r
591
-> showsBars bars . showString " " . shows x . showString "\n"
592
Nil -> showsBars bars . showString "|\n"
595
= "*" -- ++ show (p,m)
598
| wide = showString (concat (reverse bars)) . showString "|\n"
601
showsBars :: [String] -> ShowS
605
_ -> showString (concat (reverse (tail bars))) . showString node
608
withBar bars = "| ":bars
609
withEmpty bars = " ":bars
612
{--------------------------------------------------------------------
614
--------------------------------------------------------------------}
615
{--------------------------------------------------------------------
617
--------------------------------------------------------------------}
618
join :: Prefix -> IntSet -> Prefix -> IntSet -> IntSet
620
| zero p1 m = Bin p m t1 t2
621
| otherwise = Bin p m t2 t1
626
{--------------------------------------------------------------------
627
@bin@ assures that we never have empty trees within a tree.
628
--------------------------------------------------------------------}
629
bin :: Prefix -> Mask -> IntSet -> IntSet -> IntSet
632
bin p m l r = Bin p m l r
635
{--------------------------------------------------------------------
636
Endian independent bit twiddling
637
--------------------------------------------------------------------}
638
zero :: Int -> Mask -> Bool
640
= (natFromInt i) .&. (natFromInt m) == 0
642
nomatch,match :: Int -> Prefix -> Mask -> Bool
649
mask :: Int -> Mask -> Prefix
651
= maskW (natFromInt i) (natFromInt m)
654
{--------------------------------------------------------------------
655
Big endian operations
656
--------------------------------------------------------------------}
657
maskW :: Nat -> Nat -> Prefix
659
= intFromNat (i .&. (complement (m-1) `xor` m))
661
shorter :: Mask -> Mask -> Bool
663
= (natFromInt m1) > (natFromInt m2)
665
branchMask :: Prefix -> Prefix -> Mask
667
= intFromNat (highestBitMask (natFromInt p1 `xor` natFromInt p2))
669
{----------------------------------------------------------------------
670
Finding the highest bit (mask) in a word [x] can be done efficiently in
672
* convert to a floating point value and the mantissa tells us the
673
[log2(x)] that corresponds with the highest bit position. The mantissa
674
is retrieved either via the standard C function [frexp] or by some bit
675
twiddling on IEEE compatible numbers (float). Note that one needs to
676
use at least [double] precision for an accurate mantissa of 32 bit
678
* use bit twiddling, a logarithmic sequence of bitwise or's and shifts (bit).
679
* use processor specific assembler instruction (asm).
681
The most portable way would be [bit], but is it efficient enough?
682
I have measured the cycle counts of the different methods on an AMD
683
Athlon-XP 1800 (~ Pentium III 1.8Ghz) using the RDTSC instruction:
685
highestBitMask: method cycles
692
highestBit: method cycles
699
Wow, the bit twiddling is on today's RISC like machines even faster
700
than a single CISC instruction (BSR)!
701
----------------------------------------------------------------------}
703
{----------------------------------------------------------------------
704
[highestBitMask] returns a word where only the highest bit is set.
705
It is found by first setting all bits in lower positions than the
706
highest bit and than taking an exclusive or with the original value.
707
Allthough the function may look expensive, GHC compiles this into
708
excellent C code that subsequently compiled into highly efficient
709
machine code. The algorithm is derived from Jorg Arndt's FXT library.
710
----------------------------------------------------------------------}
711
highestBitMask :: Nat -> Nat
713
= case (x .|. shiftRL x 1) of
714
x -> case (x .|. shiftRL x 2) of
715
x -> case (x .|. shiftRL x 4) of
716
x -> case (x .|. shiftRL x 8) of
717
x -> case (x .|. shiftRL x 16) of
718
x -> case (x .|. shiftRL x 32) of -- for 64 bit platforms
719
x -> (x `xor` (shiftRL x 1))
722
{--------------------------------------------------------------------
724
--------------------------------------------------------------------}
728
(x:xx) -> let z' = f z x in seq z' (foldlStrict f z' xx)
732
{--------------------------------------------------------------------
734
--------------------------------------------------------------------}
735
testTree :: [Int] -> IntSet
736
testTree xs = fromList xs
737
test1 = testTree [1..20]
738
test2 = testTree [30,29..10]
739
test3 = testTree [1,4,6,89,2323,53,43,234,5,79,12,9,24,9,8,423,8,42,4,8,9,3]
741
{--------------------------------------------------------------------
743
--------------------------------------------------------------------}
748
{ configMaxTest = 500
749
, configMaxFail = 5000
750
, configSize = \n -> (div n 2 + 3)
751
, configEvery = \n args -> let s = show n in s ++ [ '\b' | _ <- s ]
755
{--------------------------------------------------------------------
756
Arbitrary, reasonably balanced trees
757
--------------------------------------------------------------------}
758
instance Arbitrary IntSet where
759
arbitrary = do{ xs <- arbitrary
760
; return (fromList xs)
764
{--------------------------------------------------------------------
765
Single, Insert, Delete
766
--------------------------------------------------------------------}
767
prop_Single :: Int -> Bool
769
= (insert x empty == single x)
771
prop_InsertDelete :: Int -> IntSet -> Property
772
prop_InsertDelete k t
773
= not (member k t) ==> delete k (insert k t) == t
776
{--------------------------------------------------------------------
778
--------------------------------------------------------------------}
779
prop_UnionInsert :: Int -> IntSet -> Bool
781
= union t (single x) == insert x t
783
prop_UnionAssoc :: IntSet -> IntSet -> IntSet -> Bool
784
prop_UnionAssoc t1 t2 t3
785
= union t1 (union t2 t3) == union (union t1 t2) t3
787
prop_UnionComm :: IntSet -> IntSet -> Bool
789
= (union t1 t2 == union t2 t1)
791
prop_Diff :: [Int] -> [Int] -> Bool
793
= toAscList (difference (fromList xs) (fromList ys))
794
== List.sort ((List.\\) (nub xs) (nub ys))
796
prop_Int :: [Int] -> [Int] -> Bool
798
= toAscList (intersection (fromList xs) (fromList ys))
799
== List.sort (nub ((List.intersect) (xs) (ys)))
801
{--------------------------------------------------------------------
803
--------------------------------------------------------------------}
805
= forAll (choose (5,100)) $ \n ->
807
in fromAscList xs == fromList xs
809
prop_List :: [Int] -> Bool
811
= (sort (nub xs) == toAscList (fromList xs))