3
* Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
5
* Redistribution and use in source and binary forms, with or
6
* without modification, are permitted provided that the following
8
* - Redistributions of source code must retain the above copyright
9
* notice, this list of conditions and the following disclaimer.
10
* - Redistributions in binary form must reproduce the above copyright
11
* notice, this list of conditions and the following disclaimer in
12
* the documentation and/or other materials provided with the distribution.
13
* - Neither the name of the University Catholique de Louvain - UCL
14
* nor the names of its contributors may be used to endorse or
15
* promote products derived from this software without specific prior
18
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29
* POSSIBILITY OF SUCH DAMAGE.
32
* Licensed to the Apache Software Foundation (ASF) under one
33
* or more contributor license agreements. See the NOTICE file
34
* distributed with this work for additional information
35
* regarding copyright ownership. The ASF licenses this file
36
* to you under the Apache License, Version 2.0 (the
37
* "License"); you may not use this file except in compliance
38
* with the License. You may obtain a copy of the License at
40
* http://www.apache.org/licenses/LICENSE-2.0
42
* Unless required by applicable law or agreed to in writing, software
43
* distributed under the License is distributed on an "AS IS" BASIS,
44
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
45
* See the License for the specific language governing permissions and
46
* limitations under the License.
48
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
50
import java.io.DataInput;
51
import java.io.DataOutput;
52
import java.io.IOException;
54
import java.util.BitSet;
56
import org.apache.hadoop.hbase.util.Hash;
59
* Implements a <i>Bloom filter</i>, as defined by Bloom in 1970.
61
* The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
62
* the networking research community in the past decade thanks to the bandwidth efficiencies that it
63
* offers for the transmission of set membership information between networked hosts. A sender encodes
64
* the information into a bit vector, the Bloom filter, that is more compact than a conventional
65
* representation. Computation and space costs for construction are linear in the number of elements.
66
* The receiver uses the filter to test whether various elements are members of the set. Though the
67
* filter will occasionally return a false positive, it will never return a false negative. When creating
68
* the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
70
* contract <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
72
* @version 1.0 - 2 Feb. 07
74
public class BloomFilter extends Filter {
75
private static final byte[] bitvalues = new byte[] {
86
/** The bit vector. */
89
/** Default constructor - use with readFields */
90
public BloomFilter() {
96
* @param vectorSize The vector size of <i>this</i> filter.
97
* @param nbHash The number of hash function to consider.
98
* @param hashType type of the hashing function (see {@link Hash}).
100
public BloomFilter(int vectorSize, int nbHash, int hashType){
101
super(vectorSize, nbHash, hashType);
103
bits = new BitSet(this.vectorSize);
107
public void add(Key key) {
109
throw new NullPointerException("key cannot be null");
112
int[] h = hash.hash(key);
115
for(int i = 0; i < nbHash; i++) {
121
public void and(Filter filter){
123
|| !(filter instanceof BloomFilter)
124
|| filter.vectorSize != this.vectorSize
125
|| filter.nbHash != this.nbHash) {
126
throw new IllegalArgumentException("filters cannot be and-ed");
129
this.bits.and(((BloomFilter) filter).bits);
133
public boolean membershipTest(Key key){
135
throw new NullPointerException("key cannot be null");
138
int[] h = hash.hash(key);
140
for(int i = 0; i < nbHash; i++) {
141
if(!bits.get(h[i])) {
146
}//end memberhsipTest()
150
bits.flip(0, vectorSize - 1);
154
public void or(Filter filter){
156
|| !(filter instanceof BloomFilter)
157
|| filter.vectorSize != this.vectorSize
158
|| filter.nbHash != this.nbHash) {
159
throw new IllegalArgumentException("filters cannot be or-ed");
161
bits.or(((BloomFilter) filter).bits);
165
public void xor(Filter filter){
167
|| !(filter instanceof BloomFilter)
168
|| filter.vectorSize != this.vectorSize
169
|| filter.nbHash != this.nbHash) {
170
throw new IllegalArgumentException("filters cannot be xor-ed");
172
bits.xor(((BloomFilter) filter).bits);
176
public String toString(){
177
return bits.toString();
181
public Object clone(){
182
BloomFilter bf = new BloomFilter(vectorSize, nbHash, hashType);
188
* @return size of the the bloomfilter
190
public int getVectorSize() {
191
return this.vectorSize;
197
public void write(DataOutput out) throws IOException {
199
byte[] bytes = new byte[getNBytes()];
200
for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
206
bytes[byteIndex] = 0;
209
bytes[byteIndex] |= bitvalues[bitIndex];
216
public void readFields(DataInput in) throws IOException {
217
super.readFields(in);
218
bits = new BitSet(this.vectorSize);
219
byte[] bytes = new byte[getNBytes()];
221
for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
226
if ((bytes[byteIndex] & bitvalues[bitIndex]) != 0) {
232
/* @return number of bytes needed to hold bit vector */
233
private int getNBytes() {
234
return (vectorSize + 7) / 8;