/**
 * Minimal demo driver for {@code BloomFilter}.
 *
 * <p>Inserts two host names into a freshly created filter and prints the result of
 * a membership query for one inserted host and for one host that was never inserted.
 */
public class Test {
    public static void main(String[] args) {
        BloomFilter filter = new BloomFilter();

        // Populate the filter with the known hosts, in the same order as before.
        String[] knownHosts = {"www.baidu.com", "www.sohu.com"};
        for (String host : knownHosts) {
            filter.addValue(host);
        }

        // Query a host that was inserted.
        boolean insertedHostFound = filter.contains("www.baidu.com");
        System.out.println(insertedHostFound);

        // Query a host that was not inserted (a Bloom filter may still report true).
        boolean missingHostFound = filter.contains("www.sina.com");
        System.out.println(missingHostFound);
    }
}
/**
 * A simple Bloom filter for strings backed by a {@link java.util.BitSet}.
 *
 * <p>Adding a value sets one bit per hash function. A lookup answers {@code true}
 * only when every one of those bits is set, so a value that was added is always
 * reported as present, while a value that was never added may occasionally be
 * reported as present too (false positive).
 */
class BloomFilter {
    /**
     * Default number of bits in the bit vector: {@code 2 << 28}, i.e. 2^29.
     * Original author's estimate: enough for roughly 10 million URLs at a
     * false-positive rate of about one in ten million.
     */
    private static final int BIT_SIZE = 2 << 28;

    /** Multipliers for the hash functions, one hash function per entry (primes). */
    private static final int[] SEEDS = new int[]{3, 5, 7, 11, 13, 31, 37, 61};

    /** The bit vector holding the filter state. */
    private final java.util.BitSet bits;

    /** One hash function per seed. */
    private final Hash[] func = new Hash[SEEDS.length];

    /** Creates a filter with the default bit vector size. */
    public BloomFilter() {
        this(BIT_SIZE);
    }

    /**
     * Creates a filter with the given bit vector size.
     *
     * @param bitSize number of bits in the filter; must be positive
     * @throws IllegalArgumentException if {@code bitSize} is not positive
     */
    public BloomFilter(int bitSize) {
        if (bitSize <= 0) {
            throw new IllegalArgumentException("bitSize must be positive: " + bitSize);
        }
        this.bits = new java.util.BitSet(bitSize);
        for (int i = 0; i < SEEDS.length; i++) {
            func[i] = new Hash(bitSize, SEEDS[i]);
        }
    }

    /**
     * Adds a string to the filter by setting the bit chosen by each hash function.
     * A {@code null} value is ignored.
     *
     * @param value the string to add; may be {@code null}
     */
    public void addValue(String value) {
        if (value == null) {
            return;
        }
        for (Hash f : func) {
            bits.set(f.hash(value), true);
        }
    }

    /**
     * Tests whether a string may have been added to the filter.
     *
     * @param value the string to look up; may be {@code null}
     * @return {@code false} if {@code value} is {@code null} or at least one of its
     *         bits is clear; {@code true} if all of its bits are set
     */
    public boolean contains(String value) {
        if (value == null) {
            return false;
        }
        for (Hash f : func) {
            // A single clear bit proves the value was never added; stop early.
            if (!bits.get(f.hash(value))) {
                return false;
            }
        }
        return true;
    }

    /**
     * A seeded polynomial string hash that maps a string to an index in
     * {@code [0, size)}.
     */
    public static class Hash {
        /** Size of the bit vector the produced indices must fit into. */
        private final int size;

        /** Multiplier used while folding the characters together. */
        private final int seed;

        /**
         * @param cap  size of the bit vector; must be positive
         * @param seed multiplier for the polynomial hash
         * @throws IllegalArgumentException if {@code cap} is not positive
         */
        public Hash(int cap, int seed) {
            if (cap <= 0) {
                throw new IllegalArgumentException("cap must be positive: " + cap);
            }
            this.size = cap;
            this.seed = seed;
        }

        /**
         * Computes the bit index for a string.
         *
         * @param value the string to hash; must not be {@code null}
         * @return an index in {@code [0, size)}
         */
        public int hash(String value) {
            int result = 0;
            int len = value.length();
            for (int i = 0; i < len; i++) {
                // int overflow is intentional: the arithmetic simply wraps around.
                result = seed * result + value.charAt(i);
            }
            // Non-negative modulo. For a power-of-two size this equals the mask
            // (size - 1) & result, and it stays correct for any other positive size.
            int index = result % size;
            if (index < 0) {
                index += size;
            }
            return index;
        }
    }
}