布隆過濾器含義
- 先準備一個長度爲n的二進制向量
- 若干個哈希函數,計算出來的若干個索引在0-n-1
- 某數據通過這組哈希函數計算出來若干個索引,在該向量中若都爲1則表明該數據大概率存在,否則一定不存在
- 有誤判率
- 設計難度在於:隨機映射函數的設計 和 二進制向量的長度設爲多少?
引用代碼——guava實現布隆過濾器
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>19.0</version>
</dependency>
private static int size = 1000000;
private static double fpp = 0.01;
private static BloomFilter<Integer> bloomFilter = BloomFilter.create(Funnels.integerFunnel(), size, fpp);
public static void main(String[] args) {
for (int i = 0; i < 1000000; i++) {
bloomFilter.put(i);
}
int count = 0;
for (int i = 1000000; i < 2000000; i++) {
if (bloomFilter.mightContain(i)) {
count++;
System.out.println(i + "誤判了");
}
}
System.out.println("總共的誤判數:" + count);
}
引用代碼——redis實現布隆過濾器
public class RedisMain {
static final int expectedInsertions = 100;
static final double fpp = 0.01;
private static long numBits;
private static int numHashFunctions;
static {
numBits = optimalNumOfBits(expectedInsertions, fpp);
numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits);
}
public static void main(String[] args) {
Jedis jedis = new Jedis("192.168.0.109", 6379);
for (int i = 0; i < 100; i++) {
long[] indexs = getIndexs(String.valueOf(i));
for (long index : indexs) {
jedis.setbit("codebear:bloom", index, true);
}
}
for (int i = 0; i < 100; i++) {
long[] indexs = getIndexs(String.valueOf(i));
for (long index : indexs) {
Boolean isContain = jedis.getbit("codebear:bloom", index);
if (!isContain) {
System.out.println(i + "肯定沒有重複");
}
}
System.out.println(i + "可能重複");
}
}
private static long[] getIndexs(String key) {
long hash1 = hash(key);
long hash2 = hash1 >>> 16;
long[] result = new long[numHashFunctions];
for (int i = 0; i < numHashFunctions; i++) {
long combinedHash = hash1 + i * hash2;
if (combinedHash < 0) {
combinedHash = ~combinedHash;
}
result[i] = combinedHash % numBits;
}
return result;
}
private static long hash(String key) {
Charset charset = Charset.forName("UTF-8");
return Hashing.murmur3_128().hashObject(key, Funnels.stringFunnel(charset)).asLong();
}
private static int optimalNumOfHashFunctions(long n, long m) {
return Math.max(1, (int) Math.round((double) m / n * Math.log(2)));
}
private static long optimalNumOfBits(long n, double p) {
if (p == 0) {
p = Double.MIN_VALUE;
}
return (long) (-n * Math.log(p) / (Math.log(2) * Math.log(2)));
}
}