需求
自己实现一个简单的布谷鸟散列(Cuckoo hashtable)。
分析
布谷鸟哈希最早于 2001 年由 Rasmus Pagh 和 Flemming Friche Rodler 提出。该哈希方法是为了解决哈希冲突的问题而提出的,以少量的额外计算换取了更高的空间利用率。其名称源于该哈希方法的行为类似于布谷鸟在别的鸟巢中下蛋、并将别的鸟蛋挤出巢外的行为。它具有占用空间小、查询迅速等特性,可用于 Bloom filter 和内存管理。
算法描述
算法使用多个不同哈希函数计算对应key 的位置。
- 当多个哈希位置全部为空时,任选其中一个位置插入
- 当多个哈希有位置为空时,则插入到空位置
- 当多个哈希位置均不为空时,随机选择其中一个位置,将该位置上的 key 踢出并放入新 key,再计算被踢出的 key 的其他哈希值对应的位置进行插入,转至上面第二条执行(即当再次插入的位置为空时直接插入,仍旧不为空时,再踢出该位置上的 key,如此反复)
接口
/**
 * Minimal set-style hash table contract: elements can be added, removed and
 * looked up, and the table reports how many elements it currently holds.
 *
 * @param <E> the element type stored in the table
 */
interface HashTable<E> {

    /**
     * Inserts an element into the table.
     *
     * @param e the element to insert
     */
    void add(E e);

    /**
     * Removes an element from the table.
     *
     * @param e the element to remove
     * @return whether the removal took place
     */
    boolean remove(E e);

    /**
     * Tests whether an element is stored in the table.
     *
     * @param e the element to look up
     * @return whether the element was found
     */
    boolean contains(E e);

    /**
     * Reports the number of stored elements.
     *
     * @return the current element count
     */
    int size();

    /**
     * Reports whether the table holds no elements.
     *
     * @return {@code true} when the table is empty
     */
    boolean isEmpty();

    /** Removes every element from the table. */
    void clear();
}
/**
 * A family of interchangeable hash functions over one element type. Each
 * function is addressed by an index passed to {@link #hash(Object, int)}.
 *
 * @param <E> the type of element that can be hashed
 */
interface HashFamily<E> {

    /**
     * Hashes an element with one member of the family.
     *
     * @param e     the element to hash
     * @param which index of the hash function to apply
     * @return the hash value produced by the selected function
     */
    int hash(E e, int which);

    /**
     * Reports how many hash functions the family provides.
     *
     * @return the number of functions
     */
    int getNumOfFunctions();

    /** Replaces the family's functions with a newly generated set. */
    void generateNewFunctions();
}
实现类
/**
 * A set-like hash table that resolves collisions with cuckoo hashing.
 *
 * <p>Every element has one candidate slot per hash function of the supplied
 * {@link HashFamily}. Insertion uses the first empty candidate slot; when all
 * candidates are occupied, one occupant is evicted and re-inserted in turn.
 * If an insertion does not settle after {@code COUNT_LIMIT} evictions, the
 * table is rebuilt with new hash functions or, after too many rebuilds, with
 * a larger backing array.
 *
 * <p>{@code null} marks an empty slot in the backing array, so {@code null}
 * elements are not supported.
 *
 * @param <E> the element type
 */
class CuckooHashTable<E> implements HashTable<E> {
    /** Fill ratio at which {@link #add(Object)} expands the table before inserting. */
    private static final float LOAD_FACTOR = 0.4f;
    /** Same-size rehashes tolerated by {@code addHelper} before it expands instead. */
    private static final int ALLOWED_REHASHES = 1;
    private static final int DEFAULT_TABLE_SIZE = 101;
    /** Evictions attempted for a single insertion before the table is rebuilt. */
    private static final int COUNT_LIMIT = 100;
    /** Extra random draws made to avoid evicting from the slot that was just filled. */
    private static final int MAX_REDRAWS = 5;

    private final HashFamily<? super E> hashFunctions;
    private final int numHashFunctions;
    private final Random r = new Random();

    /** Backing slots; a {@code null} entry means the slot is empty. */
    private Object[] array;
    private int currentSize;
    /** Rehashes performed by {@code addHelper} since it last forced an expansion. */
    private int rehashes = 0;

    /**
     * Creates a table with the default initial capacity.
     *
     * @param hashFunctions the hash family used to compute candidate slots
     * @throws NullPointerException     if {@code hashFunctions} is {@code null}
     * @throws IllegalArgumentException if the family provides no hash function
     */
    public CuckooHashTable(HashFamily<? super E> hashFunctions) {
        this(hashFunctions, DEFAULT_TABLE_SIZE);
    }

    /**
     * Creates a table whose backing array has a prime length derived from
     * {@code size} (see {@code nextPrime}).
     *
     * @param hashFunctions the hash family used to compute candidate slots
     * @param size          requested initial capacity; must not be negative
     * @throws NullPointerException     if {@code hashFunctions} is {@code null}
     * @throws IllegalArgumentException if {@code size} is negative or the
     *                                  family provides no hash function
     */
    public CuckooHashTable(HashFamily<? super E> hashFunctions, int size) {
        this.hashFunctions = java.util.Objects.requireNonNull(hashFunctions, "hashFunctions");
        if (size < 0) {
            throw new IllegalArgumentException("size must not be negative: " + size);
        }
        numHashFunctions = hashFunctions.getNumOfFunctions();
        if (numHashFunctions < 1) {
            // Fail fast: with no function there is no candidate slot, and
            // Random.nextInt(0) would throw later in the middle of an insertion.
            throw new IllegalArgumentException(
                    "hash family must provide at least one function: " + numHashFunctions);
        }
        allocateArray(nextPrime(size));
        currentSize = 0;
    }

    private void allocateArray(int arraySize) {
        array = new Object[arraySize];
    }

    /** {@inheritDoc} */
    @Override
    public int size() {
        return currentSize;
    }

    /** {@inheritDoc} */
    @Override
    public boolean isEmpty() {
        return currentSize == 0;
    }

    /** Empties every slot and resets the element count; the capacity is kept. */
    @Override
    public void clear() {
        currentSize = 0;
        java.util.Arrays.fill(array, null);
    }

    /**
     * Tests whether an equal element is stored in one of its candidate slots.
     *
     * @param e the element to look up
     * @return {@code true} if found; always {@code false} for {@code null}
     */
    @Override
    public boolean contains(E e) {
        return findPos(e) != -1;
    }

    /**
     * Adds the element unless an equal one is already present. The table is
     * expanded first when the load factor has been reached.
     *
     * @param e the element to add; must not be {@code null}
     * @throws NullPointerException if {@code e} is {@code null}
     */
    @Override
    public void add(E e) {
        // null is the empty-slot marker; storing it would corrupt the element count.
        java.util.Objects.requireNonNull(e, "e");
        if (contains(e)) {
            return;
        }
        if (currentSize >= array.length * LOAD_FACTOR) {
            expand();
        }
        addHelper(e);
    }

    /**
     * Inserts an element that is known not to be in the table, evicting
     * occupants as needed and rebuilding the table when evictions do not settle.
     */
    private void addHelper(E e) {
        while (true) {
            int lastPos = -1;
            int pos;
            for (int count = 0; count < COUNT_LIMIT; count++) {
                // Take the first empty candidate slot, if there is one.
                for (int i = 0; i < numHashFunctions; i++) {
                    pos = hash(e, i);
                    if (array[pos] == null) {
                        array[pos] = e;
                        currentSize++;
                        return;
                    }
                }
                // All candidates are taken: pick a random victim, trying a few
                // times not to bounce straight back into the slot just filled.
                int redraws = 0;
                do {
                    pos = hash(e, r.nextInt(numHashFunctions));
                } while (pos == lastPos && redraws++ < MAX_REDRAWS);
                E evicted = elementAt(pos);
                lastPos = pos;
                array[pos] = e;
                // The evicted element becomes the one to place next.
                e = evicted;
            }
            // Too many evictions: rebuild, then retry with the element still in hand.
            if (++rehashes > ALLOWED_REHASHES) {
                expand();
                rehashes = 0;
            } else {
                rehash();
            }
        }
    }

    /** Rebuilds the table with a backing array about {@code 1 / LOAD_FACTOR} times larger. */
    private void expand() {
        rehash((int) (array.length / LOAD_FACTOR));
    }

    /** Rebuilds the table at the same requested length with newly generated hash functions. */
    private void rehash() {
        hashFunctions.generateNewFunctions();
        rehash(array.length);
    }

    /**
     * Moves every stored element into a fresh backing array of prime length
     * derived from {@code newLength}.
     */
    private void rehash(int newLength) {
        Object[] oldArray = array;
        allocateArray(nextPrime(newLength));
        currentSize = 0;
        for (Object slot : oldArray) {
            if (slot != null) {
                // Only E instances are ever stored in the array.
                @SuppressWarnings("unchecked")
                E element = (E) slot;
                // Stored elements are already distinct, so the duplicate and
                // load-factor checks of add() are not needed here.
                addHelper(element);
            }
        }
    }

    /** Reads a slot as {@code E}; safe because only {@code E} values are stored. */
    @SuppressWarnings("unchecked")
    private E elementAt(int index) {
        return (E) array[index];
    }

    /**
     * Removes the element equal to {@code e}, if present.
     *
     * @param e the element to remove
     * @return {@code true} if an element was removed
     */
    @Override
    public boolean remove(E e) {
        int pos = findPos(e);
        if (pos == -1) {
            return false;
        }
        array[pos] = null;
        currentSize--;
        return true;
    }

    /**
     * Finds the slot holding an element equal to {@code e}.
     *
     * @return the slot index, or {@code -1} if absent or {@code e} is {@code null}
     */
    private int findPos(E e) {
        if (e == null) {
            return -1;
        }
        for (int i = 0; i < numHashFunctions; i++) {
            int pos = hash(e, i);
            if (array[pos] != null && array[pos].equals(e)) {
                return pos;
            }
        }
        return -1;
    }

    /** Maps hash function {@code which} of {@code e} to a valid index into the backing array. */
    private int hash(E e, int which) {
        // floorMod keeps the index non-negative even for negative hash values.
        return Math.floorMod(hashFunctions.hash(e, which), array.length);
    }

    /**
     * Lists the stored elements in backing-array order.
     *
     * @return the elements formatted as {@code [a, b, c]}, or {@code []} when empty
     */
    @Override
    public String toString() {
        java.util.StringJoiner joiner = new java.util.StringJoiner(", ", "[", "]");
        for (Object slot : array) {
            if (slot != null) {
                joiner.add(String.valueOf(slot));
            }
        }
        return joiner.toString();
    }

    /**
     * Returns the first odd prime found at or above {@code n}; an even
     * {@code n} is bumped to {@code n + 1} before the search starts.
     */
    private int nextPrime(int n) {
        if (n % 2 == 0) {
            n++;
        }
        while (!isPrime(n)) {
            n += 2;
        }
        return n;
    }

    /** Trial-division primality test; numbers below 2 are not prime. */
    private boolean isPrime(int n) {
        if (n < 2) {
            return false;
        }
        if (n == 2 || n == 3) {
            return true;
        }
        if (n % 2 == 0) {
            return false;
        }
        // Square in long so the bound check cannot overflow for large n.
        for (int i = 3; (long) i * i <= n; i += 2) {
            if (n % i == 0) {
                return false;
            }
        }
        return true;
    }
}
/**
 * Hash family for strings in which each function is a polynomial hash over
 * the string's characters, using its own randomly drawn {@code int} multiplier.
 */
class StringHashFamily implements HashFamily<String> {
    private final Random random = new Random();
    /** One multiplier per hash function; redrawn by {@link #generateNewFunctions()}. */
    private final int[] multipliers;

    /**
     * Creates a family and draws its initial multipliers.
     *
     * @param size the number of hash functions in the family
     */
    public StringHashFamily(int size) {
        multipliers = new int[size];
        generateNewFunctions();
    }

    /**
     * Hashes the string with the selected function: starting from zero, the
     * running value is multiplied by the function's multiplier and the next
     * character is added, for each character in order.
     *
     * @param e     the string to hash
     * @param which index of the hash function to use
     * @return the resulting hash value
     */
    @Override
    public int hash(String e, int which) {
        final int factor = multipliers[which];
        int result = 0;
        for (char ch : e.toCharArray()) {
            result = factor * result + ch;
        }
        return result;
    }

    /** Draws a new random multiplier for every function in the family. */
    @Override
    public void generateNewFunctions() {
        int index = 0;
        while (index < multipliers.length) {
            multipliers[index] = random.nextInt();
            index++;
        }
    }

    /**
     * Reports the number of hash functions.
     *
     * @return the size given at construction
     */
    @Override
    public int getNumOfFunctions() {
        return multipliers.length;
    }
}