散列表(哈希表)是普通數組概念的推廣。在散列表中,不是直接把關鍵字作爲數組的下標,而是根據關鍵字計算出相應的下標。
一個散列表中最重要的是它的散列函數以及解決衝突的方法。
這裏試着自己模仿網上STL中hash表的實現寫了一個,記在這裏以便以後查看。
這裏散列函數選擇的是除法散列法,解決衝突的方法選用的是鏈接法,表基於vector實現。
代碼如下:
//hashtable.h
#ifndef HASHTABLE_H
#define HASHTABLE_H
#include<iostream>
#include<algorithm>
#include<vector>
#include<string.h>
#include<memory>
//One node of a bucket chain: a link to the following node plus the stored value.
template<typename ValueType>
struct __hashtable_node
{
    __hashtable_node<ValueType> *next;  //following node in the same chain
    ValueType val;                      //value held by this node
};
//Number of entries in __stl_prime_list.
static const int __stl_num_primes=28;
//Candidate bucket counts in strictly ascending order. __get_next_prime searches this
//table with std::lower_bound, so it must stay sorted. Every entry fits in 32 bits.
static const unsigned long __stl_prime_list[__stl_num_primes]=
{
    53ul,97ul,193ul,389ul,769ul,
    1543ul,3079ul,6151ul,12289ul,24593ul,
    49157ul,98317ul,196613ul,393241ul,786433ul,
    1572869ul,3145739ul,6291469ul,12582917ul,25165843ul,
    50331653ul,100663319ul,201326611ul,402653189ul,805306457ul,
    1610612741ul,3221225473ul,4294967291ul
};
//Returns the first entry of __stl_prime_list that is not less than n, or the
//last (largest) entry when n is greater than every entry.
inline unsigned long __get_next_prime(unsigned long n)
{
    const unsigned long *const table_begin=__stl_prime_list;
    const unsigned long *const table_end=table_begin+__stl_num_primes;
    const unsigned long *const hit=std::lower_bound(table_begin,table_end,n);
    if(hit==table_end)
    {
        return table_end[-1];
    }
    return *hit;
}
//Copy-constructs a T1 from value in the storage that p points to (placement new).
//No memory is allocated here; p must already refer to suitable storage.
template<typename T1,typename T2>
void construct(T1 *p,const T2 &value)
{
    new (p) T1(value);
}
//Runs the destructor of the object that pointer refers to.
//The storage itself is not released.
template<typename T>
void destroy(T *pointer)
{
    pointer->~T();
}
//Hash function object. The primary template is intentionally empty: it provides
//no operator(), so only the explicit specializations below can be called.
template<typename KeyType>
struct hash
{
};
//Hashes a NUL-terminated C string: starting from 0, each character c updates the
//running value as h = 5*h + c. The empty string hashes to 0.
//s must point to a NUL-terminated string.
inline size_t __stl_hash_string(const char *s)
{
    unsigned long h=0;
    //The pointer must advance on every iteration; without ++s the loop would never
    //reach the terminating NUL for a non-empty string.
    for(;*s;++s)
    {
        h=5*h+*s;
    }
    return size_t(h);
}
//hash for int: the value itself, converted to size_t.
template<>
struct hash<int>
{
    size_t operator()(int x) const
    {
        return static_cast<size_t>(x);
    }
};
//hash for unsigned int: the value itself, converted to size_t.
template<>
struct hash<unsigned int>
{
    size_t operator()(unsigned int x) const
    {
        return static_cast<size_t>(x);
    }
};
//hash for short: the value itself, converted to size_t.
template<>
struct hash<short>
{
    size_t operator()(short x) const
    {
        return static_cast<size_t>(x);
    }
};
//hash for unsigned short: the value itself, converted to size_t.
template<>
struct hash<unsigned short>
{
    size_t operator()(unsigned short x) const
    {
        return static_cast<size_t>(x);
    }
};
//hash for long: the value itself, converted to size_t.
template<>
struct hash<long>
{
    size_t operator()(long x) const
    {
        return static_cast<size_t>(x);
    }
};
//hash for unsigned long: the value itself, converted to size_t.
template<>
struct hash<unsigned long>
{
    size_t operator()(unsigned long x) const
    {
        return static_cast<size_t>(x);
    }
};
//hash for char*: hashes the characters of the string via __stl_hash_string
//(the pointer value itself is not hashed).
template<>
struct hash<char *>
{
    size_t operator()(const char *s) const
    {
        const size_t code=__stl_hash_string(s);
        return code;
    }
};
//hash for const char*: hashes the characters of the string via __stl_hash_string
//(the pointer value itself is not hashed).
template<>
struct hash<const char *>
{
    size_t operator()(const char *s) const
    {
        const size_t code=__stl_hash_string(s);
        return code;
    }
};
//hash for char: the character code, converted to size_t.
template<>
struct hash<char>
{
    size_t operator()(char s) const
    {
        return static_cast<size_t>(s);
    }
};
//hash for unsigned char: the character code, converted to size_t.
template<>
struct hash<unsigned char>
{
    size_t operator()(unsigned char s) const
    {
        return static_cast<size_t>(s);
    }
};
//hash for signed char: the character code, converted to size_t.
template<>
struct hash<signed char>
{
    size_t operator()(signed char s) const
    {
        return static_cast<size_t>(s);
    }
};
//Base for one-argument function objects: only publishes the argument and result types.
template<typename Arg,typename Result>
struct unary_function
{
    using argument_type=Arg;
    using result_type=Result;
};
//Base for two-argument function objects: only publishes the argument and result types.
template<typename Arg1,typename Arg2,typename Result>
struct binary_function
{
    using first_argument_type=Arg1;
    using second_argument_type=Arg2;
    using result_type=Result;
};
//Function object that hands its argument back unchanged (by const reference).
template<typename T>
struct identity:public unary_function<T,T>
{
    const T& operator()(const T& x) const
    {
        return x;
    }
};
//Function object that compares two values with operator==.
template <typename T>
struct equal_to:public binary_function<T,T,bool>
{
    bool operator()(const T &x,const T &y) const
    {
        const bool same=(x==y);
        return same;
    }
};
//Equality for C strings: true when strcmp reports the two strings as equal
//(contents are compared, not pointer values).
struct eqstr
{
    bool operator()(const char *s1,const char *s2)const
    {
        return strcmp(s1,s2)==0;
    }
};
//Hash table data structure: a std::vector of buckets, each bucket being the head of a
//singly linked chain of nodes (collisions are resolved by chaining).
//Template parameters:
//ValueType:  type of the value stored in a node
//KeyType:    type of the key
//HashFcn:    hash function object type
//ExtractKey: function object that obtains the key from a stored value
//EqualKey:   function object that decides whether two keys are equal
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
class HashTableClass
{
public:
    typedef struct __hashtable_node<ValueType> node;

    //Creates an empty table; the bucket count is chosen by initialize_buckets(n).
    //Members are initialized in declaration order.
    HashTableClass(size_t n,const HashFcn &hf,const EqualKey &eql,const ExtractKey &ext)
        :num_elements(0),hasher(hf),get_key(ext),equals(eql)
    {
        initialize_buckets(n);
    }
    //Same as above, using a default-constructed ExtractKey.
    HashTableClass(size_t n, const HashFcn &hf, const EqualKey &eql)
        :num_elements(0),hasher(hf),get_key(ExtractKey()),equals(eql)
    {
        initialize_buckets(n);
    }
    //Copy constructor: copies the function objects, then the contents via copy_from.
    HashTableClass(const HashTableClass &ht)
        :num_elements(0),hasher(ht.hasher),get_key(ht.get_key),equals(ht.equals)
    {
        copy_from(&ht);
    }
    //Frees every node that is still stored in the table.
    ~HashTableClass()
    {
        clear();
    }
    //Copy assignment: frees the current nodes, then copies the function objects and
    //the contents of ht. Self-assignment is a no-op. Returns *this so that
    //assignments can be chained.
    HashTableClass &operator =(const HashTableClass &ht)
    {
        if(this!=&ht)
        {
            clear();
            hasher=ht.hasher;
            equals=ht.equals;
            get_key=ht.get_key;
            copy_from(&ht);
        }
        return *this;
    }
    //Number of stored elements.
    size_t size() const
    {
        return num_elements;
    }
    //Upper bound on the element count: the largest size_t value divided by sizeof(node).
    size_t max_size() const
    {
        //size_t(-1) is the largest value a size_t can hold.
        return static_cast<size_t>(-1)/sizeof(node);
    }
    //Current number of buckets.
    size_t bucket_count() const
    {
        return buckets.size();
    }
    //Largest bucket count the table can use: the last entry of the prime table.
    size_t max_bucket_count() const
    {
        return __stl_prime_list[__stl_num_primes-1];
    }
    //Inserts obj; duplicates are not allowed.
    std::pair<node *,bool> insert_unique(const ValueType &obj);
    //Inserts obj; duplicates are allowed.
    node * insert_equal(const ValueType &obj);
    //Prints every node.
    void printAllNodes();
    //Prints every bucket.
    void printAllBuckets();
    //Looks up the node holding the given key.
    std::pair<node *,bool> find(const KeyType &key);
    //Counts how many times the given key occurs.
    size_t count(const KeyType &key);
    //Copies the contents of another hash table.
    void copy_from(const HashTableClass *ht);
    //Removes all elements.
    void clear();
private:
    std::vector<node*> buckets;   //bucket heads; nullptr marks an empty bucket
    size_t num_elements;          //number of stored elements
    HashFcn hasher;               //maps a key to a hash value
    ExtractKey get_key;           //obtains the key from a stored value
    EqualKey equals;              //compares two keys
    //Allocates a node holding a copy of obj.
    node* new_node(const ValueType &obj);
    //Releases one node. `delete` already runs the destructor of n->val, so the value
    //must not be destroyed separately first (that would destroy it twice).
    void delete_node(node *n)
    {
        delete n;
    }
    //Sets up the bucket vector.
    void initialize_buckets(size_t n);
    //Bucket count to use for n, as chosen by __get_next_prime: the smallest listed
    //prime that is not less than n, or the largest listed prime if n exceeds them all.
    size_t next_size(size_t n) const
    {
        return __get_next_prime(n);
    }
    //Enlarges the bucket vector if num_elements_hint requires it.
    void resize(size_t num_elements_hint);
    //Bucket index computation, in two flavours.
    //Version 1 (uses the current bucket count): from a stored value
    size_t bkt_num(const ValueType &obj) const{return bkt_num_key(get_key(obj));}
    //Version 1: from a key
    size_t bkt_num_key(const KeyType &key) const{return hasher(key)%buckets.size();}
    //Version 2 (explicit bucket count n): from a stored value
    size_t bkt_num(const ValueType &obj,size_t n) const{return bkt_num_key(get_key(obj),n);}
    //Version 2: from a key
    size_t bkt_num_key(const KeyType &key,size_t n) const{return hasher(key)%n;}
    //Inserts without touching the bucket count; duplicates are not allowed.
    std::pair<node *,bool> insert_unique_noresize(const ValueType &obj);
    //Inserts without touching the bucket count; duplicates are allowed.
    node * insert_equal_noresize(const ValueType &obj);
};
//Allocates a node that holds a copy of obj and is not linked to any chain
//(next == nullptr). The value is copy-constructed exactly once, as part of the
//allocation, so ValueType does not need a default constructor. If the copy throws,
//the new-expression releases the memory and the exception propagates to the caller.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node *//typename marks node as a type name
HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::new_node(const ValueType &obj)
{
    //node is an aggregate {next, val}; this initializes both members directly.
    return new node{nullptr,obj};
}
//Inserts obj unless an element with an equal key is already stored.
//The bucket vector is given the chance to grow for one more element first.
//Returns the node (new or already present) and whether an insertion took place.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
auto HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::insert_unique(const ValueType &obj)
    -> std::pair<node *,bool>
{
    const size_t wanted=num_elements+1;
    resize(wanted);
    return insert_unique_noresize(obj);
}
//Inserts obj even if an element with an equal key is already stored.
//The bucket vector is given the chance to grow for one more element first.
//Returns the newly created node.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
auto HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::insert_equal(const ValueType &obj)
    -> node *
{
    const size_t wanted=num_elements+1;
    resize(wanted);
    return insert_equal_noresize(obj);
}
//Writes every stored value to std::cout, bucket by bucket and chain by chain,
//each followed by a space, then ends the line.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::printAllNodes()
{
    std::cout<<"all nodes in hash table:"<<std::endl;
    for(node *head:buckets)
    {
        for(node *it=head;it!=nullptr;it=it->next)
        {
            std::cout<<it->val<<" ";
        }
    }
    std::cout<<std::endl;
}
//Writes a per-bucket report to std::cout: an "is empty" line for empty buckets,
//otherwise the chain length followed by the values in chain order.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::printAllBuckets()
{
    std::cout<<"all buckets in hash table:"<<std::endl;
    for(size_t idx=0;idx<buckets.size();++idx)
    {
        node *const head=buckets[idx];
        if(!head)
        {
            std::cout<<"bucket["<<idx<<"] is empty!"<<std::endl;
            continue;
        }
        //First pass: the length has to be printed before the values.
        size_t length=0;
        for(node *it=head;it;it=it->next)
        {
            ++length;
        }
        std::cout<<"bucket["<<idx<<"] has "<<length<<" elements:"<<std::endl;
        //Second pass: the values themselves.
        for(node *it=head;it;it=it->next)
        {
            std::cout<<it->val<<" ";
        }
        std::cout<<std::endl;
    }
}
//Searches the bucket that key hashes to for the first node whose key equals key.
//Returns (node, true) on success and (nullptr, false) otherwise.
//Also reports the outcome on std::cout.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
auto HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::find(const KeyType &key)
    -> std::pair<node *,bool>
{
    typedef std::pair<node *,bool> lookup_result;
    const size_t slot=bkt_num_key(key);
    for(node *it=buckets[slot];it;it=it->next)
    {
        if(!equals(key,get_key(it->val)))
        {
            continue;
        }
        std::cout<<"find the element "<<key<<" success!"<<std::endl;
        return lookup_result(it,true);
    }
    std::cout<<"cannot find the element "<<key<<"!"<<std::endl;
    return lookup_result(nullptr,false);
}
//Counts the nodes in key's bucket whose key equals key.
//Also reports the result on std::cout.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
size_t HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::count(const KeyType &key)
{
    const size_t slot=bkt_num_key(key);
    size_t matches=0;
    for(node *it=buckets[slot];it;it=it->next)
    {
        if(equals(key,get_key(it->val)))
        {
            ++matches;
        }
    }
    std::cout<<"the element "<<key<<" appears "<<matches<<" times"<<std::endl;
    return matches;
}
//Deletes every node and leaves all buckets empty; the number of buckets is unchanged.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::clear()
{
    for(node *&head:buckets)
    {
        //Pop nodes off the front of the chain until the bucket is empty.
        while(head)
        {
            node *const doomed=head;
            head=doomed->next;
            delete_node(doomed);
        }
    }
    num_elements=0;
}
//Replaces the contents of this table with a copy of *ht: same bucket count, and
//every chain copied node by node in the same order. The function objects are not
//copied here. Nodes currently held by this table are freed first. Copying a table
//from itself is a no-op. If copying a value throws, the partially built copy is
//freed (the table is left empty) and the exception is rethrown.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::copy_from(const HashTableClass *ht)
{
    if(ht==this)
    {
        return;
    }
    //Free existing nodes before the bucket vector is overwritten, otherwise they leak.
    clear();
    buckets.assign(ht->buckets.size(),static_cast<node *>(nullptr));
    try
    {
        for(size_t i=0;i<ht->buckets.size();i++)
        {
            const node *src=ht->buckets[i];
            if(!src)
            {
                continue;
            }
            node *dst=new_node(src->val);
            buckets[i]=dst;
            //Walk the SOURCE chain (not the fresh copy, whose next is always null)
            //so that every node after the first one is copied as well.
            for(src=src->next;src;src=src->next)
            {
                dst->next=new_node(src->val);
                dst=dst->next;
            }
        }
        num_elements=ht->num_elements;
    }
    catch(...)
    {
        clear();
        throw;
    }
}
//Appends next_size(n) empty (nullptr) buckets to the bucket vector.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::initialize_buckets(size_t n)
{
    const size_t wanted=next_size(n);
    buckets.reserve(wanted);
    buckets.insert(buckets.end(),wanted,static_cast<node *>(nullptr));
}
//Grows the bucket vector when num_elements_hint exceeds the current bucket count.
//The new bucket count is next_size(num_elements_hint); every existing node is
//relinked (not copied) into the bucket it belongs to under the new count.
//Nothing happens when the hint fits or when the count cannot grow any further.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::resize(size_t num_elements_hint)
{
    const size_t old_n_vector=buckets.size();
    if(num_elements_hint<=old_n_vector)
    {
        return;
    }
    const size_t n=next_size(num_elements_hint);
    if(n<=old_n_vector)//next_size may already have returned the largest listed prime
    {
        return;
    }
    std::vector<node *> tempVec(n,static_cast<node *>(nullptr));
    for(size_t i=0;i<old_n_vector;i++)
    {
        while(node *first=buckets[i])
        {
            //Bucket index under the new count, derived from the key of the stored value.
            const size_t new_bucket_index=bkt_num(first->val,n);
            //Unlink from the old chain BEFORE first->next is overwritten; otherwise
            //the rest of the old chain would become unreachable.
            buckets[i]=first->next;
            //Push onto the front of the new bucket.
            first->next=tempVec[new_bucket_index];
            tempVec[new_bucket_index]=first;
        }
    }
    buckets.swap(tempVec);
}
//Inserts obj at the front of its bucket unless the bucket already holds an element
//with an equal key. The bucket count is not changed.
//Returns (existing node, false) for a duplicate and (new node, true) otherwise.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
auto HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::insert_unique_noresize(const ValueType &obj)
    -> std::pair<node *,bool>
{
    typedef std::pair<node *,bool> insert_result;
    const size_t slot=bkt_num(obj);
    node *const head=buckets[slot];
    for(node *it=head;it;it=it->next)
    {
        if(equals(get_key(obj),get_key(it->val)))
        {
            return insert_result(it,false);
        }
    }
    node *const fresh=new_node(obj);
    fresh->next=head;
    buckets[slot]=fresh;
    ++num_elements;
    return insert_result(fresh,true);
}
//Inserts obj without changing the bucket count; duplicates are allowed.
//If the bucket already holds an element with an equal key, the new node is linked
//directly after the first such element; otherwise it becomes the new bucket head.
//Returns the new node.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
auto HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::insert_equal_noresize(const ValueType &obj)
    -> node *
{
    const size_t slot=bkt_num(obj);
    for(node *it=buckets[slot];it;it=it->next)
    {
        if(!equals(get_key(obj),get_key(it->val)))
        {
            continue;
        }
        node *const twin=new_node(obj);
        twin->next=it->next;
        it->next=twin;
        ++num_elements;
        return twin;
    }
    node *const fresh=new_node(obj);
    fresh->next=buckets[slot];
    buckets[slot]=fresh;
    ++num_elements;
    return fresh;
}
#endif // HASHTABLE_H