C++STL之散列表

散列表(哈希表)是普通數組概念的推廣。在散列表中,不是直接把關鍵字作爲數組的下標,而是根據關鍵字計算出相應的下標。

一個散列表中最重要的是它的散列函數以及解決衝突的方法。

這裏試着自己模仿網上STL中hash表的實現寫了一個,記在這裏以便以後查看。

這裏散列函數選擇的是除法散列法,解決衝突的方法選用的是鏈接法,表基於vector實現。

代碼如下:

//hashtable.h
#ifndef HASHTABLE_H
#define HASHTABLE_H

#include<iostream>
#include<algorithm>
#include<vector>
#include<string.h>
#include<memory>

// One link of a bucket's singly linked chain: the stored value plus a pointer
// to the following link. Kept as a plain aggregate (no constructors).
template<class ValueType>
struct __hashtable_node
{
    __hashtable_node<ValueType> *next; // following node in the same chain
    ValueType val;                     // value held by this node
};

// Number of entries in __stl_prime_list.
static const int __stl_num_primes=28;

// Ascending table of primes used as bucket counts; each entry is roughly
// twice the previous one. The table must stay sorted because it is searched
// with std::lower_bound. Entry 26 is 1610612741 (it was mistyped with an
// extra digit, which broke the ordering and does not fit in 32 bits).
static const unsigned long __stl_prime_list[__stl_num_primes]=
{
    53ul,97ul,193ul,389ul,769ul,
    1543ul,3079ul,6151ul,12289ul,24593ul,
    49157ul,98317ul,196613ul,393241ul,786433ul,
    1572869ul,3145739ul,6291469ul,12582917ul,25165843ul,
    50331653ul,100663319ul,201326611ul,402653189ul,805306457ul,
    1610612741ul,3221225473ul,4294967291ul
};

// Looks up the first prime in __stl_prime_list that is not less than n.
// When n is larger than every table entry, the last (largest) entry is
// returned instead.
inline unsigned long __get_next_prime(unsigned long n)
{
    const unsigned long *const table_begin=__stl_prime_list;
    const unsigned long *const table_end=table_begin+__stl_num_primes;
    const unsigned long *const hit=std::lower_bound(table_begin,table_end,n);

    if(hit!=table_end)
    {
        return *hit;
    }
    return table_end[-1];
}

// Constructs a Target object in the storage that p points to, passing value
// to Target's constructor (placement new; no memory is allocated here).
template<class Target,class Source>
void construct(Target *p,const Source &value)
{
    new (p) Target(value);
}

// Runs the destructor of the object that pointer refers to.
// The storage itself is not released.
template<class Object>
void destroy(Object *pointer)
{
    pointer->~Object();
}

//Hash function definitions (function objects).
//Primary template: deliberately has no members, so only key types that have
//an explicit specialization provide an operator().
template<class KeyType>
struct hash
{
};

// Hashes a NUL-terminated C string with the recurrence h = 5*h + c over its
// characters. Returns 0 for the empty string.
// s must point to a valid NUL-terminated string (a null pointer is not checked).
inline size_t __stl_hash_string(const char *s)
{
    unsigned long h=0;
    // Advance s on every step; without the increment the loop never ends
    // for a non-empty string.
    for(;*s;++s)
    {
        h=5*h+*s;
    }
    return size_t(h);
}

// hash for int keys: the key itself, converted to size_t.
template<>
struct hash<int>
{
    size_t operator()(int x) const
    {
        return static_cast<size_t>(x);
    }
};

// hash for unsigned int keys: the key itself, converted to size_t.
template<>
struct hash<unsigned int>
{
    size_t operator()(unsigned int x) const
    {
        return static_cast<size_t>(x);
    }
};

// hash for short keys: the key itself, converted to size_t.
template<>
struct hash<short>
{
    size_t operator()(short x) const
    {
        return static_cast<size_t>(x);
    }
};

// hash for unsigned short keys: the key itself, converted to size_t.
template<>
struct hash<unsigned short>
{
    size_t operator()(unsigned short x) const
    {
        return static_cast<size_t>(x);
    }
};

// hash for long keys: the key itself, converted to size_t.
template<>
struct hash<long>
{
    size_t operator()(long x) const
    {
        return static_cast<size_t>(x);
    }
};

// hash for unsigned long keys: the key itself, converted to size_t.
template<>
struct hash<unsigned long>
{
    size_t operator()(unsigned long x) const
    {
        return static_cast<size_t>(x);
    }
};

// hash for char* keys: hashes the characters of the NUL-terminated string
// via __stl_hash_string (the pointer value itself is not used).
template<>
struct hash<char *>
{
    size_t operator()(const char *s) const
    {
        const size_t code=__stl_hash_string(s);
        return code;
    }
};

// hash for const char* keys: hashes the characters of the NUL-terminated
// string via __stl_hash_string (the pointer value itself is not used).
template<>
struct hash<const char *>
{
    size_t operator()(const char *s) const
    {
        const size_t code=__stl_hash_string(s);
        return code;
    }
};

// hash for char keys: the character code, converted to size_t.
template<>
struct hash<char>
{
    size_t operator()(char s) const
    {
        return static_cast<size_t>(s);
    }
};

// hash for unsigned char keys: the character code, converted to size_t.
template<>
struct hash<unsigned char>
{
    size_t operator()(unsigned char s) const
    {
        return static_cast<size_t>(s);
    }
};

// hash for signed char keys: the character code, converted to size_t.
template<>
struct hash<signed char>
{
    size_t operator()(signed char s) const
    {
        return static_cast<size_t>(s);
    }
};

// Base for one-argument function objects: only publishes the argument and
// result types as nested type names.
template<class Arg,class Result>
struct unary_function
{
    using argument_type=Arg;
    using result_type=Result;
};

// Base for two-argument function objects: only publishes both argument types
// and the result type as nested type names.
template<class Arg1,class Arg2,class Result>
struct binary_function
{
    using first_argument_type=Arg1;
    using second_argument_type=Arg2;
    using result_type=Result;
};

// Function object that hands back its argument unchanged (by const
// reference).
template<class T>
struct identity
    :public unary_function<T,T>
{
    const T& operator()(const T& x) const
    {
        return x;
    }
};

// Function object that compares two values of type T with operator==.
template <class T>
struct equal_to
    :public binary_function<T,T,bool>
{
    bool operator()(const T &x,const T &y) const
    {
        return x==y;
    }
};

// Equality predicate for C strings: true when both NUL-terminated strings
// have the same characters (compared with strcmp, not by pointer).
struct eqstr
{
    bool operator()(const char *s1,const char *s2)const
    {
        return strcmp(s1,s2)==0;
    }
};


//Hash table data structure: collisions are resolved by chaining and the
//bucket array is a std::vector of node pointers.
//Template parameters:
//ValueType: type of the value stored in a node
//KeyType: type of a node's key
//HashFcn: type of the hash function object
//ExtractKey: function object that extracts the key from a stored value
//EqualKey: function object that decides whether two keys are equal
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
class HashTableClass
{
public:
    typedef struct __hashtable_node<ValueType> node;

    //Creates an empty table whose bucket count is next_size(n).
    //Members are listed in declaration order, which is the order in which
    //they are actually initialized.
    HashTableClass(size_t n,const HashFcn &hf,const EqualKey &eql,const ExtractKey &ext)
        :num_elements(0),hasher(hf),get_key(ext),equals(eql)
    {
        initialize_buckets(n);
    }

    //Same as above, with a default-constructed key extractor.
    HashTableClass(size_t n, const HashFcn &hf, const EqualKey &eql)
        :num_elements(0),hasher(hf),get_key(ExtractKey()),equals(eql)
    {
        initialize_buckets(n);
    }

    //Copy constructor: copies the function objects, then the contents via copy_from.
    HashTableClass(const HashTableClass &ht)
        :num_elements(0),hasher(ht.hasher),get_key(ht.get_key),equals(ht.equals)
    {
        copy_from(&ht);
    }

    //Releases every node.
    ~HashTableClass()
    {
        clear();
    }

    //Copy assignment; self-assignment is a no-op.
    //Returns *this so assignments can be chained (callers that ignored the
    //former void result are unaffected).
    HashTableClass &operator =(const HashTableClass &ht)
    {
        if(this!=&ht)
        {
            clear();
            hasher=ht.hasher;
            equals=ht.equals;
            get_key=ht.get_key;
            copy_from(&ht);
        }
        return *this;
    }

    //Number of stored elements
    size_t size() const
    {
        return num_elements;
    }

    //Upper bound on the number of elements: the largest size_t value divided
    //by the node size. Written with size_t(-1) so that no extra header
    //(<limits>) is required.
    size_t max_size() const
    {
        return static_cast<size_t>(-1)/sizeof(node);
    }

    //Current number of buckets
    size_t bucket_count() const
    {
        return buckets.size();
    }

    //Largest possible number of buckets (last entry of the prime table)
    size_t max_bucket_count() const
    {
        return __stl_prime_list[__stl_num_primes-1];
    }

    //Inserts an element; duplicate keys are not allowed
    std::pair<node *,bool> insert_unique(const ValueType &obj);

    //Inserts an element; duplicate keys are allowed
    node * insert_equal(const ValueType &obj);

    //Prints every node
    void printAllNodes();

    //Prints every bucket
    void printAllBuckets();

    //Looks up the node with the given key
    std::pair<node *,bool> find(const KeyType &key);

    //Counts how many times the given key occurs
    size_t count(const KeyType &key);

    //Copies the contents of another hash table
    void copy_from(const HashTableClass *ht);

    //Removes every element of the hash table
    void clear();

private:
    std::vector<node*> buckets; //head pointer of each bucket's chain
    size_t num_elements;        //number of stored elements
    HashFcn hasher;             //hash function object
    ExtractKey get_key;         //extracts the key from a value
    EqualKey equals;            //key equality predicate

    //Node allocation
    node* new_node(const ValueType &obj);

    //Node release.
    //delete already runs the destructor of n->val exactly once; calling
    //destroy(&n->val) in addition would destroy the value twice.
    void delete_node(node *n)
    {
        delete n;
    }

    //Initializes the buckets
    void initialize_buckets(size_t n);

    //Returns the first prime in the prime table that is not less than n
    //(the largest table entry when n exceeds all of them)
    size_t next_size(size_t n) const
    {
        return __get_next_prime(n);
    }

    //Grows the bucket array when needed
    void resize(size_t num_elements_hint);

    //Decide which bucket an element falls into.
    //Two versions are provided.
    //Version 1: takes only the value
    size_t bkt_num(const ValueType &obj) const{return bkt_num_key(get_key(obj));}
    //Version 1: takes only the key
    size_t bkt_num_key(const KeyType &key) const{return hasher(key)%buckets.size();}
    //Version 2: takes the value and the number of buckets
    size_t bkt_num(const ValueType &obj,size_t n) const{return bkt_num_key(get_key(obj),n);}
    //Version 2: takes the key and the number of buckets
    size_t bkt_num_key(const KeyType &key,size_t n) const{return hasher(key)%n;}

    //Inserts without reallocating the buckets; duplicate keys are not allowed
    std::pair<node *,bool> insert_unique_noresize(const ValueType &obj);

    //Inserts without reallocating the buckets; duplicate keys are allowed
    node * insert_equal_noresize(const ValueType &obj);
};

// Allocates a node holding a copy of obj, with a null next pointer.
// The value is constructed exactly once, directly inside the new node
// (aggregate initialization). Previously the node's value was first
// default-constructed and then overwritten by placement new without being
// destroyed, which also forced ValueType to be default-constructible.
// If copying obj throws, the new-expression releases the node's memory and the
// exception propagates to the caller.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
typename HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::node *//typename marks node as a type rather than a variable
HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::new_node(const ValueType &obj)
{
    return new node{nullptr,obj};
}

// Inserts obj unless an element with an equal key is already stored.
// The bucket array is given a chance to grow for one more element first;
// the actual insertion is done by insert_unique_noresize, whose result is
// returned unchanged.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
auto HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::insert_unique(const ValueType &obj)
    -> std::pair<node *,bool>
{
    const size_t wanted=num_elements+1;
    resize(wanted);
    return insert_unique_noresize(obj);
}

// Inserts obj even when an element with an equal key is already stored.
// The bucket array is given a chance to grow for one more element first;
// the actual insertion is done by insert_equal_noresize, whose result is
// returned unchanged.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
auto HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::insert_equal(const ValueType &obj)
    -> node *
{
    const size_t wanted=num_elements+1;
    resize(wanted);
    return insert_equal_noresize(obj);
}

// Writes every stored value to std::cout, bucket by bucket and in chain
// order, each followed by a space, then ends the line.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::printAllNodes()
{
    std::cout<<"all nodes in hash table:"<<std::endl;
    for(node *head:buckets)
    {
        for(node *walker=head;walker;walker=walker->next)
        {
            std::cout<<walker->val<<" ";
        }
    }
    std::cout<<std::endl;
}

// Writes a per-bucket report to std::cout: an "is empty!" line for a bucket
// without nodes, otherwise the number of nodes followed by a line listing
// their values in chain order.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::printAllBuckets()
{
    std::cout<<"all buckets in hash table:"<<std::endl;
    for(size_t idx=0;idx<buckets.size();++idx)
    {
        node *const head=buckets[idx];
        if(!head)
        {
            std::cout<<"bucket["<<idx<<"] is empty!"<<std::endl;
            continue;
        }

        // First pass: measure the chain so the length can be printed up front.
        size_t length=0;
        for(node *walker=head;walker;walker=walker->next)
        {
            ++length;
        }
        std::cout<<"bucket["<<idx<<"] has "<<length<<" elements:"<<std::endl;

        // Second pass: print the values themselves.
        for(node *walker=head;walker;walker=walker->next)
        {
            std::cout<<walker->val<<" ";
        }
        std::cout<<std::endl;
    }
}

// Searches the bucket that key hashes to for a node with an equal key.
// Returns (node, true) for the first match in the chain, or (nullptr, false)
// when there is none. A message describing the outcome is written to
// std::cout in both cases.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
auto HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::find(const KeyType &key)
    -> std::pair<node *,bool>
{
    const size_t slot=bkt_num_key(key);
    for(node *walker=buckets[slot];walker;walker=walker->next)
    {
        if(equals(key,get_key(walker->val)))
        {
            std::cout<<"find the element "<<key<<" success!"<<std::endl;
            return std::make_pair(walker,true);
        }
    }
    std::cout<<"cannot find the element "<<key<<"!"<<std::endl;
    return std::make_pair(static_cast<node *>(nullptr),false);
}

// Counts the nodes whose key equals key; only the bucket that key hashes to
// is scanned. The result is also reported on std::cout before it is returned.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
size_t HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::count(const KeyType &key)
{
    const size_t slot=bkt_num_key(key);
    size_t matches=0;
    for(node *walker=buckets[slot];walker;walker=walker->next)
    {
        if(equals(key,get_key(walker->val)))
        {
            ++matches;
        }
    }
    std::cout<<"the element "<<key<<" appears "<<matches<<" times"<<std::endl;
    return matches;
}

// Releases every node of every bucket and resets the element count to zero.
// The bucket array keeps its size; each bucket ends up holding a null head.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::clear()
{
    for(node *&head:buckets)
    {
        node *walker=head;
        while(walker)
        {
            // Remember the successor before the current node is released.
            node *const following=walker->next;
            delete_node(walker);
            walker=following;
        }
        head=nullptr;
    }
    num_elements=0;
}

// Replaces the contents of this table with a copy of *ht: the same number of
// buckets and, per bucket, the same values in the same chain order.
// Copying a table onto itself is a no-op.
// ht must not be null. If copying a value throws, every node copied so far is
// released (this table is left empty) and the exception is rethrown.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::copy_from(const HashTableClass *ht)
{
    if(ht==this)
    {
        return;
    }

    // Release the nodes currently owned; dropping the bucket pointers alone
    // would leak them when this public method is called on a non-empty table.
    clear();
    buckets.clear();
    buckets.reserve(ht->buckets.size());
    buckets.insert(buckets.end(),ht->buckets.size(),static_cast<node *>(nullptr));

    try
    {
        for(size_t i=0;i<ht->buckets.size();i++)
        {
            const node *src=ht->buckets[i];
            if(!src)
            {
                continue;
            }
            node *tail=new_node(src->val);
            buckets[i]=tail;
            // Walk the SOURCE chain; walking the freshly created node's chain
            // (always empty) copied only the first node of each bucket.
            for(src=src->next;src;src=src->next)
            {
                tail->next=new_node(src->val);
                tail=tail->next;
            }
        }
        num_elements=ht->num_elements;
    }
    catch(...)
    {
        clear();
        throw;
    }
}

// Appends next_size(n) null bucket heads to the bucket array, reserving the
// storage beforehand.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::initialize_buckets(size_t n)
{
    const size_t bucket_total=next_size(n);
    node *const empty_head=nullptr;
    buckets.reserve(bucket_total);
    buckets.insert(buckets.end(),bucket_total,empty_head);
}

// Grows the bucket array when num_elements_hint exceeds the current number
// of buckets. The new bucket count is next_size(num_elements_hint); every
// existing node is relinked (not copied) to the front of its new bucket.
// Nothing happens when the hint fits or when the prime table cannot offer a
// larger bucket count.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
void HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::resize(size_t num_elements_hint)
{
    const size_t old_n_vector = buckets.size();
    if(num_elements_hint<=old_n_vector)
    {
        return;
    }

    const size_t n=next_size(num_elements_hint);
    if(n<=old_n_vector)//n may already be the largest prime in the table
    {
        return;
    }

    std::vector<node *> tempVec(n,static_cast<node *>(nullptr));
    for(size_t i=0;i<old_n_vector;i++)
    {
        while(node *first=buckets[i])
        {
            //Work out the new bucket from the node's VALUE; bkt_num extracts
            //the key itself (bkt_num_key expects a key, not a value).
            const size_t new_bucket_index=bkt_num(first->val,n);
            //Unlink the node from the old chain BEFORE its next pointer is
            //overwritten; otherwise the rest of the old chain is lost and the
            //walk continues inside the new bucket.
            buckets[i]=first->next;
            //Insert the node at the front of its new bucket
            first->next=tempVec[new_bucket_index];
            tempVec[new_bucket_index]=first;
        }
    }
    buckets.swap(tempVec);
}

// Inserts obj without touching the bucket array, refusing duplicate keys.
// Returns (existing node, false) when a node with an equal key is already in
// the bucket; otherwise a new node is pushed at the front of the bucket, the
// element count is incremented and (new node, true) is returned.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
auto HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::insert_unique_noresize(const ValueType &obj)
    -> std::pair<node *,bool>
{
    const size_t slot=bkt_num(obj);
    node *const old_head=buckets[slot];

    for(node *walker=old_head;walker;walker=walker->next)
    {
        if(equals(get_key(obj),get_key(walker->val)))
        {
            return std::make_pair(walker,false);
        }
    }

    node *const fresh=new_node(obj);
    fresh->next=old_head;
    buckets[slot]=fresh;
    ++num_elements;
    return std::make_pair(fresh,true);
}

// Inserts obj without touching the bucket array, allowing duplicate keys.
// When a node with an equal key is found, the new node is linked directly
// after the first such node; otherwise it is pushed at the front of the
// bucket. The element count is incremented and the new node is returned.
template <class ValueType,class KeyType,class HashFcn,class ExtractKey,class EqualKey>
auto HashTableClass<ValueType,KeyType,HashFcn,ExtractKey,EqualKey>::insert_equal_noresize(const ValueType &obj)
    -> node *
{
    const size_t slot=bkt_num(obj);

    for(node *walker=buckets[slot];walker;walker=walker->next)
    {
        if(equals(get_key(obj),get_key(walker->val)))
        {
            // Keep equal keys adjacent: splice in right behind the match.
            node *const fresh=new_node(obj);
            fresh->next=walker->next;
            walker->next=fresh;
            ++num_elements;
            return fresh;
        }
    }

    // No equal key in this bucket: the new node becomes the bucket head.
    node *const fresh=new_node(obj);
    fresh->next=buckets[slot];
    buckets[slot]=fresh;
    ++num_elements;
    return fresh;
}

#endif // HASHTABLE_H

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章