一致性hash C++實現
知識標籤: Consistent hashing, C++
這兩篇關於Consistent hashing的文章不錯:
理想化的 Redis 集羣
一致性hash和solr千萬級數據分佈式搜索引擎中的應用
該代碼是我在別人github上找到的源碼,自己添加了一些註釋
一致性哈希的功能被封裝在模板類consistent_hash_map中
consistent_hash_map.hpp
consistent_hash_map.hpp如下(文件名需與下面測試代碼中的 #include "consistent_hash_map.hpp" 保持一致):
#include <map>
#include <string>
#include <list>
#include <functional>
#include <algorithm>
#ifndef __CONSISTENT_HASH_H__
#define __CONSISTENT_HASH_H__
// Consistent-hash ring backed by an ordered std::map keyed by node hash.
//
// Template parameters:
//   T     - type of the nodes placed on the ring.
//   Hash  - unary function object. It receives a T and returns an integer
//           hash value, which is used as the map key and therefore decides
//           the node's position on the ring. Hash must declare a nested
//           result_type naming the integer type it returns.
//   Alloc - allocator used by the underlying std::map.
template <typename T,
          typename Hash,
          typename Alloc = std::allocator<std::pair<const typename Hash::result_type, T> > >
class consistent_hash_map
{
public:
    // Integer type returned by the hash function.
    typedef typename Hash::result_type size_type;
    // Nodes are managed by a std::map ordered by hash value.
    typedef std::map<size_type, T, std::less<size_type>, Alloc> map_type;
    // std::pair<const size_type, T>: first is the node's hash, second is the node.
    typedef typename map_type::value_type value_type;
    typedef value_type& reference;
    typedef const value_type& const_reference;
    typedef typename map_type::iterator iterator;
    typedef typename map_type::const_iterator const_iterator;
    typedef typename map_type::reverse_iterator reverse_iterator;
    typedef typename map_type::const_reverse_iterator const_reverse_iterator;
    typedef Alloc allocator_type;

public:
    consistent_hash_map() {}
    ~consistent_hash_map() {}

public:
    // Returns the number of nodes on the ring.
    std::size_t size() const
    {
        return nodes_.size();
    }

    // Returns true when the ring holds no nodes.
    bool empty() const
    {
        return nodes_.empty();
    }

    // Inserts a node at the position given by its hash.
    // On success the bool is true and the iterator points at the new node.
    // On failure the bool is false and the iterator points at the entry that
    // already occupies that hash: either the same node was inserted before,
    // or a different node hashes to the same value.
    std::pair<iterator, bool> insert(const T& node)
    {
        const size_type hash = hasher_(node);
        return nodes_.insert(value_type(hash, node));
    }

    // Removes the node the iterator points at.
    void erase(iterator it)
    {
        nodes_.erase(it);
    }

    // Removes the entry stored under the hash of `node` and returns how many
    // entries were removed (0 or 1). The lookup is by hash only, so the stored
    // node is not compared against `node`.
    std::size_t erase(const T& node)
    {
        const size_type hash = hasher_(node);
        return nodes_.erase(hash);
    }

    // Maps a data key's hash to the node responsible for it: the first node
    // whose hash is >= `hash`, wrapping around to the first node on the ring
    // when no such node exists. Returns end() only when the ring is empty.
    iterator find(size_type hash)
    {
        if (nodes_.empty())
        {
            return nodes_.end();
        }
        // First entry whose key is >= hash.
        iterator it = nodes_.lower_bound(hash);
        if (it == nodes_.end())
        {
            // Walked past the largest hash: wrap around the ring.
            it = nodes_.begin();
        }
        return it;
    }

    // Const overload of find(); same lookup rule, usable on a const ring.
    const_iterator find(size_type hash) const
    {
        if (nodes_.empty())
        {
            return nodes_.end();
        }
        const_iterator it = nodes_.lower_bound(hash);
        if (it == nodes_.end())
        {
            it = nodes_.begin();
        }
        return it;
    }

    // Iteration over the nodes in ascending hash order (and reverse).
    iterator begin() { return nodes_.begin(); }
    iterator end() { return nodes_.end(); }
    const_iterator begin() const { return nodes_.begin(); }
    const_iterator end() const { return nodes_.end(); }
    reverse_iterator rbegin() { return nodes_.rbegin(); }
    reverse_iterator rend() { return nodes_.rend(); }
    const_reverse_iterator rbegin() const { return nodes_.rbegin(); }
    const_reverse_iterator rend() const { return nodes_.rend(); }

private:
    Hash hasher_;     // computes a node's position on the ring
    map_type nodes_;  // hash -> node, ordered by hash
};
#endif
test.cpp只是簡單實現consistent hashing, 並沒實現虛擬節點,關於虛擬節點的例子在下面testVnode.cpp中
test.cpp
test.cpp如下:
#include <iostream>
#include <string>
#include <boost/functional/hash.hpp>
#include <stdint.h> // for uint32_t
#include <boost/format.hpp>
#include <boost/crc.hpp> //for crc_optimal
#include "consistent_hash_map.hpp"
struct crc32_hasher
{
uint32_t operator()(const std::string& node)
{
//定義crc_optimal對象
boost::crc_32_type ret;
//處理字符串,生成CRC序列
ret.process_bytes(node.c_str(),node.size());
//checksum()返回CRC序列
return ret.checksum();
}
typedef uint32_t result_type;
};
int main(int argc, char const *argv[])
{
typedef consistent_hash_map<std::string,crc32_hasher> consistent_hash_t;
consistent_hash_t consistent_hash_;
//定義格式化字符串的類
boost::format node_fmt("192.168.1.%1%");
for(std::size_t i=0;i<3;++i)
{
std::string node = boost::str(node_fmt % i);
consistent_hash_.insert(node);
std::cout<<boost::format("add node: %1%") % node << std::endl;
}
{
std::cout<<"========================================================="<<std::endl;
for(consistent_hash_t::iterator it = consistent_hash_.begin();it != consistent_hash_.end(); ++it)
{
std::cout<<boost::format("node: %1%,%2%") % it->second % it->first << std::endl;
}
}
// 輸出相關數據關鍵字hash值映射的節點
{
consistent_hash_t::iterator it;
it = consistent_hash_.find(290235110); //290235110代表數據關鍵字的hash值
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
}
{
consistent_hash_t::iterator it;
it = consistent_hash_.find(2286285664);
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
}
{
consistent_hash_t::iterator it;
it = consistent_hash_.find(4282565578);
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
}
std::cout<<"========================================================="<<std::endl;
{// 刪除192.168.1.1
std::string node = boost::str(node_fmt % 1);
consistent_hash_.erase(node);
for(consistent_hash_t::iterator it = consistent_hash_.begin();it != consistent_hash_.end(); ++it)
{
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
}
}
std::cout<<"========================================================="<<std::endl;
{
consistent_hash_t::iterator it;
it = consistent_hash_.find(4282565578);
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
std::cout<<"-------------------------------------------"<<std::endl;
consistent_hash_.erase(it);
for(consistent_hash_t::iterator it = consistent_hash_.begin();it != consistent_hash_.end(); ++it)
{
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
}
}
std::cout<<"========================================================="<<std::endl;
{
std::cout<<"-------------------------------------------"<<std::endl;
consistent_hash_t::iterator it;
it = consistent_hash_.find(4282565578);
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
std::cout<<"-------------------------------------------"<<std::endl;
it = consistent_hash_.find(4282565576);
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
std::cout<<"-------------------------------------------"<<std::endl;
consistent_hash_.erase(it);
for(consistent_hash_t::iterator it = consistent_hash_.begin();it != consistent_hash_.end(); ++it)
{
std::cout<<boost::format("node:%1%,%2%") % it->second % it->first << std::endl;
}
std::cout<<"-------------------------------------------"<<std::endl;
}
std::cout<<"========================================================="<<std::endl;
{
std::cout<<"-------------------------------------------"<<std::endl;
consistent_hash_t::iterator it;
it = consistent_hash_.find(4282565578);
if(it == consistent_hash_.end())
{
std::cout<<"not found, consistent_hash is empty"<<std::endl;
}
}
return 0;
}
編譯:g++ test.cpp -o test
運行:./test
test結果
結果如下:
testVnode.cpp
testVnode.cpp如下:
#include <stdint.h>
#include <iostream>
#include <string>
#include <boost/functional/hash.hpp>
#include <boost/format.hpp>
#include <boost/crc.hpp>
#include "consistent_hash_map.hpp"
// Addresses of the physical hosts; vnode_t::node_id is an index into this
// table. Both the strings and the pointers are const so the table cannot be
// modified at run time.
const char* const nodes[] = {
    "192.168.1.100",
    "192.168.1.101",
    "192.168.1.102",
    "192.168.1.103",
    "192.168.1.104"
};
// A virtual node: one of several ring positions owned by a physical host.
struct vnode_t
{
    // Default-constructs the virtual node 0 of host 0. The members are
    // zero-initialised so that to_str() never indexes `nodes` with an
    // indeterminate value.
    vnode_t() : node_id(0), vnode_id(0) {}

    // n is the index of the physical host in `nodes`, v is the number of
    // this virtual node within that host.
    vnode_t(std::size_t n, std::size_t v) : node_id(n), vnode_id(v) {}

    // Returns "<host address>-<vnode id>", the string that is hashed to
    // place this virtual node on the ring.
    std::string to_str() const
    {
        return boost::str(boost::format("%1%-%2%") % nodes[node_id] % vnode_id);
    }

    std::size_t node_id;   // index into the global `nodes` table
    std::size_t vnode_id;  // virtual-node number within the host
};
struct crc32_hasher
{
uint32_t operator()(const vnode_t& node)
{
boost::crc_32_type ret;
std::string vnode = node.to_str();
std::cout<<"vnode:"<<vnode<<std::endl;
ret.process_bytes(vnode.c_str(),vnode.size());
return ret.checksum();
}
typedef uint32_t result_type;
};
int main(int argc, char const *argv[])
{
typedef consistent_hash_map<vnode_t,crc32_hasher> consistent_hash_t;
consistent_hash_t consistent_hash_;
for(std::size_t i=0;i<5;++i)
{//每個節點插入100個虛擬節點
for(std::size_t j=0;j<100;j++)
{
consistent_hash_.insert(vnode_t(i,j));
}
}
//遍歷consistent_hash中的所有的vnode,統計每個虛擬節點的key的數量和每個主機包含key的數量
{
std::cout<<"========================================================="<<std::endl;
//sums統計每個主機可以包含的key數量
std::size_t sums[] = {0,0,0,0,0};
//處理圓環的鏈接點處第一個vnode
consistent_hash_t::iterator i = consistent_hash_.begin();
consistent_hash_t::reverse_iterator j = consistent_hash_.rbegin();
// 計算第一個節點包含的key數量
// static_cast<uint32_t>(-1)源碼爲UINT32_MAX, 但無法通過編譯,替代之
std::size_t n = i->first + static_cast<uint32_t>(-1) - j->first;
std::cout<<boost::format("vnode:%1%,hash:%2%,contains:%3%")
% i->second.to_str() % i->first % n << std::endl;
sums[i->second.node_id] += n;
uint32_t priv = i->first;
uint32_t cur;
consistent_hash_t::iterator end = consistent_hash_.end();
// 處理圓環中間的vnode
while(++i != end)
{
cur = i->first;
n = cur - priv;
std::cout<<boost::format("vnode:%1%,hash:%2%,contains:%3%")
% i->second.to_str() % cur % n << std::endl;
sums[i->second.node_id] += n;
priv = cur;
}
for(std::size_t i=0;i<5;++i)
{
std::cout<<boost::format("node:%1% contains:%2%") % nodes[i] % sums[i] <<std::endl;
}
}
//查找某個hash值對應的vnode 和 主機
{
consistent_hash_t::iterator it;
it = consistent_hash_.find(290235110);
std::cout<<boost::format("node:%1%,vnode:%2%,hash:%3%")
% nodes[it->second.node_id] % it->second.vnode_id % it->first << std::endl;
}
return 0;
}
testVnode結果自己測