常用的字符串Hash函數還有ELFHash,APHash等等,都是十分簡單有效的方法。這些函數使用
位運算使得每一個字符都對最後的函數值產生影響。另外還有以MD5和SHA1爲代表的雜湊函數,
這些函數幾乎不可能找到碰撞。
常用字符串哈希函數有BKDRHash,APHash,DJBHash,JSHash,RSHash,SDBMHash,
PJWHash,ELFHash等等。對於以上幾種哈希函數,我對其進行了一個小小的評測。
| Hash函數 | 數據1 | 數據2 | 數據3 | 數據4 | 數據1得分 | 數據2得分 | 數據3得分 | 數據4得分 | 平均分 |
|---|---|---|---|---|---|---|---|---|---|
| BKDRHash | 2 | 0 | 4774 | 481 | 96.55 | 100 | 90.95 | 82.05 | 92.64 |
| APHash | 2 | 3 | 4754 | 493 | 96.55 | 88.46 | 100 | 51.28 | 86.28 |
| DJBHash | 2 | 2 | 4975 | 474 | 96.55 | 92.31 | 0 | 100 | 83.43 |
| JSHash | 1 | 4 | 4761 | 506 | 100 | 84.62 | 96.83 | 17.95 | 81.94 |
| RSHash | 1 | 0 | 4861 | 505 | 100 | 100 | 51.58 | 20.51 | 75.96 |
| SDBMHash | 3 | 2 | 4849 | 504 | 93.1 | 92.31 | 57.01 | 23.08 | 72.41 |
| PJWHash | 30 | 26 | 4878 | 513 | 0 | 0 | 43.89 | 0 | 21.95 |
| ELFHash | 30 | 26 | 4878 | 513 | 0 | 0 | 43.89 | 0 | 21.95 |
其中數據1爲100000個字母和數字組成的隨機串哈希衝突個數。數據2爲100000個有意義的英文句
子哈希衝突個數。數據3爲數據1的哈希值與1000003(大素數)求模後存儲到線性表中衝突的個數。
數據4爲數據1的哈希值與10000019(更大素數)求模後存儲到線性表中衝突的個數。
經過比較,得出以上平均得分。平均數爲平方平均數。可以發現,BKDRHash無論是在實際效果還是
編碼實現中,效果都是最突出的。APHash也是較爲優秀的算法。DJBHash,JSHash,RSHash與
SDBMHash各有千秋。PJWHash與ELFHash效果最差,但得分相似,其算法本質是相似的。
在信息學競賽中,要本着易於編碼調試的原則,個人認爲BKDRHash是最適合記憶和使用的。
CmYkRgB123原創,歡迎建議、交流、批評和指正。
附:各種哈希函數的C語言程序代碼
/*
 * SDBM hash.
 *
 * Hashes the NUL-terminated string `str` by folding each character into
 * the accumulator as hash = 65599 * hash + c, written with shifts.
 * Arithmetic is unsigned and wraps on overflow.
 *
 * Returns the hash with the top bit cleared (masked with 0x7FFFFFFF).
 * An empty string hashes to 0.
 */
unsigned int SDBMHash(char *str)
{
    unsigned int hash = 0;

    while (*str)
    {
        // (hash << 6) + (hash << 16) - hash == 65599 * hash
        hash = (*str++) + (hash << 6) + (hash << 16) - hash;
    }

    return (hash & 0x7FFFFFFF);
}
/*
 * RS hash.
 *
 * Hashes the NUL-terminated string `str` with a multiplicative scheme
 * whose multiplier changes every step: hash = hash * a + c, after which
 * a is multiplied by the constant b. Arithmetic is unsigned and wraps
 * on overflow.
 *
 * Returns the hash with the top bit cleared (masked with 0x7FFFFFFF).
 * An empty string hashes to 0.
 */
unsigned int RSHash(char *str)
{
    unsigned int b = 378551;
    unsigned int a = 63689;
    unsigned int hash = 0;

    while (*str)
    {
        hash = hash * a + (*str++);
        a *= b; // evolve the multiplier for the next character
    }

    return (hash & 0x7FFFFFFF);
}
/*
 * JS hash.
 *
 * Hashes the NUL-terminated string `str`. Starting from the seed
 * 1315423911, each character is mixed in by XOR-ing the accumulator
 * with (hash << 5) + c + (hash >> 2).
 *
 * Returns the hash with the top bit cleared (masked with 0x7FFFFFFF).
 * An empty string hashes to the seed, 1315423911.
 */
unsigned int JSHash(char *str)
{
    unsigned int hash = 1315423911;

    while (*str)
    {
        hash ^= ((hash << 5) + (*str++) + (hash >> 2));
    }

    return (hash & 0x7FFFFFFF);
}
/*
 * P. J. Weinberger hash.
 *
 * Hashes the NUL-terminated string `str`. Each character is added after
 * shifting the accumulator left by one eighth of the width of
 * unsigned int. Whenever any of the top one-eighth bits become set,
 * they are folded back into the low part of the hash and then cleared.
 *
 * Returns the hash with the top bit cleared (masked with 0x7FFFFFFF).
 * An empty string hashes to 0.
 */
unsigned int PJWHash(char *str)
{
    unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
    unsigned int ThreeQuarters = (unsigned int)((BitsInUnsignedInt * 3) / 4);
    unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8);
    // Mask selecting the top OneEighth bits of the accumulator.
    unsigned int HighBits = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
    unsigned int hash = 0;
    unsigned int test = 0;

    while (*str)
    {
        hash = (hash << OneEighth) + (*str++);
        if ((test = hash & HighBits) != 0)
        {
            // Fold the overflowing high bits downwards, then drop them.
            hash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits));
        }
    }

    return (hash & 0x7FFFFFFF);
}
/*
 * ELF hash.
 *
 * Hashes the NUL-terminated string `str`. Each character is added after
 * shifting the accumulator left by 4 bits. Whenever any of bits 28..31
 * become set, they are XOR-ed back in shifted right by 24 and then
 * cleared from the accumulator.
 *
 * Returns the hash with the top bit cleared (masked with 0x7FFFFFFF).
 * An empty string hashes to 0.
 */
unsigned int ELFHash(char *str)
{
    unsigned int hash = 0;
    unsigned int x = 0;

    while (*str)
    {
        hash = (hash << 4) + (*str++);
        if ((x = hash & 0xF0000000L) != 0)
        {
            hash ^= (x >> 24); // fold the top nibble into the low bits
            hash &= ~x;        // then clear the top nibble
        }
    }

    return (hash & 0x7FFFFFFF);
}
/*
 * BKDR hash.
 *
 * Hashes the NUL-terminated string `str` as a polynomial in the seed:
 * hash = hash * seed + c for each character. Arithmetic is unsigned and
 * wraps on overflow.
 *
 * Returns the hash with the top bit cleared (masked with 0x7FFFFFFF).
 * An empty string hashes to 0.
 */
unsigned int BKDRHash(char *str)
{
    unsigned int seed = 131; // 31 131 1313 13131 131313 etc..
    unsigned int hash = 0;

    while (*str)
    {
        hash = hash * seed + (*str++);
    }

    return (hash & 0x7FFFFFFF);
}
/*
 * DJB hash.
 *
 * Hashes the NUL-terminated string `str`. Starting from 5381, each
 * character is mixed in as hash = hash * 33 + c, written as
 * hash += (hash << 5) + c. Arithmetic is unsigned and wraps on overflow.
 *
 * Returns the hash with the top bit cleared (masked with 0x7FFFFFFF).
 * An empty string hashes to 5381.
 */
unsigned int DJBHash(char *str)
{
    unsigned int hash = 5381;

    while (*str)
    {
        hash += (hash << 5) + (*str++);
    }

    return (hash & 0x7FFFFFFF);
}
/*
 * AP hash.
 *
 * Hashes the NUL-terminated string `str`, alternating between two
 * mixing steps depending on whether the character index is even or odd:
 *   even: hash ^=  ((hash << 7)  ^ c ^ (hash >> 3))
 *   odd:  hash ^= ~((hash << 11) ^ c ^ (hash >> 5))
 *
 * Returns the hash with the top bit cleared (masked with 0x7FFFFFFF).
 * An empty string hashes to 0.
 */
unsigned int APHash(char *str)
{
    unsigned int hash = 0;
    int i;

    for (i = 0; *str; i++)
    {
        if ((i & 1) == 0)
        {
            hash ^= ((hash << 7) ^ (*str++) ^ (hash >> 3));
        }
        else
        {
            hash ^= (~((hash << 11) ^ (*str++) ^ (hash >> 5)));
        }
    }

    return (hash & 0x7FFFFFFF);
}