幾種經典的Hash算法的實現(源代碼)

哈希算法將任意長度的二進制值映射爲固定長度的較小二進制值,這個小的二進制值稱爲哈希值。哈希值是一段數據極其緊湊的數值表示形式。對於密碼學哈希算法,如果散列一段明文而且哪怕只更改該段落的一個字母,隨後的哈希幾乎必然產生不同的值;要找到散列爲同一個值的兩個不同的輸入,在計算上是不可行的,所以數據的哈希值可以檢驗數據的完整性。


鏈表查找的時間複雜度爲O(N),二分法爲O(log₂N),B+ Tree爲O(log₂N),而Hash鏈表查找的平均時間複雜度爲O(1)(衝突嚴重時會退化)。

設計高效算法往往需要使用Hash鏈表,常數級的查找速度是任何別的算法無法比擬的,Hash鏈表的構造和衝突的不同實現方法對效率當然有一定的影響,然而Hash函數是Hash鏈表最核心的部分,下面是幾款經典軟件中使用到的字符串Hash函數實現,通過閱讀這些代碼,我們可以在Hash算法的執行效率、離散性、空間利用率等方面有比較深刻的瞭解。

下面分別介紹幾個經典軟件中出現的字符串Hash函數。

●PHP中出現的字符串Hash函數

/*
 * PJW-style hash over the first nKeyLength bytes of arKey.
 *
 * Every byte is folded in by shifting the accumulator left by four bits
 * and adding the byte. Whenever any of bits 28..31 end up set, they are
 * XOR-ed back in 24 bits lower and then cleared from the top.
 *
 * Bytes are read through plain `char`, so on platforms where `char` is
 * signed, values above 0x7F are sign-extended before being added.
 *
 * Returns 0 when nKeyLength is 0.
 */
static unsigned long hashpjw(char *arKey, unsigned int nKeyLength)
{
    unsigned long hash = 0;
    unsigned int i;

    for (i = 0; i < nKeyLength; i++) {
        unsigned long top;

        hash = (hash << 4) + arKey[i];

        top = hash & 0xF0000000;
        if (top != 0) {
            hash ^= top >> 24;  /* fold the high nibble into bits 4..7 */
            hash ^= top;        /* ...and clear it from bits 28..31 */
        }
    }

    return hash;
}

●OpenSSL中出現的字符串Hash函數

/*
 * Hash a NUL-terminated string by XOR-ing together its consecutive
 * 16-bit words, each shifted left by (word index mod 16).
 *
 * str: string to hash; NULL is accepted and hashes to 0.
 *
 * For odd-length strings the terminating NUL supplies the second byte of
 * the last word, so no byte beyond the terminator is read. Words are
 * taken in native byte order, so the result depends on host endianness.
 */
unsigned long lh_strhash(char *str)
{
    size_t i, l;
    unsigned long ret = 0;
    unsigned short w;

    if (str == NULL) {
        return 0;
    }

    /* number of 16-bit words, counting the NUL when the length is odd */
    l = (strlen(str) + 1) / 2;

    for (i = 0; i < l; i++) {
        /* memcpy instead of an (unsigned short *) cast: str need not be
         * 2-byte aligned, and the cast would violate strict aliasing */
        memcpy(&w, str + i * sizeof w, sizeof w);
        ret ^= (unsigned long)w << (i & 0x0f);
    }

    return ret;
}

 

/*
 * The following hash seems to work very well on normal text strings:
 * no collisions on /usr/dict/words and it distributes on %2^n quite
 * well, not as good as MD5, but still good.
 *
 * c: NUL-terminated string; NULL and "" both hash to 0.
 *
 * For each character, v combines a running position counter (0x100,
 * 0x200, ...) with the character value. The accumulator is rotated left
 * by a data-dependent amount r (0..15) within 32 bits, masked to 32 bits,
 * and XOR-ed with v*v. The final value folds the upper half into the
 * lower half.
 */
unsigned long lh_strhash(const char *c)
{
    unsigned long ret = 0;
    long n;
    unsigned long v;
    int r;

    if ((c == NULL) || (*c == '\0')) {
        return ret;
    }

    n = 0x100;
    while (*c) {
        v = n | (*c);
        n += 0x100;
        r = (int)((v >> 2) ^ v) & 0x0f;
        /* Rotate left by r. The right shift by (32 - r) is split in two
         * so that no single shift count reaches 32 when r == 0, which
         * would be undefined where unsigned long is 32 bits wide. */
        ret = (ret << r) | ((ret >> (31 - r)) >> 1);
        ret &= 0xFFFFFFFFL;
        ret ^= v * v;
        c++;
    }

    return (ret >> 16) ^ ret;
}

●MySql中出現的字符串Hash函數

#ifndef NEW_HASH_FUNCTION

 

/*
 * Compute the hash number of a key.
 *
 * key:    bytes to hash
 * length: number of bytes to read from key
 *
 * Starts from nr = 1 and a per-byte weight nr2 = 4 that grows by 3 after
 * every byte. Each byte (taken as an unsigned value) is multiplied by
 * ((nr & 63) + nr2), added to (nr << 8), and XOR-ed into nr.
 * Returns 1 when length is 0.
 */
static uint calc_hashnr(const byte *key,uint length)
{
    uint nr = 1;
    uint nr2 = 4;
    uint i;

    for (i = 0; i < length; i++) {
        uint ch = (uint) (uchar) key[i];

        nr ^= ((nr & 63) + nr2) * ch + (nr << 8);
        nr2 += 3;
    }

    return nr;
}

 

/*
 * Compute the hash number of a key, case-independently.
 *
 * key:    bytes to hash
 * length: number of bytes to read from key
 *
 * Same mixing as the case-sensitive variant, except every byte is passed
 * through toupper() first, so keys that differ only in letter case hash
 * to the same value. Returns 1 when length is 0.
 */
static uint calc_hashnr_caseup(const byte *key,uint length)
{
    uint nr = 1;
    uint nr2 = 4;

    while (length--) {
        /* Convert to uchar BEFORE calling toupper(): if byte is a signed
         * char type, a negative argument to a <ctype.h> function is
         * undefined behaviour. */
        uint ch = (uint) (uchar) toupper((uchar) *key++);

        nr ^= ((nr & 63) + nr2) * ch + (nr << 8);
        nr2 += 3;
    }

    return nr;
}

#else

/*
 * Fowler/Noll/Vo hash
 *
 * The basis of the hash algorithm was taken from an idea sent by email to
 * the IEEE Posix P1003.2 mailing list by Phong Vo and Glenn Fowler;
 * Landon Curt Noll later improved on their algorithm.
 *
 * The magic is in the interesting relationship between the special prime
 * 16777619 (2^24 + 403) and 2^32 and 2^8.
 *
 * This hash produces the fewest collisions of any function that we've
 * seen so far, and works well on both numbers and strings.
 *
 * key: bytes to hash
 * len: number of bytes to read from key
 *
 * The accumulator starts at 0; for every byte it is multiplied by the
 * prime and then XOR-ed with the byte taken as an unsigned value.
 * Returns 0 when len is 0.
 */
uint calc_hashnr(const byte *key, uint len)
{
    const uchar *bytes = (const uchar *) key;
    uint hash = 0;
    uint i;

    for (i = 0; i < len; i++) {
        hash *= 16777619;
        hash ^= (uint) bytes[i];
    }

    return hash;
}

 

/*
 * Case-independent multiply-by-16777619-then-XOR hash.
 *
 * key: bytes to hash
 * len: number of bytes to read from key
 *
 * Every byte is upper-cased with toupper() before being XOR-ed in, so
 * keys that differ only in letter case produce the same value.
 * Returns 0 when len is 0.
 */
uint calc_hashnr_caseup(const byte *key, uint len)
{
    const byte *end = key + len;
    uint hash = 0;

    for (; key < end; key++) {
        hash *= 16777619;
        /* Convert to uchar BEFORE calling toupper(): if byte is a signed
         * char type, a negative argument to a <ctype.h> function is
         * undefined behaviour. */
        hash ^= (uint) (uchar) toupper((uchar) *key);
    }

    return hash;
}

#endif

MySQL的字符串Hash函數還分別提供了區分大小寫(calc_hashnr)和不區分大小寫(calc_hashnr_caseup)兩個版本。

●另一個經典字符串Hash函數

/*
 * Multiplicative string hash with multiplier 31.
 *
 * str: NUL-terminated string; bytes are read as unsigned char.
 *
 * For every byte the accumulator becomes 31 * accumulator + byte, with
 * unsigned wrap-around. Returns 0 for the empty string.
 */
unsigned int hash(char *str)
{
    const unsigned char *cursor = (const unsigned char *)str;
    unsigned int acc = 0;

    while (*cursor != '\0') {
        acc = 31 * acc + *cursor;
        cursor++;
    }

    return acc;
}

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章