HashTable的一個詳細實現

注:修改完善於一個粗糙版本http://blog.csdn.net/aishen944/article/details/1483516

1,修改了原文代碼中的錯誤

2,主要解決了在擴容時hash效率較差的問題


 /* Public interface of a pointer-keyed hash table. Keys and values are
  * stored as raw pointers; the table never copies or frees what they
  * point to. */
 #ifndef _HASHTABLE_H
 #define _HASHTABLE_H
 
 /* Opaque table handle; the layout is private to the implementation. */
 struct hashtable; 
 /* Create a table with room for at least `size` slots (the slot count is
  * rounded up to a prime). `hash_func` maps a key to a hash value and
  * `test_func` returns non-zero when two keys are equal; passing NULL for
  * either selects pointer hashing / pointer identity. */
 struct hashtable*  hashtable_create(unsigned long size, unsigned long(*hash_func)(const void *key),
                                     int(*test_func)(const void *key1, const void *key2));
 
 /* Create a table whose keys are NUL-terminated C strings, hashed and
  * compared by content. */
 struct hashtable*  make_string_hashtable(unsigned long size);
 
 /* Insert key -> value. Returns 1 when the pair was stored and 0 when it
  * was not (for example because the key is already present; the existing
  * value is then left unchanged). */
 int hashtable_put(struct hashtable *ht, const void *key, void *value);
 
 /* Return the value stored under `key`, or NULL when the key is absent.
  * A stored NULL value cannot be told apart from "absent" through this
  * call; use hashtable_contains() for that. */
 void* hashtable_get(struct hashtable *ht, const void *key);
 
 /* Remove `key`. Returns 1 when an entry was removed, 0 when not found. */
 int hashtable_remove(struct hashtable *ht, const void *key);
 
 /* Return non-zero when `key` is present, 0 otherwise. */
 int hashtable_contains(struct hashtable *ht, const void *key);
 
 /* Replace the value of an existing key. Returns 1 on success and 0 when
  * the key is absent (nothing is inserted in that case). */
 int hashtable_set(struct hashtable *ht, const void *key, void *newvalue);
 
 /* Number of entries currently stored. */
 unsigned long hashtable_count(struct hashtable *ht);
 
 /* Call mapfunc(key, value, arg) for every stored entry, in slot order;
  * the last parameter is passed through as `arg`. Iteration stops early
  * as soon as mapfunc returns 0. */
 void hashtable_map(struct hashtable *ht, int(*mapfunc)(void*, void*, void*), void*);
 
 /* Remove every entry while keeping the current slot array. */
 void hashtable_clear(struct hashtable *ht); 
 
 /* Free the slot array and the table itself. Keys and values are not
  * freed. */
 void hashtable_close(struct hashtable *ht); 
 #endif

#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <iostream>
using namespace std;
// #include "hashtable.h"

/* Maximum load factor. The resize threshold is computed as
   size * HASH_MAX_FULLNESS, and the table grows once the entry count
   reaches that threshold. */
#define HASH_MAX_FULLNESS 0.75

/* Factor by which the slot count is multiplied when the table grows (the
   product is then rounded up to a prime). */
#define HASH_RESIZE_FACTOR 2

/* Home slot of `key` in a table of `size` slots.
   NOTE(review): `size` is not parenthesized in the expansion, so pass a
   simple expression. The inline hash_position() performs the same
   computation. */
#define HASH_POSITION(key, hash_func, size) ((hash_func)(key) % size)

/* 0/NULL is accepted as a key, so an all-ones pointer value is used to
   mark an empty slot instead. */
#define INVALID_PTR ((void*) ~(unsigned long)0)

/* Byte used with memset() to fill slots so that their key reads back as
   INVALID_PTR.
   NOTE(review): this relies on an all-0xff pointer comparing equal to
   INVALID_PTR, i.e. on sizeof(void*) == sizeof(unsigned long) - confirm
   for the target platform. */
#define INVALID_PTR_BYTE 0xff

/* True when the slot holds an entry (its key is not the empty marker). */
#define NON_EMPTY(mapping) ((mapping)->key != INVALID_PTR)

/* Maps a key to its hash value. */
typedef unsigned long (*hash_func_t)(const void *key);
/* Key equality predicate: returns non-zero when key1 and key2 are equal. */
typedef int (*test_func_t)(const void *key1, const void *key2);

/* One slot of the table. A slot whose key equals INVALID_PTR is empty. */
struct hash_mapping {
	void  *key;    /* Caller-owned key pointer, or INVALID_PTR when empty. */
	void  *value;  /* Caller-owned value pointer. */
};

// Function form of the HASH_POSITION macro: hashes `key` with `hash_func`
// and reduces the result modulo `size` to obtain the key's home slot index.
inline unsigned long hash_position(const void *key,hash_func_t hash_func,unsigned long size)
{
	const unsigned long hashval = hash_func(key);
	return hashval % size;
}

// Function form of the NON_EMPTY macro: true when the slot holds an entry,
// i.e. its key is not the INVALID_PTR empty marker.
inline bool non_empty(hash_mapping * mapping)
{
	const bool is_free = (mapping->key == INVALID_PTR);
	return !is_free;
}

/* Open-addressing hash table: a single array of slots probed linearly. */
struct hashtable {
	hash_func_t          hash_func;        /* Hash function pointer. */
	test_func_t          test_func;        /* Key equality function pointer (non-zero means equal). */
	struct hash_mapping *mappings;         /* Slot array of `size` elements. */
	unsigned long        count;            /* Number of occupied slots. */
	unsigned long        size;             /* Number of slots in `mappings` (a prime). */
	int                  prime_offset;     /* Index in the prime table where the next size search starts. */
	unsigned long        resize_threshold; /* size * HASH_MAX_FULLNESS; the table grows when count reaches it. */
};

/***********************************************************************
* Internal helper functions (file-local, not part of the public API).
***********************************************************************/

/* Round a requested slot count up to a prime.
 *
 * Scans the built-in prime table starting at index *prime_offset and
 * returns the first prime that is >= size. On success *prime_offset is set
 * to the index just past the returned prime, so a later call resumes the
 * search from there. Calls abort() when no remaining prime is large
 * enough. */
static unsigned long prime_size(unsigned long size, int *prime_offset) {
	static const unsigned long primes[] = {
		13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
		1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
		19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
		204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
		1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
		10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
		50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
		243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
		1174703521, 1527114613, 1837299131, 2147483647};
	const size_t prime_count = sizeof(primes) / sizeof(primes[0]);

	/* The offset is compared as an unsigned index, exactly as an int
	   compared against a sizeof expression would be. */
	size_t idx = (size_t)*prime_offset;

	while (idx < prime_count) {
		const unsigned long candidate = primes[idx];
		++idx;
		if (candidate < size)
			continue;
		*prime_offset = (int)idx;
		return candidate;
	}

	abort();    /* Requested size exceeds the largest prime in the table. */
}

/* Default hash function, used when the caller supplies none.
 *
 * Treats the key pointer itself as an integer and scrambles it with a
 * fixed sequence of "add left-shifted copy, xor right-shifted copy"
 * rounds. Per the original author's note this is Robert Jenkins' 32-bit
 * mix function with extra rounds for 64-bit values; the extra rounds are
 * compiled in only when unsigned long is wider than 32 bits. */
static unsigned long hash_pointer(const void *key) {
	/* Each row is one round: { left shift to add, right shift to xor }. */
	static const unsigned char rounds[][2] = {
		{12, 22}, {4, 9}, {10, 2}, {7, 12},
#if ULONG_MAX > 4294967295
		{44, 54}, {36, 41}, {42, 34}, {39, 44},
#endif
	};
	const size_t round_count = sizeof(rounds) / sizeof(rounds[0]);

	unsigned long mixed = (unsigned long)key;

	for (size_t r = 0; r < round_count; ++r) {
		mixed += (mixed << rounds[r][0]);
		mixed ^= (mixed >> rounds[r][1]);
	}

	return mixed;
}

/* Default key comparison, used when the caller supplies none: two keys
   are equal only when they are the very same pointer. Returns 1 for
   equal, 0 otherwise. */
static int cmp_pointer(const void *key1, const void *key2) {
	if (key1 != key2)
		return 0;
	return 1;
}

/* Hash function for string tables.
 *
 * `key` must point to a NUL-terminated string. The hash starts from the
 * first character and, for every following character, multiplies the
 * running value by 31 and adds the character (unsigned int arithmetic,
 * so it wraps). The empty string hashes to 0. The original note credits
 * GNOME glib as the source of the algorithm. */
static unsigned long hash_string(const void *key) {
	const char *cursor = (const char *)key;
	unsigned int acc = *cursor;

	if (acc == 0)
		return acc;

	while (*++cursor != '\0')
		acc = 31u * acc + *cursor;

	return acc;
}

/* Key comparison for string tables: both keys must be NUL-terminated
   strings. Returns 1 when their contents are identical, 0 otherwise. */
static int string_cmp_pointer(const void *key1, const void *key2) {
	const char *lhs = (const char *)key1;
	const char *rhs = (const char *)key2;

	return strcmp(lhs, rhs) == 0;
}

/**************************************************************************
* Hash table public functions.
**************************************************************************/

/* Allocate and initialize an empty hash table.
 *
 * size      - requested capacity; the slot count is the first prime in the
 *             built-in table that is >= size + 1 (prime_size() aborts if
 *             the request exceeds the largest prime).
 * hash_func - key hash function, or NULL to hash the key pointer itself.
 * test_func - key equality function (non-zero means equal), or NULL to
 *             compare key pointers for identity.
 *
 * Returns the new table, or NULL when memory cannot be allocated. Release
 * it with hashtable_close(). */
struct hashtable*  hashtable_create(unsigned long size, hash_func_t hash_func,
	test_func_t test_func) {
	struct hashtable *ht = (struct hashtable *)malloc(sizeof *ht);
	if (!ht)
		return NULL;

	ht->prime_offset = 0;
	unsigned long hsize = prime_size(size + 1, &ht->prime_offset);

	ht->mappings = (struct hash_mapping *)malloc(hsize * sizeof *ht->mappings);
	if (!ht->mappings) {
		/* Do not leak the header when the slot array cannot be allocated. */
		free(ht);
		return NULL;
	}
	/* Filling with 0xff bytes marks every slot as empty. */
	memset(ht->mappings, INVALID_PTR_BYTE, hsize * sizeof *ht->mappings);

	ht->hash_func = hash_func ? hash_func : hash_pointer;
	ht->test_func = test_func ? test_func : cmp_pointer;
	ht->count = 0;
	ht->size = hsize;
	ht->resize_threshold = (unsigned long)(hsize * HASH_MAX_FULLNESS);

	return ht;
}

struct hashtable*  make_string_hashtable(unsigned long size) {
	return hashtable_create(size, hash_string, string_cmp_pointer);
}

/* Locate the slot for `key` using linear probing.
 *
 * Probes slots (hash + 0) % size, (hash + 1) % size, ... and returns the
 * first slot that is either empty or holds a key equal to `key`. Callers
 * tell the two cases apart with non_empty()/NON_EMPTY on the result:
 * an empty slot means "not present, insert here".
 *
 * The key is hashed once up front instead of once per probe step; the
 * probe order is unchanged.
 *
 * If every probed slot is occupied by a different key, the last probed
 * slot is returned. hashtable_put() keeps the entry count at or below the
 * resize threshold, which is smaller than the slot count, so an empty
 * slot normally exists. */
static struct hash_mapping* find_mapping(struct hashtable *ht, const void *key) {
	const unsigned long hashval = ht->hash_func(key);
	struct hash_mapping *mapping = NULL;

	for (unsigned long i = 0; i < ht->size; ++i) {
		mapping = ht->mappings + (hashval + i) % ht->size;
		if (!non_empty(mapping) || ht->test_func(mapping->key, key))
			break;
	}

	return mapping;
}


/* Enlarge the table and re-insert every entry.
 *
 * Growing is expensive: a new slot array is allocated (current size times
 * HASH_RESIZE_FACTOR, rounded up to a prime; prime_size() aborts if that
 * exceeds its table) and every stored entry is hashed again into it with
 * linear probing.
 *
 * Returns 1 on success. Returns 0 when `ht` is NULL or the new array
 * cannot be allocated; in that case the table is left exactly as it was.
 * The previous slot array is freed once all entries have been moved. */
static int grow_hashtable(struct hashtable *ht) {
	if(!ht)return 0;

	/* Work on a copy of the offset so a failed allocation changes nothing. */
	int new_offset = ht->prime_offset;
	unsigned long newsize = prime_size(ht->size * HASH_RESIZE_FACTOR, &new_offset);

	struct hash_mapping *fresh = (struct hash_mapping *)malloc(newsize * sizeof *fresh);
	if (!fresh)
		return 0;
	memset(fresh, INVALID_PTR_BYTE, newsize * sizeof *fresh);

	for (unsigned long i = 0; i < ht->size; ++i) {
		struct hash_mapping *old = ht->mappings + i;
		if (!non_empty(old))
			continue;

		/* Hash once per entry, then probe linearly for a free slot. The
		   probe is bounded by the NEW size; the new array has more slots
		   than there are entries, so a free slot is expected. */
		const unsigned long hashval = ht->hash_func(old->key);
		struct hash_mapping *slot = fresh + hashval % newsize;
		for (unsigned long step = 1; non_empty(slot) && step < newsize; ++step)
			slot = fresh + (hashval + step) % newsize;

		slot->key = old->key;
		slot->value = old->value;
	}

	free(ht->mappings);    /* The old array is no longer referenced. */
	ht->mappings = fresh;
	ht->size = newsize;
	ht->prime_offset = new_offset;
	ht->resize_threshold = (unsigned long)(newsize * HASH_MAX_FULLNESS);

	return 1;
}

/* Insert key -> value; collisions are resolved by linear probing.
 *
 * The table is grown first when the entry count has reached the resize
 * threshold. The key and value pointers are stored as given (nothing is
 * copied).
 *
 * Returns 1 when the pair was stored. Returns 0 when nothing was stored:
 * either the key is already present (its value is left unchanged; use
 * hashtable_set() to replace it) or the table needed to grow and
 * grow_hashtable() reported failure. Refusing the insert in the latter
 * case keeps the entry count at or below the threshold, so the table
 * always retains empty slots for lookups to stop at. */
int hashtable_put(struct hashtable *ht, const void *key, void *value) {
	if (ht->count >= ht->resize_threshold && !grow_hashtable(ht))
		return 0;

	struct hash_mapping *mapping = find_mapping(ht, key);

	if (non_empty(mapping))
	{
		/* The key already exists in the table. */
		return 0;
	}

	mapping->key = (void*)key;
	mapping->value = value;
	ht->count += 1;

	return 1;
}

/* Look up `key` and return its value pointer, or NULL when the key is not
   present. A key stored with a NULL value also yields NULL. */
void* hashtable_get(struct hashtable *ht, const void *key) {
	struct hash_mapping *slot = find_mapping(ht, key);

	if (!NON_EMPTY(slot))
		return NULL;

	return slot->value;
}

int hashtable_remove(struct hashtable *ht, const void *key) {
	struct hash_mapping *mapping = find_mapping(ht, key);
	if(!NON_EMPTY(mapping))  /* Not found. */
		return 0;

	/* Remove item. */
	memset(mapping, INVALID_PTR_BYTE, sizeof(struct hash_mapping));
	ht->count -= 1;

	return 1;
}

int hashtable_contains(struct hashtable *ht, const void *key) {
	return NON_EMPTY(find_mapping(ht, key));
}

/* Replace the value stored under an existing key.
   Returns 1 when the value was updated, 0 when the key is not present (in
   which case nothing is inserted). */
int hashtable_set(struct hashtable *ht, const void *key, void *newvalue) {
	struct hash_mapping *slot = find_mapping(ht, key);

	if (NON_EMPTY(slot)) {
		/* Key exists: overwrite only the value. */
		slot->value = newvalue;
		return 1;
	}

	return 0;
}

/* Return the number of entries currently stored in the table. */
unsigned long hashtable_count(struct hashtable *ht) {
	const unsigned long stored = ht->count;

	return stored;
}

/* Visit every stored entry in slot order.
   mapfunc is called as mapfunc(key, value, maparg) for each occupied
   slot; iteration stops as soon as it returns 0. The slot count is read
   once before the loop starts. */
void hashtable_map(struct hashtable *ht,
				   int(*mapfunc)(void*, void*, void*), void* maparg) {
	const unsigned slot_count = ht->size;
	unsigned idx = 0;

	while (idx < slot_count) {
		struct hash_mapping *slot = ht->mappings + idx;
		++idx;

		if (!non_empty(slot))
			continue;    /* Skip empty slots. */

		if (!mapfunc(slot->key, slot->value, maparg))
			break;       /* Callback asked to stop. */
	}
}

/* Drop every entry: all slots are reset to the empty marker and the entry
   count returns to zero. The slot array keeps its current size. */
void hashtable_clear(struct hashtable *ht) {
	const size_t table_bytes = ht->size * sizeof(struct hash_mapping);

	memset(ht->mappings, INVALID_PTR_BYTE, table_bytes);
	ht->count = 0;
}

/* Destroy a table: frees the slot array and the table structure. The keys
   and values themselves are not freed. Passing NULL is a no-op, mirroring
   free(). */
void hashtable_close(struct hashtable *ht) {
	if (!ht)
		return;

	free(ht->mappings);
	free(ht);
}

測試代碼

int main()
{
	struct hashtable* ht = make_string_hashtable(10);

	//測試擴容的情況
	//char *strKey[] = {"123","234","345","456","df","sdf","wer","sdf","xd","dsf","sxdf","dfdf","sdf"};
	//char *strValue[] = {"123","234","345","456","df","sdf","wer","sdf","xd","dsf","sxdf","dfdf","sdf"};
	
	//相同鍵值不同的情況,第二個鍵值不會被插入
	//char *strKey[] = {"123","123","345","456","df","sdf","wer","sdf","xd","dsf","sxdf","dfdf","sdf"};
	//char *strValue[] = {"xxx","yyy","345","456","df","sdf","wer","sdf","xd","dsf","sxdf","dfdf","sdf"};


	//修改一下hash函數,使得每一次都返回相同的值,測試碰撞問題
	//char *strKey[] = {"123","234","345"};
	//char *strValue[] = {"xxx","yyy","zzz"};

	//修改一下hash函數,使得每一次都返回相同的值,測試碰撞問題和擴容問題
	char *strKey[] = {"123","123","345","456","df","sdf","wer","sdf","xd","dsf","sxdf","dfdf","sdf"};
	char *strValue[] = {"xxx","yyy","345","456","df","sdf","wer","sdf","xd","dsf","sxdf","dfdf","sdf"};


	int i;
	for (i = 0; i < 13; ++i)
		hashtable_put(ht,strKey[i],strValue[i]);

	for(i = 0; i < 13; ++i)
		if(hashtable_get(ht,strKey[i]))
			cout<<(char *)hashtable_get(ht,strKey[i])<<endl;

	if (!hashtable_get(ht,"346"))	
		cout<<"not in"<<endl;
}





發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章