hash函數進階一

hash函數進階一

通過一個例子來熟悉hash函數的功能作用以及如何使用。

0.例子:

(1)有100萬個IP地址,從中查詢是否存在待查的IP地址

(2)使用hash映射來實現

1.前言:

(1)hash表面上看起來只是將輸入對象散列到hash表中,其實在使用過程中包含以下步驟:

  • 將目標對象通過hash函數,得到對應的key鍵值(hash code),形成對應的hash對<key, value>
  • 通過某種散列方式將key鍵值儘可能均勻地放入hash表中
  • 衝突處理機制
(2)使用C語言的隨機函數生成IP地址,模擬海量數據處理

2.實現:

2.1.頭文件:存放需要生成的IP地址個數等公共數據

/* hash_ip.h - constants shared by the IP generator and the hash lookup. */
#ifndef HASH_IP_H
#define HASH_IP_H

/* Number of sample IP strings loaded into the hash table. */
#define MAXIPNUM		1000000
/* Number of buckets in the hash table. */
#define HASHTABLELEN	10000
/* Size of the buffer holding one IP string, terminating NUL included. */
#define IPSTRLEN		16

#endif /* HASH_IP_H */

2.2.源文件一:隨機生成樣本IP地址及待查IP地址

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "hash_ip.h"

/*
 * Prints MAXIPNUM + 1 pseudo-random dotted strings to stdout, one per line.
 *
 * Each line has three components, each in the range 0..254 ("%d.%d.%d").
 * The count is one more than MAXIPNUM, the number of lines beginhash()
 * loads; presumably the extra line is the address to look up
 * (TODO confirm against the hash program's main).
 *
 * Returns 0.
 */
int main(void)
{
	int index;

	/* Seed the generator from the current time. */
	srand((unsigned int)time(NULL));

	for (index = 0; index < MAXIPNUM + 1; index++)
	{
		/* Separate statements keep the order of the rand() calls defined. */
		int i = rand() % 255;
		int j = rand() % 255;
		int k = rand() % 255;

		printf("%d.%d.%d\n", i, j, k);
	}

	return 0;
}

2.3.源文件二:hash實現

(1)節點結構

/* One entry in a bucket's collision chain. */
struct hashNode
{
	char str[IPSTRLEN];	/* copy of the stored IP string */
	int hashcode;	/* full hash of str, as returned by gethashcode() */
	struct hashNode *next;	/* next entry in the same bucket, NULL at the tail */
	int value;	/* bucket's entry count at insertion time, i.e. this node's position in the chain */
};
(2)hash數組表

/* Bucket array: each slot heads a singly linked chain of hashNode entries. */
struct hashtable
{
	int value;	/* number of nodes currently in this bucket's chain */
	struct hashNode *next;	/* first node of the chain, NULL when the bucket is empty */
}hashTable[HASHTABLELEN];
(3)初始化hash表(使用鏈表來解決衝突):

void inittable()
{
	int index = 0;
	
	for(index = 0; index < HASHTABLELEN; index++)
	{
		hashTable[index].next = NULL;
		hashTable[index].value = 0;
	}
}
(4)映射和散列:將目標IP地址映射散列到hash表中:

/* gethashcode() is defined after this function; declare it so the call has a prototype. */
int gethashcode(const char *str);

/*
 * Reads up to MAXIPNUM lines from stdin and inserts each one into hashTable.
 *
 * Every line is hashed with gethashcode(); the bucket index is the hash
 * modulo HASHTABLELEN. Collisions are resolved by appending a new node to
 * the tail of the bucket's chain. Each node records the bucket's entry count
 * before the insertion, i.e. its zero-based position in the chain.
 *
 * Loading stops early on end of input, on a read error, or when a node
 * cannot be allocated (a message is then written to stderr).
 */
void beginhash()
{
	int index = 0, hashcode = 0, tmpindex = 0, ch = 0;
	size_t len = 0;
	char tmpstr[IPSTRLEN];
	struct hashNode *tmpNode = NULL, *node = NULL;

	while(index < MAXIPNUM)
	{
		/* fgets() is bounded by the buffer size; gets() is not. */
		if(fgets(tmpstr, sizeof tmpstr, stdin) == NULL)
		{
			break;
		}

		/* Strip the line terminator that fgets() keeps. */
		len = strcspn(tmpstr, "\r\n");
		if(tmpstr[len] == '\0' && len == sizeof tmpstr - 1)
		{
			/* Buffer filled without a newline: discard the rest of the
			 * line so the next read starts on a fresh line. */
			while((ch = getchar()) != EOF && ch != '\n')
			{
			}
		}
		tmpstr[len] = '\0';

		hashcode = gethashcode(tmpstr);
		tmpindex = hashcode % HASHTABLELEN;

		node = malloc(sizeof *node);
		if(node == NULL)
		{
			fprintf(stderr, "beginhash: out of memory after %d entries\n", index);
			break;
		}
		node->hashcode = hashcode;
		node->value = hashTable[tmpindex].value;
		node->next = NULL;
		/* Safe: tmpstr and node->str are both IPSTRLEN bytes. */
		strcpy(node->str, tmpstr);

		if(hashTable[tmpindex].next == NULL)
		{
			hashTable[tmpindex].next = node;
		}
		else
		{
			/* Walk to the tail so chain order matches insertion order. */
			tmpNode = hashTable[tmpindex].next;
			while(tmpNode->next != NULL)
			{
				tmpNode = tmpNode->next;
			}
			tmpNode->next = node;
		}

		hashTable[tmpindex].value++;
		index++;
	}
}
(5)hash函數:可以自行設計優化,使得更均勻,衝突更少

/*
 * Polynomial string hash: h = h * 2345 + byte, for every byte of str.
 *
 * The arithmetic is done in unsigned int so that wrap-around is well defined
 * (signed overflow is undefined behavior), and bytes are read as
 * unsigned char so the result does not depend on the signedness of char.
 *
 * The hash can be tuned for a more even spread. Variants tried in the
 * original listing: a 4-bit rotate/xor mix, a plain byte sum, the
 * shift form (h << 6) + (h << 16) - h (multiplier 65599), and
 * multiplier 1366.
 *
 * str: NUL-terminated string to hash; must not be NULL.
 * Returns the hash with the top bit cleared, so it is never negative.
 */
int gethashcode(const char *str)
{
	unsigned int hashcode = 0;
	const unsigned char *tmpstr = (const unsigned char *)str;

	while(*tmpstr)
	{
		hashcode = hashcode * 2345u + *tmpstr++;
	}

	/* Mask to 31 bits so callers can take "% HASHTABLELEN" of a non-negative int. */
	return (int)(hashcode & 0x7FFFFFFFu);
}
(6)查找:是否存在待查IP地址

void findstr(const char *str)
{
	int hashcode = 0, index = 0, tmpindex = 0, mark = 0;
	struct hashNode *tmpNode = NULL;
	
	hashcode = gethashcode(str);
	tmpindex = hashcode % HASHTABLELEN;
	tmpNode = hashTable[tmpindex].next;
	while(tmpNode != NULL)
	{
		if(tmpNode->hashcode != hashcode)
		{
			
		}
		else if(!strcmp(tmpNode->str, str))
		{
			mark = 1;
			printf("find str: %s in %d index, and depth is %d\n", str, tmpindex, tmpNode->value);
		}
		tmpNode = tmpNode->next;
	}
	
	if(0 == mark)
	{
		printf("not find str: %s\n", str);
	}
}

2.4.自動化測試:使用批處理

@rem Compile both programs with the cl command-line compiler.
@cl /nologo haship.c
@cl /nologo randip.c

@rem Remove the intermediate object files.
@del haship.obj	randip.obj

@rem Generate the IP list, then feed it to the hash program on stdin.
@call randip.exe > randip.txt
@call haship.exe < randip.txt > result.txt

@rem Remove the executables; randip.txt and result.txt are kept.
@del haship.exe randip.exe

@rem Keep the console window open until a key is pressed.
@pause


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章