題目要求:給你兩個txt文檔,一個是發送垃圾郵件的黑名單郵箱地址,另一個是待檢測的郵箱地址,現要求給出一種方案,儘快將黑名單當中的地址從待測郵箱地址中分離出來。
大家都知道,對於這一類問題,散列表所給出的解決方案雖然需要消耗大量的物理空間,但在時間上卻有很大的優勢。不同的哈希函數有不同的效果,在特定的環境下各有所長。在這裏作者給出了十餘種哈希函數,並逐一進行了測試,具體如下。不多說,上C代碼:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#define N 10000005
#define MOD 10000007
/*
 * One entry of the chained hash table: the stored address text and a link
 * to the next entry that fell into the same bucket.
 */
struct node
{
    char address[20];    /* NUL-terminated address; "" marks an unused head */
    struct node *next;   /* next entry in the same bucket, or NULL */
};

typedef struct node node;   /* the entry type itself */
typedef struct node *ptr;   /* pointer to an entry */
node str[N] = { 0 };
/*unsigned int Hash(char* str) // 154.4s
{
unsigned int hash = 0;
unsigned int x = 0;
unsigned int i = 0;
for(i = 0; i < sizeof(str); str++, i++)
{
hash = (hash << 4) + (*str);
if((x = hash & 0xF0000000L) != 0)
{
hash ^= (x >> 24);
}
hash &= ~x;
}
return hash;
} */
/*unsigned int Hash(char* str) // 148.9s
{
const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
const unsigned int ThreeQuarters = (unsigned int)((BitsInUnsignedInt * 3) / 4);
const unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8);
const unsigned int HighBits = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
unsigned int hash = 0;
unsigned int test = 0;
unsigned int i = 0;
for(i = 0; i < sizeof(str); str++, i++)
{
hash = (hash << OneEighth) + (*str);
if((test = hash & HighBits) != 0)
{
hash = (( hash ^ (test >> ThreeQuarters)) & (~HighBits));
}
}
return hash;
} */
/*unsigned int Hash(char* str) // 151.7s
{
unsigned int hash = 1315423911;
unsigned int i = 0;
for(i = 0; i < sizeof(str); str++, i++)
{
hash ^= ((hash << 5) + (*str) + (hash >> 2));
}
return hash;
} */
/*unsigned int Hash(char* str) // 148.1s
{
unsigned int b = 378551;
unsigned int a = 63689;
unsigned int hash = 0;
unsigned int i = 0;
for(i = 0; i < sizeof(str); str++, i++)
{
hash = hash * a + (*str);
a = a * b;
}
return hash;
} */
/*unsigned int Hash(char* str) // 155.8s
{
unsigned int hash = 0;
unsigned int x = 0;
unsigned int i = 0;
for(i = 0; i <sizeof(str); str++, i++)
{
hash = (hash << 4) + (*str);
if((x = hash & 0xF0000000L) != 0)
{
hash ^= (x >> 24);
}
hash &= ~x;
}
return hash;
} */
/*unsigned int Hash(char* str) // 152.9s
{
unsigned int seed = 131; // 31 131 1313 13131 131313 etc..
unsigned int hash = 0;
unsigned int i = 0;
for(i = 0; i <sizeof(str); str++, i++)
{
hash = (hash * seed) + (*str);
}
return hash;
} */
/*unsigned int Hash(char str[]) 200s
{
unsigned int b = 217;
unsigned int a = 119;
unsigned int hash = 0;
unsigned int i = 0;
for(i = 0; i <15; i++)
{
hash = hash + (str[i]+str[i+1]+str[i+2])*a +b*str[i];
a = a + str[i];
}
hash=hash-2800000;
return hash;
} */
/*unsigned int Hash(char* str) //146.4s
{
unsigned int hash = 0;
unsigned int i = 0;
for(i = 0; i < sizeof(str); str++, i++)
{
hash = (*str) + (hash << 6) + (hash << 16) - hash;
}
return hash;
} */
/*unsigned int Hash(char* str) //149.6s
{
unsigned int hash = 5381;
unsigned int i = 0;
for(i = 0; i < sizeof(str); str++, i++)
{
hash = ((hash << 5) + hash) + (*str);
}
return hash;
} */
/*unsigned int Hash(char* str) //147s
{
unsigned int hash = sizeof(str);
unsigned int i = 0;
for(i = 0; i < sizeof(str); str++, i++)
{
hash = ((hash << 5) ^ (hash >> 27)) ^ (*str);
}
return hash;
} */
/*unsigned int Hash(char* str) //141.2s
{
unsigned int hash = 0;
unsigned int i = 0;
for(i = 0; i < sizeof(str); str++, i++)
{
hash = hash << 7 ^ (*str);
}
return hash;
} */
/*unsigned int Hash(char* str) //151.3s
{
const unsigned int fnv_prime = 0x811C9DC5;
unsigned int hash = 0;
unsigned int i = 0;
for(i = 0; i < sizeof(str); str++, i++)
{
hash *= fnv_prime;
hash ^= (*str);
}
return hash;
} */
/*
 * Hash - compute an unsigned int hash of a NUL-terminated string.
 *
 * Characters at even positions and odd positions are mixed into the running
 * value with two different shift/xor formulas, starting from 0xAAAAAAAA.
 *
 * The whole string is consumed, up to (not including) the terminating NUL.
 * The loop bound must not be sizeof(str): str is a pointer, so that would
 * yield the pointer size rather than the string length, hashing only a fixed
 * prefix and reading past the terminator of short strings.
 *
 * str:     string to hash; NULL is treated like the empty string.
 * returns: the hash value (0xAAAAAAAA for an empty or NULL string).
 */
unsigned int Hash(const char *str)
{
    unsigned int hash = 0xAAAAAAAA;
    unsigned int i;

    if (str == NULL)
    {
        return hash;
    }

    for (i = 0; str[i] != '\0'; i++)
    {
        /* Go through unsigned char so the result does not depend on
         * whether plain char is signed on this platform. */
        unsigned int c = (unsigned char)str[i];

        if ((i & 1) == 0)
        {
            hash ^= (hash << 7) ^ (c * (hash >> 3));
        }
        else
        {
            hash ^= ~((hash << 11) + (c ^ (hash >> 5)));
        }
    }
    return hash;
}
/*
 * read - load the blacklist file into the chained hash table.
 *
 * Opens "黑名單.txt", reads whitespace-separated tokens and stores each
 * distinct token in the bucket selected by Hash(token) % MOD. A bucket head
 * whose address is the empty string is unused and is filled first; further
 * entries for the same bucket are appended to its chain. A token that is
 * already present anywhere in the chain is not stored again.
 *
 * Tokens are read with a field width of 19 so they always fit in the
 * 20-byte buffers; a longer token is consumed in pieces of at most 19 bytes.
 *
 * str: bucket array to fill; it must provide at least MOD zero-initialized
 *      entries.
 *
 * Terminates the program with a failure status if the file cannot be opened
 * or a chain node cannot be allocated.
 */
void read(node *str)
{
    FILE *fp;
    char str_read[20];
    ptr p, q;

    fp = fopen("黑名單.txt", "r");
    if (fp == NULL)
    {
        printf("cannot open address.txt file!\n");
        exit(EXIT_FAILURE);
    }

    while (fscanf(fp, "%19s", str_read) == 1)
    {
        p = &str[Hash(str_read) % MOD];

        /* Unused bucket: the head node itself stores the address. */
        if (p->address[0] == '\0')
        {
            strcpy(p->address, str_read);
            continue;
        }

        /* Walk the whole chain, tail included, looking for a duplicate;
         * append a new node only when the end is reached without a match. */
        for (;;)
        {
            if (strcmp(p->address, str_read) == 0)
            {
                break;
            }
            if (p->next == NULL)
            {
                q = malloc(sizeof *q);
                if (q == NULL)
                {
                    printf("out of memory while loading the blacklist!\n");
                    fclose(fp);
                    exit(EXIT_FAILURE);
                }
                strcpy(q->address, str_read);
                q->next = NULL;
                p->next = q;
                break;
            }
            p = p->next;
        }
    }
    fclose(fp);
}
/*
 * search - test whether an address is stored in the hash table.
 *
 * Selects the bucket Hash(str_search) % MOD and compares str_search with
 * every node of that bucket's chain: the head, each linked node, and the
 * last node.
 *
 * str_search: NUL-terminated address to look up.
 * str:        bucket array; it must provide at least MOD entries.
 * returns:    1 if the address is present, 0 otherwise. A NULL or empty
 *             str_search yields 0, because an empty address is what marks
 *             an unused bucket head and must not count as a match.
 */
int search(char *str_search, node *str)
{
    ptr p;

    if (str_search == NULL || str_search[0] == '\0')
    {
        return 0;
    }

    for (p = &str[Hash(str_search) % MOD]; p != NULL; p = p->next)
    {
        if (strcmp(p->address, str_search) == 0)
        {
            return 1;
        }
    }
    return 0;
}
/*
 * main - check every address of the input file against the blacklist.
 *
 * Opens "待測地址.txt" for reading and the result file for writing, loads
 * the blacklist into the global table with read(), then for each
 * whitespace-separated token of the input writes one line to the result
 * file: the token, three tabs, and the value returned by search()
 * (1 = blacklisted, 0 = not).
 *
 * Tokens are read with a field width of 19 so they fit the 20-byte buffer.
 *
 * returns: 0 on success; exits with a failure status if either file cannot
 *          be opened or the result file cannot be completely written.
 */
int main(void)
{
    int a;
    FILE *fp, *fp_1;
    char str_search[20];

    fp = fopen("待測地址.txt", "r");
    if (fp == NULL)
    {
        printf("cannot open this input.txt file!\n");
        exit(EXIT_FAILURE);
    }

    fp_1 = fopen("F:\\test\\test3(2.1.1).txt", "w");
    /* Check the handle that was just opened (fp_1), not the input handle. */
    if (fp_1 == NULL)
    {
        printf("cannot open this output.txt file!\n");
        fclose(fp);
        exit(EXIT_FAILURE);
    }

    read(str);

    while (fscanf(fp, "%19s", str_search) == 1)
    {
        a = search(str_search, str);
        fprintf(fp_1, "%s\t\t\t%d\n", str_search, a);
    }

    fclose(fp);
    /* fclose flushes buffered output, so a failure here means lost results. */
    if (fclose(fp_1) != 0)
    {
        printf("cannot write this output.txt file!\n");
        exit(EXIT_FAILURE);
    }
    return 0;
}