哈希表對字符串的高效處理

哈希表對字符串的高效處理

哈希表（散列表）是一種非常高效的查找數據結構，在原理上也與其他的查找不盡相同，它迴避了關鍵字之間反覆比較的繁瑣，而是直接一步到位查找結果。當然，這也帶來了記錄之間沒有任何關聯的弊端。應該說，散列表對於那些查找性能要求高，記錄之間關係無要求的數據有非常好的適用性。注意對散列函數的選擇和處理衝突的方法。

Hash表在平均情況下可以用 O(1) 時間完成數據的插入、刪除和查找，但 hash 表不保證表中數據的有序性，因此在 hash 表中查找最大數據或者最小數據需要 O(N) 的時間。

【功能】：過濾字符串中的重複字符，每個字符只保留第一次出現的那一個。

【輸入】:1.常字符串；2.字符串長度；3.【out】用於輸出過濾後的字符串.

【輸出】：過濾後的字符串。

思路1, 循環判定法。第1步，先記錄字符串中第1個字符；第2步，從第2個字符開始，逐個判定當前字符是否與其前面的某個字符相同：與前面的字符都不相同則記錄到輸出中，否則跳過並繼續遍歷，直到字符串末尾（遇到 '\0'）。時間複雜度：O（n²）。

思路2, 哈希表過濾法。第1步，初始化一個哈希表，用以存儲字符（key）及字符出現的次數；第2步，遍歷字符串，在哈希表中對每個字符進行統計計數；第3步，再次遍歷字符串，統計次數爲1的字符直接輸出，統計次數多於1的字符只在第一次出現時輸出1次。時間複雜度：O（n）。

//循環判定法過濾掉重複字符

[cpp]view
plaincopy

/**
 * Remove repeated characters from a string, keeping only the first
 * occurrence of each character (pairwise-comparison method, O(n^2)).
 *
 * @param pInputStr   Input characters; may be NULL.
 * @param lInputLen   Maximum number of characters to read from pInputStr.
 *                    Scanning also stops early at a '\0' terminator.
 * @param pOutputStr  [out] Receives the filtered, NUL-terminated string.
 *                    Must have room for lInputLen + 1 characters.
 *                    If NULL, the function does nothing.
 *
 * When the input is NULL or lInputLen <= 0, an empty string is written to
 * pOutputStr so the caller never reads an uninitialised buffer.
 */
void stringFilter(const char *pInputStr, long lInputLen, char *pOutputStr)
{
    if (pOutputStr == NULL)
    {
        return;
    }
    if (pInputStr == NULL || lInputLen <= 0)
    {
        pOutputStr[0] = '\0';
        return;
    }

    long nCnt = 0; /* number of characters written to pOutputStr so far */

    for (long i = 0; i < lInputLen && pInputStr[i] != '\0'; i++)
    {
        /* Look for the current character among everything before it;
         * one match is enough to know it is a duplicate. */
        int seenBefore = 0;
        for (long j = 0; j < i; j++)
        {
            if (pInputStr[j] == pInputStr[i])
            {
                seenBefore = 1;
                break;
            }
        }

        if (!seenBefore)
        {
            pOutputStr[nCnt++] = pInputStr[i];
        }
    }

    pOutputStr[nCnt] = '\0';
}

//哈希表法過濾字符串中的重複字符

[cpp]view
plaincopy

/**
 * Remove repeated characters from a NUL-terminated string in O(n) using a
 * 256-entry lookup table indexed by byte value; the first occurrence of
 * each character is kept, in original order.
 *
 * @param pInputStr   NUL-terminated input string; may be NULL.
 * @param lInputLen   Not used: the input is read up to its '\0' terminator.
 *                    Kept so the signature matches stringFilter.
 * @param pOutputStr  [out] Receives the filtered, NUL-terminated string.
 *                    Must have room for strlen(pInputStr) + 1 characters.
 *                    If NULL, the function does nothing.
 *
 * When pInputStr is NULL an empty string is written to pOutputStr.
 */
void stringFilterFast(const char *pInputStr, long lInputLen, char *pOutputStr)
{
    enum { kByteValues = 256 };              /* one slot per possible byte value */
    unsigned char seen[kByteValues] = {0};   /* 1 once the byte has been emitted */
    long outPutCnt = 0;

    (void)lInputLen;

    if (pOutputStr == NULL)
    {
        return;
    }
    if (pInputStr == NULL)
    {
        pOutputStr[0] = '\0';
        return;
    }

    for (const char *pHashKey = pInputStr; *pHashKey != '\0'; ++pHashKey)
    {
        /* Plain char may be signed; convert to unsigned char so bytes
         * >= 0x80 index inside the table instead of before it. */
        const unsigned char key = (unsigned char)*pHashKey;

        if (!seen[key])
        {
            seen[key] = 1;
            pOutputStr[outPutCnt++] = *pHashKey;
        }
    }

    pOutputStr[outPutCnt] = '\0';
}

int main()  

{  

       constchar* strSrc = "desdefedeffdsswwwwwwwwwwdd";//"desdefedeffdssw";  

       char*strRst =new char[strlen(strSrc)+1];  

       stringFilter(strSrc,strlen(strSrc), strRst);  

       cout<< strRst << endl;  

       return0;  

}

//哈希表法查找字符串中第一個不重複的字符

【功能】：查找字符串中第一個不重複的字符。

【輸入】：字符串。

【輸出】：第一個不重複的字符。

時間複雜度O（n），思路類似於上面的哈希表過濾法。

[cpp]view
plaincopy

/**
 * Find the first character of a NUL-terminated string that occurs exactly
 * once, and report the outcome on standard output.
 *
 * The string is scanned twice: once to count every byte in a 256-entry
 * table, then again in order to find the first byte whose count is 1.
 *
 * @param pString  NUL-terminated input string; may be NULL.
 * @return The first non-repeated character, or '\0' when pString is NULL
 *         (nothing is printed in that case) or when every character repeats.
 */
char FirstNotRepeatingChar(const char* pString)
{
    if (pString == NULL)
    {
        return '\0';
    }

    // One counter per possible byte value. Only "zero / one / many" matters,
    // so counters saturate at 2; an unbounded unsigned char counter would
    // wrap around and make a character seen 257 times look unique.
    const unsigned int size = 256;
    unsigned char hashTable[size] = {0};

    for (const char* pHashKey = pString; *pHashKey != '\0'; ++pHashKey)
    {
        // Plain char may be signed; index by the unsigned byte value so
        // bytes >= 0x80 stay inside the table.
        const unsigned char key = static_cast<unsigned char>(*pHashKey);
        if (hashTable[key] < 2)
        {
            ++hashTable[key];
        }
    }

    // Second pass in original order: the first byte counted once wins.
    for (const char* pHashKey = pString; *pHashKey != '\0'; ++pHashKey)
    {
        if (hashTable[static_cast<unsigned char>(*pHashKey)] == 1)
        {
            const char rstChar = *pHashKey;
            std::cout << "The first not Repeate char is " << rstChar << std::endl;
            return rstChar;
        }
    }

    std::cout << "The first not Repeate char is not Exist " << std::endl;
    return '\0';
}

int main()  

{  

       constchar* strSrc = "google";  

       constchar* strSrc2 = "[email protected]";  

       constchar* strSrc3 = "aabbccddeeff";  

       constchar* strsrc4 = "11111111";    

       constchar* strArray[4] = {strSrc, strSrc2, strSrc3, strsrc4};  

      for(inti = 0; i < 4; i++)  

      {  

              FirstNotRepeatingChar(strArray[i]);  

       }  

       return0;  

}