從 UTF-8 文件中讀取內容並轉換成 Unicode

/**
 * Loads the sensitive-word list from a UTF-8 text file, one word per line.
 *
 * Every non-empty line is converted from UTF-8 to wide characters and handed
 * to FormatWord(). When FormatWord() returns -2 the line is counted in
 * m_ErrorNum and logged; when it returns a positive value the word is
 * inserted into m_SetWord. m_ErrorNum is reset to 0 once the file is open.
 *
 * A leading UTF-8 byte-order mark (EF BB BF) is skipped only when it is
 * actually present, so files saved without a BOM keep their first bytes.
 * A line that does not fit into MAXTXTLEN - 1 bytes is discarded and counted
 * in m_ErrorNum instead of overrunning the line buffer.
 *
 * NOTE(review): like the original code, this assumes a UNICODE build
 * (TCHAR == wchar_t), because the path and the decoded words go through
 * the wide-character Win32 conversion APIs - confirm for this project.
 *
 * @param strPath  Path of the word file. It is converted to the ANSI code
 *                 page for fopen(); a path that cannot be converted or
 *                 opened makes the function return 0.
 * @return 1 on success, 0 when the file cannot be opened or an MFC
 *         exception is caught while reading.
 */
int CCheckSensitiveWord::ReadSenSitiveWordFromFile( CString &strPath)
{
	FILE *fpp = NULL;
	try
	{
		// Narrow fopen() interprets file names in the ANSI code page, so
		// convert with CP_ACP and bound the output by the real buffer size.
		char ch[MAXTXTLEN] = ("\0");
		if (0 == WideCharToMultiByte(CP_ACP, 0, strPath, -1, ch, static_cast<int>(sizeof(ch)), NULL, NULL))
		{
			return 0;
		}

		fpp = fopen(ch, ("r"));
		if (fpp == NULL)
		{
			return 0;
		}

		m_ErrorNum = 0;       // reset the count of rejected (over-length) entries

		// Skip the UTF-8 BOM if the file starts with one; otherwise go back
		// to the beginning so the first word is not truncated.
		unsigned char bom[3] = { 0, 0, 0 };
		const size_t nBom = fread(bom, 1, sizeof(bom), fpp);
		const bool bHasBom = (nBom == sizeof(bom)) &&
			bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF;
		if (!bHasBom)
		{
			rewind(fpp);
		}

		char chWord[MAXTXTLEN];
		TCHAR chhWord[MAXTXTLEN];
		int iLen = 0;             // bytes collected for the current line
		bool bTooLong = false;    // current line exceeded the buffer

		for (;;)
		{
			// fgetc() reports both end-of-file and read errors as EOF, so a
			// failing stream terminates the loop instead of spinning.
			const int c = fgetc(fpp);
			if (c != EOF && c != '\n')
			{
				if (iLen < MAXTXTLEN - 1)
				{
					chWord[iLen++] = static_cast<char>(c);
				}
				else
				{
					bTooLong = true;   // keep consuming until the line ends
				}
				continue;
			}

			// End of a line (or end of file, which also ends the last line).
			if (bTooLong)
			{
				// NOTE(review): counted as an over-length entry; assumes
				// MAXTXTLEN is larger than the word-length limit - confirm.
				m_ErrorNum++;
			}
			else if (iLen > 0)
			{
				chWord[iLen] = '\0';

				// A UTF-8 string never decodes to more UTF-16 units than it
				// has bytes, so MAXTXTLEN - 1 units always suffice and leave
				// room for the terminator written below.
				const int wcsLen = ::MultiByteToWideChar(CP_UTF8, 0, chWord,
					static_cast<int>(strlen(chWord)), chhWord, MAXTXTLEN - 1);
				chhWord[wcsLen] = _T('\0');

				CString strTemp(chhWord, wcsLen);
				const int iFRet = FormatWord(strTemp);
				if (-2 == iFRet)
				{
					m_ErrorNum++;
					EBOOK_LOG(LOG_INFO,_T("senseitive word len > 128 \n word=%s"),chhWord);
				}
				else if (iFRet > 0)
				{
					m_SetWord.insert(strTemp);
				}
			}

			iLen = 0;
			bTooLong = false;
			if (c == EOF)
			{
				break;
			}
		}

		fclose(fpp);
		fpp = NULL;
	}
	catch (CException* e)
	{
		// MFC exceptions are heap objects that the handler must release.
		e->Delete();
		if (fpp != NULL)
		{
			fclose(fpp);
		}
		return 0; // failure while reading
	}

	return 1; // success
}


 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章