真正UTF-8與GB2312間的轉換(兼容windows和Linux)

// Converting between UTF-8 and GB2312
// Author: Zeng Liubin
//
// Transcoding Chinese text is a problem programmers run into all the time. I
// have some experience with it, so I am sharing it here as a starting point.
//
// Many articles online convert between UTF-8 and GB2312 with the approach
// below (for convenience it is called the "patchwork method" from here on):
//
//     // Convert UTF-8 to Unicode
//     void CChineseCodeLib::UTF_8ToUnicode(WCHAR* pOut,char *pText)
//     {
//         char* uchar = (char *)pOut;
//         uchar[1] = ((pText[0] & 0x0F) << 4) + ((pText[1] >> 2) & 0x0F);
//         uchar[0] = ((pText[1] & 0x03) << 6) + (pText[2] & 0x3F);
//         return;
//     }
//
//     // Convert Unicode to UTF-8
//     void CChineseCodeLib::UnicodeToUTF_8(char* pOut,WCHAR* pText)
//     {
//         // Mind the byte order of WCHAR: low byte first, high byte second
//         char* pchar = (char *)pText;
//         pOut[0] = (0xE0 | ((pchar[1] & 0xF0) >> 4));
//         pOut[1] = (0x80 | ((pchar[1] & 0x0F) << 2)) + ((pchar[0] & 0xC0) >> 6);
//         pOut[2] = (0x80 | (pchar[0] & 0x3F));
//         return;
//     }
//
// The patchwork method converts most UTF-8 text successfully, but to be
// responsible its flaw has to be pointed out. The real UTF-8 encoding rules
// are:
//
//     U-00000000 - U-0000007F: 0xxxxxxx
//     U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
//     U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
//     U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
//     U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
//     U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
//
// So a UTF-8 encoded character can in theory be 1 to 6 bytes long, while the
// patchwork method only handles the 1-byte and 3-byte forms. Some will object
// that 16-bit BMP characters never need more than 3 bytes. That is true, but
// the patchwork method does not handle the 2-byte form either. Conclusion:
// the patchwork method is unsafe.
//
// NOTE(review): the table above is the original ISO 10646 definition; RFC 3629
// restricts UTF-8 to at most 4 bytes (U+0000..U+10FFFF).
//
// Back to the point: converting between UTF-8 and GB2312 does not need to be
// that complicated. On Windows two ready-made functions do the job when given
// different arguments, ::WideCharToMultiByte(...) and
// ::MultiByteToWideChar(...). On Linux the iconv library functions are used.
// The code follows. On Windows, __cdn_win32_platform__ must be defined; it is
// now defined automatically when _WIN32 is set and may still be defined by
// hand.

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

#if defined(_WIN32) && !defined(__cdn_win32_platform__)
#define __cdn_win32_platform__
#endif

#ifdef __cdn_win32_platform__
#include <windows.h>
#include <vector>
#ifndef CP_GB2312
// Windows code page identifier used by the original article for GB2312.
#define CP_GB2312 20936
#endif
#else
#include <iconv.h>
#endif

/// Converts text between UTF-8 and GB2312.
///
/// Return value of both methods:
///  - Windows branch: number of bytes written to pOut, or 0 on failure. The
///    input is read up to its NUL terminator, so the terminator is converted
///    and counted too; the length argument is not used on this branch.
///  - iconv branch: number of bytes written to pOut (the whole buffer is
///    zeroed first, so the result is NUL-terminated whenever it is shorter
///    than iBufSize), or -1 on failure.
class CodingTransformer
{
public:
    /// UTF-8 -> GB2312
    int UTF_8ToGB2312(char* pOut, int iBufSize, char *pText, int iLenth);
    /// GB2312 -> UTF-8
    int GB2312ToUTF_8(char* pOut, int iBufSize, char *pText, int iLenth);
};

namespace {

#ifdef __cdn_win32_platform__

/// Prints the last Win32 error in the format the original code used.
void ReportTranscodeError()
{
    const DWORD dwLastErr = GetLastError();
    // DWORD is unsigned long; the original used %d and "/n" instead of "\n".
    printf("alexaroma:轉碼錯誤，錯誤號:%lu\n", static_cast<unsigned long>(dwLastErr));
}

/// Converts NUL-terminated pText from srcCodePage to dstCodePage by going
/// through UTF-16. Returns the number of bytes written, or 0 on failure.
int ConvertViaWideChar(UINT srcCodePage, DWORD srcFlags, UINT dstCodePage,
                       char* pOut, int iBufSize, const char* pText)
{
    // Ask for the required size first: sizing the buffer from the caller's
    // byte length fails when that length leaves no room for the terminator.
    const int wideNeeded =
        ::MultiByteToWideChar(srcCodePage, srcFlags, pText, -1, nullptr, 0);
    if (0 == wideNeeded)
    {
        ReportTranscodeError();
        return 0;
    }

    // The vector releases the buffer on every return path (the original
    // paired new[] with scalar delete).
    std::vector<WCHAR> wide(static_cast<std::size_t>(wideNeeded));
    const int wideLen = ::MultiByteToWideChar(srcCodePage, srcFlags, pText, -1,
                                              wide.data(), wideNeeded);
    if (0 == wideLen)
    {
        ReportTranscodeError();
        return 0;
    }

    const int bytesWritten = ::WideCharToMultiByte(dstCodePage, 0, wide.data(), wideLen,
                                                   pOut, iBufSize, nullptr, nullptr);
    if (0 == bytesWritten)
    {
        ReportTranscodeError();
        return 0;
    }
    return bytesWritten;
}

#else  // __cdn_win32_platform__

/// Converts pLen bytes of pText from fromCode to toCode with iconv.
/// Returns the number of bytes written to pOut, or -1 on any failure
/// (bad arguments, unsupported conversion, invalid or truncated input,
/// output buffer too small).
int ConvertViaIconv(const char* toCode, const char* fromCode,
                    char* pOut, int iBufSize, char* pText, int pLen)
{
    if (pOut == nullptr || pText == nullptr || iBufSize <= 0 || pLen < 0)
    {
        return -1;
    }

    // iconv_open reports failure as (iconv_t)-1, not 0.
    const iconv_t cd = iconv_open(toCode, fromCode);
    if (cd == reinterpret_cast<iconv_t>(static_cast<std::intptr_t>(-1)))
    {
        return -1;
    }

    // Zero the whole buffer; the original called strlen() on the
    // uninitialised output buffer to decide how much to clear.
    std::memset(pOut, 0, static_cast<std::size_t>(iBufSize));

    // iconv takes size_t counters; casting int* to size_t* (as the original
    // did) reads and writes past the int on LP64 platforms.
    char* inCursor = pText;
    char* outCursor = pOut;
    std::size_t inLeft = static_cast<std::size_t>(pLen);
    std::size_t outLeft = static_cast<std::size_t>(iBufSize);

    const std::size_t rc = iconv(cd, &inCursor, &inLeft, &outCursor, &outLeft);
    iconv_close(cd);

    // The original had this test inverted and returned -1 on success.
    if (rc == static_cast<std::size_t>(-1))
    {
        return -1;
    }
    return iBufSize - static_cast<int>(outLeft);
}

#endif  // __cdn_win32_platform__

}  // namespace

/// Converts UTF-8 text in pText to GB2312 in pOut (capacity iBufSize bytes).
/// See the class comment for the meaning of pLen and the return value.
int CodingTransformer::UTF_8ToGB2312(char* pOut, int iBufSize, char *pText, int pLen)
{
#ifdef __cdn_win32_platform__
    (void)pLen;  // input is read up to its NUL terminator on this branch
    return ConvertViaWideChar(CP_UTF8, 0, CP_GB2312, pOut, iBufSize, pText);
#else
    return ConvertViaIconv("GB2312", "UTF-8", pOut, iBufSize, pText, pLen);
#endif
}

/// Converts GB2312 text in pText to UTF-8 in pOut (capacity iBufSize bytes).
/// See the class comment for the meaning of pLen and the return value.
int CodingTransformer::GB2312ToUTF_8(char* pOut, int iBufSize, char *pText, int pLen)
{
#ifdef __cdn_win32_platform__
    (void)pLen;  // input is read up to its NUL terminator on this branch
    return ConvertViaWideChar(CP_GB2312, MB_PRECOMPOSED, CP_UTF8, pOut, iBufSize, pText);
#else
    return ConvertViaIconv("UTF-8", "GB2312", pOut, iBufSize, pText, pLen);
#endif
}

真正UTF-8與GB2312間的轉換(兼容windows和Linux)

再談23種設計模式（3）：行爲型模式（學習筆記）

Power Automate Desktop 安裝完，登錄後老是提示one driver 錯誤

微前端學習筆記(4):從微前端到微模塊之EMP與hel-micro方案探索

微前端學習筆記（1）：微前端總體架構概述，從微服務發微

985 碩士程序員，空窗 4 個月沒有 Offer！

一文搞懂 Spring 循環依賴

賽博鬥地主——使用大語言模型扮演Agent智能體玩牌類遊戲。

VScode右鍵打開(添加到右鍵)

記一次 .NET某工控視覺自動化系統卡死分析

WindowsServer--SQL Server搭建主從同步實現讀寫分離 - 事務性分發

面試題目-大數據量專題

計算機期刊介紹(zz)

C++ sizeof 使用規則及陷阱分析

數據查詢優化的方法

C++中extern “C”含義深層探索

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結