網上典型的代碼是這一段:
// Converts the NUL-terminated string `in` from encoding `encFrom` to encoding
// `encTo` using iconv.
// Returns a pointer to the function-local static buffer `bufout` on success,
// or NULL when iconv_open() or iconv() reports failure.
// NOTE(review): because the result lives in a static 1024-byte buffer, every
// call overwrites the previous result.
char* ConvertEnc( char *encFrom, char *encTo, const char * in)
{
// `bufin` is declared here but never used anywhere in this function.
static char bufin[1024], bufout[1024], *sin, *sout;
int lenin, lenout, ret;
iconv_t c_pt;
// A fresh conversion descriptor is opened on every call (target encoding is
// the first argument, source encoding the second).
if ((c_pt = iconv_open(encTo, encFrom)) == (iconv_t)-1)
{
printf("iconv_open false: %s ==> %s", encFrom, encTo);
return NULL;
}
// All-NULL arguments: per the iconv documentation this resets the descriptor
// to its initial conversion state.
iconv(c_pt, NULL, NULL, NULL, NULL);
// Input length is taken up to the first zero byte, plus one so that the
// terminator is converted as well.
lenin = strlen(in) + 1;
// Output capacity is hard-coded to the size of `bufout`.
lenout = 1024;
sin = (char *)in;
sout = bufout;
// NOTE(review): `lenin` and `lenout` are `int` but are passed through
// `size_t *` casts; where size_t is wider than int, iconv would read and write
// beyond those variables -- confirm the target platform.
ret = iconv(c_pt, (const char**)&sin, (size_t *)&lenin, &sout, (size_t *)&lenout);
if (ret == -1)
{
// NOTE(review): this path returns without calling iconv_close(c_pt).
return NULL;
}
iconv_close(c_pt);
return bufout;
}
這段代碼裏面有3個問題:1.沒有重複使用初始化後的iconv_t。
2.lenin = strlen(in) + 1這行代碼在某些情況下有問題:當輸入是UTF-16、UCS-2這類字節流中本身就含有0x00的編碼時,strlen會在第一個0x00處提前結束,算出的長度不正確。
3.轉換緩衝區是一個固定值。
我根據iconv官網上的文檔重寫了相關代碼,官網地址如下:
http://www.gnu.org/savannah-checkouts/gnu/libiconv
代碼如下
#ifndef _ICONV_PAIR_HXX_
#define _ICONV_PAIR_HXX_
#include <string>
#include <iconv/include/iconv.h>
/**
 * Owns one iconv conversion descriptor (fromCode -> toCode) together with a
 * growable output buffer, so that the descriptor can be reused for many
 * conversions instead of being reopened each time.
 *
 * Instances are ordered by (toCode, fromCode) through operator<.
 */
class IconvPair
{
    enum
    {
        INIT_BUFFER = 4096  // initial size of the output buffer, in bytes
    };

public:
    /**
     * Opens the conversion descriptor and allocates the initial buffer.
     *
     * @param toCode   name of the target encoding; must not be empty
     * @param fromCode name of the source encoding; must not be empty
     * @throws std::exception when a name is empty or iconv_open() fails
     * @throws std::bad_alloc when the output buffer cannot be allocated
     */
    IconvPair(const std::string &toCode, const std::string &fromCode);

    /** Closes the descriptor (if open) and frees the output buffer. */
    ~IconvPair();

private:
    // Copying is disabled: these are private and no definition is visible in
    // this file (the class owns a raw descriptor and a raw buffer).
    IconvPair(const IconvPair&);
    IconvPair& operator=(const IconvPair&);

public:
    /**
     * Lexicographic ordering on (target encoding, source encoding).
     *
     * This must be a strict ordering: an object never compares less than
     * itself, otherwise ordered containers and sorting algorithms that rely
     * on it misbehave.
     */
    friend bool operator<(const IconvPair &lhs, const IconvPair &rhs)
    {
        if (lhs.mToCode != rhs.mToCode)
        {
            return lhs.mToCode < rhs.mToCode;
        }
        return lhs.mFromCode < rhs.mFromCode;
    }

    /** Start of the output buffer; convert() writes a zero byte after the content. */
    const char* buffer() const {return mBuffer;}

    /** Current capacity of the output buffer, in bytes. */
    size_t bufferLen() const {return mBufferLen;}

    /** Number of bytes produced by the most recent convert() call. */
    size_t contentLen() const {return mContentLen;}

    /**
     * Converts the bytes at *inBuffer into the internal output buffer.
     *
     * @param inBuffer    address of the input pointer; handed to iconv()
     * @param inBytesLeft address of the remaining input byte count; handed to
     *                    iconv()
     * @return the value returned by the last iconv() call
     */
    size_t convert(const char **inBuffer, size_t *inBytesLeft);

private:
    /** Doubles the capacity of the output buffer. */
    void incBuffer();

private:
    std::string mToCode;    // target encoding name
    std::string mFromCode;  // source encoding name
    iconv_t mIconv;         // conversion descriptor, (iconv_t)-1 when not open
    char *mBuffer;          // output buffer obtained from malloc/realloc
    size_t mBufferLen;      // capacity of mBuffer in bytes
    size_t mContentLen;     // bytes of converted output currently in mBuffer
};
#endif
#include "statistics/IconvPair.hxx"
#include <cstdlib>
#include <cerrno>
#include <cassert>
#include <climits>
#include <exception>
/**
 * Allocates the initial output buffer and opens the iconv descriptor that
 * converts from `fromCode` to `toCode`.
 *
 * @param toCode   name of the target encoding; must not be empty
 * @param fromCode name of the source encoding; must not be empty
 * @throws std::exception when either name is empty or iconv_open() fails
 * @throws std::bad_alloc when the output buffer cannot be allocated
 */
IconvPair::IconvPair(const std::string &toCode, const std::string &fromCode)
    : mToCode(toCode)
    , mFromCode(fromCode)
    , mIconv(reinterpret_cast<iconv_t>(-1))
    , mBuffer(NULL)
    , mBufferLen(0)
    , mContentLen(0)
{
    if (mToCode.empty() || mFromCode.empty())
    {
        throw std::exception();
    }

    mBuffer = static_cast<char*>(malloc(INIT_BUFFER));
    if (NULL == mBuffer)
    {
        throw std::bad_alloc();
    }
    mBufferLen = INIT_BUFFER;

    mIconv = iconv_open(mToCode.c_str(), mFromCode.c_str());
    if (reinterpret_cast<iconv_t>(-1) == mIconv)
    {
        // The destructor does not run for an object whose constructor throws,
        // so the buffer has to be released here or it would leak.
        free(mBuffer);
        mBuffer = NULL;
        mBufferLen = 0;
        throw std::exception();
    }
}
/**
 * Releases what the object owns: the conversion descriptor, when one was
 * opened, and the output buffer.
 */
IconvPair::~IconvPair()
{
    const iconv_t notOpened = reinterpret_cast<iconv_t>(-1);
    const bool descriptorIsOpen = !(mIconv == notOpened);
    if (descriptorIsOpen)
    {
        iconv_close(mIconv);
    }

    free(mBuffer);
}
/**
 * Converts the input at *inBuffer into the internal output buffer, growing
 * the buffer as often as iconv() reports that it is too small.
 *
 * After the call, contentLen() is the number of output bytes produced and a
 * single zero byte is stored right after them.
 *
 * @param inBuffer    address of the input pointer; passed to iconv()
 * @param inBytesLeft address of the remaining input byte count; passed to
 *                    iconv()
 * @return the value of the last iconv() call; (size_t)-1 signals a failure
 *         other than a too-small output buffer
 * @throws std::bad_alloc (from incBuffer) when the buffer cannot grow
 */
size_t IconvPair::convert(const char **inBuffer, size_t *inBytesLeft)
{
    assert((NULL != mBuffer) && (reinterpret_cast<iconv_t>(-1) != mIconv));
    assert((NULL != inBuffer) && (NULL != *inBuffer) && (NULL != inBytesLeft));

    // iconv() reports failure as (size_t)-1. Comparing against UINT_MAX only
    // matches that value where size_t and unsigned int have the same width.
    const size_t iconvFailed = static_cast<size_t>(-1);

    // Put the reused descriptor back into its initial conversion state.
    iconv(mIconv, NULL, NULL, NULL, NULL);

    char *outBuffer = mBuffer;
    size_t outBytesLeft = mBufferLen;
    size_t ret = iconv(mIconv, inBuffer, inBytesLeft, &outBuffer,
                       &outBytesLeft);

    // E2BIG is the documented "output buffer too small" condition. It can be
    // raised while a few output bytes are still free (the next multibyte
    // character does not fit), so testing outBytesLeft == 0 is not enough.
    while ((iconvFailed == ret) && (E2BIG == errno))
    {
        // Remember the fill level as an offset: incBuffer() may move mBuffer.
        const size_t written = mBufferLen - outBytesLeft;
        incBuffer();
        outBuffer = mBuffer + written;
        outBytesLeft = mBufferLen - written;
        ret = iconv(mIconv, inBuffer, inBytesLeft, &outBuffer,
                    &outBytesLeft);
    }

    mContentLen = mBufferLen - outBytesLeft;
    if (0 == outBytesLeft)
    {
        // Buffer is exactly full: make room for the terminating zero byte.
        incBuffer();
    }
    mBuffer[mContentLen] = 0;
    return ret;
}
/**
 * Doubles the capacity of the output buffer while keeping its content.
 *
 * On failure the object is left unchanged: the old buffer stays owned by
 * mBuffer and mBufferLen keeps its old value.
 *
 * @throws std::bad_alloc when the doubled size would overflow size_t or
 *         realloc() fails
 */
void IconvPair::incBuffer()
{
    const size_t maxLen = static_cast<size_t>(-1);
    if (mBufferLen > maxLen / 2)
    {
        throw std::bad_alloc();
    }
    const size_t grownLen = mBufferLen * 2;

    // realloc() leaves the original block untouched when it fails, so the
    // result must not overwrite mBuffer before it has been checked.
    char *grown = static_cast<char*>(realloc(mBuffer, grownLen));
    if (NULL == grown)
    {
        throw std::bad_alloc();
    }

    mBuffer = grown;
    mBufferLen = grownLen;
}
測試代碼塊如下:
/**
 * Exercises IconvPair with a gb2312 -> utf-8 descriptor:
 *  1. converts an input exactly as large as the initial buffer and checks
 *     that the buffer was doubled;
 *  2. converts the contents of league2012b.htm and writes the result to
 *     league2012b.txt.
 */
void UnitTest::testIconv()
{
    const size_t pageSize = 4096;
    const size_t bufferCapacity = 1024 * 1024;
    IconvPair g2u("utf-8", "gb2312");

    // Case 1: the output fills the initial buffer completely, so convert()
    // has to grow it once to store the terminating zero byte.
    char array[pageSize] = {'a'};
    const char *inArray = array;
    size_t inArrayLen = pageSize;
    size_t ret = g2u.convert(&inArray, &inArrayLen);
    CPPUNIT_ASSERT((0 == ret) && ((pageSize * 2) == g2u.bufferLen()));

    // Case 2: read the sample file as raw bytes ("rb": no text-mode
    // translation of the encoded data).
    FILE *f = fopen("league2012b.htm", "rb");
    CPPUNIT_ASSERT(NULL != f);
    char *buffer = static_cast<char*>(malloc(bufferCapacity * sizeof(char)));
    if (NULL == buffer)
    {
        fclose(f);  // do not leak the handle when the assertion below fires
    }
    CPPUNIT_ASSERT(NULL != buffer);

    // Stop on end of file, on a read error, or when the buffer is full.
    // Looping on !feof() alone never ends after a read error and does not
    // bound the writes into `buffer`.
    size_t fileLen = 0;
    while (fileLen < bufferCapacity)
    {
        size_t chunk = bufferCapacity - fileLen;
        if (chunk > pageSize)
        {
            chunk = pageSize;
        }
        const size_t got = fread(buffer + fileLen, 1, chunk, f);
        if (0 == got)
        {
            break;
        }
        fileLen += got;
    }
    fclose(f);

    const char *bufferIn = buffer;
    ret = g2u.convert(&bufferIn, &fileLen);
    // The converted text lives inside g2u; release the input before asserting
    // so a failing assertion cannot leak it.
    free(buffer);
    CPPUNIT_ASSERT(0 == ret);

    f = fopen("league2012b.txt", "wb");
    CPPUNIT_ASSERT(NULL != f);
    const size_t written = fwrite(g2u.buffer(), 1, g2u.contentLen(), f);
    fclose(f);
    CPPUNIT_ASSERT(g2u.contentLen() == written);
}
由於直接將測試代碼塊中的league2012b.htm複製到CSDN的代碼塊中會有問題,所以只好請大家直接用快盤共享出來的鏈接自己下載了。鏈接地址如下
http://www.kuaipan.cn/file/id_19631556775674797.htm