正確的iconv使用方法

網上典型的代碼是這一段:

// Converts the NUL-terminated string `in` from encoding `encFrom` to encoding
// `encTo` using iconv.
//
// Returns a pointer to the function-static 1024-byte buffer `bufout` holding
// the converted bytes, or NULL when iconv_open() or iconv() fails.
// Because the result lives in a static buffer, each call overwrites the
// result of the previous call.
char* ConvertEnc( char *encFrom, char *encTo, const char * in)
{
	// NOTE(review): bufin is declared but never used in this function.
	static char bufin[1024], bufout[1024], *sin, *sout;
	int lenin, lenout, ret;
	iconv_t c_pt;

	// A fresh conversion descriptor is opened on every call.
	if ((c_pt = iconv_open(encTo, encFrom)) == (iconv_t)-1)
	{
		printf("iconv_open false: %s ==> %s", encFrom, encTo);
		return NULL;
	}
	// Per the iconv documentation, a call with all-NULL buffers resets the
	// descriptor to its initial conversion state.
	iconv(c_pt, NULL, NULL, NULL, NULL);

	// Input length is taken from strlen(), plus one so the terminating NUL
	// is converted as well.
	lenin  = strlen(in) + 1;
	// Output capacity is hard-coded to the size of bufout.
	lenout = 1024;
	sin    = (char *)in;
	sout   = bufout;
	// NOTE(review): lenin/lenout are ints passed through size_t* casts; this
	// is only sound where int and size_t have the same size.
	ret = iconv(c_pt, (const char**)&sin, (size_t *)&lenin, &sout, (size_t *)&lenout);

	if (ret == -1)
	{
		// NOTE(review): this early return skips the iconv_close() below, so
		// the descriptor is not released on a failed conversion.
		return NULL;
	}
	iconv_close(c_pt);

	return bufout;
}
這段代碼裏面有3個問題:
1.沒有重複使用初始化後的iconv_t。
2.lenin  = strlen(in) + 1這行代碼在某些情況下有問題:strlen遇到第一個0字節就會停止,如果源編碼(例如UTF-16、UTF-32)的數據本身包含0字節,算出來的長度就是錯的,所以輸入長度應該由調用者傳入。
3.轉換緩衝區的大小是一個固定值(1024字節),轉換結果超過這個長度時轉換就會失敗。


我根據iconv官網上的文檔重寫了相關代碼,官網地址如下:

http://www.gnu.org/savannah-checkouts/gnu/libiconv


代碼如下

#ifndef _ICONV_PAIR_HXX_
#define _ICONV_PAIR_HXX_

#include <string>
#include <iconv/include/iconv.h>

/**
 * Owns one iconv conversion descriptor for a fixed (toCode, fromCode) pair
 * together with a growable output buffer, so the descriptor can be reused
 * across many conversions.
 *
 * Instances are non-copyable: the copy constructor and copy assignment
 * operator are declared private and left undefined.
 */
class IconvPair
{
	enum 
	{
		/// Initial size, in bytes, of the output buffer.
		INIT_BUFFER = 4096
	};

public:
	/**
	 * @param toCode   name of the target encoding, passed to iconv_open().
	 * @param fromCode name of the source encoding, passed to iconv_open().
	 * @throws std::exception  if either name is empty or iconv_open() fails.
	 * @throws std::bad_alloc  if the output buffer cannot be allocated.
	 */
	IconvPair(const std::string &toCode, const std::string &fromCode);
	~IconvPair();

private:
	// Copying is disabled (declared, never defined).
	IconvPair(const IconvPair&);
	IconvPair& operator=(const IconvPair&);

public:
	/**
	 * Orders pairs by target encoding name, then by source encoding name.
	 *
	 * The comparison is strict: two pairs with identical names are not less
	 * than each other. This is required for use as a key in ordered
	 * containers or with sorting algorithms.
	 */
	friend bool operator<(const IconvPair &lhs, const IconvPair &rhs)
	{
		if (lhs.mToCode != rhs.mToCode)
		{
			return lhs.mToCode < rhs.mToCode;
		}

		return lhs.mFromCode < rhs.mFromCode;
	}

	/// Pointer to the internal output buffer.
	const char* buffer() const {return mBuffer;}
	/// Current capacity of the internal output buffer, in bytes.
	size_t bufferLen() const {return mBufferLen;}
	/// Number of converted bytes stored by the last call to convert().
	size_t contentLen() const {return mContentLen;}

	/**
	 * Converts the bytes at *inBuffer into the internal output buffer.
	 *
	 * @param inBuffer    in/out pointer to the input bytes; handed to iconv().
	 * @param inBytesLeft in/out count of input bytes; handed to iconv().
	 * @return the value returned by the last iconv() call.
	 */
	size_t convert(const char **inBuffer, size_t *inBytesLeft);

private:
	/// Enlarges the internal output buffer.
	void incBuffer();

private:
	std::string mToCode;    ///< target encoding name
	std::string mFromCode;  ///< source encoding name
	iconv_t mIconv;         ///< conversion descriptor; (iconv_t)-1 when not open
	char *mBuffer;          ///< output buffer obtained from malloc()/realloc()
	size_t mBufferLen;      ///< capacity of mBuffer in bytes
	size_t mContentLen;     ///< bytes of converted output currently in mBuffer
};

#endif

#include "statistics/IconvPair.hxx"

#include <cassert>
#include <cerrno>
#include <climits>
#include <cstdlib>

#include <exception>
#include <new>

/**
 * Allocates the initial output buffer and opens the iconv descriptor for
 * converting from `fromCode` to `toCode`.
 *
 * @throws std::exception  if either encoding name is empty, or if
 *                         iconv_open() rejects the pair.
 * @throws std::bad_alloc  if the output buffer cannot be allocated.
 */
IconvPair::IconvPair(const std::string &toCode, const std::string &fromCode)
	: mToCode(toCode)
	, mFromCode(fromCode)
	, mIconv(reinterpret_cast<iconv_t>(-1))
	, mBuffer(NULL)
	, mBufferLen(0)
	, mContentLen(0)
{
	if (mToCode.empty() || mFromCode.empty())
	{
		throw std::exception();
	}

	mBuffer = static_cast<char*>(malloc(INIT_BUFFER));
	if (NULL == mBuffer)
	{
		throw std::bad_alloc();
	}
	mBufferLen = INIT_BUFFER;

	mIconv = iconv_open(toCode.c_str(), fromCode.c_str());
	if (reinterpret_cast<iconv_t>(-1) == mIconv)
	{
		// The destructor does not run when a constructor throws, so the
		// buffer has to be released here to avoid leaking it.
		free(mBuffer);
		mBuffer = NULL;
		mBufferLen = 0;
		throw std::exception();
	}
}

/**
 * Releases the output buffer and closes the conversion descriptor if one
 * is open.
 */
IconvPair::~IconvPair()
{
	free(mBuffer);

	// (iconv_t)-1 is the value used to mark "no descriptor open".
	const iconv_t notOpen = reinterpret_cast<iconv_t>(-1);
	if (mIconv != notOpen)
	{
		iconv_close(mIconv);
	}
}

/**
 * Converts the input bytes into the internal output buffer, growing the
 * buffer as often as needed, and appends a single terminating zero byte.
 *
 * @param inBuffer    in/out: points at the input bytes; advanced by iconv()
 *                    past what was consumed.
 * @param inBytesLeft in/out: number of input bytes; decremented by iconv().
 * @return the value returned by the last iconv() call; (size_t)-1 signals a
 *         conversion error other than "output buffer too small", with errno
 *         set by iconv().
 * @throws std::bad_alloc if the output buffer cannot be enlarged.
 *
 * After the call, buffer() holds contentLen() converted bytes, including any
 * partial output produced before an error.
 */
size_t IconvPair::convert(const char **inBuffer, size_t *inBytesLeft)
{
	assert((NULL != mBuffer) && (reinterpret_cast<iconv_t>(-1) != mIconv));
	assert((NULL != inBuffer) && (NULL != *inBuffer) && (NULL != inBytesLeft));

	// iconv() reports failure as (size_t)-1. Comparing against UINT_MAX only
	// matches where size_t and unsigned int have the same width.
	const size_t iconvFailed = static_cast<size_t>(-1);

	// Reset the descriptor to its initial state before a new conversion.
	iconv(mIconv, NULL, NULL, NULL, NULL);

	char *outBuffer = mBuffer;
	size_t outBytesLeft = mBufferLen;
	size_t ret = iconv(mIconv, inBuffer, inBytesLeft, &outBuffer,
		&outBytesLeft);

	// E2BIG means the output buffer is too small. It can be reported while a
	// few bytes are still free (the next multibyte character does not fit),
	// so test errno rather than outBytesLeft == 0.
	while ((iconvFailed == ret) && (E2BIG == errno))
	{
		// Bytes already written; conversion resumes right after them once
		// the buffer has been reallocated (which may move it).
		const size_t usedLen = mBufferLen - outBytesLeft;
		incBuffer();

		outBuffer = mBuffer + usedLen;
		outBytesLeft = mBufferLen - usedLen;
		ret = iconv(mIconv, inBuffer, inBytesLeft, &outBuffer,
			&outBytesLeft);
	}

	mContentLen = mBufferLen - outBytesLeft;
	if (0 == outBytesLeft)
	{
		// Make room for the terminating zero byte.
		incBuffer();
	}
	mBuffer[mContentLen] = 0;
	return ret;
}

/**
 * Doubles the capacity of the output buffer, preserving its contents.
 *
 * @throws std::bad_alloc if the new size would overflow size_t or realloc()
 *         fails. In that case mBuffer and mBufferLen are left unchanged, so
 *         the object stays usable and the destructor still frees the buffer.
 */
void IconvPair::incBuffer()
{
	const size_t maxLen = static_cast<size_t>(-1);
	if (mBufferLen > maxLen / 2)
	{
		throw std::bad_alloc();
	}

	const size_t grownLen = mBufferLen * 2;

	// Keep the result in a temporary: on failure realloc() returns NULL but
	// leaves the original block allocated, so mBuffer must not be overwritten.
	char *grown = static_cast<char*>(realloc(mBuffer, grownLen));
	if (NULL == grown)
	{
		throw std::bad_alloc();
	}

	mBuffer = grown;
	mBufferLen = grownLen;
}



測試代碼塊如下:

// Exercises IconvPair with a gb2312 -> utf-8 conversion:
//   1. a page-sized in-memory input, checking the buffer growth, and
//   2. the contents of league2012b.htm, written back out as league2012b.txt.
void UnitTest::testIconv()
{
	const size_t pageSize = 4096;
	IconvPair g2u("utf-8", "gb2312");

	// One 'a' followed by zero bytes. The expectation below assumes each of
	// these bytes converts to exactly one output byte, so the output fills the
	// initial 4096-byte buffer and convert() doubles it once to make room for
	// the terminating zero.
	char array[pageSize] = {'a'};
	const char *inArray = array;
	size_t inArrayLen = pageSize;
	size_t ret = g2u.convert(&inArray, &inArrayLen);
	CPPUNIT_ASSERT((0 == ret) && ((pageSize * 2) == g2u.bufferLen()));

	const size_t bufferCap = 1024 * 1024;
	char *buffer = static_cast<char*>(malloc(bufferCap));
	CPPUNIT_ASSERT(NULL != buffer);

	// Binary mode: the file holds encoded bytes that must reach iconv
	// unmodified (text mode may translate line endings on some platforms).
	FILE *f = fopen("league2012b.htm", "rb");
	CPPUNIT_ASSERT(NULL != f);

	// Read in page-sized chunks, never past the end of `buffer`. The loop
	// stops on a zero-length read, which covers both end-of-file and read
	// errors; a plain feof() test never becomes true after an error.
	size_t fileLen = 0;
	while (fileLen < bufferCap)
	{
		const size_t room = bufferCap - fileLen;
		const size_t want = (room < pageSize) ? room : pageSize;
		const size_t got = fread(buffer + fileLen, 1, want, f);
		if (0 == got)
		{
			break;
		}
		fileLen += got;
	}
	const bool readFailed = (0 != ferror(f));
	fclose(f);
	CPPUNIT_ASSERT(!readFailed);

	const char *bufferIn = buffer;
	ret = g2u.convert(&bufferIn, &fileLen);
	CPPUNIT_ASSERT(0 == ret);

	f = fopen("league2012b.txt", "wb");
	CPPUNIT_ASSERT(NULL != f);
	const size_t written = fwrite(g2u.buffer(), 1, g2u.contentLen(), f);
	fclose(f);
	CPPUNIT_ASSERT(g2u.contentLen() == written);

	free(buffer);
}

由於直接將測試代碼塊中的league2012b.htm複製到CSDN的代碼塊中會有問題,所以只好請大家直接用快盤共享出來的鏈接自己下載了。鏈接地址如下

http://www.kuaipan.cn/file/id_19631556775674797.htm


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章