1. 以下編碼對應的中文字符: 中文
// Examples: several encodings of the same two-character text "中文" (U+4E2D U+6587).
// "\xEF\xBB\xBF" is the UTF-8 byte order mark (BOM).
qDebug() << QString::fromUtf8("\xEF\xBB\xBF\xE4\xB8\xAD\xE6\x96\x87");// UTF-8 bytes, prefixed with the BOM
qDebug() << QString("\u4e2d\u6587");// ASCII-only source using \uXXXX escapes; NOTE(review): result depends on the compiler's execution charset - confirm
qDebug() << QString::fromLocal8Bit("\xD6\xD0\xCE\xC4");// ANSI (local 8-bit) bytes; presumably GBK/GB2312, so the result depends on the system locale codec
QTextCodec *codec = QTextCodec::codecForName("utf-8"); // "$$name" in the notes below stands for the codec name chosen here: utf-8
// fromUnicode: converts Unicode text -> bytes in the $$name encoding
qDebug() << codec->fromUnicode("\u4e2d\u6587");// expected bytes: \xE4\xB8\xAD\xE6\x96\x87
qDebug() << QString::fromUtf8( codec->fromUnicode("\u4e2d\u6587") );// round trip back to text: "中文"
// toUnicode: converts bytes in the $$name encoding -> Unicode text
qDebug() << codec->toUnicode("\xE4\xB8\xAD\xE6\x96\x87");// expected text: "中文"
2. 判斷是否是utf8 //bom \xEF\xBB\xBF
/**
 * @brief Checks whether a byte buffer is structurally well-formed UTF-8.
 *
 * Each lead byte must announce a 1-, 2-, 3- or 4-byte sequence and must be
 * followed by the matching number of continuation bytes (bit pattern 10xxxxxx).
 *
 * Notes on behavior:
 *  - A UTF-8 BOM (EF BB BF) is an ordinary 3-byte sequence and is accepted.
 *  - An empty buffer (null pointer or size <= 0) yields true.
 *  - A multi-byte sequence cut off by the end of the buffer is tolerated and
 *    yields true, so a prefix of a larger UTF-8 stream can be tested.
 *  - Only the byte structure is checked; overlong encodings and surrogate
 *    code points are not rejected.
 *
 * @param pBuffer Pointer to the bytes to inspect; may be null.
 * @param size    Number of bytes available at pBuffer.
 * @return true if no structural UTF-8 violation was found, false otherwise.
 */
bool IsUTF8(const void* pBuffer, long size)
{
    // Nothing to inspect: treat as vacuously valid instead of dereferencing.
    if (pBuffer == nullptr || size <= 0)
    {
        return true;
    }

    const unsigned char* cur = static_cast<const unsigned char*>(pBuffer);
    const unsigned char* const end = cur + size;

    while (cur < end)
    {
        const unsigned char lead = *cur;
        long seqLen = 0;

        if (lead < 0x80)        // 0xxxxxxx: single-byte ASCII
        {
            seqLen = 1;
        }
        else if (lead < 0xC0)   // 10xxxxxx: continuation byte without a lead byte
        {
            return false;
        }
        else if (lead < 0xE0)   // 110xxxxx: lead byte of a 2-byte sequence
        {
            seqLen = 2;
        }
        else if (lead < 0xF0)   // 1110xxxx: lead byte of a 3-byte sequence
        {
            seqLen = 3;
        }
        else if (lead < 0xF5)   // 11110xxx up to 0xF4: lead byte of a 4-byte sequence
        {
            seqLen = 4;
        }
        else                    // 0xF5..0xFF never appear in UTF-8
        {
            return false;
        }

        // Sequence runs past the end of the buffer: stop scanning and accept,
        // because the buffer may be a prefix of a longer stream.
        if (end - cur < seqLen)
        {
            break;
        }

        // Every byte after the lead must be a continuation byte (10xxxxxx).
        for (long i = 1; i < seqLen; ++i)
        {
            if ((cur[i] & 0xC0) != 0x80)
            {
                return false;
            }
        }

        cur += seqLen;
    }

    return true;
}