學習筆記:Unicode與UTF8互轉

 int UTF2Uni(const char* src, std::wstring &t)
{
 if (src == NULL)
 {
  return -1;
 }

 int size_s = strlen(src);
 int size_d = size_s + 10;          //?
 
 wchar_t *des = new wchar_t[size_d];
 memset(des, 0, size_d * sizeof(wchar_t));
 
 int s = 0, d = 0;
 bool toomuchbyte = true; //set true to skip error prefix.
 
 while (s < size_s && d < size_d)
 {
  unsigned char c = src[s];
  if ((c & 0x80) == 0)
  {
   des[d++] += src[s++];
  }
  else if((c & 0xE0) == 0xC0)  ///< 110x-xxxx 10xx-xxxx
  {
   WCHAR &wideChar = des[d++];
   wideChar  = (src[s + 0] & 0x3F) << 6;
   wideChar |= (src[s + 1] & 0x3F);
   
   s += 2;
  }
  else if((c & 0xF0) == 0xE0)  ///< 1110-xxxx 10xx-xxxx 10xx-xxxx
  {
   WCHAR &wideChar = des[d++];
   
   wideChar  = (src[s + 0] & 0x1F) << 12;
   wideChar |= (src[s + 1] & 0x3F) << 6;
   wideChar |= (src[s + 2] & 0x3F);
   
   s += 3;
  }
  else if((c & 0xF8) == 0xF0)  ///< 1111-0xxx 10xx-xxxx 10xx-xxxx 10xx-xxxx
  {
   WCHAR &wideChar = des[d++];
   
   wideChar  = (src[s + 0] & 0x0F) << 18;
   wideChar  = (src[s + 1] & 0x3F) << 12;
   wideChar |= (src[s + 2] & 0x3F) << 6;
   wideChar |= (src[s + 3] & 0x3F);
   
   s += 4;
  }
  else
  {
   WCHAR &wideChar = des[d++]; ///< 1111-10xx 10xx-xxxx 10xx-xxxx 10xx-xxxx 10xx-xxxx
   
   wideChar  = (src[s + 0] & 0x07) << 24;
   wideChar  = (src[s + 1] & 0x3F) << 18;
   wideChar  = (src[s + 2] & 0x3F) << 12;
   wideChar |= (src[s + 3] & 0x3F) << 6;
   wideChar |= (src[s + 4] & 0x3F);

   s += 5;
  }
 }
 
 t = des;
 delete[] des;
 des = NULL;
 
 return 0;
}

 

/// Encodes a single Unicode code point as UTF-8.
///
/// Writes 1 to 4 bytes to utf8 and does NOT append a NUL terminator, so the
/// caller must supply a buffer of at least 4 bytes.
///
/// Values that are not Unicode scalar values are rejected instead of being
/// turned into invalid UTF-8: surrogate code units (U+D800..U+DFFF), anything
/// above U+10FFFF, and negative values on platforms where wchar_t is signed.
/// A UTF-16 surrogate pair therefore has to be combined into one code point
/// by the caller before it can be encoded.
///
/// @param wchar  Code point to encode.
/// @param utf8   Destination buffer (>= 4 bytes); may be NULL.
/// @return       Number of bytes written (1..4), or -1 when utf8 is NULL or
///               wchar is not encodable. Nothing is written on failure.
int Uni2UTF(wchar_t wchar, char *utf8)
{
    if (utf8 == NULL)
    {
        return -1;
    }

    // Work on an unsigned value: a negative wchar_t becomes a huge number and
    // falls through to the out-of-range rejection below.
    const unsigned long codePoint = static_cast<unsigned long>(wchar);
    int len = 0;

    if (codePoint < 0x80UL)
    {
        // 0xxx-xxxx
        utf8[len++] = static_cast<char>(codePoint);
    }
    else if (codePoint < 0x800UL)
    {
        // 110x-xxxx 10xx-xxxx
        utf8[len++] = static_cast<char>(0xC0 | (codePoint >> 6));
        utf8[len++] = static_cast<char>(0x80 | (codePoint & 0x3F));
    }
    else if (codePoint < 0x10000UL)
    {
        if (codePoint >= 0xD800UL && codePoint <= 0xDFFFUL)
        {
            return -1;  // lone surrogate: not representable in UTF-8
        }
        // 1110-xxxx 10xx-xxxx 10xx-xxxx
        utf8[len++] = static_cast<char>(0xE0 | (codePoint >> 12));
        utf8[len++] = static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
        utf8[len++] = static_cast<char>(0x80 | (codePoint & 0x3F));
    }
    else if (codePoint <= 0x10FFFFUL)
    {
        // 1111-0xxx 10xx-xxxx 10xx-xxxx 10xx-xxxx
        utf8[len++] = static_cast<char>(0xF0 | (codePoint >> 18));
        utf8[len++] = static_cast<char>(0x80 | ((codePoint >> 12) & 0x3F));
        utf8[len++] = static_cast<char>(0x80 | ((codePoint >> 6) & 0x3F));
        utf8[len++] = static_cast<char>(0x80 | (codePoint & 0x3F));
    }
    else
    {
        return -1;      // beyond the Unicode code space
    }

    return len;
}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章