原理:
以unicode爲中介
utf-8 ——》unicode——》gbk
gbk ——》unicode——》utf-8
知識點:
1、 unicode是一種“編碼”,
所謂編碼就是一個編號(數字)到字符的一種映射關係,
就僅僅是一種一對一的映射而已,
可以理解成一個很大的對應表格。
2、 GBK、UTF-8是一種“編碼格式”,
是用來序列化或存儲1中提到的那個“編號(數字)”的一種“格式”;
GBK和UTF-8都是用來序列化或存儲unicode編碼的數據的,
但是分別是2種不同的格式;
他們倆除了格式不一樣之外,
他們所關心的unicode編碼範圍也不一樣,
utf-8考慮了很多種不同國家的字符,
涵蓋整個unicode碼錶,
所以其存儲一個字符的編碼的時候,
使用的字節長度也從1字節到4字節不等;
而GBK只考慮中文——在unicode中的一小部分——的字符的編碼,
所以它算好了只要2個字節就能涵蓋到絕大多數常用中文(2個字節能表示6w多種字符),
所以它存儲一個字符的時候,
所用的字節長度最多只有2個字節(ASCII字符用1個字節,漢字用2個字節);
鏈接:https://www.zhihu.com/question/20361462/answer/14899233
正確代碼
方法1:
// UTF-8 to GBK: encode str as GBK bytes, then decode those same bytes as GBK.
// NOTE(review): both calls name the same charset, so this is an encode/decode round trip;
// confirm that str already holds correctly decoded text before relying on it.
String clientStr = new String(str.getBytes("GBK"), "GBK");
// GBK to UTF-8: encode str as UTF-8 bytes, then decode those same bytes as UTF-8.
String clientStr = new String(str.getBytes("UTF-8"), "UTF-8");
方法2:
/**
 * Round-trips a string through the given charset: encodes {@code str} to bytes in
 * {@code charset}, passes those bytes through Base64, and decodes them again with the
 * same charset.
 *
 * <p>The Base64 step is lossless, so the decoded bytes are exactly the encoded ones.
 * NOTE(review): characters that {@code charset} cannot represent are presumably replaced
 * during encoding; confirm against the callers' expectations.
 *
 * @param str the text to convert
 * @param charset the charset name, for example {@code "GBK"} or {@code "UTF-8"}
 * @return the re-decoded string, or {@code str} unchanged when the charset name is not supported
 */
private String charsetConvert(String str, String charset) {
    try {
        byte[] encoded = str.getBytes(charset);
        // java.util.Base64 is the supported replacement for the internal
        // sun.misc.BASE64Encoder/BASE64Decoder classes, which are not available on JDK 9+.
        String base64 = java.util.Base64.getEncoder().encodeToString(encoded);
        byte[] decoded = java.util.Base64.getDecoder().decode(base64);
        return new String(decoded, charset);
    } catch (java.io.UnsupportedEncodingException e) {
        // Best effort, as before: report the problem and hand back the input untouched.
        e.printStackTrace();
        return str;
    }
}
錯誤代碼
/**
 * Converts a "GBK" string to "UTF-8" by escaping it with {@code gbkToUnicode} and then
 * unescaping the result with {@code unicodeToUtf8}.
 *
 * <p>Any exception raised by either step is logged and the input is returned unchanged.
 *
 * @param str the string to convert
 * @return the converted string, or {@code str} itself when the conversion fails
 */
public static String gbkToUtf8(String str) {
    try {
        return unicodeToUtf8(gbkToUnicode(str));
    } catch (Exception e) {
        // Deliberate best effort: fall back to the caller's original text.
        LogUtil.info("gbk轉utf-8 異常,原字符串返回!");
        return str;
    }
}
/**
 * Converts a "UTF-8" string to "GBK" by escaping it with {@code utf8ToUnicode} and then
 * unescaping the result with {@code unicodeToGbk}.
 *
 * <p>Any exception raised by either step is logged and the input is returned unchanged.
 * Original author's note: in real-world testing some Chinese characters could not be converted.
 *
 * @param str the string to convert
 * @return the converted string, or {@code str} itself when the conversion fails
 */
public static String utf8ToGbk(String str) {
    try {
        return unicodeToGbk(utf8ToUnicode(str));
    } catch (Exception e) {
        // Deliberate best effort: fall back to the caller's original text.
        LogUtil.info("utf-8 轉 gbk異常,原字符串返回!");
        return str;
    }
}
上面utf-8轉gbk,實際測試中發現有部分漢字轉換不了
/**
 * Escapes every character above U+00FF as a six-character sequence: a backslash, the
 * letter {@code u}, and exactly four lowercase hexadecimal digits. Characters up to
 * U+00FF are copied through unchanged.
 *
 * @param str the text to escape
 * @return the escaped text
 */
public static String gbkToUnicode(String str) {
    StringBuilder result = new StringBuilder(str.length());
    for (int i = 0; i < str.length(); i++) {
        char ch = str.charAt(i);
        if (ch <= 0x00FF) {
            result.append(ch);
            continue;
        }
        String hex = Integer.toHexString(ch);
        result.append("\\u");
        // Integer.toHexString drops leading zeros; a decoder that reads exactly four digits
        // would otherwise swallow the character following a three-digit escape.
        for (int pad = hex.length(); pad < 4; pad++) {
            result.append('0');
        }
        result.append(hex);
    }
    return result.toString();
}
/**
 * Replaces each escape sequence (a backslash, the letter {@code u}, and exactly four
 * hexadecimal digits) with the character it denotes. All other text is copied through
 * unchanged, including a backslash-u that is truncated or not followed by four hex digits.
 *
 * @param dataStr the text containing escape sequences
 * @return the text with every well-formed escape decoded
 */
public static String unicodeToGbk(String dataStr) {
    int length = dataStr.length();
    StringBuilder buffer = new StringBuilder(length);
    int index = 0;
    while (index < length) {
        int codeUnit = -1;
        if (dataStr.startsWith("\\u", index)) {
            codeUnit = parseHexEscape(dataStr, index + 2);
        }
        if (codeUnit < 0) {
            // Not a well-formed escape: keep the character literally instead of throwing.
            buffer.append(dataStr.charAt(index));
            index++;
        } else {
            buffer.append((char) codeUnit);
            index += 6;
        }
    }
    return buffer.toString();
}

/** Reads four ASCII hex digits starting at {@code start}; returns -1 if they are not all present. */
private static int parseHexEscape(String text, int start) {
    if (start + 4 > text.length()) {
        return -1;
    }
    int value = 0;
    for (int i = start; i < start + 4; i++) {
        char c = text.charAt(i);
        int digit;
        if (c >= '0' && c <= '9') {
            digit = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            digit = c - 'a' + 10;
        } else if (c >= 'A' && c <= 'F') {
            digit = c - 'A' + 10;
        } else {
            return -1;
        }
        value = (value << 4) | digit;
    }
    return value;
}
/**
 * Escapes the non-ASCII characters of a string.
 *
 * <ul>
 *   <li>Basic Latin characters (U+0000..U+007F) are copied through unchanged.</li>
 *   <li>Fullwidth ASCII variants (U+FF01..U+FF5E) are folded to their ASCII counterparts.</li>
 *   <li>Every other character becomes a backslash, the letter {@code u}, and exactly four
 *       lowercase hexadecimal digits.</li>
 * </ul>
 *
 * @param inStr the text to escape
 * @return the escaped text
 */
public static String utf8ToUnicode(String inStr) {
    StringBuilder sb = new StringBuilder(inStr.length());
    for (int i = 0; i < inStr.length(); i++) {
        char ch = inStr.charAt(i);
        if (ch <= 0x007F) {
            sb.append(ch);
        } else if (ch >= 0xFF01 && ch <= 0xFF5E) {
            // Only this sub-range mirrors ASCII at an offset of 0xFEE0 (65248); the rest of
            // the Halfwidth and Fullwidth Forms block must be escaped like any other character.
            sb.append((char) (ch - 0xFEE0));
        } else {
            // Convert from char (unsigned) directly: going through short sign-extends every
            // character at or above U+8000 and yields an eight-digit "ffffxxxx" value.
            String hex = Integer.toHexString(ch);
            sb.append("\\u");
            for (int pad = hex.length(); pad < 4; pad++) {
                sb.append('0');
            }
            sb.append(hex);
        }
    }
    return sb.toString();
}
/**
 * Decodes backslash escapes in a string.
 *
 * <ul>
 *   <li>A backslash, the letter {@code u}, and four hexadecimal digits become the character
 *       with that code.</li>
 *   <li>A backslash followed by {@code t}, {@code r}, {@code n} or {@code f} becomes tab,
 *       carriage return, line feed or form feed.</li>
 *   <li>A backslash followed by any other character yields that character alone.</li>
 *   <li>A lone backslash at the very end of the input is kept as-is.</li>
 * </ul>
 *
 * @param theString the text containing escapes
 * @return the decoded text
 * @throws IllegalArgumentException if a backslash-u is not followed by four hexadecimal digits
 */
public static String unicodeToUtf8(String theString) {
    int len = theString.length();
    StringBuilder outBuffer = new StringBuilder(len);
    int x = 0;
    while (x < len) {
        char aChar = theString.charAt(x++);
        if (aChar != '\\') {
            outBuffer.append(aChar);
            continue;
        }
        if (x >= len) {
            // Nothing follows the backslash, so there is nothing to unescape.
            outBuffer.append(aChar);
            break;
        }
        aChar = theString.charAt(x++);
        if (aChar == 'u') {
            if (x + 4 > len) {
                // Truncated escape: report it the same way as any other malformed escape
                // instead of running off the end of the string.
                throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
            }
            int value = 0;
            for (int i = 0; i < 4; i++) {
                value = (value << 4) + hexDigitValue(theString.charAt(x++));
            }
            outBuffer.append((char) value);
        } else {
            switch (aChar) {
                case 't':
                    aChar = '\t';
                    break;
                case 'r':
                    aChar = '\r';
                    break;
                case 'n':
                    aChar = '\n';
                    break;
                case 'f':
                    aChar = '\f';
                    break;
                default:
                    break;
            }
            outBuffer.append(aChar);
        }
    }
    return outBuffer.toString();
}

/** Returns the value of one ASCII hexadecimal digit, or throws if {@code c} is not one. */
private static int hexDigitValue(char c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return 10 + c - 'a';
    }
    if (c >= 'A' && c <= 'F') {
        return 10 + c - 'A';
    }
    throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
}
參照:原博客