之前做emoji表情處理的時候,在網上找到這個工具類的源碼,複製下來後發現反解析(reverse)的結果不正確。不知這是作者故意設的小坑讓我們去讀代碼,還是轉載的人粗心大意。經仔細查看,現送上修正後的源碼,修正內容位於第83-90行:
/**
 * Converts strings containing emoji into a visible, plain-text escaped form and back.
 *
 * <p>{@link #escape(String)} replaces every code point matched by the internal emoji
 * predicate with {@code &<n>u:<hex>}, where {@code <hex>} is the lowercase hexadecimal
 * code point and {@code <n>} is the number of hex digits (4 to 6), for example
 * {@code &5u:1f600}. {@link #reverse(String)} turns such sequences back into the original
 * characters, and {@link #filter(String)} simply drops the matched code points.
 *
 * <p>The escape format is specific to this class. A literal {@code &} in the input is not
 * escaped, so input text that already looks like an escape sequence will be decoded by
 * {@link #reverse(String)}.
 *
 * <pre>
 * Background:
 *   Characters in the BMP can be handled with charAt(index) and counted with length().
 *   Characters in other planes need codePointAt(index) and are counted with
 *   codePointCount(0, str.length()).
 *
 * Unicode is logically divided into 17 planes of 65536 (= 2^16) code points each,
 * although only a few planes are in use today.
 *   Plane 0  (0000-FFFF):     Basic Multilingual Plane (BMP)
 *   Plane 1  (10000-1FFFF):   Supplementary Multilingual Plane (SMP)
 *   Plane 2  (20000-2FFFF):   Supplementary Ideographic Plane (SIP)
 *   Plane 3  (30000-3FFFF):   Tertiary Ideographic Plane (TIP)
 *   Planes 4 to 13 (40000-DFFFF): not yet used
 *   Plane 14 (E0000-EFFFF):   Supplementary Special-purpose Plane (SSP)
 *   Plane 15 (F0000-FFFFF):   Private Use Area (PUA)
 *   Plane 16 (100000-10FFFF): Private Use Area (PUA)
 *
 * References:
 *   Wikipedia: http://en.wikipedia.org/wiki/Emoji
 *   GitHub:    http://punchdrunker.github.io/iOSEmoji/
 *   Miscellaneous symbols and pictographs: 1F300-1F5FF
 *   Emoticons:                             1F600-1F64F
 *   Transport and map symbols:             1F680-1F6FF
 *   Miscellaneous symbols:                 2600-26FF
 *   Dingbats:                              2700-27BF
 *   Flags:                                 1F100-1F1FF
 *   Arrows:                                2B00-2BFF 2900-297F
 *   Miscellaneous technical symbols:       2300-23FF
 *   Letterlike symbols:                    2100-214F
 *   CJK symbols:                           303D 3200-32FF 2049 203C
 *   Private Use Area:                      E000-F8FF
 *   High surrogates:                       D800..DB7F
 *   High private use surrogates:           DB80..DBFF
 *   Low surrogates:                        DC00..DFFF
 *   Punctuation:                           2000-200F 2028-202F 205F 2065-206F
 *   Variation selectors (iOS only):        FE00-FE0F
 * </pre>
 */
public class EmojiCharacterUtil {

    /** First character of every escape sequence. */
    private static final char UNICODE_SEPARATOR = '&';

    /** Marker that follows the digit count (custom form) or the separator (generic form). */
    private static final char UNICODE_PREFIX = 'u';

    /** Separates the {@code &<n>u} header from the hex digits in the custom form. */
    private static final char SEPARATOR = ':';

    /** Number of hex digits in the generic {@code &uXXXX} form. */
    private static final int GENERIC_HEX_DIGITS = 4;

    /**
     * Tells whether a code point is one this class escapes or filters out.
     *
     * @param codePoint the Unicode code point to test
     * @return {@code true} if the code point lies in one of the listed ranges
     */
    private static boolean isEmojiCharacter(int codePoint) {
        return (codePoint >= 0x2600 && codePoint <= 0x27BF) // miscellaneous symbols and dingbats
                || codePoint == 0x303D
                || codePoint == 0x2049
                || codePoint == 0x203C
                // punctuation area
                || (codePoint >= 0x2000 && codePoint <= 0x200F)
                || (codePoint >= 0x2028 && codePoint <= 0x202F)
                || codePoint == 0x205F
                || (codePoint >= 0x2065 && codePoint <= 0x206F)
                || (codePoint >= 0x2100 && codePoint <= 0x214F) // letterlike symbols
                || (codePoint >= 0x2300 && codePoint <= 0x23FF) // miscellaneous technical symbols
                || (codePoint >= 0x2B00 && codePoint <= 0x2BFF) // arrows A
                || (codePoint >= 0x2900 && codePoint <= 0x297F) // arrows B
                || (codePoint >= 0x3200 && codePoint <= 0x32FF) // CJK symbols
                || (codePoint >= 0xD800 && codePoint <= 0xDFFF) // surrogate area (unpaired surrogates)
                || (codePoint >= 0xE000 && codePoint <= 0xF8FF) // private use area
                || (codePoint >= 0xFE00 && codePoint <= 0xFE0F) // variation selectors
                || codePoint >= 0x10000; // supplementary planes do not fit in one char: escape all
    }

    /**
     * Replaces every emoji code point in {@code src} with its visible escape sequence.
     *
     * @param src the text to escape; may be {@code null}
     * @return the escaped text, or {@code src} itself when it is {@code null}, empty or
     *         consists only of whitespace
     */
    public static String escape(String src) {
        if (isBlank(src)) {
            return src;
        }
        StringBuilder sb = new StringBuilder(src.length());
        for (int i = 0; i < src.length(); ) {
            int codePoint = src.codePointAt(i);
            if (isEmojiCharacter(codePoint)) {
                String hex = Integer.toHexString(codePoint);
                sb.append(UNICODE_SEPARATOR).append(hex.length())
                        .append(UNICODE_PREFIX).append(SEPARATOR).append(hex);
            } else {
                sb.appendCodePoint(codePoint);
            }
            // Supplementary code points occupy two chars, everything else one.
            i += Character.charCount(codePoint);
        }
        return sb.toString();
    }

    /**
     * Decodes the escape sequences in {@code src} back into characters.
     *
     * <p>Two forms are recognised: the custom {@code &<n>u:<hex>} form written by
     * {@link #escape(String)} (with {@code n} between 4 and 6) and the generic
     * {@code &uXXXX} form with exactly four hex digits. Only lowercase hex digits are
     * accepted. Anything that is truncated, contains a non-hex digit or does not denote a
     * valid code point is copied to the output unchanged.
     *
     * @param src the text to decode; may be {@code null}
     * @return the decoded text, or {@code src} itself when it is {@code null}, empty or
     *         consists only of whitespace
     */
    public static String reverse(String src) {
        if (isBlank(src)) {
            return src;
        }
        StringBuilder sb = new StringBuilder(src.length());
        int i = 0;
        while (i < src.length()) {
            char current = src.charAt(i);
            if (current == UNICODE_SEPARATOR) {
                int consumed = decodeAt(src, i, sb);
                if (consumed > 0) {
                    i += consumed;
                    continue;
                }
            }
            // Not an escape sequence (or a malformed one): keep the character as-is.
            sb.append(current);
            i++;
        }
        return sb.toString();
    }

    /**
     * Removes every emoji code point from {@code src}.
     *
     * @param src the text to filter; may be {@code null}
     * @return the text without the matched code points, or {@code null} if {@code src} is
     *         {@code null}
     */
    public static String filter(String src) {
        if (src == null) {
            return null;
        }
        StringBuilder sb = new StringBuilder(src.length());
        for (int i = 0; i < src.length(); ) {
            int codePoint = src.codePointAt(i);
            if (!isEmojiCharacter(codePoint)) {
                sb.appendCodePoint(codePoint);
            }
            i += Character.charCount(codePoint);
        }
        return sb.toString();
    }

    /**
     * Tries to decode one escape sequence that starts at {@code start}, where
     * {@code src.charAt(start)} is the separator character.
     *
     * @return the number of chars consumed from {@code src}, or 0 if there is no
     *         well-formed sequence at that position (nothing is appended in that case)
     */
    private static int decodeAt(String src, int start, StringBuilder out) {
        int length = src.length();
        if (start + 1 >= length) {
            return 0;
        }
        char next = src.charAt(start + 1);
        int digitCount;
        int digitsStart;
        if (next >= '4' && next <= '6'
                && start + 3 < length
                && src.charAt(start + 2) == UNICODE_PREFIX
                && src.charAt(start + 3) == SEPARATOR) {
            // Custom form: separator, digit count, prefix, colon, then the hex digits.
            digitCount = next - '0';
            digitsStart = start + 4;
        } else if (next == UNICODE_PREFIX) {
            // Generic form: separator, prefix, then a fixed number of hex digits.
            digitCount = GENERIC_HEX_DIGITS;
            digitsStart = start + 2;
        } else {
            return 0;
        }
        int digitsEnd = digitsStart + digitCount;
        if (digitsEnd > length) {
            return 0; // truncated sequence
        }
        int codePoint = 0;
        for (int k = digitsStart; k < digitsEnd; k++) {
            int digit = hexValue(src.charAt(k));
            if (digit < 0) {
                return 0;
            }
            codePoint = (codePoint << 4) | digit;
        }
        if (!Character.isValidCodePoint(codePoint)) {
            return 0; // six hex digits can exceed 0x10FFFF
        }
        out.appendCodePoint(codePoint);
        return digitsEnd - start;
    }

    /** Returns the value of a lowercase hex digit, or -1 if {@code ch} is not one. */
    private static int hexValue(char ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        return -1;
    }

    /** Returns {@code true} if {@code s} is {@code null}, empty or whitespace only. */
    private static boolean isBlank(String s) {
        if (s == null) {
            return true;
        }
        for (int i = 0; i < s.length(); i++) {
            if (!Character.isWhitespace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}