\xE4\xBD\xA0\xE5\xA5\xBD 解碼
感謝 http://www.bejson.com/convert/ox2str/
p e4 bd a0
P -100 -61 -32
P 11100100 10111101 10100000
J -28 -67 -96
J 10011100 11000011 11100000
p e5 a5 bd
P -101 -37 -61
P 11100101 10100101 10111101
J -27 -91 -67
J 10011011 11011011 11000011
Java:你好[-28, -67, -96, -27, -91, -67]
\xE4\xBD\xA0\xE5\xA5\xBD –> e4bda0e5a5bd –> 你好
與python的關係
python2
普通字符串常量,默認utf-8
>>> a = 'aa'
>>> type(a[0])
<type 'str'>
bytes
>>> bytes #(str == bytes) 爲 True
<type 'str'>
bytearray
<type 'bytearray'>
r'xxx';
非轉義的原始字符串
u'xxx';
不僅僅針對中文,可以針對任何字符串,代表對字符串進行unicode編碼。
python3
普通字符串常量 <class 'str'>,默認unicode
>>> a = 'aa'
>>> type(a[0])
<class 'str'>
bytes :<class 'bytes'>
bytearray:<class 'bytearray'>
r'xxx';
u'xxx';
b'xxx';
python3.x裏默認的str是unicode, bytes是(py2.x)的str, b""前綴代表的就是bytes
python2.x裏, b前綴沒什麼具體意義, 只是爲了兼容python3.x的這種寫法
從HBase中取出來的字符串,其中的中文不能直接顯示,
所以這裏實現了對 \x 轉義的中文、英文混合字符串的解析與重新編碼。
#-*-coding:utf-8-*-
# python2 實現
def dvstr(r):
    r"""Decode a byte string containing ``\xHH`` escapes (Python 2 version).

    Every ``\xHH`` sequence in ``r`` becomes the single byte ``0xHH``; every
    other character is copied through as its own byte value.

    Args:
        r: Byte string (Python 2 ``str``) mixing literal characters and
            ``\xHH`` escapes, e.g. ``r'\xe4\xbd\xa0A'``.

    Returns:
        The decoded bytes (a Python 2 ``str``); print it or call
        ``.decode('utf-8')`` to obtain text.

    Raises:
        TypeError: On Python 2, if an escape is truncated or contains
            non-hexadecimal digits (raised by ``binascii.unhexlify``).
    """
    # Function-scope import keeps this snippet self-contained.
    import binascii

    hex_pairs = []
    rlen = len(r)
    i = 0
    while i < rlen:
        # Slicing (instead of indexing) cannot run past the end of the input,
        # so a trailing backslash is simply treated as a literal character.
        if r[i] == '\\' and r[i + 1:i + 2] == 'x':
            hex_pairs.append(r[i + 2:i + 4])
            i += 4
        else:
            # Always two digits: hex() yields a single digit below 0x10,
            # which made the old hexc[3] lookup raise IndexError.
            hex_pairs.append('%02x' % ord(r[i]))
            i += 1
    # Same conversion the 'hex' codec performs; rejects odd-length or
    # non-hexadecimal input.
    return binascii.unhexlify(''.join(hex_pairs))
def evstr(s):
    r"""Escape a byte string into ``\xHH`` notation (Python 2 version).

    Bytes in the range 1..126 are emitted literally, except the backslash;
    every other byte is written as a lowercase ``\xhh`` escape.

    Args:
        s: Byte string (Python 2 ``str``), e.g. UTF-8 encoded text.

    Returns:
        The escaped representation as a ``str``.
    """
    pieces = []
    # bytearray() yields integer byte values, so no per-character ord() is
    # needed.
    for size in bytearray(s):
        # The backslash is escaped as well; otherwise literal input such as
        # '\x41' would be indistinguishable from a real escape when decoded.
        if 0 < size < 127 and size != 0x5c:
            pieces.append(chr(size))
        else:
            # %02x pads to two digits; hex(0) == '0x0' used to crash on NUL.
            pieces.append('\\x%02x' % size)
    return ''.join(pieces)
# Demo 1: turn an escaped literal back into readable text.
rstr = r'\xe4\xbd\xa0\xe5\xa5\xbd\xe5\x95\x8a121A\xe5\x8f\xb7'
print(dvstr(rstr))

# Demo 2: escape a mixed Chinese/ASCII string.
sstr = "你好啊121A號"
rstr = evstr(sstr)
print(rstr)
# python3 實現
def dvstr(r):
    r"""Decode a string containing ``\xHH`` escapes into readable text.

    Every ``\xHH`` sequence becomes the single byte ``0xHH``; every other
    character contributes its UTF-8 encoding. The collected bytes are then
    decoded as UTF-8.

    Args:
        r: Text mixing literal characters and ``\xHH`` escapes, e.g.
            ``r'\xe4\xbd\xa0A'``.

    Returns:
        The decoded ``str``.

    Raises:
        ValueError: If ``\x`` is not followed by exactly two hexadecimal
            digits, or if the resulting bytes are not valid UTF-8
            (``UnicodeDecodeError`` is a ``ValueError`` subclass).
    """
    hex_digits = '0123456789abcdefABCDEF'
    raw = bytearray()
    rlen = len(r)
    i = 0
    while i < rlen:
        # Slicing cannot run past the end, so a trailing backslash is kept
        # as a literal character instead of raising IndexError.
        if r[i] == '\\' and r[i + 1:i + 2] == 'x':
            pair = r[i + 2:i + 4]
            if (len(pair) != 2 or pair[0] not in hex_digits
                    or pair[1] not in hex_digits):
                raise ValueError(
                    'malformed \\x escape %r at index %d' % (r[i:i + 4], i))
            raw.append(int(pair, 16))
            i += 4
        else:
            # Encode the literal character itself. The previous hex()-based
            # approach crashed below 0x10 and garbled anything above 0xFF.
            raw.extend(r[i].encode('utf-8'))
            i += 1
    return raw.decode('utf-8')
def evstr(s):
    r"""Escape text into ``\xHH`` notation over its UTF-8 bytes.

    The string is encoded as UTF-8. Bytes in the range 1..126 are emitted
    literally, except the backslash; every other byte is written as a
    lowercase ``\xhh`` escape.

    Args:
        s: The text to escape.

    Returns:
        The escaped representation as a ``str``.
    """
    pieces = []
    for byte in s.encode("UTF-8"):
        # The backslash is escaped as well; otherwise literal input such as
        # '\x41' would be indistinguishable from a real escape when decoded.
        if 0 < byte < 127 and byte != 0x5C:
            pieces.append(chr(byte))
        else:
            # %02x pads to two digits; hex(0) == '0x0' used to crash on NUL.
            pieces.append('\\x%02x' % byte)
    return ''.join(pieces)
# Demo 1: escape a mixed Chinese/ASCII string.
sstr = "你A"
print(evstr(sstr))

# Demo 2: turn an escaped literal back into readable text.
rstr = r'\xe4\xbd\xa0A'
sstr = dvstr(rstr)
print(sstr)
java端解碼
/**
 * Decodes a string containing {@code \xHH} escapes into readable text.
 *
 * <p>Each {@code \xHH} sequence becomes the single byte {@code 0xHH}; all
 * other characters contribute their UTF-8 bytes. The collected bytes are
 * decoded as UTF-8. A backslash that is not followed by {@code x} is kept
 * as a literal character.
 *
 * @param rstr text mixing literal characters and {@code \xHH} escapes
 * @return the decoded string, with whitespace and NUL characters preserved
 * @throws NumberFormatException if {@code \x} is not followed by two
 *         hexadecimal digits
 */
public static String drstr(String rstr) {
    // Explicit charset: the platform default is not guaranteed to be UTF-8.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    final byte[] bs = rstr.getBytes(utf8);
    // Output never exceeds the input: a 4-byte escape shrinks to one byte.
    final ByteBuffer buf = ByteBuffer.allocate(bs.length);
    int i = 0;
    while (i < bs.length) {
        final boolean escape = bs[i] == '\\' && i + 1 < bs.length && bs[i + 1] == 'x';
        if (!escape) {
            buf.put(bs[i]);
            i++;
            continue;
        }
        // Bounds-checked digit lookup; -1 marks a missing or non-hex digit.
        final int hi = i + 2 < bs.length ? Character.digit(bs[i + 2] & 0xFF, 16) : -1;
        final int lo = i + 3 < bs.length ? Character.digit(bs[i + 3] & 0xFF, 16) : -1;
        if (hi < 0 || lo < 0) {
            throw new NumberFormatException("Malformed \\x escape at byte offset " + i);
        }
        buf.put((byte) ((hi << 4) | lo));
        i += 4;
    }
    // Use only the bytes actually written. The old trim() over the whole
    // backing array also removed genuine whitespace and decoded \x00 bytes.
    return new String(buf.array(), 0, buf.position(), utf8);
}
/**
 * Escapes text into {@code \xHH} notation over its UTF-8 bytes.
 *
 * <p>The string is encoded as UTF-8. ASCII bytes (0..127) are emitted
 * literally, except the backslash; every other byte is written as an
 * uppercase {@code \xHH} escape.
 *
 * @param str the text to escape
 * @return the escaped representation, with whitespace preserved
 */
public static String erstr(String str) {
    // Encode the whole string at once so surrogate pairs become proper
    // 4-byte UTF-8 sequences; per-char encoding turned them into '?'.
    final byte[] bs = str.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    final char[] hex = "0123456789ABCDEF".toCharArray();
    // Worst case: every byte expands to the four characters "\xHH".
    final StringBuilder out = new StringBuilder(bs.length * 4);
    for (byte b : bs) {
        final int v = b & 0xFF;
        // The backslash is escaped as well; otherwise literal input such as
        // "\x41" would be indistinguishable from a real escape when decoded.
        if (v <= 0x7F && v != '\\') {
            out.append((char) v);
        } else {
            out.append("\\x").append(hex[v >>> 4]).append(hex[v & 0x0F]);
        }
    }
    return out.toString();
}