import codecs
# Open 'unicode.rst' through the codecs layer so that reads from ``f`` are
# decoded from UTF-8 into text.
# NOTE(review): the handle is bound to the module-level name ``f`` and is not
# closed anywhere in the visible code -- confirm that a later consumer closes it.
f = codecs.open('unicode.rst', encoding='utf-8')
def rm_control_characters(string):
    """Return *string* as text with every control character replaced by ','.

    A character counts as a control character when its Unicode general
    category is ``Cc`` (for example ``\\t``, ``\\n`` or ``\\x00``).  Despite
    the "rm" in the name, such characters are substituted with a comma
    rather than dropped, so the result has the same length as the input text.

    Args:
        string: The value to clean.  It is coerced to text first (``unicode``
            on Python 2, ``str`` on Python 3), so non-string values are
            accepted as well.

    Returns:
        The text form of *string* with each ``Cc`` character replaced by ','.
    """
    # Imported locally because the surrounding file only imports ``codecs``.
    import unicodedata

    # ``unicode`` exists only on Python 2; fall back to ``str`` on Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    return "".join(
        "," if unicodedata.category(ch) == "Cc" else ch
        for ch in text_type(string)
    )
>>> print u'Hello, 你好, bye.'.encode('unicode-escape')
Hello, \u4f60\u597d, bye.