# 中英文字符混合處理 (Handling strings that mix Chinese and English characters)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 12 11:26:30 2019

@author: macbook
"""

def is_zh(c):
    """Return True if the single character *c* is in a CJK-related range.

    The check is purely by code point against a fixed table of ranges
    (CJK punctuation/radicals, fullwidth forms, and the CJK ideograph
    blocks listed below).

    Args:
        c: A string of length 1. ``ord`` raises ``TypeError`` for anything
            else.

    Returns:
        True when ``ord(c)`` falls inside one of the ranges, else False.
    """
    code_point = ord(c)
    cjk_ranges = (
        # Punctuation & radicals (everything from U+2E80 through U+33FF).
        (0x2E80, 0x33FF),
        # CJK Unified Ideographs Extension A.  The earlier version named this
        # block in its comment but did not actually include its range.
        (0x3400, 0x4DBF),
        # CJK Unified Ideographs.  Upper bound is the end of the block
        # (U+9FFF) rather than U+9FBB, so later-assigned ideographs match.
        (0x4E00, 0x9FFF),
        # CJK Compatibility Ideographs.
        (0xF900, 0xFAD9),
        # Fullwidth Latin characters and other halfwidth/fullwidth forms.
        (0xFF00, 0xFFEF),
        # CJK Unified Ideographs Extension B.
        (0x20000, 0x2A6D6),
        # CJK Compatibility Supplement.
        (0x2F800, 0x2FA1D),
    )
    return any(low <= code_point <= high for low, high in cjk_ranges)


def split_zh_en(zh_en_str) :

    zh_en_group = [ ] 
    zh_gather = "" 
    en_gather = "" 
    zh_status = False 
    mark = []
    for c in zh_en_str:
        if not zh_status and is_zh(c) :
            zh_status = True 
            if en_gather != "" :
                zh_en_group.append(en_gather) 
                en_gather = "" 
        elif not is_zh(c) and zh_status:
            zh_status = False 
            if zh_gather != "" :
                zh_en_group.append(zh_gather) 
        if zh_status:
            zh_gather += c
        else :
            en_gather += c                               
            zh_gather = "" 

    if en_gather != "" :
        zh_en_group.append(en_gather) 
    elif zh_gather != "" :
        zh_en_group.append(zh_gather) 

    return zh_en_group



# Demo input: Chinese text on both sides of an ASCII code fragment.
word = '好人examples.append(example)好事'

# Split the demo string into alternating Chinese / non-Chinese runs;
# with the is_zh above this yields ['好人', 'examples.append(example)', '好事'].
aa =split_zh_en(word)

# Builds a copy of the result list. As a bare expression statement the value
# is discarded when run as a script; it is only echoed in an interactive
# session.
list(aa)








 

# 發表評論
# 所有評論
# 還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。
# 相關文章