Java 漢語轉拼音:pinyin4j、jpinyin、tinypinyin 三種工具類的用法

maven依賴

        <dependency>
            <groupId>com.belerweb</groupId>
            <artifactId>pinyin4j</artifactId>
            <version>2.5.0</version>
        </dependency>
        <dependency>
            <groupId>com.github.stuxuhai</groupId>
            <artifactId>jpinyin</artifactId>
            <version>1.1.8</version>
        </dependency>
        <dependency>
            <groupId>com.github.promeg</groupId>
            <artifactId>tinypinyin</artifactId>
            <version>2.0.3</version>
        </dependency>
        <dependency>
            <groupId>com.github.promeg</groupId>
            <artifactId>tinypinyin-lexicons-java-cncity</artifactId>
            <version>2.0.3</version>
        </dependency>

Pinyin4jUtil

package com.utils;

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

import java.util.HashSet;
import java.util.Set;
/**
 * Static helpers that convert Chinese text to pinyin using pinyin4j.
 *
 * <p>The {@code getPinyin*} methods enumerate every combination of readings when the input
 * contains heteronyms and return them comma separated. {@link #getPingYin(String)} and
 * {@link #getPinYinHeadChar(String)} use only the first reading of each character.
 *
 * <p>Only characters in the range U+4E00..U+9FA5 are treated as Chinese by the methods that
 * filter their input.
 */
public class Pinyin4jUtil {

    /** First character of the range treated as Chinese (U+4E00). */
    private static final char HANZI_FIRST = '\u4E00';

    /** Last character of the range treated as Chinese (U+9FA5). */
    private static final char HANZI_LAST = '\u9FA5';

    /**
     * Converts text to full pinyin with the first letter of every syllable capitalized.
     * Heteronyms produce several results, joined with commas.
     *
     * @param chinese text to convert; may be null or blank
     * @return comma-separated pinyin combinations, or an empty string for null/blank input
     */
    public static String getPinyin(String chinese) {
        return getPinyinZh_CN(makeStringByStringSet(chinese));
    }

    /**
     * Same as {@link #getPinyin(String)} but entirely in upper case.
     *
     * @param chinese text to convert; may be null or blank
     * @return upper-case, comma-separated pinyin combinations
     */
    public static String getPinyinToUpperCase(String chinese) {
        // Locale.ROOT keeps the ASCII mapping stable (e.g. no dotted capital I under a Turkish locale).
        return getPinyin(chinese).toUpperCase(java.util.Locale.ROOT);
    }

    /**
     * Same as {@link #getPinyin(String)} but entirely in lower case.
     *
     * @param chinese text to convert; may be null or blank
     * @return lower-case, comma-separated pinyin combinations
     */
    public static String getPinyinToLowerCase(String chinese) {
        return getPinyin(chinese).toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Builds the upper-case initials of every pinyin combination of the input.
     * The initials are the ASCII capital letters found in each result of
     * {@link #getPinyin(String)}; duplicates are not removed.
     *
     * @param chinese text to convert; may be null or blank
     * @return comma-separated initials, or an empty string when there is nothing to convert
     */
    public static String getPinyinJianPin(String chinese) {
        String[] combinations = getPinyin(chinese).split(",");
        StringBuilder initials = new StringBuilder();
        for (int k = 0; k < combinations.length; k++) {
            if (k > 0) {
                initials.append(',');
            }
            String combination = combinations[k];
            for (int i = 0; i < combination.length(); i++) {
                char c = combination.charAt(i);
                if (c >= 'A' && c <= 'Z') {
                    initials.append(c);
                }
            }
        }
        return initials.toString();
    }

    /**
     * Expands the input into the set of all pinyin combinations.
     *
     * <p>Chinese characters are replaced by each of their readings, ASCII letters are kept,
     * and every other character (digits, punctuation, whitespace) is dropped. A Chinese
     * character for which no reading is available is dropped as well.
     *
     * @param chinese text to convert
     * @return the distinct combinations, or {@code null} when the input is null or blank
     *         (the null return for blank input is kept for backward compatibility)
     */
    public static Set<String> makeStringByStringSet(String chinese) {
        if (chinese == null || chinese.trim().isEmpty()) {
            return null;
        }
        HanyuPinyinOutputFormat format = getDefaultOutputFormat();
        char[] source = chinese.toCharArray();
        String[][] candidates = new String[source.length][];
        for (int i = 0; i < source.length; i++) {
            char c = source[i];
            if (isChinese(c)) {
                String[] readings = readingsOf(c, format);
                // Never leave a null/empty row: it would break the combination step.
                candidates[i] = (readings == null || readings.length == 0)
                        ? new String[] { "" }
                        : readings;
            } else if (isAsciiLetter(c)) {
                candidates[i] = new String[] { String.valueOf(c) };
            } else {
                candidates[i] = new String[] { "" };
            }
        }
        Set<String> result = new HashSet<String>();
        for (String combination : Exchange(candidates)) {
            result.add(combination);
        }
        return result;
    }

    /**
     * Creates the output format used by the combination methods: lower case, no tone,
     * and the {@code WITH_U_AND_COLON} representation of the u-umlaut vowel.
     *
     * @return a new format instance
     */
    public static HanyuPinyinOutputFormat getDefaultOutputFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        format.setVCharType(HanyuPinyinVCharType.WITH_U_AND_COLON);
        return format;
    }

    /**
     * Computes every concatenation that takes one entry from each row, in row order,
     * capitalizing each entry before it is appended.
     *
     * <p>Null or empty rows contribute nothing. Unlike the previous recursive version,
     * a single-row input is capitalized too and empty entries no longer cause a crash.
     *
     * @param strJaggedArray one row of alternatives per position; may be null
     * @return all combinations; an empty array when there are no rows
     */
    public static String[] Exchange(String[][] strJaggedArray) {
        if (strJaggedArray == null || strJaggedArray.length == 0) {
            return new String[0];
        }
        String[] combinations = { "" };
        for (String[] row : strJaggedArray) {
            if (row == null || row.length == 0) {
                continue;
            }
            String[] extended = new String[combinations.length * row.length];
            int next = 0;
            for (String prefix : combinations) {
                for (String entry : row) {
                    String piece = capitalize(entry);
                    extended[next++] = prefix + (piece == null ? "" : piece);
                }
            }
            combinations = extended;
        }
        return combinations;
    }

    /**
     * Upper-cases the first character when it is an ASCII lower-case letter.
     *
     * @param s text to capitalize; may be null or empty
     * @return the capitalized text; null or empty input is returned unchanged
     */
    public static String capitalize(String s) {
        if (s == null || s.isEmpty()) {
            return s;
        }
        char first = s.charAt(0);
        if (first < 'a' || first > 'z') {
            return s;
        }
        char[] chars = s.toCharArray();
        chars[0] = (char) (first - ('a' - 'A'));
        return new String(chars);
    }

    /**
     * Joins the entries of a set with commas, in the set's iteration order.
     *
     * @param stringSet entries to join; may be null
     * @return the joined text, or an empty string for a null or empty set
     */
    public static String getPinyinZh_CN(Set<String> stringSet) {
        if (stringSet == null) {
            return "";
        }
        StringBuilder joined = new StringBuilder();
        boolean first = true;
        for (String entry : stringSet) {
            if (!first) {
                joined.append(',');
            }
            joined.append(entry);
            first = false;
        }
        return joined.toString();
    }

    /**
     * Converts text to lower-case pinyin without tones, using only the first reading of each
     * Chinese character (heteronyms are not disambiguated). All other characters, and Chinese
     * characters without an available reading, are copied through unchanged.
     *
     * @param src text to convert; may be null
     * @return the converted text, or an empty string for null input
     */
    public static String getPingYin(String src) {
        if (src == null) {
            return "";
        }
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        format.setVCharType(HanyuPinyinVCharType.WITH_V);

        StringBuilder pinyin = new StringBuilder();
        for (int i = 0; i < src.length(); i++) {
            char c = src.charAt(i);
            String[] readings = isChinese(c) ? readingsOf(c, format) : null;
            if (readings != null && readings.length > 0) {
                pinyin.append(readings[0]);
            } else {
                pinyin.append(c);
            }
        }
        return pinyin.toString();
    }

    /**
     * Replaces every character that has a pinyin reading by the first letter of its first
     * reading; characters without a reading are copied through unchanged.
     *
     * @param str text to convert; may be null
     * @return the initials, or an empty string for null input
     */
    public static String getPinYinHeadChar(String str) {
        if (str == null) {
            return "";
        }
        StringBuilder initials = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char word = str.charAt(i);
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(word);
            if (readings != null && readings.length > 0
                    && readings[0] != null && !readings[0].isEmpty()) {
                initials.append(readings[0].charAt(0));
            } else {
                initials.append(word);
            }
        }
        return initials.toString();
    }

    /** Tells whether the character lies in the range this class treats as Chinese. */
    private static boolean isChinese(char c) {
        return c >= HANZI_FIRST && c <= HANZI_LAST;
    }

    /** Tells whether the character is an ASCII letter (A-Z or a-z). */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /** Looks up the readings of one character; returns null when the lookup fails. */
    private static String[] readingsOf(char c, HanyuPinyinOutputFormat format) {
        try {
            return PinyinHelper.toHanyuPinyinStringArray(c, format);
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // Best effort, as before: report the problem and treat the character as unreadable.
            e.printStackTrace();
            return null;
        }
    }

    /** Small manual demo. */
    public static void main(String[] args) {
        System.out.println(getPinyinJianPin("中牟縣")); // author's sample output: ZMX,ZMX
        System.out.println(getPinyin("中牟縣"));        // author's sample output: ZhongMuXian,ZhongMouXian
        System.out.println(getPingYin("中牟縣"));       // author's sample output: zhongmouxian
    }

}

JpPinyinUtil

package com.utils;

import com.github.stuxuhai.jpinyin.ChineseHelper;
import com.github.stuxuhai.jpinyin.PinyinFormat;
import com.github.stuxuhai.jpinyin.PinyinHelper;

/**
 * Thin wrappers around jpinyin's {@code PinyinHelper} and {@code ChineseHelper}.
 *
 * <p>Every wrapper is best effort: any exception raised by the library call is printed
 * with {@code printStackTrace()} and a fallback value ({@code null}, or {@code false} for
 * {@link #checkPinYin(char)}) is returned instead.
 */
public class JpPinyinUtil {

    /**
     * Converts text to pinyin with tone marks, syllables separated by a single space.
     *
     * @param pinYinStr the text to convert
     * @return the pinyin with tone marks, or {@code null} if the conversion threw
     */
    public static String changeToMarkPinYin(String pinYinStr) {
        try {
            return PinyinHelper.convertToPinyinString(pinYinStr, " ", PinyinFormat.WITH_TONE_MARK);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Converts text to pinyin with tone numbers, syllables separated by a single space.
     *
     * @param pinYinStr the text to convert
     * @return the pinyin with tone numbers, or {@code null} if the conversion threw
     */
    public static String changeToNumberPinYin(String pinYinStr) {
        try {
            return PinyinHelper.convertToPinyinString(pinYinStr, " ", PinyinFormat.WITH_TONE_NUMBER);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Converts text to pinyin without tones and without any separator between syllables.
     * (Despite the method name, the {@code WITHOUT_TONE} format is used.)
     *
     * @param pinYinStr the text to convert
     * @return the toneless pinyin, or {@code null} if the conversion threw
     */
    public static String changeToTonePinYin(String pinYinStr) {
        try {
            return PinyinHelper.convertToPinyinString(pinYinStr, "", PinyinFormat.WITHOUT_TONE);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Returns the short pinyin of the text as produced by {@code PinyinHelper.getShortPinyin}.
     *
     * @param pinYinStr the text to convert
     * @return the short pinyin, or {@code null} if the conversion threw
     */
    public static String changeToGetShortPinYin(String pinYinStr) {
        try {
            return PinyinHelper.getShortPinyin(pinYinStr);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Checks whether a character has more than one pinyin reading.
     *
     * @param pinYinStr the character to check
     * @return {@code true} for a heteronym; {@code false} otherwise or if the check threw
     */
    public static boolean checkPinYin(char pinYinStr) {
        try {
            return PinyinHelper.hasMultiPinyin(pinYinStr);
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Converts simplified Chinese text to traditional Chinese.
     *
     * @param pinYinStr the simplified text
     * @return the traditional text, or {@code null} if the conversion threw
     */
    public static String changeToTraditional(String pinYinStr) {
        try {
            return ChineseHelper.convertToTraditionalChinese(pinYinStr);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Converts traditional Chinese text to simplified Chinese.
     *
     * @param pinYinSt the traditional text
     * @return the simplified text, or {@code null} if the conversion threw
     */
    public static String changeToSimplified(String pinYinSt) {
        try {
            return ChineseHelper.convertToSimplifiedChinese(pinYinSt);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Small manual demo. */
    public static void main(String[] args) {
        System.out.println(checkPinYin('牟')); // author's sample output: true
        System.out.println(changeToTonePinYin("中牟縣")); // author's sample output: zhongmuxian
    }
}


TinyPinyinUtil

package com.utils;

import com.github.promeg.pinyinhelper.Pinyin;
import com.github.promeg.pinyinhelper.PinyinMapDict;
import com.github.promeg.tinypinyin.lexicons.java.cncity.CnCityDict;

import java.util.HashMap;
import java.util.Map;

/**
 * Pinyin helpers backed by TinyPinyin, configured with the Chinese city lexicon and a small
 * set of custom word overrides.
 *
 * <p>Reference: https://github.com/promeG/TinyPinyin
 *
 * <p>Created 2020/5/14 10:12.
 *
 * @author ywx
 */
public class TinyPinyinUtil {

    /** Custom word overrides, built once instead of on every dictionary lookup. */
    private static final Map<String, String[]> CUSTOM_WORDS = buildCustomWords();

    static {
        // Register both dictionaries in ONE init call. Pinyin.init installs the given
        // configuration as a whole, so the previous second init call discarded the
        // city dictionary registered by the first one.
        // NOTE(review): the custom dictionary is added first on the assumption that
        // earlier dictionaries win when several contain the same word - confirm against
        // the TinyPinyin documentation.
        Pinyin.init(Pinyin.newConfig()
                .with(new PinyinMapDict() {
                    @Override
                    public Map<String, String[]> mapping() {
                        return CUSTOM_WORDS;
                    }
                })
                .with(CnCityDict.getInstance()));
    }

    /** Builds the custom word-to-syllables table used by the custom dictionary. */
    private static Map<String, String[]> buildCustomWords() {
        Map<String, String[]> words = new HashMap<>();
        words.put("重慶", new String[]{"CHONG", "QING"});
        words.put("中牟縣", new String[]{"ZHONG", "MU", "XIAN"});
        return words;
    }

    /**
     * Converts text to upper-case pinyin.
     *
     * @param str       text to convert
     * @param separator text inserted between the converted characters
     * @return whatever {@code Pinyin.toPinyin} returns for the arguments
     */
    public static String toUpperPinyin(String str, String separator) {
        return Pinyin.toPinyin(str, separator);
    }

    /**
     * Converts text to upper-case pinyin without a separator.
     *
     * @param str text to convert
     * @return whatever {@code Pinyin.toPinyin} returns for the text
     */
    public static String toUpperPinyin(String str) {
        return toUpperPinyin(str, "");
    }

    /**
     * Converts text to lower-case pinyin.
     *
     * @param str       text to convert
     * @param separator text inserted between the converted characters
     * @return the lower-cased conversion, or {@code null} when the library returns null
     */
    public static String toLowerPinyin(String str, String separator) {
        String pinyin = Pinyin.toPinyin(str, separator);
        // Guard the library result, and lower-case independently of the default locale.
        return pinyin == null ? null : pinyin.toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Converts text to lower-case pinyin without a separator.
     *
     * @param str text to convert
     * @return the lower-cased conversion, or {@code null} when the library returns null
     */
    public static String toLowerPinyin(String str) {
        return toLowerPinyin(str, "");
    }

    /** Small manual demo. */
    public static void main(String[] args) {
        System.out.println(toUpperPinyin("中牟縣")); // author's sample output: ZHONGMUXIAN
        System.out.println(toLowerPinyin("中牟縣")); // author's sample output: zhongmuxian
    }
}

總結

Pinyin4jUtil:遇到含多音字的詞語時,會返回所有可能的拼音組合,以逗號分隔。
JpPinyinUtil:可以自動識別含多音字的常用詞語,返回對應的讀音。
TinyPinyinUtil:可以通過添加自定義詞典來指定含多音字詞語的讀音。
參考網址:https://github.com/promeG/TinyPinyin

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章