Java 敏感詞彙過濾

 

  • 聲明變量
    // Mask buffer: holds repeated copies of `symbol`; (re)built by initSensitiveWord
    private StringBuilder replaceSymbol;
    // Replacement text appended to the mask buffer, "*" by default
    private String symbol = "*";
    // Name of the word-list resource; passed to ClassLoader.getResourceAsStream in initSensitiveWord
    private String fileName;
    // All sensitive words; populated by initSensitiveWord, read by filterSensitiveWord
    private List<String> arrayList ;
  •  初始化敏感詞彙

    /**
     * Builds the mask buffer and loads the sensitive-word list from the classpath.
     *
     * <p>The mask buffer receives one copy of {@code symbol} per character of {@code str}.
     * The resource named by {@code fileName} is then read as UTF-8, one word per line.
     * Blank lines are skipped, a leading byte-order mark is removed, and duplicate
     * words are dropped while the first-seen order is kept.
     *
     * <p>If reading fails part-way, the error is reported and the words read so far are kept.
     *
     * @param str text that will be filtered later; only its length is used here,
     *            and {@code null} is treated as an empty string
     * @throws IllegalStateException if the resource cannot be found on the classpath
     */
    public void initSensitiveWord(String str){

        int length = (str == null) ? 0 : str.length();
        replaceSymbol = new StringBuilder();
        for (int i = 0; i < length; i++){
            replaceSymbol.append(symbol);
        }

        // getResourceAsStream returns null (it does not throw) when the resource is missing;
        // fail here with a clear message instead of a bare NPE inside InputStreamReader.
        java.io.InputStream stream =
                SensitiveWordUtil.class.getClassLoader().getResourceAsStream(fileName);
        if (stream == null) {
            throw new IllegalStateException(
                    "Sensitive-word resource not found on classpath: " + fileName);
        }

        // Insertion-ordered set: de-duplicates in constant time per word while keeping file order.
        java.util.Set<String> words = new java.util.LinkedHashSet<String>();

        // Closing the BufferedReader also closes the wrapped reader and stream.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(stream, java.nio.charset.StandardCharsets.UTF_8))) {

            boolean firstLine = true;
            for (String txt; (txt = reader.readLine()) != null; ) {
                // The UTF-8 decoder keeps a byte-order mark as U+FEFF; left in place it
                // would become part of the first word and that word would never match.
                if (firstLine && !txt.isEmpty() && txt.charAt(0) == '\uFEFF') {
                    txt = txt.substring(1);
                }
                firstLine = false;

                // An empty word would match at every position and never advance the search.
                if (txt.trim().isEmpty()) {
                    continue;
                }
                words.add(txt);
            }
        } catch (IOException e) {
            // Best effort, as before: report the failure and keep whatever was read.
            e.printStackTrace();
        }

        arrayList = new ArrayList<>(words);
    }
  •  對輸入的字符串進行敏感詞彙處理
     **
     * @Description: 對輸入的敏感詞彙進行處理
     * @Date: 2019/5/7 17:13
     **/
    public String filterSensitiveWord(String str){

//        HashMap<Integer,Integer> map = new HashMap<>();
        Map<Integer,Integer> map = new HashMap<>();
        StringBuilder builder = new StringBuilder(str);

        String sensitive ;
        // 遍歷所有的敏感詞彙
        for (int i = 0; i < arrayList.size(); i++){
            sensitive = arrayList.get(i);

            int startIndex = 0;
            // 查找字符串中是否包含 指定得敏感詞彙,若包含返回該詞彙首個詞的索引值,否則返回 -1;
            for (int start = -1; ( start = builder.indexOf(sensitive,startIndex) ) > -1 ;){
                startIndex = start + sensitive.length();
               Integer mapStart =  map.get(start);
               if (null == mapStart || (mapStart != null && startIndex > mapStart))
                   map.put(start,startIndex);
            }
        }

        // 獲取存入的敏感詞索引值集合
       Collection<Integer> keys = map.keySet();
        for (Integer startIndex : keys){
            // 結束索引
            Integer endIndex = map.get(startIndex);
            // 把字符串中的關鍵字替換成*
           builder.replace(startIndex,endIndex,replaceSymbol.substring(startIndex,endIndex));
        }
        map.clear();
    return builder.toString();
    }
  • 調用方法進行測試
  /**
   * Demo entry point: filters one sample sentence and prints the masked result.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
      String string = "你好,毛一鮮,hello,老丁,這是一個錯誤的六四事件";
      // The word-list file is placed under resource/file. Classpath resource names use a
      // single '/' separator; a doubled separator does not match an entry name inside a JAR.
      SensitiveWordUtil swu = new SensitiveWordUtil("file/CensorWords.txt");
      // The sample text is passed here too: the mask buffer is sized from its length.
      swu.initSensitiveWord(string);
      String str = swu.filterSensitiveWord(string);
      System.out.println(str);
  }
  • 測試結果

  • 敏感詞彙文件位置

文件下載:https://pan.baidu.com/s/12NpqFyvJiNz98mNAePEpjg 提取碼:k3nd

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章