- 聲明變量
// Mask buffer: the replacement symbol repeated; (re)built by initSensitiveWord
private StringBuilder replaceSymbol;
// Replacement symbol written in place of sensitive words ("*" by default)
private String symbol = "*";
// Name of the word-list resource; resolved through the class loader in initSensitiveWord
private String fileName;
// All sensitive words loaded from the word-list resource
private List<String> arrayList ;
- 初始化敏感詞彙
/**
 * Loads the sensitive-word list from the classpath resource named by {@code fileName}
 * into {@code arrayList} and rebuilds the {@code replaceSymbol} mask buffer.
 *
 * <p>The resource is read as UTF-8, one word per line. Lines are trimmed; blank lines and
 * duplicates are skipped, and a leading byte-order mark is removed. An empty word must never
 * reach the list because searching for an empty string never advances.
 *
 * <p>I/O failures while reading are reported on standard error and leave the list with
 * whatever was read so far (best-effort, as before).
 *
 * @param str text that is going to be filtered; only its length is used, to size the mask
 *            buffer. {@code null} is treated as an empty string.
 * @throws NullPointerException if the resource cannot be found on the classpath
 **/
public void initSensitiveWord(String str){
    // One replacement symbol per character of the text to be filtered.
    int maskLength = (str == null) ? 0 : str.length();
    replaceSymbol = new StringBuilder();
    for (int i = 0; i < maskLength; i++){
        replaceSymbol.append(symbol);
    }

    arrayList = new ArrayList<>();
    // Hash-based companion of arrayList so that de-duplication stays linear in the file size.
    java.util.Set<String> seen = new java.util.HashSet<>();

    // try-with-resources closes the raw stream even if creating the reader fails;
    // a null resource is simply skipped on close.
    try (java.io.InputStream stream =
             SensitiveWordUtil.class.getClassLoader().getResourceAsStream(fileName)) {
        if (stream == null) {
            // Same exception type the reader constructor used to raise, but with a usable message.
            throw new NullPointerException("Sensitive word file not found on classpath: " + fileName);
        }
        try (BufferedReader bufferedReader =
                 new BufferedReader(new InputStreamReader(stream, "UTF-8"))) {
            boolean firstLine = true;
            for (String txt; (txt = bufferedReader.readLine()) != null; ) {
                if (firstLine) {
                    firstLine = false;
                    // Files saved with a UTF-8 BOM would otherwise corrupt the first word.
                    if (!txt.isEmpty() && txt.charAt(0) == '\uFEFF') {
                        txt = txt.substring(1);
                    }
                }
                txt = txt.trim();
                if (txt.isEmpty()) {
                    continue;
                }
                if (seen.add(txt)) {
                    arrayList.add(txt);
                }
            }
        }
    } catch (IOException e) {
        // Best-effort load: report the failure and keep the words read so far.
        e.printStackTrace();
    }
}
- 對輸入的字符串進行敏感詞彙處理
**
* @Description: 對輸入的敏感詞彙進行處理
* @Date: 2019/5/7 17:13
**/
public String filterSensitiveWord(String str){
// HashMap<Integer,Integer> map = new HashMap<>();
Map<Integer,Integer> map = new HashMap<>();
StringBuilder builder = new StringBuilder(str);
String sensitive ;
// 遍歷所有的敏感詞彙
for (int i = 0; i < arrayList.size(); i++){
sensitive = arrayList.get(i);
int startIndex = 0;
// 查找字符串中是否包含 指定得敏感詞彙,若包含返回該詞彙首個詞的索引值,否則返回 -1;
for (int start = -1; ( start = builder.indexOf(sensitive,startIndex) ) > -1 ;){
startIndex = start + sensitive.length();
Integer mapStart = map.get(start);
if (null == mapStart || (mapStart != null && startIndex > mapStart))
map.put(start,startIndex);
}
}
// 獲取存入的敏感詞索引值集合
Collection<Integer> keys = map.keySet();
for (Integer startIndex : keys){
// 結束索引
Integer endIndex = map.get(startIndex);
// 把字符串中的關鍵字替換成*
builder.replace(startIndex,endIndex,replaceSymbol.substring(startIndex,endIndex));
}
map.clear();
return builder.toString();
}
- 調用方法進行測試
/**
 * Demo entry point: loads the word list, filters a sample sentence and prints the result.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String string = "你好,毛一鮮,hello,老丁,這是一個錯誤的六四事件";
    // The word-list file is placed under resource/file. Class-loader resource names are
    // '/'-separated paths; a doubled slash is tolerated by directory class paths but does
    // not match an entry inside a jar, so a single separator is used.
    SensitiveWordUtil swu = new SensitiveWordUtil("file/CensorWords.txt");
    swu.initSensitiveWord(string);
    String str = swu.filterSensitiveWord(string);
    System.out.println(str);
}
- 測試結果
- 敏感詞彙文件位置
文件下載:https://pan.baidu.com/s/12NpqFyvJiNz98mNAePEpjg 提取碼:k3nd