- 声明变量
// Buffer holding the replacement symbol repeated; rebuilt by initSensitiveWord
private StringBuilder replaceSymbol;
// Replacement symbol used to mask sensitive words (defaults to "*")
private String symbol = "*";
// Name of the word-list file; initSensitiveWord loads it as a classpath resource
private String fileName;
// All sensitive words loaded from the word-list file
private List<String> arrayList ;
- 初始化敏感词汇
/**
 * Loads the sensitive-word list from the classpath resource named by {@code fileName}
 * (read as UTF-8, one word per line) into {@code arrayList}, and rebuilds
 * {@code replaceSymbol} with one copy of {@code symbol} per character of {@code str}.
 *
 * <p>Duplicate lines are stored once, in first-seen order. Empty lines are skipped:
 * an empty word matches at every position and would keep a search for it from ever
 * terminating. I/O failures while reading are reported on stderr and leave the list
 * with whatever was read so far.
 *
 * @param str text whose length sizes the replacement buffer; {@code null} is treated as empty
 * @throws NullPointerException if {@code fileName} is {@code null} or the resource cannot be
 *         found on the classpath (same exception type as before, now with a message)
 * @Date: 2019/5/7 17:11
 **/
public void initSensitiveWord(String str){
    replaceSymbol = new StringBuilder();
    int maskLength = (str == null) ? 0 : str.length();
    for (int i = 0; i < maskLength; i++){
        replaceSymbol.append(symbol);
    }

    arrayList = new ArrayList<>();

    if (fileName == null) {
        throw new NullPointerException("Sensitive-word file name has not been set");
    }
    // getResourceAsStream returns null (it does not throw) when the resource is missing.
    java.io.InputStream rawStream =
            SensitiveWordUtil.class.getClassLoader().getResourceAsStream(fileName);
    if (rawStream == null) {
        throw new NullPointerException(
                "Sensitive-word file not found on classpath: " + fileName);
    }

    // Tracks words already stored so de-duplication does not rescan the list per line.
    java.util.Set<String> seen = new java.util.HashSet<>();

    // try-with-resources closes the reader and the underlying stream on every path.
    try (java.io.InputStream in = rawStream;
         BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.isEmpty()) {
                continue;
            }
            if (seen.add(line)) {
                arrayList.add(line);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
- 对输入的字符串进行敏感词汇处理
**
* @Description: 对输入的敏感词汇进行处理
* @Date: 2019/5/7 17:13
**/
public String filterSensitiveWord(String str){
// HashMap<Integer,Integer> map = new HashMap<>();
Map<Integer,Integer> map = new HashMap<>();
StringBuilder builder = new StringBuilder(str);
String sensitive ;
// 遍历所有的敏感词汇
for (int i = 0; i < arrayList.size(); i++){
sensitive = arrayList.get(i);
int startIndex = 0;
// 查找字符串中是否包含 指定得敏感词汇,若包含返回该词汇首个词的索引值,否则返回 -1;
for (int start = -1; ( start = builder.indexOf(sensitive,startIndex) ) > -1 ;){
startIndex = start + sensitive.length();
Integer mapStart = map.get(start);
if (null == mapStart || (mapStart != null && startIndex > mapStart))
map.put(start,startIndex);
}
}
// 获取存入的敏感词索引值集合
Collection<Integer> keys = map.keySet();
for (Integer startIndex : keys){
// 结束索引
Integer endIndex = map.get(startIndex);
// 把字符串中的关键字替换成*
builder.replace(startIndex,endIndex,replaceSymbol.substring(startIndex,endIndex));
}
map.clear();
return builder.toString();
}
- 调用方法进行测试
/**
 * Demo entry point: loads the word list, filters one sample sentence and prints the result.
 */
public static void main(String[] args) {
    final String sample = "你好,毛一鲜,hello,老丁,这是一个错误的六四事件";
    // The sensitive-word file is placed under resource/file.
    SensitiveWordUtil filter = new SensitiveWordUtil("file//CensorWords.txt");
    filter.initSensitiveWord(sample);
    System.out.println(filter.filterSensitiveWord(sample));
}
- 测试结果
- 敏感词汇文件位置
文件下载:https://pan.baidu.com/s/12NpqFyvJiNz98mNAePEpjg 提取码:k3nd