敏感詞過濾

下面的 import 中有些包屬於其它業務，這裡沒有刪除；大家使用時把不需要的 import 刪掉即可。

package com.aifa.project.indust.client.support;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.fileupload.disk.DiskFileItem;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.commons.CommonsMultipartFile;

import com.aifa.core.exception.SystemServiceException;
import com.aifa.project.indust.model.EntSensitivity;
import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.Font;
import com.lowagie.text.PageSize;
import com.lowagie.text.Paragraph;
import com.lowagie.text.pdf.BaseFont;
import com.lowagie.text.pdf.PdfWriter;
@Component
public class FileSupport {

    /** Separator appended to every reported sensitive word except the last one. */
    private static final String WORD_SEPARATOR = "、";

    /** Whitespace-tolerant glue inserted between the characters of a sensitive word. */
    private static final String OPTIONAL_WHITESPACE = "\\s*";

    /** Characters that must be backslash-escaped to be matched literally by a regex. */
    private static final String REGEX_META_CHARS = "\\^$.|?*+()[]{}";

    /**
     * Reads the text of an uploaded Word document (binary {@code .doc} format,
     * parsed with POI's {@link HWPFDocument}).
     *
     * @param partFile the uploaded file
     * @return the document text as returned by {@link HWPFDocument#getDocumentText()}
     * @throws IllegalStateException if an I/O error occurs while opening or parsing the upload
     */
    public static String getContents(MultipartFile partFile){
        // MultipartFile#getInputStream() is only declared to return an InputStream;
        // casting it to FileInputStream fails for uploads that are not file-backed.
        InputStream in = null;
        try {
            in = partFile.getInputStream();
            HWPFDocument hdt = new HWPFDocument(in);
            return hdt.getDocumentText();
        } catch (IOException e) {
            // Fail with the real cause instead of printing it and hitting a
            // NullPointerException a few lines later.
            throw new IllegalStateException("Unable to read the uploaded Word document", e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Best effort: the text is already extracted, or the read
                    // failure is already propagating to the caller.
                }
            }
        }
    }

    /**
     * Compares the sensitive words stored in the database with the content of
     * an uploaded file.
     *
     * <p>The returned list contains the matched words in the order of
     * {@code sensitivity}. Every element except the last one carries a trailing
     * {@code "、"}, so concatenating the elements yields a delimited string
     * without a dangling separator.
     *
     * @author guangwen zhou
     * @param partFile    the uploaded file
     * @param sensitivity the sensitive words loaded from the database; {@code null}
     *                    is treated as empty, and entries without a word are skipped
     * @return the sensitive words found in the file, never {@code null}
     * @throws IllegalStateException if the file cannot be read (see {@link #getContents})
     */
    public List<String> judgeSensitive(MultipartFile partFile,List<EntSensitivity> sensitivity){
        String text = getContents(partFile);
        List<String> hasSensit = new ArrayList<String>();
        if (sensitivity == null) {
            return hasSensit;
        }
        for (EntSensitivity entry : sensitivity) {
            if (entry == null) {
                continue;
            }
            String word = entry.getSensitWord();
            if (!containsSpaced(word, text)) {
                continue;
            }
            // Put the separator on the previous hit once we know another one
            // follows; deciding by the index in the database list left a trailing
            // separator whenever the final database word was not in the text.
            int previous = hasSensit.size() - 1;
            if (previous >= 0) {
                hasSensit.set(previous, hasSensit.get(previous) + WORD_SEPARATOR);
            }
            hasSensit.add(word);
        }
        return hasSensit;
    }

    /**
     * Returns the first word of {@code list} that occurs in {@code inputWords},
     * allowing any amount of whitespace between the characters of the word.
     *
     * <p>The list is not modified. {@code null} and empty words are skipped,
     * because an empty word would match every text.
     *
     * @param list       the sensitive words to look for, may be {@code null}
     * @param inputWords the text to scan, may be {@code null}
     * @return the first matching word exactly as it appears in {@code list},
     *         or {@code null} if nothing matches
     */
    public static String isFilter(List<String> list,String inputWords){
        if (list == null || inputWords == null) {
            return null;
        }
        for (String word : list) {
            if (containsSpaced(word, inputWords)) {
                return word;
            }
        }
        return null;
    }

    /**
     * Replaces every word of {@code list}, in place, with a regular expression
     * that matches the word with optional whitespace between its characters
     * (for example {@code "ab"} becomes {@code "a\s*b"}).
     *
     * <p>Regex meta characters in a word are escaped so they match literally.
     * {@code null} elements are left untouched.
     *
     * @param list the words to convert; {@code null} or empty lists are ignored
     */
    public static void changePattern(List<String> list){
        if (list == null) {
            return;
        }
        for (int i = 0; i < list.size(); i++) {
            String word = list.get(i);
            if (word != null) {
                list.set(i, toSpacedPattern(word));
            }
        }
    }

    /** Tells whether {@code word} is non-empty and occurs in {@code text}, ignoring whitespace inside the word. */
    private static boolean containsSpaced(String word, String text) {
        if (word == null || word.length() == 0 || text == null) {
            return false;
        }
        return Pattern.compile(toSpacedPattern(word)).matcher(text).find();
    }

    /** Builds the whitespace-tolerant, literal-matching regex source for a single word. */
    private static String toSpacedPattern(String word) {
        StringBuilder pattern = new StringBuilder(word.length() * 4);
        int index = 0;
        // Walk by code point so a surrogate pair is never split by the glue.
        while (index < word.length()) {
            int codePoint = word.codePointAt(index);
            if (index > 0) {
                pattern.append(OPTIONAL_WHITESPACE);
            }
            if (REGEX_META_CHARS.indexOf(codePoint) >= 0) {
                pattern.append('\\');
            }
            pattern.appendCodePoint(codePoint);
            index += Character.charCount(codePoint);
        }
        return pattern.toString();
    }

}

本文是在 http://your233.iteye.com/blog/1175714 的基礎上根據業務需求改進而成，可通過查詢數據庫中的敏感詞對文本內容進行過濾。

《Python進階》學習筆記

Leetcode 3161. 物塊放置查詢

一個docker容器暴露多個端口

leetcode 60 排列序列

微服務實踐之使用 Visual Studio 2022 調試Dapr 應用程序

wpf附加屬性理解 WPF附加屬性

反射獲取對象的屬性及屬性值

postman的post請求，要設置參數才能生效

SpringBoot打包成可執行jar包

日常知識點備錄

工作常用Linux命令集

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結