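// Note: some of the imports below belong to other business modules and were not removed here; delete the ones you do not need when reusing this class.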
package com.aifa.project.indust.client.support;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.fileupload.disk.DiskFileItem;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.multipart.commons.CommonsMultipartFile;
import com.aifa.core.exception.SystemServiceException;
import com.aifa.project.indust.model.EntSensitivity;
import com.lowagie.text.Document;
import com.lowagie.text.DocumentException;
import com.lowagie.text.Font;
import com.lowagie.text.PageSize;
import com.lowagie.text.Paragraph;
import com.lowagie.text.pdf.BaseFont;
import com.lowagie.text.pdf.PdfWriter;
/**
 * Helpers for checking the text of an uploaded Word ({@code .doc}) file against a list of
 * sensitive words.
 *
 * <p>A word is considered present when its characters occur in order in the text, optionally
 * separated by whitespace (so {@code "ab"} also matches {@code "a b"}).
 */
@Component
public class FileSupport {

    /** Separator appended to every reported word except the last one. */
    private static final String WORD_SEPARATOR = "、";

    /** Regex fragment placed between the characters of a word to tolerate embedded whitespace. */
    private static final String OPTIONAL_WHITESPACE = "\\s*";

    /** Characters with a special meaning in a regular expression; they are backslash-escaped. */
    private static final String REGEX_META_CHARS = "\\^$.|?*+()[]{}";

    /**
     * Reads the text content of an uploaded Word document in the binary {@code .doc} format.
     *
     * @param partFile the uploaded file; must not be {@code null}
     * @return the document text as returned by {@code HWPFDocument.getDocumentText()}
     * @throws IllegalArgumentException if {@code partFile} is {@code null}
     * @throws IllegalStateException if the upload cannot be read (the {@link IOException} is
     *         kept as the cause)
     */
    public static String getContents(MultipartFile partFile) {
        if (partFile == null) {
            throw new IllegalArgumentException("partFile must not be null");
        }
        // Keep the stream typed as InputStream: the upload is not guaranteed to be backed by a
        // file on disk, so casting it to FileInputStream can fail with ClassCastException.
        InputStream in = null;
        try {
            in = partFile.getInputStream();
            HWPFDocument document = new HWPFDocument(in);
            return document.getDocumentText();
        } catch (IOException e) {
            // Fail loudly instead of printing the stack trace and running into a
            // NullPointerException a few lines later.
            throw new IllegalStateException(
                    "Unable to read Word document: " + partFile.getOriginalFilename(), e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails; the text was already read
                    // or a more relevant exception is already propagating.
                }
            }
        }
    }

    /**
     * Compares the sensitive words stored in the database with the content of an uploaded file.
     *
     * <p>The returned list contains every sensitive word found in the file, in the order of
     * {@code sensitivity}. Every element except the last carries a trailing {@code "、"}, so
     * concatenating the elements yields a separator-delimited string with no dangling separator.
     *
     * @param partFile the uploaded {@code .doc} file
     * @param sensitivity the sensitive words loaded from the database; {@code null} entries and
     *        entries with a {@code null} or empty word are ignored
     * @return the sensitive words found in the file; empty if none were found or if
     *         {@code sensitivity} is {@code null} or empty
     */
    public List<String> judgeSensitive(MultipartFile partFile, List<EntSensitivity> sensitivity) {
        List<String> hasSensit = new ArrayList<String>();
        if (sensitivity == null || sensitivity.isEmpty()) {
            return hasSensit;
        }

        String text = getContents(partFile);
        for (EntSensitivity entry : sensitivity) {
            String word = (entry == null) ? null : entry.getSensitWord();
            if (containsWord(word, text)) {
                hasSensit.add(word);
            }
        }

        // Add the separator based on the position among the MATCHES, not the position in the
        // database list; otherwise the result ends with a separator whenever the last database
        // word does not occur in the file.
        for (int i = 0; i < hasSensit.size() - 1; i++) {
            hasSensit.set(i, hasSensit.get(i) + WORD_SEPARATOR);
        }
        return hasSensit;
    }

    /**
     * Finds the first word of {@code list} that occurs in {@code inputWords}.
     *
     * <p>The list is only read; it is not modified by this method.
     *
     * @param list the candidate words; {@code null} or empty elements are skipped
     * @param inputWords the text to search
     * @return the first matching word exactly as it appears in {@code list}, or {@code null} if
     *         no word matches or either argument is {@code null}
     */
    public static String isFilter(List<String> list, String inputWords) {
        if (list == null || inputWords == null) {
            return null;
        }
        for (String word : list) {
            if (containsWord(word, inputWords)) {
                return word;
            }
        }
        return null;
    }

    /**
     * Replaces, in place, every word of {@code list} with a regular expression that matches the
     * word with optional whitespace between its characters (for example {@code "ab"} becomes
     * {@code "a\s*b"}). Regex metacharacters contained in a word are escaped so that they match
     * literally.
     *
     * <p>Call this at most once per list: applying it to an already converted list treats the
     * previously inserted {@code \s*} as literal text.
     *
     * @param list the words to convert; may be {@code null} or empty, in which case nothing
     *        happens. {@code null} elements are left untouched.
     */
    public static void changePattern(List<String> list) {
        if (list == null) {
            return;
        }
        for (int i = 0; i < list.size(); i++) {
            String word = list.get(i);
            if (word != null) {
                list.set(i, toLoosePattern(word));
            }
        }
    }

    /** Returns whether {@code word} (non-empty) occurs in {@code text}, ignoring embedded whitespace. */
    private static boolean containsWord(String word, String text) {
        if (word == null || word.length() == 0 || text == null) {
            return false;
        }
        Matcher matcher = Pattern.compile(toLoosePattern(word)).matcher(text);
        return matcher.find();
    }

    /** Builds the whitespace-tolerant, metacharacter-escaped regex source for a single word. */
    private static String toLoosePattern(String word) {
        StringBuilder pattern = new StringBuilder(word.length() * 4);
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            if (i > 0) {
                pattern.append(OPTIONAL_WHITESPACE);
            }
            if (REGEX_META_CHARS.indexOf(c) >= 0) {
                pattern.append('\\');
            }
            pattern.append(c);
        }
        return pattern.toString();
    }
}
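// This code is based on http://your233.iteye.com/blog/1175714 and was adapted to our business needs: it filters text content against sensitive words queried from the database.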