全文檢索lucene

一、下載lucene4.7的jar包:
lucene-analyzers-common-4.7.0.jar
lucene-analyzers-smartcn-4.7.0.jar
lucene-core-4.7.0.jar
lucene-facet-4.7.0.jar
lucene-highlighter-4.7.0.jar
lucene-queries-4.7.0.jar
lucene-queryparser-4.7.0.jar

把以上jar包導入項目中

二、創建索引
*************************************************1.創建索引配置****************************************
因爲索引是按數據庫表創建的,所以用一個配置文件來定義每個索引對應的查詢SQL(全量、新增、更新、刪除四種)
index.xml:

<?xml version='1.0' encoding='UTF-8'?>
<!--
  Index definitions. Each <index> declares:
    name   : index name (also used as the index sub-directory name)
    all    : full rebuild query
    add    : rows created since the last run     (ID >  recorded max ID)
    update : rows modified since the last run    (ID <= recorded max ID)
    delete : rows soft-deleted since the last run (ID <= recorded max ID)
    blob   : colon-separated list of BLOB columns
  {?#ID#} and {?#UPDATE_TIME#} are filled from the state recorded by the previous run.
  Every query is ordered by ID: the queries are paged with ROWNUM, which needs a
  stable order, and the last row read is taken as the new max ID.
-->
<indexs>
    <index>
        <name>riskRule</name>
        <all>
            <![CDATA[
            select ID,NAME,BODY,DOCUMENT_TYPE from ARMS.T_RISK_RULES
            where remove_flag = 0
            ORDER BY ID
            ]]>
        </all>
        <add>
            <![CDATA[
            SELECT ID,NAME,BODY,DOCUMENT_TYPE from ARMS.T_RISK_RULES
            WHERE remove_flag = 0
            and ID > {?#ID#}
            AND UPDATE_TIMESTAMP > {?#UPDATE_TIME#}
            ORDER BY ID
            ]]>
        </add>
        <update>
            <![CDATA[
            SELECT ID,NAME,BODY,DOCUMENT_TYPE from ARMS.T_RISK_RULES
            WHERE remove_flag = 0
            and ID <= {?#ID#}
            AND UPDATE_TIMESTAMP > {?#UPDATE_TIME#}
            ORDER BY ID
            ]]>
        </update>
        <delete>
            <![CDATA[
            SELECT ID,NAME,BODY,DOCUMENT_TYPE from ARMS.T_RISK_RULES
            WHERE remove_flag = 1
            AND ID <= {?#ID#}
            AND UPDATE_TIMESTAMP > {?#UPDATE_TIME#}
            ORDER BY ID
            ]]>
        </delete>
        <blob>BODY:DOCUMENT_TYPE</blob>
    </index>
    <index>
        <name>riskProblem</name>
        <all>
            <![CDATA[
            SELECT ID,TITLE,CONTENTS,PUNISH,CRITERION_CONTENT FROM ARMS.T_RISK
            WHERE REMOVE_FLAG = 0
            ORDER BY ID
            ]]>
        </all>
        <add>
            <![CDATA[
            SELECT ID,TITLE,CONTENTS,PUNISH,CRITERION_CONTENT FROM ARMS.T_RISK
            WHERE REMOVE_FLAG = 0
            AND ID > {?#ID#}
            AND UPDATE_TIME > {?#UPDATE_TIME#}
            ORDER BY ID
            ]]>
        </add>
        <update>
            <![CDATA[
            SELECT ID,TITLE,CONTENTS,PUNISH,CRITERION_CONTENT FROM ARMS.T_RISK
            WHERE REMOVE_FLAG = 0
            AND ID <= {?#ID#}
            AND UPDATE_TIME > {?#UPDATE_TIME#}
            ORDER BY ID
            ]]>
        </update>
        <delete>
            <![CDATA[
            SELECT ID,TITLE,CONTENTS,PUNISH,CRITERION_CONTENT FROM ARMS.T_RISK
            WHERE REMOVE_FLAG = 1
            AND ID <= {?#ID#}
            AND UPDATE_TIME > {?#UPDATE_TIME#}
            ORDER BY ID
            ]]>
        </delete>
        <blob></blob>
    </index>
</indexs>


讀取解析index.xml的工具類
package com.lhzq.ibms.lucene.util;

import com.htsc.abms.lucene.model.Index;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
 * Singleton that parses the index configuration file ({@code index/index.xml})
 * into a list of {@link Index} definitions.
 *
 * <p>The file is re-read on every {@link #getInstance()} call, so edits to the
 * configuration are picked up without a restart. Reloading happens under a lock
 * and publishes a freshly built list, so a list previously returned by
 * {@link #getTableConfigs()} is never modified while a caller iterates it.
 */
public class IndexConfigMgr
{
    /**
     * Lazily created singleton instance.
     */
    private static IndexConfigMgr indexConfigMgr;

    /**
     * Monitor guarding singleton creation and configuration reloads.
     */
    private static final Object obj = new Object();

    /**
     * Index definitions from the most recent successful load. The list is
     * replaced as a whole on reload and never mutated after publication.
     */
    private volatile List<Index> tableConfigs;

    /**
     * Logger.
     */
    private static final Logger logger = LoggerFactory.getLogger(IndexConfigMgr.class);

    /**
     * Location of the index configuration file, resolved through WorkSpaceCenter.
     */
    private static final String INDEX_DIR = "index/index.xml";

    /**
     * Name of the {@code index} element.
     */
    private static final String INDEX_NODE_NAME = "index";

    /**
     * Name of the {@code name} element.
     */
    private static final String NAME_NODE_NAME = "name";

    /**
     * Name of the {@code all} element.
     */
    private static final String ALL_NODE_NAME = "all";

    /**
     * Name of the {@code add} element.
     */
    private static final String ADD_NODE_NAME = "add";

    /**
     * Name of the {@code update} element.
     */
    private static final String UPDATE_NODE_NAME = "update";

    /**
     * Name of the {@code delete} element.
     */
    private static final String DELETE_NODE_NAME = "delete";

    /**
     * Name of the {@code blob} element.
     */
    private static final String BLOB_NODE_NAME = "blob";

    /**
     * Private constructor: instances are obtained through {@link #getInstance()}.
     */
    private IndexConfigMgr()
    {
        tableConfigs = new ArrayList<Index>();
    }

    /**
     * Returns the singleton, reloading the configuration file first.
     *
     * @return the shared instance holding the freshly loaded configuration
     */
    public static IndexConfigMgr getInstance()
    {
        synchronized (obj)
        {
            if (null == indexConfigMgr)
            {
                indexConfigMgr = new IndexConfigMgr();
            }

            // Reload inside the lock so two callers never rebuild the list concurrently.
            indexConfigMgr.load();

            return indexConfigMgr;
        }
    }

    /**
     * Reads and parses the configuration file. On any failure the error is logged
     * and the previously loaded definitions are kept.
     */
    private void load()
    {
        String path = WorkSpaceCenter.getClassPath(INDEX_DIR);

        try
        {
            Document doc = getDocumentByPath(path);

            // Publish only after the whole file parsed successfully.
            tableConfigs = loadIndexes(doc);
        } catch (Exception e) {
            logger.error("加載index.xml文件失敗",e);
        }
    }

    /**
     * Parses the XML file at the given path into a DOM document.
     *
     * @param path file system path of the XML file
     * @return the parsed document
     * @throws Exception if the parser cannot be created or the file cannot be read or parsed
     */
    private Document getDocumentByPath(String path) throws Exception
    {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        return db.parse(new File(path));
    }

    /**
     * Converts every non-empty {@code index} element of the document into an {@link Index}.
     *
     * @param doc parsed configuration document
     * @return a new list with one entry per usable {@code index} element
     */
    private List<Index> loadIndexes(Document doc)
    {
        NodeList indexNodes = doc.getElementsByTagName(INDEX_NODE_NAME);
        List<Index> loaded = new ArrayList<Index>();

        for (int i = 0; i < indexNodes.getLength(); i++)
        {
            Node node = indexNodes.item(i);
            if (!node.hasChildNodes())
            {
                continue;
            }

            Index index = newIndex(node);
            // An element without any populated child yields nothing; never store null.
            if (null != index)
            {
                loaded.add(index);
            }
        }

        return loaded;
    }

    /**
     * Builds one {@link Index} from the children of an {@code index} element.
     * Child elements that are empty (for example {@code <blob></blob>}) leave the
     * corresponding value {@code null}.
     *
     * @param parent the {@code index} element
     * @return the index definition, or {@code null} if no child element had content
     */
    private Index newIndex(Node parent)
    {
        String name = null;
        String all = null;
        String add = null;
        String update = null;
        String delete = null;
        String blob = null;
        boolean hasContent = false;

        NodeList nodes = parent.getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++)
        {
            Node node = nodes.item(i);

            // Skips whitespace text nodes and empty elements.
            if (!node.hasChildNodes())
            {
                continue;
            }
            hasContent = true;

            String nodeName = node.getNodeName();
            String text = node.getTextContent().trim();

            if (NAME_NODE_NAME.equals(nodeName))
            {
                name = text;
            }
            else if (ALL_NODE_NAME.equals(nodeName))
            {
                all = text;
            }
            else if (ADD_NODE_NAME.equals(nodeName))
            {
                add = text;
            }
            else if (UPDATE_NODE_NAME.equals(nodeName))
            {
                update = text;
            }
            else if (DELETE_NODE_NAME.equals(nodeName))
            {
                delete = text;
            }
            else if (BLOB_NODE_NAME.equals(nodeName))
            {
                blob = text;
            }
        }

        // Build the definition once, after all children have been read.
        return hasContent ? new Index(name,all,add,update,delete,blob) : null;
    }

    /**
     * Returns the index definitions from the most recent successful load.
     *
     * @return the list of index definitions (empty if nothing has been loaded)
     */
    public List<Index> getTableConfigs()
    {
        return tableConfigs;
    }
}


*************************************************2.定時創建索引****************************************
如果創建索引的數據量較大,創建索引需要花很長的時間,建議創建定時任務創建索引

第一次需要全量創建索引;之後只需增量更新索引(新增、更新、刪除),不必每次全部重建。
爲了做增量更新,需要記錄索引中的最大ID和上一次更新時間

1>////////////////////////創建索引的定時任務CreateIndexJob.java:

package com.lhzq.ibms.lucene.job;

import com.htsc.abms.lucene.model.Index;
import com.htsc.abms.lucene.service.CreateIndexService;
import com.lhzq.ibms.commons.util.Configuration;
import com.lhzq.ibms.lucene.util.*;
import com.lhzq.leap.core.utils.DateUtils;
import com.lhzq.leap.core.utils.FileUtility;
import com.lhzq.leap.core.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Job that builds and incrementally refreshes the Lucene indexes described by
 * the index configuration.
 *
 * <p>{@link #loadIndex()} rebuilds every index from scratch;
 * {@link #updateIndex()} applies additions, updates and deletions since the
 * previous run, using the max ID and timestamp stored by {@link IndexLog}.
 * Both return a human-readable progress report.
 */
@Service("createIndexJob")
public class CreateIndexJob
{
    /**
     * Logger.
     */
    private static final Logger logger = LoggerFactory.getLogger(CreateIndexJob.class);

    /**
     * Service used to run the configured queries.
     */
    @Autowired
    private CreateIndexService indexService;

    /**
     * Deletes the index directory and rebuilds every configured index.
     *
     * @return a progress report; on failure it ends with the exception message
     */
    public String loadIndex()
    {
        StringBuilder message = new StringBuilder();
        message.append("["+DateUtils.now()+"]:開始創建索引***********!\r\n");
        logger.info("開始創建索引***************************");
        long begin=System.currentTimeMillis();
        List<Index> indexes = IndexConfigMgr.getInstance().getTableConfigs();
        try
        {
            // Remove the old index directory first.
            String indexPath = Configuration.getLuceneIndexDir();
            FileUtility.deleteDir(indexPath);
            message.append("刪除index目錄成功!\r\n");
            logger.info("刪除index目錄成功**********");

            // Create the operation log file.
            CreateLog.init();
            for(Index index : indexes)
            {
                message.append("開始創建["+index.getName()+"]模塊的索引!\r\n");
                logger.info("開始創建["+index.getName()+"]索引======");

                // Per-index state is local: this bean is shared, so it must not live in fields.
                BuildIndex buildIndex = new BuildIndex(index.getName());
                IndexLog indexLog = null;
                BigDecimal maxId = null;
                boolean succeeded = false;
                try
                {
                    indexLog = new IndexLog(index.getName());

                    // Write all documents.
                    buildIndex.setDocType(BuildIndex.DOC_TYPE_CREATE);
                    maxId = pageAddDoc(buildIndex,index,new HashMap<String,Object>());
                    succeeded = true;
                }
                finally
                {
                    // Always release the writer, otherwise its write lock blocks later runs.
                    closeIndex(buildIndex, succeeded);
                }

                // Record the max ID and time for the next incremental update.
                after(indexLog,maxId);

                message.append("創建["+index.getName()+"]模塊索引完成!\r\n");
                logger.info("創建["+index.getName()+"]索引完成=======");
            }

            message.append("["+DateUtils.now()+"]:創建索引完成***********!\r\n");
            long end=System.currentTimeMillis();
            message.append("創建索引一共花費:"+(float)(end-begin)/1000+"秒");

            logger.info("創建索引完成********************************");
        } catch (Exception e) {
            message.append("創建索引異常:"+e.getMessage());
            logger.error("加載所有的索引失敗", e);
        }

        return message.toString();
    }

    /**
     * Applies additions, updates and deletions to every configured index.
     *
     * @return a progress report; on failure it ends with the exception message
     */
    public String updateIndex()
    {
        StringBuilder message = new StringBuilder();
        long begin=System.currentTimeMillis();
        message.append("["+DateUtils.now()+"]:開始更新索引***********!\r\n");

        List<Index> indexes = IndexConfigMgr.getInstance().getTableConfigs();
        try
        {
            for(Index index : indexes)
            {
                message.append("開始更新["+index.getName()+"]模塊的索引!\r\n");

                BuildIndex buildIndex = new BuildIndex(index.getName());
                IndexLog indexLog = null;
                BigDecimal maxId = null;
                boolean succeeded = false;
                try
                {
                    // Read the state stored by the previous run.
                    indexLog = new IndexLog(index.getName());
                    HashMap<String,Object> params = before(indexLog);

                    // Previously recorded max ID.
                    maxId =(BigDecimal)params.get("ID");

                    // Add new documents; they may raise the max ID.
                    buildIndex.setDocType(BuildIndex.DOC_TYPE_ADD);
                    BigDecimal addMaxId = pageAddDoc(buildIndex,index,params);
                    if(null != addMaxId){
                        maxId = addMaxId;
                    }

                    // Update changed documents.
                    buildIndex.setDocType(BuildIndex.DOC_TYPE_UPDATE);
                    pageAddDoc(buildIndex,index,params);

                    // Remove deleted documents.
                    buildIndex.setDocType(BuildIndex.DOC_TYPE_DELETE);
                    pageAddDoc(buildIndex,index,params);
                    succeeded = true;
                }
                finally
                {
                    // Always release the writer, otherwise its write lock blocks later runs.
                    closeIndex(buildIndex, succeeded);
                }

                // Record the new max ID and time.
                after(indexLog,maxId);
                message.append("更新["+index.getName()+"]模塊索引完!\r\n");
            }

            message.append("["+DateUtils.now()+"]:更新索引完成***********!\r\n");
            long end=System.currentTimeMillis();
            message.append("更新索引花費了時間:" + (float)(end-begin)/1000+"秒");
        } catch (Exception e) {
            message.append("更新索引異常:" + e.getMessage());
            logger.error("更新索引失敗", e);
        }

        return message.toString();
    }

    /**
     * Closes the index writer. After a successful run a close failure is propagated,
     * since the index may not have been committed. After a failed run it is only
     * logged, so that it does not hide the original exception.
     *
     * @param buildIndex writer wrapper to close
     * @param succeeded whether the indexing work completed without an exception
     * @throws IOException if closing fails after a successful run
     */
    private void closeIndex(BuildIndex buildIndex, boolean succeeded) throws IOException
    {
        if (succeeded)
        {
            buildIndex.close();
            return;
        }

        try
        {
            buildIndex.close();
        }
        catch (IOException e)
        {
            logger.error("關閉索引寫入器失敗", e);
        }
    }

    /**
     * Reads the max ID and last update time stored by the previous run.
     *
     * @param indexLog state file of the index
     * @return a map with keys {@code ID} and {@code UPDATE_TIME}, or an empty map
     *         if no state has been stored yet
     * @throws IllegalStateException if the stored state is not of the form {@code id,time}
     */
    private HashMap<String,Object> before(IndexLog indexLog)
    {
        HashMap<String,Object> params = new HashMap<String, Object>();
        String content = indexLog.readText();

        if(!StringUtils.isEmpty(content))
        {
            String[] parts = content.split(",");
            if (parts.length < 2)
            {
                throw new IllegalStateException("索引日誌內容格式錯誤:" + content);
            }
            String id = parts[0];
            String now = parts[1];

            params.put("ID", new BigDecimal(id));
            params.put("UPDATE_TIME",DateUtils.toDate(now));

            logger.info("索引庫中最大的ID:"+ id+",上次更新時間:"+now);
        }

        return params;
    }

    /**
     * Stores the new max ID together with the current time. Does nothing when
     * {@code maxId} is {@code null}.
     *
     * @param indexLog state file of the index
     * @param maxId max ID to record, may be {@code null}
     */
    private void after(IndexLog indexLog,BigDecimal maxId)
    {
        if(null == maxId){
            return;
        }

        String now = DateUtils.toString(new Date());
        indexLog.WriteText(maxId + "," +now );

        logger.info("寫入最大的ID:"+ maxId+",記錄更新時間:"+now);
    }

    /**
     * Runs the query matching the writer's current operation type page by page and
     * applies each page to the index.
     *
     * @param buildIndex writer wrapper whose doc type selects the operation
     * @param index index definition providing the queries
     * @param param query parameters
     * @return the max ID reported by the data page for create/add operations,
     *         otherwise {@code null}
     * @throws IOException if writing to the index fails
     */
    private BigDecimal pageAddDoc(BuildIndex buildIndex,Index index,Map param) throws IOException {
        int docType = buildIndex.getDocType();

        String baseSql;
        switch (docType)
        {
            case BuildIndex.DOC_TYPE_CREATE:
                baseSql = index.getAll();
                break;
            case BuildIndex.DOC_TYPE_ADD:
                baseSql = index.getAdd();
                break;
            case BuildIndex.DOC_TYPE_UPDATE:
                baseSql = index.getUpdate();
                break;
            case BuildIndex.DOC_TYPE_DELETE:
                baseSql = index.getDelete();
                break;
            default:
                // Unknown operation: nothing to do.
                return null;
        }

        DataPage dataPage = new DataPage(this.indexService,index.getBlob(),param);
        dataPage.setBaseSql(baseSql);

        // Integer ceiling division; float math loses precision for large row counts.
        int total = dataPage.getCount();
        int pages = (int) (((long) total + DataPage.PAGE_SIZE - 1) / DataPage.PAGE_SIZE);
        dataPage.setTotalPage(pages);

        for (int page = 1; page <= pages; page++)
        {
            List<Map<String,Object>> rows = dataPage.queryPage(page);
            switch (docType)
            {
                case BuildIndex.DOC_TYPE_UPDATE:
                    buildIndex.updateDoc(rows);
                    break;
                case BuildIndex.DOC_TYPE_DELETE:
                    buildIndex.deleteDoc(rows);
                    break;
                default:
                    // CREATE and ADD both append documents.
                    buildIndex.addDoc(rows);
                    break;
            }
        }

        boolean appends = docType == BuildIndex.DOC_TYPE_CREATE || docType == BuildIndex.DOC_TYPE_ADD;
        return appends ? dataPage.getMaxId() : null;
    }

    public CreateIndexService getIndexService() {
        return indexService;
    }

    public void setIndexService(CreateIndexService indexService) {
        this.indexService = indexService;
    }
}


2>.//////////////////////////創建索引工具類BuildIndex.java:

package com.lhzq.ibms.lucene.util;

import com.lhzq.ibms.commons.util.Configuration;
import com.lhzq.leap.core.config.CommonConfig;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * Thin wrapper around a Lucene {@link IndexWriter} for one named index.
 *
 * <p>Rows are passed in as maps of column name to value; every column becomes a
 * stored, analyzed text field. Documents are identified by their {@code ID} field
 * for updates and deletions. Call {@link #close()} when done.
 */
public class BuildIndex
{
    /**
     * Operation types.
     */
    public static final int DOC_TYPE_CREATE = 0;
    public static final int DOC_TYPE_ADD = 1;
    public static final int DOC_TYPE_UPDATE = 2;
    public static final int DOC_TYPE_DELETE = 3;

    /**
     * Directory backing the writer; closed together with it.
     */
    private Directory directory;

    /**
     * Index writer.
     */
    private IndexWriter indexWriter;

    /**
     * Current operation type (one of the {@code DOC_TYPE_*} constants).
     */
    private int docType;

    /**
     * Opens a writer on the sub-directory {@code name} of the configured index directory.
     *
     * @param name index name, used as the directory name
     * @throws IOException if the directory or the writer cannot be opened
     */
    public BuildIndex(String name) throws IOException {
        String indexPath = Configuration.getLuceneIndexDir();
        indexWriter = getIndexWriter(indexPath + "/" + name);
    }

    /**
     * Opens an {@link IndexWriter} on the given directory using the shared analyzer.
     * The directory is closed again if the writer cannot be created.
     */
    private IndexWriter getIndexWriter(String indexDir) throws IOException {
        // The index is stored on disk.
        Directory dir = DirCenter.getDir(indexDir);

        Analyzer analyzer = AnalyzerCenter.getAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);

        boolean opened = false;
        try {
            IndexWriter writer = new IndexWriter(dir, config);
            directory = dir;
            opened = true;
            return writer;
        } finally {
            if (!opened && null != dir) {
                try {
                    dir.close();
                } catch (IOException ignored) {
                    // The failure that prevented opening the writer is the one to report.
                }
            }
        }
    }

    /**
     * Converts one row into a document with one stored text field per column.
     * A {@code null} column value is stored as an empty string rather than the
     * literal text "null", which would otherwise be searchable and displayed.
     */
    private Document createDoc(Map<String, Object> record) {
        Document doc = new Document();

        for (Map.Entry<String, Object> column : record.entrySet()) {
            Object raw = column.getValue();
            String value = (null == raw) ? "" : String.valueOf(raw);
            doc.add(new Field(column.getKey(), value, TextField.TYPE_STORED));
        }

        return doc;
    }

    /**
     * Adds one document per row.
     *
     * @param data rows to index
     * @throws IOException if writing to the index fails
     */
    public void addDoc(List<Map<String, Object>> data) throws IOException {
        for (Map<String, Object> record : data) {
            indexWriter.addDocument(createDoc(record));
        }
    }

    /**
     * Replaces the document whose {@code ID} term matches each row.
     *
     * @param data rows to re-index
     * @throws IOException if writing to the index fails
     */
    public void updateDoc(List<Map<String, Object>> data) throws IOException {
        for (Map<String, Object> record : data) {
            Term term = new Term("ID", "" + record.get("ID"));
            indexWriter.updateDocument(term, createDoc(record));
        }
    }

    /**
     * Deletes the documents whose {@code ID} term matches each row.
     *
     * @param data rows identifying the documents to delete
     * @throws IOException if writing to the index fails
     */
    public void deleteDoc(List<Map<String, Object>> data) throws IOException {
        for (Map<String, Object> record : data) {
            indexWriter.deleteDocuments(new Term("ID", "" + record.get("ID")));
        }
    }

    /**
     * Closes the writer and then its directory. Safe to call more than once.
     * If closing the writer fails, both stay open so the call can be retried.
     *
     * @throws IOException if closing fails
     */
    public void close() throws IOException {
        if (null != this.indexWriter) {
            this.indexWriter.close();
            this.indexWriter = null;
        }

        if (null != this.directory) {
            this.directory.close();
            this.directory = null;
        }
    }

    public int getDocType() {
        return docType;
    }

    public void setDocType(int docType) {
        this.docType = docType;
    }
}


3>.////////////////////////////單例拿到解析器

package com.lhzq.ibms.lucene.util;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

/**
 * Holder of the single {@link Analyzer} shared by indexing and searching, so
 * that both sides tokenize text the same way.
 */
public class AnalyzerCenter
{
    /**
     * Created once during class initialization, which the JVM performs safely
     * even when several threads ask for the analyzer at the same time.
     */
    private static final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);

    private AnalyzerCenter(){}

    /**
     * Returns the shared analyzer.
     *
     * @return the shared {@link StandardAnalyzer} instance
     */
    public static Analyzer getAnalyzer()
    {
        return analyzer;
    }
}


4>.///////////////////////////////打開一個索引目錄工具類
package com.lhzq.ibms.lucene.util;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.File;
import java.io.IOException;

/**
 * Opens a Lucene {@link Directory} on a file system path, creating the folder
 * if it does not exist yet.
 */
public class DirCenter
{
    private DirCenter(){}

    /**
     * Opens the directory at {@code path}, creating it and any missing parent
     * folders first.
     *
     * @param path file system path of the index directory
     * @return the opened directory, or {@code null} if {@code path} is {@code null}
     * @throws IOException if the folder cannot be created or opened
     */
    public static Directory getDir(String path) throws IOException
    {
        if(null == path)
        {
            return null;
        }

        File indexDir = new File(path);

        // mkdirs() also creates missing parents. The isDirectory() re-check covers
        // the case where another thread created the folder in the meantime.
        if(!indexDir.exists() && !indexDir.mkdirs() && !indexDir.isDirectory())
        {
            throw new IOException("無法創建索引目錄:" + indexDir.getAbsolutePath());
        }

        // The index is stored on disk.
        return FSDirectory.open(indexDir);
    }
}


5>./////////////////////////////一次創建索引太多,會導致內存溢出,需要分頁創建
package com.lhzq.ibms.lucene.util;

import com.htsc.abms.lucene.service.CreateIndexService;
import java.math.BigDecimal;
import java.util.List;
import java.util.Map;

/**
 * Pages through the result of a base SQL query so that large tables can be
 * indexed without loading every row at once. Paging uses Oracle-style
 * {@code ROWNUM} wrapping of the base query.
 *
 * <p>While pages are read, the largest {@code ID} value seen so far is tracked
 * and exposed through {@link #getMaxId()}.
 */
public class DataPage
{
    /**
     * Rows per page.
     */
    public static final int PAGE_SIZE = 20;

    /**
     * Service that executes the queries.
     */
    private CreateIndexService indexService;

    /**
     * Base (unpaged) SQL.
     */
    private String baseSql;

    /**
     * BLOB column specification passed through to the service.
     */
    private String blob;

    /**
     * Query parameters.
     */
    private Map param;

    /**
     * Largest ID seen in the pages read so far; {@code null} until a row was read.
     */
    private BigDecimal maxId;

    /**
     * Total number of pages, as set by the caller.
     */
    private Integer totalPage;


    /**
     * Creates a pager for the given service and query parameters.
     *
     * @param indexService service that executes the queries
     * @param blob BLOB column specification
     * @param param query parameters
     */
    public DataPage(CreateIndexService indexService,String blob,Map param)
    {
        this.indexService = indexService;
        this.blob = blob;
        this.param = param;
    }

    /**
     * Queries one page of rows.
     *
     * @param pageNo 1-based page number
     * @return the rows of the page as returned by the service
     */
    public List<Map<String,Object>> queryPage(int pageNo)
    {
        String sql = "SELECT * FROM (SELECT A.*,ROWNUM RN FROM ("+this.baseSql+") A WHERE ROWNUM <= "+pageNo * PAGE_SIZE+")"
                +" WHERE RN >= "+((pageNo-1) * PAGE_SIZE + 1);

        List<Map<String,Object>> data = indexService.queryPageData(sql,this.blob,this.param);

        // Track the maximum over every row instead of trusting the last row of the
        // last page: this is correct for unordered queries and cannot fail when a
        // page comes back empty because rows changed after the count was taken.
        if(null != data)
        {
            for(Map<String,Object> row : data)
            {
                BigDecimal id = (BigDecimal)row.get("ID");
                if(null != id && (null == this.maxId || id.compareTo(this.maxId) > 0))
                {
                    this.maxId = id;
                }
            }
        }

        return data;
    }

    /**
     * Counts the rows returned by the base query.
     *
     * @return total row count as reported by the service
     */
    public Integer getCount()
    {
        String sql = "SELECT COUNT(*) CNT FROM ("+this.baseSql+") A";
        return indexService.getCount(sql,this.param);
    }

    public BigDecimal getMaxId() {
        return maxId;
    }

    public void setBaseSql(String baseSql) {
        this.baseSql = baseSql;
    }

    public void setTotalPage(Integer totalPage) {
        this.totalPage = totalPage;
    }
}


6>.///////////////////////////////記錄上一次更新索引的時間和最大ID,方便更新索引
package com.lhzq.ibms.lucene.util;

import com.lhzq.ibms.commons.util.Configuration;
import com.lhzq.leap.core.config.CommonConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;

/**
 * Small state file kept next to an index. It stores a single line holding the
 * max indexed ID and the time of the last index update, which the incremental
 * update reads back on its next run.
 */
public class IndexLog
{
    /**
     * Logger.
     */
    private static final Logger logger = LoggerFactory.getLogger(IndexLog.class);

    /**
     * File holding the max ID and the last update time.
     */
    private final File logFile;

    /**
     * Locates the state file {@code <indexDir>/<dir>/<dir>.txt}, creating the
     * folder (including missing parents) and the file if they do not exist.
     *
     * @param dir index name
     * @throws IOException if the folder or the file cannot be created
     */
    public IndexLog(String dir) throws IOException
    {
        String indexPath = Configuration.getLuceneIndexDir()+"/" + dir;
        File fileDir = new File(indexPath);

        // mkdirs() also creates missing parents. The isDirectory() re-check covers
        // a folder created concurrently by someone else.
        if(!fileDir.exists() && !fileDir.mkdirs() && !fileDir.isDirectory()){
            throw new IOException("無法創建索引日誌目錄:" + fileDir.getAbsolutePath());
        }
        File file = new File(fileDir,dir + ".txt");

        if(!file.exists()){
            file.createNewFile();
        }

        logFile = file;
    }

    /**
     * Reads the first line of the state file.
     *
     * @return the stored line, or {@code null} if the file is empty or cannot be
     *         read (read errors are logged)
     */
    public String readText()
    {
        BufferedReader br = null;
        String content = null;
        try
        {
            br = new BufferedReader(new FileReader(logFile));
            content = br.readLine();
        }
        catch (IOException e)
        {
            logger.error("讀取最大ID和上次更新索引時間失敗", e);
        }
        finally
        {
            try {
                if (br != null) {
                    br.close();
                }
            } catch (IOException e) {
                logger.error("讀取最大ID和上次更新索引時,關閉IO失敗", e);
            }
        }

        return content;
    }

    /**
     * Overwrites the state file with the given text. Write errors are logged and
     * not propagated.
     *
     * @param text content to store
     */
    public void WriteText(String text)
    {
        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(logFile));
            bw.write(text);
        } catch (IOException e) {
            logger.error("寫入最大ID和上次更新索引失敗", e);
        }
        finally
        {
            try {
                if(bw!=null){
                    bw.close();
                }
            } catch (IOException e) {
                logger.error("寫入最大ID和上次更新索引時,關閉IO失敗", e);
            }
        }
    }
}

***********************************************************3.手動創建索引******************************************
1>.////////////////////////頁面:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<%@ page contentType="text/html;charset=UTF-8" language="java" %>

<%@include file="/modules/comm/loadingData.jsp"%>
<%@include file="/common/path_header.jsp" %>
<%@include file="/common/jqgrid_header.jsp" %>

<!--dwr-->
<script type="text/javascript" src="<%=path%>/dwr/engine.js"></script>
<script type="text/javascript" src="<%=path%>/dwr/util.js"></script>
<script type="text/javascript" src="<%=path%>/dwr/interface/dwrIndexManage.js"></script>

<html>

<head>
<title>索引維護</title>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<style type="text/css">
.indexInfo{
width: 800px;
height: 200px;
border: 2px solid #E5E5E5;
}
</style>
</head>
<body>

<div style="text-align: center">
<ul style="text-align:left;list-style-type:none;">
<li>
<a class="button glow button-rounded button-flat-primary button-tiny" onclick="createIndex();"> 創建索引 </a>   
</li>
<li>
<div class="indexInfo" id="createInfo">創建索引日誌...</div>
</li>
<li style="margin-top: 10px;">
<a class="button glow button-rounded button-flat-primary button-tiny" onclick="updateIndex();"> 更新索引 </a>
</li>
<li>
<div class="indexInfo" id="updateInfo">更新索引日誌...</div>
</li>
</ul>
</div>
</body>
</html>

<script type="text/javascript">

var interval = null;

var time = null;

// Starts a full index rebuild on the server and, after a one-minute delay,
// begins polling once per second for the result text.
function createIndex(){
    // Cancel timers left over from an earlier click so polls do not pile up.
    if(time !== null){
        clearTimeout(time);
    }
    if(interval !== null){
        clearInterval(interval);
    }

    // Show the loading overlay.
    createDiv();

    // Fire the rebuild; the result is fetched later by polling.
    dwrIndexManage.createIndex();

    // Wait before the first poll, then poll every second (numeric delay).
    time = setTimeout(function(){
        interval = setInterval(showCtResult, 1000);
    },60000);
}

// Polls the server for the rebuild result. Once a non-empty result arrives,
// stops the timers, shows the text and removes the loading overlay.
function showCtResult(){
    dwrIndexManage.queryCtResult(function(data){
        var hasResult = (null!=data && data!='');
        if(!hasResult){
            return;
        }

        clearTimeout(time);
        clearInterval(interval);
        setValue("createInfo",data);

        // Loading finished: remove the overlay.
        removeDiv();
    });
}

// Runs an incremental index update on the server and shows the returned report.
function updateIndex(){
    // Show the loading overlay.
    createDiv();

    dwrIndexManage.updateIndex({
        // Called with the report text when the update finishes.
        callback: function(data){
            setValue("updateInfo",data);

            // Loading finished: remove the overlay.
            removeDiv();
        },
        // Timeout in milliseconds; 0 disables it because an update can run long.
        timeout: 0,
        // Called when the remote call fails. The overlay must be removed here too,
        // otherwise the page stays blocked after an error.
        errorHandler:function(message) {
            removeDiv();
            alert(message);
        }
    });
}

// Shows `data` as preformatted text inside the element with the given id.
// The text is inserted as a text node, not as markup, so characters such as
// "<" or "&" in server messages are displayed literally.
function setValue(id,data){
    var target = document.getElementById(id);
    var pre = document.createElement("pre");
    pre.appendChild(document.createTextNode(data == null ? "" : String(data)));

    target.innerHTML = "";
    target.appendChild(pre);
}
</script>

2>.////////////////////////////////DWR操作
package com.lhzq.ibms.lucene.dwr;

import com.lhzq.ibms.lucene.job.CreateIndexJob;
import com.lhzq.ibms.lucene.util.CreateLog;
import org.springframework.beans.factory.annotation.Autowired;

/**
 * DWR-exposed entry points for building and updating the indexes by hand
 * from the maintenance page.
 */
public class DwrIndexManage
{
    @Autowired
    private CreateIndexJob indexJob;

    /**
     * Rebuilds all indexes and stores the resulting report in the creation log,
     * where {@link #queryCtResult()} picks it up.
     */
    public void createIndex()
    {
        CreateLog.write(indexJob.loadIndex());
    }

    /**
     * Returns the current content of the creation log with surrounding
     * whitespace removed.
     *
     * @return the trimmed creation log text
     */
    public String queryCtResult()
    {
        String report = CreateLog.read();
        return report.trim();
    }

    /**
     * Runs an incremental update of all indexes.
     *
     * @return the report produced by the update
     */
    public String updateIndex()
    {
        String report = indexJob.updateIndex();
        return report;
    }
}


3>.////////////////////////////頁面返回創建操作日誌:
package com.lhzq.ibms.lucene.util;

import com.lhzq.ibms.commons.util.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;

/**
 * File-backed log holding the report of the most recent index creation, so the
 * maintenance page can fetch it after the rebuild has finished.
 */
public class CreateLog
{
    /**
     * Logger.
     */
    private static Logger logger = LoggerFactory.getLogger(CreateLog.class);

    /**
     * Path of the creation log file.
     */
    private static String path = Configuration.getLuceneIndexDir()+"/createLog.txt";

    /**
     * Creates the index directory and the log file if they do not exist yet.
     * A failure to create the file is logged, not thrown.
     */
    public static void init(){
        File indexDir = new File(Configuration.getLuceneIndexDir());
        if(!indexDir.exists())
        {
            indexDir.mkdir();
        }

        File createLogFile = new File(path);
        if(createLogFile.exists()){
            return;
        }

        try {
            createLogFile.createNewFile();
        } catch (IOException e) {
            logger.error("創建日誌文件失敗",e);
        }
    }

    /**
     * Reads the whole log file, terminating every line with CRLF.
     *
     * @return the file content; whatever was read before an I/O error (possibly
     *         empty) if reading fails
     */
    public static String read()
    {
        StringBuilder text = new StringBuilder();
        BufferedReader reader = null;
        try
        {
            reader = new BufferedReader(new FileReader(path));
            for(String line = reader.readLine(); line != null; line = reader.readLine())
            {
                text.append(line).append("\r\n");
            }
        }
        catch (IOException e)
        {
            logger.error("創建索引讀取日誌異常", e);
        }
        finally
        {
            closeQuietly(reader, "創建索引讀取日誌,關閉IO失敗");
        }

        return text.toString();
    }

    /**
     * Replaces the content of the log file with the given text. Errors are
     * logged, not thrown.
     */
    public static void write(String log)
    {
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new FileWriter(path));
            writer.write(log);
        } catch (IOException e) {
            logger.error("創建索引寫入日誌錯誤", e);
        }
        finally
        {
            closeQuietly(writer, "創建索引寫入日誌,關閉IO失敗");
        }
    }

    /**
     * Closes the stream if it was opened, logging a close failure under the given message.
     */
    private static void closeQuietly(Closeable stream, String failureMessage)
    {
        if(stream == null){
            return;
        }

        try {
            stream.close();
        } catch (IOException e) {
            logger.error(failureMessage, e);
        }
    }
}


三、查詢索引
1>.///////////////////////////////////頁面
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@ taglib prefix="struts" uri="/struts-tags" %>

<!--引入path java中的path和js中的path-->
<%@include file="/common/path_header.jsp" %>
<%@include file="/common/jqgrid_header.jsp" %>

<html>
<head>
<title>全文檢索</title>
<script type="text/javascript" src="${path}/script/common/rims.js"></script>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body style="text-align: center">

<!--查詢條件-->
<form id="riskReferencePoint" method="post">
<div style="text-align: center">
<div style="overflow:auto;zoom:1;padding:10px 0px 5px 0px;">
<ul style="text-align:left;list-style-type:none;">
<li style="float:left;">
檢索信息:
<input type="text" id="KEYWORD" name="keyword" onkeydown="if(event.keyCode==13){ enterSearch();}" size="80" />
   
</li>

<li style="float:left;">
<a href="javascript:search();" class="button glow button-rounded button-flat-primary button-tiny" id="save"> 檢索 </a>
  
</li>
</ul>
</div>
</div>
</form>

<!--檢索規章制度結果集-->
<table id="riskRolesGrid">

</table>
<br>
<!--檢索風險問題結果集-->
<table id="riskProblemGrid">

</table>

</body>


<script type="text/javascript">

//
no_data();

// Intended for the first page load: when no keyword has been entered, nothing
// should be requested from the backend.
// NOTE(review): as written the function returns in both cases and does nothing
// after the check, so calling it has no effect — confirm whether more was intended.
function no_data()
{
var keyword = $("#KEYWORD").val();

// No keyword entered: return early.
if(keyword ==undefined||keyword ==null||keyword=='')
{
return;
}
}

// 檢索規章制度
new AbmsGrid('riskRolesGrid',{
colNames:['id','標題','內容','文檔下載'],
colModel:[
{
name:'ID'
,key:true
,width:55
,hidden:true
}
,{
name:'NAME'
,width:100
}
,{
name:'BODY'
,width:400
,formatter:function(value){
return "<pre>"+trimToSummary(value)+"</pre>";
}
}
,{
width:60
,align:'center'
,formatter:function( value,options,rowData ){
//自定義渲染函數
if(rowData.BODY==undefined||rowData.BODY==null||rowData.BODY==''){
return '--';
}
return '<a href="javascript:uploadRiskRules('+rowData.ID+');" style="color:#fff" class="button glow button-rounded button-flat-primary button-tiny">文檔下載</a>';
}
}
],
postParamNames:['KEYWORD'],
_gridDatasourceClass:'com.htsc.abms.auditrisk.web.RiskRuleDatasource',
showPagerTool:true,
loadDataFlag:false,
caption:"風險規章制度"

});

// 檢索風險問題
new AbmsGrid('riskProblemGrid',{
colNames:['id','風險問題', '審計意見','處罰意見','詳細信息'],// ,'處罰內容'
colModel:[
{
name:'ID'
,key:true
,width:55
,hidden:true
},
{
name:'TITLE'
,width:220
},
{
name:'CONTENTS'
,width:250
},
{
name:'PUNISH'
,width:200
}
// ,{
// name:'CRITERION_CONTENT'
// ,width:200
// }
,{
width:60
,align:'center'
,formatter:function( value,options,rowData ){
//自定義渲染函數
return '<a href="javascript:findRiskProblem('+rowData.ID+');" style="color:#fff" class="button glow button-rounded button-flat-primary button-tiny">詳細信息</a>';
}
}
],
postParamNames:['KEYWORD'],
_gridDatasourceClass:'com.htsc.abms.auditrisk.web.RiskProblemDatasource',
showPagerTool:true,
loadDataFlag:false,
caption:"風險問題"
});

// Runs the search by asking both result grids to reload their data.
function search(){
    var gridSelectors = ["#riskRolesGrid", "#riskProblemGrid"];
    for(var i = 0; i < gridSelectors.length; i++){
        $(gridSelectors[i]).trigger("reloadGrid");
    }
}

// Downloads the document of a rule by posting its id through a temporary form.
// The URL is built from ${path}, as the other requests on this page are, instead
// of a hard-coded context path that breaks under a different deployment name.
function uploadRiskRules(id)
{
    var inputs='<input type="hidden" name="id" value="'+id+'"/>';
    jQuery('<form action="${path}/riskRules/uploadRiskRules.do" method="post">'+inputs+'</form>').appendTo('body').submit().remove();
}

// Opens the detail view of a risk problem in a 900x900 window.
function findRiskProblem(id) {
    rims.window.showWindow("${path}/risk/viewRiskById.do?riskId="+id,900,900,null);
}

// Enter-key handler: takes focus away from the keyword field, then searches.
function enterSearch(){
    var keywordField = $("#KEYWORD");
    keywordField.blur();
    search();
}

// Shortens a string for display: text longer than 30 characters is cut to its
// first 30 characters followed by '...'. Empty and missing values are returned
// unchanged.
function trimToSummary(str){
    var maxLength = 30;
    var isEmpty = (null==str || str=='');

    if(!isEmpty && str.length > maxLength){
        return str.substring(0,maxLength) +'...';
    }

    return str;
}
</script>
</html>

2>./////////////////////////////////////後臺數據讀取
package com.htsc.abms.auditrisk.web;

import com.htsc.abms.jqgrid.model.GridData;
import com.htsc.abms.jqgrid.model.GridPostParam;
import com.htsc.abms.jqgrid.util.GridDatasourceInterface;
import com.lhzq.ibms.lucene.util.Searcher;
import com.lhzq.leap.core.utils.AppUtils;
import com.lhzq.leap.core.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;

import java.util.List;
import java.util.Map;

/**
 * Grid data source that answers keyword searches over the {@code riskProblem}
 * full-text index.
 */
@Component
public class RiskProblemDatasource implements GridDatasourceInterface {
    /**
     * Logger.
     */
    private static Logger logger = LoggerFactory.getLogger(RiskProblemDatasource.class);

    /**
     * Fields searched in the index.
     */
    private static final String[] INDEX_FIELDS = {"ID", "TITLE", "CONTENTS", "PUNISH", "CRITERION_CONTENT"};

    /**
     * Searches the index for the posted keyword.
     *
     * @param gridPostParam request parameters of the grid; the keyword is read
     *                      from the {@code KEYWORD} entry
     * @return the grid data for the requested page, or an empty grid when no
     *         keyword was given, nothing matched, or the search failed
     */
    public GridData getGridData(GridPostParam gridPostParam) {
        String keyword = (String) gridPostParam.getParamMap().get("KEYWORD");
        if (StringUtils.isEmpty(keyword)) {
            return new GridData();
        }

        // Requested page and rows per page.
        Integer currentPage = gridPostParam.getPage();
        Integer pageSize = gridPostParam.getPageSize();

        List<Map<String, String>> data = findMatches(AppUtils.keywords(keyword));

        return AppUtils.isBlank(data)
                ? new GridData()
                : new GridData(pageSize, currentPage, data);
    }

    /**
     * Runs the full-text search; a failure is logged and reported as {@code null}.
     */
    private List<Map<String, String>> findMatches(String[] keywords) {
        try {
            return new Searcher("riskProblem").search(keywords, INDEX_FIELDS);
        } catch (Exception e) {
            logger.error("全文檢索異常", e);
            return null;
        }
    }
}


StandardAnalyzer 會把中文逐字切分,所以默認是按單個字匹配的;爲了按整個詞組查詢,需要把每個關鍵字用雙引號包起來,變成短語查詢,處理如下

package com.lhzq.leap.core.utils;

import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.GregorianCalendar;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest;

import org.apache.commons.beanutils.BeanUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Application helper utilities.
 */
public class AppUtils{
    /**
     * Splits the input on whitespace and wraps every part in double quotes, so
     * that each part is searched as a whole phrase rather than character by
     * character.
     *
     * <p>Backslashes and double quotes inside a part are escaped with a backslash;
     * left as typed they would end the phrase early and make the query unparsable.
     *
     * @param keyword raw text typed by the user, may be {@code null}
     * @return one quoted phrase per whitespace-separated part; an empty array for
     *         {@code null} or blank input
     */
    public static String[] keywords(String keyword) {
        if (keyword == null) {
            return new String[0];
        }

        String trimmed = keyword.trim();
        if (trimmed.length() == 0) {
            // Splitting "" would yield one empty part, i.e. the useless phrase "".
            return new String[0];
        }

        String[] keywords = trimmed.split("\\s+");
        for (int i = 0; i < keywords.length; i++) {
            keywords[i] = "\"" + escapePhrase(keywords[i]) + "\"";
        }

        return keywords;
    }

    /**
     * Prefixes every backslash and double quote with a backslash.
     */
    private static String escapePhrase(String term) {
        StringBuilder escaped = new StringBuilder(term.length() + 2);
        for (int i = 0; i < term.length(); i++) {
            char c = term.charAt(i);
            if (c == '\\' || c == '"') {
                escaped.append('\\');
            }
            escaped.append(c);
        }
        return escaped.toString();
    }
}

3>.////////////////////////封裝的查詢器
package com.lhzq.ibms.lucene.util;

import com.lhzq.ibms.commons.util.Configuration;
import com.lhzq.leap.core.config.CommonConfig;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Full-text search helper for one named index.
 *
 * <p>The index reader is opened by the constructor and released again at the
 * end of every search, so an instance that is created, used once and dropped
 * does not leak file handles. A later search on the same instance reopens the
 * reader. Searches on one instance are serialized.
 */
public class Searcher
{
    /**
     * Maximum number of hits returned; when more documents match, only the
     * best-scoring ones up to this limit are returned.
     */
    private static final int MAX_COUNT = 1000;

    /**
     * File system path of the index.
     */
    private final String indexPath;

    /**
     * Directory backing the reader; {@code null} while released.
     */
    private Directory directory;

    /**
     * Open reader; {@code null} while released.
     */
    private IndexReader reader;

    /**
     * Opens the index stored in the sub-directory {@code name} of the configured
     * index directory.
     *
     * @param name index name
     * @throws java.io.IOException if the index cannot be opened
     */
    public Searcher(String name) throws IOException
    {
        indexPath = Configuration.getLuceneIndexDir() + "/" + name;
        open();
    }


    /**
     * Searches the given fields with one query string.
     *
     * @param keywords query string in query-parser syntax
     * @param indexFields fields to search
     * @return one map of field name to stored value per hit
     * @throws Exception if the query cannot be parsed or the search fails
     */
    public synchronized List<Map<String,String>> search(String keywords,String []indexFields) throws Exception
    {
        try
        {
            return execute(newParser(indexFields).parse(keywords));
        }
        finally
        {
            close();
        }
    }


    /**
     * Searches the given fields for documents matching all of the keywords.
     *
     * @param keywords query strings; every one of them must match
     * @param indexFields fields to search
     * @return one map of field name to stored value per hit
     * @throws Exception if a query cannot be parsed or the search fails
     */
    public synchronized List<Map<String,String>> search(String [] keywords,String []indexFields) throws Exception
    {
        try
        {
            MultiFieldQueryParser parser = newParser(indexFields);

            BooleanQuery bq = new BooleanQuery();
            for(String keyword : keywords)
            {
                // MUST = the keywords are combined with AND.
                bq.add(parser.parse(keyword), BooleanClause.Occur.MUST);
            }

            return execute(bq);
        }
        finally
        {
            close();
        }
    }

    /**
     * Releases the reader and its directory. Called automatically after every
     * search; safe to call more than once.
     *
     * @throws java.io.IOException if closing fails
     */
    public synchronized void close() throws IOException
    {
        IndexReader openReader = reader;
        Directory openDirectory = directory;
        reader = null;
        directory = null;

        try
        {
            if(null != openReader)
            {
                openReader.close();
            }
        }
        finally
        {
            if(null != openDirectory)
            {
                openDirectory.close();
            }
        }
    }

    /**
     * Opens the directory and the reader; the directory is closed again if the
     * reader cannot be opened.
     */
    private void open() throws IOException
    {
        Directory dir = DirCenter.getDir(indexPath);
        boolean opened = false;
        try
        {
            reader = DirectoryReader.open(dir);
            directory = dir;
            opened = true;
        }
        finally
        {
            if(!opened && null != dir)
            {
                try
                {
                    dir.close();
                }
                catch (IOException ignored)
                {
                    // The failure that prevented opening the reader is the one to report.
                }
            }
        }
    }

    /**
     * Creates a query parser over the given fields using the shared analyzer.
     */
    private MultiFieldQueryParser newParser(String [] indexFields)
    {
        Analyzer analyzer = AnalyzerCenter.getAnalyzer();
        return new MultiFieldQueryParser(Version.LUCENE_47,indexFields,analyzer);
    }

    /**
     * Runs the query and converts the best hits (at most {@link #MAX_COUNT}) to maps.
     */
    private List<Map<String,String>> execute(Query query) throws Exception
    {
        if(null == reader)
        {
            // Released by an earlier search on this instance.
            open();
        }

        IndexSearcher indexSearcher = new IndexSearcher(reader);
        ScoreDoc[] hits = indexSearcher.search(query, MAX_COUNT).scoreDocs;

        List<Map<String,String>> data = new ArrayList<Map<String,String>>(hits.length);
        for (int i = 0; i < hits.length; i++) {
            data.add(getDocsItem(indexSearcher.doc(hits[i].doc)));
        }

        return data;
    }

    /**
     * Converts a hit document into a map of field name to stored string value.
     *
     * @param hitDoc document returned by the search
     * @return field name to value map of the document
     */
    private Map<String,String> getDocsItem(Document hitDoc) throws IOException
    {
        Map<String,String> record = new HashMap<String, String>();
        for(IndexableField field : hitDoc.getFields())
        {
            record.put(field.name(),field.stringValue());
        }

        return record;
    }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章