Lucene學習之構建簡單通用的搜索查詢接口

在本篇博客中我們來構建一個簡單而通用的搜索查詢接口。在這個接口裏面我們需要實現基本的增、刪、改、查功能，並且做到通用、使用簡單、可擴展性強。在實際應用Lucene的過程中，最常見的用途主要有兩個：一個是文檔庫的搜索查詢（可以擴展爲各種搜索引擎），另一個是知識問答庫的搜索查詢（可以擴展爲類似小黃雞的智能對話機器人）。接口的類圖如下：

爲了便於大家使用，在此處將全部源碼公開如下。

DAO的基類LuceneDao，提供常用的增刪改查方法，並且將根據資料生成Document以及查詢結果這兩個擴展點進行抽象，在子類中可以根據不同的資料，進行擴展實現：

package com.hsdl.lucene;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Base DAO for a Lucene index stored in a file-system directory.
 * <p>
 * Provides add / delete / update / search operations. Subclasses supply
 * {@link #getDocument(Stuff)} to turn an item into a Lucene document and may
 * override {@link #displaySearchResult(Document[])} to print results.
 * <p>
 * Every write operation opens its own {@link IndexWriter}. The writer is
 * committed and closed on success and rolled back on failure, so the index
 * write lock is never left held by a failed call.
 */
public abstract class LuceneDao {
	private Analyzer analyzer = new IKAnalyzer(true);
	private String indexPath = "D:/work/lucene/tika/index";

	/**
	 * Indexes a single item.
	 *
	 * @param stuff
	 *            item to index
	 * @throws Exception
	 *             if the document cannot be built or written
	 */
	public void add(Stuff stuff) throws Exception {
		createIndex(stuff);
	}

	/**
	 * Indexes several items using one writer and one commit.
	 *
	 * @param stuffs
	 *            items to index
	 * @throws Exception
	 *             if any document cannot be built or written; nothing from the
	 *             batch is committed in that case
	 */
	public void batchAdd(List<Stuff> stuffs) throws Exception {
		createIndexs(stuffs);
	}

	/**
	 * Deletes every document whose field holds exactly the given term.
	 * <p>
	 * Failures are not propagated: the stack trace is printed and the method
	 * returns normally.
	 *
	 * @param fieldName
	 *            field to match
	 * @param fieldVaule
	 *            exact term value to match
	 */
	public void delete(String fieldName, String fieldVaule) {
		try {
			IndexWriter writer = getIndexWrite();
			boolean committed = false;
			try {
				Query q = new TermQuery(new Term(fieldName, fieldVaule));
				writer.deleteDocuments(q);
				writer.commit();
				committed = true;
			} finally {
				closeWriter(writer, committed);
			}
			System.out.println("刪除" + fieldName + "爲" + fieldVaule + "的記錄成功");
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Deletes documents for several field/value pairs with a single commit.
	 *
	 * @param fieldMap
	 *            field name mapped to the exact term value to delete
	 * @throws Exception
	 *             if the index cannot be opened or written; no deletion from
	 *             the batch is committed in that case
	 */
	public void batchDelete(Map<String, String> fieldMap) throws Exception {
		IndexWriter writer = getIndexWrite();
		boolean committed = false;
		try {
			for (Map.Entry<String, String> entry : fieldMap.entrySet()) {
				Query q = new TermQuery(
						new Term(entry.getKey(), entry.getValue()));
				writer.deleteDocuments(q);
				System.out.println("刪除" + entry.getKey() + "爲" + entry.getValue()
						+ "的記錄成功");
			}
			writer.commit();
			committed = true;
		} finally {
			closeWriter(writer, committed);
		}
	}

	/**
	 * Builds the Lucene document for an item.
	 *
	 * @param stuff
	 *            item to convert
	 * @return the document to index
	 * @throws Exception
	 *             if the document cannot be built
	 */
	protected abstract Document getDocument(Stuff stuff) throws Exception;

	/**
	 * Replaces the documents matching a term with the document built from
	 * {@code stuff}.
	 *
	 * @param fieldName
	 *            field identifying the documents to replace
	 * @param fieldVaule
	 *            exact term value identifying the documents to replace
	 * @param stuff
	 *            item providing the new document
	 * @throws Exception
	 *             if the document cannot be built or the index cannot be written
	 */
	public void update(String fieldName, String fieldVaule, Stuff stuff)
			throws Exception {
		IndexWriter writer = getIndexWrite();
		boolean committed = false;
		try {
			Document doc = getDocument(stuff);
			writer.updateDocument(new Term(fieldName, fieldVaule), doc);
			writer.commit();
			committed = true;
		} finally {
			closeWriter(writer, committed);
		}
		System.out.println("更新" + fieldName + "爲" + fieldVaule + "的記錄成功");
	}

	/**
	 * Sets the analyzer used for both indexing and query parsing.
	 *
	 * @param analyzer
	 *            analyzer to use
	 */
	public void setAnalyzer(Analyzer analyzer) {
		this.analyzer = analyzer;
	}

	/**
	 * Sets the directory that holds the index files.
	 *
	 * @param indexPath
	 *            path of the index directory
	 */
	public void setIndexPath(String indexPath) {
		this.indexPath = indexPath;
	}

	/**
	 * Indexes one item and commits.
	 *
	 * @param stuff
	 *            item to index
	 * @throws Exception
	 *             if the document cannot be built or written
	 */
	protected void createIndex(Stuff stuff) throws Exception {
		IndexWriter iwriter = getIndexWrite();
		boolean committed = false;
		try {
			indexDoc(iwriter, stuff);
			iwriter.commit();
			committed = true;
		} finally {
			closeWriter(iwriter, committed);
		}
	}

	/**
	 * Adds the document for one item to an open writer without committing.
	 * Subclasses may override this to add more than one document per item.
	 *
	 * @param iwriter
	 *            open writer to add to
	 * @param stuff
	 *            item to index
	 * @throws Exception
	 *             if the document cannot be built or added
	 */
	protected void indexDoc(IndexWriter iwriter, Stuff stuff) throws Exception {
		Document doc = getDocument(stuff);
		iwriter.addDocument(doc);
	}

	/**
	 * Indexes several items with one writer and commits once at the end.
	 *
	 * @param stuffs
	 *            items to index
	 * @throws Exception
	 *             if any document cannot be built or written
	 */
	protected void createIndexs(List<Stuff> stuffs) throws Exception {
		IndexWriter iwriter = getIndexWrite();
		boolean committed = false;
		try {
			for (Stuff stuff : stuffs) {
				indexDoc(iwriter, stuff);
			}
			// Explicit commit, consistent with the other write operations.
			iwriter.commit();
			committed = true;
		} finally {
			closeWriter(iwriter, committed);
		}
	}

	/**
	 * Opens a writer on the configured index directory, creating the index if
	 * it does not exist yet and appending to it otherwise.
	 *
	 * @return a new writer; the caller is responsible for closing it
	 * @throws IOException
	 *             if the directory or the writer cannot be opened
	 */
	protected IndexWriter getIndexWrite() throws IOException {
		Directory directory = FSDirectory.open(new File(indexPath));
		IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_45,
				analyzer);
		iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
		boolean opened = false;
		try {
			IndexWriter iwriter = new IndexWriter(directory, iwConfig);
			opened = true;
			return iwriter;
		} finally {
			// Do not leave the directory open when the writer could not be created.
			if (!opened) {
				directory.close();
			}
		}
	}

	/**
	 * Releases a writer: a plain close after a successful commit, otherwise a
	 * rollback, which discards the uncommitted changes and closes the writer.
	 */
	private void closeWriter(IndexWriter writer, boolean committed)
			throws IOException {
		if (committed) {
			writer.close();
		} else {
			writer.rollback();
		}
	}

	/**
	 * Searches one field and returns the best matching documents.
	 * <p>
	 * The query string is parsed with the classic query parser using AND as
	 * the default operator.
	 *
	 * @param searchField
	 *            default field to search
	 * @param searchKeyStr
	 *            query string in query-parser syntax
	 * @param topCount
	 *            maximum number of hits to return
	 * @return the matching documents, best match first; empty if none match
	 * @throws CorruptIndexException
	 *             if the index is corrupt
	 * @throws IOException
	 *             if the index cannot be read
	 * @throws ParseException
	 *             if the query string cannot be parsed
	 */
	public Document[] search(String searchField, String searchKeyStr,
			int topCount) throws CorruptIndexException, IOException,
			ParseException {
		Directory directory = FSDirectory.open(new File(indexPath));
		try {
			IndexReader ireader = DirectoryReader.open(directory);
			try {
				IndexSearcher isearcher = new IndexSearcher(ireader);

				QueryParser qp = new QueryParser(Version.LUCENE_45, searchField,
						analyzer);
				qp.setDefaultOperator(QueryParser.AND_OPERATOR);
				Query query = qp.parse(searchKeyStr);

				TopDocs topDocs = isearcher.search(query, topCount);
				// Stored fields are loaded here, before the reader is closed.
				Document[] docs = new Document[topDocs.scoreDocs.length];
				for (int i = 0; i < docs.length; i++) {
					docs[i] = isearcher.doc(topDocs.scoreDocs[i].doc);
				}
				return docs;
			} finally {
				ireader.close();
			}
		} finally {
			directory.close();
		}
	}

	/**
	 * Prints a header with the number of hits. Subclasses extend this to print
	 * the hits themselves.
	 *
	 * @param docs
	 *            documents returned by a search
	 */
	public void displaySearchResult(Document[] docs) {
		System.out.println("開始顯示搜索查詢結果....\n返回查詢條數："+docs.length);
	}

	/**
	 * Adds each attached key/value pair to the document as a stored,
	 * untokenized field, e.g. a database primary key used to look up the rest
	 * of the record after a search.
	 *
	 * @param doc
	 *            document to add the fields to
	 * @param attachData
	 *            field name mapped to field value; may be {@code null}, in
	 *            which case nothing is added
	 */
	protected void addAttacheData(Document doc, Map<String, String> attachData) {
		if (attachData == null) {
			return;
		}
		for (Map.Entry<String, String> entry : attachData.entrySet()) {
			doc.add(new StringField(entry.getKey(), entry.getValue(),
					Field.Store.YES));
		}
	}

}

文檔庫資料對象的基類Stuff，我們將資料內容之外的其他數據放入到Map中，做爲附加數據。

package com.hsdl.lucene;

import java.util.Map;
/**
 * Base class for items that can be put into the index.
 * <p>
 * It only carries a map of attached key/value data; subclasses add the
 * actual content.
 *
 * @author alex
 */
public class Stuff {
	/** Attached key/value pairs; {@code null} until set. */
	private Map<String, String> attacheData;

	/**
	 * Replaces the attached data.
	 *
	 * @param attacheData
	 *            new attached data, stored by reference
	 */
	public void setAttacheData(Map<String, String> attacheData) {
		this.attacheData = attacheData;
	}

	/**
	 * @return the attached data as last set, or {@code null} if never set
	 */
	public Map<String, String> getAttacheData() {
		return this.attacheData;
	}

}

文件資料對象FileStuff，在這個類中有文件路徑以及代表文件內容的域的名字，在構建索引和搜索時使用：

package com.hsdl.lucene;
/**
 * An indexable item backed by a file or directory on disk.
 *
 * @author alex
 */
public class FileStuff extends Stuff{
	/** Name of the field that represents the file content. */
	private String contentFieldName;
	/** Path of the file or directory. */
	private String filePath;

	/**
	 * @return the path of the file or directory
	 */
	public String getFilePath() {
		return this.filePath;
	}

	/**
	 * @param filePath
	 *            path of the file or directory
	 */
	public void setFilePath(String filePath) {
		this.filePath = filePath;
	}

	/**
	 * @return the name of the content field
	 */
	public String getContentFieldName() {
		return this.contentFieldName;
	}

	/**
	 * @param contentFieldName
	 *            name of the content field
	 */
	public void setContentFieldName(String contentFieldName) {
		this.contentFieldName = contentFieldName;
	}
}

知識問答資料 AskAnswerStuff：

package com.hsdl.lucene;
/**
 * An indexable question/answer pair.
 *
 * @author alex
 */
public class AskAnswerStuff extends Stuff{
	/** Name of the field that represents the content. */
	private String contentFieldName;
	/** Question text. */
	private String ask;
	/** Answer text. */
	private String answer;

	/**
	 * @return the question text
	 */
	public String getAsk() {
		return this.ask;
	}

	/**
	 * @param ask
	 *            question text
	 */
	public void setAsk(String ask) {
		this.ask = ask;
	}

	/**
	 * @return the answer text
	 */
	public String getAnswer() {
		return this.answer;
	}

	/**
	 * @param answer
	 *            answer text
	 */
	public void setAnswer(String answer) {
		this.answer = answer;
	}

	/**
	 * @return the name of the content field
	 */
	public String getContentFieldName() {
		return this.contentFieldName;
	}

	/**
	 * @param contentFieldName
	 *            name of the content field
	 */
	public void setContentFieldName(String contentFieldName) {
		this.contentFieldName = contentFieldName;
	}
}

文檔庫訪問之文件對象實現LuceneDaoFileImpl：

package com.hsdl.lucene;

import java.io.File;
import java.io.IOException;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.tika.Tika;
/**
 * {@link LuceneDao} implementation that indexes files.
 * <p>
 * Each file becomes one document with its content (read through Tika), its
 * file name and its absolute path. When the item's path is a directory, every
 * readable file below it is indexed.
 *
 * @author alex
 */
public class LuceneDaoFileImpl extends LuceneDao{
	// NOTE(review): the content field name is fixed here;
	// FileStuff.getContentFieldName() is not consulted when indexing —
	// confirm whether it should be.
	private static final String CONTENT_FIELD_NAME = "content";
	private static final Tika TIKA = new Tika();

	/**
	 * Indexes the item's path: recursively when it is a directory, otherwise
	 * as a single document.
	 *
	 * @param iwriter
	 *            open writer to add to
	 * @param stuff
	 *            item to index; must be a {@link FileStuff}
	 * @throws Exception
	 *             if a document cannot be built or added
	 */
	@Override
	protected void indexDoc(IndexWriter iwriter, Stuff stuff) throws Exception {
		FileStuff fileStuff = (FileStuff) stuff;
		File file = new File(fileStuff.getFilePath());
		if (file.isDirectory()) {
			indexDocByFileDir(iwriter, file, stuff.getAttacheData());
		} else {
			super.indexDoc(iwriter, stuff);
		}
	}

	/**
	 * Indexes a file, or every readable file below a directory.
	 * Unreadable entries are skipped silently.
	 *
	 * @param iwriter
	 *            open writer to add to
	 * @param file
	 *            file or directory to index
	 * @param attachData
	 *            attached data added to every document; may be {@code null}
	 * @throws IOException
	 *             if a file cannot be parsed or added
	 */
	private void indexDocByFileDir(IndexWriter iwriter, File file,Map<String,String> attachData) throws IOException {
		if (!file.canRead()) {
			return;
		}
		if (file.isDirectory()) {
			File[] children = file.listFiles();
			// listFiles() returns null when the directory cannot be listed.
			if (children != null) {
				for (File child : children) {
					indexDocByFileDir(iwriter, child, attachData);
				}
			}
		} else {
			iwriter.addDocument(getDocument(file, attachData));
		}
	}

	/**
	 * Builds the document for one file.
	 *
	 * @param file
	 *            file to read
	 * @param attachData
	 *            attached data to store on the document; may be {@code null}
	 * @return document with the attached data plus content, "fileName" and
	 *         "path" fields
	 * @throws IOException
	 *             if the file cannot be opened for parsing
	 */
	protected Document getDocument(File file,Map<String,String> attachData) throws IOException {
		Document doc = new Document();
		addAttacheData(doc, attachData);
		// The content is supplied as the Reader that Tika returns for the file.
		doc.add(new TextField(CONTENT_FIELD_NAME, TIKA.parse(file)));
		doc.add(new StringField("fileName", file.getName(),
				Field.Store.YES));
		doc.add(new StringField("path", file.getAbsolutePath(),
				Field.Store.YES));
		return doc;
	}

	/**
	 * Prints the hit count followed by each document and its file name and
	 * path.
	 *
	 * @param docs
	 *            documents returned by a search
	 */
	@Override
	public void displaySearchResult(Document[] docs) {
		super.displaySearchResult(docs);
		for (Document doc : docs) {
			System.out.println("內容：" + doc.toString());
			System.out.println(doc.get("fileName") + "["
					+ doc.get("path") + "]");
		}
	}

	/**
	 * Builds the document for the file the item points at.
	 *
	 * @param stuff
	 *            item to convert; must be a {@link FileStuff}
	 * @return document for the item's file
	 * @throws IOException
	 *             if the file cannot be opened for parsing
	 */
	@Override
	protected Document getDocument(Stuff stuff) throws IOException {
		FileStuff fileStuff = (FileStuff) stuff;
		return getDocument(new File(fileStuff.getFilePath()),
				stuff.getAttacheData());
	}

}

文檔庫訪問之知識問答實現LuceneDaoAskAnswerImpl：

package com.hsdl.lucene;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
/**
 * {@link LuceneDao} implementation that indexes question/answer pairs.
 *
 * @author alex
 */
public class LuceneDaoAskAnswerImpl extends LuceneDao{
	/**
	 * Builds a document holding the attached data, the question as a stored
	 * text field named "ask" and the answer as a stored string field named
	 * "answer".
	 *
	 * @param stuff
	 *            item to convert; must be an {@link AskAnswerStuff}
	 * @return the document to index
	 */
	@Override
	protected Document getDocument(Stuff stuff) throws Exception {
		AskAnswerStuff qa = (AskAnswerStuff) stuff;
		Document result = new Document();
		addAttacheData(result, stuff.getAttacheData());

		TextField askField = new TextField("ask", qa.getAsk(), Field.Store.YES);
		result.add(askField);

		StringField answerField = new StringField("answer", qa.getAnswer(),
				Field.Store.YES);
		result.add(answerField);
		return result;
	}

	/**
	 * Prints the hit count followed by each document and its question and
	 * answer.
	 *
	 * @param docs
	 *            documents returned by a search
	 */
	public void displaySearchResult(Document[] docs) {
		super.displaySearchResult(docs);
		for (Document hit : docs) {
			System.out.println("內容：" + hit.toString());
			String ask = hit.get("ask");
			String answer = hit.get("answer");
			System.out.println(ask + ":[" + answer + "]");
		}
	}
}

下面我們來編寫兩個測試類，分別測試文件庫的訪問以及知識問答庫：

LuceneDaoFileTest

package com.hsdl.lucene;

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.document.Document;
/**
 * Manual demo of indexing and searching files: add, search, delete, update.
 *
 * @author alex
 */
public class LuceneDaoFileTest {

	/**
	 * Runs the add / delete / update scenario against a local index and
	 * prints the search results after each step.
	 *
	 * @param args
	 *            unused
	 */
	public static void main(String[] args) {
		LuceneDao luceneDao = new LuceneDaoFileImpl();
		luceneDao.setIndexPath("D:/work/lucene/filetest/index");

		// The ID field is what the delete and update steps match on.
		Map<String,String> attacheData = new HashMap<String,String>();
		attacheData.put("ID", "001");

		FileStuff fileStuff = new FileStuff();
		fileStuff.setFilePath("D:/work/lucene/filetest/doc/test.txt");
		fileStuff.setAttacheData(attacheData);
		fileStuff.setContentFieldName("content");

		try {
			String contentField = fileStuff.getContentFieldName();

			// Add
			System.err.println("------------開始添加測試------------");
			luceneDao.add(fileStuff);
			luceneDao.displaySearchResult(luceneDao.search(contentField, "微信收費", 10));
			luceneDao.displaySearchResult(luceneDao.search(contentField, "網站收費", 10));

			// Delete
			System.err.println("------------開始刪除測試------------");
			luceneDao.delete("ID", "001");
			luceneDao.displaySearchResult(luceneDao.search(contentField, "微信收費", 10));

			// Update; the banner is printed after the update has run.
			fileStuff.setFilePath("D:/work/lucene/filetest/doc/test.xls");
			luceneDao.update("ID", "001", fileStuff);
			System.err.println("------------開始更新測試------------");
			luceneDao.displaySearchResult(luceneDao.search(contentField, "微信收費", 10));
			luceneDao.displaySearchResult(luceneDao.search(contentField, "網站費用", 10));
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}

LuceneDaoAskAnswerTest

package com.hsdl.lucene;

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.document.Document;
/**
 * Manual demo of indexing and searching question/answer pairs: add, search,
 * delete, update.
 *
 * @author alex
 */
public class LuceneDaoAskAnswerTest {
	/**
	 * Runs the add / delete / update scenario against a local index and
	 * prints the search results after each step.
	 *
	 * @param args
	 *            unused
	 */
	public static void main(String[] args){
		LuceneDao luceneDao = new LuceneDaoAskAnswerImpl();
		luceneDao.setIndexPath("D:/work/lucene/askanswer/index");

		// The ID field is what the delete and update steps match on.
		Map<String,String> attacheData = new HashMap<String,String>();
		attacheData.put("ID", "001");

		AskAnswerStuff askAnswerStuff = new AskAnswerStuff();
		askAnswerStuff.setAsk("微信營銷怎麼收費?");
		askAnswerStuff.setAnswer("3000元每年，10年25000");
		askAnswerStuff.setAttacheData(attacheData);

		try {
			// Add
			System.err.println("------------開始添加測試------------");
			luceneDao.add(askAnswerStuff);
			luceneDao.displaySearchResult(luceneDao.search("ask", "微信收費", 10));
			luceneDao.displaySearchResult(luceneDao.search("ask", "網站收費", 10));

			// Delete
			System.err.println("------------開始刪除測試------------");
			luceneDao.delete("ID", "001");
			luceneDao.displaySearchResult(luceneDao.search("ask", "微信收費", 10));

			// Update; the banner is printed after the update has run.
			askAnswerStuff.setAsk("網站建設怎麼收費?");
			askAnswerStuff.setAnswer("普通企業網站6000，商城網站10000，其他網站價格面議！");
			luceneDao.update("ID", "001", askAnswerStuff);
			System.err.println("------------開始更新測試------------");
			luceneDao.displaySearchResult(luceneDao.search("ask", "微信收費", 10));
			luceneDao.displaySearchResult(luceneDao.search("ask", "網站收費", 10));
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}

Lucene學習之構建簡單通用的搜索查詢接口

SQL優化-20231016

Android Button快速滑過，ACTION_UP事件不觸發的問題的解決

Lucene學習之Lucene入門暨中文文件搜索問題的解決

經典語錄收集

柔性的工作流管理系統設計

[轉]不平整的路人使人活的更堅強

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結