Lucene初學Demo

初次使用Lucene先從最簡單的實例入手。

廢話不多說，直接上代碼：

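這個Demo是使用Maven構建的（pom.xml中至少需要引入lucene-core與lucene-queryparser依賴；視Lucene版本而定，StandardAnalyzer可能還需要lucene-analyzers-common）。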

創建Lucene索引的Indexer類：

package com.java.lucene;

import java.io.File;
import java.io.FileReader;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds a Lucene index from the files found directly inside a directory.
 *
 * <p>Every regular file becomes one {@link Document} with three fields:
 * {@code contents} (tokenized from the file's text), {@code fileName} and
 * {@code fullPath} (both stored so they can be read back from search hits).
 *
 * <p>Instances hold an open {@link IndexWriter} and must be closed; the class
 * implements {@link AutoCloseable} so it can be used in try-with-resources.
 *
 * @author Administrator
 */
public class Indexer implements AutoCloseable {
	// Directory the index is written to; kept so it can be released in close()
	private final Directory directory;
	// Writer through which every document is added to the index
	private final IndexWriter writer;

	/**
	 * Opens (or creates) the index stored at {@code indexDir} and prepares an
	 * {@link IndexWriter} that uses the standard analyzer.
	 *
	 * @param indexDir path of the directory that holds the index files
	 * @throws Exception if the directory or the writer cannot be opened
	 */
	public Indexer(String indexDir)throws Exception{
		Directory dir=FSDirectory.open(Paths.get(indexDir));
		IndexWriter created;
		try {
			// Standard analyzer (tokenizer)
			Analyzer analyzer=new StandardAnalyzer();
			IndexWriterConfig iwc=new IndexWriterConfig(analyzer);
			created=new IndexWriter(dir, iwc);
		} catch (Exception e) {
			// Do not leak the directory handle when the writer cannot be created
			dir.close();
			throw e;
		}
		directory=dir;
		writer=created;
	}

	/**
	 * Closes the index writer and then the underlying directory, releasing
	 * the resources held by this indexer.
	 *
	 * @throws Exception if closing the writer or the directory fails
	 */
	@Override
	public void close()throws Exception{
		try {
			writer.close();
		} finally {
			// Release the directory even if closing the writer failed
			directory.close();
		}
	}

	/**
	 * Indexes every regular file located directly inside {@code dataDir}.
	 * Sub-directories and other non-regular entries are skipped.
	 *
	 * @param dataDir directory whose files should be indexed
	 * @return the value of {@code writer.numDocs()} after indexing.
	 *         NOTE(review): this is the writer's document count, which
	 *         presumably also includes documents left in the index by earlier
	 *         runs - confirm against the configured open mode
	 * @throws IllegalArgumentException if {@code dataDir} is not a directory
	 *         or its contents cannot be listed
	 * @throws Exception if reading a file or writing to the index fails
	 */
	public int index(String dataDir)throws Exception{
		// listFiles() returns null when the path is not a directory or an I/O error occurs
		File[] files=new File(dataDir).listFiles();
		if(files==null){
			throw new IllegalArgumentException("Not a readable directory: "+dataDir);
		}
		for(File f:files){
			// A FileReader cannot be opened on a directory, so only index regular files
			if(!f.isFile()){
				continue;
			}
			indexFile(f);
		}
		return writer.numDocs();
	}

	/**
	 * Indexes a single file: logs its path, builds its document and adds the
	 * document to the writer.
	 *
	 * @param f file to index
	 * @throws Exception if the file cannot be read or the document cannot be added
	 */
	private void indexFile(File f) throws Exception{
		System.out.println("索引文件："+f.getCanonicalPath());
		// Conceptually the index resembles a table: each Document is a row
		// and each field of the document is a column.
		Document doc=getDocument(f);
		writer.addDocument(doc);
	}

	/**
	 * Builds the document for a file and populates its fields.
	 *
	 * @param f file to describe
	 * @return a document with the {@code contents}, {@code fileName} and
	 *         {@code fullPath} fields set
	 * @throws Exception if the file cannot be opened or its path cannot be resolved
	 */
	private Document getDocument(File f)throws Exception {
		// Resolve the path before opening the reader so a failure here cannot leak it
		String fullPath=f.getCanonicalPath();
		Document doc=new Document();
		// The reader is handed to Lucene, which consumes it when the document is added.
		// NOTE(review): FileReader decodes with the platform default charset - confirm
		// that matches the encoding of the data files.
		doc.add(new TextField("contents",new FileReader(f)));
		doc.add(new TextField("fileName", f.getName(),Field.Store.YES));
		// fullPath: the canonical path of the file
		doc.add(new TextField("fullPath",fullPath,Field.Store.YES));

		return doc;
	}

	/**
	 * Demo entry point: indexes the sample data directory and prints how many
	 * documents the index holds and how long the run took.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Directory the index is written to
		String indexDir="D:\\lucene";
		// Directory the data files are read from
		String dataDir="D:\\lucene\\data";
		int numIndexed=0;
		long start=System.currentTimeMillis();
		// try-with-resources closes the indexer only if it was actually created
		try (Indexer indexer=new Indexer(indexDir)) {
			numIndexed=indexer.index(dataDir);
		} catch (Exception e) {
			e.printStackTrace();
		}
		long end=System.currentTimeMillis();
		System.out.println("創建索引："+numIndexed+" 個文件 花費了"+(end-start)+" 毫秒");
	}
}

運行結果如下：

索引文件：D:\lucene\data\CHANGES.txt
索引文件：D:\lucene\data\JRE_VERSION_MIGRATION.txt
索引文件：D:\lucene\data\LICENSE.txt
索引文件：D:\lucene\data\MIGRATE.txt
索引文件：D:\lucene\data\NOTICE.txt
索引文件：D:\lucene\data\README.txt
索引文件：D:\lucene\data\SYSTEM_REQUIREMENTS.txt
創建索引：7 個文件 花費了2036 毫秒

測試查詢Lucene索引的Searcher類：

package com.java.lucene;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Demo class that runs a query against a Lucene index and prints the
 * matching files.
 *
 * @author Administrator
 */
public class Searcher {

	/**
	 * Parses {@code q} against the {@code contents} field, fetches up to ten
	 * hits from the index at {@code indexDir}, and prints the elapsed search
	 * time, the total hit count and the stored {@code fullPath} of each hit.
	 *
	 * @param indexDir path of the directory that holds the index
	 * @param q query string in the classic query parser syntax
	 * @throws Exception if the index cannot be opened, the query cannot be
	 *         parsed, or the search fails
	 */
	public static void search(String indexDir,String q)throws Exception{
		// try-with-resources releases the directory, reader and analyzer even
		// when parsing or searching throws
		try (Directory dir=FSDirectory.open(Paths.get(indexDir));
				// Index reader
				IndexReader reader=DirectoryReader.open(dir);
				// Standard analyzer (tokenizer)
				Analyzer analyzer=new StandardAnalyzer()) {
			// Index searcher
			IndexSearcher searcher=new IndexSearcher(reader);
			// Parse the query string against the "contents" field
			QueryParser parser=new QueryParser("contents", analyzer);
			Query query=parser.parse(q);

			// Only the search call itself is timed
			long start=System.currentTimeMillis();
			TopDocs hits=searcher.search(query, 10);
			long end=System.currentTimeMillis();
			System.out.println("匹配 "+q+" ，總共花費"+(end-start)+"毫秒"+"查詢到"+hits.totalHits+"個記錄");
			for(ScoreDoc scoreDoc:hits.scoreDocs){
				// Load the stored fields of the hit to read its path
				Document doc=searcher.doc(scoreDoc.doc);
				System.out.println(doc.get("fullPath"));
			}
		}
	}

	/**
	 * Demo entry point: searches the sample index for a fixed query string.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Directory where the index is stored
		String indexDir="D:\\lucene";
		// Query string
		String q="Zygmunt Saloni";
		try {
			search(indexDir,q);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}

運行結果如下：

匹配 Zygmunt Saloni ，總共花費32毫秒查詢到1個記錄
D:\lucene\data\LICENSE.txt

代碼中讀取的數據文件，我已和源碼一起打包上傳：

下載地址：https://pan.baidu.com/s/1dEG1z4t （如果百度網盤的分享鏈接失效，可以聯繫我。）

SpringCloud Zuul API路由網關服務簡介

SpringCloud Zuul 路由映射規則配置

SpringCloud Zuul 請求過濾配置

SpringCloud Zuul 路由配置

SpringCloud Hystrix集羣監控Turbine

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結