Lucene 各版本之間的實現方式有些變化,此文記錄一下 Lucene 5 的實現方式。
一、pom文件(注意:下文代碼使用的 IKAnalyzer 不包含在以下依賴中,需另行引入與 Lucene 5 兼容的 IK Analyzer 依賴)
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>5.3.1</version>
</dependency>
二、代碼實現
package com.jthao.lucene.example1;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
/**
 * Small Lucene 5 demo that builds an on-disk index from a handful of sample
 * projects and offers create / read / update / delete / query operations on it.
 *
 * <p>The instance owns an {@link IndexWriter}, a {@link DirectoryReader}, the
 * analyzer and the directory; call {@link #close()} (or use try-with-resources)
 * to release them.
 */
public class IndexCrud implements AutoCloseable {
    // Sample data: element i of every array describes the same project.
    String[] ids = {"1", "2"};
    String[] projectNames = {"北京中諮時代資產管理有限公司100%股權", "臺灣菲格科技發展有限公司100%股權轉讓"};
    String[] projectCodes = {"G32018BJ1000852-0", "G32018FJ1000017-0"};
    String[] zones = {"上海", "北京"};
    String[] tags = {"商務服務業", "批發業"};

    private Directory directory;
    private Analyzer analyzer;
    private IndexWriterConfig config;
    private IndexWriter writer;
    private DirectoryReader reader;

    /**
     * Opens the on-disk index, writes the sample documents and opens a reader.
     *
     * @throws IllegalStateException if the index cannot be opened; the original
     *         {@link IOException} is attached as the cause
     */
    public IndexCrud() {
        try {
            // File-system storage (a RAMDirectory could be used for in-memory storage).
            directory = FSDirectory.open(Paths.get("C:\\home\\iflow\\lucene1"));
            // Tokenizer used for both indexing and query parsing.
            analyzer = new IKAnalyzer();
            config = new IndexWriterConfig(analyzer);
            writer = new IndexWriter(directory, config);
            createIndex();
            // Opened after createIndex() has committed, so the reader sees the documents.
            reader = DirectoryReader.open(directory);
        } catch (IOException e) {
            // Fail fast instead of leaving a half-initialised object whose
            // methods would later die with a NullPointerException.
            try {
                releaseResources();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw new IllegalStateException("Failed to initialise the Lucene index", e);
        }
    }

    /** Demo entry point: builds the index and runs the sample query. */
    public static void main(String[] args) {
        try (IndexCrud indexCrud = new IndexCrud()) {
            // indexCrud.deleteAllIndex();
            indexCrud.queryIndex();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints every token the analyzer produces for the given text, one line per
     * token, as {@code positionIncrement:[term]:startOffset->endOffset:type}.
     *
     * @param str text to analyze
     * @param a   analyzer used to tokenize the text
     */
    public static void display(String str, Analyzer a) {
        // The stream must be closed after use; try-with-resources guarantees that
        // even when tokenization fails.
        try (TokenStream stream = a.tokenStream("renyi", new StringReader(str))) {
            PositionIncrementAttribute pia = stream.addAttribute(PositionIncrementAttribute.class); // position increment
            OffsetAttribute oa = stream.addAttribute(OffsetAttribute.class); // start/end character offsets
            CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class); // term text
            TypeAttribute ta = stream.addAttribute(TypeAttribute.class); // token type
            // Since Lucene 4 the stream has to be reset before and ended after consumption.
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(pia.getPositionIncrement() + ":[" + cta.toString() + "]:" + oa.startOffset() + "->" + oa.endOffset() + ":" + ta.type());
            }
            stream.end();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Replaces the document whose {@code id} is "2" with a new document that
     * only carries {@code id} and {@code name}.
     */
    public void updateIndex() {
        try {
            Term term = new Term("id", "2");
            Document doc = new Document();
            doc.add(new StringField("id", ids[1], Field.Store.YES));
            doc.add(new StringField("name", "lsup", Field.Store.YES));
            // An update deletes the documents matching the term and then adds the new one.
            writer.updateDocument(term, doc);
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Deletes every document in the index. */
    public void deleteAllIndex() {
        try {
            writer.deleteAll();
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Deletes the documents whose {@code id} is "1" or "3". */
    public void deleteIndex() {
        try {
            // deleteDocuments also has an overload taking Query objects, which
            // removes everything those queries match.
            Term[] terms = {new Term("id", "1"), new Term("id", "3")};
            writer.deleteDocuments(terms);
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a searcher over the index, reopening the reader first when the
     * index has changed since it was opened.
     *
     * @return a searcher backed by the current reader
     */
    public IndexSearcher getSearcher() {
        try {
            DirectoryReader refreshed = DirectoryReader.openIfChanged(reader);
            if (refreshed != null) {
                // Swap before closing so the field never points at a closed reader.
                DirectoryReader stale = reader;
                reader = refreshed;
                stale.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new IndexSearcher(reader);
    }

    /**
     * Runs the sample full-text query ("商務服務") and prints the hits.
     *
     * @throws InvalidTokenOffsetsException declared for compatibility with
     *         existing callers; the current implementation does not throw it
     */
    public void queryIndex() throws InvalidTokenOffsetsException {
        queryIndex("商務服務");
    }

    /**
     * Searches {@code projectName}, {@code projectCode}, {@code zone} and
     * {@code tag} for the given keyword and prints up to 1000 hits.
     *
     * @param keyword query text, parsed with the classic query parser syntax
     */
    public void queryIndex(String keyword) {
        try {
            IndexSearcher searcher = getSearcher();
            // Full-text search across all text fields.
            QueryParser parser = new MultiFieldQueryParser(new String[]{"projectName", "projectCode", "zone", "tag"}, analyzer);
            Query query = parser.parse(keyword);
            TopDocs topDocs = searcher.search(query, 1000);
            // Hits are returned in the order the searcher ranked them.
            for (ScoreDoc hit : topDocs.scoreDocs) {
                Document hitDoc = searcher.doc(hit.doc);
                System.out.println("(" + hit.doc + "-" + hit.score + ")" + "id:" + hitDoc.get("id") + " projectName:"
                        + hitDoc.get("projectName") + " projectCode:" + hitDoc.get("projectCode") + " zone:" + hitDoc.get("zone") + " tag:" + hitDoc.get("tag"));
            }
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
    }

    /** Prints the reader's maxDoc, live document count and deleted document count. */
    public void readIndex() {
        System.out.println("max num:" + reader.maxDoc());
        System.out.println("index num:" + reader.numDocs());
        // Number of documents marked as deleted.
        System.out.println("delete index num:" + reader.numDeletedDocs());
    }

    /**
     * Writes the sample projects to the index and commits.
     *
     * <p>Each document is written with {@code updateDocument} keyed on its
     * {@code id}, so running the demo repeatedly against the same on-disk index
     * replaces the sample documents instead of piling up duplicates.
     */
    public void createIndex() {
        try {
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                doc.add(new StringField("id", ids[i], Field.Store.YES));
                Field projectName = new TextField("projectName", projectNames[i], Field.Store.YES);
                Field projectCode = new TextField("projectCode", projectCodes[i], Field.Store.YES);
                Field zone = new TextField("zone", zones[i], Field.Store.YES);
                Field tag = new TextField("tag", tags[i], Field.Store.YES);
                // Index-time boosts, set before the document is handed to the writer.
                tag.setBoost(2.5f);
                projectName.setBoost(2.0f);
                projectCode.setBoost(2.0f);
                zone.setBoost(0.5f);
                doc.add(projectName);
                doc.add(projectCode);
                doc.add(zone);
                doc.add(tag);
                // For index-only (not stored) content use Field.Store.NO, e.g.
                // doc.add(new TextField("content", contents[i], Field.Store.NO));
                writer.updateDocument(new Term("id", ids[i]), doc);
            }
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the reader, writer, analyzer and directory.
     *
     * @throws IOException if one of the underlying resources fails to close
     */
    @Override
    public void close() throws IOException {
        releaseResources();
    }

    /** Closes whatever has been opened so far; every resource is attempted even if an earlier one fails. */
    private void releaseResources() throws IOException {
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } finally {
                try {
                    if (analyzer != null) {
                        analyzer.close();
                    }
                } finally {
                    if (directory != null) {
                        directory.close();
                    }
                }
            }
        }
    }
}