Lucene二(域選項、文檔基本信息、索引的增刪改查)

  先來看看域選項,域選項分爲域存儲選項和域索引選項,該選項是在爲Document添加Field的時候,對該Field的描述。域存儲選項即Field.Store.*:*等於YES時表示會把這個域中的內容完全存儲到索引文件中,方便進行還原;*等於NO表示這個域中的內容不存儲到索引文件中,但是不代表不能進行索引,存儲和索引是兩個概念。域索引選項即Field.Index.*:*等於NO,表示不進行索引;*等於ANALYZED表示需要進行索引和分詞,適用於標題,內容等;*等於NOT_ANALYZED表示要進行索引,但不進行分詞,如身份證、姓名、id等,適用於精確搜索;*等於ANALYZED_NO_NORMS表示進行索引和分詞但不存儲norms信息,norms信息包含了索引時的加權(boost)和域長度歸一化等評分信息;*等於NOT_ANALYZED_NO_NORMS表示要索引,但既不進行分詞也不存儲norms信息。

  最佳實踐:

Index Store 常見使用場景
NOT_ANALYZED_NO_NORMS YES 標識符(主鍵、文件名),電話,身份證,姓名,日期等
ANALYZED YES 文檔標題和摘要
ANALYZED NO 文檔正文
NO YES 文檔類型,數據庫主鍵(不進行索引)
NOT_ANALYZED NO 隱藏關鍵字







以下是索引的操作代碼:

package cn.liuys.lucene.index;

import java.io.File;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;


/**
 * Small demonstration of index maintenance with the Lucene 3.5 API: building an
 * index from in-memory sample data, reading basic document counts, deleting,
 * undeleting, purging deletions, merging segments and updating a document.
 *
 * <p>All public operations report {@link IOException}s by printing the stack
 * trace and returning normally; they never propagate checked exceptions.
 */
public class IndexUtil {
    /** Index location used by the no-argument constructor. */
    private static final String DEFAULT_INDEX_PATH = "F:\\stady\\JAVA\\other\\Lucene\\test\\index02";

    /** Lucene compatibility version shared by the writer config and the analyzer. */
    private static final Version LUCENE_VERSION = Version.LUCENE_35;

    // Parallel sample-data arrays: element i of each array belongs to document i.
    private final String[] ids = {"1","2","3","4","5","6"};
    private final String[] emails = {"[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]"};
    private final String[] contents = {
        "welcome to visited the space,I like book",
        "hello boy, I like pingpeng ball",
        "my name is cc I like game",
        "I like football",
        "I like football and I like basketball too",
        "I like movie and swim"
    };
    // dates and attachs are declared but not referenced by any method of this class.
    private Date[] dates = null;
    private int[] attachs = {2,3,1,4,5,5};
    private final String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};

    /** Directory holding the index; opened once in the constructor. */
    private final Directory directory;

    /**
     * Opens the index at the default hard-coded location.
     *
     * @throws IllegalStateException if the index directory cannot be opened
     */
    public IndexUtil() {
        this(new File(DEFAULT_INDEX_PATH));
    }

    /**
     * Opens the index stored in the given file-system directory.
     *
     * @param indexDir directory that holds (or will hold) the index files
     * @throws IllegalArgumentException if {@code indexDir} is {@code null}
     * @throws IllegalStateException if the index directory cannot be opened
     */
    public IndexUtil(File indexDir) {
        if (indexDir == null) {
            throw new IllegalArgumentException("indexDir must not be null");
        }
        try {
            directory = FSDirectory.open(indexDir);
        } catch (IOException e) {
            // Fail fast: continuing with a null directory would only surface later
            // as a NullPointerException inside every index operation.
            throw new IllegalStateException("Cannot open index directory: " + indexDir, e);
        }
    }

    /**
     * Builds the index: removes every existing document, then adds one document
     * per entry of the sample-data arrays.
     */
    public void index() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            // Clear the existing index before rebuilding it.
            writer.deleteAll();
            for (int i = 0; i < ids.length; i++) {
                writer.addDocument(buildDocument(ids[i], i));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Prints basic document counts obtained from an {@link IndexReader}:
     * {@code numDocs}, {@code maxDoc} and {@code numDeletedDocs}.
     */
    public void query() {
        IndexReader reader = null;
        try {
            reader = IndexReader.open(directory);
            System.out.println("numDoc:" + reader.numDocs());
            System.out.println("maxDoc:" + reader.maxDoc());
            System.out.println("deleteDoc:" + reader.numDeletedDocs());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeReader(reader);
        }
    }

    /**
     * Deletes the document whose {@code id} term is {@code "1"}.
     *
     * <p>Author's note: this behaves like deleting a file on Windows; the
     * document goes to a "recycle bin" and is not removed for good.
     */
    public void delete() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            // deleteDocuments accepts either a Query or a Term; a Term is an exact-match value.
            writer.deleteDocuments(new Term("id", "1"));
            // Author's observation: after this delete, query() reports numDoc 5, maxDoc 6 and
            // deleteDoc 1, and a file ending in .del appears among the index files, showing the
            // deletion is not final: it can still be undone or made permanent.
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Restores all documents that were deleted but not yet purged (the "recycle bin").
     */
    public void unDelete() {
        IndexReader reader = null;
        try {
            // Author's note: a reader is meant for reading, so readOnly defaults to true;
            // passing false avoids the exception undeleteAll() would otherwise raise.
            reader = IndexReader.open(directory, false);
            reader.undeleteAll();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeReader(reader);
        }
    }

    /**
     * Forces deletions to be merged away, emptying the "recycle bin".
     */
    public void forceDelete() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            writer.forceMergeDeletes();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Forces the index segments to be merged.
     *
     * <p>Author's note: not recommended because it is expensive; Lucene
     * maintains the index automatically.
     */
    public void forceMerge() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            // Merge the index down to two segments.
            writer.forceMerge(2);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Replaces the document whose {@code id} term is {@code "1"} with a new
     * document that has id {@code "1001"} and the first sample entry's data.
     */
    public void update() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            // Author's note: Lucene has no in-place update; an update is a delete followed by an add.
            Document replacement = buildDocument("1001", 0);
            writer.updateDocument(new Term("id", "1"), replacement);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(writer);
        }
    }

    /** Creates a writer on the index directory using a StandardAnalyzer. */
    private IndexWriter openWriter() throws IOException {
        return new IndexWriter(directory,
                new IndexWriterConfig(LUCENE_VERSION, new StandardAnalyzer(LUCENE_VERSION)));
    }

    /** Builds a document with the given id and the email/content/name of sample entry {@code sourceIndex}. */
    private Document buildDocument(String id, int sourceIndex) {
        Document doc = new Document();
        doc.add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
        doc.add(new Field("email", emails[sourceIndex], Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("content", contents[sourceIndex], Field.Store.NO, Field.Index.ANALYZED));
        doc.add(new Field("name", names[sourceIndex], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
        return doc;
    }

    /** Closes the writer if it was opened, printing any failure instead of propagating it. */
    private static void closeWriter(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Closes the reader if it was opened, printing any failure instead of propagating it. */
    private static void closeReader(IndexReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

發佈了30 篇原創文章 · 獲贊 2 · 訪問量 1萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章