Lucene4.10使用教程(三):lucene的增刪改查

萬丈高樓平地起,在看完helloLucene的demo後,我們可以看下Lucene的增刪改查,下面是詳細代碼。

其中:Store.YES表示建立索引並且存儲該域的原始值,Store.NO表示只建立索引而不存儲原始值(查詢結果中無法取回該域的內容)。

在Lucene中使用評分來確定文檔的重要度和優先級。評分越高,表示文檔優先級越高,排序顯示時位置越靠前。在Lucene4.10中,無法直接對整個文檔設定評分(Document已不提供setBoost()方法),不過可以通過提高文檔中各個Field的評分來提高整個文檔的評分,Field的評分可以使用field.setBoost()來設定。


package com.johnny.lucene01.index;

import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Index operations demo for Lucene 4.10: build an index, report document counts,
 * delete, merge, update and search.
 *
 * <p>Required jars: Lucene-core, Lucene-analysis (the standard analyzer is used for
 * testing), Lucene-queryParser.
 *
 * <p>Every operation is best-effort: failures are printed with
 * {@code printStackTrace()} and the method returns normally.
 *
 * @author Johnny
 * @date 2015-01-04
 */
public class IndexUtil {
    private static final Version Lucene_Version = Version.LUCENE_4_10_2;
    private String[] ids = {"1","2","3","4","5","6"};
    // NOTE(review): these values contain no '@' and look like e-mail obfuscation
    // placeholders left by the page this code was copied from. The "itat.org" and
    // "zttc.edu" keys in `scores` suggest the real addresses used those domains -
    // confirm and restore the original addresses.
    private String[] emails = {"[email protected]","[email protected]","[email protected]","[email protected]","[email protected]","[email protected]"};
    private String[] contents = {
            "welcome to visited the space,I like book",
            "hello boy, I like pingpeng ball",
            "my name is cc I like game",
            "I like football",
            "I like football and I like basketball too",
            "I like movie and swim"
    };
    private Date[] dates = null;
    private int[] attachs = {2,3,1,4,5,5};
    private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};
    private Directory directory = null;
    // Boost per e-mail domain. Populated in the constructor but not read by any
    // method of this class.
    private Map<String,Float> scores = new HashMap<String,Float>();
    // Reader cached by getSearcher() and reused between calls.
    private DirectoryReader reader = null;
    // Not used by any method of this class (each operation opens its own writer);
    // kept because it is visible to other classes in the package.
    IndexWriter writer = null;

    /**
     * Fills {@link #dates} with one parsed date per document. If a literal fails to
     * parse, the error is printed and the remaining entries stay {@code null}.
     */
    private void setDates() {
        String[] rawDates = {
                "2010-02-19", "2012-01-11", "2011-09-19",
                "2010-12-22", "2012-01-01", "2011-05-19"
        };
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        dates = new Date[ids.length];
        try {
            for (int i = 0; i < rawDates.length; i++) {
                dates[i] = sdf.parse(rawDates[i]);
            }
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Initialises the sample data and opens the on-disk index directory.
     * If the directory cannot be opened the error is printed and {@code directory}
     * stays {@code null}; later operations then report a failure instead of running.
     */
    public IndexUtil(){
        setDates();
        scores.put("itat.org",2.0f); // set the boost values
        scores.put("zttc.edu", 1.5f);
        try {
            directory = FSDirectory.open(new File("/Users/ChinaMWorld/Desktop/index/"));
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Alternative for experiments: an in-memory index via `new RAMDirectory()`,
        // followed by a call to index().
    }

    /**
     * Opens a writer on the index directory using the standard analyzer.
     *
     * @return a new writer; the caller is responsible for closing it
     * @throws IOException if the directory was never opened or the writer cannot be created
     */
    private IndexWriter openWriter() throws IOException {
        if (directory == null) {
            throw new IOException("index directory is not available");
        }
        return new IndexWriter(directory,
                new IndexWriterConfig(Lucene_Version, new StandardAnalyzer()));
    }

    /**
     * Closes the given writer if it was opened. Closing is best-effort: a failure
     * is printed but must not mask the outcome of the operation that used the writer.
     *
     * @param w the writer to close, or {@code null} if opening it failed
     */
    private static void closeQuietly(IndexWriter w) {
        if (w == null) {
            return;
        }
        try {
            w.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Builds the index: adds one document per entry of the sample arrays. */
    public void index() {
        IndexWriter indexWriter = null;
        try {
            indexWriter = openWriter();
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                doc.add(new StringField("id",ids[i],Store.YES));
                doc.add(new StringField("email", emails[i],Store.YES));
                doc.add(new TextField("content", contents[i], Store.NO));
                doc.add(new StringField("name",names[i], Store.YES));
                // store a number
                doc.add(new IntField("attach",attachs[i], Store.YES));
                // store a date as epoch milliseconds
                doc.add(new LongField("date", dates[i].getTime(), Store.YES));

                // Text after the last '@' (the whole string when there is none).
                String et = emails[i].substring(emails[i].lastIndexOf("@")+1);
                System.out.println(et);
                /*
                 * In Lucene 4.x only fields can be boosted, not whole documents; to
                 * raise a document's weight, boost its fields, e.g.
                 *   field.setBoost(2.0f);
                 * No boost is applied here.
                 * NOTE(review): Field.setBoost is documented to reject fields that
                 * omit norms, which StringField does - apply it to a TextField;
                 * confirm against the Lucene 4.10 Javadoc.
                 */

                indexWriter.addDocument(doc);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /** Prints the live, maximum and deleted document counts of the index. */
    public void query() {
        IndexReader statsReader = null;
        try {
            if (directory == null) {
                throw new IOException("index directory is not available");
            }
            statsReader = DirectoryReader.open(directory);
            // the reader gives cheap access to the document counts
            System.out.println("numDocs:"+statsReader.numDocs());
            System.out.println("maxDocs:"+statsReader.maxDoc());
            System.out.println("deleteDocs:"+statsReader.numDeletedDocs());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the reader is released even when a call above fails.
            if (statsReader != null) {
                try {
                    statsReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Deletes the documents whose {@code id} term is {@code "1"} and commits. */
    public void delete(){
        IndexWriter indexWriter = null;
        try{
            indexWriter = openWriter();
            // The argument may be a Query or a Term; a Term is an exact-match value.
            // NOTE(review): the matching documents are presumably only marked as
            // deleted here (query() still reports them via numDeletedDocs) and
            // dropped when their segments are merged - see merge()/forceDelete().
            indexWriter.deleteDocuments(new Term("id", "1"));
            indexWriter.commit();
        }catch(Exception e){
            e.printStackTrace();
        }finally{
            closeQuietly(indexWriter);
        }
    }

    /**
     * Merges (optimises) the index down to at most two segments; deleted documents
     * in the merged segments are dropped.
     *
     * <p>Discouraged since Lucene 3.5 because it is expensive; Lucene merges
     * segments on its own as needed. To combine several indexes, use
     * {@code writer.addIndexes(...)} with their Directory or IndexReader instances.
     */
    public void merge(){
        IndexWriter indexWriter = null;
        try{
            indexWriter = openWriter();
            indexWriter.forceMerge(2);
        }catch(Exception e){
            e.printStackTrace();
        }finally{
            closeQuietly(indexWriter);
        }
    }

    /** Forces the deleted documents to be purged from the index. */
    public void forceDelete(){
        IndexWriter indexWriter = null;
        try{
            indexWriter = openWriter();
            indexWriter.forceMergeDeletes();
        }catch(Exception e){
            e.printStackTrace();
        }finally{
            closeQuietly(indexWriter);
        }
    }

    /**
     * Replaces the documents whose {@code id} term is {@code "1"} with a new
     * document (which carries id {@code "21"}).
     */
    public void update(){
        IndexWriter indexWriter = null;
        try{
            indexWriter = openWriter();
            Document doc = new Document();
            /*
             * Lucene has no in-place update; updateDocument is a delete of the
             * documents matching the term followed by an add of the new document.
             */
            doc.add(new StringField("id", "21", Store.YES));
            // StringField, as in index(): the e-mail is one untokenized term, so the
            // same field is indexed the same way in every document.
            doc.add(new StringField("email", "aa.bb@s", Store.YES));
            doc.add(new TextField("content", "update content like", Store.NO));
            doc.add(new StringField("name", "jackson", Store.YES));
            indexWriter.updateDocument(new Term("id","1"), doc);
        }catch(Exception e){
            e.printStackTrace();
        }finally{
            closeQuietly(indexWriter);
        }
    }

    /**
     * Returns a searcher for near-real-time style querying: the reader is kept open
     * between calls and replaced only when the index has changed.
     *
     * @return a searcher over the current reader, or {@code null} if the index
     *         could not be opened (the error is printed)
     */
    public IndexSearcher getSearcher() {
        try {
            if (directory == null) {
                throw new IOException("index directory is not available");
            }
            if (reader == null) {
                reader = DirectoryReader.open(directory);
            } else {
                DirectoryReader refreshed = DirectoryReader.openIfChanged(reader);
                if (refreshed != null) {
                    // Swap first, then close the old reader, so the field never
                    // points at a closed reader if close() fails.
                    DirectoryReader stale = reader;
                    reader = refreshed;
                    stale.close();
                }
            }
            return new IndexSearcher(reader);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Searches the {@code content} field for the term {@code "like"} and prints the
     * stored fields of up to ten hits.
     */
    public void search(){
        IndexSearcher searcher = getSearcher();
        if (searcher == null) {
            // getSearcher() has already printed the cause.
            return;
        }
        try {
            TermQuery query = new TermQuery(new Term("content","like"));
            TopDocs hits = searcher.search(query, 10);
            for (ScoreDoc hit : hits.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                System.out.println(doc.get("id")+"---->"+
                        doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
                        doc.get("attach")+","+doc.get("date")+"," +doc.getValues("email")[0]);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章