Lucene5教程

Lucene 各版本之間的實現方法有些變化，此文記錄 Lucene 5（以 5.3.1 爲例）的實現方式。

一、pom文件

注意：下文代碼還使用了 IKAnalyzer（org.wltea.analyzer.lucene.IKAnalyzer），該分詞器不在以下依賴列表中，需另行引入。

       <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>5.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>5.3.1</version>
        </dependency>

二、代碼實現

package com.jthao.lucene.example1;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.IOException;
import java.io.StringReader;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;


/**
 * Minimal create / read / update / delete / search walkthrough for a Lucene 5 index.
 *
 * <p>An instance opens a file-system index, upserts two hard-coded sample documents and
 * keeps an {@link IndexWriter} and an index reader open until {@link #close()} is called.
 * Use it in a try-with-resources statement so the writer, reader, directory and analyzer
 * are always released.
 *
 * <p>The analyzer is {@code org.wltea.analyzer.lucene.IKAnalyzer}; it is not one of the
 * artifacts listed in the accompanying pom snippet and has to be provided separately.
 */
public class IndexCrud implements AutoCloseable {

    /** Index location used by the no-argument constructor. */
    private static final String DEFAULT_INDEX_PATH = "C:\\home\\iflow\\lucene1";

    // Sample data: element i of every array belongs to the same document.
    String[] ids = {"1", "2"};

    String[] projectNames = {"北京中諮時代資產管理有限公司100%股權", "臺灣菲格科技發展有限公司100%股權轉讓"};

    String[] projectCodes = {"G32018BJ1000852-0", "G32018FJ1000017-0"};

    String[] zones = {"上海", "北京"};

    String[] tags = {"商務服務業", "批發業"};

    private Directory directory;

    private Analyzer analyzer;

    private IndexWriterConfig config;

    private IndexWriter writer;

    private DirectoryReader reader;

    /**
     * Opens the index at the default location and indexes the sample documents.
     *
     * @throws IllegalStateException if the index cannot be opened
     */
    public IndexCrud() {
        this(DEFAULT_INDEX_PATH);
    }

    /**
     * Opens (creating it if necessary) the index stored at {@code indexPath} and indexes
     * the sample documents.
     *
     * @param indexPath file-system directory that holds the index
     * @throws IllegalStateException if the index cannot be opened; anything that was
     *                               already opened is released before throwing
     */
    public IndexCrud(String indexPath) {
        try {
            // On-disk storage; an in-memory RAMDirectory could be used here instead.
            directory = FSDirectory.open(Paths.get(indexPath));
            analyzer = new IKAnalyzer();
            config = new IndexWriterConfig(analyzer);
            writer = new IndexWriter(directory, config);
            // Populate (and commit) before opening the reader so the reader sees the data.
            createIndex();
            reader = DirectoryReader.open(directory);
        } catch (IOException e) {
            // Do not leave a half-built instance (and a held write lock) behind.
            try {
                closeResources();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw new IllegalStateException("Failed to open Lucene index at " + indexPath, e);
        }
    }

    /**
     * Demo entry point: opens the index, runs the sample query and closes everything.
     * Call {@code indexCrud.deleteAllIndex()} before the query to start from an empty index.
     */
    public static void main(String[] args) {
        try (IndexCrud indexCrud = new IndexCrud()) {
            indexCrud.queryIndex();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Tokenizes {@code str} with analyzer {@code a} and prints one line per token:
     * position increment, term text, start and end offset, and token type.
     *
     * @param str text to analyze
     * @param a   analyzer that produces the token stream
     */
    public static void display(String str, Analyzer a) {
        // try-with-resources guarantees close(); without it the analyzer's reused
        // components stay "open" and the next tokenStream() call on the analyzer fails.
        try (TokenStream stream = a.tokenStream("renyi", new StringReader(str))) {
            // Position increment relative to the previous token.
            PositionIncrementAttribute pia = stream.addAttribute(PositionIncrementAttribute.class);
            // Character offsets of the token in the input.
            OffsetAttribute oa = stream.addAttribute(OffsetAttribute.class);
            // The token text itself.
            CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class);
            // Token type as reported by the tokenizer.
            TypeAttribute ta = stream.addAttribute(TypeAttribute.class);
            // Since Lucene 4 the consumer must call reset() before and end() after iterating.
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(pia.getPositionIncrement() + ":[" + cta.toString() + "]:" + oa.startOffset() + "->" + oa.endOffset() + ":" + ta.type());
            }
            stream.end();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Replaces the document whose {@code id} is the second sample id with a new document
     * that only carries {@code id} and {@code name}, then commits.
     */
    public void updateIndex() {
        try {
            String id = ids[1];
            Document doc = new Document();
            doc.add(new StringField("id", id, Field.Store.YES));
            doc.add(new StringField("name", "lsup", Field.Store.YES));
            // updateDocument deletes every document matching the term, then adds the new one.
            writer.updateDocument(new Term("id", id), doc);

            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes every document in the index and commits.
     */
    public void deleteAllIndex() {
        try {
            writer.deleteAll();
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes the documents whose {@code id} is "1" or "3" and commits.
     */
    public void deleteIndex() {
        try {
            Term[] terms = {new Term("id", "1"), new Term("id", "3")};
            // deleteDocuments also has an overload taking Query objects, which deletes
            // whatever those queries match.
            writer.deleteDocuments(terms);
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a searcher over the most recent committed state of the index.
     *
     * <p>If the index changed since the current reader was opened, the reader is replaced
     * by a refreshed one and the old reader is closed. If refreshing fails, the error is
     * printed and the searcher is built on the reader that is currently held.
     *
     * @return a new searcher; never {@code null}
     */
    public IndexSearcher getSearcher() {
        try {
            // openIfChanged returns null when the current reader is still up to date.
            DirectoryReader refreshed = DirectoryReader.openIfChanged(reader);
            if (refreshed != null) {
                // Swap first so a failing close() cannot leak the refreshed reader.
                DirectoryReader stale = reader;
                reader = refreshed;
                stale.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return new IndexSearcher(reader);
    }

    /**
     * Searches {@code projectName}, {@code projectCode}, {@code zone} and {@code tag} for
     * the sample keyword and prints up to 1000 hits with their doc id, score and stored
     * fields.
     *
     * @throws InvalidTokenOffsetsException declared for callers of the original signature;
     *                                      nothing in this method raises it
     */
    public void queryIndex() throws InvalidTokenOffsetsException {
        try {
            IndexSearcher searcher = getSearcher();
            // Parse the keyword against several fields at once; a single-field search
            // would use new QueryParser("zone", analyzer) instead.
            QueryParser parse = new MultiFieldQueryParser(new String[]{"projectName", "projectCode", "zone", "tag"}, analyzer);
            Query query = parse.parse("商務服務");
            TopDocs topDocs = searcher.search(query, 1000);

            // Hits are returned ordered by score.
            for (ScoreDoc hit : topDocs.scoreDocs) {
                Document hitDoc = searcher.doc(hit.doc);
                System.out.println("(" + hit.doc + "-" + hit.score + ")" + "id:" + hitDoc.get("id") + " projectName:"
                        + hitDoc.get("projectName") + " projectCode:" + hitDoc.get("projectCode") + " zone:" + hitDoc.get("zone") + " tag:" + hitDoc.get("tag"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints document counts as seen by the currently held reader: maximum doc id bound,
     * number of live documents, and number of deleted documents.
     */
    public void readIndex() {
        System.out.println("max num:" + reader.maxDoc());
        System.out.println("index num:" + reader.numDocs());
        System.out.println("delete index num:" + reader.numDeletedDocs());
    }

    /**
     * Writes the sample documents to the index and commits.
     *
     * <p>Each document is written with {@code updateDocument} keyed on its {@code id}, so
     * running this more than once against the same on-disk index replaces the sample
     * documents instead of appending duplicates.
     */
    public void createIndex() {
        try {
            for (int i = 0; i < ids.length; i++) {
                Field projectName = new TextField("projectName", projectNames[i], Field.Store.YES);
                Field projectCode = new TextField("projectCode", projectCodes[i], Field.Store.YES);
                Field zone = new TextField("zone", zones[i], Field.Store.YES);
                Field tag = new TextField("tag", tags[i], Field.Store.YES);

                // A higher boost ranks matches in that field higher in the results.
                tag.setBoost(2.5f);
                projectName.setBoost(2.0f);
                projectCode.setBoost(2.0f);
                zone.setBoost(0.5f);

                Document doc = new Document();
                // StringField is indexed as a single untokenized term, so it can serve as
                // the exact-match key for updateDocument below.
                doc.add(new StringField("id", ids[i], Field.Store.YES));
                doc.add(projectName);
                doc.add(projectCode);
                doc.add(zone);
                doc.add(tag);
                // A field that should be searchable but not retrievable would be added
                // with Field.Store.NO, e.g. new TextField("content", text, Field.Store.NO).

                writer.updateDocument(new Term("id", ids[i]), doc);
            }
            writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the reader, the writer, the directory and the analyzer.
     *
     * @throws IOException if closing any of them fails; the remaining ones are still closed
     */
    @Override
    public void close() throws IOException {
        closeResources();
    }

    /** Closes whatever has been opened so far, tolerating partially constructed state. */
    private void closeResources() throws IOException {
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } finally {
                try {
                    if (directory != null) {
                        directory.close();
                    }
                } finally {
                    if (analyzer != null) {
                        analyzer.close();
                    }
                }
            }
        }
    }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章