Lucene 示例代碼

package cn.edu360.lucene;

import org.apache.lucene.document.*;
import org.apache.lucene.document.Field.Store;

/**
 * Simple article entity that can be converted to and from a Lucene
 * {@link Document}.
 *
 * <p>Field mapping used by {@link #toDocument()}:
 * <ul>
 *   <li>{@code id}: indexed as a {@code LongPoint} and additionally stored</li>
 *   <li>{@code title}, {@code content}: analyzed, indexed and stored</li>
 *   <li>{@code author}: indexed as a single un-analyzed term and stored</li>
 *   <li>{@code url}: stored only, not indexed</li>
 * </ul>
 */
public class Article {

    private Long id;

    private String title;

    private String content;

    private String author;

    private String url;

    /** Creates an empty article; populate it through the setters. */
    public Article() {
    }

    /**
     * Creates a fully populated article.
     *
     * @param id      numeric identifier
     * @param title   article title
     * @param content article body text
     * @param author  author name
     * @param url     source URL
     */
    public Article(Long id, String title, String content, String author,
            String url) {
        this.id = id;
        this.title = title;
        this.content = content;
        this.author = author;
        this.url = url;
    }

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    /**
     * Converts this article into a Lucene document.
     *
     * <p>The {@code id} is mandatory. Any of {@code title}, {@code content},
     * {@code author} or {@code url} that is still {@code null} is left out of
     * the document instead of being handed to a Lucene field constructor,
     * which rejects null values.
     *
     * @return a new document holding the non-null fields of this article
     * @throws NullPointerException if {@code id} has not been set
     */
    public Document toDocument() {
        // Fail fast with a clear message instead of an anonymous unboxing NPE.
        long idValue = java.util.Objects.requireNonNull(id, "id must be set before indexing");

        // A Lucene document is essentially a bag of named fields.
        Document doc = new Document();
        // LongPoint makes the id searchable (exact / range queries) but does not store it...
        doc.add(new LongPoint("id", idValue));
        // ...so the value is stored separately to be readable from search hits.
        doc.add(new StoredField("id", idValue));

        // TextField: analyzed (tokenized), indexed, and stored because of Store.YES.
        if (title != null) {
            doc.add(new TextField("title", title, Store.YES));
        }
        // Same as title; Store.NO would index the text without storing it.
        if (content != null) {
            doc.add(new TextField("content", content, Store.YES));
        }

        // StringField: indexed as one un-analyzed term, and stored.
        if (author != null) {
            doc.add(new StringField("author", author, Store.YES));
        }

        // StoredField only: not analyzed, not indexed, just stored.
        if (url != null) {
            doc.add(new StoredField("url", url));
        }
        return doc;
    }

    /**
     * Rebuilds an article from the stored fields of a Lucene document.
     *
     * <p>Fields that are absent from the document become {@code null} in the
     * returned article, including {@code id}.
     *
     * @param doc document whose stored fields are read
     * @return the article described by {@code doc}
     * @throws NumberFormatException if the stored {@code id} is present but is
     *         not a valid long
     */
    public static Article parseArticle(Document doc) {
        // doc.get returns null for a missing field; Long.parseLong(null) would throw.
        String storedId = doc.get("id");
        Long id = (storedId == null) ? null : Long.valueOf(storedId);
        return new Article(id, doc.get("title"), doc.get("content"),
                doc.get("author"), doc.get("url"));
    }

    @Override
    public String toString() {
        return "id : " + id + " , title : " + title + " , content : " + content + " , author : " + author + " , url : " + url;
    }

}
package cn.edu360.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.IOException;
import java.nio.file.Paths;

/**
 * Created by zx on 2017/9/12.
 * <p>
 * Lucene Index ToolBox : https://github.com/DmitryKey/luke/releases
 * <p>
 * https://segmentfault.com/a/1190000010367206
 */
public class HelloWorld {

    /** Filesystem location of the index shared by every test in this class. */
    private static final String INDEX_PATH = "/Users/zx/Documents/dev/lucene/index";

    /** Maximum number of hits fetched and printed per search. */
    private static final int MAX_HITS = 10;

    /**
     * Writes one article into the Lucene index.
     *
     * @throws IOException if the index cannot be opened or written
     */
    @Test
    public void testCreate() throws IOException {
        Article article = new Article();
        article.setId(108L);
        article.setAuthor("老王");
        article.setTitle("學習大數據");
        article.setContent("學數據,迎娶丁老師!");
        article.setUrl("http://www.edu360.cn/a10011");

        // IKAnalyzer segments Chinese text into words; StandardAnalyzer would
        // instead emit one token per Chinese character.
        // Resources are closed in reverse order (writer, directory, analyzer),
        // also when an exception is thrown.
        try (Analyzer analyzer = new IKAnalyzer(true);
             FSDirectory directory = FSDirectory.open(Paths.get(INDEX_PATH));
             IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            // Closing the writer flushes the added document to the index.
            indexWriter.addDocument(article.toDocument());
        }
    }

    /**
     * Parses a query string against the {@code content} field and prints the
     * matching articles.
     *
     * @throws IOException    if the index cannot be read
     * @throws ParseException if the query string cannot be parsed
     */
    @Test
    public void testSearch() throws IOException, ParseException {
        // The parser analyzes the query text with the same analyzer that was
        // used at index time. A TermQuery would instead treat the text as one
        // fixed term with no analysis.
        Query query;
        try (Analyzer analyzer = new IKAnalyzer(true)) {
            query = new QueryParser("content", analyzer).parse("數據");
        }

        printTopHits(query);
    }

    /**
     * Deletes the documents whose {@code id} equals 105.
     *
     * @throws IOException    if the index cannot be opened or written
     * @throws ParseException never thrown here; kept for signature compatibility
     */
    @Test
    public void testDelete() throws IOException, ParseException {
        // The id is indexed as a LongPoint, so it is matched with a point query.
        // Alternatives: a Term (exact, un-analyzed match), a parsed query, or
        // LongPoint.newRangeQuery("id", lower, upper) for an id range.
        Query query = LongPoint.newExactQuery("id", 105L);

        try (Analyzer analyzer = new IKAnalyzer(true);
             FSDirectory directory = FSDirectory.open(Paths.get(INDEX_PATH));
             IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            indexWriter.deleteDocuments(query);
            indexWriter.commit();
        }
    }

    /**
     * Replaces documents in the index. Lucene implements an update as a
     * delete followed by an add.
     *
     * <p>Note: every document whose {@code author} term equals the given value
     * is deleted before the new document is added.
     *
     * @throws IOException    if the index cannot be opened or written
     * @throws ParseException never thrown here; kept for signature compatibility
     */
    @Test
    public void testUpdate() throws IOException, ParseException {
        Article article = new Article();
        article.setId(106L);
        article.setAuthor("老王");
        article.setTitle("學好大數據,要找趙老師");
        article.setContent("迎娶白富美,走上人生巔峯!!!");
        article.setUrl("http://www.edu360.cn/a111");

        // Use the same analyzer as testCreate and the search tests; indexing
        // with a different analyzer produces terms the IK-parsed queries
        // cannot match.
        try (Analyzer analyzer = new IKAnalyzer(true);
             FSDirectory directory = FSDirectory.open(Paths.get(INDEX_PATH));
             IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            indexWriter.updateDocument(new Term("author", "老王"), article.toDocument());
            indexWriter.commit();
        }
    }

    /**
     * Searches several fields ({@code title} and {@code content}) at once.
     *
     * @throws IOException    if the index cannot be read
     * @throws ParseException if the query string cannot be parsed
     */
    @Test
    public void testMultiField() throws IOException, ParseException {
        String[] fields = {"title", "content"};

        Query query;
        try (Analyzer analyzer = new IKAnalyzer(true)) {
            // Query parser that expands the query over all listed fields.
            query = new MultiFieldQueryParser(fields, analyzer).parse("老師");
        }

        printTopHits(query);
    }

    /**
     * Matches every document in the index and prints up to
     * {@link #MAX_HITS} of them.
     *
     * @throws IOException    if the index cannot be read
     * @throws ParseException never thrown here; kept for signature compatibility
     */
    @Test
    public void testMatchAll() throws IOException, ParseException {
        printTopHits(new MatchAllDocsQuery());
    }

    /**
     * Boolean query: combines several clauses. Here the title must contain
     * one term and the content must not contain another.
     *
     * @throws Exception if the index cannot be read
     */
    @Test
    public void testBooleanQuery() throws Exception {
        // TermQuery is not analyzed: each value must equal an indexed term exactly.
        Query titleQuery = new TermQuery(new Term("title", "老師"));
        Query contentQuery = new TermQuery(new Term("content", "丁"));
        BooleanQuery boolQuery = new BooleanQuery.Builder()
                .add(new BooleanClause(titleQuery, BooleanClause.Occur.MUST))
                .add(new BooleanClause(contentQuery, BooleanClause.Occur.MUST_NOT))
                .build();
        System.out.println(boolQuery);

        printTopHits(boolQuery);
    }

    /**
     * Uses the classic query syntax ({@code field:term OR field:term}).
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    @Test
    public void testQueryParser() throws Exception {
        Query query;
        try (Analyzer analyzer = new IKAnalyzer(true)) {
            // First argument: default search field; second: analyzer for the query text.
            QueryParser queryParser = new QueryParser("title", analyzer);
            query = queryParser.parse("title:學好 OR title:學習");
        }
        System.out.println(query);

        printTopHits(query);
    }

    /**
     * Range query on the numeric {@code id} field.
     *
     * @throws Exception if the index cannot be read
     */
    @Test
    public void testRangeQuery() throws Exception {
        Query query = LongPoint.newRangeQuery("id", 107L, 108L);
        System.out.println(query);

        printTopHits(query);
    }

    /**
     * Opens the index read-only, runs {@code query}, and prints each of the
     * top {@link #MAX_HITS} hits as an {@link Article}. The reader and the
     * directory are closed even if the search fails.
     *
     * @param query the query to execute
     * @throws IOException if the index cannot be opened or read
     */
    private static void printTopHits(Query query) throws IOException {
        try (FSDirectory directory = FSDirectory.open(Paths.get(INDEX_PATH));
             DirectoryReader directoryReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(directoryReader);

            // The search itself yields only internal document ids and scores.
            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Load the stored fields for the hit and map them to the entity.
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(Article.parseArticle(document));
            }
        }
    }
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章