Lucene學習之創建索引

一、實現步驟

第一步:創建一個maven工程。

第二步:創建一個indexwriter對象。

  • 指定索引庫的存放位置Directory對象
  • 指定一個分析器,對文檔內容進行分析。

第三步:創建document對象。

第四步:創建field對象,將field添加到document對象中。

第五步:使用IndexWriter對象將document對象寫入索引庫,此過程會進行索引創建,並將索引和document對象一併寫入索引庫。

第六步:關閉IndexWriter對象。

二、pom文件

我這裏用的是當前最新版本,具體maven文件如下:

<dependencies>
    <!--
      JUnit is declared exactly once: each groupId:artifactId pair must be
      unique inside <dependencies>, and repeated declarations with different
      versions/scopes make the effective dependency ambiguous.
      Version 4.12 with compile scope matches the most recent of the former
      duplicate declarations. If JUnit is only used from src/test, prefer
      <scope>test</scope> instead.
    -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>compile</scope>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>7.5.0</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-analyzers-common -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>7.5.0</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>7.5.0</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
    <dependency>
      <groupId>commons-io</groupId>
      <artifactId>commons-io</artifactId>
      <version>2.6</version>
    </dependency>

  </dependencies>

三、源代碼

package com.wuzheng.lucene;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.List;

/**
 * Builds a Lucene index from the files found in a source directory.
 *
 * <p>Every regular file directly inside {@code FILE_DIR} becomes one document
 * with two fields: {@code fileName} (a {@link StringField}, stored in the
 * index) and {@code fileContent} (a {@link TextField} analyzed with
 * {@link StandardAnalyzer}, not stored).
 */
public class Indexer {

    /** Location of the index on disk. */
    private static final String INDEX_DIR = "D:/index";

    /** Directory whose files are read and indexed. */
    private static final String FILE_DIR = "D:/file";

    /**
     * Creates the index: lists the source files, opens an {@link IndexWriter}
     * on the index directory and adds one document per file.
     *
     * <p>The analyzer, the directory and the writer are opened in a
     * try-with-resources statement so they are released (and the index write
     * lock with them) even when reading or indexing a file fails.
     *
     * @throws Exception if the source directory cannot be listed, a file
     *                   cannot be read, or the index cannot be written
     */
    public void createIndex() throws Exception {
        // Collect the raw documents first so that a missing source directory
        // fails before the index directory is opened for writing.
        List<File> sourceFiles = getSourceFiles(FILE_DIR);

        try (StandardAnalyzer analyzer = new StandardAnalyzer();
             Directory directory = FSDirectory.open(Paths.get(INDEX_DIR));
             IndexWriter indexWriter =
                     new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            for (File file : sourceFiles) {
                indexWriter.addDocument(fileToDocument(file));
            }
        }
    }

    /**
     * Converts a file into a Lucene document.
     *
     * @param file the file to convert; its content is read as UTF-8
     * @return a document holding the {@code fileName} and {@code fileContent} fields
     * @throws IOException if the file content cannot be read
     */
    private Document fileToDocument(File file) throws IOException {
        Document document = new Document();
        document.add(new StringField("fileName", file.getName(), Field.Store.YES));
        document.add(new TextField(
                "fileContent", FileUtils.readFileToString(file, "UTF-8"), Field.Store.NO));
        return document;
    }

    /**
     * Lists the regular files directly inside the given directory.
     * Sub-directories are skipped because their content cannot be read as text.
     *
     * @param sourceDir path of the directory to list
     * @return the regular files found, possibly empty
     * @throws IOException if the path is not a directory or cannot be listed
     */
    private List<File> getSourceFiles(String sourceDir) throws IOException {
        // listFiles returns null (not an empty array) when the path is not a
        // directory or an I/O error occurs.
        File[] files = new File(sourceDir).listFiles(File::isFile);
        if (files == null) {
            throw new IOException("Cannot list source directory: " + sourceDir);
        }
        return Arrays.asList(files);
    }

    /**
     * Runs the indexer once and reports the outcome on the console.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Indexer indexer = new Indexer();
        try {
            indexer.createIndex();
            System.out.println("create index success");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}

四、測試結果

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章