JavaEE全文檢索Lucene入門教程(一)

第一步 

下載Lucene的jar包。此入門教程用的是lucene-3.5.0。

第二步

建立普通的java項目

原理圖

索引

索引庫是一組文件的集合 

 Directory:索引庫的位置,可以在本地磁盤,也可以在內存中,

Document:索引庫中的每一篇文檔就是一個Field的集合

public class Document {

    Set<Field> field;

}

IndexWriter: 操作索引庫的增刪改

下面的方法

Index()這個是建立索引的過程;

serarch() 這個方法是搜索的過程(示例代碼中的方法名拼寫為 serarch);

package org.itcast.lucenc;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene 3.5 walkthrough: {@link #Index()} builds an on-disk index
 * from the files of one folder, and {@link #serarch()} queries that index
 * and prints the names of the matching files.
 *
 * <p>Both methods are demo entry points: they use fixed paths, report
 * failures by printing the stack trace, and never propagate exceptions.
 */
public class HelloLucenc {

	/** On-disk location of the index; written by Index(), read by serarch(). */
	private static final String INDEX_DIR = "d:/lucenc/index01";

	/** Folder whose direct child files are indexed. */
	private static final String SOURCE_DIR = "E:/lunces學習資料";

	/** Maximum number of hits requested from the searcher. */
	private static final int MAX_HITS = 10;

	/**
	 * Indexes every regular file found directly inside {@link #SOURCE_DIR}.
	 *
	 * <p>Each file becomes one document with three fields: {@code content}
	 * (the file body, supplied through a reader), plus {@code filename} and
	 * {@code path} (stored, not analyzed). Sub-directories are skipped.
	 * I/O failures are printed and end the run; the writer and the directory
	 * are closed in every case.
	 */
	public void Index() {
		// An in-memory index could be used instead: new RAMDirectory()
		Directory directory = null;
		IndexWriter indexWriter = null;
		try {
			// 1. Open the Directory that holds the index
			directory = FSDirectory.open(new File(INDEX_DIR));

			// 2. Create the IndexWriter
			IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35,
					new StandardAnalyzer(Version.LUCENE_35));
			indexWriter = new IndexWriter(directory, iwc);

			// listFiles() returns null when the path is missing, is not a
			// directory, or cannot be read; bail out instead of hitting an NPE.
			File[] files = new File(SOURCE_DIR).listFiles();
			if (files == null) {
				System.err.println("Cannot list files in " + SOURCE_DIR);
				return;
			}

			for (File file : files) {
				// A FileReader cannot be opened on a directory; skipping it
				// keeps one sub-folder from aborting the whole run.
				if (!file.isFile()) {
					continue;
				}
				FileReader content = new FileReader(file);
				try {
					// 3. Create the Document and 4. add its Fields
					Document doc = new Document();
					doc.add(new Field("content", content));
					doc.add(new Field("filename", file.getName(),
							Field.Store.YES, Field.Index.NOT_ANALYZED));
					doc.add(new Field("path", file.getAbsolutePath(),
							Field.Store.YES, Field.Index.NOT_ANALYZED));
					indexWriter.addDocument(doc);
				} finally {
					// Release the file handle even when addDocument fails;
					// closing an already-closed reader has no effect.
					content.close();
				}
			}
		} catch (IOException e) {
			// CorruptIndexException and LockObtainFailedException are
			// IOException subclasses and were handled the same way.
			e.printStackTrace();
		} finally {
			closeQuietly(indexWriter);
			closeQuietly(directory);
		}
	}

	/**
	 * Searches the {@code content} field of the index for the term
	 * {@code Redis} and prints the stored {@code filename} of up to
	 * {@link #MAX_HITS} hits to standard output.
	 *
	 * <p>Failures are printed rather than thrown; the reader and the
	 * directory are closed in every case.
	 */
	public void serarch() {
		Directory directory = null;
		IndexReader reader = null;
		try {
			// 1. Open the Directory
			directory = FSDirectory.open(new File(INDEX_DIR));
			// 2. Create the IndexReader
			reader = IndexReader.open(directory);
			// 3. Create the IndexSearcher on top of the reader
			IndexSearcher searcher = new IndexSearcher(reader);

			// 4. Build the Query
			QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
					new StandardAnalyzer(Version.LUCENE_35));
			Query query = parser.parse("Redis");

			// 5. Run the search and get the TopDocs
			TopDocs tds = searcher.search(query, MAX_HITS);
			// 6. Walk the ScoreDoc entries
			for (ScoreDoc sd : tds.scoreDocs) {
				// 7. Load the Document for this hit
				Document d = searcher.doc(sd.doc);
				// 8. Read the wanted value from the Document
				System.out.println(d.get("filename"));
			}
		} catch (Exception e) {
			// Broad on purpose: covers I/O errors and query-parse errors.
			// Previously swallowed silently, which hid every failure.
			e.printStackTrace();
		} finally {
			// 9. Close the reader (and directory) on every path
			closeQuietly(reader);
			closeQuietly(directory);
		}
	}

	/**
	 * Closes {@code resource} if it is non-null, printing (not throwing)
	 * any IOException so that cleanup never masks the original failure.
	 */
	private static void closeQuietly(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

}










發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章