lucene入門03

入門程序

首先要導入5個包

IKAnalyzer3.2.0Stable.jar

lucene-analyzers-3.0.1.jar

lucene-core-3.0.1.jar

lucene-highlighter-3.0.1.jar

lucene-memory-3.0.1.jar

然後是domain

package cn.itcast._domain;

public class Article {
	private Integer id;
	private String title;
	private String content;

	@Override
	public String toString() {
		return "[id=" + id + ", title=" + title + ", content=" + content + "]";
	}

	public Integer getId() {
		return id;
	}

	public void setId(Integer id) {
		this.id = id;
	}

	public String getTitle() {
		return title;
	}

	public void setTitle(String title) {
		this.title = title;
	}

	public String getContent() {
		return content;
	}

	public void setContent(String content) {
		this.content = content;
	}

}

整個程序

package cn.itcast.a_helloworld;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;
import org.junit.Test;

import cn.itcast._domain.Article;

/**
 * Minimal Lucene walkthrough written as two JUnit tests: one adds a single
 * {@link Article} to the on-disk index, the other searches that index and
 * prints the matching articles.
 */
public class TestApp {

	private static Directory directory; // location of the index on disk
	private static Analyzer analyzer; // tokenizer used for both indexing and query parsing

	static {
		try {
			// Open the index directory once for all tests.
			directory = FSDirectory.open(new File("c:/indexDir"));
			analyzer = new StandardAnalyzer(Version.LUCENE_30);
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Builds one sample article, converts it to a Lucene document and adds it
	 * to the index.
	 *
	 * @throws Exception if the index cannot be opened or written
	 */
	@Test
	public void testCreateIndex() throws Exception {
		// Simulate a record that has already been saved to the database.
		Article article = new Article();
		article.setId(2);
		article.setTitle("Lucene的應用場景");
		article.setContent("我們使用Lucene，主要是做站內搜索，即對一個系統內的資源進行搜索。如BBS、BLOG中的文章搜索，網上商店中的商品搜索等。");

		// 1. Convert the Article into a Document.
		Document doc = toDocument(article);

		// 2. Add the Document to the index. The writer is closed in a finally
		// block so it is released even when addDocument fails.
		IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
		try {
			indexWriter.addDocument(doc);
		} finally {
			indexWriter.close();
		}
	}

	/**
	 * Searches the "title" and "content" fields for a fixed query string and
	 * prints the total hit count followed by each matching article.
	 *
	 * @throws Exception if the query cannot be parsed or the index cannot be read
	 */
	@Test
	public void testSearch() throws Exception {
		// Search condition. Alternative query to try: "傳智"
		String queryString = "lucene";

		// 1. Turn the query string into a Query object.
		QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30, new String[] { "title", "content" }, analyzer);
		Query query = queryParser.parse(queryString);

		// 2. Run the query and 3. convert the hits. The searcher is closed in a
		// finally block so it is released even when searching or loading fails.
		int count;
		List<Article> list = new ArrayList<Article>();
		IndexSearcher indexSearcher = new IndexSearcher(directory);
		try {
			TopDocs topDocs = indexSearcher.search(query, 100); // return at most the first 100 hits
			count = topDocs.totalHits; // total number of matches
			ScoreDoc[] scoreDocs = topDocs.scoreDocs; // info about the returned hits

			for (ScoreDoc scoreDoc : scoreDocs) {
				// scoreDoc.doc is Lucene's internal document number; use it to load the stored fields.
				Document doc = indexSearcher.doc(scoreDoc.doc);
				list.add(toArticle(doc));
			}
		} finally {
			indexSearcher.close();
		}

		// Show the results.
		System.out.println("總結果數：" + count);
		for (Article article : list) {
			System.out.println(article);
		}
	}

	/**
	 * Converts an article into a Lucene document with stored "id", "title"
	 * and "content" fields; the id is indexed as-is, the other two are analyzed.
	 */
	private static Document toDocument(Article article) {
		// The id is encoded with Lucene's NumericUtils; toArticle decodes it
		// with the matching prefixCodedToInt.
		String idStr = NumericUtils.intToPrefixCoded(article.getId());
		Document doc = new Document();
		doc.add(new Field("id", idStr, Store.YES, Index.NOT_ANALYZED));
		doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));
		doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));
		return doc;
	}

	/** Converts a stored Lucene document back into an article (inverse of toDocument). */
	private static Article toArticle(Document doc) {
		Article article = new Article();
		// Must mirror the intToPrefixCoded encoding used when the document was built.
		Integer id = NumericUtils.prefixCodedToInt(doc.get("id"));
		article.setId(id);
		article.setTitle(doc.get("title"));
		article.setContent(doc.get("content")); // same as doc.getField("content").stringValue()
		return article;
	}

}


/*questions:
 一、添加索引
 1.誰去添加，如何添加
 	IndexWriter去添加，indexWriter.addDocument(doc);不能忘記indexWriter.close();
 2.IndexWriter是怎麼來的,需要包含哪些信息
 	new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
 	作爲一個IndexWriter，有幾點是必須明確的,首先是往哪裏寫，所以需要一個Directory,這個Directory必須包含的具體的文件位置：FSDirectory.open(new File("c:/indexDir"));
 3.添加的Document是怎麼來的？
 	作爲一個Document，必須包含各個key和value,這些key和value統一封裝在Field中,doc.add(new Field("id", idStr, Store.YES, Index.NOT_ANALYZED));Field的第一個param是key，第二個是value
 
 二、搜索
 1.誰去搜索，如何搜索
 	TopDocs topDocs = indexSearcher.search(query, 100);
 2.indexSearcher是怎麼來的？
	 IndexSearcher indexSearcher = new IndexSearcher(directory);
	 作爲一個indexSearcher，必須要明確搜索哪裏，所以要有directory參數
 3.indexSearcher.search(query, 100)方法的query參數的含義
	查詢的相關條件，首先肯定要包含查詢哪個詞；Query對象由QueryParser解析查詢字符串得到：
	QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_30, new String[] { "title", "content" }, analyzer);
	Query query = queryParser.parse(queryString);
 4.查詢的結果如何處理
 	返回的是TopDocs對象
 	可以得到符合條件的數量int count = topDocs.totalHits;
 	可以得到前n條記錄的ScoreDoc集合(n表示indexSearcher.search(query, 100)中的 100):
 	ScoreDoc[] scoreDocs = topDocs.scoreDocs;
 	ScoreDoc包含了各個記錄的得分和Document對象的id
 	float socre = scoreDocs[i].score;int docId = scoreDocs[i].doc;
	具體的doc還是要indexSearcher才能得到Document doc = indexSearcher.doc(docId);
	doc通過key拿value:doc.get("title")

三、比較
 1.indexWriter和indexSearcher
 	IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
 	IndexSearcher indexSearcher = new IndexSearcher(directory);
 	indexSearcher只要知道索引庫在哪兒就行
 	
 	indexWriter.addDocument(doc);
 	TopDocs topDocs = indexSearcher.search(query, 100);
	添加的是Document，查詢出來的是TopDocs








 */

24-5-18 X

lucene入門01

lucene入門04

lucene入門02

lucene入門03

linux shell

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結