Lucene3.0開始已經完全拋棄了2.x使用的Hits類,轉而使用TopDocs,TopFieldDocs。本例演示了如何使用這兩個類進行搜索。
package index;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Demonstrates searching with Lucene 3.0, which removed the 2.x {@code Hits}
 * class in favor of {@link TopDocs} (plain search) and {@link TopFieldDocs}
 * (search with an explicit sort).
 */
public class SearchTest {

    /** On-disk directory holding the example index. */
    private static final String INDEX_DIR = "index";

    /**
     * Builds the index: four documents, each with two fields — {@code text}
     * holds the content, {@code size} is the sort key.
     *
     * @throws CorruptIndexException if the index is corrupt
     * @throws LockObtainFailedException if the index write lock cannot be obtained
     * @throws IOException on any other I/O failure
     */
    private static void build() throws CorruptIndexException, LockObtainFailedException, IOException {
        // true = recreate the index from scratch on every run
        IndexWriter writer = new IndexWriter(FSDirectory.open(new File(INDEX_DIR)),
                new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.LIMITED);
        try {
            addDoc(writer, "google", "1");
            addDoc(writer, "google earth apache", "2");
            addDoc(writer, "baidu earth", "3");
            addDoc(writer, "baidu earth apache", "4");
            writer.optimize();
        } finally {
            // Always close, even on failure, so the index write lock is released.
            writer.close();
        }
    }

    /** Adds one document with the given {@code text} content and {@code size} sort key. */
    private static void addDoc(IndexWriter writer, String text, String size)
            throws CorruptIndexException, IOException {
        Document document = new Document();
        document.add(new Field("text", text, Store.YES, Index.ANALYZED));
        document.add(new Field("size", size, Store.YES, Index.NOT_ANALYZED_NO_NORMS));
        writer.addDocument(document);
    }

    /**
     * Searches with the TopDocs-returning API (Lucene 3.0 no longer offers a
     * Hits-returning method) and prints text, size and score for each hit.
     *
     * @param keyword the query string to search the {@code text} field for
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException on I/O failure
     * @throws ParseException if {@code keyword} cannot be parsed as a query
     */
    private static void searchWithTopDocs(String keyword)
            throws CorruptIndexException, IOException, ParseException {
        QueryParser parser = new QueryParser(Version.LUCENE_30, "text",
                new StandardAnalyzer(Version.LUCENE_30));
        IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(INDEX_DIR)));
        try {
            // Take the top 10 results; fewer are returned if the index is smaller.
            TopDocs topDocs = searcher.search(parser.parse(keyword), 10);
            System.out.println("hits:" + topDocs.totalHits);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // scoreDoc.doc is the internal document number.
                Document doc = searcher.doc(scoreDoc.doc);
                System.out.println(doc.get("text") + " " + doc.get("size") + " " + scoreDoc.score);
            }
        } finally {
            // Fix: the original leaked the searcher (and its directory handles).
            searcher.close();
        }
    }

    /**
     * Searches with an explicit sort on the {@code size} field; this API
     * returns a {@link TopFieldDocs} rather than Hits.
     *
     * @param keyword the query string to search the {@code text} field for
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException on I/O failure
     * @throws ParseException if {@code keyword} cannot be parsed as a query
     */
    private static void searchWithSort(String keyword)
            throws CorruptIndexException, IOException, ParseException {
        QueryParser parser = new QueryParser(Version.LUCENE_30, "text",
                new StandardAnalyzer(Version.LUCENE_30));
        Searcher searcher = new IndexSearcher(FSDirectory.open(new File(INDEX_DIR)));
        try {
            Query query = parser.parse(keyword);
            // Sort by "size" as an int, descending (true = reverse order).
            SortField sortField = new SortField("size", SortField.INT, true);
            // Second argument is a Filter; none is needed in this example.
            TopFieldDocs topFieldDocs = searcher.search(query, null, 10, new Sort(sortField));
            System.out.println("hits:" + topFieldDocs.totalHits);
            for (ScoreDoc scoreDoc : topFieldDocs.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                // NOTE: with this sorting overload scores are not tracked, so
                // scoreDoc.score prints as NaN.
                System.out.println(doc.get("text") + " " + doc.get("size") + " " + scoreDoc.score);
            }
        } finally {
            // Fix: the original leaked the searcher (and its directory handles).
            searcher.close();
        }
    }

    public static void main(String[] args)
            throws CorruptIndexException, LockObtainFailedException, IOException, ParseException {
        build();
        String keyword = "google";
        searchWithTopDocs(keyword);
        System.out.println("---------");
        searchWithSort(keyword);
    }
}
Lucene版本升級到3.0以後,原來的分詞方式(token=tokenStream.next())已經被拋棄,轉而使用TermAttribute,本例使用SmartChineseAnalyzer演示如何分詞,此外,本例還演示了如何
Solr1.4在建索引的時候默認使用複合文件格式,在索引較大時無疑會增加索引創建的時間以及系統的IO。對於沒有使用master/slave架構的應用來說,這樣做壓力很大。 本文將闡述如何改變這一默認策略: 1、環境描述 Solr