Lucene 初試

項目數據量較大,如果從數據庫查詢,效率較低,所以用到了lucene。


針對項目的需求,寫了一個工具類,後續還需要進一步修改和完善。


爲了能按日期排序,建立索引時將日期轉換成long類型(毫秒時間戳)的字符串存入索引,查詢時再按long類型排序。


Lucene版本3.6.2

IKAnalyzer2012_u6

package t.util;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import t.Constants;
import t.model.News;

/**
 * Lucene 索引工具類
 * 
 * @auhtor: tangjing
 * @date:2013-2-4
 */
public class LuceneUtil {

	/**
	 * lucene 索引文件夾地址
	 */
	public static final String LUCENE_INDEX_DIR = "c://luceneTest";

	/**
	 * 新聞ID 索引域名
	 */
	public static final String FIELDNAME_NEWS_ID = "id";
	/**
	 * 新聞內容 索引域名
	 */
	public static final String FIELDNAME_NEWS_CONTENT = "content";
	/**
	 * 新聞發佈時間 索引域名
	 */
	public static final String FIELDNAME_NEWS_DATE = "date";
	/**
	 * 新聞來源 索引域名
	 */
	public static final String FIELDNAME_NEWS_SOURCE = "source";

	/**
	 * 創建索引 單個對象
	 * 
	 * @param news
	 * @auhtor: tangjing
	 * @date:2013-2-1
	 */
	public static void createIndexByNews(News news) {
		try {
			if (news != null) {
				IndexWriter indexWriter = getIndexWriter();
				indexWriter.addDocument(getDocumentByNews(news));
				indexWriter.close();
			}
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * 創建索引 news的集合
	 * 
	 * @param news
	 * @auhtor: tangjing
	 * @date:2013-2-1
	 */
	public static void createIndexByNewsList(List<News> newsList) {
		try {
			if (newsList != null) {
				IndexWriter indexWriter = getIndexWriter();
				for (Iterator<News> iterator = newsList.iterator(); iterator
						.hasNext();) {
					News news = (News) iterator.next();
					indexWriter.addDocument(getDocumentByNews(news));
				}
				indexWriter.close();
			}
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * 默認排序
	 * 
	 * @param keywords
	 * @param size
	 * @return
	 * @auhtor: tangjing
	 * @date:2013-2-1
	 */
	public static List<News> searchNewsIndex(String keywords, int size) {
		return searchNewsIndex(keywords, size, new Sort());
	}

	/**
	 * 根據時間排序
	 * 
	 * @param keywords
	 * @param size
	 * @return
	 * @auhtor: tangjing
	 * @date:2013-2-1
	 */
	public static List<News> searchNewsIndexOrderByDate(String keywords,
			int size) {
		Sort sort = new Sort(new SortField(FIELDNAME_NEWS_DATE, SortField.LONG,
				true));
		return searchNewsIndex(keywords, size, sort);
	}

	/**
	 * 
	 * @param keywords
	 *            關鍵詞
	 * @param size
	 *            查詢的條數
	 * @param sore
	 *            查詢的排序方式 如果爲空,默認以相關性排序
	 * @return
	 * @auhtor: tangjing
	 * @date:2013-2-1
	 */
	private static List<News> searchNewsIndex(String keywords, int size,
			Sort sort) {
		// 搜索
		List<News> list = null;
		try {
			Directory directory = FSDirectory.open(getIndexFile());
			IndexReader indexReader = IndexReader.open(directory);
			IndexSearcher searcher = new IndexSearcher(indexReader);
			Analyzer analyzer = new IKAnalyzer();
			QueryParser parser = new QueryParser(Version.LUCENE_36,
					FIELDNAME_NEWS_CONTENT, analyzer);
			// 設置詞條之間的關係是AND 這裏如果不設置,就是默認是OR
			// parser.setDefaultOperator(QueryParser.AND_OPERATOR);
			Query query = parser.parse(keywords);
			TopDocs topDocs = searcher.search(query, size, sort);
			list = new ArrayList<News>();
			ScoreDoc[] docs = topDocs.scoreDocs;
			for (ScoreDoc doc : docs) {
				Document d = searcher.doc(doc.doc);
				list.add(getNewsByDocument(d));
			}
		} catch (NumberFormatException e) {
			e.printStackTrace();
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		}
		return list;
	}

	/**
	 * 根據新聞對象,返回lucene文檔對象
	 * 
	 * @param news
	 * @return
	 * @auhtor: tangjing
	 * @date:2013-2-4
	 */
	private static Document getDocumentByNews(News news) {
		Document document = new Document();
		// ID不用建立索引
		document.add(new Field(FIELDNAME_NEWS_ID, news.getId() + "",
				Field.Store.YES, Field.Index.NO));
		document.add(new Field(FIELDNAME_NEWS_CONTENT, news.getContent(),
				Field.Store.YES, Field.Index.ANALYZED, TermVector.YES));
		document.add(new Field(FIELDNAME_NEWS_DATE, news.getCreateDate()
				.getTime() + "", Field.Store.YES, Field.Index.NOT_ANALYZED));
		// 網站可以建立索引,不用分詞
		document.add(new Field(FIELDNAME_NEWS_SOURCE, news.getNetsite(),
				Field.Store.YES, Field.Index.NOT_ANALYZED));
		return document;
	}

	/**
	 * 根據索引文檔,轉換爲news對象
	 * 
	 * @param document
	 * @return
	 * @auhtor: tangjing
	 * @date:2013-2-4
	 */
	private static News getNewsByDocument(Document document) {
		News news = new News();
		news.setId(Integer.parseInt(document.get(FIELDNAME_NEWS_ID)));
		news.setContent(document.get(FIELDNAME_NEWS_CONTENT));
		news.setNetsite(document.get(FIELDNAME_NEWS_SOURCE));
		Date date = new Date(Long.parseLong(document.get(FIELDNAME_NEWS_DATE)));
		news.setCreateDate(date);
		return news;
	}

	/**
	 * 獲得IndexWriter對象
	 * 
	 * @return
	 * @auhtor: tangjing
	 * @date:2013-2-4
	 */
	private static IndexWriter getIndexWriter() {
		IndexWriter indexWriter = null;
		try {
			// IK分詞器
			Analyzer analyzer = new IKAnalyzer();
			Directory directory = FSDirectory.open(getIndexFile());
			IndexWriterConfig writerConfig = new IndexWriterConfig(
					Version.LUCENE_36, analyzer);
			indexWriter = new IndexWriter(directory, writerConfig);
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (LockObtainFailedException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return indexWriter;
	}

	/**
	 * 索引路徑
	 * 
	 * @return
	 * @throws IOException
	 * @auhtor: tangjing
	 * @date:2013-2-4
	 */
	private static File getIndexFile() throws IOException {
		File indexFile = new File(Constants.LUCENE_INDEX_DIR);
		if (!indexFile.exists()) {
			indexFile.createNewFile();
		}
		return indexFile;
	}

}




發佈了39 篇原創文章 · 獲贊 1 · 訪問量 3萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章