項目數據量較大,如果從數據庫查詢,效率較低,所以用到了lucene。
針對項目的需求,寫了一個工具類,後續還需要進一步修改完善。
按日期排序時,先將日期轉換成long類型的毫秒值再寫入索引。
Lucene版本3.6.2
IKAnalyzer2012_u6
package t.util;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import t.Constants;
import t.model.News;
/**
 * Lucene index utility for {@link News} objects, written against Lucene 3.6
 * and the IK analyzer.
 *
 * <p>Every public method opens the index, performs its work and releases all
 * Lucene resources again before returning. I/O and parse failures are printed
 * to standard error and are not propagated to the caller.
 *
 * @author tangjing
 * @date 2013-2-4
 */
public class LuceneUtil {
    /**
     * Lucene index directory path.
     *
     * NOTE(review): this constant is not referenced inside this class;
     * {@link #getIndexFile()} reads {@code Constants.LUCENE_INDEX_DIR}
     * instead. Confirm which of the two is the intended location.
     */
    public static final String LUCENE_INDEX_DIR = "c://luceneTest";
    /**
     * Index field name: news ID.
     */
    public static final String FIELDNAME_NEWS_ID = "id";
    /**
     * Index field name: news content.
     */
    public static final String FIELDNAME_NEWS_CONTENT = "content";
    /**
     * Index field name: news publication time.
     */
    public static final String FIELDNAME_NEWS_DATE = "date";
    /**
     * Index field name: news source.
     */
    public static final String FIELDNAME_NEWS_SOURCE = "source";

    /**
     * Adds a single news object to the index.
     *
     * @param news the news object to index; {@code null} is ignored
     * @author tangjing
     * @date 2013-2-1
     */
    public static void createIndexByNews(News news) {
        if (news == null) {
            return;
        }
        // Reuse the batch path so the writer life cycle lives in one place.
        List<News> single = new ArrayList<News>(1);
        single.add(news);
        createIndexByNewsList(single);
    }

    /**
     * Adds a collection of news objects to the index.
     *
     * <p>The batch is committed only if every document was added and the
     * writer closed successfully; otherwise the writer is rolled back so the
     * index write lock is released and later calls can still obtain it.
     *
     * @param newsList the news objects to index; a {@code null} list and
     *                 {@code null} elements are ignored
     * @author tangjing
     * @date 2013-2-1
     */
    public static void createIndexByNewsList(List<News> newsList) {
        if (newsList == null) {
            return;
        }
        try {
            writeDocuments(newsList);
        } catch (IOException e) {
            // CorruptIndexException and LockObtainFailedException are IOExceptions.
            e.printStackTrace();
        }
    }

    /**
     * Searches the news content using the default (relevance) ordering.
     *
     * @param keywords the query string, in Lucene query parser syntax
     * @param size     the maximum number of hits to return
     * @return the matching news objects; empty (never {@code null}) when
     *         nothing matches or the search fails
     * @author tangjing
     * @date 2013-2-1
     */
    public static List<News> searchNewsIndex(String keywords, int size) {
        return searchNewsIndex(keywords, size, new Sort());
    }

    /**
     * Searches the news content and orders the hits by publication time,
     * newest first.
     *
     * @param keywords the query string, in Lucene query parser syntax
     * @param size     the maximum number of hits to return
     * @return the matching news objects; empty (never {@code null}) when
     *         nothing matches or the search fails
     * @author tangjing
     * @date 2013-2-1
     */
    public static List<News> searchNewsIndexOrderByDate(String keywords,
            int size) {
        // The date is indexed as the decimal text of a long; reverse = true
        // puts the largest (most recent) timestamp first.
        Sort sort = new Sort(new SortField(FIELDNAME_NEWS_DATE, SortField.LONG,
                true));
        return searchNewsIndex(keywords, size, sort);
    }

    /**
     * Runs a query against the content field and converts the hits to news
     * objects.
     *
     * @param keywords the query string; {@code null} or blank yields an
     *                 empty result
     * @param size     the maximum number of hits; values below 1 yield an
     *                 empty result
     * @param sort     the ordering of the hits; when {@code null} the hits
     *                 are ordered by relevance
     * @return the matching news objects, never {@code null}; if a failure
     *         occurs part-way through, the hits converted so far are returned
     * @author tangjing
     * @date 2013-2-1
     */
    private static List<News> searchNewsIndex(String keywords, int size,
            Sort sort) {
        List<News> list = new ArrayList<News>();
        if (keywords == null || keywords.trim().length() == 0 || size <= 0) {
            return list;
        }
        Directory directory = null;
        IndexReader indexReader = null;
        IndexSearcher searcher = null;
        try {
            directory = FSDirectory.open(getIndexFile());
            indexReader = IndexReader.open(directory);
            searcher = new IndexSearcher(indexReader);
            Analyzer analyzer = new IKAnalyzer();
            QueryParser parser = new QueryParser(Version.LUCENE_36,
                    FIELDNAME_NEWS_CONTENT, analyzer);
            // Terms are combined with OR by default; to require all terms use
            // parser.setDefaultOperator(QueryParser.AND_OPERATOR).
            Query query = parser.parse(keywords);
            Sort effectiveSort = (sort == null) ? Sort.RELEVANCE : sort;
            TopDocs topDocs = searcher.search(query, size, effectiveSort);
            for (ScoreDoc hit : topDocs.scoreDocs) {
                list.add(getNewsByDocument(searcher.doc(hit.doc)));
            }
        } catch (NumberFormatException e) {
            // A stored id/date could not be parsed back into a number.
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            // A searcher built from a reader does not close that reader, so
            // each resource is released explicitly.
            closeQuietly(searcher);
            closeQuietly(indexReader);
            closeQuietly(directory);
        }
        return list;
    }

    /**
     * Builds the Lucene document for a news object.
     *
     * <p>Content, creation date and source are only added when they are not
     * {@code null}, because a Lucene field cannot hold a null value.
     *
     * @param news the news object to convert; must not be {@code null}
     * @return the document to add to the index
     * @author tangjing
     * @date 2013-2-4
     */
    private static Document getDocumentByNews(News news) {
        Document document = new Document();
        // The ID is stored for retrieval only; it is not searchable.
        document.add(new Field(FIELDNAME_NEWS_ID, news.getId() + "",
                Field.Store.YES, Field.Index.NO));
        if (news.getContent() != null) {
            document.add(new Field(FIELDNAME_NEWS_CONTENT, news.getContent(),
                    Field.Store.YES, Field.Index.ANALYZED, TermVector.YES));
        }
        if (news.getCreateDate() != null) {
            // Stored as epoch milliseconds so it can be sorted as a long.
            document.add(new Field(FIELDNAME_NEWS_DATE, news.getCreateDate()
                    .getTime() + "", Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
        }
        if (news.getNetsite() != null) {
            // The source site is indexed as a single untokenized term.
            document.add(new Field(FIELDNAME_NEWS_SOURCE, news.getNetsite(),
                    Field.Store.YES, Field.Index.NOT_ANALYZED));
        }
        return document;
    }

    /**
     * Converts an index document back into a news object.
     *
     * @param document a document produced by
     *                 {@link #getDocumentByNews(News)}
     * @return the news object rebuilt from the stored fields; the creation
     *         date is left unset when the document has no date field
     * @throws NumberFormatException if the stored id or date is not numeric
     * @author tangjing
     * @date 2013-2-4
     */
    private static News getNewsByDocument(Document document) {
        News news = new News();
        news.setId(Integer.parseInt(document.get(FIELDNAME_NEWS_ID)));
        news.setContent(document.get(FIELDNAME_NEWS_CONTENT));
        news.setNetsite(document.get(FIELDNAME_NEWS_SOURCE));
        String storedDate = document.get(FIELDNAME_NEWS_DATE);
        if (storedDate != null) {
            news.setCreateDate(new Date(Long.parseLong(storedDate)));
        }
        return news;
    }

    /**
     * Writes all non-null news objects of the list in one writer session.
     *
     * @param newsList the news objects to add; must not be {@code null}
     * @throws IOException if the index cannot be opened, written or closed
     */
    private static void writeDocuments(List<News> newsList)
            throws IOException {
        Directory directory = FSDirectory.open(getIndexFile());
        try {
            IndexWriter indexWriter = getIndexWriter(directory);
            boolean closed = false;
            try {
                for (News news : newsList) {
                    if (news != null) {
                        indexWriter.addDocument(getDocumentByNews(news));
                    }
                }
                indexWriter.close();
                closed = true;
            } finally {
                if (!closed) {
                    // Discard the partial batch and release the write lock.
                    rollbackQuietly(indexWriter);
                }
            }
        } finally {
            closeQuietly(directory);
        }
    }

    /**
     * Opens an IndexWriter on the given directory using the IK analyzer.
     *
     * @param directory the index directory to write to
     * @return a new, open writer; the caller must close or roll it back
     * @throws IOException if the index is corrupt, locked or unreadable
     * @author tangjing
     * @date 2013-2-4
     */
    private static IndexWriter getIndexWriter(Directory directory)
            throws IOException {
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig writerConfig = new IndexWriterConfig(
                Version.LUCENE_36, analyzer);
        return new IndexWriter(directory, writerConfig);
    }

    /**
     * Returns the index directory, creating it when it does not exist yet.
     *
     * @return the index directory
     * @throws IOException if the path cannot be created as a directory, for
     *                     example because a regular file already occupies it
     * @author tangjing
     * @date 2013-2-4
     */
    private static File getIndexFile() throws IOException {
        File indexDir = new File(Constants.LUCENE_INDEX_DIR);
        // Lucene needs a directory here, so mkdirs() is used; the second
        // isDirectory() check covers a concurrent creation.
        if (!indexDir.isDirectory() && !indexDir.mkdirs()
                && !indexDir.isDirectory()) {
            throw new IOException("Cannot create Lucene index directory: "
                    + indexDir.getAbsolutePath());
        }
        return indexDir;
    }

    /**
     * Rolls back a writer, printing instead of propagating any I/O failure
     * so that it cannot mask the exception that caused the rollback.
     *
     * @param indexWriter the writer to roll back
     */
    private static void rollbackQuietly(IndexWriter indexWriter) {
        try {
            indexWriter.rollback();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes a resource, printing instead of propagating any I/O failure.
     *
     * @param resource the resource to close; {@code null} is ignored
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}