把Document映射爲Object類
/**
* Strategy for turning Lucene {@code Document}s into application objects.
* The search helpers in this file call it to map search hits.
*/
public interface Doc2ObjectMapper {
/**
* Maps several documents to a single object.
* GroupSearcherHelper.search calls this once per group, passing that group's documents.
* @param documents the documents to combine
* @return the object built from {@code documents}
*/
Object mapDocumentsToObject(List<Document> documents);
/**
* Maps a single document to an object.
* SearchHelper calls this once for every hit on the requested page.
* @param document the document to convert
* @return the object built from {@code document}
*/
Object mapDocumentToObject(Document document);
}
普通檢索
/**
 * Runs paged queries against a Lucene index stored on the file system and
 * maps each hit to an object through a {@code Doc2ObjectMapper}.
 *
 * <p>The index directory and reader are opened in the constructor and stay
 * open until {@link #close()} is called.
 */
public class SearchHelper implements java.io.Closeable {
    /** Analyzer supplied by the caller; stored but not read by any method of this class. */
    private Analyzer analyzer;
    /** File-system path of the index directory. */
    private String indexDirUrl;
    private Directory directory;
    private IndexReader reader;
    private IndexSearcher indexSearcher;

    /**
     * Opens the index at {@code indexDirUrl}.
     *
     * @param indexDirUrl path of the index directory
     * @param analyzer    analyzer to keep on this helper
     * @throws IllegalStateException if the index cannot be opened; the
     *         underlying {@link IOException} is attached as the cause
     */
    public SearchHelper(String indexDirUrl, Analyzer analyzer) {
        this.indexDirUrl = indexDirUrl;
        this.analyzer = analyzer;
        try {
            init();
        } catch (IOException e) {
            // Fail fast: without a searcher every later call would hit a NullPointerException.
            throw new IllegalStateException("Cannot open Lucene index at " + indexDirUrl, e);
        }
    }

    /**
     * Opens the index at {@code indexDirUrl} with a new {@code SmartChineseAnalyzer}.
     *
     * @param indexDirUrl path of the index directory
     */
    public SearchHelper(String indexDirUrl) {
        this(indexDirUrl, new SmartChineseAnalyzer());
    }

    /** Opens the directory, the reader and the searcher; closes the directory again if the reader cannot be opened. */
    private void init() throws IOException {
        Directory opened = FSDirectory.open(Paths.get(indexDirUrl));
        try {
            reader = DirectoryReader.open(opened);
        } catch (IOException | RuntimeException e) {
            try {
                opened.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        directory = opened;
        indexSearcher = new IndexSearcher(reader);
    }

    /**
     * Runs a sorted query and returns one page of mapped results.
     *
     * <p>When {@code offset > 0} the first {@code offset} hits are fetched and
     * the last of them is used as the cursor for {@code searchAfter}.
     *
     * @param query            the query to run
     * @param offset           number of leading hits to skip; values {@code <= 0} skip nothing
     * @param topN             maximum number of hits to return
     * @param sort             sort order applied to both the skip query and the page query
     * @param doc2ObjectMapper converts each hit's document into the returned object
     * @return the mapped hits of the requested page, in result order
     * @throws IOException if the index cannot be read
     */
    public List<Object> search(Query query, int offset, int topN, Sort sort, Doc2ObjectMapper doc2ObjectMapper) throws IOException {
        ScoreDoc after = null;
        if (offset > 0) {
            ScoreDoc[] skipped = indexSearcher.search(query, offset, sort).scoreDocs;
            if (skipped.length > 0) {
                after = skipped[skipped.length - 1];
            }
        }
        TopDocs page = indexSearcher.searchAfter(after, query, topN, sort);
        return createObjectList(page.scoreDocs, doc2ObjectMapper);
    }

    /**
     * Same as the sorted {@code search}, but without an explicit {@code Sort}.
     *
     * @param query            the query to run
     * @param offset           number of leading hits to skip; values {@code <= 0} skip nothing
     * @param topN             maximum number of hits to return
     * @param doc2ObjectMapper converts each hit's document into the returned object
     * @return the mapped hits of the requested page, in result order
     * @throws IOException if the index cannot be read
     */
    public List<Object> search(Query query, int offset, int topN, Doc2ObjectMapper doc2ObjectMapper) throws IOException {
        ScoreDoc after = null;
        if (offset > 0) {
            ScoreDoc[] skipped = indexSearcher.search(query, offset).scoreDocs;
            if (skipped.length > 0) {
                after = skipped[skipped.length - 1];
            }
        }
        TopDocs page = indexSearcher.searchAfter(after, query, topN);
        return createObjectList(page.scoreDocs, doc2ObjectMapper);
    }

    /**
     * Returns the total number of hits for {@code query}.
     *
     * @param query the query to count
     * @return the {@code totalHits} reported by a top-1 search
     * @throws IOException if the index cannot be read
     */
    public int getSum(Query query) throws IOException {
        return indexSearcher.search(query, 1).totalHits;
    }

    /** Loads the stored document of every hit and maps it with {@code doc2ObjectMapper}. */
    private List<Object> createObjectList(ScoreDoc[] scoreDocs, Doc2ObjectMapper doc2ObjectMapper) throws IOException {
        // The final size is known up front, so a presized array-backed list avoids per-node allocation.
        List<Object> mapped = new java.util.ArrayList<Object>(scoreDocs.length);
        for (ScoreDoc hit : scoreDocs) {
            mapped.add(doc2ObjectMapper.mapDocumentToObject(indexSearcher.doc(hit.doc)));
        }
        return mapped;
    }

    /**
     * Closes the index reader and then the directory.
     *
     * @throws IOException if closing either resource fails
     */
    @Override
    public void close() throws IOException {
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            if (directory != null) {
                directory.close();
            }
        }
    }
}
基於Group by的檢索
/**
* Helper for running grouped ("group by") searches.
* Created by yuan on 1/8/17.
*/
public class GroupSearcherHelper {
// Analyzer supplied at construction; stored here but not read by the code visible in this class.
private Analyzer analyzer;
// File-system path of the index directory, opened in init().
private String indexDirUrl;
// Directory, reader and searcher created by init().
private Directory directory;
private IndexReader reader;
private IndexSearcher indexSearcher;
// Passed to CachingCollector.create in searchHelp as the cache size limit.
private double maxCacheRAMMB;
// Passed to CachingCollector.create in searchHelp as its second argument.
private boolean isCacheScores=true;
// Passed to the first-pass getTopGroups call and to the second-pass collector constructor in searchHelp.
private boolean ifFillFields=true;
// Value used for maxCacheRAMMB by the constructors that do not take it explicitly.
public static final double DEFAULT_MAX_CACHE_RAM_MB=4.0;
/**
 * Opens the index at {@code indexDirUrl} for grouped searching.
 *
 * @param indexDirUrl   path of the index directory
 * @param analyzer      analyzer to keep on this helper
 * @param maxCacheRAMMB cache size limit handed to the caching collector during grouped searches
 * @throws IllegalStateException if the index cannot be opened; the
 *         underlying {@link IOException} is attached as the cause
 */
public GroupSearcherHelper(String indexDirUrl, Analyzer analyzer, double maxCacheRAMMB) {
    this.indexDirUrl = indexDirUrl;
    this.analyzer = analyzer;
    this.maxCacheRAMMB = maxCacheRAMMB;
    try {
        init();
    } catch (IOException e) {
        // Fail fast: without a searcher every later call would hit a NullPointerException.
        throw new IllegalStateException("Cannot open Lucene index at " + indexDirUrl, e);
    }
}
/**
* Creates a helper that uses {@link #DEFAULT_MAX_CACHE_RAM_MB} as the cache size limit.
* @param indexDirUrl path of the index directory
* @param analyzer analyzer to keep on this helper
*/
public GroupSearcherHelper(String indexDirUrl,Analyzer analyzer){
this(indexDirUrl,analyzer,DEFAULT_MAX_CACHE_RAM_MB);
}
/**
* Creates a helper with a new SmartChineseAnalyzer and the default cache size limit.
* @param indexDirUrl path of the index directory
*/
public GroupSearcherHelper(String indexDirUrl){
this(indexDirUrl,new SmartChineseAnalyzer());
}
/**
 * Opens the index directory, a reader on it and a searcher on the reader.
 * If the reader cannot be opened, the directory is closed again before the
 * failure is propagated so that it does not leak.
 *
 * @throws IOException if the directory or the reader cannot be opened
 */
private void init() throws IOException {
    Directory opened = FSDirectory.open(Paths.get(indexDirUrl));
    try {
        reader = DirectoryReader.open(opened);
    } catch (IOException | RuntimeException e) {
        try {
            opened.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
    directory = opened;
    indexSearcher = new IndexSearcher(reader);
}
/**
 * Runs a grouped search and loads the stored documents of every returned group.
 *
 * @param query           the query to run
 * @param groupFieldName  field whose value defines the groups
 * @param groupSort       ordering of the groups
 * @param withinGroupSort ordering of the documents inside each group
 * @param groupOffset     number of leading groups to skip
 * @param topNGroups      number of groups requested after the offset
 * @return one inner list per group, holding that group's documents; empty
 *         when {@code searchHelp} yields no groups
 * @throws IOException if the index cannot be read
 */
public List<List<Document>> searchDocument(Query query, String groupFieldName, Sort groupSort, Sort withinGroupSort, int groupOffset, int topNGroups) throws IOException {
    final List<List<Document>> groupedDocuments = new LinkedList<>();
    final TopGroups<BytesRef> grouped = searchHelp(query, groupFieldName, groupSort, withinGroupSort, groupOffset, topNGroups);
    if (grouped != null) {
        for (GroupDocs<BytesRef> group : grouped.groups) {
            final List<Document> members = new LinkedList<>();
            for (ScoreDoc hit : group.scoreDocs) {
                members.add(indexSearcher.doc(hit.doc));
            }
            groupedDocuments.add(members);
        }
    }
    return groupedDocuments;
}
/**
 * Grouped document search that uses {@code Sort.INDEXORDER} both for
 * ordering the groups and for ordering the documents inside each group.
 *
 * @param query          the query to run
 * @param groupFieldName field whose value defines the groups
 * @param groupOffset    number of leading groups to skip
 * @param topNGroups     number of groups requested after the offset
 * @return one inner list per group, holding that group's documents
 * @throws IOException if the index cannot be read
 */
public List<List<Document>> searchDocument(Query query, String groupFieldName, int groupOffset, int topNGroups) throws IOException {
    final Sort indexOrder = Sort.INDEXORDER;
    return searchDocument(query, groupFieldName, indexOrder, indexOrder, groupOffset, topNGroups);
}
/**
 * Runs a grouped search and maps every group's documents to a single object.
 *
 * @param query           the query to run
 * @param groupFieldName  field whose value defines the groups
 * @param groupSort       ordering of the groups
 * @param withinGroupSort ordering of the documents inside each group
 * @param groupOffset     number of leading groups to skip
 * @param topNGroups      number of groups requested after the offset
 * @param mapper          converts the documents of one group into one object
 * @return one mapped object per group, in group order; empty when no group is found
 * @throws IOException if the index cannot be read
 */
public List<Object> search(Query query, String groupFieldName, Sort groupSort, Sort withinGroupSort, int groupOffset, int topNGroups, Doc2ObjectMapper mapper) throws IOException {
    final List<Object> mapped = new LinkedList<>();
    final List<List<Document>> groups = searchDocument(query, groupFieldName, groupSort, withinGroupSort, groupOffset, topNGroups);
    for (List<Document> group : groups) {
        mapped.add(mapper.mapDocumentsToObject(group));
    }
    return mapped;
}
/**
 * Grouped, mapped search that uses {@code Sort.INDEXORDER} both for
 * ordering the groups and for ordering the documents inside each group.
 *
 * @param query          the query to run
 * @param groupFieldName field whose value defines the groups
 * @param groupOffset    number of leading groups to skip
 * @param topNGroups     number of groups requested after the offset
 * @param mapper         converts the documents of one group into one object
 * @return one mapped object per group, in group order
 * @throws IOException if the index cannot be read
 */
public List<Object> search(Query query, String groupFieldName, int groupOffset, int topNGroups, Doc2ObjectMapper mapper) throws IOException {
    final Sort indexOrder = Sort.INDEXORDER;
    return search(query, groupFieldName, indexOrder, indexOrder, groupOffset, topNGroups, mapper);
}
/**
 * Performs the two-pass grouped search shared by the public search methods.
 *
 * @param query           the query to run
 * @param groupFieldName  field whose value defines the groups
 * @param groupSort       ordering of the groups
 * @param withinGroupSort ordering of the documents inside each group
 * @param groupOffset     number of leading groups to skip
 * @param topNGroups      number of groups requested after the offset
 * @return the second-pass result, or {@code null} when the first pass yields no groups
 * @throws IOException if the index cannot be read
 */
TopGroups<BytesRef> searchHelp(Query query, String groupFieldName, Sort groupSort, Sort withinGroupSort, int groupOffset, int topNGroups) throws IOException {
    // First pass: collect enough groups to cover the skipped ones plus the requested page.
    final TermFirstPassGroupingCollector firstPass =
            new TermFirstPassGroupingCollector(groupFieldName, groupSort, groupOffset + topNGroups);
    // Wrap the first pass in a CachingCollector so the second pass can be replayed
    // from the cache instead of running (and scoring) the query a second time.
    final CachingCollector cache = CachingCollector.create(firstPass, isCacheScores, maxCacheRAMMB);
    indexSearcher.search(query, cache);

    // The first pass only identifies the groups; which documents belong to each
    // group is resolved by the second pass below.
    final Collection<SearchGroup<BytesRef>> selectedGroups = firstPass.getTopGroups(groupOffset, ifFillFields);
    if (selectedGroups == null) {
        return null;
    }

    final int maxDocsPerGroup = 10;
    final boolean getScores = true;
    final boolean getMaxScores = true;
    final TermSecondPassGroupingCollector secondPass = new TermSecondPassGroupingCollector(
            groupFieldName, selectedGroups, groupSort, withinGroupSort,
            maxDocsPerGroup, getScores, getMaxScores, ifFillFields);

    if (!cache.isCached()) {
        // Nothing usable in the cache: run the query again for the second pass.
        indexSearcher.search(query, secondPass);
    } else {
        // The first pass was cached: feed the cached hits to the second pass.
        cache.replay(secondPass);
    }
    return secondPass.getTopGroups(0);
}
/**
 * Counts the distinct groups among the documents matching {@code query}.
 *
 * <p>Only a {@code TermAllGroupsCollector} is run. A first-pass grouping
 * collector is not needed for counting, so none is attached to the search.
 *
 * @param query          the query to run
 * @param groupFieldName field whose value defines the groups
 * @return the group count reported by the {@code TermAllGroupsCollector}
 * @throws Exception if the search fails
 */
public int getGroupSum(Query query, String groupFieldName) throws Exception {
    TermAllGroupsCollector allGroups = new TermAllGroupsCollector(groupFieldName);
    indexSearcher.search(query, allGroups);
    return allGroups.getGroupCount();
}