Lucene檢索筆記

把Document映射爲Object類

/**
 * Converts search-result {@code Document}s into application objects.
 * Implementations decide the concrete type of the returned object.
 */
public interface Doc2ObjectMapper {
    /**
     * Maps several Documents to a single object.
     * @param documents the documents to combine into one object
     * @return the object built from {@code documents}
     */
    Object mapDocumentsToObject(List<Document> documents);


    /**
     * Maps a single Document to an object.
     * @param document the document to convert
     * @return the object built from {@code document}
     */
    Object mapDocumentToObject(Document document);
}

普通檢索

/**
 * Runs paged queries against an index stored in a filesystem directory and
 * converts every hit into an object through a {@link Doc2ObjectMapper}.
 *
 * <p>The helper owns the underlying {@code Directory} and {@code IndexReader};
 * call {@link #close()} (or use try-with-resources) when it is no longer needed.
 */
public class SearchHelper implements AutoCloseable {

    // Stored for callers' convenience; no method of this class reads it.
    private Analyzer analyzer;
    private String indexDirUrl;
    private Directory directory;
    private IndexReader reader;
    private IndexSearcher indexSearcher;


    /**
     * Opens the index located at {@code indexDirUrl}.
     *
     * @param indexDirUrl filesystem path of the index directory
     * @param analyzer    analyzer to associate with this helper
     * @throws IllegalStateException if the index cannot be opened
     */
    public SearchHelper(String indexDirUrl, Analyzer analyzer) {
        this.indexDirUrl = indexDirUrl;
        this.analyzer = analyzer;
        try {
            init();
        } catch (IOException e) {
            // Fail fast: a helper without an open index would only fail later
            // with a NullPointerException on the first search.
            throw new IllegalStateException("Cannot open index directory: " + indexDirUrl, e);
        }
    }

    /**
     * Opens the index located at {@code indexDirUrl} with a {@code SmartChineseAnalyzer}.
     *
     * @param indexDirUrl filesystem path of the index directory
     * @throws IllegalStateException if the index cannot be opened
     */
    public SearchHelper(String indexDirUrl) {
        this(indexDirUrl, new SmartChineseAnalyzer());
    }


    /** Opens directory, reader and searcher; releases the directory if the reader cannot be opened. */
    private void init() throws IOException {
        directory = FSDirectory.open(Paths.get(indexDirUrl));
        try {
            reader = DirectoryReader.open(directory);
        } catch (IOException | RuntimeException e) {
            try {
                directory.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        indexSearcher = new IndexSearcher(reader);
    }

    /**
     * Runs a sorted query and returns the mapped hits of one page.
     *
     * @param query            the query to execute
     * @param offset           number of leading hits to skip; values &lt;= 0 skip nothing
     * @param topN             maximum number of hits to return after the offset
     * @param sort             sort order applied to the hits
     * @param doc2ObjectMapper converts each hit document into the returned object
     * @return the mapped hits in result order; empty if the offset is past the last hit
     * @throws IOException if reading the index fails
     */
    public List<Object> search(Query query, int offset, int topN, Sort sort, Doc2ObjectMapper doc2ObjectMapper) throws IOException {
        ScoreDoc after = null;
        if (offset > 0) {
            ScoreDoc[] skipped = indexSearcher.search(query, offset, sort).scoreDocs;
            if (skipped.length < offset) {
                // Every matching document is already inside the skipped range,
                // so the requested page is empty; no need for a second query.
                return new LinkedList<Object>();
            }
            after = skipped[skipped.length - 1];
        }
        TopDocs page = indexSearcher.searchAfter(after, query, topN, sort);
        return createObjectList(page.scoreDocs, doc2ObjectMapper);
    }

    /**
     * Same as {@link #search(Query, int, int, Sort, Doc2ObjectMapper)} but without an explicit sort.
     *
     * @param query            the query to execute
     * @param offset           number of leading hits to skip; values &lt;= 0 skip nothing
     * @param topN             maximum number of hits to return after the offset
     * @param doc2ObjectMapper converts each hit document into the returned object
     * @return the mapped hits in result order; empty if the offset is past the last hit
     * @throws IOException if reading the index fails
     */
    public List<Object> search(Query query, int offset, int topN, Doc2ObjectMapper doc2ObjectMapper) throws IOException {
        ScoreDoc after = null;
        if (offset > 0) {
            ScoreDoc[] skipped = indexSearcher.search(query, offset).scoreDocs;
            if (skipped.length < offset) {
                // Every matching document is already inside the skipped range.
                return new LinkedList<Object>();
            }
            after = skipped[skipped.length - 1];
        }
        TopDocs page = indexSearcher.searchAfter(after, query, topN);
        return createObjectList(page.scoreDocs, doc2ObjectMapper);
    }


    /**
     * Returns the total number of hits for a query.
     *
     * @param query the query to count hits for
     * @return the {@code totalHits} reported by the searcher
     * @throws IOException if reading the index fails
     */
    public int getSum(Query query) throws IOException {
        return indexSearcher.search(query, 1).totalHits;
    }

    /**
     * Releases the index reader and the directory opened by the constructor.
     *
     * @throws IOException if closing either resource fails
     */
    @Override
    public void close() throws IOException {
        try {
            reader.close();
        } finally {
            directory.close();
        }
    }

    /** Loads the stored document of every hit and maps it with {@code doc2ObjectMapper}. */
    private List<Object> createObjectList(ScoreDoc[] scoreDocs, Doc2ObjectMapper doc2ObjectMapper) throws IOException {
        List<Object> result = new LinkedList<Object>();
        for (ScoreDoc scoreDoc : scoreDocs) {
            result.add(doc2ObjectMapper.mapDocumentToObject(indexSearcher.doc(scoreDoc.doc)));
        }
        return result;
    }

}

基於Group by的檢索

/**
 * 使用Group by進行搜索
 * Created by yuan on 1/8/17.
 */
public class GroupSearcherHelper {

    private Analyzer analyzer;
    private String indexDirUrl;
    private Directory directory;
    private IndexReader reader;
    private IndexSearcher indexSearcher;
    private double maxCacheRAMMB;
    private boolean isCacheScores=true;
    private boolean ifFillFields=true;

    public static final double DEFAULT_MAX_CACHE_RAM_MB=4.0;

    public GroupSearcherHelper(String indexDirUrl,Analyzer analyzer,double maxCacheRAMMB){
        this.indexDirUrl=indexDirUrl;
        this.analyzer=analyzer;
        this.maxCacheRAMMB=maxCacheRAMMB;
        try {
            init();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    public GroupSearcherHelper(String indexDirUrl,Analyzer analyzer){
        this(indexDirUrl,analyzer,DEFAULT_MAX_CACHE_RAM_MB);
    }

    public GroupSearcherHelper(String indexDirUrl){
        this(indexDirUrl,new SmartChineseAnalyzer());
    }



    private void init() throws IOException {
        directory= FSDirectory.open(Paths.get(indexDirUrl));
        reader= DirectoryReader.open(directory);
        indexSearcher=new IndexSearcher(reader);
    }


    /**
     * 搜索返回文檔分組
     * @param query
     * @param groupFieldName
     * @param groupSort
     * @param withinGroupSort
     * @param groupOffset
     * @param topNGroups
     * @return
     * @throws IOException
     */
    public List<List<Document>> searchDocument(Query query, String groupFieldName, Sort groupSort, Sort withinGroupSort, int groupOffset, int topNGroups) throws IOException {
        List<List<Document>> result=new LinkedList<List<Document>>();
        TopGroups<BytesRef> topGroupsResult=searchHelp(query,groupFieldName,groupSort,withinGroupSort,groupOffset,topNGroups);
        if(topGroupsResult==null)
            return result;
        GroupDocs<BytesRef>[] groupDocses=topGroupsResult.groups;
        for(GroupDocs<BytesRef> groupDocs:groupDocses){
            List<Document> subList=new LinkedList<Document>();
            for(ScoreDoc scoreDoc:groupDocs.scoreDocs){
                Document document=indexSearcher.doc(scoreDoc.doc);
                subList.add(document);
            }
            result.add(subList);
        }

        return result;
    }

    /**
     * 使用默認Sort的searchDocument
     * @param query
     * @param groupFieldName
     * @param groupOffset
     * @param topNGroups
     * @return
     * @throws IOException
     */
    public List<List<Document>> searchDocument(Query query, String groupFieldName, int groupOffset, int topNGroups) throws IOException {
        return searchDocument(query,groupFieldName,Sort.INDEXORDER,Sort.INDEXORDER,groupOffset,topNGroups);
    }

    /**
     * 分組搜索並且將每一組Document映射成一個對象並且返回所有對象組成的List
     * @param query
     * @param groupFieldName
     * @param groupSort
     * @param withinGroupSort
     * @param groupOffset
     * @param topNGroups
     * @param mapper
     * @return
     * @throws IOException
     */
    public List<Object> search(Query query, String groupFieldName, Sort groupSort, Sort withinGroupSort, int groupOffset, int topNGroups, Doc2ObjectMapper mapper) throws IOException {
        List<Object> result=new LinkedList<Object>();
        List<List<Document>> documentsList=searchDocument(query,groupFieldName,groupSort,withinGroupSort,groupOffset,topNGroups);
        if(documentsList.size()==0)
            return result;
        Object o=null;
        for(List<Document> documents:documentsList){
            o=mapper.mapDocumentsToObject(documents);
            result.add(o);
        }
        return result;
    }

    /**
     * 使用默認Sort的search
     * @param query
     * @param groupFieldName
     * @param groupOffset
     * @param topNGroups
     * @param mapper
     * @return
     * @throws IOException
     */
    public List<Object> search(Query query, String groupFieldName,  int groupOffset, int topNGroups, Doc2ObjectMapper mapper) throws IOException {
        return search(query,groupFieldName,Sort.INDEXORDER,Sort.INDEXORDER,groupOffset,topNGroups,mapper);
    }



    TopGroups<BytesRef> searchHelp(Query query, String groupFieldName, Sort groupSort, Sort withinGroupSort, int groupOffset, int topNGroups) throws IOException {
        TermFirstPassGroupingCollector c1=new TermFirstPassGroupingCollector(groupFieldName,groupSort,groupOffset+topNGroups);
        /**
         * 將TermFirstPassGroupingCollector包裝成CachingCollector,爲第一次查詢加緩存,避免重複評分
         *  CachingCollector就是用來爲結果收集器添加緩存功能的
         */
        CachingCollector cachingCollector=CachingCollector.create(c1,isCacheScores,maxCacheRAMMB);
        //開始第一次分組統計
        indexSearcher.search(query,cachingCollector);

        /**第一次查詢返回的結果集TopGroups中只有分組域值以及每組總的評分,至於每個分組裏有幾條,分別哪些索引文檔,則需要進行第二次查詢獲取*/
        Collection<SearchGroup<BytesRef>> topGroups=c1.getTopGroups(groupOffset,ifFillFields);
        if(topGroups==null){
            return null;
        }

        Collector secondPassCollector=null;
        // 是否獲取每個分組內部每個索引的評分
        boolean ifGetScores=true;
        // 是否計算最大評分
        boolean ifGetMaxScores=true;
        int maxDocsPerGroup=10;
        // 如果需要對Lucene的score進行修正,則需要重載TermSecondPassGroupingCollector
        TermSecondPassGroupingCollector c2=new TermSecondPassGroupingCollector(groupFieldName,topGroups,
                groupSort,withinGroupSort,
                maxDocsPerGroup,ifGetScores,ifGetMaxScores,ifFillFields);

        secondPassCollector=c2;

        /**如果第一次查詢已經加了緩存,則直接從緩存中取*/
        if(cachingCollector.isCached()){
            //第二次查詢直接從緩存中取
            cachingCollector.replay(secondPassCollector);
        }else{
            // 開始第二次分組查詢
            indexSearcher.search(query,secondPassCollector);
        }


        TopGroups<BytesRef> topGroupsResult=c2.getTopGroups(0);

        return topGroupsResult;
    }

    /**
     * 查詢符合條件的分組總數量
     * @param query
     * @param groupFieldName
     * @return
     * @throws Exception
     */
    public int getGroupSum(Query query,String groupFieldName) throws Exception{
        TermFirstPassGroupingCollector c1=new TermFirstPassGroupingCollector(groupFieldName,Sort.INDEXORDER,1);
        TermAllGroupsCollector termAllGroupsCollector=new TermAllGroupsCollector(groupFieldName);
        Collector collector=  MultiCollector.wrap(c1,termAllGroupsCollector);
        indexSearcher.search(query,collector);

        return termAllGroupsCollector.getGroupCount();
    }
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章