Lucene自定義過濾器查詢

package org.adv.lucene.util;


import java.io.IOException;
import java.text.SimpleDateFormat;


import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;


/**
 * Demonstrates running a Lucene search that is restricted by a custom
 * {@link MyIDFilter}.
 */
public class CustomFilter {

    /**
     * Searches the "content" field for the term "java", restricted by a
     * {@link MyIDFilter}, and prints up to 1000 hits to standard output.
     *
     * <p>The accessor handed to the filter names four values of the
     * "filename" field and returns {@code false} from {@code set()}, which
     * selects the filter's "clear" branch for those values.
     *
     * <p>Index errors are printed to standard error rather than propagated.
     * The index reader is closed whether or not the search succeeds.
     */
    public void searchByCustomFilter() {
        DirectoryReader reader = null;
        try {
            reader = DirectoryReader.open(FileIndexUtils.getDirectory());
            IndexSearcher searcher = new IndexSearcher(reader);
            Query q = new TermQuery(new Term("content", "java"));

            // Custom filter: MyIDFilter driven by an inline FilterAccessor.
            FilterAccessor accessor = new FilterAccessor() {
                @Override
                public String[] values() {
                    return new String[]{"impala","catalogd","impalad","statestored"};
                }

                @Override
                public boolean set() {
                    return false;
                }

                @Override
                public String getField() {
                    return "filename";
                }
            };
            TopDocs tds = searcher.search(q, new MyIDFilter(accessor), 1000);

            for (ScoreDoc sd : tds.scoreDocs) {
                Document d = searcher.doc(sd.doc);
                System.out.println(sd.doc+":("+sd.score+")" +
                        "["+d.get("filename")+"["+d.get("path")+"]--->"+
                        d.get("size")+"------------>"+d.get("id"));
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close in finally so a failed search does not leak the reader.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

}



package org.adv.lucene.test;


import org.adv.lucene.util.CustomFilter;
import org.junit.Test;


/**
 * JUnit driver for the {@link CustomFilter} demo.
 */
public class TestCustomFilter {

    /**
     * Invokes the custom-filter search on a fresh {@link CustomFilter};
     * the test makes no assertions of its own.
     */
    @Test
    public void test01() {
        new CustomFilter().searchByCustomFilter();
    }
}




package org.adv.lucene.util;


import java.io.IOException;


import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSet;


/**
 * 自定義過濾器
 * 根據ID進行過濾
 * @author 
 *
 */
public class MyIDFilter extends Filter {

private FilterAccessor accessor;

public MyIDFilter(FilterAccessor accessor) {
this.accessor = accessor;
}


@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
//創建一個bit,默認所有的元素都是0
AtomicReader reader=context.reader();
OpenBitSet obs = new OpenBitSet(reader.maxDoc());//FixedBitSet fbs=new FixedBitSet(reader.maxDoc());
if(accessor.set()) {
set(reader,obs);
} else {
clear(reader, obs);
}
return obs;
}

private void set(AtomicReader reader,OpenBitSet obs) {
try {
//獲取id所在的doc的位置,並且將其設置爲0
for(String delId:accessor.values()) {
//Term出現的頻率
//int count=reader.docFreq(new Term(accessor.getField(),delId));
DocsEnum  de=reader.termDocsEnum(new Term(accessor.getField(),delId));
//DocsAndPositionsEnum dpe= reader.termPositionsEnum(new Term(accessor.getField(),delId));
if(de!=null){
int docid=de.nextDoc();
if(docid!=-1) {
obs.set(docid);
}
}
}
} catch (IOException e) {
e.printStackTrace();
}
}

private void clear(AtomicReader reader,OpenBitSet obs) {
try {
//先把元素填滿
obs.set(0,reader.maxDoc());
//獲取id所在的doc的位置,並且將其刪除
for(String delId:accessor.values()) {
//Term出現的頻率
//int count=reader.docFreq(new Term(accessor.getField(),delId));
DocsEnum  de=reader.termDocsEnum(new Term(accessor.getField(),delId));
//DocsAndPositionsEnum dpe= reader.termPositionsEnum(new Term(accessor.getField(),delId));
if(de!=null){
int docid=de.nextDoc();
if(docid!=-1) {
//將這個位置的元素刪除
obs.clear(docid);
}
}
}
} catch (IOException e) {
e.printStackTrace();
}
}


}



package org.adv.lucene.util;


/**
 * Supplies the data a filter needs: which field to inspect, which values to
 * look for, and whether matching documents are kept or removed.
 */
public interface FilterAccessor {

    /**
     * @return the term values to look up in the field named by
     *         {@link #getField()}
     */
    String[] values();

    /**
     * @return the name of the field whose terms are matched against
     *         {@link #values()}
     */
    String getField();

    /**
     * @return {@code true} to select the filter's "set" mode (matching
     *         documents are marked), {@code false} to select its "clear"
     *         mode (matching documents are unmarked)
     */
    boolean set();
}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章