Lucene自定義評分查詢

package org.adv.lucene.util;


import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;


import org.apache.lucene.document.Document;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.CustomScoreProvider;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.valuesource.IntFieldSource;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.Longs;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;


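/**
 * Custom scoring examples.
 *
 * <p>Steps for custom scoring:
 * <ol>
 *   <li>Create a class that extends CustomScoreQuery and override its
 *       getCustomScoreProvider method.</li>
 *   <li>Create a class that extends CustomScoreProvider and override its
 *       customScore method.</li>
 * </ol>
 *
 * @author
 */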
public class MyScoreQuery {

/**
 * Custom-score search driven by the "score" field.
 *
 * <p>Runs a term query for "java" on the "content" field, wraps it in a
 * {@link MyCustomScoreQuery} together with a function query over the integer
 * "score" field, and prints up to 1000 hits to standard output.
 *
 * <p>I/O failures are reported via {@code printStackTrace} and are not
 * rethrown. The index reader is closed whether or not the search succeeds.
 */
public void searchByScoreQuery() {
    DirectoryReader reader = null;
    try {
        reader = DirectoryReader.open(FileIndexUtils.getDirectory());
        IndexSearcher searcher = new IndexSearcher(reader);
        Query q = new TermQuery(new Term("content", "java"));
        // Build a custom-score query from the base query and the "score" field.
        FunctionQuery fq = new FunctionQuery(new IntFieldSource("score"));
        MyCustomScoreQuery query = new MyCustomScoreQuery(q, fq);
        TopDocs tds = searcher.search(query, 1000);
        // HH (0-23) rather than hh (1-12): the pattern has no AM/PM marker,
        // so a 12-hour value would be ambiguous.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        for (ScoreDoc sd : tds.scoreDocs) {
            Document d = searcher.doc(sd.doc);
            System.out.println(sd.doc+":("+sd.score+")" +
                    "["+d.get("filename")+"["+d.get("path")+"]--->"+
                    d.get("size")+"-----"+sdf.format(new Date(Long.parseLong(d.get("date"))))+"]");
        }
    } catch (IOException e) {
        // Also covers CorruptIndexException, which is an IOException.
        e.printStackTrace();
    } finally {
        // Close the reader on every path so a failed search does not leak it.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Custom-score search driven by the "filename" field.
 *
 * <p>Runs a term query for "java" on the "content" field, wraps it in a
 * {@link FilenameScoreQuery}, and prints up to 1000 hits to standard output.
 *
 * <p>I/O failures are reported via {@code printStackTrace} and are not
 * rethrown. The index reader is closed whether or not the search succeeds.
 */
public void searchByFileScoreQuery() {
    DirectoryReader reader = null;
    try {
        reader = DirectoryReader.open(FileIndexUtils.getDirectory());
        IndexSearcher searcher = new IndexSearcher(reader);
        Query q = new TermQuery(new Term("content", "java"));
        // Build a custom-score query based on the filename.
        FilenameScoreQuery query = new FilenameScoreQuery(q);
        TopDocs tds = searcher.search(query, 1000);
        // HH (0-23) rather than hh (1-12): the pattern has no AM/PM marker,
        // so a 12-hour value would be ambiguous.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        for (ScoreDoc sd : tds.scoreDocs) {
            Document d = searcher.doc(sd.doc);
            System.out.println(sd.doc+":("+sd.score+")" +
                    "["+d.get("filename")+"["+d.get("path")+"]--->"+
                    d.get("size")+"-----"+sdf.format(new Date(Long.parseLong(d.get("date"))))+"]");
        }
    } catch (IOException e) {
        // Also covers CorruptIndexException, which is an IOException.
        e.printStackTrace();
    } finally {
        // Close the reader on every path so a failed search does not leak it.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}



/**
 * Custom-score search driven by the "date" field.
 *
 * <p>Runs a term query for "java" on the "content" field, wraps it in a
 * {@link DateScoreQuery}, and prints up to 1000 hits to standard output.
 *
 * <p>I/O failures are reported via {@code printStackTrace} and are not
 * rethrown. The index reader is closed whether or not the search succeeds.
 */
public void searchByDateScoreQuery() {
    DirectoryReader reader = null;
    try {
        reader = DirectoryReader.open(FileIndexUtils.getDirectory());
        IndexSearcher searcher = new IndexSearcher(reader);
        Query q = new TermQuery(new Term("content", "java"));
        // Build a custom-score query based on the date.
        DateScoreQuery query = new DateScoreQuery(q);
        TopDocs tds = searcher.search(query, 1000);
        // HH (0-23) rather than hh (1-12): the pattern has no AM/PM marker,
        // so a 12-hour value would be ambiguous.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        for (ScoreDoc sd : tds.scoreDocs) {
            Document d = searcher.doc(sd.doc);
            System.out.println(sd.doc+":("+sd.score+")" +
                    "["+d.get("filename")+"["+d.get("path")+"]--->"+
                    d.get("size")+"-----"+sdf.format(new Date(Long.parseLong(d.get("date"))))+"]");
        }
    } catch (IOException e) {
        // Also covers CorruptIndexException, which is an IOException.
        e.printStackTrace();
    } finally {
        // Close the reader on every path so a failed search does not leak it.
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Custom-score query whose per-segment scoring is delegated to
 * {@link FilenameScoreProvider}.
 */
@SuppressWarnings("serial")
private class FilenameScoreQuery extends CustomScoreQuery {

    /**
     * @param subQuery the query whose hits are re-scored
     */
    public FilenameScoreQuery(Query subQuery) {
        super(subQuery);
    }

    /**
     * Supplies the filename-based score provider for the given segment.
     */
    @Override
    protected CustomScoreProvider getCustomScoreProvider(AtomicReaderContext context)
            throws IOException {
        CustomScoreProvider provider = new FilenameScoreProvider(context);
        return provider;
    }
}

private class FilenameScoreProvider extends CustomScoreProvider {
BinaryDocValues filenames = null;
public FilenameScoreProvider(AtomicReaderContext context) {
super(context);
try {
//從域緩存中獲取各個域的值,只要reader沒有關閉,域緩存就一直存在
filenames = FieldCache.DEFAULT.getTerms(context.reader(), "filename",false);
} catch (IOException e) {
e.printStackTrace();
}
}

@Override
public float customScore(int doc, float subQueryScore, float valSrcScore)
throws IOException {
System.out.println("subQueryScore="+subQueryScore);
System.out.println("valSrcScore="+valSrcScore);


//如何根據doc獲取相應的field的值
/*
* 在reader沒有關閉之前,所有的數據會存儲要一個域緩存中,可以通過域緩存獲取很多有用的信息
* filenames = FieldCache.DEFAULT.getTerms(reader, "filename",false);可以獲取所有的filename域的信息
*/
String filename = filenames.get(doc).utf8ToString();
System.out.println("filename="+filename);
if(filename.endsWith(".xml") || filename.endsWith(".she")) {
return subQueryScore*100000.0f;
}else if(filename.endsWith(".aa") || filename.endsWith(".ba")) {
return subQueryScore/1.5f;
}else{
   return subQueryScore*0.8f;
}
}
}


/**
 * Custom-score query that combines a sub-query with a function query and
 * delegates the final score computation to {@link MyCustomScoreProvider}.
 */
@SuppressWarnings("serial")
private class MyCustomScoreQuery extends CustomScoreQuery {

    /**
     * @param subQuery the query whose hits are re-scored
     * @param scoringQuery the function query supplying the value-source score
     */
    public MyCustomScoreQuery(Query subQuery, FunctionQuery scoringQuery) {
        super(subQuery, scoringQuery);
    }

    /**
     * Supplies the score provider for the given segment.
     *
     * <p>Per the original author's note, the default implementation scores
     * as (original score) * (score from the supplied scoring field). To
     * score differently:
     * 1. create a class that extends CustomScoreProvider;
     * 2. override its customScore method.
     */
    @Override
    protected CustomScoreProvider getCustomScoreProvider(AtomicReaderContext context)
            throws IOException {
        CustomScoreProvider provider = new MyCustomScoreProvider(context);
        return provider;
    }
}

private class MyCustomScoreProvider extends CustomScoreProvider {


public MyCustomScoreProvider(AtomicReaderContext context) {
super(context);
}

/**
* subQueryScore表示默認文檔的打分
* valSrcScore表示的評分域的打分
*/
@Override
public float customScore(int doc, float subQueryScore, float valSrcScore)
throws IOException {
System.out.println("subQueryScore="+subQueryScore);
System.out.println("valSrcScore="+valSrcScore);
return subQueryScore/valSrcScore;
}

}

private  class DateScoreQuery  extends  CustomScoreQuery{


public DateScoreQuery(Query subQuery) {
super(subQuery);
}


@Override
protected CustomScoreProvider getCustomScoreProvider(
AtomicReaderContext context) throws IOException {
return new DateScoreProvider(context);
}



}


private class DateScoreProvider extends CustomScoreProvider {
Longs dates = null;
public DateScoreProvider(AtomicReaderContext context) {
super(context);
try {
//從域緩存中獲取各個域的值,只要reader沒有關閉,域緩存就一直存在
dates = FieldCache.DEFAULT.getLongs(context.reader(), "date",false);
} catch (IOException e) {
e.printStackTrace();
}
}

@Override
public float customScore(int doc, float subQueryScore, float valSrcScore)
throws IOException {
System.out.println("subQueryScore="+subQueryScore);
System.out.println("valSrcScore="+valSrcScore);
long date = dates.get(doc);
long today = new Date().getTime();
long year = 1000*60*60*24*3;
if(today-date<=year) {
//爲其加分
return subQueryScore*1.5f;
}else{
return subQueryScore*0.5f;
}
}

}

}



package org.adv.lucene.test;


import org.adv.lucene.util.MyScoreQuery;
import org.junit.Test;


/**
 * JUnit driver for the three custom-score searches of {@link MyScoreQuery}.
 * Each test only invokes the search; no assertions are made.
 */
public class TestCustomScore {

    /** Invokes {@code searchByScoreQuery}. */
    @Test
    public void test01() {
        new MyScoreQuery().searchByScoreQuery();
    }

    /** Invokes {@code searchByFileScoreQuery}. */
    @Test
    public void test02() {
        new MyScoreQuery().searchByFileScoreQuery();
    }

    /** Invokes {@code searchByDateScoreQuery}. */
    @Test
    public void test03() {
        new MyScoreQuery().searchByDateScoreQuery();
    }
}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章