本人主要是參考lucene實戰一書,不過中文版上總是發現一些錯誤,導致程序並沒有給出想要的結果,還是要看api文檔。
lucene3.X實現自定義排序,主要是實現繼承FieldComparatorSource抽象類的子類和繼承FieldComparator的子類。
1.繼承FieldComparatorSource,必須實現抽象方法newComparator。
2.繼承FieldComparator,必須實現下面6個抽象方法:
compare(int, int)
Compare a hit at 'slot a' with hit 'slot b'.
setBottom(int)
This method is called by FieldValueHitQueue to notify the FieldComparator of the current weakest ("bottom") slot. Note that this slot may not hold the weakest value according to your comparator, in cases where your comparator is not the primary one (i.e., is only used to break ties from the comparators before it).
compareBottom(int)
Compare a new hit (docID) against the "weakest" (bottom) entry in the queue.
copy(int, int)
Installs a new hit into the priority queue. The FieldValueHitQueue calls this method when a new hit is competitive.
setNextReader(org.apache.lucene.index.IndexReader, int)
Invoked when the search is switching to the next segment. You may need to update internal state of the comparator, for example retrieving new values from the FieldCache.
value(int)
Return the sort value stored in the specified slot. This is only called at the end of the search, in order to populate FieldDoc.fields when returning the top results.
上面方法描述摘自api文檔,詳細請查閱api。
例子是書上的一個簡單例子:匹配結果按照與用戶所在位置(二維座標)的距離由近到遠排序,以便找出離他最近的餐廳。每個地點指定了三個域,即地名、二維座標x和y,以及該地點的類型。下面是具體實現代碼:
package org.apache.lucene.demo;
import java.io.IOException;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.SortField;
/**
 * Comparator source that sorts hits by their straight-line distance from a
 * fixed reference point {@code (x, y)}.
 *
 * <p>The sort field is expected to hold un-analyzed strings of the form
 * {@code "x,y"} with integer coordinates.
 */
public class DistanceComparatorSource extends FieldComparatorSource {
    private final int x;
    private final int y;

    /**
     * @param x x coordinate of the reference point
     * @param y y coordinate of the reference point
     */
    public DistanceComparatorSource(int x, int y) {
        this.x = x;
        this.y = y;
    }

    /**
     * Creates the comparator for one search.
     *
     * @param arg0 name of the field holding the {@code "x,y"} coordinates
     * @param arg1 number of slots (top hits) the comparator must track
     * @param arg2 not used by this implementation
     * @param arg3 not used by this implementation
     */
    @Override
    public FieldComparator<?> newComparator(String arg0, int arg1, int arg2,
            boolean arg3) throws IOException {
        return new DistanceSourceLookupComparator(arg0, arg1);
    }

    /**
     * Keeps one distance per queue slot and computes distances for the
     * documents of the current segment from the cached coordinate strings.
     * Not static on purpose: it reads the enclosing instance's {@code x}/{@code y}.
     */
    private class DistanceSourceLookupComparator extends FieldComparator<Float> {
        /** Per-document coordinates of the current segment. */
        private int[] xDoc, yDoc;
        /** Marks documents of the current segment that have no coordinate value. */
        private boolean[] missing;
        /** Distance stored for each queue slot. */
        private final float[] values;
        /** Distance of the current weakest slot, set by {@link #setBottom(int)}. */
        private float bottom;
        private final String fieldName;

        public DistanceSourceLookupComparator(String fieldName, int numHits) {
            values = new float[numHits];
            this.fieldName = fieldName;
        }

        /** Orders two slots by ascending distance. */
        @Override
        public int compare(int arg0, int arg1) {
            return Float.compare(values[arg0], values[arg1]);
        }

        /**
         * Distance from the reference point to document {@code doc} of the
         * current segment; documents without coordinates are infinitely far.
         */
        private float getDistance(int doc) {
            if (missing[doc]) {
                return Float.POSITIVE_INFINITY;
            }
            // Widen before subtracting/squaring so extreme coordinates cannot overflow int.
            long deltaX = (long) xDoc[doc] - x;
            long deltaY = (long) yDoc[doc] - y;
            return (float) Math.sqrt((double) deltaX * deltaX + (double) deltaY * deltaY);
        }

        /** Compares the bottom slot's distance with the distance of document {@code arg0}. */
        @Override
        public int compareBottom(int arg0) throws IOException {
            return Float.compare(bottom, getDistance(arg0));
        }

        /** Stores the distance of document {@code arg1} into slot {@code arg0}. */
        @Override
        public void copy(int arg0, int arg1) throws IOException {
            values[arg0] = getDistance(arg1);
        }

        /** Remembers the distance held in slot {@code arg0} as the bottom value. */
        @Override
        public void setBottom(int arg0) {
            bottom = values[arg0];
        }

        /**
         * Loads the coordinates of every document in the next segment.
         * The book's version of this method is wrong; implementing it as the
         * API documentation describes produces the correct ordering.
         *
         * @param arg0 reader of the segment about to be searched
         * @param arg1 not used by this implementation
         * @throws NumberFormatException if a value is not of the form {@code "x,y"}
         */
        @Override
        public void setNextReader(IndexReader arg0, int arg1) throws IOException {
            // Read the field this comparator was created for, not a fixed name.
            String[] locations = FieldCache.DEFAULT.getStrings(arg0, fieldName);
            xDoc = new int[locations.length];
            yDoc = new int[locations.length];
            missing = new boolean[locations.length];
            for (int i = 0; i < locations.length; i++) {
                if (locations[i] == null) {
                    // No cached value for this document: sort it last instead of failing.
                    missing[i] = true;
                    continue;
                }
                String[] parts = locations[i].split(",");
                xDoc[i] = Integer.parseInt(parts[0]);
                yDoc[i] = Integer.parseInt(parts[1]);
            }
        }

        /** Returns the distance stored in slot {@code arg0}. */
        @Override
        public Float value(int arg0) {
            return Float.valueOf(values[arg0]);
        }

        public int sortType() {
            return SortField.CUSTOM;
        }

        @Override
        public String toString() {
            return "Distance from (" + x + "," + y + ")";
        }
    }
}
下面是具體的測試運行排序結果的程序:
package org.apache.lucene.demo;
import java.io.IOException;
import javax.crypto.SealedObject;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
/**
 * Demo program: indexes four restaurants with integer coordinates in an
 * in-memory index, then lists them ordered by distance from the point (10, 10).
 */
public class DistanceSortingTest {
    /**
     * Builds the index, runs the distance-sorted search and prints, for each
     * hit, its computed distance followed by its name.
     *
     * @param args not used
     * @throws IOException
     * @throws LockObtainFailedException
     * @throws CorruptIndexException
     */
    public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException {
        RAMDirectory directory = new RAMDirectory();
        IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(),
                IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            addPoint(indexWriter, "El charro", "restaurant", 1, 2);
            addPoint(indexWriter, "Cafe Poca Cosa", "restaurant", 5, 9);
            addPoint(indexWriter, "Los Betos", "restaurant", 9, 6);
            addPoint(indexWriter, "Nico's Toco Shop", "restaurant", 3, 8);
        } finally {
            // Always release the writer, even if indexing a point fails.
            indexWriter.close();
        }

        Searcher searcher = new IndexSearcher(directory);
        try {
            Query query = new TermQuery(new Term("type", "restaurant"));
            Sort sort = new Sort(new SortField("location", new DistanceComparatorSource(10, 10)));
            TopFieldDocs topDocs = searcher.search(query, null, 5, sort);
            for (ScoreDoc doc : topDocs.scoreDocs) {
                // Hits of a sorted search are FieldDocs; fields[0] is the value of the first sort field.
                FieldDoc fieldDoc = (FieldDoc) doc;
                System.out.println(fieldDoc.fields[0]);
                Document document = searcher.doc(doc.doc);
                System.out.println(document.get("name"));
            }
        } finally {
            searcher.close();
        }
    }

    /**
     * Adds one place to the index with un-analyzed, stored fields
     * {@code name}, {@code type} and {@code location} (formatted as "x,y").
     */
    private static void addPoint(IndexWriter writer, String name, String type, int x, int y)
            throws CorruptIndexException, IOException {
        Document document = new Document();
        document.add(new Field("name", name, Field.Store.YES, Field.Index.NOT_ANALYZED));
        document.add(new Field("type", type, Field.Store.YES, Field.Index.NOT_ANALYZED));
        document.add(new Field("location", x + "," + y, Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(document);
    }
}
運行結果:
4.1231055
Los Betos
5.0990195
Cafe Poca Cosa
7.28011
Nico's Toco Shop
12.0415945
El charro