轉載自:http://blog.csdn.net/duck_genuine/article/details/6962624#t13
配置
solr 對一個搜索請求的處理流程
在solrconfig.xml裏會配置一個handler。下面的例子配置了前置處理組件preParams(first-components),還有後置處理組件filterResult(last-components),當然還有默認的組件。
-
<requestHandler name="standard" class="solr.SearchHandler" default="true">
-
-
<arr name="first-components">
-
<str>preParams</str>
-
</arr>
-
<lst name="defaults">
-
<str name="echoParams">explicit</str>
-
<int name="rows">10</int>
-
<int name="start">0</int>
-
<str name="q">*:*</str>
-
</lst>
-
-
<arr name="last-components">
-
<str>filterResult</str>
-
</arr>
-
-
</requestHandler>
http請求控制器
當一個查詢請求過來的時候,先到類SolrDispatchFilter,由這個分發器尋找對應的handler來處理。
-
String qt = solrReq.getParams().get( CommonParams.QT );
-
handler = core.getRequestHandler( qt );
---------------------------------------------------------------------------------------------------
-
this.execute( req, handler, solrReq, solrRsp );
-
HttpCacheHeaderUtil.checkHttpCachingVeto(solrRsp, resp, reqMethod);
-----------------------------------------------------------------------------------------------
從上面的代碼裏看出是由solrCore留下的接口來處理請求。從代碼框架上,從此刻開始進入solr的核心代碼。
-
protected void execute( HttpServletRequest req, SolrRequestHandler handler, SolrQueryRequest sreq, SolrQueryResponse rsp) {
-
sreq.getContext().put( "webapp", req.getContextPath() );
-
sreq.getCore().execute( handler, sreq, rsp );
-
}
看一下SolrCore的execute方法的主要代碼
-
public void execute(SolrRequestHandler handler, SolrQueryRequest req, SolrQueryResponse rsp) {
-
。。。。。
-
handler.handleRequest(req,rsp);
-
setResponseHeaderValues(handler,req,rsp);
-
。。。。。。。
-
}
主要實現對請求的處理,並將請求結果的狀態信息寫到響應的頭部
SolrRequestHandler 處理器
再看一下對請求的處理。先看定義該請求處理器的接口,可以更好理解。它只有兩個方法:一個是初始化信息,主要是配置時的默認參數;另一個就是處理請求的接口。
-
public interface SolrRequestHandler extends SolrInfoMBean {
-
public void init(NamedList args);
-
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp);
-
}
先看一下實現該接口的類RequestHandlerBase
-
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
-
numRequests++;
-
try {
-
SolrPluginUtils.setDefaults(req,defaults,appends,invariants);
-
rsp.setHttpCaching(httpCaching);
-
handleRequestBody( req, rsp );
-
-
NamedList header = rsp.getResponseHeader();
-
if(header != null) {
-
Object partialResults = header.get("partialResults");
-
boolean timedOut = partialResults == null ? false : (Boolean)partialResults;
-
if( timedOut ) {
-
numTimeouts++;
-
rsp.setHttpCaching(false);
-
}
-
}
-
} catch (Exception e) {
-
SolrException.log(SolrCore.log,e);
-
if (e instanceof ParseException) {
-
e = new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
-
}
-
rsp.setException(e);
-
numErrors++;
-
}
-
totalTime += rsp.getEndTime() - req.getStartTime();
-
}
這個方法主要記錄該請求處理的狀態與處理時間。真正的處理邏輯交由各個子類實現的方法 handleRequestBody( req, rsp )。
現在看一下SearchHandler對於搜索處理的實現方法
首先是將solrconfig.xml上配置的各個處理組件按一定順序組裝起來:先是first-components,然後是默認的components,最後是last-components。這些處理組件會按照它們的順序來執行。以下是SearchHandler的實現主體,即方法handleRequestBody
-
@Override
-
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception, ParseException, InstantiationException, IllegalAccessException
-
{
-
-
-
ResponseBuilder rb = new ResponseBuilder();
-
rb.req = req;
-
rb.rsp = rsp;
-
rb.components = components;
-
rb.setDebug(req.getParams().getBool(CommonParams.DEBUG_QUERY, false));
-
-
final RTimer timer = rb.isDebug() ? new RTimer() : null;
-
-
if (timer == null) {
-
-
for( SearchComponent c : components ) {
-
c.prepare(rb);
-
}
-
} else {
-
-
RTimer subt = timer.sub( "prepare" );
-
for( SearchComponent c : components ) {
-
rb.setTimer( subt.sub( c.getName() ) );
-
c.prepare(rb);
-
rb.getTimer().stop();
-
}
-
subt.stop();
-
}
-
-
if (rb.shards == null) {
-
-
-
-
-
if(!rb.isDebug()) {
-
-
for( SearchComponent c : components ) {
-
c.process(rb);
-
}
-
}
-
else {
-
-
RTimer subt = timer.sub( "process" );
-
for( SearchComponent c : components ) {
-
rb.setTimer( subt.sub( c.getName() ) );
-
c.process(rb);
-
rb.getTimer().stop();
-
}
-
subt.stop();
-
timer.stop();
-
-
-
if( rb.getDebugInfo() == null ) {
-
rb.setDebugInfo( new SimpleOrderedMap<Object>() );
-
}
-
rb.getDebugInfo().add( "timing", timer.asNamedList() );
-
}
-
-
} else {
-
-
-
HttpCommComponent comm = new HttpCommComponent();
-
-
if (rb.outgoing == null) {
-
rb.outgoing = new LinkedList<ShardRequest>();
-
}
-
rb.finished = new ArrayList<ShardRequest>();
-
-
-
int nextStage = 0;
-
do {
-
rb.stage = nextStage;
-
nextStage = ResponseBuilder.STAGE_DONE;
-
-
-
for( SearchComponent c : components ) {
-
-
nextStage = Math.min(nextStage, c.distributedProcess(rb));
-
}
-
-
-
-
while (rb.outgoing.size() > 0) {
-
-
-
while (rb.outgoing.size() > 0) {
-
ShardRequest sreq = rb.outgoing.remove(0);
-
sreq.actualShards = sreq.shards;
-
if (sreq.actualShards==ShardRequest.ALL_SHARDS) {
-
sreq.actualShards = rb.shards;
-
}
-
sreq.responses = new ArrayList<ShardResponse>();
-
-
-
for (String shard : sreq.actualShards) {
-
ModifiableSolrParams params = new ModifiableSolrParams(sreq.params);
-
params.remove(ShardParams.SHARDS);
-
params.remove("indent");
-
params.remove(CommonParams.HEADER_ECHO_PARAMS);
-
params.set(ShardParams.IS_SHARD, true);
-
String shardHandler = req.getParams().get(ShardParams.SHARDS_QT);
-
if (shardHandler == null) {
-
params.remove(CommonParams.QT);
-
} else {
-
params.set(CommonParams.QT, shardHandler);
-
}
-
-
comm.submit(sreq, shard, params);
-
}
-
}
-
-
-
-
-
-
while (rb.outgoing.size() == 0) {
-
ShardResponse srsp = comm.takeCompletedOrError();
-
if (srsp == null) break;
-
-
-
-
if (srsp.getException() != null) {
-
comm.cancelAll();
-
if (srsp.getException() instanceof SolrException) {
-
throw (SolrException)srsp.getException();
-
} else {
-
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, srsp.getException());
-
}
-
}
-
-
rb.finished.add(srsp.getShardRequest());
-
-
-
for(SearchComponent c : components) {
-
c.handleResponses(rb, srsp.getShardRequest());
-
}
-
}
-
}
-
-
-
for(SearchComponent c : components) {
-
c.finishStage(rb);
-
}
-
-
-
} while (nextStage != Integer.MAX_VALUE);
-
}
-
}
首先運行的是各個組件的方法prepare
-
for( SearchComponent c : components ) {
-
c.prepare(rb);
-
}
其次,如果不是分佈式搜索,則比較簡單,只需依次運行各個組件的process方法
-
for( SearchComponent c : components ) {
-
c.process(rb);
-
}
就結束!
如果是分佈式搜索,過程會比較複雜些,對於每個組件處理都會返回一個狀態,對於以下幾個方法循環執行,直到狀態結束 。
在類ResponseBuilder定義了幾個狀態。
-
public static int STAGE_START = 0;
-
public static int STAGE_PARSE_QUERY = 1000;
-
public static int STAGE_EXECUTE_QUERY = 2000;
-
public static int STAGE_GET_FIELDS = 3000;
-
public static int STAGE_DONE = Integer.MAX_VALUE;
從STAGE_START---->STAGE_PARSE_QUERY------>STAGE_EXECUTE_QUERY--------------->STAGE_GET_FIELDS------------>STAGE_DONE
從這些狀態名稱可以猜得出整個對應的過程。
每個組件先調用方法distributedProcess,並返回下一個狀態
-
for( SearchComponent c : components ) {
-
-
nextStage = Math.min(nextStage, c.distributedProcess(rb));
-
}
而方法handleResponses主要處理返回來的數據
-
for(SearchComponent c : components) {
-
c.handleResponses(rb, srsp.getShardRequest());
-
}
然後交由finishStage方法來對每一個狀態的過程作結束動作。
------------------------------
-
for(SearchComponent c : components) {
-
c.finishStage(rb);
-
}
-----------------------------
瞭解這個流程有助於擴展solr。比如有個業務是要我對搜索的自然結果排序進行干預,而這個干預只針對前幾頁結果,所以我不得不做個組件來對其中結果進行處理。
所以我想可以添加一個組件放在最後-------------》
1)如果是分佈式搜索:
這個組件可以通過重寫finishStage來做處理,算是對最終結果的排序處理即可。
2)如果只是單機:
這個組件可以通過重寫process來做處理。
組件
現在看一下其中一個主要的組件QueryComponent
prepare
對於QueryComponent,主要解析用戶傳送的語法解析參數defType,以及過濾查詢fq、返回字段集fl、排序字段sort。
單機處理
process
分佈式搜索過程中的某一步,這裏應該是主機要合併文檔,取出對應的文檔的過程,
主機發出指定的solr主鍵ids來取文檔集,首先取出對應的lucene的內部id集。如果某些文檔已不在則棄掉。
-
String ids = params.get(ShardParams.IDS);
-
if (ids != null) {
-
SchemaField idField = req.getSchema().getUniqueKeyField();
-
List<String> idArr = StrUtils.splitSmart(ids, ",", true);
-
int[] luceneIds = new int[idArr.size()];
-
int docs = 0;
-
for (int i=0; i<idArr.size(); i++) {
-
-
int id = req.getSearcher().getFirstMatch(
-
new Term(idField.getName(), idField.getType().toInternal(idArr.get(i))));
-
if (id >= 0)
-
luceneIds[docs++] = id;
-
}
-
-
DocListAndSet res = new DocListAndSet();
-
-
-
-
res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
-
-
if (rb.isNeedDocSet()) {
-
List<Query> queries = new ArrayList<Query>();
-
queries.add(rb.getQuery());
-
List<Query> filters = rb.getFilters();
-
if (filters != null)
-
queries.addAll(filters);
-
res.docSet = searcher.getDocSet(queries);
-
}
-
rb.setResults(res);
-
rsp.add("response",rb.getResults().docList);
-
return;
-
}
-
-
SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
-
-
cmd.setTimeAllowed(timeAllowed);
-
SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();
-
-
searcher.search(result,cmd);
-
-
rb.setResult( result );
-
rsp.add("response",rb.getResults().docList);
-
rsp.getToLog().add("hits", rb.getResults().docList.matches());
-
-
doFieldSortValues(rb, searcher);
-
-
doPrefetch(rb);
目前看到真實獲取文檔內容的是在
QueryResponseWriter
例如xml的輸出格式類XMLWriter
分佈式處理
1)distributedProcess
-
@Override
-
public int distributedProcess(ResponseBuilder rb) throws IOException {
-
if (rb.stage < ResponseBuilder.STAGE_PARSE_QUERY)
-
return ResponseBuilder.STAGE_PARSE_QUERY;
-
if (rb.stage == ResponseBuilder.STAGE_PARSE_QUERY) {
-
createDistributedIdf(rb);
-
return ResponseBuilder.STAGE_EXECUTE_QUERY;
-
}
-
if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) return ResponseBuilder.STAGE_EXECUTE_QUERY;
-
if (rb.stage == ResponseBuilder.STAGE_EXECUTE_QUERY) {
-
-
createMainQuery(rb);
-
return ResponseBuilder.STAGE_GET_FIELDS;
-
}
-
if (rb.stage < ResponseBuilder.STAGE_GET_FIELDS) return ResponseBuilder.STAGE_GET_FIELDS;
-
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
-
-
-
createRetrieveDocs(rb);
-
return ResponseBuilder.STAGE_DONE;
-
}
-
return ResponseBuilder.STAGE_DONE;
-
}
2) handleResponses
-
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
-
-
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
-
-
-
-
mergeIds(rb, sreq);
-
-
-
-
mergeGroupCounts(rb, sreq);
-
-
}
-
-
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
-
-
-
-
returnFields(rb, sreq);
-
-
return;
-
-
}
-
-
}
3) finishStage
-
@Override
-
public void finishStage(ResponseBuilder rb) {
-
-
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
-
-
for (Iterator<SolrDocument> iter = rb._responseDocs.iterator(); iter.hasNext();) {
-
if (iter.next() == null) {
-
iter.remove();
-
rb._responseDocs.setNumFound(rb._responseDocs.getNumFound()-1);
-
}
-
}
-
-
rb.rsp.add("response", rb._responseDocs);
-
}
-
}
同樣,最後的結果是保存在
ResponseBuilder
所持有的rsp(SolrQueryResponse)的
NamedList values = new SimpleOrderedMap();
這個字段裏,以鍵爲"response",單機存儲的是lucene 的內部id列表
如果是分佈式,則存儲的是SolrDocumentList,不用再去索引拿出對應的存儲字段,
這個在QueryResponseWriter裏有對應的處理