合併詞典和倒排表的源代碼和註釋
/**
 * Merges the term dictionaries and posting lists of every reader in `readers`.
 *
 * A SegmentMergeInfo is created for each reader, advanced to its first term and
 * placed in `queue`. The main loop then repeatedly pops the head of the queue
 * together with every following entry positioned on an equal term, passes that
 * group to mergeTermInfo(), advances each entry, and re-queues the ones that
 * still have terms. Exhausted entries are closed and deleted here.
 *
 * @param queue priority queue holding the per-segment merge state; it is
 *              dereferenced unconditionally, so it must not be NULL.
 */
void SegmentMerger::mergeTermInfos(SegmentMergeQueue* queue)
{
    // base is the id of the first document of the current segment; it is
    // handed to SegmentMergeInfo and grows by numDocs() after every reader.
    int32_t base = 0;

    // Step 0: create one SegmentMergeInfo per reader, advance it to the
    // reader's first term and push it onto the queue.
    for (uint32_t i = 0; i < readers.size(); i++) {
        IndexReader* reader = readers[i];
        TermEnum* termEnum = reader->terms();
        SegmentMergeInfo* smi = _CLNEW SegmentMergeInfo(base, termEnum, reader);
        base += reader->numDocs();
        if (smi->next()) {
            queue->put(smi);
        } else {
            // The reader has no terms at all: release the entry right away.
            smi->close();
            _CLDELETE(smi);
        }
    }

    // One slot per reader plus one for the terminating NULL.
    SegmentMergeInfo** match = _CL_NEWARRAY(SegmentMergeInfo*, readers.size() + 1);

    // As long as there are SegmentMergeInfo instances stored in the queue
    while (queue->size() > 0) {
        int32_t matchSize = 0;

        // Step 1: pop the head of the queue into the match array.
        match[matchSize++] = queue->pop();
        Term* term = match[0]->term;

        // Step 2: collect every other entry positioned on an equal term.
        SegmentMergeInfo* top = queue->top();
        while (top != NULL && term->equals(top->term)) {
            match[matchSize++] = queue->pop();
            top = queue->top();
        }
        // mergeTermInfo() receives no length, so the array is NULL-terminated.
        match[matchSize] = NULL;

        // Step 3: merge the posting lists of this term into the new segment.
        mergeTermInfo(match);

        // Step 4: advance each matched entry; re-queue it if it still has
        // terms, otherwise dispose of it.
        while (matchSize > 0) {
            SegmentMergeInfo* smi = match[--matchSize];
            if (smi->next()) {
                queue->put(smi);
            } else {
                // Same teardown as in the initialisation loop above: close
                // before deleting so both paths release the entry identically.
                smi->close();
                _CLDELETE(smi);
            }
        }
        // Step 5: repeat until the queue is empty.
    }

    _CLDELETE_ARRAY(match);
}
/**
 * Merges the posting lists of a single term.
 *
 * The body is elided ("....") in this excerpt, so only the call contract
 * visible at the call site in mergeTermInfos() is described here.
 *
 * @param smis array of SegmentMergeInfo pointers; mergeTermInfos() passes the
 *             entries whose current term compared equal, followed by a
 *             terminating NULL element (no length is passed).
 */
void SegmentMerger::mergeTermInfo( SegmentMergeInfo** smis)
{
....
}