在《Google原生輸入法LatinIME詞庫構建流程分析(一)》中,對LatinIME詞庫構建流程的分析進行到了dict_trie->dict_list_->init_list這一步。緊接其後的步驟是構建N-gram信息,該過程已在《Google原生輸入法LatinIME詞庫構建流程分析(三)--N-gram信息構建》中分析過。下面繼續分析build_dict在N-gram構建之後的部分:
// Abridged excerpt of DictBuilder::build_dict; the `...` lines mark code that
// was omitted from this listing. The visible portion builds the unigram data
// from the lemma array, re-sorts the lemma array, and then constructs the
// node structure starting from the root via construct_subset().
//
// Parameters (none of them is used in the visible portion):
//   fn_raw      - presumably the path of the raw dictionary file; confirm
//                 against the elided code.
//   fn_validhzs - presumably the path of the valid-Hanzi list file; confirm
//                 against the elided code.
//   dict_trie   - the DictTrie associated with this build; how it is filled
//                 is not visible here.
//
// Returns false (after calling free_resource()) when construct_subset()
// fails. Any other return paths are in the elided code.
bool DictBuilder::build_dict(const char *fn_raw,
const char *fn_validhzs,
DictTrie *dict_trie) {
...
// Construct the NGram information
NGram& ngram = NGram::get_instance();
// The third argument is the idx_by_hz of the last lemma entry plus one.
// NOTE(review): this presumably relies on lemma_arr_ currently being ordered
// by idx_by_hz, so the value acts as a count of ids -- confirm.
// NOTE(review): indexing with lemma_num_ - 1 assumes lemma_num_ > 0;
// presumably guaranteed by the elided code above -- verify.
ngram.build_unigram(lemma_arr_, lemma_num_,
lemma_arr_[lemma_num_ - 1].idx_by_hz + 1);
// sort the lemma items according to the spelling idx string
// (this reorders lemma_arr_, so it must come after the unigram build above,
// which reads the last element of the previous ordering)
myqsort(lemma_arr_, lemma_num_, sizeof(LemmaEntry), compare_py);
// NOTE(review): presumably collects the top-ranked lemmas; the definition of
// get_top_lemmas() is not shown here.
get_top_lemmas();
// Statistics initialization is compiled in only when ___DO_STATISTICS___ is
// defined.
#ifdef ___DO_STATISTICS___
stat_init();
#endif
lma_nds_used_num_le0_ = 1; // The root node
// Start construction from lma_nodes_le0_ (passed as void*), over lemma_arr_
// with the arguments 0, lemma_num_, 0.
// NOTE(review): these presumably mean level 0, item range [0, lemma_num_),
// and parent node index 0 -- confirm against construct_subset()'s declaration.
bool dt_success = construct_subset(static_cast<void*>(lma_nodes_le0_),
lemma_arr_, 0, lemma_num_, 0);
// On failure, call free_resource() and report the failure to the caller.
if (!dt_success) {
free_resource();
return false;
}
...
}