在《Google原生输入法LatinIME词库构建流程分析(一)》中,对LatinIME词库构建流程的分析进行到了 dict_trie->dict_list_->init_list 这一步;紧接着是N-gram信息的构建,其过程已在《Google原生输入法LatinIME词库构建流程分析(三)--N-gram信息构建》中分析过。下面接着分析 build_dict 中后续的步骤:
// Excerpt of DictBuilder::build_dict. The "..." lines are elisions made by
// the article; code before and after the visible statements is not shown.
//
// Visible steps, in order:
//   1. Build the unigram data from the lemma array through NGram.
//   2. Re-sort the lemma array with the compare_py comparator.
//   3. Call get_top_lemmas().
//   4. Mark one level-0 node as used (the root) and call construct_subset()
//      over the whole lemma array.
//
// Parameters (meanings inferred from names only -- TODO confirm against the
// full source):
//   fn_raw      - presumably the path of the raw dictionary file.
//   fn_validhzs - presumably the path of a file listing valid Chinese
//                 characters.
//   dict_trie   - the DictTrie involved in the build; not referenced in the
//                 visible part of the body.
//
// Returns false (after calling free_resource()) when construct_subset()
// reports failure. The return value on the success path lies in the elided
// part.
bool DictBuilder::build_dict(const char *fn_raw,
const char *fn_validhzs,
DictTrie *dict_trie) {
...
// Construct the NGram information
NGram& ngram = NGram::get_instance();
// The third argument is the idx_by_hz of the LAST lemma entry plus one.
// NOTE(review): this presumably equals the number of distinct idx_by_hz
// values, which would require lemma_arr_ to be ordered by idx_by_hz at this
// point -- confirm in the elided code above.
// NOTE(review): lemma_arr_[lemma_num_ - 1] is only valid when
// lemma_num_ > 0; verify that the elided code rejects an empty lemma list
// before reaching this line.
ngram.build_unigram(lemma_arr_, lemma_num_,
lemma_arr_[lemma_num_ - 1].idx_by_hz + 1);
// sort the lemma items according to the spelling idx string
// (this reorders lemma_arr_ in place using compare_py, so it must come after
// the build_unigram call above, which reads the last element of the previous
// ordering).
myqsort(lemma_arr_, lemma_num_, sizeof(LemmaEntry), compare_py);
// NOTE(review): get_top_lemmas() is defined elsewhere; presumably it collects
// the highest-ranked lemmas from the array -- confirm.
get_top_lemmas();
// Statistics initialization is compiled in only when ___DO_STATISTICS___ is
// defined.
#ifdef ___DO_STATISTICS___
stat_init();
#endif
lma_nds_used_num_le0_ = 1; // The root node
// Build from the level-0 node buffer over lemma_arr_ with the arguments
// (0, lemma_num_, 0).
// NOTE(review): these look like item_start, item_end and level, i.e. the
// whole lemma range starting at trie level 0 -- confirm against
// construct_subset's declaration.
bool dt_success = construct_subset(static_cast<void*>(lma_nodes_le0_),
lemma_arr_, 0, lemma_num_, 0);
// On failure, release what has been set up so far and report the error to
// the caller.
if (!dt_success) {
free_resource();
return false;
}
...
}