java實現的Trie樹數據結構

最近在學習的時候，經常看到使用Trie樹數據結構來解決問題，比如：“有一個1G大小的文件，裏面每一行是一個詞，詞的大小不超過16字節，內存限制大小是1M，要求返回頻數最高的100個詞。”該如何解決？有一種方案就是使用Trie樹加排序實現。

什麼是Trie 樹呢？也就是常說的字典樹，網上對此講得也很多，簡單補充一下個人理解：它實際上相當於把單詞的公共部分給拎出來，這樣一層一層往上拎直到得到每個節點都是不可分的最小單元！

比如網上一個例子

一組單詞，inn, int, at, age, adv, ant, 我們可以得到下面的Trie：

這裏的節點上存的是一個單詞，實際上，每個節點走過的路徑就是該節點代表的單詞！其它不多扯了~~~

Trie樹有什麼好處呢

它是一種非常快的單詞查詢結構，當然，對於單詞去重統計也是非常好的選擇！比如搜索引擎的關鍵詞聯想功能很好的一種選擇就是使用Trie樹了！比如你輸入了in，通過上面的圖我們應該提示inn和int ，這樣可以輕鬆實現！另外，對於單詞出現的頻率統計，以及查找公共前綴等問題，都可以很好的解決！本文不是講理論，只是給出用java自己實現的Trie樹數據結構，其中實現了插入、查找、遍歷、單詞聯想（找公共前綴）等基本功能, 其它功能大家可以自己添加~~~~

以下是Trie樹類：

package com.algorithms;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;


/**
 * A trie (prefix tree) over the 26 lowercase English letters that counts how
 * many times each word has been inserted.
 *
 * <p>All input is lower-cased with {@link Locale#ROOT} before use, so lookups
 * are case-insensitive and every key returned by this class is lower-case.
 * Only the letters {@code a}-{@code z} can be stored.
 */
public class Trie_Tree {

	/** Number of child slots per node: one for each letter 'a'..'z'. */
	private static final int ALPHABET_SIZE = 26;

	/**
	 * A single trie node. The letter a node stands for is implied by its slot
	 * in the parent's {@code childs} array (slot 0 is 'a', slot 25 is 'z').
	 *
	 * <p>Declared static because it never touches the enclosing instance.
	 */
	private static class Node {
		/** How many times the word ending exactly at this node was inserted (0 if none). */
		private int dumpli_num;
		/** How many insertions passed through or ended at this node, i.e. inserted strings having this path as a prefix. */
		private int prefix_num;
		/** Child nodes indexed by {@code letter - 'a'}; a null slot means "no such child". */
		private final Node childs[];
		/** True when at least one inserted word ends at this node (it may still have children). */
		private boolean isLeaf;

		public Node() {
			dumpli_num = 0;
			prefix_num = 0;
			isLeaf = false;
			childs = new Node[ALPHABET_SIZE];
		}
	}

	/** Root of the trie; represents the empty prefix and never a word. */
	private final Node root;

	/** Creates an empty trie. */
	public Trie_Tree() {
		root = new Node();
	}

	/**
	 * Inserts a word, or increments its count if it is already present.
	 * An empty string is ignored.
	 *
	 * @param words the word to insert; compared case-insensitively
	 * @throws IllegalArgumentException if the word contains a character other
	 *         than a letter a-z (after lower-casing); the trie is left unchanged
	 * @throws NullPointerException if {@code words} is null
	 */
	public void insert(String words) {
		insert(this.root, words);
	}

	/**
	 * Iterative insertion starting from the given node.
	 *
	 * @param root  node to start from
	 * @param words word to insert
	 */
	private void insert(Node root, String words) {
		char[] chrs = words.toLowerCase(Locale.ROOT).toCharArray();

		// Validate the whole word before touching any counter, so a rejected
		// word cannot leave a half-inserted path behind.
		for (int i = 0; i < chrs.length; i++) {
			if (indexOf(chrs[i]) < 0) {
				throw new IllegalArgumentException(
						"Only letters a-z are supported, but found '" + chrs[i]
						+ "' in \"" + words + "\"");
			}
		}
		if (chrs.length == 0) {
			// Nothing to store; the root must never be marked as a word.
			return;
		}

		Node node = root;
		for (int i = 0; i < chrs.length; i++) {
			int index = indexOf(chrs[i]);
			if (node.childs[index] == null) {
				node.childs[index] = new Node();
			}
			node = node.childs[index];
			node.prefix_num++;
		}
		// node is now the last letter's node: mark and count the word.
		node.isLeaf = true;
		node.dumpli_num++;
	}

	/**
	 * Returns every stored word together with its insertion count.
	 *
	 * @return a new map from lower-case word to count; empty if the trie is empty
	 */
	public HashMap<String, Integer> getAllWords() {
		return preTraversal(this.root, "");
	}

	/**
	 * Collects, in pre-order, all words in the subtree rooted at {@code root}.
	 *
	 * @param root    subtree root; null yields an empty map
	 * @param prefixs the string spelled by the path leading to {@code root}
	 * @return a new map from full word to count
	 */
	private HashMap<String, Integer> preTraversal(Node root, String prefixs) {
		HashMap<String, Integer> map = new HashMap<String, Integer>();
		if (root != null) {
			collect(root, new StringBuilder(prefixs), map);
		}
		return map;
	}

	/**
	 * Recursive worker for {@link #preTraversal}: writes into one shared map
	 * and reuses one path buffer instead of allocating a map and a string per node.
	 */
	private static void collect(Node node, StringBuilder path, HashMap<String, Integer> out) {
		if (node.isLeaf) {
			out.put(path.toString(), node.dumpli_num);
		}
		for (int i = 0; i < node.childs.length; i++) {
			Node child = node.childs[i];
			if (child != null) {
				path.append((char) ('a' + i));
				collect(child, path, out);
				path.setLength(path.length() - 1); // backtrack
			}
		}
	}

	/**
	 * Tells whether the given word has been inserted. A string that is merely
	 * a prefix of stored words does not count, and neither does the empty string.
	 *
	 * @param word the word to look up; compared case-insensitively
	 * @return true if the word was inserted at least once, otherwise false
	 * @throws NullPointerException if {@code word} is null
	 */
	public boolean isExist(String word) {
		return search(this.root, word);
	}

	/**
	 * Looks the word up starting from the given node.
	 *
	 * @param root node to start from
	 * @param word word to look up
	 * @return true only if the path exists and ends on a word node
	 */
	private boolean search(Node root, String word) {
		Node end = descend(root, word.toLowerCase(Locale.ROOT));
		// Reaching a node is not enough: it must be the end of an inserted word.
		return end != null && end.isLeaf;
	}

	/**
	 * Returns all stored words that start with the given prefix, including the
	 * prefix itself if it is a stored word (like input-method auto-completion).
	 *
	 * @param prefix the prefix; compared case-insensitively
	 * @return a new map from lower-case word to count, or null if no stored
	 *         word starts with the prefix
	 * @throws NullPointerException if {@code prefix} is null
	 */
	public HashMap<String, Integer> getWordsForPrefix(String prefix) {
		return getWordsForPrefix(this.root, prefix);
	}

	/**
	 * Prefix query starting from the given node.
	 *
	 * @param root   node to start from
	 * @param prefix prefix to follow
	 * @return map of matching words to counts, or null if the prefix path does not exist
	 */
	private HashMap<String, Integer> getWordsForPrefix(Node root, String prefix) {
		// Use the normalised prefix for the result keys too, so that they match
		// the keys produced by getAllWords() regardless of the caller's casing.
		String normalized = prefix.toLowerCase(Locale.ROOT);
		Node start = descend(root, normalized);
		if (start == null) {
			return null;
		}
		return preTraversal(start, normalized);
	}

	/**
	 * Follows {@code path} (already lower-cased) down from {@code from}.
	 *
	 * @return the node reached, or null if the path leaves the trie or contains
	 *         a character outside a-z (such a character can never be stored)
	 */
	private static Node descend(Node from, String path) {
		Node node = from;
		for (int i = 0, length = path.length(); i < length; i++) {
			int index = indexOf(path.charAt(i));
			if (index < 0 || node.childs[index] == null) {
				return null;
			}
			node = node.childs[index];
		}
		return node;
	}

	/** Maps 'a'..'z' to 0..25; returns -1 for any other character. */
	private static int indexOf(char c) {
		return (c >= 'a' && c <= 'z') ? c - 'a' : -1;
	}

}

以下是測試類：

package com.algorithm.test;

import java.util.HashMap;

import com.algorithms.Trie_Tree;

public class Trie_Test {

	/**
	 * Demo driver: fills a trie with sample words, prints every word with its
	 * count, prints the words sharing the prefix "chin", and finally reports
	 * that "xiaoming" is not in the trie.
	 */
	public static void main(String[] args) {
		Trie_Tree trie = new Trie_Tree();

		// Sample data, in insertion order; repeats exercise the counting.
		String[] samples = {
			"I", "Love",
			"China", "China", "China", "China", "China",
			"xiaoliang", "xiaoliang",
			"man", "handsome", "love", "chinaha", "her", "know"
		};
		for (int i = 0; i < samples.length; i++) {
			trie.insert(samples[i]);
		}

		printCounts(trie.getAllWords());

		HashMap<String, Integer> withPrefix = trie.getWordsForPrefix("chin");
		System.out.println("\n\n包含chin（包括本身）前綴的單詞及出現次數：");
		printCounts(withPrefix);

		if (!trie.isExist("xiaoming")) {
			System.out.println("\n\n字典樹中不存在：xiaoming ");
		}
	}

	/** Prints one line per entry of the given word-to-count map. */
	private static void printCounts(HashMap<String, Integer> counts) {
		for (String word : counts.keySet()) {
			System.out.println(word + " 出現: " + counts.get(word) + "次");
		}
	}
}

運行結果：

love 出現: 2次
chinaha 出現: 1次
her 出現: 1次
handsome 出現: 1次
know 出現: 1次
man 出現: 1次
xiaoliang 出現: 2次
i 出現: 1次
china 出現: 5次

包含chin（包括本身）前綴的單詞及出現次數：
chinaha 出現: 1次
china 出現: 5次

字典樹中不存在：xiaoming

總結：在實現的時候，主要是想好如何設計每個節點的結構。這裏針對英文字母總共26個，使用了一個長度爲26的子節點數組來記錄，其實完全可以用list、map或其他的容器來實現，這樣也就可以容納更復雜的對象了！另外一個方面就是，一個節點的prefix_num屬性實際上是指從根到該節點所經過的路徑（也就是字串）作爲前綴出現的次數，而不是該節點所代表的單詞本身的重複數（單詞本身的重複數由dumpli_num記錄；因爲一個節點的child域並不是指某個單詞，prefix_num只對到達該節點的路徑有意義）。最後，遍歷使用了前序遍歷的遞歸實現。相信對學過一點數據結構的人來說不難。。。

java實現的Trie樹數據結構

Trie樹有什麼好處呢

如何使用 JS 判斷用戶是否處於活躍狀態

通過HPA+CronHPA組合應對業務複雜彈性伸縮場景

java最小堆實現優先權隊列和求最大的n個數問題

java volatile 和synchronize 辨析

java實現的Trie樹數據結構

android 自定義titlebar標題欄衝突問題

什麼？你還不知道EJB？

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結