字典樹(Trie) : 又稱前綴樹,是一種樹形結構,是一種哈希樹的變種。典型應用是用於統計,排序和保存大量的字符串(但不僅限於字符串),所以經常被搜索引擎系統用於文本詞頻統計。它的優點是:利用字符串的公共前綴來減少查詢時間,最大限度地減少無謂的字符串比較,查詢效率比哈希樹高。
如果有n個條目,Trie查詢每個條目的時間複雜度和字典中一共有多少條目無關,時間複雜度爲O(w),w爲查詢單詞的長度!而大多數單詞的長度小於10。
使用Java中的TreeMap實現Trie:
下面先給出支持添加與查詢的版本,隨後給出另外增加了刪除功能的完整版本。
import java.util.TreeMap;
/**
 * A trie (prefix tree) of strings. Each node keeps its child links in a {@link TreeMap}
 * keyed by the next character.
 *
 * <p>Supports adding words, exact-word lookup and prefix lookup. The empty string is a valid
 * word. Passing {@code null} to any method taking a string throws
 * {@link NullPointerException}.
 */
public class Trie {

    /**
     * A single trie node: an end-of-word flag plus the links to its children.
     *
     * <p>Declared {@code static} so that nodes do not each hold a reference to the enclosing
     * {@code Trie} instance.
     */
    private static class Node {
        public boolean isWord;
        public final TreeMap<Character, Node> next;

        public Node(boolean isWord) {
            this.isWord = isWord;
            next = new TreeMap<>();
        }

        public Node() {
            this(false);
        }
    }

    private final Node root;
    private int size;

    /** Creates an empty trie. */
    public Trie() {
        root = new Node();
        size = 0;
    }

    /**
     * Returns the number of distinct words stored in the trie.
     *
     * @return the word count
     */
    public int getSize() {
        return size;
    }

    /**
     * Adds {@code word} to the trie. Adding a word that is already present has no effect.
     *
     * @param word the word to add
     */
    public void add(String word) {
        Node cur = root;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            // Look the child up once and create it only when it is missing.
            Node child = cur.next.get(c);
            if (child == null) {
                child = new Node();
                cur.next.put(c, child);
            }
            cur = child;
        }
        // Count the word only the first time it is added.
        if (!cur.isWord) {
            cur.isWord = true;
            size++;
        }
    }

    /**
     * Reports whether {@code word} is stored in the trie as a complete word.
     *
     * @param word the word to look up
     * @return {@code true} if {@code word} was added as a word, {@code false} otherwise
     */
    public boolean contains(String word) {
        Node node = find(word);
        return node != null && node.isWord;
    }

    /**
     * Reports whether at least one stored word starts with {@code prefix}. A word counts as a
     * prefix of itself.
     *
     * @param prefix the prefix to look up
     * @return {@code true} if some stored word starts with {@code prefix}; in particular
     *     {@code false} for every prefix, including the empty one, when the trie is empty
     */
    public boolean isPrefix(String prefix) {
        Node node = find(prefix);
        // Reaching a node is not enough: the root always exists, even when no word is stored,
        // so require that the node ends a word or leads to one.
        return node != null && (node.isWord || !node.next.isEmpty());
    }

    /** Walks the trie along the characters of {@code s}; returns the node reached, or null. */
    private Node find(String s) {
        Node cur = root;
        for (int i = 0; i < s.length(); i++) {
            cur = cur.next.get(s.charAt(i));
            if (cur == null) {
                return null;
            }
        }
        return cur;
    }
}
import java.util.TreeMap;
/**
 * A trie (prefix tree) of strings. Each node keeps its child links in a {@link TreeMap}
 * keyed by the next character.
 *
 * <p>Supports adding, removing, exact-word lookup and prefix lookup. The empty string is a
 * valid word. Passing {@code null} to any method taking a string throws
 * {@link NullPointerException}.
 *
 * @author ymn
 * @version 1.0
 * @date 2020\5\22 0022 15:39
 */
public class Trie {

    /**
     * A single trie node: an end-of-word flag plus the links to its children.
     *
     * <p>Declared {@code static} so that nodes do not each hold a reference to the enclosing
     * {@code Trie} instance.
     */
    private static class Node {
        public boolean isWord;
        public final TreeMap<Character, Node> next;

        public Node(boolean isWord) {
            this.isWord = isWord;
            next = new TreeMap<>();
        }

        public Node() {
            this(false);
        }
    }

    private final Node root;
    private int size;

    /** Creates an empty trie. */
    public Trie() {
        root = new Node();
        size = 0;
    }

    /**
     * Returns the number of distinct words stored in the trie.
     *
     * @return the word count
     */
    public int getSize() {
        return size;
    }

    /**
     * Adds {@code word} to the trie. Adding a word that is already present has no effect.
     *
     * @param word the word to add
     */
    public void add(String word) {
        Node cur = root;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            // Look the child up once and create it only when it is missing.
            Node child = cur.next.get(c);
            if (child == null) {
                child = new Node();
                cur.next.put(c, child);
            }
            cur = child;
        }
        // Count the word only the first time it is added.
        if (!cur.isWord) {
            cur.isWord = true;
            size++;
        }
    }

    /**
     * Reports whether {@code word} is stored in the trie as a complete word.
     *
     * @param word the word to look up
     * @return {@code true} if {@code word} is currently stored, {@code false} otherwise
     */
    public boolean contains(String word) {
        Node node = find(word);
        return node != null && node.isWord;
    }

    /**
     * Reports whether at least one stored word starts with {@code prefix}. A word counts as a
     * prefix of itself.
     *
     * @param prefix the prefix to look up
     * @return {@code true} if some stored word starts with {@code prefix}; in particular
     *     {@code false} for every prefix, including the empty one, when the trie is empty
     */
    public boolean isPrefix(String prefix) {
        Node node = find(prefix);
        // Reaching a node is not enough: the root always exists, even when no word is stored,
        // so require that the node ends a word or leads to one.
        return node != null && (node.isWord || !node.next.isEmpty());
    }

    /**
     * Removes {@code word} from the trie, pruning nodes that no longer lead to any word.
     *
     * <p>The empty string is handled like any other word: it is removed if it was added.
     *
     * @param word the word to remove
     * @return {@code true} if {@code word} was stored and has been removed, {@code false} if
     *     it was not stored
     */
    public boolean remove(String word) {
        return remove(root, word, 0);
    }

    /**
     * Removes the suffix {@code word[index...end)} from the sub-trie rooted at {@code node}.
     *
     * @return {@code true} if the word was found and unmarked, {@code false} otherwise
     */
    private boolean remove(Node node, String word, int index) {
        if (index == word.length()) {
            if (!node.isWord) {
                return false;
            }
            node.isWord = false;
            size--;
            return true;
        }

        char c = word.charAt(index);
        Node child = node.next.get(c);
        if (child == null) {
            return false;
        }

        boolean removed = remove(child, word, index + 1);
        // After a successful removal, drop the child if it neither ends a word nor has
        // children of its own.
        if (removed && !child.isWord && child.next.isEmpty()) {
            node.next.remove(c);
        }
        return removed;
    }

    /** Walks the trie along the characters of {@code s}; returns the node reached, or null. */
    private Node find(String s) {
        Node cur = root;
        for (int i = 0; i < s.length(); i++) {
            cur = cur.next.get(s.charAt(i));
            if (cur == null) {
                return null;
            }
        }
        return cur;
    }
}
除了Trie外,還有壓縮字典樹(Compressed Trie),三分搜索樹(Ternary Search Tree)等。