字典樹(Trie)常用於高效地保存文本詞語,適用於敏感詞過濾、關鍵詞提取等場景。在字典樹中,具有相同前綴的詞共享相同的樹節點和路徑。
字典樹結構一般包括如下功能和屬性:(1)構建;(2)添加;(3)刪除;(4)前綴統計;(5)搜索
- 實現一:通過字典的嵌套來實現
class Trie(object):
    """Trie built from nested built-in dicts (implementation 1).

    Every node is a plain ``dict`` mapping one character to its child node.
    Each non-root node additionally carries two reserved keys:

    * ``'count'`` -- number of stored words whose path passes through the node;
    * ``'end'``   -- number of stored copies of the word that terminates at the
      node (the key is absent, or 0, when no word ends there).

    The reserved keys are multi-character strings, so they cannot collide
    with the single-character keys produced by iterating over a ``str``.

    Supported operations:
        (1) build the trie from a collection of words
        (2) add a word
        (3) search for a word
        (4) delete a word
        (5) count the stored words sharing a prefix
    """

    def __init__(self):
        self.trie = {}  # root node; it has no 'count' key of its own
        self.count = 0  # total number of stored words, duplicates included

    def __repr__(self):
        return str(self.trie)

    def buildTree(self, wordList):
        """Insert every word of ``wordList`` into the trie."""
        for word in wordList:
            self.add(word)

    def add(self, word: str) -> None:
        """Store one copy of ``word``, creating missing nodes on the way."""
        node = self.trie  # cursor walking down from the root
        for ch in word:
            if ch not in node:
                node[ch] = {'count': 0}
            node = node[ch]
            node['count'] += 1
        self.count += 1
        # Track copies rather than a bare flag so that a word added twice
        # survives a single delete() and the counters stay consistent.
        node['end'] = node.get('end', 0) + 1

    def delete(self, word: str) -> bool:
        """Remove one stored copy of ``word``.

        Only the ``'count'`` / ``'end'`` bookkeeping is updated; the nodes
        themselves stay in the structure.

        Returns:
            True if a copy was removed, False if ``word`` was not stored.
        """
        if not self.search(word):
            return False
        node = self.trie
        for ch in word:
            # Every node on the path loses exactly one passing word.
            node = node[ch]
            node['count'] -= 1
        self.count -= 1
        node['end'] -= 1
        return True

    def search(self, word: str) -> bool:
        """Return True if at least one copy of ``word`` is stored."""
        node = self.trie
        for ch in word:
            if ch not in node:
                return False
            node = node[ch]
        return node.get('end', 0) > 0

    def prefix_count(self, prefix: str) -> int:
        """Return how many stored words start with ``prefix``.

        Returns:
            The number of matching words; for the empty prefix this is the
            total word count.  -1 is returned when the path for ``prefix``
            was never created.  Because delete() keeps nodes in place, a
            prefix whose words have all been deleted yields 0, not -1.
        """
        if not prefix:
            # The root carries no 'count' key; the total lives on the instance.
            return self.count
        node = self.trie
        for ch in prefix:
            if ch not in node:
                return -1
            node = node[ch]
        return node['count']
- 實現二:通過遞歸的節點類實現
class Trie(object):
    """Trie implemented as a recursive node class (implementation 2).

    Every node is itself a ``Trie``; the instance a caller holds is the root.
    Each node records the stored words whose path passes through it, so a
    prefix lookup is a walk down the tree followed by a list copy.
    Inserting the same word more than once stores that many copies.
    """

    def __init__(self, depth=0):
        self.children = {}  # {char: Trie}, keyed by the character at this level
        self.depth = depth  # level in the tree; the root is 0, so it equals the prefix length
        self.end = False    # True while at least one stored word terminates here
        self.count = 0      # number of stored words having this node's prefix
        self.words = []     # the stored words having this node's prefix, in insertion order

    def __repr__(self):
        return str(self.words)

    def insert(self, word: str) -> None:
        """Store one copy of ``word``, creating child nodes as needed."""
        node = self
        # The root represents the empty prefix, which every word has.
        node.count += 1
        node.words.append(word)
        for ch in word:
            if ch not in node.children:
                node.children[ch] = Trie(node.depth + 1)
            node = node.children[ch]
            node.count += 1
            node.words.append(word)
        node.end = True

    def buildTree(self, words: list) -> None:
        """Insert every word of ``words`` into the trie."""
        for word in words:
            self.insert(word)

    def prefix(self, pref: str):
        """Return the stored words that start with ``pref``.

        Returns:
            A new list (safe for the caller to mutate) of the matching words,
            or None when no node exists for ``pref``.  Because remove() keeps
            nodes in place, a prefix whose words were all removed yields an
            empty list rather than None.
        """
        node = self
        for ch in pref:
            if ch not in node.children:
                return None
            node = node.children[ch]
        return list(node.words)

    def search(self, word: str) -> bool:
        """Return True if at least one copy of ``word`` is stored."""
        node = self
        for ch in word:
            if ch not in node.children:
                return False
            node = node.children[ch]
        return node.end

    def remove(self, word: str) -> bool:
        """Remove one stored copy of ``word``.

        Nodes are kept in place; only their bookkeeping is updated.

        Returns:
            True if a copy was removed, False if ``word`` was not stored.
        """
        if not self.search(word):
            return False
        node = self
        node.count -= 1
        node.words.remove(word)
        for ch in word:
            # Every node on the path drops one passing word and one copy of it.
            node = node.children[ch]
            node.count -= 1
            node.words.remove(word)
        # Any copy still listed on the terminal node is an exact match, so the
        # node stays a word end until the last copy is gone.
        node.end = word in node.words
        return True