緩存算法之FIFO、LRU、LFU的Python實現
早期計算機內存有限,程序的加載與運行依賴頁面置換算法,其目的是將部分數據緩存在內存中,使用時就不必再去磁盤上加載,從而提高運行效率。雖然現在計算機的內存空間已經有了很大的提升,但這種緩存策略一直被沿用,尤其是對於需要通過IO或數據庫獲取的數據,將加載過的數據緩存到內存中,可以減少IO與數據庫訪問這類耗時操作。
FIFO緩存算法:
緩存有兩個基本特點:一個是緩存空間有限,另一個是數據會過期。空間有限就需要我們充分利用有限的空間,因此數據的置換策略非常重要。基本的置換策略有FIFO、LRU、LFU,其中FIFO(先進先出)最爲簡單:緩存滿時淘汰最早被加載進來的數據,其基本假設是最近被加載進來的數據下次被使用的可能性大於更早被加載進來的數據,適用於符合這種假設的場景。其Python代碼如下:
class FIFOCache(object):
    """Fixed-capacity cache with first-in-first-out eviction and expiry.

    Keys are remembered in a ring buffer (``_key_list``); once the buffer is
    full, inserting a new key overwrites the oldest slot and drops that key
    from the lookup dict. Every cached value must be a ``dict``; the cache
    stamps it in place with a ``'_time_stamps'`` entry used for expiry.
    """

    def __init__(self, cache_time, opacity):
        """Create an empty cache.

        Args:
            cache_time: Lifetime of an entry, in seconds (may be fractional).
            opacity: Maximum number of keys held at once. A value <= 0
                yields a cache that never stores anything.
        """
        # Lifetime of an entry, in seconds.
        self.cache_time = cache_time
        # Capacity of the key ring buffer.
        self.opacity = opacity
        # Next ring-buffer slot to write.
        self._index = 0
        # Ring buffer of cached keys; None marks a slot not yet used.
        self._key_list = [None] * self.opacity
        # key -> cached dict.
        self._data_cache = {}

    def push_data(self, key, data):
        """Store ``data`` under ``key``, evicting the oldest key when full.

        Non-dict ``data`` is ignored. ``data`` is mutated: the insertion time
        is written to ``data['_time_stamps']``. Re-pushing an existing key
        replaces its value and timestamp but keeps its position in the
        eviction order.
        """
        import time

        if not isinstance(data, dict):
            return
        # Keep the full float timestamp: truncating to whole seconds makes
        # sub-second cache_time values expire almost immediately.
        data['_time_stamps'] = time.time()
        # Existing key: update in place so the ring buffer holds it once.
        if key in self._data_cache:
            self._data_cache[key] = data
            return
        if self.opacity <= 0:
            return
        # Wrap the write position around the ring buffer.
        if self._index >= self.opacity:
            self._index = 0
        # Drop whatever key previously lived in this slot.
        old_key = self._key_list[self._index]
        self._data_cache.pop(old_key, None)
        self._data_cache[key] = data
        self._key_list[self._index] = key
        self._index += 1

    def get_data(self, key):
        """Return the cached dict for ``key``, or None if absent or expired."""
        import time

        data = self._data_cache.get(key)
        if data is None:
            return None
        time_stamp = data.get('_time_stamps', 0)
        if time.time() - time_stamp < self.cache_time:
            return data
        return None

    def destory(self):
        """Drop every cached entry; the cache remains usable afterwards."""
        self._data_cache.clear()
        # Rebuild the ring buffer at full size and rewind the write position,
        # otherwise the next push_data would index past the end of the list.
        self._key_list = [None] * self.opacity
        self._index = 0
LRU緩存算法:
LRU緩存算法採取的緩存置換策略是:當緩存空間滿時,用新來的數據置換掉最久未被使用的那個數據。實現中採用雙向鏈表實現的雙端隊列,將每次訪問到的數據移到隊列的最前端,從而保證隊列裏的數據按最近使用時間有序,隊尾即是最久未使用的數據。Python代碼如下:
class ListNode(object):
    """Node of a doubly linked list, holding a payload and two links."""

    def __init__(self, data=None):
        """Create an unlinked node carrying ``data``."""
        self.data = data
        self.pre_node = None
        self.next_node = None

    def __repr__(self):
        """Return a debugging representation showing the payload."""
        return '%s(%r)' % (type(self).__name__, self.data)

    def clear_pointer(self):
        """Reset both links so the node no longer references neighbours."""
        self.pre_node = None
        self.next_node = None

    def destory(self):
        """Release the node: clear both links and drop the payload."""
        self.clear_pointer()
        self.data = None
class DoubleLinkList(object):
    """Doubly linked list of ``ListNode`` objects, usable as a deque.

    Two sentinel nodes (``_head_node`` and ``_tail_node``) bracket the real
    nodes, so insertion and removal never special-case the ends. The sentinels
    are never handed out to callers.
    """

    def __init__(self):
        """Create an empty list with the two sentinels linked together."""
        self._head_node = ListNode()
        self._tail_node = ListNode()
        self._head_node.next_node = self._tail_node
        self._tail_node.pre_node = self._head_node
        self._size = 0

    def get_head(self):
        """Return the first real node, or None when the list is empty."""
        first = self._head_node.next_node
        if first is self._tail_node:
            return None
        return first

    def get_tail(self):
        """Return the last real node, or None when the list is empty."""
        last = self._tail_node.pre_node
        if last is self._head_node:
            return None
        return last

    def show_link_list(self):
        """Print every real node's position and payload, front to back."""
        node = self._head_node.next_node
        position = 1
        # Stop at the tail sentinel so it is not printed as a list entry.
        while node is not None and node is not self._tail_node:
            # Single pre-formatted argument prints identically on Python 2/3.
            print('----> %s %s' % (position, node.data))
            position += 1
            node = node.next_node

    def _insert_node(self, node, pre_node, next_node):
        """Link ``node`` between the adjacent ``pre_node`` and ``next_node``."""
        node.clear_pointer()
        pre_node.next_node = node
        next_node.pre_node = node
        node.pre_node = pre_node
        node.next_node = next_node
        self._size += 1

    def _remove_node(self, node):
        """Unlink ``node`` and return it.

        Returns None without changing the list when ``node`` is None, is a
        sentinel, or is not linked to any neighbour.
        """
        if node is None or node is self._head_node or node is self._tail_node:
            return None
        pre_node = node.pre_node
        next_node = node.next_node
        # An unlinked node is not in the list; decrementing the size for it
        # would corrupt the count (e.g. on a double removal).
        if pre_node is None and next_node is None:
            return None
        if pre_node:
            pre_node.next_node = next_node
        if next_node:
            next_node.pre_node = pre_node
        self._size -= 1
        node.clear_pointer()
        return node

    def push_back(self, node):
        """Append ``node`` at the back; non-``ListNode`` values are ignored."""
        if not isinstance(node, ListNode):
            return
        self._insert_node(node, self._tail_node.pre_node, self._tail_node)

    def push_front(self, node):
        """Insert ``node`` at the front; non-``ListNode`` values are ignored."""
        if not isinstance(node, ListNode):
            return
        self._insert_node(node, self._head_node, self._head_node.next_node)

    def pop_back(self):
        """Remove and return the last real node, or None when empty."""
        return self._remove_node(self._tail_node.pre_node)

    def pop_front(self):
        """Remove and return the first real node, or None when empty."""
        return self._remove_node(self._head_node.next_node)

    def remove_node(self, node):
        """Remove ``node`` from the list; see ``_remove_node`` for returns."""
        return self._remove_node(node)

    def get_size(self):
        """Return the number of real nodes currently linked."""
        return self._size

    def clear_node(self):
        """Destroy every real node and reset the list to empty."""
        node = self._head_node.next_node
        while node is not None and node is not self._tail_node:
            following = node.next_node
            node.destory()
            node = following
        self._head_node.next_node = self._tail_node
        self._tail_node.pre_node = self._head_node
        self._size = 0

    def destory(self):
        """Destroy every real node.

        The sentinels stay linked to each other, so the list is empty but
        still usable afterwards instead of failing on the next push.
        """
        self.clear_node()
class LRUCache(object):
    """Fixed-capacity cache with least-recently-used eviction and expiry.

    A dict maps each key to its ``ListNode``; the nodes live in a
    ``DoubleLinkList`` ordered from most recently used (front) to least
    recently used (back). Every cached value must be a ``dict``; the cache
    writes ``'_key'`` and ``'_time_stamps'`` entries into it.
    """

    def __init__(self, cache_time, opacity):
        """Create an empty cache.

        Args:
            cache_time: Lifetime of an entry, in seconds (may be fractional).
            opacity: Maximum number of entries. A value <= 0 yields a cache
                that never stores anything.
        """
        self._dity_time = cache_time
        self._opacity = opacity
        self._data_map = {}  # {key: ListNode}
        self._link_list = DoubleLinkList()  # recency order, newest first

    def push_data(self, key, data):
        """Store ``data`` under ``key`` and mark it as most recently used.

        Non-dict ``data`` is ignored. ``data`` is mutated: ``'_key'`` and
        ``'_time_stamps'`` are written into it. When the cache is full the
        least recently used entry is evicted first.
        """
        import time

        if not isinstance(data, dict):
            return
        data['_key'] = key
        # Keep the full float timestamp: truncating to whole seconds makes
        # sub-second lifetimes expire almost immediately.
        data['_time_stamps'] = time.time()
        node = self._data_map.get(key)
        if node is not None:
            # A write counts as a use: refresh the payload and the recency.
            node.data = data
            self._link_list.remove_node(node)
            self._link_list.push_front(node)
            return
        if self._opacity <= 0:
            return
        if self._link_list.get_size() >= self._opacity:
            removed_node = self._link_list.pop_back()
            if removed_node and removed_node.data:
                self._data_map.pop(removed_node.data.get('_key'), None)
        node = ListNode(data)
        self._link_list.push_front(node)
        self._data_map[key] = node

    def get_data(self, key):
        """Return the cached dict for ``key``, or None if absent or expired.

        A hit moves the entry to the front of the recency order. An expired
        entry is removed so it stops occupying capacity.
        """
        import time

        node = self._data_map.get(key)
        if not node or not node.data:
            return None
        time_stamp = node.data.get('_time_stamps', 0)
        if time.time() - time_stamp < self._dity_time:
            self._link_list.remove_node(node)
            self._link_list.push_front(node)
            return node.data
        self._link_list.remove_node(node)
        self._data_map.pop(key, None)
        return None

    def destory(self):
        """Drop every cached entry; the cache remains usable afterwards."""
        self._data_map.clear()
        self._link_list.destory()
        # Start from a fresh list so later pushes never touch torn-down links.
        self._link_list = DoubleLinkList()
LFU緩存算法:
LFU緩存算法採取的緩存置換策略是:當緩存空間滿時,用新來的數據置換掉使用頻率最低的那個緩存數據。實現中採用最小堆維護數據的訪問次數,緩存已滿時每次插入都先從堆頂刪除訪問次數最少的數據,再將新的數據插入。Python代碼如下:
class HeapNode(object):
    """Heap entry that orders itself by how many times it has been used."""

    def __init__(self, data):
        """Create a node carrying ``data`` with a use count of 1."""
        # Position inside the owning heap's backing list.
        self.index = 0
        self.data = data
        # Number of recorded uses; this is the ordering key.
        self.use_times = 1

    def __str__(self):
        """Return the payload rendered as text."""
        # __str__ must return a string; returning the payload object itself
        # raises TypeError whenever the payload is not already a str.
        return str(self.data)

    def __gt__(self, other):
        """Return True when this node has been used more than ``other``."""
        return self.use_times > other.use_times

    def __lt__(self, other):
        """Return True when this node has been used less than ``other``."""
        return self.use_times < other.use_times

    def destory(self):
        """Release the payload and reset the use count to 1."""
        self.data = None
        self.use_times = 1
class EasyHeap(object):
    """Array-backed binary min-heap whose nodes remember their own index.

    Nodes must expose ``index`` and ``use_times`` attributes and support
    ``<``. Because each node stores its position, ``heapfix`` can restore
    the heap order around a node whose priority changed without searching
    the backing list for it.
    """

    def __init__(self, opacity):
        """Create an empty heap with ``opacity`` slots pre-allocated."""
        self._opacity = opacity
        self._heap = [None] * self._opacity
        self._size = 0

    def _get_parent(self, index):
        """Return the parent of the node at ``index``; None for the root."""
        if index <= 0:
            return None
        # Floor division keeps the index an int on Python 3 as well.
        return self._heap[(index - 1) // 2]

    def _get_left_child(self, index):
        """Return the left child of the node at ``index``, or None."""
        child_index = 2 * index + 1
        if child_index < self._size:
            return self._heap[child_index]
        return None

    def _get_right_child(self, index):
        """Return the right child of the node at ``index``, or None."""
        child_index = 2 * index + 2
        if child_index < self._size:
            return self._heap[child_index]
        return None

    def _heapfix_up(self, node):
        """Move ``node`` towards the root while it is smaller than its parent."""
        if node is None:
            return
        parent_node = self._get_parent(node.index)
        while parent_node is not None and node < parent_node:
            self._exchange_node(parent_node, node)
            parent_node = self._get_parent(node.index)

    def _heapfix_down(self, node):
        """Move ``node`` towards the leaves while a child is smaller than it."""
        if node is None:
            return
        while True:
            min_child = node
            left_node = self._get_left_child(node.index)
            if left_node is not None and left_node < min_child:
                min_child = left_node
            right_node = self._get_right_child(node.index)
            if right_node is not None and right_node < min_child:
                min_child = right_node
            if min_child is node:
                return
            self._exchange_node(min_child, node)

    def _exchange_node(self, node1, node2):
        """Swap the positions of two nodes in the backing list."""
        node1.index, node2.index = node2.index, node1.index
        self._heap[node1.index] = node1
        self._heap[node2.index] = node2

    def heapfix(self, node):
        """Restore the heap order around ``node`` after its priority changed.

        Does nothing when ``node`` is None or is not currently stored at the
        position recorded in ``node.index``.
        """
        if node is None:
            return
        if not 0 <= node.index < self._size:
            return
        if self._heap[node.index] is not node:
            return
        self._heapfix_up(node)
        self._heapfix_down(node)

    def get_head(self):
        """Return the smallest node without removing it, or None if empty."""
        if self._size <= 0:
            return None
        return self._heap[0]

    def heappop(self):
        """Remove and return the smallest node, or None if the heap is empty."""
        if self._size <= 0:
            return None
        head_node = self._heap[0]
        tail_node = self._heap[self._size - 1]
        # Move the last node to the root, then shrink the heap.
        self._exchange_node(head_node, tail_node)
        self._size -= 1
        # Clear the vacated slot so the popped node is no longer reachable
        # from show_heap() or destory().
        self._heap[self._size] = None
        self.heapfix(self._heap[0])
        return head_node

    def heappush(self, node):
        """Insert ``node`` into the heap.

        The backing list grows when every pre-allocated slot is in use.
        """
        node.index = self._size
        if self._size < len(self._heap):
            self._heap[self._size] = node
        else:
            self._heap.append(node)
        self._size += 1
        self.heapfix(node)

    def get_size(self):
        """Return the number of nodes currently in the heap."""
        return self._size

    def show_heap(self):
        """Print ``[key, use_times, index]`` for every stored node."""
        result = []
        for node in self._heap:
            if node is None:
                continue
            key = node.data.get('_key') if node.data else None
            result.append([key, node.use_times, node.index])
        # Single pre-formatted argument prints identically on Python 2/3.
        print('====== %s' % (result,))

    def destory(self):
        """Destroy every stored node and leave the heap empty."""
        for slot, node in enumerate(self._heap):
            if node is not None:
                node.destory()
            self._heap[slot] = None
        self._size = 0
class LFUCache(object):
    """Fixed-capacity cache with least-frequently-used eviction and expiry.

    A dict maps each key to its ``HeapNode``; the nodes live in an
    ``EasyHeap`` ordered by use count, so the least used entry sits on top
    and is the one evicted when room is needed. Every cached value must be a
    ``dict``; the cache writes ``'_key'`` and ``'_time_stamps'`` into it.
    """

    def __init__(self, cache_time, opacity):
        """Create an empty cache.

        Args:
            cache_time: Lifetime of an entry, in seconds (may be fractional).
            opacity: Maximum number of entries. A value <= 0 yields a cache
                that never stores anything.
        """
        self._dity_time = cache_time
        self._opacity = opacity
        self._data_map = {}  # {key: HeapNode}
        self._heap = EasyHeap(self._opacity)  # min-heap on use count

    def push_data(self, key, data):
        """Store ``data`` under ``key``, evicting the least used entry if full.

        Non-dict ``data`` is ignored. ``data`` is mutated: ``'_key'`` and
        ``'_time_stamps'`` are written into it. Re-pushing an existing key
        replaces its payload and timestamp but keeps its use count.
        """
        import time

        if not isinstance(data, dict):
            return
        data['_key'] = key
        # Keep the full float timestamp: truncating to whole seconds makes
        # sub-second lifetimes expire almost immediately.
        data['_time_stamps'] = time.time()
        node = self._data_map.get(key)
        if node is not None:
            node.data = data
            return
        if self._opacity <= 0:
            return
        if self._heap.get_size() >= self._opacity:
            removed_node = self._heap.heappop()
            if removed_node and removed_node.data:
                self._data_map.pop(removed_node.data.get('_key'), None)
        node = HeapNode(data)
        self._heap.heappush(node)
        self._data_map[key] = node

    def get_data(self, key):
        """Return the cached dict for ``key``, or None if absent or expired.

        A hit increments the entry's use count. An expired entry is removed
        so it stops occupying capacity.
        """
        import time

        node = self._data_map.get(key)
        if not node or not node.data:
            return None
        time_stamp = node.data.get('_time_stamps', 0)
        if time.time() - time_stamp < self._dity_time:
            node.use_times += 1
            self._heap.heapfix(node)
            return node.data
        self._discard(key, node)
        return None

    def _discard(self, key, node):
        """Remove an expired ``node`` from the heap and the key map."""
        # Live nodes start at a use count of 1, so 0 sorts below all of them
        # and heapfix lifts the node to the top, where heappop can take it.
        node.use_times = 0
        self._heap.heapfix(node)
        if self._heap.get_head() is node:
            self._heap.heappop()
            self._data_map.pop(key, None)

    def destory(self):
        """Drop every cached entry; the cache remains usable afterwards."""
        self._data_map.clear()
        self._heap.destory()
        # Start from a fresh heap so no destroyed node lingers in it.
        self._heap = EasyHeap(self._opacity)
單元測試(需要簡單調整):
# Manual smoke test. Set ``case`` to 1 (FIFO), 2 (LRU) or 3 (LFU) to run
# one scenario; the default of 0 runs nothing.
if "__main__" == __name__:
    import random
    # Binds ``time`` as a module global when run as a script, for code in
    # this file that refers to ``time`` without importing it.
    import time

    case = 0
    if case == 1:
        # FIFO: push random keys and dump the internal state after each push.
        cache_obj = FIFOCache(0.1, 20)
        for _ in range(10000):
            key = random.randint(1, 30)
            cache_obj.push_data(key, {'key': key})
            # Single pre-formatted argument prints identically on Python 2/3.
            print('++++ %s' % (cache_obj._key_list,))
            print('---- %s' % (list(cache_obj._data_cache.keys()),))
            print(cache_obj.get_data(random.randint(1, 20)))
        cache_obj.destory()
    if case == 2:
        # LRU: random mix of writes and reads.
        cache_obj = LRUCache(5, 20)
        for _ in range(10000):
            if random.random() < 0.5:
                key = random.randint(1, 100)
                cache_obj.push_data(key, {'key': key})
            else:
                cache_obj.get_data(random.randint(1, 30))
        cache_obj.destory()
    if case == 3:
        # LFU: random mix of writes and reads.
        cache_obj = LFUCache(5, 10)
        for _ in range(10000):
            if random.random() < 0.5:
                key = random.randint(1, 30)
                cache_obj.push_data(key, {'key': key})
            else:
                cache_obj.get_data(random.randint(1, 30))
        cache_obj.destory()