leveldb源码学习之LRU Cache

推荐结合 leveldb-handbook 阅读源码

LevelDB 内部通过“双向链表 + 哈希表”实现了一个标准的 LRU Cache：链表负责维护淘汰顺序，哈希表负责按 key 快速定位元素。

include/leveldb/cache.h

// Cache 是将key映射为value的接口. 有内部同步机制可供多线程并发安全访问
// 可以自动淘汰元素以为新元素腾出空间. Values 在缓存中有一定的费用. 例如一个value是变长string的cache，可以使用string的长度作为其费用.
// 内置的 cache 实现使用LRU least-recently-used 作为淘汰策略。调用方也可以自定义更复杂的策略。 (如scan-resistance, variable cache sizing, etc.)

#ifndef STORAGE_LEVELDB_INCLUDE_CACHE_H_
#define STORAGE_LEVELDB_INCLUDE_CACHE_H_

#include <stdint.h>

#include "leveldb/export.h"
#include "leveldb/slice.h"

namespace leveldb {

class LEVELDB_EXPORT Cache;

// Create a new cache with a fixed size capacity. This implementation of Cache
// uses a least-recently-used (LRU) eviction policy.
LEVELDB_EXPORT Cache* NewLRUCache(size_t capacity);

class LEVELDB_EXPORT Cache {
 public:
  Cache() = default;

  Cache(const Cache&) = delete;
  Cache& operator=(const Cache&) = delete;

  // Destroys all remaining entries by invoking the "deleter" function that
  // was supplied together with each entry.
  virtual ~Cache();

  // Opaque handle to an entry stored in the cache.
  struct Handle {};

  // Insert a mapping from key->value into the cache and assign it the
  // specified charge against the total cache capacity.
  //
  // Returns a handle that corresponds to the mapping. The caller must call
  // this->Release(handle) when the returned mapping is no longer needed.
  //
  // When the inserted entry is no longer needed, the key and value will be
  // passed to "deleter".
  virtual Handle* Insert(const Slice& key, void* value, size_t charge,
                         void (*deleter)(const Slice& key, void* value)) = 0;

  // If the cache has no mapping for "key", returns nullptr.
  //
  // Else return a handle that corresponds to the mapping.  The caller
  // must call this->Release(handle) when the returned mapping is no
  // longer needed.
  virtual Handle* Lookup(const Slice& key) = 0;

  // Release a mapping returned by a previous Lookup().
  // REQUIRES: handle must not have been released yet.
  // REQUIRES: handle must have been returned by a method on *this.
  virtual void Release(Handle* handle) = 0;

  // Return the value encapsulated in a handle returned by a successful
  // Lookup().
  // REQUIRES: handle must not have been released yet.
  // REQUIRES: handle must have been returned by a method on *this.
  virtual void* Value(Handle* handle) = 0;

  // If the cache contains an entry for key, erase it. Note that the
  // underlying entry is kept around until all existing handles to it have
  // been released.
  virtual void Erase(const Slice& key) = 0;

  // Return a new numeric id. May be used by multiple clients who are sharing
  // the same cache to partition the key space. Typically a client allocates
  // a new id at startup and prepends the id to its cache keys.
  virtual uint64_t NewId() = 0;

  // Remove all cache entries that are not actively in use. Memory-constrained
  // applications may wish to call this method to reduce memory usage.
  // The default implementation of Prune() does nothing. Subclasses are
  // strongly encouraged to override it. A future release of leveldb may
  // change Prune() to a pure virtual method.
  virtual void Prune() {}

  // Return an estimate of the combined charges of all elements stored in the
  // cache.
  virtual size_t TotalCharge() const = 0;

 private:
  // NOTE(review): no definitions of these three helpers or of Rep are visible
  // in this file; presumably unused leftovers -- confirm before relying on
  // them.
  void LRU_Remove(Handle* e);
  void LRU_Append(Handle* e);
  void Unref(Handle* e);

  struct Rep;
  Rep* rep_;
};

}  // namespace leveldb

#endif  // STORAGE_LEVELDB_INCLUDE_CACHE_H_

util / cache.cc

lrucache 通过元素句柄 LRUHandle 来表示元素

// An entry is a variable-length, heap-allocated structure. Entries are kept
// in a circular doubly linked list ordered by access time.
// LRUHandle is the handle that represents one such entry.
// key_data must remain the last member: the key bytes are stored inline,
// starting at key_data and extending past the nominal end of the struct.
struct LRUHandle {
  void* value; // Value stored for this entry.
  void (*deleter)(const Slice&, void* value); // Called to destroy the entry's key/value.
  LRUHandle* next_hash;  // Next entry in the same hash-table bucket chain.
  LRUHandle* next;       // Next entry in the circular doubly linked list.
  LRUHandle* prev;       // Previous entry in the circular doubly linked list.
  size_t charge;  // TODO(opt): Only allow uint32_t?
  size_t key_length;
  bool in_cache;     // Whether the entry is in the cache.
  uint32_t refs;     // Reference count, including the cache's reference if present.
  uint32_t hash;     // Hash of key(); used for fast bucket selection and comparisons.
  char key_data[1];  // Beginning of key

  // Returns the key as a Slice over the inline key bytes.
  Slice key() const {
    // next is only equal to this if the LRU handle is the list head of an
    // empty list. List heads never have meaningful keys.
    assert(next != this);

    return Slice(key_data, key_length);
  }
};

通过简单实现hashtable来快速定位cache中的key，经测试要比g++内部实现的hashtable更快

// A simple chained hash table over LRUHandle entries. Entries that fall into
// the same bucket are linked through LRUHandle::next_hash. The table owns only
// its bucket array (list_), never the handles stored in it.
class HandleTable {
 public:
  HandleTable() : length_(0), elems_(0), list_(nullptr) { Resize(); }
  ~HandleTable() { delete[] list_; }

  // list_ is an owning raw pointer released in the destructor, so a
  // compiler-generated copy would make two tables delete[] the same array.
  // Copying is therefore disabled.
  HandleTable(const HandleTable&) = delete;
  HandleTable& operator=(const HandleTable&) = delete;

  // Returns the entry matching key/hash, or nullptr if there is none.
  LRUHandle* Lookup(const Slice& key, uint32_t hash) {
    return *FindPointer(key, hash);
  }

  // Inserts h into the table.
  // If an entry with the same key/hash already exists, h takes its place in
  // the bucket chain and the previous entry is returned. Otherwise h is linked
  // at the end of its bucket chain and nullptr is returned.
  LRUHandle* Insert(LRUHandle* h) {
    LRUHandle** ptr = FindPointer(h->key(), h->hash);
    LRUHandle* old = *ptr;
    h->next_hash = (old == nullptr ? nullptr : old->next_hash);
    *ptr = h;
    if (old == nullptr) {
      ++elems_;
      if (elems_ > length_) {
        // More entries than buckets: grow the table so that the average
        // chain length stays at or below one.
        Resize();
      }
    }
    return old;
  }

  // Unlinks and returns the entry matching key/hash, or returns nullptr if
  // there is no such entry.
  LRUHandle* Remove(const Slice& key, uint32_t hash) {
    LRUHandle** ptr = FindPointer(key, hash);
    LRUHandle* result = *ptr;
    if (result != nullptr) {
      // Make the predecessor's link skip over the removed entry.
      *ptr = result->next_hash;
      --elems_;
    }
    return result;
  }

 private:
  // The table consists of an array of buckets where each bucket is a linked
  // list of cache entries that hash into the bucket.
  uint32_t length_;    // Number of buckets; always a power of two.
  uint32_t elems_;     // Number of entries currently stored.
  LRUHandle** list_;   // Bucket array of length_ chain heads.

  // Returns a pointer to the slot that points to the entry matching key/hash.
  // If there is no such entry, returns a pointer to the trailing (nullptr)
  // slot of the corresponding bucket chain. Chains are unordered.
  LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
    // length_ is a power of two, so the mask selects the bucket index.
    LRUHandle** ptr = &list_[hash & (length_ - 1)];
    while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
      ptr = &(*ptr)->next_hash;
    }
    return ptr;
  }

  // Reallocates the bucket array with at least elems_ buckets (minimum 4,
  // always a power of two) and relinks every existing entry into it.
  void Resize() {
    uint32_t new_length = 4;
    while (new_length < elems_) {
      new_length *= 2;
    }
    // The trailing () value-initializes every bucket to nullptr.
    LRUHandle** new_list = new LRUHandle*[new_length]();
    uint32_t count = 0;
    for (uint32_t i = 0; i < length_; i++) {
      // Walk each old chain and push every entry onto the front of its new
      // bucket, selected from the entry's stored hash.
      LRUHandle* h = list_[i];
      while (h != nullptr) {
        LRUHandle* next = h->next_hash;
        uint32_t hash = h->hash;
        LRUHandle** ptr = &new_list[hash & (new_length - 1)];
        h->next_hash = *ptr;
        *ptr = h;
        h = next;
        count++;
      }
    }
    assert(elems_ == count);
    delete[] list_;
    list_ = new_list;
    length_ = new_length;
  }
};

LRU cache 实现


// LRU cache implementation
//
// Cache entries have an "in_cache" boolean indicating whether the cache has a
// reference on the entry. The only ways that this can become false without
// the entry being passed to its "deleter" are via Erase(), via Insert() when
// an element with a duplicate key is inserted, or on destruction of the cache.
//
// The cache keeps two linked lists of items. Every item in the cache is in
// exactly one of the lists. Items that are still referenced by clients but
// have been erased from the cache are in neither list. The lists are:
// - in-use: contains the items currently referenced by clients, in no
//   particular order.
// - LRU: contains the items not currently referenced by clients, in LRU order.
// Elements are moved between these lists by Ref() and Unref(), when they
// detect an element in the cache acquiring or losing its only external
// reference.

class LRUCache {
 public:
  LRUCache();
  ~LRUCache();

  // Separate from constructor so caller can easily make an array of LRUCache
  void SetCapacity(size_t capacity) { capacity_ = capacity; }

  // Like Cache methods, but with an extra "hash" parameter.
  Cache::Handle* Insert(const Slice& key, uint32_t hash, void* value,
                        size_t charge,
                        void (*deleter)(const Slice& key, void* value));
  Cache::Handle* Lookup(const Slice& key, uint32_t hash);
  void Release(Cache::Handle* handle);
  void Erase(const Slice& key, uint32_t hash);
  void Prune();
  // Returns the current total charge (usage_), read under mutex_.
  size_t TotalCharge() const {
    MutexLock l(&mutex_);
    return usage_;
  }

 private:
  void LRU_Remove(LRUHandle* e);
  void LRU_Append(LRUHandle* list, LRUHandle* e);
  void Ref(LRUHandle* e);
  void Unref(LRUHandle* e);
  bool FinishErase(LRUHandle* e) EXCLUSIVE_LOCKS_REQUIRED(mutex_);

  // Initialized before use.
  size_t capacity_;

  // mutex_ protects the following state.
  mutable port::Mutex mutex_;
  size_t usage_ GUARDED_BY(mutex_);

  // Dummy head of LRU list.
  // lru.prev is newest entry, lru.next is oldest entry.
  // Entries have refs==1 and in_cache==true (only the cache references them).
  LRUHandle lru_ GUARDED_BY(mutex_);

  // Dummy head of in-use list.
  // Entries are in use by clients, and have refs >= 2 and in_cache==true.
  LRUHandle in_use_ GUARDED_BY(mutex_);

  HandleTable table_ GUARDED_BY(mutex_);
};

构造与析构

LRUCache::LRUCache() : capacity_(0), usage_(0) {
  // Both lists start out empty: each dummy head links to itself in both
  // directions, forming a circular list with no real entries.
  in_use_.prev = &in_use_;
  in_use_.next = &in_use_;
  lru_.prev = &lru_;
  lru_.next = &lru_;
}

LRUCache::~LRUCache() {
  // It is an error for a caller to still hold an unreleased handle.
  assert(in_use_.next == &in_use_);
  LRUHandle* cur = lru_.next;
  while (cur != &lru_) {
    // Save the successor first: Unref() below drops the last reference and
    // frees cur.
    LRUHandle* const following = cur->next;
    assert(cur->in_cache);
    cur->in_cache = false;
    assert(cur->refs == 1);  // Invariant of lru_ list.
    Unref(cur);
    cur = following;
  }
}

增加元素的引用计数：若元素当前位于 lru_ 链表（refs==1 且 in_cache==true，即只有 cache 在引用它），先把它移到 in_use_ 链表，这样它在被客户端使用期间不会被淘汰。

void LRUCache::Ref(LRUHandle* e) {
  // An entry held only by the cache sits on lru_; once it gains another
  // reference it belongs on in_use_.
  const bool on_lru_list = e->in_cache && e->refs == 1;
  if (on_lru_list) {
    LRU_Remove(e);
    LRU_Append(&in_use_, e);
  }
  ++e->refs;
}

减少元素的引用计数：计数降为 0 时调用 deleter 并释放元素内存；计数降为 1 且元素仍在 cache 中（in_cache==true）时，把它从 in_use_ 链表移到 lru_ 链表的最新一端，等待日后被淘汰。

void LRUCache::Unref(LRUHandle* e) {
  assert(e->refs > 0);
  const uint32_t remaining = --e->refs;
  if (remaining == 0) {
    // Last reference gone: hand key/value to the deleter and free the entry.
    assert(!e->in_cache);
    (*e->deleter)(e->key(), e->value);
    free(e);
    return;
  }
  if (remaining == 1 && e->in_cache) {
    // Only the cache still references the entry; move it to the newest end
    // of the lru_ list.
    LRU_Remove(e);
    LRU_Append(&lru_, e);
  }
}

链表元素添加与删除

void LRUCache::LRU_Remove(LRUHandle* e) {
  // Splice e out by linking its two neighbours to each other. e's own
  // next/prev pointers are left untouched.
  LRUHandle* const before = e->prev;
  LRUHandle* const after = e->next;
  after->prev = before;
  before->next = after;
}

void LRUCache::LRU_Append(LRUHandle* list, LRUHandle* e) {
  // Insert e immediately before the dummy head *list, i.e. after the current
  // last element, which makes e the newest entry.
  LRUHandle* const tail = list->prev;
  e->prev = tail;
  e->next = list;
  tail->next = e;
  list->prev = e;
}

链表元素查找，借助hashtable快速定位

Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
  MutexLock guard(&mutex_);
  LRUHandle* const found = table_.Lookup(key, hash);
  if (found == nullptr) {
    return nullptr;  // No mapping for this key.
  }
  // The returned handle owns one reference; the caller must Release() it.
  Ref(found);
  return reinterpret_cast<Cache::Handle*>(found);
}

释放元素、插入元素

void LRUCache::Release(Cache::Handle* handle) {
  // Drops the reference that the handle was holding, under mutex_.
  LRUHandle* const entry = reinterpret_cast<LRUHandle*>(handle);
  MutexLock guard(&mutex_);
  Unref(entry);
}

Cache::Handle* LRUCache::Insert(const Slice& key, uint32_t hash, void* value,
                                size_t charge,
                                void (*deleter)(const Slice& key,
                                                void* value)) {
  MutexLock guard(&mutex_);

  // The key bytes are stored inline after the struct. key_data[1] already
  // accounts for one byte, hence the "- 1".
  const auto key_bytes = key.size();
  LRUHandle* const e = reinterpret_cast<LRUHandle*>(
      malloc(sizeof(LRUHandle) - 1 + key_bytes));
  memcpy(e->key_data, key.data(), key_bytes);
  e->key_length = key_bytes;
  e->hash = hash;
  e->value = value;
  e->deleter = deleter;
  e->charge = charge;
  e->in_cache = false;
  e->refs = 1;  // Reference owned by the returned handle.

  if (capacity_ == 0) {
    // don't cache. (capacity_==0 is supported and turns off caching.)
    // next is read by key() in an assert, so it must be initialized
    e->next = nullptr;
  } else {
    ++e->refs;  // Reference owned by the cache itself.
    e->in_cache = true;
    LRU_Append(&in_use_, e);
    usage_ += charge;
    // If the table already held an entry with this key, finish evicting it.
    FinishErase(table_.Insert(e));
  }

  // Over capacity: evict from the oldest end of lru_ until usage fits or no
  // unreferenced entries remain.
  for (;;) {
    LRUHandle* const victim = lru_.next;
    if (usage_ <= capacity_ || victim == &lru_) {
      break;
    }
    assert(victim->refs == 1);
    bool erased = FinishErase(table_.Remove(victim->key(), victim->hash));
    if (!erased) {  // to avoid unused variable when compiled NDEBUG
      assert(erased);
    }
  }

  return reinterpret_cast<Cache::Handle*>(e);
}

// Second half of an erase: the entry has already been taken out of the hash
// table; this unlinks it from its list, returns its charge to the budget and
// drops the cache's reference.
// Returns false (and does nothing) when e is nullptr, true otherwise.
bool LRUCache::FinishErase(LRUHandle* e) {
  if (e == nullptr) {
    return false;
  }
  assert(e->in_cache);
  LRU_Remove(e);
  e->in_cache = false;
  usage_ -= e->charge;
  Unref(e);
  return true;
}

清空lru链表

void LRUCache::Prune() {
  MutexLock l(&mutex_);
  while (lru_.next != &lru_) {
    LRUHandle* e = lru_.next;
    assert(e->refs == 1);
    bool erased = FinishErase(table_.Remove(e->key(), e->hash));
    if (!erased) {  // to avoid unused variable when compiled NDEBUG
      assert(erased);
    }
  }
}

分片LRU cache，在以上cache基础上，新增根据key的hash值分配到不同LRUcache的ShardedLRUCache

// Number of hash bits used to select a shard, and the resulting shard count
// (2^kNumShardBits).
static constexpr int kNumShardBits = 4;
static constexpr int kNumShards = 1 << kNumShardBits;

class ShardedLRUCache : public Cache {
 private:
  LRUCache shard_[kNumShards];
  port::Mutex id_mutex_;
  uint64_t last_id_;

  static inline uint32_t HashSlice(const Slice& s) {
    return Hash(s.data(), s.size(), 0);
  }

  static uint32_t Shard(uint32_t hash) { return hash >> (32 - kNumShardBits); }

 public:
  explicit ShardedLRUCache(size_t capacity) : last_id_(0) {
    const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards;
    for (int s = 0; s < kNumShards; s++) {
      shard_[s].SetCapacity(per_shard);
    }
  }
  ~ShardedLRUCache() override {}
  Handle* Insert(const Slice& key, void* value, size_t charge,
                 void (*deleter)(const Slice& key, void* value)) override {
    const uint32_t hash = HashSlice(key);
    return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter);
  }
  Handle* Lookup(const Slice& key) override {
    const uint32_t hash = HashSlice(key);
    return shard_[Shard(hash)].Lookup(key, hash);
  }
  void Release(Handle* handle) override {
    LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
    shard_[Shard(h->hash)].Release(handle);
  }
  void Erase(const Slice& key) override {
    const uint32_t hash = HashSlice(key);
    shard_[Shard(hash)].Erase(key, hash);
  }
  void* Value(Handle* handle) override {
    return reinterpret_cast<LRUHandle*>(handle)->value;
  }
  uint64_t NewId() override {
    MutexLock l(&id_mutex_);
    return ++(last_id_);
  }
  void Prune() override {
    for (int s = 0; s < kNumShards; s++) {
      shard_[s].Prune();
    }
  }
  size_t TotalCharge() const override {
    size_t total = 0;
    for (int s = 0; s < kNumShards; s++) {
      total += shard_[s].TotalCharge();
    }
    return total;
  }
};

leveldb源码学习之LRU Cache

Python中的logging模塊：按時間滾動、按文件大小滾動、過期刪除

win10 anaconda 安裝tensorflow 2.x 使用報錯：has no attribute 'populate_dict_with_module_objects'

CNN 可視化解釋

Datawhale 計算機視覺基礎-圖像處理（上）- Task06 邊緣檢測

Datawhale 計算機視覺基礎-圖像處理（上）-Task05 圖像分割/二值化

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結