leveldb源碼分析：數據插入續(跳錶)

leveldb數據的插入-跳錶

本文主要是接着上一篇文章，繼續深入探索Write函數調用插入之後的流程。

status = WriteBatchInternal::InsertInto(updates, mem_);

InsertInto插入數據函數

namespace {
class MemTableInserter : public WriteBatch::Handler {                     // MemTable插入類
 public:
  SequenceNumber sequence_;
  MemTable* mem_;

  void Put(const Slice& key, const Slice& value) override {               // 添加內容
    mem_->Add(sequence_, kTypeValue, key, value);                         // 添加序列號 插入類型  key  value
    sequence_++;
  }
  void Delete(const Slice& key) override {
    mem_->Add(sequence_, kTypeDeletion, key, Slice());                    // 添加內容 序列號  刪除類型  key 空的value
    sequence_++;
  }
};
}  // namespace

// Applies every record of batch `b` to `memtable`.
//
// A MemTableInserter is pointed at `memtable` and seeded with the sequence
// number read from the batch; the batch then drives it through Iterate().
// Returns whatever status Iterate() reports.
Status WriteBatchInternal::InsertInto(const WriteBatch* b, MemTable* memtable) {
  MemTableInserter handler;
  handler.mem_ = memtable;
  handler.sequence_ = WriteBatchInternal::Sequence(b);
  return b->Iterate(&handler);
}

可以得知，真正的插入數據的操作是在InsertInto函數中完成的：先從WriteBatch中取出序列號並設置到inserter的sequence_屬性中，再將當前的memtable設置到inserter的mem_屬性中，最後調用WriteBatch的Iterate方法，逐條插入數據。

// Decodes the records stored in rep_ and forwards each one to `handler`.
//
// The buffer must start with a header of kHeader bytes. Each record after it
// begins with a one-byte tag:
//   kTypeValue    -> length-prefixed key, then length-prefixed value -> Put()
//   kTypeDeletion -> length-prefixed key                             -> Delete()
//
// Returns Status::Corruption if the buffer is shorter than the header, a
// record cannot be decoded, a tag is unknown, or the number of decoded
// records differs from WriteBatchInternal::Count(this); otherwise OK.
Status WriteBatch::Iterate(Handler* handler) const {
  Slice input(rep_);
  if (input.size() < kHeader) {
    return Status::Corruption("malformed WriteBatch (too small)");
  }
  input.remove_prefix(kHeader);  // Skip the header; records follow.

  Slice key, value;
  int found = 0;  // Number of records seen so far, including a failing one.
  while (!input.empty()) {
    ++found;
    const char tag = input[0];
    input.remove_prefix(1);  // Consume the tag byte.

    if (tag == kTypeValue) {
      if (!GetLengthPrefixedSlice(&input, &key) ||
          !GetLengthPrefixedSlice(&input, &value)) {
        return Status::Corruption("bad WriteBatch Put");
      }
      handler->Put(key, value);
    } else if (tag == kTypeDeletion) {
      if (!GetLengthPrefixedSlice(&input, &key)) {
        return Status::Corruption("bad WriteBatch Delete");
      }
      handler->Delete(key);
    } else {
      return Status::Corruption("unknown WriteBatch tag");
    }
  }

  // The record count stored in the batch must match what was decoded.
  if (found != WriteBatchInternal::Count(this)) {
    return Status::Corruption("WriteBatch has wrong count");
  }
  return Status::OK();
}

此時就調用了迭代的方法來插入數據，此時從執行流程可知，先檢查頭部信息，檢查完成頭部信息之後，然後再檢查該數據的標誌位，調用handler的Put或者Delete方法。

// Decodes the records stored in rep_ and forwards each one to `handler`.
//
// The buffer must start with a header of kHeader bytes. Each record after it
// begins with a one-byte tag:
//   kTypeValue    -> length-prefixed key, then length-prefixed value -> Put()
//   kTypeDeletion -> length-prefixed key                             -> Delete()
//
// Returns Status::Corruption if the buffer is shorter than the header, a
// record cannot be decoded, a tag is unknown, or the number of decoded
// records differs from WriteBatchInternal::Count(this); otherwise OK.
Status WriteBatch::Iterate(Handler* handler) const {
  Slice input(rep_);
  if (input.size() < kHeader) {
    return Status::Corruption("malformed WriteBatch (too small)");
  }
  input.remove_prefix(kHeader);  // Skip the header; records follow.

  Slice key, value;
  int found = 0;  // Number of records seen so far, including a failing one.
  while (!input.empty()) {
    ++found;
    const char tag = input[0];
    input.remove_prefix(1);  // Consume the tag byte.

    if (tag == kTypeValue) {
      if (!GetLengthPrefixedSlice(&input, &key) ||
          !GetLengthPrefixedSlice(&input, &value)) {
        return Status::Corruption("bad WriteBatch Put");
      }
      handler->Put(key, value);
    } else if (tag == kTypeDeletion) {
      if (!GetLengthPrefixedSlice(&input, &key)) {
        return Status::Corruption("bad WriteBatch Delete");
      }
      handler->Delete(key);
    } else {
      return Status::Corruption("unknown WriteBatch tag");
    }
  }

  // The record count stored in the batch must match what was decoded.
  if (found != WriteBatchInternal::Count(this)) {
    return Status::Corruption("WriteBatch has wrong count");
  }
  return Status::OK();
}

此時執行的handler就是MemTableInserter的實例，並調用該Put和Delete方法；

// Adds a value entry for `key`, tagged with the current sequence number,
// then advances the sequence number.
void Put(const Slice& key, const Slice& value) override {
    mem_->Add(sequence_, kTypeValue, key, value);
    ++sequence_;
  }
  // Adds a deletion entry for `key` (with an empty value), tagged with the
  // current sequence number, then advances the sequence number.
  void Delete(const Slice& key) override {
    mem_->Add(sequence_, kTypeDeletion, key, Slice());
    ++sequence_;
  }

此時就是調用了mem_的Add方法，只不過就是利用了不同的Type來標記是新增數據還是刪除數據；此時查看MemTable相關內容

MemTable細節相關

MemTable就是內存中保存的數據，當內存數據規模達到閾值時，就會將內存數據寫入到文件中，此時先查看Add方法。

mem_->Add方法

// Encodes one entry into memory obtained from arena_ and inserts a pointer
// to it into table_.
//
//   s     : sequence number stored with the entry
//   type  : entry kind, packed into the low byte of the 8-byte tag
//   key   : user key bytes
//   value : value bytes (callers pass an empty Slice for deletions)
void MemTable::Add(SequenceNumber s, ValueType type, const Slice& key,
                   const Slice& value) {
  // Format of an entry is concatenation of:
  //  key_size     : varint32 of internal_key.size()
  //  key bytes    : char[internal_key.size()]
  //  value_size   : varint32 of value.size()
  //  value bytes  : char[value.size()]
  // Here the internal key is the user key followed by an 8-byte tag holding
  // (s << 8) | type.
  const size_t user_key_len = key.size();
  const size_t value_len = value.size();
  const size_t internal_key_len = user_key_len + 8;  // user key + 8-byte tag
  const size_t total_len = VarintLength(internal_key_len) + internal_key_len +
                           VarintLength(value_len) + value_len;

  char* const entry = arena_.Allocate(total_len);

  // Length prefix of the internal key, then the user key bytes.
  char* cursor = EncodeVarint32(entry, internal_key_len);
  memcpy(cursor, key.data(), user_key_len);
  cursor += user_key_len;

  // Fixed 8-byte tag: sequence number in the upper bits, type in the low byte.
  EncodeFixed64(cursor, (s << 8) | type);
  cursor += 8;

  // Length prefix of the value, then the value bytes.
  cursor = EncodeVarint32(cursor, value_len);
  memcpy(cursor, value.data(), value_len);

  // The bytes written must exactly fill the allocation.
  assert(cursor + value_len == entry + total_len);
  table_.Insert(entry);
}

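此時主要就是將key、序列號與類型、value按固定格式編碼到buf中，並調用table_.Insert將buf插入。數據格式如下；

[internal_key_size: varint32][key: key_size字節][(sequence << 8) | type: 固定8字節][value_size: varint32][value: value_size字節]，其中internal_key_size = key_size + 8。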

SkipList跳錶

在上一節中，最後調用了table_.Insert函數插入數據，此時的table定義如下；

  // Skip list keyed by `const char*` and ordered by KeyComparator.
  using Table = SkipList<const char*, KeyComparator>;

  // The skip list instance that receives entries through Insert().
  Table table_;

因此table_.Insert調用的就是SkipList的Insert方法。table_是在MemTable的初始化過程中構造的；

// Constructs an empty memtable with a reference count of zero.
// table_ is built from the stored comparator_ and the address of arena_.
MemTable::MemTable(const InternalKeyComparator& comparator)
    : comparator_(comparator),
      refs_(0),
      table_(comparator_, &arena_) {}
    
// DBImpl初始化MemTable
mem = new MemTable(internal_comparator_)

由此可知，初始化table_的兩個參數中，比較器來自DBImpl傳入的internal_comparator_（保存在MemTable的comparator_成員中），另一個參數則是MemTable自身的arena_成員的地址，跳錶的節點內存都從該arena_中申請。

此時查看SkipList的初始化過程；

// Constructs an empty skip list that orders keys with `cmp` and creates its
// nodes through `arena`.
//
// The head node is created with the full kMaxHeight levels, while the list's
// current height starts at 1. The random generator is seeded with a fixed
// constant.
template <typename Key, class Comparator>
SkipList<Key, Comparator>::SkipList(Comparator cmp, Arena* arena)
    : compare_(cmp),
      arena_(arena),
      head_(NewNode(0 /* any key will do */, kMaxHeight)),
      max_height_(1),
      rnd_(0xdeadbeef) {
  // Empty list: the head has no successor on any level.
  int level = 0;
  while (level < kMaxHeight) {
    head_->SetNext(level, nullptr);
    ++level;
  }
}

在初始化過程中，會創建一個高度爲kMaxHeight（默認是12）的頭部節點，然後將頭部節點每一層的next指針都設置爲nullptr，即初始時每一層鏈表都爲空。

跳錶插入數據

// Inserts `key` into the skip list.
//
// No key equal to `key` may already be present (checked by the assert below).
// The new node gets a random height and is linked in on each of its levels,
// right after the predecessor found for that level.
template <typename Key, class Comparator>
void SkipList<Key, Comparator>::Insert(const Key& key) {
  // TODO(opt): We can use a barrier-free variant of FindGreaterOrEqual()
  // here since Insert() is externally synchronized.
  Node* prev[kMaxHeight];                                                 // prev[i]: predecessor of the new node on level i
  Node* x = FindGreaterOrEqual(key, prev);                                // Lookup only (creates nothing); fills prev for the levels currently in use

  // Our data structure does not allow duplicate insertion
  assert(x == nullptr || !Equal(key, x->key));

  int height = RandomHeight();                                            // Number of levels the new node will occupy
  if (height > GetMaxHeight()) {                                          // New node is taller than the list currently is
    for (int i = GetMaxHeight(); i < height; i++) {                       // Levels that were not in use until now
      prev[i] = head_;                                                    // On a fresh level the only possible predecessor is the head
    }
    // It is ok to mutate max_height_ without any synchronization
    // with concurrent readers.  A concurrent reader that observes
    // the new value of max_height_ will see either the old value of
    // new level pointers from head_ (nullptr), or a new value set in
    // the loop below.  In the former case the reader will
    // immediately drop to the next level since nullptr sorts after all
    // keys.  In the latter case the reader will use the new node.
    max_height_.store(height, std::memory_order_relaxed);                 // Relaxed store; see the comment above for why that suffices
  }

  x = NewNode(key, height);                                               // Create the node that holds key
  for (int i = 0; i < height; i++) {                                      // Link it in on every level it occupies, bottom-up
    // NoBarrier_SetNext() suffices since we will add a barrier when
    // we publish a pointer to "x" in prev[i].
    x->NoBarrier_SetNext(i, prev[i]->NoBarrier_Next(i));                  // First point the new node at its successor ...
    prev[i]->SetNext(i, x);                                               // ... then publish it by linking it after its predecessor
  }
}

此時的執行過程：首先調用FindGreaterOrEqual，從當前最高層開始逐層向下遍歷，查找第一個大於或等於該key的節點，並在prev數組中記錄每一層上的前驅節點（該函數只負責查找，並不會創建節點）；然後通過RandomHeight隨機得到新節點的高度，調用NewNode創建新節點，並在每一層上將其鏈接到對應的前驅節點之後，這樣跳錶中的數據就始終按key的大小有序排列。

// Returns true iff `key` orders strictly after the key stored in node `n`.
// A null `n` is treated as infinitely large, so the result is false for it.
template <typename Key, class Comparator>
bool SkipList<Key, Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
  if (n == nullptr) {
    return false;
  }
  // compare_(a, b) < 0 means a orders before b.
  return compare_(n->key, key) < 0;
}

// Returns the first node on level 0 that `key` does not come after (per
// KeyIsAfterNode), or nullptr if `key` comes after every node in the list.
//
// If `prev` is non-null, prev[level] is set, for every level from
// GetMaxHeight() - 1 down to 0, to the last node visited on that level,
// i.e. the node after which `key` would be linked on that level.
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node*
SkipList<Key, Comparator>::FindGreaterOrEqual(const Key& key,
                                              Node** prev) const {
  Node* cur = head_;
  int lvl = GetMaxHeight() - 1;  // Start on the highest level in use.
  for (;;) {
    Node* const succ = cur->Next(lvl);
    if (KeyIsAfterNode(key, succ)) {
      // key lies beyond succ: keep walking right on this level.
      cur = succ;
      continue;
    }
    // succ is null or not before key: cur is the predecessor on this level.
    if (prev != nullptr) {
      prev[lvl] = cur;
    }
    if (lvl == 0) {
      return succ;
    }
    // Continue the search one level down, starting from cur.
    --lvl;
  }
}

其中compare_是MemTable的KeyComparator，它最終通過InternalKeyComparator來比較兩個key；在默認的（按字節序的）比較器下，對user key部分的比較其實調用的是Slice的比較函數；

// Three-way comparison of this slice against `b`.
//
// The bytes of the common-length prefix are compared with memcmp; if they
// differ, memcmp's result is returned. If they are equal, the shorter slice
// orders first: returns -1 if this slice is shorter, +1 if it is longer, and
// 0 if both have the same length.
inline int Slice::compare(const Slice& b) const {
  const size_t shared_len = (b.size_ < size_) ? b.size_ : size_;
  const int diff = memcmp(data_, b.data_, shared_len);
  if (diff != 0) {
    return diff;
  }
  if (size_ < b.size_) {
    return -1;
  }
  if (size_ > b.size_) {
    return +1;
  }
  return 0;
}

此時就將數據按照key的字節序（公共前綴相同時，較短的key排在前面）有序地插入到了跳錶中。有關跳錶的基本內容大家可自行查閱。

跳錶的插入概述

初始如下

此時插入3.5:c之後

總結

本文主要是繼續分析了數據插入的後半段流程，即數據最終被插入到跳錶中的基本過程。源碼中的數據流程相對較繁瑣，但只要理清思路，就能大致理解數據的整個插入過程，至此插入與刪除流程就分析完成。由於本人才疏學淺，如有錯誤請批評指正。

leveldb源碼分析：數據插入續(跳錶)

leveldb數據的插入-跳錶

InsertInto插入數據函數

MemTable細節相關

mem_->Add方法

SkipList跳錶

跳錶插入數據

跳錶的插入概述

總結

Redis的rdb格式學習

遍歷百萬級Redis的鍵值的大結局

租約-代碼實踐

golang源碼分析：調度器chan調度

兩階段提交實際項目V1

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結