leveldb源碼分析：數據查詢

leveldb數據查詢

查詢的示例代碼如下：

// Look up "KeyNameExample" with default read options. The outcome is
// returned in `status`; when the key is found its value is stored in `res`.
string res;
status = db->Get(ReadOptions(), "KeyNameExample", &res);

本文就先分析一下數據的獲取流程。

db->Get獲取數據

主要就是調用db的Get方法來查找數據；

// Looks up `key` and, when found, stores its value in `*value`.
// Lookup order: the active memtable, then the immutable memtable (if any),
// then the files of the current Version. Returns the Status of the lookup.
Status DBImpl::Get(const ReadOptions& options, const Slice& key,
                   std::string* value) {
  Status s;
  MutexLock l(&mutex_);
  SequenceNumber snapshot;
  if (options.snapshot != nullptr) {  // The caller supplied an explicit snapshot
    snapshot =
        static_cast<const SnapshotImpl*>(options.snapshot)->sequence_number();  // Read as of that snapshot's sequence number
  } else {
    snapshot = versions_->LastSequence();  // Otherwise read as of the latest sequence number
  }

  MemTable* mem = mem_;                     // Active memtable
  MemTable* imm = imm_;                     // Immutable memtable (may be null)
  Version* current = versions_->current();  // Current version
  // Take a reference on everything we are about to read; each Ref() is
  // paired with an Unref() at the end of this function.
  mem->Ref();
  if (imm != nullptr) imm->Ref();
  current->Ref();

  bool have_stat_update = false;  // Set only when the lookup reaches the files
  Version::GetStats stats;

  // Unlock while reading from files and memtables
  {
    mutex_.Unlock();  // Release the mutex for the duration of the lookups
    // First look in the memtable, then in the immutable memtable (if any).
    LookupKey lkey(key, snapshot);  // Combine the user key and the sequence number into a LookupKey
    if (mem->Get(lkey, value, &s)) {  // 1) active memtable
      // Done
    } else if (imm != nullptr && imm->Get(lkey, value, &s)) {  // 2) immutable memtable
      // Done
    } else {
      s = current->Get(options, lkey, value, &stats);  // 3) files of the current version
      have_stat_update = true;                         // `stats` was filled in by Version::Get
    }
    mutex_.Lock();  // Re-acquire the mutex before touching shared state again
  }

  // Only when the files were consulted: feed the stats back to the version
  // and, if UpdateStats() returns true, consider scheduling a compaction.
  if (have_stat_update && current->UpdateStats(stats)) {
    MaybeScheduleCompaction();
  }
  mem->Unref();  // Drop the references taken above
  if (imm != nullptr) imm->Unref();
  current->Unref();
  return s;
}

從執行流程可知，獲取數據時的優先級主要就是三個；

從當前內存memTable中獲取；
如果第一步未獲取到，則從當前的不可修改的imm中獲取；
如果第二步未獲取到，則從level層中去獲取數據；

從memTable中查找

mem->Get(lkey, value, &s)

此時調用的就是mem的Get方法來查找；

bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) {
  Slice memkey = key.memtable_key();                                          // 先獲取key的數據
  Table::Iterator iter(&table_);                                              // 生成table的迭代器
  iter.Seek(memkey.data());                                                   // 查找數據 
  if (iter.Valid()) {                                                         // 如果找到
    // entry format is:
    //    klength  varint32
    //    userkey  char[klength]
    //    tag      uint64
    //    vlength  varint32
    //    value    char[vlength]
    // Check that it belongs to same user key.  We do not check the
    // sequence number since the Seek() call above should have skipped
    // all entries with overly large sequence numbers.                        // 獲取整個數據
    const char* entry = iter.key();
    uint32_t key_length;
    const char* key_ptr = GetVarint32Ptr(entry, entry + 5, &key_length);      // 獲取指針內容值
    if (comparator_.comparator.user_comparator()->Compare(
            Slice(key_ptr, key_length - 8), key.user_key()) == 0) {           // key內容是否相同
      // Correct user key
      const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8);           // 獲取該key的標誌位  是刪除還是新增數據
      switch (static_cast<ValueType>(tag & 0xff)) {
        case kTypeValue: {
          Slice v = GetLengthPrefixedSlice(key_ptr + key_length);             // 如果是未刪除數據
          value->assign(v.data(), v.size());                                  // 設置到velue中並返回
          return true;
        }
        case kTypeDeletion:
          *s = Status::NotFound(Slice());                                     // 如果該數據爲刪除則標記爲NotFound
          return true;
      }
    }
  }
  return false;
}

其中iter.Seek方法，由於table_是SkipList類型，iter就是SkipList的迭代器，所以調用的是SkipList<Key, Comparator>::Iterator::Seek，而它最終調用的是FindGreaterOrEqual；

// Positions the iterator on the node returned by
// list_->FindGreaterOrEqual(target, nullptr); no predecessor array is requested.
template <typename Key, class Comparator>
inline void SkipList<Key, Comparator>::Iterator::Seek(const Key& target) {
  node_ = list_->FindGreaterOrEqual(target, nullptr);
}

// Walks the skip list from the top level down and returns the node at level 0
// at which the search stops, i.e. the first node for which
// KeyIsAfterNode(key, node) is false (presumably the first node >= key;
// KeyIsAfterNode is not shown here).
// When `prev` is non-null, prev[level] receives the last node visited on each
// level before descending.
template <typename Key, class Comparator>
typename SkipList<Key, Comparator>::Node*
SkipList<Key, Comparator>::FindGreaterOrEqual(const Key& key,
                                              Node** prev) const {
  Node* x = head_;                 // Start from the head node
  int level = GetMaxHeight() - 1;  // Start at level GetMaxHeight() - 1
  while (true) {
    Node* next = x->Next(level);      // Successor of x on the current level
    if (KeyIsAfterNode(key, next)) {  // key lies after `next`: advance on this level
      // Keep searching in this list
      x = next;
    } else {
      if (prev != nullptr) prev[level] = x;  // Record the predecessor on this level for the caller
      if (level == 0) {  // Bottom level reached: `next` is the result
        return next;
      } else {
        // Switch to next list
        level--;
      }
    }
  }
}

在mem中查找的過程其實就和數據插入的過程比較類似。因爲imm不可變table查找方式與該流程一樣故不再敘述。

從level文件中查找

s = current->Get(options, lkey, value, &stats)

此時current其實就是Version對象，此時就是調用的Version的Get方法；

// Looks up the key `k` in the files of this version.
//
// The files that may contain the key are visited via ForEachOverlapping();
// for each one State::Match() queries the table cache. On return:
//   - `*value` holds the value if the key was found;
//   - `*stats` names the first file read when more than one file had to be
//     consulted (otherwise seek_file stays nullptr and seek_file_level -1);
//   - the returned Status is the lookup status when the search was decided
//     (found, error or corruption), and NotFound otherwise.
Status Version::Get(const ReadOptions& options, const LookupKey& k,
                    std::string* value, GetStats* stats) {
  stats->seek_file = nullptr;  // No file charged yet
  stats->seek_file_level = -1;

  // Per-lookup state handed to the Match() callback through a void*.
  struct State {
    Saver saver;
    GetStats* stats;
    const ReadOptions* options;
    Slice ikey;
    FileMetaData* last_file_read;  // File examined by the previous Match() call
    int last_file_read_level;

    VersionSet* vset;
    Status s;
    bool found;

    // Callback for ForEachOverlapping(): examines file `f` of `level`.
    // Returns true to keep searching further files, false to stop.
    static bool Match(void* arg, int level, FileMetaData* f) {
      State* state = reinterpret_cast<State*>(arg);

      if (state->stats->seek_file == nullptr &&
          state->last_file_read != nullptr) {
        // We have had more than one seek for this read.  Charge the 1st file.
        state->stats->seek_file = state->last_file_read;
        state->stats->seek_file_level = state->last_file_read_level;
      }

      state->last_file_read = f;            // Remember the file being read now
      state->last_file_read_level = level;  // ... and its level

      // Query the table (through the table cache); SaveValue records the
      // outcome in state->saver.
      state->s = state->vset->table_cache_->Get(*state->options, f->number,
                                                f->file_size, state->ikey,
                                                &state->saver, SaveValue);
      if (!state->s.ok()) {
        // The read itself failed: stop and report that status to the caller.
        state->found = true;
        return false;
      }
      switch (state->saver.state) {
        case kNotFound:
          return true;  // Keep searching in other files
        case kFound:
          state->found = true;  // Value located: stop searching
          return false;
        case kDeleted:
          return false;  // Deletion marker: stop; `found` stays false
        case kCorrupt:
          state->s =
              Status::Corruption("corrupted key for ", state->saver.user_key);
          state->found = true;
          return false;
      }

      // Not reached for valid states. Without this return, an unexpected
      // state value would flow off the end of a non-void function, which is
      // undefined behaviour (and triggers a compiler warning).
      return false;
    }
  };

  State state;  // Initialise the per-lookup state
  state.found = false;
  state.stats = stats;
  state.last_file_read = nullptr;
  state.last_file_read_level = -1;

  state.options = &options;
  state.ikey = k.internal_key();
  state.vset = vset_;

  state.saver.state = kNotFound;
  state.saver.ucmp = vset_->icmp_.user_comparator();
  state.saver.user_key = k.user_key();
  state.saver.value = value;

  // Visit every candidate file, newest data first, until Match() says stop.
  ForEachOverlapping(state.saver.user_key, state.ikey, &state, &State::Match);

  return state.found ? state.s : Status::NotFound(Slice());
}

主要在該方法內部定義了一個State結構體，然後調用ForEachOverlapping方法去具體查找內容；

// Invokes `func(arg, level, file)` for every file that may contain `user_key`,
// level 0 first and then each deeper level in order. Iteration stops as soon
// as `func` returns false.
void Version::ForEachOverlapping(Slice user_key, Slice internal_key, void* arg,
                                 bool (*func)(void*, int, FileMetaData*)) {
  const Comparator* ucmp = vset_->icmp_.user_comparator();

  // Search level-0 in order from newest to oldest.
  // Collect every level-0 file whose [smallest, largest] user-key range
  // contains user_key.
  std::vector<FileMetaData*> level0_hits;
  level0_hits.reserve(files_[0].size());
  for (uint32_t i = 0; i < files_[0].size(); i++) {
    FileMetaData* candidate = files_[0][i];
    if (ucmp->Compare(user_key, candidate->smallest.user_key()) < 0) {
      continue;  // user_key lies before this file's range
    }
    if (ucmp->Compare(user_key, candidate->largest.user_key()) > 0) {
      continue;  // user_key lies after this file's range
    }
    level0_hits.push_back(candidate);
  }
  if (!level0_hits.empty()) {
    std::sort(level0_hits.begin(), level0_hits.end(), NewestFirst);
    for (FileMetaData* hit : level0_hits) {
      if (!(*func)(arg, 0, hit)) {
        return;
      }
    }
  }

  // Search other levels.
  for (int level = 1; level < config::kNumLevels; level++) {
    const size_t file_count = files_[level].size();
    if (file_count == 0) {
      continue;
    }

    // Binary search to find earliest index whose largest key >= internal_key.
    const uint32_t pos = FindFile(vset_->icmp_, files_[level], internal_key);
    if (pos >= file_count) {
      continue;  // No file on this level reaches up to internal_key
    }

    FileMetaData* candidate = files_[level][pos];
    if (ucmp->Compare(user_key, candidate->smallest.user_key()) < 0) {
      // All of "candidate" is past any data for user_key
      continue;
    }
    if (!(*func)(arg, level, candidate)) {
      return;
    }
  }
}

通過該方法可知，首先查找第一層（level-0），如果在第一層找到了就直接返回，如果沒有找到則繼續往下層查找，默認的層數是7（config::kNumLevels）；查找的過程中，都是先比較每一層文件的元信息，判斷待查找的key是否落在該文件的最小key與最大key之間，如果是，則再調用傳入的回調函數Match進行精確查找，在Match方法中主要的查找調用如下；

// Excerpt from State::Match: look up state->ikey in the table file described
// by `f`; SaveValue is the callback that receives the entry together with
// &state->saver.
state->s = state->vset->table_cache_->Get(*state->options, f->number,
                                                f->file_size, state->ikey,
                                                &state->saver, SaveValue);

這裏調用的是vset的table_cache_成員（TableCache對象）的Get方法；

// Looks up key `k` in the table file identified by `file_number`.
// The table is obtained through FindTable(); if that fails its status is
// returned unchanged. Otherwise the lookup is delegated to
// Table::InternalGet(), which invokes `handle_result(arg, key, value)` for the
// entry it finds, and the cache handle is released afterwards.
Status TableCache::Get(const ReadOptions& options, uint64_t file_number,
                       uint64_t file_size, const Slice& k, void* arg,
                       void (*handle_result)(void*, const Slice&,
                                             const Slice&)) {
  Cache::Handle* handle = nullptr;
  Status s = FindTable(file_number, file_size, &handle);
  if (!s.ok()) {
    return s;
  }

  TableAndFile* cached = reinterpret_cast<TableAndFile*>(cache_->Value(handle));
  Table* table = cached->table;
  s = table->InternalGet(options, k, arg, handle_result);
  cache_->Release(handle);
  return s;
}

其中傳入了SaveValue的回調處理函數，來再次確認找到的值，並將值保存；此時首先調用FindTable方法去查找文件；

// Returns in `*handle` a cache handle for the table file `file_number`.
// On a cache hit the existing entry is returned. On a miss the file is opened
// (trying the current file name first, then the older SST file name), the
// table is opened on top of it, and the pair is inserted into the cache.
// Failures are not cached; `*handle` is left null in that case.
Status TableCache::FindTable(uint64_t file_number, uint64_t file_size,
                             Cache::Handle** handle) {
  Status s;

  // The cache key is the fixed-width encoding of the file number.
  char encoded_number[sizeof(file_number)];
  EncodeFixed64(encoded_number, file_number);
  Slice key(encoded_number, sizeof(encoded_number));

  *handle = cache_->Lookup(key);
  if (*handle != nullptr) {
    return s;  // Cache hit
  }

  // Cache miss: open the file backing the table.
  std::string fname = TableFileName(dbname_, file_number);
  RandomAccessFile* file = nullptr;
  Table* table = nullptr;
  s = env_->NewRandomAccessFile(fname, &file);
  if (!s.ok()) {
    // Fall back to the older file-name scheme.
    std::string old_fname = SSTTableFileName(dbname_, file_number);
    if (env_->NewRandomAccessFile(old_fname, &file).ok()) {
      s = Status::OK();
    }
  }
  if (s.ok()) {
    s = Table::Open(options_, file, file_size, &table);
  }

  if (!s.ok()) {
    assert(table == nullptr);
    delete file;
    // We do not cache error results so that if the error is transient,
    // or somebody repairs the file, we recover automatically.
    return s;
  }

  // Success: hand file and table over to the cache entry.
  TableAndFile* entry = new TableAndFile;
  entry->file = file;
  entry->table = table;
  *handle = cache_->Insert(key, entry, 1, &DeleteEntry);
  return s;
}

該方法先在緩存中查找該文件對應的table，如果沒有命中則打開文件（先嚐試新的文件名，失敗後再嘗試舊的SST文件名），打開成功後將文件和table一起加入緩存，以便後續查找能夠更快找到；找到table之後，TableCache::Get接着調用Table的InternalGet方法在該table中查找；

// Looks up key `k` inside this table.
// The index block is searched first to locate the candidate data block. If a
// filter is present and reports that the key cannot be in that block, the
// block is skipped. Otherwise the block is read via BlockReader() and, when
// the seek lands on a valid entry, `handle_result(arg, key, value)` is called
// with that entry. Returns the first non-OK iterator status, if any.
Status Table::InternalGet(const ReadOptions& options, const Slice& k, void* arg,
                          void (*handle_result)(void*, const Slice&,
                                                const Slice&)) {
  Status result;
  Iterator* index_iter =
      rep_->index_block->NewIterator(rep_->options.comparator);
  index_iter->Seek(k);
  if (index_iter->Valid()) {
    Slice handle_value = index_iter->value();
    FilterBlockReader* filter = rep_->filter;
    BlockHandle handle;
    // The filter can only rule a block out, and only when the block handle
    // decodes successfully.
    const bool ruled_out_by_filter =
        filter != nullptr && handle.DecodeFrom(&handle_value).ok() &&
        !filter->KeyMayMatch(handle.offset(), k);
    if (!ruled_out_by_filter) {
      Iterator* block_iter = BlockReader(this, options, index_iter->value());
      block_iter->Seek(k);
      if (block_iter->Valid()) {
        (*handle_result)(arg, block_iter->key(), block_iter->value());
      }
      result = block_iter->status();
      delete block_iter;
    }
  }
  if (result.ok()) {
    result = index_iter->status();
  }
  delete index_iter;
  return result;
}

該方法先通過index_block的迭代器定位key可能所在的數據塊，再用過濾器（如果有）排除key一定不在該數據塊中的情況，最後通過BlockReader得到該數據塊的迭代器並在其中查找；之所以用多層iter迭代器來包裝，主要是爲了在其中加入緩存以及註冊相關的清理回調；BlockReader的實現如下：

// Converts an index-block value (an encoded BlockHandle) into an iterator
// over the contents of the corresponding block.
// `arg` is the Table* the block belongs to. When a block cache is configured
// the block is looked up there first and, on a miss, read from the file and
// possibly inserted. If the block cannot be obtained, an error iterator
// carrying the failure status is returned instead.
Iterator* Table::BlockReader(void* arg, const ReadOptions& options,
                             const Slice& index_value) {
  Table* table = reinterpret_cast<Table*>(arg);           // Recover the table from the opaque argument
  Cache* block_cache = table->rep_->options.block_cache;  // Block cache from the options (may be null)
  Block* block = nullptr;
  Cache::Handle* cache_handle = nullptr;

  BlockHandle handle;
  Slice input = index_value;
  Status s = handle.DecodeFrom(&input);
  // We intentionally allow extra stuff in index_value so that we
  // can add more features in the future.

  if (s.ok()) {
    BlockContents contents;
    if (block_cache != nullptr) {
      // Cache key = 8 bytes of cache_id followed by 8 bytes of block offset.
      char cache_key_buffer[16];
      EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
      EncodeFixed64(cache_key_buffer + 8, handle.offset());
      Slice key(cache_key_buffer, sizeof(cache_key_buffer));
      cache_handle = block_cache->Lookup(key);
      if (cache_handle != nullptr) {
        block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));  // Cache hit: reuse the cached block
      } else {
        s = ReadBlock(table->rep_->file, options, handle, &contents);  // Cache miss: read the block from the file
        if (s.ok()) {
          block = new Block(contents);
          if (contents.cachable && options.fill_cache) {
            cache_handle = block_cache->Insert(key, block, block->size(),
                                               &DeleteCachedBlock);  // Insert the new block into the cache
          }
        }
      }
    } else {
      // No block cache configured: always read from the file.
      s = ReadBlock(table->rep_->file, options, handle, &contents);
      if (s.ok()) {
        block = new Block(contents);
      }
    }
  }

  Iterator* iter;
  if (block != nullptr) {
    iter = block->NewIterator(table->rep_->options.comparator);
    if (cache_handle == nullptr) {
      // The block is not held by the cache: register DeleteBlock as the
      // iterator's cleanup for it.
      iter->RegisterCleanup(&DeleteBlock, block, nullptr);
    } else {
      // The block is held by the cache: register ReleaseBlock for the
      // cache handle instead.
      iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle);
    }
  } else {
    iter = NewErrorIterator(s);  // Report the failure through an error iterator
  }
  return iter;
}

至此，在層級文件中的查找流程基本完成，從流程可以看出在層級查找的過程中，機制更爲複雜，設置了更多的緩存與檢查機制。

總結

本文主要講述了leveldb數據的獲取流程：獲取數據時，先從當前內存中的memtable中查找，再從不可變的immutable memtable中查找，如果兩者都查找不到，則去層級文件中查找；在層級文件中查找時，還需要先確定待查找的內容可能位於哪一層的哪個文件，然後通過table緩存、塊緩存以及過濾器等方式來提高讀的性能，並通過回調函數來保存查找到的結果。由於本人才疏學淺，如有錯誤請批評指正。

leveldb源碼分析：數據查詢

leveldb數據查詢

db->Get獲取數據

從memTable中查找

從level文件中查找

總結

Redis的rdb格式學習

遍歷百萬級Redis的鍵值的大結局

租約-代碼實踐

golang源碼分析：調度器chan調度

兩階段提交實際項目V1

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結