本篇來介紹一下leveldb初始化流程,初始化流程中涉及了很多內容,例如:文件組織方式,存儲內容序列化和反序列化等等。在瞭解這些內容後,是有助於日後分析存儲流程和壓縮流程的。
一、創建/Open數據庫
leveldb屬於輕量級kv數據庫,可以很方便寫一個hello程序並結合gdb單步調試,來輔助我們閱讀源碼,起到事半功倍的效果。
1.1、版本管理
leveldb採用分層思想對數據進行管理,那麼就需要一個對象來管理每層數據元信息,這個對象就是VersionSet,在源碼中和Version有關的對象一共有三個分別爲:
| 對象 | 作用 |
| --- | --- |
| VersionSet | Version集合,所有的Version都掛在VersionSet對象下面,一個db只有一個VersionSet |
| Version | 一個db可能存在多個Version,Version之間採用鏈表方式管理,鏈表最後節點爲當前最新Version信息 |
| VersionEdit | 該對象用於生成最新的Version。VersionEdit + 當前使用的Version 生成 新的Version存放到鏈表尾部,只在需要生成新的Version時纔會創建 |
1.2、Open函數
調用接口Open用於打開數據庫(如果數據庫不存在且create_if_missing爲true,則先創建),下面是Open函數流程圖:
// Open (and, via Recover, possibly create) the database named `dbname`.
//
// options: database options; options.env is used here to create the new
//          write-ahead log file.
// dbname:  database directory name.
// dbptr:   output. Set to NULL up front; on success it receives the newly
//          allocated DBImpl, on failure it stays NULL and the DBImpl is deleted.
//
// Returns the first non-OK Status produced by recovery, log-file creation or
// the MANIFEST update; OK otherwise. impl->mutex_ is held from just after
// construction until just before the result is published.
Status DB::Open(const Options& options, const std::string& dbname,
DB** dbptr) {
*dbptr = NULL;
DBImpl* impl = new DBImpl(options, dbname);// Construct the DBImpl object
impl->mutex_.Lock();
VersionEdit edit;
// Recover handles create_if_missing, error_if_exists
bool save_manifest = false;
Status s = impl->Recover(&edit, &save_manifest);// Restore persisted state
if (s.ok() && impl->mem_ == NULL) {// Recovery left no memtable: create one
// Create new log and a corresponding memtable.
uint64_t new_log_number = impl->versions_->NewFileNumber();
// Create the *.log file
WritableFile* lfile;
s = options.env->NewWritableFile(LogFileName(dbname, new_log_number),
&lfile);
if (s.ok()) {
edit.SetLogNumber(new_log_number);
impl->logfile_ = lfile;
impl->logfile_number_ = new_log_number;
impl->log_ = new log::Writer(lfile);
impl->mem_ = new MemTable(impl->internal_comparator_);
impl->mem_->Ref();
}
}
if (s.ok() && save_manifest) {// save_manifest == true: a new MANIFEST has to be written
edit.SetPrevLogNumber(0); // No older logs needed after recovery.
edit.SetLogNumber(impl->logfile_number_);
s = impl->versions_->LogAndApply(&edit, &impl->mutex_);// Log-related state changed, so persist the edit into the version info
}
if (s.ok()) {
impl->DeleteObsoleteFiles();// Remove files left over from older versions
impl->MaybeScheduleCompaction();// Kick off compaction scheduling (per the original note this runs on a separate thread - confirm)
}
impl->mutex_.Unlock();
if (s.ok()) {
// A memtable must exist by now: either recovery provided it or it was created above.
assert(impl->mem_ != NULL);
*dbptr = impl;
} else {
delete impl;
}
return s;
}
1.2.1、Recover數據恢復
DBImpl的構造函數實現並不複雜,不再展開說明。這裏詳細說明一下Recover的實現,Recover只做了兩件主要事情:恢復Version信息和恢復MemTable(讀取.log文件),具體流程圖如下:
/**
 * Restore the database environment: take the lock file, create the database
 * if requested, recover version information and replay the .log files.
 *
 * The caller must hold mutex_ (checked by AssertHeld below).
 *
 * @param edit          passed through to RecoverLogFile for every replayed log
 * @param save_manifest output flag forwarded to VersionSet::Recover and
 *                      RecoverLogFile; DB::Open treats true as "a new MANIFEST
 *                      must be written"
 * @return OK on success, otherwise the first error encountered
 *         (InvalidArgument for the existence checks, Corruption when files
 *         referenced by the recovered version are missing, or whatever the
 *         called helpers report)
 */
Status DBImpl::Recover(VersionEdit* edit, bool *save_manifest) {
mutex_.AssertHeld();
// Ignore error from CreateDir since the creation of the DB is
// committed only when the descriptor is created, and this directory
// may already exist from a previous failed creation attempt.
env_->CreateDir(dbname_);
assert(db_lock_ == NULL);
// Acquire the database lock file. NOTE(review): presumably this guards the
// database against use by another process - confirm against Env::LockFile.
// Per the original note, env_ is the implementation in env_posix.cc.
Status s = env_->LockFile(LockFileName(dbname_), &db_lock_);
if (!s.ok()) {
return s;
}
// Check whether the CURRENT file exists
if (!env_->FileExists(CurrentFileName(dbname_))) {
if (options_.create_if_missing) {
s = NewDB(); // Create the database; per the original note this writes CURRENT and MANIFEST but no *.log file
if (!s.ok()) {
return s;
}
} else {
return Status::InvalidArgument(
dbname_, "does not exist (create_if_missing is false)");
}
} else {
if (options_.error_if_exists) {
return Status::InvalidArgument(
dbname_, "exists (error_if_exists is true)");
}
}
/**
 * Everything above validates the database files (creating a new database
 * when none exists). Everything below restores state from those files,
 * starting with the version information.
 */
s = versions_->Recover(save_manifest);// implemented in version_set.cc
if (!s.ok()) {
return s;
}
SequenceNumber max_sequence(0);
// Recover from all newer log files than the ones named in the
// descriptor (new log files may have been added by the previous
// incarnation without registering them in the descriptor).
//
// Note that PrevLogNumber() is no longer used, but we pay
// attention to it in case we are recovering a database
// produced by an older version of leveldb.
const uint64_t min_log = versions_->LogNumber();
const uint64_t prev_log = versions_->PrevLogNumber();
// List the names of all entries in the database directory
std::vector<std::string> filenames;
s = env_->GetChildren(dbname_, &filenames);
if (!s.ok()) {
return s;
}
// Start from the file numbers the recovered versions reference and erase
// each one that is actually found on disk; leftovers are missing files.
std::set<uint64_t> expected;
versions_->AddLiveFiles(&expected);
uint64_t number;
FileType type;
std::vector<uint64_t> logs;
for (size_t i = 0; i < filenames.size(); i++) {
if (ParseFileName(filenames[i], &number, &type)) {
expected.erase(number);
// Keep only log files that are at least as new as the recorded log
// number, plus the legacy "previous" log.
if (type == kLogFile && ((number >= min_log) || (number == prev_log)))
logs.push_back(number);
}
}
if (!expected.empty()) {// Non-empty: some referenced files are missing, report corruption
// snprintf bounds the write to the 50-byte buffer.
char buf[50];
snprintf(buf, sizeof(buf), "%d missing files; e.g.",
static_cast<int>(expected.size()));
return Status::Corruption(buf, TableFileName(dbname_, *(expected.begin())));
}
// Recover in the order in which the logs were generated
// Replay the .log files in ascending number order (per the original note,
// this is what rebuilds the MemTable).
std::sort(logs.begin(), logs.end());
for (size_t i = 0; i < logs.size(); i++) {
// The second argument is true only for the last (newest) log in the list.
s = RecoverLogFile(logs[i], (i == logs.size() - 1), save_manifest, edit,
&max_sequence);
if (!s.ok()) {
return s;
}
// The previous incarnation may not have written any MANIFEST
// records after allocating this log number. So we manually
// update the file number allocation counter in VersionSet.
versions_->MarkFileNumberUsed(logs[i]);
}
if (versions_->LastSequence() < max_sequence) {
versions_->SetLastSequence(max_sequence);// Remember the largest sequence number seen in the logs
}
return Status::OK();
}
這裏需要提示一下:.log文件相當於數據庫操作日誌,.log中每條記錄都與MemTable中的記錄一一對應。遍歷完所有.log文件,MemTable就生成了。
這裏簡單說明一下數據插入流程:leveldb首先將一條記錄插入到.log文件,然後再插入到MemTable中。當leveldb數據庫重啓時,需要讀取每個.log文件以便恢復MemTable,這個恢復流程就是在RecoverLogFile中實現的。
1.2.2、VersionEdit恢復
在之前的博客中介紹過,MANIFEST存儲的內容爲VersionEdit,該對象包含了很多內容,例如:log文件編號,壓縮點,待刪文件序號以及保存的最小key和最大key。下面的VersionSet::Recover函數主要負責解析MANIFEST文件。
/**
 * Recover the VersionSet state by reading the MANIFEST file named in CURRENT
 * and replaying every VersionEdit record it contains.
 *
 * @param save_manifest output flag. Set to true when ReuseManifest() reports
 *                      that the existing MANIFEST cannot be reused, i.e. the
 *                      caller has to write a new one. It is never set to false
 *                      here, so the caller must initialize it.
 * @return OK on success; otherwise the first error encountered (I/O error,
 *         Corruption for a malformed CURRENT/MANIFEST, or InvalidArgument on
 *         a comparator name mismatch)
 */
Status VersionSet::Recover(bool *save_manifest) {
  // Forwards corruption reports from the log reader into *status, keeping
  // only the first error.
  struct LogReporter : public log::Reader::Reporter {
    Status* status;
    virtual void Corruption(size_t bytes, const Status& s) {
      if (this->status->ok()) *this->status = s;
    }
  };

  // Read "CURRENT" file, which contains a pointer to the current manifest file
  std::string current;
  Status s = ReadFileToString(env_, CurrentFileName(dbname_), &current);
  if (!s.ok()) {
    return s;
  }
  if (current.empty() || current[current.size()-1] != '\n') {
    return Status::Corruption("CURRENT file does not end with newline");
  }
  current.resize(current.size() - 1);  // strip the trailing newline

  // Open the MANIFEST file
  std::string dscname = dbname_ + "/" + current;
  SequentialFile* file;
  s = env_->NewSequentialFile(dscname, &file);  // env_posix.cc
  if (!s.ok()) {
    return s;
  }

  bool have_log_number = false;
  bool have_prev_log_number = false;
  bool have_next_file = false;
  bool have_last_sequence = false;
  uint64_t next_file = 0;
  uint64_t last_sequence = 0;
  uint64_t log_number = 0;
  uint64_t prev_log_number = 0;

  // The builder is seeded with current_ and accumulates every edit read from
  // the MANIFEST; SaveTo() below materializes the merged result.
  Builder builder(this, current_);  // Builder is defined in version_set.cc

  {
    LogReporter reporter;
    reporter.status = &s;
    // Reader over the MANIFEST file, with checksum verification enabled
    log::Reader reader(file, &reporter, true/*checksum*/, 0/*initial_offset*/);
    Slice record;
    std::string scratch;  // working buffer used inside ReadRecord
    // Read the MANIFEST record by record, decoding each into a VersionEdit
    while (reader.ReadRecord(&record, &scratch) && s.ok()) {  // log_reader.cc
      VersionEdit edit;
      s = edit.DecodeFrom(record);  // deserialize into the VersionEdit
      if (s.ok()) {
        if (edit.has_comparator_ &&
            edit.comparator_ != icmp_.user_comparator()->Name()) {
          s = Status::InvalidArgument(
              edit.comparator_ + " does not match existing comparator ",
              icmp_.user_comparator()->Name());
        }
      }

      // Per the original note: for historical edits the useful parts are the
      // file information and the compaction pointers, so fold them in.
      if (s.ok()) {
        builder.Apply(&edit);
      }

      // Later records override earlier ones for each of these fields.
      if (edit.has_log_number_) {
        log_number = edit.log_number_;
        have_log_number = true;
      }

      if (edit.has_prev_log_number_) {
        prev_log_number = edit.prev_log_number_;
        have_prev_log_number = true;
      }

      if (edit.has_next_file_number_) {
        next_file = edit.next_file_number_;
        have_next_file = true;
      }

      if (edit.has_last_sequence_) {
        last_sequence = edit.last_sequence_;
        have_last_sequence = true;
      }
    }
  }
  delete file;
  file = NULL;

  if (s.ok()) {
    // A usable MANIFEST must have supplied these three fields.
    if (!have_next_file) {
      s = Status::Corruption("no meta-nextfile entry in descriptor");
    } else if (!have_log_number) {
      s = Status::Corruption("no meta-lognumber entry in descriptor");
    } else if (!have_last_sequence) {
      s = Status::Corruption("no last-sequence-number entry in descriptor");
    }

    if (!have_prev_log_number) {
      prev_log_number = 0;
    }

    MarkFileNumberUsed(prev_log_number);
    MarkFileNumberUsed(log_number);
  }

  if (s.ok()) {
    // Build a new Version holding the state accumulated by the builder
    Version* v = new Version(this);
    builder.SaveTo(v);
    // Install recovered version
    Finalize(v);       // per the original note: precomputes compaction level and score
    AppendVersion(v);  // per the original note: appends v to the tail of the Version list
    // Publish the recovered counters on the VersionSet
    manifest_file_number_ = next_file;
    next_file_number_ = next_file + 1;
    last_sequence_ = last_sequence;
    log_number_ = log_number;
    prev_log_number_ = prev_log_number;

    // See if we can reuse the existing MANIFEST file.
    if (ReuseManifest(dscname, current)) {
      // No need to save new manifest
    } else {
      *save_manifest = true;
    }
  }

  return s;
}
二、總結
本篇主要介紹了leveldb初始化流程,只有瞭解數據庫組織方式,才有助於我們後續分析存儲流程,如果對於leveldb存儲結構還不清楚的,可參考《leveldb深度剖析-存儲結構(1)》,下一篇將介紹存儲流程。