在閱讀Memtable代碼的過程中涉及到leveldb的各種key,只有對這些key理解清楚了,讀起Memtable纔會很輕鬆,所以這篇着重講解下這些key。
一、Key
看圖1,先有個初步印象:
1.SequenceNumber+Type
leveldb每次更新(put/delete)操作都擁有一個版本,由SequenceNumber來標識,整個leveldb有一個全局值保存着當前使用到的SequenceNumber。Key的排序、compact以及snapshot都依賴着它。
每次操作是put操作還是delete操作由ValueType來標識。
2. Internal Key
這裏的UserKey
是用戶層面傳入的key,用Slice表示。對sstable 進行查詢時會用到InternalKey
。
3.Lookup Key
對Memtable進行查找時會用到LookupKey
。
Klength = UserKey.length + 8 (SequenceNumber + ValueType)。
二、源碼
這裏展示的是dbformat中只與Memtable和Key相關的代碼。
dbformat.h
namespace leveldb
{
class InternalKey;
// Value types encoded as the last component of internal keys.
// DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
// data structures.
<!這裏的ValueType就是對應圖2中的ValueType。
1.kTypeDeletion對應用戶的 Delete操作;
2.kTypeValue 對應用戶的Put操作。
>
// kTypeDeletion (0x0) tags a delete operation; kTypeValue (0x1) tags a put.
enum ValueType { kTypeDeletion = 0x0, kTypeValue = 0x1 };
// kValueTypeForSeek defines the ValueType that should be passed when
// constructing a ParsedInternalKey object for seeking to a particular
// sequence number (since we sort sequence numbers in decreasing order
// and the value type is embedded as the low 8 bits in the sequence
// number in internal keys, we need to use the highest-numbered
// ValueType, not the lowest).
// kTypeValue (0x1) is the larger of the two ValueType values.
static const ValueType kValueTypeForSeek = kTypeValue;
<!這裏的SequenceNumber對應圖2中的SequenceNumber,
佔位56bit,最大值就是56個1的二進制。
>
// Version stamp attached to every update. Stored in a 64-bit integer, but
// only 56 bits are usable (see kMaxSequenceNumber).
typedef uint64_t SequenceNumber;
// We leave eight bits empty at the bottom so a type and sequence#
// can be packed together into 64-bits.
// Largest usable sequence number: 2^56 - 1 (fifty-six 1 bits).
static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1);
<!InternalKey 結構,從這個結構可以看出InternalKey由
user_Key、SequenceNumber、Value_Type三部分組成。
>
// Decomposed form of an internal key: the user-supplied key, the sequence
// number of the update, and the kind of update (put or delete).
struct ParsedInternalKey {
  Slice user_key;
  SequenceNumber sequence;
  ValueType type;

  // Intentionally assigns nothing to the fields (for speed).
  ParsedInternalKey() {}

  // Builds a parsed key from its three components.
  ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
      : user_key{u}, sequence{seq}, type{t} {}

  // Human-readable rendering of the three components, for debugging.
  std::string DebugString() const;
};
<!InternalKey的長度,8Byte是SequenceNumber(56Bit) + ValueType(8Bit)>
// Returns how many bytes the encoding of "key" occupies: the user key
// followed by the fixed 8-byte trailer (56-bit sequence number plus 8-bit
// value type).
inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
  const size_t user_key_bytes = key.user_key.size();
  const size_t trailer_bytes = 8;
  return user_key_bytes + trailer_bytes;
}
<!將InternalKey添加到result尾部>
// Append the serialization of "key" to *result.
// The encoding is the user key bytes followed by an 8-byte trailer that
// packs the sequence number and the value type.
void AppendInternalKey(std::string* result, const ParsedInternalKey& key);
<!嘗試解析InternalKey,
1.解析成功就返回true,結果存在*result中;
2.解析失敗就返回false,*result則是未定義的。
>
// Attempt to parse an internal key from "internal_key". On success,
// stores the parsed data in "*result", and returns true.
//
// On error, returns false, leaves "*result" in an undefined state.
// Parsing fails when "internal_key" is shorter than the 8-byte trailer or
// when the trailer carries a value type greater than kTypeValue.
bool ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result);
<!從InternalKey中解析出用戶key,即user_Key。>
// Returns a view of the user-key portion of "internal_key", i.e. everything
// except the trailing 8 bytes. The input must be at least 8 bytes long.
inline Slice ExtractUserKey(const Slice& internal_key) {
  const size_t total_bytes = internal_key.size();
  assert(total_bytes >= 8);
  const size_t user_key_bytes = total_bytes - 8;
  return Slice(internal_key.data(), user_key_bytes);
}
<!InternalKey的比較類:user key部分使用構造時傳入的比較器(leveldb默認是BytewiseComparator,即按字節比較),user key相同時按SequenceNumber降序排列>
// A comparator for internal keys that uses a specified comparator for
// the user key portion and breaks ties by decreasing sequence number.
class InternalKeyComparator : public Comparator {
private:
const Comparator* user_comparator_;
public:
explicit InternalKeyComparator(const Comparator* c) : user_comparator_(c) {}
<!比較器名稱>
const char* Name() const override;
<!比較兩個Slice,
a < b,返回值<0;
a = b,返回值=0;
a > b,返回值>0;
>
int Compare(const Slice& a, const Slice& b) const override;
<!獲得大於*start,但小於limit的最小值,值存在*start中返回>
void FindShortestSeparator(std::string* start,
const Slice& limit) const override;
<!獲得大於*key的最小值,值存在*start中返回。>
void FindShortSuccessor(std::string* key) const override;
const Comparator* user_comparator() const { return user_comparator_; }
<!比較兩個IntervalKey,返回值參照上面的compare>
int Compare(const InternalKey& a, const InternalKey& b) const;
};
// Modules in this directory should keep internal keys wrapped inside
// the following class instead of plain strings so that we do not
// incorrectly use string comparisons instead of an InternalKeyComparator.
<!InternalKey的封裝,使用此封裝可以避免使用字符串比較來代替InternalKeyComparator。
整個類看起來很清晰,我就不做過多註釋了。
>
class InternalKey {
private:
std::string rep_;
public:
InternalKey() {} // Leave rep_ as empty to indicate it is invalid
InternalKey(const Slice& user_key, SequenceNumber s, ValueType t) {
AppendInternalKey(&rep_, ParsedInternalKey(user_key, s, t));
}
bool DecodeFrom(const Slice& s) {
rep_.assign(s.data(), s.size());
return !rep_.empty();
}
Slice Encode() const {
assert(!rep_.empty());
return rep_;
}
Slice user_key() const { return ExtractUserKey(rep_); }
void SetFrom(const ParsedInternalKey& p) {
rep_.clear();
AppendInternalKey(&rep_, p);
}
void Clear() { rep_.clear(); }
std::string DebugString() const;
};
inline int InternalKeyComparator::Compare(const InternalKey& a,
const InternalKey& b) const {
return Compare(a.Encode(), b.Encode());
}
<!解析出InternalKey存於*result中。>
inline bool ParseInternalKey(const Slice& internal_key,
ParsedInternalKey* result) {
const size_t n = internal_key.size();
<! SequenceNumber + ValueType 爲8,所以InternalKey不可能呢小於8>
if (n < 8) return false;
<!以leveldb內部默認的小端方式解析出8Byte的SequenceNumber+ValueType>
uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
<!接下來就是:
1.獲取ValueType;
2.右移8位獲得SequenceNumber;
3.減去8Byte的Seq+ValueType,獲得userKey。
>
uint8_t c = num & 0xff;
result->sequence = num >> 8;
result->type = static_cast<ValueType>(c);
result->user_key = Slice(internal_key.data(), n - 8);
return (c <= static_cast<uint8_t>(kTypeValue));
}
<! 以下是LookupKey的封裝類,結構如下:
start_ kstart_ end_
| | | |
|<--klength-->|<--userkey--> |<--sequenceNumber+ValueType-->|
klength = userkey大小 + 8Byte的Seq + ValueType,
klength是Varint32編碼,最多佔用5Byte。
>
// A helper class useful for DBImpl::Get()
//
// Holds one buffer that can be viewed as a memtable key, an internal key or
// a plain user key, depending on where the view starts and ends.
class LookupKey {
 public:
  // Initialize *this for looking up user_key at a snapshot with
  // the specified sequence number.
  LookupKey(const Slice& user_key, SequenceNumber sequence);

  // Not copyable.
  LookupKey(const LookupKey&) = delete;
  LookupKey& operator=(const LookupKey&) = delete;

  ~LookupKey();

  // Return a key suitable for lookup in a MemTable
  // (length prefix + user key + tag).
  Slice memtable_key() const {
    const size_t bytes = static_cast<size_t>(end_ - start_);
    return Slice(start_, bytes);
  }

  // Return an internal key (suitable for passing to an internal iterator),
  // i.e. user key + tag.
  Slice internal_key() const {
    const size_t bytes = static_cast<size_t>(end_ - kstart_);
    return Slice(kstart_, bytes);
  }

  // Return the user key (the internal key without its 8-byte tag).
  Slice user_key() const {
    const size_t bytes = static_cast<size_t>(end_ - kstart_ - 8);
    return Slice(kstart_, bytes);
  }

 private:
  // We construct a char array of the form:
  //    klength  varint32               <-- start_
  //    userkey  char[klength]          <-- kstart_
  //    tag      uint64
  //                                    <-- end_
  // The array is a suitable MemTable key.
  // The suffix starting with "userkey" can be used as an InternalKey.
  const char* start_;
  const char* kstart_;
  const char* end_;
  char space_[200];  // Avoid allocation for short keys
};
<!如果二者地址不相等,說明start_的內存是new出來的,
所以要delete。
>
// Releases the buffer only when it is not the inline space_ array, i.e.
// when the constructor had to allocate it with new[].
inline LookupKey::~LookupKey() {
  const bool uses_inline_buffer = (start_ == space_);
  if (!uses_inline_buffer) {
    delete[] start_;
  }
}
}
dbformat.cc
namespace leveldb {
<!打包sequenceNumber + ValueType,這裏就是將seq左移8bit,低8bit存入ValueType>
// Packs a sequence number and a value type into one 64-bit tag: the
// sequence number occupies the upper 56 bits, the type the low 8 bits.
static uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
  assert(seq <= kMaxSequenceNumber);
  assert(t <= kValueTypeForSeek);
  const uint64_t shifted_seq = seq << 8;
  return shifted_seq | t;
}
<!將InternalKey append到*result後面>
void AppendInternalKey(std::string* result, const ParsedInternalKey& key) {
result->append(key.user_key.data(), key.user_key.size());
PutFixed64(result, PackSequenceAndType(key.sequence, key.type));
}
<!調試輸出userKey、sequenceNumber、ValueType>
// Renders the key as '<escaped user key>' @ <sequence> : <type as int>.
std::string ParsedInternalKey::DebugString() const {
  std::ostringstream out;
  out << '\'' << EscapeString(user_key.ToString()) << "' @ ";
  out << sequence << " : ";
  out << static_cast<int>(type);
  return out.str();
}
<!調試輸出InternalKey>
// Renders the stored key for debugging. If the encoding cannot be parsed,
// the output is "(bad)" followed by the escaped raw bytes.
std::string InternalKey::DebugString() const {
  ParsedInternalKey parsed;
  if (!ParseInternalKey(rep_, &parsed)) {
    std::ostringstream out;
    out << "(bad)" << EscapeString(rep_);
    return out.str();
  }
  return parsed.DebugString();
}
// Returns the fixed identifier string of this comparator.
const char* InternalKeyComparator::Name() const {
return "leveldb.InternalKeyComparator";
}
<!比較兩個Slice封裝的Internalkey:
1.先提取二者的userKey進行比較;
2.如果userKey是一樣的,就比較8Byte的Seq+ValueType,
seq+ValueType大的爲小。因爲sequenceNumber在leveldb
全局遞增,所以對於相同的userKey,最新的更新(sequenceNumber更大)
排在前面,在查找的時候會被先找到。
>
// Three-way comparison of two encoded internal keys.
int InternalKeyComparator::Compare(const Slice& akey, const Slice& bkey) const {
  // Order by:
  //    increasing user key (according to user-supplied comparator)
  //    decreasing sequence number
  //    decreasing type (though sequence# should be enough to disambiguate)
  const int user_order =
      user_comparator_->Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
  if (user_order != 0) {
    return user_order;
  }

  // Same user key: the larger 8-byte tag (newer update) sorts first, so a
  // lookup meets the most recent entry before older ones.
  const uint64_t a_tag = DecodeFixed64(akey.data() + akey.size() - 8);
  const uint64_t b_tag = DecodeFixed64(bkey.data() + bkey.size() - 8);
  if (a_tag > b_tag) {
    return -1;
  }
  if (a_tag < b_tag) {
    return +1;
  }
  return 0;
}
<!嘗試把*start縮短爲一個大於等於*start、但小於limit的較短的key,結果存在*start中返回;
如果無法縮短,*start保持不變。流程不是太難理解,這裏以user key部分舉幾個例子:
1.start("foo"),limit("foo"),未找到,start還是"foo"。
2.start("foo"),limit("bar"),未找到,start還是"foo"。
3.start("foo"),limit("hello"),找到,start返回"g"。
4.start("foo"),limit("foobar"),未找到,start還是"foo"。
5.start("foobar"),limit("foo"),未找到,start還是"foobar"。
這裏忽略了ValueType的比較,因爲從處理流程來看並不涉及到它。
>
// Tries to replace *start with a shorter internal key that still sorts
// after the original *start and before limit. Leaves *start untouched when
// the user comparator cannot produce a physically shorter user key.
void InternalKeyComparator::FindShortestSeparator(std::string* start,
                                                  const Slice& limit) const {
  // Attempt to shorten the user portion of the key
  const Slice user_start = ExtractUserKey(*start);
  const Slice user_limit = ExtractUserKey(limit);
  std::string candidate(user_start.data(), user_start.size());
  user_comparator_->FindShortestSeparator(&candidate, user_limit);

  const bool physically_shorter = candidate.size() < user_start.size();
  if (!physically_shorter ||
      user_comparator_->Compare(user_start, candidate) >= 0) {
    return;
  }

  // User key has become shorter physically, but larger logically.
  // Tack on the earliest possible number to the shortened user key.
  const uint64_t tag =
      PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek);
  PutFixed64(&candidate, tag);
  assert(this->Compare(*start, candidate) < 0);
  assert(this->Compare(candidate, limit) < 0);
  start->swap(candidate);
}
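<!接上方的調用:user_comparator_默認是BytewiseComparator,下面是它的FindShortestSeparator實現(leveldb源碼中位於util/comparator.cc的BytewiseComparatorImpl類)>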
// Byte-wise shortening: if *start and limit differ at some position within
// their common length, and the byte in *start can be incremented while
// staying below the byte in limit, truncate *start right after that
// incremented byte. Otherwise *start is left as is.
void FindShortestSeparator(std::string* start,
                           const Slice& limit) const override {
  // Find length of common prefix
  const size_t shared_limit = std::min(start->size(), limit.size());
  size_t pos = 0;
  for (; pos < shared_limit; ++pos) {
    if ((*start)[pos] != limit[pos]) {
      break;
    }
  }

  if (pos >= shared_limit) {
    // Do not shorten if one string is a prefix of the other
    return;
  }

  const uint8_t start_byte = static_cast<uint8_t>((*start)[pos]);
  const uint8_t limit_byte = static_cast<uint8_t>(limit[pos]);
  const bool can_increment = start_byte < static_cast<uint8_t>(0xff);
  if (!can_increment || start_byte + 1 >= limit_byte) {
    return;
  }

  (*start)[pos]++;
  start->resize(pos + 1);
  assert(Compare(*start, limit) < 0);
}
<!獲得一個大於等於*key的較短的後繼key,值存在*key中返回,
流程不是太難理解,這裏舉幾個例子:
1.key("foo"),找到,key返回"g"。
2.key("\xff\xff"),未找到,key還是"\xff\xff"。
內部遇到0xff字節時會直接跳過,繼續尋找第一個可以加1的字節。
這裏忽略了ValueType的比較,因爲從處理流程來看並不涉及到它。
>
void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
Slice user_key = ExtractUserKey(*key);
std::string tmp(user_key.data(), user_key.size());
user_comparator_->FindShortSuccessor(&tmp);
if (tmp.size() < user_key.size() &&
user_comparator_->Compare(user_key, tmp) < 0) {
// User key has become shorter physically, but larger logically.
// Tack on the earliest possible number to the shortened user key.
PutFixed64(&tmp,
PackSequenceAndType(kMaxSequenceNumber, kValueTypeForSeek));
assert(this->Compare(*key, tmp) < 0);
key->swap(tmp);
}
}
<!接上方的調用:下面是BytewiseComparator的FindShortSuccessor實現(leveldb源碼中位於util/comparator.cc的BytewiseComparatorImpl類)>
// Byte-wise successor: increments the first byte of *key that is not 0xff
// and truncates *key right after it. A key made up only of 0xff bytes (or
// an empty key) is left unchanged.
void FindShortSuccessor(std::string* key) const override {
  const size_t length = key->size();

  // Find first character that can be incremented
  size_t pos = 0;
  while (pos < length &&
         static_cast<uint8_t>((*key)[pos]) == static_cast<uint8_t>(0xff)) {
    ++pos;
  }

  if (pos == length) {
    // *key is a run of 0xffs.  Leave it alone.
    return;
  }

  const uint8_t current = (*key)[pos];
  (*key)[pos] = current + 1;
  key->resize(pos + 1);
}
<!LookupKey的構造,流程還是比較清晰的,這裏的ValueType默認是最大即kTypeValue>
// Builds the lookup buffer for "user_key" at sequence number "s":
//   [varint32 (user key size + 8)][user key bytes][8-byte tag]
// The tag packs "s" together with kValueTypeForSeek.
LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) {
  const size_t usize = user_key.size();

  // Conservative size estimate: 13 = 5 + 8, where 5 is the maximum length
  // of the varint32 length prefix and 8 is the sequence-number/value-type
  // tag.
  const size_t needed = usize + 13;

  // Short keys use the inline buffer; longer ones get a heap allocation
  // that the destructor releases.
  char* const buffer = (needed <= sizeof(space_)) ? space_ : new char[needed];
  start_ = buffer;

  char* cursor = EncodeVarint32(buffer, usize + 8);
  kstart_ = cursor;

  memcpy(cursor, user_key.data(), usize);
  cursor += usize;

  EncodeFixed64(cursor, PackSequenceAndType(s, kValueTypeForSeek));
  cursor += 8;
  end_ = cursor;
}
} // namespace leveldb
三、總結
無它,看不懂的多看幾遍就懂了。
參考:《leveldb實現解析》