Redis源碼分析——字典(dict)

Redis版本:5.0.5
文件：dict.h dict.c

字典概念

字典，又稱爲符號表、關聯數組或映射，是一種用於保存鍵值對的抽象數據結構。
在字典中，一個鍵可以和一個值進行關聯（或者說爲映射），這些關聯的鍵和值就稱爲鍵值對。
字典中的每個鍵都是獨一無二的，程序可以在字典中根據鍵查找與之關聯的值，或者通過鍵來更新值，又或者根據鍵來刪除整個鍵值對等。

字典的結構

哈希表

/* A single hash table: a bucket array plus its bookkeeping counters. */
typedef struct dictht {
    /* Bucket array; each slot is the head of a chain of dictEntry nodes */
    dictEntry **table;
    /* Size of the hash table (number of slots in `table`) */
    unsigned long size;
    /* Mask ANDed with a hash to get a bucket index; set to size-1 when the
     * table is allocated */
    unsigned long sizemask;
    /* Number of nodes currently stored in the hash table */
    unsigned long used;
} dictht;

哈希表節點

/* One node of a hash table: a key, its value, and a link to the next node
 * that landed in the same bucket. */
typedef struct dictEntry {
    /* Key */
    void *key;
    /* Value: either a pointer, a uint64_t, an int64_t, or a double */
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    /* Next node in the same bucket, forming a singly linked chain */
    struct dictEntry *next;
} dictEntry;

dictType

/* Table of callbacks that customise how a dict treats its keys and values.
 * Every callback except hashFunction receives the dict's privdata pointer
 * as its first argument. */
typedef struct dictType {
    /* Computes the hash value of a key */
    uint64_t (*hashFunction)(const void *key);
    /* Duplicates a key */
    void *(*keyDup)(void *privdata, const void *key);
    /* Duplicates a value */
    void *(*valDup)(void *privdata, const void *obj);
    /* Compares two keys */
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    /* Destroys a key */
    void (*keyDestructor)(void *privdata, void *key);
    /* Destroys a value */
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

字典dict

/* The dictionary itself: a callback table, user data for those callbacks,
 * and two hash tables used for incremental rehashing. */
typedef struct dict {
    /* Callback table describing how keys and values are handled */
    dictType *type;
    /* Private data passed to the dictType callbacks */
    void *privdata;
    dictht ht[2]; // two tables: ht[0] is the main table, ht[1] is the target that entries are moved into during a rehash
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    unsigned long iterators; /* number of iterators currently running */
} dict;

字典的創建、插入、Rehash、刪除查找等函數(很詳細的在代碼中進行註釋講解)

1.創建函數

/* Public constructor: allocate a dict and put it in its initial state.
 *
 * Only the dict structure itself is allocated here. The bucket arrays are
 * not: _dictExpandIfNeeded allocates ht[0] when it finds its size is 0,
 * i.e. when the first key/value pair is added. */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *created = zmalloc(sizeof(*created));

    _dictInit(created, type, privDataPtr);
    return created;
}

/* Initialize an already-allocated dict: store the callback table and the
 * private data, mark it as not rehashing and free of iterators, and reset
 * both hash tables via _dictReset. Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    d->type = type;
    d->privdata = privDataPtr;
    d->rehashidx = -1;  /* -1 means "no rehash in progress" */
    d->iterators = 0;

    for (int i = 0; i < 2; i++) {
        _dictReset(&d->ht[i]);
    }
    return DICT_OK;
}

2.插入函數

dictAdd函數是暴露給用戶的函數，其內部調用了dictAddRaw函數來進行添加

/* Add a key/value pair to the dict.
 *
 * The entry is created by dictAddRaw; the value is then stored in it.
 * Returns DICT_OK on success, or DICT_ERR when dictAddRaw returns NULL
 * (for example because the key already exists). */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *added = dictAddRaw(d, key, NULL);

    if (added == NULL) {
        return DICT_ERR;
    }

    dictSetVal(d, added, val);
    return DICT_OK;
}

dictAddRaw函數

/* Low-level add: create an entry for `key` and link it into the dict,
 * leaving the value for the caller to set.
 *
 * Returns the new entry, or NULL when _dictKeyIndex reports -1 (the key is
 * already present, or the table could not be expanded). In the "already
 * present" case *existing, if non-NULL, is filled in by _dictKeyIndex. */
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
{
    /* If a rehash is in progress, push it forward by one step first. */
    if (dictIsRehashing(d)) {
        _dictRehashStep(d);
    }

    /* Locate the bucket for the new key; -1 means it cannot be inserted. */
    long slot = _dictKeyIndex(d, key, dictHashKey(d, key), existing);
    if (slot == -1) {
        return NULL;
    }

    /* While rehashing, new entries go into ht[1]; otherwise into ht[0].
     * This must be decided after _dictKeyIndex, which may start a rehash. */
    dictht *target = &d->ht[dictIsRehashing(d) ? 1 : 0];

    /* Link the new node at the head of the bucket's chain. */
    dictEntry *node = zmalloc(sizeof(*node));
    node->next = target->table[slot];
    target->table[slot] = node;
    target->used++;

    dictSetKey(d, node, key);
    return node;
}

_dictKeyIndex函數

/* Return the bucket index where `key` (with precomputed `hash`) should be
 * inserted, or -1 if it cannot be inserted.
 *
 * -1 is returned when expanding the table fails, or when an equal key is
 * already stored; in the latter case *existing (if the pointer is non-NULL)
 * is set to the entry holding that key. Otherwise *existing is set to NULL.
 *
 * When a rehash is in progress both tables are searched and the returned
 * index refers to ht[1]; otherwise only ht[0] is searched. */
static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
{
    unsigned long slot;
    unsigned long t = 0;
    dictEntry *node;

    if (existing != NULL) {
        *existing = NULL;
    }

    /* Make sure there is room for one more entry. */
    if (_dictExpandIfNeeded(d) == DICT_ERR) {
        return -1;
    }

    do {
        /* Bucket index of the key in table t. */
        slot = hash & d->ht[t].sizemask;

        /* Walk the bucket's chain looking for an equal key. */
        for (node = d->ht[t].table[slot]; node != NULL; node = node->next) {
            if (key == node->key || dictCompareKeys(d, key, node->key)) {
                if (existing != NULL) {
                    *existing = node;
                }
                return -1;
            }
        }
        /* ht[1] only needs checking while a rehash is in progress. */
    } while (dictIsRehashing(d) && ++t <= 1);

    return slot;
}

_dictExpandIfNeeded函數

/* Grow the hash table if the next insertion requires it.
 *
 * - While rehashing, nothing is done and DICT_OK is returned.
 * - If ht[0] has size 0 (nothing allocated yet), it is created with
 *   DICT_HT_INITIAL_SIZE slots.
 * - Otherwise the table is grown to used*2 only when used >= size AND
 *   either resizing is enabled (dict_can_resize) or used/size exceeds
 *   dict_force_resize_ratio.
 *
 * Returns the result of dictExpand when an expansion is attempted, and
 * DICT_OK otherwise. */
static int _dictExpandIfNeeded(dict *d)
{
    const dictht *main_ht = &d->ht[0];

    /* An incremental rehash is already running. */
    if (dictIsRehashing(d)) {
        return DICT_OK;
    }

    /* First insertion: allocate the initial table. */
    if (main_ht->size == 0) {
        return dictExpand(d, DICT_HT_INITIAL_SIZE);
    }

    /* Still fewer elements than buckets: no need to grow. */
    if (main_ht->used < main_ht->size) {
        return DICT_OK;
    }

    /* Resizing is disabled and the load has not passed the forced ratio. */
    if (!dict_can_resize &&
        main_ht->used / main_ht->size <= dict_force_resize_ratio) {
        return DICT_OK;
    }

    /* Ask for twice the number of stored elements. */
    return dictExpand(d, main_ht->used * 2);
}

dictExpand函數

/* Create the hash table, or prepare a larger one for incremental rehashing.
 *
 * `size` is the requested capacity; the real capacity is whatever
 * _dictNextPower(size) returns (defined elsewhere; presumably `size`
 * rounded up to a power of two, e.g. 6 -> 8, 17 -> 32 -- TODO confirm).
 *
 * Returns DICT_ERR when:
 *   - a rehash is already in progress,
 *   - `size` is smaller than the number of elements already in ht[0],
 *   - the real capacity, or its byte size, overflows unsigned long/size_t,
 *   - the real capacity equals the current size of ht[0].
 * Returns DICT_OK otherwise. If ht[0] has no table yet the new table becomes
 * ht[0]; otherwise it becomes ht[1] and rehashidx is set to 0 so that
 * rehashing starts. */
int dictExpand(dict *d, unsigned long size)
{
    /* Refuse while rehashing, or if the requested size cannot hold the
     * elements that are already stored. */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    dictht n; /* the new hash table */
    unsigned long realsize = _dictNextPower(size);

    /* Detect overflows: the rounded capacity must not be smaller than the
     * request, and the byte size of the bucket array must not wrap around.
     * Without this check a huge `size` would allocate a too-small array
     * while sizemask still spans the full range. */
    if (realsize < size || realsize * sizeof(dictEntry*) < realsize)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Allocate the new, zero-filled bucket array. */
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* First allocation: this is not a rehash, just install the table as
     * ht[0] so it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    /* rehashidx == 0 marks the rehash as started, at bucket 0 */
    d->rehashidx = 0;
    return DICT_OK;
}

總結一下插入的流程。
1.第一次插入：調用插入函數，發現是第一次插入，就需要給ht[0]分配內存，然後找到要插入的下標進行插入。
2.之後的插入：調用插入函數，判斷空間是否還能插入，可以插入則直接插入。空間不足，就需要新的table，就爲ht[1]分配空間，然後將rehashidx置爲0，進行rehash，這個時候的插入都將會插入到ht[1]中。

3.Rehash函數

_dictRehashStep函數(這個函數在上文中的dictAddRaw函數中出現)

/* Perform a single step of incremental rehashing (dictRehash with n = 1),
 * but only when no iterators are currently running on the dict. */
static void _dictRehashStep(dict *d) 
{
    if (d->iterators != 0) {
        return;
    }
    dictRehash(d, 1);
}

dictRehash函數

/* Perform up to n steps of incremental rehashing, moving entries from ht[0]
 * to ht[1].
 *
 * Each step migrates one non-empty bucket, i.e. every entry chained in it.
 * To bound the work done in one call, at most n*10 empty buckets are
 * visited; once that budget is used up the function returns early.
 *
 * Returns 0 if the dict is not rehashing, or if the rehash completed during
 * this call (ht[1] has then replaced ht[0]). Returns 1 if ht[0] still holds
 * entries that need to be moved. */
int dictRehash(dict *d, int n) {
    // Budget of empty buckets that may be skipped in this call
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0;

    // Migrate up to n buckets, stopping early once ht[0] holds no entries
    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;

        // rehashidx must stay inside ht[0]'s bucket array
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        // Skip empty buckets by advancing rehashidx
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            // Empty-bucket budget exhausted: stop for now and return 1 so
            // the caller knows that more rehashing is still needed
            if (--empty_visits == 0) return 1;
        }
        de = d->ht[0].table[d->rehashidx];
        // Move every entry of this bucket from ht[0] to ht[1]
        while(de) {
            uint64_t h;

            nextde = de->next;
            // Recompute the bucket index using ht[1]'s mask
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            // Link the entry at the head of its new bucket in ht[1]
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        // The old bucket is now empty; move on to the next one
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    // Rehash finished: free the old bucket array, promote ht[1] to ht[0],
    // reset ht[1] and mark the dict as no longer rehashing
    if (d->ht[0].used == 0) {
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    return 1;
}

總結rehash
從上面的代碼中可以看出，Redis中的字典採用漸進式rehash：並不是一次性把ht[0]中的所有元素遷移到ht[1]，而是在對字典進行操作時（例如dictAddRaw中調用_dictRehashStep，即dictRehash(d,1)）每次只遷移一個桶。這樣即使ht[0]中的元素非常多，也不會因爲單次rehash耗時過長而導致服務器阻塞。

掌握了上文中講的所有函數後，大家自行去看刪除查找的源碼就沒有什麼難度了。

Redis源碼分析——字典(dict)

字典概念

字典的結構

字典的創建、插入、Rehash、刪除查找等函數(很詳細的在代碼中進行註釋講解)

釘釘打卡速度慢

Nginx R31 doc 官方文檔-01-nginx 如何安裝

Qt/C++音視頻開發74-合併標籤圖形/生成yolo運算結果圖形/文字和圖形合併成一個/水印濾鏡

挑戰程序設計競賽 2.2章習題 POJ - 3617 Best Cow Line 貪心

字節面試：MySQL什麼時候鎖表？如何防止鎖表？

.NET8連接SQL SERVER 2008 R2 報：證書鏈是由不受信任的頒發機構頒發的

golang開發環境搭建(win10)

python計算機視覺學習筆記——PIL庫的用法

Golang初學：獲取程序內存使用情況，std runtime

線程同步機制——POSIX信號量、互斥量、條件變量

c++11線程的使用坑點總結

linux創建進程do_fork()詳解

Muduo學習筆記—Eventloop Channel EPollPoller類

線程池（linux）

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結