最近,我想通過redis的源碼來學習redis。雖然平時工作中用得不多,不過對redis還是比較感興趣的,畢竟它的性能是不錯的。redis是一個開源的項目,我們可以通過源代碼去了解redis。我後面會通過自己的學習,寫一些關於redis源碼的帖子。帖子的主要內容是分析代碼設計,而並不會對源碼進行詳細解說。如果有不對的地方,請指正。源碼是redis 3.0.3版本。
dict 字典
一、數據結構
//字典條目 typedef struct dictEntry { void *key; union { void *val; uint64_t u64; int64_t s64; double d; } v; struct dictEntry *next; } dictEntry; typedef struct dictType { unsigned int (*hashFunction)(const void *key); //計算key的哈希值 void *(*keyDup)(void *privdata, const void *key); //複製key的函數 void *(*valDup)(void *privdata, const void *obj); //複製value的函數 int (*keyCompare)(void *privdata, const void *key1, const void *key2);//比較key相等的函數 void (*keyDestructor)(void *privdata, void *key);//銷燬key的函數 void (*valDestructor)(void *privdata, void *obj);//銷燬value的函數 } dictType; //用於存儲字典條件的哈希表 /* This is our hash table structure. Every dictionary has two of this as we * implement incremental rehashing, for the old to the new table. */ typedef struct dictht { dictEntry **table; unsigned long size; unsigned long sizemask; unsigned long used; } dictht; //字典 typedef struct dict { dictType *type; void *privdata; dictht ht[2]; //兩個哈希表,在rehash時會使用兩個哈希表,否則只會使用一個表 long rehashidx; /* rehashing not in progress if rehashidx == -1 */ int iterators; /* number of iterators currently running */ } dict; /* If safe is set to 1 this is a safe iterator, that means, you can call * dictAdd, dictFind, and other functions against the dictionary even while * iterating. Otherwise it is a non safe iterator, and only dictNext() * should be called while iterating. */ typedef struct dictIterator { dict *d; long index; int table, safe; dictEntry *entry, *nextEntry; /* unsafe iterator fingerprint for misuse detection. */ long long fingerprint; } dictIterator;
二、宏實現的簡單函數
舉三個例子:
/* Free an entry's value through the type's destructor, if one is set.
 * Wrapped in do { } while(0) so the macro always behaves as a single
 * statement: the original bare `if (...)` form silently captures a
 * following `else` when the macro is used unbraced inside an if/else. */
#define dictFreeVal(d, entry) do { \
    if ((d)->type->valDestructor) \
        (d)->type->valDestructor((d)->privdata, (entry)->v.val); \
} while(0)

/* Store a value into an entry, duplicating it first when valDup is set.
 * `entry` is parenthesized so expressions like `&e` work as arguments. */
#define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup) \
        (entry)->v.val = (d)->type->valDup((d)->privdata, _val_); \
    else \
        (entry)->v.val = (_val_); \
} while(0)

/* Store a signed 64-bit integer directly into an entry's value union. */
#define dictSetSignedIntegerVal(entry, _val_) \
    do { (entry)->v.s64 = (_val_); } while(0)
dictFreeVal,在釋放字典條目的value時使用。實現中沒使用 do{}while(0),我沒有想明白是爲何不使用,但我覺得應該加的,不然使用不當時會出問題,具體可見我的另一個貼子:http://chhquan.blog.51cto.com/1346841/1358254
dictSetSignedIntegerVal 中加上do{}while(0),應該是爲了阻止以表達式形式使用宏。
三、部分代碼解析
由於dict行爲特點比較多,本貼子打算詳解部分代碼。
1. dict_can_resize
/* Using dictEnableResize() / dictDisableResize() we make possible to
 * enable/disable resizing of the hash table as needed. This is very important
 * for Redis, as we use copy-on-write and don't want to move too much memory
 * around when there is a child performing saving operations.
 *
 * Note that even when dict_can_resize is set to 0, not all resizes are
 * prevented: a hash table is still allowed to grow if the ratio between
 * the number of elements and the buckets > dict_force_resize_ratio. */
static int dict_can_resize = 1;                  /* 1: rehash allowed; 0: only forced rehash */
static unsigned int dict_force_resize_ratio = 5; /* used/buckets ratio that forces a rehash */
dict_can_resize,可控制dict是否可以進行rehash,1 時允許rehash,0 - 通常情況不允許rehash,但如果滿足 條目數/桶 > dict_force_resize_ratio時,仍可進行rehash。通過 dictEnableResize() 或 dictDisableResize() 可以設置 dict_can_resize。這樣設置的目的在於:當redis需要對dict進行保存操作時(寫文件),是要把dict的當前快照作保存,要保持dict不變,但這樣會使字典不能接收寫入操作或是進行rehash,爲了確保dict能正常處理請求,redis採用copy-on-write的策略,即當dict有修改操作時,需要把dict進行復制,以同時支持保存操作和修改操作。由於rehash也是對dict進行修改,也可能會使正在保存的dict進行復制,所以通過把 dict_can_resize 設爲 0 可阻止rehash,從而一定程度上避免複製。但如果 條目數/桶 > dict_force_resize_ratio 時,redis認爲這時dict的條目數相對於桶來說已經太多了,有些桶上所掛的元素個數可能比較多,對dict的效率產生嚴重的影響。所以此時寧可複製dict也要允許rehash以恢復dict的性能。當然具體 dict_force_resize_ratio 是多少,應該由實驗得出吧。又或者如何度量複製與保持dict高效的轉折點也是要進行實驗的,不一定是 條目數/桶,具體也就由實驗得出吧。由於沒有實驗,我也不能多說了。
2. hash計算
計算hash值的函數,具體算法我並不熟悉,跳過。
3. 重置哈希表
//重置哈希表 /* Reset a hash table already initialized with ht_init(). * NOTE: This function should only be called by ht_destroy(). */ static void _dictReset(dictht *ht) { //下面直接覆蓋table的值,調用方需確保table要麼不指向一塊動態內存, //要麼動態內存已被釋放,要麼還有別的指針保留table所指向的動態內存空間 ht->table = NULL; ht->size = 0; ht->sizemask = 0; ht->used = 0; }
4. 創建dict
//創建dict /* Create a new hash table */ dict *dictCreate(dictType *type, void *privDataPtr) { dict *d = zmalloc(sizeof(*d)); _dictInit(d,type,privDataPtr); return d; }
5. 初始化dict
//初始化dict /* Initialize the hash table */ int _dictInit(dict *d, dictType *type, void *privDataPtr) { _dictReset(&d->ht[0]); _dictReset(&d->ht[1]); d->type = type; d->privdata = privDataPtr; d->rehashidx = -1; // -1爲不在rehash狀態,>= 0 爲rehash中 d->iterators = 0; return DICT_OK; }
6. 調整大小
//resize,根據dict中已存儲的條目數進行resize,可擴展哈希表空間也可縮小。 /* Resize the table to the minimal size that contains all the elements, * but with the invariant of a USED/BUCKETS ratio near to <= 1 */ int dictResize(dict *d) { int minimal; if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR; minimal = d->ht[0].used; //按已存儲的條目數進行resize if (minimal < DICT_HT_INITIAL_SIZE) //最小resize大小爲 DICT_HT_INITIAL_SIZE minimal = DICT_HT_INITIAL_SIZE; return dictExpand(d, minimal); } /* Expand or create the hash table */ int dictExpand(dict *d, unsigned long size) { dictht n; /* the new hash table */ unsigned long realsize = _dictNextPower(size); //取大於size的最小的2的冪作爲實際size /* the size is invalid if it is smaller than the number of * elements already inside the hash table */ if (dictIsRehashing(d) || d->ht[0].used > size) return DICT_ERR; /* Rehashing to the same table size is not useful. */ if (realsize == d->ht[0].size) return DICT_ERR; /* Allocate the new hash table and initialize all pointers to NULL */ n.size = realsize; n.sizemask = realsize-1; //size-1,bit爲1的都在低位,用於對哈希值取size的模作爲哈希表的桶號 n.table = zcalloc(realsize*sizeof(dictEntry*)); n.used = 0; /* Is this the first initialization? If so it's not really a rehashing * we just set the first hash table so that it can accept keys. */ if (d->ht[0].table == NULL) { d->ht[0] = n; return DICT_OK; } /* Prepare a second hash table for incremental rehashing */ d->ht[1] = n; d->rehashidx = 0;//>=0,正在rehash中 return DICT_OK; }
7. rehash
/* Incremental rehash: instead of rehashing the whole dict in one go, Redis
 * splits the work into many small steps; each call migrates at most n
 * non-empty buckets from ht[0] to ht[1]. Since some buckets are empty, at
 * most n*10 buckets are visited per call so a single step never runs long.
 * This is one example of Redis splitting a potentially expensive operation
 * into many small ones to keep latency stable. */
/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table, however
 * since part of the hash table may be composed of empty spaces, it is not
 * guaranteed that this function will rehash even a single bucket, since it
 * will visit at max N*10 empty buckets in total, otherwise the amount of
 * work it does would be unbound and the function may block for a long time. */
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0;

    /* Visit buckets of ht[0]; move every entry of a non-empty bucket to ht[1]. */
    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        /* rehashidx records how far the rehash has progressed so far. */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        /* Skip to the next non-empty bucket; total empty-bucket visits
         * are bounded by empty_visits. */
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1; /* 1: rehash not finished yet */
        }
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            unsigned int h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask; /* bucket index in ht[1] */
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++; /* bucket rehashidx is done, move on to the next one */
    }

    /* Once ht[0] is empty, promote ht[1] to ht[0] and reset ht[1]. */
    /* Check if we already rehashed the whole table... */
    if (d->ht[0].used == 0) {
        zfree(d->ht[0].table); /* free ht[0]'s bucket array */
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    return 1;
}
由rehash過程可以看出,在rehash過程中,ht[0]和ht[1]同時具有條目,即字典中的所有條目分佈在ht[0]和ht[1]中,
這時麻煩也就出來了。主要有以下問題:(現在暫不解答是如何解決的)
1.如何查找key。
2.如何插入新的key。
3.如何刪除一個key。
4.如何遍歷dict所有條目,如何確保遍歷順序。
5.如何確保rehash過程不斷插入、刪除條目,而rehash沒有出錯。
6.如何確保迭代器有效,且正確。