dict

HashKey最多有LONG_MAX個桶。

在redis中最基本的三個數據結構是dict、adlist和sds，其中dict是redis中最重要的數據結構，其key-value的映射關係就是通過dict來實現的。dict的內部實現是hash table，這個哈希表的大小會依據表中的元素個數動態增加或減少；同時哈希表使用鏈表法（每個桶掛一條dictEntry鏈表）來解決哈希衝突。具體實現在dict.h和dict.c文件中。

字典實現中主要用到如下5個結構體：

/*
 * Top-level dictionary handle.
 *
 * A dict owns two hash tables so that entries can be rehashed
 * incrementally from ht[0] into ht[1]. Table 0 is the one used by
 * preference; when it runs out of room dictExpand() is called to grow the
 * hash table, and table 1 is prepared as the target of the incremental
 * rehash. Once the rehash is complete, table 0 is released and table 1 is
 * stored in slot 0.
 */
typedef struct dict {
    dictType *type;
    void *privdata; /* per-dict private data, used to tell different dicts apart */
    dictht ht[2];   /* [0] = table in use, [1] = incremental-rehash target */
    int rehashidx;  /* rehashing not in progress if rehashidx == -1.
                     * Otherwise: index in ht[0].table of the next item
                     * that needs rehashing. A resize is always followed
                     * by a rehash. */
    int iterators;  /* number of iterators currently running.
                     * Kept mainly so that rehashing is avoided while
                     * iterators exist: rehashing under a live iterator
                     * could lose or duplicate values. Resizing is still
                     * allowed while iterators exist. */
} dict;

/* One hash table: the bucket array plus its bookkeeping counters. */
typedef struct dictht {
    dictEntry **table;      /* address of an array of pointers to dictEntry */
    unsigned long size;     /* length of table, i.e. the number of slots
                             * (buckets); normally a power of two */
    unsigned long sizemask; /* size - 1 */
    unsigned long used;     /* total number of dictEntry items stored;
                             * _dictExpandIfNeeded() compares it with size
                             * to decide when the table must grow */
} dictht;

/* One key/value pair stored in a hash table bucket. */
typedef struct dictEntry {
    void *key;
    void *val;
    struct dictEntry *next; /* collisions are resolved by chaining entries in a linked list */
} dictEntry;
/*
 * Table of function pointers holding the methods used to handle the data
 * stored in a dict.
 *
 * Going by the member names, the callbacks cover hashing a key,
 * duplicating a key or value, comparing two keys, and destroying a key or
 * value; the code that invokes them is not part of this excerpt.
 * Every callback except hashFunction receives privdata as its first
 * argument.
 */
typedef struct dictType {
    unsigned int (*hashFunction)(const void *key);
    void *(*keyDup)(void *privdata, const void *key);
    void *(*valDup)(void *privdata, const void *obj);
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    void (*keyDestructor)(void *privdata, void *key);
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

typedef struct dictIterator {

    dict *d;

    int table;

    int index;

    int safe;//=1 支持多線程安全的iterator,有操作函數保證。//safe操作函數的dictNext會對dict->iterators++,

    /* If safe is set to 1 this is a safe iteartor, that means, you can call

     78  * dictAdd, dictFind, and other functions against the dictionary even while

     79  * iterating. Otherwise it is a non safe iterator, and only dictNext()

     80  * should be called while iterating. */因爲safe iterator在使用過程中,只要不釋放(iterators--),就不會進行實質的rehash,不會引起調用者預料

         不到的錯誤。

    dictEntry *entry, *nextEntry;
} dictIterator;

redis中用到的整數hash、字符串hash算法如下，做個備份：

/* Thomas Wang's 32 bit Mix Function.
 *
 * Runs the integer key through six shift-and-combine rounds (alternating
 * additions and XORs) and returns the mixed value. */
unsigned int dictIntHashFunction(unsigned int key)
{
    unsigned int h = key;

    h = h + ~(h << 15);
    h = h ^ (h >> 10);
    h = h + (h << 3);
    h = h ^ (h >> 6);
    h = h + ~(h << 11);
    h = h ^ (h >> 16);

    return h;
}
/* Generic hash function (a popular one from Bernstein).
 * I tested a few and this was the best.
 * Author's note: practically everything ends up using this one.
 *
 * buf: the bytes to hash.
 * len: how many bytes of buf to consume.
 *
 * Returns the hash value: it starts at 5381 and is updated to
 * hash * 33 + c for every byte c. A len of zero or less consumes no bytes
 * and returns the starting value 5381. */
unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
    unsigned int hash = 5381;

    /* Count upwards to len instead of using `while (len--)`: with a
     * negative len the countdown form would keep reading far beyond buf. */
    for (int i = 0; i < len; i++) {
        hash = ((hash << 5) + hash) + buf[i]; /* hash * 33 + c */
    }

    return hash;
}

ReSize (用於創建或擴展HashTable)

  Note that even when dict_can_resize is set to 0, not all resizes are

  prevented: an hash table is still allowed to grow if the ratio between

  the number of elements and the buckets > dict_force_resize_ratio. */

       static int dict_can_resize = 1; //

       static unsigned int dict_force_resize_ratio = 5; //此時不管是否允許，都要引起resize,否則前臺體驗下降。後端內存壓力增大沒有辦法。
總的說來，當系統中有後臺子進程在運行時（linux下一切都是進程），不允許自動調整大小，這是爲了使類linux系統的copy-on-write有更好的性能（沒有調整大小，就沒有rehash，這樣父進程的db沒有改變，子進程就不需要真的copy數據）。在後臺子進程退出後，又會允許resize。
接下來我們看看自動調整大小的過程。

什麼時候dict做擴容

在數據插入的時候會調用dictKeyIndex（只在插入數據時調用：key已存在時返回-1，表示錯誤；不存在時，若正在rehashing，則返回ht[1]中的table位置）。該方法裏會調用_dictExpandIfNeeded，判斷dict是否需要擴容（擴容之後纔會引起rehash）：當dict中的元素個數大於或等於桶的個數時，調用dictExpand擴展hash table（是否真正擴容還要看dict_can_resize等條件）。

/* Expand the hash table if needed */ (因爲在數據插入時被調用，我們根據hash已有的統計信息，判斷是否要Expand.

static int _dictExpandIfNeeded(dict *d)

{

/* If the hash table is empty expand it to the intial size,

* if the table is “full” dobule its size. */

if (dictIsRehashing(d)) return DICT_OK;

if (d->ht[0].size == 0)

return dictExpand(d, DICT_HT_INITIAL_SIZE);

if (d->ht[0].used >= d->ht[0].size && dict_can_resize)

return dictExpand(d, ((d->ht[0].size > d->ht[0].used) ?

d->ht[0].size : d->ht[0].used)*2); // 爲什麼需要判斷??????

return DICT_OK;

}

ReHash

通常情況下，所有的數據都存放在dict的ht[0]中，ht[1]只在rehash的時候使用；但在rehash過程中，兩個ht都會用到。

dict進行rehash的時候，將ht[0]中的所有數據rehash到 ht[1]中。然後將ht[1]賦值給ht[0]，並清空ht[1]。

rehash有2種工作模式

lazy rehashing：在每次對dict進行操作的時候執行一個slot的rehash。_dictRehashStep中也會調用dictRehash，而_dictRehashStep每次僅會把一個桶從ht[0] rehash到ht[1]；但由於_dictRehashStep是被dictGetRandomKey、dictFind、dictGenericDelete、dictAdd調用的，因此在每次dict增刪查改時都會被調用，這無疑就加快了rehash過程。共N步，每一步移一個桶。

active rehashing：每100ms裏面使用1ms時間進行rehash。serverCron中，當沒有後臺子進程時，會調用incrementallyRehash，最終調用dictRehashMilliseconds。incrementallyRehash的時間較長，rehash的個數也比較多。這裏每次執行 1 millisecond 的 rehash 操作；如果未完成 rehash，會在下一個 loop 裏面繼續執行。

tryResizeHashTables最終被serverCron調用：在沒有後臺子進程（bgsave/bgrewrite）時，serverCron每循環10次調用一次（見下面代碼中的 loops % 10 判斷），而不是每次循環都調用。

static int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
---
if (server.bgsavechildpid == -1 && server.bgrewritechildpid == -1) {

        if (!(loops % 10)) tryResizeHashTables();

        if (server.activerehashing) incrementallyRehash();

    }
---

}

接下來看下rehash，主要在dictRehash中完成。先看下什麼時候進行rehash。

在如上的serverCron中，當沒有後臺子進程時，會調用incrementallyRehash，（經dictRehashMilliseconds）最終調用dictRehash。incrementallyRehash的時間較長，rehash的個數也比較多。

另外在_dictRehashStep中，也會調用dictRehash，而_dictRehashStep每次僅會把一個桶從ht[0] rehash到ht[1]（夠緩慢的），但由於_dictRehashStep是被dictGetRandomKey、dictFind、dictGenericDelete、dictAdd調用的，因此在每次dict增刪查改時都會被調用，這無疑就加快了rehash過程。

我們再來看看rehash過程。dictRehash每次增量rehash n個桶，由於在自動調整大小時已設置好了ht[1]的大小，因此rehash的主要過程就是遍歷ht[0]，取得key，然後將該key按ht[1]的桶的大小重新計算位置並放入ht[1]；全部rehash完後將ht[0]指向ht[1]，然後將ht[1]清空。

Redis dict

dict

DAPPER 事務 TRANSACTION

MySQL Introduction

Membase存儲

Membase

Membase Cluster Manager

Redis dict

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結