Redis 3.0.4 字典

MLXY

2020-06-18

redis的字典使用哈希表作为底层实现，一个哈希表里面可以有多个哈希表，而每个哈希表节点就保存了字典中的一个键值对。

1.哈希表　　　　

typedef struct dictht {  //哈希表
    dictEntry **table;      //存放一个数组的地址，数组存放着哈希表节点dictEntry的地址
    unsigned long size;     //哈希表table的大小.初始化大小是4
    unsigned long sizemask; //用于将哈希值映射到table的位置索引,它的值总是等于size-1
    unsigned long used;     //记录哈希表已有的节点的数值
} dictht;

2.哈希表节点

typedef struct dictEntry {
    void *key;          //key
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v; //value
    struct dictEntry *next; //指向下一个节点的指针
} dictEntry;

3.字典

typedef struct dict {
    dictType *type; //指向dictType结构，dictType结构中包含自定义的函数，这些函数使得key和value能够存储任何类型的数据。
    void *privdata; //私有数据
    dictht ht[2];    //ht[0] ht[1] 
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    int iterators; /* number of iterators currently running */
} dict;
typedef struct dictType {
    unsigned int (*hashFunction)(const void *key);
    void *(*keyDup)(void *privdata, const void *key);
    void *(*valDup)(void *privdata, const void *obj);
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    void (*keyDestructor)(void *privdata, void *key);
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

哈希结构图如图：

Redis 3.0.4 字典

4.哈希算法

　　要将一个键值加入到字典里面，程序会先根据键值对的键计算出哈希值和索引值，然后再根据索引值，将包含新键值对的哈希表节点放到哈希表数组指定索引上面。

　　1.用于计算整型值的hash算法

/* Thomas Wang‘s 32 bit Mix Function */
unsigned int dictIntHashFunction(unsigned int key)
{   
    key += ~(key << 15);
    key ^=  (key >> 10);
    key +=  (key << 3);
    key ^=  (key >> 6);
    key += ~(key << 11);
    key ^=  (key >> 16);
    return key;
}

　　2.MurmurHash2哈希算法

　　优点是：即使输入的键是有规律的，算法仍能给出一个很好的随机分布性，并且算法的计算速度也非常快

//MurmurHash2哈希算法
unsigned int dictGenHashFunction(const void *key, int len) {
    /* ‘m‘ and ‘r‘ are mixing constants generated offline.
     They‘re not really ‘magic‘, they just happen to work well.  */
    uint32_t seed = dict_hash_function_seed;
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    /* Initialize the hash to a ‘random‘ value */
    uint32_t h = seed ^ len;

    /* Mix 4 bytes at a time into the hash */
    const unsigned char *data = (const unsigned char *)key;

    while(len >= 4) {
        uint32_t k = *(uint32_t*)data;

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    /* Handle the last few bytes of the input array  */
    switch(len) {
    case 3: h ^= data[2] << 16;
    case 2: h ^= data[1] << 8;
    case 1: h ^= data[0]; h *= m;
    };

    /* Do a few final mixes of the hash to ensure the last few
     * bytes are well-incorporated. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return (unsigned int)h;
}

　　3.djb哈希算法

　　　　算法的思想是利用字符串中的ascii码值与一个随机seed，通过len次变换，得到最后的hash值。

//djb哈希算法
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
    unsigned int hash = (unsigned int)dict_hash_function_seed;

    while (len--)
        hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
    return hash;
}

5.解决键冲突

　　如果是有键值对被分配到哈希表数组的同一个索引上面，redis的哈希表利用链表地址法，每一个哈希表节点都有一个next指针，多个哈希表节点都有一个next指针，多个哈希表可以用next指针构成一个单链表，被分配到同一个索引上的多个节点可以用这个单向链表连接起来。也就是哈希链表(忘记是在那本书上看到过还是哪个学长讲过)

6.rehash

　　rehash是随着操作的不断执行，哈希表保存的键值对会逐渐增多或减少，为了使哈希表的负载因子维持在一个合理的范围内，当哈希表保存饿键值对数量太多或者太少时，程序都要对哈希表进行rehash。

　　rehash分为三步：

　　　　1.为ht[1]分配合适空间大小；

　　　　　　如果是执行的扩容操作ht[1]分配的空间大小是第一个大于等于ht[0].used*2的 pow(2,n);

　　　　　　如果是执行的缩容操作ht[1]分配的空间大小是第一个大于等于ht[0].used的pow(2,n);

　　　　2.将ht[0]中所有的键值对重新计算键的hash值和索引值，然后将键值对放置到ht[1]上；

　　　　3.当所有的ht[0]都迁移到ht[1]之后，释放ht[0],将ht[1]置为ht[0]，并给ht[1]新创建一个空的hash表

　　程序会自动rehash操作：

　　　　1.服务器目前没有在执行bgsave命令或者bgrewriteaof命令，并且哈希表的负载因子大于等于1；

　　　　2.服务器目前在执行bgsave命令或者bgrewriteaof命令，并且哈希表的负载因子大于等于5；

　　负载因子load_factor = ht[0].used / ht[0].size

//根据size调整d的哈希表
int dictExpand(dict *d, unsigned long size)
{
    dictht n; /* the new hash table */
    unsigned long realsize = _dictNextPower(size); // 获取第一个pow(2,x) 大于size的数


    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    //dictIsRehashing查看当前dict是否在rehash
    //已有键值对的数量大于size
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* Is this the first initialization? If so it‘s not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}

7.渐进式rehash

哈希表渐进式rehash步骤:

　　1.为ht[1]分配空间，让字典同时拥有ht[0]和ht[1]两个哈希表；

　　2.在字典中维持一个索引计数器变量，rehashidx,并将值值为0，表示rehash开始；

　　3.在rehash期间，每次对字典执行增删查改操作时，程序除了执行指定的操作，还会顺带将ht[0]哈希表在rehashidx索引上的所有键值对rehash到ht[1]，当rehash完成后，程序将rehashidx属性值增一；

　　4.随着字典操作的不断执行，最终在某个时间点上ht[0]的所有键值对都会被rehash到ht[1]，这时程序会讲rehashidx属性值为-1；

　　渐进式rehash将rehash键值所需的计算工作均摊到对字典的操作上，从而避免集中式rehash带来庞大的计算量。

//单步迭代  只有在dict的索引为0时开始
static void _dictRehashStep(dict *d) {
    if (d->iterators == 0) dictRehash(d,1);
}
//n步进行rehash
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0;

    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;

        /* Note that rehashidx can‘t overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        //dictEntry[rehashidx] 不为空
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        //遍历hash table[rehashidx]的全链表
        while(de) {
            unsigned int h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            de->next = d->ht[1].table[h]; //插入到表头
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Check if we already rehashed the whole table... */
    if (d->ht[0].used == 0) {
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    return 1;
}

哈希表 redis

安科网

Redis 3.0.4 字典

MLXY

MLXY

相关推荐

redis中scan命令的基本实现方法

什么是hash?

8-哈希表-Scala实现

《数据结构与算法之美》15——散列表（二）如何实现工业级别的散列表

memcached 如何处理容错的？

数组中重复的数字

redis基本操作 —— hash

哈希表 Map Golang实现，使用红黑树和AVL树-性能爆表-非递归版本

数据结构与算法-java-哈希表

深入理解Java编程性能调优——深入浅出HashMap的设计与优化

【数据结构】哈希表

Redis 字典实现

哈希表查找

[redis]dict和rehash

因为不会Redis的scan命令，我被开除了

JAVA数据结构与算法之哈希表

Redis设计与实现-1.数据结构（1）

redis 6源码解析之 dict

Redis底部的几种存储结构（sds、dict、ziplist、intset、skiplist）

Python实现哈希表

MLXY