魔改redis之添加命令hrandmember

魔改redis之添加命令hrandmember
- 正文
- 参考文献

正文

前言

想从redis的hash表中随机获取键值对，但是发现redis只支持对set取随机值的SRANDMEMBER命令。如果把hash表中的数据在set里再存一份，占用的空间又太大。也可以先通过HLEN获取hash表的大小，随机出一个偏移值，再调用HSCAN获得一组数据；或者直接多次随机、多次取值。但这样效率始终不如SRANDMEMBER(redis的开销主要是网络的开销)。于是想到魔改redis代码，使其支持从hash表中随机获取键值对。客户端jedis打算使用eval来调用redis中新加入的指令。

Set类型与srandmember命令

Set类型的编码可以是OBJ_ENCODING_HT或者OBJ_ENCODING_INTSET，如果集合中的值全是数值，那么Set的编码(底层类型)为OBJ_ENCODING_INTSET, 如果加入了无法被解析为数值的字符串，或者set的大小超过了OBJ_SET_MAX_INTSET_ENTRIES默认512，编码则会变更为OBJ_ENCODING_HT。

OBJ_ENCODING_INTSET就是存储着整数的有序数组。加入新值时先用realloc扩展内存，再使用memmove将对应位置后的数据后移，然后在对应的位置写入新值。

OBJ_ENCODING_HT编码就是dict类型，也就是字典。

srandmember命令的主要处理函数是srandmemberWithCountCommand，如果传入的count值是负数，意味着值可以重复。

如果值可以重复，那么每次随机取出一个成员。

如果值不可重复，并且请求的数量大于等于set的size，则返回set集合中全部的值。

/* Case 1: duplicates are allowed (uniq == 0, i.e. a negative count was
 * given). The reply holds exactly `count` elements, each one drawn by a
 * separate call to setTypeRandomElement(), so the same member may appear
 * more than once. */
if (!uniq) {
    addReplyMultiBulkLen(c,count);
    while(count--) {

        encoding = setTypeRandomElement(set,&ele,&llele);
        /* Intset-encoded members come back in llele and are replied as
         * integers; otherwise the member is the sds string in ele. */
        if (encoding == OBJ_ENCODING_INTSET) {
            addReplyBulkLongLong(c,llele);
        } else {
            addReplyBulkCBuffer(c,ele,sdslen(ele));
        }
    }
    return;
}

/* Case 2: unique elements requested and count >= size. The work is
 * delegated to sunionDiffGenericCommand() with this single key and
 * SET_OP_UNION (per the surrounding text, this returns the whole set). */
if (count >= size) {
    sunionDiffGenericCommand(c,c->argv+1,1,NULL,SET_OP_UNION);
    return;
}

集合的数量没有远远大于请求的数量。将set的值复制到dict中，然后随机删除值，直到数量等于请求的值。

集合数量远大请求的数量。随机取值，加入dict中，数量满足后返回dict中的值。

    /* Case 3: the requested count, multiplied by
     * SRANDMEMBER_SUB_STRATEGY_MUL, exceeds the set size. Copy every
     * member into the temporary dictionary d, then delete random entries
     * until only `count` remain. */
    if (count*SRANDMEMBER_SUB_STRATEGY_MUL > size) {
        setTypeIterator *si;

        /* Add all the elements into the temporary dictionary. */
        si = setTypeInitIterator(set);
        while((encoding = setTypeNext(si,&ele,&llele)) != -1) {
            int retval = DICT_ERR;

            /* Both encodings are stored in d as newly created string
             * objects. */
            if (encoding == OBJ_ENCODING_INTSET) {
                retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL);
            } else {
                retval = dictAdd(d,createStringObject(ele,sdslen(ele)),NULL);
            }
            serverAssert(retval == DICT_OK);
        }
        setTypeReleaseIterator(si);
        serverAssert(dictSize(d) == size);

        /* Remove random elements to reach the right count. */
        while(size > count) {
            dictEntry *de;

            de = dictGetRandomKey(d);
            dictDelete(d,dictGetKey(de));
            size--;
        }
    }


    /* Case 4: the set is much larger than the requested count. Draw random
     * members and insert them into d until `count` distinct ones have been
     * collected. */
    else {
        unsigned long added = 0;
        robj *objele;

        while(added < count) {
            encoding = setTypeRandomElement(set,&ele,&llele);
            if (encoding == OBJ_ENCODING_INTSET) {
                objele = createStringObjectFromLongLong(llele);
            } else {
                objele = createStringObject(ele,sdslen(ele));
            }
            /* Try to add the object to the dictionary. If it already exists
             * free it, otherwise increment the number of objects we have
             * in the result dictionary. */
            if (dictAdd(d,objele,NULL) == DICT_OK)
                added++;
            else
                decrRefCount(objele);
        }
    }

    /* CASE 3 & 4: send the result to the user. */
    {
        dictIterator *di;
        dictEntry *de;

        /* Reply with every key left in d, then release the iterator and
         * the temporary dictionary. */
        addReplyMultiBulkLen(c,count);
        di = dictGetIterator(d);
        while((de = dictNext(di)) != NULL)
            addReplyBulk(c,dictGetKey(de));
        dictReleaseIterator(di);
        dictRelease(d);
    }

Hash类型对比Set类型

Hash类型和Set类型的关系非常密切，在java源码中，往往set类型就是由hash类型实现的。在redis中在数据量较大的时候也十分相似。

前文提到Set类型的编码可以是intset或者是dict，而Hash类型的编码可以是ziplist或者是dict。在当前的redis版本中，还并没有添加hrandmember命令（6.2及之前）。

ziplist中的字符串长度超过OBJ_HASH_MAX_ZIPLIST_VALUE(默认值为64)，或者entry的个数超过OBJ_HASH_MAX_ZIPLIST_ENTRIES(默认值为512)，则会转化为hashtable编码。

ziplist的encoding就是尝试将字符串值解析成long并保存编码。hash类型和set类型最大的区别在于元素个数较少时，内部的编码不同：hash的内部编码是ziplist，而set的内部编码是intset。（个人认为hash和set内部编码不统一是一处失误：使用者对两者有着相似的用法，也就是需求类似，然而底层实现却不同，必然导致代码的重复。事实也确实如此，redis团队似乎因为这个原因迟迟没有添加hrandmember命令。）

hrandmember命令

因为ziplist不能被随机访问。对于ziplist编码的hash表，我们采用以下算法，来保证每个被取出来的entry的概率是一样的。

我们要从长度为m的ziplist中取出n个entry（m>=n）。顺序遍历ziplist，设遍历到某个entry时，剩余未遍历的entry个数（含当前entry）为m'，剩余要取的个数为n'，则以n'/m'的概率取出当前entry。

这样能保证每个entry被取出的概率相同，都为n/m。可用数学归纳法证明。

通过使用这种方式，我们将时间复杂度从O(nm)降为O(m)。

处理hashtable编码时，我们复制srandmember的代码，并稍作修改，避免字符串的复制以提高效率。

注意：当编码为ziplist时，不支持负数的count。虽然也有返回值，但并不会重复，并且个数小于期望值。

/* Reply with random field/value pairs taken from the hash stored at
 * c->argv[1].
 *
 * c: the client issuing the command; the reply is appended to it.
 * l: the requested count. A non-negative value asks for distinct pairs;
 *    a negative value asks for -l pairs that may repeat (honoured only for
 *    the hash table encoding, the ziplist branch never repeats entries).
 *
 * The count is always clamped to the number of entries in the hash, and
 * the map length is announced up front, so every branch below must emit
 * exactly that many pairs.
 *
 * Every path leaves through the `cleanup` label so the hash iterator is
 * released exactly once. */
void hrandmemberWithCountCommand(client *c, long l) {
    unsigned long entryCount, hashSize;
    int uniq = 1;
    hashTypeIterator *hi;
    robj *hash;
    dict *d;
    double randomDouble;
    double threshold;
    unsigned long index = 0;

    if ((hash = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))
        == NULL || checkType(c,hash,OBJ_HASH)) return;

    if(l >= 0) {
        entryCount = (unsigned long) l;
    } else {
        /* Negate in unsigned arithmetic: -l would overflow for LONG_MIN. */
        entryCount = 0UL - (unsigned long) l;
        uniq = 0;
    }

    hashSize = hashTypeLength(hash);
    if(entryCount > hashSize)
        entryCount = hashSize;
    addReplyMapLen(c, entryCount);
    hi = hashTypeInitIterator(hash);

    if(hash->encoding == OBJ_ENCODING_ZIPLIST) {
        /* Single sequential pass: the current entry is taken with
         * probability (pairs still needed) / (entries not yet visited). */
        while (hashTypeNext(hi) != C_ERR && entryCount != 0) {
            /* Scale into [0, 1). With a divisor of RAND_MAX the value could
             * be exactly 1.0 and fail the test below even when the
             * threshold is 1.0, i.e. when every remaining entry must be
             * taken, leaving the reply shorter than announced. */
            randomDouble = rand() / ((double)RAND_MAX + 1.0);
            threshold = ((double)entryCount) / (hashSize - index);
            if(randomDouble < threshold){
                entryCount--;
                addHashIteratorCursorToReply(c, hi, OBJ_HASH_KEY);
                addHashIteratorCursorToReply(c, hi, OBJ_HASH_VALUE);
            }

            index ++;
        }
    } else {
        /* Hash table encoding: adapted from srandmember. */

        /* Case 1: repetitions allowed, draw each pair independently. */
        if(!uniq) {
            while(entryCount--) {
                sds key, value;

                dictEntry *de = dictGetRandomKey(hash->ptr);
                key = dictGetKey(de);
                value = dictGetVal(de);
                addReplyBulkCBuffer(c,key,sdslen(key));
                addReplyBulkCBuffer(c,value,sdslen(value));
            }
            goto cleanup;
        }

        /* Case 2: the whole hash was requested, just walk it. */
        if(entryCount >= hashSize) {

            while (hashTypeNext(hi) != C_ERR) {
                addHashIteratorCursorToReply(c, hi, OBJ_HASH_KEY);
                addHashIteratorCursorToReply(c, hi, OBJ_HASH_VALUE);
            }
            goto cleanup;
        }

        /* Temporary dictionary holding the chosen pairs. It has no dup and
         * no destructor callbacks: it stores the very same sds pointers as
         * the hash, so releasing it does not free any key or value. */
        static dictType dt = {
            dictSdsHash,                /* hash function */
            NULL,                       /* key dup */
            NULL,                       /* val dup */
            dictSdsKeyCompare,          /* key compare */
            NULL,                       /* key destructor */
            NULL,                       /* val destructor */
            NULL                        /* allow to expand */
        };
        d = dictCreate(&dt,NULL);

        /* Case 3: the count is close to the hash size. Reference every
         * pair, then drop random ones until entryCount are left. */
        if(entryCount * HRANDMEMBER_SUB_STRATEGY_MUL > hashSize) {

            /* Add all the elements into the temporary dictionary. */
            while((hashTypeNext(hi)) != C_ERR) {
                int ret = DICT_ERR;
                sds key, value;

                key = hashTypeCurrentFromHashTable(hi,OBJ_HASH_KEY);
                value = hashTypeCurrentFromHashTable(hi,OBJ_HASH_VALUE);
                ret = dictAdd(d, key, value);

                serverAssert(ret == DICT_OK);
            }
            serverAssert(dictSize(d) == hashSize);

            /* Remove random elements to reach the right count. */
            while(hashSize > entryCount) {
                dictEntry *de;

                de = dictGetRandomKey(d);
                dictDelete(d,dictGetKey(de));
                hashSize--;
            }
        }

        /* Case 4: the hash is much larger than the count. Keep drawing
         * random pairs until entryCount distinct ones were collected. */
        else {
            unsigned long added = 0;
            sds sdsKey, sdsVal;

            while(added < entryCount) {
                dictEntry *de = dictGetRandomKey(hash->ptr);
                sdsKey = dictGetKey(de);
                sdsVal = dictGetVal(de);

                /* dictAdd() fails when the key was already picked; nothing
                 * has to be freed in that case because the strings are
                 * still owned by the hash. */
                if (dictAdd(d,sdsKey,sdsVal) == DICT_OK){
                    added++;
                }
            }
        }

        /* Cases 3 & 4: send the collected pairs to the client. */
        {
            dictIterator *di;
            dictEntry *de;
            di = dictGetIterator(d);
            while((de = dictNext(di)) != NULL) {
                sds key = dictGetKey(de);
                sds value = dictGetVal(de);
                addReplyBulkCBuffer(c,key,sdslen(key));
                addReplyBulkCBuffer(c,value,sdslen(value));
            }

            dictReleaseIterator(di);
            dictRelease(d);
        }

    }

cleanup:
    hashTypeReleaseIterator(hi);
}

参考文献

srandmember

redis源码

dewxin