redis 的數據結構優(yōu)化設計

對于實現數據結構來說泵喘，Redis 就給我們提供了兩個優(yōu)秀的設計思想：一個是使用連續(xù)的內存空間，避免內存碎片開銷般妙；二個是針對不同長度的數據纪铺，采用不同大小的元數據，以避免使用統(tǒng)一大小的元數據碟渺，造成內存空間的浪費鲜锚。
在數據訪問方面，你也要知道止状，使用共享對象其實可以避免重復創(chuàng)建冗余的數據烹棉，從而也可以有效地節(jié)省內存空間。不過怯疤，共享對象主要適用于只讀場景浆洗，如果一個字符串被反復地修改，就無法被多個請求共享訪問了集峦。

SDS 內存優(yōu)化設計

SDS 設計了不同類型的結構頭伏社，包括 sdshdr8、sdshdr16塔淤、sdshdr32 和 sdshdr64摘昌。這些不同類型的結構頭可以適配不同大小的字符串，從而避免了內存浪費高蜂。不過聪黎，SDS 除了使用精巧設計的結構頭外，在保存較小字符串時备恤，其實還使用了嵌入式字符串的設計方法稿饰。這種方法避免了給字符串分配額外的空間锦秒，而是可以讓字符串直接保存在 Redis 的基本數據對象結構體中。

要理解嵌入式字符串的設計與實現喉镰，先了解下 Redis 基本數據對象結構體 redisObject：

#define LRU_BITS 24
typedef struct redisObject {
    // redisObject 的數據類型旅择，是應用程序在 Redis 中保存的數據類型，包括 String侣姆、List生真、Hash 等
    unsigned type:4;
    // redisObject 的編碼類型，是 Redis 內部實現各種數據類型所用的數據結構
    unsigned encoding:4;
    // redisObject 的 LRU 時間
    unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
                            * LFU data (least significant 8 bits frequency
                            * and most significant 16 bits access time). */
    // redisObject 的引用計數
    int refcount;
    // 指向值的指針
    void *ptr;
} robj;

變量后使用冒號和數值的定義方法捺宗。這實際上是 C 語言中的位域定義方法柱蟀，可以用來有效地節(jié)省內存開銷。

嵌入式字符串

/* Create a string object with EMBSTR encoding if it is smaller than
 * OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
 * used.
 *
 * The current limit of 44 is chosen so that the biggest string object
 * we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
robj *createStringObject(const char *ptr, size_t len) {
    if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
        return createEmbeddedStringObject(ptr,len);
    else
        return createRawStringObject(ptr,len);
}

/* The actual Redis Object */
#define OBJ_STRING 0    /* String object. */
#define OBJ_LIST 1      /* List object. */
#define OBJ_SET 2       /* Set object. */
#define OBJ_ZSET 3      /* Sorted set object. */
#define OBJ_HASH 4      /* Hash object. */

/* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain
 * string object where o->ptr points to a proper sds string. */
robj *createRawStringObject(const char *ptr, size_t len) {
    return createObject(OBJ_STRING, sdsnewlen(ptr,len));
}

/* ===================== Creation and parsing of objects ==================== */

robj *createObject(int type, void *ptr) {
    robj *o = zmalloc(sizeof(*o));
    o->type = type;
    o->encoding = OBJ_ENCODING_RAW;
    o->ptr = ptr;
    o->refcount = 1;

    /* Set the LRU to the current lruclock (minutes resolution), or
     * alternatively the LFU counter. */
    if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
        o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
    } else {
        o->lru = LRU_CLOCK();
    }
    return o;
}

Redis 會通過設計實現一塊連續(xù)的內存空間偿凭，把 redisObject 結構體和 SDS 結構體緊湊地放置在一起产弹。這樣一來，對于不超過 44 字節(jié)的字符串來說弯囊，就可以避免內存碎片和兩次內存分配的開銷了

/* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is
 * an object where the sds string is actually an unmodifiable string
 * allocated in the same chunk as the object itself. */
robj *createEmbeddedStringObject(const char *ptr, size_t len) {
    robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1);
    struct sdshdr8 *sh = (void*)(o+1);

    o->type = OBJ_STRING;
    o->encoding = OBJ_ENCODING_EMBSTR;
    o->ptr = sh+1;
    o->refcount = 1;
    if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
        o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
    } else {
        o->lru = LRU_CLOCK();
    }

    sh->len = len;
    sh->alloc = len;
    sh->flags = SDS_TYPE_8;
    if (ptr == SDS_NOINIT)
        sh->buf[len] = '\0';
    else if (ptr) {
        memcpy(sh->buf,ptr,len);
        sh->buf[len] = '\0';
    } else {
        memset(sh->buf,0,len+1);
    }
    return o;
}

ziplist 內存優(yōu)化設計

image.png

/* The size of a ziplist header: two 32 bit integers for the total
 * bytes count and last item offset. One 16 bit integer for the number
 * of items field. */
#define ZIPLIST_HEADER_SIZE     (sizeof(uint32_t)*2+sizeof(uint16_t))

/* Size of the "end of ziplist" entry. Just one byte. */
#define ZIPLIST_END_SIZE        (sizeof(uint8_t))

/* Create a new empty ziplist. */
unsigned char *ziplistNew(void) {
    unsigned int bytes = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE;
    unsigned char *zl = zmalloc(bytes);
    ZIPLIST_BYTES(zl) = intrev32ifbe(bytes);
    ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
    ZIPLIST_LENGTH(zl) = 0;
    zl[bytes-1] = ZIP_END;
    return zl;
}

/* We use this function to receive information about a ziplist entry.
 * Note that this is not how the data is actually encoded, is just what we
 * get filled by a function in order to operate more easily. */
typedef struct zlentry {
    unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/
    unsigned int prevrawlen;     /* Previous entry len. */
    unsigned int lensize;        /* Bytes used to encode this entry type/len.
                                    For example strings have a 1, 2 or 5 bytes
                                    header. Integers always use a single byte.*/
    unsigned int len;            /* Bytes used to represent the actual entry.
                                    For strings this is just the string length
                                    while for integers it is 1, 2, 3, 4, 8 or
                                    0 (for 4 bit immediate) depending on the
                                    number range. */
    unsigned int headersize;     /* prevrawlensize + lensize. */
    unsigned char encoding;      /* Set to ZIP_STR_* or ZIP_INT_* depending on
                                    the entry encoding. However for 4 bits
                                    immediate integers this can assume a range
                                    of values and must be range-checked. */
    unsigned char *p;            /* Pointer to the very start of the entry, that
                                    is, this points to prev-entry-len field. */
} zlentry;

每個列表項中都會記錄前一項的長度痰哨。因為每個列表項的長度不一樣，所以如果使用相同的字節(jié)大小來記錄 prevlen匾嘱，就會造成內存空間浪費斤斧。

#define ZIP_BIG_PREVLEN 254

/* Encode the length of the previous entry and write it to "p". Return the
 * number of bytes needed to encode this length if "p" is NULL. */
unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) {
    if (p == NULL) {
        return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(len)+1;
    } else {
        //判斷prevlen的長度是否小于ZIP_BIG_PREVLEN，ZIP_BIG_PREVLEN等于254
        if (len < ZIP_BIG_PREVLEN) {
            //如果小于254字節(jié)霎烙，那么返回prevlen為1字節(jié)
            p[0] = len;
            return 1;
        } else {
            //否則撬讽，調用zipStorePrevEntryLengthLarge進行編碼
            return zipStorePrevEntryLengthLarge(p,len);
        }
    }
}
/* Encode the length of the previous entry and write it to "p". This only
 * uses the larger encoding (required in __ziplistCascadeUpdate). */
int zipStorePrevEntryLengthLarge(unsigned char *p, unsigned int len) {
    if (p != NULL) {
        //將prevlen的第1字節(jié)設置為ZIP_BIG_PREVLEN，即254
        p[0] = ZIP_BIG_PREVLEN;
        //將前一個列表項的長度值拷貝至prevlen的第2至第5字節(jié)悬垃，其中sizeof(len)的值為4
        memcpy(p+1,&len,sizeof(len));
        memrev32ifbe(p+1);
    }
    //返回prevlen的大小游昼，為5字節(jié)
    return 1+sizeof(len);
}

ziplist 在 zipStoreEntryEncoding 函數中，針對整數和字符串尝蠕，就分別使用了不同字節(jié)長度的編碼結果烘豌。

/* Write the encoidng header of the entry in 'p'. If p is NULL it just returns
 * the amount of bytes required to encode such a length. Arguments:
 *
 * 'encoding' is the encoding we are using for the entry. It could be
 * ZIP_INT_* or ZIP_STR_* or between ZIP_INT_IMM_MIN and ZIP_INT_IMM_MAX
 * for single-byte small immediate integers.
 *
 * 'rawlen' is only used for ZIP_STR_* encodings and is the length of the
 * srting that this entry represents.
 *
 * The function returns the number of bytes used by the encoding/length
 * header stored in 'p'. */
unsigned int zipStoreEntryEncoding(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
    //默認編碼結果是1字節(jié)
    unsigned char len = 1, buf[5];

    if (ZIP_IS_STR(encoding)) {
        /* Although encoding is given it may not be set for strings,
         * so we determine it here using the raw length. */
        //字符串長度小于等于63字節(jié)（16進制為0x3f）
        if (rawlen <= 0x3f) {
            if (!p) return len;
            buf[0] = ZIP_STR_06B | rawlen;
        //字符串長度小于等于16383字節(jié)（16進制為0x3fff）
        } else if (rawlen <= 0x3fff) {
            //編碼結果是2字節(jié)
            len += 1;
            if (!p) return len;
            buf[0] = ZIP_STR_14B | ((rawlen >> 8) & 0x3f);
            buf[1] = rawlen & 0xff;
        } else {
            //編碼結果是5字節(jié)
            len += 4;
            if (!p) return len;
            buf[0] = ZIP_STR_32B;
            buf[1] = (rawlen >> 24) & 0xff;
            buf[2] = (rawlen >> 16) & 0xff;
            buf[3] = (rawlen >> 8) & 0xff;
            buf[4] = rawlen & 0xff;
        }
    } else {
        /* Implies integer encoding, so length is always 1\. */
        if (!p) return len;
        buf[0] = encoding;
    }

    /* Store this length at p. */
    memcpy(p,buf,len);
    return len;
}

簡而言之，針對不同長度的數據看彼，使用不同大小的元數據信息（prevlen 和 encoding）廊佩，這種方法可以有效地節(jié)省內存開銷。

intset 內存優(yōu)化設計

整數集合結構體中記錄數據的部分靖榕，就是一個 int8_t 類型的整數數組 contents标锄。從內存使用的角度來看，整數數組就是一塊連續(xù)內存空間茁计，所以這樣就避免了內存碎片料皇，并提升了內存使用效率。

typedef struct intset {
    uint32_t encoding;
    uint32_t length;
    int8_t contents[];
} intset;

通過共享對象節(jié)省內存

Redis 就采用了共享對象的設計思想,把這些常用數據創(chuàng)建為共享對象，當上層應用需要訪問它們時践剂，直接讀取就行毒返。

#define OBJ_SHARED_INTEGERS 10000

void createSharedObjects(void) {
    int j;
    shared.crlf = createObject(OBJ_STRING,sdsnew("\r\n"));
    //常見回復信息
    shared.ok = createObject(OBJ_STRING,sdsnew("+OK\r\n"));
    shared.err = createObject(OBJ_STRING,sdsnew("-ERR\r\n"));
    ...
    //常見報錯信息
    shared.nokeyerr = createObject(OBJ_STRING,sdsnew(
        "-ERR no such key\r\n"));
    shared.syntaxerr = createObject(OBJ_STRING,sdsnew(
        "-ERR syntax error\r\n"));
    ...

    //0到9999的整數
    for (j = 0; j < OBJ_SHARED_INTEGERS; j++) {
        shared.integers[j] =
            makeObjectShared(createObject(OBJ_STRING,(void*)(long)j));
        shared.integers[j]->encoding = OBJ_ENCODING_INT;
    }
    for (j = 0; j < OBJ_SHARED_BULKHDR_LEN; j++) {
        shared.mbulkhdr[j] = createObject(OBJ_STRING,
            sdscatprintf(sdsempty(),"*%d\r\n",j));
        shared.bulkhdr[j] = createObject(OBJ_STRING,
            sdscatprintf(sdsempty(),"$%d\r\n",j));
    }
    /* The following two shared objects, minstring and maxstrings, are not
     * actually used for their value but as a special object meaning
     * respectively the minimum possible string and the maximum possible
     * string in string comparisons for the ZRANGEBYLEX command. */
    shared.minstring = sdsnew("minstring");
    shared.maxstring = sdsnew("maxstring");
}

Q：SDS 判斷是否使用嵌入式字符串的條件是 44 字節(jié)，你知道為什么是 44 字節(jié)嗎舷手？
A：Redis 規(guī)定嵌入式字符串最大以 64 字節(jié)存儲，所以 N = 64 - 16(redisObject) - 3(sdshr8) - 1(\0)劲绪， N = 44 字節(jié)男窟。

image.png

最后編輯于：2024.06.11 20:23:41

?著作權歸作者所有,轉載或內容合作請聯系作者

人面猴
序言：七十年代末，一起剝皮案震驚了整個濱河市贾富，隨后出現的幾起案子歉眷，更是在濱河造成了極大的恐慌，老刑警劉巖颤枪，帶你破解...
沈念sama閱讀 219,589評論 6贊 508
死咒
序言：濱河連續(xù)發(fā)生了三起死亡事件汗捡，死亡現場離奇詭異，居然都是意外死亡畏纲，警方通過查閱死者的電腦和手機扇住，發(fā)現死者居然都...
沈念sama閱讀 93,615評論 3贊 396
救了他兩次的神仙讓他今天三更去死
文/潘曉璐我一進店門，熙熙樓的掌柜王于貴愁眉苦臉地迎上來盗胀，“玉大人艘蹋，你說我怎么就攤上這事∑被遥” “怎么了女阀？”我有些...
開封第一講書人閱讀 165,933評論 0贊 356
道士緝兇錄：失蹤的賣姜人
文/不壞的土叔我叫張陵，是天一觀的道長屑迂。經常有香客問我浸策，道長，這世上最難降的妖魔是什么惹盼？我笑而不...
開封第一講書人閱讀 58,976評論 1贊 295
?港島之戀（遺憾婚禮）
正文為了忘掉前任庸汗，我火速辦了婚禮，結果婚禮上逻锐，老公的妹妹穿的比我還像新娘夫晌。我一直安慰自己，他們只是感情好昧诱，可當我...
茶點故事閱讀 67,999評論 6贊 393
惡毒庶女頂嫁案：這布局不是一般人想出來的
文/花漫我一把揭開白布晓淀。她就那樣靜靜地躺著，像睡著了一般盏档。火紅的嫁衣襯著肌膚如雪凶掰。梳的紋絲不亂的頭發(fā)上，一...
開封第一講書人閱讀 51,775評論 1贊 307
城市分裂傳說
那天，我揣著相機與錄音懦窘，去河邊找鬼前翎。笑死，一個胖子當著我的面吹牛畅涂，可吹牛的內容都是我干的港华。我是一名探鬼主播，決...
沈念sama閱讀 40,474評論 3贊 420
雙鴛鴦連環(huán)套：你想象不到人心有多黑
文/蒼蘭香墨我猛地睜開眼午衰，長吁一口氣：“原來是場噩夢啊……” “哼立宜！你這毒婦竟也來了？” 一聲冷哼從身側響起臊岸，我...
開封第一講書人閱讀 39,359評論 0贊 276
萬榮殺人案實錄
序言：老撾萬榮一對情侶失蹤橙数，失蹤者是張志新（化名）和其女友劉穎，沒想到半個月后帅戒，有當地人在樹林里發(fā)現了一具尸體灯帮，經...
沈念sama閱讀 45,854評論 1贊 317
?護林員之死
正文獨居荒郊野嶺守林人離奇死亡，尸身上長有42處帶血的膿包…… 初始之章·張勛以下內容為張勛視角年9月15日...
茶點故事閱讀 38,007評論 3贊 338
?白月光啟示錄
正文我和宋清朗相戀三年逻住，在試婚紗的時候發(fā)現自己被綠了钟哥。大學時的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片。...
茶點故事閱讀 40,146評論 1贊 351
活死人
序言：一個原本活蹦亂跳的男人離奇死亡瞎访，死狀恐怖瞪醋，靈堂內的尸體忽然破棺而出，到底是詐尸還是另有隱情装诡，我是刑警寧澤银受，帶...
沈念sama閱讀 35,826評論 5贊 346
?日本核電站爆炸內幕
正文年R本政府宣布，位于F島的核電站鸦采，受9級特大地震影響宾巍，放射性物質發(fā)生泄漏。R本人自食惡果不足惜渔伯，卻給世界環(huán)境...
茶點故事閱讀 41,484評論 3贊 331
男人毒藥：我在死后第九天來索命
文/蒙蒙一顶霞、第九天我趴在偏房一處隱蔽的房頂上張望。院中可真熱鬧锣吼，春花似錦选浑、人聲如沸。這莊子的主人今日做“春日...
開封第一講書人閱讀 32,029評論 0贊 22
一樁弒父案古徒，背后竟有這般陰謀
文/蒼蘭香墨我抬頭看了看天上的太陽。三九已至读恃，卻和暖如春隧膘，著一層夾襖步出監(jiān)牢的瞬間代态，已是汗流浹背。一陣腳步聲響...
開封第一講書人閱讀 33,153評論 1贊 272
情欲美人皮
我被黑心中介騙來泰國打工疹吃，沒想到剛下飛機就差點兒被人妖公主榨干…… 1. 我叫王不留蹦疑，地道東北人。一個月前我還...
沈念sama閱讀 48,420評論 3贊 373
代替公主和親
正文我出身青樓萨驶，卻偏偏與公主長得像歉摧，于是被迫代替她去往敵國和親。傳聞我的和親對象是個殘疾皇子腔呜，可洞房花燭夜當晚...
茶點故事閱讀 45,107評論 2贊 356

redis 的數據結構優(yōu)化設計

SDS 內存優(yōu)化設計

ziplist 內存優(yōu)化設計

intset 內存優(yōu)化設計

通過共享對象節(jié)省內存

推薦閱讀更多精彩內容