對于實現數據結構來說泵喘,Redis 就給我們提供了兩個優(yōu)秀的設計思想:一個是使用連續(xù)的內存空間,避免內存碎片開銷般妙;二個是針對不同長度的數據纪铺,采用不同大小的元數據,以避免使用統(tǒng)一大小的元數據碟渺,造成內存空間的浪費鲜锚。
在數據訪問方面,你也要知道止状,使用共享對象其實可以避免重復創(chuàng)建冗余的數據烹棉,從而也可以有效地節(jié)省內存空間。不過怯疤,共享對象主要適用于只讀場景浆洗,如果一個字符串被反復地修改,就無法被多個請求共享訪問了集峦。
SDS 內存優(yōu)化設計
SDS 設計了不同類型的結構頭伏社,包括 sdshdr8、sdshdr16塔淤、sdshdr32 和 sdshdr64摘昌。這些不同類型的結構頭可以適配不同大小的字符串,從而避免了內存浪費高蜂。不過聪黎,SDS 除了使用精巧設計的結構頭外,在保存較小字符串時备恤,其實還使用了嵌入式字符串的設計方法稿饰。這種方法避免了給字符串分配額外的空間锦秒,而是可以讓字符串直接保存在 Redis 的基本數據對象結構體中。
要理解嵌入式字符串的設計與實現喉镰,先了解下 Redis 基本數據對象結構體 redisObject:
#define LRU_BITS 24
typedef struct redisObject {
// redisObject 的數據類型旅择,是應用程序在 Redis 中保存的數據類型,包括 String侣姆、List生真、Hash 等
unsigned type:4;
// redisObject 的編碼類型,是 Redis 內部實現各種數據類型所用的數據結構
unsigned encoding:4;
// redisObject 的 LRU 時間
unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
* LFU data (least significant 8 bits frequency
* and most significant 16 bits access time). */
// redisObject 的引用計數
int refcount;
// 指向值的指針
void *ptr;
} robj;
變量后使用冒號和數值的定義方法捺宗。這實際上是 C 語言中的位域定義方法柱蟀,可以用來有效地節(jié)省內存開銷。
嵌入式字符串
/* Create a string object with EMBSTR encoding if it is smaller than
* OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
* used.
*
* The current limit of 44 is chosen so that the biggest string object
* we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
robj *createStringObject(const char *ptr, size_t len) {
if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
return createEmbeddedStringObject(ptr,len);
else
return createRawStringObject(ptr,len);
}
/* The actual Redis Object */
#define OBJ_STRING 0 /* String object. */
#define OBJ_LIST 1 /* List object. */
#define OBJ_SET 2 /* Set object. */
#define OBJ_ZSET 3 /* Sorted set object. */
#define OBJ_HASH 4 /* Hash object. */
/* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain
* string object where o->ptr points to a proper sds string. */
robj *createRawStringObject(const char *ptr, size_t len) {
return createObject(OBJ_STRING, sdsnewlen(ptr,len));
}
/* ===================== Creation and parsing of objects ==================== */
robj *createObject(int type, void *ptr) {
robj *o = zmalloc(sizeof(*o));
o->type = type;
o->encoding = OBJ_ENCODING_RAW;
o->ptr = ptr;
o->refcount = 1;
/* Set the LRU to the current lruclock (minutes resolution), or
* alternatively the LFU counter. */
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
} else {
o->lru = LRU_CLOCK();
}
return o;
}
Redis 會通過設計實現一塊連續(xù)的內存空間偿凭,把 redisObject 結構體和 SDS 結構體緊湊地放置在一起产弹。這樣一來,對于不超過 44 字節(jié)的字符串來說弯囊,就可以避免內存碎片和兩次內存分配的開銷了
/* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is
* an object where the sds string is actually an unmodifiable string
* allocated in the same chunk as the object itself. */
robj *createEmbeddedStringObject(const char *ptr, size_t len) {
robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1);
struct sdshdr8 *sh = (void*)(o+1);
o->type = OBJ_STRING;
o->encoding = OBJ_ENCODING_EMBSTR;
o->ptr = sh+1;
o->refcount = 1;
if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
} else {
o->lru = LRU_CLOCK();
}
sh->len = len;
sh->alloc = len;
sh->flags = SDS_TYPE_8;
if (ptr == SDS_NOINIT)
sh->buf[len] = '\0';
else if (ptr) {
memcpy(sh->buf,ptr,len);
sh->buf[len] = '\0';
} else {
memset(sh->buf,0,len+1);
}
return o;
}
ziplist 內存優(yōu)化設計
/* The size of a ziplist header: two 32 bit integers for the total
* bytes count and last item offset. One 16 bit integer for the number
* of items field. */
#define ZIPLIST_HEADER_SIZE (sizeof(uint32_t)*2+sizeof(uint16_t))
/* Size of the "end of ziplist" entry. Just one byte. */
#define ZIPLIST_END_SIZE (sizeof(uint8_t))
/* Create a new empty ziplist. */
unsigned char *ziplistNew(void) {
unsigned int bytes = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE;
unsigned char *zl = zmalloc(bytes);
ZIPLIST_BYTES(zl) = intrev32ifbe(bytes);
ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
ZIPLIST_LENGTH(zl) = 0;
zl[bytes-1] = ZIP_END;
return zl;
}
/* We use this function to receive information about a ziplist entry.
* Note that this is not how the data is actually encoded, is just what we
* get filled by a function in order to operate more easily. */
typedef struct zlentry {
unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/
unsigned int prevrawlen; /* Previous entry len. */
unsigned int lensize; /* Bytes used to encode this entry type/len.
For example strings have a 1, 2 or 5 bytes
header. Integers always use a single byte.*/
unsigned int len; /* Bytes used to represent the actual entry.
For strings this is just the string length
while for integers it is 1, 2, 3, 4, 8 or
0 (for 4 bit immediate) depending on the
number range. */
unsigned int headersize; /* prevrawlensize + lensize. */
unsigned char encoding; /* Set to ZIP_STR_* or ZIP_INT_* depending on
the entry encoding. However for 4 bits
immediate integers this can assume a range
of values and must be range-checked. */
unsigned char *p; /* Pointer to the very start of the entry, that
is, this points to prev-entry-len field. */
} zlentry;
每個列表項中都會記錄前一項的長度痰哨。因為每個列表項的長度不一樣,所以如果使用相同的字節(jié)大小來記錄 prevlen匾嘱,就會造成內存空間浪費斤斧。
#define ZIP_BIG_PREVLEN 254
/* Encode the length of the previous entry and write it to "p". Return the
* number of bytes needed to encode this length if "p" is NULL. */
unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) {
if (p == NULL) {
return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(len)+1;
} else {
//判斷prevlen的長度是否小于ZIP_BIG_PREVLEN,ZIP_BIG_PREVLEN等于254
if (len < ZIP_BIG_PREVLEN) {
//如果小于254字節(jié)霎烙,那么返回prevlen為1字節(jié)
p[0] = len;
return 1;
} else {
//否則撬讽,調用zipStorePrevEntryLengthLarge進行編碼
return zipStorePrevEntryLengthLarge(p,len);
}
}
}
/* Encode the length of the previous entry and write it to "p". This only
* uses the larger encoding (required in __ziplistCascadeUpdate). */
int zipStorePrevEntryLengthLarge(unsigned char *p, unsigned int len) {
if (p != NULL) {
//將prevlen的第1字節(jié)設置為ZIP_BIG_PREVLEN,即254
p[0] = ZIP_BIG_PREVLEN;
//將前一個列表項的長度值拷貝至prevlen的第2至第5字節(jié)悬垃,其中sizeof(len)的值為4
memcpy(p+1,&len,sizeof(len));
memrev32ifbe(p+1);
}
//返回prevlen的大小游昼,為5字節(jié)
return 1+sizeof(len);
}
ziplist 在 zipStoreEntryEncoding 函數中,針對整數和字符串尝蠕,就分別使用了不同字節(jié)長度的編碼結果烘豌。
/* Write the encoidng header of the entry in 'p'. If p is NULL it just returns
* the amount of bytes required to encode such a length. Arguments:
*
* 'encoding' is the encoding we are using for the entry. It could be
* ZIP_INT_* or ZIP_STR_* or between ZIP_INT_IMM_MIN and ZIP_INT_IMM_MAX
* for single-byte small immediate integers.
*
* 'rawlen' is only used for ZIP_STR_* encodings and is the length of the
* srting that this entry represents.
*
* The function returns the number of bytes used by the encoding/length
* header stored in 'p'. */
unsigned int zipStoreEntryEncoding(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
//默認編碼結果是1字節(jié)
unsigned char len = 1, buf[5];
if (ZIP_IS_STR(encoding)) {
/* Although encoding is given it may not be set for strings,
* so we determine it here using the raw length. */
//字符串長度小于等于63字節(jié)(16進制為0x3f)
if (rawlen <= 0x3f) {
if (!p) return len;
buf[0] = ZIP_STR_06B | rawlen;
//字符串長度小于等于16383字節(jié)(16進制為0x3fff)
} else if (rawlen <= 0x3fff) {
//編碼結果是2字節(jié)
len += 1;
if (!p) return len;
buf[0] = ZIP_STR_14B | ((rawlen >> 8) & 0x3f);
buf[1] = rawlen & 0xff;
} else {
//編碼結果是5字節(jié)
len += 4;
if (!p) return len;
buf[0] = ZIP_STR_32B;
buf[1] = (rawlen >> 24) & 0xff;
buf[2] = (rawlen >> 16) & 0xff;
buf[3] = (rawlen >> 8) & 0xff;
buf[4] = rawlen & 0xff;
}
} else {
/* Implies integer encoding, so length is always 1\. */
if (!p) return len;
buf[0] = encoding;
}
/* Store this length at p. */
memcpy(p,buf,len);
return len;
}
簡而言之,針對不同長度的數據看彼,使用不同大小的元數據信息(prevlen 和 encoding)廊佩,這種方法可以有效地節(jié)省內存開銷。
intset 內存優(yōu)化設計
整數集合結構體中記錄數據的部分靖榕,就是一個 int8_t 類型的整數數組 contents标锄。從內存使用的角度來看,整數數組就是一塊連續(xù)內存空間茁计,所以這樣就避免了內存碎片料皇,并提升了內存使用效率。
typedef struct intset {
uint32_t encoding;
uint32_t length;
int8_t contents[];
} intset;
通過共享對象節(jié)省內存
Redis 就采用了共享對象的設計思想,把這些常用數據創(chuàng)建為共享對象,當上層應用需要訪問它們時践剂,直接讀取就行毒返。
#define OBJ_SHARED_INTEGERS 10000
void createSharedObjects(void) {
int j;
shared.crlf = createObject(OBJ_STRING,sdsnew("\r\n"));
//常見回復信息
shared.ok = createObject(OBJ_STRING,sdsnew("+OK\r\n"));
shared.err = createObject(OBJ_STRING,sdsnew("-ERR\r\n"));
...
//常見報錯信息
shared.nokeyerr = createObject(OBJ_STRING,sdsnew(
"-ERR no such key\r\n"));
shared.syntaxerr = createObject(OBJ_STRING,sdsnew(
"-ERR syntax error\r\n"));
...
//0到9999的整數
for (j = 0; j < OBJ_SHARED_INTEGERS; j++) {
shared.integers[j] =
makeObjectShared(createObject(OBJ_STRING,(void*)(long)j));
shared.integers[j]->encoding = OBJ_ENCODING_INT;
}
for (j = 0; j < OBJ_SHARED_BULKHDR_LEN; j++) {
shared.mbulkhdr[j] = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),"*%d\r\n",j));
shared.bulkhdr[j] = createObject(OBJ_STRING,
sdscatprintf(sdsempty(),"$%d\r\n",j));
}
/* The following two shared objects, minstring and maxstrings, are not
* actually used for their value but as a special object meaning
* respectively the minimum possible string and the maximum possible
* string in string comparisons for the ZRANGEBYLEX command. */
shared.minstring = sdsnew("minstring");
shared.maxstring = sdsnew("maxstring");
}
Q:SDS 判斷是否使用嵌入式字符串的條件是 44 字節(jié),你知道為什么是 44 字節(jié)嗎舷手?
A:Redis 規(guī)定嵌入式字符串最大以 64 字節(jié)存儲,所以 N = 64 - 16(redisObject) - 3(sdshr8) - 1(\0)劲绪, N = 44 字節(jié)男窟。