title: redis5.0_01_SDS.字符串
tags: longzy:2018-12-2
在上一篇文章中，通過編譯redis，設置斷點執行，然后跟蹤了redis的大概執行流程，那么從今天以后，將對redis源碼的每個細節進行閱讀分析。今天閱讀分析的是redis字符串SDS。
在C語言中，字符串一般有兩種表示方法：
- char *buf1 = "redis_5.0";
- char buf2[] = "redis_5.0";
而在redis中，自己封裝了一種叫簡單動態字符串（simple dynamic string，SDS）的類型來表示字符串，同時也兼容了C語言的字符串。
SDS定義
定義是很簡單的，我簡單加了些注釋和自己的看法。
/* Type alias: an sds value points at the buf member of one of the
 * sdshdrXX structures defined below. */
typedef char *sds;
/* Smallest header variant: it has no len/alloc fields. The type tag lives in
 * the low 3 bits of flags and the string length in the remaining 5 bits
 * (extracted with SDS_TYPE_5_LEN). */
struct __attribute__ ((__packed__)) sdshdr5 {
unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
char buf[];
};
/* Note on sdshdr5: sdsnewlen() still has an SDS_TYPE_5 case; it only promotes
 * type 5 to type 8 when initlen == 0, and sdsMakeRoomFor() never selects
 * type 5. So sdshdr8 is the smallest header used for strings that are
 * expected to grow. */
struct __attribute__ ((__packed__)) sdshdr8 {
/* Number of bytes currently in use, not counting the null terminator.
 * Keeping it in the header means the length can be read directly instead
 * of scanning buf. */
uint8_t len; /* used */
/* Capacity of buf, not counting the header or the null terminator. */
uint8_t alloc; /* excluding the header and null terminator */
/* Header type tag (SDS_TYPE_8, SDS_TYPE_16, ...). */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
/* The string bytes themselves. */
char buf[];
};
/* Header variant with 16-bit len/alloc fields; same layout as sdshdr8
 * otherwise. */
struct __attribute__ ((__packed__)) sdshdr16 {
uint16_t len; /* used */
uint16_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
/* Header variant with 32-bit len/alloc fields; same layout as sdshdr8
 * otherwise. */
struct __attribute__ ((__packed__)) sdshdr32 {
uint32_t len; /* used */
uint32_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
/* Header variant with 64-bit len/alloc fields; same layout as sdshdr8
 * otherwise. */
struct __attribute__ ((__packed__)) sdshdr64 {
uint64_t len; /* used */
uint64_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[];
};
/*
__attribute__ ((__packed__)): 告訴編譯器取消結構在編譯過程中的對齊優化，按照字節的實際大小緊湊排列，這是GCC特有的語法
在GCC下:struct my{ char ch; int a;}; sizeof(int)=4; sizeof(my)=8;(非緊湊模式)
在GCC下:struct my{ char ch; int a;} __attribute__ ((packed)); sizeof(my)=5;(緊湊模式)
*/
/* Header type tags, stored in the low SDS_TYPE_BITS bits of the flags byte. */
#define SDS_TYPE_5 0
#define SDS_TYPE_8 1
#define SDS_TYPE_16 2
#define SDS_TYPE_32 3
#define SDS_TYPE_64 4
/* Mask that selects the type bits of flags, and the number of those bits. */
#define SDS_TYPE_MASK 7
#define SDS_TYPE_BITS 3
/* Declares a local variable `sh` pointing at the sdshdr##T header located
 * immediately before the string pointer `s`. The expansion already ends
 * with ';'. */
#define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T)));
/* Expression form: pointer to the sdshdr##T header located before `s`. */
#define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T))))
/* Extracts the string length stored above the type bits of a type-5 flags
 * byte. */
#define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS)
/* `##` is the preprocessor token-pasting operator: sdshdr##T becomes e.g.
 * sdshdr8 when T is 8. */
這里很是羞愧，之前居然沒遇到過 __attribute__ ((packed))。
下面分析幾個核心的函數：
/* Create a new sds from initlen bytes at init. */
sds sdsnewlen(const void *init, size_t initlen);
/* Append len bytes at t to s. */
sds sdscatlen(sds s, const void *t, size_t len);
/* Append printf-style formatted output (va_list form) to s. */
sds sdscatvprintf(sds s, const char *fmt, va_list ap);
/* Append output formatted with the reduced %s/%S/%i/%I/%u/%U/%% specifier set. */
sds sdscatfmt(sds s, char const *fmt, ...);
/* Strip leading and trailing bytes that occur in cset. */
sds sdstrim(sds s, const char *cset);
/* Keep only the inclusive [start,end] slice of s, in place. */
void sdsrange(sds s, ssize_t start, ssize_t end);
/* Split s on the separator sep into an array of sds; *count receives the
 * number of tokens. */
sds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count);
/* NOTE(review): definition not shown in this excerpt; presumably splits a
 * line into an argument vector -- confirm against sds.c. */
sds *sdssplitargs(const char *line, int *argc);
/* Ensure at least addlen free bytes are available after the end of s. */
sds sdsMakeRoomFor(sds s, size_t addlen);
這幾個函數從字面意思看起來都很明顯：新建、連接、格式化、去掉指定字符、分割、擴展空間、移除空閑空間等等。另外這些函數都是在內部分配內存，所以調用者需要在外面釋放內存。
我們依次看看它們的實現。
sds sdsnewlen(const void *init, size_t initlen)
這個函數沒啥難度。
/* Create a new sds string holding the 'initlen' bytes pointed to by 'init'.
 *
 * Parameters:
 *   init    - source bytes, or NULL. With NULL the whole allocation (header
 *             and buffer) is zero-filled and nothing is copied.
 *   initlen - number of bytes to store.
 *
 * The result always has a '\0' written right after the 'initlen' bytes, so
 *
 *     mystring = sdsnewlen("abc",3);
 *
 * can be handed to printf(). The string is still binary safe: the length is
 * kept in the header, so '\0' bytes may appear in the middle.
 *
 * Returns the new sds, or NULL if the allocation fails or if
 * header + initlen + 1 does not fit in a size_t. */
sds sdsnewlen(const void *init, size_t initlen) {
    void *sh;
    sds s;
    unsigned char *fp; /* flags pointer. */
    int hdrlen;
    size_t total;
    /* Pick the header type from the requested length. */
    char type = sdsReqType(initlen);

    /* Empty strings are usually created in order to append. Use type 8
     * since type 5 is not good at this. */
    if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8;
    hdrlen = sdsHdrSize(type);

    /* Reject sizes that wrap around: an undersized allocation followed by
     * the memcpy() below would overflow the heap buffer. */
    total = (size_t)hdrlen + initlen + 1;
    if (total <= initlen) return NULL;

    sh = s_malloc(total);
    if (sh == NULL) return NULL;
    if (!init)
        memset(sh, 0, total);
    s = (char*)sh+hdrlen;
    /* The flags byte is always the byte immediately before the string. */
    fp = ((unsigned char*)s)-1;
    switch(type) {
        case SDS_TYPE_5: {
            /* Type 5 keeps the length in the upper bits of flags. */
            *fp = type | (initlen << SDS_TYPE_BITS);
            break;
        }
        case SDS_TYPE_8: {
            SDS_HDR_VAR(8,s);
            sh->len = initlen;
            sh->alloc = initlen;
            *fp = type;
            break;
        }
        case SDS_TYPE_16: {
            SDS_HDR_VAR(16,s);
            sh->len = initlen;
            sh->alloc = initlen;
            *fp = type;
            break;
        }
        case SDS_TYPE_32: {
            SDS_HDR_VAR(32,s);
            sh->len = initlen;
            sh->alloc = initlen;
            *fp = type;
            break;
        }
        case SDS_TYPE_64: {
            SDS_HDR_VAR(64,s);
            sh->len = initlen;
            sh->alloc = initlen;
            *fp = type;
            break;
        }
    }
    if (initlen && init)
        memcpy(s, init, initlen);
    s[initlen] = '\0';
    return s;
}
sds sdsMakeRoomFor(sds s, size_t addlen);
/* Enlarge the free space at the end of the sds string so that the caller
 * is sure that after calling this function it can overwrite up to addlen
 * bytes after the end of the string, plus one more byte for the nul term.
 *
 * Parameters:
 *   s      - the string to grow.
 *   addlen - number of additional bytes the caller wants to be able to write.
 *
 * Note: this does not change the *length* of the sds string as returned
 * by sdslen(), but only the free buffer space we have.
 *
 * Returns the (possibly relocated) sds, or NULL if the allocation fails or
 * the requested size does not fit in a size_t. On NULL the original string
 * has not been freed. */
sds sdsMakeRoomFor(sds s, size_t addlen) {
    void *sh, *newsh;
    size_t avail = sdsavail(s); /* bytes currently free */
    size_t len, newlen;
    char type, oldtype = s[-1] & SDS_TYPE_MASK;
    int hdrlen;

    /* Return ASAP if there is enough space left. */
    if (avail >= addlen) return s;

    len = sdslen(s);
    sh = (char*)s-sdsHdrSize(oldtype);
    newlen = (len+addlen);
    /* Catch size_t overflow of len+addlen. */
    if (newlen < len) return NULL;

    /* Over-allocate to reduce the number of future reallocations: double the
     * required size while it is below SDS_MAX_PREALLOC, otherwise add
     * SDS_MAX_PREALLOC extra bytes. (SDS_MAX_PREALLOC is defined outside this
     * excerpt; presumably 1024*1024, i.e. 1 MB.) */
    if (newlen < SDS_MAX_PREALLOC) {
        newlen *= 2;
    } else {
        if (newlen > (size_t)-1 - SDS_MAX_PREALLOC) return NULL;
        newlen += SDS_MAX_PREALLOC;
    }

    /* The length changed, so the header type has to be recomputed. */
    type = sdsReqType(newlen);

    /* Don't use type 5: the user is appending to the string and type 5 is
     * not able to remember empty space, so sdsMakeRoomFor() must be called
     * at every appending operation. */
    if (type == SDS_TYPE_5) type = SDS_TYPE_8;

    hdrlen = sdsHdrSize(type);
    /* Catch size_t overflow of hdrlen+newlen+1. */
    if (newlen > (size_t)-1 - (size_t)hdrlen - 1) return NULL;

    if (oldtype==type) {
        /* Same header layout: the block can be grown in place. */
        newsh = s_realloc(sh, hdrlen+newlen+1);
        if (newsh == NULL) return NULL;
        s = (char*)newsh+hdrlen;
    } else {
        /* Since the header size changes, need to move the string forward,
         * and can't use realloc */
        newsh = s_malloc(hdrlen+newlen+1);
        if (newsh == NULL) return NULL;
        memcpy((char*)newsh+hdrlen, s, len+1);
        /* sh is the start of the old allocation (header + string), so this
         * releases the memory the old s pointed into. */
        s_free(sh);
        s = (char*)newsh+hdrlen;
        s[-1] = type;
        sdssetlen(s, len);
    }
    sdssetalloc(s, newlen);
    return s;
}
sds sdscatlen(sds s, const void *t, size_t len)
/* Append 'len' bytes starting at 't' to the sds string 's' (binary safe,
 * comparable to strcat() with an explicit length).
 *
 * Parameters:
 *   s   - destination string.
 *   t   - source bytes.
 *   len - number of bytes to append from 't'.
 *
 * After the call, the passed sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call.
 *
 * Returns the resulting sds, or NULL if sdsMakeRoomFor() fails. */
sds sdscatlen(sds s, const void *t, size_t len) {
    const size_t oldlen = sdslen(s);
    sds grown = sdsMakeRoomFor(s, len); /* make sure 'len' more bytes fit */

    if (grown == NULL) {
        return NULL;
    }

    memcpy(grown + oldlen, t, len);
    sdssetlen(grown, oldlen + len);
    grown[oldlen + len] = '\0';
    return grown;
}
sds sdscatvprintf(sds s, const char *fmt, va_list ap);
/* Like sdscatprintf() but gets va_list instead of being variadic.
 *
 * Formats 'fmt' with the arguments in 'ap' and appends the result to 's'.
 *
 * Returns the resulting sds, or NULL if a temporary buffer cannot be
 * allocated, if vsnprintf() reports an error, or if the final append fails. */
sds sdscatvprintf(sds s, const char *fmt, va_list ap) {
    va_list cpy;
    char staticbuf[1024], *buf = staticbuf, *t;
    size_t buflen = strlen(fmt)*2;

    /* We try to start using a static buffer for speed.
     * If not possible we revert to heap allocation. The initial guess is
     * twice the format length; anything that fits in staticbuf avoids a
     * heap allocation entirely. */
    if (buflen > sizeof(staticbuf)) {
        buf = s_malloc(buflen);
        if (buf == NULL) return NULL;
    } else {
        buflen = sizeof(staticbuf);
    }

    /* Format into the current buffer; if it is too small, vsnprintf() tells
     * us the exact size needed, so at most one retry is required. */
    while(1) {
        int written;

        /* ap may be consumed more than once, so always format a copy. */
        va_copy(cpy,ap);
        written = vsnprintf(buf, buflen, fmt, cpy);
        va_end(cpy);
        if (written < 0) {
            /* Formatting error: the buffer contents are unusable. */
            if (buf != staticbuf) s_free(buf);
            return NULL;
        }
        if ((size_t)written >= buflen) {
            /* Output was truncated: retry with an exactly sized buffer. */
            if (buf != staticbuf) s_free(buf);
            buflen = (size_t)written + 1;
            buf = s_malloc(buflen);
            if (buf == NULL) return NULL;
            continue;
        }
        break;
    }

    /* Finally concat the obtained string to the SDS string and return it. */
    t = sdscat(s, buf);
    if (buf != staticbuf) s_free(buf);
    return t;
}
sds sdscatfmt(sds s, char const *fmt, ...);
/* This function is similar to sdscatprintf, but does not rely on the
 * sprintf() family of libc functions; it appends directly into the sds
 * string as new data is produced.
 *
 * However this function only handles an incompatible subset of printf-alike
 * format specifiers:
 *
 * %s - C String
 * %S - SDS string
 * %i - signed int
 * %I - 64 bit signed integer (long long, int64_t)
 * %u - unsigned int
 * %U - 64 bit unsigned integer (unsigned long long, uint64_t)
 * %% - Verbatim "%" character.
 *
 * Any other character after '%' is copied verbatim. A lone '%' at the very
 * end of the format is ignored.
 *
 * Returns the resulting sds, or NULL if growing the string fails. On NULL
 * the original string has not been freed and is left null-terminated. */
sds sdscatfmt(sds s, char const *fmt, ...) {
    size_t initlen = sdslen(s);
    const char *f = fmt; /* Next format specifier byte to process. */
    long i = initlen;    /* Position of the next byte to write to dest str. */
    sds grown;
    va_list ap;

    va_start(ap,fmt);
    while(*f) {
        char next, *str;
        size_t l;
        long long num;
        unsigned long long unum;

        /* Make sure there is always space for at least 1 char. */
        if (sdsavail(s)==0) {
            grown = sdsMakeRoomFor(s,1);
            if (grown == NULL) goto oom;
            s = grown;
        }

        switch(*f) {
        case '%':
            next = *(f+1);
            /* A trailing '%' has nothing after it: stop here so that f is
             * never advanced past the terminating null byte. */
            if (next == '\0') break;
            f++;
            switch(next) {
            case 's':
            case 'S':
                str = va_arg(ap,char*);
                /* %s is a C string, %S an sds whose length is in its header. */
                l = (next == 's') ? strlen(str) : sdslen(str);
                if (sdsavail(s) < l) {
                    grown = sdsMakeRoomFor(s,l);
                    if (grown == NULL) goto oom;
                    s = grown;
                }
                memcpy(s+i,str,l);
                sdsinclen(s,l); /* account for the l bytes just written */
                i += l;
                break;
            case 'i':
            case 'I':
                if (next == 'i')
                    num = va_arg(ap,int);
                else
                    num = va_arg(ap,long long);
                {
                    char buf[SDS_LLSTR_SIZE];
                    /* Convert the signed value to text, then append it. */
                    l = sdsll2str(buf,num);
                    if (sdsavail(s) < l) {
                        grown = sdsMakeRoomFor(s,l);
                        if (grown == NULL) goto oom;
                        s = grown;
                    }
                    memcpy(s+i,buf,l);
                    sdsinclen(s,l);
                    i += l;
                }
                break;
            case 'u':
            case 'U':
                if (next == 'u')
                    unum = va_arg(ap,unsigned int);
                else
                    unum = va_arg(ap,unsigned long long);
                {
                    char buf[SDS_LLSTR_SIZE];
                    /* Convert the unsigned value to text, then append it. */
                    l = sdsull2str(buf,unum);
                    if (sdsavail(s) < l) {
                        grown = sdsMakeRoomFor(s,l);
                        if (grown == NULL) goto oom;
                        s = grown;
                    }
                    memcpy(s+i,buf,l);
                    sdsinclen(s,l);
                    i += l;
                }
                break;
            default: /* Handle %% and generally %<unknown>. */
                s[i++] = next;
                sdsinclen(s,1);
                break;
            }
            break;
        default:
            s[i++] = *f;
            sdsinclen(s,1);
            break;
        }
        f++;
    }
    va_end(ap);

    /* Add null-term */
    s[i] = '\0';
    return s;

oom:
    va_end(ap);
    /* s is still the last valid allocation; i equals its current length, and
     * the slot at s[len] is always part of the buffer. */
    s[i] = '\0';
    return NULL;
}
sds sdstrim(sds s, const char *cset);
/* Strip from both ends of 's' every contiguous byte that appears in 'cset',
 * a null terminated C string. Bytes in the middle of the string are never
 * removed.
 *
 * The string is modified in place and 's' itself is returned.
 *
 * Note: strchr() also matches the terminator of 'cset', so '\0' bytes at
 * either end of 's' are stripped as well.
 *
 * Example:
 *
 * s = sdsnew("AA...AA.a.aa.aHelloWorld :::");
 * s = sdstrim(s,"Aa. :");
 * printf("%s\n", s);
 *
 * Output will be just "HelloWorld".
 */
sds sdstrim(sds s, const char *cset) {
    char *last = s+sdslen(s)-1; /* last byte of the string */
    char *head = s;             /* first byte to keep */
    char *tail = last;          /* last byte to keep */
    size_t kept;

    while (head <= last && strchr(cset, *head) != NULL) {
        head++;
    }
    while (tail > head && strchr(cset, *tail) != NULL) {
        tail--;
    }

    if (head > tail) {
        kept = 0;
    } else {
        kept = (tail-head)+1;
    }

    if (head != s) {
        memmove(s, head, kept);
    }
    s[kept] = '\0';
    sdssetlen(s,kept);
    return s;
}
void sdsrange(sds s, ssize_t start, ssize_t end);
/* Shrink the string, in place, to the substring delimited by the 'start'
 * and 'end' indexes (0-based, both inclusive).
 *
 * Negative indexes count from the end: -1 is the last character, -2 the
 * penultimate one, and so forth. A negative index that still falls before
 * the beginning of the string is clamped to 0; an 'end' past the last
 * character is clamped to the last character.
 *
 * An empty string is left untouched.
 *
 * Example:
 *
 * s = sdsnew("Hello World");
 * sdsrange(s,1,-1); => "ello World"
 */
void sdsrange(sds s, ssize_t start, ssize_t end) {
    size_t len = sdslen(s);
    size_t newlen;

    if (len == 0) {
        return;
    }

    /* Translate negative indexes into offsets from the beginning. */
    if (start < 0) {
        start = len+start;
        if (start < 0) start = 0;
    }
    if (end < 0) {
        end = len+end;
        if (end < 0) end = 0;
    }

    if (start > end) {
        /* Empty interval. */
        newlen = 0;
        start = 0;
    } else if (start >= (ssize_t)len) {
        /* The interval begins past the end of the string. */
        newlen = 0;
    } else {
        if (end >= (ssize_t)len) {
            end = len-1;
        }
        newlen = (end-start)+1;
    }

    /* Nothing to move when the slice already begins at offset 0. */
    if (start != 0 && newlen != 0) {
        memmove(s, s+start, newlen);
    }
    s[newlen] = 0;
    sdssetlen(s,newlen);
}
sds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count);
/* Split the 'len' bytes at 's' on the separator 'sep' ('seplen' bytes) and
 * return an array of sds strings. *count is set to the number of tokens
 * returned.
 *
 * 'sep' may be a multi-character separator. For example
 * sdssplit("foo_-_bar","_-_"); will return two elements "foo" and "bar".
 *
 * Return value:
 *   - seplen < 1 or len < 0: NULL, *count = 0.
 *   - out of memory:         NULL, *count = 0.
 *   - len == 0:              an allocated array holding no tokens, *count = 0;
 *                            the caller must still free the array.
 *   - otherwise:             the token array; the caller frees each token
 *                            and the array.
 *
 * This version of the function is binary-safe but
 * requires length arguments. sdssplit() is just the
 * same function but for zero-terminated strings.
 */
sds *sdssplitlen(const char *s, ssize_t len, const char *sep, int seplen, int *count) {
    int elements = 0, slots = 5;
    long start = 0, j;
    sds *tokens;

    /* Always define *count, including on the failure paths, so callers
     * never read an uninitialised value. */
    if (seplen < 1 || len < 0) {
        *count = 0;
        return NULL;
    }

    tokens = s_malloc(sizeof(sds)*slots);
    if (tokens == NULL) {
        *count = 0;
        return NULL;
    }

    if (len == 0) {
        *count = 0;
        return tokens;
    }
    /* Stop where a full separator can no longer fit. */
    for (j = 0; j < (len-(seplen-1)); j++) {
        /* make sure there is room for the next element and the final one */
        if (slots < elements+2) {
            sds *newtokens;

            slots *= 2;
            newtokens = s_realloc(tokens,sizeof(sds)*slots);
            if (newtokens == NULL) goto cleanup;
            tokens = newtokens;
        }
        /* search the separator */
        if ((seplen == 1 && *(s+j) == sep[0]) || (memcmp(s+j,sep,seplen) == 0)) {
            tokens[elements] = sdsnewlen(s+start,j-start);
            if (tokens[elements] == NULL) goto cleanup;
            elements++;
            start = j+seplen;
            j = j+seplen-1; /* skip the separator */
        }
    }
    /* Add the final element. We are sure there is room in the tokens array. */
    tokens[elements] = sdsnewlen(s+start,len-start);
    if (tokens[elements] == NULL) goto cleanup;
    elements++;
    *count = elements;
    return tokens;

cleanup:
    {
        int i;
        /* Release every token created so far, then the array itself. */
        for (i = 0; i < elements; i++) sdsfree(tokens[i]);
        s_free(tokens);
        *count = 0;
        return NULL;
    }
}
上面分析的關鍵函數是redis實現SDS的核心函數，像外部接口sdsnew底層調用的是sdsnewlen，sdscat底層調用的是sdscatlen，sdscpy底層調用的是sdscpylen。
其實C語言的字符串已經能夠滿足基本全部需求，為什么redis還要自己實現字符串sds呢？
要回答這個問題，還是回到開頭說的C語言對于字符串的一般定義。其通常如下：
- char *buf1 = "redis_5.0";
- char buf2[] = "redis_5.0";
這兩種寫法都用字符串常量來初始化：第一種方式（指向字符串常量的指針）不可以再修改，第二種方式（字符數組）可以修改，但是大小固定。再想想平時對字符串的操作函數，strcpy、strcat等函數，一般是不安全的。
那么我們對比redis的實現，可以看出redis具有以下優點：
- 兼容C語言字符串
- 對于普通字符串的操作是安全的
- 可以動態擴展空間（最大是512M）
- 對字符串求長度的復雜度為O(1)
- 底層用的是數組，操作很快
- 從sdsMakeRoomFor的實現，我們知道redis采用了預分配冗余空間的方式來減小內存的頻繁分配