導(dǎo)航
memcached源碼分析
memcached源碼分析-網(wǎng)絡(luò)模塊
memcached源碼分析-指令解析模塊
memcached源碼分析-哈希表(hashtable)模塊
memcached源碼分析-slab存儲機制
1.前言
這一章節(jié)主要介紹了memcached的內(nèi)存管理機制股缸,我們在指令解析模塊中說到了memcached是如何解析處理客戶端請求指令的手销。
例如:執(zhí)行set key flags exptime bytes
\n
value
memcached接收來此客戶端的存儲請求职员,那么服務(wù)端是如何存儲來自客戶端的存儲內(nèi)容的呢,這里就涉及到了slabs存儲機制赊琳,在此之前首先介紹memcached中關(guān)于內(nèi)存管理的幾個重要的概念
item 數(shù)據(jù)存儲節(jié)點
typedef struct _stritem {
/* Protected by LRU locks */
//一個item的地址, 主要用于LRU鏈和freelist鏈
struct _stritem *next;
//下一個item的地址,主要用于LRU鏈和freelist鏈
struct _stritem *prev;
/* Rest are protected by an item lock */
//用于記錄哈希表槽中下一個item節(jié)點的地址
struct _stritem *h_next; /* hash chain next */
//最近訪問時間
rel_time_t time; /* least recent access */
//緩存過期時間
rel_time_t exptime; /* expire time */
int nbytes; /* size of data */
//當(dāng)前item被引用的次數(shù)街夭,用于判斷item是否被其它的線程在操作中
//refcount == 1的情況下該節(jié)點才可以被刪除
unsigned short refcount;
uint8_t nsuffix; /* length of flags-and-length string */
uint8_t it_flags; /* ITEM_* above */
//記錄該item節(jié)點位于哪個slabclass_t中
uint8_t slabs_clsid;/* which slab class we're in */
uint8_t nkey; /* key length, w/terminating null and padding */
/* this odd type prevents type-punning issues when we do
* the little shuffle to save space when not using CAS. */
union {
uint64_t cas;
char end;
} data[];
/* if it_flags & ITEM_CAS we have 8 bytes CAS */
/* then null-terminated key */
/* then " flags length\r\n" (no terminating null) */
/* then data with terminating \r\n (no terminating null; it's binary!) */
} item;
slab與chunk
slab是一塊內(nèi)存空間,默認(rèn)大小為1M躏筏,memcached會把一個slab分割成一個個chunk, 這些被切割的小的內(nèi)存塊板丽,主要用來存儲item
slabclass
每個item的大小都可能不一樣,item存儲于chunk,如果chunk大小不夠趁尼,則不足以分配給item使用埃碱,如果chunk過大,則太過于浪費內(nèi)存空間酥泞。因此memcached采取的做法是砚殿,將slab切割成不同大小的chunk,這樣就滿足了不同大小item的存儲芝囤。被劃分不同大小chunk的slab的內(nèi)存在memcached就是用slabclass這個結(jié)構(gòu)體來表現(xiàn)的
typedef struct {
//chunk大小
unsigned int size; /* sizes of items */
//1M內(nèi)存大小被分割為多少個chunck
unsigned int perslab; /* how many items per slab */
//空閑chunk鏈表
void *slots; /* list of item ptrs */
//空閑chunk的個數(shù)
unsigned int sl_curr; /* total free items in list */
//當(dāng)前slabclass已經(jīng)分配了所少個1M空間的slab
unsigned int slabs; /* how many slabs were allocated for this class */
//slab指針數(shù)組
void **slab_list; /* array of slab pointers */
//slab指針數(shù)組的大小
unsigned int list_size; /* size of prev array */
size_t requested; /* The number of requested bytes */
} slabclass_t;
1.slabclass數(shù)組初始化的時候似炎,每個slabclass_t都會分配一個1M大小的slab辛萍,slab會被切分為N個小的內(nèi)存塊,這個小的內(nèi)存塊的大小取決于slabclass_t結(jié)構(gòu)上的size的大小
2.每個slabclass_t都只存儲一定大小范圍的數(shù)據(jù)羡藐,并且下一個slabclass切割的chunk塊大于前一個slabclass切割的chunk塊大小
3.memcached中slabclass數(shù)組默認(rèn)大小為64贩毕,slabclass切割塊大小的增長因子默認(rèn)是1.25
例如:slabclass[1]切割的chunk塊大小為100字節(jié),slabclass[2]為125仆嗦,如果需要存儲一個110字節(jié)的緩存辉阶,那么就需要到slabclass[2] 的空閑鏈表中獲取一個空閑節(jié)點進(jìn)行存儲
2.slabclass的初始化
/**
* Determines the chunk sizes and initializes the slab class descriptors
* accordingly.
*/
void slabs_init(const size_t limit, const double factor, const bool prealloc, const uint32_t *slab_sizes) {
int i = POWER_SMALLEST - 1;
unsigned int size = sizeof(item) + settings.chunk_size;
/* Some platforms use runtime transparent hugepages. If for any reason
* the initial allocation fails, the required settings do not persist
* for remaining allocations. As such it makes little sense to do slab
* preallocation. */
bool __attribute__ ((unused)) do_slab_prealloc = false;
mem_limit = limit;
if (prealloc) {
//...
/* Allocate everything in a big chunk with malloc */
//預(yù)分配一個大空間
mem_base = malloc(mem_limit);
do_slab_prealloc = true;
if (mem_base != NULL) {
mem_current = mem_base;
mem_avail = mem_limit;
} else {
fprintf(stderr, "Warning: Failed to allocate requested memory in"
" one large chunk.\nWill allocate in smaller chunks\n");
}
}
memset(slabclass, 0, sizeof(slabclass));
while (++i < MAX_NUMBER_OF_SLAB_CLASSES-1) {
if (slab_sizes != NULL) {
if (slab_sizes[i-1] == 0)
break;
size = slab_sizes[i-1];
} else if (size >= settings.slab_chunk_size_max / factor) {
break;
}
/* Make sure items are always n-byte aligned */
if (size % CHUNK_ALIGN_BYTES)
size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
//當(dāng)前slabclass切割的chunk塊大小
slabclass[i].size = size;
slabclass[i].perslab = settings.slab_page_size / slabclass[i].size;
if (slab_sizes == NULL)
//下一個slabclass切割的chunk塊大小
size *= factor;
//...
}
power_largest = i;
slabclass[power_largest].size = settings.slab_chunk_size_max;
slabclass[power_largest].perslab = settings.slab_page_size / settings.slab_chunk_size_max;
//...
if (prealloc && do_slab_prealloc) {
//為slabclass結(jié)構(gòu)分配slab空間,默認(rèn)大小為1M
//并且將1M的slab空間切割成N個slabclass->size大小塊欧啤,并每個塊加入到slot空閑鏈表中睛藻,塊和塊直接用雙向鏈表串聯(lián)
slabs_preallocate(power_largest);
}
}
static void slabs_preallocate (const unsigned int maxslabs) {
int i;
unsigned int prealloc = 0;
//...
for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {
if (++prealloc > maxslabs)
return;
//分配slab空間启上,并切割slab塊
if (do_slabs_newslab(i) == 0) {
fprintf(stderr, "Error while preallocating slab memory!\n"
"If using -L or other prealloc options, max memory must be "
"at least %d megabytes.\n", power_largest);
exit(1);
}
}
}
//為slabclass分配一個新的slab
static int do_slabs_newslab(const unsigned int id) {
slabclass_t *p = &slabclass[id];
slabclass_t *g = &slabclass[SLAB_GLOBAL_PAGE_POOL];
int len = (settings.slab_reassign || settings.slab_chunk_size_max != settings.slab_page_size)
? settings.slab_page_size
: p->size * p->perslab;
char *ptr;
if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0
&& g->slabs == 0)) {
mem_limit_reached = true;
MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
return 0;
}
//空閑鏈表沒有空間可用邢隧,需要重新分配開辟一個slab空間
//這個slab空間地址使用slabclass->slab_list保存的,grow_slab_list就是擴大slab_list
if ((grow_slab_list(id) == 0) ||
//分配具體的slab空間
(((ptr = get_page_from_global_pool()) == NULL) &&
((ptr = memory_allocate((size_t)len)) == 0))) {
MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
return 0;
}
memset(ptr, 0, (size_t)len);
//將新分配的slab空間進(jìn)行切割chunk塊冈在,并加入到slot空閑列表中
split_slab_page_into_freelist(ptr, id);
p->slab_list[p->slabs++] = ptr;
MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id);
return 1;
}
static void split_slab_page_into_freelist(char *ptr, const unsigned int id) {
slabclass_t *p = &slabclass[id];
int x;
for (x = 0; x < p->perslab; x++) {
//這里的釋放并不是一個真實意義上的釋放倒慧,而是將內(nèi)存塊歸還到slabclass的空閑列表中
//我們分配一個新的slab空間,切割后加入到空閑列表中和memcached中釋放一個item節(jié)點操作是一致的包券,顧調(diào)用了相同的接口
//將內(nèi)存chunk塊強轉(zhuǎn)為item類型纫谅,然后通過item中的雙向節(jié)點串接到slabclass的空閑節(jié)點列表中
do_slabs_free(ptr, 0, id);
//切割塊偏移
ptr += p->size;
}
}
3.item節(jié)點的分配流程
1、 根據(jù)大小溅固,找到合適的slabclass
2付秕、 slabclass空閑列表中是否有空閑的item節(jié)點,如果有直接分配item侍郭,用于存儲內(nèi)容
3询吴、 空閑列表沒有空閑的item可以分配,會重新開辟一個slab(默認(rèn)大小為1M)的內(nèi)存塊亮元,然后切割slab并放入到空閑列表中猛计,然后從空閑列表中獲取節(jié)點
item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes) {
item *it;
/* do_item_alloc handles its own locks */
it = do_item_alloc(key, nkey, flags, exptime, nbytes);
return it;
}
item *do_item_alloc(char *key, const size_t nkey, const unsigned int flags,
const rel_time_t exptime, const int nbytes) {
//...
//計算需要的空間大小
size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
if (settings.use_cas) {
ntotal += sizeof(uint64_t);
}
//根據(jù)大小,找到對應(yīng)的slabclass下標(biāo)
unsigned int id = slabs_clsid(ntotal);
unsigned int hdr_id = 0;
if (id == 0)
return 0;
/* This is a large item. Allocate a header object now, lazily allocate
* chunks while reading the upload.
*/
if (ntotal > settings.slab_chunk_size_max) {
//...
} else {
//從根據(jù)id去對應(yīng)的slabcalss中獲取一個item節(jié)點
it = do_item_alloc_pull(ntotal, id);
}
//...
/* Refcount is seeded to 1 by slabs_alloc() */
it->next = it->prev = 0;
//LRU分層機制爆捞,后面說到LRU我們再做進(jìn)一步分析
if (settings.temp_lru &&
exptime - current_time <= settings.temporary_ttl) {
id |= TEMP_LRU;
} else if (settings.lru_segmented) {
id |= HOT_LRU;
} else {
/* There is only COLD in compat-mode */
id |= COLD_LRU;
}
it->slabs_clsid = id;
//...
return it;
}
item *do_item_alloc_pull(const size_t ntotal, const unsigned int id) {
item *it = NULL;
int i;
//這里涉及LRU淘汰機制奉瘤,后面會做介紹
//...
it = slabs_alloc(ntotal, id, &total_bytes, 0);
//...
return it;
}
void *slabs_alloc(size_t size, unsigned int id, uint64_t *total_bytes,
unsigned int flags) {
void *ret;
pthread_mutex_lock(&slabs_lock);
ret = do_slabs_alloc(size, id, total_bytes, flags);
pthread_mutex_unlock(&slabs_lock);
return ret;
}
static void *do_slabs_alloc(const size_t size, unsigned int id, uint64_t *total_bytes,
unsigned int flags) {
slabclass_t *p;
item *it = NULL;
//...
p = &slabclass[id];
//...
/* fail unless we have space at the end of a recently allocated page,
we have something on our freelist, or we could allocate a new page */
//空閑列表中已經(jīng)沒有空閑節(jié)點了,重新分配slab并切割加入到空閑列表中
if (p->sl_curr == 0 && flags != SLABS_ALLOC_NO_NEWPAGE) {
//為slabclass重新開辟一個slab
do_slabs_newslab(id);
}
//空閑列表中含有空余的節(jié)點
if (p->sl_curr != 0) {
/* return off our freelist */
//分配出去煮甥,并將該節(jié)點從空閑列表中移除
it = (item *)p->slots;
p->slots = it->next;
if (it->next) it->next->prev = 0;
/* Kill flag and initialize refcount here for lock safety in slab
* mover's freeness detection. */
it->it_flags &= ~ITEM_SLABBED;
it->refcount = 1;
p->sl_curr--;
ret = (void *)it;
} else {
ret = NULL;
}
//...
return ret;
}
4.item節(jié)點的釋放
釋放一個item節(jié)點盗温,并不會free內(nèi)存空間,而是將item節(jié)點歸還到slabclass的空閑列表中
void slabs_free(void *ptr, size_t size, unsigned int id) {
pthread_mutex_lock(&slabs_lock);
//將item節(jié)點歸還到slabclass的空閑列表中
do_slabs_free(ptr, size, id);
pthread_mutex_unlock(&slabs_lock);
}
static void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
slabclass_t *p;
item *it;
//...
p = &slabclass[id];
it = (item *)ptr;
if ((it->it_flags & ITEM_CHUNKED) == 0) {
//...
it->it_flags = ITEM_SLABBED;
it->slabs_clsid = 0;
it->prev = 0;
it->next = p->slots;
if (it->next) it->next->prev = it;
p->slots = it;
p->sl_curr++;
//...
} else {
do_slabs_free_chunked(it, size);
}
return;
}