基于1.8.3版本宣脉,64位Linux操作系統(tǒng)
1车柠、概述
Go內(nèi)存管理基于tcmalloc,使用連續(xù)虛擬地址塑猖,以頁(8k)為單位竹祷、多級緩存進行管理;
在分配內(nèi)存時羊苟,需要對size進行對齊處理塑陵,根據(jù)best-fit找到合適的mspan,對未用完的內(nèi)存還會拆分成其他大小的mspan繼續(xù)使用
在new一個object時(忽略逃逸分析)蜡励,根據(jù)object的size做不同的分配策略:
- 極小對象(size<16byte)直接在當前P的mcache上的tiny緩存上分配;
- 小對象(16byte <= size <= 32k)在當前P的mcache上對應slot的空閑列表中分配令花,無空閑列表則會繼續(xù)向mcentral申請(還是沒有則向mheap申請);
- 大對象(size>32k)直接通過mheap申請凉倚。
2兼都、數(shù)據(jù)結構
2.1 mspan
mspan并不直接擁有內(nèi)存空間,它負責管理起始地址為startAddr稽寒、級別(預分配頁個數(shù))為sizeclass的連續(xù)地址空間扮碧。
摘取重點內(nèi)容(下同)
type mspan struct {
//雙向鏈表
next *mspan
prev *mspan
//起始地址
startAddr uintptr
//包含多少頁
npages uintptr
//
stackfreelist gclinkptr
//有多少對象
nelems uintptr
//gc相關
sweepgen uint32
//級別
sizeclass uint8
//已被mcache使用
incache bool
//狀態(tài)
state mSpanState
}
2.2 mcache
Go為Per-thread (in Go, per-P)分配了mcache管理結構,所以對其操作是不需要鎖的杏糙,每個mcache有大小為67的mspan數(shù)組慎王,存儲不同級別大小的mspan
type mcache struct {
tiny uintptr
tinyoffset uintptr
alloc [_NumSizeClasses]*mspan
stackcache [_NumStackOrders]stackfreelist
...
}
2.3 mcentral
mcentral集中管理,當在mcache申請失敗的時候搔啊,會向mcentral申請柬祠;mcentral有個關鍵方法cacheSpan(),它是整個分配的核心算法
type mcentral struct {
lock mutex
sizeclass int32
nonempty mSpanList
empty mSpanList
}
2.4 mheap
mheap是真實擁有虛擬地址的結構负芋,同時擁有67個級別的mcentral漫蛔,以及所有分配的mspan。
// _NumSizeClasses := 67
// _MaxMHeapList := 128
type mheap struct {
lock mutex
//size < 128 * 8k(1M)的可用mspanList
free [_MaxMHeapList]mSpanList
//size >= 128 * 8k(1M)的可用mspanList
freelarge mSpanList
busy [_MaxMHeapList]mSpanList
busylarge mSpanList
//gc相關
sweepgen uint32
sweepdone uint32
//所有的mspan
allspans []*mspan
//頁到span的查找表
spans []*mspan
//位圖
bitmap uintptr
bitmap_mapped uintptr
//真實申請的內(nèi)存起始地址
arena_start uintptr
//真實申請的內(nèi)存目前可用起始地址
arena_used uintptr
//真實申請的內(nèi)存結束地址
arena_end uintptr
//分級的mcentral
central [_NumSizeClasses]struct {
mcentral mcentral
pad [sys.CacheLineSize]byte
}
...
}
2.5 四者的關系示圖
3旧蛾、初始化
初始化時莽龟,Go向系統(tǒng)申請預留一段連續(xù)虛擬地址,大小(64位機器上)為512M(spans_mapped)+16G(bitmap_mapped)+512G(arena)
向系統(tǒng)申請預留的連續(xù)地址空間
+----------+-----------+-----------------------------+
| spans | bitmap | arena |
| 512M | 16G | 512G |
+----------+-----------+-----------------------------+
mheap的初始化在func mallocinit()中锨天,而mallocinit被schedinit()調(diào)用
/src/runtime/proc.go
// The bootstrap sequence is:
//
// call osinit
// call schedinit
// make & queue new G
// call runtime·mstart
//
// The new G calls runtime·main.
mallocinit的邏輯為:
func mallocinit() {
// 0. 檢查系統(tǒng)/硬件信息毯盈,bala bala
// 1. 計算預留空間大小
arenaSize := round(_MaxMem, _PageSize)
bitmapSize = arenaSize / (sys.PtrSize * 8 / 2)
spansSize = arenaSize / _PageSize * sys.PtrSize
spansSize = round(spansSize, _PageSize)
// 2. 嘗試預留地址(區(qū)分不同平臺 略)
for i := 0; i <= 0x7f; i++ {
...
pSize = bitmapSize + spansSize + arenaSize + _PageSize
p = uintptr(sysReserve(unsafe.Pointer(p), pSize, &reserved))
}
// 3. 初始化部分mheap中變量
p1 := round(p, _PageSize)
spansStart := p1
mheap_.bitmap = p1 + spansSize + bitmapSize
mheap_.arena_start = p1 + (spansSize + bitmapSize)
mheap_.arena_end = p + pSize
mheap_.arena_used = p1 + (spansSize + bitmapSize)
mheap_.arena_reserved = reserved
// 4. 其他部分初始化,67個mcentral在這里初始化
mheap_.init(spansStart, spansSize)
_g_ := getg()
_g_.m.mcache = allocmcache()
}
mheap的初始化方法
// Initialize the heap.
func (h *mheap) init(spansStart, spansBytes uintptr) {
// 0. xxalloc.init
// 1. free病袄、busy init
for i := range h.free {
h.free[i].init()
h.busy[i].init()
}
h.freelarge.init()
h.busylarge.init()
// 2. mcentral初始化
for i := range h.central {
h.central[i].mcentral.init(int32(i))
}
// 3. spans初始化
sp := (*slice)(unsafe.Pointer(&h.spans))
sp.array = unsafe.Pointer(spansStart)
sp.len = 0
sp.cap = int(spansBytes / sys.PtrSize)
}
mcentral的初始化比較簡單搂赋,設置自己的級別赘阀,同時將兩個mspanList初始化
而mcache的初始化在func procresize(nprocs int32) *p中,procresize也在schedinit()中調(diào)用脑奠,順序在mallocinit()之后基公,也就是說發(fā)生在mheap于mcentral的初始化后面
func procresize(nprocs int32) *p {
// 0. bala bala
// 1. 初始化P
for i := int32(0); i < nprocs; i++ {
pp := allp[i]
//初始化每個P的mcache
if pp.mcache == nil {
if old == 0 && i == 0 {
if getg().m.mcache == nil {
throw("missing mcache?")
}
pp.mcache = getg().m.mcache
} else {
pp.mcache = allocmcache()
}
}
}
}
而allocmcache比較簡單
func allocmcache() *mcache {
lock(&mheap_.lock)
c := (*mcache)(mheap_.cachealloc.alloc())
unlock(&mheap_.lock)
for i := 0; i < _NumSizeClasses; i++ {
c.alloc[i] = &emptymspan
}
c.next_sample = nextSample()
return c
}
至此,管理結構mheap宋欺、67個mcentral及每個P的mcache都初始化完畢轰豆,接下來進入重點--分配階段。
4齿诞、分配
前面說過酸休,在分配對象內(nèi)存時,根據(jù)對象的大小分為3個級別:極小祷杈、小斑司、大;在這里我們假設關閉內(nèi)聯(lián)優(yōu)化吠式,即沒有逃逸的存在陡厘。當new一個對象時抽米,調(diào)用的是:
func newobject(typ *_type) unsafe.Pointer {
return mallocgc(typ.size, typ, true)
}
mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
dataSize := size
c := gomcache()
var x unsafe.Pointer
noscan := typ == nil || typ.kind&kindNoPointers != 0
if size <= maxSmallSize {
if noscan && size < maxTinySize {
// 極小對象
} else {
// 小對象
} else {
// 大對象
}
}
我們將針對這三類一一分析
- 首先是極小對象(<16byte)
off := c.tinyoffset
// 地址對齊
if size&7 == 0 {
off = round(off, 8)
} else if size&3 == 0 {
off = round(off, 4)
} else if size&1 == 0 {
off = round(off, 2)
}
//若之前tiny剩余空間夠用特占,則將極小對象拼在一起
if off+size <= maxTinySize && c.tiny != 0 {
// The object fits into existing tiny block.
x = unsafe.Pointer(c.tiny + off)
c.tinyoffset = off + size
c.local_tinyallocs++
mp.mallocing = 0
releasem(mp)
return x
}
//不若,則申請新的mspan
// Allocate a new maxTinySize block.
span := c.alloc[tinySizeClass]
v := nextFreeFast(span)
if v == 0 {
v, _, shouldhelpgc = c.nextFree(tinySizeClass)
}
x = unsafe.Pointer(v)
(*[2]uint64)(x)[0] = 0
(*[2]uint64)(x)[1] = 0
// 新申請的剩余空間大于之前的剩余空間
if size < c.tinyoffset || c.tiny == 0 {
c.tiny = uintptr(x)
c.tinyoffset = size
}
size = maxTinySize
其中nextFreeFast和nextFree先跳過去云茸,因為小對象分配時也會使用到是目,之后一并分析;下面是極小對象分配的示意圖
先是有足夠剩余空間标捺,那么對齊都直接利用(為了便于說明問題懊纳,tinyoffset用箭頭指向表示)
如果沒有足夠空間,則申請新的亡容,若必要修正tiny及tinyoffset的值
- 接著分析小對象(16byte <= size <= 32k)
介于16b到32k之間大小的對象分配比較復雜嗤疯,可以結合文末的流程圖,便于記憶
var sizeclass uint8
if size <= smallSizeMax-8 {
sizeclass = size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]
} else {
sizeclass = size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]
}
size = uintptr(class_to_size[sizeclass])
span := c.alloc[sizeclass]
v := nextFreeFast(span)
if v == 0 {
v, span, shouldhelpgc = c.nextFree(sizeclass)
}
x = unsafe.Pointer(v)
if needzero && span.needzero != 0 {
memclrNoHeapPointers(unsafe.Pointer(v), size)
}
首先計算申請對象的sizeclass闺兢,以此找到對應大小的mspan茂缚;然后找到可用的地址。這里面有兩個重要的方法nextFreeFast和nextFree:
// nextFreeFast returns the next free object if one is quickly available.
// Otherwise it returns 0.
func nextFreeFast(s *mspan) gclinkptr {
//計算s.allocCache從低位起有多少個0
theBit := sys.Ctz64(s.allocCache)
if theBit < 64 {
result := s.freeindex + uintptr(theBit)
if result < s.nelems {
freeidx := result + 1
if freeidx%64 == 0 && freeidx != s.nelems {
return 0
}
//更新位圖屋谭、可用游標
s.allocCache >>= (theBit + 1)
s.freeindex = freeidx
//根據(jù)result和s.elemsize起始地址計算v
v := gclinkptr(result*s.elemsize + s.base())
s.allocCount++
return v
}
}
return 0
}
重點是當mcache沒有可用地址時脚囊,通過nextFree向mcentral甚至mheap申請
func (c *mcache) nextFree(sizeclass uint8) (v gclinkptr, s *mspan, shouldhelpgc bool) {
s = c.alloc[sizeclass]
shouldhelpgc = false
freeIndex := s.nextFreeIndex()
if freeIndex == s.nelems {
// The span is full.
...
//重新填充當前的mcache
systemstack(func() {
c.refill(int32(sizeclass))
})
shouldhelpgc = true
s = c.alloc[sizeclass]
freeIndex = s.nextFreeIndex()
}
...
...
}
向mcentral是通過refill來實現(xiàn)的
func (c *mcache) refill(sizeclass int32) *mspan {
_g_ := getg()
_g_.m.locks++
// 想mcentral歸還當前的mspan
s := c.alloc[sizeclass]
if uintptr(s.allocCount) != s.nelems {
throw("refill of span with free space remaining")
}
if s != &emptymspan {
s.incache = false
}
// 獲取新的, mcentral.cacheSpan()重點分析
s = mheap_.central[sizeclass].mcentral.cacheSpan()
...
c.alloc[sizeclass] = s
_g_.m.locks--
return s
}
下面是一個很長的調(diào)用鏈路...
func (c *mcentral) cacheSpan() *mspan {
...
retry:
var s *mspan
//先從非空列表中找
for s = c.nonempty.first; s != nil; s = s.next {
...
goto havespan
}
//沒有則從空列表中找
for s = c.empty.first; s != nil; s = s.next {
...
goto retry
}
//實在沒有,那么申請吧
s = c.grow()
if s == nil {
return nil
}
havespan:
//
return s
}
// 由mcentral申請
func (c *mcentral) grow() *mspan {
...
s := mheap_.alloc(npages, c.sizeclass, false, true)
...
return s
}
//由mheap申請
func (h *mheap) alloc(npage uintptr, sizeclass int32, large bool, needzero bool) *mspan {
...
systemstack(func() {
s = h.alloc_m(npage, sizeclass, large)
})
...
return s
}
func (h *mheap) alloc_m(npage uintptr, sizeclass int32, large bool) *mspan {
...
s := h.allocSpanLocked(npage)
...
return s
}
//Best-fit算法
func (h *mheap) allocSpanLocked(npage uintptr) *mspan {
//先從128頁以內(nèi)(1M)的free列表中尋找
for i := int(npage); i < len(h.free); i++ {
list = &h.free[i]
...
}
// Best-fit 對于大對象申請也會用到這個方法
//基本思路是找到最小可以滿足需求的mspan桐磁,如果有多個悔耘,選擇地址最小的
list = &h.freelarge
s = h.allocLarge(npage)
if s == nil {
//如果mheap也沒有空間了,向系統(tǒng)申請吧
if !h.grow(npage) {
return nil
}
s = h.allocLarge(npage)
if s == nil {
return nil
}
}
HaveSpan:
//轉移s
list.remove(s)
if s.inList() {
throw("still in list")
}
//對于申請到的內(nèi)存大于想要的我擂,將其拆分衬以,避免浪費
if s.npages > npage {
...
h.freeSpanLocked(t, false, false, s.unusedsince)
s.state = _MSpanFree
}
return s
}
//向系統(tǒng)申請空間
func (h *mheap) grow(npage uintptr) bool {
//計算頁數(shù)
npage = round(npage, (64<<10)/_PageSize)
ask := npage << _PageShift
if ask < _HeapAllocChunk {
ask = _HeapAllocChunk
}
v := h.sysAlloc(ask)
...
}
func (h *mheap) sysAlloc(n uintptr) unsafe.Pointer {
...
// sysReserve調(diào)用mmap預留空間缓艳,至此調(diào)用鏈結束
p := uintptr(sysReserve(unsafe.Pointer(h.arena_end), p_size, &reserved))
...
}
- 最后是大對象
var s *mspan
shouldhelpgc = true
systemstack(func() {
// largeAlloc會調(diào)用mheap_.alloc,這個方法在小對象申請時已經(jīng)追蹤過
s = largeAlloc(size, needzero)
})
s.freeindex = 1
s.allocCount = 1
x = unsafe.Pointer(s.base())
size = s.elemsize
-
內(nèi)存分配的流程圖
5看峻、參考文獻
[1]. https://github.com/qyuhen
[2]. http://legendtkl.com/2017/04/02/golang-alloc/
[3]. https://tracymacding.gitbooks.io/implementation-of-golang/content/memory/memory_core_data_structure.html