為了回收系統上的資源,kubelet有ImageGC和ContainerGC等功能對image和container進行回收,下面就根據kubelet代碼對ContainerGC部分進行分析。
相關的參數主要有:
- minimum-container-ttl-duration
- maximum-dead-containers-per-container
- maximum-dead-containers
相對應的代碼是:
//pkg/kubelet/container_gc.go
// ContainerGCPolicy groups the limits applied when garbage collecting
// dead containers.
type ContainerGCPolicy struct {
	// How long a dead container is kept on the machine before it may be
	// collected. NewContainerGC rejects negative values.
	MinAge time.Duration
	// Number of dead containers that may be kept per pod.
	// NOTE(review): evictContainers only enforces this limit when it is >= 0.
	MaxPerPodContainer int
	// Maximum number of dead containers that may be kept on the machine.
	// NOTE(review): evictContainers only enforces this limit when it is >= 0.
	MaxContainers int
}
...
// NewContainerGC builds a ContainerGC that uses the given runtime, policy and
// sources-ready provider. It returns an error when policy.MinAge is negative.
func NewContainerGC(runtime Runtime, policy ContainerGCPolicy, sourcesReadyProvider SourcesReadyProvider) (ContainerGC, error) {
	if minAge := policy.MinAge; minAge < 0 {
		return nil, fmt.Errorf("invalid minimum garbage collection age: %v", minAge)
	}

	gc := new(realContainerGC)
	gc.runtime = runtime
	gc.policy = policy
	gc.sourcesReadyProvider = sourcesReadyProvider
	return gc, nil
}
// GarbageCollect delegates to the runtime's GarbageCollect with the stored
// policy and the current all-sources-ready status, passing false as the
// final argument.
func (cgc *realContainerGC) GarbageCollect() error {
	sourcesReady := cgc.sourcesReadyProvider.AllReady()
	return cgc.runtime.GarbageCollect(cgc.policy, sourcesReady, false)
}
// DeleteAllUnusedContainers delegates to the runtime's GarbageCollect with the
// stored policy and the current all-sources-ready status, passing true as the
// final argument.
func (cgc *realContainerGC) DeleteAllUnusedContainers() error {
	sourcesReady := cgc.sourcesReadyProvider.AllReady()
	return cgc.runtime.GarbageCollect(cgc.policy, sourcesReady, true)
}
GarbageCollect方法里面的調用就是ContainerGC真正的邏輯所在。GarbageCollect函數是在pkg/kubelet/kubelet.go里面調用的,每隔一分鐘會執行一次。GarbageCollect里面所調用的runtime的GarbageCollect函數是在pkg/kubelet/kuberuntime/kuberuntime_gc.go里面實現的。
//pkg/kubelet/kuberuntime/kuberuntime_gc.go
// GarbageCollect runs the three collection phases in order and stops at the
// first phase that reports an error:
//  1. evict containers,
//  2. evict sandboxes,
//  3. evict pod log directories.
func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
	// Phase 1: evictable containers.
	err := cgc.evictContainers(gcPolicy, allSourcesReady, evictNonDeletedPods)
	if err != nil {
		return err
	}

	// Phase 2: sandboxes.
	err = cgc.evictSandboxes(evictNonDeletedPods)
	if err != nil {
		return err
	}

	// Phase 3: pod sandbox log directories; its result is the overall result.
	return cgc.evictPodLogsDirectories(allSourcesReady)
}
第一步是驅逐容器
// evictContainers removes evictable containers according to gcPolicy.
//
// Evictable containers are grouped into evict units. When all sources are
// ready, every unit whose pod is deleted (or every unit, if
// evictNonDeletedPods is set) is removed entirely. The per-unit limit
// (MaxPerPodContainer) and the total limit (MaxContainers) are then enforced,
// each only when it is non-negative.
func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool, evictNonDeletedPods bool) error {
	// Group the evictable containers by evict unit.
	units, err := cgc.evictableContainers(gcPolicy.MinAge)
	if err != nil {
		return err
	}

	// Drop whole units, but only once all sources are ready.
	if allSourcesReady {
		for unitKey, containers := range units {
			if !cgc.isPodDeleted(unitKey.uid) && !evictNonDeletedPods {
				continue
			}
			cgc.removeOldestN(containers, len(containers)) // Remove all.
			delete(units, unitKey)
		}
	}

	// Per-unit cap.
	if gcPolicy.MaxPerPodContainer >= 0 {
		cgc.enforceMaxContainersPerEvictUnit(units, gcPolicy.MaxPerPodContainer)
	}

	// Total cap: nothing more to do when it is disabled or already satisfied.
	maxTotal := gcPolicy.MaxContainers
	if maxTotal < 0 || units.NumContainers() <= maxTotal {
		return nil
	}

	// Give every unit an equal share of the total, but never less than one.
	perUnit := maxTotal / units.NumEvictUnits()
	if perUnit < 1 {
		perUnit = 1
	}
	cgc.enforceMaxContainersPerEvictUnit(units, perUnit)

	// Still above the total cap: pool everything and evict the oldest first.
	remaining := units.NumContainers()
	if remaining <= maxTotal {
		return nil
	}
	pooled := make([]containerGCInfo, 0, remaining)
	for _, containers := range units {
		pooled = append(pooled, containers...)
	}
	sort.Sort(byCreated(pooled))
	cgc.removeOldestN(pooled, remaining-maxTotal)
	return nil
}
1.首先獲取已經死掉的并且創建時間大于MinAge的容器
2.如果所有配置源都已就緒,并且pod已經delete(或者要求驅逐未刪除pod的容器),那么把屬于這個pod的容器全部刪除
3.如果設置了MaxPerPodContainer那么把MaxPerPodContainer之外數量的容器刪除,這個值默認是1
4.如果設置MaxContainers那么再次對容器進行清理,這個值默認是-1也就是不清理的。首先會拿MaxContainers除以evict unit的數量,這樣會得到一個平均值(最小為1),然后按照這個值對3進行再次處理。這個時候如果機器上的死亡容器的數量還大于MaxContainers,那么直接按照創建時間對容器進行排序,然后刪除超出MaxContainers數量的最舊的容器。
下一步是清理機器上的sandbox
// evictSandboxes removes evictable sandboxes.
//
// A sandbox is marked active when it is in the SANDBOX_READY state or when at
// least one container references it. Sandboxes are grouped by pod UID and
// sorted by creation time. For a deleted pod (or for every pod when
// evictNonDeletedPods is true) all of the pod's sandboxes are handed to
// removeOldestNSandboxes; otherwise all but one are.
//
// It returns an error only when listing containers or sandboxes fails.
func (cgc *containerGC) evictSandboxes(evictNonDeletedPods bool) error {
	containers, err := cgc.manager.getKubeletContainers(true)
	if err != nil {
		return err
	}

	sandboxes, err := cgc.manager.getKubeletSandboxes(true)
	if err != nil {
		return err
	}

	// Index the IDs of sandboxes that still own a container once, so each
	// sandbox below needs a single lookup instead of a scan over all containers.
	sandboxesWithContainers := make(map[string]struct{}, len(containers))
	for _, container := range containers {
		sandboxesWithContainers[container.PodSandboxId] = struct{}{}
	}

	sandboxesByPod := make(sandboxesByPodUID)
	for _, sandbox := range sandboxes {
		podUID := types.UID(sandbox.Metadata.Uid)
		_, hasContainers := sandboxesWithContainers[sandbox.Id]
		sandboxInfo := sandboxGCInfo{
			id:         sandbox.Id,
			createTime: time.Unix(0, sandbox.CreatedAt),
			// Ready sandboxes and sandboxes that still have containers are active.
			active: sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY || hasContainers,
		}
		sandboxesByPod[podUID] = append(sandboxesByPod[podUID], sandboxInfo)
	}

	// Sort the sandboxes by age.
	for uid := range sandboxesByPod {
		sort.Sort(sandboxByCreated(sandboxesByPod[uid]))
	}

	for podUID, sandboxes := range sandboxesByPod {
		if cgc.isPodDeleted(podUID) || evictNonDeletedPods {
			// Remove all evictable sandboxes if the pod has been removed.
			// Note that the latest dead sandbox is also removed if there is
			// already an active one.
			cgc.removeOldestNSandboxes(sandboxes, len(sandboxes))
		} else {
			// Keep latest one if the pod still exists.
			cgc.removeOldestNSandboxes(sandboxes, len(sandboxes)-1)
		}
	}
	return nil
}
先獲取機器上所有的容器和sandbox,如果sandbox的狀態是SANDBOX_READY則置為active狀態,如果此sandbox還有container也認為是active狀態,接著對每個pod的sandbox按創建時間進行排序,如果sandbox所屬的pod已經被刪除那么刪除所有的sandbox,如果pod還存在那么就留下最新的一個sandbox其他的都刪除。
最后一步是清除container和pod日志
// evictPodLogsDirectories evicts all evictable pod logs directories. Pod logs directories
// are evictable if there are no corresponding pods.
//
// Directories under podLogsRootDirectory are only examined when
// allSourcesReady is true; each entry name is treated as a pod UID and the
// directory is removed when that pod is deleted. Independently of
// allSourcesReady, legacy container log symlinks whose target no longer
// exists are removed.
//
// Only a failure to read podLogsRootDirectory is returned as an error; every
// other failure is logged and the cleanup continues.
func (cgc *containerGC) evictPodLogsDirectories(allSourcesReady bool) error {
	osInterface := cgc.manager.osInterface
	if allSourcesReady {
		// Only remove pod logs directories when all sources are ready.
		dirs, err := osInterface.ReadDir(podLogsRootDirectory)
		if err != nil {
			return fmt.Errorf("failed to read podLogsRootDirectory %q: %v", podLogsRootDirectory, err)
		}
		for _, dir := range dirs {
			name := dir.Name()
			if !cgc.isPodDeleted(types.UID(name)) {
				continue
			}
			if err := osInterface.RemoveAll(filepath.Join(podLogsRootDirectory, name)); err != nil {
				glog.Errorf("Failed to remove pod logs directory %q: %v", name, err)
			}
		}
	}

	// Remove dead container log symlinks.
	// TODO(random-liu): Remove this after cluster logging supports CRI container log path.
	pattern := filepath.Join(legacyContainerLogsDir, fmt.Sprintf("*.%s", legacyLogSuffix))
	logSymlinks, err := osInterface.Glob(pattern)
	if err != nil {
		// Best effort: report the failure instead of dropping it silently, then
		// carry on with whatever matches were returned.
		glog.Errorf("Failed to list container log symlinks matching %q: %v", pattern, err)
	}
	for _, logSymlink := range logSymlinks {
		// A symlink is dead when stat-ing it reports that the target does not exist.
		if _, statErr := osInterface.Stat(logSymlink); os.IsNotExist(statErr) {
			if err := osInterface.Remove(logSymlink); err != nil {
				glog.Errorf("Failed to remove container log dead symlink %q: %v", logSymlink, err)
			}
		}
	}
	return nil
}
首先會讀取/var/log/pods目錄下面的子目錄,下面的目錄名稱都是pod的uid,如果pod已經刪除那么直接把pod所屬的目錄刪除,然后刪除/var/log/containers目錄下已經失效(指向的目標不存在)的軟連接。
至此單次ContainerGC的流程結束。