一、大概過程
seL4內核是遵循multiboot specification的,可以直接通過GRUB引導。在GRUB引導完成后,會把CPU控制權跳轉到kernel入口;再由內核初始化自己的棧、GDT、IDT、Page Table等等;最后跳轉到內核C入口。本文以x86架構64位的seL4內核為參考對象,進行分析。
二、啟動流程
1. 內核匯編32位入口_start
- _start入口作為內核匯編32位代碼的入口,在GRUB將內核程序搬移到內存后,最終會跳轉到這一個入口,GRUB的任務就完成了。
- 這個入口加載啟動時候的棧到esp,清除eflags,并將GRUB獲取的硬件信息(eax,multiboot_magic; ebx, multiboot_info_ptr)壓棧,再調用common_init。
- 調用common_init接口初始化頁表結構、64位執行環境。最后通過ljmp指令跳轉到64位指令入口。
/*
 * _start: 32-bit kernel entry point, reached from a Multiboot loader.
 *
 * In:  %eax = multiboot_magic, %ebx = multiboot_info_ptr.
 *
 * Loads %esp with boot_stack_top, clears EFLAGS, pushes the two Multiboot
 * values as zero-extended 8-byte slots, calls common_init and finally
 * far-jumps through selector 8 to _start64. Control never returns here.
 */
BEGIN_FUNC(_start)
/* Assume we are MultiBooted, e.g. by GRUB.
 * While not immediately checked, the magic number is checked prior to
 * Multiboot dependent operations. */
movl %eax, %edi /* copy of multiboot_magic */
movl %ebx, %esi /* copy of multiboot_info_ptr */
/* Load kernel boot stack pointer. */
leal boot_stack_top, %esp
/* Reset EFLAGS register (also disables interrupts etc.): push a zero
 * dword and pop it into EFLAGS. */
pushl $0
popf
/* Already push parameters for calling boot_sys later. Push
 * them as 8 byte values so we can easily pop later.
 * Each value is preceded by a zero dword, so in memory the value is the
 * low half and the zero is the high half of an 8-byte slot. The resulting
 * stack, from the top: [magic, 0] [info_ptr, 0]. _entry_64 pops these two
 * slots into %rdi and %rsi. */
pushl $0
pushl %esi /* 2nd parameter: multiboot_info_ptr */
pushl $0
pushl %edi /* 1st parameter: multiboot_magic */
call common_init
/* Reload CS with long bit to enable long mode.
 * Selector 8 is GDT index 1 (TI = 0, RPL = 0) in the table that
 * common_init installed with lgdt _gdt64_ptr. */
ljmp $8, $_start64
END_FUNC(_start)
1.1 common_init接口
這個接口主要是調用子接口,就不再分析了。
/*
 * common_init: run the boot-time setup steps needed before the far jump
 * into 64-bit code.
 *
 * Sequence: clear CR0.PG, optionally call fsgsbase_enable, build the boot
 * page tables (setup_pml4), switch paging/long mode on (enable_x64_mode),
 * load the GDT descriptor _gdt64_ptr, optionally call syscall_enable.
 *
 * Clobbers %eax directly; the called helpers clobber further registers.
 */
BEGIN_FUNC(common_init)
/* Disable paging: mask 0x7fffffff clears bit 31 (PG) of CR0. */
movl %cr0, %eax
andl $0x7fffffff, %eax
movl %eax, %cr0
#ifdef CONFIG_FSGSBASE_INST
call fsgsbase_enable
#endif /* CONFIG_FSGSBASE_INST */
/* Initialize boot PML4 and switch to long mode. */
call setup_pml4
call enable_x64_mode
/* Load the GDT register from _gdt64_ptr; the caller (_start) then
 * far-jumps through selector 8 of this table. */
lgdt _gdt64_ptr
#ifdef CONFIG_SYSCALL
call syscall_enable
#endif
ret
END_FUNC(common_init)
1.2 setup_pml4接口分析
- 這個接口作為初始化過程中最重要的接口,負責初始化系統一開始的頁表結構,必須要仔細分析。
- 如果頁表結構初始化失敗,那么在訪問未映射的地址時,會觸發PF(page fault);而在系統初始化初期,并未安裝對應的處理函數,又會觸發DF(double fault);然而這時候還是沒有對應的處理函數,則會觸發TF(triple fault)。triple fault是很嚴重的系統錯誤,會使得CPU進入SHUT DOWN狀態,然后硬件電路強制使整個硬件系統重啟。
1.3 映射結構分析
我對于源碼做了相應備注,直接粘貼出來,如下:
/*
 * setup_pml4: build the boot-time page tables in boot_pml4, boot_pdpt and
 * _boot_pd.
 *
 * Layout produced by the stores below (8-byte entries, index = offset / 8):
 *   boot_pml4[0], [256], [511] -> boot_pdpt
 *   boot_pdpt[0], [510]        -> _boot_pd + 0x0000 (PD entries    0.. 511)
 *   boot_pdpt[1]               -> _boot_pd + 0x1000 (PD entries  512..1023)
 *   boot_pdpt[2]               -> _boot_pd + 0x2000 (PD entries 1024..1535)
 *   boot_pdpt[3]               -> _boot_pd + 0x3000 (PD entries 1536..2047)
 *   _boot_pd[0..2047]          -> 2 MiB pages covering physical 0 .. 4 GiB
 *
 * PML4 index 511 combined with PDPT index 510 corresponds to virtual
 * address 0xffffffff80000000, the same constant _entry_64 adds to %rsp.
 *
 * Only the low 32 bits of every entry are written. boot_pml4 and boot_pdpt
 * are zeroed first, so their upper dwords are 0. _boot_pd is NOT cleared
 * here; its upper dwords are presumably already zero -- TODO confirm
 * against where _boot_pd is allocated.
 *
 * Clobbers %eax, %ecx, %edx, %edi and flags (plus whatever
 * huge_page_check touches when CONFIG_HUGE_PAGE is set).
 */
BEGIN_FUNC(setup_pml4)
#ifdef CONFIG_HUGE_PAGE
call huge_page_check
#endif /* CONFIG_HUGE_PAGE */
movl %cr0, %eax /* disable paging: clear CR0 bit 31 (PG) */
andl $0x7fffffff, %eax
movl %eax, %cr0
movl $boot_pml4, %edi /* %edi = base address of boot_pml4 */
movl $0x0, %edx /* %edx = 0, the fill value for both clear loops */
movl $1024, %ecx /* 1024 dword stores = 4 KiB */
1:
movl %edx, (%edi) /* this loop clears boot_pml4 */
addl $4, %edi
loop 1b /* decrement %ecx, repeat while non-zero */
movl $boot_pdpt, %edi
movl $1024, %ecx /* again 1024 dwords = 4 KiB */
1:
movl %edx, (%edi) /* this loop clears boot_pdpt */
addl $4, %edi
loop 1b
movl $boot_pml4, %edi /* these lines fill the PML4 (512 x 8 B = 4 KiB) */
movl $boot_pdpt, %ecx
orl $0x7, %ecx /* set flag bits 0-2: Present | Read/Write | User */
movl %ecx, (%edi) /* index 0 -> boot_pdpt */
movl %ecx, 0x800(%edi) /* index 0x100 (256), offset 256 * 8 -> boot_pdpt */
movl %ecx, 4088(%edi) /* index 0x1ff (511), offset 511 * 8 -> boot_pdpt */
movl $_boot_pd, %ecx /* these lines fill the PDPT (512 x 8 B = 4 KiB) */
orl $0x7, %ecx /* set flag bits 0-2: Present | Read/Write | User */
movl $boot_pdpt, %edi
movl %ecx, (%edi) /* index 0 -> _boot_pd + 0x0000 (PD entry 0) */
movl %ecx, 4080(%edi) /* index 0x1fe (510), offset 510 * 8 -> same PD page as index 0 */
addl $0x1000, %ecx /* step to the next 4 KiB page of _boot_pd */
movl %ecx, 8(%edi) /* index 1 -> _boot_pd + 0x1000 (PD entry 0x200 = 512) */
addl $0x1000, %ecx
movl %ecx, 16(%edi) /* index 2 -> _boot_pd + 0x2000 (PD entry 0x400 = 1024) */
addl $0x1000, %ecx
movl %ecx, 24(%edi) /* index 3 -> _boot_pd + 0x3000 (PD entry 0x600 = 1536) */
/* Map first 4GiB into the _boot_pd. */
movl $_boot_pd, %edi /* these lines fill the PD (2048 x 8 B = 16 KiB) */
movl $2048, %ecx /* loop count: 2048 entries * 2 MiB = 4 GiB */
movl $0x87, %edx /* entry flags: Present | Read/Write | User | PS (bit 7, 2 MiB page) */
2:
movl %edx, (%edi) /* write low dword of PD entry 0, 1, 2, ... */
addl $0x200000, %edx /* next physical frame: +2 MiB, 1-to-1 mapping of the first 4 GiB */
addl $8, %edi /* next 8-byte PD entry */
loop 2b
ret
END_FUNC(setup_pml4)
目前做了第一版手繪的頁表模型(后期改為電子版),對應的抽象模型如下:
setup_plm4.jpg
1.3.1 說明
- PD 為4個表組成,對應了0~1G, 1~2G, 2~3G, 3~4G的物理內存地址,占用2K * 8B = 16KB空間。
- PDPT 為1個表組成,下標 0-3 的表項分別指向PD_0, PD_1, PD_2, PD_3,倒數第二個表項 510 (0x1fe)指向PD_0, 其他項均被初始化為0(空),占用 512 * 8B = 4KB空間。
- PML4 為1個表組成,下標為 0,256 (0x100),511 (0x1ff, the last)的表項均指向 PDPT,占用 512 * 8B = 4KB空間。
1.4 enable_x64_mode接口分析
主要流程為如下:
- cr3 = pml4, 加載頂層頁表基地址;
- cr4[5] = 1,開啟PAE;
- IA32_EFER_MSR[8] = 1, 開啟Long Mode 使能位;
- cr0[31] = 1, 開啟分頁功能;
在調用本接口后,執行ljmp加載64位代碼段的CS, OFFSET。CPU便進入到64位模式。
/*
 * enable_x64_mode: point CR3 at boot_pml4 and turn on PAE, EFER.LME and
 * paging, in that order. With CONFIG_SUPPORT_PCID it first calls
 * pcid_check / invpcid_check and finally sets CR4 bit 17 (PCID).
 *
 * Expects boot_pml4 to have been populated already (common_init calls
 * setup_pml4 immediately before this function).
 *
 * Clobbers %eax, %ecx, %edx (rdmsr returns the MSR in %edx:%eax) and flags.
 */
BEGIN_FUNC(enable_x64_mode)
#ifdef CONFIG_SUPPORT_PCID
call pcid_check
call invpcid_check
#endif
/* Put base pointer in cr3. */
movl $boot_pml4, %eax
movl %eax, %cr3
/* Set PAE (bit 5), as this is required before switching to long mode. */
movl %cr4, %eax
orl $0x20, %eax
movl %eax, %cr4
/* Set LME (bit 8) in the extended feature MSR.
 * rdmsr reads the MSR selected by %ecx into %edx:%eax; wrmsr writes
 * %edx:%eax back, so %edx is passed through unchanged. */
movl $IA32_EFER_MSR, %ecx
rdmsr
orl $0x100, %eax
wrmsr
/* Set PG (bit 31) of cr0 to enable paging. */
movl %cr0, %eax
orl $0x80000000, %eax
movl %eax, %cr0
#ifdef CONFIG_SUPPORT_PCID
/* Enable PCID (bit 17), must be done in long mode. */
movl %cr4, %eax
orl $0x20000, %eax
movl %eax, %cr4
#endif
ret
END_FUNC(enable_x64_mode)
2. 內核匯編64位入口_start64
該段程序主要加載內核高端虛擬地址_entry_64,並使CPU跳轉到該地址,執行內核程序。
/*
 * _start64: first 64-bit code, target of the far jump at the end of _start.
 * Loads the full 64-bit address of _entry_64 and jumps to it indirectly.
 * Placed on a 4096-byte boundary by the .align directive below.
 */
.align 4096
BEGIN_FUNC(_start64)
/* Leave phys code behind and jump to the high kernel virtual address.
 * movabs carries a 64-bit immediate, and the jump goes through %rax. */
movabs $_entry_64, %rax
jmp *%rax
END_FUNC(_start64)
3. 內核匯編64位入口_entry_64
該接口主要將內核棧更新到rsp,并將GRUB傳遞的兩個參數Pop到rdi, rsi(64位C程序,從左往右第一、二個參數),然后壓入restore_user_context接口(具體功能暫時未研究)地址,作為boot_sys的返回地址。最后跳轉到64位的內核C接口boot_sys執行系統初始化的詳細工作。
/*
 * _entry_64: 64-bit entry reached from _start64.
 *
 * Rebases the boot stack pointers by 0xffffffff80000000, pops the two
 * 8-byte Multiboot slots pushed by _start into %rdi and %rsi, switches to
 * the stack at kernel_stack_alloc + (1 << CONFIG_KERNEL_STACK_BITS), pushes
 * the address of restore_user_context and jumps to boot_sys.
 *
 * Out (to boot_sys): %rdi = multiboot_magic, %rsi = multiboot_info_ptr.
 * Clobbers %rax.
 */
BEGIN_FUNC(_entry_64)
/* Update our stack pointer.
 * Adding 0xffffffff80000000 makes %rsp/%rbp refer to the boot stack
 * through the high mapping (PML4 index 511 / PDPT index 510 set up in
 * setup_pml4) rather than through its low address. */
movq $0xffffffff80000000, %rax
addq %rax, %rsp
addq %rax, %rbp
/* Pop the multiboot parameters off.
 * _start pushed magic last, so it comes off first. */
pop %rdi
pop %rsi
/* Load our real kernel stack: the address one past the end of a
 * (1 << CONFIG_KERNEL_STACK_BITS)-byte region starting at
 * kernel_stack_alloc. */
leaq kernel_stack_alloc + (1 << CONFIG_KERNEL_STACK_BITS), %rsp
/* Push restore_user_context as the return address, then enter boot_sys
 * with jmp (not call), so that a ret from boot_sys lands in
 * restore_user_context. */
movabs $restore_user_context, %rax
push %rax
jmp boot_sys
END_FUNC(_entry_64)