這是Mach-O系列的第三篇
閱讀 FishHook
源碼之前,你可能需要對以下知識有個簡單的了解
- Mach-O文件格式:趣探 Mach-O:文件格式分析
- 動態鏈接相關知識:Mach-O 的動態鏈接過程、趣探 Mach-O:加載過程
- 對操作系統、編譯原理的理解:深入解析Mac OS X & iOS操作系統、程序員的自我修養
本文的闡述順序按照函數調用過程來進行
Fishhook 可以做什么
在此借用阿里百川的一張分析圖,可以比較清晰的了解FishHook
發揮了哪些作用
FishHook
在這里是對動態鏈接庫起作用,修改對應的函數實現
對于動態鏈接庫里面的C
函數,第一次調用的時候,我們會得到函數和實現地址的對應關系,函數的實現地址存放在一個叫__la_symbol_ptr
的地方,第二次調用的時候,直接通過__la_symbol_ptr
找到函數地址就可以,不再需要繁瑣的獲取函數地址的過程。(具體通過哪些過程,可以參考剛才的鏈接:Mach-O 的動態鏈接過程)
那么,上圖的含義就很明了了
在程序運行時,動態鏈接的 C 函數
dynamic(...)
地址記錄在__DATA segment
下的__la_symbol_ptr
中;初始時,程序只知道dynamic
函數的符號名而不知道函數的實現地址;首次調用時,程序通過__TEXT segment
中的__stub_helper
取得綁定信息,通過dyld_stub_binder
來更新__la_symbol_ptr
中的符號實現地址;這樣,再次調用時,就可以通過__la_symbol_ptr
直接找到dynamic
函數的實現;如果我們需要替換dynamic
函數的實現,只需要修改__la_symbol_ptr
即可,也就是我們要談的Fishhook
Fishhook 的實(shí)現(xiàn)
通過fishhook的官方文檔可以知道,Fishhook
的使用方法大致如下:
/* Function pointer through which new_open forwards to the real open(). */
static int (*original_open)(const char *, int, ...);

/*
 * Replacement for open(): logs the call and forwards it to original_open.
 *
 * The optional third argument (the mode) is read from the variadic list only
 * when O_CREAT is set in oflag; otherwise a mode of 0 is forwarded.
 * Returns whatever original_open returns.
 */
int new_open(const char *path, int oflag, ...) {
  mode_t mode = 0;
  const int creating = (oflag & O_CREAT) != 0;

  if (!creating) {
    printf("Calling real open('%s', %d)\n", path, oflag);
    return original_open(path, oflag, mode);
  }

  /* mode only applies to O_CREAT */
  va_list args;
  va_start(args, oflag);
  mode = va_arg(args, int);
  va_end(args);

  printf("Calling real open('%s', %d, %d)\n", path, oflag, mode);
  return original_open(path, oflag, mode);
}
/*
 * Demo entry point: registers a single rebinding that maps "open" to new_open
 * (storing the previous implementation in original_open), then calls open()
 * on argv[0].
 */
int main(int argc, const char * argv[]) {
  @autoreleasepool {
    struct rebinding bindings[1] = {
      { "open", new_open, (void *)&original_open }
    };
    rebind_symbols(bindings, 1);
    __unused int fd = open(argv[0], O_RDONLY);
  }
  return 0;
}
先從函數的入口,rebind_symbols
開始談起吧,rebind_symbols
主要是使用_dyld_register_func_for_add_image
來注冊回調函數,在加載動態庫的時候執行一些操作
/*
 * Registers `rebindings` (rebindings_nel elements) and applies them.
 *
 * Returns the negative value reported by prepend_rebindings on failure,
 * otherwise its (non-negative) return value.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  // Push the whole rebindings array onto the front of the private
  // _rebindings_head list.
  int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  // A head node without a successor means this was the first call.
  // If this was the first call, register callback for image additions (which
  // is also invoked for existing images); otherwise, just run on existing
  // images.
  if (_rebindings_head->next == NULL) {
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  const uint32_t image_count = _dyld_image_count();
  uint32_t idx = 0;
  while (idx < image_count) {
    _rebind_symbols_for_image(_dyld_get_image_header(idx),
                              _dyld_get_image_vmaddr_slide(idx));
    idx++;
  }
  return status;
}
對于prepend_rebindings
的代碼如下
// Node of a singly linked list; each node holds one array of rebindings.
struct rebindings_entry {
// Array of rebindings owned by this node (prepend_rebindings fills it with a
// malloc'ed copy of the caller's array).
struct rebinding *rebindings;
// Number of elements in `rebindings`.
size_t rebindings_nel;
// Next node in the list; new nodes are inserted at the head.
struct rebindings_entry *next;
};
// Head of the list of registered rebindings.
static struct rebindings_entry *_rebindings_head;
/*
 * Copies `rebindings` (nel elements) into a newly allocated list node and
 * inserts that node at the head of *rebindings_head.
 *
 * Returns 0 on success. Returns -1 when memory cannot be allocated or when
 * nel * sizeof(struct rebinding) does not fit in size_t; in that case the
 * list is left unchanged and nothing is leaked.
 */
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
  // Reject element counts whose byte size would wrap around: a wrapped
  // product would make malloc return a too-small buffer and memcpy overrun it.
  if (nel > (size_t)-1 / sizeof(struct rebinding)) {
    return -1;
  }
  const size_t bytes = sizeof(struct rebinding) * nel;

  struct rebindings_entry *new_entry = malloc(sizeof(struct rebindings_entry));
  if (!new_entry) {
    return -1;
  }
  new_entry->rebindings = malloc(bytes);
  if (!new_entry->rebindings) {
    free(new_entry);
    return -1;
  }
  // Keep a private copy so the caller's array does not have to outlive the call.
  memcpy(new_entry->rebindings, rebindings, bytes);
  new_entry->rebindings_nel = nel;
  // Insert the new node at the head of the list.
  new_entry->next = *rebindings_head;
  *rebindings_head = new_entry;
  return 0;
}
基礎結構解釋
Dl_info
/*
 * Structure filled in by dladdr().
 * Holds the containing object's path and base address, plus the name and
 * address of the nearest symbol.
 */
typedef struct dl_info {
const char *dli_fname; /* Pathname of shared object */
void *dli_fbase; /* Base address of shared object */
const char *dli_sname; /* Name of nearest symbol */
void *dli_saddr; /* Address of nearest symbol */
} Dl_info;
我們一會經過 dladdr()
處理后的有效信息都會放進這個結構體中
-
dli_fname:
路徑名,例如
/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator.sdk/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation
-
dli_fbase:
鏡像的起始地址(Base address of shared object,比如上面的 CoreFoundation) -
dli_saddr :
符號的地址 -
dli_sname:
符號的名字,即下面的第四列的函數信息
Thread 0:
0 libsystem_kernel.dylib 0x11135810a __semwait_signal + 94474
1 libsystem_c.dylib 0x1110dab0b sleep + 518923
2 QYPerformanceMonitor 0x10dda4f1b -[ViewController tableView:cellForRowAtIndexPath:] + 7963
3 UIKit 0x10ed4d4f4 -[UITableView _createPreparedCellForGlobalRow:withIndexPath:willDisplay:] + 1586420
LC_SYMTAB
/*
 * LC_SYMTAB load command: gives the offset and entry count of the symbol
 * table and the offset and byte size of the string table.
 */
struct symtab_command {
uint32_t cmd; /* LC_SYMTAB */
uint32_t cmdsize; /* sizeof(struct symtab_command) */
uint32_t symoff; /* symbol table offset */
uint32_t nsyms; /* number of symbol table entries */
uint32_t stroff; /* string table offset */
uint32_t strsize; /* string table size in bytes */
};
主要是提供符號表的偏移量,以及元素個數,還有字符串表的偏移和其長度。符號表在 Mach-O
目標文件中的地址可以通過LC_SYMTAB
加載命令指定的 symoff
找到,對應的符號名稱在stroff
,總共有nsyms
條符號信息
LC_DYSYMTAB
這個數組結構有些復雜,有興趣的可以閱讀loader.h
文件,內部標示了動態符號表的偏移量和符號個數
struct dysymtab_command {
uint32_t cmd; /* LC_DYSYMTAB */
uint32_t cmdsize; /* sizeof(struct dysymtab_command) */
uint32_t indirectsymoff; /* file offset to the indirect symbol table */
uint32_t nindirectsyms; /* number of indirect symbol table entries */
.......
_rebind_symbols_for_image
對于關鍵的代碼 _rebind_symbols_for_image
如下
// Processes one image. This first part scans the image's load commands and
// remembers the __LINKEDIT segment command plus the LC_SYMTAB and
// LC_DYSYMTAB commands.
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
const struct mach_header *header,
intptr_t slide) {
Dl_info info;
// Give up on this image when dladdr() returns 0 for its header.
if (dladdr(header, &info) == 0) {
return;
}
// segment_command_t (presumably segment_command_64 on 64-bit targets; the
// typedef is not shown here -- confirm)
segment_command_t *cur_seg_cmd;
segment_command_t *linkedit_segment = NULL;
// LC_SYMTAB
struct symtab_command* symtab_cmd = NULL;
// LC_DYSYMTAB
struct dysymtab_command* dysymtab_cmd = NULL;
// The load commands are what we search next, so step over the mach_header_t.
uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
// Each iteration advances `cur` by the cmdsize of the command just examined.
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
// Scanning for the __LINKEDIT segment: found it.
linkedit_segment = cur_seg_cmd;
}
} else if (cur_seg_cmd->cmd == LC_SYMTAB) {
// Scanning for LC_SYMTAB: found it.
symtab_cmd = (struct symtab_command*)cur_seg_cmd;
} else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
// Scanning for LC_DYSYMTAB: found it.
dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
}
}
為什么要尋找這幾個LoadCommand
的信息呢?就如上面介紹的__LINKEDIT
、LC_DYSYMTAB
、LC_SYMTAB
都提供了重要的信息。
__LINKEDIT段 含有為動態鏈接庫使用的原始數據,比如符號,字符串,重定位表條目等等
閱讀下面的代碼之前,先來看一個計算公式
鏈接時程序的基址 = __LINKEDIT.VM_Address
- __LINKEDIT.File_Offset
+ slide
的改變值
這里出現了一個 slide
,那么slide
是啥呢?先看一下ASLR
ASLR:Address space layout randomization
,將可執行程序隨機裝載到內存中,這里的隨機只是偏移,而不是打亂,具體做法就是通過內核將 Mach-O
的段“平移”某個隨機系數。slide
正是ASLR
引入的偏移
也就是說程序的基址等于__LINKEDIT
的地址減去偏移量,然后再加上ASLR
造成的偏移
// Base address of the program at link time: slide + __LINKEDIT vmaddr - __LINKEDIT file offset.
// NOTE(review): linkedit_segment, symtab_cmd and dysymtab_cmd are dereferenced
// below, and no NULL check is visible in this excerpt -- confirm that images
// lacking any of these load commands cannot reach this point.
uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
// Symbol table address = base + symbol table offset
nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
// String table address = base + string table offset
char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
// Indirect symbol table address = base + indirect symbol table offset
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
符號表中的元素都是nlist_t
結構體,nlist_t
中有很多學問,這里先看一下他的基礎結構
/*
 * This is the symbol table entry structure for 32-bit architectures.
 * n_un.n_strx locates the symbol's name inside the string table.
 */
struct nlist {
union {
uint32_t n_strx; /* index into the string table */
} n_un;
uint8_t n_type; /* type flag, see below */
uint8_t n_sect; /* section number or NO_SECT */
int16_t n_desc; /* see <mach-o/stab.h> */
uint32_t n_value; /* value of this symbol (or stab offset) */
};
然后再次遍歷loadcommands
,尋找__DATA
和__DATA_CONST
的section
,并對__nl_symbol_ptr
以及__la_symbol_ptr
進行rebind
// Second pass over the load commands, again starting right after the header.
cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
// Only the SEG_DATA and SEG_DATA_CONST segments are of interest.
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
continue;
}
// Walk the sections of __DATA / __DATA_CONST and rebind the lazy and
// non-lazy symbol pointer sections (__la_symbol_ptr and __nl_symbol_ptr).
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
// The section headers follow the segment command; pick the j-th one.
section_t *sect =
(section_t *)(cur + sizeof(segment_command_t)) + j;
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
// sect is the section, symtab the symbol table, strtab the string table,
// indirect_symtab the indirect symbol table
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
}
}
perform_rebinding_with_section
__nl_symbol_ptr
和__la_symbol_ptr
section中的reserved1
字段指明對應的indirect symbol table
起始的index
,
For the two relevant sections, the section headers (struct sections from <mach-o/loader.h>) provide an offset (in the reserved1 field) into what is known as the indirect symbol table. The indirect symbol table, which is located in the __LINKEDIT segment of the binary, is just an array of indexes into the symbol table (also in __LINKEDIT) whose order is identical to that of the pointers in the non-lazy and lazy symbol sections
So, given struct section nl_symbol_ptr, the corresponding index in the symbol table of the first address in that section is indirect_symbol_table[nl_symbol_ptr->reserved1]. The symbol table itself is an array of struct nlists (see <mach-o/nlist.h>), and each nlist contains an index into the string table in __LINKEDIT, which is where the actual symbol names are stored. So, for each pointer in __nl_symbol_ptr and __la_symbol_ptr, we are able to find the corresponding symbol and then the corresponding string to compare against the requested symbol names, and if there is a match, we replace the pointer in the section with the replacement.
結合英文,看下面的代碼就很容易理解
// Looks up the symbol name behind every pointer slot of one symbol-pointer section.
// section is the section, symtab the symbol table, strtab the string table,
// indirect_symtab the indirect symbol table.
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
section_t *section,
intptr_t slide,
nlist_t *symtab,
char *strtab,
uint32_t *indirect_symtab) {
// The reserved1 field of the `nl_symbol_ptr` and `la_symbol_ptr` sections gives
// the starting index into the `indirect symbol table`.
// Address of this section's first entry within the indirect symbol table.
uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
// The section's pointer slots: section->addr adjusted by the slide.
void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);
// One iteration per pointer-sized slot in the section.
for (uint i = 0; i < section->size / sizeof(void *); i++) {
// Index into the symbol table
uint32_t symtab_index = indirect_symbol_indices[i];
// Skip entries equal to INDIRECT_SYMBOL_ABS, INDIRECT_SYMBOL_LOCAL, or both OR-ed together.
if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
continue;
}
// String-table offset of this symbol's name, read from its symbol table entry
uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
// Use the string-table offset to get the symbol's string (its name)
char *symbol_name = strtab + strtab_offset;
上面的代碼其實就可以用官方的一個圖片很直觀的表示
走到這里是找到了字符串表對應的符號(字符串)
如何替換實現
遍歷 rebindings
數組,符號進行比較,相同的符號就進行實現替換,這里的代碼比較清晰,直接貼出
// Walk every node of the rebindings list and every rebinding in each node.
struct rebindings_entry *cur = rebindings;
while (cur) {
for (uint j = 0; j < cur->rebindings_nel; j++) {
// The comparison starts at symbol_name[1], so the first character of the
// symbol name is skipped (presumably a leading underscore -- confirm).
// NOTE(review): for an empty symbol_name, &symbol_name[1] points past the
// terminator -- confirm names here are never empty.
if (strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
// Hand the current binding back through `replaced`, unless the slot already
// holds the replacement.
if (cur->rebindings[j].replaced != NULL &&
indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
*(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
}
// Point the slot at the replacement implementation.
indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
// First match wins: stop searching for this symbol.
goto symbol_loop;
}
}
cur = cur->next;
}
// Jump target used to continue with the next slot of the enclosing loop.
symbol_loop:;
}
參考鏈接
- 動態修改 C 語言函數的實現
- mrh的Fishhook源碼分析
- fishhook
- 深入解析Mac OS X & iOS操作系統
- 程序員的自我修養
- 編譯體系漫游