1. 为什么fishhook 可以hook NSLog等系统库
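其实就是 PIC(位置无关代码)技术:Mach-O 文件在编译时并不知道 NSLog 这类外部动态库函数的真实地址,所以对它们的调用都要经过 __DATA / __DATA_CONST 段里的符号指针表(懒加载表和非懒加载表)间接跳转;只有在 dyld 加载当前的 Mach-O 文件时,才会把真实地址绑定(bind)到这些指针上。fishhook 做的 rebind 就是改写这些指针,让它们指向我们自己的函数。这样设计主要是为了不把系统动态库打进 Mach-O,减少包的大小,也方便内存共享;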
2. 为什么使用 二级指针来 保存 nslog的源函数地址
nslogBind.replaced = (void *)&old_nslog;
因为开始的时候 old_nslog 是一个值为 NULL 的函数指针,fishhook 要在内部修改 old_nslog 本身保存的地址,就必须拿到"指向 old_nslog 的指针",也就是一个二级指针;在后面就会有个赋值操作 *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
这样就可以使得 old_nslog 函数指针指向 NSLog 的原始函数地址
// Demo entry point: logs once through the unhooked NSLog, then asks fishhook
// to rebind the "NSLog" symbol to myNSLog.
- (void)viewDidLoad {
    [super viewDidLoad];

    // Emitted before rebind_symbols() is called below.
    NSLog(@"123");

    // One rebinding request:
    //   name        - symbol to look up (compared without the leading underscore)
    //   replacement - function that calls should be redirected to
    //   replaced    - address of the function pointer that receives the original
    struct rebinding rebs[] = {
        {
            .name = "NSLog",
            .replacement = myNSLog,
            .replaced = (void *)&old_nslog,
        },
    };

    // arg1: array of rebinding structs, arg2: number of elements in that array.
    rebind_symbols(rebs, sizeof(rebs) / sizeof(rebs[0]));
}
// Function pointer that receives the address of the original NSLog. It is
// NULL until fishhook stores the original binding through the `replaced`
// pointer-to-pointer supplied in the rebinding request.
static void (*old_nslog)(NSString *format, ...);

// Replacement for NSLog. Appends a marker line to the caller's format string,
// expands the caller's variadic arguments, and forwards the finished text to
// the original NSLog.
//
// The arguments must be expanded here: forwarding only `format` to a variadic
// function would make the original read arguments that were never passed
// whenever the caller's format contains specifiers such as %@ or %d.
void myNSLog(NSString *format, ...) {
    // Nothing to forward to if the original address was never captured.
    if (old_nslog == NULL) {
        return;
    }

    NSString *message = nil;
    if (format != nil) {
        NSString *hookedFormat = [format stringByAppendingString:@"\n勾上了!"];

        va_list args;
        va_start(args, format);
        message = [[NSString alloc] initWithFormat:hookedFormat arguments:args];
        va_end(args);
    }

    // Pass the finished text as an argument, not as the format, so any '%'
    // left in the expanded message is not interpreted a second time.
    old_nslog(@"%@", message);
}
// Logs a fixed message whenever a touch begins on this responder.
// NOTE(review): presumably used to check the hook after viewDidLoad has
// rebound NSLog; the touches and event arguments are not used.
- (void)touchesBegan:(NSSet<UITouch *> *)touches withEvent:(UIEvent *)event {
NSLog(@"点击了屏幕!!");
}
复制代码
3. Dynamic Symbol Table、 Symbol Table 、String Table
3.1 Dynamic Symbol Table
首先要知道里面到底存了什么?
其实是一个 uint32_t *indirect_symbol_indices,也就是一个 uint32_t 类型的数组,数组里每个元素存的是对应符号在 Symbol Table 中的 index;
3.2 Symbol Table
这个是一个完整的符号表
内部存的是 nlist_64 结构体数组,代码里用 nlist_64 *symtab 指向它的首地址
/*
 * One entry of the 64-bit symbol table, as quoted by the article.
 * The rebinding code shown later reads n_un.n_strx from an entry and adds it
 * to the string-table base address to obtain the symbol's name.
 */
struct nlist_64 {
union {
uint32_t n_strx; /* index into the string table */
} n_un;
uint8_t n_type; /* type flag, see below */
uint8_t n_sect; /* section number or NO_SECT */
uint16_t n_desc; /* see <mach-o/stab.h> */
uint64_t n_value; /* value of this symbol (or stab offset) */
};
复制代码
内部有个比较重要的字段 n_strx,我们看到它的注释是 index into the string table,也就是该符号名在字符串表(String Table)里的 offset;源码里是这样取的:uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
union {
uint32_t n_strx; /* index into the string table */
} n_un
复制代码
4. 源码研读
#include "fishhook.h"

#include <dlfcn.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <mach/mach.h>
#include <mach/vm_map.h>
#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
\
// Architecture-neutral aliases: the rest of the file uses the *_t names and
// LC_SEGMENT_ARCH_DEPENDENT so the same code handles both the 64-bit and the
// 32-bit Mach-O structures.
#ifdef __LP64__
// 64-bit build: use the *_64 structures and the LC_SEGMENT_64 load command.
typedef struct mach_header_64 mach_header_t;
typedef struct segment_command_64 segment_command_t;
typedef struct section_64 section_t;
typedef struct nlist_64 nlist_t;
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT_64
#else
// 32-bit build: use the plain structures and the LC_SEGMENT load command.
typedef struct mach_header mach_header_t;
typedef struct segment_command segment_command_t;
typedef struct section section_t;
typedef struct nlist nlist_t;
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT
#endif
\
// Fallback definition of the "__DATA_CONST" segment name, used only when the
// included headers do not already define SEG_DATA_CONST.
#ifndef SEG_DATA_CONST
#define SEG_DATA_CONST "__DATA_CONST"
#endif
\
// One node of a singly linked list of rebinding requests. Each node owns a
// heap copy of the rebinding array passed to one registration call.
struct rebindings_entry {
// Heap-allocated copy of the caller's rebinding array.
struct rebinding *rebindings;
// Number of elements in `rebindings`.
size_t rebindings_nel;
// Next (older) node, or NULL at the end of the list.
struct rebindings_entry *next;
};
\
// Head of the process-wide list of rebinding requests. rebind_symbols()
// prepends to it, and the dyld add-image callback passes it to the per-image
// rebinding routine.
static struct rebindings_entry *_rebindings_head;
\
// Copies `nel` rebinding requests into a newly allocated list node and makes
// that node the new head of the list referenced by `rebindings_head`.
//
// Returns 0 on success, or -1 if either allocation fails; on failure the list
// is left unchanged and nothing is leaked.
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
    // Size in bytes of the caller's array; used for both the allocation and the copy.
    const size_t table_bytes = sizeof(struct rebinding) * nel;

    struct rebindings_entry *node =
        (struct rebindings_entry *)malloc(sizeof(*node));
    if (node == NULL) {
        return -1;
    }

    node->rebindings = (struct rebinding *)malloc(table_bytes);
    if (node->rebindings == NULL) {
        // Undo the first allocation so a failed call leaves no garbage behind.
        free(node);
        return -1;
    }

    // Take a private copy so the caller's array need not outlive this call.
    memcpy(node->rebindings, rebindings, table_bytes);
    node->rebindings_nel = nel;

    // Link the node in front of the current head, then publish it as the head.
    node->next = *rebindings_head;
    *rebindings_head = node;

    return 0;
}
\
\
/**
 * Rebinds the symbol pointers of one symbol-pointer section.
 *
 * For every pointer slot in `section`, looks up the slot's symbol name via the
 * indirect symbol table, the symbol table and the string table. If the name
 * matches a registered rebinding, the slot's current value is saved through
 * `replaced` and the slot is overwritten with `replacement`.
 *
 * @param rebindings      Head of the list of rebinding requests to apply.
 * @param section         Section whose pointer slots are scanned.
 * @param slide           Value added to section->addr to get the in-memory address.
 * @param symtab          Base of the symbol table (array of nlist entries).
 * @param strtab          Base of the string table.
 * @param indirect_symtab Base of the indirect symbol table (array of uint32_t).
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
    // section->reserved1 is the index of this section's first entry in the
    // indirect symbol table; each entry is a 4-byte index into the symbol table.
    uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;

    // In-memory array of pointer slots belonging to this section. Slot i holds
    // the address currently bound to the symbol named by indirect entry i.
    void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);

    // The section may belong to a segment that is mapped read-only (the caller
    // also passes sections of __DATA_CONST). Page protection is therefore
    // changed once, right before the first write, and the result is reused.
    bool protection_attempted = false;
    bool section_writable = false;

    // Walk every pointer-sized slot in the section.
    for (uint i = 0; i < section->size / sizeof(void *); i++) {
        uint32_t symtab_index = indirect_symbol_indices[i];

        // Skip slots whose indirect entry is one of the special marker values
        // instead of a symbol table index.
        if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
            symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
            continue;
        }

        // n_strx is the offset of the symbol's name inside the string table.
        uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
        char *symbol_name = strtab + strtab_offset;

        // The comparison below skips the first character (the leading '_' of
        // e.g. "_NSLog"), so the name needs at least two characters.
        bool symbol_name_longer_than_1 = symbol_name[0] && symbol_name[1];

        // Try every rebinding of every registered entry, newest entry first.
        struct rebindings_entry *cur = rebindings;
        while (cur) {
            for (uint j = 0; j < cur->rebindings_nel; j++) {
                if (symbol_name_longer_than_1 &&
                    strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
                    // Save the current binding for the caller, unless the slot
                    // already points at the replacement (that would make the
                    // "original" pointer refer to the hook itself).
                    if (cur->rebindings[j].replaced != NULL &&
                        indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
                        *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
                    }

                    // Make the section's pages writable before the first store.
                    if (!protection_attempted) {
                        protection_attempted = true;
                        kern_return_t err = vm_protect(mach_task_self(),
                                                       (vm_address_t)indirect_symbol_bindings,
                                                       section->size,
                                                       0,
                                                       VM_PROT_READ | VM_PROT_WRITE | VM_PROT_COPY);
                        section_writable = (err == KERN_SUCCESS);
                    }

                    // Only store when the pages are known to be writable;
                    // writing to a read-only mapping would fault.
                    if (section_writable) {
                        indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
                    }

                    // First match wins; move on to the next slot.
                    goto symbol_loop;
                }
            }
            cur = cur->next;
        }
    symbol_loop:;
    }
}
\
/**
 * Applies the given rebinding requests to a single loaded image.
 *
 * Pass 1 walks the load commands to find the __LINKEDIT segment and the
 * LC_SYMTAB / LC_DYSYMTAB commands. From these it derives the in-memory
 * addresses of the symbol table, string table and indirect symbol table.
 * Pass 2 walks the load commands again and rebinds every lazy and non-lazy
 * symbol-pointer section found in the __DATA and __DATA_CONST segments.
 *
 * @param rebindings Head of the list of rebinding requests.
 * @param header     Mach-O header of the image.
 * @param slide      Slide of the image.
 */
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
    // Bail out if dladdr cannot resolve the header address.
    Dl_info image_info;
    if (!dladdr(header, &image_info)) {
        return;
    }

    // Load commands start immediately after the Mach-O header.
    const uintptr_t first_command = (uintptr_t)header + sizeof(mach_header_t);

    segment_command_t *linkedit = NULL;
    struct symtab_command *symtab_lc = NULL;
    struct dysymtab_command *dysymtab_lc = NULL;

    // Pass 1: locate __LINKEDIT, LC_SYMTAB and LC_DYSYMTAB. Load commands have
    // different sizes, so advance by each command's own cmdsize.
    uintptr_t cursor = first_command;
    for (uint n = 0; n < header->ncmds; n++) {
        segment_command_t *lc = (segment_command_t *)cursor;
        switch (lc->cmd) {
            case LC_SEGMENT_ARCH_DEPENDENT:
                if (strcmp(lc->segname, SEG_LINKEDIT) == 0) {
                    linkedit = lc;
                }
                break;
            case LC_SYMTAB:
                symtab_lc = (struct symtab_command *)lc;
                break;
            case LC_DYSYMTAB:
                dysymtab_lc = (struct dysymtab_command *)lc;
                break;
            default:
                break;
        }
        cursor += lc->cmdsize;
    }

    // Nothing to do without all three pieces, or without indirect symbols.
    if (symtab_lc == NULL || dysymtab_lc == NULL || linkedit == NULL ||
        dysymtab_lc->nindirectsyms == 0) {
        return;
    }

    // The table offsets in the load commands are file offsets. Adding
    // (slide + vmaddr - fileoff) of __LINKEDIT turns them into in-memory
    // addresses.
    const uintptr_t linkedit_base =
        (uintptr_t)slide + linkedit->vmaddr - linkedit->fileoff;
    nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_lc->symoff);
    char *strtab = (char *)(linkedit_base + symtab_lc->stroff);
    uint32_t *indirect_symtab =
        (uint32_t *)(linkedit_base + dysymtab_lc->indirectsymoff);

    // Pass 2: visit the sections of the __DATA and __DATA_CONST segments.
    cursor = first_command;
    for (uint n = 0; n < header->ncmds; n++) {
        segment_command_t *lc = (segment_command_t *)cursor;
        // Advance now so the early `continue`s below cannot skip the step.
        cursor += lc->cmdsize;

        if (lc->cmd != LC_SEGMENT_ARCH_DEPENDENT) {
            continue;
        }
        if (strcmp(lc->segname, SEG_DATA) != 0 &&
            strcmp(lc->segname, SEG_DATA_CONST) != 0) {
            continue;
        }

        // The segment's section headers follow the segment command directly.
        section_t *sections =
            (section_t *)((uintptr_t)lc + sizeof(segment_command_t));
        for (uint s = 0; s < lc->nsects; s++) {
            section_t *sect = sections + s;
            const uint32_t section_type = sect->flags & SECTION_TYPE;

            // Only lazy and non-lazy symbol-pointer sections are rebound.
            if (section_type == S_LAZY_SYMBOL_POINTERS ||
                section_type == S_NON_LAZY_SYMBOL_POINTERS) {
                perform_rebinding_with_section(rebindings, sect, slide,
                                               symtab, strtab, indirect_symtab);
            }
        }
    }
}
\
// Adapter with the (header, slide) signature that rebind_symbols() registers
// with dyld and also calls directly for each existing image.
static void _rebind_symbols_for_image(const struct mach_header *header,
intptr_t slide) {
// Apply every globally registered rebinding to this image.
rebind_symbols_for_image(_rebindings_head, header, slide);
}
\
// Rebinds symbols in one specific image, for callers that already know the
// image's header and slide. The requests are held in a temporary list that is
// freed before returning, so they are not added to the global list.
//
// Returns the result of copying the requests: 0 on success, -1 if the
// temporary list could not be allocated.
int rebind_symbols_image(void *header,
                         intptr_t slide,
                         struct rebinding rebindings[],
                         size_t rebindings_nel) {
    struct rebindings_entry *scratch = NULL;
    const int status = prepend_rebindings(&scratch, rebindings, rebindings_nel);

    // If the copy failed, scratch is still NULL and the list is empty.
    rebind_symbols_for_image(scratch, (const struct mach_header *)header, slide);

    // Release the temporary node and the array it owns.
    if (scratch != NULL) {
        free(scratch->rebindings);
        free(scratch);
    }

    return status;
}
\
// Registers the given rebinding requests globally and applies them.
//
// Returns 0 on success, or a negative value if the requests could not be
// copied into the global list (in which case nothing else is done).
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
    const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
    if (status < 0) {
        return status;
    }

    // The new node has no successor only when it is the first one registered.
    if (_rebindings_head->next == NULL) {
        // First call: register the add-image callback. Registration also
        // invokes the callback for images that are already loaded.
        _dyld_register_func_for_add_image(_rebind_symbols_for_image);
        return status;
    }

    // Later calls: the callback is already registered, so run it by hand over
    // every image that is currently loaded.
    const uint32_t image_count = _dyld_image_count();
    uint32_t image_index = 0;
    while (image_index < image_count) {
        _rebind_symbols_for_image(_dyld_get_image_header(image_index),
                                  _dyld_get_image_vmaddr_slide(image_index));
        image_index++;
    }

    return status;
}
复制代码
© 版权声明
文章版权归作者所有,未经允许请勿转载。
THE END