相关文章链接:iOS应用启动流程分析之dyld过程初探
上一篇我们介绍了关于应用启动的一些整体过程的讲解,还有关于dyld的一些简单介绍,这一篇文章我们就用一个探路者的视角去分析dyld的源码和流程吧!
一、dyld分析前的准备
1、源码
地址:opensource.apple.com/tarballs/dy…
2、堆栈信息
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
* frame #0: 0x000000010ae80ecc APP启动流程分析`+[ViewController load](self=ViewController, _cmd="load") at ViewController.m:18:1
frame #1: 0x00007fff20181ff2 libobjc.A.dylib`load_images + 1439
frame #2: 0x000000010ae94e2c dyld_sim`dyld::notifySingle(dyld_image_states, ImageLoader const*, ImageLoader::InitializerTimingList*) + 425
frame #3: 0x000000010aea3ba5 dyld_sim`ImageLoader::recursiveInitialization(ImageLoader::LinkContext const&, unsigned int, char const*, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 437
frame #4: 0x000000010aea1ec7 dyld_sim`ImageLoader::processInitializers(ImageLoader::LinkContext const&, unsigned int, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 191
frame #5: 0x000000010aea1f68 dyld_sim`ImageLoader::runInitializers(ImageLoader::LinkContext const&, ImageLoader::InitializerTimingList&) + 82
frame #6: 0x000000010ae9526b dyld_sim`dyld::initializeMainExecutable() + 199
frame #7: 0x000000010ae99f56 dyld_sim`dyld::_main(macho_header const*, unsigned long, int, char const**, char const**, char const**, unsigned long*) + 4789
frame #8: 0x000000010ae941c2 dyld_sim`start_sim + 122
frame #9: 0x000000010bc97a88 dyld`dyld::useSimulatorDyld(int, macho_header const*, char const*, int, char const**, char const**, char const**, unsigned long*, unsigned long*) + 2093
frame #10: 0x000000010bc95162 dyld`dyld::_main(macho_header const*, unsigned long, int, char const**, char const**, char const**, unsigned long*) + 1198
frame #11: 0x000000010bc8f224 dyld`dyldbootstrap::start(dyld3::MachOLoaded const*, int, char const**, dyld3::MachOLoaded const*, unsigned long*) + 450
frame #12: 0x000000010bc8f025 dyld`_dyld_start + 37
复制代码
3、MachO分析工具
工具( 三者都可,推荐 MachOView ): Hopper Disassembler、MachOView、IDA64
4、测试工程和MachO文件
下面分别是测试工程和经过MachOView分析的测试工程可执行文件结构信息。
二、进入源码分析流程
1、dyld入口函数 _dyld_start
- 搜索源码中的_dyld_start函数,我们进入到arm64环境下的汇编,跟踪流程。拿到下一个函数 dyldbootstrap::start
- 然后根据dyldbootstrap找到命名空间和start函数,从源码上分析,是dyld的准备过程。
其实以上都是为dyld的加载做准备工作的,在环境和其他的条件都准备完毕后,就开始进入dyld的main中开始dyld的链接过程。
2、uintptr_t _main 分析
-
2.1 代码跟踪到dyld.cpp的main函数中
查看和检测设置dyld上下文需要传入的参数,包括可执行文件、参数、系统运行环境,平台架构等。我们看到程序日志打印:dyld: launch started!到这里,dyld才算正式开始!
继续查看内部实现,可以跟踪到以下代码:
-
2.2 setContext的内部实现逻辑,设置上下文内容
static void setContext(const macho_header* mainExecutableMH, int argc, const char* argv[], const char* envp[], const char* apple[])
{
#pragma mark - 加载库文件
gLinkContext.loadLibrary = &libraryLocator;
gLinkContext.terminationRecorder = &terminationRecorder;
gLinkContext.flatExportFinder = &flatFindExportedSymbol;
gLinkContext.coalescedExportFinder = &findCoalescedExportedSymbol;
gLinkContext.getCoalescedImages = &getCoalescedImages;
gLinkContext.undefinedHandler = &undefinedHandler;
#pragma mark - 获取所有映射地址
gLinkContext.getAllMappedRegions = &getMappedRegions;
gLinkContext.bindingHandler = NULL;
gLinkContext.notifySingle = ¬ifySingle;
gLinkContext.notifyBatch = ¬ifyBatch;
gLinkContext.removeImage = &removeImage;
gLinkContext.registerDOFs = dyld3::Loader::dtraceUserProbesEnabled() ? ®isterDOFs : NULL;
gLinkContext.clearAllDepths = &clearAllDepths;
gLinkContext.printAllDepths = &printAllDepths;
gLinkContext.imageCount = &imageCount;
gLinkContext.setNewProgramVars = &setNewProgramVars;
#pragma mark - 链接共享缓存,说明已经装载镜像完毕
gLinkContext.inSharedCache = &inSharedCache;
gLinkContext.setErrorStrings = &setErrorStrings;
#if SUPPORT_OLD_CRT_INITIALIZATION
gLinkContext.setRunInitialzersOldWay= &setRunInitialzersOldWay;
#endif
gLinkContext.findImageContainingAddress = &findImageContainingAddress;
gLinkContext.addDynamicReference = &addDynamicReference;
#if SUPPORT_ACCELERATE_TABLES
gLinkContext.notifySingleFromCache = ¬ifySingleFromCache;
gLinkContext.getPreInitNotifyHandler= &getPreInitNotifyHandler;
gLinkContext.getBoundBatchHandler = &getBoundBatchHandler;
#endif
gLinkContext.bindingOptions = ImageLoader::kBindingNone;
#pragma mark - 绑定基本信息
gLinkContext.argc = argc;
gLinkContext.argv = argv;
gLinkContext.envp = envp;
gLinkContext.apple = apple;
gLinkContext.progname = (argv[0] != NULL) ? basename(argv[0]) : "";
gLinkContext.programVars.mh = mainExecutableMH;
gLinkContext.programVars.NXArgcPtr = &gLinkContext.argc;
gLinkContext.programVars.NXArgvPtr = &gLinkContext.argv;
gLinkContext.programVars.environPtr = &gLinkContext.envp;
gLinkContext.programVars.__prognamePtr=&gLinkContext.progname;
gLinkContext.mainExecutable = NULL;
gLinkContext.imageSuffix = NULL;
gLinkContext.dynamicInterposeArray = NULL;
gLinkContext.dynamicInterposeCount = 0;
gLinkContext.prebindUsage = ImageLoader::kUseAllPrebinding;
gLinkContext.sharedRegionMode = ImageLoader::kUseSharedRegion;
}
复制代码
-
2.3 加载共享缓存
这里略过了一些步骤,在设置完上下文之后,系统进行了一些运行库路径的加载、dyld相关方法的检测、日志打印、缓存开启状态等操作。
-
2.4 mapSharedCache
static void mapSharedCache(uintptr_t mainExecutableSlide)
{
dyld3::SharedCacheOptions opts;
opts.cacheDirOverride = sSharedCacheOverrideDir;
opts.forcePrivate = (gLinkContext.sharedRegionMode == ImageLoader::kUsePrivateSharedRegion);
#if __x86_64__ && !TARGET_OS_SIMULATOR
opts.useHaswell = sHaswell;
#else
opts.useHaswell = false;
#endif
opts.verbose = gLinkContext.verboseMapping;
// <rdar://problem/32031197> respect -disable_aslr boot-arg
// <rdar://problem/56299169> kern.bootargs is now blocked
opts.disableASLR = (mainExecutableSlide == 0) && dyld3::internalInstall(); // infer ASLR is off if main executable is not slid
loadDyldCache(opts, &sSharedCacheLoadInfo);
// update global state
if ( sSharedCacheLoadInfo.loadAddress != nullptr ) {
gLinkContext.dyldCache = sSharedCacheLoadInfo.loadAddress;
dyld::gProcessInfo->processDetachedFromSharedRegion = opts.forcePrivate;
dyld::gProcessInfo->sharedCacheSlide = sSharedCacheLoadInfo.slide;
dyld::gProcessInfo->sharedCacheBaseAddress = (unsigned long)sSharedCacheLoadInfo.loadAddress;
sSharedCacheLoadInfo.loadAddress->getUUID(dyld::gProcessInfo->sharedCacheUUID);
dyld3::kdebug_trace_dyld_image(DBG_DYLD_UUID_SHARED_CACHE_A, sSharedCacheLoadInfo.path, (const uuid_t *)&dyld::gProcessInfo->sharedCacheUUID[0], {0,0}, {{ 0, 0 }}, (const mach_header *)sSharedCacheLoadInfo.loadAddress);
}
}
bool loadDyldCache(const SharedCacheOptions& options, SharedCacheLoadInfo* results)
{
results->loadAddress = 0;
results->slide = 0;
results->errorMessage = nullptr;
#if TARGET_OS_SIMULATOR
// simulator only supports mmap()ing cache privately into process
return mapCachePrivate(options, results);
#else
if ( options.forcePrivate ) {
// mmap cache into this process only
return mapCachePrivate(options, results);
}
else {
// fast path: when cache is already mapped into shared region
bool hasError = false;
if ( reuseExistingCache(options, results) ) {
hasError = (results->errorMessage != nullptr);
} else {
// slow path: this is first process to load cache
hasError = mapCacheSystemWide(options, results);
}
return hasError;
}
#endif
}
复制代码
这里通过dyld3去进行了一个动态加载缓存映射的过程,像系统常用的一些库,如UIKit、Foundation等所有iOS应用程序都会用到的库,可以通过加载共享缓存的方式,减少ipa包体的大小,节省系统的内存开销,减少资源冗余问题。
3、dyld主函数调用过程分析
以上是获取MachO可执行文件信息,并通过dyld链接器存储代码签名信息。
-
3.1 加载可执行文件信息
#pragma mark - 加载可执行文件信息
static ImageLoaderMachO* instantiateFromLoadedImage(const macho_header* mh, uintptr_t slide, const char* path)
{
// try mach-o loader
// if ( isCompatibleMachO((const uint8_t*)mh, path) ) {
ImageLoader* image = ImageLoaderMachO::instantiateMainExecutable(mh, slide, path, gLinkContext);
addImage(image);
return (ImageLoaderMachO*)image;
// }
// throw "main executable not a known format";
}
#pragma mark - 为可执行文件,创建镜像
// create image for main executable
ImageLoader* ImageLoaderMachO::instantiateMainExecutable(const macho_header* mh, uintptr_t slide, const char* path, const LinkContext& context)
{
//dyld::log("ImageLoader=%ld, ImageLoaderMachO=%ld, ImageLoaderMachOClassic=%ld, ImageLoaderMachOCompressed=%ld\n",
// sizeof(ImageLoader), sizeof(ImageLoaderMachO), sizeof(ImageLoaderMachOClassic), sizeof(ImageLoaderMachOCompressed));
bool compressed;
unsigned int segCount;
unsigned int libCount;
const linkedit_data_command* codeSigCmd;
const encryption_info_command* encryptCmd;
sniffLoadCommands(mh, path, false, &compressed, &segCount, &libCount, context, &codeSigCmd, &encryptCmd);
// instantiate concrete class based on content of load commands
if ( compressed )
return ImageLoaderMachOCompressed::instantiateMainExecutable(mh, slide, path, segCount, libCount, context);
else
#if SUPPORT_CLASSIC_MACHO
return ImageLoaderMachOClassic::instantiateMainExecutable(mh, slide, path, segCount, libCount, context);
#else
throw "missing LC_DYLD_INFO load command";
#endif
}
复制代码
以上操作是通过ImageLoader加载MachO可执行文件,获取Header文件
static bool hasCodeSignatureLoadCommand(const macho_header* mh)
{
const uint32_t cmd_count = mh->ncmds;
const struct load_command* const cmds = (struct load_command*)(((char*)mh)+sizeof(macho_header));
const struct load_command* cmd = cmds;
for (uint32_t i = 0; i < cmd_count; ++i) {
if (cmd->cmd == LC_CODE_SIGNATURE)
return true;
cmd = (const struct load_command*)(((char*)cmd)+cmd->cmdsize);
}
return false;
}
复制代码
以上操作是获取MachO文件的LoadCommand内容,获取代码签名信息。
-
3.2 加载images、runtime、cache信息到dyld_all_image_info
-
3.3 加载插入动态库
-
3.4 插入动态库的方法 loadInsertedDylib
static void loadInsertedDylib(const char* path)
{
unsigned cacheIndex;
try {
LoadContext context;
context.useSearchPaths = false;
context.useFallbackPaths = false;
context.useLdLibraryPath = false;
context.implicitRPath = false;
context.matchByInstallName = false;
context.dontLoad = false;
context.mustBeBundle = false;
context.mustBeDylib = true;
context.canBePIE = false;
context.origin = NULL; // can't use @loader_path with DYLD_INSERT_LIBRARIES
context.rpath = NULL;
#pragma mark - 如果存在,就加载动态库到指定位置,load过程省略...
load(path, context, cacheIndex);
}
catch (const char* msg) {
if ( gLinkContext.allowInsertFailures )
dyld::log("dyld: warning: could not load inserted library '%s' into hardened process because %s\n", path, msg);
else
halt(dyld::mkstringf("could not load inserted library '%s' because %s\n", path, msg));
}
catch (...) {
halt(dyld::mkstringf("could not load inserted library '%s'\n", path));
}
}
复制代码
-
3.5 链接主程序 link main executable
link(sMainExecutable, ……)表示主程序链接过程,这里就进入主程序链接过程。
-
3.6 主程序链接后,进行动态库插入(符号修正操作)
4、调用所有初始化器,进行主程序的初始化
-
4.1 主函数执行的代码逻辑跟踪
#pragma mark - initializeMainExecutable 主函数
void initializeMainExecutable()
{
// record that we've reached this step
gLinkContext.startedInitializingMainExecutable = true;
#pragma mark - 运行所有插入动态库的dyld库的初始化器
// run initialzers for any inserted dylibs
ImageLoader::InitializerTimingList initializerTimes[allImagesCount()];
initializerTimes[0].count = 0;
const size_t rootCount = sImageRoots.size();
if ( rootCount > 1 ) {
for(size_t i=1; i < rootCount; ++i) {
sImageRoots[i]->runInitializers(gLinkContext, initializerTimes[0]);
}
}
#pragma mark - 运行主函数的初始化器并取出所有信息
// run initializers for main executable and everything it brings up
sMainExecutable->runInitializers(gLinkContext, initializerTimes[0]);
// register cxa_atexit() handler to run static terminators in all loaded images when this process exits
if ( gLibSystemHelpers != NULL )
(*gLibSystemHelpers->cxa_atexit)(&runAllStaticTerminators, NULL, NULL);
// dump info if requested
if ( sEnv.DYLD_PRINT_STATISTICS )
ImageLoader::printStatistics((unsigned int)allImagesCount(), initializerTimes[0]);
if ( sEnv.DYLD_PRINT_STATISTICS_DETAILS )
ImageLoaderMachO::printStatisticsDetails((unsigned int)allImagesCount(), initializerTimes[0]);
}
复制代码
-
4.2 子函数 runInitializers
#pragma mark - 子函数 runInitializers
void ImageLoader::runInitializers(const LinkContext& context, InitializerTimingList& timingInfo)
{
uint64_t t1 = mach_absolute_time();
mach_port_t thisThread = mach_thread_self();
ImageLoader::UninitedUpwards up;
up.count = 1;
up.imagesAndPaths[0] = { this, this->getPath() };
processInitializers(context, thisThread, timingInfo, up);
context.notifyBatch(dyld_image_state_initialized, false);
mach_port_deallocate(mach_task_self(), thisThread);
uint64_t t2 = mach_absolute_time();
fgTotalInitTime += (t2 - t1);
}
复制代码
运行初始化器,获取镜像和镜像路径,执行初始化进行操作。
-
4.3 子函数 processInitializers
#pragma mark - 子函数 processInitializers
void ImageLoader::processInitializers(const LinkContext& context, mach_port_t thisThread,
InitializerTimingList& timingInfo, ImageLoader::UninitedUpwards& images)
{
uint32_t maxImageCount = context.imageCount()+2;
ImageLoader::UninitedUpwards upsBuffer[maxImageCount];
ImageLoader::UninitedUpwards& ups = upsBuffer[0];
ups.count = 0;
// Calling recursive init on all images in images list, building a new list of
// uninitialized upward dependencies.
for (uintptr_t i=0; i < images.count; ++i) {
images.imagesAndPaths[i].first->recursiveInitialization(context, thisThread, images.imagesAndPaths[i].second, timingInfo, ups);
}
// If any upward dependencies remain, init them.
if ( ups.count > 0 )
processInitializers(context, thisThread, timingInfo, ups);
}
复制代码
获取上下文内容中的镜像与库的信息,检测他们的向上层的依赖关系,如果存在依赖则继续递归,否则进行拿到这些信息进行初始化过程。
-
4.4 子函数 recursiveInitialization (递归初始化器)
#pragma mark - 子函数 recursiveInitialization
void ImageLoader::recursiveInitialization(const LinkContext& context, mach_port_t this_thread, const char* pathToInitialize,
InitializerTimingList& timingInfo, UninitedUpwards& uninitUps)
{
recursive_lock lock_info(this_thread);
recursiveSpinLock(lock_info);
if ( fState < dyld_image_state_dependents_initialized-1 ) {
uint8_t oldState = fState;
// break cycles
fState = dyld_image_state_dependents_initialized-1;
try {
// initialize lower level libraries first
for(unsigned int i=0; i < libraryCount(); ++i) {
ImageLoader* dependentImage = libImage(i);
if ( dependentImage != NULL ) {
// don't try to initialize stuff "above" me yet
if ( libIsUpward(i) ) {
uninitUps.imagesAndPaths[uninitUps.count] = { dependentImage, libPath(i) };
uninitUps.count++;
}
else if ( dependentImage->fDepth >= fDepth ) {
dependentImage->recursiveInitialization(context, this_thread, libPath(i), timingInfo, uninitUps);
}
}
}
// record termination order
if ( this->needsTermination() )
context.terminationRecorder(this);
// let objc know we are about to initialize this image
uint64_t t1 = mach_absolute_time();
fState = dyld_image_state_dependents_initialized;
oldState = fState;
context.notifySingle(dyld_image_state_dependents_initialized, this, &timingInfo);
// initialize this image
bool hasInitializers = this->doInitialization(context);
// let anyone know we finished initializing this image
fState = dyld_image_state_initialized;
oldState = fState;
context.notifySingle(dyld_image_state_initialized, this, NULL);
if ( hasInitializers ) {
uint64_t t2 = mach_absolute_time();
timingInfo.addTime(this->getShortName(), t2-t1);
}
}
catch (const char* msg) {
// this image is not initialized
fState = oldState;
recursiveSpinUnLock();
throw;
}
}
recursiveSpinUnLock();
}
复制代码
利用递归查找加载到dyld的镜像,进行初始化操作,直到完成所有镜像文件的初始化,未完成初始化的镜像和库文件抛出异常。
-
4.5 子函数 doInitialization
#pragma mark - 子函数 doInitialization
bool ImageLoaderMachO::doInitialization(const LinkContext& context)
{
CRSetCrashLogMessage2(this->getPath());
// mach-o has -init and static initializers
doImageInit(context);
doModInitFunctions(context);
CRSetCrashLogMessage2(NULL);
return (fHasDashInit || fHasInitializers);
}
复制代码
操作是拿到链接的内容,进行初始化操作,包含操作:日志打印、MachO初始化后的镜像和函数初始化,最后返回一个初始化和绑定状态。
主函数调用过程主要描述了通过加载dyld中已经缓存的关于images镜像信息,通过不断的递归查找,进行初始化的整个过程。
-
# 4.6 初始化主程序总结:
- initializeMainExecutable:初始化主程序,内部会调用其他跟镜像rootImage的初始化
- runInitializers: 进一步调用processInitializers
- processInitializers:递归调用镜像的初始化
- recursiveInitialization: 这里会检测一些依赖,并将它初始化,接着开始doInitialization
- doInitialization:开始初始化动作,这会进行各个模块的初始化操作_objc_init也是在此进行的
说到doInitialization中的_objc_init,我们可以跟踪下相关流程:
void _objc_init(void)
{
static bool initialized = false;
if (initialized) return;
initialized = true;
// fixme defer initialization until an objc-using image is found?
environ_init();
tls_init();
static_init();
runtime_init();
exception_init();
cache_init();
_imp_implementationWithBlock_init();
_dyld_objc_notify_register(&map_images, load_images, unmap_image);
#if __OBJC2__
didCallDyldNotifyRegister = true;
#endif
}
复制代码
这里又回到了objc中调用init初始化方法,形成了一个完整的闭环。
5、main函数入口
我们看到入口函数的值从getEntryFromLC_MAIN 、 main executable得到,继续跟踪dyld的源码入口函数:
-
5.1 重新回到dyld汇编源码
#if __arm64__ && !TARGET_OS_SIMULATOR
.text
.align 2
.globl __dyld_start
__dyld_start:
mov x28, sp
and sp, x28, #~15 // force 16-byte alignment of stack
mov x0, #0
mov x1, #0
stp x1, x0, [sp, #-16]! // make aligned terminating frame
mov fp, sp // set up fp to point to terminating frame
sub sp, sp, #16 // make room for local variables
#if __LP64__
ldr x0, [x28] // get app's mh into x0
ldr x1, [x28, #8] // get argc into x1 (kernel passes 32-bit int argc as 64-bits on stack to keep alignment)
add x2, x28, #16 // get argv into x2
#else
ldr w0, [x28] // get app's mh into x0
ldr w1, [x28, #4] // get argc into x1 (kernel passes 32-bit int argc as 64-bits on stack to keep alignment)
add w2, w28, #8 // get argv into x2
#endif
adrp x3,___dso_handle@page
add x3,x3,___dso_handle@pageoff // get dyld's mh in to x4
mov x4,sp // x5 has &startGlue
// call dyldbootstrap::start(app_mh, argc, argv, dyld_mh, &startGlue)
bl __ZN13dyldbootstrap5startEPKN5dyld311MachOLoadedEiPPKcS3_Pm
mov x16,x0 // save entry point address in x16
#if __LP64__
ldr x1, [sp]
#else
ldr w1, [sp]
#endif
cmp x1, #0
b.ne Lnew
// LC_UNIXTHREAD way, clean up stack and jump to result
#if __LP64__
add sp, x28, #8 // restore unaligned stack pointer without app mh
#else
add sp, x28, #4 // restore unaligned stack pointer without app mh
#endif
#pragma mark - ************************************
#pragma mark - *********** 程序入口 ************
#pragma mark - ************************************
#if __arm64e__
braaz x16 // jump to the program's entry point
#else
br x16 // jump to the program's entry point
#endif
复制代码
如上面所示:我们着重来看dyld的源程序arm64结构下的执行,程序执行到这里,形成了一个完整的闭环,我们在x16可以找到程序的入口,那么main()函数在哪里呢?下面接着看dyld的汇编:
-
5.2 LC_MAIN函数的执行过程
#pragma mark - **********************************************
#pragma mark - ***** LC_MAIN调起main()函数的入栈操作 ********
#pragma mark - **********************************************
// LC_MAIN case, set up stack for call to main()
Lnew: mov lr, x1 // simulate return address into _start in libdyld.dylib
#if __LP64__
ldr x0, [x28, #8] // main param1 = argc
add x1, x28, #16 // main param2 = argv
add x2, x1, x0, lsl #3
add x2, x2, #8 // main param3 = &env[0]
mov x3, x2
Lapple: ldr x4, [x3]
add x3, x3, #8
#else
ldr w0, [x28, #4] // main param1 = argc
add x1, x28, #8 // main param2 = argv
add x2, x1, x0, lsl #2
add x2, x2, #4 // main param3 = &env[0]
mov x3, x2
Lapple: ldr w4, [x3]
add x3, x3, #4
#endif
cmp x4, #0
b.ne Lapple // main param4 = apple
#if __arm64e__
braaz x16
#else
br x16
#endif
复制代码
至此,我们已经完成了dyld从程序加载到进入main函数的整个流程的分析,下面再整理一下整体的思路和逻辑。
三、总结
由于时间和精力关系,这里只整理了关于dyld从程序加载到进入main函数的简要分析过程,更详细的内部执行逻辑,以思维导图的形式放在最后!
iOS应用启动流程和dyld的链接过程是进行程序运行和后期程序优化的开端,一定把握住其中的流程和一些关键的点,方能够在以后的开发中,做出体验出众的程序!知识需要精益求精,知其然知其所以然,方能有所成长!
?更多内容期待与你一起分享,喜欢的话,点个赞点个关注,持续为您创造好的内容。