? 这是我参与更文挑战的第1天，活动详情查看：更文挑战

系列文章：
iOS底层探险① OC对象初始化流程
 iOS底层探险② 从LLVM源码分析为什么alloc、retain、isKindOfClass等AWZ、RR、CORE系列方法没有走自身的IMP
iOS底层探险③ iOS malloc内存分配底层原理
…

⭐ 表示「核心逻辑内容」

? 表示「分支知识扩展」

 表示「苹果官方文档或者源码」

本文用到的资源：
 objc4 源码
 libmalloc 源码

malloc内存分配底层原理

源码分析

在 objc4源码里可以看到 alloc 、 alloc init 、 new 最终都会执行 _class_createInstanceFromZone 函数，这个函数通过 obj = (id)calloc(1, size); 向系统申请内存，而 calloc 函数的实现是在 libmalloc源码里

WX20210614-183026@2x.png

calloc 函数具体做了什么呢，先看 calloc 函数源码

/*
 * C-level calloc entry point as quoted from libmalloc.
 * Forwards num_items and size unchanged to _malloc_zone_calloc(), passing
 * default_zone as the zone and MZ_POSIX as the options argument, and returns
 * whatever that call returns.
 */
void *
calloc(size_t num_items, size_t size)
{
   return _malloc_zone_calloc(default_zone, num_items, size, MZ_POSIX);
}
复制代码

对应 _malloc_zone_calloc 函数源码：

/*
 * Zone-level calloc dispatcher (excerpt: part of the body is elided below).
 * Emits a MALLOC_TRACE start event, runs internal_check() when
 * malloc_check_start is set, then calls the calloc function pointer stored
 * in the zone and returns ptr. The mzo argument is not used in the part of
 * the function shown here.
 */
MALLOC_NOINLINE
static void *
_malloc_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size,
		malloc_zone_options_t mzo)
{
	MALLOC_TRACE(TRACE_calloc | DBG_FUNC_START, (uintptr_t)zone, num_items, size, 0);

	void *ptr;
	if (malloc_check_start) {
		internal_check();
	}
	// Indirect call through the zone's function pointer; this is not a
	// recursive call to the C-level calloc().
	ptr = zone->calloc(zone, num_items, size);

        ... 省略非必要代码
        
	return ptr;
}
复制代码

⭐ 核心代码： ptr = zone->calloc(zone, num_items, size); 怎么又是一个 calloc ？？这不是死循环了么、、、(O_O)? 其实不是，这里的 calloc 是 zone 这个结构体里的函数,而最开始的 calloc 是原始的 c 函数，是两个函数实现，接下来查看 zone 的逻辑，由于上一步传过来的是全局变量 default_zone :

/* Pointer to the malloc_zone member embedded in virtual_default_zone. */
static malloc_zone_t *default_zone = &virtual_default_zone.malloc_zone;
复制代码

可以看到 default_zone 是一个 malloc_zone_t * 类型的指针，指向 virtual_default_zone.malloc_zone。virtual_default_zone 的源码：

/*
 * A malloc_zone_t followed by filler bytes. The pad array is sized as
 * PAGE_MAX_SIZE minus sizeof(malloc_zone_t), so the two members together
 * add up to PAGE_MAX_SIZE bytes.
 */
typedef struct {
	malloc_zone_t malloc_zone;
	uint8_t pad[PAGE_MAX_SIZE - sizeof(malloc_zone_t)];
} virtual_default_zone_t;

/*
 * The statically initialised default zone. It is placed in the
 * "__DATA,__v_zone" section and aligned to PAGE_MAX_SIZE. The initialisers
 * are positional and fill the leading fields of the embedded malloc_zone_t
 * in declaration order (excerpt: the remaining initialisers are elided).
 */
static virtual_default_zone_t virtual_default_zone
__attribute__((section("__DATA,__v_zone")))
__attribute__((aligned(PAGE_MAX_SIZE))) = {
	NULL,                  // reserved1
	NULL,                  // reserved2
	default_zone_size,     // size
	default_zone_malloc,   // malloc
	default_zone_calloc,   // calloc
	default_zone_valloc,   // valloc
	default_zone_free,     // free
	default_zone_realloc,  // realloc
	default_zone_destroy,  // destroy

        ... 省略非必要代码
};
复制代码

malloc_zone_t 结构体的定义源码如下：

/*
 * Table of function pointers describing one malloc zone (excerpt: some
 * members are elided). Every function pointer takes the zone itself as its
 * first argument. The member names are wrapped in MALLOC_ZONE_FN_PTR(),
 * whose definition is not shown here.
 */
typedef struct _malloc_zone_t {
    /* Only zone implementors should depend on the layout of this structure;
    Regular callers should use the access functions below */
    void	*reserved1;	/* RESERVED FOR CFAllocator DO NOT USE */
    void	*reserved2;	/* RESERVED FOR CFAllocator DO NOT USE */
    size_t 	(* MALLOC_ZONE_FN_PTR(size))(struct _malloc_zone_t *zone, const void *ptr); /* returns the size of a block or 0 if not in this zone; must be fast, especially for negative answers */
    void 	*(* MALLOC_ZONE_FN_PTR(malloc))(struct _malloc_zone_t *zone, size_t size);
    void 	*(* MALLOC_ZONE_FN_PTR(calloc))(struct _malloc_zone_t *zone, size_t num_items, size_t size); /* same as malloc, but block returned is set to zero */
    void 	*(* MALLOC_ZONE_FN_PTR(valloc))(struct _malloc_zone_t *zone, size_t size); /* same as malloc, but block returned is set to zero and is guaranteed to be page aligned */
    void 	(* MALLOC_ZONE_FN_PTR(free))(struct _malloc_zone_t *zone, void *ptr);
    void 	*(* MALLOC_ZONE_FN_PTR(realloc))(struct _malloc_zone_t *zone, void *ptr, size_t size);
    void 	(* MALLOC_ZONE_FN_PTR(destroy))(struct _malloc_zone_t *zone); /* zone is destroyed and all memory reclaimed */
    const char	*zone_name;
    
    ... 省略非必要代码

boolean_t (* MALLOC_ZONE_FN_PTR(claimed_address))(struct _malloc_zone_t *zone, void *ptr);
} malloc_zone_t;
复制代码

继续之前的代码，打开反汇编调试可以看到接下来走了 default_zone_calloc 函数

WX20210614-193158@2x.png

不通过汇编，通过断点调试也可以查看，断好之后 p zone->calloc 也可以看到

WX20210614-193503@2x.png

接下来查看 default_zone_calloc 函数实现：

/*
 * calloc entry installed in virtual_default_zone.
 * The zone passed in is not used: it is overwritten with the result of
 * runtime_default_zone(), and the request is forwarded to that zone's calloc
 * function pointer.
 */
static void *
default_zone_calloc(malloc_zone_t *zone, size_t num_items, size_t size)
{
	// Replace the incoming zone with the runtime default zone.
	zone = runtime_default_zone();
	return zone->calloc(zone, num_items, size);
}
复制代码

⭐ 可以看到 zone = runtime_default_zone(); 形参 zone 被换成了 runtime_default_zone()

同上汇编查看接下来调用的是 nano_zone 的 nano_calloc函数其源码如下：

/*
 * calloc implementation for the nano zone.
 *
 * Computes total_bytes via calloc_get_size() and returns NULL if that call
 * reports failure (presumably on num_items * size overflow; confirm against
 * calloc_get_size). Requests of at most NANO_MAX_SIZE bytes are first tried
 * with _nano_malloc_check_clear(); if that yields NULL, or if the request is
 * larger, the allocation is delegated to nanozone->helper_zone.
 */
static void *
nano_calloc(nanozone_t *nanozone, size_t num_items, size_t size)
{
	size_t total_bytes;

	if (calloc_get_size(num_items, size, 0, &total_bytes)) {
		return NULL;
	}

        /// ---- core logic ----
	if (total_bytes <= NANO_MAX_SIZE) {
		// Third argument 1 is cleared_requested: ask for a zeroed block.
		void *p = _nano_malloc_check_clear(nanozone, total_bytes, 1);
		if (p) {
			return p;
		} else {
			/* FALLTHROUGH to helper zone */
		}
	}
	// Too large for the nano path, or the nano path returned NULL.
	malloc_zone_t *zone = (malloc_zone_t *)(nanozone->helper_zone);
	return zone->calloc(zone, 1, total_bytes);
}
复制代码

⭐ 函数核心逻辑是：如果申请内存的大小小于等于 NANO_MAX_SIZE，就调用 void *p = _nano_malloc_check_clear(nanozone, total_bytes, 1); 得到 p 并返回；超过 NANO_MAX_SIZE（或者上一步返回的 p 为空）时，就用 helper_zone 来创建大对象（用的是 scalable_zone ）

/* Largest request, in bytes, that nano_calloc() tries to serve via _nano_malloc_check_clear(). */
#define NANO_MAX_SIZE   256 /* Buckets sized {16, 32, 48, ..., 256} */
复制代码

可以看到 malloc 底层小对象、大对象内存分配的临界值是 256字节 ，接下来查看 _nano_malloc_check_clear ：

/*
 * Allocates one block from the nano zone (excerpt: the branch taken when a
 * block is dequeued is elided).
 *
 * The requested size is rounded by segregated_size_to_fit(), which also
 * yields slot_key; together with the index from nano_mag_index() it selects
 * the per-slot metadata pMeta. The function first tries to dequeue a block
 * from pMeta->slot_LIFO and, if that yields NULL, obtains one from
 * segregated_next_block(). When cleared_requested is set and a block was
 * obtained, its slot_bytes bytes are zeroed before it is returned.
 *
 * Returns the block, or NULL if none could be obtained.
 */
static void *
_nano_malloc_check_clear(nanozone_t *nanozone, size_t size, boolean_t cleared_requested)
{
	MALLOC_TRACE(TRACE_nano_malloc, (uintptr_t)nanozone, size, cleared_requested, 0);

	void *ptr;
	size_t slot_key;
        /// ---- slot_bytes: the rounded allocation size ----
	size_t slot_bytes = segregated_size_to_fit(nanozone, size, &slot_key); // Note slot_key is set here
	mag_index_t mag_index = nano_mag_index(nanozone);

	nano_meta_admin_t pMeta = &(nanozone->meta_data[mag_index][slot_key]);

	ptr = OSAtomicDequeue(&(pMeta->slot_LIFO), offsetof(struct chained_block_s, next));
	if (ptr) {
                ... 省略非必要代码
	} else {
                /// ---- core logic ----
		ptr = segregated_next_block(nanozone, pMeta, slot_bytes, mag_index);
	}

	if (cleared_requested && ptr) {
		memset(ptr, 0, slot_bytes); // TODO: Needs a memory barrier after memset to ensure zeroes land first?
	}
	return ptr;
}
复制代码

核心逻辑执行 segregated_next_block 函数，而传入的内存大小参数是 slot_bytes ，接下来查看 slot_bytes 的初始化方法：

#define SHIFT_NANO_QUANTUM	 4
#define NANO_REGIME_QUANTA_SIZE	(1 << SHIFT_NANO_QUANTUM)	// 16

/*
 * Rounds a request up to a whole number of nano quanta.
 *
 * A size of 0 is treated as one quantum. Otherwise size is rounded up to the
 * next multiple of NANO_REGIME_QUANTA_SIZE (16): add 15, shift right by
 * SHIFT_NANO_QUANTUM to get the quantum count k, then shift left again to
 * get the byte count. For example, 1..16 gives 16 (key 0) and 40 gives
 * 48 (key 2).
 *
 * nanozone is not used by this function.
 * *pKey receives the zero-based slot index, k - 1.
 * Returns the rounded size in bytes.
 */
static MALLOC_INLINE size_t
segregated_size_to_fit(nanozone_t *nanozone, size_t size, size_t *pKey)
{
	size_t k, slot_bytes;

	if (0 == size) {
		size = NANO_REGIME_QUANTA_SIZE; // Historical behavior
	}
	k = (size + NANO_REGIME_QUANTA_SIZE - 1) >> SHIFT_NANO_QUANTUM; // round up and shift for number of quanta
	slot_bytes = k << SHIFT_NANO_QUANTUM;							// multiply by power of two quanta size
	*pKey = k - 1;													// Zero-based!

	return slot_bytes;
}
复制代码

⭐ 可以看到核心逻辑是按照 NANO_REGIME_QUANTA_SIZE（16字节）对齐，算法是原始 size + 15 后右移4位再左移4位，相当于进位后把尾部四位抹零。例如 size = 40：(40 + 15) >> 4 = 3，3 << 4 = 48。举例：

WX20210614-202104@2x.png

接下来查看 segregated_next_block ：

/*
 * Carves the next slot_bytes-sized block out of the slot described by pMeta.
 *
 * Loops until it can return. Each iteration atomically advances
 * pMeta->slot_bump_addr by slot_bytes; if the resulting block start is below
 * the slot limit captured at the top of the iteration, that address is
 * returned. Otherwise it returns 0 when the slot is marked exhausted, or
 * takes band_resupply_lock[mag_index], re-checks the state, and retries once
 * the slot has been grown (by another thread or by segregated_band_grow()).
 * If growing fails, the slot is marked exhausted and 0 is returned.
 */
static MALLOC_INLINE void *
segregated_next_block(nanozone_t *nanozone, nano_meta_admin_t pMeta, size_t slot_bytes, unsigned int mag_index)
{
	while (1) {
		uintptr_t theLimit = pMeta->slot_limit_addr; // Capture the slot limit that bounds slot_bump_addr right now
                /// ---- core logic ----
		uintptr_t b = OSAtomicAdd64Barrier(slot_bytes, (volatile int64_t *)&(pMeta->slot_bump_addr));
		b -= slot_bytes; // Atomic op returned addr of *next* free block. Subtract to get addr for *this* allocation.

		if (b < theLimit) {   // Did we stay within the bound of the present slot allocation?
			return (void *)b; // Yep, so the slot_bump_addr this thread incremented is good to go
		} else {
			if (pMeta->slot_exhausted) { // exhausted all the bands available for this slot?
				pMeta->slot_bump_addr = theLimit;
				return 0;				 // We're toast
			} else {
				// One thread will grow the heap, others will see it's been grown and retry allocation
				_malloc_lock_lock(&nanozone->band_resupply_lock[mag_index]);
				// re-check state now that we've taken the lock
				if (pMeta->slot_exhausted) {
					_malloc_lock_unlock(&nanozone->band_resupply_lock[mag_index]);
					return 0; // Toast
				} else if (b < pMeta->slot_limit_addr) {
					_malloc_lock_unlock(&nanozone->band_resupply_lock[mag_index]);
					continue; // ... the slot was successfully grown by first-taker (not us). Now try again.
				} else if (segregated_band_grow(nanozone, pMeta, slot_bytes, mag_index)) {
					_malloc_lock_unlock(&nanozone->band_resupply_lock[mag_index]);
					continue; // ... the slot has been successfully grown by us. Now try again.
				} else {
					// Growing failed: mark the slot exhausted and reset the bump address to the captured limit.
					pMeta->slot_exhausted = TRUE;
					pMeta->slot_bump_addr = theLimit;
					_malloc_lock_unlock(&nanozone->band_resupply_lock[mag_index]);
					return 0;
				}
			}
		}
	}
}
复制代码

⭐ 最终得到内存的方法是调用原子操作 OSAtomicAdd64Barrier，原子地为 pMeta->slot_bump_addr 加上 slot_bytes（16字节对齐）的长度，偏移到 下一个地址，然后减去 slot_bytes 得到 申请好的内存地址的开始地址 返回。

流程图

上述源码分析,执行顺序简单总结一下： calloc(1, size) -> _malloc_zone_calloc -> ptr = zone->calloc(zone, num_items, size); -> default_zone_calloc -> nano_calloc -> _nano_malloc_check_clear -> segregated_next_block -> OSAtomicAdd64Barrier

流程图如下：

未命名文件.png

补充一结构体内存大小规则

分析CPU是64位架构

规则 1、成员开始位置

成员开始位置，必须是自身大小或最大成员大小的整数倍
- 基础数据类型用自身类型大小（如：int 4字节）的整数倍
- 复合类型（数组、结构体等）用自身最大的成员变量大小的整数倍

规则 2、结构内存对齐

在上述规则累加后补充：结构体的最终大小必须是最大成员大小的整数倍

比如计算大小后，算出来是12字节，最大成员8字节，最终是16字节

规则 3、最后 malloc 内存对齐

最终结构体大小会根据操作系统做内存对齐，iOS 64位CPU是16字节对齐，在上述 segregated_size_to_fit 已经做详细说明，不再赘述

举例：

/*
 * Example layout for the 64-bit target analysed in the article.
 * Member sizes are in bytes; sizeof is 32 after padding.
 */
struct Struct_1 {
    double a;       // 8 bytes, offset 0
    char b;         // 1 byte,  offset 8
    double c;       // 8 bytes, offset 16 (after 7 bytes of padding)
    short d;        // 2 bytes, offset 24 (struct then padded to 32)
};

/*
 * Same members as Struct_1 in a different order, for the same 64-bit target.
 * Member sizes are in bytes; sizeof is 24.
 */
struct Struct_2 {
    double a;       // 8 bytes, offset 0
    char b;         // 1 byte,  offset 8
    short d;        // 2 bytes, offset 10 (after 1 byte of padding)
    double c;       // 8 bytes, offset 16 (after 4 bytes of padding)
};

复制代码

分析 Struct_1
步骤1： a 8字节 ———— 当前大小8字节
步骤2： b 1字节 —— 满足1的整数倍 — 当前大小9字节
步骤3： c 8字节 —— 不满足8的整数倍，向后调整到 16开始存储8字节 — 当前大小24字节
步骤4： d 2字节 —— 满足2的整数倍 — 当前大小26字节
步骤5：结构体内存对齐，最大成员是 8字节 26字节对齐后当前大小32字节
步骤6： malloc内存对齐，最后是32字节是内存16字节对齐的整数倍： 最终32字节

分析 Struct_2
步骤1： a 8字节 ———— 当前大小8字节
步骤2： b 1字节 —— 满足1的整数倍 — 当前大小9字节
步骤3： d 2字节 —— 不满足2的整数倍 — 10字节开始+2字节， 当前大小12字节
步骤4： c 8字节 —— 不满足8的整数倍，向后调整到 16开始存储8字节 — 当前大小24字节
步骤5：结构体内存对齐，最大成员是 8字节，24字节满足8的倍数，当前大小24字节
步骤6： malloc内存对齐，24字节不是16字节的整数倍，向上对齐到 32字节
结果： 32字节

可以看到上述两个结构体，成员类型是一样的，只是调整了成员的位置，sizeof() 得出的结构体大小就不一样了：第一个 32字节，第二个 24字节，但是最终 malloc 内存对齐后都是 32字节。

拓展 C++ 结构体虚表

? 上述结构体 Struct_1 的大小是 32字节 ，假如它继承一个带有 虚函数 的结构体呢？

// Base struct with one virtual function. Sizes are in bytes and assume the
// 64-bit target analysed in the article. Also defines the variable struct0.
struct Struct_0 {

    // hidden virtual-table pointer added by the compiler: 8 bytes
    BOOL v1;                // 1 byte

    virtual void foo(){
    }
}struct0;


// Struct_1 from the earlier example, now derived from the polymorphic
// Struct_0. Sizes are in bytes and assume the 64-bit target analysed in the
// article. Also defines the variable struct1.
struct Struct_1 : Struct_0 {

    // virtual-table pointer comes from the Struct_0 base: 8 bytes
    // (presumably shared with the base, not added a second time; verify with sizeof)
    double a;       // 8 bytes
    char b;         // 1 byte
    double c;       // 8 bytes
    short d;        // 2 bytes

}struct1;
复制代码