前言
我们知道类是一个objc_class结构体,里面有成员变量isa(结构体指针,8字节)、superclass(结构体指针,8字节)、cache、bits(8字节长度的结构体)。在“ios 对象的本质与isa”中我们探索了isa以及类的关联关系;superclass是类的父类;在“ios 类的结构分析”中我们探索了bits以及bits里的方法、属性和协议。那么cache里面存储的是什么呢?
字面意思是缓存,那么缓存的是什么,为什么要缓存呢?抱着这样的疑问我们探索一下。objc4-818源码地址
cache_t
struct cache_t {
private:
explicit_atomic<uintptr_t> _bucketsAndMaybeMask;//uintptr_t为unsigned long 8个字节
union {
struct {
explicit_atomic<mask_t> _maybeMask;//mask_t为uint32_t 4个字节
#if __LP64__
uint16_t _flags;//2个字节
#endif
uint16_t _occupied;//2个字节
};
explicit_atomic<preopt_cache_t *> _originalPreoptCache; //结构体指针 8个字节
};
//.....省略一些干扰分析的方法体
public:
unsigned capacity() const;
struct bucket_t *buckets() const;//猜测 buckets()存有方法
Class cls() const;
#if CONFIG_USE_PREOPT_CACHES
const preopt_cache_t *preopt_cache() const;
#endif
mask_t occupied() const;
void initializeToEmpty();
复制代码
分析:cache_t
是一个结构体
,有8字节
的变量_bucketsAndMaybeMask
,8字节
的联合体
(联合体是互斥的,共享内存,大小取决于最大元素),cache_t
结构体总大小16个字节
。通过lldb调试这两个变量没有探索到cache_t里缓存的是什么。但是却发现bucket_t这个东西在cache_t中出现的频率很高,猜测bucket_t
是缓存的关键,查看bucket_t源码。
struct bucket_t {
private:
// IMP-first is better for arm64e ptrauth and no worse for arm64.
// SEL-first is better for armv7* and i386 and x86_64.
#if __arm64__
explicit_atomic<uintptr_t> _imp;
explicit_atomic<SEL> _sel;
#else
explicit_atomic<SEL> _sel;
explicit_atomic<uintptr_t> _imp;
复制代码
分析:bucket_t存储着SEL和IMP。
很高兴找到了想看到的结果。原来方法的实现存储在bucket_t中,cache缓存着方法的实现。那么bucket_t和cache_t中的两个成员变量又有什么关系呢?口说无凭,验证一下。
lldb探索cache_t
首先定义个对象GyPerson,添加一个对象方法testfunction。在main()入口函数中 GyPerson *p1 = [GyPerson alloc]初始化p1对象,然后lldb调试。
(lldb) x/4gx p1
0x101305070: 0x011d800100008409 0x0000000000000000
0x101305080: 0x00007fff80dfdd78 0x000000010063de20
(lldb) p/x 0x011d800100008409 & 0x00007ffffffffff8ULL //获取isa关联类
(unsigned long long) $2 = 0x0000000100008408
(lldb) p/x 0x0000000100008408 + 0x10 //类的首地址向下偏移16个字节(isa 8字节 + superclass 8字节)获取cache_t
(long) $3 = 0x0000000100008418
(lldb) p/x (cache_t*)$3 //强制转换cache_t指针
(cache_t *) $4 = 0x0000000100008418
(lldb) p *$4 //获取cache_t值
(cache_t) $5 = {
_bucketsAndMaybeMask = {
std::__1::atomic<unsigned long> = {
Value = 4298515312
}
}
= {
= {
_maybeMask = {
std::__1::atomic<unsigned int> = {
Value = 0
}
}
_flags = 32784
_occupied = 0
}
_originalPreoptCache = {
std::__1::atomic<preopt_cache_t *> = {
Value = 0x0000801000000000
}
}
}
}
(lldb) p $5.buckets() //调用buckets()
(bucket_t *) $6 = 0x0000000100362370
(lldb) p [p1 testfunction] //lldb动态调用对象方法
(lldb) p *$4 //重新取cache_t值
(cache_t) $6 = {
_bucketsAndMaybeMask = {
std::__1::atomic<unsigned long> = {
Value = 4313982656
}
}
= {
= {
_maybeMask = {
std::__1::atomic<unsigned int> = {
Value = 7
}
}
_flags = 32784
_occupied = 1
}
_originalPreoptCache = {
std::__1::atomic<preopt_cache_t *> = {
Value = 0x0001801000000007
}
}
}
}
(lldb) p $6.buckets() //调用cache_t中buckets()方法
(bucket_t *) $7 = 0x00000001012226c0
(lldb) p $6.buckets()[1] //buckets()里面的数据是哈希表存储 通过内存偏移查找
(bucket_t) $8 = {
_sel = {
std::__1::atomic<objc_selector *> = (null) {
Value = (null)
}
}
_imp = {
std::__1::atomic<unsigned long> = {
Value = 0
}
}
}
(lldb) p $6.buckets()[2]
(bucket_t) $9 = {
_sel = {
std::__1::atomic<objc_selector *> = "" {
Value = ""
}
}
_imp = {
std::__1::atomic<unsigned long> = {
Value = 49080
}
}
}
(lldb) p $9.sel()
(SEL) $10 = "testfunction"
复制代码
分析:
- 通过isa与掩码做与运算得到类的首地址,类的首地址向下偏移16个字节(isa 8字节 + superclass 8字节)获取cache_t。
- 取cache_t值,_maybeMask值为0,_occupied值为0,调用cache_t中函数buckets(),但是并没有想看到的SEL和IMP。
- lldb动态调用对象方法testfunction,再次取值cache_t,_maybeMask值为7,_occupied值为1,再次调用cache_t中函数buckets()。注意buckets()函数,猜想获取到的应该是一个数组或列表,试着用内存平移获取buckets()中的元素,果然向下平移两个后,通过调用bucket的sel()函数获取到了SEL testfunction。
- 疑问:cache_t中_maybeMask和_occupied与buckets()有什么联系?buckets()是什么数据结构,SEL和IMP是如何存储的呢?
LLDB调试不符合开发习惯,能不能通过源代码探索?
代码还原探索cache_t
/// Minimal test class used to populate the method cache.
/// Each instance method is intentionally empty: calling it only serves to get
/// its selector cached for the class.
@interface GyPerson : NSObject

- (void)testfunction1;
- (void)testfunction2;
- (void)testfunction3;
- (void)testfunction4;
- (void)testfunction5;

@end

@implementation GyPerson

- (void)testfunction1 {
}

- (void)testfunction2 {
}

- (void)testfunction3 {
}

- (void)testfunction4 {
}

- (void)testfunction5 {
}

@end
// Type of the cache mask; the runtime excerpt above annotates mask_t as uint32_t (4 bytes).
typedef uint32_t mask_t;
// Hand-written mirror of one cache slot: a selector followed by its implementation.
// This SEL-first order matches the non-arm64 branch of the runtime's bucket_t;
// under __arm64__ the runtime declares _imp first, so this mirror would not line up there.
struct gy_bucket_t{
SEL _sel;
IMP _imp;
};
// Mirror of cache_t's 16 bytes: one pointer-sized word, then mask/flags/occupied.
// NOTE(review): the runtime names the first word _bucketsAndMaybeMask, so on some
// configurations it may hold more than a plain bucket pointer — confirm before
// reusing this struct on another platform.
struct gy_cache_t{
struct gy_bucket_t * _buckets;
mask_t _maybeMask;
uint16_t _flags; // only present in the runtime struct when __LP64__ is set
uint16_t _occupied;
};
// Stand-in for the class's bits member; only its 8-byte size matters here.
struct gy_class_data_bits_t{
uintptr_t bits;// 8 bytes
};
// Mirror of the class layout (isa, superclass, cache, bits) so that a Class
// pointer can be reinterpreted and its cache fields read directly.
struct gy_objc_class{
Class isa;
Class superclass;
struct gy_cache_t cache; // formerly cache pointer and vtable
struct gy_class_data_bits_t bits;
};
// Sends messages to a GyPerson instance, then reinterprets its class as
// gy_objc_class and logs the cache state: first "occupied-mask", then one line
// per bucket for indexes [0, mask).
int main(int argc, const char * argv[]) {
    @autoreleasepool {
        GyPerson *person = [GyPerson alloc];
        [person testfunction1];
        // Uncomment the calls below to watch the cache grow.
        // [person testfunction2];
        // [person testfunction3];
        // [person testfunction4];
        // [person testfunction5];

        // View the class object through the hand-written mirror struct.
        struct gy_objc_class *mirror = (__bridge struct gy_objc_class *)[person class];
        NSLog(@"%hu-%u", mirror->cache._occupied, mirror->cache._maybeMask);

        mask_t index = 0;
        while (index < mirror->cache._maybeMask) {
            struct gy_bucket_t slot = mirror->cache._buckets[index];
            // An empty slot has a NULL selector and logs as "(null) - 0x0".
            NSLog(@"%@ - %p", NSStringFromSelector(slot._sel), slot._imp);
            index++;
        }
    }
    return 0;
}
复制代码
输出如下:
1-3
(null) - 0x0
(null) - 0x0
testfunction1 - 0xbe10
复制代码
上面的代码把注释掉的方法testfunction2、testfunction3、testfunction4、testfunction5打开,输出如下:
3-7
(null) - 0x0
(null) - 0x0
testfunction5 - 0xbe20
(null) - 0x0
testfunction4 - 0xbed0
(null) - 0x0
testfunction3 - 0xbec0
复制代码
分析:
- 要获取cache_t,就要
构造cache_t结构体
。通过参考源码构造objc_class结构体,即objc_class
结构体含有isa、superclass、cache、bits
成员变量。cache、bit结构体同样参考源码构造。 - 随着方法的追加,
_maybeMask
和_occupied的
值都发生了改变,说明maybeMask和occupied与缓存方法的个数有关。 - 追加方法后打印buckets中SEL,发现
testfunction1不见了
,也没有testfunction2?testfunction5方法的前面为什么有两个空的SEL
?
通过上面的分析感觉还是乱乱的,只知道cache_t中确实缓存了方法,但是缓存的规则是怎样的还是很懵逼,那就继续探究一下缓存是怎么插入的。试着全局搜索一下insert,果然有相关方法。
insert
// Adds (sel, imp) to this class's method cache, allocating or growing the
// bucket array first when the current one cannot take another entry.
//   sel, imp  - selector and implementation to cache.
//   receiver  - only forwarded to bad_cache() if the probe loop finds no slot.
// Returns without caching when the class has not finished +initialize, or when
// the selector is already present.
void cache_t::insert(SEL sel, IMP imp, id receiver)
{
runtimeLock.assertLocked();
// Never cache before +initialize is done
if (slowpath(!cls()->isInitialized())) {
return;
}
if (isConstantOptimizedCache()) {
_objc_fatal("cache_t::insert() called with a preoptimized cache for %s",
cls()->nameForLogging());
}
#if DEBUG_TASK_THREADS
return _collecting_in_critical();
#else
#if CONFIG_USE_CACHE_LOCK
mutex_locker_t lock(cacheUpdateLock);
#endif
ASSERT(sel != 0 && cls()->isInitialized());
// Occupancy as it will be once this entry is stored
// (first insertion: occupied() is 0, so newOccupied is 1).
mask_t newOccupied = occupied() + 1;
// Both start at the current capacity; only `capacity` is modified below.
// Presumably capacity() is 0 for a never-used cache — see the !capacity check.
unsigned oldCapacity = capacity(), capacity = oldCapacity;
if (slowpath(isConstantEmptyCache())) {// cache is still the empty one: first insertion
// Cache is read-only. Replace it.
if (!capacity) capacity = INIT_CACHE_SIZE;// INIT_CACHE_SIZE is defined elsewhere; the article's macOS run ends up with capacity 4 (mask 3)
reallocate(oldCapacity, capacity, /* freeOld */false);// allocate the first bucket array; freeOld=false, so nothing is handed to collect_free()
}
else if (fastpath(newOccupied + CACHE_END_MARKER <= cache_fill_ratio(capacity))) {// still within the fill ratio: no allocation
// Cache is less than 3/4 or 7/8 full. Use it as-is.
}
#if CACHE_ALLOW_FULL_UTILIZATION
// Per the article, CACHE_END_MARKER is 0 on device builds and 1 otherwise —
// TODO confirm against the runtime configuration headers.
else if (capacity <= FULL_UTILIZATION_CACHE_SIZE && newOccupied + CACHE_END_MARKER <= capacity) {
// Allow 100% cache utilization for small buckets. Use it as-is.
// (small caches may be filled completely before growing)
}
#endif
else {
// Grow: double a non-zero capacity, otherwise start at INIT_CACHE_SIZE.
// NOTE(review): INIT_CACHE_SIZE is platform-dependent and defined outside
// this excerpt — confirm its per-platform values in the runtime source.
capacity = capacity ? capacity * 2 : INIT_CACHE_SIZE;
if (capacity > MAX_CACHE_SIZE) {// clamp to MAX_CACHE_SIZE (defined elsewhere — TODO confirm its exact value)
capacity = MAX_CACHE_SIZE;
}
reallocate(oldCapacity, capacity, true);
}
// Probe for a slot: start at the hashed index and step with cache_next()
// until an empty slot or the same selector is found.
bucket_t *b = buckets();
mask_t m = capacity - 1;
mask_t begin = cache_hash(sel, m);
mask_t i = begin;
do {
if (fastpath(b[i].sel() == 0)) {
incrementOccupied();
b[i].set<Atomic, Encoded>(b, sel, imp, cls());// store sel and imp in the empty slot
return;
}
if (b[i].sel() == sel) {
// The entry was added to the cache by some other thread
// before we grabbed the cacheUpdateLock.
return;
}
} while (fastpath((i = cache_next(i, m)) != begin));
// Wrapped around to the starting index without finding a usable slot.
bad_cache(receiver, (SEL)sel);
#endif // !DEBUG_TASK_THREADS
}
复制代码
分析:
- 缓存容量的处理分四种情况:
1. 第一次插入SEL入缓存时,capacity=4,oldCapacity=0,调用reallocate()开辟内存。实际上是allocateBuckets()开辟了capacity个bucket大小的内存,setBucketsAndMask()把mask设置为capacity-1,下面会分析。
2. newOccupied+CACHE_END_MARKER<=capacity * 3 / 4。比如(macOS下CACHE_END_MARKER=1)第二次插入SEL入缓存,capacity=4,occupied=1,newOccupied=2,newOccupied+1<=capacity * 3/4,此时不会重新开辟内存,直接使用原有的buckets;第三次插入SEL入缓存,capacity=4,occupied=2,newOccupied=3,newOccupied+1>capacity * 3 / 4,不走这个流程。
3. 允许存满时:CACHE_ALLOW_FULL_UTILIZATION为1时代表允许存满,当capacity<=FULL_UTILIZATION_CACHE_SIZE且newOccupied+CACHE_END_MARKER<=capacity时不扩容,继续使用原有的buckets直到存满。
4. capacity * 2 或 INIT_CACHE_SIZE扩容:以上条件都不满足时需要扩容,capacity有值时扩容2倍,没有值时取INIT_CACHE_SIZE。INIT_CACHE_SIZE的取值与平台有关:本文在macOS上的运行结果为4(mask=3);objc4-818源码中,有结束标记(CACHE_END_MARKER=1)的平台为4,64位arm64真机为2。扩容同样调用reallocate()开辟内存,比如macos下第一次扩容,capacity=4*2=8,即开辟8个bucket大小的内存,mask=7。
- bucket位置下标通过cache_hash计算得出,然后通过set()方法设置,下面会详细分析。
reallocate()
// Replaces the bucket array with a newly allocated one sized for newCapacity
// and records the matching mask (newCapacity - 1).
//   oldCapacity - capacity of the array being replaced; only used when freeing.
//   newCapacity - capacity of the new array; must be > 0.
//   freeOld     - when true, the old array is handed to collect_free().
// Nothing in this function copies entries from the old array into the new one.
void cache_t::reallocate(mask_t oldCapacity, mask_t newCapacity, bool freeOld)
{
bucket_t *oldBuckets = buckets();// current bucket array, kept so it can be freed below
bucket_t *newBuckets = allocateBuckets(newCapacity);// new bucket array for newCapacity buckets
ASSERT(newCapacity > 0);
ASSERT((uintptr_t)(mask_t)(newCapacity-1) == newCapacity-1);
setBucketsAndMask(newBuckets, newCapacity - 1);// install the new array and store newCapacity - 1 as the mask
if (freeOld) {// insert() passes false for the first allocation and true when growing
collect_free(oldBuckets, oldCapacity);
}
}
复制代码
分析:
- allocateBuckets开辟内存,内存大小为newCapacity * sizeof(bucket_t)。
- setBucketsAndMask设置mask和buckets的值,mask=newCapacity-1,比如第一次插入方法,newCapacity=4,mask即为3。
- 扩容时freeOld=true,释放旧的buckets,并且旧buckets中的内容不会拷贝到新的buckets中。比如我们上面写的测试方法,插入testfunction1和testfunction2并没有达到扩容条件,当插入testfunction3时达到扩容条件,释放了旧的buckets,这就是前面打印结果中testfunction1、testfunction2不见了的原因。
- _maybeMask即mask的值,等于newCapacity-1,而不是开辟的bucket个数(开辟的个数是newCapacity)。
cache_hash、cache_next、bucket_t::set
// Maps a selector to its starting bucket index.
//   sel  - selector whose pointer value is hashed.
//   mask - capacity - 1 (insert() passes capacity - 1).
// Returns the selector's pointer value (optionally mixed) ANDed with mask.
static inline mask_t cache_hash(SEL sel, mask_t mask)
{
uintptr_t value = (uintptr_t)sel;
#if CONFIG_USE_PREOPT_CACHES // per the article, set to 1 on device builds — TODO confirm
value ^= value >> 7;// mix the value with itself shifted right by 7 bits before masking
#endif
return (mask_t)(value & mask);// keep only the bits selected by mask
}
// cache_next() returns the index to probe after i when a slot is taken.
// Two variants are shown; which one is compiled depends on the configuration.
// NOTE(review): the closing #else/#endif of this conditional is not part of
// this excerpt.
#if CACHE_END_MARKER // per the article: non-device builds — TODO confirm
// Steps forward by one and wraps to 0 after the last index (via & mask).
static inline mask_t cache_next(mask_t i, mask_t mask) {
return (i+1) & mask;
}
#elif __arm64__ // per the article: device builds — TODO confirm
// Steps backward by one and wraps from 0 to mask.
static inline mask_t cache_next(mask_t i, mask_t mask) {
return i ? i-1 : mask;
}
// Stores a selector and its (optionally encoded) implementation in this bucket.
//   base   - first bucket of the array; passed through to encodeImp().
//   newSel - selector to store; the slot must be empty or already hold newSel.
//   newImp - implementation to store.
//   cls    - class owning the cache; passed through to encodeImp().
// NOTE(review): impEncoding is not declared in this excerpt — presumably a
// template parameter, since insert() calls set<Atomic, Encoded>(...).
void bucket_t::set(bucket_t *base, SEL newSel, IMP newImp, Class cls)
{
ASSERT(_sel.load(memory_order_relaxed) == 0 ||
_sel.load(memory_order_relaxed) == newSel);
// This variant relies on the IMP-first layout: _imp at offset 0, _sel right after it.
static_assert(offsetof(bucket_t,_imp) == 0 &&
offsetof(bucket_t,_sel) == sizeof(void *),
"bucket_t layout doesn't match arm64 bucket_t::set()");
// Encode the IMP only when requested; otherwise store the raw pointer value.
uintptr_t encodedImp = (impEncoding == Encoded
? encodeImp(base, newImp, newSel, cls)
: (uintptr_t)newImp);
// Writes the (imp, sel) pair into this bucket; stp() is defined elsewhere —
// presumably a paired store, TODO confirm.
stp(encodedImp, (uintptr_t)newSel, this);
}
复制代码
分析:
cache_hash
主要是生成hash下标
,cache_next
主要是解决hash冲突
encodeImp
方法会对imp进行编码
(uintptr_t)newImp ^ (uintptr_t)cls即异或运算
。cls有值
imp进行编码
,cls没有值
imp相当于没编码
。再异或即解码
。
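总结:
- cache_t的buckets中缓存着SEL与IMP。
- _maybeMask(mask)的值为capacity-1,_occupied为已缓存的方法个数。
- bucket开辟内存空间以3/4为界限(部分平台为7/8),本文macOS环境下首次开辟4个bucket(mask=3)。超过界限时需要扩容,扩容均为原容量的2倍(例如4扩容为8),最大不超过MAX_CACHE_SIZE(objc4-818中为2的16次方)。扩容时旧的缓存会被丢弃,不会拷贝到新的buckets中。
- bucket存储是通过哈希计算下标的方式存储,所以存储顺序与方法的调用顺序无关。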