Photo by Yang Shuo on Unsplash
前段时间,在实现“快速打开文件夹”时,我不加思索地采用大众方案(将 ),并轻松入坑?。直到测试超大文件夹(文件数50000)时,发现耗时超过2分钟,远远达不到快速这个要求,经过一段时间的折腾之后,才找到原因,并写了这篇文章。File#listFiles
列出的所有文件转换成List<Info>
供RecyclerView使用,Info中封装了路径、文件大小、修改时间等信息
源码分析
File#listFiles
分析
分析执行逻辑,并没有发现明显的耗时操作,于是进一步分析list
方法。
File#listFiles
调用list
扫描出所有文件名- 然后创建对应的File对象,并保存。
public File[] listFiles() {
String[] ss = list();
if (ss == null) return null;
int n = ss.length;
File[] fs = new File[n];
for (int i = 0; i < n; i++) {
fs[i] = new File(ss[i], this);
}
return fs;
}
复制代码
File#list
分析
Java层逻辑
Android 7.0是一个分水岭,7.0之前直接调用listImpl()
方法,进入native层;7.0及其以后版本不在直接调用listImpl()
方法,由FileSystem的子类执行list操作,进入native层。
public String[] list() {
return listImpl(path);
}
private static native String[] listImpl(String path);
--------------------------------------------------7.0--------------------------------------------------
//源码位于File中
public String[] list() {
SecurityManager security = System.getSecurityManager();
if (security != null) {
security.checkRead(path);
}
if (isInvalid()) {
return null;
}
return fs.list(this);
}
//源码位于UnixFileSystem#list中
public String[] list(File f) {
BlockGuard.getThreadPolicy().onReadFromDisk();
BlockGuard.getVmPolicy().onPathAccess(f.getPath());
return list0(f);
}
private native String[] list0(File f);
复制代码
Native层分析
listIml()
,位于/libcore/luni/src/main/native/java_io_File.cpp(跳转),实现逻辑如下:
- 不断调用
readdir
(老版本使用readdir_r,原因见此处)读取目录,直到读到目录结尾,即返回值为null停止。 std::vector<std::string> DirEntries
来保存文件名,扩容交给数据结构vector
。(耗时主要是vector扩容时进行数据拷贝)。- 最后将vector中的string数据转换为String[]并返回。
// Iterates over the filenames in the given directory.
class ScopedReaddir {
public:
ScopedReaddir(const char* path) {
mDirStream = opendir(path);
mIsBad = (mDirStream == NULL);
}
~ScopedReaddir() {
if (mDirStream != NULL) {
closedir(mDirStream);
}
}
// Returns the next filename, or NULL.
const char* next() {
if (mIsBad) {
return NULL;
}
errno = 0;
dirent* result = readdir(mDirStream);
if (result != NULL) {
return result->d_name;
}
if (errno != 0) {
mIsBad = true;
}
return NULL;
}
// Has an error occurred on this stream?
bool isBad() const {
return mIsBad;
}
private:
DIR* mDirStream;
bool mIsBad;
// Disallow copy and assignment.
ScopedReaddir(const ScopedReaddir&);
void operator=(const ScopedReaddir&);
};
typedef std::vector<std::string> DirEntries;
// Reads the directory referred to by 'pathBytes', adding each directory entry
// to 'entries'.
static bool readDirectory(JNIEnv* env, jstring javaPath, DirEntries& entries) {
ScopedUtfChars path(env, javaPath);
if (path.c_str() == NULL) {
return false;
}
ScopedReaddir dir(path.c_str());
const char* filename;
while ((filename = dir.next()) != NULL) {
if (strcmp(filename, ".") != 0 && strcmp(filename, "..") != 0) {
// TODO: this hides allocation failures from us. Push directory iteration up into Java?
entries.push_back(filename);
}
}
return !dir.isBad();
}
static jobjectArray File_listImpl(JNIEnv* env, jclass, jstring javaPath) {
// Read the directory entries into an intermediate form.
DirEntries entries;
if (!readDirectory(env, javaPath, entries)) {
return NULL;
}
// Translate the intermediate form into a Java String[].
return toStringArray(env, entries);
}
复制代码
list0()
,位于libcore/ojluni/src/main/native/UnixFileSystem_md.c中(跳转)。实现逻辑如下:
- 不断调用
readdir64
读取目录,直到读到目录结尾,即返回值为null停止 - 初始化一个长度为16的String数组(这里存放的Java数据类型的String),确定当前读取的文件数是否超过maxLen,如果超过就创建新数组,长度为(
maxlen <<= 1
),然后使用JNU_CopyObjectArray
方法,将旧数据拷贝到新数组中,并删除旧数据;没有超过maxlen就往里存数据。(这里的耗时主要来源于C调用Java方法进行数据拷贝) - 最后重新创建一个长度为len的String数组,并将原始的数据拷贝到新数组中,减少内存。
// Android-changed: Name changed because of added thread policy check
JNIEXPORT jobjectArray JNICALL
Java_java_io_UnixFileSystem_list0(JNIEnv *env, jobject this,
jobject file)
{
DIR *dir = NULL;
struct dirent64 *ptr;
// Android-removed: Integrate OpenJDK 12 commit to use readdir, not readdir_r. b/64362645
// struct dirent64 *result;
int len, maxlen;
jobjectArray rv, old;
jclass str_class;
str_class = JNU_ClassString(env);
CHECK_NULL_RETURN(str_class, NULL);
WITH_FIELD_PLATFORM_STRING(env, file, ids.path, path) {
dir = opendir(path);
} END_PLATFORM_STRING(env, path);
if (dir == NULL) return NULL;
// Android-removed: Integrate OpenJDK 12 commit to use readdir, not readdir_r. b/64362645
/*
ptr = malloc(sizeof(struct dirent64) + (PATH_MAX + 1));
if (ptr == NULL) {
JNU_ThrowOutOfMemoryError(env, "heap allocation failed");
closedir(dir);
return NULL;
}
*/
/* Allocate an initial String array */
len = 0;
maxlen = 16;
rv = (*env)->NewObjectArray(env, maxlen, str_class, NULL);
if (rv == NULL) goto error;
/* Scan the directory */
// Android-changed: Integrate OpenJDK 12 commit to use readdir, not readdir_r. b/64362645
// while ((readdir64_r(dir, ptr, &result) == 0) && (result != NULL)) {
while ((ptr = readdir64(dir)) != NULL) {
jstring name;
if (!strcmp(ptr->d_name, ".") || !strcmp(ptr->d_name, ".."))
continue;
if (len == maxlen) {
old = rv;
rv = (*env)->NewObjectArray(env, maxlen <<= 1, str_class, NULL);
if (rv == NULL) goto error;
if (JNU_CopyObjectArray(env, rv, old, len) < 0) goto error;
(*env)->DeleteLocalRef(env, old);
}
#ifdef MACOSX
name = newStringPlatform(env, ptr->d_name);
#else
name = JNU_NewStringPlatform(env, ptr->d_name);
#endif
if (name == NULL) goto error;
(*env)->SetObjectArrayElement(env, rv, len++, name);
(*env)->DeleteLocalRef(env, name);
}
closedir(dir);
// Android-removed: Integrate OpenJDK 12 commit to use readdir, not readdir_r. b/64362645
// free(ptr);
/* Copy the final results into an appropriately-sized array */
old = rv;
rv = (*env)->NewObjectArray(env, len, str_class, NULL);
if (rv == NULL) {
return NULL;
}
if (JNU_CopyObjectArray(env, rv, old, len) < 0) {
return NULL;
}
return rv;
error:
closedir(dir);
// Android-removed: Integrate OpenJDK 12 commit to use readdir, not readdir_r. b/64362645
// free(ptr);
return NULL;
}
复制代码
优化源码
经过源码分析,发现两个版本都进行了扩容+拷贝操作,于是使用链表进行优化,经过测试,虽然时间确实有所降低,没有达到理想效果。
using InfoList = struct Info {
char *d_name = nullptr;
Info *next = nullptr;
};
static JNIEXPORT jobjectArray JNICALL
nativeList(JNIEnv *env, jclass clazz, jstring jpath) {
const char *path = env->GetStringUTFChars(jpath, nullptr);
DIR *dir;
struct dirent *ent;
InfoList *head = nullptr;
InfoList *temp;
InfoList *current = nullptr;
auto size = 0;
if ((dir = opendir(path)) != nullptr) {
while ((ent = readdir(dir)) != nullptr) {
int length = ent->d_reclen;
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
continue;
}
if (head == nullptr) {
head = static_cast<InfoList *>(malloc(sizeof(InfoList)));
head->d_name = static_cast<char *>(malloc((length + 1) * sizeof(char)));
memcpy(head->d_name, ent->d_name, length);
head->d_name[length] = '\0';
current = head;
} else {
temp = static_cast<InfoList *>(malloc(sizeof(InfoList)));
temp->d_name = static_cast<char *>(malloc((length + 1) * sizeof(char)));
memcpy(temp->d_name, ent->d_name, length);
temp->d_name[length] = '\0';
current->next = temp;
current = temp;
}
size++;
}
closedir(dir);
if (size > 0) {
jobjectArray result = env->NewObjectArray(size, env->FindClass("java/lang/String"),
nullptr);
for (int i = 0; i < size; i++) {
jstring tempStr = env->NewStringUTF(head->d_name);
env->SetObjectArrayElement(result, i, tempStr);
env->DeleteLocalRef(tempStr);
temp = head;
head = head->next;
temp->next = nullptr;
if (temp->d_name) {
free(temp->d_name);
temp->d_name = nullptr;
}
free(temp);
temp = nullptr;
}
return result;
}
}
return nullptr;
}
复制代码
总结
又经过一段时间的奇葩操作,最终找到导致耗时剧增的元凶 stat()/stat64()
(一个用于获取文件属性的方法)。当文件仅执行 list
操作时,对于50000个文件来说,仅要十几秒就能完成,当要一并获取文件大小,修改时间等文件属性封装成Info时,不忍直视。最后采用的方案是Info中仅封装路径信息,当RecyclerView$Adapter#onBindViewHolder
时,再通过路径转换成File,刷新UI;当然你也可以进一步优化 nativeList
方法,使其支持分页的方式获取文件名,进一步减少扫描目录的耗时。
遗留问题
前面已经分析了File#list
方法在6.0之后版本差异,为什么7.0之后优化成后者?明显在native中进行数据的扩容拷贝速度效率高于Native层调用Java层进行数据的扩容和拷贝,于是将低版本类似的实现方式与 File#list
在高版本手机中进行比较,结果是新版的效率高,也可能是数据量不够的问题,欢迎指正。下面是实现代码:
static JNIEXPORT jobjectArray JNICALL
nativeList64(JNIEnv *env, jclass clazz, jstring jpath) {
const char *path = env->GetStringUTFChars(jpath, nullptr);
DIR *dir = nullptr;
struct dirent64 *ent;
std::vector<std::string> data;
if ((dir = opendir(path)) != nullptr) {
while ((ent = readdir64(dir)) != nullptr) {
if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
continue;
}
data.emplace_back(ent->d_name);
}
closedir(dir);
int size = data.size();
if (size > 0) {
jobjectArray result = env->NewObjectArray(size, env->FindClass("java/lang/String"),
nullptr);
for (int i = 0; i < size; i++) {
jstring temp = env->NewStringUTF(data.at(i).c_str());
env->SetObjectArrayElement(result, i, temp);
env->DeleteLocalRef(temp);
}
std::vector<std::string>().swap(data);
return result;
}
}
return nullptr;
}
复制代码
顺便提一下,希望好心人帮忙内推一下(疫情+创业导致白白浪费了两年时间,?),floatwind@foxmail.com,感谢!