未验证 提交 002647fc 编写于 作者: I imhameed 提交者: GitHub

[mono] Decompress ICU data during iOS/tvOS app startup (#64967)

This change adds support for decompressing ICU data files using the lzfse
decompressor built into Apple OSes.

If the data file path passed to `GlobalizationNative_LoadICUData` does not have
`.lzfse` as a suffix, then the contents of this file will be mapped into
shareable memory and passed directly to ICU.

Otherwise, the contents are decompressed using a fixed-size working buffer and
are stored inside a cache file with a name that contains a decimal encoded
representation of the originating compressed file's inode number, modification
time, and size. This filesystem metadata is extremely likely to change if the
contents of the source file ever changes, so there's no need to compute a
checksum of the data to determine if the cache is still valid. If a cache file
with an appropriate name is already present then it is mapped into shareable
memory and passed to ICU. Stale cache files (defined here to be any file with a
filename that ends with "-icudt.dat.decompressed" that doesn't exactly match
the desired cache file name) are purged on startup.

`icudt.dat` for mobile is 2126 KiB right now; when compressed with lzfse it
shrinks to 675 KiB. On an iPhone SE 1st gen, this takes 4ms to decompress.

The "decompression framework" also supports lz4, zlib, and lzma. They are all
worse either in decompression time (lzma especially + zlib) or compression
ratio (lz4):

| Algorithm | icudt.dat compressed size | decompression time |
| --------- | ------------------------- | ------------------ |
| lz4       | 1031 KiB                  | 2.41 ms            |
| lzfse     | 675 KiB                   | 4.20 ms            |
| zlib      | 659 KiB                   | 9.61 ms            |
| lzma      | 427 KiB                   | 49.20 ms           |

I am not comfortable adding 50ms to app startup time. On this same
iPhone SE 1st gen, the "Contacts" app takes 166.7ms to display app-generated
pixels. This is end-to-end latency, from the first indication that iOS has
recognized my finger tap, to the display displaying any non-placeholder
content. This was measured with a 240fps camera. A barebones
xamarin-macios app takes 125ms to display a "Hello world!" message.

This uses the filename's suffix to control decompression for simplicity, but it
would also be possible to instead frame compressed data with a very simple
header consisting of a long-enough magic number and a decompressed payload size
that can be used as a buffer sizing hint to the decompression loop.

Miscellany:
- On iOS, the cache-directory appears to be app-specific. Example: `/var/mobile/Containers/Data/Application/0C22E0D1-26CD-46CB-9EBC-6CF55B513ED1/Library/Caches/`.
- https://www.usenix.org/system/files/conference/osdi14/osdi14-paper-pillai.pdf
上级 2dd232a5
......@@ -12,13 +12,13 @@ include_directories(
${PROJECT_SOURCE_DIR}/../sgen)
if(HOST_DARWIN)
set(OS_LIBS "-framework CoreFoundation" "-framework Foundation")
set(OS_LIBS "-framework CoreFoundation" "-framework Foundation" "-lcompression")
if(CMAKE_SYSTEM_VARIANT STREQUAL "MacCatalyst")
set(OS_LIBS ${OS_LIBS} "-lobjc" "-lc++")
endif()
elseif(HOST_IOS)
set(OS_LIBS "-framework CoreFoundation" "-lobjc" "-lc++")
set(OS_LIBS "-framework CoreFoundation" "-lcompression" "-lobjc" "-lc++")
elseif(HOST_ANDROID)
set(OS_LIBS m dl log)
elseif(HOST_LINUX)
......
......@@ -19,6 +19,20 @@
#define strncasecmp _strnicmp
#endif
#if defined(TARGET_IOS) || defined(TARGET_OSX) || defined(TARGET_WATCHOS) || defined(TARGET_TVOS)
#define USE_APPLE_DECOMPRESSION
#include <compression.h> // compression_stream_init, compression_stream_process, compression_stream_destroy
#include <dirent.h> // fdopendir, readdir, closedir
#include <fcntl.h> // open
#include <inttypes.h> // PRIu64
#include <limits.h> // PATH_MAX
#include <sys/errno.h> // errno
#include <sys/mman.h> // mmap, munmap
#include <sys/stat.h> // fstat
#include <sysdir.h> // sysdir_start_search_path_enumeration, sysdir_get_next_search_path_enumeration
#include <unistd.h> // write
#endif
static int32_t isLoaded = 0;
static int32_t isDataSet = 0;
......@@ -28,13 +42,14 @@ static void log_shim_error(const char* format, ...)
va_start(args, format);
vfprintf(stderr, format, args);
fputc('\n', stderr);
va_end(args);
}
/*
 * Logs a failed ICU call.
 *
 * name:   name of the ICU function that failed.
 * status: the ICU error code it reported; its symbolic name is looked up
 *         with u_errorName() and included in the message.
 */
static void log_icu_error(const char* name, UErrorCode status)
{
    const char * statusText = u_errorName(status);
    // Logged exactly once, without a trailing '\n': log_shim_error() writes
    // its own newline after the formatted message.
    log_shim_error("ICU call %s failed with error #%d '%s'.", name, status, statusText);
}
static void U_CALLCONV icu_trace_data(const void* context, int32_t fnNumber, int32_t level, const char* fmt, va_list args)
......@@ -47,7 +62,7 @@ static void U_CALLCONV icu_trace_data(const void* context, int32_t fnNumber, int
#ifdef __EMSCRIPTEN__
#include <emscripten.h>
static int32_t load_icu_data(void* pData);
static int32_t load_icu_data(const void* pData);
EMSCRIPTEN_KEEPALIVE const char* mono_wasm_get_icudt_name(const char* culture);
......@@ -56,9 +71,9 @@ EMSCRIPTEN_KEEPALIVE const char* mono_wasm_get_icudt_name(const char* culture)
return GlobalizationNative_GetICUDTName(culture);
}
EMSCRIPTEN_KEEPALIVE int32_t mono_wasm_load_icu_data(const void* pData);

/*
 * Exported entry point that hands an in-memory ICU data image to
 * load_icu_data() and returns that function's result unchanged.
 *
 * pData is const-qualified because this function only forwards the pointer.
 */
EMSCRIPTEN_KEEPALIVE int32_t mono_wasm_load_icu_data(const void* pData)
{
    return load_icu_data(pData);
}
......@@ -77,16 +92,19 @@ void mono_wasm_link_icu_shim(void)
#endif
static int32_t load_icu_data(void* pData)
static int32_t load_icu_data(const void* pData)
{
UErrorCode status = 0;
udata_setCommonData(pData, &status);
if (U_FAILURE(status)) {
if (U_FAILURE(status))
{
log_icu_error("udata_setCommonData", status);
return 0;
} else {
}
else
{
#if defined(ICU_TRACING)
// see https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/tracing.md
......@@ -98,57 +116,356 @@ static int32_t load_icu_data(void* pData)
}
}
int32_t GlobalizationNative_LoadICUData(const char* path)
#if defined(USE_APPLE_DECOMPRESSION)
/*
 * Writes the user-domain caches directory into `path`.
 *
 * The directory is the first entry produced by the sysdir search-path
 * enumeration for SYSDIR_DIRECTORY_CACHES / SYSDIR_DOMAIN_MASK_USER. If that
 * entry starts with "~/", the leading '~' is replaced with the value of $HOME;
 * any other entry is copied through unchanged.
 *
 * path: caller-provided buffer of at least PATH_MAX + 1 bytes.
 *
 * Returns 0 on success. Returns -1 (after logging) if the enumeration yields
 * nothing, if "~" needs expanding but $HOME is unset, or if the expanded path
 * does not fit in the buffer.
 */
static int
apple_cache_directory(char path[static (PATH_MAX + 1)])
{
    char buf[PATH_MAX + 1];
    sysdir_search_path_enumeration_state st = sysdir_start_search_path_enumeration(SYSDIR_DIRECTORY_CACHES, SYSDIR_DOMAIN_MASK_USER);
    st = sysdir_get_next_search_path_enumeration(st, buf);
    if (!st)
    {
        log_shim_error("apple_cache_directory: sysdir_get_next_search_path_enumeration did not yield a path");
        return -1;
    }

    const char *home = "";
    const char *rest = buf;
    if (buf[0] == '~' && buf[1] == '/')
    {
        home = getenv("HOME");
        if (home == NULL)
        {
            log_shim_error("apple_cache_directory: cache directory begins with ~ but $HOME is not set");
            return -1;
        }
        // Skip only the '~'; the '/' that follows it is kept as the separator.
        // This must not happen for paths without a "~/" prefix, otherwise
        // their first character would be dropped.
        rest = &buf[1];
    }

    int written = snprintf(path, PATH_MAX + 1, "%s%s", home, rest);
    if (written < 0 || (size_t) written > (size_t) PATH_MAX)
    {
        // A truncated path would silently name a different directory.
        log_shim_error("apple_cache_directory: cache directory path is too long");
        return -1;
    }
    return 0;
}
#define APPLE_DECOMPRESSION_BUF_SIZE 16384
/*
 * LZFSE-decodes `src_len` bytes at `src_buf` and writes the decoded bytes to
 * the file descriptor `dst_fd`, using a fixed-size on-stack staging buffer.
 *
 * dst_len: if non-NULL, receives the total number of decoded bytes on success.
 *
 * Returns 1 on success and 0 on failure (after logging). The compression
 * stream is destroyed on every path on which it was initialized.
 */
static int
apple_decompress_to_fd(int dst_fd, size_t *dst_len, const char *src_buf, size_t src_len)
{
    uint8_t chunk[APPLE_DECOMPRESSION_BUF_SIZE];
    compression_stream stream = { 0 };
    size_t total_out = 0;
    int stream_live = 0;
    int ok = 0;

    compression_status st = compression_stream_init(&stream, COMPRESSION_STREAM_DECODE, COMPRESSION_LZFSE);
    if (st == COMPRESSION_STATUS_ERROR)
    {
        log_shim_error("apple_decompress_to_fd: Failed to initialize decompression stream.");
        goto done;
    }
    stream_live = 1;

    stream.src_ptr = (const uint8_t *) src_buf;
    stream.src_size = src_len;
    stream.dst_ptr = chunk;
    stream.dst_size = APPLE_DECOMPRESSION_BUF_SIZE;

    int process_flags = 0;
    while (st == COMPRESSION_STATUS_OK)
    {
        st = compression_stream_process(&stream, process_flags);
        if (st == COMPRESSION_STATUS_ERROR)
        {
            log_shim_error("apple_decompress_to_fd: Error while decompressing.");
            goto done;
        }

        // Whatever the decoder produced in this round sits at the front of chunk.
        size_t pending = APPLE_DECOMPRESSION_BUF_SIZE - stream.dst_size;
        const uint8_t *cursor = chunk;
        total_out += pending;

        // Drain the staging buffer, tolerating partial writes and EINTR.
        while (pending > 0)
        {
            ssize_t n = write(dst_fd, cursor, pending);
            int write_errno = errno;
            if (n == -1)
            {
                if (write_errno == EINTR)
                {
                    continue;
                }
                log_shim_error("apple_decompress_to_fd: Error during write().");
                goto done;
            }
            if (n == 0)
            {
                log_shim_error("apple_decompress_to_fd: write() returned 0.");
                goto done;
            }
            cursor += n;
            pending -= (size_t) n;
        }

        // Hand the whole staging buffer back to the decoder for the next round.
        stream.dst_ptr = chunk;
        stream.dst_size = APPLE_DECOMPRESSION_BUF_SIZE;

        // Once all input has been consumed, ask the decoder to finish up.
        if (stream.src_size == 0)
        {
            process_flags = COMPRESSION_STREAM_FINALIZE;
        }
    }

    if (dst_len != NULL)
    {
        *dst_len = total_out;
    }
    ok = 1;

done:
    if (stream_live)
    {
        compression_stream_destroy(&stream);
    }
    return ok;
}
/*
 * Deletes stale decompressed-ICU cache files from an open directory.
 *
 * A directory entry is stale when its name ends with
 * "-icudt.dat.decompressed" and is not exactly `current_cache_filename`.
 * Matching is on the suffix rather than on any substring, so unrelated files
 * that merely contain that text are left alone.
 *
 * d:                      open directory stream; it is rewound before scanning.
 * current_cache_filename: the one cache file name that must be kept.
 *
 * Returns 0 after scanning, or -1 if either argument is NULL. Individual
 * unlink failures are ignored: the purge is best-effort.
 */
static int
apple_clean_stale_cache_files(DIR *d, const char *current_cache_filename)
{
    static const char suffix[] = "-icudt.dat.decompressed";
    const size_t suffix_len = sizeof suffix - 1;

    if (d == NULL || current_cache_filename == NULL)
    {
        return -1;
    }

    int dir_fd = dirfd(d);
    struct dirent *de = NULL;
    rewinddir(d);
    while ((de = readdir(d)) != NULL)
    {
        const char *fn = de->d_name;
        size_t fn_len = strlen(fn);
        if (fn_len < suffix_len || strcmp(fn + (fn_len - suffix_len), suffix) != 0)
        {
            continue; // not one of our cache files
        }
        if (strcmp(fn, current_cache_filename) == 0)
        {
            continue; // the cache file that is still valid
        }
        (void) unlinkat(dir_fd, fn, 0);
    }
    return 0;
}
#define APPLE_TMPFILE_NAME_SIZE 128
/*
 * Maps the ICU data file at `path` into memory and returns the mapping.
 *
 * If `path` does not end in ".lzfse", the file itself is mapped read-only.
 *
 * Otherwise the file is decompressed with apple_decompress_to_fd() into a
 * cache file inside the directory reported by apple_cache_directory(), and
 * that cache file is mapped instead. The cache file name is built from the
 * source file's inode number, size and modification time (seconds), so a
 * changed source file produces a different name. Files that
 * apple_clean_stale_cache_files() considers stale are purged first. The
 * decompressed data is written under a PID-prefixed temporary name and then
 * renamed to the final name, so a partially written file never carries the
 * cache name.
 *
 * Returns the mapped data on success or NULL on failure (after logging). The
 * mapping is not unmapped by this function. Every descriptor opened here is
 * closed before returning.
 */
static const char *
apple_mmap_icu_data(const char *path)
{
    const char *ret = NULL;
    DIR *dirp = NULL;
    int cache_fd = -1;
    int src_fd = open(path, O_RDONLY | O_CLOEXEC);
    if (src_fd == -1)
    {
        log_shim_error("apple_mmap_icu_data: failed to open %s", path);
        goto cleanup;
    }

    struct stat src_st;
    int result = fstat(src_fd, &src_st);
    if (result == -1)
    {
        log_shim_error("apple_mmap_icu_data: failed to fstat %s", path);
        goto cleanup;
    }

    char tmp_file[APPLE_TMPFILE_NAME_SIZE];
    size_t src_size = (size_t) src_st.st_size;
    size_t icu_data_size = 0;
    const char *last_period = strrchr(path, '.');
    const char *icu_data_name = NULL;

    if (last_period && strcmp(last_period, ".lzfse") == 0)
    {
        // tmp_file holds "<pid>.<cache name>"; cache_file points at the part
        // after the PID prefix, so both names share one buffer.
        char *cache_file = tmp_file;
        uint64_t pid = (uint64_t) getpid();
        int written = snprintf(tmp_file, APPLE_TMPFILE_NAME_SIZE, "%" PRIu64 ".", pid);
        if (written < 0 || written >= APPLE_TMPFILE_NAME_SIZE)
        {
            log_shim_error("apple_mmap_icu_data: failed to generate tmpfile PID prefix");
            goto cleanup;
        }
        cache_file += written;
        size_t remaining = (size_t) (APPLE_TMPFILE_NAME_SIZE - written);
        written = snprintf(cache_file, remaining, "%" PRIu64 "-%" PRIu64 "-%" PRIu64 "-icudt.dat.decompressed",
            (uint64_t) src_st.st_ino,
            (uint64_t) src_st.st_size,
            (uint64_t) src_st.st_mtimespec.tv_sec);
        if (written < 0 || (size_t) written >= remaining)
        {
            log_shim_error("apple_mmap_icu_data: failed to generate cache file name");
            goto cleanup;
        }

        char cache_dir[PATH_MAX + 1];
        result = apple_cache_directory(cache_dir);
        if (result == -1)
        {
            goto cleanup;
        }

        int dir_fd = open(cache_dir, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
        if (dir_fd == -1)
        {
            log_shim_error("apple_mmap_icu_data: failed to open directory %s", cache_dir);
            goto cleanup;
        }

        dirp = fdopendir(dir_fd);
        if (dirp == NULL)
        {
            // On failure the descriptor still belongs to us; on success it is
            // owned by dirp and released by closedir() during cleanup.
            log_shim_error("apple_mmap_icu_data: failed to open directory stream for %s", cache_dir);
            close(dir_fd);
            goto cleanup;
        }

        apple_clean_stale_cache_files(dirp, cache_file);

        cache_fd = openat(dir_fd, cache_file, O_RDONLY | O_CLOEXEC);
        if (cache_fd == -1)
        {
            // No usable cache yet: decompress into the temporary name.
            // O_TRUNC guards against a leftover file with the same name whose
            // old contents would otherwise trail the fresh data.
            cache_fd = openat(dir_fd, tmp_file, O_RDWR | O_CREAT | O_TRUNC | O_CLOEXEC, 0640);
            if (cache_fd == -1)
            {
                log_shim_error("apple_mmap_icu_data: failed to open %s/%s for writing", cache_dir, tmp_file);
                goto cleanup;
            }

            const char *src_mem = mmap(NULL, src_size, PROT_READ, MAP_SHARED, src_fd, 0);
            if (src_mem == MAP_FAILED)
            {
                log_shim_error("apple_mmap_icu_data: failed to map %s with size %zu", path, src_size);
                (void) unlinkat(dir_fd, tmp_file, 0);
                goto cleanup;
            }

            size_t decompressed_data_size = 0;
            int decompressed = apple_decompress_to_fd(cache_fd, &decompressed_data_size, src_mem, src_size);
            munmap((void *) src_mem, src_size);
            if (!decompressed)
            {
                // Do not leave a partially written temporary file behind.
                (void) unlinkat(dir_fd, tmp_file, 0);
                goto cleanup;
            }
            fsync(cache_fd);

            result = renameat(dir_fd, tmp_file, dir_fd, cache_file);
            if (result == -1)
            {
                log_shim_error("apple_mmap_icu_data: failed to rename %s to %s", tmp_file, cache_file);
                (void) unlinkat(dir_fd, tmp_file, 0);
                goto cleanup;
            }
            fsync(dir_fd);
        }

        close(src_fd);
        src_fd = -1;

        struct stat cache_st;
        result = fstat(cache_fd, &cache_st);
        if (result == -1)
        {
            log_shim_error("apple_mmap_icu_data: failed to fstat %s", cache_file);
            goto cleanup;
        }

        icu_data_size = (size_t) cache_st.st_size;
        icu_data_name = cache_file;
    }
    else
    {
        // Uncompressed input: map the source file directly.
        cache_fd = src_fd;
        src_fd = -1;
        icu_data_size = src_size;
        icu_data_name = path;
    }

    const char *cache_mem = mmap(NULL, icu_data_size, PROT_READ, MAP_SHARED, cache_fd, 0);
    if (cache_mem == MAP_FAILED)
    {
        log_shim_error("apple_mmap_icu_data: failed to map %s with size %zu", icu_data_name, icu_data_size);
        goto cleanup;
    }

    ret = cache_mem;

cleanup:
    // Reached on success as well as on failure; an established mapping stays
    // valid after its descriptor has been closed.
    if (dirp != NULL)
    {
        closedir(dirp);
    }
    if (src_fd >= 0)
    {
        close(src_fd);
    }
    if (cache_fd >= 0)
    {
        close(cache_fd);
    }
    return ret;
}
#else
/*
 * Reads the whole ICU data file at `path` into a heap buffer using only the C
 * standard library.
 *
 * Returns the buffer on success; this function does not free it, so ownership
 * passes to the caller. Returns NULL on any failure (after logging), in which
 * case the file is closed and any partially filled buffer is freed.
 */
static const char *
cstdlib_load_icu_data(const char *path)
{
    char *file_buf = NULL;
    FILE *fp = fopen(path, "rb");

    if (fp == NULL)
    {
        log_shim_error("Unable to load ICU dat file '%s'.", path);
        goto error;
    }

    if (fseek(fp, 0L, SEEK_END) != 0)
    {
        log_shim_error("Unable to determine size of the dat file");
        goto error;
    }

    long file_buf_size = ftell(fp);

    if (file_buf_size < 0)
    {
        log_shim_error("Unable to determine size of the ICU dat file.");
        goto error;
    }

    file_buf = malloc(sizeof(char) * ((size_t) file_buf_size + 1));

    if (file_buf == NULL)
    {
        log_shim_error("Unable to allocate enough to read the ICU dat file");
        goto error;
    }

    if (fseek(fp, 0L, SEEK_SET) != 0)
    {
        log_shim_error("Unable to seek ICU dat file.");
        goto error;
    }

    // A short read leaves the tail of the buffer uninitialized, so treat it as
    // a failure even when the stream's error indicator is not set.
    size_t bytes_read = fread(file_buf, sizeof(char), (size_t) file_buf_size, fp);

    if (bytes_read != (size_t) file_buf_size || ferror(fp) != 0)
    {
        log_shim_error("Unable to read ICU dat file");
        goto error;
    }

    fclose(fp);
    fp = NULL;

    return file_buf;

error:
    if (fp != NULL)
    {
        fclose(fp);
    }
    free(file_buf);
    return NULL;
}
#endif
int32_t
GlobalizationNative_LoadICUData(const char* path)
{
const char *icu_data =
#if defined(USE_APPLE_DECOMPRESSION)
apple_mmap_icu_data(path)
#else
cstdlib_load_icu_data(path)
#endif
;
if (icu_data == NULL)
{
log_shim_error("Failed to load ICU data.");
return -1;
}
if (load_icu_data(icu_data) == 0) {
log_shim_error("ICU BAD EXIT %d.", ret);
return ret;
if (load_icu_data(icu_data) == 0)
{
log_shim_error("ICU BAD EXIT.");
return -1;
}
return GlobalizationNative_LoadICU();
......@@ -163,8 +480,8 @@ const char* GlobalizationNative_GetICUDTName(const char* culture)
return "icudt.dat";
// CJK: starts with "ja", "ko" or "zh"
if (!strncasecmp("ja", culture, 2) ||
!strncasecmp("ko", culture, 2) ||
if (!strncasecmp("ja", culture, 2) ||
!strncasecmp("ko", culture, 2) ||
!strncasecmp("zh", culture, 2))
return "icudt_CJK.dat"; // contains "en" as well.
......@@ -185,7 +502,8 @@ const char* GlobalizationNative_GetICUDTName(const char* culture)
int32_t GlobalizationNative_LoadICU(void)
{
if (!isDataSet) {
if (!isDataSet)
{
// don't try to locate icudt.dat automatically if mono_wasm_load_icu_data wasn't called
// and fallback to invariant mode
return 0;
......@@ -196,11 +514,12 @@ int32_t GlobalizationNative_LoadICU(void)
// whether it worked.
ulocdata_getCLDRVersion(version, &status);
if (U_FAILURE(status)) {
if (U_FAILURE(status))
{
log_icu_error("ulocdata_getCLDRVersion", status);
return 0;
}
isLoaded = 1;
return 1;
}
......
......@@ -65,5 +65,6 @@ target_link_libraries(
"-lz"
"-lc++"
"-liconv"
"-lcompression"
%NativeLibrariesToLink%
)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册