From d860035c9f447c9ab9cc6a8be8eb07fcc76f9d02 Mon Sep 17 00:00:00 2001 From: Bin Li Date: Wed, 19 Feb 2020 17:38:34 +0800 Subject: [PATCH] Support Qualcomm ION buffer for GPU --- .gitignore | 1 - mace/BUILD.bazel | 11 + mace/core/BUILD.bazel | 5 + mace/core/CMakeLists.txt | 4 + mace/core/allocator.h | 34 ++- mace/core/buffer.h | 38 ++- mace/core/runtime/opencl/opencl_allocator.cc | 253 ++++++++++++++---- mace/core/runtime/opencl/opencl_allocator.h | 27 +- mace/core/runtime/opencl/opencl_extension.h | 4 + mace/core/runtime/opencl/opencl_runtime.cc | 78 ++++++ mace/core/runtime/opencl/opencl_runtime.h | 20 ++ mace/core/runtime/opencl/opencl_wrapper.cc | 31 +++ mace/core/tensor.h | 5 +- mace/mace.bzl | 6 + mace/ops/opencl/buffer_transformer.h | 4 +- test/ccunit/mace/ops/batch_norm_test.cc | 29 +- test/ccunit/mace/ops/buffer_to_image_test.cc | 9 + test/ccunit/mace/ops/buffer_transform_test.cc | 6 + .../ops/opencl/out_of_range_check_test.cc | 2 +- third_party/rpcmem/BUILD.bazel | 24 ++ third_party/rpcmem/arm64-v8a/rpcmem.a | Bin 0 -> 24682 bytes third_party/rpcmem/armeabi-v7a/rpcmem.a | Bin 0 -> 18718 bytes third_party/rpcmem/license.txt | 5 + third_party/rpcmem/rpcmem.cmake | 10 + third_party/rpcmem/rpcmem.h | 141 ++++++++++ third_party/third_party.cmake | 1 + tools/bazel-build-standalone-lib.sh | 16 +- tools/bazel_adb_run.py | 6 + tools/cmake/cmake-build-arm64-v8a.sh | 1 + tools/cmake/cmake-build-armeabi-v7a.sh | 1 + tools/sh_commands.py | 3 + 31 files changed, 672 insertions(+), 103 deletions(-) create mode 100644 third_party/rpcmem/BUILD.bazel create mode 100644 third_party/rpcmem/arm64-v8a/rpcmem.a create mode 100644 third_party/rpcmem/armeabi-v7a/rpcmem.a create mode 100755 third_party/rpcmem/license.txt create mode 100644 third_party/rpcmem/rpcmem.cmake create mode 100755 third_party/rpcmem/rpcmem.h diff --git a/.gitignore b/.gitignore index d4f869a4..c02dd020 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,6 @@ build/* cmake-build/ cmake-build-debug/ docs/_build/ -*.a .idea/ .vscode/ diff --git a/mace/BUILD.bazel b/mace/BUILD.bazel index 35c6b405..748af938 100644 --- a/mace/BUILD.bazel +++ b/mace/BUILD.bazel @@ -131,3 +131,14 @@ config_setting( }, visibility = ["//visibility:public"], ) + +config_setting( + name = "rpcmem_enabled", + define_values = { + "rpcmem": "true", + }, + values = { + "crosstool_top": "//external:android/crosstool", + }, + visibility = ["//visibility:public"], +) diff --git a/mace/core/BUILD.bazel b/mace/core/BUILD.bazel index f9cc9697..fcb7b207 100644 --- a/mace/core/BUILD.bazel +++ b/mace/core/BUILD.bazel @@ -18,6 +18,7 @@ load( "if_opencl_enabled", "if_openmp_enabled", "if_quantize_enabled", + "if_rpcmem_enabled", ) cc_library( @@ -75,6 +76,8 @@ cc_library( ]) + if_android_armv7([ "-mfpu=neon-fp16", "-mfloat-abi=softfp", + ]) + if_rpcmem_enabled([ + "-DMACE_ENABLE_RPCMEM", ]), linkopts = ["-ldl"], deps = [ @@ -94,6 +97,8 @@ cc_library( "//third_party/hta", ]) + if_apu_enabled([ "//third_party/apu:libapu-frontend", + ]) + if_rpcmem_enabled([ + "//third_party/rpcmem", ]), ) diff --git a/mace/core/CMakeLists.txt b/mace/core/CMakeLists.txt index 25dd1680..75b74bb9 100644 --- a/mace/core/CMakeLists.txt +++ b/mace/core/CMakeLists.txt @@ -45,6 +45,10 @@ if(MACE_ENABLE_MTK_APU) set(EXTRA_LINK_LIBS ${EXTRA_LINK_LIBS} apu-frontend) endif(MACE_ENABLE_MTK_APU) +if(MACE_ENABLE_RPCMEM) + set(EXTRA_LINK_LIBS ${EXTRA_LINK_LIBS} rpcmem) +endif(MACE_ENABLE_RPCMEM) + add_library(core STATIC ${CORE_SRCS}) target_link_libraries(core PRIVATE proto diff --git a/mace/core/allocator.h b/mace/core/allocator.h index 66987c46..e4e7b35f 100644 --- a/mace/core/allocator.h +++ b/mace/core/allocator.h @@ -47,16 +47,20 @@ class Allocator { public: Allocator() {} virtual ~Allocator() noexcept {} - virtual MaceStatus New(size_t nbytes, void **result) const = 0; + virtual MaceStatus New(size_t nbytes, void **result) = 0; virtual MaceStatus NewImage(const std::vector &image_shape, const DataType dt, - void **result) const = 0; - virtual void Delete(void *data) const = 0; - virtual void DeleteImage(void *data) const = 0; - virtual void *Map(void *buffer, size_t offset, size_t nbytes) const = 0; + void **result) = 0; + virtual void Delete(void *data) = 0; + virtual void DeleteImage(void *data) = 0; + virtual void *Map(void *buffer, + size_t offset, + size_t nbytes, + bool finish_cmd_queue) const = 0; virtual void *MapImage(void *buffer, const std::vector &image_shape, - std::vector *mapped_image_pitch) const = 0; + std::vector *mapped_image_pitch, + bool finish_cmd_queue) const = 0; virtual void Unmap(void *buffer, void *mapper_ptr) const = 0; virtual bool OnHost() const = 0; }; @@ -64,7 +68,7 @@ class Allocator { class CPUAllocator : public Allocator { public: ~CPUAllocator() override {} - MaceStatus New(size_t nbytes, void **result) const override { + MaceStatus New(size_t nbytes, void **result) override { VLOG(3) << "Allocate CPU buffer: " << nbytes; if (nbytes == 0) { return MaceStatus::MACE_SUCCESS; @@ -82,7 +86,7 @@ class CPUAllocator : public Allocator { MaceStatus NewImage(const std::vector &shape, const DataType dt, - void **result) const override { + void **result) override { MACE_UNUSED(shape); MACE_UNUSED(dt); MACE_UNUSED(result); @@ -90,24 +94,30 @@ class CPUAllocator : public Allocator { return MaceStatus::MACE_SUCCESS; } - void Delete(void *data) const override { + void Delete(void *data) override { MACE_CHECK_NOTNULL(data); VLOG(3) << "Free CPU buffer"; free(data); } - void DeleteImage(void *data) const override { + void DeleteImage(void *data) override { LOG(FATAL) << "Free CPU image"; free(data); }; - void *Map(void *buffer, size_t offset, size_t nbytes) const override { + void *Map(void *buffer, + size_t offset, + size_t nbytes, + bool finish_cmd_queue) const override { MACE_UNUSED(nbytes); + MACE_UNUSED(finish_cmd_queue); return reinterpret_cast(buffer) + offset; } void *MapImage(void *buffer, const std::vector &image_shape, - std::vector *mapped_image_pitch) const override { + std::vector *mapped_image_pitch, + bool finish_cmd_queue) const override { MACE_UNUSED(image_shape); MACE_UNUSED(mapped_image_pitch); + MACE_UNUSED(finish_cmd_queue); return buffer; } void Unmap(void *buffer, void *mapper_ptr) const override { diff --git a/mace/core/buffer.h b/mace/core/buffer.h index d1f5f1a5..49be4217 100644 --- a/mace/core/buffer.h +++ b/mace/core/buffer.h @@ -54,11 +54,13 @@ class BufferBase { virtual void *Map(index_t offset, index_t length, - std::vector *pitch) const = 0; + std::vector *pitch, + bool finish_cmd_queue) const = 0; virtual void UnMap(void *mapped_ptr) const = 0; - virtual void Map(std::vector *pitch) = 0; + virtual void Map(std::vector *pitch, + bool finish_cmd_queue = true) = 0; virtual void UnMap() = 0; @@ -171,10 +173,13 @@ class Buffer : public BufferBase { return this->Allocate(nbytes); } - void *Map(index_t offset, index_t length, std::vector *pitch) const { + void *Map(index_t offset, + index_t length, + std::vector *pitch, + bool finish_cmd_queue) const { MACE_CHECK_NOTNULL(buf_); MACE_UNUSED(pitch); - return allocator_->Map(buf_, offset, length); + return allocator_->Map(buf_, offset, length, finish_cmd_queue); } void UnMap(void *mapped_ptr) const { @@ -183,9 +188,9 @@ class Buffer : public BufferBase { allocator_->Unmap(buf_, mapped_ptr); } - void Map(std::vector *pitch) { + void Map(std::vector *pitch, bool finish_cmd_queue = true) { MACE_CHECK(mapped_buf_ == nullptr, "buf has been already mapped"); - mapped_buf_ = Map(0, size_, pitch); + mapped_buf_ = Map(0, size_, pitch, finish_cmd_queue); } void UnMap() { @@ -300,10 +305,14 @@ class Image : public BufferBase { return allocator_->NewImage(shape, data_type, &buf_); } - void *Map(index_t offset, index_t length, std::vector *pitch) const { + void *Map(index_t offset, + index_t length, + std::vector *pitch, + bool finish_cmd_queue) const { MACE_UNUSED(offset); MACE_UNUSED(length); MACE_UNUSED(pitch); + MACE_UNUSED(finish_cmd_queue); MACE_NOT_IMPLEMENTED; return nullptr; } @@ -314,11 +323,11 @@ class Image : public BufferBase { allocator_->Unmap(buf_, mapped_ptr); } - void Map(std::vector *pitch) { + void Map(std::vector *pitch, bool finish_cmd_queue = true) { MACE_CHECK_NOTNULL(buf_); MACE_CHECK(mapped_buf_ == nullptr, "buf has been already mapped"); MACE_CHECK_NOTNULL(pitch); - mapped_buf_ = allocator_->MapImage(buf_, shape_, pitch); + mapped_buf_ = allocator_->MapImage(buf_, shape_, pitch, finish_cmd_queue); } void UnMap() { @@ -434,18 +443,21 @@ class BufferSlice : public BufferBase { return MaceStatus::MACE_SUCCESS; } - void *Map(index_t offset, index_t length, std::vector *pitch) const { - return buffer_->Map(offset_ + offset, length, pitch); + void *Map(index_t offset, + index_t length, + std::vector *pitch, + bool finish_cmd_queue) const { + return buffer_->Map(offset_ + offset, length, pitch, finish_cmd_queue); } void UnMap(void *mapped_ptr) const { buffer_->UnMap(mapped_ptr); } - void Map(std::vector *pitch) { + void Map(std::vector *pitch, bool finish_cmd_queue = true) { MACE_CHECK_NOTNULL(buffer_); MACE_CHECK(mapped_buf_ == nullptr, "mapped buf is not null"); - mapped_buf_ = buffer_->Map(offset_, size_, pitch); + mapped_buf_ = buffer_->Map(offset_, size_, pitch, finish_cmd_queue); } void UnMap() { diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index d9994169..b0bf041b 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -16,9 +16,11 @@ #include "mace/core/runtime/opencl/opencl_allocator.h" #include "mace/core/runtime/opencl/opencl_runtime.h" +#ifdef MACE_ENABLE_RPCMEM +#include "third_party/rpcmem/rpcmem.h" +#endif // MACE_ENABLE_RPCMEM namespace mace { - namespace { static cl_channel_type DataTypeToCLChannelType(const DataType t) { @@ -36,14 +38,29 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) { return 0; } } + +#ifdef MACE_ENABLE_RPCMEM +std::once_flag ion_prepared; +void PrepareQualcommION() { + rpcmem_init(); + std::atexit(rpcmem_deinit); +} +#endif // MACE_ENABLE_RPCMEM + } // namespace OpenCLAllocator::OpenCLAllocator( - OpenCLRuntime *opencl_runtime): - opencl_runtime_(opencl_runtime) {} + OpenCLRuntime *opencl_runtime): opencl_runtime_(opencl_runtime) { +#ifdef MACE_ENABLE_RPCMEM + if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) { + std::call_once(ion_prepared, PrepareQualcommION); + } +#endif // MACE_ENABLE_RPCMEM +} OpenCLAllocator::~OpenCLAllocator() {} -MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const { + +MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) { if (nbytes == 0) { return MaceStatus::MACE_SUCCESS; } @@ -53,10 +70,27 @@ MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const { return MaceStatus::MACE_OUT_OF_RESOURCES; } - cl_int error; - cl::Buffer *buffer = new cl::Buffer(opencl_runtime_->context(), - CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, - nbytes, nullptr, &error); + cl_int error = CL_SUCCESS; + cl::Buffer *buffer = nullptr; +#ifdef MACE_ENABLE_RPCMEM + if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) { + cl_mem_ion_host_ptr ion_host; + CreateQualcommBufferIONHostPtr(nbytes, &ion_host); + + buffer = new cl::Buffer( + opencl_runtime_->context(), + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, + nbytes, &ion_host, &error); + + cl_to_host_map_[static_cast(buffer)] = ion_host.ion_hostptr; + } else { +#endif // MACE_ENABLE_RPCMEM + buffer = new cl::Buffer(opencl_runtime_->context(), + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + nbytes, nullptr, &error); +#ifdef MACE_ENABLE_RPCMEM + } +#endif // MACE_ENABLE_RPCMEM if (error != CL_SUCCESS) { LOG(WARNING) << "Allocate OpenCL Buffer with " << nbytes << " bytes failed because of " @@ -72,7 +106,7 @@ MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const { MaceStatus OpenCLAllocator::NewImage(const std::vector &image_shape, const DataType dt, - void **result) const { + void **result) { MACE_CHECK(image_shape.size() == 2, "Image shape's size must equal 2"); MACE_LATENCY_LOGGER(1, "Allocate OpenCL image: ", image_shape[0], ", ", image_shape[1]); @@ -82,12 +116,29 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector &image_shape, } cl::ImageFormat img_format(CL_RGBA, DataTypeToCLChannelType(dt)); + cl_int error = CL_SUCCESS; + cl::Image2D *cl_image = nullptr; +#ifdef MACE_ENABLE_RPCMEM + if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) { + cl_mem_ion_host_ptr ion_host; + size_t pitch; + CreateQualcommImageIONHostPtr(image_shape, img_format, &pitch, &ion_host); - cl_int error; - cl::Image2D *cl_image = - new cl::Image2D(opencl_runtime_->context(), - CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format, - image_shape[0], image_shape[1], 0, nullptr, &error); + cl_image = new cl::Image2D( + opencl_runtime_->context(), + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM, + img_format, image_shape[0], image_shape[1], pitch, &ion_host, &error); + + cl_to_host_map_[static_cast(cl_image)] = ion_host.ion_hostptr; + } else { +#endif // MACE_ENABLE_RPCMEM + cl_image = + new cl::Image2D(opencl_runtime_->context(), + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format, + image_shape[0], image_shape[1], 0, nullptr, &error); +#ifdef MACE_ENABLE_RPCMEM + } +#endif // MACE_ENABLE_RPCMEM if (error != CL_SUCCESS) { LOG(WARNING) << "Allocate OpenCL image with shape: [" << image_shape[0] << ", " << image_shape[1] @@ -108,72 +159,180 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector &image_shape, } } -void OpenCLAllocator::Delete(void *buffer) const { +void OpenCLAllocator::Delete(void *buffer) { MACE_LATENCY_LOGGER(1, "Free OpenCL buffer"); if (buffer != nullptr) { cl::Buffer *cl_buffer = static_cast(buffer); delete cl_buffer; +#ifdef MACE_ENABLE_RPCMEM + if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) { + auto it = cl_to_host_map_.find(buffer); + MACE_CHECK(it != cl_to_host_map_.end(), "OpenCL buffer not found!"); + rpcmem_free(it->second); + cl_to_host_map_.erase(buffer); + } +#endif // MACE_ENABLE_RPCMEM } } -void OpenCLAllocator::DeleteImage(void *buffer) const { +void OpenCLAllocator::DeleteImage(void *buffer) { MACE_LATENCY_LOGGER(1, "Free OpenCL image"); if (buffer != nullptr) { cl::Image2D *cl_image = static_cast(buffer); delete cl_image; +#ifdef MACE_ENABLE_RPCMEM + if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) { + auto it = cl_to_host_map_.find(buffer); + MACE_CHECK(it != cl_to_host_map_.end(), "OpenCL image not found!"); + rpcmem_free(it->second); + cl_to_host_map_.erase(buffer); + } +#endif // MACE_ENABLE_RPCMEM } } -void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const { +void *OpenCLAllocator::Map(void *buffer, + size_t offset, + size_t nbytes, + bool finish_cmd_queue) const { MACE_LATENCY_LOGGER(1, "Map OpenCL buffer"); - auto cl_buffer = static_cast(buffer); - auto queue = opencl_runtime_->command_queue(); - // TODO(heliangliang) Non-blocking call - cl_int error; - void *mapped_ptr = - queue.enqueueMapBuffer(*cl_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - offset, nbytes, nullptr, nullptr, &error); - if (error != CL_SUCCESS) { - LOG(ERROR) << "Map buffer failed, error: " << OpenCLErrorToString(error); - mapped_ptr = nullptr; + void *mapped_ptr = nullptr; +#ifdef MACE_ENABLE_RPCMEM + if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) { + auto it = cl_to_host_map_.find(buffer); + MACE_CHECK(it != cl_to_host_map_.end(), "Try to map unallocated Buffer!"); + mapped_ptr = it->second; + + if (finish_cmd_queue) { + opencl_runtime_->command_queue().finish(); + } + + if (opencl_runtime_->qcom_host_cache_policy() == + CL_MEM_HOST_WRITEBACK_QCOM) { + MACE_CHECK(rpcmem_sync_cache(mapped_ptr, RPCMEM_SYNC_START) == 0); + } + } else { +#endif // MACE_ENABLE_RPCMEM + MACE_UNUSED(finish_cmd_queue); + auto cl_buffer = static_cast(buffer); + auto queue = opencl_runtime_->command_queue(); + // TODO(heliangliang) Non-blocking call + cl_int error; + mapped_ptr = + queue.enqueueMapBuffer(*cl_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, + offset, nbytes, nullptr, nullptr, &error); + if (error != CL_SUCCESS) { + LOG(ERROR) << "Map buffer failed, error: " << OpenCLErrorToString(error); + } +#ifdef MACE_ENABLE_RPCMEM } +#endif // MACE_ENABLE_RPCMEM return mapped_ptr; } // TODO(liuqi) there is something wrong with half type. void *OpenCLAllocator::MapImage(void *buffer, const std::vector &image_shape, - std::vector *mapped_image_pitch) const { + std::vector *mapped_image_pitch, + bool finish_cmd_queue) const { MACE_LATENCY_LOGGER(1, "Map OpenCL Image"); MACE_CHECK(image_shape.size() == 2) << "Just support map 2d image"; - auto cl_image = static_cast(buffer); - std::array origin = {{0, 0, 0}}; - std::array region = {{image_shape[0], image_shape[1], 1}}; - - mapped_image_pitch->resize(2); - cl_int error; - void *mapped_ptr = opencl_runtime_->command_queue().enqueueMapImage( - *cl_image, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, - mapped_image_pitch->data(), mapped_image_pitch->data() + 1, nullptr, - nullptr, &error); - if (error != CL_SUCCESS) { - LOG(ERROR) << "Map Image failed, error: " << OpenCLErrorToString(error); - mapped_ptr = nullptr; + void *mapped_ptr = nullptr; +#ifdef MACE_ENABLE_RPCMEM + if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) { + // TODO(libin): Set mapped_image_pitch if needed + auto it = cl_to_host_map_.find(buffer); + MACE_CHECK(it != cl_to_host_map_.end(), "Try to map unallocated Image!"); + mapped_ptr = it->second; + + if (finish_cmd_queue) { + opencl_runtime_->command_queue().finish(); + } + + if (opencl_runtime_->qcom_host_cache_policy() == + CL_MEM_HOST_WRITEBACK_QCOM) { + MACE_CHECK(rpcmem_sync_cache(mapped_ptr, RPCMEM_SYNC_START) == 0); + } + } else { +#endif // MACE_ENABLE_RPCMEM + MACE_UNUSED(finish_cmd_queue); + auto cl_image = static_cast(buffer); + std::array origin = {{0, 0, 0}}; + std::array region = {{image_shape[0], image_shape[1], 1}}; + + mapped_image_pitch->resize(2); + cl_int error; + mapped_ptr = opencl_runtime_->command_queue().enqueueMapImage( + *cl_image, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region, + mapped_image_pitch->data(), mapped_image_pitch->data() + 1, nullptr, + nullptr, &error); + if (error != CL_SUCCESS) { + LOG(ERROR) << "Map Image failed, error: " << OpenCLErrorToString(error); + } +#ifdef MACE_ENABLE_RPCMEM } +#endif // MACE_ENABLE_RPCMEM return mapped_ptr; } void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) const { MACE_LATENCY_LOGGER(1, "Unmap OpenCL buffer/Image"); - auto cl_buffer = static_cast(buffer); - auto queue = opencl_runtime_->command_queue(); - cl_int error = queue.enqueueUnmapMemObject(*cl_buffer, mapped_ptr, - nullptr, nullptr); - if (error != CL_SUCCESS) { - LOG(ERROR) << "Unmap buffer failed, error: " << OpenCLErrorToString(error); +#ifdef MACE_ENABLE_RPCMEM + if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) { + if (opencl_runtime_->qcom_host_cache_policy() == + CL_MEM_HOST_WRITEBACK_QCOM) { + MACE_CHECK(rpcmem_sync_cache(mapped_ptr, RPCMEM_SYNC_END) == 0); + } + } else { +#endif // MACE_ENABLE_RPCMEM + auto cl_buffer = static_cast(buffer); + auto queue = opencl_runtime_->command_queue(); + cl_int error = queue.enqueueUnmapMemObject(*cl_buffer, mapped_ptr, + nullptr, nullptr); + if (error != CL_SUCCESS) { + LOG(ERROR) << "Unmap buffer failed, error: " + << OpenCLErrorToString(error); + } +#ifdef MACE_ENABLE_RPCMEM } +#endif // MACE_ENABLE_RPCMEM } bool OpenCLAllocator::OnHost() const { return false; } +#ifdef MACE_ENABLE_RPCMEM +void OpenCLAllocator::CreateQualcommBufferIONHostPtr( + const size_t nbytes, + cl_mem_ion_host_ptr *ion_host) { + void *host = rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_FLAG_CACHED, + nbytes + opencl_runtime_->qcom_ext_mem_padding()); + MACE_CHECK_NOTNULL(host); + auto host_addr = reinterpret_cast(host); + auto page_size = opencl_runtime_->qcom_page_size(); + MACE_CHECK(host_addr % page_size == 0, "ION memory address: ", host_addr, + " must be aligned to page size: ", page_size); + int fd = rpcmem_to_fd(host); + MACE_CHECK(fd >= 0, "Invalid rpcmem file descriptor: ", fd); + + ion_host->ext_host_ptr.allocation_type = CL_MEM_ION_HOST_PTR_QCOM; + ion_host->ext_host_ptr.host_cache_policy = + opencl_runtime_->qcom_host_cache_policy(); + ion_host->ion_filedesc = fd; + ion_host->ion_hostptr = host; +} + +void OpenCLAllocator::CreateQualcommImageIONHostPtr( + const std::vector &shape, + const cl::ImageFormat &format, + size_t *pitch, + cl_mem_ion_host_ptr *ion_host) { + cl_int error = clGetDeviceImageInfoQCOM( + opencl_runtime_->device().get(), shape[0], shape[1], &format, + CL_IMAGE_ROW_PITCH, sizeof(*pitch), pitch, nullptr); + MACE_CHECK(error == CL_SUCCESS, "clGetDeviceImageInfoQCOM failed, error: ", + OpenCLErrorToString(error)); + + CreateQualcommBufferIONHostPtr(*pitch * shape[1], ion_host); +} +#endif // MACE_ENABLE_RPCMEM } // namespace mace diff --git a/mace/core/runtime/opencl/opencl_allocator.h b/mace/core/runtime/opencl/opencl_allocator.h index 9ee9c81d..0c2783a1 100644 --- a/mace/core/runtime/opencl/opencl_allocator.h +++ b/mace/core/runtime/opencl/opencl_allocator.h @@ -16,6 +16,7 @@ #define MACE_CORE_RUNTIME_OPENCL_OPENCL_ALLOCATOR_H_ #include +#include #include #include "mace/core/allocator.h" @@ -29,7 +30,7 @@ class OpenCLAllocator : public Allocator { ~OpenCLAllocator() override; - MaceStatus New(size_t nbytes, void **result) const override; + MaceStatus New(size_t nbytes, void **result) override; /* * Use Image2D with RGBA (128-bit) format to represent the image. @@ -38,23 +39,37 @@ class OpenCLAllocator : public Allocator { */ MaceStatus NewImage(const std::vector &image_shape, const DataType dt, - void **result) const override; + void **result) override; - void Delete(void *buffer) const override; + void Delete(void *buffer) override; - void DeleteImage(void *buffer) const override; + void DeleteImage(void *buffer) override; - void *Map(void *buffer, size_t offset, size_t nbytes) const override; + void *Map(void *buffer, + size_t offset, + size_t nbytes, + bool finish_cmd_queue) const override; void *MapImage(void *buffer, const std::vector &image_shape, - std::vector *mapped_image_pitch) const override; + std::vector *mapped_image_pitch, + bool finish_cmd_queue) const override; void Unmap(void *buffer, void *mapped_ptr) const override; bool OnHost() const override; private: +#ifdef MACE_ENABLE_RPCMEM + void CreateQualcommBufferIONHostPtr(const size_t nbytes, + cl_mem_ion_host_ptr *ion_host); + void CreateQualcommImageIONHostPtr(const std::vector &shape, + const cl::ImageFormat &format, + size_t *pitch, + cl_mem_ion_host_ptr *ion_host); + + std::unordered_map cl_to_host_map_; +#endif // MACE_ENABLE_RPCMEM OpenCLRuntime *opencl_runtime_; }; diff --git a/mace/core/runtime/opencl/opencl_extension.h b/mace/core/runtime/opencl/opencl_extension.h index da3ba855..dabf81fe 100644 --- a/mace/core/runtime/opencl/opencl_extension.h +++ b/mace/core/runtime/opencl/opencl_extension.h @@ -37,4 +37,8 @@ typedef cl_uint cl_priority_hint; /* Accepted by clGetKernelWorkGroupInfo */ #define CL_KERNEL_WAVE_SIZE_QCOM 0xAA02 + +// Cache policy specifying io-coherence +#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9 + #endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_EXTENSION_H_ diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 17f1dd5a..cb2e650f 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -230,6 +230,37 @@ GPUType ParseGPUType(const std::string &device_name) { } } +#ifdef MACE_ENABLE_RPCMEM +IONType ParseIONType(const std::string &device_extensions) { + constexpr const char *kQualcommIONStr = "cl_qcom_ion_host_ptr"; + + if (device_extensions.find(kQualcommIONStr) != std::string::npos) { + return IONType::QUALCOMM_ION; + } else { + return IONType::NONE_ION; + } +} + +uint32_t ParseQcomHostCachePolicy(const std::string &device_extensions) { + constexpr const char *kQualcommIocoherentStr = + "cl_qcom_ext_host_ptr_iocoherent"; + + if (device_extensions.find(kQualcommIocoherentStr) != std::string::npos) { + return CL_MEM_HOST_IOCOHERENT_QCOM; + } else { + return CL_MEM_HOST_WRITEBACK_QCOM; + } +} + +std::string QcomHostCachePolicyToString(uint32_t policy) { + switch (policy) { + case CL_MEM_HOST_IOCOHERENT_QCOM: return "CL_MEM_HOST_IOCOHERENT_QCOM"; + case CL_MEM_HOST_WRITEBACK_QCOM: return "CL_MEM_HOST_WRITEBACK_QCOM"; + default: return MakeString("UNKNOWN: ", policy); + } +} +#endif // MACE_ENABLE_RPCMEM + const char *kOpenCLPlatformInfoKey = "mace_opencl_precompiled_platform_info_key"; } // namespace @@ -311,6 +342,35 @@ OpenCLRuntime::OpenCLRuntime( return; } +#ifdef MACE_ENABLE_RPCMEM + const std::string device_extensions = + device.getInfo(); + ion_type_ = ParseIONType(device_extensions); + if (ion_type_ == IONType::QUALCOMM_ION) { + qcom_ext_mem_padding_ = 0; + cl_int err = device.getInfo(CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM, + &qcom_ext_mem_padding_); + if (err != CL_SUCCESS) { + LOG(ERROR) << "Failed to get CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM " + << OpenCLErrorToString(err); + } + + qcom_page_size_ = 4096; + err = device.getInfo(CL_DEVICE_PAGE_SIZE_QCOM, &qcom_page_size_); + if (err != CL_SUCCESS) { + LOG(ERROR) << "Failed to get CL_DEVICE_PAGE_SIZE_QCOM: " + << OpenCLErrorToString(err); + } + + qcom_host_cache_policy_ = ParseQcomHostCachePolicy(device_extensions); + + VLOG(1) << "Using QUALCOMM ION buffer with padding size: " + << qcom_ext_mem_padding_ << ", page size: " << qcom_page_size_ + << ", with host cache policy: " + << QcomHostCachePolicyToString(qcom_host_cache_policy_); + } +#endif // MACE_ENABLE_RPCMEM + VLOG(1) << "Using device: " << device_name; break; } @@ -776,6 +836,24 @@ GPUType OpenCLRuntime::gpu_type() const { return gpu_type_; } +#ifdef MACE_ENABLE_RPCMEM +IONType OpenCLRuntime::ion_type() const { + return ion_type_; +} + +uint32_t OpenCLRuntime::qcom_ext_mem_padding() const { + return qcom_ext_mem_padding_; +} + +uint32_t OpenCLRuntime::qcom_page_size() const { + return qcom_page_size_; +} + +uint32_t OpenCLRuntime::qcom_host_cache_policy() const { + return qcom_host_cache_policy_; +} +#endif // MACE_ENABLE_RPCMEM + const std::string OpenCLRuntime::platform_info() const { return platform_info_; } diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index f06f313a..2aefdde8 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -49,6 +49,12 @@ enum OpenCLVersion { CL_VER_2_1, }; +#ifdef MACE_ENABLE_RPCMEM +enum IONType { + QUALCOMM_ION, + NONE_ION, +}; +#endif // MACE_ENABLE_RPCMEM const std::string OpenCLErrorToString(cl_int error); @@ -86,6 +92,13 @@ class OpenCLRuntime { Tuner *tuner(); bool is_opencl_avaliable(); +#ifdef MACE_ENABLE_RPCMEM + IONType ion_type() const; + uint32_t qcom_ext_mem_padding() const; + uint32_t qcom_page_size() const; + uint32_t qcom_host_cache_policy() const; +#endif // MACE_ENABLE_RPCMEM + void GetCallStats(const cl::Event &event, CallStats *stats); uint64_t GetDeviceMaxWorkGroupSize(); uint64_t GetDeviceMaxMemAllocSize(); @@ -144,6 +157,13 @@ class OpenCLRuntime { bool out_of_range_check_; uint64_t device_global_mem_cache_size_; uint32_t device_compute_units_; + +#ifdef MACE_ENABLE_RPCMEM + IONType ion_type_; + uint32_t qcom_ext_mem_padding_; + uint32_t qcom_page_size_; + uint32_t qcom_host_cache_policy_; +#endif // MACE_ENABLE_RPCMEM }; class OpenCLProfilingTimer : public Timer { diff --git a/mace/core/runtime/opencl/opencl_wrapper.cc b/mace/core/runtime/opencl/opencl_wrapper.cc index 552c413a..41dd529b 100644 --- a/mace/core/runtime/opencl/opencl_wrapper.cc +++ b/mace/core/runtime/opencl/opencl_wrapper.cc @@ -216,6 +216,15 @@ class OpenCLLibrary final { using clGetImageInfoFunc = cl_int (*)(cl_mem, cl_image_info, size_t, void *, size_t *); + using clGetDeviceImageInfoQCOMFunc = cl_int (*)(cl_device_id, + size_t, + size_t, + const cl_image_format *, + cl_image_pitch_info_qcom, + size_t, + void *, + size_t *); + #define MACE_CL_DEFINE_FUNC_PTR(func) func##Func func = nullptr MACE_CL_DEFINE_FUNC_PTR(clGetPlatformIDs); @@ -265,6 +274,7 @@ class OpenCLLibrary final { MACE_CL_DEFINE_FUNC_PTR(clGetEventInfo); MACE_CL_DEFINE_FUNC_PTR(clGetEventProfilingInfo); MACE_CL_DEFINE_FUNC_PTR(clGetImageInfo); + MACE_CL_DEFINE_FUNC_PTR(clGetDeviceImageInfoQCOM); #undef MACE_CL_DEFINE_FUNC_PTR @@ -400,6 +410,7 @@ void *OpenCLLibrary::LoadFromPath(const std::string &path) { MACE_CL_ASSIGN_FROM_DLSYM(clGetEventInfo); MACE_CL_ASSIGN_FROM_DLSYM(clGetEventProfilingInfo); MACE_CL_ASSIGN_FROM_DLSYM(clGetImageInfo); + MACE_CL_ASSIGN_FROM_DLSYM(clGetDeviceImageInfoQCOM); #undef MACE_CL_ASSIGN_FROM_DLSYM @@ -802,6 +813,26 @@ CL_API_ENTRY cl_int clGetImageInfo(cl_mem image, } } +CL_API_ENTRY cl_int clGetDeviceImageInfoQCOM( + cl_device_id device, + size_t image_width, + size_t image_height, + const cl_image_format *image_format, + cl_image_pitch_info_qcom param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret) + CL_EXT_SUFFIX__VERSION_1_1 { + auto func = mace::runtime::OpenCLLibrary::Get()->clGetDeviceImageInfoQCOM; + if (func != nullptr) { + MACE_LATENCY_LOGGER(3, "clGetDeviceImageInfoQCOM"); + return func(device, image_width, image_height, image_format, param_name, + param_value_size, param_value, param_value_size_ret); + } else { + return CL_INVALID_PLATFORM; + } +} + // Command Queue APIs CL_API_ENTRY cl_command_queue clCreateCommandQueueWithProperties( cl_context context, diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 92414a72..dc7d24b4 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -426,10 +426,11 @@ class Tensor { class MappingGuard { public: - explicit MappingGuard(const Tensor *tensor) : tensor_(tensor) { + explicit MappingGuard(const Tensor *tensor, bool finish_cmd_queue = true) : + tensor_(tensor) { if (tensor_ != nullptr) { MACE_CHECK_NOTNULL(tensor_->buffer_); - tensor_->buffer_->Map(&mapped_image_pitch_); + tensor_->buffer_->Map(&mapped_image_pitch_, finish_cmd_queue); } } diff --git a/mace/mace.bzl b/mace/mace.bzl index 47d44edb..6322e035 100644 --- a/mace/mace.bzl +++ b/mace/mace.bzl @@ -109,6 +109,12 @@ def if_quantize_enabled(a): "//conditions:default": [], }) +def if_rpcmem_enabled(a): + return select({ + "//mace:rpcmem_enabled": a, + "//conditions:default": [], + }) + def mace_version_genrule(): native.genrule( name = "mace_version_gen", diff --git a/mace/ops/opencl/buffer_transformer.h b/mace/ops/opencl/buffer_transformer.h index f3df8bc4..0dcec529 100644 --- a/mace/ops/opencl/buffer_transformer.h +++ b/mace/ops/opencl/buffer_transformer.h @@ -66,7 +66,9 @@ class OpenCLBufferTransformer { << " with data type " << dt; internal_tensor->Resize(input->shape()); const uint8_t *input_ptr = input->data(); - Tensor::MappingGuard guard(internal_tensor); + // No need to finish the opencl command queue to write to the tensor + // from CPU, this can accelerate the mapping if using ION buffer. + Tensor::MappingGuard guard(internal_tensor, false); uint8_t *internal_ptr = internal_tensor->mutable_data(); memcpy(internal_ptr, input_ptr, input->raw_size()); // 2. convert the internal GPU Buffer to output. diff --git a/test/ccunit/mace/ops/batch_norm_test.cc b/test/ccunit/mace/ops/batch_norm_test.cc index 83c8219f..0a07fc64 100644 --- a/test/ccunit/mace/ops/batch_norm_test.cc +++ b/test/ccunit/mace/ops/batch_norm_test.cc @@ -134,14 +134,16 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { .AddFloatArg("leakyrelu_coefficient", 0.1) .Finalize(net.NewOperatorDef()); + net.Setup(DeviceType::GPU); + // Tuning setenv("MACE_TUNING", "1", 1); - net.RunOp(DeviceType::GPU); + net.Run(); unsetenv("MACE_TUNING"); // Run on opencl - net.RunOp(DeviceType::GPU); - net.Sync(); + net.Run(); + ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5, 1e-4); } @@ -200,14 +202,15 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { .AddIntArg("T", static_cast(DataType::DT_HALF)) .Finalize(net.NewOperatorDef()); + net.Setup(DeviceType::GPU); + // Tuning setenv("MACE_TUNING", "1", 1); - net.RunOp(DeviceType::GPU); + net.Run(); unsetenv("MACE_TUNING"); // Run on opencl - net.RunOp(DeviceType::GPU); - net.Sync(); + net.Run(); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-1, 1e-2); @@ -266,14 +269,15 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { .Output("Output") .Finalize(net.NewOperatorDef()); + net.Setup(DeviceType::GPU); + // tuning setenv("MACE_TUNING", "1", 1); - net.RunOp(DeviceType::GPU); + net.Run(); unsetenv("MACE_TUNING"); // Run on opencl - net.RunOp(DeviceType::GPU); - net.Sync(); + net.Run(); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5, 1e-4); @@ -333,14 +337,15 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { .AddIntArg("T", static_cast(DataType::DT_HALF)) .Finalize(net.NewOperatorDef()); + net.Setup(DeviceType::GPU); + // tuning setenv("MACE_TUNING", "1", 1); - net.RunOp(DeviceType::GPU); + net.Run(); unsetenv("MACE_TUNING"); // Run on opencl - net.RunOp(DeviceType::GPU); - net.Sync(); + net.Run(); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-1, 1e-2); diff --git a/test/ccunit/mace/ops/buffer_to_image_test.cc b/test/ccunit/mace/ops/buffer_to_image_test.cc index 644283d4..b1252876 100644 --- a/test/ccunit/mace/ops/buffer_to_image_test.cc +++ b/test/ccunit/mace/ops/buffer_to_image_test.cc @@ -46,6 +46,9 @@ void TestBidirectionTransform(const OpenCLBufferType type, .Transform(&context, b2i_output, type, MemoryType::GPU_BUFFER, 0, i2b_output); + net.Setup(DeviceType::GPU); + net.Sync(); + // Check ExpectTensorNear(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), 1e-5); @@ -187,6 +190,9 @@ void TestDiffTypeBidirectionTransform(const OpenCLBufferType type, .Transform(&context, b2i_output, type, MemoryType::GPU_BUFFER, 0, i2b_output); + net.Setup(DeviceType::GPU); + net.Sync(); + // Check ExpectTensorNear(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), 1e-3, 1e-6); @@ -227,6 +233,9 @@ void TestStringHalfBidirectionTransform(const OpenCLBufferType type, .Transform(&context, b2i_output, type, MemoryType::GPU_BUFFER, 0, i2b_output); + net.Setup(DeviceType::GPU); + net.Sync(); + // Check ExpectTensorNear(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), 1e-3, 1e-6); diff --git a/test/ccunit/mace/ops/buffer_transform_test.cc b/test/ccunit/mace/ops/buffer_transform_test.cc index f29a2e01..fe6d6987 100644 --- a/test/ccunit/mace/ops/buffer_transform_test.cc +++ b/test/ccunit/mace/ops/buffer_transform_test.cc @@ -59,6 +59,9 @@ void TestBidirectionTransform(const OpenCLBufferType type, .Transform(&context, bt_output, type, MemoryType::GPU_BUFFER, 0, output); + net.Setup(DeviceType::GPU); + net.Sync(); + if (DataTypeToEnum::value == DataTypeToEnum::value) { EXPECT_EQ(net.GetOutput("Input")->UnderlyingBuffer(), net.GetOutput("Output")->UnderlyingBuffer()); @@ -96,6 +99,9 @@ void TestArgumentTransform(const index_t input_size) { OpenCLBufferType::ARGUMENT, MemoryType::GPU_BUFFER, 0, output); + net.Setup(DeviceType::GPU); + net.Sync(); + index_t expected_size = RoundUp(input_size, 4); EXPECT_EQ(expected_size, output->buffer_shape()[0]); diff --git a/test/ccunit/mace/ops/opencl/out_of_range_check_test.cc b/test/ccunit/mace/ops/opencl/out_of_range_check_test.cc index 8a17c2d2..5ee423d3 100644 --- a/test/ccunit/mace/ops/opencl/out_of_range_check_test.cc +++ b/test/ccunit/mace/ops/opencl/out_of_range_check_test.cc @@ -110,7 +110,7 @@ MaceStatus BufferToImageOpImpl(OpContext *context, bool is_out_of_range = false; if (runtime->IsOutOfRangeCheckEnabled()) { oorc_flag->Map(nullptr); - is_out_of_range = *(oorc_flag->mutable_data()) == 1 ? true : false; + is_out_of_range = *(oorc_flag->mutable_data()) == 1 ? true : false; oorc_flag->UnMap(); } return is_out_of_range ? MaceStatus::MACE_OUT_OF_RESOURCES diff --git a/third_party/rpcmem/BUILD.bazel b/third_party/rpcmem/BUILD.bazel new file mode 100644 index 00000000..1a1a061c --- /dev/null +++ b/third_party/rpcmem/BUILD.bazel @@ -0,0 +1,24 @@ +# These files are generated fron rpcmem project + +licenses(["notice"]) + +exports_files(["license.txt"]) + +load( + "//mace:mace.bzl", + "if_android_arm64", + "if_android_armv7", +) + +cc_library( + name = "rpcmem", + srcs = if_android_armv7([ + "armeabi-v7a/rpcmem.a", + ]) + if_android_arm64([ + "arm64-v8a/rpcmem.a", + ]), + hdrs = [ + "rpcmem.h", + ], + visibility = ["//visibility:public"], +) diff --git a/third_party/rpcmem/arm64-v8a/rpcmem.a b/third_party/rpcmem/arm64-v8a/rpcmem.a new file mode 100644 index 0000000000000000000000000000000000000000..982714b75645f2a408f7fde000376cf32d18055b GIT binary patch literal 24682 zcmd5^eQ;FQb-%kS!Ai(BF~WTKLyrWgq>-i7>RSbSg(NH{;-es9l3}`9b{C{r(u&n$ z%LUuA%^xsBTd0`~X~|eg{0Cx4OQyJ$GE8m9&V)9kB~y>fbUd~Aa4W}j#x25@5;W>L z_no7?dv+fd*tLBb@!tErbARXDbI(2ZzPInaWvfE*aK|ILWmanO8He`1`M+hA*Vd9m zd#+L{M=95aPy37ahP$KP!BAIMEUe6fU}sMv8t(~pF&c?>_H-urXlFbcHTToRP2s+$ zd&0qRDBLkqB@qkmjBtH@kR(uLvf)O%Gwqghv#s`AZbDBa9_x&h#>#TbOt(RG^U(BP zb!DX{_gDJKkL|h7HMg#HyIkN*ySUWftK<=*N)2vL?g#X40Nn3V(=WPz1BZv<@5m*O zI&!^HO-}928JcQz9|7#>fJ;?+;k?%sm~_K-4akkoE%3YG7|-Lm4?N1B<5G1+O65bB z9xiezta}*GS$BD%hmpJBWC6%nu7TB956=@`jK>S$xZSNrEZ0Q!t@O3K10&0{Wr0f~eX(2DW*#4=hEy3SDYsYVW$C>y3d) zX!nuVJ;8TKSKR@Zs+rpR+7Q&$c1?jVY#umv0aupVJ+=3Z6W>&-4e)%dJMD4*hU>P^>?=b`T4vw@e8x7MZdmx8YE zcmk8xJ+5N(?Z+Nf+yKY6kCh+gRQAAT+#){iQ6~er>P)g9Y?=4wsPdPeRYO*;YFm&y zQSh-dS4fdTHtVKZZW{M59b zbgmkyqW-)e&UbhMBPb7Xv-{Ux)rNIg9({6P>A;A0>d_t|nG_AHq3(=Vw1Mqw;XfRB>(Q1@aT|M#hZdtVWDc>r4se?LgU zZC#&$E*MxT?FipBu(==po&|i&lc&eRnCEVPmOh^E2fcvb>mQq^CS^N6xnGT-Pus!g zE*M`f&v5+mH|#NhvTEK$TFi_U*!`1ikMb9Kk~|H_;{qPfl9L?613BqD(iW7xG&eB0 ze`R3g@>~TzQvOlMAMe8-^T2rg=|HZUe0^?)xX2BpwQp>Z()sE-_4lol z9>eNLPIh1tu>&jBP^C*v=4ab=yz1oU=iJ`!zVwh9Sq5W7+It1Y1=@u?1EBl)rGb$b zz;-X>$P-GP>3DYO7LTjoUX-bWKA@a2d2Wt6gIFbCNgqN?0AuZau=PJ-?4k^g>HFMvZZTy!CR5vK$10Bd_dWLk z#{$g(4LEa5sY>KR&S~)T=%N73A&)`sTRZgeI; z6?_EwV)ElfE|?a@xboQJ3Uk3AjEAuWW?w0P$4aGjArIC?z2iBf)yY1x?dCW-Fi*XT zG6f#B8)X(Rc=@%JX^JZ!vCUfZH-~;rR8T->I{G6u)lM#p9 z0_#$pJ9K`@PPX|Qz=JvU5!j~cw>jH^?-Jxr-Pgn+ecu7O0b=4zruvyMl!qAqX#jlh zbbidOCbvyYSN(HJI}7}r6Mi;5y$I)k`DC|@k<$~E>Jd* zojy;D-Jd#63Sq86S#z97pQDaU^N{kS^CMTjC-XSNbC8cF$FdYm9rijTD16R&8@d4|WVvK>*wSdiyHnXkRCm5TUGq^1+u4G$EQfy1c4|JZ< zmfMSU)KlnAU$17e3u7H|jQ3GkC$rtZfcWEmPl_!CeqJA8yE2V^yiS2NE6sZgVa~d* z_`f{)dfpm^x#4lBHwts&r_EJ!rluE-Z~O;X$Ge6o=hop`e5z3$!8s@d_Tu~(0QL>q}Vw zYM!HMyWF7TOq2d0UgtcPGO?F5@}&d7&(^PsY7F46ko0nlTw&r;j?Luwuj&P$a|zg^eF5h|kG{U-*eHbdV0`=# zVrd@wnvNC4%e*B6Bj0uB{wnC2_uJ_@?opG25O1!)A-Fa=5x*R^;{f8K zSWQjO-22FfcA#D6TE>F5PfxpsaE(0;^YIC2U(Vn`xHeI{b6^e~gZ1RsL!DjG$g*4~`KtQ9L?~gc)nZo6sOeCnvZB7e zvu9_l-U`Dl!p@#u_11>1t*zFw)mCA*wc6^|Hx3K?I{zdZ+gVuLT~t)$q#UW5#d!!xV2^d`mGk+Y>maA zwn9A-DcQJjL)*H?v}EJ@1`>2~W_4v+zbq?+)&zHUg?9B>D^tj~ZfI<1Y-w(C>>6Z? zQrCb(KO<9W?|c@r+O$Uctkn>CJrG9_d^8}!v7SWdF8daCVRwnu5e@YQyF-0Xl%Ry| zkrE3=5$|HOmRKu$W6r@^4XO=~e35EX-SVC?TRx@VfFOLD(sA_h3c(!8#iEDv3r>23Y~Az)mp|LJe?71MWn>C zQ^_s(EBrNSUEYm93^~}N<&FOORM}K#-%7lyva%kV*Aq)veS3O)V{ypLr4DvfRhQSd z#QG9eZ%qHlrEBes?dgHsx1!gvR!t>&b4A1|gl6=_w9ks_t)}%2!8Kdg2G?zDY-@#- zzoC)5>&R7G>%;}EK%QXZ)-7!tVdBtJwG&ST*d*uY!adNa-b6f@D2l^mlZXa6c7hN) z4ldC|M%UaqNot?n5l>}Zjqc1RqwzjWLonU;M4!sMLaBl^5O%yF+ZEjv3O}7$ywc}S zbC}Os#Zw7r+Zl0ik*?aDT)^B;wN$6fwq|*Gy-vnfB$NnYto7RCQ;&QaugO8`(BN`= zU>I4wGZs#C@$g^a*xd5k`tI&fZ&JB~0>3ZKqqpdFXZIq{>>}{i9I3v(GgC4J2`qWL zAAY1a6p6&0BE%2%j3)SSnLL|1sV2{^TKdAg@JaZ^ySV7#@>!{c`i7~uT{qR_+pC_w zP*@KO?zIi;TAQ2fD+}^7tnoCnRa}IjScsP%CHC}H+7<07u{Leq*cRN}+|aa7?>)YG zU0ZXBg=|||8a6jKm7x1^^px&|3C-uY-mCP3O~%;%O4yU?;xFB#igIHyAtpdrXIu6% z++04=++~U7gQsWjp z&I{ej8X$@$?{(Zc$*dc7D`V=?#`>&g{~D-gG-B3uQf;19wb~5JyW3f4V12{X+gd3k zb#o@Zz3S;}q%VCItxdfP`jxOJH7dS*lla;gZ#{fr3yrGs^rShZ)U6B>;3c-zjzMocG>MQkIg&m1R zZ(n^`8T@XgpR3;$i|y))_Qm$Z!_m@kth+1`i*NR(2i<e`w*U(s!GcXd74ZPkRTtIKO6)pgODXl;3Ud9*xI zSr-YzVQqOuO?6GUDjce*sty75OzT=(zqQ`dZRvZm2cEidD4)2L%2NLWkF}+i2jM{n zJR{3ujl&b8qLgb~|Ce&+>u}^!*=jzaylo`hBD|2cSHK=O{Oczbr)c}4an5_Y&PjN$f24){*6CzE`;1O5XC z{0#^Ge{qn9XT3A({hkAU)&U3lj6C?zL#B2;=wJ^#1Cvp{6>tlt&q%ih5}XYsmn~BM-S$BRp>J6Zj?} z&$eP5VnMyg!}vChD#iR39d8#n9jXZJFpovYU9^?tSC!FnM9Nz!XWhrdGX7B3%1A>Q7H4!fDPY{mw==ZxT=BK~2)&-m*GkC`7v1&@c87*_=kj{%3!d*0PekxA-sQm4FL=I1$zwq9 zFn-j5=a|7`=9A-u%XmIz$mf9z&XL~JgtH$QA0}M-b3*X^9mUB_!NYhyJhY94{r?#C z-(td9FXQl>yDn1iD!~KKyz3GWJdC$E@FWCJn2!4e599kCcn%AmKO%dM3LeH!2>E}f z<5NPO@iT-=KfEq@j!-|oC3qOWDdgRp6e)h$|BUAmF7?hQT*j4U$eZ&;3E|xDKO?<9 z!dWllwFZxwPnrbJBAT>Y1P|jpj-~zENM73CVdypOPYAugB)$DYFXICSk7@q_!DGRT zZCDNo9>$M4@SHYqcs_onye0UX;AJ)}=LA3F7YrWL&UXb*KdrY%1rOub9C*eJ9?VM? z8Utm*;4yK&6vX}_*J$}H(vR_j^1MFu5{~g>;uhh^LoU!fwFVF7z2DL?Ur%rxGTvhF znEq)c9Ovr?HM9NScEWMKHt{Zl$HWr`&jSVz`xSXie8Au_@dJeO_-d!|b%=0o7vqCM zemBMQD?*;}VIhBpI_IpAXZ$?ja@@Ub;O4sVUBUlv)M=xFpYaKU$Ltp`-EUw&zg z;9*?Nwe?7zEW)|Hysq{V&h27+u_2H4 z|264-fN-{-@#Ta|y#c{9Mm(*8hw+F5PY2=B&VEDQoG%U#&T$BjG3s)NaJHZEL4(JP z=Tm~`&xq%=;9-2&f#)pY7+3Jl%uIQmaEu=lzd$(K{|@QBNI2Wi_^6P-i~M|5$TNP8 zaM@p8ic{A6-{g!u!dWll_!1fx$+Ott!Fi!YOF=FrT=rv$A&>DEq~zoyob@tZOSsg# zU+_FdemEd_7(XuL3rX*wkY{{Y$UjW|a#qMQevWWy=LG{d^YBH%|0MbUUBS=zRU!Wx zIuFX2kZ1fO!lgZV&`fm*=o>eqwpXn+(0?xZ7ml z=D64{_(Q}G-%8Pi`5Espcuf9&!Sie4*(Z1y-|xV4NbqEl{|^fu#*aDhoDw_(#B*Bk zFh1~~;nEKu8SCa;ZZszS%g1_}Xdpw*L{EQD9JZ9XU7d$^9zg-YKj9+x%85KO^#B){f zFn-N}=OeI597TKJpF>Fn|SsK9>(`O@Ej97 z9}v%R!Nd3~Lf%dCCxkrXr-l4aNdAnFXZ$U~Wxt#^aC4pZw%~t{#@9u`&-l2(WA=-W z=4XsI+SLQKk`CnW7n_8+WH~dHljRk90XpIFDb(&k-*B{cXYX zKJi=>JdBSz@LVGt?Kkf`O%RUuo4A*P1MM;Ke8Rb1BV^BF!ns|H7ZQ%Ym(i~FLkS(o z{OKcH=Fe4xqdjK6Z6h4{DV1yfLxwyJ35_2mT=wHJ!eu`mC!E_mkFHw=3Fr1Qeu{9} zkEaQj?Hwjuw)Y(2tQS9if#p2mte5e33738z6+GW3o~wd~@oR)j`)?Y!>HqnZj8LAJ zVrZvDINFaP)`B3E<%G-iUZuffuJ_smkIkagHo|3IN(gz2D)bYMenWd0-$yvwZ|0=| z!Nd7vzu;l~Eg{eR=Y%}t=Ltu>W}dk!c)(O$;QJuD$o6KD0qkeX7E@lrWqb1om+hS| zc<4~&3m(SXggoorCgd64PPlAuLhz9Js-JM#UcSCb8^0u&LZ1D}gH85t77aq_XD{K>Z+V1E zzs)C{^G^f$EuV1q8{^9fmwsz9cufDa7(6E4YVerjVw2!$r9f#Tobw6e`wbp5pByG! z`r#9Jk)b4*{vt9esT&!&d@= zp+q9Cy7%|QSv8*Z!}a2-+73ZKDJ@&!_W*6AvSy&K2vj4xGUBdg*NoYW4n4n-7}vdPyaBv z`CW7Ru#5@mmoL*)h)RN7m2M*+m>;WQj325Ft5WFqL}&+WpYF!(@<^#4|0{=3 zi0Sv3L7*HCm-mu(HrQjL{Fd1`Jd+#2I^r4xny5(eSE&xEPTjYd|sCKO}UF@=23EA%Fc)Oi!Rc_T>m09 L4C`UOk^cKX2cUh6 literal 0 HcmV?d00001 diff --git a/third_party/rpcmem/armeabi-v7a/rpcmem.a b/third_party/rpcmem/armeabi-v7a/rpcmem.a new file mode 100644 index 0000000000000000000000000000000000000000..faa1baa5f8e7a689e66ac126df0446c27d08e061 GIT binary patch literal 18718 zcmd5^3w%`7nLm@6OhQBnG)jzsHw5Se@|brf5ipO*We9o6t7y@g$;>1(CCQ941B8e= zzR_A6Dz#|cF57kcqx2O}yFQ9R*PmJyH?gkmR&YbNt)RAzmTI)!vj6YibMMR~j0S48 z!|%-h`=7^mzVn^uy>m$MQg753T%K1cq<@SlRF+e}1;ONS@Eo2z#@IB*G_4;Oq7h$D zpvUd)?hgA{DshK;*tTAu+voKK)gtk*yVI|f zmq=0ptDM;Ily;v^G%3v%Jb6kJdi~LG$X^kz%&Saw8;YmWRR7tnR!OKTjz?t$#!wf9R7<;HuiqE!6U|`dcI{0wbkM1A*yelO3IZwU+*He-G<*xFkK4-s^ z1vNpvKyiy*n&LjECNgJm@Zj8)Y{A^7d9H29(dV2VVZ~3KVDl+gk&7W0Nrf$g@0_~y zTBpG??Z^eL*%P!Gg3k9eGcVfr;=4M{MY||})&k0(eS~BdywH$4q*;*X>T~KmR3jU@ z2ljR&^ddwMIuNcvSc|Y8VG+U)5e5++K%la$j~NPwA}bOPhs7Ll(ygcXpvG|Uc%pXi z5Mq!{Ws49lKp1;i`=jy`Y{r)db>d;y4w2e(zNp{H`ZNaZh(RPBFC07WIxe+nc$^Jr zh5sIB`4m4#@)WU~M7VuyAn~gvtCeWACsTp<(^rX=P7|7V+>2*4{RzVYx$1V~HztyV!@i zlN-chaka?C4t+qj+?Y5#K7Ef?%-*FJo^DFkN|IK1vPrh#P!n5ll}K^octKmgQ!DHf z;~sH*db2p5yOG-XNDHN+UxUnW`O!yJdtMu#&T8K~bv1kMRAfg(&cH)&<-6NPvhN@K zQ=+o%WmdfZt^Bsuk^OHCT-rLq3~j@0Cll?fMt3)5Ez%2FCH>CaV;Vttb3;Vma9>*M z^?Tv9tLu3iH=sQmJ|GKr?BCg>7xjWglr3)?J7qmSdV3Slt{3oa>36;kuJ|8X;?dDz zF@m^FsB;tQ+&>B{^0V*HAry4EL~-$2g?&N5`ipu4=u`X_V@_5~IB?h%u*ma-0nw8rKcV z@r7f1+FlsXPXtER@!CtcA6KJmh8_e(Z0FcN1EG4n^#@qyeP<)6wF=ocRz7et?p&j7@#MJ|@0@fhRL&yVX2xE|ciBN=aAC*IR9kLGL z*N}gU#q?&L@J~oO4C2 zABnGtxxrtcl^Vm=ksqsl%Y=>MgH6uyf*JizkH>jJtJeyYi`xFfR+7-in1i){+?)8G zT54a@DiJd=e?)8?W3@$C3v%x+9I8FZ7HTjrX@2f#pt94xj(!?AG&={QCi`x9)>a~U zk;b?&_~N?-8vQ=apAyAXTee8!y!MYbC+==45%YGxfYI6SBrWoYBZ*ccHkL@-sakS} z>v{2sAg#5qaqON1O*b5K0}-PU8JGUd4otA zZAZ7NHPOn0IeS^oPG$W$Jf1(eqapiV{lLtztjgN4UnQjV2kQ~7Mv1XqGHgOfGX2J+DJ;!(8YiQx64e^9N` zpLQP0bC}AKcN&L(qvj_KHd=e~Zy%-gpYq7sX*JN_OZAL2QTg<=Tt+2>55AaI=J_;6 z>x9NwGW<+h><2+sPAR#bk`cD>tJb(r%ty#W$U=Az^W|3v zzd+cIKywMT*5(YX7im1t83Jq18K4pOji&6~xdVJGN+Vt#>sn#j*OO!Vt7)yqdctUw z>#>p-NGmzDjz+k6^j74QeSV8H(gjcIIKSaEdgQfzzS4F{kKt&as)uxwrAIL$FCC7l z@ln?Sk<$8{WU8x;<qR zb$o_C5oAHnqsvkDl2NhgfJmdN-x(+&%by5-=Z$K$d~#&lPCK$m_n!SmHGf4~ePj>i z&=|9a7o)cCzOew`n$bo0mc2;+q*}Y*U_;-*+I|CRjPN1k zyMQi?J8AC#zYcN%Fc0BF$h6MW9`GE(6A1ed&O^{4`~`c#dkC*1JVW~e!ea<@F5QJd zXVV1Fe+BT=`_DW^=kIK>l1`>LW$JLsJa~MN=F!j+#8C?tuiWO$4eqF)KCHv~_wC@m z`tcEOkd+n=<$CChTj#+hw4j*h7J%77txy`g7`~wLrNR66&sF_6y1iCE_m!fiSFp?X zJ7)!>`{5}wlX__8=ml8aG zhd}3b55jVURs`A$TgG(tgAQ ztw&2tL8^6*%M_#?rTx*ls*P8U>mEONd`$Az`DzRI1WQH5RwnH6>8A&p}Px4>4ipPo6|5#}L$3BiD|G>rbziL>Bk)>lZr zvkKzy!5Q59=kdPMj!YY6=rjHB@QJLFZx`KH|NhH2hj6|`etHU#{Wy%XiW7Pr2=q)= zfbjR$uk^tkgW2LrsTCT-v=NufHQ_1k;?YLebpA}(fPDJkt@Sy>ub#-76)M_UKl^8P z@b^cS^R_H=Es^|4PmGuFaVPsGZ&3EBo#(Vsop=VDJHiB3mOaommV;eHD~Rv1V%>ml z-(cuzoS4Z!I_d0j^vGhVM7G>V!Ouf`#i(u{D_-O}^oMqqyEC`=bEv!FgD3M?+5X^# zp8cWhomk~Bqcxrd{}{y6q$wBYoUB>S_w+Nfu=c0w|G|;-)AY}Rem0;NN=5lp^iSyX z`IxIk2y+lTVc(u7b<@CPE%fX^Eb_{;L936buA7b-|is*aUT_nOPstZi%& zI>S+6O-su<0WY?~(QSga*FQ05LtE?Gl`Ev24J~y%V<*(dc@U1Otr&ANt$#x-j`rr+F9JfZKV&`VThijAK}b?LqH z_pZ@lS9&EH=nBQ~@>}TG+*u^@ig@ zY;zN_B=e_hRi^7|B?w%F4aD&@UL!jNY*1v(g&D z*;41OUnja(wl%D6#**LKz`dJMm&1`s7g|Aj+zsp2tZl=_A#K%}^jKii#QoW@8I6j> zqwaWV6q`*v;8wiCqr-GMHzZ`?QwWP^ZBQ3^)-y8SR zSc}N>Q=0jFz9uJWh6bDDiIK9Zhr+&iw=(?~W^As>QPb1ojU+42P=VPv$)g|9>zUn4 zdZso4KUzn+ug}hu(+v_Fd3(y2N4$Q2G}8z%qg+Z8jC4$%N||(%r&et8!Q8L~zb-B= zEjLZ6T9i*Q^^D7=n|x-)Cm$4AaNri}RyH>}a37`>JjYjwOb^Ib(wyS9Pf%;R&ps)| zlULZY+6N~*U!Ap%^hTbYn#qqEAE77nLDyOZMSPUYJ`VV1G`l$HqO;VnX z>D9xhcF?eyCNG*(-1nuT>D$9p9w<|4REGCdD>R?>Q1sC|oqSMO*|0`EDP%Y#*hqVN zb3RGU>7(sqcf6^aZJE4MeXRB=ZT~jXd?pT=y4Evv#uOD#eti9`C$-H2n^Ii(4`ENb z^FMo&?6QZXN2AQ^9^RCn?jeib2J?q1^?8eKTr*ZD-#ZozHlxj9HQ9s?{8r_sU;m;O zOSRQlT`Ksxy?D8=yhC$T7%R+zAsCNGVl|bO_^nF1x7-yDcXbD1;muKBpu!jKsf>ri z-M*kV)LSW+w;D}VmJV-+-5l^$S?y+%#bK-R8yyy(G2rMhdOJF7c1KmUvGj~|cXw~; z5vsg)yUF3VR|l#B4wK0gF!`<3ejljAWT~=O`D{LKmCf#jYPEIE&FfnPsV%WBy``+~ z68OR!@OFfl#ssm!Tw$zc`n;UFT+OtsJneMNd2EIzUo$gnmaaf^zGk+zP;;TCC~LlE z5&e{#_L+lwBFaa{-Z}LYe&?m%h2>8cdl3BBviA{{#eSp1oh1C`tEA5bFTiiS3SWo& zD2WvQ73d#L;r|MK3uYwqpG0}-2WBZMicG#pZ+W zNlCvH_0ezE@+XUJg)aKdTH!x}e){cN;cq}c{dTSJHk7B|!WBLX`sp`qh2I8!^c%Ut zhoEn5ioNecAN_W&q(8(Z{PzJgThAUv8p%e^4bXoaWv=1;yGZvz=T^=e!G8(f&-rVp zi|VWA{94HOAjCNT9OP*z6XyJ3lU~_ z?x~Xfg-Cl8p^Wo;k^UR-O3rQI1v!kBb3Oul>cH)s_keE&Z{~a&_`5n>~e_jJ_z?@op|9=be z*B8N~;Olw*r;&dJ=Ic*5r?LMW(iQzzgBQx zaHW5>;3vS9{{3gzXE;CEzn6gj1YGIgpF@B1112tu>A%CU z=UH&2ze3c0&1+6VjiWkpB1tery6yDK6FDF(JJ^B?H-4K7n7I!Ud#% zGo?K7S0<$Y8T?wTZ`3EV;Gfyx=OWOaM|>^f{|kZU6Y;%BE5~|;Zj$)hz#CAWkkU5; zkwx0miQfbA4)W98ZQ^eLcY=F(e){ierWAey;*}4zkH$vcE{s9Ke)$ev-YxQdy1bj@ zJEat#p!Dk*yOZ!5`HZE-xZew|-NM-S38V7eTWt(um^jiOA>1mjSDx+iIMZVMKTYv{ zn0r8qf06hC+{Yw@zT?2Wn{fviIPY3~p&b-Y_kKx#E+O=m0rL`=BZRP%{x1k|59f4$ znBwo_ILL9FqaKc?bOXm)j=MP?;yB7t!2G2AMnd?*$@zMYojksu^Bo+&MR)`J!ufL? zUnRU1cN+{(`(-t$f0qB80Hxk0113>cg_c=bpQLmTdiwM!b zHJrC|yqV)3j*oFH%8|=0=D3t&JI78!^nZf$`#3&8cn8i*oWI2J2*-Ch=I6@#MUJh6 z=zn^ap#Jp$X+HT0D;NtBn((YZh<=C=S{REHVv+76wBy-=unKkoJ+R|u;ts}k5E?Px z2}Sse5PEhIx)|F{XvY8hB3ywpJ&^o)oa2Xt@ZWjUp9ZdmEz+ZuOqwz_H+IK#{(RXaHMBMtqt}QIx!Cjt1%A< zFTp$@T#9)>xD4}va5?4y)@JRcm*>*Vjd8-p`C=-%pHWQF%N*$U$w;7U>*=+GjS2F!#tq$9he8i*JB?MwT z0`YS64LJz&Op!(=phT2(1{PP%J zgs|62i25CzF9lM*TH*%u6OVThVx4K`>FvZzF}?`lpA9^IBk{#d`Yhgtc>#$qpJO3M zfg`Gr(y`=7h$TqET8=J`?HpMo9*hROes|C2c%aYi55(fp@HSQv4Rlx3t!}9Z^o9I= zGDUivb;sh&?dC7g+}+_WcO;72d5m74^#maFsDHeG|0#p?3R3@Ifh5!QeOA#Ci!rG_ zZ#*6ib-9Eiut<3 zu>jf-iH5s+y*=vRZ83TaScNM^refd6!gFsghllY^r2cC5tMVRmoJ%7WgahEq8B)L{;W+DDhRQ zWLG7dDp^&@qDp25TM(}>sT>lxN|QpBT8ApxRmrAGR#md7l3A5ZRZ=@tE>V>^>}nmV zWLG7dDp^&@qDp4F)DD%KB(73baM;v3RLQPNHdV5!l0}uws${ZB?NGTyRpzj&b*PeE zm29eHRV9lmnXOVgRBn>EN>#yOQR`48yDHgK$*M{gRWhrR$s)Bwrb!N_JJU zsghNdEUIKSOYKm(N#ZJ11&2wkLzV2RWK$)pDp^#?Y$EIN&1Z#4;`mm$)Hp2AVd+vJ zCa9@GO>`KQY*ngcS0$S&Syjnmq$ZhF4vD6Y${~T%<}$@kc^3Asl)T$<#;ejNBQ#v3 zEskU*2}f(@!QQ8Q@VF#>7U6pumkFM>BOf+J9FrKTVw8{e$cqr5FM=bC^w|YnIIc-~ zC_>RU8*-|?1+b6ukv__!=zA8=HDsR|fj)0TU)>y8rf*84fFp)fztUzS&N#G3D<7p^ z+J`B>;!4U#&*I8Q(YFplN;GqnD>-CT(!LCeIwMLd + * rpcmem_alloc(21, ION_FLAG_CACHED, 2048); + * + * just give me the defaults, 2kb + * rpcmem_alloc(RPCMEM_DEFAULT_HEAP, RPCMEM_DEFAULT_FLAGS, 2048); + * rpcmem_alloc_def(2048); + * + * give me the default flags, but from heap 18, 4kb + * rpcmem_alloc(18, RPCMEM_DEFAULT_FLAGS, 4096); + * + */ +#define ION_SECURE_FLAGS ((1 << 31) | (1 << 19)) + +/** + * To flag start/end for rpcmem_sync_cache + */ +#define RPCMEM_SYNC_START 0 +#define RPCMEM_SYNC_END 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * call once to initialize the library + * NOTE: rpcmem_init is not thread safe + */ +void rpcmem_init(void); +/** + * call once for cleanup + * NOTE: rpcmem_deinit is not thread safe + */ +void rpcmem_deinit(void); + +/** + * Allocate via ION a buffer of size + * @heapid, the heap id to use + * @flags, ion flags to use to when allocating + * @size, the buffer size to allocate + * @retval, 0 on failure, pointer to buffer on success + * + * For example: + * buf = rpcmem_alloc(RPCMEM_DEFAULT_HEAP, RPCMEM_DEFAULT_FLAGS, size); + */ + +void* rpcmem_alloc(int heapid, unsigned int flags, int size); + +/** + * allocate with default settings + */ + #if !defined(WINNT) && !defined (_WIN32_WINNT) +__attribute__((unused)) +#endif +static __inline void* rpcmem_alloc_def(int size) { + return rpcmem_alloc(RPCMEM_DEFAULT_HEAP, RPCMEM_DEFAULT_FLAGS, size); +} + +/** + * free buffer, ignores invalid buffers + */ +void rpcmem_free(void* po); + +/** + * returns associated fd + */ +int rpcmem_to_fd(void* po); + +/** + * cache coherency management + */ +int rpcmem_sync_cache(void* po, unsigned int flags); + +#ifdef __cplusplus +} +#endif + +/** these are deprecated + */ +#define RPCMEM_HEAP_DEFAULT 0x80000000 +#define RPCMEM_HEAP_NOREG 0x40000000 +#define RPCMEM_HEAP_UNCACHED 0x20000000 +#define RPCMEM_HEAP_NOVA 0x10000000 +#define RPCMEM_HEAP_NONCOHERENT 0x08000000 + +#endif //RPCMEM_H diff --git a/third_party/third_party.cmake b/third_party/third_party.cmake index f8007587..02cd3c18 100644 --- a/third_party/third_party.cmake +++ b/third_party/third_party.cmake @@ -51,6 +51,7 @@ include(${PROJECT_SOURCE_DIR}/third_party/opencl-headers/opencl-headers.cmake) include(${PROJECT_SOURCE_DIR}/third_party/protobuf/protobuf.cmake) include(${PROJECT_SOURCE_DIR}/third_party/tflite/tflite.cmake) include(${PROJECT_SOURCE_DIR}/third_party/caffe/caffe.cmake) +include(${PROJECT_SOURCE_DIR}/third_party/rpcmem/rpcmem.cmake) if(MACE_ENABLE_HEXAGON_DSP) include(${PROJECT_SOURCE_DIR}/third_party/nnlib/nnlib.cmake) diff --git a/tools/bazel-build-standalone-lib.sh b/tools/bazel-build-standalone-lib.sh index 4b87f4b4..8a078113 100755 --- a/tools/bazel-build-standalone-lib.sh +++ b/tools/bazel-build-standalone-lib.sh @@ -34,12 +34,12 @@ mkdir -p $LIB_DIR/aarch64_linux_gnu/cpu_gpu # build shared libraries echo "build shared lib for armeabi-v7a + cpu_gpu_dsp" -bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a +bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a --define rpcmem=true cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ cp third_party/nnlib/armeabi-v7a/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ echo "build shared lib for arm64-v8a + cpu_gpu_dsp" -bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=arm64-v8a +bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=arm64-v8a --define rpcmem=true cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu_dsp/ cp third_party/nnlib/arm64-v8a/*so $LIB_DIR/arm64-v8a/cpu_gpu_dsp/ @@ -49,11 +49,11 @@ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu_apu/ cp third_party/apu/*so $LIB_DIR/arm64-v8a/cpu_gpu_apu/ echo "build shared lib for armeabi-v7a + cpu_gpu" -bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define quantize=true --cpu=armeabi-v7a +bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define quantize=true --cpu=armeabi-v7a --define rpcmem=true cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu/ echo "build shared lib for arm64-v8a + cpu_gpu" -bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define quantize=true --cpu=arm64-v8a +bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define quantize=true --cpu=arm64-v8a --define rpcmem=true cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/ echo "build shared lib for arm_linux_gnueabihf + cpu_gpu" @@ -72,12 +72,12 @@ fi # build static libraries echo "build static lib for armeabi-v7a + cpu_gpu_dsp" -bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a +bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a --define rpcmem=true cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ cp third_party/nnlib/armeabi-v7a/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ echo "build static lib for arm64-v8a + cpu_gpu_dsp" -bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=arm64-v8a +bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=arm64-v8a --define rpcmem=true cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu_dsp/ cp third_party/nnlib/arm64-v8a/*so $LIB_DIR/arm64-v8a/cpu_gpu_dsp/ @@ -87,11 +87,11 @@ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu_apu/ cp third_party/apu/*so $LIB_DIR/arm64-v8a/cpu_gpu_apu/ echo "build static lib for armeabi-v7a + cpu_gpu" -bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define quantize=true --cpu=armeabi-v7a +bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define quantize=true --cpu=armeabi-v7a --define rpcmem=true cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu/ echo "build static lib for arm64-v8a + cpu_gpu" -bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define quantize=true --cpu=arm64-v8a +bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define quantize=true --cpu=arm64-v8a --define rpcmem=true cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/ echo "build static lib for arm_linux_gnueabihf + cpu_gpu" diff --git a/tools/bazel_adb_run.py b/tools/bazel_adb_run.py index 564401eb..1679f604 100644 --- a/tools/bazel_adb_run.py +++ b/tools/bazel_adb_run.py @@ -95,6 +95,11 @@ def parse_args(): type=str2bool, default=True, help="Whether to use quantization ops") + parser.add_argument( + "--enable_rpcmem", + type=str2bool, + default=True, + help="Whether to use rpcmem") parser.add_argument( '--address_sanitizer', action="store_true", @@ -164,6 +169,7 @@ def main(unused_args): toolchain=toolchain, enable_neon=FLAGS.enable_neon, enable_quantize=FLAGS.enable_quantize, + enable_rpcmem=FLAGS.enable_rpcmem, address_sanitizer=FLAGS.address_sanitizer, debug_mode=FLAGS.debug_mode) if FLAGS.run_target: diff --git a/tools/cmake/cmake-build-arm64-v8a.sh b/tools/cmake/cmake-build-arm64-v8a.sh index e1efeed2..999b0b74 100755 --- a/tools/cmake/cmake-build-arm64-v8a.sh +++ b/tools/cmake/cmake-build-arm64-v8a.sh @@ -43,6 +43,7 @@ cmake -DANDROID_ABI="arm64-v8a" \ -DMACE_ENABLE_TESTS=ON \ -DMACE_ENABLE_BENCHMARKS=ON \ -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ + -DMACE_ENABLE_RPCMEM=ON \ -DCMAKE_INSTALL_PREFIX=install \ ../../.. make -j6 VERBOSE=1 && make install diff --git a/tools/cmake/cmake-build-armeabi-v7a.sh b/tools/cmake/cmake-build-armeabi-v7a.sh index 12fab64f..c98d196c 100755 --- a/tools/cmake/cmake-build-armeabi-v7a.sh +++ b/tools/cmake/cmake-build-armeabi-v7a.sh @@ -45,6 +45,7 @@ cmake -DANDROID_ABI="armeabi-v7a" \ -DMACE_ENABLE_TESTS=ON \ -DMACE_ENABLE_BENCHMARKS=ON \ -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ + -DMACE_ENABLE_RPCMEM=ON \ -DCMAKE_INSTALL_PREFIX=install \ ../../.. make -j6 VERBOSE=1 && make install diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 5c1c8569..831d015d 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -270,6 +270,7 @@ def bazel_build(target, enable_neon=True, enable_opencl=True, enable_quantize=True, + enable_rpcmem=True, address_sanitizer=False, symbol_hidden=True, debug_mode=False, @@ -303,6 +304,8 @@ def bazel_build(target, "--define", "quantize=%s" % str(enable_quantize).lower(), "--define", + "rpcmem=%s" % str(enable_rpcmem).lower(), + "--define", "hexagon=%s" % str(enable_hexagon).lower(), "--define", "hta=%s" % str(enable_hta).lower(), -- GitLab