Commit d860035c authored by Bin Li

Support Qualcomm ION buffer for GPU

Parent edfde3ca
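In short, when the OpenCL device reports the `cl_qcom_ion_host_ptr` extension, GPU buffers and images are now backed by ION memory allocated through rpcmem and handed to OpenCL via `CL_MEM_EXT_HOST_PTR_QCOM`, so mapping returns the shared host pointer instead of going through `clEnqueueMapBuffer`. The sketch below condenses the allocation path added in `OpenCLAllocator::New` / `CreateQualcommBufferIONHostPtr`; it is simplified (no extra padding, page-size check, or cache-policy detection) and the include paths are illustrative assumptions, not the exact headers used by MACE.

```cpp
#include <CL/cl2.hpp>    // OpenCL C++ bindings (MACE wraps its own copy of this header)
#include <CL/cl_ext.h>   // cl_mem_ion_host_ptr and the CL_*_QCOM flags, if the SDK ships them
#include "third_party/rpcmem/rpcmem.h"

// Condensed, illustrative version of the Qualcomm ION allocation path:
// allocate cached ION memory via rpcmem, describe it with cl_mem_ion_host_ptr,
// and create an OpenCL buffer that uses that host memory directly (zero-copy).
cl::Buffer CreateIonBackedBuffer(const cl::Context &context, size_t nbytes) {
  rpcmem_init();  // the real code does this once per process via std::call_once

  void *host = rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_FLAG_CACHED,
                            static_cast<int>(nbytes));
  int fd = rpcmem_to_fd(host);  // ION file descriptor backing the allocation

  cl_mem_ion_host_ptr ion_host;
  ion_host.ext_host_ptr.allocation_type = CL_MEM_ION_HOST_PTR_QCOM;
  ion_host.ext_host_ptr.host_cache_policy = CL_MEM_HOST_WRITEBACK_QCOM;
  ion_host.ion_filedesc = fd;
  ion_host.ion_hostptr = host;

  cl_int error = CL_SUCCESS;
  cl::Buffer buffer(context,
                    CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR |
                        CL_MEM_EXT_HOST_PTR_QCOM,
                    nbytes, &ion_host, &error);
  // Mapping this buffer later just returns `host` (after an optional
  // command-queue finish and rpcmem_sync_cache), avoiding an extra copy.
  return buffer;
}
```

Freeing mirrors this: delete the `cl::Buffer`, then `rpcmem_free(host)`, which is what the new `cl_to_host_map_` bookkeeping in `OpenCLAllocator` is for.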
...@@ -3,7 +3,6 @@ build/* ...@@ -3,7 +3,6 @@ build/*
cmake-build/ cmake-build/
cmake-build-debug/ cmake-build-debug/
docs/_build/ docs/_build/
*.a
.idea/ .idea/
.vscode/ .vscode/
......
...@@ -131,3 +131,14 @@ config_setting( ...@@ -131,3 +131,14 @@ config_setting(
}, },
visibility = ["//visibility:public"], visibility = ["//visibility:public"],
) )
config_setting(
name = "rpcmem_enabled",
define_values = {
"rpcmem": "true",
},
values = {
"crosstool_top": "//external:android/crosstool",
},
visibility = ["//visibility:public"],
)
...@@ -18,6 +18,7 @@ load( ...@@ -18,6 +18,7 @@ load(
"if_opencl_enabled", "if_opencl_enabled",
"if_openmp_enabled", "if_openmp_enabled",
"if_quantize_enabled", "if_quantize_enabled",
"if_rpcmem_enabled",
) )
cc_library( cc_library(
...@@ -75,6 +76,8 @@ cc_library( ...@@ -75,6 +76,8 @@ cc_library(
]) + if_android_armv7([ ]) + if_android_armv7([
"-mfpu=neon-fp16", "-mfpu=neon-fp16",
"-mfloat-abi=softfp", "-mfloat-abi=softfp",
]) + if_rpcmem_enabled([
"-DMACE_ENABLE_RPCMEM",
]), ]),
linkopts = ["-ldl"], linkopts = ["-ldl"],
deps = [ deps = [
...@@ -94,6 +97,8 @@ cc_library( ...@@ -94,6 +97,8 @@ cc_library(
"//third_party/hta", "//third_party/hta",
]) + if_apu_enabled([ ]) + if_apu_enabled([
"//third_party/apu:libapu-frontend", "//third_party/apu:libapu-frontend",
]) + if_rpcmem_enabled([
"//third_party/rpcmem",
]), ]),
) )
......
...@@ -45,6 +45,10 @@ if(MACE_ENABLE_MTK_APU) ...@@ -45,6 +45,10 @@ if(MACE_ENABLE_MTK_APU)
set(EXTRA_LINK_LIBS ${EXTRA_LINK_LIBS} apu-frontend) set(EXTRA_LINK_LIBS ${EXTRA_LINK_LIBS} apu-frontend)
endif(MACE_ENABLE_MTK_APU) endif(MACE_ENABLE_MTK_APU)
if(MACE_ENABLE_RPCMEM)
set(EXTRA_LINK_LIBS ${EXTRA_LINK_LIBS} rpcmem)
endif(MACE_ENABLE_RPCMEM)
add_library(core STATIC ${CORE_SRCS}) add_library(core STATIC ${CORE_SRCS})
target_link_libraries(core PRIVATE target_link_libraries(core PRIVATE
proto proto
......
...@@ -47,16 +47,20 @@ class Allocator { ...@@ -47,16 +47,20 @@ class Allocator {
public: public:
Allocator() {} Allocator() {}
virtual ~Allocator() noexcept {} virtual ~Allocator() noexcept {}
virtual MaceStatus New(size_t nbytes, void **result) const = 0; virtual MaceStatus New(size_t nbytes, void **result) = 0;
virtual MaceStatus NewImage(const std::vector<size_t> &image_shape, virtual MaceStatus NewImage(const std::vector<size_t> &image_shape,
const DataType dt, const DataType dt,
void **result) const = 0; void **result) = 0;
virtual void Delete(void *data) const = 0; virtual void Delete(void *data) = 0;
virtual void DeleteImage(void *data) const = 0; virtual void DeleteImage(void *data) = 0;
virtual void *Map(void *buffer, size_t offset, size_t nbytes) const = 0; virtual void *Map(void *buffer,
size_t offset,
size_t nbytes,
bool finish_cmd_queue) const = 0;
virtual void *MapImage(void *buffer, virtual void *MapImage(void *buffer,
const std::vector<size_t> &image_shape, const std::vector<size_t> &image_shape,
std::vector<size_t> *mapped_image_pitch) const = 0; std::vector<size_t> *mapped_image_pitch,
bool finish_cmd_queue) const = 0;
virtual void Unmap(void *buffer, void *mapper_ptr) const = 0; virtual void Unmap(void *buffer, void *mapper_ptr) const = 0;
virtual bool OnHost() const = 0; virtual bool OnHost() const = 0;
}; };
...@@ -64,7 +68,7 @@ class Allocator { ...@@ -64,7 +68,7 @@ class Allocator {
class CPUAllocator : public Allocator { class CPUAllocator : public Allocator {
public: public:
~CPUAllocator() override {} ~CPUAllocator() override {}
MaceStatus New(size_t nbytes, void **result) const override { MaceStatus New(size_t nbytes, void **result) override {
VLOG(3) << "Allocate CPU buffer: " << nbytes; VLOG(3) << "Allocate CPU buffer: " << nbytes;
if (nbytes == 0) { if (nbytes == 0) {
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
...@@ -82,7 +86,7 @@ class CPUAllocator : public Allocator { ...@@ -82,7 +86,7 @@ class CPUAllocator : public Allocator {
MaceStatus NewImage(const std::vector<size_t> &shape, MaceStatus NewImage(const std::vector<size_t> &shape,
const DataType dt, const DataType dt,
void **result) const override { void **result) override {
MACE_UNUSED(shape); MACE_UNUSED(shape);
MACE_UNUSED(dt); MACE_UNUSED(dt);
MACE_UNUSED(result); MACE_UNUSED(result);
...@@ -90,24 +94,30 @@ class CPUAllocator : public Allocator { ...@@ -90,24 +94,30 @@ class CPUAllocator : public Allocator {
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
void Delete(void *data) const override { void Delete(void *data) override {
MACE_CHECK_NOTNULL(data); MACE_CHECK_NOTNULL(data);
VLOG(3) << "Free CPU buffer"; VLOG(3) << "Free CPU buffer";
free(data); free(data);
} }
void DeleteImage(void *data) const override { void DeleteImage(void *data) override {
LOG(FATAL) << "Free CPU image"; LOG(FATAL) << "Free CPU image";
free(data); free(data);
}; };
void *Map(void *buffer, size_t offset, size_t nbytes) const override { void *Map(void *buffer,
size_t offset,
size_t nbytes,
bool finish_cmd_queue) const override {
MACE_UNUSED(nbytes); MACE_UNUSED(nbytes);
MACE_UNUSED(finish_cmd_queue);
return reinterpret_cast<char*>(buffer) + offset; return reinterpret_cast<char*>(buffer) + offset;
} }
void *MapImage(void *buffer, void *MapImage(void *buffer,
const std::vector<size_t> &image_shape, const std::vector<size_t> &image_shape,
std::vector<size_t> *mapped_image_pitch) const override { std::vector<size_t> *mapped_image_pitch,
bool finish_cmd_queue) const override {
MACE_UNUSED(image_shape); MACE_UNUSED(image_shape);
MACE_UNUSED(mapped_image_pitch); MACE_UNUSED(mapped_image_pitch);
MACE_UNUSED(finish_cmd_queue);
return buffer; return buffer;
} }
void Unmap(void *buffer, void *mapper_ptr) const override { void Unmap(void *buffer, void *mapper_ptr) const override {
......
...@@ -54,11 +54,13 @@ class BufferBase { ...@@ -54,11 +54,13 @@ class BufferBase {
virtual void *Map(index_t offset, virtual void *Map(index_t offset,
index_t length, index_t length,
std::vector<size_t> *pitch) const = 0; std::vector<size_t> *pitch,
bool finish_cmd_queue) const = 0;
virtual void UnMap(void *mapped_ptr) const = 0; virtual void UnMap(void *mapped_ptr) const = 0;
virtual void Map(std::vector<size_t> *pitch) = 0; virtual void Map(std::vector<size_t> *pitch,
bool finish_cmd_queue = true) = 0;
virtual void UnMap() = 0; virtual void UnMap() = 0;
...@@ -171,10 +173,13 @@ class Buffer : public BufferBase { ...@@ -171,10 +173,13 @@ class Buffer : public BufferBase {
return this->Allocate(nbytes); return this->Allocate(nbytes);
} }
void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const { void *Map(index_t offset,
index_t length,
std::vector<size_t> *pitch,
bool finish_cmd_queue) const {
MACE_CHECK_NOTNULL(buf_); MACE_CHECK_NOTNULL(buf_);
MACE_UNUSED(pitch); MACE_UNUSED(pitch);
return allocator_->Map(buf_, offset, length); return allocator_->Map(buf_, offset, length, finish_cmd_queue);
} }
void UnMap(void *mapped_ptr) const { void UnMap(void *mapped_ptr) const {
...@@ -183,9 +188,9 @@ class Buffer : public BufferBase { ...@@ -183,9 +188,9 @@ class Buffer : public BufferBase {
allocator_->Unmap(buf_, mapped_ptr); allocator_->Unmap(buf_, mapped_ptr);
} }
void Map(std::vector<size_t> *pitch) { void Map(std::vector<size_t> *pitch, bool finish_cmd_queue = true) {
MACE_CHECK(mapped_buf_ == nullptr, "buf has been already mapped"); MACE_CHECK(mapped_buf_ == nullptr, "buf has been already mapped");
mapped_buf_ = Map(0, size_, pitch); mapped_buf_ = Map(0, size_, pitch, finish_cmd_queue);
} }
void UnMap() { void UnMap() {
...@@ -300,10 +305,14 @@ class Image : public BufferBase { ...@@ -300,10 +305,14 @@ class Image : public BufferBase {
return allocator_->NewImage(shape, data_type, &buf_); return allocator_->NewImage(shape, data_type, &buf_);
} }
void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const { void *Map(index_t offset,
index_t length,
std::vector<size_t> *pitch,
bool finish_cmd_queue) const {
MACE_UNUSED(offset); MACE_UNUSED(offset);
MACE_UNUSED(length); MACE_UNUSED(length);
MACE_UNUSED(pitch); MACE_UNUSED(pitch);
MACE_UNUSED(finish_cmd_queue);
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
return nullptr; return nullptr;
} }
...@@ -314,11 +323,11 @@ class Image : public BufferBase { ...@@ -314,11 +323,11 @@ class Image : public BufferBase {
allocator_->Unmap(buf_, mapped_ptr); allocator_->Unmap(buf_, mapped_ptr);
} }
void Map(std::vector<size_t> *pitch) { void Map(std::vector<size_t> *pitch, bool finish_cmd_queue = true) {
MACE_CHECK_NOTNULL(buf_); MACE_CHECK_NOTNULL(buf_);
MACE_CHECK(mapped_buf_ == nullptr, "buf has been already mapped"); MACE_CHECK(mapped_buf_ == nullptr, "buf has been already mapped");
MACE_CHECK_NOTNULL(pitch); MACE_CHECK_NOTNULL(pitch);
mapped_buf_ = allocator_->MapImage(buf_, shape_, pitch); mapped_buf_ = allocator_->MapImage(buf_, shape_, pitch, finish_cmd_queue);
} }
void UnMap() { void UnMap() {
...@@ -434,18 +443,21 @@ class BufferSlice : public BufferBase { ...@@ -434,18 +443,21 @@ class BufferSlice : public BufferBase {
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
void *Map(index_t offset, index_t length, std::vector<size_t> *pitch) const {
return buffer_->Map(offset_ + offset, length, pitch);
void *Map(index_t offset,
index_t length,
std::vector<size_t> *pitch,
bool finish_cmd_queue) const {
return buffer_->Map(offset_ + offset, length, pitch, finish_cmd_queue);
} }
void UnMap(void *mapped_ptr) const { void UnMap(void *mapped_ptr) const {
buffer_->UnMap(mapped_ptr); buffer_->UnMap(mapped_ptr);
} }
void Map(std::vector<size_t> *pitch) { void Map(std::vector<size_t> *pitch, bool finish_cmd_queue = true) {
MACE_CHECK_NOTNULL(buffer_); MACE_CHECK_NOTNULL(buffer_);
MACE_CHECK(mapped_buf_ == nullptr, "mapped buf is not null"); MACE_CHECK(mapped_buf_ == nullptr, "mapped buf is not null");
mapped_buf_ = buffer_->Map(offset_, size_, pitch); mapped_buf_ = buffer_->Map(offset_, size_, pitch, finish_cmd_queue);
} }
void UnMap() { void UnMap() {
......
...@@ -16,9 +16,11 @@ ...@@ -16,9 +16,11 @@
#include "mace/core/runtime/opencl/opencl_allocator.h" #include "mace/core/runtime/opencl/opencl_allocator.h"
#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_runtime.h"
#ifdef MACE_ENABLE_RPCMEM
#include "third_party/rpcmem/rpcmem.h"
#endif // MACE_ENABLE_RPCMEM
namespace mace { namespace mace {
namespace { namespace {
static cl_channel_type DataTypeToCLChannelType(const DataType t) { static cl_channel_type DataTypeToCLChannelType(const DataType t) {
...@@ -36,14 +38,29 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) { ...@@ -36,14 +38,29 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) {
return 0; return 0;
} }
} }
#ifdef MACE_ENABLE_RPCMEM
std::once_flag ion_prepared;
void PrepareQualcommION() {
rpcmem_init();
std::atexit(rpcmem_deinit);
}
#endif // MACE_ENABLE_RPCMEM
} // namespace } // namespace
OpenCLAllocator::OpenCLAllocator( OpenCLAllocator::OpenCLAllocator(
OpenCLRuntime *opencl_runtime):
opencl_runtime_(opencl_runtime) {}
OpenCLRuntime *opencl_runtime): opencl_runtime_(opencl_runtime) {
#ifdef MACE_ENABLE_RPCMEM
if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) {
std::call_once(ion_prepared, PrepareQualcommION);
}
#endif // MACE_ENABLE_RPCMEM
}
OpenCLAllocator::~OpenCLAllocator() {} OpenCLAllocator::~OpenCLAllocator() {}
MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const {
MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) {
if (nbytes == 0) { if (nbytes == 0) {
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
} }
...@@ -53,10 +70,27 @@ MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const { ...@@ -53,10 +70,27 @@ MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const {
return MaceStatus::MACE_OUT_OF_RESOURCES; return MaceStatus::MACE_OUT_OF_RESOURCES;
} }
cl_int error;
cl::Buffer *buffer = new cl::Buffer(opencl_runtime_->context(),
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
nbytes, nullptr, &error);
cl_int error = CL_SUCCESS;
cl::Buffer *buffer = nullptr;
#ifdef MACE_ENABLE_RPCMEM
if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) {
cl_mem_ion_host_ptr ion_host;
CreateQualcommBufferIONHostPtr(nbytes, &ion_host);
buffer = new cl::Buffer(
opencl_runtime_->context(),
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
nbytes, &ion_host, &error);
cl_to_host_map_[static_cast<void *>(buffer)] = ion_host.ion_hostptr;
} else {
#endif // MACE_ENABLE_RPCMEM
buffer = new cl::Buffer(opencl_runtime_->context(),
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
nbytes, nullptr, &error);
#ifdef MACE_ENABLE_RPCMEM
}
#endif // MACE_ENABLE_RPCMEM
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
LOG(WARNING) << "Allocate OpenCL Buffer with " LOG(WARNING) << "Allocate OpenCL Buffer with "
<< nbytes << " bytes failed because of " << nbytes << " bytes failed because of "
...@@ -72,7 +106,7 @@ MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const { ...@@ -72,7 +106,7 @@ MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const {
MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape, MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
const DataType dt, const DataType dt,
void **result) const { void **result) {
MACE_CHECK(image_shape.size() == 2, "Image shape's size must equal 2"); MACE_CHECK(image_shape.size() == 2, "Image shape's size must equal 2");
MACE_LATENCY_LOGGER(1, "Allocate OpenCL image: ", MACE_LATENCY_LOGGER(1, "Allocate OpenCL image: ",
image_shape[0], ", ", image_shape[1]); image_shape[0], ", ", image_shape[1]);
...@@ -82,12 +116,29 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape, ...@@ -82,12 +116,29 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
} }
cl::ImageFormat img_format(CL_RGBA, DataTypeToCLChannelType(dt)); cl::ImageFormat img_format(CL_RGBA, DataTypeToCLChannelType(dt));
cl_int error = CL_SUCCESS;
cl::Image2D *cl_image = nullptr;
#ifdef MACE_ENABLE_RPCMEM
if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) {
cl_mem_ion_host_ptr ion_host;
size_t pitch;
CreateQualcommImageIONHostPtr(image_shape, img_format, &pitch, &ion_host);
cl_int error;
cl::Image2D *cl_image =
new cl::Image2D(opencl_runtime_->context(),
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format,
image_shape[0], image_shape[1], 0, nullptr, &error);
cl_image = new cl::Image2D(
opencl_runtime_->context(),
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_EXT_HOST_PTR_QCOM,
img_format, image_shape[0], image_shape[1], pitch, &ion_host, &error);
cl_to_host_map_[static_cast<void *>(cl_image)] = ion_host.ion_hostptr;
} else {
#endif // MACE_ENABLE_RPCMEM
cl_image =
new cl::Image2D(opencl_runtime_->context(),
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format,
image_shape[0], image_shape[1], 0, nullptr, &error);
#ifdef MACE_ENABLE_RPCMEM
}
#endif // MACE_ENABLE_RPCMEM
if (error != CL_SUCCESS) { if (error != CL_SUCCESS) {
LOG(WARNING) << "Allocate OpenCL image with shape: [" LOG(WARNING) << "Allocate OpenCL image with shape: ["
<< image_shape[0] << ", " << image_shape[1] << image_shape[0] << ", " << image_shape[1]
...@@ -108,72 +159,180 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape, ...@@ -108,72 +159,180 @@ MaceStatus OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
} }
} }
void OpenCLAllocator::Delete(void *buffer) const { void OpenCLAllocator::Delete(void *buffer) {
MACE_LATENCY_LOGGER(1, "Free OpenCL buffer"); MACE_LATENCY_LOGGER(1, "Free OpenCL buffer");
if (buffer != nullptr) { if (buffer != nullptr) {
cl::Buffer *cl_buffer = static_cast<cl::Buffer *>(buffer); cl::Buffer *cl_buffer = static_cast<cl::Buffer *>(buffer);
delete cl_buffer; delete cl_buffer;
#ifdef MACE_ENABLE_RPCMEM
if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) {
auto it = cl_to_host_map_.find(buffer);
MACE_CHECK(it != cl_to_host_map_.end(), "OpenCL buffer not found!");
rpcmem_free(it->second);
cl_to_host_map_.erase(buffer);
}
#endif // MACE_ENABLE_RPCMEM
} }
} }
void OpenCLAllocator::DeleteImage(void *buffer) const { void OpenCLAllocator::DeleteImage(void *buffer) {
MACE_LATENCY_LOGGER(1, "Free OpenCL image"); MACE_LATENCY_LOGGER(1, "Free OpenCL image");
if (buffer != nullptr) { if (buffer != nullptr) {
cl::Image2D *cl_image = static_cast<cl::Image2D *>(buffer); cl::Image2D *cl_image = static_cast<cl::Image2D *>(buffer);
delete cl_image; delete cl_image;
#ifdef MACE_ENABLE_RPCMEM
if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) {
auto it = cl_to_host_map_.find(buffer);
MACE_CHECK(it != cl_to_host_map_.end(), "OpenCL image not found!");
rpcmem_free(it->second);
cl_to_host_map_.erase(buffer);
}
#endif // MACE_ENABLE_RPCMEM
} }
} }
void *OpenCLAllocator::Map(void *buffer, size_t offset, size_t nbytes) const { void *OpenCLAllocator::Map(void *buffer,
size_t offset,
size_t nbytes,
bool finish_cmd_queue) const {
MACE_LATENCY_LOGGER(1, "Map OpenCL buffer"); MACE_LATENCY_LOGGER(1, "Map OpenCL buffer");
auto cl_buffer = static_cast<cl::Buffer *>(buffer);
auto queue = opencl_runtime_->command_queue();
// TODO(heliangliang) Non-blocking call
cl_int error;
void *mapped_ptr =
queue.enqueueMapBuffer(*cl_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
offset, nbytes, nullptr, nullptr, &error);
if (error != CL_SUCCESS) {
LOG(ERROR) << "Map buffer failed, error: " << OpenCLErrorToString(error);
mapped_ptr = nullptr;
void *mapped_ptr = nullptr;
#ifdef MACE_ENABLE_RPCMEM
if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) {
auto it = cl_to_host_map_.find(buffer);
MACE_CHECK(it != cl_to_host_map_.end(), "Try to map unallocated Buffer!");
mapped_ptr = it->second;
if (finish_cmd_queue) {
opencl_runtime_->command_queue().finish();
}
if (opencl_runtime_->qcom_host_cache_policy() ==
CL_MEM_HOST_WRITEBACK_QCOM) {
MACE_CHECK(rpcmem_sync_cache(mapped_ptr, RPCMEM_SYNC_START) == 0);
}
} else {
#endif // MACE_ENABLE_RPCMEM
MACE_UNUSED(finish_cmd_queue);
auto cl_buffer = static_cast<cl::Buffer *>(buffer);
auto queue = opencl_runtime_->command_queue();
// TODO(heliangliang) Non-blocking call
cl_int error;
mapped_ptr =
queue.enqueueMapBuffer(*cl_buffer, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
offset, nbytes, nullptr, nullptr, &error);
if (error != CL_SUCCESS) {
LOG(ERROR) << "Map buffer failed, error: " << OpenCLErrorToString(error);
}
#ifdef MACE_ENABLE_RPCMEM
} }
#endif // MACE_ENABLE_RPCMEM
return mapped_ptr; return mapped_ptr;
} }
// TODO(liuqi) there is something wrong with half type. // TODO(liuqi) there is something wrong with half type.
void *OpenCLAllocator::MapImage(void *buffer, void *OpenCLAllocator::MapImage(void *buffer,
const std::vector<size_t> &image_shape, const std::vector<size_t> &image_shape,
std::vector<size_t> *mapped_image_pitch) const { std::vector<size_t> *mapped_image_pitch,
bool finish_cmd_queue) const {
MACE_LATENCY_LOGGER(1, "Map OpenCL Image"); MACE_LATENCY_LOGGER(1, "Map OpenCL Image");
MACE_CHECK(image_shape.size() == 2) << "Just support map 2d image"; MACE_CHECK(image_shape.size() == 2) << "Just support map 2d image";
auto cl_image = static_cast<cl::Image2D *>(buffer);
std::array<size_t, 3> origin = {{0, 0, 0}};
std::array<size_t, 3> region = {{image_shape[0], image_shape[1], 1}};
// TODO(libin): Set mapped_image_pitch if needed
mapped_image_pitch->resize(2);
cl_int error;
void *mapped_ptr = opencl_runtime_->command_queue().enqueueMapImage(
*cl_image, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region,
mapped_image_pitch->data(), mapped_image_pitch->data() + 1, nullptr,
nullptr, &error);
if (error != CL_SUCCESS) {
LOG(ERROR) << "Map Image failed, error: " << OpenCLErrorToString(error);
mapped_ptr = nullptr;
void *mapped_ptr = nullptr;
#ifdef MACE_ENABLE_RPCMEM
if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) {
auto it = cl_to_host_map_.find(buffer);
MACE_CHECK(it != cl_to_host_map_.end(), "Try to map unallocated Image!");
mapped_ptr = it->second;
if (finish_cmd_queue) {
opencl_runtime_->command_queue().finish();
}
if (opencl_runtime_->qcom_host_cache_policy() ==
CL_MEM_HOST_WRITEBACK_QCOM) {
MACE_CHECK(rpcmem_sync_cache(mapped_ptr, RPCMEM_SYNC_START) == 0);
}
} else {
#endif // MACE_ENABLE_RPCMEM
MACE_UNUSED(finish_cmd_queue);
auto cl_image = static_cast<cl::Image2D *>(buffer);
std::array<size_t, 3> origin = {{0, 0, 0}};
std::array<size_t, 3> region = {{image_shape[0], image_shape[1], 1}};
mapped_image_pitch->resize(2);
cl_int error;
mapped_ptr = opencl_runtime_->command_queue().enqueueMapImage(
*cl_image, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, origin, region,
mapped_image_pitch->data(), mapped_image_pitch->data() + 1, nullptr,
nullptr, &error);
if (error != CL_SUCCESS) {
LOG(ERROR) << "Map Image failed, error: " << OpenCLErrorToString(error);
}
#ifdef MACE_ENABLE_RPCMEM
} }
#endif // MACE_ENABLE_RPCMEM
return mapped_ptr; return mapped_ptr;
} }
void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) const { void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) const {
MACE_LATENCY_LOGGER(1, "Unmap OpenCL buffer/Image"); MACE_LATENCY_LOGGER(1, "Unmap OpenCL buffer/Image");
auto cl_buffer = static_cast<cl::Buffer *>(buffer);
auto queue = opencl_runtime_->command_queue();
cl_int error = queue.enqueueUnmapMemObject(*cl_buffer, mapped_ptr,
nullptr, nullptr);
if (error != CL_SUCCESS) {
LOG(ERROR) << "Unmap buffer failed, error: " << OpenCLErrorToString(error);
#ifdef MACE_ENABLE_RPCMEM
if (opencl_runtime_->ion_type() == IONType::QUALCOMM_ION) {
if (opencl_runtime_->qcom_host_cache_policy() ==
CL_MEM_HOST_WRITEBACK_QCOM) {
MACE_CHECK(rpcmem_sync_cache(mapped_ptr, RPCMEM_SYNC_END) == 0);
}
} else {
#endif // MACE_ENABLE_RPCMEM
auto cl_buffer = static_cast<cl::Buffer *>(buffer);
auto queue = opencl_runtime_->command_queue();
cl_int error = queue.enqueueUnmapMemObject(*cl_buffer, mapped_ptr,
nullptr, nullptr);
if (error != CL_SUCCESS) {
LOG(ERROR) << "Unmap buffer failed, error: "
<< OpenCLErrorToString(error);
}
#ifdef MACE_ENABLE_RPCMEM
} }
#endif // MACE_ENABLE_RPCMEM
} }
bool OpenCLAllocator::OnHost() const { return false; } bool OpenCLAllocator::OnHost() const { return false; }
#ifdef MACE_ENABLE_RPCMEM
void OpenCLAllocator::CreateQualcommBufferIONHostPtr(
const size_t nbytes,
cl_mem_ion_host_ptr *ion_host) {
void *host = rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_FLAG_CACHED,
nbytes + opencl_runtime_->qcom_ext_mem_padding());
MACE_CHECK_NOTNULL(host);
auto host_addr = reinterpret_cast<std::uintptr_t>(host);
auto page_size = opencl_runtime_->qcom_page_size();
MACE_CHECK(host_addr % page_size == 0, "ION memory address: ", host_addr,
" must be aligned to page size: ", page_size);
int fd = rpcmem_to_fd(host);
MACE_CHECK(fd >= 0, "Invalid rpcmem file descriptor: ", fd);
ion_host->ext_host_ptr.allocation_type = CL_MEM_ION_HOST_PTR_QCOM;
ion_host->ext_host_ptr.host_cache_policy =
opencl_runtime_->qcom_host_cache_policy();
ion_host->ion_filedesc = fd;
ion_host->ion_hostptr = host;
}
void OpenCLAllocator::CreateQualcommImageIONHostPtr(
const std::vector<size_t> &shape,
const cl::ImageFormat &format,
size_t *pitch,
cl_mem_ion_host_ptr *ion_host) {
cl_int error = clGetDeviceImageInfoQCOM(
opencl_runtime_->device().get(), shape[0], shape[1], &format,
CL_IMAGE_ROW_PITCH, sizeof(*pitch), pitch, nullptr);
MACE_CHECK(error == CL_SUCCESS, "clGetDeviceImageInfoQCOM failed, error: ",
OpenCLErrorToString(error));
CreateQualcommBufferIONHostPtr(*pitch * shape[1], ion_host);
}
#endif // MACE_ENABLE_RPCMEM
} // namespace mace } // namespace mace
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#define MACE_CORE_RUNTIME_OPENCL_OPENCL_ALLOCATOR_H_ #define MACE_CORE_RUNTIME_OPENCL_OPENCL_ALLOCATOR_H_
#include <memory> #include <memory>
#include <unordered_map>
#include <vector> #include <vector>
#include "mace/core/allocator.h" #include "mace/core/allocator.h"
...@@ -29,7 +30,7 @@ class OpenCLAllocator : public Allocator { ...@@ -29,7 +30,7 @@ class OpenCLAllocator : public Allocator {
~OpenCLAllocator() override; ~OpenCLAllocator() override;
MaceStatus New(size_t nbytes, void **result) const override; MaceStatus New(size_t nbytes, void **result) override;
/* /*
* Use Image2D with RGBA (128-bit) format to represent the image. * Use Image2D with RGBA (128-bit) format to represent the image.
...@@ -38,23 +39,37 @@ class OpenCLAllocator : public Allocator { ...@@ -38,23 +39,37 @@ class OpenCLAllocator : public Allocator {
*/ */
MaceStatus NewImage(const std::vector<size_t> &image_shape, MaceStatus NewImage(const std::vector<size_t> &image_shape,
const DataType dt, const DataType dt,
void **result) const override; void **result) override;
void Delete(void *buffer) const override; void Delete(void *buffer) override;
void DeleteImage(void *buffer) const override; void DeleteImage(void *buffer) override;
void *Map(void *buffer, size_t offset, size_t nbytes) const override; void *Map(void *buffer,
size_t offset,
size_t nbytes,
bool finish_cmd_queue) const override;
void *MapImage(void *buffer, void *MapImage(void *buffer,
const std::vector<size_t> &image_shape, const std::vector<size_t> &image_shape,
std::vector<size_t> *mapped_image_pitch) const override; std::vector<size_t> *mapped_image_pitch,
bool finish_cmd_queue) const override;
void Unmap(void *buffer, void *mapped_ptr) const override; void Unmap(void *buffer, void *mapped_ptr) const override;
bool OnHost() const override; bool OnHost() const override;
private: private:
#ifdef MACE_ENABLE_RPCMEM
void CreateQualcommBufferIONHostPtr(const size_t nbytes,
cl_mem_ion_host_ptr *ion_host);
void CreateQualcommImageIONHostPtr(const std::vector<size_t> &shape,
const cl::ImageFormat &format,
size_t *pitch,
cl_mem_ion_host_ptr *ion_host);
std::unordered_map<void *, void *> cl_to_host_map_;
#endif // MACE_ENABLE_RPCMEM
OpenCLRuntime *opencl_runtime_; OpenCLRuntime *opencl_runtime_;
}; };
......
...@@ -37,4 +37,8 @@ typedef cl_uint cl_priority_hint; ...@@ -37,4 +37,8 @@ typedef cl_uint cl_priority_hint;
/* Accepted by clGetKernelWorkGroupInfo */ /* Accepted by clGetKernelWorkGroupInfo */
#define CL_KERNEL_WAVE_SIZE_QCOM 0xAA02 #define CL_KERNEL_WAVE_SIZE_QCOM 0xAA02
// Cache policy specifying io-coherence
#define CL_MEM_HOST_IOCOHERENT_QCOM 0x40A9
#endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_EXTENSION_H_ #endif // MACE_CORE_RUNTIME_OPENCL_OPENCL_EXTENSION_H_
...@@ -230,6 +230,37 @@ GPUType ParseGPUType(const std::string &device_name) { ...@@ -230,6 +230,37 @@ GPUType ParseGPUType(const std::string &device_name) {
} }
} }
#ifdef MACE_ENABLE_RPCMEM
IONType ParseIONType(const std::string &device_extensions) {
constexpr const char *kQualcommIONStr = "cl_qcom_ion_host_ptr";
if (device_extensions.find(kQualcommIONStr) != std::string::npos) {
return IONType::QUALCOMM_ION;
} else {
return IONType::NONE_ION;
}
}
uint32_t ParseQcomHostCachePolicy(const std::string &device_extensions) {
constexpr const char *kQualcommIocoherentStr =
"cl_qcom_ext_host_ptr_iocoherent";
if (device_extensions.find(kQualcommIocoherentStr) != std::string::npos) {
return CL_MEM_HOST_IOCOHERENT_QCOM;
} else {
return CL_MEM_HOST_WRITEBACK_QCOM;
}
}
std::string QcomHostCachePolicyToString(uint32_t policy) {
switch (policy) {
case CL_MEM_HOST_IOCOHERENT_QCOM: return "CL_MEM_HOST_IOCOHERENT_QCOM";
case CL_MEM_HOST_WRITEBACK_QCOM: return "CL_MEM_HOST_WRITEBACK_QCOM";
default: return MakeString("UNKNOWN: ", policy);
}
}
#endif // MACE_ENABLE_RPCMEM
const char *kOpenCLPlatformInfoKey = const char *kOpenCLPlatformInfoKey =
"mace_opencl_precompiled_platform_info_key"; "mace_opencl_precompiled_platform_info_key";
} // namespace } // namespace
...@@ -311,6 +342,35 @@ OpenCLRuntime::OpenCLRuntime( ...@@ -311,6 +342,35 @@ OpenCLRuntime::OpenCLRuntime(
return; return;
} }
#ifdef MACE_ENABLE_RPCMEM
const std::string device_extensions =
device.getInfo<CL_DEVICE_EXTENSIONS>();
ion_type_ = ParseIONType(device_extensions);
if (ion_type_ == IONType::QUALCOMM_ION) {
qcom_ext_mem_padding_ = 0;
cl_int err = device.getInfo(CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM,
&qcom_ext_mem_padding_);
if (err != CL_SUCCESS) {
LOG(ERROR) << "Failed to get CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM "
<< OpenCLErrorToString(err);
}
qcom_page_size_ = 4096;
err = device.getInfo(CL_DEVICE_PAGE_SIZE_QCOM, &qcom_page_size_);
if (err != CL_SUCCESS) {
LOG(ERROR) << "Failed to get CL_DEVICE_PAGE_SIZE_QCOM: "
<< OpenCLErrorToString(err);
}
qcom_host_cache_policy_ = ParseQcomHostCachePolicy(device_extensions);
VLOG(1) << "Using QUALCOMM ION buffer with padding size: "
<< qcom_ext_mem_padding_ << ", page size: " << qcom_page_size_
<< ", with host cache policy: "
<< QcomHostCachePolicyToString(qcom_host_cache_policy_);
}
#endif // MACE_ENABLE_RPCMEM
VLOG(1) << "Using device: " << device_name; VLOG(1) << "Using device: " << device_name;
break; break;
} }
...@@ -776,6 +836,24 @@ GPUType OpenCLRuntime::gpu_type() const { ...@@ -776,6 +836,24 @@ GPUType OpenCLRuntime::gpu_type() const {
return gpu_type_; return gpu_type_;
} }
#ifdef MACE_ENABLE_RPCMEM
IONType OpenCLRuntime::ion_type() const {
return ion_type_;
}
uint32_t OpenCLRuntime::qcom_ext_mem_padding() const {
return qcom_ext_mem_padding_;
}
uint32_t OpenCLRuntime::qcom_page_size() const {
return qcom_page_size_;
}
uint32_t OpenCLRuntime::qcom_host_cache_policy() const {
return qcom_host_cache_policy_;
}
#endif // MACE_ENABLE_RPCMEM
const std::string OpenCLRuntime::platform_info() const { const std::string OpenCLRuntime::platform_info() const {
return platform_info_; return platform_info_;
} }
......
...@@ -49,6 +49,12 @@ enum OpenCLVersion { ...@@ -49,6 +49,12 @@ enum OpenCLVersion {
CL_VER_2_1, CL_VER_2_1,
}; };
#ifdef MACE_ENABLE_RPCMEM
enum IONType {
QUALCOMM_ION,
NONE_ION,
};
#endif // MACE_ENABLE_RPCMEM
const std::string OpenCLErrorToString(cl_int error); const std::string OpenCLErrorToString(cl_int error);
...@@ -86,6 +92,13 @@ class OpenCLRuntime { ...@@ -86,6 +92,13 @@ class OpenCLRuntime {
Tuner<uint32_t> *tuner(); Tuner<uint32_t> *tuner();
bool is_opencl_avaliable(); bool is_opencl_avaliable();
#ifdef MACE_ENABLE_RPCMEM
IONType ion_type() const;
uint32_t qcom_ext_mem_padding() const;
uint32_t qcom_page_size() const;
uint32_t qcom_host_cache_policy() const;
#endif // MACE_ENABLE_RPCMEM
void GetCallStats(const cl::Event &event, CallStats *stats); void GetCallStats(const cl::Event &event, CallStats *stats);
uint64_t GetDeviceMaxWorkGroupSize(); uint64_t GetDeviceMaxWorkGroupSize();
uint64_t GetDeviceMaxMemAllocSize(); uint64_t GetDeviceMaxMemAllocSize();
...@@ -144,6 +157,13 @@ class OpenCLRuntime { ...@@ -144,6 +157,13 @@ class OpenCLRuntime {
bool out_of_range_check_; bool out_of_range_check_;
uint64_t device_global_mem_cache_size_; uint64_t device_global_mem_cache_size_;
uint32_t device_compute_units_; uint32_t device_compute_units_;
#ifdef MACE_ENABLE_RPCMEM
IONType ion_type_;
uint32_t qcom_ext_mem_padding_;
uint32_t qcom_page_size_;
uint32_t qcom_host_cache_policy_;
#endif // MACE_ENABLE_RPCMEM
}; };
class OpenCLProfilingTimer : public Timer { class OpenCLProfilingTimer : public Timer {
......
...@@ -216,6 +216,15 @@ class OpenCLLibrary final { ...@@ -216,6 +216,15 @@ class OpenCLLibrary final {
using clGetImageInfoFunc = using clGetImageInfoFunc =
cl_int (*)(cl_mem, cl_image_info, size_t, void *, size_t *); cl_int (*)(cl_mem, cl_image_info, size_t, void *, size_t *);
using clGetDeviceImageInfoQCOMFunc = cl_int (*)(cl_device_id,
size_t,
size_t,
const cl_image_format *,
cl_image_pitch_info_qcom,
size_t,
void *,
size_t *);
#define MACE_CL_DEFINE_FUNC_PTR(func) func##Func func = nullptr #define MACE_CL_DEFINE_FUNC_PTR(func) func##Func func = nullptr
MACE_CL_DEFINE_FUNC_PTR(clGetPlatformIDs); MACE_CL_DEFINE_FUNC_PTR(clGetPlatformIDs);
...@@ -265,6 +274,7 @@ class OpenCLLibrary final { ...@@ -265,6 +274,7 @@ class OpenCLLibrary final {
MACE_CL_DEFINE_FUNC_PTR(clGetEventInfo); MACE_CL_DEFINE_FUNC_PTR(clGetEventInfo);
MACE_CL_DEFINE_FUNC_PTR(clGetEventProfilingInfo); MACE_CL_DEFINE_FUNC_PTR(clGetEventProfilingInfo);
MACE_CL_DEFINE_FUNC_PTR(clGetImageInfo); MACE_CL_DEFINE_FUNC_PTR(clGetImageInfo);
MACE_CL_DEFINE_FUNC_PTR(clGetDeviceImageInfoQCOM);
#undef MACE_CL_DEFINE_FUNC_PTR #undef MACE_CL_DEFINE_FUNC_PTR
...@@ -400,6 +410,7 @@ void *OpenCLLibrary::LoadFromPath(const std::string &path) { ...@@ -400,6 +410,7 @@ void *OpenCLLibrary::LoadFromPath(const std::string &path) {
MACE_CL_ASSIGN_FROM_DLSYM(clGetEventInfo); MACE_CL_ASSIGN_FROM_DLSYM(clGetEventInfo);
MACE_CL_ASSIGN_FROM_DLSYM(clGetEventProfilingInfo); MACE_CL_ASSIGN_FROM_DLSYM(clGetEventProfilingInfo);
MACE_CL_ASSIGN_FROM_DLSYM(clGetImageInfo); MACE_CL_ASSIGN_FROM_DLSYM(clGetImageInfo);
MACE_CL_ASSIGN_FROM_DLSYM(clGetDeviceImageInfoQCOM);
#undef MACE_CL_ASSIGN_FROM_DLSYM #undef MACE_CL_ASSIGN_FROM_DLSYM
...@@ -802,6 +813,26 @@ CL_API_ENTRY cl_int clGetImageInfo(cl_mem image, ...@@ -802,6 +813,26 @@ CL_API_ENTRY cl_int clGetImageInfo(cl_mem image,
} }
} }
CL_API_ENTRY cl_int clGetDeviceImageInfoQCOM(
cl_device_id device,
size_t image_width,
size_t image_height,
const cl_image_format *image_format,
cl_image_pitch_info_qcom param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret)
CL_EXT_SUFFIX__VERSION_1_1 {
auto func = mace::runtime::OpenCLLibrary::Get()->clGetDeviceImageInfoQCOM;
if (func != nullptr) {
MACE_LATENCY_LOGGER(3, "clGetDeviceImageInfoQCOM");
return func(device, image_width, image_height, image_format, param_name,
param_value_size, param_value, param_value_size_ret);
} else {
return CL_INVALID_PLATFORM;
}
}
// Command Queue APIs // Command Queue APIs
CL_API_ENTRY cl_command_queue clCreateCommandQueueWithProperties( CL_API_ENTRY cl_command_queue clCreateCommandQueueWithProperties(
cl_context context, cl_context context,
......
...@@ -426,10 +426,11 @@ class Tensor { ...@@ -426,10 +426,11 @@ class Tensor {
class MappingGuard { class MappingGuard {
public: public:
explicit MappingGuard(const Tensor *tensor) : tensor_(tensor) { explicit MappingGuard(const Tensor *tensor, bool finish_cmd_queue = true) :
tensor_(tensor) {
if (tensor_ != nullptr) { if (tensor_ != nullptr) {
MACE_CHECK_NOTNULL(tensor_->buffer_); MACE_CHECK_NOTNULL(tensor_->buffer_);
tensor_->buffer_->Map(&mapped_image_pitch_); tensor_->buffer_->Map(&mapped_image_pitch_, finish_cmd_queue);
} }
} }
......
...@@ -109,6 +109,12 @@ def if_quantize_enabled(a): ...@@ -109,6 +109,12 @@ def if_quantize_enabled(a):
"//conditions:default": [], "//conditions:default": [],
}) })
def if_rpcmem_enabled(a):
return select({
"//mace:rpcmem_enabled": a,
"//conditions:default": [],
})
def mace_version_genrule(): def mace_version_genrule():
native.genrule( native.genrule(
name = "mace_version_gen", name = "mace_version_gen",
......
...@@ -66,7 +66,9 @@ class OpenCLBufferTransformer { ...@@ -66,7 +66,9 @@ class OpenCLBufferTransformer {
<< " with data type " << dt; << " with data type " << dt;
internal_tensor->Resize(input->shape()); internal_tensor->Resize(input->shape());
const uint8_t *input_ptr = input->data<uint8_t>(); const uint8_t *input_ptr = input->data<uint8_t>();
Tensor::MappingGuard guard(internal_tensor); // No need to finish the opencl command queue to write to the tensor
// from CPU, this can accelerate the mapping if using ION buffer.
Tensor::MappingGuard guard(internal_tensor, false);
uint8_t *internal_ptr = internal_tensor->mutable_data<uint8_t>(); uint8_t *internal_ptr = internal_tensor->mutable_data<uint8_t>();
memcpy(internal_ptr, input_ptr, input->raw_size()); memcpy(internal_ptr, input_ptr, input->raw_size());
// 2. convert the internal GPU Buffer to output. // 2. convert the internal GPU Buffer to output.
......
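The new `finish_cmd_queue` flag threads from `Allocator::Map` through `BufferBase::Map` into `Tensor::MappingGuard`, and the transformer comment above explains when it can be skipped. A small illustrative usage sketch follows; `internal_tensor`, `input_ptr`, and `input` come from the transformer code above, while `output_tensor` is a hypothetical name.

```cpp
// Writing into a GPU tensor from the CPU: queued GPU work need not complete
// first, so pass finish_cmd_queue = false (with ION buffers the map is then
// essentially just returning the shared host pointer).
Tensor::MappingGuard write_guard(internal_tensor, /*finish_cmd_queue=*/false);
uint8_t *internal_ptr = internal_tensor->mutable_data<uint8_t>();
memcpy(internal_ptr, input_ptr, input->raw_size());

// Reading results produced by GPU kernels: keep the default (true) so the
// command queue is finished before the host pointer is handed back.
Tensor::MappingGuard read_guard(output_tensor);  // output_tensor is hypothetical
const float *output_ptr = output_tensor->data<float>();
```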
...@@ -134,14 +134,16 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { ...@@ -134,14 +134,16 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
.AddFloatArg("leakyrelu_coefficient", 0.1) .AddFloatArg("leakyrelu_coefficient", 0.1)
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
net.Setup(DeviceType::GPU);
// Tuning // Tuning
setenv("MACE_TUNING", "1", 1); setenv("MACE_TUNING", "1", 1);
net.RunOp(DeviceType::GPU); net.Run();
unsetenv("MACE_TUNING"); unsetenv("MACE_TUNING");
// Run on opencl // Run on opencl
net.RunOp(DeviceType::GPU); net.Run();
net.Sync();
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), ExpectTensorNear<float>(*expected, *net.GetOutput("Output"),
1e-5, 1e-4); 1e-5, 1e-4);
} }
...@@ -200,14 +202,15 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { ...@@ -200,14 +202,15 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
.AddIntArg("T", static_cast<int>(DataType::DT_HALF)) .AddIntArg("T", static_cast<int>(DataType::DT_HALF))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
net.Setup(DeviceType::GPU);
// Tuning // Tuning
setenv("MACE_TUNING", "1", 1); setenv("MACE_TUNING", "1", 1);
net.RunOp(DeviceType::GPU); net.Run();
unsetenv("MACE_TUNING"); unsetenv("MACE_TUNING");
// Run on opencl // Run on opencl
net.RunOp(DeviceType::GPU); net.Run();
net.Sync();
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), ExpectTensorNear<float>(*expected, *net.GetOutput("Output"),
1e-1, 1e-2); 1e-1, 1e-2);
...@@ -266,14 +269,15 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { ...@@ -266,14 +269,15 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
.Output("Output") .Output("Output")
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
net.Setup(DeviceType::GPU);
// tuning // tuning
setenv("MACE_TUNING", "1", 1); setenv("MACE_TUNING", "1", 1);
net.RunOp(DeviceType::GPU); net.Run();
unsetenv("MACE_TUNING"); unsetenv("MACE_TUNING");
// Run on opencl // Run on opencl
net.RunOp(DeviceType::GPU); net.Run();
net.Sync();
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), ExpectTensorNear<float>(*expected, *net.GetOutput("Output"),
1e-5, 1e-4); 1e-5, 1e-4);
...@@ -333,14 +337,15 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { ...@@ -333,14 +337,15 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
.AddIntArg("T", static_cast<int>(DataType::DT_HALF)) .AddIntArg("T", static_cast<int>(DataType::DT_HALF))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
net.Setup(DeviceType::GPU);
// tuning // tuning
setenv("MACE_TUNING", "1", 1); setenv("MACE_TUNING", "1", 1);
net.RunOp(DeviceType::GPU); net.Run();
unsetenv("MACE_TUNING"); unsetenv("MACE_TUNING");
// Run on opencl // Run on opencl
net.RunOp(DeviceType::GPU); net.Run();
net.Sync();
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), ExpectTensorNear<float>(*expected, *net.GetOutput("Output"),
1e-1, 1e-2); 1e-1, 1e-2);
......
...@@ -46,6 +46,9 @@ void TestBidirectionTransform(const OpenCLBufferType type, ...@@ -46,6 +46,9 @@ void TestBidirectionTransform(const OpenCLBufferType type,
.Transform(&context, b2i_output, .Transform(&context, b2i_output,
type, MemoryType::GPU_BUFFER, 0, i2b_output); type, MemoryType::GPU_BUFFER, 0, i2b_output);
net.Setup(DeviceType::GPU);
net.Sync();
// Check // Check
ExpectTensorNear<T>(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), ExpectTensorNear<T>(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"),
1e-5); 1e-5);
...@@ -187,6 +190,9 @@ void TestDiffTypeBidirectionTransform(const OpenCLBufferType type, ...@@ -187,6 +190,9 @@ void TestDiffTypeBidirectionTransform(const OpenCLBufferType type,
.Transform(&context, b2i_output, .Transform(&context, b2i_output,
type, MemoryType::GPU_BUFFER, 0, i2b_output); type, MemoryType::GPU_BUFFER, 0, i2b_output);
net.Setup(DeviceType::GPU);
net.Sync();
// Check // Check
ExpectTensorNear<float>(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), ExpectTensorNear<float>(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"),
1e-3, 1e-6); 1e-3, 1e-6);
...@@ -227,6 +233,9 @@ void TestStringHalfBidirectionTransform(const OpenCLBufferType type, ...@@ -227,6 +233,9 @@ void TestStringHalfBidirectionTransform(const OpenCLBufferType type,
.Transform(&context, b2i_output, .Transform(&context, b2i_output,
type, MemoryType::GPU_BUFFER, 0, i2b_output); type, MemoryType::GPU_BUFFER, 0, i2b_output);
net.Setup(DeviceType::GPU);
net.Sync();
// Check // Check
ExpectTensorNear<half>(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), ExpectTensorNear<half>(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"),
1e-3, 1e-6); 1e-3, 1e-6);
......
...@@ -59,6 +59,9 @@ void TestBidirectionTransform(const OpenCLBufferType type, ...@@ -59,6 +59,9 @@ void TestBidirectionTransform(const OpenCLBufferType type,
.Transform(&context, bt_output, .Transform(&context, bt_output,
type, MemoryType::GPU_BUFFER, 0, output); type, MemoryType::GPU_BUFFER, 0, output);
net.Setup(DeviceType::GPU);
net.Sync();
if (DataTypeToEnum<OrgType>::value == DataTypeToEnum<DstType>::value) { if (DataTypeToEnum<OrgType>::value == DataTypeToEnum<DstType>::value) {
EXPECT_EQ(net.GetOutput("Input")->UnderlyingBuffer(), EXPECT_EQ(net.GetOutput("Input")->UnderlyingBuffer(),
net.GetOutput("Output")->UnderlyingBuffer()); net.GetOutput("Output")->UnderlyingBuffer());
...@@ -96,6 +99,9 @@ void TestArgumentTransform(const index_t input_size) { ...@@ -96,6 +99,9 @@ void TestArgumentTransform(const index_t input_size) {
OpenCLBufferType::ARGUMENT, MemoryType::GPU_BUFFER, OpenCLBufferType::ARGUMENT, MemoryType::GPU_BUFFER,
0, output); 0, output);
net.Setup(DeviceType::GPU);
net.Sync();
index_t expected_size = RoundUp<index_t>(input_size, 4); index_t expected_size = RoundUp<index_t>(input_size, 4);
EXPECT_EQ(expected_size, output->buffer_shape()[0]); EXPECT_EQ(expected_size, output->buffer_shape()[0]);
......
...@@ -110,7 +110,7 @@ MaceStatus BufferToImageOpImpl(OpContext *context, ...@@ -110,7 +110,7 @@ MaceStatus BufferToImageOpImpl(OpContext *context,
bool is_out_of_range = false; bool is_out_of_range = false;
if (runtime->IsOutOfRangeCheckEnabled()) { if (runtime->IsOutOfRangeCheckEnabled()) {
oorc_flag->Map(nullptr); oorc_flag->Map(nullptr);
is_out_of_range = *(oorc_flag->mutable_data<char>()) == 1 ? true : false; is_out_of_range = *(oorc_flag->mutable_data<int>()) == 1 ? true : false;
oorc_flag->UnMap(); oorc_flag->UnMap();
} }
return is_out_of_range ? MaceStatus::MACE_OUT_OF_RESOURCES return is_out_of_range ? MaceStatus::MACE_OUT_OF_RESOURCES
......
# These files are generated from the rpcmem project
licenses(["notice"])
exports_files(["license.txt"])
load(
"//mace:mace.bzl",
"if_android_arm64",
"if_android_armv7",
)
cc_library(
name = "rpcmem",
srcs = if_android_armv7([
"armeabi-v7a/rpcmem.a",
]) + if_android_arm64([
"arm64-v8a/rpcmem.a",
]),
hdrs = [
"rpcmem.h",
],
visibility = ["//visibility:public"],
)
/*==============================================================================
Copyright (c) 2012-2013 Qualcomm Technologies, Inc.
All rights reserved. Qualcomm Proprietary and Confidential.
rpcmem.h and rpcmem.a are generated from Hexagon SDK and modified by Xiaomi, Inc.
==============================================================================*/
set(RPCMEM_INSTALL_DIR "${PROJECT_SOURCE_DIR}/third_party/rpcmem")
set(RPCMEM_INCLUDE_DIR "${RPCMEM_INSTALL_DIR}")
include_directories(SYSTEM "${RPCMEM_INCLUDE_DIR}")
set(RPCMEM_LIB "${RPCMEM_INSTALL_DIR}/${ANDROID_ABI}/rpcmem.a")
add_library(rpcmem STATIC IMPORTED GLOBAL)
set_target_properties(rpcmem PROPERTIES IMPORTED_LOCATION ${RPCMEM_LIB})
install(FILES ${RPCMEM_LIB} DESTINATION lib)
/*==============================================================================
Copyright (c) 2012-2013 Qualcomm Technologies, Inc.
All rights reserved. Qualcomm Proprietary and Confidential.
==============================================================================*/
#ifndef RPCMEM_H
#define RPCMEM_H
/**
* RPCMEM_DEFAULT_HEAP
* Dynamically select the heap to use. This should be OK for most use cases.
*/
#define RPCMEM_DEFAULT_HEAP -1
/**
* RPCMEM HEAP IDs
* SYSTEM HEAP:
* - non-contiguous physical memory
* - for sub-systems with SMMU
* - recommended for HVX/CDSPs
* CONTIG HEAP:
* - Contiguous physical memory
* - limited memory
* - for sub-systems without SMMU (ex. sDSP and mDSP)
*/
#define RPCMEM_HEAP_ID_SYSTEM (25)
#define RPCMEM_HEAP_ID_CONTIG (22)
#define RPCMEM_HEAP_ID_SECURE (9)
#define RPCMEM_HEAP_ID_SYSTEM_CONTIG (21)
/**
* RPCMEM_DEFAULT_FLAGS should allocate memory with the same properties
* as the ION_FLAG_CACHED flag
*/
#ifdef ION_FLAG_CACHED
#define RPCMEM_DEFAULT_FLAGS ION_FLAG_CACHED
#else
#define RPCMEM_DEFAULT_FLAGS 1
#endif
/**
* RPCMEM_FLAG_UNCACHED
* ION_FLAG_CACHED should be defined as 1
*/
#define RPCMEM_FLAG_UNCACHED 0
#define RPCMEM_FLAG_CACHED RPCMEM_DEFAULT_FLAGS
/**
* examples:
*
* heap 22, uncached, 1kb
* rpcmem_alloc(22, 0, 1024);
* rpcmem_alloc(22, RPCMEM_FLAG_UNCACHED, 1024);
*
* heap 21, cached, 2kb
* rpcmem_alloc(21, RPCMEM_FLAG_CACHED, 2048);
* #include <ion.h>
* rpcmem_alloc(21, ION_FLAG_CACHED, 2048);
*
* just give me the defaults, 2kb
* rpcmem_alloc(RPCMEM_DEFAULT_HEAP, RPCMEM_DEFAULT_FLAGS, 2048);
* rpcmem_alloc_def(2048);
*
* give me the default flags, but from heap 18, 4kb
* rpcmem_alloc(18, RPCMEM_DEFAULT_FLAGS, 4096);
*
*/
#define ION_SECURE_FLAGS ((1 << 31) | (1 << 19))
/**
* To flag start/end for rpcmem_sync_cache
*/
#define RPCMEM_SYNC_START 0
#define RPCMEM_SYNC_END 1
#ifdef __cplusplus
extern "C" {
#endif
/**
* call once to initialize the library
* NOTE: rpcmem_init is not thread safe
*/
void rpcmem_init(void);
/**
* call once for cleanup
* NOTE: rpcmem_deinit is not thread safe
*/
void rpcmem_deinit(void);
/**
* Allocate via ION a buffer of size
* @heapid, the heap id to use
* @flags, ion flags to use to when allocating
* @size, the buffer size to allocate
* @retval, 0 on failure, pointer to buffer on success
*
* For example:
* buf = rpcmem_alloc(RPCMEM_DEFAULT_HEAP, RPCMEM_DEFAULT_FLAGS, size);
*/
void* rpcmem_alloc(int heapid, unsigned int flags, int size);
/**
* allocate with default settings
*/
#if !defined(WINNT) && !defined (_WIN32_WINNT)
__attribute__((unused))
#endif
static __inline void* rpcmem_alloc_def(int size) {
return rpcmem_alloc(RPCMEM_DEFAULT_HEAP, RPCMEM_DEFAULT_FLAGS, size);
}
/**
* free buffer, ignores invalid buffers
*/
void rpcmem_free(void* po);
/**
* returns associated fd
*/
int rpcmem_to_fd(void* po);
/**
* cache coherency management
*/
int rpcmem_sync_cache(void* po, unsigned int flags);
#ifdef __cplusplus
}
#endif
/** these are deprecated
*/
#define RPCMEM_HEAP_DEFAULT 0x80000000
#define RPCMEM_HEAP_NOREG 0x40000000
#define RPCMEM_HEAP_UNCACHED 0x20000000
#define RPCMEM_HEAP_NOVA 0x10000000
#define RPCMEM_HEAP_NONCOHERENT 0x08000000
#endif //RPCMEM_H
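Tying the header together, here is a minimal, self-contained sketch of the rpcmem lifecycle as it is used by the allocator above: init once, allocate a cached buffer from the system heap, expose its fd, bracket CPU access with cache syncs, then free and deinit. The buffer size and error handling are illustrative.

```cpp
#include <cstring>
#include "third_party/rpcmem/rpcmem.h"

int main() {
  rpcmem_init();  // once per process; not thread safe

  const int size = 4096;
  void *buf = rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_FLAG_CACHED, size);
  if (buf == nullptr) return 1;  // rpcmem_alloc returns 0 on failure

  int fd = rpcmem_to_fd(buf);    // descriptor to hand to OpenCL / the DSP
  (void)fd;

  rpcmem_sync_cache(buf, RPCMEM_SYNC_START);  // CPU is about to access the buffer
  std::memset(buf, 0, size);
  rpcmem_sync_cache(buf, RPCMEM_SYNC_END);    // CPU access finished

  rpcmem_free(buf);
  rpcmem_deinit();
  return 0;
}
```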
...@@ -51,6 +51,7 @@ include(${PROJECT_SOURCE_DIR}/third_party/opencl-headers/opencl-headers.cmake) ...@@ -51,6 +51,7 @@ include(${PROJECT_SOURCE_DIR}/third_party/opencl-headers/opencl-headers.cmake)
include(${PROJECT_SOURCE_DIR}/third_party/protobuf/protobuf.cmake) include(${PROJECT_SOURCE_DIR}/third_party/protobuf/protobuf.cmake)
include(${PROJECT_SOURCE_DIR}/third_party/tflite/tflite.cmake) include(${PROJECT_SOURCE_DIR}/third_party/tflite/tflite.cmake)
include(${PROJECT_SOURCE_DIR}/third_party/caffe/caffe.cmake) include(${PROJECT_SOURCE_DIR}/third_party/caffe/caffe.cmake)
include(${PROJECT_SOURCE_DIR}/third_party/rpcmem/rpcmem.cmake)
if(MACE_ENABLE_HEXAGON_DSP) if(MACE_ENABLE_HEXAGON_DSP)
include(${PROJECT_SOURCE_DIR}/third_party/nnlib/nnlib.cmake) include(${PROJECT_SOURCE_DIR}/third_party/nnlib/nnlib.cmake)
......
...@@ -34,12 +34,12 @@ mkdir -p $LIB_DIR/aarch64_linux_gnu/cpu_gpu ...@@ -34,12 +34,12 @@ mkdir -p $LIB_DIR/aarch64_linux_gnu/cpu_gpu
# build shared libraries # build shared libraries
echo "build shared lib for armeabi-v7a + cpu_gpu_dsp" echo "build shared lib for armeabi-v7a + cpu_gpu_dsp"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a --define rpcmem=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/
cp third_party/nnlib/armeabi-v7a/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ cp third_party/nnlib/armeabi-v7a/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/
echo "build shared lib for arm64-v8a + cpu_gpu_dsp" echo "build shared lib for arm64-v8a + cpu_gpu_dsp"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=arm64-v8a bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=arm64-v8a --define rpcmem=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu_dsp/ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu_dsp/
cp third_party/nnlib/arm64-v8a/*so $LIB_DIR/arm64-v8a/cpu_gpu_dsp/ cp third_party/nnlib/arm64-v8a/*so $LIB_DIR/arm64-v8a/cpu_gpu_dsp/
...@@ -49,11 +49,11 @@ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu_apu/ ...@@ -49,11 +49,11 @@ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu_apu/
cp third_party/apu/*so $LIB_DIR/arm64-v8a/cpu_gpu_apu/ cp third_party/apu/*so $LIB_DIR/arm64-v8a/cpu_gpu_apu/
echo "build shared lib for armeabi-v7a + cpu_gpu" echo "build shared lib for armeabi-v7a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define quantize=true --cpu=armeabi-v7a bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define quantize=true --cpu=armeabi-v7a --define rpcmem=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu/ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/armeabi-v7a/cpu_gpu/
echo "build shared lib for arm64-v8a + cpu_gpu" echo "build shared lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define quantize=true --cpu=arm64-v8a bazel build --config android --config optimization mace/libmace:libmace_dynamic --define neon=true --define opencl=true --define quantize=true --cpu=arm64-v8a --define rpcmem=true
cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/ cp bazel-bin/mace/libmace/libmace.so $LIB_DIR/arm64-v8a/cpu_gpu/
echo "build shared lib for arm_linux_gnueabihf + cpu_gpu" echo "build shared lib for arm_linux_gnueabihf + cpu_gpu"
...@@ -72,12 +72,12 @@ fi ...@@ -72,12 +72,12 @@ fi
# build static libraries # build static libraries
echo "build static lib for armeabi-v7a + cpu_gpu_dsp" echo "build static lib for armeabi-v7a + cpu_gpu_dsp"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=armeabi-v7a --define rpcmem=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/
cp third_party/nnlib/armeabi-v7a/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/ cp third_party/nnlib/armeabi-v7a/*so $LIB_DIR/armeabi-v7a/cpu_gpu_dsp/
echo "build static lib for arm64-v8a + cpu_gpu_dsp" echo "build static lib for arm64-v8a + cpu_gpu_dsp"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=arm64-v8a bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define hexagon=true --define quantize=true --cpu=arm64-v8a --define rpcmem=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu_dsp/ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu_dsp/
cp third_party/nnlib/arm64-v8a/*so $LIB_DIR/arm64-v8a/cpu_gpu_dsp/ cp third_party/nnlib/arm64-v8a/*so $LIB_DIR/arm64-v8a/cpu_gpu_dsp/
...@@ -87,11 +87,11 @@ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu_apu/ ...@@ -87,11 +87,11 @@ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu_apu/
cp third_party/apu/*so $LIB_DIR/arm64-v8a/cpu_gpu_apu/ cp third_party/apu/*so $LIB_DIR/arm64-v8a/cpu_gpu_apu/
echo "build static lib for armeabi-v7a + cpu_gpu" echo "build static lib for armeabi-v7a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define quantize=true --cpu=armeabi-v7a bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define quantize=true --cpu=armeabi-v7a --define rpcmem=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu/ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/armeabi-v7a/cpu_gpu/
echo "build static lib for arm64-v8a + cpu_gpu" echo "build static lib for arm64-v8a + cpu_gpu"
bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define quantize=true --cpu=arm64-v8a bazel build --config android --config optimization mace/libmace:libmace_static --config symbol_hidden --define neon=true --define opencl=true --define quantize=true --cpu=arm64-v8a --define rpcmem=true
cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/ cp bazel-genfiles/mace/libmace/libmace.a $LIB_DIR/arm64-v8a/cpu_gpu/
echo "build static lib for arm_linux_gnueabihf + cpu_gpu" echo "build static lib for arm_linux_gnueabihf + cpu_gpu"
......
...@@ -95,6 +95,11 @@ def parse_args(): ...@@ -95,6 +95,11 @@ def parse_args():
type=str2bool, type=str2bool,
default=True, default=True,
help="Whether to use quantization ops") help="Whether to use quantization ops")
parser.add_argument(
"--enable_rpcmem",
type=str2bool,
default=True,
help="Whether to use rpcmem")
parser.add_argument( parser.add_argument(
'--address_sanitizer', '--address_sanitizer',
action="store_true", action="store_true",
...@@ -164,6 +169,7 @@ def main(unused_args): ...@@ -164,6 +169,7 @@ def main(unused_args):
toolchain=toolchain, toolchain=toolchain,
enable_neon=FLAGS.enable_neon, enable_neon=FLAGS.enable_neon,
enable_quantize=FLAGS.enable_quantize, enable_quantize=FLAGS.enable_quantize,
enable_rpcmem=FLAGS.enable_rpcmem,
address_sanitizer=FLAGS.address_sanitizer, address_sanitizer=FLAGS.address_sanitizer,
debug_mode=FLAGS.debug_mode) debug_mode=FLAGS.debug_mode)
if FLAGS.run_target: if FLAGS.run_target:
......
...@@ -43,6 +43,7 @@ cmake -DANDROID_ABI="arm64-v8a" \ ...@@ -43,6 +43,7 @@ cmake -DANDROID_ABI="arm64-v8a" \
-DMACE_ENABLE_TESTS=ON \ -DMACE_ENABLE_TESTS=ON \
-DMACE_ENABLE_BENCHMARKS=ON \ -DMACE_ENABLE_BENCHMARKS=ON \
-DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \
-DMACE_ENABLE_RPCMEM=ON \
-DCMAKE_INSTALL_PREFIX=install \ -DCMAKE_INSTALL_PREFIX=install \
../../.. ../../..
make -j6 VERBOSE=1 && make install make -j6 VERBOSE=1 && make install
......
...@@ -45,6 +45,7 @@ cmake -DANDROID_ABI="armeabi-v7a" \ ...@@ -45,6 +45,7 @@ cmake -DANDROID_ABI="armeabi-v7a" \
-DMACE_ENABLE_TESTS=ON \ -DMACE_ENABLE_TESTS=ON \
-DMACE_ENABLE_BENCHMARKS=ON \ -DMACE_ENABLE_BENCHMARKS=ON \
-DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \
-DMACE_ENABLE_RPCMEM=ON \
-DCMAKE_INSTALL_PREFIX=install \ -DCMAKE_INSTALL_PREFIX=install \
../../.. ../../..
make -j6 VERBOSE=1 && make install make -j6 VERBOSE=1 && make install
......
...@@ -270,6 +270,7 @@ def bazel_build(target, ...@@ -270,6 +270,7 @@ def bazel_build(target,
enable_neon=True, enable_neon=True,
enable_opencl=True, enable_opencl=True,
enable_quantize=True, enable_quantize=True,
enable_rpcmem=True,
address_sanitizer=False, address_sanitizer=False,
symbol_hidden=True, symbol_hidden=True,
debug_mode=False, debug_mode=False,
...@@ -303,6 +304,8 @@ def bazel_build(target, ...@@ -303,6 +304,8 @@ def bazel_build(target,
"--define", "--define",
"quantize=%s" % str(enable_quantize).lower(), "quantize=%s" % str(enable_quantize).lower(),
"--define", "--define",
"rpcmem=%s" % str(enable_rpcmem).lower(),
"--define",
"hexagon=%s" % str(enable_hexagon).lower(), "hexagon=%s" % str(enable_hexagon).lower(),
"--define", "--define",
"hta=%s" % str(enable_hta).lower(), "hta=%s" % str(enable_hta).lower(),
......