diff --git a/docker/Dockerfile b/docker/Dockerfile
index 32d0cef583f71ff0a19593f3795b441c9103df99..8a8c089c493baa0982fbd68e8fa815ed68ea3e45 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -110,7 +110,8 @@ RUN apt-get install -y --no-install-recommends \

 # Install tools
 RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com setuptools
-RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com tensorflow==1.6.0 \
+RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com tensorflow==1.7.0 \
+    numpy>=1.14.0 \
     scipy \
     jinja2 \
     pyyaml \
diff --git a/docs/getting_started/how_to_build.rst b/docs/getting_started/how_to_build.rst
index 7f2c0d3a3e7030c78dc523b7f7812d0c9360a4d9..43178cf946ac54bc81689bc6e90d993bcc02fc91 100644
--- a/docs/getting_started/how_to_build.rst
+++ b/docs/getting_started/how_to_build.rst
@@ -33,11 +33,13 @@ How to build
 +=====================+=================+===================================+
 | bazel               | >= 0.5.4        | -                                 |
 +---------------------+-----------------+-----------------------------------+
-| android-ndk         | r12c            | -                                 |
+| android-ndk         | r15c,r16b       | -                                 |
 +---------------------+-----------------+-----------------------------------+
 | adb                 | >= 1.0.32       | apt install -y android-tools-adb  |
 +---------------------+-----------------+-----------------------------------+
-| tensorflow          | 1.4.0           | pip install tensorflow==1.4.0     |
+| tensorflow          | 1.7.0           | pip install tensorflow==1.7.0     |
++---------------------+-----------------+-----------------------------------+
+| numpy               | >= 1.14.0       | pip install numpy                 |
 +---------------------+-----------------+-----------------------------------+
 | scipy               | >= 1.0.0        | pip install scipy                 |
 +---------------------+-----------------+-----------------------------------+
@@ -45,6 +47,10 @@ How to build
 +---------------------+-----------------+-----------------------------------+
 | PyYaml              | >= 3.12         | pip install pyyaml                |
 +---------------------+-----------------+-----------------------------------+
+| sh                  | >= 1.12.14      | pip install sh                    |
++---------------------+-----------------+-----------------------------------+
+| filelock            | >= 3.0.0        | pip install filelock              |
++---------------------+-----------------+-----------------------------------+
 | docker(for caffe)   | >= 17.09.0-ce   | `install doc `__                  |
 +---------------------+-----------------+-----------------------------------+

@@ -229,29 +235,47 @@ Caffe: only the latest version is supported; use Caffe's own tools to upgrade old models
     // Include the headers
     #include "mace/public/mace.h"
-    #include "mace/public/{MODEL_TAG}.h"
+    #include "mace/public/mace_engine_factory.h"

-    // 0. Set up internal storage
+    // 0. Set up internal storage (only needs to be set once)
     const std::string file_path ="/path/to/store/internel/files";
     std::shared_ptr storage_factory(
         new FileStorageFactory(file_path));
     ConfigKVStorageFactory(storage_factory);

-    //1. Load model data from a file or from code; a custom loader (e.g. your own compression/encryption) can also be used
-    //   If the model data is embedded in the code, set the argument to nullptr.
-    unsigned char *model_data = mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
+    //1. Declare the device type (must match the runtime specified at build time)
+    DeviceType device_type = DeviceType::GPU;

-    //2. Create the net object
-    NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
-
-    //3. Declare the device type (must match the runtime specified at build time)
-    DeviceType device_type = DeviceType::OPENCL;
-
-    //4. Define the arrays of input and output names
+    //2. Define the arrays of input and output names
     std::vector input_names = {...};
     std::vector output_names = {...};

-    //5. Create the input and output objects
+    //3. Create the MaceEngine object
+    std::shared_ptr engine;
+    MaceStatus create_engine_status;
+    // Create Engine
+    if (model_data_file.empty()) {
+      create_engine_status =
+          CreateMaceEngine(model_name.c_str(),
+                           nullptr,
+                           input_names,
+                           output_names,
+                           device_type,
+                           &engine);
+    } else {
+      create_engine_status =
+          CreateMaceEngine(model_name.c_str(),
+                           model_data_file.c_str(),
+                           input_names,
+                           output_names,
+                           device_type,
+                           &engine);
+    }
+    if (create_engine_status != MaceStatus::MACE_SUCCESS) {
+      // do something
+    }
+
+    //4. Create the input and output objects
     std::map inputs;
     std::map outputs;

     for (size_t i = 0; i < input_count; ++i) {
@@ -276,14 +300,6 @@ Caffe: only the latest version is supported; use Caffe's own tools to upgrade old models
       outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out);
     }

-    //6. Create the MaceEngine object
-    mace::MaceEngine engine(&net_def, device_type, input_names, output_names);
-
-    //7. If the device type is OPENCL or HEXAGON, model_data can be released here
-    if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
-      mace::MACE_MODEL_TAG::UnloadModelData(model_data);
-    }
-
-    //8. Run the model and get the results
+    //5. Run the model and get the results
     engine.Run(inputs, &outputs);

diff --git a/mace/core/allocator.h b/mace/core/allocator.h
index 8c73025b4923cd860f3a47f3109cc4325728b259..7ab701ddd21b15b0bb88b258d9b2f85801b8dda2 100644
--- a/mace/core/allocator.h
+++ b/mace/core/allocator.h
@@ -16,6 +16,7 @@
 #define MACE_CORE_ALLOCATOR_H_

 #include
+#include
 #include
 #include
 #include
@@ -42,9 +43,10 @@ class Allocator {
  public:
   Allocator() {}
   virtual ~Allocator() noexcept {}
-  virtual void *New(size_t nbytes) const = 0;
-  virtual void *NewImage(const std::vector &image_shape,
-                         const DataType dt) const = 0;
+  virtual MaceStatus New(size_t nbytes, void **result) const = 0;
+  virtual MaceStatus NewImage(const std::vector &image_shape,
+                              const DataType dt,
+                              void **result) const = 0;
   virtual void Delete(void *data) const = 0;
   virtual void DeleteImage(void *data) const = 0;
   virtual void *Map(void *buffer, size_t offset, size_t nbytes) const = 0;
@@ -53,44 +55,54 @@ class Allocator {
                     std::vector *mapped_image_pitch) const = 0;
   virtual void Unmap(void *buffer, void *mapper_ptr) const = 0;
   virtual bool OnHost() const = 0;
-
-  template
-  T *New(size_t num_elements) {
-    if (num_elements > (std::numeric_limits::max() / sizeof(T))) {
-      return nullptr;
-    }
-    void *p = New(sizeof(T) * num_elements);
-    T *typed_p = reinterpret_cast(p);
-    return typed_p;
-  }
 };

 class CPUAllocator : public Allocator {
  public:
   ~CPUAllocator() override {}
-  void *New(size_t nbytes) const override {
+  MaceStatus New(size_t nbytes, void **result) const override {
     VLOG(3) << "Allocate CPU buffer: " << nbytes;
+    if (nbytes == 0) {
+      return MaceStatus::MACE_SUCCESS;
+    }
     void *data = nullptr;
 #if defined(__ANDROID__) || defined(__hexagon__)
     data = memalign(kMaceAlignment, nbytes);
+    if (data == NULL) {
+      LOG(WARNING) << "Allocate CPU Buffer with "
+                   << nbytes << " bytes failed because of"
+                   << strerror(errno);
+      *result = nullptr;
+      return MaceStatus::MACE_OUT_OF_RESOURCES;
+    }
 #else
-    MACE_CHECK(posix_memalign(&data, kMaceAlignment, nbytes) == 0);
+    int ret = posix_memalign(&data, kMaceAlignment, nbytes);
+    if (ret != 0) {
+      LOG(WARNING) << "Allocate CPU Buffer with "
+                   << nbytes << " bytes failed because of"
+                   << strerror(errno);
+      *result = nullptr;
+      return MaceStatus::MACE_OUT_OF_RESOURCES;
+    }
 #endif
-    MACE_CHECK_NOTNULL(data);
     // TODO(heliangliang) This should be avoided sometimes
     memset(data, 0, nbytes);
-    return data;
+    *result = data;
+    return MaceStatus::MACE_SUCCESS;
   }

-  void *NewImage(const std::vector &shape,
-                 const DataType dt) const override {
+  MaceStatus NewImage(const std::vector &shape,
+                      const DataType dt,
+                      void **result) const override {
     MACE_UNUSED(shape);
     MACE_UNUSED(dt);
+    MACE_UNUSED(result);
     LOG(FATAL) << "Allocate CPU image";
-    return nullptr;
+    return MaceStatus::MACE_SUCCESS;
   }
   void Delete(void *data) const override {
+    MACE_CHECK_NOTNULL(data);
     VLOG(3) << "Free CPU buffer";
     free(data);
   }
diff --git a/mace/core/buffer.h b/mace/core/buffer.h
index f4b252a776296b1e065816c3a9b6288d13d03837..afac3f296b1bb44798a1261f639bb612336317ae 100644
--- a/mace/core/buffer.h
+++ b/mace/core/buffer.h
@@ -38,6 +38,11 @@ class BufferBase {

   virtual void *raw_mutable_data() = 0;

+  virtual MaceStatus Allocate(index_t nbytes) = 0;
+
+  virtual MaceStatus Allocate(const std::vector &shape,
+                              DataType data_type) = 0;
+
   virtual void *Map(index_t offset,
                     index_t length,
                     std::vector *pitch) const = 0;
@@ -48,7 +53,7 @@ class BufferBase {

   virtual void UnMap() = 0;

-  virtual void Resize(index_t size) = 0;
+  virtual MaceStatus Resize(index_t nbytes) = 0;

   virtual void
Copy(void *src, index_t offset, index_t length) = 0; @@ -83,14 +88,6 @@ class Buffer : public BufferBase { mapped_buf_(nullptr), is_data_owner_(true) {} - Buffer(Allocator *allocator, index_t size) - : BufferBase(size), - allocator_(allocator), - mapped_buf_(nullptr), - is_data_owner_(true) { - buf_ = allocator->New(size); - } - Buffer(Allocator *allocator, void *data, index_t size) : BufferBase(size), allocator_(allocator), @@ -132,6 +129,31 @@ class Buffer : public BufferBase { } } + MaceStatus Allocate(index_t nbytes) { + if (nbytes <= 0) { + return MaceStatus::MACE_SUCCESS; + } + MACE_CHECK(is_data_owner_, + "data is not owned by this buffer, cannot reallocate"); + if (mapped_buf_ != nullptr) { + UnMap(); + } + if (buf_ != nullptr) { + allocator_->Delete(buf_); + } + size_ = nbytes; + return allocator_->New(nbytes, &buf_); + } + + MaceStatus Allocate(const std::vector &shape, + DataType data_type) { + if (shape.empty()) return MaceStatus::MACE_SUCCESS; + index_t nbytes = std::accumulate(shape.begin(), shape.end(), + 1, std::multiplies()) + * GetEnumTypeSize(data_type); + return this->Allocate(nbytes); + } + void *Map(index_t offset, index_t length, std::vector *pitch) const { MACE_CHECK_NOTNULL(buf_); MACE_UNUSED(pitch); @@ -154,16 +176,17 @@ class Buffer : public BufferBase { mapped_buf_ = nullptr; } - void Resize(index_t size) { + MaceStatus Resize(index_t nbytes) { MACE_CHECK(is_data_owner_, "data is not owned by this buffer, cannot resize"); - if (size != size_) { + if (nbytes != size_) { if (buf_ != nullptr) { allocator_->Delete(buf_); } - size_ = size; - buf_ = allocator_->New(size); + size_ = nbytes; + return allocator_->New(nbytes, &buf_); } + return MaceStatus::MACE_SUCCESS; } void Copy(void *src, index_t offset, index_t length) { @@ -195,18 +218,6 @@ class Image : public BufferBase { buf_(nullptr), mapped_buf_(nullptr) {} - Image(std::vector shape, DataType data_type) - : BufferBase( - std::accumulate( - shape.begin(), shape.end(), 1, std::multiplies()) * - GetEnumTypeSize(data_type)), - allocator_(GetDeviceAllocator(GPU)), - mapped_buf_(nullptr) { - shape_ = shape; - data_type_ = data_type; - buf_ = allocator_->NewImage(shape, data_type); - } - virtual ~Image() { if (mapped_buf_ != nullptr) { UnMap(); @@ -233,6 +244,29 @@ class Image : public BufferBase { std::vector image_shape() const { return shape_; } + MaceStatus Allocate(index_t nbytes) { + MACE_UNUSED(nbytes); + LOG(FATAL) << "Image should not call this allocate function"; + return MaceStatus::MACE_SUCCESS; + } + + MaceStatus Allocate(const std::vector &shape, + DataType data_type) { + index_t size = std::accumulate( + shape.begin(), shape.end(), 1, std::multiplies()) * + GetEnumTypeSize(data_type); + if (mapped_buf_ != nullptr) { + UnMap(); + } + if (buf_ != nullptr) { + allocator_->DeleteImage(buf_); + } + size_ = size; + shape_ = shape; + data_type_ = data_type; + return allocator_->NewImage(shape, data_type, &buf_); + } + void *Map(index_t offset, index_t length, std::vector *pitch) const { MACE_UNUSED(offset); MACE_UNUSED(length); @@ -259,9 +293,10 @@ class Image : public BufferBase { mapped_buf_ = nullptr; } - void Resize(index_t size) { + MaceStatus Resize(index_t size) { MACE_UNUSED(size); MACE_NOT_IMPLEMENTED; + return MaceStatus::MACE_SUCCESS; } void Copy(void *src, index_t offset, index_t length) { @@ -339,6 +374,20 @@ class BufferSlice : public BufferBase { } } + MaceStatus Allocate(index_t size) { + MACE_UNUSED(size); + LOG(FATAL) << "BufferSlice should not call allocate function"; + return 
MaceStatus::MACE_SUCCESS; + } + + MaceStatus Allocate(const std::vector &shape, + DataType data_type) { + MACE_UNUSED(shape); + MACE_UNUSED(data_type); + LOG(FATAL) << "BufferSlice should not call allocate function"; + return MaceStatus::MACE_SUCCESS; + } + void *Map(index_t offset, index_t length, std::vector *pitch) const { MACE_UNUSED(offset); MACE_UNUSED(length); @@ -364,9 +413,10 @@ class BufferSlice : public BufferBase { mapped_buf_ = nullptr; } - void Resize(index_t size) { + MaceStatus Resize(index_t size) { MACE_CHECK(size == size_, "resize buffer slice from ", size_, " to ", size, " is illegal"); + return MaceStatus::MACE_SUCCESS; } void Copy(void *src, index_t offset, index_t length) { @@ -396,20 +446,17 @@ class ScratchBuffer: public Buffer { : Buffer(allocator), offset_(0) {} - ScratchBuffer(Allocator *allocator, index_t size) - : Buffer(allocator, size), - offset_(0) {} - ScratchBuffer(Allocator *allocator, void *data, index_t size) : Buffer(allocator, data, size), offset_(0) {} virtual ~ScratchBuffer() {} - void GrowSize(index_t size) { + MaceStatus GrowSize(index_t size) { if (size > size_) { - Resize(size); + return Resize(size); } + return MaceStatus::MACE_SUCCESS; } BufferSlice Scratch(index_t size) { diff --git a/mace/core/mace.cc b/mace/core/mace.cc index dc9cbaa90ed9e96a4f24fd813caa86d0b0fb64fa..83ab4bd19ae48debffd5585fe9035a150ac24dac 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -82,12 +82,14 @@ std::shared_ptr MaceTensor::data() { return impl_->data; } // Mace Engine class MaceEngine::Impl { public: - explicit Impl(const NetDef *net_def, - DeviceType device_type, - const std::vector &input_nodes, - const std::vector &output_nodes); + explicit Impl(DeviceType device_type); + ~Impl(); + MaceStatus Init(const NetDef *net_def, + const std::vector &input_nodes, + const std::vector &output_nodes); + MaceStatus Run(const std::map &inputs, std::map *outputs, RunMetadata *run_metadata); @@ -104,10 +106,8 @@ class MaceEngine::Impl { DISABLE_COPY_AND_ASSIGN(Impl); }; -MaceEngine::Impl::Impl(const NetDef *net_def, - DeviceType device_type, - const std::vector &input_nodes, - const std::vector &output_nodes) + +MaceEngine::Impl::Impl(DeviceType device_type) : op_registry_(new OperatorRegistry()), device_type_(device_type), ws_(new Workspace()), @@ -115,7 +115,12 @@ MaceEngine::Impl::Impl(const NetDef *net_def, #ifdef MACE_ENABLE_HEXAGON , hexagon_controller_(nullptr) #endif -{ +{} + +MaceStatus MaceEngine::Impl::Init( + const NetDef *net_def, + const std::vector &input_nodes, + const std::vector &output_nodes) { LOG(INFO) << "MACE version: " << MaceVersion(); // Set storage path for internal usage for (auto input_name : input_nodes) { @@ -127,7 +132,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def, GetDeviceAllocator(device_type_), DT_FLOAT); } #ifdef MACE_ENABLE_HEXAGON - if (device_type == HEXAGON) { + if (device_type_ == HEXAGON) { hexagon_controller_.reset(new HexagonControlWrapper()); MACE_CHECK(hexagon_controller_->Config(), "hexagon config error"); MACE_CHECK(hexagon_controller_->Init(), "hexagon init error"); @@ -143,18 +148,22 @@ MaceEngine::Impl::Impl(const NetDef *net_def, } } else { #endif - ws_->LoadModelTensor(*net_def, device_type); + MaceStatus status = ws_->LoadModelTensor(*net_def, device_type_); + if (status != MaceStatus::MACE_SUCCESS) { + return status; + } - // Init model - auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type, + // Init model + auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type_, NetMode::INIT); 
if (!net->Run()) { LOG(FATAL) << "Net init run failed"; } - net_ = CreateNet(op_registry_, *net_def, ws_.get(), device_type); + net_ = CreateNet(op_registry_, *net_def, ws_.get(), device_type_); #ifdef MACE_ENABLE_HEXAGON } #endif + return MaceStatus::MACE_SUCCESS; } MaceEngine::Impl::~Impl() { @@ -244,16 +253,17 @@ MaceStatus MaceEngine::Impl::Run( return MACE_SUCCESS; } -MaceEngine::MaceEngine(const NetDef *net_def, - DeviceType device_type, - const std::vector &input_nodes, - const std::vector &output_nodes) { - impl_ = std::unique_ptr( - new MaceEngine::Impl(net_def, device_type, input_nodes, output_nodes)); -} +MaceEngine::MaceEngine(DeviceType device_type): + impl_(new MaceEngine::Impl(device_type)) {} MaceEngine::~MaceEngine() = default; +MaceStatus MaceEngine::Init(const NetDef *net_def, + const std::vector &input_nodes, + const std::vector &output_nodes) { + return impl_->Init(net_def, input_nodes, output_nodes); +} + MaceStatus MaceEngine::Run(const std::map &inputs, std::map *outputs, RunMetadata *run_metadata) { diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index 9b94eb92ec15eb187d76fdf429e14712995baf09..96b083cfcbd0fe9b5fea8cbd2862a69e0700fec5 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -44,18 +44,30 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) { OpenCLAllocator::OpenCLAllocator() {} OpenCLAllocator::~OpenCLAllocator() {} -void *OpenCLAllocator::New(size_t nbytes) const { +MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const { + if (nbytes == 0) { + return MaceStatus::MACE_SUCCESS; + } VLOG(3) << "Allocate OpenCL buffer: " << nbytes; cl_int error; cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nbytes, nullptr, &error); - MACE_CHECK_CL_SUCCESS(error); - return static_cast(buffer); + if (error != CL_SUCCESS) { + LOG(WARNING) << "Allocate OpenCL Buffer with " + << nbytes << " bytes failed because of" + << OpenCLErrorToString(error); + *result = nullptr; + return MaceStatus::MACE_OUT_OF_RESOURCES; + } else { + *result = buffer; + return MaceStatus::MACE_SUCCESS; + } } -void *OpenCLAllocator::NewImage(const std::vector &image_shape, - const DataType dt) const { +MaceStatus OpenCLAllocator::NewImage(const std::vector &image_shape, + const DataType dt, + void **result) const { MACE_CHECK(image_shape.size() == 2) << "Image shape's size must equal 2"; VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", " << image_shape[1]; @@ -67,11 +79,17 @@ void *OpenCLAllocator::NewImage(const std::vector &image_shape, new cl::Image2D(OpenCLRuntime::Global()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format, image_shape[0], image_shape[1], 0, nullptr, &error); - MACE_CHECK_CL_SUCCESS(error) << " with image shape: [" - << image_shape[0] << ", " << image_shape[1] - << "]"; - - return cl_image; + if (error != CL_SUCCESS) { + LOG(WARNING) << "Allocate OpenCL image with shape: [" + << image_shape[0] << ", " << image_shape[1] + << "] failed because of" + << OpenCLErrorToString(error); + *result = nullptr; + return MaceStatus::MACE_OUT_OF_RESOURCES; + } else { + *result = cl_image; + return MaceStatus::MACE_SUCCESS; + } } void OpenCLAllocator::Delete(void *buffer) const { diff --git a/mace/core/runtime/opencl/opencl_allocator.h b/mace/core/runtime/opencl/opencl_allocator.h index 
0ec50f61062aad81e2e8b0fc010e2a57b457a833..6304add8583f7b2e47c58cd6e6b186ea43b7f092 100644 --- a/mace/core/runtime/opencl/opencl_allocator.h +++ b/mace/core/runtime/opencl/opencl_allocator.h @@ -27,15 +27,16 @@ class OpenCLAllocator : public Allocator { ~OpenCLAllocator() override; - void *New(size_t nbytes) const override; + MaceStatus New(size_t nbytes, void **result) const override; /* * Use Image2D with RGBA (128-bit) format to represent the image. * * @ shape : [depth, ..., height, width ]. */ - void *NewImage(const std::vector &image_shape, - const DataType dt) const override; + MaceStatus NewImage(const std::vector &image_shape, + const DataType dt, + void **result) const override; void Delete(void *buffer) const override; diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 87a6cb3c1c9ff9f3712c50e07c8c6e0d69f5cf61..8c9f3e3b6a139f25d1d6f7a03754bc817e245795 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -216,16 +216,19 @@ class Tensor { MACE_CHECK(raw_size() <= buffer_->size()); } - inline void Resize(const std::vector &shape) { + inline MaceStatus Resize(const std::vector &shape) { shape_ = shape; image_shape_.clear(); if (buffer_ != nullptr) { MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage."); - if (raw_size() + EXTRA_BUFFER_PAD_SIZE > buffer_->size()) - buffer_->Resize(raw_size() + EXTRA_BUFFER_PAD_SIZE); + if (raw_size() + EXTRA_BUFFER_PAD_SIZE > buffer_->size()) { + return buffer_->Resize(raw_size() + EXTRA_BUFFER_PAD_SIZE); + } + return MaceStatus::MACE_SUCCESS; } else { MACE_CHECK(is_buffer_owner_); - buffer_ = new Buffer(allocator_, raw_size() + EXTRA_BUFFER_PAD_SIZE); + buffer_ = new Buffer(allocator_); + return buffer_->Allocate(raw_size() + EXTRA_BUFFER_PAD_SIZE); } } @@ -241,13 +244,14 @@ class Tensor { is_buffer_owner_ = false; } - inline void ResizeImage(const std::vector &shape, - const std::vector &image_shape) { + inline MaceStatus ResizeImage(const std::vector &shape, + const std::vector &image_shape) { shape_ = shape; image_shape_ = image_shape; if (buffer_ == nullptr) { MACE_CHECK(is_buffer_owner_); - buffer_ = new Image(image_shape, dtype_); + buffer_ = new Image(); + return buffer_->Allocate(image_shape, dtype_); } else { MACE_CHECK(has_opencl_image(), "Cannot ResizeImage buffer, use Resize."); Image *image = dynamic_cast(buffer_); @@ -257,24 +261,27 @@ class Tensor { "): current physical image shape: ", image->image_shape()[0], ", ", image->image_shape()[1], " < logical image shape: ", image_shape[0], ", ", image_shape[1]); + return MaceStatus::MACE_SUCCESS; } } - inline void ResizeLike(const Tensor &other) { ResizeLike(&other); } + inline MaceStatus ResizeLike(const Tensor &other) { + return ResizeLike(&other); + } - inline void ResizeLike(const Tensor *other) { + inline MaceStatus ResizeLike(const Tensor *other) { if (other->has_opencl_image()) { if (is_buffer_owner_ && buffer_ != nullptr && !has_opencl_image()) { delete buffer_; buffer_ = nullptr; } - ResizeImage(other->shape(), other->image_shape_); + return ResizeImage(other->shape(), other->image_shape_); } else { if (is_buffer_owner_ && buffer_ != nullptr && has_opencl_image()) { delete buffer_; buffer_ = nullptr; } - Resize(other->shape()); + return Resize(other->shape()); } } diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index 46ae0b48a49aa461fb0efee21826a8bd5fc443aa..1f653a458ecdf98abb9dc6441439dbda6b855a23 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -60,7 +60,7 @@ std::vector Workspace::Tensors() const { return names; } 
-void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { +MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { MACE_LATENCY_LOGGER(1, "Load model tensors"); index_t model_data_size = 0; unsigned char *model_data_ptr = nullptr; @@ -89,7 +89,11 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { model_data_size)); } else { tensor_buffer_ = std::unique_ptr( - new Buffer(GetDeviceAllocator(type), model_data_size)); + new Buffer(GetDeviceAllocator(type))); + MaceStatus status = tensor_buffer_->Allocate(model_data_size); + if (status != MaceStatus::MACE_SUCCESS) { + return status; + } tensor_buffer_->Map(nullptr); tensor_buffer_->Copy(model_data_ptr, 0, model_data_size); tensor_buffer_->UnMap(); @@ -120,14 +124,16 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { } if (type == DeviceType::CPU || type == DeviceType::GPU) { - CreateOutputTensorBuffer(net_def, type); + MaceStatus status = CreateOutputTensorBuffer(net_def, type); + if (status != MaceStatus::MACE_SUCCESS) return status; } + return MaceStatus::MACE_SUCCESS; } -void Workspace::CreateOutputTensorBuffer(const NetDef &net_def, - DeviceType device_type) { +MaceStatus Workspace::CreateOutputTensorBuffer(const NetDef &net_def, + DeviceType device_type) { if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) { - return; + return MaceStatus::MACE_SUCCESS; } DataType dtype = DataType::DT_INVALID; @@ -157,14 +163,24 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def, // TODO(liuqi): refactor based on PB if (mem_block.mem_id() >= 20000) { std::unique_ptr image_buf( - new Image({mem_block.x(), mem_block.y()}, dtype)); + new Image()); + MaceStatus status = image_buf->Allocate( + {mem_block.x(), mem_block.y()}, dtype); + if (status != MaceStatus::MACE_SUCCESS) { + return status; + } preallocated_allocator_.SetBuffer(mem_block.mem_id(), std::move(image_buf)); } } else { if (mem_block.mem_id() < 20000) { std::unique_ptr tensor_buf( - new Buffer(GetDeviceAllocator(device_type), mem_block.x())); + new Buffer(GetDeviceAllocator(device_type))); + MaceStatus status = tensor_buf->Allocate( + mem_block.x() * GetEnumTypeSize(dtype)); + if (status != MaceStatus::MACE_SUCCESS) { + return status; + } preallocated_allocator_.SetBuffer(mem_block.mem_id(), std::move(tensor_buf)); } @@ -201,6 +217,7 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def, } } } + return MaceStatus::MACE_SUCCESS; } ScratchBuffer *Workspace::GetScratchBuffer(DeviceType device_type) { diff --git a/mace/core/workspace.h b/mace/core/workspace.h index e9e11ea38810a8fd71c9871c07cb199c803d3dd2..06236fb987fed16ba42fb3a3ce885d2ccf7dbf72 100644 --- a/mace/core/workspace.h +++ b/mace/core/workspace.h @@ -47,12 +47,13 @@ class Workspace { std::vector Tensors() const; - void LoadModelTensor(const NetDef &net_def, DeviceType type); + MaceStatus LoadModelTensor(const NetDef &net_def, DeviceType type); ScratchBuffer *GetScratchBuffer(DeviceType device_type); private: - void CreateOutputTensorBuffer(const NetDef &net_def, DeviceType device_type); + MaceStatus CreateOutputTensorBuffer(const NetDef &net_def, + DeviceType device_type); TensorMap tensor_map_; diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index 7f8258280728872c32f96cfc49316e69d370eb08..bf8dc7055ecb727ba2c13d8fa5b30edfded49621 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -99,7 +99,6 @@ struct Conv2dFunctor : Conv2dFunctorBase { #pragma omp parallel for 
collapse(2) for (index_t b = 0; b < in_shape[0]; b++) { for (index_t m = 0; m < filter_shape[0]; m += 4) { - const index_t in_height = in_shape[2]; const index_t in_width = in_shape[3]; const index_t out_height = out_shape[2]; const index_t out_width = out_shape[3]; @@ -322,8 +321,6 @@ struct Conv2dFunctor : Conv2dFunctorBase { index_t dilation_h = dilations_[0]; index_t dilation_w = dilations_[1]; - const index_t filter_hw[2] = {filter_h, filter_w}; - MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch"); index_t padded_input_height = input_height + paddings[0]; diff --git a/mace/kernels/opencl/activation.cc b/mace/kernels/opencl/activation.cc index 5cee48620aa0aa6be6600bbbe331016a879c4c54..6b55696614201b24d4492275c2ae219a5038926e 100644 --- a/mace/kernels/opencl/activation.cc +++ b/mace/kernels/opencl/activation.cc @@ -45,7 +45,8 @@ void ActivationFunctor::operator()(const Tensor *input, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc index 4587a2cb7b2c8a5cebe0470533a1457bb6937e1a..c47213f593cce3da126555993b5b500e95019414 100644 --- a/mace/kernels/opencl/addn.cc +++ b/mace/kernels/opencl/addn.cc @@ -58,7 +58,8 @@ void AddNFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/batch_norm.cc b/mace/kernels/opencl/batch_norm.cc index f28c9ccc6cda25ec713c108bc1eae2ad3f9a38ed..80fafdbcb3f0f129f6ce97ab6cf57406cb617b60 100644 --- a/mace/kernels/opencl/batch_norm.cc +++ b/mace/kernels/opencl/batch_norm.cc @@ -56,7 +56,8 @@ void BatchNormFunctor::operator()(const Tensor *input, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/bias_add.cc b/mace/kernels/opencl/bias_add.cc index 05b9d169c966a117730f866b6d402c94524844a7..e50dcf58611360833ae2fbaeff2dc9d2d721c01b 100644 --- a/mace/kernels/opencl/bias_add.cc +++ b/mace/kernels/opencl/bias_add.cc @@ -49,7 +49,8 @@ void BiasAddFunctor::operator()(const Tensor *input, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc index 1bce914c1b817a489f444e32b5284c25a7f0d527..bf629e373b77ce57f24640c342a6ce6fe2c5ab45 100644 --- a/mace/kernels/opencl/buffer_to_image.cc +++ 
b/mace/kernels/opencl/buffer_to_image.cc @@ -93,7 +93,8 @@ void BufferToImageFunctor::operator()( built_options.emplace("-DOUT_OF_RANGE_CHECK"); if (!kernel_error_) { kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/channel_shuffle.cc b/mace/kernels/opencl/channel_shuffle.cc index 7cb082544f55c2bf72711ec0fe6ec0e8448442eb..d16a3d8a73c14df3282c940f01db5b2848a78d34 100644 --- a/mace/kernels/opencl/channel_shuffle.cc +++ b/mace/kernels/opencl/channel_shuffle.cc @@ -56,7 +56,8 @@ void ChannelShuffleFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc index 96c15fd8adfe4369d483cc6be424d341fe59b743..239041008bcf08cd898bbe9b7f722d68cb4afdec 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -67,7 +67,8 @@ static void Concat2(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -148,7 +149,8 @@ static void ConcatN(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/conv_2d_1x1.cc b/mace/kernels/opencl/conv_2d_1x1.cc index 52ed0368fc6fbe824ee2f254394ecfbc6324071b..5b79ea662a3f0d101ca4f4e3ab0faf2ce5f2ffd3 100644 --- a/mace/kernels/opencl/conv_2d_1x1.cc +++ b/mace/kernels/opencl/conv_2d_1x1.cc @@ -100,7 +100,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/conv_2d_3x3.cc b/mace/kernels/opencl/conv_2d_3x3.cc index f5600883d850e7f05052e8830b2ab79815ffd15e..5386c4173ad5441be104c7e10c183113223915ef 100644 --- a/mace/kernels/opencl/conv_2d_3x3.cc +++ b/mace/kernels/opencl/conv_2d_3x3.cc @@ -86,7 +86,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); 
(*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/conv_2d_general.cc b/mace/kernels/opencl/conv_2d_general.cc index 2329984a0c53592c8d73a4b629a63167081c2a33..e44d898168fdfd096fa495d6b076a668f4f1a2fd 100644 --- a/mace/kernels/opencl/conv_2d_general.cc +++ b/mace/kernels/opencl/conv_2d_general.cc @@ -94,7 +94,8 @@ extern void Conv2dOpencl(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/deconv_2d_opencl.cc b/mace/kernels/opencl/deconv_2d_opencl.cc index bbcbec6c01126095af0e706986d5c47a1ffa707a..abb4b43effff898009bdc56436f9ae44b16cc40b 100644 --- a/mace/kernels/opencl/deconv_2d_opencl.cc +++ b/mace/kernels/opencl/deconv_2d_opencl.cc @@ -65,7 +65,8 @@ void Deconv2dOpencl(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/depth_to_space.cc b/mace/kernels/opencl/depth_to_space.cc index fd25f948c355999909bcd670e41ff249dc4e5aea..609ad20516444970013ecc5ba796eafb16c060f3 100644 --- a/mace/kernels/opencl/depth_to_space.cc +++ b/mace/kernels/opencl/depth_to_space.cc @@ -86,7 +86,8 @@ void DepthToSpaceOpFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/depthwise_conv.cc b/mace/kernels/opencl/depthwise_conv.cc index 43a24e662828a5282914c230553220cc2adc30c9..c7800d0ae24e5d4ed7471cc1f1ea3829ddacd9fc 100644 --- a/mace/kernels/opencl/depthwise_conv.cc +++ b/mace/kernels/opencl/depthwise_conv.cc @@ -97,7 +97,8 @@ static void DepthwiseConv2d(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/eltwise.cc b/mace/kernels/opencl/eltwise.cc index 94b4c322bc625fa82f9d2e482c99e5b95fdd41d3..4f0590466b02c1a400682a89b656394bdd7318b3 100644 --- a/mace/kernels/opencl/eltwise.cc +++ b/mace/kernels/opencl/eltwise.cc @@ -97,7 +97,8 @@ void EltwiseFunctor::operator()(const Tensor *input0, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); 
*(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/fully_connected.cc b/mace/kernels/opencl/fully_connected.cc index 0022b92380208a7f4eb06dff68a8ec45c18dff39..6e0678daff35642a9c673ecd1a76e4059858aa67 100644 --- a/mace/kernels/opencl/fully_connected.cc +++ b/mace/kernels/opencl/fully_connected.cc @@ -74,7 +74,8 @@ void FCWXKernel(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -200,7 +201,8 @@ void FCWTXKernel(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/image_to_buffer.cc b/mace/kernels/opencl/image_to_buffer.cc index 09b040dd454fbe1a2d24a85019c66116d3610e70..1cefff9e6ba56f00ed366e16aae268dcd0a78e16 100644 --- a/mace/kernels/opencl/image_to_buffer.cc +++ b/mace/kernels/opencl/image_to_buffer.cc @@ -86,7 +86,8 @@ void ImageToBufferFunctor::operator()( built_options.emplace("-DOUT_OF_RANGE_CHECK"); if (!kernel_error_) { kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/matmul.cc b/mace/kernels/opencl/matmul.cc index 9a16694a0284f1b6583ee633487b4725283bafea..cc63ed04962938f1c4b75e57ec7a618f06fbf2aa 100644 --- a/mace/kernels/opencl/matmul.cc +++ b/mace/kernels/opencl/matmul.cc @@ -54,7 +54,8 @@ void MatMulFunctor::operator()(const Tensor *A, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/out_of_range_check_test.cc b/mace/kernels/opencl/out_of_range_check_test.cc index 012edd70cd9a8c73a409886d37fad0b29ef8411b..467a595309c32f26de3ae271040045092252d840 100644 --- a/mace/kernels/opencl/out_of_range_check_test.cc +++ b/mace/kernels/opencl/out_of_range_check_test.cc @@ -57,7 +57,8 @@ bool BufferToImageOpImpl(Tensor *buffer, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error->Allocate(1); kernel_error->Map(nullptr); *(kernel_error->mutable_data()) = 0; kernel_error->UnMap(); diff --git a/mace/kernels/opencl/pad.cc b/mace/kernels/opencl/pad.cc index bc093c16e3f2b66017fe368436b5f172bb9b3d5f..34fbf659ebab12a5258f87234df7f131701f84cf 100644 --- a/mace/kernels/opencl/pad.cc +++ b/mace/kernels/opencl/pad.cc @@ -60,7 +60,8 @@ void PadFunctor::operator()( if 
(runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/pooling.cc b/mace/kernels/opencl/pooling.cc index df2fcbe9223f902c721de2ace2aa7d5b780498c1..8a9f91e90df8c79369793b35f59a49e39af7c6e1 100644 --- a/mace/kernels/opencl/pooling.cc +++ b/mace/kernels/opencl/pooling.cc @@ -72,7 +72,8 @@ void PoolingFunctor::operator()(const Tensor *input, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/resize_bilinear.cc b/mace/kernels/opencl/resize_bilinear.cc index 1c36b27e37dbb5a5649203c2d8fa37a69f5f3266..0c86cae840ac15ce4a56be3727d8d747cfe9d179 100644 --- a/mace/kernels/opencl/resize_bilinear.cc +++ b/mace/kernels/opencl/resize_bilinear.cc @@ -78,7 +78,8 @@ void ResizeBilinearFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/slice.cc b/mace/kernels/opencl/slice.cc index 7944ee88b9845f93438d363069e9afbc4065f873..21fdbca1ecb6b2a492787ffe93601af9466bce96 100644 --- a/mace/kernels/opencl/slice.cc +++ b/mace/kernels/opencl/slice.cc @@ -51,7 +51,8 @@ void SliceFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/softmax.cc b/mace/kernels/opencl/softmax.cc index 24329be41496ee740e34125a9cfaa08f793d5644..8e5be84509bd5ae4d49fdcfecbd0cdbd7e9f0359 100644 --- a/mace/kernels/opencl/softmax.cc +++ b/mace/kernels/opencl/softmax.cc @@ -70,7 +70,8 @@ void SoftmaxFunctor::operator()(const Tensor *logits, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/space_to_batch.cc b/mace/kernels/opencl/space_to_batch.cc index fa4850cb95348b460fefde519c77fcd97f818504..c3c45f0b2a18bce690cad718b2d61391d243a0f4 100644 --- a/mace/kernels/opencl/space_to_batch.cc +++ b/mace/kernels/opencl/space_to_batch.cc @@ -70,7 +70,8 @@ void SpaceToBatchFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new 
Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc index fcf815281ea783fc184819b8d86ac3480bcadd66..da7dea0b974c09e7cf8e7e45442ff44a95eadfe4 100644 --- a/mace/kernels/opencl/winograd_transform.cc +++ b/mace/kernels/opencl/winograd_transform.cc @@ -39,7 +39,8 @@ void WinogradTransformFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -138,7 +139,8 @@ void WinogradInverseTransformFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/public/mace.h b/mace/public/mace.h index 02d903fd1a7a9f40395452e40ab91ef74b5ca9be..1bc8c36a3fc0eced343ac66f26ff5eee49ab82dd 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -30,7 +30,11 @@ const char *MaceVersion(); enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 }; -enum MaceStatus { MACE_SUCCESS = 0, MACE_INVALID_ARGS = 1 }; +enum MaceStatus { + MACE_SUCCESS = 0, + MACE_INVALID_ARGS = 1, + MACE_OUT_OF_RESOURCES = 2 +}; // MACE input/output tensor class MaceTensor { @@ -61,12 +65,13 @@ class RunMetadata; class MaceEngine { public: - explicit MaceEngine(const NetDef *net_def, - DeviceType device_type, - const std::vector &input_nodes, - const std::vector &output_nodes); + explicit MaceEngine(DeviceType device_type); ~MaceEngine(); + MaceStatus Init(const NetDef *net_def, + const std::vector &input_nodes, + const std::vector &output_nodes); + MaceStatus Run(const std::map &inputs, std::map *outputs); diff --git a/mace/python/tools/mace_engine_factory.h.jinja2 b/mace/python/tools/mace_engine_factory.h.jinja2 index 3b4994699ebe85e988f0909832a7e5bf37bafb09..3110ab37b3e4fa6f3a47825c604a24e4ae2101d3 100644 --- a/mace/python/tools/mace_engine_factory.h.jinja2 +++ b/mace/python/tools/mace_engine_factory.h.jinja2 @@ -61,24 +61,25 @@ MaceStatus CreateMaceEngine( } const unsigned char * model_data = nullptr; NetDef net_def; + MaceStatus status = MaceStatus::MACE_SUCCESS; switch (model_name_map[model_name]) { {% for i in range(model_tags |length) %} case {{ i }}: model_data = mace::{{model_tags[i]}}::LoadModelData(model_data_file); net_def = mace::{{model_tags[i]}}::CreateNet(model_data); - engine->reset( - new mace::MaceEngine(&net_def, device_type, input_nodes, output_nodes)); + engine->reset(new mace::MaceEngine(device_type)); + status = (*engine)->Init(&net_def, input_nodes, output_nodes); if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { mace::{{model_tags[i]}}::UnloadModelData(model_data); } break; {% endfor %} default: - return MaceStatus::MACE_INVALID_ARGS; + status = MaceStatus::MACE_INVALID_ARGS; } - return MaceStatus::MACE_SUCCESS; + return status; } } // namespace mace diff --git 
a/mace/test/mace_api_mt_test.cc b/mace/test/mace_api_mt_test.cc index a1271b28b433c46b5caf5052d3db9562b032dcaf..a4088930dcc57ab1f2ed4976195519747a1810b8 100644 --- a/mace/test/mace_api_mt_test.cc +++ b/mace/test/mace_api_mt_test.cc @@ -304,7 +304,9 @@ void MaceRunFunc(const int in_out_size) { new FileStorageFactory(file_path)); mace::SetKVStorageFactory(storage_factory); - MaceEngine engine(&net_def, device, input_names, output_names); + MaceEngine engine(device); + MaceStatus status = engine.Init(&net_def, input_names, output_names); + ASSERT_EQ(status, MaceStatus::MACE_SUCCESS); std::map inputs; std::map outputs; diff --git a/mace/test/mace_api_test.cc b/mace/test/mace_api_test.cc index 776fa6744c231aadff21aa592b52e43900f423b5..0b16c762da94f2379fc2eaf4d50eafd4d9873cc0 100644 --- a/mace/test/mace_api_test.cc +++ b/mace/test/mace_api_test.cc @@ -308,7 +308,9 @@ void MaceRun(const int in_out_size, &net_def); } - MaceEngine engine(&net_def, device, input_names, output_names); + MaceEngine engine(device); + MaceStatus status = engine.Init(&net_def, input_names, output_names); + ASSERT_EQ(status, MaceStatus::MACE_SUCCESS); std::map inputs; std::map outputs;
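For reference, the end-to-end calling pattern this patch moves API users to (construct MaceEngine with a DeviceType, call the fallible Init(), and check MaceStatus, as the updated docs and mace/test/mace_api_test.cc above do) looks roughly like the sketch below. It is illustrative only and not part of the patch: RunModelOnce, the tensor names, and the shapes are made-up placeholders, and in a real application the NetDef and model data come from the generated model code (mace::MACE_MODEL_TAG::CreateNet) or from CreateMaceEngine in mace_engine_factory.h.

// Illustrative sketch only (not part of the patch). Assumes the public API in
// mace/public/mace.h after this change; names and shapes are placeholders.
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <vector>

#include "mace/public/mace.h"

int RunModelOnce(const mace::NetDef *net_def) {
  const std::vector<std::string> input_names = {"input"};    // placeholder
  const std::vector<std::string> output_names = {"output"};  // placeholder

  // Construction is now cheap; Init() does the heavy lifting and reports
  // failures (e.g. MACE_OUT_OF_RESOURCES from a failed allocation) through
  // MaceStatus instead of aborting inside the constructor.
  mace::MaceEngine engine(mace::DeviceType::GPU);
  mace::MaceStatus status = engine.Init(net_def, input_names, output_names);
  if (status != mace::MaceStatus::MACE_SUCCESS) {
    return -1;
  }

  // Wrap caller-owned float buffers as MaceTensor inputs/outputs
  // (placeholder NHWC shapes).
  const std::vector<int64_t> input_shape = {1, 224, 224, 3};
  const std::vector<int64_t> output_shape = {1, 1, 1, 1000};
  auto input_data = std::shared_ptr<float>(new float[1 * 224 * 224 * 3],
                                           std::default_delete<float[]>());
  auto output_data = std::shared_ptr<float>(new float[1 * 1 * 1 * 1000],
                                            std::default_delete<float[]>());

  std::map<std::string, mace::MaceTensor> inputs;
  std::map<std::string, mace::MaceTensor> outputs;
  inputs[input_names[0]] = mace::MaceTensor(input_shape, input_data);
  outputs[output_names[0]] = mace::MaceTensor(output_shape, output_data);

  // Run() reports failures through MaceStatus as well.
  status = engine.Run(inputs, &outputs);
  return status == mace::MaceStatus::MACE_SUCCESS ? 0 : -1;
}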