From e7a89f6fbb0dd932a32f9c72bfa61ff6eca5fbe9 Mon Sep 17 00:00:00 2001 From: liuqi Date: Fri, 18 May 2018 13:54:35 +0800 Subject: [PATCH] Add Init api in MaceEngine for return error code. --- docker/Dockerfile | 3 +- docs/getting_started/how_to_build.rst | 64 ++++++---- mace/core/allocator.h | 52 ++++---- mace/core/buffer.h | 113 +++++++++++++----- mace/core/mace.cc | 52 ++++---- mace/core/runtime/opencl/opencl_allocator.cc | 38 ++++-- mace/core/runtime/opencl/opencl_allocator.h | 7 +- mace/core/tensor.h | 29 +++-- mace/core/workspace.cc | 33 +++-- mace/core/workspace.h | 5 +- mace/kernels/conv_2d.h | 3 - mace/kernels/opencl/activation.cc | 3 +- mace/kernels/opencl/addn.cc | 3 +- mace/kernels/opencl/batch_norm.cc | 3 +- mace/kernels/opencl/bias_add.cc | 3 +- mace/kernels/opencl/buffer_to_image.cc | 3 +- mace/kernels/opencl/channel_shuffle.cc | 3 +- mace/kernels/opencl/concat.cc | 6 +- mace/kernels/opencl/conv_2d_1x1.cc | 3 +- mace/kernels/opencl/conv_2d_3x3.cc | 3 +- mace/kernels/opencl/conv_2d_general.cc | 3 +- mace/kernels/opencl/deconv_2d_opencl.cc | 3 +- mace/kernels/opencl/depth_to_space.cc | 3 +- mace/kernels/opencl/depthwise_conv.cc | 3 +- mace/kernels/opencl/eltwise.cc | 3 +- mace/kernels/opencl/fully_connected.cc | 6 +- mace/kernels/opencl/image_to_buffer.cc | 3 +- mace/kernels/opencl/matmul.cc | 3 +- .../kernels/opencl/out_of_range_check_test.cc | 3 +- mace/kernels/opencl/pad.cc | 3 +- mace/kernels/opencl/pooling.cc | 3 +- mace/kernels/opencl/resize_bilinear.cc | 3 +- mace/kernels/opencl/slice.cc | 3 +- mace/kernels/opencl/softmax.cc | 3 +- mace/kernels/opencl/space_to_batch.cc | 3 +- mace/kernels/opencl/winograd_transform.cc | 6 +- mace/public/mace.h | 15 ++- .../python/tools/mace_engine_factory.h.jinja2 | 9 +- mace/test/mace_api_mt_test.cc | 4 +- mace/test/mace_api_test.cc | 4 +- 40 files changed, 340 insertions(+), 175 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 32d0cef5..8a8c089c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -110,7 +110,8 @@ RUN apt-get install -y --no-install-recommends \ # Install tools RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com setuptools -RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com tensorflow==1.6.0 \ +RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com tensorflow==1.7.0 \ + numpy>=1.14.0 \ scipy \ jinja2 \ pyyaml \ diff --git a/docs/getting_started/how_to_build.rst b/docs/getting_started/how_to_build.rst index 7f2c0d3a..43178cf9 100644 --- a/docs/getting_started/how_to_build.rst +++ b/docs/getting_started/how_to_build.rst @@ -33,11 +33,13 @@ How to build +=====================+=================+===================================================================================================+ | bazel | >= 0.5.4 | - | +---------------------+-----------------+---------------------------------------------------------------------------------------------------+ -| android-ndk | r12c | - | +| android-ndk | r15c,r16b | - | +---------------------+-----------------+---------------------------------------------------------------------------------------------------+ | adb | >= 1.0.32 | apt install -y android-tools-adb | +---------------------+-----------------+---------------------------------------------------------------------------------------------------+ -| tensorflow | 1.4.0 | pip install tensorflow==1.4.0 | +| tensorflow | 1.7.0 | pip install tensorflow==1.7.0 | 
++---------------------+-----------------+---------------------------------------------------------------------------------------------------+
+| numpy               | >= 1.14.0       | pip install numpy                                                                                 |
 +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
 | scipy               | >= 1.0.0        | pip install scipy                                                                                 |
 +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
@@ -45,6 +47,10 @@ How to build
 +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
 | PyYaml              | >= 3.12         | pip install pyyaml                                                                                |
 +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
+| sh                  | >= 1.12.14      | pip install sh                                                                                    |
++---------------------+-----------------+---------------------------------------------------------------------------------------------------+
+| filelock            | >= 3.0.0        | pip install filelock                                                                              |
++---------------------+-----------------+---------------------------------------------------------------------------------------------------+
 | docker(for caffe)   | >= 17.09.0-ce   | `install doc `__                                                                                  |
 +---------------------+-----------------+---------------------------------------------------------------------------------------------------+
@@ -229,29 +235,47 @@ Caffe currently only supports the latest version; use Caffe's own tools to upgrade models from older versions
     // Include the headers
     #include "mace/public/mace.h"
-    #include "mace/public/{MODEL_TAG}.h"
+    #include "mace/public/mace_engine_factory.h"
 
-    // 0. Set up the internal storage
+    // 0. Set up the internal storage (this only needs to be done once)
     const std::string file_path = "/path/to/store/internal/files";
     std::shared_ptr<KVStorageFactory> storage_factory(
         new FileStorageFactory(file_path));
     ConfigKVStorageFactory(storage_factory);
 
-    //1. Load the model data from a file or from code; a custom loader can also be used (e.g. with compression or encryption)
-    //   If the model data is embedded in the code, set this argument to nullptr.
-    unsigned char *model_data = mace::MACE_MODEL_TAG::LoadModelData(FLAGS_model_data_file.c_str());
+    //1. Declare the device type (must match the runtime specified at build time)
+    DeviceType device_type = DeviceType::GPU;
 
-    //2. Create the net object
-    NetDef net_def = mace::MACE_MODEL_TAG::CreateNet(model_data);
-
-    //3. Declare the device type (must match the runtime specified at build time)
-    DeviceType device_type = DeviceType::OPENCL;
-
-    //4. Define the arrays of input and output names
+    //2. Define the arrays of input and output names
     std::vector<std::string> input_names = {...};
     std::vector<std::string> output_names = {...};
 
-    //5. Create the input and output objects
+    //3. Create the MaceEngine object
+    std::shared_ptr<mace::MaceEngine> engine;
+    MaceStatus create_engine_status;
+    // Create Engine
+    if (model_data_file.empty()) {
+      create_engine_status =
+          CreateMaceEngine(model_name.c_str(),
+                           nullptr,
+                           input_names,
+                           output_names,
+                           device_type,
+                           &engine);
+    } else {
+      create_engine_status =
+          CreateMaceEngine(model_name.c_str(),
+                           model_data_file.c_str(),
+                           input_names,
+                           output_names,
+                           device_type,
+                           &engine);
+    }
+    if (create_engine_status != MaceStatus::MACE_SUCCESS) {
+      // do something
+    }
+
+    //4. Create the input and output objects
     std::map<std::string, mace::MaceTensor> inputs;
    std::map<std::string, mace::MaceTensor> outputs;
     for (size_t i = 0; i < input_count; ++i) {
@@ -276,14 +300,6 @@ Caffe currently only supports the latest version; use Caffe's own tools to upgrade models from older versions
       outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out);
     }
 
-    //6. Create the MaceEngine object
-    mace::MaceEngine engine(&net_def, device_type, input_names, output_names);
-
-    //7. If the device type is OPENCL or HEXAGON, model_data can be released here
-    if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) {
-      mace::MACE_MODEL_TAG::UnloadModelData(model_data);
-    }
-
-    //8. Run the model and get the results
+    //5. 
执行模型,得到结果 engine.Run(inputs, &outputs); diff --git a/mace/core/allocator.h b/mace/core/allocator.h index 8c73025b..7ab701dd 100644 --- a/mace/core/allocator.h +++ b/mace/core/allocator.h @@ -16,6 +16,7 @@ #define MACE_CORE_ALLOCATOR_H_ #include +#include #include #include #include @@ -42,9 +43,10 @@ class Allocator { public: Allocator() {} virtual ~Allocator() noexcept {} - virtual void *New(size_t nbytes) const = 0; - virtual void *NewImage(const std::vector &image_shape, - const DataType dt) const = 0; + virtual MaceStatus New(size_t nbytes, void **result) const = 0; + virtual MaceStatus NewImage(const std::vector &image_shape, + const DataType dt, + void **result) const = 0; virtual void Delete(void *data) const = 0; virtual void DeleteImage(void *data) const = 0; virtual void *Map(void *buffer, size_t offset, size_t nbytes) const = 0; @@ -53,44 +55,54 @@ class Allocator { std::vector *mapped_image_pitch) const = 0; virtual void Unmap(void *buffer, void *mapper_ptr) const = 0; virtual bool OnHost() const = 0; - - template - T *New(size_t num_elements) { - if (num_elements > (std::numeric_limits::max() / sizeof(T))) { - return nullptr; - } - void *p = New(sizeof(T) * num_elements); - T *typed_p = reinterpret_cast(p); - return typed_p; - } }; class CPUAllocator : public Allocator { public: ~CPUAllocator() override {} - void *New(size_t nbytes) const override { + MaceStatus New(size_t nbytes, void **result) const override { VLOG(3) << "Allocate CPU buffer: " << nbytes; + if (nbytes == 0) { + return MaceStatus::MACE_SUCCESS; + } void *data = nullptr; #if defined(__ANDROID__) || defined(__hexagon__) data = memalign(kMaceAlignment, nbytes); + if (data == NULL) { + LOG(WARNING) << "Allocate CPU Buffer with " + << nbytes << " bytes failed because of" + << strerror(errno); + *result = nullptr; + return MaceStatus::MACE_OUT_OF_RESOURCES; + } #else - MACE_CHECK(posix_memalign(&data, kMaceAlignment, nbytes) == 0); + int ret = posix_memalign(&data, kMaceAlignment, nbytes); + if (ret != 0) { + LOG(WARNING) << "Allocate CPU Buffer with " + << nbytes << " bytes failed because of" + << strerror(errno); + *result = nullptr; + return MaceStatus::MACE_OUT_OF_RESOURCES; + } #endif - MACE_CHECK_NOTNULL(data); // TODO(heliangliang) This should be avoided sometimes memset(data, 0, nbytes); - return data; + *result = data; + return MaceStatus::MACE_SUCCESS; } - void *NewImage(const std::vector &shape, - const DataType dt) const override { + MaceStatus NewImage(const std::vector &shape, + const DataType dt, + void **result) const override { MACE_UNUSED(shape); MACE_UNUSED(dt); + MACE_UNUSED(result); LOG(FATAL) << "Allocate CPU image"; - return nullptr; + return MaceStatus::MACE_SUCCESS; } void Delete(void *data) const override { + MACE_CHECK_NOTNULL(data); VLOG(3) << "Free CPU buffer"; free(data); } diff --git a/mace/core/buffer.h b/mace/core/buffer.h index f4b252a7..afac3f29 100644 --- a/mace/core/buffer.h +++ b/mace/core/buffer.h @@ -38,6 +38,11 @@ class BufferBase { virtual void *raw_mutable_data() = 0; + virtual MaceStatus Allocate(index_t nbytes) = 0; + + virtual MaceStatus Allocate(const std::vector &shape, + DataType data_type) = 0; + virtual void *Map(index_t offset, index_t length, std::vector *pitch) const = 0; @@ -48,7 +53,7 @@ class BufferBase { virtual void UnMap() = 0; - virtual void Resize(index_t size) = 0; + virtual MaceStatus Resize(index_t nbytes) = 0; virtual void Copy(void *src, index_t offset, index_t length) = 0; @@ -83,14 +88,6 @@ class Buffer : public BufferBase { mapped_buf_(nullptr), 
is_data_owner_(true) {} - Buffer(Allocator *allocator, index_t size) - : BufferBase(size), - allocator_(allocator), - mapped_buf_(nullptr), - is_data_owner_(true) { - buf_ = allocator->New(size); - } - Buffer(Allocator *allocator, void *data, index_t size) : BufferBase(size), allocator_(allocator), @@ -132,6 +129,31 @@ class Buffer : public BufferBase { } } + MaceStatus Allocate(index_t nbytes) { + if (nbytes <= 0) { + return MaceStatus::MACE_SUCCESS; + } + MACE_CHECK(is_data_owner_, + "data is not owned by this buffer, cannot reallocate"); + if (mapped_buf_ != nullptr) { + UnMap(); + } + if (buf_ != nullptr) { + allocator_->Delete(buf_); + } + size_ = nbytes; + return allocator_->New(nbytes, &buf_); + } + + MaceStatus Allocate(const std::vector &shape, + DataType data_type) { + if (shape.empty()) return MaceStatus::MACE_SUCCESS; + index_t nbytes = std::accumulate(shape.begin(), shape.end(), + 1, std::multiplies()) + * GetEnumTypeSize(data_type); + return this->Allocate(nbytes); + } + void *Map(index_t offset, index_t length, std::vector *pitch) const { MACE_CHECK_NOTNULL(buf_); MACE_UNUSED(pitch); @@ -154,16 +176,17 @@ class Buffer : public BufferBase { mapped_buf_ = nullptr; } - void Resize(index_t size) { + MaceStatus Resize(index_t nbytes) { MACE_CHECK(is_data_owner_, "data is not owned by this buffer, cannot resize"); - if (size != size_) { + if (nbytes != size_) { if (buf_ != nullptr) { allocator_->Delete(buf_); } - size_ = size; - buf_ = allocator_->New(size); + size_ = nbytes; + return allocator_->New(nbytes, &buf_); } + return MaceStatus::MACE_SUCCESS; } void Copy(void *src, index_t offset, index_t length) { @@ -195,18 +218,6 @@ class Image : public BufferBase { buf_(nullptr), mapped_buf_(nullptr) {} - Image(std::vector shape, DataType data_type) - : BufferBase( - std::accumulate( - shape.begin(), shape.end(), 1, std::multiplies()) * - GetEnumTypeSize(data_type)), - allocator_(GetDeviceAllocator(GPU)), - mapped_buf_(nullptr) { - shape_ = shape; - data_type_ = data_type; - buf_ = allocator_->NewImage(shape, data_type); - } - virtual ~Image() { if (mapped_buf_ != nullptr) { UnMap(); @@ -233,6 +244,29 @@ class Image : public BufferBase { std::vector image_shape() const { return shape_; } + MaceStatus Allocate(index_t nbytes) { + MACE_UNUSED(nbytes); + LOG(FATAL) << "Image should not call this allocate function"; + return MaceStatus::MACE_SUCCESS; + } + + MaceStatus Allocate(const std::vector &shape, + DataType data_type) { + index_t size = std::accumulate( + shape.begin(), shape.end(), 1, std::multiplies()) * + GetEnumTypeSize(data_type); + if (mapped_buf_ != nullptr) { + UnMap(); + } + if (buf_ != nullptr) { + allocator_->DeleteImage(buf_); + } + size_ = size; + shape_ = shape; + data_type_ = data_type; + return allocator_->NewImage(shape, data_type, &buf_); + } + void *Map(index_t offset, index_t length, std::vector *pitch) const { MACE_UNUSED(offset); MACE_UNUSED(length); @@ -259,9 +293,10 @@ class Image : public BufferBase { mapped_buf_ = nullptr; } - void Resize(index_t size) { + MaceStatus Resize(index_t size) { MACE_UNUSED(size); MACE_NOT_IMPLEMENTED; + return MaceStatus::MACE_SUCCESS; } void Copy(void *src, index_t offset, index_t length) { @@ -339,6 +374,20 @@ class BufferSlice : public BufferBase { } } + MaceStatus Allocate(index_t size) { + MACE_UNUSED(size); + LOG(FATAL) << "BufferSlice should not call allocate function"; + return MaceStatus::MACE_SUCCESS; + } + + MaceStatus Allocate(const std::vector &shape, + DataType data_type) { + MACE_UNUSED(shape); + 
MACE_UNUSED(data_type); + LOG(FATAL) << "BufferSlice should not call allocate function"; + return MaceStatus::MACE_SUCCESS; + } + void *Map(index_t offset, index_t length, std::vector *pitch) const { MACE_UNUSED(offset); MACE_UNUSED(length); @@ -364,9 +413,10 @@ class BufferSlice : public BufferBase { mapped_buf_ = nullptr; } - void Resize(index_t size) { + MaceStatus Resize(index_t size) { MACE_CHECK(size == size_, "resize buffer slice from ", size_, " to ", size, " is illegal"); + return MaceStatus::MACE_SUCCESS; } void Copy(void *src, index_t offset, index_t length) { @@ -396,20 +446,17 @@ class ScratchBuffer: public Buffer { : Buffer(allocator), offset_(0) {} - ScratchBuffer(Allocator *allocator, index_t size) - : Buffer(allocator, size), - offset_(0) {} - ScratchBuffer(Allocator *allocator, void *data, index_t size) : Buffer(allocator, data, size), offset_(0) {} virtual ~ScratchBuffer() {} - void GrowSize(index_t size) { + MaceStatus GrowSize(index_t size) { if (size > size_) { - Resize(size); + return Resize(size); } + return MaceStatus::MACE_SUCCESS; } BufferSlice Scratch(index_t size) { diff --git a/mace/core/mace.cc b/mace/core/mace.cc index dc9cbaa9..83ab4bd1 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -82,12 +82,14 @@ std::shared_ptr MaceTensor::data() { return impl_->data; } // Mace Engine class MaceEngine::Impl { public: - explicit Impl(const NetDef *net_def, - DeviceType device_type, - const std::vector &input_nodes, - const std::vector &output_nodes); + explicit Impl(DeviceType device_type); + ~Impl(); + MaceStatus Init(const NetDef *net_def, + const std::vector &input_nodes, + const std::vector &output_nodes); + MaceStatus Run(const std::map &inputs, std::map *outputs, RunMetadata *run_metadata); @@ -104,10 +106,8 @@ class MaceEngine::Impl { DISABLE_COPY_AND_ASSIGN(Impl); }; -MaceEngine::Impl::Impl(const NetDef *net_def, - DeviceType device_type, - const std::vector &input_nodes, - const std::vector &output_nodes) + +MaceEngine::Impl::Impl(DeviceType device_type) : op_registry_(new OperatorRegistry()), device_type_(device_type), ws_(new Workspace()), @@ -115,7 +115,12 @@ MaceEngine::Impl::Impl(const NetDef *net_def, #ifdef MACE_ENABLE_HEXAGON , hexagon_controller_(nullptr) #endif -{ +{} + +MaceStatus MaceEngine::Impl::Init( + const NetDef *net_def, + const std::vector &input_nodes, + const std::vector &output_nodes) { LOG(INFO) << "MACE version: " << MaceVersion(); // Set storage path for internal usage for (auto input_name : input_nodes) { @@ -127,7 +132,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def, GetDeviceAllocator(device_type_), DT_FLOAT); } #ifdef MACE_ENABLE_HEXAGON - if (device_type == HEXAGON) { + if (device_type_ == HEXAGON) { hexagon_controller_.reset(new HexagonControlWrapper()); MACE_CHECK(hexagon_controller_->Config(), "hexagon config error"); MACE_CHECK(hexagon_controller_->Init(), "hexagon init error"); @@ -143,18 +148,22 @@ MaceEngine::Impl::Impl(const NetDef *net_def, } } else { #endif - ws_->LoadModelTensor(*net_def, device_type); + MaceStatus status = ws_->LoadModelTensor(*net_def, device_type_); + if (status != MaceStatus::MACE_SUCCESS) { + return status; + } - // Init model - auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type, + // Init model + auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type_, NetMode::INIT); if (!net->Run()) { LOG(FATAL) << "Net init run failed"; } - net_ = CreateNet(op_registry_, *net_def, ws_.get(), device_type); + net_ = CreateNet(op_registry_, *net_def, ws_.get(), 
device_type_); #ifdef MACE_ENABLE_HEXAGON } #endif + return MaceStatus::MACE_SUCCESS; } MaceEngine::Impl::~Impl() { @@ -244,16 +253,17 @@ MaceStatus MaceEngine::Impl::Run( return MACE_SUCCESS; } -MaceEngine::MaceEngine(const NetDef *net_def, - DeviceType device_type, - const std::vector &input_nodes, - const std::vector &output_nodes) { - impl_ = std::unique_ptr( - new MaceEngine::Impl(net_def, device_type, input_nodes, output_nodes)); -} +MaceEngine::MaceEngine(DeviceType device_type): + impl_(new MaceEngine::Impl(device_type)) {} MaceEngine::~MaceEngine() = default; +MaceStatus MaceEngine::Init(const NetDef *net_def, + const std::vector &input_nodes, + const std::vector &output_nodes) { + return impl_->Init(net_def, input_nodes, output_nodes); +} + MaceStatus MaceEngine::Run(const std::map &inputs, std::map *outputs, RunMetadata *run_metadata) { diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index 9b94eb92..96b083cf 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -44,18 +44,30 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) { OpenCLAllocator::OpenCLAllocator() {} OpenCLAllocator::~OpenCLAllocator() {} -void *OpenCLAllocator::New(size_t nbytes) const { +MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const { + if (nbytes == 0) { + return MaceStatus::MACE_SUCCESS; + } VLOG(3) << "Allocate OpenCL buffer: " << nbytes; cl_int error; cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nbytes, nullptr, &error); - MACE_CHECK_CL_SUCCESS(error); - return static_cast(buffer); + if (error != CL_SUCCESS) { + LOG(WARNING) << "Allocate OpenCL Buffer with " + << nbytes << " bytes failed because of" + << OpenCLErrorToString(error); + *result = nullptr; + return MaceStatus::MACE_OUT_OF_RESOURCES; + } else { + *result = buffer; + return MaceStatus::MACE_SUCCESS; + } } -void *OpenCLAllocator::NewImage(const std::vector &image_shape, - const DataType dt) const { +MaceStatus OpenCLAllocator::NewImage(const std::vector &image_shape, + const DataType dt, + void **result) const { MACE_CHECK(image_shape.size() == 2) << "Image shape's size must equal 2"; VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", " << image_shape[1]; @@ -67,11 +79,17 @@ void *OpenCLAllocator::NewImage(const std::vector &image_shape, new cl::Image2D(OpenCLRuntime::Global()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format, image_shape[0], image_shape[1], 0, nullptr, &error); - MACE_CHECK_CL_SUCCESS(error) << " with image shape: [" - << image_shape[0] << ", " << image_shape[1] - << "]"; - - return cl_image; + if (error != CL_SUCCESS) { + LOG(WARNING) << "Allocate OpenCL image with shape: [" + << image_shape[0] << ", " << image_shape[1] + << "] failed because of" + << OpenCLErrorToString(error); + *result = nullptr; + return MaceStatus::MACE_OUT_OF_RESOURCES; + } else { + *result = cl_image; + return MaceStatus::MACE_SUCCESS; + } } void OpenCLAllocator::Delete(void *buffer) const { diff --git a/mace/core/runtime/opencl/opencl_allocator.h b/mace/core/runtime/opencl/opencl_allocator.h index 0ec50f61..6304add8 100644 --- a/mace/core/runtime/opencl/opencl_allocator.h +++ b/mace/core/runtime/opencl/opencl_allocator.h @@ -27,15 +27,16 @@ class OpenCLAllocator : public Allocator { ~OpenCLAllocator() override; - void *New(size_t nbytes) const override; + MaceStatus New(size_t nbytes, void **result) const 
override; /* * Use Image2D with RGBA (128-bit) format to represent the image. * * @ shape : [depth, ..., height, width ]. */ - void *NewImage(const std::vector &image_shape, - const DataType dt) const override; + MaceStatus NewImage(const std::vector &image_shape, + const DataType dt, + void **result) const override; void Delete(void *buffer) const override; diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 87a6cb3c..8c9f3e3b 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -216,16 +216,19 @@ class Tensor { MACE_CHECK(raw_size() <= buffer_->size()); } - inline void Resize(const std::vector &shape) { + inline MaceStatus Resize(const std::vector &shape) { shape_ = shape; image_shape_.clear(); if (buffer_ != nullptr) { MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage."); - if (raw_size() + EXTRA_BUFFER_PAD_SIZE > buffer_->size()) - buffer_->Resize(raw_size() + EXTRA_BUFFER_PAD_SIZE); + if (raw_size() + EXTRA_BUFFER_PAD_SIZE > buffer_->size()) { + return buffer_->Resize(raw_size() + EXTRA_BUFFER_PAD_SIZE); + } + return MaceStatus::MACE_SUCCESS; } else { MACE_CHECK(is_buffer_owner_); - buffer_ = new Buffer(allocator_, raw_size() + EXTRA_BUFFER_PAD_SIZE); + buffer_ = new Buffer(allocator_); + return buffer_->Allocate(raw_size() + EXTRA_BUFFER_PAD_SIZE); } } @@ -241,13 +244,14 @@ class Tensor { is_buffer_owner_ = false; } - inline void ResizeImage(const std::vector &shape, - const std::vector &image_shape) { + inline MaceStatus ResizeImage(const std::vector &shape, + const std::vector &image_shape) { shape_ = shape; image_shape_ = image_shape; if (buffer_ == nullptr) { MACE_CHECK(is_buffer_owner_); - buffer_ = new Image(image_shape, dtype_); + buffer_ = new Image(); + return buffer_->Allocate(image_shape, dtype_); } else { MACE_CHECK(has_opencl_image(), "Cannot ResizeImage buffer, use Resize."); Image *image = dynamic_cast(buffer_); @@ -257,24 +261,27 @@ class Tensor { "): current physical image shape: ", image->image_shape()[0], ", ", image->image_shape()[1], " < logical image shape: ", image_shape[0], ", ", image_shape[1]); + return MaceStatus::MACE_SUCCESS; } } - inline void ResizeLike(const Tensor &other) { ResizeLike(&other); } + inline MaceStatus ResizeLike(const Tensor &other) { + return ResizeLike(&other); + } - inline void ResizeLike(const Tensor *other) { + inline MaceStatus ResizeLike(const Tensor *other) { if (other->has_opencl_image()) { if (is_buffer_owner_ && buffer_ != nullptr && !has_opencl_image()) { delete buffer_; buffer_ = nullptr; } - ResizeImage(other->shape(), other->image_shape_); + return ResizeImage(other->shape(), other->image_shape_); } else { if (is_buffer_owner_ && buffer_ != nullptr && has_opencl_image()) { delete buffer_; buffer_ = nullptr; } - Resize(other->shape()); + return Resize(other->shape()); } } diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index 46ae0b48..1f653a45 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -60,7 +60,7 @@ std::vector Workspace::Tensors() const { return names; } -void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { +MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { MACE_LATENCY_LOGGER(1, "Load model tensors"); index_t model_data_size = 0; unsigned char *model_data_ptr = nullptr; @@ -89,7 +89,11 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { model_data_size)); } else { tensor_buffer_ = std::unique_ptr( - new Buffer(GetDeviceAllocator(type), model_data_size)); + new 
Buffer(GetDeviceAllocator(type))); + MaceStatus status = tensor_buffer_->Allocate(model_data_size); + if (status != MaceStatus::MACE_SUCCESS) { + return status; + } tensor_buffer_->Map(nullptr); tensor_buffer_->Copy(model_data_ptr, 0, model_data_size); tensor_buffer_->UnMap(); @@ -120,14 +124,16 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { } if (type == DeviceType::CPU || type == DeviceType::GPU) { - CreateOutputTensorBuffer(net_def, type); + MaceStatus status = CreateOutputTensorBuffer(net_def, type); + if (status != MaceStatus::MACE_SUCCESS) return status; } + return MaceStatus::MACE_SUCCESS; } -void Workspace::CreateOutputTensorBuffer(const NetDef &net_def, - DeviceType device_type) { +MaceStatus Workspace::CreateOutputTensorBuffer(const NetDef &net_def, + DeviceType device_type) { if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) { - return; + return MaceStatus::MACE_SUCCESS; } DataType dtype = DataType::DT_INVALID; @@ -157,14 +163,24 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def, // TODO(liuqi): refactor based on PB if (mem_block.mem_id() >= 20000) { std::unique_ptr image_buf( - new Image({mem_block.x(), mem_block.y()}, dtype)); + new Image()); + MaceStatus status = image_buf->Allocate( + {mem_block.x(), mem_block.y()}, dtype); + if (status != MaceStatus::MACE_SUCCESS) { + return status; + } preallocated_allocator_.SetBuffer(mem_block.mem_id(), std::move(image_buf)); } } else { if (mem_block.mem_id() < 20000) { std::unique_ptr tensor_buf( - new Buffer(GetDeviceAllocator(device_type), mem_block.x())); + new Buffer(GetDeviceAllocator(device_type))); + MaceStatus status = tensor_buf->Allocate( + mem_block.x() * GetEnumTypeSize(dtype)); + if (status != MaceStatus::MACE_SUCCESS) { + return status; + } preallocated_allocator_.SetBuffer(mem_block.mem_id(), std::move(tensor_buf)); } @@ -201,6 +217,7 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def, } } } + return MaceStatus::MACE_SUCCESS; } ScratchBuffer *Workspace::GetScratchBuffer(DeviceType device_type) { diff --git a/mace/core/workspace.h b/mace/core/workspace.h index e9e11ea3..06236fb9 100644 --- a/mace/core/workspace.h +++ b/mace/core/workspace.h @@ -47,12 +47,13 @@ class Workspace { std::vector Tensors() const; - void LoadModelTensor(const NetDef &net_def, DeviceType type); + MaceStatus LoadModelTensor(const NetDef &net_def, DeviceType type); ScratchBuffer *GetScratchBuffer(DeviceType device_type); private: - void CreateOutputTensorBuffer(const NetDef &net_def, DeviceType device_type); + MaceStatus CreateOutputTensorBuffer(const NetDef &net_def, + DeviceType device_type); TensorMap tensor_map_; diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index 7f825828..bf8dc705 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -99,7 +99,6 @@ struct Conv2dFunctor : Conv2dFunctorBase { #pragma omp parallel for collapse(2) for (index_t b = 0; b < in_shape[0]; b++) { for (index_t m = 0; m < filter_shape[0]; m += 4) { - const index_t in_height = in_shape[2]; const index_t in_width = in_shape[3]; const index_t out_height = out_shape[2]; const index_t out_width = out_shape[3]; @@ -322,8 +321,6 @@ struct Conv2dFunctor : Conv2dFunctorBase { index_t dilation_h = dilations_[0]; index_t dilation_w = dilations_[1]; - const index_t filter_hw[2] = {filter_h, filter_w}; - MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch"); index_t padded_input_height = input_height + paddings[0]; diff --git 
a/mace/kernels/opencl/activation.cc b/mace/kernels/opencl/activation.cc index 5cee4862..6b556966 100644 --- a/mace/kernels/opencl/activation.cc +++ b/mace/kernels/opencl/activation.cc @@ -45,7 +45,8 @@ void ActivationFunctor::operator()(const Tensor *input, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc index 4587a2cb..c47213f5 100644 --- a/mace/kernels/opencl/addn.cc +++ b/mace/kernels/opencl/addn.cc @@ -58,7 +58,8 @@ void AddNFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/batch_norm.cc b/mace/kernels/opencl/batch_norm.cc index f28c9ccc..80fafdbc 100644 --- a/mace/kernels/opencl/batch_norm.cc +++ b/mace/kernels/opencl/batch_norm.cc @@ -56,7 +56,8 @@ void BatchNormFunctor::operator()(const Tensor *input, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/bias_add.cc b/mace/kernels/opencl/bias_add.cc index 05b9d169..e50dcf58 100644 --- a/mace/kernels/opencl/bias_add.cc +++ b/mace/kernels/opencl/bias_add.cc @@ -49,7 +49,8 @@ void BiasAddFunctor::operator()(const Tensor *input, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc index 1bce914c..bf629e37 100644 --- a/mace/kernels/opencl/buffer_to_image.cc +++ b/mace/kernels/opencl/buffer_to_image.cc @@ -93,7 +93,8 @@ void BufferToImageFunctor::operator()( built_options.emplace("-DOUT_OF_RANGE_CHECK"); if (!kernel_error_) { kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/channel_shuffle.cc b/mace/kernels/opencl/channel_shuffle.cc index 7cb08254..d16a3d8a 100644 --- a/mace/kernels/opencl/channel_shuffle.cc +++ b/mace/kernels/opencl/channel_shuffle.cc @@ -56,7 +56,8 @@ void ChannelShuffleFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new 
Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc index 96c15fd8..23904100 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -67,7 +67,8 @@ static void Concat2(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -148,7 +149,8 @@ static void ConcatN(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/conv_2d_1x1.cc b/mace/kernels/opencl/conv_2d_1x1.cc index 52ed0368..5b79ea66 100644 --- a/mace/kernels/opencl/conv_2d_1x1.cc +++ b/mace/kernels/opencl/conv_2d_1x1.cc @@ -100,7 +100,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/conv_2d_3x3.cc b/mace/kernels/opencl/conv_2d_3x3.cc index f5600883..5386c417 100644 --- a/mace/kernels/opencl/conv_2d_3x3.cc +++ b/mace/kernels/opencl/conv_2d_3x3.cc @@ -86,7 +86,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/conv_2d_general.cc b/mace/kernels/opencl/conv_2d_general.cc index 2329984a..e44d8981 100644 --- a/mace/kernels/opencl/conv_2d_general.cc +++ b/mace/kernels/opencl/conv_2d_general.cc @@ -94,7 +94,8 @@ extern void Conv2dOpencl(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/deconv_2d_opencl.cc b/mace/kernels/opencl/deconv_2d_opencl.cc index bbcbec6c..abb4b43e 100644 --- a/mace/kernels/opencl/deconv_2d_opencl.cc +++ b/mace/kernels/opencl/deconv_2d_opencl.cc @@ -65,7 +65,8 @@ void Deconv2dOpencl(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new 
Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/depth_to_space.cc b/mace/kernels/opencl/depth_to_space.cc index fd25f948..609ad205 100644 --- a/mace/kernels/opencl/depth_to_space.cc +++ b/mace/kernels/opencl/depth_to_space.cc @@ -86,7 +86,8 @@ void DepthToSpaceOpFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/depthwise_conv.cc b/mace/kernels/opencl/depthwise_conv.cc index 43a24e66..c7800d0a 100644 --- a/mace/kernels/opencl/depthwise_conv.cc +++ b/mace/kernels/opencl/depthwise_conv.cc @@ -97,7 +97,8 @@ static void DepthwiseConv2d(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/eltwise.cc b/mace/kernels/opencl/eltwise.cc index 94b4c322..4f059046 100644 --- a/mace/kernels/opencl/eltwise.cc +++ b/mace/kernels/opencl/eltwise.cc @@ -97,7 +97,8 @@ void EltwiseFunctor::operator()(const Tensor *input0, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/fully_connected.cc b/mace/kernels/opencl/fully_connected.cc index 0022b923..6e0678da 100644 --- a/mace/kernels/opencl/fully_connected.cc +++ b/mace/kernels/opencl/fully_connected.cc @@ -74,7 +74,8 @@ void FCWXKernel(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -200,7 +201,8 @@ void FCWTXKernel(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + (*kernel_error)->Allocate(1); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); diff --git a/mace/kernels/opencl/image_to_buffer.cc b/mace/kernels/opencl/image_to_buffer.cc index 09b040dd..1cefff9e 100644 --- a/mace/kernels/opencl/image_to_buffer.cc +++ b/mace/kernels/opencl/image_to_buffer.cc @@ -86,7 +86,8 @@ void ImageToBufferFunctor::operator()( built_options.emplace("-DOUT_OF_RANGE_CHECK"); if (!kernel_error_) { kernel_error_ = 
std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/matmul.cc b/mace/kernels/opencl/matmul.cc index 9a16694a..cc63ed04 100644 --- a/mace/kernels/opencl/matmul.cc +++ b/mace/kernels/opencl/matmul.cc @@ -54,7 +54,8 @@ void MatMulFunctor::operator()(const Tensor *A, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/out_of_range_check_test.cc b/mace/kernels/opencl/out_of_range_check_test.cc index 012edd70..467a5953 100644 --- a/mace/kernels/opencl/out_of_range_check_test.cc +++ b/mace/kernels/opencl/out_of_range_check_test.cc @@ -57,7 +57,8 @@ bool BufferToImageOpImpl(Tensor *buffer, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error->Allocate(1); kernel_error->Map(nullptr); *(kernel_error->mutable_data()) = 0; kernel_error->UnMap(); diff --git a/mace/kernels/opencl/pad.cc b/mace/kernels/opencl/pad.cc index bc093c16..34fbf659 100644 --- a/mace/kernels/opencl/pad.cc +++ b/mace/kernels/opencl/pad.cc @@ -60,7 +60,8 @@ void PadFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/pooling.cc b/mace/kernels/opencl/pooling.cc index df2fcbe9..8a9f91e9 100644 --- a/mace/kernels/opencl/pooling.cc +++ b/mace/kernels/opencl/pooling.cc @@ -72,7 +72,8 @@ void PoolingFunctor::operator()(const Tensor *input, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/resize_bilinear.cc b/mace/kernels/opencl/resize_bilinear.cc index 1c36b27e..0c86cae8 100644 --- a/mace/kernels/opencl/resize_bilinear.cc +++ b/mace/kernels/opencl/resize_bilinear.cc @@ -78,7 +78,8 @@ void ResizeBilinearFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/slice.cc b/mace/kernels/opencl/slice.cc index 7944ee88..21fdbca1 100644 --- a/mace/kernels/opencl/slice.cc +++ b/mace/kernels/opencl/slice.cc @@ -51,7 +51,8 @@ 
void SliceFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/softmax.cc b/mace/kernels/opencl/softmax.cc index 24329be4..8e5be845 100644 --- a/mace/kernels/opencl/softmax.cc +++ b/mace/kernels/opencl/softmax.cc @@ -70,7 +70,8 @@ void SoftmaxFunctor::operator()(const Tensor *logits, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/space_to_batch.cc b/mace/kernels/opencl/space_to_batch.cc index fa4850cb..c3c45f0b 100644 --- a/mace/kernels/opencl/space_to_batch.cc +++ b/mace/kernels/opencl/space_to_batch.cc @@ -70,7 +70,8 @@ void SpaceToBatchFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc index fcf81528..da7dea0b 100644 --- a/mace/kernels/opencl/winograd_transform.cc +++ b/mace/kernels/opencl/winograd_transform.cc @@ -39,7 +39,8 @@ void WinogradTransformFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -138,7 +139,8 @@ void WinogradInverseTransformFunctor::operator()( if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU), 1))); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + kernel_error_->Allocate(1); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); diff --git a/mace/public/mace.h b/mace/public/mace.h index 02d903fd..1bc8c36a 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -30,7 +30,11 @@ const char *MaceVersion(); enum DeviceType { CPU = 0, GPU = 2, HEXAGON = 3 }; -enum MaceStatus { MACE_SUCCESS = 0, MACE_INVALID_ARGS = 1 }; +enum MaceStatus { + MACE_SUCCESS = 0, + MACE_INVALID_ARGS = 1, + MACE_OUT_OF_RESOURCES = 2 +}; // MACE input/output tensor class MaceTensor { @@ -61,12 +65,13 @@ class RunMetadata; class MaceEngine { public: - explicit MaceEngine(const NetDef *net_def, - DeviceType device_type, - const std::vector &input_nodes, - const std::vector &output_nodes); + explicit MaceEngine(DeviceType device_type); ~MaceEngine(); + MaceStatus Init(const NetDef *net_def, + const std::vector &input_nodes, + const std::vector &output_nodes); + 
MaceStatus Run(const std::map &inputs, std::map *outputs); diff --git a/mace/python/tools/mace_engine_factory.h.jinja2 b/mace/python/tools/mace_engine_factory.h.jinja2 index 3b499469..3110ab37 100644 --- a/mace/python/tools/mace_engine_factory.h.jinja2 +++ b/mace/python/tools/mace_engine_factory.h.jinja2 @@ -61,24 +61,25 @@ MaceStatus CreateMaceEngine( } const unsigned char * model_data = nullptr; NetDef net_def; + MaceStatus status = MaceStatus::MACE_SUCCESS; switch (model_name_map[model_name]) { {% for i in range(model_tags |length) %} case {{ i }}: model_data = mace::{{model_tags[i]}}::LoadModelData(model_data_file); net_def = mace::{{model_tags[i]}}::CreateNet(model_data); - engine->reset( - new mace::MaceEngine(&net_def, device_type, input_nodes, output_nodes)); + engine->reset(new mace::MaceEngine(device_type)); + status = (*engine)->Init(&net_def, input_nodes, output_nodes); if (device_type == DeviceType::GPU || device_type == DeviceType::HEXAGON) { mace::{{model_tags[i]}}::UnloadModelData(model_data); } break; {% endfor %} default: - return MaceStatus::MACE_INVALID_ARGS; + status = MaceStatus::MACE_INVALID_ARGS; } - return MaceStatus::MACE_SUCCESS; + return status; } } // namespace mace diff --git a/mace/test/mace_api_mt_test.cc b/mace/test/mace_api_mt_test.cc index a1271b28..a4088930 100644 --- a/mace/test/mace_api_mt_test.cc +++ b/mace/test/mace_api_mt_test.cc @@ -304,7 +304,9 @@ void MaceRunFunc(const int in_out_size) { new FileStorageFactory(file_path)); mace::SetKVStorageFactory(storage_factory); - MaceEngine engine(&net_def, device, input_names, output_names); + MaceEngine engine(device); + MaceStatus status = engine.Init(&net_def, input_names, output_names); + ASSERT_EQ(status, MaceStatus::MACE_SUCCESS); std::map inputs; std::map outputs; diff --git a/mace/test/mace_api_test.cc b/mace/test/mace_api_test.cc index 776fa674..0b16c762 100644 --- a/mace/test/mace_api_test.cc +++ b/mace/test/mace_api_test.cc @@ -308,7 +308,9 @@ void MaceRun(const int in_out_size, &net_def); } - MaceEngine engine(&net_def, device, input_names, output_names); + MaceEngine engine(device); + MaceStatus status = engine.Init(&net_def, input_names, output_names); + ASSERT_EQ(status, MaceStatus::MACE_SUCCESS); std::map inputs; std::map outputs; -- GitLab
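
Usage sketch (not part of the patch): how a call site migrates from the removed NetDef-taking constructor to the new two-step construct-then-Init API. RunModel is a hypothetical helper written only for illustration; obtaining the NetDef and filling the tensor maps are elided, and only the MaceEngine calls come from this change.

    #include <map>
    #include <string>
    #include <vector>

    #include "mace/public/mace.h"

    // Sketch under the assumption that the caller already has a NetDef and
    // populated input tensors. The engine is now constructed with the device
    // type only, and Init() reports failures (e.g. MACE_OUT_OF_RESOURCES when
    // a buffer or image allocation fails) instead of aborting in the constructor.
    mace::MaceStatus RunModel(const mace::NetDef *net_def,
                              const std::vector<std::string> &input_names,
                              const std::vector<std::string> &output_names,
                              const std::map<std::string, mace::MaceTensor> &inputs,
                              std::map<std::string, mace::MaceTensor> *outputs) {
      mace::MaceEngine engine(mace::DeviceType::GPU);  // device type is an example
      mace::MaceStatus status = engine.Init(net_def, input_names, output_names);
      if (status != mace::MaceStatus::MACE_SUCCESS) {
        return status;  // the caller decides how to recover
      }
      return engine.Run(inputs, outputs);
    }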