diff --git a/mace/core/allocator.h b/mace/core/allocator.h index a9f76fcab7d91d8c81a76d181f9515998fb2f72a..cf1e1db5c55b24903685d48799ec847fe833d1ed 100644 --- a/mace/core/allocator.h +++ b/mace/core/allocator.h @@ -29,9 +29,10 @@ class Allocator { public: Allocator() {} virtual ~Allocator() noexcept {} - virtual void *New(size_t nbytes) const = 0; - virtual void *NewImage(const std::vector &image_shape, - const DataType dt) const = 0; + virtual MaceStatus New(size_t nbytes, void **result) const = 0; + virtual MaceStatus NewImage(const std::vector &image_shape, + const DataType dt, + void **result) const = 0; virtual void Delete(void *data) const = 0; virtual void DeleteImage(void *data) const = 0; virtual void *Map(void *buffer, size_t offset, size_t nbytes) const = 0; @@ -40,22 +41,12 @@ class Allocator { std::vector *mapped_image_pitch) const = 0; virtual void Unmap(void *buffer, void *mapper_ptr) const = 0; virtual bool OnHost() const = 0; - - template - T *New(size_t num_elements) { - if (num_elements > (std::numeric_limits::max() / sizeof(T))) { - return nullptr; - } - void *p = New(sizeof(T) * num_elements); - T *typed_p = reinterpret_cast(p); - return typed_p; - } }; class CPUAllocator : public Allocator { public: ~CPUAllocator() override {} - void *New(size_t nbytes) const override { + MaceStatus New(size_t nbytes, void **result) const override { VLOG(3) << "Allocate CPU buffer: " << nbytes; void *data = nullptr; #ifdef __ANDROID__ @@ -66,16 +57,19 @@ class CPUAllocator : public Allocator { MACE_CHECK_NOTNULL(data); // TODO(heliangliang) This should be avoided sometimes memset(data, 0, nbytes); - return data; + *result = data; + return MaceStatus::MACE_SUCCESS; } - void *NewImage(const std::vector &shape, - const DataType dt) const override { + MaceStatus NewImage(const std::vector &shape, + const DataType dt, + void **status) const override { LOG(FATAL) << "Allocate CPU image"; - return nullptr; + return MaceStatus::MACE_SUCCESS; } void Delete(void *data) const override { + MACE_CHECK_NOTNULL(data); VLOG(3) << "Free CPU buffer"; free(data); } diff --git a/mace/core/buffer.h b/mace/core/buffer.h index b655fdc4b10857a7a446b1b7ffc859bf535427e9..0eae1b84313137696ab16ec813e6da0e9f52e6a8 100644 --- a/mace/core/buffer.h +++ b/mace/core/buffer.h @@ -25,6 +25,11 @@ class BufferBase { virtual void *raw_mutable_data() = 0; + virtual MaceStatus Allocate(index_t size) = 0; + + virtual MaceStatus Allocate(const std::vector &shape, + DataType data_type) = 0; + virtual void *Map(index_t offset, index_t length, std::vector *pitch) const = 0; @@ -35,7 +40,7 @@ class BufferBase { virtual void UnMap() = 0; - virtual void Resize(index_t size) = 0; + virtual MaceStatus Resize(index_t size) = 0; virtual void Copy(void *src, index_t offset, index_t length) = 0; @@ -70,14 +75,6 @@ class Buffer : public BufferBase { mapped_buf_(nullptr), is_data_owner_(true) {} - Buffer(Allocator *allocator, index_t size) - : BufferBase(size), - allocator_(allocator), - mapped_buf_(nullptr), - is_data_owner_(true) { - buf_ = allocator->New(size); - } - Buffer(Allocator *allocator, void *data, index_t size) : BufferBase(size), allocator_(allocator), @@ -94,6 +91,7 @@ class Buffer : public BufferBase { } } + void *buffer() { MACE_CHECK_NOTNULL(buf_); return buf_; @@ -119,6 +117,28 @@ class Buffer : public BufferBase { } } + MaceStatus Allocate(index_t size) { + if (size <= 0) { + return MaceStatus::MACE_SUCCESS; + } + MACE_CHECK(is_data_owner_, + "data is not owned by this buffer, cannot reallocate"); + if (mapped_buf_ != nullptr) { + UnMap(); + } + if (buf_ != nullptr) { + allocator_->Delete(buf_); + } + size_ = size; + return allocator_->New(size, &buf_); + } + + MaceStatus Allocate(const std::vector &shape, + DataType data_type) { + MACE_NOT_IMPLEMENTED; + return MACE_SUCCESS; + } + void *Map(index_t offset, index_t length, std::vector *pitch) const { MACE_CHECK_NOTNULL(buf_); return allocator_->Map(buf_, offset, length); @@ -140,7 +160,7 @@ class Buffer : public BufferBase { mapped_buf_ = nullptr; } - void Resize(index_t size) { + MaceStatus Resize(index_t size) { MACE_CHECK(is_data_owner_, "data is not owned by this buffer, cannot resize"); if (size != size_) { @@ -148,8 +168,9 @@ class Buffer : public BufferBase { allocator_->Delete(buf_); } size_ = size; - buf_ = allocator_->New(size); + return allocator_->New(size, &buf_); } + return MaceStatus::MACE_SUCCESS; } void Copy(void *src, index_t offset, index_t length) { @@ -183,25 +204,35 @@ class Image : public BufferBase { buf_(nullptr), mapped_buf_(nullptr) {} - Image(std::vector shape, DataType data_type) - : BufferBase( - std::accumulate( - shape.begin(), shape.end(), 1, std::multiplies()) * - GetEnumTypeSize(data_type)), - allocator_(GetDeviceAllocator(OPENCL)), - mapped_buf_(nullptr) { - shape_ = shape; - data_type_ = data_type; - buf_ = allocator_->NewImage(shape, data_type); + virtual ~Image() { + if (mapped_buf_ != nullptr) { + UnMap(); + } + if (buf_ != nullptr) { + allocator_->DeleteImage(buf_); + } } - virtual ~Image() { + MaceStatus Allocate(index_t size) { + LOG(FATAL) << "Image should not call this allocate function"; + return MaceStatus::MACE_SUCCESS; + } + + MaceStatus Allocate(const std::vector &shape, + DataType data_type) { + index_t size = std::accumulate( + shape.begin(), shape.end(), 1, std::multiplies()) * + GetEnumTypeSize(data_type); if (mapped_buf_ != nullptr) { UnMap(); } if (buf_ != nullptr) { allocator_->DeleteImage(buf_); } + size_ = size; + shape_ = shape; + data_type_ = data_type; + return allocator_->NewImage(shape, data_type, &buf_); } void *buffer() { @@ -244,7 +275,10 @@ class Image : public BufferBase { mapped_buf_ = nullptr; } - void Resize(index_t size) { MACE_NOT_IMPLEMENTED; } + MaceStatus Resize(index_t size) { + MACE_NOT_IMPLEMENTED; + return MaceStatus::MACE_SUCCESS; + } void Copy(void *src, index_t offset, index_t length) { MACE_NOT_IMPLEMENTED; } @@ -287,6 +321,17 @@ class BufferSlice : public BufferBase { } } + MaceStatus Allocate(index_t size) { + LOG(FATAL) << "BufferSlice should not call allocate function"; + return MaceStatus::MACE_SUCCESS; + } + + MaceStatus Allocate(const std::vector &shape, + DataType data_type) { + LOG(FATAL) << "BufferSlice should not call allocate function"; + return MaceStatus::MACE_SUCCESS; + } + void *buffer() { MACE_CHECK_NOTNULL(buffer_); return buffer_->buffer(); @@ -326,7 +371,10 @@ class BufferSlice : public BufferBase { mapped_buf_ = nullptr; } - void Resize(index_t size) { MACE_NOT_IMPLEMENTED; } + MaceStatus Resize(index_t size) { + MACE_NOT_IMPLEMENTED; + return MaceStatus::MACE_SUCCESS; + } void Copy(void *src, index_t offset, index_t length) { MACE_NOT_IMPLEMENTED; } diff --git a/mace/core/mace.cc b/mace/core/mace.cc index 5f7647ed4a0df7377f143191bf2bf42c74429dc6..7df44aa9234f7e56e0e526736db43526106f6965 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -66,12 +66,13 @@ std::shared_ptr MaceTensor::data() { return impl_->data; } // Mace Engine class MaceEngine::Impl { public: - explicit Impl(const NetDef *net_def, - DeviceType device_type, - const std::vector &input_nodes, - const std::vector &output_nodes); + explicit Impl(DeviceType device_type); ~Impl(); + MaceStatus Init(const NetDef *net_def, + const std::vector &input_nodes, + const std::vector &output_nodes); + MaceStatus Run(const std::map &inputs, std::map *outputs, RunMetadata *run_metadata); @@ -86,15 +87,17 @@ class MaceEngine::Impl { DISABLE_COPY_AND_ASSIGN(Impl); }; -MaceEngine::Impl::Impl(const NetDef *net_def, - DeviceType device_type, - const std::vector &input_nodes, - const std::vector &output_nodes) +MaceEngine::Impl::Impl(DeviceType device_type) : op_registry_(new OperatorRegistry()), device_type_(device_type), ws_(new Workspace()), net_(nullptr), - hexagon_controller_(nullptr) { + hexagon_controller_(nullptr) {} + +MaceStatus MaceEngine::Impl::Init( + const NetDef *net_def, + const std::vector &input_nodes, + const std::vector &output_nodes) { LOG(INFO) << "MACE version: " << MaceVersion(); // Set storage path for internal usage for (auto input_name : input_nodes) { @@ -105,7 +108,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def, ws_->CreateTensor(MakeString("mace_output_node_", output_name, ":0"), GetDeviceAllocator(device_type_), DT_FLOAT); } - if (device_type == HEXAGON) { + if (device_type_ == HEXAGON) { hexagon_controller_.reset(new HexagonControlWrapper()); MACE_CHECK(hexagon_controller_->Config(), "hexagon config error"); MACE_CHECK(hexagon_controller_->Init(), "hexagon init error"); @@ -120,16 +123,21 @@ MaceEngine::Impl::Impl(const NetDef *net_def, hexagon_controller_->PrintGraph(); } } else { - ws_->LoadModelTensor(*net_def, device_type); + MaceStatus status = ws_->LoadModelTensor(*net_def, device_type_); + if (status != MaceStatus::MACE_SUCCESS) { + return status; + } // Init model - auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type, + auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type_, NetMode::INIT); if (!net->Run()) { LOG(FATAL) << "Net init run failed"; } - net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type)); + net_ = std::move(CreateNet(op_registry_, *net_def, + ws_.get(), device_type_)); } + return MaceStatus::MACE_SUCCESS; } MaceEngine::Impl::~Impl() { @@ -202,16 +210,19 @@ MaceStatus MaceEngine::Impl::Run( return MACE_SUCCESS; } -MaceEngine::MaceEngine(const NetDef *net_def, - DeviceType device_type, - const std::vector &input_nodes, - const std::vector &output_nodes) { +MaceEngine::MaceEngine(DeviceType device_type) { impl_ = std::unique_ptr( - new MaceEngine::Impl(net_def, device_type, input_nodes, output_nodes)); + new MaceEngine::Impl(device_type)); } MaceEngine::~MaceEngine() = default; +MaceStatus MaceEngine::Init(const NetDef *net_def, + const std::vector &input_nodes, + const std::vector &output_nodes) { + return impl_->Init(net_def, input_nodes, output_nodes); +} + MaceStatus MaceEngine::Run(const std::map &inputs, std::map *outputs, RunMetadata *run_metadata) { diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index e0f63b8689b6838d67c088e98f85247198fe361b..f94ec17c08ec03efbe25221599af06b597a4b006 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -34,18 +34,27 @@ static cl_channel_type DataTypeToCLChannelType(const DataType t) { OpenCLAllocator::OpenCLAllocator() {} OpenCLAllocator::~OpenCLAllocator() {} -void *OpenCLAllocator::New(size_t nbytes) const { +MaceStatus OpenCLAllocator::New(size_t nbytes, void **result) const { VLOG(3) << "Allocate OpenCL buffer: " << nbytes; cl_int error; cl::Buffer *buffer = new cl::Buffer(OpenCLRuntime::Global()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nbytes, nullptr, &error); - MACE_CHECK_CL_SUCCESS(error); - return static_cast(buffer); + if (error != CL_SUCCESS) { + LOG(WARNING) << "Allocate OpenCL Buffer with " + << nbytes << " bytes failed because of" + << OpenCLErrorToString(error); + *result = nullptr; + return MaceStatus::MACE_OUT_OF_RESOURCES; + } else { + *result = buffer; + return MaceStatus::MACE_SUCCESS; + } } -void *OpenCLAllocator::NewImage(const std::vector &image_shape, - const DataType dt) const { +MaceStatus OpenCLAllocator::NewImage(const std::vector &image_shape, + const DataType dt, + void **result) const { MACE_CHECK(image_shape.size() == 2) << "Image shape's size must equal 2"; VLOG(3) << "Allocate OpenCL image: " << image_shape[0] << ", " << image_shape[1]; @@ -57,11 +66,17 @@ void *OpenCLAllocator::NewImage(const std::vector &image_shape, new cl::Image2D(OpenCLRuntime::Global()->context(), CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, img_format, image_shape[0], image_shape[1], 0, nullptr, &error); - MACE_CHECK_CL_SUCCESS(error) << " with image shape: [" - << image_shape[0] << ", " << image_shape[1] - << "]"; - - return cl_image; + if (error != CL_SUCCESS) { + LOG(WARNING) << "Allocate OpenCL image with shape: [" + << image_shape[0] << ", " << image_shape[1] + << "] failed because of" + << OpenCLErrorToString(error); + *result = nullptr; + return MaceStatus::MACE_OUT_OF_RESOURCES; + } else { + *result = cl_image; + return MaceStatus::MACE_SUCCESS; + } } void OpenCLAllocator::Delete(void *buffer) const { diff --git a/mace/core/runtime/opencl/opencl_allocator.h b/mace/core/runtime/opencl/opencl_allocator.h index b801476716771f81a6ee06a0b7e5ae4228394c65..896dba17ebc50b82237e6c408e5ab208947088af 100644 --- a/mace/core/runtime/opencl/opencl_allocator.h +++ b/mace/core/runtime/opencl/opencl_allocator.h @@ -17,15 +17,16 @@ class OpenCLAllocator : public Allocator { ~OpenCLAllocator() override; - void *New(size_t nbytes) const override; + MaceStatus New(size_t nbytes, void **result) const override; /* * Use Image2D with RGBA (128-bit) format to represent the image. * * @ shape : [depth, ..., height, width ]. */ - void *NewImage(const std::vector &image_shape, - const DataType dt) const override; + MaceStatus NewImage(const std::vector &image_shape, + const DataType dt, + void **result) const override; void Delete(void *buffer) const override; diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 29571d96140c436eb71bfe64128842e297899ee8..c2a189463a9b90cdb9ce1466845f9ef769b7cb10 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -171,15 +171,16 @@ class Tensor { MACE_CHECK(raw_size() <= buffer_->size()); } - inline void Resize(const std::vector &shape) { + inline MaceStatus Resize(const std::vector &shape) { shape_ = shape; image_shape_.clear(); if (buffer_ != nullptr) { MACE_CHECK(!has_opencl_image(), "Cannot resize image, use ResizeImage."); - buffer_->Resize(raw_size()); + return buffer_->Resize(raw_size()); } else { MACE_CHECK(is_buffer_owner_); - buffer_ = new Buffer(allocator_, raw_size()); + buffer_ = new Buffer(allocator_); + return buffer_->Allocate(raw_size()); } } @@ -195,13 +196,14 @@ class Tensor { is_buffer_owner_ = false; } - inline void ResizeImage(const std::vector &shape, - const std::vector &image_shape) { + inline MaceStatus ResizeImage(const std::vector &shape, + const std::vector &image_shape) { shape_ = shape; image_shape_ = image_shape; if (buffer_ == nullptr) { MACE_CHECK(is_buffer_owner_); - buffer_ = new Image(image_shape, dtype_); + buffer_ = new Image(); + return buffer_->Allocate(image_shape, dtype_); } else { MACE_CHECK(has_opencl_image(), "Cannot ResizeImage buffer, use Resize."); Image *image = dynamic_cast(buffer_); @@ -211,24 +213,27 @@ class Tensor { "): current physical image shape: ", image->image_shape()[0], ", ", image->image_shape()[1], " < logical image shape: ", image_shape[0], ", ", image_shape[1]); + return MaceStatus::MACE_SUCCESS; } } - inline void ResizeLike(const Tensor &other) { ResizeLike(&other); } + inline MaceStatus ResizeLike(const Tensor &other) { + return ResizeLike(&other); + } - inline void ResizeLike(const Tensor *other) { + inline MaceStatus ResizeLike(const Tensor *other) { if (other->has_opencl_image()) { if (is_buffer_owner_ && buffer_ != nullptr && !has_opencl_image()) { delete buffer_; buffer_ = nullptr; } - ResizeImage(other->shape(), other->image_shape_); + return ResizeImage(other->shape(), other->image_shape_); } else { if (is_buffer_owner_ && buffer_ != nullptr && has_opencl_image()) { delete buffer_; buffer_ = nullptr; } - Resize(other->shape()); + return Resize(other->shape()); } } diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index 227c99737c1fc766c6a8fe0944ce6ea5b84cacc3..86bf87fe3f25b071c5909285ea33f070a008ac8c 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -48,7 +48,7 @@ std::vector Workspace::Tensors() const { return names; } -void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { +MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { MACE_LATENCY_LOGGER(1, "Load model tensors"); index_t model_data_size = 0; unsigned char *model_data_ptr = nullptr; @@ -74,7 +74,11 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { new Buffer(GetDeviceAllocator(type), model_data_ptr, model_data_size))); } else { tensor_buffer_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(type), model_data_size))); + new Buffer(GetDeviceAllocator(type)))); + MaceStatus status = tensor_buffer_->Allocate(model_data_size); + if (status != MaceStatus::MACE_SUCCESS) { + return status; + } tensor_buffer_->Map(nullptr); tensor_buffer_->Copy(model_data_ptr, 0, model_data_size); tensor_buffer_->UnMap(); @@ -104,13 +108,15 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { } if (type == DeviceType::OPENCL) { - CreateImageOutputTensor(net_def); + MaceStatus status = CreateImageOutputTensor(net_def); + if (status != MaceStatus::MACE_SUCCESS) return status; } + return MaceStatus::MACE_SUCCESS; } -void Workspace::CreateImageOutputTensor(const NetDef &net_def) { +MaceStatus Workspace::CreateImageOutputTensor(const NetDef &net_def) { if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) { - return; + return MaceStatus::MACE_SUCCESS; } DataType dtype = DataType::DT_INVALID; @@ -133,7 +139,12 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) { MACE_CHECK(dtype != DataType::DT_INVALID, "data type is invalid."); for (auto &mem_block : net_def.mem_arena().mem_block()) { std::unique_ptr image_buf( - new Image({mem_block.x(), mem_block.y()}, dtype)); + new Image()); + MaceStatus status = image_buf->Allocate( + {mem_block.x(), mem_block.y()}, dtype); + if (status != MaceStatus::MACE_SUCCESS) { + return status; + } preallocated_allocator_.SetBuffer(mem_block.mem_id(), std::move(image_buf)); } VLOG(3) << "Preallocate image to tensors"; @@ -157,6 +168,7 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) { } } } + return MaceStatus::MACE_SUCCESS; } } // namespace mace diff --git a/mace/core/workspace.h b/mace/core/workspace.h index 1e1012672c30d388fe34ff645b50ed36a292c16b..7573b29c183035c807b16e287e56f33c31e8cd60 100644 --- a/mace/core/workspace.h +++ b/mace/core/workspace.h @@ -37,10 +37,10 @@ class Workspace { std::vector Tensors() const; - void LoadModelTensor(const NetDef &net_def, DeviceType type); + MaceStatus LoadModelTensor(const NetDef &net_def, DeviceType type); private: - void CreateImageOutputTensor(const NetDef &net_def); + MaceStatus CreateImageOutputTensor(const NetDef &net_def); TensorMap tensor_map_; diff --git a/mace/ops/depthwise_conv2d_test.cc b/mace/ops/depthwise_conv2d_test.cc index c3bca21ff9c2e34728a922be3644199288298485..af84069ae3e6c41906b4218093034a4522778313 100644 --- a/mace/ops/depthwise_conv2d_test.cc +++ b/mace/ops/depthwise_conv2d_test.cc @@ -387,7 +387,7 @@ void TestNEONNxNS12(const index_t height, } TEST_F(DepthwiseConv2dOpTest, NEONTest) { - TestNEONNxNS12(4, 4, 32, 1); + TestNEONNxNS12(5, 5, 32, 1); TestNEONNxNS12(64, 64, 32, 1); TestNEONNxNS12(112, 112, 32, 1); TestNEONNxNS12(128, 128, 15, 1); diff --git a/mace/public/mace.h b/mace/public/mace.h index db57fbcb9eaf27c3f8ea2f16ba062c6ca73a09fd..0ab02181f45b438a6806ee8c9f8590c8c0a6edb7 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -20,7 +20,11 @@ const char *MaceVersion(); enum DeviceType { CPU = 0, NEON = 1, OPENCL = 2, HEXAGON = 3 }; -enum MaceStatus { MACE_SUCCESS = 0, MACE_INVALID_ARGS = 1 }; +enum MaceStatus { + MACE_SUCCESS = 0, + MACE_INVALID_ARGS = 1, + MACE_OUT_OF_RESOURCES = 2, +}; // MACE input/output tensor class MaceTensor { @@ -51,12 +55,14 @@ class RunMetadata; class MaceEngine { public: - explicit MaceEngine(const NetDef *net_def, - DeviceType device_type, - const std::vector &input_nodes, - const std::vector &output_nodes); + explicit MaceEngine(DeviceType device_type); + ~MaceEngine(); + MaceStatus Init(const NetDef *net_def, + const std::vector &input_nodes, + const std::vector &output_nodes); + MaceStatus Run(const std::map &inputs, std::map *outputs); diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc index 60f5cd598cc96d650108c49c007d65bd783f4241..9be1ad365549fb95cf6eb8178b4dda643723b9d3 100644 --- a/mace/tools/validation/mace_run.cc +++ b/mace/tools/validation/mace_run.cc @@ -231,7 +231,11 @@ bool RunModel(const std::vector &input_names, std::shared_ptr storage_factory( new FileStorageFactory(kernel_file_path)); ConfigKVStorageFactory(storage_factory); - mace::MaceEngine engine(&net_def, device_type, input_names, output_names); + mace::MaceEngine engine(device_type); + MaceStatus status = engine.Init(&net_def, input_names, output_names); + if (status != MaceStatus::MACE_SUCCESS) { + LOG(FATAL) << "Engine init failed with status: " << status; + } if (device_type == DeviceType::OPENCL || device_type == DeviceType::HEXAGON) { mace::MACE_MODEL_TAG::UnloadModelData(model_data); }