diff --git a/mace/core/preallocated_pooled_allocator.h b/mace/core/preallocated_pooled_allocator.h
new file mode 100644
index 0000000000000000000000000000000000000000..dcb35070d725865a10fc981943b2dc9b18084b53
--- /dev/null
+++ b/mace/core/preallocated_pooled_allocator.h
@@ -0,0 +1,40 @@
+//
+// Copyright (c) 2018 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
+#define MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
+
+#include <vector>
+
+#include "mace/core/allocator.h"
+
+namespace mace {
+
+// Interface for a pool of images that are allocated ahead of use and looked
+// up by an integer memory id.
+class PreallocatedPooledAllocator {
+ public:
+  PreallocatedPooledAllocator() {}
+
+  virtual ~PreallocatedPooledAllocator() noexcept {}
+
+  // Allocates an image of `image_shape` and element type `data_type` and
+  // registers it under `mem_id`.
+  virtual void PreallocateImage(int mem_id,
+                                const std::vector<size_t> &image_shape,
+                                DataType data_type) = 0;
+
+  // Returns the image registered under `mem_id`.
+  virtual void *GetImage(int mem_id) = 0;
+
+  // Returns true if an image is registered under `mem_id`.
+  virtual bool HasImage(int mem_id) = 0;
+
+  // Returns the shape the image under `mem_id` was preallocated with.
+  virtual std::vector<size_t> GetImageSize(int mem_id) = 0;
+};
+
+}  // namespace mace
+
+#endif  // MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
diff --git a/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc b/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f854b9c5c6b19c194b0f10ee215aac04904545fb
--- /dev/null
+++ b/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc
@@ -0,0 +1,38 @@
+//
+// Copyright (c) 2018 XiaoMi All rights reserved.
+//
+
+#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
+
+namespace mace {
+
+OpenCLPreallocatedPooledAllocator::OpenCLPreallocatedPooledAllocator()
+    : allocator_(GetDeviceAllocator(DeviceType::OPENCL)) {
+}
+
+OpenCLPreallocatedPooledAllocator::~OpenCLPreallocatedPooledAllocator() {
+}
+
+// Allocates an image of `image_shape` and `data_type` from the device
+// allocator and registers it, together with its shape, under `mem_id`.
+// Fails the check if `mem_id` is already registered or the shape is not 2-D.
+void OpenCLPreallocatedPooledAllocator::PreallocateImage(
+    int mem_id,
+    const std::vector<size_t> &image_shape,
+    DataType data_type) {
+  MACE_CHECK(!this->HasImage(mem_id), "Memory already exists: ", mem_id);
+  // The shape is indexed at [0] and [1] below, so validate its size first.
+  MACE_CHECK(image_shape.size() == 2,
+             "Image shape must have 2 dimensions: ", image_shape.size());
+  VLOG(3) << "Preallocate OpenCL image: " << mem_id << " "
+          << image_shape[0] << ", " << image_shape[1];
+  // Capture the allocator pointer by value so the deleter does not depend
+  // on `this`.
+  Allocator *image_allocator = allocator_;
+  images_[mem_id] = std::unique_ptr<void, std::function<void(void *)>>(
+      image_allocator->NewImage(image_shape, data_type),
+      [image_allocator](void *p) { image_allocator->DeleteImage(p); });
+  image_shapes_[mem_id] = image_shape;
+}
+
+}  // namespace mace
diff --git a/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h b/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h
new file mode 100644
index 0000000000000000000000000000000000000000..960ff174e4d6706fcc42f0304ca273c3c96c58c1
--- /dev/null
+++ b/mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h
@@ -0,0 +1,58 @@
+//
+// Copyright (c) 2018 XiaoMi All rights reserved.
+//
+
+#ifndef MACE_CORE_RUNTIME_OPENCL_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
+#define MACE_CORE_RUNTIME_OPENCL_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
+
+#include <functional>
+#include <memory>
+#include <unordered_map>
+#include <vector>
+
+#include "mace/core/preallocated_pooled_allocator.h"
+
+namespace mace {
+
+// PreallocatedPooledAllocator that obtains its images from the OpenCL device
+// allocator and keeps them, with their shapes, keyed by memory id.
+class OpenCLPreallocatedPooledAllocator : public PreallocatedPooledAllocator {
+ public:
+  OpenCLPreallocatedPooledAllocator();
+
+  ~OpenCLPreallocatedPooledAllocator() override;
+
+  void PreallocateImage(int mem_id,
+                        const std::vector<size_t> &image_shape,
+                        DataType data_type) override;
+
+  // Returns the image registered under `mem_id`; fails the check if absent.
+  inline void *GetImage(int mem_id) override {
+    auto it = images_.find(mem_id);
+    MACE_CHECK(it != images_.end(), "image does not exist");
+    return it->second.get();
+  }
+
+  inline bool HasImage(int mem_id) override {
+    return images_.find(mem_id) != images_.end();
+  }
+
+  // Returns the shape registered under `mem_id`. Fails the check for an
+  // unknown id rather than default-inserting an empty shape into the map.
+  inline std::vector<size_t> GetImageSize(int mem_id) override {
+    auto it = image_shapes_.find(mem_id);
+    MACE_CHECK(it != image_shapes_.end(), "image does not exist");
+    return it->second;
+  }
+
+ private:
+  // Images held by this pool; each deleter hands the image back to the
+  // allocator that created it.
+  std::unordered_map<int, std::unique_ptr<void, std::function<void(void *)>>>
+      images_;
+  std::unordered_map<int, std::vector<size_t>> image_shapes_;
+  Allocator *allocator_;
+};
+
+}  // namespace mace
+
+#endif  // MACE_CORE_RUNTIME_OPENCL_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
diff --git a/mace/core/tensor.h b/mace/core/tensor.h
index 62d368974e024e8b89aec7019e6fd04d512c2ca2..83961e1058b86b5b8832cd9c443a79d9f79c2ef8 100644
--- a/mace/core/tensor.h
+++ b/mace/core/tensor.h
@@ -10,6 +10,7 @@
 #include "mace/utils/logging.h"
 #include "mace/core/types.h"
 #include "mace/core/public/mace.h"
+#include "mace/core/preallocated_pooled_allocator.h"
 
 namespace mace {
 
@@ -71,7 +72,8 @@ class Tensor {
         buffer_(nullptr),
         data_(nullptr),
         unused_(false),
-        is_image_(false){};
+        is_image_(false){
+};
 
   Tensor(Allocator *alloc, DataType type)
       : alloc_(alloc),
@@ -80,18 +82,10 @@ class Tensor {
         buffer_(nullptr),
         data_(nullptr),
         unused_(false),
-        is_image_(false){};
+        is_image_(false){
+  };
 
   ~Tensor() {
-    MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before destroy");
-    if (buffer_ != nullptr) {
-      MACE_CHECK_NOTNULL(alloc_);
-      if (is_image_) {
-        alloc_->DeleteImage(buffer_);
-      } else {
-        alloc_->Delete(buffer_);
-      }
-    }
   }
 
   inline
DataType dtype() const { return dtype_; }
@@ -132,13 +126,13 @@ class Tensor {
   inline void Map() const {
     if (!OnHost()) {
       MACE_CHECK(buffer_ != nullptr && data_ == nullptr);
-      data_ = alloc_->Map(buffer_, size_ * SizeOfType());
+      data_ = alloc_->Map(buffer_.get(), size_ * SizeOfType());
     }
   }
 
   inline void MapImage(std::vector &mapped_image_pitch) const {
     MACE_CHECK(!OnHost() && buffer_ != nullptr && data_ == nullptr);
-    data_ = alloc_->MapImage(buffer_, image_shape_, mapped_image_pitch);
+    data_ = alloc_->MapImage(buffer_.get(), image_shape_, mapped_image_pitch);
   }
 
   /*
@@ -147,12 +141,12 @@ class Tensor {
   inline void Unmap() const {
     if (!OnHost()) {
       MACE_CHECK(buffer_ != nullptr && data_ != nullptr);
-      alloc_->Unmap(buffer_, data_);
+      alloc_->Unmap(buffer_.get(), data_);
       data_ = nullptr;
     }
   }
 
-  void *buffer() const { return buffer_; }
+  void *buffer() const { return buffer_.get(); }
 
   inline const void *raw_data() const {
     void *data = MappedBuffer();
@@ -181,42 +175,51 @@ class Tensor {
   }
 
+  // Resizes a buffer-backed tensor to `shape`, allocating a new buffer when
+  // the element count changes. Image-backed tensors must use ResizeImage.
   inline void Resize(const vector &shape) {
+    MACE_CHECK(!is_image_ || buffer_ == nullptr,
+               "Resize is not for image, use ResizeImage instead.");
+    is_image_ = false;
     shape_ = shape;
     index_t size = NumElements();
-    if (size_ != size || is_image_) {
+    if (size_ != size) {
       size_ = size;
       MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize");
-      if (is_image_) {
-        alloc_->DeleteImage(buffer_);
-      } else {
-        alloc_->Delete(buffer_);
-      }
-      is_image_ = false;
-      CASES(dtype_, buffer_ = alloc_->New(size_ * sizeof(T)));
+      // Assigning a new buffer_ releases the previous one via its deleter.
+      CASES(dtype_,
+            (buffer_ = std::unique_ptr<void, std::function<void(void *)>>(
+                 alloc_->New(size_ * sizeof(T)),
+                 [this](void *p) { this->alloc_->Delete(p); })));
     }
   }
 
+  // Resizes an image-backed tensor to `shape` with image extent `image_shape`.
+  // A backing image is allocated at most once and is never grown afterwards.
+  // NOTE(review): when the element count is unchanged, `image_shape` is
+  // neither validated nor stored -- confirm callers never rely on that.
   inline void ResizeImage(const vector &shape,
                           const std::vector &image_shape) {
+    MACE_CHECK(is_image_ || buffer_ == nullptr,
+               "ResizeImage is not for buffer, use Resize instead.");
+    is_image_ = true;
     shape_ = shape;
     index_t size = NumElements();
-    if (size_ != size || !is_image_) {
+    if (size_ != size) {
       size_ = size;
-      MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize");
-
-      if (is_image_ && !image_shape_.empty()) {
-        MACE_ASSERT(image_shape_.size() == 2
-                        && image_shape_[0] >= image_shape[0]
-                        || image_shape_[1] >= image_shape[1],
-                    "image shape not large enough");
-      }
-      if (!is_image_ && buffer_ != nullptr) {
-        alloc_->Delete(buffer_);
-      }
-      is_image_ = true;
-      if (image_shape_.empty()) {
-        image_shape_ = image_shape;
-        buffer_ = alloc_->NewImage(image_shape, dtype_);
+      MACE_CHECK(image_shape.size() == 2, "image shape must have 2 dimensions");
+      image_shape_ = image_shape;
+      if (!preallocated_image_shape_.empty()) {
+        MACE_CHECK(preallocated_image_shape_[0] >= image_shape[0]
+                       && preallocated_image_shape_[1] >= image_shape[1],
+                   "image shape not large enough: preallocated ",
+                   preallocated_image_shape_[0], " ",
+                   preallocated_image_shape_[1], ", apply for ",
+                   image_shape[0], " ", image_shape[1]);
+      } else {
+        buffer_ = std::unique_ptr<void, std::function<void(void *)>>(
+            alloc_->NewImage(image_shape, dtype_),
+            [this](void *p) { this->alloc_->DeleteImage(p); });
+        preallocated_image_shape_ = image_shape;
       }
     }
   }
@@ -237,15 +240,14 @@ class Tensor {
     }
   }
 
-  inline void AllocateImageMemory(const std::vector &image_shape) {
+  // Points this tensor at an externally provided `image` of `image_shape`.
+  inline void PreallocateImage(void *image,
+                               const std::vector<size_t> &image_shape) {
     is_image_ = true;
-    if (image_shape_ != image_shape) {
-      if (buffer_ != nullptr) {
-        alloc_->DeleteImage(buffer_);
-      }
-      image_shape_ = image_shape;
-      buffer_ = alloc_->NewImage(image_shape, dtype_);
-    }
+    // No-op deleter: the tensor does not own preallocated memory.
+    buffer_ = std::unique_ptr<void, std::function<void(void *)>>(
+        image, [](void *) {});
+    preallocated_image_shape_ = image_shape;
   }
 
   template
@@ -273,7 +275,7 @@ class Tensor {
   inline void DebugPrint() const {
     using namespace numerical_chars;
     std::stringstream os;
-    for (int i : shape_) {
+    for (index_t i : shape_) {
       os << i << ", ";
     }
 
@@ -336,7 +338,7 @@ class Tensor {
  private:
   inline void *MappedBuffer() const {
     if (OnHost()) {
-      return buffer_;
+      return buffer_.get();
     }
     return data_;
   }
@@ -346,7 +348,7 @@
class Tensor { DataType dtype_; // Raw buffer, must be mapped as host accessable data before // read or write - void *buffer_; + std::unique_ptr> buffer_; // Mapped buffer mutable void *data_; vector shape_; @@ -354,6 +356,7 @@ class Tensor { bool unused_; bool is_image_; std::vector image_shape_; + std::vector preallocated_image_shape_; DISABLE_COPY_AND_ASSIGN(Tensor); }; diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index 140986a4492fa1f59fe5f2467462ff3a9bd7581c..6d3916d25ba1b20e94d1572937179f0e3a76db7e 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -5,6 +5,7 @@ #include "mace/core/workspace.h" #include "mace/core/serializer.h" #include "mace/core/arg_helper.h" +#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h" namespace mace { @@ -23,7 +24,7 @@ Tensor *Workspace::CreateTensor(const string &name, VLOG(1) << "Tensor " << name << " already exists. Skipping."; } else { VLOG(1) << "Creating Tensor " << name; - tensor_map_[name] = unique_ptr(new Tensor(alloc, type)); + tensor_map_[name] = std::move(std::unique_ptr(new Tensor(alloc, type))); } return GetTensor(name); } @@ -84,25 +85,45 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) { if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) { return; } - std::map> mem_tensor_map; - const DataType dtype = static_cast( - ArgumentHelper::GetSingleArgument( - net_def.op(0), + preallocated_allocator_ = + std::move(std::unique_ptr( + new OpenCLPreallocatedPooledAllocator)); + + DataType dtype = DataType::DT_INVALID; + // We use the data type of the first op (with mem id, must be image), + // as GPU have consistent data type for each layer for now. + // As DSP may have different data output type for each op, + // we stick to the same concept. 
+ for (auto &op: net_def.op()) { + if (op.has_mem_id()) { + const DataType op_dtype = static_cast( + ArgumentHelper::GetSingleArgument( + op, "T", static_cast(DT_FLOAT))); + if (op_dtype != DataType::DT_INVALID) { + dtype = op_dtype; + // find first valid data type, break + break; + } + } + } + MACE_CHECK(dtype != DataType::DT_INVALID, "data type is invalid."); for (auto &mem_block: net_def.mem_arena().mem_block()) { - string mem_block_name = MemBlockName(mem_block.mem_id()); - mem_tensor_map[mem_block_name].reset(new Tensor( - GetDeviceAllocator(DeviceType::OPENCL), - dtype)); - mem_tensor_map[mem_block_name]->AllocateImageMemory({mem_block.x(), - mem_block.y()}); + preallocated_allocator_->PreallocateImage(mem_block.mem_id(), + {mem_block.x(), mem_block.y()}, + dtype); } + VLOG(1) << "Preallocate image to tensors"; + auto allocator = GetDeviceAllocator(DeviceType::OPENCL); for (auto &op: net_def.op()) { if (op.has_mem_id()) { - tensor_map_[op.output(0)] = mem_tensor_map[MemBlockName(op.mem_id())]; + CreateTensor(op.output(0), allocator, dtype); + tensor_map_[op.output(0)]->PreallocateImage( + preallocated_allocator_->GetImage(op.mem_id()), + preallocated_allocator_->GetImageSize(op.mem_id())); } } } -} // namespace mace \ No newline at end of file +} // namespace mace diff --git a/mace/core/workspace.h b/mace/core/workspace.h index 5e0b4ace3393b25edbddbf224ae7962c50ec6735..8b69f806d092f6cee9e1ce7a6a528fbe01694f8e 100644 --- a/mace/core/workspace.h +++ b/mace/core/workspace.h @@ -8,14 +8,17 @@ #include "mace/core/common.h" #include "mace/core/tensor.h" #include "mace/core/public/mace.h" +#include "mace/core/preallocated_pooled_allocator.h" namespace mace { class Workspace { public: - typedef map> TensorMap; + typedef map> TensorMap; - Workspace() {} + Workspace() + : preallocated_allocator_(nullptr) {} + ~Workspace() {} vector Tensors() const; @@ -35,15 +38,13 @@ class Workspace { void LoadModelTensor(const NetDef &net_def, DeviceType type); - inline std::string 
MemBlockName(int mem_id) const { - return internal::MakeString("mem_block_", mem_id); - }; - private: void CreateImageOutputTensor(const NetDef &net_def); TensorMap tensor_map_; + std::unique_ptr preallocated_allocator_; + DISABLE_COPY_AND_ASSIGN(Workspace); };