提交 ed7a7f53 编写于 作者: 李寅

Separate physical memory pool from logical tensor

上级 40d6571f
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
#define MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
#include "mace/core/allocator.h"
namespace mace {
// Abstract interface for a pool of images that are created ahead of time and
// later looked up by an integer memory id.
//
// NOTE(review): the per-method notes below describe how the OpenCL
// implementation added in the same change behaves; other implementations are
// only bound by the signatures.
class PreallocatedPooledAllocator {
 public:
  PreallocatedPooledAllocator() = default;
  virtual ~PreallocatedPooledAllocator() noexcept = default;

  // Creates the image identified by `mem_id` with the given shape and
  // element type.
  virtual void PreallocateImage(int mem_id,
                                const std::vector<size_t> &image_shape,
                                DataType data_type) = 0;

  // Returns the raw handle of the image stored under `mem_id`.
  virtual void *GetImage(int mem_id) = 0;

  // Reports whether an image is stored under `mem_id`.
  virtual bool HasImage(int mem_id) = 0;

  // Returns the shape recorded for the image stored under `mem_id`.
  virtual std::vector<size_t> GetImageSize(int mem_id) = 0;
};
} // namespace mace
#endif // MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
namespace mace {
// Binds the pool to whatever allocator GetDeviceAllocator returns for
// DeviceType::OPENCL; the pointer is stored, not owned.
OpenCLPreallocatedPooledAllocator::OpenCLPreallocatedPooledAllocator()
    : allocator(GetDeviceAllocator(DeviceType::OPENCL)) {}
// Nothing to release by hand: every entry stored in images_ carries its own
// deleter, which runs when the map member is destroyed.
OpenCLPreallocatedPooledAllocator::~OpenCLPreallocatedPooledAllocator() =
    default;
void OpenCLPreallocatedPooledAllocator::PreallocateImage(int mem_id,
const std::vector<
size_t> &image_shape,
DataType data_type) {
MACE_CHECK(!this->HasImage(mem_id), "Memory already exists: ", mem_id);
VLOG(3) << "Preallocate OpenCL image: " << mem_id << " "
<< image_shape[0] << ", " << image_shape[1];
images_[mem_id] = std::move(std::unique_ptr<void, std::function<void(void *)>>(
allocator->NewImage(image_shape, data_type), [this](void *p) {
this->allocator->DeleteImage(p);
}));
image_shapes_[mem_id] = image_shape;
}
} // namespace mace
\ No newline at end of file
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
#define MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
#include "mace/core/preallocated_pooled_allocator.h"
#include <unordered_map>
namespace mace {
// Image pool backed by the OpenCL device allocator. Images are created once
// through PreallocateImage and afterwards looked up by memory id; the pool
// keeps ownership of every image it created.
class OpenCLPreallocatedPooledAllocator : public PreallocatedPooledAllocator {
 public:
  OpenCLPreallocatedPooledAllocator();
  ~OpenCLPreallocatedPooledAllocator() override;

  // Creates and stores the image for `mem_id`; defined in the .cc file.
  void PreallocateImage(int mem_id,
                        const std::vector<size_t> &image_shape,
                        DataType data_type) override;

  // Returns the raw handle of the image stored under `mem_id`. Aborts via
  // MACE_CHECK when no image is stored under that id. The handle stays owned
  // by the pool.
  inline void *GetImage(int mem_id) override {
    // Single lookup serves both the existence check and the access.
    auto image_iter = images_.find(mem_id);
    MACE_CHECK(image_iter != images_.end(), "image does not exist");
    return image_iter->second.get();
  }

  // Reports whether an image is stored under `mem_id`.
  inline bool HasImage(int mem_id) override {
    return images_.find(mem_id) != images_.end();
  }

  // Returns a copy of the shape recorded for `mem_id`, or an empty vector
  // when no shape has been recorded for that id.
  inline std::vector<size_t> GetImageSize(int mem_id) override {
    // find() instead of operator[]: a query for an unknown id must not insert
    // an empty entry into the shape table.
    auto shape_iter = image_shapes_.find(mem_id);
    if (shape_iter == image_shapes_.end()) {
      return std::vector<size_t>();
    }
    return shape_iter->second;
  }

 private:
  // Image handles keyed by memory id; each handle carries the deleter that
  // releases it.
  std::unordered_map<int, std::unique_ptr<void, std::function<void(void *)>>>
      images_;
  // Shape each image was created with, keyed by memory id.
  std::unordered_map<int, std::vector<size_t>> image_shapes_;
  // Allocator used to create and release images; stored as a raw pointer and
  // never deleted by this class.
  Allocator *allocator;
};
} // namespace mace
#endif // MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/core/public/mace.h" #include "mace/core/public/mace.h"
#include "preallocated_pooled_allocator.h"
namespace mace { namespace mace {
...@@ -71,7 +72,8 @@ class Tensor { ...@@ -71,7 +72,8 @@ class Tensor {
buffer_(nullptr), buffer_(nullptr),
data_(nullptr), data_(nullptr),
unused_(false), unused_(false),
is_image_(false){}; is_image_(false){
};
Tensor(Allocator *alloc, DataType type) Tensor(Allocator *alloc, DataType type)
: alloc_(alloc), : alloc_(alloc),
...@@ -80,18 +82,10 @@ class Tensor { ...@@ -80,18 +82,10 @@ class Tensor {
buffer_(nullptr), buffer_(nullptr),
data_(nullptr), data_(nullptr),
unused_(false), unused_(false),
is_image_(false){}; is_image_(false){
};
~Tensor() { ~Tensor() {
MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before destroy");
if (buffer_ != nullptr) {
MACE_CHECK_NOTNULL(alloc_);
if (is_image_) {
alloc_->DeleteImage(buffer_);
} else {
alloc_->Delete(buffer_);
}
}
} }
inline DataType dtype() const { return dtype_; } inline DataType dtype() const { return dtype_; }
...@@ -132,13 +126,13 @@ class Tensor { ...@@ -132,13 +126,13 @@ class Tensor {
inline void Map() const { inline void Map() const {
if (!OnHost()) { if (!OnHost()) {
MACE_CHECK(buffer_ != nullptr && data_ == nullptr); MACE_CHECK(buffer_ != nullptr && data_ == nullptr);
data_ = alloc_->Map(buffer_, size_ * SizeOfType()); data_ = alloc_->Map(buffer_.get(), size_ * SizeOfType());
} }
} }
inline void MapImage(std::vector<size_t> &mapped_image_pitch) const { inline void MapImage(std::vector<size_t> &mapped_image_pitch) const {
MACE_CHECK(!OnHost() && buffer_ != nullptr && data_ == nullptr); MACE_CHECK(!OnHost() && buffer_ != nullptr && data_ == nullptr);
data_ = alloc_->MapImage(buffer_, image_shape_, mapped_image_pitch); data_ = alloc_->MapImage(buffer_.get(), image_shape_, mapped_image_pitch);
} }
/* /*
...@@ -147,12 +141,12 @@ class Tensor { ...@@ -147,12 +141,12 @@ class Tensor {
inline void Unmap() const { inline void Unmap() const {
if (!OnHost()) { if (!OnHost()) {
MACE_CHECK(buffer_ != nullptr && data_ != nullptr); MACE_CHECK(buffer_ != nullptr && data_ != nullptr);
alloc_->Unmap(buffer_, data_); alloc_->Unmap(buffer_.get(), data_);
data_ = nullptr; data_ = nullptr;
} }
} }
void *buffer() const { return buffer_; } void *buffer() const { return buffer_.get(); }
inline const void *raw_data() const { inline const void *raw_data() const {
void *data = MappedBuffer(); void *data = MappedBuffer();
...@@ -181,42 +175,51 @@ class Tensor { ...@@ -181,42 +175,51 @@ class Tensor {
} }
inline void Resize(const vector<index_t> &shape) { inline void Resize(const vector<index_t> &shape) {
MACE_CHECK(!is_image_ || buffer_ == nullptr,
"Resize is not for image, use ResizeImage instead.");
is_image_ = false;
shape_ = shape; shape_ = shape;
index_t size = NumElements(); index_t size = NumElements();
if (size_ != size || is_image_) { if (size_ != size) {
size_ = size; size_ = size;
MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize"); MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize");
if (is_image_) { CASES(dtype_,
alloc_->DeleteImage(buffer_); (buffer_ =
} else { std::move(std::unique_ptr<void, std::function<void(void *)>>(
alloc_->Delete(buffer_); alloc_->New(size_ * sizeof(T)),
} [this](void *p) {
is_image_ = false; this->alloc_->Delete(p);
CASES(dtype_, buffer_ = alloc_->New(size_ * sizeof(T))); })
)));
} }
} }
inline void ResizeImage(const vector<index_t> &shape, inline void ResizeImage(const vector<index_t> &shape,
const std::vector<size_t> &image_shape) { const std::vector<size_t> &image_shape) {
MACE_CHECK(is_image_ || buffer_ == nullptr,
"ResizeImage is not for buffer, use Resize instead.");
is_image_ = true;
shape_ = shape; shape_ = shape;
index_t size = NumElements(); index_t size = NumElements();
if (size_ != size || !is_image_) { if (size_ != size) {
size_ = size; size_ = size;
MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize"); image_shape_ = image_shape;
if (!preallocated_image_shape_.empty()) {
if (is_image_ && !image_shape_.empty()) { MACE_CHECK(preallocated_image_shape_[0] >= image_shape[0]
MACE_ASSERT(image_shape_.size() == 2 && preallocated_image_shape_[1] >= image_shape[1],
&& image_shape_[0] >= image_shape[0] "image shape not large enough: preallocated ",
|| image_shape_[1] >= image_shape[1], preallocated_image_shape_[0],
"image shape not large enough"); " ",
} preallocated_image_shape_[1],
if (!is_image_ && buffer_ != nullptr) { "apply for ",
alloc_->Delete(buffer_); image_shape[0],
} " ",
is_image_ = true; image_shape[1]);
if (image_shape_.empty()) { } else {
image_shape_ = image_shape; buffer_ = std::move(std::unique_ptr<void, std::function<void(void *)>>(
buffer_ = alloc_->NewImage(image_shape, dtype_); alloc_->NewImage(image_shape, dtype_),
[this](void *p) { this->alloc_->DeleteImage(p); }));
preallocated_image_shape_ = image_shape;
} }
} }
} }
...@@ -237,15 +240,14 @@ class Tensor { ...@@ -237,15 +240,14 @@ class Tensor {
} }
} }
inline void AllocateImageMemory(const std::vector<size_t> &image_shape) { inline void PreallocateImage(void *image,
const std::vector<size_t>& image_shape) {
is_image_ = true; is_image_ = true;
if (image_shape_ != image_shape) { buffer_ = std::move(std::unique_ptr<void, std::function<void(void *)>>(
if (buffer_ != nullptr) { image, [](void *p) {
alloc_->DeleteImage(buffer_); // tensor does not have ownership of preallocated memory
} }));
image_shape_ = image_shape; preallocated_image_shape_ = image_shape;
buffer_ = alloc_->NewImage(image_shape, dtype_);
}
} }
template <typename T> template <typename T>
...@@ -273,7 +275,7 @@ class Tensor { ...@@ -273,7 +275,7 @@ class Tensor {
inline void DebugPrint() const { inline void DebugPrint() const {
using namespace numerical_chars; using namespace numerical_chars;
std::stringstream os; std::stringstream os;
for (int i : shape_) { for (index_t i : shape_) {
os << i << ", "; os << i << ", ";
} }
...@@ -336,7 +338,7 @@ class Tensor { ...@@ -336,7 +338,7 @@ class Tensor {
private: private:
inline void *MappedBuffer() const { inline void *MappedBuffer() const {
if (OnHost()) { if (OnHost()) {
return buffer_; return buffer_.get();
} }
return data_; return data_;
} }
...@@ -346,7 +348,7 @@ class Tensor { ...@@ -346,7 +348,7 @@ class Tensor {
DataType dtype_; DataType dtype_;
// Raw buffer, must be mapped as host accessable data before // Raw buffer, must be mapped as host accessable data before
// read or write // read or write
void *buffer_; std::unique_ptr<void, std::function<void(void*)>> buffer_;
// Mapped buffer // Mapped buffer
mutable void *data_; mutable void *data_;
vector<index_t> shape_; vector<index_t> shape_;
...@@ -354,6 +356,7 @@ class Tensor { ...@@ -354,6 +356,7 @@ class Tensor {
bool unused_; bool unused_;
bool is_image_; bool is_image_;
std::vector<size_t> image_shape_; std::vector<size_t> image_shape_;
std::vector<size_t> preallocated_image_shape_;
DISABLE_COPY_AND_ASSIGN(Tensor); DISABLE_COPY_AND_ASSIGN(Tensor);
}; };
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "mace/core/workspace.h" #include "mace/core/workspace.h"
#include "mace/core/serializer.h" #include "mace/core/serializer.h"
#include "mace/core/arg_helper.h" #include "mace/core/arg_helper.h"
#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
namespace mace { namespace mace {
...@@ -23,7 +24,7 @@ Tensor *Workspace::CreateTensor(const string &name, ...@@ -23,7 +24,7 @@ Tensor *Workspace::CreateTensor(const string &name,
VLOG(1) << "Tensor " << name << " already exists. Skipping."; VLOG(1) << "Tensor " << name << " already exists. Skipping.";
} else { } else {
VLOG(1) << "Creating Tensor " << name; VLOG(1) << "Creating Tensor " << name;
tensor_map_[name] = unique_ptr<Tensor>(new Tensor(alloc, type)); tensor_map_[name] = std::move(std::unique_ptr<Tensor>(new Tensor(alloc, type)));
} }
return GetTensor(name); return GetTensor(name);
} }
...@@ -84,25 +85,45 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) { ...@@ -84,25 +85,45 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) { if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) {
return; return;
} }
std::map<std::string, std::shared_ptr<Tensor>> mem_tensor_map; preallocated_allocator_ =
const DataType dtype = static_cast<DataType>( std::move(std::unique_ptr<PreallocatedPooledAllocator>(
ArgumentHelper::GetSingleArgument<OperatorDef, int>( new OpenCLPreallocatedPooledAllocator));
net_def.op(0),
DataType dtype = DataType::DT_INVALID;
// We use the data type of the first op (with mem id, must be image),
// as GPU have consistent data type for each layer for now.
// As DSP may have different data output type for each op,
// we stick to the same concept.
for (auto &op: net_def.op()) {
if (op.has_mem_id()) {
const DataType op_dtype = static_cast<DataType>(
ArgumentHelper::GetSingleArgument<OperatorDef, int>(
op,
"T", "T",
static_cast<int>(DT_FLOAT))); static_cast<int>(DT_FLOAT)));
if (op_dtype != DataType::DT_INVALID) {
dtype = op_dtype;
// find first valid data type, break
break;
}
}
}
MACE_CHECK(dtype != DataType::DT_INVALID, "data type is invalid.");
for (auto &mem_block: net_def.mem_arena().mem_block()) { for (auto &mem_block: net_def.mem_arena().mem_block()) {
string mem_block_name = MemBlockName(mem_block.mem_id()); preallocated_allocator_->PreallocateImage(mem_block.mem_id(),
mem_tensor_map[mem_block_name].reset(new Tensor( {mem_block.x(), mem_block.y()},
GetDeviceAllocator(DeviceType::OPENCL), dtype);
dtype));
mem_tensor_map[mem_block_name]->AllocateImageMemory({mem_block.x(),
mem_block.y()});
} }
VLOG(1) << "Preallocate image to tensors";
auto allocator = GetDeviceAllocator(DeviceType::OPENCL);
for (auto &op: net_def.op()) { for (auto &op: net_def.op()) {
if (op.has_mem_id()) { if (op.has_mem_id()) {
tensor_map_[op.output(0)] = mem_tensor_map[MemBlockName(op.mem_id())]; CreateTensor(op.output(0), allocator, dtype);
tensor_map_[op.output(0)]->PreallocateImage(
preallocated_allocator_->GetImage(op.mem_id()),
preallocated_allocator_->GetImageSize(op.mem_id()));
} }
} }
} }
} // namespace mace } // namespace mace
\ No newline at end of file
...@@ -8,14 +8,17 @@ ...@@ -8,14 +8,17 @@
#include "mace/core/common.h" #include "mace/core/common.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/public/mace.h" #include "mace/core/public/mace.h"
#include "mace/core/preallocated_pooled_allocator.h"
namespace mace { namespace mace {
class Workspace { class Workspace {
public: public:
typedef map<string, std::shared_ptr<Tensor>> TensorMap; typedef map<string, std::unique_ptr<Tensor>> TensorMap;
Workspace() {} Workspace()
: preallocated_allocator_(nullptr) {}
~Workspace() {}
vector<string> Tensors() const; vector<string> Tensors() const;
...@@ -35,15 +38,13 @@ class Workspace { ...@@ -35,15 +38,13 @@ class Workspace {
void LoadModelTensor(const NetDef &net_def, DeviceType type); void LoadModelTensor(const NetDef &net_def, DeviceType type);
inline std::string MemBlockName(int mem_id) const {
return internal::MakeString("mem_block_", mem_id);
};
private: private:
void CreateImageOutputTensor(const NetDef &net_def); void CreateImageOutputTensor(const NetDef &net_def);
TensorMap tensor_map_; TensorMap tensor_map_;
std::unique_ptr<PreallocatedPooledAllocator> preallocated_allocator_;
DISABLE_COPY_AND_ASSIGN(Workspace); DISABLE_COPY_AND_ASSIGN(Workspace);
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册