Commit 537b4600 authored by 刘琦

Merge branch 'master' into 'master'

Separate physical memory pool from logical tensor

See merge request !224
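In short, the Workspace now owns a PreallocatedPooledAllocator that holds the physical image memory, while tensors only borrow an image by mem_id instead of allocating and owning one. A minimal sketch of the intended flow, assuming the interfaces introduced below (the mem_id, image shape, and tensor name are illustrative, and workspace stands for an existing Workspace instance):

// Illustrative sketch only, not part of this diff.
std::unique_ptr<PreallocatedPooledAllocator> pool(
    new OpenCLPreallocatedPooledAllocator);
pool->PreallocateImage(/* mem_id */ 0, /* image_shape */ {1024, 256}, DT_FLOAT);

// The tensor keeps a non-owning pointer to the pooled image.
Tensor *tensor = workspace->CreateTensor(
    "op0_output", GetDeviceAllocator(DeviceType::OPENCL), DT_FLOAT);
tensor->PreallocateImage(pool->GetImage(0), pool->GetImageSize(0));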
mace/core/preallocated_pooled_allocator.h
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
#define MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
#include "mace/core/allocator.h"
namespace mace {
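// Interface for a pool of device images preallocated up front and keyed by
// mem_id, so that physical memory can be managed separately from the logical
// tensors that reference it.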
class PreallocatedPooledAllocator {
 public:
  PreallocatedPooledAllocator() {}

  virtual ~PreallocatedPooledAllocator() noexcept {}

  virtual void PreallocateImage(int mem_id,
                                const std::vector<size_t> &image_shape,
                                DataType data_type) = 0;

  virtual void *GetImage(int mem_id) = 0;

  virtual bool HasImage(int mem_id) = 0;

  virtual std::vector<size_t> GetImageSize(int mem_id) = 0;
};
} // namespace mace
#endif // MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
namespace mace {
OpenCLPreallocatedPooledAllocator::OpenCLPreallocatedPooledAllocator()
    : allocator(GetDeviceAllocator(DeviceType::OPENCL)) {
}

OpenCLPreallocatedPooledAllocator::~OpenCLPreallocatedPooledAllocator() {
}

void OpenCLPreallocatedPooledAllocator::PreallocateImage(
    int mem_id, const std::vector<size_t> &image_shape, DataType data_type) {
  MACE_CHECK(!this->HasImage(mem_id), "Memory already exists: ", mem_id);
  VLOG(3) << "Preallocate OpenCL image: " << mem_id << " "
          << image_shape[0] << ", " << image_shape[1];
  images_[mem_id] =
      std::move(std::unique_ptr<void, std::function<void(void *)>>(
          allocator->NewImage(image_shape, data_type),
          [this](void *p) { this->allocator->DeleteImage(p); }));
  image_shapes_[mem_id] = image_shape;
}
} // namespace mace
\ No newline at end of file
mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
#define MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
#include "mace/core/preallocated_pooled_allocator.h"
#include <unordered_map>
namespace mace {
class OpenCLPreallocatedPooledAllocator : public PreallocatedPooledAllocator {
 public:
  OpenCLPreallocatedPooledAllocator();

  ~OpenCLPreallocatedPooledAllocator() override;

  void PreallocateImage(int mem_id,
                        const std::vector<size_t> &image_shape,
                        DataType data_type) override;

  inline void *GetImage(int mem_id) override {
    MACE_CHECK(HasImage(mem_id), "image does not exist");
    return images_[mem_id].get();
  }

  inline bool HasImage(int mem_id) override {
    return images_.find(mem_id) != images_.end();
  }

  inline std::vector<size_t> GetImageSize(int mem_id) override {
    return image_shapes_[mem_id];
  }

 private:
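  // Pooled images owned by this allocator, keyed by mem_id; each entry's
  // deleter returns the image to the underlying device allocator.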
  std::unordered_map<int, std::unique_ptr<void, std::function<void(void *)>>>
      images_;
  std::unordered_map<int, std::vector<size_t>> image_shapes_;
  Allocator *allocator;
};
} // namespace mace
#endif // MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
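As a usage note (again an illustrative sketch, with made-up ids and shape): the pool retains ownership of every image it creates, so GetImage returns a borrowed pointer, and GetImageSize reports the preallocated extent that a later ResizeImage must not exceed.

// Illustrative sketch only, not part of this diff.
OpenCLPreallocatedPooledAllocator pool;
pool.PreallocateImage(3, {512, 128}, DT_FLOAT);
if (pool.HasImage(3)) {
  void *image = pool.GetImage(3);                   // borrowed; the pool keeps ownership
  std::vector<size_t> size = pool.GetImageSize(3);  // {512, 128}
  // Bind image/size to a tensor via Tensor::PreallocateImage(image, size);
  // the tensor's deleter is a no-op, so the pool must outlive such tensors.
}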
mace/core/tensor.h
@@ -10,6 +10,7 @@
#include "mace/utils/logging.h"
#include "mace/core/types.h"
#include "mace/core/public/mace.h"
#include "preallocated_pooled_allocator.h"
namespace mace {
@@ -71,7 +72,8 @@ class Tensor {
buffer_(nullptr),
data_(nullptr),
unused_(false),
is_image_(false){};
is_image_(false){
};
Tensor(Allocator *alloc, DataType type)
: alloc_(alloc),
@@ -80,18 +82,10 @@ class Tensor {
buffer_(nullptr),
data_(nullptr),
unused_(false),
is_image_(false){};
is_image_(false){
};
~Tensor() {
MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before destroy");
if (buffer_ != nullptr) {
MACE_CHECK_NOTNULL(alloc_);
if (is_image_) {
alloc_->DeleteImage(buffer_);
} else {
alloc_->Delete(buffer_);
}
}
}
inline DataType dtype() const { return dtype_; }
@@ -132,13 +126,13 @@ class Tensor {
inline void Map() const {
if (!OnHost()) {
MACE_CHECK(buffer_ != nullptr && data_ == nullptr);
data_ = alloc_->Map(buffer_, size_ * SizeOfType());
data_ = alloc_->Map(buffer_.get(), size_ * SizeOfType());
}
}
inline void MapImage(std::vector<size_t> &mapped_image_pitch) const {
MACE_CHECK(!OnHost() && buffer_ != nullptr && data_ == nullptr);
data_ = alloc_->MapImage(buffer_, image_shape_, mapped_image_pitch);
data_ = alloc_->MapImage(buffer_.get(), image_shape_, mapped_image_pitch);
}
/*
@@ -147,12 +141,12 @@ class Tensor {
inline void Unmap() const {
if (!OnHost()) {
MACE_CHECK(buffer_ != nullptr && data_ != nullptr);
alloc_->Unmap(buffer_, data_);
alloc_->Unmap(buffer_.get(), data_);
data_ = nullptr;
}
}
void *buffer() const { return buffer_; }
void *buffer() const { return buffer_.get(); }
inline const void *raw_data() const {
void *data = MappedBuffer();
@@ -181,42 +175,51 @@ class Tensor {
}
inline void Resize(const vector<index_t> &shape) {
MACE_CHECK(!is_image_ || buffer_ == nullptr,
"Resize is not for image, use ResizeImage instead.");
is_image_ = false;
shape_ = shape;
index_t size = NumElements();
if (size_ != size || is_image_) {
if (size_ != size) {
size_ = size;
MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize");
if (is_image_) {
alloc_->DeleteImage(buffer_);
} else {
alloc_->Delete(buffer_);
}
is_image_ = false;
CASES(dtype_, buffer_ = alloc_->New(size_ * sizeof(T)));
CASES(dtype_,
(buffer_ =
std::move(std::unique_ptr<void, std::function<void(void *)>>(
alloc_->New(size_ * sizeof(T)),
[this](void *p) {
this->alloc_->Delete(p);
})
)));
}
}
inline void ResizeImage(const vector<index_t> &shape,
const std::vector<size_t> &image_shape) {
MACE_CHECK(is_image_ || buffer_ == nullptr,
"ResizeImage is not for buffer, use Resize instead.");
is_image_ = true;
shape_ = shape;
index_t size = NumElements();
if (size_ != size || !is_image_) {
if (size_ != size) {
size_ = size;
MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize");
if (is_image_ && !image_shape_.empty()) {
MACE_ASSERT(image_shape_.size() == 2
&& image_shape_[0] >= image_shape[0]
|| image_shape_[1] >= image_shape[1],
"image shape not large enough");
}
if (!is_image_ && buffer_ != nullptr) {
alloc_->Delete(buffer_);
}
is_image_ = true;
if (image_shape_.empty()) {
image_shape_ = image_shape;
buffer_ = alloc_->NewImage(image_shape, dtype_);
image_shape_ = image_shape;
if (!preallocated_image_shape_.empty()) {
MACE_CHECK(preallocated_image_shape_[0] >= image_shape[0]
&& preallocated_image_shape_[1] >= image_shape[1],
"image shape not large enough: preallocated ",
preallocated_image_shape_[0],
" ",
preallocated_image_shape_[1],
"apply for ",
image_shape[0],
" ",
image_shape[1]);
} else {
buffer_ = std::move(std::unique_ptr<void, std::function<void(void *)>>(
alloc_->NewImage(image_shape, dtype_),
[this](void *p) { this->alloc_->DeleteImage(p); }));
preallocated_image_shape_ = image_shape;
}
}
}
@@ -237,15 +240,14 @@ class Tensor {
}
}
inline void AllocateImageMemory(const std::vector<size_t> &image_shape) {
inline void PreallocateImage(void *image,
const std::vector<size_t>& image_shape) {
is_image_ = true;
if (image_shape_ != image_shape) {
if (buffer_ != nullptr) {
alloc_->DeleteImage(buffer_);
}
image_shape_ = image_shape;
buffer_ = alloc_->NewImage(image_shape, dtype_);
}
buffer_ = std::move(std::unique_ptr<void, std::function<void(void *)>>(
image, [](void *p) {
// tensor does not have ownership of preallocated memory
}));
preallocated_image_shape_ = image_shape;
}
template <typename T>
@@ -273,7 +275,7 @@ class Tensor {
inline void DebugPrint() const {
using namespace numerical_chars;
std::stringstream os;
for (int i : shape_) {
for (index_t i : shape_) {
os << i << ", ";
}
@@ -336,7 +338,7 @@ class Tensor {
private:
inline void *MappedBuffer() const {
if (OnHost()) {
return buffer_;
return buffer_.get();
}
return data_;
}
@@ -346,7 +348,7 @@ class Tensor {
DataType dtype_;
// Raw buffer, must be mapped as host accessible data before
// read or write
void *buffer_;
std::unique_ptr<void, std::function<void(void*)>> buffer_;
// Mapped buffer
mutable void *data_;
vector<index_t> shape_;
@@ -354,6 +356,7 @@ class Tensor {
bool unused_;
bool is_image_;
std::vector<size_t> image_shape_;
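// Shape of the preallocated image backing this tensor; ResizeImage checks
// that requested image shapes stay within it.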
std::vector<size_t> preallocated_image_shape_;
DISABLE_COPY_AND_ASSIGN(Tensor);
};
mace/core/workspace.cc
@@ -5,6 +5,7 @@
#include "mace/core/workspace.h"
#include "mace/core/serializer.h"
#include "mace/core/arg_helper.h"
#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
namespace mace {
@@ -23,7 +24,7 @@ Tensor *Workspace::CreateTensor(const string &name,
VLOG(1) << "Tensor " << name << " already exists. Skipping.";
} else {
VLOG(1) << "Creating Tensor " << name;
tensor_map_[name] = unique_ptr<Tensor>(new Tensor(alloc, type));
tensor_map_[name] = std::move(std::unique_ptr<Tensor>(new Tensor(alloc, type)));
}
return GetTensor(name);
}
@@ -84,25 +85,45 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) {
return;
}
std::map<std::string, std::shared_ptr<Tensor>> mem_tensor_map;
const DataType dtype = static_cast<DataType>(
ArgumentHelper::GetSingleArgument<OperatorDef, int>(
net_def.op(0),
preallocated_allocator_ =
std::move(std::unique_ptr<PreallocatedPooledAllocator>(
new OpenCLPreallocatedPooledAllocator));
DataType dtype = DataType::DT_INVALID;
// We use the data type of the first op with a mem_id (which must be an image),
// as the GPU uses a consistent data type across layers for now.
// The DSP may output a different data type for each op, but we stick to the
// same convention here.
for (auto &op: net_def.op()) {
if (op.has_mem_id()) {
const DataType op_dtype = static_cast<DataType>(
ArgumentHelper::GetSingleArgument<OperatorDef, int>(
op,
"T",
static_cast<int>(DT_FLOAT)));
if (op_dtype != DataType::DT_INVALID) {
dtype = op_dtype;
// find first valid data type, break
break;
}
}
}
MACE_CHECK(dtype != DataType::DT_INVALID, "data type is invalid.");
for (auto &mem_block: net_def.mem_arena().mem_block()) {
string mem_block_name = MemBlockName(mem_block.mem_id());
mem_tensor_map[mem_block_name].reset(new Tensor(
GetDeviceAllocator(DeviceType::OPENCL),
dtype));
mem_tensor_map[mem_block_name]->AllocateImageMemory({mem_block.x(),
mem_block.y()});
preallocated_allocator_->PreallocateImage(mem_block.mem_id(),
{mem_block.x(), mem_block.y()},
dtype);
}
VLOG(1) << "Preallocate image to tensors";
auto allocator = GetDeviceAllocator(DeviceType::OPENCL);
for (auto &op: net_def.op()) {
if (op.has_mem_id()) {
tensor_map_[op.output(0)] = mem_tensor_map[MemBlockName(op.mem_id())];
CreateTensor(op.output(0), allocator, dtype);
tensor_map_[op.output(0)]->PreallocateImage(
preallocated_allocator_->GetImage(op.mem_id()),
preallocated_allocator_->GetImageSize(op.mem_id()));
}
}
}
} // namespace mace
\ No newline at end of file
} // namespace mace
mace/core/workspace.h
@@ -8,14 +8,17 @@
#include "mace/core/common.h"
#include "mace/core/tensor.h"
#include "mace/core/public/mace.h"
#include "mace/core/preallocated_pooled_allocator.h"
namespace mace {
class Workspace {
public:
typedef map<string, std::shared_ptr<Tensor>> TensorMap;
typedef map<string, std::unique_ptr<Tensor>> TensorMap;
Workspace() {}
Workspace()
: preallocated_allocator_(nullptr) {}
~Workspace() {}
vector<string> Tensors() const;
@@ -35,15 +38,13 @@ class Workspace {
void LoadModelTensor(const NetDef &net_def, DeviceType type);
inline std::string MemBlockName(int mem_id) const {
return internal::MakeString("mem_block_", mem_id);
};
private:
void CreateImageOutputTensor(const NetDef &net_def);
TensorMap tensor_map_;
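// Physical image memory pool; tensors created from the mem arena borrow
// their images from this pool rather than owning them.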
std::unique_ptr<PreallocatedPooledAllocator> preallocated_allocator_;
DISABLE_COPY_AND_ASSIGN(Workspace);
};