Commit 537b4600 authored by 刘琦

Merge branch 'master' into 'master'

Separate physical memory pool from logical tensor

See merge request !224
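In short, the Workspace now owns a PreallocatedPooledAllocator that holds the physical image memory, while tensors only borrow an image by mem_id instead of allocating and owning one. A minimal sketch of the intended flow, assuming the interfaces introduced below (the mem_id, image shape, and tensor name are illustrative, and workspace stands for an existing Workspace instance):

// Illustrative sketch only, not part of this diff.
std::unique_ptr<PreallocatedPooledAllocator> pool(
    new OpenCLPreallocatedPooledAllocator);
pool->PreallocateImage(/* mem_id */ 0, /* image_shape */ {1024, 256}, DT_FLOAT);

// The tensor keeps a non-owning pointer to the pooled image.
Tensor *tensor = workspace->CreateTensor(
    "op0_output", GetDeviceAllocator(DeviceType::OPENCL), DT_FLOAT);
tensor->PreallocateImage(pool->GetImage(0), pool->GetImageSize(0));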
mace/core/preallocated_pooled_allocator.h
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
#define MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
#include "mace/core/allocator.h"
namespace mace {
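// Interface for a pool of device images preallocated up front and keyed by
// mem_id, so that physical memory can be managed separately from the logical
// tensors that reference it.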
class PreallocatedPooledAllocator {
 public:
  PreallocatedPooledAllocator() {}

  virtual ~PreallocatedPooledAllocator() noexcept {}

  virtual void PreallocateImage(int mem_id,
                                const std::vector<size_t> &image_shape,
                                DataType data_type) = 0;

  virtual void *GetImage(int mem_id) = 0;

  virtual bool HasImage(int mem_id) = 0;

  virtual std::vector<size_t> GetImageSize(int mem_id) = 0;
};
} // namespace mace
#endif // MACE_CORE_PREALLOCATED_POOLED_ALLOCATOR_H_
mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.cc
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
namespace mace {
OpenCLPreallocatedPooledAllocator::OpenCLPreallocatedPooledAllocator()
    : allocator(GetDeviceAllocator(DeviceType::OPENCL)) {
}

OpenCLPreallocatedPooledAllocator::~OpenCLPreallocatedPooledAllocator() {
}

void OpenCLPreallocatedPooledAllocator::PreallocateImage(
    int mem_id, const std::vector<size_t> &image_shape, DataType data_type) {
  MACE_CHECK(!this->HasImage(mem_id), "Memory already exists: ", mem_id);
  VLOG(3) << "Preallocate OpenCL image: " << mem_id << " "
          << image_shape[0] << ", " << image_shape[1];
  images_[mem_id] =
      std::move(std::unique_ptr<void, std::function<void(void *)>>(
          allocator->NewImage(image_shape, data_type),
          [this](void *p) { this->allocator->DeleteImage(p); }));
  image_shapes_[mem_id] = image_shape;
}
} // namespace mace
\ No newline at end of file
mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h
//
// Copyright (c) 2018 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
#define MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
#include "mace/core/preallocated_pooled_allocator.h"
#include <unordered_map>
namespace mace {
class OpenCLPreallocatedPooledAllocator : public PreallocatedPooledAllocator {
 public:
  OpenCLPreallocatedPooledAllocator();

  ~OpenCLPreallocatedPooledAllocator() override;

  void PreallocateImage(int mem_id,
                        const std::vector<size_t> &image_shape,
                        DataType data_type) override;

  inline void *GetImage(int mem_id) override {
    MACE_CHECK(HasImage(mem_id), "image does not exist");
    return images_[mem_id].get();
  }

  inline bool HasImage(int mem_id) override {
    return images_.find(mem_id) != images_.end();
  }

  inline std::vector<size_t> GetImageSize(int mem_id) override {
    return image_shapes_[mem_id];
  }

 private:
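  // Pooled images owned by this allocator, keyed by mem_id; each entry's
  // deleter returns the image to the underlying device allocator.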
  std::unordered_map<int, std::unique_ptr<void, std::function<void(void *)>>>
      images_;
  std::unordered_map<int, std::vector<size_t>> image_shapes_;
  Allocator *allocator;
};
} // namespace mace
#endif // MACE_CORE_RUNTIME_OPENCL_PREALLOCATED_POOLED_ALLOCATOR_H_
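As a usage note (again an illustrative sketch, with made-up ids and shape): the pool retains ownership of every image it creates, so GetImage returns a borrowed pointer, and GetImageSize reports the preallocated extent that a later ResizeImage must not exceed.

// Illustrative sketch only, not part of this diff.
OpenCLPreallocatedPooledAllocator pool;
pool.PreallocateImage(3, {512, 128}, DT_FLOAT);
if (pool.HasImage(3)) {
  void *image = pool.GetImage(3);                   // borrowed; the pool keeps ownership
  std::vector<size_t> size = pool.GetImageSize(3);  // {512, 128}
  // Bind image/size to a tensor via Tensor::PreallocateImage(image, size);
  // the tensor's deleter is a no-op, so the pool must outlive such tensors.
}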
mace/core/tensor.h
@@ -10,6 +10,7 @@
#include "mace/utils/logging.h"
#include "mace/core/types.h"
#include "mace/core/public/mace.h"
#include "preallocated_pooled_allocator.h"
namespace mace {
@@ -71,7 +72,8 @@ class Tensor {
buffer_(nullptr),
data_(nullptr),
unused_(false),
is_image_(false){};
is_image_(false){
};
Tensor(Allocator *alloc, DataType type)
: alloc_(alloc),
@@ -80,18 +82,10 @@ class Tensor {
buffer_(nullptr),
data_(nullptr),
unused_(false),
is_image_(false){};
is_image_(false){
};
~Tensor() {
MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before destroy");
if (buffer_ != nullptr) {
MACE_CHECK_NOTNULL(alloc_);
if (is_image_) {
alloc_->DeleteImage(buffer_);
} else {
alloc_->Delete(buffer_);
}
}
}
inline DataType dtype() const { return dtype_; }
@@ -132,13 +126,13 @@ class Tensor {
inline void Map() const {
if (!OnHost()) {
MACE_CHECK(buffer_ != nullptr && data_ == nullptr);
data_ = alloc_->Map(buffer_, size_ * SizeOfType());
data_ = alloc_->Map(buffer_.get(), size_ * SizeOfType());
}
}
inline void MapImage(std::vector<size_t> &mapped_image_pitch) const {
MACE_CHECK(!OnHost() && buffer_ != nullptr && data_ == nullptr);
data_ = alloc_->MapImage(buffer_, image_shape_, mapped_image_pitch);
data_ = alloc_->MapImage(buffer_.get(), image_shape_, mapped_image_pitch);
}
/*
@@ -147,12 +141,12 @@ class Tensor {
inline void Unmap() const {
if (!OnHost()) {
MACE_CHECK(buffer_ != nullptr && data_ != nullptr);
alloc_->Unmap(buffer_, data_);
alloc_->Unmap(buffer_.get(), data_);
data_ = nullptr;
}
}
void *buffer() const { return buffer_; }
void *buffer() const { return buffer_.get(); }
inline const void *raw_data() const {
void *data = MappedBuffer();
@@ -181,42 +175,51 @@ class Tensor {
}
inline void Resize(const vector<index_t> &shape) {
MACE_CHECK(!is_image_ || buffer_ == nullptr,
"Resize is not for image, use ResizeImage instead.");
is_image_ = false;
shape_ = shape;
index_t size = NumElements();
if (size_ != size || is_image_) {
if (size_ != size) {
size_ = size;
MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize");
if (is_image_) {
alloc_->DeleteImage(buffer_);
} else {
alloc_->Delete(buffer_);
}
is_image_ = false;
CASES(dtype_, buffer_ = alloc_->New(size_ * sizeof(T)));
CASES(dtype_,
(buffer_ =
std::move(std::unique_ptr<void, std::function<void(void *)>>(
alloc_->New(size_ * sizeof(T)),
[this](void *p) {
this->alloc_->Delete(p);
})
)));
}
}
inline void ResizeImage(const vector<index_t> &shape,
const std::vector<size_t> &image_shape) {
MACE_CHECK(is_image_ || buffer_ == nullptr,
"ResizeImage is not for buffer, use Resize instead.");
is_image_ = true;
shape_ = shape;
index_t size = NumElements();
if (size_ != size || !is_image_) {
if (size_ != size) {
size_ = size;
MACE_CHECK(data_ == nullptr, "Buffer must be unmapped before resize");
if (is_image_ && !image_shape_.empty()) {
MACE_ASSERT(image_shape_.size() == 2
&& image_shape_[0] >= image_shape[0]
|| image_shape_[1] >= image_shape[1],
"image shape not large enough");
}
if (!is_image_ && buffer_ != nullptr) {
alloc_->Delete(buffer_);
}
is_image_ = true;
if (image_shape_.empty()) {
image_shape_ = image_shape;
buffer_ = alloc_->NewImage(image_shape, dtype_);
image_shape_ = image_shape;
if (!preallocated_image_shape_.empty()) {
MACE_CHECK(preallocated_image_shape_[0] >= image_shape[0]
&& preallocated_image_shape_[1] >= image_shape[1],
"image shape not large enough: preallocated ",
preallocated_image_shape_[0],
" ",
preallocated_image_shape_[1],
"apply for ",
image_shape[0],
" ",
image_shape[1]);
} else {
buffer_ = std::move(std::unique_ptr<void, std::function<void(void *)>>(
alloc_->NewImage(image_shape, dtype_),
[this](void *p) { this->alloc_->DeleteImage(p); }));
preallocated_image_shape_ = image_shape;
}
}
}
@@ -237,15 +240,14 @@ class Tensor {
}
}
inline void AllocateImageMemory(const std::vector<size_t> &image_shape) {
inline void PreallocateImage(void *image,
const std::vector<size_t>& image_shape) {
is_image_ = true;
if (image_shape_ != image_shape) {
if (buffer_ != nullptr) {
alloc_->DeleteImage(buffer_);
}
image_shape_ = image_shape;
buffer_ = alloc_->NewImage(image_shape, dtype_);
}
buffer_ = std::move(std::unique_ptr<void, std::function<void(void *)>>(
image, [](void *p) {
// tensor does not have ownership of preallocated memory
}));
preallocated_image_shape_ = image_shape;
}
template <typename T>
@@ -273,7 +275,7 @@ class Tensor {
inline void DebugPrint() const {
using namespace numerical_chars;
std::stringstream os;
for (int i : shape_) {
for (index_t i : shape_) {
os << i << ", ";
}
@@ -336,7 +338,7 @@ class Tensor {
private:
inline void *MappedBuffer() const {
if (OnHost()) {
return buffer_;
return buffer_.get();
}
return data_;
}
@@ -346,7 +348,7 @@ class Tensor {
DataType dtype_;
// Raw buffer, must be mapped as host accessible data before
// read or write
void *buffer_;
std::unique_ptr<void, std::function<void(void*)>> buffer_;
// Mapped buffer
mutable void *data_;
vector<index_t> shape_;
@@ -354,6 +356,7 @@ class Tensor {
bool unused_;
bool is_image_;
std::vector<size_t> image_shape_;
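// Shape of the preallocated image backing this tensor; ResizeImage checks
// that requested image shapes stay within it.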
std::vector<size_t> preallocated_image_shape_;
DISABLE_COPY_AND_ASSIGN(Tensor);
};
mace/core/workspace.cc
@@ -5,6 +5,7 @@
#include "mace/core/workspace.h"
#include "mace/core/serializer.h"
#include "mace/core/arg_helper.h"
#include "mace/core/runtime/opencl/opencl_preallocated_pooled_allocator.h"
namespace mace {
@@ -23,7 +24,7 @@ Tensor *Workspace::CreateTensor(const string &name,
VLOG(1) << "Tensor " << name << " already exists. Skipping.";
} else {
VLOG(1) << "Creating Tensor " << name;
tensor_map_[name] = unique_ptr<Tensor>(new Tensor(alloc, type));
tensor_map_[name] = std::move(std::unique_ptr<Tensor>(new Tensor(alloc, type)));
}
return GetTensor(name);
}
@@ -84,25 +85,45 @@ void Workspace::CreateImageOutputTensor(const NetDef &net_def) {
if (!net_def.has_mem_arena() || net_def.mem_arena().mem_block_size() == 0) {
return;
}
std::map<std::string, std::shared_ptr<Tensor>> mem_tensor_map;
const DataType dtype = static_cast<DataType>(
ArgumentHelper::GetSingleArgument<OperatorDef, int>(
net_def.op(0),
preallocated_allocator_ =
std::move(std::unique_ptr<PreallocatedPooledAllocator>(
new OpenCLPreallocatedPooledAllocator));
DataType dtype = DataType::DT_INVALID;
// We use the data type of the first op with a mem_id (which must be an image),
// as the GPU uses a consistent data type across layers for now.
// The DSP may output a different data type for each op, but we stick to the
// same convention here.
for (auto &op: net_def.op()) {
if (op.has_mem_id()) {
const DataType op_dtype = static_cast<DataType>(
ArgumentHelper::GetSingleArgument<OperatorDef, int>(
op,
"T",
static_cast<int>(DT_FLOAT)));
if (op_dtype != DataType::DT_INVALID) {
dtype = op_dtype;
// find first valid data type, break
break;
}
}
}
MACE_CHECK(dtype != DataType::DT_INVALID, "data type is invalid.");
for (auto &mem_block: net_def.mem_arena().mem_block()) {
string mem_block_name = MemBlockName(mem_block.mem_id());
mem_tensor_map[mem_block_name].reset(new Tensor(
GetDeviceAllocator(DeviceType::OPENCL),
dtype));
mem_tensor_map[mem_block_name]->AllocateImageMemory({mem_block.x(),
mem_block.y()});
preallocated_allocator_->PreallocateImage(mem_block.mem_id(),
{mem_block.x(), mem_block.y()},
dtype);
}
VLOG(1) << "Preallocate image to tensors";
auto allocator = GetDeviceAllocator(DeviceType::OPENCL);
for (auto &op: net_def.op()) {
if (op.has_mem_id()) {
tensor_map_[op.output(0)] = mem_tensor_map[MemBlockName(op.mem_id())];
CreateTensor(op.output(0), allocator, dtype);
tensor_map_[op.output(0)]->PreallocateImage(
preallocated_allocator_->GetImage(op.mem_id()),
preallocated_allocator_->GetImageSize(op.mem_id()));
}
}
}
} // namespace mace
\ No newline at end of file
} // namespace mace
mace/core/workspace.h
@@ -8,14 +8,17 @@
#include "mace/core/common.h"
#include "mace/core/tensor.h"
#include "mace/core/public/mace.h"
#include "mace/core/preallocated_pooled_allocator.h"
namespace mace {
class Workspace {
public:
typedef map<string, std::shared_ptr<Tensor>> TensorMap;
typedef map<string, std::unique_ptr<Tensor>> TensorMap;
Workspace() {}
Workspace()
: preallocated_allocator_(nullptr) {}
~Workspace() {}
vector<string> Tensors() const;
@@ -35,15 +38,13 @@ class Workspace {
void LoadModelTensor(const NetDef &net_def, DeviceType type);
inline std::string MemBlockName(int mem_id) const {
return internal::MakeString("mem_block_", mem_id);
};
private:
void CreateImageOutputTensor(const NetDef &net_def);
TensorMap tensor_map_;
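// Physical image memory pool; tensors created from the mem arena borrow
// their images from this pool rather than owning them.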
std::unique_ptr<PreallocatedPooledAllocator> preallocated_allocator_;
DISABLE_COPY_AND_ASSIGN(Workspace);
};